コード例 #1
0
def select_runnings(_dict={}):
    """Query non-example spider tasks by date window and running status.

    Keys read from ``_dict``:
        from_date:   lower bound compared against both ``create_time`` and
                     ``finish_time`` (a row matches if either is on/after it).
        data_status: 1 or 0, matched against the derived flag that is 1 when
                     ``data_status + urls_status > 0``.
        username:    any value other than ``'spider'`` restricts the result
                     to the rows owned by ``user_id``.
        user_id:     only read when ``username`` is not ``'spider'``.

    Returns whatever ``mysql.sql_read`` yields for the query.
    """
    since = _dict['from_date']
    params = [since, since, _dict['data_status']]
    query = '''
        SELECT
            a.id,
            b.username,
            b.company,
            a.create_time,
            a.update_time,
            a.group_name,
            a.task_name,
            a.timer,
            CASE WHEN a.data_status + a.urls_status > 0 THEN 1 ELSE 0 END AS 'data_status',
            a.progress,
            a.begin_time,
            a.finish_time,
            a.is_full
        FROM
            spider_setting AS a
            JOIN spider_user AS b ON b.id = a.user_id
        WHERE
            (a.create_time >= %s OR a.finish_time >= %s)
            AND a.is_example = 0
            AND (CASE WHEN a.data_status + a.urls_status > 0 THEN 1 ELSE 0 END) =%s
    '''
    # The 'spider' username is the only one that skips the owner filter.
    if _dict['username'] != 'spider':
        query = query + ' AND a.user_id =%s'
        params.append(_dict['user_id'])
    return mysql.sql_read(query, params)
コード例 #2
0
def select_details(_dict={}):
    sql = '''
                SELECT
                    data_xpath
                FROM
                    spider_setting
                WHERE
                    id =%s
            '''
    args = [_dict['id']]
    detail = mysql.sql_read(sql, args)
    detail = detail[0]["data_xpath"]
    # 固定采集
    fix_fields = json.loads(detail)['fix_fields']
    fix_fields = sorted(fix_fields, key=lambda field: field['seq'])
    fix_fields = [item['field'] for item in fix_fields]
    # 单台采集
    one_fields = json.loads(detail)['one_fields']
    one_fields = sorted(one_fields, key=lambda field: field['seq'])
    one_fields = [item['field'] for item in one_fields]
    # 多条采集
    all_fields = json.loads(detail)['all_fields']
    all_fields = sorted(all_fields, key=lambda field: field['seq'])
    all_fields = [item['field'] for item in all_fields]

    fields = fix_fields + one_fields + all_fields
    return fields
コード例 #3
0
def to_html(_dict={}):
    """Return up to 200 of the newest scraped rows of a task in a date range.

    Keys read from ``_dict`` in this function: ``id`` (used to derive the
    per-task data/urls table names), ``begin_date`` and ``end_date``
    (inclusive bounds on ``create_time``). ``_dict`` is also handed to
    ``select_setting`` and ``select_columns``.

    Returns:
        The rows produced by ``mysql.sql_read``, newest first, or an empty
        list when the setting lookup or the column lookup comes back empty,
        or when any step fails.
    """
    import logging

    def _literal(fragment):
        # Double '%' so interpolated SQL text survives the driver's
        # %-style parameter substitution unchanged.
        return str(fragment).replace('%', '%%')

    try:
        # 1. User verification: stop when the setting lookup returns nothing.
        setting = select_setting(_dict)
        if not setting:
            return []

        # 2. Column list for the SELECT clause.
        columns = select_columns(_dict)
        if not columns:
            return []

        # 3. Fetch the data. Identifiers (column list, table names) cannot be
        # bound as parameters and are still interpolated; the date bounds are
        # values, so they are passed as query parameters instead of being
        # pasted into the SQL text.
        # NOTE(review): assumes mysql.sql_read forwards ``args`` to a DB-API
        # cursor using the ``%s`` paramstyle, as the other queries here do.
        sql = '''
                SELECT
                    a.create_time AS '采集时间',
                    b.second_url AS '采集网址',
                    {}
                FROM
                    {} a JOIN {} b ON a.url_id = b.id 
                WHERE
                    a.create_time >= %s
                    AND a.create_time <= %s
                ORDER BY a.create_time DESC
                LIMIT 200
            '''.format(_literal(columns),
                       _literal(SPIDER_DATA.format(_dict['id'])),
                       _literal(SPIDER_URLS.format(_dict['id'])))
        args = [_dict['begin_date'], _dict['end_date']]
        data = mysql.sql_read(sql, args)
    except Exception:
        # Best effort: callers get an empty result, but the failure is
        # logged instead of vanishing, and KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        logging.getLogger(__name__).exception('to_html query failed')
        data = []
    return data
コード例 #4
0
ファイル: models.py プロジェクト: zhangjie0416/CaadSpider
def select_versions(_dict={}):
    """Read release entries from ``spider_version``, newest id first.

    When ``_dict['version']`` is truthy only the newest entry is fetched and
    ``update_read`` is called with ``_dict['username']`` beforehand
    (presumably marking the release notes as read for that user - confirm
    against ``update_read``). Otherwise every entry is returned.
    """
    query = 'select create_time, version, description from spider_version order by id desc'
    if not _dict['version']:
        return mysql.sql_read(query)
    update_read(_dict['username'])
    return mysql.sql_read(query + ' limit 1')
コード例 #5
0
def select_setting(_dict={}):
    """Look up the group and task name of one spider setting.

    The row is selected by ``_dict['id']``. Unless ``_dict['username']`` is
    ``'spider'``, the row must also belong to that username, so an empty
    result means the setting is missing or owned by someone else.

    Returns whatever ``mysql.sql_read`` yields for the query.
    """
    query = '''
        SELECT
            a.group_name,
            a.task_name 
        FROM
            spider_setting a
            JOIN spider_user b ON b.id = a.user_id 
        WHERE
            a.id =%s
    '''
    params = [_dict['id']]
    username = _dict['username']
    # Only the 'spider' username bypasses the ownership condition.
    if username != 'spider':
        query = query + ' AND b.username =%s'
        params.append(username)
    return mysql.sql_read(sql=query, args=params)
コード例 #6
0
ファイル: models.py プロジェクト: zhangjie0416/CaadSpider
def insert_setting(_dict={}):
    """Insert a spider setting, create its tables and return the new id.

    Keys read from ``_dict``: ``group_name``, ``task_name``, ``first_urls``,
    ``first_url``, ``urls_xpath``, ``second_url``, ``data_xpath``,
    ``is_example``, ``user_id``, ``timer`` and ``is_full``. The creation
    timestamp is generated here with one-second resolution.

    Returns:
        The id of the inserted row after ``create_table`` has been called
        for it, or ``None`` when the write reports failure or the row cannot
        be found again.
    """
    create_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    insert_sql = '''
        INSERT INTO spider_setting (create_time, group_name, task_name, first_urls,first_url, urls_xpath,second_url, data_xpath, is_example, user_id, timer, is_full)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''
    insert_args = [
        create_time, _dict["group_name"], _dict['task_name'],
        _dict["first_urls"], _dict["first_url"], _dict["urls_xpath"],
        _dict["second_url"], _dict["data_xpath"], _dict["is_example"],
        _dict['user_id'], _dict["timer"], _dict["is_full"]
    ]
    if not mysql.sql_write(insert_sql, insert_args):
        return None

    # Recover the id of the row that was just inserted. Identical settings
    # created within the same second share every matched column, so take the
    # highest id to get the newest row rather than an arbitrary one.
    # NOTE(review): still racy under concurrent identical inserts; using the
    # connection's last-insert id would be exact if the mysql helper
    # exposes it.
    lookup_sql = '''
        SELECT
            id
        FROM
            spider_setting
        WHERE
            create_time =%s
            AND group_name =%s
            AND task_name =%s
            AND first_url =%s
            AND urls_xpath =%s
            AND second_url =%s
            AND is_example =%s
            AND user_id =%s
            AND timer =%s
            AND is_full =%s
        ORDER BY id DESC
        LIMIT 1
    '''
    lookup_args = [
        create_time, _dict["group_name"], _dict['task_name'],
        _dict["first_url"], _dict["urls_xpath"], _dict["second_url"],
        _dict["is_example"], _dict['user_id'], _dict["timer"],
        _dict["is_full"]
    ]
    rows = mysql.sql_read(lookup_sql, lookup_args)
    if not rows:
        # Row not found again: report failure the same way as a failed write.
        return None

    setting_id = rows[0]['id']
    create_table({'id': setting_id})
    return setting_id
コード例 #7
0
ファイル: models.py プロジェクト: zhangjie0416/CaadSpider
def select_setting(_dict={}):
    """Load the full configuration of the spider setting ``_dict['id']``.

    The selected columns are the setting's own fields plus the owning
    user's ``username``, joined in from ``spider_user``.

    Returns whatever ``mysql.sql_read`` yields for the query.
    """
    setting_id = _dict['id']
    query = '''
        SELECT
            a.is_full,
            a.group_name,
            a.task_name,
            a.first_urls,
            a.first_url,
            a.urls_xpath,
            a.second_url,
            a.data_xpath,
            a.is_example,
            a.timer,
            b.username
        FROM
            spider_setting a 
            JOIN spider_user b ON b.id=a.user_id
        WHERE
            a.id =%s
    '''
    return mysql.sql_read(query, [setting_id])
コード例 #8
0
ファイル: models.py プロジェクト: zhangjie0416/CaadSpider
def select_examples(_dict={}):
    """List the spider settings flagged as examples (``is_example = 1``).

    With an empty ``_dict`` every example is returned; with a non-empty one
    the result is limited to the examples owned by ``_dict['user_id']``.

    Returns whatever ``mysql.sql_read`` yields for the query.
    """
    query = '''
        SELECT
            a.id,
            b.username,
            b.company,
            a.group_name,
            a.task_name,
            a.first_url,
            a.second_url,
            a.create_time,
            a.update_time
        FROM
            spider_setting AS a
            JOIN spider_user AS b ON b.id = a.user_id
        WHERE
            a.is_example = 1
    '''
    if _dict:
        # Any non-empty dict requests the per-user view.
        query = query + ' AND a.user_id =%s'
        params = [_dict['user_id']]
    else:
        params = []
    return mysql.sql_read(sql=query, args=params)
コード例 #9
0
ファイル: models.py プロジェクト: zhangjie0416/CaadSpider
def login_check(_dict={}):
    """Find the user whose username and password both match ``_dict``.

    Returns whatever ``mysql.sql_read`` yields: the ``id`` and ``username``
    of matching rows, so an empty result means the credentials were wrong.
    """
    # NOTE(review): the password is compared exactly as supplied; confirm
    # that callers hash it before it reaches this function.
    credentials = [_dict['username'], _dict['password']]
    query = 'select id,username from spider_user where username=%s and password=%s'
    return mysql.sql_read(query, credentials)
コード例 #10
0
ファイル: models.py プロジェクト: zhangjie0416/CaadSpider
def select_read(_dict={}):
    """Fetch the ``is_read`` flag stored for ``_dict['username']``.

    Returns whatever ``mysql.sql_read`` yields for the query.
    """
    username = _dict['username']
    query = 'select is_read from spider_user where username=%s;'
    return mysql.sql_read(query, [username])