def select_runnings(_dict={}):
    """Fetch non-example spider tasks created or finished since a given date.

    A task is selected when its ``create_time`` or ``finish_time`` is at or
    after ``_dict['from_date']``, ``is_example`` is 0, and its derived status
    flag (1 when ``data_status + urls_status > 0``, otherwise 0) equals
    ``_dict['data_status']``.

    Unless ``_dict['username']`` is ``'spider'``, the result is additionally
    restricted to tasks owned by ``_dict['user_id']``.

    Args:
        _dict: Mapping with the keys ``from_date``, ``data_status`` and
            ``username``; ``user_id`` is also read when the username is not
            ``'spider'``.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query.
    """
    since = _dict['from_date']
    # The statement text is assembled from adjacent literals; the pieces join
    # into exactly the same single-spaced SQL string.
    query = (
        " SELECT a.id, b.username, b.company, a.create_time, a.update_time,"
        " a.group_name, a.task_name, a.timer,"
        " CASE WHEN a.data_status + a.urls_status > 0 THEN 1 ELSE 0 END AS 'data_status',"
        " a.progress, a.begin_time, a.finish_time, a.is_full"
        " FROM spider_setting AS a"
        " JOIN spider_user AS b ON b.id = a.user_id"
        " WHERE (a.create_time >= %s OR a.finish_time >= %s)"
        " AND a.is_example = 0"
        " AND (CASE WHEN a.data_status + a.urls_status > 0 THEN 1 ELSE 0 END) =%s "
    )
    params = [since, since, _dict['data_status']]

    # Every account except 'spider' only sees its own tasks.
    if _dict['username'] != 'spider':
        query += ' AND a.user_id =%s'
        params.append(_dict['user_id'])

    return mysql.sql_read(query, params)
def select_details(_dict={}):
    """Return the configured field names of one spider setting, in order.

    Reads the ``data_xpath`` column of the ``spider_setting`` row whose id is
    ``_dict['id']``, parses it as JSON, and collects the ``field`` value of
    every entry found under the keys ``fix_fields``, ``one_fields`` and
    ``all_fields`` (in that group order). Within each group the entries are
    ordered by their ``seq`` value.

    Args:
        _dict: Mapping that must contain the key ``id``.

    Returns:
        A list with the ``field`` values of all three groups concatenated.

    Raises:
        IndexError: If the query result is empty (the first row is indexed
            unconditionally).
        KeyError: If ``id``, one of the three group keys, or a ``seq`` /
            ``field`` entry key is missing.
    """
    sql = ''' SELECT data_xpath FROM spider_setting WHERE id =%s '''
    rows = mysql.sql_read(sql, [_dict['id']])

    # Decode the stored JSON once; all three groups live in the same document.
    config = json.loads(rows[0]["data_xpath"])

    # Group order matters for the returned list. Per the original comments the
    # groups are: fixed collection, single-record collection, multi-record
    # collection.
    fields = []
    for group in ('fix_fields', 'one_fields', 'all_fields'):
        ordered = sorted(config[group], key=lambda entry: entry['seq'])
        fields.extend(entry['field'] for entry in ordered)
    return fields
def to_html(_dict={}):
    """Return up to 200 of the most recently collected rows for one setting.

    Steps:
      1. ``select_setting(_dict)`` must return a truthy result, otherwise
         ``[]`` is returned.
      2. ``select_columns(_dict)`` supplies the column list for the SELECT;
         a falsy result also yields ``[]``.
      3. The per-setting data table and URL table (names derived from
         ``SPIDER_DATA`` / ``SPIDER_URLS`` formatted with ``_dict['id']``) are
         joined and filtered to rows whose ``create_time`` lies between
         ``_dict['begin_date']`` and ``_dict['end_date']`` inclusive, newest
         ``create_time`` first, limited to 200 rows.

    Any failure along the way is logged and ``[]`` is returned, so callers
    never see an exception from this function.

    NOTE(review): this file contains two definitions named ``select_setting``;
    the later one does not filter by username. Confirm that the check in
    step 1 really verifies ownership.

    Args:
        _dict: Mapping with the keys ``id``, ``begin_date`` and ``end_date``,
            plus whatever ``select_setting`` / ``select_columns`` read.

    Returns:
        The result of ``mysql.sql_read`` for the query, or ``[]``.
    """
    import logging

    def _escape_percent(fragment):
        # The date bounds are passed as bound parameters below, so a literal
        # '%' inside an identifier fragment must be doubled to survive
        # placeholder substitution. Assumes the driver uses %-style
        # parameters, as the '%s' placeholders elsewhere in this file suggest.
        return str(fragment).replace('%', '%%')

    try:
        # 1. User verification
        setting = select_setting(_dict)
        if not setting:
            return []

        # 2. Column list
        columns = select_columns(_dict)
        if not columns:
            return []

        # 3. Data query. Table and column names cannot be bound as parameters,
        #    so they are still formatted into the statement.
        # NOTE(review): _dict['id'] ends up inside the table names; make sure
        # it is validated upstream.
        sql = (
            " SELECT a.create_time AS '采集时间', b.second_url AS '采集网址', {columns}"
            " FROM {data_table} a JOIN {urls_table} b ON a.url_id = b.id"
            " WHERE a.create_time >= %s AND a.create_time <= %s"
            " ORDER BY a.create_time DESC LIMIT 200 "
        ).format(
            columns=_escape_percent(columns),
            data_table=_escape_percent(SPIDER_DATA.format(_dict['id'])),
            urls_table=_escape_percent(SPIDER_URLS.format(_dict['id'])),
        )
        # The date bounds are bound parameters rather than string-formatted
        # into the SQL text.
        data = mysql.sql_read(sql, [_dict['begin_date'], _dict['end_date']])
    except Exception:
        # Best-effort endpoint: report the problem but keep returning an
        # empty result. KeyboardInterrupt / SystemExit are no longer swallowed.
        logging.getLogger(__name__).exception('to_html query failed')
        data = []
    return data
def select_versions(_dict={}):
    """Return rows of ``spider_version`` ordered by descending id.

    Each row carries ``create_time``, ``version`` and ``description``.

    Args:
        _dict: Mapping that must contain the key ``version``; when that value
            is truthy the key ``username`` is read as well.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query. When
        ``_dict['version']`` is truthy the query is limited to one row.
    """
    sql = 'select create_time, version, description from spider_version order by id desc'
    if _dict['version']:
        # Only the row with the highest id is requested in this mode.
        sql += ' limit 1'
        # presumably records that this user has seen the latest release notes
        # - verify against update_read.
        # NOTE(review): confirm this call is meant to run only in this branch
        # and not on every invocation.
        update_read(_dict['username'])
    versions = mysql.sql_read(sql)
    return versions
def select_setting(_dict={}):
    """Look up the group and task name of a setting, scoped to its owner.

    Selects ``group_name`` and ``task_name`` of the ``spider_setting`` row
    with id ``_dict['id']``. Unless ``_dict['username']`` is ``'spider'``, the
    row must also belong to the user with that username.

    NOTE(review): another function named ``select_setting`` is defined later
    in this file; if both live at module level, the later definition replaces
    this one when the module is loaded.

    Args:
        _dict: Mapping with the keys ``id`` and ``username``.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query.
    """
    params = [_dict['id']]
    query = (
        " SELECT a.group_name, a.task_name"
        " FROM spider_setting a"
        " JOIN spider_user b ON b.id = a.user_id"
        " WHERE a.id =%s "
    )

    requester = _dict['username']
    if requester != 'spider':
        # Restrict the match to the requesting user's own setting.
        query = query + ' AND b.username =%s'
        params.append(requester)

    return mysql.sql_read(sql=query, args=params)
def insert_setting(_dict={}):
    """Insert a new ``spider_setting`` row and create its per-setting tables.

    The row is stamped with the current local time (second resolution). When
    the write reports success, the id of the new row is looked up again by its
    column values and passed to ``create_table``.

    Args:
        _dict: Mapping with the keys ``group_name``, ``task_name``,
            ``first_urls``, ``first_url``, ``urls_xpath``, ``second_url``,
            ``data_xpath``, ``is_example``, ``user_id``, ``timer`` and
            ``is_full``.

    Returns:
        The id of the inserted row, or ``None`` when ``mysql.sql_write``
        returned a falsy value.

    Raises:
        IndexError: If the write succeeded but the follow-up lookup is empty
            (assuming ``mysql.sql_read`` returns a sequence of rows).
    """
    create_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    insert_sql = '''
        INSERT INTO spider_setting
            (create_time, group_name, task_name, first_urls, first_url, urls_xpath,
             second_url, data_xpath, is_example, user_id, timer, is_full)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''
    insert_args = [
        create_time,
        _dict["group_name"],
        _dict['task_name'],
        _dict["first_urls"],
        _dict["first_url"],
        _dict["urls_xpath"],
        _dict["second_url"],
        _dict["data_xpath"],
        _dict["is_example"],
        _dict['user_id'],
        _dict["timer"],
        _dict["is_full"],
    ]
    written = mysql.sql_write(insert_sql, insert_args)
    if not written:
        # Nothing was inserted, so there is no id to report and no table to
        # create.
        return None

    # Recover the id of the row that was just inserted. first_urls and
    # data_xpath are not part of the lookup predicate. If several rows match
    # (e.g. an identical setting saved within the same second), take the one
    # with the highest id rather than an arbitrary one; this presumes ids
    # grow with insertion order - TODO confirm.
    lookup_sql = '''
        SELECT id
        FROM spider_setting
        WHERE create_time =%s
          AND group_name =%s
          AND task_name =%s
          AND first_url =%s
          AND urls_xpath =%s
          AND second_url =%s
          AND is_example =%s
          AND user_id =%s
          AND timer =%s
          AND is_full =%s
        ORDER BY id DESC
        LIMIT 1
    '''
    lookup_args = [
        create_time,
        _dict["group_name"],
        _dict['task_name'],
        _dict["first_url"],
        _dict["urls_xpath"],
        _dict["second_url"],
        _dict["is_example"],
        _dict['user_id'],
        _dict["timer"],
        _dict["is_full"],
    ]
    setting_id = mysql.sql_read(lookup_sql, lookup_args)[0]['id']
    create_table({'id': setting_id})
    return setting_id
def select_setting(_dict={}):
    """Load the full configuration of one setting together with its owner.

    Selects ``is_full``, ``group_name``, ``task_name``, ``first_urls``,
    ``first_url``, ``urls_xpath``, ``second_url``, ``data_xpath``,
    ``is_example`` and ``timer`` from ``spider_setting`` plus the owner's
    ``username`` from ``spider_user`` for the row with id ``_dict['id']``.
    No ownership filter is applied.

    NOTE(review): an earlier function in this file has the same name; if both
    are module-level definitions, this later one is the one that stays bound.

    Args:
        _dict: Mapping that must contain the key ``id``.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query.
    """
    query = (
        " SELECT a.is_full, a.group_name, a.task_name, a.first_urls, a.first_url,"
        " a.urls_xpath, a.second_url, a.data_xpath, a.is_example, a.timer, b.username"
        " FROM spider_setting a"
        " JOIN spider_user b ON b.id=a.user_id"
        " WHERE a.id =%s "
    )
    return mysql.sql_read(query, [_dict['id']])
def select_examples(_dict={}):
    """List settings flagged as examples, optionally for a single user.

    Selects id, owner ``username`` / ``company``, ``group_name``,
    ``task_name``, ``first_url``, ``second_url``, ``create_time`` and
    ``update_time`` for every ``spider_setting`` row with ``is_example = 1``.

    Args:
        _dict: When empty (the default), examples of all users are returned.
            When non-empty it must contain ``user_id`` and only that user's
            examples are returned.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query.
    """
    query = (
        " SELECT a.id, b.username, b.company, a.group_name, a.task_name,"
        " a.first_url, a.second_url, a.create_time, a.update_time"
        " FROM spider_setting AS a"
        " JOIN spider_user AS b ON b.id = a.user_id"
        " WHERE a.is_example = 1 "
    )

    # No filter supplied: return every example.
    if not _dict:
        return mysql.sql_read(sql=query, args=[])

    owner_filter = ' AND a.user_id =%s'
    return mysql.sql_read(sql=query + owner_filter, args=[_dict['user_id']])
def login_check(_dict={}):
    """Look up the user matching the supplied username and password.

    Args:
        _dict: Mapping with the keys ``username`` and ``password``. The
            password value is compared against the stored column as given.
            NOTE(review): confirm the caller passes it in the same form
            (e.g. hashed) as it is stored.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query; the selected
        columns are ``id`` and ``username``.
    """
    credentials = [_dict['username'], _dict['password']]
    query = 'select id,username from spider_user where username=%s and password=%s'
    return mysql.sql_read(query, credentials)
def select_read(_dict={}):
    """Fetch the ``is_read`` column of the user named ``_dict['username']``.

    Args:
        _dict: Mapping that must contain the key ``username``.

    Returns:
        Whatever ``mysql.sql_read`` returns for the query.
    """
    query = 'select is_read from spider_user where username=%s;'
    params = [_dict['username']]
    return mysql.sql_read(query, params)