def get_index():
    """Fetch Baidu index averages for a batch of PHP keywords and store them.

    Reads up to five ``tail_word`` values from ``t_tail_word`` whose
    ``extr_flag`` is '0' and which contain "php", then sets ``extr_flag`` to
    '1' with an UPDATE using the same filter and limit.  The keywords are
    passed to ``BaiduIndex`` for a fixed date range and one row per keyword
    is inserted into ``baidu_index``:

    * status '0' plus the ``all``/``pc``/``wise`` values returned by
      ``index_avg`` when the keyword has ``'all'`` index data;
    * status '1' (no averages) when the ``'all'`` series is empty.

    The elapsed wall-clock time is printed at the end.  The database
    connection is always released, even if a lookup or statement raises.
    """
    # The queried date range; also written into every inserted row.
    start_date = '2018-01-01'
    end_date = '2019-04-01'

    # NOTE(review): SELECT and UPDATE are separate statements with LIMIT but
    # no ORDER BY, so they presumably touch the same rows only while nothing
    # else modifies the table in between -- confirm.
    select_sql = "select `tail_word` from `t_tail_word` where extr_flag = '0' " \
                 "and tail_word like '%php%' limit 5"
    update_sql = "update `t_tail_word` set extr_flag = '1' where extr_flag = '0' " \
                 "and tail_word like '%php%' limit 5"

    start_time = time.time()
    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        mysql_operation.baseoperation(db, update_sql)

        if results:  # the query returned at least one keyword
            search_list = [row[0] for row in results]
            baidu_index = BaiduIndex(search_list, start_date, end_date)

            for item in search_list:
                index_data = baidu_index.result[item]
                print(item, index_data['all'])

                if len(index_data['all']) > 0:  # index data was found
                    all_avg = index_avg(index_data, 'all')
                    pc_avg = index_avg(index_data, 'pc')
                    wise_avg = index_avg(index_data, 'wise')
                    print(all_avg, pc_avg, wise_avg)
                    # NOTE(review): values are interpolated into the SQL text;
                    # a keyword containing a quote would break the statement.
                    insert_sql = (
                        "INSERT INTO `baidu_index` "
                        "(`word`, `start_time`, `end_time`, `status`, `all_avg`, `pc_avg`, `wise_avg`) "
                        "VALUES ('%s', '%s', '%s', '0', '%s','%s','%s')"
                        % (item, start_date, end_date, all_avg, pc_avg, wise_avg)
                    )
                else:  # keyword is not indexed
                    insert_sql = (
                        "insert into `baidu_index` (`word`, `start_time`, `end_time`, `status`)"
                        "values ('%s', '%s', '%s', '1')"
                        % (item, start_date, end_date)
                    )
                mysql_operation.baseoperation(db, insert_sql)
    finally:
        mysql_operation.closecon(db)

    print("times : ", time.time() - start_time)
def thread_handler(keyword):
    """Flag *keyword* in ``word_exec`` when ``page_html`` returns a falsy value.

    Prints the keyword and calls ``page_html(keyword)``.  If the result is
    truthy nothing is written; otherwise the ``flag`` column of the matching
    ``word_exec`` rows is set to 'php中文网' and the executed statement is
    printed.  The function opens its own connection, which is closed in all
    cases, including when ``page_html`` or the UPDATE raises.

    :param keyword: the ``tail_word`` value to check.
    """
    db = mysql_operation.getcon()
    try:
        print(keyword)
        if not page_html(keyword):
            # NOTE(review): keyword is interpolated into the SQL text as-is;
            # a quote inside it would break the statement.
            update_sql = "update `word_exec` set flag = 'php中文网' where `tail_word`='%s'" % (
                keyword)
            mysql_operation.baseoperation(db, update_sql)
            print(update_sql)
    finally:
        mysql_operation.closecon(db)
def wipe_handler():
    """Delete ``word_exec`` rows whose ``tail_word`` contains a listed word.

    Reads ``wipe_word.txt`` (UTF-8, one word per line) and, for every
    non-empty line, deletes all ``word_exec`` rows whose ``tail_word``
    matches ``LIKE '%<word>%'``.  The file is closed before any database
    work starts and the connection is released even if a DELETE raises.
    """
    print("去除无关项")

    with open('wipe_word.txt', 'r', encoding='utf-8') as word_file:
        wipe_list = word_file.read().split('\n')

    # The inner SELECT is wrapped in a derived table ("a") in the original
    # statement; the text is kept unchanged.
    delete_template = "DELETE FROM word_exec WHERE id in ( SELECT id FROM( " \
                      "select id,tail_word from word_exec where tail_word LIKE '%s') a)"

    db = mysql_operation.getcon()
    try:
        for word in wipe_list:
            if len(word) > 0:  # skip blank lines, e.g. the one after a trailing newline
                pattern = "%" + word + "%"
                mysql_operation.baseoperation(db, delete_template % (pattern))
    finally:
        mysql_operation.closecon(db)
def check_php():
    """Run ``thread_handler`` on a batch of unprocessed ``word_exec`` keywords.

    Selects up to 100 ``tail_word`` values with ``flag = '0'``, issues an
    UPDATE with the same filter and limit that sets ``flag = '1'``, then
    starts one thread per selected keyword and waits for all of them.

    The connection is closed before the threads start: it is not used after
    the UPDATE, and ``thread_handler`` opens a connection of its own for
    each keyword.
    """
    # NOTE(review): SELECT and UPDATE are independent LIMIT queries without
    # ORDER BY; presumably they are expected to cover the same rows -- confirm.
    select_sql = "select `tail_word` from `word_exec` where flag = '0' limit 100"
    update_sql = "update `word_exec` set flag = '1' where flag = '0' limit 100"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        mysql_operation.baseoperation(db, update_sql)
    finally:
        mysql_operation.closecon(db)

    full_threads = []
    for row in results:
        worker = threading.Thread(target=thread_handler, args=[row[0]])
        full_threads.append(worker)
        worker.start()

    for worker in full_threads:
        worker.join()
def get_word():
    """Run ``get`` in parallel on a batch of unprocessed keywords.

    Selects up to 50 ``tail_word`` values from ``tail_word_20190529`` with
    ``extr_flag = '0'``, issues an UPDATE with the same filter and limit
    that sets ``extr_flag = '1'``, closes the connection, then starts one
    thread per selected keyword (target ``get``) and waits for all of them.
    An empty result simply starts no threads.
    """
    select_sql = "select `tail_word` from `tail_word_20190529` where extr_flag = '0' limit 50"
    update_sql = "update `tail_word_20190529` set extr_flag = '1' where extr_flag = '0' limit 50"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        mysql_operation.baseoperation(db, update_sql)
    finally:
        # Release the connection even if one of the statements raises.
        mysql_operation.closecon(db)

    threads = []
    for row in results:
        worker = threading.Thread(target=get, args=[row[0]])
        threads.append(worker)
        worker.start()

    for worker in threads:
        worker.join()
def save_tail(full_list):
    """Store a keyword together with its related words.

    :param full_list: indexable pair; ``full_list[0]`` is the keyword
        (``tail_word``) and ``full_list[1]`` a sized collection of related
        words (joined with ``"~~"`` before storing).

    Behaviour:

    * no related words: set ``flag = '0'`` on the ``t_tail_word_20190529``
      rows whose ``tail_word`` equals the keyword;
    * related words present and ``rm_word(keyword)`` truthy: insert a new
      row holding the keyword, the ``"~~"``-joined output of
      ``myutils.participle(keyword)`` and the ``"~~"``-joined related words
      with single and double quotes removed;
    * related words present and ``rm_word(keyword)`` falsy: only print a
      notice, nothing is written.

    The connection is closed in every case, including when a helper or a
    statement raises.
    """
    db = mysql_operation.getcon()
    try:
        tail_word = full_list[0]
        relevant_word = full_list[1]

        if 0 == len(relevant_word):
            update_sql = "update t_tail_word_20190529 set flag = '0' where `tail_word` = '%s'" % (
                tail_word)
            mysql_operation.baseoperation(db, update_sql)
        elif rm_word(tail_word):
            word_split = "~~".join(myutils.participle(tail_word))
            # Quotes are stripped from the related words because the values
            # are interpolated into a quoted SQL literal below.
            # NOTE(review): tail_word and word_split are not stripped the
            # same way -- confirm they can never contain quotes.
            relevant_word = "~~".join(relevant_word).replace('\'', '').replace(
                '\"', '')
            replace_sql = "INSERT INTO `t_tail_word_20190529` (`tail_word`, `word_split`, `relevant_word`) " \
                          "VALUES ('%s','%s', '%s')" \
                          % (tail_word, word_split, relevant_word)
            mysql_operation.baseoperation(db, replace_sql)
            print(tail_word, "---完毕")
        else:
            print("无关记录-----------------------------", tail_word)
    finally:
        mysql_operation.closecon(db)
def save_info(keyword, info):
    """Persist the result record *info* for *keyword*.

    :param keyword: the keyword the record belongs to; only used when
        *info* is empty.
    :param info: sized, indexable record.  ``info[3]`` is written to the
        ``status`` column.

    Behaviour:

    * ``info`` empty: set ``extr_flag = '0'`` on the ``t_tail_word`` rows
      whose ``tail_word`` equals *keyword*;
    * ``info[3] == 0``: insert a full ``word_exec`` row from
      ``info[0]`` .. ``info[14]``;
    * otherwise: insert a short ``word_exec`` row from ``info[0]`` ..
      ``info[4]`` (``info[4]`` goes to ``message``).

    :raises IndexError: if a non-empty *info* is shorter than the chosen
        statement requires.  The SQL text is built before the connection is
        opened, so such a failure does not leave a connection open.
    """
    if len(info) == 0:
        sql_opt = "update `t_tail_word` set extr_flag = '0' where tail_word = '%s'" % (keyword)
    elif info[3] == 0:
        sql_opt = "INSERT INTO `word_exec` (`tail_word`, `start_date`, `end_date`, " \
                  "`status`, `all_avg`, `all_yoy`, `all_qoq`, `pc_avg`, `pc_yoy`, `pc_qoq`, " \
                  "`wise_avg`, `wise_yoy`, `wise_qoq`, " \
                  "`message`, `uniqid`) " \
                  "VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s'," \
                  " '%s', '%s')" \
                  % tuple(info[i] for i in range(15))
    else:
        sql_opt = "INSERT INTO `word_exec` (`tail_word`, `start_date`, `end_date`, `status`,`message`)" \
                  "VALUES ('%s', '%s', '%s', '%s', '%s')" \
                  % tuple(info[i] for i in range(5))

    db = mysql_operation.getcon()
    try:
        mysql_operation.baseoperation(db, sql_opt)
    finally:
        mysql_operation.closecon(db)
def readsql():
    """Append keywords accepted by ``page_html`` to ``keyword_relevant.txt``.

    Selects up to 20 ``tail_word`` values from ``tail_word_20190529`` with
    ``extr_flag = '0'`` and issues an UPDATE with the same filter and limit
    that sets ``extr_flag = '1'``.  Every selected keyword for which
    ``page_html(keyword)`` is truthy is appended to ``keyword_relevant.txt``
    (UTF-8, one keyword per line); the others are skipped.

    The output file is opened in append mode even when nothing was
    selected.  The connection is released as soon as the two statements
    have run, and both the connection and the file are closed if an error
    occurs.
    """
    select_sql = "select `tail_word` from `tail_word_20190529` where extr_flag = '0' limit 20"
    update_sql = "update `tail_word_20190529` set extr_flag = '1' where extr_flag = '0' limit 20"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        mysql_operation.baseoperation(db, update_sql)
    finally:
        mysql_operation.closecon(db)

    with open('keyword_relevant.txt', 'a', encoding='utf-8') as out_file:
        for row in results:
            keyword = row[0]
            if page_html(keyword):
                out_file.write(keyword + '\n')
def read_tail():
    """Run ``sigl_thread`` in parallel on the related words of pending rows.

    Calls ``no_repeat()`` first, then selects up to 50
    (``tail_word``, ``relevant_word``) rows from ``t_tail_word_20190529``
    with ``flag = '0'`` and issues an UPDATE with the same filter and limit
    that sets ``flag = '1'``.  For each selected row the ``relevant_word``
    column is split on ``"~~"`` and the resulting list is handed to
    ``sigl_thread`` in its own thread.  Waits for all threads and prints
    the elapsed time.

    The connection is closed right after the UPDATE (it is not used
    afterwards), so it is not held open while the threads run and is
    released even if a statement raises.
    """
    start_time = time.time()
    no_repeat()

    select_sql = "select `tail_word`, `relevant_word` from `t_tail_word_20190529` where flag = '0' Limit 50"
    update_sql = "update `t_tail_word_20190529` set flag = '1' where flag = '0' Limit 50"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        mysql_operation.baseoperation(db, update_sql)
    finally:
        mysql_operation.closecon(db)

    full_threads = []
    for row in results:
        search_list = row[1].split("~~")
        worker = threading.Thread(target=sigl_thread, args=[search_list])
        full_threads.append(worker)
        worker.start()

    for worker in full_threads:
        worker.join()

    print("本次用时 : ", time.time() - start_time)
def error_word(tail_word):
    """Reset ``flag`` to '0' for *tail_word* in ``t_tail_word_20190529``.

    :param tail_word: value matched against the ``tail_word`` column.

    The connection is closed even if the UPDATE raises.
    """
    # NOTE(review): tail_word is interpolated into the SQL text as-is; a
    # quote inside it would break the statement.
    update_sql = "update t_tail_word_20190529 set flag = '0' where `tail_word` = '%s'" % (
        tail_word)

    db = mysql_operation.getcon()
    try:
        mysql_operation.baseoperation(db, update_sql)
    finally:
        mysql_operation.closecon(db)