def get_index():
    """Store Baidu index averages for a batch of unprocessed "php" tail words.

    Reads up to five rows of ``t_tail_word`` whose ``extr_flag`` is '0' and
    whose ``tail_word`` contains "php", then flags up to five such rows as
    processed.  The words are looked up through ``BaiduIndex`` for the fixed
    window 2018-01-01 .. 2019-04-01 and one ``baidu_index`` row is inserted
    per word: status '0' with the all/pc/wise averages when index data came
    back, status '1' (not indexed) otherwise.  The elapsed wall-clock time is
    printed on success.

    The database connection is always released, even when a lookup or a
    statement raises.
    """
    start_time = time.time()
    start_date = '2018-01-01'
    end_date = '2019-04-01'
    select_sql = "select `tail_word` from `t_tail_word` where extr_flag = '0' and tail_word like '%php%' limit 5"
    update_sql = "update `t_tail_word` set extr_flag = '1' where extr_flag = '0' and tail_word like '%php%' limit 5"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): neither statement has an ORDER BY, so the rows flagged
        # here are not guaranteed to be the rows just selected -- confirm.
        mysql_operation.baseoperation(db, update_sql)

        search_list = [row[0] for row in results]
        if search_list:
            baidu_index = BaiduIndex(search_list, start_date, end_date)
            for item in search_list:
                word_index = baidu_index.result[item]
                print(item, word_index['all'])
                if len(word_index['all']) > 0:
                    # Index data was returned for this word.
                    all_avg = index_avg(word_index, 'all')
                    pc_avg = index_avg(word_index, 'pc')
                    wise_avg = index_avg(word_index, 'wise')
                    print(all_avg, pc_avg, wise_avg)
                    insert_sql = (
                        "INSERT INTO `baidu_index` "
                        "(`word`, `start_time`, `end_time`, `status`, `all_avg`, `pc_avg`, `wise_avg`) "
                        "VALUES ('%s', '%s', '%s', '0', '%s','%s','%s')"
                    ) % (item, start_date, end_date, all_avg, pc_avg, wise_avg)
                else:
                    # The word is not indexed.
                    insert_sql = (
                        "insert into `baidu_index` (`word`, `start_time`, `end_time`, `status`)"
                        "values ('%s', '%s', '%s', '1')"
                    ) % (item, start_date, end_date)
                # NOTE(review): `item` is interpolated straight into the SQL
                # text; a word containing a quote breaks the statement.  Switch
                # to a parameterized call if mysql_operation offers one.
                mysql_operation.baseoperation(db, insert_sql)
    finally:
        mysql_operation.closecon(db)

    print("times : ", time.time() - start_time)
def no_repeat():
    """Load every ``tail_word`` of ``t_tail_word_20190529`` into the global ``tailword_list``.

    The global is rebound to a fresh list containing the first column of each
    selected row.  The database connection is released even if the query
    raises; in that case ``tailword_list`` is left untouched.
    """
    global tailword_list
    select_sql = "select `tail_word` from `t_tail_word_20190529`"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        tailword_list = [row[0] for row in results]
    finally:
        mysql_operation.closecon(db)
def check_php():
    """Run ``thread_handler`` on a batch of unprocessed ``word_exec`` words.

    Reads up to 100 ``tail_word`` values whose ``flag`` is '0', flags up to
    100 such rows as processed, then starts one thread per word and waits for
    all of them.  The database connection stays open until the threads have
    finished and is released even when a step raises.
    """
    select_sql = "select `tail_word` from `word_exec` where flag = '0' limit 100"
    update_sql = "update `word_exec` set flag = '1' where flag = '0' limit 100"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the rows flagged here are not
        # guaranteed to be the rows just selected -- confirm.
        mysql_operation.baseoperation(db, update_sql)

        full_threads = []
        for row in results:
            worker = threading.Thread(target=thread_handler, args=(row[0],))
            full_threads.append(worker)
            worker.start()
        for worker in full_threads:
            worker.join()
    finally:
        mysql_operation.closecon(db)
def get_word():
    """Run ``get`` on a batch of unprocessed ``tail_word_20190529`` words.

    Reads up to 50 ``tail_word`` values whose ``extr_flag`` is '0' and flags
    up to 50 such rows as processed.  The database connection is released
    before any thread starts (and also when the select or update raises);
    afterwards one thread per word is started and all of them are joined.
    """
    select_sql = "select `tail_word` from `tail_word_20190529` where extr_flag = '0' limit 50"
    update_sql = "update `tail_word_20190529` set extr_flag = '1' where extr_flag = '0' limit 50"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the rows flagged here are not
        # guaranteed to be the rows just selected -- confirm.
        mysql_operation.baseoperation(db, update_sql)
    finally:
        mysql_operation.closecon(db)

    # An empty result set simply starts no threads.
    threads = []
    for row in results:
        worker = threading.Thread(target=get, args=(row[0],))
        threads.append(worker)
        worker.start()
    for worker in threads:
        worker.join()
def readsql():
    """Append unprocessed tail words that pass ``page_html`` to ``keyword_relevant.txt``.

    Reads up to 20 ``tail_word`` values of ``tail_word_20190529`` whose
    ``extr_flag`` is '0' and flags up to 20 such rows as processed.  Each word
    for which ``page_html`` returns a truthy value is written on its own line
    to ``keyword_relevant.txt`` (opened in append mode, UTF-8); other words
    are skipped.  The file is opened even when no rows were returned.

    Both the file handle and the database connection are released even when
    ``page_html`` or a write raises.
    """
    select_sql = "select `tail_word` from `tail_word_20190529` where extr_flag = '0' limit 20"
    update_sql = "update `tail_word_20190529` set extr_flag = '1' where extr_flag = '0' limit 20"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the rows flagged here are not
        # guaranteed to be the rows just selected -- confirm.
        mysql_operation.baseoperation(db, update_sql)

        with open('keyword_relevant.txt', 'a', encoding='utf-8') as out_file:
            for row in results:
                word = row[0]
                # NOTE(review): a falsy page_html() presumably means the word
                # already exists on the target site; such words used to be
                # collected but were never written anywhere -- confirm.
                if page_html(word):
                    out_file.write(word + '\n')
    finally:
        mysql_operation.closecon(db)
def read_tail():
    """Run ``sigl_thread`` on the relevant words of a batch of unprocessed rows.

    Refreshes the global word list through ``no_repeat()``, reads up to 50
    rows of ``t_tail_word_20190529`` whose ``flag`` is '0' and flags up to 50
    such rows as processed.  For every row the ``relevant_word`` column is
    split on "~~" and the resulting list is handed to ``sigl_thread`` in its
    own thread; all threads are joined before the connection is closed.  The
    elapsed wall-clock time is printed on success.

    The database connection is released even when a step raises.
    """
    start_time = time.time()
    no_repeat()

    # Disabled: delete not-yet-read duplicate rows before taking each batch.
    # delete_sql = "DELETE FROM t_tail_word WHERE id in ( " \
    #              "SELECT id FROM(" \
    #              "select * from t_tail_word where flag='0' GROUP BY tail_word HAVING COUNT(tail_word) > 1" \
    #              ") alias);"
    # mysql_operation.baseoperation(db, delete_sql)
    select_sql = "select `tail_word`, `relevant_word` from `t_tail_word_20190529` where flag = '0' Limit 50"
    update_sql = "update `t_tail_word_20190529` set flag = '1' where flag = '0' Limit 50"

    db = mysql_operation.getcon()
    try:
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the rows flagged here are not
        # guaranteed to be the rows just selected -- confirm.
        mysql_operation.baseoperation(db, update_sql)

        full_threads = []
        for row in results:
            # Only the second column (relevant_word) is used.
            search_list = row[1].split("~~")
            worker = threading.Thread(target=sigl_thread, args=(search_list,))
            full_threads.append(worker)
            worker.start()
        for worker in full_threads:
            worker.join()
    finally:
        mysql_operation.closecon(db)

    print("本次用时 : ", time.time() - start_time)