예제 #1
0
def get_index(start_date='2018-01-01', end_date='2019-04-01'):
    """Store Baidu index averages for a batch of unprocessed 'php' tail words.

    Selects up to five ``tail_word`` values from ``t_tail_word`` whose
    ``extr_flag`` is '0' and which contain 'php', flags up to five such rows
    as '1', queries ``BaiduIndex`` for the selected words, and inserts one
    ``baidu_index`` row per word:

    * status '0' plus the all/pc/wise averages when the word's 'all' series
      is non-empty;
    * status '1' (no averages) otherwise.

    The connection is always closed, even if a lookup or insert raises.
    The elapsed time is printed on success.

    Args:
        start_date: Lower bound handed to ``BaiduIndex`` and written to the
            ``start_time`` column.
        end_date: Upper bound handed to ``BaiduIndex`` and written to the
            ``end_time`` column.
    """
    start_time = time.time()
    db = mysql_operation.getcon()
    try:
        select_sql = "select `tail_word` from `t_tail_word` where extr_flag = '0' and tail_word like '%php%' limit 5"
        update_sql = "update `t_tail_word` set extr_flag = '1' where extr_flag = '0' and tail_word like '%php%' limit 5"
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): neither statement has an ORDER BY, so the rows that
        # get flagged are not guaranteed to be the rows that were selected.
        mysql_operation.baseoperation(db, update_sql)

        search_list = [row[0] for row in results]
        if search_list:  # the query returned at least one word
            baidu_index = BaiduIndex(search_list, start_date, end_date)
            for item in search_list:
                series = baidu_index.result[item]
                print(item, series['all'])
                # NOTE(review): `item` is interpolated into the SQL text
                # unescaped, so a word containing a quote breaks the
                # statement. Switch to parameter binding if mysql_operation
                # supports it.
                if len(series['all']) > 0:  # index data was found for the word
                    all_avg = index_avg(series, 'all')
                    pc_avg = index_avg(series, 'pc')
                    wise_avg = index_avg(series, 'wise')
                    print(all_avg, pc_avg, wise_avg)
                    insert_sql = ("INSERT INTO `baidu_index` "
                                  "(`word`, `start_time`, `end_time`, `status`, `all_avg`, `pc_avg`, `wise_avg`) "
                                  "VALUES ('%s', '%s', '%s', '0', '%s','%s','%s')"
                                  % (item, start_date, end_date, all_avg, pc_avg, wise_avg))
                else:  # the word is not indexed
                    insert_sql = ("insert into `baidu_index` (`word`, `start_time`, `end_time`, `status`)"
                                  "values ('%s', '%s', '%s', '1')" % (item, start_date, end_date))
                mysql_operation.baseoperation(db, insert_sql)
    finally:
        # Release the connection even when a lookup or insert fails.
        mysql_operation.closecon(db)
    print("times  :  ", time.time() - start_time)
예제 #2
0
def no_repeat():
    """Load every ``tail_word`` of ``t_tail_word_20190529`` into a global list.

    Rebinds the module-level ``tailword_list`` to a fresh list holding the
    first column of each selected row. The connection is closed even if the
    query or the row iteration raises; in that case ``tailword_list`` keeps
    its previous value.
    """
    global tailword_list
    db = mysql_operation.getcon()
    try:
        select_sql = "select `tail_word` from `t_tail_word_20190529`"
        results = mysql_operation.baseselect(db, select_sql)
        # Build the list while the connection is still open, as before.
        tailword_list = [row[0] for row in results]
    finally:
        mysql_operation.closecon(db)
예제 #3
0
def check_php():
    """Run ``thread_handler`` on a batch of unprocessed ``word_exec`` words.

    Selects up to 100 ``tail_word`` values whose ``flag`` is '0', flags up to
    100 such rows as '1', then starts one thread per selected word and waits
    for all of them. The connection stays open until the threads have been
    joined and is closed even if a query or a thread start raises.
    """
    db = mysql_operation.getcon()
    try:
        select_sql = "select `tail_word` from `word_exec` where flag = '0' limit 100"
        update_sql = "update `word_exec` set flag = '1' where flag = '0'  limit 100"
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the flagged rows are not
        # guaranteed to be the selected ones.
        mysql_operation.baseoperation(db, update_sql)

        full_threads = []
        for row in results:
            worker = threading.Thread(target=thread_handler, args=(row[0],))
            full_threads.append(worker)
            worker.start()
        for worker in full_threads:
            worker.join()
    finally:
        mysql_operation.closecon(db)
예제 #4
0
def get_word():
    """Run ``get`` in parallel on a batch of unprocessed tail words.

    Selects up to 50 ``tail_word`` values from ``tail_word_20190529`` whose
    ``extr_flag`` is '0' and flags up to 50 such rows as '1'. The connection
    is closed before any worker starts (and also if a query raises). One
    thread per selected word is then started and joined; an empty result set
    starts no threads.
    """
    db = mysql_operation.getcon()
    try:
        select_sql = "select `tail_word` from `tail_word_20190529` where extr_flag = '0' limit 50"
        update_sql = "update `tail_word_20190529` set extr_flag = '1' where extr_flag = '0' limit 50"
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the flagged rows are not
        # guaranteed to be the selected ones.
        mysql_operation.baseoperation(db, update_sql)
    finally:
        mysql_operation.closecon(db)

    threads = []
    for row in results:
        worker = threading.Thread(target=get, args=(row[0],))
        threads.append(worker)
        worker.start()
    for worker in threads:
        worker.join()
예제 #5
0
def readsql():
    """Append tail words accepted by ``page_html`` to ``keyword_relevant.txt``.

    Selects up to 20 ``tail_word`` values from ``tail_word_20190529`` whose
    ``extr_flag`` is '0' and flags up to 20 such rows as '1'. Every selected
    word for which ``page_html(word)`` is truthy is appended to
    ``keyword_relevant.txt``, one per line; the other words are skipped.

    The output file is opened in append mode even when no rows are returned.
    Both the file and the connection are released if an error occurs.
    """
    db = mysql_operation.getcon()
    try:
        select_sql = "select `tail_word` from `tail_word_20190529` where extr_flag = '0' limit 20"
        update_sql = "update `tail_word_20190529` set extr_flag = '1' where extr_flag = '0' limit 20"
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the flagged rows are not
        # guaranteed to be the selected ones.
        mysql_operation.baseoperation(db, update_sql)

        with open('keyword_relevant.txt', 'a', encoding='utf-8') as out_file:
            for row in results:
                word = row[0]
                if page_html(word):
                    out_file.write(word + '\n')
    finally:
        mysql_operation.closecon(db)
예제 #6
0
def read_tail():
    """Run ``sigl_thread`` on the relevant words of a batch of unread rows.

    Calls ``no_repeat()`` first, then selects up to 50
    ``(tail_word, relevant_word)`` rows from ``t_tail_word_20190529`` whose
    ``flag`` is '0' and flags up to 50 such rows as '1'. Each row's
    ``relevant_word`` is split on "~~" and the resulting list is passed to
    its own ``sigl_thread`` thread. All threads are joined before the
    connection is closed, and the elapsed time is printed on success.

    Rows whose ``relevant_word`` is ``None`` are skipped. The connection is
    closed even if a query or a thread start raises.
    """
    start_time = time.time()
    no_repeat()
    db = mysql_operation.getcon()
    try:
        # A DELETE that removed unread duplicate tail words from t_tail_word
        # before each batch used to run here; it is currently disabled.
        select_sql = "select `tail_word`, `relevant_word` from `t_tail_word_20190529` where flag = '0' Limit 50"
        update_sql = "update `t_tail_word_20190529` set flag = '1' where flag = '0'  Limit 50"
        results = mysql_operation.baseselect(db, select_sql)
        # NOTE(review): without an ORDER BY the flagged rows are not
        # guaranteed to be the selected ones.
        mysql_operation.baseoperation(db, update_sql)

        full_threads = []
        for row in results:
            relevant = row[1]
            if relevant is None:
                # None has no .split(); skipping avoids aborting the batch
                # after its rows were flagged and some workers were started.
                continue
            search_list = relevant.split("~~")
            worker = threading.Thread(target=sigl_thread, args=(search_list,))
            full_threads.append(worker)
            worker.start()
        for worker in full_threads:
            worker.join()
    finally:
        mysql_operation.closecon(db)
    print("本次用时 : ", time.time() - start_time)