Example #1
0
def title():
    """Set ``title_flag`` for every unchecked row of ``original_qdfuns_article``.

    Rows whose ``title_flag`` is NULL are selected, each title is passed to
    ``baidudetector.url`` and the row is updated with ``title_flag = '0'``
    when that call returns a truthy value, ``'1'`` otherwise.

    The connection is released in a ``finally`` clause so it is closed even
    when a query or the detector raises.
    """
    db = sql_operation.getcon()
    try:
        selectsql = "SELECT article_id,title FROM original_qdfuns_article WHERE title_flag IS NULL"
        results = sql_operation.baseselect(db, selectsql)
        for row in results:
            # Distinct local names: avoid shadowing the builtin ``id`` and
            # this function's own name.
            article_id = row[0]
            article_title = row[1]
            flag = '0' if baidudetector.url(article_title) else '1'
            updatesql = (
                "UPDATE original_qdfuns_article SET title_flag = '%s' "
                "WHERE (`article_id`='%d')" % (flag, article_id)
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        sql_operation.closecon(db)
Example #2
0
def key_extract():
    """Append matching keywords from ``t_relevant_search`` to a text file.

    Selects the rows whose ``keyword`` contains both ``么`` and ``php``,
    appends each keyword as one line to ``keyword_relevant.txt`` (UTF-8) and
    echoes it to stdout.

    The file is managed by a ``with`` block and the connection is closed in
    a ``finally`` clause, so both are released even if a step raises.
    """
    db = sql_operation.getcon()
    try:
        with open("keyword_relevant.txt", "a", encoding='utf-8') as key_file:
            select_sql = "SELECT keyword,key_list FROM t_relevant_search WHERE keyword LIKE '%么%' and keyword LIKE '%php%'"
            results = sql_operation.baseselect(db, select_sql)
            for row in results:
                # Only the keyword column is used; key_list is selected but ignored.
                keyword = row[0]
                key_file.write(keyword + '\n')
                print(keyword)
    finally:
        sql_operation.closecon(db)
Example #3
0
def sqltofile():
    """Load up to 5000 random HTTPS proxies and hand them to ``thread``.

    Each selected row (``ip``, ``port``, ``type``) is copied into a list, so
    ``thread`` receives a list of lists.  The collected proxies are also
    printed to stdout.

    The connection is closed in a ``finally`` clause, before the proxies are
    printed or processed, so it is released even if the query raises.
    """
    db = sql_operation.getcon()
    try:
        select_sql = "SELECT ip,`port`,type FROM t_proxy_info Where type = 'HTTPS' Order By rand() Limit 5000"
        results = sql_operation.baseselect(db, select_sql)
        # Copy every row into a fresh list while the connection is still open.
        proxys = [list(row) for row in results]
    finally:
        sql_operation.closecon(db)
    print(proxys)
    thread(proxys)
Example #4
0
def sqlETL():
    """Build the key -> search-id index in ``t_relevant_search_key``.

    For every row of ``t_relevant_search`` the ``key_list`` column is split
    on ``'---'``.  A key that is already present in ``t_relevant_search_key``
    gets ``'@' + id`` appended to its ``keyword_id_list``; an unknown key is
    inserted with the row id as its initial ``keyword_id_list``.

    The connection is closed in a ``finally`` clause so it is released even
    if a statement raises.
    """
    db = sql_operation.getcon()
    try:
        select_sql = "SELECT id,key_list from t_relevant_search"
        select_key_sql = "SELECT `key` from t_relevant_search_key"
        results = sql_operation.baseselect(db, select_sql)
        for row in results:
            search_id = row[0]
            key_list = row[1].split('---')
            # The key table is re-read for every row so that keys inserted
            # while handling earlier rows can be picked up.  A set gives O(1)
            # membership tests instead of scanning a list per key.
            known_keys = {
                key_row[0]
                for key_row in sql_operation.baseselect(db, select_key_sql)
            }
            for key in key_list:
                # NOTE(review): values are interpolated straight into the SQL
                # text; a key containing a quote would break the statement.
                # Switch to bound parameters if sql_operation supports them.
                if key in known_keys:
                    update_sql = "update t_relevant_search_key " \
                                 "set keyword_id_list = CONCAT(keyword_id_list,'@','%s') where `key` = '%s'" \
                                 % (search_id, key)
                    print('-----------------')
                    sql_operation.baseoperation(db, update_sql)
                else:
                    insert_sql = "insert into t_relevant_search_key (`key`,keyword_id_list) value ('%s','%s')" \
                                 % (key, search_id)
                    sql_operation.baseoperation(db, insert_sql)
    finally:
        sql_operation.closecon(db)
Example #5
0
def updatetosql():
    """Fill in details for 10 random unprocessed Stack Overflow questions.

    Picks 10 random rows of ``t_stackoverflow_question`` with ``flag = '0'``,
    calls ``item_html`` on each row's url and writes the returned fields
    back, setting ``flag`` to ``'1'``.  The fields of the ``item_html``
    result are mapped by index as follows:

    * ``item[0]`` -> matched against ``title`` in the WHERE clause
    * ``item[1]`` -> ``views``
    * ``item[2]`` -> ``answers_num``
    * ``item[3]`` -> ``asked_time``
    * ``item[4]`` -> ``tags``
    * ``item[5]`` -> ``last_active_time``
    * ``item[6]`` -> ``question_content``
    * ``item[7]`` -> joined with ``"[split]"`` into ``answers_contetnt``

    The connection is closed in a ``finally`` clause so it is released even
    if fetching or updating raises.
    """
    db = sql_operation.getcon()
    try:
        select_sql = "SELECT title,url FROM t_stackoverflow_question WHERE flag = '0' Order By rand() limit 10"
        results = sql_operation.baseselect(db, select_sql)
        for row in results:
            url = row[1]
            item = item_html(url)
            answers_text = "[split]".join(item[7])
            # NOTE(review): values are interpolated straight into the SQL
            # text; content containing a quote would break the statement.
            # Switch to bound parameters if sql_operation supports them.
            # `answers_contetnt` is spelled as in the original statement
            # (presumably the real column name - confirm against the schema).
            updatesql = "UPDATE `t_stackoverflow_question` " \
                        "SET `tags`='%s', `views`='%s', `answers_num`='%s', `asked_time`='%s', `last_active_time`='%s', `question_content`='%s', `answers_contetnt`='%s' , `flag` = '1' " \
                        "WHERE (`title`='%s') " \
                        % (item[4], item[1], item[2], item[3], item[5], item[6], answers_text, item[0],)
            sql_operation.baseoperation(db, updatesql)
    finally:
        sql_operation.closecon(db)
Example #6
0
def content():
    """Set ``content_flag`` for pending rows of ``original_python_article``.

    Selects the rows with ``title_flag = 0`` whose ``content_flag`` is NULL
    or 0, prints each url, passes it to ``circleCheck`` and stores
    ``content_flag = '0'`` when that call returns a truthy value, ``'1'``
    otherwise.

    The connection is closed in a ``finally`` clause so it is released even
    if a query or the check raises.
    """
    db = sql_operation.getcon()
    try:
        selectsql = "SELECT article_id,url FROM original_python_article WHERE (content_flag IS NULL  or content_flag = 0) and title_flag = 0"
        results = sql_operation.baseselect(db, selectsql)
        for row in results:
            article_id = row[0]
            url = row[1]
            print('当前所处理的文章url:       ' + url)
            flag = '0' if circleCheck(url) else '1'
            updatesql = (
                "UPDATE `original_python_article` SET `content_flag`='%s' "
                "WHERE (`article_id`='%d')" % (flag, article_id)
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        sql_operation.closecon(db)
Example #7
0
def sql_search():
    """Run ``page_html`` in a thread for every related search of 50 pending keywords.

    Selects up to 50 rows of ``t_relevant_search`` with ``flag = '0'``, then
    sets ``flag = '1'`` on up to 50 such rows.  For each selected row the
    ``relevant_search`` column is split on ``'-----'`` and one thread per
    resulting keyword is started with ``page_html`` as its target.  All
    threads of a row are joined before the next row is processed.

    The connection is closed in a ``finally`` clause so it is released even
    if a query or thread start raises.
    """
    db = sql_operation.getcon()
    try:
        select_sql = "select keyword,relevant_search from t_relevant_search where flag = '0' Limit 50"
        update_sql = "update t_relevant_search set flag = '1' where flag = '0'  Limit 50"
        results = sql_operation.baseselect(db, select_sql)
        # NOTE(review): neither statement has an ORDER BY, so the 50 rows
        # flagged here are presumably not guaranteed to be the 50 rows just
        # selected - confirm, and consider updating by primary key instead.
        sql_operation.baseoperation(db, update_sql)
        for row in results:
            # A fresh list per row: only this row's threads need joining,
            # earlier rows' threads have already finished.
            workers = []
            for keyword in row[1].split("-----"):
                worker = threading.Thread(target=page_html, args=[keyword])
                workers.append(worker)
                worker.start()
            # Block the main thread until all of this row's workers are done.
            for worker in workers:
                worker.join()
    finally:
        sql_operation.closecon(db)
Example #8
0
def titleandintro():
    """Set ``title_flag`` for pending rows of ``original_python_article``.

    Selects the rows whose ``title_flag`` is NULL or 0 and passes the title,
    then the intro, to ``baidudetector.url``.  ``title_flag`` becomes ``'0'``
    only when both calls return a truthy value, ``'1'`` otherwise.  The intro
    is not checked when the title check is falsy.

    The connection is closed in a ``finally`` clause so it is released even
    if a query or the detector raises.
    """
    db = sql_operation.getcon()
    try:
        selectsql = "SELECT article_id,title,intro FROM original_python_article WHERE title_flag IS NULL or title_flag = 0"
        results = sql_operation.baseselect(db, selectsql)
        for row in results:
            article_id = row[0]
            title = row[1]
            intro = row[2]
            both_detected = baidudetector.url(title) and baidudetector.url(intro)
            flag = '0' if both_detected else '1'
            updatesql = (
                "UPDATE `original_python_article` SET `title_flag`='%s' "
                "WHERE (`article_id`='%d')" % (flag, article_id)
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        sql_operation.closecon(db)