예제 #1
0
def iptosql(rows):
    """Write proxy records to the ``t_proxy_info`` table.

    One ``REPLACE INTO`` statement is issued per row.

    Args:
        rows: Iterable of indexable records. Elements 0-6 of each record are
            written as ip, port, anonymous, type, location, speed and
            last_verify_time, in that order.
    """
    db = sql_operation.getcon()
    try:
        for row in rows:
            # NOTE(review): the values are spliced into the SQL text with %
            # formatting, so a value containing a single quote alters the
            # statement. Only the (db, sql) form of baseoperation is visible
            # here; switch to parameterized execution if it supports it.
            replace_sql = (
                "REPLACE INTO `t_proxy_info` "
                "(`ip`, `port`, `anonymous`, `type`, `location`, `speed`, `last_verify_time`) "
                "VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s')"
                % (row[0], row[1], row[2], row[3], row[4], row[5], row[6])
            )
            sql_operation.baseoperation(db, replace_sql)
    finally:
        # Release the connection even when a row fails part-way through.
        sql_operation.closecon(db)
예제 #2
0
def tosql(page):
    """Write question records to the ``t_stackoverflow_question`` table.

    One ``Replace INTO`` statement is issued per item; the ``flag`` column is
    always written as ``'0'``.

    Args:
        page: Iterable of indexable records. Elements 0-3 of each record are
            written as question_id, title, url and votes, in that order.
    """
    db = sql_operation.getcon()
    try:
        for item in page:
            # NOTE(review): the values are spliced into the SQL text with %
            # formatting, so a title containing a single quote alters the
            # statement. Only the (db, sql) form of baseoperation is visible
            # here; switch to parameterized execution if it supports it.
            replace_sql = (
                "Replace INTO `t_stackoverflow_question` "
                "(`question_id`,`title`, `url`, `votes`, `flag`) "
                "VALUES ('%s','%s', '%s', '%s','0')"
                % (item[0], item[1], item[2], item[3])
            )
            sql_operation.baseoperation(db, replace_sql)
    finally:
        # Release the connection even when an item fails part-way through.
        sql_operation.closecon(db)
예제 #3
0
def tosql(keyword, relevant_search):
    """Write one keyword and its related searches to ``t_relevant_search``.

    The result of ``participle(keyword)`` is joined with ``"---"`` and stored
    as ``key_list``; ``relevant_search`` is joined with ``"-----"``. The
    ``flag`` column is written as ``'0'``. The generated statement is printed
    before it is executed.

    Args:
        keyword: Value stored in the ``keyword`` column and passed to
            ``participle``.
        relevant_search: Iterable of strings joined into the
            ``relevant_search`` column.
    """
    db = sql_operation.getcon()
    try:
        key_str = "---".join(participle(keyword))
        relevant_str = "-----".join(relevant_search)
        # NOTE(review): the values are spliced into the SQL text with %
        # formatting, so a value containing a single quote alters the
        # statement. Only the (db, sql) form of baseoperation is visible
        # here; switch to parameterized execution if it supports it.
        replace_sql = (
            "REPLACE INTO `t_relevant_search` (`keyword`,  `key_list`, `relevant_search`, `flag`) "
            "VALUES ('%s','%s', '%s', '0')"
            % (keyword, key_str, relevant_str)
        )
        print(replace_sql)
        sql_operation.baseoperation(db, replace_sql)
    finally:
        # Release the connection even if tokenising, joining or the write fails.
        sql_operation.closecon(db)
예제 #4
0
def title():
    """Set ``title_flag`` for every unflagged ``original_qdfuns_article`` row.

    Rows whose ``title_flag`` is NULL are read; each title is passed to
    ``baidudetector.url`` and the row is updated with ``title_flag = '0'``
    when that call returns a truthy value, ``'1'`` otherwise.
    """
    selectsql = "SELECT article_id,title FROM original_qdfuns_article WHERE title_flag IS NULL"
    db = sql_operation.getcon()
    try:
        for row in sql_operation.baseselect(db, selectsql):
            # Distinct local names: the original shadowed the builtin ``id``
            # and this function's own name.
            article_id, article_title = row[0], row[1]
            flag = '0' if baidudetector.url(article_title) else '1'
            updatesql = (
                "UPDATE original_qdfuns_article SET title_flag = '%s' WHERE (`article_id`='%d')"
                % (flag, article_id)
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        # Release the connection even if the detector or an update raises.
        sql_operation.closecon(db)
예제 #5
0
def tosql(infoLists):
    """Insert article records into ``original_qdfuns_article``.

    Args:
        infoLists: Indexable container of three parallel sequences: titles at
            index 0, urls at index 1 and submit times at index 2. One row is
            inserted per title; every row gets the same ``stock_time``, the
            local time at which this call started.
    """
    db = sql_operation.getcon()
    try:
        titles, urls, subtimes = infoLists[0], infoLists[1], infoLists[2]
        stock_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Indexing (rather than zip) keeps the IndexError when urls or
        # subtimes is shorter than titles instead of silently dropping rows.
        for i, article_title in enumerate(titles):
            # NOTE(review): the values are spliced into the SQL text with %
            # formatting, so a title containing a single quote alters the
            # statement. Only the (db, sql) form of baseoperation is visible
            # here; switch to parameterized execution if it supports it.
            insertsql = (
                "INSERT INTO `original_qdfuns_article` (`title`, `url`, `submit_time`, `stock_time`) "
                "VALUES ('%s', '%s', '%s', '%s')"
                % (article_title, urls[i], subtimes[i], stock_time)
            )
            sql_operation.baseoperation(db, insertsql)
    finally:
        # Release the connection even when an insert fails part-way through.
        sql_operation.closecon(db)
예제 #6
0
def key_extract():
    """Append matching keywords from ``t_relevant_search`` to a text file.

    Selects the rows whose ``keyword`` matches both ``LIKE '%么%'`` and
    ``LIKE '%php%'``, then writes each keyword on its own line to
    ``keyword_relevant.txt`` (opened in append mode, UTF-8) and prints it.
    """
    select_sql = "SELECT keyword,key_list FROM t_relevant_search WHERE keyword LIKE '%么%' and keyword LIKE '%php%'"
    db = sql_operation.getcon()
    try:
        # The with-block closes the file before the connection is released,
        # matching the original order, and also on any exception.
        with open("keyword_relevant.txt", "a", encoding='utf-8') as key_file:
            for row in sql_operation.baseselect(db, select_sql):
                keyword = row[0]
                key_file.write(keyword + '\n')
                print(keyword)
    finally:
        sql_operation.closecon(db)
예제 #7
0
def sqltofile():
    """Load HTTPS proxies from ``t_proxy_info`` and hand them to ``thread``.

    Selects up to 5000 rows with ``type = 'HTTPS'`` in random order, converts
    each row to a list of its column values (ip, port, type), closes the
    connection, prints the resulting list and passes it to ``thread``.
    """
    select_sql = "SELECT ip,`port`,type FROM t_proxy_info Where type = 'HTTPS' Order By rand() Limit 5000"
    db = sql_operation.getcon()
    try:
        results = sql_operation.baseselect(db, select_sql)
        # Copy every row into a plain list while the connection is still open.
        proxys = [list(record) for record in results]
    finally:
        # The connection is not needed for the follow-up work; release it
        # here, including when the select or the copy fails.
        sql_operation.closecon(db)
    print(proxys)
    thread(proxys)
예제 #8
0
def updatetosql():
    """Fill in details for up to 10 random unprocessed questions.

    Selects up to 10 rows with ``flag = '0'`` from
    ``t_stackoverflow_question`` in random order. For each row the url is
    passed to ``item_html``; the returned sequence is written back with
    ``flag`` set to ``'1'``, using these positions: 0 title (used in the
    WHERE clause), 1 views, 2 answers_num, 3 asked_time, 4 tags,
    5 last_active_time, 6 question_content, 7 answers (joined with
    ``"[split]"``).
    """
    select_sql = "SELECT title,url FROM t_stackoverflow_question WHERE flag = '0' Order By rand() limit 10"
    db = sql_operation.getcon()
    try:
        for row in sql_operation.baseselect(db, select_sql):
            item = item_html(row[1])
            answers_text = "[split]".join(item[7])
            # NOTE(review): the row is matched on item[0], the title returned
            # by item_html, not on the title selected above — confirm the two
            # always agree, otherwise the row keeps flag = '0'.
            # NOTE(review): the values are spliced into the SQL text with %
            # formatting, so content containing a single quote alters the
            # statement. Only the (db, sql) form of baseoperation is visible
            # here; switch to parameterized execution if it supports it.
            updatesql = (
                "UPDATE `t_stackoverflow_question` "
                "SET `tags`='%s', `views`='%s', `answers_num`='%s', `asked_time`='%s', `last_active_time`='%s', `question_content`='%s', `answers_contetnt`='%s' , `flag` = '1'"
                "WHERE (`title`='%s') "
                % (item[4], item[1], item[2], item[3], item[5], item[6], answers_text, item[0])
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        # Release the connection even if fetching a page or an update raises.
        sql_operation.closecon(db)
예제 #9
0
def content():
    """Set ``content_flag`` for pending ``original_python_article`` rows.

    Selects rows with ``title_flag = 0`` whose ``content_flag`` is NULL or 0.
    For each row the url is printed and passed to ``circleCheck``; the row is
    updated with ``content_flag = '0'`` when that call returns a truthy
    value, ``'1'`` otherwise.
    """
    selectsql = "SELECT article_id,url FROM original_python_article WHERE (content_flag IS NULL  or content_flag = 0) and title_flag = 0"
    db = sql_operation.getcon()
    try:
        for row in sql_operation.baseselect(db, selectsql):
            article_id, url = row[0], row[1]
            print('当前所处理的文章url:       ' + url)
            flag = '0' if circleCheck(url) else '1'
            updatesql = (
                "UPDATE `original_python_article` SET `content_flag`='%s' WHERE (`article_id`='%d')"
                % (flag, article_id)
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        # Release the connection even if the check or an update raises.
        sql_operation.closecon(db)
예제 #10
0
def cnblogsSpider(index):
    """Fetch one listing via ``cnblogs.getDoc`` and store its articles.

    Args:
        index: Passed unchanged to ``cnblogs.getDoc``. The result is indexed
            as four parallel sequences: titles (0), intros (1), urls (2) and
            others (3). One row per title is inserted into
            ``original_python_article`` with a fixed ``from`` value and a
            shared ``stock_time`` (local time taken before the inserts).
    """
    infoLists = cnblogs.getDoc(index)
    titles, intros, urls, others = infoLists[0], infoLists[1], infoLists[2], infoLists[3]
    article_from = '博客园首页'
    stock_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    db = sql_operation.getcon()
    try:
        # Indexing (rather than zip) keeps the IndexError when a parallel
        # sequence is shorter than titles instead of silently dropping rows.
        for i, article_title in enumerate(titles):
            # NOTE(review): the values are spliced into the SQL text with %
            # formatting, so text containing a single quote alters the
            # statement. Only the (db, sql) form of baseoperation is visible
            # here; switch to parameterized execution if it supports it.
            insertsql = (
                "INSERT INTO `original_python_article` (`title`, `intro`, `url`, `from`, `other`, `stock_time`) "
                "VALUES ('%s', '%s', '%s', '%s', '%s', '%s')"
                % (article_title, intros[i], urls[i], article_from, others[i], stock_time)
            )
            sql_operation.baseoperation(db, insertsql)
    finally:
        # Release the connection even when an insert fails part-way through.
        sql_operation.closecon(db)
예제 #11
0
def sql_search():
    """Run ``page_html`` for the related searches of pending keywords.

    Selects up to 50 rows with ``flag = '0'`` from ``t_relevant_search`` and
    issues an update that sets ``flag = '1'`` on up to 50 such rows. Then,
    row by row, the ``relevant_search`` value is split on ``"-----"``, one
    thread running ``page_html`` is started per entry, and all threads of
    that row are joined before the next row is handled.
    """
    select_sql = "select keyword,relevant_search from t_relevant_search where flag = '0' Limit 50"
    update_sql = "update t_relevant_search set flag = '1' where flag = '0'  Limit 50"
    db = sql_operation.getcon()
    try:
        results = sql_operation.baseselect(db, select_sql)
        # NOTE(review): neither statement has an ORDER BY, so the rows
        # flagged here are not guaranteed to be the rows just selected —
        # confirm, or update by primary key instead.
        sql_operation.baseoperation(db, update_sql)
        for row in results:
            # A fresh list per row: the original kept one list for the whole
            # run and re-joined already finished threads on every row.
            workers = []
            for keyword in row[1].split("-----"):
                worker = threading.Thread(target=page_html, args=[keyword])
                workers.append(worker)
                worker.start()
            # Block until every thread started for this row has finished.
            for worker in workers:
                worker.join()
    finally:
        # Release the connection even if a query or a thread start raises.
        sql_operation.closecon(db)
예제 #12
0
def titleandintro():
    """Set ``title_flag`` for pending ``original_python_article`` rows.

    Selects rows whose ``title_flag`` is NULL or 0. A row is updated with
    ``title_flag = '0'`` when ``baidudetector.url`` returns a truthy value
    for both the title and the intro, and with ``'1'`` otherwise. The intro
    is only checked when the title check is truthy.
    """
    selectsql = "SELECT article_id,title,intro FROM original_python_article WHERE title_flag IS NULL or title_flag = 0"
    db = sql_operation.getcon()
    try:
        for row in sql_operation.baseselect(db, selectsql):
            article_id, title, intro = row[0], row[1], row[2]
            both_pass = baidudetector.url(title) and baidudetector.url(intro)
            flag = '0' if both_pass else '1'
            updatesql = (
                "UPDATE `original_python_article` SET `title_flag`='%s' WHERE (`article_id`='%d')"
                % (flag, article_id)
            )
            sql_operation.baseoperation(db, updatesql)
    finally:
        # Release the connection even if the detector or an update raises.
        sql_operation.closecon(db)
예제 #13
0
def sqlETL():
    """Build the key -> keyword-id index in ``t_relevant_search_key``.

    For every row of ``t_relevant_search`` the ``key_list`` value is split on
    ``"---"``. A key that already exists in ``t_relevant_search_key`` gets the
    row id appended to its ``keyword_id_list`` (separated by ``@``); a new
    key is inserted with the row id as its initial list.
    """
    select_sql = "SELECT id,key_list from t_relevant_search"
    select_key_sql = "SELECT `key` from t_relevant_search_key"
    db = sql_operation.getcon()
    try:
        for row in sql_operation.baseselect(db, select_sql):
            # The key table is re-read for every row, as in the original;
            # presumably so keys inserted for earlier rows are picked up.
            # A set gives constant-time membership tests for the loop below.
            known_keys = {
                record[0] for record in sql_operation.baseselect(db, select_key_sql)
            }
            for key in row[1].split('---'):
                # NOTE(review): the key is spliced into the SQL text with %
                # formatting, so a key containing a single quote alters the
                # statement. Only the (db, sql) form of baseoperation is
                # visible here; use parameterized execution if supported.
                if key in known_keys:
                    update_sql = (
                        "update t_relevant_search_key "
                        "set keyword_id_list = CONCAT(keyword_id_list,'@','%s') where `key` = '%s'"
                        % (row[0], key)
                    )
                    print('-----------------')
                    sql_operation.baseoperation(db, update_sql)
                else:
                    insert_sql = (
                        "insert into t_relevant_search_key (`key`,keyword_id_list) value ('%s','%s')"
                        % (key, row[0])
                    )
                    sql_operation.baseoperation(db, insert_sql)
    finally:
        # Release the connection even if a query or a write raises.
        sql_operation.closecon(db)