예제 #1
0
def do_item(path, module_str, title):
    """Import the comments stored in the file at ``path`` for one news item.

    The news id is obtained from ``get_news_id(module_str, title, cur)``.
    When ``news_comment`` already holds rows for that id nothing is
    imported.  Otherwise the file is read line by line: lines are
    accumulated in a buffer until the buffer can be evaluated, matched and
    stored without error, after which the buffer is reset.

    Args:
        path: File to read; decoded with ``_CHARSET``, undecodable bytes
            are ignored.
        module_str: Substituted into ``_NEWS_MODULE_PATH`` to import the
            module that provides ``match_comment``.
        title: Passed to ``get_news_id`` together with ``module_str``.
    """
    global count
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            news_id = get_news_id(module_str, title, cur)
            cur.execute(
                'select count(*) from news_comment where news_id = %u'
                % news_id)
            if cur.fetchone()[0]:
                # Comments already present for this news item.
                return
            count += 1
            # The original printed ``id``, which is not bound in this
            # function (so normally the builtin); the news id is what is
            # useful next to the running counter.
            print(news_id, count)
            module = importlib.import_module(_NEWS_MODULE_PATH % module_str)
            with open(path, 'r', encoding=_CHARSET, errors='ignore') as file:
                buffer = ''
                for line in file:
                    buffer += line
                    try:
                        # SECURITY NOTE(review): eval() executes whatever
                        # the file contains.  If these files only hold
                        # literals, ast.literal_eval would be safer --
                        # confirm the file format before switching.
                        comment = module.match_comment(eval(buffer))
                        database.store_comment(comment, news_id, cur)
                    except Exception:
                        # Best effort: the buffer may simply be an
                        # incomplete record, so keep accumulating lines.
                        continue
                    buffer = ''
            conn.commit()
        finally:
            cur.close()
    finally:
        # Runs on the early return and on errors as well.
        conn.close()
예제 #2
0
파일: database.py 프로젝트: WANMAX/crawler
def store_news(news, store_args):
    """Insert one news row unless the same site/title pair already exists.

    Args:
        news: Object providing ``abstract``, ``source_url``, ``author``,
            ``news_image``, ``url``, ``comment_url_args``, ``content``,
            ``source`` and ``date`` (a timestamp accepted by
            ``time.localtime``).
        store_args: Sequence where ``store_args[0]`` is a key of
            ``NEWS_DIT``, ``store_args[1]`` is the title used for the
            duplicate check and ``store_args[2]`` is passed through to
            ``NEWS_PAT``.
    """
    def _quoted_or_null(value):
        # Optional columns become a quoted SQL literal, or Null when empty.
        return "'%s'" % value if value else 'Null'

    website_id = NEWS_DIT[store_args[0]]
    news_title = store_args[1]
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            # NOTE(review): the statements are built with string
            # formatting, so a quote in the title or any other unescaped
            # field breaks (or injects into) the SQL.  Parameterising
            # needs a change to NEWS_PAT, which is defined elsewhere.
            sql = ("select * from news where news_website_id = %u "
                   "and news_title = '%s'") % (website_id, news_title)
            # Relies on cur.execute() returning a truthy value when the
            # query matched at least one row.
            if cur.execute(sql):
                return
            sql = NEWS_PAT % (
                website_id,
                news_title,
                news.url,
                str(news.comment_url_args).replace("'", "\\'"),
                store_args[2],
                _quoted_or_null(news.abstract),
                news.content.replace("'", "\\'"),
                news.source,
                _quoted_or_null(news.source_url),
                _quoted_or_null(news.author),
                datetime.datetime(*time.localtime(news.date)[:6]),
                _quoted_or_null(news.news_image),
            )
            cur.execute(sql)
            conn.commit()
        finally:
            cur.close()
    finally:
        # The original leaked the connection on the duplicate early return.
        conn.close()
예제 #3
0
def do_item(path, module_str, title):
    """Load the comments saved in ``path`` into the database for one news item.

    ``get_news_id(module_str, title, cur)`` supplies the news id.  If
    ``news_comment`` already contains rows for it the function returns
    without importing.  Otherwise lines of the file are appended to a
    buffer until the buffer evaluates, matches and stores without raising;
    the buffer is then cleared and reading continues.

    Args:
        path: File to read; decoded with ``_CHARSET``, undecodable bytes
            are ignored.
        module_str: Substituted into ``_NEWS_MODULE_PATH`` to import the
            module that provides ``match_comment``.
        title: Passed to ``get_news_id`` together with ``module_str``.
    """
    global count
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            news_id = get_news_id(module_str, title, cur)
            sql = ('select count(*) from news_comment where news_id = %u'
                   % news_id)
            cur.execute(sql)
            already_imported = cur.fetchone()[0]
            if already_imported:
                return
            count += 1
            # ``id`` is not bound in this function, so the original most
            # likely printed the builtin; print the news id instead.
            print(news_id, count)
            module = importlib.import_module(_NEWS_MODULE_PATH % module_str)
            with open(path, 'r', encoding=_CHARSET, errors='ignore') as file:
                pending = ''
                for line in file:
                    pending += line
                    try:
                        # SECURITY NOTE(review): eval() runs arbitrary
                        # code from the file.  Consider ast.literal_eval
                        # if the records are plain literals -- confirm
                        # the file format first.
                        comment = module.match_comment(eval(pending))
                        database.store_comment(comment, news_id, cur)
                    except Exception:
                        # Best effort: keep accumulating lines; the record
                        # may just be incomplete so far.
                        continue
                    pending = ''
            conn.commit()
        finally:
            cur.close()
    finally:
        # Also reached on the early return and when an error propagates.
        conn.close()
예제 #4
0
파일: database.py 프로젝트: WANMAX/crawler
def store_news(news, store_args):
    """Store a news item, skipping it when its site/title pair is present.

    Args:
        news: Object providing ``abstract``, ``source_url``, ``author``,
            ``news_image``, ``url``, ``comment_url_args``, ``content``,
            ``source`` and ``date`` (a timestamp accepted by
            ``time.localtime``).
        store_args: Sequence where ``store_args[0]`` is a key of
            ``NEWS_DIT``, ``store_args[1]`` is the title used for the
            duplicate check and ``store_args[2]`` is passed through to
            ``NEWS_PAT``.
    """
    def _sql_literal(value):
        # Empty optional fields are written as Null, others are quoted.
        if value:
            return "'%s'" % value
        return 'Null'

    website_id = NEWS_DIT[store_args[0]]
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            # NOTE(review): string-built SQL; unescaped quotes in the
            # title, url, source, etc. will break or inject into the
            # statement.  NEWS_PAT lives elsewhere, so it is not
            # parameterised here.
            lookup = ("select * from news where news_website_id = %u "
                      "and news_title = '%s'") % (website_id, store_args[1])
            # cur.execute() is expected to return the matched row count.
            if cur.execute(lookup):
                return
            published = datetime.datetime(*time.localtime(news.date)[:6])
            insert = NEWS_PAT % (
                website_id,
                store_args[1],
                news.url,
                str(news.comment_url_args).replace("'", "\\'"),
                store_args[2],
                _sql_literal(news.abstract),
                news.content.replace("'", "\\'"),
                news.source,
                _sql_literal(news.source_url),
                _sql_literal(news.author),
                published,
                _sql_literal(news.news_image),
            )
            cur.execute(insert)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Previously skipped when the duplicate check returned early.
        conn.close()
예제 #5
0
파일: database.py 프로젝트: WANMAX/crawler
def test_topic(topic_name,  weibo_module_str):
    """Report whether the ``sql_pat10`` query for ``topic_name`` is empty.

    Args:
        topic_name: Substituted into ``sql_pat10``.
        weibo_module_str: Not used by this function; kept so existing
            callers keep working.

    Returns:
        bool: ``False`` when ``cur.execute`` returns a truthy value (the
        query matched rows), ``True`` otherwise.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            return not cur.execute(sql_pat10 % topic_name)
        finally:
            # In the original the close() calls sat after both return
            # statements and were never reached.
            cur.close()
    finally:
        conn.close()
예제 #6
0
def do_item(topic):
    """Run the ``sql_pat11`` statement for a single topic and commit it.

    Args:
        topic: Object providing ``topic_name``, ``topic_datetime`` (a
            timestamp accepted by ``time.localtime``), ``topic_type``,
            ``topic_introduction`` and ``topic_args``.
    """
    connection = get_conn()
    cursor = connection.cursor()

    name = topic.topic_name
    created = datetime.datetime(*time.localtime(topic.topic_datetime)[:6])
    kind = topic.topic_type
    # Single quotes are backslash-escaped before being placed in the SQL.
    introduction = topic.topic_introduction.replace("'", "\\'")
    arguments = str(topic.topic_args).replace("'", "\\'")

    statement = sql_pat11 % (name, created, kind, introduction, arguments)
    cursor.execute(statement)
    connection.commit()

    cursor.close()
    connection.close()
예제 #7
0
def get_topic_list():
    """Yield each ``sql_pat`` row whose ``sql_pat2`` follow-up query is empty.

    ``sql_pat`` is executed once; for every returned row, ``sql_pat2`` is
    formatted with the row's first column and executed.  The row is
    yielded when that second ``execute`` returns a falsy value.

    Yields:
        Rows exactly as returned by ``cur.fetchall()``.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql_pat)
            # fetchall() hands back all rows up front, so the cursor can
            # be reused for the per-row query inside the loop.
            for item in cur.fetchall():
                if not cur.execute(sql_pat2 % item[0]):
                    yield item
        finally:
            cur.close()
    finally:
        # try/finally makes the cleanup run even when the consumer stops
        # iterating early and the generator is closed.
        conn.close()
예제 #8
0
파일: database.py 프로젝트: WANMAX/crawler
def store_comments(comments, store_args):
    """Store every comment through ``_store_comment`` and commit once.

    Does nothing (and opens no connection) when ``comments`` is falsy.

    Args:
        comments: Iterable of comments handed one by one to
            ``_store_comment``.
        store_args: Forwarded unchanged to ``_store_comment``.
    """
    if comments:
        connection = get_conn()
        cursor = connection.cursor()
        for entry in comments:
            _store_comment(entry, store_args, cursor)
        # A single commit covers the whole batch.
        connection.commit()
        cursor.close()
        connection.close()
예제 #9
0
파일: database.py 프로젝트: WANMAX/crawler
def store_reposts(reposts, store_args):
    """Store every repost through ``_store_repost`` and commit once.

    Does nothing (and opens no connection) when ``reposts`` is falsy.

    Args:
        reposts: Iterable of reposts handed one by one to
            ``_store_repost``.
        store_args: Forwarded unchanged to ``_store_repost``.
    """
    if reposts:
        connection = get_conn()
        cursor = connection.cursor()
        for entry in reposts:
            _store_repost(entry, store_args, cursor)
        # A single commit covers the whole batch.
        connection.commit()
        cursor.close()
        connection.close()
예제 #10
0
파일: database.py 프로젝트: WANMAX/crawler
def store_topics(topics, store_args):
    """Execute ``sql_pat11`` for each topic, then commit once.

    Does nothing (and opens no connection) when ``topics`` is falsy.

    Args:
        topics: Iterable of objects providing ``topic_name``,
            ``topic_datetime`` (a timestamp accepted by
            ``time.localtime``), ``topic_type``, ``topic_introduction``
            and ``topic_args``.
        store_args: Not used by this function.
    """
    if topics:
        connection = get_conn()
        cursor = connection.cursor()
        for entry in topics:
            name = entry.topic_name
            created = datetime.datetime(
                *time.localtime(entry.topic_datetime)[:6])
            kind = entry.topic_type
            # Single quotes are backslash-escaped before going into SQL.
            introduction = entry.topic_introduction.replace("'", "\\'")
            arguments = str(entry.topic_args).replace("'", "\\'")
            cursor.execute(
                sql_pat11 % (name, created, kind, introduction, arguments))
        connection.commit()
        cursor.close()
        connection.close()
예제 #11
0
파일: database.py 프로젝트: WANMAX/crawler
def get_topics(weibo_module_str):
    """Yield ``match_topic(row)`` for every key returned by ``sql_pat12``.

    ``sql_pat12`` is executed once; for each resulting row, ``sql_pat13``
    is formatted with the row's first column, executed, and the first row
    of that result is passed to ``match_topic``.

    Args:
        weibo_module_str: Not used by this function; kept so existing
            callers keep working.

    Yields:
        Whatever ``match_topic`` returns for each detail row.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql_pat12)
            # fetchall() returns all key rows up front, so the cursor can
            # be reused for the detail query.
            for key_row in cur.fetchall():
                cur.execute(sql_pat13 % key_row[0])
                yield match_topic(cur.fetchone())
        finally:
            cur.close()
    finally:
        # Runs even if the consumer abandons the generator early.
        conn.close()
예제 #12
0
파일: database.py 프로젝트: WANMAX/crawler
def get_result_mids(store_args):
    """Yield the ``weibo_id`` values related to one hotspot.

    The hotspot id is the first column of the first row returned by
    ``HOTSPOT_ID_SQL_PAT % store_args[1]``; it is then used to select
    ``weibo_id`` from ``weibo_hotspot_relative``.

    Args:
        store_args: Sequence; only ``store_args[1]`` is read.

    Yields:
        The first column of every matching ``weibo_hotspot_relative`` row.

    Raises:
        TypeError: If the hotspot lookup returns no row (``fetchone()``
            gives ``None``).  The connection is still closed.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(HOTSPOT_ID_SQL_PAT % store_args[1])
            topic_id = cur.fetchone()[0]
            cur.execute(
                "select weibo_id from weibo_hotspot_relative "
                "where weibo_hotspot_id = %u" % topic_id)
            for mid in cur.fetchall():
                yield mid[0]
        finally:
            cur.close()
    finally:
        # Runs on errors and when the generator is closed before exhaustion.
        conn.close()
예제 #13
0
def work():
    """Move each non-empty ``news_author`` value into ``news_image``.

    For every ``news`` row whose ``news_author`` is not the empty string,
    ``news_image`` is set to that value and ``news_author`` is set to
    NULL.  Each row is committed and printed individually.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                "select news_id, news_author from news "
                "where news_author != ''")
            # The original statement joined the two assignments with
            # ``and``; SQL parses that as one boolean expression assigned
            # to news_author and never writes news_image.  Assignments in
            # a SET clause are separated by commas.
            # NOTE(review): the author value is interpolated unescaped; a
            # quote in it will break the statement.
            sql_pat = ("update news set news_author = Null, "
                       "news_image = '%s' where news_id = %u")
            for item in cur.fetchall():
                cur.execute(sql_pat % (item[1], item[0]))
                conn.commit()
                print(item)
        finally:
            cur.close()
    finally:
        conn.close()
예제 #14
0
def do_item(item):
    """Re-fetch the page title for one row and write it with ``SQL_PAT``.

    The page at ``item[2]`` is downloaded, decoded with ``NEWS_CHARSET``
    (errors ignored) and parsed; the text of its ``<title>`` has the site
    suffix and anything matching ``NAME_PAT`` removed before being
    substituted into ``SQL_PAT`` together with ``item[0]``.

    Args:
        item: Sequence where ``item[0]`` is substituted into ``SQL_PAT``
            and ``item[2]`` is the URL to fetch.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            page = urlopen_and_read(item[2]).decode(NEWS_CHARSET, 'ignore')
            title = Soup(page).title.text
            title = re.sub('(_新闻)_腾讯网', '', title)
            title = re.sub(NAME_PAT, '', title)
            cur.execute(SQL_PAT % (title, item[0]))
            conn.commit()
        finally:
            cur.close()
    finally:
        # The download or parse can fail; without this the connection
        # opened above would stay open.
        conn.close()
    print(title)
예제 #15
0
파일: database.py 프로젝트: WANMAX/crawler
def store_comments(comments, store_args):
    """Store comments for the news row identified by ``store_args``.

    The news id is looked up with ``NEWS_ID_SQL``; when no row is found
    nothing is stored.

    Args:
        comments: Iterable of comments handed one by one to
            ``_store_comment``.
        store_args: Sequence where ``store_args[0]`` is a key of
            ``NEWS_DIT`` and ``store_args[1]``, ``store_args[2]`` are
            substituted into ``NEWS_ID_SQL``.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            sql = NEWS_ID_SQL % (
                NEWS_DIT[store_args[0]], store_args[1], store_args[2])
            cur.execute(sql)
            news_id = cur.fetchone()
            if not news_id:
                # Unknown news item: nothing to attach the comments to.
                return
            for comment in comments:
                _store_comment(comment, news_id[0], cur)
            conn.commit()
        finally:
            cur.close()
    finally:
        # The original skipped both close() calls on the early return.
        conn.close()
예제 #16
0
파일: database.py 프로젝트: WANMAX/crawler
def store_comments(comments, store_args):
    """Attach ``comments`` to the news row found through ``NEWS_ID_SQL``.

    Returns without storing anything when the lookup yields no row.

    Args:
        comments: Iterable of comments handed one by one to
            ``_store_comment``.
        store_args: Sequence where ``store_args[0]`` is a key of
            ``NEWS_DIT`` and ``store_args[1]``, ``store_args[2]`` are
            substituted into ``NEWS_ID_SQL``.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            lookup = NEWS_ID_SQL % (
                NEWS_DIT[store_args[0]], store_args[1], store_args[2])
            cur.execute(lookup)
            row = cur.fetchone()
            if row:
                news_id = row[0]
                for comment in comments:
                    _store_comment(comment, news_id, cur)
                conn.commit()
        finally:
            cur.close()
    finally:
        # Previously the "row not found" path returned without closing.
        conn.close()
예제 #17
0
def do_item(item):
    """Download a page, clean its title and run ``SQL_PAT`` with it.

    The page at ``item[2]`` is fetched and decoded with ``NEWS_CHARSET``
    (errors ignored).  The ``<title>`` text has the site suffix and any
    ``NAME_PAT`` match stripped, then ``SQL_PAT % (title, item[0])`` is
    executed and committed.  The cleaned title is printed at the end.

    Args:
        item: Sequence where ``item[0]`` is substituted into ``SQL_PAT``
            and ``item[2]`` is the URL to fetch.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            raw = urlopen_and_read(item[2])
            title = Soup(raw.decode(NEWS_CHARSET, 'ignore')).title.text
            title = re.sub('(_新闻)_腾讯网', '', title)
            title = re.sub(NAME_PAT, '', title)
            sql = SQL_PAT % (title, item[0])
            cur.execute(sql)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Network and parsing errors are plausible here; make sure the
        # connection does not stay open when they happen.
        conn.close()
    print(title)
예제 #18
0
파일: database.py 프로젝트: WANMAX/crawler
def store_weibos(weibos, store_args):
    """Store every weibo under the hotspot named by ``store_args[1]``.

    Does nothing (and opens no connection) when ``weibos`` is falsy.  The
    hotspot id is the first column of the first row returned by
    ``HOTSPOT_ID_SQL_PAT % store_args[1]``.

    Args:
        weibos: Iterable of weibos handed one by one to ``_store_weibo``.
        store_args: Sequence; only ``store_args[1]`` is read.
    """
    if weibos:
        connection = get_conn()
        cursor = connection.cursor()
        cursor.execute(HOTSPOT_ID_SQL_PAT % store_args[1])
        hotspot_row = cursor.fetchone()
        hotspot_id = hotspot_row[0]
        for post in weibos:
            _store_weibo(post, hotspot_id, cursor)
        # A single commit covers the whole batch.
        connection.commit()
        cursor.close()
        connection.close()
예제 #19
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def do_with_comment(item):
    """Delete consecutive duplicate comments of one news item.

    The ``news_comment`` rows for news id ``item`` are read ordered by
    ``news_comment_datetime``.  Whenever a row equals the previous row in
    every column after the first, the previous row is deleted, using its
    first column as ``news_comment_id``.  Each delete is committed
    immediately.

    Args:
        item: News id (formatted with ``%u``).
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            sql = ('select * from news_comment where news_id = %u '
                   'order by news_comment_datetime' % item)
            if not cur.execute(sql):
                return
            previous = None
            # A separate loop name keeps the ``item`` parameter intact.
            for row in cur.fetchall():
                if previous is not None and previous[1:] == row[1:]:
                    cur.execute(
                        'delete from news_comment '
                        'where news_comment_id = %u' % previous[0])
                    conn.commit()
                previous = row
        finally:
            cur.close()
    finally:
        # The original returned early without closing when no rows matched.
        conn.close()
예제 #20
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def do_with_comment(item):
    """Remove back-to-back duplicate comments for the news id ``item``.

    Rows of ``news_comment`` for that news id are fetched ordered by
    ``news_comment_datetime``.  When two neighbouring rows agree in every
    column except the first, the earlier one is deleted (its first column
    is used as ``news_comment_id``) and the delete is committed.

    Args:
        item: News id (formatted with ``%u``).
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            select_sql = ('select * from news_comment where news_id = %u '
                          'order by news_comment_datetime' % item)
            if cur.execute(select_sql):
                last_row = None
                # Distinct loop name: the original rebound ``item`` here.
                for row in cur.fetchall():
                    if last_row is not None and last_row[1:] == row[1:]:
                        delete_sql = ('delete from news_comment '
                                      'where news_comment_id = %u'
                                      % last_row[0])
                        cur.execute(delete_sql)
                        conn.commit()
                    last_row = row
        finally:
            cur.close()
    finally:
        # Closes the connection on the "no rows" path too, which the
        # original left open.
        conn.close()
예제 #21
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def get_NEWS_ID_LIST():
    """Yield one ``Thread`` running ``_do_item`` per news row to check.

    Every ``(news_id, news_url)`` row of ``news`` is considered; rows
    whose id is already in ``CHANGE_LIST`` are skipped.  After each
    yielded thread the running ``count`` is printed and incremented, and
    ``CHANGE_LIST`` is printed when it is non-empty.

    Yields:
        Thread: Unstarted thread with ``_do_item`` as target and the row
        as its only argument.
    """
    global count
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute('select news_id, news_url from news')
            for item in cur.fetchall():
                # CHANGE_LIST holds news ids (see how it is formatted
                # with %u elsewhere in this file), so compare the id and
                # not the whole row; a row never equals an id, which made
                # the original skip dead code.
                if item[0] in CHANGE_LIST:
                    continue
                yield Thread(target=_do_item, args=(item,))
                print(count)
                if CHANGE_LIST:
                    print(CHANGE_LIST)
                count += 1
        finally:
            cur.close()
    finally:
        # Runs even if the consumer stops iterating early.
        conn.close()
예제 #22
0
def do():
    """Delete all comments per news row and schedule selected rows.

    For each ``news`` row the matching ``news_comment`` rows are deleted
    and committed.  Rows whose title matches ``^\\S$`` (a single
    non-whitespace character) are added to ``thread_pool`` with
    ``do_item``.  The pool is started and joined before the connection is
    closed.
    """
    connection = get_conn()
    cursor = connection.cursor()
    cursor.execute('select news_id, news_title, news_url from news')
    for row in cursor.fetchall():
        news_id = row[0]
        cursor.execute(
            'delete from news_comment where news_id = %u' % news_id)
        connection.commit()
        title_match = re.search('^\\S$', row[1])
        if title_match:
            thread_pool.add(do_item, (row,))
    thread_pool.start()
    thread_pool.join()
    cursor.close()
    connection.close()
예제 #23
0
def do():
    """Clear stored comments for every news row and queue some for rework.

    Each ``news`` row has its ``news_comment`` rows deleted (committed per
    row).  When the row's title matches ``^\\S$`` (exactly one
    non-whitespace character) ``do_item`` is queued on ``thread_pool``
    with the row as its argument.  The pool runs to completion before the
    cursor and connection are closed.
    """
    db = get_conn()
    db_cursor = db.cursor()
    db_cursor.execute('select news_id, news_title, news_url from news')
    rows = db_cursor.fetchall()
    for record in rows:
        delete_sql = 'delete from news_comment where news_id = %u' % record[0]
        db_cursor.execute(delete_sql)
        db.commit()
        if re.search('^\\S$', record[1]) is not None:
            thread_pool.add(do_item, (record,))
    thread_pool.start()
    thread_pool.join()
    db_cursor.close()
    db.close()
예제 #24
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def get_NEWS_ID_LIST():
    """Generate a ``Thread`` per news row that is not yet in ``CHANGE_LIST``.

    All ``(news_id, news_url)`` rows of ``news`` are read.  A row is
    skipped when its id is already recorded in ``CHANGE_LIST``; otherwise
    an unstarted thread targeting ``_do_item`` is yielded.  When the
    generator is resumed it prints ``count``, prints ``CHANGE_LIST`` if
    non-empty, and increments ``count``.

    Yields:
        Thread: Unstarted thread with ``_do_item`` as target and the row
        as its only argument.
    """
    global count
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute('select news_id, news_url from news')
            for news_id, news_url in cur.fetchall():
                # CHANGE_LIST is filled with plain news ids elsewhere in
                # this file; the original tested the whole row tuple
                # against it, which can never match.
                if news_id in CHANGE_LIST:
                    continue
                yield Thread(target=_do_item, args=((news_id, news_url),))
                print(count)
                if CHANGE_LIST:
                    print(CHANGE_LIST)
                count += 1
        finally:
            cur.close()
    finally:
        # Guarantees cleanup when the generator is closed before the end.
        conn.close()
예제 #25
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def do_with_news():
    """Delete the news rows listed in ``CHANGE_LIST`` unless ``sql_pat`` says 1.

    For every id in ``CHANGE_LIST`` the row's ``news_url`` is read and
    substituted into ``sql_pat``.  If the first column of that query's
    first row equals 1 the news row is kept; otherwise it is deleted and
    the delete committed.  A failing delete is reported and skipped.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            for news_id in CHANGE_LIST:
                sql = ('select news_url from news where news_id = %u'
                       % news_id)
                if not cur.execute(sql):
                    continue
                # The selected column is the URL (the original stored it
                # in a variable misleadingly called news_id).
                news_url = cur.fetchone()[0]
                cur.execute(sql_pat % news_url)
                # NOTE(review): presumably a count of rows sharing this
                # URL -- confirm against the definition of sql_pat.
                if cur.fetchone()[0] == 1:
                    continue
                try:
                    cur.execute(
                        'delete from news where news_id = %u' % news_id)
                    conn.commit()
                except Exception as exc:
                    # Best effort, as before, but no longer silent and no
                    # longer swallowing KeyboardInterrupt/SystemExit.
                    print('delete failed for news_id %s: %s'
                          % (news_id, exc))
        finally:
            cur.close()
    finally:
        conn.close()
예제 #26
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def do_with_news():
    """Process ``CHANGE_LIST``: delete each listed news row unless kept.

    Per id: read ``news_url`` from ``news``; skip if the id no longer
    exists.  Run ``sql_pat`` with that URL and keep the row when the first
    column of the result equals 1.  Otherwise delete the row and commit.
    Delete errors are printed and the loop carries on.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            for news_id in CHANGE_LIST:
                found = cur.execute(
                    'select news_url from news where news_id = %u' % news_id)
                if not found:
                    continue
                # This value is the row's URL, not an id as the original
                # variable name suggested.
                news_url = cur.fetchone()[0]
                cur.execute(sql_pat % news_url)
                # NOTE(review): looks like a duplicate count for the URL;
                # verify against sql_pat, which is defined elsewhere.
                keep = cur.fetchone()[0] == 1
                if keep:
                    continue
                try:
                    cur.execute(
                        'delete from news where news_id = %u' % news_id)
                    conn.commit()
                except Exception as exc:
                    # Still best effort, but the failure is now visible
                    # and interpreter-exit exceptions are not caught.
                    print('delete failed for news_id %s: %s'
                          % (news_id, exc))
        finally:
            cur.close()
    finally:
        conn.close()
예제 #27
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def _do_item(item):
    """Record ids in ``CHANGE_LIST`` when the URL query does not return 1.

    The query string (everything from the first ``?``) is stripped from
    the URL before it is substituted into ``sql_pat``.  If ``execute``
    returns exactly 1 nothing is recorded.  Otherwise this row's id and
    the first column of every returned row are appended to
    ``CHANGE_LIST`` (when not already present) and printed.

    Args:
        item: ``(news_id, news_url)`` pair.
    """
    connection = get_conn()
    cursor = connection.cursor()
    news_id, news_url = item
    if '?' in news_url:
        news_url = news_url.partition('?')[0]
    single_match = cursor.execute(sql_pat % news_url) == 1
    if not single_match:
        # Own id first, then the ids returned by the query, in order.
        candidates = [news_id]
        candidates.extend(row[0] for row in cursor.fetchall())
        for candidate in candidates:
            if candidate not in CHANGE_LIST:
                CHANGE_LIST.append(candidate)
                print(candidate)
    cursor.close()
    connection.close()
예제 #28
0
파일: fix_news.py 프로젝트: WANMAX/crawler
def _do_item(item):
    """Add this row and its query matches to ``CHANGE_LIST`` if not unique.

    The URL is cut at its first ``?`` and substituted into ``sql_pat``.
    When ``execute`` returns exactly 1 the function only closes its
    resources.  Otherwise the row's own id, followed by the first column
    of each returned row, is appended to ``CHANGE_LIST`` unless already
    there; every newly appended id is printed.

    Args:
        item: ``(news_id, news_url)`` pair.
    """
    db = get_conn()
    db_cursor = db.cursor()
    own_id, url = item
    if '?' in url:
        url = url[:url.index('?')]
    row_count = db_cursor.execute(sql_pat % url)
    if not (row_count == 1):
        matched_ids = [row[0] for row in db_cursor.fetchall()]
        # Own id is handled first, then the matched ids in result order.
        for news_id in [own_id] + matched_ids:
            if news_id in CHANGE_LIST:
                continue
            CHANGE_LIST.append(news_id)
            print(news_id)
    db_cursor.close()
    db.close()