def do_item(path, module_str, title):
    """Load the comments stored in the file at *path* for one news item.

    The news id is looked up with ``get_news_id(module_str, title, cur)``.
    When ``news_comment`` already contains rows for that id nothing is done.
    Otherwise the module-level ``count`` is incremented and the file is read
    line by line: lines are accumulated until the accumulated text can be
    evaluated and stored, at which point the buffer is reset.

    Args:
        path: Path of the text file to read (decoded with ``_CHARSET``,
            undecodable bytes ignored).
        module_str: Interpolated into ``_NEWS_MODULE_PATH`` to import the
            module providing ``match_comment``; also passed to
            ``get_news_id``.
        title: Passed to ``get_news_id``.
    """
    global count
    conn = get_conn()
    cur = conn.cursor()
    try:
        news_id = get_news_id(module_str, title, cur)
        cur.execute(
            'select count(*) from news_comment where news_id = %u' % news_id)
        if cur.fetchone()[0]:
            # Comments for this news item are already stored.
            return
        count += 1
        # NOTE(review): ``id`` is not assigned in this function, so unless the
        # module defines its own ``id`` this prints the builtin. ``news_id``
        # was probably intended -- confirm before changing the output.
        print(id, count)
        module = importlib.import_module(_NEWS_MODULE_PATH % module_str)
        with open(path, 'r', encoding=_CHARSET, errors='ignore') as file:
            pending = ''
            for line in file:
                pending += line
                try:
                    # SECURITY: eval() executes whatever the file contains.
                    # If the dumps are plain literals, ast.literal_eval would
                    # be the safe replacement -- verify the file format first.
                    comment = module.match_comment(eval(pending))
                    database.store_comment(comment, news_id, cur)
                except Exception:
                    # Best effort: the record may continue on the next line
                    # or be malformed, so keep accumulating. Unlike a bare
                    # ``except:`` this lets KeyboardInterrupt/SystemExit out.
                    continue
                pending = ''
        conn.commit()
    finally:
        # Release the cursor and connection on every exit path.
        cur.close()
        conn.close()
def store_news(news, store_args):
    """Insert *news* into ``news`` unless the same site/title is already there.

    Args:
        news: Object whose ``abstract``, ``source_url``, ``author``,
            ``news_image``, ``url``, ``comment_url_args``, ``content``,
            ``source`` and ``date`` attributes are read. ``date`` is passed to
            ``time.localtime``.
        store_args: Indexable; ``[0]`` is a key of ``NEWS_DIT``, ``[1]`` is
            the news title and ``[2]`` is forwarded to ``NEWS_PAT``.
    """
    def quoted_or_null(value):
        # Optional columns become a quoted SQL literal, or Null when empty.
        return "'%s'" % value if value else 'Null'

    website_id = NEWS_DIT[store_args[0]]
    title = store_args[1]

    conn = get_conn()
    cur = conn.cursor()
    try:
        # NOTE(review): the title is interpolated without escaping, so a title
        # containing a quote breaks this statement. Switching to driver-side
        # parameters needs the paramstyle of get_conn()'s driver -- confirm.
        sql = "select * from news where news_website_id = %u and news_title = '%s'" % (
            website_id, title)
        # Relies on execute() returning the number of matching rows.
        if cur.execute(sql):
            return

        published = datetime.datetime(*time.localtime(news.date)[:6])
        sql = NEWS_PAT % (
            website_id,
            title,
            news.url,
            str(news.comment_url_args).replace("'", "\\'"),
            store_args[2],
            quoted_or_null(news.abstract),
            news.content.replace("'", "\\'"),
            news.source,
            quoted_or_null(news.source_url),
            quoted_or_null(news.author),
            published,
            quoted_or_null(news.news_image))
        cur.execute(sql)
        conn.commit()
    finally:
        # The early return above used to skip these closes.
        cur.close()
        conn.close()
def do_item(path, module_str, title):
    """Store the comments dumped in *path* for the news item named *title*.

    Resolves the news id via ``get_news_id``; returns immediately if
    ``news_comment`` already has rows for it. Otherwise bumps the global
    ``count`` and feeds the file through ``match_comment`` of the module
    imported from ``_NEWS_MODULE_PATH % module_str``, storing each result with
    ``database.store_comment``.

    Args:
        path: File to read, decoded with ``_CHARSET`` (errors ignored).
        module_str: Selects the parser module and is passed to
            ``get_news_id``.
        title: Passed to ``get_news_id``.
    """
    global count
    conn = get_conn()
    cur = conn.cursor()
    try:
        news_id = get_news_id(module_str, title, cur)
        sql = 'select count(*) from news_comment where news_id = %u' % news_id
        cur.execute(sql)
        already_stored = cur.fetchone()[0]
        if already_stored:
            return

        count += 1
        # NOTE(review): ``id`` is never bound here; this likely prints the
        # builtin function rather than ``news_id`` -- confirm the intent.
        print(id, count)

        module = importlib.import_module(_NEWS_MODULE_PATH % module_str)
        with open(path, 'r', encoding=_CHARSET, errors='ignore') as file:
            buffered = ''
            for line in file:
                # A record may span several lines: keep appending until the
                # buffered text evaluates and stores successfully.
                buffered += line
                try:
                    # SECURITY: eval() runs arbitrary code from the file; only
                    # safe if the dump files are fully trusted.
                    comment = module.match_comment(eval(buffered))
                    database.store_comment(comment, news_id, cur)
                except Exception:
                    # Narrowed from a bare except so interpreter-exit signals
                    # are no longer swallowed; failures stay best-effort.
                    pass
                else:
                    buffered = ''
        conn.commit()
    finally:
        # Runs on the early return and on errors as well.
        cur.close()
        conn.close()
def store_news(news, store_args):
    """Store one news record, skipping it when the title already exists.

    A ``select`` on ``news`` by website id and title is run first; if it
    matches, the function returns without inserting.

    Args:
        news: Source object; reads ``abstract``, ``source_url``, ``author``,
            ``news_image``, ``url``, ``comment_url_args``, ``content``,
            ``source`` and ``date`` (``date`` goes through
            ``time.localtime``).
        store_args: Indexable; ``[0]`` keys into ``NEWS_DIT``, ``[1]`` is the
            title, ``[2]`` is passed straight to ``NEWS_PAT``.
    """
    def sql_text(value):
        # Empty/None optional fields are written as SQL Null, others quoted.
        if value:
            return "'%s'" % value
        return 'Null'

    site_id = NEWS_DIT[store_args[0]]
    conn = get_conn()
    cur = conn.cursor()
    try:
        # NOTE(review): values are spliced into the SQL text unescaped; a
        # quote in the title will break the query. Parameterising requires
        # knowing the driver's paramstyle -- confirm against get_conn().
        lookup = "select * from news where news_website_id = %u and news_title = '%s'" % (
            site_id, store_args[1])
        # execute() is assumed to return the matched row count.
        if not cur.execute(lookup):
            insert = NEWS_PAT % (
                site_id,
                store_args[1],
                news.url,
                str(news.comment_url_args).replace("'", "\\'"),
                store_args[2],
                sql_text(news.abstract),
                news.content.replace("'", "\\'"),
                news.source,
                sql_text(news.source_url),
                sql_text(news.author),
                datetime.datetime(*time.localtime(news.date)[:6]),
                sql_text(news.news_image))
            cur.execute(insert)
            conn.commit()
    finally:
        # Previously the duplicate-title path returned without closing.
        cur.close()
        conn.close()
def test_topic(topic_name, weibo_module_str):
    """Return True when the ``sql_pat10`` query for *topic_name* matches nothing.

    Args:
        topic_name: Interpolated into ``sql_pat10``.
        weibo_module_str: Unused; kept for interface compatibility.

    Returns:
        ``False`` if ``cur.execute`` reports a truthy result (assumed to be a
        row count), ``True`` otherwise.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        return not cur.execute(sql_pat10 % topic_name)
    finally:
        # The original close calls sat after the returns and never ran.
        cur.close()
        conn.close()
def do_item(topic):
    """Insert one topic using the ``sql_pat11`` statement and commit it.

    Args:
        topic: Object whose ``topic_name``, ``topic_datetime`` (passed to
            ``time.localtime``), ``topic_type``, ``topic_introduction`` and
            ``topic_args`` attributes are read.
    """
    created = datetime.datetime(*time.localtime(topic.topic_datetime)[:6])
    sql = sql_pat11 % (
        topic.topic_name,
        created,
        topic.topic_type,
        # Single quotes are backslash-escaped before being embedded in SQL.
        topic.topic_introduction.replace("'", "\\'"),
        str(topic.topic_args).replace("'", "\\'"))

    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(sql)
        conn.commit()
    finally:
        # Close even when the insert fails.
        cur.close()
        conn.close()
def get_topic_list():
    """Yield each ``sql_pat`` row for which the ``sql_pat2`` query is empty.

    ``sql_pat`` is executed once; for every returned row, ``sql_pat2`` is
    formatted with the row's first column and executed. Rows whose follow-up
    query returns a falsy value (assumed to be a zero row count) are yielded.

    Yields:
        Rows of the ``sql_pat`` result, unchanged.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(sql_pat)
        # fetchall() first: the cursor is reused for the per-row queries.
        for row in cur.fetchall():
            if not cur.execute(sql_pat2 % row[0]):
                yield row
    finally:
        # Also runs when the consumer stops iterating early (generator close).
        cur.close()
        conn.close()
def store_comments(comments, store_args):
    """Store every comment through ``_store_comment`` and commit once.

    Args:
        comments: Iterable of comments; nothing happens (no connection is
            opened) when it is empty or None.
        store_args: Passed unchanged to ``_store_comment`` for each comment.
    """
    if not comments:
        return
    conn = get_conn()
    cur = conn.cursor()
    try:
        for comment in comments:
            _store_comment(comment, store_args, cur)
        conn.commit()
    finally:
        # Release the connection even if a comment fails to store.
        cur.close()
        conn.close()
def store_reposts(reposts, store_args):
    """Store every repost through ``_store_repost`` and commit once.

    Args:
        reposts: Iterable of reposts; nothing happens (no connection is
            opened) when it is empty or None.
        store_args: Passed unchanged to ``_store_repost`` for each repost.
    """
    if not reposts:
        return
    conn = get_conn()
    cur = conn.cursor()
    try:
        for repost in reposts:
            _store_repost(repost, store_args, cur)
        conn.commit()
    finally:
        # Release the connection even if a repost fails to store.
        cur.close()
        conn.close()
def store_topics(topics, store_args):
    """Insert every topic with the ``sql_pat11`` statement, then commit.

    Args:
        topics: Iterable of objects exposing ``topic_name``,
            ``topic_datetime`` (passed to ``time.localtime``), ``topic_type``,
            ``topic_introduction`` and ``topic_args``. Nothing happens when it
            is empty or None.
        store_args: Unused; kept for interface compatibility.
    """
    if not topics:
        return
    conn = get_conn()
    cur = conn.cursor()
    try:
        for topic in topics:
            created = datetime.datetime(
                *time.localtime(topic.topic_datetime)[:6])
            cur.execute(sql_pat11 % (
                topic.topic_name,
                created,
                topic.topic_type,
                # Single quotes are backslash-escaped for the SQL text.
                topic.topic_introduction.replace("'", "\\'"),
                str(topic.topic_args).replace("'", "\\'")))
        conn.commit()
    finally:
        # Close even when an insert raises.
        cur.close()
        conn.close()
def get_topics(weibo_module_str):
    """Yield ``match_topic(row)`` for each key returned by ``sql_pat12``.

    For every row of the ``sql_pat12`` result, ``sql_pat13`` is formatted with
    the row's first column, executed, and its first result row is converted
    with ``match_topic``.

    Args:
        weibo_module_str: Unused; kept for interface compatibility.

    Yields:
        Whatever ``match_topic`` returns for each fetched row.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(sql_pat12)
        # Materialise the keys first because the cursor is reused below.
        for key_row in cur.fetchall():
            cur.execute(sql_pat13 % key_row[0])
            topic_row = cur.fetchone()
            yield match_topic(topic_row)
    finally:
        # Also runs when the generator is closed before exhaustion.
        cur.close()
        conn.close()
def get_result_mids(store_args):
    """Yield the ``weibo_id`` values linked to the hotspot in ``store_args[1]``.

    The hotspot id is resolved with ``HOTSPOT_ID_SQL_PAT % store_args[1]`` and
    then used to select from ``weibo_hotspot_relative``.

    Args:
        store_args: Indexable; only ``store_args[1]`` is read.

    Yields:
        The first column of every matching ``weibo_hotspot_relative`` row.

    Raises:
        TypeError: If the hotspot lookup returns no row (``fetchone()`` is
            None), as before.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(HOTSPOT_ID_SQL_PAT % store_args[1])
        topic_id = cur.fetchone()[0]
        cur.execute(
            "select weibo_id from weibo_hotspot_relative where weibo_hotspot_id = %u" % topic_id)
        rows = cur.fetchall()
    finally:
        # All rows are in memory, so the connection is released before the
        # first value is yielded rather than held for the consumer's lifetime.
        cur.close()
        conn.close()
    for row in rows:
        yield row[0]
def work():
    """Move every non-empty ``news_author`` value into ``news_image``.

    Selects ``news_id`` and ``news_author`` for rows whose author is not the
    empty string, then for each row sets ``news_author`` to Null and
    ``news_image`` to the former author value, committing and printing the
    row after each update.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(
            "select news_id, news_author from news where news_author != ''")
        # Fix: assignments in an UPDATE ... SET list are separated by commas.
        # The previous "Null and news_image = ..." was parsed as one boolean
        # expression assigned to news_author, so news_image was never written.
        # NOTE(review): the author text is interpolated unescaped; a quote in
        # it will break the statement.
        update_pat = "update news set news_author = Null, news_image = '%s' where news_id = %u"
        for news_id, author in cur.fetchall():
            cur.execute(update_pat % (author, news_id))
            conn.commit()
            print((news_id, author))
    finally:
        # Close even if an update fails part-way through.
        cur.close()
        conn.close()
def do_item(item):
    """Re-fetch a news page, derive its title and write it with ``SQL_PAT``.

    The page at ``item[2]`` is downloaded with ``urlopen_and_read``, decoded
    with ``NEWS_CHARSET`` (errors ignored) and parsed with ``Soup``. The text
    of its ``<title>`` has the site suffix and anything matching ``NAME_PAT``
    removed, then ``SQL_PAT % (title, item[0])`` is executed and committed.
    The cleaned title is printed at the end.

    Args:
        item: Indexable; ``item[0]`` is the second ``SQL_PAT`` argument and
            ``item[2]`` is the URL to fetch.
    """
    # Do the slow, failure-prone network and parsing work before opening a
    # connection, so a failed fetch no longer leaks one.
    page = urlopen_and_read(item[2]).decode(NEWS_CHARSET, 'ignore')
    title = Soup(page).title.text
    title = re.sub('(_新闻)_腾讯网', '', title)
    title = re.sub(NAME_PAT, '', title)

    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(SQL_PAT % (title, item[0]))
        conn.commit()
    finally:
        cur.close()
        conn.close()
    print(title)
def store_comments(comments, store_args):
    """Attach *comments* to the news row identified by *store_args*.

    The news id is looked up with ``NEWS_ID_SQL``; when no row is found the
    function returns without storing anything.

    Args:
        comments: Iterable of comments, each handed to ``_store_comment``.
        store_args: Indexable; ``[0]`` keys into ``NEWS_DIT`` and ``[1]``,
            ``[2]`` are interpolated into ``NEWS_ID_SQL``.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(
            NEWS_ID_SQL % (NEWS_DIT[store_args[0]], store_args[1], store_args[2]))
        news_row = cur.fetchone()
        if not news_row:
            return
        for comment in comments:
            _store_comment(comment, news_row[0], cur)
        conn.commit()
    finally:
        # The "news not found" return used to leave the connection open.
        cur.close()
        conn.close()
def store_comments(comments, store_args):
    """Store *comments* under the news id resolved from *store_args*.

    Runs ``NEWS_ID_SQL`` to find the news id. If the lookup yields no row the
    comments are silently dropped.

    Args:
        comments: Iterable of comments passed one by one to
            ``_store_comment``.
        store_args: Indexable; ``[0]`` is a ``NEWS_DIT`` key, ``[1]`` and
            ``[2]`` are the remaining ``NEWS_ID_SQL`` arguments.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        lookup = NEWS_ID_SQL % (
            NEWS_DIT[store_args[0]], store_args[1], store_args[2])
        cur.execute(lookup)
        found = cur.fetchone()
        if found:
            news_id = found[0]
            for comment in comments:
                _store_comment(comment, news_id, cur)
            conn.commit()
    finally:
        # Guarantees cleanup on the no-match path and on errors.
        cur.close()
        conn.close()
def do_item(item):
    """Refresh the stored title of one news row from its live page.

    Downloads ``item[2]`` via ``urlopen_and_read``, decodes it with
    ``NEWS_CHARSET`` (ignoring errors), takes the ``<title>`` text from
    ``Soup``, strips the site suffix and ``NAME_PAT`` matches, and executes
    ``SQL_PAT % (title, item[0])``. Prints the resulting title.

    Args:
        item: Indexable; ``item[0]`` and ``item[2]`` are read (the latter is
            the page URL).
    """
    # Fetch and parse first: these steps can fail, and the connection used to
    # be opened (and then leaked) before them.
    html = urlopen_and_read(item[2]).decode(NEWS_CHARSET, 'ignore')
    title = Soup(html).title.text
    for pattern in ('(_新闻)_腾讯网', NAME_PAT):
        title = re.sub(pattern, '', title)

    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(SQL_PAT % (title, item[0]))
        conn.commit()
    finally:
        cur.close()
        conn.close()
    print(title)
def store_weibos(weibos, store_args):
    """Store *weibos* under the hotspot resolved from ``store_args[1]``.

    Args:
        weibos: Iterable of weibos, each passed to ``_store_weibo``. Nothing
            happens (no connection is opened) when it is empty or None.
        store_args: Indexable; ``store_args[1]`` is interpolated into
            ``HOTSPOT_ID_SQL_PAT`` to find the hotspot id.

    Raises:
        TypeError: If the hotspot lookup returns no row, as before.
    """
    if not weibos:
        return
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute(HOTSPOT_ID_SQL_PAT % store_args[1])
        topic_id = cur.fetchone()[0]
        for weibo in weibos:
            _store_weibo(weibo, topic_id, cur)
        conn.commit()
    finally:
        # Close even when the lookup or a store call raises.
        cur.close()
        conn.close()
def do_with_comment(item):
    """Delete consecutive duplicate comments of one news item.

    Comments of news id *item* are read ordered by ``news_comment_datetime``.
    Whenever a row equals the previous row in every column but the first, the
    previous row is deleted by its first column (``news_comment_id``) and the
    deletion is committed.

    Args:
        item: News id interpolated into the select statement.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        sql = 'select * from news_comment where news_id = %u order by news_comment_datetime' % item
        # execute() is assumed to return the row count; nothing to do on 0.
        if not cur.execute(sql):
            return
        previous = ()
        for row in cur.fetchall():
            if previous[1:] == row[1:]:
                cur.execute(
                    'delete from news_comment where news_comment_id = %u' % previous[0])
                conn.commit()
            previous = row
    finally:
        # The empty-result return used to skip these closes.
        cur.close()
        conn.close()
def do_with_comment(item):
    """Remove adjacent duplicate rows from ``news_comment`` for one news id.

    Rows are fetched in ``news_comment_datetime`` order. Two neighbouring rows
    count as duplicates when all columns after the first are equal; the
    earlier of the two is deleted (by its first column) and the change
    committed.

    Args:
        item: The news id to clean up.
    """
    select_sql = 'select * from news_comment where news_id = %u order by news_comment_datetime' % item
    conn = get_conn()
    cur = conn.cursor()
    try:
        # A falsy execute() result is taken to mean "no rows".
        if cur.execute(select_sql):
            earlier = ()
            for current in cur.fetchall():
                if earlier[1:] == current[1:]:
                    delete_sql = 'delete from news_comment where news_comment_id = %u' % earlier[0]
                    cur.execute(delete_sql)
                    conn.commit()
                earlier = current
    finally:
        # Cleanup now happens on every path, including "no rows".
        cur.close()
        conn.close()
def get_NEWS_ID_LIST():
    """Yield one unstarted ``Thread`` per news row not yet in ``CHANGE_LIST``.

    Each thread targets ``_do_item`` with the ``(news_id, news_url)`` row as
    its single argument. After the consumer resumes the generator, the global
    ``count`` is printed, ``CHANGE_LIST`` is printed when non-empty, and
    ``count`` is incremented.

    Yields:
        ``Thread`` objects; starting them is left to the caller.
    """
    global count
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute('select news_id, news_url from news')
        rows = cur.fetchall()
    finally:
        # Rows are fully fetched, so release the connection before yielding
        # instead of holding it for as long as the consumer iterates.
        cur.close()
        conn.close()

    for row in rows:
        # Fix: CHANGE_LIST holds bare news ids (see _do_item), so testing the
        # whole (news_id, news_url) tuple for membership could never match.
        if row[0] in CHANGE_LIST:
            continue
        yield Thread(target=_do_item, args=(row,))
        print(count)
        if CHANGE_LIST:
            print(CHANGE_LIST)
        count += 1
def do():
    """Clear comments for every news row and re-process single-character titles.

    For each row of ``news`` the matching ``news_comment`` rows are deleted
    and committed. Rows whose title is exactly one non-whitespace character
    are queued on ``thread_pool`` for ``do_item``; the pool is then started
    and joined.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute('select news_id, news_title, news_url from news')
        for row in cur.fetchall():
            cur.execute(
                'delete from news_comment where news_id = %u' % row[0])
            conn.commit()
            if re.search('^\\S$', row[1]):
                thread_pool.add(do_item, (row, ))
    finally:
        # This connection is not used after the loop, so it is released
        # before the worker threads run, and on errors too.
        cur.close()
        conn.close()
    thread_pool.start()
    thread_pool.join()
def do():
    """Delete stored comments per news row and queue bad titles for repair.

    Every ``news`` row has its ``news_comment`` rows deleted (committed per
    row). A row is handed to ``thread_pool`` with ``do_item`` when its title
    matches ``^\\S$``, i.e. consists of a single non-whitespace character.
    Finally the pool is started and waited for.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute('select news_id, news_title, news_url from news')
        data = cur.fetchall()
        for news in data:
            news_id, news_title = news[0], news[1]
            cur.execute(
                'delete from news_comment where news_id = %u' % news_id)
            conn.commit()
            if re.search('^\\S$', news_title):
                thread_pool.add(do_item, (news,))
    finally:
        # Close before running the pool: nothing below needs this connection,
        # and an exception above must not leak it.
        cur.close()
        conn.close()
    thread_pool.start()
    thread_pool.join()
def get_NEWS_ID_LIST():
    """Generate worker threads for news rows that are not flagged yet.

    Reads ``(news_id, news_url)`` for every ``news`` row and yields an
    unstarted ``Thread`` running ``_do_item`` on each row whose id is not in
    ``CHANGE_LIST``. Each time the generator is resumed it prints the global
    ``count`` (and ``CHANGE_LIST`` if non-empty) and increments ``count``.

    Yields:
        Unstarted ``Thread`` instances.
    """
    global count
    conn = get_conn()
    cur = conn.cursor()
    try:
        cur.execute('select news_id, news_url from news')
        news_rows = cur.fetchall()
    finally:
        # The result set is already in memory; closing here avoids keeping
        # the connection open (or leaking it) while the caller iterates.
        cur.close()
        conn.close()

    for news_row in news_rows:
        news_id = news_row[0]
        # Fix: compare the id, not the row tuple. CHANGE_LIST is filled with
        # plain ids by _do_item, so the old tuple test was always False.
        if news_id not in CHANGE_LIST:
            yield Thread(target=_do_item, args=(news_row, ))
            print(count)
            if CHANGE_LIST:
                print(CHANGE_LIST)
            count += 1
def do_with_news():
    """Delete flagged news rows unless the ``sql_pat`` check returns 1.

    For every id in ``CHANGE_LIST`` the row's ``news_url`` is fetched. Ids
    with no row are skipped. ``sql_pat`` is then run with that URL; when its
    first result column equals 1 the row is kept, otherwise the ``news`` row
    is deleted and committed. Failed deletions are ignored.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        for news_id in CHANGE_LIST:
            # execute() is assumed to return the row count (0 -> no such id).
            if not cur.execute(
                    'select news_url from news where news_id = %u' % news_id):
                continue
            # This column is the URL; the original local was named news_id.
            news_url = cur.fetchone()[0]
            cur.execute(sql_pat % news_url)
            if cur.fetchone()[0] == 1:
                continue
            try:
                cur.execute('delete from news where news_id = %u' % news_id)
                conn.commit()
            except Exception:
                # Best effort, as before; narrowed from a bare except so
                # KeyboardInterrupt/SystemExit still propagate.
                pass
    finally:
        # Close even when a lookup raises.
        cur.close()
        conn.close()
def do_with_news():
    """Remove the news rows listed in ``CHANGE_LIST`` that fail the URL check.

    Each listed id is looked up for its ``news_url``; missing ids are skipped.
    The URL is interpolated into ``sql_pat`` and the row is deleted (and the
    deletion committed) only when the first column of that query's result is
    not 1. Errors raised by a deletion are swallowed.
    """
    conn = get_conn()
    cur = conn.cursor()
    try:
        for flagged_id in CHANGE_LIST:
            lookup = 'select news_url from news where news_id = %u' % flagged_id
            # A falsy execute() result is treated as "id not present".
            if cur.execute(lookup):
                # Holds the URL, despite the original variable name news_id.
                url = cur.fetchone()[0]
                cur.execute(sql_pat % url)
                if cur.fetchone()[0] != 1:
                    try:
                        cur.execute(
                            'delete from news where news_id = %u' % flagged_id)
                        conn.commit()
                    except Exception:
                        # Still best-effort, but no longer a bare except that
                        # also hides KeyboardInterrupt/SystemExit.
                        pass
    finally:
        # Guarantees cleanup if any query raises.
        cur.close()
        conn.close()
def _do_item(item):
    """Flag a news row and its ``sql_pat`` matches in ``CHANGE_LIST``.

    The query string (everything from the first ``?``) is stripped from the
    URL, which is then interpolated into ``sql_pat``. If the query reports
    exactly one row nothing is flagged. Otherwise the given news id and the
    first column of every returned row are appended to ``CHANGE_LIST`` (and
    printed) unless already present.

    Args:
        item: ``(news_id, news_url)`` pair.
    """
    news_id, news_url = item
    # Same result as slicing at find('?'); a URL without '?' is unchanged.
    base_url = news_url.split('?', 1)[0]

    conn = get_conn()
    cur = conn.cursor()
    try:
        # execute() is assumed to return the number of matching rows.
        if cur.execute(sql_pat % base_url) == 1:
            return
        matched_ids = [row[0] for row in cur.fetchall()]
    finally:
        # One cleanup for every exit path, including errors.
        cur.close()
        conn.close()

    # CHANGE_LIST is only mutated, never rebound, so no ``global`` is needed.
    for candidate in [news_id] + matched_ids:
        if candidate not in CHANGE_LIST:
            CHANGE_LIST.append(candidate)
            print(candidate)
def _do_item(item):
    """Record in ``CHANGE_LIST`` the ids of news rows matching this row's URL.

    The URL is cut at its first ``?`` and used to format ``sql_pat``. A result
    of exactly one row leaves ``CHANGE_LIST`` untouched. For any other result
    the row's own id, followed by the first column of each returned row, is
    appended to ``CHANGE_LIST`` and printed if not yet listed.

    Args:
        item: Two-element sequence ``(news_id, news_url)``.
    """
    own_id, url = item
    query_start = url.find('?')
    if query_start != -1:
        url = url[:query_start]

    conn = get_conn()
    cur = conn.cursor()
    try:
        # Relies on execute() returning the matched row count.
        row_count = cur.execute(sql_pat % url)
        other_ids = [] if row_count == 1 else [row[0] for row in cur.fetchall()]
    finally:
        # Replaces the duplicated close calls and also covers exceptions.
        cur.close()
        conn.close()

    if row_count == 1:
        return
    # The list is mutated in place, so the ``global`` statement was redundant.
    if own_id not in CHANGE_LIST:
        CHANGE_LIST.append(own_id)
        print(own_id)
    for other_id in other_ids:
        if other_id not in CHANGE_LIST:
            CHANGE_LIST.append(other_id)
            print(other_id)