def main(topic, start_time, end_time):
    start_ts = datetime2ts(start_time)
    end_ts = datetime2ts(end_time) + 24 * 3600  # make the end day inclusive

    ## datestrlist = []
    ## for datestr in datestr_list:
    ##     datestr_new = datestr.replace('-', '')
    ##     datestrlist.append(datestr_new)

    query_dict = {
        'timestamp': {'$gt': start_ts, '$lt': end_ts},
    }
    ## t = topic.split(',')
    ## for ctopic in t:
    ##     query_dict['$and'].append({'topics': ctopic})

    start = time.time()
    ## statuses_search = getXapianWeiboByDuration(datestrlist)
    ## count, get_results = statuses_search.search(query=query_dict, fields=fields_list)
    topic_id = getTopicByName(topic)['_id']
    xapian_search_weibo = getXapianWeiboByTopic(topic_id)
    count, get_results = xapian_search_weibo.search(query=query_dict, fields=fields_list)
    end = time.time()
    #print count
    print 'search takes %s s' % (end - start)

    weibo = []
    for r in get_results():
        weibo.append([
            r['_id'],
            r['user'],
            r['text'].encode('utf-8'),
            r['timestamp'],
            r['reposts_count'],
            r['comments_count'],
        ])

    ad_main(topic, weibo, '0914', 10)  # start opinion mining on the weibo data
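# Example invocation of main() (a sketch; the topic and dates are illustrative,
# any topic already registered via getTopicByName should work):
#     main(u'两会2015', '2015-03-02', '2015-03-15')
# Note the query window: end_ts is pushed forward by 24 * 3600 seconds, so
# statuses posted on end_time itself still fall inside the timestamp range.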
    mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field],
                                      max_offset=w_limit, mset_direct=True)
    kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']),
                          top=k_limit)
    top_ws = top_weibos(results, top=w_limit)

    mtype_count[v] = [end_ts, count]
    mtype_kcount[v] = [end_ts, kcount]
    mtype_weibo[v] = [end_ts, top_ws]

    save_pc_results(topic, mtype_count, during)
    save_kc_results(topic, mtype_kcount, during, k_limit)
    save_ws_results(topic, mtype_weibo, during, w_limit)


if __name__ == '__main__':
    # Example topics: u'香港自由行', u'张灵甫遗骨疑似被埋羊圈', u'高校思想宣传',
    # u'高校宣传思想工作', u'外滩踩踏', 'APEC', u'全军政治工作会议'
    topic = sys.argv[1]
    start_date = sys.argv[2]  # e.g. '2015-02-23'
    end_date = sys.argv[3]    # e.g. '2015-03-02'

    topic = topic.decode('utf-8')
    topic_id = getTopicByName(topic)['_id']
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)
    duration = Fifteenminutes

    xapian_search_weibo = getXapianWeiboByTopic(topic_id)
    print 'topic: ', topic.encode('utf8'), 'from %s to %s' % (start_ts, end_ts)
    propagateCronTopic(topic, xapian_search_weibo, start_ts, end_ts, during=duration)
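# Assumed command-line usage for this cron script (the script name below is
# hypothetical):
#     python propagate_cron_topic.py 外滩踩踏 2015-02-23 2015-03-02
# Under Python 2, sys.argv entries arrive as byte strings, hence
# topic.decode('utf-8'); Fifteenminutes is presumably a 900-second window
# constant, so counts/keywords/weibos are bucketed per 15 minutes.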
location_dict["mid"] = r["_id"] location_dict["topic"] = topic location_dict["ts"] = r["timestamp"] location_dict["origin_location"] = origin_location.split("\t")[1] location_dict["repost_location"] = None return location_dict return None if __name__ == "__main__": START_TS = datetime2ts("2015-03-02") END_TS = datetime2ts("2015-03-15") topic = u"两会2015" topic_id = getTopicByName(topic)["_id"] print "topic: ", topic.encode("utf8") print topic_id, START_TS, END_TS xapian_search = getXapianWeiboByTopic(topic_id) repost_search(topic, START_TS, END_TS) """ item_exist = db.session.query(CityRepost).filter(CityRepost.topic == topic).all() if item_exist: for item in item_exist: db.session.delete(item) db.session.commit() print 'commited' """