Ejemplo n.º 1
0
def main(topic, start_ts, end_ts):
    #在topic_status中获取还未进行计算的话题
    topics = _topic_not_calc(status='-1', module='i_news')
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                             TopicStatus.start==start_ts ,\
                                                             TopicStatus.end==end_ts ,\
                                                             TopicStatus.module=='i_news' ,\
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        topic_id = topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        topicname = topic_status_info.topic
        db_date = topic_status_info.db_date
            
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date, 'i_news')
        print 'update_status'
    
        #mongodb中topic对应的collection
        print 'get_dynamic_mongo'
        news_collection , comment_collection = get_dynamic_mongo(topicname, start_ts, end_ts)
        #早期参与者
        print 'start compute early_join'
        early_join(topicname, start_ts, end_ts, news_collection)
        #趋势发起人
        print 'start compute trend_user'
        trend_user(topicname, start_ts, end_ts, news_collection, comment_collection)

        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date, 'i_news') 
Ejemplo n.º 2
0
def main():
    topics = _topic_not_calc()

    if topics and len(topics):
    	topic = topics[0]
        
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic

        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        topic_id = acquire_topic_id(topicname, start_ts, end_ts)
        windowsize = (end_ts - start_ts) / Day
        date = ts2datetime(end_ts)

        if windowsize > 7:
            degree_rank(TOPK, date, topic_id, windowsize)
        else:
            pagerank_rank(TOPK, date, topic_id, windowsize)

        topic_id = int(topic_id)
        windowsize = int(windowsize)

        if not topic_id:
            gexf = ''
        else:
            gexf = make_network_graph(date, topic_id, topicname, windowsize)

        save_gexf_results(topicname, date, windowsize, gexf)

        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
Ejemplo n.º 3
0
def main():
    topics = _topic_not_calc()
    if topics and len(topics):
    	topic = topics[0]

        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), topicname.encode('utf-8'), 'start'
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        result  = calculate(topicname,start_ts,end_ts)
        print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), topicname.encode('utf-8'), result
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
Ejemplo n.º 4
0
def _check_run_notcustomize_topic(during=Fifteenminutes):
    '''定时执行非定制话题表中status为0的话题数量、关键词、关键微博计算
    '''

    topics = _topic_not_calc()
    if topics and len(topics):
        topic = topics[0]
        start_ts = topic.start
        end_ts = topic.end
        db_date = topic.db_date
        topicname = topic.topic
        
        # update status to 0
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        
        print topicname.encode('utf-8'),  ' run realtime job from %s to %s ' % (start_ts, end_ts)
        sentimentRealTimeTopic(topicname, start_ts, end_ts+24*3600)

        # update status to 1
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
Ejemplo n.º 5
0
def main(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc() # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                             TopicStatus.start==start_ts ,\
                                                             TopicStatus.end==end_ts ,\
                                                             TopicStatus.module=='identify' ,\
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0] # 每次只计算一个----为了做一个缓冲,每个n时间才计算一个
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(topicname, start_ts, end_ts) # 重新获取id是因为TopicStatus中id是自增加的,进行更新后,id就不是原来的那一个了
        windowsize = (end_ts - start_ts) / Day # 确定时间跨度的大小
        date = ts2datetime(end_ts)

        print 'start topic2xapianid'
        topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
        print 'topic_xapian_id:', topic_xapian_id
        
        print 'start compute first_nodes'
        start_date = ts2datetime(start_ts) # used to compute the first user
        get_first_node(topicname, start_date, date, windowsize, topic_xapian_id)
        print 'end compute first_nodes'
#
        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network(topicname, date, windowsize, topic_xapian_id, max_size, attribute_add)
        print 'write gexf file'
        real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize) 
        print 'gexf_file:', str(GRAPH_PATH)+str(key)+'_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        save_attribute_dict(new_attribute_dict, 'g')
        save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(TOPK, date, topic_id, windowsize, topicname, real_topic_id)
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id: # 待删
            gexf = ''
        else:
            gexf, ds_gexf = make_network_graph(date, topic_id, topicname, windowsize, all_uid_pr, data, ds_all_uid_pr, ds_data, real_topic_id)
        print 'save gexf'
        save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
        save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)
        print 'start fu_tr'
        get_interval_count(topicname, date, windowsize, topic_xapian_id)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date) 
Ejemplo n.º 6
0
def main(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc() # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                             TopicStatus.start==start_ts ,\
                                                             TopicStatus.end==end_ts ,\
                                                             TopicStatus.module=='identify' ,\
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0] # 每次只计算一个----为了做一个缓冲,每个n时间才计算一个
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(
            topicname, start_ts,
            end_ts)  # 重新获取id是因为TopicStatus中id是自增加的,进行更新后,id就不是原来的那一个了
        windowsize = (end_ts - start_ts) / Day  # 确定时间跨度的大小
        date = ts2datetime(end_ts)

        print 'start topic2xapianid'
        topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
        print 'topic_xapian_id:', topic_xapian_id

        print 'start compute first_nodes'
        start_date = ts2datetime(start_ts)  # used to compute the first user
        get_first_node(topicname, start_date, date, windowsize,
                       topic_xapian_id)
        print 'end compute first_nodes'
        #
        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network(
            topicname, date, windowsize, topic_xapian_id, max_size,
            attribute_add)
        print 'write gexf file'
        real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
        print 'gexf_file:', str(GRAPH_PATH) + str(key) + '_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        nx.write_gexf(ds_udg,
                      str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        save_attribute_dict(new_attribute_dict, 'g')
        save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(
            TOPK, date, topic_id, windowsize, topicname, real_topic_id)
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id:  # 待删
            gexf = ''
        else:
            gexf, ds_gexf = make_network_graph(date, topic_id, topicname,
                                               windowsize, all_uid_pr, data,
                                               ds_all_uid_pr, ds_data,
                                               real_topic_id)
        print 'save gexf'
        save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
        save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)
        print 'start fu_tr'
        get_interval_count(topicname, date, windowsize, topic_xapian_id)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)