Exemple #1
0
def main():
    """Process the first topic returned by ``_topic_not_calc()``.

    Only ``topics[0]`` is handled per call. For that topic the function
    calls ``_update_topic_status2Computing``, ranks with ``degree_rank``
    (window longer than 7) or ``pagerank_rank`` (otherwise), builds the
    graph with ``make_network_graph``, stores it with ``save_gexf_results``
    and finally calls ``_update_topic_status2Completed``.

    Returns:
        None. Nothing is done when ``_topic_not_calc()`` yields no topics.
    """
    topics = _topic_not_calc()
    # Truthiness already covers both ``None`` and an empty sequence, so the
    # extra ``len()`` test of the original is unnecessary.
    if not topics:
        return

    topic = topics[0]
    start_ts = topic.start
    end_ts = topic.end
    db_date = topic.db_date
    topicname = topic.topic

    _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
    topic_id = acquire_topic_id(topicname, start_ts, end_ts)
    # Window length expressed in units of ``Day``.
    # NOTE(review): presumably both timestamps are integer seconds, which
    # makes this an integer division on Python 2 -- confirm.
    windowsize = (end_ts - start_ts) / Day
    date = ts2datetime(end_ts)

    # Windows longer than 7 are ranked by degree, shorter ones by PageRank.
    if windowsize > 7:
        degree_rank(TOPK, date, topic_id, windowsize)
    else:
        pagerank_rank(TOPK, date, topic_id, windowsize)

    # The ranking helpers above receive the raw values; only the graph
    # building and saving steps get the int-normalised ones.
    topic_id = int(topic_id)
    windowsize = int(windowsize)

    # A falsy (0) topic id stores an empty graph instead of building one.
    if topic_id:
        gexf = make_network_graph(date, topic_id, topicname, windowsize)
    else:
        gexf = ''

    save_gexf_results(topicname, date, windowsize, gexf)
    _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
def compute_network(topic, start_ts, end_ts):
    """Build, rank and persist the network for one topic.

    Steps, in order: ``get_first_node``, ``make_network``, writing the two
    returned graphs as GEXF files under ``GRAPH_PATH``,
    ``save_attribute_dict``, ``pagerank_rank``, ``make_network_graph``,
    ``save_gexf_results``, ``get_interval_count`` and finally
    ``_update_topic_status2Completed``.

    Args:
        topic: Topic identifier, passed unchanged to every helper and used
            as the prefix of the GEXF file names. The original author's
            note says this is the topic's pinyin name, i.e. the table name
            read from ES.
        start_ts: Start of the analysis window.
        end_ts: End of the analysis window.
            NOTE(review): both are presumably Unix timestamps in seconds
            -- confirm against the callers.

    Returns:
        None. Returns immediately, before any work is done, when ``topic``
        is empty.
    """
    # Validate up front: the original only checked this after the first
    # nodes and the network had already been computed, which also made its
    # later "empty topic" branch unreachable.
    if not topic:
        print('the topic not exist')
        return None

    print('%s %s' % (end_ts, type(end_ts)))
    date = ts2datetime(end_ts)
    # Size of the time span, in units of ``Day``.
    windowsize = (end_ts - start_ts) / Day

    print('start compute first_nodes')
    get_first_node(topic, start_ts, end_ts, windowsize, date)
    print('end compute first_nodes')

    print('start make network')
    max_size = MAX_SIZE
    attribute_add = True
    g, gg, new_attribute_dict = make_network(topic, date, windowsize,
                                             max_size, attribute_add)

    print('write gexf file')
    # File names are built from the *unconverted* windowsize; the int()
    # normalisation further down must stay after this point so the names
    # do not change.
    key = str(topic) + '_' + str(date) + '_' + str(windowsize)
    graph_prefix = str(GRAPH_PATH) + key
    print('gexf_file: %s' % (graph_prefix + '_g_graph.gexf',))
    nx.write_gexf(g, graph_prefix + '_g_graph.gexf')
    nx.write_gexf(gg, graph_prefix + '_gg_graph.gexf')
    # TODO (from the original author): rework this, SSDB is no longer used.
    save_attribute_dict(new_attribute_dict, 'g')
    print('end make network')

    print('start PageRank')
    all_uid_pr, data = pagerank_rank(TOPK, date, windowsize, topic)
    print('len(all_uid_pr): %s' % (len(all_uid_pr),))
    print('end PageRank')

    print('start make network graph')
    windowsize = int(windowsize)
    gexf = make_network_graph(date, topic, windowsize, all_uid_pr, data)

    print('save gexf')
    # ``gexf_type`` is not defined in this function; it is expected to be a
    # module-level name -- TODO confirm.
    save_gexf_results(topic, date, windowsize, gexf, gexf_type)

    print('start fu_tr')
    get_interval_count(topic, date, windowsize)

    print('update_topic_end')
    db_date = date
    _update_topic_status2Completed(topic, start_ts, end_ts, db_date)
    print('all done!')
def main(topic, start_ts, end_ts):
    """Run the network pipeline for one pending ``TopicStatus`` row.

    Looks up the ``TopicStatus`` row matching ``topic``, ``start_ts`` and
    ``end_ts`` with ``module == 'identify'`` and ``status == -1``. When no
    such row exists nothing is done. Otherwise the function calls, in
    order: ``_update_topic_status2Computing``, ``weibo_topic2xapian``,
    ``get_first_node``, ``make_network``, four ``nx.write_gexf`` dumps
    under ``GRAPH_PATH``, ``save_attribute_dict``, ``pagerank_rank``,
    ``make_network_graph``, ``save_gexf_results``, ``get_interval_count``
    and ``_update_topic_status2Completed``.

    Args:
        topic: Topic name used for the lookup and for every helper call.
        start_ts: Window start used for the lookup.
        end_ts: Window end used for the lookup.

    Returns:
        None in every case.
    """
    topic_status_info = db.session.query(TopicStatus).filter(
        TopicStatus.topic == topic,
        TopicStatus.start == start_ts,
        TopicStatus.end == end_ts,
        TopicStatus.module == 'identify',
        TopicStatus.status == -1).first()
    if not topic_status_info:
        return None

    print('topic_id %s' % (topic_status_info.id,))
    start_ts = topic_status_info.start
    end_ts = topic_status_info.end
    db_date = topic_status_info.db_date
    topicname = topic
    _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
    print('update_status')
    # Per the original author's note: the id is fetched again because
    # TopicStatus.id auto-increments, so after the status update it is no
    # longer the id of the row read above.
    topic_id = acquire_topic_id(topicname, start_ts, end_ts)
    # Size of the time span, in units of ``Day``.
    windowsize = (end_ts - start_ts) / Day
    date = ts2datetime(end_ts)

    print('start topic2xapianid')
    topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
    print('topic_xapian_id: %s' % (topic_xapian_id,))

    print('start compute first_nodes')
    start_date = ts2datetime(start_ts)  # used to compute the first user
    get_first_node(topicname, start_date, date, windowsize, topic_xapian_id)
    print('end compute first_nodes')

    print('start make network')
    max_size = MAX_SIZE
    attribute_add = True
    (g, gg, new_attribute_dict,
     ds_dg, ds_udg, ds_new_attribute_dict) = make_network(
        topicname, date, windowsize, topic_xapian_id, max_size,
        attribute_add)

    print('write gexf file')
    real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
    if not real_topic_id:
        # NOTE(review): this exit happens after
        # _update_topic_status2Computing but never reaches
        # _update_topic_status2Completed -- confirm that is intended.
        print('the topic not exist')
        return None

    # File names use the windowsize as computed above, before the int()
    # normalisation below.
    key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    graph_prefix = str(GRAPH_PATH) + key
    print('gexf_file: %s' % (graph_prefix + '_g_graph.gexf',))
    nx.write_gexf(g, graph_prefix + '_g_graph.gexf')
    nx.write_gexf(gg, graph_prefix + '_gg_graph.gexf')
    nx.write_gexf(ds_dg, graph_prefix + '_ds_dg_graph.gexf')
    nx.write_gexf(ds_udg, graph_prefix + '_ds_udg_graph.gexf')
    save_attribute_dict(new_attribute_dict, 'g')
    save_attribute_dict(ds_new_attribute_dict, 'ds_g')
    print('end make network')

    print('start PageRank')
    all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(
        TOPK, date, topic_id, windowsize, topicname, real_topic_id)
    print('len(all_uid_pr): %s' % (len(all_uid_pr),))
    print('end PageRank')

    print('start make network graph')
    topic_id = int(topic_id)
    windowsize = int(windowsize)
    if topic_id:
        gexf, ds_gexf = make_network_graph(
            date, topic_id, topicname, windowsize, all_uid_pr, data,
            ds_all_uid_pr, ds_data, real_topic_id)
    else:
        # Marked "to be deleted" by the original author. Both results must
        # be bound here: the original only set ``gexf`` and then raised
        # UnboundLocalError on ``ds_gexf`` in the save call below.
        gexf = ''
        ds_gexf = ''

    print('save gexf')
    # ``gexf_type`` / ``ds_gexf_type`` are not defined in this function;
    # they are expected to be module-level names -- TODO confirm.
    save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
    save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)

    print('start fu_tr')
    get_interval_count(topicname, date, windowsize, topic_xapian_id)

    print('update_topic_end')
    _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)
Exemple #4
0
def compute_network(topic, start_ts, end_ts):
    """Compute the network analysis for one topic and store it in ES.

    Steps, in order: ``get_first_node``, ``make_network``, writing the two
    returned graphs as GEXF files under ``GRAPH_PATH``, ``pagerank_rank``,
    ``make_network_graph``, ``save_gexf_results`` and
    ``get_interval_count``. The collected results are JSON-encoded and
    written to the ``network_results`` field of the document whose id is
    ``topic`` in index ``index_event_analysis_results``.

    Args:
        topic: Topic identifier, passed unchanged to every helper, used as
            the prefix of the GEXF file names and as the ES document id.
            The original author's note says this is the topic's pinyin
            name, i.e. the table name read from ES.
        start_ts: Start of the analysis window.
        end_ts: End of the analysis window.
            NOTE(review): both are presumably Unix timestamps in seconds
            -- confirm against the callers.

    Returns:
        None. Returns immediately, before any work is done, when ``topic``
        is empty.
    """
    # Validate up front: the original only checked this after the first
    # nodes and the network had already been computed, which also made its
    # later "empty topic" branch unreachable.
    if not topic:
        print('the topic not exist')
        return None

    network_results = {}

    print('%s %s' % (end_ts, type(end_ts)))
    date = ts2datetime(end_ts)
    # Size of the time span, in units of ``Day``.
    windowsize = (end_ts - start_ts) / Day

    print('start compute first_nodes')
    first_node_results = get_first_node(topic, start_ts, end_ts, windowsize,
                                        date)
    print('end compute first_nodes')
    network_results['first_node_results'] = first_node_results

    print('start make network')
    max_size = MAX_SIZE
    attribute_add = True
    g, gg, new_attribute_dict = make_network(topic, date, windowsize,
                                             max_size, attribute_add)
    network_results['new_attribute_dict'] = new_attribute_dict

    print('write gexf file')
    # File names are built from the *unconverted* windowsize; the int()
    # normalisation further down must stay after this point so the names
    # do not change.
    key = str(topic) + '_' + str(date) + '_' + str(windowsize)
    graph_prefix = str(GRAPH_PATH) + key
    print('gexf_file: %s' % (graph_prefix + '_g_graph.gexf',))
    nx.write_gexf(g, graph_prefix + '_g_graph.gexf')
    nx.write_gexf(gg, graph_prefix + '_gg_graph.gexf')
    print('end make network')

    print('start PageRank')
    all_uid_pr, data_dict, sorted_uids = pagerank_rank(TOPK, date,
                                                       windowsize, topic)
    network_results['pagerank'] = {
        'all_uid_pr': all_uid_pr,
        'sorted_uids': sorted_uids,
    }
    print('len(all_uid_pr): %s' % (len(all_uid_pr),))
    print('end PageRank')

    print('start make network graph')
    windowsize = int(windowsize)
    gexf = make_network_graph(date, topic, windowsize, all_uid_pr,
                              data_dict, sorted_uids, new_attribute_dict)

    print('save gexf')
    # ``gexf_type`` is not defined in this function; it is expected to be a
    # module-level name -- TODO confirm.
    long_gexf = save_gexf_results(topic, date, windowsize, gexf, gexf_type)
    network_results['long_gexf'] = long_gexf

    print('start fu_tr')
    maker_results, pusher_results = get_interval_count(topic, date,
                                                       windowsize)
    print('update_topic_end')
    network_results['maker_results'] = maker_results
    network_results['pusher_results'] = pusher_results

    index_name = index_event_analysis_results
    index_type = type_event_analysis_results
    # The whole result set is stored as a single JSON string field.
    serialized_results = json.dumps(network_results)

    # Only the existence lookup decides between "create" and "update". In
    # the original the update lived inside the same try block, so a failed
    # update on an existing document fell through to index(), which would
    # replace that document with one holding only ``network_results``.
    try:
        weibo_es.get(index=index_name, doc_type=index_type, id=topic)
    except Exception:
        # NOTE(review): the blanket catch is kept from the original because
        # the client's "not found" exception class is not visible here;
        # narrow it once that import is confirmed.
        weibo_es.index(index=index_name,
                       doc_type=index_type,
                       id=topic,
                       body={'network_results': serialized_results})
    else:
        weibo_es.update(index=index_name,
                        doc_type=index_type,
                        id=topic,
                        body={'doc': {
                            'network_results': serialized_results
                        }})

    print('network_results save done!!')

    print('all done!')
def main(topic, start_ts, end_ts):
    """Run the network pipeline for one pending ``TopicStatus`` row.

    Looks up the ``TopicStatus`` row matching ``topic``, ``start_ts`` and
    ``end_ts`` with ``module == 'identify'`` and ``status == -1``. When no
    such row exists nothing is done. Otherwise the function calls, in
    order: ``_update_topic_status2Computing``, ``weibo_topic2xapian``,
    ``get_first_node``, ``make_network``, four ``nx.write_gexf`` dumps
    under ``GRAPH_PATH``, ``save_attribute_dict``, ``pagerank_rank``,
    ``make_network_graph``, ``save_gexf_results``, ``get_interval_count``
    and ``_update_topic_status2Completed``.

    Args:
        topic: Topic name used for the lookup and for every helper call.
        start_ts: Window start used for the lookup.
        end_ts: Window end used for the lookup.

    Returns:
        None in every case.
    """
    pending_filter = (
        TopicStatus.topic == topic,
        TopicStatus.start == start_ts,
        TopicStatus.end == end_ts,
        TopicStatus.module == 'identify',
        TopicStatus.status == -1,
    )
    topic_status_info = db.session.query(TopicStatus).filter(
        *pending_filter).first()
    if not topic_status_info:
        return None

    print('topic_id %s' % (topic_status_info.id,))
    start_ts = topic_status_info.start
    end_ts = topic_status_info.end
    db_date = topic_status_info.db_date
    topicname = topic
    _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
    print('update_status')
    # Per the original author's note: the id is fetched again because
    # TopicStatus.id auto-increments, so after the status update it is no
    # longer the id of the row read above.
    topic_id = acquire_topic_id(topicname, start_ts, end_ts)
    # Size of the time span, in units of ``Day``.
    windowsize = (end_ts - start_ts) / Day
    date = ts2datetime(end_ts)

    print('start topic2xapianid')
    topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
    print('topic_xapian_id: %s' % (topic_xapian_id,))

    print('start compute first_nodes')
    start_date = ts2datetime(start_ts)  # used to compute the first user
    get_first_node(topicname, start_date, date, windowsize, topic_xapian_id)
    print('end compute first_nodes')

    print('start make network')
    max_size = MAX_SIZE
    attribute_add = True
    (g, gg, new_attribute_dict,
     ds_dg, ds_udg, ds_new_attribute_dict) = make_network(
        topicname, date, windowsize, topic_xapian_id, max_size,
        attribute_add)

    print('write gexf file')
    real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
    if not real_topic_id:
        # NOTE(review): this exit happens after
        # _update_topic_status2Computing but never reaches
        # _update_topic_status2Completed -- confirm that is intended.
        print('the topic not exist')
        return None

    # File names use the windowsize as computed above, before the int()
    # normalisation below.
    key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize)
    graph_prefix = str(GRAPH_PATH) + key
    print('gexf_file: %s' % (graph_prefix + '_g_graph.gexf',))
    graphs_by_suffix = (
        ('_g_graph.gexf', g),
        ('_gg_graph.gexf', gg),
        ('_ds_dg_graph.gexf', ds_dg),
        ('_ds_udg_graph.gexf', ds_udg),
    )
    for suffix, graph in graphs_by_suffix:
        nx.write_gexf(graph, graph_prefix + suffix)
    save_attribute_dict(new_attribute_dict, 'g')
    save_attribute_dict(ds_new_attribute_dict, 'ds_g')
    print('end make network')

    print('start PageRank')
    all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(
        TOPK, date, topic_id, windowsize, topicname, real_topic_id)
    print('len(all_uid_pr): %s' % (len(all_uid_pr),))
    print('end PageRank')

    print('start make network graph')
    topic_id = int(topic_id)
    windowsize = int(windowsize)
    if topic_id:
        gexf, ds_gexf = make_network_graph(
            date, topic_id, topicname, windowsize, all_uid_pr, data,
            ds_all_uid_pr, ds_data, real_topic_id)
    else:
        # Marked "to be deleted" by the original author. Both results must
        # be bound here: the original only set ``gexf`` and then raised
        # UnboundLocalError on ``ds_gexf`` in the save call below.
        gexf = ''
        ds_gexf = ''

    print('save gexf')
    # ``gexf_type`` / ``ds_gexf_type`` are not defined in this function;
    # they are expected to be module-level names -- TODO confirm.
    save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
    save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)

    print('start fu_tr')
    get_interval_count(topicname, date, windowsize, topic_xapian_id)

    print('update_topic_end')
    _update_topic_status2Completed(topicname, start_ts, end_ts, db_date)