Example #1
def cal_topic_quotasystem_count_by_date(topic, start, end):
    # determine the time range of Weibo posts to query
    start_date = ts2datetime(start)
    end_date = ts2datetime(end) # an end timestamp of 2014-09-02 00:00:00 still counts toward the 09-01 day
    print 'start, end:', start_date, end_date
    windowsize = (end - start) / Day
    print 'windowsize:', windowsize
    datestr_list = []
    for i in range(windowsize):
        time = start + i * Day
        time_date = ts2datetime(time)
        datestr_list.append(time_date.replace('-', ''))
    print 'datestr_list:', datestr_list
    # look up the Xapian index built for this topic and time range
    topic_xapian_id = weibo_topic2xapian(topic, start, end)
    print 'topic_xapian_id:', topic_xapian_id
    xapian_search_weibo = getXapianWeiboByTopic(topic_xapian_id)
    '''
    xapian_search_weibo = getXapianWeiboByDuration(datestr_list) # query by the date range instead
    xapian_search_topic = getXapianWeiboByTopic(topic) # query the index built directly for the topic
    '''
    if xapian_search_weibo:
        print '******start_compute'
        quota_attention(topic, xapian_search_weibo, start_ts=start, end_ts=end)
        quota_duration(topic, start_ts=start, end_ts=end)
        print 'save duration success'
        quota_sensitivity(topic, start_ts=start, end_ts=end)
        print 'save sensitivity success'
        quota_importance(topic, start_ts=start, end_ts=end)
        print 'save importance success'
        quota_sentiment(topic, xapian_search_weibo, start_ts=start, end_ts=end)
        print 'save sentiment success'
        quota_coverage(topic, xapian_search_weibo, start_ts=start, end_ts=end) # coverage computation
        print 'save coverage success'
        quota_person_sensitivity(topic, xapian_search_weibo, start_ts=start, end_ts=end) # participation of sensitive persons
        print 'save person_sensitivity success'
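
These examples assume the project's time helpers Day, ts2datetime and datetime2ts, which are imported elsewhere. A minimal sketch consistent with how they are used above (an assumption, not the project's actual implementation):

# assumed sketch of the time helpers used throughout these examples
import time
from datetime import datetime

Day = 24 * 3600  # seconds per day

def ts2datetime(ts):
    # unix timestamp -> 'YYYY-MM-DD' string
    return time.strftime('%Y-%m-%d', time.localtime(ts))

def datetime2ts(date_str):
    # 'YYYY-MM-DD' string -> unix timestamp at local midnight
    return int(time.mktime(datetime.strptime(date_str, '%Y-%m-%d').timetuple()))

Note that windowsize = (end - start) / Day relies on Python 2 integer division, so partial days are truncated.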
Example #2
                           reverse=True)
    print 'top_source_user:'
    '''
    count, top_weibo = xapian_search_weibo.search(query={'_id':top_source_mid}, fields=['timestamp'])
    print 'count:', count
    for i in top_weibo():
        timestamp = i['timestamp']
        print 'timestamp:', ts2date(int(timestamp))
    '''
    return sorted_result


if __name__ == '__main__':
    '''
    topic = u'高校思想宣传'
    date = '2015-02-01'
    windowsize = 9
    topic_xapian_id = '54ccbfab5a220134d9fc1b37'
    '''
    topic = TOPIC
    date = END
    start_ts = datetime2ts(START)
    end_ts = datetime2ts(END)
    #windowsize = (end_ts - start_ts) / Day
    windowsize = (end_ts - start_ts) / Day / 2
    topic_xapian_id = weibo_topic2xapian(topic, start_ts, end_ts)
    get_interval_count(topic, date, windowsize, topic_xapian_id)
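
Example #2 begins mid-statement: the reverse=True on its first line closes a sorted(...) call over per-user counts. A hypothetical reconstruction with toy data (the real iterable and key function may differ in the original source):

# hypothetical reconstruction of the truncated sorted(...) call
user_counts = {'uid_1': 120, 'uid_2': 45, 'uid_3': 300}  # toy data
sorted_result = sorted(user_counts.items(),
                       key=lambda item: item[1],  # sort by count
                       reverse=True)              # largest first
print(sorted_result)  # [('uid_3', 300), ('uid_1', 120), ('uid_2', 45)]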
Example #3
def main(topic, start_ts, end_ts):
    '''
    topics = _topic_not_calc() # topics=[{id:x,module:x,status:x,topic:x,start:x,end:x,db_date:x}]
    '''
    topic_status_info = db.session.query(TopicStatus).filter(TopicStatus.topic==topic ,\
                                                             TopicStatus.start==start_ts ,\
                                                             TopicStatus.end==end_ts ,\
                                                             TopicStatus.module=='identify' ,\
                                                             TopicStatus.status==-1).first()
    if topic_status_info:
        #topic = topics[0] # compute only one per run; acts as a buffer so only one topic is processed every n time units
        print 'topic_id', topic_status_info.id
        start_ts = topic_status_info.start
        end_ts = topic_status_info.end
        db_date = topic_status_info.db_date
        topicname = topic
        _update_topic_status2Computing(topicname, start_ts, end_ts, db_date)
        print 'update_status'
        topic_id = acquire_topic_id(topicname, start_ts, end_ts) # re-acquire the id: the id column in TopicStatus is auto-incremented, so after the update it is no longer the original one
        windowsize = (end_ts - start_ts) / Day # size of the time window in days
        date = ts2datetime(end_ts)

        print 'start topic2xapianid'
        topic_xapian_id = weibo_topic2xapian(topicname, start_ts, end_ts)
        print 'topic_xapian_id:', topic_xapian_id
        
        print 'start compute first_nodes'
        start_date = ts2datetime(start_ts) # used to compute the first user
        get_first_node(topicname, start_date, date, windowsize, topic_xapian_id)
        print 'end compute first_nodes'
        print 'start make network'
        max_size = MAX_SIZE
        attribute_add = True
        g, gg, new_attribute_dict, ds_dg, ds_udg, ds_new_attribute_dict = make_network(topicname, date, windowsize, topic_xapian_id, max_size, attribute_add)
        print 'write gexf file'
        real_topic_id = acquire_real_topic_id(topicname, start_ts, end_ts)
        if not real_topic_id:
            print 'the topic not exist'
            return None
        key = str(real_topic_id) + '_' + str(date) + '_' + str(windowsize) 
        print 'gexf_file:', str(GRAPH_PATH)+str(key)+'_g_graph.gexf'
        nx.write_gexf(g, str(GRAPH_PATH) + str(key) + '_g_graph.gexf')
        nx.write_gexf(gg, str(GRAPH_PATH) + str(key) + '_gg_graph.gexf')
        nx.write_gexf(ds_dg, str(GRAPH_PATH) + str(key) + '_ds_dg_graph.gexf')
        nx.write_gexf(ds_udg, str(GRAPH_PATH) + str(key) + '_ds_udg_graph.gexf')
        save_attribute_dict(new_attribute_dict, 'g')
        save_attribute_dict(ds_new_attribute_dict, 'ds_g')
        print 'end make network'

        print 'start PageRank'
        all_uid_pr, ds_all_uid_pr, data, ds_data = pagerank_rank(TOPK, date, topic_id, windowsize, topicname, real_topic_id)
        print 'len(all_uid_pr):', len(all_uid_pr)
        print 'end PageRank'

        print 'start make network graph'
        topic_id = int(topic_id)
        windowsize = int(windowsize)
        if not topic_id: # to be removed
            gexf = ''
        else:
            gexf, ds_gexf = make_network_graph(date, topic_id, topicname, windowsize, all_uid_pr, data, ds_all_uid_pr, ds_data, real_topic_id)
        print 'save gexf'
        save_gexf_results(topicname, date, windowsize, gexf, gexf_type)
        save_gexf_results(topicname, date, windowsize, ds_gexf, ds_gexf_type)
        print 'start fu_tr'
        get_interval_count(topicname, date, windowsize, topic_xapian_id)
        print 'update_topic_end'
        _update_topic_status2Completed(topicname, start_ts, end_ts, db_date) 
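
Example #3 walks a TopicStatus row through a simple workflow: rows with status == -1 in the 'identify' module are picked up, marked as computing, and finally marked as completed. A hedged sketch of what _update_topic_status2Computing might look like, reusing the db session and TopicStatus model from the example (the status codes and column updates are assumptions, not the project's actual helper):

def _update_topic_status2Computing(topic, start_ts, end_ts, db_date, status=1):
    # status codes assumed here: -1 waiting, 1 computing, 2 completed
    item = db.session.query(TopicStatus).filter(TopicStatus.topic==topic,
                                                TopicStatus.start==start_ts,
                                                TopicStatus.end==end_ts,
                                                TopicStatus.module=='identify').first()
    if item:
        item.status = status  # mark the row as computing
        item.db_date = db_date
        db.session.add(item)
        db.session.commit()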
Example #4
        user_info['statuses_count'] = result['statuses_count']
    else:
        user_info['name'] = u'未知'  # u'未知' means 'unknown'
        user_info['location'] = u'未知'
        user_info['friends_count'] = u'未知'
        user_info['followers_count'] = u'未知'
        user_info['profile_image_url'] = 'no'
        user_info['created_at'] = u'未知'
        user_info['statuses_count'] = u'未知'
        
    return user_info


if __name__ == '__main__':
    '''
    topic = u'高校思想宣传'
    date = '2015-02-01'
    windowsize = 9
    topic_xapian_id = '54ccbfab5a220134d9fc1b37'
    '''
    topic = TOPIC
    date = END
    start_ts = datetime2ts(START)
    end_ts = datetime2ts(END)
    windowsize = (end_ts - start_ts) / Day
    topic_xapian_id = weibo_topic2xapian(topic, start_ts, end_ts)
    get_interval_count(topic, date, windowsize, topic_xapian_id)
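
The fragment at the top of Example #4 is the fallback branch of a user-profile lookup: when no profile is found, every field is filled with u'未知' ('unknown'). A hypothetical sketch of the surrounding function (the name get_user_info and the field handling are illustrative, not the project's actual API):

UNKNOWN = u'未知'  # 'unknown'

def get_user_info(result):
    # result: user profile dict from the search index, or None on a miss
    fields = ['name', 'location', 'friends_count', 'followers_count',
              'created_at', 'statuses_count']
    user_info = {}
    if result:  # profile found in the index
        for field in fields:
            user_info[field] = result.get(field, UNKNOWN)
        user_info['profile_image_url'] = result.get('profile_image_url', 'no')
    else:  # profile missing: fill every field with 'unknown'
        for field in fields:
            user_info[field] = UNKNOWN
        user_info['profile_image_url'] = 'no'
    return user_info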
    