Esempio n. 1
0
def save_tr_results(topic, date, windowsize, ds_sorted_uids, ds_all_uid_tr):
    """Store the trendsetter_rank results of the direct-superior repost network.

    Existing ``TsRank`` rows matching (topic, date, windowsize) are deleted and
    that deletion is committed; then one ``TsRank`` row is added per uid in
    ``ds_sorted_uids`` and committed.  Ranks are assigned 1, 2, 3, ... in
    iteration order.

    Args:
        topic: value matched against / stored in ``TsRank.topic``.
        date: value matched against / stored in ``TsRank.date``.
        windowsize: value matched against / stored in ``TsRank.windowsize``.
        ds_sorted_uids: iterable of uids; the first uid receives rank 1.
        ds_all_uid_tr: mapping uid -> trendsetter_rank value; it is indexed
            with every uid, so a missing uid propagates the lookup error
            (``KeyError`` for a plain dict).

    Returns:
        dict mapping each uid to the rank it was stored with.
    """
    stale_items = db.session.query(TsRank).filter(
        TsRank.topic == topic,
        TsRank.date == date,
        TsRank.windowsize == windowsize).all()
    for stale in stale_items:
        db.session.delete(stale)
    # commit must be *called*; a bare ``db.session.commit`` only references the
    # method and leaves the deletions pending while the new rows are added.
    db.session.commit()

    data = {}
    for rank, uid in enumerate(ds_sorted_uids, 1):
        tr = ds_all_uid_tr[uid]
        db.session.add(TsRank(topic, rank, uid, date, windowsize, tr))
        data[uid] = rank
    db.session.commit()

    # Single-argument print calls emit the same text under Python 2 and 3.
    print('success save trendsetter_rank value')
    print('len(data): %d' % len(data))
    return data
Esempio n. 2
0
def save_tr_results(topic, date, windowsize, ds_sorted_uids, ds_all_uid_tr):
    """Store the trendsetter_rank results of the direct-superior repost network.

    Existing ``TsRank`` rows matching (topic, date, windowsize) are deleted and
    that deletion is committed; then one ``TsRank`` row is added per uid in
    ``ds_sorted_uids`` and committed.  Ranks are assigned 1, 2, 3, ... in
    iteration order.

    Args:
        topic: value matched against / stored in ``TsRank.topic``.
        date: value matched against / stored in ``TsRank.date``.
        windowsize: value matched against / stored in ``TsRank.windowsize``.
        ds_sorted_uids: iterable of uids; the first uid receives rank 1.
        ds_all_uid_tr: mapping uid -> trendsetter_rank value; it is indexed
            with every uid, so a missing uid propagates the lookup error
            (``KeyError`` for a plain dict).

    Returns:
        dict mapping each uid to the rank it was stored with.
    """
    stale_items = db.session.query(TsRank).filter(
        TsRank.topic == topic,
        TsRank.date == date,
        TsRank.windowsize == windowsize).all()
    for stale in stale_items:
        db.session.delete(stale)
    # commit must be *called*; a bare ``db.session.commit`` only references the
    # method and leaves the deletions pending while the new rows are added.
    db.session.commit()

    data = {}
    for rank, uid in enumerate(ds_sorted_uids, 1):
        tr = ds_all_uid_tr[uid]
        db.session.add(TsRank(topic, rank, uid, date, windowsize, tr))
        data[uid] = rank
    db.session.commit()

    # Single-argument print calls emit the same text under Python 2 and 3.
    print('success save trendsetter_rank value')
    print('len(data): %d' % len(data))
    return data
Esempio n. 3
0
def generate_rank_results(sorted_uids):
    """Build ranked result rows for the uids whose user record can be found.

    Each uid is looked up with ``acquire_user_by_id('whole', uid)``; uids with
    a falsy result are skipped and do not consume a rank.

    Args:
        sorted_uids: iterable of uids, already in the desired rank order.

    Returns:
        list of tuples ``(rank, uid, name, location, count1, count2, status)``
        with ranks starting at 1 and increasing by one per emitted row.
    """
    rows = []
    for uid in sorted_uids:
        profile = acquire_user_by_id('whole', uid)
        if profile:
            rows.append((
                len(rows) + 1,  # next rank: one more than the rows emitted so far
                uid,
                profile['name'],
                profile['location'],
                profile['count1'],
                profile['count2'],
                # read from external knowledge database
                user_status(uid),
            ))
    return rows
Esempio n. 4
0
def make_ds_gexf(gexf_name_1, gexf_name_2, G, node_degree, pr_key_users, all_uid_pr, ds_pr_data, partition, ds_new_attribute_dict):
    """Build a GEXF document for graph ``G`` with per-node user/post attributes.

    Every node of ``G`` becomes a GEXF node positioned by ``nx.spring_layout``
    and sized by ``node_degree[node]``.  Nodes found in ``pr_key_users`` are
    coloured (255, 51, 51); all others (0, 204, 204).  Every edge of ``G``
    becomes a GEXF edge with a sequential id.

    Args:
        gexf_name_1: first argument passed to ``Gexf``.
        gexf_name_2: second argument passed to ``Gexf``.
        G: graph whose nodes are uids.
        node_degree: mapping node -> degree, used as the node size.
        pr_key_users: container of uids to highlight.
        all_uid_pr: mapping ``str(uid)`` -> pagerank value.
        ds_pr_data: mapping uid -> pagerank rank.
        partition: mapping node -> cluster id (stored as 'acategory').
        ds_new_attribute_dict: mapping uid -> sequence whose first element is
            indexed 0..4 for text, reposts count, comments count, attitude
            count and timestamp.  A ``KeyError`` while reading it makes all
            five attributes fall back to the "unknown" marker.

    Returns:
        The populated ``Gexf`` object.
    """
    unknown = u'未知'
    key_color = ('255', '51', '51')
    other_color = ('0', '204', '204')

    gexf = Gexf(gexf_name_1, gexf_name_2)
    graph = gexf.addGraph('directed', 'static', 'demp graph')
    # Each attribute id is forced to equal its name.
    for attr_name, attr_type in (
            ('name', 'string'),
            ('location', 'string'),
            ('timestamp', 'string'),
            ('pagerank', 'float'),
            ('acategory', 'string'),
            ('text', 'string'),
            ('reposts_count', 'string'),
            ('comments_count', 'string'),
            ('attitude_count', 'string'),
            ('rank_pr', 'string')):
        graph.addNodeAttribute(attr_name, type=attr_type, force_id=attr_name)

    pos = nx.spring_layout(G)
    node_id = {}

    for node in G.nodes():
        x, y = pos[node]
        degree = node_degree[node]
        # Give each node a sequential GEXF id the first time it is seen.
        if node not in node_id:
            node_id[node] = len(node_id)
        uid = node  # the node itself is the user id
        red, green, blue = key_color if uid in pr_key_users else other_color
        _node = graph.addNode(node_id[node], str(node), x=str(x), y=str(y), z='0',
                              r=red, g=green, b=blue, size=str(degree))
        _node.addAttribute('acategory', str(partition[node]))
        _node.addAttribute('pagerank', str(all_uid_pr[str(uid)]))
        _node.addAttribute('rank_pr', str(ds_pr_data[uid]))

        # Gather every post field before touching the node, so a KeyError can
        # never leave some attributes set and then add the fallbacks as well.
        try:
            record = ds_new_attribute_dict[uid][0]
            text = json.dumps(record[0])
            reposts_count = i2u(record[1])
            comments_count = i2u(record[2])
            attitude_count = i2u(record[3])
            if attitude_count is None:
                attitude_count = unknown
            attitude_count = i2u(attitude_count)
            timestamp = i2u(record[4])
        except KeyError:
            text = reposts_count = comments_count = unknown
            attitude_count = timestamp = unknown
        _node.addAttribute('text', text)
        _node.addAttribute('reposts_count', reposts_count)
        _node.addAttribute('comments_count', comments_count)
        _node.addAttribute('attitude_count', attitude_count)
        _node.addAttribute('timestamp', timestamp)

        # Fetch the matching user record and attach its attributes.
        user_info = acquire_user_by_id(uid)
        if user_info:
            _node.addAttribute('name', user_info['name'])
            _node.addAttribute('location', user_info['location'])
        else:
            _node.addAttribute('name', unknown)
            _node.addAttribute('location', unknown)

    for edge_counter, (start, end) in enumerate(G.edges()):
        # edge is (repost_uid, source_uid)
        graph.addEdge(str(edge_counter), str(node_id[start]), str(node_id[end]))

    return gexf
Esempio n. 5
0
def make_gexf(gexf_name_1, gexf_name_2, G, node_degree, key_users, all_uid_pr, pr_data, partition, new_attribute_dict):
    """Build a GEXF document for graph ``G`` with per-node user/post attributes.

    Every node of ``G`` becomes a GEXF node positioned by ``nx.spring_layout``
    and sized by ``node_degree[node]``.  Nodes found in ``key_users`` are
    coloured (255, 51, 51); all others (0, 204, 204).  Every edge of ``G``
    becomes a GEXF edge with a sequential id.  The 'timestamp' attribute is
    declared on the graph but never set on any node.

    Args:
        gexf_name_1: first argument passed to ``Gexf``.
        gexf_name_2: second argument passed to ``Gexf``.
        G: graph whose nodes are uids.
        node_degree: mapping node -> degree, used as the node size.
        key_users: container of uids to highlight.
        all_uid_pr: mapping ``str(uid)`` -> pagerank value.
        pr_data: mapping uid -> rank of the user's pagerank value.
        partition: mapping node -> cluster id (stored as 'acategory').
        new_attribute_dict: mapping uid -> sequence whose first element is
            indexed 0..3 for text, reposts count, comments count and attitude
            count.  A ``KeyError`` while reading it makes all four attributes
            fall back to the "unknown" marker.

    Returns:
        The populated ``Gexf`` object.
    """
    unknown = u'未知'
    key_color = ('255', '51', '51')
    other_color = ('0', '204', '204')

    gexf = Gexf(gexf_name_1, gexf_name_2)
    graph = gexf.addGraph("directed", "static", "demp graph")
    # Each attribute id is forced to equal its name.
    for attr_name, attr_type in (
            ('name', 'string'),
            ('location', 'string'),
            ('timestamp', 'int'),
            ('pagerank', 'string'),
            ('acategory', 'string'),
            ('text', 'string'),
            ('reposts_count', 'string'),
            ('comments_count', 'string'),
            ('attitude_count', 'string'),
            ('rank_pr', 'string')):
        graph.addNodeAttribute(attr_name, type=attr_type, force_id=attr_name)

    pos = nx.spring_layout(G)  # layout: pos[node] unpacks to (x, y)
    node_id = {}

    for node in G.nodes():
        x, y = pos[node]
        degree = node_degree[node]
        # Give each node a sequential GEXF id the first time it is seen.
        if node not in node_id:
            node_id[node] = len(node_id)
        uid = node  # the node itself is the user id
        # Key users get a different colour from ordinary users.
        red, green, blue = key_color if uid in key_users else other_color
        _node = graph.addNode(node_id[node], str(node), x=str(x), y=str(y), z='0',
                              r=red, g=green, b=blue, size=str(degree))
        _node.addAttribute('acategory', str(partition[node]))
        _node.addAttribute('pagerank', str(all_uid_pr[str(uid)]))
        _node.addAttribute('rank_pr', str(pr_data[uid]))

        # Gather every post field before touching the node, so a KeyError can
        # never leave some attributes set and then add the fallbacks as well.
        try:
            record = new_attribute_dict[uid][0]
            text = json.dumps(record[0])
            reposts_count = i2u(record[1])
            comments_count = i2u(record[2])
            attitude_count = i2u(record[3])
            if attitude_count is None:
                attitude_count = unknown
            attitude_count = i2u(attitude_count)
        except KeyError:
            text = reposts_count = comments_count = attitude_count = unknown
        _node.addAttribute('text', text)
        _node.addAttribute('reposts_count', reposts_count)
        _node.addAttribute('comments_count', comments_count)
        _node.addAttribute('attitude_count', attitude_count)

        # Fetch the matching user record and attach its attributes.
        user_info = acquire_user_by_id(uid)
        if user_info:
            _node.addAttribute('name', user_info['name'])
            _node.addAttribute('location', user_info['location'])
        else:
            _node.addAttribute('name', unknown)
            _node.addAttribute('location', unknown)

    for edge_counter, (start, end) in enumerate(G.edges()):
        # edge is (repost_uid, source_uid)
        graph.addEdge(str(edge_counter), str(node_id[start]), str(node_id[end]))

    return gexf
Esempio n. 6
0
 def test_acquire_user_by_id(self):
     """Looking up ``self.test_uid`` in 'area' returns the user named ``self.test_name``."""
     user = acquire_user_by_id('area', self.test_uid)
     # Check existence before dereferencing, so a missing user is reported as
     # a test failure with this message instead of an error on user['name'].
     self.assertTrue(user, 'search user not exist')
     username = user['name'].decode('utf-8')
     self.assertEqual(username, self.test_name, 'search wrong user in Xapian')
0
def make_network_graph(current_date, topic_id, topic, window_size, key_user_labeled=True):
    """Render a topic's network as a serialized GEXF XML document.

    The network comes from ``make_network(topic, current_date, window_size)``.
    Node degrees are taken from that full network and then passed, together
    with it, to ``cut_network``; the graph that comes back is what gets laid
    out and exported, with nodes sized by the pre-cut degrees.

    Args:
        current_date: date passed to ``read_key_users`` and ``make_network``.
        topic_id: accepted but not used by this function.
        topic: topic passed to ``read_key_users`` and ``make_network``.
        window_size: window passed to ``read_key_users`` and ``make_network``.
        key_user_labeled: when true, the top 10 key users are read and drawn
            in (255, 51, 51); otherwise no node is highlighted.

    Returns:
        ``''`` when the network has no nodes; otherwise the result of
        ``etree.tostring`` on the GEXF XML (UTF-8, pretty-printed, with an
        XML declaration).
    """
    if key_user_labeled:
        highlighted = read_key_users(current_date, window_size, topic, top_n=10)
    else:
        highlighted = []

    network = make_network(topic, current_date, window_size)
    if len(network.nodes()) == 0:
        return ''

    # Degrees are measured before the network is cut down.
    node_degree = nx.degree(network)
    network = cut_network(network, node_degree)

    gexf = Gexf("Yang Han", "Topic Network")
    graph = gexf.addGraph("directed", "static", "demp graph")
    for attr_name, attr_type in (('name', 'string'),
                                 ('location', 'string'),
                                 ('timestamp', 'int')):
        graph.addNodeAttribute(attr_name, type=attr_type, force_id=attr_name)

    layout = nx.spring_layout(network)
    gexf_ids = {}

    for uid in network.nodes():
        x, y = layout[uid]
        size = node_degree[uid]
        # Sequential GEXF id, assigned the first time a node is seen.
        gexf_ids.setdefault(uid, len(gexf_ids))
        if uid in highlighted:
            red, green, blue = '255', '51', '51'
        else:
            red, green, blue = '0', '204', '204'
        gexf_node = graph.addNode(gexf_ids[uid], str(uid), x=str(x), y=str(y),
                                  z='0', r=red, g=green, b=blue, size=str(size))
        profile = acquire_user_by_id(uid)
        if not profile:
            gexf_node.addAttribute('name', 'Unknown')
            gexf_node.addAttribute('location', 'Unknown')
        else:
            gexf_node.addAttribute('name', profile['name'])
            gexf_node.addAttribute('location', profile['location'])

    for edge_no, (start, end) in enumerate(network.edges()):
        source_id = gexf_ids[start]
        target_id = gexf_ids[end]
        graph.addEdge(str(edge_no), str(source_id), str(target_id))

    return etree.tostring(gexf.getXML(), pretty_print=True, encoding='utf-8', xml_declaration=True)