Example #1
def cluster_test(file_path):
    '''
    community_multilevel and community_infomap cannot produce two clusters;
    only community_leading_eigenvector and community_fastgreedy can produce two clusters.
    Label propagation seeded with the eigenvector membership has higher modularity
    than community_fastgreedy or community_leading_eigenvector alone.
    Rand Index: 0.884, 0.974, 0.929 for the communication network
                0.807, 0.915, 0.864 for the retweet network
    This method is discarded.
    '''
    g = gt.Graph.Read_GraphML(file_path)
    gt.summary(g)
    # g = g.as_undirected(combine_edges=dict(weight="sum"))
    # g = gt.giant_component(g)
    # ---------treated as directed network
    seperations = []
    # modularity = []
    sizes = []
    for i in xrange(100):
        # eigen = g.community_leading_eigenvector(clusters=2, weights='weight')
        # label_pro = g.community_label_propagation(weights='weight', initial=eigen.membership)
        print i
        com = g.community_infomap(edge_weights='weight', vertex_weights='weight')
        seperations.append(com.membership)
        # modularity.append(com.modularity)
        print len(com)
        sizes.append(len(com))
    print '%.3f, %.3f, %.3f, %.3f' %(min(sizes), max(sizes), np.mean(sizes), np.std(sizes))
    aRI = []
    for i in xrange(100):
        for j in xrange(i+1, 100):
            aRI.append(metrics.adjusted_rand_score(seperations[i], seperations[j]))
    print len(aRI)
    # print '%.3f, %.3f, %.3f, %.3f' %(min(modularity), max(modularity), np.mean(modularity), np.std(modularity))
    print '%.3f, %.3f, %.3f, %.3f' %(min(aRI), max(aRI), np.mean(aRI), np.std(aRI))
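
# A minimal, self-contained illustration of the stability metric used above:
# the adjusted Rand index is invariant to label permutation, which is why
# pairwise ARI over repeated runs measures assignment stability (sklearn assumed).
from sklearn import metrics
print metrics.adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])  # 1.0: same partition, relabelled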
Example #2
def test_significant(file_path):
    # Rewire the network while keeping the edge weights, to test the significance of its segregation
    g = gt.Graph.Read_GraphML(file_path)
    gt.summary(g)
    g = g.as_undirected(combine_edges=dict(weight="sum"))
    g = gt.giant_component(g)
    gt.summary(g)
    # print g.es['weight']
    fast = g.community_fastgreedy(weights='weight')
    fast_com = fast.as_clustering(n=2)
    orig_mod = fast_com.modularity
    mod_list = []

    for i in xrange(1000):
        weights = g.es["weight"]
        g.rewire()
        g.es["weight"] = weights
        # gt.net_stat(g)
        # print g.es['weight']
        fast = g.community_fastgreedy(weights='weight')
        fast_com = fast.as_clustering()
        mod_list.append(fast_com.modularity)


    amean, astd = np.mean(mod_list), np.std(mod_list)
    print 'simulated values: %.3f +- (%.3f)' %(amean, astd)
    # absobserved = abs(raw_assort)
    # pval = (np.sum(ass_list >= absobserved) +
    #         np.sum(ass_list <= -absobserved))/float(len(ass_list))
    zscore = (orig_mod-amean)/astd
    print 'z-score: %.3f' %zscore
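
# A self-contained sketch of the null model used above: rewire() preserves the
# degree sequence, so the observed modularity can be compared with rewired
# copies via a z-score; the graph here is synthetic (python-igraph assumed).
import numpy as np
from igraph import Graph

def modularity_zscore(g, trials=100):
    obs = g.community_fastgreedy().as_clustering().modularity
    null = []
    for _ in xrange(trials):
        r = g.copy()
        r.rewire()  # degree-preserving edge swaps
        null.append(r.community_fastgreedy().as_clustering().modularity)
    return (obs - np.mean(null)) / np.std(null)

print modularity_zscore(Graph.Erdos_Renyi(n=100, m=300))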
Example #3
def network_pro_hashtags():
    # Extract interaction networks from pro-ed and pro-recovery hashtagged tweets
    # Select only recovery users who have hashtags from ED hashtag topics
    # rec_tag_users = set(iot.get_values_one_field('fed', 'tag_com', 'id', {'rec_tageted': True}))
    # ped_tag_users = set(iot.get_values_one_field('fed', 'tag_com', 'id', {'ped_tageted': True}))

    # rec_tag_users = set(iot.get_values_one_field('fed', 'prorec_tag', 'user.id'))
    # ped_tag_users = set(iot.get_values_one_field('fed', 'proed_tag', 'user.id'))
    # fedusers = iot.get_values_one_field('fed', 'com', 'id')
    fedusers = pickle.load(open('fed-user-id-str.pick', 'r'))
    print len(fedusers)
    users = [int(uid) for uid in fedusers]

    # only_ped = ped_tag_users - rec_tag_users
    # only_rec = rec_tag_users - ped_tag_users
    # all_users = list(rec_tag_users.union(ped_tag_users))
    for btype in ['communication']:
        # gb = gt.load_beh_network('fed', 'bnet_ed_tag', btype)
        gb = gt.load_beh_network_subset(users, 'fed', 'bnet_ed_tag', btype)
        # for v in gb.vs:
        #     if int(v['name']) in only_ped:
        #         v['set'] = -1
        #     elif int(v['name']) in only_rec:
        #         v['set'] = 1
        #     else:
        #         v['set'] = 0
        gt.summary(gb)
        gb.write_graphml(btype+'-only-fed.graphml')
Example #4
def network_change(dbname, comname, netname):
    # filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}}
    # users = iot.get_values_one_field(dbname, comname, 'id', filter)
    # g1 = gt.load_network_subset(users, dbname, netname, {'scraped_times': 2})
    # g2 = gt.load_network_subset(users, dbname, netname, {'scraped_times': 131})
    # pickle.dump(g1, open('data/g1.pick', 'w'))
    # pickle.dump(g2, open('data/g2.pick', 'w'))
    g1 = pickle.load(open('data/g1.pick', 'r'))
    g2 = pickle.load(open('data/g2.pick', 'r'))

    # g1 = gt.load_network_subset(dbname, 'net', {'scraped_times': 2})
    # g2 = gt.load_network_subset(dbname, 'net', {'scraped_times': 131})
    gt.summary(g1)
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)
    # pt.pdf_plot_one_data(g1.indegree(), 'indegree', linear_bins=False, fit_start=1, fit_end=100)
    pt.plot_pdf_mul_data(
        [np.array(g1.indegree()) + 1,
         np.array(g2.indegree()) + 1],
        'indegree', ['b', 'r'], ['o', '^'], ['G1', 'G2'],
        linear_bins=False,
        central=False,
        fit=True,
        savefile='indegree.pdf')
Example #5
def test_user_cluster_assign_stable():
    # Test how stable the final user clustering assignments (k=2) are
    core = gt.Graph.Read_GraphML('alled_tag_undir_filter.graphml')
    communication = gt.Graph.Read_GraphML(
        'communication-only-fed-filter.graphml')
    gt.summary(communication)
    communication = gt.giant_component(communication)
    gt.summary(communication)
    users = [(v['name']) for v in communication.vs]
    print len(users)
    # user_hashtag_vector('fed', 'ed_tag', users)
    seperations = []
    for i in xrange(100):
        print i
        user_hashtag_profile(core, users)
        # data += user_cluster_hashtag()
        cluters, ids = user_cluster_hashtag()
        seperations.append(cluters)
    aRI = []
    for i in xrange(100):
        for j in xrange(i + 1, 100):
            aRI.append(
                metrics.adjusted_rand_score(seperations[i], seperations[j]))
    print len(aRI)
    print '%.3f, %.3f, %.3f, %.3f' % (min(aRI), max(aRI), np.mean(aRI),
                                      np.std(aRI))
Example #6
def pmi(g, filename=None):
    '''
    Calculate the PMI weight for each edge
    :param g: graph with vertex and edge 'weight' attributes
    :param filename: prefix for the output GraphML file
    :return: subgraph keeping only edges with positive PMI
    '''
    # print g.is_loop()
    vw_sum = sum(g.vs["weight"])
    for edge in g.es:
        source_vertex_id = edge.source
        target_vertex_id = edge.target
        source_vertex = g.vs[source_vertex_id]
        target_vertex = g.vs[target_vertex_id]
        ew = edge['weight']
        edge['pmi'] = np.log2(
            float(ew * vw_sum) /
            (source_vertex['weight'] * target_vertex['weight']))
    # pickle.dump(g, open('data/'+filename+'_pmi_tag.pick', 'w'))
    # g = pickle.load(open('data/'+filename+'_pmi_tag.pick', 'r'))
    # pdf(g.es['weight'])
    # plot_graph(g, 'ed-hashtag')
    gt.summary(g)
    g = g.subgraph_edges(g.es.select(pmi_gt=0))
    gt.summary(g)
    g.write_graphml(filename + '_pmi.graphml')
    # g.es['weight'] = g.es['pmi']
    return g
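
# A worked instance of the PMI weight computed above: an edge of weight 4
# between vertices of weight 10 and 8, with vertex weights summing to 100,
# gets log2(4 * 100 / (10 * 8)) = log2(5); positive PMI means the two tags
# co-occur more often than their individual frequencies would predict.
import numpy as np
print np.log2(4.0 * 100 / (10 * 8))  # ~2.322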
Example #7
def z_scores(filename):
    # Test the significance of the links between two nodes
    g = gt.Graph.Read_GraphML(filename + '.graphml')
    gt.summary(g)
    ds = g.vs["weight"]
    dsum = sum(ds)
    if dsum % 2:  # Degree_Sequence requires an even degree sum
        g.vs[0]['weight'] += 1
        ds = g.vs["weight"]

    # distrition = {}
    # for i in xrange(1000):
    #     print i
    #     rg = gt.Graph.Degree_Sequence(ds)
    #     rg.es['weight'] = 1
    #     rg.vs['name'] = rg.degree()
    #     rg.simplify(combine_edges=sum)
    #     for edge in rg.es:
    #         source_vertex_id = edge.source
    #         target_vertex_id = edge.target
    #         source_vertex_name = rg.vs[source_vertex_id]['name']
    #         target_vertex_name = rg.vs[target_vertex_id]['name']
    #         ew = edge['weight']
    #         if source_vertex_name < target_vertex_name:
    #             key = (source_vertex_name, target_vertex_name)
    #         else:
    #             key = (target_vertex_name, source_vertex_name)
    #
    #         dis = distrition.get(key, [])
    #         dis.append(ew)
    #         distrition[key] = dis
    # pickle.dump(distrition, open('dis-all.pick', 'w'))
    distrition = pickle.load(open('dis-all.pick', 'r'))
    for edge in g.es:
        source_vertex_id = edge.source
        target_vertex_id = edge.target
        source_vertex_name = g.vs[source_vertex_id]['weight']
        target_vertex_name = g.vs[target_vertex_id]['weight']
        if source_vertex_name < target_vertex_name:
            key = (source_vertex_name, target_vertex_name)
        else:
            key = (target_vertex_name, source_vertex_name)
        dis = distrition.get(key)
        dm = np.mean(dis)
        dst = np.std(dis)
        var = (edge['weight'] - dm)
        if dst == 0 and var == 0:
            zscore = 0
        else:
            zscore = var / dst
        edge['rWeight'] = zscore
        if zscore < 0 or zscore > 1.96:
            print g.vs[source_vertex_id]['name'], g.vs[target_vertex_id][
                'name'], key, edge['weight'], dm, dst, zscore

    g.write_graphml(filename + '_zscore.graphml')
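
# The commented block above draws the null distribution from igraph's
# configuration model; a minimal sketch of that step (python-igraph assumed):
from igraph import Graph
rg = Graph.Degree_Sequence([2, 2, 2, 1, 1])  # the degree sum must be even, hence the parity fix above
print rg.degree()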
Example #8
def recover_proed_interaction():
    # interaction network of pro-recovery and pro-ed users
    prorec = edrelatedcom.rec_user('fed', 'scom')
    proed = edrelatedcom.proed_users('fed', 'scom')
    btype_dic = {'retweet': [1], 'reply': [2], 'mention': [3], 'communication': [2, 3]}
    for btype in ['retweet', 'reply', 'mention']:
        cols = dbt.db_connect_col('fed', 'sbnet')
        name_map, edges, set_map = {}, {}, {}
        for row in cols.find({'type': {'$in': btype_dic[btype]}}, no_cursor_timeout=True):
            n1 = str(row['id0'])
            n2 = str(row['id1'])
            if n1 in prorec or n1 in proed:
                if n1 != n2:
                    n1id = name_map.get(n1, len(name_map))
                    name_map[n1] = n1id
                    n2id = name_map.get(n2, len(name_map))
                    name_map[n2] = n2id
                    wt = edges.get((n1id, n2id), 0)
                    edges[(n1id, n2id)] = wt + 1
        g = Graph(len(name_map), directed=True)
        g.vs["name"] = list(sorted(name_map, key=name_map.get))
        g.add_edges(edges.keys())
        g.es["weight"] = edges.values()
        g.vs["set"] = 0
        for v in g.vs:
            if v['name'] in prorec:
                v['set'] = 1
            elif v['name'] in proed:
                v['set'] = -1
        gt.summary(g)


        edges = g.es.select(weight_gt=3)
        edge_nodes = []
        for edge in edges:
            source_vertex_id = edge.source
            target_vertex_id = edge.target
            source_vertex = g.vs[source_vertex_id]
            target_vertex = g.vs[target_vertex_id]
            edge_nodes.append(source_vertex['name'])
            edge_nodes.append(target_vertex['name'])

        nodes = []
        for v in g.vs:
            if v['set'] == 1 or v['set'] == -1:
                nodes.append(v)
            elif v['name'] in edge_nodes:
                nodes.append(v)
            else:
                pass
        print 'Filtered nodes: %d' %len(nodes)
        g = g.subgraph(nodes)
        gt.summary(g)
        g.write_graphml('rec-proed-'+btype+'.graphml')
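
# The name_map idiom used above assigns compact vertex ids in first-seen
# order; sorting the keys by value recovers the name list indexed by id:
name_map = {}
for n in ['b', 'a', 'b', 'c']:
    name_map[n] = name_map.get(n, len(name_map))
print sorted(name_map, key=name_map.get)  # ['b', 'a', 'c']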
Example #9
def test_stable_infomap_kmean():
    # Test the stability of the whole process, from Infomap clustering of hashtags to k-means clustering of users
    import tag_network
    core = gt.Graph.Read_GraphML('alled_tag_undir_filter.graphml')
    communication = gt.Graph.Read_GraphML('communication-only-fed-filter.graphml')
    gt.summary(communication)
    communication = gt.giant_component(communication)
    gt.summary(communication)
    users = [(v['name']) for v in communication.vs]
    print len(users)
    tag_network.user_hashtag_profile(core, users)
Example #10
def diversity_db(dbname, comname, behavior, netname):
    userlist = iot.get_values_one_field(dbname, comname, 'id',
                                        # {'timeline_count': {'$gt': 0}}
                                        )
    g = gt.load_beh_network_subset(userlist, dbname, netname, behavior)
    gt.summary(g)

    # g = bahavior_net(dbname, comname, netname, behavior)
    # pickle.dump(g, open('data/'+dbname+'_'+behavior+'.pick', 'w'))
    print dbname, behavior
    # g = pickle.load(open('data/' + dbname + '_' + behavior + '.pick', 'r'))
    return netstatis(dbname, behavior, g, [str(i) for i in userlist], comname)
Example #11
def output_net_user_data(dbname, comname, netname):
    '''
    Output the social network and each user's ED state into local files
    '''
    g = gt.load_network(dbname, netname)
    gt.summary(g)
    
    com = dbt.db_connect_col(dbname, comname)
    for v in g.vs:
        user = com.find_one({'id': int(v['name'])})
        v['l'] = user['level']
        v['ed'] = profiles_check.check_ed(user)
    g.write_graphml(dbname+'-'+netname+'.graphml')
Example #12
def fed_all_tag_topic(filepath='data/fed_tag_undir.graphml'):
    # Get topics of all hashtags posted by fed users
    # The previous results were obtained using more than 3 tweets and 3 users,
    # then taking the giant component.
    g = gt.Graph.Read_GraphML(filepath)
    gt.summary(g)
    vs = g.vs(weight_gt=10, user_gt=10)
    g = g.subgraph(vs)
    gt.summary(g)
    # g = gt.giant_component(g)
    com = g.community_infomap(edge_weights='weight', vertex_weights='weight')
    comclus = com.subgraphs()
    print len(comclus)
    pickle.dump(comclus, open('data/fed_tag_undir.communities', 'w'))
Example #13
def users_with_collected_friends(dbname, comname, netname):
    # get network from random and younger datasets
    users = iot.get_values_one_field(dbname, comname, 'id', {'level':1})
    # net = gt.load_network_subset(dbname, netname, {
    #     'user': {'$in': users}, 'follower': {'$in': users}
    # })
    # net.write_graphml(dbname+'-net.graphml')

    g = gt.Graph.Read_GraphML(dbname+'-net.graphml')
    gt.summary(g)
    g.vs['outk'] = g.indegree()
    nodes = g.vs.select(outk_gt=0)
    print len(nodes)
    user_ids = [int(v['name']) for v in nodes]
    print len(set(users).intersection(set(user_ids)))
Example #14
def profile_cluster(filepath):
    # Clustering user based on word2vec of user profiles
    g = gt.Graph.Read_GraphML(filepath)
    gt.summary(g)
    g = g.as_undirected(combine_edges=dict(weight="sum"))
    components = g.clusters()
    g = components.giant()
    gt.summary(g)

    com = dbt.db_connect_col('fed', 'com')
    data = {}
    for uid in g.vs['name']:
        user = com.find_one({'id': int(uid)}, ['description'])
        profile = user['description']
        if profile:
            tokens = pc.tokenizer_stoprm(profile)
            data[uid] = tokens
    import gensim
    # dictionary = gensim.corpora.Dictionary(data.values())
    # dictionary.save('lda.dict')
    # corpus = [dictionary.doc2bow(text) for text in data.values()]
    # lda = gensim.LdaModel(corpus, num_topics=100, id2word=dictionary)
    word2vec = gensim.models.Word2Vec(data.values(), size=300, sg=1)

    X, y = [], []
    for node in g.vs:
        k = node['name']
        v = data[k]
        vect = np.zeros(300)
        count = 0
        for word in v:
            if word in word2vec:
                vect += word2vec[word]
                count += 1
        X.append(vect / max(count, 1))  # guard against profiles with no in-vocabulary words
        y.append(k)
    X = np.asarray(X)
    print X.shape
    print X
    matrix = g.get_adjacency()
    # clustering = AgglomerativeClustering(connectivity=matrix._get_data())
    clustering = AgglomerativeClustering()
    clustering.fit(X)

    members = clustering.labels_
    comm = gt.VertexClustering(g,  membership=members)
    layout = g.layout("fr")
    gt.plot(comm, layout=layout, vertex_size=5)
Example #15
def recover_proed_community():
    # pro-recovery and pro-ed users, and their outlinked communities
    prorec = edrelatedcom.rec_user('fed', 'scom')
    proed = edrelatedcom.proed_users('fed', 'scom')
    cols = dbt.db_connect_col('fed', 'follownet')
    name_map, edges, set_map = {}, set(), {}
    for row in cols.find({},no_cursor_timeout=True):
        n1 = str(row['follower'])
        if n1 in prorec or n1 in proed:
            n2 = str(row['user'])
            n1id = name_map.get(n1, len(name_map))
            name_map[n1] = n1id
            n2id = name_map.get(n2, len(name_map))
            name_map[n2] = n2id
            edges.add((n1id, n2id))
    g = Graph(len(name_map), directed=True)
    g.vs["name"] = list(sorted(name_map, key=name_map.get)) # return keys ordered by values
    g.add_edges(list(edges))
    g.es["weight"] = 1
    g.vs["set"] = 0
    for v in g.vs:
        if v['name'] in prorec:
            v['set'] = 1
        elif v['name'] in proed:
            v['set'] = -1
    gt.summary(g)

    g.vs['deg'] = g.indegree()
    nodes = []
    for v in g.vs:
        if v['set'] == 1 or v['set'] == -1:
            nodes.append(v)
        elif v['deg'] > 3:
            nodes.append(v)
        else:
            pass
    print 'Filtered nodes: %d' %len(nodes)
    g = g.subgraph(nodes)
    gt.summary(g)
    g.write_graphml('rec-proed-follow.graphml')

    # sbnet has been extended with all interactions posted by ED users
    edusers = set(g.vs['name'])
    for btype in ['retweet', 'reply', 'mention']:
        gb = gt.load_beh_network('fed', 'sbnet', btype)
        gt.summary(gb)
        nodes = []
        for v in gb.vs:
            if v['name'] in edusers:
                nodes.append(v)
        gb = gb.subgraph(nodes)
        for v in gb.vs:
            v['set'] = g.vs.find(name=v['name'])['set']
        gt.summary(gb)
        gb.write_graphml('rec-proed-'+btype+'.graphml')
Example #16
def friendship_community(dbname, colname, label):
    # fg = gt.load_network(dbname, colname)
    # gt.summary(fg)
    # pickle.dump(fg, open('data/'+label+'-fg.pick', 'w'))
    fg = pickle.load(open('data/'+label+'-fg.pick', 'r'))

    # fgc = gt.giant_component(fg, 'WEAK')
    # gt.summary(fgc)
    # pickle.dump(fgc, open('data/'+label+'-fgc.pick', 'w'))

    # fcoms = gt.fast_community(fg)
    # pickle.dump(fcoms, open('data/'+label+'-fcom.pick', 'w'))
    fcoms = pickle.load(open('data/'+label+'-fcom.pick', 'r'))
    # gt.plot(fcoms, 'friend_comms_den.pdf', bbox=(1200, 900))
    fclus = fcoms.as_clustering()
    gt.summary(fclus)
    print fclus.recalculate_modularity()
    community_topic(fg, fclus, dbname, 'scom', 'stimeline')
Example #17
def test_user_cluster_stable():
    # Test the stability of Infomap and find the best k for k-means
    core = gt.Graph.Read_GraphML('data/alled_tag_undir_filter.graphml')
    communication = gt.Graph.Read_GraphML(
        'data/communication-only-fed-filter.graphml')
    gt.summary(communication)
    communication = gt.giant_component(communication)
    gt.summary(communication)
    users = [(v['name']) for v in communication.vs]
    print len(users)
    # user_hashtag_vector('fed', 'ed_tag', users)
    data = []
    for i in xrange(100):
        user_hashtag_profile(core, users, i)
        ###### Run by python
        data += user_cluster_hashtag()
    df = pd.DataFrame(data, columns=['cluster', 'silhouette_avg'])
    df.to_csv('user-kmeans-hashtag.csv')
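
# The silhouette_avg collected above presumably comes from sklearn's
# silhouette_score; a toy example of that metric on two separated clusters:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
X = np.array([[0, 0], [0, 1], [10, 10], [10, 11]])
labels = KMeans(n_clusters=2).fit_predict(X)
print silhouette_score(X, labels)  # close to 1 for well-separated clusters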
Example #18
def communtiy_feature(dbname, typename):
    fg = ntt.loadnet(dbname, typename)

    fcoms = gt.fast_community(fg)
    pickle.dump(fcoms, open('data/'+dbname+typename+'com.pick', 'w'))
    fcoms = pickle.load(open('data/'+dbname+typename+'com.pick', 'r'))
    fclus = fcoms.as_clustering()
    gt.summary(fclus)

    """Compare difference of features in cummunities"""
    features = [
        'liwc_anal.result.i',
        'liwc_anal.result.we',
        'liwc_anal.result.bio',
        'liwc_anal.result.body',
        'liwc_anal.result.health',
        'liwc_anal.result.posemo',
        'liwc_anal.result.negemo',
        'liwc_anal.result.ingest',
        'liwc_anal.result.anx',
        'liwc_anal.result.anger',
        'liwc_anal.result.sad'
                ]
    therh = 0.1 * fg.vcount()
    for feature in features:
        data = []
        for clu in fclus:
            if len(clu) > therh:
                ulist = set()
                for v in clu:
                    ulist.add(int(fg.vs[v]['name']))
                ulist = list(ulist)
                clu_values = iot.get_values_one_field(dbname, 'com', feature, {'id': {'$in': ulist}})
                data.append(clu_values)

        plot.plot_config()
        for i in xrange(len(data)):
            sns.distplot(data[i], hist=False, label=str(i)+':'+str(len(data[i])))
        plt.xlabel(feature)
        plt.ylabel('PDF')
        # plt.show()
        plt.savefig(feature+typename+'_com.pdf')
        plt.clf()
Example #19
def cluseter_nodes(btype='communication'):
    # cluster users in networks
    g = gt.Graph.Read_GraphML('communication-only-fed-filter.graphml')
    g = gt.giant_component(g)
    gt.summary(g)

    cluters, ids = tn.user_cluster_hashtag('ed-'+btype+'.data')

    # ids = []
    # with open('ed-'+btype+'.data', 'r') as fo:
    #     for line in fo.readlines():
    #         ids.append(line.split(' ')[0])

    g.vs['cluster'] = -1
    for i in xrange(len(cluters)):
        id = ids[i]
        v = g.vs.find(name=id)
        v['cluster'] = cluters[i]
    g.write_graphml('communication-only-fed-filter-hashtag-cluster.graphml')
Example #20
def count_existing_user(btype=''):
    # Count how many fed users are in the network
    g = gt.Graph.Read_GraphML('pro-'+btype+'-hashtag.graphml')
    gt.summary(g)
    # users = iot.get_values_one_field('fed', 'com', 'id_str')
    # pickle.dump(users, open('fed-user-id.pick', 'w'))
    users = set(pickle.load(open('fed-user-id.pick', 'r')))
    print len(users)
    # nodes = g.vs.select(name_in=users)
    nodes = []
    count = 0
    for v in g.vs:
        if v['name'] in users:
            count += 1
            nodes.append(v)
    print float(count)/len(g.vs)
    g = g.subgraph(nodes)
    gt.summary(g)
    g.write_graphml('pro-'+btype+'-hashtag-fed.graphml')
Example #21
def two_community(file_path):
    # Get two communities from the network
    g = gt.Graph.Read_GraphML(file_path)
    gt.summary(g)
    # g = g.as_undirected(combine_edges=dict(weight="sum"))
    g = gt.giant_component(g)
    gt.summary(g)
    # ml = g.community_multilevel(weights='weight', return_levels=True)
    # fast = g.community_fastgreedy(weights='weight')
    # fast_com = fast.as_clustering(n=2)
    # walk = g.community_walktrap(weights='weight')
    # walk_com = walk.as_clustering(n=2)
    infor = g.community_infomap(edge_weights='weight', vertex_weights=None, trials=2)
    # eigen = g.community_leading_eigenvector(clusters=2, weights='weight')
    # label_pro = g.community_label_propagation(weights='weight', initial=eigen.membership)
    # betweet = g.community_edge_betweenness(weights='weight')
    # bet_com = betweet.as_clustering(n=2)
    g.vs['community'] = infor.membership
    g.write_graphml('com-'+file_path)

    return infor.subgraphs()
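
# Infomap does not take a target number of clusters; for a fixed two-way split
# the fastgreedy dendrogram can be cut at n=2 instead (cf. the notes in
# Example #1). A minimal demo on a built-in graph:
from igraph import Graph
print len(Graph.Famous('Zachary').community_fastgreedy().as_clustering(n=2))  # 2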
Example #22
def tags_two_user_moduls():
    # Load network from Gephi output
    g = gt.Graph.Read_GraphML('communication-3-moduls.graphml')
    cluster0, cluster1, cluster2 = set(), set(), set()
    for v in g.vs:
        if v['Modularity Class'] == 0:
            cluster0.add(int(v['name']))
        elif v['Modularity Class'] == 1:
            cluster1.add(int(v['name']))
        elif v['Modularity Class'] == 2:
            cluster2.add(int(v['name']))
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag',
                                                 list(cluster0))
    gt.summary(g)
    filename = 'communication_fed_cluster0'
    g.write_graphml(filename + '_tag_undir.graphml')

    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag',
                                                 list(cluster1))
    gt.summary(g)
    filename = 'communication_fed_cluster1'
    g.write_graphml(filename + '_tag_undir.graphml')

    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag',
                                                 list(cluster2))
    gt.summary(g)
    filename = 'communication_fed_cluster2'
    g.write_graphml(filename + '_tag_undir.graphml')
Example #23
def ed_follow_net():
    # Construct the network of ED users and their followees
    g = gt.load_network('fed', 'follownet')
    g.vs['deg'] = g.indegree()
    users = set(iot.get_values_one_field('fed', 'scom', 'id'))
    nodes = []
    for v in g.vs:
        if int(v['name']) in users:
            nodes.append(v)
        elif v['deg'] > 5:
            nodes.append(v)
        else:
            pass
    print 'Filtered nodes: %d' %len(nodes)
    g = g.subgraph(nodes)
    gt.summary(g)
    g.write_graphml('ed-friend'+'.graphml')

    # sbnet has been extended with all interactions posted by ED users
    edusers = set(g.vs['name'])
    for btype in ['retweet', 'reply', 'mention']:
        gb = gt.load_beh_network('fed', 'sbnet', btype)
        gt.summary(gb)
        nodes = []
        for v in gb.vs:
            if v['name'] in edusers:
                nodes.append(v)
        gb = gb.subgraph(nodes)
        gt.summary(gb)
        gb.write_graphml('ed-'+btype+'-follow.graphml')
Example #24
def user_statis():
    groups = [
         ('ED', 'fed', 'com', {'liwc_anal.result.WC': {'$exists': True}, 'level': 1}),
         ('RD', 'random', 'scom', {'liwc_anal.result.WC': {'$exists': True}, 'level': 1}),
         ('YG', 'younger', 'scom', {'liwc_anal.result.WC': {'$exists': True}, 'level': 1})
    ]

    data = []
    for tag, dbname, comname, filter_values in groups:
        com = dbt.db_connect_col(dbname, comname)
        network1 = gt.Graph.Read_GraphML(tag.lower()+'-net.graphml')
        gt.summary(network1)
        network1_gc = gt.giant_component(network1)
        gt.summary(network1_gc)

        users_time = iot.get_values_one_field(dbname, comname, 'id_str', filter_values)
        for uid in users_time:
            exist = True
            try:
                v = network1.vs.find(name=str(uid))
            except ValueError:
                exist = False
            if exist:
                friends = set(network1.successors(str(uid)))
Example #25
def tags_user_cluster(graph_file_path, filename):
    # Put the tweets of the two clusters into two sets
    g = gt.Graph.Read_GraphML(graph_file_path)
    # g_mention = gt.Graph.Read_GraphML('ed-communication'+'-hashtag-only-fed-cluster.graphml')
    gt.summary(g)
    # gt.summary(g_mention)

    # for i in range(2):
    #     g = [g_retweet, g_mention][i]
    cluster0, cluster1, cluster2 = set(), set(), set()
    for v in g.vs:
        if v['cluster'] == 0:
            cluster0.add(int(v['name']))
        elif v['cluster'] == 1:
            cluster1.add(int(v['name']))
        elif v['cluster'] == -1:
            cluster2.add(int(v['name']))
    print 'cluster size:', len(cluster0)
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag',
                                                 list(cluster0))
    gt.summary(g)
    # filename = ['ed_retweet', 'ed_communication'][i] + '_fed_cluster0'
    vs = g.vs(weight_gt=3, user_gt=3)
    g = g.subgraph(vs)
    gt.summary(g)
    g.write_graphml(filename + 'tag_undir_cluster0.graphml')

    print 'cluster size:', len(cluster1)
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag',
                                                 list(cluster1))
    gt.summary(g)
    # filename = ['ed_retweet', 'ed_communication'][i] + '_fed_cluster1'
    vs = g.vs(weight_gt=3, user_gt=3)
    g = g.subgraph(vs)
    gt.summary(g)
    g.write_graphml(filename + 'tag_undir_cluster1.graphml')
Example #26
def behavior_community(dbname, colname, label):
    # targed_list = set()
    # db = dbt.db_connect_no_auth('fed')
    # poi = db['com']
    # for user in poi.find({}, ['id']):
    #     targed_list.add(user['id'])

    # bg = gt.load_beh_network(dbname, colname)
    # gt.summary(bg)
    # pickle.dump(bg, open('data/'+label+'-bg.pick', 'w'))
    bg = pickle.load(open('data/' + label + '-bg.pick', 'r'))

    # bgc = gt.giant_component(bg, 'WEAK')
    # gt.summary(bgc)
    # pickle.dump(bgc, open('data/'+label+'-bgc.pick', 'w'))

    # bcoms = gt.fast_community(bg)
    # pickle.dump(bcoms, open('data/'+label+'-bcom.pick', 'w'))
    bcoms = pickle.load(open('data/' + label + '-bcom.pick', 'r'))
    # gt.plot(bcoms, 'commu_comms_den.pdf', bbox=(1200, 900))
    bclus = bcoms.as_clustering()
    gt.summary(bclus)
    print bclus.recalculate_modularity()
    community_topic(bg, bclus, dbname, 'scom', 'stimeline')
Example #27
def ed_follow_community(file_path):
    # inspect keywords of user profiles in different communities
    g = gt.Graph.Read_GraphML(file_path)
    gt.summary(g)
    g = g.as_undirected(combine_edges=dict(weight="sum"))
    components = g.clusters()
    g = components.giant()
    gt.summary(g)

    com = dbt.db_connect_col('fed', 'com')
    ml = g.community_fastgreedy(weights='weight').as_clustering()
    # ml = g.community_multilevel(weights='weight')
    common_words = []
    fdist_all = FreqDist()
    for cluster in ml:
        print len(cluster)
        fdist = FreqDist()
        for uid in cluster:
            user = com.find_one({'id': int(g.vs[uid]['name'])}, ['description'])
            profile = user['description']
            if profile:
                # text = ' '.join(pc.tokenizer_stoprm(profile))
                tokens = pc.tokenizer_stoprm(profile)
                for word in tokens:
                    fdist[word] += 1
                    fdist_all[word] += 1
        common_words.append(fdist)
    for fd in common_words:
        w_tfidf = []
        # print fd.most_common(20)
        for (word, freq) in fd.most_common(20):
            allfreq = fdist_all[word]
            # print word, freq, allfreq
            w_tfidf.append((word, float(freq)/allfreq))
        sortedlist = sorted(w_tfidf, key=lambda x: x[1], reverse=True)
        print sortedlist
Example #28
def community(g=None):
    '''
    Detect communities in the hashtag co-occurrence network
    Use Infomap to detect communities (multilevel is commented out below)
    Only select hashtags and communities whose sizes exceed the thresholds
    :param g:
    :return:
    hash_com: {hashtag: community_index}
    com_size: {community_index: community_size}
    '''
    gt.summary(g)
    vs = g.vs(weight_gt=100, user_gt=10)
    g = g.subgraph(vs)
    g = g.subgraph_edges(g.es.select(rWeight_gt=0, rWeight_lt=float('Inf')))
    gt.summary(g)
    gc = gt.giant_component(g)
    gt.summary(gc)
    # g.write_graphml('fed_tag_undir_over3.graphml')
    # com = g.community_multilevel(weights='rWeight', return_levels=False)
    com = g.community_infomap(edge_weights='rWeight', vertex_weights=None)
    # com = louvain.find_partition(gc, method='Significance', weight=None)
    comclus = com.subgraphs()
    print 'Community stats: #communities, modularity', len(
        comclus), com.modularity
    index = 0
    nonsingle = 0
    hash_com = {}
    com_size = {}
    for comclu in comclus:
        print '---------- Community ', index, '-----------------'
        if comclu.vcount() > 1:
            nonsingle += 1
        tag_weight = {}
        for v in comclu.vs:
            if v['weight'] > 5:
                hash_com[v['name']] = index
            tag_weight[v['name']] = v['weight']
            count = com_size.get(index, 0)
            com_size[index] = v['weight'] + count
        sort_list = list(sorted(tag_weight, key=tag_weight.get, reverse=True))
        for key in sort_list:
            print key, tag_weight[key]
        print '-------------Community size: ', com_size[
            index], '---------------------'
        print
        index += 1
    # print len(hash_com)
    # print len(set(hash_com.values()))
    # print set(hash_com.values())
    print '------------------all size:', sum(
        com_size.values()), '---------------------'
    print '------------------non single clusters:', nonsingle, '---------------------'

    return hash_com, com_size
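
# A minimal, self-contained demo of the Infomap step used in community()
# (python-igraph assumed; the real call above additionally passes rWeight):
from igraph import Graph
com = Graph.Famous('Zachary').community_infomap()
print len(com), com.modularity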
Example #29
def tfidf_tag_cluster(btype='retweet'):
    # Calculate the TFIDF of tags in two clusters
    cluster0 = gt.Graph.Read_GraphML('ed_' + btype +
                                     '_fed_cluster0_tag_undir.graphml')
    cluster1 = gt.Graph.Read_GraphML('ed_' + btype +
                                     '_fed_cluster1_tag_undir.graphml')

    gt.summary(cluster0)
    vs = cluster0.vs(weight_gt=3, user_gt=3)
    cluster0 = cluster0.subgraph(vs)
    cluster0 = gt.giant_component(cluster0)
    gt.summary(cluster0)

    gt.summary(cluster1)
    vs = cluster1.vs(weight_gt=3, user_gt=3)
    cluster1 = cluster1.subgraph(vs)
    cluster1 = gt.giant_component(cluster1)
    gt.summary(cluster1)

    for v in cluster0.vs:
        exist = True
        count_ov = 0.0
        try:
            ov = cluster1.vs.find(name=v['name'])
        except ValueError:
            exist = False
        if exist:
            count_ov = ov['weight']
        v['tfidf'] = float(v['weight']) / (v['weight'] + count_ov)
    for v in cluster1.vs:
        exist = True
        count_ov = 0.0
        try:
            ov = cluster0.vs.find(name=v['name'])
        except ValueError:
            exist = False
        if exist:
            count_ov = ov['weight']
        v['tfidf'] = float(v['weight']) / (v['weight'] + count_ov)
    cluster0.write_graphml('ed_' + btype +
                           '_fed_cluster0_tfidf_tag_undir.graphml')
    cluster1.write_graphml('ed_' + btype +
                           '_fed_cluster1_tfidf_tag_undir.graphml')
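
# The 'tfidf' score above is a tag's weight share across the two clusters,
# e.g. a tag used 30 times in cluster0 and 10 times in cluster1:
print 30.0 / (30 + 10)  # 0.75 in cluster0; the same tag scores 0.25 in cluster1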
Example #30
def remove_spam(btype):
    # Remove nodes that have a high out-degree but a low in-degree
    g = gt.Graph.Read_GraphML('ed-'+btype+'-hashtag.graphml')
    gt.summary(g)
    g.vs['ratio'] = (np.array(g.outdegree()) + 1.0) / (np.array(g.indegree()) + 1)  # float to avoid integer division
    gt.summary(g)
    maxv = np.percentile(g.vs['ratio'], 97.5)
    print maxv
    nodes = g.vs.select(ratio_lt=maxv)
    g = g.subgraph(nodes)
    gt.summary(g)
    g.write_graphml('ed-'+btype+'-hashtag-rmspam.graphml')
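
# The spam cut above keeps nodes below the 97.5th percentile of the
# out/in-degree ratio; np.percentile interpolates over the sorted values:
import numpy as np
print np.percentile([1, 1, 1, 2, 50], 97.5)  # outliers like 50 fall above the cut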
Example #31
def friend_dis(dbname, comname, netname, tagets):
    # The list returned by Graph.neighbors always includes the input vertex,
    # while those from predecessors and successors don't, so the list from
    # neighbors is always larger by 1 than those from the other two methods.
    db = dbt.db_connect_no_auth(dbname)
    com = db[comname]
    g = gt.load_network(dbname, netname)
    gt.add_attributes(g, ['followers_count', 'friends_count'], dbname, comname, ['followers_count', 'friends_count'])
    gt.summary(g)
    for user in com.find({}, ['id', 'net_anal']):
        uid = user['id']
        values = user.get('net_anal', {'mined': True})
        node_exist = True
        try:
            v = g.vs.find(name=str(uid))
        except ValueError:
            node_exist = False
        if node_exist:
            # followers = g.neighborhood(str(uid), mode='out')
            # followings = g.neighborhood(str(uid), mode='in')
            followers = g.successors(str(uid))
            followings = g.predecessors(str(uid))
            # print followers
            # print followings
            follower_set = set(int(name) for name in g.vs[followers]['name'])
            following_set = set(int(name) for name in g.vs[followings]['name'])
            ed_follower = len(tagets & follower_set)
            ed_following = len(tagets & following_set)
            # friend_set = follower_set | following_set
            # print follower_set
            # print following_set
            follower = v['followers_count']
            if follower == 0:
                follower = 1
            following = v['friends_count']
            if following == 0:
                following = 1
            # friend = len(friend_set)
            # if friend == 0:
            #     friend = 1
            # ed_friend = len(tagets & friend_set)

            ed_follower_p = float(ed_follower)/follower
            ed_following_p = float(ed_following)/following
            # ed_friend_p = float(ed_friend)/friend
            net_sta = {}
            # net_sta['follower_no'] = follower
            # net_sta['following_no'] = following
            # net_sta['friend_no'] = friend
            net_sta['ed_follower_no'] = ed_follower
            net_sta['ed_following_no'] = ed_following
            # net_sta['ed_friend_no'] = ed_friend
            net_sta['ed_follower_p'] = ed_follower_p
            net_sta['ed_following_p'] = ed_following_p
            # net_sta['ed_friend_p'] = ed_friend_p
            net_sta['non_ed_follower_p'] = 1 - ed_follower_p
            net_sta['non_ed_following_p'] = 1 - ed_following_p
            # net_sta['non_ed_friend_p'] = 1 - ed_friend_p
            values['ed_proportion'] = net_sta
            com.update_one({'id': uid}, {'$set': {'net_anal': values}}, upsert=True)
Example #32
def variable_change(dbname, comname, oldtimename, newtimename):
    db = dbt.db_connect_no_auth(dbname)
    com = db[comname]
    oldtime = db[oldtimename]
    newtime = db[newtimename]

    oldfollower, newfollower, oldfollowee, newfollowee, users, liwcs, olddate, newdate, \
    oldcw, newcw, oldgw, newgw, oldage, newage, newcbmi, oldcbmi, newgbmi, oldgbmi = \
        [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []
    # filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}}
    filter = {'$or': [{'liwc_anal.result.i':{'$exists':True}}, {'new_liwc_anal.result.i':{'$exists':True}}]}

    # full analysis variables:
    # meta_keys = ['WC', 'WPS', 'Sixltr', 'Dic']
    # category_keys = ['funct', 'pronoun', 'ppron', 'i', 'we', 'you', 'shehe',
    #     'they', 'ipron', 'article', 'verb', 'auxverb', 'past', 'present', 'future',
    #     'adverb', 'preps', 'conj', 'negate', 'quant', 'number', 'swear', 'social',
    #     'family', 'friend', 'humans', 'affect', 'posemo', 'negemo', 'anx', 'anger',
    #     'sad', 'cogmech', 'insight', 'cause', 'discrep', 'tentat', 'certain',
    #     'inhib', 'incl', 'excl', 'percept', 'see', 'hear', 'feel', 'bio', 'body',
    #     'health', 'sexual', 'ingest', 'relativ', 'motion', 'space', 'time', 'work',
    #     'achieve', 'leisure', 'home', 'money', 'relig', 'death', 'assent', 'nonfl',
    #     'filler']
    # puncuation_keys = [
    #     'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 'Exclam',
    #     'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP', 'AllPct']
    # allcates = meta_keys + category_keys + puncuation_keys
    allcates = ['posemo', 'negemo', 'anx', 'anger', 'sad']

    for user in com.find(filter):
        users.append(user['id'])
        # print user['id']
        """LIWC variables"""
        oldliwc = user['liwc_anal']['result']
        newliwc = user['new_liwc_anal']['result']
        if newliwc is None:
            newliwc = {}
        if oldliwc is None:
            oldliwc = {}
        ols = [oldliwc.get(key, None) for key in allcates]
        nls = [newliwc.get(key, None) for key in allcates]
        liwcs.append(ols+nls)

        '''Follower and Followee variables'''
        # oldtweet = time.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', 1)]).limit(1)[0]
        oldtweets = oldtime.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', -1)]).limit(1)
        if oldtweets.count() == 0:
            oldtweets = newtime.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', 1)]).limit(1)
        oldtweet = oldtweets[0]
        oldprofile = oldtweet['user']

        newtweets = newtime.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', -1)]).limit(1)
        if newtweets.count() == 0:
            newtweet = oldtweet
            newprofile = oldprofile
        else:
            newtweet = newtweets[0]
            newprofile = newtweet['user']
        olddate.append(oldtweet['created_at'])
        newdate.append(newtweet['created_at'])

        newbio = des_miner.process_text(newprofile['description'])
        oldbio = des_miner.process_text(oldprofile['description'])

        oldcw.append(oldbio.get('cw', {}).get('value', None))
        newcw.append(newbio.get('cw', {}).get('value', None))
        oldgw.append(oldbio.get('gw', {}).get('value', None))
        newgw.append(newbio.get('gw', {}).get('value', None))
        oldage.append(oldbio.get('a', {}).get('value', None))
        newage.append(newbio.get('a', {}).get('value', None))
        oldcbmi.append(oldbio.get('cbmi', {}).get('value', None))
        newcbmi.append(newbio.get('cbmi', {}).get('value', None))
        oldgbmi.append(oldbio.get('gbmi', {}).get('value', None))
        newgbmi.append(newbio.get('gbmi', {}).get('value', None))

        oldfollower.append(oldprofile['followers_count'])
        newfollower.append(newprofile['followers_count'])
        oldfollowee.append(oldprofile['friends_count'])
        newfollowee.append(newprofile['friends_count'])


    """Out put Profile variables"""
    print len(liwcs)
    oldliwccol = ['Old'+key for key in allcates]
    newliwccol = ['New'+key for key in allcates]
    df = pd.DataFrame(data=liwcs, columns=oldliwccol+newliwccol)
    df['UserID'] = users
    df['OldFollower'] = oldfollower
    df['NewFollower'] = newfollower
    df['OldFollowee'] = oldfollowee
    df['NewFollowee'] = newfollowee
    df['OldDate'] = olddate
    df['NewDate'] = newdate
    df['OldCW'] = oldcw
    df['NewCW'] = newcw
    df['OldGW'] = oldgw
    df['NewGW'] = newgw
    df['OldAge'] = oldage
    df['NewAge'] = newage
    df['OldCBMI'] = oldcbmi
    df['NewCBMI'] = newcbmi
    df['OldGBMI'] = oldgbmi
    df['NewGBMI'] = newgbmi

    g1 = gt.load_network_subset(dbname, 'net', {'scraped_times': 2})
    g2 = gt.load_network_subset(dbname, 'net', {'scraped_times': 130})
    gt.summary(g1)
    gt.summary(g2)
    oldindegree_map = dict(zip(g1.vs['name'], g1.indegree()))
    oldoutdegree_map = dict(zip(g1.vs['name'], g1.outdegree()))
    oldpagerank_map = dict(zip(g1.vs['name'], g1.pagerank()))
    oldbetweenness_map = dict(zip(g1.vs['name'], g1.betweenness()))

    newindegree_map = dict(zip(g2.vs['name'], g2.indegree()))
    newoutdegree_map = dict(zip(g2.vs['name'], g2.outdegree()))
    newpagerank_map = dict(zip(g2.vs['name'], g2.pagerank()))
    newbetweenness_map = dict(zip(g2.vs['name'], g2.betweenness()))

    df['OldIndegree'] = [oldindegree_map.get(str(uid), 0) for uid in users]
    df['NewIndegree'] = [newindegree_map.get(str(uid), 0) for uid in users]
    df['OldOutdegree'] = [oldoutdegree_map.get(str(uid), 0) for uid in users]
    df['NewOutdegree'] = [newoutdegree_map.get(str(uid), 0) for uid in users]
    df['OldPagerank'] = [oldpagerank_map.get(str(uid), 0.0) for uid in users]
    df['NewPagerank'] = [newpagerank_map.get(str(uid), 0.0) for uid in users]
    df['OldBetweenness'] = [oldbetweenness_map.get(str(uid), 0.0) for uid in users]
    df['NewBetweenness'] = [newbetweenness_map.get(str(uid), 0.0) for uid in users]
    df.to_csv(dbname+'.csv')