Beispiel #1
0
def network_change(dbname, comname, netname):
    '''
    Compare two cached snapshots of a network and plot their in-degree PDFs.

    The snapshots are read from data/g1.pick and data/g2.pick. The
    commented-out lines below show how those pickles were produced
    (scraped_times 2 and 131). Summary and statistics are printed for both
    graphs, then both in-degree sequences are plotted together and saved to
    indegree.pdf.
    :param dbname: only referenced by the commented-out loader code
    :param comname: only referenced by the commented-out loader code
    :param netname: only referenced by the commented-out loader code
    :return: None
    '''
    # How the cached pickles were built:
    # filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}}
    # users = iot.get_values_one_field(dbname, comname, 'id', filter)
    # g1 = gt.load_network_subset(users, dbname, netname, {'scraped_times': 2})
    # g2 = gt.load_network_subset(users, dbname, netname, {'scraped_times': 131})
    # pickle.dump(g1, open('data/g1.pick', 'w'))
    # pickle.dump(g2, open('data/g2.pick', 'w'))

    # Context managers make sure the file handles are closed again.
    # NOTE(review): only load pickles this project produced itself.
    with open('data/g1.pick', 'r') as first_file:
        g1 = pickle.load(first_file)
    with open('data/g2.pick', 'r') as second_file:
        g2 = pickle.load(second_file)

    gt.summary(g1)
    # The original summarised g1 twice and never looked at g2.
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)

    # The in-degrees are shifted by one before plotting; presumably so that
    # nodes with in-degree 0 survive the non-linear binning -- TODO confirm.
    # pt.pdf_plot_one_data(g1.indegree(), 'indegree', linear_bins=False, fit_start=1, fit_end=100)
    pt.plot_pdf_mul_data(
        [np.array(g1.indegree()) + 1,
         np.array(g2.indegree()) + 1],
        'indegree', ['b', 'r'], ['o', '^'], ['G1', 'G2'],
        linear_bins=False,
        central=False,
        fit=True,
        savefile='indegree.pdf')
Beispiel #2
0
def statis():
    '''
    Print network statistics for the classroom graphs numbered 1 to 4.

    Each graph is read from
    data/Classroom_graphmls/classroom_graph<N>.graphml and handed to
    gt.net_stat.
    :return: None
    '''
    graph_dir = 'data/Classroom_graphmls/'
    for number in xrange(1, 5):
        graph_file = graph_dir + 'classroom_graph' + str(number) + '.graphml'
        classroom = Graph.Read_GraphML(graph_file)
        gt.net_stat(classroom)
Beispiel #3
0
def network_analysis():
    '''
    Export the follow network and two behaviour networks of one user group.

    The user ids come from the 'depressive' collection of the 'depression'
    database. The follow network is written to data/follow_net.graphml and
    each behaviour network to data/<behaviour>_net.graphml; statistics are
    printed for every network.
    :return: None
    '''
    alluser = iot.get_values_one_field('depression', 'depressive', 'id')

    # Query filter: both the 'user' and the 'follower' field must be one of
    # the selected user ids.
    within_group = {'$in': alluser}
    edge_filter = {'user': within_group, 'follower': within_group}
    follow_net = gt.load_network_subset('depression', 'net', edge_filter)
    gt.net_stat(follow_net)
    follow_net.write_graphml('data/follow_net.graphml')

    for beh in ('retweet', 'communication'):
        print(beh)
        beh_net = gt.load_beh_network_subset(userlist=alluser,
                                             db_name='depression',
                                             collection='bnet',
                                             btype=beh)
        gt.net_stat(beh_net)
        target = 'data/' + beh + '_net.graphml'
        beh_net.write_graphml(target)
Beispiel #4
0
def user_cluster_hashtag():
    '''
    Cluster users based on the profiles of hashtag preference

    The profiles are read from data/user-hash-profile.pick (a mapping from
    user to feature vector), split into two clusters with K-means, and the
    resulting user -> cluster mapping is passed to cluster_assort together
    with the 'snet' network of database 'fed'.
    :return: None
    '''
    from sklearn.cluster import KMeans
    # Only needed by the commented-out model-selection code below.
    from sklearn.metrics import silhouette_score

    # Close the file handle deterministically instead of leaking it.
    # NOTE(review): only load pickles this project produced itself.
    with open('data/user-hash-profile.pick', 'r') as profile_file:
        user_hash_profile = pickle.load(profile_file)

    # Fix the user order once so that rows of X and cluster labels are paired
    # explicitly, and so np.array always receives a real list of rows.
    users = list(user_hash_profile.keys())
    X = np.array([user_hash_profile[user] for user in users])
    print(X.shape)

    '''Select the best K for K-means'''
    # range_n_clusters = range(2, 21)
    # values = []
    # for n_clusters in range_n_clusters:
    #     clusterer = KMeans(n_clusters=n_clusters, random_state=10)
    #     cluster_labels = clusterer.fit_predict(X)
    #     silhouette_avg = silhouette_score(X, cluster_labels)
    #     print("For n_clusters =", n_clusters, "The average silhouette_score is :", silhouette_avg)
    #     values.append(silhouette_avg)
    # print values
    # print range_n_clusters

    clusterer = KMeans(n_clusters=2, random_state=10)
    cluster_labels = clusterer.fit_predict(X)
    dictionary = dict(zip(users, cluster_labels))

    print('Follow network')
    net = gt.load_network('fed', 'snet')
    gt.net_stat(net)
    cluster_assort(dictionary, net)
0
def community_vis(filename, ctype):
    '''
    Detect communities in a tag network and dump it as a vis.js data file.

    Reads <filename>_tag_undir.graphml, runs community detection, stores the
    membership in the vertex attribute 'group', calls mixing_para on the
    graph and writes the arrays 'nodes' and 'edges' to
    data/<ctype>_<filename>_tag_undir.js.
    :param filename: prefix of the GraphML input and of the js output
    :param ctype: 'ml' selects multilevel, any other value selects infomap
    :return: None
    '''
    graph = gt.Graph.Read_GraphML(filename+'_tag_undir.graphml')
    gt.net_stat(graph)

    # Community detection
    if ctype != 'ml':
        partition = graph.community_infomap(edge_weights='weight',
                                            vertex_weights='weight')
    else:
        partition = graph.community_multilevel(weights='weight',
                                               return_levels=False)
    print(partition)
    graph.vs['group'] = partition.membership
    mixing_para(graph)

    # Random anchor point per community. The anchor is redrawn for every
    # vertex, so the last draw for a group is the one that is kept.
    anchors = {grp: (rand.randint(-600, 600), rand.randint(-600, 600))
               for grp in graph.vs['group']}

    out_path = 'data/' + ctype + '_' + filename + '_tag_undir.js'
    with open(out_path, 'w') as fw:
        fw.write('var nodes = [\n')
        for idv, vertex in enumerate(graph.vs):
            tag = vertex['name']
            occurrence = str(vertex['weight'])
            group = vertex['group']
            anchor_x, anchor_y = anchors[group]
            # Nodes are scattered up to 300 units away from the group anchor.
            pos_x = str(anchor_x + rand.randint(0, 300))
            pos_y = str(anchor_y + rand.randint(0, 300))
            tooltip = (' Tags: ' + tag + '<br> Occurrence: ' + occurrence +
                       '<br> Group: ' + str(group))
            # Node ids in the js file are 1-based.
            fw.write('{id: ' + str(idv + 1) +
                     ", label: '" + tag +
                     "', value: " + occurrence +
                     ", title: '" + tooltip +
                     "', x: " + pos_x +
                     ', y: ' + pos_y +
                     ', group: ' + str(group) + '}, \n')
        fw.write('];\n var edges = [\n')
        for edge in graph.es:
            strength = str(edge['weight'])
            source_tag = graph.vs[edge.source]['name']
            target_tag = graph.vs[edge.target]['name']
            fw.write('{from: ' + str(edge.source + 1) +
                     ', to: ' + str(edge.target + 1) +
                     ", title: ' Tags: " + source_tag + ' ' + target_tag +
                     '<br> Co-occurrence: ' + strength +
                     "', value: " + strength +
                     '},\n')
        fw.write('];\n')
Beispiel #6
0
def network_stats(dbname, com, fnet, bnet):
    '''
    Print a result-table header and analyse the following network.

    :param dbname: database that holds the network and user collections
    :param com: user collection name, passed through to rank_feature
    :param fnet: collection name of the following network
    :param bnet: not used by the code visible here
    :return: None
    '''
    fields = iot.read_fields()

    columns = ['Network_Feature', '#Nodes', '#Edges',
               'X_Min', 'X_Max', 'X_P2.5', 'X_P97.5',
               'Y_Min', 'Y_Max', 'Y_P2.5', 'Y_P97.5',
               'Tau_coef', 'p_value']
    print(' \t '.join(columns))

    print('Following')
    following = gt.load_network(dbname, fnet)
    # Output file for Gephi:
    # following.write_dot('friendship.DOT')

    gt.net_stat(following)
    # outputs = feature_assort_friend(following, dbname, com, fields, directed=True)
    outputs = rank_feature(following, dbname, com, fields, directed=True)
Beispiel #7
0
def pro_ed_rec_network(dbname, comname, netname):
    '''
    Test whether the two user groups mix assortatively in a network.

    Every vertex gets an integer attribute 'set': +1 for each hit in
    rec_user(...) and -1 for each hit in proed_users(...). Vertices that end
    up at 0 (in neither list, or in both) are dropped. The directed
    assortativity of 'set' on the remaining subgraph is compared with 3000
    random permutations of the labels. One line is printed with:
    observed assortativity, permutation mean, permutation std, z-score and
    two-sided p-value. The subgraph is written to
    pro-ed-rec-mention.graphml.
    :param dbname: database name
    :param comname: user collection handed to rec_user / proed_users
    :param netname: network collection to load
    :return: None
    '''
    g = gt.load_network(dbname, netname)
    # g = gt.load_beh_network(dbname, 'sbnet', 'mention')
    rec_users = rec_user(dbname, comname)
    pro_users = proed_users(dbname, comname)

    print(len(rec_users))
    print(len(pro_users))

    g.vs['set'] = 0
    # +1 Pro-rec, -1 Pro-ED; users without a vertex in the network are skipped.
    for users, delta in ((rec_users, 1), (pro_users, -1)):
        for user in users:
            try:
                v = g.vs.find(name=str(user))
            except ValueError:
                continue
            v['set'] += delta

    sg = g.subgraph(g.vs(set_ne=0))
    gt.net_stat(sg)
    # sgc = gt.giant_component(sg)
    # gt.net_stat(sgc)

    # Permutation test of the observed assortativity.
    observed_sets = sg.vs['set']
    raw_assort = sg.assortativity('set', 'set', directed=True)
    shuffled = np.array(observed_sets)
    ass_list = list()
    for i in xrange(3000):
        np.random.shuffle(shuffled)
        sg.vs['set'] = shuffled
        ass_list.append(sg.assortativity('set', 'set', directed=True))
    # Put the real labels back: the original left the last random
    # permutation on the graph and exported that to GraphML.
    sg.vs['set'] = observed_sets

    ass_list = np.array(ass_list)
    amean, astd = np.mean(ass_list), np.std(ass_list)

    # Two-sided p-value: share of permutations at least as extreme as the
    # observed value in either direction.
    absobserved = abs(raw_assort)
    pval = (np.sum(ass_list >= absobserved) +
            np.sum(ass_list <= -absobserved))/float(len(ass_list))
    zscore = (raw_assort-amean)/astd
    print('%.3f, %.3f, %.3f, %.3f, %.3f' % (raw_assort, amean, astd, zscore, pval))
    sg.write_graphml('pro-ed-rec-mention.graphml')
Beispiel #8
0
def pro_ed_rec_network(dbname, comname, netname):
    '''
    Permutation test for group assortativity, followed by a GraphML export.

    Vertices found in rec_user(...) get +1 on the attribute 'set', vertices
    found in proed_users(...) get -1. Only vertices whose 'set' is non-zero
    are kept. The directed assortativity of 'set' is then compared against
    3000 label shuffles, and a single line "observed, mean, std, z, p" is
    printed. The labelled subgraph is saved as pro-ed-rec-mention.graphml.
    :param dbname: database name
    :param comname: user collection handed to rec_user / proed_users
    :param netname: network collection to load
    :return: None
    '''
    g = gt.load_network(dbname, netname)
    # g = gt.load_beh_network(dbname, 'sbnet', 'mention')
    rec_users = rec_user(dbname, comname)
    pro_users = proed_users(dbname, comname)

    print(len(rec_users))
    print(len(pro_users))

    g.vs['set'] = 0
    label_steps = (
        (rec_users, 1),   # +1 Pro-rec
        (pro_users, -1),  # -1 Pro-ED
    )
    for group_users, step in label_steps:
        for user in group_users:
            try:
                vertex = g.vs.find(name=str(user))
            except ValueError:
                # This user has no vertex in the loaded network.
                continue
            vertex['set'] += step

    labelled = g.vs(set_ne=0)
    sg = g.subgraph(labelled)
    gt.net_stat(sg)
    # sgc = gt.giant_component(sg)
    # gt.net_stat(sgc)

    # Significance test: shuffle the labels and recompute the assortativity.
    true_labels = sg.vs['set']
    raw_assort = sg.assortativity('set', 'set', directed=True)
    permuted = np.array(true_labels)
    ass_list = list()
    for i in xrange(3000):
        np.random.shuffle(permuted)
        sg.vs['set'] = permuted
        ass_list.append(sg.assortativity('set', 'set', directed=True))
    # Restore the true labels. Without this the exported graph would carry
    # the last random permutation instead of the real group membership.
    sg.vs['set'] = true_labels

    ass_list = np.array(ass_list)
    amean, astd = np.mean(ass_list), np.std(ass_list)

    # Two-sided p-value over the permutation distribution.
    absobserved = abs(raw_assort)
    pval = (np.sum(ass_list >= absobserved) +
            np.sum(ass_list <= -absobserved))/float(len(ass_list))
    zscore = (raw_assort-amean)/astd
    print('%.3f, %.3f, %.3f, %.3f, %.3f' % (raw_assort, amean, astd, zscore, pval))
    sg.write_graphml('pro-ed-rec-mention.graphml')
Beispiel #9
0
def network_stats(dbname, com, fnet, bnet):
    """
    Print the result-table header, then analyse the following network.

    :param dbname: database that holds the network and user collections
    :param com: user collection name, forwarded to rank_feature
    :param fnet: collection name of the following network
    :param bnet: not referenced by the code visible here
    :return: None
    """
    fields = iot.read_fields()

    # Header of the tab-separated result table.
    header = ("Network_Feature \t #Nodes \t #Edges \t "
              "X_Min \t X_Max \t X_P2.5 \t X_P97.5 \t "
              "Y_Min \t Y_Max \t Y_P2.5 \t Y_P97.5 \t "
              "Tau_coef \t p_value")
    print(header)
    print("Following")

    follow_graph = gt.load_network(dbname, fnet)
    # Output file for Gephi:
    # follow_graph.write_dot('friendship.DOT')
    gt.net_stat(follow_graph)

    # outputs = feature_assort_friend(follow_graph, dbname, com, fields, directed=True)
    outputs = rank_feature(follow_graph, dbname, com, fields, directed=True)
Beispiel #10
0
def tag_similarity_group_conflit_all():
    '''
    Load the complete tag network and reduce it to frequently used tags.

    The graph is read from dropout-tag-emotion3-rank-all.graphml and filtered
    twice: vertices with attribute 'weight' > 50, then vertices with
    attribute 'user' > 50. The visible part of the function only builds the
    name -> 'user' vocabulary and an empty list; nothing is returned.
    :return: None
    '''
    from scipy import spatial
    # from sklearn.metrics.pairwise import cosine_similarity
    gall = gt.Graph.Read_GraphML('dropout-tag-emotion3-rank-all.graphml')
    gt.net_stat(gall)

    # Apply the two vertex filters one after the other; the second filter
    # runs on the already reduced graph.
    for criterion in ({'weight_gt': 50}, {'user_gt': 50}):
        kept = gall.vs.select(**criterion)
        print('Filtered nodes: %d' % len(kept))
        gall = gall.subgraph(kept)
    gt.net_stat(gall)

    voc = dict(zip(gall.vs['name'], gall.vs['user']))
    gs = []
Beispiel #11
0
def compare_direct_undir():
    '''
    Compare infomap partitions of ed_tag.graphml and ed_tag_undir.graphml.

    Infomap runs once on the first graph and twice on the second one. Four
    scores are printed: adjusted Rand index and normalized mutual information
    between the first graph and the first run on the second graph, then the
    same two scores between the two runs on the second graph.
    :return: None
    '''
    from sklearn import metrics

    tag_net = gt.Graph.Read_GraphML('ed_tag.graphml')
    gt.net_stat(tag_net)
    tag_net_undir = gt.Graph.Read_GraphML('ed_tag_undir.graphml')
    gt.net_stat(tag_net_undir)

    infomap_args = {'edge_weights': 'weight', 'vertex_weights': 'weight'}
    partition = tag_net.community_infomap(**infomap_args)
    partition_u1 = tag_net_undir.community_infomap(**infomap_args)
    partition_u2 = tag_net_undir.community_infomap(**infomap_args)

    mem = partition.membership
    memu1 = partition_u1.membership
    memu2 = partition_u2.membership

    print(metrics.adjusted_rand_score(mem, memu1))
    print(metrics.normalized_mutual_info_score(mem, memu1))
    print(metrics.adjusted_rand_score(memu2, memu1))
    print(metrics.normalized_mutual_info_score(memu2, memu1))
Beispiel #12
0
def friend_community():
    net = gt.Graph.Read_GraphML('ed_weighted_follow.graphml')
    # net = gt.load_network('fed', 'snet')
    gt.net_stat(net)
    com = net.community_infomap(edge_weights='weight')
    comclus = com.subgraphs()
    print len(comclus), com.modularity
    com = dbt.db_connect_col('fed', 'scom')
    index = 0
    hash_com = {}
    for comclu in comclus:
        print '============================================================'
        # if comclu.vcount() > 10:
        for v in comclu.vs:
            user = com.find_one({'id': int(v['name'])})
            print v['name'], user['id'], user['screen_name'], ' '.join(user['description'].split()).encode('utf-8')
            hash_com[v['name']] = index
        index += 1
Beispiel #13
0
def friend_community():
    net = gt.Graph.Read_GraphML('ed_weighted_follow.graphml')
    # net = gt.load_network('fed', 'snet')
    gt.net_stat(net)
    com = net.community_infomap(edge_weights='weight')
    comclus = com.subgraphs()
    print len(comclus), com.modularity
    com = dbt.db_connect_col('fed', 'scom')
    index = 0
    hash_com = {}
    for comclu in comclus:
        print '============================================================'
        # if comclu.vcount() > 10:
        for v in comclu.vs:
            user = com.find_one({'id': int(v['name'])})
            print v['name'], user['id'], user['screen_name'], ' '.join(
                user['description'].split()).encode('utf-8')
            hash_com[v['name']] = index
        index += 1
Beispiel #14
0
def compare_direct_undir():
    '''
    Compare difference between directed and undirected networks.

    Infomap partitions are computed once for ed_tag.graphml and twice for
    ed_tag_undir.graphml. Adjusted Rand index and normalized mutual
    information are printed first for directed vs. undirected, then for the
    two undirected runs against each other.
    :return: None
    '''
    from sklearn import metrics

    g = gt.Graph.Read_GraphML('ed_tag.graphml')
    gt.net_stat(g)
    gu = gt.Graph.Read_GraphML('ed_tag_undir.graphml')
    gt.net_stat(gu)

    def infomap_membership(graph):
        # Weighted infomap on both edges and vertices; membership list only.
        return graph.community_infomap(edge_weights='weight',
                                       vertex_weights='weight').membership

    mem = infomap_membership(g)
    memu1 = infomap_membership(gu)
    memu2 = infomap_membership(gu)

    for labels_a, labels_b in ((mem, memu1), (memu2, memu1)):
        print(metrics.adjusted_rand_score(labels_a, labels_b))
        print(metrics.normalized_mutual_info_score(labels_a, labels_b))
Beispiel #15
0
def network_change(dbname, comname, netname):
    '''
    Print statistics for two cached network snapshots and plot their
    in-degree distributions.

    The graphs are unpickled from data/g1.pick and data/g2.pick. The
    commented-out lines show that they were built with scraped_times 2 and
    131. The plot is saved to indegree.pdf.
    :param dbname: only referenced by the commented-out loader code
    :param comname: only referenced by the commented-out loader code
    :param netname: only referenced by the commented-out loader code
    :return: None
    '''
    # How the cached pickles were built:
    # filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}}
    # users = iot.get_values_one_field(dbname, comname, 'id', filter)
    # g1 = gt.load_network_subset(users, dbname, netname, {'scraped_times': 2})
    # g2 = gt.load_network_subset(users, dbname, netname, {'scraped_times': 131})
    # pickle.dump(g1, open('data/g1.pick', 'w'))
    # pickle.dump(g2, open('data/g2.pick', 'w'))

    snapshots = []
    for pick_path in ('data/g1.pick', 'data/g2.pick'):
        # "with" closes the handle that the original left open.
        # NOTE(review): only load pickles this project produced itself.
        with open(pick_path, 'r') as pick_file:
            snapshots.append(pickle.load(pick_file))
    g1, g2 = snapshots

    gt.summary(g1)
    # The original called gt.summary(g1) twice and never summarised g2.
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)

    # In-degrees are shifted by one before plotting; presumably so that
    # zero in-degree nodes survive the non-linear binning -- TODO confirm.
    # pt.pdf_plot_one_data(g1.indegree(), 'indegree', linear_bins=False, fit_start=1, fit_end=100)
    pt.plot_pdf_mul_data([np.array(g1.indegree())+1, np.array(g2.indegree())+1],
                         'indegree', ['b', 'r'], ['o', '^'], ['G1', 'G2'],
                         linear_bins=False, central=False, fit=True,
                         savefile='indegree.pdf')
Beispiel #16
0
def tag_record(dbname, colname, filename):
    '''
    Filter the cached hashtag co-occurrence network and export it as GraphML.

    The graph is unpickled from data/<filename>_tag_undir.pick. It is reduced
    to vertices with 'weight' > 3 and then to vertices with 'user' > 3, and
    written to <filename>_tag_undir.graphml.
    :param dbname: only referenced by the commented-out builder code
    :param colname: only referenced by the commented-out builder code
    :param filename: prefix of the pickle input and the GraphML output
    :return: the filtered graph
    '''
    # How the cached pickle was built:
    # ed_users = iot.get_values_one_field(dbname, 'scom', 'id')
    # print len(ed_users)
    # g = gt.load_hashtag_coocurrent_network_undir(dbname, colname, uids=ed_users)
    # pickle.dump(g, open('data/'+filename+'_tag_undir.pick', 'w'))

    # Close the file handle deterministically instead of leaking it.
    # NOTE(review): only load pickles this project produced itself.
    with open('data/'+filename+'_tag_undir.pick', 'r') as pick_file:
        g = pickle.load(pick_file)
    gt.net_stat(g)
    # g.write_graphml(filename+'_tag.graphml')

    nodes = g.vs.select(weight_gt=3)
    print('Filtered nodes: %d' % len(nodes))
    g = g.subgraph(nodes)
    # The second filter runs on the already reduced graph.
    nodes = g.vs.select(user_gt=3)
    print('Filtered nodes: %d' % len(nodes))
    g = g.subgraph(nodes)
    # edges = g.es.select(weight_gt=3)
    # print 'Filtered edges: %d' %len(edges)
    # g = g.subgraph_edges(edges)

    gt.net_stat(g)
    g.write_graphml(filename+'_tag_undir.graphml')
    # plot_graph(g, 'ed-hashtag')
    return g
Beispiel #17
0
def friendship_community_vis(dbname, colname, filename, ctype):
    '''
    Out graph for vis.js visualization

    Loads a network, keeps its giant component, detects communities and
    writes the arrays 'nodes' and 'edges' as JavaScript source to
    data/<ctype>_<filename>_net_follow.js.
    :param dbname: database holding the network and the 'scom' / 'com' collections
    :param colname: network collection passed to gt.load_network
    :param filename: middle part of the output file name
    :param ctype: 'ml' -> multilevel, 'lp' -> label propagation seeded by a
        two-cluster leading-eigenvector split, anything else -> infomap
    :return: None
    '''
    # NOTE(review): ed_users is only referenced by commented-out code below.
    ed_users = iot.get_values_one_field(dbname, 'scom', 'id')
    # fed_users = iot.get_values_one_field(dbname, 'com', 'id')
    dbcom = dbt.db_connect_col(dbname, 'com')
    fg = gt.load_network(dbname, colname)
    # fg = gt.load_beh_network_subset(ed_users, dbname, colname, 'retweet')
    gt.net_stat(fg)
    # fg = fg.as_undirected(mode="mutual")
    # gt.net_stat(fg)

    # Continue with the giant component only ('WEAK' is passed to the helper).
    fg = gt.giant_component(fg, 'WEAK')
    gt.net_stat(fg)

    if ctype == 'ml':
        com = fg.community_multilevel(weights='weight', return_levels=False)
    elif ctype == 'lp':
        # Seed label propagation with a two-cluster leading-eigenvector split
        # of the undirected graph (parallel edge attributes combined by sum).
        fgu = fg.as_undirected(combine_edges=sum)
        init = fgu.community_leading_eigenvector(clusters=2, weights='weight')
        print init.membership
        com = fg.community_label_propagation(weights='weight',
                                             initial=init.membership)
        print com.membership
    else:
        com = fg.community_infomap(edge_weights='weight', trials=2)
    fg.vs['group'] = com.membership

    # edges = fg.es.select(weight_gt=3)
    # print 'Filtered edges: %d' %len(edges)
    # fg = fg.subgraph_edges(edges)
    # gt.net_stat(fg)

    # fg.vs['degree'] = fg.degree(mode="all")
    # nodes = fg.vs.select(degree_gt=10)
    # fg = fg.subgraph(nodes)
    # gt.net_stat(fg)

    # Random anchor point per community. The loop runs once per vertex, so
    # the last draw for a group is the one that is kept.
    Coo = {}
    for x in fg.vs['group']:
        Coo[x] = (rand.randint(-1000, 1000), rand.randint(-1000, 1000))

    with open('data/' + ctype + '_' + filename + '_net_follow.js', 'w') as fw:
        fw.write('var nodes = [\n')
        for idv, v in enumerate(fg.vs):
            # The vertex name is used as the numeric user id for the lookup.
            # NOTE(review): assumes every vertex has a document in 'com' with
            # a string 'description'; find_one returning None would fail here.
            user = dbcom.find_one({'id': int(fg.vs[idv]['name'])})
            # Drop quote characters (the js strings are single-quoted) and
            # collapse all whitespace runs to single blanks.
            desc = ' '.join(user['description'].replace('\'', '').replace(
                '\"', '').split())
            # Node ids are 1-based; 'value' is the in-degree; x / y are the
            # group anchor plus a random offset of 0..300.
            fw.write('{id: ' + str(idv + 1) + ', ' + 'label: \'' +
                     user['screen_name'] + '\', ' + 'value: ' +
                     str(fg.degree(idv, mode="in")) + ', ' + 'title: \'UID: ' +
                     str(fg.vs[idv]['name']) + '<br> Screen Name: ' +
                     user['screen_name'] + '<br> Followers: ' +
                     str(user['followers_count']) + '<br> Followees: ' +
                     str(user['friends_count']) + '<br> Tweets: ' +
                     str(user['statuses_count']) + '<br> Description: ' +
                     str(desc.encode('utf-8')) + '<br> Group: ' +
                     str(fg.vs[idv]['group']) + '\', ' + 'x: ' +
                     str(Coo[fg.vs[idv]['group']][0] + rand.randint(0, 300)) +
                     ', ' + 'y: ' +
                     str(Coo[fg.vs[idv]['group']][1] + rand.randint(0, 300)) +
                     ', ' + 'group: ' + str(fg.vs[idv]['group']) + ', ')
            # if int(fg.vs[idv]['name']) in ed_users:
            #     fw.write('shape: ' + '\'triangle\'')
            # else:
            #     fw.write('shape: ' + '\'circle\'')
            # Close the node object that the write above left open.
            fw.write('}, \n')
        fw.write('];\n var edges = [\n')
        for ide, e in enumerate(fg.es):
            # Edge endpoints use the same 1-based ids as the nodes.
            fw.write('{from: ' + str(e.source + 1) + ', ' + 'to: ' +
                     str(e.target + 1) + ', ' + 'arrows: ' + '\'to\'' + ', ' +
                     'title: \' Tags: ' + fg.vs[e.source]['name'] + ' ' +
                     fg.vs[e.target]['name'] + '<br> Co-occurrence: ' +
                     str(fg.es[ide]['weight']) + '\', ' + 'value: ' +
                     str(fg.es[ide]['weight']) +
                     '},\n')  #str(fg.es[ide]['weight'])
        fw.write('];\n')
Beispiel #18
0
if __name__ == '__main__':
    # Ad-hoc driver: the commented-out calls are analyses that can be toggled
    # by hand. The only active step loads ed-net-all.graphml and prints its
    # network statistics.

    # print diff_day(datetime(2010, 10,1), datetime(2010,9,1))
    # from lifelines.utils import k_fold_cross_validation
    # count_longest_tweeting_period('fed', 'timeline', 'com')
    # count_longest_tweeting_period('random', 'timeline', 'scom')
    # count_longest_tweeting_period('younger', 'timeline', 'scom')
    # read_user_time('user-durations-2.csv')
    # user_active()
    # read_user_time_iv('user-durations-iv-following-senti.csv')
    # cluster_hashtag()

    # insert_timestamp('fed2', 'com')
    network1 = gt.Graph.Read_GraphML('ed-net-all.graphml')
    gt.net_stat(network1)
    # gt.summary(network1)
    # network1_gc = gt.giant_component(network1)
    # gt.summary(network1_gc)

    # compare_dropouts_withemotions()
    # tfidf_stat()
    # tfidf_stat_dropout()
    # tag_similarity_group_dropout_emotion()
    # tag_similarity_group_conflit_all()


    # sentiment_bmi('fed', 'com')
    # com = dbt.db_connect_col('fed', 'com')
    # user_sentiment = {}
    # for u in com.find({}):
Beispiel #19
0
def friendship_community_vis(dbname, colname, filename, ctype):
    '''
    Out graph for vis.js visualization

    Loads a network, reduces it to its giant component, runs community
    detection and writes 'var nodes' / 'var edges' arrays to
    data/<ctype>_<filename>_net_follow.js.
    :param dbname: database holding the network and the 'scom' / 'com' collections
    :param colname: network collection passed to gt.load_network
    :param filename: middle part of the output file name
    :param ctype: 'ml' -> multilevel, 'lp' -> label propagation seeded by a
        two-cluster leading-eigenvector split, anything else -> infomap
    :return: None
    '''
    # NOTE(review): ed_users is only referenced by commented-out code below.
    ed_users = iot.get_values_one_field(dbname, 'scom', 'id')
    # fed_users = iot.get_values_one_field(dbname, 'com', 'id')
    dbcom = dbt.db_connect_col(dbname, 'com')
    fg = gt.load_network(dbname, colname)
    # fg = gt.load_beh_network_subset(ed_users, dbname, colname, 'retweet')
    gt.net_stat(fg)
    # fg = fg.as_undirected(mode="mutual")
    # gt.net_stat(fg)

    # Continue with the giant component only ('WEAK' is passed to the helper).
    fg = gt.giant_component(fg, 'WEAK')
    gt.net_stat(fg)

    if ctype == 'ml':
        com = fg.community_multilevel(weights='weight', return_levels=False)
    elif ctype == 'lp':
        # The initial labels come from a two-cluster leading-eigenvector
        # split of the undirected graph (edge attributes combined by sum).
        fgu = fg.as_undirected(combine_edges=sum)
        init = fgu.community_leading_eigenvector(clusters=2, weights='weight')
        print init.membership
        com = fg.community_label_propagation(weights='weight', initial=init.membership)
        print com.membership
    else:
        com = fg.community_infomap(edge_weights='weight', trials=2)
    fg.vs['group'] = com.membership

    # edges = fg.es.select(weight_gt=3)
    # print 'Filtered edges: %d' %len(edges)
    # fg = fg.subgraph_edges(edges)
    # gt.net_stat(fg)

    # fg.vs['degree'] = fg.degree(mode="all")
    # nodes = fg.vs.select(degree_gt=10)
    # fg = fg.subgraph(nodes)
    # gt.net_stat(fg)

    # Random anchor point per community. It is redrawn for every vertex, so
    # the last draw for a group wins.
    Coo={}
    for x in fg.vs['group']:
        Coo[x]=(rand.randint(-1000, 1000), rand.randint(-1000, 1000))

    with open('data/' + ctype + '_' +filename+'_net_follow.js', 'w') as fw:
        fw.write('var nodes = [\n')
        for idv, v in enumerate(fg.vs):
            # The vertex name is used as the numeric user id for the lookup.
            # NOTE(review): assumes every vertex has a document in 'com' with
            # a string 'description'; find_one returning None would fail here.
            user = dbcom.find_one({'id': int(fg.vs[idv]['name'])})
            # Strip quote characters (the js strings are single-quoted) and
            # collapse whitespace runs to single blanks.
            desc = ' '.join(user['description'].replace('\'', '').replace('\"', '').split())
            # Node ids are 1-based; 'value' is the in-degree; x / y are the
            # group anchor plus a random offset of 0..300.
            fw.write('{id: ' + str(idv+1) + ', '+
                     'label: \'' + user['screen_name'] +'\', ' +
                     'value: ' + str(fg.degree(idv, mode="in")) + ', ' +
                     'title: \'UID: ' + str(fg.vs[idv]['name']) +
                     '<br> Screen Name: ' + user['screen_name'] +
                     '<br> Followers: ' + str(user['followers_count']) +
                     '<br> Followees: ' + str(user['friends_count']) +
                     '<br> Tweets: ' + str(user['statuses_count']) +
                     '<br> Description: ' + str(desc.encode('utf-8')) +
                     '<br> Group: ' + str(fg.vs[idv]['group']) + '\', ' +
                     'x: ' + str(Coo[fg.vs[idv]['group']][0]+rand.randint(0, 300)) + ', ' +
                     'y: ' + str(Coo[fg.vs[idv]['group']][1]+rand.randint(0, 300)) + ', ' +
                     'group: ' + str(fg.vs[idv]['group']) + ', ')
            # if int(fg.vs[idv]['name']) in ed_users:
            #     fw.write('shape: ' + '\'triangle\'')
            # else:
            #     fw.write('shape: ' + '\'circle\'')
            # Close the node object that the write above left open.
            fw.write('}, \n')
        fw.write('];\n var edges = [\n')
        for ide, e in enumerate(fg.es):
            # Edge endpoints use the same 1-based ids as the nodes.
            fw.write('{from: ' + str(e.source+1) + ', ' +
                     'to: ' + str(e.target+1) + ', ' +
                     'arrows: ' + '\'to\'' + ', ' +
                     'title: \' Tags: ' + fg.vs[e.source]['name'] + ' ' + fg.vs[e.target]['name'] +
                     '<br> Co-occurrence: ' + str(fg.es[ide]['weight']) + '\', ' +
                     'value: ' + str(fg.es[ide]['weight']) +
                     '},\n') #str(fg.es[ide]['weight'])
        fw.write('];\n')
Beispiel #20
0
def community_vis(filename, ctype):
    '''
    Write a community-coloured tag network as vis.js input.

    The graph comes from <filename>_tag_undir.graphml. After community
    detection the membership is stored in the vertex attribute 'group',
    mixing_para is called on the graph, and the node and edge arrays are
    written to data/<ctype>_<filename>_tag_undir.js.
    :param filename: prefix of the GraphML input and of the js output
    :param ctype: 'ml' for multilevel communities, otherwise infomap is used
    :return: None
    '''
    g = gt.Graph.Read_GraphML(filename + '_tag_undir.graphml')
    gt.net_stat(g)
    # g = gt.giant_component(g)

    # Community detection
    if ctype == 'ml':
        com = g.community_multilevel(weights='weight', return_levels=False)
    else:
        com = g.community_infomap(edge_weights='weight',
                                  vertex_weights='weight')
    print(com)
    g.vs['group'] = com.membership
    mixing_para(g)

    # Random centre per community; drawn once per vertex, last draw is kept.
    centres = {}
    for label in g.vs['group']:
        centres[label] = (rand.randint(-600, 600), rand.randint(-600, 600))

    def node_record(position, vertex):
        # One js object literal per vertex; ids are 1-based and the position
        # is the community centre plus a random offset of 0..300.
        label = vertex['group']
        x_coord = centres[label][0] + rand.randint(0, 300)
        y_coord = centres[label][1] + rand.randint(0, 300)
        parts = ['{id: ', str(position + 1),
                 ', ', 'label: \'', vertex['name'], '\', ',
                 'value: ', str(vertex['weight']), ', ',
                 'title: \' Tags: ', vertex['name'],
                 '<br> Occurrence: ', str(vertex['weight']),
                 '<br> Group: ', str(label), '\', ',
                 'x: ', str(x_coord), ', ',
                 'y: ', str(y_coord), ', ',
                 'group: ', str(label), '}, \n']
        record = parts[0]
        for part in parts[1:]:
            record = record + part
        return record

    def edge_record(edge):
        # One js object literal per edge, using the same 1-based node ids.
        weight_text = str(edge['weight'])
        return ('{from: ' + str(edge.source + 1) + ', ' +
                'to: ' + str(edge.target + 1) + ', ' +
                'title: \' Tags: ' + g.vs[edge.source]['name'] + ' ' +
                g.vs[edge.target]['name'] +
                '<br> Co-occurrence: ' + weight_text + '\', ' +
                'value: ' + weight_text + '},\n')

    with open('data/' + ctype + '_' + filename + '_tag_undir.js', 'w') as fw:
        fw.write('var nodes = [\n')
        for position, vertex in enumerate(g.vs):
            fw.write(node_record(position, vertex))
        fw.write('];\n var edges = [\n')
        for edge in g.es:
            fw.write(edge_record(edge))
        fw.write('];\n')
Beispiel #21
0
def compare_communities(file_path):
    '''
    Print network statistics for each community returned by two_community.

    :param file_path: passed unchanged to two_community
    :return: None
    '''
    for community in two_community(file_path):
        gt.net_stat(community)