Beispiel #1
0
def tags_two_user_moduls():
    """Write one hashtag network per modularity class of the user graph.

    Reads ``communication-3-moduls.graphml``, groups the vertex names (cast
    to ``int``) by their ``Modularity Class`` attribute (0, 1 or 2), and for
    each class calls ``gt.load_hashtag_coocurrent_network_undir`` on
    ``'fed'`` / ``'ed_tag'`` with that class's ids, prints a summary of the
    result and saves it as GraphML.
    """
    # Load the network exported from Gephi.
    user_graph = gt.Graph.Read_GraphML('communication-3-moduls.graphml')

    # Vertices whose class is not 0, 1 or 2 are ignored.
    members = {0: set(), 1: set(), 2: set()}
    for vertex in user_graph.vs:
        label = vertex['Modularity Class']
        if label in members:
            members[label].add(int(vertex['name']))

    # Output stems are reproduced exactly, including the double underscore
    # in the stem used for class 1.
    stems = (
        (0, 'communication_fed_cluster0'),
        (1, 'communication__fed_cluster1'),
        (2, 'communication_fed_cluster2'),
    )
    for label, stem in stems:
        tag_graph = gt.load_hashtag_coocurrent_network_undir(
            'fed', 'ed_tag', list(members[label]))
        gt.summary(tag_graph)
        tag_graph.write_graphml(stem + '_tag_undir.graphml')
Beispiel #2
0
def cluster_hashtag(filepath= 'user-durations-iv-following-senti.csv'):
    """Write hashtag networks for non-dropout and dropout ED users.

    Reads the CSV at ``filepath``, keeps rows whose ``f_num /
    u_friends_count`` ratio exceeds 0.01 and whose ``group`` is ``'ED'``,
    splits their ``uid`` values by the ``dropout`` flag (0 / 1), prints both
    counts, and stores one network per subset in ``nondropout-tag.graphml``
    and ``dropout-tag.graphml``.
    """
    frame = pd.read_csv(filepath)
    frame['f_ratio'] = frame.f_num / frame.u_friends_count
    ed_rows = frame[(frame['f_ratio'] > 0.01) & (frame['group'] == 'ED')]

    def _uids_with_flag(flag):
        # User ids (as ints) of the ED rows whose dropout column equals flag.
        return [int(uid) for uid in ed_rows[ed_rows['dropout'] == flag]['uid']]

    stayed_ids = _uids_with_flag(0)
    left_ids = _uids_with_flag(1)
    print('%d %d' % (len(stayed_ids), len(left_ids)))

    # Both networks are loaded before either one is written.
    stayed_net = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', stayed_ids)
    left_net = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', left_ids)
    stayed_net.write_graphml('nondropout-tag.graphml')
    left_net.write_graphml('dropout-tag.graphml')
Beispiel #3
0
def compare_dropouts_withemotions(filepath= 'user-durations-iv-following-senti-TE.csv'):
    """Write hashtag networks for ED users ranked by eigenvector centrality.

    Steps:

    1. Read the CSV at ``filepath`` and keep rows whose
       ``f_num / u_friends_count`` ratio exceeds 0.01 and whose ``group`` is
       ``'ED'`` or ``'YG'``.
    2. Replace prior scores (``u_prior_scalem`` / ``f_prior_scalem``) that
       are exactly 0.0 with the mean of that column within the row's group.
    3. Compute relative changes ``(post - prior) / prior`` for the ``u_`` and
       ``f_`` scores.
    4. Select ED rows with ``dropout == 0``, sort them by ``u_eigenvector``
       in ascending order, and write one network for the lower half, one for
       the upper half and one for all of them.

    NOTE(review): despite the function name, the selection in step 4 keeps
    rows with ``dropout == 0`` -- confirm this is the intended subset.
    """
    df = pd.read_csv(filepath)
    df['f_ratio'] = df.f_num / df.u_friends_count
    data = df[df['f_ratio'] > 0.01]
    # Take an explicit copy: the columns assigned below would otherwise be
    # written to a filtered slice of ``data`` (SettingWithCopy).
    datasub = data[data['group'].isin(['ED', 'YG'])].copy()

    # Compute every group mean first, then impute, so that no mean is
    # affected by an earlier replacement.
    targets = [(group, column)
               for group in ('ED', 'YG')
               for column in ('u_prior_scalem', 'f_prior_scalem')]
    group_means = [np.mean(datasub[datasub.group == group][column])
                   for group, column in targets]
    for (group, column), mean_value in zip(targets, group_means):
        is_missing = (datasub[column] == 0.0) & (datasub.group == group)
        datasub[column] = np.where(is_missing, mean_value, datasub[column])

    datasub['u_changes'] = (datasub.u_post_scalem - datasub.u_prior_scalem) / datasub.u_prior_scalem
    datasub['f_changes'] = (datasub.f_post_scalem - datasub.f_prior_scalem) / datasub.f_prior_scalem

    ranked = datasub[(datasub.group == 'ED') & (datasub.dropout == 0)][
        ['u_whole_scalem', 'u_changes', 'uid', 'u_eigenvector']]
    print(len(ranked))

    # ``DataFrame.sort`` no longer exists; ``sort_values`` is its
    # replacement, and ``ascending`` must be a real boolean (small to large).
    ranked = ranked.sort_values('u_eigenvector', ascending=True)

    print(ranked)

    total = len(ranked)
    for i in range(2):
        # Floor division keeps the bounds integral under true division too.
        start, end = i * total // 2, (i + 1) * total // 2
        print('%d %d' % (start, end))
        # Positional slice: the frame keeps its original (unsorted) labels.
        uidlist = [int(uid) for uid in ranked['uid'].iloc[start:end]]
        net_dropout = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', uidlist)
        net_dropout.write_graphml(str(i) + 'dropout-tag-centrality2-rank.graphml')

    uidlist = [int(uid) for uid in ranked['uid']]
    net_dropout = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', uidlist)
    net_dropout.write_graphml('dropout-tag-centrality2-rank-all.graphml')
Beispiel #4
0
def tags_user_cluster(graph_file_path, filename):
    """Write filtered hashtag networks for clusters 0 and 1 of a user graph.

    Reads the GraphML file at ``graph_file_path`` and groups vertex names
    (cast to ``int``) by their ``cluster`` attribute. For cluster 0 and then
    cluster 1 it prints the cluster size, loads a network through
    ``gt.load_hashtag_coocurrent_network_undir`` on ``'fed'`` / ``'ed_tag'``,
    keeps only vertices selected by ``weight_gt=3, user_gt=3``, and writes the
    subgraph to ``filename + 'tag_undir_cluster<N>.graphml'``. A summary is
    printed before and after filtering.
    """
    user_graph = gt.Graph.Read_GraphML(graph_file_path)
    gt.summary(user_graph)

    # Names with cluster -1 are collected as well but not exported.
    members = {0: set(), 1: set(), -1: set()}
    for vertex in user_graph.vs:
        label = vertex['cluster']
        if label in members:
            members[label].add(int(vertex['name']))

    for label in (0, 1):
        user_ids = members[label]
        print('cluster size; %d' % len(user_ids))
        tag_graph = gt.load_hashtag_coocurrent_network_undir(
            'fed', 'ed_tag', list(user_ids))
        gt.summary(tag_graph)
        kept = tag_graph.vs(weight_gt=3, user_gt=3)
        tag_graph = tag_graph.subgraph(kept)
        gt.summary(tag_graph)
        tag_graph.write_graphml(filename + 'tag_undir_cluster%d.graphml' % label)
Beispiel #5
0
def tag_record(dbname, colname, filename):
    """Load a hashtag network, print its summary, save it and return it.

    The network comes from ``gt.load_hashtag_coocurrent_network_undir(dbname,
    colname)`` and is written to ``filename + '_tag_undir.graphml'``.
    """
    network = gt.load_hashtag_coocurrent_network_undir(dbname, colname)
    gt.summary(network)
    output_path = filename + '_tag_undir.graphml'
    network.write_graphml(output_path)
    return network
Beispiel #6
0
    # g.write_graphml('alled_tag.graphml')
    # gt.summary(g)
    # # g = gt.Graph.Read_GraphML('core_ed_tag_undir.graphml')
    # nodes = g.vs.select(weight_gt=3)
    # print 'Filtered nodes: %d' %len(nodes)
    # g = g.subgraph(nodes)
    # nodes = g.vs.select(user_gt=3)
    # print 'Filtered nodes: %d' %len(nodes)
    # g = g.subgraph(nodes)
    # # g = pmi(g)
    # g.write_graphml('alled_tag_filter.graphml')
    '''undirected network'''
    # users_net = gt.Graph.Read_GraphML('communication-only-fed-filter.graphml')
    # users = [int(uid) for uid in users_net.vs['name']]
    print 'Read fed timeline excluding all retweets'
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline')
    g.write_graphml('fed_tag_undir_nort.graphml')
    # # g = gt.Graph.Read_GraphML('core_ed_tag_undir.graphml')
    # gt.summary(g)
    # nodes = g.vs.select(weight_gt=3)
    # print 'Filtered nodes: %d' %len(nodes)
    # g = g.subgraph(nodes)
    # nodes = g.vs.select(user_gt=3)
    # print 'Filtered nodes: %d' %len(nodes)
    # g = g.subgraph(nodes)
    # g.write_graphml('ed_tag_undir_filter.graphml')

    #-----------------------Filter network-----------------------------------------------

    #-----------------------Community detection-----------------------------------------------
    # g = gt.Graph.Read_GraphML('core_ed_hashtag_filter.graphml')
Beispiel #7
0
def tag_net(dbname, colname, filename):
    """Load a hashtag network, print its summary and save it as GraphML.

    The network comes from ``gt.load_hashtag_coocurrent_network_undir(dbname,
    colname)`` -- per the original note, all tags excluding retweets
    (TODO confirm against the loader) -- and is written to
    ``filename + '_tag_undir.graphml'``.
    """
    network = gt.load_hashtag_coocurrent_network_undir(dbname, colname)
    gt.summary(network)
    output_path = filename + '_tag_undir.graphml'
    network.write_graphml(output_path)
Beispiel #8
0
def hashtag_net(dbname, colname):
    """Build the hashtag network for a collection and save it to ``tag.graphml``.

    The network is obtained from
    ``gt.load_hashtag_coocurrent_network_undir(dbname, colname)``.
    """
    network = gt.load_hashtag_coocurrent_network_undir(dbname, colname)
    network.write_graphml('tag.graphml')