def tags_two_user_moduls():
    """Split a Gephi-clustered communication network into its three
    modularity classes and export one undirected hashtag co-occurrence
    network per class.

    Reads 'communication-3-moduls.graphml' (Gephi output carrying a
    'Modularity Class' vertex attribute) and, for each class, writes
    'communication_fed_cluster<i>_tag_undir.graphml'.
    """
    # Load network from Gephi output.
    g = gt.Graph.Read_GraphML('communication-3-moduls.graphml')
    cluster0, cluster1, cluster2 = set(), set(), set()
    for v in g.vs:
        # Vertex names are stringified user ids; bucket them by class.
        if v['Modularity Class'] == 0:
            cluster0.add(int(v['name']))
        elif v['Modularity Class'] == 1:
            cluster1.add(int(v['name']))
        elif v['Modularity Class'] == 2:
            cluster2.add(int(v['name']))
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag', list(cluster0))
    gt.summary(g)
    filename = 'communication_fed_cluster0'
    g.write_graphml(filename + '_tag_undir.graphml')
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag', list(cluster1))
    gt.summary(g)
    # NOTE(review): double underscore here looks like a typo, but the
    # output name is preserved because downstream steps may rely on it
    # — confirm before normalizing.
    filename = 'communication__fed_cluster1'
    g.write_graphml(filename + '_tag_undir.graphml')
    g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag', list(cluster2))
    gt.summary(g)
    filename = 'communication_fed_cluster2'
    g.write_graphml(filename + '_tag_undir.graphml')
def cluster_hashtag(filepath= 'user-durations-iv-following-senti.csv'): # read hashtag networks for dropouts and non-dropouts df = pd.read_csv(filepath) df['f_ratio'] = df.f_num/df.u_friends_count datat = df[df['f_ratio']>0.01] data = datat[datat['group']=='ED'] uids_nondropout = [int(uid) for uid in data[(data['dropout']==0)]['uid']] uids_dropout = [int(uid) for uid in data[(data['dropout']==1)]['uid']] print len(uids_nondropout), len(uids_dropout) net_nondropout = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', uids_nondropout) net_dropout = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', uids_dropout) net_nondropout.write_graphml('nondropout-tag.graphml') net_dropout.write_graphml('dropout-tag.graphml')
def compare_dropouts_withemotions(filepath= 'user-durations-iv-following-senti-TE.csv'): # out hashtags network for users with different emotions df = pd.read_csv(filepath) df['f_ratio'] = df.f_num/df.u_friends_count data = df[df['f_ratio']>0.01] datasub = data[data['group'].isin(['ED', 'YG'])] mean_edu_prior = np.mean(datasub[datasub.group=='ED']['u_prior_scalem']) mean_edf_prior = np.mean(datasub[datasub.group=='ED']['f_prior_scalem']) mean_ygu_prior = np.mean(datasub[datasub.group=='YG']['u_prior_scalem']) mean_ygf_prior = np.mean(datasub[datasub.group=='YG']['f_prior_scalem']) datasub['u_prior_scalem'] = np.where((datasub.u_prior_scalem==0.0) & (datasub.group=='ED'), mean_edu_prior, datasub['u_prior_scalem']) datasub['f_prior_scalem'] = np.where((datasub.f_prior_scalem==0.0) & (datasub.group=='ED'), mean_edf_prior, datasub['f_prior_scalem']) datasub['u_prior_scalem'] = np.where((datasub.u_prior_scalem==0.0) & (datasub.group=='YG'), mean_ygu_prior, datasub['u_prior_scalem']) datasub['f_prior_scalem'] = np.where((datasub.f_prior_scalem==0.0) & (datasub.group=='YG'), mean_ygf_prior, datasub['f_prior_scalem']) datasub['u_changes'] = (datasub.u_post_scalem - datasub.u_prior_scalem)/(datasub.u_prior_scalem) datasub['f_changes'] = (datasub.f_post_scalem - datasub.f_prior_scalem)/(datasub.f_prior_scalem) dropouts = datasub[(datasub.group=='ED') & (datasub.dropout==0)][['u_whole_scalem', 'u_changes', 'uid', 'u_eigenvector']] print len(dropouts) dropouts = dropouts.sort('u_eigenvector', ascending='True') ## small to large print dropouts for i in xrange(2): start, end = i*len(dropouts)/2, (i+1)*len(dropouts)/2 print start, end uidlist = [] for uid in dropouts['uid'][start: end]: uidlist.append(int(uid)) net_dropout = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline', uidlist) net_dropout.write_graphml(str(i) + 'dropout-tag-centrality2-rank.graphml') uidlist = [int(uid) for uid in dropouts['uid']] net_dropout = gt.load_hashtag_coocurrent_network_undir('fed', 
'timeline', uidlist) net_dropout.write_graphml('dropout-tag-centrality2-rank-all.graphml')
def tags_user_cluster(graph_file_path, filename): # put tweet of two cluster into two set g = gt.Graph.Read_GraphML(graph_file_path) # g_mention = gt.Graph.Read_GraphML('ed-communication'+'-hashtag-only-fed-cluster.graphml') gt.summary(g) # gt.summary(g_mention) # for i in range(2): # g = [g_retweet, g_mention][i] cluster0, cluster1, cluster2 = set(), set(), set() for v in g.vs: if v['cluster'] == 0: cluster0.add(int(v['name'])) elif v['cluster'] == 1: cluster1.add(int(v['name'])) elif v['cluster'] == -1: cluster2.add(int(v['name'])) print 'cluster size;', len(cluster0) g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag', list(cluster0)) gt.summary(g) # filename = ['ed_retweet', 'ed_communication'][i] + '_fed_cluster0' vs = g.vs(weight_gt=3, user_gt=3) g = g.subgraph(vs) gt.summary(g) g.write_graphml(filename + 'tag_undir_cluster0.graphml') print 'cluster size;', len(cluster1) g = gt.load_hashtag_coocurrent_network_undir('fed', 'ed_tag', list(cluster1)) gt.summary(g) # filename = ['ed_retweet', 'ed_communication'][i] + '_fed_cluster1' vs = g.vs(weight_gt=3, user_gt=3) g = g.subgraph(vs) gt.summary(g) g.write_graphml(filename + 'tag_undir_cluster1.graphml')
def tag_record(dbname, colname, filename):
    """Build the undirected hashtag co-occurrence network for a
    collection, write it to '<filename>_tag_undir.graphml', and return it.

    :param dbname: source database name.
    :param colname: source collection name.
    :param filename: output file prefix.
    :return: the built igraph graph.
    """
    g = gt.load_hashtag_coocurrent_network_undir(dbname, colname)
    gt.summary(g)
    g.write_graphml(filename + '_tag_undir.graphml')
    return g
# g.write_graphml('alled_tag.graphml') # gt.summary(g) # # g = gt.Graph.Read_GraphML('core_ed_tag_undir.graphml') # nodes = g.vs.select(weight_gt=3) # print 'Filtered nodes: %d' %len(nodes) # g = g.subgraph(nodes) # nodes = g.vs.select(user_gt=3) # print 'Filtered nodes: %d' %len(nodes) # g = g.subgraph(nodes) # # g = pmi(g) # g.write_graphml('alled_tag_filter.graphml') '''undirected network''' # users_net = gt.Graph.Read_GraphML('communication-only-fed-filter.graphml') # users = [int(uid) for uid in users_net.vs['name']] print 'Read fed timeline excluding all retweets' g = gt.load_hashtag_coocurrent_network_undir('fed', 'timeline') g.write_graphml('fed_tag_undir_nort.graphml') # # g = gt.Graph.Read_GraphML('core_ed_tag_undir.graphml') # gt.summary(g) # nodes = g.vs.select(weight_gt=3) # print 'Filtered nodes: %d' %len(nodes) # g = g.subgraph(nodes) # nodes = g.vs.select(user_gt=3) # print 'Filtered nodes: %d' %len(nodes) # g = g.subgraph(nodes) # g.write_graphml('ed_tag_undir_filter.graphml') #-----------------------Filter network----------------------------------------------- #-----------------------Community detection----------------------------------------------- # g = gt.Graph.Read_GraphML('core_ed_hashtag_filter.graphml')
def tag_net(dbname, colname, filename):
    """Build the undirected hashtag network for all tags (excluding
    retweets) and write it to '<filename>_tag_undir.graphml'.

    :param dbname: source database name.
    :param colname: source collection name.
    :param filename: output file prefix.
    :return: the built igraph graph (added for consistency with
        tag_record; backward compatible — the original returned None).
    """
    g = gt.load_hashtag_coocurrent_network_undir(dbname, colname)
    gt.summary(g)
    g.write_graphml(filename + '_tag_undir.graphml')
    return g
def hashtag_net(dbname, colname):
    """Build the undirected hashtag co-occurrence network for a
    collection and write it to 'tag.graphml'.

    :param dbname: source database name.
    :param colname: source collection name.
    """
    g = gt.load_hashtag_coocurrent_network_undir(dbname, colname)
    g.write_graphml('tag.graphml')