def friend_user_change(dbname1, dbname2, comname1, comname2): filter_que = {'level': 1, 'liwc_anal.result.WC':{'$exists':True}} user2 = iot.get_values_one_field(dbname2, comname2, 'id', filter_que) fields = ['liwc_anal.result.posemo', 'liwc_anal.result.negemo', 'liwc_anal.result.body', 'liwc_anal.result.ingest'] network1 = gt.load_network(dbname1, 'net') network2 = gt.load_network(dbname2, 'net') for field in fields: print '-----------------%s----------------' %field user_changes, friends_changes = [], [] for uid in user2: user_feature_old = iot.get_values_one_field(dbname1, comname1, field, {'id': uid}) user_feature_new = iot.get_values_one_field(dbname2, comname2, field, {'id': uid}) if len(user_feature_old) != len(user_feature_new) and len(user_feature_new) != 1: print 'User feature value length %d, %d' %(len(user_feature_old), len(user_feature_new)) user_change = np.mean(user_feature_new) - np.mean(user_feature_old) exist = True try: v = network1.vs.find(name=str(uid)) v = network2.vs.find(name=str(uid)) except ValueError: exist = False if exist: friends_old = network1.successors(str(uid)) friends_new = network2.successors(str(uid)) old_friend_ids = [int(network1.vs[v]['name']) for v in friends_old] new_friend_ids = [int(network2.vs[v]['name']) for v in friends_new] if len(old_friend_ids) != len(new_friend_ids): print 'Friend feature value length %d, %d' % (len(old_friend_ids), len(new_friend_ids)) friends_feature_old = iot.get_values_one_field(dbname1, comname1, field, {'id': {'$in': old_friend_ids}}) friends_feature_new = iot.get_values_one_field(dbname2, comname2, field, {'id': {'$in': new_friend_ids}}) friend_change = np.mean(friends_feature_new) - np.mean(friends_feature_old) friends_changes.append(friend_change) user_changes.append(user_change) pltt.correlation(friends_changes, user_changes, r'$\Delta$(F_'+field+')', r'$\Delta$(U_'+field+')', field+'-friend-user.pdf')
def user_cluster_hashtag(): ''' Cluster users based on the profiles of hashtag preference :return: ''' from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score user_hash_profile = pickle.load(open('data/user-hash-profile.pick', 'r')) X = np.array(user_hash_profile.values()) print X.shape '''Select the best K for K-means''' # range_n_clusters = range(2, 21) # values = [] # for n_clusters in range_n_clusters: # clusterer = KMeans(n_clusters=n_clusters, random_state=10) # cluster_labels = clusterer.fit_predict(X) # silhouette_avg = silhouette_score(X, cluster_labels) # print("For n_clusters =", n_clusters, "The average silhouette_score is :", silhouette_avg) # values.append(silhouette_avg) # print values # print range_n_clusters clusterer = KMeans(n_clusters=2, random_state=10) cluster_labels = clusterer.fit_predict(X) dictionary = dict(zip(user_hash_profile.keys(), cluster_labels)) print 'Follow network' net = gt.load_network('fed', 'snet') gt.net_stat(net) cluster_assort(dictionary, net)
def ed_follow_net(): # construct ED and their followee network g = gt.load_network('fed', 'follownet') g.vs['deg'] = g.indegree() users = set(iot.get_values_one_field('fed', 'scom', 'id')) nodes = [] for v in g.vs: if int(v['name']) in users: nodes.append(v) elif v['deg'] > 5: nodes.append(v) else: pass print 'Filtered nodes: %d' %len(nodes) g = g.subgraph(nodes) gt.summary(g) g.write_graphml('ed-friend'+'.graphml') # sbnet have extended all interactions posted by ED users edusers = set(g.vs['name']) for btype in ['retweet', 'reply', 'mention']: gb = gt.load_beh_network('fed', 'sbnet', btype) gt.summary(gb) nodes = [] for v in gb.vs: if v['name'] in edusers: nodes.append(v) gb = gb.subgraph(nodes) gt.summary(gb) gb.write_graphml('ed-'+btype+'-follow.graphml')
def friend_network_hashtag_weight(dbname, netname): ''' Community detection on friendship network, weighted by hashtag similarity :param dbname: :param netname: :param user_hash_profile: :return: ''' user_hash_profile = pickle.load(open('data/user-hash-profile.pick', 'r')) net = gt.load_network(dbname, netname) fields = iot.read_fields() com = dbt.db_connect_col(dbname, 'scom') for edge in net.es: source_vertex_id = edge.source target_vertex_id = edge.target source_uid = int(net.vs[source_vertex_id]['name']) target_uid = int(net.vs[target_vertex_id]['name']) source_user = com.find_one({'id':source_uid}) target_user = com.find_one({'id':target_uid}) source_user_liwc = iot.get_fields_one_doc(source_user, fields) target_user_liwc = iot.get_fields_one_doc(target_user, fields) source_user_liwc.extend(user_hash_profile[source_uid]) target_user_liwc.extend(user_hash_profile[target_uid]) print len(target_user_liwc) dis = spatial.distance.euclidean(source_user_liwc, target_user_liwc) edge['weight'] = 1.0/(1.0 + dis) net.write_graphml('ed_weighted_follow.graphml')
def friend_network_hashtag_weight(dbname, netname): ''' Community detection on friendship network, weighted by hashtag similarity :param dbname: :param netname: :param user_hash_profile: :return: ''' user_hash_profile = pickle.load(open('data/user-hash-profile.pick', 'r')) net = gt.load_network(dbname, netname) fields = iot.read_fields() com = dbt.db_connect_col(dbname, 'scom') for edge in net.es: source_vertex_id = edge.source target_vertex_id = edge.target source_uid = int(net.vs[source_vertex_id]['name']) target_uid = int(net.vs[target_vertex_id]['name']) source_user = com.find_one({'id': source_uid}) target_user = com.find_one({'id': target_uid}) source_user_liwc = iot.get_fields_one_doc(source_user, fields) target_user_liwc = iot.get_fields_one_doc(target_user, fields) source_user_liwc.extend(user_hash_profile[source_uid]) target_user_liwc.extend(user_hash_profile[target_uid]) print len(target_user_liwc) dis = spatial.distance.euclidean(source_user_liwc, target_user_liwc) edge['weight'] = 1.0 / (1.0 + dis) net.write_graphml('ed_weighted_follow.graphml')
def generate(dbname, comname, netname, type):
    '''Load a network and pickle it to data/<dbname><type>.pick.

    type == 'follow' loads the friendship network; anything else loads the
    behavior network of that type restricted to the pickled user-id subset.
    (comname is unused here; the user ids come from the pickle produced
    earlier from that collection.)
    '''
    if type == 'follow':
        g = gt.load_network(dbname, netname)
    else:
        uids = pickle.load(open('data/' + dbname + '-ids.pick', 'r'))
        g = gt.load_beh_network_subset(uids, dbname, netname, type)
    pickle.dump(g, open('data/' + dbname + type + '.pick', 'w'))
def generate(dbname, comname, netname, type):
    '''Pickle a network to data/<dbname><type>.pick.

    'follow' selects the friendship network; other values select the
    behavior network of that type over the previously pickled user ids.
    comname is kept for interface compatibility.
    '''
    if type == 'follow':
        net = gt.load_network(dbname, netname)
    else:
        uids = pickle.load(open('data/' + dbname + '-ids.pick', 'r'))
        net = gt.load_beh_network_subset(uids, dbname, netname, type)
    pickle.dump(net, open('data/' + dbname + type + '.pick', 'w'))
def load_net():
    '''Export follow networks to GraphML: the full ED network plus the
    random/younger networks restricted to their surveyed users.'''
    g = gt.load_network('fed', 'net')
    g.write_graphml('ed-net.graphml')
    for dbname, outfile in (('random', 'rd-net.graphml'),
                            ('younger', 'yg-net.graphml')):
        users = iot.get_values_one_field(dbname, 'scom', 'id')
        sub = gt.load_network_subset(dbname, 'net',
                                     {'user': {'$in': users},
                                      'follower': {'$in': users}})
        sub.write_graphml(outfile)
def follow_network(dbname, colname, filepath):
    '''Dump the follow network as tab-separated lines:
    u<source>\tu<target>\t<weight>.'''
    g = gt.load_network(dbname, colname)
    with open(filepath, 'wb') as fw:
        for e in g.es:
            src = g.vs[e.source]
            dst = g.vs[e.target]
            fw.write('%s\t%s\t%d\n' % ('u' + src['name'],
                                       'u' + dst['name'],
                                       e['weight']))
def output_net_user_data(dbname, comname, netname):
    '''Annotate every network vertex with the user's level ('l') and ED
    state ('ed'), then write the graph to <dbname>-<netname>.graphml.'''
    g = gt.load_network(dbname, netname)
    gt.summary(g)
    com = dbt.db_connect_col(dbname, comname)
    for v in g.vs:
        user = com.find_one({'id': int(v['name'])})
        v['l'] = user['level']
        v['ed'] = profiles_check.check_ed(user)
    g.write_graphml(dbname + '-' + netname + '.graphml')
def network_stats(dbname, com, fnet, bnet): fields = iot.read_fields() # print ('Feature, #Nodes, #Edges, %Nodes, %Edges, D_assort, F_assort, F_assort, Mean, STD, z_sore, p_value') print( 'Network_Feature \t #Nodes \t #Edges \t X_Min \t X_Max \t X_P2.5 \t X_P97.5 \t Y_Min \t Y_Max \t Y_P2.5 \t Y_P97.5 \t Tau_coef \t p_value' ) print 'Following' fnetwork = gt.load_network(dbname, fnet) '''Out put file for Gephi''' # fnetwork.write_dot('friendship.DOT') gt.net_stat(fnetwork) # outputs = feature_assort_friend(fnetwork, dbname, com, fields, directed=True) outputs = rank_feature(fnetwork, dbname, com, fields, directed=True)
def pro_ed_rec_network(dbname, comname, netname): g = gt.load_network(dbname, netname) # g = gt.load_beh_network(dbname, 'sbnet', 'mention') rec_users = rec_user(dbname, comname) pro_users = proed_users(dbname, comname) print len(rec_users) print len(pro_users) g.vs['set'] = 0 for user in rec_users: exist = True try: v = g.vs.find(name=str(user)) except ValueError: exist = False if exist: v['set'] += 1 # +1 Pro-rec for user in pro_users: exist = True try: v = g.vs.find(name=str(user)) except ValueError: exist = False if exist: v['set'] -= 1 # -1 Pro-ED vs = g.vs(set_ne=0) sg = g.subgraph(vs) gt.net_stat(sg) # sgc = gt.giant_component(sg) # gt.net_stat(sgc) '''Test signifi''' raw_assort = sg.assortativity('set', 'set', directed=True) raw_values = np.array(sg.vs['set']) ass_list = list() for i in xrange(3000): np.random.shuffle(raw_values) sg.vs["set"] = raw_values ass_list.append(sg.assortativity('set', 'set', directed=True)) ass_list = np.array(ass_list) amean, astd = np.mean(ass_list), np.std(ass_list) absobserved = abs(raw_assort) pval = (np.sum(ass_list >= absobserved) + np.sum(ass_list <= -absobserved))/float(len(ass_list)) zscore = (raw_assort-amean)/astd print '%.3f, %.3f, %.3f, %.3f, %.3f' %(raw_assort, amean, astd, zscore, pval) # print str(raw_assort) + ',' + str(amean) + ',' + str(astd) + ',' + str(zscore) + ',' + str(pval) sg.write_graphml('pro-ed-rec-mention.graphml')
def network_stats(dbname, com, fnet, bnet): fields = iot.read_fields() # print ('Feature, #Nodes, #Edges, %Nodes, %Edges, D_assort, F_assort, F_assort, Mean, STD, z_sore, p_value') print ( "Network_Feature \t #Nodes \t #Edges \t X_Min \t X_Max \t X_P2.5 \t X_P97.5 \t Y_Min \t Y_Max \t Y_P2.5 \t Y_P97.5 \t Tau_coef \t p_value" ) print "Following" fnetwork = gt.load_network(dbname, fnet) """Out put file for Gephi""" # fnetwork.write_dot('friendship.DOT') gt.net_stat(fnetwork) # outputs = feature_assort_friend(fnetwork, dbname, com, fields, directed=True) outputs = rank_feature(fnetwork, dbname, com, fields, directed=True)
def ed_friend_num(dbname, comname, netname, flag): ''' Only one-round social network are used ''' filter_user = {'level': 1} users = iot.get_values_one_field(dbname, comname, 'id', filter_user) net = gt.load_network(dbname, netname) com = dbt.db_connect_col(dbname, comname) data = [] for uid in users: row = [uid, flag] exist = True try: v = net.vs.find(name=str(uid)) except ValueError: exist = False if exist: followees = set([int(net.vs[v]['name']) for v in net.successors(str(uid))]) followers = set([int(net.vs[v]['name']) for v in net.predecessors(str(uid))]) common = followees.intersection(followers) followees = followees - common followers = followers - common for fids in [followees, followers, common]: if len(fids) > 0: print uid in fids print len(fids) ed_num, noned_num = 0, 0 for fid in fids: ed_flag = profiles_check.check_ed(com.find_one({'id': fid})) if ed_flag: ed_num += 1 else: noned_num += 1 row.extend([ed_num, noned_num]) else: row.extend([None] * 2) data.append(row) return data
def friendship_community_vis(dbname, colname, filename, ctype): '''Out graph for vis.js visualization''' ed_users = iot.get_values_one_field(dbname, 'scom', 'id') # fed_users = iot.get_values_one_field(dbname, 'com', 'id') dbcom = dbt.db_connect_col(dbname, 'com') fg = gt.load_network(dbname, colname) # fg = gt.load_beh_network_subset(ed_users, dbname, colname, 'retweet') gt.net_stat(fg) # fg = fg.as_undirected(mode="mutual") # gt.net_stat(fg) fg = gt.giant_component(fg, 'WEAK') gt.net_stat(fg) if ctype == 'ml': com = fg.community_multilevel(weights='weight', return_levels=False) elif ctype == 'lp': fgu = fg.as_undirected(combine_edges=sum) init = fgu.community_leading_eigenvector(clusters=2, weights='weight') print init.membership com = fg.community_label_propagation(weights='weight', initial=init.membership) print com.membership else: com = fg.community_infomap(edge_weights='weight', trials=2) fg.vs['group'] = com.membership # edges = fg.es.select(weight_gt=3) # print 'Filtered edges: %d' %len(edges) # fg = fg.subgraph_edges(edges) # gt.net_stat(fg) # fg.vs['degree'] = fg.degree(mode="all") # nodes = fg.vs.select(degree_gt=10) # fg = fg.subgraph(nodes) # gt.net_stat(fg) Coo = {} for x in fg.vs['group']: Coo[x] = (rand.randint(-1000, 1000), rand.randint(-1000, 1000)) with open('data/' + ctype + '_' + filename + '_net_follow.js', 'w') as fw: fw.write('var nodes = [\n') for idv, v in enumerate(fg.vs): user = dbcom.find_one({'id': int(fg.vs[idv]['name'])}) desc = ' '.join(user['description'].replace('\'', '').replace( '\"', '').split()) fw.write('{id: ' + str(idv + 1) + ', ' + 'label: \'' + user['screen_name'] + '\', ' + 'value: ' + str(fg.degree(idv, mode="in")) + ', ' + 'title: \'UID: ' + str(fg.vs[idv]['name']) + '<br> Screen Name: ' + user['screen_name'] + '<br> Followers: ' + str(user['followers_count']) + '<br> Followees: ' + str(user['friends_count']) + '<br> Tweets: ' + str(user['statuses_count']) + '<br> Description: ' + str(desc.encode('utf-8')) + '<br> 
Group: ' + str(fg.vs[idv]['group']) + '\', ' + 'x: ' + str(Coo[fg.vs[idv]['group']][0] + rand.randint(0, 300)) + ', ' + 'y: ' + str(Coo[fg.vs[idv]['group']][1] + rand.randint(0, 300)) + ', ' + 'group: ' + str(fg.vs[idv]['group']) + ', ') # if int(fg.vs[idv]['name']) in ed_users: # fw.write('shape: ' + '\'triangle\'') # else: # fw.write('shape: ' + '\'circle\'') fw.write('}, \n') fw.write('];\n var edges = [\n') for ide, e in enumerate(fg.es): fw.write('{from: ' + str(e.source + 1) + ', ' + 'to: ' + str(e.target + 1) + ', ' + 'arrows: ' + '\'to\'' + ', ' + 'title: \' Tags: ' + fg.vs[e.source]['name'] + ' ' + fg.vs[e.target]['name'] + '<br> Co-occurrence: ' + str(fg.es[ide]['weight']) + '\', ' + 'value: ' + str(fg.es[ide]['weight']) + '},\n') #str(fg.es[ide]['weight']) fw.write('];\n')
def friend_dis(dbname, comname, netname, tagets):
    '''Store each user's ED follower/following counts and proportions into
    the 'net_anal.ed_proportion' field of the user collection.

    Note: igraph's Graph.neighbors always includes the input vertex itself,
    so its result is one longer than successors/predecessors; the latter
    two are used here.

    :param tagets: set of ED user ids (ints) to intersect against
    '''
    db = dbt.db_connect_no_auth(dbname)
    com = db[comname]
    g = gt.load_network(dbname, netname)
    gt.add_attributes(g, ['followers_count', 'friends_count'],
                      dbname, comname, ['followers_count', 'friends_count'])
    gt.summary(g)
    for user in com.find({}, ['id', 'net_anal']):
        uid = user['id']
        values = user.get('net_anal', {'mined': True})
        try:
            v = g.vs.find(name=str(uid))
        except ValueError:
            continue  # user not in the network: nothing to record
        followers = g.successors(str(uid))
        followings = g.predecessors(str(uid))
        follower_set = set(int(name) for name in g.vs[followers]['name'])
        following_set = set(int(name) for name in g.vs[followings]['name'])
        ed_follower = len(tagets & follower_set)
        ed_following = len(tagets & following_set)
        # Guard against zero division when profile counts are zero.
        follower = v['followers_count']
        if follower == 0:
            follower = 1
        following = v['friends_count']
        if following == 0:
            following = 1
        ed_follower_p = float(ed_follower) / follower
        ed_following_p = float(ed_following) / following
        net_sta = {
            'ed_follower_no': ed_follower,
            'ed_following_no': ed_following,
            'ed_follower_p': ed_follower_p,
            'ed_following_p': ed_following_p,
            'non_ed_follower_p': 1 - ed_follower_p,
            'non_ed_following_p': 1 - ed_following_p,
        }
        values['ed_proportion'] = net_sta
        com.update_one({'id': uid}, {'$set': {'net_anal': values}}, upsert=True)
# gt.net_stat(bnetwork) # # outputs = feature_assort_friend(bnetwork, dbname, com, fields, directed=True) # outputs = rank_feature(bnetwork, dbname, com, fields, directed=True) # # pickle.dump(outputs, open('data/'+beh+'_assort_all.pick', 'w')) # # outputs = pickle.load(open('data/'+beh+'_assort_all.pick', 'r')) # # display(outputs, 101) def calculate_extenal_user(): # Calculate how many users have been retweeted by ED but do not exist in ED users users = set(iot.get_values_one_field('fed', 'com', 'id')) print len(users) net = dbt.db_connect_col('fed', 'sbnet') i, count = 0, 0 for record in net.find(): if (record['id0'] not in users) or (record['id1'] not in users): i = +1 count += 1 print i, count, float(i) / count if __name__ == '__main__': # network_stats('fed', 'scom', 'snet', 'sbnet') # calculate_extenal_user() # ED_followee() for dbname in ['fed', 'fed2', 'fed3', 'fed4']: g = gt.load_network(dbname, 'net') g.write_graphml('data/' + dbname + '.graphml')
def friendship_community_vis(dbname, colname, filename, ctype): '''Out graph for vis.js visualization''' ed_users = iot.get_values_one_field(dbname, 'scom', 'id') # fed_users = iot.get_values_one_field(dbname, 'com', 'id') dbcom = dbt.db_connect_col(dbname, 'com') fg = gt.load_network(dbname, colname) # fg = gt.load_beh_network_subset(ed_users, dbname, colname, 'retweet') gt.net_stat(fg) # fg = fg.as_undirected(mode="mutual") # gt.net_stat(fg) fg = gt.giant_component(fg, 'WEAK') gt.net_stat(fg) if ctype == 'ml': com = fg.community_multilevel(weights='weight', return_levels=False) elif ctype == 'lp': fgu = fg.as_undirected(combine_edges=sum) init = fgu.community_leading_eigenvector(clusters=2, weights='weight') print init.membership com = fg.community_label_propagation(weights='weight', initial=init.membership) print com.membership else: com = fg.community_infomap(edge_weights='weight', trials=2) fg.vs['group'] = com.membership # edges = fg.es.select(weight_gt=3) # print 'Filtered edges: %d' %len(edges) # fg = fg.subgraph_edges(edges) # gt.net_stat(fg) # fg.vs['degree'] = fg.degree(mode="all") # nodes = fg.vs.select(degree_gt=10) # fg = fg.subgraph(nodes) # gt.net_stat(fg) Coo={} for x in fg.vs['group']: Coo[x]=(rand.randint(-1000, 1000), rand.randint(-1000, 1000)) with open('data/' + ctype + '_' +filename+'_net_follow.js', 'w') as fw: fw.write('var nodes = [\n') for idv, v in enumerate(fg.vs): user = dbcom.find_one({'id': int(fg.vs[idv]['name'])}) desc = ' '.join(user['description'].replace('\'', '').replace('\"', '').split()) fw.write('{id: ' + str(idv+1) + ', '+ 'label: \'' + user['screen_name'] +'\', ' + 'value: ' + str(fg.degree(idv, mode="in")) + ', ' + 'title: \'UID: ' + str(fg.vs[idv]['name']) + '<br> Screen Name: ' + user['screen_name'] + '<br> Followers: ' + str(user['followers_count']) + '<br> Followees: ' + str(user['friends_count']) + '<br> Tweets: ' + str(user['statuses_count']) + '<br> Description: ' + str(desc.encode('utf-8')) + '<br> Group: ' + 
str(fg.vs[idv]['group']) + '\', ' + 'x: ' + str(Coo[fg.vs[idv]['group']][0]+rand.randint(0, 300)) + ', ' + 'y: ' + str(Coo[fg.vs[idv]['group']][1]+rand.randint(0, 300)) + ', ' + 'group: ' + str(fg.vs[idv]['group']) + ', ') # if int(fg.vs[idv]['name']) in ed_users: # fw.write('shape: ' + '\'triangle\'') # else: # fw.write('shape: ' + '\'circle\'') fw.write('}, \n') fw.write('];\n var edges = [\n') for ide, e in enumerate(fg.es): fw.write('{from: ' + str(e.source+1) + ', ' + 'to: ' + str(e.target+1) + ', ' + 'arrows: ' + '\'to\'' + ', ' + 'title: \' Tags: ' + fg.vs[e.source]['name'] + ' ' + fg.vs[e.target]['name'] + '<br> Co-occurrence: ' + str(fg.es[ide]['weight']) + '\', ' + 'value: ' + str(fg.es[ide]['weight']) + '},\n') #str(fg.es[ide]['weight']) fw.write('];\n')
# compare_weights() / compare_opinion() / recovery_hashtag() / ed_hashtag()
# and the other hashtag-cleaning steps were run previously; see history.
# Export the core-ED follow network (snet) to GraphML for external tools.
g = gt.load_network('fed', 'snet')
gt.summary(g)
g.write_graphml('core-ed-follow' + '.graphml')
# fed-communication.graphml is the communication network for all fed users
# (core plus friends); rebuild it with load_beh_network_subset if needed.
def recover_proed_inter(): # Compare difference between pro-ed and pro-recovery uses in social networking prorec, proed = edrelatedcom.rec_proed() ## based on profiles com = dbt.db_connect_col('fed', 'scom') g = gt.load_network('fed', 'snet') data = [] for node in g.vs: uid = node['name'] user = com.find_one({'id': int(uid)}) followeecount = user['friends_count'] followercount = user['followers_count'] followees = set([g.vs[v]['name'] for v in g.successors(uid)]) followers = set([g.vs[v]['name'] for v in g.predecessors(uid)]) recc_followee, proc_followee, edc_followee = 0.0, 0.0, 0.0 for u in followees: if u in prorec: recc_followee += 1 elif u in proed: proc_followee += 1 else: edc_followee += 1 if followeecount != 0: recc_followee /= followeecount proc_followee /= followeecount edc_followee /= followeecount else: print 'Followee number is zero', uid otherc_followee = 1 - recc_followee - proc_followee - edc_followee recc_follower, proc_follower, edc_follower = 0.0, 0.0, 0.0 for u in followers: if u in prorec: recc_follower += 1 elif u in proed: proc_follower += 1 else: edc_follower += 1 if followercount != 0: recc_follower /= followercount proc_follower /= followercount edc_follower /= followercount else: print 'Follower number is zero', uid otherc_follower = 1 - recc_follower - proc_follower - edc_follower if uid in prorec: data.append(['Rec', recc_followee, 'Rec-Followees']) data.append(['Rec', proc_followee, 'Ped-Followees']) data.append(['Rec', edc_followee, 'ED-Followees']) data.append(['Rec', otherc_followee, 'Oth-Followees']) data.append(['Rec', recc_follower, 'Rec-Followers']) data.append(['Rec', proc_follower, 'Ped-Followers']) data.append(['Rec', edc_follower, 'ED-Followers']) data.append(['Rec', otherc_follower, 'Oth-Followers']) elif uid in proed: data.append(['Ped', proc_followee, 'Ped-Followees']) data.append(['Ped', recc_followee, 'Rec-Followees']) data.append(['Ped', edc_followee, 'ED-Followees']) data.append(['Ped', otherc_followee, 
'Oth-Followees']) data.append(['Ped', proc_follower, 'Ped-Followers']) data.append(['Ped', recc_follower, 'Rec-Followers']) data.append(['Ped', edc_follower, 'ED-Followers']) data.append(['Ped', otherc_follower, 'Oth-Followers']) else: pass df = pd.DataFrame(data, columns=['Group', 'Proportion', 'Feature']) df.to_csv('inter.csv') sns.set(style="whitegrid", palette="pastel", color_codes=True) sns.boxplot(x="Feature", y="Proportion", hue="Group", data=df, palette="PRGn") sns.despine(offset=10, trim=True) # plt.ylim(0, 0.8) # sns.violinplot(x="Feature", y="Values", hue="Group", data=df, split=True, # inner="quart", palette="PRGn") # sns.despine(left=True) plt.show()
elif 'status' in u1 and 'status' not in u2: # delete drop = 0 elif u2['status']['id'] == u1['status']['id']: # no new post drop = 1 elif u2['status']['id'] != u1['status']['id']: # new post drop = 0 labels.append(drop) hubs.append(hub) g.vs['drop'] = labels g.vs['cen'] = hubs g.write_graphml('drop-'+g_file) if __name__ == '__main__': # friend_user_change('fed', 'fed2', 'com', 'com') network1 = gt.load_network('random', 'net') network1.write_graphml('rd-net-all.graphml') network1 = gt.load_network('younger', 'net') network1.write_graphml('yg-net-all.graphml') # g = gt.load_network('fed', 'net') # g.write_graphml('fed-net.graphml') # label_dropout_network('coreed-net.graphml', 'fed', 'com', 'fed_sur', 'com') # label_dropout_network('fed-net.graphml', 'fed', 'com', 'fed_sur', 'com') # friends_old = network1.successors(str('4036631952')) # print [network1.vs[v]['name'] for v in friends_old] # print friends_old # states_change('fed', 'fed2', 'com', 'com') # emotion_dropout_IV_split('fed', 'fed2', 'com', 'com') # load_net()
def emotion_dropout_IV_split(dbname1, dbname2, comname1, comname2): ''' Split followees and followers as different variables :param dbname1: :param dbname2: :param comname1: :param comname2: :return: ''' filter_que = {'level': 1, 'liwc_anal.result.WC':{'$exists': True}} user1 = iot.get_values_one_field(dbname1, comname1, 'id', filter_que) com1 = dbt.db_connect_col(dbname1, comname1) com2 = dbt.db_connect_col(dbname2, comname2) fields = ['liwc_anal.result.posemo', 'liwc_anal.result.negemo', 'liwc_anal.result.anx', 'liwc_anal.result.anger', 'liwc_anal.result.sad'] prof_names = ['friends_count', 'statuses_count', 'followers_count', 'friends_day', 'statuses_day', 'followers_day', 'days'] attr_names = ['uid', 'attr'] attr_names.extend(['u_'+field.split('.')[-1] for field in fields]) attr_names.extend(['u_'+field for field in prof_names]) attr_names.extend(['fr_'+field.split('.')[-1] for field in fields]) attr_names.extend(['fr_'+field for field in prof_names]) attr_names.extend(['fr_num', 'fr_palive']) attr_names.extend(['fo_'+field.split('.')[-1] for field in fields]) attr_names.extend(['fo_'+field for field in prof_names]) attr_names.extend(['fo_num', 'fo_palive']) attr_names.extend(['co_'+field.split('.')[-1] for field in fields]) attr_names.extend(['co_'+field for field in prof_names]) attr_names.extend(['co_num', 'co_palive']) print attr_names attr_length = len(fields) + len(prof_names) + 2 network1 = gt.load_network(dbname1, 'net') data = [] for uid in user1: row = [uid] u1 = com1.find_one({'id': uid}) u2 = com2.find_one({'id': uid}) if u2 is None or u2['timeline_count'] == 0: row.append(1) else: row.append(0) uatt = iot.get_fields_one_doc(u1, fields) row.extend(uatt) row.extend(active_days(u1)) exist = True try: v = network1.vs.find(name=str(uid)) except ValueError: exist = False if exist: print '--------------------user %d---------------' %uid followees = set([int(network1.vs[v]['name']) for v in network1.successors(str(uid))]) followers = 
set([int(network1.vs[v]['name']) for v in network1.predecessors(str(uid))]) common = followees.intersection(followers) followees = followees - common followers = followers - common for friend_ids in [followees, followers, common]: if len(friend_ids) > 0: # friend_ids = [int(network1.vs[v]['name']) for v in friends] print uid in friend_ids print len(friend_ids) fatts = [] alive = 0 for fid in friend_ids: fu = com1.find_one({'id': fid, 'liwc_anal.result.WC':{'$exists':True}, 'status':{'$exists':True}}) fu2 = com2.find_one({'id': fid}) if fu != None: fatt = iot.get_fields_one_doc(fu, fields) # Friends' LIWC fatt.extend(active_days(fu)) fatts.append(fatt) if fu2 is None or fu2['timeline_count'] == 0: alive += 0 else: alive += 1 if len(fatts) > 0: fatts = np.array(fatts) fmatts = np.mean(fatts, axis=0) row.extend(fmatts) row.append(len(fatts)) paliv = float(alive)/len(fatts) print 'Alive %d %d %.3f' % (alive, len(fatts), paliv) row.append(paliv) else: row.extend([None] * attr_length) # friends = followers # followers # if len(friends) > 0: # friend_ids = [int(network1.vs[v]['name']) for v in friends] # print uid in friend_ids # print len(friend_ids) # fatts = [] # alive = 0 # for fid in friend_ids: # fu = com1.find_one({'id': fid, 'liwc_anal.result.WC':{'$exists':True}, 'status':{'$exists':True}}) # fu2 = com2.find_one({'id': fid}) # if fu != None: # fatt = iot.get_fields_one_doc(fu, fields) # fatt.extend(active_days(fu)) # fatts.append(fatt) # if fu2 is None or fu2['timeline_count'] == 0: # alive += 0 # else: # alive += 1 # if len(fatts) > 0: # fatts = np.array(fatts) # fmatts = np.mean(fatts, axis=0) # row.extend(fmatts) # row.append(len(fatts)) # paliv = float(alive)/len(fatts) # print 'Alive %d %d %.3f' % (alive, len(fatts), paliv) # row.append(paliv) # print row data.append(row) df = pd.DataFrame(data, columns=attr_names) df.to_csv('data-attr-split.csv', index = False)
def emotion_dropout_IV_combine(dbname1, dbname2, comname1, comname2): ''' Combine followees and follower together as variables :param dbname1: :param dbname2: :param comname1: :param comname2: :return: ''' filter_que = {'level': 1, 'liwc_anal.result.WC':{'$exists': True}} user1 = iot.get_values_one_field(dbname1, comname1, 'id', filter_que) com1 = dbt.db_connect_col(dbname1, comname1) com2 = dbt.db_connect_col(dbname2, comname2) fields = ['liwc_anal.result.posemo', 'liwc_anal.result.negemo', 'liwc_anal.result.anx', 'liwc_anal.result.anger', 'liwc_anal.result.sad'] prof_names = ['friends_count', 'statuses_count', 'followers_count', 'friends_day', 'statuses_day', 'followers_day', 'days'] attr_names = ['uid', 'attr'] attr_names.extend(['u_'+field.split('.')[-1] for field in fields]) attr_names.extend(['u_'+field for field in prof_names]) attr_names.extend(['f_'+field.split('.')[-1] for field in fields]) attr_names.extend(['f_'+field for field in prof_names]) attr_names.extend(['f_num', 'f_palive']) print attr_names network1 = gt.load_network(dbname1, 'net') data = [] for uid in user1: row = [uid] u1 = com1.find_one({'id': uid}) u2 = com2.find_one({'id': uid}) if u2 is None or u2['timeline_count'] == 0: row.append(1) else: row.append(0) uatt = iot.get_fields_one_doc(u1, fields) row.extend(uatt) row.extend(active_days(u1)) exist = True try: v = network1.vs.find(name=str(uid)) except ValueError: exist = False if exist: friends = set(network1.neighbors(str(uid))) # id or name if len(friends) > 0: friend_ids = [int(network1.vs[v]['name']) for v in friends] # return id print uid in friend_ids print len(friend_ids) fatts = [] alive = 0 for fid in friend_ids: fu = com1.find_one({'id': fid, 'liwc_anal.result.WC':{'$exists':True}, 'status':{'$exists':True}}) fu2 = com2.find_one({'id': fid}) if fu != None: fatt = iot.get_fields_one_doc(fu, fields) fatt.extend(active_days(fu)) fatts.append(fatt) if fu2 is None or fu2['timeline_count'] == 0: alive += 0 else: alive += 1 if 
len(fatts) > 0: fatts = np.array(fatts) fmatts = np.mean(fatts, axis=0) row.extend(fmatts) row.append(len(fatts)) paliv = float(alive)/len(fatts) print 'Alive %d %d %.3f' % (alive, len(fatts), paliv) row.append(paliv) # print row data.append(row) df = pd.DataFrame(data, columns=attr_names) df.to_csv('data-attr-combine.csv', index = False)
def emotion_dropout_IV_split(dbname1, dbname2, comname1, comname2): ''' Split followees and followers as different variables :param dbname1: :param dbname2: :param comname1: :param comname2: :return: ''' filter_que = {'level': 1, 'liwc_anal.result.WC':{'$exists': True}} user1 = iot.get_values_one_field(dbname1, comname1, 'id', filter_que) com1 = dbt.db_connect_col(dbname1, comname1) com2 = dbt.db_connect_col(dbname2, comname2) fields = ['liwc_anal.result.posemo', 'liwc_anal.result.negemo', 'liwc_anal.result.anx', 'liwc_anal.result.anger', 'liwc_anal.result.sad' ] prof_names = ['friends_count', 'statuses_count', 'followers_count', 'friends_day', 'statuses_day', 'followers_day', 'days', 'eigenvector'] attr_names = ['uid', 'attr'] attr_names.extend(['u_'+field.split('.')[-1] for field in fields]) attr_names.extend(['u_'+field for field in prof_names]) attr_names.extend(['fr_'+field.split('.')[-1] for field in fields]) attr_names.extend(['fr_'+field for field in prof_names]) attr_names.extend(['fr_num', 'fr_palive']) attr_names.extend(['fo_'+field.split('.')[-1] for field in fields]) attr_names.extend(['fo_'+field for field in prof_names]) attr_names.extend(['fo_num', 'fo_palive']) attr_names.extend(['co_'+field.split('.')[-1] for field in fields]) attr_names.extend(['co_'+field for field in prof_names]) attr_names.extend(['co_num', 'co_palive']) print attr_names attr_length = len(fields) + len(prof_names) + 2 network1 = gt.load_network(dbname1, 'net') '''Centralities Calculation''' eigen = network1.eigenvector_centrality() # closeness = network1.closeness() # betweenness = network1.betweenness() nodes = [int(v['name']) for v in network1.vs] eigen_map = dict(zip(nodes, eigen)) # closeness_map = dict(zip(nodes, closeness)) # betweenness_map = dict(zip(nodes, betweenness)) data = [] for uid in user1: row = [uid] u1 = com1.find_one({'id': uid}) u2 = com2.find_one({'id': uid}) if u2 is None or u2['timeline_count'] == 0: row.append(1) else: row.append(0) uatt = 
iot.get_fields_one_doc(u1, fields) row.extend(uatt) row.extend(active_days(u1)) row.extend([eigen_map.get(u1['id'])]) exist = True try: v = network1.vs.find(name=str(uid)) except ValueError: exist = False if exist: print '--------------------user %d---------------' %uid followees = set([int(network1.vs[v]['name']) for v in network1.successors(str(uid))]) followers = set([int(network1.vs[v]['name']) for v in network1.predecessors(str(uid))]) common = followees.intersection(followers) followees = followees - common followers = followers - common for friend_ids in [followees, followers, common]: if len(friend_ids) > 0: # friend_ids = [int(network1.vs[v]['name']) for v in friends] print uid in friend_ids print len(friend_ids) fatts = [] alive = 0 for fid in friend_ids: fu = com1.find_one({'id': fid, 'liwc_anal.result.WC':{'$exists':True}, 'status':{'$exists':True}}) fu2 = com2.find_one({'id': fid}) if fu != None: fatt = iot.get_fields_one_doc(fu, fields) # Friends' LIWC fatt.extend(active_days(fu)) fatt.extend([eigen_map.get(fu['id'])]) fatts.append(fatt) if fu2 is None or fu2['timeline_count'] == 0: alive += 0 else: alive += 1 if len(fatts) > 0: fatts = np.array(fatts) fmatts = np.mean(fatts, axis=0) row.extend(fmatts) row.append(len(fatts)) paliv = float(alive)/len(fatts) print 'Alive %d %d %.3f' % (alive, len(fatts), paliv) row.append(paliv) else: row.extend([None] * attr_length) # friends = followers # followers # if len(friends) > 0: # friend_ids = [int(network1.vs[v]['name']) for v in friends] # print uid in friend_ids # print len(friend_ids) # fatts = [] # alive = 0 # for fid in friend_ids: # fu = com1.find_one({'id': fid, 'liwc_anal.result.WC':{'$exists':True}, 'status':{'$exists':True}}) # fu2 = com2.find_one({'id': fid}) # if fu != None: # fatt = iot.get_fields_one_doc(fu, fields) # fatt.extend(active_days(fu)) # fatts.append(fatt) # if fu2 is None or fu2['timeline_count'] == 0: # alive += 0 # else: # alive += 1 # if len(fatts) > 0: # fatts = np.array(fatts) 
# fmatts = np.mean(fatts, axis=0) # row.extend(fmatts) # row.append(len(fatts)) # paliv = float(alive)/len(fatts) # print 'Alive %d %d %.3f' % (alive, len(fatts), paliv) # row.append(paliv) # print row data.append(row) df = pd.DataFrame(data, columns=attr_names) df.to_csv('data-attr-split.csv', index = False)
def friend_dis(dbname, comname, netname, tagets):
    '''
    Store, for every user in the collection, how many of its followers and
    followings belong to the target id set ``tagets``, both as raw counts
    and as proportions of the profile follower/friend counts.  Results are
    written back under the user's 'net_anal.ed_proportion' field.

    NOTE (from original author): igraph's Graph.neighbors result includes
    the input vertex while predecessors/successors do not, so neighbors is
    one element longer -- verify against the igraph version in use.

    :param dbname: database name
    :param comname: user collection name
    :param netname: stored network name to load
    :param tagets: set of target user ids (name kept for compatibility)
    :return: None (side effect: MongoDB updates)
    '''
    db = dbt.db_connect_no_auth(dbname)
    com = db[comname]
    g = gt.load_network(dbname, netname)
    gt.add_attributes(g, ['followers_count', 'friends_count'],
                      dbname, comname, ['followers_count', 'friends_count'])
    gt.summary(g)
    for user in com.find({}, ['id', 'net_anal']):
        uid = user['id']
        values = user.get('net_anal', {'mined': True})
        # Skip users that have no vertex in the loaded network.
        try:
            node = g.vs.find(name=str(uid))
        except ValueError:
            continue
        out_ids = g.successors(str(uid))    # follower vertex indices
        in_ids = g.predecessors(str(uid))   # following vertex indices
        follower_set = set(int(name) for name in g.vs[out_ids]['name'])
        following_set = set(int(name) for name in g.vs[in_ids]['name'])
        ed_follower = len(tagets & follower_set)
        ed_following = len(tagets & following_set)
        # Guard against division by zero on profile counts.
        follower_total = node['followers_count']
        if follower_total == 0:
            follower_total = 1
        following_total = node['friends_count']
        if following_total == 0:
            following_total = 1
        ed_follower_p = float(ed_follower) / follower_total
        ed_following_p = float(ed_following) / following_total
        net_sta = {
            'ed_follower_no': ed_follower,
            'ed_following_no': ed_following,
            'ed_follower_p': ed_follower_p,
            'ed_following_p': ed_following_p,
            'non_ed_follower_p': 1 - ed_follower_p,
            'non_ed_following_p': 1 - ed_following_p,
        }
        values['ed_proportion'] = net_sta
        com.update_one({'id': uid}, {'$set': {'net_anal': values}}, upsert=True)