def communtiy_feature(dbname, typename):
    """Plot per-community distributions of ``liwc_anal.result.*`` fields.

    Loads the network with ``ntt.loadnet``, detects communities with
    ``gt.fast_community``, round-trips the result through a pickle file under
    ``data/``, and prints a summary of the clustering.  For every community
    holding more than 10% of the graph's vertices, each feature is fetched with
    ``iot.get_values_one_field`` and drawn with ``sns.distplot`` (histogram
    disabled); one figure per feature is saved as
    ``<feature><typename>_com.pdf``.

    NOTE(review): this function is defined more than once in this file; the
    last definition is the one Python keeps.

    Args:
        dbname: Database name handed to the network loader and value query.
        typename: Network type handed to the loader; also embedded in the
            pickle and PDF file names.
    """
    fg = ntt.loadnet(dbname, typename)
    fcoms = gt.fast_community(fg)

    # The dump/load round trip is kept from the original flow.  Binary mode
    # and context managers make sure the file is flushed and closed before it
    # is read back.
    cache_path = 'data/' + dbname + typename + 'com.pick'
    with open(cache_path, 'wb') as fw:
        pickle.dump(fcoms, fw)
    with open(cache_path, 'rb') as fr:
        fcoms = pickle.load(fr)

    fclus = fcoms.as_clustering()
    gt.summary(fclus)

    # Compare the distribution of each feature across communities.
    features = [
        'liwc_anal.result.i',
        'liwc_anal.result.we',
        'liwc_anal.result.bio',
        'liwc_anal.result.body',
        'liwc_anal.result.health',
        'liwc_anal.result.posemo',
        'liwc_anal.result.negemo',
        'liwc_anal.result.ingest',
        'liwc_anal.result.anx',
        'liwc_anal.result.anger',
        'liwc_anal.result.sad'
    ]

    # Only communities with more than 10% of all vertices are compared.
    therh = 0.1 * fg.vcount()

    # The user-id list of a community does not depend on the feature, so it
    # is built once here instead of once per feature.
    big_clusters = [
        list(set(int(fg.vs[v]['name']) for v in clu))
        for clu in fclus
        if len(clu) > therh
    ]

    for feature in features:
        data = [
            iot.get_values_one_field(dbname, 'com', feature,
                                     {'id': {'$in': ulist}})
            for ulist in big_clusters
        ]

        plot.plot_config()
        for i, values in enumerate(data):
            # Legend label is "<community index>:<number of values>".
            sns.distplot(values, hist=False,
                         label=str(i) + ':' + str(len(values)))
        plt.xlabel(feature)
        plt.ylabel('PDF')
        plt.savefig(feature + typename + '_com.pdf')
        # Clear the figure so the next feature starts from an empty canvas.
        plt.clf()
def communtiy_feature(dbname, typename):
    """Plot per-community distributions of ``liwc_anal.result.*`` fields.

    Loads the network with ``ntt.loadnet``, detects communities with
    ``gt.fast_community``, round-trips the result through a pickle file under
    ``data/``, and prints a summary of the clustering.  For every community
    holding more than 10% of the graph's vertices, each feature is fetched with
    ``iot.get_values_one_field`` and drawn with ``sns.distplot`` (histogram
    disabled); one figure per feature is saved as
    ``<feature><typename>_com.pdf``.

    NOTE(review): this function is defined more than once in this file; the
    last definition is the one Python keeps.

    Args:
        dbname: Database name handed to the network loader and value query.
        typename: Network type handed to the loader; also embedded in the
            pickle and PDF file names.
    """
    fg = ntt.loadnet(dbname, typename)
    fcoms = gt.fast_community(fg)

    # The dump/load round trip is kept from the original flow.  Binary mode
    # and context managers make sure the file is flushed and closed before it
    # is read back.
    cache_path = 'data/' + dbname + typename + 'com.pick'
    with open(cache_path, 'wb') as fw:
        pickle.dump(fcoms, fw)
    with open(cache_path, 'rb') as fr:
        fcoms = pickle.load(fr)

    fclus = fcoms.as_clustering()
    gt.summary(fclus)

    # Compare the distribution of each feature across communities.
    features = [
        'liwc_anal.result.i',
        'liwc_anal.result.we',
        'liwc_anal.result.bio',
        'liwc_anal.result.body',
        'liwc_anal.result.health',
        'liwc_anal.result.posemo',
        'liwc_anal.result.negemo',
        'liwc_anal.result.ingest',
        'liwc_anal.result.anx',
        'liwc_anal.result.anger',
        'liwc_anal.result.sad'
    ]

    # Only communities with more than 10% of all vertices are compared.
    therh = 0.1 * fg.vcount()

    # The user-id list of a community does not depend on the feature, so it
    # is built once here instead of once per feature.
    big_clusters = [
        list(set(int(fg.vs[v]['name']) for v in clu))
        for clu in fclus
        if len(clu) > therh
    ]

    for feature in features:
        data = [
            iot.get_values_one_field(dbname, 'com', feature,
                                     {'id': {'$in': ulist}})
            for ulist in big_clusters
        ]

        plot.plot_config()
        for i, values in enumerate(data):
            # Legend label is "<community index>:<number of values>".
            sns.distplot(values, hist=False,
                         label=str(i) + ':' + str(len(values)))
        plt.xlabel(feature)
        plt.ylabel('PDF')
        plt.savefig(feature + typename + '_com.pdf')
        # Clear the figure so the next feature starts from an empty canvas.
        plt.clf()
def triangles(dbname, type):
    """Collect users that close a triangle with two linked core users.

    The network is loaded with ``ntt.loadnet`` and collapsed to an undirected
    graph.  Core users are those returned by ``ed_user(dbname, 'com')`` that
    can be found in the graph by name.  For every pair of distinct core users
    joined by an edge, each vertex adjacent to both is recorded.  The recorded
    users' ids are pickled to ``data/<dbname><type>triangle.pick`` and their
    screen name and whitespace-normalised description are printed.

    NOTE(review): this function is defined more than once in this file; the
    last definition is the one Python keeps.

    Args:
        dbname: Database name handed to the network loader and ``ed_user``.
        type: Network type handed to the loader; also embedded in the pickle
            file name.  The name shadows the builtin but is kept so existing
            keyword callers continue to work.
    """
    # Load the network and collapse it to an undirected graph.
    g = ntt.loadnet(dbname, type)
    g = g.as_undirected(mode="collapse")
    print(g.vcount())
    print(g.ecount())

    # Map user ids to vertex indices in the graph.
    ed_users = ed_user(dbname, 'com')
    ed_set = set()
    for ed in ed_users:
        try:
            v = g.vs.find(name=ed)
        except ValueError:
            # Lookup failed (presumably the user is not in this network).
            continue
        ed_set.add(v.index)
    print(len(ed_set))

    # Find triangles in which two vertices are core users; the third vertex
    # is what gets collected.
    neighbors = dict((u, set(g.neighbors(u))) for u in ed_set)
    result = set()
    for ui in ed_set:
        nui = neighbors[ui]
        # Only core users adjacent to ui can form a qualifying pair, so walk
        # those instead of scanning every core pair.
        for uj in nui & ed_set:
            # ``uj > ui`` handles each unordered pair once and never pairs a
            # vertex with itself (the original did when a self-loop existed).
            if uj > ui:
                # Drop the pair's own endpoints, which can only show up as
                # "common neighbours" through self-loops.
                result |= (nui & neighbors[uj]) - set([ui, uj])

    ids = [int(g.vs[v]['name']) for v in result]
    # Binary mode plus a context manager: the pickle is flushed and closed
    # deterministically.
    with open('data/' + dbname + type + 'triangle.pick', 'wb') as fw:
        pickle.dump(ids, fw)

    # Verify triangle users by printing their profile text.
    # NOTE(review): this connects to the hard-coded 'fed' database rather than
    # ``dbname`` -- confirm that is intended.
    db = dbt.db_connect_no_auth('fed')
    com = db['com']
    for v in ids:
        user = com.find_one({'id': v})
        if user is None:
            # No matching document; nothing to print for this id.
            continue
        screen_name = user['screen_name'].encode('utf-8')
        description = ' '.join(user['description'].split()).encode('utf-8')
        print('%s %s' % (screen_name, description))
def triangles(dbname, type):
    """Collect users that close a triangle with two linked core users.

    The network is loaded with ``ntt.loadnet`` and collapsed to an undirected
    graph.  Core users are those returned by ``ed_user(dbname, 'com')`` that
    can be found in the graph by name.  For every pair of distinct core users
    joined by an edge, each vertex adjacent to both is recorded.  The recorded
    users' ids are pickled to ``data/<dbname><type>triangle.pick`` and their
    screen name and whitespace-normalised description are printed.

    NOTE(review): this function is defined more than once in this file; the
    last definition is the one Python keeps.

    Args:
        dbname: Database name handed to the network loader and ``ed_user``.
        type: Network type handed to the loader; also embedded in the pickle
            file name.  The name shadows the builtin but is kept so existing
            keyword callers continue to work.
    """
    # Load the network and collapse it to an undirected graph.
    g = ntt.loadnet(dbname, type)
    g = g.as_undirected(mode="collapse")
    print(g.vcount())
    print(g.ecount())

    # Map user ids to vertex indices in the graph.
    ed_users = ed_user(dbname, 'com')
    ed_set = set()
    for ed in ed_users:
        try:
            v = g.vs.find(name=ed)
        except ValueError:
            # Lookup failed (presumably the user is not in this network).
            continue
        ed_set.add(v.index)
    print(len(ed_set))

    # Find triangles in which two vertices are core users; the third vertex
    # is what gets collected.
    neighbors = dict((u, set(g.neighbors(u))) for u in ed_set)
    result = set()
    for ui in ed_set:
        nui = neighbors[ui]
        # Only core users adjacent to ui can form a qualifying pair, so walk
        # those instead of scanning every core pair.
        for uj in nui & ed_set:
            # ``uj > ui`` handles each unordered pair once and never pairs a
            # vertex with itself (the original did when a self-loop existed).
            if uj > ui:
                # Drop the pair's own endpoints, which can only show up as
                # "common neighbours" through self-loops.
                result |= (nui & neighbors[uj]) - set([ui, uj])

    ids = [int(g.vs[v]['name']) for v in result]
    # Binary mode plus a context manager: the pickle is flushed and closed
    # deterministically.
    with open('data/' + dbname + type + 'triangle.pick', 'wb') as fw:
        pickle.dump(ids, fw)

    # Verify triangle users by printing their profile text.
    # NOTE(review): this connects to the hard-coded 'fed' database rather than
    # ``dbname`` -- confirm that is intended.
    db = dbt.db_connect_no_auth('fed')
    com = db['com']
    for v in ids:
        user = com.find_one({'id': v})
        if user is None:
            # No matching document; nothing to print for this id.
            continue
        screen_name = user['screen_name'].encode('utf-8')
        description = ' '.join(user['description'].split()).encode('utf-8')
        print('%s %s' % (screen_name, description))