예제 #1
0
파일: com_det.py 프로젝트: wtgme/ohsn
def communtiy_feature(dbname, typename):
    """Plot LIWC feature distributions for each large community of a network.

    The network is loaded with ``ntt.loadnet``, communities are computed with
    ``gt.fast_community`` and cached to ``data/<dbname><typename>com.pick``.
    For every LIWC feature, one ``sns.distplot`` curve (histogram disabled)
    is drawn per community holding more than 10% of the vertices, and the
    figure is saved as ``<feature><typename>_com.pdf`` in the working
    directory.

    Args:
        dbname: database name; passed to ``ntt.loadnet`` and
            ``iot.get_values_one_field`` and used in the cache file name.
        typename: network type; passed to ``ntt.loadnet`` and used in the
            cache and figure file names.
    """
    fg = ntt.loadnet(dbname, typename)

    cache_path = 'data/' + dbname + typename + 'com.pick'
    fcoms = gt.fast_community(fg)
    # Pickle streams are binary data: use binary mode and close the handles
    # deterministically instead of relying on garbage collection.
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(fcoms, cache_file)
    # The result is read straight back from the cache, as in the original
    # flow (presumably to exercise the cached copy -- TODO confirm).
    with open(cache_path, 'rb') as cache_file:
        fcoms = pickle.load(cache_file)
    fclus = fcoms.as_clustering()
    gt.summary(fclus)

    # Compare the distribution of each feature across communities.
    features = [
        'liwc_anal.result.i',
        'liwc_anal.result.we',
        'liwc_anal.result.bio',
        'liwc_anal.result.body',
        'liwc_anal.result.health',
        'liwc_anal.result.posemo',
        'liwc_anal.result.negemo',
        'liwc_anal.result.ingest',
        'liwc_anal.result.anx',
        'liwc_anal.result.anger',
        'liwc_anal.result.sad',
    ]

    # Only communities with more than 10% of all vertices are compared.
    therh = 0.1 * fg.vcount()
    # The member IDs of each large community do not depend on the feature,
    # so build them once rather than once per feature.
    big_clusters = [
        list({int(fg.vs[v]['name']) for v in clu})
        for clu in fclus
        if len(clu) > therh
    ]

    for feature in features:
        data = [
            iot.get_values_one_field(dbname, 'com', feature,
                                     {'id': {'$in': ulist}})
            for ulist in big_clusters
        ]

        plot.plot_config()
        for i, values in enumerate(data):
            # Legend label is "<community index>:<number of values>".
            sns.distplot(values, hist=False,
                         label=str(i) + ':' + str(len(values)))
        plt.xlabel(feature)
        plt.ylabel('PDF')
        plt.savefig(feature + typename + '_com.pdf')
        # Clear the figure so the next feature starts from an empty canvas.
        plt.clf()
예제 #2
0
파일: com_det.py 프로젝트: abiraja2004/ohsn
def communtiy_feature(dbname, typename):
    """Plot LIWC feature distributions for each large community of a network.

    The network is loaded with ``ntt.loadnet``, communities are computed with
    ``gt.fast_community`` and cached to ``data/<dbname><typename>com.pick``.
    For every LIWC feature, one ``sns.distplot`` curve (histogram disabled)
    is drawn per community holding more than 10% of the vertices, and the
    figure is saved as ``<feature><typename>_com.pdf`` in the working
    directory.

    Args:
        dbname: database name; passed to ``ntt.loadnet`` and
            ``iot.get_values_one_field`` and used in the cache file name.
        typename: network type; passed to ``ntt.loadnet`` and used in the
            cache and figure file names.
    """
    fg = ntt.loadnet(dbname, typename)

    cache_path = 'data/' + dbname + typename + 'com.pick'
    fcoms = gt.fast_community(fg)
    # Pickle streams are binary data: use binary mode and close the handles
    # deterministically instead of relying on garbage collection.
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(fcoms, cache_file)
    # The result is read straight back from the cache, as in the original
    # flow (presumably to exercise the cached copy -- TODO confirm).
    with open(cache_path, 'rb') as cache_file:
        fcoms = pickle.load(cache_file)
    fclus = fcoms.as_clustering()
    gt.summary(fclus)

    # Compare the distribution of each feature across communities.
    features = [
        'liwc_anal.result.i', 'liwc_anal.result.we', 'liwc_anal.result.bio',
        'liwc_anal.result.body', 'liwc_anal.result.health',
        'liwc_anal.result.posemo', 'liwc_anal.result.negemo',
        'liwc_anal.result.ingest', 'liwc_anal.result.anx',
        'liwc_anal.result.anger', 'liwc_anal.result.sad'
    ]

    # Only communities with more than 10% of all vertices are compared.
    therh = 0.1 * fg.vcount()
    # The member IDs of each large community do not depend on the feature,
    # so build them once rather than once per feature.
    big_clusters = [
        list({int(fg.vs[v]['name']) for v in clu})
        for clu in fclus
        if len(clu) > therh
    ]

    for feature in features:
        data = [
            iot.get_values_one_field(dbname, 'com', feature,
                                     {'id': {
                                         '$in': ulist
                                     }})
            for ulist in big_clusters
        ]

        plot.plot_config()
        for i, values in enumerate(data):
            # Legend label is "<community index>:<number of values>".
            sns.distplot(values,
                         hist=False,
                         label=str(i) + ':' + str(len(values)))
        plt.xlabel(feature)
        plt.ylabel('PDF')
        plt.savefig(feature + typename + '_com.pdf')
        # Clear the figure so the next feature starts from an empty canvas.
        plt.clf()
예제 #3
0
파일: link_ed.py 프로젝트: wtgme/ohsn
def triangles(dbname, type):
    """Find users that close a triangle with two connected core ED users.

    The network is loaded with ``ntt.loadnet`` and collapsed to an undirected
    graph. For every pair of distinct, adjacent core ED users (as returned by
    ``ed_user(dbname, 'com')``), all of their common neighbours are
    collected. The collected user IDs are pickled to
    ``data/<dbname><type>triangle.pick`` and each user's screen name and
    description are printed for manual verification.

    Args:
        dbname: database name; passed to ``ntt.loadnet`` and ``ed_user`` and
            used in the output file name.
        type: network type; passed to ``ntt.loadnet`` and used in the output
            file name. (Shadows the builtin; kept for caller compatibility.)
    """
    # Load the network and collapse it to an undirected graph.
    g = ntt.loadnet(dbname, type)
    g = g.as_undirected(mode="collapse")
    print(g.vcount())
    print(g.ecount())

    # Map user IDs to vertex indices in the graph.
    ed_set = set()
    for ed in ed_user(dbname, 'com'):
        try:
            ed_set.add(g.vs.find(name=ed).index)
        except ValueError:
            # This ED user is not a vertex of the network; skip on purpose.
            continue
    print(len(ed_set))

    # Find triangles in which two vertices are core ED users; the third
    # vertex is the candidate user.
    result = set()
    for ui in ed_set:
        nui = set(g.neighbors(ui))
        # Visit only ED users adjacent to ui instead of scanning every pair.
        for uj in nui & ed_set:
            # uj > ui handles each unordered pair once and never pairs a
            # user with itself (a self-loop is not a two-user triangle).
            if uj > ui:
                result.update(nui.intersection(g.neighbors(uj)))

    ids = [int(g.vs[v]['name']) for v in result]
    # Pickle streams are binary data; the with-block also closes the file.
    with open('data/' + dbname + type + 'triangle.pick', 'wb') as out_file:
        pickle.dump(ids, out_file)

    # Verify triangle users by printing their profiles.
    # NOTE(review): the database name 'fed' is hard-coded here while the rest
    # of the function uses ``dbname`` -- confirm this is intended.
    db = dbt.db_connect_no_auth('fed')
    com = db['com']
    for uid in ids:
        user = com.find_one({'id': uid})
        if user is None:
            # No profile stored for this ID; nothing to print.
            continue
        description = user.get('description') or u''
        # Whitespace runs (including newlines) collapse to single spaces so
        # that each user is printed on one line.
        print('%s %s' % (user['screen_name'].encode('utf-8'),
                         ' '.join(description.split()).encode('utf-8')))
예제 #4
0
def triangles(dbname, type):
    """Find users that close a triangle with two connected core ED users.

    The network is loaded with ``ntt.loadnet`` and collapsed to an undirected
    graph. For every pair of distinct, adjacent core ED users (as returned by
    ``ed_user(dbname, 'com')``), all of their common neighbours are
    collected. The collected user IDs are pickled to
    ``data/<dbname><type>triangle.pick`` and each user's screen name and
    description are printed for manual verification.

    Args:
        dbname: database name; passed to ``ntt.loadnet`` and ``ed_user`` and
            used in the output file name.
        type: network type; passed to ``ntt.loadnet`` and used in the output
            file name. (Shadows the builtin; kept for caller compatibility.)
    """
    # Load the network and collapse it to an undirected graph.
    g = ntt.loadnet(dbname, type)
    g = g.as_undirected(mode="collapse")
    print(g.vcount())
    print(g.ecount())

    # Map user IDs to vertex indices in the graph.
    ed_set = set()
    for ed in ed_user(dbname, 'com'):
        try:
            ed_set.add(g.vs.find(name=ed).index)
        except ValueError:
            # This ED user is not a vertex of the network; skip on purpose.
            continue
    print(len(ed_set))

    # Find triangles in which two vertices are core ED users; the third
    # vertex is the candidate user.
    result = set()
    for ui in ed_set:
        nui = set(g.neighbors(ui))
        # Visit only ED users adjacent to ui instead of scanning every pair.
        for uj in nui & ed_set:
            # uj > ui handles each unordered pair once and never pairs a
            # user with itself (a self-loop is not a two-user triangle).
            if uj > ui:
                result.update(nui.intersection(g.neighbors(uj)))

    ids = [int(g.vs[v]['name']) for v in result]
    # Pickle streams are binary data; the with-block also closes the file.
    with open('data/' + dbname + type + 'triangle.pick', 'wb') as out_file:
        pickle.dump(ids, out_file)

    # Verify triangle users by printing their profiles.
    # NOTE(review): the database name 'fed' is hard-coded here while the rest
    # of the function uses ``dbname`` -- confirm this is intended.
    db = dbt.db_connect_no_auth('fed')
    com = db['com']
    for uid in ids:
        user = com.find_one({'id': uid})
        if user is None:
            # No profile stored for this ID; nothing to print.
            continue
        description = user.get('description') or u''
        # Whitespace runs (including newlines) collapse to single spaces so
        # that each user is printed on one line.
        print('%s %s' % (user['screen_name'].encode('utf-8'), ' '.join(
            description.split()).encode('utf-8')))