コード例 #1
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def add_attributes(g, att_names, dbname, colname, db_field_names):
    '''
    Copy several database fields onto the vertices of graph ``g``.

    ``att_names`` and ``db_field_names`` are matched by position: the value
    found under ``db_field_names[i]`` is stored in the vertex attribute
    ``att_names[i]``. If the two sequences differ in length, the surplus
    entries of the longer one are ignored.

    Every attribute in ``att_names`` is first set to 0.0 on all vertices.
    Each document of ``dbname``.``colname`` is then matched to the vertex
    whose ``name`` equals ``str(document['id'])``; documents without a
    matching vertex are skipped. A field name may be dotted ('a.b.c') to
    reach into nested documents; when any level is missing the stored
    value is None.

    Returns the same graph object, modified in place.
    '''
    db = dbt.db_connect_no_auth(dbname)
    com = db[colname]
    for att_name in att_names:
        g.vs[att_name] = 0.0
    # Pair each attribute with its own field. The previous version reused the
    # loop variable left over from the initialisation loop above, so every
    # field was written into the last attribute only.
    pairs = list(zip(att_names, db_field_names))
    for x in com.find({}, ['id'] + list(db_field_names)):
        uid = x['id']
        try:
            v = g.vs.find(name=str(uid))
        except ValueError:
            # No vertex carries this name: the document is not in the graph.
            continue
        for att_name, db_field_name in pairs:
            # Walk the (possibly dotted) path one level at a time and stop at
            # the first missing level, so a missing parent gives None rather
            # than an AttributeError.
            t = x
            for level in db_field_name.split('.'):
                t = t.get(level)
                if t is None:
                    break
            v[att_name] = t
    return g
コード例 #2
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def load_network_subset(db_name, collection='None', filter={}):
    '''
    Build a directed friendship graph from follower records.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    filter: query document handed to ``find``; it is only read here, never
        modified.

    Each row contributes the vertex pair (str(row['follower']),
    str(row['user'])) as one edge; repeated pairs are collapsed and every
    edge gets weight 1. Vertices are named after the stringified ids.

    NOTE(review): the earlier docstring drew the edge as "user -> follower",
    but the code has always inserted (follower, user) pairs - confirm which
    direction callers expect.

    Returns the constructed Graph.
    '''
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, set()
    for row in cols.find(filter, no_cursor_timeout=True):
        src = str(row['follower'])
        dst = str(row['user'])
        # setdefault hands out the next free id the first time a name is seen
        # and returns the existing id afterwards.
        src_id = name_map.setdefault(src, len(name_map))
        dst_id = name_map.setdefault(dst, len(name_map))
        edges.add((src_id, dst_id))
    g = Graph(len(name_map), directed=True)
    # Ids were assigned 0..n-1 in order of first appearance, so sorting the
    # names by id lines them up with the vertex indices.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    g.add_edges(list(edges))
    g.es["weight"] = 1
    return g
コード例 #3
0
ファイル: graph_util.py プロジェクト: wtgme/ohsn
def load_network_subset(db_name, collection='None', filter={}):
    '''
    Build a directed friendship graph from follower records.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    filter: query document handed to ``find``; it is only read here, never
        modified.

    Each row contributes the vertex pair (str(row['follower']),
    str(row['user'])) as one edge; repeated pairs are collapsed and every
    edge gets weight 1. Vertices are named after the stringified ids.

    NOTE(review): the earlier docstring drew the edge as "user -> follower",
    but the code has always inserted (follower, user) pairs - confirm which
    direction callers expect.

    Returns the constructed Graph.
    '''
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, set()
    for row in cols.find(filter, no_cursor_timeout=True):
        src = str(row['follower'])
        dst = str(row['user'])
        # setdefault hands out the next free id the first time a name is seen
        # and returns the existing id afterwards.
        src_id = name_map.setdefault(src, len(name_map))
        dst_id = name_map.setdefault(dst, len(name_map))
        edges.add((src_id, dst_id))
    g = Graph(len(name_map), directed=True)
    # Ids were assigned 0..n-1 in order of first appearance, so sorting the
    # names by id lines them up with the vertex indices.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    g.add_edges(list(edges))
    g.es["weight"] = 1
    return g
コード例 #4
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def add_attribute(g, att_name, dbname, colname, db_field_name):
    '''
    Store one database field as a vertex attribute of graph ``g``.

    The attribute ``att_name`` is first set to -1000000000.0 on every vertex.
    Documents of ``dbname``.``colname`` in which ``db_field_name`` exists are
    then matched to the vertex whose ``name`` equals ``str(document['id'])``
    and the field value is written to that vertex. Documents without a
    matching vertex are skipped. ``db_field_name`` may be dotted ('a.b') to
    address a nested value.

    Returns the same graph object, modified in place.
    '''
    collection = dbt.db_connect_no_auth(dbname)[colname]
    placeholder = -1000000000.0
    g.vs[att_name] = placeholder
    query = {db_field_name: {'$exists': True}}
    for doc in collection.find(query, ['id', db_field_name]):
        vertex_name = str(doc['id'])
        try:
            vertex = g.vs.find(name=vertex_name)
        except ValueError:
            # No vertex carries this name; nothing to annotate.
            continue
        # Splitting an undotted name yields a single key, so the same walk
        # covers both plain and nested fields.
        value = doc
        for key in db_field_name.split('.'):
            value = value.get(key)
        vertex[att_name] = value
    return g
コード例 #5
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def load_user_hashtag_network(db_name, collection='None'):
    '''
    Build a directed, weighted user/hashtag graph from tweet documents.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.

    Only documents whose ``entities.hashtags`` array is non-empty are read.
    For each document an edge is added from the vertex named
    row['user']['id_str'] to each distinct normalised hashtag (UTF-8
    encoded, lower-cased, '_' and '-' removed). The edge weight is the number
    of documents in which that user used that hashtag.

    Returns the constructed Graph.
    '''
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, {}
    for row in cols.find({'$where': "this.entities.hashtags.length>0"},
                         no_cursor_timeout=True):
        n1 = row['user']['id_str']
        # A set, so a tag repeated inside one document counts once.
        hash_set = set()
        for tag in row['entities']['hashtags']:
            # Original author's note: the .encode('utf-8') may be unnecessary;
            # it is kept so vertex names keep their current type.
            hash_set.add(tag['text'].encode('utf-8').lower().replace(
                '_', '').replace('-', ''))
        for n2 in hash_set:
            # setdefault hands out the next free id on first sight of a name.
            n1id = name_map.setdefault(n1, len(name_map))
            n2id = name_map.setdefault(n2, len(name_map))
            edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    g = Graph(len(name_map), directed=True)
    # Names sorted by their id line up with vertex indices 0..n-1.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
コード例 #6
0
ファイル: graph_util.py プロジェクト: wtgme/ohsn
def add_attributes(g, att_names, dbname, colname, db_field_names):
    '''
    Copy several database fields onto the vertices of graph ``g``.

    ``att_names`` and ``db_field_names`` are matched by position: the value
    found under ``db_field_names[i]`` is stored in the vertex attribute
    ``att_names[i]``. If the two sequences differ in length, the surplus
    entries of the longer one are ignored.

    Every attribute in ``att_names`` is first set to 0.0 on all vertices.
    Each document of ``dbname``.``colname`` is then matched to the vertex
    whose ``name`` equals ``str(document['id'])``; documents without a
    matching vertex are skipped. A field name may be dotted ('a.b.c') to
    reach into nested documents; when any level is missing the stored
    value is None.

    Returns the same graph object, modified in place.
    '''
    db = dbt.db_connect_no_auth(dbname)
    com = db[colname]
    for att_name in att_names:
        g.vs[att_name] = 0.0
    # Pair each attribute with its own field. The previous version reused the
    # loop variable left over from the initialisation loop above, so every
    # field was written into the last attribute only.
    pairs = list(zip(att_names, db_field_names))
    for x in com.find({}, ['id'] + list(db_field_names)):
        uid = x['id']
        try:
            v = g.vs.find(name=str(uid))
        except ValueError:
            # No vertex carries this name: the document is not in the graph.
            continue
        for att_name, db_field_name in pairs:
            # Walk the (possibly dotted) path one level at a time and stop at
            # the first missing level, so a missing parent gives None rather
            # than an AttributeError.
            t = x
            for level in db_field_name.split('.'):
                t = t.get(level)
                if t is None:
                    break
            v[att_name] = t
    return g
コード例 #7
0
ファイル: graph_util.py プロジェクト: wtgme/ohsn
def add_attribute(g, att_name, dbname, colname, db_field_name):
    '''
    Store one database field as a vertex attribute of graph ``g``.

    The attribute ``att_name`` is first set to -1000000000.0 on every vertex.
    Documents of ``dbname``.``colname`` in which ``db_field_name`` exists are
    then matched to the vertex whose ``name`` equals ``str(document['id'])``
    and the field value is written to that vertex. Documents without a
    matching vertex are skipped. ``db_field_name`` may be dotted ('a.b') to
    address a nested value.

    Returns the same graph object, modified in place.
    '''
    collection = dbt.db_connect_no_auth(dbname)[colname]
    placeholder = -1000000000.0
    g.vs[att_name] = placeholder
    query = {db_field_name: {'$exists': True}}
    for doc in collection.find(query, ['id', db_field_name]):
        vertex_name = str(doc['id'])
        try:
            vertex = g.vs.find(name=vertex_name)
        except ValueError:
            # No vertex carries this name; nothing to annotate.
            continue
        # Splitting an undotted name yields a single key, so the same walk
        # covers both plain and nested fields.
        value = doc
        for key in db_field_name.split('.'):
            value = value.get(key)
        vertex[att_name] = value
    return g
コード例 #8
0
ファイル: graph_util.py プロジェクト: wtgme/ohsn
def load_user_hashtag_network(db_name, collection='None'):
    '''
    Build a directed, weighted user/hashtag graph from tweet documents.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.

    Only documents whose ``entities.hashtags`` array is non-empty are read.
    For each document an edge is added from the vertex named
    row['user']['id_str'] to each distinct normalised hashtag (UTF-8
    encoded, lower-cased, '_' and '-' removed). The edge weight is the number
    of documents in which that user used that hashtag.

    Returns the constructed Graph.
    '''
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, {}
    for row in cols.find({'$where': "this.entities.hashtags.length>0"}, no_cursor_timeout=True):
        n1 = row['user']['id_str']
        # A set, so a tag repeated inside one document counts once.
        hash_set = set()
        for tag in row['entities']['hashtags']:
            # Original author's note: the .encode('utf-8') may be unnecessary;
            # it is kept so vertex names keep their current type.
            hash_set.add(tag['text'].encode('utf-8').lower().replace('_', '').replace('-', ''))
        for n2 in hash_set:
            # setdefault hands out the next free id on first sight of a name.
            n1id = name_map.setdefault(n1, len(name_map))
            n2id = name_map.setdefault(n2, len(name_map))
            edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    g = Graph(len(name_map), directed=True)
    # Names sorted by their id line up with vertex indices 0..n-1.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
コード例 #9
0
ファイル: graph_util.py プロジェクト: wtgme/ohsn
def load_beh_network(db_name, collection='None', btype='communication'):
    '''
    Build a directed, weighted behaviour graph from all interaction records.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    btype: which record types to load: 'retweet' -> [1], 'reply' -> [2],
        'mention' -> [3], 'communication' -> [2, 3]. Any other value raises
        KeyError.

    Type codes as listed by the original author: Tweet 0, Retweet 1,
    Reply 2, Direct mention 3, Undirect mention 4.

    Each matching row adds 1 to the weight of the edge from the vertex named
    str(row['id0']) to the vertex named str(row['id1']); rows where both ids
    are equal are ignored.

    NOTE(review): the original notes say reply/mention edges run u0 -> u1 and
    retweet edges u1 -> u0; how u0/u1 map onto id0/id1 for retweets is decided
    where the records are written, not here - confirm against that code.

    Returns the constructed Graph.
    '''
    btype_dic = {'retweet': [1], 'reply': [2], 'mention': [3], 'communication': [2, 3]}
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, {}
    query = {'type': {'$in': btype_dic[btype]}}
    for row in cols.find(query, no_cursor_timeout=True):
        n1 = str(row['id0'])
        n2 = str(row['id1'])
        if n1 == n2:
            # Self-interactions carry no network information.
            continue
        # setdefault hands out the next free id on first sight of a name.
        n1id = name_map.setdefault(n1, len(name_map))
        n2id = name_map.setdefault(n2, len(name_map))
        edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    g = Graph(len(name_map), directed=True)
    # Names sorted by their id line up with vertex indices 0..n-1.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
コード例 #10
0
ファイル: export_csv.py プロジェクト: abiraja2004/ohsn
def export_poi(dbname, colname, index='All'):
    '''
    Return every document of ``dbname``.``colname`` whose 'timeline_count'
    is greater than 0, as a list.

    Unless ``index`` equals 0, each returned document additionally gets the
    key 'time_index' set to ``index`` (this includes the default 'All').
    '''
    poi_collection = dbutil.db_connect_no_auth(dbname)[colname]
    rows = []
    for doc in poi_collection.find({'timeline_count': {'$gt': 0}}):
        if not index == 0:
            doc['time_index'] = index
        rows += [doc]
    return rows
コード例 #11
0
ファイル: graph_util.py プロジェクト: wtgme/ohsn
def load_hashtag_coocurrent_network_undir(db_name, collection='None', uids=[]):
    '''
    Build an undirected, weighted hashtag co-occurrence graph.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    uids: when non-empty, only documents whose 'user.id' is in this list are
        read. The list is only read, never modified.

    Only documents with a non-empty ``entities.hashtags`` array are read;
    documents carrying a 'retweeted_status' field are not excluded.
    Hashtags are normalised (UTF-8 encoded, lower-cased, '_' and '-'
    removed) and de-duplicated per document.

    Resulting graph:
      * vertex 'name'   - normalised hashtag
      * vertex 'weight' - number of documents containing the hashtag
      * vertex 'user'   - number of distinct row['user']['id'] values that
                          used the hashtag
      * edge 'weight'   - number of documents in which both hashtags occur
    '''
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges, node_weight = {}, {}, {}
    tag_user = {}
    query = {}
    if len(uids) > 0:
        query['user.id'] = {'$in': uids}
    query['$where'] = 'this.entities.hashtags.length>0'
    for row in cols.find(query, no_cursor_timeout=True):
        hash_set = set()
        for tag in row['entities']['hashtags']:
            # Original author's note: the .encode('utf-8') may be unnecessary;
            # it is kept so vertex names keep their current type.
            hash_set.add(tag['text'].encode('utf-8').lower().replace('_', '').replace('-', ''))
        hash_list = list(hash_set)
        for i, n1 in enumerate(hash_list):
            # setdefault hands out the next free id on first sight of a name.
            n1id = name_map.setdefault(n1, len(name_map))
            node_weight[n1id] = node_weight.get(n1id, 0) + 1
            tag_user.setdefault(n1id, set()).add(row['user']['id'])

            # Pair n1 with every later tag only, so each unordered pair is
            # counted once per document. Tags come from a set and are
            # therefore already distinct.
            for n2 in hash_list[i + 1:]:
                n2id = name_map.setdefault(n2, len(name_map))
                # Store the pair smallest-id-first so (a, b) and (b, a)
                # accumulate on the same undirected edge.
                pair = (n1id, n2id) if n1id < n2id else (n2id, n1id)
                edges[pair] = edges.get(pair, 0) + 1
    g = Graph(len(name_map), directed=False)
    # Names sorted by their id line up with vertex indices 0..n-1.
    name_list = sorted(name_map, key=name_map.get)
    g.vs["name"] = name_list
    g.vs["weight"] = [node_weight[name_map[name]] for name in name_list]
    g.vs['user'] = [len(tag_user[name_map[name]]) for name in name_list]
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
コード例 #12
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def load_beh_network_subset(userlist,
                            db_name,
                            collection='None',
                            btype='communication',
                            tag=None):
    '''
    Build a directed, weighted behaviour graph restricted to ``userlist``.

    userlist: ids to keep; a row is read only when both its 'id0' and 'id1'
        are in this list.
    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    btype: which record types to load: 'retweet' -> [1], 'reply' -> [2],
        'mention' -> [3], 'communication' -> [2, 3], 'all' -> [1, 2, 3].
        Any other value raises KeyError.
    tag: optional list; when truthy, only rows whose 'tags' field matches
        one of its values are read.

    Type codes as listed by the original author: Tweet 0, Retweet 1,
    Reply 2, Direct mention 3, Undirect mention 4.

    Each matching row adds 1 to the weight of the edge from the vertex named
    str(row['id0']) to the vertex named str(row['id1']); rows where both ids
    are equal are ignored.

    NOTE(review): the original notes say reply/mention edges run u0 -> u1 and
    retweet edges u1 -> u0; how u0/u1 map onto id0/id1 for retweets is decided
    where the records are written, not here - confirm against that code.

    Returns the constructed Graph.
    '''
    btype_dic = {
        'retweet': [1],
        'reply': [2],
        'mention': [3],
        'communication': [2, 3],
        'all': [1, 2, 3]
    }
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, {}
    query = {
        'type': {'$in': btype_dic[btype]},
        'id0': {'$in': userlist},
        'id1': {'$in': userlist},
    }
    if tag:
        query['tags'] = {'$in': tag}
    for row in cols.find(query, no_cursor_timeout=True):
        n1 = str(row['id0'])
        n2 = str(row['id1'])
        if n1 == n2:
            # Self-interactions carry no network information.
            continue
        # setdefault hands out the next free id on first sight of a name.
        n1id = name_map.setdefault(n1, len(name_map))
        n2id = name_map.setdefault(n2, len(name_map))
        edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    g = Graph(len(name_map), directed=True)
    # Names sorted by their id line up with vertex indices 0..n-1.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order (no reliance on
    # keys()/values() iterating in step).
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
コード例 #13
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def load_beh_network(db_name, collection='None', btype='communication'):
    '''
    Build a directed, weighted behaviour graph from all interaction records.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    btype: which record types to load: 'retweet' -> [1], 'reply' -> [2],
        'mention' -> [3], 'communication' -> [2, 3]. Any other value raises
        KeyError.

    Type codes as listed by the original author: Tweet 0, Retweet 1,
    Reply 2, Direct mention 3, Undirect mention 4.

    Each matching row adds 1 to the weight of the edge from the vertex named
    str(row['id0']) to the vertex named str(row['id1']); rows where both ids
    are equal are ignored.

    NOTE(review): the original notes say reply/mention edges run u0 -> u1 and
    retweet edges u1 -> u0; how u0/u1 map onto id0/id1 for retweets is decided
    where the records are written, not here - confirm against that code.

    Returns the constructed Graph.
    '''
    btype_dic = {
        'retweet': [1],
        'reply': [2],
        'mention': [3],
        'communication': [2, 3]
    }
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges = {}, {}
    query = {'type': {'$in': btype_dic[btype]}}
    for row in cols.find(query, no_cursor_timeout=True):
        n1 = str(row['id0'])
        n2 = str(row['id1'])
        if n1 == n2:
            # Self-interactions carry no network information.
            continue
        # setdefault hands out the next free id on first sight of a name.
        n1id = name_map.setdefault(n1, len(name_map))
        n2id = name_map.setdefault(n2, len(name_map))
        edges[(n1id, n2id)] = edges.get((n1id, n2id), 0) + 1
    g = Graph(len(name_map), directed=True)
    # Names sorted by their id line up with vertex indices 0..n-1.
    g.vs["name"] = sorted(name_map, key=name_map.get)
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order.
    edge_list = list(edges)
    g.add_edges(edge_list)
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g
コード例 #14
0
ファイル: export_csv.py プロジェクト: abiraja2004/ohsn
def export_net_agg(dbname, colname, file_name):
    '''
    Aggregate interaction records into per-pair counts and write them out
    through ``csv_output``.

    Rows of ``dbname``.``colname`` with 'type' 1, 2 or 3 are grouped by
    (id0, id1, type); rows where id0 equals id1 are ignored. One output row
    per group is produced with the keys 'id0', 'id1', 'type' (as the label
    'retweet', 'reply' or 'mention') and 'count'. Nothing is returned.
    '''
    net = dbutil.db_connect_no_auth(dbname)[colname]
    fields = ['id0', 'id1', 'type', 'count']
    ttypes = {1: 'retweet', 2: 'reply', 3: 'mention'}

    # Original author's note: "Only include poi users". The query below does
    # not filter by user - presumably the collection is already restricted to
    # those users; verify against the caller.
    tallies = {}
    for rec in net.find({"type": {'$in': [1, 2, 3]}}):
        id0, id1, typeid = rec['id0'], rec['id1'], rec['type']
        if id0 != id1:
            group = (id0, id1, typeid)
            tallies[group] = tallies.get(group, 0) + 1

    data = [
        {'id0': id0, 'id1': id1, 'type': ttypes[typeid], 'count': total}
        for (id0, id1, typeid), total in tallies.items()
    ]
    csv_output(fields, file_name, data)
コード例 #15
0
ファイル: graph_util.py プロジェクト: abiraja2004/ohsn
def load_hashtag_coocurrent_network_undir(db_name, collection='None', uids=[]):
    '''
    Build an undirected, weighted hashtag co-occurrence graph, excluding
    retweets.

    db_name: database name, or - when ``collection`` is left at 'None' - an
        already opened collection object that is queried directly.
    collection: collection name inside ``db_name``; the string 'None' means
        that ``db_name`` is itself the collection.
    uids: when non-empty, only documents whose 'user.id' is in this list are
        read. The list is only read, never modified.

    Only documents with a non-empty ``entities.hashtags`` array and without
    a 'retweeted_status' field are read. Hashtags are normalised (UTF-8
    encoded, lower-cased, '_' and '-' removed) and de-duplicated per
    document.

    Resulting graph:
      * vertex 'name'   - normalised hashtag
      * vertex 'weight' - number of documents containing the hashtag
      * vertex 'user'   - number of distinct row['user']['id'] values that
                          used the hashtag
      * edge 'weight'   - number of documents in which both hashtags occur
    '''
    # Compare by value: "is" tests object identity and only matched the
    # literal 'None' thanks to string interning.
    if collection == 'None':
        cols = db_name
    else:
        cols = dbt.db_connect_no_auth(db_name)[collection]
    name_map, edges, node_weight = {}, {}, {}
    tag_user = {}
    query = {}
    if len(uids) > 0:
        query['user.id'] = {'$in': uids}
    query['$where'] = 'this.entities.hashtags.length>0'
    query['retweeted_status'] = {'$exists': False}
    for row in cols.find(query, no_cursor_timeout=True):
        hash_set = set()
        for tag in row['entities']['hashtags']:
            # Original author's note: the .encode('utf-8') may be unnecessary;
            # it is kept so vertex names keep their current type.
            hash_set.add(tag['text'].encode('utf-8').lower().replace(
                '_', '').replace('-', ''))
        hash_list = list(hash_set)
        for i, n1 in enumerate(hash_list):
            # setdefault hands out the next free id on first sight of a name.
            n1id = name_map.setdefault(n1, len(name_map))
            node_weight[n1id] = node_weight.get(n1id, 0) + 1
            # Original author's note: data from another source keeps the user
            # id under row['from_user_id'] instead of row['user']['id'].
            tag_user.setdefault(n1id, set()).add(row['user']['id'])

            # Pair n1 with every later tag only, so each unordered pair is
            # counted once per document. Tags come from a set and are
            # therefore already distinct.
            for n2 in hash_list[i + 1:]:
                n2id = name_map.setdefault(n2, len(name_map))
                # Store the pair smallest-id-first so (a, b) and (b, a)
                # accumulate on the same undirected edge.
                pair = (n1id, n2id) if n1id < n2id else (n2id, n1id)
                edges[pair] = edges.get(pair, 0) + 1
    g = Graph(len(name_map), directed=False)
    # Names sorted by their id line up with vertex indices 0..n-1.
    name_list = sorted(name_map, key=name_map.get)
    g.vs["name"] = name_list
    # Number of occurrences of each hashtag.
    g.vs["weight"] = [node_weight[name_map[name]] for name in name_list]
    # Number of distinct users of each hashtag.
    g.vs['user'] = [len(tag_user[name_map[name]]) for name in name_list]
    # Freeze the edge order once and derive the weights from it, so edges and
    # weights are real lists in guaranteed matching order.
    edge_list = list(edges)
    g.add_edges(edge_list)
    # Number of co-occurrences of each hashtag pair.
    g.es["weight"] = [edges[pair] for pair in edge_list]
    return g