예제 #1
0
def write_graph(G, user_names, media_prov, is_conservative):
    """Serialize graph ``G`` to ``website/outgroup.json`` as nodes and links.

    Every node of ``G`` is translated to a user name through ``user_names``
    and emitted with its media provenance, a numeric ``media_index``, and its
    neoconservative / conservative flags. Every edge becomes a link carrying
    the edge's ``weight`` attribute.

    Args:
        G: Graph exposing ``nodes()``, ``edges()`` and ``G[u][v]['weight']``.
        user_names: Mapping from node to user name.
        media_prov: Mapping from user name to a media key, or ``''``.
        is_conservative: Mapping from user name to the ``conservative`` value.
    """
    reversed_media = list(MEDIA.keys())[::-1]

    nodes = []
    for node in G.nodes():
        name = user_names[node]
        provenance = media_prov[name]
        # Index 0 is reserved for users without a media provenance; all others
        # get 1 + their position in the reversed MEDIA key order.
        index = 0 if provenance == '' else reversed_media.index(provenance) + 1
        nodes.append({
            'id': name,
            'media': provenance,
            'media_index': index,
            'neoconservative': name.lower() in NEOCONSERVATIVES,
            'conservative': is_conservative[name],
        })

    links = [
        {
            'source': user_names[edge[0]],
            'target': user_names[edge[1]],
            'value': G[edge[0]][edge[1]]['weight'],
        }
        for edge in G.edges()
    ]

    writej({'nodes': nodes, 'links': links}, 'website/outgroup.json')
예제 #2
0
async def get_user_by_id(user_id, client):
    """Return the user object for ``user_id``, preferring the saved copy.

    The object is looked up with ``loadj`` under ``PREFIX + user_id + '_user'``.
    When ``loadj`` returns ``None`` the user is requested through
    ``client.api.users.lookup``, the first result is written to that same path
    and returned.
    """
    cache_path = PREFIX + user_id + '_user'
    cached = loadj(cache_path)
    if cached is None:
        # Nothing saved yet: ask the API and persist the first result.
        lookup = await client.api.users.lookup.get(user_id=user_id)
        user = lookup[0]
        writej(user, cache_path)
        return user
    return cached
예제 #3
0
async def get_friends(user_id, screen_name, client):
    """Collect the friend ids of ``user_id`` as strings and save them.

    Pages through ``client.api.friends.ids`` with a cursor iterator, converts
    every id to ``str`` and writes the list to
    ``PREFIX + str(user_id) + '_' + screen_name + '_friends'`` with
    ``overwrite=False``.

    Returns:
        The list of ids gathered. If an exception occurs it is printed and
        whatever was collected up to that point is returned.
    """
    cursor = client.api.friends.ids.get.iterator.with_cursor(
        user_id=user_id, count=3000)

    collected = []
    try:
        async for page in cursor:
            collected += [str(friend_id) for friend_id in page.ids]

        target = PREFIX + str(user_id) + '_' + screen_name + '_friends'
        writej(collected, target, overwrite=False)
    except Exception as err:
        # Best effort: report the problem and fall through to the return.
        print(err)

    return collected
예제 #4
0
def gen_keywords():
    """Extract keywords for links not yet parsed and persist the results.

    Loads the existing keywords (``KEYWORDS_PATH``) and the cache of parsed
    links (``PARSED_LINKS_PATH``) when those files exist, gathers the links of
    every user listed in ``website/outgroup.json``, runs
    ``get_keywords_pmap`` over the unseen links in a worker pool, merges the
    results into the keywords dict and writes both structures back.

    ``parsed_links`` maps each raw link to its canonical link, or to ``''``
    when no canonical link is known for it.
    """
    keywords = load_keywords() if os.path.exists(KEYWORDS_PATH) else {}

    # We're keeping a cache of already parsed links and also
    # are storing their matched canonical link to ensure that we can
    # add a user to the set of an already parsed link.
    if os.path.exists(PARSED_LINKS_PATH):
        parsed_links = loadj(PARSED_LINKS_PATH)
    else:
        parsed_links = {}

    graph_names = [n['id'] for n in loadj('website/outgroup.json')['nodes']]
    user_ids = [USER_IDS[name] for name in graph_names]
    user_links = get_user_links(user_ids)
    all_links = []
    for user_name, links in user_links.items():
        for link in links:
            # Don't process the link if we already did.
            if link not in parsed_links:
                all_links.append((user_name, link))
                # Mark as seen so the same link is queued only once per run.
                parsed_links[link] = ''
                continue
            canonical = parsed_links[link]
            if canonical != '' and canonical in keywords:
                # assumes load_keywords() returns 'users' as a set -- TODO confirm
                keywords[canonical]['users'].add(user_name)

    print(f'{len(all_links)} to parse...')
    pool = Pool(NUM_WORKERS)
    try:
        kw_tuples = pool.starmap(get_keywords_pmap, all_links)
    finally:
        # Always release the worker processes, even if parsing raised.
        pool.close()
        pool.join()

    for user_name, c_link, link, kws, p_time, title in kw_tuples:
        if c_link is None:
            parsed_links[link] = ''
            continue
        parsed_links[link] = c_link
        # A canonical link seen for the first time has no entry yet; create it
        # instead of raising KeyError.
        entry = keywords.setdefault(c_link, {})
        entry['kws'] = kws
        entry['time'] = p_time
        entry['title'] = title
        entry.setdefault('users', set()).add(user_name)

    # Make the keywords dict serializable.
    for entry in keywords.values():
        entry['users'] = list(entry['users'])
        entry['kws'] = list(entry['kws'])

    writej(keywords, KEYWORDS_PATH)
    writej(parsed_links, PARSED_LINKS_PATH)
예제 #5
0
def write_user_names():
    """Build the id -> screen name map from saved user files, and its inverse.

    Every entry of the ``PREFIX`` directory whose name contains ``'_user'``
    (first occurrence not at position 0) is loaded with ``loadj``; its
    ``id_str`` is mapped to its ``screen_name``. Entries that fail to load or
    lack those keys are printed together with the error and skipped. The map
    is written to ``PREFIX + 'user_names'`` and the reversed map to
    ``PREFIX + 'user_ids'``.
    """
    names_by_id = {}
    for entry in os.listdir(PREFIX):
        if entry.find('_user') <= 0:
            continue
        try:
            user = loadj(PREFIX + entry)
            names_by_id[user['id_str']] = user['screen_name']
        except Exception as err:
            print(entry, err)
    writej(names_by_id, PREFIX + 'user_names')

    # Write the reverse dict as well.
    ids_by_name = {name: uid for uid, name in names_by_id.items()}
    writej(ids_by_name, PREFIX + 'user_ids')
예제 #6
0
async def write_followers(user_id, client):
    """Fetch the follower ids of ``user_id`` and save them, once.

    Does nothing when ``PREFIX + str(user_id) + '_followers'`` already exists.
    Otherwise pages through ``client.api.followers.ids`` with a cursor
    iterator and writes the collected ids with ``writej``
    (``overwrite=False``).

    Failures are printed rather than raised (best effort). The completion
    message is printed only when the ids were fetched and written without an
    exception.
    """
    # Stringify the id for the path (as get_friends does) so that a
    # non-string id does not raise TypeError on concatenation.
    file_path = PREFIX + str(user_id) + '_followers'
    if os.path.exists(file_path):
        return

    print(f'Getting followers for {user_id}...')
    followers_ids = client.api.followers.ids.get.iterator.with_cursor(
        id=user_id, count=MAX_SEED_FOLLOWER_COUNT)

    followers = []
    try:
        async for data in followers_ids:
            followers.extend(data.ids)

        writej(followers, file_path, overwrite=False)
    except Exception as e:
        print(e)
    else:
        # Only claim success when nothing went wrong above.
        print(f'Done getting followers for {user_id}.')
예제 #7
0
async def get_user_by_name(screen_name, client):
    """Look up ``screen_name`` through ``client`` and save the user object.

    The first result of ``client.api.users.lookup`` is written to
    ``PREFIX + <id_str> + '_user'`` with ``overwrite=False`` and returned.
    """
    lookup = await client.api.users.lookup.get(screen_name=screen_name)
    found = lookup[0]
    writej(found, PREFIX + found['id_str'] + '_user', overwrite=False)
    return found
예제 #8
0
def flush_cache(app):
    """Write ``app['link_cache']`` to ``PREFIX + 'link_cache.json'``."""
    cache = app['link_cache']
    target = PREFIX + 'link_cache.json'
    writej(cache, target)