def write_graph(G, user_names, media_prov, is_conservative):
    res = {'nodes': [], 'links': []}
    media = list(MEDIA.keys())[::-1]
    for n in G.nodes():
        user_name = user_names[n]
        # media_index 0 is reserved for users with no media provenance.
        if media_prov[user_name] == '':
            media_index = 0
        else:
            media_index = media.index(media_prov[user_name]) + 1
        neoconservative = user_name.lower() in NEOCONSERVATIVES
        res['nodes'].append({
            'id': user_name,
            'media': media_prov[user_name],
            'media_index': media_index,
            'neoconservative': neoconservative,
            'conservative': is_conservative[user_name]
        })
    for e in G.edges():
        res['links'].append({
            'source': user_names[e[0]],
            'target': user_names[e[1]],
            'value': G[e[0]][e[1]]['weight']
        })
    writej(res, 'website/outgroup.json')
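# Illustrative only (not from the original module): write_graph expects a
# networkx-style graph whose edges carry a 'weight' attribute, plus the three
# lookup dicts keyed as below. The names and ids here are made up.
def _write_graph_example():
    import networkx as nx  # assumed graph library; matches the G API above
    G = nx.Graph()
    G.add_edge('1', '2', weight=3)
    user_names = {'1': 'alice', '2': 'bob'}
    media_prov = {'alice': '', 'bob': list(MEDIA.keys())[0]}
    is_conservative = {'alice': False, 'bob': True}
    write_graph(G, user_names, media_prov, is_conservative)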
async def get_user_by_id(user_id, client):
    file_path = PREFIX + user_id + '_user'
    # Serve from the on-disk cache if we've already fetched this user.
    obj = loadj(file_path)
    if obj is not None:
        return obj
    res = await client.api.users.lookup.get(user_id=user_id)
    writej(res[0], file_path)
    return res[0]
async def get_friends(user_id, screen_name, client):
    # Page through the friend ids with a cursor. If the API errors midway
    # (e.g. rate limiting), the partial list is returned but not cached.
    friends_ids = client.api.friends.ids.get.iterator.with_cursor(
        user_id=user_id, count=3000)
    friends = []
    try:
        async for data in friends_ids:
            friends.extend([str(f) for f in data.ids])
        file_path = PREFIX + str(user_id) + '_' + screen_name + '_friends'
        writej(friends, file_path, overwrite=False)
    except Exception as e:
        print(e)
    return friends
def gen_keywords():
    if os.path.exists(KEYWORDS_PATH):
        keywords = load_keywords()
    else:
        keywords = {}
    # We keep a cache of already-parsed links and store each link's matched
    # canonical link, so we can add a user to the set for a link that has
    # already been parsed.
    if os.path.exists(PARSED_LINKS_PATH):
        parsed_links = loadj(PARSED_LINKS_PATH)
    else:
        parsed_links = {}
    graph_names = [n['id'] for n in loadj('website/outgroup.json')['nodes']]
    user_ids = [USER_IDS[name] for name in graph_names]
    user_links = get_user_links(user_ids)
    all_links = []
    for user_name, links in user_links.items():
        for l in links:
            # Don't process the link if we already did.
            if l not in parsed_links:
                all_links.append((user_name, l))
                parsed_links[l] = ''
            elif parsed_links[l] != '' and parsed_links[l] in keywords:
                keywords[parsed_links[l]]['users'].add(user_name)
    print(f'{len(all_links)} to parse...')
    p = Pool(NUM_WORKERS)
    kw_tuples = p.starmap(get_keywords_pmap, all_links)
    for user_name, c_link, l, kws, p_time, title in kw_tuples:
        if c_link is not None:
            parsed_links[l] = c_link
            # Initialize the entry for canonical links we haven't seen before.
            if c_link not in keywords:
                keywords[c_link] = {}
            keywords[c_link]['kws'] = kws
            keywords[c_link]['time'] = p_time
            if 'users' not in keywords[c_link]:
                keywords[c_link]['users'] = set()
            keywords[c_link]['users'].add(user_name)
            keywords[c_link]['title'] = title
        else:
            parsed_links[l] = ''
    # Make the keywords dict serializable.
    for c_link in keywords:
        keywords[c_link]['users'] = list(keywords[c_link]['users'])
        keywords[c_link]['kws'] = list(keywords[c_link]['kws'])
    writej(keywords, KEYWORDS_PATH)
    writej(parsed_links, PARSED_LINKS_PATH)
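# For reference, one serialized KEYWORDS_PATH entry ends up shaped roughly
# like this (all values invented for illustration; the 'time' format is
# whatever get_keywords_pmap returns as p_time):
_EXAMPLE_KEYWORDS_ENTRY = {
    'https://example.com/story': {
        'kws': ['immigration', 'senate'],
        'time': None,
        'users': ['alice', 'bob'],
        'title': 'Example headline',
    },
}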
def write_user_names():
    user_names = {}
    files = [f for f in os.listdir(PREFIX) if f.find('_user') > 0]
    for f_name in files:
        try:
            obj = loadj(PREFIX + f_name)
            user_names[obj['id_str']] = obj['screen_name']
        except Exception as e:
            print(f_name, e)
    writej(user_names, PREFIX + 'user_names')
    # Write the reverse dict as well.
    user_ids = {name: user_id for user_id, name in user_names.items()}
    writej(user_ids, PREFIX + 'user_ids')
async def write_followers(user_id, client):
    file_path = PREFIX + user_id + '_followers'
    if os.path.exists(file_path):
        return
    print(f'Getting followers for {user_id}...')
    followers_ids = client.api.followers.ids.get.iterator.with_cursor(
        id=user_id, count=MAX_SEED_FOLLOWER_COUNT)
    try:
        followers = []
        async for data in followers_ids:
            followers.extend(data.ids)
        writej(followers, file_path, overwrite=False)
    except Exception as e:
        print(e)
    print(f'Done getting followers for {user_id}.')
async def get_user_by_name(screen_name, client):
    res = await client.api.users.lookup.get(screen_name=screen_name)
    user = res[0]
    file_path = PREFIX + user['id_str'] + '_user'
    writej(user, file_path, overwrite=False)
    return user
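# Sketch of a driver tying the async fetchers together (not part of the
# original module). The cursor/iterator calls above match peony's API, so a
# peony-style client is assumed, e.g.:
#
#     client = peony.PeonyClient(consumer_key=..., consumer_secret=...,
#                                access_token=..., access_token_secret=...)
#     asyncio.run(_fetch_user_data('example_user', client))
async def _fetch_user_data(screen_name, client):
    user = await get_user_by_name(screen_name, client)
    await get_friends(user['id_str'], user['screen_name'], client)
    await write_followers(user['id_str'], client)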
def flush_cache(app):
    writej(app['link_cache'], PREFIX + 'link_cache.json')
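# flush_cache reads a 'link_cache' key off an app mapping, which suggests an
# aiohttp-style Application. If so, it can be registered as a cleanup hook
# (a sketch, assuming aiohttp):
#
#     async def _flush_on_cleanup(app):
#         flush_cache(app)
#
#     app.on_cleanup.append(_flush_on_cleanup)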