def get_social_encounters(social_path, encounters_path, dest_path,
                          save_file=True):
    """Tabulate encounter pairs together with their social-graph distance.

    NOTE(review): a second function with this exact name is defined later in
    this file; once the module is imported, that later definition replaces
    this one. Confirm whether this single-graph variant is still needed.

    For every distinct edge of the encounters graph this records the two
    endpoints, ``len(encs_graph[source][dest])`` as the encounter count, and
    the shortest-path length between the endpoints in the social graph.

    Args:
        social_path: path passed to ``utils.load_pickle`` for the social
            graph.
        encounters_path: path passed to ``utils.load_pickle`` for the
            encounters graph.
        dest_path: CSV destination; only written when ``save_file`` is true.
        save_file: when true, the resulting frame is written with
            ``to_csv(dest_path, index=False)``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``constants.USER_1``,
        ``constants.USER_2``, ``constants.ENCS_COUNT`` and
        ``constants.SOC_DIST``. ``SOC_DIST`` holds the module-level
        ``NO_PATH`` value for pairs whose distance could not be computed.
    """
    print('loading social graph: %s' % social_path)
    social_graph = utils.load_pickle(social_path)
    print('loading encounters graph: %s' % encounters_path)
    encs_graph = utils.load_pickle(encounters_path)
    encs_edges_set = set(encs_graph.edges())

    df_dict = {
        constants.USER_1: [],
        constants.USER_2: [],
        constants.ENCS_COUNT: [],
        constants.SOC_DIST: [],
    }

    total_edges = len(encs_edges_set)
    print('iterating through %d edges.' % total_edges)
    # Report progress roughly every 1%. The step is clamped to 1 so that
    # inputs with fewer than 100 edges do not trigger a modulo-by-zero.
    progress_step = max(1, total_edges // 100)
    disconnected_edges = 0

    for count, (source, dest) in enumerate(encs_edges_set, 1):
        if count % progress_step == 0:
            # Fraction of edges handled so far (never exceeds 1.0).
            print(float(count) / total_edges)

        df_dict[constants.USER_1].append(source)
        df_dict[constants.USER_2].append(dest)
        df_dict[constants.ENCS_COUNT].append(len(encs_graph[source][dest]))

        try:
            distance = nx.shortest_path_length(social_graph, source, dest)
        except (nx.NetworkXException, KeyError):
            # No path between the users, or one of them is missing from the
            # social graph (reported as a NetworkX exception or a KeyError
            # depending on the networkx version).
            disconnected_edges += 1
            distance = NO_PATH
        df_dict[constants.SOC_DIST].append(distance)

    print('edges without a social connection: %s' % disconnected_edges)
    dataframe = pd.DataFrame(df_dict)
    if save_file:
        dataframe.to_csv(dest_path, index=False)
    return dataframe
def get_social_encounters(social_path_0, social_path_1, social_path_2,
                          encounters_path, dest_path, save_file=True):
    """Tabulate encounter pairs with their distance in three social graphs.

    For every distinct edge of the encounters graph this records the two
    endpoints, ``len(encs_graph[source][dest])`` as the encounter count, and
    the shortest-path length between the endpoints in each of the three
    social graphs.

    Args:
        social_path_0: path passed to ``utils.load_pickle`` for the graph
            whose distances go into ``constants.MODE_0_DIST``.
        social_path_1: same, for ``constants.MODE_1_DIST``.
        social_path_2: same, for ``constants.MODE_2_DIST``.
        encounters_path: path passed to ``utils.load_pickle`` for the
            encounters graph.
        dest_path: CSV destination; only written when ``save_file`` is true.
        save_file: when true, the resulting frame is written with
            ``to_csv(dest_path, index=False)``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``constants.USER_1``,
        ``constants.USER_2``, ``constants.ENCS_COUNT`` and the three
        ``constants.MODE_*_DIST`` columns. A distance column holds the
        module-level ``NO_PATH`` value when either user is absent from that
        graph or no path connects them.
    """
    dist_columns = (
        constants.MODE_0_DIST,
        constants.MODE_1_DIST,
        constants.MODE_2_DIST,
    )

    social_graphs = []
    for social_path in (social_path_0, social_path_1, social_path_2):
        print('loading social graph: %s' % social_path)
        social_graphs.append(utils.load_pickle(social_path))

    print('loading encounters graph: %s' % encounters_path)
    encs_graph = utils.load_pickle(encounters_path)
    encs_edges_set = set(encs_graph.edges())

    df_dict = {
        constants.USER_1: [],
        constants.USER_2: [],
        constants.ENCS_COUNT: [],
    }
    for column in dist_columns:
        df_dict[column] = []

    total_edges = len(encs_edges_set)
    print('iterating through %d edges.' % total_edges)
    # Report progress roughly every 1%. The step is clamped to 1 so that
    # inputs with fewer than 100 edges do not trigger a modulo-by-zero.
    progress_step = max(1, total_edges // 100)
    # Number of encounter edges that have no path in ANY of the three graphs.
    # (Previously this counter was never updated, so the summary was always 0.)
    disconnected_edges = 0

    for count, (source, dest) in enumerate(encs_edges_set, 1):
        if count % progress_step == 0:
            # Fraction of edges handled so far (never exceeds 1.0).
            print(float(count) / total_edges)

        df_dict[constants.USER_1].append(source)
        df_dict[constants.USER_2].append(dest)
        df_dict[constants.ENCS_COUNT].append(len(encs_graph[source][dest]))

        connected_somewhere = False
        for column, graph in zip(dist_columns, social_graphs):
            distance = NO_PATH
            if source in graph and dest in graph:
                # A single search replaces the former has_path() +
                # shortest_path_length() pair, which walked the graph twice.
                try:
                    distance = nx.shortest_path_length(graph, source, dest)
                except nx.NetworkXNoPath:
                    pass
                else:
                    connected_somewhere = True
            df_dict[column].append(distance)

        if not connected_somewhere:
            disconnected_edges += 1

    print('edges without a social connection: %s' % disconnected_edges)
    dataframe = pd.DataFrame(df_dict)
    if save_file:
        dataframe.to_csv(dest_path, index=False)
    return dataframe
def combine_maps_for_months(data_dir='/home/niquo/niquo_data',
                            months_paths=USE_MONTHS):
    """Merge every month's master graph into one graph stored in data_dir.

    For each entry of ``months_paths`` an ``imap.InteractionMap`` is built
    for ``data_dir/<entry>`` and ``combine_all_graphs`` is called on that
    month's ``tower_encounters`` directory. Afterwards each month's
    ``MASTER_GRAPH.p`` is loaded from that directory and folded into an
    ``imap.InteractionMap`` created for ``data_dir``, which is finally
    stored at ``data_dir/MASTER_GRAPH.p``.

    Args:
        data_dir: directory that contains the month sub-directories.
        months_paths: iterable of month sub-directory names.

    Returns:
        Always ``True``.
    """
    print('combing interactin graphs for data from: %s' % data_dir)
    tower_subdir = 'tower_encounters'
    master_name = 'MASTER_GRAPH.p'

    monthly_masters = []
    for month_name in months_paths:
        month_dir = os.path.join(data_dir, month_name)
        print('combing for month path: %s' % month_dir)
        month_map = imap.InteractionMap(month_dir)
        tower_dir = os.path.join(month_dir, tower_subdir)
        monthly_masters.append(os.path.join(tower_dir, master_name))
        # presumably this is what produces the month's MASTER_GRAPH.p that
        # is loaded below -- confirm in imap.InteractionMap
        month_map.combine_all_graphs(tower_dir)

    combined_map = imap.InteractionMap(data_dir)
    print('made a general imap object for: %s' % data_dir)
    combined_dest = os.path.join(data_dir, master_name)

    for master_path in monthly_masters:
        print('combing map from: %s' % master_path)
        combined_map.combine_maps(utils.load_pickle(master_path))

    print('storing data: %s' % combined_dest)
    combined_map.store_data(combined_dest)
    print('complete.')
    return True
def create_encs_df_select_friends(first_call_csv, root_path,
                                  dest_filename=constants.PAIRS_CSV):
    """Build and store a per-friend-pair table of encounters and distances.

    The friend pairs come from ``split_users_first_call_csv(first_call_csv)``.
    Each row gets an encounter count (via ``apply_encs``) and one distance
    per social-graph mode (via ``apply_distance`` on the undirected version
    of each pickled graph). Rows whose encounter count is below zero are
    dropped before the frame is written to ``root_path/dest_filename``.

    Args:
        first_call_csv: argument forwarded to ``split_users_first_call_csv``.
        root_path: directory holding the pickled encounters dict and the
            three mode graphs; also where the CSV is written.
        dest_filename: name of the CSV file created inside ``root_path``.

    Returns:
        The filtered ``pandas.DataFrame`` that was written to disk.
    """
    pairs_pickle = os.path.join(root_path, constants.ENCS_DICT)
    print('loading all encounter pairs from: %s' % pairs_pickle)
    all_encs = utils.load_pickle(pairs_pickle)

    friend_df = split_users_first_call_csv(first_call_csv)
    pair_values = friend_df[[constants.USER_1, constants.USER_2]].values
    friend_pairs = {(first, second) for first, second in pair_values}
    # Only the encounter entries that belong to a friend pair are kept.
    relevant_encs = {}
    for pair in friend_pairs:
        if pair in all_encs:
            relevant_encs[pair] = all_encs[pair]

    mode_specs = (
        (0, constants.MODE_0_GRAPH, constants.MODE_0_DIST),
        (1, constants.MODE_1_GRAPH, constants.MODE_1_DIST),
        (2, constants.MODE_2_GRAPH, constants.MODE_2_DIST),
    )

    # All three graphs are loaded before any column is computed.
    mode_graphs = []
    for mode, graph_file, _ in mode_specs:
        graph_path = os.path.join(root_path, graph_file)
        print('loading graph mode %d: %s' % (mode, graph_path))
        mode_graphs.append(utils.load_pickle(graph_path).to_undirected())

    friend_df[constants.ENCS_COUNT] = friend_df.apply(
        lambda row: apply_encs(relevant_encs, row), axis=1)
    for (_, _, dist_column), graph in zip(mode_specs, mode_graphs):
        friend_df[dist_column] = friend_df.apply(
            lambda row, graph=graph: apply_distance(graph, row), axis=1)

    # presumably apply_encs yields a negative value for pairs without any
    # recorded encounter -- confirm against apply_encs
    has_encounters = friend_df[constants.ENCS_COUNT] >= 0
    friend_df = friend_df[has_encounters]

    dest_path = os.path.join(root_path, dest_filename)
    # Two spaces after "at" match the original message byte-for-byte.
    print('storing dataframe at  %s' % dest_path)
    friend_df.to_csv(dest_path, index=False)
    return friend_df
def get_encounters_for_pairs(root_path, dest_path):
    """Accumulate per-pair encounter counts over all tower graphs and pickle them.

    Walks ``root_path/TOWER_ENCS_DIR/<day>/<tower file>``, loads every tower
    file with ``utils.load_pickle`` and, for each distinct edge of that
    graph, passes ``len(tower_graph[user_1][user_2])`` to ``add_encs_count``
    together with the shared result dict.

    Args:
        root_path: directory that contains the ``TOWER_ENCS_DIR`` folder.
        dest_path: file the resulting dict is pickled to (binary mode).

    Returns:
        The dict populated by ``add_encs_count``.
    """
    encs_dict = {}
    # normpath drops a trailing separator so the logged name is never empty.
    root = os.path.basename(os.path.normpath(root_path))
    encs_path = os.path.join(root_path, TOWER_ENCS_DIR)
    print('**************************')
    print('entering root path: %s' % root)

    for day_dir in os.listdir(encs_path):
        day_path = os.path.join(encs_path, day_dir)
        if not os.path.isdir(day_path):
            # Regular files can sit next to the day folders (elsewhere in
            # this file a MASTER_GRAPH.p is read from a month's
            # 'tower_encounters' directory); os.listdir() on such an entry
            # would raise OSError, so skip it.
            print('skipping non-directory entry: %s' % day_path)
            continue

        print('________________')
        print('opening day path: %s' % day_path)
        for tower_file in os.listdir(day_path):
            print('opening tower file: %s' % tower_file)
            tower_path = os.path.join(day_path, tower_file)
            tower_graph = utils.load_pickle(tower_path)
            for user_1, user_2 in set(tower_graph.edges()):
                count = len(tower_graph[user_1][user_2])
                add_encs_count(encs_dict, user_1, user_2, count)
            # Drop the reference before the next graph is loaded.
            del tower_graph

    with open(dest_path, 'wb') as outfile:
        cPickle.dump(encs_dict, outfile)
    return encs_dict