Example #1
0
def get_social_encounters(social_path, encounters_path, dest_path, save_file=True):
	print 'loading social graph:', social_path
	social_graph = utils.load_pickle(social_path)
	print 'loading encounters graph:', encounters_path
	encs_graph = utils.load_pickle(encounters_path)
	encs_edges_set = set(encs_graph.edges())
	df_dict = {constants.USER_1: [], constants.USER_2: [], constants.ENCS_COUNT: [], constants.SOC_DIST: []}
	print 'iterating through', len(encs_edges_set), 'edges.'
	disconnected_edges = 0
	count = 0
	hund = len(encs_edges_set)/100
	for source, dest in encs_edges_set:
		count += 1
		if count % hund == 0:
			print float(count)/(hund*100)
		encs_count = len(encs_graph[source][dest])
		df_dict[constants.USER_1].append(source)
		df_dict[constants.USER_2].append(dest)
		df_dict[constants.ENCS_COUNT].append(encs_count)
		try:
			distance = nx.shortest_path_length(social_graph, source, dest)
			df_dict[constants.SOC_DIST].append(distance)
		except Exception as e:
			disconnected_edges += 1
			df_dict[constants.SOC_DIST].append(NO_PATH)
			pass
	print 'edges without a social connection:', disconnected_edges
	dataframe = pd.DataFrame(df_dict)
	if save_file:
		dataframe.to_csv(dest_path, index=False)
	return dataframe
Example #2
0
def get_social_encounters(social_path_0, social_path_1, social_path_2, encounters_path, dest_path, save_file=True):
	print 'loading social graph:', social_path_0
	social_graph_0 = utils.load_pickle(social_path_0)
	print 'loading social graph:', social_path_1
	social_graph_1 = utils.load_pickle(social_path_1)
	print 'loading social graph:', social_path_2
	social_graph_2 = utils.load_pickle(social_path_2)

	print 'loading encounters graph:', encounters_path
	encs_graph = utils.load_pickle(encounters_path)
	encs_edges_set = set(encs_graph.edges())
	df_dict = {constants.USER_1: [], constants.USER_2: [], constants.ENCS_COUNT: [], constants.MODE_0_DIST: [],
				constants.MODE_1_DIST: [], constants.MODE_2_DIST: []}
	print 'iterating through', len(encs_edges_set), 'edges.'
	disconnected_edges = 0
	count = 0
	hund = len(encs_edges_set)/100
	for source, dest in encs_edges_set:
		count += 1
		if count % hund == 0:
			print float(count)/(hund*100)
		encs_count = len(encs_graph[source][dest])
		df_dict[constants.USER_1].append(source)
		df_dict[constants.USER_2].append(dest)
		df_dict[constants.ENCS_COUNT].append(encs_count)

		if (source in social_graph_0) and (dest in social_graph_0) and (nx.has_path(social_graph_0, source, dest)):
			distance_0 = nx.shortest_path_length(social_graph_0, source, dest)
			df_dict[constants.MODE_0_DIST].append(distance_0)
		else:
			df_dict[constants.MODE_0_DIST].append(NO_PATH)

		if (source in social_graph_1) and (dest in social_graph_1) and (nx.has_path(social_graph_1, source, dest)):
			distance_1 = nx.shortest_path_length(social_graph_1, source, dest)
			df_dict[constants.MODE_1_DIST].append(distance_1)
		else:
			df_dict[constants.MODE_1_DIST].append(NO_PATH)

		if (source in social_graph_2) and (dest in social_graph_2) and (nx.has_path(social_graph_2, source, dest)):
			distance_2 = nx.shortest_path_length(social_graph_2, source, dest)
			df_dict[constants.MODE_2_DIST].append(distance_2)
		else:
			df_dict[constants.MODE_2_DIST].append(NO_PATH)

	print 'edges without a social connection:', disconnected_edges
	dataframe = pd.DataFrame(df_dict)
	if save_file:
		dataframe.to_csv(dest_path, index=False)
	return dataframe
Example #3
0
def combine_maps_for_months(data_dir='/home/niquo/niquo_data',
                            months_paths=USE_MONTHS):
    print 'combing interactin graphs for data from:', data_dir
    TOWER_ENCS = 'tower_encounters'
    master_graph_filename = 'MASTER_GRAPH.p'
    month_master_paths = []
    for dir_str in months_paths:
        month_path = os.path.join(data_dir, dir_str)
        print 'combing for month path:', month_path
        inter_map_obj = imap.InteractionMap(month_path)
        tower_data_path = os.path.join(month_path, TOWER_ENCS)
        month_master = os.path.join(tower_data_path, master_graph_filename)
        month_master_paths.append(month_master)
        inter_map_obj.combine_all_graphs(tower_data_path)

    general_imap = imap.InteractionMap(data_dir)
    print 'made a general imap object for:', data_dir
    dest_filename = os.path.join(data_dir, master_graph_filename)
    for m_mast in month_master_paths:
        print 'combing map from:', m_mast
        month_graph = utils.load_pickle(m_mast)
        general_imap.combine_maps(month_graph)
    print 'storing data:', dest_filename
    general_imap.store_data(dest_filename)
    print 'complete.'
    return True
Example #4
0
def create_encs_df_select_friends(first_call_csv,
                                  root_path,
                                  dest_filename=constants.PAIRS_CSV):
    encs_path = os.path.join(root_path, constants.ENCS_DICT)
    print 'loading all encounter pairs from:', encs_path
    encs_dict = utils.load_pickle(encs_path)
    friend_df = split_users_first_call_csv(first_call_csv)
    friends_set = set([(user1, user2) for user1, user2 in friend_df[
        [constants.USER_1, constants.USER_2]].values])
    intersection_pairs = friends_set.intersection(set(encs_dict.keys()))
    relevant_encs = {k: encs_dict[k] for k in intersection_pairs}

    mode_0_path = os.path.join(root_path, constants.MODE_0_GRAPH)
    print 'loading graph mode 0:', mode_0_path
    mode_0_graph = utils.load_pickle(mode_0_path).to_undirected()

    mode_1_path = os.path.join(root_path, constants.MODE_1_GRAPH)
    print 'loading graph mode 1:', mode_1_path
    mode_1_graph = utils.load_pickle(mode_1_path).to_undirected()

    mode_2_path = os.path.join(root_path, constants.MODE_2_GRAPH)
    print 'loading graph mode 2:', mode_2_path
    mode_2_graph = utils.load_pickle(mode_2_path).to_undirected()

    friend_df[constants.ENCS_COUNT] = friend_df.apply(
        lambda row: apply_encs(relevant_encs, row), axis=1)
    friend_df[constants.MODE_0_DIST] = friend_df.apply(
        lambda row: apply_distance(mode_0_graph, row), axis=1)
    friend_df[constants.MODE_1_DIST] = friend_df.apply(
        lambda row: apply_distance(mode_1_graph, row), axis=1)
    friend_df[constants.MODE_2_DIST] = friend_df.apply(
        lambda row: apply_distance(mode_2_graph, row), axis=1)

    friend_df = friend_df[friend_df[constants.ENCS_COUNT] >= 0]
    dest_path = os.path.join(root_path, dest_filename)
    print 'storing dataframe at ', dest_path
    friend_df.to_csv(dest_path, index=False)
    return friend_df
Example #5
0
def get_encounters_for_pairs(root_path, dest_path):
	encs_dict = {}
	root = root_path.split('/')[-1]
	encs_path = os.path.join(root_path, TOWER_ENCS_DIR)
	print '**************************'
	print 'entering root path:', root
	for day_dir in os.listdir(encs_path):
		day_path = os.path.join(encs_path, day_dir)
		print '________________'
		print 'opening day path:', day_path
		for tower_file in os.listdir(day_path):
			print 'opening tower file:', tower_file
			tower_path = os.path.join(day_path, tower_file)
			tower_graph = utils.load_pickle(tower_path)
			tower_edges = set(tower_graph.edges())
			for user_1, user_2 in tower_edges:
				count = len(tower_graph[user_1][user_2])
				add_encs_count(encs_dict, user_1, user_2, count)
			del tower_graph
	with open(dest_path, 'wb') as outfile:
		cPickle.dump(encs_dict, outfile)
	return encs_dict