def condense_data(root_path, partitioned_data_path, chunk_size=10, just_path=False):
    """Condense partitioned data into the ``condensed_data`` directory.

    The ``condensed_data`` directory is requested from ``utils.create_dir``
    under ``root_path`` on every call, including when ``just_path`` is set.

    Args:
        root_path: Parent directory handed to ``utils.create_dir``.
        partitioned_data_path: Location of the partitioned data, passed
            straight through to ``RD.main``.
        chunk_size: Forwarded to ``RD.main`` as ``chunk_size``.
        just_path: When true, skip the condensing step entirely.

    Returns:
        The value returned by ``utils.create_dir`` when ``just_path`` is
        true; otherwise whatever ``RD.main`` returns (presumably the path of
        the condensed file -- confirm against ``RD.main``).
    """
    out_dir = utils.create_dir(root_path, 'condensed_data')
    if not just_path:
        return RD.main(partitioned_data_path, out_dir, chunk_size=chunk_size)

    print('skipping operation to just return directory path.')
    return out_dir
def quick_script_generate(): dates_use = [ 'cdr_date_2016_07_25', 'cdr_date_2016_07_26', 'cdr_date_2016_07_27', 'cdr_date_2016_07_29', 'cdr_date_2016_07_30', 'cdr_date_2016_07_31' ] dates_dir = '../niquo_data/small_range/tower_encounters' data_path = '../niquo_data/small_range/condensed_data/cdr_data_1_31_time_10.csv' dest_dir = '../niquo_data/small_range/tower_encounters_REDUCED_V2' range_set = [(5, 10), (11, 20), (21, 50)] for lower, upper in range_set: print 'current range', lower, upper range_dir = utils.create_dir(dest_dir, 'counts_' + str(lower) + '_' + str(upper)) for d_dir in dates_use: graphs_dir = os.path.join(dates_dir, d_dir) print 'graphs dir:', graphs_dir final_dest_dir = utils.create_dir(range_dir, d_dir) make_smaller_graphs(data_path, graphs_dir, final_dest_dir, lower, upper, 24, 31) return True
def find_encounters(root_path, condensed_data_path, enc_window=10, just_path=False,
                    user_pair_set=None):
    """Pair users from tower data into the ``tower_encounters`` directory.

    The ``tower_encounters`` directory is requested from ``utils.create_dir``
    under ``root_path`` on every call, including when ``just_path`` is set.

    Args:
        root_path: Parent directory handed to ``utils.create_dir``.
        condensed_data_path: Condensed data location given to
            ``TP.TowersPartitioned``.
        enc_window: Forwarded to ``pair_users_from_towers`` as ``enc_window``.
        just_path: When true, skip the pairing step entirely.
        user_pair_set: Forwarded to ``pair_users_from_towers`` as
            ``user_pair_set``.

    Returns:
        The value returned by ``utils.create_dir`` for the
        ``tower_encounters`` directory, in both modes.
    """
    out_dir = utils.create_dir(root_path, 'tower_encounters')

    if just_path:
        print('skipping operation to just return path.')
    else:
        towers = TP.TowersPartitioned(condensed_data_path, out_dir)
        towers.pair_users_from_towers(enc_window=enc_window,
                                      user_pair_set=user_pair_set)

    return out_dir
def partition_data(root_path, data_path, delimiter=',', filter_func=utils.remove_foreigners, just_path=False): destination_path = utils.create_dir(root_path, 'partitioned_data') if just_path: print 'skipping operation to just return path.' return destination_path rawData = raw.RawCDRCSV(data_path) print 'beginning filtering on data from:', data_path print 'data will be stored at:', destination_path rawData.filter_and_partition(destination_path, delimiter=delimiter, filter_func=filter_func) return destination_path
def create_maps_for_months(data_dir='/home/niquo/niquo_data', months_paths=USE_MONTHS, new_friend_csv=constants.FIRST_CALL): print 'retreiving friend set from', new_friend_csv pair_set = get_new_friend_pair_set(new_friend_csv) friend_set = get_new_friend_set(new_friend_csv, pair_set) month_filter = lambda row: row[constants.SOURCE] in friend_set chunk_size = 10 print 'iterating through months to start encounter process' for dir_str in months_paths: month = dir_str + '.csv' root_path = utils.create_dir(data_dir, dir_str) print 'current month root path:', root_path csv_month = os.path.join(constants.FILTERED_MONTHS, month) print 'current data path:', csv_month print 'partitioning data...' partitioned_data_path = Main.partition_data(root_path, csv_month, filter_func=month_filter) print 'condensing data...' condense_data_path = Main.condense_data(root_path, partitioned_data_path, chunk_size=10) print 'finding encoutners...' encs_path = Main.find_encounters(root_path, condense_data_path, enc_window=chunk_size, user_pair_set=pair_set) digraph_base_store_path = os.path.join(root_path, constants.BASE_DIGRAPH) print 'creating base directed graph to be stored at', digraph_base_store_path net.create_graph_directed(csv_month, digraph_base_store_path) for mode in range(3): filtered_graph_name = 'filtered_graph_mode_' + str(mode) + '.p' filt_graph_store_path = os.path.join(root_path, filtered_graph_name) print 'creating graph for mode', mode, 'to be stored at ', filtered_graph_name net.clean_dir_graph(digraph_base_store_path, filt_graph_store_path, mode) return True
def main(root_path, condensed_data_path, lower, upper):
    """Pair users from tower data into the ``tower_encounters`` directory.

    Args:
        root_path: Parent directory handed to ``utils.create_dir``.
        condensed_data_path: Condensed data location given to
            ``TowersPartitioned``.
        lower: First positional argument to ``pair_users_from_towers``
            (presumably the lower bound of a range -- confirm there).
        upper: Second positional argument to ``pair_users_from_towers``
            (presumably the upper bound of a range -- confirm there).

    Returns:
        None.
    """
    encounters_dir = utils.create_dir(root_path, 'tower_encounters')
    TowersPartitioned(condensed_data_path, encounters_dir).pair_users_from_towers(lower, upper)
def produce_larger_graphs(root_path, condensed_data_path, lower, upper):
    """Pair users from tower data using ``operator.ge`` as the comparison.

    Same steps as a plain pairing run into the ``tower_encounters``
    directory, except that ``operator.ge`` is supplied as ``thresh_compare``
    instead of relying on ``pair_users_from_towers``' own default.

    Args:
        root_path: Parent directory handed to ``utils.create_dir``.
        condensed_data_path: Condensed data location given to
            ``TowersPartitioned``.
        lower: First positional argument to ``pair_users_from_towers``
            (presumably the lower bound of a range -- confirm there).
        upper: Second positional argument to ``pair_users_from_towers``
            (presumably the upper bound of a range -- confirm there).

    Returns:
        None.
    """
    encounters_dir = utils.create_dir(root_path, 'tower_encounters')
    pairing = TowersPartitioned(condensed_data_path, encounters_dir)
    pairing.pair_users_from_towers(lower, upper, thresh_compare=operator.ge)