def find_nearest_wrf0_station(origin_csv, wrf0_stations_csv):
    origins = read_csv(origin_csv)
    wrf0_stations = read_csv(wrf0_stations_csv)

    nearest_wrf0_stations_list = [[
        'origin_id', 'origin_name', 'nearest_wrf0_station_id', 'dist'
    ]]

    for origin_index in range(len(origins)):
        nearest_wrf0_station = [
            origins[origin_index][2],
            "{}_{}".format(origins[origin_index][3], origins[origin_index][1])
        ]
        origin_lat = float(origins[origin_index][4])
        origin_lng = float(origins[origin_index][5])

        distances = {}
        for wrf0_index in range(len(wrf0_stations)):
            lat = float(wrf0_stations[wrf0_index][2])
            lng = float(wrf0_stations[wrf0_index][1])

            intermediate_value = cos(radians(origin_lat)) * cos(
                radians(lat)) * cos(radians(lng) - radians(origin_lng)) + sin(
                    radians(origin_lat)) * sin(radians(lat))
            if intermediate_value < 1:
                distance = 6371 * acos(intermediate_value)
            else:
                distance = 6371 * acos(1)

            distances[wrf0_stations[wrf0_index][0]] = distance

        sorted_distances = collections.OrderedDict(
            sorted(distances.items(), key=operator.itemgetter(1))[:10])

        count = 0
        for key in sorted_distances.keys():
            if count < 1:
                nearest_wrf0_station.extend([key, sorted_distances.get(key)])
                count += 1

        print(nearest_wrf0_station)
        nearest_wrf0_stations_list.append(nearest_wrf0_station)

    create_csv('obs_wrf0_stations_mapping.csv', nearest_wrf0_stations_list)
def find_nearest_d03_station_for_flo2d_grids(flo2d_stations_csv, d03_stations_csv):
    flo2d_grids = read_csv(flo2d_stations_csv)
    d03_stations = read_csv(d03_stations_csv)

    nearest_d03_stations_list = [[
        'flo2d_grid_id', 'nearest_d03_station_id', 'dist'
    ]]

    for origin_index in range(len(flo2d_grids)):
        nearest_d03_station = [flo2d_grids[origin_index][0]]
        origin_lat = float(flo2d_grids[origin_index][2])
        origin_lng = float(flo2d_grids[origin_index][1])

        distances = {}
        for d03_index in range(len(d03_stations)):
            lat = float(d03_stations[d03_index][1])
            lng = float(d03_stations[d03_index][2])

            intermediate_value = cos(radians(origin_lat)) * cos(
                radians(lat)) * cos(radians(lng) - radians(origin_lng)) + sin(
                    radians(origin_lat)) * sin(radians(lat))
            if intermediate_value < 1:
                distance = 6371 * acos(intermediate_value)
            else:
                distance = 6371 * acos(1)

            distances[d03_stations[d03_index][0]] = distance

        sorted_distances = collections.OrderedDict(
            sorted(distances.items(), key=operator.itemgetter(1))[:10])

        count = 0
        for key in sorted_distances.keys():
            if count < 1:
                nearest_d03_station.extend([key, sorted_distances.get(key)])
                count += 1

        print(nearest_d03_station)
        nearest_d03_stations_list.append(nearest_d03_station)

    create_csv('MDPA_flo2d_30_d03_stations_mapping.csv', nearest_d03_stations_list)
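# The nearest-station functions above, and the flo2d/obs mapping function
# further down, all repeat the same spherical-law-of-cosines distance block.
# A minimal sketch of a shared helper, assuming the hypothetical name
# great_circle_km and the same 6371 km Earth radius used in the original code:
from math import acos, cos, radians, sin


def great_circle_km(lat1, lng1, lat2, lng2):
    # Clamp the cosine term at 1 to avoid math domain errors from rounding,
    # mirroring the "if intermediate_value < 1" guard in the originals.
    value = cos(radians(lat1)) * cos(radians(lat2)) * \
        cos(radians(lng2) - radians(lng1)) + \
        sin(radians(lat1)) * sin(radians(lat2))
    return 6371 * acos(min(value, 1))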
def generate_rain_files(active_obs_stations_file, start_time, end_time):
    # Connect to the database
    connection = pymysql.connect(host='104.198.0.87',
                                 user='',
                                 password='',
                                 db='',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    active_obs_stations = read_csv(active_obs_stations_file)
    stations_dict = {}
    for obs_index in range(len(active_obs_stations)):
        stations_dict[active_obs_stations[obs_index][2]] = \
            active_obs_stations[obs_index][0]

    obs_timeseries = extract_rain_obs(connection=connection,
                                      stations_dict=stations_dict,
                                      start_time=start_time,
                                      end_time=end_time)

    for obs_index in range(len(active_obs_stations)):
        data = [['time', 'value']]
        station_id = active_obs_stations[obs_index][2]
        for i in range(len(obs_timeseries[station_id])):
            data.append(obs_timeseries[station_id][i])
        create_csv(
            '{}_{}_{}_{}'.format(active_obs_stations[obs_index][3],
                                 active_obs_stations[obs_index][1],
                                 start_time, end_time), data)
def format_rain(csv_file, start):
    timeseries = read_csv(csv_file)
    rain_dat = []
    total_rain = 0
    cumulative_timeseries = []

    for i in range(len(timeseries)):
        total_rain += float(timeseries[i][1])
        cumulative_timeseries.append(total_rain)

    for i in range(len(timeseries)):
        time_col = ((datetime.strptime(timeseries[i][0], DATE_TIME_FORMAT) -
                     start).total_seconds()) / 3600
        rain_col = float(timeseries[i][1]) / total_rain
        rain_dat.append("R {} {}".format(
            '%.4f' % time_col, cumulative_timeseries[i] / total_rain))

    rain_dat.insert(0, "R 0 0")
    rain_dat.insert(0, "{} 5 0 0".format(total_rain))
    rain_dat.insert(0, "0 0 ")

    write_to_file("RAIN.DAT", rain_dat)
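# A small worked example (hedged, with made-up readings) of the
# "R <hours since start> <cumulative fraction>" lines that format_rain builds
# for RAIN.DAT: three hourly readings of 5, 10 and 5 mm give total_rain = 20
# and cumulative fractions 0.25, 0.75 and 1.0. The header lines "0 0 ",
# "<total_rain> 5 0 0" and "R 0 0" are prepended by the function itself.
readings = [5.0, 10.0, 5.0]   # rain values at 1, 2 and 3 hours after start
total = sum(readings)         # 20.0
cumulative = 0.0
for hour, value in enumerate(readings, start=1):
    cumulative += value
    print("R {} {}".format('%.4f' % hour, cumulative / total))
# R 1.0000 0.25
# R 2.0000 0.75
# R 3.0000 1.0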
def find_nearest_obs_stations_for_flo2d_stations(flo2d_stations_csv, obs_stations_csv):
    obs_stations = read_csv(obs_stations_csv)
    flo2d_station = read_csv(flo2d_stations_csv)

    flo2d_obs_mapping_list = [[
        'flo2d_250_station_id', 'ob_1_id', 'ob_1_dist', 'ob_2_id', 'ob_2_dist',
        'ob_3_id', 'ob_3_dist', 'ob_4_id', 'ob_4_dist', 'ob_5_id', 'ob_5_dist',
        'ob_6_id', 'ob_6_dist', 'ob_7_id', 'ob_7_dist', 'ob_8_id', 'ob_8_dist',
        'ob_9_id', 'ob_9_dist', 'ob_10_id', 'ob_10_dist'
    ]]

    for flo2d_index in range(len(flo2d_station)):
        flo2d_obs_mapping = [flo2d_station[flo2d_index][0]]
        flo2d_lat = float(flo2d_station[flo2d_index][2])
        flo2d_lng = float(flo2d_station[flo2d_index][1])

        distances = {}
        for obs_index in range(len(obs_stations)):
            lat = float(obs_stations[obs_index][4])
            lng = float(obs_stations[obs_index][5])

            intermediate_value = cos(radians(flo2d_lat)) * cos(
                radians(lat)) * cos(radians(lng) - radians(flo2d_lng)) + sin(
                    radians(flo2d_lat)) * sin(radians(lat))
            if intermediate_value < 1:
                distance = 6371 * acos(intermediate_value)
            else:
                distance = 6371 * acos(1)

            distances[obs_stations[obs_index][2]] = distance

        sorted_distances = collections.OrderedDict(
            sorted(distances.items(), key=operator.itemgetter(1))[:10])

        for key in sorted_distances.keys():
            flo2d_obs_mapping.extend([key, sorted_distances.get(key)])

        print(flo2d_obs_mapping)
        flo2d_obs_mapping_list.append(flo2d_obs_mapping)

    create_csv('MDPA_flo2d_30_obs_mapping.csv', flo2d_obs_mapping_list)
def get_display_names():
    try:
        names = csv_utils.read_csv(get_names_file_file_name())
        return names
    except FileNotFoundError:
        init.create_names_file()
        return get_display_names()
def get_problem_table():
    try:
        table = csv_utils.read_csv(get_problem_table_file_name())
        for i in range(0, len(table)):
            table[i][constants.ProblemTableStruct.ID.value] = int(
                table[i][constants.ProblemTableStruct.ID.value])
        return table
    except FileNotFoundError:
        init.create_problems_table()
        return get_problem_table()
def get_all_attempts(problem_id):
    try:
        attempts = csv_utils.read_csv(get_problem_file_name(problem_id))
        for i in range(0, len(attempts)):
            attempts[i][constants.ProblemFileStruct.PERCENT.value] = \
                float(attempts[i][constants.ProblemFileStruct.PERCENT.value])
        return attempts
    except FileNotFoundError:
        create_problem_file(problem_id)
        return get_all_attempts(problem_id)
def process_data(
        dataset_dir,
        model_type,
        dataset_type,
        dataset_id,
        #layer,
        csv_filename,
        num_classes,
        num_procs):
    print("Generating new files!")

    #open files
    try:
        csv_contents = read_csv(csv_filename)
    except:
        print("ERROR: Cannot open CSV file: " + csv_filename)
        return  # bail out so csv_contents is never used undefined

    b_dir_name = os.path.join(
        dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type,
                                            dataset_id))
    sp_dir_name = os.path.join(
        dataset_dir, 'sp_{0}_{1}_{2}'.format(model_type, dataset_type,
                                             dataset_id))
    pp_dir_name = os.path.join(
        dataset_dir, 'pp_{0}_{1}_{2}'.format(model_type, dataset_type,
                                             dataset_id))

    if (not os.path.exists(sp_dir_name)):
        os.makedirs(sp_dir_name)
    if (not os.path.exists(pp_dir_name)):
        os.makedirs(pp_dir_name)

    print("Organizing csv_contents")
    for ex in csv_contents:
        ex['b_path'] = os.path.join(b_dir_name,
                                    '{0}.b'.format(ex['example_id']))
        ex['sp_path'] = os.path.join(sp_dir_name,
                                     '{0}.npy'.format(ex['example_id']))
        ex['pp_path'] = os.path.join(pp_dir_name,
                                     '{0}.npz'.format(ex['example_id']))
        '''
        ex['b_path'] = os.path.join(b_dir_name, '{0}_{1}.b'.format(ex['example_id'], layer))
        ex['sp_path'] = os.path.join(sp_dir_name, '{0}_{1}.npy'.format(ex['example_id'], layer))
        ex['pp_path'] = os.path.join(pp_dir_name, '{0}_{1}.npz'.format(ex['example_id'], layer))
        '''

    dataset = [ex for ex in csv_contents if ex['label'] < num_classes]
    #print("dataset_length:", len(dataset), len([x for x in os.listdir(sp_dir_name) if "_3." in x]))
    print("dataset_length:", len(dataset), len(os.listdir(pp_dir_name)))
    #dataset = dataset[:41]

    # CONVERT BINARY EVENTS TO ITRS
    convert_event_to_itr(dataset, num_procs=num_procs)
def get_global_leader_board():
    try:
        board = csv_utils.read_csv(get_global_leader_board_file_name())
        for i in range(0, len(board)):
            board[i][constants.GlobalLeaderboardStruct.POINTS.value] = float(
                board[i][constants.GlobalLeaderboardStruct.POINTS.value])
            board[i][constants.GlobalLeaderboardStruct.NO_PROBLEMS.value] = int(
                board[i][constants.GlobalLeaderboardStruct.NO_PROBLEMS.value])
            board[i][constants.GlobalLeaderboardStruct.AVERAGE_PERCENT.value] = float(
                board[i][constants.GlobalLeaderboardStruct.AVERAGE_PERCENT.value])
        return board
    except FileNotFoundError:
        init.create_leaderboard()
        return get_global_leader_board()
def get_config_vars():
    try:
        config_vars = csv_utils.read_csv(get_config_file_name())
        casted_vars = []
        for read_var in config_vars:
            for actual_var in ConfigVars:
                if actual_var.value.var_name() == read_var[
                        constants.ConfigFileStruct.VAR_NAME.value]:
                    temp_var = read_var
                    temp_var[constants.ConfigFileStruct.VAR_VALUE.value] = \
                        actual_var.value.type_func()(read_var[constants.ConfigFileStruct.VAR_VALUE.value])
                    casted_vars.append(temp_var)
        return casted_vars
    except FileNotFoundError:
        init.create_config_file()
        return get_config_vars()
def main(model_type, dataset_dir, csv_filename, dataset_type, dataset_id,
         max_features, num_procs):

    if (model_type == 'i3d'):
        from gi3d_wrapper import DEPTH_SIZE, CNN_FEATURE_COUNT
    if (model_type == 'trn'):
        from trn_wrapper import DEPTH_SIZE, CNN_FEATURE_COUNT
    if (model_type == 'tsm'):
        from tsm_wrapper3 import DEPTH_SIZE, CNN_FEATURE_COUNT

    dataset_type_list = []
    if (dataset_type == "frames" or dataset_type == "both"):
        dataset_type_list.append("frames")
    if (dataset_type == "flow" or dataset_type == "both"):
        dataset_type_list.append("flow")

    #get files from dataset
    csv_contents = read_csv(csv_filename)

    #num_features = [min(feat_num, max_features) for feat_num in CNN_FEATURE_COUNT]
    num_features = 128  #[min(feat_num, max_features) for feat_num in CNN_FEATURE_COUNT]

    for ex in csv_contents:
        '''
        for layer in range(DEPTH_SIZE):
            #get IAD files for read
            for dtype in dataset_type_list:
                iad_path = 'iad_path_{0}_{1}'.format(dtype, layer)
                ex[iad_path] = os.path.join(dataset_dir, 'iad_{0}_{1}_{2}'.format(model_type, dtype, dataset_id), ex['label_name'], '{0}_{1}.npz'.format(ex['example_id'], layer))
                assert os.path.exists(ex[iad_path]), "Cannot locate IAD file: "+ ex[iad_path]

            #generate binary directory for write
            bin_dir = os.path.join(dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type, dataset_id), ex['label_name'])
            if ( not os.path.exists(bin_dir) ):
                os.makedirs(bin_dir)

            bin_path = 'b_path_{0}'.format(layer)
            ex[bin_path] = os.path.join(bin_dir, '{0}_{1}.b'.format(ex['example_id'], layer))
        '''

        #get IAD files for read (no layer)
        for dtype in dataset_type_list:
            iad_path = 'iad_path_{0}'.format(dtype)
            ex[iad_path] = os.path.join(
                dataset_dir,
                'iad_{0}_{1}_{2}'.format(model_type, dtype, dataset_id),
                ex['label_name'], '{0}.npz'.format(ex['example_id']))
            assert os.path.exists(
                ex[iad_path]), "Cannot locate IAD file: " + ex[iad_path]

        #generate binary directory for write
        bin_dir = os.path.join(
            dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type,
                                                dataset_id), ex['label_name'])
        if (not os.path.exists(bin_dir)):
            os.makedirs(bin_dir)

        bin_path = 'b_path'
        ex[bin_path] = os.path.join(bin_dir, '{0}.b'.format(ex['example_id']))

    #csv_contents = csv_contents

    p = Pool(num_procs)

    #get the threshold values for each feature in the training dataset
    training_dataset = [
        ex for ex in csv_contents if ex['dataset_id'] >= dataset_id
    ]
    other_args = [DEPTH_SIZE, dataset_type_list, num_features]

    print("Getting Threshold")
    print("other_args:", other_args)
    split_threshold_info = split_dataset_run_func(p, determine_threshold,
                                                  training_dataset, other_args)

    #combine chunked threshold info together
    threshold_matrix = np.zeros((DEPTH_SIZE, max_features))
    threshold_count = np.zeros((DEPTH_SIZE, max_features))
    for x in split_threshold_info:
        #for layer in range(DEPTH_SIZE):
        layer = 0
        for feature in range(num_features):
            threshold_matrix[layer, feature] += \
                x[layer][feature].mean * x[layer][feature].count
            threshold_count[layer, feature] += x[layer][feature].count
    threshold_count[np.where(threshold_count == 0)] = 1
    threshold_matrix /= threshold_count

    filename = os.path.join(
        dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type,
                                            dataset_id),
        'threshold_values.npy')
    np.save(filename, threshold_matrix)
    assert os.path.exists(filename), "filename cannot be found: " + filename
    threshold_matrix = np.load(filename)

    #process the IADs and save the parsed files
    full_dataset = [
        ex for ex in csv_contents
        if ex['dataset_id'] >= dataset_id or ex['dataset_id'] == 0
    ]
    other_args = [DEPTH_SIZE, dataset_type_list, threshold_matrix, num_features]

    print("Converting to Binary")
    split_dataset_run_func(p, sparsify_iad_dataset, full_dataset, other_args)
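# A brief illustration (hedged, with made-up numbers) of the count-weighted
# mean used above when merging the chunked threshold statistics: each chunk
# contributes mean * count, and dividing by the summed counts recovers the
# overall per-feature mean.
import numpy as np

chunk_means = np.array([0.2, 0.5])    # per-chunk means for one feature
chunk_counts = np.array([10, 30])     # samples seen by each chunk
combined_mean = (chunk_means * chunk_counts).sum() / chunk_counts.sum()
print(combined_mean)  # 0.425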
from pathlib import Path

import matplotlib.pyplot as plt

from csv_utils import read_csv

# clean_string and the DataFrame-style read_csv wrapper are assumed to be
# provided elsewhere in the surrounding project.

titles = [
    "Air Pressure",
    "Water vapor pressure",
    "Relative air humidity",
    "Specific air humidity",
    "Average cloud cover",
    "Temperature",
    "Wind speed",
    "Downfall",
    "Cloudy weather",
]

date_time_key = "date"

df = read_csv('../../data/processed/processed_data.csv')
x = df[[clean_string(titles[1])]]


def show_raw_visualization(data):
    Path("../../plots/data").mkdir(parents=True, exist_ok=True)
    for i in titles:
        title = clean_string(i)
        x = data[[title]]
        fig = x.plot().get_figure()
        fig.savefig("../../plots/data/{}.png".format(title))


def show_heatmap(data):
    plt.matshow(data.corr())
    plt.xticks(range(data.shape[1]), data.columns, fontsize=14, rotation=90)
def main(model_type, dataset_dir, csv_filename, dataset_type, dataset_id,
         num_features, num_procs):

    if (model_type == 'i3d'):
        from gi3d_wrapper import depth_size
    if (model_type == 'trn'):
        from trn_wrapper import depth_size
    if (model_type == 'tsm'):
        from tsm_wrapper import depth_size

    dataset_type_list = []
    if (dataset_type == "frames" or dataset_type == "both"):
        dataset_type_list.append("frames")
    if (dataset_type == "flow" or dataset_type == "both"):
        dataset_type_list.append("flow")

    #get files from dataset
    csv_contents = read_csv(csv_filename)[:23]

    for ex in csv_contents:
        print(ex['example_id'])
        for layer in range(depth_size):
            #get IAD files for read
            for dtype in dataset_type_list:
                iad_path = 'iad_path_{0}_{1}'.format(dtype, layer)
                ex[iad_path] = os.path.join(
                    dataset_dir,
                    'iad_{0}_{1}_{2}'.format(model_type, dtype, dataset_id),
                    ex['label_name'],
                    '{0}_{1}.npz'.format(ex['example_id'], layer))
                assert os.path.exists(
                    ex[iad_path]), "Cannot locate IAD file: " + ex[iad_path]

            #generate txt directory for write
            txt_dir = os.path.join(
                dataset_dir,
                'txt_{0}_{1}_{2}'.format(model_type, dataset_type, dataset_id),
                ex['label_name'])
            if (not os.path.exists(txt_dir)):
                os.makedirs(txt_dir)

            txt_path = 'txt_path_{0}'.format(layer)
            ex[txt_path] = os.path.join(
                txt_dir, '{0}_{1}.txt'.format(ex['example_id'], layer))

    p = Pool(num_procs)

    #get the threshold values for each feature in the training dataset
    training_dataset = [
        ex for ex in csv_contents if ex['dataset_id'] >= dataset_id
    ][:20]
    other_args = [depth_size, dataset_type_list, num_features]

    split_threshold_info = split_dataset_run_func(p, determine_threshold,
                                                  training_dataset, other_args)
    #determine_threshold((training_dataset, depth_size,dataset_type_list,num_features))

    #combine chunked threshold info together
    threshold_matrix = np.zeros((depth_size, num_features))
    for x in split_threshold_info:
        for layer in range(depth_size):
            for feature in range(num_features):
                threshold_matrix[layer, feature] += x[layer][feature].mean * x[
                    layer][feature].count
    '''
        else:
            print(data[METADATA_PERSON_ID] + ' blacklisted')
    return outdata


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print('You did not enter metadata file path')
        print('E.g.: python rgbd_match.py metadata_path')
        sys.exit(1)
    metadata_file = sys.argv[1]

    # Create a map
    indata = read_csv(metadata_file)
    indata = blacklist_invalid(indata)
    size = len(indata)
    mapping = {}
    for index in range(1, size):
        data = indata[index]
        if data[METADATA_EXTENSION] != '.jpg':
            continue
        scanid = data[METADATA_SCAN_ID]
        order = data[METADATA_ORDER]
        key = scanid + str(order)
        mapping[key] = data

    # For every depthmap add rgb file
    output = []
    processed = {}
def main(dataset_dir, csv_filename, dataset_type, dataset_id,
         feature_retain_count):

    datatset_type_list = []
    if (dataset_type == "frames" or dataset_type == "both"):
        datatset_type_list.append("frames")
    if (dataset_type == "flow" or dataset_type == "both"):
        datatset_type_list.append("flow")

    #setup feature_rank_parser
    frame_ranking_file = os.path.join(
        dataset_dir, 'iad_frames_' + str(dataset_id),
        "feature_ranks_" + str(dataset_id) + ".npz")
    flow_ranking_file = os.path.join(
        dataset_dir, 'iad_flow_' + str(dataset_id),
        "feature_ranks_" + str(dataset_id) + ".npz")

    pruning_indexes = {}
    if (dataset_type == "frames"):
        assert os.path.exists(
            frame_ranking_file
        ), "Cannot locate Feature Ranking file: " + frame_ranking_file
        pruning_indexes["frames"] = get_top_n_feature_indexes(
            frame_ranking_file, feature_retain_count)
    elif (dataset_type == "flow"):
        assert os.path.exists(
            flow_ranking_file
        ), "Cannot locate Feature Ranking file: " + flow_ranking_file
        pruning_indexes["flow"] = get_top_n_feature_indexes(
            flow_ranking_file, feature_retain_count)
    elif (dataset_type == "both"):
        assert os.path.exists(
            frame_ranking_file
        ), "Cannot locate Feature Ranking file: " + frame_ranking_file
        assert os.path.exists(
            flow_ranking_file
        ), "Cannot locate Feature Ranking file: " + flow_ranking_file
        pruning_indexes = get_top_n_feature_indexes_combined(
            frame_ranking_file, flow_ranking_file, feature_retain_count)

    #setup file-io
    txt_path = os.path.join(dataset_dir,
                            'txt_' + dataset_type + '_' + str(dataset_id))
    if (not os.path.exists(txt_path)):
        os.makedirs(txt_path)

    #get files from dataset
    try:
        csv_contents = read_csv(csv_filename)
    except:
        print("ERROR: Cannot open CSV file: " + csv_filename)
        return  # bail out so csv_contents is never used undefined

    file_list = [
        ex for ex in csv_contents
        if ex['dataset_id'] >= dataset_id or ex['dataset_id'] == 0
    ]

    for ex in file_list:
        file_location = os.path.join(ex['label_name'], ex['example_id'])
        print("Converting " + file_location)

        for layer in range(5):
            iad_filenames = {}
            for dt in datatset_type_list:
                iad_filenames[dt] = os.path.join(
                    dataset_dir, 'iad_' + dt + '_' + str(dataset_id),
                    file_location + "_" + str(layer) + ".npz")
                assert os.path.exists(
                    iad_filenames[dt]
                ), "Cannot locate IAD file: " + iad_filenames[dt]

            label_dir = os.path.join(txt_path, str(layer), ex['label_name'])
            if (not os.path.exists(label_dir)):
                os.makedirs(label_dir)

            txt_filename = os.path.join(
                txt_path, str(layer),
                file_location + "_" + str(layer) + ".txt")

            sparsify_iad(datatset_type_list, iad_filenames, pruning_indexes,
                         layer, name=txt_filename)
            sparsify_iad2(datatset_type_list, iad_filenames, pruning_indexes,
                          layer, name=txt_filename)
def test_read_csv_good_case(self):
    file_path = self._create_test_csv(
        ['label1,label2,label3', '1,2,3', '4,5,6'])
    self.assertListEqual(
        [['label1', 'label2', 'label3'], ['1', '2', '3'], ['4', '5', '6']],
        csv_utils.read_csv(file_path))
def read_csv(file_name):
    return csv_utils.read_csv(os.path.join(DATA_DIRECTORY, file_name))
def main(dataset_dir, csv_filename, dataset_type, dataset_id, feature_retain_count):

    datatset_type_list = []
    if(dataset_type=="frames" or dataset_type=="both"):
        datatset_type_list.append("frames")
    if(dataset_type=="flow" or dataset_type=="both"):
        datatset_type_list.append("flow")

    #setup feature_rank_parser
    frame_ranking_file = os.path.join(dataset_dir, 'iad_frames_'+str(dataset_id), "feature_ranks_"+str(dataset_id)+".npz")
    flow_ranking_file = os.path.join(dataset_dir, 'iad_flow_'+str(dataset_id), "feature_ranks_"+str(dataset_id)+".npz")

    pruning_indexes = {}
    if(dataset_type=="frames"):
        assert os.path.exists(frame_ranking_file), "Cannot locate Feature Ranking file: "+ frame_ranking_file
        pruning_indexes["frames"] = get_top_n_feature_indexes(frame_ranking_file, feature_retain_count)
    elif(dataset_type=="flow"):
        assert os.path.exists(flow_ranking_file), "Cannot locate Feature Ranking file: "+ flow_ranking_file
        pruning_indexes["flow"] = get_top_n_feature_indexes(flow_ranking_file, feature_retain_count)
    elif(dataset_type=="both"):
        assert os.path.exists(frame_ranking_file), "Cannot locate Feature Ranking file: "+ frame_ranking_file
        assert os.path.exists(flow_ranking_file), "Cannot locate Feature Ranking file: "+ flow_ranking_file

        # NOTE: save_name is not defined in this function; it is assumed to be
        # a module-level variable naming the dataset ("ucf", "hmdb" or "bm").
        if(save_name == "ucf"):
            if(dataset_id == 1):
                # UCF 1 -> waiting to finish training
                weight_ranking = [[0.177901,0.334655,0.437483,0.801745,0.916997],[0.299762,0.409992,0.519958,0.7917,0.911182]]
            if(dataset_id == 2):
                # UCF 2* -> training
                weight_ranking = [[0.120804,0.24663,0.31483,0.674861,0.842982],[0.208829,0.256675,0.354216,0.557494,0.6894]]
            if(dataset_id == 3):
                # UCF 3* -> training
                weight_ranking = [[0.100449,0.204335,0.270949,0.528152,0.726408],[0.161248,0.187946,0.21438,0.378007,0.473169]]

        if(save_name == "hmdb"):
            if(dataset_id == 1):
                # HMDB 1* -> training
                weight_ranking = [[0.085621,0.201307,0.21634,0.527451,0.675817],[0.190196,0.233333,0.263399,0.296732,0.332026]]
            if(dataset_id == 2):
                # HMDB 2* -> finished
                weight_ranking = [[0.075163,0.145752,0.169281,0.365359,0.566013],[0.131373,0.184314,0.205882,0.282353,0.443791]]
            if(dataset_id == 3):
                # HMDB 3* -> finished
                weight_ranking = [[0.054248,0.127451,0.14183,0.25098,0.462092],[0.10915,0.145098,0.137255,0.138562,0.231373]]

        if(save_name == "bm"):
            if(dataset_id == 1):
                # BLOCKMOVING 1* -> finished
                weight_ranking = [[0.921875,0.9296875,0.9296875,0.6171875,0.578125],[0.828125,0.898438,0.945313,0.945313,0.953125]]
            if(dataset_id == 2):
                # BLOCKMOVING 2* -> finished
                weight_ranking = [[0.765625,0.8125,0.859375,0.671875,0.695313],[0.742188,0.835938,0.898438,0.875,0.835938]]
            if(dataset_id == 3):
                # BLOCKMOVING 3* -> finished
                weight_ranking = [[0.671875,0.679688,0.742188,0.609375,0.539063],[0.703125,0.71875,0.84375,0.75,0.742188]]

        pruning_indexes = get_top_n_feature_indexes_combined(frame_ranking_file, flow_ranking_file, feature_retain_count, weight_ranking)

    #setup file-io
    txt_path = os.path.join(dataset_dir, 'txt_'+dataset_type+'_'+str(dataset_id))
    if(not os.path.exists(txt_path)):
        os.makedirs(txt_path)

    #get files from dataset
    try:
        csv_contents = read_csv(csv_filename)
    except:
        print("ERROR: Cannot open CSV file: "+ csv_filename)
        return  # bail out so csv_contents is never used undefined

    global_threshold_values = {"mean": [], "std_dev":[], "count":[]}
    for i in range(5):
        global_threshold_values["mean"].append([0]*feature_retain_count)
        global_threshold_values["std_dev"].append([0]*feature_retain_count)
        global_threshold_values["count"].append(0)

    train_list = [ex for ex in csv_contents if ex['dataset_id'] >= dataset_id]
    file_list = [ex for ex in csv_contents if ex['dataset_id'] >= dataset_id or ex['dataset_id'] == 0]

    for ex in train_list:
        file_location = os.path.join(ex['label_name'], ex['example_id'])
        print("Converting "+file_location)

        for layer in range(5):
            iad_filenames = {}
            for dt in datatset_type_list:
                iad_filenames[dt] = os.path.join(dataset_dir, 'iad_'+dt+'_'+str(dataset_id), file_location+"_"+str(layer)+".npz")
                assert os.path.exists(iad_filenames[dt]), "Cannot locate IAD file: "+ iad_filenames[dt]

            txt_filename = os.path.join(txt_path, str(layer), file_location+"_"+str(layer)+".txt")

            add_to_global_threshold(datatset_type_list, iad_filenames, pruning_indexes, layer, global_threshold_values)

    '''
    for i in range(5):
        print(str(i))
        print("mean:", global_threshold_values["mean"][i])
        print("std_dev:", global_threshold_values["std_dev"][i])
        print("count:", global_threshold_values["count"][i])
        print('')
    '''

    for ex in file_list:
        file_location = os.path.join(ex['label_name'], ex['example_id'])
        print("Converting "+file_location)

        for layer in range(5):
            iad_filenames = {}
            for dt in datatset_type_list:
                iad_filenames[dt] = os.path.join(dataset_dir, 'iad_'+dt+'_'+str(dataset_id), file_location+"_"+str(layer)+".npz")
                assert os.path.exists(iad_filenames[dt]), "Cannot locate IAD file: "+ iad_filenames[dt]

            label_dir = os.path.join(txt_path, str(layer), ex['label_name'])
            if ( not os.path.exists(label_dir) ):
                os.makedirs(label_dir)

            txt_filename = os.path.join(txt_path, str(layer), file_location+"_"+str(layer)+".txt")

            sparsify_iad(datatset_type_list, iad_filenames, pruning_indexes, layer, global_threshold_values, name=txt_filename)