def find_nearest_wrf0_station(origin_csv, wrf0_stations_csv):

    origins = read_csv(origin_csv)

    wrf0_stations = read_csv(wrf0_stations_csv)

    nearest_wrf0_stations_list = [[
        'origin_id', 'origin_name', 'nearest_wrf0_station_id', 'dist'
    ]]

    for origin_index in range(len(origins)):

        nearest_wrf0_station = [
            origins[origin_index][2], "{}_{}".format(origins[origin_index][3],
                                                     origins[origin_index][1])
        ]

        origin_lat = float(origins[origin_index][4])
        origin_lng = float(origins[origin_index][5])

        distances = {}

        for wrf0_index in range(len(wrf0_stations)):
            lat = float(wrf0_stations[wrf0_index][2])
            lng = float(wrf0_stations[wrf0_index][1])

            intermediate_value = cos(radians(origin_lat)) * cos(
                radians(lat)) * cos(radians(lng) - radians(origin_lng)) + sin(
                    radians(origin_lat)) * sin(radians(lat))
            if intermediate_value < 1:
                distance = 6371 * acos(intermediate_value)
            else:
                distance = 6371 * acos(1)

            distances[wrf0_stations[wrf0_index][0]] = distance

        sorted_distances = collections.OrderedDict(
            sorted(distances.items(), key=operator.itemgetter(1))[:10])

        count = 0
        for key in sorted_distances.keys():
            if count < 1:
                nearest_wrf0_station.extend([key, sorted_distances.get(key)])
                count += 1

        print(nearest_wrf0_station)
        nearest_wrf0_stations_list.append(nearest_wrf0_station)

    create_csv('obs_wrf0_stations_mapping.csv', nearest_wrf0_stations_list)
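The nearest-station functions in this listing all inline the same spherical-law-of-cosines step. A minimal sketch of that shared computation factored into a standalone helper; the name great_circle_distance_km and the clamping of the intermediate value are additions for illustration, not part of the original code.

from math import acos, cos, radians, sin

EARTH_RADIUS_KM = 6371


def great_circle_distance_km(lat1, lng1, lat2, lng2):
    # Spherical law of cosines, matching the inline computation used above.
    value = (cos(radians(lat1)) * cos(radians(lat2)) *
             cos(radians(lng2) - radians(lng1)) +
             sin(radians(lat1)) * sin(radians(lat2)))
    # Clamp to [-1, 1] so floating-point drift cannot push acos() out of its domain.
    value = max(-1.0, min(1.0, value))
    return EARTH_RADIUS_KM * acos(value)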
def find_nearest_d03_station_for_flo2d_grids(flo2d_stations_csv,
                                             d03_stations_csv):

    flo2d_grids = read_csv(flo2d_stations_csv)

    d03_stations = read_csv(d03_stations_csv)

    nearest_d03_stations_list = [[
        'flo2d_grid_id', 'nearest_d03_station_id', 'dist'
    ]]

    for origin_index in range(len(flo2d_grids)):

        nearest_d03_station = [flo2d_grids[origin_index][0]]

        origin_lat = float(flo2d_grids[origin_index][2])
        origin_lng = float(flo2d_grids[origin_index][1])

        distances = {}

        for d03_index in range(len(d03_stations)):
            lat = float(d03_stations[d03_index][1])
            lng = float(d03_stations[d03_index][2])

            intermediate_value = cos(radians(origin_lat)) * cos(
                radians(lat)) * cos(radians(lng) - radians(origin_lng)) + sin(
                    radians(origin_lat)) * sin(radians(lat))
            if intermediate_value < 1:
                distance = 6371 * acos(intermediate_value)
            else:
                distance = 6371 * acos(1)

            distances[d03_stations[d03_index][0]] = distance

        sorted_distances = collections.OrderedDict(
            sorted(distances.items(), key=operator.itemgetter(1))[:10])

        count = 0
        for key in sorted_distances.keys():
            if count < 1:
                nearest_d03_station.extend([key, sorted_distances.get(key)])
                count += 1

        print(nearest_d03_station)
        nearest_d03_stations_list.append(nearest_d03_station)

    create_csv('MDPA_flo2d_30_d03_stations_mapping.csv',
               nearest_d03_stations_list)
Example #3
def generate_rain_files(active_obs_stations_file, start_time, end_time):
    # Connect to the database
    connection = pymysql.connect(host='104.198.0.87',
                                 user='',
                                 password='',
                                 db='',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    active_obs_stations = read_csv(active_obs_stations_file)

    stations_dict = {}

    for obs_index in range(len(active_obs_stations)):
        stations_dict[active_obs_stations[obs_index]
                      [2]] = active_obs_stations[obs_index][0]

    obs_timeseries = extract_rain_obs(connection=connection,
                                      stations_dict=stations_dict,
                                      start_time=start_time,
                                      end_time=end_time)

    for obs_index in range(len(active_obs_stations)):
        data = [['time', 'value']]
        station_id = active_obs_stations[obs_index][2]
        for i in range(len(obs_timeseries[station_id])):
            data.append(obs_timeseries[station_id][i])
        create_csv(
            '{}_{}_{}_{}'.format(active_obs_stations[obs_index][3],
                                 active_obs_stations[obs_index][1], start_time,
                                 end_time), data)
Example #4
def format_rain(csv_file, start):

    timeseries = read_csv(csv_file)

    rain_dat = []

    total_rain = 0

    cumulative_timeseries = []

    for i in range(len(timeseries)):
        total_rain += float(timeseries[i][1])
        cumulative_timeseries.append(total_rain)

    for i in range(len(timeseries)):
        time_col = ((datetime.strptime(timeseries[i][0], DATE_TIME_FORMAT) -
                     start).total_seconds()) / 3600
        rain_col = float(timeseries[i][1]) / total_rain

        rain_dat.append("R   {}   {}".format(
            '%.4f' % time_col, cumulative_timeseries[i] / total_rain))

    rain_dat.insert(0, "R   0   0")
    rain_dat.insert(0, "{}   5   0   0".format(total_rain))
    rain_dat.insert(0, "0   0   ")

    write_to_file("RAIN.DAT", rain_dat)
def find_nearest_obs_stations_for_flo2d_stations(flo2d_stations_csv,
                                                 obs_stations_csv):

    obs_stations = read_csv(obs_stations_csv)

    flo2d_station = read_csv(flo2d_stations_csv)

    flo2d_obs_mapping_list = [[
        'flo2d_250_station_id', 'ob_1_id', 'ob_1_dist', 'ob_2_id', 'ob_2_dist',
        'ob_3_id', 'ob_3_dist', 'ob_4_id', 'ob_4_dist', 'ob_5_id', 'ob_5_dist',
        'ob_6_id', 'ob_6_dist', 'ob_7_id', 'ob_7_dist', 'ob_8_id', 'ob_8_dist',
        'ob_9_id', 'ob_9_dist', 'ob_10_id', 'ob_10_dist'
    ]]

    for flo2d_index in range(len(flo2d_station)):

        flo2d_obs_mapping = [flo2d_station[flo2d_index][0]]

        flo2d_lat = float(flo2d_station[flo2d_index][2])
        flo2d_lng = float(flo2d_station[flo2d_index][1])

        distances = {}

        for obs_index in range(len(obs_stations)):
            lat = float(obs_stations[obs_index][4])
            lng = float(obs_stations[obs_index][5])

            intermediate_value = cos(radians(flo2d_lat)) * cos(
                radians(lat)) * cos(radians(lng) - radians(flo2d_lng)) + sin(
                    radians(flo2d_lat)) * sin(radians(lat))
            if intermediate_value < 1:
                distance = 6371 * acos(intermediate_value)
            else:
                distance = 6371 * acos(1)

            distances[obs_stations[obs_index][2]] = distance

        sorted_distances = collections.OrderedDict(
            sorted(distances.items(), key=operator.itemgetter(1))[:10])

        for key in sorted_distances.keys():
            flo2d_obs_mapping.extend([key, sorted_distances.get(key)])

        print(flo2d_obs_mapping)
        flo2d_obs_mapping_list.append(flo2d_obs_mapping)

    create_csv('MDPA_flo2d_30_obs_mapping.csv', flo2d_obs_mapping_list)
def get_display_names():
    try:
        names = csv_utils.read_csv(get_names_file_file_name())
        return names
    except FileNotFoundError:
        init.create_names_file()
        return get_display_names()
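get_display_names and the other csv_utils getters in the examples below (get_problem_table, get_all_attempts, get_global_leader_board, get_config_vars) share the same read-or-initialize-and-retry pattern. A minimal sketch of that pattern, assuming csv_utils is imported as in the surrounding code; load_or_init and create_file are placeholder names, not part of the original module.

def load_or_init(file_name, create_file):
    # Read the CSV; on first use the file does not exist yet,
    # so create it and then read it again.
    try:
        return csv_utils.read_csv(file_name)
    except FileNotFoundError:
        create_file()
        return load_or_init(file_name, create_file)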
Example #7
def get_problem_table():
    try:
        table = csv_utils.read_csv(get_problem_table_file_name())
        for i in range(0, len(table)):
            table[i][constants.ProblemTableStruct.ID.value] = int(table[i][constants.ProblemTableStruct.ID.value])
        return table
    except FileNotFoundError:
        init.create_problems_table()
        return get_problem_table()
Example #8
def get_all_attempts(problem_id):
    try:
        attempts = csv_utils.read_csv(get_problem_file_name(problem_id))
        for i in range(0, len(attempts)):
            attempts[i][constants.ProblemFileStruct.PERCENT.value] = \
                float(attempts[i][constants.ProblemFileStruct.PERCENT.value])
        return attempts
    except FileNotFoundError:
        create_problem_file(problem_id)
        return get_all_attempts(problem_id)
Example #9
def process_data(
        dataset_dir,
        model_type,
        dataset_type,
        dataset_id,  #layer, 
        csv_filename,
        num_classes,
        num_procs):
    print("Generating new files!")

    #open files
    try:
        csv_contents = read_csv(csv_filename)
    except OSError:
        print("ERROR: Cannot open CSV file: " + csv_filename)
        return  # cannot continue without csv_contents

    b_dir_name = os.path.join(
        dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type,
                                            dataset_id))
    sp_dir_name = os.path.join(
        dataset_dir, 'sp_{0}_{1}_{2}'.format(model_type, dataset_type,
                                             dataset_id))
    pp_dir_name = os.path.join(
        dataset_dir, 'pp_{0}_{1}_{2}'.format(model_type, dataset_type,
                                             dataset_id))

    if (not os.path.exists(sp_dir_name)):
        os.makedirs(sp_dir_name)
    if (not os.path.exists(pp_dir_name)):
        os.makedirs(pp_dir_name)

    print("Organizing csv_contents")
    for ex in csv_contents:
        ex['b_path'] = os.path.join(b_dir_name,
                                    '{0}.b'.format(ex['example_id']))
        ex['sp_path'] = os.path.join(sp_dir_name,
                                     '{0}.npy'.format(ex['example_id']))
        ex['pp_path'] = os.path.join(pp_dir_name,
                                     '{0}.npz'.format(ex['example_id']))
        '''
		ex['b_path'] = os.path.join(b_dir_name, '{0}_{1}.b'.format(ex['example_id'], layer))
		ex['sp_path'] = os.path.join(sp_dir_name, '{0}_{1}.npy'.format(ex['example_id'], layer))
		ex['pp_path'] = os.path.join(pp_dir_name, '{0}_{1}.npz'.format(ex['example_id'], layer))
		'''

    dataset = [ex for ex in csv_contents if ex['label'] < num_classes]
    #print("dataset_length:", len(dataset), len([x for x in os.listdir(sp_dir_name) if "_3." in x]))
    print("dataset_length:", len(dataset), len(os.listdir(pp_dir_name)))

    #dataset = dataset[:41]

    # CONVERT BINARY EVENTS TO ITRS
    convert_event_to_itr(dataset, num_procs=num_procs)
Example #10
def get_global_leader_board():
    try:
        board = csv_utils.read_csv(get_global_leader_board_file_name())
        for i in range(0, len(board)):
            board[i][constants.GlobalLeaderboardStruct.POINTS.value] = float(
                board[i][constants.GlobalLeaderboardStruct.POINTS.value])
            board[i][constants.GlobalLeaderboardStruct.NO_PROBLEMS.value] = int(
                board[i][constants.GlobalLeaderboardStruct.NO_PROBLEMS.value])
            board[i][constants.GlobalLeaderboardStruct.AVERAGE_PERCENT.value] = float(
                board[i][constants.GlobalLeaderboardStruct.AVERAGE_PERCENT.value])
        return board
    except FileNotFoundError:
        init.create_leaderboard()
        return get_global_leader_board()
Example #11
def get_config_vars():
    try:
        config_vars = csv_utils.read_csv(get_config_file_name())
        casted_vars = []
        for read_var in config_vars:
            for actual_var in ConfigVars:
                if actual_var.value.var_name() == read_var[
                        constants.ConfigFileStruct.VAR_NAME.value]:
                    temp_var = read_var
                    temp_var[constants.ConfigFileStruct.VAR_VALUE.value] = \
                        actual_var.value.type_func()(read_var[constants.ConfigFileStruct.VAR_VALUE.value])
                    casted_vars.append(temp_var)
        return casted_vars
    except FileNotFoundError:
        init.create_config_file()
        return get_config_vars()
def main(model_type, dataset_dir, csv_filename, dataset_type, dataset_id,
         max_features, num_procs):

    if (model_type == 'i3d'):
        from gi3d_wrapper import DEPTH_SIZE, CNN_FEATURE_COUNT
    if (model_type == 'trn'):
        from trn_wrapper import DEPTH_SIZE, CNN_FEATURE_COUNT
    if (model_type == 'tsm'):
        from tsm_wrapper3 import DEPTH_SIZE, CNN_FEATURE_COUNT

    dataset_type_list = []
    if (dataset_type == "frames" or dataset_type == "both"):
        dataset_type_list.append("frames")
    if (dataset_type == "flow" or dataset_type == "both"):
        dataset_type_list.append("flow")

    #get files from dataset
    csv_contents = read_csv(csv_filename)
    #num_features = [min(feat_num, max_features) for feat_num in CNN_FEATURE_COUNT]
    num_features = 128  #[min(feat_num, max_features) for feat_num in CNN_FEATURE_COUNT]

    for ex in csv_contents:
        '''
		for layer in range(DEPTH_SIZE):

			#get IAD files for read
			for dtype in dataset_type_list:

				iad_path = 'iad_path_{0}_{1}'.format(dtype, layer)
				ex[iad_path] = os.path.join(dataset_dir, 'iad_{0}_{1}_{2}'.format(model_type, dtype, dataset_id), ex['label_name'], '{0}_{1}.npz'.format(ex['example_id'], layer))
				assert os.path.exists(ex[iad_path]), "Cannot locate IAD file: "+ ex[iad_path]

			#generate binary directory for write
			bin_dir = os.path.join(dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type, dataset_id), ex['label_name']) 
			if ( not os.path.exists(bin_dir) ):
				os.makedirs(bin_dir)
			bin_path = 'b_path_{0}'.format(layer)
			ex[bin_path] = os.path.join(bin_dir, '{0}_{1}.b'.format(ex['example_id'], layer))
		'''

        #get IAD files for read (no layer)
        for dtype in dataset_type_list:

            iad_path = 'iad_path_{0}'.format(dtype)
            ex[iad_path] = os.path.join(
                dataset_dir, 'iad_{0}_{1}_{2}'.format(model_type, dtype,
                                                      dataset_id),
                ex['label_name'], '{0}.npz'.format(ex['example_id']))
            assert os.path.exists(
                ex[iad_path]), "Cannot locate IAD file: " + ex[iad_path]

        #generate binary directory for write
        bin_dir = os.path.join(
            dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type,
                                                dataset_id), ex['label_name'])
        if (not os.path.exists(bin_dir)):
            os.makedirs(bin_dir)
        bin_path = 'b_path'
        ex[bin_path] = os.path.join(bin_dir, '{0}.b'.format(ex['example_id']))
    #csv_contents = csv_contents

    p = Pool(num_procs)

    #get the threshold values for each feature in the training dataset
    training_dataset = [
        ex for ex in csv_contents if ex['dataset_id'] >= dataset_id
    ]
    other_args = [DEPTH_SIZE, dataset_type_list, num_features]

    print("Getting Threshold")
    print("other_args:", other_args)
    split_threshold_info = split_dataset_run_func(p, determine_threshold,
                                                  training_dataset, other_args)

    #combine chunked threshold info together
    threshold_matrix = np.zeros((DEPTH_SIZE, max_features))
    threshold_count = np.zeros((DEPTH_SIZE, max_features))
    for x in split_threshold_info:
        #for layer in range(DEPTH_SIZE):
        layer = 0
        for feature in range(num_features):
            threshold_matrix[
                layer,
                feature] += x[layer][feature].mean * x[layer][feature].count
            threshold_count[layer, feature] += x[layer][feature].count
    threshold_count[np.where(threshold_count == 0)] = 1

    threshold_matrix /= threshold_count

    filename = os.path.join(
        dataset_dir, 'b_{0}_{1}_{2}'.format(model_type, dataset_type,
                                            dataset_id),
        'threshold_values.npy')
    np.save(filename, threshold_matrix)

    assert os.path.exists(filename), "filename cannot be found: " + filename
    threshold_matrix = np.load(filename)

    #process the IADs and save the parsed files
    full_dataset = [
        ex for ex in csv_contents
        if ex['dataset_id'] >= dataset_id or ex['dataset_id'] == 0
    ]

    other_args = [
        DEPTH_SIZE, dataset_type_list, threshold_matrix, num_features
    ]
    print("Converting to Binary")
    split_dataset_run_func(p, sparsify_iad_dataset, full_dataset, other_args)
Example #13
from pathlib import Path

import matplotlib.pyplot as plt

from csv_utils import read_csv

titles = [
    "Air Pressure",
    "Water vapor pressure",
    "Relative air humidity",
    "Specific air humidity",
    "Average cloud cover",
    "Temperature",
    "Wind speed",
    "Downfall",
    "Cloudy weather",
]

date_time_key = "date"
df = read_csv('../../data/processed/processed_data.csv')
x = df[[clean_string(titles[1])]]


def show_raw_visualization(data):
    Path("../../plots/data").mkdir(parents=True, exist_ok=True)
    for i in titles:
        title = clean_string(i)
        x = data[[title]]
        fig = x.plot().get_figure()
        fig.savefig("../../plots/data/{}.png".format(title))


def show_heatmap(data):
    plt.matshow(data.corr())
    plt.xticks(range(data.shape[1]), data.columns, fontsize=14, rotation=90)
Example #14
def main(model_type, dataset_dir, csv_filename, dataset_type, dataset_id,
         num_features, num_procs):

    if (model_type == 'i3d'):
        from gi3d_wrapper import depth_size
    if (model_type == 'trn'):
        from trn_wrapper import depth_size
    if (model_type == 'tsm'):
        from tsm_wrapper import depth_size

    dataset_type_list = []
    if (dataset_type == "frames" or dataset_type == "both"):
        dataset_type_list.append("frames")
    if (dataset_type == "flow" or dataset_type == "both"):
        dataset_type_list.append("flow")

    #get files from dataset
    csv_contents = read_csv(csv_filename)[:23]

    for ex in csv_contents:

        print(ex['example_id'])

        for layer in range(depth_size):

            #get IAD files for read
            for dtype in dataset_type_list:

                iad_path = 'iad_path_{0}_{1}'.format(dtype, layer)
                ex[iad_path] = os.path.join(
                    dataset_dir,
                    'iad_{0}_{1}_{2}'.format(model_type, dtype,
                                             dataset_id), ex['label_name'],
                    '{0}_{1}.npz'.format(ex['example_id'], layer))
                assert os.path.exists(
                    ex[iad_path]), "Cannot locate IAD file: " + ex[iad_path]

            #generate txt directory for write
            txt_dir = os.path.join(
                dataset_dir, 'txt_{0}_{1}_{2}'.format(model_type, dataset_type,
                                                      dataset_id),
                ex['label_name'])
            if (not os.path.exists(txt_dir)):
                os.makedirs(txt_dir)
            txt_path = 'txt_path_{0}'.format(layer)
            ex[txt_path] = os.path.join(
                txt_dir, '{0}_{1}.txt'.format(ex['example_id'], layer))

    p = Pool(num_procs)

    #get the threshold values for each feature in the training dataset
    training_dataset = [
        ex for ex in csv_contents if ex['dataset_id'] >= dataset_id
    ][:20]
    other_args = [depth_size, dataset_type_list, num_features]

    split_threshold_info = split_dataset_run_func(p, determine_threshold,
                                                  training_dataset, other_args)
    #determine_threshold((training_dataset, depth_size,dataset_type_list,num_features))

    #combine chunked threshold info together
    threshold_matrix = np.zeros((depth_size, num_features))
    for x in split_threshold_info:
        for layer in range(depth_size):
            for feature in range(num_features):
                threshold_matrix[layer, feature] += x[layer][feature].mean * x[
                    layer][feature].count
Example #15
        else:
            print(data[METADATA_PERSON_ID] + ' blacklisted')
    return outdata


if __name__ == "__main__":

    if len(sys.argv) != 2:
        print('You did not enter metadata file path')
        print('E.g.: python rgbd_match.py metadata_path')
        sys.exit(1)

    metadata_file = sys.argv[1]

    # Create a map
    indata = read_csv(metadata_file)
    indata = blacklist_invalid(indata)
    size = len(indata)
    mapping = {}
    for index in range(1, size):
        data = indata[index]
        if data[METADATA_EXTENSION] != '.jpg':
            continue
        scanid = data[METADATA_SCAN_ID]
        order = data[METADATA_ORDER]
        key = scanid + str(order)
        mapping[key] = data

    # For every depthmap add rgb file
    output = []
    processed = {}
def main(dataset_dir, csv_filename, dataset_type, dataset_id,
         feature_retain_count):

    datatset_type_list = []
    if (dataset_type == "frames" or dataset_type == "both"):
        datatset_type_list.append("frames")
    if (dataset_type == "flow" or dataset_type == "both"):
        datatset_type_list.append("flow")

    #setup feature_rank_parser
    frame_ranking_file = os.path.join(
        dataset_dir, 'iad_frames_' + str(dataset_id),
        "feature_ranks_" + str(dataset_id) + ".npz")
    flow_ranking_file = os.path.join(
        dataset_dir, 'iad_flow_' + str(dataset_id),
        "feature_ranks_" + str(dataset_id) + ".npz")

    pruning_indexes = {}
    if (dataset_type == "frames"):
        assert os.path.exists(
            frame_ranking_file
        ), "Cannot locate Feature Ranking file: " + frame_ranking_file
        pruning_indexes["frames"] = get_top_n_feature_indexes(
            frame_ranking_file, feature_retain_count)
    elif (dataset_type == "flow"):
        assert os.path.exists(
            flow_ranking_file
        ), "Cannot locate Feature Ranking file: " + flow_ranking_file
        pruning_indexes["flow"] = get_top_n_feature_indexes(
            flow_ranking_file, feature_retain_count)
    elif (dataset_type == "both"):
        assert os.path.exists(
            frame_ranking_file
        ), "Cannot locate Feature Ranking file: " + frame_ranking_file
        assert os.path.exists(
            flow_ranking_file
        ), "Cannot locate Feature Ranking file: " + flow_ranking_file

        pruning_indexes = get_top_n_feature_indexes_combined(
            frame_ranking_file, flow_ranking_file, feature_retain_count)

    #setup file-io
    txt_path = os.path.join(dataset_dir,
                            'txt_' + dataset_type + '_' + str(dataset_id))
    if (not os.path.exists(txt_path)):
        os.makedirs(txt_path)

    #get files from dataset
    try:
        csv_contents = read_csv(csv_filename)
    except OSError:
        print("ERROR: Cannot open CSV file: " + csv_filename)
        return  # cannot continue without csv_contents

    file_list = [
        ex for ex in csv_contents
        if ex['dataset_id'] >= dataset_id or ex['dataset_id'] == 0
    ]

    for ex in file_list:
        file_location = os.path.join(ex['label_name'], ex['example_id'])
        print("Converting " + file_location)
        for layer in range(5):

            iad_filenames = {}
            for dt in datatset_type_list:
                iad_filenames[dt] = os.path.join(
                    dataset_dir, 'iad_' + dt + '_' + str(dataset_id),
                    file_location + "_" + str(layer) + ".npz")
                assert os.path.exists(
                    iad_filenames[dt]
                ), "Cannot locate IAD file: " + iad_filenames[dt]

            label_dir = os.path.join(txt_path, str(layer), ex['label_name'])
            if (not os.path.exists(label_dir)):
                os.makedirs(label_dir)

            txt_filename = os.path.join(
                txt_path, str(layer),
                file_location + "_" + str(layer) + ".txt")
            sparsify_iad(datatset_type_list,
                         iad_filenames,
                         pruning_indexes,
                         layer,
                         name=txt_filename)
            sparsify_iad2(datatset_type_list,
                          iad_filenames,
                          pruning_indexes,
                          layer,
                          name=txt_filename)
Example #17
    def test_read_csv_good_case(self):
        file_path = self._create_test_csv(
            ['label1,label2,label3', '1,2,3', '4,5,6'])
        self.assertListEqual(
            [['label1', 'label2', 'label3'], ['1', '2', '3'], ['4', '5', '6']],
            csv_utils.read_csv(file_path))
Example #18
def read_csv(file_name):
    return csv_utils.read_csv(os.path.join(DATA_DIRECTORY, file_name))
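Illustrative usage of this wrapper, assuming DATA_DIRECTORY points at the project's data folder; the file name is a placeholder.

rows = read_csv('stations.csv')  # resolves to <DATA_DIRECTORY>/stations.csv
header, records = rows[0], rows[1:]  # csv_utils.read_csv returns a list of row lists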
def main(dataset_dir, csv_filename, dataset_type, dataset_id, feature_retain_count):

	datatset_type_list = []
	if(dataset_type=="frames" or dataset_type=="both"):
		datatset_type_list.append("frames")
	if(dataset_type=="flow" or dataset_type=="both"):
		datatset_type_list.append("flow")

	#setup feature_rank_parser
	frame_ranking_file = os.path.join( dataset_dir, 'iad_frames_'+str(dataset_id), "feature_ranks_"+str(dataset_id)+".npz") 
	flow_ranking_file = os.path.join( dataset_dir, 'iad_flow_'+str(dataset_id), "feature_ranks_"+str(dataset_id)+".npz") 

	pruning_indexes = {}
	if(dataset_type=="frames"):
		assert os.path.exists(frame_ranking_file), "Cannot locate Feature Ranking file: "+ frame_ranking_file
		pruning_indexes["frames"] = get_top_n_feature_indexes(frame_ranking_file, feature_retain_count)
	elif(dataset_type=="flow"):
		assert os.path.exists(flow_ranking_file), "Cannot locate Feature Ranking file: "+ flow_ranking_file
		pruning_indexes["flow"] = get_top_n_feature_indexes(flow_ranking_file, feature_retain_count)
	elif(dataset_type=="both"):
		assert os.path.exists(frame_ranking_file), "Cannot locate Feature Ranking file: "+ frame_ranking_file
		assert os.path.exists(flow_ranking_file), "Cannot locate Feature Ranking file: "+ flow_ranking_file

		if(save_name == "ucf"):
			if(dataset_id == 1):
				# UCF 1 -> waiting to finish training
				weight_ranking = [[0.177901,0.334655,0.437483,0.801745,0.916997],[0.299762,0.409992,0.519958,0.7917,0.911182]]
			if(dataset_id == 2):
				# UCF 2* - > training
				weight_ranking = [[0.120804,0.24663,0.31483,0.674861,0.842982],[0.208829,0.256675,0.354216,0.557494,0.6894]]
			if(dataset_id == 3):
				# UCF 3* - > training
				weight_ranking = [[0.100449,0.204335,0.270949,0.528152,0.726408],[0.161248,0.187946,0.21438,0.378007,0.473169]]
		if(save_name == "hmdb"):
			if(dataset_id == 1):
				# HMDB 1* - > training
				weight_ranking = [[0.085621,0.201307,0.21634,0.527451,0.675817],[0.190196,0.233333,0.263399,0.296732,0.332026]]
			if(dataset_id == 2):
				# HMDB 2* - > finished
				weight_ranking = [[0.075163,0.145752,0.169281,0.365359,0.566013],[0.131373,0.184314,0.205882,0.282353,0.443791]]
			if(dataset_id == 3):
				# HMDB 3* - > finished
				weight_ranking = [[0.054248,0.127451,0.14183,0.25098,0.462092],[0.10915,0.145098,0.137255,0.138562,0.231373]]
		if(save_name == "bm"):
			if(dataset_id == 1):
				# BLOCKMOVING 1* - > finished
				weight_ranking = [[0.921875,0.9296875,0.9296875,0.6171875,0.578125],[0.828125,0.898438,0.945313,0.945313,0.953125]]
			if(dataset_id == 2):
				# BLOCKMOVING 2* - > finished
				weight_ranking = [[0.765625,0.8125,0.859375,0.671875,0.695313],[0.742188,0.835938,0.898438,0.875,0.835938]]
			if(dataset_id == 3):	
				# BLOCKMOVING 3* - > finished
				weight_ranking = [[0.671875,0.679688,0.742188,0.609375,0.539063],[0.703125,0.71875,0.84375,0.75,0.742188]]
		
		pruning_indexes = get_top_n_feature_indexes_combined(frame_ranking_file, flow_ranking_file, feature_retain_count, weight_ranking)

	#setup file-io
	txt_path = os.path.join(dataset_dir, 'txt_'+dataset_type+'_'+str(dataset_id))
	if(not os.path.exists(txt_path)):
		os.makedirs(txt_path)

	#get files from dataset
	try:
		csv_contents = read_csv(csv_filename)
	except OSError:
		print("ERROR: Cannot open CSV file: "+ csv_filename)
		return  # cannot continue without csv_contents

	global_threshold_values = {"mean": [], "std_dev":[], "count":[]}
	for i in range(5):
		global_threshold_values["mean"].append([0]*feature_retain_count)
		global_threshold_values["std_dev"].append([0]*feature_retain_count)
		global_threshold_values["count"].append(0)


	train_list = [ex for ex in csv_contents if ex['dataset_id'] >= dataset_id]
	file_list = [ex for ex in csv_contents if ex['dataset_id'] >= dataset_id or ex['dataset_id'] == 0]
	
	for ex in train_list:
		file_location = os.path.join(ex['label_name'], ex['example_id'])
		print("Converting "+file_location)
		for layer in range(5):

			iad_filenames = {}
			for dt in datatset_type_list:
				iad_filenames[dt] = os.path.join(dataset_dir, 'iad_'+dt+'_'+str(dataset_id), file_location+"_"+str(layer)+".npz") 
				assert os.path.exists(iad_filenames[dt]), "Cannot locate IAD file: "+ iad_filenames[dt]

			txt_filename = os.path.join(txt_path, str(layer), file_location+"_"+str(layer)+".txt")
			add_to_global_threshold(datatset_type_list, iad_filenames, pruning_indexes, layer, global_threshold_values)
	'''
	for i in range(5):
		print(str(i))
		print("mean:", global_threshold_values["mean"][i])
		print("std_dev:", global_threshold_values["std_dev"][i])
		print("count:", global_threshold_values["count"][i])
		print('')
	'''

	
	for ex in file_list:
		file_location = os.path.join(ex['label_name'], ex['example_id'])
		print("Converting "+file_location)
		for layer in range(5):

			iad_filenames = {}
			for dt in datatset_type_list:
				iad_filenames[dt] = os.path.join(dataset_dir, 'iad_'+dt+'_'+str(dataset_id), file_location+"_"+str(layer)+".npz") 
				assert os.path.exists(iad_filenames[dt]), "Cannot locate IAD file: "+ iad_filenames[dt]
			
			label_dir = os.path.join(txt_path, str(layer),ex['label_name'])
			if ( not os.path.exists(label_dir) ):
				os.makedirs(label_dir)

			txt_filename = os.path.join(txt_path, str(layer), file_location+"_"+str(layer)+".txt")
			sparsify_iad(datatset_type_list, iad_filenames, pruning_indexes, layer, global_threshold_values, name=txt_filename)