def test_merge_final_files(self):
    """merge_final_files should produce a csv identical to the stored expected result."""
    output_csv_name = "final_sum_sum_sum_summed.csv"
    base_path = "Unit_tests/csv_test_data/merge_final_files_csv"
    merge_final_files(base_path, output_csv_name, dissasemble_cell)
    expected_rows = cvs_object(f"{base_path}/result_{output_csv_name}").get_lines()
    produced_rows = cvs_object(f"{base_path}/{output_csv_name}").get_lines()
    self.assertEqual(expected_rows, produced_rows)
def calc_avg_from_base_and_csv(base, csv_list, output_folder, calc_deviant=True):
    """For every csv group found under *base*, write an averaged csv (and,
    optionally, a deviation csv) into *output_folder*.

    Args:
        base: root folder handed to walk_base
        csv_list: csv names handed to walk_base
        output_folder: folder the avg/deviation csv files are written into
        calc_deviant (bool): also compute and write the deviation csv
    """
    for key, paths in walk_base(base, csv_list).items():
        target = f"{output_folder}/{key}"
        avg_rows, deviant_rows = get_processed_csv(paths, calc_deviant, len(paths))
        cvs_object(get_avg_path(target)).write(avg_rows)
        if calc_deviant:
            cvs_object(get_deviation_path(target)).write(deviant_rows)
def merge_csv(filter_names, saved_path, class_size_dict, model_names, base_path):
    """Merge one csv per (filter, model) pair into a single csv at *saved_path*.

    Each input file contributes one value column per class row.  While handling
    the final (filter, model) pair, an extra column is appended holding the
    class image count from *class_size_dict* (or the literal header 'images'
    when the row key is not in the dict).  Data rows are sorted numerically on
    the class id; the first-inserted (header) row stays on top.
    """
    merged = {}
    last_filter, last_model = filter_names[-1], model_names[-1]
    for name in filter_names:
        for model_name in model_names:
            with open(f"{base_path}/{name}_{model_name}.csv", 'r') as handle:
                rows = list(csv.reader(handle))
            rows[0][2] = f"{name}_{model_name}"
            is_last_file = name == last_filter and model_name == last_model
            for row in rows:
                bucket = merged.setdefault(row[0], [row[0]])
                bucket.append(row[2])
                if is_last_file:
                    bucket.append(class_size_dict[row[0]]
                                  if row[0] in class_size_dict else 'images')
    all_rows = list(merged.values())
    header, body = all_rows[0], all_rows[1:]
    body.sort(key=lambda row: int(row[0]))
    cvs_object(saved_path).write([header] + body)
def sum_merged_files_to_one_file(base_path, output_csv_name):
    """Iterate through all sub-folders of *base_path* and combine each folder's
    *output_csv_name* csv into one csv saved at ``{base_path}/{output_csv_name}``.

    The first folder found supplies whole rows; every later folder's rows are
    appended column-wise with their first (class-id) column dropped.

    Args:
        base_path (str): folder whose sub-folders each hold one csv to combine
        output_csv_name (str): name of the csv to read per folder and to write
    """
    data_to_combine = []
    for folder in os.listdir(base_path):
        csv_path = f"{base_path}/{folder}/{output_csv_name}"
        if os.path.exists(csv_path):
            data_to_append = read_csv_file(csv_path)
            add_row_zero = len(data_to_combine) == 0
            for i in range(len(data_to_append)):
                if add_row_zero:
                    data_to_combine.append(data_to_append[i])
                else:
                    # drop the duplicated class-id column, keep the data columns
                    data_to_combine[i].extend(data_to_append[i][1:])
        elif not folder.endswith('.csv'):
            # BUG FIX: the original tested csv_path.endswith('.csv'), which is
            # always true (csv_path ends in output_csv_name), so this warning
            # could never fire.  The intent is to skip csv files lying directly
            # in base_path (e.g. earlier combined output) and warn for folders.
            print(
                f"WARNING: The folder \"{folder}\" does not contain the csv file \"{output_csv_name}\". The program will continue."
            )
    save_path = f"{base_path}/{output_csv_name}"
    csv_to_write = cvs_object(save_path)
    csv_to_write.write(data_to_combine)
def test_sum_summed_plot(self):
    """sum_summed_plots should reproduce the stored expected csv."""
    base_path = 'Unit_tests/csv_test_data/sum_summed_plot_csv'
    model_object_list = get_satina_gains_model_object_list(120)
    sum_summed_plots(model_object_list, 'test', base_path)
    expected_rows = cvs_object(f"{base_path}/result_test_sum_summed.csv").get_lines()
    produced_rows = cvs_object(f"{base_path}/test_sum_summed.csv").get_lines()
    self.assertEqual(expected_rows, produced_rows)
def make_train_test_size_graph(save_path):
    """Write a csv listing, per class, how many train and test images exist.

    Args:
        save_path: path the resulting ['class', 'train', 'test'] csv is written to
    """
    result_rows = [['class', 'train', 'test']]
    h5_train = h5_object(get_h5_train(), training_split=1)
    h5_test = h5_object(get_h5_test(), training_split=1)
    counts = {}
    iterate_through_dict(h5_train.images_in_classes, counts, 'train')
    iterate_through_dict(h5_test.images_in_classes, counts, 'test')
    # classes sorted numerically by their id
    for class_id, sizes in sorted(counts.items(), key=lambda item: int(item[0])):
        result_rows.append([class_id, sizes.get('train', 0), sizes.get('test', 0)])
    cvs_object(save_path).write(result_rows)
def combine_fitdata(model_object_list, base_path):
    """Combine every model's fit data into one csv with one column per model.

    Output row 0 is the header ('epoch' followed by each model's csv name);
    row i carries the epoch index plus each model's recorded value for that
    epoch.  Models with fewer rows than the longest one are padded with ' '.

    Args:
        model_object_list (list): models whose ``fit_data`` rows are combined
        base_path (str): folder in which ``fitdata_combined.csv`` is written
    """
    data = [['epoch']]
    max_len = max(len(m.fit_data) for m in model_object_list)
    for model_object in model_object_list:
        for i in range(max_len):
            try:
                if i + 1 > len(data):
                    data.append([i])
                if i == 0:
                    # header row: column label for this model
                    data[i].append(model_object.get_csv_name())
                elif i >= len(model_object.fit_data):
                    # this model trained for fewer epochs than the longest one
                    data[i].append(' ')
                else:
                    data[i].append(model_object.fit_data[i][1])
            except Exception as e:
                # BUG FIX: the original re-raised fresh, empty exceptions
                # (`raise IndexError` / `raise Exception`), discarding the
                # message and traceback; a bare `raise` preserves both.
                print(f"ERROR: {e}")
                raise
    fitdata_path = f"{base_path}/fitdata_combined.csv"
    cvs_object(fitdata_path).write(data)
def construct_dict(folder, extensions):
    """Map each csv filename in *folder* whose name contains *extensions*
    to the list of rows that file holds.

    Args:
        folder: directory to scan
        extensions: substring a filename must contain to be included

    Returns:
        dict: {filename: rows}
    """
    matches = {}
    for entry in os.listdir(folder):
        if entry.endswith('.csv') and extensions in entry:
            matches[entry] = cvs_object(f"{folder}/{entry}").get_lines()
    return matches
def test_sum_plot(self):
    """sum_plot for a single model should reproduce the stored expected csv."""
    base_path = 'Unit_tests/csv_test_data/sum_plot_csv'
    model_object_list = [get_satina_gains_model_object_list(120)[-1]]
    sum_plot(model_object_list, 'test', base_path)
    expected_rows = cvs_object(f"{base_path}/result_model_42_test_summed.csv").get_lines()
    produced_rows = cvs_object(f"{base_path}/model_42_test_summed.csv").get_lines()
    # normalise float formatting before comparing
    produced_rows[1][1] = str(round(float(produced_rows[1][1]), 2))
    self.assertEqual(expected_rows, produced_rows)
def combine_two_summed_class_accracy(sum_test_path, sum_val_path, base_path):
    """Join the summed test and validation class-accuracy csv files into one
    csv saved as test_val_sum_class_accuracy.csv under *base_path*.

    Args:
        sum_test_path: path of the summed test csv
        sum_val_path: path of the summed validation csv
        base_path: folder the combined csv is written into
    """
    combined = combine_rows(get_rows(sum_test_path, 'test'),
                            get_rows(sum_val_path, 'val'))
    cvs_object(f"{base_path}/test_val_sum_class_accuracy.csv").write(combined)
def save_plot(model_object_list:list, extension, base_path)->None:
    """Iterates through each model object, and saves the accuracy for each class in a
    csv file named after the model and the given extension.

    Args:
        model_object_list (list): the list of models to iterate through
        extension (str): the extension, deciding whether it is 'test' or 'val'
        base_path (str): folder each model's csv file is written into
    """
    for model_object in model_object_list:
        # each model writes its own csv_data under its extension-specific name
        cvs_obj = cvs_object(f"{base_path}/{model_object.get_csv_name(extension=extension)}.csv")
        cvs_obj.write(model_object.csv_data)
def read_csv_file(csv_path: str) -> list:
    """Read a csv file and return all of its lines as a list of rows.

    Args:
        csv_path (str): path of the csv file to open

    Returns:
        list: the rows of the file
    """
    return cvs_object(csv_path).get_lines()
def test_combine_fitdata(self):
    """combine_fitdata should merge per-model fitdata csvs into the expected combined csv."""
    base_path = "Unit_tests/csv_test_data/fitdata_csv"
    model_object_list = get_satina_gains_model_object_list(120)
    for model_object in model_object_list:
        fitdata_csv = cvs_object(f"{base_path}/{model_object.get_csv_name()}_fitdata.csv")
        model_object.fit_data = fitdata_csv.get_lines()
    combine_fitdata(model_object_list, base_path)
    expected_rows = cvs_object(f"{base_path}/result_fitdata_combined.csv").get_lines()
    produced_rows = cvs_object(f"{base_path}/fitdata_combined.csv").get_lines()
    self.assertEqual(expected_rows, produced_rows)
def sum_plot(model_object_list: list, extension: str, base_path) -> None:
    """Convert each model's per-class accuracy csv into a summed per-sub-category csv.

    For every model the csv written earlier is read back, summed with
    sum_for_model, and written out under the model's summed csv name.

    Args:
        model_object_list (list): model objects to iterate through
        extension (str): extension added to the csv name, either 'val' or 'test'
        base_path (str): folder holding the input and output csv files
    """
    # NOTE: the original also accumulated the cvs objects in a list that was
    # never read or returned; that dead code has been removed.
    for model_object in model_object_list:
        obj = cvs_object(
            f"{base_path}/{model_object.get_csv_name(extension=extension)}.csv",
            label=model_object.get_size())
        data = sum_for_model(obj)
        obj.write(
            data,
            f"{base_path}/{model_object.get_summed_csv_name(extension=extension)}.csv",
            overwrite_path=True)
def save_fitdata(model_object_list: list, base_path: str) -> None:
    """Save each model's fit data ("epoch", "loss", "accuracy") to its own csv
    file, then combine all of them into one csv.  This is the data used to
    produce the loss/epoch graph.

    Args:
        model_object_list (list): the model objects whose fit data is saved
        base_path (str): the folder to save the csv files in
    """
    for model_object in model_object_list:
        per_model_path = f"{base_path}/{model_object.get_csv_name()}_fitdata.csv"
        cvs_object(per_model_path).write(model_object.fit_data)
    combine_fitdata(model_object_list, base_path)
def sum_train_test_file(get_category, get_sub_category, get_class_accuracy):
    """Build the train/test distribution csv, then derive two summed csvs from
    it: one summed per sub-category and one fully summed.

    Args:
        get_category: callable forwarded to sum_con
        get_sub_category: callable forwarded to sum_con
        get_class_accuracy: callable forwarded to sum_con / sum_summed_con
    """
    base_path = get_paths('phase_one_csv')
    train_test_path = f"{base_path}/train_test_dist.csv"
    make_train_test_size_graph(train_test_path)
    csv_obj = cvs_object(train_test_path)
    sub_cat_rows = generalized_sum(
        csv_obj, sum_con(get_sub_category, get_category, get_class_accuracy))
    csv_obj.write(sub_cat_rows,
                  path=f"{base_path}/sum_train_test_sub_cat.csv",
                  overwrite_path=True)
    summed_rows = generalized_sum(csv_obj, sum_summed_con(get_class_accuracy))
    csv_obj.write(summed_rows,
                  path=f"{base_path}/sum_summed_train_test_sub_cat.csv",
                  overwrite_path=True)
def test_sum_summed_for_class_accuracy(self):
    """sum_summed_for_class_accuracy output should match the stored expected csv."""
    base_path = 'Unit_tests/csv_test_data/sum_summed_for_class_accuracy_csv'
    expected_csv_path = f"{base_path}/result_test_sum_summed_class_accuracy.csv"
    output_csv_path = f"{base_path}/test_sum_summed_class_accuracy.csv"
    input_csv_path = f"{base_path}/test_sum_class_accuracy.csv"
    csv_obj = cvs_object(input_csv_path)
    data = sum_summed_for_class_accuracy(csv_obj)
    try:
        # round the accuracy row so it compares equal to the stored csv
        for i in range(1, len(data[2])):
            data[2][i] = round(data[2][i], 2)
    except (IndexError, TypeError, ValueError):
        # BUG FIX: was a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit; catch only the plausible failures.
        print("index/cast stuff went wrong")
    csv_obj.write(data, path=output_csv_path, overwrite_path=True)
    expected_rows = cvs_object(expected_csv_path).get_lines()
    output_rows = cvs_object(output_csv_path).get_lines()
    self.assertEqual(expected_rows, output_rows)
def iterate_and_sum(model_object_list, extension, sum_path, image_dataset,
                    lable_dataset, epochs_end, images_in_classes, base_path,
                    folder_extension, epochs=None):
    """Run every model, save its per-class accuracy csvs, and sum them.

    After iterate_trough_models has produced results, per-model csvs are saved,
    summed per sub-category, summed overall, and finally combined into the
    class-accuracy csv at *sum_path* plus a fully summed variant.

    NOTE(review): the save folder is hard-coded to /home/biks/Desktop — this
    only works on one machine and should be made configurable.
    """
    # BUG FIX: the original created save_folder inside the try block, so a
    # failure left the name undefined and the later call crashed with a
    # NameError.  Bind the path first, then attempt (best effort) to create it.
    save_folder = f"/home/biks/Desktop/{folder_extension}/{extension}"
    try:
        # makedirs creates both the folder_extension and extension levels
        os.makedirs(save_folder, exist_ok=True)
    except Exception as e:
        print(f"ERROR: {e}, iterate and sum")
    iterate_trough_models(model_object_list, epochs_end, image_dataset,
                          lable_dataset, save_folder, epochs=epochs)
    save_plot(model_object_list, extension, base_path)
    sum_plot(model_object_list, extension, base_path)
    sum_summed_plots(model_object_list, extension, base_path)
    path = sum_class_accuracy(model_object_list, images_in_classes, extension,
                              base_path)
    data_class_acc_val = sum_for_class_accuracy(cvs_object(path))
    csv_obj = cvs_object(sum_path)
    csv_obj.write(data_class_acc_val)
    data = sum_summed_for_class_accuracy(csv_obj)
    csv_obj.write(data,
                  path=f"{base_path}/{extension}_sum_summed_class_accuracy.csv",
                  overwrite_path=True)
def get_processed_csv(paths, calc_deviant, length):
    """Average the csv files in *paths* cell-wise; optionally compute deviation.

    Args:
        paths (list): csv file paths to process
        calc_deviant (bool): whether to also build the deviation csv
        length (int): divisor applied to the accumulated deviation (csv count)

    Returns:
        tuple: (avg_csv rows, deviant_csv rows); deviant_csv is [] when
        calc_deviant is False.
    """
    avg_csv = []
    # first pass: fold every csv into the running average
    for i in range(len(paths)):
        temp_lines = cvs_object(paths[i]).get_lines()
        if len(avg_csv) == 0:
            avg_csv = temp_lines.copy()
        else:
            avg_csv = process_two_csv(copy.deepcopy(avg_csv),
                                      copy.deepcopy(temp_lines), i, avg_calc)
    deviant_csv = []
    if calc_deviant:
        # second pass: the deviation needs the finished average, so the files
        # are read a second time.  (PERF FIX: the original executed this loop —
        # re-reading every csv — even when calc_deviant was False; it is now
        # skipped entirely in that case, with identical results.)
        for i in range(len(paths)):
            temp_lines = cvs_object(paths[i]).get_lines()
            if len(deviant_csv) == 0:
                deviant_csv = process_two_csv(copy.deepcopy(temp_lines),
                                              copy.deepcopy(temp_lines), i,
                                              deviant_calc_first_time,
                                              avg_csv=avg_csv)
            else:
                deviant_csv = process_two_csv(copy.deepcopy(deviant_csv),
                                              copy.deepcopy(temp_lines), i,
                                              deviant_calc, avg_csv=avg_csv)
        deviant_csv = divide_csv(deviant_csv, length)
    return avg_csv, deviant_csv
def append_to_headers(header_extension: str, csv_path: str) -> None:
    """Add an extension to all model columns in the csv's header row, in place.

    Args:
        header_extension (str): the extension to add
        csv_path (str): the path to the csv file to edit
    """
    csv_obj = cvs_object(csv_path)
    rows = csv_obj.get_lines()
    # Columns 0-1 are left untouched; only the model columns from index 2 on
    # receive the extension.  (The original assigned new_rows = [] and
    # immediately overwrote it, then used del + extend; slice assignment does
    # the same work in one step.)
    rows[0][2:] = [
        append_extension_to_header(header, header_extension)
        for header in rows[0][2:]
    ]
    csv_obj.write(rows)
def merge_combined_files(data_to_combine, base_path, output_csv_name):
    """Transpose the combined data and write it to ``{base_path}/{output_csv_name}``.

    Args:
        data_to_combine: data convertible via convert_to_list into a
            rectangular 2-d list (verified by verify_dimensions)
        base_path (str): folder the result csv is written into
        output_csv_name (str): file name of the result csv
    """
    data_list = convert_to_list(data_to_combine)
    custom_error_check(verify_dimensions(data_list),
                       'data_list has the wrong dimensions')
    # Transpose rows <-> columns; zip(*...) replaces the original manual
    # double loop (the dimensions were verified rectangular above).  The
    # commented-out leftover line from the original has been removed.
    data_to_save = [list(column) for column in zip(*data_list)]
    save_path = f"{base_path}/{output_csv_name}"
    csv_to_write = cvs_object(save_path)
    csv_to_write.write(data_to_save)
def sum_class_accuracy(model_object_list: list, images_in_classes, extension, base_path) -> str:
    """Collect every model's recorded per-class accuracies into one csv.

    During training the accuracy for each class is recorded per epoch.  Each
    model's csv is parsed into a nested dict {model -> class -> epoch ->
    accuracy}, converted to a table and written to disk.

    Args:
        model_object_list (list): the models whose accuracy csvs are read
        images_in_classes: class-id -> image-count mapping, forwarded to
            convert_dict_to_list
        extension (str): 'test' or 'val', used in the csv file names
        base_path (str): folder the csvs are read from and written to

    Returns:
        str: the path of the csv file that was written
    """
    save_path = f"{base_path}/{extension}_class_accuracy.csv"
    model_class_accuracy = {}
    for model_object in model_object_list:
        # hoisted: the model key was recomputed on every row in the original
        model_key = model_object.get_csv_name()
        per_class = model_class_accuracy[model_key] = {}
        open_path = f"{base_path}/{model_object.get_csv_name(extension=extension)}.csv"
        check_if_valid_path(open_path)
        with open(open_path, 'r') as csvfile:
            plots = csv.reader(csvfile, delimiter=',')
            next(plots)  # skip the header row
            for row in plots:
                try:
                    if row[0] not in per_class:
                        per_class[row[0]] = {}
                    per_class[row[0]][row[2]] = row[3]
                except Exception as e:
                    # BUG FIX: the original re-raised fresh, empty exceptions
                    # (`raise IndexError` / `raise Exception`), losing the
                    # message and traceback; bare `raise` preserves both.
                    print(f"ERROR: {e}")
                    raise
    data_list = convert_dict_to_list(model_class_accuracy, images_in_classes)
    cvs_object(save_path).write(data_list)
    return save_path
def sum_summed_plots(model_object_list: list, extension, base_path) -> None:
    """Combine every model's summed csv into one ``{extension}_sum_summed.csv``.

    The label column comes from the first model's file; each model then
    contributes one value column, headed by its csv name.

    Args:
        model_object_list (list): models whose summed csvs are combined
        extension (str): 'test' or 'val', used in the csv file names
        base_path (str): folder the csvs are read from and written to
    """
    raw_data = []
    for model_object in model_object_list:
        csv_path = f"{base_path}/{model_object.get_summed_csv_name(extension=extension)}.csv"
        check_if_valid_path(csv_path)
        with open(csv_path, 'r') as csv_handle:
            rows = list(csv.reader(csv_handle, delimiter=','))
        custom_error_check(
            not verify_list_lenght(rows),
            f"the file \"{csv_path}\" only has {len(rows)} items, should be {model_object.output_layer_size}")
        try:
            # header cell becomes this model's column label
            rows[0][1] = model_object.get_csv_name(extension=extension)
        except IndexError as e:
            # BUG FIX: bare `raise` keeps the original message and traceback;
            # the original raised a fresh, empty IndexError instead.
            print(f"ERROR: {e}")
            raise
        raw_data.append(rows)
    try:
        # label column (first cell of each row) taken from the first file
        csv_data = [x[0:1] for x in raw_data[0]]
    except IndexError as e:
        print(f"ERROR {e}")
        raise
    for i in range(len(raw_data)):
        for j in range(len(csv_data)):
            try:
                csv_data[j].append(raw_data[i][j][1])
            except IndexError as e:
                print(f"ERROR: {e}")
                raise
    cvs_object(f"{base_path}/{extension}_sum_summed.csv").write(csv_data)