import logging
import pickle
from glob import glob

import numpy as np

# ResultPool, dict_equal and delete_if_exists are project-internal helpers
# imported from elsewhere in this codebase.

log = logging.getLogger(__name__)


def delete_duplicate_results(result_folder):
    """Delete all files of results whose varying parameters duplicate an
    earlier result in the given folder."""
    res_pool = ResultPool()
    res_pool.load_results(result_folder)
    var_params = res_pool.varying_params()
    unique_var_params = []
    duplicate_ids = []
    all_result_file_names = res_pool.result_file_names()
    for i_exp, params in enumerate(var_params):
        if np.any([dict_equal(params, p) for p in unique_var_params]):
            log.warning("Duplicate result {:s}".format(
                all_result_file_names[i_exp]))
            duplicate_ids.append(i_exp)
        else:
            unique_var_params.append(params)
    # Delete result/experiment/model (outdated, used to exist)/param files
    for i_exp in duplicate_ids:
        result_file_name = all_result_file_names[i_exp]
        yaml_file_name = result_file_name.replace('.result.pkl', '.yaml')
        model_file_name = result_file_name.replace('.result.pkl', '.pkl')
        model_param_file_name = result_file_name.replace(
            '.result.pkl', '.npy')
        delete_if_exists(result_file_name)
        delete_if_exists(yaml_file_name)
        delete_if_exists(model_file_name)
        delete_if_exists(model_param_file_name)
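# `delete_if_exists` is a project helper not shown here. A minimal sketch
# of its assumed behavior (remove a file, silently ignore a missing one),
# used only as a fallback when the real helper is not in scope:
try:
    delete_if_exists
except NameError:
    import os

    def delete_if_exists(file_name):
        # A missing file is not an error for cleanup purposes.
        if os.path.exists(file_name):
            os.remove(file_name)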
def mark_duplicate_results(result_folder, tag_dict):
    """Like delete_duplicate_results, but keep the duplicate results and
    only tag them by merging tag_dict into their stored parameters."""
    res_pool = ResultPool()
    res_pool.load_results(result_folder)
    var_params = res_pool.varying_params()
    unique_var_params = []
    duplicate_ids = []
    all_result_file_names = res_pool.result_file_names()
    for i_exp, params in enumerate(var_params):
        if np.any([dict_equal(params, p) for p in unique_var_params]):
            log.warning("Duplicate result {:s}".format(
                all_result_file_names[i_exp]))
            duplicate_ids.append(i_exp)
        else:
            unique_var_params.append(params)
    # Update parameters of the duplicates and write them back
    for i_exp in duplicate_ids:
        result_file_name = all_result_file_names[i_exp]
        # Result files are pickles, so read and write them in binary
        # mode, which pickle requires.
        with open(result_file_name, 'rb') as result_file:
            result = pickle.load(result_file)
        result.parameters.update(tag_dict)
        with open(result_file_name, 'wb') as result_file:
            pickle.dump(result, result_file)
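# Hypothetical usage sketch; the folder path and tag are made up for
# illustration only. Tagging first allows inspecting which results would
# be removed before calling delete_duplicate_results for real:
#
#     mark_duplicate_results('data/models/experiment1/',
#                            {'tags': 'duplicate'})
#     delete_duplicate_results('data/models/experiment1/')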
def _skip_already_done_experiments(self):
    log.info("Check if some experiments were already run...")
    clean_all_train_strs = []
    # Go through all experiments and their result folders and check
    # which experiments already have a stored result.
    # Collect all folder paths first and load each folder's results
    # only once, so that no result file is read twice.
    all_folder_paths = []
    for i_experiment in range(len(self._all_train_strs)):
        folder_path = self._create_save_folder_path(i_experiment)
        all_folder_paths.append(folder_path)
    unique_folder_paths = set(all_folder_paths)
    folder_path_to_results = dict()
    for folder_path in unique_folder_paths:
        existing_result_files = glob(folder_path + "*[0-9].result.pkl")
        # Result files are pickles, so load them in binary mode.
        results = []
        for file_name in existing_result_files:
            with open(file_name, 'rb') as result_file:
                results.append(pickle.load(result_file))
        folder_path_to_results[folder_path] = results
    for i_experiment in range(len(self._all_train_strs)):
        train_str = self._all_train_strs[i_experiment]
        train_dict = self._load_without_layers(train_str)
        original_params = train_dict['original_params']
        folder_path = all_folder_paths[i_experiment]
        results = folder_path_to_results[folder_path]
        experiment_already_ran = False
        for r in results:
            if dict_equal(r.parameters, original_params):
                experiment_already_ran = True
                log.warning("Already ran id {:d} {:s}".format(
                    i_experiment, str(original_params)))
        if not experiment_already_ran:
            clean_all_train_strs.append(train_str)
    self._all_train_strs = clean_all_train_strs
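# `dict_equal` is likewise a project helper not shown here. A minimal
# sketch of its assumed semantics: two dicts are equal if they share the
# same keys and every value compares equal. np.array_equal is used so
# that numpy-array values reduce to a single bool, which plain `==`
# would not. This is an assumption, not the project's implementation:
try:
    dict_equal
except NameError:
    def dict_equal(dict_a, dict_b):
        if set(dict_a.keys()) != set(dict_b.keys()):
            return False
        return all(np.array_equal(dict_a[key], dict_b[key])
                   for key in dict_a)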