Example #1
import logging

log = logging.getLogger(__name__)

# ResultPool, dict_equal and delete_if_exists are helpers from the
# surrounding project, assumed to be imported at module level.

def delete_duplicate_results(result_folder):
    res_pool = ResultPool()
    res_pool.load_results(result_folder)

    var_params = res_pool.varying_params()

    unique_var_params = []
    duplicate_ids = []
    all_result_file_names = res_pool.result_file_names()
    for i_exp, params in enumerate(var_params):
        # An experiment is a duplicate if its varying parameters match
        # those of an experiment seen earlier in the pool.
        if any(dict_equal(params, p) for p in unique_var_params):
            log.warning("Duplicate result {:s}".format(
                all_result_file_names[i_exp]))
            duplicate_ids.append(i_exp)
        else:
            unique_var_params.append(params)

    # Delete the result/experiment/model (outdated, used to exist)/param
    # files belonging to each duplicate experiment.
    for i_exp in duplicate_ids:
        result_file_name = all_result_file_names[i_exp]
        yaml_file_name = result_file_name.replace('.result.pkl', '.yaml')
        model_file_name = result_file_name.replace('.result.pkl', '.pkl')
        model_param_file_name = result_file_name.replace('.result.pkl', '.npy')
        delete_if_exists(result_file_name)
        delete_if_exists(yaml_file_name)
        delete_if_exists(model_file_name)
        delete_if_exists(model_param_file_name)
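
The examples here call two small helpers whose definitions are not shown. A minimal sketch of what they could look like, consistent with how they are called above (the bodies are assumptions, not the project's actual implementations):

import os

import numpy as np

def dict_equal(a, b):
    # Plain ``a == b`` breaks for dicts holding numpy arrays, where
    # ``==`` compares elementwise; compare keys and values explicitly.
    if set(a.keys()) != set(b.keys()):
        return False
    return all(np.array_equal(a[key], b[key]) for key in a)

def delete_if_exists(file_name):
    # Remove a file, silently ignoring the case where it is absent.
    if os.path.isfile(file_name):
        os.remove(file_name)

With these in scope, cleaning a folder is a single call, e.g. delete_duplicate_results('data/results/my_experiment/') (the path is invented for the example).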
Example #2
import logging
import pickle

log = logging.getLogger(__name__)

# ResultPool and dict_equal are helpers from the surrounding project,
# assumed to be imported at module level.

def mark_duplicate_results(result_folder, tag_dict):
    res_pool = ResultPool()
    res_pool.load_results(result_folder)

    var_params = res_pool.varying_params()

    unique_var_params = []
    duplicate_ids = []
    all_result_file_names = res_pool.result_file_names()
    for i_exp, params in enumerate(var_params):
        if any(dict_equal(params, p) for p in unique_var_params):
            log.warning("Duplicate result {:s}".format(
                all_result_file_names[i_exp]))
            duplicate_ids.append(i_exp)
        else:
            unique_var_params.append(params)

    # Tag the duplicates: load each result, merge the tag dict into its
    # parameters and write it back. The result files are plain pickles,
    # so load them explicitly instead of relying on np.load's pickle
    # fallback, and open them in binary mode for pickling.
    for i_exp in duplicate_ids:
        result_file_name = all_result_file_names[i_exp]
        with open(result_file_name, 'rb') as f:
            result = pickle.load(f)
        result.parameters.update(tag_dict)
        with open(result_file_name, 'wb') as f:
            pickle.dump(result, f)
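
For illustration, duplicate runs could be tagged rather than deleted like this (the folder path and tag key are invented for the example):

mark_duplicate_results('data/results/my_experiment/',
                       {'duplicate': True})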
Example #3

    # Assumes module-level imports (``from glob import glob``,
    # ``import pickle``) and a module logger ``log``.
    def _skip_already_done_experiments(self):
        log.info("Check if some experiments were already run...")
        clean_all_train_strs = []
        # Go through all experiments and their result folders and keep
        # only the experiments that do not have a stored result yet.

        # Collect all save folder paths first, so that the results of
        # each folder are loaded only once even if several experiments
        # share the same folder.
        all_folder_paths = []
        for i_experiment in range(len(self._all_train_strs)):
            folder_path = self._create_save_folder_path(i_experiment)
            all_folder_paths.append(folder_path)

        unique_folder_paths = set(all_folder_paths)
        folder_path_to_results = dict()
        for folder_path in unique_folder_paths:
            existing_result_files = glob(folder_path + "*[0-9].result.pkl")
            # Result files are plain pickles; load them explicitly
            # instead of relying on np.load's pickle fallback.
            results = []
            for file_name in existing_result_files:
                with open(file_name, 'rb') as f:
                    results.append(pickle.load(f))
            folder_path_to_results[folder_path] = results

        for i_experiment in range(len(self._all_train_strs)):
            train_str = self._all_train_strs[i_experiment]
            train_dict = self._load_without_layers(train_str)
            original_params = train_dict['original_params']
            folder_path = all_folder_paths[i_experiment]
            results = folder_path_to_results[folder_path]
            experiment_already_ran = False
            for r in results:
                if dict_equal(r.parameters, original_params):
                    experiment_already_ran = True
                    log.warning("Already ran id {:d} {:s}".format(
                        i_experiment, str(original_params)))
            if not experiment_already_ran:
                clean_all_train_strs.append(train_str)

        self._all_train_strs = clean_all_train_strs
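
The glob pattern *[0-9].result.pkl picks up only result files whose stem ends in a digit, i.e. the numbered per-experiment files. A quick check of the matching rule with fnmatch, which uses the same wildcard syntax as glob (file names invented for the example):

from fnmatch import fnmatch

for name in ['3.result.pkl', '12.result.pkl', 'avg.result.pkl']:
    print(name, fnmatch(name, '*[0-9].result.pkl'))
# 3.result.pkl True
# 12.result.pkl True
# avg.result.pkl False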