Python DirectoryTree.get_git_hashes Exemples

Langage de programmation: Python

Espace de nommage/Paquet : alfred.utils.directory_tree

Class/Type: DirectoryTree

Méthode/Fonction: get_git_hashes

Exemples sur hotexamples.com : 2

Python DirectoryTree.get_git_hashes - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de alfred.utils.directory_tree.DirectoryTree.get_git_hashes extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

DirectoryTree(4)

extract_info_from_storage_name(4)

get_all_seeds(4)

get_all_experiments(3)

create_directories(2)

get_git_hashes(2)

init_from_branching_info(1)

Méthodes fréquemment utilisées

DirectoryTree (4)

extract_info_from_storage_name (4)

get_all_seeds (4)

get_all_experiments (3)

create_directories (2)

get_git_hashes (2)

init_from_branching_info (1)

Exemple #1

0

Afficher le fichier

Fichier : prepare_schedule.py Projet : julienroyd/alfred

def prepare_schedule(desc, schedule_file, root_dir, add_to_folder, resample, logger, ask_for_validation):
    """Create the storage/experiment directories described by a schedule file.

    The search type is inferred from the schedule file name ('grid_schedule*'
    or 'random_schedule*'). Experiments are either placed in new storage
    directories (one per (alg_name, task_name) combination) when ``desc`` is
    given, or appended to an existing storage directory when ``add_to_folder``
    is given.

    Args:
        desc: Description used for new storage directories; it is prefixed
            with the search type. Mutually exclusive with ``add_to_folder``.
        schedule_file: '/'-separated path to the schedule python file; it is
            converted to a dotted module name for the extract_schedule_* helpers.
        root_dir: Root passed to ``get_root`` and to the directory helpers.
        add_to_folder: Name of an existing storage folder under
            ``get_root(root_dir)`` to add experiments to. Mutually exclusive
            with ``desc``.
        resample: For random searches, re-extract the schedule for every
            additional (alg, task) combination instead of duplicating the
            first set of experiments.
        logger: Logger used for the summary and progress messages.
        ask_for_validation: If true, print what is about to be created and
            prompt the user on stdin before proceeding.

    Raises:
        AssertionError: on inconsistent arguments (wrong suffix, both or
            missing folder, duplicate variations key).
        ValueError: if the schedule file name is unsupported, the file does
            not exist, or the schedule names an unknown hyperparameter.
        NotImplementedError: if neither ``desc`` nor ``add_to_folder`` is given.
    """
    # Infers the search_type (grid or random) from provided schedule_file
    schedule_file_path = Path(schedule_file)

    assert schedule_file_path.suffix == '.py', f"The provided --schedule_file should be a python file " \
                                               f"(see: alfred/schedule_examples). You provided " \
                                               f"'--schedule_file={schedule_file}'"

    if "grid_schedule" in schedule_file_path.name:
        search_type = 'grid'
    elif "random_schedule" in schedule_file_path.name:
        search_type = 'random'
    else:
        raise ValueError(f"Provided --schedule_file has the name '{schedule_file_path.name}'. "
                         "Only grid_schedule's and random_schedule's are supported. "
                         "The name of the provided '--schedule_file' must fit one of the following forms: "
                         "'grid_schedule_NAME.py' or 'random_schedule_NAME.py'.")

    if not schedule_file_path.exists():
        raise ValueError(f"Cannot find the provided '--schedule_file': {schedule_file_path}")

    # Gets experiments parameters
    # (raw string: '\.' in a plain string literal is an invalid escape sequence)
    schedule_module = re.sub(r'\.py$', '', ".".join(schedule_file.split('/')))

    if search_type == 'grid':
        VARIATIONS, ALG_NAMES, TASK_NAMES, SEEDS, experiments, varied_params, get_run_args, schedule = \
            extract_schedule_grid(schedule_module)
    elif search_type == 'random':
        param_samples, ALG_NAMES, TASK_NAMES, SEEDS, experiments, varied_params, get_run_args, schedule = \
            extract_schedule_random(schedule_module)
    else:
        raise NotImplementedError

    # Creates a list of alg_agent and task_name unique combinations
    if desc is not None:
        assert add_to_folder is None, "If --desc is defined, a new storage_dir folder will be created." \
                                      "No --add_to_folder should be provided."

        desc = f"{search_type}_{desc}"
        agent_task_combinations = list(itertools.product(ALG_NAMES, TASK_NAMES))
        mode = "NEW_STORAGE"
        # Computed here (not only in the validation branch below) so that it is
        # always defined when the experiment directories are created.
        git_hashes = DirectoryTree.get_git_hashes()

    elif add_to_folder is not None:
        assert (get_root(root_dir) / add_to_folder).exists(), f"{add_to_folder} does not exist."
        assert desc is None, "If --add_to_folder is defined, new experiments will be added to the existing folder." \
                             "No --desc should be provided."

        storage_name_id, git_hashes, alg_name, task_name, desc = \
            DirectoryTree.extract_info_from_storage_name(add_to_folder)

        agent_task_combinations = list(itertools.product([alg_name], [task_name]))
        mode = "EXISTING_STORAGE"

    else:
        raise NotImplementedError

    # Duplicates or resamples hyperparameters to match the number of agent_task_combinations
    n_combinations = len(agent_task_combinations)

    experiments = [experiments]
    if search_type == 'random':
        param_samples = [param_samples]

    if search_type == 'random' and resample:
        assert not add_to_folder
        for _ in range(n_combinations - 1):
            param_sa, _, _, _, expe, varied_pa, get_run_args, _ = extract_schedule_random(schedule_module)
            experiments.append(expe)
            param_samples.append(param_sa)
    else:
        experiments = experiments * n_combinations
        if search_type == 'random':
            param_samples = param_samples * n_combinations

    # Printing summary of schedule_xyz.py
    info_str = f"\n\nPreparing a {search_type.upper()} search over {len(experiments)} experiments, {len(SEEDS)} seeds"
    info_str += f"\nALG_NAMES: {ALG_NAMES}"
    info_str += f"\nTASK_NAMES: {TASK_NAMES}"
    info_str += f"\nSEEDS: {SEEDS}"

    if search_type == "grid":
        info_str += "\n\nVARIATIONS:"
        for key, values in VARIATIONS.items():
            info_str += f"\n\t{key}: {values}"
    else:
        info_str += f"\n\nParams to be varied over: {varied_params}"

    info_str += f"\n\nDefault {config_to_str(get_run_args(overwritten_cmd_line=''))}\n"

    logger.debug(info_str)

    # Asking for user validation
    if ask_for_validation:

        if mode == "NEW_STORAGE":
            string = "\n"
            for alg_name, task_name in agent_task_combinations:
                string += f"\n\tID_{git_hashes}_{alg_name}_{task_name}_{desc}"

            logger.debug(f"\n\nAbout to create {len(agent_task_combinations)} storage directories, "
                         f"each with {len(experiments)} experiments:"
                         f"{string}")

        else:
            # Parenthesised so that iterdir() is called on the joined Path,
            # not on the add_to_folder string.
            n_existing_experiments = len([path for path in (get_root(root_dir) / add_to_folder).iterdir()
                                          if path.name.startswith('experiment')])

            logger.debug(f"\n\nAbout to add {len(experiments)} experiment folders in the following directory"
                         f" (there are currently {n_existing_experiments} in this folder):"
                         f"\n\t{add_to_folder}")

        answer = input("\nShould we proceed? [y or n]")
        if answer.lower() not in ['y', 'yes']:
            logger.debug("Aborting...")
            sys.exit()

    logger.debug("Starting...")

    # For each storage_dir to be created
    all_storage_dirs = []

    for alg_task_i, (alg_name, task_name) in enumerate(agent_task_combinations):

        # Determines storing ID (if new storage_dir)
        if mode == "NEW_STORAGE":
            tmp_dir_tree = DirectoryTree(alg_name=alg_name, task_name=task_name, desc=desc, seed=1, root=root_dir)
            storage_name_id = tmp_dir_tree.storage_dir.name.split('_')[0]

        # For each experiments...
        for param_dict in experiments[alg_task_i]:

            # Creates dictionary pointer-access to a training config object initialized by default
            config = get_run_args(overwritten_cmd_line="")
            config_dict = vars(config)

            # Modifies the config for this particular experiment
            config.alg_name = alg_name
            config.task_name = task_name
            config.desc = desc

            config_unique_dict = {k: v for k, v in param_dict.items() if k in varied_params}
            config_unique_dict['alg_name'] = config.alg_name
            config_unique_dict['task_name'] = config.task_name
            config_unique_dict['seed'] = config.seed

            for param_name in param_dict.keys():
                if param_name not in config_dict.keys():
                    raise ValueError(f"'{param_name}' taken from the schedule is not a valid hyperparameter "
                                     f"i.e. it cannot be found in the Namespace returned by get_run_args().")
                else:
                    config_dict[param_name] = param_dict[param_name]

            # Create the experiment directory
            dir_tree = create_experiment_dir(storage_name_id, config, config_unique_dict, SEEDS, root_dir, git_hashes)

        all_storage_dirs.append(dir_tree.storage_dir)

        # Saves VARIATIONS in the storage directory
        first_experiment_created = int(dir_tree.current_experiment.strip('experiment')) - len(experiments[0]) + 1
        last_experiment_created = first_experiment_created + len(experiments[0]) - 1

        if search_type == 'grid':

            VARIATIONS['alg_name'] = ALG_NAMES
            VARIATIONS['task_name'] = TASK_NAMES
            VARIATIONS['seed'] = SEEDS

            key = f'{first_experiment_created}-{last_experiment_created}'

            if (dir_tree.storage_dir / 'variations.json').exists():
                variations_dict = load_dict_from_json(filename=str(dir_tree.storage_dir / 'variations.json'))
                assert key not in variations_dict.keys()
                variations_dict[key] = VARIATIONS
            else:
                variations_dict = {key: VARIATIONS}

            save_dict_to_json(variations_dict, filename=str(dir_tree.storage_dir / 'variations.json'))
            open(str(dir_tree.storage_dir / 'GRID_SEARCH'), 'w+').close()

        elif search_type == 'random':
            len_samples = len(param_samples[alg_task_i])
            fig_width = 2 * len_samples if len_samples > 0 else 2
            fig, ax = plt.subplots(len(param_samples[alg_task_i]), 1, figsize=(6, fig_width))
            if not hasattr(ax, '__iter__'):
                ax = [ax]

            plot_sampled_hyperparams(ax, param_samples[alg_task_i],
                                     log_params=['lr', 'tau', 'initial_alpha', 'grad_clip_value', 'lamda1', 'lamda2'])

            # Picks the first 'variations{j}.png' name that is not taken yet
            j = 1
            while (dir_tree.storage_dir / f'variations{j}.png').exists():
                j += 1
            fig.savefig(str(dir_tree.storage_dir / f'variations{j}.png'))
            plt.close(fig)

            open(str(dir_tree.storage_dir / 'RANDOM_SEARCH'), 'w+').close()

        # Printing summary
        logger.info(f'Created directories '
                    f'{str(dir_tree.storage_dir)}/experiment{first_experiment_created}-{last_experiment_created}')

    # Saving the list of created storage_dirs in a text file located with the provided schedule_file
    schedule_name = Path(schedule.__file__).parent.stem
    with open(Path(schedule.__file__).parent / f"list_searches_{schedule_name}.txt", "a+") as f:
        for storage_dir in all_storage_dirs:
            f.write(f"{storage_dir.name}\n")

    logger.info(f"\nEach of these experiments contain directories for the following seeds: {SEEDS}")

Exemple #2

0

Afficher le fichier

def create_retrain_best(from_file, storage_name, best_experiments_mapping, n_retrain_seeds, train_time_factor,
                        root_dir):
    """Create a 'retrainBest' storage directory for each selected search.

    For every selected storage directory, an existing retrainBest directory
    (one whose name contains ``retrainBest<search id>``) is reused; otherwise
    a new one is created from the search's best configuration, with its
    training budget scaled by ``train_time_factor``. Failures for a single
    storage directory are logged and do not stop the loop.

    Args:
        from_file: Optional path forwarded to ``select_storage_dirs``. When
            given, the schedule module found next to it is imported and the
            names of newly created directories are appended to a
            'list_retrains_*.txt' file in the same folder.
        storage_name: Forwarded to ``select_storage_dirs``.
        best_experiments_mapping: Optional path to a JSON mapping from storage
            directory name to best experiment number. When None, the
            'bestConfig*' file in the search's 'summary' folder is used.
        n_retrain_seeds: Number of seeds to create; seeds are 0, 10, 20, ...
        train_time_factor: Multiplier applied to 'max_episodes' (or, if that
            is None, to 'max_steps') of the best configuration.
        root_dir: Root passed to ``get_root`` and to the directory helpers.

    Returns:
        List of retrainBest storage directories, both reused and newly created.
    """
    logger = create_logger(name="CREATE_RETRAIN", loglevel=logging.INFO)
    logger.info("\nCREATING retrainBest directories")

    # Select storage_dirs to run over
    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist
    storage_dirs = [storage_dir for storage_dir in storage_dirs if sanity_check_exists(storage_dir, logger)]

    # Imports schedule file to have same settings for DirectoryTree.git_repos_to_track
    if from_file:
        schedule_file = str([path for path in Path(from_file).parent.iterdir()
                             if 'schedule' in path.name and path.name.endswith('.py')][0])

        # Removes only the trailing '.py'. str.strip('.py') would strip any of the
        # characters '.', 'p', 'y' from both ends and mangle names such as 'toy.py'.
        schedule_module = ".".join(schedule_file.split('/'))
        if schedule_module.endswith('.py'):
            schedule_module = schedule_module[:-len('.py')]

        # Imported for its side effects only; the module object itself is not used.
        import_module(schedule_module)

    # Creates retrainBest directories
    retrainBest_storage_dirs = []
    new_retrainBest_storage_dirs = []
    for storage_dir in storage_dirs:

        try:
            # Checks if a retrainBest directory already exists for this search
            search_storage_id = storage_dir.name.split('_')[0]
            corresponding_retrain_directories = [path for path in get_root(root_dir).iterdir()
                                                 if f"retrainBest{search_storage_id}" in path.name.split('_')]

            if len(corresponding_retrain_directories) > 0:
                assert len(corresponding_retrain_directories) == 1
                retrainBest_dir = corresponding_retrain_directories[0]

                logger.info(f"Existing retrainBest\n\n"
                            f"\t{storage_dir.name} -> {retrainBest_dir.name}")

                retrainBest_storage_dirs.append(retrainBest_dir)
                continue

            # The retrainBest directory will contain one experiment with bestConfig from the search...
            if best_experiments_mapping is None:
                # ... bestConfig is found in the summary/ folder from the search
                best_config_candidates = [path for path in (storage_dir / "summary").iterdir()
                                          if path.name.startswith("bestConfig")]

                # The uniqueness check must run on the list, before indexing into it
                assert len(best_config_candidates) == 1
                best_config = best_config_candidates[0]

            else:
                # ... bestConfig is loaded based on specified --best_experiment_mapping
                best_experiments_mapping_dict = load_dict_from_json(best_experiments_mapping)
                assert storage_dir.name in best_experiments_mapping_dict.keys()

                best_experiment_num = best_experiments_mapping_dict[storage_dir.name]
                seed_dir = DirectoryTree.get_all_seeds(
                    experiment_dir=storage_dir / f"experiment{best_experiment_num}")[0]
                best_config = seed_dir / "config.json"

            config_dict = load_dict_from_json(filename=str(best_config))

            # Retrain experiments run train_time_factor times as long
            if config_dict['max_episodes'] is not None:
                config_dict['max_episodes'] = int(config_dict['max_episodes'] * train_time_factor)
            elif config_dict['max_steps'] is not None:
                config_dict['max_steps'] = int(config_dict['max_steps'] * train_time_factor)
            else:
                raise ValueError("At least one of max_episodes or max_steps should be defined")

            # Updates the description
            if "random" in config_dict['desc'] or "grid" in config_dict['desc']:
                new_desc = config_dict['desc'] \
                    .replace("random", f"retrainBest{search_storage_id}") \
                    .replace("grid", f"retrainBest{search_storage_id}")
            else:
                new_desc = config_dict['desc'] + f"_retrainBest{search_storage_id}"

            config_dict['desc'] = new_desc

            # Creates config Namespace with loaded config_dict
            config = argparse.ArgumentParser().parse_args("")
            config_pointer = vars(config)
            config_pointer.update(config_dict)  # updates config

            config_unique_dict = {}
            config_unique_dict['alg_name'] = config.alg_name
            config_unique_dict['task_name'] = config.task_name
            config_unique_dict['seed'] = config.seed

            # Gets new storage_name_id
            tmp_dir_tree = DirectoryTree(alg_name="", task_name="", desc="", seed=1, root=root_dir)
            retrain_storage_id = tmp_dir_tree.storage_dir.name.split('_')[0]

            # Creates the new storage_dir for retrainBest
            dir_tree = create_experiment_dir(storage_name_id=retrain_storage_id,
                                             config=config,
                                             config_unique_dict=config_unique_dict,
                                             SEEDS=[i * 10 for i in range(n_retrain_seeds)],
                                             root_dir=root_dir,
                                             git_hashes=DirectoryTree.get_git_hashes())

            retrainBest_storage_dirs.append(dir_tree.storage_dir)
            new_retrainBest_storage_dirs.append(dir_tree.storage_dir)

            logger.info(f"New retrainBest:\n\n"
                        f"\t{storage_dir.name} -> {dir_tree.storage_dir.name}")

        except Exception as e:
            # Best-effort: a failure for one search must not prevent the others from being processed
            logger.info(f"Could not create retrainBest-storage_dir {storage_dir}")
            logger.info(f"\n\n{e}\n{traceback.format_exc()}")

    # Saving the list of created storage_dirs in a text file located with the provided schedule_file
    # (skipped when no from_file was given: there is no folder to write the list to)
    if from_file:
        schedule_name = Path(from_file).parent.stem
        with open(Path(from_file).parent / f"list_retrains_{schedule_name}.txt", "a+") as f:
            for storage_dir in new_retrainBest_storage_dirs:
                f.write(f"{storage_dir.name}\n")

    return retrainBest_storage_dirs