def upload_file(storage_client: Union[Minio, None], object_key, local_source_path, bucket_name=BUCKET_NAME):
    """Upload a local file to cloud storage (if configured) and mirror it on disk.

    When ``storage_client`` is given, the file is pushed to ``bucket_name``
    under ``object_key``. In every case the file is also copied to the path
    returned by ``get_default_path_on_disk_for_minio_key`` for ``object_key``,
    unless the source already is that path.

    Args:
        storage_client: Minio client, or None to use local storage only.
        object_key: Object name to store the file under.
        local_source_path: Path of the file to upload.
        bucket_name: Destination bucket; required when a client is given.

    Returns:
        The value returned by ``fput_object`` when a client is given,
        otherwise the local destination path.

    Raises:
        ValueError: If a client is given but ``bucket_name`` is None.
    """
    has_cloud_storage = storage_client is not None

    if has_cloud_storage:
        if bucket_name is None:
            raise ValueError("Must specify a bucket name if using cloud storage")
        etag = storage_client.fput_object(
            bucket_name=bucket_name,
            object_name=object_key,
            file_path=local_source_path,
        )
        logger.info("uploaded {}".format(object_key))

    local_dest_path = get_default_path_on_disk_for_minio_key(object_name=object_key)

    # Copying is skipped when the source is already the on-disk location.
    if local_source_path != local_dest_path:
        ensure_dir(local_dest_path)
        copyfile(src=local_source_path, dst=local_dest_path)
        logger.info(f"copied {local_source_path} to {local_dest_path}")

    return etag if has_cloud_storage else local_dest_path
def submit_new_policy(self, policy_file_key, policy_weights, steps_trained, extra_data: dict = None):
    """Cache, persist and register a newly submitted policy.

    The weights are added to the in-memory cache, dumped to the on-disk path
    derived from ``policy_file_key``, and handed to ``upload_file``. If the
    key is not yet in ``self.catalog``, a fresh catalog entry is created.

    Args:
        policy_file_key: Storage key identifying the policy.
        policy_weights: Weights object to serialize.
        steps_trained: Stored in the entry under ``STEPS_TRAINED``.
        extra_data: Optional extra fields merged into the entry's data dict.
    """
    self.add_to_cache(policy_key=policy_file_key, weights=policy_weights)

    # Write the weights to disk first, then pass that file to upload_file.
    weights_path = get_default_path_on_disk_for_minio_key(object_name=policy_file_key)
    ensure_dir(weights_path)
    with open(weights_path, "wb") as weights_file:
        dump(obj=policy_weights, file=weights_file)
    upload_file(
        self.storage_client,
        bucket_name=self.bucket_name,
        object_key=policy_file_key,
        local_source_path=weights_path,
    )

    # An existing catalog entry is left untouched.
    if policy_file_key in self.catalog:
        return

    initial_rating = Rating()
    entry_data = {
        STEPS_TRAINED: steps_trained,
        TIME_CREATED: time.time() - self.start_time,
    }
    if extra_data is not None:
        entry_data.update(extra_data)

    # Entry layout: (skill ranking, data, ranked games played, policy mixture).
    self.catalog[policy_file_key] = (initial_rating, entry_data, 0, None)
def _add_record_entry(self):
    """Append one CSV row per catalogued policy to the record file.

    Does nothing when ``self.record_file_path`` is falsy. The header row is
    written only when the record file does not exist yet. Every row written
    in one call shares the same wall-time stamp (seconds since
    ``self.start_time``).

    Raises:
        KeyError: If a catalog entry's data dict lacks one of
            ``self.extra_data_keys``.
    """
    if not self.record_file_path:
        return

    # A missing file means this is the first write, so a header is needed.
    header_needs_write = not os.path.exists(self.record_file_path)
    if header_needs_write:
        ensure_dir(self.record_file_path)

    # newline='' lets the csv module control line endings itself; without it,
    # platforms that translate "\n" would emit "\r\r\n" row terminators.
    with open(self.record_file_path, 'a', newline='') as csv_file:
        writer = csv.DictWriter(f=csv_file,
                                fieldnames=RECORD_CSV_FIELDS + self.extra_data_keys)
        if header_needs_write:
            writer.writeheader()

        wall_time = time.time() - self.start_time
        for policy_key, vals in self.catalog.items():
            skill_ranking, data, games_ranked_on, policy_mixture = vals
            row = {
                WALL_TIME_SINCE_START: wall_time,
                POLICY_KEY: policy_key,
                SKILL_RANKING: skill_ranking.mu,
                STEPS_TRAINED: data[STEPS_TRAINED],
                TIME_CREATED: data[TIME_CREATED],
                GAMES_RANKED_ON: games_ranked_on,
                IS_POLICY_MIXTURE: policy_mixture is not None,
            }
            row.update({k: data[k] for k in self.extra_data_keys})
            writer.writerow(row)
def save_model_weights(self, save_file_path: str, remove_scope_prefix: str = None):
    """Serialize the model weights to ``save_file_path``.

    Args:
        save_file_path: Destination file, opened in binary write mode.
        remove_scope_prefix: Forwarded unchanged to ``get_model_weights``.
    """
    # Fetch the weights before touching the filesystem.
    model_weights = self.get_model_weights(remove_scope_prefix=remove_scope_prefix)

    ensure_dir(save_file_path)
    with open(save_file_path, "wb") as weights_file:
        dump(obj=model_weights, file=weights_file)
def save_weights(trainer, weights_dict, dir_name, name, iter_num):
    """Dump ``weights_dict`` to a timestamped checkpoint file and return its path.

    The file is placed in ``<experiment_save_dir>/<dir_name>``, where
    ``experiment_save_dir`` is resolved from the enclosing scope (not a
    parameter). The file name combines the trainer's claimed policy number,
    ``datetime_str()``, ``name`` and ``iter_num``.

    Returns:
        The path of the checkpoint file that was written.
    """
    target_dir = os.path.join(experiment_save_dir, dir_name)
    checkpoint_save_path = os.path.join(
        target_dir,
        f"policy_{trainer.claimed_policy_num}_{datetime_str()}_{name}_{iter_num}.dill",
    )

    ensure_dir(checkpoint_save_path)
    with open(checkpoint_save_path, "wb") as checkpoint_file:
        dump(obj=weights_dict, file=checkpoint_file)

    return checkpoint_save_path
def post_key(storage_client: Union[Minio, None], key, bucket_name=BUCKET_NAME, bulletin_prefix="bulletin"):
    """Write ``key`` into a small text file and upload it under a bulletin prefix.

    The bulletin object is named ``<bulletin_prefix>/<key>.txt`` with every
    ``/`` in ``key`` replaced by ``_``; its content is the original ``key``.

    Args:
        storage_client: Minio client, or None for local storage.
        key: Key to announce; also the text written to the file.
        bucket_name: Bucket forwarded to ``upload_file``.
        bulletin_prefix: Prefix of the bulletin object name.

    Returns:
        Whatever ``upload_file`` returns for the bulletin file.
    """
    new_key = f"{bulletin_prefix}/{key.replace('/','_')}.txt"
    bulletin_path = get_default_path_on_disk_for_minio_key(object_name=new_key)

    ensure_dir(bulletin_path)
    with open(bulletin_path, "+w") as bulletin_file:
        bulletin_file.write(key)

    upload_result = upload_file(
        storage_client=storage_client,
        object_key=new_key,
        local_source_path=bulletin_path,
        bucket_name=bucket_name,
    )
    return upload_result
def copy_object(storage_client: Union[Minio, None], source_object_name, dest_object_name, bucket_name=BUCKET_NAME):
    """Copy an object within a bucket, or between local paths without a client.

    Args:
        storage_client: Minio client, or None for local storage.
        source_object_name: Name of the object to copy from.
        dest_object_name: Name of the object to copy to.
        bucket_name: Bucket that holds both objects (cloud mode only).

    Returns:
        The result of ``storage_client.copy_object`` in cloud mode, otherwise
        the result of ``copyfile`` on the corresponding local paths.
    """
    if storage_client is not None:
        return storage_client.copy_object(
            bucket_name=bucket_name,
            object_name=dest_object_name,
            object_source=f"/{bucket_name}/{source_object_name}",
        )

    # Local storage: translate both object names to disk paths and copy.
    local_src = get_default_path_on_disk_for_minio_key(object_name=source_object_name)
    local_dst = get_default_path_on_disk_for_minio_key(object_name=dest_object_name)
    ensure_dir(local_dst)
    return copyfile(src=local_src, dst=local_dst)
def maybe_download_object(storage_client: Union[Minio, None], object_name, bucket_name=BUCKET_NAME,
                          local_directory=DEFAULT_LOCAL_SAVE_PATH, force_download=False):
    """Make sure ``object_name`` is present under ``local_directory``.

    The work is done while holding a file lock on ``<save path>.lock``
    (5 minute timeout). An existing file is reused unless ``force_download``
    is set.

    Args:
        storage_client: Minio client, or None to use local storage only.
        object_name: Name of the object to fetch.
        bucket_name: Bucket to download from; required in cloud mode.
        local_directory: Directory the object is saved under.
        force_download: Fetch again even if the file already exists.

    Returns:
        ``(save_file_path, object_stat_info)``. ``object_stat_info`` is the
        value returned by ``fget_object`` after a cloud download, and None
        when the file was already present or was served from local storage.

    Raises:
        ValueError: If local storage is used and the object's default on-disk
            file is missing, if no bucket name is given in cloud mode, or if
            the bucket does not exist.
    """
    save_file_path = os.path.join(local_directory, object_name)
    ensure_dir(save_file_path)

    file_lock = FileLock(lock_file=f"{save_file_path}.lock", timeout=60 * 5)
    file_lock.acquire()
    try:
        # Return if the file is already downloaded.
        if os.path.exists(save_file_path) and not force_download:
            logger.debug("{} already exists".format(save_file_path))
            return save_file_path, None

        if storage_client is None:
            default_path = get_default_path_on_disk_for_minio_key(object_name=object_name)
            if not os.path.exists(default_path):
                raise ValueError(f"Using Local Storage and {default_path} doesn't exist")
            if save_file_path != default_path:
                copyfile(src=default_path, dst=save_file_path)
            # Local storage is fully handled here. Falling through to the
            # cloud path below would dereference the missing client.
            return save_file_path, None

        if bucket_name is None:
            raise ValueError("Must specify a bucket name if using cloud storage")

        # Ensure the bucket exists.
        if not storage_client.bucket_exists(bucket_name=bucket_name):
            raise ValueError("Bucket {} doesn't exist.".format(bucket_name))

        # Download the object to the file path.
        logger.info("downloading {}".format(object_name))
        object_stat_info = storage_client.fget_object(bucket_name=bucket_name,
                                                      object_name=object_name,
                                                      file_path=save_file_path)
    finally:
        file_lock.release()

    return save_file_path, object_stat_info
def move_object(storage_client: Union[Minio, None], source_object_name, dest_object_name, bucket_name=BUCKET_NAME):
    """Move an object to a new name, in cloud storage or on local disk.

    Args:
        storage_client: Minio client, or None for local storage.
        source_object_name: Current object name.
        dest_object_name: New object name.
        bucket_name: Bucket holding the object; required in cloud mode.

    Returns:
        The result of ``move`` on the local paths when no client is given;
        None in cloud mode.

    Raises:
        ValueError: If a client is given but ``bucket_name`` is None.
        AssertionError: If the cloud copy reports a falsy result. The source
            object is not removed in that case.
    """
    if storage_client is None:
        src_path = get_default_path_on_disk_for_minio_key(object_name=source_object_name)
        dest_path = get_default_path_on_disk_for_minio_key(object_name=dest_object_name)
        ensure_dir(dest_path)
        return move(src=src_path, dst=dest_path)

    if bucket_name is None:
        raise ValueError("Must specify a bucket name if using cloud storage")

    # The copy must not live inside an `assert` statement: under `python -O`
    # asserts are stripped, so the copy would be skipped and the source
    # object would still be deleted below.
    copy_result = copy_object(storage_client=storage_client,
                              source_object_name=source_object_name,
                              dest_object_name=dest_object_name,
                              bucket_name=bucket_name)
    if not copy_result:
        # Same exception type the original assert raised, kept for callers.
        raise AssertionError(
            f"copy of {source_object_name} to {dest_object_name} failed; source not removed")

    storage_client.remove_object(bucket_name=bucket_name, object_name=source_object_name)
def upload_directory(storage_client: Union[Minio, None], dest_object_key_prefix, local_source_dir,
                     bucket_name=BUCKET_NAME):
    """Upload every regular file under ``local_source_dir`` beneath a key prefix.

    Files are found with a recursive glob. Each object name is
    ``dest_object_key_prefix`` joined with the file's path relative to
    ``local_source_dir``. Without a client the files are copied to their
    default on-disk locations instead.

    Args:
        storage_client: Minio client, or None for local storage.
        dest_object_key_prefix: Prefix for the resulting object names.
        local_source_dir: Directory whose files are uploaded.
        bucket_name: Destination bucket; required in cloud mode.

    Returns:
        A list with one item per file: the ``fput_object`` result in cloud
        mode, or the local destination path otherwise. ``[None]`` when local
        storage is used and the source directory already is the destination.

    Raises:
        ValueError: If a client is given but ``bucket_name`` is None.
    """
    candidate_paths = glob.glob(local_source_dir + "/**/*", recursive=True)
    use_local_storage = storage_client is None
    results = []

    if use_local_storage:
        local_dest_prefix_path = get_default_path_on_disk_for_minio_key(
            object_name=dest_object_key_prefix)
        # Source and destination are the same directory: nothing to copy.
        if local_source_dir == local_dest_prefix_path:
            return [None]
    elif bucket_name is None:
        raise ValueError("Must specify a bucket name if using cloud storage")

    for local_file_path in candidate_paths:
        # The glob also yields directories; only regular files are uploaded.
        if not os.path.isfile(local_file_path):
            continue

        relative_key = os.path.relpath(path=local_file_path, start=local_source_dir)
        object_name = os.path.join(dest_object_key_prefix, relative_key)

        if use_local_storage:
            dest_file_path = get_default_path_on_disk_for_minio_key(object_name=object_name)
            ensure_dir(dest_file_path)
            copyfile(src=local_file_path, dst=dest_file_path)
            logger.info(f"copied {local_file_path} to {dest_file_path}")
            results.append(dest_file_path)
        else:
            upload_result = storage_client.fput_object(bucket_name=bucket_name,
                                                       object_name=object_name,
                                                       file_path=local_file_path)
            logger.info(f"uploaded {object_name}")
            results.append(upload_result)

    return results
"callbacks_after_trainer_init": [ init_static_policy_distribution_after_trainer_init_callback, ], "callbacks": { "on_train_result": stop_and_submit_if_not_improving_on_train_result_callback, 'on_episode_start': sample_new_static_policy_weights_for_each_worker_on_episode_start, }, }) # save running script to file current_code_file_path = os.path.abspath(__file__) copy_code_to_path = os.path.join(experiment_save_dir, "launch_script.py") ensure_dir(copy_code_to_path) shutil.copy2(src=current_code_file_path, dst=copy_code_to_path, follow_symlinks=True) def trial_name_creator(trial): config = trial.config return "sac_learner" analysis = tune.run( local_dir=TUNE_SAVE_DIR, name=full_experiment_name, # upload_dir=base_experiment_name, # sync_to_cloud=get_tune_sync_to_cloud_fn(storage_client=storage_client, bucket_name=BUCKET_NAME), checkpoint_at_end=False, keep_checkpoints_num=0,