import gym
import torch
from mpi4py import MPI

import src.core.es as es
from src.core.noisetable import NoiseTable
from src.core.policy import Policy
from src.gym import gym_runner
from src.gym.training_result import TrainingResult, RewardResult
from src.nn.nn import FeedForward
from src.nn.obstat import ObStat
from src.nn.optimizers import Adam
from src.utils import utils
from src.utils.rankers import CenteredRanker

if __name__ == '__main__':
    comm: MPI.Comm = MPI.COMM_WORLD

    cfg_file = utils.parse_args()
    cfg = utils.load_config(cfg_file)

    env: gym.Env = gym.make(cfg.env.name)

    # seeding; this must be done before creating the neural network so that params are deterministic across processes
    rs, my_seed, global_seed = utils.seed(comm, cfg.general.seed, env)
    all_seeds = comm.alltoall([my_seed] * comm.size)  # simply for saving/viewing the seeds used on each proc
    print(f'seeds:{all_seeds}')

    # initializing obstat, policy, optimizer, noise and ranker
    nn = FeedForward(cfg.policy.layer_sizes, torch.nn.Tanh(), env, cfg.policy.ac_std, cfg.policy.ob_clip)
    policy: Policy = Policy(nn, cfg.noise.std,
            obj_weight[idx], policies_best_rewards[idx], time_since_best[idx] = \
                nsra(cfg, rew, obj_weight[idx], policies_best_rewards[idx], time_since_best[idx])
        elif cfg.nsr.progressive:
            # linearly ramp the objective weight from 0 to 1 over the first end_progression_gen generations
            obj_weight[idx] = 1 if gen > cfg.nsr.end_progression_gen else gen / cfg.nsr.end_progression_gen

        # Saving policy if it obtained a better reward or distance
        if (rew > best_rew or dist > best_dist) and comm.rank == 0:
            best_rew = max(rew, best_rew)
            best_dist = max(dist, best_dist)
            # Only need to save the archive, policy is saved by DefaultMpiReportedSet
            archive_path = path.join('saved', full_name, 'archives')
            if not path.exists(archive_path):
                os.makedirs(archive_path)
            np.save(path.join(archive_path, f'{gen}.np'), archive)

        reporter.end_gen()

    mlflow.end_run()  # ending the outer mlflow run


if __name__ == '__main__':
    gym.logger.set_level(40)  # only log errors, silencing gym's warning output
    config_file = utils.parse_args()
    config = utils.load_config(config_file)
    main(config)
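# --- Illustrative sketch, not part of the script above ----------------------
# The nsra(...) call follows the NSRA-ES weight-adaptation idea (Conti et al., 2018):
# raise the reward/objective weight while the policy keeps improving, and lower it,
# shifting emphasis back towards novelty, once the reward has stagnated for a number
# of generations. The cfg fields used below (weight_delta, stagnation_gens) and the
# function name are assumptions for illustration, not this repository's actual API.
def nsra_sketch(cfg, rew: float, obj_weight: float, best_reward: float, time_since_best: int):
    """Return an updated (obj_weight, best_reward, time_since_best) triple."""
    if rew > best_reward:
        # reward improved: trust the objective more and reset the stagnation counter
        return min(1., obj_weight + cfg.nsr.weight_delta), rew, 0

    time_since_best += 1
    if time_since_best >= cfg.nsr.stagnation_gens:
        # reward has stagnated: shift weight back towards the novelty objective
        obj_weight = max(0., obj_weight - cfg.nsr.weight_delta)

    return obj_weight, best_reward, time_since_best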