Code example #1 (votes: 0)
File: simple_example.py — Project: sash-a/es_pytorch
import src.core.es as es
from src.core.noisetable import NoiseTable
from src.core.policy import Policy
from src.gym import gym_runner
from src.gym.training_result import TrainingResult, RewardResult
from src.nn.nn import FeedForward
from src.nn.obstat import ObStat
from src.nn.optimizers import Adam
from src.utils import utils
from src.utils.rankers import CenteredRanker

# Entry point: sets up an MPI-parallel evolution-strategies run.
# NOTE(review): this excerpt is truncated — the final Policy(...) call is cut
# mid-argument-list, and `MPI`, `gym`, and `torch` are referenced below but not
# imported in the visible portion of the file; confirm against the full source.
if __name__ == '__main__':
    comm: MPI.Comm = MPI.COMM_WORLD

    # Load run configuration from the path given on the command line.
    cfg_file = utils.parse_args()
    cfg = utils.load_config(cfg_file)

    env: gym.Env = gym.make(cfg.env.name)

    # seeding; this must be done before creating the neural network so that params are deterministic across processes
    rs, my_seed, global_seed = utils.seed(comm, cfg.general.seed, env)
    all_seeds = comm.alltoall(
        [my_seed] *
        comm.size)  # simply for saving/viewing the seeds used on each proc
    print(f'seeds:{all_seeds}')

    # initializing obstat, policy, optimizer, noise and ranker
    nn = FeedForward(cfg.policy.layer_sizes, torch.nn.Tanh(), env,
                     cfg.policy.ac_std, cfg.policy.ob_clip)
    # NOTE(review): statement truncated in this excerpt — remaining Policy
    # constructor arguments are not visible here.
    policy: Policy = Policy(nn, cfg.noise.std,
Code example #2 (votes: 0) — same project; this snippet begins mid-function.
            # NOTE(review): fragment starts mid if/elif chain — the opening
            # branch is not visible in this excerpt. `nsra` appears to adapt
            # the objective weight per NSRA-ES; confirm against full source.
            obj_weight[idx], policies_best_rewards[idx], time_since_best[
                idx] = nsra(cfg, rew, obj_weight[idx],
                            policies_best_rewards[idx], time_since_best[idx])
        elif cfg.nsr.progressive:
            # Linearly ramp the objective weight from 0 to 1 over
            # `end_progression_gen` generations, then clamp at 1.
            obj_weight[
                idx] = 1 if gen > cfg.nsr.end_progression_gen else gen / cfg.nsr.end_progression_gen

        # Saving policy if it obtained a better reward or distance
        # (only rank 0 writes to disk to avoid duplicate saves across procs).
        if (rew > best_rew or dist > best_dist) and comm.rank == 0:
            best_rew = max(rew, best_rew)
            best_dist = max(dist, best_dist)

            # Only need to save the archive, policy is saved by DefaultMpiReportedSet
            archive_path = path.join('saved', full_name, 'archives')
            if not path.exists(archive_path):
                os.makedirs(archive_path)
            np.save(path.join(archive_path, f'{gen}.np'), archive)

        reporter.end_gen()

    mlflow.end_run()  # ending the outer mlflow run


if __name__ == '__main__':
    # Quiet the gym logger (level 40 — presumably ERROR; verify against
    # the gym.logger constants in use).
    gym.logger.set_level(40)

    # Resolve the config path from the CLI, load it, and hand off to main().
    cfg_path = utils.parse_args()
    cfg = utils.load_config(cfg_path)

    main(cfg)