Ejemplo n.º 1
0
    "Server.log_every_n": 10,
    "PoloWrappedReplayBuffer.batch_size": 32,
    "PoloOutOfGraphReplayBuffer.solved_unsolved_ratio": 0.5,
    "training_steps": 200000,
    "train_every_num_steps": 10,
    "game_buffer_size": 10,
    "run_eval_worker": False,
}

# Parameter grid: each key maps to the list of candidate values for it.
# Every list here holds a single value.
params_grid = {
    # Problem size N is set here.
    "get_env_creator.N": [40],
    # Should be no less than N for ChainEnvironment.
    "MCTS.episode_max_steps": [51],
    "EnsembleValueAccumulatorMeanStdMaxUCB.kappa_fn": [ConstKappa(0)],
    "ValueBase.model_name": ["linear_multi_head"],
}

# Experiment list for the 'Ensemble ChainEnv' run, built from `base_config`
# and `params_grid`.
experiments_list = create_experiments_helper(
    experiment_name='Ensemble ChainEnv',
    # The adjacent literals are joined into a single colon-separated string.
    python_path=(
        '.:./deps/gym-sokoban:./deps/ourlib:'
        './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast:./deps/chainenv:'
    ),
    paths_to_dump='',
    exclude=[],
    base_config=base_config,
    params_grid=params_grid,
)
    "MCTS.num_sampling_moves": 0,
    "MCTS.value_annealing": 1.0,
    "MCTS.avoid_loops": True,
    "MCTS.gamma": 0.99,
    "MCTS.node_value_mode": "bootstrap",
    "MCTS.episode_max_steps": 50,
    "Server.min_replay_history": 1000,
    "PoloWrappedReplayBuffer.batch_size": 32,
    "PoloOutOfGraphReplayBuffer.solved_unsolved_ratio": 0.5,
    "curriculum": False,
    "training_steps": 500000,
    "train_every_num_steps": 100,
    "game_buffer_size": 25,
    "log_every_n": 50,
    "run_eval_worker": False,
    "Server.save_checkpoint_every_train_steps": 500,
}

# Parameter grid: each key maps to the list of candidate values for it.
# Two keys with two values each.
params_grid = {
    "MCTS.avoid_loops": [
        True,
        False,
    ],
    "ValueBase.model_name": [
        "convnet_mnist",
        "kc_parametrized_cnn_v0_2",
    ],
}

# Experiment list for the 'Mcts sanity experiment' run, built from
# `base_config` and `params_grid`.
experiments_list = create_experiments_helper(
    experiment_name='Mcts sanity experiment',
    # The adjacent literals are joined into a single colon-separated string.
    python_path=(
        '.:./deps/gym-sokoban:./deps/ourlib:'
        './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast'
    ),
    paths_to_dump='',
    base_config=base_config,
    params_grid=params_grid,
)
Ejemplo n.º 3
0
    ],
    "KC_MCTS.num_ensembles_per_game": [
        1,
    ],
    "ValueBase.learning_rate_fn": [
        0.00025,
    ],
    "EnsembleValueAccumulatorMeanStdMaxUCB.ucb_coeff": [
        0.0,
    ],
    "EnsembleValueAccumulatorMeanStdMaxUCB.exploration_target": [
        False,
    ],
    "MCTS.num_mcts_passes": [
        10,
    ],
    "ValueBase.model_name": [
        "multiple_mlps",
    ],
    "ValueEnsemble2.prior_scale": [None],
}

# Experiment list for the 'Sokoban single-board' run, built from
# `base_config` and `params_grid`; an empty tuple is passed as `callbacks`.
experiments_list = create_experiments_helper(
    experiment_name='Sokoban single-board',
    # The adjacent literals are joined into a single colon-separated string.
    python_path=(
        '.:./deps/gym-sokoban:./deps/ourlib:'
        './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast:./deps/chainenv:./deps/toy-mr:'
    ),
    paths_to_dump='',
    callbacks=(),
    base_config=base_config,
    params_grid=params_grid,
)
               "EnsembleValueTraits.dead_end_value": -2.0,
               "MCTSWithVotingTwoModels.num_mcts_passes": 10,
               "MCTSWithVotingTwoModels.num_sampling_moves": 0,
               "MCTSWithVotingTwoModels.avoid_loops": True,
               "MCTSWithVotingTwoModels.gamma": 0.99,
               "MCTSWithVotingTwoModels.episode_max_steps": 200,
               "MCTSWithVotingTwoModels.avoid_history_coeff": -2.,

               "Server.min_replay_history": 1000,
               "PoloOutOfGraphReplayBuffer.solved_unsolved_ratio": 0.5,
               "training_steps": 500000,
               "train_every_num_steps": 100,
               "game_buffer_size": 25,
               "run_eval_worker": False,
               "Server.log_every_n": 50,
               "MCTSWithVotingTwoModels.node_value_mode": "bootstrap",
               }


# Parameter grid: each key maps to the list of candidate values for it.
# Every list here holds a single value.
params_grid = {
    "use_perfect_env.value": [False],
    "SimulatedSokobanEnvModel.model_path": ["checkpoints/epoch.0003.hdf5"],
    "PoloWrappedReplayBuffer.batch_size": [96],
}

# Experiment list for the 'sokoban with learned model' run, built from
# `base_config` and `params_grid`.
experiments_list = create_experiments_helper(
    experiment_name='sokoban with learned model',
    # The adjacent literals are joined into a single colon-separated string.
    python_path=(
        '.:./deps/gym-sokoban:./deps/ourlib:'
        './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast:./deps/chainenv:./polo_plus/kc:./deps/toy-mr:'
    ),
    paths_to_dump='',
    base_config=base_config,
    params_grid=params_grid,
)