Example #1
0
def main(args):
    """Configure the run, spin up emulator processes, and train a PDQFD agent.

    Side effects: mutates ``args`` in place (restored options on resume,
    forced seed, ``num_actions``/``state_shape``/``name``), writes args.json
    to the debugging folder, sets CUDA_VISIBLE_DEVICES, starts simulator
    subprocesses, and blocks until training finishes.
    """
    if args.resume:
        conf_file = os.path.join(args.debugging_folder, 'args.json')
        assert os.path.exists(
            conf_file
        ), "Could not find an args.json file in the debugging folder"
        # Overwrite the live namespace with every option saved by the
        # previous run so the resumed run is configured identically.
        for k, v in logger_utils.load_args(args.debugging_folder).items():
            setattr(args, k, v)

    logger.debug('Configuration: {}'.format(args))
    logger_utils.save_args(args, args.debugging_folder)
    if 'gpu' in args.device:
        # Pin the agent process to the GPU reported to have the lowest
        # current memory usage.
        agent_gpu = str(misc_utils.pick_gpu_lowest_memory())
        os.environ["CUDA_VISIBLE_DEVICES"] = agent_gpu
        logger.debug('Agent will be run on device /gpu:{}'.format(agent_gpu))

    # NOTE(review): the seed is hard-coded here and silently overrides any
    # value supplied on the command line — confirm this is intentional.
    args.random_seed = 3
    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.state_shape = env_creator.state_shape

    import numpy as np
    # Create a set of arrays (one row per emulator) to exchange states,
    # actions, rewards and terminal flags between the agent and the emulators.
    n_emulators = args.n_emulator_runners * args.n_emulators_per_emulator_runner
    variables = {
        "s": np.zeros((n_emulators,) + args.state_shape, dtype=np.float32),  # States
        "a": np.zeros((n_emulators,), dtype=np.int32),   # Actions
        "r": np.zeros((n_emulators,), dtype=np.float32),  # Rewards
        # np.bool was removed in NumPy 1.24; the builtin `bool` is the
        # documented replacement and yields the same dtype (np.bool_).
        "done": np.zeros((n_emulators,), dtype=bool),     # Dones
    }
    sim_coordinator = SimulatorsCoordinator(
        env_creator, args.n_emulators_per_emulator_runner,
        args.n_emulator_runners, variables)
    # Start all simulator processes
    sim_coordinator.start()

    network = QNetwork

    def network_creator(name='value_learning', learning_network=None):
        # Tag the shared args with the network's name before instantiation.
        # (No `nonlocal` needed: we only mutate attributes of `args`,
        # we never rebind the name itself.)
        args.name = name
        return network(args, learning_network=learning_network)

    learner = PDQFDLearner(network_creator, env_creator, args, sim_coordinator)

    setup_kill_signal_handler(learner)

    logger.info('Starting training')
    learner.train()
    logger.info('Finished training')
Example #2
0
                        type=int,
                        help="Number of possible repetitions",
                        dest="nb_choices")
    parser.add_argument('--checkpoint_interval',
                        default=1000000,
                        type=int,
                        help="Interval of steps btw checkpoints",
                        dest="checkpoint_interval")
    parser.add_argument('--activation',
                        default='relu',
                        type=str,
                        help="activation function for the network",
                        dest="activation")
    parser.add_argument('--alpha_leaky_relu',
                        default=0.1,
                        type=float,
                        help="coef for leaky relu",
                        dest="alpha_leaky_relu")

    return parser


if __name__ == '__main__':
    # Read the command-line configuration for this run.
    cli_args = get_arg_parser().parse_args()

    # Persist the configuration to the debugging folder so the run can be
    # inspected or resumed later, and echo it at debug level.
    import logger_utils
    logger_utils.save_args(cli_args, cli_args.debugging_folder)
    logging.debug(cli_args)

    main(cli_args)