Example #1
def main(args):
    if args.resume:
        # When resuming, restore the configuration saved in the debugging folder
        conf_file = os.path.join(args.debugging_folder, 'args.json')
        assert os.path.exists(
            conf_file
        ), "Could not find an args.json file in the debugging folder"
        for k, v in logger_utils.load_args(args.debugging_folder).items():
            setattr(args, k, v)

    logger.debug('Configuration: {}'.format(args))
    logger_utils.save_args(args, args.debugging_folder)
    if 'gpu' in args.device:
        # Pin the agent to the GPU that currently has the lowest memory usage
        agent_gpu = str(misc_utils.pick_gpu_lowest_memory())
        os.environ["CUDA_VISIBLE_DEVICES"] = agent_gpu
        logger.debug('Agent will be run on device /gpu:{}'.format(agent_gpu))

    args.random_seed = 3  # Hard-coded random seed (overrides any loaded value)
    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.state_shape = env_creator.state_shape

    import numpy as np
    # Create one array per field (one row per emulator) to exchange states,
    # actions, rewards and termination flags between the agent and the emulators
    n_emulators = args.n_emulator_runners * args.n_emulators_per_emulator_runner
    variables = {
        "s": np.zeros((n_emulators,) + args.state_shape, dtype=np.float32),  # States
        "a": np.zeros((n_emulators,), dtype=np.int32),  # Actions
        "r": np.zeros((n_emulators,), dtype=np.float32),  # Rewards
        "done": np.zeros((n_emulators,), dtype=bool)  # Done flags
    }
    sim_coordinator = SimulatorsCoordinator(
        env_creator, args.n_emulators_per_emulator_runner,
        args.n_emulator_runners, variables)
    # Start all simulator processes
    sim_coordinator.start()

    network = QNetwork

    def network_creator(name='value_learning', learning_network=None):
        # Build a network under the given scope name, optionally passing an
        # existing learning network to the constructor
        args.name = name
        return network(args, learning_network=learning_network)

    learner = PDQFDLearner(network_creator, env_creator, args, sim_coordinator)

    setup_kill_signal_handler(learner)

    logger.info('Starting training')
    learner.train()
    logger.info('Finished training')
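
Example #1 calls a setup_kill_signal_handler helper that is not shown. Below is a minimal sketch of what such a handler could look like, assuming the learner exposes some flag for stopping training gracefully; the stop_flag attribute used here is a placeholder, not part of the original code.

import logging
import signal

logger = logging.getLogger(__name__)


def setup_kill_signal_handler(learner):
    # Stop training gracefully on Ctrl-C / SIGTERM instead of killing the
    # process while the simulator subprocesses are still running
    def handler(signum, frame):
        logger.info('Received signal %s, requesting training stop', signum)
        learner.stop_flag = True  # Placeholder shutdown hook; the real learner may differ

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)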
Example #2


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--folder', type=str, required=True, dest="folder",
                        help="Folder containing the saved debugging information (args.json).")
    parser.add_argument('-tc', '--test_count', default=1, type=int, dest="test_count",
                        help="Number of test runs to perform on the given network.")
    parser.add_argument('-np', '--noops', default=30, type=int, dest="noops",
                        help="Maximum number of no-ops to use.")
    parser.add_argument('-gn', '--gif_name', default=None, type=str, dest="gif_name",
                        help="If provided, a gif will be produced and stored with this name.")
    parser.add_argument('-gf', '--gif_folder', default='', type=str, dest="gif_folder",
                        help="The folder where to save gifs.")
    parser.add_argument('-d', '--device', default='/gpu:0', type=str, dest="device",
                        help="Device to be used ('/cpu:0', '/gpu:0', '/gpu:1', ...).")

    args = parser.parse_args()
    arg_file = os.path.join(args.folder, 'args.json')
    # Remember the command-line device; it would otherwise be overwritten by
    # the training configuration loaded below
    device = args.device
    for k, v in logger_utils.load_args(arg_file).items():
        setattr(args, k, v)
    args.max_global_steps = 0
    # Keep the original folder and redirect debugging output to a temporary location
    df = args.folder
    args.debugging_folder = '/tmp/logs'
    args.device = device

    args.random_start = False
    args.single_life_episodes = False
    if args.gif_name:
        args.visualize = 1

    args.actor_id = 0
    rng = np.random.RandomState(int(time.time()))
    args.random_seed = rng.randint(1000)
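
Both examples persist and restore the configuration through logger_utils.save_args and logger_utils.load_args, which are not shown here. The sketch below illustrates one way such a JSON round-trip could work; it is an assumption about those helpers, not their actual implementation (note that Example #1 passes a folder to load_args while Example #2 passes the args.json path, so the sketch accepts either).

import json
import os


def save_args(args, folder, file_name='args.json'):
    # Dump the argparse namespace to <folder>/args.json
    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, file_name), 'w') as f:
        json.dump(vars(args), f, indent=2, sort_keys=True, default=str)


def load_args(path, file_name='args.json'):
    # Accept either a folder containing args.json or a direct path to the file
    if os.path.isdir(path):
        path = os.path.join(path, file_name)
    with open(path) as f:
        return json.load(f)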