def main(args):
    """Set up and run one training session from a parsed configuration.

    In order, this function: optionally restores a previously saved
    configuration, saves the effective configuration, restricts CUDA to a
    single GPU when a GPU device is requested, builds the environment
    creator, allocates the arrays used to exchange data with the emulators,
    starts the simulator processes and runs the learner.

    Args:
        args: Namespace-like configuration object. Reads ``resume``,
            ``debugging_folder``, ``device``, ``n_emulator_runners`` and
            ``n_emulators_per_emulator_runner``. Sets ``random_seed``,
            ``num_actions``, ``state_shape`` and ``name`` (plus every
            loaded key when resuming).

    Raises:
        FileNotFoundError: If ``args.resume`` is truthy and there is no
            ``args.json`` inside ``args.debugging_folder``.
    """
    if args.resume:
        conf_file = os.path.join(args.debugging_folder, 'args.json')
        # Explicit raise instead of `assert`, so the check is still
        # performed when Python runs with -O.
        if not os.path.exists(conf_file):
            raise FileNotFoundError(
                "Could not find an args.json file in the debugging folder")
        # NOTE(review): load_args is given the folder rather than conf_file;
        # presumably it appends the file name itself -- confirm.
        for k, v in logger_utils.load_args(args.debugging_folder).items():
            setattr(args, k, v)

    logger.debug('Configuration: {}'.format(args))
    logger_utils.save_args(args, args.debugging_folder)

    if 'gpu' in args.device:
        # Expose only the GPU chosen by misc_utils to CUDA.
        agent_gpu = str(misc_utils.pick_gpu_lowest_memory())
        os.environ["CUDA_VISIBLE_DEVICES"] = agent_gpu
        logger.debug('Agent will be run on device /gpu:{}'.format(agent_gpu))

    # NOTE(review): the seed is hard-coded here and overwrites any
    # random_seed already present on args -- confirm this is intended.
    args.random_seed = 3

    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.state_shape = env_creator.state_shape

    import numpy as np

    # Create a set of arrays (as many as emulators) to exchange states,
    # rewards, etc. between the agent and the emulator.
    n_emulators = args.n_emulator_runners * args.n_emulators_per_emulator_runner
    variables = {
        "s": np.zeros((n_emulators,) + args.state_shape, dtype=np.float32),  # States
        "a": np.zeros(n_emulators, dtype=np.int32),  # Actions
        "r": np.zeros(n_emulators, dtype=np.float32),  # Rewards
        # Builtin `bool` rather than `np.bool`: that alias was deprecated in
        # NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
        "done": np.zeros(n_emulators, dtype=bool),  # Dones
    }

    sim_coordinator = SimulatorsCoordinator(
        env_creator,
        args.n_emulators_per_emulator_runner,
        args.n_emulator_runners,
        variables)
    # Start all simulator processes
    sim_coordinator.start()

    def network_creator(name='value_learning', learning_network=None):
        """Build a QNetwork called ``name`` from the shared ``args``."""
        # `args` is only mutated, never rebound, so no `nonlocal` is needed.
        # The name is written onto the shared object, so each call
        # overwrites the name set by the previous one.
        args.name = name
        return QNetwork(args, learning_network=learning_network)

    learner = PDQFDLearner(network_creator, env_creator, args, sim_coordinator)

    setup_kill_signal_handler(learner)

    logger.info('Starting training')
    learner.train()
    logger.info('Finished training')
return memory if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-f', '--folder', type=str, help="Folder where to save the debugging information.", dest="folder", required=True) parser.add_argument('-tc', '--test_count', default='1', type=int, help="The amount of tests to run on the given network", dest="test_count") parser.add_argument('-np', '--noops', default=30, type=int, help="Maximum amount of no-ops to use", dest="noops") parser.add_argument('-gn', '--gif_name', default=None, type=str, help="If provided, a gif will be produced and stored with this name", dest="gif_name") parser.add_argument('-gf', '--gif_folder', default='', type=str, help="The folder where to save gifs.", dest="gif_folder") parser.add_argument('-d', '--device', default='/gpu:0', type=str, help="Device to be used ('/cpu:0', '/gpu:0', '/gpu:1',...)", dest="device") args = parser.parse_args() arg_file = os.path.join(args.folder, 'args.json') device = args.device for k, v in logger_utils.load_args(arg_file).items(): setattr(args, k, v) args.max_global_steps = 0 df = args.folder args.debugging_folder = '/tmp/logs' args.device = device args.random_start = False args.single_life_episodes = False if args.gif_name: args.visualize = 1 args.actor_id = 0 rng = np.random.RandomState(int(time.time())) args.random_seed = rng.randint(1000)