def main(args):
    """Set up the emulators and the learner from ``args`` and run training.

    Steps, in order:

    1. If ``args.resume`` is set, require an ``args.json`` file in
       ``args.debugging_folder`` and overwrite the attributes of ``args``
       with the values returned by ``logger_utils.load_args``.
    2. Log the configuration and save it with ``logger_utils.save_args``.
    3. If ``args.device`` contains ``'gpu'``, expose only the GPU returned by
       ``misc_utils.pick_gpu_lowest_memory`` through ``CUDA_VISIBLE_DEVICES``.
    4. Create the environment creator and copy its ``num_actions`` and
       ``state_shape`` onto ``args``.
    5. Allocate the arrays shared with the emulators, start the simulator
       coordinator, build the learner and train it.

    Args:
        args: Parsed command-line namespace. It is mutated in place
            (``random_seed``, ``num_actions``, ``state_shape``, ``name`` and,
            when resuming, every key loaded from the saved configuration).

    Raises:
        AssertionError: If ``args.resume`` is set and no ``args.json`` file
            exists in ``args.debugging_folder``.
    """
    import numpy as np

    if args.resume:
        conf_file = os.path.join(args.debugging_folder, 'args.json')
        assert os.path.exists(
            conf_file
        ), "Could not find an args.json file in the debugging folder"
        # Saved values take precedence over the ones given on the command line.
        for k, v in logger_utils.load_args(args.debugging_folder).items():
            setattr(args, k, v)

    logger.debug('Configuration: {}'.format(args))
    logger_utils.save_args(args, args.debugging_folder)

    if 'gpu' in args.device:
        agent_gpu = str(misc_utils.pick_gpu_lowest_memory())
        os.environ["CUDA_VISIBLE_DEVICES"] = agent_gpu
        logger.debug('Agent will be run on device /gpu:{}'.format(agent_gpu))

    # The seed is fixed here regardless of what args carried before.
    args.random_seed = 3

    env_creator = environment_creator.EnvironmentCreator(args)
    args.num_actions = env_creator.num_actions
    args.state_shape = env_creator.state_shape

    # Create a set of arrays (as many as emulators) to exchange states,
    # rewards, etc. between the agent and the emulator.
    n_emulators = args.n_emulator_runners * args.n_emulators_per_emulator_runner
    variables = {
        "s": np.zeros((n_emulators, ) + args.state_shape, dtype=np.float32),
        "a": np.zeros(n_emulators, dtype=np.int32),  # Actions
        "r": np.zeros(n_emulators, dtype=np.float32),  # Rewards
        # The builtin `bool` is used because the `np.bool` alias was
        # deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype
        # is the same on every NumPy version.
        "done": np.zeros(n_emulators, dtype=bool),  # Dones
    }

    sim_coordinator = SimulatorsCoordinator(
        env_creator, args.n_emulators_per_emulator_runner,
        args.n_emulator_runners, variables)
    # Start all simulator processes
    sim_coordinator.start()

    network = QNetwork

    def network_creator(name='value_learning', learning_network=None):
        """Build a network called ``name`` from the shared ``args``."""
        # Only an attribute of the enclosing `args` is set (the name itself is
        # never rebound), so no `nonlocal` declaration is needed.
        args.name = name
        return network(args, learning_network=learning_network)

    learner = PDQFDLearner(network_creator, env_creator, args, sim_coordinator)

    setup_kill_signal_handler(learner)

    logger.info('Starting training')
    learner.train()
    logger.info('Finished training')
type=int, help="Number of possible repetitions", dest="nb_choices") parser.add_argument('--checkpoint_interval', default=1000000, type=int, help="Interval of steps btw checkpoints", dest="checkpoint_interval") parser.add_argument('--activation', default='relu', type=str, help="activation function for the network", dest="activation") parser.add_argument('--alpha_leaky_relu', default=0.1, type=float, help="coef for leaky relu", dest="alpha_leaky_relu") return parser if __name__ == '__main__': args = get_arg_parser().parse_args() import logger_utils logger_utils.save_args(args, args.debugging_folder) logging.debug(args) main(args)