def main(_):
    """Entry point: build a TF session, the environment, and both agents, then train.

    Creates a GPU-fraction-limited TF 1.x session, selects the environment
    from the run config, wires the ACP agent and the DQN agent together,
    restores any existing checkpoints, and runs training.

    Args:
        _: unused positional argument (app-runner convention, e.g. tf.app.run).

    Raises:
        Exception: if ``FLAGS.use_gpu`` is set but no GPU is visible, or if
            evaluation mode (``is_train`` false) is requested — play is not
            implemented.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Fall back to the raw FLAGS object if no named config matches.
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        ACPconfig = ACPConfig(env)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")
        if not FLAGS.use_gpu:
            # CPU kernels generally only support channels-last layout.
            config.cnn_format = 'NHWC'

        # Construction order matters: the ACP agent needs the session first,
        # the DQN agent takes the ACP agent in its constructor, and the ACP
        # agent then needs the DQN agent's model directory for its own state.
        acpAgent = acp.acp(sess, ACPconfig)
        agentDQN = Agent(config, env, acpAgent, sess)
        acpAgent.setdir(agentDQN.model_dir)

        sess.run(tf.initializers.global_variables())

        # Restore both models if any checkpoint exists.
        acpAgent.load()
        agentDQN.load()

        if FLAGS.is_train:
            agentDQN.train()
        else:
            # Evaluation is not implemented. The original code had an
            # unreachable agentDQN.play() call after this raise; it has been
            # removed as dead code.
            raise Exception('agentDQN.play() is Not Implemented')
# NOTE(review): the next two lines are the tail of a list-comprehension
# assignment (e.g. `<name> = [ deque(maxlen=200) for _ in range(4) ]`)
# whose opening lies before this chunk — confirm against the full file.
deque(maxlen=200) for _ in range(4)
]

# Per-agent bookkeeping: most recent observation, the observation buffered
# when the last action was chosen, and the last action taken (initialised
# to 2, which maps to 'F' in the ACTIONS table below).
agent_obs = [None] * flags.num_agents
agent_obs_buffer = [None] * flags.num_agents
agent_action_buffer = [2] * flags.num_agents

# Episode step cap scales with the grid's half-perimeter.
max_steps = 8 * (flags.grid_width + flags.grid_height)
start_time = time.time()

# Load an RL agent and initialize it from checkpoint if necessary.
# NOTE(review): if flags.agent_type is neither "dqn" nor "ppo", `agent` is
# never bound and the uses below raise NameError — verify that the flag is
# validated upstream.
if flags.agent_type == "dqn":
    agent = DQN_Agent(state_size, action_size, flags.num_agents)
elif flags.agent_type == "ppo":
    agent = PPO_Agent(state_size, action_size, flags.num_agents)

# Resume from a checkpoint (restoring the start episode and epsilon) or
# start fresh at episode 0 with full exploration.
if flags.load_model:
    start, eps = agent.load(project_root / 'checkpoints', 0, 1.0)
else:
    start, eps = 0, 1.0
if not flags.train:
    eps = 0.0  # no exploration when only evaluating

# We don't want to retrain on old railway networks when we restart from a
# checkpoint, so we just loop through the generators to get all the old
# networks out of the way.
if start > 0:
    print(f"Skipping {start} railways")
    for _ in range(0, start):
        rail_generator()
        schedule_generator()

# Helper function to detect collisions
# NOTE(review): the comment above appears to describe a function defined
# after this chunk, not the ACTIONS table below.
# Action-id -> one-letter label; presumably Back/Left/Forward/Right/Stop —
# confirm against the environment's action enum.
ACTIONS = {0: 'B', 1: 'L', 2: 'F', 3: 'R', 4: 'S'}