Beispiel #1
0
def main(_):
    """Build the environment and both agents inside a TF session, then train.

    Opens a ``tf.Session`` whose per-process GPU memory fraction is taken from
    ``calc_gpu_fraction(FLAGS.gpu_fraction)``, selects the environment class
    from ``config.env_type``, wires the ACP agent into the DQN agent,
    initializes all global variables, calls ``load()`` on both agents and
    finally trains the DQN agent.

    Args:
        _: Unused positional argument (presumably the leftover argv handed
            over by the TensorFlow app runner -- confirm against the caller).

    Raises:
        Exception: If ``FLAGS.use_gpu`` is set but TensorFlow reports that no
            GPU is available.
        NotImplementedError: If ``FLAGS.is_train`` is false; play mode is not
            implemented.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Fall back to the raw flags when get_config() returns a falsy value.
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        acp_config = ACPConfig(env)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            # Without the GPU flag, force the 'NHWC' convolution data format.
            config.cnn_format = 'NHWC'

        # NOTE: the next three statements are order-dependent and must stay
        # together: the DQN agent needs the ACP agent at construction time,
        # and the ACP agent's directory is taken from the DQN agent afterwards.
        acp_agent = acp.acp(sess, acp_config)
        dqn_agent = Agent(config, env, acp_agent, sess)
        acp_agent.setdir(dqn_agent.model_dir)

        sess.run(tf.initializers.global_variables())
        # Restore both models (per the original note: only if a checkpoint
        # exists -- the load() implementations are not visible here).
        acp_agent.load()
        dqn_agent.load()

        if not FLAGS.is_train:
            # Play mode does not exist yet; the previously unreachable
            # agentDQN.play() call after this raise has been dropped.
            raise NotImplementedError('agentDQN.play() is Not Implemented')
        dqn_agent.train()
Beispiel #2
0
    deque(maxlen=200) for _ in range(4)
]
# Per-agent bookkeeping lists, one slot per agent.
agent_obs = [None] * flags.num_agents
agent_obs_buffer = [None] * flags.num_agents
# Every agent starts with action 2 buffered (labelled 'F' in ACTIONS below).
agent_action_buffer = [2] * flags.num_agents
# Step limit scaled with the grid size (presumably per episode -- confirm).
max_steps = 8 * (flags.grid_width + flags.grid_height)
start_time = time.time()

# Load an RL agent and initialize it from checkpoint if necessary
if flags.agent_type == "dqn":
    agent = DQN_Agent(state_size, action_size, flags.num_agents)
elif flags.agent_type == "ppo":
    agent = PPO_Agent(state_size, action_size, flags.num_agents)
else:
    # Fail fast with a clear message instead of leaving `agent` unbound and
    # hitting a NameError at its first use.
    raise ValueError(
        f"Unknown agent_type {flags.agent_type!r}; expected 'dqn' or 'ppo'")

if flags.load_model:
    # load() receives the defaults (0, 1.0) and returns the start index and
    # epsilon to resume from.
    start, eps = agent.load(project_root / 'checkpoints', 0, 1.0)
else:
    start, eps = 0, 1.0

if not flags.train:
    # Not training: set epsilon to zero (presumably disables exploration).
    eps = 0.0

# We don't want to retrain on old railway networks when we restart from a checkpoint, so we just loop
# through the generators to get all the old networks out of the way
if start > 0:
    print(f"Skipping {start} railways")
for _ in range(start):
    rail_generator()
    schedule_generator()

# Helper function to detect collisions
# Integer action id -> one-letter label.
ACTIONS = {0: 'B', 1: 'L', 2: 'F', 3: 'R', 4: 'S'}