Example #1
    def __init__(self, env, nnet, nb_actions):
        model = nnet.model  # underlying Keras model built by the nnet wrapper

        # keras-rl components: replay memory and linearly annealed epsilon-greedy exploration
        memory = SequentialMemory(limit=MEMORY_LIMIT,
                                  window_length=WINDOW_LENGTH)
        policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                      attr='eps',
                                      value_max=1.,
                                      value_min=.1,
                                      value_test=.05,
                                      nb_steps=EPS_GREEDY_NB_STEPS)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       policy=policy,
                       memory=memory,
                       nb_steps_warmup=NB_STEPS_WARMUP,
                       target_model_update=TARGET_MODEL_UPDATE
                       )  # gamma=.99, train_interval=4, delta_clip=1.
        dqn.compile(Adam(lr=.00025), metrics=['mae'])

        self.processor = AgentProcessor()
        dqn.processor = self.processor

        # Keep references to the environment and agent for training later.

        self.env = env
        self.dqn = dqn
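
The constructor only builds and stores the agent; a training entry point is not shown. A minimal sketch of what one might look like on the same class (the method name, step count, and weights path are illustrative assumptions, not part of the original):

    def train(self, nb_steps=50000, weights_path='dqn_weights.h5f'):
        # Run keras-rl's built-in training loop on the wrapped environment.
        self.dqn.fit(self.env, nb_steps=nb_steps, visualize=False, verbose=2)
        # Persist the learned weights so they can be reloaded later.
        self.dqn.save_weights(weights_path, overwrite=True)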
Example #2
    conc = concatenate([model_phase_encoded, model_vehicle_encoded])
    hidden = Dense(128)(conc)
    hidden = LeakyReLU()(hidden)
    hidden = Dense(64)(hidden)
    hidden = LeakyReLU()(hidden)
    output = Dense(nb_actions, activation='linear')(hidden)
    model = Model(inputs=[model_phase_input, model_vehicle_input], outputs=output)
    model_path = "dqn_model.h5"
    try:
        model.load_weights(model_path)
        print(f"Successfully loaded previous weights from {model_path}")
    except Exception as e:
        print(f"Did not load previous weights from {model_path}: {e}")

    ### Policy, Memory & Agent set-up.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.01, value_test=.01, nb_steps=100000)
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, policy=policy, batch_size=64, gamma=.95, nb_steps_warmup=2000, target_model_update=.001)
    dqn.processor = MultiInputProcessor(2)
    dqn.compile(optimizer=Adam(lr=.001))

    ### Fit.
    hist = dqn.fit(env, nb_steps=200, verbose=1, log_interval=10)
    dqn.save_weights(model_path, overwrite=True)
    print("Saved model to disk")

    test_env = CityFlowAgent(mode='predict', config_path=config_path)
    start_time = default_timer()
    dqn.test(test_env, nb_episodes=1, visualize=False)
    print(f"\nDone testing in {default_timer()-start_time} seconds")
Example #3
train_processor = ImageProcessor(train_env)  # custom keras-rl Processor that prepares the image observations for the network

dqn = DQNAgent(enable_double_dqn=False,
               model=model,
               nb_actions=nb_actions,
               gamma=gamma,
               batch_size=batch_size,
               memory=memory,
               nb_steps_warmup=50000,
               target_model_update=1e2,
               policy=policy,
               processor=train_processor)

dqn.compile(Adam(lr=learning_rate), metrics=['accuracy'])
dqn.processor = train_processor  # redundant: the processor was already passed to the DQNAgent constructor above

experiment_name = "NoseTip"

history_train = dqn.fit(train_env,
                        nb_steps=500,
                        nb_max_episode_steps=100,
                        log_interval=30000,
                        visualize=False,
                        verbose=2)

dqn.save_weights(experiment_name, overwrite=True)

print("******", train_env.wander)

L = Logging()
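
After training, the saved weights can be reloaded and the greedy policy evaluated; a short sketch under the assumption that the same environment and processor are reused (the episode count is illustrative):

    # Reload the weights saved above and evaluate without exploration noise.
    dqn.load_weights(experiment_name)
    history_test = dqn.test(train_env, nb_episodes=5, visualize=False)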