def __init__(self, env, nnet, nb_actions):
    """Assemble a keras-rl DQN agent around the supplied network.

    Args:
        env: environment the agent will interact with (stored for later use).
        nnet: wrapper object exposing the underlying Keras model via ``.model``.
        nb_actions: size of the discrete action space.
    """
    # Replay buffer and linearly-annealed epsilon-greedy exploration,
    # both sized from module-level hyperparameter constants.
    replay_buffer = SequentialMemory(limit=MEMORY_LIMIT,
                                     window_length=WINDOW_LENGTH)
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                       attr='eps',
                                       value_max=1.,
                                       value_min=.1,
                                       value_test=.05,
                                       nb_steps=EPS_GREEDY_NB_STEPS)

    agent = DQNAgent(model=nnet.model,
                     nb_actions=nb_actions,
                     policy=exploration,
                     memory=replay_buffer,
                     nb_steps_warmup=NB_STEPS_WARMUP,
                     target_model_update=TARGET_MODEL_UPDATE)
    # gamma=.99, train_interval=4, delta_clip=1.
    agent.compile(Adam(lr=.00025), metrics=['mae'])

    # Processor adapts raw observations/rewards for training; kept on
    # self so callers can reach it as well as assigning it to the agent.
    self.processor = AgentProcessor()
    agent.processor = self.processor

    self.env = env
    self.dqn = agent
# --- Q-network head: merge the phase and vehicle encodings into Q-values.
conc = concatenate([model_phase_encoded, model_vehicle_encoded])
hidden = Dense(128)(conc)
hidden = LeakyReLU()(hidden)
hidden = Dense(64)(hidden)
hidden = LeakyReLU()(hidden)
# Linear output: one Q-value per discrete action.
output = Dense(nb_actions, activation='linear')(hidden)
model = Model(inputs=[model_phase_input, model_vehicle_input], outputs=output)

# Warm-start from a previous checkpoint when one exists; training proceeds
# from scratch otherwise.
model_path = "dqn_model.h5"
try:
    model.load_weights(model_path)
    print(f"Success loading previous weights at {model_path}")
except Exception as e:
    # Was `except BaseException`, which also swallowed KeyboardInterrupt and
    # SystemExit; narrowed to Exception so interrupts still propagate.
    print(f"Did not load previous weights due to {e}, {model_path}")

### Policy, Memory & Agent set-up.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.01, value_test=.01, nb_steps=100000)
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               policy=policy, batch_size=64, gamma=.95,
               nb_steps_warmup=2000, target_model_update=.001)
# The model takes two inputs (phase, vehicle); MultiInputProcessor splits
# the observation accordingly for keras-rl.
dqn.processor = MultiInputProcessor(2)
dqn.compile(optimizer=Adam(lr=.001))

### Fit.
hist = dqn.fit(env, nb_steps=200, verbose=1, log_interval=10)
dqn.save_weights(model_path, overwrite=True)
print("Saved model to disk")

# Evaluate the trained policy on a fresh prediction environment and time it.
test_env = CityFlowAgent(mode='predict', config_path=config_path)
start_time = default_timer()
dqn.test(test_env, nb_episodes=1, visualize=False)
# Fixed typo in the original message ("inn" -> "in").
print(f"\n Done testing in {default_timer()-start_time} seconds")
# --- Agent assembly and training for the "NoseTip" experiment.
train_processor = ImageProcessor(train_env)
dqn = DQNAgent(enable_double_dqn=False,
               model=model,
               nb_actions=nb_actions,
               gamma=gamma,
               batch_size=batch_size,
               memory=memory,
               nb_steps_warmup=50000,
               # >= 1 means a hard target-network copy every 100 steps
               # (keras-rl convention) — TODO confirm this is intended
               # rather than a soft update like 1e-2.
               target_model_update=1e2,
               policy=policy,
               processor=train_processor)
dqn.compile(Adam(lr=learning_rate), metrics=['accuracy'])
# NOTE(review): the original re-assigned `dqn.processor = train_processor`
# after construction; that was redundant (the same processor is already
# passed to the DQNAgent constructor above) and has been removed.
experiment_name = "NoseTip"
history_train = dqn.fit(train_env, nb_steps=500, nb_max_episode_steps=100,
                        log_interval=30000, visualize=False, verbose=2)
dqn.save_weights(experiment_name, overwrite=True)
print("******", train_env.wander)
L = Logging()