def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
def fit_ddpg(env, force: bool = False, root_dir: str = ""):
    nb_actions = env.action_space.n
    loaded = False
    actor_weights_path = pathlib.Path(f"{root_dir}/ddpg-actor.h5")
    critic_weights_path = pathlib.Path(f"{root_dir}/ddpg-critic.h5")
    train_history_path = pathlib.Path(f"{root_dir}/ddpg.log")
    if not force and actor_weights_path.exists():
        actor = load_model(str(actor_weights_path))
        critic = load_model(str(critic_weights_path), compile=False)
        with open(train_history_path, "rb") as f:
            history = pickle.load(f)
        loaded = True
    else:
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        actor.add(Dense(16))
        actor.add(Dropout(0.5))
        actor.add(Activation("relu"))
        actor.add(Dense(16))
        actor.add(Dropout(0.5))
        actor.add(Activation("relu"))
        actor.add(Dense(16))
        actor.add(Dropout(0.5))
        actor.add(Activation("relu"))
        actor.add(Dense(nb_actions))
        actor.add(Activation("linear"))

        action_input = Input(shape=(nb_actions,), name="action_input")
        observation_input = Input(shape=(1,) + env.observation_space.shape, name="observation_input")
        flattened_observation = Flatten()(observation_input)
        x = Concatenate()([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation("relu")(x)
        x = Dense(32)(x)
        x = Activation("relu")(x)
        x = Dense(32)(x)
        x = Dropout(0.5)(x)
        x = Activation("relu")(x)
        x = Dense(1)(x)
        x = Activation("linear")(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0.0, sigma=5)
    ddpg = DDPGAgent(
        nb_actions=nb_actions,
        actor=actor,
        critic=critic,
        critic_action_input=critic.inputs[0],
        memory=memory,
        nb_steps_warmup_critic=1000,
        nb_steps_warmup_actor=1000,
        random_process=random_process,
        gamma=0.99,
        target_model_update=1e-3,
    )
    ddpg.compile(Adam(lr=1e-3), metrics=["mae"])
    if loaded:
        return ddpg, history

    metrics = Metrics(ddpg)
    history = ddpg.fit(
        env,
        nb_steps=10000,
        start_step_policy=env.start_step_policy,
        nb_max_start_steps=10,
        nb_max_episode_steps=100,
        callbacks=[metrics],
    )
    actor.save(str(actor_weights_path))
    critic.save(str(critic_weights_path))
    with open(train_history_path, "wb") as f:
        history = history.history
        history.update(metrics.metrics)
        pickle.dump(history, f)
    return ddpg, history
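# `Metrics` above is not defined in this snippet. A minimal sketch of what such a
# callback might look like, assuming it only needs to accumulate the agent's
# per-step training metrics into a dict (the real class may do more; the body
# here is a guess based solely on how `fit_ddpg` uses it):
from rl.callbacks import Callback

class Metrics(Callback):
    def __init__(self, agent):
        # agent must already be compiled so that metrics_names is populated.
        self.agent = agent
        self.metrics = {name: [] for name in agent.metrics_names}

    def on_step_end(self, step, logs={}):
        # keras-rl passes the agent's metrics for the step in logs['metrics'].
        for name, value in zip(self.agent.metrics_names, logs.get('metrics', [])):
            self.metrics[name].append(value)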
def train_with_params(sigma_v=0., sigma_o=0., test=False):
    ENV_NAME = 'PongSolo'
    conf_name = '{}_sv_{}_so_{}'.format(ENV_NAME, sigma_v, sigma_o)  # sv, so = sigma_v and sigma_orientation

    # Get the environment and extract the number of actions.
    env = EnvPongSolo(sigma_v=sigma_v, sigma_o=sigma_o)  # fixed: sigma_o was mistakenly set to sigma_v
    np.random.seed(123)
    # assert len(env.action_space.shape) == 1
    nb_actions = 1
    leaky_alpha = 0.2

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(100))
    actor.add(LeakyReLU(leaky_alpha))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])  # replaces the deprecated merge(..., mode='concat')
    x = Dense(200)(x)
    x = LeakyReLU(leaky_alpha)(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)  # Keras 2 API (was input=/output=)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    n_steps = 5000000
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=1., mu=0., sigma=.3,
                                              sigma_min=0.01, n_steps_annealing=n_steps)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    directory_weights = "weights/ddpg/{}".format(conf_name)
    if not os.path.exists(directory_weights):
        os.makedirs(directory_weights)
    if not test:
        perfCheckPoint = ModelPerformanceCheckpoint(
            '{}/checkpoint_avg{}_steps{}'.format(directory_weights, '{}', '{}'), 800)
        agent.fit(env, nb_steps=n_steps, visualize=False, verbose=2,
                  nb_max_episode_steps=200, callbacks=[perfCheckPoint])

        # After training is done, we save the final weights.
        agent.save_weights('{}/final.h5f'.format(directory_weights), overwrite=True)

        # Finally, evaluate our algorithm for 100 episodes.
        agent.test(env, nb_episodes=100, visualize=False, nb_max_episode_steps=200)
    else:
        agent.load_weights('{}/final.h5f'.format(directory_weights))
        agent.test(env, nb_episodes=1000, visualize=False, nb_max_episode_steps=200)
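# `ModelPerformanceCheckpoint` is not a keras-rl built-in and is not shown here.
# A minimal sketch with the same constructor signature (filepath template with two
# '{}' slots, reward threshold), assuming it saves weights whenever the running
# average episode reward beats the threshold -- a guess at the intent, not the
# original implementation:
from rl.callbacks import Callback

class ModelPerformanceCheckpoint(Callback):
    def __init__(self, filepath, min_avg_reward, window=20):
        self.filepath = filepath
        self.min_avg_reward = min_avg_reward
        self.window = window
        self.rewards = []
        self.total_steps = 0

    def on_episode_end(self, episode, logs={}):
        # keras-rl provides episode_reward and nb_episode_steps in logs;
        # self.model is the agent itself.
        self.total_steps += logs['nb_episode_steps']
        self.rewards.append(logs['episode_reward'])
        avg = np.mean(self.rewards[-self.window:])
        if len(self.rewards) >= self.window and avg > self.min_avg_reward:
            self.model.save_weights(
                self.filepath.format(int(avg), self.total_steps), overwrite=True)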
def run_ddpg():
    global N_NODE_NETWORK
    env = SnakeGymContinuous()
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(N_NODE_NETWORK))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(N_NODE_NETWORK * 2)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=500, nb_steps_warmup_actor=500,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile('adam', metrics=['mae'])

    agent.fit(env, nb_steps=50000, visualize=True, verbose=2, nb_max_episode_steps=200)
    agent.save_weights('ddpg_SnakeGymContinuous_weights.h5f', overwrite=True)
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

agent.fit(env, nb_steps=50000, visualize=True, verbose=1, nb_max_episode_steps=200)
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
                                          mu=0.0, sigma=0.3)
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=1000,
    nb_steps_warmup_actor=1000,
    random_process=random_process,
    gamma=0.99,
    target_model_update=1e-3,
)
agent.compile(Adam(lr=0.001, clipnorm=1.0), metrics=["mae"])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=100000, visualize=False, verbose=1, nb_max_episode_steps=288)

# After training is done, we save the final weights.
agent.save_weights("ddpg_{}_weights.h5f".format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=288)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
plot_model(critic, to_file='critic.png', show_shapes=True)

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=10000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=25000, visualize=False, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_stokes_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
# '''
# history = agent.warm_fit(env, policy, policy_list, nb_steps=5e6, visualize=False,
#                          log_interval=1000, verbose=2, nb_max_episode_steps=2000)
# sio.savemat('warm-up-' + ENV_NAME + '-' + nowtime + '.mat', history.history)
# agent.save_weights('ddpg_{}_weights_after_warm_start.h5f'.format(ENV_NAME), overwrite=True)
# '''

# the test after warm_up
# '''
# history = agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=2000)
# sio.savemat('test-' + ENV_NAME + '-' + nowtime + '.mat', history.history)
# after = history.history['episode_reward']
# print('before training ', before)
# print('after training ', after)
# '''

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# (A stray unterminated ''' and a duplicated fit call were removed here.)
history = agent.fit(env, nb_steps=int(5e6), visualize=False, log_interval=1000,
                    verbose=2, nb_max_episode_steps=2000)
# print(history.history['metrics'])

action = np.zeros([3])
observation = np.zeros([env.observation_space.shape[0]])
reward = np.zeros([1])
metrics = np.zeros([3])
for time in range(len(history.history['action'])):
    action = np.vstack((action, history.history['action'][time]))
    observation = np.vstack((observation, history.history['observation'][time]))
    reward = np.vstack((reward, history.history['reward'][time]))
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.995, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=1e-3, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    history_cb = agent.fit(env, nb_steps=nallsteps, visualize=args.visualize, verbose=1,
                           nb_max_episode_steps=None, log_interval=1000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)
    reward_history = history_cb.history["episode_reward"]
    np.savetxt("episode_reward.txt", reward_history, delimiter=",")

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=1000)
with open(f"{tb_folder_path}/actor_config.json", "w") as outfile: json_string = actor.to_json() json.dump(json_string, outfile) with open(f"{tb_folder_path}/critic_config.json", "w") as outfile: json_string = critic.to_json() json.dump(json_string, outfile) # This function saves all the important hypterparameters to the run summary file. save_hyperparameters(["DDPG.py", "gym_bizhawk.py"], f"{tb_folder_path}/run_summary.txt") start_time_ascii = time.asctime(time.localtime(time.time())) start_time = time.time() print("Training has started!") # BREADCRUMBS_START callback = [callbacks.TensorBoard(log_dir=tb_folder_path, write_graph=False)] agent.fit(env, nb_steps=8192 * 2, visualize=True, verbose=1, nb_max_episode_steps=512, callbacks=callback) # BREADCRUMBS_END # After training is done, we save the final weights. agent.save_weights('{}\{}_run{}_weights.h5f'.format(tb_folder_path, ENV_NAME, folder_count), overwrite=True) total_run_time = round(time.time() - start_time, 2) print("Training is done.") send_email(f"The training of {run_name} finalized!\nIt started at {start_time_ascii} and took {total_run_time/60} minutes .") env.shut_down_bizhawk_game() # Finally, evaluate our algorithm for 5 episodes. # movie.save("C:/Users/user/Desktop/VideoGame Ret/RL Retrieval/movie") # dqn.test(env, nb_episodes=1, visualize=False)
def main():
    set_gpu_option()
    # OPTIONS
    ENV_NAME = 'DDPGEnv-v0'
    TIME_STEP = 30

    # Get the environment and extract the number of actions.
    PATH_TRAIN = '/home/data/training_x_150.h5'
    PATH_TEST = '/home/data/test_x_150.h5'
    """
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)
    """
    store = pd.HDFStore(PATH_TRAIN, mode='r')
    varieties_list = store.keys()
    print('varieties_list: ', varieties_list)
    print('num varieties: ', len(varieties_list))
    variety = 'RB'
    print('variety: ', variety)

    # Get selected features. The CSV filename is Chinese for
    # "features selected by mutual information, sorted by importance".
    SELECTED_FACTOR_PATH = '~/feature_selection/根据互信息选出的特征,根据重要性排序.csv'
    selected_factor_df = pd.read_csv(SELECTED_FACTOR_PATH, index_col=0)
    selected_factor_list = selected_factor_df[variety].to_list()
    env = DDPGEnv(TIME_STEP, variety=variety, path=PATH_TRAIN, selected_factor_list=selected_factor_list)
    # env_test = DDPGEnv(TIME_STEP, variety=variety, path=PATH_TEST, selected_factor_list=selected_factor_list)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.shape[0]
    print('nb_actions: ', nb_actions)
    print('env.observation_space.shape: ', env.observation_space.shape)
    print('env.observation_space: ', env.observation_space)

    # create actor
    actor = create_actor(input_shape=env.shape, nb_actions=nb_actions)

    # create critic
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=env.shape, name='observation_input')
    critic = create_critic(action_input, observation_input)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    ddpg = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                     memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                     random_process=random_process, gamma=.99, target_model_update=1e-3,
                     processor=DDPGProcessor())
    ddpg.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])

    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_grads=True)

    for _ in range(3):
        ddpg.fit(env, nb_steps=140000, nb_max_episode_steps=140000, visualize=False, verbose=2)
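# `DDPGProcessor` is not defined in this snippet. A minimal sketch of a keras-rl
# Processor that would fit the call site above, assuming its job is simply to clip
# the actor's raw output into the action bounds -- the class name matches the
# snippet, but the body and the [-1, 1] range are assumptions:
from rl.core import Processor

class DDPGProcessor(Processor):
    def process_action(self, action):
        # Keep the raw actor output inside an assumed [-1, 1] action range.
        return np.clip(action, -1.0, 1.0)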
# Set up the agent, using the Keras models defined above along with the policy and actions from above.
# Discrete actions:
policy = EpsGreedyQPolicy()
testPolicy = GreedyQPolicy()
# agent = DQNAgent(model=actorModel, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
#                  policy=policy, test_policy=testPolicy)

# Continuous actions:
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(actor=actorModel, critic=criticModel, nb_actions=nb_actions, memory=memory,
                  nb_steps_warmup_actor=100, nb_steps_warmup_critic=100,
                  critic_action_input=action_input, random_process=random_process)

# Compile the agent.
agent.compile(Nadam(lr=1e-3, clipnorm=0.1), metrics=['mae'])

# Okay, now it's time to learn something!
# We visualize the training here for show, but this slows down training quite a lot.
agent.fit(env, nb_steps=50000, visualize=True, verbose=2)

# TEST!
# blockingVar = input('Press a key!: ')
agent.test(env, nb_episodes=5, visualize=True)
                  critic=critic, critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=2000, nb_steps_warmup_actor=10000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something!
mode = 'test'
if mode == 'train':
    hist = agent.fit(env, nb_steps=1000000, visualize=False, verbose=2, nb_max_episode_steps=1000)
    filename = '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2'
    # We save the history of learning; it can later be used to plot the reward evolution.
    with open('_experiments/history_ddpg__redetorcs' + filename + '.pickle', 'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    agent.save_weights('h5f_files/ddpg_{}_weights.h5f'.format(
        '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2_action_lim_1'), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)

elif mode == 'test':
                                              theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                      memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    # agent.load_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f')  # added to continue training
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f', overwrite=True)
    env.close()
else:
    agent.load_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f')
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f', overwrite=True)
    env.close()
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)  # Keras 2 API (was input=/output=)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)  # window_length is required by keras-rl
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=10.)  # delta_range=(-10., 10.) is deprecated; delta_clip is the equivalent
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
                                          theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, log_interval=50, nb_max_episode_steps=None)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
# agent.load_weights('ddpg_Reacher-v2_weights_128.h5f')

# Finally, evaluate our algorithm for 30 episodes.
agent.test(env, nb_episodes=30, visualize=True, nb_max_episode_steps=None)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0.0, sigma=0.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=nb_steps_warmup, nb_steps_warmup_actor=nb_steps_warmup,
                  random_process=random_process, gamma=0.9, target_model_update=1e-3)
agent.compile(SGD(lr=1e-5, clipvalue=0.001), metrics=['mae'])

callbacks = [
    ModelIntervalCheckpoint(weights_name + '_{step}.h5f', interval=10_000),
    TrainEpisodeLogger(),
    TensorBoard()
]
agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1, callbacks=callbacks)
agent.save_weights(weights_name + '_final.h5f', overwrite=True)
# agent.test(env, nb_episodes=1, visualize=False)
x = Dense(20)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(20)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('tanh')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=Processor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
# agent.load_weights('ddpg_20181006160521_Ship_Env_weights.h5f')

for i in range(10):
    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    agent.fit(env, nb_steps=50000, visualize=False, verbose=1, log_interval=5000, callbacks=[logger])

    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_{}_weights.h5f'.format(timestamp, 'Ship_Env'), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=20000)
# agent.load_weights(WEIGHTS_FILENAME)

callbacks = []
checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
# callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1)  # , nb_max_episode_steps=500

# After training is done, we save the final weights.
filename = 'weights/ddpg_{}_weights_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
agent.save_weights(filename, overwrite=True)

# We'll also save a simply named version to make running a test immediately
# after training easier.
filename = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, visualize=True)  # nb_max_episode_steps=500
def train():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    # print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    # print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

    if REWARD == "normal":
        ddpg_normal = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                                critic_action_input=action_input, memory=memory,
                                nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                                random_process=random_process, gamma=.99, target_model_update=1e-3)
        ddpg_normal.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])

        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        history_normal = ddpg_normal.fit(env, nb_steps=150000, visualize=False, verbose=2,
                                         nb_max_episode_steps=200)

        # After training is done, we save the final weights.
        ddpg_normal.save_weights(os.path.join(
            LOG_DIR, 'ddpg_normal_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)

        # Finally, evaluate our algorithm for 5 episodes.
        ddpg_normal.test(env, nb_episodes=5, visualize=False, verbose=2, nb_max_episode_steps=200)
        pandas.DataFrame(history_normal.history).to_csv(os.path.join(LOG_DIR, "normal.csv"))

    elif REWARD == "noisy":
        processor_noisy = PendulumSurrogateProcessor(weight=WEIGHT, surrogate=False, noise_type=NOISE_TYPE)
        ddpg_noisy = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=random_process, gamma=.99, target_model_update=1e-3,
                               processor=processor_noisy)
        ddpg_noisy.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
        history_noisy = ddpg_noisy.fit(env, nb_steps=150000, visualize=False, verbose=2,
                                       nb_max_episode_steps=200)
        ddpg_noisy.save_weights(os.path.join(
            LOG_DIR, 'ddpg_noisy_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
        ddpg_noisy.test(env, nb_episodes=5, visualize=False, verbose=2, nb_max_episode_steps=200)
        pandas.DataFrame(history_noisy.history).to_csv(os.path.join(LOG_DIR, "noisy.csv"))

    elif REWARD == "surrogate":
        processor_surrogate = PendulumSurrogateProcessor(weight=WEIGHT, surrogate=True, noise_type=NOISE_TYPE)
        ddpg_surrogate = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                                   critic_action_input=action_input, memory=memory,
                                   nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                                   random_process=random_process, gamma=.99, target_model_update=1e-3,
                                   processor=processor_surrogate)
        ddpg_surrogate.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
        history_surrogate = ddpg_surrogate.fit(env, nb_steps=150000, visualize=False, verbose=2,
                                               nb_max_episode_steps=200)
        ddpg_surrogate.save_weights(os.path.join(
            LOG_DIR, 'ddpg_surrogate_{}_weights.h5f'.format(ENV_NAME)), overwrite=True)
        ddpg_surrogate.test(env, nb_episodes=5, visualize=False, verbose=2, nb_max_episode_steps=200)
        pandas.DataFrame(history_surrogate.history).to_csv(os.path.join(LOG_DIR, "surrogate.csv"))

    else:
        raise NotImplementedError
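# `PendulumSurrogateProcessor` is not shown in this snippet. A minimal sketch of a
# keras-rl Processor that corrupts rewards in the same spirit (the constructor
# signature matches the call sites above; the class name and noise model here are
# assumptions, not the original implementation):
from rl.core import Processor

class NoisyRewardProcessor(Processor):  # hypothetical stand-in
    def __init__(self, weight, surrogate, noise_type):
        self.weight = weight
        self.surrogate = surrogate
        self.noise_type = noise_type

    def process_reward(self, reward):
        # Corrupt the observed reward with Gaussian noise scaled by `weight`.
        # A surrogate-reward variant would instead estimate and invert the
        # corruption, which is beyond this sketch.
        if self.noise_type == 'gaussian' and not self.surrogate:
            return reward + self.weight * np.random.randn()
        return reward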
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  batch_size=64, random_process=random_process, gamma=.98, target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=5e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# Note: save_data_path and file_interval are not stock keras-rl fit() arguments;
# this assumes a modified fork.
save_data_path_local = ENV_NAME + '.json'
agent.fit(env, nb_steps=1000000, visualize=False, verbose=1,
          save_data_path=save_data_path_local, file_interval=10000)

# After training is done, we save the final weights.
# agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
plot_af(file_path=ENV_NAME + '.json', save_file_name=ENV_NAME + '.png')

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
checkpoint_weights_filename = 'checkpoint/dqn_' + ENV_NAME + '_weights_{step}.h5f'
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=10000)]
log_filename = 'ddpg_{}_log.json'.format(ENV_NAME)
callbacks += [FileLogger(log_filename, interval=200)]
agent.fit(env, nb_steps=1000000, visualize=True, verbose=2, callbacks=callbacks)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
# Create Actor and Critic networks
k.clear_session()
actor = get_actor(obs_n, actions_n)
critic, action_input = get_critic(obs_n, actions_n)
print(actor.summary())
print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=actions_n, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=actions_n[0], actor=actor, critic=critic, batch_size=64,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99)
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mse'])
# agent.load_weights('ddpg_' + ENV_NAME + 'weights.h5f')

agent.fit(env, env_name=ENV_NAME, nb_steps=500000, action_repetition=5, visualize=False, verbose=1)
env = wrappers.Monitor(env, '/home/wolfie/PycharmProjects/pythonProject/ddpg_halfcheetah',
                       video_callable=lambda episode_id: True, force=True)
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000, verbose=1)
p.disconnect()
                  critic=critic, critic_action_input=action_input, memory=memory,
                  random_process=random_process, nb_steps_warmup_actor=2048,
                  nb_steps_warmup_critic=1024, target_model_update=1000, gamma=0.9,
                  batch_size=128, memory_interval=2)
agent.compile([Adam(lr=3e-5), Adam(lr=3e-3)])

# Start training for 75000 simulation steps
agent.fit(
    env,
    nb_steps=75000,
    nb_max_start_steps=0,
    nb_max_episode_steps=10000,
    visualize=True,
    action_repetition=1,
    verbose=2,
    log_interval=10000,
    callbacks=[],
)

# Test the agent
hist = agent.test(env, nb_episodes=5, action_repetition=1, visualize=True, nb_max_episode_steps=10000)
# Finally, we configure and compile our agent. You can use every built-in tensorflow.keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000, visualize=False, verbose=1)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=600)
random_process = GaussianWhiteNoiseProcess(mu=0.0, sigma=0.8, sigma_min=0.05, n_steps_annealing=650000)

# Create the agent
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, random_process=random_process, nb_steps_warmup_actor=32,
                  nb_steps_warmup_critic=32, target_model_update=1e-4, gamma=0.9, batch_size=32)
agent.compile(Adam(lr=1e-4), metrics=['mae'])

# Start training for 7.5M simulation steps (1.5M training steps with actions repeated 5 times)
agent.fit(env, nb_steps=1500000, visualize=False, action_repetition=5, verbose=2,
          nb_max_start_steps=0, log_interval=10000, callbacks=[])

# Test the agent
hist = agent.test(env, nb_episodes=10, action_repetition=1, visualize=True)
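# For reference, keras-rl's GaussianWhiteNoiseProcess anneals sigma linearly from
# `sigma` down to `sigma_min` over `n_steps_annealing` steps (via its
# AnnealedGaussianProcess base class). A standalone sketch of that schedule:
def annealed_sigma(step, sigma=0.8, sigma_min=0.05, n_steps_annealing=650000):
    # Linear interpolation, clamped at sigma_min once annealing finishes.
    m = -(sigma - sigma_min) / float(n_steps_annealing)
    return max(sigma_min, m * step + sigma)

# e.g. annealed_sigma(0) == 0.8, annealed_sigma(650000) == 0.05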
class Agent:
    def __init__(self, env):
        self.nb_actions = env.action_space.shape[0]
        self.nb_states = env.observation_space.shape[0]
        self.env = env
        self.actor = self.build_actor(env)
        self.actor.compile('Adam', 'mse')
        self.critic, action_input = self.build_critic(env)
        self.loss = self.build_loss()
        self.processor = WhiteningNormalizerProcessor()
        self.memory = SequentialMemory(limit=5000000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=0.75, mu=0.5, sigma=0.25)
        self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.actor, critic=self.critic,
                               critic_action_input=action_input, memory=self.memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=self.random_process, gamma=.99, target_model_update=1e-3,
                               processor=self.processor)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
        self.sym_actor = self.build_sym_actor()
        self.sym_actor.compile(optimizer='Adam', loss='mse')

    def build_loss(self):
        return ['mse']

    def build_actor(self, env):
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(self.nb_actions, activation='hard_sigmoid'))
        actor.summary()
        inD = Input(shape=(1,) + env.observation_space.shape)
        out = actor(inD)
        return Model(inD, out)

    def build_critic(self, env):
        action_input = Input(shape=(self.nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Dense(64, activation='relu')(flattened_observation)
        x = Concatenate()([x, action_input])
        x = Dense(32, activation='relu')(x)
        x = Dense(1)(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        critic.summary()
        return critic, action_input

    def build_sym_actor(self):
        # Build index pairs that swap right/left symmetric state and action entries.
        # Note: list.index() raises ValueError if the '_l' counterpart is missing,
        # so the i != -1 guards only fire when a match was found.
        stateSwap = []
        actionSwap = []
        state_desc = self.env.get_state_desc()
        for x in state_desc.keys():
            keys = list(state_desc[x].keys())
            for (k, key) in enumerate(keys):
                if '_r' in key:
                    i = keys.index(key.replace('_r', '_l'))
                    if i != -1:
                        stateSwap += [(k, i), (i, k)]
        muscle_list = []
        for i in range(self.env.osim_model.muscleSet.getSize()):
            muscle_list.append(self.env.osim_model.muscleSet.get(i).getName())
        for (k, key) in enumerate(muscle_list):
            if '_r' in key:
                i = muscle_list.index(key.replace('_r', '_l'))
                if i != -1:
                    actionSwap += [(k, i), (i, k)]
        stateSwapMat = np.zeros((self.nb_states, self.nb_states))
        actionSwapMat = np.zeros((self.nb_actions, self.nb_actions))
        for (i, j) in stateSwap:
            stateSwapMat[i, j] = 1
        for (i, j) in actionSwap:
            actionSwapMat[i, j] = 1

        def ssT(shape, dtype=None):
            if shape != stateSwapMat.shape:
                raise Exception("State Swap Tensor Shape Error")
            return K.variable(stateSwapMat, dtype=dtype)

        def asT(shape, dtype=None):
            if shape != actionSwapMat.shape:
                raise Exception("Action Swap Tensor Shape Error")
            return K.variable(actionSwapMat, dtype=dtype)

        # Frozen Dense layers whose kernels are the swap matrices: mirror the state,
        # run the shared actor, then mirror the resulting action back.
        model1 = Sequential()
        model1.add(Dense(self.nb_states, input_shape=(1,) + self.env.observation_space.shape,
                         trainable=False, kernel_initializer=ssT, bias_initializer='zeros'))
        inD = Input(shape=(1,) + self.env.observation_space.shape)
        symState = model1(inD)
        symPol = self.actor(symState)
        model2 = Sequential()
        model2.add(Dense(self.nb_actions, input_shape=(1, self.nb_actions),
                         trainable=False, kernel_initializer=asT, bias_initializer='zeros'))
        out = model2(symPol)
        return Model(inD, out)

    def fit(self, **kwargs):
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        out = self.agent.fit(self.env, **kwargs)
        print("\n\ndo symmetric loss back propagation\n\n")
        states = np.random.normal(0, 10, (kwargs['nb_steps'] // 200, 1, self.nb_states))
        actions = self.actor.predict_on_batch(states)
        self.sym_actor.train_on_batch(states, actions)
        return out

    def test(self, **kwargs):
        print("testing")
        print("VA:", self.env.get_VA())
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        return self.agent.test(self.env, **kwargs)

    def test_get_steps(self, **kwargs):
        return self.test(**kwargs).history['nb_steps'][-1]

    def save_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.save_weights(filename.format("opensim"), overwrite=True)
        self.save_processor()

    def load_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.load_weights(filename.format("opensim"))
        self.load_processor()

    def search_VA(self):
        # 1-D line search
        state = self.env.get_VA()
        goal = 0.0
        if abs(state - goal) < 0.01:
            self.env.upd_VA(goal)
            return
        steps = self.test_get_steps(nb_episodes=1, visualize=False, nb_max_episode_steps=1000)
        dv = 0.0
        dsteps = steps
        while (state - dv > goal and dsteps > 0.8 * steps):
            dv += 0.02
            self.env.upd_VA(state - dv)
            dsteps = self.test_get_steps(nb_episodes=1, visualize=False, nb_max_episode_steps=1000)
        if abs((state - dv) - goal) < 0.01:
            self.env.upd_VA(goal)
        else:
            dv -= 0.02
            self.env.upd_VA(state - dv)

    def save_processor(self):
        np.savez('osim-rl/processor.npz',
                 _sum=self.processor.normalizer._sum,
                 _count=np.array([self.processor.normalizer._count]),
                 _sumsq=self.processor.normalizer._sumsq,
                 mean=self.processor.normalizer.mean,
                 std=self.processor.normalizer.std)

    def load_processor(self):
        f = np.load('osim-rl/processor.npz')
        dtype = f['_sum'].dtype
        if self.processor.normalizer is None:
            self.processor.normalizer = WhiteningNormalizer(
                shape=(1,) + self.env.observation_space.shape, dtype=dtype)
        self.processor.normalizer._sum = f['_sum']
        self.processor.normalizer._count = int(f['_count'][0])
        self.processor.normalizer._sumsq = f['_sumsq']
        self.processor.normalizer.mean = f['mean']
        self.processor.normalizer.std = f['std']
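# A minimal usage sketch for the Agent wrapper above, assuming an osim-rl style
# environment object (the env constructor name here is a placeholder):
# env = ProstheticsEnv(visualize=False)   # hypothetical env constructor
# agent = Agent(env)
# agent.fit(nb_steps=10000, nb_max_episode_steps=1000)
# agent.save_weights()
# agent.test(nb_episodes=1, visualize=False, nb_max_episode_steps=1000)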
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
try:
    agent.load_weights('ddpg_{}_nomad_v3_weights.h5f'.format(ENV_NAME))
except (OSError, IOError):
    logger.warning("File not found")

n = 0
while True:
    n += 1
    logger.info('Iteration #{}'.format(n))

    # train
    train_history = agent.fit(env, nb_steps=nb_stepis, visualize=False, verbose=1,
                              nb_max_episode_steps=nb_stepis)

    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_nomad_v3_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Save memory
    pickle.dump(memory, open("memory2.pkl", "wb"))

    # Finally, evaluate our algorithm for nb_episodes episodes.
    test_history = agent.test(env, nb_episodes=nb_episodes, visualize=False,
                              nb_max_episode_steps=nb_stepis)
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(args.model)
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.8, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
nb_steps = 800 * 1440  # alternatives tried: 1*(env.periods-2), 100*(env.periods-2), 100000+1870, env.periods-2
agent.fit(env, nb_steps, visualize=True, verbose=2, nb_max_episode_steps=1440, log_interval=10)

plt.figure(0)
plt.plot(env.portfolio_value)
plt.figure(1)
noise_over_action_array = np.array(agent.noise_over_action)
noise_over_action_array = np.transpose(noise_over_action_array)
for i in range(nb_actions):
    plt.plot(noise_over_action_array[i, :])
plt.show()

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights5.h5f'.format('Crypto'), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
                  do_PER=args.PER, epsilon=1e-4, processor=MujocoProcessor(),
                  pretanh_weight=args.pretanh_weight)
agent.compile([Adam(lr=args.actor_lr, clipnorm=args.actor_gradient_clip),
               Adam(lr=args.critic_lr, clipnorm=args.critic_gradient_clip)], metrics=['mae'])

if args.HER and not args.PER:
    print("\nTraining with Hindsight Experience Replay\n")
    save_data_path_local = 'HER/' + args.ENV_NAME + '.json'
elif not args.HER and args.PER:
    print("\nTraining with Prioritised Experience Replay\n")
    save_data_path_local = 'PER/' + args.ENV_NAME + '.json'
elif args.HER and args.PER:
    print("\nTraining with Prioritised Hindsight Experience Replay\n")
    save_data_path_local = 'PHER/' + args.ENV_NAME + '.json'

if args.train:
    # Start training. (You can always safely abort the training prematurely using Ctrl + C, *once*.)
    agent.fit(env, nb_steps=args.nb_train_steps, visualize=False, verbose=1,
              save_data_path=save_data_path_local, file_interval=args.file_interval,
              nb_max_episode_steps=args.max_step_episode)

# After training is done, we save the final weights and plot the training graph.
try:
    if args.HER and not args.PER:
        if args.train:
            agent.save_weights('HER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
        plot_af(file_path='HER/' + args.ENV_NAME + '.json', save_file_name='HER/' + args.ENV_NAME, plot_what='success')
        plot_af(file_path='HER/' + args.ENV_NAME + '.json', save_file_name='HER/' + args.ENV_NAME, plot_what='loss')
    elif not args.HER and args.PER:
        if args.train:
            agent.save_weights('PER/ddpg_{}_weights.h5f'.format(args.ENV_NAME), overwrite=True)
        plot_af(file_path='PER/' + args.ENV_NAME + '.json', save_file_name='PER/' + args.ENV_NAME, plot_what='success')
        plot_af(file_path='PER/' + args.ENV_NAME + '.json', save_file_name='PER/' + args.ENV_NAME, plot_what='loss')
    elif args.HER and args.PER:
        if args.train:
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=100.)  # delta_range=(-100., 100.) is deprecated; delta_clip is the equivalent
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))
                  actor=actor, critic=critic, critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.0001, clipnorm=1.), metrics=['mae'])

if (args.action == 'test'):
    agent.test(env, nb_episodes=1000, verbose=2, visualize=False, nb_max_episode_steps=300)

tbCallback = TensorBoard(log_dir='./Graph/', write_grads=True, write_graph=True, histogram_freq=0)
ckptCallback = ModelIntervalCheckpoint(filepath='./CheckPoints/', interval=1000)
agent.fit(env, nb_steps=250000, visualize=False, verbose=2, nb_max_episode_steps=300,
          callbacks=[tbCallback, ckptCallback])
# Optionally, we can reload a previous model's weights and continue training from there.
# Remove the _actor or _critic from the filename; the load method automatically appends these.
WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights.h5f'
# agent.load_weights(WEIGHTS_FILENAME)

callbacks = []
checkpoint_weights_filename = 'weights/ddpg_{}_checkpointWeights_{{step}}_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
log_filename = 'logs/ddpg_{}_log_{}_{}_{}_{}.json'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
# callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, visualize=False, verbose=1)  # , nb_max_episode_steps=500

# After training is done, we save the final weights.
filename = 'weights/ddpg_{}_weights_{}_{}_{}_{}.h5f'.format(
    ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
agent.save_weights(filename, overwrite=True)

# We'll also save a simply named version to make running a test immediately
# after training easier.
filename = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, visualize=True)  # nb_max_episode_steps=500
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  processor=MujocoProcessor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=False, verbose=1)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)