import time

import DQN


def main(weights_name, video_name=None, get_image=False):
    env = DQN.Environment(render=True, sigma=0.02, down=1.0, get_image=get_image)
    s_size = env.env.s_size
    agent = DQN.Agent(s_size=s_size)
    agent.network.model.load_weights("data/" + weights_name + ".h5", by_name=True)
    print("model loaded")
    for _ in range(3):
        s = time.time()
        if video_name:
            env.record("data/mov/" + video_name + ".mp4")
        step = env.replay(agent.policy)
        print("unicycle lasted {} steps and {:.2f} seconds.".format(step, step / 30))
        print("time = {}".format(time.time() - s))
    env.close()
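# A minimal sketch of how main() might be invoked; "best" and "demo" are
# placeholder names for the weight file and recording (nothing in the snippet
# guarantees such files exist under data/), not part of the original script.
if __name__ == "__main__":
    main("best", video_name="demo")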
def run():
    training_intervals = 100
    interval_interactions = 100
    training_steps = training_intervals * interval_interactions
    game_settings = GameSettings()

    title = 'Convolutional DDQN + PER'
    print(title)
    plt.title(title)

    agent = DQN.Agent(game_settings, training_steps)
    interactions, scores, devs = agent.learning_curve(training_intervals, interval_interactions)

    # GameSettings comes from the surrounding module; smooth() is sketched below.
    plt.plot(interactions, smooth(scores), label='Conv. DDQN+PER')
    # plt.fill_between(interactions, scores - devs, scores + devs, alpha=0.3)
    plt.xlabel('Interactions')
    plt.ylabel('Score')
    plt.legend()
    plt.show()
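# smooth() is called in run() but not defined in this snippet. A minimal sketch,
# assuming it is meant to be a trailing moving average over the scores; the
# window size is an arbitrary choice, not something the original code specifies.
import numpy as np

def smooth(values, window=10):
    # Pad the front with the first value so the output keeps the same length
    # as the input and can be plotted against the same x-axis.
    values = np.asarray(values, dtype=float)
    kernel = np.ones(window) / window
    padded = np.concatenate([np.full(window - 1, values[0]), values])
    return np.convolve(padded, kernel, mode='valid')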
Return_test = Return[cutpoint:]
Price_test = Price[cutpoint:]

state_size = 7
lag = int((state_size - 3) / 2)

# ticker1 = np.random.choice(return1.columns)
# ticker2 = np.random.choice(return2.columns)
ticker1 = 'AEP'
ticker2 = 'AAPL'

environment = env.env(act, state_size, ticker1, ticker2, Return_train, Price_train, 100)
optimizer = Adam(learning_rate=0.05)
agent = DQN.Agent(environment, optimizer)

##############################################################################
"""
Training model
"""
keras.backend.clear_session()

batch_size = 8
num_of_episodes = 10
timesteps_per_episode = len(Return_train)
agent.q_network.summary()

print("#######################################################################")
print("Constructing portfolio with", ticker1, 'and', ticker2)

# clear experience storage
agent.store_clear()
import tensorflow as tf
import gym
import numpy as np
import random
import matplotlib.pyplot as plt

import DQN

env = gym.make('MountainCar-v0')
env.reset()
env._max_episode_steps = 3000  # extend the 200-step default so the car has time to reach the flag

# Build a small Q-network matching MountainCar-v0 (2 state variables, 3 discrete actions).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(24, input_shape=(2,), activation='relu'))
model.add(tf.keras.layers.Dense(3, activation='linear'))
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=.001))

# The freshly built network is immediately replaced by the saved model.
model = tf.keras.models.load_model("traditional.hd5")

episode = []
memory = []
agent = DQN.Agent(.95, .5, .999, model, env)
print(env.observation_space.shape[0])
agent.train(100, 10, 64)
agent.playGame()
model.save("model.hd5")
env.close()
def __init__(self):
    self.num_states = 24
    self.num_actions = 9
    self.agent = DQN.Agent(num_states=self.num_states,
                           num_actions=self.num_actions,
                           memory_cap=1000)
def reset(self):
    self.agent = DQN.Agent(num_states=self.num_states,
                           num_actions=self.num_actions,
                           memory_cap=1000)
    print("Load parameters...")
    self.agent.brain.load_model('./test_weight.hdf5')