def test(agent: DQNAgent, test_eps):
    env = gym.make(ENV_NAME)
    ep_rewards = []
    for test_ep in range(test_eps):
        obs = env.reset()
        done = False
        ep_reward = 0
        ep_step = 0
        while not done:
            action = agent.act(np.array(obs), evaluate=True)
            next_obs, reward, done, _ = env.step(action)
            env.render()
            obs = next_obs
            ep_reward += reward
            ep_step += 1
        ep_rewards.append(ep_reward)
        time.sleep(0.2)
    print('\n')
    print('=== Test performance ===')
    print(f'Mean: {np.mean(ep_rewards):.1f} / '
          f'Min: {np.min(ep_rewards):.1f} / '
          f'Max: {np.max(ep_rewards):.1f}')
    env.close()
    return ep_rewards
def dqn_run(episodes=2500, eps_start=1.0, eps_end=0.01, eps_decay=0.995,
            double_dqn=False, dueling_dqn=False, seed=42):
    env = start_env()
    env_info = reset_env_info(env)
    state_size = get_state_size(env_info)
    action_size = get_action_size(env)
    print('Seed used:', seed)
    agent = DQNAgent(state_size, action_size, double_dqn, dueling_dqn, seed)
    scores = []
    scores_window = deque(maxlen=100)
    eps = eps_start
    for episode in range(1, episodes + 1):
        env_info = reset_env_info(env)
        score = 0.0
        done = False
        while not done:
            state = env_info.vector_observations[0]
            action = agent.act(state, epsilon=eps)
            env_info = env_step(env, action)
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            score += reward
        scores_window.append(score)
        scores.append(score)
        eps = max(eps * eps_decay, eps_end)
        print('\rEpisode {}/{}\tAverage Score: {:.2f}, epsilon: {:.3f}'.format(
            episode, episodes, np.mean(scores_window), eps), end=' ')
        if episode % 100 == 0:
            print('\rEpisode {}/{}\tAverage Score: {:.2f}, epsilon: {:.3f}'.format(
                episode, episodes, np.mean(scores_window), eps))
        if np.mean(scores_window) > 13.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                episode - 100, np.mean(scores_window)))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
            break
    env.close()
    return scores
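# The helpers used by dqn_run() above (start_env, reset_env_info, env_step,
# get_state_size, get_action_size) are not part of this snippet. Below is a
# minimal sketch of what they might look like, assuming the legacy
# `unityagents` package and a single-brain environment such as the Udacity
# Banana build; the executable path is a placeholder, not the original code.
from unityagents import UnityEnvironment


def start_env(file_name='Banana.app'):
    # launch the Unity environment executable
    return UnityEnvironment(file_name=file_name)


def reset_env_info(env, train_mode=True):
    # reset the environment and return the BrainInfo of the default brain
    brain_name = env.brain_names[0]
    return env.reset(train_mode=train_mode)[brain_name]


def env_step(env, action):
    # advance the environment by one step and return the new BrainInfo
    brain_name = env.brain_names[0]
    return env.step(int(action))[brain_name]


def get_state_size(env_info):
    # dimension of the vector observation for agent 0
    return len(env_info.vector_observations[0])


def get_action_size(env):
    # number of discrete actions exposed by the default brain
    brain = env.brains[env.brain_names[0]]
    return brain.vector_action_space_size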
def test_model(filename):
    env = gym.make("CartPole-v1")
    agent = DQNAgent(4, 2)
    agent.load_model(filename)
    state = env.reset()
    for _ in range(1000):
        env.render()
        state, _, done, _ = env.step(agent.act(state, explore=False))
        if done:
            break
    env.close()
def advise():
    n1 = float(request.form['n1'])
    n2 = float(request.form['n2'])
    n3 = float(request.form['n3'])
    cash = float(request.form['cash'])
    print(n1)
    print(cash)
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)
    agent.load("202005011635-dqn.h5")
    state = env.reset()
    state[0] = n1
    state[1] = n2
    state[2] = n3
    state[-1] = cash
    state = scaler.transform([state])
    action = agent.act(state)
    # action_combo = list(map(list, itertools.product([0, 1, 2], repeat=3)))
    action_vec = action_combo[action]
    # action_map = {0: "sell", 1: "hold", 2: "buy"}
    # print(action_map[action_vec[0]], action_map[action_vec[1]], action_map[action_vec[2]])
    ans = []
    tmp = 1 if action_vec[0] == 0 and n1 == 0 else action_vec[0]
    if cash == 0 and tmp == 2:
        tmp = 1
    ans.append(action_map[tmp])
    tmp = 1 if action_vec[1] == 0 and n2 == 0 else action_vec[1]
    if cash == 0 and tmp == 2:
        tmp = 1
    ans.append(action_map[tmp])
    tmp = 1 if action_vec[2] == 0 and n3 == 0 else action_vec[2]
    if cash == 0 and tmp == 2:
        tmp = 1
    ans.append(action_map[tmp])
    print(ans)
    return render_template('index.html', ans=ans, n1=n1, n2=n2, n3=n3, cash=cash)
def main():
    env = gym.make('carla-v0')
    state_size = env.image_size_net_chans
    action_size = len(env.action_space)
    agent = DQNAgent(state_size, action_size)
    done = False
    batch_size = 10
    try:
        for episode in range(EPISODES):
            state = env.reset(render=True)
            score = 0.0
            for time in range(10000):
                env.render()
                action = agent.act(state)
                next_state, reward, done = env.step(action)
                if done:
                    reward = -15
                else:
                    if abs(reward) < 0.5:
                        continue
                score += reward
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    agent.update_target_model()
                    print('episode: {}/{}, score: {:.5}, e: {}'.format(
                        episode, EPISODES, score, agent.epsilon))
                    break
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
            if episode % 10 == 0:
                agent.save(os.path.join('..', 'models', 'carla-ddqn.h5'))
    finally:
        env.world.destroy()
def main():
    # enable GPU memory growth
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # model
    model_name = input("Model name -> ")
    model_file = input("Model file -> ")
    my_model = "models/{}/{}.h5".format(model_name, model_file)
    epsilon = float(input("Epsilon -> "))
    episode_count = int(input("Episode count -> "))
    print("Loading", my_model, "with epsilon", epsilon)
    agent = DQNAgent(my_model, float(epsilon))

    # information
    resizeScale = (40, 30)
    frame_n = 3
    max_cte = 4.35

    # statistics
    score = []
    rewards = []
    highest_score = 0
    highest_reward = 0
    max_score = None

    # velocity
    max_velocity = 10.0
    max_acceleration = 0.75

    # steering
    max_steering = 0.75
    steering_step = 2 * max_steering / (agent.action_space - 1)
    steering_table = [
        i * steering_step - max_steering for i in range(agent.action_space)
    ]

    # setup donkey environment
    conf = {
        # "exe_path": "remote",
        "exe_path": "D:/sdsandbox/build2/donkey_sim.exe",
        "host": "127.0.0.1",
        "port": 9094,
        "body_style": "donkey",
        "body_rgb": (128, 128, 128),
        "car_name": "rl",
        "font_size": 100
    }
    # env = gym.make("donkey-generated-roads-v0", conf=conf)
    env = gym.make("donkey-generated-track-v0", conf=conf)
    env.viewer.handler.max_cte = max_cte

    cv2.namedWindow("camera")
    start = time.time()
    first_start = start

    for e in range(episode_count):
        # at each episode, reset environment to starting position
        state = env.reset()
        states = np.empty((frame_n, resizeScale[1], resizeScale[0], 3))
        states[0] = preprocessImage(state, resizeScale)
        need_frames = frame_n - 1
        done = False
        score.append(0)
        rewards.append(0.0)
        last_velocity = [0.0]
        laps = 0
        start = time.time()

        while not done and (score[-1] < max_score if max_score else True):
            if need_frames > 0:
                next_state, reward, done, info = env.step([
                    steering_table[random.randint(0, agent.action_space - 1)],
                    0.15
                ])
                states[frame_n - need_frames] = preprocessImage(
                    next_state, resizeScale)
                need_frames -= 1
                last_velocity.append(info["speed"])
                continue

            # select action, observe environment, calculate reward
            action, Q = agent.act(np.asarray([states]))
            steering = steering_table[action]
            throttle = calculateThrottle(last_velocity[-1], max_velocity,
                                         max_acceleration)
            next_state, reward, done, info = env.step([steering, throttle])
            img = cv2.resize(next_state, (320, 240), interpolation=cv2.INTER_AREA)
            cv2.imshow("camera", img)
            last_velocity.append(round(info["speed"], 4))

            if abs(info["cte"]) >= max_cte:
                done = True
                reward = -1.0  # for track
            else:
                reward = (1.0 - (abs(info["cte"]) / max_cte))
            # for roads
            # if not done:
            #     reward = (1.0 - (abs(info["cte"]) / max_cte));

            if info["lap_finished"]:
                laps += 1

            score[-1] += 1
            rewards[-1] += reward

            # for roads
            # if self.score[-1] > 1500:
            #     laps = max_laps

            next_states = np.roll(states, -1, axis=0)
            next_states[-1] = preprocessImage(next_state, resizeScale)
            states = next_states
            cv2.waitKey(1)

        env.step([0.0, -0.03])

        if len(score) > 20:
            score = score[-20:]
        if len(rewards) > 20:
            rewards = rewards[-20:]
        if score[-1] >= highest_score:
            highest_score = score[-1]
        if rewards[-1] >= highest_reward:
            highest_reward = rewards[-1]

        print(
            "episode: {}/{}, score: {}, reward: {}, laps: {}, e: {:.2}".format(
                e + 1, episode_count, score[-1], round(rewards[-1], 2), laps,
                round(agent.epsilon, 2)))

        if (e + 1) % 5 == 0:
            print("Took", round((time.time() - start) / 60, 2), "minutes\n")
            start = time.time()

    print("Showcase time:", round((time.time() - first_start) / 60, 2),
          "minutes")
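# preprocessImage() and calculateThrottle() are called in the Donkey Car loops
# above and below but are not defined in these snippets. The following is only
# a rough sketch of plausible implementations, assuming the camera frame is an
# RGB uint8 array and the throttle is a simple proportional controller capped
# by max_acceleration; names and behaviour are assumptions, not the original
# project's code.
import cv2
import numpy as np


def preprocessImage(image, resizeScale):
    # downscale to (width, height) = resizeScale and normalise to [0, 1]
    resized = cv2.resize(image, resizeScale, interpolation=cv2.INTER_AREA)
    return resized.astype(np.float32) / 255.0


def calculateThrottle(current_velocity, max_velocity, max_acceleration):
    # push towards max_velocity, never requesting more than max_acceleration
    error = max_velocity - current_velocity
    return float(np.clip(error / max_velocity, 0.0, max_acceleration))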
portfolio_value = []
if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, args.initial_invest)
    # load trained weights
    agent.load(args.weights)
    # when test, the timestamp is same as time when weights was trained
    timestamp = re.findall(r'\d{12}', args.weights)[0]

for e in range(args.episode):
    state = env.reset()
    state = scaler.transform([state])
    for time in range(env.n_step):
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        next_state = scaler.transform([next_state])
        if args.mode == 'train':
            agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, episode end value: {}".format(
                e + 1, args.episode, info['cur_val']))
            portfolio_value.append(info['cur_val'])  # append episode end portfolio value
            break
        if args.mode == 'train' and len(agent.memory) > args.batch_size:
            agent.replay(args.batch_size)
    if args.mode == 'train' and (e + 1) % 10 == 0:  # checkpoint weights
        agent.save('weights/{}-dqn.h5'.format(timestamp))
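# get_scaler(env) is assumed by the trading snippets above and below but not
# shown. In similar trading-DQN examples it is commonly built by fitting a
# scikit-learn StandardScaler on states gathered from a random-action rollout;
# this is a hedged sketch of that pattern, not the original implementation.
import numpy as np
from sklearn.preprocessing import StandardScaler


def get_scaler(env):
    # collect states from one random episode and fit a scaler on them
    states = []
    state = env.reset()
    states.append(state)
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        states.append(state)
    scaler = StandardScaler()
    scaler.fit(np.array(states))
    return scaler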
# Reset the environment
obs = env.reset()
obs = obs[0]
env_renderer.reset()

# Run an episode (until successful or max number of steps reached)
for step in range(max_steps):
    # Normalize the observations
    # norm_obs = normalize_observation(obs[0], tree_depth=tree_depth)

    # Agent performs an action
    for _idx in range(n_agents):
        if obs[_idx] is not None:
            norm_obs = normalize_observation(obs[_idx], tree_depth=tree_depth)
            action = agent.act(state=norm_obs, eps=eps)
            action_dict.update({_idx: action})

    # Environment executes action and returns
    # 1. next observations for all agents
    # 2. corresponding rewards for all agents
    # 3. status if the agents are done
    # 4. information about actions, malfunction, speed and status
    next_obs, all_rewards, done, info = env.step(action_dict)

    for _idx in range(n_agents):
        if not done[_idx]:
            # store each agent's own normalized observation, not the one left
            # over from the action-selection loop above
            norm_obs = normalize_observation(obs[_idx], tree_depth=tree_depth)
            next_norm_obs = normalize_observation(next_obs[_idx],
                                                  tree_depth=tree_depth)
            agent.remember((norm_obs, action_dict[_idx], all_rewards[_idx],
                            next_norm_obs, done[_idx]))

    # advance to the next observations before the following step
    obs = next_obs

    # Render the environment -> show me what you got!
    env_renderer.render_env(show=True, show_observations=True)
for e in range(EPISODES):
    episode_number = e + 1
    # reset state in the beginning of each game
    state = env.reset()
    state = np.reshape(state, [1, 4])
    # time_t represents each frame of the game
    # Our goal is to keep the pole upright as long as possible until score of 500
    # the more time_t the more score
    for time_t in range(500):
        # turn this on if you want to render
        # env.render()
        # Decide action
        action = agent.act(state)
        # Advance the game to the next frame based on the action.
        # Reward is 1 for every frame the pole survived
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, 4])
        # Remember the previous state, action, reward, and done
        agent.remember(state, action, reward, next_state, done)
        # make next_state the new current state for the next frame.
        state = next_state
        agent.replay(32)
        # done becomes True when the game ends
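# The CartPole loop above relies on agent.remember() and agent.replay(), which
# are not shown. A minimal sketch of the usual pattern for this style of Keras
# DQN agent follows; the attribute names (self.memory as a bounded deque,
# self.model as a Keras Q-network, self.gamma, self.epsilon, self.epsilon_min,
# self.epsilon_decay) are assumptions, not the original class.
import random
import numpy as np


class ReplayMixin:
    def remember(self, state, action, reward, next_state, done):
        # store one transition in the replay buffer
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        # skip training until enough transitions have been collected
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # bootstrap with the max Q-value of the next state
                target = reward + self.gamma * np.amax(
                    self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay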
def main():
    # enable GPU memory growth
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # model & training information
    model_name = input("Model name -> ")
    load_trained = input("Load trained (y/n)? ").lower() == "y"
    epsilon = float(input("Epsilon -> "))
    episode_count = int(input("Episode count -> "))
    model_location = "models/" + model_name + "/"
    model_path = model_location + ("model_trained.h5" if load_trained else "model.h5")
    print("Loading", model_path, "with epsilon", epsilon)
    agent = DQNAgent(model_path, epsilon)
    try:
        with open(model_location + "data.json") as data_file:
            agent.memory = json.load(data_file)
    except (OSError, json.JSONDecodeError):
        agent.memory = []

    # training information
    resizeScale = (40, 30)
    batch_size = 12
    frame_n = 3
    max_cte = 4.35
    # max_cte = 3.5

    # statistics
    score = []
    rewards = []
    highest_score = 0
    highest_reward = 0
    max_score = None

    # velocity
    max_velocity = 10.0
    max_acceleration = 0.75

    # steering
    max_steering = 0.75
    steering_step = 2 * max_steering / (agent.action_space - 1)
    steering_table = [
        i * steering_step - max_steering for i in range(agent.action_space)
    ]

    file = open("log.csv", "w+", newline="")
    log = writer(file)
    log.writerow([
        'Episode', 'Timestep', 'Avg Steer', 'Min Reward', 'Avg Reward',
        'Max Reward', 'Episode Length', 'Reward Sum', 'Max Q steer',
        'Max Q throttle', 'Epsilon', 'Episode Time', 'Avg Speed', 'Max Speed',
        'Min CTE', 'Avg CTE', 'Max CTE', 'Distance', "Average Throttle",
        "Max Throttle", "Min Throttle", "Average Absolute CTE",
        "Min Absolute CTE", "Max Absolute CTE"
    ])

    # setup donkey environment
    conf = {
        # "exe_path": "remote",
        "exe_path": "D:/sdsandbox/build2/donkey_sim.exe",
        "host": "127.0.0.1",
        "port": 9091,
        "body_style": "donkey",
        "body_rgb": (128, 128, 128),
        "car_name": "rl",
        "font_size": 100
    }
    # env = gym.make("donkey-generated-roads-v0", conf=conf)
    env = gym.make("donkey-generated-track-v0", conf=conf)
    env.viewer.handler.max_cte = max_cte

    cv2.namedWindow("camera")
    first_train = True
    first_start = time.time()
    timestep = 0
    success_episodes = 0
    max_laps = 5

    for e in range(episode_count):
        # at each episode, reset the environment
        state = env.reset()
        states = np.empty((frame_n, resizeScale[1], resizeScale[0], 3))
        states[0] = preprocessImage(state, resizeScale)
        need_frames = frame_n - 1
        done = False
        score.append(0)
        rewards.append(0.0)
        last_velocity = [0.0]
        laps = 0
        start = time.time()

        # logging
        steers = []
        throttles = []
        rewards_ = []
        velocities = []
        ctes = []
        ctes_absolute = []
        max_q_steer = 0.0
        distance = 0.0
        distance_time = start

        while not done and (score[-1] < max_score if max_score else True):
            if need_frames > 0:
                next_state, reward, done, info = env.step([
                    steering_table[random.randint(0, agent.action_space - 1)],
                    0.15
                ])
                states[frame_n - need_frames] = preprocessImage(
                    next_state, resizeScale)
                need_frames -= 1
                last_velocity.append(info["speed"])
                continue

            # select action, observe environment, calculate reward
            action, Q = agent.act(np.asarray([states]))
            steering = steering_table[action]
            throttle = calculateThrottle(last_velocity[-1], max_velocity,
                                         max_acceleration)
            next_state, reward, done, info = env.step([steering, throttle])
            last_velocity.append(round(info["speed"], 4))
            img = cv2.resize(next_state, (320, 240), interpolation=cv2.INTER_AREA)
            cv2.imshow("camera", img)

            reward = 0.0 if not done else -1.0
            if abs(info["cte"]) >= max_cte:
                done = True
                reward = -1.0
            if not done:
                reward = (1.0 - (abs(info["cte"]) / max_cte))  # for track
            if info["lap_finished"]:
                laps += 1
                if laps == max_laps:
                    done = True

            timestep += 1
            score[-1] += 1
            rewards[-1] += reward

            next_states = np.roll(states, -1, axis=0)
            next_states[-1] = preprocessImage(next_state, resizeScale)

            # save experience and update current state
            agent.remember([states], action, reward, [next_states], done)
            states = next_states
            if not first_train:
                agent.replay(batch_size)

            # logging
            steers.append(steering)
            throttles.append(throttle)
            rewards_.append(reward)
            velocities.append(last_velocity[-1])
            ctes.append(info["cte"])
            ctes_absolute.append(abs(info["cte"]))
            distance += last_velocity[-1] * (time.time() - distance_time)
            distance_time = time.time()
            if Q is not None and (max_q_steer is None or Q > max_q_steer):
                max_q_steer = Q
            cv2.waitKey(1)

        # for roads
        # if distance > 1900:
        #     laps = max_laps

        # logging
        if score[-1] > 0:
            log.writerow([
                e, timestep, round(mean(steers), 2), round(min(rewards_), 2),
                round(mean(rewards_), 2), round(max(rewards_), 2), score[-1],
                round(rewards[-1], 2), round(max_q_steer, 2), 0, agent.epsilon,
                round(time.time() - start, 2), round(mean(velocities), 2),
                round(max(velocities), 2), round(min(ctes), 2),
                round(mean(ctes), 2), round(max(ctes), 2), round(distance, 2),
                round(mean(throttles), 2), round(max(throttles), 2),
                round(min(throttles), 2), round(mean(ctes_absolute), 2),
                round(min(ctes_absolute), 2), round(max(ctes_absolute), 2)
            ])
        else:
            # sometimes, something goes really wrong... don't count this episode
            e -= 1
        file.flush()

        # fix for persisting throttle bug
        env.step([0.0, -0.03])

        if len(agent.memory) > batch_size * 4 and first_train:
            agent.replay(batch_size)
            agent.act(np.asarray([states]))
            first_train = False

        if len(score) > 20:
            score = score[-20:]
        if len(rewards) > 20:
            rewards = rewards[-20:]
        if score[-1] >= highest_score:
            highest_score = score[-1]
        if rewards[-1] >= highest_reward:
            highest_reward = rewards[-1]

        agent.save()
        print(
            "episode: {}/{}, steps: {}, reward: {}, highest reward: {}, average: {}, laps: {}, e: {:.2}, memory: {}, replays: {}"
            .format(e + 1, episode_count, score[-1], round(rewards[-1], 2),
                    round(highest_reward, 2), round(mean(rewards), 2), laps,
                    agent.epsilon, len(agent.memory), agent.replays))

        if (e + 1) % 5 == 0:
            print("Took", round((time.time() - start) / 60, 2), "minutes\n")
            start = time.time()
            agent.merge_models()

        if laps == max_laps:
            success_episodes += 1
        else:
            success_episodes = 0
        if success_episodes == 5:
            print("Training successful! Time: {} minutes.".format(
                round((time.time() - first_start) / 60.0, 2)))
            agent.save("end.h5")
            file.close()
            break

    agent.save()
    print("Total training time:", round((time.time() - first_start) / 60, 2),
          "minutes")
env = DrivingEnv(client)
agent = DQNAgent()
loss = -1
epsilon = 1
epsilon_decay = 0.995
epsilon_min = 0.01

for episode in range(10000000):
    view = env.reset()
    for iteration in range(1000000):
        # epsilon-greedy action selection
        if random.random() < epsilon:
            action = random.choice(range(3))
        else:
            action = agent.act(view)
        control = carla.VehicleControl(throttle=1, steer=[-1, 0, 1][action])
        next_view, reward, done = env.step(control)
        if iteration > 30:
            loss = agent.memorize(view, action, next_view, reward, done)
        view = next_view
        cv2.imshow('rgb', view[:, :, 0:3])
        cv2.imshow('depth', view[:, :, 3])
        cv2.imshow('seg', view[:, :, 4:7])
        cv2.waitKey(1)
        if done:
            break
    epsilon = epsilon * epsilon_decay
    epsilon = max(epsilon, epsilon_min)
def main():
    trial_len = 1030
    env = Environment(100000, 1, trial_len, stock1, stock2)
    trials = 100
    action_info = {
        's1_buys_per_trial': [],
        's1_sells_per_trial': [],
        's2_buys_per_trial': [],
        's2_sells_per_trial': [],
        'holds_per_trial': [],
        'illegal_action_trial': [],
        'profits_per_trial': [],
        'ranges_per_trial': [],
        'good_profits_and_range': []
    }
    dqn_agent = DQNAgent(env, stock1.name, stock2.name)
    menu_option = input(
        "Press 1 to load a model from filepath. Press any other button to start a new model "
    )
    if menu_option == "1":
        dqn_agent.load_model()
    steps = []
    for trial in range(trials):
        print('Trial ', trial)
        cur_state = env.state
        step_count = 0
        start_funds = env.get_funds()
        action = ''
        stock1_buys = 0
        stock1_sells = 0
        stock2_buys = 0
        stock2_sells = 0
        holds = 0
        illegal_action = False
        returns = []
        for step in range(trial_len):
            action_num = dqn_agent.act(cur_state)
            action, stock = None, None
            # Get action from Deep Q Net output
            if action_num == 0:
                action, stock = 'BUY', stock1.name
                stock1_buys += 1
            elif action_num == 1:
                action, stock = 'SELL', stock1.name
                stock1_sells += 1
            elif action_num == 2:
                action, stock = 'BUY', stock2.name
                stock2_buys += 1
            elif action_num == 3:
                action, stock = 'SELL', stock2.name
                stock2_sells += 1
            elif action_num == 4:
                action, stock = 'HOLD', ''
                holds += 1
            else:
                action, stock = None, None
            prev_funds = env.get_funds()
            print('Step {}:'.format(step))
            print(' Action: ', action)
            print(' Stock: ', stock)
            new_state, reward, illegal_action = env.step(action, stock, 1)
            reward = reward if not illegal_action else -10000
            new_funds = env.get_funds()
            returns.append(new_funds - prev_funds)
            print(' Reward: ', reward)
            dqn_agent.remember(cur_state, action_num, reward, new_state,
                               illegal_action)
            dqn_agent.replay()
            dqn_agent.target_train()
            cur_state = new_state
            step_count += 1
            if illegal_action:
                print('Illegal action taken, starting new trial')
                break
        profit = start_funds - env.get_funds()
        df_range = (env.init_day_index, env.init_day_index + trial_len)
        print('Profit: ', start_funds - env.get_funds())
        if profit >= 5000.00:
            action_info['good_profits_and_range'].append((df_range, returns))
            print(action_info['good_profits_and_range'])
        action_info['profits_per_trial'].append(profit)
        action_info['s1_buys_per_trial'].append(stock1_buys)
        action_info['s1_sells_per_trial'].append(stock1_sells)
        action_info['s2_buys_per_trial'].append(stock2_buys)
        action_info['s2_sells_per_trial'].append(stock2_sells)
        action_info['holds_per_trial'].append(holds)
        action_info['illegal_action_trial'].append(illegal_action)
        action_info['ranges_per_trial'].append(
            (env.init_day_index, env.init_day_index + trial_len))
        n = random.randint(0, len(stock1) - trial_len)
        env = Environment(100000, 1, trial_len, stock1, stock2)
    print(
        "Average Profit: ",
        sum(action_info['profits_per_trial']) /
        len(action_info['profits_per_trial']))
    data_file_name = input(
        'Please type the name of the file you would like to save the action info to: '
    )
    menu_option2 = input(
        "Press 0 to quit, press 1 to save to model to location/ ")
    if menu_option2 == "1":
        fp = input("Enter the filepath to save this model to ")
        dqn_agent.custom_save_model(fp)
    action_info_df = pd.DataFrame(action_info)
    action_info_df.to_csv(data_file_name)
player._client.moveByVelocityAsync(0, 0, 0, 5).join()
time.sleep(2)  # time for airsim to setup

num_target_achieved = 0
num_actions_in_episode = 0
episode_next_ratio = 0.3
source, target = player.initAnEpisode()

# see [https://github.com/microsoft/AirSim/blob/master/docs/image_apis.md]
responses = player._client.simGetImages(
    [airsim.ImageRequest(3, airsim.ImageType.DepthPerspective, True, False)])
current_state = transform_input(responses)
current_state = np.expand_dims(current_state, axis=0)

print("Starting Training...")
while True:
    action = agent.act(current_state)
    print(f"action : {action}")
    num_actions_in_episode += 1
    quad_offset = player.interpretAction(action, scaling_factor=0.25)
    quad_vel = player._client.getMultirotorState().kinematics_estimated.linear_velocity
    # player._client.moveByVelocityAsync(quad_vel.x_val + quad_offset[0],
    #                                    quad_vel.y_val + quad_offset[1],
    #                                    quad_vel.z_val + quad_offset[2], 5).join()
    # for the initial phase, keep the velocity in z at 0
    player._client.moveByVelocityAsync(quad_vel.x_val + quad_offset[0],
                                       quad_vel.y_val + quad_offset[1],
                                       0, 5).join()
    time.sleep(0.5)
    quad_state = player._client.getMultirotorState(
def train(
    training_episodes: int,
    env_name: str,
    agent_kwargs: dict,
    log_every: int = 500,
    render: bool = True
):
    env = gym.make(env_name)
    agent_kwargs['observation_space_dim'] = env.observation_space.shape[0]
    agent_kwargs['n_actions'] = env.action_space.n
    agent = DQNAgent(**agent_kwargs)
    print('Agents initialised. Training...')

    episode_rewards = []
    start_time = time.time()
    for episode in range(1, training_episodes + 1):
        obs = env.reset()
        done = False
        agent_losses = 0
        ep_reward = 0
        ep_step = 0
        while not done:
            action = agent.act(np.array(obs))
            next_obs, reward, done, info = env.step(action)
            if render and not episode % log_every:
                env.render()
            ep_reward += reward
            loss = agent.step(
                state=np.array(obs),
                action=action,
                reward=reward,
                next_state=np.array(next_obs),
                done=done
            )
            agent_losses += loss if loss else 0
            obs = next_obs
            ep_step += 1

        episode_rewards.append(ep_reward)
        TB_WRITER.add_scalar('Loss', agent_losses, episode)
        TB_WRITER.add_scalar('Episode reward', ep_reward, episode)
        TB_WRITER.add_scalar('Epsilon', agent.epsilon, episode)

        if not episode % log_every:
            current_time = time.time()
            if render:
                time.sleep(0.2)  # pause to see final state
            print(f'Ep: {episode} / '
                  f'(Last {log_every:,.0f}) Mean: {np.mean(episode_rewards[-log_every:]):.1f} / '
                  f'Min: {np.min(episode_rewards[-log_every:]):.1f} / '
                  f'Max: {np.max(episode_rewards[-log_every:]):.1f} / '
                  f'EPS: {episode / (current_time - start_time):.1f} / '
                  f'Agent epsilon: {agent.epsilon:.2f}')

    print('Done training!\n')
    env.close()
    return agent, episode_rewards
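# One possible way to wire train() together with the test() helper at the top
# of this section. TB_WRITER, ENV_NAME and the (empty) agent_kwargs are
# assumptions about the surrounding module, not part of the original snippets.
import time

import gym
import numpy as np
from torch.utils.tensorboard import SummaryWriter

ENV_NAME = 'CartPole-v1'
TB_WRITER = SummaryWriter()

if __name__ == '__main__':
    # train() fills in observation_space_dim / n_actions from the env itself
    agent, rewards = train(
        training_episodes=2000,
        env_name=ENV_NAME,
        agent_kwargs={},
        log_every=500,
        render=False,
    )
    test(agent, test_eps=5)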
def DqnProgram(args, setResult, training_result):
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--episode', type=int, default=2000,
                        help='number of episode to run')
    parser.add_argument('-b', '--batch_size', type=int, default=32,
                        help='batch size for experience replay')
    parser.add_argument('-i', '--initial_invest', type=int, default=20000,
                        help='initial investment amount')
    parser.add_argument('-m', '--mode', type=str, required=True,
                        help='either "train" or "test"')
    parser.add_argument('-w', '--weights', type=str,
                        help='a trained model weights')
    args = parser.parse_args(args)

    maybe_make_dir('weights')
    maybe_make_dir('portfolio_val')

    import time
    timestamp = time.strftime('%Y%m%d%H%M')

    data = get_data(mode=args.mode)  # TODO: connect this to the stock selected in the UI
    data = np.array([c['종가'] for c in data])

    env = TradingEnv(data, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.shape
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)

    portfolio_value = []

    if args.weights is not None:
        agent.load(args.weights)
        timestamp = re.findall(r'\d{12}', args.weights)[0]

    for e in range(args.episode):
        state = env.reset()
        state = scaler.transform([state])
        for time in range(env.n_step):
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            next_state = scaler.transform([next_state])
            if args.mode == 'train':
                agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                msg = "episode: {}/{}, episode end value: {}".format(
                    e + 1, args.episode, info['cur_val'])
                print(msg)
                setResult(msg=msg)
                training_result.append(info['cur_val'])
                portfolio_value.append(info['cur_val'])  # append episode end portfolio value
                break
            if args.mode == 'train' and len(agent.memory) > args.batch_size:
                agent.replay(args.batch_size)
        if args.mode == 'train' and (e + 1) % 10 == 0:  # checkpoint weights
            agent.save('weights/{}-dqn.h5'.format(timestamp))

    # save portfolio value history to disk
    with open('portfolio_val/{}-{}.p'.format(timestamp, args.mode), 'wb') as fp:
        pickle.dump(portfolio_value, fp)
# Let's explore the environment with random actions
# run_gym(env)

from agent import DQNAgent

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Instantiate agent
agent = DQNAgent(
    state_size=state_size,
    action_size=action_size,
    # use_double=True,
    # use_dueling=True,
    # use_priority=True,
    use_noise=True,
    seed=42)

agent.summary()

# Let's watch an untrained agent
# run_gym(env, get_action=lambda state: agent.act(state))

scores = train_agent(agent, env)
plot_scores(scores, 'NoisyNets Deep Q-Network', polyfit_deg=6)

# agent.load_weights('prioritized_local_weights.pth')
run_gym(env, get_action=lambda state: agent.act(state), max_t=1000)
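# run_gym() is used above but not defined in this snippet. A hedged sketch of
# what it might look like: a simple rollout helper that falls back to random
# actions when no policy is supplied (the signature is guessed from the calls
# above, not taken from the original project).
def run_gym(env, get_action=None, max_t=200):
    state = env.reset()
    for t in range(max_t):
        env.render()
        # use the supplied policy if given, otherwise sample a random action
        action = get_action(state) if get_action else env.action_space.sample()
        state, reward, done, _ = env.step(action)
        if done:
            break
    env.close()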