scores, episodes = [], []
sim = Simulator(orig_data, data)
#viz = Visualizer(
for e in range(EPISODES):
    score = 0
    while not sim.sim_done():
        state = sim.state                        # Get state
        action = agent.get_action(state)
        # Simulate trading
        #-----------
        max_idx = np.argmax(action[:3])          # Choose buy/sell/hold
        reward, done = sim.step(max_idx, action[3])
        next_state = sim.state                   # Get new state
        #-----------
        # Save the sample <s, a, r, s'> to the replay memory
        agent.replay_memory(state, action, reward, next_state, done)
        # Every time step, do the training
        lstm_layer = agent.model.layers[0]
        # Store lstm states (note: these are references to the layer's
        # state variables, not value copies)
        state_record = lstm_layer.states
        # Reset states before the batched replay update
        agent.model.layers[0].reset_states()
        agent.target_model.layers[0].reset_states()
        agent.train_replay()
        score += reward
    scores.append(score)
    episodes.append(e)
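# Caveat on the snippet above: because `state_record = lstm_layer.states`
# keeps references to the stateful LSTM's state variables, reset_states()
# zeroes the "snapshot" too. A minimal value-level snapshot/restore around
# the replay update, assuming a Keras stateful LSTM as layer 0 (the names
# below are illustrative, not the author's):
import keras.backend as K

lstm_layer = agent.model.layers[0]
saved_states = [K.get_value(s) for s in lstm_layer.states]  # copy values out

lstm_layer.reset_states()           # train on replay batches from a clean state
agent.train_replay()

lstm_layer.reset_states(states=saved_states)   # resume the online rollout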
sim = Simulator(orig_data, data, windowsize=windowsize)
for e in range(EPISODES):
    # Write actions to log file
    score = 0
    state = Tensor(sim.reset())
    while not sim.sim_done():
        #state = Tensor(sim.state)               # Get state
        action = agent.get_action(state)
        # Simulate trading
        #-----------
        max_idx = np.argmax(action[:3])          # Choose buy/sell/hold
        next_state, reward, done = sim.step(max_idx, action[3])
        next_state = Tensor(next_state)
        #-----------
        # Save the sample <s, a, r, s'> to the replay memory
        agent.replay_memory(state, action, reward, next_state, done)
        state = next_state.clone()
        #loss = agent.train_replay()
        #losses.append(loss.data.numpy()[0])
        score += reward
        if done:
            print('done!')
            state = Tensor(sim.reset())          # Restart the run and refresh
                                                 # the state so it isn't stale
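# The commented-out `loss = agent.train_replay()` above implies a PyTorch-side
# replay update. A minimal DQN-style sketch of what such a method could look
# like; `self.memory`, `self.batch_size`, `self.gamma`, and `self.optimizer`
# are assumed attributes, and the model's first three outputs are taken as
# the buy/sell/hold Q-values, mirroring `np.argmax(action[:3])` in the loop:
import random
import numpy as np
import torch
import torch.nn.functional as F

def train_replay(self):
    # Skip the update until enough transitions have been collected
    if len(self.memory) < self.batch_size:
        return None
    batch = random.sample(self.memory, self.batch_size)
    states      = torch.stack([s for s, a, r, ns, d in batch])
    # Recover the discrete action index from the stored 4-element action
    actions     = torch.tensor([int(np.argmax(a[:3])) for s, a, r, ns, d in batch])
    rewards     = torch.tensor([r for s, a, r, ns, d in batch], dtype=torch.float32)
    next_states = torch.stack([ns for s, a, r, ns, d in batch])
    dones       = torch.tensor([float(d) for s, a, r, ns, d in batch])

    # Q(s, a) for the action actually taken
    q = self.model(states)[:, :3].gather(1, actions.unsqueeze(1)).squeeze(1)
    # Bootstrapped target from the frozen target network
    with torch.no_grad():
        next_q = self.target_model(next_states)[:, :3].max(1)[0]
    target = rewards + self.gamma * next_q * (1.0 - dones)

    loss = F.mse_loss(q, target)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    return loss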