        lstm_layer = agent.model.layers[0]
        # Store lstm states
        state_record = lstm_layer.states
        # Reset states
        agent.model.layers[0].reset_states()
        agent.target_model.layers[0].reset_states()
        agent.train_replay()
        # Restore states
        agent.model.layers[0].states = state_record

        score += reward
        state = next_state

        if done:
            sim.reset()
            break

    # at the end of every episode, update the target model to match the model
    agent.update_target_model()
    # every episode, record the score (plotting is disabled below)
    scores.append(score)
    episodes.append(e)
    #plt.plot(episodes, scores, 'b')

'''
Plot normalized data
'''
if False:
    try:
        t = np.arange(len(data))
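# Hedged sketch, not part of the original script: the store/reset/restore of the
# stateful LSTM above keeps *references* to the layer's state variables, and,
# depending on the Keras version, reset_states() may overwrite those same
# variables in place, so the later restore can end up a no-op. Copying the state
# values explicitly avoids that. This assumes a TensorFlow backend, that layer 0
# of both models is the stateful LSTM, and reuses agent.train_replay() from
# above; the helper name below is illustrative.
from tensorflow.keras import backend as K

def train_replay_keeping_lstm_state(agent):
    lstm = agent.model.layers[0]
    saved = [K.get_value(s) for s in lstm.states]   # snapshot the h/c state values
    lstm.reset_states()                             # clear state before fitting on replay samples
    agent.target_model.layers[0].reset_states()
    agent.train_replay()                            # train on the replay memory
    for var, val in zip(lstm.states, saved):        # write the live episode state back
        K.set_value(var, val)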
agent.load_state()

# Perturb the weights of the second and third linear layers in place with uniform noise
perturb = torch.from_numpy(np.random.rand(10, 10) / 1)
agent.model.state_dict()['linear2.weight'] += perturb.float()
perturb2 = torch.from_numpy(np.random.rand(4, 10) / 1)
agent.model.state_dict()['linear3.weight'] += perturb2.float()
print(agent.model.state_dict())

'''
losses, scores, episodes = [], [], []
sim = Simulator(orig_data, data, windowsize=windowsize)

for e in range(EPISODES):
    # Write actions to log file
    score = 0
    state = Tensor(sim.reset())

    while not sim.sim_done():
        #state = Tensor(sim.state)
        # Get the action for the current state
        action = agent.get_action(state)

        # Simulate trading
        #-----------
        max_idx = np.argmax(action[:3])   # Choose buy/sell/hold
        next_state, reward, done = sim.step(max_idx, action[3])
        next_state = Tensor(next_state)
        #-----------

        # save the sample <s, a, r, s'> to the replay memory
        agent.replay_memory(state, action, reward, next_state, done)
        state = next_state.clone()