Example #1
            lstm_layer = agent.model.layers[0]
            # Store the LSTM state *values*; keeping only the variable list would
            # not survive reset_states(), which zeroes those variables in place
            # (assumes K is keras.backend)
            state_record = [K.get_value(s) for s in lstm_layer.states]
            # Reset states on both the online and the target network
            agent.model.layers[0].reset_states()
            agent.target_model.layers[0].reset_states()

            agent.train_replay()
            # Restore the saved episode state
            lstm_layer.reset_states(state_record)

            score += reward
            state = next_state

            if done:
                sim.reset()
                break

        # at the end of every episode, sync the target model with the online model
        agent.update_target_model()

        # every episode, record the score
        scores.append(score)
        episodes.append(e)
        #plt.plot(episodes, scores, 'b')
        # Plot normalized data (disabled)
        if False:
            try:
                t = np.arange(len(data))
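
The core of this example is the save/reset/restore pattern around train_replay(): the stateful LSTM's episode state is copied out, zeroed for the replay-training step, and put back afterwards. A minimal, self-contained sketch of that pattern, assuming a stateful Keras LSTM (the layer sizes and dummy input below are illustrative, not taken from the example):

import numpy as np
from tensorflow import keras
from tensorflow.keras import backend as K

model = keras.Sequential([
    keras.layers.LSTM(8, stateful=True, batch_input_shape=(1, 1, 4)),
    keras.layers.Dense(2),
])

lstm = model.layers[0]
model.predict(np.zeros((1, 1, 4)))             # run once so the state is populated

saved = [K.get_value(s) for s in lstm.states]  # copy the state values, not the variables
lstm.reset_states()                            # zero h and c in place
# ... train on replayed minibatches here without disturbing the episode state ...
lstm.reset_states(saved)                       # restore the saved episode state
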
Example #2
    agent.load_state()
    perturb = torch.from_numpy(np.random.rand(10,10) / 1)
    agent.model.state_dict()['linear2.weight'] += perturb.float()
    perturb2 = torch.from_numpy(np.random.rand(4,10) / 1)
    agent.model.state_dict()['linear3.weight'] += perturb2.float()
    print(agent.model.state_dict())
    '''

    losses, scores, episodes = [], [], []

    sim = Simulator(orig_data, data, windowsize=windowsize)

    for e in range(EPISODES):
        # Write actions to log file
        score = 0
        state = Tensor(sim.reset())

        while not sim.sim_done():
            #state = Tensor(sim.state) # Get state
            action = agent.get_action(state)

            # Simulate trading
            #-----------
            max_idx = np.argmax(action[:3])  # Choose buy/sell/hold
            next_state, reward, done = sim.step(max_idx, action[3])
            next_state = Tensor(next_state)
            #-----------

            # save the sample <s, a, r, s'> to the replay memory
            agent.replay_memory(state, action, reward, next_state, done)
            state = next_state.clone()
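
Both examples call into an agent whose replay_memory and update_target_model methods are not shown here. A minimal sketch of what such a replay buffer and target-network sync might look like, using PyTorch as in Example #2 (the class and method bodies below are hypothetical, not the original agent):

import random
from collections import deque

import torch.nn as nn


class ReplayAgent:
    """Hypothetical sketch; the real agent class is not part of these examples."""

    def __init__(self, model: nn.Module, target_model: nn.Module, maxlen: int = 10000):
        self.model = model
        self.target_model = target_model
        self.memory = deque(maxlen=maxlen)

    def replay_memory(self, state, action, reward, next_state, done):
        # store one <s, a, r, s', done> transition for later minibatch training
        self.memory.append((state, action, reward, next_state, done))

    def sample_batch(self, batch_size=32):
        # uniformly sample a minibatch once enough transitions are stored
        return random.sample(self.memory, min(batch_size, len(self.memory)))

    def update_target_model(self):
        # copy the online network's weights into the target network
        self.target_model.load_state_dict(self.model.state_dict())


# usage: the target network starts out independent and is synced once per episode
online = nn.Linear(4, 3)
target = nn.Linear(4, 3)
agent = ReplayAgent(online, target)
agent.update_target_model()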