Code Example #1
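    # Excerpt: assumes `import numpy as np` plus an `agent` and `EPISODES`
    # defined earlier in the (truncated) source file.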
    scores, episodes = [], []

    sim = Simulator(orig_data, data)
    #viz = Visualizer(

    for e in range(EPISODES):
        score = 0

        while not sim.sim_done():
            state = sim.state  # Get state
            action = agent.get_action(state)

            # Simulate trading
            #-----------
            max_idx = np.argmax(action[:3])  # action[:3] scores buy/sell/hold; take the best
            reward, done = sim.step(max_idx, action[3])  # action[3] is the trade amount
            next_state = sim.state  # Read the new state back from the simulator
            #-----------

            # save the sample <s, a, r, s'> to the replay memory
            agent.replay_memory(state, action, reward, next_state, done)

            # Train from replay memory at every time step. The first layer is a
            # stateful LSTM, so its hidden states are saved and both networks are
            # reset before training runs batches through them; state_record keeps
            # the online states so they can be restored after training.
            lstm_layer = agent.model.layers[0]
            state_record = lstm_layer.states  # Save the current LSTM states
            agent.model.layers[0].reset_states()
            agent.target_model.layers[0].reset_states()

            agent.train_replay()
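
Both examples drive a Simulator with the same small surface: a state property, sim_done(), and step(action_idx, amount). Below is a minimal sketch of that interface as this first loop uses it, where step() returns only (reward, done) and the caller reads sim.state afterwards. The windowing and reward logic here are placeholder assumptions, not the actual tradebot implementation.

import numpy as np

class Simulator:
    def __init__(self, orig_data, data):
        self.orig_data = orig_data  # Raw prices, used to compute the reward
        self.data = data            # Preprocessed features fed to the agent
        self.t = 0

    @property
    def state(self):
        # Current observation as a flat float vector
        return np.asarray(self.data[self.t], dtype=np.float32).ravel()

    def sim_done(self):
        # True once the series is exhausted (step() needs one tick of lookahead)
        return self.t >= len(self.data) - 1

    def step(self, action_idx, amount):
        # Apply buy(0)/sell(1)/hold(2) at the given size, advance one tick,
        # and pay a placeholder P&L as the reward.
        direction = {0: 1.0, 1: -1.0, 2: 0.0}[action_idx]
        delta = self.orig_data[self.t + 1] - self.orig_data[self.t]
        self.t += 1
        return direction * amount * delta, self.sim_done()

The torch variant below additionally calls reset() and gets the next state returned from step() directly.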
Code Example #2
File: torch_run.py Project: DinoGi/tradebot
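    # Excerpt: assumes `import numpy as np`, `from torch import Tensor`, and an
    # `agent` and `EPISODES` defined earlier in torch_run.py.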
    sim = Simulator(orig_data, data, windowsize=windowsize)

    for e in range(EPISODES):
        # Write actions to log file
        score = 0
        state = Tensor(sim.reset())

        while not sim.sim_done():
            #state = Tensor(sim.state) # Get state
            action = agent.get_action(state)

            # Simulate trading
            #-----------
            max_idx = np.argmax(action[:3])  # action[:3] scores buy/sell/hold; take the best
            # Unlike example #1, step() returns the next state directly
            next_state, reward, done = sim.step(max_idx, action[3])
            next_state = Tensor(next_state)
            #-----------

            # Save the sample <s, a, r, s'> to the replay memory
            agent.replay_memory(state, action, reward, next_state, done)
            state = next_state.clone()  # Clone so the stored sample keeps its own copy

            #loss = agent.train_replay()
            #losses.append(loss.data.numpy()[0])

            score += reward

            if done:
                print('done!')
                state = Tensor(sim.reset())  # Refresh the cached state after the reset
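
Both loops also assume an agent exposing get_action(state), replay_memory(s, a, r, s', done), and train_replay(). Here is a rough PyTorch sketch of that interface, matching the second (torch) example and the 4-element action vector used above (three buy/sell/hold scores plus a trade amount). The network shape, memory size, and DQN-style update are illustrative assumptions; DinoGi/tradebot's actual agent is not shown on this page, and the Keras variant in example #1 additionally maintains a stateful LSTM and a target_model.

import random
from collections import deque

import numpy as np
import torch
from torch import nn, optim

class Agent:
    def __init__(self, state_size, batch_size=32, gamma=0.95):
        self.batch_size = batch_size
        self.gamma = gamma
        self.memory = deque(maxlen=10000)
        # 4 outputs: buy/sell/hold scores plus a trade amount
        self.model = nn.Sequential(
            nn.Linear(state_size, 64), nn.ReLU(), nn.Linear(64, 4))
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)

    def get_action(self, state):
        # Return the raw 4-vector as numpy (exploration noise omitted)
        with torch.no_grad():
            return self.model(state).numpy()

    def replay_memory(self, state, action, reward, next_state, done):
        # Store one <s, a, r, s'> transition
        self.memory.append((state, action, reward, next_state, done))

    def train_replay(self):
        # One gradient step on a random replay minibatch (DQN-style target)
        if len(self.memory) < self.batch_size:
            return None
        batch = random.sample(self.memory, self.batch_size)
        losses = []
        for state, action, reward, next_state, done in batch:
            pred = self.model(state)
            target = pred.detach().clone()
            a = int(np.argmax(action[:3]))  # which discrete action was taken
            q_next = 0.0 if done else self.model(next_state)[:3].max().item()
            target[a] = reward + self.gamma * q_next
            losses.append(nn.functional.mse_loss(pred, target))
        loss = torch.stack(losses).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss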