예제 #1
0
def watch_result(episode, s_time, e_time, c_index, all_index, action, reward,
                 profit):
    """Print a framed progress report for a single step.

    Args:
        episode: Episode number, shown against the module-level ``EPISODES``.
        s_time: Start timestamp; must already be a ``str`` because it is
            concatenated onto its label.
        e_time: End timestamp; must already be a ``str`` for the same reason.
        c_index: Current step counter, shown against ``all_index``.
        all_index: Total shown as the denominator of the step counter.
        action: Value printed under the ``action`` label.
        reward: Value printed under the ``reward (all profit)`` label.
        profit: Multiplied by the module-level ``MARGIN`` before printing.

    Returns:
        None. All output goes to standard output.
    """
    opening_rule = '-------------------- Check -------------------------'
    closing_rule = '-------------------End Check -----------------------'

    print(opening_rule)
    # Concatenation (not formatting) is kept so a non-string timestamp still
    # fails loudly with TypeError.
    print('start time: ' + s_time)
    # The doubled spaces mirror print()'s separator landing next to the
    # space that already ends each label.
    print(f'counter :  {c_index!s} / {all_index!s}'
          f'  of episode :  {episode!s} / {EPISODES!s}')
    print(f'action :  {action!s}')
    scaled_profit = profit * MARGIN
    print(f'current profit :  {scaled_profit!s}')
    print(f'reward (all profit):  {reward!s}')
    print('end_time: ' + e_time)
    print(closing_rule)


if __name__ == "__main__":
    # Script entry point: steps a DQNAgent through a TrainEnvironment for
    # EPISODES episodes, storing every transition with agent.remember().
    # state_size, all_index, X_train, EPISODES, start_index and end_index are
    # not defined in this excerpt and must exist at module level.

    agent = DQNAgent(state_size)
    # Loading a previously saved model is disabled in this variant.
    #agent.load("agent_model.h5")
    num_index = all_index - state_size
    env = TrainEnvironment(X_train, num_index)
    # NOTE(review): batch_size is not used in the visible lines; presumably
    # it feeds a replay/training call outside this excerpt -- confirm.
    batch_size = 32
    for e in range(EPISODES):
        # Start each episode from the environment's reset state, reshaped to
        # (1, state_size, 1) before it is handed to the agent.
        state = env.reset()
        state = np.reshape(state, (1, state_size, 1))

        for t in range(end_index - start_index):
            # Wall-clock time at the start of this step, kept as a string.
            start_time = str(datetime.datetime.now().time())
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, (1, state_size, 1))
            # Store the transition, then advance to the next state.
            # NOTE(review): `done` is stored but never checked in the visible
            # lines, so the loop always runs its full range here.
            agent.remember(state, action, reward, next_state, done)
            state = next_state
예제 #2
0
def watch_result(episode, s_time, e_time, c_index, all_index, action, reward,
                 profit):
    """Print a framed progress report for a single step.

    Args:
        episode: Episode number, shown against the module-level ``EPISODES``.
        s_time: Start timestamp; must already be a ``str`` because it is
            concatenated onto its label.
        e_time: End timestamp; must already be a ``str`` for the same reason.
        c_index: Current step counter, shown against ``all_index``.
        all_index: Total shown as the denominator of the step counter.
        action: Value printed under the ``action`` label.
        reward: Value printed under the ``reward (all profit)`` label.
        profit: Multiplied by the module-level ``MARGIN`` before printing.

    Returns:
        None. All output goes to standard output.
    """
    opening_rule = '-------------------- Check -------------------------'
    closing_rule = '-------------------End Check -----------------------'

    print(opening_rule)
    # Concatenation (not formatting) is kept so a non-string timestamp still
    # fails loudly with TypeError.
    print('start time: ' + s_time)
    # The doubled spaces mirror print()'s separator landing next to the
    # space that already ends each label.
    print(f'counter :  {c_index!s} / {all_index!s}'
          f'  of episode :  {episode!s} / {EPISODES!s}')
    print(f'action :  {action!s}')
    scaled_profit = profit * MARGIN
    print(f'current profit :  {scaled_profit!s}')
    print(f'reward (all profit):  {reward!s}')
    print('end_time: ' + e_time)
    print(closing_rule)


if __name__ == "__main__":

    agent = DQNAgent(state_size)
    agent.load("agent_model.h5")
    num_index = all_index - state_size
    env = TrainEnvironment(X_train, num_index)
    batch_size = 3
    test_profit = []

    for e in range(EPISODES):
        state = env.reset()
        state = np.reshape(state, (1, state_size, 1))
        test_profit = []
        for t in range(end_index - start_index):
            start_time = str(datetime.datetime.now().time())
            action = agent.act(state, False)  # test
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, (1, state_size, 1))