def watch_result(episode, s_time, e_time, c_index, all_index, action, reward, profit):
    """Print a human-readable progress report for one step of an episode.

    Args:
        episode: Current episode number (shown against the EPISODES total).
        s_time: Step start time, already formatted as a string.
        e_time: Step end time, already formatted as a string.
        c_index: Current step counter within the episode.
        all_index: Total number of steps in the episode.
        action: Action chosen by the agent at this step.
        reward: Cumulative reward (labelled "all profit" in the output).
        profit: Raw per-step profit; scaled by the module-level MARGIN
            constant for display.
    """
    # NOTE(review): relies on module-level EPISODES and MARGIN — defined
    # elsewhere in the file; confirm they exist before calling.
    print('-------------------- Check -------------------------')
    print('start time: ' + s_time)
    print('counter : ', c_index, '/', all_index, ' of episode : ', episode, '/', EPISODES)
    print('action : ', action)
    print('current profit : ', profit * MARGIN)
    print('reward (all profit): ', reward)
    print('end_time: ' + e_time)
    print('-------------------End Check -----------------------')


if __name__ == "__main__":
    # Training driver: run the DQN agent against the training environment,
    # storing (state, action, reward, next_state, done) transitions via
    # agent.remember for later experience replay.
    agent = DQNAgent(state_size)
    #agent.load("agent_model.h5")
    num_index = all_index - state_size
    env = TrainEnvironment(X_train, num_index)
    batch_size = 32
    for e in range(EPISODES):
        state = env.reset()
        # Reshape to (batch=1, state_size, channels=1) — presumably the
        # input shape the agent's network expects; verify against DQNAgent.
        state = np.reshape(state, (1, state_size, 1))
        for t in range(end_index - start_index):
            start_time = str(datetime.datetime.now().time())
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, (1, state_size, 1))
            # Store the transition for experience replay.
            agent.remember(state, action, reward, next_state, done)
            state = next_state
def watch_result(episode, s_time, e_time, c_index, all_index, action, reward, profit):
    """Print a human-readable progress report for one step of an episode.

    Args:
        episode: Current episode number (shown against the EPISODES total).
        s_time: Step start time, already formatted as a string.
        e_time: Step end time, already formatted as a string.
        c_index: Current step counter within the episode.
        all_index: Total number of steps in the episode.
        action: Action chosen by the agent at this step.
        reward: Cumulative reward (labelled "all profit" in the output).
        profit: Raw per-step profit; scaled by the module-level MARGIN
            constant for display.
    """
    # NOTE(review): relies on module-level EPISODES and MARGIN — defined
    # elsewhere in the file; confirm they exist before calling.
    print('-------------------- Check -------------------------')
    print('start time: ' + s_time)
    print('counter : ', c_index, '/', all_index, ' of episode : ', episode, '/', EPISODES)
    print('action : ', action)
    print('current profit : ', profit * MARGIN)
    print('reward (all profit): ', reward)
    print('end_time: ' + e_time)
    print('-------------------End Check -----------------------')


if __name__ == "__main__":
    # Evaluation driver: load a previously trained model and run the agent
    # with exploration disabled (agent.act(..., False)); transitions are
    # not stored, so no learning happens here.
    agent = DQNAgent(state_size)
    agent.load("agent_model.h5")
    num_index = all_index - state_size
    env = TrainEnvironment(X_train, num_index)
    batch_size = 3
    test_profit = []
    for e in range(EPISODES):
        state = env.reset()
        # Reshape to (batch=1, state_size, channels=1) — presumably the
        # input shape the agent's network expects; verify against DQNAgent.
        state = np.reshape(state, (1, state_size, 1))
        # Reset per-episode profit record.
        test_profit = []
        for t in range(end_index - start_index):
            start_time = str(datetime.datetime.now().time())
            action = agent.act(state, False)  # test
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, (1, state_size, 1))