# NOTE(review): this line is a whitespace-collapsed fragment of an episode loop.
# It opens with the tail of a call (`loss=agent.loss)`) whose start is not
# visible and ends inside a `for time in range(500)` body; the enclosing loop
# headers and the original indentation are lost. As the line stands, everything
# after the first `#` is parsed as a comment. Nesting described below is
# inferred from statement order -- confirm against the original file.
#
# Visible steps, in order:
#   * `state = next_state`.
#   * When `done`: print env.start / (cash, nown) / tuple(env.holdings), print
#     an episode summary (e, EPISODES, time, agent.epsilon), print
#     `agent.loss / env.init['span']`, write `str(agent.loss)` to `f` and flush,
#     reset `agent.loss` to 0, then `grapher.show(...)`, `grapher.reset()`,
#     `agent.save(save_string)` and `break`. An `if e % 2 == 0:` precedes
#     `grapher.show`; how many of the following statements it guards cannot be
#     determined from the collapsed text.
#   * A commented-out `agent.replay(batch_size)` call.
#   * Test pass on `e % 2 == 0`: reset `test_env`, reshape the state to
#     `[1, state_size]`, and for each of 500 steps read cash / nown / price from
#     `test_env.holdings[0]`, `test_env.holdings[1]`, `test_env.state[-1]` and
#     call `agent.act(state, time, is_test=True)`.
# NOTE(review): `time` is used as a loop variable here; if the `time` module is
# imported elsewhere in the file it would be shadowed -- verify.
loss=agent.loss) #print(action, reward) state = next_state if done: print('start', env.start, 'previous', (cash, nown), 'current', tuple(env.holdings)) print("episode: {}/{}, score: {}, e: {:.5}".format( e, EPISODES, time, agent.epsilon)) print('average_loss =', agent.loss / env.init['span']) f.write(str(agent.loss) + '\n') f.flush() agent.loss = 0 if e % 2 == 0: grapher.show(action_labels=env.action_labels, ep=e, t=time, e=agent.epsilon) grapher.reset() agent.save(save_string) break # if len(agent.memory) > batch_size: # agent.replay(batch_size) # Test if e % 2 == 0: state = test_env.reset() state = np.reshape(state, [1, state_size]) for time in range(500): cash, nown, price = test_env.holdings[ 0], test_env.holdings[1], test_env.state[-1] action = agent.act(state, time, is_test=True)
# NOTE(review): whitespace-collapsed Python 2 fragment (it uses the `print`
# statement form). Original line breaks and indentation are lost; as the line
# stands, everything after the first `#` is parsed as a comment.
#
# First group of statements (`lines = []` ... `sys.stdout.flush()`): reads `t`,
# `x`, `y`, `p` and `s`, none of which are defined in the visible text --
# presumably this is the body of the `anim` callback later passed to
# `g.show(anim)`; confirm against the full file. It calls `updatexy(t)`, sets
# `p[2] = y + 40` and `p[1] = x`, calls `test.update_destination_point(p, s)`,
# builds `lines` from `to_lines(test.joint_points)` and appends every entry of
# `to_lines(test.foward_model(test.joint_angle_only))` (method name spelled
# `foward_model` as in the source), redraws via `g.redraw(lines)`, prints
# `test.return_model_for_low_level()`, calls `test.send_serial()` and flushes
# stdout. A `sys.stdout.write("\r" + ...)` variant is commented out.
#
# Second group (from `test = RoverArm([50, 40, 15])`): constructs the arm, sets
# an initial destination `[40, 0, 40]` with `[1, 0, 0]`, calls `ros_begin()`
# and `send_serial()`, builds the initial `lines`, creates `Grapher(lines)`,
# redraws once, then calls `g.show(anim)` while `test.my_rospy.is_shutdown()`
# is false. `establish_serial_connection()` / `serial_write()` are commented out.
lines = [] updatexy(t) p[2] = y + 40 p[1] = x test.update_destination_point(p, s) lines = to_lines(test.joint_points) lines2 = to_lines(test.foward_model(test.joint_angle_only)) for _line in lines2: lines.append(_line) g.redraw(lines) # sys.stdout.write("\r" + test.return_model()) print test.return_model_for_low_level() test.send_serial() sys.stdout.flush() test = RoverArm([50, 40, 15]) test.update_destination_point([40, 0, 40], [1, 0, 0]) test.ros_begin() test.send_serial() lines = to_lines(test.joint_points) # test.establish_serial_connection() # test.serial_write() g = Grapher(lines) g.redraw(lines) while not test.my_rospy.is_shutdown(): g.show(anim)
# NOTE(review): whitespace-collapsed fragment; the enclosing loop/function
# header is not visible (`e`, `model`, `modes`, `grapher`, `span`, `window`,
# `x_train`, `y_train`, `x_test`, `y_test` all come from outside), and the
# original indentation is lost. As the line stands, everything after the first
# `#` is parsed as a comment. Nesting described below is inferred -- confirm
# against the original file.
#
# Visible steps, in order:
#   * Fit `model.model` on (x_train, y_train) and predict on `x_test`.
#   * Bind `prev_prices = x_test[:, -1]`, `prices = y_test`, and
#     `pred = y_pred.reshape(y_pred.shape[0])` (prediction reshaped to 1-D).
#   * When `e % 100 == 0`, iterate `modes.items()` as (mode, action_labels):
#     build `Trader(mode)`, compute `strategy` from (prev_prices, pred) and
#     `optimal` from (prev_prices, prices), and call
#     `trader.run(strategy, prices, grapher=grapher)`.
#   * A commented-out `f.write(...)` that would log accuracy /
#     balanced-accuracy / average-precision / F1 of `strategy` vs `optimal`;
#     with it disabled, `optimal` is not used by any other visible statement.
#   * Set `grapher.action_labels` and `grapher.pred = pred[:-1]`, call
#     `grapher.show(...)` with ep/span/w/mode, then `grapher.reset()`. These
#     reference `action_labels` and `trader`, so they presumably sit inside the
#     per-mode loop -- verify.
model.model.fit(x_train, y_train) y_pred = model.model.predict(x_test) prev_prices, prices, pred = x_test[:, -1], y_test, y_pred.reshape( y_pred.shape[0]) if e % 100 == 0: for mode, action_labels in modes.items(): trader = Trader(mode) strategy = trader.to_strategy(prev_prices, pred) optimal = trader.to_strategy(prev_prices, prices) trader.run(strategy, prices, grapher=grapher) # f.write('{} accuracy_score={} balanced_accuracy_score={} average_precision_score={} f1_score={}\n'.format( # e, # accuracy_score(strategy, optimal), # balanced_accuracy_score(strategy, optimal), # average_precision_score(strategy, optimal), # f1_score(strategy, optimal))) grapher.action_labels = action_labels grapher.pred = pred[:-1] grapher.show(action_labels=action_labels, ep=e, span=span, w=window, mode=trader.mode) grapher.reset()