def init():
    # set up viewing parameters
    gl.glMatrixMode(gl.GL_PROJECTION)
    glu.gluPerspective(60.0, 1.0, 1.0, 100.0)
    gl.glMatrixMode(gl.GL_MODELVIEW)
    gl.glEnable(gl.GL_DEPTH_TEST)

    # set wireframe mode
    #gl.glPolygonMode(gl.GL_FRONT_AND_BACK, gl.GL_LINE)

    # set up lighting
    gl.glEnable(gl.GL_LIGHTING)
    gl.glEnable(gl.GL_COLOR_MATERIAL)
    gl.glEnable(gl.GL_LIGHT0)
    gl.glLightfv(gl.GL_LIGHT0, gl.GL_POSITION, [-5.0, 4.0, 2.0])

    # time counter
    global lastTime
    lastTime = glut.glutGet(glut.GLUT_ELAPSED_TIME)

    # set up a few bodies to simulate
    c = rigidbody.cube()
    c.position = np.array([2.0, 0.0, 0.0])
    c.mass = 0.0
    c.color = np.array([1.0, 0.0, 0.0])
    c.apply_force(np.array([0.00, 30.0, 0.0]))
    c.apply_force_at(np.array([0.0, 10.0, 0.0]), np.array([2.1, -0.1, 0.1]))
    bodies.append(c)

    c2 = rigidbody.sphere(0.5)
    c2.position = np.array([-2.0, -3.0, 0.0])
    c2.mass = 1.0
    c2.color = np.array([0.0, 0.0, 1.0])
    #c2.apply_force_at(np.array([0.0, 50.0, 0.0]), np.array([-2.1, -0.1, 0.0]))
    bodies.append(c2)

    c3 = rigidbody.cube()
    c3.position = np.array([-4.0, -1.0, 0.0])
    c3.mass = 3.0
    c3.color = np.array([0.0, 0.0, 1.0])
    bodies.append(c3)

    #s = spring.spring(2.0, c, np.array([1.5, 0.5, 0.5]), c2, np.array([-1.5, 0.5, 0.5]))
    s = spring.spring(5.0, c, np.array([2.0, -0.5, 0.0]), c2, np.array([-2.1, -2.5, 0.2]))
    s.length = 2.0
    springs.append(s)

    s2 = spring.spring(5.0, c2, c2.position, c3, c3.position + np.array([0.3, 0.5, 0.4]))
    springs.append(s2)
def __generate_springs_diagonal(self):
    # diagonal (shear) springs across both diagonals of each grid cell
    for x in xrange(self.__width_x - 1):
        for y in xrange(self.__width_y - 1):
            self.__springs.append(
                spring.spring(self.__spring_stiffness,
                              self.__particles[x][y],
                              self.__particles[x][y].position,
                              self.__particles[x + 1][y + 1],
                              self.__particles[x + 1][y + 1].position))
            self.__springs.append(
                spring.spring(self.__spring_stiffness,
                              self.__particles[x + 1][y],
                              self.__particles[x + 1][y].position,
                              self.__particles[x][y + 1],
                              self.__particles[x][y + 1].position))
def update(self, start, end):
    # Recompute the drawn spring geometry for the new endpoints and round the
    # attachment points to integer pixel coordinates.
    self.start = start
    self.end = end
    self.x, self.y, self.p1, self.p2 = spring(self.start, self.end, self.nodes,
                                              self.width, self.lead1, self.lead2)
    self.p1 = (int(self.p1[0]), int(self.p1[1]))
    self.p2 = (int(self.p2[0]), int(self.p2[1]))
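# The update() method above calls a module-level spring(start, end, nodes, width,
# lead1, lead2) helper that returns the polyline of a drawn coil spring plus its two
# attachment points. That helper is not part of this excerpt; the sketch below is an
# assumed, simplified version (invented name zigzag_spring, no lead segments) that
# only illustrates the zig-zag geometry such a function typically computes.
import numpy as np

def zigzag_spring(start, end, nodes, width):
    """Return the x and y coordinates of a zig-zag polyline between two 2D points."""
    start = np.asarray(start, dtype=float)
    end = np.asarray(end, dtype=float)
    axis = end - start
    length = np.linalg.norm(axis)
    if length == 0.0:
        return np.array([start[0]]), np.array([start[1]])
    direction = axis / length
    normal = np.array([-direction[1], direction[0]])         # perpendicular to the axis
    t = np.linspace(0.0, 1.0, nodes)                         # evenly spaced nodes
    offsets = width * np.where(np.arange(nodes) % 2 == 0, 1.0, -1.0)
    offsets[0] = offsets[-1] = 0.0                           # endpoints stay on the axis
    points = start + np.outer(t, axis) + np.outer(offsets, normal)
    return points[:, 0], points[:, 1]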
def simulate(self, environment: Environment, strategy: list):
    """
    Simulate over a dataset, given a strategy and an environment.

    :param environment: the environment for the simulation
    :param strategy: strategy data structure to be used in the simulation
    :return: 0 on completion
    """
    done = False
    total_reward = 0.
    self.params.debug = True
    state = environment.reset()
    stop_drop = spring(self.params, environment.price_)
    self.log.debug('STARTING Simulation')
    while not done:
        action = environment.decide_next_action(state, strategy)
        action = stop_drop.correction(action, environment)
        next_state, reward, done, _ = environment.step(action)
        total_reward += reward
        state = next_state

    # Optionally re-initialise the portfolio after the simulation.
    if self.params.init_portfolio:
        environment.save_portfolio(init=True)

    # Display the result of the simulation: either only the totals or a summary.
    if self.params.totals is True:
        self.params.display.report_totals(environment.memory.results,
                                          self.params.mode)
    else:
        self.params.display.summary(environment.memory.results,
                                    do_plot=self.params.do_plot)
    return 0
def reinforce_learn(self, env: Environment):
    """
    Implements the learning loop over states, actions and strategies to learn
    the sequence of actions that maximizes reward.

    :param env: the environment
    :return: avg_rewards, avg_loss, avg_mae, avg_profit
    """
    rl_stats = RLStats()
    epsilon = self.params.epsilon
    stop_drop = spring(self.params, env.price_)

    # Loop over 'num_episodes'
    self.log.debug('Loop over {} episodes'.format(self.params.num_episodes))
    for episode in range(self.params.num_episodes):
        state = env.reset()
        done = False
        rl_stats.reset()
        episode_step = 0
        while not done:
            self.log.debug('-----------')
            # Decide whether to generate a random action or predict the most
            # likely one from the given state.
            action = self.epsilon_greedy(epsilon, state)
            # Experimental: insert the behaviour defined by stop_drop,
            # overriding random choices or predictions.
            action = stop_drop.correction(action, env)
            # Send the action to the environment and get the new state, the
            # reward and information on whether we have finished.
            new_state, reward, done, _ = env.step(action)
            self.experience.append((state, action, reward, new_state, done))
            if self.time_to_learn(episode, episode_step):
                loss, mae = self.nn.do_learn(episode, episode_step,
                                             self.experience)
                rl_stats.step(loss, mae, reward)
            # Update states and metrics
            state = new_state
            episode_step += 1
            self.display.rl_train_report(episode, episode_step,
                                         rl_stats.avg_rewards,
                                         rl_stats.last_avg, rl_stats.start)

        # Update average metrics
        rl_stats.update(self.params.num_episodes,
                        env.memory.results.profit.iloc[-1])

        # Epsilon decays here
        if epsilon >= self.params.epsilon_min:
            epsilon *= self.params.decay_factor
        self.log.debug('Updated epsilon: {:.2f}'.format(epsilon))

    return rl_stats.avg_rewards, rl_stats.avg_loss, \
        rl_stats.avg_mae, rl_stats.avg_profit
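# reinforce_learn() relies on self.epsilon_greedy(epsilon, state) to trade off
# exploration and exploitation. That method is not part of this excerpt; below is a
# minimal, generic sketch of the standard technique (q_for is an assumed stand-in
# for whatever the agent's network predicts for a state; it is not the project's API).
import numpy as np

def epsilon_greedy(epsilon, state, num_actions, q_for):
    """With probability epsilon pick a random action, otherwise the greedy one."""
    if np.random.rand() < epsilon:
        return np.random.randint(num_actions)      # explore: uniform random action
    return int(np.argmax(q_for(state)))            # exploit: highest predicted Q-value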
def main():
    with open("./test_data.pkl", "rb") as f:
        X, Y = pickle.load(f)

    pathes = []
    plt.plot(X, label="data")
    plt.plot(Y, label="query")
    for path, cost in spring(X, Y, 80):
        plt.plot(path[:, 0], X[path[:, 0]], c="C2", label="matched")
        pathes.append(path)
    plt.legend()
    plt.show()
    print(len(pathes))
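# main() above iterates over spring(X, Y, 80), which is expected to yield
# (path, cost) pairs for subsequences of the data X that match the query Y under
# dynamic time warping with cost at most the given threshold. That function is not
# included in this excerpt; below is a naive, quadratic sketch with a similar
# interface (invented name naive_spring). Unlike the actual streaming SPRING
# algorithm, it scans the whole series at once and reports every qualifying end
# point, including overlapping matches.
import numpy as np

def naive_spring(x, y, epsilon):
    """Yield (path, cost) for subsequences of x matching query y with DTW cost <= epsilon."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n, m = len(x), len(y)
    # acc[i, j]: cheapest DTW alignment of y[:i + 1] with a subsequence of x ending at j
    acc = np.full((m, n), np.inf)
    acc[0, :] = np.abs(x - y[0])                   # a match may start at any position of x
    for i in range(1, m):
        acc[i, 0] = abs(x[0] - y[i]) + acc[i - 1, 0]
        for j in range(1, n):
            acc[i, j] = abs(x[j] - y[i]) + min(acc[i - 1, j - 1],
                                               acc[i - 1, j],
                                               acc[i, j - 1])
    for j_end in np.where(acc[-1] <= epsilon)[0]:
        # backtrack one optimal warping path of (x_index, y_index) pairs
        i, j, path = m - 1, int(j_end), []
        while True:
            path.append((j, i))
            if i == 0:
                break
            steps = [(acc[i - 1, j], i - 1, j)]
            if j > 0:
                steps.append((acc[i - 1, j - 1], i - 1, j - 1))
                steps.append((acc[i, j - 1], i, j - 1))
            _, i, j = min(steps)                   # follow the cheapest predecessor
        yield np.array(path[::-1]), float(acc[-1, j_end])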
def __generate_springs(self):
    # generate grid without the last row/column
    for x in xrange(self.__width_x - 1):
        for y in xrange(self.__width_y - 1):
            self.__springs.append(
                spring.spring(self.__spring_stiffness,
                              self.__particles[x][y],
                              self.__particles[x][y].position,
                              self.__particles[x + 1][y],
                              self.__particles[x + 1][y].position))
            self.__springs.append(
                spring.spring(self.__spring_stiffness,
                              self.__particles[x][y],
                              self.__particles[x][y].position,
                              self.__particles[x][y + 1],
                              self.__particles[x][y + 1].position))

    # last row
    for x in xrange(self.__width_x - 1):
        self.__springs.append(
            spring.spring(
                self.__spring_stiffness,
                self.__particles[x][self.__width_y - 1],
                self.__particles[x][self.__width_y - 1].position,
                self.__particles[x + 1][self.__width_y - 1],
                self.__particles[x + 1][self.__width_y - 1].position))

    # last column
    for y in xrange(self.__width_y - 1):
        self.__springs.append(
            spring.spring(
                self.__spring_stiffness,
                self.__particles[self.__width_x - 1][y],
                self.__particles[self.__width_x - 1][y].position,
                self.__particles[self.__width_x - 1][y + 1],
                self.__particles[self.__width_x - 1][y + 1].position))
def __generate_springs_bending(self):
    for x in xrange(self.__width_x - 2):
        for y in xrange(self.__width_y - 2):
            self.__springs.append(
                spring.spring(self.__spring_stiffness,
                              self.__particles[x][y],
                              self.__particles[x][y].position,
                              self.__particles[x][y + 2],
                              self.__particles[x][y + 2].position))
            self.__springs.append(
                spring.spring(self.__spring_stiffness,
                              self.__particles[x][y],
                              self.__particles[x][y].position,
                              self.__particles[x + 2][y],
                              self.__particles[x + 2][y].position))

    # last rows
    for x in xrange(self.__width_x - 2):
        self.__springs.append(
            spring.spring(
                self.__spring_stiffness,
                self.__particles[x][self.__width_y - 2],
                self.__particles[x][self.__width_y - 2].position,
                self.__particles[x + 2][self.__width_y - 2],
                self.__particles[x + 2][self.__width_y - 2].position))
        self.__springs.append(
            spring.spring(
                self.__spring_stiffness,
                self.__particles[x][self.__width_y - 1],
                self.__particles[x][self.__width_y - 1].position,
                self.__particles[x + 2][self.__width_y - 1],
                self.__particles[x + 2][self.__width_y - 1].position))

    # last columns
    for y in xrange(self.__width_y - 2):
        self.__springs.append(
            spring.spring(
                self.__spring_stiffness,
                self.__particles[self.__width_x - 2][y],
                self.__particles[self.__width_x - 2][y].position,
                self.__particles[self.__width_x - 2][y + 2],
                self.__particles[self.__width_x - 2][y + 2].position))
        self.__springs.append(
            spring.spring(
                self.__spring_stiffness,
                self.__particles[self.__width_x - 1][y],
                self.__particles[self.__width_x - 1][y].position,
                self.__particles[self.__width_x - 1][y + 2],
                self.__particles[self.__width_x - 1][y + 2].position))
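# The init() snippet and the grid generators above all build springs through
# spring.spring(stiffness, body_a, anchor_a, body_b, anchor_b), and init() also
# overrides the rest length afterwards (s.length = 2.0). The class itself is not
# part of this excerpt; the sketch below is an assumed, minimal Hooke's-law version
# of such a class. The apply() method and the default rest length are guesses, and
# the bodies are assumed to expose apply_force_at(force, point) as init() uses.
import numpy as np

class Spring(object):
    """Minimal linear spring connecting two bodies at fixed world-space anchors."""

    def __init__(self, stiffness, body_a, anchor_a, body_b, anchor_b):
        self.stiffness = stiffness
        self.body_a = body_a
        self.body_b = body_b
        self.anchor_a = np.asarray(anchor_a, dtype=float)
        self.anchor_b = np.asarray(anchor_b, dtype=float)
        # Default rest length is the initial anchor separation; callers may override it.
        self.length = np.linalg.norm(self.anchor_b - self.anchor_a)

    def apply(self):
        """Apply equal and opposite Hooke's-law forces to the two bodies."""
        delta = self.anchor_b - self.anchor_a
        dist = np.linalg.norm(delta)
        if dist == 0.0:
            return
        direction = delta / dist
        force = self.stiffness * (dist - self.length) * direction
        self.body_a.apply_force_at(force, self.anchor_a)    # pulls a towards b when stretched
        self.body_b.apply_force_at(-force, self.anchor_b)   # equal and opposite on b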
def single_step(self, environment: Environment, strategy):
    """
    Simulate a single step, given a strategy and an environment.

    :param environment: the environment for the simulation
    :param strategy: strategy data structure to be used in the simulation
    :return: 0 on success, -1 if the environment produced no action
    """
    state = environment.resume()
    if state == -1:
        self.log.warn(
            'Portfolio({}) and forecast({}) are in same state(len)'.format(
                self.params.portfolio_name, self.params.forecast_file))
        self.log.warn('No action/recommendation produced by environment')
        return -1

    action = environment.decide_next_action(state, strategy)
    self.log.info('Decided action is: {}'.format(action))
    if self.params.stop_drop is True:
        # is this a failed action?
        is_failed_action = environment.portfolio.failed_action(
            action, environment.price_)
        action = spring(self.params, environment.price_).check(
            action, environment.price_, is_failed_action)
    next_state, reward, done, _ = environment.step(action)

    # Save the action to the tmp file.
    last_action = environment.memory.results.iloc[-1]['action']
    self.log.info('Last action is: {}'.format(last_action))
    pd.Series({'action': last_action}).to_json(self.params.json_action)
    self.log.info('Saved action to: {}'.format(self.params.json_action))

    # Save the updated portfolio, overwriting the file.
    if self.params.no_dump is not True:
        environment.save_portfolio()
    return 0
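# simulate(), reinforce_learn() and single_step() above all wrap the agent's action
# in a spring(self.params, price) object whose correction()/check() methods may
# override the decision (a "stop drop" guard). That class is not included in this
# excerpt; the sketch below is an assumed, much simplified trailing-stop with an
# invented interface: drop_threshold and the 'sell' action name are placeholders,
# and it takes the current price directly instead of the environment object.
class TrailingStop(object):
    """Override the agent's action with a sell once price falls too far from its peak."""

    def __init__(self, drop_threshold, initial_price):
        self.drop_threshold = drop_threshold     # e.g. 0.05 to trigger on a 5% drawdown
        self.max_price = float(initial_price)

    def correction(self, action, current_price):
        """Pass the action through unless the trailing-stop condition fires."""
        self.max_price = max(self.max_price, float(current_price))
        drawdown = (self.max_price - current_price) / self.max_price
        if drawdown > self.drop_threshold:
            return 'sell'
        return action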