def step(self) -> Tuple[float, float, float, float, float]:
    """Run one train/test iteration of the predictor against both streams.

    Draws one batch from the test stream and one from the train stream,
    fits the predictor on the train batch, and folds the new measurements
    into the running (smeared) averages kept on the instance.

    :return: tuple of (duration [ms], train error, test error,
             train reward, test reward) -- all running averages.
    """
    examples_test = self._stream_test.next()
    inputs_test, targets_test = zip(*examples_test)
    self.reward_test = smear(self.reward_test, self._stream_test.get_reward(),
                             self._iterations)

    examples_train = self._stream_train.next()
    inputs_train, targets_train = zip(*examples_train)
    self.reward_train = smear(self.reward_train, self._stream_train.get_reward(),
                              self._iterations)

    # time only the predict + fit on the training batch
    this_time = time.time()
    outputs_train = self._predictor.predict(inputs_train)
    self._predictor.fit(inputs_train, targets_train)
    self.duration = smear(self.duration, (time.time() - this_time) * 1000.,
                          self._iterations)

    # test prediction happens AFTER fitting on the train batch
    outputs_test = self._predictor.predict(inputs_test)

    self.error_train = smear(
        self.error_train,
        self._stream_train.total_error(outputs_train, targets_train),
        self._iterations)
    # BUG FIX: the test error was previously computed with
    # self._stream_train.total_error(...); use the test stream instead.
    self.error_test = smear(
        self.error_test,
        self._stream_test.total_error(outputs_test, targets_test),
        self._iterations)

    self._iterations += 1
    return self.duration, self.error_train, self.error_test, self.reward_train, self.reward_test
def fit(self, in_value: float, out_value: float, drag: int):
    """Fold one (input, output) sample into the running moment matrices.

    Row ``r``, column ``c`` of the variance matrix tracks the smeared
    average of ``in_value ** (r + c)``; row ``r`` of the covariance
    vector tracks the smeared average of ``out_value * in_value ** r``.

    :param in_value: sample input value.
    :param out_value: sample output (target) value.
    :param drag: smear inertia; must be non-negative.
    """
    assert drag >= 0
    column_count = self._degree + 1
    for row_index in range(len(self._var_matrix)):
        row = self._var_matrix[row_index]
        for col_index in range(column_count):
            power_term = in_value ** (row_index + col_index)
            row[col_index] = smear(row[col_index], power_term, drag)
        covariance_term = out_value * in_value ** row_index
        self._cov_matrix[row_index] = smear(self._cov_matrix[row_index],
                                            covariance_term, drag)
def _integrate(self, sensor: RATIONAL_SENSOR, motor: RATIONAL_MOTOR, reward: float):
    """Perform one actor-critic learning step from the last stored transition.

    Updates three learners from the transition (last sensor, last motor,
    last reward) -> (current sensor):
      * ``self._evaluation`` -- state value estimate, S -> float
      * ``self._advantage``  -- state-action advantage, S x M -> float
      * ``self._actor``      -- policy, S -> M
    Finally stores the current sensor/motor/reward for the next call.

    NOTE(review): the author's TODOs below mark the actor's gradient step
    as unfinished -- treat the policy update as provisional.
    """
    # todo: fix gradient descent!
    # todo: optimize with score function trick: http://www.youtube.com/watch?v=bRfUxQs6xIM&t=37m24s
    # todo: check out advantage: https://medium.freecodecamp.org/an-intro-to-advantage-actor-critic-methods-lets-play-sonic-the-hedgehog-86d6240171d
    iteration = self.get_iterations()
    if iteration >= 1:
        # advantage learner input is the concatenation of sensor and motor
        last_input = self._last_sensor + self._last_motor

        # evaluation S -> float: fit toward bootstrap target r + gamma * V(s')
        evaluation = self._evaluation.output(sensor)
        evaluation_error = self._evaluation.fit(
            self._last_sensor, self._last_reward + self._gamma * evaluation)
        self.average_value_error = smear(self.average_value_error,
                                         evaluation_error, iteration - 1)

        # advantage S x M -> float: TD error of the last transition
        last_advantage = self._last_reward + self._gamma * evaluation - self._evaluation.output(
            self._last_sensor)
        advantage_error = self._advantage.fit(last_input, last_advantage)
        self.average_advantage_error = smear(self.average_advantage_error,
                                             advantage_error, iteration - 1)

        # actor S -> M (consider advantage instead of critic value)
        # current policy output, clipped to the legal motor ranges
        best_known = tuple(
            clip(_m, *_ranges) for _m, _ranges in zip(
                self._actor.output(self._last_sensor), self._motor_range))
        best_known_advantage = self._advantage.output(self._last_sensor + best_known)
        # positive delta_eval -> the last motor looked better than the policy's;
        # nudge the policy toward it, scaled by the advantage difference
        delta_eval = last_advantage - best_known_advantage
        delta_step = tuple(_l - _b for _l, _b in zip(self._last_motor, best_known))
        better_motor = tuple(
            clip(smear(_b, _b + _d * delta_eval, self._past_scope), *_ranges)
            for _b, _d, _ranges in zip(best_known, delta_step, self._motor_range))
        actor_errors = self._actor.fit(self._last_sensor, better_motor)
        self.average_actor_error = smear(self.average_actor_error,
                                         cartesian_distance(actor_errors),
                                         iteration - 1)

    # remember the current transition for the next call
    self._last_sensor, self._last_motor = sensor, motor
    self._last_reward = reward
def integrate(self, perception: Optional[SENSOR_TYPE], action: MOTOR_TYPE, reward: float):
    """Fold the reward into the running average, then learn from the step.

    The reward average is updated even when ``perception`` is ``None``;
    the learning step itself is skipped in that case.
    """
    self.average_reward = smear(self.average_reward, reward, self.__iteration)
    if perception is not None:
        self._integrate(perception, action, reward)
def test_3d():
    """Interactive demo: fit a 2-input polynomial regressor to a 2-D cosine
    surface, replotting the fitted surface and the running error roughly
    once per second.  Runs forever (Ctrl-C to stop).
    """
    from mpl_toolkits.mplot3d import Axes3D
    dim_range = -10., 10.
    plot_axis, error_axis = setup_3d_axes()
    r = MultiplePolynomialFromLinearRegression(2, 4, past_scope=100, learning_drag=0)
    # fun = lambda _x, _y: 10. + 1. * _x ** 1. + 1. * _y ** 1. + 4. * _x * _y + 1. * _x ** 2. + -2.6 * _y ** 2.
    fun = lambda _x, _y: -cos(_x / (1. * math.pi)) + -cos(_y / (1. * math.pi))
    plot_surface(plot_axis, fun, (dim_range, dim_range), resize=True)
    #pyplot.pause(.001)
    #pyplot.draw()
    iterations = 0
    # sliding window of the smeared absolute error
    error_development = deque(maxlen=10000)
    while True:
        x = random.uniform(*dim_range)
        y = random.uniform(*dim_range)
        z_o = r.output([x, y])  # prediction BEFORE fitting this sample
        z_t = fun(x, y)
        error = 0 if iterations < 1 else smear(error_development[-1],
                                               abs(z_o - z_t), iterations)
        error_development.append(error)
        # redraw at most about once per second to keep the loop fast
        if Timer.time_passed(1000):
            print(f"{iterations:d} finished")
            c = r.get_coefficients()
            print(c)
            print(r.derive_coefficients(c, 0))
            ln = plot_surface(plot_axis, lambda _x, _y: r.output([_x, _y]),
                              (dim_range, dim_range), resize=False)
            # ln_d = plot_surface(plot_axis, lambda _x, _y: r.derivation_output([_x, _y], 0), (dim_range, dim_range), resize=False)
            e, = error_axis.plot(range(len(error_development)),
                                 error_development, color="black")
            error_axis.set_ylim((min(error_development), max(error_development)))
            pyplot.pause(.001)
            pyplot.draw()
            # remove the transient artists so the next redraw starts clean
            ln.remove()
            # ln_d.remove()
            e.remove()
        r.fit([x, y], z_t)  # , past_scope=iterations)
        iterations += 1
def fit(self, x: Tuple[float, ...], y: float):
    """Fold one (x, y) sample into the running means/variances/covariances.

    Deviations are measured against the means from BEFORE this update
    (standard online-moment ordering); the means are updated afterwards.
    A negative drag disables fitting entirely.
    """
    assert len(x) == self._input_dimensions
    if self._drag < 0:
        # negative drag means "frozen": ignore the sample
        return
    dy = y - self._mean_y
    for _i, (_var_x, _cov_xy) in enumerate(zip(self._var_x, self._cov_xy)):
        _dx = x[_i] - self._mean_x[_i]  # distance from mean x
        self._var_x[_i] = smear(_var_x, _dx**2., self._drag)
        self._cov_xy[_i] = smear(_cov_xy, _dx * dy, self._drag)
    self._var_y = smear(self._var_y, dy**2., self._drag)
    if 0 >= self._iterations:
        # first sample: seed the means directly with the observation
        self._mean_x = list(x)
        self._mean_y = y
    # NOTE(review): on the very first sample the means are seeded above and
    # then immediately smeared again below -- confirm this double update of
    # the initial means is intended.
    for _i, (_mean_x, _x) in enumerate(zip(self._mean_x, x)):
        self._mean_x[_i] = smear(_mean_x, _x, self._drag)
    self._mean_y = smear(self._mean_y, y, self._drag)
    # NOTE(review): iterations is pinned at 1, so the seeding branch runs
    # only once; the author marked this for revision.
    self._iterations = 1  # TODO: change
def fit(self,
        input_value: float,
        output_value: float,
        past_scope: int = -1,
        learning_drag: int = -1):
    """Update running statistics and the smoothed gradient from one sample.

    At least one of the instance-level and call-level settings for scope
    and drag must be non-negative; the larger of each pair is used.
    """
    assert self._past_scope >= 0 or past_scope >= 0
    assert self._learning_drag >= 0 or learning_drag >= 0
    scope = max(self._past_scope, past_scope)
    drag = max(self._learning_drag, learning_drag)

    # deviations from the means of the previous iteration
    delta_in = input_value - self._mean_x
    delta_out = output_value - self._mean_y

    self._variance_x = smear(self._variance_x, delta_in**2., scope)  # remove smear?
    self._cross_variance_xy = smear(self._cross_variance_xy, delta_in * delta_out, scope)
    self._mean_x = smear(self._mean_x, input_value, scope)
    self._mean_y = smear(self._mean_y, output_value, scope)

    # guard against division by zero before forming the slope estimate
    if self._variance_x == 0.:
        raw_gradient = 0.
    else:
        raw_gradient = self._cross_variance_xy / self._variance_x
    self.gradient = smear(self.gradient, raw_gradient, drag)
def some_random_games_first():
    """Drive the (module-level) gym ``env`` with the module-level
    ``controller`` forever, printing the running average reward every two
    seconds.  Rendering switches on once the average reward reaches 0.9.
    """
    # Each of these is its own game.
    # this is each frame, up to 200...but we wont make it that far.
    average_reward = 0.
    iterations = 0
    sensor = None  # no observation yet before the first env.step
    visualize = False
    while True:
        # This will display the environment
        # Only display if you really want to see it.
        # Takes much longer to display it.
        if average_reward >= .9:
            visualize = True  # latches on; never switched off again
        if visualize:
            env.render()
        # This will just create a sample action in any environment.
        # In this environment, the action can be 0 or 1, which is left or right
        if sensor is None:
            # motor = env.action_space.sample()
            motor = 0.,
        else:
            motor = controller.react(tuple(sensor))
        # this executes the environment with an action,
        # and returns the observation of the environment,
        # the reward, if the env is over, and other info.
        state = env.step(numpy.array(motor))
        sensor, reward, done, info = state  # (x_pos, x_vel, theta_ang, theta_vel)
        controller.integrate(tuple(sensor), tuple(motor), reward)
        average_reward = smear(average_reward, reward, iterations)
        iterations += 1
        # NOTE(review): ``done`` is unpacked but never acted upon -- the env
        # is not reset at episode end; confirm this is intentional.
        if Timer.time_passed(2000):
            print(f"{iterations:010d} iterations, average reward: {average_reward:.2f}")
def fit(self,
        input_value: float,
        output_value: float,
        past_scope: int = -1,
        learning_drag: int = -1) -> float:
    """Fit one (input, output) sample; return the PRE-update absolute error.

    At least one of the instance-level and call-level settings for scope
    and drag must be non-negative; the larger of each pair is used.

    :return: absolute prediction error measured before this update.
    """
    assert self._past_scope >= 0 or past_scope >= 0
    assert self._learning_drag >= 0 or learning_drag >= 0
    _past_scope = max(self._past_scope, past_scope)
    _learning_drag = max(self._learning_drag, learning_drag)
    # error of the current model, before fitting this sample
    error = abs(self.output(input_value) - output_value)
    self.linear_gradient.fit(input_value, output_value,
                             past_scope=_past_scope,
                             learning_drag=_learning_drag)
    # NOTE(review): the offset combines self._mean_x / self._mean_y with the
    # slope from the nested ``linear_gradient`` regressor -- confirm these
    # means are kept in sync with the nested regressor's own means.
    self.offset = smear(
        self.offset,
        self._mean_y - self.linear_gradient.gradient * self._mean_x,
        _learning_drag)
    return error
def test_2d():
    """Interactive demo: fit a 1-input degree-3 polynomial regressor to a
    cubic target, replotting the fit, its derivative, and the running error
    roughly once per second.  Runs forever (Ctrl-C to stop).
    """
    dim_range = -10., 10.
    plot_axis, error_axis = setup_2d_axes()
    r = MultiplePolynomialFromLinearRegression(1, 3, -1)
    # r = MultiplePolynomialOptimizationRegression(1, 3)
    # fun = lambda _x: -cos(_x / (1. * math.pi))
    fun = lambda _x: 0. + 0. * _x**1. + 0. * _x**2. + 1. * _x**3.  # + 1. * _x ** 4.
    x_range = tuple(
        _x / 10. for _x in range(int(dim_range[0]) * 10, int(dim_range[1]) * 10))
    y_range = tuple(fun(_x) for _x in x_range)
    plot_axis.plot(x_range, y_range, color="C0")
    plot_axis.set_xlim(*dim_range)
    iterations = 0
    window_size = 100000
    # sliding window of the smeared absolute error
    error_development = deque(maxlen=window_size)
    while True:
        x = random.uniform(*dim_range)
        y_o = r.output([x])  # prediction BEFORE fitting this sample
        y_t = fun(x)
        error = 0 if iterations < 1 else smear(error_development[-1],
                                               abs(y_o - y_t), iterations)
        error_development.append(error)
        # redraw at most about once per second to keep the loop fast
        if Timer.time_passed(1000):
            print(f"{iterations:d} iterations finished")
            values = tuple(r.output([_x]) for _x in x_range)
            l, = plot_axis.plot(x_range, values, color="C1")
            values_d = tuple(r.derivation_output([_x], 0) for _x in x_range)
            l_d, = plot_axis.plot(x_range, values_d, color="C2")
            plot_axis.set_ylim(
                (min(values + values_d), max(values + values_d)))
            # keep the error axis scrolled to the most recent window
            x_min = max(0, iterations - window_size)
            x_max = x_min + window_size
            error_axis.set_xlim((x_min, x_max))
            x_min_limit = max(0, iterations - len(error_development))
            e, = error_axis.plot(range(x_min_limit, x_min_limit + len(error_development)),
                                 error_development, color="black")
            error_axis.set_ylim(
                (min(error_development), max(error_development)))
            pyplot.pause(.001)
            pyplot.draw()
            # remove the transient artists so the next redraw starts clean
            l.remove()
            l_d.remove()
            e.remove()
        r.fit([x], y_t, past_scope=1000, learning_drag=0)
        iterations += 1
def test_3d():
    """Demo: fit LinearRegressor(2, ...) to a 2-input target surface for a
    fixed number of samples, then plot the target, the predictions, the
    error history, and the fitted per-axis lines.

    (The closing print's f-string was split across lines in the original
    source; it is reassembled here unchanged.)
    """
    fig = pyplot.figure()
    axis_3d = fig.add_subplot(221, projection='3d')
    axis_3d.set_xlabel("x")
    axis_3d.set_ylabel("y")
    axis_3d.set_zlabel("z")
    axis_2d_error = fig.add_subplot(222)
    axis_2d_error.set_xlabel("t")
    axis_2d_error.set_ylabel("error")
    axis_2d_yz = fig.add_subplot(223)
    axis_2d_yz.set_xlabel("y")
    axis_2d_yz.set_ylabel("z")
    axis_2d_xz = fig.add_subplot(224)
    axis_2d_xz.set_xlabel("x")
    axis_2d_xz.set_ylabel("z")
    # x_coefficients = 0., 1.,
    # y_coefficients = 0., -1.,
    # degree = number of coefficients - 1
    x_coefficients = -375., 400., -140., 20., -1.,
    y_coefficients = 375., -400., 140., -20., 1.,
    # fun = poly_function(x_coefficients, y_coefficients)
    fun = trig_function()
    number_of_points = 1000
    drag_value = number_of_points
    value_range = 0., 10.
    plot_surface(axis_3d, fun, value_range, colormap=cm.viridis)  # , color="C0")
    # r = MultiplePolynomialRegressor([6, 6])
    r = LinearRegressor(2, number_of_points)
    error = []   # running smeared absolute error per step
    tar_z = []   # sampled target points (x, y, z)
    out_z = []   # regressor outputs at the sampled points
    z_min, z_max = .0, .0
    for _t in range(number_of_points):
        p_x = random.uniform(*value_range)
        p_y = random.uniform(*value_range)
        input_values = p_x, p_y
        output_value = r.output(input_values)  # prediction BEFORE fitting
        out_z.append((p_x, p_y, output_value))
        p_z = fun(p_x, p_y)
        # track the observed z extent for the final axis limits
        if _t < 1:
            z_min = p_z
            z_max = p_z
        else:
            z_min = min(z_min, p_z)
            z_max = max(z_max, p_z)
        tar_z.append((p_x, p_y, p_z))
        e = abs(output_value - p_z)
        error_value = e if _t < 1 else smear(error[-1], e, _t)
        error.append(error_value)
        r.fit(input_values, p_z, drag_value)
        #fit_fun = poly_function(*r.get_parameters())
        #fit_surface = plot_surface(axis_3d, fit_fun, value_range, colormap=cm.brg)
        #pyplot.pause(.001)
        #pyplot.draw()
        #if _t < number_of_points - 1:
        #    fit_surface.remove()
        if Timer.time_passed(2000):
            print(f"{_t * 100. / number_of_points:05.2f}% finished.")
    margin = (z_max - z_min) * .1
    axis_3d.set_zlim([z_min - margin, z_max + margin])
    print(error[-1])
    axis_3d.scatter(*zip(*out_z), alpha=.4, color="C1")
    # axis_3d.scatter(*zip(*tar_z), color="blue")
    # axis_3d.scatter(*zip(*points))
    axis_2d_error.plot(error)
    axis_2d_yz.scatter([_p[1] for _p in tar_z], [_p[2] for _p in tar_z], color="C0")
    axis_2d_xz.scatter([_p[0] for _p in tar_z], [_p[2] for _p in tar_z], color="C0")
    fit_x_co, fit_y_co = r.get_parameters()
    # print((x_coefficients, y_coefficients))
    # print((fit_x_co, fit_y_co))
    # print()
    plot_line(axis_2d_xz, fit_x_co, value_range, color="C1")
    plot_line(axis_2d_yz, fit_y_co, value_range, color="C1")
    pyplot.tight_layout()
    pyplot.show()
def test_2d():
    """Demo: fit LinearRegressor(1, 100) against a cubic target, scattering
    the regressor's predictions and the running error live while fitting.
    """
    fig = pyplot.figure()
    plot_axis = fig.add_subplot(211)
    plot_axis.set_xlabel("x")
    plot_axis.set_ylabel("y")
    error_axis = fig.add_subplot(212)
    error_axis.set_xlabel("t")
    error_axis.set_ylabel("error")
    dim = -10., 10.
    # r = MyRegression(4)
    r = LinearRegressor(1, 100)
    # fun = lambda _x: ((_x*.35) ** 2. - 4.) * ((_x*.35) + 3.) * ((_x*.35) - 4.) ** 2.
    # fun = lambda _x: 1. * _x ** 4. + -4. * _x ** 2. + 3 * _x ** 1.
    fun = lambda _x: 2. * _x**3. + -5. * _x**2. + 23. * _x + -112.
    # fun = lambda _x: sin(.5 * _x / math.pi)
    X = tuple(_x / 10. for _x in range(int(dim[0] * 10.), int(dim[1] * 10.)))
    Y = tuple(fun(_x) for _x in X)
    # frame the target curve with a 10% margin on both axes
    x_max, x_min = max(X), min(X)
    x_margin = (x_max - x_min) * .1
    y_max, y_min = max(Y), min(Y)
    y_margin = (y_max - y_min) * .1
    plot_axis.set_xlim((x_min - x_margin, x_max + x_margin))
    plot_axis.set_ylim((y_min - y_margin, y_max + y_margin))
    plot_axis.plot(X, Y, color="C0")
    number_of_points = 1000000
    error_axis.set_xlim((0, number_of_points))
    drag = number_of_points
    predictions = []
    training = []  # NOTE(review): collected but never read afterwards
    E = []
    error = 0
    for _t in range(number_of_points):
        x = random.uniform(*dim)
        p = r.output((x, ))  # prediction BEFORE fitting this sample
        predictions.append((x, p))
        y = fun(x)
        training.append((x, y))
        r.fit((x, ), y)  # , drag)
        error = smear(error, abs(p - y), _t)
        E.append(error)
        #fit_fun = lambda _x: sum(_c * _x ** _i for _i, _c in enumerate(r._weights))
        #P = [fit_fun(_x) for _x in X]
        # NOTE(review): redrawing on EVERY iteration makes this loop very
        # slow for 1e6 points -- presumably intentional for a live demo.
        e_line, = error_axis.plot(E, color="black")
        #line, = plot_axis.plot(X, P, color="C1")
        sc = plot_axis.scatter(*zip(*predictions), alpha=.2, color="black")
        pyplot.pause(.001)
        pyplot.draw()
        # keep the final frame on screen; remove transient artists otherwise
        if _t < number_of_points - 1:
            #line.remove()
            sc.remove()
            e_line.remove()
    pyplot.show()
def render(self, mode: str = "human"):
    """Render the hill-car environment: the track polyline, the car body,
    and two wheels.  Lazily constructs the viewer and all geometry on the
    first call; afterwards only updates the car's color and transform.

    :param mode: "human" to open a window, "rgb_array" to return pixels.
    :return: whatever the gym Viewer's render() returns for ``mode``.
    """
    screen_width = 600
    screen_height = 400
    car_width = 40
    car_height = 20

    def v_x(real_x: float) -> float:
        # map world x in [-pi, pi] onto [0, screen_width]
        return screen_width * (real_x + math.pi) / (2. * math.pi)

    def v_y(real_y: float) -> float:
        # vertical world-to-screen scaling
        return real_y * screen_height / 25.

    def render_track(fun: Callable[[float], float], value_range: Tuple[float, float]):
        x_range = numpy.linspace(*value_range, 100)
        # BUG FIX: y_values already contains screen coordinates
        # (v_y applied), but the original zipped them through v_y a second
        # time, drawing the track at a squared vertical scale.
        y_values = numpy.array(tuple(v_y(fun(_x)) for _x in x_range))
        track_data = list(zip((v_x(_x) for _x in x_range), y_values))
        self._track = rendering.make_polyline(track_data)
        self._track.set_linewidth(4)
        self._viewer.add_geom(self._track)

    if self._viewer is None:
        from gym.envs.classic_control import rendering
        self._viewer = rendering.Viewer(screen_width, screen_height)
        render_track(self._hill, (-math.pi, math.pi))
        clearance = 10
        l, r, t, b = -car_width / 2, car_width / 2, car_height, 0
        self._car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
        self._car.add_attr(rendering.Transform(translation=(0, clearance)))
        self._car_trans = rendering.Transform()
        self._car.add_attr(self._car_trans)
        self._viewer.add_geom(self._car)
        front_wheel = rendering.make_circle(car_height / 2.5)
        front_wheel.set_color(.5, .5, .5)
        front_wheel.add_attr(
            rendering.Transform(translation=(car_width / 4, clearance)))
        front_wheel.add_attr(self._car_trans)
        self._viewer.add_geom(front_wheel)
        back_wheel = rendering.make_circle(car_height / 2.5)
        back_wheel.add_attr(
            rendering.Transform(translation=(-car_width / 4, clearance)))
        back_wheel.add_attr(self._car_trans)
        back_wheel.set_color(.5, .5, .5)
        self._viewer.add_geom(back_wheel)

    # flash the car green at the top, then fade back toward black
    if self._at_top:
        self._car_green = 1.
    else:
        self._car_green = smear(self._car_green, 0., 10)
    self._car.set_color(0., self._car_green, 0.)

    pos = v_x(self._location)
    # NOTE(review): the car's vertical position uses an extra factor of 20
    # on top of v_y, which does not match the track's scale -- confirm the
    # intended vertical alignment of car and track.
    self._car_trans.set_translation(pos, v_y(self._hill(self._location)) * 20.)
    self._car_trans.set_rotation(math.sin(self._location))
    return self._viewer.render(return_rgb_array=mode == "rgb_array")
def _low_fit(self, data_in: Tuple[Tuple[RATIONAL_INPUT, ...], ...],
             data_out: Tuple[RATIONAL_OUTPUT, ...]):
    """Smear each stored average component toward the matching target.

    A drag of zero means "use the current iteration count as inertia";
    otherwise the fixed drag is used.  ``data_in`` is accepted for
    interface symmetry but not used by this implementation.
    """
    if self._drag == 0:
        inertia = self._iteration
    else:
        inertia = self._drag
    for target_values, average_values in zip(data_out, self._average):
        for index, (target_component, average_component) in enumerate(
                zip(target_values, average_values)):
            average_values[index] = smear(average_component, target_component, inertia)