Ejemplo n.º 1
0
    def step(self) -> Tuple[float, float, float, float, float]:
        """Run one train/test iteration and update the running statistics.

        Draws one batch from the test stream and one from the train stream,
        fits the predictor on the train batch, and smears rewards, errors and
        fit duration into running averages.

        Returns:
            Tuple of (duration, error_train, error_test, reward_train,
            reward_test) — each a smeared running average.
        """
        # Next test batch and smeared test-stream reward.
        examples_test = self._stream_test.next()
        inputs_test, targets_test = zip(*examples_test)
        self.reward_test = smear(self.reward_test,
                                 self._stream_test.get_reward(),
                                 self._iterations)

        # Next train batch and smeared train-stream reward.
        examples_train = self._stream_train.next()
        inputs_train, targets_train = zip(*examples_train)
        self.reward_train = smear(self.reward_train,
                                  self._stream_train.get_reward(),
                                  self._iterations)

        # Time only the predict + fit cycle on the training batch (ms).
        this_time = time.time()

        outputs_train = self._predictor.predict(inputs_train)
        self._predictor.fit(inputs_train, targets_train)

        self.duration = smear(self.duration, (time.time() - this_time) * 1000.,
                              self._iterations)

        outputs_test = self._predictor.predict(inputs_test)
        self.error_train = smear(
            self.error_train,
            self._stream_train.total_error(outputs_train, targets_train),
            self._iterations)
        # BUG FIX: the test error was previously computed via the *train*
        # stream (`self._stream_train.total_error(...)`); use the test stream
        # to mirror the train-error computation above.
        self.error_test = smear(
            self.error_test,
            self._stream_test.total_error(outputs_test, targets_test),
            self._iterations)

        self._iterations += 1

        return self.duration, self.error_train, self.error_test, self.reward_train, self.reward_test
 def fit(self, in_value: float, out_value: float, drag: int):
     """Fold one (input, output) observation into the running matrices.

     Each variance-matrix cell (r, c) is smeared towards in_value**(r + c);
     each covariance entry r is smeared towards out_value * in_value**r.
     """
     assert drag >= 0
     column_count = self._degree + 1
     for row_index, variance_row in enumerate(self._var_matrix):
         for col_index in range(column_count):
             updated_cell = smear(variance_row[col_index],
                                  in_value**(row_index + col_index), drag)
             variance_row[col_index] = updated_cell
         self._cov_matrix[row_index] = smear(
             self._cov_matrix[row_index],
             out_value * in_value**row_index, drag)
Ejemplo n.º 3
0
    def _integrate(self, sensor: RATIONAL_SENSOR, motor: RATIONAL_MOTOR,
                   reward: float):
        """Perform one actor-critic-style update from the previous transition.

        Updates the evaluation (state-value) model, the advantage model and
        the actor using the (sensor, motor, reward) stored on the previous
        call, then stores the current triple for the next call.
        """
        # todo: fix gradient descent!
        # todo: optimize with score function trick: http://www.youtube.com/watch?v=bRfUxQs6xIM&t=37m24s
        # todo: check out advantage: https://medium.freecodecamp.org/an-intro-to-advantage-actor-critic-methods-lets-play-sonic-the-hedgehog-86d6240171d

        iteration = self.get_iterations()

        # Skip the very first call: no previous transition is stored yet.
        if iteration >= 1:
            last_input = self._last_sensor + self._last_motor

            # evaluation S -> float
            # Fit the critic towards the bootstrapped return r + gamma * V(s').
            evaluation = self._evaluation.output(sensor)
            evaluation_error = self._evaluation.fit(
                self._last_sensor,
                self._last_reward + self._gamma * evaluation)
            self.average_value_error = smear(self.average_value_error,
                                             evaluation_error, iteration - 1)

            # advantage S x M -> float
            # last_advantage = r + gamma * V(s') - V(s): the TD error of the
            # previous transition, used as the advantage target.
            last_advantage = self._last_reward + self._gamma * evaluation - self._evaluation.output(
                self._last_sensor)
            advantage_error = self._advantage.fit(last_input, last_advantage)
            self.average_advantage_error = smear(self.average_advantage_error,
                                                 advantage_error,
                                                 iteration - 1)

            # actor S -> M (consider advantage instead of critic value)
            # Nudge the actor's current proposal towards the previously taken
            # motor, scaled by how much better/worse that motor's advantage was.
            best_known = tuple(
                clip(_m, *_ranges) for _m, _ranges in zip(
                    self._actor.output(self._last_sensor), self._motor_range))
            best_known_advantage = self._advantage.output(self._last_sensor +
                                                          best_known)
            delta_eval = last_advantage - best_known_advantage
            delta_step = tuple(_l - _b
                               for _l, _b in zip(self._last_motor, best_known))
            better_motor = tuple(
                clip(smear(_b, _b +
                           _d * delta_eval, self._past_scope), *_ranges)
                for _b, _d, _ranges in zip(best_known, delta_step,
                                           self._motor_range))
            actor_errors = self._actor.fit(self._last_sensor, better_motor)
            self.average_actor_error = smear(self.average_actor_error,
                                             cartesian_distance(actor_errors),
                                             iteration - 1)

        # Remember this transition for the next integration step.
        self._last_sensor, self._last_motor = sensor, motor
        self._last_reward = reward
Ejemplo n.º 4
0
 def integrate(self, perception: Optional[SENSOR_TYPE], action: MOTOR_TYPE,
               reward: float):
     """Fold the reward into the running average, then delegate the update.

     The reward is averaged even when no perception is available; the
     internal `_integrate` step is only run for a non-None perception.
     """
     self.average_reward = smear(self.average_reward, reward,
                                 self.__iteration)
     if perception is not None:
         self._integrate(perception, action, reward)
Ejemplo n.º 5
0
def test_3d():
    """Interactive demo: fit a 2-D polynomial regressor to a cosine surface.

    Runs forever, sampling random points, fitting online, and roughly once
    per second redrawing the fitted surface and the smeared error trace.
    """
    from mpl_toolkits.mplot3d import Axes3D

    dim_range = -10., 10.

    plot_axis, error_axis = setup_3d_axes()

    # 2 input dimensions, degree 4.
    r = MultiplePolynomialFromLinearRegression(2,
                                               4,
                                               past_scope=100,
                                               learning_drag=0)

    # fun = lambda _x, _y: 10. + 1. * _x ** 1. + 1. * _y ** 1. + 4. * _x * _y + 1. * _x ** 2. + -2.6 * _y ** 2.
    fun = lambda _x, _y: -cos(_x / (1. * math.pi)) + -cos(_y / (1. * math.pi))
    plot_surface(plot_axis, fun, (dim_range, dim_range), resize=True)
    #pyplot.pause(.001)
    #pyplot.draw()

    iterations = 0

    error_development = deque(maxlen=10000)

    while True:
        x = random.uniform(*dim_range)
        y = random.uniform(*dim_range)
        z_o = r.output([x, y])

        z_t = fun(x, y)
        # Smeared running average of the absolute error (0 on the first pass).
        error = 0 if iterations < 1 else smear(error_development[-1],
                                               abs(z_o - z_t), iterations)
        error_development.append(error)

        # Redraw about once per second.
        if Timer.time_passed(1000):
            print(f"{iterations:d} finished")

            c = r.get_coefficients()
            print(c)
            print(r.derive_coefficients(c, 0))

            ln = plot_surface(plot_axis,
                              lambda _x, _y: r.output([_x, _y]),
                              (dim_range, dim_range),
                              resize=False)
            # ln_d = plot_surface(plot_axis, lambda _x, _y: r.derivation_output([_x, _y], 0), (dim_range, dim_range), resize=False)
            e, = error_axis.plot(range(len(error_development)),
                                 error_development,
                                 color="black")
            error_axis.set_ylim(
                (min(error_development), max(error_development)))

            pyplot.pause(.001)
            pyplot.draw()

            # Remove the temporary artists so the next redraw starts clean.
            ln.remove()
            # ln_d.remove()
            e.remove()

        r.fit([x, y], z_t)  # , past_scope=iterations)
        iterations += 1
    def fit(self, x: Tuple[float, ...], y: float):
        """Update per-dimension running means, variances and covariances.

        Args:
            x: one input sample; length must equal the input dimensionality.
            y: the corresponding target value.
        """
        assert len(x) == self._input_dimensions
        # A negative drag disables fitting entirely.
        if self._drag < 0:
            return

        dy = y - self._mean_y
        for _i, (_var_x, _cov_xy) in enumerate(zip(self._var_x, self._cov_xy)):
            _dx = x[_i] - self._mean_x[_i]  # distance from mean x
            self._var_x[_i] = smear(_var_x, _dx**2., self._drag)
            self._cov_xy[_i] = smear(_cov_xy, _dx * dy, self._drag)

        self._var_y = smear(self._var_y, dy**2., self._drag)

        # NOTE(review): on the very first sample the (co)variances above are
        # computed against the pre-seed means, and only afterwards are the
        # means seeded from the sample — confirm this ordering is intentional.
        if 0 >= self._iterations:
            self._mean_x = list(x)
            self._mean_y = y

        for _i, (_mean_x, _x) in enumerate(zip(self._mean_x, x)):
            self._mean_x[_i] = smear(_mean_x, _x, self._drag)

        self._mean_y = smear(self._mean_y, y, self._drag)
        self._iterations = 1  # TODO: change
Ejemplo n.º 7
0
    def fit(self,
            input_value: float,
            output_value: float,
            past_scope: int = -1,
            learning_drag: int = -1):
        """Update the running linear-regression statistics with one sample.

        Per-call `past_scope` / `learning_drag` override the instance values
        when non-negative; at least one source for each must be >= 0.
        """
        assert self._past_scope >= 0 or past_scope >= 0
        assert self._learning_drag >= 0 or learning_drag >= 0
        scope = max(self._past_scope, past_scope)
        drag = max(self._learning_drag, learning_drag)

        # Deviations from the current running means.
        delta_y = output_value - self._mean_y
        delta_x = input_value - self._mean_x
        self._variance_x = smear(self._variance_x, delta_x**2.,
                                 scope)  # remove smear?
        self._cross_variance_xy = smear(self._cross_variance_xy,
                                        delta_x * delta_y, scope)

        self._mean_x = smear(self._mean_x, input_value, scope)
        self._mean_y = smear(self._mean_y, output_value, scope)

        # Slope estimate cov(x, y) / var(x); guard against zero variance.
        if self._variance_x == 0.:
            slope_estimate = 0.
        else:
            slope_estimate = self._cross_variance_xy / self._variance_x
        self.gradient = smear(self.gradient, slope_estimate, drag)
Ejemplo n.º 8
0
    def some_random_games_first():
        """Run the environment forever, acting through the controller.

        Starts rendering once the smeared average reward reaches .9 and
        prints progress every two seconds. Relies on `env`, `controller`,
        `smear` and `Timer` from the enclosing scope.
        """
        # Each of these is its own game.
        # this is each frame, up to 200...but we wont make it that far.

        average_reward = 0.
        iterations = 0
        sensor = None
        visualize = False
        while True:
            # This will display the environment
            # Only display if you really want to see it.
            # Takes much longer to display it.
            if average_reward >= .9:
                visualize = True
            if visualize:
                env.render()

            # This will just create a sample action in any environment.
            # In this environment, the action can be 0 or 1, which is left or right

            # Before the first observation, fall back to a fixed action tuple.
            if sensor is None:
                # motor = env.action_space.sample()
                motor = 0.,
            else:
                motor = controller.react(tuple(sensor))

            # this executes the environment with an action,
            # and returns the observation of the environment,
            # the reward, if the env is over, and other info.
            state = env.step(numpy.array(motor))
            sensor, reward, done, info = state

            # (x_pos, x_vel, theta_ang, theta_vel)

            controller.integrate(tuple(sensor), tuple(motor), reward)

            average_reward = smear(average_reward, reward, iterations)
            iterations += 1

            if Timer.time_passed(2000):
                print(f"{iterations:010d} iterations, average reward: {average_reward:.2f}")
Ejemplo n.º 9
0
    def fit(self,
            input_value: float,
            output_value: float,
            past_scope: int = -1,
            learning_drag: int = -1) -> float:
        """Fit the underlying gradient model and smear the offset.

        Per-call `past_scope` / `learning_drag` override the instance values
        when non-negative; at least one source for each must be >= 0.

        Returns:
            The absolute prediction error measured before this sample was
            incorporated.
        """
        assert self._past_scope >= 0 or past_scope >= 0
        assert self._learning_drag >= 0 or learning_drag >= 0
        scope = max(self._past_scope, past_scope)
        drag = max(self._learning_drag, learning_drag)

        # Error of the pre-update prediction for this sample.
        error = abs(self.output(input_value) - output_value)

        self.linear_gradient.fit(input_value,
                                 output_value,
                                 past_scope=scope,
                                 learning_drag=drag)
        # NOTE(review): reads self._mean_x / self._mean_y, which presumably
        # mirror the gradient model's means — confirm against the class.
        offset_target = self._mean_y - self.linear_gradient.gradient * self._mean_x
        self.offset = smear(self.offset, offset_target, drag)

        return error
Ejemplo n.º 10
0
def test_2d():
    """Interactive demo: fit a degree-3 polynomial regressor to a cubic.

    Runs forever, sampling random points, fitting online, and roughly once
    per second redrawing the fit, its derivative and the error trace.
    """
    dim_range = -10., 10.

    plot_axis, error_axis = setup_2d_axes()

    # 1 input dimension, degree 3.
    r = MultiplePolynomialFromLinearRegression(1, 3, -1)
    # r = MultiplePolynomialOptimizationRegression(1, 3)

    # fun = lambda _x: -cos(_x / (1. * math.pi))
    fun = lambda _x: 0. + 0. * _x**1. + 0. * _x**2. + 1. * _x**3.  #  + 1. * _x ** 4.
    x_range = tuple(
        _x / 10.
        for _x in range(int(dim_range[0]) * 10,
                        int(dim_range[1]) * 10))
    y_range = tuple(fun(_x) for _x in x_range)
    plot_axis.plot(x_range, y_range, color="C0")
    plot_axis.set_xlim(*dim_range)

    iterations = 0

    window_size = 100000
    error_development = deque(maxlen=window_size)

    while True:
        x = random.uniform(*dim_range)
        y_o = r.output([x])

        y_t = fun(x)
        # Smeared running average of the absolute error (0 on the first pass).
        error = 0 if iterations < 1 else smear(error_development[-1],
                                               abs(y_o - y_t), iterations)
        error_development.append(error)

        # Redraw about once per second.
        if Timer.time_passed(1000):
            print(f"{iterations:d} iterations finished")

            values = tuple(r.output([_x]) for _x in x_range)
            l, = plot_axis.plot(x_range, values, color="C1")
            values_d = tuple(r.derivation_output([_x], 0) for _x in x_range)
            l_d, = plot_axis.plot(x_range, values_d, color="C2")
            plot_axis.set_ylim(
                (min(values + values_d), max(values + values_d)))

            # Scroll the error axis window along with the sample count.
            x_min = max(0, iterations - window_size)
            x_max = x_min + window_size
            error_axis.set_xlim((x_min, x_max))

            x_min_limit = max(0, iterations - len(error_development))
            e, = error_axis.plot(range(x_min_limit,
                                       x_min_limit + len(error_development)),
                                 error_development,
                                 color="black")
            error_axis.set_ylim(
                (min(error_development), max(error_development)))

            pyplot.pause(.001)
            pyplot.draw()

            # Remove the temporary artists so the next redraw starts clean.
            l.remove()
            l_d.remove()
            e.remove()

        r.fit([x], y_t, past_scope=1000, learning_drag=0)

        iterations += 1
def test_3d():
    """Fit a 2-input linear regressor to a 3-D surface and plot the result.

    Draws the target surface, the fitted sample points, the smeared error
    trace and per-axis fitted lines into a 2x2 figure.
    """
    fig = pyplot.figure()
    axis_3d = fig.add_subplot(221, projection='3d')
    axis_3d.set_xlabel("x")
    axis_3d.set_ylabel("y")
    axis_3d.set_zlabel("z")

    axis_2d_error = fig.add_subplot(222)
    axis_2d_error.set_xlabel("t")
    axis_2d_error.set_ylabel("error")

    axis_2d_yz = fig.add_subplot(223)
    axis_2d_yz.set_xlabel("y")
    axis_2d_yz.set_ylabel("z")

    axis_2d_xz = fig.add_subplot(224)
    axis_2d_xz.set_xlabel("x")
    axis_2d_xz.set_ylabel("z")

    # x_coefficients = 0., 1.,
    # y_coefficients = 0., -1.,

    # degree = number of coefficients - 1
    x_coefficients = -375., 400., -140., 20., -1.,
    y_coefficients = 375., -400., 140., -20., 1.,

    # fun = poly_function(x_coefficients, y_coefficients)
    fun = trig_function()

    number_of_points = 1000
    drag_value = number_of_points

    value_range = 0., 10.

    plot_surface(axis_3d, fun, value_range,
                 colormap=cm.viridis)  # , color="C0")

    # r = MultiplePolynomialRegressor([6, 6])
    r = LinearRegressor(2, number_of_points)

    error = []
    tar_z = []
    out_z = []
    z_min, z_max = .0, .0
    for _t in range(number_of_points):
        p_x = random.uniform(*value_range)
        p_y = random.uniform(*value_range)

        input_values = p_x, p_y
        output_value = r.output(input_values)

        out_z.append((p_x, p_y, output_value))

        # Track the z extent of the target samples for the axis limits below.
        p_z = fun(p_x, p_y)
        if _t < 1:
            z_min = p_z
            z_max = p_z
        else:
            z_min = min(z_min, p_z)
            z_max = max(z_max, p_z)

        tar_z.append((p_x, p_y, p_z))

        # Smeared running average of the absolute prediction error.
        e = abs(output_value - p_z)
        error_value = e if _t < 1 else smear(error[-1], e, _t)
        error.append(error_value)

        r.fit(input_values, p_z, drag_value)

        #fit_fun = poly_function(*r.get_parameters())
        #fit_surface = plot_surface(axis_3d, fit_fun, value_range, colormap=cm.brg)
        #pyplot.pause(.001)
        #pyplot.draw()
        #if _t < number_of_points - 1:
        #    fit_surface.remove()
        if Timer.time_passed(2000):
            print(f"{_t * 100. / number_of_points:05.2f}% finished.")

    margin = (z_max - z_min) * .1
    axis_3d.set_zlim([z_min - margin, z_max + margin])
    print(error[-1])
    axis_3d.scatter(*zip(*out_z), alpha=.4, color="C1")
    # axis_3d.scatter(*zip(*tar_z), color="blue")

    # axis_3d.scatter(*zip(*points))
    axis_2d_error.plot(error)
    axis_2d_yz.scatter([_p[1] for _p in tar_z], [_p[2] for _p in tar_z],
                       color="C0")
    axis_2d_xz.scatter([_p[0] for _p in tar_z], [_p[2] for _p in tar_z],
                       color="C0")

    fit_x_co, fit_y_co = r.get_parameters()

    # print((x_coefficients, y_coefficients))
    # print((fit_x_co, fit_y_co))
    # print()
    plot_line(axis_2d_xz, fit_x_co, value_range, color="C1")
    plot_line(axis_2d_yz, fit_y_co, value_range, color="C1")

    pyplot.tight_layout()
    pyplot.show()
def test_2d():
    """Fit a 1-input linear regressor to a cubic and animate the fit.

    Redraws the smeared error trace and the prediction scatter after every
    single sample — slow, but shows convergence live.
    """
    fig = pyplot.figure()
    plot_axis = fig.add_subplot(211)
    plot_axis.set_xlabel("x")
    plot_axis.set_ylabel("y")

    error_axis = fig.add_subplot(212)
    error_axis.set_xlabel("t")
    error_axis.set_ylabel("error")

    dim = -10., 10.
    # r = MyRegression(4)
    r = LinearRegressor(1, 100)

    # fun = lambda _x: ((_x*.35) ** 2. - 4.) * ((_x*.35) + 3.) * ((_x*.35) - 4.) ** 2.
    # fun = lambda _x: 1. * _x ** 4. + -4. * _x ** 2. + 3 * _x ** 1.
    fun = lambda _x: 2. * _x**3. + -5. * _x**2. + 23. * _x + -112.
    # fun = lambda _x: sin(.5 * _x / math.pi)

    X = tuple(_x / 10. for _x in range(int(dim[0] * 10.), int(dim[1] * 10.)))
    Y = tuple(fun(_x) for _x in X)

    # Pad the target curve's extent by 10% on each axis.
    x_max, x_min = max(X), min(X)
    x_margin = (x_max - x_min) * .1

    y_max, y_min = max(Y), min(Y)
    y_margin = (y_max - y_min) * .1

    plot_axis.set_xlim((x_min - x_margin, x_max + x_margin))
    plot_axis.set_ylim((y_min - y_margin, y_max + y_margin))
    plot_axis.plot(X, Y, color="C0")

    number_of_points = 1000000
    error_axis.set_xlim((0, number_of_points))
    drag = number_of_points

    predictions = []
    training = []

    E = []
    error = 0
    for _t in range(number_of_points):
        x = random.uniform(*dim)
        p = r.output((x, ))
        predictions.append((x, p))

        y = fun(x)
        training.append((x, y))

        r.fit((x, ), y)  # , drag)

        # Smeared running average of the absolute prediction error.
        error = smear(error, abs(p - y), _t)

        E.append(error)

        #fit_fun = lambda _x: sum(_c * _x ** _i for _i, _c in enumerate(r._weights))
        #P = [fit_fun(_x) for _x in X]

        e_line, = error_axis.plot(E, color="black")
        #line, = plot_axis.plot(X, P, color="C1")
        sc = plot_axis.scatter(*zip(*predictions), alpha=.2, color="black")

        pyplot.pause(.001)
        pyplot.draw()

        # Keep only the final frame's artists on screen.
        if _t < number_of_points - 1:
            #line.remove()
            sc.remove()
            e_line.remove()

    pyplot.show()
    def render(self, mode: str = "human"):
        """Render the hill-car environment with gym's classic_control viewer.

        Lazily builds the viewer, the track polyline and the car geometry on
        the first call; later calls only update the car colour and transform.

        Args:
            mode: "human" for on-screen rendering, "rgb_array" to return the
                frame as an array (forwarded to the viewer).
        """
        screen_width = 600
        screen_height = 400

        car_width = 40
        car_height = 20

        def v_x(real_x: float) -> float:
            # Map world x in [-pi, pi] to screen pixels.
            return screen_width * (real_x + math.pi) / (2. * math.pi)

        def v_y(real_y: float) -> float:
            # Map world y to screen pixels (25 world units per screen height).
            return real_y * screen_height / 25.

        def render_track(fun: Callable[[float], float],
                         value_range: Tuple[float, float]):
            x_range = numpy.linspace(*value_range, 100)
            # NOTE(review): v_y is applied here AND again in the zip below,
            # so the track height is scaled twice; the car position instead
            # uses v_y(...) * 20 — confirm the intended vertical scaling.
            y_values = numpy.array(tuple(v_y(fun(_y)) for _y in x_range))
            track_data = list(
                zip((v_x(_x) for _x in x_range), (v_y(_y) for _y in y_values)))

            self._track = rendering.make_polyline(track_data)
            self._track.set_linewidth(4)
            self._viewer.add_geom(self._track)

        # One-time setup of viewer, track and car geometry.
        if self._viewer is None:
            from gym.envs.classic_control import rendering
            self._viewer = rendering.Viewer(screen_width, screen_height)

            render_track(self._hill, (-math.pi, math.pi))

            clearance = 10

            l, r, t, b = -car_width / 2, car_width / 2, car_height, 0
            self._car = rendering.FilledPolygon([(l, b), (l, t), (r, t),
                                                 (r, b)])
            self._car.add_attr(rendering.Transform(translation=(0, clearance)))

            self._car_trans = rendering.Transform()
            self._car.add_attr(self._car_trans)
            self._viewer.add_geom(self._car)
            front_wheel = rendering.make_circle(car_height / 2.5)
            front_wheel.set_color(.5, .5, .5)
            front_wheel.add_attr(
                rendering.Transform(translation=(car_width / 4, clearance)))
            front_wheel.add_attr(self._car_trans)
            self._viewer.add_geom(front_wheel)

            back_wheel = rendering.make_circle(car_height / 2.5)
            back_wheel.add_attr(
                rendering.Transform(translation=(-car_width / 4, clearance)))
            back_wheel.add_attr(self._car_trans)
            back_wheel.set_color(.5, .5, .5)
            self._viewer.add_geom(back_wheel)

        # Flash the car green while at the top, fading back towards black.
        if self._at_top:
            self._car_green = 1.
        else:
            self._car_green = smear(self._car_green, 0., 10)

        self._car.set_color(0., self._car_green, 0.)

        pos = v_x(self._location)
        self._car_trans.set_translation(pos,
                                        v_y(self._hill(self._location)) * 20.)
        self._car_trans.set_rotation(math.sin(self._location))

        return self._viewer.render(return_rgb_array=mode == "rgb_array")
    def _low_fit(self, data_in: Tuple[Tuple[RATIONAL_INPUT, ...], ...], data_out: Tuple[RATIONAL_OUTPUT, ...]):
        """Smear each stored average towards the matching target values.

        Note: `data_in` is currently unused here; only targets are folded in.
        """
        # With drag == 0, fall back to the iteration count as inertia.
        if self._drag == 0:
            inertia = self._iteration
        else:
            inertia = self._drag

        for target_values, average_values in zip(data_out, self._average):
            for index, (target, average) in enumerate(zip(target_values, average_values)):
                average_values[index] = smear(average, target, inertia)