def functionality_nominal(sequence: Sequence[Tuple[Hashable, Hashable]]) -> float:
    """Measure how close a sequence of (input, output) pairs is to a function.

    For every distinct input the most frequent output is counted as
    "explained"; the result is the number of explained examples divided by
    the total number of examples.

    :param sequence: iterable of ``(input, output)`` pairs, both hashable.
    :return: a value in ``(0, 1]``; ``1.`` means every input always maps to
        the same output. An empty sequence contains no contradicting
        example and therefore yields ``1.`` instead of dividing by zero.
    """
    # input -> {output -> number of occurrences}
    examples = {}
    for _t, (each_input, each_output) in enumerate(sequence):
        output_frequencies = examples.setdefault(each_input, {})
        output_frequencies[each_output] = output_frequencies.get(each_output, 0) + 1

        if Timer.time_passed(2000):
            print("{:05d} examples processed...".format(_t))

    best = 0
    total = 0
    for output_frequencies in examples.values():
        frequencies = output_frequencies.values()
        best += max(frequencies)
        total += sum(frequencies)

        if Timer.time_passed(2000):
            print("{:05d} examples processed...".format(total))

    if total == 0:
        # no examples at all: nothing contradicts a functional relation
        return 1.

    return best / total
def test_3d():
    """Endlessly fit a two-input polynomial regressor to a cosine surface.

    Each iteration draws a random point from the square spanned by
    ``value_span``, queries the regressor for that point *before* fitting it
    on the true value, and records a smoothed absolute error. Whenever
    ``Timer.time_passed(1000)`` fires, the coefficients are printed and the
    fitted surface and error curve are redrawn. The loop never terminates.
    """
    # NOTE(review): `Axes3D` is not referenced below; presumably the import is
    # kept for its side effect of enabling 3d axes -- confirm before removing.
    from mpl_toolkits.mplot3d import Axes3D

    value_span = -10., 10.
    both_spans = value_span, value_span

    surface_axis, error_axis = setup_3d_axes()
    regressor = MultiplePolynomialFromLinearRegression(2, 4, past_scope=100, learning_drag=0)

    def target(_x, _y):
        # alternative polynomial target that was used previously:
        # 10. + 1. * _x ** 1. + 1. * _y ** 1. + 4. * _x * _y + 1. * _x ** 2. + -2.6 * _y ** 2.
        return -cos(_x / (1. * math.pi)) + -cos(_y / (1. * math.pi))

    def fitted(_x, _y):
        return regressor.output([_x, _y])

    plot_surface(surface_axis, target, both_spans, resize=True)

    iterations = 0
    error_development = deque(maxlen=10000)

    while True:
        x = random.uniform(*value_span)
        y = random.uniform(*value_span)

        predicted = regressor.output([x, y])
        expected = target(x, y)

        # the very first entry is zero, afterwards the error is smoothed
        if iterations < 1:
            error = 0
        else:
            error = smear(error_development[-1], abs(predicted - expected), iterations)
        error_development.append(error)

        if Timer.time_passed(1000):
            print(f"{iterations:d} finished")

            coefficients = regressor.get_coefficients()
            print(coefficients)
            print(regressor.derive_coefficients(coefficients, 0))

            fitted_surface = plot_surface(surface_axis, fitted, both_spans, resize=False)
            error_line, = error_axis.plot(range(len(error_development)), error_development, color="black")
            error_axis.set_ylim((min(error_development), max(error_development)))

            pyplot.pause(.001)
            pyplot.draw()

            # drop the artists again so the next redraw starts clean
            fitted_surface.remove()
            error_line.remove()

        regressor.fit([x, y], expected)
        iterations += 1
def _plot_h_stacked_bars(axis: pyplot.Axes.axes, segments: Sequence[Sequence[Tuple[Any, float]]]): for _i, each_level in enumerate(segments): for _x in range(len(each_level) - 1): each_left, each_shape = each_level[_x] each_right, _ = each_level[_x + 1] each_width = each_right - each_left hsv = distribute_circular(each_shape), .2, 1. axis.barh(_i, each_width, height=1., align="edge", left=each_left, color=hsv_to_rgb(hsv)) if Timer.time_passed(2000): print("Finished {:5.2f}% of plotting level {:d}/{:d}...".format(100. * _x / (len(each_level) - 1), _i, len(segments)))
def new_setup():
    """Run a fixed exchange-rate prediction experiment with a semiotic model.

    Builds a training and a test stream over the same stamp interval, the
    test stream being shifted forward by ``behind``, wraps them together with
    the predictor in a ``SetupPrediction`` and advances it ``iterations``
    times, printing progress whenever ``Timer.time_passed(2000)`` fires.
    """
    iterations = 500000
    no_ex = 1

    currencies = "qtum", "bnt", "snt", "eos"
    # only the first currency is used, both as input and as target
    train_inputs, train_target = currencies[:1], currencies[0]
    test_inputs, test_target = currencies[:1], currencies[0]

    # NOTE(review): stamps look like Unix timestamps in seconds -- confirm
    first_stamp, last_stamp = 1501113780, 1532508240
    behind = 60

    predictor = RationalSemioticModel(
        input_dimension=len(train_inputs),
        output_dimension=1,
        no_examples=no_ex,
        alpha=100,
        sigma=.2,
        drag=100,
        trace_length=1,
    )

    training_streams = exchange_rate_sequence(
        first_stamp, last_stamp - behind, behind, train_inputs, train_target)
    test_streams = exchange_rate_sequence(
        first_stamp + behind, last_stamp, behind, test_inputs, test_target)

    experiment = SetupPrediction(
        "test", predictor, training_streams, test_streams, logging_steps=iterations // 1000)

    for _i in range(iterations):
        data = next(experiment)
        if Timer.time_passed(2000):
            print(f"finished {(_i + 1) * 100 / iterations:5.2f}%...\n{str(data):s}\n")
def some_random_games_first():
    """Let ``controller`` act in ``env`` forever while tracking its average reward.

    ``env`` and ``controller`` are not defined here and are taken from the
    enclosing scope. The first action is the constant ``(0.,)``; afterwards
    the controller chooses the action from the last observation. Rendering
    is switched on permanently once the smoothed reward reaches ``.9``.
    The episode-end flag returned by ``env.step`` is ignored and the loop
    never terminates.
    """
    average_reward = 0.
    iterations = 0
    sensor = None
    visualize = False

    while True:
        # rendering is slow, so it only starts once the controller does well;
        # from then on it stays enabled
        visualize = visualize or average_reward >= .9
        if visualize:
            env.render()

        # no observation yet on the first step: fall back to a fixed action
        motor = (0.,) if sensor is None else controller.react(tuple(sensor))

        # advance the environment; only observation and reward are used
        sensor, reward, _done, _info = env.step(numpy.array(motor))

        controller.integrate(tuple(sensor), tuple(motor), reward)

        average_reward = smear(average_reward, reward, iterations)
        iterations += 1

        if Timer.time_passed(2000):
            print(f"{iterations:010d} iterations, average reward: {average_reward:.2f}")
def _get_segments(time_axis: Sequence[Any], states: List[Tuple[int, ...]]) -> Tuple[Sequence[Tuple[int, Any]], ...]: assert(len(time_axis) == len(states)) max_level = max(len(_x) for _x in states) levels = tuple([] for _ in range(max_level)) for _j, (each_time, each_context) in enumerate(zip(time_axis, states)): for _i, each_level in enumerate(levels): each_shape = each_context[_i] if _i < len(each_context) else -1 if 0 < len(each_level): _, last_shape = each_level[-1] else: last_shape = -1 if each_shape != last_shape: data_point = each_time, each_shape each_level.append(data_point) if Timer.time_passed(2000): print("Finished {:5.2f}% of segmenting...".format(100. * _j / len(time_axis))) return levels
def _mean_example_error(outputs, targets) -> float:
    """Return the mean error over all output/target pairs.

    The Euclidean distance between each output and its target is tried
    first. If that raises ``TypeError`` (for instance because the values
    cannot be iterated or subtracted), the error of a pair is ``1.`` if
    output and target differ and ``0.`` otherwise.
    """
    try:
        return sum(
            sqrt(sum((__o - __t) ** 2 for __o, __t in zip(_o, _t)))
            for _o, _t in zip(outputs, targets)) / len(targets)

    except TypeError:
        return sum(float(_o != _t) for _o, _t in zip(outputs, targets)) / len(targets)


def _append_output_traces(label: str, predictor_name: str, outputs, targets) -> None:
    """Append every output component and its target component to ``Visualize``.

    ``label`` ("train" or "test") is inserted into the axis and target names.
    """
    try:
        for _e, (each_output, each_target) in enumerate(zip(outputs, targets)):
            for _o, (output_value, target_value) in enumerate(zip(each_output, each_target)):
                axis_key = f"output {label} {_o:02d}/{_e:02d}"
                Visualize.append(axis_key, predictor_name, output_value)
                Visualize.append(axis_key, f"target {label}", target_value)

    except TypeError:
        # deliberate best effort: outputs that cannot be split into
        # components are simply not traced
        pass


def setup(predictor: Predictor, train_generator, test_generator, visualization_steps: int, iterations: int = 500000):
    """Train ``predictor`` online and log running train/test errors.

    In every iteration one batch of concurrent examples is drawn from each
    generator, the predictor predicts both batches and is then fitted on the
    training batch only. Running averages of train error, test error and the
    time spent in predict/fit are appended to ``Visualize`` every
    ``visualization_steps`` iterations and finalized after the last one.

    :param predictor: object providing ``name``, ``predict`` and ``fit``.
    :param train_generator: iterator yielding sequences of
        ``(input, target)`` examples used for prediction and fitting.
    :param test_generator: iterator yielding sequences of
        ``(input, target)`` examples used for prediction only.
    :param visualization_steps: logging interval in iterations; must not
        be zero.
    :param iterations: total number of iterations.
    """
    print("Starting experiment with {:s} for {:d} iterations...".format(predictor.name(), iterations))

    predictor_name = predictor.__class__.__name__

    average_train_error = 0.
    average_test_error = 0.
    average_duration = 0.

    for t in range(iterations):
        # get concurrent examples
        examples_train = next(train_generator)
        inputs_train, targets_train = zip(*examples_train)

        examples_test = next(test_generator)
        inputs_test, targets_test = zip(*examples_test)

        # predict both batches, then fit on the training batch; only this
        # part is timed
        this_time = time.time()
        outputs_train = predictor.predict(inputs_train)
        outputs_test = predictor.predict(inputs_test)
        predictor.fit(examples_train)
        duration = time.time() - this_time

        # todo: continue from here

        train_error = _mean_example_error(outputs_train, targets_train)
        test_error = _mean_example_error(outputs_test, targets_test)

        # incremental means over all iterations so far
        average_train_error = (average_train_error * t + train_error) / (t + 1)
        average_test_error = (average_test_error * t + test_error) / (t + 1)
        average_duration = (average_duration * t + duration) / (t + 1)

        if (t + 1) % visualization_steps == 0:
            Visualize.append("error train", predictor_name, average_train_error)
            Visualize.append("error test", predictor_name, average_test_error)
            Visualize.append("duration", predictor_name, average_duration)

            _append_output_traces("train", predictor_name, outputs_train, targets_train)
            _append_output_traces("test", predictor_name, outputs_test, targets_test)

        if Timer.time_passed(2000):
            print("Finished {:05.2f}%...".format(100. * t / iterations))

    Visualize.finalize("error train", predictor_name)
    Visualize.finalize("error test", predictor_name)
    Visualize.finalize("duration", predictor_name)
def test_2d():
    """Endlessly fit a one-input polynomial regressor to a cubic and plot it.

    The target curve is drawn once. Each iteration then samples a random
    ``x`` from ``value_span``, queries the regressor before fitting it on
    the true value and records a smoothed absolute error. Whenever
    ``Timer.time_passed(1000)`` fires, the fitted curve, its derivative
    output and the error history are redrawn. The loop never terminates.
    """
    value_span = -10., 10.
    curve_axis, error_axis = setup_2d_axes()

    regressor = MultiplePolynomialFromLinearRegression(1, 3, -1)
    # alternative regressor: MultiplePolynomialOptimizationRegression(1, 3)

    # alternative target: lambda _x: -cos(_x / (1. * math.pi))
    target = lambda _x: 0. + 0. * _x**1. + 0. * _x**2. + 1. * _x**3.

    # sample positions in steps of 0.1 across the value span
    first_tenth = int(value_span[0]) * 10
    last_tenth = int(value_span[1]) * 10
    x_range = tuple(_x / 10. for _x in range(first_tenth, last_tenth))
    y_range = tuple(target(_x) for _x in x_range)
    curve_axis.plot(x_range, y_range, color="C0")
    curve_axis.set_xlim(*value_span)

    iterations = 0
    window_size = 100000
    error_development = deque(maxlen=window_size)

    while True:
        x = random.uniform(*value_span)
        predicted = regressor.output([x])
        expected = target(x)

        # the very first entry is zero, afterwards the error is smoothed
        if iterations < 1:
            error = 0
        else:
            error = smear(error_development[-1], abs(predicted - expected), iterations)
        error_development.append(error)

        if Timer.time_passed(1000):
            print(f"{iterations:d} iterations finished")

            values = tuple(regressor.output([_x]) for _x in x_range)
            fit_line, = curve_axis.plot(x_range, values, color="C1")

            values_d = tuple(regressor.derivation_output([_x], 0) for _x in x_range)
            derivative_line, = curve_axis.plot(x_range, values_d, color="C2")

            both_curves = values + values_d
            curve_axis.set_ylim((min(both_curves), max(both_curves)))

            # the error axis shows a window of fixed width
            window_start = max(0, iterations - window_size)
            error_axis.set_xlim((window_start, window_start + window_size))

            no_errors = len(error_development)
            history_start = max(0, iterations - no_errors)
            error_line, = error_axis.plot(
                range(history_start, history_start + no_errors), error_development, color="black")
            error_axis.set_ylim((min(error_development), max(error_development)))

            pyplot.pause(.001)
            pyplot.draw()

            # drop the artists again so the next redraw starts clean
            fit_line.remove()
            derivative_line.remove()
            error_line.remove()

        regressor.fit([x], expected, past_scope=1000, learning_drag=0)
        iterations += 1
def test_3d():
    """Fit a two-input ``LinearRegressor`` on random samples and show the result.

    Draws the target surface, then samples ``number_of_points`` random
    points; for each one the regressor is queried before it is fitted on the
    true value, and a smoothed absolute error is recorded. Afterwards the
    regressor outputs are scattered over the surface, the error history and
    the targets projected onto x/z and y/z are plotted together with the
    fitted parameters, and the figure is shown.
    """
    fig = pyplot.figure()

    axis_3d = fig.add_subplot(221, projection='3d')
    axis_3d.set_xlabel("x")
    axis_3d.set_ylabel("y")
    axis_3d.set_zlabel("z")

    axis_2d_error = fig.add_subplot(222)
    axis_2d_error.set_xlabel("t")
    axis_2d_error.set_ylabel("error")

    axis_2d_yz = fig.add_subplot(223)
    axis_2d_yz.set_xlabel("y")
    axis_2d_yz.set_ylabel("z")

    axis_2d_xz = fig.add_subplot(224)
    axis_2d_xz.set_xlabel("x")
    axis_2d_xz.set_ylabel("z")

    # degree = number of coefficients - 1
    # NOTE(review): both coefficient tuples are only consumed by the disabled
    # alternative `poly_function(x_coefficients, y_coefficients)`.
    x_coefficients = -375., 400., -140., 20., -1.,
    y_coefficients = 375., -400., 140., -20., 1.,

    fun = trig_function()

    number_of_points = 1000
    drag_value = number_of_points
    value_range = 0., 10.

    plot_surface(axis_3d, fun, value_range, colormap=cm.viridis)

    # alternative regressor: MultiplePolynomialRegressor([6, 6])
    regressor = LinearRegressor(2, number_of_points)

    error = []
    targets = []
    outputs = []
    z_min, z_max = .0, .0

    for _t in range(number_of_points):
        p_x = random.uniform(*value_range)
        p_y = random.uniform(*value_range)
        sample = p_x, p_y

        # query before fitting, so the error reflects unseen points
        output_value = regressor.output(sample)
        outputs.append((p_x, p_y, output_value))

        p_z = fun(p_x, p_y)
        # track the target's value range for the z limits
        if _t < 1:
            z_min = z_max = p_z
        else:
            z_min, z_max = min(z_min, p_z), max(z_max, p_z)
        targets.append((p_x, p_y, p_z))

        deviation = abs(output_value - p_z)
        if _t < 1:
            error.append(deviation)
        else:
            error.append(smear(error[-1], deviation, _t))

        regressor.fit(sample, p_z, drag_value)

        if Timer.time_passed(2000):
            print(f"{_t * 100. / number_of_points:05.2f}% finished.")

    # leave 10% head room above and below the target surface
    margin = (z_max - z_min) * .1
    axis_3d.set_zlim([z_min - margin, z_max + margin])

    print(error[-1])

    axis_3d.scatter(*zip(*outputs), alpha=.4, color="C1")
    axis_2d_error.plot(error)

    target_xs = [_p[0] for _p in targets]
    target_ys = [_p[1] for _p in targets]
    axis_2d_yz.scatter(target_ys, [_p[2] for _p in targets], color="C0")
    axis_2d_xz.scatter(target_xs, [_p[2] for _p in targets], color="C0")

    fit_x_co, fit_y_co = regressor.get_parameters()
    plot_line(axis_2d_xz, fit_x_co, value_range, color="C1")
    plot_line(axis_2d_yz, fit_y_co, value_range, color="C1")

    pyplot.tight_layout()
    pyplot.show()
def controlled_grid_interaction(predictor: Predictor, iterations: int = 500000):
    """Let a SARSA controller act in a grid world while ``predictor`` learns it.

    In every iteration the predictor predicts the current sensor reading
    from the previous sensor/motor pair and is then fitted on that example.
    The controller reacts to the predictor's state together with the
    previous sensor reading. Running averages of reward, prediction error
    and predict/fit duration are appended to ``Visualize`` roughly 1000
    times over the run and finalized at the end.

    :param predictor: object providing ``predict``, ``fit`` and
        ``get_state``.
    :param iterations: number of interaction steps.
    """
    c = Config("../configs/config.json")
    data_dir = c["data_dir"] + "grid_worlds/"

    grid_world = GridWorldLocal(data_dir + "square.txt", rotational=True)
    # alternatives: GridWorldLocal(data_dir + "simple.txt", rotational=False)
    #               GridWorldGlobal(data_dir + "sutton.txt", rotational=False)

    controller = SarsaController(grid_world.get_motor_range(), alpha=.8, gamma=.1, epsilon=.1)
    # alternative: RandomController(grid_world.get_motor_range())

    last_sensor = None
    last_motor = None
    sensor, reward = grid_world.react_to(None)

    # at least 1: `iterations // 1000` is 0 for fewer than 1000 iterations
    # and would make the modulo below divide by zero
    visualization_steps = max(1, iterations // 1000)

    average_reward = .0
    average_error = .0
    average_duration = .0

    for t in range(iterations):
        # predict the current sensor reading, then learn from it (timed)
        this_time = time.time()
        concurrent_inputs = (last_sensor, last_motor),
        concurrent_outputs = predictor.predict(concurrent_inputs)
        concurrent_targets = sensor,
        concurrent_examples = (concurrent_inputs[0], concurrent_targets[0]),
        predictor.fit(concurrent_examples)
        d = time.time() - this_time

        # query controller
        perception = predictor.get_state(), last_sensor
        motor = controller.react_to(perception, reward)

        # fraction of outputs that differ from their target
        error = sum(
            float(_o != _t) for _o, _t in zip(concurrent_outputs, concurrent_targets)
        ) / len(concurrent_targets)

        # incremental means over all iterations so far
        average_reward = (average_reward * t + reward) / (t + 1)
        average_error = (average_error * t + error) / (t + 1)
        average_duration = (average_duration * t + d) / (t + 1)

        if (t + 1) % visualization_steps == 0:
            Visualize.append("reward", predictor.__class__.__name__, average_reward)
            Visualize.append("error", predictor.__class__.__name__, average_error)
            Visualize.append("duration", predictor.__class__.__name__, average_duration)

        last_sensor = sensor
        last_motor = motor
        sensor, reward = grid_world.react_to(motor)

        if Timer.time_passed(2000):
            print("Finished {:05.2f}%...".format(100. * t / iterations))

    Visualize.finalize("reward", predictor.__class__.__name__)
    Visualize.finalize("error", predictor.__class__.__name__)
    Visualize.finalize("duration", predictor.__class__.__name__)
def actor_critic():
    """Run a ``RationalSarsaAC`` controller in the mountain car environment.

    The controller acts forever; its average actor error, critic error and
    reward are kept in sliding windows of ``window_size`` entries and
    replotted whenever ``Timer.time_passed(500)`` fires. The episode-end
    flag returned by ``env.step`` is ignored. The loop has no exit
    condition, so the environment is closed from a ``finally`` clause when
    the loop is left through an exception such as ``KeyboardInterrupt``.
    """
    fig = pyplot.figure()
    axes = fig.subplots(nrows=3, sharex="all")
    actor_axis, critic_axis, reward_axis = axes

    for each_axis, each_label in zip(axes, ("actor error", "critic error", "average reward")):
        each_axis.set_ylabel(each_label)
        each_axis.yaxis.set_label_position("right")
    reward_axis.set_xlabel("iteration")

    # https://github.com/openai/gym/blob/master/gym/envs/__init__.py
    env = gym.make("CustomMountainCar-infinite-v0")
    # alternative: gym.make("VanillaMountainCar-infinite-v0")
    env.reset()

    controller = RationalSarsaAC(((-.5, .5),), 2, 500, 5, .5, .1, polynomial_degree=2)
    # alternative: RandomRational(((-.5, .5),))

    sensor = None
    visualize = False
    plot = True

    window_size = 100000
    # one sliding window and one (initially missing) plotted line per axis
    series = tuple(deque(maxlen=window_size) for _ in axes)
    lines = [None] * len(axes)

    try:
        while True:
            if visualize:
                env.render()

            # no observation yet on the first step: fall back to a fixed action
            if sensor is None:
                motor = .0,
            else:
                motor = controller.react(tuple(sensor))

            # only observation and reward are used
            sensor, reward, done, info = env.step(numpy.array(motor))

            controller.integrate(tuple(sensor), tuple(motor), reward)

            latest = (
                controller.average_actor_error,
                controller.average_critic_error,
                controller.average_reward,
            )
            for each_series, each_value in zip(series, latest):
                each_series.append(each_value)

            # the timer is queried first, exactly once per iteration
            if Timer.time_passed(500) and plot:
                for each_line in lines:
                    if each_line is not None:
                        each_line.remove()

                x_min = max(controller.get_iterations() - window_size, 0)
                for _i, (each_axis, each_series) in enumerate(zip(axes, series)):
                    lines[_i], = each_axis.plot(
                        range(x_min, x_min + len(each_series)), each_series, color="black")
                    each_axis.set_xlim((x_min, x_min + window_size))
                    each_axis.set_ylim((min(each_series), max(each_series)))

                pyplot.tight_layout()
                pyplot.draw()
                pyplot.pause(.001)

    finally:
        # previously unreachable after the endless loop
        env.close()