예제 #1
0
 def plot_value_dist(self, w0, n):
     """Plot the distribution of simulated values next to the AAV reference.

     Draws ``self.value_acc(w0, n)`` with ``sns.distplot`` and marks two
     vertical dashed lines: the sample mean (red) and the negated AAV value
     ``-u(lambda0, w0)`` (green).

     Args:
         w0: Starting balance passed to ``value_acc`` and to ``AAV.u``.
         n: Second argument of ``value_acc`` (presumably the number of
             simulated paths -- confirm against ``value_acc``).
     """
     reference = -AAV(self.params).u(self.params.lambda0, w0)
     samples = self.value_acc(w0, n)

     figure, axis = plt.subplots()
     sns.distplot(samples, ax=axis)
     # Red marks the empirical mean, green the analytical reference.
     markers = ((np.mean(samples), 'red'), (reference, 'green'))
     for position, colour in markers:
         axis.axvline(position, color=colour, linestyle='--')
     figure.show()
예제 #2
0
 def plot_value_dist(self, l0, w0, n):
     """Plot simulated values against the AAV reference and return the samples.

     Draws ``self.value_acc(l0, w0, n)`` with ``sns.distplot``, marks the
     sample mean (red) and the negated AAV value ``-u(l0, w0)`` (green) with
     dashed vertical lines, and titles the axes with ``self.name`` and
     ``self.dt``.

     Args:
         l0: Starting intensity passed to ``value_acc`` and ``AAV.u``.
         w0: Starting balance passed to ``value_acc`` and ``AAV.u``.
         n: Third argument of ``value_acc`` (presumably the number of
             simulated paths -- confirm against ``value_acc``).

     Returns:
         Whatever ``self.value_acc(l0, w0, n)`` returned.
     """
     reference = -AAV(self.params).u(l0, w0)
     samples = self.value_acc(l0, w0, n)

     figure, axis = plt.subplots()
     sns.distplot(samples, ax=axis)
     # Red marks the empirical mean, green the analytical reference.
     markers = ((np.mean(samples), 'red'), (reference, 'green'))
     for position, colour in markers:
         axis.axvline(position, color=colour, linestyle='--')

     suffix = ' dt:' + str(self.dt)
     axis.set_title(self.name + suffix)
     figure.show()
     return samples
예제 #3
0
    def sim_value(self, l, w, n=5000):
        """Plot simulated value statistics per entry of ``self.dts`` against the AAV value.

        For every entry of ``self.dts`` the simulator's ``value_acc(l, w, n)``
        is evaluated; the mean is drawn as a line with a +/- one standard
        deviation band, together with a horizontal line at ``-u(l, w)``.

        Args:
            l: Starting intensity handed to the simulator and to ``AAV.u``.
            w: Starting balance handed to the simulator and to ``AAV.u``.
            n: Third argument of ``value_acc`` (presumably the number of
                simulated paths -- confirm against the simulator).
        """
        n_steps = len(self.dts)
        # Explicit float buffers: ``zeros_like(self.dts)`` would inherit an
        # integer dtype from ``dts`` and silently truncate the statistics.
        means = np.zeros(n_steps, dtype=float)
        stds = np.zeros(n_steps, dtype=float)

        # NOTE(review): the step size is never forwarded to the simulator, so
        # every iteration runs with whatever dt the simulator already has.
        # Wire ``_dt`` into the simulator here once its API is confirmed.
        for i, _dt in enumerate(self.dts):
            vals = self.simulator.value_acc(l, w, n)
            means[i] = np.mean(vals)
            stds[i] = np.std(vals)

        aav = AAV(self.simulator.env.params)
        # Evaluate the reference at the same start state as the simulation
        # (previously it always used ``lambda0`` regardless of ``l``).
        u = -aav.u(l, w)

        fig, ax = plt.subplots()
        ax.plot(self.dts, means, marker='x')
        ax.fill_between(self.dts,
                        means - stds,
                        means + stds,
                        alpha=0.5,
                        color='salmon')
        ax.plot(self.dts, np.full(n_steps, u))
        fig.show()
def test3():
    """
    Compare simulated episode rewards with the analytical account value.

    Runs 1000 episodes of ``CollectionsEnv`` taking action ``0`` at every
    step, accumulates the reward of each episode, plots the per-episode
    totals with the value ``-AAV.u(lambda0, w0)`` as a dashed red line, and
    prints that value followed by the mean episode reward.

    Returns:
        None
    """
    # NOTE(review): the reference point is hard-coded and ``Parameters()`` is
    # created independently of the environment; this presumably matches the
    # environment's default start state and parameters -- confirm.
    lambda0 = 1
    w0 = 100
    n_epochs = 1000

    p = Parameters()
    env = CollectionsEnv()
    reward_per_episode = []
    for _ in range(n_epochs):
        env.reset()
        tot_reward_per_episode = 0
        terminal = False
        while not terminal:
            # Action 0 is taken at every step.
            _, reward, terminal, _ = env.step(0)
            tot_reward_per_episode += reward
        reward_per_episode.append(tot_reward_per_episode)

    exact_v = -AAV(p).u(lambda0, w0)
    plt.plot(reward_per_episode, marker='x')
    plt.axhline(exact_v, color='red', linestyle='--')
    plt.show()
    print(exact_v)
    print(np.mean(reward_per_episode))
def _build_lattice_tower(repeated_input, input_calibrators, lattice_units_layer, n_lattice_points):
    """Stack calibrators and three lattice stages on ``repeated_input``; return the tower output tensor."""
    calibrated = input_calibrators(repeated_input)
    calibrated = keras.layers.Reshape((lattice_units_layer[0], 2))(calibrated)
    stage1 = tfl.layers.Lattice(units=lattice_units_layer[0], lattice_sizes=[n_lattice_points[0]] * 2,
                                monotonicities=2 * ['increasing'], output_min=0, output_max=1)(calibrated)
    # Every second-stage lattice unit receives a copy of all first-stage outputs.
    stage1 = keras.layers.RepeatVector(lattice_units_layer[1])(stage1)
    stage1 = keras.layers.Flatten()(stage1)
    calibrated = tfl.layers.PWLCalibration(input_keypoints=np.linspace(0, 1, num=100),
                                           units=lattice_units_layer[0] * lattice_units_layer[1],
                                           dtype=tf.float32,
                                           output_min=0.0, output_max=n_lattice_points[1] - 1.0,
                                           monotonicity='increasing')(stage1)
    calibrated = keras.layers.Reshape((lattice_units_layer[1], lattice_units_layer[0]))(calibrated)

    stage2 = tfl.layers.Lattice(units=lattice_units_layer[1],
                                lattice_sizes=[n_lattice_points[1]] * lattice_units_layer[0],
                                monotonicities=['increasing'] * lattice_units_layer[0],
                                output_min=0, output_max=1)(calibrated)

    calibrated = tfl.layers.PWLCalibration(units=lattice_units_layer[1] * lattice_units_layer[2],
                                           input_keypoints=np.linspace(0, 1, 100),
                                           dtype=tf.float32, monotonicity='increasing',
                                           output_min=0, output_max=n_lattice_points[2] - 1.0)(stage2)

    # One monotonicity entry per lattice dimension, i.e. per second-stage unit.
    stage3 = tfl.layers.Lattice(units=lattice_units_layer[2],
                                lattice_sizes=[n_lattice_points[2]] * lattice_units_layer[1],
                                monotonicities=['increasing'] * lattice_units_layer[1],
                                output_min=0, output_max=1)(calibrated)

    return tfl.layers.PWLCalibration(units=lattice_units_layer[2], input_keypoints=np.linspace(0, 1, 100),
                                     dtype=tf.float32, monotonicity='increasing')(stage3)


def _pretrain_lattice_on_aav(model, env, config):
    """Fit the two-output ``model`` to AAV-derived targets on a state grid and plot diagnostics."""
    # Works only for one discrete action: column 0 is the value without
    # acting, column 1 the value after ``env.action(1)`` minus its cost.
    aav = AAV(env.params)
    if config.normalize_states:
        lowbounds = env.observation(np.array([env.params.lambda0, env.MIN_ACCOUNT_BALANCE]))
        highbounds = env.observation(np.array([env.MAX_LAMBDA, env.w0]))
        ws = np.linspace(lowbounds[1], highbounds[1], 100)
        ls = np.linspace(lowbounds[0], highbounds[0], 100)
    else:
        ws = np.linspace(0, env.w0, 100)
        ls = np.linspace(0, env.MAX_LAMBDA, 100)
    # Targets are always evaluated on the raw (unnormalised) grid.
    wt = np.linspace(0, env.w0, 100)
    lt = np.linspace(0, env.MAX_LAMBDA, 100)
    ww, ll = np.meshgrid(ws, ls)

    jump = env.action(1)
    features = []
    # Row order (balance outer, intensity inner) is kept because the seeded
    # shuffle below depends on it.
    for wx, w_raw in zip(ws, wt):
        for ly, l_raw in zip(ls, lt):
            hold_value = float(-aav.u(l_raw, w_raw))
            act_value = float(-aav.u(l_raw + jump, w_raw) - jump * env.params.c)
            features.append([ly, wx, hold_value, act_value])

    dataset = pd.DataFrame(features, columns=['l', 'w', 'target', 'target2'])
    train_dataset = dataset.sample(frac=1.0, random_state=0)
    train_labels = train_dataset[['target', 'target2']].copy()
    train_dataset = train_dataset.drop(labels=['target', 'target2'], axis=1)

    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    model.fit(train_dataset.to_numpy(), train_labels.to_numpy(), epochs=50, validation_split=0.1,
              shuffle=False, verbose=True, callbacks=[callback])

    # One batched prediction over the whole training grid instead of one
    # call per grid point; np.asarray accepts both tensors and arrays.
    grid = np.column_stack([ll.ravel(), ww.ravel()])
    pred = np.asarray(model.predict_on_batch(grid)).reshape(ww.shape + (2,))
    lattice_pred = pred[..., 0]
    lattice_pred2 = pred[..., 1]

    fig, ax = plt.subplots(ncols=2, nrows=1)
    CS = ax[0].contour(ww, ll, lattice_pred)
    ax[0].clabel(CS, inline=1, fontsize=10)
    ax[0].set_title('vf')
    CS = ax[1].contour(ww, ll, lattice_pred2)
    ax[1].clabel(CS, inline=1, fontsize=10)
    ax[1].set_title('vf')
    plt.show()

    # Greedy action over a fixed (balance, intensity) grid.
    w_points = 60
    l_points = 60
    l_axis = np.linspace(0, 5, l_points)
    w_axis = np.linspace(0, 200, w_points)
    ww, ll = np.meshgrid(w_axis, l_axis)
    raw_states = np.column_stack([ll.ravel(), ww.ravel()])
    if config.normalize_states:
        # The model consumes observations, so map the raw grid through the
        # same transformation that was used for the training grid bounds.
        observations = np.array([env.observation(state) for state in raw_states])
    else:
        observations = raw_states
    q_values = np.asarray(model.predict_on_batch(observations))
    greedy = np.argmax(q_values, axis=1).reshape(ww.shape)

    cdict = {
        'red': ((0.0, 0.25, .25), (0.02, .59, .59), (1., 1., 1.)),
        'green': ((0.0, 0.0, 0.0), (0.02, .45, .45), (1., .97, .97)),
        'blue': ((0.0, 1.0, 1.0), (0.02, .75, .75), (1., 0.45, 0.45))
    }
    cm = m.colors.LinearSegmentedColormap('my_colormap', cdict, 1024)
    fig, ax = plt.subplots()
    im = ax.pcolor(ww, ll, greedy, cmap=cm)
    fig.colorbar(im, ax=ax)
    fig.show()


def construct_lattice(env, config, initialize=False):
    """
    Construct a compiled two-tower deep lattice network over (lambda, w).

    Each tower calibrates the two inputs, passes them through three lattice
    stages and emits one value; the two tower outputs are concatenated into
    a two-column output.

    NOTE: the right tower is now wired to its own first-stage lattice. It
    previously reused the left tower's first stage, so weights saved from
    that topology will not match this model.

    Args:
        env: Environment providing ``w0`` and ``MAX_LAMBDA`` (calibrator input
            ranges) and, when ``initialize`` is set, ``params``, ``action``,
            ``MIN_ACCOUNT_BALANCE`` and ``observation``.
        config: Only read when ``initialize`` is set (``normalize_states``).
        initialize: If true, pre-fit the model to AAV-derived targets and
            show diagnostic plots before returning.

    Returns:
            lattice model instance
    """
    max_w, max_l = (env.w0, env.MAX_LAMBDA)

    lattice_units_layer = [3, 3, 1]
    n_lattice_points = [3, 3, 3]

    def input_calibrator(upper, convexity):
        # Monotone calibrator mapping [0, upper] onto the first lattice's input range.
        return tfl.layers.PWLCalibration(input_keypoints=np.linspace(0, upper, num=100),
                                         dtype=tf.float32, output_min=0.0,
                                         output_max=n_lattice_points[0] - 1.0,
                                         monotonicity='increasing', convexity=convexity)

    ## Calibrators Block
    combined_calibratorsl = []
    combined_calibratorsr = []
    for _ in range(lattice_units_layer[0]):
        # One (lambda, w) calibrator pair per first-stage lattice unit and tower.
        combined_calibratorsl.extend([input_calibrator(max_l, 'concave'),
                                      input_calibrator(max_w, 'convex')])
        combined_calibratorsr.extend([input_calibrator(max_l, 'concave'),
                                      input_calibrator(max_w, 'convex')])

    input_callibratorsl = tfl.layers.ParallelCombination(combined_calibratorsl, single_output=True)
    input_callibratorsr = tfl.layers.ParallelCombination(combined_calibratorsr, single_output=True)

    # API FUNCTIONAL MODEL
    inputs = keras.Input(shape=(2,))
    # Duplicate the (lambda, w) pair once per first-stage lattice unit.
    repeated_input = keras.layers.RepeatVector(lattice_units_layer[0])(inputs)
    repeated_input = keras.layers.Flatten()(repeated_input)

    calibratorfl = _build_lattice_tower(repeated_input, input_callibratorsl,
                                        lattice_units_layer, n_lattice_points)
    calibratorfr = _build_lattice_tower(repeated_input, input_callibratorsr,
                                        lattice_units_layer, n_lattice_points)

    # Combine
    final = keras.layers.Concatenate()([calibratorfl, calibratorfr])

    model = keras.Model(inputs=inputs, outputs=final, name="combined")
    model.compile(loss=tf.keras.losses.mean_squared_error, optimizer='adam',
                  metrics=['mean_absolute_percentage_error'])

    if initialize:
        _pretrain_lattice_on_aav(model, env, config)

    return model
예제 #6
0
            return (0.001 * (w - shift)).clip(min=0)
        elif np.isscalar(w):
            return np.maximum(0.001 * (w - shift), 0)
        else:
            raise ValueError('Stupid error in w.')

    l0 = 1
    w0 = 100

    params = Parameters()
    chp = CHP(starting_balance=w0,
              starting_intensity=l0,
              params=params,
              collection_horizon=100,
              value_precision_thershold=1e-3,
              control_function=None)
    print(chp.calculate_value_single())
    chp.plot_statespace()
    chp.plot_intensity()
    chp.plot_policy()
    chp.plot_balance()
    chp.test_increments()

    mc_vals = chp.calculate_value(10000, returntype='array')
    print(np.mean(mc_vals))
    aav = AAV(params)
    print(aav.u(l0, w0))

    sns.distplot(mc_vals)
    plt.show()
예제 #7
0
 def __init__(self, env, name, config=None, training=True):
     """Initialise the parent agent and attach an ``AAV`` built from ``self.env.params``."""
     super().__init__(
         env=env,
         name=name,
         config=config,
         training=training,
     )
     self.aav = AAV(self.env.params)
예제 #8
0
class DQNAgentBaselined(DQNAgent):
    """DQN agent whose TD target is shifted by the AAV value ``u`` of each sampled state."""

    def __init__(self, env, name, config=None, training=True):
        """Initialise the parent agent and attach an ``AAV`` built from ``self.env.params``."""
        super().__init__(env=env, name=name, config=config, training=training)
        self.aav = AAV(self.env.params)

    def train(self, *args, **kwargs):
        """Run one gradient step on a batch sampled from replay memory.

        The positional and keyword arguments are accepted but not used.

        Returns:
            The sum of the element-wise (half squared TD error) loss.
        """
        cfg = self.config
        sample = self.memory.sample(cfg.batch_size)
        states, actions, rewards = sample['s'], sample['a'], sample['r']
        next_states, dones = sample['s_next'], sample['done']

        use_priorities = cfg.prioritized_memory_replay
        if use_priorities:
            idx, weights = sample['indices'], sample['weights']

        trainable = self.main_net.trainable_variables
        watched_states = tf.Variable(
            tf.convert_to_tensor(states, dtype='float32'))

        with tf.GradientTape() as outer_tape:
            outer_tape.watch(trainable)
            # The inner tape tracks the input batch; it is only needed by the
            # currently disabled input-gradient penalty
            # (penal_coeff * sum(max(-d main_value / d input, 0))).
            with tf.GradientTape() as input_tape:
                input_tape.watch(watched_states)
                # Double DQN (intentional): the online network selects the
                # next action, the target network evaluates it.
                online_next_q = self.main_net(next_states)
                next_action = np.argmax(online_next_q.numpy(), axis=1)
                target_next_q = self.target_net(next_states)
                next_value = tf.reduce_sum(
                    tf.one_hot(next_action, self.act_size) * target_next_q,
                    axis=1)

                bootstrap = (1 - dones) * cfg.gamma * next_value
                # AAV value of every sampled state, added to the target.
                baseline = [
                    self.aav.u(*self.env.convert_back(s)) for s in states
                ]
                target_value = bootstrap + rewards + baseline

                main_q = self.main_net(watched_states)
                action_mask = tf.one_hot(actions, self.act_size)
                main_value = tf.reduce_sum(action_mask * main_q, axis=1)

            td_error = target_value - main_value
            element_wise_loss = tf.square(td_error) * 0.5

            # Importance weights only apply with prioritized replay.
            if use_priorities:
                loss = tf.reduce_mean(element_wise_loss * weights)
            else:
                loss = tf.reduce_mean(element_wise_loss)

        grads = outer_tape.gradient(loss, trainable)

        # Refresh priorities from the new absolute TD errors.
        if use_priorities:
            new_priorities = np.abs(td_error.numpy()) + cfg.prior_eps
            self.memory.update_priorities(idx, new_priorities)

        self.optimizer.apply_gradients(zip(grads, trainable))

        # Logging
        self.global_step += 1
        self._tb_log_holder = {
            'Gradients': grads[0],
            'Weights': tf.convert_to_tensor(self.main_net.weights[0]),
            'Prediction': main_value,
            'Target': target_value,
            'TD error': td_error,
            'Elementwise Loss': element_wise_loss,
            'Loss': tf.reduce_mean(element_wise_loss),
        }
        return tf.reduce_sum(element_wise_loss)
def _nn_kernel_regularizer(config):
    """Return the kernel regularizer selected by ``config.regularizer`` (``None`` for no regularization)."""
    kind = config.regularizer
    if kind is None:
        return None
    if kind == "l1":
        return tf.keras.regularizers.l1(config.regularizer_parameter)
    if kind == "l2":
        return tf.keras.regularizers.l2(config.regularizer_parameter)
    if kind == "l1_l2":
        return tf.keras.regularizers.l1_l2(l1=config.regularizer_parameter,
                                           l2=config.regularizer_parameter)
    # An unknown name used to skip the hidden layer silently, producing a
    # network without the configured hidden units.
    raise ValueError(
        "Unknown regularizer {!r}; expected None, 'l1', 'l2' or 'l1_l2'.".format(kind))


def _pretrain_nn_on_aav(target_net, env, config):
    """Fit the two-output ``target_net`` to AAV-derived targets on a state grid and plot diagnostics."""
    # Works only for one discrete action: column 0 is the value without
    # acting, column 1 the value after ``env.action(1)`` minus its cost.
    aav = AAV(env.params)
    if config.normalize_states:
        lowbounds = env.observation(np.array([env.params.lambda0, env.MIN_ACCOUNT_BALANCE]))
        highbounds = env.observation(np.array([env.MAX_LAMBDA, env.w0]))
        ws = np.linspace(lowbounds[1], highbounds[1], 100)
        ls = np.linspace(lowbounds[0], highbounds[0], 100)
    else:
        ws = np.linspace(0, env.w0, 100)
        ls = np.linspace(0, env.MAX_LAMBDA, 100)
    # Targets are always evaluated on the raw (unnormalised) grid.
    wt = np.linspace(0, env.w0, 100)
    lt = np.linspace(0, env.MAX_LAMBDA, 100)
    ww, ll = np.meshgrid(ws, ls)

    jump = env.action(1)
    features = []
    # Row order (balance outer, intensity inner) is kept because the seeded
    # sampling below depends on it.
    for wx, w_raw in zip(ws, wt):
        for ly, l_raw in zip(ls, lt):
            hold_value = float(-aav.u(l_raw, w_raw))
            act_value = float(-aav.u(l_raw + jump, w_raw) - jump * env.params.c)
            features.append([ly, wx, hold_value, act_value])

    dataset = pd.DataFrame(features, columns=['l', 'w', 'target', 'target2'])
    # Only 80% of the grid is used for fitting; the remaining rows were
    # split off but never evaluated, so they are no longer materialised.
    train_dataset = dataset.sample(frac=0.8, random_state=0)
    train_labels = train_dataset[['target', 'target2']].copy()
    train_dataset = train_dataset.drop(labels=['target', 'target2'], axis=1)

    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    target_net.fit(train_dataset.to_numpy(), train_labels.to_numpy(), epochs=1000, validation_split=0.1,
                   shuffle=False, verbose=True, callbacks=[callback])

    # One batched prediction over the whole training grid instead of one
    # call per grid point; np.asarray accepts both tensors and arrays.
    grid = np.column_stack([ll.ravel(), ww.ravel()])
    pred = np.asarray(target_net.predict_on_batch(grid)).reshape(ww.shape + (2,))
    lattice_pred = pred[..., 0]
    lattice_pred2 = pred[..., 1]

    fig, ax = plt.subplots(ncols=2, nrows=1)
    CS = ax[0].contour(ww, ll, lattice_pred)
    ax[0].clabel(CS, inline=1, fontsize=10)
    ax[0].set_title('vf')
    CS = ax[1].contour(ww, ll, lattice_pred2)
    ax[1].clabel(CS, inline=1, fontsize=10)
    ax[1].set_title('vf')
    plt.show()

    # Greedy action over a fixed (balance, intensity) grid.
    w_points = 60
    l_points = 60
    l_axis = np.linspace(0, 5, l_points)
    w_axis = np.linspace(0, 200, w_points)
    ww, ll = np.meshgrid(w_axis, l_axis)
    raw_states = np.column_stack([ll.ravel(), ww.ravel()])
    if config.normalize_states:
        # The network consumes observations, so map the raw grid through the
        # same transformation that was used for the training grid bounds.
        observations = np.array([env.observation(state) for state in raw_states])
    else:
        observations = raw_states
    q_values = np.asarray(target_net.predict_on_batch(observations))
    greedy = np.argmax(q_values, axis=1).reshape(ww.shape)

    cdict = {
        'red': ((0.0, 0.25, .25), (0.02, .59, .59), (1., 1., 1.)),
        'green': ((0.0, 0.0, 0.0), (0.02, .45, .45), (1., .97, .97)),
        'blue': ((0.0, 1.0, 1.0), (0.02, .75, .75), (1., 0.45, 0.45))
    }
    cm = m.colors.LinearSegmentedColormap('my_colormap', cdict, 1024)
    fig, ax = plt.subplots()
    im = ax.pcolor(ww, ll, greedy, cmap=cm)
    fig.colorbar(im, ax=ax)
    fig.show()


def construct_nn(env, config, initialize=False):
    """Build and compile a fully connected ReLU network with one linear output per action.

    Args:
        env: Environment providing ``observation_space.shape`` and
            ``action_space.n``; when ``initialize`` is set also ``params``,
            ``action``, ``w0``, ``MAX_LAMBDA`` and, with normalised states,
            ``observation`` and ``MIN_ACCOUNT_BALANCE``.
        config: Provides ``layers`` (hidden layer sizes), ``regularizer``
            (``None``, ``"l1"``, ``"l2"`` or ``"l1_l2"``),
            ``regularizer_parameter``, ``batch_normalization`` and
            ``normalize_states``.
        initialize: If true, pre-fit the network to AAV-derived targets and
            show diagnostic plots before returning.

    Returns:
        The compiled ``tf.keras.Sequential`` model.

    Raises:
        ValueError: If ``config.regularizer`` is not a supported name.
    """
    target_net = tf.keras.Sequential()
    target_net.add(tf.keras.layers.Input(shape=env.observation_space.shape))
    for layer_size in config.layers:
        target_net.add(tf.keras.layers.Dense(layer_size, activation='relu',
                                             kernel_regularizer=_nn_kernel_regularizer(config)))
        if config.batch_normalization:
            target_net.add(tf.keras.layers.BatchNormalization())
    target_net.add(tf.keras.layers.Dense(env.action_space.n, activation='linear'))
    target_net.build()
    target_net.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())

    if initialize:
        _pretrain_nn_on_aav(target_net, env, config)

    return target_net