コード例 #1
0
def load_data(path_data, action_space, force_reload=False):
    """Build (or reload) a shuffled train/eval split from raw result files.

    The processed split is cached in ``<path_data>, processed/data``. When
    that file exists and ``force_reload`` is false, it is loaded with
    ``load_vars`` and returned as-is.

    Otherwise every file returned by ``tools.get_files`` is loaded and is
    expected to unpack to ``(mu0s_ats_batch, logsigma0s_batch, ress)``, where
    each element of ``ress`` exposes ``res['max'].x`` and ``res['min'].x``.
    Inputs are ``[mu0s_ats, logsigma0s]`` and outputs are ``[max.x, min.x]``,
    both joined along the last axis.

    Args:
        path_data: Directory containing the raw result files.
        action_space: Action dimensionality; inputs must have
            ``2 * action_space`` columns and outputs ``4 * action_space``.
        force_reload: When true, ignore an existing processed cache and
            rebuild it.

    Returns:
        ``(train_x, train_y, train_weight, eval_x, eval_y, eval_weight)``.
        The last 500 shuffled rows form the eval set, so with 500 rows or
        fewer the train arrays are empty. On a cache hit the value is
        whatever ``load_vars`` returns (presumably the same six-tuple).

    Raises:
        ValueError: If no data files are found under ``path_data``.
    """
    path_data_processed = path_data + ', processed'
    file_data_processed = path_data_processed + '/data'
    if not force_reload and os.path.exists(file_data_processed):
        print(f'load data from {file_data_processed}')
        return load_vars(file_data_processed)

    print(f'load data from {path_data}')
    tools.mkdir(path_data_processed)
    files = tools.get_files(path_rel=path_data, sort=True)
    if len(files) == 0:
        # Fail early with a clear message instead of the opaque
        # "need at least one array to concatenate" raised further down.
        raise ValueError(f'no data files found in {path_data}')

    # Seed with empty arrays of the expected width so a column-count mismatch
    # in any file is still reported by np.concatenate. Chunks are collected
    # and joined once instead of re-copying the growing array per file.
    input_chunks = [np.zeros((0, 2 * action_space))]
    output_chunks = [np.zeros((0, 4 * action_space))]
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    counts = np.zeros(len(files), dtype=int)
    for ind, f in enumerate(files):
        mu0s_ats_batch, logsigma0s_batch, ress = load_vars(f)
        input_chunks.append(
            np.concatenate((mu0s_ats_batch, logsigma0s_batch), axis=-1))

        max_values = np.array([res['max'].x for res in ress])
        min_values = np.array([res['min'].x for res in ress])
        output_chunks.append(np.concatenate((max_values, min_values), axis=-1))

        counts[ind] = mu0s_ats_batch.shape[0]
    inputs_final = np.concatenate(input_chunks)
    outputs_final = np.concatenate(output_chunks)

    # Every sample from a file with `cnt` rows gets weight mean(counts) / cnt,
    # so each file contributes the same total weight regardless of its size.
    weights = np.repeat(counts.mean() / counts, counts)

    # --- drop rows whose outputs contain NaN or +/-inf
    inds_reserve = np.isfinite(outputs_final).all(axis=1)
    inputs_final = inputs_final[inds_reserve]
    outputs_final = outputs_final[inds_reserve]
    weights = weights[inds_reserve]

    # --- shuffle inputs, outputs and weights with one shared permutation
    inds_shuffle = np.random.permutation(inputs_final.shape[0])
    inputs_final = inputs_final[inds_shuffle]
    outputs_final = outputs_final[inds_shuffle]
    weights = weights[inds_shuffle]

    # --- hold out the last 500 rows for evaluation
    ind_split = -500
    train_x, train_y, train_weight = \
        inputs_final[:ind_split], outputs_final[:ind_split], weights[:ind_split]
    eval_x, eval_y, eval_weight = \
        inputs_final[ind_split:], outputs_final[ind_split:], weights[ind_split:]
    save_vars(file_data_processed, train_x, train_y, train_weight, eval_x, eval_y, eval_weight)
    return train_x, train_y, train_weight, eval_x, eval_y, eval_weight
コード例 #2
0
def prepare_data(dim,
                 delta,
                 sharelogsigma,
                 clipcontroltype,
                 cliprange,
                 clip_clipratio,
                 search_delta=False):
    """Run (or reload) the KL2Clip optimisation on an action grid and plot it.

    For each ``logsigma0`` (only ``[0]`` for ``dim == 1``) a grid of
    ``batch_size`` actions in ``[-5, 5]`` is built. The optimiser result is
    computed and pickled to ``<prefix_save>.pkl`` unless that file already
    exists, then always reloaded from the pickle. Unless ``search_delta`` is
    set, the max/min ratios are scatter-plotted and the figure is saved (when
    the module-level ``DEBUG`` is false) or shown (when ``DEBUG`` is true and
    ``cliprange`` is the last entry of the module-level ``clipranges``).

    Args:
        dim: Action dimensionality; anything other than 1 raises.
        delta: Passed to the optimiser call and used in the output directory
            name.
        sharelogsigma: Forwarded to the ``KL2Clip`` constructor.
        clipcontroltype: Forwarded to ``KL2Clip`` and ``get_clip_new``.
        cliprange: Forwarded to ``KL2Clip``; also compared against the
            module-level ``clipranges`` to decide when to create/show the
            figure.
        clip_clipratio: Forwarded to the optimiser call.
        search_delta: When true, the first result is appended to ``results``
            and the loop stops before any plotting.

    Returns:
        None. NOTE(review): ``results`` is filled when ``search_delta`` is
        true but never returned -- confirm whether a ``return results`` is
        missing.

    Raises:
        NotImplementedError: If ``dim`` is not 1.
    """
    # NOTE(review): declared global but never read or assigned in this function.
    global ress_tf_last
    path_data = path_root + '/KL2Clip/data/train_lambda'
    Name = f'dim={dim}, delta={delta}, train'
    path_data_processed = path_data + f'/{Name}'
    tools.mkdir(path_data_processed)

    if dim == 1:
        logsigma0s = np.array([0])
    else:
        raise NotImplementedError
    logsigma0s = logsigma0s.reshape((-1, dim))
    batch_size = 2048
    mu = np.zeros((dim, ))

    opt = KL2Clip(dim=dim,
                  batch_size=batch_size,
                  sharelogsigma=sharelogsigma,
                  clipcontroltype=clipcontroltype,
                  cliprange=cliprange)

    def get_fn_sample():
        """Build two callables over a diagonal Gaussian: sampler and ``dist.p``."""
        mu0 = tf.placeholder(shape=[dim], dtype=tf.float32)
        a = tf.placeholder(shape=[batch_size, dim], dtype=tf.float32)
        logsigma0 = tf.placeholder(shape=[dim], dtype=tf.float32)
        sample_size = tf.placeholder(shape=(), dtype=tf.int32)
        dist = DiagGaussianPd(tf.concat((mu0, logsigma0), axis=0))
        samples = dist.sample(sample_size)
        fn_sample = U.function([mu0, logsigma0, sample_size], samples)
        fn_p = U.function([mu0, logsigma0, a], dist.p(a))
        return fn_sample, fn_p

    # The session object is not referenced again; it is installed as the
    # default session via make_default=True.
    sess = U.make_session(make_default=True)
    results = []
    fn_sample, fn_p = get_fn_sample()
    for logsigma0 in logsigma0s:
        prefix_save = f'{path_data_processed}/logsigma0={logsigma0}'
        Name_f = f"{Name},logsigma0={logsigma0}"
        file_fig = f'{prefix_save}.png'
        # Deterministic action grid instead of sampling from the Gaussian:
        # a_s_batch = fn_sample( mu, logsigma0, batch_size )
        a_s_batch = np.linspace(-5, 5, batch_size).reshape((-1, 1))
        logsigma0s_batch = np.tile(logsigma0, (batch_size, 1))
        print(a_s_batch.max(), a_s_batch.min())
        # --- sort the data: have problem in 2-dim
        # inds = np.argsort(a_s_batch, axis=0)
        # inds = inds.reshape(-1)
        # a_s_batch = a_s_batch[inds]
        # logsigma0s_batch = logsigma0s_batch[inds]

        # tools.reset_time()
        # a_s_batch.fill(0)
        # print(a_s_batch.shape)
        # a_s_batch[0, :]=0
        # if search_delta:
        # for i in range( batch_size):
        # a_s_batch[i,:] = 0.001 * (batch_size-i)
        # Only run the optimiser when no pickled result exists for this prefix.
        if not os.path.exists(f'{prefix_save}.pkl'):
            # ress_tf = opt( mu0_logsigma0_tuple=(a_s_batch, logsigma0s_batch), a=None, delta=delta, clip_clipratio=clip_clipratio)
            ress_tf = opt(mu0_logsigma0_tuple=(np.zeros_like(logsigma0s_batch),
                                               logsigma0s_batch),
                          a=a_s_batch,
                          delta=delta,
                          clip_clipratio=clip_clipratio)
            print(a_s_batch[0], ress_tf.x.max[0], ress_tf.x.min[0])

            save_vars(f'{prefix_save}.pkl', a_s_batch, logsigma0,
                      logsigma0s_batch, ress_tf)
        print(prefix_save)
        # Always reload from disk, so the fresh and cached paths use the same
        # (round-tripped) values from here on.
        a_s_batch, logsigma0, logsigma0s_batch, ress_tf = load_vars(
            f'{prefix_save}.pkl')

        if search_delta:
            results.append(ress_tf)
            break
        # NOTE(review): `fig` is only bound when cliprange is the first entry
        # of `clipranges`; the dim == 2 branch below reads `fig` unconditionally.
        if cliprange == clipranges[0]:  # TODO tmp
            fig = plt.figure(figsize=(20, 10))
        markers = ['^', '.']
        colors = [['blue', 'red'], ['green', 'hotpink']]
        # for ind, opt_name in enumerate(['max']):
        for ind, opt_name in enumerate(['max', 'min']):
            # if ind == 1:
            #     continue
            # --- plot tensorflow result
            ratios, cons = ress_tf.ratio[opt_name], ress_tf.con[opt_name]
            print(
                f'clip-{opt_name}_mean:{ratios.mean()}, clip-{opt_name}_min:{ratios.min()}, clip-{opt_name}_max:{ratios.max()}'
            )
            # NOTE(review): unreachable -- search_delta already hit `break`
            # above before this loop is entered.
            if search_delta:
                continue
            if DEBUG:
                pass
            # Points whose constraint value is within the threshold for this
            # delta are "good"; the rest are "bad".
            inds_good = cons <= get_ConstraintThreshold(ress_tf.delta)
            inds_bad = np.logical_not(inds_good)
            if dim == 1:
                # Computed only for the 'max' pass; ratio_new and alpha are not
                # used by any active plotting call below (only printed).
                if ind == 0 and 1:
                    ps = fn_p(mu, logsigma0, a_s_batch)
                    # +np.abs(ps.max()) + 1
                    ratio_new = -np.log(ps)
                    ratio_new = ratio_new - ratio_new.min() + ratios.min()
                    alpha = np.exp(-ps * 2)
                    print(alpha)
                    # plt.scatter(a_s_batch, ratio_new, s=5, label='ratio_new0')
                    ratio_new = ratio_new.min() + alpha * (ratio_new -
                                                           ratio_new.min())
                    # plt.scatter( a_s_batch, ratio_new, s=5, label='ratio_new1' )

                    # ps = -ps
                    # ratios = ps - ps.min() + ratios.min()
                    # print( ps.min() )
                    # ratios_new =np.square( a_s_batch-mu ) * np.exp( -logsigma0 )
                    # ratio_min = ps  / (ps.max()-ps.min()) * ress_tf.ratio.min.max()
                    # plt.scatter(a_s_batch, ratio_min, s=5, label='square')
                    # plt.scatter(a_s_batch, 1./ratio_min, s=5, label='square')
                    # plt.scatter(a_s_batch, 1./ratios, s=5, label='1/max')

                def plot_new(alpha):
                    """Scatter the clip bounds returned by get_clip_new for ``alpha``."""
                    clip_max_new, clip_min_new = get_clip_new(
                        alpha,
                        ress_tf.ratio['max'],
                        ress_tf.ratio['min'],
                        clipcontroltype=clipcontroltype)
                    plt.scatter(a_s_batch,
                                clip_max_new,
                                s=5,
                                label=f'clip_max_{alpha}')
                    plt.scatter(a_s_batch,
                                clip_min_new,
                                s=5,
                                label=f'clip_min_{alpha}')

                # plot_new is currently never called (all calls commented out).
                if ind == 0:
                    pass
                    # plot_new(0.5)
                    # plot_new(0.5)
                    # plot_new(-1)

                plt.scatter(a_s_batch[inds_good],
                            ratios[inds_good],
                            label='ratio_predict-good_' + opt_name,
                            s=5,
                            color=colors[ind][0],
                            marker=markers[ind])
                plt.scatter(a_s_batch[inds_bad],
                            ratios[inds_bad],
                            label='ratio_predict-bad_' + opt_name,
                            s=5,
                            color=colors[ind][1],
                            marker=markers[ind])
            # NOTE(review): unreachable at present -- any dim other than 1
            # raises NotImplementedError at the top of the function.
            elif dim == 2:
                ax = fig.gca(projection='3d')
                # ax.view_init(30, 30)
                ax.view_init(90, 90)
                # ax.plot_trisurf(a_s_batch[:, 0], a_s_batch[:, 1], ratios)
                ax.scatter(a_s_batch[inds_good, 0],
                           a_s_batch[inds_good, 1],
                           ratios[inds_good],
                           label='ratio_predict-good_' + opt_name,
                           s=5,
                           color=colors[ind][0],
                           marker=markers[ind])
                ax.scatter(a_s_batch[inds_bad, 0],
                           a_s_batch[inds_bad, 1],
                           ratios[inds_bad],
                           label='ratio_predict-bad_' + opt_name,
                           s=5,
                           color=colors[ind][1],
                           marker=markers[ind])

        if dim <= 2 and not search_delta:
            plt.title(
                Name_f +
                f'\nstep:{ress_tf.step},rate_satisfycon:{ress_tf.rate_satisfycon_}, rate_statisfydifference_:{ress_tf.rate_statisfydifference_}, difference_max_:{ress_tf.difference_max_}'
            )
            plt.legend(loc='best')
            # The figure is written to disk only outside DEBUG mode.
            if not DEBUG:
                plt.savefig(file_fig)
    opt.close()
    if dim <= 2 and not search_delta:
        if DEBUG:
            # In DEBUG mode the figure is shown once, on the last cliprange.
            if cliprange == clipranges[-1]:
                plt_tools.set_postion()
                plt.show()
    plt.close()
コード例 #3
0
def batch_norm_relu(inputs, is_training):
    """Apply batch normalization to ``inputs`` and pass the result through ReLU.

    Args:
        inputs: Tensor to normalize.
        is_training: Forwarded as ``training`` to the batch-norm layer.

    Returns:
        The ReLU-activated, batch-normalized tensor.
    """
    # fused=True is requested for a significant performance boost; see
    # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
    normalized = tf.layers.batch_normalization(
        inputs=inputs,
        training=is_training,
        momentum=_BATCH_NORM_DECAY,
        epsilon=_BATCH_NORM_EPSILON,
        center=True,
        scale=True,
        fused=True)
    return tf.nn.relu(normalized)


from toolsm import process as tools_process

# Directory layout for the tabular data, created top-down at import time:
#   <path_root>/tabular/precision_<TabularActionPrecision>/locker
# path_root_tabular ends up naming the precision-specific directory.
path_root_tabular = '{}/tabular'.format(path_root)
tools.mkdir(path_root_tabular)
path_root_tabular = '{}/precision_{}'.format(path_root_tabular, TabularActionPrecision)
tools.mkdir(path_root_tabular)
# Sub-directory named "locker" inside the precision-specific directory.
path_root_tabluar_locker = '{}/locker'.format(path_root_tabular)
tools.mkdir(path_root_tabluar_locker)


class KL2Clip_tabular(object):
    def __init__(self, createtablur_initialwithpresol=True):
        """Initialise an empty ``deltas_dict`` and store the constructor flag.

        Args:
            createtablur_initialwithpresol: Stored on the instance unchanged;
                how it is consumed is not visible in this excerpt.
        """
        # get_tabular tests `delta in self.deltas_dict`, so keys are delta
        # values; presumably a per-delta cache -- TODO confirm value type.
        self.deltas_dict = {}
        self.createtablur_initialwithpresol = createtablur_initialwithpresol
        ...

    def get_tabular(self, delta):
        save_path = f'{path_root_tabular}/{delta:.16f}'
        if delta in self.deltas_dict:
コード例 #4
0
def load_data_normal(path_data, USE_MULTIPROCESSING=True):
    """Build (or reload) a train/eval split of (action, delta) -> solved mu.

    The processed split is cached at
    ``<path_data>/train_preprocessed_reduce_v3/data``; when that file exists
    it is loaded with ``load_vars`` and returned as-is.

    Otherwise the first ``.pkl`` file found under ``path_data`` is loaded and
    is expected to unpack to ``(a_s_batch, _, _, ress_tf)``. The mu half of
    ``ress_tf.x.min`` / ``ress_tf.x.max`` is passed, together with the action
    and delta of each row, to the function returned by
    ``get_calculate_mu_func(True)``, and the first element of each result is
    used as the target.

    Args:
        path_data: Directory containing the pickled optimiser results.
        USE_MULTIPROCESSING: When true, evaluate the per-row solver in a
            pool of 4 worker processes; otherwise run it serially.

    Returns:
        ``(train_x, train_y, train_weight, eval_x, eval_y, eval_weight)``.
        Inputs are ``[action, delta]`` and outputs ``[min_mu, max_mu]`` (the
        column split assumes one action column -- TODO confirm for dim > 1).
        The last 3000 shuffled rows form the eval set. On a cache hit the
        value is whatever ``load_vars`` returns.
    """
    path_save = f'{path_data}/train_preprocessed_reduce_v3'

    if os.path.exists(f'{path_save}/data'):
        print(f'load data from {path_save}/data')
        return load_vars(f'{path_save}/data')
    tools.mkdir(f'{path_data}/train_preprocessed')
    files = tools.get_files(path_rel=path_data, only_sub=False, sort=False, suffix='.pkl')

    actions, deltas, max_mu_logsigma, min_mu_logsigma = [], [], [], []
    # NOTE(review): only the first file is consumed (files[:1]) -- confirm
    # this is intentional and not a debugging leftover.
    for f in files[:1]:
        a_s_batch, _, _, ress_tf = load_vars(f)
        actions.append(a_s_batch)
        deltas.append(np.ones_like(a_s_batch) * ress_tf.delta)
        min_mu_logsigma.append(ress_tf.x.min)
        max_mu_logsigma.append(ress_tf.x.max)
    actions = np.concatenate(actions, axis=0)
    deltas = np.concatenate(deltas, axis=0)
    min_mu_logsigma = np.concatenate(min_mu_logsigma, axis=0)
    max_mu_logsigma = np.concatenate(max_mu_logsigma, axis=0)

    # The optimiser output is [mu, logsigma] along the last axis; keep mu only.
    min_mu_tfopt, _ = np.split(min_mu_logsigma, indices_or_sections=2, axis=-1)
    max_mu_tfopt, _ = np.split(max_mu_logsigma, indices_or_sections=2, axis=-1)

    time0 = time.time()
    calculate_mu = get_calculate_mu_func(True)
    # TODO: what follows is mu_logsigma_fsolve
    if USE_MULTIPROCESSING:
        # The context manager tears the pool down once both blocking maps
        # have returned; previously the worker processes were leaked.
        with multiprocessing.Pool(4) as pool:
            min_mu_fsolve = pool.map(calculate_mu, zip(min_mu_tfopt, actions, deltas))
            max_mu_fsolve = pool.map(calculate_mu, zip(max_mu_tfopt, actions, deltas))
    else:
        min_mu_fsolve = list(map(calculate_mu, zip(min_mu_tfopt, actions, deltas)))
        max_mu_fsolve = list(map(calculate_mu, zip(max_mu_tfopt, actions, deltas)))

    # Each solver result is indexable; only its first element is kept.
    min_mu_fsolve = [res[0] for res in min_mu_fsolve]
    max_mu_fsolve = [res[0] for res in max_mu_fsolve]
    # f_mu_to_logsigma = lambda m, a: (m - a) * (m ** 2 - a * u - 1) / a
    time1 = time.time()
    print(time1 - time0)
    mu_tf_opt = np.concatenate((min_mu_tfopt, max_mu_tfopt), axis=1)
    mu_fsolve = np.stack(
        (np.concatenate(min_mu_fsolve, axis=0).squeeze(),
         np.concatenate(max_mu_fsolve, axis=0).squeeze()),
        axis=1)
    # Diagnostic: difference between the optimiser's mu and the solver's mu.
    print(mu_tf_opt - mu_fsolve)

    # Shuffle rows, then drop any row containing NaN.
    inds_shuffle = np.random.permutation(actions.shape[0])
    all_ = np.concatenate((actions, deltas, mu_fsolve), axis=1)[inds_shuffle]
    all_ = all_[~np.isnan(all_).any(axis=1)]
    # First half of the columns: (actions, deltas); second half: (min, max) mu.
    inputs_all, outputs_all = np.split(all_, indices_or_sections=2, axis=1)
    weights = np.ones(shape=(inputs_all.shape[0],))

    print(outputs_all.shape)

    ind_split = -3000
    train_x, train_y, train_weight = \
        inputs_all[:ind_split], outputs_all[:ind_split], weights[:ind_split]
    eval_x, eval_y, eval_weight = \
        inputs_all[ind_split:], outputs_all[ind_split:], weights[ind_split:]
    # The directory created above is `train_preprocessed`, not `path_save`;
    # make sure the directory actually written to exists.
    tools.mkdir(path_save)
    save_vars(f'{path_save}/data', train_x, train_y, train_weight, eval_x, eval_y, eval_weight)
    return train_x, train_y, train_weight, eval_x, eval_y, eval_weight