Example #1
# make grid samples

if Use_simple:
    Comb, Batch_U = grid_samples_simple(D, Grid_num)
else:
    Comb, Batch_U = grid_samples(D, Grid_num)

# Define parameters for the model
learning_rate = 0.05
batch_size = 1000
minibatch_U = 1
minibatch_Ut = 100
n_epochs = 60

data = generator([X_train, y_train], batch_size)
data2 = generator([X_test, y_test], batch_size)

Uniform = generator([Comb], minibatch_U)

Num = 10

#prepare the placeholders
dictU = make_placeholders(Num, D)
X = tf.placeholder(tf.float32, [None, 2], name='X_placeholder')
Y = tf.placeholder(tf.float32, [None, 1], name='Y_placeholder')

#this is the placeholder for the prediction
_Ut = make_placeholders(minibatch_Ut, D)

################## Define your model
# make grid samples
if Use_simple:
    Comb, Batch_U = grid_samples_simple(D, Grid_num)
else:
    Comb, Batch_U = grid_samples(D, Grid_num)

# Define parameters for the model
learning_rate = 0.01
'''
The minibatch size of data X and Y. This should be as large as possible for numerical stability.
'''
batch_size = 1000

minibatch_U = 1
n_epochs = 60

#how many placeholders we use for the loss
Num = 90

Uniform = generator([Comb], minibatch_U)

#how many samples we use for the predictions
Ut = tf.contrib.distributions.Uniform(low=0.1,
                                      high=0.8).sample(sample_shape=(1000, D))
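# Ut: 1000 points drawn uniformly from [0.1, 0.8] in each of the D dimensions,
# i.e. the sample locations used for the predictions mentioned above.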

#prepare the placeholders
dictU = make_placeholders_ones(Num, D)

#this is the placeholder for the prediction

################## Define your model


def Gauss(loc, scale, x):
    # The tail of this expression was cut off in the source; it is completed
    # here as the standard Gaussian density with mean `loc` and variance `scale`.
    return tf.exp(-tf.square(x - loc) / (2.0 * scale)) / tf.sqrt(2.0 * np.pi * scale)
# how many parameters are in the model we use
D = 3

# Define parameters for the model
learning_rate = 0.01
'''
The minibatch size of data X and Y. This should be as large as possible for numerical stability.
'''
batch_size = 1000
minibatch_U = 1
n_epochs = 60

#how many placeholders we use for the loss
Num = 50

data = generator([X_train, y_train], batch_size)
data2 = generator([X_test, y_test], batch_size)

#how many samples we use for the predictions

#prepare the placeholders
X = tf.placeholder(tf.float32, [None, 2], name='X_placeholder')
Y = tf.placeholder(tf.float32, [None, 1], name='Y_placeholder')

#this is the placeholder for the prediction

################## Define your model


def Logistic_regression_Model_prior(X, Y, params, minibatch_U=1):
    '''
    '''
batch_size = 1000
minibatch_U = 1
n_epochs = 60

#how many placeholders we use for the loss
Num = 4

n_fold = 10  # number of cross-validation folds
k_fold = cross_validation.KFold(n=len(X0), n_folds=n_fold, random_state=0)
for train_index, test_index in k_fold:
    X_train, X_test = X0[train_index, :], X0[test_index, :]
    X_train, X_test = preprocessing2(X_train, X_test)
    y_train, y_test = Y0[train_index][:, None], Y0[test_index][:, None]

    data = generator([X_train, y_train], batch_size)

################## Define your model
'''
Bayesian neural net, one hidden layer
'''

Num_of_hidden = 20

target_shape = [[Num_of_hidden, data_dim], [Num_of_hidden, 1],
                [1, Num_of_hidden], [1, 1]]
target_shape = np.array(target_shape)
a = np.cumprod(target_shape, 1)[:, -1]
b = np.cumsum(np.cumprod(target_shape, 1)[:, -1])

D = np.cumsum(np.cumprod(target_shape, 1)[:, -1])[-1]
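For reference, the shape bookkeeping above works out as follows (a small sketch assuming data_dim = 5 purely for illustration): `a` holds the flattened size of each weight tensor, `b` the cumulative offsets for slicing a flat parameter vector, and `D` the total number of parameters.

# Illustration only, assuming data_dim = 5:
# target_shape -> [[20, 5], [20, 1], [1, 20], [1, 1]]
# a            -> [100, 20, 20, 1]      (per-tensor parameter counts)
# b            -> [100, 120, 140, 141]  (cumulative slice offsets)
# D            -> 141                   (total number of parameters)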
Example #5
def train(model,
          data,
          batch_size,
          warmup_epoch,
          total_epoch,
          lr,
          temperature,
          checkpoint_path="models/checkpoints",
          model_summary_path="models/summary"):

    total_iterations = math.ceil(len(data) / batch_size)

    zdim = model.output.shape[1]
    checkpoint_path = f'models/{model.layers[1].name}_{datetime.date.today().strftime("%Y%m%d")}_checkpoints'

    lr_decayed_fn = tf.keras.experimental.CosineDecay(lr, total_iterations)
    optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9)
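    # Note: the cosine schedule is not passed to the optimizer constructor;
    # the learning rate is re-assigned from lr_decayed_fn on every iteration below.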

    checkpoint = tf.train.Checkpoint(step=tf.Variable(1),
                                     optimizer=optimizer,
                                     net=model)
    manager = tf.train.CheckpointManager(checkpoint,
                                         checkpoint_path,
                                         max_to_keep=10)

    summary_writer = tf.summary.create_file_writer(model_summary_path)

    images = generator(data, batch_size)

    # warm up head
    for e in range(warmup_epoch):
        start = time.time()
        epoch_loss = []
        checkpoint = tf.train.Checkpoint(step=tf.Variable(1),
                                         optimizer=optimizer,
                                         net=model)
        #print(f"********************************************************************")
        #print(f"**                        Warmup Epoch: {e}                         **")
        #print(f"********************************************************************")
        print(f"Warmup Epoch {e}: ", end='')
        for i in range(total_iterations):
            image1, image2 = next(images)
            # Train one step
            with tf.GradientTape() as tape:
                z1 = model(image1, training=True)
                z2 = model(image2, training=True)
                z1 = tf.math.l2_normalize(z1, axis=1)
                z2 = tf.math.l2_normalize(z2, axis=1)
                loss = nt_xent(z1, z2, batch_size, temperature, zdim)
                reg_loss = tf.add_n(model.losses) if model.losses else 0
                loss = loss + reg_loss
            gradients = tape.gradient(loss, model.trainable_variables)

            # record loss
            epoch_loss.append(loss)

            # update optimizer
            optimizer.lr = lr_decayed_fn(i + 1)

            # apply gradients
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))

            #
            checkpoint.step.assign_add(1)
            if checkpoint.step.numpy() % 10 == 0:
                #print(f"Iter: {i+2} Step: {checkpoint.step.numpy()} Loss: {loss.numpy():.5f} LR: {optimizer.__getattribute__('lr').numpy():9f}")
                add_to_summary(summary_writer, loss, optimizer.lr,
                               image1[:1], image2[:1],
                               checkpoint.step.numpy())
                summary_writer.flush()
        save_path = manager.save()
        print("time: {:5.0f} loss {:1.5f}  ".format(time.time() - start,
                                                    np.mean(epoch_loss)),
              end='')
        print(f"checkpoint: {save_path}")
        #print("loss {:1.2f}".format(np.mean(epoch_loss)))

    # train all layers
    for l in model.layers:
        l.trainable = True

    for e in range(total_epoch):
        epoch_loss = []
        start = time.time()
        checkpoint = tf.train.Checkpoint(step=tf.Variable(1),
                                         optimizer=optimizer,
                                         net=model)
        #print(f"********************************************************************")
        #print(f"**                               Epoch: {e}                         **")
        #print(f"********************************************************************")
        print(f"Epoch {e}: ", end='')
        for i in range(total_iterations):
            image1, image2 = next(images)
            # Train one step
            with tf.GradientTape() as tape:
                z1 = model(image1, training=True)
                z2 = model(image2, training=True)
                z1 = tf.math.l2_normalize(z1, axis=1)
                z2 = tf.math.l2_normalize(z2, axis=1)
                loss = nt_xent(z1, z2, batch_size, temperature, zdim)
                reg_loss = tf.add_n(model.losses) if model.losses else 0
                loss = loss + reg_loss
            gradients = tape.gradient(loss, model.trainable_variables)

            # record loss
            epoch_loss.append(loss)

            # update optimizer
            optimizer.lr = lr_decayed_fn(i + 1)

            # apply gradients
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))

            #
            checkpoint.step.assign_add(1)
            if checkpoint.step.numpy() % 10 == 0:
                #print(f"Iter: {i+2} Step: {checkpoint.step.numpy()} Loss: {loss.numpy():.5f} LR: {optimizer.__getattribute__('lr').numpy():9f}")
                add_to_summary(summary_writer, loss, optimizer.lr,
                               image1[:1], image2[:1],
                               checkpoint.step.numpy())
                summary_writer.flush()
        print("time: {:5.0f} loss {:1.5f}  ".format(time.time() - start,
                                                    np.mean(epoch_loss)),
              end='')
        save_path = manager.save()
        print(f"Checkpoint: {save_path}")
        #print("loss {:1.2f}".format(loss.numpy()))

    return model
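
A minimal usage sketch for the training loop above; `build_encoder` and `image_pairs` are hypothetical placeholders for whatever backbone constructor and paired-image dataset the surrounding project provides:

encoder = build_encoder()             # hypothetical: Keras model, backbone at layers[1]
trained = train(encoder,
                data=image_pairs,     # hypothetical: dataset consumed by generator(...)
                batch_size=256,
                warmup_epoch=5,
                total_epoch=100,
                lr=0.05,
                temperature=0.5)
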
def Reyni_estimator(VI, dataset_name, per):
    name = dataset_name
    X0, y0, n_splits, indexes = dataload2(name)
    N, D = X0.shape

    def neural_network(X):
        h = tf.nn.relu(tf.matmul(X, W_0) + b_0)
        h = tf.nn.relu(tf.matmul(h, W_1) + b_1)
        h = tf.matmul(h, W_2) + b_2
        return tf.reshape(h, [-1])
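    # neural_network: two ReLU hidden layers with a linear scalar output;
    # it is used below as the mean of the Gaussian likelihood over y.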

    M = 128  # batch size during training
    H = 20

    n_batch = int(N / M)
    n_epoch = 200

    # gamma: the outlier-tuning hyper-parameter.
    # In this code, cross validation is not done for simplicity:
    # the training dataset is simply split into training and test data,
    # and each hyper-parameter setting is run 10 times.
    if VI == 'KL':
        gamma_list = [1]
    elif VI == 'beta':
        gamma_list = [0.1, 0.2, 0.3, 0.4]

    var = 0.01
    var2 = 1.0000
    num = 10
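    # var:  std-dev used to initialise the variational (q) parameters below
    # var2: scale of the Gaussian priors over the weights and biases
    # num:  number of train/test splits that results are averaged over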

    Results = np.zeros(num)
    for gamma in gamma_list:
        # MODEL
        with tf.name_scope("model"):
            W_0 = Normal(loc=tf.zeros([D, H]),
                         scale=tf.ones([D, H]) * var2,
                         name="W_0")
            W_1 = Normal(loc=tf.zeros([H, H]),
                         scale=tf.ones([H, H]) * var2,
                         name="W_1")
            W_2 = Normal(loc=tf.zeros([H, 1]),
                         scale=tf.ones([H, 1]) * var2,
                         name="W_2")
            b_0 = Normal(loc=tf.zeros(H), scale=tf.ones(H) * var2, name="b_0")
            b_1 = Normal(loc=tf.zeros(H), scale=tf.ones(H) * var2, name="b_1")
            b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1) * var2, name="b_2")

            X = tf.placeholder(tf.float32, [None, D], name="X")
            y_ph = tf.placeholder(tf.float32, [None])
            y = Normal(loc=neural_network(X), scale=1., name="y")
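            # Every weight and bias above gets an independent zero-mean
            # Gaussian prior with scale var2.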

        # INFERENCE
        with tf.name_scope("posterior"):
            with tf.name_scope("qW_0"):
                qW_0 = Normal(loc=tf.Variable(tf.random_normal([D, H],
                                                               stddev=var),
                                              name="loc"),
                              scale=tf.nn.softplus(
                                  tf.Variable(tf.random_normal([D, H],
                                                               stddev=var),
                                              name="scale")))
            with tf.name_scope("qW_1"):
                qW_1 = Normal(loc=tf.Variable(tf.random_normal([H, H],
                                                               stddev=var),
                                              name="loc"),
                              scale=tf.nn.softplus(
                                  tf.Variable(tf.random_normal([H, H],
                                                               stddev=var),
                                              name="scale")))
            with tf.name_scope("qW_2"):
                qW_2 = Normal(loc=tf.Variable(tf.random_normal([H, 1],
                                                               stddev=var),
                                              name="loc"),
                              scale=tf.nn.softplus(
                                  tf.Variable(tf.random_normal([H, 1],
                                                               stddev=var),
                                              name="scale")))
            with tf.name_scope("qb_0"):
                qb_0 = Normal(loc=tf.Variable(tf.random_normal([H],
                                                               stddev=var),
                                              name="loc"),
                              scale=tf.nn.softplus(
                                  tf.Variable(tf.random_normal([H],
                                                               stddev=var),
                                              name="scale")))
            with tf.name_scope("qb_1"):
                qb_1 = Normal(loc=tf.Variable(tf.random_normal([H],
                                                               stddev=var),
                                              name="loc"),
                              scale=tf.nn.softplus(
                                  tf.Variable(tf.random_normal([H],
                                                               stddev=var),
                                              name="scale")))
            with tf.name_scope("qb_2"):
                qb_2 = Normal(loc=tf.Variable(tf.random_normal([1],
                                                               stddev=var),
                                              name="loc"),
                              scale=tf.nn.softplus(
                                  tf.Variable(tf.random_normal([1],
                                                               stddev=var),
                                              name="scale")))

        results = []
        for data in range(num):
            index = indexes[data]
            X_train, X_test = X0[index[0], :], X0[index[1], :]
            y_train, y_test = y0[index[0]], y0[index[1]]
            X_train, y_train, X_test, y_test, mean_y_train, std_y_train = preprocessing(
                X_train, y_train, X_test, y_test)

            X_train, y_train = add_noise(
                X_train,
                y_train,
                noise_x=(0, 6, np.random.choice(D, D, replace=False)),
                noise_y=(0, 6),
                percent=per)
            data = generator([X_train, y_train], M)
            N = X_train.shape[0]

            if VI == 'KL':
                print("KL")
                inference = ed.KLqp(
                    {
                        W_0: qW_0,
                        b_0: qb_0,
                        W_1: qW_1,
                        b_1: qb_1,
                        W_2: qW_2,
                        b_2: qb_2
                    },
                    data={y: y_ph})
                inference.initialize(n_iter=10000,
                                     n_samples=15,
                                     scale={y: N / M})
            elif VI == 'beta':
                print("beta")
                inference = KLqp_beta(
                    {
                        W_0: qW_0,
                        b_0: qb_0,
                        W_1: qW_1,
                        b_1: qb_1,
                        W_2: qW_2,
                        b_2: qb_2
                    },
                    data={y: y_ph})
                inference.initialize(n_iter=10000,
                                     n_samples=15,
                                     alpha=gamma,
                                     size=M,
                                     tot=N,
                                     scale={y: N / M})

            tf.global_variables_initializer().run()

            for _ in range(inference.n_iter):
                X_batch, y_batch = next(data)
                info_dict = inference.update({X: X_batch, y_ph: y_batch})
                inference.print_progress(info_dict)

            y_post = ed.copy(y, {
                W_0: qW_0,
                b_0: qb_0,
                W_1: qW_1,
                b_1: qb_1,
                W_2: qW_2,
                b_2: qb_2
            })
            print("Mean squared error on test data:")
            a = ed.evaluate('mean_squared_error',
                            data={
                                X: X_test,
                                y_post: y_test
                            })
            print(std_y_train * (a**0.5), a, std_y_train)
            print(gamma)
            results.append(std_y_train * (a**0.5))
        results = np.array(results)
        Results = np.vstack((Results, results))

    mu = np.mean(Results, -1)
    std = np.std(Results, -1)

    Saving = [mu, std]
    np.save(str('RMSE_') + str(VI) + str(name) + str(per) + '.npy', Saving)
def trial_solution(X):
    # The weight/bias variables (w_h, b_h, w_h2, b_h2, w_h3, b_h3) are defined
    # earlier in the original example; this fragment only shows the forward pass.
    H = tf.nn.sigmoid(tf.matmul(X, w_h) + b_h)
    H2 = tf.nn.sigmoid(tf.matmul(H, w_h2) + b_h2)
    W0 = tf.nn.sigmoid(tf.matmul(H2, w_h3) + b_h3)
    return W0


n_u = 10
U = tf.placeholder(tf.float32, [n_u, 1], name='U_placeholder')
W = trial_solution(U)

Grid_num = 100

Comb = np.arange(1, Grid_num)[:, None]
np.random.shuffle(Comb)
Comb = Comb / Grid_num
Uniform = generator([Comb], n_u)
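# Comb is now the shuffled grid {1/Grid_num, ..., (Grid_num - 1)/Grid_num};
# the generator feeds it to the U placeholder n_u points at a time.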


#### Variable Transformation
# Since the support of the Gaussian is (-inf, inf), we transform it to the support (0, 1) for the numerical boundary condition
def minus_inf_inf_to_zero_one(variable):
    return tf.log(variable / (1 - variable))


def minus_inf_inf_to_zero_one_Log_det_J(variable):
    # log |d/dx log(x / (1 - x))| = log(1 / (x * (1 - x)))
    return tf.log(1 / (variable * (1 - variable)))
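
As a quick sanity check on the two functions above: for y = log(x / (1 - x)) the derivative is dy/dx = 1 / (x (1 - x)), so the log-determinant of the Jacobian is -log(x) - log(1 - x). A small NumPy check (illustrative only, outside the TensorFlow graph):

import numpy as np

x, eps = 0.3, 1e-6
logit = lambda v: np.log(v / (1.0 - v))
dy_dx = (logit(x + eps) - logit(x)) / eps          # finite-difference derivative
print(np.isclose(np.log(dy_dx), np.log(1.0 / (x * (1.0 - x))), atol=1e-4))  # True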


#Transform
params = minus_inf_inf_to_zero_one(W)
#Variable transformation's Log_det_Jacobian
'''
batch_size = 1000
minibatch_U = 1
n_epochs = 60

#how many placeholders we use for the loss
Num = 4

n_fold = 10  # number of cross-validation folds
k_fold = cross_validation.KFold(n=len(X0), n_folds=n_fold, random_state=0)
for train_index, test_index in k_fold:
    X_train, X_test = X0[train_index, :], X0[test_index, :]
    X_train, X_test = preprocessing2(X_train, X_test)
    y_train, y_test = Y0[train_index][:, None], Y0[test_index][:, None]

    data = generator([X_train, y_train], batch_size)

X = tf.placeholder(tf.float32, [None, data_dim], name='X_placeholder')
Y = tf.placeholder(tf.float32, [None, 1], name='Y_placeholder')

################## Define your model
'''
Bayesian neural net, one hidden layer
'''

Num_of_hidden = 20

target_shape = [[Num_of_hidden, data_dim], [Num_of_hidden, 1],
                [1, Num_of_hidden], [1, 1]]
target_shape = np.array(target_shape)
a = np.cumprod(target_shape, 1)[:, -1]