# Imports assumed by this TF1-style snippet (tf.contrib is TF 1.x only).
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer, l2_regularizer

AdamOptimizer = tf.train.AdamOptimizer  # referenced unqualified below


class Grad_policy(object):
    def __init__(self, state_size, action_size, lr=0.001):
        self.init = xavier_initializer()
        with tf.variable_scope('supervised_policy'):
            self.st = tf.placeholder(tf.float32, [None, state_size], name='st')
            self.acts_prob = self.sl_policy_nn(self.st, state_size,
                                               action_size, self.init)
            self.act = tf.placeholder(tf.int32, [None], name='act')
            self.reward = tf.placeholder(tf.float32, name='reward')

            # Pick out the probability assigned to the action actually taken,
            # using a boolean one-hot mask over the action dimension.
            act_mask = tf.cast(tf.one_hot(self.act, depth=action_size),
                               tf.bool)
            self.act_prob = tf.boolean_mask(self.acts_prob, act_mask)

            # REINFORCE-style objective: negative log-probability of the taken
            # actions weighted by the reward, plus the L2 regularization losses
            # collected in this scope.
            reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                           scope='supervised_policy')
            self.loss = tf.add_n(reg_losses) + tf.reduce_sum(
                -tf.log(self.act_prob) * self.reward)
            self.optimizer = AdamOptimizer(learning_rate=lr)
            self.training_op = self.optimizer.minimize(self.loss)

    def sl_policy_nn(self, state, state_size, action_size, init):
        # Three-layer fully connected policy network; hidden sizes 512 and 1024,
        # L2-regularized weights, softmax output over the action space.
        w1 = tf.get_variable('W1', [state_size, 512],
                             initializer=init,
                             regularizer=l2_regularizer(0.01))
        b1 = tf.get_variable('b1', [512],
                             initializer=tf.constant_initializer(0.0))
        h1 = tf.nn.relu(tf.matmul(state, w1) + b1)
        w2 = tf.get_variable('w2', [512, 1024],
                             initializer=init,
                             regularizer=l2_regularizer(0.01))
        b2 = tf.get_variable('b2', [1024],
                             initializer=tf.constant_initializer(0.0))
        h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
        w3 = tf.get_variable('w3', [1024, action_size],
                             initializer=init,
                             regularizer=l2_regularizer(0.01))
        b3 = tf.get_variable('b3', [action_size],
                             initializer=tf.constant_initializer(0.0))
        acts_prob = tf.nn.softmax(tf.matmul(h2, w3) + b3)
        return acts_prob

    def get_act_probs(self, st, sess=None):
        # Forward pass only: action probabilities for a batch of states.
        sess = sess or tf.get_default_session()
        return sess.run(self.acts_prob, {self.st: st})

    def train_batch(self, st, act, reward, sess=None):
        # One gradient step on a batch of (state, action, reward) samples.
        sess = sess or tf.get_default_session()
        _, loss = sess.run([self.training_op, self.loss], {
            self.st: st,
            self.act: act,
            self.reward: reward
        })
        return loss
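
# A minimal usage sketch (not part of the original snippet): the state/action
# sizes and the random batch below are illustrative assumptions.
import numpy as np

tf.reset_default_graph()
policy = Grad_policy(state_size=200, action_size=50, lr=0.001)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    states = np.random.rand(4, 200).astype(np.float32)      # dummy batch of states
    actions = np.random.randint(0, 50, size=4)               # dummy action indices
    print(policy.get_act_probs(states, sess))                # forward pass
    print(policy.train_batch(states, actions, 1.0, sess))    # one update, returns the loss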
Example #2
# Imports assumed by this snippet; get_model, get_classification_data, get_img,
# save_data, and weights_path are project-specific helpers defined elsewhere.
import numpy as np
import tensorflow as tf
from random import shuffle

AdamOptimizer = tf.train.AdamOptimizer
sparse_softmax_cross_entropy_with_logits = tf.nn.sparse_softmax_cross_entropy_with_logits

# The .numpy() calls and the loss closure passed to opt.minimize() below require
# eager execution (assumed enabled, e.g. via tf.enable_eager_execution()).


def train():
    classifier = get_model()
    opt = AdamOptimizer(1e-5)
    images_data = get_classification_data("../data/data_classification_train.json")
    count = 0
    print("Training started")
    shuffle(images_data)
    for (i, label) in images_data:
        img = get_img("../pictures/pictures_classification_train/{}.png".format(i))
        def get_loss():
            # Loss closure re-evaluated by the optimizer: forward pass through
            # the classifier, sparse cross-entropy against the integer label.
            img_vector = tf.convert_to_tensor([img], dtype=np.float32)
            logits = classifier(img_vector)
            entropy = sparse_softmax_cross_entropy_with_logits(labels=[label],
                                                               logits=logits)
            entropy = tf.gather(entropy, 0)  # scalar loss for the single image
            save_data(label, logits[0].numpy().tolist(), entropy.numpy().tolist())
            return entropy
        # Eager-mode minimize: the optimizer calls get_loss() itself and applies
        # gradients to the classifier's trainable variables.
        opt.minimize(get_loss)
        count += 1
        if count % 1000 == 0:
            classifier.save_weights(weights_path)
            print("Weights saved")
    classifier.save_weights(weights_path)
    print("Weights saved")
Example #3
# Imports assumed by this snippet; LKM, data, additive_kernels, and likelihoods
# come from the surrounding project.
import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

AdamOptimizer = tf.train.AdamOptimizer

model = LKM(data, additive_kernels, likelihoods)

# GP hyperparameters and IBP variational parameters are optimized separately.
gp_train_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="gp_hyperparameters")
ibp_train_vars = list(set(tf.global_variables()) - set(gp_train_vars))

update_tau = model.closed_form_update_tau()   # closed-form update for tau
elbo = model.build_marginal_loglikelihood()   # evidence lower bound to maximize

z, nll_gp_refined = model.refine()

t_test, K, K_star, K_star_star, noise = model.prepare_for_postprocess()

# train IBP parameters with Adam
adam = AdamOptimizer(0.01)
train_ibp = adam.minimize(-elbo, var_list=ibp_train_vars)

# Fit the GP hyperparameters with a few L-BFGS-B steps through SciPy.
train_gp = ScipyOptimizerInterface(-elbo,
                                   var_list=gp_train_vars,
                                   method='L-BFGS-B',
                                   options={"maxiter": 10})

# Refinement stage: fit the GP hyperparameters on the refined negative
# log-likelihood with a larger L-BFGS-B iteration budget.
train_gp_refine = ScipyOptimizerInterface(nll_gp_refined,
                                          var_list=gp_train_vars,
                                          method='L-BFGS-B',
                                          options={"maxiter": 300})

init = tf.global_variables_initializer()
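
# A possible training loop for the ops defined above (a sketch, not from the
# original source): it assumes the model reads its data directly (no feed_dict)
# and uses illustrative iteration counts.
with tf.Session() as sess:
    sess.run(init)
    for step in range(500):                  # illustrative number of Adam steps
        sess.run(train_ibp)                  # gradient step on the IBP parameters
        sess.run(update_tau)                 # closed-form update for tau
        if step % 100 == 0:
            train_gp.minimize(sess)          # short L-BFGS-B fit of the GP hyperparameters
            print("step", step, "elbo:", sess.run(elbo))
    train_gp_refine.minimize(sess)           # final refinement of the GP hyperparameters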