Esempio n. 1
0
 def __init__(self,
              lr=0.1,
              use_locking=False,
              name="ExponentialMappingOptimizer"):
     """Initialize the optimizer.

     Args:
         lr: step size used by this optimizer (stored on ``self.lr``).
         use_locking: forwarded to the base TF optimizer.
         name: name prefix for the ops created by this optimizer.
     """
     super(ExponentialMappingOptimizer, self).__init__(use_locking, name)
     self.lr = lr
     # Separate Adam instance, presumably for the Euclidean (non-manifold)
     # parameters -- NOTE(review): confirm intended use.
     self.euclidean_optimizer = AdamOptimizer()
Esempio n. 2
0
    def _build(self, n_inputs):
        """Build the TF1 computation graph, optimizers and session.

        Args:
            n_inputs: number of input features (second dimension of ``x``).

        Side effects: creates ``self.train_op_*``, ``self.session``,
        ``self._saver`` (checkpointed immediately so training can be reset)
        and ``self.tf_vars``.
        """
        # Build the input/output variables
        x = tf.placeholder(shape=[None, n_inputs], name='x', dtype=tf.float32)
        y = tf.placeholder(shape=[None, 1], name='y', dtype=tf.float32)

        # Build the model
        w, phi = self._build_subnets(x)
        n_hidden = int(w.shape[1])
        assert phi.shape[1] == n_hidden, \
            'w(x) and phi(x) have incompatible shapes: {} {}'.format(w.shape, phi.shape)
        # f(x) = sigmoid(<w(x), phi(x)>)
        dot = tf.reduce_sum(w * phi, axis=1, keepdims=True, name='dot')
        f = tf.sigmoid(dot, name='f')

        # Supervision target for w(x).
        z = tf.placeholder(shape=[None, n_hidden], dtype=tf.float32, name='z')

        # Build the losses
        loss_y = log_loss(y, f)
        # Mean squared Euclidean distance between z and w(x).
        loss_z = tf.reduce_mean(tf.reduce_sum((z - w) * (z - w), axis=1))

        # Build the regularizer
        # XXX remove bias?
        # Penalize mismatch between df/dx and w(x)^T dphi/dx.
        grad_f = tf.gradients(f, [x])[0]
        jacob_phi = batch_jacobian(phi, x)
        w_times_jacob_phi = tf.einsum('boi,bo->bi', jacob_phi, w)
        reg_z = tf.reduce_sum(tf.squared_difference(grad_f, w_times_jacob_phi))

        # Build the optimizers; self.lambdas weights the three loss terms.
        l0, l1, l2 = 1 - sum(self.lambdas), self.lambdas[0], self.lambdas[1]
        self.train_op_y = AdamOptimizer(self.eta) \
                              .minimize(l0 * loss_y + l2 * reg_z)
        self.train_op_z = AdamOptimizer(self.eta) \
                              .minimize(l1 * loss_z + l2 * reg_z)
        self.train_op_y_z = AdamOptimizer(self.eta) \
                                .minimize(l0 * loss_y + l1 * loss_z + l2 * reg_z)

        # Build the tensorflow session
        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())

        # Snapshot the freshly initialized weights.
        self._saver = tf.train.Saver()
        self._saver.save(self.session, _CHECKPOINT)

        # Expose graph tensors by name for feed/fetch elsewhere.
        self.tf_vars = {
            'x': x,
            'z': z,
            'y': y,
            'w': w,
            'phi': phi,
            'dot': dot,
            'f': f,
            'loss_y': loss_y,
            'loss_z': loss_z,
            'reg_z': reg_z,
        }
Esempio n. 3
0
    def model_fn(features, labels, mode):
        """Estimator model_fn: hub image encoder + 3-layer projection head.

        Args:
            features: dict with key 'x' holding the input image batch.
            labels: target vectors in the embedding space.
            mode: a tf.estimator ModeKeys value.

        Returns:
            An EstimatorSpec appropriate for the requested mode.
        """
        # Silence the verbose module-download logging while loading the hub
        # encoder, then restore INFO logging.
        tf.logging.set_verbosity(tf.logging.WARN)
        model = hub.Module(IMG_ENCODER, trainable=True)
        tf.logging.set_verbosity(tf.logging.INFO)
        encoded = model(features['x'])
        # BUG FIX: all three dense layers previously consumed the encoder
        # output directly, so the first two were dead code. Chain them so the
        # head is actually three layers deep (relu, relu, tanh).
        output = tf.layers.dense(encoded,
                                 VEC_SPACE_DIMENSIONS,
                                 activation=tf.nn.relu)
        output = tf.layers.dense(output,
                                 VEC_SPACE_DIMENSIONS,
                                 activation=tf.nn.relu)
        output = tf.layers.dense(output,
                                 VEC_SPACE_DIMENSIONS,
                                 activation=tf.nn.tanh)

        if mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL:
            # MSE in embedding space plus weighted regularization losses.
            loss = mean_squared_error(labels, output)
            regularizer = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            loss = loss + 0.25 * sum(regularizer)
        if mode == ModeKeys.TRAIN:
            train_op = AdamOptimizer(learning_rate=0.00001).minimize(
                loss=loss, global_step=get_global_step())
            return EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
        elif mode == ModeKeys.EVAL:
            eval_metric_ops = {
                'accuracy': tf.metrics.mean_cosine_distance(labels, output, 0)
            }
            return EstimatorSpec(mode=mode,
                                 loss=loss,
                                 eval_metric_ops=eval_metric_ops)
        elif mode == ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=output)
Esempio n. 4
0
    def __init__(self, state_size, action_size, lr=0.001):
        """Build the supervised policy graph.

        Args:
            state_size: dimensionality of the input state vector.
            action_size: number of discrete actions.
            lr: Adam learning rate.
        """
        self.init = xavier_initializer()
        with tf.variable_scope('supervised_policy'):
            self.st = tf.placeholder(tf.float32, [None, state_size], name='st')
            self.acts_prob = self.sl_policy_nn(self.st, state_size,
                                               action_size, self.init)
            self.act = tf.placeholder(tf.int32, [None], name='act')

            # Pick out the probability of each taken action via boolean mask.
            act_mask = tf.cast(tf.one_hot(self.act, depth=action_size),
                               tf.bool)
            self.act_prob = tf.boolean_mask(self.acts_prob, act_mask)

            # Loss = L2 regularization (scoped to this policy) + negative
            # log-likelihood of the chosen actions.
            self.loss = sum(
                tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                  scope='supervised_policy')) + tf.reduce_sum(
                                      -tf.log(self.act_prob))
            self.optimizer = AdamOptimizer(learning_rate=lr)
            self.training_op = self.optimizer.minimize(self.loss)
Esempio n. 5
0
class Grad_policy(object):
    """Policy-gradient (REINFORCE-style) policy.

    A softmax MLP over actions, trained with reward-weighted negative
    log-likelihood plus L2 weight regularization.
    """

    def __init__(self, state_size, action_size, lr=0.001):
        self.init = xavier_initializer()
        with tf.variable_scope('supervised_policy'):
            self.st = tf.placeholder(tf.float32, [None, state_size], name='st')
            self.acts_prob = self.sl_policy_nn(self.st, state_size,
                                               action_size, self.init)
            self.act = tf.placeholder(tf.int32, [None], name='act')
            self.reward = tf.placeholder(tf.float32, name='reward')

            # Probability the policy assigned to each taken action.
            chosen = tf.cast(tf.one_hot(self.act, depth=action_size), tf.bool)
            self.act_prob = tf.boolean_mask(self.acts_prob, chosen)

            # Reward-weighted NLL plus the scoped L2 penalties.
            penalties = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES,
                scope='supervised_policy')
            weighted_nll = tf.reduce_sum(
                -tf.log(self.act_prob) * self.reward)
            self.loss = sum(penalties) + weighted_nll
            self.optimizer = AdamOptimizer(learning_rate=lr)
            self.training_op = self.optimizer.minimize(self.loss)

    def sl_policy_nn(self, state, state_size, action_size, init):
        """Three-layer MLP (512 -> 1024 -> action_size) ending in softmax."""

        def affine(inputs, w_name, b_name, in_dim, out_dim):
            # Regularized weight matrix + zero-initialized bias.
            w = tf.get_variable(w_name, [in_dim, out_dim],
                                initializer=init,
                                regularizer=l2_regularizer(0.01))
            b = tf.get_variable(b_name, [out_dim],
                                initializer=tf.constant_initializer(0.0))
            return tf.matmul(inputs, w) + b

        h1 = tf.nn.relu(affine(state, 'W1', 'b1', state_size, 512))
        h2 = tf.nn.relu(affine(h1, 'w2', 'b2', 512, 1024))
        return tf.nn.softmax(affine(h2, 'w3', 'b3', 1024, action_size))

    def get_act_probs(self, st, sess=None):
        """Return action probabilities for a batch of states."""
        session = sess or tf.get_default_session()
        return session.run(self.acts_prob, {self.st: st})

    def train_batch(self, st, act, reward, sess=None):
        """Run one optimization step and return the batch loss."""
        session = sess or tf.get_default_session()
        feed = {self.st: st, self.act: act, self.reward: reward}
        _, loss = session.run([self.training_op, self.loss], feed)
        return loss
Esempio n. 6
0
    def build(self) -> None:
        """Build PPO clipped-surrogate losses and their Adam train ops."""
        self.model = self.model_instances[-1]

        # Build losses and training operators
        # Importance ratio pi_new/pi_old, from stored log-probabilities.
        ratio = tf.exp(self.model.logp - self.act_prob_ph)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_range,
                                         1 + self.clip_range)
        # Clipped surrogate objective, negated for minimization.
        self.pi_loss = -tf.reduce_mean(
            tf.minimum(ratio * self.advantage_ph,
                       clipped_ratio * self.advantage_ph))
        # Squared-error value loss.
        self.v_loss = tf.reduce_mean((self.value_ph - self.model.v)**2)
        # Cheap sample-based KL estimate (useful for early stopping).
        self.approx_kl = tf.reduce_mean(self.act_prob_ph - self.model.logp)

        self.train_pi = AdamOptimizer(learning_rate=self.pi_lr).minimize(
            self.pi_loss)
        self.train_v = AdamOptimizer(learning_rate=self.vf_lr).minimize(
            self.v_loss)

        # Initialize variables
        self.model.sess.run(tf.global_variables_initializer())
Esempio n. 7
0
    def build(self) -> None:
        """Build the Q-network MSE loss and train op, then sync the target."""
        self.policy_model = self.model_instances[0]
        self.target_model = self.model_instances[1]

        # Mean squared TD error between predicted Q-values and targets.
        self.loss = tf.reduce_mean(
            (self.policy_model.values - self.target_ph)**2)
        self.train_q = AdamOptimizer(learning_rate=self.lr).minimize(self.loss)

        # Initialize both networks, then copy policy weights to the target.
        self.policy_model.sess.run(tf.global_variables_initializer())
        self.target_model.sess.run(tf.global_variables_initializer())
        self.update_target_model()
Esempio n. 8
0
def train():
    """Fine-tune the classifier on the training set, checkpointing the
    weights every 1000 examples and once more when the pass completes."""
    classifier = get_model()
    opt = AdamOptimizer(1e-5)
    images_data = get_classification_data("../data/data_classification_train.json")
    print("Training started")
    shuffle(images_data)
    for count, (i, label) in enumerate(images_data, start=1):
        img = get_img("../pictures/pictures_classification_train/{}.png".format(i))

        def get_loss():
            # Eager-mode loss: forward pass + sparse cross-entropy; the
            # per-example logits/loss are persisted for later analysis.
            img_vector = tf.convert_to_tensor([img], dtype=np.float32)
            logits = classifier(img_vector)
            entropy = sparse_softmax_cross_entropy_with_logits(labels=[label], logits=logits)
            entropy = tf.gather(entropy, 0)
            save_data(label, logits[0].numpy().tolist(), entropy.numpy().tolist())
            return entropy

        opt.minimize(get_loss)
        if count % 1000 == 0:
            classifier.save_weights(weights_path)
            print("Weights saved")
    classifier.save_weights(weights_path)
    print("Weights saved")
Esempio n. 9
0
    def __init__(self, batch_size=100, epochs=50, verbose=1):
        """Store hyperparameters and build + compile the Keras model.

        Args:
            batch_size: training batch size.
            epochs: number of training epochs.
            verbose: Keras verbosity level.
        """
        self.batch_size = batch_size
        self.epochs = epochs
        self.verbose = verbose
        self.num_classes = 2  # binary classification
        self.test_data = None  # filled in later by the data loader
        self.test_labels = None
        self.tracker = None
        self.img_shape = (224, 224, 1)  # 224x224 grayscale input

        self.model = self.create_model()
        self.model.compile(optimizer=AdamOptimizer(),
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
Esempio n. 10
0
def train(_seed,
          minibatch_size,
          no_iterations,
          lrate,
          show_training_info,
          net=None,
          n_data=1e-4,
          X_test=None,
          y_test=None,
          y_train_std=None,
          y_train_mean=None):
    """Train `net` for roughly `no_iterations` minibatch updates with Adam
    and return the elapsed CPU time in seconds."""
    if not minibatch_size:
        # Default: a single batch over all data, capped at 10k samples.
        minibatch_size = min(1e4, n_data)

    # Translate the iteration budget into a whole number of epochs.
    n_batches = np.ceil(n_data / minibatch_size)
    n_epochs = int(np.ceil(no_iterations / n_batches))

    started = time.process_time()
    net.train(AdamOptimizer(lrate), n_epochs, minibatch_size=minibatch_size,
              X_test=X_test, show_training_info=show_training_info,
              y_test=y_test, y_train_std=y_train_std,
              y_train_mean=y_train_mean, log_every=100)
    finished = time.process_time()

    return finished - started
Esempio n. 11
0
    def train_optimizer(self):
        """Create the training op.

        Selects an optimizer by the configured name, clips each gradient
        element-wise into [-clip_grad_, clip_grad_], and applies the clipped
        gradients while advancing a global step counter.
        """
        with tf.variable_scope('train_step'):
            self.global_step_ = tf.Variable(0,
                                            name='global_step_',
                                            trainable=False)
            if self.optimizer_ == 'Adam':
                opt = AdamOptimizer(learning_rate=self.learning_rate_ph_)
            elif self.optimizer_ == 'Adagrad':
                opt = AdagradOptimizer(learning_rate=self.learning_rate_ph_)
            elif self.optimizer_ == 'Adadelta':
                opt = AdadeltaOptimizer(learning_rate=self.learning_rate_ph_)
            elif self.optimizer_ == 'RMSProp':
                opt = RMSPropOptimizer(learning_rate=self.learning_rate_ph_)
            elif self.optimizer_ == 'Momentum':
                opt = MomentumOptimizer(learning_rate=self.learning_rate_ph_,
                                        momentum=0.9)
            else:
                # Unknown names fall back to plain SGD.
                opt = GradientDescentOptimizer(
                    learning_rate=self.learning_rate_ph_)

            # (An alternative is tf.clip_by_global_norm over all trainable
            # variables; element-wise value clipping is used here instead.)

            # Compute gradients up front so they can be clipped.
            # BUG FIX: compute_gradients yields (None, var) pairs for
            # variables the loss does not depend on, and tf.clip_by_value
            # raises on None -- skip those pairs.
            grads_and_vars = opt.compute_gradients(self.loss_)
            grads_and_vars_clip = [
                (tf.clip_by_value(g, -self.clip_grad_, self.clip_grad_), v)
                for g, v in grads_and_vars if g is not None
            ]
            # Apply the clipped gradients.
            self.train_optimizer_ = opt.apply_gradients(
                grads_and_vars_clip, global_step=self.global_step_)
Esempio n. 12
0
    def __init__(self, num_states, num_actions):
        """Set up replay memory and the main/target Q-networks.

        Args:
            num_states: size of the observation vector.
            num_actions: number of discrete actions.
        """
        self.memory = ExperienceMemory(CAPACITY)
        self.num_states, self.num_actions = num_states, num_actions

        # Minibatch slots, populated when sampling from replay memory.
        self.batch = None
        self.state_batch = None
        self.action_batch = None
        self.states_next_batch = None
        self.reward_batch = None

        # NOTE(review): one optimizer instance is shared by both compiled
        # networks -- confirm this is intended.
        self.optimizer = AdamOptimizer()
        self.main_q_network = Net(num_states, num_actions)
        self.main_q_network.compile(loss=huber_loss, optimizer=self.optimizer)
        self.target_q_network = Net(num_states, num_actions)
        self.target_q_network.compile(loss=huber_loss,
                                      optimizer=self.optimizer)
    def load_model(self, filename='model'):
        """Load the architecture from `<filename>.json` and weights from
        `<filename>.h5` (as produced by train_model.py), compile, return."""
        architecture_path = filename + '.json'
        weights_path = filename + '.h5'

        with open(architecture_path, 'r') as json_file:
            architecture_json = json_file.read()

        restored = model_from_json(architecture_json)
        restored.load_weights(weights_path)
        print('Loaded model')

        restored.compile(optimizer=AdamOptimizer(),
                         loss='binary_crossentropy',
                         metrics=['accuracy'])

        print(restored.summary())
        return restored
Esempio n. 14
0
    def model(self, input_shape, label_shape, opt, lr=1e-4, training=True):
        """Build the network, its loss, and the training op.

        Args:
            input_shape: shape of the input tensor.
            label_shape: shape of the label tensor.
            opt: optimizer name, one of 'adam' or 'sgd'.
            lr: learning rate.
            training: when False, reshape the outputs for inference.

        Raises:
            ValueError: if `opt` is not a supported optimizer name.
        """
        # Assemble the graph: inputs -> encoder -> decode head -> loss.
        self.set_inputs(input_shape, label_shape)
        self.create_encoder()
        self.create_decode_head()
        if not training:
            self.reshape_output()
        self.define_loss()

        # Select the optimizer. (Attribute name 'optimzer' is a historical
        # typo kept for backward compatibility with external readers.)
        if opt == 'adam':
            self.optimzer = AdamOptimizer(learning_rate=lr)
        elif opt == 'sgd':
            self.optimzer = GradientDescentOptimizer(learning_rate=lr)
        else:
            # BUG FIX: unknown names previously fell through silently and
            # surfaced later as AttributeError; fail fast instead.
            raise ValueError("unsupported optimizer: {!r}".format(opt))
        # One training-step op, advancing the global step.
        self.train_op = self.optimzer.minimize(self.loss,
                                               global_step=self.global_step)
Esempio n. 15
0
def train(_seed,
          minibatch_size,
          no_iterations,
          lrate,
          show_training_info,
          net=None,
          n_data=1e-4):
    """Run Adam training on `net` and return the elapsed CPU seconds."""
    # Fall back to one batch over everything, capped at 10k samples.
    batch = minibatch_size if minibatch_size else min(1e4, n_data)

    # Convert the iteration budget into an epoch count.
    epochs = int(np.ceil(no_iterations / np.ceil(n_data / batch)))

    tic = time.process_time()
    net.train(AdamOptimizer(lrate),
              epochs,
              minibatch_size=batch,
              show_training_info=show_training_info)
    toc = time.process_time()

    return toc - tic
Esempio n. 16
0
def fit_model(X_train, Y_train, model, checkpoint_dir, imgtup):
    """Compile and fit `model` on preprocessed/augmented images, unless
    checkpoints already exist in `checkpoint_dir`; returns the model."""
    imgname, imgfunc = imgtup

    # Train only when no checkpoints are present yet.
    #    latest = tf.train.latest_checkpoint(checkpoint_dir)
    #    model.load_weights(latest)
    if len(os.listdir(checkpoint_dir)) <= 1:
        datagen = ImageDataGenerator(preprocessing_function=imgfunc)

        # Transform all training images
        datagen.fit(X_train)

        # Compile model
        model.compile(optimizer=AdamOptimizer(learning_rate=1e-3),
                      loss=mean_absolute_error,
                      metrics=['accuracy'])

        model.summary()

        # Fit model
        history = model.fit_generator(datagen.flow(X_train, Y_train,
                                                   batch_size=32),
                                      steps_per_epoch=X_train.shape[0] / 32,
                                      epochs=100)
        plot_loss('review/train_val_loss_021_{}.png'.format(imgname), history)

    return model
Esempio n. 17
0
#Y_test = X_test
# Restrict all splits to the first m examples; targets mirror the inputs
# (autoencoder-style: Y is a copy of the already-sliced X).
X_train = X_train[0:m, ...]
Y_train = X_train[0:m, ...]
X_test = X_test[0:m, ...]
Y_test = X_test[0:m, ...]

logger.debug("X_train default shape: {}".format(X_train.shape))
logger.debug("Y_train default shape: {}".format(Y_train.shape))

# Compiling model using Keras

learning_rate = 1e-3
model = simple_sony()
#model = full_sony()
#opt = Adam(lr=1e-4)
opt = AdamOptimizer(learning_rate=learning_rate)

model.compile(optimizer=opt, loss=mean_absolute_error, metrics=['accuracy'])

# Fitting the model

# Hold out 25% of the training data for validation; cp_callback is
# presumably a checkpointing callback defined earlier -- TODO confirm.
history = model.fit(X_train,
                    Y_train,
                    validation_split=0.25,
                    epochs=100,
                    batch_size=32,
                    callbacks=[cp_callback])
plot_loss('review/train_val_loss.png', history)

# Predicting with the model
from layers.capsule_max_pool import CapsMaxPool
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.train import AdamOptimizer
import numpy as np
import tensorflow as tf

# Run eagerly so fit/predict execute immediately (TF1-style eager mode).
tf.enable_eager_execution()

# Smoke test: a one-layer model applying capsule max-pooling to a
# (batch, 2, 2, 2, 3) input.
shape = (1, 2, 2, 2, 3)
x = Input(shape=shape[1:])
maxpool = CapsMaxPool()(x)
model = Model(inputs=x, outputs=maxpool)
input_x = np.array([[
    [[[1, 2, 3], [1, 2, 3]], [[4, 5, 6], [4, 5, 6]]],
    [[[7, 8, 9], [7, 8, 9]], [[10, 11, 12], [10, 11, 12]]],
]],
                   dtype=np.float32)
input_y = np.array([[[
    [[10, 11, 12], [10, 11, 12]],
]]], dtype=np.float32)
tensor_x = tf.cast(input_x, dtype=tf.float32)
tensor_y = tf.cast(input_y, dtype=tf.float32)
opt = AdamOptimizer()
model.compile(optimizer=opt, loss='mean_squared_error')
print(model.fit(x=input_x, y=input_y, batch_size=1))

# NOTE(review): this second compile repeats the one above with identical
# arguments -- looks like a leftover; confirm before removing.
model.compile(optimizer=opt, loss='mean_squared_error')
Esempio n. 19
0
tf.reset_default_graph()

# Placeholders for the features and the +/-1 targets.
X_data = tf.placeholder(tf.float32, shape=[None, x_vals.shape[1]])
y_target = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.get_variable(shape=[x_vals.shape[1], 1], name="W", initializer=xavier_initializer())
b = tf.get_variable(shape=[1, 1], name="b", initializer=xavier_initializer())

# Linear decision function X.W - b. Despite its name, "l2_norm" here is
# the MSE between the raw output and the targets.
output = tf.matmul(X_data, W) - b
l2_norm = mean_squared_error(output, y_target)
# -

# $$ Loss = \max(0, 1 - \hat{y(i)} \cdot y(i)) + \alpha  ||X \cdot W - b||^2 $$

# Hinge loss plus the regularization term weighted by alpha = 0.01.
loss = tf.reduce_mean(tf.maximum(0., 1. - output * y_target)) + 0.01 * l2_norm
optimizer = AdamOptimizer(0.01).minimize(loss)

# +
batch_size = 1024

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        # Sample a fresh random minibatch each step.
        rand_index = np.random.choice(len(X_train), size=batch_size)
        rand_x = X_train[rand_index]
        rand_y = np.transpose([y_train[rand_index]])
        sess.run(optimizer, feed_dict={X_data: rand_x, y_target: rand_y})
    # Read out the learned parameters. NOTE: `b` is rebound here from the
    # bias tensor to its scalar value.
    [[a1], [a2]] = sess.run(W)
    [[b]] = sess.run(b)
# -
Esempio n. 20
0
    noisyLabels.append(labels)

##########################
## BUILD the Neural net ##
##########################
model = []
for netNum in range(0,
                    2):  #for each net with hidden neuron numbers 5, 15, ... 25
    model.append(keras.Sequential())
    for i in range(0, len(options.layerSizes)):  #add layers to the net
        model[netNum].add(
            keras.layers.Dense(options.layerSizes[i],
                               activation="sigmoid",
                               kernel_initializer=options.initializer))

    model[netNum].compile(optimizer=AdamOptimizer(0.001),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

    # ### TRAIN ###
#net trained on only clean
model[0].fit(cleanData, labels, epochs=500, batch_size=5, verbose=1)

# net trained on both types
for trainingCycles in range(0, 500):
    if (trainingCycles % 10 == 0):
        print("Training noise set, cycle: ", trainingCycles, "/500")
    noisyData = []
    for i in range(0, 7):  # noise levels {0.0, 0.5, 1.0 ... 3.0}
        noisyData.append(helper.makeNoisy(cleanData, i / 2))
    for i in range(0, 7):  ## for all noisy and clean data
Esempio n. 21
0
def build_headnet(N,
                  features,
                  embedding_dim,
                  num_negative_samples,
                  num_hidden=128,
                  identity_variance=False):
    """Build the HEADNet embedder and trainable models.

    Args:
        N: number of nodes (embedding table size when no features given).
        features: node attribute matrix, or None for a lookup embedding.
        embedding_dim: dimensionality of the hyperboloid embedding.
        num_negative_samples: negatives per positive in each training row.
        num_hidden: width of the intermediate dense/embedding layer.
        identity_variance: freeze the variance head at its init values.

    Returns:
        (embedder_model, trainable_model) Keras models.

    Note: `reg` (L2 strength) and the various Lambda helper functions are
    module-level names defined outside this function.
    """

    if features is not None:  # HEADNet with attributes

        print("training using node attributes")

        input_layer = Input((features.shape[1], ),
                            name="attributed_input_layer")

        input_transform = Dense(
            num_hidden,
            # activation="relu",
            # kernel_initializer=initializer,
            kernel_regularizer=regularizers.l2(reg),
            bias_regularizer=regularizers.l2(reg),
            name="euclidean_transform",
        )(input_layer)

    else:

        print("training without using attributes")

        # Node-id lookup instead of an attribute transform.
        input_layer = Input((1, ), name="unattributed_input_layer")
        input_transform = Embedding(N, num_hidden)(input_layer)

    input_transform = Activation("relu")(input_transform)

    # Mean head: map to the tangent space, then onto the hyperboloid.
    hyperboloid_embedding_layer = Dense(
        embedding_dim,
        # kernel_initializer=initializer,
        kernel_regularizer=regularizers.l2(reg),
        bias_regularizer=regularizers.l2(reg),
        name="dense_to_hyperboloid",
    )(input_transform)

    to_hyperboloid = Lambda(exp_map_0,
                            name="to_hyperboloid")(hyperboloid_embedding_layer)

    # Variance head: elu(x)+1 keeps sigma strictly positive.
    sigma_layer = Dense(
        embedding_dim,
        activation=lambda x: K.elu(x) + 1.,
        kernel_initializer="zeros",
        kernel_regularizer=regularizers.l2(reg),
        bias_regularizer=regularizers.l2(reg),
        name="dense_to_sigma",
        trainable=not identity_variance,
    )(input_transform)
    if identity_variance:
        # Also block gradients so the frozen variance cannot be nudged
        # through shared layers.
        sigma_layer = Lambda(K.stop_gradient,
                             name="variance_stop_gradient")(sigma_layer)

    embedder_model = Model(input_layer, [to_hyperboloid, sigma_layer],
                           name="embedder_model")

    # Training input: (1 + num_negatives) sample pairs per row, either raw
    # attributes or node ids depending on the mode above.
    if features is not None:

        trainable_input = Input((
            1 + num_negative_samples,
            2,
            features.shape[1],
        ),
                                name="trainable_input_attributed")
    else:

        trainable_input = Input((
            1 + num_negative_samples,
            2,
        ),
                                name="trainable_input_non_attributed")

    mus, sigmas = embedder_model(trainable_input)

    assert len(mus.shape) == len(sigmas.shape) == 4

    mus = Lambda(map_to_tangent_space_mu_zero,
                 name="to_tangent_space_mu_zero")(mus)

    # Loss operates on pairwise KL divergences between Gaussians.
    kds = Lambda(kullback_leibler_divergence,
                 name="kullback_leibler_layer")([mus, sigmas])

    trainable_model = Model(trainable_input, kds, name="trainable_model")

    optimizer = AdamOptimizer(1e-3, )

    trainable_model.compile(optimizer=optimizer,
                            loss=asym_hyperbolic_loss,
                            target_tensors=[
                                tf.placeholder(dtype=tf.int64,
                                               shape=(None, 1)),
                            ])

    return embedder_model, trainable_model
Esempio n. 22
0
    def build(self,
              word_length,
              num_labels,
              num_intent_labels,
              word_vocab_size,
              char_vocab_size,
              word_emb_dims=100,
              char_emb_dims=30,
              char_lstm_dims=30,
              tagger_lstm_dims=100,
              dropout=0.2):
        """Build the joint intent-classification / slot-tagging model.

        Args:
            word_length: max characters per word (char input padding).
            num_labels: number of slot/entity tags (CRF output size).
            num_intent_labels: number of intent classes.
            word_vocab_size: word vocabulary size.
            char_vocab_size: character vocabulary size.
            word_emb_dims: word embedding dimensionality.
            char_emb_dims: character embedding dimensionality.
            char_lstm_dims: char BiLSTM hidden size.
            tagger_lstm_dims: word-level BiLSTM hidden size.
            dropout: dropout rate applied to embeddings and tagger input.

        Side effect: compiles and stores the model on ``self.model``.
        """

        self.word_length = word_length
        self.num_labels = num_labels
        self.num_intent_labels = num_intent_labels
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size

        # Word-level embedding branch.
        words_input = Input(shape=(None, ), name='words_input')
        embedding_layer = Embedding(word_vocab_size,
                                    word_emb_dims,
                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)
        word_embeddings = Dropout(dropout)(word_embeddings)

        # Character-level embedding branch: per-word char BiLSTM.
        word_chars_input = Input(shape=(None, word_length),
                                 name='word_chars_input')
        char_embedding_layer = Embedding(char_vocab_size,
                                         char_emb_dims,
                                         input_length=word_length,
                                         name='char_embedding')
        char_embeddings = char_embedding_layer(word_chars_input)
        char_embeddings = TimeDistributed(Bidirectional(
            LSTM(char_lstm_dims)))(char_embeddings)
        char_embeddings = Dropout(dropout)(char_embeddings)

        # first BiLSTM layer (used for intent classification)
        first_bilstm_layer = Bidirectional(
            LSTM(tagger_lstm_dims, return_sequences=True, return_state=True))
        first_lstm_out = first_bilstm_layer(word_embeddings)

        lstm_y_sequence = first_lstm_out[:1][
            0]  # save y states of the LSTM layer
        states = first_lstm_out[1:]
        hf, _, hb, _ = states  # extract last hidden states
        # Intent head: concatenated forward/backward final states -> softmax.
        h_state = concatenate([hf, hb], axis=-1)
        intents = Dense(num_intent_labels,
                        activation='softmax',
                        name='intent_classifier_output')(h_state)
        # create the 2nd feature vectors
        combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                        axis=-1)

        # 2nd BiLSTM layer (used for entity/slots classification)
        second_bilstm_layer = Bidirectional(
            LSTM(tagger_lstm_dims, return_sequences=True))(combined_features)
        second_bilstm_layer = Dropout(dropout)(second_bilstm_layer)
        bilstm_out = Dense(num_labels)(second_bilstm_layer)

        # feed BiLSTM vectors into CRF
        crf = CRF(num_labels, name='intent_slot_crf')
        entities = crf(bilstm_out)

        model = Model(inputs=[words_input, word_chars_input],
                      outputs=[intents, entities])

        # Two-headed training: cross-entropy for intents, CRF loss for slots.
        loss_f = {
            'intent_classifier_output': 'categorical_crossentropy',
            'intent_slot_crf': crf.loss
        }
        metrics = {
            'intent_classifier_output': 'categorical_accuracy',
            'intent_slot_crf': crf.viterbi_accuracy
        }
        model.compile(loss=loss_f, optimizer=AdamOptimizer(), metrics=metrics)
        self.model = model
Esempio n. 23
0
# Bare notebook-cell expression: evaluates f at x0 (defined in an earlier
# cell) so the notebook displays the value.
(x0**3) - 60 * (x0**2) - 4 * x0 + 6

# # Gradient Descent
#
# $$ f(x)=x^3-60x^2-4x+6 $$

import tensorflow as tf
from tensorflow.train import AdamOptimizer

start = time()

# Minimize f starting from x = 100.
x = tf.get_variable('x', initializer=tf.constant(100.0))
y = x * x * x - 60 * x * x - 4 * x + 6

# +
optimizer = AdamOptimizer(learning_rate=1e-2).minimize(y)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # 50k Adam steps at lr=1e-2, then print the minimizing x.
    for _ in range(50000):
        sess.run(optimizer)
    print(sess.run(x))
# -

print("Gradient Descent for y is about : %0.2f seconds" % (time() - start))

# # Binary Search
#
# similiar to Newton method from [0, 100]. Somehow simple, just skip it

# # MCTS
Esempio n. 24
0
def neural_transfer(content_image,
                    style_image,
                    output_dirpath,
                    epochs=1000,
                    epoch_length=100,
                    alpha=1,
                    beta=10):
    """ Main function to execute neural transfer algorithm using tensorflow eager execution

    Args:
        content_image: content image array (values assumed in [0, 1] --
            outputs are multiplied by 255 when written; TODO confirm).
        style_image: style image array.
        output_dirpath: directory where per-epoch snapshots are written.
        epochs: number of epochs (one snapshot per epoch).
        epoch_length: gradient steps per epoch.
        alpha: content-loss weight.
        beta: style-loss weight.
    """

    tf.enable_eager_execution()

    optimizer = AdamOptimizer(learning_rate=0.003)

    # Layers for loss calculations
    content_layers = ['block4_conv2']
    style_layers = [
        'block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1',
        'block5_conv1'
    ]

    model = init_model(content_layers, style_layers)

    # Get target featuremaps tensors from the content and style images
    content_featuremaps = model(np.expand_dims(content_image,
                                               axis=0))[:len(content_layers)]
    style_featuremaps = model(np.expand_dims(style_image,
                                             axis=0))[len(content_layers):]

    # Starting point of combination image: random noise, optimized directly.
    image_zero = np.expand_dims(np.random.random(np.shape(content_image)),
                                axis=0)
    combined_image_tensor = tf.Variable(image_zero,
                                        name='combined_image_tensor',
                                        dtype=tf.float32)

    for epoch in range(epochs):

        print('\nEpoch: ', epoch)

        # Convert tensor to array then save image to output directory for viewing
        combined_image = np.squeeze(combined_image_tensor.numpy(), axis=0)
        output_filepath = os.path.join(output_dirpath,
                                       'epoch_{}.png'.format(epoch))
        cv2.imwrite(output_filepath, combined_image * 255)

        content_losses_array_avg = np.zeros(len(content_layers),
                                            dtype=np.float32)
        style_losses_array_avg = np.zeros(len(style_layers), dtype=np.float32)

        for _ in tqdm(range(epoch_length)):

            # Operations here are recorded to "GradientTape" for backpropagation
            with tf.GradientTape() as tape:

                combination_featuremaps = model(combined_image_tensor)

                total_loss, content_losses, style_losses = calc_total_loss(
                    content_featuremaps, style_featuremaps,
                    combination_featuremaps, alpha, beta)

            # Optimize the image itself, not the network weights.
            gradients = tape.gradient(total_loss, combined_image_tensor)
            optimizer.apply_gradients([[gradients, combined_image_tensor]])

            # Ensure output image/tensor is bounded between 0 and 1
            clipped = tf.clip_by_value(combined_image_tensor,
                                       clip_value_min=0,
                                       clip_value_max=1)
            combined_image_tensor.assign(clipped)

            # Record the average losses for the epoch
            content_losses_array_avg += content_losses / epoch_length
            style_losses_array_avg += style_losses / epoch_length

        # Display individual losses for analysis
        print('Content loss: ', content_losses_array_avg)
        print('Style loss: ', style_losses_array_avg)
        print(
            'Total loss: ',
            np.sum(style_losses_array_avg) + np.sum(content_losses_array_avg))
Esempio n. 25
0
    # MNIST setup: flatten 28x28 images to 784-dim vectors scaled to [0, 1].
    num_classes = 10
    batch_size = 32
    epochs = 10

    (x_train, y_train), (x_test, y_test) = load_data()
    x_train = x_train.reshape(60000, 784) / 255
    x_test = x_test.reshape(10000, 784) / 255

    # One-hot encode the labels.
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    train_ds = Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(60000).batch(batch_size)
    test_ds = Dataset.from_tensor_slices(
        (x_test, y_test)).shuffle(10000).batch(batch_size)

    optimizer = AdamOptimizer()
    model = Net()

    # NOTE(review): trains for 2 epochs despite epochs=10 above -- confirm.
    train(model, train_ds, epochs=2)
    test(model, test_ds)

    class_name = [str(i) for i in range(num_classes)]

    # Show a single prediction for visual sanity checking.
    x, y = iter(test_ds).next()
    pred = predict(model, x, class_name)

    plt.imshow(x[0].numpy().reshape(28, 28))
    plt.title(pred[0])
    plt.show()
Esempio n. 26
0
def train(dataset_path,checkpoint_path, logdir, batch_size, epochs):

    '''
    Load the data
    '''

    tf.enable_eager_execution()
    dataset = load_audionet_dataset(dataset_path)


    def make_tuple(record):
        return (tf.reshape(record['data'],(8000,)),tf.reshape(record['data'],(8000,)))


    
    '''
    Split the dataset
    '''
    train_dataset = dataset.filter(split('digit', 'train')) \
        .map(make_tuple) \
        .shuffle(18000, seed=42) \
        .batch(batch_size) \
        .repeat()

    train_nb_samples = len(splits['digit']['train'][0])*500


    test_dataset = dataset.filter(split('digit', 'test')) \
        .map(make_tuple) \
        .shuffle(10000, seed=42) \
        .batch(batch_size)

    test_nb_samples = len(splits['digit']['test'][0])*500
    
    
    '''
    Neural Net model
    '''
        
    x = Input(shape=(8000,))
    latent_dim = 500
    intermediate_dim = 2000
    original_dim = 8000

    h = Dense(intermediate_dim, activation='relu')(x)
    z_mean = Dense(latent_dim, activation='linear')(h)
    z_log_sigma = Dense(latent_dim, activation='linear', \
                kernel_initializer='zeros', \
                bias_initializer='zeros')(h)

    def sampling(args):
        z_mean, z_log_sigma = args
        epsilon = K.random_normal(shape=(batch_size, latent_dim))
        return z_mean + K.exp(z_log_sigma/2) * epsilon

    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
    decoder_h = Dense(intermediate_dim, activation='relu')
    decoder_mean = Dense(original_dim, activation='sigmoid')
    h_decoded = decoder_h(z)
    x_decoded_mean = decoder_mean(h_decoded)

    # end-to-end autoencoder
    vae = Model(x, x_decoded_mean)
    # encoder, from inputs to latent space
    encoder = Model(x, z_mean)
    # generator, from latent space to reconstructed inputs
    decoder_input = Input(shape=(latent_dim,))
    _h_decoded = decoder_h(decoder_input)
    _x_decoded_mean = decoder_mean(_h_decoded)
    generator = Model(decoder_input, _x_decoded_mean)

    def vae_loss(x, x_decoded_mean):
        xent_loss = K.mean(K.binary_crossentropy(x, x_decoded_mean),axis=1)
        kl_loss = - 0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=1)
        return xent_loss + kl_loss

    adam = AdamOptimizer(learning_rate=0.001)
    vae.compile(optimizer=adam, loss=vae_loss,metrics = ['accuracy'])

    '''
    Callbacks
    '''
    if not os.path.isdir(logdir): os.mkdir(logdir)
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir,
                                                 batch_size=batch_size)

    if not os.path.isdir(checkpoint_path): os.mkdir(checkpoint_path)
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(os.path.join(checkpoint_path, "model.{epoch:02d}-{val_acc:.2f}"),
                                                            save_weights_only=True)

    gc_callback = tf.keras.callbacks.LambdaCallback(on_batch_end=lambda batch,_: gc.collect())

    
    '''
    Fit the model
    '''
    vae.fit(train_dataset, \
                epochs=epochs, \
                steps_per_epoch = math.ceil(train_nb_samples/batch_size), \
                batch_size = batch_size, \
                shuffle=True, \
                validation_data=test_dataset, \
                validation_steps=math.ceil(test_nb_samples/batch_size), \
                callbacks = [tb_callback, checkpoint_callback])
Esempio n. 27
0
class ExponentialMappingOptimizer(optimizer.Optimizer):
    """Riemannian optimizer for variables on the hyperboloid model.

    Sparse updates to variables whose name contains "hyperbolic" are moved
    along the manifold via the exponential map; every other variable is
    delegated unchanged to a plain Adam optimizer.
    """

    def __init__(self,
                 lr=0.1,
                 use_locking=False,
                 name="ExponentialMappingOptimizer"):
        """Create the optimizer.

        Args:
            lr: Riemannian learning rate for the hyperbolic variables.
            use_locking: if True, use locks for the update operations.
            name: name prefix for the ops created by this optimizer.
        """
        super(ExponentialMappingOptimizer, self).__init__(use_locking, name)
        self.lr = lr
        # Fallback optimizer for ordinary (Euclidean) variables.
        self.euclidean_optimizer = AdamOptimizer()

    def _apply_dense(self, grad, var):
        # Dense gradients are not supported.  The original guard was
        # `assert False` followed by an update path — but asserts are
        # stripped under `python -O`, which would silently run a hyperbolic
        # update on every dense variable.  Raise explicitly instead, the
        # tf.train.Optimizer convention for unimplemented update paths.
        raise NotImplementedError(
            "ExponentialMappingOptimizer only supports sparse gradients")

    def _apply_sparse(self, grad, var):
        if "hyperbolic" in var.name:
            indices = grad.indices
            values = grad.values

            # Rows of `var` actually touched by this sparse gradient.
            p = tf.gather(var, indices, name="gather_apply_sparse")

            # Negate the time-like (last) coordinate: the Minkowski-metric
            # gradient differs from the Euclidean one in that component.
            spacial_grad = values[..., :-1]
            t_grad = -1 * values[..., -1:]

            ambient_grad = K.concatenate(\
             [spacial_grad, t_grad],
             axis=-1, )
            tangent_grad = project_onto_tangent_space(p, ambient_grad)

            # Walk along the geodesic from p in the direction of the
            # negated, lr-scaled tangent gradient.
            exp_map = exponential_mapping(p, -self.lr * tangent_grad)

            return tf.scatter_update(ref=var,
                                     indices=indices,
                                     updates=exp_map,
                                     name="scatter_update")

        else:
            # euclidean update using Adam optimizer
            return self.euclidean_optimizer.apply_gradients([
                (grad, var),
            ])


# class MyAdamOptimizer(optimizer.Optimizer):
# 	"""Optimizer that implements the Adam algorithm.
# 	See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
# 	([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
# 	"""

# 	def __init__(self,
# 		learning_rate=1e-3,
# 		beta1=0.9,
# 		beta2=0.999,
# 		epsilon=1e-8,
# 		use_locking=False,
# 		name="Adam"):
# 		r"""Construct a new Adam optimizer.
# 		Initialization:
# 		$$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$
# 		$$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$
# 		$$t := 0 \text{(Initialize timestep)}$$
# 		The update rule for `variable` with gradient `g` uses an optimization
# 		described at the end of section 2 of the paper:
# 		$$t := t + 1$$
# 		$$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
# 		$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
# 		$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
# 		$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
# 		The default value of 1e-8 for epsilon might not be a good default in
# 		general. For example, when training an Inception network on ImageNet a
# 		current good choice is 1.0 or 0.1. Note that since AdamOptimizer uses the
# 		formulation just before Section 2.1 of the Kingma and Ba paper rather than
# 		the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon
# 		hat" in the paper.
# 		The sparse implementation of this algorithm (used when the gradient is an
# 		IndexedSlices object, typically because of `tf.gather` or an embedding
# 		lookup in the forward pass) does apply momentum to variable slices even if
# 		they were not used in the forward pass (meaning they have a gradient equal
# 		to zero). Momentum decay (beta1) is also applied to the entire momentum
# 		accumulator. This means that the sparse behavior is equivalent to the dense
# 		behavior (in contrast to some momentum implementations which ignore momentum
# 		unless a variable slice was actually used).
# 		Args:
# 			learning_rate: A Tensor or a floating point value.  The learning rate.
# 			beta1: A float value or a constant float tensor. The exponential decay
# 				rate for the 1st moment estimates.
# 			beta2: A float value or a constant float tensor. The exponential decay
# 				rate for the 2nd moment estimates.
# 			epsilon: A small constant for numerical stability. This epsilon is
# 				"epsilon hat" in the Kingma and Ba paper (in the formula just before
# 				Section 2.1), not the epsilon in Algorithm 1 of the paper.
# 			use_locking: If True use locks for update operations.
# 			name: Optional name for the operations created when applying gradients.
# 				Defaults to "Adam".  @compatibility(eager) When eager execution is
# 				enabled, `learning_rate`, `beta1`, `beta2`, and `epsilon` can each be a
# 				callable that takes no arguments and returns the actual value to use.
# 				This can be useful for changing these values across different
# 				invocations of optimizer functions. @end_compatibility
# 		"""
# 		super(MyAdamOptimizer, self).__init__(use_locking, name)
# 		self._lr = learning_rate
# 		self._beta1 = beta1
# 		self._beta2 = beta2
# 		self._epsilon = epsilon

# 		# Tensor versions of the constructor arguments, created in _prepare().
# 		self._lr_t = None
# 		self._beta1_t = None
# 		self._beta2_t = None
# 		self._epsilon_t = None

# 	def _get_beta_accumulators(self):
# 		with ops.init_scope():
# 			if context.executing_eagerly():
# 				graph = None
# 			else:
# 				graph = ops.get_default_graph()
# 			return (self._get_non_slot_variable("beta1_power", graph=graph),
# 							self._get_non_slot_variable("beta2_power", graph=graph))

# 	def _create_slots(self, var_list):
# 		# Create the beta1 and beta2 accumulators on the same device as the first
# 		# variable. Sort the var_list to make sure this device is consistent across
# 		# workers (these need to go on the same PS, otherwise some updates are
# 		# silently ignored).
# 		first_var = min(var_list, key=lambda x: x.name)
# 		self._create_non_slot_variable(
# 				initial_value=self._beta1, name="beta1_power", colocate_with=first_var)
# 		self._create_non_slot_variable(
# 				initial_value=self._beta2, name="beta2_power", colocate_with=first_var)

# 		# Create slots for the first and second moments.
# 		for v in var_list:
# 			self._zeros_slot(v, "m", self._name)
# 			self._zeros_slot(v, "v", self._name)

# 	def _prepare(self):
# 		lr = self._call_if_callable(self._lr)
# 		beta1 = self._call_if_callable(self._beta1)
# 		beta2 = self._call_if_callable(self._beta2)
# 		epsilon = self._call_if_callable(self._epsilon)

# 		self._lr_t = ops.convert_to_tensor(lr, name="learning_rate")
# 		self._beta1_t = ops.convert_to_tensor(beta1, name="beta1")
# 		self._beta2_t = ops.convert_to_tensor(beta2, name="beta2")
# 		self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon")

# 	def _apply_dense(self, grad, var):
# 		assert False
# 		m = self.get_slot(var, "m")
# 		v = self.get_slot(var, "v")
# 		beta1_power, beta2_power = self._get_beta_accumulators()
# 		return training_ops.apply_adam(
# 				var,
# 				m,
# 				v,
# 				math_ops.cast(beta1_power, var.dtype.base_dtype),
# 				math_ops.cast(beta2_power, var.dtype.base_dtype),
# 				math_ops.cast(self._lr_t, var.dtype.base_dtype),
# 				math_ops.cast(self._beta1_t, var.dtype.base_dtype),
# 				math_ops.cast(self._beta2_t, var.dtype.base_dtype),
# 				math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
# 				grad,
# 				use_locking=self._use_locking).op

# 	def _resource_apply_dense(self, grad, var):
# 		assert False
# 		m = self.get_slot(var, "m")
# 		v = self.get_slot(var, "v")
# 		beta1_power, beta2_power = self._get_beta_accumulators()
# 		return training_ops.resource_apply_adam(
# 				var.handle,
# 				m.handle,
# 				v.handle,
# 				math_ops.cast(beta1_power, grad.dtype.base_dtype),
# 				math_ops.cast(beta2_power, grad.dtype.base_dtype),
# 				math_ops.cast(self._lr_t, grad.dtype.base_dtype),
# 				math_ops.cast(self._beta1_t, grad.dtype.base_dtype),
# 				math_ops.cast(self._beta2_t, grad.dtype.base_dtype),
# 				math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
# 				grad,
# 				use_locking=self._use_locking)

# 	def _apply_sparse_shared(self, grad, var, indices,
# 		scatter_add):
# 		beta1_power, beta2_power = self._get_beta_accumulators()
# 		beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
# 		beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
# 		lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
# 		beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
# 		beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
# 		epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
# 		lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))

# 		grad = tf.verify_tensor_all_finite(grad, "fail in grad")

# 		# if "hyperbolic" in var.name:
# 		# 		grad = K.concatenate([grad[:,:-1], -grad[:,-1:]],
# 		# 			axis=-1)

# 		# m_t = beta1 * m + (1 - beta1) * g_t
# 		m = self.get_slot(var, "m")
# 		m_scaled_g_values = grad * (1 - beta1_t)
# 		m_t = state_ops.assign(m, m * beta1_t,
# 			use_locking=self._use_locking)
# 		with ops.control_dependencies([m_t]):
# 			m_t = scatter_add(m, indices, m_scaled_g_values)

# 		# v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
# 		v = self.get_slot(var, "v")
# 		v_scaled_g_values = (grad * grad) * (1 - beta2_t)
# 		v_t = state_ops.assign(v, v * beta2_t,
# 			use_locking=self._use_locking)
# 		with ops.control_dependencies([v_t]):
# 			v_t = scatter_add(v, indices, v_scaled_g_values)
# 		v_sqrt = math_ops.sqrt(K.maximum(v_t, 0.))

# 		if "hyperbolic" in var.name:

# 			m_t = tf.verify_tensor_all_finite(m_t, "fail in m_t")
# 			v_sqrt = tf.verify_tensor_all_finite(v_sqrt,
# 				"fail in v_sqrt")

# 			gr = m_t / (v_sqrt + epsilon_t)

# 			gr = tf.verify_tensor_all_finite(gr, "fail in gr")

# 			gr = K.concatenate(
# 				[gr[...,:-1], -gr[...,-1:]],
# 				axis=-1)
# 			gr_tangent = project_onto_tangent_space(var, gr)

# 			gr_tangent = tf.verify_tensor_all_finite(gr_tangent,
# 				"fail in tangent")

# 			exp_map = exponential_mapping(var, -lr * gr_tangent)

# 			exp_map = tf.verify_tensor_all_finite(exp_map,
# 				"fail in exp_map")

# 			var_update = state_ops.assign(
# 				var,
# 				exp_map,
# 				use_locking=self._use_locking)
# 		else:
# 			var_update = state_ops.assign_sub(
# 				var,
# 				lr * m_t / (v_sqrt + epsilon_t),
# 				use_locking=self._use_locking)
# 		return control_flow_ops.group(*[var_update, m_t, v_t])

# 	def _apply_sparse(self, grad, var):
# 		return self._apply_sparse_shared(
# 				grad.values,
# 				var,
# 				grad.indices,
# 				lambda x, i, v: state_ops.scatter_add(
# 						x,
# 						i,
# 						v,
# 						use_locking=self._use_locking))

# 	def _resource_scatter_add(self, x, i, v):
# 		with ops.control_dependencies(
# 				[resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
# 			return x.value()

# 	def _resource_apply_sparse(self, grad, var, indices):
# 		return self._apply_sparse_shared(grad, var, indices,
# 																		 self._resource_scatter_add)

# 	def _finish(self, update_ops, name_scope):
# 		# Update the power accumulators.
# 		with ops.control_dependencies(update_ops):
# 				beta1_power, beta2_power = self._get_beta_accumulators()
# 		with ops.colocate_with(beta1_power):
# 				update_beta1 = beta1_power.assign(
# 						beta1_power * self._beta1_t, use_locking=self._use_locking)
# 				update_beta2 = beta2_power.assign(
# 						beta2_power * self._beta2_t, use_locking=self._use_locking)
# 		return control_flow_ops.group(
# 				*update_ops + [update_beta1, update_beta2], name=name_scope)
Esempio n. 28
0
    likelihoods = [Param(0.01, transforms.positive, name="gaussian_noise_{}".format(n)) for n in range(N)]

# Assemble the latent-kernel model from the data, the additive kernel bank
# and the per-output Gaussian-noise likelihoods built above.
model = LKM(data, additive_kernels, likelihoods)

# GP hyper-parameters live in their own variable scope...
gp_train_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="gp_hyperparameters")

# ...everything else (the IBP variational parameters) is trained separately.
ibp_train_vars = list(set(tf.global_variables()) - set(gp_train_vars))
update_tau = model.closed_form_update_tau()
elbo = model.build_marginal_loglikelihood()

z, nll_gp_refined = model.refine()

# Kernel matrices and noise needed for posterior prediction later on.
t_test, K, K_star, K_star_star, noise = model.prepare_for_postprocess()

# train IBP parameters with Adam
adam = AdamOptimizer(0.01)
# train_ibp = adam.minimize(-elbo, var_list=ibp_train_vars)
train_ibp = adam.minimize(-elbo, var_list=ibp_train_vars)

# GP hyper-parameters are optimized with L-BFGS-B via SciPy (short run).
train_gp = ScipyOptimizerInterface(-elbo,
                                   var_list=gp_train_vars,
                                   method='L-BFGS-B',
                                   options={"maxiter": 10})

# refined train
train_gp_refine = ScipyOptimizerInterface(nll_gp_refined,
                                          var_list=gp_train_vars,
                                          method='L-BFGS-B',
                                          options={"maxiter": 300}
                                          )