Example #1
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, Flatten, Dense

# `hp` is assumed to be a project-level hyperparameter module that provides
# hp.learning_rate and hp.momentum; it is referenced but not defined here.


class SmallModel(tf.keras.Model):
    def __init__(self):
        super(SmallModel, self).__init__()

        # Optimizer
        self.optimizer = tf.keras.optimizers.SGD(
            learning_rate=hp.learning_rate, momentum=hp.momentum)

        # Define model layers (spelled out explicitly rather than generated in a loop)
        # First Basic-Conv Block
        self.small_conv1 = Conv2D(filters=64,
                                  kernel_size=3,
                                  strides=1,
                                  padding='same',
                                  activation=None,
                                  name="small_conv1")
        self.small_bn1 = BatchNormalization(name="small_bn1")
        self.small_relu1 = ReLU(name="small_relu1")

        # Second Basic-Conv Block
        self.small_conv2 = Conv2D(filters=64,
                                  kernel_size=3,
                                  strides=1,
                                  padding='same',
                                  activation=None,
                                  name="small_conv2")
        self.small_bn2 = BatchNormalization(name="small_bn2")
        self.small_relu2 = ReLU(name="small_relu2")

        # Classification Part
        self.small_class_conv1 = Conv2D(filters=128,
                                        kernel_size=3,
                                        strides=2,
                                        padding='same',
                                        name="small_class_conv1")
        self.small_class_conv2 = Conv2D(filters=128,
                                        kernel_size=3,
                                        strides=2,
                                        padding='same',
                                        name="small_class_conv2")
        self.small_class_flatten = Flatten(name="small_class_flatten")
        self.small_class_dense = Dense(units=10, activation='softmax')

    def call(self, inputs, training=False):
        """
        The call function is inherited from Model. It defines the behaviour of the model.
        In this function we will connect the layers we defined in __init__ together.
        Please review Connection Scheme and observe naming conventions.

        :param inputs: these are the images that are passed in shape (batches, height, width, channels)
        :param training: BOOL this is a MODEL param that indicates if we are training or testing... I'm still trying to figure this out...
        :return: stuff (softmax class probabilities in this case)
        """

        # Connect First Small Conv Block
        small_conv1 = self.small_conv1(inputs)
        small_bn1 = self.small_bn1(small_conv1, training=training)
        small_relu1 = self.small_relu1(small_bn1)

        # Connect Second Small Conv Block
        small_conv2 = self.small_conv2(small_relu1)
        small_bn2 = self.small_bn2(small_conv2, training=training)
        small_relu2 = self.small_relu2(small_bn2)

        # Connect Small Class Block
        small_class_conv1 = self.small_class_conv1(small_relu2)
        small_class_conv2 = self.small_class_conv2(small_class_conv1)
        small_class_flatten = self.small_class_flatten(small_class_conv2)
        small_class_dense = self.small_class_dense(small_class_flatten)

        # if training:
        #     output = small_class_dense
        # else:
        #     #pred = np.argmax(small_class_dense)
        #     #conf = np.max(small_class_dense)
        #     #output = [pred, conf]

        return small_class_dense

    @staticmethod
    def loss_fn(labels, predictions):
        """ Loss function for model. """

        return tf.keras.losses.sparse_categorical_crossentropy(
            labels, predictions, from_logits=False)
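
For reference, a minimal training-loop sketch for SmallModel; the data tensors
(train_images, train_labels), the batch size, and the shuffle buffer are
placeholders assumed for illustration and are not part of the original example.
The input shape (8, 8, 3) matches the one used when loading these weights in
Example #3.

# Assumed: train_images has shape (N, 8, 8, 3); train_labels holds integer
# class ids in [0, 10).
model = SmallModel()

dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
dataset = dataset.shuffle(1024).batch(32)

for images, labels in dataset:
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = tf.reduce_mean(SmallModel.loss_fn(labels, predictions))
    gradients = tape.gradient(loss, model.trainable_variables)
    model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))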
Example #2
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense

# Note: this example uses TensorFlow 1.x graph-mode APIs
# (tf.Session, tf.variable_scope, tf.data.Iterator, tf.train.AdamOptimizer).


class NVDM:
    def __init__(self, sess, train_data, test_data, num_classes, num_samples,
                 batch_size, max_seq_len, initial_lr, decay_rate, decay_step,
                 hidden_dim, latent_dim, epochs, checkpoint_dir, vocab_size):

        self.sess = sess
        self.train_data = train_data
        self.test_data = test_data
        self.num_classes = num_classes
        self.num_samples = num_samples
        self.batch_size = batch_size
        self.max_seq_len = max_seq_len
        self.initial_lr = initial_lr
        self.decay_rate = decay_rate
        self.decay_step = decay_step
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.epochs = epochs
        self.checkpoint_dir = checkpoint_dir
        self.vocab_size = vocab_size

        self.global_step = tf.Variable(0, trainable=False)

        self.build_model()

    def build_model(self):
        self.build_inputs()
        self.build_encoder()
        self.build_latent()
        self.build_posterior()
        self.build_decoder()
        self.build_loss()
        self.build_training_step()

    def build_inputs(self):
        train_dataset = tf.data.Dataset.from_tensor_slices(self.train_data)
        train_dataset = train_dataset.batch(self.batch_size,
                                            drop_remainder=True)
        train_dataset = train_dataset.prefetch(1)
        val_dataset = tf.data.Dataset.from_tensor_slices(self.test_data)
        val_dataset = val_dataset.batch(self.batch_size, drop_remainder=True)
        val_dataset = val_dataset.prefetch(1)
        iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                                   train_dataset.output_shapes)

        # This is an op that gets the next element from the iterator
        self.bow = iterator.get_next()
        self.batch_word_count = tf.reduce_sum(tf.reduce_sum(self.bow, -1), -1)

        # These ops let us switch and reinitialize every time we finish an epoch
        self.training_init_op = iterator.make_initializer(train_dataset)
        self.validation_init_op = iterator.make_initializer(val_dataset)

    def build_encoder(self):
        with tf.variable_scope("encoder"):
            self.dense1 = Dense(units=self.hidden_dim,
                                activation=tf.nn.relu)(self.bow)

            self.dense2 = Dense(units=self.hidden_dim,
                                activation=tf.nn.relu)(self.dense1)

    def build_latent(self):
        with tf.variable_scope("latent"):
            self.mu = Dense(units=self.latent_dim)(self.dense2)

            self.log_sigma_sq = Dense(units=self.latent_dim)(self.dense2)

            self.sigma_sq = tf.exp(self.log_sigma_sq)
            # Standard deviation, used for the reparameterization trick below
            self.sigma = tf.sqrt(self.sigma_sq)

    def build_posterior(self):
        with tf.variable_scope("posterior"):
            self.posterior = []
            for i in range(self.num_samples):
                # Reparameterization trick: z = mu + epsilon * sigma with
                # epsilon ~ N(0, I); note sigma (std dev), not sigma_sq
                epsilon = tf.random_normal([self.batch_size, self.latent_dim])
                self.posterior.append(self.mu + epsilon * self.sigma)

    def build_decoder(self):
        with tf.variable_scope("decoder"):
            self.logits = []
            self.dense3 = Dense(units=self.vocab_size)
            for i in range(self.num_samples):
                self.logits.append(self.dense3(self.posterior[i]))

    def build_loss(self):
        self.build_neg_log_likelihood_loss()
        self.build_kl_loss()
        self.loss = self.neg_log_likelihood_loss + self.kl_loss

    def build_neg_log_likelihood_loss(self):
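        # Monte Carlo estimate of the reconstruction term: for each latent
        # sample, take the dot product of the decoder's log-softmax with the
        # bag-of-words counts (summing over the vocabulary), negate it, average
        # over the `num_samples` samples, and finally sum over the batch.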
        self.neg_log_likelihood_loss = 0.0
        for i in range(self.num_samples):
            log_softmax = tf.nn.log_softmax(self.logits[i])
            self.neg_log_likelihood_loss += -tf.reduce_sum(
                log_softmax * self.bow, 1) / self.num_samples
        self.neg_log_likelihood_loss = tf.reduce_sum(
            self.neg_log_likelihood_loss, axis=0)

    def build_kl_loss(self):
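        # Closed-form KL divergence between the approximate posterior
        # N(mu, sigma^2) and the standard normal prior N(0, I):
        #   KL = 0.5 * sum_j (mu_j^2 + sigma_j^2 - log(sigma_j^2) - 1)
        # summed over latent dimensions (axis=1), then summed over the batch.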
        self.kl_loss = 0.5 * tf.reduce_sum(tf.square(self.mu) + tf.exp(
            self.log_sigma_sq) - self.log_sigma_sq - 1,
                                           axis=1)
        self.kl_loss = tf.reduce_sum(self.kl_loss, axis=0)

    def build_training_step(self):
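        # Staircase exponential decay of the learning rate:
        #   lr = initial_lr * decay_rate ** floor(global_step / decay_step)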
        self.lr = tf.train.exponential_decay(self.initial_lr,
                                             self.global_step,
                                             self.decay_step,
                                             self.decay_rate,
                                             staircase=True,
                                             name="lr")

        optimizer = tf.train.AdamOptimizer(self.lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(
            self.loss, var_list=tf.trainable_variables())
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                            for grad, var in gradients if grad is not None]
        # Pass global_step so it is incremented on each update and the
        # learning-rate decay defined above actually takes effect
        self.train_op = optimizer.apply_gradients(capped_gradients,
                                                  global_step=self.global_step)

    def train(self):
        self.sess.run(tf.global_variables_initializer())
        for epoch in range(self.epochs):
            # Initialize the iterator to consume training data
            self.sess.run(self.training_init_op)
            train_loss = 0
            perplexity = 0
            iter = 0
            while True:
                # As long as the iterator is not empty
                try:
                    _, loss, kl, log = self.sess.run([
                        self.train_op, self.loss, self.kl_loss,
                        self.neg_log_likelihood_loss
                    ])
                    iter += 1
                    train_loss += loss
#                    print(kl, log)
                except tf.errors.OutOfRangeError:
                    train_loss /= iter
                    break

            # We'll store the losses from each batch to get an average
            iter = 0
            test_loss = 0
            log_loss = 0
            word_count = 0
            doc_count = 0
            batch_perplexity = 0
            for i in range(20):
                # Initialize the iterator to provide validation data
                self.sess.run(self.validation_init_op)
                while True:
                    # As long as the iterator is not empty
                    try:
                        loss, batch_log_loss, batch_word_count = self.sess.run(
                            [
                                self.loss, self.neg_log_likelihood_loss,
                                self.batch_word_count
                            ])
                        iter += 1
                        test_loss += loss
                        log_loss += batch_log_loss
                        word_count += batch_word_count
                        doc_count += self.batch_size
                        batch_perplexity += batch_log_loss * self.batch_size / batch_word_count
                    except tf.errors.OutOfRangeError:
                        break
            test_loss = test_loss / iter
            perplexity = np.exp(batch_perplexity / doc_count)

            print("epoch_{}, train_loss = {}, test_loss = {}, perplexity = {}".
                  format(epoch, train_loss, test_loss, perplexity))
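
For orientation, a minimal sketch of how this class might be driven; it assumes
TensorFlow 1.x, dense bag-of-words matrices of shape (num_docs, vocab_size),
and placeholder file names and hyperparameter values that are not part of the
original example.

# Assumed: bag-of-words matrices saved as .npy files
train_bow = np.load("train_bow.npy").astype(np.float32)
test_bow = np.load("test_bow.npy").astype(np.float32)

with tf.Session() as sess:
    model = NVDM(sess=sess,
                 train_data=train_bow,
                 test_data=test_bow,
                 num_classes=20,
                 num_samples=1,
                 batch_size=64,
                 max_seq_len=None,
                 initial_lr=1e-3,
                 decay_rate=0.96,
                 decay_step=1000,
                 hidden_dim=500,
                 latent_dim=50,
                 epochs=100,
                 checkpoint_dir="./checkpoints",
                 vocab_size=train_bow.shape[1])
    model.train()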
Example #3
import os

import tensorflow as tf
from tensorflow.keras.layers import (Conv2D, BatchNormalization, ReLU, Flatten,
                                     Dense, Concatenate)

# `hp` is assumed to be the same hyperparameter module used in Example #1,
# and SmallModel is the class defined there.


class MedModel(tf.keras.Model):
    def __init__(self):
        super(MedModel, self).__init__()

        # Optimizer
        self.optimizer = tf.keras.optimizers.SGD(
            learning_rate=hp.learning_rate, momentum=hp.momentum)

        # Load a SmallModel instance and its pre-trained .h5 weights
        # (we reuse selected layers and their filters below)
        small_model = SmallModel()
        small_model(tf.keras.Input(shape=(8, 8, 3)))
        print(os.getcwd())
        small_model.load_weights("./models/small_weights.h5")
        self.small_model = small_model

        initializer = tf.keras.initializers.Ones()

        # Define Model Layers
        # First Conv Block
        self.med_conv1 = Conv2D(filters=64,
                                kernel_size=3,
                                strides=1,
                                padding='SAME',
                                activation=None,
                                name="med_conv1")
        self.upsamp_small_filters_conv1 = Conv2D(
            filters=64,
            kernel_size=3,
            kernel_initializer=self.small_conv1_init,
            padding='SAME',
            name='upsamp_small_filters_conv1',
            trainable=False)
        self.comb_tensors1 = Concatenate(axis=3, name="med_concat1")
        self.med_bn1 = BatchNormalization(name="med_bn1")
        self.med_relu1 = ReLU(name="med_relu1")

        # Second Conv Block
        self.med_conv2 = Conv2D(filters=64,
                                kernel_size=3,
                                strides=1,
                                padding='SAME',
                                activation=None,
                                name="med_conv2")
        self.down_med_relu1 = Conv2D(filters=64,
                                     kernel_size=1,
                                     padding='SAME',
                                     activation=None,
                                     kernel_initializer=initializer,
                                     name="reduce_filters",
                                     trainable=False)
        self.upsamp_small_filters_conv2 = Conv2D(
            filters=64,
            kernel_size=3,
            kernel_initializer=self.small_conv2_init,
            padding='SAME',
            name='upsamp_small_filters_conv2',
            trainable=False)
        self.comb_tensors2 = Concatenate(axis=3, name="med_concat2")
        self.med_bn2 = BatchNormalization(name="med_bn2")
        self.med_relu2 = ReLU(name="med_relu2")

        # Third Conv Block
        self.med_conv3 = Conv2D(filters=64,
                                kernel_size=3,
                                strides=1,
                                padding='SAME',
                                activation=None,
                                name="med_conv3")
        self.med_bn3 = BatchNormalization(name="med_bn3")
        self.med_relu3 = ReLU(name="med_relu3")

        # Fourth Conv Block
        self.med_conv4 = Conv2D(filters=64,
                                kernel_size=3,
                                strides=1,
                                padding='SAME',
                                activation=None,
                                name="med_conv4")
        self.med_bn4 = BatchNormalization(name="med_bn4")
        self.med_relu4 = ReLU(name="med_relu4")

        # Classification Part
        self.med_class_conv1 = Conv2D(filters=128,
                                      kernel_size=3,
                                      strides=2,
                                      padding='same',
                                      name="med_class_conv1")
        self.med_class_conv2 = Conv2D(filters=128,
                                      kernel_size=3,
                                      strides=2,
                                      padding='same',
                                      name="med_class_conv2")
        self.med_class_flatten = Flatten(name="med_class_flatten")
        self.med_class_dense = Dense(units=10, activation='softmax')

    # Kernel initializer that returns the pre-trained small_conv1 filters from SmallModel
    def small_conv1_init(self, shape, dtype=None):
        small_conv1_filters, biases = self.small_model.get_layer(
            "small_conv1").get_weights()
        return small_conv1_filters

    # Kernel initializer that returns the pre-trained small_conv2 filters from SmallModel
    def small_conv2_init(self, shape, dtype=None):
        small_conv2_filters, biases = self.small_model.get_layer(
            "small_conv2").get_weights()
        return small_conv2_filters

    def call(self, inputs, training=False):
        """
        The call function is inherited from Model. It defines the behaviour of the model.
        In this function we will connect the layers we defined in __init__ together.
        Please review Connection Scheme and observe naming conventions.

        :param inputs: these are the images that are passed in shape (batches, height, width, channels)
        :param training: BOOL this is a MODEL param that indicates if we are training or testing... I'm still trying to figure this out...
        :return: stuff (softmax class probabilities in this case)
        """

        # Connect First Med Conv Block
        med_conv1 = self.med_conv1(inputs)
        upsamp_small_filters_conv1 = self.upsamp_small_filters_conv1(inputs)
        comb_tensors1 = self.comb_tensors1(
            [med_conv1, upsamp_small_filters_conv1])
        med_bn1 = self.med_bn1(comb_tensors1, training=training)
        med_relu1 = self.med_relu1(med_bn1)

        # Connect Second Med Conv Block
        med_conv2 = self.med_conv2(med_relu1)
        down_samp_relu1 = self.down_med_relu1(med_relu1)
        upsamp_small_filters_conv2 = self.upsamp_small_filters_conv2(
            down_samp_relu1)
        comb_tensors2 = self.comb_tensors2(
            [med_conv2, upsamp_small_filters_conv2])
        med_bn2 = self.med_bn2(comb_tensors2, training=training)
        med_relu2 = self.med_relu2(med_bn2)

        # Connect Third Med Conv Block
        med_conv3 = self.med_conv3(med_relu2)
        med_bn3 = self.med_bn3(med_conv3, training=training)
        med_relu3 = self.med_relu3(med_bn3)

        # Connect Fourth Med Conv Block
        med_conv4 = self.med_conv4(med_relu3)
        med_bn4 = self.med_bn4(med_conv4, training=training)
        med_relu4 = self.med_relu4(med_bn4)

        # Connect Med Class Block
        med_class_conv1 = self.med_class_conv1(med_relu4)
        med_class_conv2 = self.med_class_conv2(med_class_conv1)
        med_class_flatten = self.med_class_flatten(med_class_conv2)
        med_class_dense = self.med_class_dense(med_class_flatten)

        # if training:
        #     output = med_class_dense
        # else:
        #     #pred = np.argmax(med_class_dense)
        #     #conf = np.max(med_class_dense)
        #     #output = [pred, conf]

        return med_class_dense

    @staticmethod
    def loss_fn(labels, predictions):
        """ Loss function for model. """

        return tf.keras.losses.sparse_categorical_crossentropy(
            labels, predictions, from_logits=False)
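
MedModel expects pre-trained SmallModel weights at ./models/small_weights.h5,
so one plausible workflow (a sketch with assumed paths and input shapes, not
part of the original code) is to build and save the small model first, then
instantiate the medium one:

# Build the small model on the expected input shape and save its weights
small = SmallModel()
small(tf.keras.Input(shape=(8, 8, 3)))
# ... train the small model here before saving ...
small.save_weights("./models/small_weights.h5")

# MedModel loads the frozen small filters inside its __init__
med = MedModel()
probs = med(tf.zeros((1, 8, 8, 3)), training=False)  # (1, 10) softmax probabilities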