import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import (BatchNormalization, Concatenate, Conv2D,
                                     Dense, Flatten, ReLU)

# NOTE: `hp` below refers to the project's hyperparameters module
# (e.g. `import hyperparameters as hp`); its import is not shown in this snippet.


class SmallModel(tf.keras.Model):
    def __init__(self):
        super(SmallModel, self).__init__()

        # Optimizer
        self.optimizer = tf.keras.optimizers.SGD(
            learning_rate=hp.learning_rate, momentum=hp.momentum)

        # Define model layers (each layer is declared explicitly; verbose, but
        # it keeps the naming scheme clear).

        # First basic conv block
        self.small_conv1 = Conv2D(filters=64, kernel_size=3, strides=1,
                                  padding='same', activation=None,
                                  name="small_conv1")
        self.small_bn1 = BatchNormalization(name="small_bn1")
        self.small_relu1 = ReLU(name="small_relu1")

        # Second basic conv block
        self.small_conv2 = Conv2D(filters=64, kernel_size=3, strides=1,
                                  padding='same', activation=None,
                                  name="small_conv2")
        self.small_bn2 = BatchNormalization(name="small_bn2")
        self.small_relu2 = ReLU(name="small_relu2")

        # Classification head
        self.small_class_conv1 = Conv2D(filters=128, kernel_size=3, strides=2,
                                        padding='same',
                                        name="small_class_conv1")
        self.small_class_conv2 = Conv2D(filters=128, kernel_size=3, strides=2,
                                        padding='same',
                                        name="small_class_conv2")
        self.small_class_flatten = Flatten(name="small_class_flatten")
        self.small_class_dense = Dense(units=10, activation='softmax')

    def call(self, inputs, training=False):
        """
        The call function is inherited from Model. It defines the forward pass
        of the model by connecting the layers declared in __init__.
        Please review the connection scheme and observe naming conventions.

        :param inputs: input images of shape (batch, height, width, channels)
        :param training: bool indicating whether the model runs in training or
            inference mode (used by layers such as BatchNormalization)
        :return: softmax class probabilities
        """
        # Connect first small conv block
        small_conv1 = self.small_conv1(inputs)
        small_bn1 = self.small_bn1(small_conv1, training=training)
        small_relu1 = self.small_relu1(small_bn1)

        # Connect second small conv block
        small_conv2 = self.small_conv2(small_relu1)
        small_bn2 = self.small_bn2(small_conv2, training=training)
        small_relu2 = self.small_relu2(small_bn2)

        # Connect small classification block
        small_class_conv1 = self.small_class_conv1(small_relu2)
        small_class_conv2 = self.small_class_conv2(small_class_conv1)
        small_class_flatten = self.small_class_flatten(small_class_conv2)
        small_class_dense = self.small_class_dense(small_class_flatten)

        # if training:
        #     output = small_class_dense
        # else:
        #     # pred = np.argmax(small_class_dense)
        #     # conf = np.max(small_class_dense)
        #     # output = [pred, conf]

        return small_class_dense

    @staticmethod
    def loss_fn(labels, predictions):
        """Loss function for the model."""
        return tf.keras.losses.sparse_categorical_crossentropy(
            labels, predictions, from_logits=False)
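# --- Usage sketch (not part of the original module) ---
# A minimal, hedged example of how SmallModel might be trained and its weights
# saved to the path that MedModel later loads ("./models/small_weights.h5").
# The `train_images` / `train_labels` arrays, the 8x8x3 input size, and the
# epoch/batch values are illustrative assumptions; substitute the project's
# actual data pipeline and hyperparameters.
def train_small_model(train_images, train_labels, epochs=10, batch_size=64):
    model = SmallModel()

    # Standard Keras training loop; loss_fn is the model's own static loss.
    model.compile(optimizer=model.optimizer,
                  loss=SmallModel.loss_fn,
                  metrics=['accuracy'])
    model.fit(train_images, train_labels, epochs=epochs, batch_size=batch_size)

    # Save weights where MedModel expects to find them.
    os.makedirs("./models", exist_ok=True)
    model.save_weights("./models/small_weights.h5")
    return model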
class NVDM:
    def __init__(self, sess, train_data, test_data, num_classes, num_samples,
                 batch_size, max_seq_len, initial_lr, decay_rate, decay_step,
                 hidden_dim, latent_dim, epochs, checkpoint_dir, vocab_size):
        self.sess = sess
        self.train_data = train_data
        self.test_data = test_data
        self.num_classes = num_classes
        self.num_samples = num_samples
        self.batch_size = batch_size
        self.max_seq_len = max_seq_len
        self.initial_lr = initial_lr
        self.decay_rate = decay_rate
        self.decay_step = decay_step
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        self.epochs = epochs
        self.checkpoint_dir = checkpoint_dir
        self.vocab_size = vocab_size
        self.global_step = tf.Variable(0, trainable=False)
        self.build_model()

    def build_model(self):
        self.build_inputs()
        self.build_encoder()
        self.build_latent()
        self.build_posterior()
        self.build_decoder()
        self.build_loss()
        self.build_training_step()

    def build_inputs(self):
        train_dataset = tf.data.Dataset.from_tensor_slices(self.train_data)
        train_dataset = train_dataset.batch(self.batch_size,
                                            drop_remainder=True)
        train_dataset = train_dataset.prefetch(1)

        val_dataset = tf.data.Dataset.from_tensor_slices(self.test_data)
        val_dataset = val_dataset.batch(self.batch_size, drop_remainder=True)
        val_dataset = val_dataset.prefetch(1)

        iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                                   train_dataset.output_shapes)

        # This op gets the next element (a batch of bag-of-words vectors)
        # from the iterator
        self.bow = iterator.get_next()
        self.batch_word_count = tf.reduce_sum(tf.reduce_sum(self.bow, -1), -1)

        # These ops let us switch and reinitialize the iterator every time we
        # finish an epoch
        self.training_init_op = iterator.make_initializer(train_dataset)
        self.validation_init_op = iterator.make_initializer(val_dataset)

    def build_encoder(self):
        with tf.variable_scope("encoder"):
            self.dense1 = Dense(units=self.hidden_dim,
                                activation=tf.nn.relu)(self.bow)
            self.dense2 = Dense(units=self.hidden_dim,
                                activation=tf.nn.relu)(self.dense1)

    def build_latent(self):
        with tf.variable_scope("latent"):
            self.mu = Dense(units=self.latent_dim)(self.dense2)
            self.log_sigma_sq = Dense(units=self.latent_dim)(self.dense2)
            self.sigma_sq = tf.exp(self.log_sigma_sq)

    def build_posterior(self):
        with tf.variable_scope("posterior"):
            self.posterior = []
            for i in range(self.num_samples):
                epsilon = tf.random_normal([self.batch_size, self.latent_dim])
                self.posterior.append(self.mu + epsilon * self.sigma_sq)

    def build_decoder(self):
        with tf.variable_scope("decoder"):
            self.logits = []
            self.dense3 = Dense(units=self.vocab_size)
            for i in range(self.num_samples):
                self.logits.append(self.dense3(self.posterior[i]))

    def build_loss(self):
        self.build_neg_log_likelihood_loss()
        self.build_kl_loss()
        self.loss = self.neg_log_likelihood_loss + self.kl_loss

    def build_neg_log_likelihood_loss(self):
        self.neg_log_likelihood_loss = 0.0
        for i in range(self.num_samples):
            log_softmax = tf.nn.log_softmax(self.logits[i])
            self.neg_log_likelihood_loss += -tf.reduce_sum(
                log_softmax * self.bow, 1) / self.num_samples
        self.neg_log_likelihood_loss = tf.reduce_sum(
            self.neg_log_likelihood_loss, axis=0)

    def build_kl_loss(self):
        self.kl_loss = 0.5 * tf.reduce_sum(
            tf.square(self.mu) + tf.exp(self.log_sigma_sq)
            - self.log_sigma_sq - 1, axis=1)
        self.kl_loss = tf.reduce_sum(self.kl_loss, axis=0)

    def build_training_step(self):
        self.lr = tf.train.exponential_decay(self.initial_lr, self.global_step,
                                             self.decay_step, self.decay_rate,
                                             staircase=True, name="lr")
        optimizer = tf.train.AdamOptimizer(self.lr)

        # Gradient clipping
        gradients = optimizer.compute_gradients(
            self.loss, var_list=tf.trainable_variables())
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                            for grad, var in gradients if grad is not None]
        self.train_op = optimizer.apply_gradients(capped_gradients)

    def train(self):
        self.sess.run(tf.global_variables_initializer())

        for epoch in range(1000):
            # Initialize the iterator to consume training data
            self.sess.run(self.training_init_op)
            train_loss = 0
            perplexity = 0
            iter = 0
            while True:  # As long as the iterator is not empty
                try:
                    _, loss, kl, log = self.sess.run([
                        self.train_op, self.loss, self.kl_loss,
                        self.neg_log_likelihood_loss
                    ])
                    iter += 1
                    train_loss += loss
                    # print(kl, log)
                except tf.errors.OutOfRangeError:
                    train_loss /= iter
                    break

            # We'll accumulate the losses from each batch to get an average
            iter = 0
            test_loss = 0
            log_loss = 0
            word_count = 0
            doc_count = 0
            batch_perplexity = 0
            for i in range(20):
                # Initialize the iterator to provide validation data
                self.sess.run(self.validation_init_op)
                while True:  # As long as the iterator is not empty
                    iter += 1
                    try:
                        loss, batch_log_loss, batch_word_count = self.sess.run([
                            self.loss, self.neg_log_likelihood_loss,
                            self.batch_word_count
                        ])
                        test_loss += loss
                        log_loss += self.batch_size
                        word_count += batch_word_count
                        doc_count += self.batch_size
                        batch_perplexity += (batch_log_loss * self.batch_size
                                             / batch_word_count)
                    except tf.errors.OutOfRangeError:
                        break

            test_loss = test_loss / iter
            perplexity = np.exp(batch_perplexity / doc_count)
            print("epoch_{}, train_loss = {}, test_loss = {}, perplexity = {}"
                  .format(epoch, train_loss, test_loss, perplexity))
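# --- Usage sketch (not part of the original module) ---
# A hedged example of how NVDM might be constructed and trained. NVDM is
# written against the TF 1.x graph/session API (tf.Session, tf.variable_scope,
# tf.train.AdamOptimizer); under TF 2.x it would need tf.compat.v1 with eager
# execution disabled. The bag-of-words arrays and the hyperparameter values
# below are illustrative assumptions only.
def run_nvdm(train_bow, test_bow, vocab_size):
    # train_bow / test_bow: float32 arrays of shape (num_docs, vocab_size),
    # each row a bag-of-words count vector for one document.
    with tf.Session() as sess:
        model = NVDM(sess=sess,
                     train_data=train_bow,
                     test_data=test_bow,
                     num_classes=20,
                     num_samples=1,
                     batch_size=64,
                     max_seq_len=None,        # unused by bag-of-words input
                     initial_lr=5e-4,
                     decay_rate=0.96,
                     decay_step=1000,
                     hidden_dim=500,
                     latent_dim=50,
                     epochs=1000,
                     checkpoint_dir="./checkpoints",
                     vocab_size=vocab_size)
        model.train()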
class MedModel(tf.keras.Model):
    def __init__(self):
        super(MedModel, self).__init__()

        # Optimizer
        self.optimizer = tf.keras.optimizers.SGD(
            learning_rate=hp.learning_rate, momentum=hp.momentum)

        # Load an instance of the small model from its .h5 weights so we can
        # grab the appropriate layers and their weights
        small_model = SmallModel()
        small_model(tf.keras.Input(shape=(8, 8, 3)))
        print(os.getcwd())
        small_model.load_weights("./models/small_weights.h5")
        self.small_model = small_model

        initializer = tf.keras.initializers.Ones()

        # Define model layers

        # First conv block
        self.med_conv1 = Conv2D(filters=64, kernel_size=3, strides=1,
                                padding='SAME', activation=None,
                                name="med_conv1")
        self.upsamp_small_filters_conv1 = Conv2D(
            filters=64, kernel_size=3,
            kernel_initializer=self.small_conv1_init,
            padding='SAME', name='upsamp_small_filters_conv1',
            trainable=False)
        self.comb_tensors1 = Concatenate(axis=3, name="med_concat1")
        self.med_bn1 = BatchNormalization(name="med_bn1")
        self.med_relu1 = ReLU(name="med_relu1")

        # Second conv block
        self.med_conv2 = Conv2D(filters=64, kernel_size=3, strides=1,
                                padding='SAME', activation=None,
                                name="med_conv2")
        self.down_med_relu1 = Conv2D(filters=64, kernel_size=1,
                                     padding='SAME', activation=None,
                                     kernel_initializer=initializer,
                                     name="reduce_filters", trainable=False)
        self.upsamp_small_filters_conv2 = Conv2D(
            filters=64, kernel_size=3,
            kernel_initializer=self.small_conv2_init,
            padding='SAME', name='upsamp_small_filters_conv2',
            trainable=False)
        self.comb_tensors2 = Concatenate(axis=3, name="med_concat2")
        self.med_bn2 = BatchNormalization(name="med_bn2")
        self.med_relu2 = ReLU(name="med_relu2")

        # Third conv block
        self.med_conv3 = Conv2D(filters=64, kernel_size=3, strides=1,
                                padding='SAME', activation=None,
                                name="med_conv3")
        self.med_bn3 = BatchNormalization(name="med_bn3")
        self.med_relu3 = ReLU(name="med_relu3")

        # Fourth conv block
        self.med_conv4 = Conv2D(filters=64, kernel_size=3, strides=1,
                                padding='SAME', activation=None,
                                name="med_conv4")
        self.med_bn4 = BatchNormalization(name="med_bn4")
        self.med_relu4 = ReLU(name="med_relu4")

        # Classification head
        self.med_class_conv1 = Conv2D(filters=128, kernel_size=3, strides=2,
                                      padding='same', name="med_class_conv1")
        self.med_class_conv2 = Conv2D(filters=128, kernel_size=3, strides=2,
                                      padding='same', name="med_class_conv2")
        self.med_class_flatten = Flatten(name="med_class_flatten")
        self.med_class_dense = Dense(units=10, activation='softmax')

    # Kernel initializer that returns the pretrained small_conv1 filters
    def small_conv1_init(self, shape, dtype=None):
        small_conv1_filters, biases = self.small_model.get_layer(
            "small_conv1").get_weights()
        return small_conv1_filters

    # Kernel initializer that returns the pretrained small_conv2 filters
    def small_conv2_init(self, shape, dtype=None):
        small_conv2_filters, biases = self.small_model.get_layer(
            "small_conv2").get_weights()
        return small_conv2_filters

    def call(self, inputs, training=False):
        """
        The call function is inherited from Model. It defines the forward pass
        of the model by connecting the layers declared in __init__.
        Please review the connection scheme and observe naming conventions.

        :param inputs: input images of shape (batch, height, width, channels)
        :param training: bool indicating whether the model runs in training or
            inference mode (used by layers such as BatchNormalization)
        :return: softmax class probabilities
        """
        # Connect first med conv block
        med_conv1 = self.med_conv1(inputs)
        upsamp_small_filters_conv1 = self.upsamp_small_filters_conv1(inputs)
        comb_tensors1 = self.comb_tensors1(
            [med_conv1, upsamp_small_filters_conv1])
        med_bn1 = self.med_bn1(comb_tensors1, training=training)
        med_relu1 = self.med_relu1(med_bn1)

        # Connect second med conv block
        med_conv2 = self.med_conv2(med_relu1)
        down_samp_relu1 = self.down_med_relu1(med_relu1)
        upsamp_small_filters_conv2 = self.upsamp_small_filters_conv2(
            down_samp_relu1)
        comb_tensors2 = self.comb_tensors2(
            [med_conv2, upsamp_small_filters_conv2])
        med_bn2 = self.med_bn2(comb_tensors2, training=training)
        med_relu2 = self.med_relu2(med_bn2)

        # Connect third med conv block
        med_conv3 = self.med_conv3(med_relu2)
        med_bn3 = self.med_bn3(med_conv3, training=training)
        med_relu3 = self.med_relu3(med_bn3)

        # Connect fourth med conv block
        med_conv4 = self.med_conv4(med_relu3)
        med_bn4 = self.med_bn4(med_conv4, training=training)
        med_relu4 = self.med_relu4(med_bn4)

        # Connect med classification block
        med_class_conv1 = self.med_class_conv1(med_relu4)
        med_class_conv2 = self.med_class_conv2(med_class_conv1)
        med_class_flatten = self.med_class_flatten(med_class_conv2)
        med_class_dense = self.med_class_dense(med_class_flatten)

        # if training:
        #     output = med_class_dense
        # else:
        #     # pred = np.argmax(med_class_dense)
        #     # conf = np.max(med_class_dense)
        #     # output = [pred, conf]

        return med_class_dense

    @staticmethod
    def loss_fn(labels, predictions):
        """Loss function for the model."""
        return tf.keras.losses.sparse_categorical_crossentropy(
            labels, predictions, from_logits=False)
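# --- Usage sketch (not part of the original module) ---
# A hedged example of running MedModel for inference. It assumes that
# "./models/small_weights.h5" already exists (i.e. SmallModel has been trained
# and saved, as sketched earlier). The 8x8x3 input shape mirrors the shape used
# to build the small model in __init__, and the random batch is purely
# illustrative; substitute real images from the project's data pipeline.
def predict_with_med_model(images=None):
    model = MedModel()  # also loads the frozen small-model filters
    model(tf.keras.Input(shape=(8, 8, 3)))  # build the layer variables

    if images is None:
        images = tf.random.normal([4, 8, 8, 3])  # placeholder batch

    probs = model(images, training=False)  # (batch, 10) softmax probabilities
    preds = tf.argmax(probs, axis=-1)
    return probs, preds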