def compute_output(self, X, Y, keep_prob=cfg.keep_prob,
                   regularization_scale=cfg.regularization_scale):
    """Build the forward pass and return all losses, accuracies and reconstructions.

    The input is encoded into digit capsules, decoded back into an image, and
    then sent through the "memo" branch (``_digitcaps_to_memo`` followed by
    ``_memo_to_digitcaps``), whose capsules are scored against ``Y`` as well.

    Args:
        X: Input batch; its static shape is printed for debugging.
        Y: Targets handed to the margin-loss, accuracy and decoder helpers.
        keep_prob: Forwarded unchanged to ``_memo_to_digitcaps``.
        regularization_scale: Weight applied to the reconstruction loss
            before it is added to the margin loss.

    Returns:
        The 10-tuple ``(total_loss, margin_loss, reconstruction_loss,
        reconstructed_image, reconstruction_1, reconstruction_2,
        batch_accuracy, memo, memo_margin_loss, memo_accuracy)``.
    """
    print("Size of input:")
    print(X.get_shape())

    # Encoder: image -> digit capsules, scored with the margin loss.
    capsules = self._image_to_digitcaps(X)
    class_loss = u.margin_loss(capsules, Y)

    # Decoder: digit capsules -> reconstructions, scored against the input.
    decoded = self._digitcaps_to_image(capsules, Y)
    recon_image, recon_first, recon_second = decoded
    recon_loss = u.reconstruction_loss(recon_image, X)

    # Objective and accuracy of the main branch.
    combined_loss = class_loss + regularization_scale * recon_loss
    accuracy = u.acc(capsules, Y)

    # Memo branch: rebuild the memo, re-encode it, and score it the same way.
    memo_tensor = self._digitcaps_to_memo(X, capsules)
    memo_capsules = self._memo_to_digitcaps(memo_tensor, keep_prob=keep_prob)
    memo_loss = u.margin_loss(memo_capsules, Y)
    memo_acc = u.acc(memo_capsules, Y)

    return (
        combined_loss,
        class_loss,
        recon_loss,
        recon_image,
        recon_first,
        recon_second,
        accuracy,
        memo_tensor,
        memo_loss,
        memo_acc,
    )
slots_reshape_onehot = tf.one_hot(slots_reshape, len(slot_vocab['vocab']) - 2) # [16*18, 74] crossent = tf.nn.softmax_cross_entropy_with_logits_v2( labels=slots_reshape_onehot, logits=slot_outputs) crossent = tf.reshape(crossent, slots_shape) slot_loss = tf.reduce_sum(crossent * slot_weights, 1) total_size = tf.reduce_sum(slot_weights, 1) total_size += 1e-12 slot_loss = slot_loss / total_size # Define intent loss with tf.variable_scope('intent_loss'): intent_onehot = tf.one_hot(intent, len(intent_vocab['vocab'])) marginloss = margin_loss(labels=intent_onehot, raw_logits=intent_outputs_norm, margin=arg.margin, downweight=arg.downweight) intent_loss = tf.reduce_mean(marginloss, axis=-1) # Specify the learning environment params = tf.trainable_variables() slot_params = [] for p in params: if 'slot' in p.name or 'embedding' in p.name: slot_params.append(p) intent_params = tf.trainable_variables() for p in params: if 'intent' in p.name: intent_params.append(p) gradients_slot = tf.gradients(slot_loss, slot_params)
def caps_model_fn(features, labels, mode):
    """Estimator model function for a capsule network on 28x28 one-channel images.

    The graph is a ReLU convolution, a convolutional capsule layer and a
    routed, fully connected capsule layer with 10 output capsules of
    dimension 16. The length of each output capsule serves as the class
    score. When ``regularization`` is enabled, a decoder reconstructs the
    input from the masked capsules and its loss is added to the margin loss,
    scaled by ``lambda_reg``.

    Settings such as ``iter_routing``, ``learn_coupling``,
    ``mapfn_parallel_iterations``, ``epsilon``, ``regularization``,
    ``lambda_reg``, ``config``, ``model_dir``, ``start_lr``, ``decay_steps``
    and ``decay_rate`` are read from the enclosing scope.

    Args:
        features: Mapping whose ``"x"`` entry holds the input images.
        labels: Class indices; not used in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` matching ``mode``.
    """
    # Input layer: reshape to [batch_size, width, height, channels].
    # Fashion MNIST images are 28x28 pixels with a single colour channel.
    images = tf.reshape(features["x"], [-1, 28, 28, 1])

    # A slightly cheaper version of the network from
    # "Dynamic Routing Between Capsules": start with a standard convolution.
    conv_out = tf.layers.conv2d(
        inputs=images,
        filters=256,
        kernel_size=[9, 9],
        padding="valid",
        activation=tf.nn.relu,
        name="ReLU_Conv1",
    )
    conv_out = tf.expand_dims(conv_out, axis=-2)

    # Convolutional capsules; no routing because the units of the previous
    # layer have dimension one.
    primary = caps.conv2d(conv_out, 32, 8, [9, 9], strides=(2, 2), name="PrimaryCaps")
    n_primary = primary.shape[1].value * primary.shape[2].value * 32
    primary = tf.reshape(primary, [-1, n_primary, 8])

    # Fully connected capsules with routing by agreement.
    digitcaps = caps.dense(
        primary,
        10,
        16,
        iter_routing=iter_routing,
        learn_coupling=learn_coupling,
        mapfn_parallel_iterations=mapfn_parallel_iterations,
        name="DigitCaps",
    )

    # The length of a capsule's activation vector encodes the probability
    # that the corresponding entity is present.
    squared_norm = tf.reduce_sum(tf.square(digitcaps), axis=2)
    lengths = tf.sqrt(squared_norm + epsilon, name="Lengths")

    # Outputs exposed in PREDICT mode and reused for the accuracy metrics.
    class_ids = tf.argmax(lengths, axis=1)
    class_probs = tf.nn.softmax(lengths, name="Softmax")
    predictions = {"classes": class_ids, "probabilities": class_probs}

    if regularization:
        # Prediction-time reconstruction: mask using the capsule lengths.
        masked_pred = mask_one(digitcaps, lengths, is_predicting=True)
        with tf.variable_scope(tf.get_variable_scope()):
            predictions["reconstruction"] = decoder_nn(masked_pred)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Loss, shared by TRAIN and EVAL.
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    m_loss = margin_loss(onehot_labels, lengths)
    train_log_dict = {"margin loss": m_loss}
    tf.summary.scalar("margin_loss", m_loss)

    loss = m_loss
    if regularization:
        # Training-time reconstruction: mask with the true labels and reuse
        # the decoder variables created above.
        masked_true = mask_one(digitcaps, onehot_labels)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            reconstruction = decoder_nn(masked_true)
        rec_loss = reconstruction_loss(images, reconstruction)
        train_log_dict["reconstruction loss"] = rec_loss
        tf.summary.scalar("reconstruction_loss", rec_loss)
        loss = m_loss + lambda_reg * rec_loss

    if mode != tf.estimator.ModeKeys.TRAIN:
        # EVAL mode: report accuracy alongside the loss.
        accuracy_metric = tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops={"accuracy": accuracy_metric})

    # TRAIN mode: log the running accuracy (the metric's update op) with the losses.
    train_log_dict["accuracy"] = tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"])[1]
    logging_hook = tf.train.LoggingTensorHook(
        train_log_dict, every_n_iter=config.save_summary_steps)
    summary_hook = tf.train.SummarySaverHook(
        save_steps=config.save_summary_steps,
        output_dir=model_dir,
        summary_op=tf.summary.merge_all(),
    )

    # Adam with an exponentially decaying learning rate.
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(
        start_lr, global_step, decay_steps, decay_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        training_hooks=[logging_hook, summary_hook],
    )
def train(train_iter, dev_iter, model, args):
    """Train ``model`` on ``train_iter`` with Adam and a margin loss.

    For every batch the labels are shifted to start at zero, one-hot encoded
    into two classes, and compared with the model output through
    ``utils.margin_loss``; the mean loss is back-propagated and the batch
    accuracy is printed.

    Args:
        train_iter: Iterable of batches exposing ``.text`` and ``.label``.
        dev_iter: Not used in the visible part of this function.
        model: Module to train; moved to the GPU when ``args.cuda`` is set.
        args: Options object; ``cuda``, ``lr`` and ``epochs`` are read here.
    """
    if args.cuda:
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    steps = 0  # not used in the visible part of this function
    model.train()
    epc = 0  # number of epochs started so far
    print(args.epochs)
    epoch_tot_acc = 0  # running sum of batch accuracies; never reset
    for epoch in range(1, args.epochs + 1):
        epc += 1
        c = 0  # batch counter within the current epoch
        for batch in train_iter:
            c += 1
            feature, target = batch.text, batch.label
            feature.data.t_(), target.data.sub_(1)  # batch first, index align
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            batch_size = len(feature)
            # Move the labels back to the CPU so they can round-trip through
            # NumPy before being one-hot encoded.
            target = target.cpu()
            d = target.data.numpy()
            d_t = torch.from_numpy(d)
            labels = d_t
            target_one_hot = utils.one_hot_encode(d_t, length=2)
            assert target_one_hot.size() == torch.Size([batch_size, 2])
            optimizer.zero_grad()
            logit = model(feature)
            out_digit_caps = logit
            target = Variable(target_one_hot)
            margin_loss = utils.margin_loss(out_digit_caps, target)
            loss = margin_loss
            # Reduce the margin loss over dimension 0 before backpropagation.
            loss = torch.mean(margin_loss, 0)
            print('mean loss: ', loss.data)
            # Alternative left in the original comments:
            # loss = F.cross_entropy(logit, target)
            loss.backward()
            optimizer.step()
            acc = utils.accuracy(out_digit_caps, labels, False)
            # NOTE(review): epoch_tot_acc accumulates one accuracy per batch
            # across all epochs, while the divisor epc counts epochs; confirm
            # this is the intended average.
            epoch_tot_acc += acc
            epoch_avg_acc = epoch_tot_acc / epc
            print('epc: ', epc)
            print('c: ', c)
            print('acc: ', acc)
            print('epoch_avg_acc', epoch_avg_acc)
            # The triple quote below opens a block that continues past this chunk.
            '''
# Hyper-parameters. z_dim is passed to the generator below; max_epochs,
# d_step and g_step are consumed outside this chunk (presumably by the
# training loop -- confirm against the caller).
z_dim = 100
max_epochs = 10000
d_step = 50
g_step = 50

# mnist image data
x_placeholder = tf.placeholder("float", shape=[batch_size, 28, 28, 1], name="x_placeholder")

# Generator output, and the capsule discriminator applied to real and
# generated images.
Gz = generator(batch_size, z_dim)
Dx = capsule_discriminator(x_placeholder)
Dg = capsule_discriminator(Gz)

# loss function: the generator wants its samples scored as 1; the
# discriminator wants real images scored as 1 and generated ones as 0.
g_loss = margin_loss(1, Dg)
d_loss_real = margin_loss(1, Dx)
d_loss_fake = margin_loss(0, Dg)
d_loss = d_loss_real + d_loss_fake

# Split the trainable variables by name so each optimizer only updates its
# own network. The original iterated over the undefined name `thetes`, which
# raised NameError before any training could start.
thetas = tf.trainable_variables()
theta_d = [var for var in thetas if 'd_' in var.name]
theta_g = [var for var in thetas if 'g_' in var.name]

d_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(d_loss, var_list=theta_d)
g_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(g_loss, var_list=theta_g)

saver = tf.train.Saver()