Beispiel #1
0
    def compute_output(self,
                       X,
                       Y,
                       keep_prob=cfg.keep_prob,
                       regularization_scale=cfg.regularization_scale):
        """Build the losses, accuracies and reconstructions for one batch.

        The input is encoded into digit capsules, decoded back into an
        image, and then passed through a second "memo" path whose capsules
        are scored against the same labels.

        Args:
            X: Input fed to ``_image_to_digitcaps``; also the target of the
                reconstruction loss and an input to ``_digitcaps_to_memo``.
            Y: Labels handed to the margin-loss / accuracy helpers and to
                ``_digitcaps_to_image``.
            keep_prob: Forwarded unchanged to ``_memo_to_digitcaps``.
            regularization_scale: Weight of the reconstruction loss inside
                the total loss.

        Returns:
            A 10-tuple ``(total_loss, margin_loss, reconstruction_loss,
            reconstructed_image, reconstruction_1, reconstruction_2,
            batch_accuracy, memo, memo_margin_loss, memo_accuracy)``.
        """
        print("Size of input:")
        print(X.get_shape())

        # Encoder: input -> digit capsules, plus the classification loss.
        caps = self._image_to_digitcaps(X)
        loss_margin = u.margin_loss(caps, Y)

        # Decoder: the helper yields three reconstructions; only the first
        # one is scored against the input.
        recon_main, recon_first, recon_second = self._digitcaps_to_image(
            caps, Y)
        loss_recon = u.reconstruction_loss(recon_main, X)

        # Total loss is the margin loss plus the scaled reconstruction loss.
        loss_total = loss_margin + regularization_scale * loss_recon

        acc_batch = u.acc(caps, Y)

        # Memo path: build the memo, re-encode it, and score the result.
        memo = self._digitcaps_to_memo(X, caps)
        caps_from_memo = self._memo_to_digitcaps(memo, keep_prob=keep_prob)
        loss_memo_margin = u.margin_loss(caps_from_memo, Y)
        acc_memo = u.acc(caps_from_memo, Y)

        losses = (loss_total, loss_margin, loss_recon)
        reconstructions = (recon_main, recon_first, recon_second)
        memo_outputs = (memo, loss_memo_margin, acc_memo)
        return losses + reconstructions + (acc_batch,) + memo_outputs
Beispiel #2
0
    # One-hot encode the flattened slot labels; the depth is the slot
    # vocabulary size minus 2.
    slots_reshape_onehot = tf.one_hot(slots_reshape,
                                      len(slot_vocab['vocab']) -
                                      2)  # [16*18, 74]
    # Softmax cross-entropy between the one-hot labels and the slot logits.
    crossent = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=slots_reshape_onehot, logits=slot_outputs)
    # Reshape the per-element losses to `slots_shape`.
    crossent = tf.reshape(crossent, slots_shape)
    # Weighted sum along axis 1, normalised by the summed weights.
    # NOTE(review): slot_weights presumably masks padding positions - confirm.
    slot_loss = tf.reduce_sum(crossent * slot_weights, 1)
    total_size = tf.reduce_sum(slot_weights, 1)
    # Small epsilon keeps the division defined when the weights sum to zero.
    total_size += 1e-12
    slot_loss = slot_loss / total_size

# Intent loss: margin loss on the one-hot intent labels, averaged over the
# last axis.
with tf.variable_scope('intent_loss'):
    intent_onehot = tf.one_hot(indices=intent,
                               depth=len(intent_vocab['vocab']))
    marginloss = margin_loss(
        labels=intent_onehot,
        raw_logits=intent_outputs_norm,
        margin=arg.margin,
        downweight=arg.downweight,
    )
    intent_loss = tf.reduce_mean(input_tensor=marginloss, axis=-1)

# Specify the learning environment
params = tf.trainable_variables()

# The slot loss only updates variables whose name mentions 'slot' or
# 'embedding'.
slot_params = [p for p in params if 'slot' in p.name or 'embedding' in p.name]

# The intent loss updates every trainable variable.  The previous code started
# from the full variable list and then appended each 'intent' variable a
# second time, so those variables appeared twice; any later per-entry use of
# the list (gradients, optimizer updates) would then process them twice.
# The effective variable set is kept here, without the duplicates.
# NOTE(review): if the intent loss was meant to touch only 'intent' variables,
# replace this with a filter on 'intent' in p.name - confirm with the author.
intent_params = list(params)

gradients_slot = tf.gradients(slot_loss, slot_params)
Beispiel #3
0
def caps_model_fn(features, labels, mode):
    """Estimator model function for a capsule network.

    Builds a convolution -> primary capsules -> digit capsules graph on
    28x28x1 inputs and returns the ``tf.estimator.EstimatorSpec`` matching
    ``mode``.

    The function also reads several module-level settings that are defined
    outside this function: ``iter_routing``, ``learn_coupling``,
    ``mapfn_parallel_iterations``, ``epsilon``, ``regularization``,
    ``lambda_reg``, ``config``, ``model_dir``, ``start_lr``, ``decay_steps``
    and ``decay_rate``.

    Args:
        features: Mapping whose ``"x"`` entry holds the input images; it is
            reshaped to ``[-1, 28, 28, 1]``.
        labels: Class indices; cast to int32 and one-hot encoded with depth
            10. Not used in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec``:
        * PREDICT - predictions only (``classes``, ``probabilities`` and,
          when ``regularization`` is enabled, ``reconstruction``).
        * TRAIN - loss, train op, and logging/summary hooks.
        * EVAL - loss and an ``accuracy`` metric.
    """
    train_log_dict = {}

    # Input Layer
    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
    # Fashion MNIST images are 28x28 pixels, and have one color channel
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # A little bit cheaper version of the capsule network in: Dynamic Routing Between Capsules
    # Std. convolutional layer
    conv1 = tf.layers.conv2d(inputs=input_layer,
                             filters=256,
                             kernel_size=[9, 9],
                             padding="valid",
                             activation=tf.nn.relu,
                             name="ReLU_Conv1")
    conv1 = tf.expand_dims(conv1, axis=-2)
    # Convolutional capsules, no routing as the dimension of the units of previous layer is one
    primarycaps = caps.conv2d(conv1,
                              32,
                              8, [9, 9],
                              strides=(2, 2),
                              name="PrimaryCaps")
    # Flatten the spatial grid and the 32 capsule channels into one capsule axis.
    primarycaps = tf.reshape(
        primarycaps,
        [-1, primarycaps.shape[1].value * primarycaps.shape[2].value * 32, 8])
    # Fully connected capsules with routing by agreement
    digitcaps = caps.dense(primarycaps,
                           10,
                           16,
                           iter_routing=iter_routing,
                           learn_coupling=learn_coupling,
                           mapfn_parallel_iterations=mapfn_parallel_iterations,
                           name="DigitCaps")
    # The length of the capsule activation vectors encodes the probability of an entity being present
    # (epsilon keeps the sqrt argument strictly positive).
    lengths = tf.sqrt(tf.reduce_sum(tf.square(digitcaps), axis=2) + epsilon,
                      name="Lengths")

    # Predictions (used in PREDICT and EVAL mode)
    predictions = {
        "classes": tf.argmax(lengths, axis=1),
        "probabilities": tf.nn.softmax(lengths, name="Softmax")
    }

    if regularization:
        # At prediction time the capsules are masked using the lengths
        # rather than the labels.
        masked_digitcaps_pred = mask_one(digitcaps,
                                         lengths,
                                         is_predicting=True)
        with tf.variable_scope(tf.get_variable_scope()):
            reconstruction_pred = decoder_nn(masked_digitcaps_pred)
        predictions["reconstruction"] = reconstruction_pred

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    m_loss = margin_loss(onehot_labels, lengths)
    train_log_dict["margin loss"] = m_loss
    tf.summary.scalar("margin_loss", m_loss)
    if regularization:
        # Mask with the true labels and reuse the decoder variables that
        # were created for the prediction branch above.
        masked_digitcaps = mask_one(digitcaps, onehot_labels)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            reconstruction = decoder_nn(masked_digitcaps)
        rec_loss = reconstruction_loss(input_layer, reconstruction)
        train_log_dict["reconstruction loss"] = rec_loss
        tf.summary.scalar("reconstruction_loss", rec_loss)
        loss = m_loss + lambda_reg * rec_loss
    else:
        loss = m_loss

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Logging hook; [1] selects the update op of the streaming accuracy
        # so that it is refreshed every time it is logged.
        train_log_dict["accuracy"] = tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])[1]
        logging_hook = tf.train.LoggingTensorHook(
            train_log_dict, every_n_iter=config.save_summary_steps)
        # Summary hook
        summary_hook = tf.train.SummarySaverHook(
            save_steps=config.save_summary_steps,
            output_dir=model_dir,
            summary_op=tf.summary.merge_all())
        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(start_lr, global_step,
                                                   decay_steps, decay_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss=loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            training_hooks=[logging_hook, summary_hook])

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy":
        tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
Beispiel #4
0
def train(train_iter, dev_iter, model, args):
    """Train ``model`` with Adam on a margin loss over one-hot targets.

    For every batch the labels are one-hot encoded with length 2, the model
    output is scored with ``utils.margin_loss``, the mean loss is
    back-propagated, and the loss and accuracy are printed.

    Args:
        train_iter: Iterable of batches exposing ``.text`` and ``.label``.
        dev_iter: Not used in the portion of the function visible here.
        model: Module to train; moved to the GPU when ``args.cuda`` is set.
        args: Settings object; ``cuda``, ``lr`` and ``epochs`` are read.
    """
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # NOTE(review): `steps` is never updated in the visible part of the loop.
    steps = 0
    model.train()
    # `epc` counts epochs; `epoch_tot_acc` accumulates per-batch accuracy
    # over the whole run and is never reset between epochs.
    epc = 0
    print(args.epochs)
    epoch_tot_acc = 0
    for epoch in range(1, args.epochs + 1):
        epc += 1
        # `c` counts batches within the current epoch.
        c = 0
        for batch in train_iter:
            c += 1
            feature, target = batch.text, batch.label
            # Both operations are in-place: transpose the features and shift
            # the labels down by one.
            feature.data.t_(), target.data.sub_(1)  # batch first, index align
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            batch_size = len(feature)

            # Round-trip the labels through NumPy on the CPU to obtain a
            # plain tensor for one-hot encoding and for the accuracy call.
            target = target.cpu()
            d = target.data.numpy()
            d_t = torch.from_numpy(d)
            labels = d_t
            target_one_hot = utils.one_hot_encode(d_t, length=2)
            assert target_one_hot.size() == torch.Size([batch_size, 2])
            optimizer.zero_grad()
            logit = model(feature)

            out_digit_caps = logit
            # NOTE(review): the one-hot target is built on the CPU and is not
            # moved back to the GPU when args.cuda is set - confirm that
            # utils.margin_loss handles this.
            target = Variable(target_one_hot)
            margin_loss = utils.margin_loss(out_digit_caps, target)
            # The first assignment is immediately overwritten by the mean
            # over dimension 0.
            loss = margin_loss
            loss = torch.mean(margin_loss, 0)
            print('mean loss: ', loss.data)

            loss.backward()
            optimizer.step()

            acc = utils.accuracy(out_digit_caps, labels, False)
            epoch_tot_acc += acc
            # NOTE(review): this divides accuracy summed over batches by the
            # epoch count, not the batch count - confirm this is intended.
            epoch_avg_acc = epoch_tot_acc / epc
            print('epc: ', epc)
            print('c: ', c)
            print('acc: ', acc)
            print('epoch_avg_acc', epoch_avg_acc)
            '''
Beispiel #5
0
# Hyper-parameters (batch_size and lr are expected to be defined earlier).
z_dim = 100
max_epochs = 10000
d_step = 50
g_step = 50

# mnist image data
x_placeholder = tf.placeholder("float",
                               shape=[batch_size, 28, 28, 1],
                               name="x_placeholder")

# Generator output and discriminator responses on real / generated images.
Gz = generator(batch_size, z_dim)
Dx = capsule_discriminator(x_placeholder)
Dg = capsule_discriminator(Gz)

# loss function
# The generator is scored against label 1 on its own samples; the
# discriminator is scored against label 1 on real data and 0 on generated data.
g_loss = margin_loss(1, Dg)
d_loss_real = margin_loss(1, Dx)
d_loss_fake = margin_loss(0, Dg)
d_loss = d_loss_real + d_loss_fake

# Split the trainable variables by name prefix so each optimizer only
# updates its own network.  (Fixed: the discriminator filter previously
# iterated over the undefined name `thetes`, raising NameError.)
thetas = tf.trainable_variables()
theta_d = [var for var in thetas if 'd_' in var.name]
theta_g = [var for var in thetas if 'g_' in var.name]

d_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(d_loss,
                                                             var_list=theta_d)
g_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(g_loss,
                                                             var_list=theta_g)

saver = tf.train.Saver()