Ejemplo n.º 1
0
    def forward(self, x, label):
        """Compute cross-entropy loss.

        Args:
          x (lbann.Layer): Input vector.
          label (lbann.Layer): Label. Should have one entry, which
            will be cast to an integer.

        Returns:
          lbann.Layer: Loss function value.

        """
        log_probs = self.fc(x)
        label_onehot = lbann.OneHot(
            label,
            size=self.num_classes,
            data_layout=self.data_layout,
        )
        loss = lbann.Multiply(
            log_probs,
            label_onehot,
            data_layout=self.data_layout,
        )
        loss = lbann.Reduction(
            loss,
            mode="sum",
            data_layout=self.data_layout,
        )
        loss = lbann.Negative(loss, data_layout=self.data_layout)
        return loss
Ejemplo n.º 2
0
    def forward(self, motif_size, motif_log_embeddings):
        """Predict whether a motif is real.

        @todo Numerically accurate computation of both log(D) and
        log(1-D).

        """

        # D = 1 - exp(-sum_j(prod_i(d_ij)))
        # log(1-D) = -sum_j(exp(sum_i(log(d_ij))))
        x = lbann.MatMul(
            lbann.Constant(value=1, num_neurons=str_list([1, motif_size])),
            motif_log_embeddings,
        )
        x = lbann.Exp(x)
        x = lbann.Reduction(x, mode='sum')
        x = lbann.Negative(x)
        log_not_prob = x

        # Convert log-probability to linear space
        # Note: D=-expm1(x) is accurate when D~0. When D~1, prefer
        # 1-D=exp(x).
        prob = lbann.Negative(lbann.Expm1(log_not_prob))

        return prob, log_not_prob
Ejemplo n.º 3
0
    def forward_encoder(self, x_emb):
        """Encoder step, emulating z ~ E(x) = q_E(z|x)

        :param x_emb: (n_batch, len(x), d_z) of floats, embeddings for input sentence x
        :return: (n_batch, d_z) of floats, sample of latent vector z
        :return: float, kl term component of loss
        """

        # _, h = self.encoder_rnn(x, None)
        h = self.encoder_rnn(x_emb, None)

        h = lbann.Slice(
            h,
            slice_points=str_list(
                [self.input_feature_dims - 1, self.input_feature_dims]),
            axis=0,
        )
        h = lbann.Identity(h)

        mu, logvar = self.q_mu(h), self.q_logvar(h)

        # Set datatype of previous layers
        # Note: Depth-first search from mu and logvar to x_emb
        stack = [mu, logvar]
        in_stack = {l: True for l in stack}
        while stack:
            l = stack.pop()
            if type(l) not in (lbann.Slice, lbann.Reshape, lbann.Tessellate):
                l.datatype = self.datatype
            for parent in l.parents:
                if parent not in in_stack and parent is not x_emb:
                    stack.append(parent)
                    in_stack[parent] = True

        # eps = torch.randn_like(mu)
        eps = lbann.Gaussian(mean=0, stdev=1, hint_layer=mu)

        # z = mu + (logvar / 2).exp() * eps
        z = lbann.Add([
            mu,
            (lbann.Multiply([
                lbann.Exp(lbann.WeightedSum(logvar, scaling_factors='0.5')),
                eps
            ]))
        ])

        # kl_loss = 0.5 * (logvar.exp() + mu ** 2 - 1 - logvar).sum(1).mean()
        kl_loss = lbann.Reduction(
            lbann.WeightedSum(
                lbann.Exp(logvar),
                lbann.Square(mu),
                self.constant(1, hint_layer=mu),
                logvar,
                scaling_factors='0.5 0.5 -0.5 -0.5',
            ),
            mode='sum',
        )

        return z, kl_loss
Ejemplo n.º 4
0
 def forward(self, inputs):
     raise NotImplementedError  # Requires log-gamma function
     if len(inputs) != 2:
         raise ValueError('expected two inputs: predictions and labels')
     pred = inputs[0]
     label = inputs[1]
     count = lbann.Reduction(label)
     alpha_sum = lbann.Reduction(pred)
     lgamma_alpha_sum = lbann.Reduction(lbann.LogGamma(pred))
     lgamma_alpha_level_count_sum = lbann.Reduction(
         lbann.LogGamma(lbann.Add([pred, label])))
     return lbann.WeightedSum([
         lbann.LogGamma(alpha_sum),
         lbann.LogGamma(lbann.Sum([count, alpha_sum])),
         lgamma_alpha_level_count, lgamma_alpha_sum
     ],
                              scaling_factors='-1.0 1.0 -1.0 1.0')
Ejemplo n.º 5
0
 def forward(self, inputs):
     if len(inputs) != 2:
         raise ValueError('expected two inputs: predictions and labels')
     pred = inputs[0]
     label = inputs[1]  # Assumed to be Boolean
     masked_pred = lbann.Multiply([pred, label])
     pred_sum = lbann.Reduction(masked_pred)
     return lbann.Negative(lbann.Log(pred_sum))
Ejemplo n.º 6
0
def negative_samples_loss(embeddings, negative_samples_embeddings):
    scores = lbann.MatMul(
        embeddings,
        negative_samples_embeddings,
        transpose_b=True,
    )
    scores = lbann.WeightedSum(scores, scaling_factors='-1')
    scores = lbann.LogSigmoid(scores)
    loss = lbann.Reduction(scores, mode='average')
    loss = lbann.WeightedSum(loss, scaling_factors='-1')
    return loss
Ejemplo n.º 7
0
 def forward(self, inputs):
     if len(inputs) != 2:
         raise ValueError('expected two inputs: predictions and labels')
     pred = inputs[0]
     label = inputs[1]
     ones = p.Constant(hint_layer=pred, value=1.0)
     term1 = lbann.Multiply(
         [label, lbann.Log(lbann.Subtract([ones, pred]))])
     term2 = lbann.Log(pred)
     full = lbann.WeightedSum([term1, term2], scaling_factors='-1.0 -1.0')
     return lbann.Reduction(full)
Ejemplo n.º 8
0
 def forward(self, inputs):
     raise NotImplementedError  # Requires log-gamma function
     if len(inputs) != 2:
         raise ValueError('expected two inputs: predictions and labels')
     pred = inputs[0]
     label = inputs[1]
     ones = lbann.Constant(hint_layer=pred, value=1.0)
     term1 = pred
     term2 = lbann.Multiply([label, lbann.Log(pred)])
     term3 = lbann.LogGamma(lbann.Add([label, ones]))
     full = lbann.WeightedSum([term1, term2, term3],
                              scaling_factors='1.0 -1.0 1.0')
     return lbann.Reduction(full)
Ejemplo n.º 9
0
def construct_model():
    """Construct LBANN model.

    Pilot1 Combo model

    """
    import lbann

    # Layer graph
    data = lbann.Input(data_field='samples')
    responses = lbann.Input(data_field='responses')

    pred = combo.Combo()(data)
    mse = lbann.MeanSquaredError([responses, pred])

    SS_res = lbann.Reduction(lbann.Square(lbann.Subtract(responses, pred)),
                             mode='sum')

    #SS_tot = var(x) = mean((x-mean(x))^2)
    mini_batch_size = lbann.MiniBatchSize()
    mean = lbann.Divide(lbann.BatchwiseReduceSum(responses), mini_batch_size)
    SS_tot = lbann.Divide(
        lbann.BatchwiseReduceSum(lbann.Square(lbann.Subtract(responses,
                                                             mean))),
        mini_batch_size)
    eps = lbann.Constant(value=1e-07, hint_layer=SS_tot)
    r2 = lbann.Subtract(lbann.Constant(value=1, num_neurons='1'),
                        lbann.Divide(SS_res, lbann.Add(SS_tot, eps)))

    metrics = [lbann.Metric(mse, name='mse')]
    metrics.append(lbann.Metric(r2, name='r2'))

    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

    # Construct model
    num_epochs = 100
    layers = list(lbann.traverse_layer_graph([data, responses]))
    return lbann.Model(num_epochs,
                       layers=layers,
                       metrics=metrics,
                       objective_function=mse,
                       callbacks=callbacks)
Ejemplo n.º 10
0
    input_slice,
    weights=encoder_embeddings_weights,
    num_embeddings=num_graph_nodes,
    embedding_dim=args.latent_dim,
)

# Skip-Gram with negative sampling
preds = lbann.MatMul(decoder_embeddings, encoder_embeddings, transpose_b=True)
preds_slice = lbann.Slice(
    preds,
    axis=0,
    slice_points=f'0 {num_negative_samples} {num_negative_samples+1}')
preds_negative = lbann.Identity(preds_slice)
preds_positive = lbann.Identity(preds_slice)
obj_positive = lbann.LogSigmoid(preds_positive)
obj_positive = lbann.Reduction(obj_positive, mode='sum')
obj_negative = lbann.WeightedSum(preds_negative, scaling_factors='-1')
obj_negative = lbann.LogSigmoid(obj_negative)
obj_negative = lbann.Reduction(obj_negative, mode='sum')
obj = [
    lbann.LayerTerm(obj_positive, scale=-1),
    lbann.LayerTerm(obj_negative, scale=-1/num_negative_samples),
]

# ----------------------------------
# Create data reader
# ----------------------------------

reader = lbann.reader_pb2.DataReader()
_reader = reader.reader.add()
_reader.name = 'python'
Ejemplo n.º 11
0
    def compute_loss(self, x, y):

        # y[:, :-1]
        y = lbann.Slice(
            y,
            axis=0,
            slice_points=str_list([0, self.input_feature_dims - 1]),
        )
        y = lbann.Identity(y)

        # x[:, 1:]
        x = lbann.Slice(
            x,
            slice_points=str_list([1, self.input_feature_dims]),
        )
        x = lbann.Identity(x)

        # Convert indices in x to one-hot representation
        # Note: Ignored indices result in zero vectors
        ignore_mask = lbann.Equal(
            x,
            self.constant(self.label_to_ignore, hint_layer=x),
        )
        keep_mask = lbann.LogicalNot(ignore_mask)
        length = lbann.Reduction(keep_mask, mode='sum')
        length = lbann.Max(length, self.constant(1, [1]))
        x = lbann.Add(
            lbann.Multiply(keep_mask, x),
            lbann.Multiply(ignore_mask, self.constant(-1, hint_layer=x)),
        )
        x = lbann.Slice(x,
                        slice_points=str_list(range(self.input_feature_dims)))
        x = [lbann.Identity(x) for _ in range(self.input_feature_dims - 1)]
        x = [lbann.OneHot(xi, size=self.dictionary_size) for xi in x]
        x = [
            lbann.Reshape(xi, dims=str_list([1, self.dictionary_size]))
            for xi in x
        ]
        x = lbann.Concatenation(x, axis=0)

        # recon_loss = F.cross_entropy(
        #     y[:, :-1].contiguous().view(-1, y.size(-1)),
        #     x[:, 1:].contiguous().view(-1),
        #     ignore_index=self.pad
        # )
        # Note: Ideally we'd shift y by y.max(-1) for numerical stability
        shifts = lbann.MatMul(
            lbann.Max(y, self.constant(0, hint_layer=y)),
            self.constant(
                1 / math.sqrt(self.dictionary_size),
                [self.dictionary_size, self.dictionary_size],
            ),
        )
        y = lbann.Subtract(y, shifts)
        z = lbann.MatMul(
            lbann.Exp(y),
            self.constant(1, [self.dictionary_size, 1]),
        )
        z = lbann.Log(z)
        z = lbann.MatMul(
            lbann.Reshape(keep_mask, dims=str_list([1, -1])),
            z,
        )
        recon_loss = lbann.MatMul(
            lbann.Reshape(y, dims=str_list([1, -1])),
            lbann.Reshape(x, dims=str_list([1, -1])),
            transpose_b=True,
        )
        recon_loss = lbann.Subtract(z, recon_loss)
        recon_loss = lbann.Reshape(recon_loss, dims=str_list([1]))
        recon_loss = lbann.Divide(recon_loss, length)

        return recon_loss
Ejemplo n.º 12
0
                             has_bias=True)

# KL divergence
sd = lbann.Exp(logsd, name="sd")

var = lbann.Square(sd, name="var")

meansq = lbann.Square(mu, name="meansq")

kldiv_plus_half = lbann.WeightedSum([meansq, var, logsd],
                                    name="kldiv_plus_half",
                                    scaling_factors='0.5 0.5 -1')

kldiv_full = lbann.Rsqrt(kldiv_plus_half, name="kldiv_full")

kldiv = lbann.Reduction(kldiv_full, name="kldiv", mode="sum")

# Generate sample
noise = lbann.Gaussian(name="noise", mean=0, stdev=1, hint_layer=mu)

sdnoise = lbann.Hadamard([noise, sd], name="sdnoise")

sample = lbann.Add([mu, sdnoise], name="sample")

# Decoder
decode4 = lbann.FullyConnected(sample,
                               name="decode4",
                               has_bias=True,
                               hint_layer=encode3)

decode4neuron = lbann.Relu(decode4, name="decode4neuron")
Ejemplo n.º 13
0
def make_model(
    num_epochs,
    embed_dim,
    num_heads,
    label_smoothing,
):

    # Embedding weights
    var = 2 / (embed_dim + vocab_size)  # Glorot initialization
    embedding_weights = lbann.Weights(
        name='embeddings',
        initializer=lbann.NormalInitializer(standard_deviation=math.sqrt(var)),
    )

    # Input is two sequences of token IDs
    input_ = lbann.Input(data_field='samples')

    # Get sequences of embedding vectors
    # Note: Scale embeddings by sqrt(embed_dim).
    # Note: Decoder input is shifted right, so embedding for last
    # token isn't needed.
    embeddings_tokens = lbann.Identity(
        lbann.Slice(
            input_,
            axis=0,
            slice_points=str_list([0, 2 * sequence_length - 1]),
        ))
    embeddings = lbann.Embedding(
        embeddings_tokens,
        weights=embedding_weights,
        num_embeddings=vocab_size,
        embedding_dim=embed_dim,
        padding_idx=pad_index,
    )
    embeddings = lbann.WeightedSum(
        embeddings,
        scaling_factors=str(math.sqrt(embed_dim)),
    )
    embeddings_slice = lbann.Slice(
        embeddings,
        axis=0,
        slice_points=str_list([0, sequence_length, 2 * sequence_length - 1]),
    )
    encoder_input = lbann.Identity(embeddings_slice)
    decoder_input = lbann.Identity(embeddings_slice)

    # Apply transformer model
    transformer = lbann.models.Transformer(
        hidden_size=embed_dim,
        num_heads=num_heads,
        name='transformer',
    )
    result = transformer(
        encoder_input,
        sequence_length,
        decoder_input,
        sequence_length - 1,
    )

    # Reconstruct decoder input
    preds = lbann.ChannelwiseFullyConnected(
        result,
        weights=embedding_weights,
        output_channel_dims=[vocab_size],
        bias=False,
        transpose=True,
    )
    preds = lbann.ChannelwiseSoftmax(preds)
    preds = lbann.Slice(preds,
                        axis=0,
                        slice_points=str_list(range(sequence_length)))
    preds = [lbann.Identity(preds) for _ in range(sequence_length - 1)]

    # Count number of non-pad tokens
    label_tokens = lbann.Identity(
        lbann.Slice(
            input_,
            slice_points=str_list([sequence_length + 1, 2 * sequence_length]),
        ))
    pads = lbann.Constant(value=pad_index,
                          num_neurons=str(sequence_length - 1))
    is_not_pad = lbann.NotEqual(label_tokens, pads)
    num_not_pad = lbann.Reduction(is_not_pad, mode='sum')

    # Cross entropy loss with label smoothing
    label_tokens = lbann.Slice(
        label_tokens,
        slice_points=str_list(range(sequence_length)),
    )
    label_tokens = [
        lbann.Identity(label_tokens) for _ in range(sequence_length - 1)
    ]
    if label_smoothing > 0:
        uniform_label = lbann.Constant(value=1 / vocab_size,
                                       num_neurons=str_list([1, vocab_size]))
    loss = []
    for i in range(sequence_length - 1):
        label = lbann.OneHot(label_tokens[i], size=vocab_size)
        label = lbann.Reshape(label, dims=str_list([1, vocab_size]))
        if label_smoothing > 0:
            label = lbann.WeightedSum(
                label,
                uniform_label,
                scaling_factors=str_list(
                    [1 - label_smoothing, label_smoothing]),
            )
        loss.append(lbann.CrossEntropy(preds[i], label))
    loss = lbann.Concatenation(loss)

    # Average cross entropy over non-pad tokens
    loss_scales = lbann.Divide(
        is_not_pad,
        lbann.Tessellate(num_not_pad, hint_layer=is_not_pad),
    )
    loss = lbann.Multiply(loss, loss_scales)
    loss = lbann.Reduction(loss, mode='sum')

    # Construct model
    metrics = []
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    return lbann.Model(
        num_epochs,
        layers=lbann.traverse_layer_graph(input_),
        objective_function=loss,
        metrics=metrics,
        callbacks=callbacks,
    )
Ejemplo n.º 14
0
    def compute_loss(self, x, y):

        # y[:, :-1]
        y = lbann.Slice(
            y,
            axis=0,
            slice_points=str_list([0, self.input_feature_dims-1]),
        )
        y = lbann.Identity(y)

        # x[:, 1:]
        x = lbann.Slice(
            x,
            slice_points=str_list([1, self.input_feature_dims]),
        )
        x = lbann.Identity(x)

        # Figure out entries in x to ignore
        ignore_mask = lbann.Equal(
            x,
            self.constant(self.label_to_ignore, hint_layer=x),
        )
        keep_mask = lbann.LogicalNot(ignore_mask)
        length = lbann.Reduction(keep_mask, mode='sum')
        length = lbann.Max(length, self.constant(1, [1]))

        # Convert entries in x to indices in y
        # Note: Ignored entries correspond to an index of -1.
        offsets = [
            row*self.dictionary_size
            for row in range(self.input_feature_dims-1)
        ]
        offsets = lbann.Weights(
            initializer=lbann.ValueInitializer(values=str_list(offsets)),
            optimizer=lbann.NoOptimizer(),
        )
        offsets = lbann.WeightsLayer(
            dims=str_list([self.input_feature_dims-1]),
            weights=offsets,
        )
        y_inds = lbann.Add(x, offsets)
        y_inds = lbann.Add(
            lbann.Multiply(keep_mask, y_inds),
            lbann.Multiply(
                ignore_mask,
                self.constant(-1, hint_layer=y_inds),
            ),
        )

        # recon_loss = F.cross_entropy(
        #     y[:, :-1].contiguous().view(-1, y.size(-1)),
        #     x[:, 1:].contiguous().view(-1),
        #     ignore_index=self.pad
        # )

        # Shift y for numerical stability
        # Note: We'd prefer to shift by y.max(-1)
        shifts = lbann.MatMul(
            lbann.Max(y, self.constant(0, hint_layer=y)),
            self.constant(
                1 / math.sqrt(self.dictionary_size),
                [self.dictionary_size, self.dictionary_size],
            ),
        )
        y = lbann.Subtract(y, shifts)

        # Compute log of softmax denominator and sum
        z = lbann.MatMul(
            lbann.Exp(y),
            self.constant(1, [self.dictionary_size, 1]),
        )
        z = lbann.Log(z)
        z = lbann.MatMul(
            lbann.Reshape(keep_mask, dims=str_list([1, -1])),
            z,
        )
        z = lbann.Reshape(z, dims=str_list([1]))

        # Compute cross entropy
        recon_loss = lbann.Gather(
            lbann.Reshape(y, dims=str_list([-1])),
            y_inds,
        )
        recon_loss = lbann.Reduction(recon_loss, mode='sum')
        recon_loss = lbann.Subtract(z, recon_loss)
        recon_loss = lbann.Divide(recon_loss, length)

        return recon_loss