Example #1
    def forward(self, x):
        self.instance += 1

        x = self.conv1(x)
        x = lbann.Pooling(x,
                          num_dims=2,
                          has_vectors=False,
                          pool_dims_i=2,
                          pool_pads_i=0,
                          pool_strides_i=2,
                          pool_mode='max',
                          name='{0}_pool1_instance{1}'.format(
                              self.name, self.instance))
        x = self.conv2(x)
        x = lbann.Pooling(x,
                          num_dims=2,
                          has_vectors=False,
                          pool_dims_i=2,
                          pool_pads_i=0,
                          pool_strides_i=2,
                          pool_mode='max',
                          name='{0}_pool2_instance{1}'.format(
                              self.name, self.instance))

        x = self.fc1(x)
        x = lbann.Dropout(x,
                          keep_prob=0.5,
                          name='{0}_drop6_instance{1}'.format(
                              self.name, self.instance))
        x = self.fc2(x)
        x = lbann.Dropout(x,
                          keep_prob=0.5,
                          name='{0}_drop7_instance{1}'.format(
                              self.name, self.instance))
        return self.fc3(x)
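For orientation, here is a minimal sketch of the sub-modules that a forward() like the one above relies on. The class name and layer sizes are assumptions for illustration, not taken from the example:

import lbann
import lbann.modules

class SmallConvNet(lbann.modules.Module):
    """Sketch only: conv/FC stack matching the forward() pattern above."""

    def __init__(self, num_classes, name='smallconvnet'):
        super().__init__()
        self.instance = 0          # bumped on every forward() call
        self.name = name
        conv = lbann.modules.Convolution2dModule
        fc = lbann.modules.FullyConnectedModule
        self.conv1 = conv(6, 5, name=f'{name}_conv1')
        self.conv2 = conv(16, 5, name=f'{name}_conv2')
        self.fc1 = fc(120, name=f'{name}_fc1')
        self.fc2 = fc(84, name=f'{name}_fc2')
        self.fc3 = fc(num_classes, name=f'{name}_fc3')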
Example #2
File: transformer.py (project: oyamay/lbann)
    def forward(self, x, mask=None):
        """Apply Transformer encoder layer.

        Args:
            x (lbann.Layer): Sequence of input vectors.
            mask (lbann.Layer, optional): Attention mask.

        Returns:
            lbann.Layer: Sequence of output vectors.

        """
        self.instance += 1
        name = f'{self.name}_instance{self.instance}'

        # Self-attention with residual connection
        y = self.attention(x, x, x, mask=mask)
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop1',
            )
        z = lbann.Sum(x, y, name=f'{name}_sum1')
        z = lbann.InstanceNorm(z, name=f'{name}_norm1')
        x = z

        # Feedforward network with residual connection
        y = lbann.ChannelwiseFullyConnected(
            x,
            weights=self.fc1_weights,
            output_channel_dims=[self.feedforward_dim],
            name=f'{name}_fc1',
        )
        y = lbann.Relu(y, name=f'{name}_relu1')
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop2',
            )
        y = lbann.ChannelwiseFullyConnected(
            y,
            weights=self.fc2_weights,
            output_channel_dims=[self.embed_dim],
            name=f'{name}_fc2',
        )
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop3',
            )
        z = lbann.Sum(x, y, name=f'{name}_sum2')
        z = lbann.InstanceNorm(z, name=f'{name}_norm2')
        return z
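A sketch of stacking several such encoder layers; each forward() call bumps the layer's instance counter, so the generated dropout/norm layer names stay unique. The constructor arguments below are assumptions, not the verbatim API of the class in transformer.py:

# Hypothetical constructor signature; adjust to the actual class definition.
encoder_layers = [
    TransformerEncoderLayer(embed_dim=512,
                            num_heads=8,
                            feedforward_dim=2048,
                            dropout=0.1,
                            name=f'encoder{i}')
    for i in range(6)
]

def encode(x, mask=None):
    # Apply the stack; x stays an lbann.Layer throughout.
    for layer in encoder_layers:
        x = layer.forward(x, mask=mask)
    return x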
Example #3
def construct_model():
    """Model description

    """
    import lbann
    import lbann.modules

    fc = lbann.modules.FullyConnectedModule
    conv = lbann.modules.Convolution2dModule

    conv1 = conv(20, 3, stride=1, padding=1, name='conv1')
    conv2 = conv(20, 3, stride=1, padding=1, name='conv2')
    fc1 = fc(100, name='fc1')
    fc2 = fc(20, name='fc2')
    fc3 = fc(num_classes, name='fc3')
    # Layer graph
    input = lbann.Input(name='inp_tensor', target_mode='classification')
    inp_slice = lbann.Slice(input,
                            axis=0,
                            slice_points=str_list([0, dims - 1, dims]),
                            name='inp_slice')
    xdata = lbann.Identity(inp_slice)
    ylabel = lbann.Identity(inp_slice, name='gt_y')
    # NHWC to NCHW
    x = lbann.Reshape(xdata, dims='14 13 13')
    x = conv2(conv1(x))
    x = lbann.Reshape(x, dims='3380')
    x = lbann.Dropout(lbann.Relu(fc1(x)), keep_prob=0.5)
    x = lbann.Dropout(fc2(x), keep_prob=0.5)
    pred = lbann.Softmax(fc3(x))
    gt_label = lbann.OneHot(ylabel, size=num_classes)
    loss = lbann.CrossEntropy([pred, gt_label], name='loss')
    acc = lbann.CategoricalAccuracy([pred, gt_label])

    layers = list(lbann.traverse_layer_graph(input))
    # Setup objective function
    weights = set()
    for l in layers:
        weights.update(l.weights)
    obj = lbann.ObjectiveFunction(loss)

    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

    # Construct model
    num_epochs = 10
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=[lbann.Metric(acc, name='accuracy', unit='%')],
                       objective_function=obj,
                       callbacks=callbacks)
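Note that the snippet references num_classes, dims, and str_list without defining them. A minimal assumed setup (placeholder values inferred from the reshapes above) could be:

import lbann
from lbann.util import str_list   # joins ints into the space-separated strings LBANN expects

num_classes = 20             # placeholder: fc3 emits one unit per class
dims = 14 * 13 * 13 + 1      # 2366 sample values plus 1 label element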
Example #4
 def forward(self, hidden_states, input_tensor):
     hidden_states, hidden_shape = lbann.modules.PytorchLinear(
         hidden_states,
         self.input_shape,
         self.hidden_size,
         weights=_load_pretrained_weights(
             ".".join((self.name, "dense.weight")),
             ".".join((self.name, "dense.bias")),
             load_weights=self.load_weights,
         ),
         name=".".join((self.name, "dense")),
         return_dims=True,
     )
     hidden_states = lbann.Dropout(hidden_states,
                                   keep_prob=self.hidden_dropout_prob)
     hidden_states = lbann.modules.PytorchLayerNorm(
         lbann.Add(hidden_states, input_tensor),
         self.layer_norm_eps,
         hidden_shape,
         weights=_load_pretrained_weights(
             ".".join((self.name, "layernorm.weightbias")),
             load_weights=self.load_weights,
         ),
         name=".".join((self.name, "LayerNorm")),
     )
     return hidden_states
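keep_prob in lbann.Dropout is the probability of keeping an activation, which is why the transformer encoder/decoder examples in this list pass 1 - self.dropout_prob. When the configuration stores a PyTorch-style drop probability, the usual conversion is (sketch only, with an assumed value):

hidden_dropout_prob = 0.1    # assumed configuration value
hidden_states = lbann.Dropout(hidden_states,
                              keep_prob=1 - hidden_dropout_prob)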
Example #5
File: cosmoflow.py (project: benson31/lbann)
 def create_dropout(x, i):
     return lbann.Dropout(x,
                          keep_prob=0.8,
                          name='{0}_drop{1}_instance{2}'.format(
                              self.name, i, self.instance))
Example #6
# Construct layer graph
input_ = lbann.Input(name='input')
image = lbann.Identity(input_, name='images')
dummy = lbann.Dummy(input_, name='labels')

# Encoder
encode1 = lbann.FullyConnected(image,
                               name="encode1",
                               data_layout="model_parallel",
                               num_neurons=1000,
                               has_bias=True)

relu1 = lbann.Relu(encode1, name="relu1", data_layout="model_parallel")

dropout1 = lbann.Dropout(relu1,
                         name="dropout1",
                         data_layout="model_parallel",
                         keep_prob=0.8)

decode1 = lbann.FullyConnected(dropout1,
                               name="decode1",
                               data_layout="model_parallel",
                               hint_layer=image,
                               has_bias=True)

reconstruction = lbann.Sigmoid(decode1,
                               name="reconstruction",
                               data_layout="model_parallel")

dropout2 = lbann.Dropout(reconstruction,
                         name="dropout2",
                         data_layout="model_parallel",
                         keep_prob=0.8)
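The snippet is cut off at dropout2. As a hedged sketch, one common way to finish such an autoencoder graph is with a mean-squared-error reconstruction objective; the continuation below is an assumption, not the rest of the original file:

# Sketch only: reconstruction loss and model assembly for the graph above.
mse = lbann.MeanSquaredError([reconstruction, image], name="mse")

layers = list(lbann.traverse_layer_graph(input_))
obj = lbann.ObjectiveFunction(mse)
metrics = [lbann.Metric(mse, name="mean squared error")]
callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

num_epochs = 10
model = lbann.Model(num_epochs,
                    layers=layers,
                    objective_function=obj,
                    metrics=metrics,
                    callbacks=callbacks)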
Example #7
    def forward(
        self,
        input_ids=None,
        token_type_ids=None,
        position_ids=None,
        inputs_embeds=None,
    ):

        if position_ids is None:
            if input_ids is not None:
                position_ids = create_position_ids_from_input_ids(
                    input_ids,
                    self.input_shape,
                    self.padding_idx,
                )
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(
                    inputs_embeds)

        if token_type_ids is None:
            token_type_ids = lbann.Constant(value=0,
                                            num_neurons=str_list(
                                                self.input_shape))

        if inputs_embeds is None:
            inputs_embeds = lbann.Embedding(
                input_ids,
                num_embeddings=self.vocab_size,
                embedding_dim=self.hidden_size,
                padding_idx=self.pad_token_id,
                weights=_load_pretrained_weights(
                    ".".join((self.name, "word_embeddings.weight")),
                    load_weights=self.load_weights,
                ),
                name=".".join((self.name, "word_embeddings")),
            )
        token_type_embeddings = lbann.Embedding(
            token_type_ids,
            num_embeddings=self.type_vocab_size,
            embedding_dim=self.hidden_size,
            weights=_load_pretrained_weights(
                ".".join((self.name, "token_type_embeddings.weight")),
                load_weights=self.load_weights,
            ),
            name=".".join((self.name, "token_type_embeddings")),
        )

        embeddings = lbann.Add(inputs_embeds, token_type_embeddings)
        if self.position_embedding_type == "absolute":
            position_embeddings = lbann.Embedding(
                position_ids,
                num_embeddings=self.max_position_embeddings,
                embedding_dim=self.hidden_size,
                padding_idx=self.pad_token_id,
                weights=_load_pretrained_weights(
                    ".".join((self.name, "position_embeddings.weight")),
                    load_weights=self.load_weights,
                ),
                name=".".join((self.name, "position_embeddings")),
            )
            embeddings = lbann.Add(embeddings, position_embeddings)

        embeddings = lbann.modules.PytorchLayerNorm(
            embeddings,
            self.layer_norm_eps,
            self.input_shape + (self.hidden_size, ),
            weights=_load_pretrained_weights(
                ".".join((self.name, "layernorm.weightbias")),
                load_weights=self.load_weights,
            ),
            name=".".join((self.name, "LayerNorm")),
        )
        embeddings = lbann.Dropout(embeddings,
                                   keep_prob=self.hidden_dropout_prob)
        return embeddings
Example #8
    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
    ):
        mixed_query_layer, query_shape = lbann.modules.PytorchLinear(
            hidden_states,
            self.input_shape,
            self.all_head_size,
            weights=_load_pretrained_weights(
                ".".join((self.name, "query.weight")),
                ".".join((self.name, "query.bias")),
                load_weights=self.load_weights,
            ),
            name=".".join((self.name, "query")),
            return_dims=True,
        )
        query_layer, query_shape = self.transpose_for_scores(
            mixed_query_layer, query_shape)

        key_layer, key_shape = lbann.modules.PytorchLinear(
            hidden_states,
            self.input_shape,
            self.all_head_size,
            weights=_load_pretrained_weights(
                ".".join((self.name, "key.weight")),
                ".".join((self.name, "key.bias")),
                load_weights=self.load_weights,
            ),
            name=".".join((self.name, "key")),
            return_dims=True,
        )
        key_layer, key_shape = self.transpose_for_scores(key_layer, key_shape)

        value_layer, value_shape = lbann.modules.PytorchLinear(
            hidden_states,
            self.input_shape,
            self.all_head_size,
            weights=_load_pretrained_weights(
                ".".join((self.name, "value.weight")),
                ".".join((self.name, "value.bias")),
                load_weights=self.load_weights,
            ),
            name=".".join((self.name, "value")),
            return_dims=True,
        )
        value_layer, value_shape = self.transpose_for_scores(
            value_layer, value_shape)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        key_layer, key_shape = lbann.modules.Permute(key_layer,
                                                     key_shape,
                                                     axes=(0, 1, -1, -2),
                                                     return_dims=True)
        attention_scores, attention_shape = lbann.modules.PytorchMatmul(
            query_layer,
            query_shape,
            key_layer,
            key_shape,
            return_dims=True,
        )

        attention_scores = lbann.Scale(attention_scores,
                                       constant=1 /
                                       math.sqrt(self.attention_head_size))

        if attention_mask is not None:
            # Apply the attention mask (precomputed for all layers in RobertaModel's forward() function)
            attention_scores = lbann.Add(attention_scores, attention_mask)

        # Normalize the attention scores to probabilities.
        attention_scores = lbann.Reshape(
            attention_scores,
            dims=str_list([np.prod(attention_shape[:-1]),
                           attention_shape[-1]]),
        )
        attention_probs = lbann.ChannelwiseSoftmax(attention_scores)
        attention_probs = lbann.Reshape(attention_probs,
                                        dims=str_list(attention_shape))

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = lbann.Dropout(
            attention_probs,
            keep_prob=self.attention_probs_dropout_prob,
        )

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = lbann.Multiply(attention_probs, head_mask)

        context_layer, context_shape = lbann.modules.PytorchMatmul(
            attention_probs,
            attention_shape,
            value_layer,
            value_shape,
            return_dims=True,
        )
        context_layer, context_shape = lbann.modules.Permute(
            context_layer,
            context_shape,
            axes=(0, 2, 1, 3),
            return_dims=True,
        )
        new_context_layer_shape = context_shape[:-2] + (self.all_head_size, )
        context_layer = lbann.Reshape(context_layer,
                                      dims=str_list(self.input_shape))

        return context_layer
Example #9
File: transformer.py (project: oyamay/lbann)
    def forward(self, x, memory, src_mask=None, tgt_mask=None):
        """Apply Transformer decoder layer.

        Args:
            x (lbann.Layer): Sequence of input vectors.
            memory (lbann.Layer): Sequence of vectors produced by
                Transformer encoder stack.
            src_mask (lbann.Layer, optional): Attention mask for
                second attention module (attends to both `x` and
                `memory`).
            tgt_mask (lbann.Layer, optional): Attention mask for first
                attention module (attends only to `x`).

        Returns:
            lbann.Layer: Sequence of output vectors.

        """
        self.instance += 1
        name = f'{self.name}_instance{self.instance}'

        # Self-attention with residual connection
        y = self.attention1(x, x, x, mask=tgt_mask)
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop1',
            )
        z = lbann.Sum(x, y, name=f'{name}_sum1')
        z = lbann.InstanceNorm(z, name=f'{name}_norm1')
        x = z

        # Attention on encoder output with residual connection
        y = self.attention2(x, memory, memory, mask=src_mask)
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop2',
            )
        z = lbann.Sum(x, y, name=f'{name}_sum2')
        z = lbann.InstanceNorm(z, name=f'{name}_norm2')
        x = z

        # Feedforward network with residual connection
        y = lbann.ChannelwiseFullyConnected(
            x,
            weights=self.fc1_weights,
            output_channel_dims=[self.feedforward_dim],
            name=f'{name}_fc1',
        )
        y = lbann.Relu(y, name=f'{name}_relu1')
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop3',
            )
        y = lbann.ChannelwiseFullyConnected(
            y,
            weights=self.fc2_weights,
            output_channel_dims=[self.embed_dim],
            name=f'{name}_fc2',
        )
        if self.dropout_prob > 0:
            y = lbann.Dropout(
                y,
                keep_prob=1 - self.dropout_prob,
                name=f'{name}_drop4',
            )
        z = lbann.Sum(x, y, name=f'{name}_sum3')
        z = lbann.InstanceNorm(z, name=f'{name}_norm3')
        return z
Example #10
    def forward(self, x):
        self.instance += 1

        # Convolutional network
        x = self.conv1(x)
        x = lbann.LocalResponseNormalization(
            x,
            window_width=5,
            lrn_alpha=0.0001,
            lrn_beta=0.75,
            lrn_k=2,
            name='{0}_norm1_instance{1}'.format(self.name, self.instance))
        x = lbann.Pooling(x,
                          num_dims=2,
                          has_vectors=False,
                          pool_dims_i=3,
                          pool_pads_i=0,
                          pool_strides_i=2,
                          pool_mode='max',
                          name='{0}_pool1_instance{1}'.format(
                              self.name, self.instance))
        x = self.conv2(x)
        x = lbann.LocalResponseNormalization(
            x,
            window_width=5,
            lrn_alpha=0.0001,
            lrn_beta=0.75,
            lrn_k=2,
            name='{0}_norm2_instance{1}'.format(self.name, self.instance))
        x = lbann.Pooling(x,
                          num_dims=2,
                          has_vectors=False,
                          pool_dims_i=3,
                          pool_pads_i=0,
                          pool_strides_i=2,
                          pool_mode='max',
                          name='{0}_pool2_instance{1}'.format(
                              self.name, self.instance))
        x = self.conv5(self.conv4(self.conv3(x)))
        x = lbann.Pooling(x,
                          num_dims=2,
                          has_vectors=False,
                          pool_dims_i=3,
                          pool_pads_i=0,
                          pool_strides_i=2,
                          pool_mode='max',
                          name='{0}_pool5_instance{1}'.format(
                              self.name, self.instance))

        # Fully-connected network
        x = self.fc6(x)
        x = lbann.Dropout(x,
                          keep_prob=0.5,
                          name='{0}_drop6_instance{1}'.format(
                              self.name, self.instance))
        x = self.fc7(x)
        x = lbann.Dropout(x,
                          keep_prob=0.5,
                          name='{0}_drop7_instance{1}'.format(
                              self.name, self.instance))
        return self.fc8(x)
Example #11
File: combo.py (project: benson31/lbann)
    def forward(self, x):
        x = lbann.Dropout(self.track_fc[0](x), keep_prob=self.kp)
        x = lbann.Dropout(self.track_fc[1](x), keep_prob=self.kp)
        return lbann.Dropout(self.track_fc[2](x), keep_prob=self.kp)