Example #1
    def __init__(self, name, batch_size, sequence_length, hidden_size,
                 cls_token_pos, **kwargs):
        scope_provider = kwargs['scope_provider']
        # Outline the whole NSP head under a single attribute scope.
        additional_scopes = [
            kwargs['builder'].outlineAttributes({'outline_scope': 'NSP'})
        ]
        scope = scope_provider.get_scope(name,
                                         execution_phase='next',
                                         additional_scopes=additional_scopes)
        params = []
        super().__init__(scope, params, **kwargs)
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.hidden_size = hidden_size
        self.cls_token_pos = cls_token_pos
        # Pooler: hidden_size -> hidden_size Dense with tanh activation,
        # built in the same execution phase as the head itself.
        pooler_scope = scope_provider.get_scope(
            "Pool", execution_phase=self.scope.execution_phase)
        self.pooler = Dense(scope=pooler_scope,
                            input_dim=hidden_size,
                            output_dim=hidden_size,
                            split=None,
                            activation='tanh',
                            **kwargs)
        # Two-way classifier: hidden_size -> 2 logits.
        classifier_scope = scope_provider.get_scope(
            "Classifier", execution_phase=self.scope.execution_phase)
        self.classifier = Dense(scope=classifier_scope,
                                input_dim=hidden_size,
                                output_dim=2,
                                split=None,
                                **kwargs)
        self.total_execution_phases = self.total_phases()
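
The snippet above only builds the layers; the forward pass is not shown. As a minimal NumPy sketch of what the two Dense layers amount to (plain matmuls stand in for the framework's Dense, and all shapes and values below are illustrative assumptions), the pooler applies a tanh projection to the token at cls_token_pos and the classifier maps the pooled vector to two logits:

# Hedged NumPy sketch of the NSP head built above; shapes, weights and the use
# of plain matmuls instead of the framework's Dense are assumptions.
import numpy as np

batch_size, sequence_length, hidden_size = 4, 128, 768
cls_token_pos = 0

hidden_states = np.random.randn(batch_size, sequence_length, hidden_size)
cls = hidden_states[:, cls_token_pos, :]          # (batch, hidden)

w_pool = np.random.randn(hidden_size, hidden_size) * 0.02
pooled = np.tanh(cls @ w_pool)                    # tanh pooler

w_cls = np.random.randn(hidden_size, 2) * 0.02
logits = pooled @ w_cls                           # (batch, 2) is-next / not-next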
Example #2
    def __init__(self,
                 name,
                 input_size,
                 ff_size,
                 dropout,
                 dropout_prob,
                 epsilon,
                 residual=True,
                 intermediate_act_func='gelu',
                 alpha=None,
                 increment_scope=True,
                 serialize_matmul=False,
                 use_default_memory_proportion=True,
                 available_memory_proportion=None,
                 **kwargs):
        scope_provider = kwargs['scope_provider']
        self.apply_dropout = dropout
        if increment_scope:
            scope = scope_provider.get_scope(name, 'next')
        else:
            scope = scope_provider.get_scope(name, 'prev')
        super(FeedForward, self).__init__(params=[], scope=scope, **kwargs)
        self.residual = residual

        if serialize_matmul:
            split = Split(dim='output_channels',
                          num_splits=ff_size // input_size)
        else:
            split = None
        self.dense1 = Dense(
            scope_provider.get_scope("1", 'prev'),
            input_size,
            ff_size,
            split=split,
            activation=intermediate_act_func,
            alpha=alpha,
            use_default_memory_proportion=use_default_memory_proportion,
            available_memory_proportion=available_memory_proportion,
            **kwargs)
        if serialize_matmul:
            split = Split(dim='reducing_dim', num_splits=ff_size // input_size)
        else:
            split = None
        self.dense2 = Dense(
            scope_provider.get_scope("2", "prev"),
            ff_size,
            input_size,
            split=split,
            activation=None,
            use_default_memory_proportion=use_default_memory_proportion,
            available_memory_proportion=available_memory_proportion,
            **kwargs)
        if residual:
            if dropout:
                self.dropout = Dropout(
                    scope_provider.get_scope("Dropout", "prev"), dropout_prob,
                    **kwargs)
            self.norm = Norm(scope_provider.get_scope("Norm", "prev"),
                             input_size, epsilon, **kwargs)
        self.total_execution_phases = self.total_phases()
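
One detail worth spelling out is the serialize_matmul path: both Split factors are ff_size // input_size, with dense1 split along its output channels and dense2 along the reducing dimension. A small NumPy check (the sizes are assumed BERT-Base-like values, not taken from the example) shows why either split reproduces the unserialised matmul:

# NumPy sketch of the two matmul serialisations used above; sizes are assumed
# illustrative values.
import numpy as np

input_size, ff_size, batch = 768, 3072, 2
num_splits = ff_size // input_size                 # 4 with these sizes

x = np.random.randn(batch, input_size)
w1 = np.random.randn(input_size, ff_size)

# dense1: split the output channels and concatenate the partial results.
blocks = [x @ w_part for w_part in np.split(w1, num_splits, axis=1)]
assert np.allclose(x @ w1, np.concatenate(blocks, axis=1))

# dense2: split the reducing dimension and sum the partial products.
h = np.random.randn(batch, ff_size)
w2 = np.random.randn(ff_size, input_size)
parts = zip(np.split(h, num_splits, axis=1), np.split(w2, num_splits, axis=0))
assert np.allclose(h @ w2, sum(a @ b for a, b in parts))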
Example #3
    def __init__(self,
                 name,
                 vocab_size,
                 hidden_size,
                 sequence_length,
                 batch_size,
                 num_mask_tokens,
                 projection_weight,
                 activation,
                 slice_input=True,
                 no_cls_layer=False,
                 epsilon=None,
                 projection_bias=False,
                 **kwargs):
        scope_provider = kwargs['scope_provider']
        super(MaskLM, self).__init__(params=[],
                                     scope=scope_provider.get_scope(
                                         name=f'{name}',
                                         execution_phase='next'),
                                     **kwargs)
        self.sequence_len = sequence_length
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.vocab_length = vocab_size
        self.num_mask_tokens = num_mask_tokens
        self.slice_input = slice_input
        self.no_cls_layer = no_cls_layer
        if not no_cls_layer:
            scope = scope_provider.get_scope("LMPrediction",
                                             self.scope.execution_phase)
            self.pred_head_transform = Dense(scope,
                                             hidden_size,
                                             hidden_size,
                                             activation=activation,
                                             **kwargs)
            scope = scope_provider.get_scope('LMPrediction/Norm',
                                             self.scope.execution_phase)
            self.norm = Norm(scope, hidden_size, epsilon, **kwargs)

        decoder_scope = scope_provider.get_scope("Projection",
                                                 self.scope.execution_phase)
        self.decoder = Dense(decoder_scope,
                             hidden_size,
                             vocab_size,
                             split=None,
                             activation=None,
                             params=[projection_weight, None],
                             bias=projection_bias,
                             **kwargs)
        self.total_execution_phases = self.total_phases()
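
The projection layer above receives its weight externally (params=[projection_weight, None]), which is how this kind of MLM head typically ties the vocabulary projection to the embedding table. A hedged NumPy sketch of that final step; the shapes and the weight orientation are assumptions for illustration only:

# Sketch of the MLM projection: hidden states at the masked positions are
# mapped to vocabulary logits with an externally supplied weight.
import numpy as np

batch_size, num_mask_tokens, hidden_size, vocab_size = 2, 20, 768, 30400
masked_hidden = np.random.randn(batch_size, num_mask_tokens, hidden_size)
projection_weight = np.random.randn(hidden_size, vocab_size) * 0.02

logits = masked_hidden @ projection_weight   # (batch, num_mask_tokens, vocab_size)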
Example #4
    def __init__(self, num_splits, vocab_size, hidden_size, sequence_length,
                 batch_size, num_mask_tokens, projection_weights, activation,
                 no_cls_layer, epsilon, projection_bias, **kwargs):
        scope_provider = kwargs['scope_provider']
        # Outline the whole serialised MLM head under a single attribute scope.
        additional_scopes = [
            kwargs['builder'].outlineAttributes(
                {'outline_scope': 'MLMSerialised'})
        ]
        scope = scope_provider.get_scope('MLMSerialised',
                                         additional_scopes=additional_scopes)
        super().__init__(params=[], scope=scope, **kwargs)
        self.slice_scope = scope_provider.get_scope('Slice', 'next')
        self.batch_size = batch_size
        self.vocab_length = vocab_size
        self.hidden_size = hidden_size
        self.sequence_len = sequence_length
        self.num_mask_tokens = num_mask_tokens
        self.no_cls_layer = no_cls_layer
        self.projection_bias = projection_bias
        if not no_cls_layer:
            # Shared prediction-head transform (Dense + Norm), built once and
            # applied before the per-split projections.
            scope = scope_provider.get_scope("LMPrediction",
                                             self.slice_scope.execution_phase)
            self.pred_head_transform = Dense(scope,
                                             hidden_size,
                                             hidden_size,
                                             activation=activation,
                                             **kwargs)
            scope = scope_provider.get_scope('LMPrediction/Norm',
                                             self.slice_scope.execution_phase)
            self.norm = Norm(scope, hidden_size, epsilon, **kwargs)
        # One MaskLM sub-layer per vocabulary split, each projecting onto
        # vocab_size // num_splits tokens with its own weight slice.
        layers = []
        for i in range(num_splits):
            layers.append(
                MaskLM(f'Split{i}',
                       vocab_size // num_splits,
                       hidden_size,
                       sequence_length,
                       batch_size,
                       num_mask_tokens,
                       projection_weights[i],
                       activation=None,
                       slice_input=False,
                       no_cls_layer=True,
                       projection_bias=projection_bias,
                       **kwargs))
        self.concat_scope = scope_provider.get_scope('Concat', 'next')
        self.layers = layers
        self.total_execution_phases = self.total_phases()
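
The serialised variant splits that same projection across num_splits MaskLM sub-layers, each owning one slice of the projection weight and producing logits for vocab_size // num_splits tokens, before a Concat scope stitches the pieces back together. A NumPy sketch of the arithmetic (shapes and split orientation assumed):

# Sketch of the vocabulary serialisation performed above.
import numpy as np

hidden_size, vocab_size, num_splits = 768, 30400, 4
masked_hidden = np.random.randn(2, 20, hidden_size)

full_weight = np.random.randn(hidden_size, vocab_size) * 0.02
projection_weights = np.split(full_weight, num_splits, axis=1)   # one slice per split

split_logits = [masked_hidden @ w for w in projection_weights]   # each (2, 20, vocab_size // num_splits)
logits = np.concatenate(split_logits, axis=-1)                   # (2, 20, vocab_size)

assert np.allclose(logits, masked_hidden @ full_weight)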
Example #5
    def __init__(self, name, batch_size, sequence_length, hidden_size,
                 **kwargs):
        scope_provider = kwargs['scope_provider']
        scope = scope_provider.get_scope(name, execution_phase='next')
        params = []
        super().__init__(scope, params, **kwargs)
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.hidden_size = hidden_size
        # Single two-way Dense classifier, sharing the layer's execution phase.
        classifier_scope = scope_provider.get_scope(
            name='', execution_phase=self.scope.execution_phase)
        self.classifier = Dense(scope=classifier_scope,
                                input_dim=hidden_size,
                                output_dim=2,
                                split=None,
                                **kwargs)
        self.total_execution_phases = self.total_phases()
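
Unlike the NSP head in Example #1, this head has no pooler: its single Dense maps hidden states straight to two logits. A short NumPy sketch of the shapes involved; since the forward pass is not shown, applying the classifier per token (rather than to one pooled vector) is an assumption here:

# Sketch of the two-way classifier above applied per token; shapes and the
# per-token application are illustrative assumptions.
import numpy as np

batch_size, sequence_length, hidden_size = 4, 128, 768
hidden_states = np.random.randn(batch_size, sequence_length, hidden_size)

w = np.random.randn(hidden_size, 2) * 0.02
logits = hidden_states @ w                   # (batch, sequence, 2)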