def __init__(self, name, batch_size, sequence_length, hidden_size, cls_token_pos, **kwargs):
    """Next-sentence-prediction style head: a tanh pooler followed by a
    two-way classifier, both running in this layer's execution phase.

    Args:
        name: scope name for this layer (phase is advanced via 'next').
        batch_size / sequence_length / hidden_size: model dimensions.
        cls_token_pos: position of the [CLS] token in the sequence.
        **kwargs: must carry 'scope_provider' and 'builder'.
    """
    provider = kwargs['scope_provider']
    # Outline attribute groups this head's ops under a shared 'NSP' scope.
    outline = kwargs['builder'].outlineAttributes({'outline_scope': 'NSP'})
    scope = provider.get_scope(name,
                               execution_phase='next',
                               additional_scopes=[outline])
    super().__init__(scope, [], **kwargs)
    self.batch_size = batch_size
    self.sequence_length = sequence_length
    self.hidden_size = hidden_size
    self.cls_token_pos = cls_token_pos
    # Both sub-layers reuse this layer's execution phase rather than
    # advancing it again.
    self.pooler = Dense(
        scope=provider.get_scope("Pool", execution_phase=self.scope.execution_phase),
        input_dim=hidden_size,
        output_dim=hidden_size,
        split=None,
        activation='tanh',
        **kwargs)
    self.classifier = Dense(
        scope=provider.get_scope("Classifier", execution_phase=self.scope.execution_phase),
        input_dim=hidden_size,
        output_dim=2,
        split=None,
        **kwargs)
    self.total_execution_phases = self.total_phases()
def __init__(self, name, input_size, ff_size, dropout, dropout_prob, epsilon,
             residual=True, intermediate_act_func='gelu', alpha=None,
             increment_scope=True, serialize_matmul=False,
             use_default_memory_proportion=True,
             available_memory_proportion=None, **kwargs):
    """Transformer feed-forward block: Dense(input->ff) with an activation,
    Dense(ff->input), and — when residual — optional Dropout plus Norm.

    Args:
        name: scope name; 'next' advances the execution phase when
            increment_scope is set, 'prev' reuses the current phase.
        input_size / ff_size: hidden and intermediate widths.
        dropout: whether a Dropout sub-layer is created (residual path only).
        dropout_prob: drop probability for that sub-layer.
        epsilon: layer-norm epsilon.
        serialize_matmul: split both matmuls into ff_size // input_size
            slices (output channels on the way up, reducing dim on the way
            down), presumably to cap peak memory — confirm with Split docs.
        **kwargs: must carry 'scope_provider'.
    """
    scope_provider = kwargs['scope_provider']
    self.apply_dropout = dropout
    scope = scope_provider.get_scope(name, 'next' if increment_scope else 'prev')
    # Modernized: zero-argument super() for consistency with sibling layers.
    super().__init__(params=[], scope=scope, **kwargs)
    self.residual = residual
    self.dense1 = Dense(
        scope_provider.get_scope("1", 'prev'),
        input_size,
        ff_size,
        split=(Split(dim='output_channels', num_splits=ff_size // input_size)
               if serialize_matmul else None),
        activation=intermediate_act_func,
        alpha=alpha,
        use_default_memory_proportion=use_default_memory_proportion,
        available_memory_proportion=available_memory_proportion,
        **kwargs)
    self.dense2 = Dense(
        scope_provider.get_scope("2", "prev"),
        ff_size,
        input_size,
        split=(Split(dim='reducing_dim', num_splits=ff_size // input_size)
               if serialize_matmul else None),
        activation=None,
        use_default_memory_proportion=use_default_memory_proportion,
        available_memory_proportion=available_memory_proportion,
        **kwargs)
    if residual:
        if dropout:
            self.dropout = Dropout(
                scope_provider.get_scope("Dropout", "prev"),
                dropout_prob,
                **kwargs)
        self.norm = Norm(scope_provider.get_scope("Norm", "prev"),
                         input_size, epsilon, **kwargs)
    self.total_execution_phases = self.total_phases()
def __init__(self, name, vocab_size, hidden_size, sequence_length, batch_size,
             num_mask_tokens, projection_weight, activation, slice_input=True,
             no_cls_layer=False, epsilon=None, projection_bias=False, **kwargs):
    """Masked-LM head: an optional prediction transform (Dense + Norm)
    followed by a projection onto the vocabulary using the supplied
    projection_weight.

    Args:
        name: scope name for this layer (phase advanced via 'next').
        vocab_size / hidden_size / sequence_length / batch_size: dimensions.
        num_mask_tokens: number of masked positions predicted per sequence.
        projection_weight: externally supplied weight for the vocab
            projection (passed as the Dense layer's first param).
        activation: activation for the prediction transform.
        slice_input: stored flag; consumed elsewhere in the class.
        no_cls_layer: skip the Dense+Norm prediction transform entirely.
        epsilon: layer-norm epsilon (only used when the cls layer exists).
        projection_bias: whether the projection Dense gets a bias.
        **kwargs: must carry 'scope_provider'.
    """
    scope_provider = kwargs['scope_provider']
    # Fix: dropped the redundant f'{name}' wrapper — name is passed through
    # directly. Also modernized to zero-argument super() for consistency
    # with sibling layers.
    super().__init__(params=[],
                     scope=scope_provider.get_scope(name=name,
                                                    execution_phase='next'),
                     **kwargs)
    self.sequence_len = sequence_length
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.vocab_length = vocab_size
    self.num_mask_tokens = num_mask_tokens
    self.slice_input = slice_input
    self.no_cls_layer = no_cls_layer
    if not no_cls_layer:
        scope = scope_provider.get_scope("LMPrediction",
                                         self.scope.execution_phase)
        self.pred_head_transform = Dense(scope, hidden_size, hidden_size,
                                         activation=activation, **kwargs)
        scope = scope_provider.get_scope('LMPrediction/Norm',
                                         self.scope.execution_phase)
        self.norm = Norm(scope, hidden_size, epsilon, **kwargs)
    decoder_scope = scope_provider.get_scope("Projection",
                                             self.scope.execution_phase)
    # Vocab projection: weight is injected (params=[projection_weight, None]),
    # bias is optional and off by default.
    self.decoder = Dense(decoder_scope, hidden_size, vocab_size, split=None,
                         activation=None, params=[projection_weight, None],
                         bias=projection_bias, **kwargs)
    self.total_execution_phases = self.total_phases()
def __init__(self, num_splits, vocab_size, hidden_size, sequence_length,
             batch_size, num_mask_tokens, projection_weights, activation,
             no_cls_layer, epsilon, projection_bias, **kwargs):
    """Serialised masked-LM head: a shared prediction transform (unless
    no_cls_layer) followed by `num_splits` MaskLM sub-heads, each projecting
    onto a vocab_size // num_splits slice of the vocabulary, sliced before
    and concatenated after.

    Args:
        num_splits: number of vocabulary slices / MaskLM sub-heads.
        projection_weights: one projection weight per split, indexed 0..num_splits-1.
        **kwargs: must carry 'scope_provider' and 'builder'.
    """
    provider = kwargs['scope_provider']
    # Group all serialised-MLM ops under one outline scope.
    outline = kwargs['builder'].outlineAttributes(
        {'outline_scope': 'MLMSerialised'})
    super().__init__(params=[],
                     scope=provider.get_scope('MLMSerialised',
                                              additional_scopes=[outline]),
                     **kwargs)
    self.slice_scope = provider.get_scope('Slice', 'next')
    self.batch_size = batch_size
    self.vocab_length = vocab_size
    self.hidden_size = hidden_size
    self.sequence_len = sequence_length
    self.num_mask_tokens = num_mask_tokens
    self.no_cls_layer = no_cls_layer
    self.projection_bias = projection_bias
    if not no_cls_layer:
        # The shared transform runs in the slice phase; it receives the
        # activation, so the per-split heads below pass activation=None.
        phase = self.slice_scope.execution_phase
        self.pred_head_transform = Dense(
            provider.get_scope("LMPrediction", phase),
            hidden_size, hidden_size, activation=activation, **kwargs)
        self.norm = Norm(provider.get_scope('LMPrediction/Norm', phase),
                         hidden_size, epsilon, **kwargs)
    # Build one MaskLM per vocabulary slice; slicing and the cls layer are
    # handled here, so the sub-heads run with slice_input=False and
    # no_cls_layer=True.
    self.layers = [
        MaskLM(f'Split{i}',
               vocab_size // num_splits,
               hidden_size,
               sequence_length,
               batch_size,
               num_mask_tokens,
               projection_weights[i],
               activation=None,
               slice_input=False,
               no_cls_layer=True,
               projection_bias=projection_bias,
               **kwargs)
        for i in range(num_splits)
    ]
    self.concat_scope = provider.get_scope('Concat', 'next')
    self.total_execution_phases = self.total_phases()
def __init__(self, name, batch_size, sequence_length, hidden_size, **kwargs):
    """Simple two-way classification head over the hidden representation.

    Args:
        name: scope name for this layer (phase advanced via 'next').
        batch_size / sequence_length / hidden_size: model dimensions.
        **kwargs: must carry 'scope_provider'.
    """
    provider = kwargs['scope_provider']
    super().__init__(provider.get_scope(name, execution_phase='next'),
                     [],
                     **kwargs)
    self.batch_size = batch_size
    self.sequence_length = sequence_length
    self.hidden_size = hidden_size
    # The classifier shares this layer's execution phase; the empty scope
    # name keeps it directly under this layer's scope.
    self.classifier = Dense(
        scope=provider.get_scope(name='',
                                 execution_phase=self.scope.execution_phase),
        input_dim=hidden_size,
        output_dim=2,
        split=None,
        **kwargs)
    self.total_execution_phases = self.total_phases()