def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None,
             init=True):
    super().__init__(hparams=hparams)

    self.load_pretrained_config(pretrained_model_name, cache_dir)

    num_layers = self._hparams.num_layers
    num_heads = self._hparams.num_heads
    head_dim = self._hparams.head_dim

    # Word embedding, relative positional encoding, and dropout
    self.word_embed = nn.Embedding(self._hparams.vocab_size,
                                   self._hparams.hidden_dim)
    self.pos_embed = RelativePositionalEncoding(
        hparams={
            "dim": self._hparams.hidden_dim,
            "max_seq_len": self._hparams.max_seq_length,
        })
    self.dropout = nn.Dropout(self._hparams.dropout)

    # Relative attention biases; shared across layers unless `untie_r` is set
    self.r_r_bias = None
    self.r_w_bias = None
    self.r_s_bias = None
    if not self._hparams.untie_r:
        self.r_r_bias = nn.Parameter(torch.Tensor(num_heads, head_dim))
        self.r_w_bias = nn.Parameter(torch.Tensor(num_heads, head_dim))
        self.r_s_bias = (nn.Parameter(torch.Tensor(num_heads, head_dim))
                         if self._hparams.use_segments else None)

    # Stack of relative multi-head attention and position-wise FF layers
    self.attn_layers = nn.ModuleList()
    self.ff_layers = nn.ModuleList()
    rel_attn_hparams = dict_fetch(
        self._hparams, RelativeMultiheadAttention.default_hparams())
    ff_hparams = dict_fetch(
        self._hparams, PositionWiseFF.default_hparams())
    for _ in range(num_layers):
        self.attn_layers.append(
            RelativeMultiheadAttention(self.r_r_bias, self.r_w_bias,
                                       self.r_s_bias,
                                       hparams=rel_attn_hparams))
        self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

    self.mask_emb = nn.Parameter(
        torch.Tensor(1, 1, self._hparams.hidden_dim))

    if init:
        self.init_pretrained_weights()
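# Hedged usage sketch for the constructor above. The enclosing class name is
# not shown in the snippet; "XLNetEncoder" is an assumption based on the
# XLNet-style components it builds (relative positional encoding, segment
# bias, mask embedding), and "xlnet-base-cased" is an assumed checkpoint
# identifier. Only arguments visible in the signature are used.
encoder = XLNetEncoder(pretrained_model_name="xlnet-base-cased")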
def __init__(self, hparams=None):
    ClassifierBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        encoder_hparams = utils.dict_fetch(
            hparams, Conv1DEncoder.default_hparams())
        self._encoder = Conv1DEncoder(hparams=encoder_hparams)

        # Add an additional dense layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes > 0:
            if self._hparams.num_dense_layers <= 0:
                self._encoder.append_layer({"type": "Flatten"})

            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            self._encoder.append_layer(
                {"type": "Dense", "kwargs": logit_kwargs})
def __init__(self,
             in_channels: int,
             in_features: Optional[int] = None,
             hparams: Optional[Union[HParams, Dict[str, Any]]] = None):
    super().__init__(hparams=hparams)

    encoder_hparams = utils.dict_fetch(hparams,
                                       Conv1DEncoder.default_hparams())
    self._encoder = Conv1DEncoder(in_channels=in_channels,
                                  in_features=in_features,
                                  hparams=encoder_hparams)

    # Add an additional dense layer if needed
    self._num_classes = self._hparams.num_classes
    if self._num_classes > 0:
        if self._hparams.num_dense_layers <= 0:
            self._encoder.append_layer({"type": "Flatten"})

        logit_kwargs = self._hparams.logit_layer_kwargs
        if logit_kwargs is None:
            logit_kwargs = {}
        elif not isinstance(logit_kwargs, HParams):
            raise ValueError(
                "hparams['logit_layer_kwargs'] must be a dict.")
        else:
            logit_kwargs = logit_kwargs.todict()
        logit_kwargs.update({"out_features": self._num_classes})

        self._encoder.append_layer({"type": "Linear",
                                    "kwargs": logit_kwargs})
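# Hedged usage sketch for the constructor above. The enclosing class name is
# not shown in the snippet, so "Conv1DClassifier" is an assumed name, and the
# argument values below are illustrative only. With num_classes > 0 the
# constructor appends a Flatten layer and a Linear logit layer to the encoder.
clf = Conv1DClassifier(in_channels=128,
                       in_features=100,
                       hparams={"num_classes": 10})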
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    super().__init__(hparams=hparams)

    # Create the underlying encoder
    encoder_hparams = dict_fetch(hparams, XLNetEncoder.default_hparams())
    self._encoder = XLNetEncoder(
        pretrained_model_name=pretrained_model_name,
        cache_dir=cache_dir,
        hparams=encoder_hparams)

    # TODO: The logic here is very similar to that in XLNetClassifier.
    #   We need to reduce the code redundancy.
    if self._hparams.use_projection:
        if self._hparams.regr_strategy == 'all_time':
            self.projection = nn.Linear(
                self._encoder.output_size * self._hparams.max_seq_length,
                self._encoder.output_size * self._hparams.max_seq_length)
        else:
            self.projection = nn.Linear(self._encoder.output_size,
                                        self._encoder.output_size)
    self.dropout = nn.Dropout(self._hparams.dropout)

    logit_kwargs = self._hparams.logit_layer_kwargs
    if logit_kwargs is None:
        logit_kwargs = {}
    elif not isinstance(logit_kwargs, HParams):
        raise ValueError("hparams['logit_layer_kwargs'] "
                         "must be a dict.")
    else:
        logit_kwargs = logit_kwargs.todict()

    if self._hparams.regr_strategy == 'all_time':
        self.hidden_to_logits = nn.Linear(
            self._encoder.output_size * self._hparams.max_seq_length,
            1, **logit_kwargs)
    else:
        self.hidden_to_logits = nn.Linear(self._encoder.output_size,
                                          1, **logit_kwargs)

    if self._hparams.initializer:
        initialize = get_initializer(self._hparams.initializer)
        assert initialize is not None
        if self._hparams.use_projection:
            initialize(self.projection.weight)
            initialize(self.projection.bias)
        initialize(self.hidden_to_logits.weight)
        # Check for the bias's existence rather than truth-testing the tensor
        if self.hidden_to_logits.bias is not None:
            initialize(self.hidden_to_logits.bias)
    else:
        if self._hparams.use_projection:
            self.projection.apply(init_weights)
        self.hidden_to_logits.apply(init_weights)
def __init__(self,
             pretrained_model_name: Optional[str] = None,
             cache_dir: Optional[str] = None,
             hparams=None):
    super().__init__(hparams=hparams)

    # Create the underlying encoder
    encoder_hparams = dict_fetch(hparams, GPT2Encoder.default_hparams())
    self._encoder = GPT2Encoder(
        pretrained_model_name=pretrained_model_name,
        cache_dir=cache_dir,
        hparams=encoder_hparams)

    # Create a dropout layer
    self._dropout_layer = nn.Dropout(self._hparams.dropout)

    # Create an additional classification layer if needed
    self.num_classes = self._hparams.num_classes
    if self.num_classes <= 0:
        self._logits_layer = None
    else:
        logit_kwargs = self._hparams.logit_layer_kwargs
        if logit_kwargs is None:
            logit_kwargs = {}
        elif not isinstance(logit_kwargs, HParams):
            raise ValueError("hparams['logit_layer_kwargs'] "
                             "must be a dict.")
        else:
            logit_kwargs = logit_kwargs.todict()

        if self._hparams.clas_strategy == 'all_time':
            self._logits_layer = nn.Linear(
                self._encoder.output_size * self._hparams.max_seq_length,
                self.num_classes,
                **logit_kwargs)
        else:
            self._logits_layer = nn.Linear(self._encoder.output_size,
                                           self.num_classes,
                                           **logit_kwargs)

    if self._hparams.initializer:
        initialize = get_initializer(self._hparams.initializer)
        assert initialize is not None
        if self._logits_layer is not None:
            initialize(self._logits_layer.weight)
            if self._logits_layer.bias is not None:
                initialize(self._logits_layer.bias)

    self.is_binary = (self.num_classes == 1) or \
                     (self.num_classes <= 0 and self._hparams.dim == 1)
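# Hedged usage sketch for the constructor above. "GPT2Classifier" is an
# assumed class name inferred from the GPT2Encoder it wraps; the "num_classes"
# hparams key comes directly from the snippet, while the checkpoint name
# "gpt2-small" is an assumption.
classifier = GPT2Classifier(pretrained_model_name="gpt2-small",
                            hparams={"num_classes": 2})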
def __init__(self,
             pretrained_model_name=None,
             cache_dir=None,
             hparams=None):
    ClassifierBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        # Creates the underlying encoder
        encoder_hparams = utils.dict_fetch(
            hparams, BertEncoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = None
        self._encoder = BertEncoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        # Creates a dropout layer
        drop_kwargs = {"rate": self._hparams.dropout}
        layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
        self._dropout_layer = layers.get_layer(hparams=layer_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = layers.get_layer(hparams=layer_hparams)
def __init__(self,
             cell=None,
             cell_dropout_mode=None,
             output_layer=None,
             hparams=None):
    ClassifierBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        # Creates the underlying encoder
        encoder_hparams = utils.dict_fetch(
            hparams, UnidirectionalRNNEncoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = None
        self._encoder = UnidirectionalRNNEncoder(
            cell=cell,
            cell_dropout_mode=cell_dropout_mode,
            output_layer=output_layer,
            hparams=encoder_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = layers.get_layer(hparams=layer_hparams)