def __init__(self, hparams=None):
    ClassifierBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        encoder_hparams = utils.dict_fetch(
            hparams, Conv1DEncoder.default_hparams())
        self._encoder = Conv1DEncoder(hparams=encoder_hparams)

        # Add an additional dense layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes > 0:
            if self._hparams.num_dense_layers <= 0:
                self._encoder.append_layer({"type": "Flatten"})

            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            self._encoder.append_layer(
                {"type": "Dense", "kwargs": logit_kwargs})
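# Hedged usage sketch (not part of the original source): instantiating the
# classifier defined above and running it on dummy inputs, following the
# documented Texar-TF Conv1DClassifier interface. Import path, hparams values,
# and tensor shapes are illustrative assumptions.
import tensorflow as tf
from texar.tf.modules import Conv1DClassifier

clf = Conv1DClassifier(hparams={"num_classes": 10})
inputs = tf.random_uniform([64, 20, 256])  # [batch_size, max_time, feature_dim]
logits, pred = clf(inputs)                 # logits: [64, 10], pred: [64]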
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super(XLNetClassifier, self).__init__(hparams=hparams)

    with tf.variable_scope(self.variable_scope):
        tf.get_variable_scope().set_initializer(
            get_initializer(self._hparams.initializer))

        # Creates the underlying encoder
        encoder_hparams = dict_fetch(
            hparams, XLNetEncoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = "encoder"
        self._encoder = XLNetEncoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)
        if self._hparams.use_projection:
            self.projection = get_layer(hparams={
                "type": "Dense",
                "kwargs": {"units": self._encoder.output_size}
            })

        # Creates a dropout layer
        drop_kwargs = {"rate": self._hparams.dropout}
        layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
        self._dropout_layer = get_layer(hparams=layer_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()

            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = get_layer(hparams=layer_hparams)
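# Hedged usage sketch (assumption, not from the source): building an
# XLNetClassifier on top of pretrained XLNet weights. The model name, hparams
# values, and import path are illustrative; the forward call (token ids,
# segment ids, input mask) should be checked against the installed Texar-TF
# version.
from texar.tf.modules import XLNetClassifier

clf = XLNetClassifier(
    pretrained_model_name="xlnet-base-cased",
    hparams={"num_classes": 2, "dropout": 0.1})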
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super().__init__(hparams=hparams)

    with tf.variable_scope(self.variable_scope):
        # Creates the underlying encoder
        encoder_hparams = dict_fetch(
            hparams, GPT2Encoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = None
        self._encoder = GPT2Encoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        # Creates a dropout layer
        drop_kwargs = {"rate": self._hparams.dropout}
        layer_hparams = {"type": "Dropout", "kwargs": drop_kwargs}
        self._dropout_layer = get_layer(hparams=layer_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()

            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = get_layer(hparams=layer_hparams)
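# Hedged usage sketch (assumption, not from the source): a GPT2Classifier with
# the default pretrained GPT-2 weights and a binary logit layer. The hparams
# keys ("num_classes", "dropout") are the ones referenced in __init__ above;
# their values and the import path are illustrative.
from texar.tf.modules import GPT2Classifier

clf = GPT2Classifier(hparams={"num_classes": 2, "dropout": 0.1})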
def __init__(self,
             cell=None,
             cell_dropout_mode=None,
             output_layer=None,
             hparams=None):
    ClassifierBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        # Creates the underlying encoder
        encoder_hparams = utils.dict_fetch(
            hparams, UnidirectionalRNNEncoder.default_hparams())
        if encoder_hparams is not None:
            encoder_hparams['name'] = None
        self._encoder = UnidirectionalRNNEncoder(
            cell=cell,
            cell_dropout_mode=cell_dropout_mode,
            output_layer=output_layer,
            hparams=encoder_hparams)

        # Creates an additional classification layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes <= 0:
            self._logit_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"units": self._num_classes})
            if 'name' not in logit_kwargs:
                logit_kwargs['name'] = "logit_layer"

            layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
            self._logit_layer = layers.get_layer(hparams=layer_hparams)
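# Hedged usage sketch (assumption, not from the source): a unidirectional RNN
# classifier over embedded sequences. Shapes, hparams, and the call signature
# (which follows the underlying UnidirectionalRNNEncoder) are illustrative and
# should be verified against the installed Texar-TF version.
import tensorflow as tf
from texar.tf.modules import UnidirectionalRNNClassifier

clf = UnidirectionalRNNClassifier(hparams={"num_classes": 5})
inputs = tf.random_uniform([32, 50, 100])  # [batch_size, max_time, emb_dim]
logits, pred = clf(inputs, sequence_length=tf.fill([32], 50))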
def __init__(self, pretrained_model_name=None, cache_dir=None, hparams=None):
    super(XLNetEncoder, self).__init__(hparams=hparams)
    self.load_pretrained_config(pretrained_model_name, cache_dir)

    num_layers = self._hparams.num_layers
    use_segments = self._hparams.use_segments
    untie_r = self._hparams.untie_r

    with tf.variable_scope(self.variable_scope):
        if untie_r:
            self.r_w_bias = tf.get_variable(
                'r_w_bias',
                [num_layers, self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)
            self.r_r_bias = tf.get_variable(
                'r_r_bias',
                [num_layers, self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)
        else:
            self.r_w_bias = tf.get_variable(
                'r_w_bias',
                [self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)
            self.r_r_bias = tf.get_variable(
                'r_r_bias',
                [self._hparams.num_heads, self._hparams.head_dim],
                dtype=tf.float32)

        if use_segments:
            self.segment_embed = tf.get_variable(
                'seg_embed',
                [num_layers, 2, self._hparams.num_heads,
                 self._hparams.head_dim],
                dtype=tf.float32)
            self.r_s_bias = (
                tf.get_variable(
                    'r_s_bias',
                    [num_layers, self._hparams.num_heads,
                     self._hparams.head_dim],
                    dtype=tf.float32)
                if untie_r else
                tf.get_variable(
                    'r_s_bias',
                    [self._hparams.num_heads, self._hparams.head_dim],
                    dtype=tf.float32))
        else:
            self.segment_embed = None
            self.r_s_bias = None

        # Word embedding
        self.word_embedder = WordEmbedder(
            vocab_size=self._hparams.vocab_size,
            hparams={"dim": self._hparams.hidden_dim})

        # Position embedding
        self.pos_embed = RelativePositionalEncoding(
            hparams={
                "dim": self._hparams.hidden_dim,
                "max_seq_len": self._hparams.max_seq_len
            })

        self.attn_layers = []
        self.ff_layers = []
        rel_attn_hparams = dict_fetch(
            self._hparams, RelativeMutiheadAttention.default_hparams())
        rel_attn_hparams["name"] = "rel_attn"

        ff_hparams = dict_fetch(
            self._hparams, PositionWiseFF.default_hparams())
        ff_hparams["name"] = "ff"

        for i in range(num_layers):
            with tf.variable_scope("layer_{}".format(i)):
                if self._hparams.untie_r:
                    if use_segments:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias[i], self.r_w_bias[i],
                            self.r_s_bias[i], self.segment_embed[i],
                            hparams=rel_attn_hparams))
                    else:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias[i], self.r_w_bias[i],
                            hparams=rel_attn_hparams))
                else:
                    if use_segments:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias, self.r_w_bias, self.r_s_bias,
                            self.segment_embed[i],
                            hparams=rel_attn_hparams))
                    else:
                        self.attn_layers.append(RelativeMutiheadAttention(
                            self.r_r_bias, self.r_w_bias,
                            hparams=rel_attn_hparams))
                self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

        dropout_hparams = {
            "type": "Dropout",
            "kwargs": {"rate": self._hparams.dropout}
        }
        self.dropout = get_layer(hparams=dropout_hparams)

        self.mask_embed = tf.get_variable(
            'mask_emb', [1, 1, self.hparams.hidden_dim], dtype=tf.float32)
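# Hedged usage sketch (assumption, not from the source): instantiating the
# encoder with pretrained XLNet weights. Only the constructor arguments shown
# in __init__ above are exercised; the pretrained model name and import path
# are illustrative examples.
from texar.tf.modules import XLNetEncoder

encoder = XLNetEncoder(pretrained_model_name="xlnet-large-cased")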