def __init__(self, char_embed, num_classes, bigram_embed=None, trigram_embed=None,
             num_layers=1, hidden_size=100, dropout=0.5, target_vocab=None, encoding_type=None):
    super().__init__()

    self.char_embed = get_embeddings(char_embed)
    embed_size = self.char_embed.embedding_dim
    if bigram_embed:
        self.bigram_embed = get_embeddings(bigram_embed)
        embed_size += self.bigram_embed.embedding_dim
    if trigram_embed:
        # fixed: the attribute was misspelled ('trigram_ebmbed') and the bigram
        # dimension was added instead of the trigram dimension
        self.trigram_embed = get_embeddings(trigram_embed)
        embed_size += self.trigram_embed.embedding_dim

    # inter-layer dropout is only meaningful (and only accepted) when num_layers > 1
    if num_layers > 1:
        self.lstm = LSTM(embed_size, num_layers=num_layers, hidden_size=hidden_size // 2,
                         bidirectional=True, batch_first=True, dropout=dropout)
    else:
        self.lstm = LSTM(embed_size, num_layers=num_layers, hidden_size=hidden_size // 2,
                         bidirectional=True, batch_first=True)

    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(hidden_size, num_classes)

    trans = None
    if target_vocab is not None and encoding_type is not None:
        trans = allowed_transitions(target_vocab.idx2word, encoding_type=encoding_type,
                                    include_start_end=True)
    self.crf = ConditionalRandomField(num_classes, include_start_end_trans=True,
                                      allowed_transitions=trans)
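# A minimal usage sketch for the BiLSTM-CRF tagger above. The class name
# `BiLSTMCRF` and the tag vocabulary `tag_vocab` are hypothetical placeholders;
# only the constructor arguments come from the code above.
model = BiLSTMCRF(char_embed=(4000, 100),      # (num_embeddings, dim) tuple: get_embeddings builds an nn.Embedding
                  num_classes=len(tag_vocab),  # e.g. BMES tags for word segmentation
                  bigram_embed=(50000, 100),   # optional n-gram features widen the LSTM input
                  target_vocab=tag_vocab,      # together with encoding_type, constrains CRF transitions
                  encoding_type='bmes')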
def __init__(self, init_embed, num_cls):
    super(HANCLS, self).__init__()

    self.embed = get_embeddings(init_embed)
    self.han = HAN(input_size=300, output_size=num_cls,
                   word_hidden_size=50, word_num_layers=1, word_context_size=100,
                   sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
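# Hedged instantiation sketch for HANCLS. Because HAN's input_size is hard-coded
# to 300, init_embed must produce 300-dim vectors; the vocab size is a placeholder.
model = HANCLS(init_embed=(20000, 300), num_cls=5)
# The hierarchical encoder expects documents pre-split into sentences, i.e. an
# index tensor shaped roughly [batch, num_sents, num_words]; how that nesting is
# handled lives in the undisplayed forward() and is an assumption here.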
def __init__(self, init_embed, num_classes, hidden_dim=256, num_layers=1,
             attention_unit=256, attention_hops=1, nfc=128):
    super(BiLSTM_SELF_ATTENTION, self).__init__()
    self.embed = get_embeddings(init_embed)
    self.lstm = LSTM(input_size=self.embed.embedding_dim, hidden_size=hidden_dim,
                     num_layers=num_layers, bidirectional=True)
    self.attention = SelfAttention(input_size=hidden_dim * 2,
                                   attention_unit=attention_unit,
                                   attention_hops=attention_hops)
    self.mlp = MLP(size_layer=[hidden_dim * 2 * attention_hops, nfc, num_classes])
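# Hedged shape sketch for the multi-hop self-attention above (the parameter
# names suggest the structured self-attention of Lin et al. 2017). With
# attention_hops = r, each hop attends over the BiLSTM states and produces one
# 2*hidden_dim summary; the r summaries are flattened before the MLP:
#   lstm:      [B, L, 2H]
#   attention: [B, r, 2H] -> reshape -> [B, r * 2H]
#   mlp:       [B, r * 2H] -> [B, nfc] -> [B, num_classes]
model = BiLSTM_SELF_ATTENTION(init_embed=(30000, 300), num_classes=5,
                              hidden_dim=256, attention_hops=4)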
def __init__(self, init_embed, num_classes, hidden_dim=256, num_layers=1, nfc=128):
    super(BiLSTMSentiment, self).__init__()
    self.embed = get_embeddings(init_embed)
    self.lstm = LSTM(input_size=self.embed.embedding_dim, hidden_size=hidden_dim,
                     num_layers=num_layers, bidirectional=True)
    self.mlp = MLP(size_layer=[hidden_dim * 2, nfc, num_classes])
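# Hedged shape trace for BiLSTMSentiment, assuming the undisplayed forward pass
# pools the BiLSTM states over time before the classifier:
#   words [B, L] -> embed [B, L, E] -> lstm [B, L, 2*hidden_dim]
#                -> pool [B, 2*hidden_dim] -> mlp [B, num_classes]
model = BiLSTMSentiment(init_embed=(30000, 300), num_classes=2)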
def __init__(self, init_embed, out_dim=300, kernel_sizes=None):
    super().__init__()
    if kernel_sizes is None:
        kernel_sizes = [5, 9]
    assert isinstance(kernel_sizes, list), 'kernel_sizes should be List[int]'
    self.embed = get_embeddings(init_embed)
    try:
        embed_dim = self.embed.embedding_dim
    except Exception:
        embed_dim = self.embed.embed_size
    self.region_embeds = nn.ModuleList()
    for ksz in kernel_sizes:
        # padding=ksz // 2 preserves the sequence length only for odd kernel
        # sizes (both defaults, 5 and 9, are odd)
        self.region_embeds.append(nn.Sequential(
            nn.Conv1d(embed_dim, embed_dim, ksz, padding=ksz // 2),
        ))
    # one 1x1 projection to out_dim per kernel size
    self.linears = nn.ModuleList([nn.Conv1d(embed_dim, out_dim, 1)
                                  for _ in range(len(kernel_sizes))])
    self.embedding_dim = embed_dim
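# Hedged sketch of a forward pass for the region-embedding module above. The
# real forward() is not shown; summing the per-kernel outputs is an assumption
# (concatenation would be the other common choice).
def forward(self, words):
    x = self.embed(words).transpose(1, 2)      # [B, L, E] -> [B, E, L] for Conv1d
    outs = [lin(conv(x)) for conv, lin in zip(self.region_embeds, self.linears)]
    return sum(outs).transpose(1, 2)           # [B, L, out_dim], assuming a sum merge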
def __init__(self, vocab: Vocabulary, embed_size: int = 30, char_emb_size: int = 30,
             word_dropout: float = 0, dropout: float = 0, pool_method: str = 'max',
             activation='relu', min_char_freq: int = 2, requires_grad=True,
             include_word_start_end=True, char_attn_type='adatrans', char_n_head=3,
             char_dim_ffn=60, char_scale=False, char_pos_embed=None,
             char_dropout=0.15, char_after_norm=False):
    """
    :param vocab: the word vocabulary.
    :param embed_size: output dimension of TransformerCharEmbed. Defaults to 30.
    :param char_emb_size: dimension of the character embeddings; this is also the
        Transformer's d_model. Defaults to 30.
    :param float word_dropout: probability of replacing a word with unk. This both
        trains the unk embedding and provides some regularization.
    :param dropout: probability of dropping the character-embedding output as well
        as the final word output.
    :param pool_method: 'max' or 'avg'.
    :param activation: activation function; 'relu', 'sigmoid', 'tanh', or a custom callable.
    :param min_char_freq: minimum number of occurrences for a character to be kept.
        Defaults to 2.
    :param requires_grad: whether the embedding parameters are trainable.
    :param include_word_start_end: whether to mark the start and end of each word
        with the special <bow>/<eow> tags.
    :param char_attn_type: 'adatrans' or 'naive'.
    :param char_n_head: number of attention heads.
    :param char_dim_ffn: hidden size of the feed-forward layer in the Transformer.
    :param char_scale: whether to scale the attention scores.
    :param char_pos_embed: None, 'fix', or 'sin'; the kind of position embedding.
        When char_attn_type is relative (adatrans), None is fine.
    :param char_dropout: dropout used inside the Transformer encoder.
    :param char_after_norm: where layer normalization is applied (post-norm if True).
    """
    super(TransformerCharEmbed, self).__init__(vocab, word_dropout=word_dropout, dropout=dropout)

    assert char_emb_size % char_n_head == 0, "char_emb_size (d_model) must be divisible by char_n_head."
    assert pool_method in ('max', 'avg')
    self.pool_method = pool_method

    # activation function
    if isinstance(activation, str):
        if activation.lower() == 'relu':
            self.activation = F.relu
        elif activation.lower() == 'sigmoid':
            self.activation = F.sigmoid
        elif activation.lower() == 'tanh':
            self.activation = F.tanh
        else:
            # fixed: unknown activation strings previously fell through silently,
            # leaving self.activation unset
            raise Exception(
                "Undefined activation function: choose from: [relu, tanh, sigmoid, or a callable function]")
    elif activation is None:
        self.activation = lambda x: x
    elif callable(activation):
        self.activation = activation
    else:
        raise Exception(
            "Undefined activation function: choose from: [relu, tanh, sigmoid, or a callable function]")

    logger.info("Start constructing character vocabulary.")
    # build the character vocabulary
    self.char_vocab = _construct_char_vocab_from_vocab(
        vocab, min_freq=min_char_freq, include_word_start_end=include_word_start_end)
    self.char_pad_index = self.char_vocab.padding_idx
    logger.info(f"In total, there are {len(self.char_vocab)} distinct characters.")

    # pre-index every word in vocab as a padded sequence of character ids
    max_word_len = max(map(lambda x: len(x[0]), vocab))
    if include_word_start_end:
        max_word_len += 2
    self.register_buffer(
        'words_to_chars_embedding',
        torch.full((len(vocab), max_word_len), fill_value=self.char_pad_index, dtype=torch.long))
    self.register_buffer('word_lengths', torch.zeros(len(vocab)).long())
    for word, index in vocab:
        # the padding index used to be skipped (its row is already pad_value);
        # changed to index pad like any other word
        if include_word_start_end:
            word = ['<bow>'] + list(word) + ['<eow>']
        self.words_to_chars_embedding[index, :len(word)] = \
            torch.LongTensor([self.char_vocab.to_index(c) for c in word])
        self.word_lengths[index] = len(word)

    self.char_embedding = get_embeddings((len(self.char_vocab), char_emb_size))
    self.transformer = TransformerEncoder(1, char_emb_size, char_n_head, char_dim_ffn,
                                          dropout=char_dropout, after_norm=char_after_norm,
                                          attn_type=char_attn_type, pos_embed=char_pos_embed,
                                          scale=char_scale)
    self.fc = nn.Linear(char_emb_size, embed_size)
    self._embed_size = embed_size
    self.requires_grad = requires_grad
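# Hedged usage sketch for TransformerCharEmbed, assuming the surrounding module's
# imports (torch, fastNLP's Vocabulary). The tiny vocabulary below is a placeholder;
# in practice it comes from the dataset pipeline.
vocab = Vocabulary()
vocab.add_word_lst("this is a tiny example".split())
char_embed = TransformerCharEmbed(vocab, embed_size=30, char_emb_size=30,
                                  char_n_head=3)   # 30 % 3 == 0 satisfies the d_model assert
words = torch.LongTensor([[vocab.to_index(w) for w in "this is a".split()]])
out = char_embed(words)                            # expected shape: [1, 3, 30]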