Example #1
Source file: model.py — Project: Oneflow-Inc/models
    def __init__(self, config):
        """Build the GPT-2 backbone: token/position embeddings, embedding
        dropout, a stack of transformer blocks, and a final layer norm.

        Args:
            config: model configuration exposing ``hidden_size``,
                ``vocab_size``, ``max_position_embeddings``, ``embd_pdrop``,
                ``num_hidden_layers`` and ``layer_norm_epsilon``.
        """
        super(GPT2Model, self).__init__()
        hidden = config.hidden_size
        self.embed_dim = hidden

        # Token and learned position embedding tables.
        self.wte = nn.Embedding(config.vocab_size, hidden)
        self.wpe = nn.Embedding(config.max_position_embeddings, hidden)

        self.drop = nn.Dropout(config.embd_pdrop)
        # One GPT2Block per hidden layer.
        blocks = [GPT2Block(config) for _ in range(config.num_hidden_layers)]
        self.h = nn.ModuleList(blocks)
        self.ln_f = LayerNorm(hidden, eps=config.layer_norm_epsilon)
Example #2
    def __init__(self, config) -> None:
        """Build BERT-style embeddings: token, position and token-type
        tables, followed by layer norm and dropout.

        Args:
            config: configuration object exposing ``vocab_size``,
                ``hidden_size``, ``max_position_embeddings``,
                ``type_vocab_size`` and ``hidden_dropout_prob``.
                (The previous ``Callable[..., None]`` annotation was wrong:
                ``config`` is only read for attributes, never called.)
        """
        super().__init__()
        # padding_idx=0: index 0 (the pad token) embeds to a frozen zero vector.
        self.token_embeddings = nn.Embedding(config.vocab_size,
                                             config.hidden_size,
                                             padding_idx=0)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)

        # BUG FIX: nn.LayerNorm takes ``eps`` (both oneflow and torch);
        # passing ``epsilon`` raised TypeError at construction time.
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=1e-12)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #3
 def __init__(
     self,
     word_emb_dim,
     vocab_size,
     dim_channel,
     kernel_wins,
     dropout_rate,
     num_class,
     max_seq_len,
     training=True,
 ):
     """Build a TextCNN classifier: embedding, parallel conv/max-pool
     branches (one per kernel width), dropout, and a final FC layer."""
     super(textCNN, self).__init__()
     self.embed = nn.Embedding(vocab_size, word_emb_dim)
     # One branch per kernel width; each conv spans the full embedding dim.
     # (MaxPool2d has no parameters, so interleaving construction keeps the
     # conv weight initialization identical to building them separately.)
     conv_branches = []
     pool_branches = []
     for width in kernel_wins:
         conv_branches.append(nn.Conv2d(1, dim_channel, (width, word_emb_dim)))
         # Pool over all valid positions: one value per channel per branch.
         pool_branches.append(
             nn.MaxPool2d((max_seq_len - width + 1, 1), stride=1))
     self.convs = nn.ModuleList(conv_branches)
     self.maxpool = nn.ModuleList(pool_branches)
     # Dropout layer
     self.dropout = nn.Dropout(dropout_rate)
     self.training = training
     # FC layer: concatenated branch outputs feed the classifier head.
     self.fc = nn.Linear(len(kernel_wins) * dim_channel, num_class)
Example #4
Source file: bert.py — Project: Oneflow-Inc/models
    def __init__(
        self,
        max_position_embeddings,
        hidden_size,
        nheads,
        dropout=0,
        position_embedding_type="absolute",
        is_decoder=False,
    ):
        """Multi-head self-attention for BERT.

        Raises:
            ValueError: if ``hidden_size`` is not divisible by ``nheads``.
        """
        super(BertSelfAttention, self).__init__()
        if hidden_size % nheads != 0:
            raise ValueError(
                f"The hidden size ({hidden_size}) is not a multiple of the number of attention "
                f"heads ({nheads})")

        head_dim = int(hidden_size / nheads)
        self.num_attention_heads = nheads
        self.attention_head_size = head_dim
        self.all_head_size = nheads * head_dim

        # Separate projections for queries, keys and values.
        self.query = nn.Linear(hidden_size, self.all_head_size)
        self.key = nn.Linear(hidden_size, self.all_head_size)
        self.value = nn.Linear(hidden_size, self.all_head_size)

        self.dropout = nn.Dropout(dropout)
        self.position_embedding_type = position_embedding_type
        # Relative-position variants need a distance-embedding table of size
        # 2*L-1, covering distances in [-(L-1), L-1].
        if self.position_embedding_type in ("relative_key",
                                            "relative_key_query"):
            self.max_position_embeddings = max_position_embeddings
            self.distance_embedding = nn.Embedding(
                2 * max_position_embeddings - 1, self.attention_head_size)

        self.is_decoder = is_decoder
Example #5
Source file: lm.py — Project: Oneflow-Inc/models
    def __init__(self, params):
        """LSTM language model: embedding -> multi-layer LSTM -> vocab
        projection, with label-smoothed cross entropy as the criterion.

        Args:
            params: dict with keys ``vocab_size``, ``share_embedding``,
                ``smoothing``, ``num_layers``, ``hidden_size`` and
                ``dropout``.
        """
        super(RecurrentLanguageModel, self).__init__(params)

        self.model_type = "recurrent_lm"
        self.vocab_size = params["vocab_size"]
        self.share_embedding = params["share_embedding"]
        self.smoothing = params["smoothing"]
        self.num_layers = params["num_layers"]
        self.hidden_size = params["hidden_size"]

        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
        # Unidirectional LSTM over (batch, time, feature) inputs.
        self.rnn = nn.LSTM(
            input_size=self.hidden_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
            dropout=params["dropout"],
            bidirectional=False,
        )

        self.output_project = nn.Linear(self.hidden_size, self.vocab_size)

        if self.share_embedding:
            # Weight tying requires matching shapes.
            assert self.embedding.weight.size(
            ) == self.output_project.weight.size()
            self.output_project.weight = self.embedding.weight

        self.crit = LabelSmoothingLoss(size=self.vocab_size,
                                       smoothing=self.smoothing,
                                       padding_idx=PAD)
Example #6
    def __init__(
        self,
        vocab_size,
        d_model=256,
        n_heads=4,
        d_ff=2048,
        memory_dim=256,
        n_blocks=6,
        pos_dropout=0.0,
        slf_attn_dropout=0.0,
        src_attn_dropout=0.0,
        ffn_dropout=0.0,
        residual_dropout=0.1,
        activation="relu",
        normalize_before=True,
        concat_after=False,
        share_embedding=False,
    ):
        """Transformer decoder: embedding + positional encoding, a stack of
        decoder layers, an optional final layer norm (pre-norm only), and a
        vocab projection with optional weight tying."""
        super(TransformerDecoder, self).__init__()

        self.decoder_type = "transformer"
        self.normalize_before = normalize_before
        # Absolute positional encoding only in this decoder.
        self.relative_positional = False
        self.d_model = d_model

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_emb = PositionalEncoding(d_model, pos_dropout)

        def make_layer():
            # Each layer self-attends and cross-attends to encoder memory.
            return TransformerDecoderLayer(
                n_heads,
                d_model,
                d_ff,
                memory_dim,
                slf_attn_dropout,
                src_attn_dropout,
                ffn_dropout,
                residual_dropout,
                normalize_before=normalize_before,
                concat_after=concat_after,
                relative_positional=False,
                activation=activation,
            )

        self.blocks = nn.ModuleList([make_layer() for _ in range(n_blocks)])

        if self.normalize_before:
            # Pre-norm stacks need one extra normalization at the output.
            self.after_norm = nn.LayerNorm(d_model)

        self.output_layer = nn.Linear(d_model, vocab_size)

        if share_embedding:
            # Weight tying requires matching shapes.
            assert self.embedding.weight.size() == self.output_layer.weight.size()
            self.output_layer.weight = self.embedding.weight
            logger.info("Tie the weights between the embedding and output layer.")
Example #7
    def __init__(
        self,
        vocab_size,
        type_vocab_size,
        max_position_embeddings,
        hidden_size,
        hidden_dropout_prob,
        seq_length,
    ):
        """BERT embeddings: word, position and token-type tables followed by
        layer norm and in-place dropout; registers a buffer of position ids
        covering [0, max_position_embeddings)."""
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size, hidden_size)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)

        self.LayerNorm = nn.LayerNorm(hidden_size)
        # inplace=True drops activations without an extra copy.
        self.dropout = nn.Dropout(hidden_dropout_prob, inplace=True)
        # Precomputed (1, max_position_embeddings) position ids; a buffer so
        # it moves with the module's device but is not a trainable parameter.
        position_ids = flow.arange(max_position_embeddings).unsqueeze(0)
        self.register_buffer("position_ids", position_ids)
        self.seq_length = seq_length
Example #8
Source file: model.py — Project: Oneflow-Inc/models
 def __init__(self, emb_sz, emb_dim, hidden_size, nfc, n_classes, num_layers=1):
     """Bi-LSTM text classifier: embedding -> BiLSTM -> linear -> softmax."""
     super(LSTMText, self).__init__()
     # Hyper-parameters kept as attributes for callers/introspection.
     self.emb_sz = emb_sz
     self.emb_dim = emb_dim
     self.n_classes = n_classes
     self.hidden_size = hidden_size
     self.nfc = nfc
     self.num_layers = num_layers
     self.bilstm = BiLSTM(emb_dim, hidden_size, num_layers)
     self.embedding = nn.Embedding(emb_sz, emb_dim)
     # hidden_size * 2: the two LSTM directions are concatenated.
     self.linear = nn.Linear(hidden_size * 2 * nfc, n_classes)
     self.softmax = nn.Softmax(dim=1)
Example #9
    def __init__(
        self,
        sos_id,
        eos_id,
        n_tgt_vocab,
        d_word_vec,
        n_layers,
        n_head,
        d_k,
        d_v,
        d_model,
        d_inner,
        dropout=0.1,
        tgt_emb_prj_weight_sharing=True,
        pe_maxlen=5000,
    ):
        """Transformer decoder for sequence generation.

        Args:
            sos_id: start-of-sequence token id.
            eos_id: end-of-sequence token id.
            n_tgt_vocab: target vocabulary size.
            d_word_vec: word-embedding dimension.
            n_layers, n_head, d_k, d_v, d_model, d_inner: transformer sizes.
            dropout: dropout probability used throughout.
            tgt_emb_prj_weight_sharing: tie the embedding and output
                projection weights; logits are then scaled by sqrt(d_model).
            pe_maxlen: maximum length for the positional encoding.
        """
        super(Decoder, self).__init__()
        # parameters
        self.sos_id = sos_id
        self.eos_id = eos_id
        self.n_tgt_vocab = n_tgt_vocab
        self.d_word_vec = d_word_vec
        self.n_layers = n_layers
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.d_inner = d_inner
        # BUG FIX: the original stored the rate as ``self.dropout`` and then
        # overwrote it with the nn.Dropout module below, silently losing the
        # scalar. Keep the rate under a separate name; ``self.dropout``
        # remains the module, as callers already observed.
        self.dropout_rate = dropout
        self.tgt_emb_prj_weight_sharing = tgt_emb_prj_weight_sharing
        self.pe_maxlen = pe_maxlen

        self.tgt_word_emb = nn.Embedding(n_tgt_vocab, d_word_vec)
        self.positional_encoding = PositionalEncoding(d_model,
                                                      max_len=pe_maxlen)
        self.dropout = nn.Dropout(dropout)

        self.layer_stack = nn.ModuleList([
            DecoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
            for _ in range(n_layers)
        ])

        self.tgt_word_prj = nn.Linear(d_model, n_tgt_vocab, bias=False)
        nn.init.xavier_normal_(self.tgt_word_prj.weight)

        if tgt_emb_prj_weight_sharing:
            # Share the weight matrix between target word embedding & the final logit dense layer
            self.tgt_word_prj.weight = self.tgt_word_emb.weight
            self.x_logit_scale = d_model**0.5
        else:
            self.x_logit_scale = 1.0
Example #10
Source file: bert.py — Project: Oneflow-Inc/models
    def __init__(
        self,
        vocab_size,
        max_position_embeddings,
        type_vocab_size,
        hidden_size,
        layer_norm_eps=1e-5,
        dropout=0,
        pad_token_id=0,
        position_embedding_type="absolute",
    ):
        """BERT embeddings: word/position/token-type tables, layer norm and
        dropout, plus buffered default position and token-type ids."""
        super(BertEmbeddings, self).__init__()
        self.word_embeddings = nn.Embedding(vocab_size,
                                            hidden_size,
                                            padding_idx=pad_token_id)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)

        self.LayerNorm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)
        self.dropout = nn.Dropout(dropout)
        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.position_embedding_type = position_embedding_type
        position_ids = flow.arange(max_position_embeddings).expand((1, -1))
        self.register_buffer("position_ids", position_ids)
        # Default token-type ids (all zeros), same shape/device as the
        # position ids; persistent=False keeps them out of the state dict.
        token_type_ids = flow.zeros(
            self.position_ids.size(),
            dtype=flow.int64,
            device=self.position_ids.device,
        )
        self.register_buffer("token_type_ids", token_type_ids,
                             persistent=False)

        self.padding_idx = pad_token_id
Example #11
Source file: lm.py — Project: Oneflow-Inc/models
    def __init__(self, params):
        """Transformer language model: embedding + positional encoding, a
        stack of post-norm encoder layers, and a vocab projection with
        optional weight tying and a label-smoothed loss criterion."""
        super(TransformerLanguageModel, self).__init__(params)

        self.model_type = "transformer_lm"
        # Fixed post-norm configuration; the after_norm branch below is
        # therefore never taken with this setting.
        self.normalize_before = False
        self.smoothing = params["smoothing"]
        self.vocab_size = params["vocab_size"]
        self.num_blocks = params["num_blocks"]

        d_model = params["d_model"]
        self.embedding = nn.Embedding(self.vocab_size, d_model)
        self.pos_embedding = PositionalEncoding(d_model, 0.0)

        def make_block():
            # Encoder layers (self-attention only), GLU activation.
            return TransformerEncoderLayer(
                params["n_heads"],
                d_model,
                params["d_ff"],
                slf_attn_dropout=0.0,
                ffn_dropout=0.0,
                residual_dropout=params["residual_dropout"],
                normalize_before=False,
                concat_after=False,
                activation="glu",
            )

        self.blocks = nn.ModuleList(
            [make_block() for _ in range(self.num_blocks)])

        if self.normalize_before:
            self.after_norm = nn.LayerNorm(d_model)

        self.output_project = nn.Linear(d_model, self.vocab_size)

        if params["share_embedding"]:
            self.output_project.weight = self.embedding.weight
            print("Share the weight of embedding to the output project layer!")

        self.crit = LabelSmoothingLoss(size=self.vocab_size,
                                       smoothing=self.smoothing,
                                       padding_idx=PAD)
Example #12
Source file: model.py — Project: Oneflow-Inc/models
 def __init__(self, vocab, d_model):
     """Token-embedding lookup table producing d_model-dim vectors."""
     super(Embeddings, self).__init__()
     self.d_model = d_model
     self.lut = nn.Embedding(vocab, d_model)