    def __init__(self, cfg, word_vocab_size, pos_vocab_size, dep_vocab_size):
        super().__init__()
        self.transformer = models.Transformer(cfg)

        #logits_pos
        self.fc1 = nn.Linear(cfg.dim, cfg.dim)
        self.activ1 = models.gelu
        self.norm1 = models.LayerNorm(cfg)
        self.decoder1 = nn.Linear(cfg.dim, pos_vocab_size)

        #logits_dep
        self.fc2 = nn.Linear(cfg.dim, cfg.dim)
        self.activ2 = models.gelu
        self.norm2 = models.LayerNorm(cfg)
        self.decoder2 = nn.Linear(cfg.dim, dep_vocab_size)

        #logits_word_vocab_size
        self.fc3 = nn.Linear(cfg.dim, cfg.dim)
        self.activ3 = models.gelu
        self.norm3 = models.LayerNorm(cfg)
        embed_weight = self.transformer.embed.tok_embed.weight
        n_vocab, n_dim = embed_weight.size()
        self.decoder3 = nn.Linear(n_dim, n_vocab, bias=False)
        self.decoder3.weight = embed_weight
        self.decoder3_bias = nn.Parameter(torch.zeros(n_vocab))
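
    # Hedged sketch (not part of the original source): a plausible forward pass for this
    # three-head setup, assuming the transformer returns per-token hidden states and
    # masked_pos holds the indices of the positions to predict (BERT-style inputs assumed).
    def forward(self, input_ids, segment_ids, input_mask, masked_pos):
        h = self.transformer(input_ids, segment_ids, input_mask)
        # gather the hidden states at the masked positions
        masked_pos = masked_pos[:, :, None].expand(-1, -1, h.size(-1))
        h_masked = torch.gather(h, 1, masked_pos)
        # POS-tag head: Linear -> gelu -> LayerNorm -> decoder
        logits_pos = self.decoder1(self.norm1(self.activ1(self.fc1(h_masked))))
        # dependency-label head
        logits_dep = self.decoder2(self.norm2(self.activ2(self.fc2(h_masked))))
        # word head: decoder weight is tied to the token embedding, bias added separately
        logits_word = self.decoder3(self.norm3(self.activ3(self.fc3(h_masked)))) + self.decoder3_bias
        return logits_pos, logits_dep, logits_word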
    def __init__(self, cfg, word_vocab_size, pos_vocab_size, dep_vocab_size):
        super().__init__()
        self.transformer = models.Transformer2(cfg)

        #logits_pos
        self.fc1 = nn.Linear(cfg.dim, cfg.dim)
        self.activ1 = models.gelu
        self.norm1 = models.LayerNorm(cfg)
        embed_weight1 = self.transformer.embed.tag_embed.weight
        n_vocab1, n_dim1 = embed_weight1.size()
        self.decoder1 = nn.Linear(n_dim1, n_vocab1, bias=False)
        self.decoder1.weight = embed_weight1
        self.decoder1_bias = nn.Parameter(torch.zeros(n_vocab1))

        #logits_dep
        self.fc2 = nn.Linear(cfg.dim, cfg.dim)
        self.activ2 = models.gelu
        self.norm2 = models.LayerNorm(cfg)
        embed_weight2 = self.transformer.embed.dep_embed.weight
        n_vocab2, n_dim2 = embed_weight2.size()
        self.decoder2 = nn.Linear(n_dim2, n_vocab2, bias=False)
        self.decoder2.weight = embed_weight2
        self.decoder2_bias = nn.Parameter(torch.zeros(n_vocab2))

        #logits_word_vocab_size
        self.fc3 = nn.Linear(cfg.dim, cfg.dim)
        self.activ3 = models.gelu
        self.norm3 = models.LayerNorm(cfg)
        embed_weight3 = self.transformer.embed.tok_embed.weight
        n_vocab3, n_dim3 = embed_weight3.size()
        self.decoder3 = nn.Linear(n_dim3, n_vocab3, bias=False)
        self.decoder3.weight = embed_weight3
        self.decoder3_bias = nn.Parameter(torch.zeros(n_vocab3))
    def __init__(self, cfg):
        super().__init__()
        self.transformer = models.Transformer(cfg)

        #logits_sentence_clsf
        self.fc = nn.Linear(cfg.dim, cfg.dim)
        self.activ1 = nn.Tanh()
        self.classifier = nn.Linear(cfg.dim, 2)

        #logits_paragraph_clsf
        '''
        self.fc = nn.Linear(cfg.dim, 2)
        self.activ1 = nn.Tanh()
        self.norm1 = models.LayerNorm(cfg)
        self.drop = nn.Dropout(cfg.p_drop_hidden)
        self.classifier = nn.Linear(cfg.max_len * 2, 2)
        '''

        #logits_lm
        self.linear = nn.Linear(cfg.dim, cfg.dim)
        self.activ2 = models.gelu
        self.norm2 = models.LayerNorm(cfg)
        # decoder is shared with embedding layer
        embed_weight = self.transformer.embed.tok_embed.weight
        n_vocab, n_dim = embed_weight.size()
        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)
        self.decoder.weight = embed_weight
        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))

        #logits_same
        self.linear2 = nn.Linear(cfg.dim, cfg.vocab_size)
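
    # Hedged sketch (not in the original source): a plausible forward pass, assuming the
    # usual BERT-style inputs. The CLS hidden state feeds the sentence classifier, the
    # masked positions feed the tied LM decoder, and linear2 ("logits_same") is applied to
    # every position here only as an illustrative guess -- its intended target is not
    # visible in this snippet.
    def forward(self, input_ids, segment_ids, input_mask, masked_pos):
        h = self.transformer(input_ids, segment_ids, input_mask)
        pooled_h = self.activ1(self.fc(h[:, 0]))                 # CLS token
        logits_clsf = self.classifier(pooled_h)
        pos = masked_pos[:, :, None].expand(-1, -1, h.size(-1))
        h_masked = torch.gather(h, 1, pos)
        h_masked = self.norm2(self.activ2(self.linear(h_masked)))
        logits_lm = self.decoder(h_masked) + self.decoder_bias   # decoder tied to tok_embed
        logits_same = self.linear2(h)                            # per-token vocab scores (assumption)
        return logits_clsf, logits_lm, logits_same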
Example #4
    def __init__(self, cfg):
        super().__init__()
        self.transformer = models.Transformer(cfg)
        self.fc = nn.Linear(cfg.hidden, cfg.hidden)
        self.activ1 = nn.Tanh()
        self.linear = nn.Linear(cfg.hidden, cfg.hidden)
        self.activ2 = models.gelu
        self.norm = models.LayerNorm(cfg)
        self.classifier = nn.Linear(cfg.hidden, 2)

        # decoder is shared with embedding layer
        ## project hidden layer to embedding layer
        embed_weight2 = self.transformer.embed.tok_embed2.weight
        n_hidden, n_embedding = embed_weight2.size()
        self.decoder1 = nn.Linear(n_hidden, n_embedding, bias=False)
        self.decoder1.weight.data = embed_weight2.data.t()

        ## project embedding layer to vocabulary layer
        embed_weight1 = self.transformer.embed.tok_embed1.weight
        n_vocab, n_embedding = embed_weight1.size()
        self.decoder2 = nn.Linear(n_embedding, n_vocab, bias=False)
        self.decoder2.weight = embed_weight1

        # self.tok_embed1 = nn.Embedding(cfg.vocab_size, cfg.embedding)
        # self.tok_embed2 = nn.Linear(cfg.embedding, cfg.hidden)

        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))  # learnable output bias for the tied decoder (the nn.Linear above has bias=False)
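
    # Hedged sketch (not part of the original source): how this factorized, ALBERT-style
    # decoder would typically be used. decoder1 maps hidden -> embedding via the transpose
    # of tok_embed2, decoder2 maps embedding -> vocab with the tok_embed1 weight, and
    # decoder_bias supplies the missing output bias. BERT-style inputs are assumed.
    def forward(self, input_ids, segment_ids, input_mask, masked_pos):
        h = self.transformer(input_ids, segment_ids, input_mask)
        pooled_h = self.activ1(self.fc(h[:, 0]))
        logits_clsf = self.classifier(pooled_h)
        pos = masked_pos[:, :, None].expand(-1, -1, h.size(-1))
        h_masked = torch.gather(h, 1, pos)
        h_masked = self.norm(self.activ2(self.linear(h_masked)))
        logits_lm = self.decoder2(self.decoder1(h_masked)) + self.decoder_bias
        return logits_lm, logits_clsf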
Example #5
    def __init__(self, cfg):
        super().__init__()
        self.transformer = models.Transformer(cfg)

        #logits_word_vocab_size
        self.fc3 = nn.Linear(cfg.dim, cfg.dim)
        self.activ3 = models.gelu
        self.norm3 = models.LayerNorm(cfg)
        embed_weight3 = self.transformer.embed.tok_embed.weight
        n_vocab3, n_dim3 = embed_weight3.size()
        self.decoder3 = nn.Linear(n_dim3, n_vocab3, bias=False)
        self.decoder3.weight = embed_weight3
        self.decoder3_bias = nn.Parameter(torch.zeros(n_vocab3))
    def __init__(self, cfg):
        super().__init__()
        self.transformer = models.Transformer(cfg)
        self.fc = nn.Linear(cfg.hidden, cfg.hidden)
        self.activ1 = nn.Tanh()
        self.linear = nn.Linear(cfg.hidden, cfg.hidden)
        self.activ2 = models.gelu
        self.norm = models.LayerNorm(cfg)
        self.classifier = nn.Linear(cfg.hidden, 2)

        # decoder is shared with embedding layer
        ## project hidden layer to embedding layer
        self.discriminator = nn.Linear(cfg.hidden, 1, bias=False)
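
    # Hedged sketch (not part of the original source): an ELECTRA-style use of this head,
    # assuming the discriminator scores every token position as original vs. replaced and
    # the classifier reads the pooled CLS state.
    def forward(self, input_ids, segment_ids, input_mask):
        h = self.transformer(input_ids, segment_ids, input_mask)
        pooled_h = self.activ1(self.fc(h[:, 0]))
        logits_clsf = self.classifier(pooled_h)
        h_disc = self.norm(self.activ2(self.linear(h)))
        logits_replaced = self.discriminator(h_disc).squeeze(-1)   # one logit per token
        return logits_replaced, logits_clsf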
Example #7
    def __init__(self, cfg):
        super().__init__()
        self.transformer = models.Transformer(cfg)
        self.fc = nn.Linear(cfg.dim, cfg.dim)
        self.activ1 = nn.Tanh()
        self.linear = nn.Linear(cfg.dim, cfg.dim)
        self.activ2 = models.gelu
        self.norm = models.LayerNorm(cfg)
        self.classifier = nn.Linear(cfg.dim, 2)
        # decoder is shared with embedding layer
        embed_weight = self.transformer.embed.tok_embed.weight
        n_vocab, n_dim = embed_weight.size()
        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)
        self.decoder.weight = embed_weight
        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))
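
    # Hedged sketch (assumption, not in the original source): typical pretraining losses
    # for a head like this, assuming forward(...) returns (logits_lm, logits_clsf),
    # masked_ids / masked_weights describe the MLM targets, and is_next is the
    # sentence-pair label. Requires: import torch.nn.functional as F
    def pretrain_loss(self, logits_lm, masked_ids, masked_weights, logits_clsf, is_next):
        loss_lm = F.cross_entropy(logits_lm.transpose(1, 2), masked_ids, reduction='none')
        loss_lm = (loss_lm * masked_weights.float()).mean()   # average over real masked positions
        loss_clsf = F.cross_entropy(logits_clsf, is_next)
        return loss_lm + loss_clsf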