Example #1
    def __init__(self, config, bpemb, vocab, relation_vocab):
        super(GraphTransformer, self).__init__()

        self.config = config
        self.embedding = bpemb
        self.vocab = vocab
        self.relation_vocab = relation_vocab
        self.n_layer = getattr(config, "g_num_layer", 4)
        self.use_pe = getattr(config, "g_pe", True)

        assert vocab is not None
        self.vocab_inv = {v: k for k, v in vocab.items()}
        assert relation_vocab is not None
        self.relation_embedding = nn.Embedding(len(self.relation_vocab),
                                               config.d_relation)
        assert config.d_relation * config.g_n_head == config.d_enc_concept

        self.position_encoder = PositionalEncoding(config.d_enc_concept)

        # encoder_layer = nn.TransformerEncoderLayer(
        #     config.d_model, config.n_head, dim_feedforward=1024, dropout=config.dropout
        # )
        encoder_layer = TransformerEncoderLayer(
            d_model=config.d_enc_concept,
            heads=config.g_n_head,
            d_ff=getattr(config, "g_d_ff", 1024),
            dropout=config.dropout,
            att_drop=config.dropout,
            use_structure=True,
        )
        encoder_norm = nn.LayerNorm(config.d_enc_concept)
        self.encoder = TransformerEncoder(encoder_layer, self.n_layer,
                                          encoder_norm)
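
A minimal usage sketch for the constructor above, assuming a SimpleNamespace-style config; the concrete numbers below are illustrative assumptions (not taken from the original repository), chosen only to satisfy the assert d_relation * g_n_head == d_enc_concept.

from types import SimpleNamespace

# Illustrative hyperparameters (assumed, not from the original config):
# 8 graph heads x 64-dim relation embeddings = 512-dim concept encoder.
cfg = SimpleNamespace(
    d_enc_concept=512,
    d_relation=64,
    g_n_head=8,
    g_num_layer=4,
    g_pe=True,
    g_d_ff=1024,
    dropout=0.1,
)
assert cfg.d_relation * cfg.g_n_head == cfg.d_enc_concept  # mirrors the assert in __init__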
Example #2
    def __init__(self, config, bpemb, vocab):
        super(SentTransformer, self).__init__()

        self.config = config
        self.embedding = bpemb
        self.vocab = vocab

        assert vocab is not None
        self.vocab_inv = {v: k for k, v in vocab.items()}
        self.position_encoder = PositionalEncoding(config.d_enc_sent)
        # encoder_layer = nn.TransformerEncoderLayer(
        #     config.d_enc_sent, config.n_head, dim_feedforward=1024, dropout=config.dropout
        # )
        encoder_layer = TransformerEncoderLayer(
            d_model=config.d_enc_sent,
            heads=config.n_head,
            d_ff=getattr(config, "d_ff", 1024),
            dropout=config.dropout,
            att_drop=config.dropout,
            use_structure=False,
        )
        encoder_norm = nn.LayerNorm(config.d_enc_sent)
        self.encoder = TransformerEncoder(encoder_layer, config.num_layer,
                                          encoder_norm)

        if vocab is not None:
            self.vocab_size = len(self.vocab)
            self.BOS = self.vocab["<bos>"]
            self.EOS = self.vocab["<eos>"]
        else:
            # fall back to the BPEmb subword vocabulary; the model is stored in self.embedding
            self.vocab_size = self.embedding.vectors.shape[0]
            self.BOS = self.embedding.BOS
            self.EOS = self.embedding.EOS
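
The sentence encoder expects vocab to map tokens to ids and to contain "<bos>" and "<eos>" entries; the else branch only applies when no vocab is passed and falls back to the BPEmb subword vocabulary. A minimal, assumed vocab layout (not from the source data) showing what the constructor reads:

# Illustrative vocab; real vocabularies come from the dataset / BPEmb model.
vocab = {"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4}
vocab_inv = {v: k for k, v in vocab.items()}  # id -> token, as built in __init__
bos_id, eos_id = vocab["<bos>"], vocab["<eos>"]  # 1 and 2 here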
Example #3
    def __init__(self, config, relation_vocab):
        super(AdapterGraphTransformer, self).__init__()

        self.config = config
        self.relation_vocab = relation_vocab
        self.n_layer = getattr(config, "adapter_layer", 2)
        self.use_pe = getattr(config, "adapter_pe", True)

        assert relation_vocab is not None
        d_relation = config.d_enc_sent // config.n_head
        self.relation_embedding = nn.Embedding(len(self.relation_vocab),
                                               d_relation)
        self.position_encoder = PositionalEncoding(config.d_enc_sent)

        encoder_layer = TransformerEncoderLayer(
            d_model=config.d_enc_sent,
            heads=config.n_head,
            d_ff=getattr(config, "g_d_ff", 1024),
            dropout=config.dropout,
            att_drop=config.dropout,
            use_structure=True,
        )
        encoder_norm = nn.LayerNorm(config.d_enc_sent)
        self.encoder = TransformerEncoder(encoder_layer, self.n_layer,
                                          encoder_norm)
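
In this adapter variant the relation embedding size is derived from the sentence encoder width, d_relation = d_enc_sent // n_head, so each attention head gets exactly one relation-sized slice. A quick arithmetic check with assumed values:

# Assumed sizes for illustration: 512-dim sentence encoder, 8 heads.
d_enc_sent, n_head = 512, 8
d_relation = d_enc_sent // n_head          # 64
assert d_relation * n_head == d_enc_sent   # exact split needed for structure-aware attention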
Example #4
    def __init__(self, config, relation_vocab):
        super(GraphTransformer, self).__init__()

        self.config = config
        self.relation_vocab = relation_vocab
        self.n_layer = getattr(config, "g_num_layer", 2)
        self.use_pe = getattr(config, "g_pe", True)
        self.d_relation = getattr(config, "d_relation", 64)
        self.d_concept = getattr(config, "d_concept", 768)
        self.g_n_head = self.d_concept // self.d_relation
        self.dropout = getattr(config, "dropout", 0.1)
        assert relation_vocab is not None
        self.relation_embedding = nn.Embedding(len(self.relation_vocab),
                                               self.d_relation)
        assert self.d_relation * self.g_n_head == self.d_concept

        self.position_encoder = PositionalEncoding(self.d_concept)
        encoder_layer = TransformerEncoderLayer(
            d_model=self.d_concept,
            heads=self.g_n_head,
            d_ff=getattr(config, "g_d_ff", 1024),
            dropout=self.dropout,
            att_drop=self.dropout,
            use_structure=True,
        )
        encoder_norm = nn.LayerNorm(self.d_concept)
        self.encoder = TransformerEncoder(encoder_layer, self.n_layer,
                                          encoder_norm)
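
This variant goes the other way: the head count is derived as d_concept // d_relation from the getattr defaults, and the assert only holds when d_relation divides d_concept exactly. With the default values shown above:

# Defaults from the getattr calls: 768-dim concepts, 64-dim relation embeddings.
d_concept, d_relation = 768, 64
g_n_head = d_concept // d_relation          # 12 heads
assert d_relation * g_n_head == d_concept   # holds because 768 % 64 == 0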
Example #5
    def __init__(self, config, word_emb, con_emb, word_vocab, concept_vocab,
                 relation_vocab):
        super(DualTransformer, self).__init__()

        self.config = config
        self.word_vocab = word_vocab
        self.concept_vocab = concept_vocab
        self.relation_vocab = relation_vocab

        self.enc_word_embedding = self.build_embedding(word_emb, word_vocab,
                                                       self.config.d_enc_sent)
        self.word_encoder = SentTransformer(config, self.enc_word_embedding,
                                            word_vocab)
        if config.dual_enc and self.concept_vocab is not None and relation_vocab is not None:
            if config.share_con_vocab:
                self.enc_concept_embedding = self.enc_word_embedding
            else:
                self.enc_concept_embedding = self.build_embedding(
                    con_emb, concept_vocab, self.config.d_enc_concept)

            self.graph_encoder = GraphTransformer(config,
                                                  self.enc_concept_embedding,
                                                  concept_vocab,
                                                  relation_vocab)
        else:
            self.graph_encoder = None
        self.dec_word_embedding = self.enc_word_embedding
        self.position_encoder = PositionalEncoding(config.d_dec)
        dual_mode = getattr(config, "dual_mode", "cat")
        if config.dual_enc:
            if dual_mode == "cat":
                decoder_layer = DoubleAttnTransformerDecoderLayer(
                    d_model=config.d_dec,
                    d_sent=config.d_enc_sent,
                    d_con=config.d_enc_concept,
                    heads=config.n_head,
                    d_ff=1024,
                    dropout=config.dropout,
                    att_drop=config.dropout,
                    dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
                )
            elif dual_mode == "graph_first":
                decoder_layer = DoubleAttnTransformerDecoderLayerGraphFirst(
                    d_model=config.d_dec,
                    d_enc=(config.d_model + config.d_concept) if config.dual_enc else config.d_model,
                    heads=config.n_head,
                    d_ff=1024,
                    dropout=config.dropout,
                    att_drop=config.dropout,
                    dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
                )
            elif dual_mode == "sent_first":
                decoder_layer = DoubleAttnTransformerDecoderLayerSentFirst(
                    d_model=config.d_dec,
                    d_enc=(config.d_model + config.d_concept) if config.dual_enc else config.d_model,
                    heads=config.n_head,
                    d_ff=1024,
                    dropout=config.dropout,
                    att_drop=config.dropout,
                    dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
                )
            else:
                raise ValueError(
                    "Invalid dual_mode, should be one of: cat, graph_first, sent_first"
                )
        else:
            decoder_layer = DoubleAttnTransformerDecoderLayer(
                d_model=config.d_dec,
                d_sent=config.d_enc_sent,
                d_con=config.d_enc_concept,
                heads=config.n_head,
                d_ff=1024,
                dropout=config.dropout,
                att_drop=config.dropout,
                dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
            )
        decoder_norm = nn.LayerNorm(config.d_dec)
        self.decoder = DoubleAttnTransformerDecoder(decoder_layer,
                                                    config.num_layer,
                                                    decoder_norm)

        if word_vocab is not None:
            self.word_vocab_size = len(self.word_vocab)
            self.BOS = self.word_vocab["<bos>"]
            self.EOS = self.word_vocab["<eos>"]

        self.projector = nn.Linear(config.d_dec, self.word_vocab_size)
        if self.config.share_vocab:  # existing bugs to be fixed
            self.projector.weight = self.dec_word_embedding.weight
        if self.config.use_kl_loss:
            # size_average is deprecated in newer PyTorch; reduction="sum" is the equivalent
            self.kl = nn.KLDivLoss(reduction="sum")

        if self.config.rl_ratio > 0.0 and self.config.rl_type == "bertscore":
            self.rl_metric = nlp.load_metric("bertscore")
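
The share_vocab branch ties the output projector to the decoder-side word embedding. nn.Linear(d_dec, vocab_size) stores a (vocab_size, d_dec) weight and nn.Embedding(vocab_size, d_emb) stores (vocab_size, d_emb), so the tie is only shape-valid when the embedding width equals d_dec, which is one plausible reading of the "existing bugs" comment. A standalone sketch of that constraint, with assumed sizes:

import torch.nn as nn

# Assumed sizes for illustration only.
vocab_size, d_dec = 1000, 512

embedding = nn.Embedding(vocab_size, d_dec)  # weight: (vocab_size, d_dec)
projector = nn.Linear(d_dec, vocab_size)     # weight: (vocab_size, d_dec)
assert projector.weight.shape == embedding.weight.shape
projector.weight = embedding.weight          # weight tying, as in the share_vocab branch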