def __init__(self, config, bpemb, vocab, relation_vocab):
    super(GraphTransformer, self).__init__()
    self.config = config
    self.embedding = bpemb
    self.vocab = vocab
    self.relation_vocab = relation_vocab
    self.n_layer = getattr(config, "g_num_layer", 4)
    self.use_pe = getattr(config, "g_pe", True)

    assert vocab is not None
    self.vocab_inv = {v: k for k, v in vocab.items()}

    assert relation_vocab is not None
    self.relation_embedding = nn.Embedding(len(self.relation_vocab), config.d_relation)
    # Each head attends over a d_relation-sized relation feature, so all heads
    # together must span the concept encoder width.
    assert config.d_relation * config.g_n_head == config.d_enc_concept

    self.position_encoder = PositionalEncoding(config.d_enc_concept)
    # encoder_layer = nn.TransformerEncoderLayer(
    #     config.d_model, config.n_head, dim_feedforward=1024, dropout=config.dropout
    # )
    encoder_layer = TransformerEncoderLayer(
        d_model=config.d_enc_concept,
        heads=config.g_n_head,
        d_ff=getattr(config, "g_d_ff", 1024),
        dropout=config.dropout,
        att_drop=config.dropout,
        use_structure=True,  # structure-aware self-attention over the concept graph
    )
    encoder_norm = nn.LayerNorm(config.d_enc_concept)
    self.encoder = TransformerEncoder(encoder_layer, self.n_layer, encoder_norm)
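# Illustrative usage sketch (not part of the original code): constructs the graph
# encoder defined above with a minimal config. The SimpleNamespace config, toy
# vocabularies, and concrete dimensions are assumptions chosen only to satisfy
# the assert d_relation * g_n_head == d_enc_concept (64 * 8 == 512).
def _example_build_graph_encoder():
    from types import SimpleNamespace
    import torch.nn as nn
    cfg = SimpleNamespace(d_relation=64, g_n_head=8, d_enc_concept=512, dropout=0.1)
    concept_vocab = {"<pad>": 0, "<unk>": 1, "dog": 2, "animal": 3}
    relation_vocab = {"<pad>": 0, "IsA": 1, "AtLocation": 2}
    concept_emb = nn.Embedding(len(concept_vocab), cfg.d_enc_concept)
    return GraphTransformer(cfg, concept_emb, concept_vocab, relation_vocab)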
def __init__(self, config, bpemb, vocab):
    super(SentTransformer, self).__init__()
    self.config = config
    self.embedding = bpemb
    self.vocab = vocab

    assert vocab is not None
    self.vocab_inv = {v: k for k, v in vocab.items()}

    self.position_encoder = PositionalEncoding(config.d_enc_sent)
    # encoder_layer = nn.TransformerEncoderLayer(
    #     config.d_enc_sent, config.n_head, dim_feedforward=1024, dropout=config.dropout
    # )
    encoder_layer = TransformerEncoderLayer(
        d_model=config.d_enc_sent,
        heads=config.n_head,
        d_ff=getattr(config, "d_ff", 1024),
        dropout=config.dropout,
        att_drop=config.dropout,
        use_structure=False,  # plain token-level self-attention, no graph structure
    )
    encoder_norm = nn.LayerNorm(config.d_enc_sent)
    self.encoder = TransformerEncoder(encoder_layer, config.num_layer, encoder_norm)

    if vocab is not None:
        self.vocab_size = len(self.vocab)
        self.BOS = self.vocab["<bos>"]
        self.EOS = self.vocab["<eos>"]
    else:
        # fall back to the BPEmb subword vocabulary held in self.embedding
        self.vocab_size = self.embedding.vectors.shape[0]
        self.BOS = self.embedding.BOS
        self.EOS = self.embedding.EOS
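# Illustrative usage sketch (not part of the original code): minimal construction
# of the sentence encoder above. The SimpleNamespace config, toy vocabulary, and
# dimensions are assumptions; the constructor only requires that the vocab
# contains "<bos>"/"<eos>" and that the embedding width matches d_enc_sent.
def _example_build_sent_encoder():
    from types import SimpleNamespace
    import torch.nn as nn
    cfg = SimpleNamespace(d_enc_sent=512, n_head=8, num_layer=6, dropout=0.1)
    vocab = {"<pad>": 0, "<bos>": 1, "<eos>": 2, "hello": 3, "world": 4}
    word_emb = nn.Embedding(len(vocab), cfg.d_enc_sent)
    return SentTransformer(cfg, word_emb, vocab)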
def __init__(self, config, relation_vocab):
    super(AdapterGraphTransformer, self).__init__()
    self.config = config
    self.relation_vocab = relation_vocab
    self.n_layer = getattr(config, "adapter_layer", 2)
    self.use_pe = getattr(config, "adapter_pe", True)

    assert relation_vocab is not None
    d_relation = config.d_enc_sent // config.n_head
    self.relation_embedding = nn.Embedding(len(self.relation_vocab), d_relation)

    self.position_encoder = PositionalEncoding(config.d_enc_sent)
    encoder_layer = TransformerEncoderLayer(
        d_model=config.d_enc_sent,
        heads=config.n_head,
        d_ff=getattr(config, "g_d_ff", 1024),
        dropout=config.dropout,
        att_drop=config.dropout,
        use_structure=True,
    )
    encoder_norm = nn.LayerNorm(config.d_enc_sent)
    self.encoder = TransformerEncoder(encoder_layer, self.n_layer, encoder_norm)
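# Illustrative usage sketch (not part of the original code): the adapter derives
# its relation width from the sentence encoder, d_relation = d_enc_sent // n_head
# (e.g. 512 // 8 == 64), so d_enc_sent should be divisible by n_head. The
# concrete values and the toy relation vocabulary are assumptions.
def _example_build_adapter_graph_encoder():
    from types import SimpleNamespace
    cfg = SimpleNamespace(d_enc_sent=512, n_head=8, dropout=0.1)
    relation_vocab = {"<pad>": 0, "IsA": 1, "AtLocation": 2}
    return AdapterGraphTransformer(cfg, relation_vocab)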
def __init__(self, config, relation_vocab):
    super(GraphTransformer, self).__init__()
    self.config = config
    self.relation_vocab = relation_vocab
    self.n_layer = getattr(config, "g_num_layer", 2)
    self.use_pe = getattr(config, "g_pe", True)
    self.d_relation = getattr(config, "d_relation", 64)
    self.d_concept = getattr(config, "d_concept", 768)
    self.g_n_head = self.d_concept // self.d_relation
    self.dropout = getattr(config, "dropout", 0.1)

    assert relation_vocab is not None
    self.relation_embedding = nn.Embedding(len(self.relation_vocab), self.d_relation)
    assert self.d_relation * self.g_n_head == self.d_concept

    self.position_encoder = PositionalEncoding(self.d_concept)
    encoder_layer = TransformerEncoderLayer(
        d_model=self.d_concept,
        heads=self.g_n_head,
        d_ff=getattr(config, "g_d_ff", 1024),
        dropout=self.dropout,
        att_drop=self.dropout,
        use_structure=True,
    )
    encoder_norm = nn.LayerNorm(self.d_concept)
    self.encoder = TransformerEncoder(encoder_layer, self.n_layer, encoder_norm)
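# Illustrative usage sketch (not part of the original code): with the defaults
# above, d_concept=768 and d_relation=64 give g_n_head = 768 // 64 = 12, so the
# divisibility assert holds. Every field is read via getattr with a default, so
# an empty namespace suffices; the toy relation vocabulary is an assumption.
def _example_build_default_graph_encoder():
    from types import SimpleNamespace
    relation_vocab = {"<pad>": 0, "IsA": 1, "UsedFor": 2}
    return GraphTransformer(SimpleNamespace(), relation_vocab)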
def __init__(self, config, word_emb, con_emb, word_vocab, concept_vocab, relation_vocab):
    super(DualTransformer, self).__init__()
    self.config = config
    self.word_vocab = word_vocab
    self.concept_vocab = concept_vocab
    self.relation_vocab = relation_vocab

    self.enc_word_embedding = self.build_embedding(word_emb, word_vocab, self.config.d_enc_sent)
    self.word_encoder = SentTransformer(config, self.enc_word_embedding, word_vocab)

    if config.dual_enc and self.concept_vocab is not None and relation_vocab is not None:
        if config.share_con_vocab:
            self.enc_concept_embedding = self.enc_word_embedding
        else:
            self.enc_concept_embedding = self.build_embedding(
                con_emb, concept_vocab, self.config.d_enc_concept)
        self.graph_encoder = GraphTransformer(
            config, self.enc_concept_embedding, concept_vocab, relation_vocab)
    else:
        self.graph_encoder = None

    self.dec_word_embedding = self.enc_word_embedding
    self.position_encoder = PositionalEncoding(config.d_dec)

    dual_mode = getattr(config, "dual_mode", "cat")
    if config.dual_enc:
        if dual_mode == "cat":
            decoder_layer = DoubleAttnTransformerDecoderLayer(
                d_model=config.d_dec,
                d_sent=config.d_enc_sent,
                d_con=config.d_enc_concept,
                heads=config.n_head,
                d_ff=1024,
                dropout=config.dropout,
                att_drop=config.dropout,
                dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
            )
        elif dual_mode == "graph_first":
            decoder_layer = DoubleAttnTransformerDecoderLayerGraphFirst(
                d_model=config.d_dec,
                d_enc=(config.d_model + config.d_concept) if config.dual_enc else config.d_model,
                heads=config.n_head,
                d_ff=1024,
                dropout=config.dropout,
                att_drop=config.dropout,
                dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
            )
        elif dual_mode == "sent_first":
            decoder_layer = DoubleAttnTransformerDecoderLayerSentFirst(
                d_model=config.d_dec,
                d_enc=(config.d_model + config.d_concept) if config.dual_enc else config.d_model,
                heads=config.n_head,
                d_ff=1024,
                dropout=config.dropout,
                att_drop=config.dropout,
                dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
            )
        else:
            raise ValueError(
                "invalid dual_mode %r, should be one of (cat, graph_first, sent_first)" % dual_mode)
    else:
        decoder_layer = DoubleAttnTransformerDecoderLayer(
            d_model=config.d_dec,
            d_sent=config.d_enc_sent,
            d_con=config.d_enc_concept,
            heads=config.n_head,
            d_ff=1024,
            dropout=config.dropout,
            att_drop=config.dropout,
            dual_enc=config.dual_enc,  # dual_enc=False when using a single sentence encoder
        )
    decoder_norm = nn.LayerNorm(config.d_dec)
    self.decoder = DoubleAttnTransformerDecoder(decoder_layer, config.num_layer, decoder_norm)

    if word_vocab is not None:
        self.word_vocab_size = len(self.word_vocab)
        self.BOS = self.word_vocab["<bos>"]
        self.EOS = self.word_vocab["<eos>"]

    self.projector = nn.Linear(config.d_dec, self.word_vocab_size)
    if self.config.share_vocab:
        # existing bug to be fixed: the tie is only shape-valid when d_dec == d_enc_sent
        self.projector.weight = self.dec_word_embedding.weight

    if self.config.use_kl_loss:
        self.kl = nn.KLDivLoss(reduction="sum")  # sum over elements (size_average=False in older PyTorch)

    if self.config.rl_ratio > 0.0 and self.config.rl_type == "bertscore":
        self.rl_metric = nlp.load_metric("bertscore")
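# Generic sketch (not the repo's code) of the shape constraint behind the
# weight-tying note above: nn.Linear(d_dec, vocab_size).weight has shape
# (vocab_size, d_dec) and nn.Embedding(vocab_size, d_emb).weight has shape
# (vocab_size, d_emb), so share_vocab tying only works when d_dec == d_emb.
# The dimensions below are assumptions for illustration.
def _example_weight_tying_shape_check(d_dec=512, d_emb=512, vocab_size=10000):
    import torch.nn as nn
    projector = nn.Linear(d_dec, vocab_size)
    embedding = nn.Embedding(vocab_size, d_emb)
    if projector.weight.shape == embedding.weight.shape:
        projector.weight = embedding.weight  # standard tie: both (vocab_size, d_emb)
    return projector, embedding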