Example 1
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        # BERT encoder for queries and TaBERT encoder for tables
        self.Qmodel = BertModel.from_pretrained(self.hparams.bert_path)
        self.Tmodel = TableBertModel.from_pretrained(self.hparams.tabert_path)
        # pairwise ranking loss and a pooler that collapses a (seq_len, 768) encoding to (1, 768)
        self.criterion = nn.MarginRankingLoss(margin=1)
        self.avg_pooler = nn.AdaptiveAvgPool2d([1, 768])
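
Neither the pooler nor the ranking loss is exercised in this excerpt. A minimal sketch of how nn.MarginRankingLoss is typically consumed in such a retrieval setup (the score names and the cosine similarity are illustrative assumptions, not code from this repository):

        # inside a hypothetical training step: q_vec, pos_vec, neg_vec are (bs, 768) embeddings
        pos_score = torch.cosine_similarity(q_vec, pos_vec, dim=-1)
        neg_score = torch.cosine_similarity(q_vec, neg_vec, dim=-1)
        target = torch.ones_like(pos_score)  # +1: the positive pair should score higher by the margin
        loss = self.criterion(pos_score, neg_score, target)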
Example 2
    def __init__(self, hparams):
        super(TaBERTTuner, self).__init__()
        self.hparams = hparams

        self.model = TableBertModel.from_pretrained(
            'tabert_base_k1/model.bin')  #jz

        # for multi-class classification: the column encoding goes
        # (bs, 15, 768) -> (bs, 768) -> (bs, 500) -> (bs, 15); labels are (bs, 1)
        # first layer  #jz
        self.l1 = nn.Linear(768, 500)
        self.l1_cat = nn.Linear(1536, 500)

        # second layer; max_len (defined elsewhere in this module) is presumably the padded column count
        self.l2 = nn.Linear(500, max_len)  #jz

        # softmax
        self.sm = nn.Softmax(dim=1)

        # loss
        self.l = nn.CrossEntropyLoss()

        # attention applied only to column_encoding: a weighted sum of the column vectors
        # weights: (bs, 15, 768) -> (bs, 15, 768) -> (bs, 15, 1) -> (bs, 15);
        # weights * column_encoding: (bs, 1, 15) x (bs, 15, 768) -> (bs, 1, 768)
        self.lin_bias = nn.Linear(768, 768)
        self.att_weight = nn.Parameter(torch.rand(768, 1))
        self.sm_att = nn.Softmax(dim=1)
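
The attention pooling described in the comments above is not shown in this excerpt. A minimal sketch of how these layers could be wired together in the forward pass, assuming a column_encoding of shape (bs, 15, 768) (the method name and control flow are assumptions, not the original code):

    def pool_columns(self, column_encoding):
        # column_encoding: (bs, 15, 768); lin_bias keeps the shape
        scores = torch.matmul(self.lin_bias(column_encoding), self.att_weight)  # (bs, 15, 1)
        weights = self.sm_att(scores.squeeze(-1))                               # (bs, 15), softmax over columns
        # (bs, 1, 15) x (bs, 15, 768) -> (bs, 1, 768) -> (bs, 768)
        return torch.bmm(weights.unsqueeze(1), column_encoding).squeeze(1)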
Example 3
    def __init__(self, params):
        super().__init__()
        self.data_dir = params.data_dir

        self.query_tokenizer = BertTokenizer.from_pretrained(params.bert_path)
        table_model = TableBertModel.from_pretrained(params.tabert_path)
        self.table_tokenizer = table_model.tokenizer

        self.train_batch_size = params.train_batch_size
        self.valid_batch_size = params.valid_batch_size
        if hasattr(params, 'test_batch_size'):
            self.test_batch_size = params.test_batch_size
Example 4
    def __init__(self, hparams):
        super(TaBERTTuner, self).__init__()
        self.hparams = hparams

        self.model = TableBertModel.from_pretrained('tabert_base_k1/model.bin') #jz
        
        # first layer  #jz
        self.l1 = nn.Linear(768, 500)
        self.l1_cat = nn.Linear(1536, 500)

        # second layer: two classes per column
        self.l2 = nn.Linear(500, 2)  #jz

        # softmax
        # self.sm = nn.LogSoftmax(dim=1)
        self.sm = nn.Softmax(dim=2)

        # loss: per-class weights plus an ignore index for padded columns
        weight_try = torch.FloatTensor([1, 0.01])
        # weight_try = torch.FloatTensor([1, 0.167])
        self.l = nn.CrossEntropyLoss(ignore_index=2, weight=weight_try)  # jz: 2 is the padding index
        # self.l = nn.CrossEntropyLoss(ignore_index=2)
        self.l = self.l.to('cuda')
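
A minimal sketch of how this padding-aware, class-weighted loss would typically be applied to per-column logits (the shapes and the flattening are assumptions based on the comments above, not the original training step):

        # logits: (bs, num_cols, 2); labels: (bs, num_cols), with 2 marking padded columns
        loss = self.l(logits.reshape(-1, 2), labels.reshape(-1))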
Example 5
        input_ids.append(q["input_ids"].squeeze())
        token_type_ids.append(q["token_type_ids"].squeeze())
        attention_mask.append(q["attention_mask"].squeeze())

    query = {
        "input_ids": torch.stack(input_ids),
        "token_type_ids": torch.stack(token_type_ids),
        "attention_mask": torch.stack(attention_mask)
    }
    return query, column, caption, rel, qid, tid


if __name__ == "__main__":
    query_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')
    table_model = TableBertModel.from_pretrained(
        'model/tabert_base_k3/model.bin')
    table_tokenizer = table_model.tokenizer

    dataset = QueryTableDataset(
        data_dir='data/1',
        data_type='train',
        query_tokenizer=query_tokenizer,
        table_tokenizer=table_tokenizer,
        prepare=True,
    )
    dataloader = DataLoader(dataset,
                            batch_size=2,
                            collate_fn=query_table_collate_fn)

    for _ in range(1):
        for d in dataloader:
Example 6
from table_bert import TableBertModel

model = TableBertModel.from_pretrained(
    '/tabert/models/tabert_base_k1/model.bin', )

from table_bert import Table, Column

table = Table(id='List of countries by GDP (PPP)',
              header=[
                  Column('Nation', 'text', sample_value='United States'),
                  Column('Gross Domestic Product',
                         'real',
                         sample_value='21,439,453')
              ],
              data=[
                  ['United States', '21,439,453'],
                  ['China', '27,308,857'],
                  ['European Union', '22,774,165'],
              ]).tokenize(model.tokenizer)

# To visualize table in an IPython notebook:
# display(table.to_data_frame(), detokenize=True)

context = 'show me countries ranked by GDP'

# model takes batched, tokenized inputs
context_encoding, column_encoding, info_dict = model.encode(
    contexts=[model.tokenizer.tokenize(context)], tables=[table])

print(context_encoding.shape)
print(column_encoding.shape)
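
With a single example in the batch, the encoder returns one 768-dimensional vector per context token and per column, so the printed shapes should come out roughly as (1, context_length, 768) for context_encoding and (1, 2, 768) for column_encoding; the exact context length depends on the tokenizer output.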
Example 7
from table_bert import TableBertModel
from table_bert import Table, Column

import torch

model = TableBertModel.from_pretrained(
    '/Users/mac/Desktop/syt/Deep-Learning/Repos/TaBERT/pretrained-models/tabert_base_k3/model.bin',
)

table = Table(id='List of countries by GDP (PPP)',
              header=[
                  Column('Nation', 'text', sample_value='United States'),
                  Column('Gross Domestic Product',
                         'real',
                         sample_value='21,439,453')
              ],
              data=[
                  ['United States', '21,439,453'],
                  ['China', '27,308,857'],
                  ['European Union', '22,774,165'],
              ]).tokenize(model.tokenizer)

table2 = Table(id='List of countries by GDP (PPP)',
               header=[
                   Column('Nation', 'text', sample_value='United States'),
                   Column('Gross Domestic Product',
                          'real',
                          sample_value='21,439,453'),
                   Column('Continent', 'text', sample_value='North America')
               ],
               data=[
Example 8
    def __init__(self, hparams, decoder, enc_hid_dim, dec_hid_dim):
        super(TaBERTTuner, self).__init__()
        self.hparams = hparams
        # TaBERT encoder, a linear bridge from the encoder hidden size to the decoder's, and the decoder
        self.encoder = TableBertModel.from_pretrained('bert-base-uncased')
        self.hidden = nn.Linear(enc_hid_dim, dec_hid_dim)
        self.decoder = decoder
Example 9
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        # BERT encoder for queries, TaBERT encoder for tables, and a LayerNorm over the 768-d outputs
        self.Qmodel = BertModel.from_pretrained(self.hparams.bert_path)
        self.Tmodel = TableBertModel.from_pretrained(self.hparams.tabert_path)
        self.norm = nn.LayerNorm(768)