Code Example #1
    def __init__(
        self,
        model_path,
        summarizer_args=summarizer_args,
        preprocessing_args=preprocessing_args,
        bert_config=bert_config,
    ):
        # BERT model args
        self.args = Namespace()
        self.args.__dict__ = summarizer_args

        # Preprocessing arguments
        self.pp_args = Namespace()
        self.pp_args.__dict__ = preprocessing_args

        self.bert_config = BertConfig.from_dict(bert_config)

        self.BertData = data_builder.BertData(self.pp_args)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = Summarizer(
            self.args,
            self.device,
            load_pretrained_bert=False,
            bert_config=self.bert_config,
        )
        model.load_cp(torch.load(model_path, map_location=lambda storage, loc: storage))
        # Inference only: freeze all parameters and disable dropout via eval()
        for param in model.parameters():
            param.requires_grad = False
        model.eval()
        model.to(device=self.device)
        self.model = model
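
Each of these examples passes a plain Python dict to BertConfig.from_dict. As a point of reference, the sketch below shows what such a dict typically contains, filled with standard BERT-base values; the actual KoBERT configuration ships with the pretrained model, so treat every number here as a placeholder.

from transformers import BertConfig  # older projects import it from pytorch_pretrained_bert instead

# Hypothetical config dict; the real bert_config is distributed with the KoBERT checkpoint
bert_config = {
    "attention_probs_dropout_prob": 0.1,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "max_position_embeddings": 512,
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "type_vocab_size": 2,
    "vocab_size": 8002,
}
config = BertConfig.from_dict(bert_config)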
Code Example #2
def get_kobert_model(model_file, vocab_file, ctx="cpu"):
    # Build BERT from the module-level bert_config dict and load the pretrained KoBERT weights
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    bertmodel.load_state_dict(torch.load(model_file))
    # Move to the requested device and switch to inference mode
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    # Load the gluonnlp BERT vocabulary from its JSON export
    vocab_b_obj = nlp.vocab.BERTVocab.from_json(open(vocab_file, 'rt').read())
    return bertmodel, vocab_b_obj
Code Example #3
File: pytorch_kobert.py  Project: seanhtchoi/KoBERT
def get_kobert_model(model_file, vocab_file, ctx="cpu"):
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    bertmodel.load_state_dict(torch.load(model_file))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    # Unlike Example #2, load the vocabulary directly from the SentencePiece model file
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(vocab_file,
                                                         padding_token='[PAD]')
    return bertmodel, vocab_b_obj
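
Examples #2 and #3 are the same loader and differ only in how the vocabulary file is read (a JSON export versus the raw SentencePiece model). A minimal usage sketch, with purely illustrative file paths, could look like this:

import torch

# Paths are placeholders; point them at the downloaded KoBERT checkpoint and vocabulary
ctx = "cuda:0" if torch.cuda.is_available() else "cpu"
model, vocab = get_kobert_model("pytorch_kobert.params", "kobert_vocab.json", ctx=ctx)
print(len(vocab))           # vocabulary size
print(vocab.padding_token)  # '[PAD]'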
Code Example #4
    def __init__(self, config, num_classes, vocab=None):
        super(KobertCRF, self).__init__()
        # Enable attention outputs in the shared BERT config before building the encoder
        self.bert_config = con.BERT_CONFIG
        self.bert_config['output_attentions'] = True
        self.bert = BertModel(config=BertConfig.from_dict(self.bert_config))
        self.vocab = vocab
        self.dropout = nn.Dropout(config["dropout"])
        self.position_wise_ff = nn.Linear(config["hidden_size"], num_classes)
        self.crf = CRF(num_tags=num_classes, batch_first=True)
        # Token-to-index mapping used when preparing NER inputs
        with open(con.NER_UTIL_PATH["token_to_index"], 'rb') as f:
            self.token_to_index = pickle.load(f)
Code Example #5
    def __init__(self, config, num_classes, vocab=None) -> None:
        super(KobertCRF, self).__init__()

        if vocab is None:  # use the pretrained KoBERT model and its vocabulary
            self.bert, self.vocab = get_pytorch_kobert_model()
        else:  # use a fine-tuned model: rebuild BERT from bert_config and reuse the given vocab
            self.bert = BertModel(config=BertConfig.from_dict(bert_config))
            self.vocab = vocab

        self.dropout = nn.Dropout(config.dropout)
        self.position_wise_ff = nn.Linear(config.hidden_size, num_classes)
        self.crf = CRF(num_tags=num_classes, batch_first=True)
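
For completeness, a minimal instantiation sketch for the KobertCRF pattern above, assuming a simple config object with dropout and hidden_size attributes and an illustrative tag set of 10 classes (the real project reads both from its own configuration files):

from types import SimpleNamespace

# Hypothetical hyperparameters; the actual values come from the project's config
model_config = SimpleNamespace(dropout=0.1, hidden_size=768)
num_classes = 10  # number of NER tags, illustrative only

# vocab=None -> the constructor falls back to the pretrained KoBERT model and vocabulary
model = KobertCRF(config=model_config, num_classes=num_classes, vocab=None)
model.eval()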
Code Example #6
def get_kobert_model(ctx="cpu"):
    model_file = './kobert_model/pytorch_kobert_2439f391a6.params'
    vocab_file = './kobert_model/kobertvocab_f38b8a4d6d.json'
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    bertmodel.load_state_dict(torch.load(model_file))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    # Load the gluonnlp BERT vocabulary from its JSON export
    vocab_b_obj = nlp.vocab.BERTVocab.from_json(
        open(vocab_file, 'rt').read())
    return bertmodel, vocab_b_obj
Code Example #7
    def __init__(self, config, num_classes, vocab=None) -> None:
        super(KobertCRFViz, self).__init__()
        # attention weights are only supported by the transformers package
        from transformers import BertModel, BertConfig

        # output_attentions must be set to True before the model is built
        bert_config['output_attentions'] = True
        self.bert = BertModel(config=BertConfig.from_dict(bert_config))
        self.vocab = vocab

        self.dropout = nn.Dropout(config.dropout)
        self.position_wise_ff = nn.Linear(config.hidden_size, num_classes)
        self.crf = CRF(num_tags=num_classes, batch_first=True)
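
With output_attentions enabled, the transformers BertModel returns per-layer attention tensors that the visualization class can consume. A self-contained sketch with a small, randomly initialized BERT (assuming a recent transformers release; the real code loads the pretrained KoBERT weights instead):

import torch
from transformers import BertModel, BertConfig

# Tiny illustrative config; output_attentions is set before the model is built
config = BertConfig(num_hidden_layers=2, output_attentions=True)
bert = BertModel(config).eval()

input_ids = torch.randint(0, config.vocab_size, (1, 8))
with torch.no_grad():
    outputs = bert(input_ids=input_ids, return_dict=True)
# One attention tensor per layer, each shaped (batch, heads, seq_len, seq_len)
print(len(outputs.attentions), outputs.attentions[0].shape)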
Code Example #8
    def __init__(self, config, num_classes, vocab=None) -> None:
        super(KobertCRF, self).__init__()
        self.name = self.__class__.__name__

        if vocab is None:  # use the pretrained KoBERT model and its vocabulary
            self.bert, self.vocab = get_kobert_model()
        else:  # use a fine-tuned model: rebuild BERT from bert_config and reuse the given vocab
            self.bert = BertModel(config=BertConfig.from_dict(bert_config))
            #self.bert = get_bert_multi_model()
            self.vocab = vocab

        self.dropout = nn.Dropout(config.dropout)
        self.position_wise_ff = nn.Linear(config.hidden_size, num_classes)
        self.crf = CRF(num_tags=num_classes, batch_first=True)
Code Example #9
    def __init__(self, config, num_classes, vocab=None) -> None:
        super(KobertBiLSTMCRF, self).__init__()
        self.name = self.__class__.__name__
        self.config = config

        if vocab is None:  # use the pretrained KoBERT model and its vocabulary
            self.bert, self.vocab = get_kobert_model()
        else:  # use a fine-tuned model: rebuild BERT from bert_config and reuse the given vocab
            self.bert = BertModel(config=BertConfig.from_dict(bert_config))
            self.vocab = vocab
        self._pad_id = self.vocab.token_to_idx[self.vocab.padding_token]
        self.num_layers = 1
        self.dropout = nn.Dropout(config.dropout)
        self.hidden_size = config.hidden_size
        # Bidirectional LSTM over the BERT outputs; each direction gets half the hidden size
        self.bilstm = nn.LSTM(self.hidden_size, self.hidden_size // 2,
                              num_layers=1, dropout=config.dropout,
                              batch_first=True, bidirectional=True)
        self.position_wise_ff = nn.Linear(self.hidden_size, num_classes)
        self.crf = CRF(num_tags=num_classes, batch_first=True)
Code Example #10
    def __init__(self, config, num_classes, vocab=None) -> None:
        super(KobertBiGRUCRF, self).__init__()

        if vocab is None:  # use the pretrained KoBERT model and its vocabulary
            self.bert, self.vocab = get_pytorch_kobert_model()
        else:  # use a fine-tuned model: rebuild BERT from bert_config and reuse the given vocab
            self.bert = BertModel(config=BertConfig.from_dict(bert_config))
            self.vocab = vocab
        self._pad_id = self.vocab.token_to_idx[self.vocab.padding_token]

        self.dropout = nn.Dropout(config.dropout)
        # Bidirectional GRU over the BERT outputs; each direction gets half the hidden size
        self.bigru = nn.GRU(config.hidden_size, config.hidden_size // 2,
                            dropout=config.dropout,
                            batch_first=True,
                            bidirectional=True)
        self.position_wise_ff = nn.Linear(config.hidden_size, num_classes)
        self.crf = CRF(num_tags=num_classes, batch_first=True)