Beispiel #1
0
 def __init__(self,
              my_device=torch.device('cuda:2'),
              model_name='roberta.hdf5',
              model_path=current_directory_path +
              '/external_pretrained_models/'):
     """Load the pretrained tagger and build a sentence-tagging predictor.

     Args:
         my_device: CUDA device the tagger is moved to.
         model_name: File name of the saved state dict; it is appended
             directly to ``model_path``.
         model_path: Path prefix (including the trailing separator) under
             which the checkpoint file and the ``vocab_dir`` vocabulary
             directory are looked up.

     Raises:
         RuntimeError: If building the vocabulary, embedder, model or
             dataset reader fails, or if the predictor cannot be created.
             The underlying exception is chained as ``__cause__``.
     """
     self.answ = "UNKNOWN ERROR"
     self.model_name = model_name
     self.model_path = model_path
     self.first_object = ''
     self.second_object = ''
     self.predicates = ''
     self.aspects = ''
     cuda_device = my_device
     # A list rather than a set: span objects are dicts and dicts are
     # unhashable. An add_span function is used to keep entries unique.
     self.spans = []
     try:
         print(self.model_path + self.model_name)
         print(model_path + "vocab_dir")
         vocab = Vocabulary.from_files(model_path + "vocab_dir")
         BERT_MODEL = 'google/electra-base-discriminator'
         embedder = PretrainedTransformerMismatchedEmbedder(
             model_name=BERT_MODEL)
         text_field_embedder = BasicTextFieldEmbedder({'tokens': embedder})
         seq2seq_encoder = PassThroughEncoder(
             input_dim=embedder.get_output_dim())
         print("encoder loaded")
         self.indexer = PretrainedTransformerMismatchedIndexer(
             model_name=BERT_MODEL)
         print("indexer loaded")
         self.model = SimpleTagger(
             text_field_embedder=text_field_embedder,
             vocab=vocab,
             encoder=seq2seq_encoder,
             calculate_span_f1=True,
             label_encoding='IOB1').cuda(device=cuda_device)
         # Map checkpoint tensors onto the device the model actually lives
         # on, so a checkpoint saved on another (possibly absent) GPU index
         # still loads. Reading the device from the parameters always
         # yields a torch.device, whatever form my_device was given in.
         target_device = next(self.model.parameters()).device
         self.model.load_state_dict(
             torch.load(self.model_path + self.model_name,
                        map_location=target_device))
         print("model loaded")
         self.reader = Conll2003DatasetReader(
             token_indexers={'tokens': self.indexer})
         print("reader loaded")
     except Exception as err:
         # Exception (not a bare except) so KeyboardInterrupt/SystemExit
         # propagate untouched; repr keeps both the type and the message.
         print("exeption while mapping to gpu in extractor ", repr(err))
         raise RuntimeError(
             "Init extractor: can't map to gpu. Maybe it is OOM") from err
     try:
         self.predictor = SentenceTaggerPredictor(self.model, self.reader)
     except Exception as err:
         print("exeption in creating predictor ", repr(err))
         # Message text kept as-is for callers; the real cause is chained.
         raise RuntimeError(
             "Init extractor: can't map to gpu. Maybe it is WTF") from err
Beispiel #2
0
        def model_ctor():
            """Build a SimpleTagger on top of the configured transformer.

            The transformer named by ``self._bert_model_type`` is wrapped in
            a mismatched embedder, fed through a pass-through encoder, and
            the resulting tagger (IOB1 labels, span F1 enabled) is moved to
            CUDA before being returned. This replaces an earlier
            BertForTokenClassificationCustom / SequenceTaggerBert based
            constructor.
            """
            transformer_embedder = PretrainedTransformerMismatchedEmbedder(
                model_name=self._bert_model_type)

            model = SimpleTagger(
                text_field_embedder=BasicTextFieldEmbedder(
                    {'tokens': transformer_embedder}),
                vocab=self.vocab,
                # The encoder adds no layers of its own; it only needs to
                # know the embedder's output width.
                encoder=PassThroughEncoder(
                    input_dim=transformer_embedder.get_output_dim()),
                calculate_span_f1=True,
                label_encoding='IOB1')

            return model.cuda()