def setUp(self):
    super().setUp()

    vocab_path = self.FIXTURES_ROOT / "bert" / "vocab.txt"
    self.token_indexer = PretrainedBertIndexer(str(vocab_path))

    config_path = self.FIXTURES_ROOT / "bert" / "config.json"
    config = BertConfig.from_json_file(str(config_path))
    self.bert_model = BertModel(config)
    self.token_embedder = BertEmbedder(self.bert_model)
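# A hedged sketch (an assumption, not the fixture's actual contents) of the
# kind of tiny configuration such a test fixture config.json encodes: a couple
# of layers and a small hidden size so the test model builds in milliseconds.
# Written against the modern transformers-style BertConfig API.
from transformers import BertConfig, BertModel

tiny_config = BertConfig(
    vocab_size=20,            # assumed: must match the fixture vocab.txt
    hidden_size=12,           # must be divisible by num_attention_heads
    num_hidden_layers=2,
    num_attention_heads=2,
    intermediate_size=24,
)
tiny_model = BertModel(tiny_config)  # randomly initialized, test-sized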
def __init__(self, args):
    super().__init__()
    self.args = args
    self.bert_config = BertConfig.from_pretrained(
        self.args.bert_config_dir, output_hidden_states=False)
    self.bert = BertModel(self.bert_config)
    # Flatten one hidden vector per position of a 1001-token input and map
    # the result to 919 output targets.
    self.linear = nn.Linear(self.bert_config.hidden_size * 1001, 919)
    self.threshold = nn.Threshold(0, 1e-6)
    self.linear2 = nn.Linear(919, 919)
    self.sigmoid = nn.Sigmoid()
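# A hedged reading of the head above. The forward pass is an assumption
# reconstructed from the layer shapes in __init__, not code from the source:
def forward(self, input_ids):
    seq_out = self.bert(input_ids)[0]             # [batch, 1001, hidden]
    flat = seq_out.reshape(seq_out.size(0), -1)   # [batch, hidden * 1001]
    hidden = self.threshold(self.linear(flat))    # values <= 0 become 1e-6
    return self.sigmoid(self.linear2(hidden))     # [batch, 919] in (0, 1)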
def __init__(self, hparams):
    super().__init__(hparams)
    # super-light BERT model
    config = BertConfig(hidden_size=12,
                        num_hidden_layers=1,
                        num_attention_heads=1,
                        intermediate_size=12)
    self.model = BertForSequenceClassification(config)
    self.tokenizer = BertTokenizer.from_pretrained(
        "bert-base-cased", config=config, cache_dir=hparams.cache_dir)
def _build_word_embedding(self):
    self.bert_config = BertConfig.from_pretrained(self.config.bert_model_name)
    if self.config.pretrained_bert:
        bert_model = BertForPreTraining.from_pretrained(self.config.bert_model_name)
        self.word_embedding = bert_model.bert.embeddings
        self.pooler = bert_model.bert.pooler
        self.pooler.apply(self.init_weights)
    else:
        self.pooler = BertPooler(self.bert_config)
        self.word_embedding = BertEmbeddings(self.bert_config)
@classmethod
def init_encoder(cls,
                 cfg_name: str,
                 projection_dim: int = 0,
                 dropout: float = 0.1,
                 pretrained: bool = True,
                 **kwargs) -> BertModel:
    cfg = BertConfig.from_pretrained(cfg_name if cfg_name else "bert-base-uncased")
    if dropout != 0:
        cfg.attention_probs_dropout_prob = dropout
        cfg.hidden_dropout_prob = dropout
    if pretrained:
        return cls.from_pretrained(
            cfg_name, config=cfg, project_dim=projection_dim, **kwargs)
    else:
        return HFBertEncoder(cfg, project_dim=projection_dim)
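# A hypothetical call of the classmethod above; the model name and projection
# size are placeholders, not values from the source.
encoder = HFBertEncoder.init_encoder(
    "bert-base-uncased",
    projection_dim=128,
    dropout=0.1,
    pretrained=True,
)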
def __init__(self, config):
    super().__init__()
    self.config = config
    self.output_attentions = self.config.output_attentions
    self.output_hidden_states = self.config.output_hidden_states

    # If bert_model_name is not specified, you will need to specify
    # all of the required parameters for BERTConfig and a pretrained
    # model won't be loaded
    self.bert_model_name = getattr(self.config, "bert_model_name", None)
    self.bert_config = BertConfig.from_dict(
        OmegaConf.to_container(self.config, resolve=True))
    if self.bert_model_name is None:
        self.bert = VisualBERTBase(
            self.bert_config,
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )
    else:
        self.bert = VisualBERTBase.from_pretrained(
            self.config.bert_model_name,
            config=self.bert_config,
            cache_dir=os.path.join(get_mmf_cache_dir(), "distributed_{}".format(-1)),
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )

    self.vocab_size = self.bert.config.vocab_size

    # TODO: Once omegaconf fixes int keys issue, bring this back
    # See https://github.com/omry/omegaconf/issues/149
    # with omegaconf.open_dict(self.config):
    #     # Add bert config such as hidden_state to our main config
    #     self.config.update(self.bert.config.to_dict())

    if self.bert_model_name is None:
        bert_masked_lm = BertForPreTraining(self.bert.config)
    else:
        bert_masked_lm = BertForPreTraining.from_pretrained(
            self.config.bert_model_name,
            config=self.bert.config,
            cache_dir=os.path.join(get_mmf_cache_dir(), "distributed_{}".format(-1)),
        )
    self.cls = deepcopy(bert_masked_lm.cls)
    self.loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
    self.init_weights()
def __init__(self, pretrained_model, vocab_size):
    super().__init__()
    self.vocab_size = vocab_size
    config = BertConfig(vocab_size=vocab_size)
    self.bert = BertModel.from_pretrained(pretrained_model, return_dict=True)
    self.decoder = BertLMPredictionHead(config)
    # Tie the decoder projection to the encoder's input word embeddings.
    self.decoder.decoder.weight.data = self.bert.embeddings.word_embeddings.weight.data
    for param in self.bert.parameters():
        param.requires_grad = True
    for param in self.decoder.parameters():
        param.requires_grad = True
def __init__(self, config, args): super().__init__(config) self.args = args if args.bert_model == "albert-base-v2": bert = AlbertModel.from_pretrained(args.bert_model) elif args.bert_model == "emilyalsentzer/Bio_ClinicalBERT": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bert-small-scratch": config = BertConfig.from_pretrained( "google/bert_uncased_L-4_H-512_A-8") bert = BertModel(config) elif args.bert_model == "bert-base-scratch": config = BertConfig.from_pretrained("bert-base-uncased") bert = BertModel(config) else: bert = BertModel.from_pretrained( args.bert_model) # bert-base-uncased, small, tiny self.txt_embeddings = bert.embeddings self.img_embeddings = ImageBertEmbeddings(args, self.txt_embeddings) if args.img_encoder == 'ViT': img_size = args.img_size patch_sz = 32 if img_size == 512 else 16 self.img_encoder = Img_patch_embedding(image_size=img_size, patch_size=patch_sz, dim=2048) else: self.img_encoder = ImageEncoder_cnn(args) for p in self.img_encoder.parameters(): p.requires_grad = False for c in list(self.img_encoder.children())[5:]: for p in c.parameters(): p.requires_grad = True self.encoder = bert.encoder self.pooler = bert.pooler
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise the PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from the TF checkpoint
    load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save the PyTorch model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
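# A usage sketch with placeholder paths; the real files are whatever a TF
# BERT pretraining run produced.
convert_tf_checkpoint_to_pytorch(
    tf_checkpoint_path="models/uncased_L-12_H-768_A-12/bert_model.ckpt",
    bert_config_file="models/uncased_L-12_H-768_A-12/bert_config.json",
    pytorch_dump_path="models/pytorch_model.bin",
)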
def __init__(self, config):
    super(BertModel, self).__init__(config)
    self.task_specific_tokens = config.task_specific_tokens
    t_config = BertConfig.from_dict(config.t_config)
    v_config = BertConfig.from_dict(config.v_config)

    # initialize the word embedding
    if config.model == 'bert':
        self.embeddings = BertEmbeddings(t_config)
    elif config.model == 'roberta':
        self.embeddings = RobertaEmbeddings(t_config)

    # initialize the vision embedding
    self.v_embeddings = BertImageEmbeddings(v_config)

    self.encoder = BertEncoder(config)
    self.t_pooler = BertTextPooler(config)
    self.v_pooler = BertImagePooler(config)
    self.init_weights()
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    ptr_config_info = Config(f"conf/pretrained/{model_config.type}.json")

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
        f"_weight_decay_{args.weight_decay}"
    )

    preprocessor = get_preprocessor(ptr_config_info, model_config)

    with open(ptr_config_info.config, mode="r") as io:
        ptr_config = json.load(io)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint('best.tar')
    config = BertConfig()
    config.update(ptr_config)
    model = PairwiseClassifier(config, num_classes=model_config.num_classes,
                               vocab=preprocessor.vocab)
    model.load_state_dict(checkpoint['model_state_dict'])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)

    summary_manager.load('summary.json')
    summary_manager.update({'{}'.format(args.data): summary})
    summary_manager.save('summary.json')

    print('loss: {:.3f}, acc: {:.2%}'.format(summary['loss'], summary['acc']))
def load_model(self):
    self.tokenizer = BertTokenizer.from_pretrained(
        self.args.pretrained_path, do_lower_case=self.args.do_lower_case)
    self.config = BertConfig.from_pretrained(
        self.args.pretrained_path, num_labels=self.args.num_labels)
    if self.args.resume_model:
        self.model = BertForMultiLable.from_pretrained(
            self.args.resume_model_path, config=self.config)
        with open(self.threshold_path, 'r') as f:
            self.threshold = float(f.read())  # read the best model's threshold
    else:
        self.model = BertForMultiLable.from_pretrained(
            self.args.pretrained_path, config=self.config)
    if self.args.cuda:
        self.model.cuda()
        if self.args.n_gpus > 1:
            self.model = DataParallel(self.model)
def __init__(self, config, bert_model_embedding_weights):
    super(BertPreTrainingHeads, self).__init__()
    t_config = BertConfig.from_dict(config.t_config)
    self.causal_predictor_t2v = BertLMPredictionHead(
        t_config, bert_model_embedding_weights, 768)
    self.causal_predictor_t = BertLMPredictionHead(
        t_config, bert_model_embedding_weights, 768)
    self.predictions = BertLMPredictionHead(t_config, bert_model_embedding_weights, 768)
    self.bi_seq_relationship = nn.Linear(config.bi_hidden_size, 2)

    v_config = BertConfig.from_dict(config.v_config)
    self.causal_predictor_v2t = BertImagePredictionHead(v_config, 1024)
    # Causal loss; this must come first because it modifies config.v_hidden_size.
    self.causal_predictor_v = BertImagePredictionHead(v_config, 2048)
    # Analogous to the earlier mask_loss_v.
    self.imagePredictions = BertImagePredictionHead(v_config, 1024)

    self.fusion_method = config.fusion_method
    self.dropout = nn.Dropout(0.1)
    self.criterion_v = nn.KLDivLoss(reduction='none')
    self.criterion_t = CrossEntropyLoss(ignore_index=-1)
def __init__(self, model_path, vocab: Vocabulary):
    super().__init__(vocab)
    self.pretrained_tokenizer = BertForPreTraining.from_pretrained(model_path)
    config = BertConfig.from_pretrained(model_path)
    bert_model = BertForPreTraining(config)
    self.bert = bert_model.bert

    tags = vocab.get_index_to_token_vocabulary("tags")
    num_tags = len(tags)
    constraints = allowed_transitions(constraint_type="BMES", labels=tags)
    self.projection = torch.nn.Linear(768, num_tags)
    self.crf = ConditionalRandomField(num_tags=num_tags,
                                      constraints=constraints,
                                      include_start_end_transitions=False)
def __init__(self, config, args): super().__init__(config) self.args = args if args.bert_model == "emilyalsentzer/Bio_ClinicalBERT": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bert-small-scratch": config = BertConfig.from_pretrained( "google/bert_uncased_L-4_H-512_A-8") bert = BertModel(config) elif args.bert_model == "bert-base-scratch": config = BertConfig.from_pretrained("bert-base-uncased") bert = BertModel(config) else: bert = BertModel.from_pretrained( args.bert_model) # bert-base-uncased, small, tiny self.txt_embeddings = bert.embeddings self.encoder = bert.encoder self.pooler = bert.pooler
def __init__(self, config):
    super().__init__()
    self.config = config
    self.output_attentions = self.config.output_attentions
    self.output_hidden_states = self.config.output_hidden_states
    self.pooler_strategy = self.config.get("pooler_strategy", "default")

    # If bert_model_name is not specified, you will need to specify
    # all of the required parameters for BERTConfig and a pretrained
    # model won't be loaded
    self.bert_model_name = getattr(self.config, "bert_model_name", None)
    self.bert_config = BertConfig.from_dict(
        OmegaConf.to_container(self.config, resolve=True)
    )
    if self.bert_model_name is None:
        self.bert = VisualBERTBase(
            self.bert_config,
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )
    else:
        self.bert = VisualBERTBase.from_pretrained(
            self.config.bert_model_name,
            config=self.bert_config,
            cache_dir=os.path.join(
                get_mmf_cache_dir(), "distributed_{}".format(-1)
            ),
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )

    self.training_head_type = self.config.training_head_type
    self.num_labels = self.config.num_labels
    self.dropout = Dropout(self.bert.config.hidden_dropout_prob)
    if self.config.training_head_type == "nlvr2":
        self.bert.config.hidden_size *= 2

    self.classifier = Sequential(
        BertPredictionHeadTransform(self.bert.config),
        Linear(self.bert.config.hidden_size, self.config.num_labels),
    )
    self.vqa_pooler = IndexSelect()
    self.init_weights()
def test_sliding_window(self):
    tokenizer = BertPreTokenizer()
    sentence = "the quickest quick brown fox jumped over the lazy dog"
    tokens = tokenizer.tokenize(sentence)

    vocab = Vocabulary()
    vocab_path = self.FIXTURES_ROOT / "bert" / "vocab.txt"
    token_indexer = PretrainedBertIndexer(str(vocab_path),
                                          truncate_long_sequences=False,
                                          max_pieces=8)

    config_path = self.FIXTURES_ROOT / "bert" / "config.json"
    config = BertConfig.from_json_file(str(config_path))
    bert_model = BertModel(config)
    token_embedder = BertEmbedder(bert_model, max_pieces=8)

    instance = Instance({"tokens": TextField(tokens, {"bert": token_indexer})})

    batch = Batch([instance])
    batch.index_instances(vocab)
    padding_lengths = batch.get_padding_lengths()
    tensor_dict = batch.as_tensor_dict(padding_lengths)
    tokens = tensor_dict["tokens"]["bert"]

    # 16 = [CLS], 17 = [SEP]
    # 1 full window + 1 half window with start/end tokens
    assert tokens["input_ids"].tolist() == [[
        16, 2, 3, 4, 3, 5, 6, 17,
        16, 3, 5, 6, 8, 9, 2, 17,
        16, 8, 9, 2, 14, 12, 17
    ]]
    assert tokens["offsets"].tolist() == [[1, 3, 4, 5, 6, 7, 8, 9, 10, 11]]

    bert_vectors = token_embedder(tokens["input_ids"])
    assert list(bert_vectors.shape) == [1, 13, 12]

    # Testing without token_type_ids
    bert_vectors = token_embedder(tokens["input_ids"], offsets=tokens["offsets"])
    assert list(bert_vectors.shape) == [1, 10, 12]

    # Testing with token_type_ids
    bert_vectors = token_embedder(tokens["input_ids"],
                                  offsets=tokens["offsets"],
                                  token_type_ids=tokens["token_type_ids"])
    assert list(bert_vectors.shape) == [1, 10, 12]
def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device,
             write_log, summary_writer):
    self.job_config = job_config

    if not use_pretrain:
        model_config = self.job_config.get_model_config()
        bert_config = BertConfig(**model_config)
        bert_config.vocab_size = len(tokenizer.vocab)
        self.bert_encoder = BertModel(bert_config)
    # Use pretrained bert weights
    else:
        self.bert_encoder = BertModel.from_pretrained(self.job_config.get_model_file_type())
        bert_config = self.bert_encoder.config

    self.bert_encoder.to(device)
    self.network = MTLRouting(self.bert_encoder,
                              write_log=write_log,
                              summary_writer=summary_writer)

    loss_calculation = BertPretrainingLoss(self.bert_encoder, bert_config)
    loss_calculation.to(device)

    # Pretrain Dataset
    self.network.register_batch(BatchType.PRETRAIN_BATCH,
                                "pretrain_dataset",
                                loss_calculation=loss_calculation)
    self.device = device
def load_model(self):
    self.tokenizer = MyBertTokenizer.from_pretrained(
        self.args.pretrained_path, do_lower_case=self.args.do_lower_case)
    self.config = BertConfig.from_pretrained(
        self.args.pretrained_path, num_labels=self.args.num_labels)
    if self.args.resume_model:
        self.model = BertCrfForNer.from_pretrained(
            self.args.resume_model_path, config=self.config)
    else:
        self.model = BertCrfForNer.from_pretrained(
            self.args.pretrained_path, config=self.config)
    if self.args.cuda:
        self.model.cuda()
        if self.args.n_gpus > 1:
            self.model = DataParallel(self.model)
def test_model_from_pretrained(self):
    logging.basicConfig(level=logging.INFO)
    for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        config = BertConfig.from_pretrained(model_name)
        self.assertIsNotNone(config)
        self.assertIsInstance(config, PretrainedConfig)

        model = BertModel.from_pretrained(model_name)
        model, loading_info = BertModel.from_pretrained(model_name,
                                                        output_loading_info=True)
        self.assertIsNotNone(model)
        self.assertIsInstance(model, PreTrainedModel)
        for value in loading_info.values():
            self.assertEqual(len(value), 0)

        config = BertConfig.from_pretrained(model_name,
                                            output_attentions=True,
                                            output_hidden_states=True)
        model = BertModel.from_pretrained(model_name,
                                          output_attentions=True,
                                          output_hidden_states=True)
        self.assertEqual(model.config.output_attentions, True)
        self.assertEqual(model.config.output_hidden_states, True)
        self.assertEqual(model.config, config)
def __init__(self, **kwargs):
    super().__init__()
    self.config = kwargs
    self.output_attentions = self.config['output_attentions']
    self.output_hidden_states = self.config['output_hidden_states']
    self.pooler_strategy = self.config.get('pooler_strategy', 'default')

    # If bert_model_name is not specified, you will need to specify
    # all of the required parameters for BERTConfig and a pretrained
    # model won't be loaded
    self.bert_model_name = self.config['bert_model_name']
    self.bert_config = BertConfig.from_dict(self.config)
    if self.bert_model_name is None:
        self.bert = VisualBERTBase(
            self.bert_config,
            visual_embedding_dim=self.config['visual_embedding_dim'],
            embedding_strategy=self.config['embedding_strategy'],
            bypass_transformer=self.config['bypass_transformer'],
            output_attentions=self.config['output_attentions'],
            output_hidden_states=self.config['output_hidden_states'],
        )
    else:
        from imix.utils.config import ToExpanduser
        cache_dir = os.path.join('~/.cache/torch', 'transformers')
        cache_dir = ToExpanduser.modify_path(cache_dir)

        self.bert = VisualBERTBase.from_pretrained(
            self.config['bert_model_name'],
            config=self.bert_config,
            cache_dir=cache_dir,
            visual_embedding_dim=self.config['visual_embedding_dim'],
            embedding_strategy=self.config['embedding_strategy'],
            bypass_transformer=self.config['bypass_transformer'],
            output_attentions=self.config['output_attentions'],
            output_hidden_states=self.config['output_hidden_states'],
        )

    self.training_head_type = self.config['training_head_type']
    self.num_labels = self.config['num_labels']
    self.dropout = nn.Dropout(self.bert.config.hidden_dropout_prob)
    if self.config['training_head_type'] == 'nlvr2':
        self.bert.config.hidden_size *= 2

    self.classifier = nn.Sequential(
        BertPredictionHeadTransform(self.bert.config),
        nn.Linear(self.bert.config.hidden_size, self.config['num_labels']),
    )
    self.init_weights()
def init_data(self, use_cuda: bool) -> None:
    self.test_device = torch.device('cuda:0') if use_cuda else torch.device('cpu:0')
    if not use_cuda:
        torch.set_num_threads(4)

    torch.set_grad_enabled(False)
    self.cfg = BertConfig()

    self.torch_pooler = BertPooler(self.cfg)
    if torch.cuda.is_available():
        self.torch_pooler.to(self.test_device)
    self.torch_pooler.eval()

    self.turbo_pooler = turbo_transformers.BertPooler.from_torch(self.torch_pooler)
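# A hedged smoke-test sketch for the poolers above. It assumes the turbo
# pooler accepts the same [batch, seq_len, hidden] tensor as the torch one;
# verify against the turbo_transformers test suite before relying on it.
hidden_states = torch.rand(2, 10, self.cfg.hidden_size, device=self.test_device)
torch_pooled = self.torch_pooler(hidden_states)   # tanh(dense(first token))
turbo_pooled = self.turbo_pooler(hidden_states)
assert torch.allclose(torch_pooled, turbo_pooled, atol=1e-3)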
def init_data(self, use_cuda) -> None:
    torch.set_grad_enabled(False)
    torch.set_num_threads(4)
    turbo_transformers.set_num_threads(4)
    self.test_device = torch.device('cuda:0') if use_cuda else torch.device('cpu:0')

    self.cfg = BertConfig()
    self.torch_model = BertModel(self.cfg)
    self.torch_model.eval()
    if torch.cuda.is_available():
        self.torch_model.to(self.test_device)

    self.turbo_model = turbo_transformers.BertModel.from_torch(
        self.torch_model, self.test_device, "turbo")
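# A hedged comparison sketch for the two models above; it assumes the turbo
# model is called like the torch one and that both return the sequence output
# in position 0.
input_ids = torch.randint(low=0, high=self.cfg.vocab_size - 1, size=(1, 10),
                          dtype=torch.long, device=self.test_device)
torch_result = self.torch_model(input_ids)
turbo_result = self.turbo_model(input_ids)
assert torch.max(torch.abs(torch_result[0] - turbo_result[0])) < 1e-2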
def __init__(self, image_task_dict=None, dropout=1e-1):
    super(BertResnetEnsembleForMultiTaskClassification, self).__init__()

    # Define text architecture
    config = BertConfig()
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(dropout)

    self.image_task_dict = image_task_dict
    self.text_task_dict = self.create_text_dict(image_task_dict)

    # Define image architecture
    image_resnets = {}
    image_dense_layers = {}
    ensemble_layers = {}
    for key in self.image_task_dict.keys():
        resnet = torch_models.resnet50(pretrained=False)
        resnet.fc = _Identity()
        image_resnets[key] = resnet
        image_dense_layers[key] = nn.Sequential(
            _dense_block(2048 * 2, 1024, 2e-3),
            _dense_block(1024, 512, 2e-3),
            _dense_block(512, 256, 2e-3)
        )
        # Define final ensemble before classifier layers.
        # The input is size 768 from BERT and 256 from the ResNet50 models,
        # so the total size is 1024.
        ensemble_layers[key] = nn.Sequential(
            _dense_block(1024, 512, 2e-3),
            _dense_block(512, 512, 2e-3),
            _dense_block(512, 256, 2e-3),
        )

    self.image_resnets = nn.ModuleDict(image_resnets)
    self.image_dense_layers = nn.ModuleDict(image_dense_layers)
    self.ensemble_layers = nn.ModuleDict(ensemble_layers)

    pretrained_layers = {}
    for key, task_size in self.text_task_dict.items():
        pretrained_layers[key] = nn.Linear(256, task_size)
    self.classifiers = nn.ModuleDict(pretrained_layers)
def __init__(self, config, args):
    super().__init__(config)
    if args.weight_load:
        config = AutoConfig.from_pretrained(args.load_pretrained_model)
        model_state_dict = torch.load(
            os.path.join(args.load_pretrained_model, 'pytorch_model.bin'))
        cxrbert = CXRBERT.from_pretrained(args.load_pretrained_model,
                                          state_dict=model_state_dict,
                                          config=config, args=args)
    else:
        config = BertConfig.from_pretrained('bert-base-uncased')
        cxrbert = CXRBERT(config, args)
    self.enc = cxrbert.enc
    self.itm = cxrbert.itm
def __init__(self, embedding_matrix, opt):
    super(LCA_GLOVE, self).__init__()
    # Only a few of the parameters in config.json are actually needed,
    # such as hidden_size and num_attention_heads.
    self.config = BertConfig.from_json_file("utils/bert_config.json")
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.lc_embed = nn.Embedding(2, opt.embed_dim)
    self.global_encoder1 = SelfAttention(self.config, opt)
    self.local_encoder1 = SelfAttention(self.config, opt)
    self.local_encoder2 = SelfAttention(self.config, opt)
    self.mha = SelfAttention(self.config, opt)
    self.pool = BertPooler(self.config)
    self.dropout = nn.Dropout(opt.dropout)
    self.linear = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
    self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
    self.classifier = nn.Linear(opt.embed_dim, 2)
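# A hedged sketch of the handful of fields such a utils/bert_config.json
# needs; the values here are assumptions, not the repository's actual file.
# hidden_size should equal opt.embed_dim (the GloVe dimension) and must be
# divisible by num_attention_heads.
minimal_config = BertConfig(
    hidden_size=300,
    num_attention_heads=6,
)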
def setUp(self):
    self.monkeypatch = MonkeyPatch()

    # monkeypatch the PretrainedBertModel to return the tiny test fixture model
    config_path = self.FIXTURES_ROOT / "bert" / "config.json"
    vocab_path = self.FIXTURES_ROOT / "bert" / "vocab.txt"
    config = BertConfig.from_json_file(config_path)
    self.monkeypatch.setattr(BertModel, "from_pretrained",
                             lambda _: BertModel(config))
    self.monkeypatch.setattr(BertTokenizer, "from_pretrained",
                             lambda _: BertTokenizer(vocab_path))

    super().setUp()
    self.set_up_model(
        self.FIXTURES_ROOT / "bert_srl" / "experiment.jsonnet",
        self.FIXTURES_ROOT / "conll_2012",
    )
def model_builder(model_name_or_path: str,
                  num_labels: int,
                  feat_config_path: str = None,
                  one_hot_embed: bool = True,
                  use_lstm=False,
                  device: torch.device = torch.device("cpu")):
    feature = None
    if feat_config_path is not None:
        feature = Feature(feat_config_path, one_hot_embed)

    config = BertConfig.from_pretrained(model_name_or_path, num_labels=num_labels)
    model = NerModel.from_pretrained(model_name_or_path,
                                     config=config,
                                     feature=feature,
                                     use_lstm=use_lstm,
                                     device=device)
    return config, model, feature
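# A hypothetical invocation of model_builder; the checkpoint name and label
# count are placeholders (e.g. a 9-tag BIO scheme for CoNLL-style NER).
config, model, feature = model_builder(
    "bert-base-cased",
    num_labels=9,
    feat_config_path=None,
    use_lstm=False,
    device=torch.device("cpu"),
)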
def __init__(self, embedding_matrix, opt):
    super(LCF_GLOVE, self).__init__()
    self.config = BertConfig.from_json_file("utils/bert_config.json")
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.mha_global = SelfAttention(self.config, opt)
    self.mha_local = SelfAttention(self.config, opt)
    self.ffn_global = PositionwiseFeedForward(self.opt.embed_dim,
                                              dropout=self.opt.dropout)
    self.ffn_local = PositionwiseFeedForward(self.opt.embed_dim,
                                             dropout=self.opt.dropout)
    self.mha_local_SA = SelfAttention(self.config, opt)
    self.mha_global_SA = SelfAttention(self.config, opt)
    self.pool = BertPooler(self.config)
    self.dropout = nn.Dropout(opt.dropout)
    self.linear = nn.Linear(opt.embed_dim * 2, opt.embed_dim)
    self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
def init_bert_models(self, use_cuda: bool) -> None:
    self.test_device = torch.device('cuda:0') if use_cuda else torch.device('cpu:0')
    if not use_cuda:
        torch.set_num_threads(1)

    torch.set_grad_enabled(False)
    self.cfg = BertConfig(attention_probs_dropout_prob=0.0,
                          hidden_dropout_prob=0.0)

    self.torch_model = BertModel(self.cfg)
    self.torch_model.eval()
    if use_cuda:
        self.torch_model.to(self.test_device)

    self.hidden_size = self.cfg.hidden_size
    self.turbo_model = turbo_transformers.BertModelSmartBatch.from_torch(self.torch_model)