def get_embedding(embed_type):
    if embed_type == 'BioBERT':
        model_loc = './auxiliary/pretrained_bert_tf/biobert_pretrain_output_all_notes_150000/'
        tokenizer = BertTokenizer.from_pretrained(model_loc, do_lower_case=True)
        cache_dir = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, 'distributed_{}'.format(-1))
        model = BertModel.from_pretrained(model_loc, cache_dir=cache_dir)
        indexer = None
    elif embed_type == 'BERT':
        model_loc = './auxiliary/pretrained_bert_tf/bert_pretrain_output_all_notes_150000/'
        tokenizer = BertTokenizer.from_pretrained(model_loc, do_lower_case=True)
        cache_dir = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, 'distributed_{}'.format(-1))
        model = BertModel.from_pretrained(model_loc, cache_dir=cache_dir)
        indexer = None
    elif embed_type == 'CharBERT':
        model_loc = './auxiliary/pretrained_character_bert/general_character_bert/'
        model = CharacterBertModel.from_pretrained(model_loc)
        tokenizer = BertTokenizer.from_pretrained(
            './auxiliary/pretrained_bert_tf/bert_pretrain_output_all_notes_150000/')
        indexer = CharacterIndexer()
    elif embed_type == 'BioCharBERT':
        model_loc = './auxiliary/pretrained_character_bert/medical_character_bert/'
        model = CharacterBertModel.from_pretrained(model_loc)
        tokenizer = BertTokenizer.from_pretrained(
            './auxiliary/pretrained_bert_tf/biobert_pretrain_output_all_notes_150000/')
        indexer = CharacterIndexer()
    return indexer, tokenizer, model
def __init__(self,
             encoder_size=64,
             dim_num_feat=0,
             dropout=0.2,
             seq_dropout=0.1,
             num_outputs=5):
    super(EntityLink_bert, self).__init__()
    # self.word_embedding = nn.Embedding(vocab_size, word_embed_size, padding_idx=0)
    # self.pos_embedding = nn.Embedding(pos_embed_size, pos_dim, padding_idx=0)
    self.seq_dropout = seq_dropout
    self.dropout1d = nn.Dropout2d(self.seq_dropout)
    self.span_extractor = EndpointSpanExtractor(
        encoder_size * 2, combination="x,x+y,y")  # selfspanextractor performed very poorly
    bert_model = 'bert-base-chinese'
    self.bert = BertModel.from_pretrained(bert_model)
    self.use_layer = -1
    self.LSTM = LSTMEncoder(embed_size=768,
                            encoder_size=encoder_size,
                            bidirectional=True)
    hidden_size = 100
    self.hidden = nn.Linear(2 * encoder_size, num_outputs)
    self.classify = nn.Sequential(
        nn.BatchNorm1d(4 * 768),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=4 * 768, out_features=num_outputs))
    self.attn_pool = Attention(2 * encoder_size)
def __init__(self, param):
    super().__init__()
    self.args = args = param.args
    self.param = param
    self.bert_exclude = BertModel.from_pretrained(args.bert_model)
    self.drop = nn.Dropout(args.droprate)
def __init__(self,
             pretrained_model: str,
             requires_grad: bool = False,
             top_layer_only: bool = False) -> None:
    model = BertModel.from_pretrained(pretrained_model)
    for param in model.parameters():
        param.requires_grad = requires_grad
    super().__init__(bert_model=model, top_layer_only=top_layer_only)
def __init__(self, bert_path):
    super().__init__()
    # Load BERT; its parameters are left trainable (fine-tuned end to end)
    self.bert = BertModel.from_pretrained(bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.output = nn.Sequential(nn.Dropout(0.2), nn.Linear(768, 3))
def _init_embeddings(self):
    ''' Initialise embeddings '''
    if self.elmo:
        self.embedding_dim += 1024 * 3
        options_file = "/data/models/pytorch/elmo/options/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json"
        weight_file = "/data/models/pytorch/elmo/weights/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5"
        self.elmo_embedder = ElmoEmbedder(options_file, weight_file, cuda_device=0)
    if self.glove:
        self.embedding_dim += 300
        self.vocab = glove_embeddings.index.tolist()
        self.num_embeddings = glove_embeddings.shape[0]
        self.glove_embedder = torch.nn.Embedding(self.num_embeddings,
                                                 self.embedding_dim,
                                                 max_norm=None,
                                                 norm_type=2,
                                                 scale_grad_by_freq=False,
                                                 sparse=False)
        self.glove_embedder.weight.data.copy_(
            torch.from_numpy(glove_embeddings.values))
        self.glove_embedder.weight.requires_grad = False
        self.vocab_hash = {w: i for i, w in enumerate(self.vocab)}
    if self.type:
        self.embedding_dim += type_dim
    if self.token:
        self.embedding_dim += token_dim
    if self.bert:
        self.bert_tokenizer = CustomBertTokenizer.from_pretrained(
            "bert-large-cased", do_lower_case=False)
        self.bert_embedder = BertModel.from_pretrained(
            "bert-large-cased").to(self.device)
def __init__(self, bert_model: str) -> None:
    super().__init__()
    self.bert = BertModel.from_pretrained(bert_model)
    self.dropout = nn.Dropout(self.bert.config.hidden_dropout_prob)
    self.classifier = nn.Linear(self.bert.config.hidden_size, 2)
def __init__(
    self,
    bert_model=None,
    tokenizer=None,
    language=Language.ENGLISH,
    num_gpus=None,
    cache_dir=".",
    to_lower=True,
    max_len=512,
    layer_index=-1,
    pooling_strategy=PoolingStrategy.MEAN,
):
    """Initialize the encoder's underlying model and tokenizer

    Args:
        bert_model: BERT model to use for encoding. Defaults to pretrained BertModel.
        tokenizer: Tokenizer to use for preprocessing. Defaults to pretrained BERT tokenizer.
        language: The pretrained model's language. Defaults to Language.ENGLISH.
        num_gpus: The number of gpus to use. Defaults to None, which uses all available GPUs.
        cache_dir: Location of BERT's cache directory. Defaults to ".".
        to_lower: True to lowercase before tokenization. Defaults to True.
        max_len: Maximum number of tokens.
        layer_index: The layer from which to extract features. Defaults to the last layer;
            can also be a list of integers for experimentation.
        pooling_strategy: Pooling strategy to aggregate token embeddings into a sentence embedding.
    """
    self.model = (bert_model.model.bert if bert_model else
                  BertModel.from_pretrained(language, cache_dir=cache_dir))
    self.tokenizer = (tokenizer if tokenizer else
                      Tokenizer(language, to_lower=to_lower, cache_dir=cache_dir))
    self.num_gpus = num_gpus
    self.max_len = max_len
    self.layer_index = layer_index
    self.pooling_strategy = pooling_strategy
    self.has_cuda = self.cuda
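# Hedged sketch (not the library's implementation) of how token vectors from one
# encoder layer could be pooled into sentence embeddings, matching the
# `pooling_strategy` options described in the docstring above; the function and
# argument names here are illustrative only.
import torch

def pool_tokens(hidden_states, attention_mask, strategy="mean"):
    """hidden_states: (batch, seq_len, hidden); attention_mask: (batch, seq_len) of 0/1."""
    mask = attention_mask.unsqueeze(-1).float()
    if strategy == "mean":
        summed = (hidden_states * mask).sum(dim=1)   # ignore padding positions
        counts = mask.sum(dim=1).clamp(min=1e-9)
        return summed / counts
    if strategy == "max":
        masked = hidden_states.masked_fill(mask == 0, float("-inf"))
        return masked.max(dim=1).values
    raise ValueError("Unknown pooling strategy: {}".format(strategy))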
def dump_row(data, dump_path="../data/data_h5py/"):
    model_type = "bert-base-uncased"
    model = BertModel.from_pretrained(model_type).cuda()
    tokenizer = BertTokenizer.from_pretrained(model_type)
    for index in tqdm(data.index):
        _id = data['QID'].iloc[index]
        d = data.iloc[index].to_dict()
        query = d['Query']
        passages = d['Passages']
        label = d['RelevantPassage']
        query_tensor = process_sentence(str(query), tokenizer, model).cpu().detach().numpy()
        passage_tensor = []
        for passage in passages:
            passage_tensor.append(
                process_sentence(str(passage), tokenizer, model))
        passage_tensor = torch.cat(passage_tensor, 0).cpu().detach().numpy()
        label = torch.LongTensor([label]).numpy()
        data_dict = dict(query=query_tensor,
                         passages=passage_tensor,
                         label=label)
        with h5py.File(f'{dump_path}/{_id}.hdf5', 'w') as h:
            for k, v in data_dict.items():
                h.create_dataset(k, data=v)
        del data_dict
        del passage_tensor
        del query_tensor
        gc.collect()
def __init__(self, model, requires_grad=True):
    super(BertEmbedding, self).__init__()
    self.bert = BertModel.from_pretrained(model)
    # self.bert = self.bert.requires_grad_(requires_grad)
    self.requires_grad = requires_grad
    self.hidden_size = self.bert.config.hidden_size
def __init__(self, config):
    super(PairModel, self).__init__()
    self.bert = BertModel.from_pretrained(config.bert_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.out = nn.Linear(config.hidden_size, config.num_labels)
    self.config = config
def __init__(self,
             bert_model: str,
             device: torch.device,
             use_layer: list,
             linear_hidden_size=64,
             dist_embed_dim=4,
             token_dist_ratio=4,
             bert_cache=None):
    super().__init__()
    self.device = device
    self.use_layer = use_layer
    self.bert_cache = bert_cache
    if bert_model in ("bert-base-uncased", "bert-base-cased"):
        self.bert_hidden_size = 768
    elif bert_model in ("bert-large-uncased", "bert-large-cased"):
        self.bert_hidden_size = 1024
    else:
        raise ValueError("Unsupported BERT model.")
    # self.bert = BertModel.from_pretrained(bert_model).to(device)
    self.bert = BertModel.from_pretrained(
        "/home/gy/.pytorch_pretrained_bert/214d4777e8e3eb234563136cd3a49f6bc34131de836848454373fa43f10adc5e.abfbb80ee795a608acbf35c7bf2d2d58574df3887cdd94b355fc67e03fddba05"
    ).to(device)
    # self.bert = BertModel.from_pretrained(bert_model).to(device)
    self.head = Head(self.bert_hidden_size,
                     linear_hidden_size=linear_hidden_size,
                     dist_embed_dim=dist_embed_dim,
                     token_dist_ratio=token_dist_ratio,
                     use_layers=use_layer).to(device)
def __init__(
    self,
    pretrained_model: str,
    requires_grad: Union[List[int], str] = [],
    top_layer_only: bool = False
) -> None:
    model = BertModel.from_pretrained(pretrained_model)
    if isinstance(requires_grad, str):
        if requires_grad == "None":
            for param in model.parameters():
                param.requires_grad = False
        elif requires_grad == "all":
            for param in model.parameters():
                param.requires_grad = True
        else:
            raise NotImplementedError("Work in progress")
    elif isinstance(requires_grad, list):
        if len(requires_grad) == 0:
            # No finetuning required
            for param in model.parameters():
                param.requires_grad = False
        else:
            # Finetune the pooling layer and the layers mentioned in the list.
            # The layer index is anchored with a trailing "\." so that, e.g.,
            # requesting layer 1 does not also match layers 10-19.
            grad_str = "|".join([str(x) for x in requires_grad])
            match_str = r"encoder\.layer\.({0})\.".format(grad_str)
            for name, param in model.named_parameters():
                if re.match(match_str, name) is not None or "pooler" in name:
                    param.requires_grad = True
                    logger.info(f"Layer {name} is finetuned")
                else:
                    param.requires_grad = False
    super().__init__(bert_model=model, top_layer_only=top_layer_only)
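# Hedged illustration of the layer-selection regex used above: with the anchored
# pattern, requesting layers 10 and 11 does not accidentally match layer 1.
# The parameter names below are representative BERT parameter names, not taken
# from the snippet itself.
import re

requires_grad = [10, 11]
grad_str = "|".join(str(x) for x in requires_grad)
match_str = r"encoder\.layer\.({0})\.".format(grad_str)
for name in ("encoder.layer.1.attention.self.query.weight",
             "encoder.layer.10.output.dense.weight",
             "encoder.layer.11.attention.self.key.weight"):
    print(name, bool(re.match(match_str, name)))
# only the layer-10 and layer-11 parameters match ("pooler" is handled separately)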
def __init__(self, n_classes, hidden_size=768):
    super(VanillaBert, self).__init__()
    self.n_classes = n_classes
    self.hidden_size = hidden_size
    self.bert = BertModel.from_pretrained("bert-base-uncased")
    self.linear = torch.nn.Linear(self.hidden_size, self.n_classes)
    self.softmax = torch.nn.LogSoftmax(dim=1)
def compute_represenation(sents, bert_model, logger, device="cuda", reprer=None):
    if reprer is None:
        model = BertModel.from_pretrained(bert_model).to(device)
    else:
        model = reprer.model
    model.eval()
    batch_size = 100
    for i in range(0, len(sents), batch_size):
        items = sents[i:min(len(sents), i + batch_size)]
        with torch.no_grad():
            input_ids = torch.tensor([item.input_ids for item in items],
                                     dtype=torch.long).to(device)
            segment_ids = torch.tensor([item.segment_ids for item in items],
                                       dtype=torch.long).to(device)
            input_mask = torch.tensor([item.input_mask for item in items],
                                      dtype=torch.long).to(device)
            all_encoder_layers, _ = model(
                input_ids, segment_ids, input_mask)  # batch_size x seq_len x hidden_size
            layer_output = all_encoder_layers[-1].detach().cpu().numpy()  # batch_size x seq_len x hidden_size
            for j, item in enumerate(items):
                item.representation = layer_output[j][0]  # vector of the first ([CLS]) token
                # item.representation = layer_output
        if i % (10 * batch_size) == 0:
            logger.info('  Compute sentence representation. To {}...'.format(i))
    logger.info('  Finish.')
def __init__(self, bert_model, args):
    super(RawBertCls, self).__init__()
    self.backbone = BertModel.from_pretrained(
        'data/.cache/bert-base-uncased.tar.gz')
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Linear(768, 2)
def main(raw_args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name",
                        type=str,
                        required=True,
                        help="model name e.g. bert-base-uncased")
    parser.add_argument("--cache_dir",
                        type=str,
                        default=None,
                        required=False,
                        help="Directory containing pytorch model")
    parser.add_argument("--pytorch_model_path",
                        type=str,
                        required=True,
                        help="/path/to/<pytorch-model-name>.bin")
    parser.add_argument("--tf_cache_dir",
                        type=str,
                        required=True,
                        help="Directory in which to save tensorflow model")
    args = parser.parse_args(raw_args)

    model = BertModel.from_pretrained(
        pretrained_model_name_or_path=args.model_name,
        state_dict=torch.load(args.pytorch_model_path),
        cache_dir=args.cache_dir)

    convert_pytorch_checkpoint_to_tf(model=model,
                                     ckpt_dir=args.tf_cache_dir,
                                     model_name=args.model_name)
def __init__(self, args):
    super(MultimodalBertEncoder, self).__init__()
    self.args = args
    bert = BertModel.from_pretrained(args.bert_model)
    self.txt_embeddings = bert.embeddings
    if args.task in ["vsnli", 'msnews']:
        ternary_embeds = nn.Embedding(3, args.hidden_sz)
        ternary_embeds.weight.data[:2].copy_(
            bert.embeddings.token_type_embeddings.weight)
        ternary_embeds.weight.data[2].copy_(
            bert.embeddings.token_type_embeddings.weight.data.mean(dim=0))
        self.txt_embeddings.token_type_embeddings = ternary_embeds
    self.img_embeddings = ImageBertEmbeddings(args, self.txt_embeddings)
    self.img_encoder = ImageEncoder(args)
    self.encoder = bert.encoder
    self.pooler = bert.pooler
    self.clf = nn.Linear(args.hidden_sz, args.n_classes)

    # GPU Options
    if args.multiGPU:
        self.img_embeddings = nn.DataParallel(self.img_embeddings)
        self.encoder = nn.DataParallel(self.encoder)
        self.img_encoder = nn.DataParallel(self.img_encoder)
def __init__(self, opt):
    super(TagWordModel, self).__init__()
    self.register_buffer('dummy', torch.Tensor(1, 1).fill_(-float("inf")))
    self.bert = BertModel.from_pretrained(opt.bert_model, cache_dir=CACHEDIR)
    for lay in self.bert.encoder.layer:
        lay.output.dropout.p = args.drop
def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device,
             write_log, summary_writer):
    self.job_config = job_config

    if not use_pretrain:
        model_config = self.job_config.get_model_config()
        bert_config = BertConfig(**model_config)
        bert_config.vocab_size = len(tokenizer.vocab)
        self.bert_encoder = BertModel(bert_config)
    # Use pretrained bert weights
    else:
        self.bert_encoder = BertModel.from_pretrained(
            self.job_config.get_model_file_type(), cache_dir=cache_dir)
        bert_config = self.bert_encoder.config

    self.network = MTLRouting(self.bert_encoder,
                              write_log=write_log,
                              summary_writer=summary_writer)

    # config_data = self.config['data']

    # Pretrain Dataset
    self.network.register_batch(BatchType.PRETRAIN_BATCH,
                                "pretrain_dataset",
                                loss_calculation=BertPretrainingLoss(
                                    self.bert_encoder, bert_config))

    self.device = device
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)

        # Prepare tokenized input
        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> segments_tensors = torch.tensor([segments_ids])

        # Load bertModel
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertModel', 'bert-base-cased')
        >>> model.eval()

        # Predict hidden states features for each layer
        >>> with torch.no_grad():
        ...     encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
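# Illustrative follow-up, not part of the hub entry point above: assuming the
# pytorch-pretrained-BERT package, the loaded model's config confirms the depth
# and width quoted in the docstring.
from pytorch_pretrained_bert import BertModel

model = BertModel.from_pretrained('bert-base-cased')
print(model.config.num_hidden_layers)  # 12 for BERT-base, 24 for BERT-large
print(model.config.hidden_size)        # 768 for BERT-base, 1024 for BERT-large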
def __init__(self,
             vocab: Vocabulary,
             bert_model: Union[str, BertModel],
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             ignore_span_metric: bool = False,
             srl_eval_path: str = DEFAULT_SRL_EVAL_PATH) -> None:
    super().__init__(vocab, regularizer)

    if isinstance(bert_model, str):
        self.bert_model = BertModel.from_pretrained(bert_model)
    else:
        self.bert_model = bert_model

    self.num_classes = self.vocab.get_vocab_size("labels")
    if srl_eval_path is not None:
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
    else:
        self.span_metric = None
    self.tag_projection_layer = Linear(self.bert_model.config.hidden_size,
                                       self.num_classes)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    initializer(self)
def __init__(self, args):
    self.config = args.config

    if not args.use_pretrain:
        if args.progressive_layer_drop:
            print("BertConfigPreLnLayerDrop")
            from nvidia.modelingpreln_layerdrop import BertForPreTrainingPreLN, BertForMaskedLM, BertConfig
        else:
            from nvidia.modelingpreln import BertForPreTrainingPreLN, BertForMaskedLM, BertConfig

        bert_config = BertConfig(**self.config["bert_model_config"])
        bert_config.vocab_size = len(args.tokenizer.vocab)

        # Pad the vocabulary so its size is divisible by 8
        if bert_config.vocab_size % 8 != 0:
            bert_config.vocab_size += 8 - (bert_config.vocab_size % 8)
        print("VOCAB SIZE:", bert_config.vocab_size)

        self.network = BertForPreTrainingPreLN(bert_config, args)
        # self.network = BertForMaskedLM(bert_config)  # something else would need to be changed for this to work
    # Use pretrained bert weights
    else:
        self.bert_encoder = BertModel.from_pretrained(
            self.config['bert_model_file'],
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank))
        bert_config = self.bert_encoder.config

    self.device = None
def __init__(self,
             pretrained_model: str,
             requires_grad: bool = False,
             dropout: float = 0.0,
             first_layer_only: bool = False,
             second_to_last_layer_only: bool = False,
             last_layer_only: bool = False,
             sum_last_four_layers: bool = False,
             concat_last_four_layers: bool = False,
             sum_all_layers: bool = False,
             scalar_mix: bool = False) -> None:
    model = BertModel.from_pretrained(pretrained_model)
    for param in model.parameters():
        param.requires_grad = requires_grad
    super().__init__(bert_model=model,
                     dropout=dropout,
                     first_layer_only=first_layer_only,
                     second_to_last_layer_only=second_to_last_layer_only,
                     last_layer_only=last_layer_only,
                     sum_last_four_layers=sum_last_four_layers,
                     concat_last_four_layers=concat_last_four_layers,
                     sum_all_layers=sum_all_layers,
                     scalar_mix=scalar_mix)
def __init__(self, n_input=768, n_output=128, bert_model='bert-base-uncased'):
    super(ProtNet, self).__init__()
    self.bert = BertModel.from_pretrained(
        '../Fewshot-Learning-with-BERT-master/bert-base-uncased')
def __init__(self, config):
    super(bc_RNN, self).__init__()
    self.config = config
    self.encoder = BertModel.from_pretrained("bert-base-uncased")

    context_input_size = (config.num_layers * config.encoder_hidden_size)
    self.context_encoder = layer.ContextRNN(context_input_size,
                                            config.context_size,
                                            config.rnn,
                                            config.num_layers,
                                            config.dropout)

    self.context2decoder = layer.FeedForward(config.context_size,
                                             config.num_layers * config.context_size,
                                             num_layers=1,
                                             activation=config.activation,
                                             isActivation=True)
    self.decoder2output = layer.FeedForward(config.num_layers * config.context_size,
                                            config.num_classes,
                                            num_layers=1,
                                            isActivation=False)
    self.dropoutLayer = nn.Dropout(p=config.dropout)
def __init__(self, word_vec_mat, max_length=100, word_embedding_dim=768,
             dpos_embedding_dim=50, dmask_embedding_dim=50):
    nn.Module.__init__(self)
    self.max_length = max_length
    self.word_embedding_dim = word_embedding_dim
    self.dpos_embedding_dim = dpos_embedding_dim
    self.dmask_embedding_dim = dmask_embedding_dim

    # self.bert_token = BertTokenizer.from_pretrained('bert-base-uncased')
    # self.bert_embedding = BertModel.from_pretrained('bert-base-uncased')
    self.bert_token = BertTokenizer.from_pretrained('./models/bert-base-uncased-vocab.txt')
    self.bert_embedding = BertModel.from_pretrained('./models')

    # Word embedding
    # unk = torch.randn(1, word_embedding_dim) / math.sqrt(word_embedding_dim)
    # blk = torch.zeros(1, word_embedding_dim)
    # word_vec_mat = torch.from_numpy(word_vec_mat)
    self.word_embedding = nn.Embedding(400002, 50, padding_idx=word_vec_mat.shape[0] + 1)
    # self.word_embedding.weight.data.copy_(torch.cat((word_vec_mat, unk, blk), 0))
    # self.bword_embedding.weight.data.copy_(torch.cat((word_vec_mat, unk, blk), 0))

    # Position embedding
    self.pos1_embedding = nn.Embedding(80, 5, padding_idx=0)
    self.pos2_embedding = nn.Embedding(80, 5, padding_idx=0)
    self.dpos1_embedding = nn.Embedding(2 * self.max_length, dpos_embedding_dim, padding_idx=0)
    self.dpos2_embedding = nn.Embedding(2 * self.max_length, dpos_embedding_dim, padding_idx=0)

    # dmask embedding
    self.dmask1_embedding = nn.Embedding(2 * self.max_length, dmask_embedding_dim, padding_idx=0)
    self.dmask2_embedding = nn.Embedding(2 * self.max_length, dmask_embedding_dim, padding_idx=0)
def __init__(self,
             bert_model,
             rnn_size,
             labels_vocab_size,
             num_layers=2,
             dropout=0.5):
    torch.nn.Module.__init__(self)
    self.bert = BertModel.from_pretrained(bert_model)
    self.dropout = torch.nn.Dropout(dropout)
    bert_size = self.bert.config.hidden_size
    self.rnn = torch.nn.LSTM(
        bert_size,
        rnn_size,
        num_layers,
        bidirectional=True,
        batch_first=True,
        dropout=dropout,
    )
    self.output = torch.nn.Linear(rnn_size * 2, labels_vocab_size)  # *2 because of bidi
def extractBert():
    model = BertModel.from_pretrained(modelPath[args.model])
    # print(model); exit(10)
    embeddings = model.embeddings.word_embeddings
    print(embeddings.num_embeddings)
    print(embeddings.weight.size())

    weight = embeddings.weight.detach().numpy()
    tokenizer = BertTokenizer.from_pretrained(modelPath[args.model])
    # print(tokenizer.ids_to_tokens)
    # for i in range(10):
    #     print(weight[i])

    with open(programmingalpha.Bert768 + "embeddings.txt", "w") as f:
        vec_strs = []
        for i in range(len(weight)):
            vec = weight[i]
            vec_str = list(map(lambda x: str(x), vec))
            token = tokenizer.ids_to_tokens[i]
            vec_str.insert(0, token)
            vec_str = " ".join(vec_str)
            vec_strs.append(vec_str + "\n")

        def turnIndexs(index1, index2):
            tmp = vec_strs[index1]
            vec_strs[index1] = vec_strs[index2]
            vec_strs[index2] = tmp

        turnIndexs(0, 1)
        turnIndexs(0, 100)

        f.writelines(vec_strs)
def __init__(self, encoder, decoder, emb_type, emb_dim, vocab_size, conv_hidden,
             encoder_hidden, encoder_layer, isTrain=True, n_hop=1, dropout=0.0):
    super().__init__()
    self._encoder = encoder
    self._decoder = decoder
    self._emb_type = emb_type

    self._sent_enc = ConvSentEncoder(vocab_size, emb_dim, conv_hidden, dropout, emb_type)

    # BERT
    if emb_type == 'BERT':
        self._bert = BertModel.from_pretrained(
            '/path/to/uncased_L-24_H-1024_A-16')
        self._bert.eval()
        for p in self._bert.parameters():
            p.requires_grad = False
        self._bert_w = nn.Linear(1024 * 4, emb_dim)

    # Sentence Encoder
    if encoder == 'BiLSTM':
        enc_out_dim = encoder_hidden * 2  # bidirectional
        self._art_enc = LSTMEncoder(3 * conv_hidden,
                                    encoder_hidden,
                                    encoder_layer,
                                    dropout=dropout,
                                    bidirectional=True)
    elif encoder == 'Transformer':
        enc_out_dim = encoder_hidden
        self._art_enc = TransformerEncoder(3 * conv_hidden, encoder_hidden,
                                           encoder_layer, decoder)
        self._emb_w = nn.Linear(3 * conv_hidden, encoder_hidden)
        self.sent_pos_embed = nn.Embedding.from_pretrained(
            get_sinusoid_encoding_table(1000, enc_out_dim, padding_idx=0),
            freeze=True)
    elif encoder == 'DeepLSTM':
        enc_out_dim = encoder_hidden
        self._isTrain = isTrain
        self._art_enc = DeepLSTM(3 * conv_hidden, encoder_hidden, encoder_layer, 0.1)

    # Decoder
    decoder_hidden = encoder_hidden
    decoder_layer = encoder_layer
    if decoder == 'PN':
        self._extractor = LSTMPointerNet(enc_out_dim, decoder_hidden,
                                         decoder_layer, dropout, n_hop)
    else:
        self._ws = nn.Linear(enc_out_dim, 2)
def __init__(self, name, **kwargs):
    super(BERTBaseEmbeddings, self).__init__(name=name, **kwargs)
    global BERT_TOKENIZER
    self.dsz = kwargs.get('dsz')
    if BERT_TOKENIZER is None:
        BERT_TOKENIZER = BertTokenizer.from_pretrained(kwargs.get('embed_file'))
    self.model = BertModel.from_pretrained(kwargs.get('embed_file'))
    self.vocab = BERT_TOKENIZER.vocab
    self.vsz = len(BERT_TOKENIZER.vocab)  # 30522 == self.model.embeddings.word_embeddings.num_embeddings
    self.layer_indices = kwargs.get('layers', [-1, -2, -3, -4])
    self.operator = kwargs.get('operator', 'concat')