def extract_embeddings_for_other_clf():
    distil_bert = "distilbert-base-uncased"
    config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
    config.output_hidden_states = False
    transformer_model = TFDistilBertModel.from_pretrained(distil_bert, config=config)

    input_ids_in = tf.keras.layers.Input(shape=(25,), name="input_token", dtype="int32")
    input_masks_in = tf.keras.layers.Input(shape=(25,), name="masked_token", dtype="int32")

    # Use the hidden state of the [CLS] token as the sequence embedding
    embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]
    cls_token = embedding_layer[:, 0, :]

    X = tf.keras.layers.BatchNormalization()(cls_token)
    X = tf.keras.layers.Dense(192, activation="relu")(X)
    X = tf.keras.layers.Dropout(0.2)(X)
    X = tf.keras.layers.Dense(3, activation="softmax")(X)
    model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=X)

    # Freeze the DistilBERT layers so only the classification head is trained
    for layer in model.layers[:3]:
        layer.trainable = False

    return model
def create_model(model_config: CommentClassifierConfig,
                 saved_weights_path: str = None,
                 max_seq_length: int = MAX_SEQ_LENGTH) -> tf.keras.Model:
    """
    :param model_config: CommentClassifierConfig
    :param saved_weights_path: If defined, model weights will be loaded
        from the provided checkpoint path
    :param max_seq_length: Maximum length of the tokenized input to BERT
    :return: Model for text classification using DistilBert transformers
    """
    # Load pre-trained DistilBERT
    bert_config = DistilBertConfig(
        dropout=model_config.bert_dropout,
        attention_dropout=model_config.bert_attention_dropout,
        num_labels=NUM_CLASSES)
    bert_config.output_hidden_states = False
    transformer_model = TFDistilBertModel.from_pretrained(MODEL_NAME, config=bert_config)

    input_ids_in = tf.keras.layers.Input(
        shape=(max_seq_length,), name='input_token', dtype='int32')
    input_masks_in = tf.keras.layers.Input(
        shape=(max_seq_length,), name='masked_token', dtype='int32')

    embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            model_config.lstm_units,
            return_sequences=True,
            dropout=model_config.lstm_dropout,
            recurrent_dropout=model_config.lstm_recurrent_dropout))(embedding_layer)
    x = tf.keras.layers.GlobalMaxPool1D()(x)
    x = tf.keras.layers.Dense(
        model_config.hidden_layer_dim,
        activation=model_config.hidden_layer_activation)(x)
    x = tf.keras.layers.Dropout(model_config.final_layer_dropout)(x)
    x = tf.keras.layers.Dense(
        NUM_CLASSES, activation=model_config.final_layer_activation)(x)
    model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=x)

    # Use transfer learning only - do not train BERT again
    for layer in model.layers[:3]:
        layer.trainable = False

    # Load weights from a checkpoint, but allow partial matching
    # (e.g. due to a change in the optimizer)
    if saved_weights_path is not None:
        model.load_weights(saved_weights_path).expect_partial()

    return model
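# Usage sketch for create_model above, illustrative only: tokenize a batch and
# run inference on the two-input model. MODEL_NAME, MAX_SEQ_LENGTH and
# CommentClassifierConfig are the module-level names the function references;
# the example texts, the fast-tokenizer choice, and the assumption that
# CommentClassifierConfig constructs with defaults are all mine, not the source's.
from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_NAME)
encoded = tokenizer(["great video!", "utter spam"],
                    padding="max_length",
                    truncation=True,
                    max_length=MAX_SEQ_LENGTH,
                    return_tensors="tf")
model = create_model(CommentClassifierConfig())
probs = model.predict([encoded["input_ids"], encoded["attention_mask"]])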
def __init__(self, config: Dict):
    super().__init__()
    self.config = config
    self.model_config = DistilBertConfig(**self.config["model"])
    self.model = DistilBertModel(self.model_config)
    self.criterion = nn.CosineEmbeddingLoss(margin=0.0, reduction='mean')
def prepare_config_and_inputs(self):
    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

    input_mask = None
    if self.use_input_mask:
        input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

    sequence_labels = None
    token_labels = None
    choice_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
        choice_labels = ids_tensor([self.batch_size], self.num_choices)

    config = DistilBertConfig(
        vocab_size=self.vocab_size,
        dim=self.hidden_size,
        n_layers=self.num_hidden_layers,
        n_heads=self.num_attention_heads,
        hidden_dim=self.intermediate_size,
        hidden_act=self.hidden_act,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        initializer_range=self.initializer_range,
    )

    return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
def __init__(self, config={}):
    super(DistilBert, self).__init__()
    self.masking = tf.keras.layers.Masking()
    self.fc1 = Dense(config['base_config']['dim'], activation='relu')
    self.model_config = DistilBertConfig.from_dict(config['base_config'])
    self.base = TFDistilBertModel(self.model_config)
    self.head = HEADS[config['head']['name']](config['head'])
def classify(text):
    print('start')
    # os.path.join avoids the invalid "\d" escape sequence in the original
    # Windows-style string concatenation
    path = os.path.join(settings.MEDIA_ROOT, "distilbert.bin")
    MODEL_PATH = 'distilbert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    encode = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=192,
        padding='max_length',  # replaces the deprecated pad_to_max_length=True
        truncation=True,
    )
    device = torch.device('cpu')
    tokens = encode['input_ids']
    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(0)
    tokens = tokens.to(device)

    config = DistilBertConfig()
    model = Bert(DistilBertModel(config))
    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)

    output = model(tokens)
    output = output.cpu().detach().numpy()
    print(output)
    # Binarize the score at 0.5
    output = 0.0 if output < 0.5 else 1.0
    return output
def __init__(self, embedding_size, projection_size, n_layers, emo_dict, dropout=0.1):
    super(context_classifier_model, self).__init__()
    self.projection_size = projection_size
    self.projection = torch.nn.Linear(embedding_size, projection_size)
    self.position_embeds = torch.nn.Embedding(3, projection_size)
    self.norm = torch.nn.LayerNorm(projection_size)
    self.drop = torch.nn.Dropout(dropout)

    context_config = DistilBertConfig(dropout=dropout,
                                      dim=projection_size,
                                      hidden_dim=4 * projection_size,
                                      n_layers=n_layers,
                                      n_heads=1,
                                      num_labels=4)
    self.context_transformer = DistilBertForSequenceClassification(context_config)
    self.others_label = emo_dict['others']
    self.bin_loss_fct = torch.nn.BCEWithLogitsLoss()
def test(informal):
    if torch.cuda.is_available():
        device = torch.device('cuda:3')
        print(f'Using GPU device: {device}')
    else:
        device = torch.device('cpu')
        print(f'GPU is not available, using CPU device {device}')

    test_config = {'batch_size': 5, 'epoch': 29, 'save_dir': './checkpoints/'}
    test_dataset = FormalDataset(informal)
    dataloader = DataLoader(test_dataset,
                            batch_size=test_config['batch_size'],
                            shuffle=False,
                            num_workers=4,
                            drop_last=False)

    config = DistilBertConfig()
    model = DistilBertForMaskedLM(config)
    load_model(test_config['epoch'], model, test_config['save_dir'])
    model.to(device)
    model.eval()

    with torch.no_grad():
        for i, batch in tqdm(enumerate(dataloader)):
            inp = batch['input_ids'].to(device)
            attn = batch['attention_mask'].to(device)
            logits = model(input_ids=inp, attention_mask=attn)[0]
            preds = decode_text(test_dataset.tokenizer, logits)
            for seq in preds:
                with open('test_pred.txt', 'a') as res_file:
                    res_file.writelines(seq + '\n')
def __init__(self,
             max_seq_len=MAX_LEN,
             batch_size=BATCH_SIZE,
             n_epochs=N_EPOCHS,
             val_size=0.1,
             learning_rate=LEARNING_RATE,
             load_local_pretrained=False):
    self.max_seq_len = max_seq_len
    self.batch_size = batch_size
    self.n_epochs = n_epochs
    self.val_size = val_size
    self.learning_rate = learning_rate

    # Load dataset, tokenizer, model from pretrained model/vocabulary
    self.tokenizer = DistilBertTokenizerFast.from_pretrained(
        BERTMODEL, do_lower_case=False)
    if load_local_pretrained:
        self.model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_PATH)
    else:
        config = DistilBertConfig.from_pretrained(BERTMODEL, num_labels=2)
        self.model = TFDistilBertForSequenceClassification.from_pretrained(
            BERTMODEL, config=config)

    # Freeze distilbert layer
    self.model.distilbert.trainable = False
def __load(self):
    dbertConf = DistilBertConfig.from_pretrained(self.path + '/config.json')
    self.model = TFDistilBertForSequenceClassification.from_pretrained(
        self.path + '/tf_model.h5', config=dbertConf)
def loadNet(modelURL, numClasses, unfreezePretrain=False, fromHuggingFace=False):
    if not fromHuggingFace:
        pretrainedNet = hub.KerasLayer(modelURL,
                                       input_shape=(IMG_SIZE, IMG_SIZE, CHANNELS))
        # Freeze the pretrained network unless explicitly unfrozen
        pretrainedNet.trainable = unfreezePretrain
    else:
        config = DistilBertConfig(num_labels=numClasses)
        print(f'Number of Classes: {numClasses}')
        print(config)
        pretrainedNet = TFDistilBertForSequenceClassification.from_pretrained(
            modelURL, config=config)
        # Freeze the transformer body (layer 0) unless explicitly unfrozen
        pretrainedNet.layers[0].trainable = unfreezePretrain
    return pretrainedNet
def load_model(self, model_name: str = "bert_ner_test"):
    # TODO model loaded from mlflow
    # Load model and tokenizer. from_pretrained is a classmethod, so it is called
    # on the class directly; the original instantiated the model first and then
    # called from_pretrained on the instance, discarding the fresh instance.
    config = DistilBertConfig.from_pretrained(model_name)
    model = DistilBertForTokenClassification.from_pretrained(model_name, config=config)
    tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
    return model, config, tokenizer
def model_load(self, path: str):
    config = DistilBertConfig.from_pretrained(path + "/config.json")
    tokenizer = DistilBertTokenizer.from_pretrained(path,
                                                    do_lower_case=self.do_lower_case)
    model = DistilBertForQuestionAnswering.from_pretrained(path,
                                                           from_tf=False,
                                                           config=config)
    return model, tokenizer
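# Hedged sketch of extractive QA with the (model, tokenizer) pair returned by
# model_load above. 'reader' and 'path' are placeholders, the question/context
# strings are made up, and the argmax decoding assumes the standard transformers
# QuestionAnsweringModelOutput fields (start_logits / end_logits).
import torch

model, tokenizer = reader.model_load(path)
inputs = tokenizer("Who wrote the report?",
                   "The report was written by Ada Lovelace.",
                   return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
start = int(torch.argmax(outputs.start_logits))
end = int(torch.argmax(outputs.end_logits)) + 1
answer = tokenizer.decode(inputs["input_ids"][0][start:end])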
def classify_with_pre_trained():
    # model = "neuralmind/bert-base-portuguese-cased"
    distil_bert = "distilbert-base-uncased"  # assumed checkpoint; undefined in the original
    config = DistilBertConfig(num_labels=3)
    config.output_hidden_states = False
    # from_pretrained returns the model itself, so the [0] indexing the original
    # applied here is moved to the model call below, where it selects the logits.
    transformer_model = TFDistilBertForSequenceClassification.from_pretrained(
        distil_bert, config=config)

    input_ids = tf.keras.layers.Input(shape=(128,), name="input_token", dtype="int32")
    input_masks_ids = tf.keras.layers.Input(shape=(128,), name="masked_token",
                                            dtype="int32")
    X = transformer_model(input_ids, attention_mask=input_masks_ids)[0]
    model = tf.keras.Model(inputs=[input_ids, input_masks_ids], outputs=X)
    return model
def __init__(self, dropout):
    super(DISTILBertModel, self).__init__()
    self.distilbert = DistilBertModel.from_pretrained(
        config.PATHS['distilbert'], config=DistilBertConfig())
    self.fc = nn.Linear(768, 2)
    self.dropout = nn.Dropout(dropout)
def __init__(self, args, device, d_model=256, nhead=4, d_ff=1024, nlayers=2,
             dropout=0.5):
    super(Autoencoder, self).__init__()
    self.model_type = 'Transformer'
    self.d_model = d_model
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(d_model, dropout)  # encoder's position
    self.pos_decoder = PositionalEncoding(d_model, dropout)  # decoder's position

    decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
    decoder_norm = nn.LayerNorm(d_model)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers, decoder_norm)

    # Pick the encoder backbone according to the command-line flags
    if args.use_albert:
        self.bert_encoder = AlbertModel.from_pretrained("clue/albert_chinese_tiny")
        self.bert_embed = self.bert_encoder.embeddings
        # ALBERT's embedding size differs from d_model, so build a separate
        # target embedding instead of reusing the encoder's
        d_vocab = self.bert_encoder.config.vocab_size + 1
        self.tgt_embed = nn.Sequential(
            Embeddings(d_model, d_vocab), PositionalEncoding(d_model, dropout))
    elif args.use_tiny_bert:
        self.bert_encoder = AutoModel.from_pretrained(
            "google/bert_uncased_L-2_H-256_A-4")
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
    elif args.use_distil_bert:
        configuration = DistilBertConfig()
        self.bert_encoder = DistilBertModel(configuration)
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
    else:
        self.bert_encoder = BertModel.from_pretrained(
            args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed

    self.distill_2 = args.distill_2
    self.gru = nn.GRU(d_model, d_model, 1)
    self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
    self.sigmoid = nn.Sigmoid()
    self.device = device
    self.init_weights()
def model_load(self, path):
    s3_model_url = 'https://distilbert-finetuned-model.s3.eu-west-2.amazonaws.com/pytorch_model.bin'
    path_to_model = download_model(s3_model_url, model_name="pytorch_model.bin")
    config = DistilBertConfig.from_pretrained(path + "/config.json")
    tokenizer = DistilBertTokenizer.from_pretrained(path,
                                                    do_lower_case=self.do_lower_case)
    model = DistilBertForQuestionAnswering.from_pretrained(path_to_model,
                                                           from_tf=False,
                                                           config=config)
    return model, tokenizer
def model_fn(model_dir):
    config = DistilBertConfig.from_json_file('/opt/ml/model/code/config.json')
    model_path = '{}/{}'.format(model_dir, 'model.pth')
    model = DistilBertForSequenceClassification.from_pretrained(model_path,
                                                                config=config)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    return model
def download_distilbert_base():
    file = '../input/distilbert-base-uncased'
    config = DistilBertConfig.from_pretrained('distilbert-base-uncased')
    config.save_pretrained(file)
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')
    model.save_pretrained(file)
    tkn = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    tkn.save_pretrained(file)
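# Sketch of the counterpart step: the artifacts saved by download_distilbert_base
# can later be reloaded offline from the same local directory, a common pattern
# for Kaggle kernels without internet access. Illustrative, not from the source.
from transformers import DistilBertModel, DistilBertTokenizer

local_dir = '../input/distilbert-base-uncased'
model = DistilBertModel.from_pretrained(local_dir)
tokenizer = DistilBertTokenizer.from_pretrained(local_dir)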
def __init__(self, tokenizer, device):
    super(VQAModel, self).__init__()
    self.tokenizer = tokenizer
    self.device = device
    # Image encoder: ResNet-152 with the final classification layer removed
    self.image_embedding = torch.nn.Sequential(
        *(list(models.resnet152(pretrained=True).children())[:-1]))
    # Question encoder: randomly initialized DistilBERT
    configuration = DistilBertConfig()
    self.question_embedding = DistilBertModel(configuration)
    # Fusion head over the concatenated image and question features
    self.linear1 = torch.nn.Linear(2816, 1024)
    self.linear2 = torch.nn.Linear(1024, 512)
    self.linear3 = torch.nn.Linear(512, 2)
def main():
    # Parse command-line arguments (auto-generated from the docstring
    # at the top of this file)
    args = docopt(__doc__)
    pprint(args)

    # Read hyperparameters
    lr = float(args['--lr'])
    seq_len = int(args['--seq_len'])
    max_epoch = int(args['--max_epoch'])
    batch_size = int(args['--batch_size'])
    num_train = int(args['--num_train'])
    num_valid = int(args['--num_valid'])

    # Select the model
    pretrained_weights = 'distilbert-base-uncased'
    tokenizer = DistilBertTokenizer.from_pretrained(pretrained_weights)
    config = DistilBertConfig(num_labels=4)
    model = DistilBertForSequenceClassification.from_pretrained(
        pretrained_weights, config=config)

    # Pick the device to run on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load the data and build the datasets
    encoder = TwinPhraseEncoder(tokenizer, seq_len)
    train_dataset = WordnetDataset(mode='train', num_data=num_train, transform=encoder)
    valid_dataset = WordnetDataset(mode='valid', num_data=num_valid, transform=encoder)
    train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size, shuffle=True)

    # Define the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Train
    for epoch in range(1, max_epoch + 1):
        print('=' * 27 + f' Epoch {epoch:0>2} ' + '=' * 27)
        # Training
        loss, accu = train_model(model, optimizer, train_loader, device)
        print(f'| Training   | loss-avg : {loss:>8.6f} | accuracy : {accu:>8.3%} |')
        # Validation
        loss, accu = valid_model(model, optimizer, valid_loader, device)
        print(f'| Validation | loss-avg : {loss:>8.6f} | accuracy : {accu:>8.3%} |')

    # Save the trained weights
    torch.save(model.state_dict(), '../result/bert.pkl')
def test_TFDistilBertForQuestionAnswering(self):
    from transformers import DistilBertConfig, TFDistilBertForQuestionAnswering
    keras.backend.clear_session()
    # pretrained_weights = 'distilbert-base-uncased'
    tokenizer_file = 'distilbert_distilbert-base-uncased.pickle'
    tokenizer = self._get_tokenzier(tokenizer_file)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    config = DistilBertConfig()
    model = TFDistilBertForQuestionAnswering(config)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model,
                                     inputs_onnx, predictions, self.model_files))
def create_model(max_seq_len, classes):
    config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
    config.output_hidden_states = False
    tfm = TFDistilBertModel.from_pretrained('./MODEL/uncased/', config=config)

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32',
                                   name="input_ids")
    bert_output = tfm(input_ids)[0]

    # Use the [CLS] token's hidden state as the sequence representation
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    # The original passed cls_out here, which silently dropped the 768-unit
    # layer from the graph; chaining from logits keeps all three hidden layers
    logits = keras.layers.Dense(units=512, activation="tanh")(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=256, activation="tanh")(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=len(classes), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))
    return model
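# Illustrative usage of the single-input create_model above. The tokenizer is
# assumed to live in the same './MODEL/uncased/' directory as the weights, and
# the sequence length and class labels are made up for the example.
import numpy as np
from transformers import DistilBertTokenizer

tokenizer = DistilBertTokenizer.from_pretrained('./MODEL/uncased/')
enc = tokenizer.encode_plus('sample text', max_length=64,
                            padding='max_length', truncation=True)
model = create_model(max_seq_len=64, classes=['neg', 'neu', 'pos'])
probs = model.predict(np.array([enc['input_ids']]))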
def __init__(self, pretrainString='distilbert-base-uncased-finetuned-sst-2-english'):
    super().__init__()
    configuration = DistilBertConfig(dropout=0.25, num_labels=7)
    # from_pretrained is a classmethod; the original called it on a freshly
    # constructed instance, discarding that instance and its config. Passing
    # the config to from_pretrained keeps the custom dropout and label count.
    self.bert = DistilBertModel.from_pretrained(pretrainString, config=configuration)
    self.pre_classifier = nn.Linear(configuration.dim, configuration.dim)
    self.classifier = nn.Linear(configuration.dim, configuration.num_labels)
    self.dropout = nn.Dropout(configuration.seq_classif_dropout)
    self.to(device)
def build_model(args):
    if args.clf_model.lower() == "cnn":
        # easy for text tokenization
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        model = CNN_Text(args)
    elif args.clf_model.lower() == "robert":
        print("name is {}".format(args.model_name_or_path))
        tokenizer = RobertaTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        config = RobertaConfig.from_pretrained(args.model_name_or_path,
                                               num_labels=args.num_labels,
                                               finetuning_task=args.task_name)
        model = RobertaForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
        # Freeze the transformer weights
        if args.freeze:
            for n, p in model.named_parameters():
                if "bert" in n:
                    p.requires_grad = False
    elif args.clf_model.lower() == "bert":
        tokenizer = BertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        config = BertConfig.from_pretrained(args.model_name_or_path,
                                            num_labels=args.num_labels,
                                            finetuning_task=args.task_name)
        model = BertForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
    else:
        # Default: DistilBERT with a custom multi-head classification head
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        config = DistilBertConfig.from_pretrained(args.model_name_or_path,
                                                  num_labels=args.num_labels,
                                                  finetuning_task=args.task_name)
        model = DistilBertForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
        model.expand_class_head(args.multi_head)

    model = model.to(args.device)
    return tokenizer, model
def __init__(self, bert_type='bertbase'):
    super(Bert, self).__init__()
    self.bert_type = bert_type
    if bert_type == 'bertbase':
        configuration = BertConfig()
        self.model = BertModel(configuration)
    elif bert_type == 'distilbert':
        configuration = DistilBertConfig()
        self.model = DistilBertModel(configuration)
    elif bert_type == 'mobilebert':
        configuration = MobileBertConfig.from_pretrained('checkpoints/mobilebert')
        self.model = MobileBertModel(configuration)
def __init__(self, pretrained=True, **kwargs):
    super().__init__()
    hidden_dimension = 32
    if pretrained:
        self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
    else:
        self.bert = DistilBertModel(DistilBertConfig())
    self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
    self.pre_classifier = nn.Linear(self.bert.config.dim, hidden_dimension)
    self.classifier = nn.Linear(hidden_dimension, 1)
def get_config(self):
    return DistilBertConfig(
        vocab_size=self.vocab_size,
        dim=self.hidden_size,
        n_layers=self.num_hidden_layers,
        n_heads=self.num_attention_heads,
        hidden_dim=self.intermediate_size,
        hidden_act=self.hidden_act,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        initializer_range=self.initializer_range,
    )
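# For context, a config built this way drives a randomly initialized model,
# which is how test suites exercise tiny DistilBERTs without downloading
# weights. A minimal sketch; the sizes below are made up for illustration.
from transformers import DistilBertConfig, DistilBertModel

tiny_config = DistilBertConfig(vocab_size=99, dim=32, n_layers=2,
                               n_heads=4, hidden_dim=37)
tiny_model = DistilBertModel(tiny_config)  # random weights, no download needed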
def __init__(self, cfg: DictConfig):
    super().__init__(cfg)
    config = DistilBertConfig()
    config.sinusoidal_pos_embds = True
    self.module = DistilBertModel.from_pretrained('distilbert-base-uncased',
                                                  config=config)
    self.tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

    # Add in our own positional encodings
    embedding_layer = PositionalBertEmbeddings(self.module.config)
    self.module.embeddings = embedding_layer

    if self.is_frozen:
        for param in self.module.parameters():
            param.requires_grad = False

    layer = nn.TransformerEncoderLayer(self.hidden_size, self.num_heads)
    self.unit = nn.TransformerEncoder(layer, num_layers=self.num_layers)