def __init__(self, bert_config=None, trainable_layers=3, max_seq_length=128,
             show_summary=False, patience=3, epochs=10, save_predictions=False,
             batch_size=32, DATA_COLUMN="text", TARGET_COLUMN="target",
             DATA2_COLUMN=None, lr=2e-05, session=None, dense_activation=None,
             loss='MSE', monitor_loss='val_mse', monitor_mode='min', METRICS=None):
    """Configure a dual-encoder BERT regression model.

    Fix: `bert_config` and `METRICS` previously used mutable default
    arguments (`BertConfig()` and a list of keras metric objects). Defaults
    are evaluated once at definition time, so every instance shared the
    same config/metric objects. Both now default to None and are built per
    call; passing explicit values behaves exactly as before.
    """
    self.bert_config = BertConfig() if bert_config is None else bert_config
    if METRICS is None:
        METRICS = [
            keras.metrics.MeanSquaredError(name="MSE"),
            keras.metrics.MeanAbsoluteError(name="MAE"),
            keras.metrics.MeanSquaredLogarithmicError(name="MSLE"),
        ]
    self.session = session
    # "OOC1" = single text column, "OOC2" = text pair (DATA2_COLUMN given).
    self.name = f'{"OOC1" if not DATA2_COLUMN else "OOC2"}-b{batch_size}.e{epochs}.len{max_seq_length}.bert'
    self.tokenizer = BertTokenizer.from_pretrained(
        "bert-base-cased", do_lower_case=False, max_length=max_seq_length,
        pad_to_max_length=True)
    self.lr = lr
    self.batch_size = batch_size
    self.DATA_COLUMN = DATA_COLUMN
    self.DATA2_COLUMN = DATA2_COLUMN
    self.TARGET_COLUMN = TARGET_COLUMN
    self.trainable_layers = trainable_layers
    self.max_seq_length = max_seq_length
    self.show_summary = show_summary
    self.patience = patience
    self.save_predictions = save_predictions
    self.epochs = epochs
    self.METRICS = METRICS
    self.loss = loss
    self.monitor_loss = monitor_loss
    self.monitor_mode = monitor_mode
    self.dense_activation = dense_activation
    self.earlystop = tf.keras.callbacks.EarlyStopping(
        monitor=self.monitor_loss, patience=self.patience, verbose=1,
        restore_best_weights=True, mode=self.monitor_mode)
    # Two separate encoders: one for the parent text, one for the target.
    self.BERT_parent = TFBertModel.from_pretrained(
        "bert-base-cased", output_attentions=True)  # , config=self.bert_config)
    self.BERT_target = TFBertModel.from_pretrained(
        "bert-base-cased", output_attentions=True)
def __init__(self, load_post_trained_bert, post_trained_bert_file):
    """Wrap either a post-trained BERT checkpoint or the stock uncased BERT.

    Args:
        load_post_trained_bert: when True, load weights from
            `post_trained_bert_file` (a PyTorch checkpoint, converted on load).
        post_trained_bert_file: path/name of the post-trained checkpoint.
    """
    super(PostTrainedBert, self).__init__()
    if load_post_trained_bert:
        self.bert = TFBertModel.from_pretrained(post_trained_bert_file,
                                                from_pt=True)
    else:
        self.bert = TFBertModel.from_pretrained("bert-base-uncased")
    # The tokenizer is the same in both cases — hoisted out of the branch
    # (it was duplicated verbatim in both arms).
    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
def direct_convert():
    """Convert a DPR PyTorch bi-encoder checkpoint into a TF checkpoint.

    Loads the torch checkpoint, splits it into question/context encoder
    state dicts, mirrors the weights into two TFBertModel instances, sanity
    checks them, and saves a combined tf.train.Checkpoint.
    """
    # map_location forces every tensor onto CPU regardless of where it was saved.
    state_dict = torch.load(
        "checkpoints/nq/single/bert-base-encoder.cp",
        map_location=lambda s, l: default_restore_location(s, "cpu"))
    state_dict = CheckpointState(**state_dict)
    question_prefix = "question_model."
    ctx_prefix = "ctx_model."
    # Strip the "question_model." prefix so keys match a bare BertModel.
    question_encoder_state = {
        key[len(question_prefix):]: value
        for key, value in state_dict.model_dict.items()
        if key.startswith(question_prefix)
    }
    torch_question_encoder = BertModel.from_pretrained('pretrained/bert-base-uncased')
    torch_question_encoder.load_state_dict(question_encoder_state, strict=False)
    ctx_encoder_state = {
        key[len(ctx_prefix):]: value
        for key, value in state_dict.model_dict.items()
        if key.startswith(ctx_prefix)
    }
    torch_ctx_encoder = BertModel.from_pretrained('pretrained/bert-base-uncased')
    torch_ctx_encoder.load_state_dict(ctx_encoder_state, strict=False)
    # Torch stores Linear weights as (out, in) while TF expects (in, out), so
    # every non-embedding ".weight" tensor is transposed.
    # NOTE(review): this relies on dict insertion order matching the order of
    # tf_model.get_weights() — fragile; verify against the transformers version.
    question_encoder_weights = []
    for k, v in question_encoder_state.items():
        if k.endswith(".weight") and not k.startswith("embeddings."):
            v = v.t()
        question_encoder_weights.append(v.numpy())
    # Indices 1 and 2 appear in a different order between the torch and TF
    # weight lists — presumably two of the embedding tables; TODO confirm.
    question_encoder_weights[1], question_encoder_weights[2] = question_encoder_weights[2], question_encoder_weights[1]
    ctx_encoder_weights = []
    for k, v in ctx_encoder_state.items():
        if k.endswith(".weight") and not k.startswith("embeddings."):
            v = v.t()
        ctx_encoder_weights.append(v.numpy())
    ctx_encoder_weights[1], ctx_encoder_weights[2] = ctx_encoder_weights[2], ctx_encoder_weights[1]
    tf_question_encoder = TFBertModel.from_pretrained('pretrained/bert-base-uncased')
    tf_ctx_encoder = TFBertModel.from_pretrained('pretrained/bert-base-uncased')
    tf_question_encoder.set_weights(question_encoder_weights)
    tf_ctx_encoder.set_weights(ctx_encoder_weights)
    # Sanity check: the torch and TF encoders must agree after the transfer.
    assert check_compability(torch_question_encoder, tf_question_encoder)
    assert check_compability(torch_ctx_encoder, tf_ctx_encoder)
    tf_biencoder = BiEncoder(question_model=tf_question_encoder,
                             ctx_model=tf_ctx_encoder)
    tf_ckpt = tf.train.Checkpoint(model=tf_biencoder)
    tf_manager = tf.train.CheckpointManager(tf_ckpt,
                                            "checkpoints/nq/single/",
                                            max_to_keep=3)
    tf_manager.save()
def dual_bert():
    """Two-tower BERT classifier over sentence pairs.

    Each sentence is encoded by its own BERT; the token-level outputs are
    concatenated, passed through a bidirectional LSTM and an attention
    layer, then classified into 5 classes with softmax.
    """
    set_seed(33)
    optimizer = Adam(learning_rate=2e-5)

    # One (ids, mask, token-type) triple per tower.
    id1 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    id2 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask1 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask2 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    atn1 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    atn2 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    config = BertConfig()
    config.output_hidden_states = False  # flip to True to expose hidden states
    bert_model1 = TFBertModel.from_pretrained('bert-base-uncased', config=config)
    bert_model2 = TFBertModel.from_pretrained('bert-base-uncased', config=config)

    # [0] is each encoder's per-token last hidden state.
    embedding1 = bert_model1(id1, attention_mask=mask1, token_type_ids=atn1)[0]
    embedding2 = bert_model2(id2, attention_mask=mask2, token_type_ids=atn2)[0]

    features = Concatenate()([embedding1, embedding2])
    # Bidirectional LSTM over the joined token sequence; every time step is
    # kept (return_sequences=True) so attention can weigh the tokens.
    features = keras.layers.Bidirectional(
        keras.layers.LSTM(64,
                          kernel_initializer='he_normal',
                          return_sequences=True))(features)
    features = Attention(128)(features)
    features = Dense(64, activation='relu')(features)
    features = Dropout(0.2)(features)
    predictions = Dense(5, activation='softmax')(features)

    model = Model(inputs=[id1, mask1, atn1, id2, mask2, atn2],
                  outputs=predictions)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
def get_transformer(bert_model_type, output_hidden_states=False):
    """Instantiate the TF transformer backbone for `bert_model_type`.

    A dispatch table maps each supported model type to its transformers
    class and to whether its stored weights are a PyTorch checkpoint that
    must be converted on load. Unknown types raise ValueError.
    """
    config = get_bert_config(bert_model_type, output_hidden_states)

    # model type -> (TF model class, load from a PyTorch checkpoint?)
    registry = {}
    for names, model_cls, from_pt in (
        (('bert-base-uncased', 'bert-base-cased', 'bert-large-uncased',
          'bert-large-uncased-whole-word-masking',
          'bert-large-uncased-whole-word-masking-finetuned-squad'),
         TFBertModel, False),
        (('prod-bert-base-uncased', 'tune_bert-base-uncased_nsp'),
         TFBertModel, True),
        (('roberta-base', 'roberta-large', 'roberta-large-mnli',
          'distilroberta-base'), TFRobertaModel, False),
        (('prod-roberta-base-cased',), TFRobertaModel, True),
        (('xlnet-base-cased',), TFXLNetModel, False),
        (('albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1',
          'albert-xxlarge-v1'), TFAlbertModel, False),
        (('gpt2', 'gpt2-medium'), TFGPT2Model, False),
        (('transfo-xl',), TFTransfoXLModel, False),
        (('distilbert-base-uncased',
          'distilbert-base-uncased-distilled-squad'), TFDistilBertModel, False),
    ):
        for name in names:
            registry[name] = (model_cls, from_pt)

    if bert_model_type not in registry:
        raise ValueError(
            f'`bert_model_type` not understood: {bert_model_type}')

    model_cls, from_pt = registry[bert_model_type]
    if from_pt:
        return model_cls.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                         config=config,
                                         from_pt=True)
    return model_cls.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                     config=config)
def create_bert_cnn_model(num_tokens: int, num_filters: int, filter_size: int,
                          embedding_dim: int, nn_hidden_dim: int,
                          dropout_prob: float):
    """Build a BERT + 2-D CNN binary classifier (sigmoid output).

    NOTE(review): the max-pool window (13) and the flatten width (200) are
    hard-coded; they only line up with specific values of
    num_tokens/filter_size/num_filters — confirm before reusing this with
    other hyper-parameters.
    """
    # define the encoder for bert model
    bert_encoder = TFBertModel.from_pretrained('bert-base-uncased')
    input_word_ids = tf.keras.Input(shape=(num_tokens, ),
                                    dtype=tf.int32,
                                    name="input_word_ids")
    # [0] = last hidden state (batch, num_tokens, hidden); append a channel
    # axis so Convolution2D can treat the token grid as a 1-channel image.
    bert_embedding = bert_encoder([input_word_ids])
    cnn_input = tf.expand_dims(bert_embedding[0], -1)
    cnn_output = layers.Convolution2D(filters=num_filters,
                                      kernel_size=[filter_size, embedding_dim],
                                      activation='relu')(cnn_input)
    # Pool over the token axis; 13 presumably equals
    # num_tokens - filter_size + 1 — TODO confirm.
    max_pooled_output = tf.nn.max_pool(cnn_output,
                                       ksize=[1, 13, 1, 1],
                                       strides=[1, 1, 1, 1],
                                       padding='VALID')
    # 200 presumably equals num_filters — TODO confirm.
    max_pooled_output = tf.reshape(max_pooled_output, [-1, 200])
    hidden_output = layers.Dense(nn_hidden_dim,
                                 activation='relu')(max_pooled_output)
    hidden_output = layers.Dropout(dropout_prob)(hidden_output)
    output = layers.Dense(1, activation='sigmoid')(hidden_output)
    model = tf.keras.Model(inputs=[input_word_ids], outputs=output)
    return model
def __init__(self, params):
    """Build a BERT-backed selection network from a `params` dict.

    Keys read here: "train_bert", "bert_version", "layers", "linear_units",
    "final_units".
    """
    super().__init__()
    self.train_bert = params["train_bert"]
    self.bert = TFBertModel.from_pretrained(params["bert_version"])
    # NOTE(review): the upstream formatting was ambiguous about how much of
    # the constructor this `if` guards; the minimal reading (only the dense
    # stack) is used here — confirm against the original file.
    if not self.train_bert:
        # One dense+layer-norm+ReLU block per entry in params["layers"].
        self.dense_modules = [
            dict(
                dense=tf.keras.layers.Dense(units),
                normalization=tf.keras.layers.LayerNormalization(),
                activation=tf.nn.relu,
            ) for units in params["layers"]
        ]
    self.selection_module = dict(
        dense=tf.keras.layers.Dense(params["linear_units"]),
        normalization=tf.keras.layers.LayerNormalization(),
        activation=tf.nn.relu,
    )
    self.select = Select(5, use_scale=True)
    self.flatten = tf.keras.layers.Flatten()
    self.final_module = dict(
        dense=tf.keras.layers.Dense(params["final_units"]),
        normalization=tf.keras.layers.LayerNormalization(),
        activation=tf.nn.relu,
    )
    # Single sigmoid unit: binary prediction head.
    self.output_layer = tf.keras.layers.Dense(1, activation="sigmoid")
def simple_bert():
    """Single-tower BERT: BiLSTM over token states, mean-pooled, softmax head."""
    set_seed(33)
    optimizer = Adam(learning_rate=2e-5)

    id_ = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask_ = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    atn_ = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    config = BertConfig()
    config.output_hidden_states = False  # set True to also obtain hidden states
    bert_model = TFBertModel.from_pretrained('bert-base-uncased', config=config)

    # Per-token last hidden state from BERT.
    token_states = bert_model(id_, attention_mask=mask_, token_type_ids=atn_)[0]
    # Bidirectional LSTM keeps every time step (return_sequences=True), then
    # the sequence is averaged into a single vector.
    recurrent = Bidirectional(
        LSTM(64,
             kernel_initializer='he_normal',
             return_sequences=True))(token_states)
    pooled = GlobalAveragePooling1D()(recurrent)
    probabilities = Dense(len(map_label), activation='softmax')(pooled)

    model = Model(inputs=[id_, mask_, atn_], outputs=probabilities)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
def get_model_tokenizer(model_path, do_lower_case, seed=42):
    """Return a (model, tokenizer) pair for the transformer named by `model_path`.

    Supported prefixes: 'bert', 'roberta', 'jplu/tf-xlm-roberta' and
    'random-bert' (a freshly initialised, untrained BERT built from a
    seeded config). Any other name raises ValueError.
    """
    if model_path.startswith('bert'):
        tokenizer = BertTokenizer.from_pretrained(
            model_path, do_lower_case=do_lower_case)
        model = TFBertModel.from_pretrained(
            model_path, output_hidden_states=True, output_attentions=False)
        return model, tokenizer

    if model_path.startswith('roberta'):
        tokenizer = RobertaTokenizer.from_pretrained(
            model_path, do_lower_case=do_lower_case, add_prefix_space=True)
        model = TFRobertaModel.from_pretrained(
            model_path, output_hidden_states=True, output_attentions=False)
        return model, tokenizer

    if model_path.startswith('jplu/tf-xlm-roberta'):
        tokenizer = XLMRobertaTokenizer.from_pretrained(
            model_path, do_lower_case=do_lower_case)
        model = TFXLMRobertaModel.from_pretrained(
            model_path, output_hidden_states=True, output_attentions=False)
        return model, tokenizer

    if model_path.startswith('random-bert'):
        # Untrained baseline: stock cased vocab for the tokenizer, randomly
        # initialised weights from a seeded config for the model.
        tokenizer = BertTokenizer.from_pretrained("bert-base-cased",
                                                  do_lower_case=True)
        config = BertConfig(seed=seed,
                            output_hidden_states=True,
                            output_attentions=False)
        return TFBertModel(config), tokenizer

    raise ValueError(
        f"Unknown Transformer name: {model_path}. "
        f"Please select one of the supported models: {constants.SUPPORTED_MODELS}"
    )
def bert_multiclass():
    """Frozen-BERT 4-way classifier trained on the [CLS] token state only."""
    bert_encoder = TFBertModel.from_pretrained("bert-base-cased")
    # Freeze the encoder: only the softmax head receives gradient updates.
    for layer in bert_encoder.layers:
        layer.trainable = False

    def make_input(name):
        return tf.keras.layers.Input(shape=(MAX_LENGTH, ),
                                     dtype=tf.int32,
                                     name=name)

    input_ids = make_input("input_word_ids")
    attention_mask_ids = make_input("attention_mask_ids")
    token_type_ids = make_input("token_type_ids")

    # [0] = per-token last hidden state.
    sequence_output = bert_encoder(
        [input_ids, attention_mask_ids, token_type_ids])[0]
    # Classify from the first ([CLS]) position.
    cls_state = sequence_output[:, 0, :]
    out = tf.keras.layers.Dense(4, activation='softmax')(cls_state)

    return tf.keras.models.Model(
        inputs=[input_ids, attention_mask_ids, token_type_ids],
        outputs=out)
def get_emb(inputs_list, model_name, max_length=512):
    """Embed each text in `inputs_list` with a T5 or BERT backbone.

    Returns one mean-pooled hidden-state vector (as a list) per input, or
    None when `model_name` matches neither family (original behaviour kept).

    Fix: the BERT branch previously encoded a hard-coded demo list
    ("this is", "the second", "the thrid") instead of `inputs_list`, so
    callers silently received embeddings of placeholder text.
    """
    if 't5' in model_name:
        tokenizer = T5Tokenizer.from_pretrained(TOKEN_DIR)
        model = T5Model.from_pretrained(MODEL_DIR)
        inputs = tokenizer.batch_encode_plus(inputs_list,
                                             max_length=max_length,
                                             pad_to_max_length=True,
                                             return_tensors="pt")
        outputs = model(input_ids=inputs['input_ids'],
                        decoder_input_ids=inputs['input_ids'])
        # Mean-pool over the sequence dimension.
        last_hidden_states = torch.mean(outputs[0], dim=1)
        return last_hidden_states.tolist()
    elif 'bert' in model_name:
        tokenizer = BertTokenizer.from_pretrained(
            'bert-base-multilingual-cased')
        model = TFBertModel.from_pretrained('bert-base-multilingual-cased')
        # Bug fix: encode the caller's texts, not a hard-coded sample list.
        batch_encoding = tokenizer.batch_encode_plus(
            inputs_list,
            max_length=max_length,
            pad_to_max_length=True)
        outputs = model(tf.convert_to_tensor(batch_encoding['input_ids'])
                        )  # shape: (batch, sequence length, hidden state)
        # Mean-pool the token states into one vector per text.
        embeddings = tf.reduce_mean(outputs[0], 1)
        return embeddings.numpy().tolist()
def build_model(name):
    """3-class softmax classifier on the [CLS] embedding of a pretrained BERT."""
    max_len = 259
    bert_encoder = TFBertModel.from_pretrained(name)

    def token_input(layer_name):
        return tf.keras.Input(shape=(max_len, ),
                              dtype=tf.int32,
                              name=layer_name)

    input_word_ids = token_input("input_word_ids")
    input_mask = token_input("input_mask")
    input_type_ids = token_input("input_type_ids")

    # [0] = per-token last hidden state; classify from the [CLS] position.
    sequence_output = bert_encoder(
        [input_word_ids, input_mask, input_type_ids])[0]
    output = tf.keras.layers.Dense(3, activation='softmax')(
        sequence_output[:, 0, :])

    model = tf.keras.Model(
        inputs=[input_word_ids, input_mask, input_type_ids],
        outputs=output)
    model.compile(tf.keras.optimizers.Adam(lr=1e-5),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def __init__(self, data_manager, logger):
    """Build the classifier selected by `classifier_config` and restore its
    latest checkpoint from disk."""
    hidden_dim = classifier_config['hidden_dim']
    classifier = classifier_config['classifier']
    self.dataManager = data_manager
    self.seq_length = data_manager.max_sequence_length
    num_classes = data_manager.max_label_number
    self.embedding_dim = data_manager.embedding_dim
    vocab_size = data_manager.vocab_size
    self.logger = logger
    # Number of convolution kernels (used by TextCNN only).
    num_filters = classifier_config['num_filters']
    self.checkpoints_dir = classifier_config['checkpoints_dir']
    self.embedding_method = classifier_config['embedding_method']
    if self.embedding_method == 'Bert':
        # Imported lazily so transformers is only required in Bert mode.
        from transformers import TFBertModel
        self.bert_model = TFBertModel.from_pretrained('bert-base-multilingual-cased')
    logger.info('loading model parameter')
    if classifier == 'textcnn':
        from engines.models.textcnn import TextCNN
        self.model = TextCNN(self.seq_length, num_filters, num_classes, self.embedding_dim, vocab_size)
    elif classifier == 'textrcnn':
        from engines.models.textrcnn import TextRCNN
        self.model = TextRCNN(self.seq_length, num_classes, hidden_dim, self.embedding_dim, vocab_size)
    elif classifier == 'textrnn':
        from engines.models.textrnn import TextRNN
        self.model = TextRNN(self.seq_length, num_classes, hidden_dim, self.embedding_dim, vocab_size)
    else:
        raise Exception('config model is not exist')
    # Instantiate a Checkpoint whose restore target is the freshly built model.
    checkpoint = tf.train.Checkpoint(model=self.model)
    # Restore the model parameters from the latest checkpoint file.
    checkpoint.restore(tf.train.latest_checkpoint(self.checkpoints_dir))
    logger.info('loading model successfully')
def get_model():
    """Shared-BERT text-pair ranker with handcrafted similarity features.

    Texts A and B are encoded by the same fine-tunable BERT; their [CLS]
    vectors, their difference, their cosine similarity and five scalar
    features (BM25, TF cosine, TF-IDF cosine, two category flags) feed two
    independent sigmoid heads, `output_A` and `output_B`.

    Fix: `input_token_type_ids_textb` was defined twice — the first Input
    layer was orphaned and never connected to the graph. The duplicate is
    removed.
    """
    K.clear_session()
    bert_model = TFBertModel.from_pretrained(bert_path, from_pt=True, trainable=True)
    for l in bert_model.layers:
        l.trainable = True

    input_ids_texta = Input(shape=(None,), dtype='int32', name='input_ids_texta')
    input_token_type_ids_texta = Input(shape=(None,), dtype='int32', name='input_token_type_ids_texta')
    input_attention_mask_texta = Input(shape=(None,), dtype='int32', name='input_attention_mask_texta')
    input_ids_textb = Input(shape=(None,), dtype='int32', name='input_ids_textb')
    input_token_type_ids_textb = Input(shape=(None,), dtype='int32', name='input_token_type_ids_textb')
    input_attention_mask_textb = Input(shape=(None,), dtype='int32', name='input_attention_mask_textb')
    # Scalar similarity / category features.
    input_bm25 = Input(shape=(1), dtype='float32', name='input_bm25')
    input_tf_cosine = Input(shape=(1), dtype='float32', name='input_tf_cosine')
    input_tfidf_cosine = Input(shape=(1), dtype='float32', name='input_tfidf_cosine')
    input_cat_texta = Input(shape=(1), dtype='float32', name='input_cat_texta')
    input_cat_textb = Input(shape=(1), dtype='float32', name='input_cat_textb')

    bert_output_texta = bert_model({'input_ids': input_ids_texta,
                                    'token_type_ids': input_token_type_ids_texta,
                                    'attention_mask': input_attention_mask_texta},
                                   return_dict=False, training=True)
    projection_logits_texta = bert_output_texta[0]
    # Take the [CLS] vector for classification.
    bert_cls_texta = Lambda(lambda x: x[:, 0])(projection_logits_texta)

    bert_output_textb = bert_model({'input_ids': input_ids_textb,
                                    'token_type_ids': input_token_type_ids_textb,
                                    'attention_mask': input_attention_mask_textb},
                                   return_dict=False, training=True)
    projection_logits_textb = bert_output_textb[0]
    bert_cls_textb = Lambda(lambda x: x[:, 0])(projection_logits_textb)

    subtracted = Subtract()([bert_cls_texta, bert_cls_textb])
    # axes=1 -> row-wise dot product; normalize=True -> cosine similarity.
    cos = Dot(axes=1, normalize=True)([bert_cls_texta, bert_cls_textb])
    bert_cls = concatenate([bert_cls_texta, bert_cls_textb, subtracted, cos,
                            input_bm25, input_tf_cosine, input_tfidf_cosine,
                            input_cat_texta, input_cat_textb], axis=-1)

    dense_A_0 = Dense(256, activation='relu')(bert_cls)
    dropout_A_0 = Dropout(0.2)(dense_A_0)
    dense_A_1 = Dense(32, activation='relu')(dropout_A_0)
    dropout_A_1 = Dropout(0.2)(dense_A_1)
    output_A = Dense(1, activation='sigmoid', name='output_A')(dropout_A_1)

    dense_B_0 = Dense(256, activation='relu')(bert_cls)
    dropout_B_0 = Dropout(0.2)(dense_B_0)
    dense_B_1 = Dense(32, activation='relu')(dropout_B_0)
    dropout_B_1 = Dropout(0.2)(dense_B_1)
    output_B = Dense(1, activation='sigmoid', name='output_B')(dropout_B_1)

    model = Model([input_ids_texta, input_token_type_ids_texta, input_attention_mask_texta,
                   input_ids_textb, input_token_type_ids_textb, input_attention_mask_textb,
                   input_bm25, input_tf_cosine, input_tfidf_cosine,
                   input_cat_texta, input_cat_textb],
                  [output_A, output_B])
    model.compile(
        loss={
            'output_A': my_binary_crossentropy,
            'output_B': my_binary_crossentropy,
        },
        optimizer=Adam(1e-5),  # small learning rate for fine-tuning
        metrics=[my_binary_accuracy, my_f1_score]
    )
    print(model.summary())
    return model
def extract_bert_feature(df):
    """Add a 'bert_vector' column: mean-pooled multilingual-BERT embeddings
    of each row's 'pdf_content' text (missing values treated as "").

    Fix: the previous `max_length` of 768 exceeds BERT's 512 position
    embeddings, so any document longer than 512 tokens crashed the model;
    sequences are now truncated/padded to 512. A very large commented-out
    sample `tokenizer.encode(...)` call was also removed.
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
    model = TFBertModel.from_pretrained('bert-base-multilingual-cased')

    text_list = df['pdf_content'].fillna("").to_list()
    # BERT's position-embedding table caps sequences at 512 tokens.
    max_length = 512
    batch_encoding = tokenizer.batch_encode_plus(
        text_list,
        max_length=max_length,
        pad_to_max_length=True,
    )
    features = [batch_encoding['input_ids'][i] for i in range(len(text_list))]
    features = tf.convert_to_tensor(features)

    outputs = model(features)  # shape: (batch, sequence length, hidden state)
    # Mean-pool over the token axis to get one semantic vector per document.
    last_hidden_states = tf.reduce_mean(outputs[0], 1)
    df['bert_vector'] = last_hidden_states.numpy().tolist()
    return df
def build_model(model_path):
    """BERT QA model predicting answer-span start/end over token positions,
    using the concatenation of the last three transformer layers.

    Fix: the encoder must be loaded with output_hidden_states=True —
    without it the output tuple has no third element and unpacking
    `hidden_states` fails at build time
    (see https://github.com/huggingface/transformers/issues/6029).
    """
    # BERT encoder exposing all intermediate hidden states.
    encoder = TFBertModel.from_pretrained(model_path, output_hidden_states=True)

    # QA Model
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)
    outputs = encoder(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask
    )
    _, _, hidden_states = outputs[0], outputs[1], outputs[2]
    # Per-token features = last three transformer layers, concatenated.
    sequence_output = layers.Concatenate(axis=-1)(
        [hidden_states[-1], hidden_states[-2], hidden_states[-3]])

    # One logit per token for each span boundary, flattened to (batch, max_len).
    start_logits = layers.Dense(1, name="start_logit", use_bias=False)(sequence_output)
    start_logits = layers.Flatten()(start_logits)
    end_logits = layers.Dense(1, name="end_logit", use_bias=False)(sequence_output)
    end_logits = layers.Flatten()(end_logits)

    # Softmax over token positions.
    start_probs = layers.Activation(keras.activations.softmax, name="start")(start_logits)
    end_probs = layers.Activation(keras.activations.softmax, name="end")(end_logits)

    bert_model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=[start_probs, end_probs],
        name="BERTForQuestionAnswer"
    )
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = keras.optimizers.Adam(lr=5e-5)
    bert_model.compile(optimizer=optimizer,
                       loss=[loss, loss],
                       metrics=['acc', get_lr_metric(optimizer)])
    return bert_model
def create_model(params):
    """3-class classifier over mean-pooled token states of a Chinese BERT.

    Fix: with output_hidden_states=False the encoder returns only
    (sequence_output, pooler_output); the original unpacked three values,
    which raises a ValueError as soon as the graph is traced.
    """
    input_id = tf.keras.layers.Input((params["max_sequence_length"], ),
                                     dtype=tf.int32)
    input_mask = tf.keras.layers.Input((params["max_sequence_length"], ),
                                       dtype=tf.int32)
    input_atn = tf.keras.layers.Input((params["max_sequence_length"], ),
                                      dtype=tf.int32)

    config = BertConfig.from_pretrained(
        params["bert_pretrain_path"] + 'bert-base-chinese-config.json',
        output_hidden_states=False)
    bert_model = TFBertModel.from_pretrained(
        params["bert_pretrain_path"] + 'bert-base-chinese-tf_model.h5',
        config=config)

    # sequence_output: (batch, seq_len, 768); pooler_output: (batch, 768).
    sequence_output, pooler_output = bert_model(
        input_id, attention_mask=input_mask, token_type_ids=input_atn)

    x = tf.keras.layers.GlobalAveragePooling1D()(sequence_output)
    x = tf.keras.layers.Dropout(0.15)(x)
    x = tf.keras.layers.Dense(3, activation='softmax')(x)
    model = tf.keras.models.Model(inputs=[input_id, input_mask, input_atn],
                                  outputs=x)
    return model
def _build_model_from_transformers(self):
    """Load the 6-layer uncased BERT, converting the PyTorch weights to TF."""
    from transformers import TFBertModel
    weights_dir = os.path.join(PYTORCH_MODEL_PATH,
                               "bert_uncased_L-6_H-768_A-12-pytorch")
    return TFBertModel.from_pretrained(weights_dir, from_pt=True)
def create_model_cls_output(self, trainable=True):
    """Classifier over BERT's pooled [CLS] output; encoder optionally frozen.

    Emits raw logits (from_logits=True in the loss) over
    `self.nums_category` classes.
    """
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    encoder.trainable = trainable

    input_ids = layers.Input(shape=(512, ), dtype=tf.int32)
    attention_mask = layers.Input(shape=(512, ), dtype=tf.int32)
    encoded = encoder(input_ids, attention_mask=attention_mask)

    # encoded[1] is the pooled [CLS] representation.
    pooled = layers.Dropout(0.1)(encoded[1])
    logits = layers.Dense(
        self.nums_category,
        kernel_initializer=modeling_tf_utils.get_initializer(0.02))(pooled)

    model = tf.keras.Model(
        inputs=[input_ids, attention_mask],
        outputs=[logits],
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["acc"])
    model.summary()
    return model
def _load_model_instance(self, skip_model_load: bool) -> None:
    """Try loading the model instance.

    Args:
        skip_model_load: Skip loading the model instances to save time.
            This should be True only for pytests.
    """
    if skip_model_load:
        # This should be True only during pytests
        return

    from rasa.nlu.utils.hugging_face.registry import (
        model_class_dict,
        model_tokenizer_dict,
    )

    logger.debug(f"Loading Tokenizer and Model for {self.model_name}")

    # Both tokenizer and model are pinned to the Portuguese BERT weights,
    # converted from their PyTorch checkpoint.
    portuguese_bert = "neuralmind/bert-base-portuguese-cased"
    self.tokenizer = model_tokenizer_dict[self.model_name].from_pretrained(
        portuguese_bert, cache_dir=self.cache_dir, from_pt=True)
    self.model = TFBertModel.from_pretrained(
        portuguese_bert, cache_dir=self.cache_dir, from_pt=True)

    # Transformer architectures share no universal pad token, and
    # pad_token_id is unset for some of them, so unk_token_id is used as the
    # padding id instead. Adding a dedicated token is not an option because
    # vocabulary resizing is unsupported for the TF classes; the attention
    # mask keeps this choice from affecting predictions.
    self.pad_token_id = self.tokenizer.unk_token_id
def create_model():
    """Span-prediction QA head on top of a pretrained BERT encoder."""
    encoder = TFBertModel.from_pretrained("bert-base-uncased")

    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)

    # [0] = per-token last hidden state.
    embedding = encoder(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask
    )[0]

    def span_head(name):
        # One logit per token, flattened to (batch, max_len).
        logits = layers.Dense(1, name=name, use_bias=False)(embedding)
        return layers.Flatten()(logits)

    # Softmax over token positions for each span boundary.
    start_probs = layers.Activation(keras.activations.softmax)(
        span_head("start_logit"))
    end_probs = layers.Activation(keras.activations.softmax)(
        span_head("end_logit"))

    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=[start_probs, end_probs],
    )
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=keras.optimizers.Adam(lr=5e-5), loss=[loss, loss])
    return model
def __init__(self, dropout=0.1, units=300):
    """BERT encoder followed by dropout, a SiLU hidden layer and a 2-way head.

    Args:
        dropout: drop probability applied before the hidden layer.
        units: width of the hidden Dense layer.
    """
    super().__init__()
    # Fine-tunable pretrained backbone.
    self.bert = TFBertModel.from_pretrained('bert-base-uncased', trainable=True)
    # Regularisation + classification head (2 output logits).
    self.drop = tf.keras.layers.Dropout(dropout)
    self.fc = tf.keras.layers.Dense(units, tf.nn.silu)
    self.out = tf.keras.layers.Dense(2)
def _bulk_predict(self, batch_size=256):
    """Predict bert embeddings.

    Yields one mean-pooled embedding (as a list of floats) per document in
    `self.docs`, processing the documents in batches.

    Fixes: the tokenizer and model were reloaded from disk on every batch —
    they are now created once before the loop; the inner index also
    shadowed the outer batch index `i` (renamed to `j`).
    """
    tokenizer = BertTokenizer.from_pretrained(self.bert_model_name)
    model = TFBertModel.from_pretrained(self.bert_model_name)
    for i in range(0, len(self.docs), batch_size):
        batch_docs = self.docs[i:i + batch_size]
        text_list = [doc['text'] for doc in batch_docs]
        batch_encoding = tokenizer.batch_encode_plus(
            text_list,
            max_length=self.max_length,
            pad_to_max_length=True,
        )
        features = []
        for j in range(len(text_list)):
            inputs = {k: batch_encoding[k][j] for k in batch_encoding}
            features.append(inputs['input_ids'])
        features = tf.convert_to_tensor(features)
        outputs = model(
            features)  # shape: (batch,sequence length, hidden state)
        # Mean-pool the token states into one vector per document.
        embeddings = tf.reduce_mean(outputs[0], 1)
        for emb in embeddings:
            yield emb.numpy().tolist()
def bertDNN_model(self):
    """BERT encoder followed by a 256-128-64 ReLU stack and a sigmoid unit.

    NOTE(review): the dense stack is applied to the full per-token output
    (rank-3 tensor), so the model emits one sigmoid per token position —
    confirm the caller expects that shape for binary_crossentropy.
    """
    max_len = int(self.max_len)
    encoder = TFBertModel.from_pretrained("bert-base-uncased")

    input_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len, ), dtype=tf.int32)

    # [0] = per-token last hidden state.
    hidden = encoder(input_ids,
                     token_type_ids=token_type_ids,
                     attention_mask=attention_mask)[0]
    # Shrinking ReLU stack: 256 -> 128 -> 64.
    for width in (256, 128, 64):
        hidden = layers.Dense(units=width, activation='relu')(hidden)
    hidden = layers.Dropout(0.2)(hidden)
    label = layers.Dense(1,
                         name="start_logit",
                         activation='sigmoid',
                         use_bias=False)(hidden)

    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=[label],
    )
    model.compile(optimizer=keras.optimizers.Adam(lr=5e-5),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
def __init__(self,
             num_layers: int,
             num_heads: int,
             feed_forward_dim: int,
             embedding_shape: Tuple[int, int],
             bert_embedding_name=None,
             embedding_trainable=True,
             embedding_weights=None,
             dropout_rate=0.1,
             max_seq_len=10000) -> None:
    """Transformer encoder stack whose embedding layer is either a
    pretrained BERT model or a plain (optionally pre-initialised)
    ``tf.keras.layers.Embedding``.
    """
    super(Encoder, self).__init__()
    self.num_layers = num_layers
    vocab_size, vec_dim = embedding_shape
    self.embedding_size = vec_dim
    self.bert_embedding_name = bert_embedding_name
    if bert_embedding_name is None:
        # Plain embedding layer, seeded from embedding_weights when given.
        init_weights = None if embedding_weights is None else [embedding_weights]
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            vec_dim,
            weights=init_weights,
            trainable=embedding_trainable)
    else:
        # Pretrained BERT used as the embedding layer.
        self.embedding = TFBertModel.from_pretrained(bert_embedding_name)
        self.embedding.trainable = embedding_trainable
    self.pos_encoding = positional_encoding(max_seq_len, self.embedding_size)
    self.enc_layers = [
        EncoderLayer(vec_dim, num_heads, feed_forward_dim, dropout_rate)
        for _ in range(num_layers)
    ]
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
def get_embedding_model():
    """Return a ``(model, tokenizer)`` pair for computing BERT embeddings.

    NOTE: the first time this executes, the pretrained weights are
    downloaded. Ideally that download would happen out-of-band, from a
    version-controlled copy of the model — e.g. during the docker build.
    """
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = TFBertModel.from_pretrained("bert-base-uncased",
                                        output_hidden_states=True)
    return model, tokenizer
def __init__(self,
             input_files,
             labels=None,
             batch_size=12,
             max_len=50,
             train_p=0.3,
             valid_p=0.1,
             test_p=0.05,
             size=data.full_dimension):
    """Load, preprocess and split the dataset into train/valid/test pipelines.

    Raises:
        ValueError: if ``size`` is not strictly smaller than
            ``data.full_dimension``.
    """
    # --------- Parameters here
    # Explicit exception instead of `assert`: asserts are stripped under -O,
    # which would silently skip this validation.
    if size >= data.full_dimension:
        raise ValueError("The size is too big")
    self.size = size
    self.max_len = max_len
    # these are Bert parameters
    self.tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    self.trans_model = TFBertModel.from_pretrained('bert-base-cased')
    # ---------------------------
    ## load the dataset first
    x, y = self.load_data_from_files(input_files, labels)
    x, y = self.preprocess_xy(x, y)
    train, val, test = \
        self.split_data(x, y, train_p, valid_p, test_p, batch_size)
    self.train = self.batch_and_prepare(train,
                                        is_training=True,
                                        batch_size=batch_size)
    self.valid = self.batch_and_prepare(val,
                                        is_training=False,
                                        batch_size=batch_size)
    self.test = self.batch_and_prepare(test,
                                       is_training=False,
                                       batch_size=batch_size)
def _embedding_from_bert():
    """Return ``(embedding_matrix, bert_model)`` from the configured
    pretrained BERT checkpoint, with the model frozen."""
    log.info("Extracting pretrained word embeddings weights from BERT")
    bert_model = TFBertModel.from_pretrained(config.pretrained_bert_model,
                                             trainable=False)
    # The first weight tensor of the model is the word-embedding matrix.
    embedding_matrix = bert_model.get_weights()[0]
    log.info(f"Embedding matrix shape '{embedding_matrix.shape}'")
    return (embedding_matrix, bert_model)
def predict_sentence(
        sentence,
        model_path="/home/stanleyzheng/Desktop/Learning/classifier/saved_models/savedmodelNLP"):
    """Classify a single sentence with the saved roBERTa classifier.

    Args:
        sentence: raw text to classify.
        model_path: directory of the saved pretrained model. Generalized
            from the previously hard-coded absolute path; the default
            preserves the original behavior.

    Returns:
        The positive-class prediction(s); Doom is 1, Animal Crossing is 0.
    """
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    enc = tokenizer.encode(sentence)
    # Pad/truncate the single encoded sentence to a fixed length of 128.
    trainenc = tf.keras.preprocessing.sequence.pad_sequences(
        [enc],
        maxlen=128,
        dtype="long",
        value=0,
        truncating="post",
        padding="post")
    # Attention mask: 1 for real tokens, 0 for padding.
    trainattn = [[int(tok > 0) for tok in row] for row in trainenc]
    loader = tf.data.Dataset.from_tensor_slices((trainenc, trainattn))
    model = roBERTaClassifier(TFBertModel.from_pretrained(model_path), 1)
    prediction = model.predict(loader)
    print(prediction[1][0] - 0.01)
    return prediction[1]  # Doom is 1, Animal Crossing is 0
    # img = skimage.io.imread('/home/stanleyzheng/Desktop/ignitionhacks/test/animalcrossing.jpg')
    # predict_image(img)
def __init__(
        self,
        pretrained_model_name_or_path='bert-base-uncased',
        reduce_output='cls_pooled',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    """Wrap a pretrained BERT model as a text encoder.

    Exits the process with an error if `transformers` is not installed.
    """
    super(BERTEncoder, self).__init__()
    try:
        from transformers import TFBertModel
    except ModuleNotFoundError:
        logger.error(
            ' transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)
    self.transformer = TFBertModel.from_pretrained(
        pretrained_model_name_or_path
    )
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)
    self.reduce_output = reduce_output
    # 'cls_pooled' uses BERT's pooled [CLS] output directly; every other
    # mode goes through a generic sequence reducer.
    if self.reduce_output != 'cls_pooled':
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)