def init_new_training(self):
    if self.params.use_hf_model_:
        if self.params.use_hf_electra_model_:
            self.pretrained_bert = TFElectraModel.from_pretrained(
                self.params.pretrained_hf_model_,
                cache_dir=self.params.hf_cache_dir_,
                return_dict=True,
            )
        else:
            self.pretrained_bert = TFBertModel.from_pretrained(
                self.params.pretrained_hf_model_,
                cache_dir=self.params.hf_cache_dir_,
                return_dict=True,
            )
    elif self.params.pretrained_bert:
        logger.info(f"Attempting to load pre-trained BERT from saved model: {self.params.pretrained_bert}")
        if os.path.basename(self.params.pretrained_bert.strip(os.sep)) == "encoder_only":
            saved_model_dir = self.params.pretrained_bert
        elif os.path.isdir(os.path.join(self.params.pretrained_bert, "export", "additional", "encoder_only")):
            saved_model_dir = os.path.join(self.params.pretrained_bert, "export", "additional", "encoder_only")
        elif os.path.isdir(os.path.join(self.params.pretrained_bert, "additional", "encoder_only")):
            saved_model_dir = os.path.join(self.params.pretrained_bert, "additional", "encoder_only")
        elif os.path.basename(self.params.pretrained_bert.strip(os.sep)) == "best" and os.path.isdir(
            os.path.join(self.params.pretrained_bert, "encoder_only")
        ):
            saved_model_dir = os.path.join(self.params.pretrained_bert, "encoder_only")
        else:
            saved_model_dir = os.path.join(self.params.pretrained_bert, "best", "encoder_only")
        self.pretrained_bert = keras.models.load_model(saved_model_dir)
        logger.info(f"Saved model loaded from: {saved_model_dir}")
def __init__(
        self,
        pretrained_model_name_or_path='google/electra-small-discriminator',
        reduce_output='sum',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    super(ELECTRAEncoder, self).__init__()
    try:
        from transformers import TFElectraModel
    except ModuleNotFoundError:
        logger.error(
            ' transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)
    self.transformer = TFElectraModel.from_pretrained(
        pretrained_model_name_or_path
    )
    self.reduce_output = reduce_output
    self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)
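# A minimal standalone sketch of what the encoder above wraps. The checkpoint
# name matches the class default; the direct forward call below is plain
# transformers usage, not the surrounding Ludwig API.
from transformers import ElectraTokenizer, TFElectraModel

tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
transformer = TFElectraModel.from_pretrained('google/electra-small-discriminator')

encoded = tokenizer("ELECTRA encodes this sentence.", return_tensors="tf")
hidden_states = transformer(encoded)[0]  # last hidden states, shape (1, seq_len, hidden_size)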
def _test_Electra(self, size, large=False):
    from transformers import ElectraTokenizer, TFElectraModel
    tokenizer = ElectraTokenizer.from_pretrained(size)
    model = TFElectraModel.from_pretrained(size)
    input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
    spec, input_dict = self.spec_and_pad(input_dict)
    outputs = ["last_hidden_state"]
    self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
def build_transformer(self, training, transformer):
    if training:
        self.config.n_words = len(self.transform.form_vocab)
        self._init_config()
    if isinstance(transformer, str):
        if 'albert_chinese' in transformer:
            tokenizer = BertTokenizerFast.from_pretrained(transformer, add_special_tokens=False)
            transformer: TFPreTrainedModel = TFAutoModel.from_pretrained(transformer, name=transformer, from_pt=True)
        elif transformer.startswith('albert') and transformer.endswith('zh'):
            transformer, tokenizer, path = build_transformer(transformer)
            transformer.config = AlbertConfig.from_json_file(os.path.join(path, "albert_config.json"))
            tokenizer = BertTokenizer.from_pretrained(os.path.join(path, "vocab_chinese.txt"), add_special_tokens=False)
        elif 'chinese-roberta' in transformer:
            tokenizer = BertTokenizer.from_pretrained(transformer)
            transformer = TFBertModel.from_pretrained(transformer, name=transformer, from_pt=True)
        elif 'electra' in transformer:
            from transformers import TFElectraModel
            tokenizer = BertTokenizer.from_pretrained(transformer)
            transformer = TFElectraModel.from_pretrained(transformer, name=transformer, from_pt=True)
        else:
            tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(transformer)
            try:
                transformer: TFPreTrainedModel = TFAutoModel.from_pretrained(transformer, name=transformer)
            except (TypeError, OSError):
                transformer: TFPreTrainedModel = TFAutoModel.from_pretrained(transformer, name=transformer, from_pt=True)
    elif transformer[0] == 'AutoModelWithLMHead':
        tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(transformer[1])
        transformer: TFAutoModelWithLMHead = TFAutoModelWithLMHead.from_pretrained(transformer[1])
    else:
        raise ValueError(f'Unknown identifier {transformer}')
    self.transform.tokenizer = tokenizer
    if self.config.get('fp16', None) or self.config.get('use_amp', None):
        policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
        tf.keras.mixed_precision.experimental.set_policy(policy)
        # tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
        transformer.set_weights([w.astype('float16') for w in transformer.get_weights()])
    self.transform.transformer_config = transformer.config
    return transformer
def build_model(num_labels, use_dropout=True, dropout_rate=0.15):
    model = TFElectraModel.from_pretrained(model_name, cache_dir=cache_dir)
    input_ids = tf.keras.layers.Input(shape=(model_meta.max_seq_length,), name='input_ids', dtype='int32')
    attention_mask = tf.keras.layers.Input(shape=(model_meta.max_seq_length,), name='attention_mask', dtype='int32')
    token_type_ids = tf.keras.layers.Input(shape=(model_meta.max_seq_length,), name='token_type_ids', dtype='int32')
    model_inputs = [input_ids, attention_mask, token_type_ids]
    outputs = model(model_inputs)
    logits = outputs[0]
    if use_dropout and dropout_rate > 0:
        logits = tf.keras.layers.Dropout(dropout_rate)(logits)
    model_op = tf.keras.layers.Dense(num_labels, activation='softmax', kernel_initializer='glorot_uniform')(logits)
    keras_model = tf.keras.Model(inputs=model_inputs, outputs=model_op)
    return keras_model
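# Usage sketch for build_model above. model_name, cache_dir and model_meta are
# module-level settings in the original script; the values here are assumptions
# for illustration only.
import tensorflow as tf
from transformers import TFElectraModel

model_name = "google/electra-base-discriminator"   # assumed checkpoint
cache_dir = "./hf_cache"                           # assumed cache location

class _ModelMeta:                                  # hypothetical stand-in for model_meta
    max_seq_length = 128

model_meta = _ModelMeta()

clf = build_model(num_labels=3)
clf.compile(optimizer=tf.keras.optimizers.Adam(2e-5),
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"])
clf.summary()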
def get_electra():
    ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(None,), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name='tti')

    config = ElectraConfig.from_pretrained(Config.Electra.config)
    electra_model = TFElectraModel.from_pretrained(Config.Electra.model, config=config)
    x = electra_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    x1 = keras.layers.Dropout(0.15)(x[0])
    x1 = keras.layers.Conv1D(768, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(64, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(32, 2, padding='same')(x1)
    x1 = keras.layers.Conv1D(1, 1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', dtype='float32', name='sts')(x1)

    x2 = keras.layers.Dropout(0.15)(x[0])
    x2 = keras.layers.Conv1D(768, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(64, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(32, 2, padding='same')(x2)
    x2 = keras.layers.Conv1D(1, 1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', dtype='float32', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=[x1, x2])

    optimizer = keras.optimizers.Adam(learning_rate=6e-5)
    if Config.Train.use_amp:
        optimizer = keras.mixed_precision.experimental.LossScaleOptimizer(optimizer, 'dynamic')
    loss = keras.losses.CategoricalCrossentropy(label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)
    return model
def make_model():
    transformer = TFElectraModel.from_pretrained(r'models\electra')
    input_ids = L.Input(shape=(512,), dtype=tf.int32)
    x = transformer(input_ids)[0]
    x = x[:, 0, :]
    x = L.Dense(1, activation='sigmoid', name='sigmoid')(x)
    model = Model(inputs=input_ids, outputs=x)
    model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer=Adam(lr=1e-5))
    last_layer = pickle.load(open(r'models\sigmoid.pickle', 'rb'))
    model.get_layer('sigmoid').set_weights(last_layer)
    return model
def __init__(self, extractor, config, *args, **kwargs):
    super(TFParade_Class, self).__init__(*args, **kwargs)
    self.extractor = extractor
    self.config = config

    if config["pretrained"] == "electra-base-msmarco":
        self.bert = TFElectraModel.from_pretrained("Capreolus/electra-base-msmarco")
    elif config["pretrained"] == "bert-base-msmarco":
        self.bert = TFBertModel.from_pretrained("Capreolus/bert-base-msmarco")
    elif config["pretrained"] == "bert-base-uncased":
        self.bert = TFBertModel.from_pretrained("bert-base-uncased")
    else:
        raise ValueError(
            f"unsupported model: {config['pretrained']}; need to ensure correct tokenizers will be used before arbitrary hgf models are supported"
        )

    self.transformer_layer_1 = TFBertLayer(self.bert.config)
    self.transformer_layer_2 = TFBertLayer(self.bert.config)
    self.num_passages = extractor.config["numpassages"]
    self.maxseqlen = extractor.config["maxseqlen"]
    self.linear = tf.keras.layers.Dense(1, input_shape=(self.bert.config.hidden_size,), dtype=tf.float32)

    if config["aggregation"] == "maxp":
        self.aggregation = self.aggregate_using_maxp
    elif config["aggregation"] == "transformer":
        self.aggregation = self.aggregate_using_transformer
        input_embeddings = self.bert.get_input_embeddings()
        cls_token_id = tf.convert_to_tensor([101])
        cls_token_id = tf.reshape(cls_token_id, [1, 1])
        self.initial_cls_embedding = input_embeddings(input_ids=cls_token_id)
        self.initial_cls_embedding = tf.reshape(self.initial_cls_embedding, [1, self.bert.config.hidden_size])
        initializer = tf.random_normal_initializer(stddev=0.02)
        full_position_embeddings = tf.Variable(
            initial_value=initializer(shape=[self.num_passages + 1, self.bert.config.hidden_size]),
            name="passage_position_embedding",
        )
        self.full_position_embeddings = tf.expand_dims(full_position_embeddings, axis=0)
def load_model(sigmoid_dir, transformer_dir="transformer", architecture="distilbert", max_len=256):
    """
    Special function to load a keras model that uses a transformer layer
    """
    sigmoid_path = os.path.join(sigmoid_dir, "sigmoid.pickle")
    if architecture == "electra":
        transformer = TFElectraModel.from_pretrained(transformer_dir)
    else:
        transformer = TFAutoModel.from_pretrained(transformer_dir)
    model = build_model(transformer, max_len=max_len)
    sigmoid = pickle.load(open(sigmoid_path, "rb"))
    model.get_layer("sigmoid").set_weights(sigmoid)
    return model
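# Usage sketch for load_model above; the directory names are hypothetical and
# depend on where the transformer weights and sigmoid.pickle were exported.
model = load_model(
    sigmoid_dir="artifacts/classifier_head",
    transformer_dir="artifacts/electra_base",
    architecture="electra",
    max_len=256,
)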
def test_load_tf_model():
    for model_name_or_path in ALL_MODEL_NAME_OR_PATH_LST:
        TFElectraModel.from_pretrained(model_name_or_path, from_pt=True)
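# ALL_MODEL_NAME_OR_PATH_LST is project-specific; a hypothetical definition
# consistent with the from_pt=True flag (PyTorch-only checkpoints) could be:
ALL_MODEL_NAME_OR_PATH_LST = [
    "monologg/koelectra-base-v3-discriminator",
    "monologg/koelectra-small-v3-discriminator",
]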
embedding_weights.append(np.zeros(vocab_size))
for char, i in tk.word_index.items():
    onehot = np.zeros(vocab_size)
    onehot[i - 1] = 1
    embedding_weights.append(onehot)

embedding_weights = np.array(embedding_weights)

### Ko-ELECTRA
from transformers import TFElectraModel
from transformers import ElectraTokenizer

tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")
model = TFElectraModel.from_pretrained("monologg/koelectra-base-v3-discriminator", from_pt=True)

SEQ_LEN = 128
BATCH_SIZE = 16

token_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_word_ids')
mask_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_masks')
segment_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_segment')
ELEC_outputs = model([token_inputs, mask_inputs, segment_inputs])
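# A possible continuation, not part of the original script: pool the first
# token of the sequence output and attach a small binary classification head.
ELEC_seq_output = ELEC_outputs[0]          # (batch, SEQ_LEN, hidden_size)
cls_vector = ELEC_seq_output[:, 0, :]      # representation of the first token
dense = tf.keras.layers.Dense(64, activation='relu')(cls_vector)
prediction = tf.keras.layers.Dense(1, activation='sigmoid')(dense)

elec_model = tf.keras.Model(inputs=[token_inputs, mask_inputs, segment_inputs],
                            outputs=prediction)
elec_model.summary()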