def test_TFRobertaModel(self):
    from transformers import RobertaConfig, TFRobertaModel
    keras.backend.clear_session()
    # pretrained_weights = 'roberta-base'
    tokenizer_file = 'roberta_roberta-base.pickle'
    tokenizer = self._get_tokenzier(tokenizer_file)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    config = RobertaConfig()
    model = TFRobertaModel(config)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(
        run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                         predictions, self.model_files))
def get_transformer(bert_model_type, output_hidden_states=False):
    config = get_bert_config(bert_model_type, output_hidden_states)
    if bert_model_type in [
            'bert-base-uncased', 'bert-base-cased', 'bert-large-uncased',
            'bert-large-uncased-whole-word-masking',
            'bert-large-uncased-whole-word-masking-finetuned-squad'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in [
            'prod-bert-base-uncased', 'tune_bert-base-uncased_nsp'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config,
                                           from_pt=True)
    elif bert_model_type in [
            'roberta-base', 'roberta-large', 'roberta-large-mnli',
            'distilroberta-base'
    ]:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config)
    elif bert_model_type in ['prod-roberta-base-cased']:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config,
                                              from_pt=True)
    elif bert_model_type in ['xlnet-base-cased']:
        return TFXLNetModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                            config=config)
    elif bert_model_type in [
            'albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1',
            'albert-xxlarge-v1'
    ]:
        return TFAlbertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                             config=config)
    elif bert_model_type in ['gpt2', 'gpt2-medium']:
        return TFGPT2Model.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in ['transfo-xl']:
        return TFTransfoXLModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    elif bert_model_type in [
            'distilbert-base-uncased',
            'distilbert-base-uncased-distilled-squad'
    ]:
        return TFDistilBertModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    else:
        raise ValueError(
            f'`bert_model_type` not understood: {bert_model_type}')
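# Hypothetical usage sketch for get_transformer (not part of the original
# snippet). Assumes BERT_MODEL_FILE maps each supported model type to a
# weights path or hub name, as the function above requires.
model = get_transformer('roberta-base', output_hidden_states=True)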
def __init__(self, dropout=0.1):
    super().__init__()
    # A RoBERTa checkpoint is required here; BERT checkpoints such as
    # 'bert-base-uncased' are incompatible with TFRobertaModel.
    self.roberta = TFRobertaModel.from_pretrained('roberta-base',
                                                  trainable=True)
    self.drop = tf.keras.layers.Dropout(dropout)
    self.fc = tf.keras.layers.Dense(300, tf.nn.silu)
    self.out = tf.keras.layers.Dense(2)
def get_roberta_model(model_name, max_len, log_directory, inputs, max_pool,
                      dropout=None):
    if "xlm" in model_name:
        roberta_model = TFXLMRobertaModel.from_pretrained(model_name)
    else:
        roberta_model = TFRobertaModel.from_pretrained(model_name)

    layer_inputs = []
    for input_name in inputs:
        layer_inputs.append(
            tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=input_name))

    roberta_layer = roberta_model(layer_inputs)[0]

    if not max_pool:
        # Use the <s> (CLS-equivalent) token representation.
        roberta_layer = roberta_layer[:, 0, :]
        if dropout:
            roberta_layer = tf.keras.layers.Dropout(
                dropout, name="dropout")(roberta_layer)
        output = tf.keras.layers.Dense(3, activation='softmax')(roberta_layer)
    else:
        hidden_layer = tf.keras.layers.GlobalAveragePooling1D(
            name="pooling")(roberta_layer)
        hidden_layer = tf.keras.layers.Dropout(0.25,
                                               name="dropout")(hidden_layer)
        hidden_layer = tf.keras.layers.Dense(32, activation='relu',
                                             name="dense_1")(hidden_layer)
        hidden_layer = tf.keras.layers.Dense(16, activation='relu',
                                             name="dense_2")(hidden_layer)
        output = tf.keras.layers.Dense(3, activation='softmax',
                                       name="final_dense")(hidden_layer)

    model = tf.keras.Model(inputs=layer_inputs, outputs=[output])
    model.compile(tf.keras.optimizers.Adam(learning_rate=1e-5),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    tf.keras.utils.plot_model(model,
                              to_file=log_directory + "/roberta_model.png",
                              show_shapes=False,
                              show_layer_names=True,
                              rankdir='TB',
                              expand_nested=False,
                              dpi=200)
    return model
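# Hypothetical usage sketch for get_roberta_model (not part of the original
# snippet). The input names and log directory are illustrative placeholders.
model = get_roberta_model(model_name="roberta-base",
                          max_len=128,
                          log_directory="./logs",
                          inputs=["input_ids", "attention_mask"],
                          max_pool=True)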
def build_model(self):
    ids = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                dtype=tf.int32)
    att = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                dtype=tf.int32)
    tok = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                dtype=tf.int32)

    # Network architecture
    config = RobertaConfig.from_pretrained(self.config.data.roberta.path +
                                           self.config.data.roberta.config)
    bert_model = TFRobertaModel.from_pretrained(
        self.config.data.roberta.path +
        self.config.data.roberta.roberta_weights,
        config=config)
    x = bert_model(ids, attention_mask=att, token_type_ids=tok)

    self.init_head(x[0])
    self.add_dropout(0.1)
    self.add_lstm(64, True)
    self.add_dropout(0.1)
    self.add_dense(1)
    self.add_activation('softmax')

    self.model = tf.keras.models.Model(
        inputs=[ids, att, tok],
        outputs=[self.start_head, self.end_head])
    self.model.compile(loss=self.config.model.loss,
                       optimizer=self.config.model.optimizer)
def get_classification_roberta():
    ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                                      dtype=tf.int32, name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model,
                                                   config=config)

    x = roberta_model(ids, attention_mask=att, token_type_ids=tok_type_ids)
    x = keras.layers.Dropout(0.2)(x[0])
    x = keras.layers.GlobalAveragePooling1D()(x)
    x = keras.layers.Dense(3, activation='softmax', name='sentiment')(x)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=x)

    lr_schedule = keras.experimental.CosineDecay(5e-5, 1000)
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer, metrics=['acc'])
    return model
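# Hypothetical training sketch for get_classification_roberta (not part of
# the original snippet). Assumes Config is populated and that ids_array,
# att_array, tti_array and labels come from your own tokenization step,
# with labels one-hot encoded over the 3 classes.
model = get_classification_roberta()
model.fit([ids_array, att_array, tti_array], labels, epochs=2, batch_size=32)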
def get_model_tokenizer(model_path, do_lower_case, seed=42):
    if model_path.startswith('bert'):
        tokenizer = BertTokenizer.from_pretrained(model_path,
                                                  do_lower_case=do_lower_case)
        model = TFBertModel.from_pretrained(model_path,
                                            output_hidden_states=True,
                                            output_attentions=False)
    elif model_path.startswith('roberta'):
        tokenizer = RobertaTokenizer.from_pretrained(
            model_path, do_lower_case=do_lower_case, add_prefix_space=True)
        model = TFRobertaModel.from_pretrained(model_path,
                                               output_hidden_states=True,
                                               output_attentions=False)
    elif model_path.startswith('jplu/tf-xlm-roberta'):
        tokenizer = XLMRobertaTokenizer.from_pretrained(
            model_path, do_lower_case=do_lower_case)
        model = TFXLMRobertaModel.from_pretrained(model_path,
                                                  output_hidden_states=True,
                                                  output_attentions=False)
    elif model_path.startswith('random-bert'):
        tokenizer = BertTokenizer.from_pretrained("bert-base-cased",
                                                  do_lower_case=True)
        config = BertConfig(seed=seed,
                            output_hidden_states=True,
                            output_attentions=False)
        model = TFBertModel(config)
    else:
        raise ValueError(
            f"Unknown Transformer name: {model_path}. "
            f"Please select one of the supported models: {constants.SUPPORTED_MODELS}"
        )
    return model, tokenizer
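# Hypothetical usage sketch for get_model_tokenizer (not part of the
# original snippet); model names must match the prefixes the function
# dispatches on.
model, tokenizer = get_model_tokenizer('roberta-base', do_lower_case=False)
outputs = model(tokenizer("hello world", return_tensors="tf"))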
def create_model_and_optimizer():
    with strategy.scope():
        transformer_layer = TFRobertaModel.from_pretrained(PRETRAINED_MODEL)
        model = build_model(transformer_layer)
        optimizer_transformer = Adam(learning_rate=LR_TRANSFORMER)
        optimizer_head = Adam(learning_rate=LR_HEAD)
    return model, optimizer_transformer, optimizer_head
def create_roberta_model(tokens_train, attn_mask_train, num_classes):
    config = RobertaConfig(vocab_size=50021,
                           hidden_size=1024,
                           num_hidden_layers=16,
                           num_attention_heads=16,
                           intermediate_size=2048,
                           attention_probs_dropout_prob=0.3,
                           hidden_dropout_prob=0.3)
    bert = TFRobertaModel(config)

    # dense1 = Dense(500, activation='relu')
    dense2 = Dense(368, activation='relu')
    dense3 = Dense(num_classes, activation='softmax')
    dropout = Dropout(0.3)

    tokens = Input(shape=(tokens_train.shape[1],), dtype=tf.int32)
    attn_mask = Input(shape=(attn_mask_train.shape[1],), dtype=tf.int32)

    pooled_output = bert(tokens, attn_mask).pooler_output
    med = dropout(dense2(pooled_output))
    # Feed the dense/dropout branch into the classifier so it is not dead code.
    final = dense3(med)

    model = Model(inputs=[tokens, attn_mask], outputs=final)
    return model
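# Hypothetical instantiation of create_roberta_model (not part of the
# original snippet). The zero/one arrays stand in for real tokenizer
# output; shapes are (num_examples, sequence_length).
import numpy as np
tokens_train = np.zeros((1000, 128), dtype=np.int32)
attn_mask_train = np.ones((1000, 128), dtype=np.int32)
model = create_roberta_model(tokens_train, attn_mask_train, num_classes=4)
model.compile(optimizer='adam', loss='categorical_crossentropy')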
def __init__(
        self,
        pretrained_model_name_or_path='roberta-base',
        reduce_output='cls_pooled',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    super(RoBERTaEncoder, self).__init__()
    try:
        from transformers import TFRobertaModel
    except ModuleNotFoundError:
        logger.error(
            ' transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)
    self.transformer = TFRobertaModel.from_pretrained(
        pretrained_model_name_or_path
    )
    self.reduce_output = reduce_output
    if self.reduce_output != 'cls_pooled':
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)
def __init__(self, dropout_rate=0.2, units=300):
    super().__init__()
    # 'roberta-base' is the canonical checkpoint name; RoBERTa has no
    # separate uncased variant.
    self.roberta = TFRobertaModel.from_pretrained('roberta-base',
                                                  trainable=True)
    self.drop1 = tf.keras.layers.Dropout(dropout_rate)
    self.drop2 = tf.keras.layers.Dropout(dropout_rate)
    self.fc = tf.keras.layers.Dense(units, tf.nn.swish)
    self.out = tf.keras.layers.Dense(3)
def _test_TFRoberta(self, size, large=False):
    from transformers import RobertaTokenizer, TFRobertaModel
    tokenizer = RobertaTokenizer.from_pretrained(size)
    model = TFRobertaModel.from_pretrained(size)
    input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
    spec, input_dict = self.spec_and_pad(input_dict)
    outputs = ["last_hidden_state"]
    self.run_test(model, input_dict, input_signature=spec,
                  outputs=outputs, large=large)
def dl_roberta(model_name, path):
    print("Start to download", model_name, "...")
    dump = path + model_name
    config = RobertaConfig.from_pretrained(model_name)
    model = TFRobertaModel.from_pretrained(model_name)
    tokenizer = RobertaTokenizer.from_pretrained(model_name)
    config.save_pretrained(dump)
    model.save_pretrained(dump)
    tokenizer.save_pretrained(dump)
    print("Download", model_name, "completed.")
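# Hypothetical call of dl_roberta (not part of the original snippet). Since
# dump = path + model_name is plain string concatenation, the path should
# end with a separator; this saves everything under ./pretrained/roberta-base.
dl_roberta("roberta-base", "./pretrained/")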
def __init__(self, model_name, dir_path, num_class):
    super(RobertaClassifier, self).__init__()
    self.bert = TFRobertaModel.from_pretrained(model_name, from_pt=True)
    self.dropout = tf.keras.layers.Dropout(
        self.bert.config.hidden_dropout_prob)
    self.classifier = tf.keras.layers.Dense(
        num_class,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            self.bert.config.initializer_range, seed=42),
        name="classifier")
def test_TFRobertaModel(self):
    from transformers import RobertaTokenizer, TFRobertaModel
    pretrained_weights = 'roberta-base'
    tokenizer = RobertaTokenizer.from_pretrained(pretrained_weights)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    model = TFRobertaModel.from_pretrained(pretrained_weights)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(
        run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                         predictions, self.model_files))
def build_roberta(
        self,
        model_name: str = "distilroberta-base",
        model_latent_dim: int = 768,
        max_sentence_length: int = 25,
):
    self._vae.roberta_shape = (max_sentence_length, model_latent_dim)
    roberta = TFRobertaModel.from_pretrained(model_name)
    # This supposedly avoids trouble
    # (https://github.com/huggingface/transformers/issues/1350#issuecomment-537625496)
    roberta.roberta.call = tf.function(roberta.roberta.call)
    self._vae.roberta = roberta
    return self
def __init__(self, intent_size, slot_size, lr=1e-4, dropout_rate=0.2,
             units=300):
    super().__init__()
    # 'roberta-base' is the canonical checkpoint name; RoBERTa has no
    # separate uncased variant.
    self.roberta = TFRobertaModel.from_pretrained('roberta-base',
                                                  trainable=True)
    self.inp_dropout = Dropout(dropout_rate)
    self.intent_dropout = Dropout(dropout_rate)
    self.fc_intent = Dense(units, activation='relu')
    self.trans_params = self.add_weight(shape=(slot_size, slot_size))
    self.out_linear_intent = Dense(intent_size)
    self.out_linear_slot = Dense(slot_size)
    self.optimizer = Adam(lr)
    self.slots_accuracy = tf.keras.metrics.Accuracy()
    self.intent_accuracy = tf.keras.metrics.Accuracy()
    self.decay_lr = tf.optimizers.schedules.ExponentialDecay(lr, 1000, 0.95)
    self.logger = logging.getLogger('tensorflow')
    self.logger.setLevel(logging.INFO)
def get_twin_net(input_dim):
    left_input = tf.keras.Input(input_dim, dtype='int64')
    right_input = tf.keras.Input(input_dim, dtype='int64')

    # bert_model = TFRobertaModel.from_pretrained(bert_model_path, from_pt=True, config=config)
    bert_model = TFRobertaModel.from_pretrained("microsoft/codebert-base")
    encoded_l = bert_model(left_input)[0][:, 0, :]
    encoded_r = bert_model(right_input)[0][:, 0, :]

    # The commented-out lines below use the average of the sequence vectors
    # instead of the aggregated CLS token.
    # av_encoded_l = tf.keras.layers.Lambda(lambda x: K.mean(x, axis=1))(encoded_l)
    # av_encoded_r = tf.keras.layers.Lambda(lambda x: K.mean(x, axis=1))(encoded_r)

    L1_layer = tf.keras.layers.Lambda(
        lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])
    prediction = tf.keras.layers.Dense(1, activation='sigmoid')(L1_distance)

    twin_net = tf.keras.models.Model(inputs=[left_input, right_input],
                                     outputs=prediction)
    return twin_net
def build_model():
    ids = J.Input((max_word,), dtype=tf.int32)
    att = J.Input((max_word,), dtype=tf.int32)
    tok = J.Input((max_word,), dtype=tf.int32)

    # Trim each batch to its longest non-padded sequence.
    padding = tf.cast(tf.equal(ids, pad_num), tf.int32)
    lens = max_word - tf.reduce_sum(padding, -1)
    max_len = tf.reduce_max(lens)
    ids_ = ids[:, :max_len]
    att_ = att[:, :max_len]
    tok_ = tok[:, :max_len]

    config = RobertaConfig.from_pretrained(
        f'{path}datasets_597869_1074900_config-roberta-base.json')
    bert_model = TFRobertaModel.from_pretrained(
        f'{path}pretrained-roberta-base.h5', config=config)
    x = bert_model(ids_, attention_mask=att_, token_type_ids=tok_)

    # Start-position head
    x1 = J.Dropout(0.1)(x[0])
    x1 = J.Conv1D(768, 2, padding='causal')(x1)  # dilated conv
    x1 = J.LeakyReLU()(x1)
    x1 = J.Dense(1)(x1)
    x1 = J.Flatten()(x1)
    x1 = J.Activation('softmax')(x1)

    # End-position head
    x2 = J.Dropout(0.1)(x[0])
    x2 = J.Conv1D(768, 2, padding='causal')(x2)
    x2 = J.LeakyReLU()(x2)
    x2 = J.Dense(1)(x2)
    x2 = J.Flatten()(x2)
    x2 = J.Activation('softmax')(x2)

    model = tf.keras.models.Model(inputs=[ids, att, tok], outputs=[x1, x2])
    # Training configuration (optimizer, loss)
    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
    model.compile(loss=loss_fn, optimizer=optimizer)

    # Pad the outputs back to max_word for prediction.
    x1_padded = tf.pad(x1, [[0, 0], [0, max_word - max_len]],
                       constant_values=0.)
    x2_padded = tf.pad(x2, [[0, 0], [0, max_word - max_len]],
                       constant_values=0.)
    padded_model = tf.keras.models.Model(inputs=[ids, att, tok],
                                         outputs=[x1_padded, x2_padded])
    return model, padded_model
def __init__(self, MODELPATH, MODEL=None):
    self.special_token_set = {
        'roberta': (['<s>', '</s>'], 'be'),
        'bert': (['[CLS]', '[SEP]'], 'be'),
        'xlnet': (['<sep>', '<cls>'], 'e')
    }
    self.tokenizer = None
    self.model = None
    self.modeltype = None
    self.add_prefix_space = None

    if not MODEL:
        MODEL = MODELPATH.split('/')[-1]
    print(MODEL, MODELPATH)

    if MODEL.startswith('roberta'):
        self.modeltype = 'roberta'
        self.tokenizer = RobertaTokenizer.from_pretrained(
            MODELPATH, add_special_tokens=False)
        self.model = TFRobertaModel.from_pretrained(MODELPATH,
                                                    output_attentions=True)
        self.add_prefix_space = True
    if MODEL.startswith('bert'):
        self.modeltype = 'bert'
        self.tokenizer = BertTokenizer.from_pretrained(
            MODELPATH, add_special_tokens=False)
        self.model = TFBertModel.from_pretrained(MODELPATH,
                                                 output_attentions=True)
        self.add_prefix_space = False
    if MODEL.startswith('xlnet'):
        self.modeltype = 'xlnet'
        self.tokenizer = XLNetTokenizer.from_pretrained(
            MODELPATH, add_special_tokens=False)
        self.model = TFXLNetModel.from_pretrained(MODELPATH,
                                                  output_attentions=True)
        self.add_prefix_space = False
def __init__(self):
    '''Initialise the class, load the RoBERTa model from the HuggingFace
    library, and set the maximum number of tokens to embed per column.'''
    # Load models
    self.tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
    self.bert_model = TFRobertaModel.from_pretrained('roberta-large-mnli')

    # Parameters for getting embeddings
    self.max_token_dict = {
        'asp_cat_emb': 16,
        'asp_term_emb': 24,
        'review_emb': 50
    }
    self.src_column_dict = {
        'asp_cat_emb': 'aspect_category',
        'asp_term_emb': 'aspect_term',
        'review_emb': 'review'
    }

    # Label encoder
    self.encoder = LabelEncoder()

    # Model
    self.model = self.create_model()
DATA_FILE = '../type-data.json'

## LOAD TOKENIZER
# with open('tokenizers/twin_nc_tokenizer.pickle', 'rb') as handle:
# with open('tokenizers/twin_names_tokenizer.pickle', 'rb') as handle:
#     lang_tokenizer = pickle.load(handle)

## LOAD SAVED MODEL
# model = load_model('models/twin__nc_TOP__PROG_model.h5')  # twin__names_TOP__500000_PROG_model.h5
arg_model = load_model(
    'bert_twin_data/models/twin_bert_arg_200_84349_model.h5')
ret_model = load_model(
    'bert_twin_data/models/twin_bert_ret_200_99167_model.h5')

tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
bert_model = TFRobertaModel.from_pretrained("microsoft/codebert-base")

bert_cache = {}
vector_cache = {}
names_cache = {}
types_cache = {}
state = "open"
running_list_of_vecs = []
max_seq_length = 510


# in1 and in2 are lists of strings to be run through the twin model,
# i.e. in1[0] is compared with in2[0], in1[1] with in2[1], ...
def run_twin_model(in1, in2):
    in1 = tf.keras.preprocessing.sequence.pad_sequences(
                    output_attentions=False,
                    output_hidden_states=False)
                break
            except:
                pass
        last_hidden_states = outputs[0].numpy()
        last_hidden_states = np.array(
            tf.math.reduce_mean(last_hidden_states, axis=1))
        del outputs
        del inp
        res.append(last_hidden_states)
    return tf.concat(res, 0, name='concat')


inp_filename = os.path.join(os.getcwd(), 'data', "input", "data.txt")
out_filename = os.path.join(os.getcwd(), 'data', "output", "data.txt")

dataset = read_ds_from_file(inp_filename)
data_content, data_names, data_topic = retrieve_data(dataset)
inputs = strings2tokenized(data_content)

RoBERTa = TFRobertaModel.from_pretrained('roberta-large')
outputs = test(inputs, RoBERTa, chunk_size=1)

_, data_names, _ = retrieve_data(dataset)
embedded_dataset = reset_data(dataset, embedded2strings(outputs), data_names)
write_ds_to_file(out_filename, embedded_dataset)
def build_model_1(self, verbose=False):
    """Initialize the model."""
    if self.model_type in TrainModelConfigV2.BERT_LIST:
        config = BertConfig.from_pretrained(
            "{}{}/config.json".format(PATH_TRANS_INPUT, self.model_type),
            num_labels=BertBaseUnCaseV2.N_CLASS)
        bert_model = TFBertModel.from_pretrained(
            "{}{}/tf_model.h5".format(PATH_TRANS_INPUT, self.model_type),
            config=config)
        bert_model.trainable = False
        input_ids_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                dtype=np.int32, name='input_ids')
        input_mask_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                 dtype=np.int32, name='attention_mask')
        input_token_type_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                       dtype=np.int32, name='token_type_ids')
        input_layer_list = [
            input_ids_layer, input_mask_layer, input_token_type_layer
        ]
        bert_layer = bert_model(input_layer_list)[0]
    elif self.model_type in TrainModelConfigV2.ROBERTA_LIST:
        config = RobertaConfig.from_pretrained(
            "{}{}/config.json".format(PATH_TRANS_INPUT, self.model_type),
            num_labels=BertBaseUnCaseV2.N_CLASS)
        bert_model = TFRobertaModel.from_pretrained(
            "{}{}/tf_model.h5".format(PATH_TRANS_INPUT, self.model_type),
            config=config)
        bert_model.trainable = False
        input_ids_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                dtype=np.int32, name='input_ids')
        input_mask_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                 dtype=np.int32, name='attention_mask')
        input_layer_list = [input_ids_layer, input_mask_layer]
        bert_layer = bert_model(input_layer_list)[0]

    if self.version == "v1":
        flat_layer = Flatten()(bert_layer)
        out = Dropout(0.2)(flat_layer)
    elif self.version == "v2":
        out = LSTM(BertBaseUnCaseV2.hidden_size, dropout=0.2)(bert_layer)
    elif self.version == "v3":
        flat_layer = Flatten()(bert_layer)
        dense_layer = Dense(BertBaseUnCaseV2.hidden_size,
                            activation='relu')(flat_layer)
        out = Dropout(0.2)(dense_layer)
    elif self.version == "v4":
        bi_layer = Bidirectional(
            LSTM(BertBaseUnCaseV2.hidden_size, dropout=0.2,
                 return_sequences=True))(bert_layer)
        bi_layer = Bidirectional(LSTM(BertBaseUnCaseV2.hidden_size))(bi_layer)
        dropout_layer = Dropout(0.2)(bi_layer)
        out = Dense(256, activation='relu')(dropout_layer)

    if BertBaseUnCaseV2.VER == 'v5':
        dense_output = Dense(BertBaseUnCaseV2.N_CLASS,
                             activation='sigmoid')(out)
    else:
        dense_output = Dense(BertBaseUnCaseV2.N_CLASS,
                             activation='softmax')(out)

    model = Model(inputs=input_layer_list, outputs=dense_output)

    # Compile; the optimizer/loss/metric are the same for every version.
    optimizer = optimizers.Adam(learning_rate=BertBaseUnCaseV2.lr)
    # The final Dense already applies an activation, so the loss takes
    # probabilities rather than logits.
    loss = losses.SparseCategoricalCrossentropy(from_logits=False)
    metric = metrics.SparseCategoricalAccuracy('accuracy')
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
    if verbose:
        model.summary()
    return model
def __init__(self, num_classes: int, pretrained_roberta_name: str):
    super(KerasTextClassifier, self).__init__()
    self.transformer = TFRobertaModel.from_pretrained(
        pretrained_roberta_name)
    self.final_layer = layers.Dense(num_classes)
    self.softmax = layers.Softmax()
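# The excerpt above defines only the constructor of KerasTextClassifier; a
# minimal sketch of a matching call method, assuming the usual Keras
# subclassing pattern. The choice of the first-token (<s>) representation
# is an assumption, not taken from the original code.
def call(self, inputs, training=False):
    # Take the <s> vector of the last hidden state, then classify and
    # normalise with the layers defined in __init__.
    hidden = self.transformer(inputs)[0][:, 0, :]
    logits = self.final_layer(hidden)
    return self.softmax(logits)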
def train(args):
    # Build the vocabulary object
    vocab = Vocab(args.vocab_file, 50000, args.train_data_path)
    # Expose the word/id mappings through args
    args.vocab = vocab
    # Read the pretrained embeddings
    embs = load_pkl('E:/CodeSleepEatRepeat/data/58tech/data/word2vec.txt')
    # Build the MLM training data
    batches = batcher(args, embs)

    # Load a pretrained model if one is given
    if args.pre_trained_model:
        config = RobertaConfig.from_pretrained(args.pre_trained_model)
        model_roberta = TFRobertaModel.from_pretrained(args.pre_trained_model,
                                                       config=config)
    else:
        # huggingface transformers model configuration
        config = RobertaConfig()
        config.num_hidden_layers = args.num_hidden_layers  # 12
        config.hidden_size = args.hidden_size  # 128
        config.intermediate_size = args.hidden_size * 4
        config.num_attention_heads = args.num_attention_heads  # 8
        config.vocab_size = args.vocab.word_size()
        model_roberta = TFRobertaModel(config)

    model = Model_Roberta(args, model_roberta)
    # model.summary()

    optimizer = tf.keras.optimizers.Nadam()
    loss_func = tf.keras.losses.SparseCategoricalCrossentropy()
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_metric = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    if args.checkpoints_dir:
        print("Creating the checkpoint manager")
        checkpoint_dir = args.checkpoints_dir
        ckpt = tf.train.Checkpoint(model=model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir,
                                                  max_to_keep=5)
        if ckpt_manager.latest_checkpoint:
            ckpt.restore(ckpt_manager.latest_checkpoint)
            print("Restored from {}".format(ckpt_manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

    count = 0
    best_loss = 20
    for epoch in tf.range(1, args.epochs + 1):
        for batch in batches:
            gradients, loss, predictions, labels = train_step(
                model, batch, loss_func, args)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))
            train_loss.update_state(loss)
            train_metric.update_state(labels, predictions)
            logs = 'Epoch={},Loss:{},Accuracy:{}'
            if count % 100 == 0 and count != 0:
                tf.print(
                    tf.strings.format(
                        logs,
                        (epoch, train_loss.result(), train_metric.result())))
                tf.print("")
            if count % 1000 == 0 and train_loss.result() < best_loss:
                best_loss = train_loss.result()
                ckpt_save_path = ckpt_manager.save()
                print('*' * 20)
                print('Saving checkpoint for epoch {} at {}, best loss {}'.
                      format(epoch, ckpt_save_path, best_loss))
                print('*' * 20)
            count += 1
        train_loss.reset_states()
        train_metric.reset_states()

    model.encoder.save_pretrained('./pretrained-roberta/')
def get_encoder_decoder_model(self, config, decoder_config):
    encoder_model = TFRobertaModel(config, name="encoder")
    decoder_model = TFRobertaForCausalLM(decoder_config, name="decoder")
    return encoder_model, decoder_model
def get_model(self):
    return TFRobertaModel.from_pretrained("roberta-base")
    vecstr = ''
    for x_i in x:
        x_i_str = '%.4f' % (x_i)
        vecstr += x_i_str + ' '
    return vecstr[0:-1]


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print('usage: python saveptvecs.py <context file> <outvec file>')
        sys.exit(0)
    print("here")

    bertTokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    bertModel = TFRobertaModel.from_pretrained("roberta-base")

    vocabfile = sys.argv[1]
    outfile = sys.argv[2]

    nlp_features = pipeline('feature-extraction',
                            model=bertModel,
                            tokenizer=bertTokenizer)

    with open(vocabfile) as f:
        words = f.read().splitlines()

    f = open(outfile, "w")
    f.write(str(len(words)) + ' 768\n')
    for w in words:
from sklearn.metrics import precision_recall_fscore_support, classification_report
from sutime import SUTime
import json
from sklearn.utils import class_weight
from imblearn.over_sampling import RandomOverSampler
import pandas as pd

# GLOBAL VARIABLES
POS = True
NE = True
MAX_CLAUSE_LENGTH = 70
TOKENIZER = RobertaTokenizer.from_pretrained("roberta-base")
MODEL = TFRobertaModel.from_pretrained('roberta-base')

# Spacy and corenlp stuff
nlp = spacy.load("en_core_web_sm")


## METRIC FUNCTIONS #####################################################
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))