Example #1
def get_transformer(bert_model_type, output_hidden_states=False):
    config = get_bert_config(bert_model_type, output_hidden_states)
    if bert_model_type in [
            'bert-base-uncased', 'bert-base-cased', 'bert-large-uncased',
            'bert-large-uncased-whole-word-masking',
            'bert-large-uncased-whole-word-masking-finetuned-squad'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in [
            'prod-bert-base-uncased', 'tune_bert-base-uncased_nsp'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config,
                                           from_pt=True)
    elif bert_model_type in [
            'roberta-base', 'roberta-large', 'roberta-large-mnli',
            'distilroberta-base'
    ]:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config)
    elif bert_model_type in ['prod-roberta-base-cased']:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config,
                                              from_pt=True)
    elif bert_model_type in ['xlnet-base-cased']:
        return TFXLNetModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                            config=config)
    elif bert_model_type in [
            'albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1',
            'albert-xxlarge-v1'
    ]:
        return TFAlbertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                             config=config)
    elif bert_model_type in ['gpt2', 'gpt2-medium']:
        return TFGPT2Model.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in ['transfo-xl']:
        return TFTransfoXLModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    elif bert_model_type in [
            'distilbert-base-uncased',
            'distilbert-base-uncased-distilled-squad'
    ]:
        return TFDistilBertModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    else:
        raise ValueError(
            f'`bert_model_type` not understood: {bert_model_type}')
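A minimal usage sketch for the factory above (an illustration, not part of the original snippet; it assumes BERT_MODEL_FILE['roberta-base'] points at a standard RoBERTa checkpoint and that a matching tokenizer is loaded separately):

# Hedged sketch: run tokenized text through the backbone returned by get_transformer.
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
backbone = get_transformer('roberta-base')
encoded = tokenizer("Hello, world!", return_tensors='tf')
last_hidden_state = backbone(encoded)[0]  # (batch_size, seq_len, hidden_size)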
Example #2
 def __init__(self, dropout=0.1):
     super().__init__()
     # TFRobertaModel expects RoBERTa weights, so a RoBERTa checkpoint is used here.
     self.roberta = TFRobertaModel.from_pretrained('roberta-base',
                                                   trainable=True)
     self.drop = tf.keras.layers.Dropout(dropout)
     self.fc = tf.keras.layers.Dense(300, tf.nn.silu)
     self.out = tf.keras.layers.Dense(2)
Example #3
def get_roberta_model(model_name, max_len, log_directory, inputs, max_pool, dropout=None):
    if "xlm" in model_name:
        roberta_model = TFXLMRobertaModel.from_pretrained(model_name)
    else:
        roberta_model = TFRobertaModel.from_pretrained(model_name)
    layer_inputs = []

    for input_name in inputs:
        layer_inputs.append(tf.keras.Input(shape=(max_len,), dtype=tf.int32, name=input_name))

    roberta_layer = roberta_model(layer_inputs)[0]
    if not max_pool:
        roberta_layer = roberta_layer[:, 0, :]
        if dropout:
            roberta_layer = tf.keras.layers.Dropout(dropout, name="dropout")(roberta_layer)
        output = tf.keras.layers.Dense(3, activation='softmax')(roberta_layer)
    else:
        hidden_layer = tf.keras.layers.GlobalAveragePooling1D(name="pooling")(roberta_layer)
        hidden_layer = tf.keras.layers.Dropout(0.25, name="dropout")(hidden_layer)
        hidden_layer = tf.keras.layers.Dense(32, activation='relu', name="dense_1")(hidden_layer)
        hidden_layer = tf.keras.layers.Dense(16, activation='relu', name="dense_2")(hidden_layer)
        output = tf.keras.layers.Dense(3, activation='softmax', name="final_dense")(hidden_layer)

    model = tf.keras.Model(
        inputs=layer_inputs,
        outputs=[output])
    model.compile(tf.keras.optimizers.Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.summary()

    tf.keras.utils.plot_model(
        model, to_file=log_directory + "/roberta_model.png", show_shapes=False, show_layer_names=True,
        rankdir='TB', expand_nested=False, dpi=200
    )

    return model
Example #4
    def __init__(
            self,
            pretrained_model_name_or_path='roberta-base',
            reduce_output='cls_pooled',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        super(RoBERTaEncoder, self).__init__()
        try:
            from transformers import TFRobertaModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFRobertaModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        if self.reduce_output != 'cls_pooled':
            self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
Example #5
def create_model_and_optimizer():
    with strategy.scope():
        transformer_layer = TFRobertaModel.from_pretrained(PRETRAINED_MODEL)
        model = build_model(transformer_layer)
        optimizer_transformer = Adam(learning_rate=LR_TRANSFORMER)
        optimizer_head = Adam(learning_rate=LR_HEAD)
    return model, optimizer_transformer, optimizer_head
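The snippet above returns two optimizers so the transformer body and the task head can be trained with different learning rates; a hedged sketch of one way to apply them in a custom train step (the name filter on 'roberta' and the loss_fn argument are assumptions, not part of the original code):

# Hedged sketch: route gradients to the two optimizers returned above.
import tensorflow as tf

@tf.function
def train_step(model, optimizer_transformer, optimizer_head, features, labels, loss_fn):
    with tf.GradientTape() as tape:
        predictions = model(features, training=True)
        loss = loss_fn(labels, predictions)
    # Split variables into the pretrained backbone and the task head (assumed name filter).
    transformer_vars = [v for v in model.trainable_variables if 'roberta' in v.name]
    head_vars = [v for v in model.trainable_variables if 'roberta' not in v.name]
    grads = tape.gradient(loss, transformer_vars + head_vars)
    optimizer_transformer.apply_gradients(zip(grads[:len(transformer_vars)], transformer_vars))
    optimizer_head.apply_gradients(zip(grads[len(transformer_vars):], head_vars))
    return loss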
Example #6
def get_classification_roberta():
    ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32,
                             name='ids')
    att = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32,
                             name='att')
    tok_type_ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                                      dtype=tf.int32,
                                      name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model,
                                                   config=config)

    x = roberta_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    x = keras.layers.Dropout(0.2)(x[0])
    x = keras.layers.GlobalAveragePooling1D()(x)
    x = keras.layers.Dense(3, activation='softmax', name='sentiment')(x)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=x)
    lr_schedule = keras.experimental.CosineDecay(5e-5, 1000)
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer, metrics=['acc'])

    return model
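Because the model above is compiled with CategoricalCrossentropy, its sentiment targets must be one-hot encoded; a short sketch (assuming integer labels in {0, 1, 2}):

# Hedged sketch: convert integer class ids to the one-hot targets CategoricalCrossentropy expects.
import tensorflow as tf

y_int = tf.constant([0, 2, 1])         # hypothetical integer sentiment labels
y_onehot = tf.one_hot(y_int, depth=3)  # shape (batch_size, 3), matching the 'sentiment' output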
Example #7
    def get_model_tokenizer(model_path, do_lower_case, seed=42):
        if model_path.startswith('bert'):
            tokenizer = BertTokenizer.from_pretrained(
                model_path, do_lower_case=do_lower_case)
            model = TFBertModel.from_pretrained(model_path,
                                                output_hidden_states=True,
                                                output_attentions=False)
        elif model_path.startswith('roberta'):
            tokenizer = RobertaTokenizer.from_pretrained(
                model_path, do_lower_case=do_lower_case, add_prefix_space=True)
            model = TFRobertaModel.from_pretrained(model_path,
                                                   output_hidden_states=True,
                                                   output_attentions=False)
        elif model_path.startswith('jplu/tf-xlm-roberta'):
            tokenizer = XLMRobertaTokenizer.from_pretrained(
                model_path, do_lower_case=do_lower_case)
            model = TFXLMRobertaModel.from_pretrained(
                model_path, output_hidden_states=True, output_attentions=False)
        elif model_path.startswith('random-bert'):
            tokenizer = BertTokenizer.from_pretrained("bert-base-cased",
                                                      do_lower_case=True)
            config = BertConfig(seed=seed,
                                output_hidden_states=True,
                                output_attentions=False)
            model = TFBertModel(config)
        else:
            raise ValueError(
                f"Unknown Transformer name: {model_path}. "
                f"Please select one of the supported models: {constants.SUPPORTED_MODELS}"
            )

        return model, tokenizer
Example #8
    def build_model(self):
        ids = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                    dtype=tf.int32)
        att = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                    dtype=tf.int32)
        tok = tf.keras.layers.Input((self.config.data.roberta.max_len, ),
                                    dtype=tf.int32)

        # Network architecture
        config = RobertaConfig.from_pretrained(self.config.data.roberta.path +
                                               self.config.data.roberta.config)
        bert_model = TFRobertaModel.from_pretrained(
            self.config.data.roberta.path +
            self.config.data.roberta.roberta_weights,
            config=config)
        x = bert_model(ids, attention_mask=att, token_type_ids=tok)

        self.init_head(x[0])
        self.add_dropout(0.1)
        self.add_lstm(64, True)
        self.add_dropout(0.1)
        self.add_dense(1)
        self.add_activation('softmax')
        self.model = tf.keras.models.Model(
            inputs=[ids, att, tok], outputs=[self.start_head, self.end_head])
        self.model.compile(loss=self.config.model.loss,
                           optimizer=self.config.model.optimizer)
Example #9
 def __init__(self, dropout_rate=0.2, units=300):
     super().__init__()
     self.roberta = TFRobertaModel.from_pretrained('roberta-base',
                                                   trainable=True)
     self.drop1 = tf.keras.layers.Dropout(dropout_rate)
     self.drop2 = tf.keras.layers.Dropout(dropout_rate)
     self.fc = tf.keras.layers.Dense(units, tf.nn.swish)
     self.out = tf.keras.layers.Dense(3)
Example #10
 def _test_TFRoberta(self, size, large=False):
     from transformers import RobertaTokenizer, TFRobertaModel
     tokenizer = RobertaTokenizer.from_pretrained(size)
     model = TFRobertaModel.from_pretrained(size)
     input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
     spec, input_dict = self.spec_and_pad(input_dict)
     outputs = ["last_hidden_state"]
     self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
Example #11
 def dl_roberta(model_name, path):
     print("Start to download", model_name, "...")
     dump = path + model_name
     config = RobertaConfig.from_pretrained(model_name)
     model = TFRobertaModel.from_pretrained(model_name)
     tokenizer = RobertaTokenizer.from_pretrained(model_name)
     config.save_pretrained(dump)
     model.save_pretrained(dump)
     tokenizer.save_pretrained(dump)
     print("Download", model_name, "completed.")
Example #12
 def test_TFRobertaModel(self):
     from transformers import RobertaTokenizer, TFRobertaModel
     pretrained_weights = 'roberta-base'
     tokenizer = RobertaTokenizer.from_pretrained(pretrained_weights)
     text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
     model = TFRobertaModel.from_pretrained(pretrained_weights)
     predictions = model.predict(inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     self.assertTrue(
         run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                          predictions, self.model_files))
Example #13
    def __init__(self, model_name, dir_path, num_class):
        super(RobertaClassifier, self).__init__()

        self.bert = TFRobertaModel.from_pretrained(model_name, from_pt=True)
        self.dropout = tf.keras.layers.Dropout(
            self.bert.config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(
            num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=self.bert.config.initializer_range, seed=42),
            name="classifier")
Example #14
 def build_roberta(
     self,
     model_name: str = "distilroberta-base",
     model_latent_dim: int = 768,
     max_sentence_length: int = 25,
 ):
     self._vae.roberta_shape = (max_sentence_length, model_latent_dim)
     roberta = TFRobertaModel.from_pretrained(model_name)
     # Supposedly this avoids problems (https://github.com/huggingface/transformers/issues/1350#issuecomment-537625496)
     roberta.roberta.call = tf.function(roberta.roberta.call)
     self._vae.roberta = roberta
     return self
Example #15
 def __init__(self, intent_size, slot_size, lr=1e-4, dropout_rate=0.2, units=300):
     super().__init__()
     self.roberta = TFRobertaModel.from_pretrained('roberta-base',
                                                   trainable=True)
     self.inp_dropout = Dropout(dropout_rate)
     self.intent_dropout = Dropout(dropout_rate)
     self.fc_intent = Dense(units, activation='relu')
     self.trans_params = self.add_weight(shape=(slot_size, slot_size))
     self.out_linear_intent = Dense(intent_size)
     self.out_linear_slot = Dense(slot_size)
     self.optimizer = Adam(lr)
     self.slots_accuracy = tf.keras.metrics.Accuracy()
     self.intent_accuracy = tf.keras.metrics.Accuracy()
     self.decay_lr = tf.optimizers.schedules.ExponentialDecay(lr, 1000, 0.95)
     self.logger = logging.getLogger('tensorflow')
     self.logger.setLevel(logging.INFO)
Example #16
def get_twin_net(input_dim):
    left_input = tf.keras.Input(input_dim, dtype='int64')
    right_input = tf.keras.Input(input_dim, dtype='int64')
    #bert_model = TFRobertaModel.from_pretrained(bert_model_path, from_pt=True, config=config)
    bert_model = TFRobertaModel.from_pretrained("microsoft/codebert-base")
    encoded_l = bert_model(left_input)[0][:, 0, :]
    encoded_r = bert_model(right_input)[0][:, 0, :]
    ## Commented out lines below use average of sequence vectors, instead of the aggregated CLS.
    #av_encoded_l = tf.keras.layers.Lambda(lambda x: K.mean(x, axis=1))(encoded_l)
    #av_encoded_r = tf.keras.layers.Lambda(lambda x: K.mean(x, axis=1))(encoded_r)
    L1_layer = tf.keras.layers.Lambda(
        lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])
    prediction = tf.keras.layers.Dense(1, activation='sigmoid')(L1_distance)
    twin_net = tf.keras.models.Model(inputs=[left_input, right_input],
                                     outputs=prediction)
    return twin_net
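A short usage sketch for the twin network (an illustration under assumptions: input_dim is the padded sequence length, and the CodeBERT tokenizer produces the id tensors):

# Hedged sketch: encode two code snippets and score their similarity with the twin net.
import tensorflow as tf
from transformers import RobertaTokenizer

input_dim = 128                       # hypothetical padded sequence length
tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
twin_net = get_twin_net(input_dim)

left = tokenizer("def add(a, b): return a + b", padding='max_length',
                 max_length=input_dim, truncation=True, return_tensors='tf')
right = tokenizer("def sum2(x, y): return x + y", padding='max_length',
                  max_length=input_dim, truncation=True, return_tensors='tf')
ids_l = tf.cast(left['input_ids'], tf.int64)
ids_r = tf.cast(right['input_ids'], tf.int64)
similarity = twin_net.predict([ids_l, ids_r])  # sigmoid score in [0, 1]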
Example #17
def build_model():
    ids = J.Input((max_word,), dtype=tf.int32)
    att = J.Input((max_word,), dtype=tf.int32)
    tok = J.Input((max_word,), dtype=tf.int32)
    padding = tf.cast(tf.equal(ids, pad_num), tf.int32)

    lens = max_word - tf.reduce_sum(padding, -1)
    max_len = tf.reduce_max(lens)
    ids_ = ids[:, :max_len]
    att_ = att[:, :max_len]
    tok_ = tok[:, :max_len]

    config = RobertaConfig.from_pretrained(f'{path}datasets_597869_1074900_config-roberta-base.json')
    bert_model = TFRobertaModel.from_pretrained(f'{path}pretrained-roberta-base.h5', config=config)
    x = bert_model(ids_, attention_mask=att_, token_type_ids=tok_)
    # start and end position heads
    x1 = J.Dropout(0.1)(x[0])
    x1 = J.Conv1D(768, 2, padding='causal')(x1)  # causal conv
    x1 = J.LeakyReLU()(x1)
    x1 = J.Dense(1)(x1)
    x1 = J.Flatten()(x1)
    x1 = J.Activation('softmax')(x1)

    x2 = J.Dropout(0.1)(x[0])
    x2 = J.Conv1D(768, 2, padding='causal')(x2)
    x2 = J.LeakyReLU()(x2)
    x2 = J.Dense(1)(x2)
    x2 = J.Flatten()(x2)
    x2 = J.Activation('softmax')(x2)

    model = tf.keras.models.Model(inputs=[ids, att, tok], outputs=[x1, x2])
    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
    model.compile(loss=loss_fn, optimizer=optimizer)  # training configuration (optimizer, loss)
    x1_padded = tf.pad(x1, [[0, 0], [0, max_word - max_len]], constant_values=0.)  # pad back to max_word for prediction
    x2_padded = tf.pad(x2, [[0, 0], [0, max_word - max_len]], constant_values=0.)
    
    padded_model = tf.keras.models.Model(inputs=[ids, att, tok], outputs=[x1_padded,x2_padded])
    return model, padded_model
Example #18
 def __init__(self, MODELPATH, MODEL=None):
     self.special_token_set = {
         'roberta': (['<s>', '</s>'], 'be'),
         'bert': (['[CLS]', '[SEP]'], 'be'),
         'xlnet': (['<sep>', '<cls>'], 'e')
     }
     self.tokenizer = None
     self.model = None
     self.modeltype = None
     self.add_prefix_space = None
     if not MODEL:
         MODEL = MODELPATH.split('/')[-1]
     print(MODEL, MODELPATH)
     if MODEL.startswith('roberta'):
         self.modeltype = 'roberta'
         self.tokenizer = RobertaTokenizer.from_pretrained(
             MODELPATH, add_special_tokens=False)
         self.model = TFRobertaModel.from_pretrained(MODELPATH,
                                                     output_attentions=True)
         self.add_prefix_space = True
     if MODEL.startswith('bert'):
         self.modeltype = 'bert'
         self.tokenizer = BertTokenizer.from_pretrained(
             MODELPATH, add_special_tokens=False)
         self.model = TFBertModel.from_pretrained(MODELPATH,
                                                  output_attentions=True)
         self.add_prefix_space = False
     if MODEL.startswith('xlnet'):
         self.modeltype = 'xlnet'
         self.tokenizer = XLNetTokenizer.from_pretrained(
             MODELPATH, add_special_tokens=False)
         self.model = TFXLNetModel.from_pretrained(MODELPATH,
                                                   output_attentions=True)
         self.add_prefix_space = False
Example #19
    def __init__(self):
        '''Initialise the class: load the RoBERTa model from the HuggingFace library
           and set the maximum number of embeddings to extract for each column.'''
        # load models
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
        self.bert_model = TFRobertaModel.from_pretrained('roberta-large-mnli')

        # parameters for getting embedding
        self.max_token_dict = {
            'asp_cat_emb': 16,
            'asp_term_emb': 24,
            'review_emb': 50
        }
        self.src_column_dict = {
            'asp_cat_emb': 'aspect_category',
            'asp_term_emb': 'aspect_term',
            'review_emb': 'review'
        }

        # loading variable encoder
        self.encoder = LabelEncoder()

        # model
        self.model = self.create_model()
Example #20
                                     output_attentions=False,
                                     output_hidden_states=False)
                break
            except:
                pass

        last_hidden_states = outputs[0].numpy()
        last_hidden_states = np.array(
            tf.math.reduce_mean(last_hidden_states, axis=1))
        del outputs
        del inp
        res.append(last_hidden_states)

    return tf.concat(res, 0, name='concat')


inp_filename = os.path.join(os.getcwd(), 'data', "input", "data.txt")
out_filename = os.path.join(os.getcwd(), 'data', "output", "data.txt")

dataset = read_ds_from_file(inp_filename)
data_content, data_names, data_topic = retrieve_data(dataset)

inputs = strings2tokenized(data_content)
RoBERTa = TFRobertaModel.from_pretrained('roberta-large')

outputs = test(inputs, RoBERTa, chunk_size=1)

_, data_names, _ = retrieve_data(dataset)
embedded_dataset = reset_data(dataset, embedded2strings(outputs), data_names)

write_ds_to_file(out_filename, embedded_dataset)
Example #21
DATA_FILE = '../type-data.json'

## LOAD TOKENIZER
#with open('tokenizers/twin_nc_tokenizer.pickle', 'rb') as handle:
#with open('tokenizers/twin_names_tokenizer.pickle', 'rb') as handle:
#    lang_tokenizer = pickle.load(handle)

## LOAD SAVED MODEL
#model = load_model('models/twin__nc_TOP__PROG_model.h5')#twin__names_TOP__500000_PROG_model.h5')#
arg_model = load_model(
    'bert_twin_data/models/twin_bert_arg_200_84349_model.h5')
ret_model = load_model(
    'bert_twin_data/models/twin_bert_ret_200_99167_model.h5')

tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
bert_model = TFRobertaModel.from_pretrained("microsoft/codebert-base")

bert_cache = {}
vector_cache = {}
names_cache = {}
types_cache = {}

state = "open"
running_list_of_vecs = []
max_seq_length = 510


# in1 and in2 are lists of strings to be run through twin model
# i.e., in1[0] compared with in2[0], in1[1] compared with in2[1]...
def run_twin_model(in1, in2):
    in1 = tf.keras.preprocessing.sequence.pad_sequences(
Example #22
def train(args):
    # Build the vocabulary object
    vocab = Vocab(args.vocab_file, 50000, args.train_data_path)

    # Get the word/id dictionaries
    args.vocab = vocab

    # Load the pretrained embeddings
    embs = load_pkl('E:/CodeSleepEatRepeat/data/58tech/data/word2vec.txt')

    # Build the MLM training data
    batches = batcher(args, embs)

    # load pretrained model
    if args.pre_trained_model:
        config = RobertaConfig.from_pretrained(args.pre_trained_model)
        model_roberta = TFRobertaModel.from_pretrained(args.pre_trained_model,
                                                       config=config)
    else:
        # huggingface transformers model configuration
        config = RobertaConfig()
        config.num_hidden_layers = args.num_hidden_layers  # 12
        config.hidden_size = args.hidden_size  # 128
        config.intermediate_size = args.hidden_size * 4
        config.num_attention_heads = args.num_attention_heads  # 8
        config.vocab_size = args.vocab.word_size()

        model_roberta = TFRobertaModel(config)

    model = Model_Roberta(args, model_roberta)
    # model.summary()

    optimizer = tf.keras.optimizers.Nadam()
    loss_func = tf.keras.losses.SparseCategoricalCrossentropy()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_metric = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    # checkpoint_dir = args.checkpoints_dir
    # ckpt = tf.train.Checkpoint(model=model)
    # ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=3)

    if args.checkpoints_dir:
        print("Creating the checkpoint manager")
        checkpoint_dir = args.checkpoints_dir
        ckpt = tf.train.Checkpoint(model=model)
        ckpt_manager = tf.train.CheckpointManager(ckpt,
                                                  checkpoint_dir,
                                                  max_to_keep=5)

        if ckpt_manager.latest_checkpoint:
            # ckpt.restore('./checkpoints/ckpt-53')
            ckpt.restore(ckpt_manager.latest_checkpoint)
            print("Restored from {}".format(ckpt_manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

    count = 0
    best_loss = 20
    for epoch in tf.range(1, args.epochs + 1):

        for batch in batches:
            # inputs, inputs_ids, attention_masks, labels = batch[0], batch[1], batch[2], batch[3]
            gradients, loss, predictions, labels = train_step(
                model, batch, loss_func, args)

            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))

            train_loss.update_state(loss)
            train_metric.update_state(labels, predictions)

            logs = 'Epoch={},Loss:{},Accuracy:{}'

            # print(predictions)
            # print('-'*20)
            # print(masks_labels)
            # print('*'*20)
            # print(tf.reduce_mean(loss))
            # print('='*20)
            # label = tf.argmax(predictions[0])
            # print(label)

            if count % 100 == 0 and count != 0:
                tf.print(
                    tf.strings.format(
                        logs,
                        (epoch, train_loss.result(), train_metric.result())))
                tf.print("")
                if count % 1000 == 0 and train_loss.result() < best_loss:
                    best_loss = train_loss.result()
                    ckpt_save_path = ckpt_manager.save()
                    print('*' * 20)
                    print('Saving checkpoint for epoch {} at {} ,best loss {}'.
                          format(epoch, ckpt_save_path, best_loss))
                    print('*' * 20)
            count += 1

        train_loss.reset_states()
        train_metric.reset_states()

    model.encoder.save_pretrained('./pretrained-roberta/')
Example #23
    def build_model_1(self, verbose=False):
        """initialization the model"""
        if self.model_type in TrainModelConfigV2.BERT_LIST:
            config = BertConfig.from_pretrained(
                "{}{}/config.json".format(PATH_TRANS_INPUT, self.model_type),
                num_labels=BertBaseUnCaseV2.N_CLASS)
            bert_model = TFBertModel.from_pretrained("{}{}/tf_model.h5".format(
                PATH_TRANS_INPUT, self.model_type),
                                                     config=config)
            bert_model.trainable = False
            input_ids_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                    dtype=np.int32,
                                    name='input_ids')
            input_mask_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                     dtype=np.int32,
                                     name='attention_mask')
            input_token_type_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                           dtype=np.int32,
                                           name='token_type_ids')
            input_layer_list = [
                input_ids_layer, input_mask_layer, input_token_type_layer
            ]
            bert_layer = bert_model(input_layer_list)[0]

        elif self.model_type in TrainModelConfigV2.ROBERTA_LIST:
            config = RobertaConfig.from_pretrained(
                "{}{}/config.json".format(PATH_TRANS_INPUT, self.model_type),
                num_labels=BertBaseUnCaseV2.N_CLASS)
            bert_model = TFRobertaModel.from_pretrained(
                "{}{}/tf_model.h5".format(PATH_TRANS_INPUT, self.model_type),
                config=config)
            bert_model.trainable = False
            input_ids_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                    dtype=np.int32,
                                    name='input_ids')
            input_mask_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                     dtype=np.int32,
                                     name='attention_mask')

            input_layer_list = [input_ids_layer, input_mask_layer]
            bert_layer = bert_model(input_layer_list)[0]

        if self.version == "v1":
            flat_layer = Flatten()(bert_layer)
            out = Dropout(0.2)(flat_layer)
        elif self.version == "v2":
            out = LSTM(BertBaseUnCaseV2.hidden_size, dropout=0.2)(bert_layer)
        elif self.version == "v3":
            flat_layer = Flatten()(bert_layer)
            dense_layer = Dense(BertBaseUnCaseV2.hidden_size,
                                activation='relu')(flat_layer)
            out = Dropout(0.2)(dense_layer)
        elif self.version == "v4":
            bi_layer = Bidirectional(
                LSTM(BertBaseUnCaseV2.hidden_size,
                     dropout=0.2,
                     return_sequences=True))(bert_layer)
            bi_layer = Bidirectional(LSTM(
                BertBaseUnCaseV2.hidden_size))(bi_layer)
            dropout_layer = Dropout(0.2)(bi_layer)
            out = Dense(256, activation='relu')(dropout_layer)

        if BertBaseUnCaseV2.VER == 'v5':
            dense_output = Dense(BertBaseUnCaseV2.N_CLASS,
                                 activation='sigmoid')(out)
        else:
            dense_output = Dense(BertBaseUnCaseV2.N_CLASS,
                                 activation='softmax')(out)
        model = Model(inputs=input_layer_list, outputs=dense_output)

        # compile and fit
        if BertBaseUnCaseV2.VER == 'v5':
            optimizer = optimizers.Adam(learning_rate=BertBaseUnCaseV2.lr)
            loss = losses.SparseCategoricalCrossentropy(from_logits=True)
            metric = metrics.SparseCategoricalAccuracy('accuracy')
        else:
            optimizer = optimizers.Adam(learning_rate=BertBaseUnCaseV2.lr)
            loss = losses.SparseCategoricalCrossentropy(from_logits=True)
            metric = metrics.SparseCategoricalAccuracy('accuracy')
        model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
        if verbose:
            model.summary()
        return model
Example #24
 def get_model(self):
     return TFRobertaModel.from_pretrained("roberta-base")
Example #25
def get_tunable_roberta(hp: HyperParameters):
    ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32,
                             name='ids')
    att = keras.layers.Input(shape=(Config.Train.max_len, ),
                             dtype=tf.int32,
                             name='att')
    tok_type_ids = keras.layers.Input(shape=(Config.Train.max_len, ),
                                      dtype=tf.int32,
                                      name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model,
                                                   config=config)

    roberta_model.trainable = False

    x = roberta_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    use_alpha_dropout = False  # hp.Boolean('use_alpha_dropout')
    if use_alpha_dropout:
        x1 = keras.layers.AlphaDropout(hp.Choice('dropout1',
                                                 [0.1, 0.2, 0.3]))(x[0])
        x2 = keras.layers.AlphaDropout(hp.Choice('dropout2',
                                                 [0.1, 0.2, 0.3]))(x[0])
    else:
        x1 = keras.layers.Dropout(hp.Choice('dropout1', [0.1, 0.2, 0.3]))(x[0])
        x2 = keras.layers.Dropout(hp.Choice('dropout2', [0.1, 0.2, 0.3]))(x[0])

    use_rnn = False  # hp.Boolean('use_rnn')
    if use_rnn:
        lstm_count = hp.Choice('rnn_count', [1, 2])
        for i in range(lstm_count):
            x1, state1_0, _, state1_1, _ = keras.layers.Bidirectional(
                keras.layers.LSTM(hp.Int(f'lstm_units1_{i}', 32, 48, step=8),
                                  return_sequences=True,
                                  return_state=True))(x1)
            x1 = keras.layers.LeakyReLU()(x1)
            state1 = keras.layers.concatenate([state1_0, state1_1])
            x1 = keras.layers.Attention()([x1, state1])
            x2, state2_0, _, state2_1, _ = keras.layers.Bidirectional(
                keras.layers.LSTM(hp.Int(f'lstm_units2_{i}', 32, 48, step=8),
                                  return_sequences=True,
                                  return_state=True))(x2)
            x2 = keras.layers.LeakyReLU()(x2)
            state2 = keras.layers.concatenate([state2_0, state2_1])
            x2 = keras.layers.Attention()([x2, state2])
    else:
        conv_count = hp.Choice('conv_count', [1, 2])
        for i in range(conv_count):
            x1 = keras.layers.Conv1D(hp.Int(f'conv_filter1_{i}', 8, 24,
                                            step=8),
                                     hp.Int(f'conv_kernel1_{i}', 3, 5, step=1),
                                     padding='same')(x1)
            x1 = keras.layers.LeakyReLU()(x1)
            x2 = keras.layers.Conv1D(hp.Int(f'conv_filter2_{i}', 8, 24,
                                            step=8),
                                     hp.Int(f'conv_kernel2_{i}', 3, 5, step=1),
                                     padding='same')(x2)
            x2 = keras.layers.LeakyReLU()(x2)

    x1 = keras.layers.Conv1D(1, 1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', name='sts')(x1)

    x2 = keras.layers.Conv1D(1, 1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids],
                               outputs=[x1, x2])
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)

    return model
Example #26
def get_roberta():
    ids = keras.layers.Input(shape=(None, ), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(None, ), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(None, ),
                                      dtype=tf.int32,
                                      name='tti')

    config = RobertaConfig.from_pretrained(Config.Roberta.config)
    config.output_hidden_states = True
    roberta_model = TFRobertaModel.from_pretrained(Config.Roberta.model,
                                                   config=config)

    _, _, x = roberta_model(ids,
                            attention_mask=att,
                            token_type_ids=tok_type_ids)

    x1 = keras.layers.Dropout(0.15)(x[-1])
    x1 = keras.layers.Conv1D(768, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.add([x1, x[-2]])
    x1 = keras.layers.Conv1D(768, 5, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.add([x1, x[-3]])
    x1 = keras.layers.Conv1D(768, 8, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.add([x1, x[-4]])
    x1 = keras.layers.Dense(1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', dtype='float32', name='sts')(x1)

    x2 = keras.layers.Dropout(0.15)(x[-1])
    x2 = keras.layers.Conv1D(768, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.add([x2, x[-2]])
    x2 = keras.layers.Conv1D(768, 5, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.add([x2, x[-3]])
    x2 = keras.layers.Conv1D(768, 8, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.add([x2, x[-4]])
    x2 = keras.layers.Dense(1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', dtype='float32', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids],
                               outputs=[x1, x2])

    optimizer = keras.optimizers.Adam(learning_rate=6e-5)
    if Config.Train.use_amp:
        optimizer = keras.mixed_precision.experimental.LossScaleOptimizer(
            optimizer, 'dynamic')
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)

    keras.utils.plot_model(model, to_file='robert.png', show_shapes=True)
    return model
Example #27
from sklearn.metrics import precision_recall_fscore_support, classification_report
from sutime import SUTime
import json
from sklearn.utils import class_weight
from imblearn.over_sampling import RandomOverSampler
import pandas as pd

# GLOBAL VARIABLES

POS = True 
NE = True

MAX_CLAUSE_LENGTH = 70

TOKENIZER = RobertaTokenizer.from_pretrained("roberta-base")  
MODEL = TFRobertaModel.from_pretrained('roberta-base')

# Spacy and corenlp stuff 
nlp = spacy.load("en_core_web_sm")

## METRIC FUNCTIONS #####################################################

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
Example #28
    vecstr = ''
    for x_i in x:
        x_i_str = '%.4f' % (x_i)
        vecstr += x_i_str + ' '

    return vecstr[0:-1]


if __name__ == "__main__":

    if len(sys.argv) < 3:
        print('usage: python saveptvecs.py <context file> <outvec file>')
        sys.exit(0)
    print("here")
    bertTokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    bertModel = TFRobertaModel.from_pretrained("roberta-base")

    vocabfile = sys.argv[1]
    outfile = sys.argv[2]

    nlp_features = pipeline('feature-extraction',
                            model=bertModel,
                            tokenizer=bertTokenizer)

    with open(vocabfile) as f:
        words = f.read().splitlines()

    f = open(outfile, "w")
    f.write(str(len(words)) + ' 768\n')

    for w in words:
Example #29
 def __init__(self, num_classes: int, pretrained_roberta_name: str):
     super(KerasTextClassifier, self).__init__()
     self.transformer = TFRobertaModel.from_pretrained(
         pretrained_roberta_name)
     self.final_layer = layers.Dense(num_classes)
     self.softmax = layers.Softmax()
Example #30
def create_model_and_optimizer():
    with strategy.scope():
        transformer_layer = TFRobertaModel.from_pretrained(
            PRETRAINED_TOKENIZER)
        model = build_model(transformer_layer)
    return model