Example #1
 def model_fn(features, labels, mode, params):
     input_ids = features["input_ids"]
     input_mask = features["input_mask"]
     segment_ids = features["segment_ids"]
     # Enable dropout only when the Estimator is in TRAIN mode.
     is_training = (mode == tf.estimator.ModeKeys.TRAIN)
     model = BertModel(config, is_training, input_ids, input_mask, segment_ids)
     final_hidden = model.get_sequence_output()
     return final_hidden
Example #2
    def __init__(self):
        bert_pretrained_dir = args.pretrain_models_path + args.bert_model_name
        self.do_lower_case = args.bert_model_name.startswith('uncased')
        self.vocab_file = os.path.join(bert_pretrained_dir, 'vocab.txt')
        self.config_file = os.path.join(bert_pretrained_dir,
                                        'bert_config.json')
        self.tokenizer = FullTokenizer(vocab_file=self.vocab_file,
                                       do_lower_case=self.do_lower_case)

        self.input_id = tf.placeholder(tf.int64, [None, None], 'input_ids')
        self.input_mask = tf.placeholder(tf.int64, [None, None], 'input_mask')
        self.segment_ids = tf.placeholder(tf.int64, [None, None],
                                          'segment_ids')

        bert_config = BertConfig.from_json_file(self.config_file)
        model = BertModel(config=bert_config,
                          is_training=False,
                          input_ids=self.input_id,
                          input_mask=self.input_mask,
                          token_type_ids=self.segment_ids,
                          use_one_hot_embeddings=True,
                          scope='bert')
        self.output_layer = model.get_sequence_output()
        self.embedding_layer = model.get_embedding_output()

        saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.session = tf.Session(config=config)
        saver.restore(self.session, bert_pretrained_dir + '/bert_model.ckpt')
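A hedged usage sketch for the extractor above: a hypothetical extract() method (the method name and single-sentence batching are assumptions, not part of the source) that tokenizes one sentence, builds the three placeholder feeds, and returns the final-layer token embeddings.

    def extract(self, text):
        # Hypothetical helper, not in the original example.
        tokens = ['[CLS]'] + self.tokenizer.tokenize(text) + ['[SEP]']
        ids = self.tokenizer.convert_tokens_to_ids(tokens)
        feed = {self.input_id: [ids],
                self.input_mask: [[1] * len(ids)],
                self.segment_ids: [[0] * len(ids)]}
        return self.session.run(self.output_layer, feed_dict=feed)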
Example #3
    def _bert_model(self,
                    input_ids,
                    input_tag_embeddings,
                    input_masks,
                    bert_config,
                    bert_checkpoint_file,
                    is_training=False):
        """Creates the Bert model.

    Args:
      input_ids: A [batch, max_seq_len] int tensor.
      input_masks: A [batch, max_seq_len] int tensor.
    """
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=input_ids,
                               input_mask=input_masks,
                               use_tag_embeddings=True,
                               tag_embeddings=input_tag_embeddings)

        # Restore from checkpoint.
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), bert_checkpoint_file)
        if 'global_step' in assignment_map:
            assignment_map.pop('global_step')
        tf.compat.v1.train.init_from_checkpoint(bert_checkpoint_file,
                                                assignment_map)
        return bert_model.get_pooled_output()
Example #4
    def _bert_model(self, input_ids, input_tag_features, input_masks):
        """Creates the Bert model.

    Args:
      input_ids: A [batch, max_seq_len] int tensor.
      input_masks: A [batch, max_seq_len] int tensor.
    """
        is_training = self._is_training
        options = self._model_proto

        bert_config = BertConfig.from_json_file(options.bert_config_file)
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=input_ids,
                               input_mask=input_masks,
                               use_tag_embeddings=True,
                               tag_features=input_tag_features)

        # Restore from checkpoint.
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), options.bert_checkpoint_file)
        if 'global_step' in assignment_map:
            assignment_map.pop('global_step')
        tf.compat.v1.train.init_from_checkpoint(options.bert_checkpoint_file,
                                                assignment_map)
        return bert_model.get_pooled_output()
Example #5
 def __init__(self, path, training=False, max_seq_length=512):
     self.max_seq_length = max_seq_length
     self.graph = tf.Graph()
     with self.graph.as_default():
         self.input_ids = tf.compat.v1.placeholder(
             tf.int32, shape=(None, self.max_seq_length))
         self.input_mask = tf.compat.v1.placeholder(
             tf.int32, shape=(None, self.max_seq_length))
         self.segment_ids = tf.compat.v1.placeholder(
             tf.int32, shape=(None, self.max_seq_length))
         self.bert_config = BertConfig.from_json_file(path +
                                                      '/bert_config.json')
         self.bert_module = BertModel(config=self.bert_config,
                                      is_training=training,
                                      input_ids=self.input_ids,
                                      input_mask=self.input_mask,
                                      token_type_ids=self.segment_ids,
                                      use_one_hot_embeddings=False)
         assignment_map, initialized_variable_names = get_assignment_map_from_checkpoint(
             tf.trainable_variables(), path + '/bert_model.ckpt')
         tf.train.init_from_checkpoint(path + '/bert_model.ckpt',
                                       assignment_map)
         self.sess = tf.compat.v1.Session()
         self.sess.run(
             tf.group(tf.compat.v1.global_variables_initializer(),
                      tf.compat.v1.tables_initializer()))
         self.bert_outputs = {
             'sequence_output': self.bert_module.get_sequence_output(),
             'pooled_output': self.bert_module.get_pooled_output(),
         }
         self.tok = tokenization.FullTokenizer(vocab_file=path +
                                               '/vocab.txt',
                                               do_lower_case=True)
Example #6
 def get_bert_embeddings(self, flattened_input_ids, flattened_input_mask,
                         is_training: bool):
     """
     applying BERT to each sliding window, and get token embeddings corresponding to the right tokens
     :param flattened_input_ids: [-1]
     :param flattened_input_mask: [-1]
     :param is_training:
     :return: (num_tokens, embed_size)
     """
     input_ids = tf.reshape(flattened_input_ids,
                            [-1, self.config.sliding_window_size])
     input_mask = tf.reshape(flattened_input_mask,
                             [-1, self.config.sliding_window_size])
     actual_mask = tf.cast(tf.not_equal(input_mask, self.config.pad_idx),
                           tf.int32)
     with tf.variable_scope('bert', reuse=tf.AUTO_REUSE):
         bert_model = BertModel(self.bert_config,
                                is_training,
                                input_ids,
                                actual_mask,
                                scope='bert')
     bert_embeddings = bert_model.get_sequence_output(
     )  # (num_windows, window_size, embed_size)
     flattened_embeddings = tf.reshape(bert_embeddings,
                                       [-1, self.bert_config.hidden_size])
     flattened_mask = tf.greater_equal(flattened_input_mask, 0)
     output_embeddings = tf.boolean_mask(flattened_embeddings,
                                         flattened_mask)
      # Debug output of intermediate tensor shapes.
      print('bert window embeddings:', bert_embeddings.get_shape(),
            output_embeddings.get_shape(), flattened_embeddings.get_shape(),
            flattened_mask.get_shape())
     return output_embeddings
Example #7
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    # if no_pretraining:
    #     pass
    # else:
        # model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        # print("Load pre-trained parameters.")
    # model_bert=torch.nn.DataParallel(model_bert, device_ids=[0, 4, 5])
    model_bert.to(device)
    # model_bert.cuda(2)

    return model_bert, tokenizer, bert_config
Example #8
        def qa_loop_body(i, starts, ends, labels, scores):
            input_ids = tf.reshape(flattened_input_ids,
                                   [-1, self.config.sliding_window_size
                                    ])  # (num_windows, window_size)
            input_mask = tf.reshape(flattened_input_mask,
                                    [-1, self.config.sliding_window_size])
            actual_mask = tf.cast(tf.not_equal(input_mask,
                                               self.config.pad_idx),
                                  tf.int32)  # (num_windows, window_size)

            num_windows = tf.shape(actual_mask)[0]
            question_tokens = self.get_question_token_ids(
                sentence_map, flattened_input_ids, flattened_input_mask,
                top_span_starts[i], top_span_ends[i])  # (num_question_tokens)
            tiled_question = tf.tile(
                tf.expand_dims(question_tokens, 0),
                [num_windows, 1])  # (num_windows, num_ques_tokens)
            question_ones = tf.ones_like(tiled_question, dtype=tf.int32)
            question_zeros = tf.zeros_like(tiled_question, dtype=tf.int32)
            qa_input_ids = tf.concat(
                [tiled_question, input_ids],
                1)  # (num_windows, num_ques_tokens + window_size)
            qa_input_mask = tf.concat(
                [question_ones, actual_mask],
                1)  # (num_windows, num_ques_tokens + window_size)
            token_type_ids = tf.concat([question_zeros, actual_mask], 1)
            with tf.variable_scope('bert', reuse=tf.AUTO_REUSE):
                bert_model = BertModel(self.bert_config,
                                       is_training,
                                       qa_input_ids,
                                       qa_input_mask,
                                       token_type_ids,
                                       scope='bert')
            bert_embeddings = bert_model.get_sequence_output(
            )  # num_windows, num_ques_tokens + window_size, embed_size
            flattened_embeddings = tf.reshape(
                bert_embeddings, [-1, self.bert_config.hidden_size])
            output_mask = tf.concat(
                [-1 * question_ones, input_mask],
                1)  # (num_windows, num_ques_tokens + window_size)
            flattened_mask = tf.reshape(tf.greater_equal(output_mask, 0), [-1])
            qa_embeddings = tf.boolean_mask(
                flattened_embeddings,
                flattened_mask)  # (num_tokens, embed_size)
            qa_scores, qa_indices, qa_starts, qa_ends, qa_embs = self.filter_by_mention_scores(
                qa_embeddings, candidate_starts, candidate_ends, dropout, c)
            qa_cluster_ids = self.get_top_span_cluster_ids(
                candidate_starts, candidate_ends, span_starts, span_ends,
                cluster_ids, qa_indices)
            return (i + 1,
                    tf.concat(
                        [starts, tf.expand_dims(qa_starts, axis=0)], axis=0),
                    tf.concat([ends, tf.expand_dims(qa_ends, axis=0)], axis=0),
                    tf.concat([labels,
                               tf.expand_dims(qa_cluster_ids, axis=0)],
                              axis=0),
                    tf.concat(
                        [scores, tf.expand_dims(qa_scores, axis=0)], axis=0))
Example #9
File: bert.py Project: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Predicts the resulting tensors.

    Args:
      inputs: A dictionary of input tensors keyed by names.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name.
    """
        is_training = self._is_training
        options = self._model_proto

        (answer_choices, answer_choices_len,
         answer_label) = (inputs[InputFields.answer_choices_with_question],
                          inputs[InputFields.answer_choices_with_question_len],
                          inputs[InputFields.answer_label])

        # Create model layers.
        token_to_id_layer = token_to_id.TokenToIdLayer(
            options.bert_vocab_file, options.bert_unk_token_id)

        # Convert tokens into token ids.
        batch_size = answer_choices.shape[0]

        answer_choices_token_ids = token_to_id_layer(answer_choices)
        answer_choices_token_ids_reshaped = tf.reshape(
            answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

        answer_choices_mask = tf.sequence_mask(
            answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
        answer_choices_mask_reshaped = tf.reshape(
            answer_choices_mask, [batch_size * NUM_CHOICES, -1])

        # Bert prediction.
        bert_config = BertConfig.from_json_file(options.bert_config_file)
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=answer_choices_token_ids_reshaped,
                               input_mask=answer_choices_mask_reshaped)

        answer_choices_cls_feature_reshaped = bert_model.get_pooled_output()
        answer_choices_cls_feature = tf.reshape(
            answer_choices_cls_feature_reshaped, [batch_size, NUM_CHOICES, -1])

        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), options.bert_checkpoint_file)

        tf.compat.v1.train.init_from_checkpoint(options.bert_checkpoint_file,
                                                assignment_map)

        # Classification layer.
        output = tf.compat.v1.layers.dense(answer_choices_cls_feature,
                                           units=1,
                                           activation=None)
        output = tf.squeeze(output, axis=-1)

        return {FIELD_ANSWER_PREDICTION: output}
Example #10
def convert(args):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(args.bert_config_file)
    model = BertModel(config)

    # Load weights from TF model
    path = args.tf_checkpoint_path
    print("Converting TensorFlow checkpoint from {}".format(path))

    init_vars = tf.train.list_variables(path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading {} with shape {}".format(name, shape))
        array = tf.train.load_variable(path, name)
        print("Numpy array shape {}".format(array.shape))
        names.append(name)
        arrays.append(array)

    for name, array in zip(names, arrays):
        name = name[5:]  # skip the "bert/" prefix
        print("Loading {}".format(name))
        name = name.split('/')
        # Dropping the first five characters mangles the pre-training heads:
        # "cls/predictions" -> "redictions" and "cls/seq_relationship" ->
        # "eq_relationship". Those weights do not belong to BertModel, so skip them.
        if name[0] in ['redictions', 'eq_relationship']:
            print("Skipping")
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel':
                pointer = getattr(pointer, 'weight')
            else:
                if l[0] != 'l_step':
                    pointer = getattr(pointer, l[0], name)
                else:
                    print(l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        except AttributeError:
            continue
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    torch.save(model.state_dict(), args.pytorch_dump_path)
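A hedged usage sketch for the converter above; the flag names simply mirror the three attributes read from args and are otherwise assumptions.

    # Hypothetical CLI wrapper around convert(); assumes argparse, tensorflow,
    # torch and numpy are imported at module level.
    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--tf_checkpoint_path', required=True)
        parser.add_argument('--bert_config_file', required=True)
        parser.add_argument('--pytorch_dump_path', required=True)
        convert(parser.parse_args())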
Example #11
    def build(self, data_iter, bert_config_file):
        # get the inputs
        with tf.variable_scope('inputs'):
            input_map = data_iter.get_next()
            usrid, prdid, input_x, input_y, doc_len = \
                (input_map['usr'], input_map['prd'],
                 input_map['content'], input_map['rating'],
                 input_map['doc_len'])

            input_x = tf.reshape(input_x, [-1, self.max_sen_len])
            sen_len = tf.count_nonzero(input_x, axis=-1)
            doc_len = doc_len // self.max_sen_len

            input_x = tf.cast(input_x, tf.int32)
            self.usr = lookup(self.embeddings['usr_emb'], usrid, name='cur_usr_embedding')
            self.prd = lookup(self.embeddings['prd_emb'], prdid, name='cur_prd_embedding')
            input_x = tf.reshape(input_x, [-1, self.max_sen_len])
            input_mask = tf.sequence_mask(sen_len, self.max_sen_len)
            input_mask = tf.cast(input_mask, tf.int32)

        bert_config = BertConfig.from_json_file(bert_config_file)
        bert = BertModel(bert_config, is_training=False,
                         input_ids=input_x, input_mask=input_mask,
                         token_type_ids=None,
                         use_one_hot_embeddings=False)
        # input_x = bert.get_sequence_output()
        input_x = bert.get_embedding_output()

        # build the process of model
        d_hat = self.nsc(input_x, self.max_sen_len, self.max_doc_len // self.max_sen_len,
                         sen_len, doc_len)
        prediction = tf.argmax(d_hat, 1, name='prediction')

        with tf.variable_scope("loss"):
            sce = tf.nn.softmax_cross_entropy_with_logits_v2
            self.loss = sce(logits=d_hat, labels=tf.one_hot(input_y, self.cls_cnt))

            regularizer = tf.zeros(1)
            params = tf.trainable_variables()
            for param in params:
                if param not in self.embeddings.values():
                    regularizer += tf.nn.l2_loss(param)
            self.loss = tf.reduce_sum(self.loss) + self.l2_rate * regularizer

        with tf.variable_scope("metrics"):
            correct_prediction = tf.equal(prediction, input_y)
            # Note: "mse" here is a summed squared error and "accuracy" is the
            # correct count cast to float; any averaging is left to the caller.
            mse = tf.reduce_sum(tf.square(prediction - input_y), name="mse")
            correct_num = tf.reduce_sum(tf.cast(correct_prediction, dtype=tf.int32), name="correct_num")
            accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float"), name="accuracy")

        return self.loss, mse, correct_num, accuracy
Example #12
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None):
        self.model = BertModel(config=config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=token_type_ids)

        self.embeddings_table = self.model.get_embedding_table()
Example #13
    def __init__(self, config, output_hidden_size):
        super(BertForInteractSpanExtractAndClassification, self).__init__()
        # Shared Part
        self.bert = BertModel(config)

        # Private Part
        self.te_bilstm = nn.LSTM(input_size=config.hidden_size,
                                 hidden_size=config.hidden_size,
                                 batch_first=True,
                                 bidirectional=True)
        self.tc_bilstm = nn.LSTM(input_size=config.hidden_size,
                                 hidden_size=config.hidden_size,
                                 batch_first=True,
                                 bidirectional=True)
        self.te_dense = nn.Linear(config.hidden_size * 2, config.hidden_size)
        self.tc_dense = nn.Linear(config.hidden_size * 2, config.hidden_size)

        self.attention = nn.Linear(config.hidden_size * 2, 1)
        self.tc_output_layer = nn.Linear(config.hidden_size * 2,
                                         output_hidden_size)

        self.extraction = nn.Linear(config.hidden_size * 2, 2)

        self.classifier = nn.Linear(output_hidden_size, 5)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.activation = nn.Tanh()
        self.mse = nn.MSELoss(reduction="mean")

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            elif isinstance(module, nn.LSTM):
                for name, param in module.named_parameters():
                    if 'weight_ih' in name:
                        nn.init.xavier_normal_(param)
                    elif 'weight_hh' in name:
                        nn.init.orthogonal_(param)
                    elif 'bias' in name:
                        nn.init.constant_(param, 0.0)
                        param.chunk(4)[1].fill_(1)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
Example #14
 def _buildModel(self, input_ids, token_type_ids, input_mask):
     bert_model = BertModel(self.config, self.config.training, input_ids,
                            input_mask, token_type_ids,
                            self.config.use_one_hot_embeddings)
     bert_output = bert_model.get_pooled_output()
     output = tf.layers.dense(
         bert_output,
         self.config.output_dim,
         kernel_initializer=tf.truncated_normal_initializer(
             stddev=self.config.initializer_range),
         kernel_regularizer=tf.contrib.layers.l2_regularizer(1.0),
         bias_regularizer=tf.contrib.layers.l2_regularizer(1.0),
         name='output')
     return output
Example #15
    def get_model(self):
        logging.info("get bert model")
        graph = tf.Graph()
        with graph.as_default():
            ph_input_ids = tf.placeholder(dtype=tf.int32, shape=[None, self._seq_length + 2], name="ph_input_ids")
            con = BertConfig.from_json_file(config.PROJECT_ROOT + "/bert_config.json")
            bert_model = BertModel(config=con, is_training=False, input_ids=ph_input_ids,
                                   use_one_hot_embeddings=True)
            output = bert_model.get_sequence_output()
            init = tf.global_variables_initializer()

        sess = tf.Session(graph=graph)
        sess.run(init)

        return sess, ph_input_ids, output
Example #16
def make_bert_graph(bert_config, max_seq_length, dropout_keep_prob_rate, num_labels, tune=False):
    input_ids = tf.placeholder(tf.int32, [None, max_seq_length], name='inputs_ids')
    input_mask = tf.placeholder(tf.int32, [None, max_seq_length], name='input_mask')
    segment_ids = tf.placeholder(tf.int32, [None, max_seq_length], name='segment_ids')
    model = BertModel(config=bert_config,
                      is_training=tune,
                      input_ids=input_ids,
                      input_mask=input_mask,
                      token_type_ids=segment_ids)
    if tune:
        # `dropout_keep_prob_rate` is treated as a drop rate here, so the keep
        # probability passed to tf.nn.dropout is (1 - rate).
        bert_embeddings_dropout = tf.nn.dropout(model.pooled_output, keep_prob=(1 - dropout_keep_prob_rate))
        label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
    else:
        bert_embeddings_dropout = model.pooled_output
        label_ids = None
    logits = tf.contrib.layers.fully_connected(inputs=bert_embeddings_dropout,
                                               num_outputs=num_labels,
                                               activation_fn=None,
                                               weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                               biases_initializer=tf.zeros_initializer())
    if tune:
        # loss layer
        CE = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ids, logits=logits)
        loss = tf.reduce_mean(CE)
        return input_ids, input_mask, segment_ids, label_ids, logits, loss
    else:
        # prob layer
        probs = tf.nn.softmax(logits, axis=-1, name='probs')
        return model, input_ids, input_mask, segment_ids, probs
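A short, hedged inference sketch for the graph built above; the checkpoint path and the batch_* arrays are placeholders, not from the source.

    model, input_ids_op, input_mask_op, segment_ids_op, probs_op = make_bert_graph(
        bert_config, max_seq_length=128, dropout_keep_prob_rate=0.1,
        num_labels=2, tune=False)
    with tf.Session() as sess:
        tf.train.Saver().restore(sess, '/path/to/finetuned.ckpt')
        probs = sess.run(probs_op, feed_dict={input_ids_op: batch_ids,
                                              input_mask_op: batch_mask,
                                              segment_ids_op: batch_segments})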
Example #17
    def __init__(self, config, params):
        super(NestedNERModel, self).__init__(config)

        self.params = params

        self.ner_label_limit = params["ner_label_limit"]
        self.thresholds = params["ner_threshold"]

        self.num_entities = params["mappings"]["nn_mapping"]["num_entities"]
        self.num_triggers = params["mappings"]["nn_mapping"]["num_triggers"]

        self.max_span_width = params["max_span_width"]

        self.bert = BertModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.entity_classifier = nn.Linear(config.hidden_size * 3, self.num_entities)
        self.trigger_classifier = nn.Linear(config.hidden_size * 3, self.num_triggers)

        self.register_buffer(
            "label_ids",
            torch.tensor(
                params["mappings"]["nn_mapping"]["mlb"].classes_, dtype=torch.uint8
            ),
        )

        self.apply(self.init_bert_weights)
Example #18
    def __init__(self, model_folder, max_length=256, lowercase=True):

        # 1. Create tokenizer
        self.max_length = max_length
        vocab_file = os.path.join(model_folder, 'vocab.txt')
        self.tokenizer = FullTokenizer(vocab_file, do_lower_case=lowercase)

        # 2. Read Config
        config_file = os.path.join(model_folder, 'bert_config.json')
        self.config = BertConfig.from_json_file(config_file)

        # 3. Create Model
        self.session = tf.Session()
        self.token_ids_op = tf.placeholder(tf.int32,
                                           shape=(None, max_length),
                                           name='token_ids')
        self.model = BertModel(config=self.config,
                               is_training=False,
                               input_ids=self.token_ids_op,
                               use_one_hot_embeddings=False)

        # 4. Restore Trained Model
        self.saver = tf.train.Saver()
        ckpt_file = os.path.join(model_folder, 'bert_model.ckpt')
        # RCS ckpt_file = os.path.join(model_folder, 'model.ckpt-1000000')
        self.saver.restore(self.session, ckpt_file)

        hidden_layers = self.config.num_hidden_layers
        self.embeddings_op = tf.get_default_graph().get_tensor_by_name(
            "bert/encoder/Reshape_{}:0".format(hidden_layers + 1))
Example #19
    def __init__(self, config, use_crf=False):
        super(BertForJointBIOExtractAndClassification, self).__init__()
        self.bert = BertModel(config)
        self.use_crf = use_crf
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.bio_affine = nn.Linear(config.hidden_size, 3)
        self.cls_affine = nn.Linear(config.hidden_size, 5)
        if self.use_crf:
            self.cls_crf = ConditionalRandomField(5)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
Example #20
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, my_pretrain_bert):
    # bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    # vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    # init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    # bert_config = BertConfig.from_json_file(bert_config_file)
    # tokenizer = tokenization.FullTokenizer(
    #     vocab_file=vocab_file, do_lower_case=do_lower_case)
    # bert_config.print_status()

    # model_bert = BertModel(bert_config)

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=do_lower_case)
    # from_pretrained returns only the model; its config is exposed as an attribute.
    model_bert = BertModel.from_pretrained('bert-base-uncased')
    bert_config = model_bert.config

    if my_pretrain_bert:
        init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')
        model_bert.load_state_dict(
            torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    else:
        pass
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
Example #21
    def __init__(self, config):
        super(BertForCollapsedSpanAspectExtractionAndClassification,
              self).__init__()
        self.bert = BertModel(config)
        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.neu_outputs = nn.Linear(config.hidden_size, 2)
        self.pos_outputs = nn.Linear(config.hidden_size, 2)
        self.neg_outputs = nn.Linear(config.hidden_size, 2)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
Example #22
    def __init__(self,
                 config,
                 num_labels: int,
                 num_pos: int,
                 use_pos: bool,
                 arc_representation_dim: int,
                 arc_feedforward: FeedForward = None,
                 use_mst_decoding_for_validation: bool = True,
                 dropout: float = 0.) -> None:
        super(DistanceDependencyParser, self).__init__(config)
        self.bert = BertModel(config)
        self.apply(self.init_bert_weights)

        encoder_dim = config.hidden_size

        self.arc_feedforward = arc_feedforward or \
                                    FeedForward(encoder_dim, 1,
                                                arc_representation_dim,
                                                Activation.by_name("linear")())

        self.arc_attention = DistanceAttention()

        self._dropout = InputVariationalDropout(dropout)

        self.use_mst_decoding_for_validation = use_mst_decoding_for_validation

        self._attachment_scores = UndirectedAttachmentScores()
Example #23
    def __init__(self, config, use_bert_ffn):
        super(BertForSequenceClassificationWithSelfAtt, self).__init__()
        self.bert = BertModel(config)
        if use_bert_ffn:
            self.rank_ffn = BertFeedForward(config, config.hidden_size,
                                            config.hidden_size, 2)
        else:
            self.rank_ffn = NormalFeedForward(config, config.hidden_size,
                                              config.hidden_size, 2)
        self.rank_affine = nn.Linear(config.hidden_size, 1)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
Example #24
    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert_base/')

        if args.bert_freeze:
            for param in self.bert.parameters():
                param.requires_grad = False

        self.lstm = BiLSTM(
            input_size=args.bert_hidden_size + args.cnn_output_size,
            hidden_size=args.rnn_hidden_size + args.cnn_output_size,
            num_layers=args.rnn_num_layers,
            num_dirs=args.rnn_num_dirs)

        self.lstm_dropout = nn.Dropout(p=args.rnn_dropout)

        self.cnn = CharCNN(embedding_num=len(CHAR_VOCAB),
                           embedding_dim=args.cnn_embedding_dim,
                           filters=eval(args.cnn_filters),
                           output_size=args.cnn_output_size)

        self.crf = CRF(target_size=len(VOCAB) + 2, use_cuda=args.crf_use_cuda)

        self.linear = nn.Linear(in_features=args.rnn_hidden_size +
                                args.cnn_output_size,
                                out_features=len(VOCAB) + 2)

        self.attn = MultiHeadAttention(model_dim=args.rnn_hidden_size +
                                       args.cnn_output_size,
                                       num_heads=args.attn_num_heads,
                                       dropout=args.attn_dropout)

        self.feat_dropout = nn.Dropout(p=args.feat_dropout)
Example #25
    def _predict_logits(self,
                        answer_choices,
                        answer_choices_len,
                        token_to_id_fn,
                        bert_config,
                        slim_fc_scope,
                        keep_prob=1.0,
                        is_training=False):
        """Predicts answer for a particular task.

    Args:
      answer_choices: A [batch, NUM_CHOICES, max_answer_len] string tensor.
      answer_choices_len: A [batch, NUM_CHOICES] int tensor.
      token_to_id_fn: A callable to convert the token tensor to an int tensor.
      bert_config: A BertConfig instance to initialize the BERT model.
      slim_fc_scope: Slim FC scope.
      keep_prob: Keep probability of dropout layers.
      is_training: Whether the model is built for training.

    Returns:
      logits: A [batch, NUM_CHOICES] float tensor.
    """
        batch_size = answer_choices.shape[0]

        # Convert tokens into token ids.
        answer_choices_token_ids = token_to_id_fn(answer_choices)
        answer_choices_token_ids = tf.reshape(answer_choices_token_ids,
                                              [batch_size * NUM_CHOICES, -1])

        answer_choices_mask = tf.sequence_mask(
            answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
        answer_choices_mask = tf.reshape(answer_choices_mask,
                                         [batch_size * NUM_CHOICES, -1])

        # Bert prediction.
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=answer_choices_token_ids,
                               input_mask=answer_choices_mask)
        output = bert_model.get_pooled_output()

        # Classification layer.
        with slim.arg_scope(slim_fc_scope):
            output = slim.fully_connected(output,
                                          num_outputs=1,
                                          activation_fn=None,
                                          scope='logits')
        return tf.reshape(output, [batch_size, NUM_CHOICES])
Example #26
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
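A hedged usage sketch for the factory above, following the pytorch-pretrained-bert convention that the model returns (encoded_layers, pooled_output); the token ids below are illustrative only and torch is assumed to be imported.

    model = bertModel('bert-base-uncased')
    model.eval()
    tokens_tensor = torch.tensor([[101, 7592, 2088, 102]])  # roughly "[CLS] hello world [SEP]"
    with torch.no_grad():
        encoded_layers, pooled_output = model(tokens_tensor)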
Example #27
def bert(bert_config_file,
         mode,
         dim,
         input_ids,
         input_mask,
         input_type,
         activation,
         init_checkpoint=None):
    bert_config = BertConfig.from_json_file(bert_config_file)
    bert_model = BertModel(config=bert_config,
                           is_training=mode == tf.estimator.ModeKeys.TRAIN,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=input_type,
                           scope="bert_query")
    output = bert_model.get_pooled_output()
    if mode == tf.estimator.ModeKeys.TRAIN:
        output = tf.nn.dropout(output, keep_prob=0.9)
    sig = tf.layers.dense(output,
                          dim,
                          activation=activation,
                          kernel_initializer=tf.truncated_normal_initializer(
                              stddev=bert_config.initializer_range),
                          name="bert_query/query")

    tvars = tf.trainable_variables('bert_query')
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map,
         initialized_variable_names) = get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    """
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)
    """

    return sig
Example #28
class BertEncoder(object):
    def __init__(self,
                 config,
                 is_training,
                 input_ids,
                 input_mask=None,
                 token_type_ids=None):
        self.model = BertModel(config=config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=token_type_ids)

        self.embeddings_table = self.model.get_embedding_table()

    def encode(self):
        # `sequence_output` has shape [batch_size, seq_length, hidden_size].
        output = self.model.get_sequence_output()
        states = ()
        for layer in self.model.get_all_encoder_layers():
            states += (tf.reduce_mean(layer, axis=1), )
        return output, states,
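A minimal, hedged sketch of wiring the encoder above into a TF1 graph; the placeholder shapes and config path are assumptions.

    ids = tf.placeholder(tf.int32, [None, None], 'input_ids')
    mask = tf.placeholder(tf.int32, [None, None], 'input_mask')
    encoder = BertEncoder(BertConfig.from_json_file('bert_config.json'),
                          is_training=False, input_ids=ids, input_mask=mask)
    sequence_output, layer_means = encoder.encode()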
Example #29
    def __init__(self, config, num_labels, word_pool_type='mean'):

        super(BertForSequenceLabeling, self).__init__(config)
        if word_pool_type.lower() not in {'first', 'mean', 'sum'}:
            raise ValueError('No {} pooling methods!'.format(word_pool_type))
        if word_pool_type.lower() == 'sum':
            self.layer_norm = BertLayerNorm(config)
        self.word_pool_type = word_pool_type
        self.bert = BertModel(config)
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
        self.crf = ConditionalRandomField(num_labels)
        self.apply(self.init_bert_weights)
Example #30
    def body(self, features, mode):
        """Body of the model, aka Bert

        Arguments:
            features {dict} -- feature dict,
                keys: input_ids, input_mask, segment_ids
            mode {mode} -- mode

        Returns:
            dict -- features extracted from bert.
                keys: 'seq', 'pooled', 'all', 'embed'

        seq:
            tensor, [batch_size, seq_length, hidden_size]
        pooled:
            tensor, [batch_size, hidden_size]
        all:
            list of tensor, num_hidden_layers * [batch_size, seq_length, hidden_size]
        embed:
            tensor, [batch_size, seq_length, hidden_size]
        """

        config = self.config
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = BertModel(config=config.bert_config,
                          is_training=is_training,
                          input_ids=input_ids,
                          input_mask=input_mask,
                          token_type_ids=segment_ids,
                          use_one_hot_embeddings=config.use_one_hot_embeddings)

        feature_dict = {}
        for logit_type in ['seq', 'pooled', 'all', 'embed', 'embed_table']:
            if logit_type == 'seq':
                # tensor, [batch_size, seq_length, hidden_size]
                feature_dict[logit_type] = model.get_sequence_output()
            elif logit_type == 'pooled':
                # tensor, [batch_size, hidden_size]
                feature_dict[logit_type] = model.get_pooled_output()
            elif logit_type == 'all':
                # list, num_hidden_layers * [batch_size, seq_length, hidden_size]
                feature_dict[logit_type] = model.get_all_encoder_layers()
            elif logit_type == 'embed':
                # for res connection
                feature_dict[logit_type] = model.get_embedding_output()
            elif logit_type == 'embed_table':
                feature_dict[logit_type] = model.get_embedding_table()

        return feature_dict