    # Requires TF1-style `tensorflow` plus FullTokenizer, BertConfig and
    # BertModel from google-research/bert (tokenization.py / modeling.py).
    def __init__(self):
        # `args` is module-level configuration; cased vs. uncased is
        # inferred from the checkpoint directory name.
        bert_pretrained_dir = os.path.join(args.pretrain_models_path,
                                           args.bert_model_name)
        self.do_lower_case = args.bert_model_name.startswith('uncased')
        self.vocab_file = os.path.join(bert_pretrained_dir, 'vocab.txt')
        self.config_file = os.path.join(bert_pretrained_dir,
                                        'bert_config.json')
        self.tokenizer = FullTokenizer(vocab_file=self.vocab_file,
                                       do_lower_case=self.do_lower_case)

        # Inputs: token ids, attention mask and segment ids, each shaped
        # [batch_size, seq_length].
        self.input_id = tf.placeholder(tf.int64, [None, None], 'input_ids')
        self.input_mask = tf.placeholder(tf.int64, [None, None], 'input_mask')
        self.segment_ids = tf.placeholder(tf.int64, [None, None],
                                          'segment_ids')

        # Build the BERT graph in inference mode (no dropout).
        bert_config = BertConfig.from_json_file(self.config_file)
        model = BertModel(config=bert_config,
                          is_training=False,
                          input_ids=self.input_id,
                          input_mask=self.input_mask,
                          token_type_ids=self.segment_ids,
                          use_one_hot_embeddings=True,
                          scope='bert')
        # Final encoder layer, [batch_size, seq_length, hidden_size].
        self.output_layer = model.get_sequence_output()
        # Raw token embeddings before the encoder stack.
        self.embedding_layer = model.get_embedding_output()

        # Restore the pre-trained weights into a fresh session.
        saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.session = tf.Session(config=config)
        saver.restore(self.session,
                      os.path.join(bert_pretrained_dir, 'bert_model.ckpt'))
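
A minimal usage sketch for the extractor above. The class name BertFeatureExtractor and the example sentence are assumptions, since the snippet only shows the __init__; the real class may expose its own encode method.

# Hypothetical usage of the class whose __init__ is shown above; the
# name `BertFeatureExtractor` is assumed, not taken from the source.
extractor = BertFeatureExtractor()
tokens = ['[CLS]'] + extractor.tokenizer.tokenize('hello world') + ['[SEP]']
ids = extractor.tokenizer.convert_tokens_to_ids(tokens)
seq_out = extractor.session.run(
    extractor.output_layer,
    feed_dict={extractor.input_id: [ids],
               extractor.input_mask: [[1] * len(ids)],
               extractor.segment_ids: [[0] * len(ids)]})
# seq_out: [1, len(ids), hidden_size] contextual token vectors.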
Example #2
    def body(self, features, mode):
        """Body of the model, a.k.a. BERT.

        Arguments:
            features {dict} -- feature dict,
                keys: input_ids, input_mask, segment_ids
            mode {tf.estimator.ModeKeys} -- estimator mode; dropout is
                enabled only for TRAIN

        Returns:
            dict -- features extracted from bert.
                keys: 'seq', 'pooled', 'all', 'embed', 'embed_table'

        seq:
            tensor, [batch_size, seq_length, hidden_size]
        pooled:
            tensor, [batch_size, hidden_size]
        all:
            list of tensor, num_hidden_layers * [batch_size, seq_length, hidden_size]
        embed:
            tensor, [batch_size, seq_length, hidden_size]
        embed_table:
            tensor, [vocab_size, hidden_size]
        """

        config = self.config
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = BertModel(config=config.bert_config,
                          is_training=is_training,
                          input_ids=input_ids,
                          input_mask=input_mask,
                          token_type_ids=segment_ids,
                          use_one_hot_embeddings=config.use_one_hot_embeddings)

        # Expose every view of the encoder that downstream heads may need.
        feature_dict = {
            # [batch_size, seq_length, hidden_size]
            'seq': model.get_sequence_output(),
            # [batch_size, hidden_size]
            'pooled': model.get_pooled_output(),
            # num_hidden_layers * [batch_size, seq_length, hidden_size]
            'all': model.get_all_encoder_layers(),
            # embedding output, kept for residual connections
            'embed': model.get_embedding_output(),
            # [vocab_size, hidden_size] word embedding table
            'embed_table': model.get_embedding_table(),
        }
        return feature_dict
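
A hedged sketch of consuming the returned feature_dict, e.g. attaching a softmax classification head to the pooled [CLS] vector. The helper name, num_labels and the dropout rate are assumptions, not part of the source.

def classification_head(feature_dict, num_labels, is_training):
    # Pooled [CLS] representation, [batch_size, hidden_size].
    pooled = feature_dict['pooled']
    if is_training:
        # Mirror BERT's usual 10% dropout on the pooled output (assumption).
        pooled = tf.nn.dropout(pooled, keep_prob=0.9)
    # Project to per-class logits, [batch_size, num_labels].
    return tf.layers.dense(pooled, num_labels, name='cls_logits')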
Example #3
    def build(self, data_iter, bert_config_file):
        # get the inputs
        with tf.variable_scope('inputs'):
            input_map = data_iter.get_next()
            usrid, prdid, input_x, input_y, doc_len = \
                (input_map['usr'], input_map['prd'],
                 input_map['content'], input_map['rating'],
                 input_map['doc_len'])

            # Split each document into sentences of max_sen_len tokens;
            # sentence lengths are recovered from the zero padding.
            input_x = tf.reshape(input_x, [-1, self.max_sen_len])
            sen_len = tf.count_nonzero(input_x, axis=-1)
            doc_len = doc_len // self.max_sen_len

            input_x = tf.cast(input_x, tf.int32)
            self.usr = lookup(self.embeddings['usr_emb'], usrid, name='cur_usr_embedding')
            self.prd = lookup(self.embeddings['prd_emb'], prdid, name='cur_prd_embedding')
            input_mask = tf.sequence_mask(sen_len, self.max_sen_len)
            input_mask = tf.cast(input_mask, tf.int32)

        bert_config = BertConfig.from_json_file(bert_config_file)
        bert = BertModel(bert_config, is_training=False,
                         input_ids=input_x, input_mask=input_mask,
                         token_type_ids=None,
                         use_one_hot_embeddings=False)
        # Only the (context-free) embedding output is used here; swap in
        # bert.get_sequence_output() for full contextual features.
        input_x = bert.get_embedding_output()

        # Run the NSC hierarchy over sentence- and document-level inputs
        # to obtain class logits d_hat, [batch_size, cls_cnt].
        d_hat = self.nsc(input_x, self.max_sen_len, self.max_doc_len // self.max_sen_len,
                         sen_len, doc_len)
        prediction = tf.argmax(d_hat, 1, name='prediction')

        with tf.variable_scope("loss"):
            sce = tf.nn.softmax_cross_entropy_with_logits_v2
            self.loss = sce(logits=d_hat, labels=tf.one_hot(input_y, self.cls_cnt))

            # L2-regularize every trainable parameter except the
            # user/product embedding tables.
            regularizer = tf.zeros([])
            params = tf.trainable_variables()
            for param in params:
                if param not in self.embeddings.values():
                    regularizer += tf.nn.l2_loss(param)
            self.loss = tf.reduce_sum(self.loss) + self.l2_rate * regularizer

        with tf.variable_scope("metrics"):
            correct_prediction = tf.equal(prediction, input_y)
            # Note: despite the names, these are batch sums, not means;
            # callers are expected to normalize by the example count.
            mse = tf.reduce_sum(tf.square(prediction - input_y), name="mse")
            correct_num = tf.reduce_sum(tf.cast(correct_prediction, dtype=tf.int32), name="correct_num")
            accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float"), name="accuracy")

        return self.loss, mse, correct_num, accuracy
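
A hedged sketch of driving build() in a TF1 training loop. The class name NSCModel, its constructor arguments, the dataset wiring and the optimizer settings are all assumptions for illustration only.

# Hypothetical driver; NSCModel stands in for the class that owns build(),
# and data_iter for an initializable tf.data iterator yielding input_map.
model = NSCModel(max_sen_len=50, max_doc_len=500, cls_cnt=5, l2_rate=1e-5)
loss, mse, correct_num, accuracy = model.build(data_iter, 'bert_config.json')
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), data_iter.initializer])
    for step in range(1000):  # step count chosen arbitrarily
        _, batch_loss = sess.run([train_op, loss])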