def build_logits(self, features, mode):
    """ Building BERT feature extraction graph

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        sequence_output (`Tensor`): The last hidden outputs of all sequence.
            Shape of [None, seq_len, hidden_size]
        pooled_output (`Tensor`): The output after pooling. Shape of [None, 768]
            (or [None, projection_dim] when a projection is configured)

    NOTE(review): the original docstring listed the returns in the opposite
    order; the code returns (sequence_output, pooled_output).
    """
    bert_preprocessor = preprocessors.get_preprocessor(self.config.pretrain_model_name_or_path,
                                                       user_defined_config=self.config)
    # Only the first three serialized tensors are needed for feature extraction.
    input_ids, input_mask, segment_ids = bert_preprocessor(features)[:3]
    if self.finetune_model_name == "text_match_bert_two_tower":
        # Reuse the two-tower variable scope so the extractor shares weights
        # with the fine-tuned two-tower model.
        with tf.variable_scope('text_match_bert_two_tower', reuse=tf.AUTO_REUSE):
            bert_backbone = model_zoo.get_pretrained_model(self.config.pretrain_model_name_or_path)
            sequence_output, pooled_output = bert_backbone(
                [input_ids, input_mask, segment_ids], output_features=True, mode=mode)
            if hasattr(self.config, "projection_dim") and self.config.projection_dim != -1:
                # Project the [CLS] token output instead of using the default pooler.
                first_token_output_a = sequence_output[:, 0, :]
                pooled_output = tf.layers.dense(inputs=first_token_output_a,
                                                units=self.config.projection_dim,
                                                activation=None,
                                                name='output_dense_layer')
    else:
        bert_backbone = model_zoo.get_pretrained_model(self.config.pretrain_model_name_or_path)
        sequence_output, pooled_output = bert_backbone(
            [input_ids, input_mask, segment_ids], output_features=True, mode=mode)
    return sequence_output, pooled_output
def build_logits(self, features, mode=None):
    """Pretraining graph: MLM plus an auxiliary sentence-level objective.

    The objective is selected by _APP_FLAGS.loss:
      * "mlm+nsp" / "mlm+sop": MLM logits plus next-sentence (or
        sentence-order) logits from the backbone.
      * "mlm": MLM logits plus a 2-way task head on the pooled output.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode: a `tf.estimator.ModeKeys` value
    Returns:
        (logits tuple, labels tuple) matching the selected objective.
        NOTE(review): any other _APP_FLAGS.loss value falls through and the
        function implicitly returns None — confirm callers never hit this.
    """
    bert_preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        app_model_name="pretrain_language_model",
        user_defined_config=self.user_defined_config)
    # Whale is a distributed-training strategy; the backbone must be built
    # with whale support when either flag source selects it.
    if _APP_FLAGS.distribution_strategy == "WhaleStrategy" or \
            self.config.distribution_strategy == "WhaleStrategy":
        tf.logging.info("*********Calling Whale Encoder***********")
        model = model_zoo.get_pretrained_model(
            self.pretrain_model_name_or_path,
            enable_whale=True,
            input_sequence_length=_APP_FLAGS.input_sequence_length)
    else:
        model = model_zoo.get_pretrained_model(
            self.pretrain_model_name_or_path,
            input_sequence_length=_APP_FLAGS.input_sequence_length)
    if _APP_FLAGS.loss == "mlm+nsp" or _APP_FLAGS.loss == "mlm+sop":
        input_ids, input_mask, segment_ids, masked_lm_positions, \
            masked_lm_ids, masked_lm_weights, next_sentence_labels = bert_preprocessor(features)
        lm_logits, nsp_logits, _ = model(
            [input_ids, input_mask, segment_ids],
            masked_lm_positions=masked_lm_positions,
            output_features=False,
            mode=mode)
        return (lm_logits, nsp_logits), (masked_lm_ids, masked_lm_weights,
                                         next_sentence_labels)
    elif _APP_FLAGS.loss == "mlm":
        # Auxiliary binary classification head trained alongside MLM.
        task_1_dense = layers.Dense(
            2,
            kernel_initializer=layers.get_initializer(0.02),
            name='task_1_dense')
        input_ids, input_mask, segment_ids, masked_lm_positions, \
            masked_lm_ids, masked_lm_weights, task_1_label = bert_preprocessor(features)
        lm_logits, _, pooled_output = model(
            [input_ids, input_mask, segment_ids],
            masked_lm_positions=masked_lm_positions,
            output_features=False,
            mode=mode)
        task_1_logits = task_1_dense(pooled_output)
        return (lm_logits, task_1_logits), (masked_lm_ids, masked_lm_weights,
                                            task_1_label)
def build_logits(self, features, mode=None):
    """Single-sentence classification head on a pretrained backbone.

    Applies dropout (keep_prob 0.9) on the pooled output only while
    training, then a num_labels-way dense projection.

    Args:
        features: raw inputs consumed by the preprocessor.
        mode: a `tf.estimator.ModeKeys` value.
    Returns:
        logits alone in PREDICT mode; otherwise (logits, label_ids).
    """
    serializer = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        user_defined_config=self.user_defined_config)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    classifier = layers.Dense(self.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='dense')

    input_ids, input_mask, segment_ids, label_ids = serializer(features)
    pooled_output = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Keep 90% of activations during training.
        pooled_output = tf.nn.dropout(pooled_output, keep_prob=0.9)

    logits = classifier(pooled_output)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return logits
    return logits, label_ids
def build_logits(self, features, mode=None):
    """ Building BERT text match graph

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`Tensor`): The output after the last dense layer. Shape of [None, num_labels]
        label_ids (`Tensor`): label_ids, shape of [None]
    """
    serializer = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    input_ids, input_mask, segment_ids, label_ids = serializer(features)

    encoder = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    _, sentence_vec = encoder([input_ids, input_mask, segment_ids], mode=mode)

    # Dropout is active only while training.
    sentence_vec = tf.layers.dropout(
        sentence_vec,
        rate=self.config.dropout_rate,
        training=mode == tf.estimator.ModeKeys.TRAIN)

    logits = layers.Dense(self.config.num_labels,
                          kernel_initializer=layers.get_initializer(0.02),
                          name='app/ez_dense')(sentence_vec)
    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids
def build_logits(self, features, mode=None):
    """ Building graph of BERT Sequence Labeling

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`Tensor`): The output after the last dense layer. Shape of [None, sequence_length, num_labels]
        label_ids (`Tensor`): label_ids, shape of [None, sequence_length]
    """
    preprocessor = preprocessors.get_preprocessor(self.config.pretrain_model_name_or_path,
                                                  user_defined_config=self.config,
                                                  app_model_name="sequence_labeling_bert")
    # The sequence-labeling preprocessor emits a fifth tensor that is unused here.
    input_ids, input_mask, segment_ids, label_ids, _ = preprocessor(features)
    bert_backbone = model_zoo.get_pretrained_model(self.config.pretrain_model_name_or_path)
    # Token-level (per-position) outputs are needed, not the pooled vector.
    sequence_output, _ = bert_backbone([input_ids, input_mask, segment_ids], mode=mode)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    sequence_output = tf.layers.dropout(
        sequence_output, rate=self.config.dropout_rate, training=is_training)
    # Glorot init with a fresh random seed per graph build.
    kernel_initializer = tf.glorot_uniform_initializer(seed=np.random.randint(10000),
                                                       dtype=tf.float32)
    # NOTE(review): tf.zeros_initializer is passed as a class, not an
    # instance (no parentheses); TF appears to accept this here, but
    # tf.zeros_initializer() would be the conventional form — confirm.
    bias_initializer = tf.zeros_initializer
    logits = layers.Dense(self.config.num_labels,
                          kernel_initializer=kernel_initializer,
                          bias_initializer=bias_initializer,
                          name='app/ez_dense')(sequence_output)
    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids
def build_logits(self, features, mode=None):
    """ Building graph of KD Student

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`list`): logits for all the layers, list of shape of [None, num_labels]
        label_ids (`Tensor`): label_ids, shape of [None] (None in PREDICT mode)
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    preprocessor = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    # Labels are only available outside of PREDICT mode.
    if mode != tf.estimator.ModeKeys.PREDICT:
        teacher_logits, input_ids, input_mask, segment_ids, label_ids = preprocessor(
            features)
    else:
        teacher_logits, input_ids, input_mask, segment_ids = preprocessor(
            features)
        label_ids = None

    # The teacher exports (n_layers + 1) probe outputs concatenated along
    # the last axis, each num_labels wide.  BUG FIX: use floor division —
    # in Python 3 the original "/" produced a float, making
    # range(teacher_n_layers + 1) raise TypeError.
    teacher_n_layers = int(
        teacher_logits.shape[1]) // self.config.num_labels - 1
    self.teacher_logits = [
        teacher_logits[:, i * self.config.num_labels:(i + 1) *
                       self.config.num_labels]
        for i in range(teacher_n_layers + 1)
    ]

    if self.config.train_probes:
        # Run the backbone manually so every layer's hidden state is exposed
        # to the probe classifiers.
        bert_model = bert_backbone.bert
        embedding_output = bert_model.embeddings([input_ids, segment_ids],
                                                 training=is_training)
        attention_mask = layers.get_attn_mask_bert(input_ids, input_mask)
        all_hidden_outputs, all_att_outputs = bert_model.encoder(
            [embedding_output, attention_mask], training=is_training)
        # Probes attached to the embedding output and each hidden layer.
        logits = layers.HiddenLayerProbes(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name="probes")([embedding_output, all_hidden_outputs])
    else:
        # Standard fine-tuning head on the pooled [CLS] representation.
        _, pooled_output = bert_backbone(
            [input_ids, input_mask, segment_ids], mode=mode)
        pooled_output = tf.layers.dropout(pooled_output,
                                          rate=self.config.dropout_rate,
                                          training=is_training)
        logits = layers.Dense(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name='app/ez_dense')(pooled_output)
        logits = [logits]
    return logits, label_ids
def build_logits(self, features, mode=None):
    """ Building BERT Two Tower text match graph

    Each sentence of a pair is encoded independently by a weight-shared
    BERT backbone; the match score is the cosine similarity of the two
    pooled vectors.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        [score, vec_a, vec_b] (cosine logits plus both pooled outputs)
        and label_id, shape of [None]
    """
    serializer = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        is_paired=True,
        user_defined_config=self.config)
    (input_ids_a, input_mask_a, segment_ids_a,
     input_ids_b, input_mask_b, segment_ids_b, label_id) = serializer(features)

    with tf.variable_scope('text_match_bert_two_tower', reuse=tf.AUTO_REUSE):
        encoder = model_zoo.get_pretrained_model(
            self.config.pretrain_model_name_or_path)
        # Same backbone encodes both towers; the reused scope shares weights.
        _, vec_a = encoder([input_ids_a, input_mask_a, segment_ids_a],
                           mode=mode)
        _, vec_b = encoder([input_ids_b, input_mask_b, segment_ids_b],
                           mode=mode)

    score = self._cosine(vec_a, vec_b)
    self.check_and_init_from_checkpoint(mode)
    return [score, vec_a, vec_b], label_id
def build_logits(self, features, mode=None):
    """ Building graph of KD Teacher

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        In PREDICT mode, a dict holding the serialized inputs, the label
        and the probe logits concatenated along the last axis; otherwise
        (logits, label_id) where logits is a list of logits tensors.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    preprocessor = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    # Serialize raw text to get input tensors
    input_ids, input_mask, segment_ids, label_id = preprocessor(features)
    if self.config.train_probes:
        # Run the backbone manually to get BERT's hidden states at every layer.
        bert_model = bert_backbone.bert
        embedding_output = bert_model.embeddings([input_ids, segment_ids],
                                                 training=is_training)
        attention_mask = layers.get_attn_mask_bert(input_ids, input_mask)
        all_hidden_outputs, all_att_outputs = bert_model.encoder(
            [embedding_output, attention_mask], training=is_training)
        # Probe classifiers over the embedding output and each hidden layer.
        logits = layers.HiddenLayerProbes(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name="probes")([embedding_output, all_hidden_outputs])
        # Restrict training to the probe variables only; the backbone stays frozen.
        self.tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       "probes/")
    else:
        # Plain fine-tuning head on the pooled output.
        _, pooled_output = bert_backbone(
            [input_ids, input_mask, segment_ids], mode=mode)
        pooled_output = tf.layers.dropout(pooled_output,
                                          rate=self.config.dropout_rate,
                                          training=is_training)
        logits = layers.Dense(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name='app/ez_dense')(pooled_output)
        logits = [logits]
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Export the inputs alongside the logits so the student pipeline can
        # consume teacher predictions together with the original features.
        return {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": segment_ids,
            "label_id": label_id,
            "logits": tf.concat(logits, axis=-1)
        }
    else:
        return logits, label_id
def build_logits(self, features, mode=None):
    """Multi-task classification graph (TNEWS / OCEMOTION / OCNLI).

    Each task has its own input batch (index 0/1/2 in the preprocessor
    outputs) and its own dense head over a shared pretrained backbone.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode: a `tf.estimator.ModeKeys` value
    Returns:
        A list of the three per-task logits and a list of the three
        matching label tensors.
    """
    preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path)
    model = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path)
    # NOTE(review): global_step is created but never used in this function.
    global_step = tf.train.get_or_create_global_step()
    # Per-task classification heads: 15 / 7 / 3 classes.
    tnews_dense = layers.Dense(
        15,
        kernel_initializer=layers.get_initializer(0.02),
        name='tnews_dense')
    ocemotion_dense = layers.Dense(
        7,
        kernel_initializer=layers.get_initializer(0.02),
        name='ocemotion_dense')
    ocnli_dense = layers.Dense(
        3,
        kernel_initializer=layers.get_initializer(0.02),
        name='ocnli_dense')
    # Each of these is indexable per task (0=tnews, 1=ocemotion, 2=ocnli).
    input_ids, input_mask, segment_ids, label_ids = preprocessor(features)
    outputs_tnews = model([input_ids[0], input_mask[0], segment_ids[0]],
                          mode=mode)
    pooled_output_tnews = outputs_tnews[1]
    if mode == tf.estimator.ModeKeys.TRAIN:
        # NOTE(review): keep_prob=0.2 keeps only 20% of activations (80%
        # dropout) — unusually aggressive; possibly a rate/keep_prob
        # mix-up. Confirm intent before changing.
        pooled_output_tnews = tf.nn.dropout(pooled_output_tnews,
                                            keep_prob=0.2)
    logits_tnews = tnews_dense(pooled_output_tnews)
    outputs_ocemotion = model(
        [input_ids[1], input_mask[1], segment_ids[1]], mode=mode)
    pooled_output_ocemotion = outputs_ocemotion[1]
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Same aggressive keep_prob=0.2 as the tnews branch.
        pooled_output_ocemotion = tf.nn.dropout(pooled_output_ocemotion,
                                                keep_prob=0.2)
    logits_ocemotion = ocemotion_dense(pooled_output_ocemotion)
    outputs_ocnli = model([input_ids[2], input_mask[2], segment_ids[2]],
                          mode=mode)
    pooled_output_ocnli = outputs_ocnli[1]
    if mode == tf.estimator.ModeKeys.TRAIN:
        # keep_prob=0.5 here, inconsistent with the 0.2 used above.
        pooled_output_ocnli = tf.nn.dropout(pooled_output_ocnli,
                                            keep_prob=0.5)
    logits_ocnli = ocnli_dense(pooled_output_ocnli)
    return [logits_tnews, logits_ocemotion, logits_ocnli], \
           [label_ids[0], label_ids[1], label_ids[2]]
def build_logits(self, features, mode=None):
    """Multi-task graph over a single shared batch.

    During training the active head is picked by global_step % 3
    (0 = TNEWS, 1 = OCEMOTION, 2 = OCNLI), so training steps round-robin
    over the three tasks. In PREDICT mode all three heads are evaluated.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode: a `tf.estimator.ModeKeys` value
    Returns:
        PREDICT: dict of the three per-task logits plus label_ids.
        Otherwise: (logits of the step's active task, label_ids).
    """
    preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path)
    model = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path)
    global_step = tf.train.get_or_create_global_step()
    tnews_dense = layers.Dense(
        15,
        kernel_initializer=layers.get_initializer(0.02),
        name='tnews_dense')
    ocemotion_dense = layers.Dense(
        7,
        kernel_initializer=layers.get_initializer(0.02),
        name='ocemotion_dense')
    ocnli_dense = layers.Dense(
        3,
        kernel_initializer=layers.get_initializer(0.02),
        name='ocnli_dense')
    input_ids, input_mask, segment_ids, label_ids = preprocessor(features)
    outputs = model([input_ids, input_mask, segment_ids], mode=mode)
    pooled_output = outputs[1]
    if mode == tf.estimator.ModeKeys.TRAIN:
        pooled_output = tf.nn.dropout(pooled_output, keep_prob=0.9)
    # Exactly one branch fires per step, cycling with the global step.
    logits = tf.case([
        (tf.equal(tf.mod(global_step, 3), 0),
         lambda: tnews_dense(pooled_output)),
        (tf.equal(tf.mod(global_step, 3), 1),
         lambda: ocemotion_dense(pooled_output)),
        (tf.equal(tf.mod(global_step, 3), 2),
         lambda: ocnli_dense(pooled_output)),
    ], exclusive=True)
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Prediction evaluates every head; no round-robin selection.
        ret = {
            "tnews_logits": tnews_dense(pooled_output),
            "ocemotion_logits": ocemotion_dense(pooled_output),
            "ocnli_logits": ocnli_dense(pooled_output),
            "label_ids": label_ids
        }
        return ret
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Multimodal (image + text) pretraining graph.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode: a `tf.estimator.ModeKeys` value
    Returns:
        logits: (mlm_logits, nsp_logits, mpm_logits)
        labels: (masked_lm_ids, masked_lm_weights, nx_sent_labels,
                 target_raw_patch_features)
    """
    preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        app_model_name="pretrain_language_model",
        feature_type="pretrain_multimodel",
        user_defined_config=self.user_defined_config)
    self.model = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path,
        input_sequence_length=_APP_FLAGS.input_sequence_length)
    if mode == tf.estimator.ModeKeys.PREDICT:
        image_feature, image_mask, input_ids, input_mask, segment_ids,\
            nx_sent_labels, prod_desc, text_prod_id, image_prod_id, prod_img_id = preprocessor(features)
        # TODO: these features are NOT needed in predict; dummy tensors are
        # created only to keep the data format compatible with training.
        masked_patch_positions = tf.constant(
            np.random.randint(
                0, self.config.predict_batch_size,
                (self.model.config.masked_image_token_num, )))
        masked_lm_positions = tf.constant(
            np.random.randint(0, self.config.predict_batch_size,
                              (self.model.config.masked_text_token_num, )))
        masked_lm_ids = tf.constant(
            np.random.randint(0, self.config.predict_batch_size, (
                self.model.config.masked_text_token_num,
                1,
            )))
        # NOTE(review): tf.ones(shape, dtype) — here the second positional
        # argument lands in the dtype slot; tf.ones([batch, num]) was
        # probably intended. Confirm before changing.
        masked_lm_weights = tf.ones(
            self.config.predict_batch_size,
            self.model.config.masked_text_token_num)
    else:
        image_feature, image_mask, masked_patch_positions, input_ids, input_mask, segment_ids,\
            masked_lm_positions, masked_lm_ids, masked_lm_weights, nx_sent_labels = preprocessor(features)
    mlm_logits, nsp_logits, mpm_logits, target_raw_patch_features, pooled_output = \
        self.model(input_ids,
                   input_mask=input_mask,
                   segment_ids=segment_ids,
                   masked_lm_positions=masked_lm_positions,
                   image_feature=image_feature,
                   image_mask=image_mask,
                   masked_patch_positions=masked_patch_positions,
                   output_features=False,
                   mode=mode,
                   image_feature_size=_APP_FLAGS.image_feature_size)
    logits = (mlm_logits, nsp_logits, mpm_logits)
    labels = (masked_lm_ids, masked_lm_weights, nx_sent_labels,
              target_raw_patch_features)
    return logits, labels
def build_logits(self, features, mode=None):
    """Feature-extraction graph: export the pooled BERT output.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode: a `tf.estimator.ModeKeys` value
    Returns:
        {"pooled_output": Tensor} in PREDICT mode.
        NOTE(review): non-PREDICT modes fall through and implicitly return
        None — confirm this graph is only ever built for prediction.
    """
    bert_preprocessor = preprocessors.get_preprocessor(self.pretrain_model_name_or_path)
    model = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    # Only the first three serialized tensors are needed for extraction.
    input_ids, input_mask, segment_ids = bert_preprocessor(features)[:3]
    outputs = model([input_ids, input_mask, segment_ids], mode=mode)
    pooled_output = outputs[1]
    if mode == tf.estimator.ModeKeys.PREDICT:
        ret = {
            "pooled_output": pooled_output
        }
        return ret
def build_logits(self, features, mode=None):
    """Plain classification head: preprocess, encode, project.

    No dropout and no special PREDICT path.

    Args:
        features: raw inputs consumed by the preprocessor.
        mode: a `tf.estimator.ModeKeys` value.
    Returns:
        (logits, label_ids)
    """
    serializer = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path, user_defined_config=self.config)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    classifier = layers.Dense(self.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='dense')

    input_ids, input_mask, segment_ids, label_ids = serializer(features)
    pooled_output = backbone([input_ids, input_mask, segment_ids],
                             mode=mode)[1]
    return classifier(pooled_output), label_ids
def build_logits(self, features, mode=None):
    """ Building graph of BERT Text Comprehension

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`tuple`): (start_logits, end_logits). The output after the last dense layer.
            Two tensors of shape [None, seq_length]
        label_ids (`tuple`): (start_positions, end_positions). Two tensors of shape [None]
    """
    preprocessor = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        app_model_name="text_comprehension_bert",
        user_defined_config=self.config)
    input_ids, input_mask, segment_ids, start_positions, end_positions = preprocessor(
        features)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    # Per-token outputs are required for span prediction.
    sequence_output, _ = bert_backbone(
        [input_ids, input_mask, segment_ids], mode=mode)
    seq_length = self.config.sequence_length
    hidden_size = int(sequence_output.shape[2])
    # Single [2, hidden] projection yields a start score and an end score
    # per token.
    output_weights = tf.get_variable(
        "app/output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("app/output_bias", [2],
                                  initializer=tf.zeros_initializer())
    # Flatten to [batch*seq, hidden] so one matmul scores every position.
    final_hidden_matrix = tf.reshape(sequence_output, [-1, hidden_size])
    logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    # Back to [batch, seq, 2], then move the score axis first so unstack
    # gives two [batch, seq] tensors.
    logits = tf.reshape(logits, [-1, seq_length, 2])
    logits = tf.transpose(logits, [2, 0, 1])
    unstacked_logits = tf.unstack(logits, axis=0)
    (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])
    self.check_and_init_from_checkpoint(mode)
    return (start_logits, end_logits), (start_positions, end_positions)
def build_logits(self, features, mode=None):
    """Export pooled features together with raw text/domain/label columns.

    Args:
        features: raw inputs consumed by the preprocessor.
        mode: a `tf.estimator.ModeKeys` value.
    Returns:
        dict with keys "pooled_output", "text", "domain", "label".
    """
    serializer = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path)
    encoder = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path)

    (input_ids, input_mask, segment_ids,
     label_ids, texts, domains, labels) = serializer(features)

    sentence_embedding = encoder([input_ids, input_mask, segment_ids],
                                 mode=mode)[1]
    return {
        "pooled_output": sentence_embedding,
        "text": texts,
        "domain": domains,
        "label": labels
    }
def build_logits(self, features, mode=None):
    """Minimal classification graph: preprocess -> backbone -> dense head.

    Args:
        features: raw inputs consumed by the preprocessor.
        mode: a `tf.estimator.ModeKeys` value.
    Returns:
        (logits, label_ids)
    """
    # The preprocessor turns raw inputs into the model features
    # (input_ids, input_mask, segment_ids, ...).
    serializer = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        user_defined_config=self.user_defined_config)
    # The network backbone.
    encoder = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    classifier = layers.Dense(self.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='dense')

    input_ids, input_mask, segment_ids, label_ids = serializer(features)
    _, pooled_output = encoder([input_ids, input_mask, segment_ids],
                               mode=mode)
    return classifier(pooled_output), label_ids
def build_logits(self, features, mode=None):
    """Classification head; PREDICT mode returns {"logits": logits}.

    Args:
        features: raw inputs consumed by the preprocessor.
        mode: a `tf.estimator.ModeKeys` value.
    Returns:
        {"logits": Tensor} in PREDICT mode, otherwise (logits, label_ids).
    """
    serializer = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path)
    encoder = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path)

    input_ids, input_mask, segment_ids, label_ids = serializer(features)
    pooled_output = encoder([input_ids, input_mask, segment_ids],
                            mode=mode)[1]
    logits = layers.Dense(self.num_labels,
                          kernel_initializer=layers.get_initializer(0.02),
                          name='dense')(pooled_output)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return {"logits": logits}
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Paired-text feature export.

    Both sentences' tensors are fed to the backbone in a single call; the
    pooled output is returned alongside the raw text/domain/label columns.

    Args:
        features: raw inputs consumed by the paired preprocessor.
        mode: a `tf.estimator.ModeKeys` value.
    Returns:
        dict with keys "pooled_output", "text1", "text2", "domain", "label".
    """
    serializer = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path, is_paired=True)
    encoder = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path)

    (input_ids_a, input_mask_a, segment_ids_a,
     input_ids_b, input_mask_b, segment_ids_b,
     label_ids, texts1, texts2, domains, labels) = serializer(features)

    pooled_output = encoder([
        input_ids_a, input_mask_a, segment_ids_a,
        input_ids_b, input_mask_b, segment_ids_b
    ], mode=mode)[1]

    return {
        "pooled_output": pooled_output,
        "text1": texts1,
        "text2": texts2,
        "domain": domains,
        "label": labels
    }
def build_logits(self, features, mode=None):
    """Classification graph with per-layer domain probes (MFT-style).

    A dense head produces the task logits from the pooled output; in
    addition, for each probed layer a domain classifier is built from the
    mean-pooled layer output plus a learned domain embedding. The per-layer
    domain logits are stashed in self.domain_logits for the loss function.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode: a `tf.estimator.ModeKeys` value
    Returns:
        (logits, label_ids)
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path, user_defined_config=self.config)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    dense = layers.Dense(self.num_labels,
                         kernel_initializer=layers.get_initializer(0.02),
                         name='dense')
    input_ids, input_mask, segment_ids, label_ids, domains, weights = preprocessor(
        features)
    # Stashed for use by the loss/metric code elsewhere in the class.
    self.domains = domains
    self.weights = weights
    hidden_size = bert_backbone.config.hidden_size
    self.domain_logits = dict()
    # Run the backbone manually so every layer's output is accessible.
    bert_model = bert_backbone.bert
    embedding_output = bert_model.embeddings([input_ids, segment_ids],
                                             training=is_training)
    attention_mask = layers.get_attn_mask_bert(input_ids, input_mask)
    encoder_outputs = bert_model.encoder(
        [embedding_output, attention_mask], training=is_training)
    # First element holds the list of per-layer hidden states.
    encoder_outputs = encoder_outputs[0]
    pooled_output = bert_model.pooler(encoder_outputs[-1][:, 0])
    if mode == tf.estimator.ModeKeys.TRAIN:
        pooled_output = tf.nn.dropout(pooled_output, keep_prob=0.9)
    with tf.variable_scope("mft", reuse=tf.AUTO_REUSE):
        # add domain network
        logits = dense(pooled_output)
        domains = tf.squeeze(domains)
        # NOTE(review): num_domains and layer_indexes are free names here —
        # presumably module-level globals; confirm where they are defined.
        domain_embedded_matrix = tf.get_variable(
            "domain_projection", [num_domains, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        domain_embedded = tf.nn.embedding_lookup(domain_embedded_matrix,
                                                 domains)
        for layer_index in layer_indexes:
            # Mean-pool the layer's token outputs and mix in the domain embedding.
            content_tensor = tf.reduce_mean(encoder_outputs[layer_index],
                                            axis=1)
            content_tensor_with_domains = domain_embedded + content_tensor
            # NOTE(review): with reuse=tf.AUTO_REUSE on the "mft" scope,
            # every iteration reuses the same "domain_weights"/"domain_bias"
            # variables, so all probed layers share one domain classifier —
            # confirm this sharing is intended.
            domain_weights = tf.get_variable(
                "domain_weights", [num_domains, hidden_size],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            domain_bias = tf.get_variable(
                "domain_bias", [num_domains],
                initializer=tf.zeros_initializer())
            current_domain_logits = tf.matmul(content_tensor_with_domains,
                                              domain_weights,
                                              transpose_b=True)
            current_domain_logits = tf.nn.bias_add(current_domain_logits,
                                                   domain_bias)
            self.domain_logits["domain_logits_" +
                               str(layer_index)] = current_domain_logits
    return logits, label_ids
""" @author: AlexWang @date: 2021/4/27 3:28 PM @Email: [email protected] """ from easytransfer import model_zoo from easytransfer import preprocessors model_zoo.get_pretrained_model()