def build_logits(self, features, mode=None):
    """Build single-tower classification logits.

    Serializes raw features with the pretrained model's preprocessor, runs
    the backbone, applies dropout while training, then projects the pooled
    output to ``self.num_labels`` classes.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        ``logits`` alone under PREDICT mode, otherwise ``(logits, label_ids)``.
    """
    prep = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        user_defined_config=self.user_defined_config)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    classifier = layers.Dense(self.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='dense')

    input_ids, input_mask, segment_ids, label_ids = prep(features)
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Keep 90% of activations during training only.
        pooled = tf.nn.dropout(pooled, keep_prob=0.9)

    logits = classifier(pooled)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return logits
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Build the BERT two-tower text-match graph.

    Each side of the pair is encoded by the same (shared, AUTO_REUSE)
    BERT backbone; the score is the cosine similarity of the two pooled
    outputs.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        ``([logits, pooled_a, pooled_b], label_id)`` where ``logits`` is the
        cosine similarity between the towers.
    """
    prep = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        is_paired=True,
        user_defined_config=self.config)
    (input_ids_a, input_mask_a, segment_ids_a,
     input_ids_b, input_mask_b, segment_ids_b, label_id) = prep(features)

    with tf.variable_scope('text_match_bert_two_tower', reuse=tf.AUTO_REUSE):
        backbone = model_zoo.get_pretrained_model(
            self.config.pretrain_model_name_or_path)
        _, pooled_a = backbone([input_ids_a, input_mask_a, segment_ids_a],
                               mode=mode)
        _, pooled_b = backbone([input_ids_b, input_mask_b, segment_ids_b],
                               mode=mode)
        logits = self._cosine(pooled_a, pooled_b)

    self.check_and_init_from_checkpoint(mode)
    return [logits, pooled_a, pooled_b], label_id
def build_logits(self, features, mode=None):
    """Serialize raw pretraining records into MLM input tensors.

    No model is built here; the block only runs the pretrain-LM
    preprocessor and forwards its outputs.

    Returns:
        Tuple of (input_ids, input_mask, segment_ids, masked_lm_positions,
        masked_lm_ids, masked_lm_weights).
    """
    prep = preprocessors.get_preprocessor(
        self.config.tokenizer_name_or_path,
        app_model_name="pretrain_language_model")
    (input_ids, input_mask, segment_ids,
     masked_lm_positions, masked_lm_ids, masked_lm_weights) = prep(features)
    return (input_ids, input_mask, segment_ids,
            masked_lm_positions, masked_lm_ids, masked_lm_weights)
def build_logits(self, features, mode):
    """Build the BERT feature-extraction graph.

    When the fine-tuned checkpoint is the two-tower text-match model, the
    backbone is re-created inside that model's variable scope so weights
    resolve, and (optionally) the [CLS] token is re-projected to
    ``config.projection_dim``.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        ``(sequence_output, pooled_output)`` — the last hidden states of
        shape [None, seq_len, hidden_size] and the pooled vector.
    """
    prep = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    input_ids, input_mask, segment_ids = prep(features)[:3]

    if self.finetune_model_name == "text_match_bert_two_tower":
        with tf.variable_scope('text_match_bert_two_tower',
                               reuse=tf.AUTO_REUSE):
            backbone = model_zoo.get_pretrained_model(
                self.config.pretrain_model_name_or_path)
            sequence_output, pooled_output = backbone(
                [input_ids, input_mask, segment_ids],
                output_features=True, mode=mode)
            use_projection = (hasattr(self.config, "projection_dim")
                              and self.config.projection_dim != -1)
            if use_projection:
                # Replace the pooled output with a linear projection of the
                # first ([CLS]) token, matching the two-tower training graph.
                cls_vector = sequence_output[:, 0, :]
                pooled_output = tf.layers.dense(
                    inputs=cls_vector,
                    units=self.config.projection_dim,
                    activation=None,
                    name='output_dense_layer')
    else:
        backbone = model_zoo.get_pretrained_model(
            self.config.pretrain_model_name_or_path)
        sequence_output, pooled_output = backbone(
            [input_ids, input_mask, segment_ids],
            output_features=True, mode=mode)

    return sequence_output, pooled_output
def run(self):
    """Run the distributed preprocessing pipeline: reader -> preprocessor -> writer.

    A single ProcessExecutor chains the three stages together through its
    queues; the input file is sharded across workers by
    (task_index, num_workers).
    """
    # BUG FIX: the original created two ProcessExecutors and stored the
    # unused one on ``self`` while wiring every stage to a throwaway local.
    # Create one executor, store it, and use it throughout.
    proc_executor = distribution.ProcessExecutor(self.queue_size)
    self.proc_executor = proc_executor

    worker_id = self.config.task_index
    num_workers = len(self.config.worker_hosts.split(","))

    reader = get_reader_fn(self.config.preprocess_input_fp)(
        input_glob=self.config.preprocess_input_fp,
        input_schema=self.config.input_schema,
        is_training=False,
        batch_size=self.config.preprocess_batch_size,
        slice_id=worker_id,
        slice_count=num_workers,
        output_queue=proc_executor.get_output_queue())
    proc_executor.add(reader)

    preprocessor = preprocessors.get_preprocessor(
        self.config.tokenizer_name_or_path,
        thread_num=self.thread_num,
        input_queue=proc_executor.get_input_queue(),
        output_queue=proc_executor.get_output_queue(),
        preprocess_batch_size=self.config.preprocess_batch_size,
        user_defined_config=self.config,
        app_model_name=self.config.app_model_name)
    proc_executor.add(preprocessor)

    writer = get_writer_fn(self.config.preprocess_output_fp)(
        output_glob=self.config.preprocess_output_fp,
        output_schema=self.config.output_schema,
        slice_id=worker_id,
        input_queue=proc_executor.get_input_queue())
    proc_executor.add(writer)

    proc_executor.run()
    proc_executor.wait()
def build_logits(self, features, mode=None):
    """Build the BERT sequence-labeling graph.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        logits of shape [None, sequence_length, num_labels] and label_ids
        of shape [None, sequence_length].
    """
    prep = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config,
        app_model_name="sequence_labeling_bert")
    input_ids, input_mask, segment_ids, label_ids, _ = prep(features)

    backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    seq_out, _ = backbone([input_ids, input_mask, segment_ids], mode=mode)

    training = mode == tf.estimator.ModeKeys.TRAIN
    seq_out = tf.layers.dropout(seq_out,
                                rate=self.config.dropout_rate,
                                training=training)

    # Per-token classifier over the sequence output.
    token_classifier = layers.Dense(
        self.config.num_labels,
        kernel_initializer=tf.glorot_uniform_initializer(
            seed=np.random.randint(10000), dtype=tf.float32),
        bias_initializer=tf.zeros_initializer,
        name='app/ez_dense')
    logits = token_classifier(seq_out)

    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Build the knowledge-distillation student graph.

    The serialized teacher logits column holds (n_layers + 1) probe logits
    of width ``num_labels`` concatenated side by side; they are sliced back
    into ``self.teacher_logits`` for the distillation loss.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        logits: list of per-layer logits, each of shape [None, num_labels]
        label_ids: tensor of shape [None] (None under PREDICT mode).
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    preprocessor = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)

    # PREDICT records carry no label column.
    if mode != tf.estimator.ModeKeys.PREDICT:
        teacher_logits, input_ids, input_mask, segment_ids, label_ids = \
            preprocessor(features)
    else:
        teacher_logits, input_ids, input_mask, segment_ids = \
            preprocessor(features)
        label_ids = None

    # BUG FIX: use floor division. True division yields a float under
    # Python 3, which makes the range() call below raise TypeError.
    teacher_n_layers = \
        int(teacher_logits.shape[1]) // self.config.num_labels - 1
    self.teacher_logits = [
        teacher_logits[:,
                       i * self.config.num_labels:(i + 1) * self.config.num_labels]
        for i in range(teacher_n_layers + 1)
    ]

    if self.config.train_probes:
        # Run the backbone manually to expose every hidden layer, then
        # attach a probe classifier to each of them.
        bert_model = bert_backbone.bert
        embedding_output = bert_model.embeddings([input_ids, segment_ids],
                                                 training=is_training)
        attention_mask = layers.get_attn_mask_bert(input_ids, input_mask)
        all_hidden_outputs, all_att_outputs = bert_model.encoder(
            [embedding_output, attention_mask], training=is_training)
        logits = layers.HiddenLayerProbes(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name="probes")([embedding_output, all_hidden_outputs])
    else:
        # Single classification head over the pooled output.
        _, pooled_output = bert_backbone(
            [input_ids, input_mask, segment_ids], mode=mode)
        pooled_output = tf.layers.dropout(pooled_output,
                                          rate=self.config.dropout_rate,
                                          training=is_training)
        logits = layers.Dense(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name='app/ez_dense')(pooled_output)
        logits = [logits]
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Build the single-tower BERT text-match graph.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        logits of shape [None, num_labels] and label_ids of shape [None].
    """
    training = mode == tf.estimator.ModeKeys.TRAIN

    prep = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    input_ids, input_mask, segment_ids, label_ids = prep(features)

    backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]
    pooled = tf.layers.dropout(pooled,
                               rate=self.config.dropout_rate,
                               training=training)

    logits = layers.Dense(self.config.num_labels,
                          kernel_initializer=layers.get_initializer(0.02),
                          name='app/ez_dense')(pooled)
    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids
def test_dist_preprocess(self):
    """Smoke-test the preprocess pipeline (reader -> serializer -> writer)."""
    app = Serialization()
    executor = ProcessExecutor(1)  # queue size of 1

    csv_reader = CSVReader(input_glob=app.preprocess_input_fp,
                           input_schema=app.input_schema,
                           is_training=False,
                           batch_size=app.preprocess_batch_size,
                           output_queue=executor.get_output_queue())
    executor.add(csv_reader)

    serializer = preprocessors.get_preprocessor(
        'google-bert-base-zh',
        thread_num=7,
        input_queue=executor.get_input_queue(),
        output_queue=executor.get_output_queue())
    executor.add(serializer)

    csv_writer = CSVWriter(output_glob=app.preprocess_output_fp,
                           output_schema=app.output_schema,
                           input_queue=executor.get_input_queue())
    executor.add(csv_writer)

    executor.run()
    executor.wait()
    csv_writer.close()
def build_logits(self, features, mode=None):
    """ Building graph of KD Teacher

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`list`): logits for all the layers, list of shape of
            [None, num_labels]; under PREDICT mode a dict of input tensors,
            label_id and concatenated logits is returned instead
        label_ids (`Tensor`): label_ids, shape of [None]
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    preprocessor = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        user_defined_config=self.config)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)

    # Serialize raw text to get input tensors
    input_ids, input_mask, segment_ids, label_id = preprocessor(features)

    if self.config.train_probes:
        # Run the backbone piecewise (embeddings -> encoder) so that every
        # hidden layer's output is available to the probes.
        bert_model = bert_backbone.bert
        embedding_output = bert_model.embeddings([input_ids, segment_ids],
                                                 training=is_training)
        attention_mask = layers.get_attn_mask_bert(input_ids, input_mask)
        all_hidden_outputs, all_att_outputs = bert_model.encoder(
            [embedding_output, attention_mask], training=is_training)
        # Get teacher Probes: one classifier per hidden layer.
        logits = layers.HiddenLayerProbes(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name="probes")([embedding_output, all_hidden_outputs])
        # Restrict trainable variables to the probe parameters only, so the
        # optimizer leaves the backbone frozen in this branch.
        self.tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       "probes/")
    else:
        # Standard classification head on the pooled output.
        _, pooled_output = bert_backbone(
            [input_ids, input_mask, segment_ids], mode=mode)
        pooled_output = tf.layers.dropout(pooled_output,
                                          rate=self.config.dropout_rate,
                                          training=is_training)
        logits = layers.Dense(
            self.config.num_labels,
            kernel_initializer=layers.get_initializer(0.02),
            name='app/ez_dense')(pooled_output)
        logits = [logits]

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Export the serialized inputs alongside the concatenated per-layer
        # logits so a student can be trained from the dumped predictions.
        return {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": segment_ids,
            "label_id": label_id,
            "logits": tf.concat(logits, axis=-1)
        }
    else:
        return logits, label_id
def run_app_predictor(config):
    """Dispatch prediction for an app model by ``config.model_name``.

    Builds an AppPredictor with the input/output tensor names that model
    expects, optionally attaching a custom preprocessor/postprocessor, and
    runs prediction. Raises RuntimeError wrapping any failure.
    """
    try:
        if config.model_name == "feat_ext_bert":
            # Feature extraction: export pooled/[CLS]/all-hidden outputs.
            predictor = AppPredictor(config,
                                     input_keys=["input_ids", "input_mask", "segment_ids"],
                                     output_keys=["pool_output", "first_token_output", "all_hidden_outputs"],
                                     job_name="ez_bert_feat")
            predictor.run_predict()
        elif config.model_name in ["text_comprehension_bert", "text_comprehension_bert_hae"]:
            # Reading comprehension: the HAE variant needs the extra
            # history_answer_marker input.
            input_keys = ["input_ids", "input_mask", "segment_ids"] if config.model_name == "text_comprehension_bert" \
                else ["input_ids", "input_mask", "segment_ids", "history_answer_marker"]
            predictor = AppPredictor(config,
                                     input_keys=input_keys,
                                     output_keys=["start_logits", "end_logits"],
                                     job_name=config.model_name + "_predictor")
            preprocessor = preprocessors.get_preprocessor(
                config.pretrain_model_name_or_path,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue(),
                preprocess_batch_size=config.predict_batch_size,
                user_defined_config=config,
                app_model_name=config.model_name)
            # Span decoding is done in the postprocessor; n_best_size and
            # max_answer_length fall back to 20 / 30 when not configured.
            postprocessor = postprocessors.get_postprocessors(
                n_best_size=int(config.n_best_size) if hasattr(config, "n_best_size") else 20,
                max_answer_length=int(config.max_answer_length) if hasattr(config, "max_answer_length") else 30,
                output_schema=config.output_schema,
                app_model_name=config.model_name,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue())
            predictor.run_predict(preprocessor=preprocessor,
                                  postprocessor=postprocessor)
        elif config.model_name in ["text_match_dam", "text_match_damplus", "text_match_bicnn", "text_match_hcnn", "text_classify_cnn"]:
            # Non-BERT paired-text models use the DeepText preprocessor.
            predictor = AppPredictor(config,
                                     input_keys=["input_ids_a", "input_mask_a", "input_ids_b", "input_mask_b"],
                                     output_keys=["predictions", "probabilities", "logits"],
                                     job_name=config.model_name + "_predictor")
            preprocessor = DeepTextPreprocessor(config,
                                                thread_num=predictor.thread_num,
                                                input_queue=queue.Queue(),
                                                output_queue=queue.Queue(),
                                                job_name=config.model_name + "_predictor")
            predictor.run_predict(preprocessor=preprocessor)
        elif config.model_name in ["text_match_bert_two_tower"]:
            # Two-tower prediction is not supported through this entry point.
            raise NotImplementedError
        else:
            # Default: single-tower BERT classifier-style models.
            predictor = AppPredictor(config,
                                     input_keys=["input_ids", "input_mask", "segment_ids"],
                                     output_keys=["predictions", "probabilities", "logits"],
                                     job_name=config.model_name + "_predictor")
            predictor.run_predict()
    except Exception as e:
        traceback.print_exc(file=sys.stderr)
        raise RuntimeError(str(e))
def build_logits(self, features, mode=None):
    """Build pretraining logits for the configured loss.

    Two loss configurations are supported via ``_APP_FLAGS.loss``:
    "mlm+nsp"/"mlm+sop" returns (lm_logits, nsp_logits) with the matching
    MLM / next-sentence labels; "mlm" adds an auxiliary 2-way dense head
    ("task_1") over the pooled output instead of the NSP head.

    Returns:
        (logits_tuple, labels_tuple) as described above.
    """
    bert_preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        app_model_name="pretrain_language_model",
        user_defined_config=self.user_defined_config)

    # Whale is a distributed-training strategy; it needs a specially built
    # encoder. NOTE(review): both the flag and the config are consulted —
    # presumably either may carry the strategy; confirm which one wins.
    if _APP_FLAGS.distribution_strategy == "WhaleStrategy" or \
            self.config.distribution_strategy == "WhaleStrategy":
        tf.logging.info("*********Calling Whale Encoder***********")
        model = model_zoo.get_pretrained_model(
            self.pretrain_model_name_or_path,
            enable_whale=True,
            input_sequence_length=_APP_FLAGS.input_sequence_length)
    else:
        model = model_zoo.get_pretrained_model(
            self.pretrain_model_name_or_path,
            input_sequence_length=_APP_FLAGS.input_sequence_length)

    if _APP_FLAGS.loss == "mlm+nsp" or _APP_FLAGS.loss == "mlm+sop":
        # MLM plus sentence-level objective: the preprocessor also emits
        # next_sentence_labels.
        input_ids, input_mask, segment_ids, masked_lm_positions, \
            masked_lm_ids, masked_lm_weights, next_sentence_labels = bert_preprocessor(features)
        lm_logits, nsp_logits, _ = model(
            [input_ids, input_mask, segment_ids],
            masked_lm_positions=masked_lm_positions,
            output_features=False,
            mode=mode)
        return (lm_logits, nsp_logits), (masked_lm_ids, masked_lm_weights,
                                         next_sentence_labels)
    elif _APP_FLAGS.loss == "mlm":
        # MLM plus an auxiliary binary classification head over the pooled
        # output (label comes from the task_1_label column).
        task_1_dense = layers.Dense(
            2,
            kernel_initializer=layers.get_initializer(0.02),
            name='task_1_dense')
        input_ids, input_mask, segment_ids, masked_lm_positions, \
            masked_lm_ids, masked_lm_weights, task_1_label = bert_preprocessor(features)
        lm_logits, _, pooled_output = model(
            [input_ids, input_mask, segment_ids],
            masked_lm_positions=masked_lm_positions,
            output_features=False,
            mode=mode)
        task_1_logits = task_1_dense(pooled_output)
        return (lm_logits, task_1_logits), (masked_lm_ids, masked_lm_weights,
                                            task_1_label)
def build_logits(self, features, mode=None):
    """Build multimodal (text + image) pretraining logits.

    Args:
        features: dict mapping raw input columns to tensors.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        logits: (mlm_logits, nsp_logits, mpm_logits)
        labels: (masked_lm_ids, masked_lm_weights, nx_sent_labels,
                 target_raw_patch_features)
    """
    preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        app_model_name="pretrain_language_model",
        feature_type="pretrain_multimodel",
        user_defined_config=self.user_defined_config)
    self.model = model_zoo.get_pretrained_model(
        self.pretrain_model_name_or_path,
        input_sequence_length=_APP_FLAGS.input_sequence_length)

    if mode == tf.estimator.ModeKeys.PREDICT:
        image_feature, image_mask, input_ids, input_mask, segment_ids,\
            nx_sent_labels, prod_desc, text_prod_id, image_prod_id, prod_img_id = preprocessor(features)
        # The predict data format carries no masking columns; fabricate
        # dummy masked positions/ids/weights so the same graph can be built.
        masked_patch_positions = tf.constant(
            np.random.randint(
                0, self.config.predict_batch_size,
                (self.model.config.masked_image_token_num, )))
        masked_lm_positions = tf.constant(
            np.random.randint(0, self.config.predict_batch_size,
                              (self.model.config.masked_text_token_num, )))
        masked_lm_ids = tf.constant(
            np.random.randint(0, self.config.predict_batch_size, (
                self.model.config.masked_text_token_num,
                1,
            )))
        # BUG FIX: tf.ones takes (shape, dtype); the original passed the two
        # dimensions as separate positional arguments, which makes the
        # second dimension the dtype. Pass the full shape as one list.
        masked_lm_weights = tf.ones(
            [self.config.predict_batch_size,
             self.model.config.masked_text_token_num])
    else:
        image_feature, image_mask, masked_patch_positions, input_ids, input_mask, segment_ids,\
            masked_lm_positions, masked_lm_ids, masked_lm_weights, nx_sent_labels = preprocessor(features)

    mlm_logits, nsp_logits, mpm_logits, target_raw_patch_features, pooled_output = \
        self.model(input_ids,
                   input_mask=input_mask,
                   segment_ids=segment_ids,
                   masked_lm_positions=masked_lm_positions,
                   image_feature=image_feature,
                   image_mask=image_mask,
                   masked_patch_positions=masked_patch_positions,
                   output_features=False,
                   mode=mode,
                   image_feature_size=_APP_FLAGS.image_feature_size)
    logits = (mlm_logits, nsp_logits, mpm_logits)
    labels = (masked_lm_ids, masked_lm_weights, nx_sent_labels,
              target_raw_patch_features)
    return logits, labels
def build_logits(self, features, mode=None):
    """Multi-task classification with step-rotated task heads.

    One of three dense heads (tnews 15-way, ocemotion 7-way, ocnli 3-way)
    is selected per training step by ``global_step mod 3`` via ``tf.case``.
    Under PREDICT mode all three heads are evaluated and returned in a dict.
    """
    prep = preprocessors.get_preprocessor(self.pretrain_model_name_or_path)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    global_step = tf.train.get_or_create_global_step()

    head_specs = (('tnews_dense', 15), ('ocemotion_dense', 7),
                  ('ocnli_dense', 3))
    heads = [
        layers.Dense(units,
                     kernel_initializer=layers.get_initializer(0.02),
                     name=name)
        for name, units in head_specs
    ]

    input_ids, input_mask, segment_ids, label_ids = prep(features)
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]
    if mode == tf.estimator.ModeKeys.TRAIN:
        pooled = tf.nn.dropout(pooled, keep_prob=0.9)

    # Bind each head via a default argument to avoid late-binding closures.
    logits = tf.case(
        [(tf.equal(tf.mod(global_step, 3), idx),
          lambda head=head: head(pooled))
         for idx, head in enumerate(heads)],
        exclusive=True)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return {
            "tnews_logits": heads[0](pooled),
            "ocemotion_logits": heads[1](pooled),
            "ocnli_logits": heads[2](pooled),
            "label_ids": label_ids
        }
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Multi-task classification running all three tasks every step.

    Each task gets its own slice of the batched inputs (index 0/1/2) and
    its own dense head over the shared backbone's pooled output.

    Returns:
        ([logits_tnews, logits_ocemotion, logits_ocnli],
         [label_ids[0], label_ids[1], label_ids[2]])
    """
    prep = preprocessors.get_preprocessor(self.pretrain_model_name_or_path)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    # Ensure the global step variable exists (created for side effect).
    global_step = tf.train.get_or_create_global_step()

    tnews_head = layers.Dense(15,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='tnews_dense')
    ocemotion_head = layers.Dense(7,
                                  kernel_initializer=layers.get_initializer(0.02),
                                  name='ocemotion_dense')
    ocnli_head = layers.Dense(3,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='ocnli_dense')

    input_ids, input_mask, segment_ids, label_ids = prep(features)

    def _task_logits(task_idx, head, keep_prob):
        # Shared backbone over this task's inputs, plus per-task dropout.
        pooled = backbone([input_ids[task_idx], input_mask[task_idx],
                           segment_ids[task_idx]], mode=mode)[1]
        if mode == tf.estimator.ModeKeys.TRAIN:
            # NOTE(review): keep_prob=0.2 keeps only 20% of activations,
            # which is unusually aggressive — confirm this was intended.
            pooled = tf.nn.dropout(pooled, keep_prob=keep_prob)
        return head(pooled)

    logits_tnews = _task_logits(0, tnews_head, 0.2)
    logits_ocemotion = _task_logits(1, ocemotion_head, 0.2)
    logits_ocnli = _task_logits(2, ocnli_head, 0.5)

    return ([logits_tnews, logits_ocemotion, logits_ocnli],
            [label_ids[0], label_ids[1], label_ids[2]])
def build_logits(self, features, mode=None):
    """Extract the pooled representation of the backbone.

    Only PREDICT mode produces a result; any other mode falls through and
    (implicitly) returns None, matching the original contract.

    Returns:
        ``{"pooled_output": Tensor}`` under PREDICT mode.
    """
    prep = preprocessors.get_preprocessor(self.pretrain_model_name_or_path)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    input_ids, input_mask, segment_ids = prep(features)[:3]
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]
    if mode == tf.estimator.ModeKeys.PREDICT:
        return {"pooled_output": pooled}
def build_logits(self, features, mode=None):
    """ Building graph of BERT Text Comprehension

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`tuple`): (start_logits, end_logits), The output after the
            last dense layer. Two tensors of shape [None, seq_length]
        label_ids (`tuple`): (start_positions, end_positions). Two tensors
            of shape [None]
    """
    preprocessor = preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        app_model_name="text_comprehension_bert_hae",
        user_defined_config=self.config)
    # HAE adds a history_answer_marker input on top of the usual BERT trio.
    input_ids, input_mask, segment_ids, history_answer_marker, start_positions, end_positions =\
        preprocessor(features)

    bert_backbone = BertHAEPretrainedModel.get(
        self.config.pretrain_model_name_or_path)
    tmp = [input_ids, input_mask, segment_ids, history_answer_marker]
    sequence_output, _ = bert_backbone(tmp, mode=mode)

    seq_length = self.config.sequence_length
    hidden_size = int(sequence_output.shape[2])

    # Span head: a single [2, hidden] projection scores every token for
    # "start" and "end" simultaneously.
    output_weights = tf.get_variable(
        "app/output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("app/output_bias", [2],
                                  initializer=tf.zeros_initializer())

    # Flatten [batch, seq, hidden] -> [batch*seq, hidden] so the projection
    # is one matmul.
    final_hidden_matrix = tf.reshape(sequence_output, [-1, hidden_size])
    logits = tf.matmul(final_hidden_matrix, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    # [batch*seq, 2] -> [batch, seq, 2] -> [2, batch, seq] so start/end
    # planes can be unstacked along axis 0.
    logits = tf.reshape(logits, [-1, seq_length, 2])
    logits = tf.transpose(logits, [2, 0, 1])
    unstacked_logits = tf.unstack(logits, axis=0)
    (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

    self.check_and_init_from_checkpoint(mode)
    return (start_logits, end_logits), (start_positions, end_positions)
def get_default_preprocessor(self):
    """Construct the default preprocessor for prediction.

    ``feat_ext_bert`` has no dedicated preprocessor, so it borrows the
    ``text_classify_bert`` one; otherwise the configured model name (or
    None, when unset) selects the preprocessor.
    """
    app_model_name = getattr(self.config, "model_name", None)
    if app_model_name == "feat_ext_bert":
        app_model_name = "text_classify_bert"
    return preprocessors.get_preprocessor(
        self.config.pretrain_model_name_or_path,
        thread_num=self.thread_num,
        input_queue=queue.Queue(),
        output_queue=queue.Queue(),
        preprocess_batch_size=self.config.predict_batch_size,
        user_defined_config=self.config,
        app_model_name=app_model_name)
def build_logits(self, features, mode=None):
    """Plain classification head: pooled backbone output -> dense logits.

    Returns:
        logits of shape [None, num_labels] and label_ids of shape [None].
    """
    prep = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        user_defined_config=self.config)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    head = layers.Dense(self.num_labels,
                        kernel_initializer=layers.get_initializer(0.02),
                        name='dense')

    input_ids, input_mask, segment_ids, label_ids = prep(features)
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]
    return head(pooled), label_ids
def build_logits(self, features, mode=None):
    """Export pooled features plus pass-through text/domain/label columns.

    Returns:
        dict with keys "pooled_output", "text", "domain", "label".
    """
    prep = preprocessors.get_preprocessor(self.pretrain_model_name_or_path)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    (input_ids, input_mask, segment_ids,
     label_ids, texts, domains, labels) = prep(features)
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]
    return {
        "pooled_output": pooled,
        "text": texts,
        "domain": domains,
        "label": labels
    }
def build_logits(self, features, mode=None):
    """Classification logits from a pretrained backbone.

    The preprocessor turns raw records into input_ids / input_mask /
    segment_ids; the backbone's pooled output feeds a single dense head.
    """
    # Preprocess raw data into the features the model needs
    # (input_ids, input_mask, segment_ids, ...).
    prep = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        user_defined_config=self.user_defined_config)
    # Build the network backbone.
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    head = layers.Dense(self.num_labels,
                        kernel_initializer=layers.get_initializer(0.02),
                        name='dense')

    input_ids, input_mask, segment_ids, label_ids = prep(features)
    _, pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)
    return head(pooled), label_ids
def build_logits(self, features, mode=None):
    """Classification logits; under PREDICT mode returns ``{"logits": ...}``."""
    prep = preprocessors.get_preprocessor(self.pretrain_model_name_or_path)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)
    head = layers.Dense(self.num_labels,
                        kernel_initializer=layers.get_initializer(0.02),
                        name='dense')

    input_ids, input_mask, segment_ids, label_ids = prep(features)
    pooled = backbone([input_ids, input_mask, segment_ids], mode=mode)[1]
    logits = head(pooled)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return {"logits": logits}
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Export pooled features for paired-text inputs with pass-through columns.

    Returns:
        dict with keys "pooled_output", "text1", "text2", "domain", "label".
    """
    prep = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path, is_paired=True)
    backbone = model_zoo.get_pretrained_model(self.pretrain_model_name_or_path)

    (input_ids_a, input_mask_a, segment_ids_a,
     input_ids_b, input_mask_b, segment_ids_b,
     label_ids, texts1, texts2, domains, labels) = prep(features)

    pooled = backbone([
        input_ids_a, input_mask_a, segment_ids_a,
        input_ids_b, input_mask_b, segment_ids_b
    ], mode=mode)[1]

    return {
        "pooled_output": pooled,
        "text1": texts1,
        "text2": texts2,
        "domain": domains,
        "label": labels
    }
def build_logits(self, features, mode=None):
    """Build the multi-domain fine-tuning (MFT) classification graph.

    Besides the main classification logits, a per-layer domain classifier
    is built over selected encoder layers; its logits are stashed in
    ``self.domain_logits`` for an auxiliary loss computed elsewhere.

    NOTE(review): ``num_domains`` and ``layer_indexes`` are free names
    resolved outside this function (presumably module-level globals) —
    confirm where they are defined.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors
        mode (`bool`): tell the model whether it is under training
    Returns:
        logits (`Tensor`): main classification logits, [None, num_labels]
        label_ids (`Tensor`): label_ids, shape of [None]
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    preprocessor = preprocessors.get_preprocessor(
        self.pretrain_model_name_or_path,
        user_defined_config=self.config)
    bert_backbone = model_zoo.get_pretrained_model(
        self.config.pretrain_model_name_or_path)
    dense = layers.Dense(self.num_labels,
                         kernel_initializer=layers.get_initializer(0.02),
                         name='dense')
    input_ids, input_mask, segment_ids, label_ids, domains, weights = preprocessor(
        features)
    # Stash domain ids and per-example weights for the loss built elsewhere.
    self.domains = domains
    self.weights = weights
    hidden_size = bert_backbone.config.hidden_size
    self.domain_logits = dict()

    # Run the backbone piecewise so every encoder layer's output is
    # available to the per-layer domain classifiers below.
    bert_model = bert_backbone.bert
    embedding_output = bert_model.embeddings([input_ids, segment_ids],
                                             training=is_training)
    attention_mask = layers.get_attn_mask_bert(input_ids, input_mask)
    encoder_outputs = bert_model.encoder(
        [embedding_output, attention_mask], training=is_training)
    encoder_outputs = encoder_outputs[0]
    # Pool from the [CLS] position of the final layer.
    pooled_output = bert_model.pooler(encoder_outputs[-1][:, 0])
    if mode == tf.estimator.ModeKeys.TRAIN:
        pooled_output = tf.nn.dropout(pooled_output, keep_prob=0.9)

    with tf.variable_scope("mft", reuse=tf.AUTO_REUSE):
        # add domain network
        logits = dense(pooled_output)
        domains = tf.squeeze(domains)
        domain_embedded_matrix = tf.get_variable(
            "domain_projection", [num_domains, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        domain_embedded = tf.nn.embedding_lookup(domain_embedded_matrix,
                                                 domains)
        for layer_index in layer_indexes:
            # Mean-pool the layer's token states, shift by the domain
            # embedding, then score against every domain.
            content_tensor = tf.reduce_mean(encoder_outputs[layer_index],
                                            axis=1)
            content_tensor_with_domains = domain_embedded + content_tensor
            # AUTO_REUSE makes these get_variable calls share one
            # domain_weights/domain_bias pair across loop iterations.
            domain_weights = tf.get_variable(
                "domain_weights", [num_domains, hidden_size],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            domain_bias = tf.get_variable(
                "domain_bias", [num_domains],
                initializer=tf.zeros_initializer())
            current_domain_logits = tf.matmul(content_tensor_with_domains,
                                              domain_weights,
                                              transpose_b=True)
            current_domain_logits = tf.nn.bias_add(current_domain_logits,
                                                   domain_bias)
            self.domain_logits["domain_logits_" +
                               str(layer_index)] = current_domain_logits
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Serialize raw features into classification input tensors (no model).

    Returns:
        Tuple of (input_ids, input_mask, segment_ids, label_ids).
    """
    prep = preprocessors.get_preprocessor(self.config.tokenizer_name_or_path)
    input_ids, input_mask, segment_ids, label_ids = prep(features)
    return input_ids, input_mask, segment_ids, label_ids