def __init__(self, num_classes, ssdr_config, core_model, seq_length, is_training):
    """Build the placeholder-fed graph: inputs, core model, and two output heads.

    The BERT config is read from ``bert_config.json`` under ``data_path``;
    the definition and location widths come from ``FLAGS.max_def_length`` and
    ``FLAGS.max_loc_length``.

    Args:
        num_classes: Output width of the classification Dense head.
        ssdr_config: Forwarded to ``core_model``; its ``use_ab_mapping_mask``
            attribute decides whether an ``ab_mapping_mask`` placeholder exists.
        core_model: Callable that builds the network from the keyword
            arguments assembled below.
        seq_length: Width of the main-sequence placeholders.
        is_training: Forwarded unchanged to ``core_model``.
    """
    super(WSSDRWrapperInterface, self).__init__()

    bert_config = BertConfig.from_json_file(
        os.path.join(data_path, "bert_config.json"))
    def_max_length = FLAGS.max_def_length
    loc_max_length = FLAGS.max_loc_length

    tf_logging.debug("WSSDRWrapper init()")
    for template, value in (
            ("seq_length %d", seq_length),
            ("def_max_length %d", def_max_length),
            ("loc_max_length %d", loc_max_length),
    ):
        tf_logging.debug(template % value)

    def int64_input(width, name):
        # Batch dimension is left open; only the trailing width is fixed.
        return tf.compat.v1.placeholder(tf.int64, [None, width], name=name)

    # Creation order is kept stable so graph node naming does not shift.
    self.input_ids = int64_input(seq_length, "input_ids")
    self.input_mask_ = int64_input(seq_length, "input_mask")
    self.segment_ids = int64_input(seq_length, "segment_ids")
    self.d_location_ids = int64_input(loc_max_length, "d_location_ids")
    self.d_input_ids = int64_input(def_max_length, "d_input_ids")
    self.d_input_mask = int64_input(def_max_length, "d_input_mask")
    self.d_segment_ids = int64_input(def_max_length, "d_segment_ids")
    self.ab_mapping = int64_input(1, "ab_mapping")
    self.ab_mapping_mask = (
        int64_input(FLAGS.def_per_batch, "ab_mapping_mask")
        if ssdr_config.use_ab_mapping_mask
        else None
    )

    # [batch, seq_len] lookup labels derived from d_location_ids.
    lookup_labels = get_y_lookup_from_location_ids(self.d_location_ids, seq_length)
    self.y_cls = tf.compat.v1.placeholder(tf.int64, [None])

    model_kwargs = dict(
        config=bert_config,
        ssdr_config=ssdr_config,
        is_training=is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask_,
        token_type_ids=self.segment_ids,
        d_input_ids=self.d_input_ids,
        d_input_mask=self.d_input_mask,
        d_segment_ids=self.d_segment_ids,
        d_location_ids=self.d_location_ids,
        ab_mapping=self.ab_mapping,
        ab_mapping_mask=self.ab_mapping_mask,
        use_one_hot_embeddings=False,
    )
    self.network = core_model(**model_kwargs)

    # Classification head on the pooled output.
    cls_head = keras.layers.Dense(num_classes)
    self.cls_logits = cls_head(self.network.get_pooled_output())
    self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.cls_logits, labels=self.y_cls)
    self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

    # Two-way lookup head on the sequence output.
    lookup_head = keras.layers.Dense(2)
    self.lookup_logits = lookup_head(self.network.get_sequence_output())
    positive_scores = self.lookup_logits[:, :, 1]
    self.lookup_p_at_1 = tf_module.p_at_1(positive_scores, lookup_labels)
    self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.lookup_logits, labels=lookup_labels)
    self.y_lookup = lookup_labels
    # Per-example loss is the SUM over the last axis, then averaged over the batch.
    self.lookup_loss_per_example = tf.reduce_sum(self.lookup_loss_arr, axis=-1)
    self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)

    self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
def main(_):
    """Run the estimator with the ``model_fn_lm`` model function over ``BertModel``.

    The single positional argument is ignored. All settings are read from
    ``FLAGS``. Returns whatever ``run_estimator`` returns.
    """
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    file_list = get_input_files_from_flags(FLAGS)

    # The third positional argument is passed as False, exactly as before.
    build_inputs = input_fn_builder(file_list, FLAGS, False)
    build_model = model_fn_lm(config, training_setup, BertModel)
    return run_estimator(build_model, build_inputs)
def run_classification_w_second_input():
    """Run the classification estimator using the second-input input function.

    All settings are read from ``FLAGS``. When ``FLAGS.do_predict`` is set, a
    ``MuteEnqueueFilter`` is attached to ``tf_logging`` before the run.
    Returns whatever ``run_estimator`` returns.
    """
    file_list = get_input_files_from_flags(FLAGS)
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    show_input_files(file_list)

    build_model = model_fn_classification(config, training_setup)
    build_inputs = input_fn_builder_use_second_input(FLAGS)

    if FLAGS.do_predict:
        tf_logging.addFilter(MuteEnqueueFilter())

    return run_estimator(build_model, build_inputs)
def __init__(self, num_classes, seq_length, is_training):
    """Build the placeholder-fed DictReaderModel graph with two output heads.

    The BERT config is read from ``bert_config.json`` under ``data_path`` and
    is used for both ``config`` and ``d_config``. The definition and location
    widths come from ``FLAGS.max_def_length`` and ``FLAGS.max_loc_length``.

    Args:
        num_classes: Output width of the classification Dense head.
        seq_length: Width of the main-sequence and lookup-label placeholders.
        is_training: Forwarded unchanged to ``DictReaderModel``.
    """
    super(DictReaderWrapper, self).__init__()

    bert_config = BertConfig.from_json_file(
        os.path.join(data_path, "bert_config.json"))
    def_max_length = FLAGS.max_def_length
    loc_max_length = FLAGS.max_loc_length

    tf_logging.debug("DictReaderWrapper init()")
    for template, value in (
            ("seq_length %d", seq_length),
            ("def_max_length %d", def_max_length),
            ("loc_max_length %d", loc_max_length),
    ):
        tf_logging.debug(template % value)

    def int64_input(shape):
        return tf.compat.v1.placeholder(tf.int64, shape)

    # These placeholders are unnamed, so creation order fixes their
    # auto-generated names; the order below must not change.
    self.input_ids = int64_input([None, seq_length])
    self.input_mask_ = int64_input([None, seq_length])
    self.segment_ids = int64_input([None, seq_length])
    self.d_input_ids = int64_input([None, def_max_length])
    self.d_input_mask = int64_input([None, def_max_length])
    self.d_location_ids = int64_input([None, loc_max_length])
    self.y_cls = int64_input([None])
    self.y_lookup = int64_input([None, seq_length])

    model_kwargs = dict(
        config=bert_config,
        d_config=bert_config,
        is_training=is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask_,
        d_input_ids=self.d_input_ids,
        d_input_mask=self.d_input_mask,
        d_location_ids=self.d_location_ids,
        use_target_pos_emb=True,
        token_type_ids=self.segment_ids,
        use_one_hot_embeddings=False,
    )
    self.network = DictReaderModel(**model_kwargs)

    # Classification head on the pooled output.
    cls_head = keras.layers.Dense(num_classes)
    self.cls_logits = cls_head(self.network.pooled_output)
    self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.cls_logits, labels=self.y_cls)
    self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

    # Two-way lookup head on the sequence output.
    lookup_head = keras.layers.Dense(2)
    self.lookup_logits = lookup_head(self.network.sequence_output)
    self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.lookup_logits, labels=self.y_lookup)
    # Per-example loss is the MEAN over the last axis, then averaged over the batch.
    self.lookup_loss_per_example = tf.reduce_mean(self.lookup_loss_arr, axis=-1)
    self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)

    self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
def run_w_data_id():
    """Run the weighted-loss classification estimator on inputs with data ids.

    All settings are read from ``FLAGS``. When ``FLAGS.do_predict`` is set, a
    ``CounterFilter`` is attached to ``tf_logging`` before the input function
    is built. Returns whatever ``run_estimator`` returns.
    """
    file_list = get_input_files_from_flags(FLAGS)
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    show_input_files(file_list)

    build_model = model_fn_classification_weighted_loss(config, training_setup)

    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    build_inputs = input_fn_builder_classification_w_data_id(
        input_files=file_list,
        flags=FLAGS,
        is_training=FLAGS.do_train,
    )
    return run_estimator(build_model, build_inputs)
def main(_):
    """Run the estimator with the ``model_fn_sensitivity`` model function.

    The single positional argument is ignored. All settings are read from
    ``FLAGS``; ``FLAGS.special_flags`` is split on commas before being passed
    on. When ``FLAGS.do_predict`` is set, a ``MuteEnqueueFilter`` is attached
    to ``tf_logging``. Returns whatever ``run_estimator`` returns.
    """
    file_list = get_input_files_from_flags(FLAGS)
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    show_input_files(file_list)

    flag_names = FLAGS.special_flags.split(",")
    build_model = model_fn_sensitivity(
        bert_config=config,
        train_config=training_setup,
        model_class=BertModel,
        special_flags=flag_names,
    )

    if FLAGS.do_predict:
        tf_logging.addFilter(MuteEnqueueFilter())

    build_inputs = input_fn_builder_use_second_input(FLAGS)
    return run_estimator(build_model, build_inputs)
def run_w_data_id():
    """Run the BertModel classification estimator on inputs with data ids.

    All settings are read from ``FLAGS``; ``FLAGS.special_flags`` is split on
    commas before being passed on. When ``FLAGS.do_predict`` is set, a
    ``CounterFilter`` is attached to ``tf_logging`` before the input function
    is built. Returns whatever ``run_estimator`` returns.
    """
    file_list = get_input_files_from_flags(FLAGS)
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    show_input_files(file_list)

    flag_names = FLAGS.special_flags.split(",")
    build_model = model_fn_classification(
        bert_config=config,
        train_config=training_setup,
        model_class=BertModel,
        special_flags=flag_names,
    )

    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    # The "_typo" suffix is part of the builder's actual name.
    build_inputs = input_fn_builder_classification_w_data_ids_typo(
        input_files=file_list,
        flags=FLAGS,
        is_training=FLAGS.do_train,
    )
    return run_estimator(build_model, build_inputs)
def main(_):
    """Run the classification estimator with ``FreezeEmbedding`` as the model class.

    The single positional argument is ignored. All settings are read from
    ``FLAGS``; ``FLAGS.special_flags`` is split on commas before being passed
    on. Returns whatever ``run_estimator`` returns.
    """
    file_list = get_input_files_from_flags(FLAGS)
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    show_input_files(file_list)

    flag_names = FLAGS.special_flags.split(",")
    build_model = model_fn_classification(
        bert_config=config,
        train_config=training_setup,
        model_class=FreezeEmbedding,
        special_flags=flag_names,
    )
    build_inputs = input_fn_builder_classification_w_data_id(
        input_files=file_list,
        flags=FLAGS,
        is_training=FLAGS.do_train,
    )
    return run_estimator(build_model, build_inputs)
def main(_):
    """Run the classification estimator with ``MultiEvidenceUseFirst``.

    The single positional argument is ignored. All settings are read from
    ``FLAGS``; ``FLAGS.special_flags`` is split on commas before being passed
    on. A prediction override adds the model's ``get_output()`` result to the
    predictions dict under the key ``'vector'``. When ``FLAGS.do_predict`` is
    set, a ``CounterFilter`` is attached to ``tf_logging``. Returns whatever
    ``run_estimator`` returns.
    """
    file_list = get_input_files_from_flags(FLAGS)
    config = BertConfig.from_json_file(FLAGS.bert_config_file)
    training_setup = TrainConfigEx.from_flags(FLAGS)
    show_input_files(file_list)

    flag_names = FLAGS.special_flags.split(",")

    def _attach_model_vector(predictions, model):
        # Mutates and returns the same predictions dict.
        predictions['vector'] = model.get_output()
        return predictions

    build_model = model_fn_classification(
        bert_config=config,
        train_config=training_setup,
        model_class=MultiEvidenceUseFirst,
        special_flags=flag_names,
        override_prediction_fn=_attach_model_vector,
    )

    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    build_inputs = input_fn_builder_use_second_input(FLAGS)
    return run_estimator(build_model, build_inputs)
def __init__(self):
    """Build a 12-layer encoder graph under the "bert" variable scope on a fixed constant input.

    The config is read from ``bert_config.json`` under ``data_path``. The
    input is a single constant sequence of 512 ids with an all-ones mask and
    all-zero token types. Each layer runs ``self.attention_fn`` on the previous
    layer's output, then an output projection with a residual layer norm, an
    intermediate ``gelu`` layer, and a second projection with a residual layer
    norm.

    NOTE(review): ``attention_mask`` is computed but never read in the visible
    body — confirm whether ``attention_fn`` was meant to receive it.
    NOTE(review): ``hidden_size`` is not bound anywhere in this method, so it
    must come from an enclosing/module scope; ``config.hidden_size`` looks
    like the intended value — confirm.
    NOTE(review): the layer count (12) and sequence length (512) are
    hard-coded rather than taken from ``config`` — confirm this is deliberate.
    """
    config = BertConfig.from_json_file(
        os.path.join(data_path, "bert_config.json"))
    # Starts empty; nothing in this method appends to it.
    self.attention_probs_list = []
    # One sequence of 512 ids: 101 followed by 511 copies of 100
    # (presumably [CLS] then [UNK] in the standard BERT vocab — TODO confirm).
    input_ids = tf.constant([[101] + [100] * 511])
    token_type_ids = tf.constant([[0] * 512])
    input_mask = tf.constant([[1] * 512])
    attention_mask = create_attention_mask_from_input_mask(
        input_ids, input_mask)
    initializer = create_initializer(config.initializer_range)
    # With scope=None the variable scope falls back to default_name="bert".
    scope = None
    with tf.compat.v1.variable_scope(scope, default_name="bert"):
        with tf.compat.v1.variable_scope("embeddings"):
            # Perform embedding lookup on the word ids.
            (self.embedding_output, self.embedding_table) = embedding_lookup(
                input_ids=input_ids,
                vocab_size=config.vocab_size,
                embedding_size=config.hidden_size,
                initializer_range=config.initializer_range,
                word_embedding_name="word_embeddings",
                use_one_hot_embeddings=False)
            # Add positional embeddings and token type embeddings, then layer
            # normalize and perform dropout.
            self.embedding_output = embedding_postprocessor(
                input_tensor=self.embedding_output,
                use_token_type=True,
                token_type_ids=token_type_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob)
        # The encoder layers operate on the reshaped embedding output.
        prev_output = reshape_to_matrix(self.embedding_output)
        with tf.compat.v1.variable_scope("encoder"):
            for layer_idx in range(12):
                with tf.compat.v1.variable_scope("layer_%d" % layer_idx):
                    layer_input = prev_output
                    with tf.compat.v1.variable_scope("attention"):
                        attention_heads = []
                        with tf.compat.v1.variable_scope("self"):
                            attention_head = self.attention_fn(layer_input)
                            attention_heads.append(attention_head)
                        attention_output = None
                        # Exactly one entry is appended above, so the first
                        # branch is the one taken here.
                        if len(attention_heads) == 1:
                            attention_output = attention_heads[0]
                        else:
                            # In the case where we have other sequences, we just concatenate
                            # them to the self-attention head before the projection.
                            attention_output = tf.concat(attention_heads, axis=-1)
                        # Run a linear projection of `hidden_size` then add a residual
                        # with `layer_input`.
                        with tf.compat.v1.variable_scope("output"):
                            attention_output = dense(
                                hidden_size, initializer)(attention_output)
                            attention_output = layer_norm(
                                attention_output + layer_input)
                    # The activation is only applied to the "intermediate" hidden layer.
                    with tf.compat.v1.variable_scope("intermediate"):
                        intermediate_output = dense(
                            config.intermediate_size,
                            initializer,
                            activation=gelu)(attention_output)
                    # Down-project back to `hidden_size` then add the residual.
                    with tf.compat.v1.variable_scope("output"):
                        layer_output = dense(
                            hidden_size, initializer)(intermediate_output)
                        layer_output = layer_norm(layer_output + attention_output)
                        # Feed this layer's result into the next iteration.
                        prev_output = layer_output