def process_feature(self, feature):
    """Serialize ``feature`` into a tf.train.Example and write it to the TFRecord.

    Required fields: ``input_ids_a`` and scalar ``label_ids``. Every other
    field is optional and is emitted only when present on ``feature``
    (matching the original best-effort behavior, but without bare excepts).

    Args:
        feature: an InputFeature-like object; optional attributes may be
            absent or ``None``, in which case they are skipped.
    """
    self.num_features += 1
    features = collections.OrderedDict()
    features["input_ids_a"] = tf_data_utils.create_int_feature(
        feature.input_ids_a)
    features["label_ids"] = tf_data_utils.create_int_feature(
        [feature.label_ids])

    # (output key, attribute on `feature`, creator fn, wrap scalar in list?)
    optional_fields = [
        ("input_char_ids_a", "input_char_ids_a",
         tf_data_utils.create_int_feature, False),
        ("input_ids_b", "input_ids_b",
         tf_data_utils.create_int_feature, False),
        ("input_char_ids_b", "input_char_ids_b",
         tf_data_utils.create_int_feature, False),
        ("label_probs", "label_probs",
         tf_data_utils.create_float_feature, False),
        ("label_ratio", "label_ratio",
         tf_data_utils.create_float_feature, True),
        ("distillation_ratio", "distillation_ratio",
         tf_data_utils.create_float_feature, True),
        # NOTE: the Example key differs from the attribute name here.
        ("distillation_feature", "feature",
         tf_data_utils.create_float_feature, False),
        ("adv_ids", "adv_ids",
         tf_data_utils.create_int_feature, True),
    ]
    for key, attr, create_fn, wrap in optional_fields:
        value = getattr(feature, attr, None)
        if value is None:
            # Optional field absent on this feature: skip it silently,
            # as the original try/except blocks did.
            continue
        features[key] = create_fn([value] if wrap else value)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature=features))
    self._writer.write(tf_example.SerializeToString())
def process_feature(self, feature, **kargs):
    """Serialize ``feature`` into a tf.train.Example and write it to the TFRecord.

    Required fields: ``input_ids_a`` and ``label_ids``. The shape of
    ``label_ids`` depends on ``kargs['label_type']``: for the default
    ``"multi_class"`` it is a scalar wrapped in a list; otherwise it is
    written as-is (e.g. a multi-label vector). All other fields are
    optional and emitted only when present on ``feature``.

    Args:
        feature: an InputFeature-like object.
        **kargs: recognizes ``label_type`` (default ``"multi_class"``).
    """
    self.num_features += 1
    features = collections.OrderedDict()
    features["input_ids_a"] = tf_data_utils.create_int_feature(
        feature.input_ids_a)
    if kargs.get("label_type", "multi_class") == "multi_class":
        # Single class id: wrap the scalar so create_int_feature gets a list.
        features["label_ids"] = tf_data_utils.create_int_feature(
            [feature.label_ids])
    else:
        # Multi-label / distribution: already a sequence.
        features["label_ids"] = tf_data_utils.create_int_feature(
            feature.label_ids)

    # (output key, attribute on `feature`, creator fn, wrap scalar in list?)
    optional_fields = [
        ("input_char_ids_a", "input_char_ids_a",
         tf_data_utils.create_int_feature, False),
        ("input_ids_b", "input_ids_b",
         tf_data_utils.create_int_feature, False),
        ("input_char_ids_b", "input_char_ids_b",
         tf_data_utils.create_int_feature, False),
        ("label_probs", "label_probs",
         tf_data_utils.create_float_feature, False),
        ("label_ratio", "label_ratio",
         tf_data_utils.create_float_feature, True),
        ("distillation_ratio", "distillation_ratio",
         tf_data_utils.create_float_feature, True),
        # NOTE: the Example key differs from the attribute name here.
        ("distillation_feature", "feature",
         tf_data_utils.create_float_feature, False),
    ]
    for key, attr, create_fn, wrap in optional_fields:
        value = getattr(feature, attr, None)
        if value is None:
            # Optional field absent on this feature: skip it silently,
            # as the original try/except blocks did.
            continue
        features[key] = create_fn([value] if wrap else value)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature=features))
    self._writer.write(tf_example.SerializeToString())
def process_feature(self, feature):
    """Write a InputFeature to the TFRecordWriter as a tf.train.Example.

    Required fields: ids/mask/segments for both the ``a`` and ``b``
    inputs plus a scalar ``label_ids``. ``qas_id`` and ``class_ratio``
    are best-effort: they are skipped when missing or not convertible.

    Args:
        feature: an InputFeature-like object with paired a/b inputs.
    """
    self.num_features += 1
    features = collections.OrderedDict()
    features["input_ids_a"] = tf_data_utils.create_int_feature(
        feature.input_ids_a)
    features["input_mask_a"] = tf_data_utils.create_int_feature(
        feature.input_mask_a)
    features["segment_ids_a"] = tf_data_utils.create_int_feature(
        feature.segment_ids_a)
    features["input_ids_b"] = tf_data_utils.create_int_feature(
        feature.input_ids_b)
    features["input_mask_b"] = tf_data_utils.create_int_feature(
        feature.input_mask_b)
    features["segment_ids_b"] = tf_data_utils.create_int_feature(
        feature.segment_ids_b)
    features["label_ids"] = tf_data_utils.create_int_feature(
        [feature.label_ids])
    try:
        # guid may be absent, or a non-integer id (e.g. a string) that
        # the int64 feature rejects — in both cases just omit qas_id.
        features["qas_id"] = tf_data_utils.create_int_feature(
            [feature.guid])
    except (AttributeError, TypeError, ValueError):
        pass
    try:
        features["class_ratio"] = tf_data_utils.create_float_feature(
            [feature.class_ratio])
    except (AttributeError, TypeError, ValueError):
        pass
    tf_example = tf.train.Example(features=tf.train.Features(
        feature=features))
    self._writer.write(tf_example.SerializeToString())
def process_feature(self, feature):
    """Write a masked-LM InputFeature to the TFRecordWriter as a tf.train.Example.

    Required fields: input ids/mask/segments, the masked-LM
    positions/ids/weights, and a scalar ``label_ids``. ``qas_id`` is
    best-effort and skipped when ``guid`` is missing or non-integer.

    The original duplicated the Example construction and the write call
    in both branches of the try/except; the write now happens exactly
    once, after the optional field is (maybe) added.
    """
    self.num_features += 1
    features = collections.OrderedDict()
    features["input_ids"] = tf_data_utils.create_int_feature(
        feature.input_ids)
    features["input_mask"] = tf_data_utils.create_int_feature(
        feature.input_mask)
    features["segment_ids"] = tf_data_utils.create_int_feature(
        feature.segment_ids)
    features["masked_lm_positions"] = tf_data_utils.create_int_feature(
        feature.masked_lm_positions)
    features["masked_lm_ids"] = tf_data_utils.create_int_feature(
        feature.masked_lm_ids)
    features["masked_lm_weights"] = tf_data_utils.create_float_feature(
        feature.masked_lm_weights)
    features["label_ids"] = tf_data_utils.create_int_feature(
        [feature.label_ids])
    try:
        # guid may be absent or a non-integer id; omit qas_id in that case.
        features["qas_id"] = tf_data_utils.create_int_feature(
            [feature.guid])
    except (AttributeError, TypeError, ValueError):
        pass
    tf_example = tf.train.Example(features=tf.train.Features(
        feature=features))
    self._writer.write(tf_example.SerializeToString())
def main(_):
    """Run evaluation; in "sess" mode also dump per-example outputs to a TFRecord.

    Resolves checkpoint/data paths under ``FLAGS.buckets``, builds a
    single-evaluator TF_CONFIG, then dispatches to the estimator- or
    session-based eval loop. In "sess" mode the returned
    ``label_ids``/``feature``/``prob`` arrays are written as
    tf.train.Examples to ``FLAGS.feature_output``.
    """
    print(FLAGS)
    print(tf.__version__, "==tensorflow version==")

    init_checkpoint = os.path.join(FLAGS.buckets, FLAGS.init_checkpoint)
    train_file = os.path.join(FLAGS.buckets, FLAGS.train_file)
    dev_file = os.path.join(FLAGS.buckets, FLAGS.dev_file)
    checkpoint_dir = os.path.join(FLAGS.buckets, FLAGS.model_output)
    print(init_checkpoint, train_file, dev_file, checkpoint_dir)

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)

    cluster = {'chief': ['localhost:2221'], 'worker': ['localhost:2222']}
    try:
        os.environ['TF_CONFIG'] = json.dumps({
            'cluster': cluster,
            'task': {
                'type': 'evaluator',
                'index': 0
            }
        })
    except (TypeError, ValueError, OSError):
        # Environment refused the assignment or serialization failed;
        # proceed without an explicit TF_CONFIG.
        print("==not tf config env==")

    run_config = tf.estimator.RunConfig(keep_checkpoint_max=5,
                                        model_dir=checkpoint_dir,
                                        session_config=sess_config,
                                        save_checkpoints_secs=None,
                                        save_checkpoints_steps=None,
                                        log_step_count_steps=100)
    task_index = run_config.task_id
    is_chief = run_config.is_chief
    worker_count = 1
    print("==worker_count==", worker_count, "==local_rank==", task_index,
          "==is is_chief==", is_chief)
    target = ""

    # NOTE(review): if FLAGS.mode is neither value, train_eval_api is
    # unbound and the code below raises NameError — confirm the flag is
    # validated upstream.
    if FLAGS.mode == "single_task":
        train_eval_api = train_eval
    elif FLAGS.mode == "multi_task":
        train_eval_api = multitask_train_eval

    if FLAGS.run_type == "estimator":
        train_eval_api.monitored_estimator(
            FLAGS=FLAGS,
            worker_count=worker_count,
            task_index=task_index,
            cluster=cluster,
            is_chief=is_chief,
            target=target,
            init_checkpoint=init_checkpoint,
            train_file=train_file,
            dev_file=dev_file,
            checkpoint_dir=checkpoint_dir,
            run_config=run_config,
            profiler=FLAGS.profiler,
            parse_type=FLAGS.parse_type,
            rule_model=FLAGS.rule_model,
            train_op=FLAGS.train_op,
            running_type="eval",
            input_target=FLAGS.input_target,
            ues_token_type=FLAGS.ues_token_type,
            attention_type=FLAGS.attention_type)
    elif FLAGS.run_type == "sess":
        result_dict = train_eval_api.monitored_sess(
            FLAGS=FLAGS,
            worker_count=worker_count,
            task_index=task_index,
            cluster=cluster,
            is_chief=is_chief,
            target=target,
            init_checkpoint=init_checkpoint,
            train_file=train_file,
            dev_file=dev_file,
            checkpoint_dir=checkpoint_dir,
            run_config=run_config,
            profiler=FLAGS.profiler,
            parse_type=FLAGS.parse_type,
            rule_model=FLAGS.rule_model,
            train_op=FLAGS.train_op,
            running_type="eval",
            input_target=FLAGS.input_target,
            ues_token_type=FLAGS.ues_token_type,
            attention_type=FLAGS.attention_type)

        result_log_file = os.path.join(checkpoint_dir, FLAGS.feature_output)
        print(result_log_file, "==result log path==")

        writer = tf.python_io.TFRecordWriter(result_log_file)
        try:
            for label_id, feature, prob in zip(result_dict["label_ids"],
                                               result_dict["feature"],
                                               result_dict["prob"]):
                features = {}
                features["label_id"] = tf_data_utils.create_int_feature(
                    [label_id])
                features["feature"] = tf_data_utils.create_float_feature(
                    feature)
                features["prob"] = tf_data_utils.create_float_feature(prob)
                tf_example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(tf_example.SerializeToString())
        except (KeyError, TypeError, ValueError):
            # result_dict is missing a key or holds values the feature
            # creators reject; keep the best-effort behavior of the
            # original, but only for these expected failure modes.
            print("===not legal output for writer===")
        finally:
            # Original closed the writer only on success, leaking the
            # file handle (and buffered records) on error.
            writer.close()