# ---- Example #1 ----
    def process_feature(self, feature):
        """Serialize one feature to the TFRecord writer as a tf.train.Example.

        `input_ids_a` and `label_ids` are required on `feature`; every other
        field is best-effort and silently skipped when the attribute is
        missing or cannot be converted (presumably different feature
        variants carry different subsets of fields — TODO confirm).

        Args:
            feature: an InputFeature-like object with at least `input_ids_a`
                and `label_ids`.
        """

        def _try_add(key, creator, attr, wrap=False):
            # Best-effort insertion of an optional field. Narrowed from a
            # bare `except:` so KeyboardInterrupt/SystemExit still propagate;
            # any other failure keeps the original skip-silently behaviour.
            try:
                value = getattr(feature, attr)
                features[key] = creator([value] if wrap else value)
            except Exception:
                pass

        self.num_features += 1
        features = collections.OrderedDict()

        # Required fields — a missing attribute here should raise loudly.
        features["input_ids_a"] = tf_data_utils.create_int_feature(
            feature.input_ids_a)
        features["label_ids"] = tf_data_utils.create_int_feature(
            [feature.label_ids])

        # Optional fields (same set and order as before).
        _try_add("input_char_ids_a", tf_data_utils.create_int_feature,
                 "input_char_ids_a")
        _try_add("input_ids_b", tf_data_utils.create_int_feature,
                 "input_ids_b")
        _try_add("input_char_ids_b", tf_data_utils.create_int_feature,
                 "input_char_ids_b")
        _try_add("label_probs", tf_data_utils.create_float_feature,
                 "label_probs")
        _try_add("label_ratio", tf_data_utils.create_float_feature,
                 "label_ratio", wrap=True)
        _try_add("distillation_ratio", tf_data_utils.create_float_feature,
                 "distillation_ratio", wrap=True)
        # NOTE: the record key is "distillation_feature" but the source
        # attribute is `feature.feature` — preserved from the original.
        _try_add("distillation_feature", tf_data_utils.create_float_feature,
                 "feature")
        _try_add("adv_ids", tf_data_utils.create_int_feature,
                 "adv_ids", wrap=True)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature=features))
        self._writer.write(tf_example.SerializeToString())
# ---- Example #2 ----
    def process_feature(self, feature, **kargs):
        """Serialize one feature to the TFRecord writer as a tf.train.Example.

        `input_ids_a` and `label_ids` are required; every other field is
        best-effort and silently skipped when the attribute is missing or
        cannot be converted.

        Args:
            feature: an InputFeature-like object with at least `input_ids_a`
                and `label_ids`.
            **kargs: `label_type` (default "multi_class") — for
                "multi_class", `label_ids` is a scalar and gets wrapped in a
                list; otherwise it is written as-is (multi-label case,
                presumably — TODO confirm).
        """

        def _try_add(key, creator, attr, wrap=False):
            # Best-effort insertion of an optional field. Narrowed from a
            # bare `except:` so KeyboardInterrupt/SystemExit still propagate;
            # any other failure keeps the original skip-silently behaviour.
            try:
                value = getattr(feature, attr)
                features[key] = creator([value] if wrap else value)
            except Exception:
                pass

        self.num_features += 1
        features = collections.OrderedDict()

        # Required fields — a missing attribute here should raise loudly.
        features["input_ids_a"] = tf_data_utils.create_int_feature(
            feature.input_ids_a)
        if kargs.get("label_type", "multi_class") == "multi_class":
            # Scalar label id — wrap in a single-element list.
            features["label_ids"] = tf_data_utils.create_int_feature(
                [feature.label_ids])
        else:
            # Already a sequence of label ids.
            features["label_ids"] = tf_data_utils.create_int_feature(
                feature.label_ids)

        # Optional fields (same set and order as before).
        _try_add("input_char_ids_a", tf_data_utils.create_int_feature,
                 "input_char_ids_a")
        _try_add("input_ids_b", tf_data_utils.create_int_feature,
                 "input_ids_b")
        _try_add("input_char_ids_b", tf_data_utils.create_int_feature,
                 "input_char_ids_b")
        _try_add("label_probs", tf_data_utils.create_float_feature,
                 "label_probs")
        _try_add("label_ratio", tf_data_utils.create_float_feature,
                 "label_ratio", wrap=True)
        _try_add("distillation_ratio", tf_data_utils.create_float_feature,
                 "distillation_ratio", wrap=True)
        # NOTE: the record key is "distillation_feature" but the source
        # attribute is `feature.feature` — preserved from the original.
        _try_add("distillation_feature", tf_data_utils.create_float_feature,
                 "feature")

        tf_example = tf.train.Example(features=tf.train.Features(
            feature=features))
        self._writer.write(tf_example.SerializeToString())
# ---- Example #3 ----
    def process_feature(self, feature):
        """Write a InputFeature to the TFRecordWriter as a tf.train.Example.

        The paired a/b input fields and `label_ids` are required; `qas_id`
        (from `feature.guid`) and `class_ratio` are optional and skipped
        silently when absent or unconvertible.
        """
        self.num_features += 1

        features = collections.OrderedDict()
        features["input_ids_a"] = tf_data_utils.create_int_feature(
            feature.input_ids_a)
        features["input_mask_a"] = tf_data_utils.create_int_feature(
            feature.input_mask_a)
        features["segment_ids_a"] = tf_data_utils.create_int_feature(
            feature.segment_ids_a)
        features["input_ids_b"] = tf_data_utils.create_int_feature(
            feature.input_ids_b)
        features["input_mask_b"] = tf_data_utils.create_int_feature(
            feature.input_mask_b)
        features["segment_ids_b"] = tf_data_utils.create_int_feature(
            feature.segment_ids_b)
        features["label_ids"] = tf_data_utils.create_int_feature(
            [feature.label_ids])
        # Optional: guid may be missing or non-integer (e.g. a string id).
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate.
        try:
            features["qas_id"] = tf_data_utils.create_int_feature(
                [feature.guid])
        except Exception:
            pass
        # Optional: class_ratio is only present on some feature variants.
        try:
            features["class_ratio"] = tf_data_utils.create_float_feature(
                [feature.class_ratio])
        except Exception:
            pass

        tf_example = tf.train.Example(features=tf.train.Features(
            feature=features))
        self._writer.write(tf_example.SerializeToString())
# ---- Example #4 ----
    def process_feature(self, feature):
        """Write one masked-LM feature to the TFRecord writer.

        All masked-LM fields and `label_ids` are required; `qas_id` (from
        `feature.guid`) is optional. The example is always written exactly
        once — the original duplicated the build-and-write code in both the
        `try` and `except` branches.
        """
        self.num_features += 1
        features = collections.OrderedDict()

        features["input_ids"] = tf_data_utils.create_int_feature(
            feature.input_ids)
        features["input_mask"] = tf_data_utils.create_int_feature(
            feature.input_mask)
        features["segment_ids"] = tf_data_utils.create_int_feature(
            feature.segment_ids)
        features["masked_lm_positions"] = tf_data_utils.create_int_feature(
            feature.masked_lm_positions)
        features["masked_lm_ids"] = tf_data_utils.create_int_feature(
            feature.masked_lm_ids)
        features["masked_lm_weights"] = tf_data_utils.create_float_feature(
            feature.masked_lm_weights)
        features["label_ids"] = tf_data_utils.create_int_feature(
            [feature.label_ids])

        # Optional: guid may be missing or non-integer; write the example
        # without qas_id in that case. Narrowed from a bare `except:`.
        try:
            features["qas_id"] = tf_data_utils.create_int_feature(
                [feature.guid])
        except Exception:
            pass

        tf_example = tf.train.Example(features=tf.train.Features(
            feature=features))
        self._writer.write(tf_example.SerializeToString())
# ---- Example #5 ----
def main(_):
    """Run evaluation via estimator or raw session and dump eval features.

    Paths are resolved relative to `FLAGS.buckets`. In "sess" mode the
    returned label ids / feature vectors / probabilities are written to a
    TFRecord file at `FLAGS.feature_output` under the checkpoint dir.
    """
    print(FLAGS)
    print(tf.__version__, "==tensorflow version==")

    init_checkpoint = os.path.join(FLAGS.buckets, FLAGS.init_checkpoint)
    train_file = os.path.join(FLAGS.buckets, FLAGS.train_file)
    dev_file = os.path.join(FLAGS.buckets, FLAGS.dev_file)
    checkpoint_dir = os.path.join(FLAGS.buckets, FLAGS.model_output)

    print(init_checkpoint, train_file, dev_file, checkpoint_dir)

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)

    # Single-machine pseudo-cluster; this process acts as the evaluator.
    cluster = {'chief': ['localhost:2221'], 'worker': ['localhost:2222']}
    try:
        os.environ['TF_CONFIG'] = json.dumps({
            'cluster': cluster,
            'task': {
                'type': 'evaluator',
                'index': 0
            }
        })
    except Exception:
        # Narrowed from a bare `except:`; best-effort env setup.
        print("==not tf config env==")

    run_config = tf.estimator.RunConfig(keep_checkpoint_max=5,
                                        model_dir=checkpoint_dir,
                                        session_config=sess_config,
                                        save_checkpoints_secs=None,
                                        save_checkpoints_steps=None,
                                        log_step_count_steps=100)

    task_index = run_config.task_id
    is_chief = run_config.is_chief
    worker_count = 1

    print("==worker_count==", worker_count, "==local_rank==", task_index,
          "==is is_chief==", is_chief)
    target = ""

    if FLAGS.mode == "single_task":
        train_eval_api = train_eval
    elif FLAGS.mode == "multi_task":
        train_eval_api = multitask_train_eval
    else:
        # Was an unguarded NameError further down; fail with a clear message.
        raise ValueError("unknown FLAGS.mode: {}".format(FLAGS.mode))

    if FLAGS.run_type == "estimator":
        train_eval_api.monitored_estimator(FLAGS=FLAGS,
                                           worker_count=worker_count,
                                           task_index=task_index,
                                           cluster=cluster,
                                           is_chief=is_chief,
                                           target=target,
                                           init_checkpoint=init_checkpoint,
                                           train_file=train_file,
                                           dev_file=dev_file,
                                           checkpoint_dir=checkpoint_dir,
                                           run_config=run_config,
                                           profiler=FLAGS.profiler,
                                           parse_type=FLAGS.parse_type,
                                           rule_model=FLAGS.rule_model,
                                           train_op=FLAGS.train_op,
                                           running_type="eval",
                                           input_target=FLAGS.input_target,
                                           ues_token_type=FLAGS.ues_token_type,
                                           attention_type=FLAGS.attention_type)
    elif FLAGS.run_type == "sess":
        result_dict = train_eval_api.monitored_sess(
            FLAGS=FLAGS,
            worker_count=worker_count,
            task_index=task_index,
            cluster=cluster,
            is_chief=is_chief,
            target=target,
            init_checkpoint=init_checkpoint,
            train_file=train_file,
            dev_file=dev_file,
            checkpoint_dir=checkpoint_dir,
            run_config=run_config,
            profiler=FLAGS.profiler,
            parse_type=FLAGS.parse_type,
            rule_model=FLAGS.rule_model,
            train_op=FLAGS.train_op,
            running_type="eval",
            input_target=FLAGS.input_target,
            ues_token_type=FLAGS.ues_token_type,
            attention_type=FLAGS.attention_type)

        result_log_file = os.path.join(checkpoint_dir, FLAGS.feature_output)
        print(result_log_file, "==result log path==")
        writer = tf.python_io.TFRecordWriter(result_log_file)
        try:
            for label_id, feature, prob in zip(result_dict["label_ids"],
                                               result_dict["feature"],
                                               result_dict["prob"]):
                features = {}
                features["label_id"] = tf_data_utils.create_int_feature(
                    [label_id])
                features["feature"] = tf_data_utils.create_float_feature(
                    feature)
                features["prob"] = tf_data_utils.create_float_feature(prob)

                tf_example = tf.train.Example(features=tf.train.Features(
                    feature=features))
                writer.write(tf_example.SerializeToString())
        except Exception:
            # Narrowed from a bare `except:`; keep best-effort logging.
            print("===not legal output for writer===")
        finally:
            # Originally close() only ran on success, leaking the writer
            # (and potentially dropping buffered records) on failure.
            writer.close()