def convert_examples_to_features(self, examples, is_training, output_fn, batch_size):
  """Converts examples to features and write them into TFRecord file.

  Thin wrapper around `squad_lib.convert_examples_to_features` that fills in
  the tokenizer and the length limits configured on this instance.

  Args:
    examples: SQuAD examples to convert.
    is_training: Whether features are generated for training (vs. prediction).
    output_fn: Callback invoked for each produced feature.
    batch_size: Batch size used to pad the final batch during prediction.

  Returns:
    Whatever `squad_lib.convert_examples_to_features` returns (the number of
    examples processed).
  """
  conversion_kwargs = {
      'examples': examples,
      'tokenizer': self.tokenizer,
      'max_seq_length': self.seq_len,
      'doc_stride': self.doc_stride,
      'max_query_length': self.query_len,
      'is_training': is_training,
      'output_fn': output_fn,
      'batch_size': batch_size,
  }
  return squad_lib.convert_examples_to_features(**conversion_kwargs)
def _generate_tf_record_from_squad_file(cls,
                                        input_file_path,
                                        tokenizer,
                                        output_path,
                                        is_training,
                                        predict_batch_size=8,
                                        max_seq_length=384,
                                        max_query_length=64,
                                        doc_stride=128,
                                        version_2_with_negative=False):
  """Generates and saves training/validation data into a tf record file.

  Args:
    input_file_path: Path to the SQuAD-format JSON input file.
    tokenizer: Tokenizer used to split text into wordpieces.
    output_path: Path of the TFRecord file to write.
    is_training: Whether the data is for training (vs. prediction).
    predict_batch_size: Batch size used to pad the final predict batch.
    max_seq_length: Maximum total input sequence length after tokenization.
    max_query_length: Maximum number of tokens for the question.
    doc_stride: Stride between document chunks when splitting long documents.
    version_2_with_negative: Whether examples may contain no answer (SQuAD 2.0).

  Returns:
    Tuple of (meta_data dict, examples, features). For training, examples and
    features are returned empty to save memory.
  """
  examples = squad_lib.read_squad_examples(
      input_file=input_file_path,
      is_training=is_training,
      version_2_with_negative=version_2_with_negative)

  feature_writer = squad_lib.FeatureWriter(
      filename=output_path, is_training=is_training)
  kept_features = []

  def _append_feature(feature, is_padding):
    # Padding features only round out the final predict batch: write them to
    # the TFRecord but keep them out of the in-memory list.
    if not is_padding:
      kept_features.append(feature)
    feature_writer.process_feature(feature)

  # Training streams features straight to the writer with no batch padding;
  # prediction pads to predict_batch_size and also collects features in memory.
  example_count = squad_lib.convert_examples_to_features(
      examples=examples,
      tokenizer=tokenizer,
      max_seq_length=max_seq_length,
      doc_stride=doc_stride,
      max_query_length=max_query_length,
      is_training=is_training,
      output_fn=feature_writer.process_feature if is_training else _append_feature,
      batch_size=None if is_training else predict_batch_size)
  feature_writer.close()

  meta = {
      'size': example_count,
      'version_2_with_negative': version_2_with_negative,
  }
  if is_training:
    # Training examples are not needed by callers; drop them to save memory.
    examples = []
  return meta, examples, kept_features
# Feature-extraction hyperparameters for evaluation. Defined once up front so
# the values baked into the eval TFRecord and the dataset reader stay in sync
# (previously 384 and the batch size 4 were hard-coded in several places).
MAX_SEQ_LENGTH = 384  # input_meta_data['max_seq_length']
DOC_STRIDE = 128
MAX_QUERY_LENGTH = 64
BATCH_SIZE = 4

# Rebuild the tokenizer from the vocab asset shipped with the BERT layer so
# eval examples are tokenized exactly as during training.
vocab_file = my_bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = my_bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = FullTokenizer(vocab_file, do_lower_case)

# Bind eval_features BEFORE defining the closure that captures it; the
# original relied on late binding with the list created after the def.
eval_features = []

def _append_feature(feature, is_padding):
  # Padding features exist only to round out the final batch: write them to
  # the TFRecord but keep them out of the in-memory feature list.
  if not is_padding:
    eval_features.append(feature)
  eval_writer.process_feature(feature)

# Convert eval examples to features, writing them through eval_writer and
# collecting the non-padding ones in eval_features. Returns the example count.
dataset_size = convert_examples_to_features(
    examples=eval_examples,
    tokenizer=tokenizer,
    max_seq_length=MAX_SEQ_LENGTH,
    doc_stride=DOC_STRIDE,
    max_query_length=MAX_QUERY_LENGTH,
    is_training=False,
    output_fn=_append_feature,
    batch_size=BATCH_SIZE)
eval_writer.close()

# Build the evaluation tf.data pipeline from the TFRecord written above.
eval_dataset = create_squad_dataset(
    "/content/drive/My Drive/BERT/data/squad/eval.tf_record",
    MAX_SEQ_LENGTH,
    BATCH_SIZE,
    is_training=False)