def write_eval_records(filepath: Path):
    """Convert NQ test examples into BERT input features and write a TFRecord file.

    Reads examples from the module-level ``TEST_FILE`` (JSONL), tokenizes them
    with the vocab at ``DATASET_PATH / 'vocab-nq.txt'``, and streams the
    resulting features to ``filepath`` via ``bert_utils.FeatureWriter``.

    Args:
        filepath: Destination path for the eval TFRecord file.

    Side effects:
        Writes ``filepath`` and prints a summary line with example/feature counts.
    """
    eval_writer = bert_utils.FeatureWriter(filename=str(filepath),
                                           is_training=False)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=str(DATASET_PATH / 'vocab-nq.txt'), do_lower_case=True)
    # The converter pushes each finished feature straight to the writer, so no
    # intermediate feature list is needed.
    convert = bert_utils.ConvertExamples2Features(
        tokenizer=tokenizer,
        is_training=False,
        output_fn=eval_writer.process_feature,
        collect_stat=False)
    n_examples = 0
    for examples in bert_utils.nq_examples_iter(input_file=TEST_FILE,
                                                is_training=False,
                                                tqdm=tqdm.tqdm):
        for example in examples:
            # convert() returns the number of examples it consumed.
            n_examples += convert(example)
    eval_writer.close()
    print('number of test examples: %d, written to file: %d'
          % (n_examples, eval_writer.num_features))
# Convert NQ test examples into BERT features and stream them to the
# `eval_records` TFRecord file, then declare the feature schema used to
# decode that file later.
# NOTE: the original wrapped the path in a single-argument os.path.join(),
# which is a no-op; the path is now passed directly.
eval_writer = bert_utils.FeatureWriter(filename=eval_records,
                                       is_training=False)
tokenizer = tokenization.FullTokenizer(vocab_file='vocab-nq.txt',
                                       do_lower_case=True)
# Features go straight to the writer via output_fn; no list is accumulated.
convert = bert_utils.ConvertExamples2Features(
    tokenizer=tokenizer,
    is_training=False,
    output_fn=eval_writer.process_feature,
    collect_stat=False)
n_examples = 0
for examples in bert_utils.nq_examples_iter(input_file=nq_test_file,
                                            is_training=False,
                                            tqdm=None):
    for example in examples:
        n_examples += convert(example)
eval_writer.close()

# Schema for decoding the records written above.  All sequence features are
# padded to max_seq_length, so fixed-length parsing is safe.
seq_length = bert_utils.FLAGS.max_seq_length  # same as config['max_position_embeddings']
name_to_features = {
    "unique_id": tf.io.FixedLenFeature([], tf.int64),
    "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
    "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64),
    "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
}
# Tokenize NQ test examples and stream BERT features to the (externally
# created) `eval_writer`, then declare the schema used to decode the records.
tokenizer = tokenization.FullTokenizer(
    vocab_file='../input/bert-joint-baseline/vocab-nq.txt',
    do_lower_case=True)
# Features are forwarded to eval_writer via output_fn; nothing is accumulated
# locally.
convert = bert_utils.ConvertExamples2Features(
    tokenizer=tokenizer,
    is_training=False,
    output_fn=eval_writer.process_feature,
    collect_stat=False)
n_examples = 0
# Progress bars are suppressed on the Kaggle server (no notebook frontend).
tqdm_notebook = tqdm.tqdm_notebook if not on_kaggle_server else None
for examples in bert_utils.nq_examples_iter(input_file=nq_test_file,
                                            is_training=False,
                                            tqdm=tqdm_notebook):
    for example in examples:
        n_examples += convert(example)
eval_writer.close()
print('number of test examples: %d, written to file: %d'
      % (n_examples, eval_writer.num_features))

# Schema for decoding the records written above; sequences are padded to
# max_seq_length so fixed-length parsing applies.
seq_length = FLAGS.max_seq_length  # same as config['max_position_embeddings']
name_to_features = {
    "unique_id": tf.io.FixedLenFeature([], tf.int64),
    "input_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
    "input_mask": tf.io.FixedLenFeature([seq_length], tf.int64),
    "segment_ids": tf.io.FixedLenFeature([seq_length], tf.int64),
}
# Build the eval TFRecord only when it doesn't already exist, so re-running
# this cell is idempotent, then run inference over the decoded dataset.
if not url_exists(NQ_TEST_TFRECORD_PATH):
    eval_writer = bert_utils.FeatureWriter(filename=NQ_TEST_TFRECORD_PATH,
                                           is_training=False)
    tokenizer = tokenization.FullTokenizer(vocab_file=VOCAB_PATH,
                                           do_lower_case=True)
    # Features are streamed to the writer via output_fn; no local buffer.
    convert = bert_utils.ConvertExamples2Features(
        tokenizer=tokenizer,
        is_training=False,
        output_fn=eval_writer.process_feature,
        collect_stat=False)
    n_examples = 0
    for examples in bert_utils.nq_examples_iter(input_file=NQ_TEST_JSONL_PATH,
                                                is_training=False,
                                                tqdm=tqdm):
        for example in examples:
            n_examples += convert(example)
    eval_writer.close()
    print('number of test examples: %d, written to file: %d'
          % (n_examples, eval_writer.num_features))
#%%
# Decode the records and run batched inference.  drop_remainder is required
# on TPU, where all batches must have a static shape.
raw_ds = tf.data.TFRecordDataset(NQ_TEST_TFRECORD_PATH)
decoded_ds = raw_ds.map(_decode_record)
batched_ds = decoded_ds.batch(batch_size=BATCH_SIZE,
                              drop_remainder=(TPU is not None))
result = model.predict(batched_ds, verbose=1)
#%%