Python json2features 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: preprocess.DRCD_preprocess

메소드/함수: json2features

hotexamples.com에서의 예제들: 4

Python json2features - 4개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한 preprocess.DRCD_preprocess.json2features의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

예제 #1

파일 보기

    # Load the model configuration. The config class is chosen from
    # substrings of the config file path: no 'albert' -> BertConfig,
    # 'albert' plus 'google' -> AlbertConfig, otherwise ALBertConfig.
    if 'albert' not in args.bert_config_file:
        bert_config = BertConfig.from_json_file(args.bert_config_file)
    else:
        if 'google' in args.bert_config_file:
            bert_config = AlbertConfig.from_json_file(args.bert_config_file)
        else:
            bert_config = ALBertConfig.from_json_file(args.bert_config_file)

    # Load data.
    print('loading data...')
    tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file, do_lower_case=True)
    # Stop early when the configured vocabulary size disagrees with the
    # vocabulary the tokenizer actually loaded.
    assert args.vocab_size == len(tokenizer.vocab)

    # Regenerate the example/feature JSON files when either one is missing.
    if not os.path.exists(args.test_dir1) or not os.path.exists(args.test_dir2):
        json2features(args.test_file, [args.test_dir1, args.test_dir2], tokenizer, is_training=False,
                      max_seq_length=args.max_seq_length)

    # NOTE(review): this second guard only fires if the call above did not
    # create args.test_dir1, and it passes different settings (repeat_limit,
    # max_query_length, doc_stride). It looks like a leftover alternative
    # call -- confirm which of the two is intended.
    if not os.path.exists(args.test_dir1):
        json2features(input_file=args.test_file, output_files=[args.test_dir1, args.test_dir2],
                      tokenizer=tokenizer, is_training=False, repeat_limit=3, max_query_length=96,
                      max_seq_length=args.max_seq_length, doc_stride=128)

    # Read both JSON files through context managers so the file handles are
    # closed deterministically instead of being left to garbage collection.
    with open(args.test_dir1, 'r') as examples_file:
        test_examples = json.load(examples_file)
    with open(args.test_dir2, 'r') as features_file:
        test_features = json.load(features_file)

    # Number of batches needed to cover every feature (ceiling division).
    dev_steps_per_epoch = len(test_features) // args.n_batch
    if len(test_features) % args.n_batch != 0:
        dev_steps_per_epoch += 1

    # init model
    print('init model...')
    if 'albert' not in args.init_restore_dir:

예제 #2

파일 보기

        bert_config = BertConfig.from_json_file(args.bert_config_file)
    else:
        if 'google' in args.bert_config_file:
            bert_config = AlbertConfig.from_json_file(args.bert_config_file)
        else:
            bert_config = ALBertConfig.from_json_file(args.bert_config_file)

    # Load data.
    print('loading data...')
    tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                           do_lower_case=True)
    # Stop early when the configured vocabulary size disagrees with the
    # vocabulary the tokenizer actually loaded.
    assert args.vocab_size == len(tokenizer.vocab)

    # Build the training files only when the features file is missing. The
    # examples path is derived from the features path by swapping
    # '_features_' for '_examples_'.
    if not os.path.exists(args.train_dir):
        json2features(args.train_file,
                      [args.train_dir.replace('_features_', '_examples_'),
                       args.train_dir],
                      tokenizer,
                      is_training=True,
                      max_seq_length=args.max_seq_length)

    # Rebuild the dev files when either of the two is missing.
    if not os.path.exists(args.dev_dir1) or not os.path.exists(args.dev_dir2):
        json2features(args.dev_file, [args.dev_dir1, args.dev_dir2],
                      tokenizer,
                      is_training=False,
                      max_seq_length=args.max_seq_length)

    # Read the JSON files through context managers so the file handles are
    # closed deterministically instead of being left to garbage collection.
    with open(args.train_dir, 'r') as train_features_file:
        train_features = json.load(train_features_file)
    with open(args.dev_dir1, 'r') as dev_examples_file:
        dev_examples = json.load(dev_examples_file)
    with open(args.dev_dir2, 'r') as dev_features_file:
        dev_features = json.load(dev_features_file)

    # Remove any existing log file so this run starts with a fresh one.
    if os.path.exists(args.log_file):
        os.remove(args.log_file)

예제 #3

파일 보기

파일: DRCD_finetune.py 프로젝트: brightmart/bert_cn_finetune

    # Load the model configuration; a config path containing 'albert'
    # selects ALBertConfig, anything else selects BertConfig.
    if 'albert' not in args.bert_config_file:
        bert_config = BertConfig.from_json_file(args.bert_config_file)
    else:
        bert_config = ALBertConfig.from_json_file(args.bert_config_file)

    # Load data.
    print('loading data...')
    tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                           do_lower_case=True)
    # Stop early when the configured vocabulary size disagrees with the
    # vocabulary the tokenizer actually loaded.
    assert args.vocab_size == len(tokenizer.vocab)

    # Build the training files only when the features file is missing. The
    # examples path is derived from the features path by swapping
    # '_features_' for '_examples_'. The sequence length comes from the
    # model config's max_position_embeddings.
    if not os.path.exists(args.train_dir):
        json2features(args.train_file,
                      [args.train_dir.replace('_features_', '_examples_'),
                       args.train_dir],
                      tokenizer,
                      is_training=True,
                      max_seq_length=bert_config.max_position_embeddings)

    # Rebuild the dev files when either of the two is missing.
    if not os.path.exists(args.dev_dir1) or not os.path.exists(args.dev_dir2):
        json2features(args.dev_file, [args.dev_dir1, args.dev_dir2],
                      tokenizer,
                      is_training=False,
                      max_seq_length=bert_config.max_position_embeddings)

    # Read the JSON files through context managers so the file handles are
    # closed deterministically instead of being left to garbage collection.
    with open(args.train_dir, 'r') as train_features_file:
        train_features = json.load(train_features_file)
    with open(args.dev_dir1, 'r') as dev_examples_file:
        dev_examples = json.load(dev_examples_file)
    with open(args.dev_dir2, 'r') as dev_features_file:
        dev_features = json.load(dev_features_file)

    # Remove any existing log file so this run starts with a fresh one.
    if os.path.exists(args.log_file):
        os.remove(args.log_file)

예제 #4

파일 보기

파일: DRCD_test.py 프로젝트: caihao20/bert_cn_finetune

    # Load the model configuration; a config path containing 'albert'
    # selects ALBertConfig, anything else selects BertConfig.
    if 'albert' not in args.bert_config_file:
        bert_config = BertConfig.from_json_file(args.bert_config_file)
    else:
        bert_config = ALBertConfig.from_json_file(args.bert_config_file)

    # Load data.
    print('loading data...')
    tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                           do_lower_case=True)
    # Stop early when the configured vocabulary size disagrees with the
    # vocabulary the tokenizer actually loaded.
    assert args.vocab_size == len(tokenizer.vocab)

    # Rebuild the test files when either of the two is missing. The sequence
    # length comes from the model config's max_position_embeddings.
    if not os.path.exists(args.test_dir1) or not os.path.exists(
            args.test_dir2):
        json2features(args.test_file, [args.test_dir1, args.test_dir2],
                      tokenizer,
                      is_training=False,
                      max_seq_length=bert_config.max_position_embeddings)

    # Read both JSON files through context managers so the file handles are
    # closed deterministically instead of being left to garbage collection.
    with open(args.test_dir1, 'r') as examples_file:
        test_examples = json.load(examples_file)
    with open(args.test_dir2, 'r') as features_file:
        test_features = json.load(features_file)

    # Number of batches needed to cover every feature (ceiling division).
    dev_steps_per_epoch = len(test_features) // args.n_batch
    if len(test_features) % args.n_batch != 0:
        dev_steps_per_epoch += 1

    # Init model: the checkpoint path (not the config path) decides which
    # model class is built; only the ALBERT variant receives a dropout rate.
    print('init model...')
    if 'albert' not in args.init_restore_dir:
        model = BertForQuestionAnswering(bert_config)
    else:
        model = ALBertForQA(bert_config, dropout_rate=args.dropout)