import json
from collections import Counter

from tqdm import tqdm

# BPE, save, try_load, process_file, build_features and add_special_tokens
# are assumed to be defined elsewhere in this module / repo.


def preprocess(args):
    # Build (or reload) the BPE vocabulary from the training set.
    bpe_state_dict = try_load(args.bpe_file)
    if bpe_state_dict is None:
        bpe = process_bpe_file(args.train_file, "train", args.max_tokens)
        save(args.bpe_file, bpe.state_dict(), message="BPE")
    else:
        bpe = BPE()
        bpe.load_state_dict(bpe_state_dict)

    # Process the training set with the learned BPE.
    train_examples, train_eval = process_file(args.train_file, "train", bpe)
    train_meta = build_features(train_examples, "train", args.train_record_file)
    save(args.train_eval_file, train_eval, message="train eval")
    save(args.train_meta_file, train_meta, message="train meta")

    # Process the dev and (optionally) test sets.
    dev_examples, dev_eval = process_file(args.dev_file, "dev", bpe)
    dev_meta = build_features(dev_examples, "dev", args.dev_record_file)
    save(args.dev_eval_file, dev_eval, message="dev eval")
    save(args.dev_meta_file, dev_meta, message="dev meta")

    if args.include_test_examples:
        test_examples, test_eval = process_file(args.test_file, "test", bpe)
        test_meta = build_features(test_examples, "test", args.test_record_file)
        save(args.test_eval_file, test_eval, message="test eval")
        save(args.test_meta_file, test_meta, message="test meta")
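
# `preprocess` relies on two small I/O helpers whose implementation lives
# elsewhere in the repo. The sketch below is an assumption based only on how
# they are called here: `try_load` returns None when the file is missing, and
# `save` writes JSON (get_bpe later reads args.bpe_file back with json.load).


def try_load(filename):
    # Hypothetical helper: return the parsed JSON contents of `filename`,
    # or None if the file does not exist yet.
    try:
        with open(filename, "r") as file:
            return json.load(file)
    except FileNotFoundError:
        return None


def save(filename, obj, message=None):
    # Hypothetical helper: dump `obj` as JSON, optionally logging a message.
    if message is not None:
        print(f"Saving {message}...")
    with open(filename, "w") as file:
        json.dump(obj, file)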

def process_bpe_file(filename, data_type, max_length):
    print(f"Getting vocab from {data_type} examples...")
    lines = Counter()
    with open(filename, "r") as file:
        source = json.load(file)
    for article in tqdm(source["data"]):
        for paragraph in article["paragraphs"]:
            context = paragraph["context"]
            qas = paragraph["qas"]
            # Weight by number of questions it has
            lines[context] += len(qas)
            for qa in qas:
                question = qa["question"]
                lines[question] += 1

    bpe = BPE()
    bpe.build_base_vocab()
    bpe.build_vocab(lines)
    print(f"Learning bpe on {len(bpe.vocab)} words for {data_type}")
    bpe.learn_bpe(max_length)
    return bpe
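
# The functions above assume the following interface on the BPE class. This is
# only an illustrative sketch reconstructed from the calls in this file; the
# real implementation (merge learning, tokenisation, special tokens) lives in
# the repo's BPE module, and the state_dict field names here are guesses.


class BPE:
    def __init__(self):
        self.vocab = Counter()   # word -> weighted frequency
        self.merges = []         # learned merge rules, in order

    def build_base_vocab(self):
        # Seed the vocabulary with base symbols (e.g. individual characters).
        raise NotImplementedError

    def build_vocab(self, lines):
        # `lines` is a Counter mapping a text line to its weight; split each
        # line into words and accumulate weighted word frequencies.
        raise NotImplementedError

    def learn_bpe(self, max_tokens):
        # Repeatedly merge the most frequent symbol pair until the vocabulary
        # reaches `max_tokens` entries.
        raise NotImplementedError

    def state_dict(self):
        # JSON-serialisable snapshot of the learned vocabulary and merges.
        return {"vocab": dict(self.vocab), "merges": self.merges}

    def load_state_dict(self, state):
        self.vocab = Counter(state["vocab"])
        self.merges = state["merges"]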

def get_bpe(args):
    bpe = BPE()
    with open(args.bpe_file, "r") as file:
        bpe.load_state_dict(json.load(file))
    add_special_tokens(args)
    return bpe
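
# Example of how these functions might be wired together from a script entry
# point. The flag names and default paths below are hypothetical; they simply
# mirror the `args` attributes accessed in `preprocess` and `get_bpe`.

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--train_file", default="data/train.json")
    parser.add_argument("--dev_file", default="data/dev.json")
    parser.add_argument("--test_file", default="data/test.json")
    parser.add_argument("--bpe_file", default="data/bpe.json")
    parser.add_argument("--max_tokens", type=int, default=30000)
    parser.add_argument("--train_record_file", default="data/train.npz")
    parser.add_argument("--dev_record_file", default="data/dev.npz")
    parser.add_argument("--test_record_file", default="data/test.npz")
    parser.add_argument("--train_eval_file", default="data/train_eval.json")
    parser.add_argument("--dev_eval_file", default="data/dev_eval.json")
    parser.add_argument("--test_eval_file", default="data/test_eval.json")
    parser.add_argument("--train_meta_file", default="data/train_meta.json")
    parser.add_argument("--dev_meta_file", default="data/dev_meta.json")
    parser.add_argument("--test_meta_file", default="data/test_meta.json")
    parser.add_argument("--include_test_examples", action="store_true")
    preprocess(parser.parse_args())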