Example #1
0
def build_legacy_torchtext_vocab_pipeline(vocab_file):
    tokenizer = get_tokenizer("basic_english")
    from torchtext.vocab import build_vocab_from_iterator

    def token_iterator(vocab_file):
        f = open(vocab_file, 'r')
        for line in f:
            for token in line:
                yield token

    vocab = build_vocab_from_iterator(token_iterator(vocab_file))
    pipeline = sequential_transforms(tokenizer, vocab_func(vocab))
    return pipeline, None, None
Example #2
0
def build_legacy_batch_torchtext_vocab_pipeline(vocab_file):
    tokenizer = get_tokenizer("basic_english")
    from torchtext.vocab import build_vocab_from_iterator
    from transforms import TextClassificationPipeline

    def token_iterator(vocab_file):
        f = open(vocab_file, 'r')
        for line in f:
            for token in line:
                yield token

    vocab = build_vocab_from_iterator(token_iterator(vocab_file))
    text_pipeline = sequential_transforms(tokenizer, vocab_func(vocab))
    label_pipeline = totensor(dtype=torch.long)
    return TextClassificationPipeline(label_pipeline, text_pipeline), None, None