def build_legacy_torchtext_vocab_pipeline(vocab_file):
    """Build a text pipeline backed by a vocab created from ``vocab_file``.

    The pipeline chains the ``"basic_english"`` tokenizer with a vocab lookup
    (``vocab_func``) through ``sequential_transforms``.

    Args:
        vocab_file: Path of the text file the vocabulary is built from.

    Returns:
        A 3-tuple ``(pipeline, None, None)``; the last two slots are always
        ``None`` in this builder.
    """
    # Kept as a function-local import, as in the original code.
    from torchtext.vocab import build_vocab_from_iterator

    tokenizer = get_tokenizer("basic_english")

    def token_iterator(path):
        """Yield the contents of ``path`` one item at a time."""
        # The context manager closes the file once the generator is exhausted
        # or closed; the previous bare ``open()`` leaked the handle.
        with open(path, 'r') as handle:
            for line in handle:
                # NOTE(review): iterating a str yields single characters, so
                # every item handed to build_vocab_from_iterator is one
                # character (newlines included). If the file is meant to hold
                # one token per line this should probably yield
                # ``[line.rstrip()]`` instead -- confirm the file format
                # before changing it.
                yield from line

    vocab = build_vocab_from_iterator(token_iterator(vocab_file))
    pipeline = sequential_transforms(tokenizer, vocab_func(vocab))
    return pipeline, None, None
def build_legacy_batch_torchtext_vocab_pipeline(vocab_file):
    """Build a ``TextClassificationPipeline`` using a vocab from ``vocab_file``.

    The text side chains the ``"basic_english"`` tokenizer with a vocab lookup
    (``vocab_func``) through ``sequential_transforms``; the label side is
    ``totensor(dtype=torch.long)``.

    Args:
        vocab_file: Path of the text file the vocabulary is built from.

    Returns:
        A 3-tuple ``(TextClassificationPipeline(label_pipeline, text_pipeline),
        None, None)``; the last two slots are always ``None`` in this builder.
    """
    # Kept as function-local imports, as in the original code.
    from torchtext.vocab import build_vocab_from_iterator
    from transforms import TextClassificationPipeline

    tokenizer = get_tokenizer("basic_english")

    def token_iterator(path):
        """Yield the contents of ``path`` one item at a time."""
        # The context manager closes the file once the generator is exhausted
        # or closed; the previous bare ``open()`` leaked the handle.
        with open(path, 'r') as handle:
            for line in handle:
                # NOTE(review): iterating a str yields single characters, so
                # every item handed to build_vocab_from_iterator is one
                # character (newlines included). If the file is meant to hold
                # one token per line this should probably yield
                # ``[line.rstrip()]`` instead -- confirm the file format
                # before changing it.
                yield from line

    vocab = build_vocab_from_iterator(token_iterator(vocab_file))
    text_pipeline = sequential_transforms(tokenizer, vocab_func(vocab))
    label_pipeline = totensor(dtype=torch.long)
    return TextClassificationPipeline(label_pipeline, text_pipeline), None, None