Example #1
0
    def iters(cls, batch_size: int = 32, device: int = 0, root: str = '.data',
              vectors: Tensor = None, **kwargs) -> Tuple[Iterator, Iterator, Iterator]:
        """Build vocabularies and return (train, valid, test) iterators.

        Args:
            batch_size: Number of examples per batch.
            device: Device ordinal on which batches are created.
            root: Root directory where the dataset lives / is downloaded.
            vectors: Optional pretrained vectors for the text vocabulary.
            **kwargs: Forwarded to ``cls.splits``.

        Returns:
            A 3-tuple of iterators over the train, validation and test
            splits, matching the declared return annotation.
        """
        text = Field()
        label = Field(sequential=False)

        train, valid, test = cls.splits(text, label, root=root, **kwargs)

        # Vocab is built on the training split only, so valid/test share it.
        text.build_vocab(train, vectors=vectors)
        label.build_vocab(train)

        # BUG FIX: previously only (train, test) was passed, dropping the
        # validation split and returning a 2-tuple despite the 3-tuple
        # return annotation. Pass all three splits.
        return BucketIterator.splits(
            (train, valid, test), batch_size=batch_size, device=device)
Example #2
0
params = parser.parse_args()

# Collect every parsed CLI option, plus the runtime device, for display.
# (The original identity comprehension [(k, v) for k, v in ...] is just
# list(...) of the items view.)
kvs = list(vars(params).items())
kvs.append(('Device', device))

print_kv_box('Current Configuration', kvs)

# Debug mode: smoke-test the character-window data pipeline end to end by
# building a vocabulary, windowing the corpus, and printing every batch.
if params.mode == 'debug':
    tokenizer = WordToCharTokenizer()
    # batch_first=True suggests (batch, seq)-shaped batches, assuming a
    # torchtext-style Field -- TODO confirm against the Field implementation.
    text_field = Field(tokenize=tokenizer, batch_first=True)
    ds = RandomizedTextWindowDataset(params.dataset,
                                     text_field,
                                     params.window_size,
                                     topk=params.topk,
                                     newline_eos=False)
    # Build the vocab before iterating so tokens can be numericalized.
    text_field.build_vocab(ds)
    # Presumably an 80/20 train/test split -- verify ds.split's contract.
    train_ds, test_ds = ds.split(0.8)
    # Final argument 0.0 is presumably the noise rate, i.e. windows pass
    # through unnoised here -- NOTE(review): confirm in the iterator class.
    iterator = NoisedPreWindowedIterator(train_ds, params.batch_size,
                                         params.window_size, 0.0)
    # NOTE(review): the meaning of the second argument (1) is not visible
    # from this file.
    iterator = PredictMiddleNoisedWindowIterator(iterator, 1)
    # Print each batch so the pipeline output can be inspected by hand.
    for b in iterator:
        print(b)
    i = 1  # NOTE(review): appears unused -- possibly a breakpoint anchor.
    # model = MLP(51, 27, 1024, 3)
    # text_field = Field(tokenize=tokenize, batch_first=True)
    # ds = SplittableLanguageModelingDataset(params.dataset, text_field, newline_eos=False)
    # text_field.build_vocab(ds)
    # train, test = ds.split()
    # model = MLP(51, len(text_field.vocab), 1024, 3)
    # iterator = PredictMiddleNoisedWindowIterator(ds, 64, 51, 0.1, 1)
    # for b in iterator: