def test_to_batch(self):
    """Check the shapes returned by ``Resource.to_batch`` for the train split.

    The batch is requested twice: once with the resource as constructed,
    and once after building the vocabulary and switching the ``text``
    column to a word sequence with ``fixed_len=5``.
    """
    column_names = ["sentiment", "text", "score"]
    resource = Resource(DATA_ROOT, column_names, "sentiment")

    # Default batch: no vocabulary built, no column conversion applied.
    features, labels = resource.to_batch("train")
    self.assertEqual(features.shape, (4, 2))
    self.assertEqual(labels.shape, (4, 1))

    # Build the vocabulary first, then convert the text column.
    resource.make_vocab()
    text_column = resource.column("text")
    text_column.as_word_seq(fixed_len=5)

    features, labels = resource.to_batch(
        "train",
        columns=("sentiment", "text"),
    )
    # The trailing axis is expected to match the vocabulary size.
    self.assertEqual(features.shape, (4, 5, len(resource.vocab)))
def test_to_batch_iter(self):
    """Check the batches produced by ``Resource.to_batch_iter``.

    After building the vocabulary and converting the ``text`` column to a
    word sequence of ``fixed_len`` entries, four batches are drawn from
    the iterator and the shape of each one is verified.
    """
    batch_size = 2
    fixed_len = 5

    resource = Resource(DATA_ROOT, ["sentiment", "text", "score"], "sentiment")
    resource.make_vocab()
    resource.column("text").as_word_seq(fixed_len=fixed_len)

    batches, count = resource.to_batch_iter(
        "train",
        columns=("sentiment", "text"),
        batch_size=batch_size,
    )
    # NOTE(review): `count` is compared with `batch_size`; presumably it is
    # the number of batches and merely coincides with the batch size for
    # this fixture -- confirm against Resource.to_batch_iter.
    self.assertEqual(count, batch_size)

    # NOTE(review): four batches are drawn although `count` is asserted to
    # be 2, so the iterator presumably cycles over the data -- confirm.
    for _ in range(4):
        features, labels = next(batches)
        self.assertEqual(labels.shape, (batch_size, 1))
        self.assertEqual(
            features.shape,
            (batch_size, fixed_len, len(resource.vocab)),
        )
        # Also exercises `back()` on the batch; the result is only printed.
        print(resource.column("text").back(features))