def test_initialize_word_tensorizer(self):
    """Initialize a ``WordTensorizer`` from the training rows and check its vocab.

    Every row of ``self.data.train`` is sent to the object returned by
    ``initialize()``; once that object is closed, the tensorizer's ``vocab``
    is expected to contain exactly 49 entries.
    """
    word_tensorizer = WordTensorizer(column="text")
    initializer = word_tensorizer.initialize()

    # Prime the initializer before any real row is sent (the original
    # "kick"); presumably it is a generator-style coroutine, which must
    # receive ``None`` first.
    feed = initializer.send
    feed(None)
    for example in self.data.train:
        feed(example)
    initializer.close()

    vocab_size = len(word_tensorizer.vocab)
    self.assertEqual(49, vocab_size)
def test_create_word_tensors(self):
    """Check that ``numberize`` turns single rows into ``(tokens, seq_len)``.

    The tensorizer is first initialized on every row of ``self.data.train``.
    Each sample row is then numberized on its own and compared against the
    expected token ids and sequence length.
    """
    # NOTE(review): another ``test_create_word_tensors`` is defined right
    # after this method. If both live in the same class, the later
    # definition replaces this one and this test never runs -- confirm
    # whether this variant should be renamed or removed.
    #
    # The keyword is ``column`` here to match the other tests in this file
    # (this method previously passed ``text_column``).
    tensorizer = WordTensorizer(column="text")
    init = tensorizer.initialize()
    init.send(None)  # kick: prime the initializer before sending real rows
    for row in self.data.train:
        init.send(row)
    init.close()

    # (input row, expected token ids, expected sequence length)
    # The repeated 0 ids are presumably the unknown-token index -- TODO confirm.
    cases = [
        ({"text": "I want some coffee"}, [24, 0, 0, 0], 4),
        ({"text": "Turn it up"}, [13, 47, 9], 3),
    ]
    for row, expected_tokens, expected_len in cases:
        tokens, seq_len = tensorizer.numberize(row)
        self.assertEqual(expected_tokens, tokens)
        self.assertEqual(expected_len, seq_len)
def test_create_word_tensors(self):
    """Check the batched tensors built by ``create_training_tensors``.

    After initializing the tensorizer on ``self.data.train``, a batch of two
    sentences is converted. Both results must be ``torch.LongTensor``
    objects: a ``(2, 4)`` token tensor and a ``(2,)`` tensor of sequence
    lengths.
    """
    word_tensorizer = WordTensorizer(column="text")
    initializer = word_tensorizer.initialize()
    initializer.send(None)  # prime the initializer before sending real rows
    for example in self.data.train:
        initializer.send(example)
    initializer.close()

    sentences = ("I want some coffee", "Turn it up")
    batch = [{"text": types.Text(sentence)} for sentence in sentences]
    token_tensor, length_tensor = word_tensorizer.create_training_tensors(batch)

    # Types first, then shapes, then contents.
    self.assertIsInstance(token_tensor, torch.LongTensor)
    self.assertIsInstance(length_tensor, torch.LongTensor)

    self.assertEqual((2, 4), token_tensor.size())
    self.assertEqual((2,), length_tensor.size())

    # The second row has only three tokens; its trailing 1 is presumably the
    # padding index -- TODO confirm.
    expected_tokens = [[24, 0, 0, 0], [13, 47, 9, 1]]
    expected_lengths = [4, 3]
    self.assertEqual(expected_tokens, token_tensor.tolist())
    self.assertEqual(expected_lengths, length_tensor.tolist())