Ejemplo n.º 1
0
 def test_initialize_word_tensorizer(self):
     """Check the vocabulary size after initializing on the training rows.

     Every row of ``self.data.train`` is sent to the object returned by
     ``WordTensorizer.initialize()``; once it is closed, the tensorizer's
     ``vocab`` is expected to hold 49 entries.
     """
     word_tensorizer = WordTensorizer(column="text")
     initializer = word_tensorizer.initialize()

     # The first send carries no row; it only starts the initializer
     # so that the following sends can deliver data.
     initializer.send(None)
     for example in self.data.train:
         initializer.send(example)
     initializer.close()

     vocab_size = len(word_tensorizer.vocab)
     self.assertEqual(49, vocab_size)
Ejemplo n.º 2
0
    def test_create_word_tensors(self):
        """Check ``numberize`` output for two sample rows.

        The tensorizer is first initialized on ``self.data.train``. Each
        sample row is then numberized on its own and the resulting
        ``(tokens, seq_len)`` pair is compared with the expected values.
        """
        word_tensorizer = WordTensorizer(text_column="text")
        initializer = word_tensorizer.initialize()
        # The first send carries no row; it only starts the initializer.
        initializer.send(None)
        for example in self.data.train:
            initializer.send(example)
        initializer.close()

        samples = [{"text": "I want some coffee"}, {"text": "Turn it up"}]
        # Expected (token ids, sequence length) for each sample, in order.
        # NOTE(review): id 0 presumably marks out-of-vocabulary words -- confirm.
        expected = [([24, 0, 0, 0], 4), ([13, 47, 9], 3)]

        # Numberize one row at a time, right before checking it, so the
        # second row is only processed after the first has passed.
        for sample, (want_tokens, want_len) in zip(samples, expected):
            tokens, seq_len = word_tensorizer.numberize(sample)
            self.assertEqual(want_tokens, tokens)
            self.assertEqual(want_len, seq_len)
Ejemplo n.º 3
0
    def test_create_word_tensors(self):
        """Check ``create_training_tensors`` on a two-row batch.

        After initializing the tensorizer on ``self.data.train``, a batch of
        two ``types.Text`` rows is converted and the returned token and
        sequence-length tensors are checked for type, size and contents.
        """
        word_tensorizer = WordTensorizer(column="text")
        initializer = word_tensorizer.initialize()
        # The first send carries no row; it only starts the initializer.
        initializer.send(None)
        for example in self.data.train:
            initializer.send(example)
        initializer.close()

        sentences = ("I want some coffee", "Turn it up")
        batch = [{"text": types.Text(sentence)} for sentence in sentences]

        token_ids, lengths = word_tensorizer.create_training_tensors(batch)

        # Both results must be integer (long) tensors.
        self.assertIsInstance(token_ids, torch.LongTensor)
        self.assertIsInstance(lengths, torch.LongTensor)

        # Two rows; the token tensor is as wide as the longer sentence.
        self.assertEqual((2, 4), token_ids.size())
        self.assertEqual((2,), lengths.size())

        # NOTE(review): the trailing 1 in the second row is presumably the
        # padding index -- confirm against the vocabulary.
        expected_ids = [[24, 0, 0, 0], [13, 47, 9, 1]]
        expected_lengths = [4, 3]
        self.assertEqual(expected_ids, token_ids.tolist())
        self.assertEqual(expected_lengths, lengths.tolist())