def test_initialize_list_tensorizers(self):
    """Initialize two label-list tensorizers and check their vocab sizes.

    The "intent" tensorizer is built with ``pad_in_vocab=True`` and
    ``allow_unknown=True``; the "goal" tensorizer uses the defaults.
    """
    intent_tensorizer = LabelListTensorizer(
        label_column="intent", pad_in_vocab=True, allow_unknown=True
    )
    goal_tensorizer = LabelListTensorizer(label_column="goal")
    tensorizers = {"intent": intent_tensorizer, "goal": goal_tensorizer}

    initialize_tensorizers(tensorizers, self.data.train)

    # Checked in the same order as the dict literal: "intent", then "goal".
    expected_vocab_sizes = {"intent": 9, "goal": 7}
    for name, expected_size in expected_vocab_sizes.items():
        self.assertEqual(expected_size, len(tensorizers[name].vocab))
def test_label_list_tensors_pad_missing(self):
    """Check how ``pad_missing`` treats ``None`` and empty-string labels.

    With ``pad_missing=True`` the two missing labels are expected to be
    numberized as -1 and to stay out of the vocab; after switching
    ``pad_missing`` off, numberizing the same data is expected to raise.
    """
    # test None and empty case
    frame = pd.DataFrame(
        {
            "session_id": [1, 1, 1, 1],
            "label": ["positive", "negative", None, ""],
        }
    )
    ds = SessionPandasDataSource(
        test_df=frame,
        schema={"label": List[str]},
        id_col="session_id",
    )
    label_tensorizer = LabelListTensorizer(
        pad_missing=True,
        label_column="label",
        pad_in_vocab=False,
        allow_unknown=False,
    )
    tensorizers = {"label": label_tensorizer}
    initialize_tensorizers(tensorizers, ds.test)
    self.assertEqual(2, len(tensorizers["label"].vocab))

    # only one row in test data
    first_row = next(iter(ds.test))
    label_idx_list, _ = tensorizers["label"].numberize(first_row)
    self.assertEqual([0, 1, -1, -1], label_idx_list)

    tensorizers["label"].pad_missing = False
    with self.assertRaises(Exception):
        # Re-read the row from the data source, as the first call did.
        tensorizers["label"].numberize(next(iter(ds.test)))
def test_create_label_list_tensors(self):
    """Numberize and tensorize the "intent" label lists of the train data.

    Verifies the per-row label indices and lengths returned by
    ``numberize``, then the padded batch and length tensors returned by
    ``tensorize`` (shorter rows are expected to be filled with index 1).
    """
    tensorizers = {
        "intent": LabelListTensorizer(
            label_column="intent", pad_in_vocab=True, allow_unknown=True
        )
    }
    initialize_tensorizers(tensorizers, self.data.train)
    intent_tensorizer = tensorizers["intent"]

    numberized = [intent_tensorizer.numberize(row) for row in self.data.train]

    # test label idx
    expected_label_ids = [[2, 3], [4, 5], [6, 7, 8]]
    for position, label_ids in enumerate(expected_label_ids):
        self.assertEqual(label_ids, numberized[position][0])

    # test seq lens
    expected_lengths = [2, 2, 3]
    for position, length in enumerate(expected_lengths):
        self.assertEqual(length, numberized[position][1])

    self.assertEqual(3, len(numberized))

    padded, lens = intent_tensorizer.tensorize(numberized)
    expected_padded = np.array([[2, 3, 1], [4, 5, 1], [6, 7, 8]])
    np.testing.assert_array_almost_equal(expected_padded, padded.detach().numpy())
    np.testing.assert_array_almost_equal(
        np.array(expected_lengths), lens.detach().numpy()
    )
def test_label_list_tensors_no_pad_in_vocab(self):
    """Tensorize "intent" label lists when padding is kept out of the vocab.

    With ``pad_in_vocab=False`` and ``allow_unknown=True`` the vocab is
    expected to hold 8 entries. An extra "unknown" label is appended to
    every row; it is expected to numberize to index 0, and shorter rows
    are expected to be padded with -1 in the batched tensor.
    """
    tensorizers = {
        "intent": LabelListTensorizer(
            label_column="intent", pad_in_vocab=False, allow_unknown=True
        )
    }
    initialize_tensorizers(tensorizers, self.data.train)
    self.assertEqual(8, len(tensorizers["intent"].vocab))

    tensors = []
    for row in self.data.train:
        # NOTE: this appends to the row's own label list in place, so the
        # row object yielded by the data source is modified.
        row["intent"].append("unknown")
        tensors.append(tensorizers["intent"].numberize(row))

    tensors, lens = tensorizers["intent"].tensorize(tensors)
    np.testing.assert_array_almost_equal(
        np.array([[1, 2, 0, -1], [3, 4, 0, -1], [5, 6, 7, 0]]),
        tensors.detach().numpy(),
    )
    # The lengths were previously unpacked but never verified. Each row
    # carries one extra label, matching the -1 padding positions above.
    np.testing.assert_array_almost_equal(
        np.array([3, 3, 4]), lens.detach().numpy()
    )