def test_repeat_1(self):
    """Check ``TString.repeat`` on a single one-character string.

    The repeated TString is compared both against its own tensor tiled
    with ``repeat(1, 1, 10)`` and against the plain Python string
    repeated ten times.
    """
    char = "a"
    expected_word = char * 10
    single = TString(char)
    repeated = single.repeat(10)
    expected_tensor = single.toTensor().repeat(1, 1, 10)
    self.assertTensorEqual(expected_tensor, repeated.toTensor())
    # The text view has to agree with the tensor view; test_repeat_2
    # makes the same check for a two-element list.
    self.assertListEqual([expected_word], repeated.words)
def test_concat_4(self):
    """Check that ``TString("") + TString("cat")`` matches plain concatenation.

    The reference is a StringList holding the Python-level ``"" + "cat"``.
    """
    empty, word = "", "cat"
    reference = StringList([empty + word])
    joined = TString(empty) + TString(word)
    self.assertEqual(reference.words, joined.words)
    self.assertTensorEqual(TString(reference).tensor, joined.tensor)
def test_slice(self):
    """Check that ``TString[1:3]`` matches slicing every word with ``[1:3]``."""
    source_words = StringList(["The", "cat"])
    sliced_ts = TString(source_words)[1:3]
    # Reference: apply the same slice to each Python string individually.
    sliced_words = StringList([word[1:3] for word in source_words])
    reference = TString(sliced_words)
    self.assertEqual(sliced_words.words, sliced_ts.words)
    self.assertTensorEqual(reference.tensor, sliced_ts.tensor)
def test_concat_1(self):
    """Check TString concatenation against StringList concatenation.

    Two single-element lists are concatenated both as StringLists and as
    TStrings; the resulting words and tensors must agree.
    """
    left = StringList(["the "])
    right = StringList(["cat"])
    reference = left + right
    joined = TString(left) + TString(right)
    # No debug printing needed: assertEqual reports both operands on failure.
    self.assertEqual(reference.words, joined.words)
    self.assertTensorEqual(TString(reference).tensor, joined.tensor)
def test_repeat_2(self):
    """Check ``TString.repeat`` on a list of two identical one-character strings.

    Verifies both the tensor (against ``repeat(1, 1, 10)`` of the source
    tensor) and the resulting words.
    """
    char = "a"
    expected_word = char * 10
    pair = TString(StringList([char, char]))
    repeated = pair.repeat(10)
    expected_tensor = pair.toTensor().repeat(1, 1, 10)
    print(repeated.toStringList())
    self.assertTensorEqual(expected_tensor, repeated.toTensor())
    self.assertListEqual([expected_word, expected_word], repeated.words)
def test_concat_2(self):
    """Check element-wise concatenation of two two-element lists.

    Besides words and tensors, the length (7) and depth (2) of both the
    StringList result and the TString result are verified.
    """
    prefixes = StringList(["the ", "the "])
    nouns = StringList(["cat", "dog"])
    reference = prefixes + nouns
    joined = TString(prefixes) + TString(nouns)

    self.assertEqual(reference.words, joined.words)
    self.assertTensorEqual(TString(reference).tensor, joined.tensor)

    expected_length = 7
    self.assertEqual(len(reference), expected_length)
    self.assertEqual(len(joined), expected_length)

    expected_depth = 2
    self.assertEqual(reference.depth, expected_depth)
    self.assertEqual(joined.depth, expected_depth)
def test_export_reload(self):
    """Check that an exported and reloaded model reproduces the original outputs.

    Both the raw model output on ``self.x`` and the binarized prediction
    marks for ``self.text_example`` are compared before and after the
    export/load round trip.
    """
    def binarized_prediction(model):
        # Fresh TString and tag list on every call, as in a direct invocation.
        return SimplePredictor(model).pred_binarized(
            TString(self.text_example), [Catalogue.GENEPROD])

    binarized_before = binarized_prediction(self.model)
    output_before = self.model(self.x)

    self.myzip = export_model(self.model, custom_name='test_model_importexport')
    reloaded = load_model('test_model_importexport.zip')

    binarized_after = binarized_prediction(reloaded)
    output_after = reloaded(self.x)

    self.assertTensorEqual(output_before, output_after)
    self.assertTensorEqual(binarized_before.marks, binarized_after.marks)
def test_lossless_decode_encode(self):
    """Check that text -> TString -> tensor -> TString round-trips losslessly.

    A TString built from text and a TString rebuilt from that TString's
    tensor must have equal tensors, equal tensor sizes and equal lengths.
    """
    text = "hallo"
    from_text = TString(text)
    from_tensor = TString(from_text.toTensor())

    self.assertTensorEqual(from_text.toTensor(), from_tensor.toTensor())
    self.assertEqual(from_text.toTensor().size(), from_tensor.toTensor().size())
    # Compare the two different objects; comparing one object with itself
    # would pass regardless of what the round trip produced.
    self.assertEqual(len(from_text), len(from_tensor))
    self.assertNotEqual(len(from_tensor), 0)
    self.assertEqual([text], from_text.words)
@classmethod
def setUpClass(cls):
    """Create the fixtures shared by every test in this class.

    unittest invokes this hook on the class itself, so it must be a
    classmethod. It stores on the class:

    * ``text_example`` / ``x``: the example text and its TString,
    * ``y``: the two-row target tensor aligned with the example text,
    * ``entity_model``: result of ``toy_model(x.tensor, y)``, with
      ``.eval()`` called on it,
    * ``anonymized_text_example`` / ``z``: the example with every ``"X"``
      replaced by ``MARKING_CHAR``, and its TString,
    * ``context_model``: result of ``toy_model`` on ``z.tensor`` with
      ``selected_features=['intervention']``, with ``.eval()`` called on it.
    """
    # run only once
    cls.text_example = "AAAAAAA XXX AAA"
    cls.x = TString(cls.text_example)
    cls.y = torch.Tensor(
        #  A  A  A  A  A  A  A     X  X  X     A  A  A
        [[[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
          [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1]]])
    cls.selected_features = ["geneprod"]
    cls.entity_model = toy_model(cls.x.tensor, cls.y)
    cls.entity_model.eval()
    cls.anonymized_text_example = cls.text_example.replace(
        "X", MARKING_CHAR)
    cls.z = TString(cls.anonymized_text_example)
    cls.context_model = toy_model(cls.z.tensor, cls.y,
                                  selected_features=['intervention'])
    cls.context_model.eval()
def test_predictor_padding(self):
    """Check ``Predictor.padding`` on a 200-character input.

    The expected padding per side is
    ``ceil(max(config.min_size - 200, 0) / 2) + config.min_padding``, and
    the padded TString must equal the input wrapped in that many
    ``config.padding_char`` characters on each side.
    """
    predictor = Predictor(self.entity_model)
    raw_text = "a" * 200
    encoded = TString(raw_text)
    padded, padding_length = predictor.padding(encoded)

    shortfall = max(config.min_size - 200, 0)
    expected_padding_length = ceil(shortfall / 2) + config.min_padding
    self.assertEqual(expected_padding_length, padding_length)
    print("config.min_size, config.min_size, padding_length",
          config.min_size, config.min_size, padding_length)

    expected_padded = TString(
        config.padding_char * expected_padding_length
        + raw_text
        + config.padding_char * expected_padding_length)
    print(padded.toStringList())
    self.assertTensorEqual(expected_padded.tensor, padded.tensor)
def test_entity_predictor_1(self):
    """Check that ``Predictor.forward`` output has the same size as ``self.y``."""
    predictor = Predictor(self.entity_model)
    batch = TString(StringList([self.text_example]))
    prediction = predictor.forward(batch, torch.Tensor(0))
    expected_size = list(self.y.size())
    self.assertEqual(expected_size, list(prediction.size()))
def test_empty_string(self):
    """Check that a TString built from ``''`` has no words and a 0-dim tensor."""
    blank_ts = TString('')
    self.assertEqual([], blank_ts.words)
    self.assertEqual(blank_ts.tensor.dim(), 0)
def test_repeat_3(self):
    """Check that ``repeat(0)`` raises ``RepeatError``."""
    single = TString("a")
    # Callable form of assertRaises: invokes single.repeat(0) and expects the error.
    self.assertRaises(RepeatError, single.repeat, 0)
def test_len(self):
    """Check that ``len(TString(text))`` equals ``len(text)`` for a 10-char string."""
    digits = "1234567890"
    encoded_length = len(TString(digits))
    self.assertEqual(encoded_length, len(digits))
def test_concat_5(self):
    """Check that concatenating TStrings built from lists of 1 and 2 items fails.

    The addition must raise ``ConcatenatingTStringWithUnequalDepthError``.
    """
    one_item = StringList(["the "])
    two_items = StringList(["cat", "dog"])
    expected_error = ConcatenatingTStringWithUnequalDepthError
    with self.assertRaises(expected_error):
        TString(one_item) + TString(two_items)