Beispiel #1
0
 def test_normalise_simple(self):
     """Normalising with map1.csv maps each variant token to its canonical form."""
     path = os.path.join(self._mapping_dir, 'map1.csv')
     variant_map = VariantMapping(path, self._tokenizer)
     # 'DURSTYVWX' should collapse to the normalised string 'DQQDDD'.
     self.assertEqual(variant_map.normalise('DURSTYVWX'), 'DQQDDD')
Beispiel #2
0
 def test_normalise_simple(self):
     """Normalising with map1.csv maps each variant token to its canonical form."""
     path = os.path.join(self._mapping_dir, 'map1.csv')
     variant_map = VariantMapping(path, self._tokenizer)
     # 'DURSTYVWX' should collapse to the normalised string 'DQQDDD'.
     self.assertEqual(variant_map.normalise('DURSTYVWX'), 'DQQDDD')
Beispiel #3
0
 def test_normalise_complex(self):
     """Normalising with map3.csv and an explicit latin tokenizer."""
     path = os.path.join(self._mapping_dir, 'map3.csv')
     latin_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     variant_map = VariantMapping(path, latin_tokenizer)
     result = variant_map.normalise('then the anthem hit the ant')
     self.assertEqual(result, 'then that anthem hit bees')
Beispiel #4
0
 def test_normalise_complex(self):
     """Normalising with map3.csv and an explicit latin tokenizer."""
     path = os.path.join(self._mapping_dir, 'map3.csv')
     latin_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     variant_map = VariantMapping(path, latin_tokenizer)
     result = variant_map.normalise('then the anthem hit the ant')
     self.assertEqual(result, 'then that anthem hit bees')
Beispiel #5
0
 def test_denormalise_complex(self):
     """Denormalising with map3.csv yields every variant combination; order is irrelevant."""
     path = os.path.join(self._mapping_dir, 'map3.csv')
     latin_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     variant_map = VariantMapping(path, latin_tokenizer)
     variants = variant_map.denormalise('then that thatched anthem hit bees')
     # Compare as sets: denormalise makes no ordering guarantee here.
     self.assertEqual(
         set(variants),
         {
             'then that thatched anthem hit bees',
             'then the thatched anthem hit bees',
             'then that thatched anthem hit the ant',
             'then the thatched anthem hit the ant',
             "then Mozart's thatched anthem hit bees",
             "then Mozart's thatched anthem hit the ant",
         })
Beispiel #6
0
 def test_denormalise_complex(self):
     """Denormalising with map3.csv yields every variant combination; order is irrelevant."""
     path = os.path.join(self._mapping_dir, 'map3.csv')
     latin_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     variant_map = VariantMapping(path, latin_tokenizer)
     variants = variant_map.denormalise('then that thatched anthem hit bees')
     # Compare as sets: denormalise makes no ordering guarantee here.
     self.assertEqual(
         set(variants),
         {
             'then that thatched anthem hit bees',
             'then the thatched anthem hit bees',
             'then that thatched anthem hit the ant',
             'then the thatched anthem hit the ant',
             "then Mozart s thatched anthem hit bees",
             "then Mozart s thatched anthem hit the ant",
         })
Beispiel #7
0
 def test_denormalise_simple(self):
     """Denormalising 'AQA' with map1.csv produces every variant combination."""
     path = os.path.join(self._mapping_dir, 'map1.csv')
     variant_map = VariantMapping(path, self._tokenizer)
     variants = variant_map.denormalise('AQA')
     # All combinations of the per-token variants; compared as sets since
     # denormalise makes no ordering guarantee.
     expected_variants = {
         'ABCA', 'NBCA', 'ABCN', 'NBCN', 'OPBCA', 'ABCOP', 'OPBCOP',
         'AQA', 'ARSTA', 'AUA',
         'NQA', 'AQN', 'NQN', 'OPQA', 'AQOP', 'OPQOP',
         'NRSTA', 'ARSTN', 'NRSTN', 'OPRSTA', 'ARSTOP', 'OPRSTOP',
         'NUA', 'AUN', 'NUN', 'OPUA', 'AUOP', 'OPUOP',
         'OPUN', 'OPRSTN', 'NRSTOP', 'OPBCN', 'OPQN', 'NQOP', 'NBCOP',
         'NUOP',
     }
     self.assertEqual(set(variants), expected_variants)
Beispiel #8
0
 def test_denormalise_simple(self):
     """Denormalising 'AQA' with map1.csv produces every variant combination."""
     path = os.path.join(self._mapping_dir, 'map1.csv')
     variant_map = VariantMapping(path, self._tokenizer)
     variants = variant_map.denormalise('AQA')
     # All combinations of the per-token variants; compared as sets since
     # denormalise makes no ordering guarantee.
     expected_variants = {
         'ABCA', 'NBCA', 'ABCN', 'NBCN', 'OPBCA', 'ABCOP', 'OPBCOP',
         'AQA', 'ARSTA', 'AUA',
         'NQA', 'AQN', 'NQN', 'OPQA', 'AQOP', 'OPQOP',
         'NRSTA', 'ARSTN', 'NRSTN', 'OPRSTA', 'ARSTOP', 'OPRSTOP',
         'NUA', 'AUN', 'NUN', 'OPUA', 'AUOP', 'OPUOP',
         'OPUN', 'OPRSTN', 'NRSTOP', 'OPBCN', 'OPQN', 'NQOP', 'NBCOP',
         'NUOP',
     }
     self.assertEqual(set(variants), expected_variants)
Beispiel #9
0
 def test_mapping_long_normalised_form(self):
     """normalise() raises on a malformed mapping file (long_normalised_form.csv)."""
     path = os.path.join(self._mapping_dir, 'long_normalised_form.csv')
     variant_map = VariantMapping(path, self._tokenizer)
     with self.assertRaises(exceptions.MalformedNormaliserMappingError):
         variant_map.normalise('foo')
Beispiel #10
0
 def test_mapping_duplicate_forms_in_row(self):
     """normalise() raises on a malformed mapping file (normalised_same_variant.csv)."""
     path = os.path.join(self._mapping_dir, 'normalised_same_variant.csv')
     variant_map = VariantMapping(path, self._tokenizer)
     with self.assertRaises(exceptions.MalformedNormaliserMappingError):
         variant_map.normalise('foo')