コード例 #1
0
 def test_normalise_simple(self):
     """Normalising a simple text applies the variant mapping in map1.csv."""
     path = os.path.join(self._mapping_dir, 'map1.csv')
     variant_mapping = VariantMapping(path, self._tokenizer)
     # 'DURSTYVWX' should collapse to its normalised forms.
     self.assertEqual(variant_mapping.normalise('DURSTYVWX'), 'DQQDDD')
コード例 #2
0
ファイル: normaliser_test.py プロジェクト: ajenhl/tacl
 def test_normalise_simple(self):
     """A simple text is normalised according to map1.csv."""
     mapping = VariantMapping(
         os.path.join(self._mapping_dir, 'map1.csv'), self._tokenizer)
     result = mapping.normalise('DURSTYVWX')
     self.assertEqual(result, 'DQQDDD')
コード例 #3
0
ファイル: normaliser_test.py プロジェクト: ajenhl/tacl
 def test_normalise_complex(self):
     """Multi-token variants are normalised using the latin tokenizer."""
     latin_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     mapping = VariantMapping(
         os.path.join(self._mapping_dir, 'map3.csv'), latin_tokenizer)
     result = mapping.normalise('then the anthem hit the ant')
     self.assertEqual(result, 'then that anthem hit bees')
コード例 #4
0
 def test_normalise_complex(self):
     """Normalising a text of latin tokens applies map3.csv's mapping."""
     path = os.path.join(self._mapping_dir, 'map3.csv')
     word_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     variant_mapping = VariantMapping(path, word_tokenizer)
     self.assertEqual(
         variant_mapping.normalise('then the anthem hit the ant'),
         'then that anthem hit bees')
コード例 #5
0
ファイル: normaliser_test.py プロジェクト: ajenhl/tacl
 def test_denormalise_complex(self):
     """denormalise yields every variant combination defined in map3.csv.

     Order of the generated variants is not significant, so the
     comparison is done on sets.
     """
     tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     mapping = VariantMapping(
         os.path.join(self._mapping_dir, 'map3.csv'), tokenizer)
     result = mapping.denormalise('then that thatched anthem hit bees')
     expected = {
         'then that thatched anthem hit bees',
         'then the thatched anthem hit bees',
         'then that thatched anthem hit the ant',
         'then the thatched anthem hit the ant',
         "then Mozart's thatched anthem hit bees",
         "then Mozart's thatched anthem hit the ant",
     }
     self.assertEqual(set(result), expected)
コード例 #6
0
 def test_denormalise_complex(self):
     """Denormalising a map3.csv text produces all variant combinations.

     The result order is unspecified, so both sides are compared as sets.
     """
     path = os.path.join(self._mapping_dir, 'map3.csv')
     latin_tokenizer = Tokenizer(*constants.TOKENIZERS['latin'])
     variant_mapping = VariantMapping(path, latin_tokenizer)
     expected_variants = {
         'then that thatched anthem hit bees',
         'then the thatched anthem hit bees',
         'then that thatched anthem hit the ant',
         'then the thatched anthem hit the ant',
         "then Mozart s thatched anthem hit bees",
         "then Mozart s thatched anthem hit the ant",
     }
     actual_variants = variant_mapping.denormalise(
         'then that thatched anthem hit bees')
     self.assertEqual(set(actual_variants), expected_variants)
コード例 #7
0
ファイル: normaliser_test.py プロジェクト: ajenhl/tacl
 def test_denormalise_simple(self):
     """denormalise expands 'AQA' into every variant combination in map1.csv.

     Result order is not significant, so the comparison is set-based.
     """
     mapping = VariantMapping(
         os.path.join(self._mapping_dir, 'map1.csv'), self._tokenizer)
     result = mapping.denormalise('AQA')
     expected = {
         'ABCA',
         'NBCA',
         'ABCN',
         'NBCN',
         'OPBCA',
         'ABCOP',
         'OPBCOP',
         'AQA',
         'ARSTA',
         'AUA',
         'NQA',
         'AQN',
         'NQN',
         'OPQA',
         'AQOP',
         'OPQOP',
         'NRSTA',
         'ARSTN',
         'NRSTN',
         'OPRSTA',
         'ARSTOP',
         'OPRSTOP',
         'NUA',
         'AUN',
         'NUN',
         'OPUA',
         'AUOP',
         'OPUOP',
         'OPUN',
         'OPRSTN',
         'NRSTOP',
         'OPBCN',
         'OPQN',
         'NQOP',
         'NBCOP',
         'NUOP',
     }
     self.assertEqual(set(result), expected)
コード例 #8
0
 def test_denormalise_simple(self):
     """Denormalising 'AQA' yields all variant combinations from map1.csv.

     The comparison is made on sets since the order of the generated
     variants is unspecified.
     """
     path = os.path.join(self._mapping_dir, 'map1.csv')
     variant_mapping = VariantMapping(path, self._tokenizer)
     actual_variants = variant_mapping.denormalise('AQA')
     expected_variants = [
         'ABCA',
         'NBCA',
         'ABCN',
         'NBCN',
         'OPBCA',
         'ABCOP',
         'OPBCOP',
         'AQA',
         'ARSTA',
         'AUA',
         'NQA',
         'AQN',
         'NQN',
         'OPQA',
         'AQOP',
         'OPQOP',
         'NRSTA',
         'ARSTN',
         'NRSTN',
         'OPRSTA',
         'ARSTOP',
         'OPRSTOP',
         'NUA',
         'AUN',
         'NUN',
         'OPUA',
         'AUOP',
         'OPUOP',
         'OPUN',
         'OPRSTN',
         'NRSTOP',
         'OPBCN',
         'OPQN',
         'NQOP',
         'NBCOP',
         'NUOP',
     ]
     self.assertEqual(set(actual_variants), set(expected_variants))
コード例 #9
0
 def test_mapping_long_normalised_form(self):
     """A mapping with an over-long normalised form raises on normalise."""
     path = os.path.join(self._mapping_dir, 'long_normalised_form.csv')
     mapping = VariantMapping(path, self._tokenizer)
     with self.assertRaises(exceptions.MalformedNormaliserMappingError):
         mapping.normalise('foo')
コード例 #10
0
 def test_mapping_duplicate_forms_in_row(self):
     """A mapping row repeating the same form raises on normalise."""
     path = os.path.join(
         self._mapping_dir, 'normalised_same_variant.csv')
     mapping = VariantMapping(path, self._tokenizer)
     with self.assertRaises(exceptions.MalformedNormaliserMappingError):
         mapping.normalise('foo')