def test_collection(self):
    """Load 14 denormal substitutions from inline text and apply them."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)

    # One '"pattern","replacement"' pair per line; the duplicate " dot co "
    # line is still counted by the loader.
    loaded = coll.load_from_text("""
        " dot co ",".co"
        " dot uk ",".uk"
        " dot net ",".net"
        " dot ca ",".ca"
        " dot de ",".de"
        " dot jp ",".jp"
        " dot fr ",".fr"
        " dot es ",".es"
        " dot mil ",".mil"
        " dot co ",".co"
        " are not "," aren't "
        " can not "," can't "
        " could not "," couldn't "
        " could have "," could've "
        """)
    self.assertEqual(loaded, 14)

    # Whole-string denormalisation of known phrases.
    self.assertEqual(coll.denormalise_string("You are not him"), "You aren't him")
    self.assertEqual(coll.denormalise_string("keithsterling dot co dot uk"),
                     "keithsterling.co.uk")

    # Single-key lookup returns the stored pattern; unknown keys return None.
    self.assertEqual("(^dot co | dot co | dot co$)", coll.denormalise(" dot co "))
    self.assertIsNone(coll.denormalise(" dot cox "))
def test_reload_jp(self):
    """Reloading a Japanese denormal file keeps its lookups intact."""
    factory = StorageFactory()
    tokenizer = TokenizerJP()

    # Point the denormal store at the JP test data file.
    file_config = FileStorageConfiguration()
    file_config._denormal_storage = FileStoreConfiguration(
        file=os.path.dirname(__file__) + os.sep + "test_files" + os.sep + "denormal_jp.txt",
        format="text", extension="txt", encoding="utf-8", delete_on_start=False)
    engine = FileStorageEngine(file_config)
    factory._storage_engines[StorageFactory.DENORMAL] = engine
    factory._store_to_engine_map[StorageFactory.DENORMAL] = engine

    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.load(factory)

    # Initial load: "丸1" maps to "①"; a bare "丸" is not a key.
    self.assertEqual(coll.denormalise_string(tokenizer, "丸1の回答"), "①の回答")
    self.assertIsNone(coll.denormalise("丸"))

    # Behaviour must be unchanged after a reload.
    coll.reload(factory)
    self.assertEqual(coll.denormalise_string(tokenizer, "丸1の回答"), "①の回答")
    self.assertIsNone(coll.denormalise("丸"))
def test_collection(self):
    """Load 16 denormal substitutions (including www./.com) from text."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)

    # One '"pattern","replacement"' pair per line; 16 lines in total.
    loaded = coll.load_from_text("""
        " www dot ","www."
        " dot com ",".com "
        " dot co ",".co"
        " dot uk ",".uk"
        " dot net ",".net"
        " dot ca ",".ca"
        " dot de ",".de"
        " dot jp ",".jp"
        " dot fr ",".fr"
        " dot es ",".es"
        " dot mil ",".mil"
        " dot co ",".co"
        " are not "," aren't "
        " can not "," can't "
        " could not "," couldn't "
        " could have "," could've "
        """)
    self.assertEqual(loaded, 16)

    # Denormalise full sentences and URLs.
    self.assertEqual(coll.denormalise_string("You are not him"), "You aren't him")
    self.assertEqual(coll.denormalise_string("keithsterling dot co dot uk"),
                     "keithsterling.co.uk")
    self.assertEqual(coll.denormalise_string("www dot google dot com"),
                     "www.google.com")

    # Unknown keys yield None.
    self.assertIsNone(coll.denormalise(" dot cox "))
def test_load(self):
    """Load the denormal collection from a file-backed storage engine."""
    factory = StorageFactory()

    # Wire a file storage engine to the denormal test data.
    file_config = FileStorageConfiguration()
    file_config._denormal_storage = FileStoreConfiguration(
        file=os.path.dirname(__file__) + os.sep + "test_files" + os.sep + "denormal.txt",
        format="text", extension="txt", encoding="utf-8", delete_on_start=False)
    engine = FileStorageEngine(file_config)
    factory._storage_engines[StorageFactory.DENORMAL] = engine
    factory._store_to_engine_map[StorageFactory.DENORMAL] = engine

    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.load(factory)

    # Loaded data denormalises known text; unknown keys return None.
    self.assertEqual(coll.denormalise_string("keithsterling dot com"),
                     "keithsterling.com")
    self.assertIsNone(coll.denormalise(" dot cox "))
def assert_upload_from_text(self, store):
    """Shared check: upload denormal text into a store and reload it."""
    store.empty()

    # Upload 15 '"pattern","replacement"' pairs (no username needed).
    store.upload_from_text(None, """
        " dot uk ",".uk"
        " dot net ",".net"
        " dot ca ",".ca"
        " dot de ",".de"
        " dot jp ",".jp"
        " dot fr ",".fr"
        " dot au ",".au"
        " dot us ",".us"
        " dot ru ",".ru"
        " dot ch ",".ch"
        " dot it ",".it"
        " dot nl ",".nl"
        " dot se ",".se"
        " dot no ",".no"
        " dot es ",".es"
        """)

    coll = DenormalCollection()
    store.load(coll)

    # The stored value is a [compiled-pattern, replacement] pair.
    self.assertEqual(
        coll.denormalise(" DOT UK "),
        [re.compile('(^DOT UK | DOT UK | DOT UK$)', re.IGNORECASE), '.UK'])
    self.assertEqual(coll.denormalise_string("keiffster DOT UK"), "keiffster.uk")
def test_collection_duplicate(self):
    """Adding the same key twice keeps the first value (no overwrite)."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.add_to_lookup("dot com", ".com ")
    coll.add_to_lookup("dot com", ".co ")
    self.assertEqual(coll.denormalise("dot com"), '.com ')
def test_collection_duplicate_jp(self):
    """Duplicate Japanese keys keep their first value (no overwrite)."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.add_to_lookup("丸1", "①")
    coll.add_to_lookup("丸1", "②")
    self.assertEqual(coll.denormalise("丸1"), '①')
def assert_upload_from_file(self, store):
    """Shared check: upload denormal.txt into a store and reload it."""
    data_file = (os.path.dirname(__file__) + os.sep + "data" + os.sep +
                 "lookups" + os.sep + "text" + os.sep + "denormal.txt")
    store.upload_from_file(data_file)

    coll = DenormalCollection()
    store.load(coll)

    # The stored value is a [compiled-pattern, replacement] pair.
    self.assertEqual(
        coll.denormalise(" DOT COM "),
        [re.compile('(^DOT COM | DOT COM | DOT COM$)', re.IGNORECASE), '.COM'])
    self.assertEqual(coll.denormalise_string("keith dot com"), "keith.com")
def test_collection_operations(self):
    """add_to_lookup / has_key / value round-trip and denormalisation."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)

    # Entries are [compiled-pattern, replacement] pairs.
    entry = [re.compile("(^DOT COM | DOT COM | DOT COM$)", re.IGNORECASE), ".com"]
    coll.add_to_lookup(" DOT COM ", entry)

    self.assertTrue(coll.has_key(" DOT COM "))
    self.assertEqual(entry, coll.value(" DOT COM "))

    # The entry rewrites matching text; unknown keys return None.
    self.assertEqual(coll.denormalise_string("keithsterling dot com"),
                     "keithsterling.com")
    self.assertIsNone(coll.denormalise(" dot cox "))
def test_collection_invalid(self):
    """Lookups for an unknown key return falsy/None and leave text alone."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.add_to_lookup("dot com", ".com ")

    # "dot co" was never added, so every access path misses.
    self.assertFalse(coll.has_keyVal("dot co"))
    self.assertIsNone(coll.value("dot co"))
    self.assertIsNone(coll.denormalise("dot co"))

    # Text containing no known pattern passes through unchanged.
    self.assertEqual(coll.denormalise_string(None, "www.dot.co"), "www.dot.co")
def test_collection_invalid_jp(self):
    """Unknown Japanese keys miss every access path; text is unchanged."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.add_to_lookup("丸1", "①")

    # "丸" alone was never added.
    self.assertFalse(coll.has_keyVal("丸"))
    self.assertIsNone(coll.value("丸"))

    tokenizer = TokenizerJP()
    self.assertIsNone(coll.denormalise("丸"))

    # Text with no matching pattern passes through unchanged.
    self.assertEqual(coll.denormalise_string(tokenizer, "丸の回答"), "丸の回答")
def test_collection_operations_JP(self):
    """Japanese lookup round-trip: add, query, and denormalise."""
    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.add_to_lookup("丸1", "①")

    tokenizer = TokenizerJP()
    self.assertTrue(coll.has_keyVal("丸1"))
    self.assertEqual("①", coll.value("丸1"))

    # "丸1" is rewritten in context; a bare "丸" is not a key.
    self.assertEqual(coll.denormalise_string(tokenizer, "丸1の回答"), "①の回答")
    self.assertIsNone(coll.denormalise("丸"))
def assert_upload_from_text_file(self, store):
    """Shared check: empty the store, upload denormal.txt, then reload."""
    store.empty()
    data_file = (os.path.dirname(__file__) + os.sep + "data" + os.sep +
                 "lookups" + os.sep + "text" + os.sep + "denormal.txt")
    store.upload_from_file(data_file)

    coll = DenormalCollection()
    store.load(coll)

    # The stored value is a [compiled-pattern, replacement] pair.
    self.assertEqual(
        coll.denormalise(" DOT UK "),
        [re.compile('(^DOT UK | DOT UK | DOT UK$)', re.IGNORECASE), '.UK'])
    self.assertEqual(coll.denormalise_string("keiffster DOT UK"), "keiffster.uk")
def test_load_from_file(self):
    """Load the denormal collection through a FileDenormalStore."""
    config = FileStorageConfiguration()
    config._denormal_storage = FileStoreConfiguration(
        file=os.path.dirname(__file__) + os.sep + "data" + os.sep + "lookups" +
             os.sep + "text" + os.sep + "denormal.txt",
        format="text", encoding="utf-8", delete_on_start=False)
    engine = FileStorageEngine(config)
    engine.initialise()

    store = FileDenormalStore(engine)
    coll = DenormalCollection()
    store.load(coll)

    # Here the raw value form is the plain replacement string.
    self.assertEqual(coll.denormalise("dot com"), '.com ')
    self.assertEqual(coll.denormalise_string(None, "keith dot com"), "keith.com")
def test_collection_operations(self):
    """Text-loaded lookups support has_keyVal/value and denormalisation."""
    # One '"pattern","replacement"' pair per line.
    denormal_text = """
        "dot ac",".ac "
        "dot au",".au "
        "dot ca",".ca "
        "dot ch",".ch "
        "dot co",".co "
        "dot com",".com "
        """

    coll = DenormalCollection()
    self.assertIsNotNone(coll)
    coll.load_from_text(denormal_text)

    self.assertTrue(coll.has_keyVal("dot com"))
    self.assertEqual(".com ", coll.value("dot com"))

    # Known keys are rewritten in context; unknown keys return None.
    self.assertEqual(coll.denormalise_string(None, "keithsterling dot com"),
                     "keithsterling.com")
    self.assertIsNone(coll.denormalise("dot cox"))