Ejemplo n.º 1
0
    def test_collection(self):
        """Load denormal pairs from inline text and verify the lookups.

        Fourteen quoted pairs are supplied (" dot co " appears twice) and the
        test expects ``load_from_text`` to report 14.  It then checks
        sentence-level substitution through ``denormalise_string`` and
        single-key lookup through ``denormalise``, including a key that was
        never loaded.
        """
        denormal_text = """
            " dot co ",".co"
            " dot uk ",".uk"
            " dot net ",".net"
            " dot ca ",".ca"
            " dot de ",".de"
            " dot jp ",".jp"
            " dot fr ",".fr"
            " dot es ",".es"
            " dot mil ",".mil"
            " dot co ",".co"
            " are not "," aren't "
            " can not "," can't "
            " could not "," couldn't "
            " could have "," could've "
         """

        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        loaded = collection.load_from_text(denormal_text)
        self.assertEqual(loaded, 14)

        # Each sentence is denormalised and compared in turn.
        sentence_cases = (
            ("You are not him", "You aren't him"),
            ("keithsterling dot co dot uk", "keithsterling.co.uk"),
        )
        for normalised, expected in sentence_cases:
            self.assertEqual(collection.denormalise_string(normalised), expected)

        # A loaded key is expected to give back its pattern text, while a key
        # that was never loaded is expected to give None.
        known_lookup = collection.denormalise(" dot co ")
        self.assertEqual("(^dot co | dot co | dot co$)", known_lookup)
        unknown_lookup = collection.denormalise(" dot cox ")
        self.assertIsNone(unknown_lookup)
Ejemplo n.º 2
0
    def test_collection(self):
        """Load denormal pairs from inline text and verify the lookups.

        Sixteen quoted pairs are supplied (" dot co " appears twice) and the
        test expects ``load_from_text`` to report 16.  Three sentences are
        then run through ``denormalise_string`` and one unknown key is looked
        up through ``denormalise``.
        """
        denormal_text = """
            " www dot ","www."
            " dot com ",".com "
            " dot co ",".co"
            " dot uk ",".uk"
            " dot net ",".net"
            " dot ca ",".ca"
            " dot de ",".de"
            " dot jp ",".jp"
            " dot fr ",".fr"
            " dot es ",".es"
            " dot mil ",".mil"
            " dot co ",".co"
            " are not "," aren't "
            " can not "," can't "
            " could not "," couldn't "
            " could have "," could've "
         """

        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        loaded = collection.load_from_text(denormal_text)
        self.assertEqual(loaded, 16)

        # Each sentence is denormalised and compared in turn.
        sentence_cases = (
            ("You are not him", "You aren't him"),
            ("keithsterling dot co dot uk", "keithsterling.co.uk"),
            ("www dot google dot com", "www.google.com"),
        )
        for normalised, expected in sentence_cases:
            self.assertEqual(collection.denormalise_string(normalised), expected)

        # A key that was never loaded is expected to give None.
        unknown_lookup = collection.denormalise(" dot cox ")
        self.assertIsNone(unknown_lookup)
Ejemplo n.º 3
0
    def test_reload_jp(self):
        """Check Japanese denormal lookups after ``load`` and after ``reload``.

        A file storage engine pointing at ``test_files/denormal_jp.txt``
        (relative to this module's directory) is registered for
        ``StorageFactory.DENORMAL``.  The same two assertions are run after
        the initial load and again after the reload.
        """
        storage_factory = StorageFactory()
        tokenizer = TokenizerJP()

        # os.path.join rather than manual os.sep concatenation: it does not
        # produce a root-anchored path when dirname(__file__) is empty, nor a
        # doubled separator when the directory already ends with one.
        denormal_path = os.path.join(
            os.path.dirname(__file__), "test_files", "denormal_jp.txt")

        file_store_config = FileStorageConfiguration()
        file_store_config._denormal_storage = FileStoreConfiguration(
            file=denormal_path,
            format="text",
            extension="txt",
            encoding="utf-8",
            delete_on_start=False)

        storage_engine = FileStorageEngine(file_store_config)

        # Register the engine directly in both of the factory's internal maps.
        storage_factory._storage_engines[
            StorageFactory.DENORMAL] = storage_engine
        storage_factory._store_to_engine_map[
            StorageFactory.DENORMAL] = storage_engine

        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        def check_lookups():
            # Same expectations before and after the reload.
            self.assertEqual(
                collection.denormalise_string(tokenizer, "丸1の回答"),
                "①の回答")
            self.assertIsNone(collection.denormalise("丸"))

        collection.load(storage_factory)
        check_lookups()

        collection.reload(storage_factory)
        check_lookups()
Ejemplo n.º 4
0
    def test_load(self):
        """Load denormals through a file storage engine and check lookups.

        A file storage engine pointing at ``test_files/denormal.txt``
        (relative to this module's directory) is registered for
        ``StorageFactory.DENORMAL`` before ``collection.load`` is called.
        """
        storage_factory = StorageFactory()

        # os.path.join rather than manual os.sep concatenation: it does not
        # produce a root-anchored path when dirname(__file__) is empty, nor a
        # doubled separator when the directory already ends with one.
        denormal_path = os.path.join(
            os.path.dirname(__file__), "test_files", "denormal.txt")

        file_store_config = FileStorageConfiguration()
        file_store_config._denormal_storage = FileStoreConfiguration(
            file=denormal_path,
            format="text",
            extension="txt",
            encoding="utf-8",
            delete_on_start=False)

        storage_engine = FileStorageEngine(file_store_config)

        # Register the engine directly in both of the factory's internal maps.
        storage_factory._storage_engines[
            StorageFactory.DENORMAL] = storage_engine
        storage_factory._store_to_engine_map[
            StorageFactory.DENORMAL] = storage_engine

        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        collection.load(storage_factory)

        self.assertEqual(
            collection.denormalise_string("keithsterling dot com"),
            "keithsterling.com")
        # A key that is not expected in the file should give None.
        self.assertIsNone(collection.denormalise(" dot cox "))
Ejemplo n.º 5
0
    def assert_upload_from_text(self, store):
        """Upload denormal pairs as text into *store* and verify a loaded copy.

        The store is emptied, fifteen " dot xx " pairs are uploaded with
        ``upload_from_text``, and a fresh ``DenormalCollection`` is filled
        through ``store.load`` before the lookups are asserted.
        """
        store.empty()

        denormal_text = """
                                " dot uk ",".uk"
                                " dot net ",".net"
                                " dot ca ",".ca"
                                " dot de ",".de"
                                " dot jp ",".jp"
                                " dot fr ",".fr"
                                " dot au ",".au"
                                " dot us ",".us"
                                " dot ru ",".ru"
                                " dot ch ",".ch"
                                " dot it ",".it"
                                " dot nl ",".nl"
                                " dot se ",".se"
                                " dot no ",".no"
                                " dot es ",".es"
                                """
        store.upload_from_text(None, denormal_text)

        collection = DenormalCollection()
        store.load(collection)

        # The single-key lookup is expected to be a [pattern, replacement] pair.
        found = collection.denormalise(" DOT UK ")
        expected = [re.compile('(^DOT UK | DOT UK | DOT UK$)', re.IGNORECASE), '.UK']
        self.assertEqual(found, expected)

        rewritten = collection.denormalise_string("keiffster DOT UK")
        self.assertEqual(rewritten, "keiffster.uk")
Ejemplo n.º 6
0
    def assert_upload_from_file(self, store):
        """Upload ``data/lookups/text/denormal.txt`` into *store* and verify it.

        The file is located relative to this module's directory.  After the
        upload a ``DenormalCollection`` is filled through ``store.load`` and
        two lookups are asserted.
        """
        denormal_collection = DenormalCollection()

        # os.path.join rather than manual os.sep concatenation: it does not
        # produce a root-anchored path when dirname(__file__) is empty, nor a
        # doubled separator when the directory already ends with one.
        denormal_path = os.path.join(
            os.path.dirname(__file__), "data", "lookups", "text", "denormal.txt")
        store.upload_from_file(denormal_path)

        store.load(denormal_collection)

        # The single-key lookup is expected to be a [pattern, replacement] pair.
        self.assertEqual(
            denormal_collection.denormalise(" DOT COM "),
            [re.compile('(^DOT COM | DOT COM | DOT COM$)', re.IGNORECASE), '.COM'])
        self.assertEqual(
            denormal_collection.denormalise_string("keith dot com"),
            "keith.com")
Ejemplo n.º 7
0
    def test_collection_operations(self):
        """Add one entry by hand and check key, value and substitution.

        The entry stored under " DOT COM " is a ``[compiled pattern, ".com"]``
        list; the test then asserts ``has_key``, ``value``,
        ``denormalise_string`` and a ``denormalise`` miss.
        """
        key = " DOT COM "
        pattern_text = "(^DOT COM | DOT COM | DOT COM$)"

        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        stored_entry = [re.compile(pattern_text, re.IGNORECASE), ".com"]
        collection.add_to_lookup(key, stored_entry)

        self.assertTrue(collection.has_key(key))

        # Compare against a freshly built list, not the object handed to
        # add_to_lookup.
        expected_entry = [re.compile(pattern_text, re.IGNORECASE), ".com"]
        self.assertEqual(expected_entry, collection.value(key))

        rewritten = collection.denormalise_string("keithsterling dot com")
        self.assertEqual(rewritten, "keithsterling.com")

        unknown_lookup = collection.denormalise(" dot cox ")
        self.assertIsNone(unknown_lookup)
Ejemplo n.º 8
0
    def test_collection_invalid(self):
        """Check that a key which was never added is reported as absent.

        Only "dot com" is added; "dot co" must be missing from ``has_keyVal``,
        ``value`` and ``denormalise``, and a string with no match is expected
        to come back from ``denormalise_string`` unchanged.
        """
        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        collection.add_to_lookup("dot com", ".com ")

        missing_key = "dot co"
        self.assertFalse(collection.has_keyVal(missing_key))
        self.assertIsNone(collection.value(missing_key))
        self.assertIsNone(collection.denormalise(missing_key))

        # No tokenizer is supplied for this call (first argument is None).
        unmatched_text = "www.dot.co"
        result = collection.denormalise_string(None, unmatched_text)
        self.assertEqual(result, unmatched_text)
Ejemplo n.º 9
0
    def test_collection_invalid_jp(self):
        """Check that a Japanese key which was never added is reported absent.

        Only "丸1" is added; "丸" must be missing from ``has_keyVal``,
        ``value`` and ``denormalise``, and a string with no match is expected
        to come back from ``denormalise_string`` unchanged.
        """
        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        collection.add_to_lookup("丸1", "①")

        missing_key = "丸"
        self.assertFalse(collection.has_keyVal(missing_key))
        self.assertIsNone(collection.value(missing_key))

        tokenizer = TokenizerJP()
        self.assertIsNone(collection.denormalise(missing_key))

        unmatched_text = "丸の回答"
        result = collection.denormalise_string(tokenizer, unmatched_text)
        self.assertEqual(result, unmatched_text)
Ejemplo n.º 10
0
    def test_collection_operations_JP(self):
        """Add one Japanese entry and check key, value and substitution.

        "丸1" is mapped to "①"; the test asserts ``has_keyVal``, ``value``,
        a ``denormalise_string`` call using ``TokenizerJP``, and a
        ``denormalise`` miss for "丸".
        """
        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        key = "丸1"
        replacement = "①"
        collection.add_to_lookup(key, replacement)
        tokenizer = TokenizerJP()

        self.assertTrue(collection.has_keyVal(key))
        self.assertEqual(replacement, collection.value(key))

        rewritten = collection.denormalise_string(tokenizer, "丸1の回答")
        self.assertEqual(rewritten, "①の回答")

        unknown_lookup = collection.denormalise("丸")
        self.assertIsNone(unknown_lookup)
Ejemplo n.º 11
0
    def assert_upload_from_text_file(self, store):
        """Upload ``data/lookups/text/denormal.txt`` into *store* and verify it.

        The store is emptied first.  The file is located relative to this
        module's directory.  After the upload a fresh ``DenormalCollection``
        is filled through ``store.load`` and two lookups are asserted.
        """
        store.empty()

        # os.path.join rather than manual os.sep concatenation: it does not
        # produce a root-anchored path when dirname(__file__) is empty, nor a
        # doubled separator when the directory already ends with one.
        denormal_path = os.path.join(
            os.path.dirname(__file__), "data", "lookups", "text", "denormal.txt")
        store.upload_from_file(denormal_path)

        collection = DenormalCollection()
        store.load(collection)

        # The single-key lookup is expected to be a [pattern, replacement] pair.
        self.assertEqual(
            collection.denormalise(" DOT UK "),
            [re.compile('(^DOT UK | DOT UK | DOT UK$)', re.IGNORECASE), '.UK'])
        self.assertEqual(collection.denormalise_string("keiffster DOT UK"),
                         "keiffster.uk")
Ejemplo n.º 12
0
    def test_load_from_file(self):
        """Load denormals from a text file via ``FileDenormalStore``.

        The engine is configured with ``data/lookups/text/denormal.txt``
        (relative to this module's directory) and initialised, then the store
        fills a ``DenormalCollection`` whose lookups are asserted.
        """
        # os.path.join rather than manual os.sep concatenation: it does not
        # produce a root-anchored path when dirname(__file__) is empty, nor a
        # doubled separator when the directory already ends with one.
        denormal_path = os.path.join(
            os.path.dirname(__file__), "data", "lookups", "text", "denormal.txt")

        config = FileStorageConfiguration()
        config._denormal_storage = FileStoreConfiguration(
            file=denormal_path,
            format="text",
            encoding="utf-8",
            delete_on_start=False)
        engine = FileStorageEngine(config)
        engine.initialise()
        store = FileDenormalStore(engine)

        denormal_collection = DenormalCollection()

        store.load(denormal_collection)

        # The single-key lookup is expected to give the replacement text,
        # including its trailing space.
        self.assertEqual(denormal_collection.denormalise("dot com"), '.com ')
        # No tokenizer is supplied for this call (first argument is None).
        self.assertEqual(
            denormal_collection.denormalise_string(None, "keith dot com"),
            "keith.com")
Ejemplo n.º 13
0
    def test_collection_operations(self):
        """Load six pairs from inline text and check key, value and lookups.

        After ``load_from_text`` the test asserts that "dot com" is present
        with value ".com ", that a sentence is rewritten by
        ``denormalise_string`` (called without a tokenizer), and that
        "dot cox" is not found.
        """
        lookup_text = """
        "dot ac",".ac "
        "dot au",".au "
        "dot ca",".ca "
        "dot ch",".ch "
        "dot co",".co "
        "dot com",".com "
        """

        collection = DenormalCollection()
        self.assertIsNotNone(collection)

        collection.load_from_text(lookup_text)

        key = "dot com"
        self.assertTrue(collection.has_keyVal(key))
        self.assertEqual(".com ", collection.value(key))

        rewritten = collection.denormalise_string(None, "keithsterling dot com")
        self.assertEqual(rewritten, "keithsterling.com")

        unknown_lookup = collection.denormalise("dot cox")
        self.assertIsNone(unknown_lookup)