Ejemplo n.º 1
0
def runTests(tests, databases, registerUnicode, iteration=10):
    """
    Time the dictionary lookup methods for every requested test database.

    For each test number the corresponding SQLite database is opened (with
    ``cjklib`` attached), the dictionaries that are both reported by
    ``BaseDictionary.getAvailableDictionaries`` and present as tables in the
    main schema are selected, and ``runRequest`` is timed over all
    ``SEARCH_REQUESTS`` for each lookup method.

    :param tests: iterable of test numbers, each used to index ``databases``
        and ``registerUnicode``
    :param databases: database file path per test number
    :param registerUnicode: value of the ``registerUnicode`` connection
        option per test number
    :param iteration: number of executions passed to ``Timer.timeit``
    :return: nested mapping ``{test number: {dictionary name:
        {method name: time reported by Timer.timeit}}}``
    :raise ValueError: if none of the available dictionaries is found in a
        database
    """
    # Local import: ``types.ModuleType`` replaces ``imp.new_module``, as the
    # ``imp`` module is deprecated and was removed in Python 3.12.
    import types

    methods = ('getFor', 'getForHeadword', 'getForReading',
               'getForTranslation')
    statement = ("timeit_runmod.runRequest(timeit_runmod.dictInstance, "
                 "timeit_runmod.requestList, method='%s')")

    f = ReadingFactory()

    timing = {}
    for no in tests:
        # Parenthesised single-argument print works as statement and function
        print("Running test %d (reading from %s)..." % (no, databases[no]))

        connection = {'sqlalchemy.url': 'sqlite:///%s' % databases[no],
                      'attach': ['cjklib'],
                      'registerUnicode': registerUnicode[no]}
        db = dbconnector.getDBConnector(connection)

        # Only time dictionaries whose table really exists in this database
        availableDicts = set(
            dictClass.DICTIONARY_TABLE for dictClass
            in dictionary.BaseDictionary.getAvailableDictionaries(db))
        existingTables = set(db.engine.table_names(schema=db._mainSchema))
        dictionaries = list(availableDicts & existingTables)
        if not dictionaries:
            raise ValueError("No dictionaries found")

        print("Found dictionaries '%s'" % "', '".join(dictionaries))

        runTime = {}
        for dictName in dictionaries:
            dictClass = dictionary.BaseDictionary.getDictionaryClass(dictName)
            dictInstance = dictClass(dbConnectInst=db)

            # Guess the reading dialect per request if the dictionary's
            # reading operator offers that; otherwise use empty options.
            opClass = (dictClass.READING
                       and f.getReadingOperatorClass(dictClass.READING))
            if hasattr(opClass, 'guessReadingDialect'):
                requestList = [(request, opClass.guessReadingDialect(request))
                               for request in SEARCH_REQUESTS]
            else:
                requestList = [(request, {}) for request in SEARCH_REQUESTS]

            # The timed statement reaches its objects through a temporary
            # module that the Timer's setup statement imports.
            mod = types.ModuleType('timeit_runmod')
            mod.runRequest = runRequest
            mod.dictInstance = dictInstance
            mod.requestList = requestList

            sys.modules['timeit_runmod'] = mod
            try:
                methodTime = {}
                for method in methods:
                    t = Timer(statement % method, "import timeit_runmod")
                    methodTime[method] = t.timeit(iteration)
            finally:
                # Unregister the helper module so it does not keep the
                # dictionary instance referenced after timing.
                sys.modules.pop('timeit_runmod', None)
            runTime[dictName] = methodTime

        timing[no] = runTime

    return timing
Ejemplo n.º 2
0
    def open(self, dbname):
        """
        Open the database.

        Stores ``dbname`` on the instance, uses it as the dictionary name
        unless ``_dictionaryName`` is already set, and creates the dictionary
        instance via ``getDictionary``. If the dictionary has a reading whose
        operator class provides ``guessReadingDialect``, that class is kept
        in ``_opClass``.

        :param dbname: name of the database to open
        :return: ``True`` on success, ``False`` if ``getDictionary`` raised
            :class:`ValueError`
        """
        self.dbname = dbname
        if not hasattr(self, '_dictionaryName'):
            self._dictionaryName = dbname

        try:
            self._dictInst = getDictionary(
                self._dictionaryName, entryFactory=entry.UnifiedHeadword())
        except ValueError as e:
            if debug:
                print(e, file=sys.stderr)
            return False

        if self._dictInst.READING:
            f = ReadingFactory()
            opClass = f.getReadingOperatorClass(self._dictInst.READING)
            # Only remember operators that can guess the reading dialect
            if hasattr(opClass, 'guessReadingDialect'):
                self._opClass = opClass

        return True
Ejemplo n.º 3
0
def runTests(tests, databases, registerUnicode, iteration=10):
    """
    Time the dictionary lookup methods for every requested test database.

    For each test number the corresponding SQLite database is opened (with
    ``cjklib`` attached), the dictionaries that are both reported by
    ``BaseDictionary.getAvailableDictionaries`` and present as tables in the
    main schema are selected, and ``runRequest`` is timed over all
    ``SEARCH_REQUESTS`` for each lookup method.

    :param tests: iterable of test numbers, each used to index ``databases``
        and ``registerUnicode``
    :param databases: database file path per test number
    :param registerUnicode: value of the ``registerUnicode`` connection
        option per test number
    :param iteration: number of executions passed to ``Timer.timeit``
    :return: nested mapping ``{test number: {dictionary name:
        {method name: time reported by Timer.timeit}}}``
    :raise ValueError: if none of the available dictionaries is found in a
        database
    """
    # Local import: ``types.ModuleType`` replaces ``imp.new_module``, as the
    # ``imp`` module is deprecated and was removed in Python 3.12.
    import types

    methods = ('getFor', 'getForHeadword', 'getForReading',
               'getForTranslation')
    statement = ("timeit_runmod.runRequest(timeit_runmod.dictInstance, "
                 "timeit_runmod.requestList, method='%s')")

    f = ReadingFactory()

    timing = {}
    for no in tests:
        print("Running test %d (reading from %s)..." % (no, databases[no]))

        connection = {
            'sqlalchemy.url': 'sqlite:///%s' % databases[no],
            'attach': ['cjklib'],
            'registerUnicode': registerUnicode[no]
        }
        db = dbconnector.getDBConnector(connection)

        # Only time dictionaries whose table really exists in this database
        availableDicts = set(
            dictClass.DICTIONARY_TABLE for dictClass
            in dictionary.BaseDictionary.getAvailableDictionaries(db))
        existingTables = set(db.engine.table_names(schema=db._mainSchema))
        dictionaries = list(availableDicts & existingTables)
        if not dictionaries:
            raise ValueError("No dictionaries found")

        print("Found dictionaries '%s'" % "', '".join(dictionaries))

        runTime = {}
        for dictName in dictionaries:
            dictClass = dictionary.BaseDictionary.getDictionaryClass(dictName)
            dictInstance = dictClass(dbConnectInst=db)

            # Guess the reading dialect per request if the dictionary's
            # reading operator offers that; otherwise use empty options.
            opClass = (dictClass.READING
                       and f.getReadingOperatorClass(dictClass.READING))
            if hasattr(opClass, 'guessReadingDialect'):
                requestList = [(request, opClass.guessReadingDialect(request))
                               for request in SEARCH_REQUESTS]
            else:
                requestList = [(request, {}) for request in SEARCH_REQUESTS]

            # The timed statement reaches its objects through a temporary
            # module that the Timer's setup statement imports.
            mod = types.ModuleType('timeit_runmod')
            mod.runRequest = runRequest
            mod.dictInstance = dictInstance
            mod.requestList = requestList

            sys.modules['timeit_runmod'] = mod
            try:
                methodTime = {}
                for method in methods:
                    t = Timer(statement % method, "import timeit_runmod")
                    methodTime[method] = t.timeit(iteration)
            finally:
                # Unregister the helper module so it does not keep the
                # dictionary instance referenced after timing.
                sys.modules.pop('timeit_runmod', None)
            runTime[dictName] = methodTime

        timing[no] = runTime

    return timing
Ejemplo n.º 4
0
class CharacterLookupReadingMethodsTest(CharacterLookupTest,
                                        unittest.TestCase):
    """
    Consistency checks for the reading related methods of the
    :class:`~cjklib.characterlookup.CharacterLookup` class.

    .. todo::
        * Impl: include script table from Unicode 5.2.0 to get character ranges
          for Hangul and Kana
    """
    # Per reading: additional dialect option dicts tested besides the default
    DIALECTS = {}

    # Per reading: entity list used if the operator has no getReadingEntities
    SPECIAL_ENTITY_LIST = {}

    def setUp(self):
        CharacterLookupTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

    def testReadingMappingAvailability(self):
        """
        Check that every reading listed in
        ``CharacterLookup.CHARARACTER_READING_MAPPING`` is reported as
        available for conversion.
        """
        lookup = self.characterLookup
        mapping = lookup.CHARARACTER_READING_MAPPING

        # pretend that all tables named in the mapping exist
        tables = [tableName for tableName, _ in list(mapping.values())]
        lookup.db.engine = EngineMock(lookup.db.engine, mockTables=tables)

        for reading in mapping:
            # entry is unpacked as a pair; the table name is not used here
            _tableName, _ = mapping[reading]

            toCharacter = lookup.hasMappingForReadingToCharacter(reading)
            self.assertTrue(toCharacter)
            toReading = lookup.hasMappingForCharacterToReading(reading)
            self.assertTrue(toReading)

        # every known reading has to yield a proper boolean answer
        for reading in self.f.getSupportedReadings():
            toCharacter = lookup.hasMappingForReadingToCharacter(reading)
            self.assertTrue(toCharacter in [True, False])
            toReading = lookup.hasMappingForCharacterToReading(reading)
            self.assertTrue(toReading in [True, False])

    def _checkCharactersForEntity(self, entity, reading, dialect):
        """
        Look up the characters for one reading entity and check the shape of
        the result. Unsupported entities and conversion errors are ignored.
        """
        try:
            results = self.characterLookup.getCharactersForReading(
                entity, reading, **dialect)

            context = ' (reading %s, dialect %s)' % (reading, dialect)
            entityRepr = repr(entity)

            self.assertEqual(type(results), list,
                "Method getCharactersForReading() doesn't return"
                + " a list for entity %s " % entityRepr
                + context)

            for entry in results:
                self.assertEqual(len(entry), 1,
                    "Entry %s in result for %s has length != 1"
                    % (repr(entry), entityRepr)
                    + context)
        except (exception.UnsupportedError, exception.ConversionError):
            pass

    @attr('slow')
    def testGetCharactersForReadingAcceptsAllEntities(self):
        """Test if ``getCharactersForReading`` accepts all reading entities."""
        for reading in self.f.getSupportedReadings():
            if not self.characterLookup.hasMappingForReadingToCharacter(
                    reading):
                continue

            # the default dialect first, then any reading specific ones
            dialects = [{}]
            dialects.extend(self.DIALECTS.get(reading, []))

            for dialect in dialects:
                operatorClass = self.f.getReadingOperatorClass(reading)
                if not hasattr(operatorClass, 'getReadingEntities'):
                    if reading not in self.SPECIAL_ENTITY_LIST:
                        continue
                    entities = self.SPECIAL_ENTITY_LIST[reading]
                else:
                    entities = self.f.getReadingEntities(reading, **dialect)

                for entity in entities:
                    self._checkCharactersForEntity(entity, reading, dialect)
Ejemplo n.º 5
0
class CharacterLookupReadingMethodsTest(CharacterLookupTest, unittest.TestCase):
    """
    Runs consistency checks on the reading methods of the
    :class:`~cjklib.characterlookup.CharacterLookup` class.

    .. todo::
        * Impl: include script table from Unicode 5.2.0 to get character ranges
          for Hangul and Kana
    """
    # Per reading: additional dialect option dicts tested besides the default
    DIALECTS = {}

    # Per reading: entity list used if the operator has no getReadingEntities
    SPECIAL_ENTITY_LIST = {}

    def setUp(self):
        """Run the base set-up and create a reading factory on the test db."""
        CharacterLookupTest.setUp(self)
        self.f = ReadingFactory(dbConnectInst=self.db)

    def testReadingMappingAvailability(self):
        """
        Test if the readings under
        ``CharacterLookup.CHARARACTER_READING_MAPPING`` are available for
        conversion.
        """
        # mock to simulate availability of all tables in
        #   characterLookup.CHARARACTER_READING_MAPPING
        tables = [table for table, _
                  in self.characterLookup.CHARARACTER_READING_MAPPING.values()]
        self.characterLookup.db.engine = EngineMock(
            self.characterLookup.db.engine, mockTables=tables)

        for reading in self.characterLookup.CHARARACTER_READING_MAPPING:
            self.assertTrue(
                self.characterLookup.hasMappingForReadingToCharacter(reading))
            self.assertTrue(
                self.characterLookup.hasMappingForCharacterToReading(reading))

        # test proper checking for all known readings
        for reading in self.f.getSupportedReadings():
            self.assertIn(
                self.characterLookup.hasMappingForReadingToCharacter(reading),
                [True, False])
            self.assertIn(
                self.characterLookup.hasMappingForCharacterToReading(reading),
                [True, False])

    @attr('slow')
    def testGetCharactersForReadingAcceptsAllEntities(self):
        """Test if ``getCharactersForReading`` accepts all reading entities."""
        for reading in self.f.getSupportedReadings():
            if not self.characterLookup.hasMappingForReadingToCharacter(
                    reading):
                continue

            # always test the default dialect, plus reading specific ones
            dialects = [{}]
            if reading in self.DIALECTS:
                dialects.extend(self.DIALECTS[reading])

            for dialect in dialects:
                if hasattr(self.f.getReadingOperatorClass(reading),
                           'getReadingEntities'):
                    entities = self.f.getReadingEntities(reading, **dialect)
                elif reading in self.SPECIAL_ENTITY_LIST:
                    entities = self.SPECIAL_ENTITY_LIST[reading]
                else:
                    continue

                for entity in entities:
                    # Only the lookup itself is guarded, so the assertions
                    # below are never covered by the except clause.
                    try:
                        results = self.characterLookup.getCharactersForReading(
                            entity, reading, **dialect)
                    except (exception.UnsupportedError,
                            exception.ConversionError):
                        # these errors are tolerated; skip this entity
                        continue

                    context = ' (reading %s, dialect %s)' % (reading, dialect)

                    self.assertEqual(type(results), list,
                        "Method getCharactersForReading() doesn't return"
                        + " a list for entity %s " % repr(entity)
                        + context)

                    for entry in results:
                        self.assertEqual(len(entry), 1,
                            "Entry %s in result for %s has length != 1"
                            % (repr(entry), repr(entity))
                            + context)