Beispiel #1
0
    def test_get_mode(self):
        """Check the mode mapping produced from a fixed argument list.

        The arguments contain the ``-clr``, ``-cost cost.txt`` and ``-basic``
        flags; the test expects the clear and basic modes to map to
        ``'STDOUT'``, the cost mode to map to ``'cost.txt'``, and the
        correct mode to be absent from the mapping.
        """
        argv = ['-w', 'word', '-f', 'frequency']
        argv += ['-clr', '-cost', 'cost.txt', '-basic']

        modes = Configurator(argv).get_mode()

        # Modes that were requested on the command line.
        clear_target = modes.get(cfg.MODE_CLEAR)
        self.assertEqual(clear_target, 'STDOUT')
        cost_target = modes.get(cfg.MODE_COST)
        self.assertEqual(cost_target, 'cost.txt')
        basic_target = modes.get(cfg.MODE_BASIC)
        self.assertEqual(basic_target, 'STDOUT')

        # The correct mode was not requested, so no entry is expected.
        correct_target = modes.get(cfg.MODE_CORRECT)
        self.assertIsNone(correct_target)
Beispiel #2
0
    def build(configurator: cfg.Configurator):
        """Create a ``WordAnalyzer`` and populate it from *configurator*.

        Args:
            configurator: Source of the word list, frequency words, tree,
                mode, verbosity flag and numeric configuration values.

        Returns:
            The fully initialised ``WordAnalyzer`` instance.

        Raises:
            KeyError: If the configuration values lack one of the keys
                ``'similar_words'``, ``'distance'``, ``'threshold'`` or
                ``'number_corrected'``.
        """
        analyzer = WordAnalyzer()
        analyzer.words = configurator.get_words(verbose=True)
        analyzer.frequency_words = configurator.get_frequency_words(
            verbose=True)
        analyzer.splitter = TextSplitter(analyzer.frequency_words)
        analyzer.tree = configurator.get_tree()
        analyzer.mode = configurator.get_mode()
        analyzer.verbose = configurator.get_verbose()

        # Whether extra information is attached to each word during
        # analysis; disabled by default.
        analyzer.verbose_file = False

        # Cost assigned to a word that is not present in the dictionary.
        analyzer.default_cost = 1000

        # Fetch the configuration values once and reuse them for every key
        # instead of querying the configurator separately per setting.
        # NOTE(review): assumes get_configuration_values() has no side
        # effects and returns the same mapping on every call - confirm.
        settings = configurator.get_configuration_values()
        # How many similar words get_similar_words returns.
        analyzer.number_similar_words = settings['similar_words']
        # Which distance is used when searching for similar words.
        analyzer.distance = settings['distance']
        # How many parts are spliced in get_correct_words.
        analyzer.threshold = settings['threshold']
        # How many words get_correct_words returns.
        analyzer.number_of_corrected_words = settings['number_corrected']

        # default=0 keeps an empty word collection from raising ValueError;
        # the divisor table below is then simply empty.
        analyzer.max_len = max(
            (len(word) for word in analyzer.words), default=0)

        # Precompute the divisors of every number from 1 up to the longest
        # word length so they are not recalculated for the same number.
        analyzer.divisors = {
            number: factorize(number)
            for number in range(1, analyzer.max_len + 1)
        }

        return analyzer