コード例 #1
0
ファイル: date.py プロジェクト: pombredanne/cahoots
 def test_bootstrap(self):
     """
     Both timedelta phrase parsers must be present in the registry and
     have the expected pyparsing types.
     """
     pre_key = 'DP_pre_timedelta_phrases'
     post_key = 'DP_post_timedelta_phrases'

     # Presence checks first, then exact type checks
     self.assertTrue(registry.test(pre_key))
     self.assertTrue(registry.test(post_key))

     pre_type = type(registry.get(pre_key))
     self.assertEqual(pre_type, pyparsing.And)

     post_type = type(registry.get(post_key))
     self.assertEqual(post_type, pyparsing.Or)
コード例 #2
0
 def test_bootStrap(self, get_prepositions_mock):
     """
     MeasurementParser.bootstrap should fill the registry with the unit
     and system lookups plus both pyparsing parsers.

     :param get_prepositions_mock: stand-in for the preposition lookup
         (presumably injected by a patch decorator outside this view)
     """
     get_prepositions_mock.return_value = ['of']
     MeasurementParser.bootstrap(TestConfig)

     expected_units = {
         'acre': 'imperial_area',
         'acres': 'imperial_area',
         'yard': 'imperial_length',
         'yards': 'imperial_length',
     }
     self.assertEqual(expected_units, registry.get('MP_units'))

     expected_systems = {
         'imperial_area': ('Imperial', 'Area'),
         'imperial_length': ('Imperial', 'Length'),
     }
     self.assertEqual(expected_systems, registry.get('MP_systems'))

     # Both registered parsers are expected to be pyparsing.And instances
     for parser_key in ('MP_preposition_parser', 'MP_measurement_parser'):
         self.assertIsInstance(registry.get(parser_key), pyparsing.And)

     get_prepositions_mock.assert_called_once_with()
コード例 #3
0
    def test_bootstrapCarriesOutAsExpected(self):
        """
        Bootstrapping the programming parser must forward the config to
        the mocked bootstrap and register the keyword collections.
        """
        test_config = TestConfig()
        ProgrammingParser.bootstrap(test_config)
        self.bootstrapMock.assert_called_with(test_config)

        all_keywords = registry.get('PP_all_keywords')
        self.assertEqual(486, len(all_keywords))

        language_keywords = registry.get('PP_language_keywords')
        self.assertEqual(11, len(language_keywords))
コード例 #4
0
ファイル: __init__.py プロジェクト: hickeroar/cahoots
 def __init__(self, config):
     """
     Initialise the base parser and pull the keyword collections out
     of the registry.

     :param config: cahoots config
     :type config: cahoots.config.BaseConfig
     """
     BaseParser.__init__(self, config, "Programming", 0)

     fetch = registry.get
     self.all_keywords = fetch("PP_all_keywords")
     self.language_keywords = fetch("PP_language_keywords")
コード例 #5
0
 def __init__(self, config):
     """
     Initialise the base parser and pull the keyword collections out
     of the registry.

     :param config: cahoots config
     :type config: cahoots.config.BaseConfig
     """
     BaseParser.__init__(self, config, "Programming", 0)

     fetch = registry.get
     self.all_keywords = fetch('PP_all_keywords')
     self.language_keywords = fetch('PP_language_keywords')
コード例 #6
0
ファイル: date.py プロジェクト: SerenitySoftwareLLC/cahoots
    def parse(self, data_string):
        """
        Try to read the input as a date, or as a date shifted by a time
        delta, and yield at most one result.

        Three shapes are attempted in order: a plain date, a
        "<number> <timescale> <preposition> <datetime>" phrase, and a
        "<datetime> <plus/minus> <number> <timescale>" phrase.

        :param data_string: the string we want to parse
        :type data_string: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        text = data_string.strip()

        # Only inputs of 3 to 50 characters are considered
        if not 3 <= len(text) <= 50:
            return

        # Plain date detection
        plain = self.date_parse(text)
        if plain:
            yield self.result(plain[0], 100, plain[1])
            return

        # Looking for <number> <timescale> <prepositions> <datetime>
        pre_parser = registry.get('DP_pre_timedelta_phrases')
        try:
            pre_tokens = pre_parser.parseString(text)
        except ParseException:
            pass
        else:
            anchor = self.date_parse(pre_tokens[1])
            if anchor:
                try:
                    # Applying the delta may overflow; that yields nothing
                    shifted = anchor[1] + pre_tokens[0]
                    yield self.result(
                        "Number Timescale Preposition Date", 100, shifted
                    )
                except OverflowError:
                    pass
                return

        # Looking for <datetime> <plus/minus> <number> <timescale>
        post_parser = registry.get('DP_post_timedelta_phrases')
        matches = list(post_parser.scanString(text))

        # Only an unambiguous, single delta phrase is accepted
        if len(matches) != 1:
            return

        tokens, start, _ = matches[0]
        base = self.date_parse(text[:start].strip())
        if base:
            try:
                shifted = base[1] + tokens.pop()
                yield self.result(
                    "Date Operator Number Timescale", 100, shifted
                )
            except OverflowError:
                pass
コード例 #7
0
 def __init__(self, config):
     """
     Initialise the base parser and pull the unit tables and the
     pyparsing parsers out of the registry.

     :param config: cahoots config
     :type config: cahoots.config.BaseConfig
     """
     BaseParser.__init__(self, config, "Measurement", 100)

     fetch = registry.get
     self.units = fetch('MP_units')
     self.systems = fetch('MP_systems')
     self.preposition_parser = fetch('MP_preposition_parser')
     self.measurement_parser = fetch('MP_measurement_parser')
コード例 #8
0
 def __init__(self, config):
     """
     Initialise the base parser and pull the unit tables and the
     pyparsing parsers out of the registry.

     :param config: cahoots config
     :type config: cahoots.config.BaseConfig
     """
     BaseParser.__init__(self, config, "Measurement", 100)

     fetch = registry.get
     self.units = fetch('MP_units')
     self.systems = fetch('MP_systems')
     self.preposition_parser = fetch('MP_preposition_parser')
     self.measurement_parser = fetch('MP_measurement_parser')
コード例 #9
0
ファイル: date.py プロジェクト: SerenitySoftwareLLC/cahoots
 def test_bootstrap(self):
     """
     Both timedelta phrase parsers must be present in the registry and
     have the expected pyparsing types.
     """
     pre_key = 'DP_pre_timedelta_phrases'
     post_key = 'DP_post_timedelta_phrases'

     # Presence checks first, then exact type checks
     self.assertTrue(registry.test(pre_key))
     self.assertTrue(registry.test(post_key))

     pre_type = type(registry.get(pre_key))
     self.assertEqual(pre_type, pyparsing.And)

     post_type = type(registry.get(post_key))
     self.assertEqual(post_type, pyparsing.Or)
コード例 #10
0
    def parse(self, data_string):
        """
        Try to read the input as a date, or as a date shifted by a time
        delta, and yield at most one result.

        Three shapes are attempted in order: a plain date, a
        "<number> <timescale> <preposition> <datetime>" phrase, and a
        "<datetime> <plus/minus> <number> <timescale>" phrase.

        :param data_string: the string we want to parse
        :type data_string: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        text = data_string.strip()

        # Only inputs of 3 to 50 characters are considered
        if not 3 <= len(text) <= 50:
            return

        # Plain date detection
        plain = self.date_parse(text)
        if plain:
            yield self.result(plain[0], 100, plain[1])
            return

        # Looking for <number> <timescale> <prepositions> <datetime>
        pre_parser = registry.get('DP_pre_timedelta_phrases')
        try:
            pre_tokens = pre_parser.parseString(text)
        except ParseException:
            pass
        else:
            anchor = self.date_parse(pre_tokens[1])
            if anchor:
                try:
                    # Applying the delta may overflow; that yields nothing
                    shifted = anchor[1] + pre_tokens[0]
                    yield self.result(
                        "Number Timescale Preposition Date", 100, shifted
                    )
                except OverflowError:
                    pass
                return

        # Looking for <datetime> <plus/minus> <number> <timescale>
        post_parser = registry.get('DP_post_timedelta_phrases')
        matches = list(post_parser.scanString(text))

        # Only an unambiguous, single delta phrase is accepted
        if len(matches) != 1:
            return

        tokens, start, _ = matches[0]
        base = self.date_parse(text[:start].strip())
        if base:
            try:
                shifted = base[1] + tokens.pop()
                yield self.result(
                    "Date Operator Number Timescale", 100, shifted
                )
            except OverflowError:
                pass
コード例 #11
0
ファイル: coordinate.py プロジェクト: pombredanne/cahoots
    def parse(self, data):
        """
        Check the input against each configured coordinate format and
        yield a result for the first format that matches.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        candidate = data.strip()

        for reg_key, format_func, fmt, subtype, confidence in \
                self.get_coordinate_test_parameters():
            match = registry.get(reg_key).match(candidate)
            if not match:
                continue

            # The formatter only receives fmt when one is configured
            if fmt:
                coord = format_func(match, fmt)
            else:
                coord = format_func(match)

            # Prepping processed data with better metadata
            result, add_data = self.generate_result_data(coord.to_string())
            yield self.result(subtype, confidence, result, add_data)

            # Only the first matching format is reported
            return
コード例 #12
0
ファイル: coordinate.py プロジェクト: pombredanne/cahoots
    def parse(self, data):
        """
        Check the input against each configured coordinate format and
        yield a result for the first format that matches.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        candidate = data.strip()

        for reg_key, format_func, fmt, subtype, confidence in \
                self.get_coordinate_test_parameters():
            match = registry.get(reg_key).match(candidate)
            if not match:
                continue

            # The formatter only receives fmt when one is configured
            if fmt:
                coord = format_func(match, fmt)
            else:
                coord = format_func(match)

            # Prepping processed data with better metadata
            result, add_data = self.generate_result_data(coord.to_string())
            yield self.result(subtype, confidence, result, add_data)

            # Only the first matching format is reported
            return
コード例 #13
0
    def prepare_landmark_datastring(self, data):
        """
        Apply the registered 'LP_the_regex' substitution to the input,
        trim it, and reject it if it fails any validation step.

        :param data: data we want to check for being a location
        :type data: str
        :return: the cleaned up datastring, or None when it is rejected
        :rtype: str
        """
        cleaned = registry.get('LP_the_regex').sub('', data).strip()

        # Anything longer than 75 characters after cleanup is rejected
        if len(cleaned) > 75:
            return None

        if not NameParser(self.config).basic_validation(cleaned.split()):
            return None

        permitted = ''.join(
            (string.whitespace, string.ascii_letters, string.digits, '.,-:')
        )

        # Every character must come from the permitted set
        if any(char not in permitted for char in cleaned):
            return None

        return cleaned
コード例 #14
0
ファイル: name.py プロジェクト: pombredanne/cahoots
    def parse(self, data):
        """
        Decide whether the input looks like a name and, if so, yield a
        single "Name" result with confidence capped at 100.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        # Making sure there are at least SOME uppercase letters in the phrase
        has_upper = registry.get('NP_upper_alpha_regex').search(data)
        if not has_upper:
            return

        words = data.split()

        # Phrases of 7 or more words are rejected outright
        if len(words) >= 7:
            return

        # Making sure each word in the phrase starts with an
        # uppercase letter or a number
        if not self.basic_validation(words):
            return

        self.detect_prefix_or_suffix(words)
        self.calculate_confidence(words)

        if self.confidence > 0:
            yield self.result("Name", min(100, self.confidence))
コード例 #15
0
    def test_bootstrapSetsUpClassifierAsExpected(self):
        """
        Bootstrapping the bayesian classifier should read the trainer
        zip, install a tokenizer and register the trained classifier.
        """
        ProgrammingBayesianClassifier.bootstrap(TestConfig)

        expected_calls = [
            'init-trainers.zip-r',
            'namelist',
            'read-foo.def',
            'read-bar.def',
        ]
        self.assertEqual(ZipFileStub.called, expected_calls)

        # The tokenizer may surface as a method or a plain function
        tokenizer = SimpleBayesStub.Tokenizer
        self.assertTrue(ismethod(tokenizer) or isfunction(tokenizer))
        self.assertIsInstance(registry.get('PP_bayes'), SimpleBayesStub)

        expected_languages = {
            'foo': 'foo.def-text',
            'bar': 'bar.def-text',
        }
        self.assertEqual(SimpleBayesStub.Languages, expected_languages)
コード例 #16
0
ファイル: name.py プロジェクト: pombredanne/cahoots
    def parse(self, data):
        """
        Decide whether the input looks like a name and, if so, yield a
        single "Name" result with confidence capped at 100.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        # Making sure there are at least SOME uppercase letters in the phrase
        has_upper = registry.get('NP_upper_alpha_regex').search(data)
        if not has_upper:
            return

        words = data.split()

        # Phrases of 7 or more words are rejected outright
        if len(words) >= 7:
            return

        # Making sure each word in the phrase starts with an
        # uppercase letter or a number
        if not self.basic_validation(words):
            return

        self.detect_prefix_or_suffix(words)
        self.calculate_confidence(words)

        if self.confidence > 0:
            yield self.result("Name", min(100, self.confidence))
コード例 #17
0
ファイル: address.py プロジェクト: pombredanne/cahoots
    def parse(self, data):
        """
        Look for a potential address in the input and yield one result
        when address tokens are found.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        text = data.strip()

        # Over-long input, or input without a single digit, is skipped
        if len(text) > 100 or not any(char.isdigit() for char in text):
            return

        splitter = registry.get('AP_split_regex')
        # Splitting the string and dropping empty pieces
        pieces = [piece for piece in splitter.split(text) if piece]

        # At least 4 words are needed...
        if len(pieces) <= 3:
            return

        # ...and one of the words should start with a number
        if not any(piece[:1].isdigit() for piece in pieces):
            return

        generated = self.generate_result_data(text, pieces)
        results, token_count, pieces = generated or (None, None, None)

        if token_count:
            # Subtracting a little confidence for each token that wasn't found
            self.confidence -= 5 * (len(pieces) - token_count)

            yield self.result(self.subtype, min(100, self.confidence), results)
コード例 #18
0
    def parse(self, data):
        """
        Look for a potential address in the input and yield one result
        when address tokens are found.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        text = data.strip()

        # Over-long input, or input without a single digit, is skipped
        if len(text) > 100 or not any(char.isdigit() for char in text):
            return

        splitter = registry.get('AP_split_regex')
        # Splitting the string and dropping empty pieces
        pieces = [piece for piece in splitter.split(text) if piece]

        # At least 4 words are needed...
        if len(pieces) <= 3:
            return

        # ...and one of the words should start with a number
        if not any(piece[:1].isdigit() for piece in pieces):
            return

        generated = self.generate_result_data(text, pieces)
        results, token_count, pieces = generated or (None, None, None)

        if token_count:
            # Subtracting a little confidence for each token that wasn't found
            self.confidence -= 5 * (len(pieces) - token_count)

            yield self.result(self.subtype, min(100, self.confidence), results)
コード例 #19
0
    def test_flush(self):
        """flush() should leave the registry's storage empty."""
        registry.set('test', 'foo')

        stored = registry.get('test')
        self.assertEqual('foo', stored)
        self.assertNotEqual(0, len(registry.storage))

        registry.flush()

        remaining = len(registry.storage)
        self.assertEqual(0, remaining)
コード例 #20
0
    def get_preposition_literals():
        """
        Return the preposition parser, building it and caching it in the
        registry the first time it is requested.
        """
        cache_key = 'DP_prepositions'

        if not registry.test(cache_key):
            # One caseless literal per preposition, combined as alternatives
            words = DataHandler().get_prepositions()
            literals = Or([CaselessLiteral(word) for word in words])
            registry.set(cache_key, literals)
            return literals

        return registry.get(cache_key)
コード例 #21
0
    def test_flush(self):
        """flush() should leave the registry's storage empty."""
        registry.set('test', 'foo')

        stored = registry.get('test')
        self.assertEqual('foo', stored)
        self.assertNotEqual(0, len(registry.storage))

        registry.flush()

        remaining = len(registry.storage)
        self.assertEqual(0, remaining)
コード例 #22
0
    def classify(cls, data_string):
        """
        Score a string with the classifier registered as 'PP_bayes'.

        The original documentation describes the result as a dict of
        programming language match probabilities.
        """
        return registry.get('PP_bayes').score(data_string)
コード例 #23
0
ファイル: date.py プロジェクト: msabramo/cahoots
    def get_preposition_literals():
        """
        Return the preposition parser, building it and caching it in the
        registry the first time it is requested.
        """
        cache_key = 'DP_prepositions'

        if not registry.test(cache_key):
            # One caseless literal per preposition, combined as alternatives
            words = DataHandler().get_prepositions()
            literals = Or([CaselessLiteral(word) for word in words])
            registry.set(cache_key, literals)
            return literals

        return registry.get(cache_key)
コード例 #24
0
 def test_bootStrap(self, get_prepositions_mock):
     """
     MeasurementParser.bootstrap should fill the registry with the unit
     and system lookups plus both pyparsing parsers.

     :param get_prepositions_mock: stand-in for the preposition lookup
         (presumably injected by a patch decorator outside this view)
     """
     get_prepositions_mock.return_value = ['of']
     MeasurementParser.bootstrap(TestConfig)

     expected_units = {
         'acre': 'imperial_area',
         'acres': 'imperial_area',
         'yard': 'imperial_length',
         'yards': 'imperial_length',
     }
     self.assertEqual(expected_units, registry.get('MP_units'))

     expected_systems = {
         'imperial_area': ('Imperial', 'Area'),
         'imperial_length': ('Imperial', 'Length'),
     }
     self.assertEqual(expected_systems, registry.get('MP_systems'))

     # Both registered parsers are expected to be pyparsing.And instances
     for parser_key in ('MP_preposition_parser', 'MP_measurement_parser'):
         self.assertIsInstance(registry.get(parser_key), pyparsing.And)

     get_prepositions_mock.assert_called_once_with()
コード例 #25
0
ファイル: data.py プロジェクト: msabramo/cahoots
    def get_prepositions(self):
        """
        Return the list of prepositions.

        The list is loaded from ``prepositions.yaml`` on first use and
        cached in the registry under 'DATA_prepositions'; later calls
        return the cached value without touching the file.

        :return: the parsed contents of prepositions.yaml
        """
        if registry.test('DATA_prepositions'):
            return registry.get('DATA_prepositions')

        handle = self.get_file_handle('prepositions.yaml')
        try:
            # NOTE(review): yaml.load without an explicit Loader is
            # deprecated in newer PyYAML releases; consider
            # yaml.safe_load once it is confirmed the file only holds
            # plain data.
            prepositions = yaml.load(handle)
        finally:
            # Close the handle even when parsing fails
            handle.close()

        registry.set('DATA_prepositions', prepositions)
        return prepositions
コード例 #26
0
    def test_basicLanguageHeuristicFindsExpectedKeywords(self):
        """
        The basic heuristic should find exactly two of the three sample
        tokens among the registered php keywords.
        """
        test_config = TestConfig()
        ProgrammingParser.bootstrap(test_config)
        self.bootstrapMock.assert_called_with(test_config)

        parser = ProgrammingParser(test_config)
        php_keywords = registry.get('PP_language_keywords')['php']
        candidates = ['for', 'if', 'foobar']

        found = parser.basic_language_heuristic(php_keywords, candidates)

        self.assertEqual(2, len(found))
コード例 #27
0
    def parse(self, data_string):
        """
        Yield an "Email Address" result when the input matches the
        registered email regex.

        :param data_string: the string we want to parse
        :type data_string: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        # Cheap pre-checks: at most 254 characters and an '@' present
        plausible = len(data_string) <= 254 and '@' in data_string

        if plausible and registry.get('EP_valid_regex').match(data_string):
            yield self.result("Email Address", self.confidence)
コード例 #28
0
ファイル: postalcode.py プロジェクト: msabramo/cahoots
    def parse(self, data):
        """
        Yield a "Postal Code" result when the input matches the postal
        code regex, lookup data exists and confidence stays positive.
        """
        code = data.strip()

        # Inputs of 20 or more characters are not considered
        if len(code) >= 20:
            return

        if not registry.get('ZCP_postal_code_regex').match(code):
            return

        details = self.get_postal_code_data(code)
        if details is None:
            return

        self.calculate_confidence(code, details)
        if self.confidence > 0:
            yield self.result("Postal Code", self.confidence, details)
コード例 #29
0
    def classify(cls, data_string):
        """
        Score a string with the classifier registered as 'PP_bayes' to
        get programming language match probabilities.

        :param data_string: the string we want to classify
        :type data_string: str
        :return: bayesian probabilities
        :rtype: dict
        """
        return registry.get('PP_bayes').score(data_string)
コード例 #30
0
ファイル: data.py プロジェクト: pombredanne/cahoots
    def get_prepositions(self):
        """
        Return the list of prepositions.

        The list is loaded from ``prepositions.yaml`` on first use and
        cached in the registry under 'DATA_prepositions'; later calls
        return the cached value without touching the file.

        :return: list of prepositions
        :rtype: list
        """
        if registry.test('DATA_prepositions'):
            return registry.get('DATA_prepositions')

        handle = self.get_file_handle('prepositions.yaml')
        try:
            # NOTE(review): yaml.load without an explicit Loader is
            # deprecated in newer PyYAML releases; consider
            # yaml.safe_load once it is confirmed the file only holds
            # plain data.
            prepositions = yaml.load(handle)
        finally:
            # Close the handle even when parsing fails
            handle.close()

        registry.set('DATA_prepositions', prepositions)
        return prepositions
コード例 #31
0
ファイル: bayesian.py プロジェクト: pombredanne/cahoots
    def test_bootstrapSetsUpClassifierAsExpected(self):
        """
        Bootstrapping the bayesian classifier should read the trainer
        zip, install a tokenizer and register the trained classifier.
        """
        ProgrammingBayesianClassifier.bootstrap(TestConfig)

        expected_calls = [
            'init-trainers.zip-r',
            'namelist',
            'read-foo.def',
            'read-bar.def',
        ]
        self.assertEqual(ZipFileStub.called, expected_calls)

        # The tokenizer may surface as a method or a plain function
        tokenizer = SimpleBayesStub.Tokenizer
        self.assertTrue(ismethod(tokenizer) or isfunction(tokenizer))
        self.assertIsInstance(registry.get('PP_bayes'), SimpleBayesStub)

        expected_languages = {
            'foo': 'foo.def-text',
            'bar': 'bar.def-text',
        }
        self.assertEqual(SimpleBayesStub.Languages, expected_languages)
コード例 #32
0
ファイル: landmark.py プロジェクト: msabramo/cahoots
    def prepare_landmark_datastring(cls, data):
        """
        Apply the registered 'LP_the_regex' substitution to the input,
        trim it, and return it, or None if any validation step fails.
        """
        cleaned = registry.get('LP_the_regex').sub('', data).strip()

        # Anything longer than 75 characters after cleanup is rejected
        if len(cleaned) > 75:
            return None

        if not NameParser.basic_validation(cleaned.split()):
            return None

        permitted = ''.join(
            (string.whitespace, string.ascii_letters, string.digits, '.,-:')
        )

        # Every character must come from the permitted set
        if any(char not in permitted for char in cleaned):
            return None

        return cleaned
コード例 #33
0
    def prepare_landmark_datastring(self, data):
        """
        Apply the registered 'LP_the_regex' substitution to the input,
        trim it, and return it, or None if any validation step fails.
        """
        cleaned = registry.get('LP_the_regex').sub('', data).strip()

        # Anything longer than 75 characters after cleanup is rejected
        if len(cleaned) > 75:
            return None

        if not NameParser(self.config).basic_validation(cleaned.split()):
            return None

        permitted = ''.join(
            (string.whitespace, string.ascii_letters, string.digits, '.,-:')
        )

        # Every character must come from the permitted set
        if any(char not in permitted for char in cleaned):
            return None

        return cleaned
コード例 #34
0
ファイル: postalcode.py プロジェクト: pombredanne/cahoots
 def test_postal_code_patterns_match(self):
     """
     Every sample postal code must be matched by the registered
     postal code regex.
     """
     postal_regex = registry.get('ZCP_postal_code_regex')

     # Samples are checked in order; the first non-match fails the test
     samples = (
         'A999',
         'AB 12',
         'AD999',
         '999 99',
         'AA9999',
         'VC9999',
         'VG1199',
         '6799 W3',
         '9999 AA',
         '9999 AW',
         '9999 CW',
         'A9A 9A9',
         'AZ 9999',
         'BB99999',
         'GY9 9AA',
         'JE9 9AA',
         'JMAAA99',
         'LV-9999',
         'A9999AAA',
         'AA99 9AA',
         'AAA 9999',
         'AAAA 1ZZ',
         'FIQQ 1ZZ',
         'TKCA 1ZZ',
         'GX99 9AA',
         'IM99 9AA',
         'KY9-9999',
         '999',
         '9999',
         '99-99',
         '99-999',
         '999999',
         '999-999',
         '9999999',
         '999-9999',
         '9999-999',
         '99999-9999',
         '77515 CEDEX',
         '77515 CEDEX 9',
         '77515 CEDEX 99',
     )

     for sample in samples:
         self.assertTrue(postal_regex.match(sample))
コード例 #35
0
 def test_postal_code_patterns_match(self):
     """
     Every sample postal code must be matched by the registered
     postal code regex.
     """
     postal_regex = registry.get('ZCP_postal_code_regex')

     # Samples are checked in order; the first non-match fails the test
     samples = (
         'A999',
         'AB 12',
         'AD999',
         '999 99',
         'AA9999',
         'VC9999',
         'VG1199',
         '6799 W3',
         '9999 AA',
         '9999 AW',
         '9999 CW',
         'A9A 9A9',
         'AZ 9999',
         'BB99999',
         'GY9 9AA',
         'JE9 9AA',
         'JMAAA99',
         'LV-9999',
         'A9999AAA',
         'AA99 9AA',
         'AAA 9999',
         'AAAA 1ZZ',
         'FIQQ 1ZZ',
         'TKCA 1ZZ',
         'GX99 9AA',
         'IM99 9AA',
         'KY9-9999',
         '999',
         '9999',
         '99-99',
         '99-999',
         '999999',
         '999-999',
         '9999999',
         '999-9999',
         '9999-999',
         '99999-9999',
         '77515 CEDEX',
         '77515 CEDEX 9',
         '77515 CEDEX 99',
     )

     for sample in samples:
         self.assertTrue(postal_regex.match(sample))
コード例 #36
0
ファイル: postalcode.py プロジェクト: pombredanne/cahoots
    def parse(self, data):
        """
        Yield a "Postal Code" result when the input matches the postal
        code regex, lookup data exists and confidence stays positive.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        code = data.strip()

        # Inputs of 20 or more characters are not considered
        if len(code) >= 20:
            return

        if not registry.get('ZCP_postal_code_regex').match(code):
            return

        details = self.get_postal_code_data(code)
        if details is None:
            return

        self.calculate_confidence(code, details)
        if self.confidence > 0:
            yield self.result("Postal Code", self.confidence, details)
コード例 #37
0
    def parse(self, data):
        """
        Yield a "Postal Code" result when the input matches the postal
        code regex, lookup data exists and confidence stays positive.

        :param data: the string we want to parse
        :type data: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        code = data.strip()

        # Inputs of 20 or more characters are not considered
        if len(code) >= 20:
            return

        if not registry.get('ZCP_postal_code_regex').match(code):
            return

        details = self.get_postal_code_data(code)
        if details is None:
            return

        self.calculate_confidence(code, details)
        if self.confidence > 0:
            yield self.result("Postal Code", self.confidence, details)
コード例 #38
0
    def test_get(self):
        """get() returns a stored value, and None for a key never set."""
        registry.set('test', 'foo')

        known = registry.get('test')
        self.assertEqual('foo', known)

        missing = registry.get('bar')
        self.assertIsNone(missing)
コード例 #39
0
ファイル: __init__.py プロジェクト: msabramo/cahoots
 def __init__(self, config):
     """
     Initialise the base parser and pull the unit tables and the
     pyparsing parsers out of the registry.

     :param config: configuration object handed on to BaseParser
     """
     BaseParser.__init__(self, config, "Measurement", 100)

     fetch = registry.get
     self.units = fetch('MP_units')
     self.systems = fetch('MP_systems')
     self.preposition_parser = fetch('MP_preposition_parser')
     self.measurement_parser = fetch('MP_measurement_parser')
コード例 #40
0
ファイル: __init__.py プロジェクト: msabramo/cahoots
 def __init__(self, config):
     """
     Initialise the base parser and pull the keyword collections out
     of the registry.

     :param config: configuration object handed on to BaseParser
     """
     BaseParser.__init__(self, config, "Programming", 0)

     fetch = registry.get
     self.all_keywords = fetch('PP_all_keywords')
     self.language_keywords = fetch('PP_language_keywords')
コード例 #41
0
    def test_get(self):
        """get() returns a stored value, and None for a key never set."""
        registry.set('test', 'foo')

        known = registry.get('test')
        self.assertEqual('foo', known)

        missing = registry.get('bar')
        self.assertIsNone(missing)