Example #1
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first; self._span and self._range are
     then replaced. The span accepts an optional leading "fra" and one of
     "til", "--" or an en dash between the start and stop parts; the range
     is "mellem" start "og" stop. Both patterns ignore case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE
     span_pattern = u"(fra)?\s*(?P<start>.+)\s*(til|--|–)\s*(?P<stop>.+)"
     range_pattern = u"(mellem)\s+(?P<start>.+)\s+og\s+(?P<stop>.+)"
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)
Example #2
0
 def init_strings(self):
     """
     Define, in Swedish, the span and range regular expressions.

     DateParser.init_strings() runs first; self._span and self._range are
     then replaced with case-insensitive patterns.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE
     # optional leading keyword, start, separator word / "--" / en dash, stop
     span_pattern = u"(från)?\s*(?P<start>.+)\s*(till|--|–)\s*(?P<stop>.+)"
     # keyword, start, joining word, stop
     range_pattern = u"(mellan)\s+(?P<start>.+)\s+och\s+(?P<stop>.+)"
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)
Example #3
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first; self._span and self._range are
     then replaced with case-insensitive patterns.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE

     # start, whitespace, a single hyphen, whitespace, stop
     span_pattern = u"(?P<start>.+)\s+(-)\s+(?P<stop>.+)"
     # optional leading "vuosien", start, "ja", stop, then the closing keyword
     range_pattern = (
         u"(vuosien\s*)?(?P<start>.+)\s+ja\s+(?P<stop>.+)\s+välillä")

     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)
Example #4
0
    def init_strings(self):
        """
        Compile the regular expressions used for matching dates.

        DateParser.init_strings(self) is called first so that the expressions
        which do not vary need not be coded again; everything assigned after
        that call overrides what the base method set up. Span and range are
        the expressions most likely to need changing. See DateParserRU() as
        an example.
        """
        DateParser.init_strings(self)
        flags = re.IGNORECASE

        # This self._numeric is different from the base
        # avoid bug gregorian / french calendar conversion (+/-10 days)
        self._numeric = re.compile(
            "((\d+)[/\. ])?\s*((\d+)[/\.])?\s*(\d+)\s*$")

        span_pattern = u"(de)\s+(?P<start>.+)\s+(à)\s+(?P<stop>.+)"
        range_pattern = (
            u"(entre|ent\.|ent)\s+(?P<start>.+)\s+(et)\s+(?P<stop>.+)")
        self._span = re.compile(span_pattern, flags)
        self._range = re.compile(range_pattern, flags)

        # The text expressions differ from the base by adding ".?" after the
        # first date and removing "\s*$" at the end. They all share one
        # template; only the month expression substituted into it changes.
        text_template = '(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'

        def compile_text(months):
            """Compile the shared text template around one month expression."""
            return re.compile(text_template % months, flags)

        self._text2 = compile_text(self._mon_str)    # gregorian and julian
        self._jtext2 = compile_text(self._jmon_str)  # hebrew
        self._ftext2 = compile_text(self._fmon_str)  # french
        self._ptext2 = compile_text(self._pmon_str)  # persian
        self._itext2 = compile_text(self._imon_str)  # islamic
        self._stext2 = compile_text(self._smon_str)  # swedish
Example #5
0
 def init_strings(self):
     """
     Install the span, range and text expressions for this parser.

     DateParser.init_strings() runs first; self._span ("van" ... "tot"),
     self._range ("tussen" ... "en"), self._text2 and self._jtext2 are then
     replaced. All four patterns ignore case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE
     # optional digits, one optional character, whitespace, the substituted
     # month expression, then an optional number with an optional "/nn" part
     text_template = '(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'

     span_pattern = "(van)\s+(?P<start>.+)\s+(tot)\s+(?P<stop>.+)"
     range_pattern = "tussen\s+(?P<start>.+)\s+en\s+(?P<stop>.+)"
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)
     self._text2 = re.compile(text_template % self._mon_str, flags)
     self._jtext2 = re.compile(text_template % self._jmon_str, flags)
Example #6
0
 def init_strings(self):
     """
     Install the span, range and text expressions for this parser.

     DateParser.init_strings() runs first; self._span ("von" or "vom" ...
     "bis"), self._range ("zwischen" ... "und"), self._text2 and
     self._jtext2 are then replaced. All four patterns ignore case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE

     span_pattern = "(von|vom)\s+(?P<start>.+)\s+(bis)\s+(?P<stop>.+)"
     range_pattern = "zwischen\s+(?P<start>.+)\s+und\s+(?P<stop>.+)"
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)

     # Same template for both text expressions; only the month expression
     # substituted into it differs.
     text_template = '(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
     self._text2 = re.compile(text_template % self._mon_str, flags)
     self._jtext2 = re.compile(text_template % self._jmon_str, flags)
Example #7
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first. Both patterns are then built
     from one template: opening keyword, start part, joining keyword, stop
     part, each separated by whitespace. Matching ignores case.
     """
     DateParser.init_strings(self)
     template = "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"

     span_open, span_join = [u'от'], [u'до']
     range_open, range_join = [u'между'], [u'и']

     # Each keyword list becomes one regex alternation.
     span_words = ('|'.join(span_open), '|'.join(span_join))
     range_words = ('|'.join(range_open), '|'.join(range_join))

     self._span = re.compile(template % span_words, re.IGNORECASE)
     self._range = re.compile(template % range_words, re.IGNORECASE)
Example #8
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first. The span is "dal" or "da",
     start, "al" or "a", stop; the range is "tra" or "fra", start, "e",
     stop. Both patterns ignore case.
     """
     DateParser.init_strings(self)
     template = "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"

     span_open = [u'dal', u'da']
     span_join = [u'al', u'a']
     range_open = [u'tra', u'fra']
     range_join = [u'e']

     # Each keyword list becomes one regex alternation.
     span_words = ('|'.join(span_open), '|'.join(span_join))
     range_words = ('|'.join(range_open), '|'.join(range_join))

     self._span = re.compile(template % span_words, re.IGNORECASE)
     self._range = re.compile(template % range_words, re.IGNORECASE)
Example #9
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first. The span is "nuo" start "iki"
     stop; the range is "tarp" start "ir" stop. Both patterns ignore case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE
     template = "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"

     span_open, span_join = [u'nuo'], [u'iki']
     range_open, range_join = [u'tarp'], [u'ir']

     span_pattern = template % ('|'.join(span_open), '|'.join(span_join))
     range_pattern = template % ('|'.join(range_open), '|'.join(range_join))
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)
Example #10
0
 def init_strings(self):
     """
     Install the span, range and text expressions for this parser.

     DateParser.init_strings() runs first; self._span ("od" ... "do"),
     self._range, self._text2 and self._jtext2 are then replaced. All four
     patterns ignore case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE

     span_pattern = "(od)\s+(?P<start>.+)\s+(do)\s+(?P<stop>.+)"
     # The range keyword also accepts common misspellings: the "po" prefix
     # is optional and a plain "e" is accepted in place of the accented one.
     range_pattern = (
         u"((?:po)?mi(?:ę|e)dzy)\s+(?P<start>.+)\s+(a)\s+(?P<stop>.+)")
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)

     # Same template for both text expressions; only the month expression
     # substituted into it differs.
     text_template = '(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
     self._text2 = re.compile(text_template % self._mon_str, flags)
     self._jtext2 = re.compile(text_template % self._jmon_str, flags)
Example #11
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first. The span is "des de" start
     "fins a" stop; the range opens with "entre", "ent." or "ent" and joins
     start and stop with "i". Both patterns ignore case.
     """
     DateParser.init_strings(self)
     template = "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"

     span_open = [u'des de']
     span_join = [u'fins a']
     # "ent\." is listed before "ent" so the dotted abbreviation is tried
     # first.
     range_open = [u'entre', u'ent\.', u'ent']
     range_join = [u'i']

     span_words = ('|'.join(span_open), '|'.join(span_join))
     range_words = ('|'.join(range_open), '|'.join(range_join))

     self._span = re.compile(template % span_words, re.IGNORECASE)
     self._range = re.compile(template % range_words, re.IGNORECASE)
Example #12
0
 def init_strings(self):
     """
     Compile the regular expressions used for matching dates.

     DateParser.init_strings() runs first. self._span and self._range are
     then rebuilt from one template: opening keyword, start part, joining
     keyword, stop part, each separated by whitespace. Matching ignores
     case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE
     template = "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"

     span_open, span_join = [u'من'], [u'إلى']
     range_open, range_join = [u'بين'], [u'و']

     # Each keyword list becomes one regex alternation.
     span_pattern = template % ('|'.join(span_open), '|'.join(span_join))
     range_pattern = template % ('|'.join(range_open), '|'.join(range_join))
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)
Example #13
0
 def init_strings(self):
     """
     Compile the regular expressions used for matching dates.

     DateParser.init_strings() runs first; self._text2, self._numeric,
     self._span, self._range and self._jtext2 are then replaced. Every
     pattern except self._numeric is compiled case-insensitively.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE

     # 'Day. MONTH year.' format, with or without the dots
     text_pattern = ('(\d+)?\.?\s*?%s\.?\s*((\d+)(/\d+)?)?\s*\.?$'
                     % self._mon_str)
     self._text2 = re.compile(text_pattern, flags)

     # Day.Month.Year. -- parts separated by "/", "." or "-", with an
     # optional trailing dot
     numeric_pattern = "((\d+)[/\.-])?\s*((\d+)[/\.-])?\s*(\d+)\.?$"
     self._numeric = re.compile(numeric_pattern)

     span_pattern = "od\s+(?P<start>.+)\s+do\s+(?P<stop>.+)"
     range_pattern = u"med\s+(?P<start>.+)\s+in\s+(?P<stop>.+)"
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)

     jtext_pattern = '(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?' % self._jmon_str
     self._jtext2 = re.compile(jtext_pattern, flags)
Example #14
0
    def init_strings(self):
        """
        This method compiles regular expression strings for matching dates.

        See DateParser.init_strings()

        After the base call, self._span and self._range are rebuilt from one
        template: opening keyword, start part, joining keyword, stop part,
        each separated by whitespace. Both patterns ignore case.
        """
        DateParser.init_strings(self)

        _span_1 = [u'з', u'від']
        # The b.c.e. pattern also contains "до", so "до" only counts as the
        # span's joining keyword when it is NOT followed by whitespace and
        # "н.". This needs a negative lookahead, (?!...); the previous
        # spelling 'до?!\sн\.' instead required a literal "!" followed by
        # "н.", so a plain "до" could never match here.
        _span_2 = [u'по', u'до(?!\s+н\.)']
        _range_1 = [u'між']
        _range_2 = [u'і', u'та']
        self._span = re.compile(
            "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)" %
            ('|'.join(_span_1), '|'.join(_span_2)), re.IGNORECASE)
        self._range = re.compile(
            "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)" %
            ('|'.join(_range_1), '|'.join(_range_2)), re.IGNORECASE)
Example #15
0
    def init_strings(self):
        """
        Compile the regular expressions used for matching dates.

        DateParser.init_strings() runs first; self._text2, self._numeric,
        self._span and self._range are then replaced. Every pattern except
        self._numeric is compiled case-insensitively. The span and range
        keywords are listed in both Latin and Cyrillic spellings.
        """
        DateParser.init_strings(self)
        flags = re.IGNORECASE

        # 'Day. MONTH year.' format, with or without the dots
        text_pattern = (
            '(\d+)?\.?\s*?%s\s*((\d+)(/\d+)?)?\.?\s*$' % self._mon_str)
        self._text2 = re.compile(text_pattern, flags)

        # Day.Month.Year. with an optional trailing dot
        numeric_pattern = "((\d+)[/\. ])?\s*((\d+)[/\.])?\s*(\d+)\.?$"
        self._numeric = re.compile(numeric_pattern)

        template = "(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
        span_open, span_join = ['od', 'од'], ['do', 'до']
        range_open, range_join = ['između', 'између'], ['i', 'и']

        # Each keyword list becomes one regex alternation.
        span_words = ('|'.join(span_open), '|'.join(span_join))
        range_words = ('|'.join(range_open), '|'.join(range_join))
        self._span = re.compile(template % span_words, flags)
        self._range = re.compile(template % range_words, flags)
Example #16
0
 def init_strings(self):
     """
     Install the span and range expressions for this parser.

     DateParser.init_strings() runs first. The span is "od" start "do"
     stop; the range is "mezi" start "a" stop. Both patterns ignore case.
     """
     DateParser.init_strings(self)
     flags = re.IGNORECASE
     span_pattern = u"(od)\s+(?P<start>.+)\s+(do)\s+(?P<stop>.+)"
     range_pattern = u"(mezi)\s+(?P<start>.+)\s+(a)\s+(?P<stop>.+)"
     self._span = re.compile(span_pattern, flags)
     self._range = re.compile(range_pattern, flags)