def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` and ``self._range`` are
    replaced.  The keywords used here ("fra ... til", "mellem ... og")
    look Danish -- the enclosing class is not visible from this method, so
    confirm against it.

    Both patterns expose the named groups ``start`` and ``stop`` and are
    compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # The regex skeleton is a raw literal so "\s" reaches the regex engine
    # verbatim instead of being an invalid string escape.  The separator
    # list contains a non-ASCII en dash and therefore stays in a u""
    # literal; it holds no backslashes.
    span_separators = u"til|--|–"
    self._span = re.compile(
        r"(fra)?\s*(?P<start>.+)\s*(%s)\s*(?P<stop>.+)" % span_separators,
        re.IGNORECASE)
    self._range = re.compile(
        r"(mellem)\s+(?P<start>.+)\s+og\s+(?P<stop>.+)",
        re.IGNORECASE)
def init_strings(self):
    """
    Define, in Swedish, the span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("från ... till", or a
    "--" / en-dash separator) and ``self._range`` ("mellan ... och") are
    replaced.  Both expose the named groups ``start`` and ``stop`` and are
    compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Non-ASCII keywords stay in u"" literals (they contain no
    # backslashes); the regex skeleton is a raw literal so "\s" is passed
    # to the regex engine verbatim rather than being an invalid string
    # escape.
    span_prefix = u"från"
    span_separators = u"till|--|–"
    self._span = re.compile(
        r"(%s)?\s*(?P<start>.+)\s*(%s)\s*(?P<stop>.+)"
        % (span_prefix, span_separators),
        re.IGNORECASE)
    self._range = re.compile(
        r"(mellan)\s+(?P<start>.+)\s+och\s+(?P<stop>.+)",
        re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` and ``self._range`` are
    replaced.  The keywords ("vuosien ... ja ... välillä") look Finnish --
    the enclosing class is not visible from this method, so confirm
    against it.

    Both patterns expose the named groups ``start`` and ``stop`` and are
    compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Span: <date> whitespace "-" whitespace <date>.  Raw literal so "\s"
    # is not treated as an (invalid) string escape.
    self._span = re.compile(
        r"(?P<start>.+)\s+(-)\s+(?P<stop>.+)",
        re.IGNORECASE)

    # Range: the trailing keyword is non-ASCII, so it stays in a u""
    # literal and is substituted into the raw regex skeleton.
    range_suffix = u"välillä"
    self._range = re.compile(
        r"(vuosien\s*)?(?P<start>.+)\s+ja\s+(?P<stop>.+)\s+%s" % range_suffix,
        re.IGNORECASE)
def init_strings(self):
    """
    This method compiles regular expression strings for matching dates.

    Most of the re's in most languages can stay as is. span and range
    most likely will need to change. Whatever change is done, this method
    may be called first as DateParser.init_strings(self) so that the
    invariant expressions don't need to be repeatedly coded. All
    differences can be coded after the DateParser.init_strings(self) call,
    that way they override stuff from this method.
    See DateParserRU() as an example.
    """
    DateParser.init_strings(self)

    # All regex skeletons below are raw literals so that "\d", "\s" and
    # "\." reach the regex engine verbatim instead of being invalid string
    # escapes.

    # This self._numeric is different from the base:
    # avoid bug gregorian / french calendar conversion (+/-10 days)
    self._numeric = re.compile(
        r"((\d+)[/\. ])?\s*((\d+)[/\.])?\s*(\d+)\s*$")

    # The non-ASCII span keyword stays in a u"" literal and is substituted
    # into the raw skeleton.
    span_stop_word = u"à"
    self._span = re.compile(
        r"(de)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)" % span_stop_word,
        re.IGNORECASE)
    self._range = re.compile(
        r"(entre|ent\.|ent)\s+(?P<start>.+)\s+(et)\s+(?P<stop>.+)",
        re.IGNORECASE)

    # These self._*text2 patterns are different from the base
    # by adding ".?" after the first date and removing "\s*$" at the end.
    # One shared template; only the month alternation differs per calendar.
    text_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
    # gregorian and julian
    self._text2 = re.compile(text_template % self._mon_str, re.IGNORECASE)
    # hebrew
    self._jtext2 = re.compile(text_template % self._jmon_str, re.IGNORECASE)
    # french
    self._ftext2 = re.compile(text_template % self._fmon_str, re.IGNORECASE)
    # persian
    self._ptext2 = re.compile(text_template % self._pmon_str, re.IGNORECASE)
    # islamic
    self._itext2 = re.compile(text_template % self._imon_str, re.IGNORECASE)
    # swedish
    self._stext2 = re.compile(text_template % self._smon_str, re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific date regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("van ... tot"),
    ``self._range`` ("tussen ... en"), ``self._text2`` and
    ``self._jtext2`` are replaced.  The keywords look Dutch -- the
    enclosing class is not visible from this method, so confirm against
    it.

    ``_span`` and ``_range`` expose the named groups ``start`` and
    ``stop``; every pattern is compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Raw literals: "\s" / "\d" must reach the regex engine verbatim
    # rather than being (invalid) string escapes.
    self._span = re.compile(
        r"(van)\s+(?P<start>.+)\s+(tot)\s+(?P<stop>.+)",
        re.IGNORECASE)
    self._range = re.compile(
        r"tussen\s+(?P<start>.+)\s+en\s+(?P<stop>.+)",
        re.IGNORECASE)

    # Optional day, one optional arbitrary character, a month name taken
    # from the given alternation, then an optional year (optionally "/NN").
    text_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
    self._text2 = re.compile(text_template % self._mon_str, re.IGNORECASE)
    self._jtext2 = re.compile(text_template % self._jmon_str, re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific date regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("von|vom ... bis"),
    ``self._range`` ("zwischen ... und"), ``self._text2`` and
    ``self._jtext2`` are replaced.  The keywords look German -- the
    enclosing class is not visible from this method, so confirm against
    it.

    ``_span`` and ``_range`` expose the named groups ``start`` and
    ``stop``; every pattern is compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Raw literals: "\s" / "\d" must reach the regex engine verbatim
    # rather than being (invalid) string escapes.
    self._span = re.compile(
        r"(von|vom)\s+(?P<start>.+)\s+(bis)\s+(?P<stop>.+)",
        re.IGNORECASE)
    self._range = re.compile(
        r"zwischen\s+(?P<start>.+)\s+und\s+(?P<stop>.+)",
        re.IGNORECASE)

    # Optional day, one optional arbitrary character, a month name taken
    # from the given alternation, then an optional year (optionally "/NN").
    text_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
    self._text2 = re.compile(text_template % self._mon_str, re.IGNORECASE)
    self._jtext2 = re.compile(text_template % self._jmon_str, re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` and ``self._range`` are
    rebuilt from the keyword lists below.  Both expose the named groups
    ``start`` and ``stop`` and are compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Opening / closing keywords; each list becomes one regex alternation.
    _span_1 = [u'от']
    _span_2 = [u'до']
    _range_1 = [u'между']
    _range_2 = [u'и']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.  Raw literal so
    # "\s" is passed to the regex engine instead of being an invalid
    # string escape.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("dal|da ... al|a") and
    ``self._range`` ("tra|fra ... e") are rebuilt from the keyword lists
    below.  Both expose the named groups ``start`` and ``stop`` and are
    compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Opening / closing keywords; each list becomes one regex alternation,
    # tried in the order listed.
    _span_1 = [u'dal', u'da']
    _span_2 = [u'al', u'a']
    _range_1 = [u'tra', u'fra']
    _range_2 = [u'e']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.  Raw literal so
    # "\s" is passed to the regex engine instead of being an invalid
    # string escape.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("nuo ... iki") and
    ``self._range`` ("tarp ... ir") are rebuilt from the keyword lists
    below.  Both expose the named groups ``start`` and ``stop`` and are
    compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Opening / closing keywords; each list becomes one regex alternation.
    _span_1 = [u'nuo']
    _span_2 = [u'iki']
    _range_1 = [u'tarp']
    _range_2 = [u'ir']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.  Raw literal so
    # "\s" is passed to the regex engine instead of being an invalid
    # string escape.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific date regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("od ... do"),
    ``self._range``, ``self._text2`` and ``self._jtext2`` are replaced.
    ``_span`` and ``_range`` expose the named groups ``start`` and
    ``stop``; every pattern is compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Raw literals: "\s" / "\d" must reach the regex engine verbatim
    # rather than being (invalid) string escapes.
    self._span = re.compile(
        r"(od)\s+(?P<start>.+)\s+(do)\s+(?P<stop>.+)",
        re.IGNORECASE)

    # Also handle common mistakes: optional "po" prefix and "e" in place
    # of "ę".  The keyword is non-ASCII but has no backslashes, so it stays
    # in a u"" literal and is substituted into the raw skeleton.
    range_keyword = u"(?:po)?mi(?:ę|e)dzy"
    self._range = re.compile(
        r"(%s)\s+(?P<start>.+)\s+(a)\s+(?P<stop>.+)" % range_keyword,
        re.IGNORECASE)

    # Optional day, one optional arbitrary character, a month name taken
    # from the given alternation, then an optional year (optionally "/NN").
    text_template = r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?'
    self._text2 = re.compile(text_template % self._mon_str, re.IGNORECASE)
    self._jtext2 = re.compile(text_template % self._jmon_str, re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("des de ... fins a")
    and ``self._range`` ("entre|ent.|ent ... i") are rebuilt from the
    keyword lists below.  Both expose the named groups ``start`` and
    ``stop`` and are compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Opening / closing keywords; each list becomes one regex alternation,
    # tried in the order listed.  "ent\." is a regex fragment (escaped
    # dot), hence the raw literal.
    _span_1 = [u'des de']
    _span_2 = [u'fins a']
    _range_1 = [u'entre', r'ent\.', u'ent']
    _range_2 = [u'i']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.  Raw literal so
    # "\s" is passed to the regex engine instead of being an invalid
    # string escape.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    This method compiles regular expression strings for matching dates.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` and ``self._range`` are
    rebuilt from the keyword lists below.  Both expose the named groups
    ``start`` and ``stop`` and are compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Opening / closing keywords; each list becomes one regex alternation.
    _span_1 = [u'من']
    _span_2 = [u'إلى']
    _range_1 = [u'بين']
    _range_2 = [u'و']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.  Raw literal so
    # "\s" is passed to the regex engine instead of being an invalid
    # string escape.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    Compile regular expression strings for matching dates.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._text2``, ``self._numeric``,
    ``self._span`` ("od ... do"), ``self._range`` ("med ... in") and
    ``self._jtext2`` are replaced.  ``_span`` and ``_range`` expose the
    named groups ``start`` and ``stop``.
    """
    DateParser.init_strings(self)

    # All patterns are raw literals so that "\d", "\s" and "\." reach the
    # regex engine verbatim instead of being invalid string escapes.

    # match 'Day. MONTH year.' format with or without dots
    self._text2 = re.compile(
        r'(\d+)?\.?\s*?%s\.?\s*((\d+)(/\d+)?)?\s*\.?$' % self._mon_str,
        re.IGNORECASE)

    # match Day.Month.Year. ("/", "." or "-" as separator, optional
    # trailing dot)
    self._numeric = re.compile(
        r"((\d+)[/\.-])?\s*((\d+)[/\.-])?\s*(\d+)\.?$")

    self._span = re.compile(
        r"od\s+(?P<start>.+)\s+do\s+(?P<stop>.+)",
        re.IGNORECASE)
    self._range = re.compile(
        r"med\s+(?P<start>.+)\s+in\s+(?P<stop>.+)",
        re.IGNORECASE)

    self._jtext2 = re.compile(
        r'(\d+)?.?\s+?%s\s*((\d+)(/\d+)?)?' % self._jmon_str,
        re.IGNORECASE)
def init_strings(self):
    """
    This method compiles regular expression strings for matching dates.

    See DateParser.init_strings()

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` and ``self._range`` are
    rebuilt from the keyword lists below.  Both expose the named groups
    ``start`` and ``stop`` and are compiled case-insensitively.
    """
    DateParser.init_strings(self)

    _span_1 = [u'з', u'від']
    # The b.c.e. pattern also contains "до", so "до" must not be taken as
    # the span keyword when it is followed by whitespace and "н.".
    # This has to be a negative lookahead, "(?!...)"; without the
    # parentheses "до?!" means "д", optional "о", literal "!" and the
    # keyword never matches.  Backslashes are doubled because the literal
    # is non-ASCII and therefore kept as a u"" (non-raw) string.
    _span_2 = [u'по', u'до(?!\\s+н\\.)']
    _range_1 = [u'між']
    _range_2 = [u'і', u'та']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.  Raw literal so
    # "\s" is passed to the regex engine instead of being an invalid
    # string escape.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    Compile regular expression strings for matching dates.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._text2``, ``self._numeric``,
    ``self._span`` and ``self._range`` are replaced.  The span and range
    keywords are listed in both Latin and Cyrillic spelling.  ``_span``
    and ``_range`` expose the named groups ``start`` and ``stop``.
    """
    DateParser.init_strings(self)

    # All regex skeletons are raw literals so that "\d", "\s" and "\."
    # reach the regex engine verbatim instead of being invalid string
    # escapes.

    # match 'Day. MONTH year.' format with or without dots
    self._text2 = re.compile(
        r'(\d+)?\.?\s*?%s\s*((\d+)(/\d+)?)?\.?\s*$' % self._mon_str,
        re.IGNORECASE)

    # match Day.Month.Year. (optional trailing dot)
    self._numeric = re.compile(
        r"((\d+)[/\. ])?\s*((\d+)[/\.])?\s*(\d+)\.?$")

    # Opening / closing keywords; each list becomes one regex alternation.
    _span_1 = ['od', 'од']
    _span_2 = ['do', 'до']
    _range_1 = ['između', 'између']
    _range_2 = ['i', 'и']

    # Shared skeleton: <open kw> <start> <close kw> <stop>.
    template = r"(%s)\s+(?P<start>.+)\s+(%s)\s+(?P<stop>.+)"
    self._span = re.compile(
        template % ('|'.join(_span_1), '|'.join(_span_2)),
        re.IGNORECASE)
    self._range = re.compile(
        template % ('|'.join(_range_1), '|'.join(_range_2)),
        re.IGNORECASE)
def init_strings(self):
    """
    Compile the locale-specific span and range regular expressions.

    ``DateParser.init_strings`` runs first so every other pattern is
    inherited unchanged; afterwards ``self._span`` ("od ... do") and
    ``self._range`` ("mezi ... a") are replaced.  Both expose the named
    groups ``start`` and ``stop`` and are compiled case-insensitively.
    """
    DateParser.init_strings(self)

    # Raw literals: "\s" must reach the regex engine verbatim rather than
    # being an (invalid) string escape.
    self._span = re.compile(
        r"(od)\s+(?P<start>.+)\s+(do)\s+(?P<stop>.+)",
        re.IGNORECASE)
    self._range = re.compile(
        r"(mezi)\s+(?P<start>.+)\s+(a)\s+(?P<stop>.+)",
        re.IGNORECASE)