def __init__(self, **kwargs):
     '''Compile the regular expressions that locate temperature readings.

     Every pattern starts with a keyword (``pal``, ``palavik``,
     ``palavikuga``, ``t``, ``t0``, ``temp``, ``temperatuur``) that sits at
     the start of the text or after a character that is not an ASCII
     letter, and ends with a guard that rejects numbers followed by a unit.

     Sets ``self.patterns`` to four compiled patterns, in this order:
     keyword + value, keyword + separator + value, keyword + range,
     keyword + separator + range.  Single values are captured in the
     ``temperature`` group; ranges in ``temperature_low`` and
     ``temperature_high``.

     :param kwargs: accepted but not used by this method.
     '''
     keywords  = r'((^)|([^a-zA-Z]))((pal)|(t0)|(palavik(uga)?)|(t((emp)(eratuur)?)?))'
     # Two leading digits (the first non-zero), an optional fractional part
     # of one or two digits, and no further digit directly afterwards.
     digits    = r'[1-9][0-9]([ ,.]*[0-9]{1,2})?(?![0-9])'
     # The alternatives are grouped so that the optional whitespace applies
     # to every unit.  Previously `\s*` was bound to `mg` only, so "38 cm"
     # slipped past the guard while "38cm" did not.
     guard     = r'(?!\s*(?:mg|x|cm|mm|g|kg))'
     temp      = '(?P<temperature>' + digits + ')'
     temp_low  = '(?P<temperature_low>' + digits + ')'
     temp_high = '(?P<temperature_high>' + digits + ')'
     # NOTE: inside the class `*-/` is a character range, so the first
     # separator character may be any of `*+,-./` or a space.  It is followed
     # by up to 35 non-digit characters, matched lazily.
     sep       = r'[ .*-/](\D{0,35}?)'
     dash      = r'\s*-\s*'
     self.patterns = [
         re.compile(keywords + temp + guard, re.UNICODE),
         re.compile(keywords + sep + temp + guard, re.UNICODE),
         re.compile(keywords + temp_low + dash + temp_high + guard, re.UNICODE),
         re.compile(keywords + sep + temp_low + dash + temp_high + guard, re.UNICODE),
         ]
 def __init__(self, **kwargs):
     '''Compile the pattern that pairs a number with a time-unit word.

     Sets ``self.patterns`` to a one-element list.  The pattern captures
     the number in the ``value`` group and the unit word in the
     ``expression`` group.

     :param kwargs: accepted but not used by this method.
     '''
     # Integer with an optional `,`/`.` fraction; whitespace is tolerated
     # around the decimal mark.
     number = r'[0-9]+\s*([,.]\s*[0-9]+)?'
     # At most one of `.`, `x`, `,`, `/`, `-` between number and word.
     joiner = r'\s*[.x,/-]?\s*'
     # Word stems (presumably Estonian week / month / day / year - confirm)
     # followed by any run of non-whitespace characters.
     unit_word = u'(näd|kuu|päe|aast)' + r'\S*'

     template = u'(?P<value>{0}){1}(?P<expression>{2})'
     expression = template.format(number, joiner, unit_word)
     self.patterns = [re.compile(expression, re.UNICODE)]
 def __init__(self, **kwargs):
     '''Compile the patterns for height, head diameter and weight.

     Sets ``self.patterns`` to three compiled patterns whose numeric part
     is captured in the ``height``, ``head_diameter`` and ``weight`` groups
     respectively.  Each pattern is a keyword, the number, and an optional
     unit, joined by an optional separator.

     :param kwargs: accepted but not used by this method.
     '''
     # At least two digits overall, optionally split by one `,` or `.`
     # and/or whitespace.
     number = r'[0-9]+\s*[,.]?\s*[0-9]+'
     joiner = r'\s*[.,/-]?\s*'

     # (keyword alternatives, capture-group name, optional unit)
     specs = (
         ('(sp|kasv|pikk(us)?)', 'height', '(cm|m)?'),
         (u'(pü|pea(ü)?.{0,9})', 'head_diameter', '(cm)?'),
         ('(sk|kaal)', 'weight', '(k?g)?'),
     )

     compiled = []
     for keyword, group_name, unit in specs:
         value = '(?P<' + group_name + '>' + number + ')'
         compiled.append(re.compile(keyword + joiner + value + joiner + unit))
     self.patterns = compiled
 def __init__(self, **kwargs):
     '''Compile the patterns for day-month-year and day-month dates.

     Sets ``self.patterns`` to two compiled patterns: the first captures
     ``day``, ``month`` and ``year``; the second captures only ``day`` and
     ``month``.  The month is either one of the listed month names /
     abbreviations or a number.

     :param kwargs: accepted but not used by this method.
     '''
     number = '[0-9]{2,4}'
     full_names = (
         u'jaanuar', u'veebruar', u'märts', u'aprill', u'mai', u'juuni',
         u'juuli', u'august', u'september', u'oktoober', u'november',
         u'detsember',
     )
     short_names = (
         u'jaan', u'veeb', u'mär', u'apr', u'juun', u'juul', u'aug',
         u'sep', u'okt', u'nov', u'det',
     )
     # Full names come first so that they win over their abbreviations.
     month_names = u'|'.join(full_names + short_names)
     # Either a `.`, `/` or `-` with optional whitespace, or a single space.
     joiner = r'(\s*[./-]\s*| )'

     day = '(?P<day>' + number + ')'
     # Up to two extra characters are allowed after the month (lazily).
     month = '(?P<month>' + month_names + '|' + number + ').{0,2}?'
     year = '(?P<year>' + number + ')'
     # The year must not be followed by another "number separator" pair.
     not_followed = '(?!' + joiner + number + joiner + ')'

     day_month = day + joiner + month
     day_month_year = day_month + joiner + year + not_followed
     self.patterns = [re.compile(day_month_year), re.compile(day_month)]
 def __init__(self, **kwargs):
     '''Compile the patterns for medicine names with dose information.

     Sets ``self.patterns`` to six compiled patterns.  All of them start
     with a medicine name (``medicine`` group) and an amount (``amount``
     group); they differ in which of the ``unit``, ``frequency`` and ``n``
     groups follow.

     :param kwargs: accepted but not used by this method.
     '''
     number = r'[0-9]+([ .,]*[0-9]*)?'
     unit = r'(?P<unit>(mg)|(g)|(tbl)|(d)|(ugx))'
     # A 3-50 letter ASCII word, optionally followed by "ravi"/"ret" and up
     # to three more characters.
     name = r'(?P<medicine>\b[a-zA-Z]{3,50}\b)[ .-]*((ravi|ret).{0,3}?)?'
     dose = '(?P<amount>' + number + ')' + r'\s*'
     # "x 3" or "* 3"
     times = r'\s*[x*]\s*(?P<frequency>\d+)'
     # "n 30", "n.30", "n*30"
     count = r'\s*n\s*[.*]?\s*(?P<n>\d+)'

     head = name + dose
     # Order is preserved from the original list of patterns.
     tails = (
         unit,
         unit + times,
         unit + count,
         unit + times + count,
         times,
         count,
     )
     self.patterns = [re.compile(head + tail, re.UNICODE) for tail in tails]
    def __init__(self, **kwargs):
        '''Build the compiled patterns for blood pressure and pulse.

        Sets ``self.patterns`` to a list of compiled regular expressions.
        For every blood pressure pattern the list holds, for each of the
        four pulse variants, "pulse before pressure" and "pressure before
        pulse", followed by the pressure pattern on its own.  Two
        pulse-only patterns are appended at the end.

        :param kwargs: accepted but not used by this method.
        '''
        number = '[0-9]{2,3}'
        slash = r'\s*/\s*'
        dash = r'\s*-\s*'
        # generic separator: one of `/`, `&`, `-` with optional whitespace
        sep = r'\s*' + '[/&-]' + r'\s*'
        # "RR" marker followed by up to three non-digits (lazy)
        rr_marker = r'[rR][rR]\D{0,3}?'

        def grp(name):
            # named capture group around a 2-3 digit number
            return '(?P<' + name + '>' + number + ')'

        pressure_variants = [
            # low and high values given as two measurements: a / b / c / d
            grp('systolic_low') + slash + grp('diastolic_low') + slash
            + grp('systolic_high') + slash + grp('diastolic_high'),
            # ranges written with '-', systolic and diastolic split by sep
            grp('systolic_low') + dash + grp('systolic_high') + sep
            + grp('diastolic_low') + sep + grp('diastolic_high'),
            # two "systolic / diastolic" readings joined with '-'
            grp('systolic_low') + slash + grp('diastolic_low') + dash
            + grp('systolic_high') + slash + grp('diastolic_high'),
            # only the systolic value is a range
            grp('systolic_low') + dash + grp('systolic_high') + sep
            + grp('diastolic'),
            # generic systolic / diastolic pair
            grp('systolic') + sep + grp('diastolic'),
            # requires the RR marker, but accepts any single separator char
            rr_marker + grp('systolic') + r'\s*.\s*' + grp('diastolic'),
            # RR marker with the systolic value only
            rr_marker + grp('systolic'),
        ]

        # building blocks for pulse expressions
        per_minute = '(x(min)?)?'
        pulse_single = grp('pulse') + per_minute
        pulse_range = grp('pulse_low') + sep + grp('pulse_high') + per_minute
        pulse_prefix = u'((^)|([^a-zA-Z]))(ps|pulss|fr|p(?![üa-zA-Z])).{0,5}?'
        # reject numbers that are followed by one of these units
        pulse_suffix = r'(?!\s*(cm|mg|kg|l|ml|g))'
        # up to 35 arbitrary characters between pulse and pressure (lazy)
        gap = '(.{0,35}?)'

        single_core = pulse_prefix + pulse_single + pulse_suffix
        range_core = pulse_prefix + pulse_range + pulse_suffix
        cores = (single_core, range_core)
        pulse_variants = (
            ['(' + core + gap + ')' for core in cores]
            + ['(' + gap + core + ')' for core in cores]
        )

        # combine every pressure pattern with the optional pulse variants
        expressions = []
        for pressure in pressure_variants:
            for pulse in pulse_variants:
                expressions.extend((pulse + pressure, pressure + pulse))
            expressions.append(pressure)
        # pulse and pulse range without any blood pressure
        expressions.extend(cores)

        self.patterns = [re.compile(e, re.UNICODE) for e in expressions]