def __init__(self, **kwargs):
    '''Compile the regular expressions used to find body temperatures.

    Sets ``self.patterns`` to four compiled patterns, in this order:

    1. keyword immediately followed by a value (group ``temperature``)
    2. keyword, one separator character and up to 35 non-digits, then a
       value (group ``temperature``)
    3. keyword immediately followed by a ``low - high`` range (groups
       ``temperature_low`` and ``temperature_high``)
    4. keyword, separator and filler, then a ``low - high`` range

    ``**kwargs`` is accepted but not used by this method.
    '''
    # A keyword has to sit at the start of the text or right after a
    # non-letter, so it is never matched in the middle of a longer word.
    keywords = '((^)|([^a-zA-Z]))((pal)|(t0)|(palavik(uga)?)|(t((emp)(eratuur)?)?))'
    # Two leading digits (10-99), an optional fraction of one or two
    # digits after any mix of spaces, commas and dots, and no further digit.
    digits = r'[1-9][0-9]([ ,.]*[0-9]{1,2})?(?![0-9])'
    # Reject values followed by a measurement unit.  The alternatives are
    # wrapped in one group so the optional whitespace applies to every
    # unit; without the wrapper only "mg" tolerates leading whitespace and
    # "37 cm" / "37 kg" slip through.  The wrapper is non-capturing, which
    # keeps the number of capture groups unchanged.
    guard = r'(?!\s*(?:(mg)|(x)|(cm)|(mm)|(g)|(kg)))'
    temp = '(?P<temperature>' + digits + ')'
    temp_low = '(?P<temperature_low>' + digits + ')'
    temp_high = '(?P<temperature_high>' + digits + ')'
    # One separator character followed by as few non-digits as possible.
    # The accepted characters are listed explicitly (space . * + , - /)
    # instead of relying on a "*-/" range inside the character class.
    sep = r'[ .*+,\-/](\D{0,35}?)'
    range_dash = r'\s*-\s*'
    self.patterns = [
        re.compile(keywords + temp + guard, re.UNICODE),
        re.compile(keywords + sep + temp + guard, re.UNICODE),
        re.compile(keywords + temp_low + range_dash + temp_high + guard,
                   re.UNICODE),
        re.compile(keywords + sep + temp_low + range_dash + temp_high + guard,
                   re.UNICODE),
    ]
def __init__(self, **kwargs):
    '''Compile the pattern that finds a number followed by a time-unit word.

    Sets ``self.patterns`` to a list with one compiled pattern exposing
    two named groups:

    * ``value`` - the number, optionally with a ``,`` or ``.`` fraction
    * ``expression`` - a word starting with one of the stems in ``times``

    ``**kwargs`` is accepted but not used by this method.
    '''
    # Integer with an optional fractional part; whitespace is tolerated
    # around the decimal mark.
    dig = r'[0-9]+\s*([,.]\s*[0-9]+)?'
    # At most one punctuation character between the number and the word.
    sep = r'\s*[.x,/-]?\s*'
    # Estonian stems for week / month / day / year, followed by whatever
    # non-space characters complete the word.  The backslash is doubled
    # (rather than using a raw string) so the ``u`` prefix can stay.
    times = u'(näd|kuu|päe|aast)\\S*'
    pattern = u'(?P<value>' + dig + ')' + sep + '(?P<expression>' + times + u')'
    self.patterns = [re.compile(pattern, re.UNICODE)]
def __init__(self, **kwargs):
    '''Compile the patterns for height, head measurement and weight.

    Sets ``self.patterns`` to three compiled patterns, in this order:

    1. ``sp`` / ``kasv`` / ``pikk`` / ``pikkus`` followed by a number
       (group ``height``), optionally followed by ``cm`` or ``m``
    2. ``pü`` or ``pea`` (plus up to nine further characters) followed by
       a number (group ``head_diameter``), optionally followed by ``cm``
    3. ``sk`` / ``kaal`` followed by a number (group ``weight``),
       optionally followed by ``g`` or ``kg``

    ``**kwargs`` is accepted but not used by this method.
    '''
    # Digits, an optional decimal mark, then more digits.
    # NOTE(review): both digit runs are mandatory, so a lone single-digit
    # value (e.g. "5") never matches - confirm this is intended.
    dig = r'[0-9]+\s*[,.]?\s*[0-9]+'
    # At most one punctuation character, with optional surrounding space.
    sep = r'\s*[.,/-]?\s*'
    patterns = [
        '(sp|kasv|pikk(us)?)' + sep + '(?P<height>' + dig + ')' + sep + '(cm|m)?',
        u'(pü|pea(ü)?.{0,9})' + sep + '(?P<head_diameter>' + dig + ')' + sep + '(cm)?',
        '(sk|kaal)' + sep + '(?P<weight>' + dig + ')' + sep + '(k?g)?',
    ]
    # re.UNICODE matches the flag used by the sibling extractors in this
    # file; for str patterns on Python 3 it is already the default.
    self.patterns = [re.compile(p, re.UNICODE) for p in patterns]
def __init__(self, **kwargs):
    '''Compile the patterns used to find dates.

    Sets ``self.patterns`` to two compiled patterns, in this order:

    1. day, month and year (groups ``day``, ``month``, ``year``), not
       followed by another separator-number-separator sequence
    2. day and month only (groups ``day``, ``month``)

    The month is either an Estonian month name / abbreviation or a number.
    ``**kwargs`` is accepted but not used by this method.
    '''
    # NOTE(review): every numeric field needs 2-4 digits, so single-digit
    # days or months such as "1.2.2015" cannot match - confirm intended.
    dig = '[0-9]{2,4}'
    # Full names come before the abbreviations so the longer alternative
    # is tried first.
    month = u'jaanuar|veebruar|märts|aprill|mai|juuni|juuli|august|september|oktoober|november|detsember'
    month += u'|jaan|veeb|mär|apr|juun|juul|aug|sep|okt|nov|det'
    # Either one of . / - with optional surrounding whitespace, or a
    # single plain space.
    sep = r'(\s*[./-]\s*| )'
    day_month = ('(?P<day>' + dig + ')' + sep +
                 '(?P<month>' + month + '|' + dig + ').{0,2}?')
    sources = [
        day_month + sep + '(?P<year>' + dig + ')(?!' + sep + dig + sep + ')',
        day_month,
    ]
    # Compile from a local list so self.patterns never holds raw strings.
    # re.UNICODE matches the flag used by the sibling extractors in this
    # file; for str patterns on Python 3 it is already the default.
    self.patterns = [re.compile(p, re.UNICODE) for p in sources]
def __init__(self, **kwargs):
    '''Compile the patterns used to find medicine names with their dosage.

    Sets ``self.patterns`` to six compiled patterns, in this order (every
    pattern starts with the ``medicine`` and ``amount`` groups):

    1. amount + ``unit``
    2. amount + ``unit`` + ``frequency``
    3. amount + ``unit`` + ``n``
    4. amount + ``unit`` + ``frequency`` + ``n``
    5. amount + ``frequency``
    6. amount + ``n``

    ``**kwargs`` is accepted but not used by this method.
    '''
    # Digits, optionally followed by spaces / dots / commas and more
    # digits.  The optional tail may also swallow trailing spaces or
    # punctuation, so the captured amount can end with such characters.
    dig = r'[0-9]+([ .,]*[0-9]*)?'
    units = '(?P<unit>(mg)|(g)|(tbl)|(d)|(ugx))'
    # A whole word of 3-50 ASCII letters, optionally followed by
    # "ravi"/"ret" and up to three more characters before the amount.
    medicine = r'(?P<medicine>\b[a-zA-Z]{3,50}\b)[ .-]*((ravi|ret).{0,3}?)?'
    amount = '(?P<amount>' + dig + r')\s*'
    # "x" or "*" followed by a count.
    frequency = r'\s*[x*]\s*(?P<frequency>\d+)'
    # The letter "n", an optional "." or "*", then a count.
    n = r'\s*n\s*[.*]?\s*(?P<n>\d+)'
    sources = [
        medicine + amount + units,
        medicine + amount + units + frequency,
        medicine + amount + units + n,
        medicine + amount + units + frequency + n,
        medicine + amount + frequency,
        medicine + amount + n,
    ]
    self.patterns = [re.compile(p, re.UNICODE) for p in sources]
def __init__(self, **kwargs):
    '''Initialize a new BloodPressure extractor.

    Builds ``self.patterns``, a list of compiled regular expressions.
    For each of the seven blood-pressure patterns defined below, nine
    entries are added: every pulse pattern placed before and after the
    blood-pressure pattern (4 x 2), followed by the blood-pressure
    pattern on its own.  Two pulse-only patterns are appended last, for
    65 entries in total.

    Named groups used by the patterns: ``systolic``, ``diastolic``,
    ``systolic_low``, ``systolic_high``, ``diastolic_low``,
    ``diastolic_high``, ``pulse``, ``pulse_low``, ``pulse_high``.

    ``**kwargs`` is accepted but not used by this method.
    '''
    # common building blocks
    space = r'\s*'
    dig = '[0-9]{2,3}'
    sep = space + '[/&-]' + space
    # "rr"/"RR" followed by up to three non-digits.  Kept out of the
    # str.format() templates below because its "{0,3}" quantifier would
    # be parsed as a replacement field.
    rr_prefix = r'[rR][rR]\D{0,3}?'

    # regular expressions for extracting blood pressure
    patterns = [
        # low and high systolic / diastolic values given as two
        # measurements
        (r'(?P<systolic_low>{0})\s*/\s*(?P<diastolic_low>{0})\s*/\s*'
         r'(?P<systolic_high>{0})\s*/\s*(?P<diastolic_high>{0})').format(dig),
        # systolic range written with '-', then the two diastolic values,
        # each preceded by a separator
        (r'(?P<systolic_low>{0})\s*-\s*(?P<systolic_high>{0}){1}'
         r'(?P<diastolic_low>{0}){1}(?P<diastolic_high>{0})').format(dig, sep),
        # two systolic/diastolic pairs written with '/', joined by '-'
        (r'(?P<systolic_low>{0})\s*/\s*(?P<diastolic_low>{0})\s*-\s*'
         r'(?P<systolic_high>{0})\s*/\s*(?P<diastolic_high>{0})').format(dig),
        # systolic given as a range, single diastolic value
        (r'(?P<systolic_low>{0})\s*-\s*(?P<systolic_high>{0}){1}'
         r'(?P<diastolic>{0})').format(dig, sep),
        # generic systolic / diastolic pair
        '(?P<systolic>{0}){1}(?P<diastolic>{0})'.format(dig, sep),
        # requires the rr prefix, but accepts any single character as the
        # separator between the two values
        rr_prefix + r'(?P<systolic>{0})\s*.\s*(?P<diastolic>{0})'.format(dig),
        # rr prefix with the systolic value only
        rr_prefix + '(?P<systolic>{0})'.format(dig),
    ]

    # regular expressions for extracting pulse
    pulse_single = '(?P<pulse>' + dig + ')(x(min)?)?'
    pulse_range = ('(?P<pulse_low>' + dig + ')' + sep +
                   '(?P<pulse_high>' + dig + ')(x(min)?)?')
    # The pulse keyword must start the text or follow a non-letter; a bare
    # "p" only counts when the next character is not a letter.
    pulse_prefix = u'((^)|([^a-zA-Z]))(ps|pulss|fr|p(?![üa-zA-Z])).{0,5}?'
    # Reject numbers that are followed by a measurement unit.
    pulse_suffix = r'(?!\s*(cm|mg|kg|l|ml|g))'
    # Up to 35 arbitrary characters between the pulse and the pressure.
    pulse_dist = '(.{0,35}?)'
    pulses = [
        '(' + pulse_prefix + pulse_single + pulse_suffix + pulse_dist + ')',
        '(' + pulse_prefix + pulse_range + pulse_suffix + pulse_dist + ')',
        '(' + pulse_dist + pulse_prefix + pulse_single + pulse_suffix + ')',
        '(' + pulse_dist + pulse_prefix + pulse_range + pulse_suffix + ')',
    ]

    # Combine every blood-pressure pattern with an optional pulse.  The
    # order of the resulting list is kept exactly as before.
    compiled = []
    for pressure in patterns:
        for pulse in pulses:
            compiled.append(re.compile(pulse + pressure, re.UNICODE))
            compiled.append(re.compile(pressure + pulse, re.UNICODE))
        compiled.append(re.compile(pressure, re.UNICODE))

    # patterns for only pulse and pulse ranges
    compiled.append(
        re.compile(pulse_prefix + pulse_single + pulse_suffix, re.UNICODE))
    compiled.append(
        re.compile(pulse_prefix + pulse_range + pulse_suffix, re.UNICODE))
    self.patterns = compiled