def parse(
    self, text: str, debug: bool = False
) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
    """
    Yield one ``(tablename, row)`` pair per weight expression in ``text``.

    Every match of ``self.COMPILED_REGEX`` carries either a metric
    expression or an imperial one (stones and pounds, stones only, or
    pounds only). A metric number is converted with ``to_float`` and used
    directly; imperial quantities go through ``kg_from_st_lb_oz``. The
    outcome is stored in the row under ``self.target_unit``.

    Args:
        text: the text to search.
        debug: if true, print each regex match as it is found.

    Yields:
        ``(self.tablename, row)``, where ``row`` maps the ``FN_*`` field
        names and ``self.target_unit`` to the values parsed from the
        match.
    """
    for match in self.COMPILED_REGEX.finditer(text):
        if debug:
            print("Match {} for {}".format(match, repr(text)))

        # Regex groups, in order: 1-3 variable/tense/relation, 4-6 metric,
        # 7 whole imperial expression, 8-11 stones and pounds together,
        # 12-13 stones alone, 14-15 pounds alone.
        var_text, tense_text, relation_text = match.group(1, 2, 3)
        metric_expr, metric_number, metric_unit_text = match.group(4, 5, 6)
        imperial_expr = match.group(7)
        both_st, both_st_units, both_lb, both_lb_units = match.group(
            8, 9, 10, 11
        )
        st_alone, st_alone_units = match.group(12, 13)
        lb_alone, lb_alone_units = match.group(14, 15)

        value_text = None
        kilograms = None
        unit_text = None

        # Values stay as signed floats on purpose: weight changes such as
        # "weight -0.3 kg" do occur.
        if metric_expr:
            value_text = metric_expr
            kilograms = to_float(metric_number)
            unit_text = metric_unit_text
        elif imperial_expr:
            value_text = imperial_expr
            if both_st and both_lb:
                kilograms = kg_from_st_lb_oz(
                    stones=to_float(both_st),
                    pounds=to_float(both_lb),
                )
                unit_text = assemble_units([both_st_units, both_lb_units])
            elif st_alone:
                kilograms = kg_from_st_lb_oz(stones=to_float(st_alone))
                unit_text = st_alone_units
            elif lb_alone:
                kilograms = kg_from_st_lb_oz(pounds=to_float(lb_alone))
                unit_text = lb_alone_units

        tense, relation = common_tense(tense_text, relation_text)

        row = {
            FN_VARIABLE_NAME: self.variable,
            FN_CONTENT: match.group(0),  # the full matched text
            FN_START: match.start(),
            FN_END: match.end(),
            FN_VARIABLE_TEXT: var_text,
            FN_RELATION_TEXT: relation_text,
            FN_RELATION: relation,
            FN_VALUE_TEXT: value_text,
            FN_UNITS: unit_text,
            self.target_unit: kilograms,
            FN_TENSE_TEXT: tense_text,
            FN_TENSE: tense,
        }
        yield self.tablename, row
def parse(
    self, text: str, debug: bool = False
) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
    """
    Yield one ``(tablename, row)`` pair per usable blood-pressure match.

    This parser is specialized because a single match may supply two
    numbers. How the value text is read depends on the variable text:

    - variable matches ``self.COMPILED_SBP``: a one-number value is taken
      as the systolic reading;
    - variable matches ``self.COMPILED_DBP``: a one-number value is taken
      as the diastolic reading;
    - variable matches ``self.COMPILED_BP``: a two-number value supplies
      systolic (first number) and diastolic (second number).

    Matches giving neither reading are skipped without complaint.

    Args:
        text: the text to search.
        debug: if true, print each regex match as it is found.

    Yields:
        ``(self.tablename, row)``, where ``row`` maps the ``FN_*`` field
        names and the two BP column names to the parsed values.
    """
    for match in self.COMPILED_REGEX.finditer(text):
        if debug:
            print("Match {} for {}".format(match, repr(text)))

        (
            var_text,
            tense_text,
            relation_text,
            value_text,
            unit_text,
        ) = match.group(1, 2, 3, 4, 5)

        systolic = None
        diastolic = None
        if self.COMPILED_SBP.match(var_text):
            if self.COMPILED_ONE_NUMBER_BP.match(value_text):
                systolic = to_pos_float(value_text)
        elif self.COMPILED_DBP.match(var_text):
            if self.COMPILED_ONE_NUMBER_BP.match(value_text):
                diastolic = to_pos_float(value_text)
        elif self.COMPILED_BP.match(var_text):
            pair = self.COMPILED_TWO_NUMBER_BP.match(value_text)
            if pair:
                systolic = to_pos_float(pair.group(1))
                diastolic = to_pos_float(pair.group(2))

        # An uninterpretable match (e.g. "BP 110") is expected and is
        # ignored rather than reported.
        if systolic is not None or diastolic is not None:
            tense, relation = common_tense(tense_text, relation_text)
            yield self.tablename, {
                FN_CONTENT: match.group(0),  # the full matched text
                FN_START: match.start(),
                FN_END: match.end(),
                FN_VARIABLE_TEXT: var_text,
                FN_RELATION_TEXT: relation_text,
                FN_RELATION: relation,
                FN_VALUE_TEXT: value_text,
                FN_UNITS: unit_text,
                self.FN_SYSTOLIC_BP_MMHG: systolic,
                self.FN_DIASTOLIC_BP_MMHG: diastolic,
                FN_TENSE: tense,
            }
def parse(
    self, text: str, debug: bool = False
) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
    """
    Yield one ``(tablename, row)`` pair per height expression in ``text``.

    Every match of ``self.COMPILED_REGEX`` carries either a metric
    expression (metres and centimetres, metres only, or centimetres only)
    or an imperial one (feet and inches, feet only, or inches only).
    Numbers are read with ``to_pos_float``; a metres-only number is used
    directly, other metric forms go through ``m_from_m_cm`` and imperial
    forms through ``m_from_ft_in``. The outcome is stored in the row under
    ``self.target_unit``.

    Args:
        text: the text to search.
        debug: if true, print each regex match as it is found.

    Yields:
        ``(self.tablename, row)``, where ``row`` maps the ``FN_*`` field
        names and ``self.target_unit`` to the values parsed from the
        match.
    """
    # The match object is named "match" (not "m") so that it cannot be
    # confused with metres.
    for match in self.COMPILED_REGEX.finditer(text):
        if debug:
            print("Match {} for {}".format(match, repr(text)))

        # Regex groups, in order: 1-3 variable/tense/relation, 4 whole
        # metric expression, 5-8 m and cm together, 9-10 m alone,
        # 11-12 cm alone, 13 whole imperial expression, 14-17 ft and in
        # together, 18-19 ft alone, 20-21 in alone.
        var_text, tense_text, relation_text = match.group(1, 2, 3)
        metric_expr = match.group(4)
        both_m, both_m_units, both_cm, both_cm_units = match.group(
            5, 6, 7, 8
        )
        m_alone, m_alone_units = match.group(9, 10)
        cm_alone, cm_alone_units = match.group(11, 12)
        imperial_expr = match.group(13)
        both_ft, both_ft_units, both_in, both_in_units = match.group(
            14, 15, 16, 17
        )
        ft_alone, ft_alone_units = match.group(18, 19)
        in_alone, in_alone_units = match.group(20, 21)

        value_text = None
        height_m = None
        unit_text = None

        if metric_expr:
            value_text = metric_expr
            if both_m and both_cm:
                height_m = m_from_m_cm(
                    metres=to_pos_float(both_m),
                    centimetres=to_pos_float(both_cm),
                )
                unit_text = assemble_units([both_m_units, both_cm_units])
            elif m_alone:
                height_m = to_pos_float(m_alone)
                unit_text = m_alone_units
            elif cm_alone:
                height_m = m_from_m_cm(centimetres=to_pos_float(cm_alone))
                unit_text = cm_alone_units
        elif imperial_expr:
            value_text = imperial_expr
            if both_ft and both_in:
                height_m = m_from_ft_in(
                    feet=to_pos_float(both_ft),
                    inches=to_pos_float(both_in),
                )
                unit_text = assemble_units([both_ft_units, both_in_units])
            elif ft_alone:
                height_m = m_from_ft_in(feet=to_pos_float(ft_alone))
                unit_text = ft_alone_units
            elif in_alone:
                height_m = m_from_ft_in(inches=to_pos_float(in_alone))
                unit_text = in_alone_units

        tense, relation = common_tense(tense_text, relation_text)

        row = {
            FN_VARIABLE_NAME: self.variable,
            FN_CONTENT: match.group(0),  # the full matched text
            FN_START: match.start(),
            FN_END: match.end(),
            FN_VARIABLE_TEXT: var_text,
            FN_RELATION_TEXT: relation_text,
            FN_RELATION: relation,
            FN_VALUE_TEXT: value_text,
            FN_UNITS: unit_text,
            self.target_unit: height_m,
            FN_TENSE_TEXT: tense_text,
            FN_TENSE: tense,
        }
        yield self.tablename, row