Ejemplo n.º 1
0
    def parse(
        self,
        text: str,
        debug: bool = False
    ) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        """
        Parser for Weight. Specialized for complex unit conversion.

        Scans ``text`` with ``self.COMPILED_REGEX`` and yields one
        ``(self.tablename, result_dict)`` tuple per regex match.

        Args:
            text: the text to search.
            debug: if true, print every regex match as it is found.

        Yields:
            tuples of ``self.tablename`` and a dictionary describing the
            match. The numeric value is stored under the
            ``self.target_unit`` key; it (and the value text/units) is
            ``None`` when neither a metric nor an imperial quantity was
            captured.
        """
        for match in self.COMPILED_REGEX.finditer(text):
            if debug:
                print("Match {} for {}".format(match, repr(text)))

            # Capture groups 1-15 of the regex, in order.
            (
                variable_text,
                tense_text,
                relation_text,
                metric_expression,
                metric_value,
                metric_units,
                imperial_expression,
                both_st,
                both_st_units,
                both_lb,
                both_lb_units,
                only_st,
                only_st_units,
                only_lb,
                only_lb_units,
            ) = match.group(*range(1, 16))

            # Values are deliberately left as signed floats: text such as
            # "weight -0.3 kg" does occur, for weight changes.
            expression = value_kg = units = None
            if metric_expression:
                expression = metric_expression
                value_kg = to_float(metric_value)
                units = metric_units
            elif imperial_expression:
                expression = imperial_expression
                if both_st and both_lb:
                    # Stones and pounds given together.
                    value_kg = kg_from_st_lb_oz(
                        stones=to_float(both_st),
                        pounds=to_float(both_lb),
                    )
                    units = assemble_units([both_st_units, both_lb_units])
                elif only_st:
                    value_kg = kg_from_st_lb_oz(stones=to_float(only_st))
                    units = only_st_units
                elif only_lb:
                    value_kg = kg_from_st_lb_oz(pounds=to_float(only_lb))
                    units = only_lb_units

            tense, relation = common_tense(tense_text, relation_text)

            row = {
                FN_VARIABLE_NAME: self.variable,
                FN_CONTENT: match.group(0),  # the whole matched text
                FN_START: match.start(),
                FN_END: match.end(),
                FN_VARIABLE_TEXT: variable_text,
                FN_RELATION_TEXT: relation_text,
                FN_RELATION: relation,
                FN_VALUE_TEXT: expression,
                FN_UNITS: units,
                self.target_unit: value_kg,
                FN_TENSE_TEXT: tense_text,
                FN_TENSE: tense,
            }
            yield self.tablename, row
Ejemplo n.º 2
0
    def parse(
        self,
        text: str,
        debug: bool = False
    ) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        """
        Parser for BP. Specialized because we're fetching two numbers.

        Scans ``text`` with ``self.COMPILED_REGEX``. Depending on which of
        ``self.COMPILED_SBP``, ``self.COMPILED_DBP`` or ``self.COMPILED_BP``
        matches the variable text, the value text is read as a single
        number (systolic or diastolic) or as a pair (systolic and
        diastolic). Matches from which neither number could be obtained
        are skipped.

        Args:
            text: the text to search.
            debug: if true, print every regex match as it is found.

        Yields:
            tuples of ``self.tablename`` and a dictionary describing the
            match, with the two pressures stored under
            ``self.FN_SYSTOLIC_BP_MMHG`` and ``self.FN_DIASTOLIC_BP_MMHG``
            (either may be ``None``, but not both).
        """
        for match in self.COMPILED_REGEX.finditer(text):
            if debug:
                print("Match {} for {}".format(match, repr(text)))

            # Group 0 is the whole match; groups 1-5 are its components.
            (
                matching_text,
                variable_text,
                tense_indicator,
                relation_text,
                value_text,
                units,
            ) = match.group(0, 1, 2, 3, 4, 5)

            systolic = diastolic = None
            if self.COMPILED_SBP.match(variable_text):
                # Systolic-only variable: expect a single number.
                if self.COMPILED_ONE_NUMBER_BP.match(value_text):
                    systolic = to_pos_float(value_text)
            elif self.COMPILED_DBP.match(variable_text):
                # Diastolic-only variable: expect a single number.
                if self.COMPILED_ONE_NUMBER_BP.match(value_text):
                    diastolic = to_pos_float(value_text)
            elif self.COMPILED_BP.match(variable_text):
                # Generic BP variable: expect a pair of numbers.
                pair = self.COMPILED_TWO_NUMBER_BP.match(value_text)
                if pair:
                    systolic = to_pos_float(pair.group(1))
                    diastolic = to_pos_float(pair.group(2))

            if systolic is None and diastolic is None:
                # This is OK; e.g. "BP 110", which we will ignore.
                continue

            tense, relation = common_tense(tense_indicator, relation_text)

            yield self.tablename, {
                FN_CONTENT: matching_text,
                FN_START: match.start(),
                FN_END: match.end(),
                FN_VARIABLE_TEXT: variable_text,
                FN_RELATION_TEXT: relation_text,
                FN_RELATION: relation,
                FN_VALUE_TEXT: value_text,
                FN_UNITS: units,
                self.FN_SYSTOLIC_BP_MMHG: systolic,
                self.FN_DIASTOLIC_BP_MMHG: diastolic,
                FN_TENSE: tense,
            }
Ejemplo n.º 3
0
    def parse(
        self,
        text: str,
        debug: bool = False
    ) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        """
        Parser for Height. Specialized for complex unit conversion.

        Scans ``text`` with ``self.COMPILED_REGEX`` and yields one
        ``(self.tablename, result_dict)`` tuple per regex match.

        Args:
            text: the text to search.
            debug: if true, print every regex match as it is found.

        Yields:
            tuples of ``self.tablename`` and a dictionary describing the
            match. The numeric value is stored under the
            ``self.target_unit`` key; it (and the value text/units) is
            ``None`` when neither a metric nor an imperial quantity was
            captured.
        """
        # The match object is called "match" (not "m") so that it cannot be
        # confused with metres below.
        for match in self.COMPILED_REGEX.finditer(text):
            if debug:
                print("Match {} for {}".format(match, repr(text)))

            # Capture groups 1-21 of the regex, in order.
            (
                variable_text,
                tense_text,
                relation_text,
                metric_expression,
                both_m,
                both_m_units,
                both_cm,
                both_cm_units,
                only_m,
                only_m_units,
                only_cm,
                only_cm_units,
                imperial_expression,
                both_ft,
                both_ft_units,
                both_in,
                both_in_units,
                only_ft,
                only_ft_units,
                only_in,
                only_in_units,
            ) = match.group(*range(1, 22))

            expression = value_m = units = None
            if metric_expression:
                expression = metric_expression
                if both_m and both_cm:
                    # Metres and centimetres given together.
                    value_m = m_from_m_cm(
                        metres=to_pos_float(both_m),
                        centimetres=to_pos_float(both_cm),
                    )
                    units = assemble_units([both_m_units, both_cm_units])
                elif only_m:
                    # Used directly, without going through m_from_m_cm().
                    value_m = to_pos_float(only_m)
                    units = only_m_units
                elif only_cm:
                    value_m = m_from_m_cm(centimetres=to_pos_float(only_cm))
                    units = only_cm_units
            elif imperial_expression:
                expression = imperial_expression
                if both_ft and both_in:
                    # Feet and inches given together.
                    value_m = m_from_ft_in(
                        feet=to_pos_float(both_ft),
                        inches=to_pos_float(both_in),
                    )
                    units = assemble_units([both_ft_units, both_in_units])
                elif only_ft:
                    value_m = m_from_ft_in(feet=to_pos_float(only_ft))
                    units = only_ft_units
                elif only_in:
                    value_m = m_from_ft_in(inches=to_pos_float(only_in))
                    units = only_in_units

            tense, relation = common_tense(tense_text, relation_text)

            row = {
                FN_VARIABLE_NAME: self.variable,
                FN_CONTENT: match.group(0),  # the whole matched text
                FN_START: match.start(),
                FN_END: match.end(),
                FN_VARIABLE_TEXT: variable_text,
                FN_RELATION_TEXT: relation_text,
                FN_RELATION: relation,
                FN_VALUE_TEXT: expression,
                FN_UNITS: units,
                self.target_unit: value_m,
                FN_TENSE_TEXT: tense_text,
                FN_TENSE: tense,
            }
            yield self.tablename, row