Ejemplo n.º 1
0
    def parseNumbers(self, s):
        """
        Parses a string that represents a number into a decimal data type so that
        we can match the quantity field in the db with the quantity that appears
        in the display name.

        The string is first passed through utils.unclump and is then tried
        against three shapes, in this order:

          * a whole number, e.g. "3"
          * a mixed number, e.g. "1 1/2" (text after the fraction is ignored)
          * a bare fraction, e.g. "3/4"

        Numerators and denominators may have any number of digits. Mixed
        numbers and fractions are rounded to 2 places.

        Returns a decimal.Decimal, or None when the string matches none of the
        shapes or when the fraction has a zero denominator.
        """
        ss = utils.unclump(s)

        # Whole number: build the Decimal straight from the digits so long
        # values are not distorted by a round trip through float.
        whole = re.match(r'^(\d+)$', ss)
        if whole is not None:
            return decimal.Decimal(whole.group(1))

        # Mixed number. Deliberately not anchored at the end, so trailing text
        # after the fraction is tolerated. The greedy \d+ makes sure the whole
        # denominator is consumed ("1 1/16" is 1.06, not 1 + 1/1).
        mixed = re.match(r'(\d+)\s+(\d+)/(\d+)', ss)
        if mixed is not None:
            denominator = int(mixed.group(3))
            if denominator == 0:
                # Not a meaningful quantity; treat as unparseable.
                return None
            # float() keeps this a true division on Python 2 as well.
            num = int(mixed.group(1)) + float(mixed.group(2)) / denominator
            return decimal.Decimal(str(round(num, 2)))

        # Bare fraction: must span the entire string.
        fraction = re.match(r'^(\d+)/(\d+)$', ss)
        if fraction is not None:
            denominator = int(fraction.group(2))
            if denominator == 0:
                return None
            num = float(fraction.group(1)) / denominator
            return decimal.Decimal(str(round(num, 2)))

        return None
Ejemplo n.º 2
0
    def parseNumbers(self, s):
        """
        Parses a string that represents a number into a decimal data type so that
        we can match the quantity field in the db with the quantity that appears
        in the display name.

        The string is first passed through utils.unclump and is then tried
        against three shapes, in this order:

          * a whole number, e.g. "3"
          * a mixed number, e.g. "1 1/2" (text after the fraction is ignored)
          * a bare fraction, e.g. "3/4"

        Numerators and denominators may have any number of digits. Mixed
        numbers and fractions are rounded to 2 places.

        Returns a decimal.Decimal, or None when the string matches none of the
        shapes or when the fraction has a zero denominator.
        """
        ss = utils.unclump(s)

        # Whole number: build the Decimal straight from the digits so long
        # values are not distorted by a round trip through float.
        whole = re.match(r'^(\d+)$', ss)
        if whole is not None:
            return decimal.Decimal(whole.group(1))

        # Mixed number. Deliberately not anchored at the end, so trailing text
        # after the fraction is tolerated. The greedy \d+ makes sure the whole
        # denominator is consumed ("1 1/16" is 1.06, not 1 + 1/1).
        mixed = re.match(r'(\d+)\s+(\d+)/(\d+)', ss)
        if mixed is not None:
            denominator = int(mixed.group(3))
            if denominator == 0:
                # Not a meaningful quantity; treat as unparseable.
                return None
            # float() keeps this a true division on Python 2 as well.
            num = int(mixed.group(1)) + float(mixed.group(2)) / denominator
            return decimal.Decimal(str(round(num, 2)))

        # Bare fraction: must span the entire string.
        fraction = re.match(r'^(\d+)/(\d+)$', ss)
        if fraction is not None:
            denominator = int(fraction.group(2))
            if denominator == 0:
                return None
            num = float(fraction.group(1)) / denominator
            return decimal.Decimal(str(round(num, 2)))

        return None
        # blank line starts a new ingredient
        elif line == "\n":
            data.append({})
            display.append([])
            prevTag = None

        # otherwise it's a token
        # e.g.: potato \t I2 \t L5 \t NoCAP \t B-NAME/0.978253
        else:

            columns = re.split(r'\t', string.strip(line))
            token = string.strip(columns[0])

            # unclump fractions
            token = utils.unclump(token)

            # turn B-NAME/123 back into "name"
            tag, confidence = re.split(r'/', columns[-1], 1)
            tag = re.sub(r'^[BI]\-', "", tag).lower()

            # ---- DISPLAY ----
            # build a structure which groups each token by its tag, so we can
            # rebuild the original display name later.

            if prevTag != tag:
                display[-1].append((tag, [token]))
                prevTag = tag

            else:
                display[-1][-1][1].append(token)