Beispiel #1
0
    def __FixedSellerNames__(self, seller_names):
        """Resolve a seller name against ``CompanyList.CompanyList``.

        Returns a ``(ConfidenceLevel, name)`` tuple:

        * ``Confident`` and the input name when it is listed verbatim.
        * ``Fixed`` and the closest listed name when at least one listed name
          scores below ``len(input name)`` with ``StrUtil.Similarity``.  The
          lowest score wins; on a tie the strictly longer listed name wins.
        * ``Bad`` and the input name when no listed name qualifies.
        * ``Bad`` and ``''`` when ``seller_names`` is empty.

        ``StrUtil.Similarity`` is used as a distance (lower is closer) --
        presumably an edit distance; confirm against its implementation.
        """
        unset = 1000  # sentinel meaning "no acceptable candidate found yet"
        best_distance = unset
        best_name = ''

        # NOTE(review): every path through the loop body returns, so only the
        # first element of ``seller_names`` is ever examined -- confirm that
        # this is intended.
        for CompanyName in seller_names:
            if CompanyName in CompanyList.CompanyList:
                return ConfidenceLevel.Confident, CompanyName

            for known_name in CompanyList.CompanyList:
                distance = StrUtil.Similarity(known_name, CompanyName)
                # Reject candidates that are too far away or worse than the
                # best one seen so far.
                if distance >= len(CompanyName) or best_distance < distance:
                    continue
                # On a tie, only a strictly longer candidate replaces the
                # current best.
                if (best_distance == distance
                        and len(best_name) >= len(known_name)):
                    continue
                best_distance, best_name = distance, known_name

            if best_distance == unset:
                return ConfidenceLevel.Bad, CompanyName
            return ConfidenceLevel.Fixed, best_name

        return ConfidenceLevel.Bad, ''
Beispiel #2
0
    def __CheckBeforeTaxFixedResult__(self, before_tax, fixed_before_tax,
                                      similarity):
        length = len(fixed_before_tax)

        if length and similarity > 0.49999:
            if length == len(before_tax):
                patterns = ((u'8', u'6', u'0'), (u'1', u'4', u'7'),
                            (u'3', u'8'), (u'3', u'7'), (u'0', u'4'))
                for index in range(length):
                    if fixed_before_tax[index] != before_tax[index]:
                        match = False
                        for pattern in patterns:
                            if fixed_before_tax[
                                    index] in pattern and before_tax[
                                        index] in pattern:
                                match = True
                                break
                        if not match:
                            return False

            else:
                if similarity < 0.6 and max(
                        length,
                        len(before_tax) * similarity) > 3.0:
                    return False

            return len(before_tax) == len(
                fixed_before_tax) or StrUtil.Similarity(
                    before_tax, fixed_before_tax) == 1

        return False
Beispiel #3
0
    def __FixedDataByAfterTax__(self, before_tax, tax, after_tax):
        """Derive before-tax and tax values from a trusted after-tax amount.

        For every rate in ``self.__TaxRate__`` and every offset in
        ``self.__Error__`` a candidate before-tax amount is computed as
        ``after_tax / (1 + rate) + error``.  A candidate is considered only
        when the two-digit fractional parts of the candidate and its rounded
        tax add up (modulo 100) to the fractional part of ``after_tax``.

        Returns:
            ``(ok, before_tax, tax, after_tax)``.  If a candidate's tax string
            equals ``tax`` the method returns at once with that candidate's
            before-tax string and the unformatted ``tax`` / ``after_tax``.
            Otherwise the candidate most similar to ``before_tax`` (later
            candidates win ties) is returned formatted if
            ``__CheckBeforeTaxFixedResult__`` accepts it; failing that, the
            inputs are returned unchanged with ``ok`` set to ``False``.
        """
        total_amount = Decimal(after_tax)
        best_rate = Decimal(0.0)
        best_before_tax = ''
        best_tax = ''

        for rate in self.__TaxRate__:
            for error in self.__Error__:
                candidate = total_amount / (Decimal(u'1.0') + rate) + error
                candidate_tax = self.__Round__(candidate * rate)

                # The cents of candidate + tax must reproduce the cents of
                # the after-tax amount.
                cents_sum = (int(candidate * 100) % 100 +
                             int(candidate_tax * 100) % 100) % 100
                if cents_sum != int(total_amount * 100) % 100:
                    continue

                candidate_text = self.__DoubleToString__(candidate)
                candidate_rate = StrUtil.SimilarityRate(
                    candidate_text, before_tax)
                if candidate_rate < best_rate:
                    continue

                best_before_tax = candidate_text
                best_tax = self.__DoubleToString__(candidate_tax)
                if tax == best_tax:
                    # The recognised tax confirms this candidate outright.
                    return True, best_before_tax, tax, after_tax
                best_rate = candidate_rate

        accepted = self.__CheckBeforeTaxFixedResult__(
            before_tax, best_before_tax, best_rate)
        if not accepted:
            return False, before_tax, tax, after_tax

        return (True, self.__FormatData__(best_before_tax),
                self.__FormatData__(best_tax),
                self.__FormatData__(after_tax))
Beispiel #4
0
    def __FixedDataByTax__(self, before_tax, tax, after_tax):
        """Derive before-tax and after-tax values from a trusted tax amount.

        First, if ``before_tax`` passes ``__CheckFloat__`` and some rate in
        ``self.__TaxRate__`` reproduces ``tax`` from it, the after-tax amount
        is recomputed as ``before_tax + tax`` and accepted when its similarity
        rate to ``after_tax`` exceeds 0.7.

        Otherwise candidate before-tax amounts ``tax / rate + error`` are
        generated for every rate and every offset in ``self.__Error__``; the
        one most similar to ``before_tax`` (first one wins ties) is accepted
        if ``__CheckBeforeTaxFixedResult__`` approves it.

        Returns:
            ``(ok, before_tax, tax, after_tax)``; formatted values when a fix
            was accepted, the unchanged inputs with ``False`` otherwise.
        """
        tax_amount = Decimal(tax)

        if self.__CheckFloat__(before_tax):
            net_amount = Decimal(before_tax)
            # Evaluate every rate (no early exit), as a list.
            rate_hits = [
                self.__DoubleToString__(
                    self.__Round__(net_amount * rate)) == tax
                for rate in self.__TaxRate__
            ]
            if any(rate_hits):
                gross_text = self.__DoubleToString__(net_amount + tax_amount)
                if StrUtil.SimilarityRate(gross_text, after_tax) > 0.7:
                    return (True, self.__FormatData__(before_tax),
                            self.__FormatData__(tax),
                            self.__FormatData__(gross_text))

        best_rate = Decimal(0.0)
        best_before_tax = u''
        best_after_tax = u''
        for rate in self.__TaxRate__:
            for error in self.__Error__:
                candidate = tax_amount / rate + error
                candidate_text = self.__DoubleToString__(candidate)
                candidate_rate = StrUtil.SimilarityRate(
                    candidate_text, before_tax)
                if candidate_rate > best_rate:
                    best_before_tax = candidate_text
                    best_after_tax = self.__DoubleToString__(
                        candidate + tax_amount)
                    best_rate = candidate_rate

        accepted = self.__CheckBeforeTaxFixedResult__(
            before_tax, best_before_tax, best_rate)
        if not accepted:
            return False, before_tax, tax, after_tax

        return (True, self.__FormatData__(best_before_tax),
                self.__FormatData__(tax),
                self.__FormatData__(best_after_tax))
Beispiel #5
0
    def __FixedDataBySubtotal__(self, subtotal, tip, total):
        """Repair tip/total values using a trusted subtotal.

        Strategy, in order:

        1. If ``tip`` is numeric and not larger than 40% of the subtotal,
           compute ``subtotal + tip``.  Accept it when ``total`` is empty,
           not numeric, or more than 0.5 similar to the computed value.  If
           the computed total exceeds a numeric ``total`` by less than 1.0,
           the recognised ``total`` string is kept.
        2. If ``total`` is numeric, derive the tip as ``total - subtotal``.
           A non-negative tip is accepted when ``tip`` is empty, not numeric,
           was judged too large, or is more than 0.5 similar to the derived
           value.  A negative tip is replaced by ``0.00`` with the total set
           to the subtotal, provided subtotal and total are more than 0.6
           similar.

        Returns:
            ``(ConfidenceLevel.Fixed, subtotal, tip, total)`` with formatted
            values, or ``(ConfidenceLevel.Bad, '', '', '')``.
        """
        subtotal_amount = Decimal(subtotal)
        oversized_tip = False

        if self.__CheckFloat__(tip):
            tip_amount = Decimal(tip)
            oversized_tip = tip_amount > (subtotal_amount * Decimal(u'0.4'))
            if not oversized_tip:
                computed_total = subtotal_amount + tip_amount
                total_text = str(computed_total)
                if computed_total >= subtotal_amount:
                    if (len(total) == 0 or not self.__CheckFloat__(total)
                            or StrUtil.SimilarityRate(total_text,
                                                      total) > 0.5):
                        if self.__CheckFloat__(total):
                            gap = computed_total - Decimal(total)
                            # Differences under one unit keep the recognised
                            # total string.
                            if gap < Decimal('1.0') and gap > Decimal('0.0'):
                                total_text = total
                        return (ConfidenceLevel.Fixed,
                                self.__FormatData__(subtotal),
                                self.__FormatData__(tip),
                                self.__FormatData__(total_text))

        if self.__CheckFloat__(total):
            total_amount = Decimal(total)
            derived_tip = total_amount - subtotal_amount
            tip_text = str(derived_tip)
            if derived_tip >= Decimal(u'0.00'):
                if (len(tip) == 0 or not self.__CheckFloat__(tip)
                        or oversized_tip
                        or StrUtil.SimilarityRate(tip_text, tip) > 0.5):
                    return (ConfidenceLevel.Fixed,
                            self.__FormatData__(subtotal),
                            self.__FormatData__(tip_text),
                            self.__FormatData__(total))
            elif derived_tip < Decimal(u'0.00'):
                # Total is below the subtotal: fall back to "no tip".
                if StrUtil.SimilarityRate(subtotal, total) > 0.6:
                    return (ConfidenceLevel.Fixed,
                            self.__FormatData__(subtotal),
                            self.__FormatData__(u'0.00'),
                            self.__FormatData__(subtotal))

        return ConfidenceLevel.Bad, u'', u'', u''
Beispiel #6
0
    def __FixedDataByBeforeTax__(self, before_tax, tax, after_tax):
        """Recompute tax and after-tax values from a trusted before-tax amount.

        For each rate in ``self.__TaxRate__`` the rounded tax and resulting
        total are computed; the rate whose total string is most similar to
        ``after_tax`` is kept (the first one wins ties).  If no rate scores
        above zero the tax stays at 0.  The ``tax`` argument is not read.

        Returns:
            ``(True, before_tax, tax, after_tax)`` with formatted values; the
            first element is always ``True``.
        """
        net_amount = Decimal(before_tax)
        best_rate = Decimal(0.0)
        best_tax = Decimal(0.0)

        for rate in self.__TaxRate__:
            candidate_tax = self.__Round__(net_amount * rate)
            candidate_total = self.__DoubleToString__(
                net_amount + candidate_tax)
            candidate_rate = StrUtil.SimilarityRate(candidate_total,
                                                    after_tax)
            if candidate_rate > best_rate:
                best_tax, best_rate = candidate_tax, candidate_rate

        formatted_before = self.__FormatData__(before_tax)
        formatted_tax = self.__FormatData__(
            self.__DoubleToString__(best_tax))
        formatted_after = self.__FormatData__(
            self.__DoubleToString__(net_amount + best_tax))
        return True, formatted_before, formatted_tax, formatted_after
Beispiel #7
0
    def __FixedDataByTipRate__(self, subtotal, tip, total, tiprate_tip,
                               tiprate_total):
        """Repair subtotal/tip/total using a tip-rate tip and total pair.

        Nothing is attempted unless both ``tiprate_tip`` and
        ``tiprate_total`` pass ``__CheckFloat__``.

        * Numeric ``subtotal`` with ``subtotal + tiprate_tip ==
          tiprate_total`` and numeric ``total``:
            - ``total == tiprate_total``: ``Confident``.
            - otherwise, numeric ``tip``: ``Confident`` when it equals
              ``tiprate_tip``, else ``Fixed`` with the tip recomputed as
              ``tiprate_total - subtotal``.
            - otherwise, empty ``tip``: ``Fixed`` with an adjusted total
              (``tiprate_total`` when the two totals differ by more than the
              smaller of them, else the smaller of the two).
        * Non-numeric ``subtotal``: ``Fixed`` with the subtotal derived as
          ``tiprate_total - tiprate_tip``.

        Returns:
            ``(ConfidenceLevel, subtotal, tip, total)``; ``Bad`` with three
            empty strings when no rule applies.
        """
        if not (self.__CheckFloat__(tiprate_tip)
                and self.__CheckFloat__(tiprate_total)):
            return ConfidenceLevel.Bad, u'', u'', u''

        rate_total = Decimal(tiprate_total)
        rate_tip = Decimal(tiprate_tip)

        if not self.__CheckFloat__(subtotal):
            derived_subtotal = str(rate_total - rate_tip)
            # NOTE(review): ``not __CheckFloat__(subtotal)`` is already known
            # to hold here, so the similarity test is never reached --
            # confirm whether this branch was meant to cover a numeric but
            # mismatching subtotal.
            if not self.__CheckFloat__(subtotal) or StrUtil.SimilarityRate(
                    derived_subtotal, subtotal) > 0.7:
                return (ConfidenceLevel.Fixed,
                        self.__FormatData__(derived_subtotal),
                        self.__FormatData__(tiprate_tip),
                        self.__FormatData__(tiprate_total))
            return ConfidenceLevel.Bad, u'', u'', u''

        subtotal_amount = Decimal(subtotal)
        if rate_total != (subtotal_amount + rate_tip):
            return ConfidenceLevel.Bad, u'', u'', u''

        if not self.__CheckFloat__(total):
            return ConfidenceLevel.Bad, u'', u'', u''

        total_amount = Decimal(total)
        if rate_total == total_amount:
            return (ConfidenceLevel.Confident,
                    self.__FormatData__(subtotal),
                    self.__FormatData__(tiprate_tip),
                    self.__FormatData__(total))

        # The two totals disagree: pick the one to report when no tip was
        # recognised.
        if abs(rate_total - total_amount) > min(rate_total, total_amount):
            total_amount = rate_total
        else:
            total_amount = min(rate_total, total_amount)

        if self.__CheckFloat__(tip):
            if Decimal(tip) == rate_tip:
                return (ConfidenceLevel.Confident,
                        self.__FormatData__(subtotal),
                        self.__FormatData__(tiprate_tip),
                        self.__FormatData__(tiprate_total))
            return (ConfidenceLevel.Fixed,
                    self.__FormatData__(subtotal),
                    self.__FormatData__(str(rate_total - subtotal_amount)),
                    self.__FormatData__(tiprate_total))

        if len(tip) == 0:
            return (ConfidenceLevel.Fixed,
                    self.__FormatData__(subtotal),
                    tip,
                    self.__FormatData__(str(total_amount)))

        return ConfidenceLevel.Bad, u'', u'', u''
Beispiel #8
0
    def __GetSimilarityName__(self, name):
        if not len(name):
            return

        max_count = 0
        target = name
        name = re.sub(u'责|任|航|空|服|务|有|限|公|司', u'', name)
        for candidate in candidate_companies:
            diff_count = StrUtil.Similarity(candidate, name)
            if max_count < len(candidate) - diff_count:
                max_count = len(candidate) - diff_count
                target = candidate

        if float(max_count) / len(name) > self.__Threshold__:
            return target
        
        return None
Beispiel #9
0
    def __FixedDataByTotal__(self, subtotal, tip, total):
        """Repair the subtotal using a trusted total and a numeric tip.

        The subtotal is derived as ``total - tip`` when ``tip`` passes
        ``__CheckFloat__`` and does not exceed 40% of the total.  The derived
        value is accepted when ``subtotal`` is empty, not numeric, or more
        than 0.5 similar to it.

        Returns:
            ``(ConfidenceLevel.Fixed, subtotal, tip, total)`` with formatted
            values, or ``(ConfidenceLevel.Bad, '', '', '')``.
        """
        total_amount = Decimal(total)

        if not self.__CheckFloat__(tip):
            return ConfidenceLevel.Bad, u'', u'', u''

        tip_amount = Decimal(tip)
        if tip_amount > (total_amount * Decimal(u'0.4')):
            # A tip above 40% of the total is not trusted.
            return ConfidenceLevel.Bad, u'', u'', u''

        derived_subtotal = str(total_amount - tip_amount)
        if (len(subtotal) == 0 or not self.__CheckFloat__(subtotal)
                or StrUtil.SimilarityRate(derived_subtotal, subtotal) > 0.5):
            return (ConfidenceLevel.Fixed,
                    self.__FormatData__(derived_subtotal),
                    self.__FormatData__(tip),
                    self.__FormatData__(total))

        return ConfidenceLevel.Bad, u'', u'', u''
Beispiel #10
0
    def __FixedSeatData__(self, datalist):
        """Resolve a recognised seat string against ``SeatList``.

        Returns a ``(ConfidenceLevel, seat)`` tuple:

        * ``Confident`` and the input when it appears in ``SeatList``.
        * ``Fixed`` and the closest entry when at least one entry scores
          below ``len(input)`` with ``StrUtil.Similarity``.  The lowest score
          wins; on a tie the strictly longer entry wins.
        * ``Bad`` and the input when no entry qualifies.
        * ``Bad`` and ``''`` when ``datalist`` is empty.

        ``StrUtil.Similarity`` is used as a distance (lower is closer) --
        presumably an edit distance; confirm against its implementation.
        """
        unset = 1000  # sentinel meaning "no acceptable candidate found yet"
        best_distance = unset
        best_seat = ''

        # NOTE(review): every path through the loop body returns, so only the
        # first element of ``datalist`` is ever examined -- confirm that this
        # is intended.
        for data in datalist:
            if data in SeatList:
                return ConfidenceLevel.Confident, data

            for known_seat in SeatList:
                distance = StrUtil.Similarity(known_seat, data)
                # Reject candidates that are too far away or worse than the
                # best one seen so far.
                if distance >= len(data) or best_distance < distance:
                    continue
                # On a tie, only a strictly longer candidate replaces the
                # current best.
                if (best_distance == distance
                        and len(best_seat) >= len(known_seat)):
                    continue
                best_distance, best_seat = distance, known_seat

            if best_distance == unset:
                return ConfidenceLevel.Bad, data
            return ConfidenceLevel.Fixed, best_seat

        return ConfidenceLevel.Bad, ''
Beispiel #11
0
    def __FixedTrainNumberData__(self, datalist):
        """Resolve a recognised train number.

        Args:
            datalist: Sequence of recognised candidate strings.

        Returns:
            A ``(ConfidenceLevel, value)`` tuple:

            * ``Bad`` and ``''`` when ``datalist`` is empty.
            * ``Fixed`` and the first candidate when it consists entirely of
              an optional ASCII letter followed by one to four digits.
            * ``Confident`` and the candidate when it appears in
              ``TrainNumberList.TrainNumberList``.
            * ``Fixed`` and the closest listed number when one scores below
              ``len(candidate)`` with ``StrUtil.Similarity`` (the lowest
              score wins; later entries win ties).
            * ``Bad`` and the candidate when nothing qualifies.
        """
        # Guard the ``datalist[0]`` accesses below; an empty input yields the
        # same result as the method's final fallback.
        if not datalist:
            return ConfidenceLevel.Bad, ''

        first = datalist[0]
        # Raw string so that ``\d`` reaches the regex engine unchanged.
        shape_match = re.match(r'[a-zA-Z]?\d{1,4}', first)
        if shape_match and shape_match.group() == first:
            return ConfidenceLevel.Fixed, first

        min_similarity = 1000  # sentinel: no acceptable candidate found yet
        result = first

        # NOTE(review): every path through the loop body returns, so only the
        # first element of ``datalist`` is ever examined -- confirm that this
        # is intended.
        for data in datalist:
            if data in TrainNumberList.TrainNumberList:
                return ConfidenceLevel.Confident, data

            for known_number in TrainNumberList.TrainNumberList:
                similarity = StrUtil.Similarity(known_number, data)
                if similarity < len(data) and min_similarity >= similarity:
                    min_similarity = similarity
                    result = known_number

            if min_similarity == 1000:
                return ConfidenceLevel.Bad, data
            else:
                return ConfidenceLevel.Fixed, result

        return ConfidenceLevel.Bad, ''
Beispiel #12
0
    def __MildFixedAmountData__(self, subtotal, tip, total):
        """Validate subtotal/tip/total and apply only small repairs.

        All three inputs are first passed through ``__NormalizeData__``.
        Repairs are attempted only when both ``subtotal`` and ``total`` pass
        ``__CheckFloat__``:

        * Numeric ``tip``: ``Confident`` when ``subtotal + tip == total``.
          Otherwise, if ``__MissDot__(total)`` holds and the computed total
          differs from ``total`` by one character (being one longer), the
          computed total is used.  Likewise, if ``__MissDot__(tip)`` or
          ``__MissInteger__(tip)`` holds, ``total - subtotal`` is used as
          the tip.
        * Empty ``tip``: ``Confident`` when ``subtotal == total``.
          Otherwise, if ``__MissDot__(total)`` holds and the total exceeds
          the subtotal, a decimal point is inserted into ``total`` further
          and further left until the value is at most twice the subtotal;
          the result is returned as ``Fixed`` if it still exceeds the
          subtotal.
        * Any other ``tip``: ``Confident`` with the tip replaced by
          ``total - subtotal`` when the two differ by one character.

        Returns:
            ``(ConfidenceLevel, subtotal, tip, total)``; ``Bad`` with the
            formatted normalised inputs when no rule applies.
        """
        subtotal = self.__NormalizeData__(subtotal)
        tip = self.__NormalizeData__(tip)
        total = self.__NormalizeData__(total)

        if self.__CheckFloat__(subtotal) and self.__CheckFloat__(total):
            d_subtotal = Decimal(subtotal)
            d_total = Decimal(total)
            if self.__CheckFloat__(tip):
                d_tip = Decimal(tip)
                if d_total == (d_subtotal + d_tip):
                    return ConfidenceLevel.Confident, self.__FormatData__(
                        subtotal), self.__FormatData__(
                            tip), self.__FormatData__(total)

                if self.__MissDot__(total):
                    d_temp = d_subtotal + d_tip
                    fixed = str(d_temp)
                    if StrUtil.Similarity(
                            fixed,
                            total) == 1 and len(fixed) - len(total) == 1:
                        if (d_temp == (d_subtotal + d_tip)):
                            return ConfidenceLevel.Confident, self.__FormatData__(
                                subtotal), self.__FormatData__(
                                    tip), self.__FormatData__(fixed)

                if self.__MissDot__(tip) or self.__MissInteger__(tip):
                    d_temp = d_total - d_subtotal
                    fixed = str(d_temp)
                    if StrUtil.Similarity(
                            fixed, tip) == 1 and len(fixed) - len(tip) == 1:
                        if (d_total == (d_subtotal + d_temp)):
                            return ConfidenceLevel.Confident, self.__FormatData__(
                                subtotal), self.__FormatData__(
                                    fixed), self.__FormatData__(total)

            elif len(tip) == 0:
                if d_subtotal == d_total:
                    return ConfidenceLevel.Confident, self.__FormatData__(
                        subtotal), tip, self.__FormatData__(total)
                elif self.__MissDot__(total) and d_total > d_subtotal:
                    totallen = len(total)
                    if totallen > 1:
                        pos = 1
                        # Only ``totallen`` distinct dot positions exist;
                        # beyond that the slices repeat earlier candidates,
                        # so an unbounded loop would never terminate when no
                        # position brings the total within twice the
                        # subtotal.
                        while d_subtotal * 2 < d_total and pos <= totallen:
                            split_at = totallen - pos
                            d_total = Decimal(total[:split_at] + '.' +
                                              total[split_at:])
                            pos += 1
                        # Accept only a placement that actually satisfied
                        # the "at most twice the subtotal" criterion.
                        if d_subtotal < d_total <= d_subtotal * 2:
                            return ConfidenceLevel.Fixed, self.__FormatData__(
                                subtotal), tip, self.__FormatData__(
                                    str(d_total))

            else:
                d_temp = d_total - d_subtotal
                fixed = str(d_temp)
                if StrUtil.Similarity(fixed, tip) == 1:
                    return ConfidenceLevel.Confident, self.__FormatData__(
                        subtotal), self.__FormatData__(
                            fixed), self.__FormatData__(total)

        return ConfidenceLevel.Bad, self.__FormatData__(
            subtotal), self.__FormatData__(tip), self.__FormatData__(total)