def __FixedSellerNames__(self, seller_names):
    """Match OCR'd seller-name candidates against the known company list.

    Candidates are examined in order.  A candidate found verbatim in
    ``CompanyList.CompanyList`` is returned at once as ``Confident``.
    Otherwise the known name with the lowest ``StrUtil.Similarity`` score
    seen across all candidates is returned as ``Fixed``; a score only counts
    when it is smaller than the candidate's length, and equal scores are
    resolved in favour of the longer known name.

    Args:
        seller_names: iterable of candidate seller-name strings.

    Returns:
        A ``(ConfidenceLevel, name)`` tuple.  ``Bad`` is paired with the last
        candidate examined when no known name scored low enough, or with
        ``''`` when ``seller_names`` is empty.
    """
    # NOTE(review): this method was reviewed from a copy that had lost its
    # line breaks, so it was ambiguous whether the Bad/Fixed verdict ran
    # inside the candidate loop (silently ignoring every candidate after the
    # first) or after it (raising NameError on empty input).  This version
    # scans every candidate and tolerates empty input; calls with a single
    # candidate behave the same under either reading.  Confirm with history.
    no_match = 1000  # sentinel score meaning "nothing acceptable seen yet"
    best_score = no_match
    best_name = ''
    last_candidate = ''
    known_names = CompanyList.CompanyList

    for candidate in seller_names:
        if candidate in known_names:
            return ConfidenceLevel.Confident, candidate
        last_candidate = candidate
        for known in known_names:
            score = StrUtil.Similarity(known, candidate)
            if score >= len(candidate) or score > best_score:
                continue
            # On a tie keep the current pick unless the new name is longer.
            if score == best_score and len(best_name) >= len(known):
                continue
            best_score = score
            best_name = known

    if best_score == no_match:
        return ConfidenceLevel.Bad, last_candidate
    return ConfidenceLevel.Fixed, best_name
def __CheckBeforeTaxFixedResult__(self, before_tax, fixed_before_tax,
                                  similarity):
    """Decide whether a reconstructed pre-tax amount is an acceptable fix.

    Args:
        before_tax: the pre-tax amount string as originally read.
        fixed_before_tax: the reconstructed pre-tax amount string.
        similarity: similarity rate between the two (higher is closer);
            anything not above 0.49999 is rejected outright.

    Returns:
        ``True`` when the reconstruction is accepted, ``False`` otherwise.

        * Same length: every differing character pair must belong to one of
          the character groups listed below.
        * Different length: rejected when ``similarity`` is below 0.6 and the
          size measure exceeds 3.0; otherwise accepted only when
          ``StrUtil.Similarity`` of the two strings equals 1.
    """
    length = len(fixed_before_tax)
    if not length or not (similarity > 0.49999):
        return False

    if length == len(before_tax):
        # Character groups within which a substitution is tolerated.
        # presumably digits that OCR tends to confuse - verify with the owner
        confusable_groups = ((u'8', u'6', u'0'), (u'1', u'4', u'7'),
                             (u'3', u'8'), (u'3', u'7'), (u'0', u'4'))
        for fixed_char, seen_char in zip(fixed_before_tax, before_tax):
            if fixed_char == seen_char:
                continue
            if not any(fixed_char in group and seen_char in group
                       for group in confusable_groups):
                return False
        return True

    # NOTE(review): the multiplication applies only to len(before_tax), not
    # to the max(); kept exactly as found - confirm this is intended.
    if similarity < 0.6 and max(length, len(before_tax) * similarity) > 3.0:
        return False
    return StrUtil.Similarity(before_tax, fixed_before_tax) == 1
def __FixedDataByAfterTax__(self, before_tax, tax, after_tax):
    """Rebuild the pre-tax and tax amounts from a trusted after-tax amount.

    For every rate in ``self.__TaxRate__`` and every offset in
    ``self.__Error__`` a candidate pre-tax amount is derived from
    ``after_tax``.  A candidate is considered only when the cents of the
    candidate pre-tax amount plus the cents of its tax add up (modulo 100)
    to the cents of ``after_tax``.  Among those, the one whose text has the
    highest ``StrUtil.SimilarityRate`` to ``before_tax`` is kept (later
    candidates win ties).

    Args:
        before_tax: pre-tax amount string as read.
        tax: tax amount string as read.
        after_tax: after-tax amount string; must be parseable by ``Decimal``.

    Returns:
        ``(ok, before_tax, tax, after_tax)``.  When a kept candidate's tax
        text equals ``tax`` the candidate pre-tax text is returned at once
        with ``tax`` and ``after_tax`` untouched.  Otherwise the best
        candidate is returned (run through ``self.__FormatData__``) if
        ``self.__CheckBeforeTaxFixedResult__`` accepts it; failing that the
        inputs are returned unchanged with ``ok`` set to ``False``.
    """
    # NOTE(review): nesting was reconstructed from a copy without
    # indentation; confirm the final acceptance check sits after both loops.
    total = Decimal(after_tax)
    one = Decimal(u'1.0')
    best_score = Decimal(0.0)
    best_before = ''
    best_tax = ''

    for rate in self.__TaxRate__:
        for error in self.__Error__:
            candidate_before = (total / (one + rate)) + error
            candidate_tax = self.__Round__(candidate_before * rate)

            cents_sum = (int(candidate_before * 100) % 100 +
                         int(candidate_tax * 100) % 100)
            if (cents_sum % 100) != (int(total * 100) % 100):
                continue

            candidate_text = self.__DoubleToString__(candidate_before)
            score = StrUtil.SimilarityRate(candidate_text, before_tax)
            if score < best_score:
                continue

            best_before = candidate_text
            best_tax = self.__DoubleToString__(candidate_tax)
            if tax == best_tax:
                # The tax as read confirms this candidate: accept right away.
                return True, best_before, tax, after_tax
            best_score = score

    if self.__CheckBeforeTaxFixedResult__(before_tax, best_before,
                                          best_score):
        return (True, self.__FormatData__(best_before),
                self.__FormatData__(best_tax),
                self.__FormatData__(after_tax))
    return False, before_tax, tax, after_tax
def __FixedDataByTax__(self, before_tax, tax, after_tax):
    """Rebuild the pre-tax / after-tax amounts from a trusted tax amount.

    Step 1: when ``before_tax`` is numeric and some rate in
    ``self.__TaxRate__`` reproduces ``tax`` from it, the after-tax amount is
    recomputed as ``before_tax + tax`` and accepted if its text has a
    ``StrUtil.SimilarityRate`` above 0.7 to ``after_tax``.

    Step 2: otherwise a pre-tax candidate ``tax / rate + error`` is derived
    for every rate and every offset in ``self.__Error__``; the candidate
    whose text is most similar to ``before_tax`` is kept (first one wins
    ties) and accepted if ``self.__CheckBeforeTaxFixedResult__`` agrees.

    Args:
        before_tax: pre-tax amount string as read.
        tax: tax amount string; must be parseable by ``Decimal``.
        after_tax: after-tax amount string as read.

    Returns:
        ``(ok, before_tax, tax, after_tax)``; on success the three amounts
        are passed through ``self.__FormatData__``, on failure the inputs are
        returned unchanged with ``ok`` set to ``False``.
    """
    # NOTE(review): nesting was reconstructed from a copy without
    # indentation; confirm against version control.
    tax_amount = Decimal(tax)

    if self.__CheckFloat__(before_tax):
        base = Decimal(before_tax)
        # Evaluate every rate (no short-circuit), as the original loop did.
        rate_hits = [
            self.__DoubleToString__(self.__Round__(base * rate)) == tax
            for rate in self.__TaxRate__
        ]
        if any(rate_hits):
            rebuilt_total = self.__DoubleToString__(base + tax_amount)
            if StrUtil.SimilarityRate(rebuilt_total, after_tax) > 0.7:
                return (True, self.__FormatData__(before_tax),
                        self.__FormatData__(tax),
                        self.__FormatData__(rebuilt_total))

    best_score = Decimal(0.0)
    best_before = u''
    best_after = u''
    for rate in self.__TaxRate__:
        for error in self.__Error__:
            candidate = (tax_amount / rate) + error
            candidate_text = self.__DoubleToString__(candidate)
            score = StrUtil.SimilarityRate(candidate_text, before_tax)
            if score > best_score:
                best_before = candidate_text
                best_after = self.__DoubleToString__(candidate + tax_amount)
                best_score = score

    if self.__CheckBeforeTaxFixedResult__(before_tax, best_before,
                                          best_score):
        return (True, self.__FormatData__(best_before),
                self.__FormatData__(tax),
                self.__FormatData__(best_after))
    return False, before_tax, tax, after_tax
def __FixedDataBySubtotal__(self, subtotal, tip, total):
    """Repair tip/total around a trusted subtotal.

    Pass 1 trusts ``subtotal + tip``: it applies when ``tip`` is numeric and
    not larger than 40% of the subtotal, and the rebuilt total is accepted
    when ``total`` is empty, non-numeric, or more than 0.5 similar to it.  If
    the rebuilt total exceeds a numeric ``total`` by less than 1.0, the
    ``total`` as read is kept instead.

    Pass 2 trusts ``total - subtotal``: a non-negative implied tip is
    accepted when ``tip`` is empty, non-numeric, was judged too large, or is
    more than 0.5 similar to the implied value.  A negative implied tip
    yields a zero tip with the total replaced by the subtotal, provided
    subtotal and total are more than 0.6 similar.

    Args:
        subtotal: subtotal string; must be parseable by ``Decimal``.
        tip: tip string as read (may be empty or non-numeric).
        total: total string as read (may be empty or non-numeric).

    Returns:
        ``(ConfidenceLevel.Fixed, subtotal, tip, total)`` with each amount
        passed through ``self.__FormatData__``, or
        ``(ConfidenceLevel.Bad, u'', u'', u'')`` when neither pass applies.
    """
    # NOTE(review): nesting was reconstructed from a copy without
    # indentation; confirm against version control.
    base = Decimal(subtotal)
    is_number = self.__CheckFloat__
    fmt = self.__FormatData__
    oversized_tip = False

    # Pass 1: subtotal + tip -> total.
    if is_number(tip):
        tip_amount = Decimal(tip)
        oversized_tip = tip_amount > (base * Decimal(u'0.4'))
        if not oversized_tip:
            rebuilt_total = base + tip_amount
            rebuilt_text = str(rebuilt_total)
            if rebuilt_total >= base and (
                    len(total) == 0 or not is_number(total) or
                    StrUtil.SimilarityRate(rebuilt_text, total) > 0.5):
                if is_number(total):
                    gap = rebuilt_total - Decimal(total)
                    if Decimal('0.0') < gap < Decimal('1.0'):
                        rebuilt_text = total
                return (ConfidenceLevel.Fixed, fmt(subtotal), fmt(tip),
                        fmt(rebuilt_text))

    # Pass 2: total - subtotal -> tip.
    if is_number(total):
        implied_tip = Decimal(total) - base
        implied_text = str(implied_tip)
        if implied_tip >= Decimal(u'0.00') and (
                len(tip) == 0 or not is_number(tip) or oversized_tip or
                StrUtil.SimilarityRate(implied_text, tip) > 0.5):
            return (ConfidenceLevel.Fixed, fmt(subtotal), fmt(implied_text),
                    fmt(total))
        elif implied_tip < Decimal(u'0.00'):
            if StrUtil.SimilarityRate(subtotal, total) > 0.6:
                return (ConfidenceLevel.Fixed, fmt(subtotal), fmt(u'0.00'),
                        fmt(subtotal))

    return ConfidenceLevel.Bad, u'', u'', u''
def __FixedDataByBeforeTax__(self, before_tax, tax, after_tax):
    """Rebuild tax and after-tax amounts from a trusted pre-tax amount.

    For each rate in ``self.__TaxRate__`` the tax is computed with
    ``self.__Round__`` and the resulting after-tax text is compared with
    ``after_tax`` via ``StrUtil.SimilarityRate``.  The tax with the highest
    strictly positive rate wins (first one on ties); if none scores above
    zero the tax stays at zero.

    Args:
        before_tax: pre-tax amount string; must be parseable by ``Decimal``.
        tax: not read by this method.
        after_tax: after-tax amount string as read.

    Returns:
        Always ``(True, before_tax, tax, after_tax)`` with each amount passed
        through ``self.__FormatData__``.
    """
    base = Decimal(before_tax)
    best_score = Decimal(0.0)
    best_tax = Decimal(0.0)

    for rate in self.__TaxRate__:
        rate_tax = self.__Round__(base * rate)
        total_text = self.__DoubleToString__(base + rate_tax)
        score = StrUtil.SimilarityRate(total_text, after_tax)
        if score > best_score:
            best_score, best_tax = score, rate_tax

    before_out = self.__FormatData__(before_tax)
    tax_out = self.__FormatData__(self.__DoubleToString__(best_tax))
    after_out = self.__FormatData__(
        self.__DoubleToString__(base + best_tax))
    return True, before_out, tax_out, after_out
# __FixedDataByTipRate__(self, subtotal, tip, total, tiprate_tip, tiprate_total)
#
# Reconciles the subtotal / tip / total strings with a tip-rate pair
# (tiprate_tip, tiprate_total).  Nothing is attempted unless both tip-rate
# values pass self.__CheckFloat__.
#
# Returns a 4-tuple (ConfidenceLevel member, subtotal, tip, total).  The
# visible return statements produce:
#   * Confident - the tip-rate total equals subtotal + tip-rate tip and also
#     equals `total`, or `tip` equals the tip-rate tip;
#   * Fixed     - values rebuilt from the tip-rate pair (tip as
#     tiprate_total - subtotal, or subtotal as tiprate_total - tiprate_tip);
#   * Bad       - with three empty strings when no branch returns.
# Returned amounts are passed through self.__FormatData__, except `tip` in
# the `len(tip) == 0` branch, which is returned as-is.
#
# NOTE(review): this definition has lost its line breaks and indentation and
# is not valid Python as written.  Two nesting decisions cannot be recovered
# from the text alone and they change behaviour:
#   1. whether the `if self.__CheckFloat__(tip): ... elif len(tip) == 0:`
#      chain is nested inside `if self.__CheckFloat__(total):` (d_total is
#      then always bound) or is its sibling (d_total may be unbound when
#      `total` is not numeric and `tip` is empty);
#   2. whether the final `else:` pairs with
#      `if (d_tiprate_total == (d_subtotal + d_tiprate_tip)):` or with
#      `if self.__CheckFloat__(subtotal):` - either way one half of
#      `not self.__CheckFloat__(subtotal) or StrUtil.SimilarityRate(...) > 0.7`
#      can never decide the outcome.
# Restore the original layout from version control before changing logic.
def __FixedDataByTipRate__(self, subtotal, tip, total, tiprate_tip, tiprate_total): if self.__CheckFloat__(tiprate_tip) and self.__CheckFloat__( tiprate_total): d_tiprate_total = Decimal(tiprate_total) d_tiprate_tip = Decimal(tiprate_tip) if self.__CheckFloat__(subtotal): d_subtotal = Decimal(subtotal) if (d_tiprate_total == (d_subtotal + d_tiprate_tip)): if self.__CheckFloat__(total): d_total = Decimal(total) if d_tiprate_total == d_total: return ConfidenceLevel.Confident, self.__FormatData__( subtotal), self.__FormatData__( tiprate_tip), self.__FormatData__(total) else: if abs(d_tiprate_total - d_total) > min( d_tiprate_total, d_total): d_total = d_tiprate_total else: d_total = min(d_tiprate_total, d_total) if self.__CheckFloat__(tip): d_tip = Decimal(tip) if d_tip == d_tiprate_tip: return ConfidenceLevel.Confident, self.__FormatData__( subtotal), self.__FormatData__( tiprate_tip), self.__FormatData__( tiprate_total) else: return ConfidenceLevel.Fixed, self.__FormatData__( subtotal), self.__FormatData__( str(d_tiprate_total - d_subtotal) ), self.__FormatData__(tiprate_total) elif len(tip) == 0: return ConfidenceLevel.Fixed, self.__FormatData__( subtotal), tip, self.__FormatData__( str(d_total)) else: d_subtotal = d_tiprate_total - d_tiprate_tip fixed = str(d_subtotal) if not self.__CheckFloat__(subtotal) or StrUtil.SimilarityRate( fixed, subtotal) > 0.7: return ConfidenceLevel.Fixed, self.__FormatData__( fixed), self.__FormatData__( tiprate_tip), self.__FormatData__(tiprate_total) return ConfidenceLevel.Bad, u'', u'', u''
def __GetSimilarityName__(self, name):
    """Return the entry of ``candidate_companies`` that best matches ``name``.

    A fixed set of characters (those in the pattern below) is removed from
    ``name`` before comparison.  For each candidate the overlap is computed
    as ``len(candidate) - StrUtil.Similarity(candidate, stripped_name)``; the
    candidate with the largest positive overlap wins (first one on ties).

    Args:
        name: the name string to look up.

    Returns:
        The winning candidate when ``overlap / len(stripped_name)`` exceeds
        ``self.__Threshold__``; ``None`` otherwise, including when ``name``
        is empty or consists solely of the removed characters.  If no
        candidate has a positive overlap the original ``name`` is the
        fallback value subjected to the threshold test.
    """
    if not name:
        return None

    stripped = re.sub(u'责|任|航|空|服|务|有|限|公|司', u'', name)
    if not stripped:
        # Nothing is left to compare.  The earlier emptiness check ran before
        # the substitution, so such names used to raise ZeroDivisionError in
        # the ratio below.
        return None

    best_overlap = 0
    best_candidate = name
    for candidate in candidate_companies:
        overlap = len(candidate) - StrUtil.Similarity(candidate, stripped)
        if overlap > best_overlap:
            best_overlap = overlap
            best_candidate = candidate

    if float(best_overlap) / len(stripped) > self.__Threshold__:
        return best_candidate
    return None
def __FixedDataByTotal__(self, subtotal, tip, total):
    """Rebuild the subtotal from a trusted total and a plausible tip.

    The tip must be numeric and no larger than 40% of the total.  The
    rebuilt subtotal (``total - tip``) is accepted when ``subtotal`` is
    empty, non-numeric, or more than 0.5 similar to the rebuilt text
    according to ``StrUtil.SimilarityRate``.

    Args:
        subtotal: subtotal string as read (may be empty or non-numeric).
        tip: tip string as read.
        total: total string; must be parseable by ``Decimal``.

    Returns:
        ``(ConfidenceLevel.Fixed, subtotal, tip, total)`` with each amount
        passed through ``self.__FormatData__``, or
        ``(ConfidenceLevel.Bad, u'', u'', u'')``.
    """
    grand_total = Decimal(total)

    if not self.__CheckFloat__(tip):
        return ConfidenceLevel.Bad, u'', u'', u''

    tip_amount = Decimal(tip)
    if tip_amount > (grand_total * Decimal(u'0.4')):
        # A tip above 40% of the total is treated as a misread.
        return ConfidenceLevel.Bad, u'', u'', u''

    rebuilt = str(grand_total - tip_amount)
    if (len(subtotal) == 0 or not self.__CheckFloat__(subtotal) or
            StrUtil.SimilarityRate(rebuilt, subtotal) > 0.5):
        return (ConfidenceLevel.Fixed, self.__FormatData__(rebuilt),
                self.__FormatData__(tip), self.__FormatData__(total))

    return ConfidenceLevel.Bad, u'', u'', u''
def __FixedSeatData__(self, datalist):
    """Match OCR'd seat candidates against ``SeatList``.

    Candidates are examined in order.  A candidate found verbatim in
    ``SeatList`` is returned at once as ``Confident``.  Otherwise the seat
    with the lowest ``StrUtil.Similarity`` score seen across all candidates
    is returned as ``Fixed``; a score only counts when it is smaller than the
    candidate's length, and equal scores are resolved in favour of the
    longer seat name.

    Args:
        datalist: iterable of candidate seat strings.

    Returns:
        A ``(ConfidenceLevel, seat)`` tuple.  ``Bad`` is paired with the last
        candidate examined when no seat scored low enough, or with ``''``
        when ``datalist`` is empty.
    """
    # NOTE(review): this method was reviewed from a copy that had lost its
    # line breaks, so it was ambiguous whether the Bad/Fixed verdict ran
    # inside the candidate loop (silently ignoring every candidate after the
    # first) or after it (raising NameError on empty input).  This version
    # scans every candidate and tolerates empty input; calls with a single
    # candidate behave the same under either reading.  Confirm with history.
    no_match = 1000  # sentinel score meaning "nothing acceptable seen yet"
    best_score = no_match
    best_seat = ''
    last_candidate = ''

    for candidate in datalist:
        if candidate in SeatList:
            return ConfidenceLevel.Confident, candidate
        last_candidate = candidate
        for seat in SeatList:
            score = StrUtil.Similarity(seat, candidate)
            if score >= len(candidate) or score > best_score:
                continue
            # On a tie keep the current pick unless the new seat is longer.
            if score == best_score and len(best_seat) >= len(seat):
                continue
            best_score = score
            best_seat = seat

    if best_score == no_match:
        return ConfidenceLevel.Bad, last_candidate
    return ConfidenceLevel.Fixed, best_seat
def __FixedTrainNumberData__(self, datalist):
    """Resolve OCR'd train-number candidates.

    If the first candidate consists entirely of an optional ASCII letter
    followed by one to four digits it is returned immediately as ``Fixed``.
    Otherwise candidates are examined in order: one found verbatim in
    ``TrainNumberList.TrainNumberList`` is returned as ``Confident``; failing
    that, the known number with the lowest ``StrUtil.Similarity`` score seen
    across all candidates is returned as ``Fixed`` (a score only counts when
    it is smaller than the candidate's length; on equal scores the later
    known number wins).

    Args:
        datalist: sequence of candidate train-number strings.

    Returns:
        A ``(ConfidenceLevel, train_number)`` tuple.  ``Bad`` is paired with
        the last candidate examined when no known number scored low enough,
        or with ``''`` when ``datalist`` is empty.
    """
    # NOTE(review): this method was reviewed from a copy that had lost its
    # line breaks, so it was ambiguous whether the Bad/Fixed verdict ran
    # inside the candidate loop (ignoring every candidate after the first)
    # or after it.  This version scans every candidate; calls with a single
    # candidate behave the same under either reading.  Confirm with history.
    if not datalist:
        # Previously datalist[0] raised IndexError before the fallback
        # return at the end of the method could be reached.
        return ConfidenceLevel.Bad, ''

    first = datalist[0]
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    shaped = re.match(r'[a-zA-Z]?\d{1,4}', first)
    if shaped and shaped.group() == first:
        return ConfidenceLevel.Fixed, first

    no_match = 1000  # sentinel score meaning "nothing acceptable seen yet"
    best_score = no_match
    best_number = first
    last_candidate = first
    known_numbers = TrainNumberList.TrainNumberList

    for candidate in datalist:
        if candidate in known_numbers:
            return ConfidenceLevel.Confident, candidate
        last_candidate = candidate
        for known in known_numbers:
            score = StrUtil.Similarity(known, candidate)
            if score < len(candidate) and best_score >= score:
                best_score = score
                best_number = known

    if best_score == no_match:
        return ConfidenceLevel.Bad, last_candidate
    return ConfidenceLevel.Fixed, best_number
def __MildFixedAmountData__(self, subtotal, tip, total):
    """Validate subtotal / tip / total and apply only small repairs.

    All three inputs are first passed through ``self.__NormalizeData__``.
    Repairs are attempted only when both subtotal and total are numeric:

    * numeric tip: ``Confident`` when ``subtotal + tip == total``.  If
      ``self.__MissDot__(total)`` holds, the computed total replaces it when
      the two texts differ by a ``StrUtil.Similarity`` score of 1 and one
      character of length.  The same repair is applied to the tip (computed
      as ``total - subtotal``) when ``self.__MissDot__(tip)`` or
      ``self.__MissInteger__(tip)`` holds.
    * empty tip: ``Confident`` when subtotal equals total.  If
      ``self.__MissDot__(total)`` holds and total exceeds subtotal, a decimal
      point is inserted into ``total`` from the right, one position at a
      time, until the value is no more than twice the subtotal; the result
      is returned as ``Fixed`` if it still exceeds the subtotal.
    * any other tip: the tip is replaced by ``total - subtotal`` when the two
      texts have a ``StrUtil.Similarity`` score of 1.

    Args:
        subtotal: subtotal string as read.
        tip: tip string as read.
        total: total string as read.

    Returns:
        ``(ConfidenceLevel, subtotal, tip, total)``.  Amounts are passed
        through ``self.__FormatData__`` except an empty tip, which is
        returned as-is.  ``Bad`` is returned with the normalized, formatted
        inputs when no rule applies.
    """
    # NOTE(review): nesting was reconstructed from a copy without
    # indentation.  In particular the post-loop `subtotal < total` check is
    # assumed to sit inside the `len(total) > 1` guard - confirm.
    subtotal = self.__NormalizeData__(subtotal)
    tip = self.__NormalizeData__(tip)
    total = self.__NormalizeData__(total)
    fmt = self.__FormatData__

    if self.__CheckFloat__(subtotal) and self.__CheckFloat__(total):
        d_subtotal = Decimal(subtotal)
        d_total = Decimal(total)

        if self.__CheckFloat__(tip):
            d_tip = Decimal(tip)
            if d_total == (d_subtotal + d_tip):
                return (ConfidenceLevel.Confident, fmt(subtotal), fmt(tip),
                        fmt(total))

            if self.__MissDot__(total):
                expected_total = str(d_subtotal + d_tip)
                if (StrUtil.Similarity(expected_total, total) == 1 and
                        len(expected_total) - len(total) == 1):
                    return (ConfidenceLevel.Confident, fmt(subtotal),
                            fmt(tip), fmt(expected_total))

            if self.__MissDot__(tip) or self.__MissInteger__(tip):
                d_expected_tip = d_total - d_subtotal
                expected_tip = str(d_expected_tip)
                if (StrUtil.Similarity(expected_tip, tip) == 1 and
                        len(expected_tip) - len(tip) == 1):
                    # Re-check the sum in case the subtraction was rounded.
                    if d_total == (d_subtotal + d_expected_tip):
                        return (ConfidenceLevel.Confident, fmt(subtotal),
                                fmt(expected_tip), fmt(total))

        elif len(tip) == 0:
            if d_subtotal == d_total:
                return (ConfidenceLevel.Confident, fmt(subtotal), tip,
                        fmt(total))
            elif self.__MissDot__(total) and d_total > d_subtotal:
                totallen = len(total)
                if totallen > 1:
                    pos = 1
                    # Bounded by the string length: past that point the
                    # slices only repeat values already tried, so the
                    # unbounded loop never terminated (e.g. subtotal of 0).
                    while d_subtotal * 2 < d_total and pos <= totallen:
                        d_total = Decimal(total[:totallen - pos] + '.' +
                                          total[totallen - pos:])
                        pos += 1
                    placed = not (d_subtotal * 2 < d_total)
                    if placed and d_subtotal < d_total:
                        return (ConfidenceLevel.Fixed, fmt(subtotal), tip,
                                fmt(str(d_total)))

        else:
            expected_tip = str(d_total - d_subtotal)
            if StrUtil.Similarity(expected_tip, tip) == 1:
                return (ConfidenceLevel.Confident, fmt(subtotal),
                        fmt(expected_tip), fmt(total))

    return ConfidenceLevel.Bad, fmt(subtotal), fmt(tip), fmt(total)