def disambiguate(self, clusters):
    """
    Flatten `clusters` into one list, wrapping leading non-mergeable members.

    Each cluster is scanned from the front.  Every member that precedes the
    first member whose ``is_mergeable`` attribute is true is wrapped on its
    own as ``ambiguousCluster(member, {})``.  The slice running from that
    first mergeable member to the end of the cluster is kept as one piece.

    The pieces produced for a cluster are emitted in reverse scan order:
    the tail slice (if any) first, then the wrapped members from last to
    first.  Pieces of successive clusters are appended in cluster order.

    :param clusters: iterable of sequences whose members expose
                     ``is_mergeable``.
    :return: a new list of pieces as described above.
    """
    resolved = []
    for cluster in clusters:
        pieces = []
        for pos, member in enumerate(cluster):
            if member.is_mergeable:
                # Everything from the first mergeable member onwards stays
                # together as a single piece; stop scanning this cluster.
                pieces.append(cluster[pos:])
                break
            pieces.append(ambiguousCluster(member, {}))
        # Emit the tail slice first, then the singletons back to front.
        pieces.reverse()
        resolved.extend(pieces)
    return resolved
def split_string(self, s):
    """
    Split `s` into number clusters, dropping tokens the hinter classifies.

    The string is tokenised with a regular expression that matches either
    ``self.category_hinter.re`` or a run of digit/comma groups, each
    optionally preceded by spaces and dashes.  The hinter's per-line state
    is reset, then every token is passed to
    ``self.category_hinter.classify``:

    * a token for which ``classify`` returns a true value is dropped;
    * any other token has its commas removed and is wrapped as
      ``ambiguousCluster(token, info)``, where ``info`` records the
      hinter's ``current`` and ``nontotal`` attributes at that moment
      together with ``self.line_number``.

    :param s: the text of one line.
    :return: list of ``ambiguousCluster`` objects in order of appearance.
    """
    hinter = self.category_hinter
    # A u'' literal is equivalent to the former 'str'.decode('utf8') on
    # Python 2 and, unlike .decode(), also works on Python 3.
    pattern = u'(?:%s|(?: *-*[,0-9]+)+)' % hinter.re
    tokens = re.findall(pattern, s)

    #
    # XXX Untested
    hinter.line_reset()

    clusters = []
    for token in tokens:
        if hinter.classify(token):
            # Consumed by the hinter; not a number cluster.
            continue
        # Snapshot the hinter state now: classify() may change it for
        # later tokens on the same line.
        info = {
            'category': hinter.current,
            'nontotal': hinter.nontotal,
            'line': self.line_number,
        }
        clusters.append(ambiguousCluster(token.replace(',', ''), info))
    return clusters
def __init__(self, s, category_hinter, line_number=None):
    """
    Build the line list, then reduce every cluster to a single value.

    ``lineList.__init__`` is called first to populate this list.  Each
    cluster is then resolved in place:

    * a cluster with more than two entries is passed to
      ``self.cross_total`` (per the original author's note, this looks
      for three numbers where the third is the difference of the first
      two);
    * if that produced nothing and the cluster has more than one entry,
      ``self.paired_numbers`` is tried;
    * a resolved value replaces the cluster as
      ``ambiguousCluster(value, {})``; a cluster that cannot be resolved
      is removed from the list.

    Finally ``self.max_only()`` is called.

    :param s: forwarded to ``lineList.__init__``.
    :param category_hinter: forwarded to ``lineList.__init__``.
    :param line_number: forwarded to ``lineList.__init__``.
    """
    lineList.__init__(self, s, category_hinter, line_number=line_number)

    # Walk from the back so removing an entry never shifts the indices
    # still to be visited.
    for pos in reversed(range(len(self))):
        cluster = self[pos]
        size = len(cluster)

        income = None
        if size > 2:
            income = self.cross_total(cluster)
        if income is None and size > 1:
            income = self.paired_numbers(cluster)

        if income is None:
            # Neither strategy resolved the cluster: drop it.
            self.pop(pos)
            continue
        self[pos] = ambiguousCluster(income, {})

    self.max_only()