def iterhashantijoin(left, right, lkey, rkey):
    """Hash anti-join: yield the left header then only those left rows
    whose key value never occurs in the right table. The full set of
    right key values is held in memory."""
    lit = iter(left)
    rit = iter(right)
    lflds = lit.next()
    rflds = rit.next()
    yield tuple(lflds)
    # map the key field selections onto positional indices
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)
    # key extractors for each side
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)
    # materialise the complete set of right-hand key values
    rkeys = set(rgetk(rrow) for rrow in rit)
    # pass through only left rows whose key is absent on the right
    for lrow in lit:
        if lgetk(lrow) not in rkeys:
            yield tuple(lrow)
def tupletrees(table, facet, start='start', stop='stop', value=None):
    """
    Construct faceted interval trees for the given table, where each node
    in the tree is a row of the table.
    """
    try:
        import bx.intervals
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)
    it = iter(table)
    fields = it.next()
    assert start in fields, 'start field not recognised'
    assert stop in fields, 'stop field not recognised'
    getstart = itemgetter(fields.index(start))
    getstop = itemgetter(fields.index(stop))
    # decide what value is stored at each node of the trees
    if value is None:
        getvalue = tuple  # store the whole row
    else:
        valueindices = asindices(fields, value)
        assert len(valueindices) > 0, 'invalid value field specification'
        getvalue = itemgetter(*valueindices)
    keyindices = asindices(fields, facet)
    assert len(keyindices) > 0, 'invalid key'
    getkey = itemgetter(*keyindices)
    trees = {}
    for row in it:
        k = getkey(row)
        try:
            tree = trees[k]
        except KeyError:
            # first row seen for this facet key - create its tree
            tree = trees[k] = bx.intervals.intersection.IntervalTree()
        tree.add(getstart(row), getstop(row), getvalue(row))
    return trees
def __init__(self, default_connections, keyed_connections, fields, key):
    """Initialise a duplicates-detection connection: build a key getter
    from the `key` field selection and reset duplicate-tracking state.

    N.B., the original body repeated the whole initialisation sequence
    twice verbatim; the redundant second copy has been removed - the
    resulting state is identical.
    """
    super(DuplicatesConnection, self).__init__(default_connections,
                                               keyed_connections, fields)
    # convert field selection into field indices
    indices = asindices(fields, key)
    # now use field indices to construct a _getkey function
    # N.B., this may raise an exception on short rows, depending on
    # the field selection
    self.getkey = itemgetter(*indices)
    # initial state
    self.previous = None
    self.previous_is_duplicate = False
def iterhashrightjoin(left, right, lkey, rkey, missing, llookup, lprefix,
                      rprefix):
    """Hash right join: iterate the right table, matching each right row
    against the prebuilt left-row lookup `llookup`; unmatched right rows are
    emitted with `missing` in the left-hand positions (keys copied over)."""
    lit = iter(left)
    rit = iter(right)
    lflds = lit.next()
    rflds = rit.next()
    # key field indices for each side
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)
    # extractor for right-hand key values
    rgetk = operator.itemgetter(*rkind)
    # right-hand non-key positions - key fields appear only once in the
    # output (taken from the left side), so they are excluded here
    rvind = [i for i in range(len(rflds)) if i not in rkind]
    rgetv = rowgetter(*rvind)
    # assemble the output header, applying prefixes where requested
    if lprefix is None:
        outflds = list(lflds)
    else:
        outflds = [(str(lprefix) + str(f)) for f in lflds]
    if rprefix is None:
        outflds.extend(rgetv(rflds))
    else:
        outflds.extend([(str(rprefix) + str(f)) for f in rgetv(rflds)])
    yield tuple(outflds)
    for rrow in rit:
        k = rgetk(rrow)
        if k in llookup:
            # matched - one output row per matching left row
            rtail = list(rgetv(rrow))
            for lrow in llookup[k]:
                yield tuple(list(lrow) + rtail)
        else:
            # unmatched right row - left side is all missing except keys
            outrow = [missing] * len(lflds)
            for li, ri in zip(lkind, rkind):
                outrow[li] = rrow[ri]
            outrow.extend(rgetv(rrow))
            yield tuple(outrow)
def iterantijoin(left, right, lkey, rkey):
    """Merge-based anti-join: yield the left header, then rows from `left`
    whose key value has no counterpart in `right`. Both tables are assumed
    to be sorted by their respective keys (required by the groupby-based
    merge below)."""
    lit = iter(left)
    rit = iter(right)
    lflds = lit.next()
    rflds = rit.next()
    yield tuple(lflds)
    # determine indices of the key fields in left and right tables
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)
    # construct functions to extract key values from both tables
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)
    # construct group iterators for both tables
    lgit = itertools.groupby(lit, key=lgetk)
    rgit = itertools.groupby(rit, key=rgetk)
    # loop until *either* of the iterators is exhausted
    lkval, rkval = None, None  # initialise here to handle empty tables
    try:
        # pick off initial row groups
        lkval, lrowgrp = lgit.next()
        rkval, _ = rgit.next()
        while True:
            if lkval < rkval:
                # left key has no right counterpart - emit the whole group
                for row in lrowgrp:
                    yield tuple(row)
                # advance left
                lkval, lrowgrp = lgit.next()
            elif lkval > rkval:
                # advance right
                rkval, _ = rgit.next()
            else:
                # keys match - suppress the left group; advance both
                lkval, lrowgrp = lgit.next()
                rkval, _ = rgit.next()
    except StopIteration:
        pass
    # any left over?
    if lkval > rkval:
        # yield anything that got left hanging
        for row in lrowgrp:
            yield tuple(row)
    # and the rest...
    for lkval, lrowgrp in lgit:
        for row in lrowgrp:
            yield tuple(row)
def iterhashlookupjoin(left, right, lkey, rkey, missing, lprefix, rprefix):
    """Left join driven by a one-to-one hash lookup built from the right
    table; left rows with no match are padded with `missing` values."""
    lit = iter(left)
    lflds = lit.next()
    # peek the right header but keep the whole table for the lookup
    rflds, rit = iterpeek(right)
    from petl.util import lookupone
    rlookup = lookupone(rit, rkey, strict=False)
    # key field indices for each side
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)
    # extractor for left-hand key values
    lgetk = operator.itemgetter(*lkind)
    # right-hand non-key positions - key fields come only from the left,
    # so they are not duplicated in the output
    rvind = [i for i in range(len(rflds)) if i not in rkind]
    rgetv = rowgetter(*rvind)
    # assemble the output header, applying prefixes where requested
    if lprefix is None:
        outflds = list(lflds)
    else:
        outflds = [(str(lprefix) + str(f)) for f in lflds]
    if rprefix is None:
        outflds.extend(rgetv(rflds))
    else:
        outflds.extend([(str(rprefix) + str(f)) for f in rgetv(rflds)])
    yield tuple(outflds)
    npad = len(rvind)
    for lrow in lit:
        k = lgetk(lrow)
        try:
            rrow = rlookup[k]
        except KeyError:
            # no match - pad the right side with missing values
            yield tuple(list(lrow) + [missing] * npad)
        else:
            # matched - append the right row's non-key values
            yield tuple(list(lrow) + list(rgetv(rrow)))
def itersimplemultirangeaggregate(table, keys, widths, aggregation, value,
                                  mins, maxs):
    """Aggregate rows into multi-dimensional bins defined by `keys`/`widths`
    (recursively, so any number of key fields is handled), yielding
    (bin definition, aggregated value) pairs."""
    if aggregation == len:
        # len cannot consume an iterator directly; count elements instead
        aggregation = lambda grp: sum(1 for _ in grp)
    yield ('key', 'value')
    it = iter(table)
    fields = it.next()
    # wrap rows so they can be accessed by field name as well as position
    it = hybridrows(fields, it)
    # determine how to extract the value of interest from each row
    if value is None:
        getval = lambda v: v  # identity - aggregate whole rows
    elif callable(value):
        getval = value
    else:
        vindices = asindices(fields, value)
        getval = operator.itemgetter(*vindices)
    for bindef, vals in _recursive_bin(it, 0, [], fields, keys, widths,
                                       getval, mins, maxs):
        yield bindef, aggregation(vals)
def itersearch(table, pattern, field, flags, complement):
    """Yield rows whose selected field values match `pattern` (regex), or
    the non-matching rows when `complement` is True."""
    prog = re.compile(pattern, flags)
    it = iter(table)
    fields = [str(f) for f in it.next()]
    yield tuple(fields)
    # build the per-row predicate according to the field selection
    if field is None:
        # no selection - test every value in the row
        test = lambda row: any(prog.search(str(v)) for v in row)
    elif isinstance(field, basestring):
        # a single named field
        index = fields.index(field)
        test = lambda row: prog.search(str(row[index]))
    else:
        # a sequence of fields
        indices = asindices(fields, field)
        getvals = operator.itemgetter(*indices)
        test = lambda row: any(prog.search(str(v)) for v in getvals(row))
    # single loop; `wanted` selects match vs non-match rows
    # (note: comparison with False preserved from the original contract)
    wanted = (complement == False)
    for row in it:
        if bool(test(row)) == wanted:
            yield tuple(row)
def iterunique(source, key):
    """Yield the header, then only those rows of `source` whose key value
    differs from both neighbours, i.e. rows that are unique under `key`.
    `source` is assumed to be sorted by `key`; `key` may be None to compare
    whole rows."""
    # assume source is sorted
    it = iter(source)
    flds = it.next()
    yield tuple(flds)
    # convert field selection into field indices
    if key is None:
        indices = range(len(flds))
    else:
        indices = asindices(flds, key)
    # now use field indices to construct a _getkey function
    # N.B., this may raise an exception on short rows, depending on
    # the field selection
    getkey = operator.itemgetter(*indices)
    prev = it.next()
    prev_key = getkey(prev)
    prev_comp_ne = True  # no predecessor, so "differs from previous" holds
    for curr in it:
        curr_key = getkey(curr)
        curr_comp_ne = (curr_key != prev_key)
        if prev_comp_ne and curr_comp_ne:
            # prev differs from both neighbours - it is unique
            yield tuple(prev)
        prev = curr
        prev_key = curr_key
        prev_comp_ne = curr_comp_ne
    # last one?
    if prev_comp_ne:
        # fix: previously yielded the raw row object here; normalise to a
        # tuple like every other data row emitted by this generator
        yield tuple(prev)
def iterfieldselect(source, field, where, complement):
    """Yield the header, then rows for which `where(value)` differs from
    `complement` (i.e. logical XOR), where value is taken from `field`."""
    it = iter(source)
    flds = it.next()
    yield tuple(flds)
    getv = operator.itemgetter(*asindices(flds, field))
    for row in it:
        if where(getv(row)) != complement:  # XOR
            yield tuple(row)
def iterconflicts(source, key, missing, exclude, include):
    """Yield rows from `source` (assumed sorted by `key`) that share a key
    value with an adjacent row but disagree in at least one compared field.
    Values equal to `missing` never count as conflicting. `exclude` and
    `include` restrict which fields are compared; exclude overrides
    include."""
    # normalise arguments
    if isinstance(exclude, basestring):
        exclude = (exclude,)
    if isinstance(include, basestring):
        include = (include,)
    # exclude overrides include
    if include and exclude:
        include = None
    it = iter(source)
    flds = it.next()
    yield tuple(flds)
    # convert field selection into field indices
    indices = asindices(flds, key)
    # now use field indices to construct a _getkey function
    # N.B., this may raise an exception on short rows, depending on
    # the field selection
    getkey = operator.itemgetter(*indices)
    previous = None
    previous_yielded = False  # ensure the group's first row is emitted once
    for row in it:
        if previous is None:
            previous = row
        else:
            kprev = getkey(previous)
            kcurr = getkey(row)
            if kprev == kcurr:
                # is there a conflict?
                conflict = False
                for x, y, f in zip(previous, row, flds):
                    # field is compared if not excluded, or explicitly
                    # included, or no restriction was given at all
                    if (exclude and f not in exclude) \
                            or (include and f in include) \
                            or (not exclude and not include):
                        if missing not in (x, y) and x != y:
                            conflict = True
                            break
                if conflict:
                    if not previous_yielded:
                        yield tuple(previous)
                        previous_yielded = True
                    yield tuple(row)
            else:
                # reset
                previous_yielded = False
            previous = row
def itermergesort(sources, key, header, missing, reverse):
    """Merge multiple (already sorted) tables into one sorted output.
    Headers are first standardised across all sources (short/odd rows are
    padded with `missing`), then the wrapped iterators are merged via
    shortlistmergesorted."""
    # first need to standardise headers of all input tables
    # borrow this from itercat - TODO remove code smells
    its = [iter(t) for t in sources]
    source_flds_lists = [it.next() for it in its]
    if header is None:
        # determine output fields by gathering all fields found in the sources
        outflds = list()
        for flds in source_flds_lists:
            for f in flds:
                if f not in outflds:
                    # add any new fields as we find them
                    outflds.append(f)
    else:
        # predetermined output fields
        outflds = header
    yield tuple(outflds)

    def _standardisedata(it, flds, outflds):
        # now construct and yield the data rows, reordering/padding each
        # source row to match the output header
        for row in it:
            try:
                # should be quickest to do this way
                yield tuple(row[flds.index(f)] if f in flds else missing
                            for f in outflds)
            except IndexError:
                # handle short rows
                outrow = [missing] * len(outflds)
                for i, f in enumerate(flds):
                    try:
                        outrow[outflds.index(f)] = row[i]
                    except IndexError:
                        pass  # be relaxed about short rows
                yield tuple(outrow)

    # wrap all iterators to standardise fields
    sits = [_standardisedata(it, flds, outflds)
            for flds, it in zip(source_flds_lists, its)]
    # now determine key function
    getkey = None
    if key is not None:
        # convert field selection into field indices
        indices = asindices(outflds, key)
        # now use field indices to construct a _getkey function
        # N.B., this will probably raise an exception on short rows
        getkey = operator.itemgetter(*indices)
    # OK, do the merge sort
    for row in shortlistmergesorted(getkey, reverse, *sits):
        yield row
def iterfilldown(table, fillfields, missing):
    """Yield rows from `table`, replacing `missing` values in the selected
    fields with the most recent non-missing value seen above them."""
    src = iter(table)
    hdr = src.next()
    yield tuple(hdr)
    if not fillfields:
        # no selection given - fill down every field
        fillfields = hdr
    fillindices = asindices(hdr, fillfields)
    # the first data row seeds the carried fill values
    carry = list(src.next())
    yield tuple(carry)
    for row in src:
        outrow = list(row)
        for i in fillindices:
            if row[i] == missing:
                # fill down from the value carried above
                outrow[i] = carry[i]
            else:
                # fresh value becomes the new fill value
                carry[i] = row[i]
        yield tuple(outrow)
def __iter__(self):
    """Iterate over the wrapped table yielding de-duplicated rows under
    the key. With `self.count` set, an extra field named by `self.count`
    is appended and one row is emitted per run of consecutive equal-key
    rows, carrying the run length; otherwise only the first row of each
    run is emitted. Input is presumably sorted by the key - TODO confirm
    against callers."""
    it = iter(self.table)
    flds = it.next()
    # convert field selection into field indices
    if self.key is None:
        indices = range(len(flds))
    else:
        indices = asindices(flds, self.key)
    # now use field indices to construct a _getkey function
    # N.B., this may raise an exception on short rows, depending on
    # the field selection
    getkey = operator.itemgetter(*indices)
    if self.count:
        flds = tuple(flds) + (self.count,)
        yield flds
        previous = None
        n_dup = 1
        for row in it:
            if previous is None:
                previous = row
            else:
                kprev = getkey(previous)
                kcurr = getkey(row)
                if kprev == kcurr:
                    n_dup += 1
                else:
                    yield tuple(previous) + (n_dup,)
                    n_dup = 1
                previous = row
        # deal with last row
        # fix: guard against a table with no data rows, where `previous`
        # is still None and tuple(None) would raise TypeError
        if previous is not None:
            yield tuple(previous) + (n_dup,)
    else:
        yield flds
        previous_keys = None
        for row in it:
            keys = getkey(row)
            if keys != previous_keys:
                yield tuple(row)
            previous_keys = keys
def __init__(self, default_connections, keyed_connections, fields, key,
             reverse, buffersize):
    """Initialise a sorting connection: build the key getter (if any),
    record sort direction, and set up buffering state."""
    super(SortConnection, self).__init__(default_connections,
                                         keyed_connections, fields)
    if key is None:
        self.getkey = None
    else:
        # convert field selection into field indices
        # N.B., this will probably raise an exception on short rows
        self.getkey = itemgetter(*asindices(fields, key))
    self.reverse = reverse
    # fall back to the package-wide default buffer size if none given
    if buffersize is None:
        self.buffersize = petl.transform.sorts.defaultbuffersize
    else:
        self.buffersize = buffersize
    self.cache = list()
    self.chunkfiles = list()
def __iter__(self):
    """Yield rows with `self.field` relocated to position `self.index`;
    short rows are padded with `self.missing`."""
    it = iter(self.table)
    fields = list(it.next())
    # build the output header with the chosen field moved into place
    outflds = [f for f in fields if f != self.field]
    outflds.insert(self.index, self.field)
    yield tuple(outflds)
    # map output positions back onto source positions
    indices = asindices(fields, outflds)
    transform = rowgetter(*indices)
    for row in it:
        try:
            yield transform(row)
        except IndexError:
            # row is short - be kind and pad absent positions
            yield tuple(row[i] if i < len(row) else self.missing
                        for i in indices)
def itersearch(table, pattern, field, flags):
    """Yield the header, then only rows where `pattern` (regex) matches in
    the selected field(s), or anywhere in the row if `field` is None."""
    prog = re.compile(pattern, flags)
    it = iter(table)
    fields = [str(f) for f in it.next()]
    yield tuple(fields)
    # build the per-row predicate according to the field selection
    if field is None:
        # no selection - test every value in the row
        def test(row):
            return any(prog.search(str(v)) for v in row)
    elif isinstance(field, basestring):
        # a single named field
        index = fields.index(field)
        def test(row):
            return prog.search(str(row[index]))
    else:
        # a sequence of fields
        getvals = operator.itemgetter(*asindices(fields, field))
        def test(row):
            return any(prog.search(str(v)) for v in getvals(row))
    for row in it:
        if test(row):
            yield tuple(row)
def itercut(source, spec, missing=None):
    """Yield rows from `source` restricted to the fields selected by
    `spec`; short rows are padded with `missing`."""
    it = iter(source)
    spec = tuple(spec)  # freeze the selection so no-one can change it midstream
    flds = it.next()
    # convert field selection into field indices
    keep = asindices(flds, spec)
    # projection applied to header and every data row
    project = rowgetter(*keep)
    yield project(flds)
    for row in it:
        try:
            yield project(row)
        except IndexError:
            # row is short - be kind and pad absent positions with missing
            n = len(row)
            yield tuple(row[i] if i < n else missing for i in keep)
def itercutout(source, spec, missing=None):
    """Yield rows from `source` with the fields selected by `spec`
    removed; short rows are padded with `missing`."""
    it = iter(source)
    spec = tuple(spec)  # freeze the selection so no-one can change it midstream
    flds = it.next()
    # keep every position that is *not* selected for removal
    dropped = asindices(flds, spec)
    keep = [i for i in range(len(flds)) if i not in dropped]
    project = rowgetter(*keep)
    yield project(flds)
    for row in it:
        try:
            yield project(row)
        except IndexError:
            # row is short - be kind and pad absent positions with missing
            n = len(row)
            yield tuple(row[i] if i < n else missing for i in keep)
def iterduplicates(source, key):
    """Yield the header, then every row belonging to a run of two or more
    consecutive rows sharing the same key value. `source` is assumed to be
    sorted by `key`; `key` may be None to compare whole rows."""
    # assume source is sorted
    it = iter(source)
    flds = it.next()
    yield tuple(flds)
    # convert field selection into field indices
    if key is None:
        indices = range(len(flds))
    else:
        indices = asindices(flds, key)
    # N.B., itemgetter may raise an exception on short rows, depending on
    # the field selection
    getkey = operator.itemgetter(*indices)
    prev = None
    prev_key = None
    emitted_first = False  # has the first member of the current run gone out?
    for row in it:
        curr_key = getkey(row)
        if prev is not None and curr_key == prev_key:
            # inside a duplicate run - emit the opening row once, then
            # every subsequent member
            if not emitted_first:
                yield tuple(prev)
                emitted_first = True
            yield tuple(row)
        else:
            # key changed (or first row) - reset run state
            emitted_first = False
        prev = row
        prev_key = curr_key
def __init__(self, default_connections, keyed_connections, fields,
             discriminator):
    """Initialise a partitioning connection. `discriminator` is either a
    callable applied to each row, or a field (or fields) selection from
    which a key getter is built."""
    super(PartitionConnection, self).__init__(default_connections,
                                              keyed_connections, fields)
    if callable(discriminator):
        # caller supplied their own function
        self.discriminator = discriminator
    else:
        # assume field or fields
        indices = asindices(fields, discriminator)
        self.discriminator = itemgetter(*indices)
def _iternocache(self, source, key, reverse):
    """Sort `source` without using any pre-existing cache: sort in memory
    if the data fits within self.buffersize, otherwise spill sorted chunks
    to pickled temporary files and merge them. Optionally populates the
    cache attributes for later reuse."""
    debug('iterate without cache')
    self._clearcache()
    it = iter(source)
    flds = it.next()
    yield tuple(flds)
    if key is not None:
        # convert field selection into field indices
        indices = asindices(flds, key)
    else:
        indices = range(len(flds))
    # now use field indices to construct a _getkey function
    # N.B., this will probably raise an exception on short rows
    getkey = sortable_itemgetter(*indices)
    # initialise the first chunk
    rows = list(itertools.islice(it, 0, self.buffersize))
    rows.sort(key=getkey, reverse=reverse)
    # have we exhausted the source iterator?
    if self.buffersize is None or len(rows) < self.buffersize:
        # the whole table fitted into a single in-memory chunk
        if self.cache:
            debug('caching mem')
            self._fldcache = flds
            self._memcache = rows
            # actually not needed to iterate from memcache
            self._getkey = getkey
        for row in rows:
            yield tuple(row)
    else:
        # external sort: spill sorted chunks to temporary files
        chunkfiles = []
        while rows:
            # dump the chunk
            f = NamedTemporaryFile(dir=self.tempdir)
            for row in rows:
                pickle.dump(row, f, protocol=-1)
            f.flush()
            # N.B., do not close the file! Closing will delete
            # the file, and we might want to keep it around
            # if it can be cached. We'll let garbage collection
            # deal with this, i.e., when no references to the
            # chunk files exist any more, garbage collection
            # should be an implicit close, which will cause file
            # deletion.
            chunkfiles.append(f)
            # grab the next chunk
            rows = list(itertools.islice(it, 0, self.buffersize))
            rows.sort(key=getkey, reverse=reverse)
        if self.cache:
            debug('caching files %r', chunkfiles)
            self._fldcache = flds
            self._filecache = chunkfiles
            self._getkey = getkey
        # merge the sorted chunks back into one sorted stream
        chunkiters = [iterchunk(f) for f in chunkfiles]
        for row in _mergesorted(getkey, reverse, *chunkiters):
            yield tuple(row)
def iterlookupjoin(left, right, lkey, rkey, missing=None, lprefix=None,
                   rprefix=None):
    """Merge-based left join where each left row is matched with at most
    one right row (the first of its right-hand group, chosen arbitrarily).
    Unmatched left rows are padded with `missing`. Both tables are assumed
    to be sorted by their keys (required by the groupby-based merge)."""
    lit = iter(left)
    rit = iter(right)
    lflds = lit.next()
    rflds = rit.next()
    # determine indices of the key fields in left and right tables
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)
    # construct functions to extract key values from both tables
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)
    # determine indices of non-key fields in the right table
    # (in the output, we only include key fields from the left table - we
    # don't want to duplicate fields)
    rvind = [i for i in range(len(rflds)) if i not in rkind]
    rgetv = rowgetter(*rvind)
    # determine the output fields
    if lprefix is None:
        outflds = list(lflds)
    else:
        outflds = [(str(lprefix) + str(f)) for f in lflds]
    if rprefix is None:
        outflds.extend(rgetv(rflds))
    else:
        outflds.extend([(str(rprefix) + str(f)) for f in rgetv(rflds)])
    yield tuple(outflds)

    # define a function to join two groups of rows
    def joinrows(lrowgrp, rrowgrp):
        if rrowgrp is None:
            # no matching right group - pad with missing values
            for lrow in lrowgrp:
                outrow = list(lrow)  # start with the left row
                # extend with missing values in place of the right row
                outrow.extend([missing] * len(rvind))
                yield tuple(outrow)
        else:
            rrow = iter(rrowgrp).next()  # pick first arbitrarily
            for lrow in lrowgrp:
                # start with the left row
                outrow = list(lrow)
                # extend with non-key values from the right row
                outrow.extend(rgetv(rrow))
                yield tuple(outrow)

    # construct group iterators for both tables
    lgit = itertools.groupby(lit, key=lgetk)
    rgit = itertools.groupby(rit, key=rgetk)
    # loop until *either* of the iterators is exhausted
    lkval, rkval = None, None  # initialise here to handle empty tables
    try:
        # pick off initial row groups
        lkval, lrowgrp = lgit.next()
        rkval, rrowgrp = rgit.next()
        while True:
            if lkval < rkval:
                # left group unmatched - emit padded
                for row in joinrows(lrowgrp, None):
                    yield tuple(row)
                # advance left
                lkval, lrowgrp = lgit.next()
            elif lkval > rkval:
                # advance right
                rkval, rrowgrp = rgit.next()
            else:
                # keys match - join the two groups
                for row in joinrows(lrowgrp, rrowgrp):
                    yield tuple(row)
                # advance both
                lkval, lrowgrp = lgit.next()
                rkval, rrowgrp = rgit.next()
    except StopIteration:
        pass
    # make sure any left rows remaining are yielded
    if lkval > rkval:
        # yield anything that got left hanging
        for row in joinrows(lrowgrp, None):
            yield tuple(row)
    # yield the rest
    for lkval, lrowgrp in lgit:
        for row in joinrows(lrowgrp, None):
            yield tuple(row)
def iterintervaljoin(left, right, lstart, lstop, rstart, rstop, lfacet,
                     rfacet, proximity, missing, lprefix, rprefix, leftouter,
                     anti=False):
    """Join each left row against right rows whose intervals overlap it
    (within `proximity`), optionally faceted by key fields. With
    `leftouter` set, unmatched left rows are emitted padded with
    `missing`; with `anti` set, right fields are suppressed entirely so
    only unmatched left rows come through - presumably implementing the
    anti-join when combined with leftouter (confirm against callers)."""
    # create iterators and obtain fields
    lit = iter(left)
    lfields = lit.next()
    rit = iter(right)
    rfields = rit.next()
    # check fields via petl.util.asindices (raises FieldSelectionError if spec
    # is not valid)
    asindices(lfields, lstart)
    asindices(lfields, lstop)
    if lfacet is not None:
        asindices(lfields, lfacet)
    asindices(rfields, rstart)
    asindices(rfields, rstop)
    if rfacet is not None:
        asindices(rfields, rfacet)
    # determine output fields
    if lprefix is None:
        outfields = list(lfields)
        if not anti:
            outfields.extend(rfields)
    else:
        outfields = list(lprefix + f for f in lfields)
        if not anti:
            outfields.extend(rprefix + f for f in rfields)
    yield tuple(outfields)
    # create getters for start and stop positions
    getlstart = itemgetter(lfields.index(lstart))
    getlstop = itemgetter(lfields.index(lstop))
    if rfacet is None:
        # build interval lookup for right table
        lookup = intervallookup(right, rstart, rstop, proximity=proximity)
        find = lookup.find
        # main loop
        for lrow in lit:
            start = getlstart(lrow)
            stop = getlstop(lrow)
            rrows = find(start, stop)
            if rrows:
                if not anti:
                    # one output row per overlapping right row
                    for rrow in rrows:
                        outrow = list(lrow)
                        outrow.extend(rrow)
                        yield tuple(outrow)
            elif leftouter:
                # no overlaps - emit left row, padded unless anti
                outrow = list(lrow)
                if not anti:
                    outrow.extend([missing] * len(rfields))
                yield tuple(outrow)
    else:
        # build interval lookup for right table, one tree per facet key
        lookup = facetintervallookup(right, facet=rfacet, start=rstart,
                                     stop=rstop, proximity=proximity)
        find = dict()
        for f in lookup:
            find[f] = lookup[f].find
        # getter for facet key values in left table
        getlkey = itemgetter(*asindices(lfields, lfacet))
        # main loop
        for lrow in lit:
            lkey = getlkey(lrow)
            start = getlstart(lrow)
            stop = getlstop(lrow)
            try:
                rrows = find[lkey](start, stop)
            except KeyError:
                # facet key absent from the right table
                rrows = None
            except AttributeError:
                rrows = None
            if rrows:
                if not anti:
                    for rrow in rrows:
                        outrow = list(lrow)
                        outrow.extend(rrow)
                        yield tuple(outrow)
            elif leftouter:
                outrow = list(lrow)
                if not anti:
                    outrow.extend([missing] * len(rfields))
                yield tuple(outrow)
def iterintervalsubtract(left, right, lstart, lstop, rstart, rstop, lfacet,
                         rfacet, proximity):
    """For each left row, subtract any overlapping right intervals (found
    within `proximity`, optionally faceted) from the left row's interval,
    yielding one output row per remaining sub-interval with the start/stop
    fields rewritten. Rows with no overlaps pass through unchanged."""
    # create iterators and obtain fields
    lit = iter(left)
    lfields = lit.next()
    assert lstart in lfields, 'field not found: %s' % lstart
    assert lstop in lfields, 'field not found: %s' % lstop
    if lfacet is not None:
        assert lfacet in lfields, 'field not found: %s' % lfacet
    rit = iter(right)
    rfields = rit.next()
    assert rstart in rfields, 'field not found: %s' % rstart
    assert rstop in rfields, 'field not found: %s' % rstop
    if rfacet is not None:
        assert rfacet in rfields, 'field not found: %s' % rfacet
    # determine output fields
    outfields = list(lfields)
    # outfields.extend(rfields)
    yield tuple(outfields)
    # create getters for start and stop positions
    lstartidx = lfields.index(lstart)
    lstopidx = lfields.index(lstop)
    getlcoords = itemgetter(lstartidx, lstopidx)
    getrcoords = itemgetter(rfields.index(rstart), rfields.index(rstop))
    if rfacet is None:
        # build interval lookup for right table
        lookup = intervallookup(right, rstart, rstop, proximity=proximity)
        find = lookup.find
        # main loop
        for lrow in lit:
            start, stop = getlcoords(lrow)
            rrows = find(start, stop)
            if not rrows:
                # nothing to subtract - pass the row through unchanged
                yield tuple(lrow)
            else:
                rivs = sorted([getrcoords(rrow) for rrow in rrows],
                              key=itemgetter(0))  # sort by start
                # emit one row per remaining sub-interval
                for x, y in _subtract(start, stop, rivs):
                    out = list(lrow)
                    out[lstartidx] = x
                    out[lstopidx] = y
                    yield tuple(out)
    else:
        # build interval lookup for right table, one tree per facet key
        lookup = facetintervallookup(right, facet=rfacet, start=rstart,
                                     stop=rstop, proximity=proximity)
        # getter for facet key values in left table
        getlkey = itemgetter(*asindices(lfields, lfacet))
        # main loop
        for lrow in lit:
            lkey = getlkey(lrow)
            start, stop = getlcoords(lrow)
            try:
                rrows = lookup[lkey].find(start, stop)
            except KeyError:
                # facet key absent from the right table
                rrows = None
            except AttributeError:
                rrows = None
            if not rrows:
                yield tuple(lrow)
            else:
                rivs = sorted([getrcoords(rrow) for rrow in rrows],
                              key=itemgetter(0))  # sort by start
                for x, y in _subtract(start, stop, rivs):
                    out = list(lrow)
                    out[lstartidx] = x
                    out[lstopidx] = y
                    yield tuple(out)
def iterintervalleftjoin(left, right, lstart, lstop, rstart, rstop, lfacet,
                         rfacet, proximity, missing):
    """Interval left join: each left row is paired with every right row
    whose interval overlaps it (within `proximity`, optionally faceted);
    left rows with no overlaps are emitted once, padded with `missing`."""
    # create iterators and obtain fields
    lit = iter(left)
    lfields = lit.next()
    assert lstart in lfields, 'field not found: %s' % lstart
    assert lstop in lfields, 'field not found: %s' % lstop
    if lfacet is not None:
        assert lfacet in lfields, 'field not found: %s' % lfacet
    rit = iter(right)
    rfields = rit.next()
    assert rstart in rfields, 'field not found: %s' % rstart
    assert rstop in rfields, 'field not found: %s' % rstop
    if rfacet is not None:
        assert rfacet in rfields, 'field not found: %s' % rfacet
    # determine output fields
    outfields = list(lfields)
    outfields.extend(rfields)
    yield tuple(outfields)
    # create getters for start and stop positions
    getlstart = itemgetter(lfields.index(lstart))
    getlstop = itemgetter(lfields.index(lstop))
    if rfacet is None:
        # build interval lookup for right table
        lookup = intervallookup(right, rstart, rstop, proximity=proximity)
        find = lookup.find
        # main loop
        for lrow in lit:
            start = getlstart(lrow)
            stop = getlstop(lrow)
            rrows = find(start, stop)
            if rrows:
                # one output row per overlapping right row
                for rrow in rrows:
                    outrow = list(lrow)
                    outrow.extend(rrow)
                    yield tuple(outrow)
            else:
                # no overlaps - pad the right side with missing values
                outrow = list(lrow)
                outrow.extend([missing] * len(rfields))
                yield tuple(outrow)
    else:
        # build interval lookup for right table, one tree per facet key
        lookup = facetintervallookup(right, facet=rfacet, start=rstart,
                                     stop=rstop, proximity=proximity)
        find = dict()
        for f in lookup:
            find[f] = lookup[f].find
        # getter for facet key values in left table
        getlkey = itemgetter(*asindices(lfields, lfacet))
        # main loop
        for lrow in lit:
            lkey = getlkey(lrow)
            start = getlstart(lrow)
            stop = getlstop(lrow)
            try:
                rrows = find[lkey](start, stop)
            except KeyError:
                # facet key absent from the right table
                rrows = None
            except AttributeError:
                rrows = None
            if rrows:
                for rrow in rrows:
                    outrow = list(lrow)
                    outrow.extend(rrow)
                    yield tuple(outrow)
            else:
                outrow = list(lrow)
                outrow.extend([missing] * len(rfields))
                yield tuple(outrow)