def test_lookupone():
    """Check lookupone in strict and non-strict modes.

    Covers duplicate-key rejection under ``strict=True``, first-value-wins
    behaviour under ``strict=False``, the default value (the whole row),
    selection of several value fields, and compound keys.
    """
    two_col = (('foo', 'bar'),
               ('a', 1),
               ('b', 2),
               ('b', 3))

    # strict mode: key 'b' occurs twice, so an error must be raised
    raised = False
    try:
        lookupone(two_col, 'foo', 'bar', strict=True)
    except DuplicateKeyError:
        raised = True  # expected
    assert raised, 'expected error'

    # non-strict mode: the first value seen for a key wins
    result = lookupone(two_col, 'foo', 'bar', strict=False)
    wanted = {'a': 1, 'b': 2}
    eq_(wanted, result)

    # no value selector given: the value defaults to the whole row
    result = lookupone(two_col, 'foo', strict=False)
    wanted = {'a': ('a', 1), 'b': ('b', 2)}  # first row wins
    eq_(wanted, result)

    three_col = (('foo', 'bar', 'baz'),
                 ('a', 1, True),
                 ('b', 2, False),
                 ('b', 3, True),
                 ('b', 3, False))

    # several value fields selected at once
    result = lookupone(three_col, 'foo', ('bar', 'baz'), strict=False)
    wanted = {'a': (1, True), 'b': (2, False)}
    eq_(wanted, result)

    # compound key built from two fields
    result = lookupone(three_col, ('foo', 'bar'), 'baz', strict=False)
    wanted = {('a', 1): True,
              ('b', 2): False,
              ('b', 3): True}  # first row wins
    eq_(wanted, result)
def iterhashlookupjoin(left, right, lkey, rkey, missing, lprefix, rprefix):
    """Generate the rows of a lookup join of `left` against `right`.

    The first item yielded is the output header: the left header followed by
    the right table's non-key fields. After that, exactly one output row is
    yielded per left row: the left row extended with the non-key values of
    the right row found under the same key, or with `missing` repeated once
    per right non-key field when the key is absent from the lookup.

    :param left: left table; its first row is taken as the header
    :param right: right table; its first row is taken as the header
    :param lkey: key field selection for the left table
    :param rkey: key field selection for the right table
    :param missing: filler value used when a left key has no right match
    :param lprefix: if not None, prepended (via ``str``) to every left
        field name in the output header
    :param rprefix: if not None, prepended (via ``str``) to every right
        non-key field name in the output header
    """
    left_rows = iter(left)
    left_header = next(left_rows)

    # peek the right header; the iterator returned alongside it is what is
    # handed to lookupone (presumably the full table, header included)
    right_header, right_table = iterpeek(right)
    right_by_key = lookupone(right_table, rkey, strict=False)

    # positions of the key fields in each table
    left_key_idx = asindices(left_header, lkey)
    right_key_idx = asindices(right_header, rkey)

    # extractor for the key of a left row
    extract_key = operator.itemgetter(*left_key_idx)

    # positions of the right-hand fields that are not part of the key; key
    # fields are taken from the left table only, to avoid duplicating them
    right_value_idx = [pos for pos in range(len(right_header))
                       if pos not in right_key_idx]
    extract_values = rowgetter(*right_value_idx)

    # build and emit the output header
    if lprefix is not None:
        header = [str(lprefix) + str(name) for name in left_header]
    else:
        header = list(left_header)
    right_names = extract_values(right_header)
    if rprefix is not None:
        right_names = [str(rprefix) + str(name) for name in right_names]
    header.extend(right_names)
    yield tuple(header)

    # filler appended to left rows that have no match on the right
    padding = (missing,) * len(right_value_idx)

    for left_row in left_rows:
        key = extract_key(left_row)
        if key not in right_by_key:
            yield tuple(left_row) + padding
            continue
        matched = right_by_key[key]
        yield tuple(left_row) + tuple(extract_values(matched))
def iterhashlookupjoin(left, right, lkey, rkey, missing, lprefix, rprefix):
    """Generate the rows of a lookup join of `left` against `right`.

    The first item yielded is the output header: the left header followed by
    the right table's non-key fields. After that, exactly one output row is
    yielded per left row: the left row extended with the non-key values of
    the right row found under the same key, or with `missing` repeated once
    per right non-key field when the key is absent from the lookup.

    :param left: left table; its first row is taken as the header
    :param right: right table; its first row is taken as the header
    :param lkey: key field selection for the left table
    :param rkey: key field selection for the right table
    :param missing: filler value used when a left key has no right match
    :param lprefix: if not None, prepended (via ``text_type``) to every left
        field name in the output header
    :param rprefix: if not None, prepended (via ``text_type``) to every right
        non-key field name in the output header
    """
    left_rows = iter(left)
    left_header = next(left_rows)

    # peek the right header; the iterator returned alongside it is what is
    # handed to lookupone (presumably the full table, header included)
    right_header, right_table = iterpeek(right)
    right_by_key = lookupone(right_table, rkey, strict=False)

    # positions of the key fields in each table
    left_key_idx = asindices(left_header, lkey)
    right_key_idx = asindices(right_header, rkey)

    # extractor for the key of a left row
    extract_key = operator.itemgetter(*left_key_idx)

    # positions of the right-hand fields that are not part of the key; key
    # fields are taken from the left table only, to avoid duplicating them
    right_value_idx = [pos for pos in range(len(right_header))
                       if pos not in right_key_idx]
    extract_values = rowgetter(*right_value_idx)

    # build and emit the output header
    if lprefix is not None:
        header = [text_type(lprefix) + text_type(name)
                  for name in left_header]
    else:
        header = list(left_header)
    right_names = extract_values(right_header)
    if rprefix is not None:
        right_names = [text_type(rprefix) + text_type(name)
                       for name in right_names]
    header.extend(right_names)
    yield tuple(header)

    # filler appended to left rows that have no match on the right
    padding = (missing,) * len(right_value_idx)

    for left_row in left_rows:
        key = extract_key(left_row)
        if key not in right_by_key:
            yield tuple(left_row) + padding
            continue
        matched = right_by_key[key]
        yield tuple(left_row) + tuple(extract_values(matched))