Beispiel #1
0
def join(data, strategy, source_left, source_right, destination, key_left,
         key_right, prefix_left, prefix_right, presorted, buffersize, tempdir,
         cache, missing):
    """Join two tables held in ``data`` with a petl join and store the result.

    Args:
        data: Store exposing ``get(name)`` and ``set(name, value)``.
        strategy: Name of the petl join function to apply. One of ``join``,
            ``leftjoin``, ``lookupjoin``, ``rightjoin``, ``outerjoin``,
            ``antijoin``, ``hashjoin``, ``hashleftjoin``, ``hashlookupjoin``,
            ``hashrightjoin`` or ``hashantijoin``.
        source_left: Name used to fetch the left table from ``data``.
        source_right: Name used to fetch the right table from ``data``.
        destination: Name under which the joined table is stored in ``data``.
        key_left: Join key of the left table.
        key_right: Join key of the right table. When equal to ``key_left`` a
            single ``key`` argument is passed, otherwise ``lkey``/``rkey``.
        prefix_left: Forwarded as ``lprefix`` unless ``None``; never
            forwarded for strategies whose name contains ``anti``.
        prefix_right: Forwarded as ``rprefix`` under the same rules.
        presorted: Forwarded as ``presorted`` only when it is exactly
            ``True``.
        buffersize: Forwarded as ``buffersize`` unless ``None``.
        tempdir: Forwarded as ``tempdir`` when truthy.
        cache: Accepted but currently not forwarded to petl.
        missing: Forwarded as ``missing`` for every strategy except ``join``,
            ``antijoin``, ``hashjoin`` and ``hashantijoin``.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    supported = ('join', 'leftjoin', 'lookupjoin', 'rightjoin', 'outerjoin',
                 'antijoin', 'hashjoin', 'hashleftjoin', 'hashlookupjoin',
                 'hashrightjoin', 'hashantijoin')
    # Fail early with a clear message instead of an UnboundLocalError later.
    if strategy not in supported:
        raise ValueError(
            'unsupported join strategy: {!r}'.format(strategy))

    source_left = data.get(source_left)
    source_right = data.get(source_right)

    kwargs = {}
    if key_left == key_right:
        kwargs['key'] = key_left
    else:
        kwargs['lkey'] = key_left
        kwargs['rkey'] = key_right

    # NOTE(review): the sort-related options below are forwarded for every
    # strategy; the hash* functions may not accept them - confirm against the
    # petl documentation.
    if presorted is True:
        kwargs['presorted'] = presorted

    if buffersize is not None:
        kwargs['buffersize'] = buffersize

    if tempdir:
        kwargs['tempdir'] = tempdir

    # Field prefixes are skipped for anti joins.
    if 'anti' not in strategy:
        if prefix_left is not None:
            kwargs['lprefix'] = prefix_left
        if prefix_right is not None:
            kwargs['rprefix'] = prefix_right

    # Inner and anti joins get no ``missing`` fill value.
    if strategy not in ['join', 'antijoin', 'hashjoin', 'hashantijoin']:
        kwargs['missing'] = missing

    # NOTE(review): ``cache`` is never passed on - confirm whether it should
    # be forwarded for the strategies that support it.

    # Every supported strategy name is also the name of the petl function,
    # so 'hashjoin' resolves to petl.hashjoin and 'hashantijoin' to
    # petl.hashantijoin.
    join_func = getattr(petl, strategy)
    data.set(destination, join_func(source_left, source_right, **kwargs))
Beispiel #2
0
from petl import filldown, look, lookupjoin

# lookupjoin
# table2 holds two rows for every id that appears in table1.
table1 = (
    ('id', 'color', 'cost'),
    (1, 'blue', 12),
    (2, 'red', 8),
    (3, 'purple', 4),
)
table2 = (
    ('id', 'shape', 'size'),
    (1, 'circle', 'big'),
    (1, 'circle', 'small'),
    (2, 'square', 'tiny'),
    (2, 'square', 'big'),
    (3, 'ellipse', 'small'),
    (3, 'ellipse', 'tiny'),
)

look(table1)
look(table2)
# Join the two tables on their shared 'id' field.
table3 = lookupjoin(table1, table2, key='id')
look(table3)


# filldown
# Sample table with None gaps; filldown is imported but not applied here.
table1 = (
    ('foo', 'bar', 'baz'),
    (1, 'a', None),
    (1, None, 0.23),
    (1, 'b', None),
    (2, None, None),
    (2, None, 0.56),
    (2, 'c', None),
    (None, 'c', 0.72),
)
look(table1)
Beispiel #3
0
from petl import filldown, look, lookupjoin

# lookupjoin
# table2 holds two rows for every id that appears in table1.
table1 = (
    ('id', 'color', 'cost'),
    (1, 'blue', 12),
    (2, 'red', 8),
    (3, 'purple', 4),
)
table2 = (
    ('id', 'shape', 'size'),
    (1, 'circle', 'big'),
    (1, 'circle', 'small'),
    (2, 'square', 'tiny'),
    (2, 'square', 'big'),
    (3, 'ellipse', 'small'),
    (3, 'ellipse', 'tiny'),
)

look(table1)
look(table2)
# Join the two tables on their shared 'id' field.
table3 = lookupjoin(table1, table2, key='id')
look(table3)


# filldown
# Sample table with None gaps; filldown is imported but not applied here.
table1 = (
    ('foo', 'bar', 'baz'),
    (1, 'a', None),
    (1, None, 0.23),
    (1, 'b', None),
    (2, None, None),
    (2, None, 0.56),
    (2, 'c', None),
    (None, 'c', 0.72),
)
look(table1)
Beispiel #4
0
# The import must come before the first use of ``etl`` below; the snippet
# previously called etl.antijoin() before importing petl.
import petl as etl

# antijoin()
############

table1 = [['id', 'colour'], [0, 'black'], [1, 'blue'], [2, 'red'],
          [4, 'yellow'], [5, 'white']]
table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']]
# Anti-join table1 against table2 on the 'id' field.
table3 = etl.antijoin(table1, table2, key='id')
table3

# lookupjoin()
##############

table1 = [['id', 'color', 'cost'], [1, 'blue', 12], [2, 'red', 8],
          [3, 'purple', 4]]
table2 = [['id', 'shape', 'size'], [1, 'circle', 'big'],
          [1, 'circle', 'small'], [2, 'square', 'tiny'], [2, 'square', 'big'],
          [3, 'ellipse', 'small'], [3, 'ellipse', 'tiny']]
# Join the two tables on their shared 'id' field.
table3 = etl.lookupjoin(table1, table2, key='id')
table3

# unjoin()
##########

# join key is present in the table
table1 = (('foo', 'bar', 'baz'), ('A', 1, 'apple'), ('B', 1, 'apple'),
          ('C', 2, 'orange'))
# Split table1 on the 'baz' field, using 'bar' as the key.
table2, table3 = etl.unjoin(table1, 'baz', key='bar')
table2
table3
# an integer join key can also be reconstructed
table4 = (('foo', 'bar'), ('A', 'apple'), ('B', 'apple'), ('C', 'orange'))
table5, table6 = etl.unjoin(table4, 'bar')
Beispiel #5
0
import petl as etl

# lookupjoin()
##############

# table2 holds two rows for every id that appears in table1.
table1 = [
    ['id', 'color', 'cost'],
    [1, 'blue', 12],
    [2, 'red', 8],
    [3, 'purple', 4],
]
table2 = [
    ['id', 'shape', 'size'],
    [1, 'circle', 'big'],
    [1, 'circle', 'small'],
    [2, 'square', 'tiny'],
    [2, 'square', 'big'],
    [3, 'ellipse', 'small'],
    [3, 'ellipse', 'tiny'],
]
# Join the two tables on their shared 'id' field.
table3 = etl.lookupjoin(table1, table2, key='id')
table3


# unjoin()
##########

# join key is present in the table
table1 = (
    ('foo', 'bar', 'baz'),
    ('A', 1, 'apple'),
    ('B', 1, 'apple'),
    ('C', 2, 'orange'),
)
# Split table1 on the 'baz' field, using 'bar' as the key.
table2, table3 = etl.unjoin(table1, 'baz', key='bar')
table2
table3