def test_outerjoin_2(): table1 = (('id', 'colour'), (1, 'blue'), (2, 'red'), (3, 'purple')) table2 = (('id', 'shape'), (0, 'pentagon'), (1, 'circle'), (3, 'square'), (4, 'ellipse'), (5, 'triangle')) table3 = outerjoin(table1, table2, key='id') expect3 = (('id', 'colour', 'shape'), (0, None, 'pentagon'), (1, 'blue', 'circle'), (2, 'red', None), (3, 'purple', 'square'), (4, None, 'ellipse'), (5, None, 'triangle')) ieq(expect3, table3) ieq(expect3, table3) # check twice # natural join table4 = outerjoin(table1, table2) expect4 = expect3 ieq(expect4, table4)
def test_outerjoin_novaluefield(): table1 = (('id', 'colour'), (0, 'black'), (1, 'blue'), (2, 'red'), (3, 'purple'), (5, 'yellow'), (7, 'white')) table2 = (('id', 'shape'), (1, 'circle'), (3, 'square'), (4, 'ellipse')) expect = (('id', 'colour', 'shape'), (0, 'black', None), (1, 'blue', 'circle'), (2, 'red', None), (3, 'purple', 'square'), (4, None, 'ellipse'), (5, 'yellow', None), (7, 'white', None)) actual = outerjoin(table1, table2, key='id') ieq(expect, actual) actual = outerjoin(cut(table1, 'id'), table2, key='id') ieq(cut(expect, 'id', 'shape'), actual) actual = outerjoin(table1, cut(table2, 'id'), key='id') ieq(cut(expect, 'id', 'colour'), actual) actual = outerjoin(cut(table1, 'id'), cut(table2, 'id'), key='id') ieq(cut(expect, 'id'), actual)
def transform_resource(self, source, target): if isinstance(self.__resource, str): self.__resource = source.package.get_resource(self.__resource) self.__resource.infer(only_sample=True) view1 = source.to_petl() view2 = self.__resource.to_petl() if self.__mode == "inner": join = petl.hashjoin if self.__hash else petl.join target.data = join(view1, view2, self.__field_name) elif self.__mode == "left": leftjoin = petl.hashleftjoin if self.__hash else petl.leftjoin target.data = leftjoin(view1, view2, self.__field_name) elif self.__mode == "right": rightjoin = petl.hashrightjoin if self.__hash else petl.rightjoin target.data = rightjoin(view1, view2, self.__field_name) elif self.__mode == "outer": target.data = petl.outerjoin(view1, view2, self.__field_name) elif self.__mode == "cross": target.data = petl.crossjoin(view1, view2) elif self.__mode == "anti": antijoin = petl.hashantijoin if self.__hash else petl.antijoin target.data = antijoin(view1, view2, self.__field_name) if self.__mode not in ["anti"]: for field in self.__resource.schema.fields: if field.name != self.__field_name: target.schema.fields.append(field.to_copy())
def transform_resource(self, resource): target = resource source = self.get("resource") field_name = self.get("fieldName") use_hash = self.get("useHash") mode = self.get("mode") if isinstance(source, str): source = target.package.get_resource(source) source.infer() view1 = target.to_petl() view2 = source.to_petl() if mode not in ["negate"]: for field in source.schema.fields: if field.name != field_name: target.schema.fields.append(field.to_copy()) if mode == "inner": join = petl.hashjoin if use_hash else petl.join resource.data = join(view1, view2, field_name) elif mode == "left": leftjoin = petl.hashleftjoin if use_hash else petl.leftjoin resource.data = leftjoin(view1, view2, field_name) elif mode == "right": rightjoin = petl.hashrightjoin if use_hash else petl.rightjoin resource.data = rightjoin(view1, view2, field_name) elif mode == "outer": resource.data = petl.outerjoin(view1, view2, field_name) elif mode == "cross": resource.data = petl.crossjoin(view1, view2) elif mode == "negate": antijoin = petl.hashantijoin if use_hash else petl.antijoin resource.data = antijoin(view1, view2, field_name)
def test_outerjoin_multiple(): table1 = (('id', 'color', 'cost'), (1, 'blue', 12), (1, 'red', 8), (2, 'yellow', 15), (2, 'orange', 5), (3, 'purple', 4), (4, 'chartreuse', 42)) table2 = (('id', 'shape', 'size'), (1, 'circle', 'big'), (2, 'square', 'tiny'), (2, 'square', 'big'), (3, 'ellipse', 'small'), (3, 'ellipse', 'tiny'), (5, 'didodecahedron', 3.14159265)) actual = outerjoin(table1, table2, key='id') expect = (('id', 'color', 'cost', 'shape', 'size'), (1, 'blue', 12, 'circle', 'big'), (1, 'red', 8, 'circle', 'big'), (2, 'yellow', 15, 'square', 'tiny'), (2, 'yellow', 15, 'square', 'big'), (2, 'orange', 5, 'square', 'tiny'), (2, 'orange', 5, 'square', 'big'), (3, 'purple', 4, 'ellipse', 'small'), (3, 'purple', 4, 'ellipse', 'tiny'), (4, 'chartreuse', 42, None, None), (5, None, None, 'didodecahedron', 3.14159265)) ieq(expect, actual)
def test_outerjoin_fieldorder(): table1 = (('colour', 'id'), ('blue', 1), ('red', 2), ('purple', 3)) table2 = (('id', 'shape'), (1, 'circle'), (3, 'square'), (4, 'ellipse')) table3 = outerjoin(table1, table2, key='id') expect3 = (('colour', 'id', 'shape'), ('blue', 1, 'circle'), ('red', 2, None), ('purple', 3, 'square'), (None, 4, 'ellipse')) ieq(expect3, table3) ieq(expect3, table3) # check twice
def process_join(self): hpa_merged_table = (petl.outerjoin( self.hpa_normal_table, self.hpa_rna_table, key='gene', presorted=True).addfield( 'expression', lambda rec: format_expression_with_rna(rec)).cut('expression')) return hpa_merged_table
def test_outerjoin_empty(): table1 = (('id', 'colour'), (0, 'black'), (1, 'blue'), (2, 'red'), (3, 'purple'), (5, 'yellow'), (7, 'white')) table2 = (('id', 'shape'), ) table3 = outerjoin(table1, table2, key='id') expect3 = (('id', 'colour', 'shape'), (0, 'black', None), (1, 'blue', None), (2, 'red', None), (3, 'purple', None), (5, 'yellow', None), (7, 'white', None)) ieq(expect3, table3)
def join(data, strategy, source_left, source_right, destination, key_left, key_right, prefix_left, prefix_right, presorted, buffersize, tempdir, cache, missing): """Perform a join on two data tables.""" source_left = data.get(source_left) source_right = data.get(source_right) kwargs = {} if key_left == key_right: kwargs['key'] = key_left else: kwargs['lkey'] = key_left kwargs['rkey'] = key_right if presorted is True: kwargs['presorted'] = presorted if buffersize is not None: kwargs['buffersize'] = buffersize if tempdir: kwargs['tempdir'] = tempdir if 'anti' not in strategy: if prefix_left is not None: kwargs['lprefix'] = prefix_left if prefix_right is not None: kwargs['rprefix'] = prefix_right if strategy not in ['join', 'antijoin', 'hashjoin', 'hashantijoin']: kwargs['missing'] = missing if strategy == 'join': o = petl.join(source_left, source_right, **kwargs) elif strategy == 'leftjoin': o = petl.leftjoin(source_left, source_right, **kwargs) elif strategy == 'lookupjoin': o = petl.lookupjoin(source_left, source_right, **kwargs) elif strategy == 'rightjoin': o = petl.rightjoin(source_left, source_right, **kwargs) elif strategy == 'outerjoin': o = petl.outerjoin(source_left, source_right, **kwargs) elif strategy == 'antijoin': o = petl.antijoin(source_left, source_right, **kwargs) elif strategy == 'hashjoin': o = petl.antijoin(source_left, source_right, **kwargs) elif strategy == 'hashleftjoin': o = petl.hashleftjoin(source_left, source_right, **kwargs) elif strategy == 'hashlookupjoin': o = petl.hashlookupjoin(source_left, source_right, **kwargs) elif strategy == 'hashrightjoin': o = petl.hashrightjoin(source_left, source_right, **kwargs) data.set(destination, o)
def test_outerjoin_prefix(): table1 = (('id', 'colour'), (0, 'black'), (1, 'blue'), (2, 'red'), (3, 'purple'), (5, 'yellow'), (7, 'white')) table2 = (('id', 'shape'), (1, 'circle'), (3, 'square'), (4, 'ellipse')) table3 = outerjoin(table1, table2, key='id', lprefix='l_', rprefix='r_') expect3 = (('l_id', 'l_colour', 'r_shape'), (0, 'black', None), (1, 'blue', 'circle'), (2, 'red', None), (3, 'purple', 'square'), (4, None, 'ellipse'), (5, 'yellow', None), (7, 'white', None)) ieq(expect3, table3) ieq(expect3, table3) # check twice
def test_outerjoin_2(): table1 = (("id", "colour"), (1, "blue"), (2, "red"), (3, "purple")) table2 = (("id", "shape"), (0, "pentagon"), (1, "circle"), (3, "square"), (4, "ellipse"), (5, "triangle")) table3 = outerjoin(table1, table2, key="id") expect3 = ( ("id", "colour", "shape"), (0, None, "pentagon"), (1, "blue", "circle"), (2, "red", None), (3, "purple", "square"), (4, None, "ellipse"), (5, None, "triangle"), ) ieq(expect3, table3) ieq(expect3, table3) # check twice # natural join table4 = outerjoin(table1, table2) expect4 = expect3 ieq(expect4, table4)
def load_into_warehouse(): api = WarehouseConnector() odoo = extract_odoo() backend = extract_backend() planday = extract_planday() # Make a flat table to compare odoo_with_backend = outerjoin(odoo, backend, lkey='backend_username', rkey='backend_username') odoo_with_planday = outerjoin(odoo, planday, key='salary_id') odoo_with_backend = odoo_with_backend.addfield('etl_timestamp', timestamp) odoo_with_planday = odoo_with_planday.addfield('etl_timestamp', timestamp) write_to_log(odoo_with_backend, 'odoo_with_backend', 'outerjoin') write_to_log(odoo_with_planday, 'odoo_with_planday', 'outerjoin') odoo_with_backend.toxlsx(ODOO_WITH_BACKEND_FILENAME) odoo_with_planday.toxlsx(ODOO_WITH_PLANDAY_FILENAME) odoo_with_planday.appenddb(api.db, 'sync_police_uk_drivers_planday_vs_odoo', schema='tableau') odoo_with_backend.appenddb(api.db, 'sync_police_uk_drivers_backend_vs_odoo', schema='tableau')
def test_outerjoin_lrkey(): table1 = (('id', 'colour'), (0, 'black'), (1, 'blue'), (2, 'red'), (3, 'purple'), (5, 'yellow'), (7, 'white')) table2 = (('identifier', 'shape'), (1, 'circle'), (3, 'square'), (4, 'ellipse')) table3 = outerjoin(table1, table2, lkey='id', rkey='identifier') expect3 = (('id', 'colour', 'shape'), (0, 'black', None), (1, 'blue', 'circle'), (2, 'red', None), (3, 'purple', 'square'), (4, None, 'ellipse'), (5, 'yellow', None), (7, 'white', None)) ieq(expect3, table3) ieq(expect3, table3) # check twice
def test_outerjoin_fieldorder(): table1 = (("colour", "id"), ("blue", 1), ("red", 2), ("purple", 3)) table2 = (("id", "shape"), (1, "circle"), (3, "square"), (4, "ellipse")) table3 = outerjoin(table1, table2, key="id") expect3 = ( ("colour", "id", "shape"), ("blue", 1, "circle"), ("red", 2, None), ("purple", 3, "square"), (None, 4, "ellipse"), ) ieq(expect3, table3) ieq(expect3, table3) # check twice
def test_outerjoin_empty(): table1 = (("id", "colour"), (0, "black"), (1, "blue"), (2, "red"), (3, "purple"), (5, "yellow"), (7, "white")) table2 = (("id", "shape"),) table3 = outerjoin(table1, table2, key="id") expect3 = ( ("id", "colour", "shape"), (0, "black", None), (1, "blue", None), (2, "red", None), (3, "purple", None), (5, "yellow", None), (7, "white", None), ) ieq(expect3, table3)
def test_outerjoin_lrkey(): table1 = (("id", "colour"), (0, "black"), (1, "blue"), (2, "red"), (3, "purple"), (5, "yellow"), (7, "white")) table2 = (("identifier", "shape"), (1, "circle"), (3, "square"), (4, "ellipse")) table3 = outerjoin(table1, table2, lkey="id", rkey="identifier") expect3 = ( ("id", "colour", "shape"), (0, "black", None), (1, "blue", "circle"), (2, "red", None), (3, "purple", "square"), (4, None, "ellipse"), (5, "yellow", None), (7, "white", None), ) ieq(expect3, table3) ieq(expect3, table3) # check twice
def test_outerjoin_prefix(): table1 = (("id", "colour"), (0, "black"), (1, "blue"), (2, "red"), (3, "purple"), (5, "yellow"), (7, "white")) table2 = (("id", "shape"), (1, "circle"), (3, "square"), (4, "ellipse")) table3 = outerjoin(table1, table2, key="id", lprefix="l_", rprefix="r_") expect3 = ( ("l_id", "l_colour", "r_shape"), (0, "black", None), (1, "blue", "circle"), (2, "red", None), (3, "purple", "square"), (4, None, "ellipse"), (5, "yellow", None), (7, "white", None), ) ieq(expect3, table3) ieq(expect3, table3) # check twice
def test_outerjoin_empty(): table1 = (('id', 'colour'), (0, 'black'), (1, 'blue'), (2, 'red'), (3, 'purple'), (5, 'yellow'), (7, 'white')) table2 = (('id', 'shape'),) table3 = outerjoin(table1, table2, key='id') expect3 = (('id', 'colour', 'shape'), (0, 'black', None), (1, 'blue', None), (2, 'red', None), (3, 'purple', None), (5, 'yellow', None), (7, 'white', None)) ieq(expect3, table3)
def join_execute(cl, cr, join, **kwargs): cl, cr = cl(), cr() if 'addLfields' in kwargs: cl = etl.addfields(cl, kwargs['addLfields']) if 'addRfields' in kwargs: cr = etl.addfields(cr, kwargs['addRfields']) args = cl, cr if join == Join.UNION: c = etl.crossjoin(*args) else: kwargs = filter_keys(kwargs, ("key", "lkey", "rkey", "missing", "presorted", "buffersize", "tempdir", "cache")) if join == Join.INNER: c = etl.join(*args, **kwargs) elif join == Join.LEFT: c = etl.leftjoin(*args, **kwargs) elif join == Join.RIGHT: c = etl.rightjoin(*args, **kwargs) elif join == Join.FULL: c = etl.outerjoin(*args, **kwargs) return c
# create petl table from column arrays and rename the columns exchangeRates = petl.fromcolumns([BOCDates, BOCRates], header=['date', 'rate']) # print (exchangeRates) # load expense document try: expenses = petl.io.xlsx.fromxlsx('Expenses.xlsx', sheet='Github') except Exception as e: print('could not open expenses.xlsx:' + str(e)) sys.exit() # join tables expenses = petl.outerjoin(exchangeRates, expenses, key='date') # fill down missing values expenses = petl.filldown(expenses, 'rate') # remove dates with no expenses expenses = petl.select(expenses, lambda rec: rec.USD != None) # add CDN column expenses = petl.addfield(expenses, 'CAD', lambda rec: decimal.Decimal(rec.USD) * rec.rate) # intialize database connection try: dbConnection = pymssql.connect(server=destServer, database=destDatabase)
# rightjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.rightjoin(table1, table2, key='id') table3 # outerjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.outerjoin(table1, table2, key='id') table3 # crossjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] table3 = etl.crossjoin(table1, table2) table3 # antijoin() ############ import petl as etl
# outerjoin table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] from petl import outerjoin, look look(table1) look(table2) table3 = outerjoin(table1, table2, key='id') look(table3) # crossjoin table1 = [['id', 'colour'], [1, 'blue'], [2, 'red']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] from petl import crossjoin, look look(table1) look(table2)
'song_title_2017': lambda r: r[0].song_title, }).aggregate( 'song_title_id', { 'count_2017': ('artist_count_2017', sum), 'song_title_2017': lambda r: r[0].song_title_2017, 'artists_2017': (( 'artist', 'artist_count_2017', ), list) }).sort('count_2017', reverse=True).addrownumbers(start=1, field='rank_2017')) totals = (petl.outerjoin( song_2018, song_2017, 'song_title_id').select(lambda rec: (rec[ 'count_2018'] or 0) >= 1 or (rec['count_2017'] or 0) > 2).addfield( 'song_title', lambda rec: rec.song_title_2018 or rec.song_title_2017).cutout( 'song_title_2018', 'song_title_2017', 'song_title_id')) def rank_change(rec): if rec['rank_2017'] is None: return 1000 elif rec['rank_2018'] is None: return -1000 else: return rec['rank_2017'] - rec['rank_2018'] winners_losers = totals.addfield('rank_change', rank_change) winners_losers = winners_losers.sort('rank_change', reverse=True)
.aggregate(('artist_id', 'song_title'), { 'song_count_2017': len, 'artist_2017': lambda r: r[0].artist, }) .aggregate('artist_id', { 'count_2017': ('song_count_2017', sum), 'artist_2017': lambda r: r[0].artist_2017, 'songs_2017': (('song_title', 'song_count_2017',), list) }) .sort('count_2017', reverse=True) .addrownumbers(start=1, field='rank_2017') ) totals = ( petl.outerjoin(song_2018, song_2017, 'artist_id') .select(lambda rec: (rec['count_2018'] or 0) >= 1 or (rec['count_2017'] or 0) > 2) .addfield('artist', lambda rec: rec.artist_2018 or rec.artist_2017) .cutout('artist_2018', 'artist_2017', 'artist_id') ) def rank_change(rec): if rec['rank_2017'] is None: return 1000 elif rec['rank_2018'] is None: return -1000 else: return rec['rank_2017'] - rec['rank_2018'] winners_losers = totals.addfield('rank_change', rank_change) winners_losers = winners_losers.sort('rank_change', reverse=True) for entry in winners_losers.dicts():
table3 # outerjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.outerjoin(table1, table2, key='id') table3 # crossjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] table3 = etl.crossjoin(table1, table2) table3