def test_crossjoin_novaluefield():
    """Exercise crossjoin() with a ``key='id'`` keyword on full and cut tables.

    Four combinations are checked: both tables whole, only the left table cut
    down to ``id``, only the right table cut down to ``id``, and both cut. In
    every case the expectation is the matching column selection of the full
    cross product.
    """
    colours = (
        ("id", "colour"),
        (1, "blue"),
        (2, "red"),
    )
    shapes = (
        ("id", "shape"),
        (1, "circle"),
        (3, "square"),
    )
    full_product = (
        ("id", "colour", "id", "shape"),
        (1, "blue", 1, "circle"),
        (1, "blue", 3, "square"),
        (2, "red", 1, "circle"),
        (2, "red", 3, "square"),
    )

    # Both inputs whole.
    joined = crossjoin(colours, shapes, key="id")
    ieq(full_product, joined)

    # Left input reduced to its 'id' column.
    joined = crossjoin(cut(colours, "id"), shapes, key="id")
    wanted = cut(full_product, 0, 2, "shape")
    ieq(wanted, joined)

    # Right input reduced to its 'id' column.
    joined = crossjoin(colours, cut(shapes, "id"), key="id")
    wanted = cut(full_product, 0, "colour", 2)
    ieq(wanted, joined)

    # Both inputs reduced to their 'id' column.
    joined = crossjoin(cut(colours, "id"), cut(shapes, "id"), key="id")
    wanted = cut(full_product, 0, 2)
    ieq(wanted, joined)
def transform_resource(self, resource):
    """Join ``resource`` with a second resource and store the result on it.

    Settings are read from this step via ``self.get``: ``"resource"`` (the
    other resource, or its name within ``resource.package``), ``"fieldName"``
    (the join key), ``"useHash"`` (select petl's hash-based variant where the
    mode has one) and ``"mode"`` (``"inner"``, ``"left"``, ``"right"``,
    ``"outer"``, ``"cross"`` or ``"negate"``).

    Unless the mode is ``"negate"``, copies of the other resource's fields
    (except the key field) are appended to ``resource.schema.fields`` before
    the joined table is assigned to ``resource.data``. Any other mode value
    leaves ``resource.data`` untouched.
    """
    other = self.get("resource")
    key = self.get("fieldName")
    hashed = self.get("useHash")
    how = self.get("mode")

    # A string is treated as the name of a resource in the same package.
    if isinstance(other, str):
        other = resource.package.get_resource(other)
    other.infer()

    left = resource.to_petl()
    right = other.to_petl()

    # Extend the schema with the other side's non-key fields.
    if how != "negate":
        for extra in other.schema.fields:
            if extra.name == key:
                continue
            resource.schema.fields.append(extra.to_copy())

    # Pick the petl function for keyed joins; the cross join takes no key.
    if how == "inner":
        joiner = petl.join if not hashed else petl.hashjoin
    elif how == "left":
        joiner = petl.leftjoin if not hashed else petl.hashleftjoin
    elif how == "right":
        joiner = petl.rightjoin if not hashed else petl.hashrightjoin
    elif how == "outer":
        joiner = petl.outerjoin
    elif how == "cross":
        resource.data = petl.crossjoin(left, right)
        return
    elif how == "negate":
        joiner = petl.antijoin if not hashed else petl.hashantijoin
    else:
        # Unrecognised mode: no data assignment is made.
        return

    resource.data = joiner(left, right, key)
def test_crossjoin_empty():
    """crossjoin() with a header-only second table should give only a header."""
    colours = (
        ("id", "colour"),
        (1, "blue"),
        (2, "red"),
    )
    # Second table has a header row and no data rows.
    shapes_header_only = (("id", "shape"),)

    joined = crossjoin(colours, shapes_header_only)

    header_only = (("id", "colour", "id", "shape"),)
    ieq(header_only, joined)
def transform_resource(self, source, target):
    """Join ``source`` with this step's resource and write the result to ``target``.

    If the step's resource is stored as a string, it is first replaced by the
    resource of that name from ``source.package``. The resource is inferred
    from a sample, both sides are converted to petl tables, and the join
    chosen by the step's mode (``"inner"``, ``"left"``, ``"right"``,
    ``"outer"``, ``"cross"`` or ``"anti"``) is assigned to ``target.data``.
    Any other mode leaves ``target.data`` untouched.

    Afterwards, unless the mode is ``"anti"``, copies of the joined
    resource's fields (except the key field) are appended to
    ``target.schema.fields``.
    """
    # Resolve a resource given by name and keep the resolved object on the step.
    if isinstance(self.__resource, str):
        self.__resource = source.package.get_resource(self.__resource)
    other = self.__resource
    other.infer(only_sample=True)

    left = source.to_petl()
    right = other.to_petl()
    mode = self.__mode

    if mode == "cross":
        # The cross join is the only mode that takes no key field.
        target.data = petl.crossjoin(left, right)
    else:
        joiner = None
        if mode == "inner":
            joiner = petl.join if not self.__hash else petl.hashjoin
        elif mode == "left":
            joiner = petl.leftjoin if not self.__hash else petl.hashleftjoin
        elif mode == "right":
            joiner = petl.rightjoin if not self.__hash else petl.hashrightjoin
        elif mode == "outer":
            joiner = petl.outerjoin
        elif mode == "anti":
            joiner = petl.antijoin if not self.__hash else petl.hashantijoin
        if joiner is not None:
            target.data = joiner(left, right, self.__field_name)

    # Extend the target schema with the other side's non-key fields.
    if mode != "anti":
        for extra in other.schema.fields:
            if extra.name == self.__field_name:
                continue
            target.schema.fields.append(extra.to_copy())
def test_crossjoin_empty():
    """Cross-joining against a table with no data rows yields only the header."""
    left = (('id', 'colour'),
            (1, 'blue'),
            (2, 'red'))
    right = (('id', 'shape'),)  # header row only

    result = crossjoin(left, right)

    # Expected: the combined header and nothing else.
    combined_header = ('id', 'colour', 'id', 'shape')
    ieq((combined_header,), result)
def test_crossjoin_prefix():
    """crossjoin(prefix=True) should prefix header fields with ``1_`` / ``2_``."""
    colours = (
        ("id", "colour"),
        (1, "blue"),
        (2, "red"),
    )
    shapes = (
        ("id", "shape"),
        (1, "circle"),
        (3, "square"),
    )

    joined = crossjoin(colours, shapes, prefix=True)

    # Header fields carry the 1-based position of their source table.
    wanted = (
        ("1_id", "1_colour", "2_id", "2_shape"),
        (1, "blue", 1, "circle"),
        (1, "blue", 3, "square"),
        (2, "red", 1, "circle"),
        (2, "red", 3, "square"),
    )
    ieq(wanted, joined)
def test_crossjoin():
    """crossjoin() of two 2-row tables should give all four row pairings."""
    colours = (
        ("id", "colour"),
        (1, "blue"),
        (2, "red"),
    )
    shapes = (
        ("id", "shape"),
        (1, "circle"),
        (3, "square"),
    )

    joined = crossjoin(colours, shapes)

    # Left rows vary slowest; both 'id' columns are kept.
    wanted = (
        ("id", "colour", "id", "shape"),
        (1, "blue", 1, "circle"),
        (1, "blue", 3, "square"),
        (2, "red", 1, "circle"),
        (2, "red", 3, "square"),
    )
    ieq(wanted, joined)
def test_crossjoin_empty():
    """A cross join with a data-less second table should contain no data rows."""
    populated = (
        ("id", "colour"),
        (1, "blue"),
        (2, "red"),
    )
    empty = (("id", "shape"),)  # header only

    product = crossjoin(populated, empty)

    # Only the concatenated header is expected.
    expected = (("id", "colour", "id", "shape"),)
    ieq(expected, product)
def test_crossjoin_prefix():
    """With ``prefix=True`` the joined header is expected to be table-prefixed."""
    left = (('id', 'colour'),
            (1, 'blue'),
            (2, 'red'))
    right = (('id', 'shape'),
             (1, 'circle'),
             (3, 'square'))

    result = crossjoin(left, right, prefix=True)

    # '1_' marks fields from the first table, '2_' fields from the second.
    header = ('1_id', '1_colour', '2_id', '2_shape')
    rows = ((1, 'blue', 1, 'circle'),
            (1, 'blue', 3, 'square'),
            (2, 'red', 1, 'circle'),
            (2, 'red', 3, 'square'))
    ieq((header,) + rows, result)
def test_crossjoin():
    """Plain crossjoin(): every left row is paired with every right row."""
    left = (('id', 'colour'),
            (1, 'blue'),
            (2, 'red'))
    right = (('id', 'shape'),
             (1, 'circle'),
             (3, 'square'))

    result = crossjoin(left, right)

    # Header is the two input headers side by side, unprefixed.
    header = ('id', 'colour', 'id', 'shape')
    rows = ((1, 'blue', 1, 'circle'),
            (1, 'blue', 3, 'square'),
            (2, 'red', 1, 'circle'),
            (2, 'red', 3, 'square'))
    ieq((header,) + rows, result)
def join_execute(cl, cr, join, **kwargs):
    """Build both input tables and combine them with the requested join.

    Args:
        cl: Zero-argument callable returning the left table.
        cr: Zero-argument callable returning the right table.
        join: ``Join`` member selecting the operation. ``Join.UNION`` maps to
            ``etl.crossjoin``; ``INNER``, ``LEFT``, ``RIGHT`` and ``FULL`` map
            to ``etl.join``, ``etl.leftjoin``, ``etl.rightjoin`` and
            ``etl.outerjoin``.
        **kwargs: ``addLfields`` / ``addRfields``, when present, are passed to
            ``etl.addfields`` for the left / right table. For the keyed joins
            the kwargs go through ``filter_keys`` with the petl join option
            names (presumably keeping only those keys -- confirm against
            ``filter_keys``) before being forwarded.

    Returns:
        The table produced by the selected petl join function.

    Raises:
        ValueError: If ``join`` is not one of the supported ``Join`` members.
            Previously this case failed with ``UnboundLocalError`` on the
            final ``return``.
    """
    left, right = cl(), cr()

    if "addLfields" in kwargs:
        left = etl.addfields(left, kwargs["addLfields"])
    if "addRfields" in kwargs:
        right = etl.addfields(right, kwargs["addRfields"])

    # The cross join takes no key/sort options, so nothing is forwarded.
    if join == Join.UNION:
        return etl.crossjoin(left, right)

    join_kwargs = filter_keys(
        kwargs,
        ("key", "lkey", "rkey", "missing", "presorted", "buffersize",
         "tempdir", "cache"),
    )

    if join == Join.INNER:
        return etl.join(left, right, **join_kwargs)
    if join == Join.LEFT:
        return etl.leftjoin(left, right, **join_kwargs)
    if join == Join.RIGHT:
        return etl.rightjoin(left, right, **join_kwargs)
    if join == Join.FULL:
        return etl.outerjoin(left, right, **join_kwargs)

    # Fail with a clear message instead of an unbound local at return time.
    raise ValueError("Unsupported join type: %r" % (join,))
# outerjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.outerjoin(table1, table2, key='id') table3 # crossjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] table3 = etl.crossjoin(table1, table2) table3 # antijoin() ############ import petl as etl table1 = [['id', 'colour'], [0, 'black'], [1, 'blue'], [2, 'red'], [4, 'yellow'], [5, 'white']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] table3 = etl.antijoin(table1, table2, key='id') table3 # lookupjoin() ##############
look(table3) # crossjoin table1 = [['id', 'colour'], [1, 'blue'], [2, 'red']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] from petl import crossjoin, look look(table1) look(table2) table3 = crossjoin(table1, table2) look(table3) # antijoin table1 = [['id', 'colour'], [0, 'black'], [1, 'blue'], [2, 'red'], [4, 'yellow'], [5, 'white']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']]
[4, 'ellipse']] table3 = etl.outerjoin(table1, table2, key='id') table3 # crossjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square']] table3 = etl.crossjoin(table1, table2) table3 # antijoin() ############ import petl as etl table1 = [['id', 'colour'], [0, 'black'], [1, 'blue'], [2, 'red'], [4, 'yellow'], [5, 'white']] table2 = [['id', 'shape'], [1, 'circle'],