def test_jit_promotion(self):
    # Evaluate an expression built from int32 and float32 dshapes with the
    # 'jit' strategy and compare it element-wise with float64 expectations.
    int_ds = dshape('10, int32')
    float_ds = dshape('10, float32')
    expr = make_expr(int_ds, float_ds)

    result = blaze.eval(expr, strategy='jit')

    values = [0, 2, 6, 12, 20, 30, 42, 56, 72, 90]
    expected = blaze.array(values, dshape=dshape('10, float64'))

    self.assertEqual(type(result), blaze.Array)
    self.assertTrue(np.all(result == expected))
def test_metadata_all_prop():
    # all_prop must hold for a tuple of arrays only, and must fail as soon
    # as an NDTable is part of the tuple.
    ones_arr = blaze.ones(blaze.dshape('20, 20, float64'))
    zeros_arr = blaze.zeros(blaze.dshape('20, 20, float64'))
    records = [(1.0, 1.0), (1.0, 1.0)]
    table = blaze.NDTable(records, dshape='2, {x: int32; y: float32}')

    arrays_only = (ones_arr, zeros_arr)
    arrays_and_table = (ones_arr, zeros_arr, table)

    assert blaze.metadata.all_prop(arrays_only, blaze.metadata.arraylike)
    assert not blaze.metadata.all_prop(arrays_and_table,
                                       blaze.metadata.arraylike)
def test_dot_not2d_exception():
    '''Dot product of arrays other than 2D should raise exception.'''
    # Both operands are three-dimensional.
    a = blaze.ones(blaze.dshape('20, 20, 20, float64'))
    b = blaze.ones(blaze.dshape('20, 20, 20, float64'))

    with assert_raises(ValueError):
        # The call is expected to raise, so its result is not bound.
        dot(a, b, outname=None)
def test_dot_shape_exception():
    '''Dot product with wrong inner dimensions should raise exception.'''
    # 20x20 times 30x30: the inner dimensions (20 and 30) differ.
    a = blaze.ones(blaze.dshape('20, 20, float64'))
    b = blaze.ones(blaze.dshape('30, 30, float64'))

    with assert_raises(ValueError):
        # The call is expected to raise, so its result is not bound.
        dot(a, b, outname=None)
def test_best_match_ellipses(self):
    # Run best_match on ``g`` with a fixed-rank dshape and an ellipsis
    # dshape, then check the textual form of both reported signatures.
    arg_types = [dshape('10, T1, int32'), dshape('..., float32')]
    match = best_match(g, arg_types)

    sig_text = str(match.sig)
    self.assertEqual(sig_text,
                     'X, Y, float32 -> ..., float32 -> X, int32')

    resolved_text = str(match.resolved_sig)
    self.assertEqual(resolved_text,
                     '10, T1, float32 -> ..., float32 -> 10, int32')
def correct_commodities():
    """Open every ``*_all.csv`` under ``agmarknet/by_commodity`` and clean it.

    Each sub-folder of ``<data_dir>/agmarknet/by_commodity`` is scanned for
    files matching ``*_all.csv``. A file is opened with an explicit datashape
    when it has 9 or 10 columns, and with the discovered datashape otherwise.
    The ``commodity``, ``state`` and ``market`` columns (plus ``grade`` for
    10-column files) are stripped of surrounding whitespace, and
    ``commodity`` / ``state`` values are mapped through
    ``commodity_corrections`` / ``state_corrections`` when an entry exists.

    The working directory is changed while scanning and is restored to its
    initial value afterwards, including when an error is raised.

    NOTE(review): the transformed expression ``d`` is neither returned nor
    written anywhere in this function, so the cleaning has no lasting effect
    here -- confirm whether an export step is missing.

    Returns:
        None
    """
    # Explicit datashapes keyed by the number of columns in the CSV.
    nine_column_ds = (
        "var * {date: datetime, state: ?string, market: ?string, "
        "commodity: ?string, variety: ?string, arrival: ?string, "
        "min: ?string, max: ?string, modal: ?string}")
    ten_column_ds = (
        "var * {date: datetime, state: ?string, market: ?string, "
        "commodity: ?string, variety: ?string, arrival: ?string, "
        "grade: ?string, min: ?string, max: ?string, modal: ?string}")

    src_dir = path.join(data_dir, 'agmarknet/by_commodity')
    init_dir = os.getcwd()
    os.chdir(src_dir)
    try:
        folders = glob.glob('*')
        csv_dir = os.getcwd()
        for folder in folders:
            os.chdir(path.join(csv_dir, folder))
            for filename in glob.glob('*_all.csv'):
                csv_path = path.join(csv_dir, folder, filename)
                # Have to use resource to discover URIs
                csvr = odo.resource(csv_path)
                # Discover once; the result is reused as the fallback dshape.
                discovered = odo.discover(csvr)
                num_col = len(discovered[1].types)
                if num_col == 9:
                    ds = bz.dshape(nine_column_ds)
                elif num_col == 10:
                    ds = bz.dshape(ten_column_ds)
                else:
                    ds = discovered

                d = bz.Data(csv_path, dshape=ds)
                if num_col == 10:
                    # Only the 10-column layout carries a ``grade`` field.
                    d = bz.transform(
                        d, grade=d.grade.map(lambda x: x.strip(), 'string'))
                d = bz.transform(
                    d,
                    commodity=d.commodity.map(lambda x: x.strip(), 'string'))
                d = bz.transform(
                    d,
                    commodity=d.commodity.map(
                        lambda x: commodity_corrections.get(x, x), 'string'))
                d = bz.transform(
                    d, state=d.state.map(lambda x: x.strip(), 'string'))
                d = bz.transform(
                    d,
                    state=d.state.map(
                        lambda x: state_corrections.get(x, x), 'string'))
                d = bz.transform(
                    d, market=d.market.map(lambda x: x.strip(), 'string'))
    finally:
        # Undo the chdir calls so callers keep their working directory.
        os.chdir(init_dir)
def test_dot_out_exception():
    '''Output array of wrong size should raise exception.'''
    lhs = blaze.ones(blaze.dshape('20, 20, float64'))
    rhs = blaze.ones(blaze.dshape('20, 30, float64'))
    # A 20x20 output buffer is supplied for a 20x20 by 20x30 product.
    wrong_out = blaze.zeros(blaze.dshape('20, 20, float64'))

    with assert_raises(ValueError):
        dot(lhs, rhs, out=wrong_out)
def test_graph(self):
    # Build add(a, mul(a, b)) from an int32 and a float32 array and inspect
    # the graph/context pair stored on the expression.
    ints = array(np.arange(10), dshape=dshape('10, int32'))
    floats = array(np.arange(10), dshape=dshape('10, float32'))
    product = mul(ints, floats)
    expr = add(ints, product)

    graph, ctx = expr.expr

    self.assertEqual(len(ctx.params), 2)
    self.assertFalse(ctx.constraints)
    self.assertEqual(graph.dshape, dshape('10, float64'))
def test_best_match_broadcasting(self):
    # Run best_match on ``f`` with two one-dimensional dshapes and check the
    # textual form of the matched and the resolved signature.
    arg_types = [dshape('10, complex64'), dshape('10, float32')]
    match = best_match(f, arg_types)

    expected_sig = 'X, Y, cfloat32 -> X, Y, cfloat32 -> X, Y, cfloat32'
    expected_resolved = ('1, 10, cfloat32 -> 1, 10, cfloat32 -> '
                         '1, 10, cfloat32')

    self.assertEqual(str(match.sig), expected_sig)
    self.assertEqual(str(match.resolved_sig), expected_resolved)
def test_interp(self):
    # Evaluate add(a, mul(a, b)) with the 'py' strategy and compare the
    # result element-wise with the expected values.
    ints = array(range(10), dshape=dshape('10, int32'))
    floats = array(range(10), dshape=dshape('10, float32'))
    expr = add(ints, mul(ints, floats))

    result = blaze.eval(expr, strategy='py')

    values = [0, 2, 6, 12, 20, 30, 42, 56, 72, 90]
    expected = blaze.array(values)

    self.assertEqual(type(result), blaze.Array)
    self.assertTrue(np.all(result == expected))
def test_not_compat():
    # Each of these datashapes must be rejected with NotNumpyCompatible.
    rejected = (
        'x, int32',
        '{1}, int32',
        'Range(0, 3), int32',
    )
    for text in rejected:
        with assert_raises(NotNumpyCompatible):
            to_numpy(dshape(text))
def test_dtw():
    # Run ucr.dtw on two all-ones float32 arrays of length 100.
    series = ones(dshape('100, float32'))
    pattern = ones(dshape('100, float32'))

    outcome = ucr.dtw(series, pattern, 0.1, 100, verbose=False)
    loc, dist = outcome

    # these are stupid, mostly just to check for regressions
    assert isinstance(loc, int)
    assert isinstance(dist, float)
def test_coercions(self):
    # After explicit_coercions, every op except the last one must be the
    # expected (opcode, type) pair, alternating convert and kernel.
    f, values, graph = make_graph()
    explicit_coercions(f)

    observed = [(op.opcode, op.type) for op in f.ops]
    observed = observed[:-1]

    expected = [
        ('convert', dshape("10, float64")),
        ('kernel', dshape("10, float64")),
        ('convert', dshape("10, cfloat64")),
        ('kernel', dshape("10, cfloat64")),
    ]
    self.assertEqual(observed, expected)
def test_string_atom(self):
    # The bare "string" type compares equal to the explicit U8 form.
    self.assertEqual(blaze.dshape("string"), blaze.dshape("string('U8')"))

    # Long and short spellings of an encoding both report the short code
    # through ``.encoding``.
    cases = [
        ("string('ascii')", "A"),
        ("string('A')", "A"),
        ("string('utf-8')", "U8"),
        ("string('U8')", "U8"),
        ("string('utf-16')", "U16"),
        ("string('U16')", "U16"),
        ("string('utf-32')", "U32"),
        ("string('U32')", "U32"),
    ]
    for spec, encoding in cases:
        self.assertEqual(blaze.dshape(spec).encoding, encoding)
def test_best_match(self):
    # Match ``f`` against two rank-2 dshapes, then unify the resolved
    # signature with a partially specified one.
    arg_types = [dshape('10, T1, int32'), dshape('T2, T2, float32')]
    match = best_match(f, arg_types)
    self.assertEqual(str(match.sig),
                     'X, Y, float32 -> X, Y, float32 -> X, Y, float32')

    partial_sig = dshape('1, 1, float32 -> 1, 1, float32 -> R')
    unified = unify_simple(partial_sig, match.resolved_sig)
    self.assertEqual(str(unified),
                     '10, 1, float32 -> 10, 1, float32 -> 10, 1, float32')
def test_cat_dshapes(self):
    # concatenating 1 dshape is a no-op
    single = [dshape('3, 10, int32')]
    self.assertEqual(datashape.cat_dshapes(single), single[0])

    # two dshapes: the leading dimensions (3 and 7) add up to 10
    pair = [dshape('3, 10, int32'), dshape('7, 10, int32')]
    combined = datashape.cat_dshapes(pair)
    self.assertEqual(combined, dshape('10, 10, int32'))
def test_string_atom(self):
    parse = blaze.dshape
    check = self.assertEqual

    # The bare string type equals the explicit U8 form.
    check(parse('string'), parse("string('U8')"))

    # ASCII spellings
    check(parse("string('ascii')").encoding, 'A')
    check(parse("string('A')").encoding, 'A')
    # UTF-8 spellings
    check(parse("string('utf-8')").encoding, 'U8')
    check(parse("string('U8')").encoding, 'U8')
    # UTF-16 spellings
    check(parse("string('utf-16')").encoding, 'U16')
    check(parse("string('U16')").encoding, 'U16')
    # UTF-32 spellings
    check(parse("string('utf-32')").encoding, 'U32')
    check(parse("string('U32')").encoding, 'U32')
def test_overload(self):
    # Create an overloaded blaze func, populate it with
    # some ckernel implementations extracted from numpy,
    # and test some calls on it.
    d = blaze.overloading.Dispatcher()
    myfunc = blaze.BlazeFunc(d)

    def myfunc_dummy(x, y):
        raise NotImplementedError

    # int32 -> np.add; int16 -> np.subtract (so we can see the difference)
    overloads = [
        ("A..., int32 -> A..., int32 -> A..., int32", np.add, np.int32),
        ("A..., int16 -> A..., int16 -> A..., int16", np.subtract, np.int16),
    ]
    for sig_text, ufunc, np_type in overloads:
        sig = blaze.dshape(sig_text)
        d.add_overload(myfunc_dummy, sig, {})
        ckd = _lowlevel.ckernel_deferred_from_ufunc(
            ufunc, (np_type, np_type, np_type), False)
        myfunc.implement(myfunc_dummy, sig, "ckernel", ckd)

    def check(expr, ds_text, values):
        # Evaluate the deferred call, then verify its dshape and contents.
        a = blaze.eval(expr)
        self.assertEqual(a.dshape, blaze.dshape(ds_text))
        self.assertEqual(nd.as_py(a._data.dynd_arr()), values)

    # int32 overload -> add
    check(myfunc(blaze.array([3,4]), blaze.array([1,2])),
          '2, int32', [4, 6])

    # int16 overload -> subtract
    check(myfunc(blaze.array([3,4], dshape='int16'),
                 blaze.array([1,2], dshape='int16')),
          '2, int16', [2, 2])

    # type promotion to int32
    check(myfunc(blaze.array([3,4], dshape='int16'),
                 blaze.array([1,2])),
          '2, int32', [4, 6])
    check(myfunc(blaze.array([3,4]),
                 blaze.array([1,2], dshape='int16')),
          '2, int32', [4, 6])

    # type promotion to int16
    check(myfunc(blaze.array([3,4], dshape='int8'),
                 blaze.array([1,2], dshape='int8')),
          '2, int16', [2, 2])

    # A little bit of nesting
    check(myfunc(myfunc(blaze.array([3,4]), blaze.array([1,2])),
                 blaze.array([2,10])),
          '2, int32', [6, 16])

    # More nesting, with conversions
    check(myfunc(myfunc(blaze.array([1,2]), blaze.array([-2, 10])),
                 myfunc(blaze.array([1, 5], dshape='int16'),
                        blaze.array(3, dshape='int16'))),
          '2, int32', [-3, 14])
def test_ir(self):
    f, values, graph = make_graph()

    # Structure: a single block whose start block is terminated
    block_count = len(f.blocks)
    self.assertEqual(block_count, 1)
    self.assertTrue(f.startblock.is_terminated())

    # Types of every op except the last one
    op_types = [op.type for op in f.ops]
    got = op_types[:-1]
    expected = [dshape("10, float64"), dshape("10, cfloat64")]
    self.assertEqual(got, expected)
def test_overload(self):
    myfunc = create_overloaded_add()

    # Test int32 overload -> add
    deferred = myfunc(blaze.array([3,4]), blaze.array([1,2]))
    a = blaze.eval(deferred)
    self.assertEqual(a.dshape, blaze.dshape('2, int32'))
    self.assertEqual(nd.as_py(a._data.dynd_arr()), [4, 6])

    # Test int16 overload -> subtract
    deferred = myfunc(blaze.array([3,4], dshape='int16'),
                      blaze.array([1,2], dshape='int16'))
    a = blaze.eval(deferred)
    self.assertEqual(a.dshape, blaze.dshape('2, int16'))
    self.assertEqual(nd.as_py(a._data.dynd_arr()), [2, 2])
def test_scalar(self):
    def check(c_value, ds_text, py_value, py_type):
        # Wrap the ctypes scalar and verify its dshape, value and the
        # Python type of the converted value.
        dd = data_descriptor_from_ctypes(c_value, writable=True)
        self.assertEqual(dd.dshape, blaze.dshape(ds_text))
        self.assertEqual(dd_as_py(dd), py_value)
        self.assertTrue(isinstance(dd_as_py(dd), py_type))

    check(ctypes.c_int(3), 'int32', 3, int)
    check(ctypes.c_float(3.25), 'float32', 3.25, float)
def test_1d_array(self):
    # An array where the size is in the type
    evens = [2*i for i in range(32)]
    c_shorts = ffi.new('short[32]', evens)
    dd = data_descriptor_from_cffi(ffi, c_shorts, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('32, int16'))
    self.assertEqual(dd_as_py(dd), evens)

    # An array where the size is not in the type
    halves = [1.5*i for i in range(32)]
    c_doubles = ffi.new('double[]', halves)
    dd = data_descriptor_from_cffi(ffi, c_doubles, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('32, float64'))
    self.assertEqual(dd_as_py(dd), halves)
def test_scalar(self):
    # int scalar allocated through cffi
    int_ptr = ffi.new('int *', 3)
    dd = data_descriptor_from_cffi(ffi, int_ptr, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('int32'))
    self.assertEqual(dd_as_py(dd), 3)
    self.assertTrue(isinstance(dd_as_py(dd), int))

    # float scalar allocated through cffi
    float_ptr = ffi.new('float *', 3.25)
    dd = data_descriptor_from_cffi(ffi, float_ptr, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('float32'))
    self.assertEqual(dd_as_py(dd), 3.25)
    self.assertTrue(isinstance(dd_as_py(dd), float))
def test_simple_unify():
    # Four arrays, each with its own pair of symbolic dimensions.
    A = NDArray([0], dshape('s, t, int'))
    B = NDArray([0], dshape('u, v, int'))
    C = NDArray([0], dshape('w, x, int'))
    D = NDArray([0], dshape('y, z, int'))

    # ==============
    left = A*B
    right = C*D
    g = (left+right)**2
    # ==============

    # No assertion: the test passes when this call does not raise.
    compile(g)
def make_graph():
    """Build ``mul(add(a, b), c)`` and pass its graph through ``from_expr``.

    The operands are ten-element arrays declared as int32, float64 and
    complex128.

    Returns:
        The tuple ``(f, values, graph)``: the two values returned by
        ``from_expr`` followed by the expression graph.
    """
    int_arr = blaze.array(range(10), dshape('10, int32'))
    float_arr = blaze.array(range(10), dshape('10, float64'))
    complex_values = [i+0j for i in range(10)]
    complex_arr = blaze.array(complex_values, dshape('10, complex128'))

    total = add(int_arr, float_arr)
    result = mul(total, complex_arr)

    graph, expr_ctx = result.expr
    f, values = from_expr(graph, expr_ctx, ExecutionContext())
    return f, values, graph
def test_dot():
    '''Test of 2D dot product'''
    lhs = blaze.ones(blaze.dshape('20, 20, float64'))
    rhs = blaze.ones(blaze.dshape('20, 30, float64'))

    # Do not write output array to disk
    product = dot(lhs, rhs, outname=None)

    expected_ds = blaze.dshape('20, 30, float64')
    assert product.datashape._equal(expected_ds)

    # FIXME: Slow, but no other way to do this with Array API implemented so far
    # Every element is a sum of twenty 1.0 * 1.0 terms.
    for row in product:
        for value in row:
            assert abs(value - 20.0) < 1e-8
def test_2d_array(self):
    # An array where the leading array size is in the type
    vals = [[2**col + row for col in range(35)] for row in range(32)]
    c_array = ffi.new('long long[32][35]', vals)
    dd = data_descriptor_from_cffi(ffi, c_array, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('32, 35, int64'))
    self.assertEqual(dd_as_py(dd), vals)

    # An array where the leading array size is not in the type
    vals = [[col + row*2 for col in range(35)] for row in range(32)]
    c_array = ffi.new('unsigned char[][35]', vals)
    dd = data_descriptor_from_cffi(ffi, c_array, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('32, 35, uint8'))
    self.assertEqual(dd_as_py(dd), vals)
def test_1d_array(self):
    # 32 shorts filled with the even numbers 0, 2, ..., 62
    shorts = (ctypes.c_short * 32)()
    for idx in range(32):
        shorts[idx] = 2*idx
    dd = data_descriptor_from_ctypes(shorts, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('32, int16'))
    self.assertEqual(dd_as_py(dd), [2*k for k in range(32)])

    # 32 doubles filled with multiples of 1.5
    doubles = (ctypes.c_double * 32)()
    for idx in range(32):
        doubles[idx] = 1.5*idx
    dd = data_descriptor_from_ctypes(doubles, writable=True)
    self.assertEqual(dd.dshape, blaze.dshape('32, float64'))
    self.assertEqual(dd_as_py(dd), [1.5*k for k in range(32)])
def test_simple_unify():
    # Smoke test: builds (A*B + C*D)**2 from arrays with symbolic dimensions
    # and runs it through a Pipeline. Nothing is asserted; the test passes
    # when run_pipeline does not raise.
    A = NDArray([0], dshape('s, t, int'))
    B = NDArray([0], dshape('u, v, int'))
    C = NDArray([0], dshape('w, x, int'))
    D = NDArray([0], dshape('y, z, int'))

    # ==============
    g = (A * B + C * D)**2
    # ==============

    # Operator Constraints
    #
    # A : (s, t)
    # B : (u, v)
    # C : (w, x)
    # D : (y, z)
    #
    # AB : (a, b)
    # CD : (c, d)
    # AB + CD : (e, f)
    # (AB + CD)**2 : (g, h)

    # Constraint Generation
    # ---------------------
    # t = u, a = s, b = v in AB
    # x = y, c = w, d = z in CD
    # a = c = e, b = d = f in AB + CD
    # e = f = g = h in (AB + CD)**2

    # Substitution
    # -------------
    # a = b = c = d = e = f = g = h = s = v = w = z
    # t = u
    # x = y

    # Constraint Solution
    # -------------------
    # A : a -> t
    # B : t -> a
    # C : a -> x
    # D : x -> a

    line = Pipeline()
    # The pipeline result is not inspected, so it is not bound to a name.
    line.run_pipeline(g)
def test_simple_unify():
    # Smoke test: builds (A*B + C*D)**2 from arrays with symbolic dimensions
    # and runs it through a Pipeline. Nothing is asserted; the test passes
    # when run_pipeline does not raise.
    A = NDArray([0], dshape('s, t, int'))
    B = NDArray([0], dshape('u, v, int'))
    C = NDArray([0], dshape('w, x, int'))
    D = NDArray([0], dshape('y, z, int'))

    # ==============
    g = (A*B+C*D)**2
    # ==============

    # Operator Constraints
    #
    # A : (s, t)
    # B : (u, v)
    # C : (w, x)
    # D : (y, z)
    #
    # AB : (a, b)
    # CD : (c, d)
    # AB + CD : (e, f)
    # (AB + CD)**2 : (g, h)

    # Constraint Generation
    # ---------------------
    # t = u, a = s, b = v in AB
    # x = y, c = w, d = z in CD
    # a = c = e, b = d = f in AB + CD
    # e = f = g = h in (AB + CD)**2

    # Substitution
    # -------------
    # a = b = c = d = e = f = g = h = s = v = w = z
    # t = u
    # x = y

    # Constraint Solution
    # -------------------
    # A : a -> t
    # B : t -> a
    # C : a -> x
    # D : x -> a

    line = Pipeline()
    # The pipeline result is not inspected, so it is not bound to a name.
    line.run_pipeline(g)
def test_object_blob():
    # Store (int, str) tuples in a blob array and read them back unchanged.
    ds = blaze.dshape('x, blob')
    rows = [(i, str(i * .2)) for i in range(10)]
    c = blaze.Array(rows, ds)

    for index, item in enumerate(c):
        assert item[0] == index
        assert item[1] == str(index * .2)
def test_all_construct():
    # Assert that the pretty printer works for all of the
    # toplevel structures
    expected_ds = dshape('3, int')

    def render(obj):
        # Exercise both textual representations; only raising is a failure.
        str(obj)
        repr(obj)
        return obj

    # NOTE(review): the results of the _equal calls below are discarded, so
    # they never fail the test -- confirm whether asserts were intended.
    a = render(NDArray([1,2,3]))
    a.datashape._equal(expected_ds)

    a = render(Array([1,2,3]))
    a.datashape._equal(expected_ds)

    # The datashape comparison against expected_ds is disabled for NDTable.
    a = render(NDTable([(1, 1)]))

    a = render(Table([(1, 1)]))
def test_op_dtype4():
    # Adding two int arrays must yield an expression whose simple type is
    # the int dshape.
    a = NDArray([1], dshape='1, int')
    b = NDArray([2], dshape='1, int')

    x = (a + b)

    # The comparison used to be a bare expression whose result was thrown
    # away, so the test could never fail on a wrong type.
    assert x.simple_type() == dshape('int')
def test_join():
    # Join two list-of-lists tables on ``name`` and check schema and rows.
    left = [['Alice', 100], ['Bob', 200]]
    right = [['Alice', 1], ['Bob', 2]]

    L = Symbol('L', 'var * {name: string, amount: int}')
    R = Symbol('R', 'var * {name: string, id: int}')
    joined = join(L, R, 'name')

    expected_schema = dshape('{name: string, amount: int, id: int}')
    assert dshape(joined.schema) == expected_schema

    bindings = {L: left, R: right}
    result = list(compute(joined, bindings))

    expected = [('Alice', 100, 1), ('Bob', 200, 2)]
    assert result == expected
class FromIterMemory_int64array(FromiterTemplate, TestCase):
    # FromiterTemplate case configured with an 'x, int64' dshape, a count
    # of 1000 and params built with clevel=5.
    ds = dshape('x, int64')
    count = 1000
    p = params(clevel=5)

    def gen(self):
        # Generator over the integers 0 .. self.count - 1.
        return (value for value in xrange(self.count))
def test_coerce_series_string_datetime(d, tp, ptp):
    # Coerce a symbol discovered from the series to ``tp`` and compare the
    # computed result with the pandas ``astype(ptp)`` conversion.
    s = pd.Series(d, name='a')

    t = symbol('t', discover(s))
    e = t.coerce(to=tp)
    assert e.schema == dshape(tp)

    result = compute(e, s)
    expected = s.astype(ptp)
    assert_series_equal(result, expected)
def test_object_unicode():
    # Store unicode strings of growing length in a blob array.
    ds = blaze.dshape('x, blob')
    strings = [u'a' * i for i in range(10)]
    c = blaze.Array(strings, ds)

    for length, item in enumerate(c):
        # The outcome are 0-dim arrays (that might change in the future)
        assert item[()] == u'a' * length
def test_unused_datetime_columns():
    # Selecting only the string column of a CSV that also has a date-like
    # column must return the string values. The previously built ``ds``
    # dshape was never used and has been dropped.
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as fn:
        csv = CSV(fn, has_header=True)
        s = symbol('s', discover(csv))
        assert into(list, compute(s.val, csv)) == ['a', 'b']
def test_interactive_dshape_works(mongo_host_port):
    # A dshape passed explicitly to Data must be reported back unchanged.
    # The test is skipped when the connection fails.
    uri = 'mongodb://{}:{}/test_db::bank'.format(*mongo_host_port)
    try:
        d = Data(uri, dshape='var * {name: string, amount: int64}')
    except pymongo.errors.ConnectionFailure:
        pytest.skip('No mongo server running')

    expected = dshape('var * {name: string, amount: int64}')
    assert d.dshape == expected
def test_interactive_dshape_works():
    # A dshape passed explicitly to Data must be reported back unchanged.
    # The test is skipped when the connection fails.
    uri = 'mongodb://localhost:27017/test_db::bank'
    try:
        d = Data(uri, dshape='var * {name: string, amount: int64}')
    except pymongo.errors.ConnectionFailure:
        pytest.skip('No mongo server running')

    expected = dshape('var * {name: string, amount: int64}')
    assert d.dshape == expected
def test_join():
    # Join two list-of-lists tables on ``name`` and check schema and rows.
    left = [['Alice', 100], ['Bob', 200]]
    right = [['Alice', 1], ['Bob', 2]]

    L = TableSymbol('L', '{name: string, amount: int}')
    R = TableSymbol('R', '{name: string, id: int}')
    joined = join(L, R, 'name')

    expected_schema = dshape('{name: string, amount: int, id: int}')
    assert dshape(joined.schema) == expected_schema

    bindings = {L: left, R: right}
    result = list(compute(joined, bindings))

    expected = [('Alice', 100, 1), ('Bob', 200, 2)]
    assert result == expected
def test_record():
    # An NDTable built with an explicit record dshape must report that
    # dshape and must be printable.
    expected_ds = dshape('1, {x: int32; y: float32}')

    t = NDTable([(1, 2.1), (2, 3.1)], dshape='1, {x: int32; y: float32}')

    # The comparison result used to be discarded; assert it so that a
    # mismatch actually fails the test.
    assert t.datashape._equal(expected_ds)

    # Both textual representations must be producible without raising.
    str(t)
    repr(t)
def test_metadata_has_prop():
    # An array carries ``arraylike`` but not ``tablelike``; an NDTable
    # carries ``tablelike``.
    arr = blaze.ones(blaze.dshape('20, 20, float64'))
    records = [(1.0, 1.0), (1.0, 1.0)]
    table = blaze.NDTable(records, dshape='2, {x: int32; y: float32}')

    assert blaze.metadata.has_prop(arr, blaze.metadata.arraylike)
    assert blaze.metadata.has_prop(table, blaze.metadata.tablelike)
    assert not blaze.metadata.has_prop(arr, blaze.metadata.tablelike)
def test_intfloat_blob():
    # Store (int, float) tuples in a blob array and read them back unchanged.
    ds = blaze.dshape('x, blob')
    c = blaze.Array([(i, i * .2) for i in range(10)], ds)

    for i, v in enumerate(c):
        # Debug trace. The single pre-formatted argument prints the same
        # text under the Python 2 print statement and the Python 3 print
        # function; the old ``print "v:", ...`` form was a syntax error on
        # Python 3.
        print("v: %s %s %s" % (v, v[0], type(v[0])))
        assert v[0] == i
        assert v[1] == i * .2
def test_nesting(self):
    myfunc = create_overloaded_add()

    # A little bit of nesting
    inner = myfunc(blaze.array([3,4]), blaze.array([1,2]))
    outer = myfunc(inner, blaze.array([2,10]))
    a = blaze.eval(outer)

    self.assertEqual(a.dshape, blaze.dshape('2, int32'))
    self.assertEqual(nd.as_py(a._data.dynd_arr()), [6, 16])
def test_nesting_and_coercion(self):
    myfunc = create_overloaded_add()

    # More nesting, with conversions
    first = myfunc(blaze.array([1,2]), blaze.array([-2, 10]))
    second = myfunc(blaze.array([1, 5], dshape='int16'),
                    blaze.array(3, dshape='int16'))
    a = blaze.eval(myfunc(first, second))

    self.assertEqual(a.dshape, blaze.dshape('2, int32'))
    self.assertEqual(nd.as_py(a._data.dynd_arr()), [-3, 14])
def test_overload_coercion(self):
    myfunc = create_overloaded_add()

    def check(expr, ds_text, values):
        # Evaluate the deferred call, then verify its dshape and contents.
        a = blaze.eval(expr)
        self.assertEqual(a.dshape, blaze.dshape(ds_text))
        self.assertEqual(nd.as_py(a._data.dynd_arr()), values)

    # Test type promotion to int32
    check(myfunc(blaze.array([3,4], dshape='int16'),
                 blaze.array([1,2])),
          '2, int32', [4, 6])
    check(myfunc(blaze.array([3,4]),
                 blaze.array([1,2], dshape='int16')),
          '2, int32', [4, 6])

    # Test type promotion to int16
    check(myfunc(blaze.array([3,4], dshape='int8'),
                 blaze.array([1,2], dshape='int8')),
          '2, int16', [2, 2])
def test_promote():
    from blaze.expr.graph import IntNode, FloatNode

    # ----------------------------------
    # int node with float node
    res = promote(IntNode(1), FloatNode(1.))
    ## TODO: check if this is platform specific
    assert res == blaze.float64

    # ----------------------------------
    # int node with int node
    res = promote(IntNode(1), IntNode(1))
    assert res == blaze.int32

    # ----------------------------------
    # two int32 arrays
    lhs = NDArray([1, 2, 3], dshape('3, int32'))
    rhs = NDArray([1, 2, 3], dshape('3, int32'))
    res = promote(lhs, rhs)
    assert res == blaze.int32
def test_join():
    # Join two DataFrames on ``name`` and compare with the expected frame.
    left = DataFrame([['Alice', 100], ['Bob', 200]],
                     columns=['name', 'amount'])
    right = DataFrame([['Alice', 1], ['Bob', 2]],
                      columns=['name', 'id'])

    lsym = symbol('L', 'var * {name: string, amount: int}')
    rsym = symbol('R', 'var * {name: string, id: int}')
    joined = join(lsym, rsym, 'name')

    expected_schema = dshape('{name: string, amount: int, id: int}')
    assert (dshape(joined.schema) == expected_schema)

    bindings = {lsym: left, rsym: right}
    result = compute(joined, bindings)

    expected = DataFrame([['Alice', 100, 1], ['Bob', 200, 2]],
                         columns=['name', 'amount', 'id'])
    tm.assert_frame_equal(result, expected)
    assert list(result.columns) == list(joined.fields)
def test_join_promotion():
    # Join a frame with a float ``a`` column to one with an integer ``a``
    # column; the joined dshape must report ``a`` as float64.
    a_data = pd.DataFrame([[0.0, 1.5], [1.0, 2.5]], columns=list('ab'))
    b_data = pd.DataFrame([[0, 1], [1, 2]], columns=list('ac'))

    a = symbol('a', discover(a_data))
    b = symbol('b', discover(b_data))
    joined = join(a, b, 'a')

    expected_ds = dshape('var * {a: float64, b: float64, c: int64}')
    assert joined.dshape == expected_ds

    expected = pd.merge(a_data, b_data, on='a')
    bindings = {a: a_data, b: b_data}
    result = compute(joined, bindings)
    tm.assert_frame_equal(result, expected)
def test_join():
    # Join two DataFrames on ``name`` and compare the printed form of the
    # result with the expected frame.
    left = DataFrame([['Alice', 100], ['Bob', 200]],
                     columns=['name', 'amount'])
    right = DataFrame([['Alice', 1], ['Bob', 2]],
                      columns=['name', 'id'])

    L = symbol('L', 'var * {name: string, amount: int}')
    R = symbol('R', 'var * {name: string, id: int}')
    joined = join(L, R, 'name')

    expected_schema = dshape('{name: string, amount: int, id: int}')
    assert (dshape(joined.schema) == expected_schema)

    bindings = {L: left, R: right}
    result = compute(joined, bindings)

    expected = DataFrame([['Alice', 100, 1], ['Bob', 200, 2]],
                         columns=['name', 'amount', 'id'])

    # Both frames are printed before the comparison.
    print(result)
    print(expected)

    assert str(result) == str(expected)
    assert list(result.columns) == list(joined.fields)