def test_concat(sql_two_tables):
    """Check that concatenating two loaded tables yields all rows in order.

    Two single-column frames (``0..4`` and ``5..9``) are pushed into the
    pair of targets supplied by the ``sql_two_tables`` fixture.  The
    concatenation, sorted on ``a`` and computed with
    ``return_type=pd.DataFrame``, must equal a frame holding ``0..9``.
    """
    first_table, second_table = sql_two_tables
    first_frame = pd.DataFrame(np.arange(5), columns=['a'])
    second_frame = pd.DataFrame(np.arange(5, 10), columns=['a'])

    # Load the in-memory frames into the fixture-provided targets.
    odo(first_frame, first_table)
    odo(second_frame, second_table)

    t = symbol('t', discover(first_frame))
    u = symbol('u', discover(second_frame))

    # Sort so the comparison does not depend on the backend's row order.
    expr = concat(t, u).sort('a')
    scope = {t: first_table, u: second_table}

    actual = compute(expr, scope, return_type=pd.DataFrame)
    expected = pd.DataFrame(np.arange(10), columns=['a'])
    tm.assert_frame_equal(actual, expected)
def test_concat(sql_two_tables):
    """Check that concatenating two loaded tables yields all rows in order.

    Two single-column frames (``0..4`` and ``5..9``) are pushed into the
    pair of targets supplied by the ``sql_two_tables`` fixture.  The
    concatenation is sorted on ``a``, computed, converted to a
    ``pd.DataFrame`` through ``odo`` and compared against ``0..9``.
    """
    first_table, second_table = sql_two_tables
    first_frame = pd.DataFrame(np.arange(5), columns=["a"])
    second_frame = pd.DataFrame(np.arange(5, 10), columns=["a"])

    # Load the in-memory frames into the fixture-provided targets.
    odo(first_frame, first_table)
    odo(second_frame, second_table)

    t = symbol("t", discover(first_frame))
    u = symbol("u", discover(second_frame))

    # Sort so the comparison does not depend on the backend's row order.
    expr = concat(t, u).sort("a")
    scope = {t: first_table, u: second_table}

    # The computed result is converted to a DataFrame before comparing.
    actual = odo(compute(expr, scope), pd.DataFrame)
    expected = pd.DataFrame(np.arange(10), columns=["a"])
    tm.assert_frame_equal(actual, expected)
def test_concat():
    """Check that concatenating two CSV resources stacks their rows.

    ``a.csv`` and ``b.csv`` each carry two rows of columns ``a`` and ``b``;
    the concatenated result, converted to a ``pd.DataFrame``, must contain
    the values ``1..8`` laid out as four rows of two columns.
    """
    csv_contents = {"a.csv": "a,b\n1,2\n3,4", "b.csv": "a,b\n5,6\n7,8"}

    # Everything below runs inside the ``filetexts`` context, which is
    # handed the name -> content mapping above.
    with filetexts(csv_contents):
        a_rsc = resource("a.csv")
        b_rsc = resource("b.csv")

        a = symbol("a", discover(a_rsc))
        b = symbol("b", discover(b_rsc))

        scope = {a: a_rsc, b: b_rsc}
        actual = odo(compute(concat(a, b), scope), pd.DataFrame)

        # windows needs explicit int64 construction b/c default is int32
        values = np.arange(1, 9, dtype="int64").reshape(4, 2)
        expected = pd.DataFrame(values, columns=list("ab"))

        tm.assert_frame_equal(actual, expected)
def test_concat_invalid_axis(sql_two_tables):
    """Check that computing a concat along ``axis=1`` raises ``ValueError``.

    The frames are loaded into the targets supplied by the
    ``sql_two_tables`` fixture, and the compute call (with
    ``return_type='native'``) is expected to fail with a message that
    mentions ``'merge'``.
    """
    first_table, second_table = sql_two_tables
    first_frame = pd.DataFrame(np.arange(5), columns=['a'])
    second_frame = pd.DataFrame(np.arange(5, 10), columns=['a'])

    odo(first_frame, first_table)
    odo(second_frame, second_table)

    # We need to force the shape to not be a record here so we can
    # create the `Concat` node with an axis=1.
    t = symbol('t', '5 * 1 * int32')
    u = symbol('u', '5 * 1 * int32')
    scope = {t: first_table, u: second_table}

    with pytest.raises(ValueError) as excinfo:
        compute(concat(t, u, axis=1), scope, return_type='native')

    # Preserve the suggestion to use merge.
    message = str(excinfo.value)
    assert "'merge'" in message
def test_concat_invalid_axis(sql_two_tables):
    """Check that computing a concat along ``axis=1`` raises ``ValueError``.

    The frames are loaded into the targets supplied by the
    ``sql_two_tables`` fixture, and the compute call is expected to fail
    with a message that mentions ``'merge'``.
    """
    first_table, second_table = sql_two_tables
    first_frame = pd.DataFrame(np.arange(5), columns=['a'])
    second_frame = pd.DataFrame(np.arange(5, 10), columns=['a'])

    odo(first_frame, first_table)
    odo(second_frame, second_table)

    # We need to force the shape to not be a record here so we can
    # create the `Concat` node with an axis=1.
    t = symbol('t', '5 * 1 * int32')
    u = symbol('u', '5 * 1 * int32')
    scope = {t: first_table, u: second_table}

    with pytest.raises(ValueError) as excinfo:
        compute(concat(t, u, axis=1), scope)

    # Preserve the suggestion to use merge.
    message = str(excinfo.value)
    assert "'merge'" in message
def test_concat():
    """Check that concatenating two CSV-backed data objects stacks rows.

    ``a.csv`` and ``b.csv`` each carry two rows of columns ``a`` and ``b``;
    the concatenated result, converted to a ``pd.DataFrame``, must contain
    the values ``1..8`` laid out as four rows of two columns.
    """
    csv_contents = {'a.csv': 'a,b\n1,2\n3,4', 'b.csv': 'a,b\n5,6\n7,8'}

    # Everything below runs inside the ``filetexts`` context, which is
    # handed the name -> content mapping above.
    with filetexts(csv_contents):
        a_rsc = data('a.csv')
        b_rsc = data('b.csv')

        a = symbol('a', discover(a_rsc))
        b = symbol('b', discover(b_rsc))

        scope = {a: a_rsc, b: b_rsc}
        actual = odo(compute(concat(a, b), scope), pd.DataFrame)

        # windows needs explicit int64 construction b/c default is int32
        values = np.arange(1, 9, dtype='int64').reshape(4, 2)
        expected = pd.DataFrame(values, columns=list('ab'))

        tm.assert_frame_equal(actual, expected)
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 24 17:50:05 2015

@author: bolaka
"""

# suppress pandas warnings
# The filters are installed before the third-party imports below, so the
# original statement order is kept.
import warnings

for _category in (RuntimeWarning, FutureWarning):
    warnings.simplefilter(action="ignore", category=_category)

# imports
import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn import preprocessing
from numpy.random import seed
from blaze import CSV, Table, concat

# reproduce results
seed(786)

# Wrap each CSV file in a blaze Table.
train = Table(CSV('train.csv'))
test = Table(CSV('test.csv'))

# NOTE(review): axis=1 is used on two tabular sources -- confirm this is
# intended rather than stacking the rows with the default axis.
combined = concat(train, test, axis=1)

# NOTE(review): bare attribute access; the value is discarded when this runs
# as a script (presumably left over from an interactive session).
combined.dshape