Esempio n. 1
def test_concat(sql_two_tables):
    """Check that ``concat`` of two one-column tables yields all ten rows.

    ``sql_two_tables`` is a fixture supplying the two target tables. The
    values 0-4 are loaded into the first and 5-9 into the second; the
    concatenated expression, sorted by ``'a'``, must equal a frame holding
    0-9.
    """
    t_table, u_table = sql_two_tables
    t_data = pd.DataFrame(np.arange(5), columns=['a'])
    u_data = pd.DataFrame(np.arange(5, 10), columns=['a'])
    odo(t_data, t_table)
    odo(u_data, u_table)

    t = symbol('t', discover(t_data))
    u = symbol('u', discover(u_data))

    # The expression is sorted by 'a' before being compared with the
    # expected 0..9 frame, so the check uses a fixed row order.
    pd.testing.assert_frame_equal(
        compute(concat(t, u).sort('a'), {t: t_table, u: u_table}, return_type=pd.DataFrame),
        pd.DataFrame(np.arange(10), columns=['a']),
    )
Esempio n. 2
def test_concat(sql_two_tables):
    """Verify that concatenating two one-column tables returns every row.

    The fixture ``sql_two_tables`` provides the two destination tables.
    One receives the values 0-4 and the other 5-9; computing the sorted
    concatenation as a ``pd.DataFrame`` must give a single column ``'a'``
    holding 0-9.
    """
    t_table, u_table = sql_two_tables
    t_data = pd.DataFrame(np.arange(5), columns=['a'])
    u_data = pd.DataFrame(np.arange(5, 10), columns=['a'])
    odo(t_data, t_table)
    odo(u_data, u_table)

    t = symbol('t', discover(t_data))
    u = symbol('u', discover(u_data))

    # Compare the computed frame (sorted by 'a') against the expected
    # 0..9 column.
    pd.testing.assert_frame_equal(
        compute(concat(t, u).sort('a'), {t: t_table, u: u_table}, return_type=pd.DataFrame),
        pd.DataFrame(np.arange(10), columns=['a']),
    )
Esempio n. 3
def test_concat(sql_two_tables):
    """Check that ``concat`` of two one-column tables yields all ten rows.

    ``sql_two_tables`` is a fixture supplying the two target tables. The
    values 0-4 go into the first and 5-9 into the second. The computed
    result is converted to a ``pd.DataFrame`` with ``odo`` and must equal
    a frame holding 0-9 in column ``"a"``.
    """
    t_table, u_table = sql_two_tables
    t_data = pd.DataFrame(np.arange(5), columns=["a"])
    u_data = pd.DataFrame(np.arange(5, 10), columns=["a"])
    odo(t_data, t_table)
    odo(u_data, u_table)

    t = symbol("t", discover(t_data))
    u = symbol("u", discover(u_data))

    # The expression is sorted by "a" before conversion, so the frame is
    # compared against the expected 0..9 column in a fixed row order.
    pd.testing.assert_frame_equal(
        odo(compute(concat(t, u).sort("a"), {t: t_table, u: u_table}), pd.DataFrame),
        pd.DataFrame(np.arange(10), columns=["a"]),
    )
Esempio n. 4
def test_concat():
    """Check that ``concat`` of two CSV-backed expressions stacks their rows.

    Two CSV texts with the header ``a,b`` and two rows each are registered
    through ``filetexts`` (presumably materialised as files for the
    duration of the ``with`` block -- confirm against the helper). The
    concatenation, converted to a ``pd.DataFrame``, must contain the
    values 1-8 laid out as four rows of two columns.
    """
    d = {"a.csv": "a,b\n1,2\n3,4", "b.csv": "a,b\n5,6\n7,8"}

    with filetexts(d):
        a_rsc = resource("a.csv")
        b_rsc = resource("b.csv")

        a = symbol("a", discover(a_rsc))
        b = symbol("b", discover(b_rsc))

        pd.testing.assert_frame_equal(
            odo(compute(concat(a, b), {a: a_rsc, b: b_rsc}), pd.DataFrame),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.arange(1, 9, dtype="int64").reshape(4, 2), columns=list("ab")),
        )
Esempio n. 5
def test_concat_invalid_axis(sql_two_tables):
    """Computing an ``axis=1`` concat on the fixture tables must fail.

    The expected failure is a ``ValueError`` whose message contains the
    quoted word ``'merge'``.
    """
    t_table, u_table = sql_two_tables

    # Build both input frames first, then load each into its table.
    loads = (
        (pd.DataFrame(np.arange(5), columns=['a']), t_table),
        (pd.DataFrame(np.arange(5, 10), columns=['a']), u_table),
    )
    for frame, target in loads:
        odo(frame, target)

    # A record dshape would not let us build a `Concat` node with
    # axis=1, so both symbols get an explicit two-dimensional shape.
    two_dim_dshape = '5 * 1 * int32'
    t = symbol('t', two_dim_dshape)
    u = symbol('u', two_dim_dshape)
    scope = {t: t_table, u: u_table}

    with pytest.raises(ValueError) as excinfo:
        compute(concat(t, u, axis=1), scope, return_type='native')

    # The error text has to keep pointing users at merge.
    message = str(excinfo.value)
    assert "'merge'" in message
Esempio n. 6
def test_concat_invalid_axis(sql_two_tables):
    """An ``axis=1`` concat over the fixture tables raises ``ValueError``.

    The raised message is required to contain the quoted word
    ``'merge'``.
    """
    t_table, u_table = sql_two_tables

    # Create both source frames up front, then push each into its table.
    first_half = pd.DataFrame(np.arange(5), columns=['a'])
    second_half = pd.DataFrame(np.arange(5, 10), columns=['a'])
    for frame, target in ((first_half, t_table), (second_half, u_table)):
        odo(frame, target)

    # With a record dshape the `Concat` node could not be created for
    # axis=1, hence the explicit non-record shape on both symbols.
    shape_spec = '5 * 1 * int32'
    t = symbol('t', shape_spec)
    u = symbol('u', shape_spec)
    bindings = {t: t_table, u: u_table}

    with pytest.raises(ValueError) as raised:
        compute(concat(t, u, axis=1), bindings)

    # Keep the hint that tells users to use merge.
    error_text = str(raised.value)
    assert "'merge'" in error_text
Esempio n. 7
def test_concat():
    """Check that ``concat`` of two CSV-backed expressions stacks their rows.

    Two CSV texts with the header ``a,b`` and two rows each are registered
    through ``filetexts`` (presumably materialised as files for the
    duration of the ``with`` block -- confirm against the helper). The
    concatenation, converted to a ``pd.DataFrame``, must contain the
    values 1-8 laid out as four rows under the columns ``a`` and ``b``.
    """
    d = {'a.csv': 'a,b\n1,2\n3,4',
         'b.csv': 'a,b\n5,6\n7,8'}

    with filetexts(d):
        a_rsc = data('a.csv')
        b_rsc = data('b.csv')

        a = symbol('a', discover(a_rsc))
        b = symbol('b', discover(b_rsc))

        pd.testing.assert_frame_equal(
            odo(
                compute(concat(a, b), {a: a_rsc, b: b_rsc}), pd.DataFrame,
            ),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.arange(1, 9, dtype='int64').reshape(4, 2),
                         columns=list('ab')),
        )
Esempio n. 8
def test_concat():
    """Verify that concatenating two CSV-backed expressions stacks the rows.

    ``filetexts`` is given two CSV texts sharing the header ``a,b``
    (presumably written out as files while the ``with`` block is active --
    confirm against the helper). Converting the computed concatenation to
    a ``pd.DataFrame`` must give the values 1-8 as four rows under the
    columns ``a`` and ``b``.
    """
    d = {'a.csv': 'a,b\n1,2\n3,4',
         'b.csv': 'a,b\n5,6\n7,8'}

    with filetexts(d):
        a_rsc = data('a.csv')
        b_rsc = data('b.csv')

        a = symbol('a', discover(a_rsc))
        b = symbol('b', discover(b_rsc))

        pd.testing.assert_frame_equal(
            odo(
                compute(concat(a, b), {a: a_rsc, b: b_rsc}), pd.DataFrame,
            ),
            # windows needs explicit int64 construction b/c default is int32
            pd.DataFrame(np.arange(1, 9, dtype='int64').reshape(4, 2),
                         columns=list('ab')),
        )
Esempio n. 9
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 24 17:50:05 2015

@author: bolaka
"""

# suppress pandas warnings
# The two filters below are not pandas-specific: they ignore every
# RuntimeWarning and FutureWarning, whichever module raises it.
import warnings
warnings.simplefilter(action = "ignore", category = RuntimeWarning)
warnings.simplefilter(action = "ignore", category = FutureWarning)

# imports
import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn import preprocessing
from numpy.random import seed
from blaze import CSV, Table, concat

# reproduce results
# NOTE(review): `seed` is imported above but never called in the visible
# code, so nothing here fixes the random state -- confirm whether a
# `seed(...)` call is missing.

# Wrap the training CSV file in a blaze Table.
train_csv = CSV('train.csv')
train = Table(train_csv)

# Wrap the test CSV file the same way.
test_csv = CSV('test.csv')
test = Table(test_csv)

# Concatenate the two tables along axis 1.
# NOTE(review): if the intent is to stack the train and test rows, axis=0
# looks like the right choice; axis=1 may be rejected for tabular inputs --
# confirm against the blaze `concat` documentation.
combined = concat(train, test, axis=1)