Example #1
0
def test_option():
    """Equal ``Option`` dshapes pass; differing wrapped types raise."""
    assert_dshape_equal(Option(int32), Option(int32))

    with pytest.raises(AssertionError) as excinfo:
        assert_dshape_equal(Option(int32), Option(float32))

    # The failure message names both mismatched types and the path to them.
    message = str(excinfo.value)
    assert "'int32' != 'float32'" in message
    assert '_.ty' in message
Example #2
0
def test_nested():
    """Nested records: identical shapes pass, a differing leaf type fails."""
    matching = "var * {a: 3 * {b: int32}}"
    assert_dshape_equal(dshape(matching), dshape(matching))

    with pytest.raises(AssertionError) as excinfo:
        assert_dshape_equal(
            dshape(matching),
            dshape("var * {a: 3 * {b: float32}}"),
        )

    message = str(excinfo.value)
    assert "'int32' != 'float32'" in message
    # The reported path walks through both record levels down to the leaf.
    assert "_.measure['a'].measure['b'].name" in message
Example #3
0
def test_Projection_retains_shape():
    """Projecting two fields keeps the ``5 *`` dimension and drops ``id``."""
    t = symbol('t', '5 * {name: string, amount: int, id: int32}')

    actual = t[['name', 'amount']].dshape
    expected = dshape('5 * {name: string, amount: int}')
    assert_dshape_equal(actual, expected)
Example #4
0
def test_datashape_measure():
    """Comparing ``int`` with ``string`` fails and points at ``_.measure``."""
    assert_dshape_equal(dshape("int"), dshape("int"))

    with pytest.raises(AssertionError) as excinfo:
        assert_dshape_equal(dshape("int"), dshape("string"))

    message = str(excinfo.value)
    assert "int32 != string" in message
    assert "_.measure" in message
Example #5
0
def test_add_data_to_server(temp_add_server, serial):
    """Add the iris CSV via ``/add``, then check ``/datashape`` and ``/compute``."""
    # Step 1: register the dataset with the server.
    iris_path = example('iris.csv')
    add_payload = serial.dumps({'iris': iris_path})
    add_response = temp_add_server.post(
        '/add',
        headers=mimetype(serial),
        data=add_payload,
    )
    assert 'CREATED' in add_response.status
    assert add_response.status_code == RC.CREATED

    # Step 2: the served datashape must match what discover() reports locally.
    shape_response = temp_add_server.get('/datashape')
    expected_shape = discover({'iris': data(iris_path)})
    served_shape = datashape.dshape(shape_response.data.decode('utf-8'))
    assert_dshape_equal(
        served_shape.measure.dict['iris'],
        expected_shape.measure.dict['iris'],
    )

    # Step 3: compute on the added data through a client whose ``requests``
    # attribute has been replaced by the test server.
    client.requests = temp_add_server
    t = data(bz.Client('localhost:6363'))
    expr = t.iris.petal_length.sum()

    compute_payload = serial.dumps({'expr': to_tree(expr)})
    compute_response = temp_add_server.post(
        '/compute',
        data=compute_payload,
        headers=mimetype(serial),
    )

    body = serial.loads(compute_response.data)
    result = serial.data_loads(body['data'])
    expected = compute(expr, {'iris': data(iris_path)})
    assert result == expected
Example #6
0
def test_add_expanded_payload_has_effect(temp_add_server, serial):
    """Check that ``kwargs`` in the expanded ``/add`` payload are honoured.

    The expanded payload format must actually pass its arguments through to
    the resource constructor.
    """
    iris_path = example('iris-latin1.tsv')
    csv_kwargs = {'delimiter': '\t', 'encoding': 'iso-8859-1'}
    resource_spec = {'source': iris_path, 'kwargs': csv_kwargs}
    add_payload = serial.dumps({'iris': resource_spec})
    add_response = temp_add_server.post(
        '/add',
        headers=mimetype(serial),
        data=add_payload,
    )
    assert 'CREATED' in add_response.status
    assert add_response.status_code == RC.CREATED

    # The served datashape must match a local discover() using the same kwargs.
    shape_response = temp_add_server.get('/datashape')
    expected_shape = discover({'iris': data(iris_path, **csv_kwargs)})
    served_shape = datashape.dshape(shape_response.data.decode('utf-8'))
    assert_dshape_equal(
        served_shape.measure.dict['iris'],
        expected_shape.measure.dict['iris'],
    )

    # Compute on the added data through a client backed by the test server.
    client.requests = temp_add_server
    t = data(bz.Client('localhost:6363'))
    expr = t.iris.petal_length.sum()

    compute_payload = serial.dumps({'expr': to_tree(expr)})
    compute_response = temp_add_server.post(
        '/compute',
        data=compute_payload,
        headers=mimetype(serial),
    )

    body = serial.loads(compute_response.data)
    result = serial.data_loads(body['data'])
    expected = compute(expr, {'iris': data(iris_path, **csv_kwargs)})
    assert result == expected
Example #7
0
def test_compute_kwargs(test, serial):
    """``/compute`` fails without ``return_df`` and succeeds when it is passed."""
    expr = t.dumb.sort()

    # Without compute_kwargs the server reports an internal error.
    bad_query = {'expr': to_tree(expr)}
    failure = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(bad_query),
    )
    assert failure.status_code == RC.INTERNAL_SERVER_ERROR
    assert b'return_df must be passed' in failure.data

    # Supplying return_df through compute_kwargs makes the request succeed.
    compute_kwargs = {'return_df': odo(DumbResource.df, list)}
    good_query = {'expr': to_tree(expr), 'compute_kwargs': compute_kwargs}
    success = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(good_query),
    )
    assert success.status_code == RC.OK

    tdata = serial.loads(success.data)
    expected_dshape = discover(DumbResource.df)
    assert_dshape_equal(datashape.dshape(tdata['datashape']), expected_dshape)

    frame = odo(
        serial.data_loads(tdata['data']),
        DataFrame,
        dshape=expected_dshape,
    )
    assert_frame_equal(frame, DumbResource.df)
Example #8
0
def test_record():
    """Exercise ``assert_dshape_equal`` on ``Record`` types.

    Covers identical records, a field type mismatch, a field name mismatch,
    and order-insensitive comparison via ``check_record_order=False``.
    """
    assert_dshape_equal(
        Record((('a', int32), ('b', float32))),
        Record((('a', int32), ('b', float32))),
    )

    # Same field names, different type for 'b'.
    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(
            Record((('a', int32), ('b', float32))),
            Record((('a', int32), ('b', int32))),
        )
    # Inspect the raised exception (``e.value``), not the ``ExceptionInfo``
    # wrapper: the wrapper's string form is not the assertion message.
    assert "'float32' != 'int32'" in str(e.value)
    assert "_['b'].name" in str(e.value)

    # Same types, different name for the second field.
    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(
            Record((('a', int32), ('b', float32))),
            Record((('a', int32), ('c', float32))),
        )
    assert "'b' != 'c'" in str(e.value)

    # Field order is ignored when check_record_order is disabled.
    assert_dshape_equal(
        Record((('b', float32), ('a', int32))),
        Record((('a', int32), ('b', float32))),
        check_record_order=False,
    )

    # Types are still compared per field name when order is ignored.
    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(
            Record((('b', float32), ('a', float32))),
            Record((('a', int32), ('b', float32))),
            check_record_order=False,
        )
    assert "'float32' != 'int32'" in str(e.value)
    assert "_['a']" in str(e.value)
Example #9
0
def test_categorical_pandas():
    """A pandas category column is discovered as a ``Categorical`` dshape."""
    letters = list('a' * 5 + 'b' * 5 + 'c' * 5)
    df = pd.DataFrame({'x': letters, 'y': range(15)}, columns=['x', 'y'])
    df.x = df.x.astype('category')

    # Whole frame: a record with the categorical column and the integer one.
    frame_shape = discover(df)
    expected_frame = 15 * Record([
        ('x', Categorical(['a', 'b', 'c'])),
        ('y', int64),
    ])
    assert_dshape_equal(frame_shape, expected_frame)

    # Single column: just the categorical measure.
    assert_dshape_equal(discover(df.x), 15 * Categorical(['a', 'b', 'c']))
Example #10
0
def test_funcproto(sym):
    """Parse function prototypes, including trailing commas and no arguments."""
    # Simple funcproto
    parsed = parse('(float32) -> float64', sym)
    expected = ct.DataShape(
        ct.Function(ct.DataShape(ct.float32), ct.DataShape(ct.float64))
    )
    assert parsed == expected

    parsed = parse('(int16, int32) -> bool', sym)
    expected = ct.DataShape(
        ct.Function(
            ct.DataShape(ct.int16),
            ct.DataShape(ct.int32),
            ct.DataShape(ct.bool_),
        )
    )
    assert parsed == expected

    # A trailing comma is ok
    parsed = parse('(float32,) -> float64', sym)
    expected = ct.DataShape(
        ct.Function(ct.DataShape(ct.float32), ct.DataShape(ct.float64))
    )
    assert parsed == expected

    parsed = parse('(int16, int32,) -> bool', sym)
    expected = ct.DataShape(
        ct.Function(
            ct.DataShape(ct.int16),
            ct.DataShape(ct.int32),
            ct.DataShape(ct.bool_),
        )
    )
    assert_dshape_equal(parsed, expected)

    # Empty argument signature.
    parsed = parse('() -> bool', sym)
    expected = ct.DataShape(ct.Function(ct.DataShape(ct.bool_)))
    assert_dshape_equal(parsed, expected)
Example #11
0
def test_add_data_to_server(temp_add_server, serial):
    """Add the iris CSV via ``/add``, then check ``/datashape`` and ``/compute``."""
    # Register the dataset with the server.
    iris_path = example('iris.csv')
    add_payload = serial.dumps({'iris': iris_path})
    add_response = temp_add_server.post(
        '/add',
        headers=mimetype(serial),
        data=add_payload,
    )
    assert 'CREATED' in add_response.status
    assert add_response.status_code == RC.CREATED

    # The served datashape must match what discover() reports locally.
    shape_response = temp_add_server.get('/datashape')
    expected_shape = discover({'iris': data(iris_path)})
    served_shape = datashape.dshape(shape_response.data.decode('utf-8'))
    assert_dshape_equal(
        served_shape.measure.dict['iris'],
        expected_shape.measure.dict['iris'],
    )

    # Build the expression against local data and evaluate it on the server.
    t = data({'iris': data(iris_path)})
    expr = t.iris.petal_length.sum()

    compute_payload = serial.dumps({'expr': to_tree(expr)})
    compute_response = temp_add_server.post(
        '/compute',
        data=compute_payload,
        headers=mimetype(serial),
    )

    body = serial.loads(compute_response.data)
    result = serial.data_loads(body['data'])
    expected = compute(expr, {'iris': data(iris_path)})
    assert result == expected
Example #12
0
def test_odo_kwargs(test, serial):
    """``/compute`` needs ``return_df`` in ``odo_kwargs`` to succeed."""
    expr = t.dumb

    # Without odo_kwargs the request fails with a server error.
    bad_query = {'expr': to_tree(expr)}
    failure = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(bad_query),
    )
    assert failure.status_code == 500
    assert b'return_df must be passed' in failure.data

    # With return_df supplied, the request succeeds.
    odo_kwargs = {'return_df': odo(DumbResource.df, list)}
    good_query = {'expr': to_tree(expr), 'odo_kwargs': odo_kwargs}
    success = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(good_query),
    )
    assert success.status_code == 200

    payload = serial.loads(success.data)
    expected_dshape = discover(DumbResource.df)
    assert_dshape_equal(
        datashape.dshape(payload['datashape']),
        expected_dshape,
    )

    frame = odo(payload['data'], DataFrame, dshape=expected_dshape)
    assert_frame_equal(frame, DumbResource.df)
Example #13
0
def test_datashape_measure():
    """Matching measures pass; ``int`` vs ``string`` raises with a path."""
    assert_dshape_equal(dshape('int'), dshape('int'))

    with pytest.raises(AssertionError) as raised:
        assert_dshape_equal(dshape('int'), dshape('string'))

    error_text = str(raised.value)
    for fragment in ('int32 != string', '_.measure'):
        assert fragment in error_text
Example #14
0
def test_compute_kwargs(test, serial):
    """``/compute`` needs ``return_df`` in ``compute_kwargs`` to succeed."""
    expr = t.dumb.sort()

    # Without compute_kwargs the request fails with a server error.
    bad_query = {'expr': to_tree(expr)}
    failure = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(bad_query),
    )
    assert failure.status_code == 500
    assert b'return_df must be passed' in failure.data

    # With return_df supplied, the request succeeds.
    compute_kwargs = {'return_df': odo(DumbResource.df, list)}
    good_query = {'expr': to_tree(expr), 'compute_kwargs': compute_kwargs}
    success = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(good_query),
    )
    assert success.status_code == 200

    payload = serial.loads(success.data)
    expected_dshape = discover(DumbResource.df)
    assert_dshape_equal(
        datashape.dshape(payload['datashape']),
        expected_dshape,
    )

    frame = odo(payload['data'], DataFrame, dshape=expected_dshape)
    assert_frame_equal(frame, DumbResource.df)
Example #15
0
def test_add_expanded_payload_has_effect(temp_add_server, serial):
    """Check that ``kwargs`` in the expanded ``/add`` payload are honoured.

    The expanded payload format must actually pass its arguments through to
    the resource constructor.
    """
    iris_path = example('iris-latin1.tsv')
    csv_kwargs = {'delimiter': '\t', 'encoding': 'iso-8859-1'}
    resource_spec = {'source': iris_path, 'kwargs': csv_kwargs}
    add_payload = serial.dumps({'iris': resource_spec})
    add_response = temp_add_server.post(
        '/add',
        headers=mimetype(serial),
        data=add_payload,
    )
    assert 'CREATED' in add_response.status
    assert add_response.status_code == RC.CREATED

    # The served datashape must match a local discover() using the same kwargs.
    shape_response = temp_add_server.get('/datashape')
    expected_shape = discover({'iris': data(iris_path, **csv_kwargs)})
    served_shape = datashape.dshape(shape_response.data.decode('utf-8'))
    assert_dshape_equal(
        served_shape.measure.dict['iris'],
        expected_shape.measure.dict['iris'],
    )

    # Build the expression against local data and evaluate it on the server.
    t = data({'iris': data(iris_path, **csv_kwargs)})
    expr = t.iris.petal_length.sum()

    compute_payload = serial.dumps({'expr': to_tree(expr)})
    compute_response = temp_add_server.post(
        '/compute',
        data=compute_payload,
        headers=mimetype(serial),
    )

    body = serial.loads(compute_response.data)
    result = serial.data_loads(body['data'])
    expected = compute(expr, {'iris': data(iris_path, **csv_kwargs)})
    assert result == expected
Example #16
0
def test_option():
    """Check ``assert_dshape_equal`` on matching and mismatching ``Option``s."""
    assert_dshape_equal(Option(int32), Option(int32))

    with pytest.raises(AssertionError) as raised:
        assert_dshape_equal(Option(int32), Option(float32))

    error_text = str(raised.value)
    for fragment in ("'int32' != 'float32'", '_.ty'):
        assert fragment in error_text
Example #17
0
def test_record():
    """Exercise ``assert_dshape_equal`` on ``Record`` types.

    Covers identical records, a field type mismatch, a field name mismatch,
    and order-insensitive comparison via ``check_record_order=False``.
    """
    assert_dshape_equal(
        Record((('a', int32), ('b', float32))),
        Record((('a', int32), ('b', float32))),
    )

    # Same field names, different type for 'b'.
    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(
            Record((('a', int32), ('b', float32))),
            Record((('a', int32), ('b', int32))),
        )
    # Read the message from the raised exception (``e.value``); the string
    # form of the ``ExceptionInfo`` wrapper itself is not the message.
    assert "'float32' != 'int32'" in str(e.value)
    assert "_['b'].name" in str(e.value)

    # Same types, different name for the second field.
    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(
            Record((('a', int32), ('b', float32))),
            Record((('a', int32), ('c', float32))),
        )
    assert "'b' != 'c'" in str(e.value)

    # Field order is ignored when check_record_order is disabled.
    assert_dshape_equal(
        Record((('b', float32), ('a', int32))),
        Record((('a', int32), ('b', float32))),
        check_record_order=False,
    )

    # Types are still compared per field name when order is ignored.
    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(
            Record((('b', float32), ('a', float32))),
            Record((('a', int32), ('b', float32))),
            check_record_order=False,
        )
    assert "'float32' != 'int32'" in str(e.value)
    assert "_['a']" in str(e.value)
Example #18
0
def test_datashape_measure():
    """``int`` equals itself; ``int`` vs ``string`` fails at ``_.measure``."""
    int_shape = 'int'
    assert_dshape_equal(dshape(int_shape), dshape(int_shape))

    with pytest.raises(AssertionError) as failure:
        assert_dshape_equal(dshape(int_shape), dshape('string'))

    reported = str(failure.value)
    assert 'int32 != string' in reported
    assert '_.measure' in reported
Example #19
0
def test_like(ds):
    """``str.like`` keeps its pattern and yields a (possibly optional) bool."""
    t = symbol('t', ds)
    # Use the ``name`` field when the symbol has one, otherwise the symbol.
    target = getattr(t, 'name', t)
    expr = target.str.like('Alice*')
    assert expr.pattern == 'Alice*'

    actual = expr.schema.measure
    # The result is optional exactly when the input dshape text contains '?'.
    optional_marker = '?' if '?' in ds else ''
    expected = dshape('%sbool' % optional_marker).measure
    assert_dshape_equal(actual, expected)
Example #20
0
def test_function(dshape_, contains):
    """A function dshape differing from the base one fails with ``contains``."""
    base = dshape('(int32, int64) -> int64')
    assert_dshape_equal(base, base)

    with pytest.raises(AssertionError) as excinfo:
        assert_dshape_equal(dshape(dshape_), base)

    # Every expected fragment must appear in the failure message.
    for fragment in contains:
        assert fragment in str(excinfo.value)
Example #21
0
def test_function(dshape_, contains):
    """Compare ``dshape_`` with a fixed function signature and expect failure."""
    reference = dshape('(int32, int64) -> int64')
    assert_dshape_equal(reference, reference)

    with pytest.raises(AssertionError) as raised:
        candidate = dshape(dshape_)
        assert_dshape_equal(candidate, reference)

    # Each item of ``contains`` must be part of the error text.
    for expected_text in contains:
        assert expected_text in str(raised.value)
Example #22
0
def test_pickle_roundtrip():
    """``Data`` objects and expressions on them survive a pickle round trip."""
    ds = Data(1)
    restored = pickle.loads(pickle.dumps(ds))
    assert ds.isidentical(restored)

    incremented = ds + 1
    restored = pickle.loads(pickle.dumps(ds + 1))
    assert incremented.isidentical(restored)

    # Array-backed data: compare contents element-wise, then the dshape.
    es = Data(np.array([1, 2, 3]))
    rs = pickle.loads(pickle.dumps(es))
    elementwise = es.data == rs.data
    assert elementwise.all()
    assert_dshape_equal(es.dshape, rs.dshape)
Example #23
0
def test_pickle_roundtrip():
    """``data`` objects and expressions on them survive a pickle round trip."""
    ds = data(1)
    restored = pickle.loads(pickle.dumps(ds))
    assert ds.isidentical(restored)

    incremented = ds + 1
    restored = pickle.loads(pickle.dumps(ds + 1))
    assert incremented.isidentical(restored)

    # Array-backed data: compare contents element-wise, then the dshape.
    es = data(np.array([1, 2, 3]))
    rs = pickle.loads(pickle.dumps(es))
    elementwise = es.data == rs.data
    assert elementwise.all()
    assert_dshape_equal(es.dshape, rs.dshape)
Example #24
0
def test_like(ds):
    """Check the pattern and result measure of a ``str.like`` expression."""
    t = symbol('t', ds)
    # Fall back to the symbol itself when it has no ``name`` attribute.
    subject = getattr(t, 'name', t)
    expr = subject.str.like('Alice*')
    assert expr.pattern == 'Alice*'

    result_measure = expr.schema.measure
    # A '?' in the input dshape text makes the expected bool optional.
    if '?' in ds:
        prefix = '?'
    else:
        prefix = ''
    assert_dshape_equal(result_measure, dshape('%sbool' % prefix).measure)
Example #25
0
def test_coalesce_invalid_promotion(lhs, rhs, expected):
    """``coalesce`` yields the expected dshape and keeps both operands."""
    # Joe 2016-03-16: imho promote(record, record) should check that the keys
    # are the same and then create a new record from:
    # zip(keys, map(promote, lhs, rhs))
    left = symbol("e", lhs)
    right = symbol("g", rhs)
    expr = coalesce(left, right)

    assert_dshape_equal(expr.dshape, dshape(expected))
    assert expr.lhs.isidentical(left)
    assert expr.rhs.isidentical(right)
Example #26
0
def test_coalesce_invalid_promotion(lhs, rhs, expected):
    """Check the dshape and the stored operands of a ``coalesce`` expression."""
    # Joe 2016-03-16: imho promote(record, record) should check that the keys
    # are the same and then create a new record from:
    # zip(keys, map(promote, lhs, rhs))
    first_operand = symbol('e', lhs)
    second_operand = symbol('g', rhs)
    coalesced = coalesce(first_operand, second_operand)

    expected_dshape = dshape(expected)
    assert_dshape_equal(coalesced.dshape, expected_dshape)
    assert coalesced.lhs.isidentical(first_operand)
    assert coalesced.rhs.isidentical(second_operand)
Example #27
0
def test_discover():
    """Discover a SQLAlchemy string type and a three-column table."""
    assert discover(sa.String()) == datashape.string

    metadata = sa.MetaData()
    columns = [
        sa.Column('name', sa.String),
        sa.Column('amount', sa.Integer),
        sa.Column('timestamp', sa.DateTime, primary_key=True),
    ]
    s = sa.Table('accounts', metadata, *columns)

    ds = dshape('var * {name: ?string, amount: ?int32, timestamp: datetime}')
    assert_dshape_equal(discover(s), ds)

    # Field names of the expected dshape must be string types.
    for field_name in ds.measure.names:
        assert isinstance(field_name, string_types)
Example #28
0
def test_float_dtype(sql_with_floats):
    """Float columns are discovered as ``float64`` from the table and the bind."""
    table, bind = sql_with_floats
    expected = dshape("var * {a: float64, b: ?float64}")

    assert_dshape_equal(discover(table), expected)

    # Also check that reflection from the database returns expected dshape.
    reflected = discover(bind).subshape[table.name]
    assert_dshape_equal(reflected, expected)
Example #29
0
def test_pickle_roundtrip():
    """Round-trip ``data`` objects through pickle at the highest protocol."""
    protocol = pickle.HIGHEST_PROTOCOL

    ds = data(1)
    restored = pickle.loads(pickle.dumps(ds, protocol=protocol))
    assert ds.isidentical(restored)

    incremented = ds + 1
    restored = pickle.loads(pickle.dumps(ds + 1, protocol=protocol))
    assert incremented.isidentical(restored)

    # Array-backed data: compare contents element-wise, then the dshape.
    es = data(np.array([1, 2, 3]))
    rs = pickle.loads(pickle.dumps(es, protocol=protocol))
    elementwise = es.data == rs.data
    assert elementwise.all()
    assert_dshape_equal(es.dshape, rs.dshape)
Example #30
0
def test_discover():
    """Discover a dask dataframe with categorical, int64 and float columns."""
    columns = {
        "x": list("a" * 5 + "b" * 5 + "c" * 5),
        "y": np.arange(15, dtype=np.int64),
        "z": list(map(float, range(15))),
    }
    df = pd.DataFrame(columns, columns=["x", "y", "z"])
    df.x = df.x.astype("category")
    ddf = dd.from_pandas(df, npartitions=2)

    # Whole frame: a variable-length record of the three columns.
    frame_shape = discover(ddf)
    expected_frame = var * Record([
        ("x", Categorical(["a", "b", "c"])),
        ("y", int64),
        ("z", float64),
    ])
    assert_dshape_equal(frame_shape, expected_frame)

    # Single column: just the categorical measure.
    assert_dshape_equal(discover(ddf.x), var * Categorical(["a", "b", "c"]))
Example #31
0
def test_discover():
    """Discover a dask dataframe with categorical, integer and float columns."""
    letters = list('a' * 5 + 'b' * 5 + 'c' * 5)
    floats = list(map(float, range(15)))
    df = pd.DataFrame(
        {'x': letters, 'y': range(15), 'z': floats},
        columns=['x', 'y', 'z'],
    )
    df.x = df.x.astype('category')
    ddf = dd.from_pandas(df, npartitions=2)

    # Whole frame: a variable-length record of the three columns.
    frame_shape = discover(ddf)
    expected_frame = var * Record([
        ('x', Categorical(['a', 'b', 'c'])),
        ('y', int64),
        ('z', float64),
    ])
    assert_dshape_equal(frame_shape, expected_frame)

    # Single column: just the categorical measure.
    assert_dshape_equal(discover(ddf.x), var * Categorical(['a', 'b', 'c']))
Example #32
0
File: test_sql.py Project: EGQM/odo
def test_discover():
    """Check ``discover`` on a SQLAlchemy type and on a table definition."""
    assert discover(sa.String()) == datashape.string

    metadata = sa.MetaData()
    name_column = sa.Column('name', sa.String)
    amount_column = sa.Column('amount', sa.Integer)
    timestamp_column = sa.Column('timestamp', sa.DateTime, primary_key=True)
    s = sa.Table(
        'accounts',
        metadata,
        name_column,
        amount_column,
        timestamp_column,
    )

    ds = dshape('var * {name: ?string, amount: ?int32, timestamp: datetime}')
    assert_dshape_equal(discover(s), ds)

    # Field names of the expected dshape must be string types.
    for column_name in ds.measure.names:
        assert isinstance(column_name, string_types)
Example #33
0
def test_compute_up_on_dict():
    """Discover a dict of lists and compute a field access on it."""
    d = {'a': [1, 2, 3], 'b': [4, 5, 6]}

    discovered = discover(d)
    expected = dshape('{a: 3 * int64, b: 3 * int64}').measure
    # dict order undefined, so field order is not checked
    assert_dshape_equal(discovered, expected, check_record_order=False)

    s = symbol('s', discover(d))
    computed = compute(s.a, {s: d})
    assert computed == [1, 2, 3]
Example #34
0
File: test_s3.py Project: EGQM/odo
def test_s3_gz_csv_discover():
    """Discover the dshape of a gzipped CSV addressed by an ``s3://`` URI."""
    s3_csv_type = S3(CSV)
    source = s3_csv_type('s3://nyqpug/tips.gz')
    result = discover(source)

    expected = datashape.dshape("""var * {
      total_bill: float64,
      tip: float64,
      sex: ?string,
      smoker: ?string,
      day: ?string,
      time: ?string,
      size: int64
      }""")
    assert_dshape_equal(result, expected)
Example #35
0
def test_pickle_roundtrip():
    """Pickle round trips at ``HIGHEST_PROTOCOL`` preserve ``data`` objects."""
    def roundtrip(obj):
        # Serialize and immediately deserialize with the highest protocol.
        return pickle.loads(pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL))

    ds = data(1)
    assert ds.isidentical(roundtrip(ds))
    assert (ds + 1).isidentical(roundtrip(ds + 1))

    # Array-backed data: compare contents element-wise, then the dshape.
    es = data(np.array([1, 2, 3]))
    rs = roundtrip(es)
    assert (es.data == rs.data).all()
    assert_dshape_equal(es.dshape, rs.dshape)
Example #36
0
def test_s3_gz_csv_discover():
    """The gzipped tips CSV at an ``s3://`` URI has the expected dshape."""
    expected_text = """var * {
      total_bill: float64,
      tip: float64,
      sex: ?string,
      smoker: ?string,
      day: ?string,
      time: ?string,
      size: int64
      }"""

    result = discover(S3(CSV)('s3://nyqpug/tips.gz'))
    expected = datashape.dshape(expected_text)
    assert_dshape_equal(result, expected)
Example #37
0
def test_categorical_pandas():
    """A pandas category column is discovered as a ``Categorical`` dshape."""
    letters = list('a' * 5 + 'b' * 5 + 'c' * 5)
    numbers = np.arange(15, dtype=np.int64)
    df = pd.DataFrame({'x': letters, 'y': numbers}, columns=['x', 'y'])
    df.x = df.x.astype('category')

    # Whole frame: a record with the categorical column and the int64 one.
    frame_shape = discover(df)
    expected_frame = 15 * Record([
        ('x', Categorical(['a', 'b', 'c'])),
        ('y', int64),
    ])
    assert_dshape_equal(frame_shape, expected_frame)

    # Single column: just the categorical measure.
    assert_dshape_equal(discover(df.x), 15 * Categorical(['a', 'b', 'c']))
Example #38
0
def test_nested():
    """A mismatch deep inside nested records is reported with its full path."""
    int_shape = 'var * {a: 3 * {b: int32}}'
    float_shape = 'var * {a: 3 * {b: float32}}'

    assert_dshape_equal(dshape(int_shape), dshape(int_shape))

    with pytest.raises(AssertionError) as raised:
        assert_dshape_equal(dshape(int_shape), dshape(float_shape))

    error_text = str(raised.value)
    assert "'int32' != 'float32'" in error_text
    assert "_.measure['a'].measure['b'].name" in error_text
def test_discover():
    """Discover a dask dataframe with categorical, int64 and float columns."""
    letters = list('a' * 5 + 'b' * 5 + 'c' * 5)
    integers = np.arange(15, dtype=np.int64)
    floats = list(map(float, range(15)))
    df = pd.DataFrame(
        {'x': letters, 'y': integers, 'z': floats},
        columns=['x', 'y', 'z'],
    )
    df.x = df.x.astype('category')
    ddf = dd.from_pandas(df, npartitions=2)

    # Whole frame: a variable-length record of the three columns.
    frame_shape = discover(ddf)
    categories = Categorical(['a', 'b', 'c'])
    expected_frame = var * Record([
        ('x', categories),
        ('y', int64),
        ('z', float64),
    ])
    assert_dshape_equal(frame_shape, expected_frame)

    # Single column: just the categorical measure.
    assert_dshape_equal(discover(ddf.x), var * Categorical(['a', 'b', 'c']))
Example #40
0
def test_isin_expr(test, serial):
    """``isin`` against another expression is computed by ``/compute``."""
    name_filter = t.accounts[t.accounts.amount > 100].name
    expr = t.cities.name.isin(name_filter)

    payload = serial.dumps({'expr': to_tree(expr)})
    result = test.post(
        '/compute',
        headers=mimetype(serial),
        data=payload,
    )

    expected = {
        'data': [False, True],
        'names': ['name'],
        'datashape': '2 * bool',
    }
    assert result.status_code == RC.OK

    resp = serial.loads(result.data)
    returned_rows = list(serial.data_loads(resp['data']))
    assert returned_rows == expected['data']
    assert list(resp['names']) == expected['names']
    assert_dshape_equal(resp['datashape'], expected['datashape'])
Example #41
0
def test_mssql_types():
    """Each listed MSSQL column type is discovered as the expected dshape."""
    # (type name in sa.dialects.mssql, column name, expected table dshape)
    cases = (
        ('BIT', 'bit', 'var * {bit: ?bool}'),
        ('DATETIMEOFFSET', 'dt', 'var * {dt: ?string}'),
        ('MONEY', 'money', 'var * {money: ?float64}'),
        ('SMALLMONEY', 'money', 'var * {money: ?float32}'),
        ('UNIQUEIDENTIFIER', 'uuid', 'var * {uuid: ?string}'),
    )
    for type_name, column_name, expected in cases:
        typ = getattr(sa.dialects.mssql, type_name)()
        t = sa.Table('t', sa.MetaData(), sa.Column(column_name, typ))
        assert_dshape_equal(discover(t), dshape(expected))
Example #42
0
def test_add_errors(temp_add_server, serial):
    """A failed ``/add`` leaves the server's datashape unchanged."""
    initial_response = temp_add_server.get('/datashape')
    pre_datashape = datashape.dshape(initial_response.data.decode('utf-8'))

    bunk_path = example('bunk.csv')
    add_payload = serial.dumps({'bunk': bunk_path})
    add_response = temp_add_server.post(
        '/add',
        headers=mimetype(serial),
        data=add_payload,
    )
    assert add_response.status_code == RC.UNPROCESSABLE_ENTITY

    # Test that the datashape of the server is accessible and unchanged after
    # trying to add a non-existent dataset.
    final_response = temp_add_server.get('/datashape')
    assert final_response.status_code == RC.OK
    post_datashape = datashape.dshape(final_response.data.decode('utf-8'))
    assert_dshape_equal(pre_datashape, post_datashape)
Example #43
0
def test_mssql_types():
    """Check the discovered dshape for several MSSQL-specific column types."""
    def check(typ, column_name, expected):
        # Wrap the type in a one-column table and compare its dshape.
        table = sa.Table('t', sa.MetaData(), sa.Column(column_name, typ))
        assert_dshape_equal(discover(table), dshape(expected))

    check(sa.dialects.mssql.BIT(), 'bit', 'var * {bit: ?bool}')
    check(sa.dialects.mssql.DATETIMEOFFSET(), 'dt', 'var * {dt: ?string}')
    check(sa.dialects.mssql.MONEY(), 'money', 'var * {money: ?float64}')
    check(sa.dialects.mssql.SMALLMONEY(), 'money', 'var * {money: ?float32}')
    check(
        sa.dialects.mssql.UNIQUEIDENTIFIER(),
        'uuid',
        'var * {uuid: ?string}',
    )
Example #44
0
def test_add_errors(temp_add_server, serial):
    """Adding ``bunk.csv`` is rejected and the datashape stays the same."""
    before_text = temp_add_server.get('/datashape').data.decode('utf-8')
    pre_datashape = datashape.dshape(before_text)

    bunk_path = example('bunk.csv')
    blob = serial.dumps({'bunk': bunk_path})
    rejected = temp_add_server.post(
        '/add',
        headers=mimetype(serial),
        data=blob,
    )
    assert rejected.status_code == RC.UNPROCESSABLE_ENTITY

    # Test that the datashape of the server is accessible and unchanged after
    # trying to add a non-existent dataset.
    after = temp_add_server.get('/datashape')
    assert after.status_code == RC.OK
    after_text = after.data.decode('utf-8')
    assert_dshape_equal(pre_datashape, datashape.dshape(after_text))
Example #45
0
def test_isin_expr(test, serial):
    """Post an ``isin`` expression to ``/compute`` and check the response."""
    wealthy_accounts = t.accounts[t.accounts.amount > 100]
    name_filter = wealthy_accounts.name
    expr = t.cities.name.isin(name_filter)

    query = {'expr': to_tree(expr)}
    result = test.post(
        '/compute',
        headers=mimetype(serial),
        data=serial.dumps(query),
    )

    expected = {
        'data': [False, True],
        'names': ['name'],
        'datashape': '2 * bool',
    }
    assert result.status_code == RC.OK

    resp = serial.loads(result.data)
    loaded = serial.data_loads(resp['data'])
    assert list(loaded) == expected['data']
    assert list(resp['names']) == expected['names']
    assert_dshape_equal(resp['datashape'], expected['datashape'])
Example #46
0
def test_datetime(cls):
    """Timezone mismatches fail unless ``check_tz=False`` is passed."""
    assert_dshape_equal(cls(), cls())
    assert_dshape_equal(cls("US/Eastern"), cls("US/Eastern"))

    with pytest.raises(AssertionError) as excinfo:
        assert_dshape_equal(cls("US/Eastern"), cls("US/Central"))

    message = str(excinfo.value)
    assert "'US/Eastern' != 'US/Central'" in message
    assert "_.tz" in message

    # The same mismatch is accepted once timezone checking is disabled.
    assert_dshape_equal(cls("US/Eastern"), cls("US/Central"), check_tz=False)
Example #47
0
def test_timedelta():
    """Unit mismatches fail unless ``check_timedelta_unit=False`` is passed."""
    assert_dshape_equal(TimeDelta(), TimeDelta())
    assert_dshape_equal(TimeDelta("ns"), TimeDelta("ns"))

    with pytest.raises(AssertionError) as excinfo:
        assert_dshape_equal(TimeDelta("us"), TimeDelta("ns"))

    message = str(excinfo.value)
    assert "'us' != 'ns'" in message
    assert "_.unit" in message

    # The same mismatch is accepted once unit checking is disabled.
    assert_dshape_equal(
        TimeDelta("us"),
        TimeDelta("ns"),
        check_timedelta_unit=False,
    )
Example #48
0
def test_merge():
    """``merge`` combines fields; merging only scalars raises ``TypeError``."""
    t = symbol('t', 'int64')
    p = symbol('p', 'var * {amount: int}')

    accounts = symbol(
        'accounts',
        'var * {name: string, balance: int32, id: int32}',
    )
    scaled_balance = accounts.balance * 1.5
    new_amount = scaled_balance.label('new')

    # A projection merged with a derived column.
    c = merge(accounts[['name', 'balance']], new_amount)
    assert c.fields == ['name', 'balance', 'new']
    assert c.schema == dshape('{name: string, balance: int32, new: float64}')

    # A scalar merged with a collection.
    d = merge(t, p)
    assert d.fields == ['t', 'amount']
    assert_dshape_equal(d.dshape, dshape('var * {t: int64, amount: int}'))

    with pytest.raises(TypeError) as excinfo:
        merge(t, t)
    assert str(excinfo.value) == 'cannot merge all scalar expressions'
Example #49
0
def test_merge():
    """Check ``merge`` fields, schema, dshape and the all-scalar error."""
    t = symbol('t', 'int64')
    p = symbol('p', 'var * {amount: int}')

    accounts_dshape = 'var * {name: string, balance: int32, id: int32}'
    accounts = symbol('accounts', accounts_dshape)
    new_amount = (accounts.balance * 1.5).label('new')

    # A projection merged with a derived column.
    projection = accounts[['name', 'balance']]
    c = merge(projection, new_amount)
    assert c.fields == ['name', 'balance', 'new']
    expected_schema = dshape('{name: string, balance: int32, new: float64}')
    assert c.schema == expected_schema

    # A scalar merged with a collection.
    d = merge(t, p)
    assert d.fields == ['t', 'amount']
    expected_dshape = dshape('var * {t: int64, amount: int}')
    assert_dshape_equal(d.dshape, expected_dshape)

    with pytest.raises(TypeError) as raised:
        merge(t, t)
    error_text = str(raised.value)
    assert error_text == 'cannot merge all scalar expressions'
Example #50
0
def test_funcproto(sym):
    """Parse function prototypes, including trailing commas and no arguments."""
    def function_dshape(*measures):
        # Wrap each measure in a DataShape and combine them into a Function.
        wrapped = [ct.DataShape(measure) for measure in measures]
        return ct.DataShape(ct.Function(*wrapped))

    # Simple funcproto
    parsed = parse('(float32) -> float64', sym)
    assert parsed == function_dshape(ct.float32, ct.float64)
    parsed = parse('(int16, int32) -> bool', sym)
    assert parsed == function_dshape(ct.int16, ct.int32, ct.bool_)

    # A trailing comma is ok
    parsed = parse('(float32,) -> float64', sym)
    assert parsed == function_dshape(ct.float32, ct.float64)
    assert_dshape_equal(
        parse('(int16, int32,) -> bool', sym),
        function_dshape(ct.int16, ct.int32, ct.bool_),
    )

    # Empty argument signature.
    assert_dshape_equal(
        parse('() -> bool', sym),
        function_dshape(ct.bool_),
    )
Example #51
0
def test_datetime(cls):
    """Check timezone comparison for ``cls`` and the ``check_tz`` switch."""
    eastern, central = 'US/Eastern', 'US/Central'

    assert_dshape_equal(cls(), cls())
    assert_dshape_equal(cls(eastern), cls(eastern))

    with pytest.raises(AssertionError) as raised:
        assert_dshape_equal(cls(eastern), cls(central))

    error_text = str(raised.value)
    assert "'US/Eastern' != 'US/Central'" in error_text
    assert '_.tz' in error_text

    # With timezone checking disabled the two dshapes compare equal.
    assert_dshape_equal(cls(eastern), cls(central), check_tz=False)
Example #52
0
def test_datetimetz_pandas():
    """Naive and tz-aware datetime columns are discovered with their tz."""
    columns = OrderedDict([
        ('naive', pd.date_range('2014', periods=5)),
        ('Europe/Moscow', pd.date_range('2014', periods=5, tz='Europe/Moscow')),
        ('UTC', pd.date_range('2014', periods=5, tz='UTC')),
        ('US/Eastern', pd.date_range('2014', periods=5, tz='US/Eastern')),
    ])
    df = pd.DataFrame(columns)

    # Whole frame: one optional datetime field per column.
    frame_shape = discover(df)
    expected_frame = 5 * Record[
        'naive': Option(DateTime(tz=None)),
        'Europe/Moscow': Option(DateTime(tz='Europe/Moscow')),
        'UTC': Option(DateTime(tz='UTC')),
        'US/Eastern': Option(DateTime(tz='US/Eastern')),
    ]
    assert_dshape_equal(frame_shape, expected_frame)

    # Individual columns: the naive one first, then each tz-aware one.
    assert_dshape_equal(discover(df.naive), 5 * Option(DateTime(tz=None)))
    for tz in ('Europe/Moscow', 'UTC', 'US/Eastern'):
        column_shape = discover(df[tz])
        assert_dshape_equal(column_shape, 5 * Option(DateTime(tz=tz)))
Example #53
0
def test_merge_options():
    """Merging optional fields keeps them optional in the result."""
    s = symbol('s', 'var * {a: ?A, b: ?B}')
    merged = merge(a=s.a, b=s.b)

    assert_dshape_equal(merged.dshape, dshape('var * {a: ?A, b: ?B}'))
    # Each merged field keeps its own optional measure.
    assert_dshape_equal(merged.a.dshape, dshape('var * ?A'))
    assert_dshape_equal(merged.b.dshape, dshape('var * ?B'))
Example #54
0
def test_merge_options():
    """Check the dshape of a keyword ``merge`` and of each merged field."""
    source_dshape = 'var * {a: ?A, b: ?B}'
    s = symbol('s', source_dshape)

    merged = merge(a=s.a, b=s.b)

    expected_whole = dshape('var * {a: ?A, b: ?B}')
    assert_dshape_equal(merged.dshape, expected_whole)

    expected_a = dshape('var * ?A')
    assert_dshape_equal(merged.a.dshape, expected_a)

    expected_b = dshape('var * ?B')
    assert_dshape_equal(merged.b.dshape, expected_b)
Example #55
0
def test_timedelta():
    """Check unit comparison for ``TimeDelta`` and its opt-out switch."""
    micro, nano = 'us', 'ns'

    assert_dshape_equal(TimeDelta(), TimeDelta())
    assert_dshape_equal(TimeDelta(nano), TimeDelta(nano))

    with pytest.raises(AssertionError) as raised:
        assert_dshape_equal(TimeDelta(micro), TimeDelta(nano))

    error_text = str(raised.value)
    assert "'us' != 'ns'" in error_text
    assert '_.unit' in error_text

    # With unit checking disabled the two dshapes compare equal.
    assert_dshape_equal(
        TimeDelta(micro),
        TimeDelta(nano),
        check_timedelta_unit=False,
    )
Example #56
0
def test_add_data_to_server(serial):
    """Add the iris CSV via ``/add``, then check ``/datashape`` and ``/compute``.

    Runs against a temporary server created from the module-level ``data``.
    """
    with temp_server(data) as test:
        # add data
        initial_datashape = datashape.dshape(test.get('/datashape').data.decode('utf-8'))
        iris_path = example('iris.csv')
        blob = serial.dumps({'iris': iris_path})
        response1 = test.post(
            '/add',
            headers=mimetype(serial),
            data=blob,
        )
        assert 'OK' in response1.status
        assert response1.status_code == 200

        # check for expected server datashape
        new_datashape = datashape.dshape(test.get('/datashape').data.decode('utf-8'))
        # Build the expected mapping on a copy so ``data`` itself is not changed.
        data2 = data.copy()
        data2.update({'iris': resource(iris_path)})
        expected2 = datashape.dshape(discover(data2))
        assert_dshape_equal(new_datashape, expected2)
        # The set of served fields must have changed after the add.
        assert new_datashape.measure.fields != initial_datashape.measure.fields

        # compute on added data
        t = Data({'iris': resource(iris_path)})
        expr = t.iris.petal_length.sum()

        response3 = test.post(
            '/compute',
            data=serial.dumps({'expr': to_tree(expr)}),
            headers=mimetype(serial)
        )

        result3 = serial.loads(response3.data)['data']
        expected3 = compute(expr, {'iris': resource(iris_path)})
        assert result3 == expected3
Example #57
0
def test_tuple():
    """Equal ``Tuple`` dshapes pass; a differing element type raises.

    The failure message must name both types and the path to the second
    element's measure.
    """
    assert_dshape_equal(Tuple((int32, float32)), Tuple((int32, float32)))

    with pytest.raises(AssertionError) as e:
        assert_dshape_equal(Tuple((int32, float32)), Tuple((int32, int32)))
    # Read the message from the raised exception (``e.value``); the string
    # form of the ``ExceptionInfo`` wrapper itself is not the message.
    message = str(e.value)
    assert "'float32' != 'int32'" in message
    assert "_.dshapes[1].measure.name" in message