Example 1
def test_concat_functions():
    def foo(a, b):
        return a + b

    df1 = vaex.from_scalars(x=1, y=2)
    df2 = vaex.from_scalars(x=2, y=3)
    df1.add_function('foo', foo)
    df2.add_function('foo', foo)
    # w has same expression and function
    df1['w'] = df1.func.foo(df1.x, df1.y)
    df2['w'] = df2.func.foo(df2.x, df2.y)
    assert df1.w.tolist() == [3]
    df = vaex.concat([df1, df2])
    assert df.w.tolist() == [1 + 2, 2 + 3]

    # now bar is a new function
    def bar1(a, b):
        return a + b

    def bar2(a, b):
        return a + b

    df1 = vaex.from_scalars(x=1, y=2)
    df2 = vaex.from_scalars(x=2, y=3)
    df1.add_function('bar', bar1)
    df2.add_function('bar', bar2)
    with pytest.raises(ValueError):
        df = vaex.concat([df1, df2])
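
The pattern above can be used outside a test as well. A minimal sketch (not from the vaex test suite; the name 'scale' is made up for illustration) of registering a function with add_function and calling it through df.func, as Example 1 does:

import vaex

def scale(a, factor):
    return a * factor

df = vaex.from_scalars(x=3)
df.add_function('scale', scale)      # register the function under the name 'scale'
df['x2'] = df.func.scale(df.x, 2)    # use it inside a virtual column expression
print(df.x2.tolist())                # expected output: [6]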
Example 2
def test_multiple_tasks_different_columns_names():
    df1 = vaex.from_scalars(x=1, y=2)
    df2 = vaex.from_scalars(x=1, y=2)
    x = df1.sum('x', delay=True)
    y = df2.sum('y', delay=True)
    df1.execute()
    assert x.get() == 1
    assert y.get() == 2
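
For reference, the same delayed pattern as a stand-alone sketch (assuming the delay=True / execute() / get() API shown in the test above):

import numpy as np
import vaex

df = vaex.from_arrays(x=np.array([1, 2, 3]), y=np.array([10, 20, 30]))
sum_x = df.sum('x', delay=True)   # returns a promise-like object, not a number
sum_y = df.sum('y', delay=True)
df.execute()                      # runs both queued tasks over the data
print(sum_x.get(), sum_y.get())   # expected output: 6 60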
Example 3
def test_concat_keep_virtual():
    df1 = vaex.from_scalars(x=1, y=2)
    df2 = vaex.from_scalars(x=2, y=3)
    # w has same expression
    df1['w'] = df1.x + df1.y
    df2['w'] = df2.x + df2.y
    df = vaex.concat([df1, df2])
    assert 'w' in df.virtual_columns
    assert 'w' not in df.get_column_names(virtual=False)
    assert 'w' not in df.dataset
Example 4
def test_passes_two_datasets():
    df1 = vaex.from_scalars(x=1, y=2)
    df2 = vaex.from_scalars(x=1, y=3)
    executor = df1.executor
    executor.passes = 0
    df1.sum('x')
    assert executor.passes == 1
    df1.sum('x', delay=True)
    df2.sum('x', delay=True)
    df1.execute()
    assert executor.passes == 3
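
A reading of this test: the eager df1.sum('x') costs one pass over the data, and because the two delayed sums target two different DataFrames (and therefore two different datasets), df1.execute() needs one additional pass per dataset, for 3 passes in total.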
Example 5
def test_concat_unequals_virtual_columns():
    df1 = vaex.from_scalars(x=1, y=2)
    df2 = vaex.from_scalars(x=2, y=3)
    # w has same expression
    df1['w'] = df1.x + df1.y
    df2['w'] = df2.x + df2.y
    # z does not
    df1['z'] = df1.x + df1.y
    df2['z'] = df2.x * df2.y
    df = vaex.concat([df1, df2])
    assert df.w.tolist() == [1 + 2, 2 + 3]
    assert df.z.tolist() == [1 + 2, 2 * 3]
Example 6
def test_concat_unequals_virtual_columns():
    ds1 = vaex.from_scalars(x=1, y=2)
    ds2 = vaex.from_scalars(x=2, y=3)
    # w has same expression
    ds1['w'] = ds1.x + ds1.y
    ds2['w'] = ds2.x + ds2.y
    # z does not
    ds1['z'] = ds1.x + ds1.y
    ds2['z'] = ds2.x * ds2.y
    ds = vaex.concat([ds1, ds2])
    assert ds.w.tolist() == [1 + 2, 2 + 3]
    assert ds.z.tolist() == [1 + 2, 2 * 3]
Example 7
def test_join_functions():
    df1 = vaex.from_scalars(j=444, x=1, y=2)
    df2 = vaex.from_scalars(k=555, x=1)
    # df2['x'] = df2.apply(lambda y: y-1, arguments=[df2.y])
    df2['z'] = df2.apply(lambda x: x + 10, arguments=[df1.x])
    df = df1.join(df2, on='x')
    assert 'lambda_function' in df.get_names()
    assert df.x.tolist() == [1]
    assert df.y.tolist() == [2]
    assert df.z.tolist() == [11]
    assert df.j.tolist() == [444]
    assert df.k.tolist() == [555]
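
The df.apply call above registers the lambda as a DataFrame function, which is why 'lambda_function' shows up among the names. A minimal stand-alone sketch of the same apply pattern, under that assumption:

import vaex

df = vaex.from_scalars(x=1)
df['z'] = df.apply(lambda x: x + 10, arguments=[df.x])  # virtual column backed by the lambda
print(df.z.tolist())  # expected output: [11]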
Example 8
def test_propagate_uncertainty():
    ds = vaex.from_scalars(x=1, y=2, e_x=2, e_y=4)
    ds['r'] = ds.x + ds.y
    ds.propagate_uncertainties([ds.r])
    print(ds.r_uncertainty.expression)
    assert ds.r_uncertainty.expand().expression == 'sqrt(((e_x ** 2) + (e_y ** 2)))'
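
For r = x + y with independent uncertainties e_x and e_y, first-order error propagation gives sigma_r = sqrt(e_x**2 + e_y**2), which is exactly the expression asserted above. A quick numerical check (a sketch, not part of the test):

import numpy as np
import vaex

ds = vaex.from_scalars(x=1.0, y=2.0, e_x=2.0, e_y=4.0)
ds['r'] = ds.x + ds.y
ds.propagate_uncertainties([ds.r])
# sqrt(2**2 + 4**2) = sqrt(20) ~= 4.472
print(ds.r_uncertainty.values[0], np.sqrt(2.0**2 + 4.0**2))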
Example 9
def test_expression_expand():
    ds = vaex.from_scalars(x=1, y=2)

    ds['g'] = ds.x
    assert ds.g.expression == 'g'
    assert ds.g.variables() == {'x'}
    # TODO: this doesn't work, because ourself and include_virtual contradict each other,
    # but we don't use this internally
    # assert ds.g.variables(ourself=True, include_virtual=False) == {'g', 'x'}

    ds['r'] = ds.x * ds.y
    assert ds.r.expression == 'r'
    assert ds.r.variables() == {'x', 'y'}
    assert ds.r.variables(ourself=True,
                          include_virtual=False) == {'r', 'x', 'y'}
    ds['s'] = ds.r + ds.x
    assert ds.s.variables() == {'r', 'x', 'y'}
    assert ds.s.variables(ourself=True) == {'s', 'r', 'x', 'y'}
    assert ds.s.variables(include_virtual=False) == {'x', 'y'}
    assert ds.s.variables(ourself=True,
                          include_virtual=False) == {'s', 'x', 'y'}
    ds['t'] = ds.s + ds.y
    assert ds.t.variables() == {'s', 'r', 'x', 'y'}
    ds['u'] = np.arctan(ds.t)
    assert ds.u.variables() == {'t', 's', 'r', 'x', 'y'}
Example 10
def test_join_virtual_columns(on):
    df1 = vaex.from_scalars(j=444, x=1, y=2)
    df1['z'] = df1.x + df1.y
    df1['__h'] = df1.z * 2
    df2 = vaex.from_scalars(j=444, x=2, yy=3)
    df2['z'] = df2.x + df2.yy
    df2['__h'] = df2.z * 3
    df = df1.join(df2, rprefix='r_', rsuffix='_rhs', on=on)
    assert df.x.values[0] == 1
    assert df.y.values[0] == 2
    assert df.z.values[0] == 3
    assert df.__h.values[0] == 6
    assert df.r_x_rhs.values[0] == 2
    assert df.yy.values[0] == 3
    assert df.r_z_rhs.values[0] == 5
    assert df.__r_h_rhs.values[0] == 15
Example 11
def test_matrix():
    ds = vaex.from_scalars(x=1, y=0, z=0, x_e=0.1, y_e=0.2, z_e=0.3)
    matrix = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    ds.add_virtual_columns_matrix3d(ds.x, ds.y, ds.z, 'xn', 'yn', 'zy', matrix)
    ds.propagate_uncertainties([ds.xn])
    assert ds.xn.values[0] == ds.x.values[0]
    assert ds.xn_uncertainty.values[0] == ds.x_e.values[0]

    ds = vaex.from_scalars(x=1, y=0, z=0, x_e=0.1, y_e=0.2, z_e=0.3)
    matrix = [[0, 1, 0], [1, 0, 0], [0, 0, 1]]
    ds.add_virtual_columns_matrix3d(ds.x, ds.y, ds.z, 'xn', 'yn', 'zy', matrix)
    ds.propagate_uncertainties([ds.xn, ds.yn])
    assert ds.xn.values[0] == ds.y.values[0]
    assert ds.xn_uncertainty.values[0] == ds.y_e.values[0]

    assert ds.yn.values[0] == ds.x.values[0]
    assert ds.yn_uncertainty.values[0] == ds.x_e.values[0]
Example 13
def test_join_variables():
    df1 = vaex.from_scalars(j=444, x=1, y=2)
    df1.add_variable('a', 2)
    df1.add_variable('b', 3)
    df1['z'] = df1.x * df1['a'] + df1.y * df1['b']

    df2 = vaex.from_scalars(j=444, x=2, yy=3)
    df2.add_variable('a', 3)
    df2.add_variable('b', 4)
    df2['z'] = df2.x * df2['a'] + df2.yy * df2['b']
    df = df1.join(df2, rprefix='r_', rsuffix='_rhs')
    assert df.x.values[0] == 1
    assert df.y.values[0] == 2
    assert df.z.values[0] == 2 + 2 * 3
    # assert df.__h.values[0] == 6
    assert df.r_x_rhs.values[0] == 2
    assert df.yy.values[0] == 3
    assert df.r_z_rhs.values[0] == 2 * 3 + 3 * 4
Example 14
def test_column_list_traitlets():
    df = vaex.from_scalars(x=1, y=2)
    df['z'] = df.x + df.y
    column_list = vt.ColumnsMixin(df=df)
    assert len(column_list.columns) == 3
    df['w'] = df.z * 2
    assert len(column_list.columns) == 4
    del df['w']
    assert len(column_list.columns) == 3
Example 15
def test_expression_expand():
    ds = vaex.from_scalars(x=1, y=2)
    ds['r'] = ds.x * ds.y
    assert ds.r.expression == 'r'
    assert ds.r.expand().expression == '(x * y)'
    ds['s'] = ds.r + ds.x
    assert ds.s.expand().expression == '((x * y) + x)'
    ds['t'] = ds.s + ds.y
    assert ds.t.expand(stop=['r']).expression == '((r + x) + y)'
    ds['u'] = np.arctan2(ds.s, ds.y)
    assert ds.u.expand(stop=['r']).expression == 'arctan2((r + x), y)'
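
Following the behaviour asserted above, expand() substitutes virtual-column definitions recursively, while stop=[...] keeps the listed names symbolic. A short sketch:

import vaex

ds = vaex.from_scalars(x=1, y=2)
ds['r'] = ds.x * ds.y
ds['s'] = ds.r + ds.x
print(ds.s.expand().expression)            # expected: '((x * y) + x)'
print(ds.s.expand(stop=['r']).expression)  # expected: '(r + x)'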
Example 16
def test_expression_expand():
    ds = vaex.from_scalars(x=1, y=2)
    ds['r'] = ds.x * ds.y
    assert ds.r.expression == 'r'
    assert ds.r.variables() == {'x', 'y'}
    ds['s'] = ds.r + ds.x
    assert ds.s.variables() == {'x', 'y'}
    ds['t'] = ds.s + ds.y
    assert ds.t.variables() == {'x', 'y'}
    ds['u'] = np.arctan(ds.t)
    assert ds.u.variables() == {'x', 'y'}
Example 17
def test_nested_use_of_executor():
    df = vaex.from_scalars(x=1, y=2)

    @vaex.delayed
    def next(x):
        # although the executor is still in its loop, it's not using the threads anymore,
        # so we should be able to use the executor again
        return x + df.y.sum()

    value = next(df.x.sum(delay=True))
    df.execute()
    assert value.get() == 1 + 2
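
Complementing the test above, a minimal sketch of composing delayed results with the vaex.delayed decorator (assuming, as the test suggests, that the wrapped function runs once its delayed arguments are available):

import numpy as np
import vaex

df = vaex.from_arrays(x=np.array([1.0, 2.0, 3.0]), y=np.array([10.0, 20.0, 30.0]))

@vaex.delayed
def ratio(sum_x, sum_y):
    # runs only after both delayed sums have been computed
    return sum_x / sum_y

result = ratio(df.sum('x', delay=True), df.sum('y', delay=True))
df.execute()
print(result.get())  # expected output: 0.1 (i.e. 6.0 / 60.0)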
Example 18
def dfs(alpha, delta, pm_a, pm_d, radians=radians):
    ds_1 = vaex.from_scalars(alpha=alpha, delta=delta, pm_a=pm_a, pm_d=pm_d, alpha_e=0.01, delta_e=0.02, pm_a_e=0.003, pm_d_e=0.004)
    ds_1 = ds_1.astro.pm_eq2gal("alpha", "delta", "pm_a", "pm_d", "pm_l", "pm_b", propagate_uncertainties=True, radians=radians)
    N = 100000
    # distance
    alpha = np.random.normal(0, 0.01, N) + alpha
    delta = np.random.normal(0, 0.02, N) + delta
    pm_a = np.random.normal(0, 0.003, N) + pm_a
    pm_d = np.random.normal(0, 0.004, N) + pm_d
    ds_many = vaex.from_arrays(alpha=alpha, delta=delta, pm_a=pm_a, pm_d=pm_d)
    ds_many.astro.pm_eq2gal("alpha", "delta", "pm_a", "pm_d", "pm_l", "pm_b", radians=radians, inplace=True)
    return ds_1, ds_many
Example 19
def test_expression_expand():
    ds = vaex.from_scalars(x=1, y=2)
    ds['r'] = ds.x * ds.y
    assert ds.r.expression == 'r'
    assert ds.r.variables() == {'x', 'y'}
    ds['s'] = ds.r + ds.x
    assert ds.s.variables() == {'r', 'x', 'y'}
    assert ds.s.variables(ourself=True) == {'s', 'r', 'x', 'y'}
    assert ds.s.variables(include_virtual=False) == {'x', 'y'}
    assert ds.s.variables(ourself=True, include_virtual=False) == {'s', 'x', 'y'}
    ds['t'] = ds.s + ds.y
    assert ds.t.variables() == {'s', 'r', 'x', 'y'}
    ds['u'] = np.arctan(ds.t)
    assert ds.u.variables() == {'t', 's', 'r', 'x', 'y'}
Example 20
def test_virtual_columns_equatorial():
    df = vaex.from_scalars(alpha=0, delta=0, distance=1)

    df.add_virtual_columns_equatorial_to_galactic_cartesian("alpha",
                                                            "delta",
                                                            "distance",
                                                            "x",
                                                            "y",
                                                            "z",
                                                            radians=False)
    df.add_virtual_column("r", "sqrt(x**2+y**2+z**2)")

    x = df['x'].values[0]
    y = df['y'].values[0]
    z = df['z'].values[0]
    assert x**2 + y**2 + z**2 == 1
    assert df['r'].values[0] == 1
Example 21
def test_selection_toggle_list():
    df = vaex.from_scalars(x=1)
    widget = vaex.jupyter.widgets.SelectionToggleList(df=df)
    assert widget.selection_names == []
    assert widget.value == []
    df.select('x > 0')
    assert widget.selection_names == ['default']
    assert widget.value == []
    widget.value = ['default']
    df.select('x < 0', name='neg')
    assert widget.selection_names == ['default', 'neg']
    assert widget.value == ['default']
    df.select_nothing('default')
    assert widget.selection_names == ['neg']
    assert widget.value == []
    df.select('x > 0')
    assert widget.selection_names == ['default', 'neg']
    assert widget.value == []

    widget.value = ['default', 'neg']
    df.select_nothing('default')
    assert widget.value == ['neg']
    df.select_nothing('neg')
    assert widget.value == []
Example 22
def test_eq2gal():
    df = vaex.from_scalars(ra=1, dec=2)
    df = df.astro.eq2gal()
    assert df.l.tolist() != 1
    assert df.b.tolist() != 2
Example 23
def test_invert():
    df = vaex.from_scalars(x=1, y=2)
    df['r'] = ~(df.x > df.y)
    assert df.r.expand().expression == '~(x > y)'
Example 25
def test_open_nonstandard_extension(tmpdir):
    df = vaex.from_scalars(x=1, s='Hello')
    df.export_hdf5(tmpdir / 'this_is_hdf5.xyz')
    df = vaex.open(tmpdir / 'this_is_hdf5.xyz')
    assert df.x.tolist() == [1]
    assert df.s.tolist() == ['Hello']
Example 26
def compute_flow_data(days, hours, zone):
    logger.info("Compute: flow data: days=%r hours=%r zone=%r", days, hours,
                zone)
    df, selection = create_selection(days, hours)
    df.select(df.pickup_zone == zone, mode='and')
    selection = True
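    # selection=True: the aggregation below respects the currently active (default) selection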
    df_flow_zone = df.groupby(
        [df.pickup_zone, df.dropoff_zone],
        agg={'count_trips': vaex.agg.count(selection=selection)})
    # sort descending so we can take the top N
    df_flow_zone = df_flow_zone.sort('count_trips', ascending=False)

    df_flow_zone['pickup_borough'] = df_flow_zone.pickup_zone.map(
        zone_index_to_borough_index)
    df_flow_zone['dropoff_borough'] = df_flow_zone.dropoff_zone.map(
        zone_index_to_borough_index)

    pickup_zone = zone
    pickup_borough = zone_index_to_borough_index[pickup_zone]

    # Now include the total count of all trips for zones that are not in the top N:
    # only trips leaving from this zone towards a different borough
    df_outflow_zone = df_flow_zone[(df_flow_zone.pickup_zone == pickup_zone)]
    df_outflow_zone = df_outflow_zone[
        df_outflow_zone.dropoff_borough != pickup_borough]

    df_outflows_top = []
    df_outflows_rest = []

    for dropoff_borough in range(6):
        if dropoff_borough == pickup_borough:
            continue
        # outflow from this zone, to a particular borough
        df_outflow_zone_borough = df_outflow_zone[
            df_outflow_zone.dropoff_borough == dropoff_borough]
        if len(df_outflow_zone_borough):
            n_max = min(len(df_outflow_zone_borough), n_largest)
            # top N zones of outflow from this zone, to a particular borough
            df_outflows_top.append(df_outflow_zone_borough[:n_max])

            if len(df_outflow_zone_borough) > n_largest:
                count_other = df_outflow_zone_borough[n_largest:][
                    'count_trips'].sum()

                # rest of the outflow from this zone, to a particular borough
                df_outflows_rest.append(
                    vaex.from_scalars(pickup_borough=pickup_borough,
                                      dropoff_borough=dropoff_borough,
                                      dropoff_zone=len(zone_index_to_name) +
                                      dropoff_borough,
                                      count_trips=count_other))

    df_outflow_top = vaex.concat(df_outflows_top)
    df_outflow_borough = df_outflow_zone.groupby(
        ['pickup_borough', 'dropoff_borough'],
        agg={'count_trips': vaex.agg.sum('count_trips')})
    if df_outflows_rest:
        df_outflow_rest = vaex.concat(df_outflows_rest)
    else:
        # create an empty dataframe with the same schema to make the rest of the code simpler
        df_outflow_rest = vaex.from_scalars(pickup_borough=-1,
                                            dropoff_borough=-1,
                                            dropoff_zone=-1,
                                            count_trips=-1)[:0]

    # return as dict and lists so it can be serialized by the memoize decorator
    flow_data = dict(
        outflow_top=df_outflow_top.to_dict(array_type='list'),
        outflow_rest=df_outflow_rest.to_dict(array_type='list'),
        outflow_borough=df_outflow_borough.to_dict(array_type='list'))
    return flow_data
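
For reference, a minimal self-contained sketch of the groupby/aggregate/sort pattern that compute_flow_data relies on (hypothetical toy data, not the taxi dataset):

import numpy as np
import vaex

df = vaex.from_arrays(pickup_zone=np.array([1, 1, 2]), dropoff_zone=np.array([5, 5, 7]))
counts = df.groupby([df.pickup_zone, df.dropoff_zone],
                    agg={'count_trips': vaex.agg.count()})
counts = counts.sort('count_trips', ascending=False)
print(counts.to_dict(array_type='list'))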