Ejemplo n.º 1
0
	def test_add_dataset(self):
		"""Adding the example dataset to the app's selector makes it the current dataset."""
		app = vx.ui.main.VaexApp()
		ds = vx.example()
		app.dataset_selector.add(ds)
		# assertFalse replaces the deprecated `assert_` alias, which was removed in Python 3.12.
		self.assertFalse(app.dataset_selector.is_empty())
		# The label text may contain "," separators; strip them before parsing the number.
		shown_length = int(app.dataset_panel.label_length.text().replace(",", ""))
		self.assertEqual(shown_length, len(ds))
		self.assertEqual(ds, app.current_dataset)
Ejemplo n.º 2
0
def test_column_file():
    """A file-backed column can be fingerprinted, directly and via a new DataFrame."""
    example_path = vaex.example().dataset.path
    frame = vaex.open(example_path, nommap=True)
    column = frame.dataset._columns['x']
    assert isinstance(column, vaex.file.column.ColumnFile)
    # Wrapping the column in a new DataFrame will trigger fingerprint.
    frame = vaex.from_arrays(x=column)
    # Call it explicitly as well, just to be sure.
    column.fingerprint()
Ejemplo n.º 3
0
def test_widget_counter_selection(flush_guard):
    """Check that the counter widget follows the size of the 'test' selection.

    The lazy counter is expected not to start a computation on its own and to
    refresh only when another computation runs; the non-lazy counter is
    expected to follow each selection change.
    """
    df = vaex.example()
    counter = df.widget.counter_selection('test', lazy=True)
    assert counter.value == 0
    # Selecting must not *cause* an execution, so the value stays put.
    df.select(df.x > 0, name='test')
    assert counter.value == 0
    # The counter updates once some other computation happens.
    df.sum(df.x)
    n_positive = df.count(selection='test')
    assert counter.value == n_positive

    # Same again for the opposite selection.
    df.select(df.x < 0, name='test')
    assert counter.value == n_positive
    df.sum(df.x)
    n_negative = df.count(selection='test')
    assert counter.value == n_negative

    # A non-lazy counter created on the already existing selection.
    counter = df.widget.counter_selection('test')
    assert counter.value == n_negative
    df.select(df.x > 0, name='test')
    assert counter.value == n_positive
    df.select(df.x < 0, name='test')
    assert counter.value == n_negative
    flush(all=True)
Ejemplo n.º 4
0
def test_widget_selection(flush_guard):
    """Exercise the selection-expression widget for the default and a named selection."""
    df = vaex.example()
    # Before any selection is made, this raises a ValueError mentioning 'default'.
    with pytest.raises(ValueError) as excinfo:
        df.widget.selection_expression()
    assert "'default'" in str(excinfo.value)

    # Number of selection-changed signals received, per selection name.
    signal_counts = {'default': 0, 'pos': 0}

    @df.signal_selection_changed.connect
    def count_signal(df, name):
        signal_counts[name] += 1

    n_positive = df.count(selection=df.x > 0)

    df.select(df.x > 0)
    default_widget = df.widget.selection_expression()
    assert default_widget.value.expression == '(x > 0)'

    named_widget = df.widget.selection_expression(df.x > 0, name='pos')
    # Creating the named widget must leave the default one untouched.
    assert default_widget.value.expression == '(x > 0)'
    assert signal_counts == {'default': 2, 'pos': 1}
    assert df.count(selection='pos') == n_positive
    # Setting a new expression text is expected to emit one more 'pos' change.
    named_widget.v_model = 'x < 0'
    assert named_widget.error_messages is None
    assert signal_counts == {'default': 2, 'pos': 2}
    flush(all=True)
Ejemplo n.º 5
0
def test_widget_heatmap(flush_guard):
    """Drive the heatmap widget: toolbar modes, a rectangle selection and a limit change."""
    df = vaex.example()
    df.select_rectangle('x', 'y', [[-10, 10], [-50, 50]], name='check')
    n_in_rectangle = df.count(selection='check')

    df.select(df.x > 0)
    n_positive = df.count(selection=True)
    heatmap = df.widget.heatmap('x', 'y', selection=[None, True])

    flush()
    # The original expectation is one count fewer than the selection size.
    assert heatmap.model.grid[1].sum().item() == n_positive - 1
    toolbar = heatmap.toolbar
    # Each toolbar mode should install the matching bqplot interaction.
    expected_interactions = (
        ("pan-zoom", bqplot.interacts.PanZoom),
        ("select-rect", bqplot.interacts.BrushSelector),
    )
    for mode, interaction_type in expected_interactions:
        toolbar.interact_value = mode
        assert isinstance(heatmap.plot.figure.interaction, interaction_type)

    # Brush the same rectangle that was selected by hand above.
    heatmap.plot.figure.interaction.selected_x = [-10, 10]
    heatmap.plot.figure.interaction.selected_y = [-50, 50]
    assert heatmap.model.grid.shape[0] == 2
    flush()
    assert heatmap.model.grid[1].sum().item() == n_in_rectangle

    # An unknown mode name removes the interaction.
    toolbar.interact_value = "doesnotexit"
    assert heatmap.plot.figure.interaction is None

    image_before = heatmap.plot.mark.image.value
    heatmap.model.x.max = 10
    flush(all=True)
    assert heatmap.plot.mark.image.value != image_before, "image should change"
Ejemplo n.º 6
0
def test_expression():
    """Check validation feedback and value handling of the expression widget."""
    df = vaex.example()
    widget = df.widget.expression()
    assert widget.value is None

    # A valid expression: accepted, with a success message until the flush.
    widget.value = 'x'
    assert widget.value.expression == 'x'
    assert widget.valid
    assert widget.error_messages is None
    assert "good" in widget.success_messages
    flush(all=True)
    assert widget.error_messages is None
    assert widget.success_messages is None

    # An incomplete expression: rejected, and the error remains after the flush.
    widget.v_model = 'x+'
    assert not widget.valid
    assert widget.error_messages is not None
    assert widget.success_messages is None
    flush()
    assert widget.error_messages is not None
    assert widget.success_messages is None

    # Constructed from an expression object.
    widget = df.widget.expression(df.y)
    assert widget.value == 'y'

    # Constructed from an Axis: the widget follows the axis' expression.
    axis = vaex.jupyter.model.Axis(df=df, expression=df.x + 2)
    widget = df.widget.expression(axis)
    assert str(widget.value) == '(x + 2)'
    axis.expression = df.x + 3
    assert str(widget.value) == '(x + 3)'
Ejemplo n.º 7
0
def test_sklearn_incremental_predictor_serialize(tmpdir):
    """The fitted predictor travels to another DataFrame through a state file."""
    frame = vaex.example()
    train, test = frame.ml.train_test_split(test_size=0.1, verbose=False)

    feature_names = train.column_names[:6]
    target_name = 'FeH'

    predictor = IncrementalPredictor(
        model=SGDRegressor(),
        features=feature_names,
        batch_size=10 * 1000,
        num_epochs=5,
        shuffle=True,
        prediction_name='pred',
    )
    predictor.fit(df=train, target=target_name)
    train = predictor.transform(train)

    # State transfer - serialization: write from train, load into test.
    state_file = str(tmpdir.join('test.json'))
    train.state_write(state_file)
    test.state_load(state_file)

    assert train.column_count() == test.column_count()
    assert test.pred.values.shape == (33000, )

    # The column restored from the state must agree with a direct prediction.
    direct_prediction = predictor.predict(test)
    np.testing.assert_array_almost_equal(direct_prediction,
                                         test.pred.values,
                                         decimal=1)
Ejemplo n.º 8
0
def test_cache_hash():
    """Hashes are computed by hashed(), stored in a sidecar file and then reused."""
    # TODO: what if the directory is not writable?
    data_dir = HERE / 'data'
    path_data = data_dir / 'test.hdf5'
    path_hashes = data_dir / 'test.hdf5.d' / 'hashes.yaml'
    # Remove files that an earlier run may have left behind.
    for leftover in (path_data, path_hashes):
        if leftover.exists():
            leftover.unlink()

    exported = vaex.example()[:10]
    exported.export(str(path_data))
    reopened = vaex.open(str(path_data))
    # Opening alone computes no hash and writes no hash file.
    assert reopened.dataset._hash_calculations == 0
    assert not path_hashes.exists()
    reopened = reopened.hashed()
    assert reopened.dataset._hash_calculations > 0
    assert path_hashes.exists()

    # and pickling
    hashed_ds = reopened.dataset
    rebuilt_ds = rebuild(hashed_ds)
    assert rebuilt_ds._hash_calculations == 0
    assert hashed_ds == rebuilt_ds

    # A fresh open of the same file needs no new hash calculation.
    fresh_ds = vaex.open(str(path_data)).dataset
    assert fresh_ds._hash_calculations == 0
    assert fresh_ds == rebuilt_ds
Ejemplo n.º 9
0
def test_sklearn_incremental_predictor_partial_fit_calls(
        batch_size, num_epochs):
    """fit() must call partial_fit once per batch per epoch and cover every row."""
    frame = vaex.example()
    train, _ = frame.ml.train_test_split(test_size=0.1, verbose=False)

    feature_names = train.column_names[:6]

    n_rows = len(train)
    # Ceiling division: a final, partially filled batch still counts as one.
    expected_batches = (n_rows + batch_size - 1) // batch_size

    class MockModel():
        """Stand-in model that only counts samples seen and partial_fit calls."""

        def __init__(self):
            self.n_samples_ = 0
            self.n_partial_fit_calls_ = 0

        def partial_fit(self, X, y):
            self.n_partial_fit_calls_ += 1
            self.n_samples_ += X.shape[0]

    predictor = IncrementalPredictor(
        model=MockModel(),
        features=feature_names,
        batch_size=batch_size,
        num_epochs=num_epochs,
        shuffle=False,
        prediction_name='pred',
    )

    predictor.fit(df=train, target='FeH')
    counting_model = predictor.model
    assert counting_model.n_samples_ == n_rows * num_epochs
    assert counting_model.n_partial_fit_calls_ == expected_batches * num_epochs
Ejemplo n.º 10
0
def test_plot_widget_bqplot():
    """Smoke test: plot_widget runs for float64 and float32 inputs."""
    # basic coverage for now
    df = vaex.example()
    df.plot_widget(df.x, df.y)
    # float32 inputs, first with the default limits, then with 'minmax'.
    for extra_options in ({}, {'limits': 'minmax'}):
        df.plot_widget(df.x.astype('float32'),
                       df.y.astype('float32'),
                       **extra_options)
Ejemplo n.º 11
0
    def test_df_apply(self):
        """Build an expression with df.apply from a Python function and print it."""
        df = vaex.example()

        def func(x, y):
            total = x + y
            difference = x - y
            return total / difference

        applied = df.apply(func, arguments=[df.x, df.y])
        print(applied)
Ejemplo n.º 12
0
def test_data_array_view(flush_guard):
    """A data_array view over two axes has a grid after a full flush."""
    df = vaex.example()

    # One Axis model per expression, in x, y order.
    axes = [
        vaex.jupyter.model.Axis(df=df, expression=name)
        for name in ('x', 'y')
    ]
    view = df.widget.data_array(axes=axes)
    flush(all=True)
    assert view.model.grid is not None
Ejemplo n.º 13
0
def test_observe_expression():
    """An observer on 'expression' fires exactly once for a single assignment."""
    observer = MagicMock()
    widget = SomeWidget(df=vaex.example(), expression=vaex.example().x + 1) if False else None
    df = vaex.example()
    widget = SomeWidget(df=df, expression=df.x + 1)
    widget.observe(observer, 'expression')
    # Registering the observer must not call it.
    observer.assert_not_called()
    widget.expression = '(x + 2)'
    observer.assert_called_once()
Ejemplo n.º 14
0
Archivo: ui.py Proyecto: yokeldd/vaex
 def test_add_dataset(self):
     """Adding the example dataset to the app's selector makes it the current dataset."""
     app = vx.ui.main.VaexApp()
     ds = vx.example()
     app.dataset_selector.add(ds)
     # assertFalse replaces the deprecated `assert_` alias, which was removed in Python 3.12.
     self.assertFalse(app.dataset_selector.is_empty())
     # The label text may contain "," separators; strip them before parsing the number.
     shown_length = int(
         app.dataset_panel.label_length.text().replace(",", ""))
     self.assertEqual(shown_length, len(ds))
     self.assertEqual(ds, app.current_dataset)
Ejemplo n.º 15
0
def test_to_json():
    """Round-trip widget state: the expression serializes to text and set_state restores it."""
    df = vaex.example()
    w = SomeWidget(df=df, expression=df.x + 1)
    state = w.get_state()
    assert state['expression'] == '(x + 1)'
    # Edit a copy so the dict returned by get_state() is left untouched.
    state = state.copy()
    state['expression'] = '(x + 2)'
    w.set_state(state)
    # The original compared without `assert`, so this check never actually ran.
    assert w.expression.expression == '(x + 2)'
Ejemplo n.º 16
0
def test_validate_expression():
    """Invalid assignments raise and leave the previous expression in place."""
    df = vaex.example()
    widget = SomeWidget(df=df, expression=df.x + 1)
    widget.expression = '(x + 2)'
    assert widget.expression.expression == '(x + 2)'
    # Each rejected text is paired with the error it should raise.
    rejected = (
        (SyntaxError, 'x + '),    # incomplete expression
        (NameError, 'x2 + 1'),    # unknown name
    )
    for error_type, bad_text in rejected:
        with pytest.raises(error_type):
            widget.expression = bad_text
    assert widget.expression.expression == '(x + 2)'
Ejemplo n.º 17
0
    def test_df_evaluate(self):
        """Evaluate an applied function into a new column and print its min, mean and max."""
        df = vaex.example()

        def func(x, y):
            total = x + y
            difference = x - y
            return total / difference

        applied = df.apply(func, arguments=[df.x, df.y])

        df['new_col'] = df.evaluate(applied)
        # Printed in min, mean, max order; nothing is asserted.
        for statistic in (df.min, df.mean, df.max):
            print(statistic(df['new_col']))
Ejemplo n.º 18
0
def test_hashable():
    """Datasets work as dict keys, also after being rebuilt."""
    # tests if we can use datasets as keys of dicts
    x = np.arange(10)
    y = x**2
    arrays_ds = dataset.DatasetArrays(x=x, y=y).hashed()
    df = vaex.example()
    by_dataset = {arrays_ds: '1', df.dataset: '2'}

    # Lookup with the very same objects.
    for key, label in ((arrays_ds, '1'), (df.dataset, '2')):
        assert by_dataset[key] == label

    # Lookup with rebuilt copies, which must hash and compare equal.
    for key, label in ((arrays_ds, '1'), (df.dataset, '2')):
        assert by_dataset[rebuild(key)] == label
Ejemplo n.º 19
0
def test_column():
    """The column widget starts empty, accepts an expression, and follows an Axis."""
    df = vaex.example()
    widget = df.widget.column()
    assert widget.value is None
    widget = df.widget.column(df.y)
    assert widget.value == 'y'

    # Bound to an Axis: changing the axis' expression changes the widget value.
    axis = vaex.jupyter.model.Axis(df=df, expression=df.x)
    axis_widget = df.widget.column(axis)
    assert str(axis_widget.value) == 'x'
    axis.expression = df.y
    assert str(axis_widget.value) == 'y'
Ejemplo n.º 20
0
	def test_table(self):
		"""Open the table view, emit a press on cell (1, 1) and toggle count-from-zero.

		NOTE(review): `self.app` is not assigned in the visible part of this
		class; presumably it is created elsewhere - confirm.
		"""
		ds = vx.example()
		self.app.dataset_selector.add(ds)
		# NOTE(review): the current row is set on self.dataset, while the assertion
		# below reads it from `ds` - confirm both refer to the same data.
		self.dataset.set_current_row(3)
		table = self.app.dataset_panel.tableview()
		self.dataset.set_current_row(0)
		# Index of row 1, column 1, emitted through the view's `pressed` signal.
		model = table.tableModel.createIndex(1, 1)
		table.tableView.pressed.emit(model)
		self.assertEqual(ds.get_current_row(), 1)
		QtTest.QTest.qWait(100) # wait 100 ms so the view gets rendered (is this the good way?)
		#QtTest.QTest.mouseClick(table.count_from_zero, QtCore.Qt.LeftButton)
		table.count_from_zero.setCheckState(QtCore.Qt.Checked)
		QtTest.QTest.qWait(10000) # wait 10 s so the view gets rendered (is this the good way?)
Ejemplo n.º 21
0
def test_column_names(df_arrow):
    """Hidden ('__'-prefixed) columns are listed only on request; regex filters the names."""
    frame = df_arrow
    visible_before = frame.get_column_names(virtual=True)
    frame['__x2'] = frame.x
    # Adding a hidden column leaves the default listing unchanged.
    assert visible_before == frame.get_column_names(virtual=True)
    assert '__x2' in frame.get_column_names(virtual=True, hidden=True)

    frame = vaex.example()
    frame['__x'] = frame['x'] + 1
    # 'e*' includes FeH while plain 'e' does not.
    assert 'FeH' in frame.get_column_names(regex='e*')
    assert 'FeH' not in frame.get_column_names(regex='e')
    # A regex matching a hidden column still needs hidden=True to return it.
    assert '__x' not in frame.get_column_names(regex='__x')
    assert '__x' in frame.get_column_names(regex='__x', hidden=True)
Ejemplo n.º 22
0
def test_correlation():
    """Check df.correlation for pairs, lists, targets, bins, xarray and pair-list input."""
    df = vaex.example()

    # A single column pair, compared with numpy's result.
    xy = df.correlation('x', 'y')
    yx = xy
    numpy_xy = np.corrcoef(df.x.values, df.y.values)[0, 1]
    np.testing.assert_array_almost_equal(xy, numpy_xy, decimal=5)

    # Swapping the two arguments gives the same value.
    np.testing.assert_array_almost_equal(df.correlation('x', 'y'),
                                         df.correlation('y', 'x'))

    xx = df.correlation('x', 'x')
    yy = df.correlation('y', 'y')
    zz = df.correlation('z', 'z')

    xz = df.correlation('x', 'z')
    zx = xz
    yz = df.correlation('y', 'z')
    zy = yz

    # A list of columns gives the full matrix.
    full_result = df.correlation(x=['x', 'y', 'z'])
    full_expected = np.array(([xx, xy, xz], [yx, yy, yz], [zx, zy, zz]))
    np.testing.assert_array_almost_equal(full_result, full_expected)

    # A list of columns and a single target
    target_result = df.correlation(x=['x', 'y', 'z'], y='z')
    target_expected = np.array([xz, yz, zz])
    np.testing.assert_array_almost_equal(target_result, target_expected)

    # A list of columns and a list of targets
    block_result = df.correlation(x=['x', 'y', 'z'], y=['y', 'z'])
    assert block_result.shape == (3, 2)
    block_expected = np.array(([xy, xz], [yy, yz], [zy, zz]))
    np.testing.assert_array_almost_equal(block_result, block_expected)

    # Repeated call kept from the original; its result is not inspected.
    df.correlation(x=['x', 'y', 'z'], y=['y', 'z'])

    # Binned by x: the first bin must match an explicit selection of -2 <= x < -1.
    binned = df.correlation(['x', 'y'], binby='x', shape=4, limits=[-2, 2])
    first_bin = df.correlation(['x', 'y'],
                               selection=(df.x >= -2) & (df.x < -1))
    np.testing.assert_array_almost_equal(binned[0], first_bin)

    # xarray output carries the column names as coordinates.
    xar = df.correlation(['x', 'y', 'z'], array_type='xarray')
    np.testing.assert_array_almost_equal(xar.data, full_expected)
    assert xar.dims == ("x", "y")
    for dim in ('x', 'y'):
        assert xar.coords[dim].data.tolist() == ['x', 'y', 'z']

    # A list of pairs gives one row per pair.
    pairs = df.correlation([('x', 'y'), ('x', 'z'), ('y', 'z')])
    assert len(pairs) == 3
    assert pairs['x'].tolist() == ['x', 'x', 'y']
    assert pairs['y'].tolist() == ['y', 'z', 'z']
    np.testing.assert_array_almost_equal(pairs['correlation'].tolist(),
                                         [xy, xz, yz])
Ejemplo n.º 23
0
def webserver(request, webserver_fastapi, webserver_tornado, df_server,
              df_server_huge):
    """Return the server named by ``request.param``, loaded with three datasets.

    ``request.param`` is used as a key into this function's own arguments.
    """
    # Looked up first, while locals() holds only the arguments.
    server = locals()[request.param]
    example = vaex.example()
    served = df_server.copy()
    served = served.materialize('z')  # in the fastapi we drop the state
    for column in ('obj', 'datetime', 'timedelta'):
        served.drop(column, inplace=True)

    served.name = 'test'
    example.name = 'example'
    server.set_datasets([served, df_server_huge, example])
    return server
Ejemplo n.º 24
0
def test_column_names(ds_local):
	"""Hidden ('__'-prefixed) columns are listed only on request; regex filters the names."""
	frame = ds_local
	visible_before = frame.get_column_names(virtual=True)
	frame['__x'] = frame.x
	# Adding a hidden column leaves the default listing unchanged.
	assert visible_before == frame.get_column_names(virtual=True)
	with_hidden = frame.get_column_names(virtual=True, hidden=True)
	assert '__x' in with_hidden
	# Exactly one extra name shows up once hidden columns are included.
	all_names = frame.get_column_names(virtual=True, hidden=True)
	assert len(visible_before) == len(all_names)-1

	frame = vaex.example()
	frame['__x'] = frame['x'] + 1
	# 'e*' includes FeH while plain 'e' does not.
	assert 'FeH' in frame.get_column_names(regex='e*')
	assert 'FeH' not in frame.get_column_names(regex='e')
	# A regex matching a hidden column still needs hidden=True to return it.
	assert '__x' not in frame.get_column_names(regex='__x')
	assert '__x' in frame.get_column_names(regex='__x', hidden=True)
Ejemplo n.º 25
0
Archivo: ui.py Proyecto: yokeldd/vaex
 def test_table(self):
     """Open the table view, emit a press on cell (1, 1) and toggle count-from-zero.

     NOTE(review): `self.app` and `self.dataset` are not assigned in the
     visible part of this class; presumably a setUp creates them - confirm.
     """
     ds = vx.example()
     self.app.dataset_selector.add(ds)
     # NOTE(review): the current row is set on self.dataset, while the assertion
     # below reads it from `ds` - confirm both refer to the same data.
     self.dataset.set_current_row(3)
     table = self.app.dataset_panel.tableview()
     self.dataset.set_current_row(0)
     # Index of row 1, column 1, emitted through the view's `pressed` signal.
     model = table.tableModel.createIndex(1, 1)
     table.tableView.pressed.emit(model)
     self.assertEqual(ds.get_current_row(), 1)
     QtTest.QTest.qWait(
         100)  # wait 100 ms so the view gets rendered (is this the good way?)
     #QtTest.QTest.mouseClick(table.count_from_zero, QtCore.Qt.LeftButton)
     table.count_from_zero.setCheckState(QtCore.Qt.Checked)
     QtTest.QTest.qWait(
         10000)  # wait 10 s so the view gets rendered (is this the good way?)
Ejemplo n.º 26
0
def test_mutual_information():
    """Check df.mutual_information for pairs, lists, targets, custom pairs and 3 dimensions."""
    df = vaex.example()

    # A single pair, compared with a fixed reference value.
    xy = df.mutual_information('x', 'y')
    yx = xy
    np.testing.assert_array_almost_equal(xy, np.array(0.068934))

    # Swapping the two arguments gives the same value.
    np.testing.assert_array_almost_equal(df.mutual_information('y', 'x'), df.mutual_information('x', 'y'))

    xx = df.mutual_information('x', 'x')
    yy = df.mutual_information('y', 'y')
    zz = df.mutual_information('z', 'z')

    xz = df.mutual_information('x', 'z')
    zx = xz
    yz = df.mutual_information('y', 'z')
    zy = yz

    # A list of columns gives the full matrix.
    full_result = df.mutual_information(x=['x', 'y', 'z'])
    full_expected = np.array((
        [xx, xy, xz],
        [yx, yy, yz],
        [zx, zy, zz],
    ))
    np.testing.assert_array_almost_equal(full_result, full_expected)

    # A list of columns and a single target
    target_result = df.mutual_information(x=['x', 'y', 'z'], y='z')
    target_expected = np.array([xz, yz, zz])
    np.testing.assert_array_almost_equal(target_result, target_expected)

    # A list of columns and targets
    block_result = df.mutual_information(x=['x', 'y', 'z'], y=['y', 'z'])
    assert block_result.shape == (3, 2)
    block_expected = np.array((
        [xy, xz],
        [yy, yz],
        [zy, zz],
    ))
    np.testing.assert_array_almost_equal(block_result, block_expected)

    # a list of custom pairs
    pair_result = df.mutual_information(x=[['x', 'y'], ['x', 'z'], ['y', 'z']])
    assert pair_result.shape == (3,)
    pair_expected = np.array([xy, xz, yz])
    np.testing.assert_array_almost_equal(pair_result, pair_expected)

    # Three dimensions over two columns: only the result shape is checked.
    cube = df.mutual_information(x=['x', 'y'], dimension=3, mi_shape=4)
    assert cube.shape == (2, 2, 2)
Ejemplo n.º 27
0
def test_percentile_1d():
    """median_approx lands on the expected side of the middle sample; binned call runs."""
    # Middle value 10 with two zeros below it: the estimate must be under 10.
    low_values = np.array([0, 0, 10, 100, 200])
    df = vaex.from_arrays(x=low_values)
    assert df.median_approx(df.x) < 10.

    # Middle value 90 with larger values above it: the estimate must be over 90.
    high_values = np.array([0, 0, 90, 100, 200])
    df = vaex.from_arrays(x=high_values)
    assert df.median_approx(df.x) > 90.

    # coverage test: a binned percentile on the example data, result not checked.
    df = vaex.example()
    binned_options = dict(percentage=80, binby=df.z, limits='minmax', shape=100)
    df.percentile_approx('x', **binned_options)
Ejemplo n.º 28
0
def test_accessor_nested():
    """Nested lazy accessors resolve only after their 'spam' parent is registered."""
    df = vaex.example()
    # Children registered first: df.spam keeps raising while 'spam' is missing.
    children = (
        ('spam.egg', lambda: Egg),
        ('spam.egg.foo', lambda: Foo),
    )
    for accessor_path, factory in children:
        vaex._add_lazy_accessor(accessor_path, factory)
        with pytest.raises(AttributeError):
            df.spam
    vaex._add_lazy_accessor('spam', lambda: Spam)

    # Top level: same object on every access, bound to the DataFrame.
    assert df.spam is df.spam
    assert df.spam.df is df
    assert isinstance(df.spam, Spam)

    # Second level: same object on every access, bound to its parent accessor.
    egg = df.spam.egg
    assert egg is df.spam.egg
    assert df.spam.egg.spam is df.spam
    assert isinstance(df.spam.egg, Egg)

    # Third level.
    assert df.spam.egg.foo is df.spam.egg.foo
    assert df.spam.egg.foo.df is df.spam.egg  # abuse of foo
    assert isinstance(df.spam.egg.foo, Foo)
Ejemplo n.º 29
0
def main(argv):
    """Parse the command line and start a vaex web server.

    :param argv: Full argument vector. ``argv[0]`` is used as the program
        name and ``argv[1:]`` are parsed as options.

    Every filename given is opened with ``vx.open``. If no dataset could be
    opened, the example dataset is served instead.
    """
    parser = argparse.ArgumentParser(argv[0])
    parser.add_argument("filename", help="filename for dataset", nargs='*')
    parser.add_argument("--address", help="address to bind the server to (default: %(default)s)", default="0.0.0.0")
    parser.add_argument("--port", help="port to listen on (default: %(default)s)", type=int, default=9000)
    parser.add_argument('--verbose', '-v', action='count', default=2)
    parser.add_argument('--cache', help="cache size in bytes for requests, set to zero to disable (default: %(default)s)", type=int, default=500000000)
    parser.add_argument('--compress', help="compress larger replies (default: %(default)s)", default=True, action='store_true')
    parser.add_argument('--no-compress', dest="compress", action='store_false')
    parser.add_argument('--development', default=False, action='store_true', help="enable development features (auto reloading)")
    parser.add_argument('--threads-per-job', default=4, type=int, help="threads per job (default: %(default)s)")
    config = parser.parse_args(argv[1:])

    verbosity = ["ERROR", "WARNING", "INFO", "DEBUG"]
    # --verbose counts upwards from its default of 2, so "-vv" would index past
    # the end of the list. Clamp to the most verbose level instead of raising.
    level_index = min(config.verbose, len(verbosity) - 1)
    logging.getLogger("vaex").setLevel(verbosity[level_index])
    # NOTE(review): `settings` is not used below; the import is kept in case
    # loading the module has side effects - confirm before removing.
    from vaex.settings import webserver as settings

    datasets = []
    for filename in config.filename:
        ds = vx.open(filename)
        if ds is None:
            print("error opening file: %r" % filename)
        else:
            datasets.append(ds)
    # Fall back to the example dataset when nothing could be opened.
    datasets = datasets or [vx.example()]
    logger.info("datasets:")
    for dataset in datasets:
        logger.info("\thttp://%s:%d/%s or ws://%s:%d/%s", config.address, config.port, dataset.name, config.address, config.port, dataset.name)
    server = WebServer(datasets=datasets, address=config.address, port=config.port, cache_byte_size=config.cache,
                       compress=config.compress, development=config.development,
                       threads_per_job=config.threads_per_job)
    server.serve()
Ejemplo n.º 30
0
def test_widget_histogram(flush_guard, no_vaex_cache):
    """Drive the histogram widget: toolbar modes, an interval selection and slicing."""
    df = vaex.example()
    # Repeated access to the accessor must give the same object.
    assert df.widget is df.widget
    df.select_box(['x'], [[-10, 20]], name='check')

    n_in_range = df.count(selection='check')

    df.select(df.x > 0)
    n_positive = df.count(selection='default')
    histogram = df.widget.histogram('x',
                                    selection=[None, "default"],
                                    toolbar=True)

    flush()
    # for some reason, because 'x' it float32, we don't need -1
    assert histogram.model.grid[1].sum() == n_positive
    toolbar = histogram.toolbar
    # Each toolbar mode should install the matching bqplot interaction.
    expected_interactions = (
        ("pan-zoom", bqplot.interacts.PanZoom),
        ("select-x", bqplot.interacts.BrushIntervalSelector),
    )
    for mode, interaction_type in expected_interactions:
        toolbar.interact_value = mode
        assert isinstance(histogram.plot.figure.interaction, interaction_type)

    # Brush the same interval that was selected by hand above.
    histogram.plot.figure.interaction.selected = [-10, 20]
    flush(all=True)
    assert histogram.model.grid.shape[0] == 2
    assert histogram.model.grid[1].sum() == n_in_range

    # An unknown mode name removes the interaction.
    toolbar.interact_value = "doesnotexit"
    assert histogram.plot.figure.interaction is None
    # coverage
    for highlighted in (0, None):
        histogram.plot.highlight(highlighted)

    # x_slice alone leaves the bars unchanged; they change once grouping is 'slice'.
    bars_before = histogram.plot.mark.y.tolist()
    histogram.model.x_slice = 10
    assert histogram.plot.mark.y.tolist() == bars_before
    histogram.dimension_groups = 'slice'
    assert histogram.plot.mark.y.tolist() != bars_before
Ejemplo n.º 31
0
def main(argv):
	"""Parse the command line and start a vaex web server.

	:param argv: Full argument vector. ``argv[0]`` is used as the program
		name and ``argv[1:]`` are parsed as options.

	Every filename given is opened with ``vx.open``. If no dataset could be
	opened, the example dataset is served instead.
	"""
	parser = argparse.ArgumentParser(argv[0])
	parser.add_argument("filename", help="filename for dataset", nargs='*')
	parser.add_argument("--address", help="address to bind the server to (default: %(default)s)", default="0.0.0.0")
	parser.add_argument("--port", help="port to listen on (default: %(default)s)", type=int, default=9000)
	parser.add_argument('--verbose', '-v', action='count', default=2)
	parser.add_argument('--cache', help="cache size in bytes for requests, set to zero to disable (default: %(default)s)", type=int, default=500000000)
	parser.add_argument('--compress', help="compress larger replies (default: %(default)s)", default=True, action='store_true')
	parser.add_argument('--no-compress', dest="compress", action='store_false')
	parser.add_argument('--development', default=False, action='store_true', help="enable development features (auto reloading)")
	parser.add_argument('--threads-per-job', default=4, type=int, help="threads per job (default: %(default)s)")
	config = parser.parse_args(argv[1:])

	verbosity = ["ERROR", "WARNING", "INFO", "DEBUG"]
	# --verbose counts upwards from its default of 2, so "-vv" would index past
	# the end of the list. Clamp to the most verbose level instead of raising.
	level_index = min(config.verbose, len(verbosity) - 1)
	logging.getLogger("vaex").setLevel(verbosity[level_index])
	# NOTE(review): `settings` is not used below; the import is kept in case
	# loading the module has side effects - confirm before removing.
	from vaex.settings import webserver as settings

	datasets = []
	for filename in config.filename:
		ds = vx.open(filename)
		if ds is None:
			print("error opening file: %r" % filename)
		else:
			datasets.append(ds)
	# Fall back to the example dataset when nothing could be opened.
	datasets = datasets or [vx.example()]
	logger.info("datasets:")
	for dataset in datasets:
		logger.info("\thttp://%s:%d/%s or ws://%s:%d/%s", config.address, config.port, dataset.name, config.address, config.port, dataset.name)
	server = WebServer(datasets=datasets, address=config.address, port=config.port, cache_byte_size=config.cache,
					   compress=config.compress, development=config.development,
					   threads_per_job=config.threads_per_job)
	server.serve()
Ejemplo n.º 32
0
def test_percentile_approx():
    """Compare percentile_approx with reference values to one decimal place."""
    df = vaex.example()

    # Simple test: one expression, one percentage.
    z_99 = df.percentile_approx('z', percentage=99)
    np.testing.assert_almost_equal(z_99, 15.1739, decimal=1)

    # Test for multiple percentages
    x_quartiles = df.percentile_approx('x',
                                       percentage=[0, 25, 50, 75, 100],
                                       percentile_shape=65536)
    x_reference = [-78.133026, -3.5992, -0.0367, 3.4684, 130.49751]
    np.testing.assert_array_almost_equal(x_quartiles, x_reference, decimal=1)

    # Test for multiple expressions
    xy_thirds = df.percentile_approx(['x', 'y'], percentage=[33, 66])
    xy_reference = np.array(([-2.3310, 1.9540], [-2.4313, 2.1021]))
    np.testing.assert_array_almost_equal(xy_thirds, xy_reference, decimal=1)
Ejemplo n.º 33
0
	def setUp(self):
		"""Load the vaex example dataset into ``self.dataset`` for use by the tests."""
		self.dataset = vx.example()
Ejemplo n.º 34
0
                        task._result = task.reduce(task._results)
                        task.fulfill(task._result)
                        # remove references
                    task._result = None
                    task._results = None
                self.signal_end.emit()
                # if new tasks were added as a result of this, execute them immediately
                # TODO: we may want to include infinite recursion protection
                self._is_executing = False
                if len(self.task_queue) > 0:
                    logger.debug("task queue not empty.. start over!")
                    self.execute()
        finally:
            self._is_executing = False


if __name__ == "__main__":
    # Manual check script: connects to a vaex server, lists its datasets, then
    # prints min/max and selection results for column "x".
    import vaex
    import sys
    vaex.set_log_level_debug()
    # Expects two command-line arguments: the server argument and a port number.
    # NOTE(review): argv[1] is presumably a hostname - confirm.
    server = vaex.server(sys.argv[1], port=int(sys.argv[2]))
    datasets = server.datasets()
    print(datasets)
    dataset = datasets[0]
    # NOTE(review): the remote dataset picked above is replaced right away by the
    # local example, so everything below runs locally - confirm this is intended.
    dataset = vaex.example()
    print(dataset("x").minmax())
    dataset.select("x < 0")
    # Selected row count next to the total row count.
    print(dataset.selected_length(), len(dataset))
    print(dataset("x").selected().is_masked)
    print(dataset("x").selected().minmax())
Ejemplo n.º 35
0
                        task._result = task.reduce(task._results)
                        task.fulfill(task._result)
                        # remove references
                    task._result = None
                    task._results = None
                self.signal_end.emit()
                # if new tasks were added as a result of this, execute them immediately
                # TODO: we may want to include infinite recursion protection
                self._is_executing = False
                if len(self.task_queue) > 0:
                    logger.debug("task queue not empty.. start over!")
                    self.execute()
        finally:
            self._is_executing = False


if __name__ == "__main__":
    # Manual check script: connects to a vaex server, lists its datasets, then
    # prints min/max and selection results for column "x".
    import vaex
    import sys
    vaex.set_log_level_debug()
    # Expects two command-line arguments: the server argument and a port number.
    # NOTE(review): argv[1] is presumably a hostname - confirm.
    server = vaex.server(sys.argv[1], port=int(sys.argv[2]))
    datasets = server.datasets()
    print(datasets)
    dataset = datasets[0]
    # NOTE(review): the remote dataset picked above is replaced right away by the
    # local example, so everything below runs locally - confirm this is intended.
    dataset = vaex.example()
    print(dataset("x").minmax())
    dataset.select("x < 0")
    # Selected row count next to the total row count.
    print(dataset.selected_length(), len(dataset))
    print(dataset("x").selected().is_masked)
    print(dataset("x").selected().minmax())