def test__stream_good_dict_of_index_and_series_data_transformed(self, pd):
    """Streaming a dict containing a pandas DatetimeIndex and a Series
    should have the datetime index converted (via ``convert_datetime_array``)
    before it reaches the low-level stream call."""
    dt_index = pd.date_range('now', periods=30, freq='T')
    df = pd.DataFrame(
        index=dt_index,
        columns=['A'],
        data=np.cumsum(np.random.standard_normal(30), axis=0),
    )
    ds = ColumnDataSource(data={
        'index': convert_datetime_array(df.index.values),
        'A': df.A,
    })
    ds._document = "doc"

    captured = {}
    setter_sentinel = object()

    # Swap the low-level stream for a recorder so the payload that
    # ds._stream forwards can be inspected afterwards.
    def fake_stream(*args, **kw):
        captured['args'] = args
        captured['kw'] = kw
    ds.data._stream = fake_stream

    new_df = pd.DataFrame(
        index=df.index + pd.to_timedelta('30m'),
        columns=df.columns,
        data=np.random.standard_normal(30),
    )
    ds._stream({'index': new_df.index, 'A': new_df.A}, "foo", setter_sentinel)

    # args[2] is the (transformed) data dict handed to the real stream.
    streamed = captured['args'][2]
    assert np.array_equal(streamed['index'], convert_datetime_array(new_df.index.values))
    assert np.array_equal(streamed['A'], new_df.A.values)
def test_stream_df_to_ds_created_from_df_default_index(self, pd):
    """Streaming a DataFrame into a source built from a default-indexed
    DataFrame should forward an 'index' column and append rows correctly."""
    # Source starts with one row; the default RangeIndex becomes an
    # 'index' column on the ColumnDataSource.
    data = pd.DataFrame(dict(a=[10], b=[20], c=[30]))
    ds = ColumnDataSource(data)
    ds._document = "doc"
    # Record the owner-notification call so we can assert on the
    # pre-stream snapshot it receives.
    notify_owners_stuff = {}
    def notify_owners_mock(*args, **kw):
        notify_owners_stuff['args'] = args
        notify_owners_stuff['kw'] = kw
    ds.data._notify_owners = notify_owners_mock
    # Wrap (not replace) the low-level stream: capture the args, then
    # delegate so the source data is genuinely updated.
    stream_stuff = {}
    data_stream = ds.data._stream
    def stream_wrapper(*args, **kwargs):
        stream_stuff['args'] = args
        stream_stuff['kwargs'] = kwargs
        data_stream(*args, **kwargs)
    ds.data._stream = stream_wrapper
    # Stream two new rows as a DataFrame, with rollover=7.
    ds._stream(pd.DataFrame(dict(a=[11, 12], b=[21, 22], c=[31, 32])), 7)
    # Expected low-level call signature: (doc, source, data, rollover, setter).
    assert len(stream_stuff['args']) == 5
    expected_df = pd.DataFrame(
        dict(a=np.array([11, 12]),
             b=np.array([21, 22]),
             c=np.array([31, 32])))
    expected_stream_data = expected_df.to_dict('series')
    # The streamed DataFrame's default index must be forwarded too.
    expected_stream_data['index'] = expected_df.index.values
    expected_args = ("doc", ds, expected_stream_data, 7, None)
    for i, (arg, ex_arg) in enumerate(zip(stream_stuff['args'], expected_args)):
        if i == 2:
            # The data dict needs element-wise array comparison; plain ==
            # would be ambiguous for arrays/Series.
            for k, v in arg.items():
                assert np.array_equal(v, ex_arg[k])
        else:
            assert stream_stuff['args'][i] == expected_args[i]
    assert stream_stuff['kwargs'] == {}
    # _notify_owners receives exactly one positional arg: the old data,
    # i.e. the single original row (index value 0).
    assert len(notify_owners_stuff['args']) == 1
    self._assert_equal_dicts_of_arrays(
        notify_owners_stuff['args'][0],
        dict(a=np.array([10]),
             b=np.array([20]),
             c=np.array([30]),
             index=np.array([0])))
    # After streaming, rows are appended and the streamed frame's own
    # RangeIndex values ([0, 1]) are appended to the 'index' column.
    self._assert_equal_dicts_of_arrays(
        dict(ds.data),
        dict(a=np.array([10, 11, 12]),
             b=np.array([20, 21, 22]),
             c=np.array([30, 31, 32]),
             index=np.array([0, 0, 1])))
def test__graph_will_convert_dataframes_to_sources(pd):
    """DataFrames passed to _graph should be wrapped in data sources,
    gaining the 'index' column that pandas conversion adds."""
    nodes_df = pd.DataFrame(data=dict(foo=[]))
    edges_df = pd.DataFrame(data=dict(start=[], end=[], bar=[]))

    kw = _graph(nodes_df, edges_df)

    # Conversion from a DataFrame always contributes an 'index' column.
    node_cols = set(kw['node_renderer'].data_source.data.keys())
    edge_cols = set(kw['edge_renderer'].data_source.data.keys())
    assert node_cols == {"index", "foo"}
    assert edge_cols == {"index", "start", "end", "bar"}
def test_property_matches_dicts_with_index_values(capsys, pd):
    """Property.matches should compare pandas Index objects element-wise
    without emitting warnings on stderr."""
    prop = pb.Property()
    frame_a = pd.DataFrame(dict(foo=np.arange(10)))
    frame_b = pd.DataFrame(dict(foo=np.arange(10)))

    # Identical and equal-valued indexes match.
    assert prop.matches(frame_a.index, frame_a.index) is True
    assert prop.matches(frame_a.index, frame_b.index) is True

    # XXX not sure if this is preferable to have match, or not
    assert prop.matches(frame_a.index, list(range(10))) is True

    # Different length, shifted values, or a scalar never match.
    assert prop.matches(frame_a.index, np.arange(11)) is False
    assert prop.matches(frame_a.index, np.arange(10) + 1) is False
    assert prop.matches(frame_a.index, 10) is False

    out, err = capsys.readouterr()
    assert err == ""
def test__df_index_name_with_unnamed_multi_index(self, pd):
    """An entirely unnamed MultiIndex should fall back to the name 'index'."""
    levels = [
        np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
        np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']),
    ]
    frame = pd.DataFrame(np.random.randn(8, 4), index=levels)

    # Sanity check: neither level of the MultiIndex carries a name.
    assert frame.index.names == [None, None]

    assert ColumnDataSource._df_index_name(frame) == "index"
def test_glyph_label_is_value_if_column_not_in_df_datasource_is_added_as_legend(
        p, pd):
    """A legend string that is not a DataFrame column should be treated
    as a literal value label."""
    frame = pd.DataFrame(
        data=dict(x=[1, 2, 3], y=[1, 2, 3], label=['a', 'b', 'c']))

    # 'milk' is not a column of the frame, so it must become a value label.
    p.circle(x='x', y='y', legend='milk', source=frame)

    found = p.select(Legend)
    assert len(found) == 1
    assert found[0].items[0].label == {'value': 'milk'}
def test_init_dataframe_data_kwarg(self, pd):
    """Constructing with data=<DataFrame> should copy every column as an
    ndarray and add the frame's index as an extra 'index' column."""
    raw = dict(a=[1, 2], b=[2, 3])
    frame = pd.DataFrame(raw)
    ds = ColumnDataSource(data=frame)

    # Every frame column appears among the source's columns.
    assert set(frame.columns).issubset(set(ds.column_names))

    # Column values survive as ndarrays with the original contents.
    for name in raw:
        column = ds.data[name]
        assert isinstance(column, np.ndarray)
        assert list(frame[name]) == list(column)

    # The default RangeIndex becomes an 'index' column...
    assert isinstance(ds.data['index'], np.ndarray)
    assert [0, 1] == list(ds.data['index'])

    # ...and it is the only column beyond those of the frame.
    assert set(ds.column_names) - set(frame.columns) == {"index"}
def test_stream_dict_to_ds_created_from_df(self, pd):
    """Streaming a plain dict (mixing list, ndarray, and Series values)
    into a source built from a DataFrame with a named index ('c')."""
    data = pd.DataFrame(dict(a=[10], b=[20], c=[30])).set_index('c')
    ds = ColumnDataSource(data)
    ds._document = "doc"
    # Record the owner notification so the pre-stream snapshot can be checked.
    notify_owners_stuff = {}
    def notify_owners_mock(*args, **kw):
        notify_owners_stuff['args'] = args
        notify_owners_stuff['kw'] = kw
    ds.data._notify_owners = notify_owners_mock
    # Wrap (not replace) the low-level stream: capture args, then delegate
    # so the source data is genuinely updated.
    stream_stuff = {}
    data_stream = ds.data._stream
    def stream_wrapper(*args, **kwargs):
        stream_stuff['args'] = args
        stream_stuff['kwargs'] = kwargs
        data_stream(*args, **kwargs)
    ds.data._stream = stream_wrapper
    # Heterogeneous container types must all pass through unconverted.
    ds._stream(
        dict(a=[11, 12], b=np.array([21, 22]), c=pd.Series([31, 32])), 7)
    # Expected low-level call signature: (doc, source, data, rollover, setter).
    assert len(stream_stuff['args']) == 5
    expected_stream_args = ("doc", ds, dict(a=[11, 12],
                                            b=np.array([21, 22]),
                                            c=pd.Series([31, 32])), 7, None)
    for i, (arg, ex_arg) in enumerate(
            zip(stream_stuff['args'], expected_stream_args)):
        if i == 2:
            # 'a' is a plain list, so direct == works; the array/Series
            # entries need element-wise comparison, so compare 'a' first,
            # drop it from both dicts, then compare the rest as arrays.
            assert arg['a'] == ex_arg['a']
            del arg['a'], ex_arg['a']
            self._assert_equal_dicts_of_arrays(arg, ex_arg)
        else:
            assert arg == ex_arg
    assert stream_stuff['kwargs'] == {}
    # _notify_owners gets one positional arg: the old (single-row) data.
    assert len(notify_owners_stuff['args']) == 1
    self._assert_equal_dicts_of_arrays(
        notify_owners_stuff['args'][0],
        dict(a=np.array([10]), b=np.array([20]), c=np.array([30])))
    # Final state: the streamed rows are appended to every column.
    self._assert_equal_dicts_of_arrays(
        dict(ds.data),
        dict(a=np.array([10, 11, 12]),
             b=np.array([20, 21, 22]),
             c=np.array([30, 31, 32])))
def test_categorical_color_mapper_with_pandas_index(pd):
    """A pandas Index may be used directly as the factors of a
    CategoricalColorMapper and is kept as-is."""
    data = {'2015' : [2, 1, 4, 3, 2, 4],
            '2016' : [5, 3, 3, 2, 4, 6],
            '2017' : [3, 2, 4, 4, 5, 3]}
    fruit_labels = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes',
                    'Strawberries']
    df = pd.DataFrame(data, index=fruit_labels)

    # df.columns is a pandas Index of the year strings.
    year_index = df.columns
    mapper = CategoricalColorMapper(palette=Spectral6,
                                    factors=year_index,
                                    start=1, end=2)

    # The factors keep their values and their pandas Index type.
    assert list(mapper.factors) == list(year_index)
    assert isinstance(mapper.factors, pd.Index)
def test_init_groupby_with_None_subindex_name(self, pd):
    """A groupby whose second key is an unnamed array should still produce
    flattened, underscore-joined describe() columns on the source."""
    df = pd.DataFrame({
        "A": [1, 2, 3, 4] * 2,
        "B": [10, 20, 30, 40] * 2,
        "C": range(8),
    })
    # Second grouping level is a bare list, so its sub-index has no name.
    grouped = df.groupby(['A', [10, 20, 30, 40] * 2])
    ds = ColumnDataSource(data=grouped)
    summary = grouped.describe()

    # describe() of B and C (8 stats each) plus the index column -> 17.
    assert len(ds.column_names) == 17
    assert isinstance(ds.data['index'], np.ndarray)

    # Each (column, statistic) pair is flattened with "_" and preserved.
    for col_key in summary.columns.values:
        flat_name = "_".join(col_key)
        assert isinstance(ds.data[flat_name], np.ndarray)
        assert list(summary[col_key]) == list(ds.data[flat_name])
def test_pandas_datetime_types(self, pd):
    """Pandas datetime Series and DatetimeIndex values should both
    serialize to epoch milliseconds."""
    idx = pd.date_range('2001-1-1', '2001-1-5')
    df = pd.DataFrame({'vals': idx}, index=idx)

    round_tripped = self.deserialize(
        self.serialize({'vals': df.vals, 'idx': df.index}))

    # Epoch millis for 2001-01-01 .. 2001-01-05 (one day apart).
    millis = [
        978307200000,
        978393600000,
        978480000000,
        978566400000,
        978652800000,
    ]
    assert round_tripped == {u'vals': millis, u'idx': millis}
def test__df_index_name_with_unnamed_index(self, pd):
    """A default (unnamed) index should yield the fallback name 'index'."""
    frame = pd.DataFrame({'a': [10], 'b': [20], 'c': [30]})
    assert ColumnDataSource._df_index_name(frame) == "index"
def test_columnsource_auto_conversion_from_pandas(self, pd):
    """Passing a DataFrame as a glyph source should be accepted without
    error (it is auto-converted to a ColumnDataSource)."""
    fig = plt.figure()
    frame = pd.DataFrame({'x': [1, 2, 3], 'y': [2, 3, 4]})
    # No assertion needed: the call itself must not raise.
    fig.circle(x='x', y='y', source=frame)
def test_pandas_data(self, pd):
    """A DataFrame passed to ColumnsStreamedEvent should be stored as a
    plain dict mapping column names to their values."""
    model = FakeModel()
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})

    event = bde.ColumnsStreamedEvent("doc", model, df, 200, "setter", "invoker")

    assert isinstance(event.data, dict)
    assert event.data == dict((name, df[name]) for name in df.columns)