Beispiel #1
0
 def test_dataset_groupby_path(self):
     ds = Dataset([(0, 0, 1), (0, 1, 2), (1, 2, 3), (1, 3, 4)],
                  ['group', 'x', 'y'])
     subpaths = ds.groupby('group', group_type=Path)
     self.assertEqual(len(subpaths), 2)
     self.assertEqual(subpaths[0], Path([(0, 1), (1, 2)]))
     self.assertEqual(subpaths[1], Path([(2, 3), (3, 4)]))
Beispiel #2
0
 def test_dataset_groupby_dynamic_alias(self):
     array = np.random.rand(11, 11)
     dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                       kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
     grouped = dataset.groupby('X', dynamic=True)
     first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
                     kdims=[('y', 'Y')], vdims=[('z', 'Z')])
     self.assertEqual(grouped[0], first)
Beispiel #3
0
 def test_dataset_groupby_dynamic(self):
     array = np.random.rand(11, 11)
     dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                       kdims=['x', 'y'], vdims=['z'])
     grouped = dataset.groupby('x', dynamic=True)
     first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
                     kdims=['y'], vdims=['z'])
     self.assertEqual(grouped[0], first)
Beispiel #4
0
 def test_dataset_groupby_multiple_dims(self):
     dataset = Dataset((range(8), range(8), range(8), range(8),
                        np.random.rand(8, 8, 8, 8)),
                       kdims=['a', 'b', 'c', 'd'], vdims=['Value'])
     grouped = dataset.groupby(['c', 'd'])
     keys = list(product(range(8), range(8)))
     self.assertEqual(list(grouped.keys()), keys)
     for c, d in keys:
         self.assertEqual(grouped[c, d], dataset.select(c=c, d=d).reindex(['a', 'b']))
Beispiel #5
0
 def test_dataset_groupby_dynamic_alias(self):
     array = np.random.rand(11, 11)
     dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                       kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
     with DatatypeContext([self.datatype, 'dictionary' , 'dataframe'], dataset):
         grouped = dataset.groupby('X', dynamic=True)
     first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
                     kdims=[('y', 'Y')], vdims=[('z', 'Z')])
     self.assertEqual(grouped[0], first)
Beispiel #6
0
class GridDatasetTest(HomogeneousColumnTypes, ComparisonTestCase):
    """
    Test of the Grid array interface
    """

    datatype = 'grid'

    def setUp(self):
        self.restore_datatype = Dataset.datatype
        Dataset.datatype = ['grid']
        self.data_instance_type = dict
        self.init_data()

    def init_data(self):
        self.xs = range(11)
        self.xs_2 = [el**2 for el in self.xs]

        self.y_ints = [i*2 for i in range(11)]
        self.dataset_hm = Dataset((self.xs, self.y_ints),
                                  kdims=['x'], vdims=['y'])
        self.dataset_hm_alias = Dataset((self.xs, self.y_ints),
                                        kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.grid_xs = [0, 1]
        self.grid_ys = [0.1, 0.2, 0.3]
        self.grid_zs = [[0, 1], [2, 3], [4, 5]]
        self.dataset_grid = Dataset((self.grid_xs, self.grid_ys,
                                     self.grid_zs), kdims=['x', 'y'],
                                    vdims=['z'])
        self.dataset_grid_alias = Dataset((self.grid_xs, self.grid_ys,
                                           self.grid_zs), kdims=[('x', 'X'), ('y', 'Y')],
                                          vdims=[('z', 'Z')])
        self.dataset_grid_inv = Dataset((self.grid_xs[::-1], self.grid_ys[::-1],
                                         self.grid_zs), kdims=['x', 'y'],
                                        vdims=['z'])

    def test_canonical_vdim(self):
        x = np.array([ 0.  ,  0.75,  1.5 ])
        y = np.array([ 1.5 ,  0.75,  0.  ])
        z = np.array([[ 0.06925999,  0.05800389,  0.05620127],
                      [ 0.06240918,  0.05800931,  0.04969735],
                      [ 0.05376789,  0.04669417,  0.03880118]])
        dataset = Dataset((x, y, z), kdims=['x', 'y'], vdims=['z'])
        canonical = np.array([[ 0.05376789,  0.04669417,  0.03880118],
                              [ 0.06240918,  0.05800931,  0.04969735],
                              [ 0.06925999,  0.05800389,  0.05620127]])
        self.assertEqual(dataset.dimension_values('z', flat=False),
                         canonical)

    def test_dataset_dim_vals_grid_kdims_xs(self):
        self.assertEqual(self.dataset_grid.dimension_values(0, expanded=False),
                         np.array([0, 1]))

    def test_dataset_dim_vals_grid_kdims_xs_alias(self):
        self.assertEqual(self.dataset_grid_alias.dimension_values('x', expanded=False),
                         np.array([0, 1]))
        self.assertEqual(self.dataset_grid_alias.dimension_values('X', expanded=False),
                         np.array([0, 1]))

    def test_dataset_dim_vals_grid_kdims_xs_inv(self):
        self.assertEqual(self.dataset_grid_inv.dimension_values(0, expanded=False),
                         np.array([0, 1]))

    def test_dataset_dim_vals_grid_kdims_expanded_xs_flat(self):
        expanded_xs = np.array([0, 0, 0, 1, 1, 1])
        self.assertEqual(self.dataset_grid.dimension_values(0),
                         expanded_xs)

    def test_dataset_dim_vals_grid_kdims_expanded_xs_flat_inv(self):
        expanded_xs = np.array([0, 0, 0, 1, 1, 1])
        self.assertEqual(self.dataset_grid_inv.dimension_values(0),
                         expanded_xs)

    def test_dataset_dim_vals_grid_kdims_expanded_xs(self):
        expanded_xs = np.array([[0, 0, 0], [1, 1, 1]])
        self.assertEqual(self.dataset_grid.dimension_values(0, flat=False),
                         expanded_xs)

    def test_dataset_dim_vals_grid_kdims_expanded_xs_inv(self):
        expanded_xs = np.array([[0, 0, 0], [1, 1, 1]])
        self.assertEqual(self.dataset_grid_inv.dimension_values(0, flat=False),
                         expanded_xs)

    def test_dataset_dim_vals_grid_kdims_ys(self):
        self.assertEqual(self.dataset_grid.dimension_values(1, expanded=False),
                         np.array([0.1, 0.2, 0.3]))

    def test_dataset_dim_vals_grid_kdims_ys_inv(self):
        self.assertEqual(self.dataset_grid_inv.dimension_values(1, expanded=False),
                         np.array([0.1, 0.2, 0.3]))

    def test_dataset_dim_vals_grid_kdims_expanded_ys_flat(self):
        expanded_ys = np.array([0.1, 0.2, 0.3,
                                0.1, 0.2, 0.3])
        self.assertEqual(self.dataset_grid.dimension_values(1),
                         expanded_ys)

    def test_dataset_dim_vals_grid_kdims_expanded_ys_flat_inv(self):
        expanded_ys = np.array([0.1, 0.2, 0.3,
                                0.1, 0.2, 0.3])
        self.assertEqual(self.dataset_grid_inv.dimension_values(1),
                         expanded_ys)

    def test_dataset_dim_vals_grid_kdims_expanded_ys(self):
        expanded_ys = np.array([[0.1, 0.2, 0.3],
                                [0.1, 0.2, 0.3]])
        self.assertEqual(self.dataset_grid.dimension_values(1, flat=False),
                         expanded_ys)

    def test_dataset_dim_vals_grid_kdims_expanded_ys_inv(self):
        expanded_ys = np.array([[0.1, 0.2, 0.3],
                                [0.1, 0.2, 0.3]])
        self.assertEqual(self.dataset_grid_inv.dimension_values(1, flat=False),
                         expanded_ys)

    def test_dataset_dim_vals_grid_vdims_zs_flat(self):
        expanded_zs = np.array([0, 2, 4, 1, 3, 5])
        self.assertEqual(self.dataset_grid.dimension_values(2),
                         expanded_zs)

    def test_dataset_dim_vals_grid_vdims_zs_flat_inv(self):
        expanded_zs = np.array([5, 3, 1, 4, 2, 0])
        self.assertEqual(self.dataset_grid_inv.dimension_values(2),
                         expanded_zs)

    def test_dataset_dim_vals_grid_vdims_zs(self):
        expanded_zs = np.array([[0, 1], [2, 3], [4, 5]])
        self.assertEqual(self.dataset_grid.dimension_values(2, flat=False),
                         expanded_zs)

    def test_dataset_dim_vals_grid_vdims_zs_inv(self):
        expanded_zs = np.array([[5, 4], [3, 2], [1, 0]])
        self.assertEqual(self.dataset_grid_inv.dimension_values(2, flat=False),
                         expanded_zs)

    def test_dataset_array_init_hm(self):
        "Tests support for arrays (homogeneous)"
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(np.column_stack([self.xs, self.xs_2]),
                    kdims=['x'], vdims=['x2'])

    def test_dataset_dataframe_init_hm(self):
        "Tests support for homogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}),
                    kdims=['x'], vdims=['x2'])

    def test_dataset_dataframe_init_hm_alias(self):
        "Tests support for homogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}),
                    kdims=['x'], vdims=['x2'])

    def test_dataset_ndelement_init_hm(self):
        "Tests support for homogeneous NdElement (backwards compatibility)"
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(NdElement(zip(self.xs, self.xs_2),
                              kdims=['x'], vdims=['x2']))

    def test_dataset_2D_aggregate_partial_hm(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        self.assertEqual(dataset.aggregate(['x'], np.mean),
                         Dataset({'x':self.xs, 'z': np.mean(array, axis=0)},
                                 kdims=['x'], vdims=['z']))

    def test_dataset_2D_aggregate_partial_hm_alias(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
        self.assertEqual(dataset.aggregate(['X'], np.mean),
                         Dataset({'x':self.xs, 'z': np.mean(array, axis=0)},
                                 kdims=[('x', 'X')], vdims=[('z', 'Z')]))

    def test_dataset_2D_reduce_hm(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        self.assertEqual(np.array(dataset.reduce(['x', 'y'], np.mean)),
                         np.mean(array))

    def test_dataset_2D_reduce_hm_alias(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
        self.assertEqual(np.array(dataset.reduce(['x', 'y'], np.mean)),
                         np.mean(array))
        self.assertEqual(np.array(dataset.reduce(['X', 'Y'], np.mean)),
                         np.mean(array))

    def test_dataset_add_dimensions_value_hm(self):
        with self.assertRaisesRegexp(Exception, 'Cannot add key dimension to a dense representation.'):
            self.dataset_hm.add_dimension('z', 1, 0)

    def test_dataset_add_dimensions_values_hm(self):
        table =  self.dataset_hm.add_dimension('z', 1, range(1,12), vdim=True)
        self.assertEqual(table.vdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12))))

    def test_dataset_add_dimensions_values_hm_alias(self):
        table =  self.dataset_hm.add_dimension(('z', 'Z'), 1, range(1,12), vdim=True)
        self.assertEqual(table.vdims[1], 'Z')
        self.compare_arrays(table.dimension_values('Z'), np.array(list(range(1,12))))

    def test_dataset_sort_vdim_hm(self):
        exception = ('Compressed format cannot be sorted, either instantiate '
                     'in the desired order or use the expanded format.')
        with self.assertRaisesRegexp(Exception, exception):
            self.dataset_hm.sort('y')

    def test_dataset_sort_vdim_hm_alias(self):
        exception = ('Compressed format cannot be sorted, either instantiate '
                     'in the desired order or use the expanded format.')
        with self.assertRaisesRegexp(Exception, exception):
            self.dataset_hm.sort('y')

    def test_dataset_groupby(self):
        self.assertEqual(self.dataset_hm.groupby('x').keys(), list(self.xs))

    def test_dataset_groupby_dynamic(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        grouped = dataset.groupby('x', dynamic=True)
        first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
                        kdims=['y'], vdims=['z'])
        self.assertEqual(grouped[0], first)

    def test_dataset_groupby_dynamic_alias(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
        grouped = dataset.groupby('X', dynamic=True)
        first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
                        kdims=[('y', 'Y')], vdims=[('z', 'Z')])
        self.assertEqual(grouped[0], first)

    def test_dataset_groupby_multiple_dims(self):
        dataset = Dataset((range(8), range(8), range(8), range(8),
                           np.random.rand(8, 8, 8, 8)),
                          kdims=['a', 'b', 'c', 'd'], vdims=['Value'])
        grouped = dataset.groupby(['c', 'd'])
        keys = list(product(range(8), range(8)))
        self.assertEqual(list(grouped.keys()), keys)
        for c, d in keys:
            self.assertEqual(grouped[c, d], dataset.select(c=c, d=d).reindex(['a', 'b']))

    def test_dataset_groupby_drop_dims(self):
        array = np.random.rand(3, 20, 10)
        ds = Dataset({'x': range(10), 'y': range(20), 'z': range(3), 'Val': array},
                     kdims=['x', 'y', 'z'], vdims=['Val'])
        with DatatypeContext([self.datatype, 'columns', 'dataframe']):
            partial = ds.to(Dataset, kdims=['x'], vdims=['Val'], groupby='y')
        self.assertEqual(partial.last['Val'], array[:, -1, :].T.flatten())

    def test_dataset_groupby_drop_dims_dynamic(self):
        array = np.random.rand(3, 20, 10)
        ds = Dataset({'x': range(10), 'y': range(20), 'z': range(3), 'Val': array},
                     kdims=['x', 'y', 'z'], vdims=['Val'])
        with DatatypeContext([self.datatype, 'columns', 'dataframe']):
            partial = ds.to(Dataset, kdims=['x'], vdims=['Val'], groupby='y', dynamic=True)
            self.assertEqual(partial[19]['Val'], array[:, -1, :].T.flatten())
Beispiel #7
0
class HeterogeneousColumnTypes(HomogeneousColumnTypes):
    """
    Tests for data formats that all dataset to have varied types
    """

    def init_data(self):
        self.kdims = ['Gender', 'Age']
        self.vdims = ['Weight', 'Height']
        self.gender, self.age = ['M','M','F'], [10,16,12]
        self.weight, self.height = [15,18,10], [0.8,0.6,0.8]
        self.table = Dataset({'Gender':self.gender, 'Age':self.age,
                              'Weight':self.weight, 'Height':self.height},
                             kdims=self.kdims, vdims=self.vdims)

        self.alias_kdims = [('gender', 'Gender'), ('age', 'Age')]
        self.alias_vdims = [('weight', 'Weight'), ('height', 'Height')]
        self.alias_table = Dataset({'gender':self.gender, 'age':self.age,
                                    'weight':self.weight, 'height':self.height},
                                   kdims=self.alias_kdims, vdims=self.alias_vdims)

        super(HeterogeneousColumnTypes, self).init_data()
        self.ys = np.linspace(0, 1, 11)
        self.zs = np.sin(self.xs)
        self.dataset_ht = Dataset({'x':self.xs, 'y':self.ys},
                                  kdims=['x'], vdims=['y'])

    # Test the constructor to be supported by all interfaces supporting
    # heterogeneous column types.

    def test_dataset_ndelement_init_ht(self):
        "Tests support for heterogeneous NdElement (backwards compatibility)"
        dataset = Dataset(NdElement(zip(self.xs, self.ys), kdims=['x'], vdims=['y']))
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_dataframe_init_ht(self):
        "Tests support for heterogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        dataset = Dataset(pd.DataFrame({'x':self.xs, 'y':self.ys}), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_dataframe_init_ht_alias(self):
        "Tests support for heterogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        dataset = Dataset(pd.DataFrame({'x':self.xs, 'y':self.ys}),
                          kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    # Test literal formats

    def test_dataset_expanded_dimvals_ht(self):
        self.assertEqual(self.table.dimension_values('Gender', expanded=False),
                         np.array(['M', 'F']))

    def test_dataset_implicit_indexing_init(self):
        dataset = Dataset(self.ys, kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_tuple_init(self):
        dataset = Dataset((self.xs, self.ys), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_tuple_init_alias(self):
        dataset = Dataset((self.xs, self.ys), kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_simple_zip_init(self):
        dataset = Dataset(zip(self.xs, self.ys), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_simple_zip_init_alias(self):
        dataset = Dataset(zip(self.xs, self.ys), kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_zip_init(self):
        dataset = Dataset(zip(self.gender, self.age,
                              self.weight, self.height),
                          kdims=self.kdims, vdims=self.vdims)
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_zip_init_alias(self):
        dataset = self.alias_table.clone(zip(self.gender, self.age,
                                             self.weight, self.height))
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_odict_init(self):
        dataset = Dataset(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_odict_init_alias(self):
        dataset = Dataset(OrderedDict(zip(self.xs, self.ys)),
                          kdims=[('a', 'A')], vdims=[('b', 'B')])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_dict_init(self):
        dataset = Dataset(dict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    # Operations

    def test_dataset_redim_with_alias_dframe(self):
        test_df = pd.DataFrame({'x': range(10), 'y': range(0,20,2)})
        dataset = Dataset(test_df, kdims=[('x', 'X-label')], vdims=['y'])
        redim_df = pd.DataFrame({'X': range(10), 'y': range(0,20,2)})
        dataset_redim = Dataset(redim_df, kdims=['X'], vdims=['y'])
        self.assertEqual(dataset.redim(**{'X-label':'X'}), dataset_redim)
        self.assertEqual(dataset.redim(**{'x':'X'}), dataset_redim)

    def test_dataset_sort_vdim_ht(self):
        dataset = Dataset({'x':self.xs, 'y':-self.ys},
                          kdims=['x'], vdims=['y'])
        dataset_sorted = Dataset({'x': self.xs[::-1], 'y':-self.ys[::-1]},
                                 kdims=['x'], vdims=['y'])
        self.assertEqual(dataset.sort('y'), dataset_sorted)

    def test_dataset_sort_string_ht(self):
        dataset_sorted = Dataset({'Gender':['F', 'M', 'M'], 'Age':[12, 10, 16],
                                  'Weight':[10,15,18], 'Height':[0.8,0.8,0.6]},
                                 kdims=self.kdims, vdims=self.vdims)
        self.assertEqual(self.table.sort(), dataset_sorted)

    def test_dataset_sample_ht(self):
        samples = self.dataset_ht.sample([0, 5, 10]).dimension_values('y')
        self.assertEqual(samples, np.array([0, 0.5, 1]))

    def test_dataset_reduce_ht(self):
        reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height},
                          kdims=self.kdims[1:], vdims=self.vdims)
        self.assertEqual(self.table.reduce(['Gender'], np.mean), reduced)

    def test_dataset_1D_reduce_ht(self):
        self.assertEqual(self.dataset_ht.reduce('x', np.mean), np.float64(0.5))

    def test_dataset_2D_reduce_ht(self):
        reduced = Dataset({'Weight':[14.333333333333334], 'Height':[0.73333333333333339]},
                          kdims=[], vdims=self.vdims)
        self.assertEqual(self.table.reduce(function=np.mean), reduced)

    def test_dataset_2D_partial_reduce_ht(self):
        dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = Dataset({'x':self.xs, 'z':self.zs},
                          kdims=['x'], vdims=['z'])
        self.assertEqual(dataset.reduce(['y'], np.mean), reduced)

    def test_dataset_aggregate_ht(self):
        aggregated = Dataset({'Gender':['M', 'F'], 'Weight':[16.5, 10], 'Height':[0.7, 0.8]},
                             kdims=self.kdims[:1], vdims=self.vdims)
        self.compare_dataset(self.table.aggregate(['Gender'], np.mean), aggregated)

    def test_dataset_aggregate_ht_alias(self):
        aggregated = Dataset({'gender':['M', 'F'], 'weight':[16.5, 10], 'height':[0.7, 0.8]},
                             kdims=self.alias_kdims[:1], vdims=self.alias_vdims)
        self.compare_dataset(self.alias_table.aggregate('Gender', np.mean), aggregated)

    def test_dataset_2D_aggregate_partial_ht(self):
        dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = Dataset({'x':self.xs, 'z':self.zs},
                          kdims=['x'], vdims=['z'])
        self.assertEqual(dataset.aggregate(['x'], np.mean), reduced)

    def test_dataset_groupby(self):
        group1 = {'Age':[10,16], 'Weight':[15,18], 'Height':[0.8,0.6]}
        group2 = {'Age':[12], 'Weight':[10], 'Height':[0.8]}
        grouped = HoloMap([('M', Dataset(group1, kdims=['Age'], vdims=self.vdims)),
                           ('F', Dataset(group2, kdims=['Age'], vdims=self.vdims))],
                          kdims=['Gender'])
        self.assertEqual(self.table.groupby(['Gender']), grouped)

    def test_dataset_groupby_alias(self):
        group1 = {'age':[10,16], 'weight':[15,18], 'height':[0.8,0.6]}
        group2 = {'age':[12], 'weight':[10], 'height':[0.8]}
        grouped = HoloMap([('M', Dataset(group1, kdims=[('age', 'Age')],
                                         vdims=self.alias_vdims)),
                           ('F', Dataset(group2, kdims=[('age', 'Age')],
                                         vdims=self.alias_vdims))],
                          kdims=[('gender', 'Gender')])
        self.assertEqual(self.alias_table.groupby('Gender'), grouped)

    def test_dataset_groupby_dynamic(self):
        grouped_dataset = self.table.groupby('Gender', dynamic=True)
        self.assertEqual(grouped_dataset['M'],
                         self.table.select(Gender='M').reindex(['Age']))
        self.assertEqual(grouped_dataset['F'],
                         self.table.select(Gender='F').reindex(['Age']))

    def test_dataset_groupby_dynamic_alias(self):
        grouped_dataset = self.alias_table.groupby('Gender', dynamic=True)
        self.assertEqual(grouped_dataset['M'],
                         self.alias_table.select(gender='M').reindex(['Age']))
        self.assertEqual(grouped_dataset['F'],
                         self.alias_table.select(gender='F').reindex(['Age']))

    def test_dataset_add_dimensions_value_ht(self):
        table = self.dataset_ht.add_dimension('z', 1, 0)
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.zeros(table.shape[0]))

    def test_dataset_add_dimensions_value_ht_alias(self):
        table = self.dataset_ht.add_dimension(('z', 'Z'), 1, 0)
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.zeros(table.shape[0]))

    def test_dataset_add_dimensions_values_ht(self):
        table = self.dataset_ht.add_dimension('z', 1, range(1,12))
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12))))

    # Indexing

    def test_dataset_index_row_gender_female(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        row = self.table['F',:]
        self.assertEquals(row, indexed)

    def test_dataset_index_rows_gender_male(self):
        row = self.table['M',:]
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_select_rows_gender_male(self):
        row = self.table.select(Gender='M')
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_select_rows_gender_male_alias(self):
        row = self.alias_table.select(Gender='M')
        alias_row = self.alias_table.select(gender='M')
        indexed = Dataset({'gender':['M', 'M'], 'age':[10, 16],
                           'weight':[15,18], 'height':[0.8,0.6]},
                          kdims=self.alias_kdims, vdims=self.alias_vdims)
        self.assertEquals(row, indexed)
        self.assertEquals(alias_row, indexed)

    def test_dataset_index_row_age(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(self.table[:, 12], indexed)

    def test_dataset_index_item_table(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(self.table['F', 12], indexed)

    def test_dataset_index_value1(self):
        self.assertEquals(self.table['F', 12, 'Weight'], 10)

    def test_dataset_index_value2(self):
        self.assertEquals(self.table['F', 12, 'Height'], 0.8)

    def test_dataset_index_column_ht(self):
        self.compare_arrays(self.dataset_ht['y'], self.ys)

    def test_dataset_boolean_index(self):
        row = self.table[np.array([True, True, False])]
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_value_dim_index(self):
        row = self.table[:, :, 'Weight']
        indexed = Dataset({'Gender':['M', 'M', 'F'], 'Age':[10, 16, 12],
                           'Weight':[15,18, 10]},
                          kdims=self.kdims, vdims=self.vdims[:1])
        self.assertEquals(row, indexed)

    def test_dataset_value_dim_scalar_index(self):
        row = self.table['M', 10, 'Weight']
        self.assertEquals(row, 15)

    # Casting

    def test_dataset_array_ht(self):
        self.assertEqual(self.dataset_ht.array(),
                         np.column_stack([self.xs, self.ys]))
Beispiel #8
0
 def test_dataset_scalar_groupby(self):
     ds = Dataset({'A': 1, 'B': np.arange(10)}, kdims=['A', 'B'])
     groups = ds.groupby('A')
     self.assertEqual(groups, HoloMap({1: Dataset({'B': np.arange(10)}, 'B')}, 'A'))
Beispiel #9
0
class HeterogeneousColumnTests(HomogeneousColumnTests):
    """
    Tests for data formats that allow dataset to have varied types
    """

    def init_column_data(self):
        self.kdims = ['Gender', 'Age']
        self.vdims = ['Weight', 'Height']
        self.gender, self.age = np.array(['M','M','F']), np.array([10,16,12])
        self.weight, self.height = np.array([15,18,10]), np.array([0.8,0.6,0.8])
        self.table = Dataset({'Gender':self.gender, 'Age':self.age,
                              'Weight':self.weight, 'Height':self.height},
                             kdims=self.kdims, vdims=self.vdims)

        self.alias_kdims = [('gender', 'Gender'), ('age', 'Age')]
        self.alias_vdims = [('weight', 'Weight'), ('height', 'Height')]
        self.alias_table = Dataset({'gender':self.gender, 'age':self.age,
                                    'weight':self.weight, 'height':self.height},
                                   kdims=self.alias_kdims, vdims=self.alias_vdims)

        super(HeterogeneousColumnTests, self).init_column_data()
        self.ys = np.linspace(0, 1, 11)
        self.zs = np.sin(self.xs)
        self.dataset_ht = Dataset({'x':self.xs, 'y':self.ys},
                                  kdims=['x'], vdims=['y'])

    # Test the constructor to be supported by all interfaces supporting
    # heterogeneous column types.

    @pd_skip
    def test_dataset_dataframe_init_ht(self):
        "Tests support for heterogeneous DataFrames"
        dataset = Dataset(pd.DataFrame({'x':self.xs, 'y':self.ys}), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    @pd_skip
    def test_dataset_dataframe_init_ht_alias(self):
        "Tests support for heterogeneous DataFrames"
        dataset = Dataset(pd.DataFrame({'x':self.xs, 'y':self.ys}),
                          kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    # Test literal formats

    def test_dataset_expanded_dimvals_ht(self):
        self.assertEqual(self.table.dimension_values('Gender', expanded=False),
                         np.array(['M', 'F']))

    def test_dataset_implicit_indexing_init(self):
        dataset = Scatter(self.ys, kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_tuple_init(self):
        dataset = Dataset((self.xs, self.ys), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_tuple_init_alias(self):
        dataset = Dataset((self.xs, self.ys), kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_simple_zip_init(self):
        dataset = Dataset(zip(self.xs, self.ys), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_simple_zip_init_alias(self):
        dataset = Dataset(zip(self.xs, self.ys), kdims=[('x', 'X')], vdims=[('y', 'Y')])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_zip_init(self):
        dataset = Dataset(zip(self.gender, self.age,
                              self.weight, self.height),
                          kdims=self.kdims, vdims=self.vdims)
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_zip_init_alias(self):
        dataset = self.alias_table.clone(zip(self.gender, self.age,
                                             self.weight, self.height))
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_odict_init(self):
        dataset = Dataset(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B'])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_odict_init_alias(self):
        dataset = Dataset(OrderedDict(zip(self.xs, self.ys)),
                          kdims=[('a', 'A')], vdims=[('b', 'B')])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_dict_init(self):
        dataset = Dataset(dict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B'])
        self.assertTrue(isinstance(dataset.data, self.data_type))

    def test_dataset_range_with_dimension_range(self):
        dt64 = np.array([np.datetime64(datetime.datetime(2017, 1, i)) for i in range(1, 4)])
        ds = Dataset(dt64, [Dimension('Date', range=(dt64[0], dt64[-1]))])
        self.assertEqual(ds.range('Date'), (dt64[0], dt64[-1]))
        
    # Operations

    @pd_skip
    def test_dataset_redim_with_alias_dframe(self):
        test_df = pd.DataFrame({'x': range(10), 'y': range(0,20,2)})
        dataset = Dataset(test_df, kdims=[('x', 'X-label')], vdims=['y'])
        redim_df = pd.DataFrame({'X': range(10), 'y': range(0,20,2)})
        dataset_redim = Dataset(redim_df, kdims=['X'], vdims=['y'])
        self.assertEqual(dataset.redim(**{'X-label':'X'}), dataset_redim)
        self.assertEqual(dataset.redim(**{'x':'X'}), dataset_redim)

    def test_dataset_mixed_type_range(self):
        ds = Dataset((['A', 'B', 'C', None],), 'A')
        self.assertEqual(ds.range(0), ('A', 'C'))

    def test_dataset_sort_vdim_ht(self):
        dataset = Dataset({'x':self.xs, 'y':-self.ys},
                          kdims=['x'], vdims=['y'])
        dataset_sorted = Dataset({'x': self.xs[::-1], 'y':-self.ys[::-1]},
                                 kdims=['x'], vdims=['y'])
        self.assertEqual(dataset.sort('y'), dataset_sorted)

    def test_dataset_sort_string_ht(self):
        dataset_sorted = Dataset({'Gender':['F', 'M', 'M'], 'Age':[12, 10, 16],
                                  'Weight':[10,15,18], 'Height':[0.8,0.8,0.6]},
                                 kdims=self.kdims, vdims=self.vdims)
        self.assertEqual(self.table.sort(), dataset_sorted)

    def test_dataset_sample_ht(self):
        samples = self.dataset_ht.sample([0, 5, 10]).dimension_values('y')
        self.assertEqual(samples, np.array([0, 0.5, 1]))

    def test_dataset_reduce_ht(self):
        reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height},
                          kdims=self.kdims[1:], vdims=self.vdims)
        self.assertEqual(self.table.reduce(['Gender'], np.mean), reduced)

    def test_dataset_1D_reduce_ht(self):
        self.assertEqual(self.dataset_ht.reduce('x', np.mean), np.float64(0.5))

    def test_dataset_2D_reduce_ht(self):
        reduced = Dataset({'Weight':[14.333333333333334], 'Height':[0.73333333333333339]},
                          kdims=[], vdims=self.vdims)
        self.assertEqual(self.table.reduce(function=np.mean), reduced)

    def test_dataset_2D_partial_reduce_ht(self):
        dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = Dataset({'x':self.xs, 'z':self.zs},
                          kdims=['x'], vdims=['z'])
        self.assertEqual(dataset.reduce(['y'], np.mean), reduced)

    def test_dataset_2D_aggregate_spread_fn_with_duplicates(self):
        dataset = Dataset({'x': np.array([0, 0, 1, 1]), 'y': np.array([0, 1, 2, 3]),
                           'z': np.array([1, 2, 3, 4])},
                          kdims=['x', 'y'], vdims=['z'])
        agg = dataset.aggregate('x', function=np.mean, spreadfn=np.var)
        self.assertEqual(agg, Dataset({'x': np.array([0, 1]), 'z': np.array([1.5, 3.5]),
                                       'z_var': np.array([0.25, 0.25])},
                                      kdims=['x'], vdims=['z', 'z_var']))

    def test_dataset_aggregate_ht(self):
        aggregated = Dataset({'Gender':['M', 'F'], 'Weight':[16.5, 10], 'Height':[0.7, 0.8]},
                             kdims=self.kdims[:1], vdims=self.vdims)
        self.compare_dataset(self.table.aggregate(['Gender'], np.mean), aggregated)

    def test_dataset_aggregate_string_types(self):
        ds = Dataset({'Gender':['M', 'M'], 'Weight':[20, 10], 'Name':['Peter', 'Matt']},
                             kdims='Gender', vdims=['Weight', 'Name'])
        aggregated = Dataset({'Gender': ['M'], 'Weight': [15]},
                             kdims='Gender', vdims=['Weight'])
        self.compare_dataset(ds.aggregate(['Gender'], np.mean), aggregated)

    def test_dataset_aggregate_string_types_size(self):
        ds = Dataset({'Gender':['M', 'M'], 'Weight':[20, 10], 'Name':['Peter', 'Matt']},
                             kdims='Gender', vdims=['Weight', 'Name'])
        aggregated = Dataset({'Gender': ['M'], 'Weight': [2], 'Name': [2]},
                             kdims='Gender', vdims=['Weight', 'Name'])
        self.compare_dataset(ds.aggregate(['Gender'], np.size), aggregated)

    def test_dataset_aggregate_ht_alias(self):
        aggregated = Dataset({'gender':['M', 'F'], 'weight':[16.5, 10], 'height':[0.7, 0.8]},
                             kdims=self.alias_kdims[:1], vdims=self.alias_vdims)
        self.compare_dataset(self.alias_table.aggregate('Gender', np.mean), aggregated)

    def test_dataset_2D_aggregate_partial_ht(self):
        dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = Dataset({'x':self.xs, 'z':self.zs},
                          kdims=['x'], vdims=['z'])
        self.assertEqual(dataset.aggregate(['x'], np.mean), reduced)

    def test_dataset_empty_aggregate(self):
        dataset = Dataset([], kdims=self.kdims, vdims=self.vdims)
        aggregated = Dataset([], kdims=self.kdims[:1], vdims=self.vdims)
        self.compare_dataset(dataset.aggregate(['Gender'], np.mean), aggregated)

    def test_dataset_empty_aggregate_with_spreadfn(self):
        dataset = Dataset([], kdims=self.kdims, vdims=self.vdims)
        aggregated = Dataset([], kdims=self.kdims[:1], vdims=[d for vd in self.vdims for d in [vd, vd+'_std']])
        self.compare_dataset(dataset.aggregate(['Gender'], np.mean, np.std), aggregated)

    def test_dataset_groupby(self):
        group1 = {'Age':[10,16], 'Weight':[15,18], 'Height':[0.8,0.6]}
        group2 = {'Age':[12], 'Weight':[10], 'Height':[0.8]}
        grouped = HoloMap([('M', Dataset(group1, kdims=['Age'], vdims=self.vdims)),
                           ('F', Dataset(group2, kdims=['Age'], vdims=self.vdims))],
                          kdims=['Gender'], sort=False)
        print(grouped.keys())
        self.assertEqual(self.table.groupby(['Gender']), grouped)

    def test_dataset_groupby_alias(self):
        group1 = {'age':[10,16], 'weight':[15,18], 'height':[0.8,0.6]}
        group2 = {'age':[12], 'weight':[10], 'height':[0.8]}
        grouped = HoloMap([('M', Dataset(group1, kdims=[('age', 'Age')],
                                         vdims=self.alias_vdims)),
                           ('F', Dataset(group2, kdims=[('age', 'Age')],
                                         vdims=self.alias_vdims))],
                          kdims=[('gender', 'Gender')], sort=False)
        self.assertEqual(self.alias_table.groupby('Gender'), grouped)

    def test_dataset_groupby_second_dim(self):
        group1 = {'Gender':['M'], 'Weight':[15], 'Height':[0.8]}
        group2 = {'Gender':['M'], 'Weight':[18], 'Height':[0.6]}
        group3 = {'Gender':['F'], 'Weight':[10], 'Height':[0.8]}
        grouped = HoloMap([(10, Dataset(group1, kdims=['Gender'], vdims=self.vdims)),
                           (16, Dataset(group2, kdims=['Gender'], vdims=self.vdims)),
                           (12, Dataset(group3, kdims=['Gender'], vdims=self.vdims))],
                          kdims=['Age'], sort=False)
        self.assertEqual(self.table.groupby(['Age']), grouped)

    def test_dataset_groupby_dynamic(self):
        grouped_dataset = self.table.groupby('Gender', dynamic=True)
        self.assertEqual(grouped_dataset['M'],
                         self.table.select(Gender='M').reindex(['Age']))
        self.assertEqual(grouped_dataset['F'],
                         self.table.select(Gender='F').reindex(['Age']))

    def test_dataset_groupby_dynamic_alias(self):
        grouped_dataset = self.alias_table.groupby('Gender', dynamic=True)
        self.assertEqual(grouped_dataset['M'],
                         self.alias_table.select(gender='M').reindex(['Age']))
        self.assertEqual(grouped_dataset['F'],
                         self.alias_table.select(gender='F').reindex(['Age']))

    def test_dataset_add_dimensions_value_ht(self):
        table = self.dataset_ht.add_dimension('z', 1, 0)
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.zeros(table.shape[0]))

    def test_dataset_add_dimensions_value_ht_alias(self):
        table = self.dataset_ht.add_dimension(('z', 'Z'), 1, 0)
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.zeros(table.shape[0]))

    def test_dataset_add_dimensions_values_ht(self):
        table = self.dataset_ht.add_dimension('z', 1, range(1,12))
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12))))

    def test_redim_with_extra_dimension(self):
        dataset = self.dataset_ht.add_dimension('Temp', 0, 0).clone(kdims=['x', 'y'], vdims=[])
        redimmed = dataset.redim(x='Time')
        self.assertEqual(redimmed.dimension_values('Time'),
                         self.dataset_ht.dimension_values('x'))

    # Indexing

    def test_dataset_index_row_gender_female(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        row = self.table['F',:]
        self.assertEquals(row, indexed)

    def test_dataset_index_rows_gender_male(self):
        row = self.table['M',:]
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_select_rows_gender_male(self):
        row = self.table.select(Gender='M')
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_select_rows_gender_male_alias(self):
        row = self.alias_table.select(Gender='M')
        alias_row = self.alias_table.select(gender='M')
        indexed = Dataset({'gender':['M', 'M'], 'age':[10, 16],
                           'weight':[15,18], 'height':[0.8,0.6]},
                          kdims=self.alias_kdims, vdims=self.alias_vdims)
        self.assertEquals(row, indexed)
        self.assertEquals(alias_row, indexed)

    def test_dataset_index_row_age(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(self.table[:, 12], indexed)

    def test_dataset_index_item_table(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(self.table['F', 12], indexed)

    def test_dataset_index_value1(self):
        self.assertEquals(self.table['F', 12, 'Weight'], 10)

    def test_dataset_index_value2(self):
        self.assertEquals(self.table['F', 12, 'Height'], 0.8)

    def test_dataset_index_column_ht(self):
        self.compare_arrays(self.dataset_ht['y'], self.ys)

    def test_dataset_boolean_index(self):
        row = self.table[np.array([True, True, False])]
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_value_dim_index(self):
        row = self.table[:, :, 'Weight']
        indexed = Dataset({'Gender':['M', 'M', 'F'], 'Age':[10, 16, 12],
                           'Weight':[15,18, 10]},
                          kdims=self.kdims, vdims=self.vdims[:1])
        self.assertEquals(row, indexed)

    def test_dataset_value_dim_scalar_index(self):
        row = self.table['M', 10, 'Weight']
        self.assertEquals(row, 15)

    # Tabular indexing

    def test_dataset_iloc_slice_rows(self):
        sliced = self.table.iloc[1:2]
        table = Dataset({'Gender':self.gender[1:2], 'Age':self.age[1:2],
                         'Weight':self.weight[1:2], 'Height':self.height[1:2]},
                        kdims=self.kdims, vdims=self.vdims)
        self.assertEqual(sliced, table)

    def test_dataset_iloc_slice_rows_slice_cols(self):
        sliced = self.table.iloc[1:2, 1:3]
        table = Dataset({'Age':self.age[1:2], 'Weight':self.weight[1:2]},
                        kdims=self.kdims[1:], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_slice_rows_list_cols(self):
        sliced = self.table.iloc[1:2, [1, 3]]
        table = Dataset({'Age':self.age[1:2], 'Height':self.height[1:2]},
                        kdims=self.kdims[1:], vdims=self.vdims[1:])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_slice_rows_index_cols(self):
        sliced = self.table.iloc[1:2, 2]
        table = Dataset({'Weight':self.weight[1:2]}, kdims=[], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_list_rows(self):
        sliced = self.table.iloc[[0, 2]]
        table = Dataset({'Gender':self.gender[[0, 2]], 'Age':self.age[[0, 2]],
                         'Weight':self.weight[[0, 2]], 'Height':self.height[[0, 2]]},
                        kdims=self.kdims, vdims=self.vdims)
        self.assertEqual(sliced, table)

    def test_dataset_iloc_list_rows_list_cols(self):
        sliced = self.table.iloc[[0, 2], [0, 2]]
        table = Dataset({'Gender':self.gender[[0, 2]],  'Weight':self.weight[[0, 2]]},
                        kdims=self.kdims[:1], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_list_rows_list_cols_by_name(self):
        sliced = self.table.iloc[[0, 2], ['Gender', 'Weight']]
        table = Dataset({'Gender':self.gender[[0, 2]],  'Weight':self.weight[[0, 2]]},
                        kdims=self.kdims[:1], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_list_rows_slice_cols(self):
        sliced = self.table.iloc[[0, 2], slice(1, 3)]
        table = Dataset({'Age':self.age[[0, 2]],  'Weight':self.weight[[0, 2]]},
                        kdims=self.kdims[1:], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_index_rows_index_cols(self):
        indexed = self.table.iloc[1, 1]
        self.assertEqual(indexed, self.age[1])

    def test_dataset_iloc_index_rows_slice_cols(self):
        indexed = self.table.iloc[1, 1:3]
        table = Dataset({'Age':self.age[[1]],  'Weight':self.weight[[1]]},
                        kdims=self.kdims[1:], vdims=self.vdims[:1])
        self.assertEqual(indexed, table)

    def test_dataset_iloc_list_cols(self):
        sliced = self.table.iloc[:, [0, 2]]
        table = Dataset({'Gender':self.gender,  'Weight':self.weight},
                        kdims=self.kdims[:1], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_list_cols_by_name(self):
        sliced = self.table.iloc[:, ['Gender', 'Weight']]
        table = Dataset({'Gender':self.gender,  'Weight':self.weight},
                        kdims=self.kdims[:1], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_ellipsis_list_cols(self):
        sliced = self.table.iloc[..., [0, 2]]
        table = Dataset({'Gender':self.gender,  'Weight':self.weight},
                        kdims=self.kdims[:1], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    def test_dataset_iloc_ellipsis_list_cols_by_name(self):
        sliced = self.table.iloc[..., ['Gender', 'Weight']]
        table = Dataset({'Gender':self.gender,  'Weight':self.weight},
                        kdims=self.kdims[:1], vdims=self.vdims[:1])
        self.assertEqual(sliced, table)

    # Casting

    def test_dataset_array_ht(self):
        self.assertEqual(self.dataset_ht.array(),
                         np.column_stack([self.xs, self.ys]))
Beispiel #10
0
 def test_grid_3d_groupby_concat_roundtrip(self):
     array = np.random.rand(4, 5, 3, 2)
     orig = Dataset((range(2), range(3), range(5), range(4), array), ['A', 'B', 'x', 'y'], 'z')
     self.assertEqual(concat(orig.groupby(['A', 'B'])), orig)
Beispiel #11
0
 def test_dataset_dynamic_groupby_with_transposed_dimensions(self):
     dat = np.zeros((3,5,7))
     dataset = Dataset((range(7), range(5), range(3), dat), ['z','x','y'], 'value')
     grouped = dataset.groupby('z', kdims=['y', 'x'], dynamic=True)
     self.assertEqual(grouped[2].dimension_values(2, flat=False), dat[:, :, -1].T)
class GridDatasetTest(HomogeneousColumnTypes, ComparisonTestCase):
    """
    Test of the NdDataset interface (mostly for backwards compatibility)
    """

    def setUp(self):
        self.restore_datatype = Dataset.datatype
        Dataset.datatype = ['grid']
        self.data_instance_type = dict
        self.init_data()

    def init_data(self):
        self.xs = range(11)
        self.xs_2 = [el**2 for el in self.xs]

        self.y_ints = [i*2 for i in range(11)]
        self.dataset_hm = Dataset((self.xs, self.y_ints),
                                  kdims=['x'], vdims=['y'])

    def test_dataset_array_init_hm(self):
        "Tests support for arrays (homogeneous)"
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(np.column_stack([self.xs, self.xs_2]),
                    kdims=['x'], vdims=['x2'])

    def test_dataset_dataframe_init_hm(self):
        "Tests support for homogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(pd.DataFrame({'x':self.xs, 'x2':self.xs_2}),
                    kdims=['x'], vdims=['x2'])

    def test_dataset_ndelement_init_hm(self):
        "Tests support for homogeneous NdElement (backwards compatibility)"
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(NdElement(zip(self.xs, self.xs_2),
                              kdims=['x'], vdims=['x2']))

    def test_dataset_2D_aggregate_partial_hm(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        self.assertEqual(dataset.aggregate(['x'], np.mean),
                         Dataset({'x':self.xs, 'z': np.mean(array, axis=0)},
                                 kdims=['x'], vdims=['z']))

    def test_dataset_2D_reduce_hm(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
                          kdims=['x', 'y'], vdims=['z'])
        self.assertEqual(np.array(dataset.reduce(['x', 'y'], np.mean)),
                         np.mean(array))

    def test_dataset_add_dimensions_value_hm(self):
        with self.assertRaisesRegexp(Exception, 'Cannot add key dimension to a dense representation.'):
            self.dataset_hm.add_dimension('z', 1, 0)

    def test_dataset_add_dimensions_values_hm(self):
        table =  self.dataset_hm.add_dimension('z', 1, range(1,12), vdim=True)
        self.assertEqual(table.vdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12))))

    def test_dataset_sort_vdim_hm(self):
        exception = ('Compressed format cannot be sorted, either instantiate '
                     'in the desired order or use the expanded format.')
        with self.assertRaisesRegexp(Exception, exception):
            self.dataset_hm.sort('y')

    def test_dataset_groupby(self):
        self.assertEqual(self.dataset_hm.groupby('x').keys(), list(self.xs))
class HeterogeneousColumnTypes(HomogeneousColumnTypes):
    """
    Tests for data formats that all dataset to have varied types
    """

    def init_data(self):
        self.kdims = ['Gender', 'Age']
        self.vdims = ['Weight', 'Height']
        self.gender, self.age = ['M','M','F'], [10,16,12]
        self.weight, self.height = [15,18,10], [0.8,0.6,0.8]
        self.table = Dataset({'Gender':self.gender, 'Age':self.age,
                              'Weight':self.weight, 'Height':self.height},
                             kdims=self.kdims, vdims=self.vdims)

        super(HeterogeneousColumnTypes, self).init_data()
        self.ys = np.linspace(0, 1, 11)
        self.zs = np.sin(self.xs)
        self.dataset_ht = Dataset({'x':self.xs, 'y':self.ys},
                                  kdims=['x'], vdims=['y'])

    # Test the constructor to be supported by all interfaces supporting
    # heterogeneous column types.

    def test_dataset_ndelement_init_ht(self):
        "Tests support for heterogeneous NdElement (backwards compatibility)"
        dataset = Dataset(NdElement(zip(self.xs, self.ys), kdims=['x'], vdims=['y']))
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_dataframe_init_ht(self):
        "Tests support for heterogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        dataset = Dataset(pd.DataFrame({'x':self.xs, 'y':self.ys}), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    # Test literal formats

    def test_dataset_expanded_dimvals_ht(self):
        self.assertEqual(self.table.dimension_values('Gender', expanded=False),
                         np.array(['M', 'F']))

    def test_dataset_implicit_indexing_init(self):
        dataset = Dataset(self.ys, kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_tuple_init(self):
        dataset = Dataset((self.xs, self.ys), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_simple_zip_init(self):
        dataset = Dataset(zip(self.xs, self.ys), kdims=['x'], vdims=['y'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_zip_init(self):
        dataset = Dataset(zip(self.gender, self.age,
                              self.weight, self.height),
                          kdims=self.kdims, vdims=self.vdims)
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_odict_init(self):
        dataset = Dataset(OrderedDict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    def test_dataset_dict_init(self):
        dataset = Dataset(dict(zip(self.xs, self.ys)), kdims=['A'], vdims=['B'])
        self.assertTrue(isinstance(dataset.data, self.data_instance_type))

    # Operations

    def test_dataset_sort_vdim_ht(self):
        dataset = Dataset({'x':self.xs, 'y':-self.ys},
                          kdims=['x'], vdims=['y'])
        dataset_sorted = Dataset({'x': self.xs[::-1], 'y':-self.ys[::-1]},
                                 kdims=['x'], vdims=['y'])
        self.assertEqual(dataset.sort('y'), dataset_sorted)

    def test_dataset_sort_string_ht(self):
        dataset_sorted = Dataset({'Gender':['F', 'M', 'M'], 'Age':[12, 10, 16],
                                  'Weight':[10,15,18], 'Height':[0.8,0.8,0.6]},
                                 kdims=self.kdims, vdims=self.vdims)
        self.assertEqual(self.table.sort(), dataset_sorted)

    def test_dataset_sample_ht(self):
        samples = self.dataset_ht.sample([0, 5, 10]).dimension_values('y')
        self.assertEqual(samples, np.array([0, 0.5, 1]))

    def test_dataset_reduce_ht(self):
        reduced = Dataset({'Age':self.age, 'Weight':self.weight, 'Height':self.height},
                          kdims=self.kdims[1:], vdims=self.vdims)
        self.assertEqual(self.table.reduce(['Gender'], np.mean), reduced)

    def test_dataset_1D_reduce_ht(self):
        self.assertEqual(self.dataset_ht.reduce('x', np.mean), np.float64(0.5))

    def test_dataset_2D_reduce_ht(self):
        reduced = Dataset({'Weight':[14.333333333333334], 'Height':[0.73333333333333339]},
                          kdims=[], vdims=self.vdims)
        self.assertEqual(self.table.reduce(function=np.mean), reduced)

    def test_dataset_2D_partial_reduce_ht(self):
        dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = Dataset({'x':self.xs, 'z':self.zs},
                          kdims=['x'], vdims=['z'])
        self.assertEqual(dataset.reduce(['y'], np.mean), reduced)

    def test_column_aggregate_ht(self):
        aggregated = Dataset({'Gender':['M', 'F'], 'Weight':[16.5, 10], 'Height':[0.7, 0.8]},
                             kdims=self.kdims[:1], vdims=self.vdims)
        self.compare_dataset(self.table.aggregate(['Gender'], np.mean), aggregated)

    def test_dataset_2D_aggregate_partial_ht(self):
        dataset = Dataset({'x':self.xs, 'y':self.ys, 'z':self.zs},
                          kdims=['x', 'y'], vdims=['z'])
        reduced = Dataset({'x':self.xs, 'z':self.zs},
                          kdims=['x'], vdims=['z'])
        self.assertEqual(dataset.aggregate(['x'], np.mean), reduced)


    def test_dataset_groupby(self):
        group1 = {'Age':[10,16], 'Weight':[15,18], 'Height':[0.8,0.6]}
        group2 = {'Age':[12], 'Weight':[10], 'Height':[0.8]}
        grouped = HoloMap([('M', Dataset(group1, kdims=['Age'], vdims=self.vdims)),
                           ('F', Dataset(group2, kdims=['Age'], vdims=self.vdims))],
                          kdims=['Gender'])
        self.assertEqual(self.table.groupby(['Gender']), grouped)


    def test_dataset_add_dimensions_value_ht(self):
        table = self.dataset_ht.add_dimension('z', 1, 0)
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.zeros(len(table)))

    def test_dataset_add_dimensions_values_ht(self):
        table = self.dataset_ht.add_dimension('z', 1, range(1,12))
        self.assertEqual(table.kdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'), np.array(list(range(1,12))))

    # Indexing

    def test_dataset_index_row_gender_female(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        row = self.table['F',:]
        self.assertEquals(row, indexed)

    def test_dataset_index_rows_gender_male(self):
        row = self.table['M',:]
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_index_row_age(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(self.table[:, 12], indexed)

    def test_dataset_index_item_table(self):
        indexed = Dataset({'Gender':['F'], 'Age':[12],
                           'Weight':[10], 'Height':[0.8]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(self.table['F', 12], indexed)

    def test_dataset_index_value1(self):
        self.assertEquals(self.table['F', 12, 'Weight'], 10)

    def test_dataset_index_value2(self):
        self.assertEquals(self.table['F', 12, 'Height'], 0.8)

    def test_dataset_index_column_ht(self):
        self.compare_arrays(self.dataset_ht['y'], self.ys)

    def test_dataset_boolean_index(self):
        row = self.table[np.array([True, True, False])]
        indexed = Dataset({'Gender':['M', 'M'], 'Age':[10, 16],
                           'Weight':[15,18], 'Height':[0.8,0.6]},
                          kdims=self.kdims, vdims=self.vdims)
        self.assertEquals(row, indexed)

    def test_dataset_value_dim_index(self):
        row = self.table[:, :, 'Weight']
        indexed = Dataset({'Gender':['M', 'M', 'F'], 'Age':[10, 16, 12],
                           'Weight':[15,18, 10]},
                          kdims=self.kdims, vdims=self.vdims[:1])
        self.assertEquals(row, indexed)

    def test_dataset_value_dim_scalar_index(self):
        row = self.table['M', 10, 'Weight']
        self.assertEquals(row, 15)

    # Casting

    def test_dataset_array_ht(self):
        self.assertEqual(self.dataset_ht.array(),
                         np.column_stack([self.xs, self.ys]))
Beispiel #14
0
class GridDatasetTest(HomogeneousColumnTypes, ComparisonTestCase):
    """
    Test of the NdDataset interface (mostly for backwards compatibility)
    """
    def setUp(self):
        self.restore_datatype = Dataset.datatype
        Dataset.datatype = ['grid']
        self.data_instance_type = dict
        self.init_data()

    def init_data(self):
        self.xs = range(11)
        self.xs_2 = [el**2 for el in self.xs]

        self.y_ints = [i * 2 for i in range(11)]
        self.dataset_hm = Dataset((self.xs, self.y_ints),
                                  kdims=['x'],
                                  vdims=['y'])

    def test_dataset_array_init_hm(self):
        "Tests support for arrays (homogeneous)"
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(np.column_stack([self.xs, self.xs_2]),
                    kdims=['x'],
                    vdims=['x2'])

    def test_dataset_dataframe_init_hm(self):
        "Tests support for homogeneous DataFrames"
        if pd is None:
            raise SkipTest("Pandas not available")
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(pd.DataFrame({
                'x': self.xs,
                'x2': self.xs_2
            }),
                    kdims=['x'],
                    vdims=['x2'])

    def test_dataset_ndelement_init_hm(self):
        "Tests support for homogeneous NdElement (backwards compatibility)"
        exception = "None of the available storage backends "\
         "were able to support the supplied data format."
        with self.assertRaisesRegexp(Exception, exception):
            Dataset(
                NdElement(zip(self.xs, self.xs_2), kdims=['x'], vdims=['x2']))

    def test_dataset_2D_aggregate_partial_hm(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({
            'x': self.xs,
            'y': self.y_ints,
            'z': array
        },
                          kdims=['x', 'y'],
                          vdims=['z'])
        self.assertEqual(
            dataset.aggregate(['x'], np.mean),
            Dataset({
                'x': self.xs,
                'z': np.mean(array, axis=1)
            },
                    kdims=['x'],
                    vdims=['z']))

    def test_dataset_2D_reduce_hm(self):
        array = np.random.rand(11, 11)
        dataset = Dataset({
            'x': self.xs,
            'y': self.y_ints,
            'z': array
        },
                          kdims=['x', 'y'],
                          vdims=['z'])
        self.assertEqual(np.array(dataset.reduce(['x', 'y'], np.mean)),
                         np.mean(array))

    def test_dataset_add_dimensions_value_hm(self):
        with self.assertRaisesRegexp(
                Exception,
                'Cannot add key dimension to a dense representation.'):
            self.dataset_hm.add_dimension('z', 1, 0)

    def test_dataset_add_dimensions_values_hm(self):
        table = self.dataset_hm.add_dimension('z', 1, range(1, 12), vdim=True)
        self.assertEqual(table.vdims[1], 'z')
        self.compare_arrays(table.dimension_values('z'),
                            np.array(list(range(1, 12))))

    def test_dataset_sort_vdim_hm(self):
        exception = ('Compressed format cannot be sorted, either instantiate '
                     'in the desired order or use the expanded format.')
        with self.assertRaisesRegexp(Exception, exception):
            self.dataset_hm.sort('y')

    def test_dataset_groupby(self):
        self.assertEqual(self.dataset_hm.groupby('x').keys(), list(self.xs))