    def test_merge_with_not_resizable(self):
        dset1 = ds.NumpyDataset(self.name, self.packet_shape)
        dset2 = ds.NumpyDataset(self.name, self.packet_shape)
        dset1.resizable = False
        packet = self.items['raw'][0]
        dset2.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        self.assertRaises(Exception, dset1.merge_with, dset2)
    def test_merge_with_incompatible_dataset(self):
        bad_packet_shape = (self.n_f + 1, self.f_h, self.f_h)
        dset1 = ds.NumpyDataset(self.name, bad_packet_shape)
        dset2 = ds.NumpyDataset(self.name, self.packet_shape)
        packet = self.items['raw'][0]
        dset2.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        self.assertRaises(ValueError, dset1.merge_with, dset2)
    def test_is_compatible_with_bad_item_types(self):
        bad_item_types = self.item_types.copy()
        bad_item_types['raw'] = not bad_item_types['raw']
        dset1 = ds.NumpyDataset(self.name,
                                self.packet_shape,
                                item_types=bad_item_types)
        dset2 = ds.NumpyDataset(self.name,
                                self.packet_shape,
                                item_types=self.item_types)
        self.assertFalse(dset1.is_compatible_with(dset2))
    def test_merge_with_new_metafields(self):
        dset1 = ds.NumpyDataset(self.name, self.packet_shape)
        dset2 = ds.NumpyDataset(self.name, self.packet_shape)
        packet = self.items['raw'][0]
        meta2 = self.mock_meta[0].copy()
        meta2['test'] = 'value'
        exp_metafields = self.metafields.union(meta2.keys())
        dset1.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        dset2.add_data_item(packet, self.mock_targets[0], meta2)
        dset1.merge_with(dset2)
        self.assertSetEqual(dset1.metadata_fields, exp_metafields)
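Taken together, the merge tests above pin down merge_with: it appends the other dataset's items and unions that dataset's metadata fields into the receiver. A minimal sketch of that behavior, assuming the utils.dataset_utils module path given in the create_dataset docstring further below; the packet shape, target format, and metadata keys are illustrative only:

import numpy as np
import utils.dataset_utils as ds  # module path per the create_dataset docstring

packet_shape = (20, 48, 48)  # (frames, height, width) -- assumed values
packet = np.ones(packet_shape, dtype='uint8')

dset1 = ds.NumpyDataset('first', packet_shape)
dset2 = ds.NumpyDataset('second', packet_shape)
dset1.add_data_item(packet, [1, 0], {'source': 'sim'})
dset2.add_data_item(packet, [0, 1], {'source': 'sim', 'angle': 30})

dset1.merge_with(dset2)
assert dset1.num_data == 2               # items appended
assert 'angle' in dset1.metadata_fields  # metadata fields unioned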
Example #5
    def load_dataset(self, name, item_types=None):
        """
            Load a dataset from secondary storage.

            This function assumes that the relevant dataset files are located
            in the same directory (loaddir).

            Parameters
            ----------
            :param name:        the dataset name.
            :type name:         str
            :param item_types:  (optional) types of dataset items to load.
            :type item_types:   typing.Mapping[str, bool]
        """
        # TODO: find a way to load a dataset with its items that does not
        # depend on knowledge of NumpyDataset internals; this method is
        # currently excluded from unit tests for that very reason
        self._check_before_read()
        config = self.load_dataset_config(name)
        itypes = item_types or config['item_types']
        dataset = ds.NumpyDataset(name,
                                  config['packet_shape'],
                                  item_types=itypes)
        data = self._data_handler.load_data(name, dataset.item_types)
        targets = self._target_handler.load_targets(name)
        metadata = self._meta_handler.load_metadata(name)
        dataset._data.extend(data)
        dataset._targ.extend({'classification': targets})
        dataset._meta.extend(metadata)
        dataset._num_data = config['num_data']
        return dataset
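A possible call site for load_dataset, assuming the handler accepts the load_dir constructor keyword used in the main() snippet further below; the import path, directory, dataset name, and item-type mapping here are all hypothetical:

import utils.io_utils as fs_io  # assumed import path; the snippets alias it as fs_io/io_utils

handler = fs_io.DatasetFsPersistencyHandler(load_dir='/tmp/datasets')
dataset = handler.load_dataset('showers_v1',
                               item_types={'raw': True, 'yx': False,
                                           'gtux': False, 'gtuy': False})
print(dataset.num_data, dataset.item_types)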
def get_output_dataset_and_handler(output_packet_shape, **dataset_args):
    dataset = ds.NumpyDataset(dataset_args['name'],
                              output_packet_shape,
                              item_types=dataset_args['item_types'],
                              dtype=dataset_args['dtype'])
    output_handler = fs_io.DatasetFsPersistencyHandler(
        save_dir=dataset_args['outdir'])
    return dataset, output_handler
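Example usage with hypothetical argument values; the keyword keys mirror the ones the function reads out of dataset_args:

dataset, handler = get_output_dataset_and_handler(
    (20, 48, 48),  # assumed output packet shape
    name='out_set',
    item_types={'raw': True, 'yx': True, 'gtux': False, 'gtuy': False},
    dtype='uint8',
    outdir='/tmp/datasets')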
    def test_add_item_wrong_packet_shape(self):
        dset = ds.NumpyDataset(self.name,
                               self.packet_shape,
                               item_types=self.item_types)
        packet = np.ones((1, *self.packet_shape))
        targ, meta = self.mock_targets[0], self.mock_meta[0]

        self.assertRaises(ValueError, dset.add_data_item, packet, targ, meta)
    def test_get_targets(self):
        packet, target = self.items['raw'][0], self.mock_targets[0]
        meta = self.mock_meta[0]
        exp_targets = [self.mock_targets[0]]

        dset = ds.NumpyDataset(self.name, self.packet_shape)
        dset.add_data_item(packet, target, meta)
        targets = dset.get_targets()
        self._assertDatasetTargets(targets, exp_targets)
    def test_add_item_non_resizable_dataset(self):
        dset = ds.NumpyDataset(self.name,
                               self.packet_shape,
                               item_types=self.item_types)
        dset.resizable = False
        packet = self.items['raw'][0]
        targ, meta = self.mock_targets[0], self.mock_meta[0]

        self.assertRaises(Exception, dset.add_data_item, packet, targ, meta)
    def test_add_item(self):
        dset = ds.NumpyDataset(self.name,
                               self.packet_shape,
                               item_types=self.item_types)
        packet = self.items['raw'][0]
        exp_data = {k: [v[0]] for k, v in self.items.items()}
        num_data = dset.num_data

        dset.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        self.assertEqual(dset.num_data, num_data + 1)
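For context on exp_data in the test above: the tests only ever pass the raw packet to add_data_item, so the yx, gtux, and gtuy items appear to be projections derived from it internally. A hedged sketch under that assumption, with illustrative shape values:

import numpy as np
import utils.dataset_utils as ds  # module path per the create_dataset docstring

dset = ds.NumpyDataset('example', (20, 48, 48),
                       item_types={'raw': True, 'yx': True,
                                   'gtux': True, 'gtuy': True})
dset.add_data_item(np.ones((20, 48, 48), dtype='uint8'), [1, 0], {})
items = dset.get_data_as_dict()
# expected keys: 'raw' plus the derived 'yx', 'gtux' and 'gtuy' projections
print(sorted(items.keys()))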
    def test_merge_with_only_subset_of_items(self):
        dset1 = ds.NumpyDataset(self.name, self.packet_shape)
        dset2 = ds.NumpyDataset(self.name, self.packet_shape)
        packet = self.items['raw'][0]
        meta2 = self.mock_meta[0].copy()
        meta2['test'] = 'value'
        meta3 = self.mock_meta[0].copy()
        meta3['test2'] = 'value'
        dset1.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        dset2.add_data_item(packet, self.mock_targets[0], meta2)
        dset2.add_data_item(packet, self.mock_targets[0], meta2)
        dset2.add_data_item(packet, self.mock_targets[0], meta3)
        # add items 0 and 1 from dset2 to dset1
        dset1.merge_with(dset2, slice(2))
        # metadata fields from item 2 of dset2 should not be added
        exp_metafields = self.metafields.union(meta2.keys())
        self.assertEqual(dset1.num_data, 3)
        self.assertSetEqual(dset1.metadata_fields, exp_metafields)
    def test_implicit_dtype_conversion_when_adding_items(self):
        dset = ds.NumpyDataset(self.name,
                               self.packet_shape,
                               dtype='float16',
                               item_types=self.item_types)
        dset.add_data_item(self.items['raw'][0], self.mock_targets[0],
                           self.mock_meta[0])
        items_dict = dset.get_data_as_dict()
        for itype, is_present in dset.item_types.items():
            if is_present:
                self.assertEqual(items_dict[itype][0].dtype.name, 'float16')
    def test_get_metadata(self):
        packet, target = self.items['raw'][0], self.mock_targets[0]
        meta = self.mock_meta[0]
        exp_metadata = [self.mock_meta[0]]

        dset = ds.NumpyDataset(self.name, self.packet_shape)
        dset.add_data_item(packet, target, meta)
        metadata = dset.get_metadata()
        msg = "Metadata not equal: expected {}:, actual {}:".format(
            exp_metadata, meta)
        self.assertListEqual(metadata, exp_metadata, msg)
    def test_get_data_as_dict(self):
        item_types = {'raw': True, 'yx': True, 'gtux': False, 'gtuy': False}
        exp_items = {k: [self.items[k][0]] for k, v in item_types.items() if v}
        packet, target = self.items['raw'][0], self.mock_targets[0]
        meta = self.mock_meta[0]

        dset = ds.NumpyDataset(self.name,
                               self.packet_shape,
                               item_types=item_types)
        dset.add_data_item(packet, target, meta)
        items = dset.get_data_as_dict()
        self._assertDatasetData(items, exp_items, exp_items.keys())
    def test_merge_with(self):
        dset1 = ds.NumpyDataset(self.name,
                                self.packet_shape,
                                item_types=self.item_types)
        dset2 = ds.NumpyDataset(self.name,
                                self.packet_shape,
                                item_types=self.item_types)
        packet = self.items['raw'][0]
        dset1.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        dset2.add_data_item(packet, self.mock_targets[0], self.mock_meta[0])
        num_data = dset1.num_data
        exp_data = {
            'raw': [packet, packet],
            'yx': [self.items['yx'][0], self.items['yx'][0]],
            'gtux': [self.items['gtux'][0], self.items['gtux'][0]],
            'gtuy': [self.items['gtuy'][0], self.items['gtuy'][0]]
        }
        exp_targets = [self.mock_targets[0], self.mock_targets[0]]
        exp_metadata = [self.mock_meta[0], self.mock_meta[0]]
        exp_metafields = self.metafields
        dset1.merge_with(dset2)
        self._assertDatasetItems(dset1, exp_data, exp_targets, exp_metadata,
                                 exp_metafields, num_data + dset2.num_data,
                                 cons.ALL_ITEM_TYPES)
    def test_get_data_as_arraylike(self):
        keys = ('raw', 'yx')
        item_types = {'raw': True, 'yx': True, 'gtux': False, 'gtuy': False}
        exp_items = ([self.items['raw'][0]], [self.items['yx'][0]])
        packet, target = self.items['raw'][0], self.mock_targets[0]
        meta = self.mock_meta[0]

        dset = ds.NumpyDataset(self.name,
                               self.packet_shape,
                               item_types=item_types)
        dset.add_data_item(packet, target, meta)
        items = dset.get_data_as_arraylike()
        for idx in range(len(keys)):
            err_msg = "items of type '{}' are not equal".format(keys[idx])
            nptest.assert_array_equal(items[idx], exp_items[idx], err_msg)
def main(**settings):
    srcdir, outdir = settings['srcdir'], settings['outdir']
    name, outname = settings['name'], settings['outname']
    if outname is None:
        outname = name
    if outdir is None:
        outdir = srcdir
    io_handler = io_utils.DatasetFsPersistencyHandler(load_dir=srcdir,
                                                      save_dir=outdir)
    items_slice = settings['items_slice']
    old_dataset = io_handler.load_dataset(name)
    new_dataset = ds.NumpyDataset(outname, old_dataset.accepted_packet_shape,
                                  item_types=old_dataset.item_types,
                                  dtype=old_dataset.dtype)
    new_dataset.merge_with(old_dataset, items_slice)
    io_handler.save_dataset(new_dataset)
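A hedged invocation example; the key names mirror what main() reads from settings, and the values are hypothetical:

if __name__ == '__main__':
    main(srcdir='/tmp/datasets', outdir=None,  # outdir falls back to srcdir
         name='showers_v1', outname='showers_first100',
         items_slice=slice(100))               # keep only items 0..99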
    def test_add_metafield(self):
        dset = ds.NumpyDataset(self.name, self.packet_shape)
        packet = self.items['raw'][0]
        exp_meta = self.mock_meta.copy()
        exp_meta[0] = exp_meta[0].copy()
        exp_meta[1] = exp_meta[1].copy()
        dset.add_data_item(packet, self.mock_targets[0], exp_meta[0])
        dset.add_data_item(packet, self.mock_targets[1], exp_meta[1])
        exp_meta = exp_meta.copy()
        exp_meta[0] = exp_meta[0].copy()
        exp_meta[1] = exp_meta[1].copy()
        exp_meta[0]['random_metafield'] = 'default'
        exp_meta[1]['random_metafield'] = 'default'
        exp_metafields = self.metafields.union(['random_metafield'])

        dset.add_metafield('random_metafield', default_value='default')
        self.assertListEqual(dset.get_metadata(), exp_meta)
        self.assertSetEqual(dset.metadata_fields, exp_metafields)
Example #19
    def setUpClass(cls, num_items=2, name='test', item_types=None):
        items_mixin = DatasetItemsMixin()
        items_mixin.setUpClass(num_items=num_items)
        targets_mixin = DatasetTargetsMixin()
        targets_mixin.setUpClass(num_items=num_items)
        meta_mixin = DatasetMetadataMixin()
        meta_mixin.setUpClass(num_items=num_items)
        packets = items_mixin.items['raw']
        targets = targets_mixin.mock_targets
        metadata = meta_mixin.mock_meta

        if item_types is None:
            item_types = items_mixin.item_types
        dset = ds.NumpyDataset(name, items_mixin.packet_shape,
                               item_types=item_types)
        for idx in range(num_items):
            dset.add_data_item(packets[idx], targets[idx], metadata[idx])
        cls.dset = dset
Example #20
    def load_empty_dataset(self, name, item_types=None):
        """
            Create a dataset from configuration stored in secondary storage
            without loading any of its actual contents (data, targets,
            metadata).

            Parameters
            ----------
            :param name:        the dataset name/config filename prefix.
            :type name:         str
            :param item_types:  (optional) types of dataset items to load.
            :type item_types:   typing.Mapping[str, bool]
        """
        attrs = self.load_dataset_config(name)
        itypes = item_types or attrs['item_types']
        dataset = ds.NumpyDataset(name,
                                  attrs['packet_shape'],
                                  item_types=itypes,
                                  dtype=attrs['dtype'])
        return dataset
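Example usage, assuming a stored config for the named dataset exists under the handler's load directory; the import path, directory, and name are hypothetical:

import utils.io_utils as fs_io  # assumed import path; the snippets alias it as fs_io/io_utils

handler = fs_io.DatasetFsPersistencyHandler(load_dir='/tmp/datasets')
empty = handler.load_empty_dataset('showers_v1')
# attributes come from the stored config, but no data/targets/metadata are
# loaded; num_data of a freshly constructed dataset is presumably 0
print(empty.num_data, empty.item_types)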
    def test_is_compatible_with(self):
        dset1 = ds.NumpyDataset(self.name, self.packet_shape)
        dset2 = ds.NumpyDataset(self.name, self.packet_shape)
        self.assertTrue(dset1.is_compatible_with(dset2))
    def create_dataset(self, name, num_data, item_types, dtype='uint8'):
        """
            Generate and return a numpy dataset containing simulated showers
            and corresponding targets for them, for use in training neural
            networks for classifiction tasks.

            The data returned is divided into equal-sized quarters as follows:

            1/4: shower data (possibly with malfunctioned EC units)
            2/4: shower data (without malfunctioned EC units)
            3/4: noise data (possibly with malfunctioned EC units)
            4/4: noise data (without malfunctioned EC units)

            Whether there are any data items with malfunctioning ECs depends on
            the property bad_ECs_range.

            Parameters
            ----------
            num_data :          int
                                The number of data items to create in total.
            item_types :        dict of str to bool
                                The requested item types, where the keys are
                                from the utils.dataset_utils.item_types
                                module-level constant.
            Returns
            -------
            dataset :   utils.dataset_utils.NumpyDataset
                        A numpy dataset with capacity and num_items both equal
                        to num_data.
        """
        # create output data holders as needed
        template_shape = self._bg_template.packet_template.packet_shape
        dataset = ds.NumpyDataset(name,
                                  template_shape,
                                  item_types=item_types,
                                  dtype=dtype)

        # output and target generation
        ec_gen = self._bg_template.get_new_bad_ECs
        num_showers = int(num_data / 2)
        shower_creator = self.create_shower_packet
        noise_creator = self.create_noise_packet
        shower_target = cons.CLASSIFICATION_TARGETS['shower']
        noise_target = cons.CLASSIFICATION_TARGETS['noise']
        iteration_handlers = (
            {'target': shower_target,
             'start': 0,
             'stop': int(num_showers / 2),
             'packet_handler': lambda angle: shower_creator(angle, ec_gen())},
            {'target': shower_target,
             'start': int(num_showers / 2),
             'stop': num_showers,
             'packet_handler': lambda angle: shower_creator(angle)},
            {'target': noise_target,
             'start': num_showers,
             'stop': num_data - int(num_showers / 2),
             'packet_handler': lambda angle: noise_creator(ec_gen())},
            {'target': noise_target,
             'start': num_data - int(num_showers / 2),
             'stop': num_data,
             'packet_handler': lambda angle: noise_creator()},
        )
        # main loop
        for handler in iteration_handlers:
            start, stop = handler['start'], handler['stop']
            packet_handler = handler['packet_handler']
            target = handler['target']
            # idx serves as both an index into targets and data, as well as
            # shower angle in xy projection
            for idx in range(start, stop):
                packet, meta = packet_handler(idx)
                dataset.add_data_item(packet, target, meta)
        return dataset
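Example usage, assuming generator is an instance of the class defining create_dataset; the dataset name and size are hypothetical:

dataset = generator.create_dataset(
    'train_set', 1000,
    item_types={'raw': True, 'yx': True, 'gtux': False, 'gtuy': False})
# per the quartering above: items 0-499 are showers, 500-999 are noise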
    def test_is_compatible_with_bad_packet_shape(self):
        bad_packet_shape = (self.n_f + 1, self.f_h, self.f_h)
        dset1 = ds.NumpyDataset(self.name, bad_packet_shape)
        dset2 = ds.NumpyDataset(self.name, self.packet_shape)
        self.assertFalse(dset1.is_compatible_with(dset2))