Example #1
    def _create_entries(self, data, key, current_and_next):
        self.exp._barrier()

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier()
        group = data.backing_file.create_group(group_name)
        self.exp._barrier()
        shape = data.get_shape()
        if current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype)
            self.exp._barrier()
            logging.warning('Creating the dataset with chunks.')
            data.data = self.__create_dataset_nofill(group,
                                                     "data",
                                                     shape,
                                                     data.dtype,
                                                     chunks=chunks)
            logging.warning('Dataset created!')

        self.exp._barrier()

        return group_name, group
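
Example #1 creates its dataset through a private __create_dataset_nofill helper. A minimal sketch of how a no-fill create can be done with h5py's low-level API (the helper's real implementation may differ):

    import h5py

    def create_dataset_nofill(group, name, shape, dtype, chunks=None):
        """Create a dataset without writing fill values (a sketch only)."""
        # dataset-creation property list that tells HDF5 never to write fill values
        dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
        dcpl.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
        if chunks is not None:
            dcpl.set_chunk(chunks)
        space = h5py.h5s.create_simple(shape)
        tid = h5py.h5t.py_create(dtype, logical=True)
        dset_id = h5py.h5d.create(group.id, name.encode(), tid, space, dcpl=dcpl)
        return h5py.Dataset(dset_id)

Skipping the fill pass matters for large parallel writes, where every byte of the dataset would otherwise be written twice.
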
Example #2
    def _create_entries(self, data, key, current_and_next):
        self.exp._barrier()

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier()
        group = data.backing_file.create_group(group_name)
        self.exp._barrier()
        shape = data.get_shape()
        if current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype)
            self.exp._barrier()
            nBytes = np.prod(shape) * np.dtype(data.dtype).itemsize
            nProcs = self.exp.meta_data.get('nProcesses')
            # parallel hdf5 cannot handle data_size/nProcesses > 2GB
            self.__hdf5_file_write_failed_check(nBytes, nProcs)
            data.data = group.create_dataset("data",
                                             shape,
                                             data.dtype,
                                             chunks=chunks)

        self.exp._barrier()

        return group_name, group
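
The comment in Example #2 points at a known constraint: parallel HDF5 (MPI-IO) writes fail when a single process tries to transfer more than 2GB. A hedged sketch of what a guard like __hdf5_file_write_failed_check could do (the real helper's behaviour is not shown in the snippet):

    MPI_IO_LIMIT = 2 ** 31 - 1  # ~2GB per-process transfer limit

    def hdf5_file_write_check(nBytes, nProcs):
        """Raise early if the per-process share of the write exceeds ~2GB."""
        if nProcs and float(nBytes) / nProcs > MPI_IO_LIMIT:
            raise IOError("%d bytes over %d processes exceeds the 2GB "
                          "per-process parallel-HDF5 write limit"
                          % (nBytes, nProcs))
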
Example #3
    def pre_process(self):
        # Create the hdf5 output file
        self.hdf5 = Hdf5Utils(self.exp)
        self.in_data = self.get_in_datasets()[0]
        self.data_name = self.in_data.get_name()
        current_pattern = self.__set_current_pattern()
        pattern_idx = {'current': current_pattern, 'next': []}

        self.filename = self.__get_file_name()
        self.group_name = self._get_group_name(self.data_name)
        logging.debug("creating the backing file %s", self.filename)
        self.backing_file = self.hdf5._open_backing_h5(self.filename, 'w')
        group = self.backing_file.create_group(self.group_name)
        group.attrs['NX_class'] = 'NXdata'
        group.attrs['signal'] = 'data'
        self.exp._barrier()
        shape = self.in_data.get_shape()
        chunking = Chunking(self.exp, pattern_idx)
        dtype = self.in_data.data.dtype
        chunks = chunking._calculate_chunking(shape, dtype)
        self.exp._barrier()
        self.out_data = self.hdf5.create_dataset_nofill(group,
                                                        "data",
                                                        shape,
                                                        dtype,
                                                        chunks=chunks)
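
The NX_class = 'NXdata' and signal = 'data' attributes written above follow the NeXus NXdata convention, which lets any reader locate the default plottable dataset. A minimal reader-side sketch (the file name is illustrative):

    import h5py

    with h5py.File('tomo_output.h5', 'r') as f:    # hypothetical output file
        group = f[list(f.keys())[0]]               # first top-level group
        signal = group.attrs['signal']             # 'data', set by pre_process above
        print(group[signal].shape)
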
Example #4
    def _create_entries(self, data, key, current_and_next):
        msg = self.__class__.__name__ + '_create_entries'
        self.exp._barrier(msg=msg+'1')

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier(msg=msg+'2')
        group = data.backing_file.require_group(group_name)
        self.exp._barrier(msg=msg+'3')
        shape = data.get_shape()

        if 'data' in group:
            data.data = group['data']
        elif current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:
            chunk_max = self.__set_optimal_hdf5_chunk_cache_size(data, group)
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype,
                                                  chunk_max=chunk_max)

            self.exp._barrier(msg=msg+'4')
            data.data = self.create_dataset_nofill(
                    group, "data", shape, data.dtype, chunks=chunks)

        self.exp._barrier(msg=msg+'5')
        return group_name, group
Example #5
    def _create_entries(self, data, key, current_and_next):
        msg = self.__class__.__name__ + '_create_entries'
        self.exp._barrier(msg=msg+'1')

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier(msg=msg+'2')
        group = data.backing_file.require_group(group_name)
        self.exp._barrier(msg=msg+'3')
        shape = data.get_shape()

        if 'data' in group:
            data.data = group['data']
        elif current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:
            chunk_max = self.__set_optimal_hdf5_chunk_cache_size(data, group)
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype,
                                                  chunk_max=chunk_max)

            self.exp._barrier(msg=msg+'4')
            data.data = self.create_dataset_nofill(
                    group, "data", shape, data.dtype, chunks=chunks)

        self.exp._barrier(msg=msg+'5')

        return group_name, group
Example #6
    def __create_entries(self, data, key, current_and_next):
        expInfo = self.exp.meta_data
        group_name = expInfo.get_meta_data(["group_name", key])
        data.data_info.set_meta_data('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        group = data.backing_file.create_group(group_name)
        group.attrs[NX_CLASS] = 'NXdata'
        group.attrs['signal'] = 'data'

        logging.info("create_entries: 1")
        self.exp._barrier()

        shape = data.get_shape()
        if current_and_next == 0:
            data.data = group.create_dataset("data", shape, data.dtype)
        else:
            logging.info("create_entries: 2")
            self.exp._barrier()

            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype)
            logging.info("create_entries: 3")
            self.exp._barrier()
            data.data = group.create_dataset("data", shape, data.dtype,
                                             chunks=chunks)
            logging.info("create_entries: 4")
            self.exp._barrier()

        return group_name, group
Example #7
    def __get_backing_file(self, data_obj):
        fname = '%s/%s.h5' % \
            (self.exp.get('out_path'), self.parameters['file_name'])

        if os.path.exists(fname):
            return h5py.File(fname, 'r')

        self.hdf5 = Hdf5Utils(self.exp)

        size = tuple(self.parameters['size'])

        patterns = data_obj.get_data_patterns()
        p_name = self.parameters['pattern'] if \
            self.parameters['pattern'] is not None \
            else list(patterns.keys())[0]
        p_dict = patterns[p_name]
        p_dict['max_frames_transfer'] = 1
        nnext = {p_name: p_dict}

        pattern_idx = {'current': nnext, 'next': nnext}
        chunking = Chunking(self.exp, pattern_idx)
        chunks = chunking._calculate_chunking(size, np.int16)

        h5file = self.hdf5._open_backing_h5(fname, 'w')
        dset = h5file.create_dataset('test', size, chunks=chunks)

        self.exp._barrier()

        slice_dirs = list(nnext.values())[0]['slice_dims']
        nDims = len(dset.shape)
        total_frames = np.prod([dset.shape[i] for i in slice_dirs])
        sub_size = \
            [1 if i in slice_dirs else dset.shape[i] for i in range(nDims)]

        # an MPI barrier is needed after creating the file, before populating it
        idx = 0
        # calculate the first slice
        sl, total_frames = \
            self.__get_start_slice_list(slice_dirs, dset.shape, total_frames)
        for i in range(total_frames):
            low, high = self.parameters['range']
            dset[tuple(sl)] = np.random.randint(
                low, high=high, size=sub_size, dtype=self.parameters['dtype'])
            if sl[slice_dirs[idx]].stop == dset.shape[slice_dirs[idx]]:
                idx += 1
                if idx == len(slice_dirs):
                    break
            tmp = sl[slice_dirs[idx]]
            sl[slice_dirs[idx]] = slice(tmp.start+1, tmp.stop+1)

        self.exp._barrier()

        # NB: closing h5file here has previously caused problems in
        # random_hdf5_loader, so the file is reopened below without an
        # explicit close
        return self.hdf5._open_backing_h5(fname, 'r')
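
Example #7 fills the random dataset by stepping a one-frame slice window through the slice dimensions. A sketch of how the initial slice list returned by __get_start_slice_list might be built (an assumption; the real helper also adjusts total_frames for multi-process runs):

    def get_start_slice_list(slice_dirs, shape, total_frames):
        """Start with frame 0 in every slice dimension and the full
        extent in the remaining (core) dimensions. A sketch only."""
        sl = [slice(None)] * len(shape)
        for d in slice_dirs:
            sl[d] = slice(0, 1)
        return sl, total_frames
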
Example #8
    def _create_entries(self, data, key, current_and_next):
        self.exp._barrier()

        expInfo = self.exp.meta_data
        group_name = expInfo.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        self.exp._barrier()
        group = data.backing_file.create_group(group_name)
        self.exp._barrier()
        shape = data.get_shape()
        if current_and_next == 0:
            logging.warning('Creating the dataset without chunks')
            data.data = group.create_dataset("data", shape, data.dtype)
        else:

            # tune the raw-data chunk cache via the file access property list;
            # settings[2] is the cache size in bytes (multiplier left at 1 here)
            propfaid = group.file.id.get_access_plist()
            settings = list(propfaid.get_cache())
            settings[2] *= 1
            propfaid.set_cache(*settings)
            # TODO: calculate total number of chunks and set nSlots=nChunks

            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape,
                                                  data.dtype,
                                                  chunk_max=settings[2])

            self.exp._barrier()
            data.data = self.__create_dataset_nofill(group,
                                                     "data",
                                                     shape,
                                                     data.dtype,
                                                     chunks=chunks)

        self.exp._barrier()

        return group_name, group
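
Example #8 adjusts the raw-data chunk cache through the low-level file access property list; settings[2] is the cache size in bytes (rdcc_nbytes). Note that property-list settings only take effect when a file is opened with that list, and recent h5py versions expose the same knobs directly as File keywords. A sketch of the direct route (the values are illustrative):

    import h5py

    f = h5py.File('output.h5', 'w',
                  rdcc_nbytes=32 * 1024 ** 2,  # 32 MiB chunk cache
                  rdcc_nslots=10007,           # prime number of hash slots
                  rdcc_w0=0.75)                # eviction weight for fully read/written chunks
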
Example #9
    def create_chunking_instance(self, current_list, nnext_list, nProcs):
        current = self.create_pattern('a', current_list)
        nnext = self.create_pattern('b', nnext_list)
        options = tu.set_experiment('tomoRaw')
        options['processes'] = list(range(nProcs))
        # set a dummy process list
        options['process_file'] = \
            tu.get_test_process_path('basic_tomo_process.nxs')
        exp = Experiment(options)
        test_dict = {'current': current, 'next': nnext}
        chunking = Chunking(exp, test_dict)
        return chunking
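
A hypothetical use of this helper in a test (the pattern lists come from the test fixtures; the shape and expected values are made up for illustration):

    import numpy as np

    chunking = self.create_chunking_instance(current_list, nnext_list, 1)
    chunks = chunking._calculate_chunking((50, 100, 200), np.float32)
    assert len(chunks) == 3  # one chunk extent per dataset dimension
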
Example #10
    def create_entries(self, data, key, current_and_next):
        expInfo = self.exp.meta_data
        group_name = expInfo.get_meta_data(["group_name", key])
        data.data_info.set_meta_data('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        group = data.backing_file.create_group(group_name)
        group.attrs[NX_CLASS] = 'NXdata'
        group.attrs['signal'] = 'data'

        logging.info("create_entries: 1")
        self.exp.barrier()

        if data.get_variable_flag():
            dt = h5py.special_dtype(vlen=data.dtype)
            data.data = group.create_dataset('data', data.get_shape()[:-1], dt)
        else:
            shape = data.get_shape()
            if current_and_next == 0:
                data.data = group.create_dataset("data", shape, data.dtype)
            else:
                logging.info("create_entries: 2")
                self.exp.barrier()

                chunking = Chunking(self.exp, current_and_next)
                chunks = chunking.calculate_chunking(shape, data.dtype)
                logging.info("create_entries: 3")
                self.exp.barrier()
                data.data = group.create_dataset("data", shape, data.dtype,
                                                 chunks=chunks)
                logging.info("create_entries: 4")
                self.exp.barrier()

        return group_name, group
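
Example #10 handles variable-length data with h5py.special_dtype(vlen=...); newer h5py spells this h5py.vlen_dtype. A standalone sketch:

    import h5py
    import numpy as np

    with h5py.File('vlen_demo.h5', 'w') as f:     # file name is illustrative
        dt = h5py.vlen_dtype(np.float32)          # equivalent to special_dtype(vlen=...)
        dset = f.create_dataset('data', (4,), dtype=dt)
        dset[0] = np.arange(3, dtype=np.float32)  # rows may differ in length
        dset[1] = np.arange(7, dtype=np.float32)
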
Example #11
    def _create_dosna_dataset(self, object_id, data, key, current_and_next):
        group_name = self.exp.meta_data.get(["group_name", key])
        data.data_info.set('group_name', group_name)
        try:
            group_name = group_name + '_' + data.name
        except AttributeError:
            pass

        shape = data.get_shape()
        dataset_name = "{}_{}".format(group_name,
                                      self._extract_digits(object_id))

        if current_and_next == 0:
            data.data = self.dosna_connection.create_dataset(
                dataset_name, shape, data.dtype)
        else:
            chunking = Chunking(self.exp, current_and_next)
            chunks = chunking._calculate_chunking(shape, data.dtype)
            data.data = self.dosna_connection.create_dataset(dataset_name,
                                                             shape,
                                                             data.dtype,
                                                             chunk_size=chunks)
        self.dataset_cache.append(data.data)