Example #1
def expand_hdf5(h5f: File, out_dir: Path, seeds: Optional[Set[str]] = None,
                jobs: int = 1, progress: bool = False):
    """
    Expand an HDF5 containing code coverage.

    Args:
        h5f: h5py file object.
        out_dir: Directory to extract seed coverage to.
        seeds: An optional seed set. If provided, only these seeds will be
               extracted.
        jobs: Number of parallel jobs to run.
        progress: Set to `True` for progress bar.

    Yields:
        Each extracted seed.
    """
    h5_filename = h5f.filename

    with mpp.Pool(processes=jobs) as pool:
        get_cov = partial(_get_seed_cov, out_dir=out_dir, seeds=seeds)
        h5_iter = zip(repeat(h5_filename), h5f.keys())
        num_seeds = len(seeds) if seeds else len(list(h5f.keys()))
        print('%d seeds to extract' % num_seeds)
        iter_func = partial(tqdm, desc='Expanding %s' % h5_filename,
                            total=num_seeds, unit='seeds') if progress else iter
        for seed in iter_func(pool.istarmap(get_cov, h5_iter)):
            if seed:
                yield seed
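A minimal usage sketch for expand_hdf5, assuming a hypothetical coverage file 'coverage.h5' and that the surrounding module provides the mpp pool with the istarmap helper used above:

from pathlib import Path
from h5py import File

# Hypothetical file and output directory; extract all seeds with 4 workers.
with File('coverage.h5', 'r') as h5f:
    for seed in expand_hdf5(h5f, Path('coverage_out'), jobs=4, progress=True):
        print('extracted', seed)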
Example #2
class CountsHdf5Reader:
    def __init__(self, filename):
        self.data = File(filename, 'r')

    def keys(self):
        '''
        # >>> path = 'fake-files/input/linnarsson/linnarsson.molecules.hdf5'
        # >>> reader = CountsHdf5Reader(path)
        # >>> len(reader.keys())
        # 39
        # >>> sorted(list(reader.keys()))[:2]
        # ['Acta2_Hybridization5', 'Aldoc_Hybridization1']

        '''
        return self.data.keys()

    def __getitem__(self, key):
        '''
        # >>> path = 'fake-files/input/linnarsson/linnarsson.molecules.hdf5'
        # >>> reader = CountsHdf5Reader(path)
        # >>> pairs = list(reader['Acta2_Hybridization5'])
        # >>> len(pairs)
        # 13052
        # >>> pairs[0]
        # [18215.0, 20052.0]

        '''
        return (list(pair) for pair in self.data[key])
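A short usage sketch for the reader above; the path and key are taken from the commented-out doctests and are only illustrative:

reader = CountsHdf5Reader('fake-files/input/linnarsson/linnarsson.molecules.hdf5')
print(sorted(reader.keys())[:2])          # first two hybridization names
for x, y in reader['Acta2_Hybridization5']:
    print(x, y)                           # one coordinate pair per molecule
    break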
Example #3
def write_metadata(
    infile: h5py.File, outfile: h5py.File, links_list: List[str], mask: SWIFTMask,
):
    """
    Copy over all the metadata from snapshot to output file

    Parameters
    ----------
    infile : h5py.File
        hdf5 file handle for input snapshot
    outfile : h5py.File
        hdf5 file handle for output snapshot
    links_list : list of str
        names of links found in the snapshot
    mask : SWIFTMask
        the mask being used to define subset
    """

    update_metadata_counts(infile, outfile, mask)

    skip_list = links_list.copy()
    skip_list += ["PartType", "Cells"]
    for field in infile.keys():
        if not any(substr in field for substr in skip_list):
            infile.copy(field, outfile)
Example #4
File: __init__.py Project: vibbits/ilastik
 def populateFrom(self, importedFile: h5py.File, topGroupKeys: List[str]):
     # We copy ilastikVersion as well as workflowName because that can influence the way in which the deserializers
     # interpret the imported data
     for key in topGroupKeys + self.BASE_KEYS:
         if key in importedFile.keys():
             self.clearValue(key)
             importedFile.copy(key, self.file["/"])
Example #5
class TestMapping(BaseTest):
    """
        Test if the registration of Group as a
        Mapping behaves as expected
    """
    def setUp(self):
        data = ('a', 'b')
        self.f = File('foo.hdf5', 'w')
        self.grp = self.f.create_group('bar')
        self.attr = self.f.attrs.create('x', data)

    def tearDown(self):
        if self.f:
            self.f.close()

    def test_keys(self):
        key_1 = self.f.keys()
        self.assertIsInstance(repr(key_1), str)
        key_2 = self.grp.keys()
        self.assertIsInstance(repr(key_2), str)

    def test_values(self):
        value_1 = self.f.values()
        self.assertIsInstance(repr(value_1), str)
        value_2 = self.grp.values()
        self.assertIsInstance(repr(value_2), str)

    def test_items(self):
        item_1 = self.f.items()
        self.assertIsInstance(repr(item_1), str)
        item_2 = self.grp.items()
        self.assertIsInstance(repr(item_2), str)
Example #6
def convert_raw_to_img(src_filename, dst_filename, verbose=False, numba=False):
    """
    Converts .raw. data files to .img. data files, preserving all original fields.
    The function creates a new file with the same name but a different suffix.
    """
    if verbose:
        print(f'source filename: {src_filename}')
        print(f'destination filename: {dst_filename}')
    from h5py import File
    from lcp_video.analysis import mono12p_to_image, mono12p_to_image_numba
    if numba:
        mono12p_to_image = mono12p_to_image_numba
    src = File(src_filename, 'r')
    width = src['image width'][()]
    height = src['image height'][()]
    length = src['images'].shape[0]

    with File(dst_filename, 'w') as dst:
        for key in src.keys():
            if "images" not in key:
                dst.create_dataset(key, data=src[key][()])
            else:
                dst.create_dataset('images', (length, height, width),
                                   dtype='int16',
                                   chunks=(1, height, width))
        for i in range(length):
            raw = src['images'][i]
            dst['images'][i] = mono12p_to_image(raw, height, width).reshape(
                (height, width))
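A usage sketch, assuming the lcp_video helpers imported above are installed; the file names are hypothetical:

# Convert a hypothetical camera recording from .raw.hdf5 to .img.hdf5.
convert_raw_to_img('run42.raw.hdf5', 'run42.img.hdf5', verbose=True)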
Example #7
    def __init__(self, annot_file, split, tr_percent=0.7):
        print('Initializing data loader {}  from {}'.format(split, annot_file))
        f = File(annot_file, 'r')
        keys = [key for key in f.keys()]
        annot = {}
        for key in keys:
            annot[key] = np.asarray(f[key]).copy()
        f.close()
        # Keys: input, output, strike_low, strike_high, start_time, end_time, grid_size_str, grid_size_mat

        full_data_len = annot['input'].shape[0]
        ids = np.arange(full_data_len)
        max_id_te = int((1 - tr_percent) * full_data_len)
        te_ids = ids[ids < max_id_te]
        tr_ids = ids[ids >= max_id_te]

        self.tr_mean_inp = annot['input'][tr_ids].mean(axis=0)
        self.tr_std_inp = annot['input'][tr_ids].std(axis=0)
        annot['input'] = (annot['input'] -
                          self.tr_mean_inp) / (self.tr_std_inp + 1e-8)
        self.tr_mean_opt = annot['output'][tr_ids].mean(axis=0)
        self.tr_std_opt = annot['output'][tr_ids].std(axis=0)
        annot['output'] = (annot['output'] -
                           self.tr_mean_opt) / (self.tr_std_opt + 1e-8)

        for key in keys:
            if not annot[key].shape == ():
                annot[key] = annot[key][tr_ids if split == 'train' else te_ids]

        self.annot = annot
        self.nSamples = annot['input'].shape[0]
        print('Loaded {} {} samples'.format(split, self.nSamples))
Example #8
def get_random_noise_mc_info_extr(input_file):
    """
    Wrapper function that includes the actual mc_info_extr
    for random noise simulations. There are no n_gen like in the neutrino case.

    Parameters
    ----------
    input_file : km3net data file
            Can be online or offline format.

    Returns
    -------
    mc_info_extr : function
            The actual mc_info_extr function that holds the extractions.

    """

    # check if std reco is present
    f = File(input_file, "r")
    has_std_reco = "reco" in f.keys()

    if has_std_reco:
        #also check, which rec types are present
        rec_types, rec_parameters_names = get_rec_types_in_file(f)

    def mc_info_extr(blob):
        """
        Processes a blob and creates the y with mc_info and, if existing, std reco.

        For this random noise case it is only general event info, like the id.

        Parameters
        ----------
        blob : dict
                The blob from the pipeline.

        Returns
        -------
        track : dict
                Containing all the specified info the y should have.

        """
        event_info = blob["EventInfo"]

        track = {
            "event_id": event_info.event_id[0],
            "run_id": event_info.run_id[0],
            "particle_type": 0,
        }

        # get all the std reco info
        if has_std_reco:

            std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names)

            track.update(std_reco_info)

        return track

    return mc_info_extr
Example #9
def create_submap_dataset(h5file: h5py.File):
    dataset = {}
    for submap_name in h5file.keys():
        submap_dict = {}
        submap_dict['num_segments'] = np.array(h5file[submap_name +
                                                      '/num_segments'])[0]
        segments = []
        center_submap_xy = torch.Tensor([0., 0.])
        num_points = 0
        for i in range(submap_dict['num_segments']):
            segment_name = submap_name + '/segment_' + str(i)
            segments.append(np.array(h5file[segment_name]))
            center_submap_xy += segments[-1].sum(axis=0)[:2]
            num_points += segments[-1].shape[0]
        center_submap_xy /= num_points
        # segments = [np.array(segment - np.hstack([center_submap_xy, 0.])) for segment in segments]
        segment_centers = np.array([
            segment.mean(axis=0) - np.hstack([center_submap_xy, 0.])
            for segment in segments
        ])

        submap_dict['segment_centers'] = torch.Tensor(segment_centers)
        submap_dict['segment_scales'] = torch.Tensor(
            np.array([np.sqrt(segment.var(axis=0)) for segment in segments]))
        submap_dict['segments'] = [
            torch.Tensor((segment - segment.mean(axis=0)) /
                         np.sqrt(segment.var(axis=0))) for segment in segments
        ]

        dataset[submap_name] = submap_dict

    return dataset
Example #10
    def embed_data(self, h5_file: h5py.File, embedder: EmbeddingModel, save_states: bool = False):
        """Embeds cylinder flow data into a 1D vector representation for the transformer.

        TODO: Remove redundant arguments

        Args:
            h5_file (h5py.File): HDF5 file object of cylinder flow raw data
            embedder (EmbeddingModel): Embedding neural network
            save_states (bool, optional): To save the physical states or not, should be True for validation and testing. Defaults to False.
        """
        # Iterate through stored time-series
        samples = 0
        embedder.eval()
        for key in h5_file.keys():
            ux = torch.Tensor(h5_file[key + '/ux'])
            uy = torch.Tensor(h5_file[key + '/uy'])
            p = torch.Tensor(h5_file[key + '/p'])
            data_series = torch.stack([ux, uy, p], dim=1).to(embedder.devices[0])
            visc = (2.0 / float(key))*torch.ones(ux.size(0), 1).to(embedder.devices[0])
            with torch.no_grad():
                embedded_series = embedder.embed(data_series, visc).cpu()

            # Stride over time-series
            for i in range(0, data_series.size(0) - self.block_size + 1, self.stride):  # Truncate in block of block_size
                data_series0 = embedded_series[i: i + self.block_size]  # .repeat(1, 4)
                self.examples.append(data_series0)
                self.position_ids.append(torch.arange(0, self.block_size, dtype=torch.long)+i)
                if save_states:
                    self.states.append(data_series[i: i + self.block_size].cpu())
            samples = samples + 1
            if (self.ndata > 0 and samples >= self.ndata):  # If we have enough time-series samples break loop
                break
Example #11
File: sassenatasks.py Project: camm/code
def orderByQmodulus(filename,outfile=None):
  """ Sassena does not enforce any ordering of the structure factors.
  Here we order by increasing value of modulus of Q-vectors. """
  from h5py import File
  import numpy
  f=File(filename,'r')
  overwrite=False
  if not outfile:
    outfile=tempfile() # temporary output file
    overwrite=True
  g=File(outfile,'w')
  ds_q = numpy.array(f["qvectors"]) # shape==(nvectors,3)
  moduli=numpy.square(ds_q).sum(axis=1) # moduli-squared of the Q-vectors
  rank=numpy.argsort(moduli) # rank from smallest to greatest
  for dset in ('qvectors', 'fqt', 'fq', 'fq0', 'fq2'):
    if dset in f.keys():
      x=numpy.array(f[dset])
      g[dset]=x[rank] # f is read-only, so always write the reordered dataset to g
  for key,val in f.attrs.items(): g.attrs[key]=val
  g.close()
  f.close()
  if overwrite:
    os.system('/bin/mv %s %s'%(outfile,filename))
  return None
Example #12
class H5Writer:
    L = TypeVar("L", List[ndarray], Dict[str, ndarray])

    #__H5PY: h5py.File = h5py.File(fileobj=None, mode=None)
    def __init__(self, filename: str) -> None:
        self.__file = File(filename, 'a')

    def saveImgDataIntoGroup(self, imgData: L, groupName: str,
                             datasetNames: List[str]) -> None:
        #with File(filename, 'a') as file:
        group: Group = self.__file.create_group(groupName)
        print('... group was created successfully!')
        assert (len(imgData) == len(datasetNames)
                ), 'the number of data to save and dataset names are not equal'
        for i in range(len(datasetNames)):
            group.create_dataset(datasetNames[i],
                                 data=asarray(imgData[i]),
                                 compression='gzip',
                                 compression_opts=9)
            print('... dataset was created successfully!')

    def loadImgDataFromGroup(self,
                             groupName: str = None,
                             datasetNames: str = None) -> Generator:
        # with File(filename, "r") as file:
        keys: List[str] = list(self.__file.keys())
        imgArray: ndarray = None
        if (keys):
            print(keys)
        else:
            pass
        if (groupName):
            group: Group = self.__file.get(
                groupName)  # group2 = hf.get('group2/subfolder')
            items: List[Tuple] = list(
                group.items()
            )  # [(u'data3', <HDF5 dataset "data3": shape (100, 3333), type "<f8">)]
            if (items):
                print(items)
                try:
                    for i in range(len(items)):
                        print('recovering group class:',
                              group.get(items[i][0]))
                        yield asarray(
                            group.get(items[i][0])
                        )  # n1 = group1.get('data1') \n np.array(n1).shape
                except StopIteration:
                    self.closingH5PY()
            else:
                pass
        elif (datasetNames):
            #cls.__closingH5PY(file)
            yield self.__file.get(
                datasetNames)  # n1 = group1.get('data1') \n np.array(n1).shape

            #print("Size of List", len(imgArray), "Size of Tuple", len(imgArray[0]), "Size of Array", imgArray[0][0].shape, imgArray[0][0].size)

    def closingH5PY(self) -> None:
        self.__file.close()
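A minimal sketch of driving the writer above; the file name, group name, and image arrays are assumptions:

from numpy import zeros

writer = H5Writer('images.h5')                      # hypothetical output file
frames = [zeros((64, 64)), zeros((64, 64))]         # stand-in image data
writer.saveImgDataIntoGroup(frames, 'scan_001', ['frame_0', 'frame_1'])
for img in writer.loadImgDataFromGroup(groupName='scan_001'):
    print(img.shape)
writer.closingH5PY()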
Example #13
    def __init__(self, embeddings_file: h5py.File):
        """
        :param embeddings_file: an h5py File, e.g. `h5py.File("/path/to/file.h5")`.
        """
        self._lookup_table = dict(
            (embeddings_file[new_id].attrs["original_id"], new_id)
            for new_id in embeddings_file.keys())

        self._embeddings_file = embeddings_file
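The same original_id -> new_id mapping can be rebuilt directly; the file name and attribute layout here are assumptions that mirror the constructor above:

import h5py

with h5py.File('embeddings.h5', 'r') as f:
    lookup = {f[new_id].attrs['original_id']: new_id for new_id in f.keys()}
    print(len(lookup), 'embeddings indexed')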
Example #14
def plot_samples(h5py_file: h5py.File,
                 n_samples: int = 3,
                 dataset_length: int = 4000,
                 cmap: str = 'Greys_r',
                 vmin: float = None,
                 vmax: float = None) -> None:
    """Plot samples and pixel distributions as they come out of the h5py file directly."""
    sample_indices = np.random.choice(dataset_length, n_samples)
    keys = sorted(list(h5py_file.keys()))
    for counter, idx in enumerate(sample_indices):
        fig, axes = plt.subplots(ncols=len(keys) + 1,
                                 nrows=2,
                                 figsize=(12, 12))
        mask = h5py_file['mask'][idx]
        scan = h5py_file['scan'][idx]
        masked_scan = np.where(mask.astype(bool), scan, np.zeros(scan.shape))
        min_val = np.min(masked_scan) if vmin is None else vmin
        max_val = np.max(masked_scan) if vmax is None else vmax
        masked_pixels = scan[mask.astype(bool)].flatten()
        datasets = [h5py_file[key] for key in keys] + [masked_scan]
        for dataset_name, dataset, ax in zip(keys + ['masked_scan'], datasets,
                                             np.transpose(axes)):
            if dataset_name != 'masked_scan':
                array_2d = dataset[idx]
            else:  # actually not a dataset but simply an array already
                array_2d = dataset
            im = ax[0].imshow(np.reshape(array_2d, (200, 200)),
                              cmap=cmap,
                              vmin=min_val,
                              vmax=max_val)
            divider = make_axes_locatable(ax[0])
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(im, cax=cax)
            ax[0].axis('off')
            ax[0].set_title(dataset_name)
            ax[1].hist(
                array_2d if dataset_name != 'masked_scan' else masked_pixels,
                bins=30,
                density=False)
            try:
                description = stats.describe(array_2d if dataset_name !=
                                             'masked_scan' else masked_pixels)
            except ValueError:
                print(
                    'Found sample with empty mask. No statistics available.')
            else:
                ax[1].set_title(
                    f'mean: {description.mean:.2f}, var: {description.variance:.2f}'
                )
                print(
                    f'{dataset_name:15}: min/max: {description.minmax[0]:.2f}/{description.minmax[1]:.2f}, '
                    f'mean: {description.mean:.2f}, variance: {description.variance:.2f}'
                )
        plt.tight_layout()
        plt.show()
Example #15
    def embed_data(self,
                   h5_file: h5py.File,
                   embedder: EmbeddingModel,
                   save_states: bool = False):
        """Embeds cylinder flow data into a 1D vector representation for the transformer.

        TODO: Remove redundant arguments, add minibatch option for the encoding

        Args:
            h5_file (h5py.File): HDF5 file object of cylinder flow raw data
            embedder (EmbeddingModel): Embedding neural network
            save_states (bool, optional): To save the physical states or not, should be True for validation and testing. Defaults to False.
        """
        # Iterate through stored time-series
        samples = 0
        embedder.eval()

        logger.info(
            'Parsing hdf5 file and embedding data, this could take a bit...')
        # Loop simulations
        for key in h5_file.keys():

            u = torch.Tensor(h5_file[key + '/u'])
            v = torch.Tensor(h5_file[key + '/v'])
            data_series = torch.stack([u, v], dim=1).to(embedder.devices[0])
            # data_series = torch.nn.functional.interpolate(data_series, (32, 32, 32), mode='trilinear', align_corners=True)

            embedded_series = torch.zeros([data_series.size(0)] +
                                          [embedder.embedding_dims])
            with torch.no_grad():
                # Mini-batch embedding due to model size
                for i in range(0, data_series.size(0), 96):
                    embedded_series[i:i + 96] = embedder.embed(
                        data_series[i:i + 96]).cpu()

            # Stride over time-series
            for i in range(0,
                           data_series.size(0) - self.block_size + 1,
                           self.stride):  # Truncate in block of block_size
                data_series0 = embedded_series[i:i + self.block_size]
                self.examples.append(data_series0)
                self.position_ids.append(
                    torch.arange(0, self.block_size, dtype=torch.long) + i)
                if save_states:
                    self.states.append(data_series[i:i +
                                                   self.block_size].cpu())

            samples = samples + 1
            if self.ndata > 0 and samples >= self.ndata:  # If we have enough time-series samples break loop
                break

        logger.info(
            'Collected {:d} time-series from hdf5 file for a total of {:d} time-series.'
            .format(samples, len(self.examples)))
Example #16
    def __init__(self, hdf_path):

        hdf = File(hdf_path, "r")

        self.policies = list(hdf.keys())
        self.award_amounts = list(hdf[self.policies[0]].keys())
        self.pubneg_rates = list(
            hdf[self.policies[0]][self.award_amounts[0]].keys())
        self.fpdrs = list(hdf[self.policies[0]][self.award_amounts[2]][
            self.pubneg_rates[0]].keys())

        self.hdf = hdf
Example #17
def time_slice_info(meta_file: h5py.File) -> TimeSliceInfo:
    """
    Assemble information about the event data time slices from the metadata file.

    Args:
        meta_file:  Metadata ('_meta.h5') file.  Assumes metadata version 1.

    Returns:
        - List of slice objects used to select each time slice from the virtual source
          objects in order to populate the virtual layouts.  Length and order
          correspond to 'events_per_ts'.
        - List of the number of events in each time slice, in the order that the time
          slices will appear in the VDS.  Length is the number of time slices recorded.
    """
    fp_per_module = meta_file["fp_per_module"][()]

    ts_keys = sorted(filter(ts_key_regex.match, meta_file.keys()))
    ts_data = [meta_file[ts_key] for ts_key in ts_keys]

    time_slices = []
    num_events_per_ts = []
    # Loop through the modules, acting on the time slice metadata for each in turn.
    for num_files, ts_counts in zip(fp_per_module, ts_data):
        ts_counts = ts_counts[()]
        # Reshape the time slice metadata for a single module into a rectangular array
        # with shape (number of time slices per file, number of files), so as to be
        # able to generate file-specific slices.
        num_ts_per_fp = -(-ts_counts.size // num_files)
        ts_counts.resize(num_ts_per_fp * num_files)
        # Keep a separate record of each module's array of event counts per time slice.
        num_events_per_ts.append(ts_counts)
        ts_counts = ts_counts.reshape(num_ts_per_fp, num_files)
        # Generate the cumulative count of events per time slice for each file.
        ts_per_module = np.pad(np.cumsum(ts_counts, axis=0), ((1, 0), (0, 0)))
        # Turn these counts into slices to select from a virtual source for each file.
        time_slices.append(
            map(slice, ts_per_module[:-1].flatten(), ts_per_module[1:].flatten())
        )

    # Assemble all the source slices into a single list, ordered first
    # chronologically, then by module number.  Where modules have recorded different
    # numbers of time slices, zip_longest will pad with None.
    time_slices = list(chain.from_iterable(zip_longest(*time_slices)))

    # Resize each module's array of event counts per time slice so that their sizes
    # match.  This is achieved by zero-padding to match the None-padding of the list
    # of time slices by zip_longest.
    max_size = max(data.size for data in num_events_per_ts)
    num_events_per_ts = np.column_stack(
        [np.pad(data, (0, max_size - data.size)) for data in num_events_per_ts]
    ).flatten()

    return time_slices, num_events_per_ts
Example #18
def contents(data: h5py.File) -> list:
    """
    Returns a list of the top-level keys of an h5py file.

    Parameters
    ----------
    data : h5py.File

    Returns
    -------
    list
    """
    return list(data.keys())
Example #19
    def _get_outputs(self, input_file: h5py.File) -> Union[Any, Tuple]:
        """Extracts the step output from a given h5 file

        Args:
            input_file (h5py.File): File to load from

        Returns:
            Union[Any, Tuple]: Previously computed output of the step
        """
        outputs = list()
        nr_outputs = len(input_file.keys())

        # Legacy, remove at some point
        if nr_outputs == 1 and self.output_key in input_file.keys():
            return tuple([input_file[self.output_key][()]])

        for i in range(nr_outputs):
            outputs.append(input_file[f"{self.output_key}_{i}"][()])
        if len(outputs) == 1:
            return outputs[0]
        else:
            return tuple(outputs)
Example #20
def get_song_key_respecting_dic(dataset: h5py.File, dictionary: list) -> list:
    """ Verify which songs in dictionnary respect a given dictionary

    # Arguments:
        dataset (h5py.File): the dataset containing a key->songs mapping
        dictionary (list): the dictionary to use
    # Returns:
        a list of the song keys that respect the dictionary
    """
    song_key_respecting_dic = list()
    print('Starting song analysis from dictionary...')
    for i, song_key in enumerate(list(dataset.keys())[:100]):
        print('Execution: {:.2f}\r'.format(i / len(dataset.keys()) * 100),
              end='')
        song = dataset[song_key][0]
        song_respect_dic = True
        for char in song:
            if char not in dictionary:
                song_respect_dic = False
                break
        if song_respect_dic:
            song_key_respecting_dic.append(song_key)
    return song_key_respecting_dic
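A usage sketch with a hypothetical dataset file and character dictionary:

import h5py

allowed_chars = list('abcdefghijklmnopqrstuvwxyz ')   # hypothetical dictionary
with h5py.File('songs.h5', 'r') as dataset:           # hypothetical dataset
    keys = get_song_key_respecting_dic(dataset, allowed_chars)
    print(len(keys), 'songs respect the dictionary')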
Example #21
def copy_hdf5(src_file: h5.File, dest_file: h5.File, indices: list):
    first_dim = len(indices)
    for key in src_file.keys():
        src_data = src_file[key]
        shape = list(src_data.shape)
        shape[0] = first_dim
        dest_data = dest_file.create_dataset(name=key,
                                             shape=shape,
                                             dtype=src_data.dtype)

        for dest_i, src_i in enumerate(indices):
            dest_data[dest_i] = src_data[src_i]
            print('copied {}'.format(dest_i))
    return
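A usage sketch that copies a few rows of every dataset into a new file; the file names and indices are assumptions:

import h5py as h5

with h5.File('full.h5', 'r') as src, h5.File('subset.h5', 'w') as dst:
    copy_hdf5(src, dst, indices=[0, 5, 10])   # keep rows 0, 5 and 10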
Example #22
 def pp_keys(self):
     data_file = File(self.data_path + self.log, 'r', libver='latest', swmr=True)
     try:
         keys = [key for key in data_file.keys()]
         pp_key_vals = []
         for vals in keys:
             if vals != 'tpts' and vals != 'p_tot':
                 pp_key_vals.append(vals)
             else:
                 pass
         data_file.close()
         return pp_key_vals
     except:
         self.exit_handler()
         raise Exception('ERROR')
Example #23
def find_links(
    input_file: h5py.File,
    link_names: Optional[List] = [],
    link_paths: Optional[List] = [],
    path: Optional[str] = None,
) -> Tuple[List[str], List[str]]:
    """
    Recursively finds all the links in the snapshot and writes them to a list

    Parameters
    ----------
    input_file : h5py.File
        hdf5 file handle for snapshot
    link_names : list of str, optional
        names of links found in the snapshot
    link_paths : list of str, optional
        paths where links found in the snapshot point to
    path : str, optional
        the path to the current location in the snapshot

    Returns
    -------
    link_names, link_paths : list of str, list of str
        lists of the names and links of paths in `input_file`
    """
    if path is not None:
        keys = input_file[path].keys()
    else:
        keys = input_file.keys()
        path = ""

    link_names = []
    link_paths = []
    for key in keys:
        subpath = f"{path}/{key}"
        dataset = input_file.get(subpath, getlink=True)
        if isinstance(dataset, h5py.SoftLink):
            link_names.append(subpath.lstrip("/"))
            link_paths.append(dataset.path)
        else:
            try:
                if input_file[subpath].keys() is not None:
                    find_links(input_file, link_names, link_paths, subpath)
            except:
                pass

    return link_names, link_paths
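A usage sketch, assuming a SWIFT-style snapshot file name:

import h5py

with h5py.File('snapshot_0000.hdf5', 'r') as snap:    # hypothetical snapshot
    names, targets = find_links(snap)
    for name, target in zip(names, targets):
        print(name, '->', target)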
Example #24
def iterate_nxs(nxs: h5py.File):
    q = []
    q.extend([(None, nxs[group_key]) for group_key in nxs.keys()])
    # q = [(file, "entry")]
    while len(q) > 0:
        parent, current_group = q.pop(0)
        # current_group_reference = parent_group[key]
        # print("Value:", group)

        next_groups = getattr(current_group, "keys")() if hasattr(current_group, "keys") else []

        # extend with the next group references
        q.extend([(current_group, current_group[next_group_name]) for next_group_name in next_groups])
        # q.extend([(current_group_reference, next_group_name) for next_group_name in next_groups])
        # print("Queue state  :", q)

        yield parent, current_group
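A usage sketch that walks a hypothetical NeXus file and prints the name of every node visited:

import h5py

with h5py.File('scan.nxs', 'r') as nxs:               # hypothetical NeXus file
    for parent, node in iterate_nxs(nxs):
        print(node.name)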
Example #25
def process_raw(filename, stats, delete=False):
    """
    Process a .raw.hdf5 data file into companion .hits.hdf5 and .roi.hdf5 files.
    """
    from h5py import File
    from numpy import empty
    fraw = File(filename, 'r')
    raw = fraw['images'][()]
    length = fraw['images'].shape[0]
    width = fraw['image width'][()]
    height = fraw['image height'][()]
    pixel_format = fraw['pixel format'][()]
    #preparation. Create ROI file and populate it with all fields.

    images = convert_raw_to_images(raw,
                                   pixel_format=pixel_format,
                                   height=height,
                                   width=width,
                                   length=length)

    hits_dict = hits_from_chunk(images)
    filename_hits = filename.replace('.raw.hdf5', '.hits.hdf5')

    hits_header = ['hits0', 'hits1', 'hits2', 'hits3']
    with File(filename_hits, 'w') as fhits:
        fhits.create_dataset('hits0', data=hits_dict['hits0'], dtype='int32')
        fhits.create_dataset('hits1', data=hits_dict['hits1'], dtype='int32')
        fhits.create_dataset('hits2', data=hits_dict['hits2'], dtype='uint8')
        fhits.create_dataset('hits3',
                             data=hits_dict['hits3'],
                             dtype='uint8',
                             compression='lzf')

    roi = roi_from_hits_and_data(hits2=hits_dict['hits2'], images=images)

    filename_roi = filename.replace('.raw.hdf5', '.roi.hdf5')
    with File(filename_roi, 'w') as froi:
        for key in fraw.keys():
            if key != 'images':
                froi.create_dataset(key, data=fraw[key])
            elif key == 'images':
                froi.create_dataset(key,
                                    data=roi,
                                    chunks=(1, height, width),
                                    compression='lzf',
                                    dtype='int16')
Example #26
File: database.py Project: holm10/UEDGE
def create_dict(dump,variables=None):
    ''' Creates a dictionary of all variables in 'dump'
        Parameters:
            dump        Path to dump to be dictionarized
            variables   List of variables to be saved (default is all) - no packages
    '''
    from numpy import array
    ret=dict()          # Dictionary to be returned
    f=File(dump,'r')    # Open readable dump as f
    for pack in f.keys():   # Loop through the list of packages
        p=f.get(pack)       # Get the package object p
        for var in p.keys():    # Loop through all variables in the package
            if variables is None:   # By default, save all variables to dict
                ret[pack+'.'+var]=array(p.get(var))    # Store the variable to the dictionary as array
            elif var in variables:  # If variables listed, save only listed variables
                ret[pack+'.'+var]=array(p.get(var))    # Store the variable to the dictionary as array
    return ret
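A usage sketch with a hypothetical UEDGE dump file; keys in the returned dictionary follow the '<package>.<variable>' pattern built above:

# Read every variable from a hypothetical dump into a flat dictionary.
d = create_dict('uedge_case.hdf5')
print(list(d.keys())[:5])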
Example #27
def read_hdf5_content(f: h5py.File, gp_current, gp_max, pattern: Pattern, filename, well_regex: Pattern,
                      normalize_enum: int, terminal_columns: int, verbose: bool, best_well_max, best_well_min):
    key_list = list(f.keys())
    key_list.sort(key=lambda a: int(re.split(pattern, a)[1]))
    worker_x = []

    for k in range(len(key_list)):
        key = key_list[k]
        # print("Loading dataset associated with key ", str(key))
        current_well = re.split(well_regex, key)[1]

        if verbose:
            line_print("Reading data file: " + filename + " - Current dataset key: " + str(
                key) + " Well: " + current_well + " [" + str(gp_current) + "/" + str(gp_max) + "]",
                       max_width=terminal_columns)

        current_x = np.array(f[str(key)])
        if normalize_enum == 0:
            pass
        elif normalize_enum == 1:
            current_x = normalize_np(current_x, 0, 255)
        elif normalize_enum == 2:
            current_x[0] = normalize_np(current_x[0], current_x[0].min(), current_x[0].max())
            current_x[1] = normalize_np(current_x[1], current_x[1].min(), current_x[1].max())
            current_x[2] = normalize_np(current_x[2], current_x[2].min(), current_x[2].max())
        elif normalize_enum == 3:
            current_x[0] = normalize_np(current_x[0], current_x.min(), current_x.max())
            current_x[1] = normalize_np(current_x[1], current_x.min(), current_x.max())
            current_x[2] = normalize_np(current_x[2], current_x.min(), current_x.max())
        elif normalize_enum == 4:
            best_well_max[0] = max(best_well_max[0], current_x[0].max())
            best_well_max[1] = max(best_well_max[1], current_x[1].max())
            best_well_max[2] = max(best_well_max[2], current_x[2].max())

            best_well_min[0] = min(best_well_min[0], current_x[0].min())
            best_well_min[1] = min(best_well_min[1], current_x[1].min())
            best_well_min[2] = min(best_well_min[2], current_x[2].min())
        else:
            raise Exception('Undefined state of normalize_enum')

        worker_x.append(np.array(current_x))

    return worker_x
Example #28
def get_event_timestamp_ext_link_index(ext_link_file: h5py.File,
                                       timestamps: np.ndarray):
    """
    Function which generates an index map from timestamps to index in external link file
    :param ext_link_file: external link file as h5py.File object
    :param timestamps: array of timestamps
    :return: map of indexes in timestamps to indexes in ext_link_file
    """
    # find name of groups to be read
    groups_list = list(ext_link_file.keys())
    timestamps_ext_link = []
    for event_ind, event in enumerate(groups_list):
        timestamps_ext_link.append(
            np.datetime64(ext_link_file[event].attrs.__getitem__(
                "Timestamp").decode('utf8')))
    timestamps_ext_link = np.array(timestamps_ext_link)
    ext_link_index = np.empty_like(timestamps, dtype=int)
    for index, timestamp in enumerate(timestamps):
        ext_link_index[index] = np.where(timestamp == timestamps_ext_link)[0]
    return ext_link_index
Example #29
 def load_container_list(dfile: h5py.File):
     """
     Loads a list of RegionContainer objects from an hdf5 file
     :param dfile: Handle of hdf5 file from which list should be loaded
     :return: A list of RegionContainer objects
     """
     container_list = []
     for k in dfile.keys():
         try:
             pos = np.array(dfile[k]["positions"])
             pos = [(p[0], p[1]) for p in pos]
             rn = str(np.array(dfile[k]["region_name"]))
             zi = int(np.array(dfile[k]["z_index"]))
             rc = RegionContainer(pos, rn, zi)
             container_list.append(rc)
         except KeyError:
             warnings.warn(
                 "Found non RegionContainer object in file {0}".format(
                     dfile.filename))
             continue
     return container_list
Example #30
def test_tree_to_hdf5_and_back(hdf5_temp: h5py.File):
    data = {
        'a': 1,
        'b': 3.14,
        'c': 'asdf',
        'd': np.full(4, 3.14),
        'q': {
            'foo': 'bar',
            'deep': {}
        }
    }
    tree_to_hdf5(data, hdf5_temp)
    assert hdf5_temp.attrs == {'a': 1, 'b': 3.14, 'c': 'asdf'}
    assert hdf5_temp.keys() == {'d', 'q'}
    assert np.array(hdf5_temp['d']) == approx(np.full(4, 3.14))
    assert hdf5_temp['q'].attrs == {'foo': 'bar'}
    assert hdf5_temp['q'].keys() == {'deep'}
    assert hdf5_temp['q/deep'].attrs == {}
    assert hdf5_temp['q/deep'].keys() == set()
    data['d'] = approx(data['d'])
    assert hdf5_to_tree(hdf5_temp) == data
Example #31
def find_datasets(input_file: h5py.File,
                  dataset_names=[],
                  path=None,
                  recurse=False) -> List[str]:
    """
    Recursively finds all the datasets in the snapshot and writes them to a list

    Parameters
    ----------
    input_file : h5py.File
        hdf5 file handle for snapshot
    dataset_names : list of str, optional
        names of datasets found in the snapshot
    path : str, optional
        the path to the current location in the snapshot
    recurse : bool, optional
        flag to indicate whether we're recursing or not

    Returns
    -------
    dataset_names : list of str
        names of datasets in `path` in `input_file`
    """
    if not recurse:
        dataset_names = []

    if path is not None:
        keys = input_file[path].keys()
    else:
        keys = input_file.keys()
        path = ""

    for key in keys:
        subpath = f"{path}/{key}"
        if isinstance(input_file[subpath], h5py.Dataset):
            dataset_names.append(subpath)
        elif input_file[subpath].keys() is not None:
            find_datasets(input_file, dataset_names, subpath, recurse=True)

    return dataset_names
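A usage sketch that lists every dataset path in a hypothetical snapshot:

import h5py

with h5py.File('snapshot_0000.hdf5', 'r') as snap:    # hypothetical snapshot
    for name in find_datasets(snap):
        print(name)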