# Authors: David Alexander, Jim Bullard __all__ = [ "BasH5Reader" , "BaxH5Reader" , "BasH5Collection" ] try: import h5py except ImportError: from pbcore.util import h5py_dummy h5py = h5py_dummy() import numpy as np import os.path as op from bisect import bisect_left, bisect_right from operator import getitem from itertools import groupby from collections import OrderedDict from pbcore.io.FofnIO import readFofn from pbcore.chemistry import (decodeTriple, tripleFromMetadataXML, ChemistryLookupError) from pbcore.model import ExtraBaseRegionsMixin, HQ_REGION from ._utils import arrayFromDataset, CommonEqualityMixin # ZMW hole Types SEQUENCING_ZMW = 0
from __future__ import absolute_import
from __future__ import print_function

try:
    import h5py
except ImportError:
    from pbcore.util import h5py_dummy
    h5py = h5py_dummy()

import numpy as np
from cStringIO import StringIO


def arrayFromDataset(ds, offsetBegin, offsetEnd):
    """
    Read a contiguous slice of a one-dimensional HDF5 dataset into a
    newly allocated numpy array.

    Parameters:
      ds          -- dataset object exposing `dtype` and the low-level
                     `id` handle (`id.get_space()`, `id.read(...)`)
      offsetBegin -- index of the first element to read
      offsetEnd   -- index one past the last element to read

    Returns a 1-d numpy array of length `offsetEnd - offsetBegin` with
    the same dtype as `ds`.
    """
    extent = (offsetEnd - offsetBegin,)

    # Destination buffer; left uninitialised because the read below
    # fills it.
    out = np.empty(extent, dtype=ds.dtype)

    # Memory-side dataspace with the same extent as the buffer.
    memSpace = h5py.h5s.create_simple(extent)

    # File-side dataspace narrowed to the requested window.  The third
    # positional argument, (1,), is the stride in h5py's low-level API.
    fileSpace = ds.id.get_space()
    fileSpace.select_hyperslab((offsetBegin,), extent, (1,))

    # Read straight into `out` through the low-level dataset handle.
    ds.id.read(memSpace, fileSpace, out)
    return out


def splitFileContents(f, delimiter, BLOCKSIZE=8192):
    """
    Same semantics as f.read().split(delimiter), but with memory usage
    determined by largest chunk rather than entire file size
    """