# Beispiel #1
# 0
# Authors: David Alexander, Jim Bullard



__all__ = [ "BasH5Reader"     ,
            "BaxH5Reader"     ,
            "BasH5Collection" ]

try:
    import h5py
except ImportError:
    from pbcore.util import h5py_dummy
    h5py = h5py_dummy()

import numpy as np
import os.path as op
from bisect import bisect_left, bisect_right
from operator import getitem
from itertools import groupby
from collections import OrderedDict

from pbcore.io.FofnIO import readFofn
from pbcore.chemistry import (decodeTriple,
                              tripleFromMetadataXML,
                              ChemistryLookupError)
from pbcore.model import ExtraBaseRegionsMixin, HQ_REGION
from ._utils import arrayFromDataset, CommonEqualityMixin


# ZMW hole types: hole status code 0 marks a sequencing ZMW.
# NOTE(review): other status codes presumably exist in the HDF5 file's
# hole-status data — confirm against the bas.h5 format spec.
SEQUENCING_ZMW = 0
# Beispiel #2
# 0
from __future__ import absolute_import
from __future__ import print_function

try:
    import h5py
except ImportError:
    from pbcore.util import h5py_dummy
    h5py = h5py_dummy()

import numpy as np
from cStringIO import StringIO


def arrayFromDataset(ds, offsetBegin, offsetEnd):
    """
    Read the slice [offsetBegin, offsetEnd) of a one-dimensional HDF5
    dataset `ds` into a freshly allocated numpy array and return it.

    Uses the h5py low-level API (hyperslab selection + direct read into
    a pre-allocated buffer) rather than high-level slicing.
    """
    length = offsetEnd - offsetBegin
    out = np.ndarray(shape=(length,), dtype=ds.dtype)
    # Memory-side dataspace describing the destination buffer.
    memSpace = h5py.h5s.create_simple((length,))
    # File-side dataspace restricted to the requested contiguous slab.
    fileSpace = ds.id.get_space()
    fileSpace.select_hyperslab((offsetBegin,), (length,), (1,))
    ds.id.read(memSpace, fileSpace, out)
    return out


def splitFileContents(f, delimiter, BLOCKSIZE=8192):
    """
    Same semantics as f.read().split(delimiter), but with memory usage
    determined by largest chunk rather than entire file size
    """