def get_feature_ref(self):
    """Load the feature reference stored in this object's HDF5 file.

    Looks up the entry named by h5_constants.H5_FEATURE_REF_ATTR in
    self.h5 and hands it to FeatureReference.from_hdf5.

    Returns:
        The result of FeatureReference.from_hdf5 for that entry.
    """
    feature_ref_group = self.h5[h5_constants.H5_FEATURE_REF_ATTR]
    return FeatureReference.from_hdf5(feature_ref_group)
def load_feature_ref_from_h5_group(group):
    """Load just the FeatureRef from an h5py.Group.

    Args:
        group: An h5py.Group (or anything indexable the same way) that
            holds an entry named by h5_constants.H5_FEATURE_REF_ATTR.

    Returns:
        The result of FeatureReference.from_hdf5 for that entry.
    """
    return FeatureReference.from_hdf5(group[h5_constants.H5_FEATURE_REF_ATTR])
def open(filename, mode, feature_ref=None, barcodes=None, library_info=None, barcode_info=None):
    """Open a molecule info object.

    Args:
        filename (str): Filename to open or create
        mode (str): 'r' for reading, 'w' for writing.
        feature_ref (FeatureReference): Required when mode is 'w'.
        barcodes (list of str): All possible barcode sequences. Required when mode is 'w'.
        library_info (list of dict): Library metadata. Required when mode is 'w'.
        barcode_info (BarcodeInfo): Per-barcode metadata. Required when mode is 'w'.

    Returns:
        MoleculeCounter: A new object

    Raises:
        ValueError: In 'w' mode, if feature_ref, barcodes, library_info or
            barcode_info is None. In 'r' mode, if the file's format version
            differs from CURR_FILE_VERSION.
        AttributeError: In 'r' mode, if the file contains a top-level key
            that is not recognized.
    """
    assert mode == 'r' or mode == 'w'

    mc = MoleculeCounter()

    if mode == 'w':
        # Validate every required argument before touching the filesystem.
        if feature_ref is None:
            raise ValueError('Feature reference must be specified when opening a molecule info object for writing')
        if barcodes is None:
            raise ValueError('Barcodes must be specified when opening a molecule info object for writing')
        if library_info is None:
            raise ValueError('Library info must be specified when opening a molecule info object for writing')
        if barcode_info is None:
            raise ValueError('Barcode info must be specified when opening a molecule info object for writing')

        mc.h5 = h5py.File(filename, 'w')
        try:
            cr_io.set_hdf5_attr(mc.h5, FILE_VERSION_KEY, CURR_FILE_VERSION)
            cr_io.set_hdf5_attr(mc.h5, h5_constants.H5_FILETYPE_KEY, MOLECULE_H5_FILETYPE)

            mc.h5.create_group(METRICS_GROUP_NAME)

            # Write feature reference
            fref_group = mc.h5.create_group(h5_constants.H5_FEATURE_REF_ATTR)
            feature_ref.to_hdf5(fref_group)

            # Write barcodes
            # If there are multiple barcode lengths, use the largest for the numpy dtype.
            # The builtin max() is used because np.max() cannot reduce the
            # iterator that map() returns on Python 3.
            max_barcode_len = max(len(bc) for bc in barcodes)
            barcode_dtype = np.dtype('S%d' % max_barcode_len)
            mc.h5.create_dataset('barcodes',
                                 data=np.fromiter(barcodes, barcode_dtype, count=len(barcodes)),
                                 compression=HDF5_COMPRESSION)

            # Write library info
            lib_info_json = json.dumps(library_info, indent=4, sort_keys=True)
            cr_io.create_hdf5_string_dataset(mc.h5, 'library_info', [lib_info_json])

            # Write barcode info
            g = mc.h5.create_group(BARCODE_INFO_GROUP_NAME)
            MoleculeCounter.save_barcode_info(barcode_info, g)

            # Create empty per-molecule datasets; maxshape=(None,) leaves them resizable.
            for name, col_type in MOLECULE_INFO_COLUMNS.items():
                mc.columns[name] = mc.h5.create_dataset(name, (0,),
                                                        maxshape=(None,),
                                                        dtype=col_type,
                                                        compression=HDF5_COMPRESSION,
                                                        chunks=(HDF5_CHUNK_SIZE,))
        except Exception:
            # The object is never returned on failure, so nobody else could
            # close the handle; release it before propagating the error.
            mc.h5.close()
            raise

    elif mode == 'r':
        mc.h5 = h5py.File(filename, 'r')
        try:
            try:
                mc.file_version = mc.h5.attrs[FILE_VERSION_KEY]
            except (KeyError, AttributeError):
                # h5py reports a missing attribute as KeyError; AttributeError
                # is still accepted because the original code caught it.
                mc.file_version = 1  # V1 doesn't have version field

            if mc.file_version < CURR_FILE_VERSION:
                raise ValueError('The molecule info HDF5 file (format version %d) was produced by an older version of Cell Ranger. Reading these files is unsupported.' % mc.file_version)
            if mc.file_version > CURR_FILE_VERSION:
                raise ValueError('The molecule info HDF5 file (format version %d) was produced by an newer version of Cell Ranger. Reading these files is unsupported.' % mc.file_version)

            for key in mc.h5.keys():
                if key in MOLECULE_INFO_COLUMNS:
                    mc.columns[key] = mc.h5[key]
                elif key in MOLECULE_REF_COLUMNS:
                    mc.ref_columns[key] = mc.h5[key]
                elif key == h5_constants.H5_FEATURE_REF_ATTR:
                    mc.feature_reference = FeatureReference.from_hdf5(mc.h5[key])
                elif key in (METRICS_GROUP_NAME, BARCODE_INFO_GROUP_NAME):
                    # Known groups; nothing is loaded from them here.
                    pass
                else:
                    raise AttributeError("Unrecognized dataset key: %s" % key)

            # Load library info
            mc.library_info = json.loads(cr_io.read_hdf5_string_dataset(mc.h5['library_info'])[0])
        except Exception:
            # Do not leak the open file when the contents are rejected.
            mc.h5.close()
            raise

    return mc