def write_read(self, read):
    """Write a read to the current output file, starting a new file
    every ``self.reads_per_file`` reads.

    :param read: either a `Read` object or an hdf group handle from a
        source multi-read file.

    :raises RuntimeError: if the writer has already been closed.
    :raises TypeError: if `read` is neither a `Read` nor an `h5py.Group`.
    """
    if self.closed:
        raise RuntimeError('Cannot write after closed.')
    if self.current_reads == 0:
        # start a new file: close any previous one and open the next
        # numbered output file.
        self.close()
        filename = '{}mreads_file{}.fast5'.format(
            self.prefix, self.file_counter)
        filename = os.path.join(self.out_path, filename)
        self.current_file = h5py.File(filename, 'w')
        self.current_file.attrs[_sanitize_data_for_writing(
            'file_version')] = _sanitize_data_for_writing("2.0")
        self.file_counter += 1

    # write data
    if isinstance(read, Read):
        self._write_read(read)
    elif isinstance(read, h5py.Group):
        self._copy_read_group(read)
    else:
        # Bug fix: the original message contained an unfilled '{}'
        # placeholder — include the offending type in the error.
        raise TypeError(
            "Cannot write type {} to output file.".format(type(read)))
    self.current_reads += 1

    # roll the counter over so the next write starts a fresh file
    if self.current_reads == self.reads_per_file:
        self.current_reads = 0
def New(cls, fname, read='a', tracking_id=None, context_tags=None,
        channel_id=None):
    """Construct a fresh bulk file, with meta data written to standard
    locations. There is currently no checking of this meta data.

    :param fname: output filename for the new file.
    :param read: mode with which the file is reopened for the returned
        instance.
    :param tracking_id: dict of attributes written under
        ``cls.__tracking_path__``.
    :param context_tags: dict of attributes written under
        ``cls.__context_path__``.
    :param channel_id: NOTE(review): accepted but never written by this
        body — confirm whether channel meta should be stored.

    TODO: Add meta data checking.
    """
    # Bug fix: the original used mutable default arguments ({}), which
    # are shared across calls; use None sentinels instead.
    tracking_id = {} if tracking_id is None else tracking_id
    context_tags = {} if context_tags is None else context_tags
    channel_id = {} if channel_id is None else channel_id

    # Start a new file, populate it with meta
    with h5py.File(fname, 'w') as h:
        h.attrs[_sanitize_data_for_writing(
            'file_version')] = _sanitize_data_for_writing(1.0)
        for data, location in zip(
                [tracking_id, context_tags],
                [cls.__tracking_path__, cls.__context_path__]):
            # see cjw's comment in fast5.py:
            # 'no idea why these must be str, just following ossetra'
            cls.__add_attrs(h, data, location, convert=str)
    # return instance from new file
    return cls(fname, read)
def __add_attrs(self, data, location, convert=None):
    """Implementation of _add_attrs as staticmethod.

    This allows functionality to be used in .New() constructor but is
    otherwise nasty!
    """
    # Make sure the destination group exists before writing attributes.
    if location not in self:
        self.create_group(location)
    target = self[location].attrs
    for key, value in data.items():
        # Apply the optional conversion first, then sanitize both key
        # and value for hdf5 storage.
        if convert is not None:
            value = convert(value)
        target[_sanitize_data_for_writing(key)] = \
            _sanitize_data_for_writing(value)
def _add_numpy_table(self, data, location):
    """Store `data` as a compressed dataset at `location`."""
    sanitized = _sanitize_data_for_writing(data)
    self.create_dataset(location, data=sanitized, compression=True)