def test_011_v0_6_raw(self):
    """A v0.6 raw fast5 file is recognized, updated in place to the
    current format, and exposes its event, channel and raw data."""
    # Check that it is recognized properly.
    fname = os.path.join(test_data, 'read_file_v0.6_raw.fast5')
    result = Fast5Info(fname)
    self.assertEqual(0.6, result.version)
    # Copy file and Update to current format.
    new_file = self.generate_temp_filename()
    copyfile(fname, new_file)
    Fast5File.update_legacy_file(new_file)
    result = Fast5Info(new_file)
    self.assertEqual(CURRENT_FAST5_VERSION, result.version)
    self.assertEqual(1, len(result.read_info))
    self.assertEqual(627, result.read_info[0].read_number)
    # Load the event data.
    with Fast5File(new_file, mode='r') as fh:
        analist = fh.list_analyses('event_detection')
        self.assertEqual(1, len(analist))
        group = '{}/Reads/Read_627'.format(analist[0][1])
        data = fh.get_analysis_dataset(group, 'Events')
        # The legacy 'variance' field must have been converted away, leaving
        # exactly the standard four event fields.
        self.assertEqual(2337, data.size)
        self.assertEqual(set(('mean', 'stdv', 'start', 'length')),
                         set(data.dtype.names))
        read_info = fh.status.read_info[0]
        self.assertEqual(2337, read_info.event_data_count)
        channel_info = fh.get_channel_info()
        self.assertEqual(118, channel_info['channel_number'])
        # Raw sample count should agree with the read's recorded duration.
        raw = fh.get_raw_data(read_number=627)
        self.assertEqual(46037, raw.size)
        self.assertEqual(46037, read_info.duration)
def test_012_v1_0_single(self):
    """A v1.0 single-read fast5 file is recognized, updated in place to
    the current format, and exposes its event and channel data."""
    # Check that it is recognized properly.
    fname = os.path.join(test_data, 'read_file_v1.0_single.fast5')
    result = Fast5Info(fname)
    self.assertEqual(1.0, result.version)
    # Copy file and Update to current format.
    new_file = os.path.join(self.save_path, 'single_read_v1.0_test.fast5')
    copyfile(fname, new_file)
    Fast5File.update_legacy_file(new_file)
    result = Fast5Info(new_file)
    # update_legacy_file stamps CURRENT_FAST5_VERSION; assert against the
    # constant (as the sibling v0.6 test does) rather than hard-coding 1.1,
    # so this test does not break when the current version is bumped.
    self.assertEqual(CURRENT_FAST5_VERSION, result.version)
    self.assertEqual(1, len(result.read_info))
    self.assertEqual(59, result.read_info[0].read_number)
    # Load the event data.
    with Fast5File(new_file, mode='r') as fh:
        analist = fh.list_analyses('event_detection')
        self.assertEqual(1, len(analist))
        group = '{}/Reads/Read_59'.format(analist[0][1])
        data = fh.get_analysis_dataset(group, 'Events')
        self.assertEqual(7875, data.size)
        self.assertEqual(set(('mean', 'stdv', 'start', 'length')),
                         set(data.dtype.names))
        read_info = fh.status.read_info[0]
        self.assertEqual(7875, read_info.event_data_count)
        channel_info = fh.get_channel_info()
        self.assertEqual(1, channel_info['channel_number'])
def __init__(self, fname, mode='r'):
    """ Constructor. Opens the specified file.

    :param fname: Filename to open.
    :param mode: File open mode (r, r+, w, w-, x).
    """
    allowed_modes = ['r', 'r+', 'w', 'w-', 'x']
    if mode not in allowed_modes:
        raise IOError('Unsupported file handle mode : "{}" use {}'.format(
            mode, allowed_modes))
    self.filename = fname
    self.handle = None
    self._is_open = False
    if mode in ['w', 'w-', 'x']:
        # A write-type mode creates the skeleton layout, then the file is
        # reopened read/write below.
        with h5py.File(fname, mode) as fh:
            fh.attrs['file_version'] = CURRENT_FAST5_VERSION
            fh.create_group('Analyses')
            fh.create_group('Raw/Reads')
            for leaf in ('channel_id', 'context_tags', 'tracking_id'):
                fh.create_group('UniqueGlobalKey/' + leaf)
        mode = 'r+'
    self.mode = mode
    self.status = Fast5Info(self.filename)
    # Only open a handle if the file passed validation.
    if self.status.valid:
        self.handle = h5py.File(self.filename, self.mode)
        self._is_open = True
def _initialise_file(self):
    """Create the skeleton group layout for a brand-new file (write-type
    modes only), then re-read the file status and open a handle if valid."""
    if self.mode in ['w', 'w-', 'x']:
        with h5py.File(self.filename, self.mode) as fh:
            fh.attrs['file_version'] = CURRENT_FAST5_VERSION
            fh.create_group('Analyses')
            fh.create_group('Raw/Reads')
            for leaf in ('channel_id', 'context_tags', 'tracking_id'):
                fh.create_group(self.global_key + leaf)
        # After creation, all further access is read/write.
        self.mode = 'r+'
    self.status = Fast5Info(self.filename)
    if self.status.valid:
        self.handle = h5py.File(self.filename, self.mode)
        self._is_open = True
def __init__(self, filename):
    """ Constructor. Opens the specified file read-only.

    :param filename: Filename to open.
    """
    # Attribute initialization.
    self.filename = filename
    self.handle = None
    self._is_open = False
    self.status = Fast5Info(self.filename)
    self.events = None
    # Only open a read-only handle when the file passed validation.
    if self.status.valid:
        self.handle = h5py.File(self.filename, 'r')
        self._is_open = True
def _initialise_file(self):
    """Create the skeleton group layout for a brand-new file (write-type
    modes only), then re-read the file status and open a handle if valid.

    :raises Fast5FileTypeError: If any step of initialisation fails.
    """
    try:
        if self.mode in ['w', 'w-', 'x']:
            with h5py.File(self.filename, self.mode) as fh:
                fh.attrs['file_version'] = CURRENT_FAST5_VERSION
                fh.create_group('Analyses')
                fh.create_group('Raw/Reads')
                fh.create_group(self.global_key + 'channel_id')
                fh.create_group(self.global_key + 'context_tags')
                fh.create_group(self.global_key + 'tracking_id')
            self.mode = 'r+'
        self.status = Fast5Info(self.filename)
        if self.status.valid:
            self.handle = h5py.File(self.filename, self.mode)
            # Flag the handle as open, consistent with the other
            # _initialise_file implementations in this file.
            self._is_open = True
    except Exception as e:
        # Chain the original exception so the underlying cause (HDF5 error,
        # bad header, ...) is not discarded by the broad wrap.
        raise Fast5FileTypeError(
            "Failed to initialise single-read Fast5File: '{}'".format(
                self.filename)) from e
def _initialise_file(self):
    """Prepare the underlying HDF5 file according to ``self.mode``.

    Read-only opens are first copied to a temporary file; write-type modes
    create the skeleton group layout and switch to read/write.  Afterwards
    the file status is re-read and a handle opened if the file is valid.
    """
    if self.mode == 'r':
        # For read-only files, copy the file to (hopefully) faster storage
        from tempfile import NamedTemporaryFile
        from shutil import copyfile
        # Keep the NamedTemporaryFile object alive on self so the temp copy
        # is not deleted while the handle is in use.
        # NOTE(review): reopening the file by name while the object still
        # holds it open is POSIX-only behaviour — presumably fails on
        # Windows; confirm if portability matters.
        self._readonly_file_obj = NamedTemporaryFile()
        copyfile(self.filename, self._readonly_file_obj.name)
        # From here on, every open targets the temp copy, not the original
        # path the caller supplied.
        self.filename = self._readonly_file_obj.name
    elif self.mode in ['w', 'w-', 'x']:
        with h5py.File(self.filename, self.mode) as fh:
            fh.attrs['file_version'] = CURRENT_FAST5_VERSION
            fh.create_group('Analyses')
            fh.create_group('Raw/Reads')
            fh.create_group(self.global_key + 'channel_id')
            fh.create_group(self.global_key + 'context_tags')
            fh.create_group(self.global_key + 'tracking_id')
        # After creation, all further access is read/write.
        self.mode = 'r+'
    self.status = Fast5Info(self.filename)
    if self.status.valid:
        self.handle = h5py.File(self.filename, self.mode)
        self._is_open = True
def update_legacy_file(fname):
    """ Update a fast5 file from an older version to the new standard.

    :param fname: The filename of the fast5 file.
    :raises IOError: If the file is not a valid fast5 file.
    """
    status = Fast5Info(fname)
    if not status.valid:
        raise IOError('Cannot update invalid file: {}'.format(fname))
    with h5py.File(fname, 'r+') as handle:
        # Add Raw/Read/Read_## groups for reads if they are missing.
        for read_info in status.read_info:
            read_group_name = 'Raw/Reads/Read_{}'.format(
                read_info.read_number)
            if read_group_name in handle:
                rgh = handle[read_group_name]
            else:
                rgh = handle.create_group(read_group_name)
            # (Re)stamp the read metadata attributes either way.
            rgh.attrs['read_number'] = read_info.read_number
            rgh.attrs['read_id'] = read_info.read_id
            rgh.attrs['duration'] = read_info.duration
            rgh.attrs['start_time'] = read_info.start_time
            rgh.attrs['start_mux'] = read_info.start_mux
        # Add the Analyses and tracking_id groups, if they are missing.
        if 'Analyses' not in handle:
            handle.create_group('Analyses')
        if 'tracking_id' not in handle['UniqueGlobalKey']:
            handle.create_group('UniqueGlobalKey/tracking_id')
        # Update the EventDetection_000 created by old versions of MinKNOW,
        # if needed.
        if status.version < 1.1:
            if 'Analyses/EventDetection_000' in handle:
                reads_group = handle['Analyses/EventDetection_000/Reads']
                for data_group_name in reads_group.keys():
                    read_group = reads_group[data_group_name]
                    read_number = read_group.attrs['read_number']
                    read_info = status.read_info[
                        status.read_number_map[read_number]]
                    read_group.attrs['read_id'] = read_info.read_id
                    if 'Events' in read_group:
                        dataset = read_group['Events']
                        if 'variance' in dataset.dtype.names:
                            # Old files stored a 'variance' field; the current
                            # format expects 'stdv', so rewrite the dataset.
                            old_data = dataset[()]
                            new_data = np.empty(old_data.size,
                                                dtype=[('mean', float),
                                                       ('stdv', float),
                                                       ('start', int),
                                                       ('length', int)])
                            new_data['mean'] = old_data['mean']
                            new_data['stdv'] = np.sqrt(old_data['variance'])
                            new_data['start'] = old_data['start']
                            new_data['length'] = old_data['length']
                            del read_group['Events']
                            read_group.create_dataset('Events',
                                                      data=new_data,
                                                      compression='gzip')
        # Update the version number.
        handle.attrs['file_version'] = CURRENT_FAST5_VERSION