def test_002_write_events(self):
    """Round-trip event data through EventDetectionTools and verify it."""
    fname = os.path.join(self.save_path, 'test.fast5')
    with Fast5File(fname, 'w') as fh:
        fh.add_channel_info({'channel_number': 1, 'sampling_rate': 4000})
        fh.add_read(12, 'unique_snowflake', 12345, 111, 0, 120.75)
        meta = {'name': 'test', 'version': '0.1.0'}
        with EventDetectionTools(fh, group_name='EventDetection_000',
                                 meta=meta) as evdet:
            dtype = [('start', int), ('length', int),
                     ('mean', float), ('stdv', float)]
            evdet.set_event_data(np.zeros(100, dtype=dtype),
                                 {'read_number': 12})
    with Fast5File(fname, 'r') as fh:
        self.assertEqual(1, len(fh.status.read_info))
        self.assertEqual(12, fh.status.read_info[0].read_number)
        self.assertEqual('EventDetection_000',
                         fh.get_latest_analysis('EventDetection'))
        with EventDetectionTools(fh) as evdet:
            self.assertTrue(evdet.has_event_data())
            data, attrs = evdet.get_event_data()
            expected_attrs = {
                u'read_number': 12,
                u'read_id': 'unique_snowflake',
                u'start_time': 12345,
                u'duration': 111,
                u'start_mux': 0,
                u'median_before': 120.75,
            }
            self.assertDictEqual(expected_attrs, attrs)
            self.assertEqual(100, data.size)
def test_012_v1_0_single(self):
    """Recognise a v1.0 single-read file and check its updated form."""
    fname = os.path.join(test_data, 'read_file_v1.0_single.fast5')
    self.assertEqual(1.0, Fast5Info(fname).version)
    # Copy the file and bring it up to the newer format.
    new_file = os.path.join(self.save_path, 'single_read_v1.0_test.fast5')
    copyfile(fname, new_file)
    Fast5File.update_legacy_file(new_file)
    result = Fast5Info(new_file)
    self.assertEqual(1.1, result.version)
    self.assertEqual(1, len(result.read_info))
    self.assertEqual(59, result.read_info[0].read_number)
    # Check the event data in the updated file.
    with Fast5File(new_file, mode='r') as fh:
        analyses = fh.list_analyses('event_detection')
        self.assertEqual(1, len(analyses))
        events = fh.get_analysis_dataset(
            '{}/Reads/Read_59'.format(analyses[0][1]), 'Events')
        self.assertEqual(7875, events.size)
        self.assertEqual({'mean', 'stdv', 'start', 'length'},
                         set(events.dtype.names))
        self.assertEqual(7875, fh.status.read_info[0].event_data_count)
        self.assertEqual(1, fh.get_channel_info()['channel_number'])
def test_fast5_set_analysis_config(self):
    """set_analysis_config accepts ConfigParser and dict input, and must
    raise KeyError for a group that has not been added yet."""
    fname = os.path.join(self.save_path, 'set_analysis_config.fast5')
    group_name = 'First_000'
    component = 'first'

    # Exercise the ConfigParser flavour.
    with Fast5File(fname, mode='w') as fast5:
        self.assertFalse(fast5.list_analyses())
        with NamedTemporaryFile(dir=self.save_path, delete=False,
                                mode='w+t') as f:
            f.write("[section]\nkey=value\nkey1=value1\n")
            config_path = f.name
        config = ConfigParser()
        config.read(config_path)
        self.assertTrue(config)
        self.assertEqual(config.get('section', 'key'), 'value')
        # Setting config on a missing group is an error.
        with self.assertRaises(KeyError):
            fast5.set_analysis_config(group_name, config)
        fast5.add_analysis(component, group_name, {})
        fast5.set_analysis_config(group_name, config)
        self.assertEqual(
            fast5.get_analysis_config(group_name)['section']['key'],
            'value')
    os.remove(fname)

    # Exercise the plain-dict flavour.
    with Fast5File(fname, mode='w') as fast5:
        self.assertFalse(fast5.list_analyses())
        config = {'section': {'key': 'value', 'key1': 'value1'}}
        with self.assertRaises(KeyError):
            fast5.set_analysis_config(group_name, config)
        fast5.add_analysis(component, group_name, {})
        fast5.set_analysis_config(group_name, config)
        self.assertEqual(
            fast5.get_analysis_config(group_name)['section']['key'],
            'value')
def test_011_v0_6_raw(self):
    """Recognise a v0.6 raw file and update it to the current version."""
    fname = os.path.join(test_data, 'read_file_v0.6_raw.fast5')
    self.assertEqual(0.6, Fast5Info(fname).version)
    # Copy the file and bring it up to the current format.
    new_file = self.generate_temp_filename()
    copyfile(fname, new_file)
    Fast5File.update_legacy_file(new_file)
    result = Fast5Info(new_file)
    self.assertEqual(CURRENT_FAST5_VERSION, result.version)
    self.assertEqual(1, len(result.read_info))
    self.assertEqual(627, result.read_info[0].read_number)
    # Check the event and raw data in the updated file.
    with Fast5File(new_file, mode='r') as fh:
        analyses = fh.list_analyses('event_detection')
        self.assertEqual(1, len(analyses))
        events = fh.get_analysis_dataset(
            '{}/Reads/Read_627'.format(analyses[0][1]), 'Events')
        self.assertEqual(2337, events.size)
        self.assertEqual({'mean', 'stdv', 'start', 'length'},
                         set(events.dtype.names))
        read_info = fh.status.read_info[0]
        self.assertEqual(2337, read_info.event_data_count)
        self.assertEqual(118, fh.get_channel_info()['channel_number'])
        raw = fh.get_raw_data(read_number=627)
        self.assertEqual(46037, raw.size)
        self.assertEqual(46037, read_info.duration)
def get_reads(fast5, window_size: int = None, window_step: int = None,
              scale: bool = False, template: bool = True,
              return_all=True) -> np.ndarray:
    """Read signal data from a fast5 file.

    :param fast5: path to the fast5 file to read.
    :param window_size: if given together with window_step, slice each
        read into fixed-size windows. Incomplete windows at the end of a
        read are dropped, which keeps tensors complete for training and
        prediction.
    :param window_step: stride between successive windows.
    :param scale: True for scaled pA values (float32), False for raw
        DAQ values (int16).
    :param template: if True, read only the first (template / 1D)
        signal; otherwise collect every read listed in the file's
        read_info.
    :param return_all: if True return the array of all reads, otherwise
        return just the first read.
    :returns: numpy array of reads (or of windowed reads).
    :raises ValueError: if return_all is False and the file has no reads.
    """
    # BUGFIX: the return annotation was `np.array` (a factory function,
    # not a type) and the docstring referred to a nonexistent `all_reads`
    # parameter. Fast5File handles are now closed via context managers
    # instead of being leaked (previously one unclosed handle per read).
    with Fast5File(fast5) as fh:
        if template:
            reads = np.array([fh.get_raw_data(scale=scale)])
        else:
            reads = np.array([
                fh.get_raw_data(attr.read_number, scale=scale)
                for attr in Fast5Info(fast5).read_info
            ])
    if window_size and window_step:
        reads = np.array([
            view_as_windows(read, window_shape=window_size,
                            step=window_step)
            for read in reads
        ])
    if return_all:
        return reads
    if len(reads) > 0:
        return reads[0]
    raise ValueError("No reads in array.")
def test_001_put_and_retrieve(self):
    """Store 1D basecall events and sequences, then read them back."""
    fname = self.generate_temp_filename()
    called_state_dtype = '<U5' if py3 else '|S5'
    dtypes = [('mean', float), ('start', float), ('stdv', float),
              ('length', float), ('called_state', called_state_dtype),
              ('move', int)]
    data1 = np.zeros(10, dtype=dtypes)
    data1['mean'] = [10.0, 15.0, 8.5, 7.2, 13.6,
                     9.4, 11.8, 10.1, 4.2, 10.9]
    data1['stdv'] = [0.7, 0.9, 1.0, 1.1, 0.75,
                     0.6, 0.83, 1.12, 9.45, 2.9]
    data1['start'] = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    data1['length'] = [0.1] * 10
    data1['move'] = [1, 1, 0, 1, 0, 2, 1, 1, 1, 0]
    data1['called_state'] = ['AAAAA', 'AAAAT', 'AAAAT', 'AAATC', 'AAATC',
                             'ATCCG', 'TCCGT', 'CCGTT', 'CGTTA', 'CGTTA']
    data2 = data1[::-1]
    seq1, qstring1 = 'AAAAATCCGTTA', 'blahblahblah'
    seq2, qstring2 = 'TAACGGATTTTT', 'halbhalbhalb'
    with Fast5File(fname, mode='w') as fh:
        fh.add_channel_info({'channel_number': 1,
                             'sampling_rate': 4000,
                             'digitisation': 8192,
                             'range': 819.2,
                             'offset': 0})
        fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)
        fh.add_analysis('basecall_1d', 'Basecall_1D_000',
                        {'name': 'test', 'version': 0,
                         'time_stamp': 'just now'})
        with Basecall1DTools(fh, group_name='Basecall_1D_000') as basecall:
            basecall.add_event_data('template', data1)
            basecall.add_event_data('complement', data2)
            basecall.add_called_sequence('template', 'template',
                                         seq1, qstring1)
            basecall.add_called_sequence('complement', 'complement',
                                         seq2, qstring2)
    with Fast5File(fname, mode='r') as fh:
        with Basecall1DTools(fh, group_name='Basecall_1D_000') as basecall:
            np.testing.assert_array_equal(
                basecall.get_event_data('template'), data1)
            np.testing.assert_array_equal(
                basecall.get_event_data('complement'), data2)
            n1, s1, q1 = basecall.get_called_sequence('template')
            self.assertEqual((n1, s1, q1), ('template', seq1, qstring1))
            n2, s2, q2 = basecall.get_called_sequence('complement')
            self.assertEqual((n2, s2, q2), ('complement', seq2, qstring2))
def test_fast5_add_and_get_chain(self):
    """Verify analysis-chain creation and retrieval."""
    fname = os.path.join(self.save_path, 'chain_test.fast5')
    group_name1, component1 = 'First_000', 'first'
    component1_path = 'Analyses/{}'.format(group_name1)
    group_name2, component2 = 'Second_000', 'second'
    with Fast5File(fname=fname, mode='w') as fast5:
        # Register two analysis groups.
        fast5.add_analysis(component1, group_name1, attrs={})
        fast5.add_analysis(component2, group_name2, attrs={})
        self.assertEqual(fast5.list_analyses(),
                         [(component1, group_name1),
                          (component2, group_name2)])
        # Before a chain is added, a group's chain is just itself.
        self.assertEqual(fast5.get_chain(group_name2),
                         [(component2, group_name2)])
        # Chain the second group onto the first.
        fast5.add_chain(group_name=group_name2,
                        component_map={component1: group_name1})
        self.assertEqual(fast5.get_analysis_attributes(group_name2),
                         {'component': component2,
                          component1: component1_path})
        self.assertEqual(fast5.get_chain(group_name2),
                         [(component2, group_name2),
                          (component1, group_name1)])
def _write_strand(self, strand):
    """Write one strand (read) into its per-read fast5 file.

    Adds the read entry, the raw data (if present) and the
    event-detection data (if present) to the file previously created
    for this read, then bumps the per-file strand counter.

    :param strand: dict carrying 'read_attrs' (read metadata),
        'channel', and optional 'event_data' / 'raw_data' arrays.
    :returns: the basename of the fast5 file written to.
    """
    event_data = strand.get('event_data', None)
    raw_data = strand.get('raw_data', None)
    # Filename pattern must match the file created by _start_new_file.
    fname = '{}_ch{}_read{}_strand.fast5'.format(self._basename,
                                                 strand['channel'],
                                                 self._current_file)
    full_path = os.path.join(self._path, fname)
    with Fast5File(full_path, 'r+') as fh:
        fh.add_read(strand['read_attrs']['read_number'],
                    strand['read_attrs']['read_id'],
                    strand['read_attrs']['start_time'],
                    strand['read_attrs']['duration'],
                    strand['read_attrs'].get('start_mux', 0),
                    strand['read_attrs'].get('median_before', 0.0))
        if raw_data is not None:
            fh.add_raw_data(strand['read_attrs']['read_number'], raw_data)
        if event_data is not None:
            ev_attrs = {'name': 'MinKNOW',
                        'version': self._tracking_id.get('version',
                                                         'unknown')}
            # Copy config sections into plain dicts for storage.
            cfg_items = {}
            for key, subgroup in self._config.items():
                cfg_items[key] = {name: value
                                  for name, value in subgroup.items()}
            # Reuse an existing EventDetection group when one is present.
            group_name = fh.get_latest_analysis('EventDetection')
            if group_name is None:
                group_name = 'EventDetection_000'
            fh.add_analysis('event_detection', group_name, ev_attrs,
                            cfg_items)
            read_attrs = {name: strand['read_attrs'][name]
                          for name in REQUIRED_FIELDS}
            fh.add_analysis_subgroup(
                group_name,
                'Reads/Read_{}'.format(
                    strand['read_attrs']['read_number']),
                attrs=read_attrs)
            fh.add_analysis_dataset(
                '{}/Reads/Read_{}'.format(
                    group_name, strand['read_attrs']['read_number']),
                'Events', event_data)
    # NOTE(review): counter assumed to increment once per strand
    # regardless of whether event data was present — confirm against
    # the original indentation.
    self._strand_counter += 1
    return fname
def get_fast5_run_id(fast5: Fast5File, filepath: str) -> str:
    """Extract the run id from a given fast5 file.

    :param fast5: an open Fast5File (anything exposing get_tracking_id).
    :param filepath: path of the file, used only for the warning message.
    :returns: the run id, or '' when the tracking data carries none.
    """
    run_id = fast5.get_tracking_id().get('run_id', '')
    if run_id == '':
        # Lazy %-style args: the message is only formatted when the
        # record is actually emitted.
        logging.warning(" No run id found for %s\nFile can still be "
                        "used if read id is present", filepath)
    return run_id
def create_multi_read_file(input_files, output_file):
    """Bundle a set of single-read fast5 files into one multi-read file.

    Per-file failures are logged and skipped; the output file itself
    failing to open is also logged (best-effort behaviour preserved).

    :param input_files: iterable of single-read fast5 file paths.
    :param output_file: path of the multi-read file to create/append to.
    :returns: deque whose first entry is the output file's basename,
        followed by the basename of every successfully added input file.
    """
    results = deque([os.path.basename(output_file)])
    # exist_ok avoids the racy exists()/makedirs() two-step.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    if os.path.exists(output_file):
        logger.info(
            "FileExists - appending new reads to existing file: {}".format(
                output_file))
    try:
        with MultiFast5File(output_file, 'a') as multi_f5:
            for filename in input_files:
                try:
                    with Fast5File(filename, 'r') as single_f5:
                        add_read_to_multi_fast5(multi_f5, single_f5)
                    results.append(os.path.basename(filename))
                except Exception as e:
                    logger.error(
                        "{}\n\tFailed to add single read file: '{}' to '{}'"
                        "".format(e, filename, output_file),
                        exc_info=exc_info)
    except Exception as e:
        logger.error("{}\n\tFailed to write to MultiRead file: {}"
                     "".format(e, output_file), exc_info=exc_info)
    # BUGFIX: 'return' previously lived inside a 'finally' block, which
    # silently swallowed BaseExceptions such as KeyboardInterrupt and
    # SystemExit. Returning after the try block preserves the normal and
    # handled-exception paths while letting BaseExceptions propagate.
    return results
def create_multi_read_file(input_files, output_file, target_compression):
    """Combine single-read fast5 files into one multi-read file.

    :param input_files: iterable of single-read fast5 file paths.
    :param output_file: path of the multi-read file to create or extend.
    :param target_compression: compression to apply to each copied read.
    :returns: tuple of (list of basenames successfully added, output_file).
    :raises Fast5FileTypeError: if an input is itself a multi-read file.
        Any other per-file error is logged and the file skipped.
    """
    results = []
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    if os.path.exists(output_file):
        logger.info(
            "FileExists - appending new reads to existing file: {}".format(
                output_file))
    try:
        with MultiFast5File(output_file, 'a') as multi_f5:
            for filename in input_files:
                try:
                    with Fast5File(filename, 'r') as f5_input:
                        read = f5_input.get_read(f5_input.read_id)
                        multi_f5.add_existing_read(
                            read, target_compression=target_compression)
                    results.append(os.path.basename(filename))
                except Fast5FileTypeError as e:
                    # Wrong input type is fatal: log, then re-raise.
                    logger.error(
                        "{}: Cannot input MultiRead files to single_to_multi: '{}'"
                        "".format(e, filename), exc_info=exc_info)
                    raise
                except Exception as e:
                    # Other per-file failures are logged and skipped.
                    logger.error(
                        "{}\n\tFailed to add single read file: '{}' to '{}'"
                        "".format(e, filename, output_file),
                        exc_info=exc_info)
    except Fast5FileTypeError:
        # Let the type error raised above propagate to the caller.
        raise
    except Exception as e:
        logger.error("{}\n\tFailed to write to MultiRead file: {}"
                     "".format(e, output_file), exc_info=exc_info)
    return results, output_file
def test_001_get_latest_analysis(self):
    """get_latest_analysis returns the newest group, or None if absent."""
    test_file = os.path.join(test_data, 'basecall_2d_file_v1.0.fast5')
    with Fast5File(test_file, mode='r') as fh:
        self.assertEqual('Basecall_2D_000',
                         fh.get_latest_analysis('Basecall_2D'))
        # A group that does not exist yields None rather than raising.
        self.assertEqual(None, fh.get_latest_analysis('Garbage_5D'))
def test_add_read_to_multi(self):
    """A single read copied into a multi-read file keeps its raw data."""
    src = os.path.join(test_data, "single_reads", "read0.fast5")
    dst = self.generate_temp_filename()
    with Fast5File(src, 'r') as single_fast5, \
            MultiFast5File(dst, 'w') as multi_out:
        multi_out.add_existing_read(single_fast5)
        expected_raw = single_fast5.get_raw_data()
        copied_read = multi_out.get_read(single_fast5.get_read_id())
        self.assertTrue(numpy.array_equal(copied_read.get_raw_data(),
                                          expected_raw))
def test_single_to_multi(self):
    """Compress a single-read file into an empty fast5 with VBZ."""
    input_file = os.path.join(test_data, "single_read_analyses",
                              "read.fast5")
    output_file = self.generate_temp_filename()
    with Fast5File(input_file, 'r') as input_f5:
        with EmptyFast5(output_file, 'a') as output_f5:
            compress_single_read(output_f5, input_f5, VBZ, sanitize=True)
    self._test(input_file, output_file, 'single')
def test_002_events_only(self):
    """Segmentation results derived from event data only (no raw)."""
    fname = self.generate_temp_filename()
    with Fast5File(fname, mode='w') as fh:
        fh.add_channel_info({'channel_number': 1,
                             'sampling_rate': 4000,
                             'digitisation': 8192,
                             'range': 819.2,
                             'offset': 0})
        fh.add_read(12, 'unique_snowflake', 10000, 1000, 0, 120.75)
        with EventDetectionTools(fh, group_name='EventDetection_000',
                                 meta={'name': 'test'}) as evdet:
            data = np.zeros(100, dtype=[('start', int), ('length', int),
                                        ('mean', float),
                                        ('stdv', float)])
            # Populate the events referenced by the segmentation below.
            data['start'][2] = 10010
            data['start'][46] = 10470
            data['length'][46] = 10
            data['start'][53] = 10520
            data['start'][97] = 10960
            data['length'][97] = 20
            evdet.set_event_data(data, {'read_number': 12})
        fh.add_analysis('segmentation', 'Segmentation_000',
                        {'name': 'test',
                         'version': 0,
                         'time_stamp': 'just now',
                         'event_detection':
                             'Analyses/EventDetection_000'})
        fh.set_summary_data('Segmentation_000', 'segmentation',
                            {'has_template': 1,
                             'has_complement': 1,
                             'start_event_template': 2,
                             'end_event_template': 47,
                             'start_event_complement': 53,
                             'end_event_complement': 98})
        with SegmentationTools(fh,
                               group_name='Segmentation_000') as segment:
            expected = {'has_template': True,
                        'has_complement': True,
                        'start_event_template': 2,
                        'end_event_template': 47,
                        'start_event_complement': 53,
                        'end_event_complement': 98,
                        'first_sample_template': 10,
                        'duration_template': 470,
                        'first_sample_complement': 520,
                        'duration_complement': 460}
            self.assertDictEqual(expected, segment.get_results())
def try_convert_read(input_file, output_handle):
    """Append a single-read fast5 file to an open multi-read handle.

    :param input_file: path to the single-read fast5 file.
    :param output_handle: open multi-read file to append to.
    :returns: the input file's basename on success.
    :raises Fast5FileTypeError: if the input is not a single-read file.
    """
    with Fast5File(input_file, 'r') as single_f5:
        detected_type = check_file_type(single_f5)
        if detected_type != SINGLE_READ:
            raise Fast5FileTypeError(
                "Could not convert Single->Multi for file type '{}' with path '{}'"
                "".format(detected_type, input_file))
        add_single_read_to_multi_fast5(output_handle, single_f5)
    return os.path.basename(input_file)
def test_001_put_and_retrieve(self):
    """Store 2D basecall alignments and sequence, then read them back."""
    fname = os.path.join(self.save_path, 'test_file.fast5')
    dtypes = [('template', int), ('complement', int)]
    data1 = np.zeros(10, dtype=dtypes)
    data1['template'] = [0, 1, 2, 2, 3, 4, 5, 6, 7, 8]
    data1['complement'] = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
    dtypes.append(('model_state', '<U5' if py3 else '|S5'))
    data2 = np.zeros(10, dtype=dtypes)
    data2['template'] = data1['template']
    data2['complement'] = data1['complement']
    data2['model_state'] = ['AAAAA', 'AAAAT', 'AAATC', 'AAATC', 'ATCCG',
                            'TCCGT', 'CCGTT', 'CGTTA', 'CGTTA', 'GTTAC']
    seq = 'AAAAATCCGTTAC'
    qstring = 'blahblahblahb'
    with Fast5File(fname, mode='w') as fh:
        fh.add_channel_info({'channel_number': 1,
                             'sampling_rate': 4000,
                             'digitisation': 8192,
                             'range': 819.2,
                             'offset': 0})
        fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)
        fh.add_analysis('basecall_2d', 'Basecall_2D_000',
                        {'name': 'test', 'version': 0,
                         'time_stamp': 'just now'})
        with Basecall2DTools(fh, group_name='Basecall_2D_000') as basecall:
            basecall.add_prior_alignment(data1)
            basecall.add_2d_call_alignment(data2)
            basecall.add_called_sequence('test_2d', seq, qstring)
    with Fast5File(fname, mode='r') as fh:
        with Basecall2DTools(fh, group_name='Basecall_2D_000') as basecall:
            np.testing.assert_array_equal(basecall.get_prior_alignment(),
                                          data1)
            np.testing.assert_array_equal(
                basecall.get_2d_call_alignment(), data2)
            n, s, q = basecall.get_called_sequence()
            self.assertEqual((n, s, q), ('test_2d', seq, qstring))
def extract_signal(fn):
    """Print the raw DAQ signal of a fast5 file as one tab-separated line.

    Output format: "<filename>\\t<comma-separated values>".
    get_raw_data(scale=False) yields raw DAQ values as 16-bit integers;
    scale=True would instead yield scaled floating point values in pA.
    """
    # BUGFIX: the file handle was previously opened without ever being
    # closed; the context manager guarantees it is released.
    with Fast5File(fn, "r") as f:
        print("{}\t{}".format(
            fn,
            ','.join(str(v) for v in f.get_raw_data(scale=False))))
def test_001_raw_only(self):
    """Segmentation results and raw-signal slicing from raw data only."""
    fname = self.generate_temp_filename()
    with Fast5File(fname, mode='w') as fh:
        fh.add_channel_info({'channel_number': 1,
                             'sampling_rate': 4000,
                             'digitisation': 8192,
                             'range': 819.2,
                             'offset': 0})
        fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)
        raw = np.arange(1000, dtype=np.int16)
        fh.add_raw_data(raw)
        fh.add_analysis('segmentation', 'Segmentation_000',
                        {'name': 'test', 'version': 0,
                         'time_stamp': 'just now'})
        fh.set_summary_data('Segmentation_000', 'segmentation',
                            {'has_template': 1,
                             'has_complement': 1,
                             'first_sample_template': 10,
                             'duration_template': 470,
                             'first_sample_complement': 520,
                             'duration_complement': 460})
        with SegmentationTools(fh,
                               group_name='Segmentation_000') as segment:
            self.assertDictEqual(
                {'has_template': True,
                 'has_complement': True,
                 'first_sample_template': 10,
                 'duration_template': 470,
                 'first_sample_complement': 520,
                 'duration_complement': 460},
                segment.get_results())
            # Unscaled raw slices per strand.
            np.testing.assert_array_equal(
                segment.get_raw_data('template', scale=False),
                raw[10:480])
            np.testing.assert_array_equal(
                segment.get_raw_data('complement', scale=False),
                raw[520:980])
            both_raw = segment.get_raw_data('both', scale=False)
            np.testing.assert_array_equal(both_raw[0], raw[10:480])
            np.testing.assert_array_equal(both_raw[1], raw[520:980])
            # Scaled values: range/digitisation = 819.2/8192 = 0.1/DAQ.
            temp_raw, comp_raw = segment.get_raw_data('both', scale=True)
            np.testing.assert_array_almost_equal(temp_raw,
                                                 raw[10:480] * 0.1,
                                                 decimal=5)
            np.testing.assert_array_almost_equal(comp_raw,
                                                 raw[520:980] * 0.1,
                                                 decimal=5)
def test_003_add_analysis_subgroup(self):
    """Attributes set on an analysis subgroup round-trip intact."""
    fname = self.generate_temp_filename()
    with Fast5File(fname, mode='w') as fast5:
        fast5.add_analysis('test', 'Test_000', attrs={})
        fast5.add_analysis_subgroup('Test_000', 'Sub1',
                                    attrs={'foo': 'bar', 'monkey': 1})
        retrieved = fast5.get_analysis_attributes('Test_000/Sub1')
        self.assertEqual({'foo': 'bar', 'monkey': 1}, retrieved)
def __init__(self, source, mode='r', group_name=None, meta=None,
             config=None):
    """Create a new analysis_tools object.

    :param source: Either an open Fast5File object, or a filename of a
        fast5 file.
    :param mode: The open mode (r or r+). Only if a filename is used
        for the source argument.
    :param group_name: The specific analysis instance you are
        interested in.
    :param meta: Metadata for a new analysis.
    :param config: Configuration data for a new analysis.

    To create a new analysis group, provide a group name that does not
    already exist, and an optional dictionary with the metadata. The
    following fields are recommended, as a minimum:

    * name - The name of the software used.
    * time_stamp - The time at which the analysis was performed.

    If the group name already exists, the "meta" parameter is ignored.
    If the specified group has a "component" attribute, and its value
    does not match self.analysis_id, an exception will be thrown.
    """
    if isinstance(source, Fast5File):
        self.filename = source.filename  # Useful for debugging purposes
        self.handle = source
        self.close_handle_when_done = False
    elif isinstance(source, str):
        self.filename = source  # Useful for debugging purposes
        self.handle = Fast5File(source, mode)
        self.close_handle_when_done = True
    else:
        # KeyError kept for backward compatibility with existing
        # callers, although TypeError would be conventional here.
        raise KeyError(
            'Unrecognized type for argument "source": {}'.format(source))
    if group_name is None:
        group_name = self.handle.get_latest_analysis(self.group_id)
        if group_name is None:
            # BUGFIX: the message previously formatted group_name, which
            # is provably None at this point ("No group: None found...");
            # report the group id that was actually searched for.
            raise KeyError('No group: {} found in file: {}'.format(
                self.group_id, self.filename))
    self.group_name = group_name
    attrs = self.handle.get_analysis_attributes(group_name)
    if attrs is None:
        # Group does not exist yet: create it with the given metadata.
        self.handle.add_analysis(self.analysis_id, group_name, meta,
                                 config)
        attrs = self.handle.get_analysis_attributes(group_name)
    if 'component' in attrs and attrs['component'] != self.analysis_id:
        raise ValueError('Component {} is not {}'.format(
            attrs.get('component'), self.analysis_id))
def test_003_add_analysis_subgroup(self):
    """Attributes set on an analysis subgroup round-trip intact."""
    fname = os.path.join(self.save_path, 'group_test.fast5')
    with Fast5File(fname, mode='w') as fast5:
        fast5.add_analysis('test', 'Test_000', attrs={})
        fast5.add_analysis_subgroup('Test_000', 'Sub1',
                                    attrs={'foo': 'bar', 'monkey': 1})
        stored = fast5.get_analysis_attributes('Test_000/Sub1')
        self.assertEqual({'foo': 'bar', 'monkey': 1}, stored)
def __init__(self, source, mode='r', group_name=None, meta=None,
             config=None):
    """Create a new alignment tools object.

    :param source: Either an open Fast5File object, or a filename of a
        fast5 file.
    :param mode: The open mode (r or r+). Only if a filename is used
        for the source argument.
    :param group_name: The specific alignment analysis instance you are
        interested in.
    :param meta: Metadata for a new alignment analysis.
    :param config: Configuration data for a new alignment analysis.

    To create a new alignment analysis, provide a group name that does
    not already exist, and an optional dictionary with the metadata.
    The following fields are recommended, as a minimum:

    * name - The name of the basecall software used.
    * time_stamp - The time at which the analysis was performed.

    If the group name already exists, the "meta" parameter is ignored.
    If the specified group has a "component" attribute, and its value
    is not "alignment", an exception will be thrown.
    """
    # NOTE(review): the isinstance check is against Fast5Read while the
    # docstring says Fast5File — confirm which type is intended.
    if isinstance(source, Fast5Read):
        self.handle = source
        self.close_handle_when_done = False
    elif isinstance(source, str):
        self.handle = Fast5File(source, mode)
        self.close_handle_when_done = True
    else:
        raise Exception('Unrecognized type for argument "source".')
    if group_name is None:
        # Default to the most recent Alignment analysis in the file.
        group_name = self.handle.get_latest_analysis('Alignment')
        if group_name is None:
            raise Exception('No Alignment analysis group found in file.')
    self.group_name = group_name
    attrs = self.handle.get_analysis_attributes(group_name)
    if attrs is None:
        # Group does not exist yet: create it with supplied metadata.
        if meta is None:
            meta = {}
        self.handle.add_analysis('alignment', group_name, meta, config)
        attrs = self.handle.get_analysis_attributes(group_name)
    # Both component values are accepted; calibration strands use the
    # same alignment machinery.
    if ('component' in attrs
            and attrs['component'] not in ['alignment',
                                           'calibration_strand']):
        self.close()
        raise Exception(
            'Analysis does not appear to be an alignment component.')
def test_002_add_analysis_group(self):
    """Analysis attributes, including later additions, round-trip."""
    fname = os.path.join(self.save_path, 'group_test.fast5')
    with Fast5File(fname, mode='w') as fast5:
        att = {'foo': 1, 'bar': 2}
        fast5.add_analysis('test', 'Test_000', att)
        # add_analysis records the component name alongside the attrs.
        expected = dict(att, component='test')
        self.assertEqual(expected,
                         fast5.get_analysis_attributes('Test_000'))
        extra = {'Bob': 'your uncle'}
        fast5.add_analysis_attributes('Test_000', extra)
        expected.update(extra)
        self.assertEqual(expected,
                         fast5.get_analysis_attributes('Test_000'))
def test_002_add_analysis_group(self):
    """Analysis attributes, including later additions, round-trip."""
    fname = self.generate_temp_filename()
    with Fast5File(fname, mode='w') as fast5:
        att = {'foo': 1, 'bar': 2}
        fast5.add_analysis('test', 'Test_000', att)
        # add_analysis records the component name alongside the attrs.
        expected = dict(att, component='test')
        self.assertEqual(expected,
                         fast5.get_analysis_attributes('Test_000'))
        extra = {'Bob': 'your uncle'}
        fast5.add_analysis_attributes('Test_000', extra)
        expected.update(extra)
        self.assertEqual(expected,
                         fast5.get_analysis_attributes('Test_000'))
def test_check_single_read_folder(self):
    """check_compression reports GZIP for every single-read file."""
    input_folder = os.path.join(test_data, 'single_reads')
    compression_results = list(check_compression(input_folder,
                                                 recursive=False,
                                                 follow_symlinks=False,
                                                 check_all_reads=False))
    # Build the expected (compression, read_id, path) tuples.
    expected_results = []
    for input_file in os.listdir(input_folder):
        input_path = os.path.join(input_folder, input_file)
        with Fast5File(input_path, 'r') as f5:
            expected_results.append((GZIP, f5.read_id, input_path))
    # BUGFIX: os.listdir() order is platform-dependent, so the previous
    # order-sensitive numpy.array_equal comparison was flaky. Compare as
    # sorted lists instead (assertEqual also gives a useful diff).
    self.assertEqual(sorted(expected_results),
                     sorted(compression_results))
def compress_file(input_file, output_file, target_compression):
    """Rewrite a fast5 file (single- or multi-read) with new compression."""
    try:
        makedirs(os.path.dirname(output_file), exist_ok=True)
        if is_multi_read(input_file):
            with MultiFast5File(input_file, 'r') as input_f5, \
                    MultiFast5File(output_file, 'a') as output_f5:
                for read in input_f5.get_reads():
                    compress_read_from_multi(output_f5, read,
                                             target_compression)
        else:
            with Fast5File(input_file, 'r') as input_f5, \
                    EmptyFast5(output_file, 'a') as output_f5:
                compress_read_from_single(output_f5, input_f5,
                                          target_compression)
    except Exception as e:
        # Errors raised in Pool.aync would otherwise be lost, so log
        # them explicitly before re-raising.
        logging.exception(e)
        raise
def test_correct_type(self):
    """get_fast5_file returns the right class for each file layout."""
    single_read_path = os.path.join(test_data, "single_reads",
                                    "read0.fast5")
    # BUGFIX: the Fast5File opened just to fetch the read id was never
    # closed; the context manager releases the handle.
    with Fast5File(single_read_path) as single_f5:
        single_read_id = single_f5.get_read_id()
    with get_fast5_file(single_read_path) as f5:
        self.assertEqual(type(f5), Fast5File)
        self.assertEqual(check_file_type(f5), SINGLE_READ)
        self.assertEqual(len(f5.get_read_ids()), 1)
        self.assertEqual(single_read_id, f5.get_read_ids()[0])
        self.get_raw(f5)
    multi_read_path = os.path.join(test_data, "multi_read",
                                   "batch_0.fast5")
    with get_fast5_file(multi_read_path) as f5:
        self.assertEqual(type(f5), MultiFast5File)
        self.assertEqual(check_file_type(f5), MULTI_READ)
        self.assertTrue(len(f5.get_read_ids()) >= 1)
        self.get_raw(f5)
def _start_new_file(self, strand):
    """Create the per-read fast5 file and write channel/run metadata."""
    self._current_file = strand['read_attrs']['read_number']
    self._strand_counter = 0
    self._current_channel = strand['channel']
    # Channel metadata copied from the strand record.
    channel_info = {key: strand[key]
                    for key in ('offset', 'range', 'digitisation',
                                'sampling_rate')}
    channel_info['channel_number'] = strand['channel']
    fname = '{}_ch{}_read{}_strand.fast5'.format(
        self._basename, strand['channel'], self._current_file)
    with Fast5File(os.path.join(self._path, fname), 'w') as fh:
        fh.set_tracking_id(self._tracking_id)
        fh.add_context_tags(self._context_tags)
        fh.add_channel_info(channel_info)
def test_002_read_summary_data(self):
    """read_summary_data collects telemetry fields from a fast5 file."""
    test_file = os.path.join(test_data, 'telemetry_test.fast5')
    summary = Fast5File.read_summary_data(test_file, 'segmentation')
    # Build the expected summary section by section.
    expected = {'filename': 'telemetry_test.fast5'}
    expected['channel_id'] = {u'channel_number': 129,
                              u'range': 10000.0,
                              u'sampling_rate': 5000,
                              u'digitisation': 10000,
                              u'offset': 0.0}
    expected['reads'] = [{'duration': 755.79559999999947,
                          'start_time': 4034.6948000000002,
                          'read_id': 'telemetry_test.fast5',
                          'start_mux': 1,
                          'read_number': 199}]
    expected['tracking_id'] = {u'device_id': '445444'}
    expected['data'] = {
        u'split_hairpin': {u'median_sd_comp': 1.4719812720343015,
                           u'range_comp': 3.965029408419298,
                           u'median_level_temp': 88.66729546440973,
                           u'duration_temp': 327.82499999999936,
                           u'num_temp': 10773,
                           u'num_events': 24091,
                           u'median_sd_temp': 1.328457722537222,
                           u'range_temp': 4.01780031383548,
                           u'median_level_comp': 89.8680971725336,
                           u'split_index': 10903,
                           u'duration_comp': 422.3665999999994,
                           u'num_comp': 13158},
        u'empty': {},
    }
    expected['software'] = {u'time_stamp': '2014-Jun-04 16:28:31',
                            u'version': '0.5.4',
                            'component': u'Validation'}
    self.assertEqual(expected, summary)