Example 1
def test_002_write_events(self):
     fname = os.path.join(self.save_path, 'test.fast5')
     with Fast5File(fname, 'w') as fh:
         fh.add_channel_info({'channel_number': 1, 'sampling_rate': 4000})
         fh.add_read(12, 'unique_snowflake', 12345, 111, 0, 120.75)
         with EventDetectionTools(fh,
                                  group_name='EventDetection_000',
                                  meta={
                                      'name': 'test',
                                      'version': '0.1.0'
                                  }) as evdet:
             data = np.zeros(100,
                             dtype=[('start', int), ('length', int),
                                    ('mean', float), ('stdv', float)])
             read_attrs = {'read_number': 12}
             evdet.set_event_data(data, read_attrs)
     with Fast5File(fname, 'r') as fh:
         self.assertEqual(1, len(fh.status.read_info))
         read_info = fh.status.read_info[0]
         self.assertEqual(12, read_info.read_number)
         group = fh.get_latest_analysis('EventDetection')
         self.assertEqual('EventDetection_000', group)
         with EventDetectionTools(fh) as evdet:
             self.assertTrue(evdet.has_event_data())
             data, attrs = evdet.get_event_data()
             self.assertDictEqual(
                 {
                     u'read_number': 12,
                     u'read_id': 'unique_snowflake',
                     u'start_time': 12345,
                     u'duration': 111,
                     u'start_mux': 0,
                     u'median_before': 120.75
                 }, attrs)
             self.assertEqual(100, data.size)
Example 2
 def test_012_v1_0_single(self):
     # Check that it is recognized properly.
     fname = os.path.join(test_data, 'read_file_v1.0_single.fast5')
     result = Fast5Info(fname)
     self.assertEqual(1.0, result.version)
     # Copy the file and update it to the current format.
     new_file = os.path.join(self.save_path, 'single_read_v1.0_test.fast5')
     copyfile(fname, new_file)
     Fast5File.update_legacy_file(new_file)
     result = Fast5Info(new_file)
     self.assertEqual(1.1, result.version)
     self.assertEqual(1, len(result.read_info))
     self.assertEqual(59, result.read_info[0].read_number)
     # Load the event data.
     with Fast5File(new_file, mode='r') as fh:
         analist = fh.list_analyses('event_detection')
         self.assertEqual(1, len(analist))
         group = '{}/Reads/Read_59'.format(analist[0][1])
         data = fh.get_analysis_dataset(group, 'Events')
         self.assertEqual(7875, data.size)
         self.assertEqual(set(('mean', 'stdv', 'start', 'length')),
                          set(data.dtype.names))
         read_info = fh.status.read_info[0]
         self.assertEqual(7875, read_info.event_data_count)
         channel_info = fh.get_channel_info()
         self.assertEqual(1, channel_info['channel_number'])
Example 3
    def test_fast5_set_analysis_config(self):
        fname = os.path.join(self.save_path, 'set_analysis_config.fast5')
        group_name = 'First_000'
        component = 'first'
        with Fast5File(fname, mode='w') as fast5:
            self.assertFalse(fast5.list_analyses())
            with NamedTemporaryFile(dir=self.save_path,
                                    delete=False,
                                    mode='w+t') as f:
                config_str = "[section]\nkey=value\nkey1=value1\n"
                f.write(config_str)
                config_path = f.name

            config = ConfigParser()
            config.read(config_path)
            self.assertTrue(config)
            self.assertEqual(config.get('section', 'key'), 'value')
            with self.assertRaises(KeyError):
                fast5.set_analysis_config(group_name, config)

            fast5.add_analysis(component, group_name, {})
            fast5.set_analysis_config(group_name, config)
            get_config = fast5.get_analysis_config(group_name)
            self.assertEqual(get_config['section']['key'], 'value')

        os.remove(fname)
        with Fast5File(fname, mode='w') as fast5:
            self.assertFalse(fast5.list_analyses())
            config = {'section': {'key': 'value', 'key1': 'value1'}}
            with self.assertRaises(KeyError):
                fast5.set_analysis_config(group_name, config)
            fast5.add_analysis(component, group_name, {})
            fast5.set_analysis_config(group_name, config)
            get_config = fast5.get_analysis_config(group_name)
            self.assertEqual(get_config['section']['key'], 'value')
Example 4
 def test_011_v0_6_raw(self):
     # Check that it is recognized properly.
     fname = os.path.join(test_data, 'read_file_v0.6_raw.fast5')
     result = Fast5Info(fname)
     self.assertEqual(0.6, result.version)
     # Copy the file and update it to the current format.
     new_file = self.generate_temp_filename()
     copyfile(fname, new_file)
     Fast5File.update_legacy_file(new_file)
     result = Fast5Info(new_file)
     self.assertEqual(CURRENT_FAST5_VERSION, result.version)
     self.assertEqual(1, len(result.read_info))
     self.assertEqual(627, result.read_info[0].read_number)
     # Load the event data.
     with Fast5File(new_file, mode='r') as fh:
         analist = fh.list_analyses('event_detection')
         self.assertEqual(1, len(analist))
         group = '{}/Reads/Read_627'.format(analist[0][1])
         data = fh.get_analysis_dataset(group, 'Events')
         self.assertEqual(2337, data.size)
         self.assertEqual(set(('mean', 'stdv', 'start', 'length')),
                          set(data.dtype.names))
         read_info = fh.status.read_info[0]
         self.assertEqual(2337, read_info.event_data_count)
         channel_info = fh.get_channel_info()
         self.assertEqual(118, channel_info['channel_number'])
         raw = fh.get_raw_data(read_number=627)
         self.assertEqual(46037, raw.size)
         self.assertEqual(46037, read_info.duration)
Example 5
def get_reads(fast5,
              window_size: int = None,
              window_step: int = None,
              scale: bool = False,
              template: bool = True,
              return_all: bool = True) -> np.ndarray:
    """ Scaled pA values (float32) or raw DAQ values (int16);
        return the first read (1D) or, if return_all is True,
        an array of all reads """

    # Use a context manager so the fast5 handle is closed after reading.
    if template:
        with Fast5File(fast5) as fh:
            reads = np.array([fh.get_raw_data(scale=scale)])
    else:
        with Fast5File(fast5) as fh:
            reads = np.array([
                fh.get_raw_data(attr.read_number, scale=scale)
                for attr in Fast5Info(fast5).read_info
            ])

    # Windowing only returns full-sized windows; incomplete windows at the
    # end of a read are dropped. This is necessary for complete tensors in
    # training and prediction:

    if window_size and window_step:
        reads = np.array([
            view_as_windows(read, window_shape=window_size, step=window_step)
            for read in reads
        ])

    if return_all:
        return reads
    else:
        if len(reads) > 0:
            return reads[0]
        else:
            raise ValueError("No reads in array.")
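A minimal usage sketch for get_reads above. The file name is a placeholder, and it assumes numpy (np), Fast5File, Fast5Info and scikit-image's view_as_windows are imported by the module that defines get_reads:

# Hypothetical call; "example_read.fast5" is a placeholder path.
windows = get_reads("example_read.fast5",
                    window_size=400,
                    window_step=400,
                    scale=True,
                    template=True)
# With windowing enabled the result has shape (n_reads, n_windows, window_size).
print(windows.shape)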
Example 6
 def test_001_put_and_retrieve(self):
     fname = self.generate_temp_filename()
     if py3:
         dtypes = [('mean', float), ('start', float), ('stdv', float),
                   ('length', float), ('called_state', '<U5'),
                   ('move', int)]
     else:
         dtypes = [('mean', float), ('start', float), ('stdv', float),
                   ('length', float), ('called_state', '|S5'),
                   ('move', int)]
     data1 = np.zeros(10, dtype=dtypes)
     data1['mean'] = [
         10.0, 15.0, 8.5, 7.2, 13.6, 9.4, 11.8, 10.1, 4.2, 10.9
     ]
     data1['stdv'] = [0.7, 0.9, 1.0, 1.1, 0.75, 0.6, 0.83, 1.12, 9.45, 2.9]
     data1['start'] = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
     data1['length'] = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
     data1['move'] = [1, 1, 0, 1, 0, 2, 1, 1, 1, 0]
     data1['called_state'] = [
         'AAAAA', 'AAAAT', 'AAAAT', 'AAATC', 'AAATC', 'ATCCG', 'TCCGT',
         'CCGTT', 'CGTTA', 'CGTTA'
     ]
     data2 = data1[::-1]
     seq1 = 'AAAAATCCGTTA'
     seq2 = 'TAACGGATTTTT'
     qstring1 = 'blahblahblah'
     qstring2 = 'halbhalbhalb'
     with Fast5File(fname, mode='w') as fh:
         fh.add_channel_info({
             'channel_number': 1,
             'sampling_rate': 4000,
             'digitisation': 8192,
             'range': 819.2,
             'offset': 0
         })
         fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)
         attrs = {'name': 'test', 'version': 0, 'time_stamp': 'just now'}
         fh.add_analysis('basecall_1d', 'Basecall_1D_000', attrs)
         with Basecall1DTools(fh, group_name='Basecall_1D_000') as basecall:
             basecall.add_event_data('template', data1)
             basecall.add_event_data('complement', data2)
             basecall.add_called_sequence('template', 'template', seq1,
                                          qstring1)
             basecall.add_called_sequence('complement', 'complement', seq2,
                                          qstring2)
     with Fast5File(fname, mode='r') as fh:
         with Basecall1DTools(fh, group_name='Basecall_1D_000') as basecall:
             events1 = basecall.get_event_data('template')
             np.testing.assert_array_equal(events1, data1)
             events2 = basecall.get_event_data('complement')
             np.testing.assert_array_equal(events2, data2)
             n1, s1, q1 = basecall.get_called_sequence('template')
             self.assertEqual(n1, 'template')
             self.assertEqual(s1, seq1)
             self.assertEqual(q1, qstring1)
             n2, s2, q2 = basecall.get_called_sequence('complement')
             self.assertEqual(n2, 'complement')
             self.assertEqual(s2, seq2)
             self.assertEqual(q2, qstring2)
Example 7
    def test_fast5_add_and_get_chain(self):
        fname = os.path.join(self.save_path, 'chain_test.fast5')
        group_name1 = 'First_000'
        component1 = 'first'
        component1_path = 'Analyses/{}'.format(group_name1)
        group_name2 = 'Second_000'
        component2 = 'second'

        # Add fake group
        with Fast5File(fname=fname, mode='w') as fast5:
            fast5.add_analysis(component1, group_name1, attrs={})
            fast5.add_analysis(component2, group_name2, attrs={})

            # Check group was added successfully
            target_list_of_analyses = [(component1, group_name1),
                                       (component2, group_name2)]
            self.assertEqual(fast5.list_analyses(), target_list_of_analyses)

            # Check fake group has chain including itself
            target_chain = [(component2, group_name2)]
            self.assertEqual(fast5.get_chain(group_name2), target_chain)

            # Add component chain
            fake_component_map = {component1: group_name1}
            fast5.add_chain(group_name=group_name2,
                            component_map=fake_component_map)

            # Check attributes are as expected
            attr = {'component': component2, component1: component1_path}
            self.assertEqual(fast5.get_analysis_attributes(group_name2), attr)
            # Check chain is as expected
            chain = [(component2, group_name2), (component1, group_name1)]
            self.assertEqual(fast5.get_chain(group_name2), chain)
Example 8
 def _write_strand(self, strand):
     event_data = strand.get('event_data', None)
     raw_data = strand.get('raw_data', None)
     fname = '{}_ch{}_read{}_strand.fast5'.format(self._basename, strand['channel'],
                                                  self._current_file)
     full_path = os.path.join(self._path, fname)
     
     with Fast5File(full_path, 'r+') as fh:
         fh.add_read(strand['read_attrs']['read_number'], strand['read_attrs']['read_id'],
                     strand['read_attrs']['start_time'], strand['read_attrs']['duration'],
                     strand['read_attrs'].get('start_mux', 0),
                     strand['read_attrs'].get('median_before', 0.0))
         if raw_data is not None:
             fh.add_raw_data(strand['read_attrs']['read_number'], raw_data)
         if event_data is not None:
             ev_attrs = {'name': 'MinKNOW',
                         'version': self._tracking_id.get('version', 'unknown')}
             cfg_items = {}
             for key, subgroup in self._config.items():
                 cfg_items[key] = {name: value for name, value in subgroup.items()}
             group_name = fh.get_latest_analysis('EventDetection')
             if group_name is None:
                 group_name = 'EventDetection_000'
                 fh.add_analysis('event_detection', group_name, ev_attrs, cfg_items)
             read_attrs = {name: strand['read_attrs'][name] for name in REQUIRED_FIELDS}
             fh.add_analysis_subgroup(group_name, 'Reads/Read_{}'.format(strand['read_attrs']['read_number']),
                                      attrs=read_attrs)
             fh.add_analysis_dataset('{}/Reads/Read_{}'.format(group_name, strand['read_attrs']['read_number']),
                                     'Events', event_data)
     self._strand_counter += 1
     return fname
Example 9
def get_fast5_run_id(fast5: Fast5File, filepath: str) -> str:
    """Extracts the run id from a given fast5 file."""
    run_id = fast5.get_tracking_id().get('run_id', '')
    if run_id == '':
        logging.warning("No run id found for {}. File can still be "
                        "used if read id is present".format(filepath))
    return run_id
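A short usage sketch for get_fast5_run_id; the path is a placeholder and the import is the standard ont_fast5_api location:

from ont_fast5_api.fast5_file import Fast5File

filepath = "example_read.fast5"
with Fast5File(filepath, mode='r') as f5:
    run_id = get_fast5_run_id(f5, filepath)
    print(run_id or "<no run id>")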
Example 10
def create_multi_read_file(input_files, output_file):
    results = deque([os.path.basename(output_file)])
    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))
    if os.path.exists(output_file):
        logger.info(
            "FileExists - appending new reads to existing file: {}".format(
                output_file))
    try:
        with MultiFast5File(output_file, 'a') as multi_f5:
            for filename in input_files:
                try:
                    with Fast5File(filename, 'r') as single_f5:
                        add_read_to_multi_fast5(multi_f5, single_f5)
                        results.append(os.path.basename(filename))
                except Exception as e:
                    logger.error(
                        "{}\n\tFailed to add single read file: '{}' to '{}'"
                        "".format(e, filename, output_file),
                        exc_info=exc_info)
    except Exception as e:
        logger.error("{}\n\tFailed to write to MultiRead file: {}"
                     "".format(e, output_file),
                     exc_info=exc_info)
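    # NB: returning inside "finally" swallows any exception raised in the
    # try block above, so callers always get the (possibly partial) results.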
    finally:
        return results
Example 11
def create_multi_read_file(input_files, output_file, target_compression):
    results = []
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    if os.path.exists(output_file):
        logger.info(
            "FileExists - appending new reads to existing file: {}".format(
                output_file))
    try:
        with MultiFast5File(output_file, 'a') as multi_f5:
            for filename in input_files:
                try:
                    with Fast5File(filename, 'r') as f5_input:
                        read = f5_input.get_read(f5_input.read_id)
                        multi_f5.add_existing_read(
                            read, target_compression=target_compression)
                    results.append(os.path.basename(filename))
                except Fast5FileTypeError as e:
                    logger.error(
                        "{}: Cannot input MultiRead files to single_to_multi: '{}'"
                        "".format(e, filename),
                        exc_info=exc_info)
                    raise
                except Exception as e:
                    logger.error(
                        "{}\n\tFailed to add single read file: '{}' to '{}'"
                        "".format(e, filename, output_file),
                        exc_info=exc_info)

    except Fast5FileTypeError:
        raise
    except Exception as e:
        logger.error("{}\n\tFailed to write to MultiRead file: {}"
                     "".format(e, output_file),
                     exc_info=exc_info)
    return results, output_file
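A minimal sketch of calling the second variant above; the paths are placeholders, the module-level logger is assumed to be configured, and target_compression=None is taken to mean "keep the reads' existing compression":

input_files = ["single_reads/read0.fast5", "single_reads/read1.fast5"]
added, out_path = create_multi_read_file(input_files,
                                         "multi_reads/batch_0.fast5",
                                         target_compression=None)
print("{} reads written to {}".format(len(added), out_path))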
Example 12
 def test_001_get_latest_analysis(self):
     test_file = os.path.join(test_data, 'basecall_2d_file_v1.0.fast5')
     with Fast5File(test_file, mode='r') as fh:
         group_name = fh.get_latest_analysis('Basecall_2D')
         self.assertEqual('Basecall_2D_000', group_name)
         # Test a non-existent group.
         group_name = fh.get_latest_analysis('Garbage_5D')
         self.assertEqual(None, group_name)
Example 13
 def test_add_read_to_multi(self):
     with Fast5File(os.path.join(test_data, "single_reads", "read0.fast5"), 'r') as single_fast5, \
             MultiFast5File(self.generate_temp_filename(), 'w') as multi_out:
         multi_out.add_existing_read(single_fast5)
         expected_raw = single_fast5.get_raw_data()
         actual_raw = multi_out.get_read(
             single_fast5.get_read_id()).get_raw_data()
         self.assertTrue(numpy.array_equal(actual_raw, expected_raw))
Example 14
 def test_single_to_multi(self):
     input_file = os.path.join(test_data, "single_read_analyses",
                               "read.fast5")
     output_file = self.generate_temp_filename()
     with Fast5File(input_file, 'r') as input_f5, \
             EmptyFast5(output_file, 'a') as output_f5:
         compress_single_read(output_f5, input_f5, VBZ, sanitize=True)
     self._test(input_file, output_file, 'single')
Example 15
 def test_002_events_only(self):
     fname = self.generate_temp_filename()
     with Fast5File(fname, mode='w') as fh:
         fh.add_channel_info({
             'channel_number': 1,
             'sampling_rate': 4000,
             'digitisation': 8192,
             'range': 819.2,
             'offset': 0
         })
         fh.add_read(12, 'unique_snowflake', 10000, 1000, 0, 120.75)
         with EventDetectionTools(fh,
                                  group_name='EventDetection_000',
                                  meta={'name': 'test'}) as evdet:
             data = np.zeros(100,
                             dtype=[('start', int), ('length', int),
                                    ('mean', float), ('stdv', float)])
             data['start'][2] = 10010
             data['start'][46] = 10470
             data['length'][46] = 10
             data['start'][53] = 10520
             data['start'][97] = 10960
             data['length'][97] = 20
             read_attrs = {'read_number': 12}
             evdet.set_event_data(data, read_attrs)
         attrs = {
             'name': 'test',
             'version': 0,
             'time_stamp': 'just now',
             'event_detection': 'Analyses/EventDetection_000'
         }
         fh.add_analysis('segmentation', 'Segmentation_000', attrs)
         segment_data = {
             'has_template': 1,
             'has_complement': 1,
             'start_event_template': 2,
             'end_event_template': 47,
             'start_event_complement': 53,
             'end_event_complement': 98
         }
         fh.set_summary_data('Segmentation_000', 'segmentation',
                             segment_data)
         with SegmentationTools(fh,
                                group_name='Segmentation_000') as segment:
             results = segment.get_results()
             self.assertDictEqual(
                 {
                     'has_template': True,
                     'has_complement': True,
                     'start_event_template': 2,
                     'end_event_template': 47,
                     'start_event_complement': 53,
                     'end_event_complement': 98,
                     'first_sample_template': 10,
                     'duration_template': 470,
                     'first_sample_complement': 520,
                     'duration_complement': 460
                 }, results)
Example 16
def try_convert_read(input_file, output_handle):
    with Fast5File(input_file, 'r') as single_f5:
        file_type = check_file_type(single_f5)
        if file_type != SINGLE_READ:
            raise Fast5FileTypeError(
                "Could not convert Single->Multi for file type '{}' with path '{}'"
                "".format(file_type, input_file))
        add_single_read_to_multi_fast5(output_handle, single_f5)
        return os.path.basename(input_file)
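A hedged usage sketch; output_handle is expected to be an open MultiFast5File, the paths are placeholders, and the helpers used inside try_convert_read are assumed to come from ont_fast5_api's conversion tools:

from ont_fast5_api.multi_fast5 import MultiFast5File

with MultiFast5File("multi_reads/batch_0.fast5", 'a') as multi_out:
    added = try_convert_read("single_reads/read0.fast5", multi_out)
    print("added:", added)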
Example 17
 def test_001_put_and_retrieve(self):
     fname = os.path.join(self.save_path, 'test_file.fast5')
     dtypes = [('template', int), ('complement', int)]
     data1 = np.zeros(10, dtype=dtypes)
     data1['template'] = [0, 1, 2, 2, 3, 4, 5, 6, 7, 8]
     data1['complement'] = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
     if py3:
         dtypes.append(('model_state', '<U5'))
     else:
         dtypes.append(('model_state', '|S5'))
     data2 = np.zeros(10, dtype=dtypes)
     data2['template'] = data1['template']
     data2['complement'] = data1['complement']
     data2['model_state'] = [
         'AAAAA', 'AAAAT', 'AAATC', 'AAATC', 'ATCCG', 'TCCGT', 'CCGTT',
         'CGTTA', 'CGTTA', 'GTTAC'
     ]
     seq = 'AAAAATCCGTTAC'
     qstring = 'blahblahblahb'
     with Fast5File(fname, mode='w') as fh:
         fh.add_channel_info({
             'channel_number': 1,
             'sampling_rate': 4000,
             'digitisation': 8192,
             'range': 819.2,
             'offset': 0
         })
         fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)
         attrs = {'name': 'test', 'version': 0, 'time_stamp': 'just now'}
         fh.add_analysis('basecall_2d', 'Basecall_2D_000', attrs)
         with Basecall2DTools(fh, group_name='Basecall_2D_000') as basecall:
             basecall.add_prior_alignment(data1)
             basecall.add_2d_call_alignment(data2)
             basecall.add_called_sequence('test_2d', seq, qstring)
     with Fast5File(fname, mode='r') as fh:
         with Basecall2DTools(fh, group_name='Basecall_2D_000') as basecall:
             hp_align = basecall.get_prior_alignment()
             np.testing.assert_array_equal(hp_align, data1)
             bc2d = basecall.get_2d_call_alignment()
             np.testing.assert_array_equal(bc2d, data2)
             n, s, q = basecall.get_called_sequence()
             self.assertEqual(n, 'test_2d')
             self.assertEqual(s, seq)
             self.assertEqual(q, qstring)
Example 18
def extract_signal(fn):
    """
    Extract raw data from the fast5 file.
    The important parameter is scale:
    if True, get_raw_data returns scaled floating point values in pA;
    if False, it returns the raw DAQ values as 16-bit integers.
    """
    # Use a context manager so the file handle is closed after reading.
    with Fast5File(fn, "r") as f:
        print("{}\t{}".format(
            fn, ','.join(str(i) for i in f.get_raw_data(scale=False))))
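The scale flag described in the docstring can also be used directly on a Fast5File handle (a sketch; the path is a placeholder):

with Fast5File("example_read.fast5", "r") as f:
    raw_daq = f.get_raw_data(scale=False)   # int16 DAQ values
    picoamps = f.get_raw_data(scale=True)   # float32 values in pA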
Example 19
 def test_001_raw_only(self):
     fname = self.generate_temp_filename()
     with Fast5File(fname, mode='w') as fh:
         fh.add_channel_info({
             'channel_number': 1,
             'sampling_rate': 4000,
             'digitisation': 8192,
             'range': 819.2,
             'offset': 0
         })
         fh.add_read(12, 'unique_snowflake', 12345, 1000, 0, 120.75)
         raw = np.empty(1000, dtype=np.int16)
         raw[:] = range(1000)
         fh.add_raw_data(raw)
         attrs = {'name': 'test', 'version': 0, 'time_stamp': 'just now'}
         fh.add_analysis('segmentation', 'Segmentation_000', attrs)
         segment_data = {
             'has_template': 1,
             'has_complement': 1,
             'first_sample_template': 10,
             'duration_template': 470,
             'first_sample_complement': 520,
             'duration_complement': 460
         }
         fh.set_summary_data('Segmentation_000', 'segmentation',
                             segment_data)
         with SegmentationTools(fh,
                                group_name='Segmentation_000') as segment:
             results = segment.get_results()
             self.assertDictEqual(
                 {
                     'has_template': True,
                     'has_complement': True,
                     'first_sample_template': 10,
                     'duration_template': 470,
                     'first_sample_complement': 520,
                     'duration_complement': 460
                 }, results)
             temp_raw = segment.get_raw_data('template', scale=False)
             np.testing.assert_array_equal(temp_raw, raw[10:480])
             comp_raw = segment.get_raw_data('complement', scale=False)
             np.testing.assert_array_equal(comp_raw, raw[520:980])
             temp_raw, comp_raw = segment.get_raw_data('both', scale=False)
             np.testing.assert_array_equal(temp_raw, raw[10:480])
             np.testing.assert_array_equal(comp_raw, raw[520:980])
             temp_raw, comp_raw = segment.get_raw_data('both', scale=True)
             scaled_temp = raw[10:480] * 0.1
             scaled_comp = raw[520:980] * 0.1
             np.testing.assert_array_almost_equal(temp_raw,
                                                  scaled_temp,
                                                  decimal=5)
             np.testing.assert_array_almost_equal(comp_raw,
                                                  scaled_comp,
                                                  decimal=5)
Example 20
 def test_003_add_analysis_subgroup(self):
     fname = self.generate_temp_filename()
     with Fast5File(fname, mode='w') as fast5:
         fast5.add_analysis('test', 'Test_000', attrs={})
         fast5.add_analysis_subgroup('Test_000',
                                     'Sub1',
                                     attrs={
                                         'foo': 'bar',
                                         'monkey': 1
                                     })
         att_in = fast5.get_analysis_attributes('Test_000/Sub1')
         self.assertEqual({'foo': 'bar', 'monkey': 1}, att_in)
Example 21
    def __init__(self,
                 source,
                 mode='r',
                 group_name=None,
                 meta=None,
                 config=None):
        """ Create a new analysis_tools object.

        :param source: Either an open Fast5File object, or a filename
            of a fast5 file.
        :param mode: The open mode (r or r+). Only used if a filename is
            given for the source argument.
        :param group_name: The specific analysis instance you are interested in.
        :param meta: Metadata for a new analysis.
        :param config: Configuration data for a new analysis.

        To create a new analysis group, provide a group name that
        does not already exist, and an optional dictionary with the metadata.
        The following fields are recommended, as a minimum:

            * name - The name of the software used.
            * time_stamp - The time at which the analysis was performed.

        If the group name already exists, the "meta" parameter is ignored. If
        the specified group has a "component" attribute, and its value does not
        match self.analysis_id, an exception will be thrown.
        """
        if isinstance(source, Fast5File):
            self.filename = source.filename  # Useful for debugging purposes
            self.handle = source
            self.close_handle_when_done = False
        elif isinstance(source, str):
            self.filename = source  # Useful for debugging purposes
            self.handle = Fast5File(source, mode)
            self.close_handle_when_done = True
        else:
            raise KeyError(
                'Unrecognized type for argument "source": {}'.format(source))
        if group_name is None:
            group_name = self.handle.get_latest_analysis(self.group_id)
            if group_name is None:
                raise KeyError('No group: {} found in file: {}'.format(
                    group_name, self.filename))
        self.group_name = group_name
        attrs = self.handle.get_analysis_attributes(group_name)

        if attrs is None:
            self.handle.add_analysis(self.analysis_id, group_name, meta,
                                     config)
            attrs = self.handle.get_analysis_attributes(group_name)
        if 'component' in attrs and attrs['component'] != self.analysis_id:
            raise ValueError('Component {} is not {}'.format(
                attrs.get('component'), self.analysis_id))
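A minimal sketch of the two accepted "source" forms, using EventDetectionTools (a concrete subclass seen in the earlier examples); the file name is a placeholder:

from ont_fast5_api.fast5_file import Fast5File
from ont_fast5_api.analysis_tools.event_detection import EventDetectionTools

# 1) Pass an open Fast5File: the handle is left open when the tools
#    object is closed (close_handle_when_done is False).
with Fast5File("example.fast5", mode='r') as fh:
    with EventDetectionTools(fh) as evdet:
        print(evdet.group_name)

# 2) Pass a filename: the handle is opened, and later closed, by the
#    tools object itself (close_handle_when_done is True).
with EventDetectionTools("example.fast5", mode='r') as evdet:
    print(evdet.has_event_data())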
Example 22
 def test_003_add_analysis_subgroup(self):
     fname = os.path.join(self.save_path, 'group_test.fast5')
     with Fast5File(fname, mode='w') as fast5:
         fast5.add_analysis('test', 'Test_000', attrs={})
         fast5.add_analysis_subgroup('Test_000',
                                     'Sub1',
                                     attrs={
                                         'foo': 'bar',
                                         'monkey': 1
                                     })
         att_in = fast5.get_analysis_attributes('Test_000/Sub1')
         self.assertEqual({'foo': 'bar', 'monkey': 1}, att_in)
Example 23
 def __init__(self,
              source,
              mode='r',
              group_name=None,
              meta=None,
              config=None):
     """ Create a new alignment tools object.
     
     :param source: Either an open Fast5File object, or a filename
         of a fast5 file.
      :param mode: The open mode (r or r+). Only used if a filename is
          given for the source argument.
     :param group_name: The specific alignment analysis instance
         you are interested in.
     :param meta: Metadata for a new alignment analysis.
     :param config: Configuration data for a new alignment analysis.
     
     To create a new alignment analysis, provide a group name that
     does not already exist, and an optional dictionary with the metadata.
     The following fields are recommended, as a minimum:
         
         * name - The name of the basecall software used.
         * time_stamp - The time at which the analysis was performed.
     
     If the group name already exists, the "meta" parameter is ignored. If
     the specified group has a "component" attribute, and its value is not
     "alignment", an exception will be thrown.
     """
     if isinstance(source, Fast5Read):
         self.handle = source
         self.close_handle_when_done = False
     elif isinstance(source, str):
         self.handle = Fast5File(source, mode)
         self.close_handle_when_done = True
     else:
         raise Exception('Unrecognized type for argument "source".')
     if group_name is None:
         group_name = self.handle.get_latest_analysis('Alignment')
         if group_name is None:
             raise Exception('No Alignment analysis group found in file.')
     self.group_name = group_name
     attrs = self.handle.get_analysis_attributes(group_name)
     if attrs is None:
         if meta is None:
             meta = {}
         self.handle.add_analysis('alignment', group_name, meta, config)
         attrs = self.handle.get_analysis_attributes(group_name)
     if ('component' in attrs and attrs['component']
             not in ['alignment', 'calibration_strand']):
         self.close()
         raise Exception(
             'Analysis does not appear to be an alignment component.')
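A hedged sketch of the create-new-group path described in the docstring, assuming the AlignmentTools import location below and a writable placeholder file:

from ont_fast5_api.analysis_tools.alignment import AlignmentTools

meta = {'name': 'my_aligner', 'time_stamp': '2020-01-01T00:00:00Z'}
with AlignmentTools("example.fast5", mode='r+',
                    group_name='Alignment_000', meta=meta) as align:
    # The group is created on entry because 'Alignment_000' did not exist.
    print(align.group_name)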
Example 24
 def test_002_add_analysis_group(self):
     fname = os.path.join(self.save_path, 'group_test.fast5')
     with Fast5File(fname, mode='w') as fast5:
         att = {'foo': 1, 'bar': 2}
         fast5.add_analysis('test', 'Test_000', att)
         att_in = fast5.get_analysis_attributes('Test_000')
         att['component'] = 'test'
         self.assertEqual(att, att_in)
         att2 = {'Bob': 'your uncle'}
         fast5.add_analysis_attributes('Test_000', att2)
         att_in = fast5.get_analysis_attributes('Test_000')
         att.update(att2)
         self.assertEqual(att, att_in)
Example 25
 def test_002_add_analysis_group(self):
     fname = self.generate_temp_filename()
     with Fast5File(fname, mode='w') as fast5:
         att = {'foo': 1, 'bar': 2}
         fast5.add_analysis('test', 'Test_000', att)
         att_in = fast5.get_analysis_attributes('Test_000')
         att['component'] = 'test'
         self.assertEqual(att, att_in)
         att2 = {'Bob': 'your uncle'}
         fast5.add_analysis_attributes('Test_000', att2)
         att_in = fast5.get_analysis_attributes('Test_000')
         att.update(att2)
         self.assertEqual(att, att_in)
Example 26
    def test_check_single_read_folder(self):
        input_folder = os.path.join(test_data, 'single_reads')
        compression_results = list(check_compression(input_folder, recursive=False,
                                                     follow_symlinks=False, check_all_reads=False))

        # Expected results
        expected_results = []
        for input_file in os.listdir(input_folder):
            input_path = os.path.join(input_folder, input_file)
            with Fast5File(input_path, 'r') as f5:
                expected_results.append((GZIP, f5.read_id, input_path))

        self.assertTrue(numpy.array_equal(expected_results, compression_results))
Example 27
def compress_file(input_file, output_file, target_compression):
    try:
        makedirs(os.path.dirname(output_file), exist_ok=True)
        if is_multi_read(input_file):
            with MultiFast5File(input_file, 'r') as input_f5, MultiFast5File(output_file, 'a') as output_f5:
                for read in input_f5.get_reads():
                    compress_read_from_multi(output_f5, read, target_compression)
        else:
            with Fast5File(input_file, 'r') as input_f5, \
                    EmptyFast5(output_file, 'a') as output_f5:
                compress_read_from_single(output_f5, input_f5, target_compression)
    except Exception as e:
        # Errors raised in Pool async calls will be lost, so we explicitly log them.
        logging.exception(e)
        raise
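A short usage sketch; VBZ is imported from ont_fast5_api's compression settings (as in the earlier compress_single_read example) and the paths are placeholders:

from ont_fast5_api.compression_settings import VBZ

compress_file("in/read0.fast5", "out/read0.fast5", target_compression=VBZ)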
Example 28
    def test_correct_type(self):
        single_read_path = os.path.join(test_data, "single_reads", "read0.fast5")
        single_read_id = Fast5File(single_read_path).get_read_id()
        with get_fast5_file(single_read_path) as f5:
            self.assertEqual(type(f5), Fast5File)
            self.assertEqual(check_file_type(f5), SINGLE_READ)
            self.assertEqual(len(f5.get_read_ids()), 1)
            self.assertEqual(single_read_id, f5.get_read_ids()[0])
            self.get_raw(f5)

        multi_read_path = os.path.join(test_data, "multi_read", "batch_0.fast5")
        with get_fast5_file(multi_read_path) as f5:
            self.assertEqual(type(f5), MultiFast5File)
            self.assertEqual(check_file_type(f5), MULTI_READ)
            self.assertTrue(len(f5.get_read_ids()) >= 1)
            self.get_raw(f5)
Example 29
 def _start_new_file(self, strand):
     self._current_file = strand['read_attrs']['read_number']
     self._strand_counter = 0
     self._current_channel = strand['channel']
     channel_info = {'channel_number': strand['channel'],
                     'offset': strand['offset'],
                     'range': strand['range'],
                     'digitisation': strand['digitisation'],
                     'sampling_rate': strand['sampling_rate']}
     fname = '{}_ch{}_read{}_strand.fast5'.format(self._basename, strand['channel'],
                                                  self._current_file)
     full_path = os.path.join(self._path, fname)
     with Fast5File(full_path, 'w') as fh:
         fh.set_tracking_id(self._tracking_id)
         fh.add_context_tags(self._context_tags)
         fh.add_channel_info(channel_info)
Example 30
 def test_002_read_summary_data(self):
     test_file = os.path.join(test_data, 'telemetry_test.fast5')
     summary = Fast5File.read_summary_data(test_file, 'segmentation')
     expected = {
         'filename':
         'telemetry_test.fast5',
         'channel_id': {
             u'channel_number': 129,
             u'range': 10000.0,
             u'sampling_rate': 5000,
             u'digitisation': 10000,
             u'offset': 0.0
         },
         'reads': [{
             'duration': 755.79559999999947,
             'start_time': 4034.6948000000002,
             'read_id': 'telemetry_test.fast5',
             'start_mux': 1,
             'read_number': 199
         }],
         'tracking_id': {
             u'device_id': '445444'
         },
         'data': {
             u'split_hairpin': {
                 u'median_sd_comp': 1.4719812720343015,
                 u'range_comp': 3.965029408419298,
                 u'median_level_temp': 88.66729546440973,
                 u'duration_temp': 327.82499999999936,
                 u'num_temp': 10773,
                 u'num_events': 24091,
                 u'median_sd_temp': 1.328457722537222,
                 u'range_temp': 4.01780031383548,
                 u'median_level_comp': 89.8680971725336,
                 u'split_index': 10903,
                 u'duration_comp': 422.3665999999994,
                 u'num_comp': 13158
             },
             u'empty': {}
         },
         'software': {
             u'time_stamp': '2014-Jun-04 16:28:31',
             u'version': '0.5.4',
             'component': u'Validation'
         }
     }
     self.assertEqual(expected, summary)