예제 #1
0
    def reads(self):
        """Yield `Reads` obtained from delta splitting.

        Raises:
            ValueError: if the meta assembled for a read does not match
                ``self.meta_keys`` exactly.
        """

        with BulkFast5(self.fast5) as fh:

            # load channel, tracking and context meta so we don't need fast5
            # later to e.g. write fast5 files.
            self.load_fast5_meta(fh)

            events = fh.get_events(self.channel)

            bounds = read_bounds_from_delta(events['mean'],
                                            delta=self.delta,
                                            look_back_n=self.look_back)
            for start_idx, end_idx in bounds:
                read_events = None
                if self.with_events:
                    read_events = self._convert_event_fields(
                        events[start_idx:end_idx], fh.sample_rate)

                # Derive timings from the raw event table so meta is valid
                # even when self.with_events is False; the original indexed
                # read_events here, which is None in that case.
                # NOTE(review): assumes _convert_event_fields divides
                # 'start'/'length' by sample_rate (raw indices -> seconds),
                # keeping these values consistent with read_events — verify.
                first = events[start_idx]
                last = events[end_idx - 1]
                meta = {
                    'start_time': first['start'] / fh.sample_rate,
                    'duration': (last['start'] + last['length'] -
                                 first['start']) / fh.sample_rate,
                    'num_events': end_idx - start_idx,
                    'start_event': start_idx,
                    'end_event': end_idx,
                }
                self._add_channel_states(fh, meta)

                if set(meta.keys()) != set(self.meta_keys):
                    extra = set(meta.keys()) - set(self.meta_keys)
                    missing = set(self.meta_keys) - set(meta.keys())
                    raise ValueError(
                        '{} about to yield read with unexpected metrics. '
                        'Extra: {}. Missing {}.'.format(
                            self.__class__, extra, missing))

                # PEP 479: end the generator with return; raising
                # StopIteration is a RuntimeError on Python 3.7+. Checked
                # before the raw fetch to avoid wasted I/O.
                if meta['start_time'] > self.max_time:
                    return

                read_raw = None
                if self.with_raw:
                    read_raw = fh.get_raw(
                        self.channel,
                        times=(meta['start_time'],
                               meta['start_time'] + meta['duration']),
                        use_scaling=False)

                yield Read(events=read_events,
                           raw=read_raw,
                           meta=meta,
                           channel_meta=self.channel_meta,
                           context_meta=self.context_meta,
                           tracking_meta=self.tracking_meta)
예제 #2
0
    def _create_read_obj(self, reads, f5):
        """Combine consecutive read records into a single `Read`.

        Events, raw signal and channel states are pulled from `f5`
        according to the with_events / with_raw / with_states flags.
        """
        first, last = reads[0], reads[-1]
        read_meta = {
            'start_time': first['start_time'],
            'duration': (last['start_time'] + last['duration'] -
                         first['start_time']),
            'start_event': first['start_event'],
            'end_event': last['end_event'],
            'initial_n_reads': len(reads),
            'initial_classification': first['class'],
        }
        # anything outside the non-block classes is lumped together
        if read_meta['initial_classification'] not in self.non_block_classes:
            read_meta['initial_classification'] = 'unproductive'

        if self.with_events:
            raw_events = f5.get_events(
                self.channel,
                event_indices=(read_meta['start_event'],
                               read_meta['end_event']))
            read_events = self._convert_event_fields(raw_events,
                                                     f5.sample_rate)
        else:
            read_events = None

        if self.with_raw:
            span = (read_meta['start_time'],
                    read_meta['start_time'] + read_meta['duration'])
            read_raw = f5.get_raw(self.channel, times=span,
                                  use_scaling=False)
        else:
            read_raw = None

        if self.with_states:  # add mux, channel states from fast5
            self._add_channel_states(f5, read_meta)

        return Read(events=read_events,
                    raw=read_raw,
                    meta=read_meta,
                    channel_meta=self.channel_meta,
                    context_meta=self.context_meta,
                    tracking_meta=self.tracking_meta)
예제 #3
0
 def reads(self):
     """Yield a `Read` for each fixed-size chunk of the channel's events."""
     with BulkFast5(self.fast5) as fh:
         # Cache channel/tracking/context meta up front so the fast5 is
         # not needed again later, e.g. when writing fast5 files.
         self.load_fast5_meta(fh)
         all_events = fh.get_events(self.channel)
         for block in iterators.blocker(all_events, self.interval):
             yield Read(events=block)
예제 #4
0
    def reads(self):
        """Yield `Reads` with various meta data provided by MinKnow.

        The bulk fast5 is only opened when event/raw/state data is
        requested, and is guaranteed to be closed even if iteration is
        abandoned early or raises (the original leaked the handle in
        those cases).
        """

        need_fast5 = self.with_events or self.with_raw or self.with_states
        f5 = None
        try:
            if need_fast5:
                f5 = BulkFast5(self.fast5)
                # load channel, tracking and context meta so we don't need
                # fast5 later to e.g. write fast5 files.
                self.load_fast5_meta(f5)
            else:
                # initialise fast5/channel meta variables so we have a
                # generic call to Read constructor even when we don't have
                # the fast5
                self.channel_meta = None
                self.context_meta = None
                self.tracking_meta = None

            if self.with_events:
                if {'start_event', 'end_event'}.issubset(self.meta_keys):
                    # fast path: fetch events by index
                    def get_events(meta):
                        return f5.get_events(
                            self.channel,
                            event_indices=(meta['start_event'],
                                           meta['end_event']))
                else:
                    # logger.warn is a deprecated alias of warning
                    logger.warning(
                        'Reading events using timings, this will be slow.')

                    def get_events(meta):
                        return f5.get_events(
                            self.channel,
                            times=(meta['start_time'],
                                   meta['start_time'] + meta['duration']))

            for meta in self.iterate_input_file():
                read_events = None
                if self.with_events:
                    read_events = self._convert_event_fields(
                        get_events(meta), f5.sample_rate)
                read_raw = None
                if self.with_raw:
                    read_raw = f5.get_raw(
                        self.channel,
                        times=(meta['start_time'],
                               meta['start_time'] + meta['duration']),
                        use_scaling=False)
                if self.with_states:  # add mux, channel states from fast5
                    self._add_channel_states(f5, meta)

                # PEP 479: end the generator with return; raising
                # StopIteration is a RuntimeError on Python 3.7+.
                if meta['start_time'] > self.max_time:
                    return

                yield Read(events=read_events,
                           raw=read_raw,
                           meta=meta,
                           channel_meta=self.channel_meta,
                           context_meta=self.context_meta,
                           tracking_meta=self.tracking_meta)
        finally:
            # close the fast5 on any exit path, not just clean completion
            if f5 is not None:
                f5.close()
예제 #5
0
    def reads(self):
        """Yield `Reads` delimited by threshold crossings of the event mean."""
        with BulkFast5(self.fast5) as fh:

            # load channel, tracking and context meta so we don't need fast5
            # later to e.g. write fast5 files.
            self.load_fast5_meta(fh)

            events = fh.get_events(self.channel)
            # convert event fields 'start' and 'length' from raw indices into times
            for col in ['start', 'length']:
                times = events[col] / fh.sample_rate
                events = drop_fields(events, col, usemask=False)
                events = append_fields(events, col, times, usemask=False)

            # boundaries are where the mean crosses self.threshold in
            # either direction
            read_bound_event_indices = np.where(
                np.ediff1d((
                    events['mean'] < self.threshold).astype(int)) != 0)[0]
            # first event should be start of first read
            read_bound_event_indices = np.insert(read_bound_event_indices + 1,
                                                 0, 0)
            # pad end with last event index + 1.
            # NOTE(review): the code appends len(events) - 1, which is the
            # last index, not last + 1 as the comment says — the final
            # event is excluded from the last read; confirm intent.
            read_bound_event_indices = np.append(read_bound_event_indices,
                                                 len(events) - 1)
            for start_event, next_start_event in iterators.window(
                    read_bound_event_indices, 2):
                start_t = events[start_event]['start']
                end_t = events[next_start_event]['start']
                # Slice of the (already time-converted) events for this
                # read. The original referenced an undefined `read_events`
                # at the yield below, raising NameError.
                read_events = events[start_event:next_start_event]
                meta = {
                    'start_time': start_t,
                    'duration': end_t - start_t,
                    'pore_level': self.pore_level,
                    'capture_level': self.capture_level,
                    'threshold': self.threshold,
                }
                self._add_channel_states(fh, meta)

                # PEP 479: end the generator with return; raising
                # StopIteration is a RuntimeError on Python 3.7+. Checked
                # before the raw fetch to avoid wasted I/O.
                if meta['start_time'] > self.max_time:
                    return

                read_raw = None
                if self.with_raw:
                    read_raw = fh.get_raw(self.channel,
                                          times=(start_t, end_t),
                                          use_scaling=False)

                yield Read(events=read_events,
                           raw=read_raw,
                           meta=meta,
                           channel_meta=self.channel_meta,
                           context_meta=self.context_meta,
                           tracking_meta=self.tracking_meta)
예제 #6
0
 def reads(self):
     """Yield `Reads` of synthetic events with random mean current levels.

     Produces ``self.n_reads`` reads of ``self.read_length`` events each;
     event start times run contiguously across successive reads.
     """
     spread = 1
     start = 0.0
     for mean in np.random.choice(range(20, 50, 5), size=self.n_reads):
         read = np.empty(self.read_length,
                         dtype=[('mean', float), ('stdv', float),
                                ('start', float), ('length', float)])
         read['mean'] = np.random.normal(mean,
                                         spread,
                                         size=self.read_length)
         read['stdv'] = 1.0
         read['length'] = 1.0
         read['start'] = np.linspace(start,
                                     start + self.read_length,
                                     num=self.read_length,
                                     endpoint=False)
         yield Read(events=read)
         # Next read begins immediately after the final event. The
         # original used `start += ...`, which added the previous offset
         # twice and opened growing gaps between consecutive reads.
         start = read['start'][-1] + read['length'][-1]
예제 #7
0
    def reads(self):
        """Yield `Reads` with various meta data provided by MinKnow.

        Raises:
            ValueError: if the meta assembled for a read does not match
                ``self.meta_keys`` exactly.
        """
        with BulkFast5(self.fast5) as fh:

            # load channel, tracking and context meta so we don't need fast5
            # later to e.g. write fast5 files.
            self.load_fast5_meta(fh)

            # use read classification from the penultimate block of multi-block reads
            for read in fh.get_reads(self.channel, penultimate_class=True):
                event_indices = (read['event_index_start'],
                                 read['event_index_end'])

                read_events = None
                if self.with_events:
                    read_events = fh.get_events(self.channel,
                                                event_indices=event_indices)
                    read_events = self._convert_event_fields(
                        read_events, fh.sample_rate)

                # map new dict keys to read columns
                meta_keys = [('read_id', 'read_id'),
                             ('initial_classification', 'classification'),
                             ('median_current', 'median'),
                             ('median_sd', 'median_sd'),
                             ('range_current', 'range'),
                             ('median_dwell', 'median_dwell'),
                             ('start_time', 'read_start'),
                             ('duration', 'read_length'),
                             ('drift', 'drift')]
                meta = {key: read[col] for key, col in meta_keys}
                # these columns are stored in samples; convert to seconds
                for name in ('median_dwell', 'duration', 'start_time'):
                    meta[name] = float(meta[name]) / fh.sample_rate

                meta.update({
                    'num_events': event_indices[1] - event_indices[0],
                    'start_event': event_indices[0],
                    'end_event': event_indices[1],
                })
                self._add_channel_states(fh, meta)

                if set(meta.keys()) != set(self.meta_keys):
                    extra = set(meta.keys()) - set(self.meta_keys)
                    missing = set(self.meta_keys) - set(meta.keys())
                    raise ValueError(
                        '{} about to yield read with unexpected metrics. '
                        'Extra: {}. Missing {}.'.format(
                            self.__class__, extra, missing))

                # PEP 479: end the generator with return; raising
                # StopIteration is a RuntimeError on Python 3.7+. Checked
                # before the raw fetch to avoid wasted I/O.
                if meta['start_time'] > self.max_time:
                    return

                read_raw = None
                if self.with_raw:
                    read_raw = fh.get_raw(
                        self.channel,
                        times=(meta['start_time'],
                               meta['start_time'] + meta['duration']),
                        use_scaling=False)

                yield Read(events=read_events,
                           raw=read_raw,
                           meta=meta,
                           channel_meta=self.channel_meta,
                           context_meta=self.context_meta,
                           tracking_meta=self.tracking_meta)