def reads(self):
    """Yield `Read`s obtained from delta splitting.

    Read boundaries are found with `read_bounds_from_delta` on the event
    means; for each bounded window a meta dict (timings, event indices,
    channel states) is built and validated against ``self.meta_keys``.

    :yields: `Read` objects with events (if ``self.with_events``), raw
        data (if ``self.with_raw``) and per-read meta data.
    :raises ValueError: if the computed meta keys do not match
        ``self.meta_keys``.
    """
    with BulkFast5(self.fast5) as fh:
        # load channel, tracking and context meta so we don't need fast5
        # later to e.g. write fast5 files.
        self.load_fast5_meta(fh)
        events = fh.get_events(self.channel)
        bounds = read_bounds_from_delta(events['mean'], delta=self.delta,
                                        look_back_n=self.look_back)
        for event_indices in bounds:
            # Always convert the window's events so the meta timings can
            # be computed; previously this was done only when
            # self.with_events was set, making meta construction crash
            # with a TypeError on read_events[0] otherwise.
            converted = self._convert_event_fields(
                events[event_indices[0]:event_indices[1]], fh.sample_rate)
            read_events = converted if self.with_events else None
            meta = {
                'start_time': converted[0]['start'],
                'duration': converted[-1]['start'] +
                            converted[-1]['length'] - converted[0]['start'],
                'num_events': event_indices[1] - event_indices[0],
                'start_event': event_indices[0],
                'end_event': event_indices[1],
            }
            self._add_channel_states(fh, meta)
            if set(meta.keys()) != set(self.meta_keys):
                extra = set(meta.keys()) - set(self.meta_keys)
                missing = set(self.meta_keys) - set(meta.keys())
                raise ValueError(
                    '{} about to yield read with unexpected metrics. '
                    'Extra: {}. Missing {}.'.format(
                        self.__class__, extra, missing))
            read_raw = None
            if self.with_raw:
                read_raw = fh.get_raw(
                    self.channel,
                    times=(meta['start_time'],
                           meta['start_time'] + meta['duration']),
                    use_scaling=False)
            if meta['start_time'] > self.max_time:
                # PEP 479: raising StopIteration inside a generator is a
                # RuntimeError on Python 3.7+; return ends iteration.
                return
            yield Read(events=read_events, raw=read_raw, meta=meta,
                       channel_meta=self.channel_meta,
                       context_meta=self.context_meta,
                       tracking_meta=self.tracking_meta)
def _create_read_obj(self, reads, f5):
    """Build one `Read` spanning a run of per-read meta dicts.

    The first and last entries of *reads* define the overall time and
    event span. The classification of the first entry is kept, but
    collapsed to ``'unproductive'`` unless it is one of
    ``self.non_block_classes``.

    :param reads: ordered sequence of per-read meta dicts.
    :param f5: open `BulkFast5` handle used to fetch events/raw/states.
    :returns: a single merged `Read`.
    """
    first, last = reads[0], reads[-1]
    classification = first['class']
    if classification not in self.non_block_classes:
        classification = 'unproductive'
    read_meta = {
        'start_time': first['start_time'],
        'duration': (last['start_time'] + last['duration']
                     - first['start_time']),
        'start_event': first['start_event'],
        'end_event': last['end_event'],
        'initial_n_reads': len(reads),
        'initial_classification': classification,
    }
    merged_events = None
    if self.with_events:
        merged_events = self._convert_event_fields(
            f5.get_events(
                self.channel,
                event_indices=(read_meta['start_event'],
                               read_meta['end_event'])),
            f5.sample_rate)
    merged_raw = None
    if self.with_raw:
        merged_raw = f5.get_raw(
            self.channel,
            times=(read_meta['start_time'],
                   read_meta['start_time'] + read_meta['duration']),
            use_scaling=False)
    if self.with_states:
        # add mux, channel states from fast5
        self._add_channel_states(f5, read_meta)
    return Read(events=merged_events, raw=merged_raw, meta=read_meta,
                channel_meta=self.channel_meta,
                context_meta=self.context_meta,
                tracking_meta=self.tracking_meta)
def reads(self):
    """Yield `Read`s of ``self.interval`` consecutive events each."""
    with BulkFast5(self.fast5) as f5:
        # load channel, tracking and context meta so we don't need fast5
        # later to e.g. write fast5 files.
        self.load_fast5_meta(f5)
        channel_events = f5.get_events(self.channel)
        for block in iterators.blocker(channel_events, self.interval):
            yield Read(events=block)
def reads(self):
    """Yield `Read`s with various meta data provided by MinKnow.

    Per-read meta dicts come from ``self.iterate_input_file()``; events,
    raw data and channel states are optionally attached from the bulk
    fast5. Iteration stops once a read starts after ``self.max_time``.
    """
    needs_fast5 = self.with_events or self.with_raw or self.with_states
    f5 = None
    if needs_fast5:
        f5 = BulkFast5(self.fast5)
        # load channel, tracking and context meta so we don't need fast5
        # later to e.g. write fast5 files.
        self.load_fast5_meta(f5)
    else:
        # initialise fast5/channel meta variables so we have a generic call
        # to Read constructor even when we don't have the fast5
        self.channel_meta = None
        self.context_meta = None
        self.tracking_meta = None
    # try/finally guarantees the fast5 is closed even if an exception
    # escapes or the max_time cutoff ends iteration early (previously
    # close() was unreachable after `raise StopIteration`).
    try:
        if self.with_events:
            if {'start_event', 'end_event'}.issubset(self.meta_keys):
                get_events = lambda meta: f5.get_events(
                    self.channel,
                    event_indices=(meta['start_event'], meta['end_event']))
            else:
                # logger.warn is a deprecated alias of logger.warning
                logger.warning(
                    'Reading events using timings, this will be slow.')
                get_events = lambda meta: f5.get_events(
                    self.channel,
                    times=(meta['start_time'],
                           meta['start_time'] + meta['duration']))
        for meta in self.iterate_input_file():
            if meta['start_time'] > self.max_time:
                # PEP 479: `raise StopIteration` in a generator is a
                # RuntimeError on Python 3.7+. Checked before the (slow)
                # event/raw fetches; the read was never yielded anyway.
                return
            read_events = None
            if self.with_events:
                read_events = get_events(meta)
                read_events = self._convert_event_fields(
                    read_events, f5.sample_rate)
            read_raw = None
            if self.with_raw:
                read_raw = f5.get_raw(
                    self.channel,
                    times=(meta['start_time'],
                           meta['start_time'] + meta['duration']),
                    use_scaling=False)
            if self.with_states:
                # add mux, channel states from fast5
                self._add_channel_states(f5, meta)
            yield Read(events=read_events, raw=read_raw, meta=meta,
                       channel_meta=self.channel_meta,
                       context_meta=self.context_meta,
                       tracking_meta=self.tracking_meta)
    finally:
        if f5 is not None:
            f5.close()
def reads(self):
    """Yield `Read`s split where the event mean crosses ``self.threshold``.

    Event 'start'/'length' are first converted from raw sample indices to
    seconds; boundaries are placed at every threshold crossing of the
    event means. Each window between consecutive boundaries becomes one
    read.
    """
    with BulkFast5(self.fast5) as fh:
        # load channel, tracking and context meta so we don't need fast5
        # later to e.g. write fast5 files.
        self.load_fast5_meta(fh)
        events = fh.get_events(self.channel)
        # convert event fields 'start' and 'length' from raw indices into times
        for col in ['start', 'length']:
            times = events[col] / fh.sample_rate
            events = drop_fields(events, col, usemask=False)
            events = append_fields(events, col, times, usemask=False)
        read_bound_event_indices = np.where(
            np.ediff1d((
                events['mean'] < self.threshold).astype(int)) != 0)[0]
        # first event should be start of first read
        read_bound_event_indices = np.insert(
            read_bound_event_indices + 1, 0, 0)
        # close the final window with the last event index, whose 'start'
        # serves as the end time of the final read.
        read_bound_event_indices = np.append(
            read_bound_event_indices, len(events) - 1)
        for start_event, next_start_event in iterators.window(
                read_bound_event_indices, 2):
            start_t = events[start_event]['start']
            end_t = events[next_start_event]['start']
            meta = {
                'start_time': start_t,
                'duration': end_t - start_t,
                'pore_level': self.pore_level,
                'capture_level': self.capture_level,
                'threshold': self.threshold,
            }
            self._add_channel_states(fh, meta)
            # Previously `read_events` was never assigned, so the yield
            # below raised NameError. Provide the window's events,
            # gated on self.with_events like the sibling readers.
            # NOTE(review): assumes this reader has `with_events` as the
            # others do — confirm against the class definition.
            read_events = None
            if self.with_events:
                read_events = events[start_event:next_start_event]
            read_raw = None
            if self.with_raw:
                read_raw = fh.get_raw(self.channel, times=(start_t, end_t),
                                      use_scaling=False)
            if meta['start_time'] > self.max_time:
                # PEP 479: raising StopIteration inside a generator is a
                # RuntimeError on Python 3.7+; return ends iteration.
                return
            yield Read(events=read_events, raw=read_raw, meta=meta,
                       channel_meta=self.channel_meta,
                       context_meta=self.context_meta,
                       tracking_meta=self.tracking_meta)
def reads(self):
    """Yield ``self.n_reads`` synthetic `Read`s of Gaussian events.

    Each read has ``self.read_length`` events with a randomly chosen
    mean current, unit stdv/length, and 'start' times that tile the
    reads contiguously (each read begins where the previous one ended).
    """
    spread = 1
    start = 0.0
    for mean in np.random.choice(range(20, 50, 5), size=self.n_reads):
        read = np.empty(self.read_length, dtype=[('mean', float),
                                                 ('stdv', float),
                                                 ('start', float),
                                                 ('length', float)])
        read['mean'] = np.random.normal(mean, spread,
                                        size=self.read_length)
        read['stdv'] = 1.0
        read['length'] = 1.0
        read['start'] = np.linspace(start, start + self.read_length,
                                    num=self.read_length, endpoint=False)
        yield Read(events=read)
        # Next read begins where the last event of this one ends.
        # Was `start += ...`, which double-counted the current offset
        # (read['start'][-1] already includes `start`), leaving growing
        # gaps between consecutive reads.
        start = read['start'][-1] + read['length'][-1]
def reads(self):
    """Yield `Read`s with various meta data provided by MinKnow.

    Reads come from the bulk fast5's read table (using the classification
    from the penultimate block of multi-block reads). Time-like columns
    are converted from samples to seconds, and the resulting meta keys
    are validated against ``self.meta_keys``.

    :raises ValueError: if the computed meta keys do not match
        ``self.meta_keys``.
    """
    with BulkFast5(self.fast5) as fh:
        # load channel, tracking and context meta so we don't need fast5
        # later to e.g. write fast5 files.
        self.load_fast5_meta(fh)
        # use read classification from the penultimate block of multi-block reads
        for read in fh.get_reads(self.channel, penultimate_class=True):
            event_indices = (read['event_index_start'],
                             read['event_index_end'])
            read_events = None
            if self.with_events:
                read_events = fh.get_events(self.channel,
                                            event_indices=event_indices)
                read_events = self._convert_event_fields(
                    read_events, fh.sample_rate)
            # map new dict keys to read columns
            meta_keys = [('read_id', 'read_id'),
                         ('initial_classification', 'classification'),
                         ('median_current', 'median'),
                         ('median_sd', 'median_sd'),
                         ('range_current', 'range'),
                         ('median_dwell', 'median_dwell'),
                         ('start_time', 'read_start'),
                         ('duration', 'read_length'),
                         ('drift', 'drift')]
            meta = {key: read[col] for key, col in meta_keys}
            # these columns are in samples; convert to seconds
            divide = ('median_dwell', 'duration', 'start_time')
            for name in divide:
                meta[name] = float(meta[name]) / fh.sample_rate
            meta.update({
                'num_events': event_indices[1] - event_indices[0],
                'start_event': event_indices[0],
                'end_event': event_indices[1],
            })
            self._add_channel_states(fh, meta)
            if set(meta.keys()) != set(self.meta_keys):
                extra = set(meta.keys()) - set(self.meta_keys)
                missing = set(self.meta_keys) - set(meta.keys())
                raise ValueError(
                    '{} about to yield read with unexpected metrics. '
                    'Extra: {}. Missing {}.'.format(
                        self.__class__, extra, missing))
            read_raw = None
            if self.with_raw:
                read_raw = fh.get_raw(
                    self.channel,
                    times=(meta['start_time'],
                           meta['start_time'] + meta['duration']),
                    use_scaling=False)
            if meta['start_time'] > self.max_time:
                # PEP 479: raising StopIteration inside a generator is a
                # RuntimeError on Python 3.7+; return ends iteration.
                return
            yield Read(events=read_events, raw=read_raw, meta=meta,
                       channel_meta=self.channel_meta,
                       context_meta=self.context_meta,
                       tracking_meta=self.tracking_meta)