def get_remapping(sig_fn, dacs, scale_params, ref_seq, stride, read_id,
                  r_to_q_poss, rl_cumsum, r_ref_pos, ref_out_info):
    """Build the taiyaki remapping dictionary for a single read.

    Args:
        sig_fn: path of the fast5 file holding the read.
        dacs: raw DAC signal array; only ``dacs.shape[0]`` is inspected here
            before the array is handed to ``tai_signal.Signal``.
        scale_params: indexable; element 0 is used as the shift and element 1
            as the scale, both converted with the channel range/digitisation.
        ref_seq: reference sequence passed to taiyaki's ``Mapping``.
        stride: integer divisor turning signal length into path length.
        read_id: identifier of the read inside ``sig_fn``.
        r_to_q_poss: for each reference position, the matching query
            position; the final entry is ignored.
        rl_cumsum: indexable by (query position + ``q_trim_start``), giving
            the path index for that query position.
        r_ref_pos: object exposing ``q_trim_start``.
        ref_out_info: object exposing ``alphabet``.

    Returns:
        tuple of (read dictionary from the taiyaki mapping,
        ``prepare_mapping_funcs.RemapResult.SUCCESS``).

    Raises:
        mh.MegaError: if adding the integer reference fails.
    """
    # Open the fast5 file as a context manager so the handle is released once
    # the attributes have been copied into plain dicts. Previously the file
    # object was never closed, leaking one handle per processed read.
    # NOTE(review): relies on get_fast5_file() returning a context manager,
    # as documented for ont_fast5_api -- confirm for the pinned version.
    with fast5_interface.get_fast5_file(sig_fn, 'r') as fast5_fp:
        read = fast5_fp.get_read(read_id)
        channel_info = dict(fast5utils.get_channel_info(read).items())
        read_attrs = dict(fast5utils.get_read_attributes(read).items())

    rd_factor = channel_info['range'] / channel_info['digitisation']
    shift_from_pA = (scale_params[0] + channel_info['offset']) * rd_factor
    scale_from_pA = scale_params[1] * rd_factor

    # prepare taiyaki signal object
    sig = tai_signal.Signal(dacs=dacs)
    sig.channel_info = channel_info
    sig.read_attributes = read_attrs
    sig.offset = channel_info['offset']
    sig.range = channel_info['range']
    sig.digitisation = channel_info['digitisation']

    # one path entry per stride-sized block of signal (plus one);
    # -1 marks entries that have no reference position assigned
    path = np.full((dacs.shape[0] // stride) + 1, -1)
    # skip last value since this is where the two seqs end
    for ref_pos, q_pos in enumerate(r_to_q_poss[:-1]):
        path_idx = rl_cumsum[q_pos + r_ref_pos.q_trim_start]
        # if the query position maps to the end of the mapping skip it
        if path_idx >= path.shape[0]:
            continue
        path[path_idx] = ref_pos

    remapping = tai_mapping.Mapping.from_remapping_path(
        sig, path, ref_seq, stride)
    try:
        remapping.add_integer_reference(ref_out_info.alphabet)
    except Exception as err:
        # keep the underlying failure attached for debugging
        raise mh.MegaError(
            'Invalid reference sequence encountered') from err

    return (remapping.get_read_dictionary(
        shift_from_pA, scale_from_pA, read_id),
        prepare_mapping_funcs.RemapResult.SUCCESS)
def get_remapping(
    sig_fn,
    dacs,
    scale_params,
    ref_seq,
    stride,
    read_id,
    r_to_q_poss,
    rl_cumsum,
    r_ref_pos,
    ref_out_info,
):
    """Assemble the taiyaki signal-mapping dictionary for one read.

    Args:
        sig_fn: fast5 file path handed to ``fast5_io.get_read``.
        dacs: raw DAC signal forwarded to ``tai_signal.Signal``.
        scale_params: indexable; element 0 is stored as ``shift`` and
            element 1 as ``scale`` in the read parameters.
        ref_seq: reference sequence; ``len(ref_seq) + 1`` sizes the
            reference-to-signal array.
        stride: multiplier applied to each ``rl_cumsum`` value.
        read_id: identifier of the read to load.
        r_to_q_poss: query position for each reference position.
        rl_cumsum: indexable by (query position + ``q_trim_start``).
        r_ref_pos: object exposing ``q_trim_start``.
        ref_out_info: object exposing ``alphabet_info.alphabet`` and
            ``ref_mods_all_motifs``.

    Returns:
        tuple of (read dictionary from the signal mapping,
        ``prepare_mapping_funcs.RemapResult.SUCCESS``).

    Raises:
        mh.MegaError: if the integer reference cannot be produced.
    """
    fast5_read = fast5_io.get_read(sig_fn, read_id)
    chan_info = dict(fast5utils.get_channel_info(fast5_read).items())
    signal_obj = tai_signal.Signal(
        dacs=dacs,
        channel_info=chan_info,
        read_id=read_id,
        read_params=dict(
            trim_start=0,
            trim_end=0,
            shift=scale_params[0],
            scale=scale_params[1],
        ),
    )

    # one slot per reference base plus one; slots are written by the loop
    ref_to_sig = np.empty(len(ref_seq) + 1, dtype=np.int32)
    for ref_idx, query_pos in enumerate(r_to_q_poss):
        block_idx = rl_cumsum[query_pos + r_ref_pos.q_trim_start]
        ref_to_sig[ref_idx] = block_idx * stride

    try:
        int_ref = tai_mapping.SignalMapping.get_integer_reference(
            ref_seq, ref_out_info.alphabet_info.alphabet
        )
    except Exception:
        raise mh.MegaError("Invalid reference sequence encountered")

    sig_mapping = tai_mapping.SignalMapping(
        ref_to_sig, int_ref, signalObj=signal_obj
    )

    if ref_out_info.ref_mods_all_motifs is not None:
        # replace the reference with one where every mod base motif
        # position is annotated with its alternative
        sig_mapping.Reference = set_all_motif_mods(
            int_ref, ref_out_info.ref_mods_all_motifs
        )

    read_dict = sig_mapping.get_read_dictionary()
    return read_dict, prepare_mapping_funcs.RemapResult.SUCCESS
def __init__(self, read=None, dacs=None):
    """Load signal data from a read in a fast5 file.

    If read is None and dacs is a np array then initialise the
    untrimmed_dacs to this array (this allows testing with non-fast5 data).

    Args:
        read: an ont_fast5_api read object, or None.
        dacs: np int array (only used if ``read`` is None); it is copied.

    Raises:
        Exception: if ``read`` is None and ``dacs`` cannot be copied.
    """
    if read is None:
        try:
            self.untrimmed_dacs = dacs.copy()
        except Exception as err:
            # `except Exception` rather than a bare `except`, so that
            # KeyboardInterrupt/SystemExit are not swallowed and re-labelled
            raise Exception(
                "Cannot initialise SignalWithMap object") from err
        # identity calibration when no channel information is available
        self.offset = 0
        self.range = 1
        self.digitisation = 1
    else:
        # channel_info contains attributes of the channel such as
        # calibration parameters and sample rate
        self.channel_info = dict(fast5utils.get_channel_info(read).items())
        # read_attributes includes read id, start time, and active mux
        self.read_attributes = dict(
            fast5utils.get_read_attributes(read).items())
        # the sample number (counted from when the device was switched on)
        # when the signal starts
        self.start_sample = self.read_attributes['start_time']
        self.sample_rate = self.channel_info['sampling_rate']
        # a unique key corresponding to this read
        self.read_id = self.read_attributes['read_id'].decode("utf-8")
        # digitised current levels.
        # this function returns a copy, not a reference.
        self.untrimmed_dacs = read.get_raw_data()
        # parameters to convert between DACs and picoamps
        self.range = self.channel_info['range']
        self.offset = self.channel_info['offset']
        self.digitisation = self.channel_info['digitisation']

    # We want to allow trimming without mucking about with the original
    # data. To start with, set trimming parameters to trim nothing
    self.signalstart = 0
    # end is defined exclusively so that
    # self.dacs[signalstart:signalend_exc] is the bit we want.
    self.signalend_exc = len(self.untrimmed_dacs)
def get_remapping(sig_fn, dacs, scale_params, ref_seq, stride, read_id,
                  r_to_q_poss, rl_cumsum, r_ref_pos, ref_out_info):
    """Build the taiyaki signal-mapping dictionary for a single read.

    Args:
        sig_fn: path of the fast5 file holding the read.
        dacs: raw DAC signal array; ``dacs.shape[0]`` sizes the path.
        scale_params: indexable; element 0 is used as the shift and element 1
            as the scale, both converted with the channel range/digitisation.
        ref_seq: reference sequence to convert to an integer reference.
        stride: integer divisor turning signal length into path length.
        read_id: identifier of the read inside ``sig_fn``.
        r_to_q_poss: for each reference position, the matching query
            position; the final entry is ignored.
        rl_cumsum: indexable by (query position + ``q_trim_start``), giving
            the path index for that query position.
        r_ref_pos: object exposing ``q_trim_start``.
        ref_out_info: object exposing ``alphabet``, ``ref_mods_all_motifs``
            and ``collapse_alphabet``.

    Returns:
        tuple of (read dictionary from the signal mapping,
        ``prepare_mapping_funcs.RemapResult.SUCCESS``).

    Raises:
        mh.MegaError: if the integer reference cannot be produced.
    """
    # Open the fast5 file as a context manager so the handle is released once
    # the channel attributes have been copied into a plain dict. Previously
    # the file object was never closed, leaking one handle per read.
    # NOTE(review): relies on get_fast5_file() returning a context manager,
    # as documented for ont_fast5_api -- confirm for the pinned version.
    with fast5_interface.get_fast5_file(sig_fn, 'r') as fast5_fp:
        read = fast5_fp.get_read(read_id)
        channel_info = dict(fast5utils.get_channel_info(read).items())

    rd_factor = channel_info['range'] / channel_info['digitisation']
    read_params = {
        'trim_start': 0,
        'trim_end': 0,
        'shift': (scale_params[0] + channel_info['offset']) * rd_factor,
        'scale': scale_params[1] * rd_factor,
    }
    sig = tai_signal.Signal(
        dacs=dacs, channel_info=channel_info, read_id=read_id,
        read_params=read_params)

    # one path entry per stride-sized block of signal (plus one);
    # -1 marks entries that have no reference position assigned
    path = np.full((dacs.shape[0] // stride) + 1, -1)
    # skip last value since this is where the two seqs end
    for ref_pos, q_pos in enumerate(r_to_q_poss[:-1]):
        path_idx = rl_cumsum[q_pos + r_ref_pos.q_trim_start]
        # if the query position maps to the end of the mapping skip it
        if path_idx >= path.shape[0]:
            continue
        path[path_idx] = ref_pos

    try:
        int_ref = tai_mapping.SignalMapping.get_integer_reference(
            ref_seq, ref_out_info.alphabet)
    except Exception as err:
        # keep the underlying failure attached for debugging
        raise mh.MegaError(
            'Invalid reference sequence encountered') from err
    sig_mapping = tai_mapping.SignalMapping.from_remapping_path(
        path, int_ref, stride, sig)

    # annotate mod motifs
    if ref_out_info.ref_mods_all_motifs is not None:
        # annotate all mod base motif positions with alts
        int_ref = set_all_motif_mods(
            int_ref, ref_out_info.ref_mods_all_motifs,
            ref_out_info.collapse_alphabet)
        # set new Reference with mods annotated
        sig_mapping.Reference = int_ref

    return (sig_mapping.get_read_dictionary(),
            prepare_mapping_funcs.RemapResult.SUCCESS)
def __init__(self, read=None, dacs=None, channel_info=None, read_id=None,
             read_params=None):
    """Constructor for Signal class.

    Loads data from read in fast5 file.

    If read is None and dacs is a np array then initialise the
    untrimmed_dacs to this array (this allows testing with non-fast5 data).

    Args:
        read (ont_fast5_api read object) : the read data
        dacs (np int array) : (only used if first param is None)
        channel_info (dict) : containing keys: offset, range, digitisation,
            and sampling_rate. Only used if read is None. Defaults to
            offset 0, range 1, digitisation 1, sampling_rate 4000.
        read_id (str): UUID read identifier (only used if read is None)
        read_params (dict): dictionary containing keys: trim_start,
            trim_end, shift, and scale (as returned from
            prepare_mapping_funcs.get_per_read_params_dict_from_tsv).
            Defaults to no trimming, shift 0 and scale 1.

    Returns:
        new Signal object.

    Raises:
        Exception: if read is None and dacs cannot be copied.
    """
    # Defaults are created per call. A dict literal in the signature would
    # be shared by every instance, and since channel_info is stored on self
    # a mutation on one Signal would silently leak into all the others.
    if channel_info is None:
        channel_info = {
            'offset': 0,
            'range': 1,
            'digitisation': 1,
            'sampling_rate': 4000,
        }
    if read_params is None:
        read_params = {
            'trim_start': 0,
            'trim_end': 0,
            'shift': 0,
            'scale': 1,
        }

    if read is None:
        try:
            self.untrimmed_dacs = dacs.copy()
        except Exception as err:
            raise Exception("Cannot initialise Signal object") from err
        self.channel_info = channel_info
        self.read_id = read_id
    else:
        self.channel_info = dict(fast5utils.get_channel_info(read).items())
        # UUID read identifier
        self.read_id = fast5utils.get_read_attributes(
            read)['read_id'].decode("utf-8")
        # digitised current levels.
        # this function returns a copy, not a reference.
        self.untrimmed_dacs = read.get_raw_data()

    self.sample_rate = self.channel_info['sampling_rate']
    # parameters to convert between DACs and picoamps
    self.range = self.channel_info['range']
    self.offset = self.channel_info['offset']
    self.digitisation = self.channel_info['digitisation']

    # We want to allow trimming without mucking about with the original
    # data. To start with, set trimming parameters to trim nothing
    self.signalstart = 0
    # end is defined exclusively so that
    # self.dacs[signalstart:signalend_exc] is the bit we want.
    self.signalend_exc = len(self.untrimmed_dacs)
    # then apply the requested trimming on top of the untrimmed bounds
    self.set_trim_absolute(read_params['trim_start'],
                           read_params['trim_end'])
    self.shift_from_pA = read_params['shift']
    self.scale_from_pA = read_params['scale']