Exemple #1
0
def get_remapping(sig_fn, dacs, scale_params, ref_seq, stride, read_id,
                  r_to_q_poss, rl_cumsum, r_ref_pos, ref_out_info):
    """Build a taiyaki remapping for one read and return its read dictionary.

    Args:
        sig_fn: fast5 file handed to ``fast5_interface.get_fast5_file``.
        dacs: raw signal array; only ``dacs.shape[0]`` and the array itself
            (passed to ``tai_signal.Signal``) are used here.
        scale_params: indexable; element 0 is the shift and element 1 the
            scale, both converted below using the channel calibration.
        ref_seq: reference sequence forwarded to the taiyaki mapping.
        stride: number of signal samples per path position.
        read_id: identifier of the read to load from ``sig_fn``.
        r_to_q_poss: sequence whose index is a reference position and whose
            value is the matching query position.
        rl_cumsum: indexable mapping a (trim-adjusted) query position to a
            path index -- presumably cumulative run lengths; verify against
            the caller.
        r_ref_pos: object providing ``q_trim_start``.
        ref_out_info: object providing ``alphabet``.

    Returns:
        Tuple of ``(read dictionary, RemapResult.SUCCESS)``.

    Raises:
        mh.MegaError: if the integer reference cannot be added to the
            mapping.
    """
    # Use the file as a context manager so the handle is released as soon as
    # the metadata has been copied into plain dicts (previously it was never
    # closed explicitly).
    with fast5_interface.get_fast5_file(sig_fn, 'r') as fast5_fp:
        read = fast5_fp.get_read(read_id)
        channel_info = dict(fast5utils.get_channel_info(read).items())
        read_attrs = dict(fast5utils.get_read_attributes(read).items())

    rd_factor = channel_info['range'] / channel_info['digitisation']
    shift_from_pA = (scale_params[0] + channel_info['offset']) * rd_factor
    scale_from_pA = scale_params[1] * rd_factor

    # prepare taiyaki signal object
    sig = tai_signal.Signal(dacs=dacs)
    sig.channel_info = channel_info
    sig.read_attributes = read_attrs
    sig.offset = channel_info['offset']
    sig.range = channel_info['range']
    sig.digitisation = channel_info['digitisation']

    # -1 marks path positions with no reference base assigned
    path = np.full((dacs.shape[0] // stride) + 1, -1)
    q_offset = r_ref_pos.q_trim_start
    # skip last value since this is where the two seqs end
    for ref_pos, q_pos in enumerate(r_to_q_poss[:-1]):
        path_idx = rl_cumsum[q_pos + q_offset]
        # if the query position maps to the end of the mapping skip it
        if path_idx >= path.shape[0]:
            continue
        path[path_idx] = ref_pos
    remapping = tai_mapping.Mapping.from_remapping_path(
        sig, path, ref_seq, stride)
    try:
        remapping.add_integer_reference(ref_out_info.alphabet)
    except Exception as err:
        # keep the underlying failure attached for debugging
        raise mh.MegaError('Invalid reference sequence encountered') from err

    return (remapping.get_read_dictionary(shift_from_pA, scale_from_pA,
                                          read_id),
            prepare_mapping_funcs.RemapResult.SUCCESS)
Exemple #2
0
def get_remapping(
    sig_fn,
    dacs,
    scale_params,
    ref_seq,
    stride,
    read_id,
    r_to_q_poss,
    rl_cumsum,
    r_ref_pos,
    ref_out_info,
):
    """Assemble a taiyaki ``SignalMapping`` for one read.

    The read is loaded through ``fast5_io.get_read`` to obtain its channel
    info, a ``Signal`` is built around ``dacs`` with the supplied shift and
    scale, and every reference position is assigned a signal index taken
    from ``rl_cumsum`` (offset by ``r_ref_pos.q_trim_start``) multiplied by
    ``stride``.

    Returns:
        Tuple of ``(read dictionary, RemapResult.SUCCESS)``.

    Raises:
        mh.MegaError: if ``ref_seq`` cannot be converted to an integer
            reference with the output alphabet.
    """
    fast5_read = fast5_io.get_read(sig_fn, read_id)
    chan_info = dict(fast5utils.get_channel_info(fast5_read).items())
    # no trimming; shift/scale are taken from scale_params as given
    sig_kwargs = dict(
        dacs=dacs,
        channel_info=chan_info,
        read_id=read_id,
        read_params={
            "trim_start": 0,
            "trim_end": 0,
            "shift": scale_params[0],
            "scale": scale_params[1],
        },
    )
    sig = tai_signal.Signal(**sig_kwargs)

    # one slot per reference base plus a final end position
    ref_to_sig = np.empty(len(ref_seq) + 1, dtype=np.int32)
    # every entry of r_to_q_poss (including the last) is written
    for ref_idx, query_idx in enumerate(r_to_q_poss):
        block_idx = rl_cumsum[query_idx + r_ref_pos.q_trim_start]
        ref_to_sig[ref_idx] = block_idx * stride

    alphabet = None
    try:
        alphabet = ref_out_info.alphabet_info.alphabet
        int_ref = tai_mapping.SignalMapping.get_integer_reference(
            ref_seq, alphabet
        )
    except Exception:
        raise mh.MegaError("Invalid reference sequence encountered")
    sig_mapping = tai_mapping.SignalMapping(ref_to_sig, int_ref, signalObj=sig)

    # when motif annotation is configured, swap in a Reference that has the
    # modified-base alternatives marked at every motif position
    if ref_out_info.ref_mods_all_motifs is not None:
        sig_mapping.Reference = set_all_motif_mods(
            int_ref, ref_out_info.ref_mods_all_motifs
        )

    read_dict = sig_mapping.get_read_dictionary()
    return (read_dict, prepare_mapping_funcs.RemapResult.SUCCESS)
Exemple #3
0
    def __init__(self, read=None, dacs=None):
        """Load signal data from a fast5 read, or from a raw array.

        If read is None and dacs is a np array then initialise the
        untrimmed_dacs to a copy of this array, with neutral calibration
        (offset 0, range 1, digitisation 1).
        (this allows testing with non-fast5 data)

        param read : an ont_fast5_api read object
        param dacs : np int array (only used if first param is None)

        raises Exception : if read is None and dacs cannot be copied
        """
        if read is None:
            try:
                self.untrimmed_dacs = dacs.copy()
            except Exception as err:
                # `except Exception` (not a bare except) so that
                # KeyboardInterrupt / SystemExit are not swallowed
                raise Exception(
                    "Cannot initialise SignalWithMap object") from err
            self.offset = 0
            self.range = 1
            self.digitisation = 1
        else:
            # channel_info contains attributes of the channel such as
            # calibration parameters and sample rate
            self.channel_info = dict(fast5utils.get_channel_info(read).items())
            # read_attributes includes read id, start time, and active mux
            self.read_attributes = dict(
                fast5utils.get_read_attributes(read).items())
            # the sample number (counted from when the device was switched
            # on) when the signal starts
            self.start_sample = self.read_attributes['start_time']
            self.sample_rate = self.channel_info['sampling_rate']
            # a unique key corresponding to this read
            self.read_id = self.read_attributes['read_id'].decode("utf-8")
            # digitised current levels.
            # this function returns a copy, not a reference.
            self.untrimmed_dacs = read.get_raw_data()
            # parameters to convert between DACs and picoamps
            self.range = self.channel_info['range']
            self.offset = self.channel_info['offset']
            self.digitisation = self.channel_info['digitisation']

        # We want to allow trimming without mucking about with the original
        # data. To start with, set trimming parameters to trim nothing
        self.signalstart = 0
        # end is defined exclusively so that
        # self.dacs[signalstart:signalend_exc] is the bit we want.
        self.signalend_exc = len(self.untrimmed_dacs)
Exemple #4
0
def get_remapping(sig_fn, dacs, scale_params, ref_seq, stride, read_id,
                  r_to_q_poss, rl_cumsum, r_ref_pos, ref_out_info):
    """Build a taiyaki ``SignalMapping`` for one read via a remapping path.

    Args:
        sig_fn: fast5 file handed to ``fast5_interface.get_fast5_file``.
        dacs: raw signal array; its length sets the path length and it is
            passed to ``tai_signal.Signal``.
        scale_params: indexable; element 0 is the shift and element 1 the
            scale, both rescaled below with the channel calibration.
        ref_seq: reference sequence to encode as an integer reference.
        stride: number of signal samples per path position.
        read_id: identifier of the read to load from ``sig_fn``.
        r_to_q_poss: sequence whose index is a reference position and whose
            value is the matching query position.
        rl_cumsum: indexable mapping a (trim-adjusted) query position to a
            path index -- presumably cumulative run lengths; verify against
            the caller.
        r_ref_pos: object providing ``q_trim_start``.
        ref_out_info: object providing ``alphabet``, ``ref_mods_all_motifs``
            and ``collapse_alphabet``.

    Returns:
        Tuple of ``(read dictionary, RemapResult.SUCCESS)``.

    Raises:
        mh.MegaError: if ``ref_seq`` cannot be converted to an integer
            reference.
    """
    # Use the file as a context manager so the handle is released once the
    # channel metadata has been copied into a plain dict (previously it was
    # never closed explicitly).
    with fast5_interface.get_fast5_file(sig_fn, 'r') as fast5_fp:
        read = fast5_fp.get_read(read_id)
        channel_info = dict(fast5utils.get_channel_info(read).items())

    rd_factor = channel_info['range'] / channel_info['digitisation']
    read_params = {
        'trim_start': 0,
        'trim_end': 0,
        'shift': (scale_params[0] + channel_info['offset']) * rd_factor,
        'scale': scale_params[1] * rd_factor
    }
    sig = tai_signal.Signal(dacs=dacs,
                            channel_info=channel_info,
                            read_id=read_id,
                            read_params=read_params)

    # -1 marks path positions with no reference base assigned
    path = np.full((dacs.shape[0] // stride) + 1, -1)
    q_offset = r_ref_pos.q_trim_start
    # skip last value since this is where the two seqs end
    for ref_pos, q_pos in enumerate(r_to_q_poss[:-1]):
        path_idx = rl_cumsum[q_pos + q_offset]
        # if the query position maps to the end of the mapping skip it
        if path_idx >= path.shape[0]:
            continue
        path[path_idx] = ref_pos

    try:
        int_ref = tai_mapping.SignalMapping.get_integer_reference(
            ref_seq, ref_out_info.alphabet)
    except Exception as err:
        # keep the underlying failure attached for debugging
        raise mh.MegaError('Invalid reference sequence encountered') from err
    sig_mapping = tai_mapping.SignalMapping.from_remapping_path(
        path, int_ref, stride, sig)

    # annotate mod motifs
    if ref_out_info.ref_mods_all_motifs is not None:
        # annotate all mod base motif positions with alts
        int_ref = set_all_motif_mods(int_ref, ref_out_info.ref_mods_all_motifs,
                                     ref_out_info.collapse_alphabet)
        # set new Reference with mods annotated
        sig_mapping.Reference = int_ref

    return (sig_mapping.get_read_dictionary(),
            prepare_mapping_funcs.RemapResult.SUCCESS)
Exemple #5
0
    def __init__(self,
                 read=None,
                 dacs=None,
                 channel_info=None,
                 read_id=None,
                 read_params=None):
        """Constructor for Signal class. Loads data from read in fast5 file.

        If read is None and dacs is a np array then initialise the
        untrimmed_dacs to this array.
        (this allows testing with non-fast5 data)

        Args:

            read (ont_fast5_api read object) : the read data
            dacs (np int array) : (only used if first param is None)
            channel_info (dict) :  containing keys: offset, range,
                                   digitisation, and sampling_rate.
                                   Only used if read is None. Defaults to
                                   offset 0, range 1, digitisation 1 and
                                   sampling_rate 4000.
            read_id (str): UUID read identifier (only used if read is None)
            read_params (dict): dictionary containing keys: trim_start,
                                trim_end, shift, and scale (as returned from
                       prepare_mapping_funcs.get_per_read_params_dict_from_tsv)
                                Defaults to no trimming, shift 0 and scale 1.

        Returns:
            new Signal object.

        Raises:
            Exception: if read is None and dacs cannot be copied.
        """
        # Build the defaults per call: a dict literal in the signature would
        # be a single object shared by every instance, and channel_info is
        # stored on self by reference.
        if channel_info is None:
            channel_info = {
                'offset': 0,
                'range': 1,
                'digitisation': 1,
                'sampling_rate': 4000
            }
        if read_params is None:
            read_params = {
                'trim_start': 0,
                'trim_end': 0,
                'shift': 0,
                'scale': 1
            }

        if read is None:
            try:
                self.untrimmed_dacs = dacs.copy()
            except Exception as err:
                raise Exception("Cannot initialise Signal object") from err
            self.channel_info = channel_info
            self.read_id = read_id
        else:
            self.channel_info = dict(fast5utils.get_channel_info(read).items())
            # UUID read identifier
            self.read_id = fast5utils.get_read_attributes(
                read)['read_id'].decode("utf-8")
            # digitised current levels.
            # this function returns a copy, not a reference.
            self.untrimmed_dacs = read.get_raw_data()

        self.sample_rate = self.channel_info['sampling_rate']
        # parameters to convert between DACs and picoamps
        self.range = self.channel_info['range']
        self.offset = self.channel_info['offset']
        self.digitisation = self.channel_info['digitisation']

        # We want to allow trimming without mucking about with the original
        # data. To start with, set trimming parameters to trim nothing
        self.signalstart = 0
        # end is defined exclusively so that
        # self.dacs[signalstart:signalend_exc] is the bit we want.
        self.signalend_exc = len(self.untrimmed_dacs)

        self.set_trim_absolute(read_params['trim_start'],
                               read_params['trim_end'])
        self.shift_from_pA = read_params['shift']
        self.scale_from_pA = read_params['scale']