Example #1
0
def match_events_with_eventalign(events=None, event_detections=None, minus=False, rna=False):
    """Match event index with event detection data to label segments of signal for each kmer

    Every row of `events` is joined, through its 'event_index', to the row of
    `event_detections` holding the raw-signal start and length of that event.

    The kmer written to the label depends on strand and molecule type:

    # RNA is sequenced 3'-5'
    # reversed for fasta/q sequence
    # if mapped to reverse strand
    # reverse reverse complement = complement

    # DNA is sequenced 5'-3'
    # if mapped to reverse strand
    # reverse complement

    i.e. minus + RNA -> complement, minus + DNA -> reverse complement,
    plus + RNA -> reverse, plus + DNA -> kmer used unchanged.

    :param events: events table with at least the fields 'position', 'event_index' and 'reference_kmer'
    :param event_detections: event detection table with at least the fields 'start' and 'length'
    :param minus: boolean option for minus strand mapping
    :param rna: boolean for RNA read
    :return: numpy structured array with one row per row of `events` (same order, not sorted) and the
             fields 'raw_start', 'raw_length', 'reference_index', 'posterior_probability' (always 1.0)
             and 'kmer' (stored with dtype 'S6')
    :raises AssertionError: if `events` or `event_detections` is None
    """
    assert events is not None, "Must pass signal alignment events"
    assert event_detections is not None, "Must pass event_detections events"

    check_numpy_table(events, req_fields=('position', 'event_index',
                                          'reference_kmer'))

    check_numpy_table(event_detections, req_fields=('start', 'length'))

    label = np.zeros(len(events), dtype=[('raw_start', int), ('raw_length', int), ('reference_index', int),
                                         ('posterior_probability', float), ('kmer', 'S6')])

    # Look up all matching detection rows at once with integer-array indexing
    # instead of one Python-level row lookup per event.
    event_index = events["event_index"]
    label['raw_start'] = event_detections["start"][event_index]
    label['raw_length'] = event_detections["length"][event_index]
    label['reference_index'] = events["position"]

    def convert_to_str(string):
        """Helper function to catch bytes as strings"""
        # isinstance rather than an exact type check: items read from a unicode
        # numpy column are numpy.str_ (a str subclass) and must be returned as-is,
        # because bytes.decode rejects them with a TypeError.
        if isinstance(string, str):
            return string
        return bytes.decode(string)

    flip = ReverseComplement()
    # Pick the orientation fix for this strand / molecule combination.
    if minus:
        transform = flip.complement if rna else flip.reverse_complement
    elif rna:
        transform = flip.reverse
    else:
        transform = None

    if transform is None:
        # Plus-strand DNA: the reference kmers are already in read orientation.
        kmers = events["reference_kmer"]
    else:
        kmers = [transform(convert_to_str(kmer)) for kmer in events["reference_kmer"]]

    label['kmer'] = kmers
    # No per-event probability is taken from `events`; every label gets 1.0.
    label['posterior_probability'] = np.ones(len(events))
    # NOTE: rows keep the order of `events`; they are not sorted by 'raw_start'.

    return label
Example #2
0
    def test_reverse_complement(self):
        """ReverseComplement methods must agree with the reverse_complement function.

        Runs 10 rounds; each round builds a random string over the uppercase
        letters ACGTMKRYBVDHN with a length drawn from np.random.randint(0, 1000)
        and compares every reverse/complement flag combination.
        """
        rev_comp = ReverseComplement(find="ACGTMKRYBVDHNacgtmkrybvdhn", replace="TGCAKMYRVBHDNtgcakmyrvbhdn")
        # (reverse flag, complement flag, equivalent ReverseComplement operation);
        # with both flags off the sequence is expected back unchanged.
        cases = (
            (True, True, rev_comp.reverse_complement),
            (False, True, rev_comp.complement),
            (True, False, rev_comp.reverse),
            (False, False, lambda sequence: sequence),
        )
        for _ in range(10):
            rand_len = np.random.randint(0, 1000)
            random_dna = get_random_string(rand_len, chars=list(set("ACGTMKRYBVDHN")))

            for do_reverse, do_complement, expected_from in cases:
                observed = reverse_complement(random_dna, reverse=do_reverse, complement=do_complement)
                self.assertEqual(observed, expected_from(random_dna))