Exemple #1
0
def random_flip(sequence, rnum=None):
    """Return ``sequence`` unchanged or, half of the time, ``rc(sequence)``.

    Args:
        sequence: Value returned as-is, or passed to ``rc`` when the flip
            is taken (``rc`` is defined elsewhere; presumably reverse
            complement -- confirm against its definition).
        rnum: Optional random source exposing a ``random()`` method. A new
            ``RandomSource()`` is created only when this is ``None``.

    Returns:
        ``rc(sequence)`` when the drawn number is below 0.5, otherwise
        ``sequence`` itself.
    """
    # Test for None explicitly: a caller-supplied source that happens to be
    # falsy (e.g. defines __bool__/__len__) must still be used, not replaced.
    randin = RandomSource() if rnum is None else rnum
    if randin.random() < 0.5:
        return rc(sequence)
    return sequence
Exemple #2
0
class MakeErrors:
    """Introduce random substitutions, deletions and insertions into reads.

    Reads are objects exposing ``name``, ``seq`` and ``qual``; every
    ``random_*`` method that takes one returns a new
    ``Fastq([name, seq, "+", qual])`` and leaves its input untouched.

    Optional filters restrict which positions may be modified:

    * before/after context: the neighbouring base that must precede/follow
      the position,
    * observed base: the base that must be present at the position
      (substitution and deletion only),
    * modified base: the base written by substitutions/insertions instead
      of a randomly drawn one.

    A filter that is unset (``None``) or otherwise falsy is not applied.
    """

    def __init__(self, rand=None, seed=None):
        """Set up the random source and clear all filters.

        Args:
            rand: Existing random source to use. It must provide
                ``random()``, ``random_nt()`` and
                ``different_random_nt(base)``. Takes precedence over
                ``seed``.
            seed: Seed handed to ``RandomSource`` when ``rand`` is not
                given. ``0`` is a valid seed.
        """
        # Explicit None checks: a falsy random source or a seed of 0 are
        # legitimate inputs and must not fall through to the default.
        if rand is not None:
            self.random = rand
        elif seed is not None:
            self.random = RandomSource(seed)
        else:
            self.random = RandomSource()
        #### context information ####
        self._before_base = None
        self._after_base = None
        #### set the reference base to change for del,mismatch ###
        self._observed_base = None
        #### set what to change base to for ins or mismatch
        self._modified_base = None

    def set_before_context(self, base):
        """Only modify positions whose preceding base equals ``base``."""
        self._before_base = base

    def set_after_context(self, base):
        """Only modify positions whose following base equals ``base``."""
        self._after_base = base

    def set_observed_base(self, base):
        """Only substitute/delete positions currently holding ``base``."""
        self._observed_base = base

    def set_modified_base(self, base):
        """Write ``base`` for substitutions/insertions instead of a random one."""
        self._modified_base = base

    def _site_is_eligible(self, before, after, observed=None):
        """Return True when a position passes every configured filter.

        ``before``/``after`` are the flanking bases (``None`` at a read
        end, which can never satisfy a configured context). ``observed``
        is checked against the observed-base filter only when given.
        """
        if self._before_base and before != self._before_base:
            return False
        if self._after_base and after != self._after_base:
            return False
        if (
            observed is not None
            and self._observed_base
            and observed != self._observed_base
        ):
            return False
        return True

    def _draw_inserts(self, rate, max_inserts, allowed=True):
        """Return the bases to insert at one position (possibly none).

        A random number is drawn before ``allowed`` is consulted, so a
        blocked position still consumes one draw; this keeps the stream of
        random numbers identical for a given source.
        """
        bases = []
        while self.random.random() < rate and len(bases) < max_inserts:
            if not allowed:
                break
            if self._modified_base:
                bases.append(self._modified_base)
            else:
                bases.append(self.random.random_nt())
        return bases

    def random_substitution(self, fastq, rate):
        """Substitute eligible bases of ``fastq`` with probability ``rate``.

        Args:
            fastq: Read exposing ``name``, ``seq`` and ``qual``.
            rate: Per-position probability of a substitution; a position is
                changed when the drawn number is below it.

        Returns:
            A new ``Fastq`` with the modified sequence and the original
            quality string.
        """
        sequence = fastq.seq
        last = len(sequence) - 1
        out = []
        for i, base in enumerate(sequence):
            before = sequence[i - 1] if i >= 1 else None
            after = sequence[i + 1] if i < last else None
            # A random number is drawn only for positions passing the filters.
            if (
                self._site_is_eligible(before, after, base)
                and self.random.random() < rate
            ):
                if self._modified_base:
                    out.append(self._modified_base)
                else:
                    out.append(self.random.different_random_nt(base))
            else:
                out.append(base)
        return Fastq([fastq.name, "".join(out), "+", fastq.qual])

    def random_deletion(self, fastq, rate):
        """Delete eligible bases of ``fastq`` with probability ``rate``.

        Args:
            fastq: Read exposing ``name``, ``seq`` and ``qual``.
            rate: Per-position probability of a deletion; a base is kept
                when the drawn number is greater than or equal to it.

        Returns:
            A new ``Fastq``. Quality values of deleted bases are dropped as
            well; the quality field is ``None`` when the input has no
            (or an empty) quality string.
        """
        sequence = fastq.seq
        quality = fastq.qual
        last = len(sequence) - 1
        kept_seq = []
        kept_qual = []
        for i, base in enumerate(sequence):
            before = sequence[i - 1] if i >= 1 else None
            after = sequence[i + 1] if i < last else None
            # Short-circuit keeps ineligible positions from consuming a draw.
            if (
                not self._site_is_eligible(before, after, base)
                or self.random.random() >= rate
            ):
                kept_seq.append(base)
                if quality:
                    kept_qual.append(quality[i])
        qual = "".join(kept_qual) if quality else None
        return Fastq([fastq.name, "".join(kept_seq), "+", qual])

    def random_insertion(self, fastq, rate, max_inserts=1):
        """Insert up to ``max_inserts`` bases at each eligible gap.

        Gaps are the position before the first base and the position after
        every base. The observed-base filter is not applied to insertions.

        Args:
            fastq: Read exposing ``name``, ``seq`` and ``qual``. The method
                body has always read this object but previously lacked the
                parameter, so every call failed with a ``NameError``; the
                signature now matches the other ``random_*`` methods.
            rate: Probability of each successive insertion at a gap. Also
                converted with ``rate_to_phred33`` to the quality value
                assigned to inserted bases.
            max_inserts: Maximum number of bases inserted at any one gap.

        Returns:
            A new ``Fastq`` with inserted bases; the quality field is
            ``None`` when the input has no (or an empty) quality string.
        """
        sequence = fastq.seq
        quality = fastq.qual
        ibase = rate_to_phred33(rate)
        last = len(sequence) - 1
        out_seq = []
        out_qual = []

        # Gap before the read: nothing precedes it, so a configured
        # before-context can never match, and the base following the gap
        # is sequence[0] (None for an empty read).
        first = sequence[0] if len(sequence) > 0 else None
        leading_ok = self._site_is_eligible(None, first)
        leading = self._draw_inserts(rate, max_inserts, leading_ok)
        out_seq.extend(leading)
        if quality:
            out_qual.extend([ibase] * len(leading))

        for i, base in enumerate(sequence):
            after = sequence[i + 1] if i < last else None
            out_seq.append(base)
            if quality:
                out_qual.append(quality[i])
            # The gap sits right after `base`, so `base` is its before-context.
            if not self._site_is_eligible(base, after):
                continue
            inserts = self._draw_inserts(rate, max_inserts)
            out_seq.extend(inserts)
            if quality:
                out_qual.extend([ibase] * len(inserts))

        qual = "".join(out_qual) if quality else None
        return Fastq([fastq.name, "".join(out_seq), "+", qual])

    def random_flip(self, sequence):
        """Return ``rc(sequence)`` when the next draw is below 0.5, else ``sequence``."""
        if self.random.random() < 0.5:
            return rc(sequence)
        return sequence