def random_flip(sequence, rnum=None):
    """Return either ``sequence`` or ``rc(sequence)``, chosen by one random draw.

    Args:
        sequence: The sequence to possibly flip.
        rnum: Optional object exposing a ``random()`` method.  When it is
            missing (or falsy) a fresh ``RandomSource()`` is created.

    Returns:
        ``rc(sequence)`` when the drawn value is below 0.5, otherwise
        ``sequence`` unchanged.  (``rc`` is defined elsewhere; presumably it
        reverse-complements the sequence -- confirm against its definition.)
    """
    source = rnum or RandomSource()
    return rc(sequence) if source.random() < 0.5 else sequence
class MakeErrors:
    """Introduce random substitutions, deletions and insertions into reads.

    Errors can be restricted to a sequence context:

    * ``before`` / ``after`` context: the base immediately preceding /
      following the affected position must equal the configured base.
    * ``observed`` base: only this reference base may be substituted or
      deleted.
    * ``modified`` base: the base written by a substitution or insertion;
      when unset a random base is requested from the random source.

    The random source must provide ``random()``, ``random_nt()`` and
    ``different_random_nt(base)``; those are the only methods called here.
    """

    def __init__(self, rand=None, seed=None):
        """Create an error generator.

        Args:
            rand: Optional random source to use.
            seed: Optional seed.  When given (including ``0``) a new
                ``RandomSource(seed)`` is used and ``rand`` is ignored.
        """
        if seed is not None:
            # An explicit seed always wins over a supplied source.
            self.random = RandomSource(seed)
        elif rand is not None:
            self.random = rand
        else:
            self.random = RandomSource()
        # Context restrictions; None means "no restriction".
        self._before_base = None
        self._after_base = None
        # Reference base that may be changed by a deletion or mismatch.
        self._observed_base = None
        # Base to write for an insertion or mismatch (None -> random base).
        self._modified_base = None

    def set_before_context(self, base):
        """Require ``base`` immediately before an affected position."""
        self._before_base = base

    def set_after_context(self, base):
        """Require ``base`` immediately after an affected position."""
        self._after_base = base

    def set_observed_base(self, base):
        """Only substitute or delete positions whose base equals ``base``."""
        self._observed_base = base

    def set_modified_base(self, base):
        """Write ``base`` for substitutions and insertions instead of a random base."""
        self._modified_base = base

    def _context_allows(self, before, after):
        """Return True when the neighbouring bases satisfy the configured context."""
        if self._before_base and before != self._before_base:
            return False
        if self._after_base and after != self._after_base:
            return False
        return True

    def _is_target(self, sequence, i):
        """Return True when position ``i`` may be substituted or deleted."""
        before = sequence[i - 1] if i >= 1 else None
        after = sequence[i + 1] if i < len(sequence) - 1 else None
        if not self._context_allows(before, after):
            return False
        return not self._observed_base or sequence[i] == self._observed_base

    def _append_insert(self, seq_parts, qual_parts, quality, insert_qual):
        """Append one inserted base (and its quality when qualities are tracked)."""
        seq_parts.append(self._modified_base or self.random.random_nt())
        if quality:
            qual_parts.append(insert_qual)

    def random_substitution(self, fastq, rate):
        """Substitute eligible bases, each with probability ``rate``.

        Args:
            fastq: Object with ``name``, ``seq`` and ``qual`` attributes.
            rate: Per-base threshold compared against ``self.random.random()``.

        Returns:
            ``Fastq([name, new_seq, "+", qual])``; qualities are passed
            through unchanged.
        """
        sequence = fastq.seq
        out = []
        for i, base in enumerate(sequence):
            # A random number is drawn only for positions that pass the
            # context filter, so the draw order matches eligible positions.
            if self._is_target(sequence, i) and self.random.random() < rate:
                if self._modified_base:
                    out.append(self._modified_base)
                else:
                    out.append(self.random.different_random_nt(base))
            else:
                out.append(base)
        return Fastq([fastq.name, "".join(out), "+", fastq.qual])

    def random_deletion(self, fastq, rate):
        """Delete eligible bases, each with probability ``rate``.

        Args:
            fastq: Object with ``name``, ``seq`` and ``qual`` attributes.
            rate: Per-base threshold compared against ``self.random.random()``.

        Returns:
            ``Fastq([name, new_seq, "+", new_qual])`` where the quality of a
            deleted base is dropped as well; ``new_qual`` is ``None`` when the
            input has no (or empty) quality.
        """
        sequence = fastq.seq
        quality = fastq.qual
        kept = [
            i
            for i in range(len(sequence))
            if not self._is_target(sequence, i) or self.random.random() >= rate
        ]
        seq = "".join(sequence[i] for i in kept)
        qual = "".join(quality[i] for i in kept) if quality else None
        return Fastq([fastq.name, seq, "+", qual])

    def random_insertion(self, fastq, rate, max_inserts=1):
        """Insert bases before the first base and after every base.

        At each insertion site up to ``max_inserts`` bases are added, each
        one while ``self.random.random() < rate``.  The observed-base filter
        is not applied to insertions.

        Args:
            fastq: Object with ``name``, ``seq`` and ``qual`` attributes.
            rate: Per-site threshold compared against ``self.random.random()``.
            max_inserts: Maximum number of bases inserted at a single site.

        Returns:
            ``Fastq([name, new_seq, "+", new_qual])``.  Inserted bases get the
            quality ``rate_to_phred33(rate)`` (presumably a Phred+33
            character -- confirm against that helper); ``new_qual`` is
            ``None`` when the input has no (or empty) quality.
        """
        sequence = fastq.seq
        quality = fastq.qual
        insert_qual = rate_to_phred33(rate)
        seq_parts = []
        qual_parts = []

        # Insertions ahead of the first base: there is no preceding base, so
        # a before-context can never match, and the base that follows the
        # insertion is the first base of the read (if any).
        first = sequence[0] if sequence else None
        inserted = 0
        while self.random.random() < rate and inserted < max_inserts:
            if self._before_base:
                break
            if self._after_base and first != self._after_base:
                break
            inserted += 1
            self._append_insert(seq_parts, qual_parts, quality, insert_qual)

        # Insertions after each base: the current base is the "before"
        # context and the next base (None at the end) is the "after" context.
        last = len(sequence) - 1
        for i, base in enumerate(sequence):
            seq_parts.append(base)
            if quality:
                qual_parts.append(quality[i])
            after = sequence[i + 1] if i < last else None
            if not self._context_allows(base, after):
                continue
            inserted = 0
            while self.random.random() < rate and inserted < max_inserts:
                inserted += 1
                self._append_insert(seq_parts, qual_parts, quality, insert_qual)

        qual = "".join(qual_parts) if quality else None
        return Fastq([fastq.name, "".join(seq_parts), "+", qual])

    def random_flip(self, sequence):
        """Return ``rc(sequence)`` when one random draw is below 0.5, else ``sequence``."""
        if self.random.random() < 0.5:
            return rc(sequence)
        return sequence