Beispiel #1
0
def random_flip(sequence, rnum=None):
    """Return `sequence` unchanged or, on a draw below 0.5, `rc(sequence)`.

    sequence -- value passed to `rc()` when the flip is taken
    rnum     -- object providing `random()`; when omitted (or falsy) a new
                `RandomSource()` is created just for this call
    """
    source = rnum if rnum else RandomSource()
    # Exactly one draw decides the outcome.
    return rc(sequence) if source.random() < 0.5 else sequence
Beispiel #2
0
  def __init__(self,transcriptome,seed=None,rand=None):
    """Set up the random source and the default per-transcript weights.

    transcriptome -- object whose get_transcripts() returns a sized sequence
    seed          -- optional seed used to build a RandomSource when no
                     `rand` is supplied
    rand          -- optional ready-made random source; takes precedence
                     over `seed`
    """
    if rand: self.random = rand
    # Compare against None so that a seed of 0 is honoured instead of being
    # silently replaced by an unseeded source.
    elif seed is not None: self.random = RandomSource(seed)
    else: self.random = RandomSource()

    self._transcriptome = transcriptome
    # Default weight of the i-th transcript (0-based) is (i+1)/tcnt.
    # NOTE(review): these defaults rise linearly rather than being equal --
    # confirm this is what get_weighted_random_index expects.
    tcnt = len(self._transcriptome.get_transcripts())
    self._weights = [float(i+1)/float(tcnt) for i in range(tcnt)]
    ## _log stores what we are emitting ##
    self._log = []
Beispiel #3
0
 def __init__(self, rand=None, seed=None):
     """Choose the random source and start with the long-read cut settings.

     rand -- optional ready-made random source; takes precedence over `seed`
     seed -- optional seed used to build a RandomSource when `rand` is absent
     """
     if rand:
         self.random = rand
     elif seed is not None:
         # `is not None` keeps seed=0 usable, and the single construction
         # avoids building an unseeded source that is thrown away at once.
         self.random = RandomSource(seed)
     else:
         self.random = RandomSource()
     # Placeholders; set_lr_cuts() (defined on the class, outside this
     # method) is called next to fill them in.
     self._gauss_min = None
     self._gauss_mu = None
     self._gauss_sigma = None
     self.set_lr_cuts()
Beispiel #4
0
    def __init__(self, transcriptome, seed=None, rand=None):
        """Store the transcriptome, pick a random source, set default weights.

        transcriptome -- object whose get_transcripts() returns a sized
                         sequence
        seed          -- optional seed for a new RandomSource (ignored when
                         `rand` is given)
        rand          -- optional existing random source to reuse
        """
        if rand:
            self.random = rand
        elif seed is not None:
            # A seed of 0 is a real seed; only None means "unseeded".
            self.random = RandomSource(seed)
        else:
            self.random = RandomSource()

        self._transcriptome = transcriptome
        # One default weight per transcript: (i + 1) / tcnt for 0-based i.
        # NOTE(review): linearly increasing, not equal -- confirm against
        # what get_weighted_random_index expects.
        tcnt = len(self._transcriptome.get_transcripts())
        self._weights = [float(i + 1) / float(tcnt) for i in range(tcnt)]
        ## _log stores what we are emitting ##
        self._log = []
Beispiel #5
0
 def __init__(self, rand=None, seed=None):
     """Choose the random source and clear every context/base constraint.

     rand -- optional ready-made random source; takes precedence over `seed`
     seed -- optional seed used to build a RandomSource when `rand` is absent
     """
     if rand:
         self.random = rand
     elif seed is not None:
         # seed=0 must still seed the generator; also avoids constructing
         # an unseeded RandomSource only to replace it.
         self.random = RandomSource(seed)
     else:
         self.random = RandomSource()
     #### context information ####
     self._before_base = None
     self._after_base = None
     #### set the reference base to change for del,mismatch ###
     self._observed_base = None
     #### set what to change base to for ins or mismatch
     self._modified_base = None
Beispiel #6
0
class TranscriptomeEmitter:
  """Draws transcripts from a transcriptome using per-transcript weights."""

  def __init__(self,transcriptome,seed=None,rand=None):
    """Set up the random source and the default weights.

    transcriptome -- object whose get_transcripts() returns a sequence
    seed          -- optional seed for a new RandomSource (ignored when
                     `rand` is given)
    rand          -- optional existing random source; it must provide
                     get_weighted_random_index(weights)
    """
    if rand: self.random = rand
    # `is not None` so that seed=0 is not mistaken for "no seed".
    elif seed is not None: self.random = RandomSource(seed)
    else: self.random = RandomSource()

    self._transcriptome = transcriptome
    # Default weight of the i-th transcript (0-based) is (i+1)/tcnt.
    # NOTE(review): linearly increasing, not equal -- confirm this matches
    # what get_weighted_random_index expects.
    tcnt = len(self._transcriptome.get_transcripts())
    self._weights = [float(i+1)/float(tcnt) for i in range(tcnt)]
    ## _log stores what we are emitting ##
    self._log = []

  def emit_transcript(self):
    """Return the transcript at the index chosen by the random source."""
    i = self.random.get_weighted_random_index(self._weights)
    return self._transcriptome.get_transcripts()[i]

  def set_weights_by_dict(self,weights):
    """Replace the weights from a mapping of transcript name -> weight.

    Weights are stored in the order of get_transcripts(); a transcript whose
    name is missing from `weights` gets weight 0.0.  Values are converted
    with float().
    """
    self._weights = [
      float(weights[tx.get_transcript_name()])
      if tx.get_transcript_name() in weights else float(0)
      for tx in self._transcriptome.get_transcripts()
    ]
Beispiel #7
0
class TranscriptomeEmitter:
    """Picks transcripts out of a transcriptome according to stored weights."""

    def __init__(self, transcriptome, seed=None, rand=None):
        """Store the transcriptome, pick a random source, set default weights.

        transcriptome -- object whose get_transcripts() returns a sequence
        seed          -- optional seed for a new RandomSource (ignored when
                         `rand` is given)
        rand          -- optional existing random source; it must provide
                         get_weighted_random_index(weights)
        """
        if rand:
            self.random = rand
        elif seed is not None:
            # A seed of 0 is a legitimate seed; only None means "unseeded".
            self.random = RandomSource(seed)
        else:
            self.random = RandomSource()

        self._transcriptome = transcriptome
        # One default weight per transcript: (i + 1) / tcnt for 0-based i.
        # NOTE(review): linearly increasing, not equal -- confirm against
        # what get_weighted_random_index expects.
        tcnt = len(self._transcriptome.get_transcripts())
        self._weights = [float(i + 1) / float(tcnt) for i in range(tcnt)]
        ## _log stores what we are emitting ##
        self._log = []

    def emit_transcript(self):
        """Return the transcript at the index chosen by the random source."""
        i = self.random.get_weighted_random_index(self._weights)
        return self._transcriptome.get_transcripts()[i]

    def set_weights_by_dict(self, weights):
        """Replace the weights from a mapping of transcript name -> weight.

        The new weight list follows the order of get_transcripts().  Names
        absent from `weights` receive 0.0; present values go through float().
        """
        new_weights = []
        for tx in self._transcriptome.get_transcripts():
            txname = tx.get_transcript_name()
            if txname in weights:
                new_weights.append(float(weights[txname]))
            else:
                new_weights.append(float(0))
        self._weights = new_weights
Beispiel #8
0
class MakeCuts:
    """Cuts a random fragment out of a sequence.

    The fragment length is a Gaussian draw (mu, sigma) floored at a minimum
    and capped at the sequence length; the start position is then drawn so
    the fragment fits inside the sequence.
    """

    def __init__(self, rand=None, seed=None):
        """Choose the random source and start with the long-read settings.

        rand -- optional random source providing gauss() and randint();
                takes precedence over `seed`
        seed -- optional seed for a new RandomSource when `rand` is absent
        """
        if rand:
            self.random = rand
        elif seed is not None:
            # seed=0 is honoured, and no throwaway unseeded source is built.
            self.random = RandomSource(seed)
        else:
            self.random = RandomSource()
        self._gauss_min = None
        self._gauss_mu = None
        self._gauss_sigma = None
        self.set_lr_cuts()

    def get_cut(self, seq):
        """Return a random contiguous slice of `seq`.

        The slice length is min(len(seq), max(minimum, int(gaussian draw))).
        Nothing is printed: callers may be streaming reads to stdout, so a
        debug print here would corrupt that output.
        """
        rgauss = self.random.gauss(self._gauss_mu, self._gauss_sigma)
        cut_len = min(len(seq), max(self._gauss_min, int(rgauss)))
        # Number of start positions beyond 0 that still leave room for the cut.
        leeway = len(seq) - cut_len
        start = self.random.randint(0, leeway)
        return seq[start:start + cut_len]

    def set_custom(self, gmin, gmu, gsigma):
        """Set the minimum length, mean and standard deviation explicitly."""
        self._gauss_min = gmin
        self._gauss_mu = gmu
        self._gauss_sigma = gsigma

    def set_lr_cuts(self):
        """Use the long-read preset (min 1000, mu 4000, sigma 500)."""
        self.set_custom(1000, 4000, 500)

    def set_sr_cuts(self):
        """Use the short-read preset (min 150, mu 290, sigma 290)."""
        self.set_custom(150, 290, 290)
Beispiel #9
0
def main(args):
    """Emit simulated reads from a serialized transcriptome emitter.

    `args` is the parsed command-line namespace.  The function loads the
    emitter given by `args.emitter` ('-' means stdin), then repeatedly draws
    a transcript, optionally trims, flips and fragments it, assigns
    qualities, applies the selected error model and writes FASTQ records to
    `args.output` (one file, or two for paired short reads) until
    `args.count` reads have been written or `args.count * 100` attempts have
    been made.  Optional side files record the origin of each read
    (`args.output_original_source`) and the sequence at each stage
    (`args.output_sequence_change`).

    Invalid option combinations are reported on stderr and end the process
    with exit status 1.
    """
    # check outputs
    if len(args.output) > 1 and not args.sr:
        sys.stderr.write(
            "Error: Long reads don't support multiple output files\n")
        sys.exit(1)
    elif len(args.output) > 2:
        sys.stderr.write(
            "Error: Short reads support at most two output files (paired end)\n"
        )
        sys.exit(1)
    if args.sr_length < args.minimum_read_length:
        args.minimum_read_length = args.sr_length
    inf = sys.stdin
    if args.emitter != '-':
        inf = open(args.emitter)
    sys.stderr.write("reading in transcriptome emitter\n")
    raw = inf.read().rstrip()
    if inf is not sys.stdin:
        inf.close()
    # NOTE(security): pickle.loads can execute arbitrary code; only feed this
    # emitter files from a trusted source.
    indata = pickle.loads(zlib.decompress(base64.b64decode(raw)))
    txome = Transcriptome()
    txome.load_serialized(indata['txome'])
    rnum = RandomSource()
    rnum_tx = RandomSource()  # for drawing transcripts
    if args.seed:
        rnum = RandomSource(args.seed)
        rnum_tx = RandomSource(args.seed)
    # Load in error profile data
    ep = None
    if args.error_profile:
        sys.stderr.write("read in error profile\n")
        ep = ErrorProfilePermuter(args.error_profile, rnum,
                                  args.skew_profile_error_rate)
    txemitter = TranscriptomeEmitter(txome, rand=rnum_tx)
    if indata['weight_type'] == 'expression_table':
        sys.stderr.write(
            "Using expression table defined transcript expression\n")
        txweight = indata['weights']
        txemitter.set_weights_by_dict(txweight)
    elif indata['weight_type'] == 'exponential_distribution':
        sys.stderr.write(
            "ERROR not yet implemented exponential distribution\n")
        sys.exit(1)
    elif indata['weight_type'] == 'uniform_distribution':
        sys.stderr.write(
            "Using uniform distribution of transcript expression\n")
    cutter = MakeCuts(rand=rnum_tx)
    if args.sr:
        cutter.set_custom(args.sr_gauss_min, args.sr_gauss_mu,
                          args.sr_gauss_sigma)
    elif args.lr:
        cutter.set_custom(args.lr_gauss_min, args.lr_gauss_mu,
                          args.lr_gauss_sigma)
    # Prepare outputs
    of1 = sys.stdout
    if args.output[0][-3:] == '.gz':
        of1 = gzip.open(args.output[0], 'w')
    elif args.output[0] != '-':
        of1 = open(args.output[0], 'w')
    of2 = None
    if len(args.output) > 1:
        # The second output is decided by its own name, not the first one's.
        if args.output[1][-3:] == '.gz':
            of2 = gzip.open(args.output[1], 'w')
        elif args.output[1] != '-':
            of2 = open(args.output[1], 'w')
    of_origin = None
    if args.output_original_source:
        if args.output_original_source[-3:] == '.gz':
            of_origin = gzip.open(args.output_original_source, 'w')
        else:
            of_origin = open(args.output_original_source, 'w')
    of_sc = None
    if args.output_sequence_change:
        if args.output_sequence_change[-3:] == '.gz':
            of_sc = gzip.open(args.output_sequence_change, 'w')
        else:
            of_sc = open(args.output_sequence_change, 'w')

    # Hard cap on attempts so a too-strict minimum length cannot loop forever.
    absmax = args.count * 100
    finished_count = 0
    z = 0
    while finished_count < args.count:
        z += 1
        if z > absmax: break
        tx = txemitter.emit_transcript()
        seq = tx.get_sequence()
        stage1seq = seq
        if args.trim_5prime or args.trim_3prime:
            fivestart = 0
            threeend = len(seq)
            # trim_* hold a (low, high) pair of fractions of the sequence
            # length; the actual cut point is drawn between them.
            if args.trim_5prime:
                lcut = int(args.trim_5prime[0] * len(seq))
                rcut = int(args.trim_5prime[1] * len(seq))
                fivestart = rnum_tx.randint(lcut, rcut)
            if args.trim_3prime:
                lcut = int(args.trim_3prime[0] * len(seq))
                rcut = int(args.trim_3prime[1] * len(seq))
                threeend = rnum_tx.randint(lcut, rcut)
            # set sequence to its new trimmed bounds
            seq = seq[fivestart:threeend]

        # flip sequence if necessary
        if not args.no_flip:
            seq = random_flip(seq, rnum_tx)

        l_read = create_name(rnum)
        r_read = None
        if args.sr or args.lr:
            cutseq = cutter.get_cut(seq)
        else:
            cutseq = seq  # case for no_fragmentation
        ############# if we pass this we will really start with this one
        if len(cutseq) < args.minimum_read_length: continue
        # can now log our read name
        if of_origin:
            of_origin.write(l_read + "\t" + tx.get_gene_name() + "\t" +
                            tx.get_transcript_name() + "\n")
        stage2seq = cutseq
        r = None
        if args.sr:
            # Paired short reads: left mate from the fragment start, right
            # mate is rc() of the fragment end; both share one read name.
            r_read = l_read
            l = cutseq[0:args.sr_length]
            r = rc(cutseq[-1 * args.sr_length:])
        else:
            l = cutseq
        stage3left = l
        stage3right = r
        if not stage3right: stage3right = ''
        #################
        #  l (or l and r) contains the sequence prior to errors being added
        l_qual = 'I' * len(l)
        r_qual = None
        if r: r_qual = 'I' * len(r)
        if args.fixed_quality:
            if len(args.fixed_quality) != 1:
                sys.stderr.write(
                    "ERROR fixed quaility should be 1 character\n")
                sys.exit(1)
            l_qual = args.fixed_quality * len(l)
            if r: r_qual = args.fixed_quality * len(r)
        elif args.quality_from_error_rate:
            # Phred+33 character for the given error probability.
            qchar = chr(
                int(-10 * math.log10(args.quality_from_error_rate)) + 33)
            l_qual = qchar * len(l)
            if r: r_qual = qchar * len(r)
        else:  # default is generate quality from profile
            if not ep:
                sys.stderr.write(
                    "ERROR: cannot generate quality from a profile.  Set error profile or chooce quaility from error rate or fixed quality\n"
                )
                sys.exit(1)
            l_qual = ep.emit_qual(len(l))
            if r: r_qual = ep.emit_qual(len(r))
        # Now prior to errors l_qual and r_qual contain our qualities

        l_fastq = Fastq([l_read, l, '+', l_qual])
        r_fastq = None
        if r:
            r_fastq = Fastq([r_read, r, '+', r_qual])
        # Permute sequences by a specific error rate
        if args.specific_errors:
            rate = args.specific_errors
            me = MakeErrors(rand=rnum)
            if args.specific_before_context:
                me.set_before_context(args.specific_before_context)
            if args.specific_after_context:
                me.set_after_context(args.specific_after_context)
            if args.specific_reference_base:
                if args.specific_reference_base != '-':
                    me.set_observed_base(args.specific_reference_base)
            if args.specific_modified_base:
                if args.specific_modified_base != '-':
                    me.set_modified_base(args.specific_modified_base)
            # Both mates get the same kind of error as the left read.
            if args.specific_reference_base == '-':  # doing insertions
                l_fastq = me.random_insertion(l_fastq, rate)
                if r_fastq: r_fastq = me.random_insertion(r_fastq, rate)
            elif args.specific_modified_base == '-':  # doing deletions
                l_fastq = me.random_deletion(l_fastq, rate)
                if r_fastq: r_fastq = me.random_deletion(r_fastq, rate)
            else:
                l_fastq = me.random_substitution(l_fastq, rate)
                if r_fastq: r_fastq = me.random_substitution(r_fastq, rate)
        elif args.uniform_any_error:
            l_fastq = do_uniform_any(l_fastq, rnum, args.uniform_any_error)
            if r_fastq:
                r_fastq = do_uniform_any(r_fastq, rnum, args.uniform_any_error)
        elif args.uniform_mismatch_error:
            l_fastq = do_uniform_mismatch(l_fastq, rnum,
                                          args.uniform_mismatch_error)
            if r_fastq:
                r_fastq = do_uniform_mismatch(r_fastq, rnum,
                                              args.uniform_mismatch_error)
        elif args.any_error_by_quality:
            l_fastq = do_quality_any(l_fastq, rnum)
            if r_fastq: r_fastq = do_quality_any(r_fastq, rnum)
        elif args.mismatch_error_by_quality:
            l_fastq = do_quality_mismatch(l_fastq, rnum)
            if r_fastq: r_fastq = do_quality_mismatch(r_fastq, rnum)
        elif args.profile_context_error:
            l_fastq = ep.permute_context(l_fastq)
            if r_fastq: r_fastq = ep.permute_context(r_fastq)
        elif args.profile_general_error:
            l_fastq = ep.permute_general(l_fastq)
            if r_fastq: r_fastq = ep.permute_general(r_fastq)

        # if SR grown/shrink to appropriate length
        if args.sr and len(l_fastq) != args.sr_length:
            l_fastq = fit_length(l_fastq, args.sr_length, rnum)
        if r:
            if args.sr and len(r_fastq) != args.sr_length:
                r_fastq = fit_length(r_fastq, args.sr_length, rnum)

        of1.write(l_fastq.fastq())
        if of2:
            of2.write(r_fastq.fastq())

        stage4left = l_fastq.seq
        # Resolve the right mate's final sequence before the row is written
        # so the last column is not always empty.
        stage4right = ''
        if r_fastq: stage4right = r_fastq.seq
        if of_sc:
            of_sc.write(l_fastq.name+"\t"+tx.get_gene_name()+"\t"+tx.get_transcript_name()+"\t" \
                      + stage1seq+"\t"+stage2seq+"\t"+stage3left+"\t"+stage3right+"\t"+stage4left+"\t"+stage4right+"\n")
        finished_count += 1
        if finished_count % 1000 == 0:
            sys.stderr.write(
                str(finished_count) + '/' + str(args.count) + "   \r")
    sys.stderr.write("\n")
    of1.close()
    if of2:
        of2.close()
    if of_origin:
        of_origin.close()
    if of_sc:
        of_sc.close()
    # Temporary working directory step 3 of 3 - Cleanup
    if not args.specific_tempdir:
        rmtree(args.tempdir)
Beispiel #10
0
def main(args):
  """Emit simulated reads from a serialized transcriptome emitter.

  `args` is the parsed command-line namespace.  The emitter named by
  `args.emitter` ('-' means stdin) is loaded, then transcripts are drawn,
  optionally trimmed, flipped and fragmented, given qualities, passed
  through the selected error model and written as FASTQ to `args.output`
  (one file, or two for paired short reads).  The loop stops after
  `args.count` reads or `args.count*100` attempts.  Optional side files log
  each read's origin (`args.output_original_source`) and its sequence at
  each stage (`args.output_sequence_change`).

  Invalid option combinations are reported on stderr and end the process
  with exit status 1.
  """
  # check outputs
  if len(args.output) > 1 and not args.sr:
    sys.stderr.write("Error: Long reads don't support multiple output files\n")
    sys.exit(1)
  elif len(args.output) > 2:
    sys.stderr.write("Error: Short reads support at most two output files (paired end)\n")
    sys.exit(1)
  if args.sr_length < args.minimum_read_length:
    args.minimum_read_length = args.sr_length
  inf = sys.stdin
  if args.emitter != '-':
    inf = open(args.emitter)
  sys.stderr.write("reading in transcriptome emitter\n")
  raw = inf.read().rstrip()
  if inf is not sys.stdin:
    inf.close()
  # NOTE(security): pickle.loads can execute arbitrary code; only feed this
  # emitter files from a trusted source.
  indata = pickle.loads(zlib.decompress(base64.b64decode(raw)))
  txome = Transcriptome()
  txome.load_serialized(indata['txome'])
  rnum = RandomSource()
  rnum_tx = RandomSource() # for drawing transcripts
  if args.seed:
    rnum = RandomSource(args.seed)
    rnum_tx = RandomSource(args.seed)
  # Load in error profile data
  ep = None
  if args.error_profile:
    sys.stderr.write("read in error profile\n")
    ep = ErrorProfilePermuter(args.error_profile,rnum,args.skew_profile_error_rate)
  txemitter = TranscriptomeEmitter(txome,rand=rnum_tx)
  if indata['weight_type'] == 'expression_table':
    sys.stderr.write("Using expression table defined transcript expression\n")
    txweight = indata['weights']
    txemitter.set_weights_by_dict(txweight)
  elif indata['weight_type'] == 'exponential_distribution':
    sys.stderr.write("ERROR not yet implemented exponential distribution\n")
    sys.exit(1)
  elif indata['weight_type'] == 'uniform_distribution':
    sys.stderr.write("Using uniform distribution of transcript expression\n")
  cutter = MakeCuts(rand=rnum_tx)
  if args.sr:
    cutter.set_custom(args.sr_gauss_min,args.sr_gauss_mu,args.sr_gauss_sigma)
  elif args.lr:
    cutter.set_custom(args.lr_gauss_min,args.lr_gauss_mu,args.lr_gauss_sigma)
  # Prepare outputs
  of1 = sys.stdout
  if args.output[0][-3:] == '.gz':
    of1 = gzip.open(args.output[0],'w')
  elif args.output[0] != '-':
    of1 = open(args.output[0],'w')
  of2 = None
  if len(args.output) > 1:
    # The second output is decided by its own name, not the first one's.
    if args.output[1][-3:] == '.gz':
      of2 = gzip.open(args.output[1],'w')
    elif args.output[1] != '-':
      of2 = open(args.output[1],'w')
  of_origin = None
  if args.output_original_source:
    if args.output_original_source[-3:]=='.gz':
      of_origin = gzip.open(args.output_original_source,'w')
    else:
      of_origin = open(args.output_original_source,'w')
  of_sc = None
  if args.output_sequence_change:
    if args.output_sequence_change[-3:]=='.gz':
      of_sc = gzip.open(args.output_sequence_change,'w')
    else:
      of_sc = open(args.output_sequence_change,'w')

  # Hard cap on attempts so a too-strict minimum length cannot loop forever.
  absmax = args.count*100
  finished_count = 0
  z = 0
  while finished_count < args.count:
    z += 1
    if z > absmax: break
    tx = txemitter.emit_transcript()
    seq = tx.get_sequence()
    stage1seq = seq
    if args.trim_5prime or args.trim_3prime:
      fivestart = 0
      threeend = len(seq)
      # trim_* hold a (low, high) pair of fractions of the sequence length;
      # the actual cut point is drawn between them.
      if args.trim_5prime:
        lcut = int(args.trim_5prime[0]*len(seq))
        rcut = int(args.trim_5prime[1]*len(seq))
        fivestart = rnum_tx.randint(lcut,rcut)
      if args.trim_3prime:
        lcut = int(args.trim_3prime[0]*len(seq))
        rcut = int(args.trim_3prime[1]*len(seq))
        threeend = rnum_tx.randint(lcut,rcut)
      # set sequence to its new trimmed bounds
      seq = seq[fivestart:threeend]

    # flip sequence if necessary
    if not args.no_flip:
      seq = random_flip(seq,rnum_tx)

    l_read = create_name(rnum)
    r_read = None
    if args.sr or args.lr:
      cutseq = cutter.get_cut(seq)
    else:
      cutseq = seq #case for no_fragmentation
    ############# if we pass this we will really start with this one
    if len(cutseq) < args.minimum_read_length: continue
    # can now log our read name
    if of_origin:
      of_origin.write(l_read+"\t"+tx.get_gene_name()+"\t"+tx.get_transcript_name()+"\n")
    stage2seq = cutseq
    r = None
    if args.sr:
      # Paired short reads: left mate from the fragment start, right mate is
      # rc() of the fragment end; both share one read name.
      r_read = l_read
      l = cutseq[0:args.sr_length]
      r = rc(cutseq[-1*args.sr_length:])
    else:
      l = cutseq
    stage3left = l
    stage3right = r
    if not stage3right: stage3right = ''
    #################
    #  l (or l and r) contains the sequence prior to errors being added
    l_qual = 'I'*len(l)
    r_qual = None
    if r: r_qual = 'I'*len(r)
    if args.fixed_quality:
      if len(args.fixed_quality) != 1:
        sys.stderr.write("ERROR fixed quaility should be 1 character\n")
        sys.exit(1)
      l_qual = args.fixed_quality*len(l)
      if r: r_qual = args.fixed_quality*len(r)
    elif args.quality_from_error_rate:
      # Phred+33 character for the given error probability.
      qchar = chr(int(-10*math.log10(args.quality_from_error_rate))+33)
      l_qual = qchar*len(l)
      if r: r_qual = qchar*len(r)
    else: #default is generate quality from profile
      if not ep:
        sys.stderr.write("ERROR: cannot generate quality from a profile.  Set error profile or chooce quaility from error rate or fixed quality\n")
        sys.exit(1)
      l_qual = ep.emit_qual(len(l))
      if r: r_qual = ep.emit_qual(len(r))
    # Now prior to errors l_qual and r_qual contain our qualities

    l_fastq = Fastq([l_read,l,'+',l_qual])
    r_fastq = None
    if r:
      r_fastq = Fastq([r_read,r,'+',r_qual])
    # Permute sequences by a specific error rate
    if args.specific_errors:
      rate = args.specific_errors
      me = MakeErrors(rand=rnum)
      if args.specific_before_context: me.set_before_context(args.specific_before_context)
      if args.specific_after_context: me.set_after_context(args.specific_after_context)
      if args.specific_reference_base:
        if args.specific_reference_base != '-':
          me.set_observed_base(args.specific_reference_base)
      if args.specific_modified_base:
        if args.specific_modified_base != '-':
          me.set_modified_base(args.specific_modified_base)
      # Both mates get the same kind of error as the left read.
      if args.specific_reference_base == '-': #doing insertions
        l_fastq = me.random_insertion(l_fastq,rate)
        if r_fastq: r_fastq = me.random_insertion(r_fastq,rate)
      elif args.specific_modified_base == '-': #doing deletions
        l_fastq = me.random_deletion(l_fastq,rate)
        if r_fastq: r_fastq = me.random_deletion(r_fastq,rate)
      else:
        l_fastq = me.random_substitution(l_fastq,rate)
        if r_fastq: r_fastq = me.random_substitution(r_fastq,rate)
    elif args.uniform_any_error:
      l_fastq = do_uniform_any(l_fastq,rnum,args.uniform_any_error)
      if r_fastq: r_fastq = do_uniform_any(r_fastq,rnum,args.uniform_any_error)
    elif args.uniform_mismatch_error:
      l_fastq = do_uniform_mismatch(l_fastq,rnum,args.uniform_mismatch_error)
      if r_fastq: r_fastq = do_uniform_mismatch(r_fastq,rnum,args.uniform_mismatch_error)
    elif args.any_error_by_quality:
      l_fastq = do_quality_any(l_fastq,rnum)
      if r_fastq: r_fastq = do_quality_any(r_fastq,rnum)
    elif args.mismatch_error_by_quality:
      l_fastq = do_quality_mismatch(l_fastq,rnum)
      if r_fastq: r_fastq = do_quality_mismatch(r_fastq,rnum)
    elif args.profile_context_error:
      l_fastq = ep.permute_context(l_fastq)
      if r_fastq: r_fastq = ep.permute_context(r_fastq)
    elif args.profile_general_error:
      l_fastq = ep.permute_general(l_fastq)
      if r_fastq: r_fastq = ep.permute_general(r_fastq)

    # if SR grown/shrink to appropriate length
    if args.sr and len(l_fastq) != args.sr_length:
      l_fastq = fit_length(l_fastq,args.sr_length,rnum)
    if r:
      if args.sr and len(r_fastq) != args.sr_length:
        r_fastq = fit_length(r_fastq,args.sr_length,rnum)

    of1.write(l_fastq.fastq())
    if of2:
      of2.write(r_fastq.fastq())

    stage4left = l_fastq.seq
    # Resolve the right mate's final sequence before the row is written so
    # the last column is not always empty.
    stage4right = ''
    if r_fastq: stage4right = r_fastq.seq
    if of_sc:
      of_sc.write(l_fastq.name+"\t"+tx.get_gene_name()+"\t"+tx.get_transcript_name()+"\t" \
                + stage1seq+"\t"+stage2seq+"\t"+stage3left+"\t"+stage3right+"\t"+stage4left+"\t"+stage4right+"\n")
    finished_count += 1
    if finished_count %1000==0: sys.stderr.write(str(finished_count)+'/'+str(args.count)+"   \r")
  sys.stderr.write("\n")
  of1.close()
  if of2:
    of2.close()
  if of_origin:
    of_origin.close()
  if of_sc:
    of_sc.close()
  # Temporary working directory step 3 of 3 - Cleanup
  if not args.specific_tempdir:
    rmtree(args.tempdir)
Beispiel #11
0
class MakeErrors:
    """Introduces random substitutions, deletions or insertions into reads.

    Errors can be restricted to sites with a given preceding base
    (`set_before_context`), following base (`set_after_context`) and, for
    substitutions/deletions, a given base at the site itself
    (`set_observed_base`).  `set_modified_base` fixes the base written by a
    substitution or insertion; otherwise the random source picks one.
    """

    def __init__(self, rand=None, seed=None):
        """Choose the random source and clear all constraints.

        rand -- optional ready-made random source; takes precedence over
                `seed`
        seed -- optional seed for a new RandomSource when `rand` is absent
        """
        if rand:
            self.random = rand
        elif seed is not None:
            # seed=0 is honoured, and no throwaway unseeded source is built.
            self.random = RandomSource(seed)
        else:
            self.random = RandomSource()
        #### context information ####
        self._before_base = None
        self._after_base = None
        #### set the reference base to change for del,mismatch ###
        self._observed_base = None
        #### set what to change base to for ins or mismatch
        self._modified_base = None

    def set_before_context(self, base):
        """Require `base` immediately before an error site."""
        self._before_base = base

    def set_after_context(self, base):
        """Require `base` immediately after an error site."""
        self._after_base = base

    def set_observed_base(self, base):
        """Only substitute/delete positions currently holding `base`."""
        self._observed_base = base

    def set_modified_base(self, base):
        """Always write `base` for substitutions and insertions."""
        self._modified_base = base

    def _site_is_eligible(self, sequence, i):
        """Return True if position `i` satisfies every configured constraint."""
        before = sequence[i - 1] if i >= 1 else None
        after = sequence[i + 1] if i < len(sequence) - 1 else None
        if self._before_base and before != self._before_base:
            return False
        if self._after_base and after != self._after_base:
            return False
        if self._observed_base and sequence[i] != self._observed_base:
            return False
        return True

    def _append_insertion(self, seq_out, qual_out, ibase):
        """Append one inserted base (and its quality, if tracked)."""
        if self._modified_base:
            seq_out.append(self._modified_base)
        else:
            seq_out.append(self.random.random_nt())
        if qual_out is not None:
            qual_out.append(ibase)

    def random_substitution(self, fastq, rate):
        """Return a new Fastq with eligible bases substituted at `rate`.

        Qualities are carried over unchanged.  One random draw is made per
        eligible position; ineligible positions consume no draw.
        """
        sequence = fastq.seq
        bases = []
        for i, base in enumerate(sequence):
            if not self._site_is_eligible(sequence, i):
                bases.append(base)
                continue
            if self.random.random() < rate:
                if self._modified_base:
                    bases.append(self._modified_base)
                else:
                    bases.append(self.random.different_random_nt(base))
            else:
                bases.append(base)
        return Fastq([fastq.name, "".join(bases), "+", fastq.qual])

    def random_deletion(self, fastq, rate):
        """Return a new Fastq with eligible bases deleted at `rate`.

        The quality string, when present, is shortened in step with the
        sequence; when the input has no quality the output quality is None.
        """
        sequence = fastq.seq
        quality = fastq.qual
        bases = []
        quals = [] if quality else None
        for i, base in enumerate(sequence):
            # Keep the base when the site is ineligible or the draw says so;
            # the draw is only made for eligible sites.
            if self._site_is_eligible(sequence, i) \
                    and self.random.random() < rate:
                continue
            bases.append(base)
            if quals is not None:
                quals.append(quality[i])
        qual = "".join(quals) if quals is not None else None
        return Fastq([fastq.name, "".join(bases), "+", qual])

    def random_insertion(self, fastq, rate, max_inserts=1):
        """Return a new Fastq with bases inserted at `rate`.

        fastq       -- read to modify (the body and all callers rely on this
                       parameter being first)
        rate        -- per-slot insertion probability; also converted by
                       rate_to_phred33() into the quality given to inserted
                       bases
        max_inserts -- maximum consecutive insertions per slot

        Slots are: before the first base, and after every base.  The
        observed-base constraint does not apply to insertions.
        """
        sequence = fastq.seq
        quality = fastq.qual
        ibase = rate_to_phred33(rate)
        seq_out = []
        qual_out = [] if quality else None

        # Slot before the first base: nothing precedes it, so a before-context
        # can never match; the base following the insertion is sequence[0].
        z = 0
        while self.random.random() < rate and z < max_inserts:
            if self._before_base:
                break  # can't do this one
            if self._after_base:
                if not sequence or self._after_base != sequence[0]:
                    break
            z += 1
            self._append_insertion(seq_out, qual_out, ibase)

        for i, base in enumerate(sequence):
            seq_out.append(base)
            if qual_out is not None:
                qual_out.append(quality[i])
            # Slot after position i: preceded by sequence[i], followed by
            # sequence[i + 1] (nothing after the last base).
            following = sequence[i + 1] if i < len(sequence) - 1 else None
            if self._before_base and base != self._before_base:
                continue
            if self._after_base and following != self._after_base:
                continue
            z = 0
            while self.random.random() < rate and z < max_inserts:
                z += 1
                self._append_insertion(seq_out, qual_out, ibase)
        qual = "".join(qual_out) if qual_out is not None else None
        return Fastq([fastq.name, "".join(seq_out), "+", qual])

    def random_flip(self, sequence):
        """Return `rc(sequence)` on a draw below 0.5, else `sequence`."""
        if self.random.random() < 0.5:
            return rc(sequence)
        return sequence