def trim_anchor(anchor_name, anchor_list, sequence, readDir):
    """Look for an anchor in ``sequence`` and split the sequence at it.

    The search window comes from ``C.mbl_anchors[anchor_name]`` (keys
    ``freedom``, ``length``, ``start`` and ``reversed``) and the allowed
    mismatch from ``C.max_divergence``.  Candidate positions are built by
    ``anchortrim.generate_tuples`` and the match itself is delegated to
    ``anchortrim.find_best_distance``.

    Args:
        anchor_name: key into ``C.mbl_anchors``.
        anchor_list: anchor sequence(s) to search for; passed through
            ``pipeline.utils.revcomp`` first when ``readDir`` is ``'R'``.
        sequence: the read to search and trim.
        readDir: read direction, ``'F'`` or ``'R'``.

    Returns:
        A 3-tuple ``(exact, trimmed_off, trimmed_sequence)``:

        * ``exact`` -- the anchor returned by ``find_best_distance``, or
          ``''`` when nothing was found or the anchor was skipped.
        * ``trimmed_off`` -- the part of ``sequence`` that was cut away,
          or ``''`` when nothing was cut.
        * ``trimmed_sequence`` -- the part that is kept (per the original
          author's note this still includes the anchor), or the untouched
          ``sequence`` when nothing was found or the anchor was skipped.
    """
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('looking for anchor: %s' % (anchor_name,))

    settings = C.mbl_anchors[anchor_name]
    freedom = settings['freedom']
    length = settings['length']
    start = settings['start']
    # Named is_reversed so the builtin reversed() is not shadowed.
    is_reversed = settings['reversed']
    max_divergence = C.max_divergence

    # A reversed anchor only applies to reverse reads and vice versa.
    # Equality (not truthiness) is kept on purpose so that a non-bool
    # config value is treated exactly as before.
    if (is_reversed == True and readDir == 'F') or \
            (is_reversed == False and readDir == 'R'):
        print("anchor reversed vs readDir mismatch -Skipping %s %s"
              % (is_reversed, readDir))
        return '', '', sequence

    list_of_tuples = anchortrim.generate_tuples(
        start, freedom, length, reversed_read=is_reversed)

    if readDir == 'R':
        import pipeline.utils as utils
        anchor_list = utils.revcomp(anchor_list)

    # NOTE(review): the nesting of this debug print could not be recovered
    # from the flattened source; confirm it should run for both directions.
    print(anchor_list)

    anchor, location = anchortrim.find_best_distance(
        sequence, anchor_list, max_divergence, list_of_tuples)

    if anchor and location:
        print('anchor: %s loc: %s' % (anchor, location))
        if is_reversed:
            # Keep everything from location[0] onwards.
            trimmed_off = sequence[:location[0]]
            trimmed_sequence = sequence[location[0]:]
        else:
            # Keep everything up to location[1].
            trimmed_sequence = sequence[:location[1]]
            trimmed_off = sequence[location[1]:]
        return anchor, trimmed_off, trimmed_sequence

    print('no anchor location found')
    return '', '', sequence
def trim_anchor(anchor_name, anchor_list, sequence, readDir):
    """Look for an anchor in ``sequence`` and split the sequence at it.

    The search window comes from ``C.mbl_anchors[anchor_name]`` (keys
    ``freedom``, ``length``, ``start`` and ``reversed``) and the allowed
    mismatch from ``C.max_divergence``.  Candidate positions are built by
    ``anchortrim.generate_tuples`` and the match itself is delegated to
    ``anchortrim.find_best_distance``.

    Args:
        anchor_name: key into ``C.mbl_anchors``.
        anchor_list: anchor sequence(s) to search for; passed through
            ``pipeline.utils.revcomp`` first when ``readDir`` is ``'R'``.
        sequence: the read to search and trim.
        readDir: read direction, ``'F'`` or ``'R'``.

    Returns:
        A 3-tuple ``(exact, trimmed_off, trimmed_sequence)``:

        * ``exact`` -- the anchor returned by ``find_best_distance``, or
          ``''`` when nothing was found or the anchor was skipped.
        * ``trimmed_off`` -- the part of ``sequence`` that was cut away,
          or ``''`` when nothing was cut.
        * ``trimmed_sequence`` -- the part that is kept (per the original
          author's note this still includes the anchor), or the untouched
          ``sequence`` when nothing was found or the anchor was skipped.
    """
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('looking for anchor: %s' % (anchor_name,))

    settings = C.mbl_anchors[anchor_name]
    freedom = settings['freedom']
    length = settings['length']
    start = settings['start']
    # Named is_reversed so the builtin reversed() is not shadowed.
    is_reversed = settings['reversed']
    max_divergence = C.max_divergence

    # A reversed anchor only applies to reverse reads and vice versa.
    # Equality (not truthiness) is kept on purpose so that a non-bool
    # config value is treated exactly as before.
    if (is_reversed == True and readDir == 'F') or \
            (is_reversed == False and readDir == 'R'):
        print("anchor reversed vs readDir mismatch -Skipping %s %s"
              % (is_reversed, readDir))
        return '', '', sequence

    list_of_tuples = anchortrim.generate_tuples(
        start, freedom, length, reversed_read=is_reversed)

    if readDir == 'R':
        import pipeline.utils as utils
        anchor_list = utils.revcomp(anchor_list)

    # NOTE(review): the nesting of this debug print could not be recovered
    # from the flattened source; confirm it should run for both directions.
    print(anchor_list)

    anchor, location = anchortrim.find_best_distance(
        sequence, anchor_list, max_divergence, list_of_tuples)

    if anchor and location:
        print('anchor: %s loc: %s' % (anchor, location))
        if is_reversed:
            # Keep everything from location[0] onwards.
            trimmed_off = sequence[:location[0]]
            trimmed_sequence = sequence[location[0]:]
        else:
            # Keep everything up to location[1].
            trimmed_sequence = sequence[:location[1]]
            trimmed_off = sequence[location[1]:]
        return anchor, trimmed_off, trimmed_sequence

    print('no anchor location found')
    return '', '', sequence
def __init__(self, runobj, domain, region, lane_key):
    """Collect the forward and reverse primers that apply to one lane/runkey.

    Primers come from two places: the named suite in
    ``runobj.primer_suites`` (only when the sample has
    ``use_mbl_primers == 1``) and the per-sample custom
    ``forward_primers`` / ``reverse_primers`` lists.

    Args:
        runobj: run description; this method reads
            ``runobj.samples[lane_key]`` and ``runobj.primer_suites``.
        domain: domain name; an ``l`` is appended when it does not
            already end in one (e.g. ``Bacteria`` -> ``Bacterial``).
        region: region name; the suite name is ``<domain>:<region>``.
        lane_key: key into ``runobj.samples``; also embedded in the names
            generated for custom primers.

    Raises:
        KeyError: if ``lane_key`` is not in ``runobj.samples``, if the
            suite name is not in ``runobj.primer_suites`` while suite
            primers are enabled, or if a suite entry has a direction
            other than ``'F'`` / ``'R'``.
    """
    self.domain = domain
    self.region = region

    # Per-direction ('F' / 'R') collections: Primer objects, their raw
    # sequences, and their expanded sequences.
    self.primer_list = {'F': [], 'R': []}
    self.primer_seq_list = {'F': [], 'R': []}
    self.primer_expanded_seq_list = {'F': [], 'R': []}

    # Expanded sequence -> primer name, plus the same lookup keyed by the
    # reverse complement (needed for reverse reads).
    self.primer_names = {}
    self.primer_names_by_reverse_complement = {}

    # changes Bacteria to Bacterial for the name
    if self.domain[-1:] != 'l':
        self.domain = self.domain + "l"
    self.name = self.domain + ":" + self.region

    def register(key, direction, primer_domain, primer_region, sequence):
        # Build one Primer and record it in every lookup structure.
        primer = Primer(key, direction, primer_domain, primer_region, sequence)
        self.primer_list[direction].append(primer)
        self.primer_seq_list[direction].append(sequence)
        self.primer_expanded_seq_list[direction].extend(primer.expanded_seqs)
        for eseq in primer.expanded_seqs:
            self.primer_names[eseq] = key
            self.primer_names_by_reverse_complement[revcomp(eseq)] = key

    sample = runobj.samples[lane_key]

    # now they can specify to not use the mbl primers for this lane/runkey
    if sample.use_mbl_primers == 1:
        suite = runobj.primer_suites[self.name]
        if suite is not None:
            for key, value in suite.items():
                register(key, value['direction'], value['domain'],
                         value['region'], value['sequence'])

    # they may have given some forward/reverse primers on a per
    # lane/runkey basis
    custom_primers = (('F', sample.forward_primers),
                      ('R', sample.reverse_primers))
    for direction, sequences in custom_primers:
        for idx, sequence in enumerate(sequences):
            key = "cust_" + lane_key + "_" + direction + "_" + str(idx)
            # The original passed the builtin ``dir`` here instead of the
            # primer's direction.
            register(key, direction, 'custom', 'custom', sequence)
def test_revcomp(self):
    """utils.revcomp returns the reverse complement of a known sequence."""
    forward = 'TGGGCGTAAAG'
    expected = 'CTTTACGCCCA'
    self.assertEqual(utils.revcomp(forward), expected)