def main():
    """Print the primer pairs found in a primer report as tab-separated rows.

    Reads the file named by ``sys.argv[1]``. Every line that starts with
    ``"Primer Pair: "`` is split on ``"revOligo"``; each ``seq=...}`` value in
    the first piece is collected as a forward primer and each one in the
    remaining pieces as a reverse primer. For every forward primer one row is
    printed with four tab-separated columns: the file name up to its first
    ``"."``, ``<pair number>_<primer number>``, the forward primer, and
    ``reverse_complement.get_rc`` of the reverse primer at the same position.

    Raises:
        IndexError: if a line yields fewer reverse than forward primers.
    """
    path = sys.argv[1]
    # The label depends only on the file name, so compute it once.
    gene_name = path.split('.')[0]
    pair_set = 0
    # The context manager closes the report even if parsing fails part-way.
    with open(path, 'r') as handle:
        for line in handle:
            if line[:13] != "Primer Pair: ":
                continue
            pair_set += 1
            forward = []
            reverse = []
            for idx, piece in enumerate(line.strip().split("revOligo")):
                # Only the text before the first "revOligo" holds forward primers.
                bucket = forward if idx == 0 else reverse
                for chunk in piece.split("seq=")[1:]:
                    bucket.append(chunk.split("}")[0])
            for n, fwd in enumerate(forward):
                rev = reverse_complement.get_rc(reverse[n])
                print(gene_name + "\t" + str(pair_set) + "_" + str(n + 1)
                      + "\t" + fwd + "\t" + rev)
def get_target(fpri, rpri, ori):
    """Return the last value of *ori* that contains both primers.

    Args:
        fpri: forward primer, searched for as given.
        rpri: reverse primer; ``reverse_complement.get_rc(rpri)`` is what is
            searched for.
        ori: mapping whose values are the sequences to search.

    Returns:
        The matching value, or ``""`` when no value contains both primers.
        When several values match, the one visited last wins.
    """
    rc_reverse = reverse_complement.get_rc(rpri)
    hit = ""
    for _, sequence in ori.items():
        if fpri not in sequence or rc_reverse not in sequence:
            continue
        # Keep overwriting so the final match is the one returned.
        hit = sequence
    return hit
def get_primers(i, con, iter, rc):
    """Collect primer pairs and their bracketed positions from report lines.

    Scans at most ``iter * 2 + 1`` lines of *con* starting at index *i* and
    stops at the first blank line. A line is parsed when the text
    ``"Primer Pair: PrimerPair"`` starts at column 2 or column 3.

    Args:
        i: index in *con* of the first line to scan.
        con: sequence of report lines.
        iter: sets the size of the scan window (``iter * 2 + 1`` lines).
        rc: when equal to 1, each reverse primer is passed through
            ``reverse_complement.get_rc``; otherwise it is kept as read.

    Returns:
        A tuple ``(fprim, rprim, fp, rp)`` of parallel lists: forward primers,
        reverse primers, and the first and second value found inside the
        first ``[a, b]`` bracket of each parsed line (as strings).
    """
    marker = "Primer Pair: PrimerPair"
    fprim = []
    rprim = []
    fp = []
    rp = []
    # Clamp the window to the available lines: a report that ends without a
    # trailing blank line used to raise IndexError here.
    stop = min(i + iter * 2 + 1, len(con))
    for idx in range(i, stop):
        line = con[idx]
        if line[2:25] == marker or line[3:26] == marker:
            pieces = line.split("Oligo{seq=")
            loc = line.split("[")[1].split(", ")
            fp.append(loc[0])
            rp.append(loc[1].split("]")[0])
            fw = pieces[1].split("}")[0]
            rev = pieces[2].split("}")[0]
            if rc == 1:
                rev = reverse_complement.get_rc(rev)
            fprim.append(fw)
            rprim.append(rev)
        elif line.strip() == "":
            break
    return fprim, rprim, fp, rp
def get_target(tempseq, fprimer, rprimer):
    """Report whether both primers occur, in order, in the joined sequence.

    Args:
        tempseq: iterable of strings that is concatenated before searching.
        fprimer: forward primer, searched for as given.
        rprimer: reverse primer; ``reverse_complement.get_rc(rprimer)`` is
            what is searched for.

    Returns:
        1 when both are present and the first occurrence of *fprimer* starts
        before the first occurrence of the reverse-complemented *rprimer*,
        otherwise 0.
    """
    joined = ''.join(tempseq)
    rc_primer = reverse_complement.get_rc(rprimer)
    fwd_at = joined.find(fprimer)
    rev_at = joined.find(rc_primer)
    # str.find returns -1 exactly when the substring is absent.
    if fwd_at == -1 or rev_at == -1:
        return 0
    if fwd_at < rev_at:
        return 1
    return 0
def read_trad_primer(filename):
    """Read forward and reverse primers from a FASTA-like primer file.

    Blank lines are skipped. A line starting with ``">F"`` switches to
    forward mode and one starting with ``">R"`` to reverse mode; every other
    line is a primer for the current mode. Lines that precede any header are
    treated as forward primers.

    Args:
        filename: path of the primer file.

    Returns:
        A tuple ``(fpri, rpri)``: forward primers as read (stripped), and
        reverse primers after ``reverse_complement.get_rc``.
    """
    fpri = []
    rpri = []
    in_reverse = False
    # The context manager closes the file; the original left it open.
    with open(filename, "r") as handle:
        for raw in handle:
            entry = raw.strip()
            if entry == "":
                continue
            # Headers are recognised on the unstripped line, as before.
            if raw[:2] == ">F":
                in_reverse = False
            elif raw[:2] == ">R":
                in_reverse = True
            elif in_reverse:
                rpri.append(reverse_complement.get_rc(entry))
            else:
                fpri.append(entry)
    return fpri, rpri
def show_list(gene, target, map, match):
    """Print the products recorded for *target* whose size exceeds 200.

    Args:
        gene: value printed in the first column.
        target: key looked up in *map*; also used to select the entry of
            *match* whose key contains it (the last such entry wins).
        map: mapping from target to the value printed in the second column.
        match: mapping whose values are lists of space-separated records
            ``"<forward> <reverse> <size> ..."``.

    Each qualifying record is printed as a tab-separated row
    ``gene, map[target], forward, reverse, size``. When ``sys.argv[4]`` is
    ``"truseq"`` the reverse primer is passed through
    ``reverse_complement.get_rc`` first.
    """
    org = map[target]
    prod = []
    for key, records in match.items():
        if target in key:
            prod = records
    for record in prod:
        fields = record.split(' ')
        fpri = fields[0]
        exp = fields[2]
        if sys.argv[4] == "truseq":
            rpri = reverse_complement.get_rc(fields[1])
        else:
            rpri = fields[1]
        # Records at or below the size threshold are not reported.
        if int(exp) <= 200:
            continue
        print('\t'.join([gene, org, fpri, rpri, exp]))
def get_primers(i, con, iter, fname, rc):
    """Print the primer pairs found in report lines as FASTA-like records.

    Scans at most ``iter * 2 + 1`` lines of *con* starting at index *i* and
    stops at the first blank line. A line is parsed when ``"Primer Pair:"``
    starts at column 2. For the k-th parsed line (counting from 1) four lines
    are printed: ``>F:<label>_<k>``, the forward primer, ``>R:<label>_<k>``
    and the reverse primer, where ``<label>`` is *fname* up to its first
    ``"."``.

    Args:
        i: index in *con* of the first line to scan.
        con: sequence of report lines.
        iter: sets the size of the scan window (``iter * 2 + 1`` lines).
        fname: file name used to label the records.
        rc: when equal to 1, each reverse primer is passed through
            ``reverse_complement.get_rc``; otherwise it is printed as read.
    """
    k = 1
    # Clamp the window to the available lines: a report that ends without a
    # trailing blank line used to raise IndexError here.
    stop = min(i + iter * 2 + 1, len(con))
    for idx in range(i, stop):
        line = con[idx]
        if line[2:14] == "Primer Pair:":
            pieces = line.split("Oligo{seq=")
            fw = pieces[1].split('}')[0]
            rev = pieces[2].split('}')[0]
            if rc == 1:
                rev = reverse_complement.get_rc(rev)
            label = fname.split('.')[0] + '_' + str(k)
            print(">F:" + label)
            print(fw)
            print(">R:" + label)
            print(rev)
            k += 1
        elif line.strip() == "":
            break
def read_primer(file):
    """Index the primers of a FASTA-like file by sequence.

    A line starting with ``">"`` sets the current name (the stripped line
    without the ``">"``); every other line is a primer carrying that name.
    Primers read before any header get the empty name.

    Args:
        file: path of the primer file.

    Returns:
        A tuple ``(fpri, rpri)`` of dictionaries. ``fpri`` maps each stripped
        sequence to its name(s); ``rpri`` maps
        ``reverse_complement.get_rc`` of the sequence to the same name(s).
        A sequence seen more than once collects its names joined by ``","``.
    """
    fpri = {}
    rpri = {}
    name = ""
    # The context manager closes the file even if parsing raises.
    with open(file, 'r') as prim:
        for line in prim:
            if line[:1] == ">":
                name = line.strip()[1:]
                continue
            seq = line.strip()
            rseq = reverse_complement.get_rc(seq)
            # "in" replaces dict.has_key, which no longer exists in Python 3.
            if seq in fpri:
                fpri[seq] = fpri[seq] + ',' + name
                rpri[rseq] = rpri[rseq] + ',' + name
            else:
                fpri[seq] = name
                rpri[rseq] = name
    return fpri, rpri
# main(): search the sequences returned by utils.read_fasta(sys.argv[2]) for the
# primers returned by utils.read_trad_primer(sys.argv[1]) and print ">F:"/">R:"
# records labelled with sys.argv[2] and a running match counter (num).
# - mismatch(window, primer, mis) is called with mis = 0 on sequence windows of
#   the primer's length, for forward primers (index i) and reverse primers
#   (index j) alike.
# - A ">R:" record, carrying reverse_complement.get_rc of the matched reverse
#   primer, is appended to the output only when j + len(rp) - i lies strictly
#   between min (200) and max (370).
# NOTE(review): both window loops use range(0, len(se) - len(primer)), so the
#   window that ends on the last character of the sequence is never tested -
#   confirm whether that is intended.
# NOTE(review): this copy has lost its line breaks and indentation, so the
#   nesting of the loops and of the final "print output" cannot be read from it;
#   restore the layout from the original file before running.
def main(): min = 200 max = 370 #parameters that used mis = 0 #read primer fpri, rpri = utils.read_trad_primer(sys.argv[1]) lfile = sys.argv[2] num = 0 seq = utils.read_fasta(sys.argv[2]) for item in seq.items(): se = item[1] for f in fpri: for i in range(0, len(se)-len(f)): str1 = se[i:len(f)+i] #print str1, f #print mismatch(str1,f,mis) if mismatch(str1,f,mis): #print "match" num += 1 output = ">F:"+lfile+"_"+str(num)+"\n"+f+'\n' #print output #print ">F:",f, i for r in rpri: rp = r for j in range(0, len(se)-len(rp)): str2 = se[j:len(rp)+j] if mismatch(str2,rp,mis): #print "match" frp = reverse_complement.get_rc(rp) #print ">R:",frp,j+len(rp), j+len(rp)-i flen = j+len(rp)-i #print flen if(flen >min and flen<max): output = output + ">R:"+lfile+"_"+str(num)+"\n"+frp+'\n' print output
def get_product(fpri, rpri, se, name):
    """Print every product delimited by a forward and a reverse primer in *se*.

    Each key of *fpri* is used as a regular-expression pattern on *se*. For
    every match, the 400 characters starting at the match are searched with
    each key of *rpri*; every reverse match yields one product running from
    the forward match to the end of the reverse match.

    Args:
        fpri: dict mapping forward primer patterns to their names.
        rpri: dict mapping reverse primer patterns to their names.
        se: sequence to search.
        name: label printed at the start of each header line.

    Each product is printed as a header line (label, forward primer and name,
    start offset, ``reverse_complement.get_rc`` of the reverse primer and its
    name, end offset, product length) followed by the product itself.

    Returns:
        Always 0.
    """
    for fwd_seq, fwd_name in fpri.items():
        fwd_starts = [hit.start() for hit in re.finditer(fwd_seq, se)]
        for st in fwd_starts:
            # Reverse primers are only looked for within 400 characters.
            window = se[st:st + 400]
            for rev_seq, rev_name in rpri.items():
                rev_starts = [hit.start() for hit in re.finditer(rev_seq, window)]
                for rst in rev_starts:
                    end = rst + len(rev_seq)
                    product = window[:end]
                    print(">%s %s(%s) %s %s(%s) %s %s\n%s" % (
                        name, fwd_seq, fwd_name, st,
                        reverse_complement.get_rc(rev_seq), rev_name,
                        st + end, len(product), product))
    return 0
import sys
import reverse_complement

# Print the file named by sys.argv[1] with every line that does not start
# with ">" replaced by reverse_complement.get_rc of that line, and "_rc"
# appended to every line that does.
# The context manager closes the input file; the original left it open.
with open(sys.argv[1], 'r') as fasta:
    for line in fasta:
        if line[:1] == ">":
            print(line.strip() + "_rc")
        else:
            rc = reverse_complement.get_rc(line.strip())
            print(rc)
#!/usr/bin/python
# Demo script: prints two sample inputs together with what the
# reverse_complement module returns for them.
import sys

# reverse_complement is looked up in ./utils, relative to the working directory.
sys.path.append('./utils')
import reverse_complement

# Plain sequence with mixed upper and lower case.
seq = "ATGCTat"
print(seq)
print(reverse_complement.get_rc(seq))

# Text with a ">name" header line followed by a sequence line.
seq_url = ">name\nATGTatn"
print(seq_url)
print(reverse_complement.get_from_url(seq_url))