# Build the two allele-specific flanking sequences for one SNP record and
# append them to the record's fields before echoing the record to stdout.
#
# NOTE(review): this fragment reads `fields`, `chrom` and `lino`, so it is
# presumably the body of the per-line loop of the script; the loop header is
# not visible here -- re-indent to match the enclosing loop when splicing.

# Prepend "chr" when the chromosome name does not already contain it.
if "chr" not in chrom:
    chrom = "chr" + chrom

coord = intWithComma(fields[colCoordinate])
alleleA = fields[colAlleleA]
alleleB = fields[colAlleleB]

# Clamp the left flank for THIS record only, so that leftBound never drops
# below 1.  The previous code overwrote `leftGet` itself, which silently
# shrank the left flank for every record processed afterwards.
leftFlank = min(leftGet, coord - 1)

leftBound = coord - leftFlank
rightBound = coord + rightGet

# The request is a tab-separated "chrom, start, end" triple; the start is
# written as leftBound-1.  The sequence is taken from the 4th tab-separated
# column of the reply and upper-cased.
bedSeqResult = bedSeqClient.getBedSeq(
    chrom + "\t" + str(leftBound - 1) + "\t" + str(rightBound))
seqGot = bedSeqResult.split("\t")[3].upper()

# The SNP base sits right after the left flank: with a flank of 0 it is the
# 0th character, with a flank of 1 it is the 1st character, and so on.
alleleGotNt = seqGot[leftFlank]

# The base found in the fetched sequence must be one of the two listed
# alleles; otherwise report the inconsistency and abort.
if alleleGotNt not in [alleleA, alleleB]:
    print >> stderr,"error: allele info not consistent with genome seq lino=%d GenomeSeq=%s alleleGot=%s alleleA=%s alleleB=%s coordSNP=%s:%d-%d coordBound=%s:%d-%d" %(lino,seqGot,alleleGotNt,alleleA,alleleB,chrom,coord,coord,chrom,leftBound,rightBound)
    exit(1)

# Substitute each allele at the SNP position, keeping both flanks intact.
prefix = seqGot[:leftFlank]
suffix = seqGot[leftFlank + 1:]
alleleASeq = prefix + alleleA + suffix
alleleBSeq = prefix + alleleB + suffix

fields.extend([alleleASeq, alleleBSeq])
print >> stdout,fs.join(fields)
# Write one GenBank file per input record: the region spanning both members
# of a pair plus `flankingLen` bases of padding on each side, with the two
# members annotated as features "pair1" and "pair2".
#
# NOTE(review): everything up to writeGenbankFile() is presumably the body of
# the per-line loop (loop header not visible here) and fil.close() presumably
# runs once after that loop -- re-indent accordingly when splicing.

lino += 1
lin = lin.rstrip("\r\n")
fields = lin.split("\t")

# Columns of the first pair member.
pair1Chrom = fields[1]
pair1Start0 = int(fields[2])
pair1End1 = int(fields[3])
pair1Name = fields[4]
pair1Strand = fields[6]

# Columns of the second pair member.
# NOTE(review): no chromosome column is read for pair2; pair1Chrom is used
# for the whole region -- confirm both members are always on one chromosome.
pair2Start0 = int(fields[9])
pair2End1 = int(fields[10])
pair2Name = fields[11]
pair2Strand = fields[13]

# Extent covering both members.
minStart0 = min(pair1Start0, pair2Start0)
maxEnd1 = max(pair1End1, pair2End1)

# Pad the region on both sides.  The start is clamped at 0 because a negative
# BED start is invalid; previously a pair closer than `flankingLen` to the
# chromosome start produced a negative coordinate.
flankingStart0 = max(0, minStart0 - flankingLen)
flankingEnd1 = maxEnd1 + flankingLen

# Bases actually present to the left of minStart0 in the fetched sequence.
# Equals flankingLen unless the window was clamped above; using it keeps the
# feature coordinates aligned with the sequence in both cases.
leftPad = minStart0 - flankingStart0

matchName = (pair1Name + "_" + pair2Name).replace(":", "_")

# Label of the unpadded region; the +1 on the start presumably converts the
# 0-based start (per the *Start0 naming) to a 1-based display coordinate.
regionLabel = pair1Chrom + ":" + str(minStart0 + 1) + "-" + str(maxEnd1)

gbWriter = GenbankFileWriter(matchName)
gbWriter.source = regionLabel
gbWriter.organism = regionLabel

# Sequence is the 4th tab-separated column of the getBedSeq reply.
gbWriter.sequence = bseq.getBedSeq(
    pair1Chrom + "\t" + str(flankingStart0) + "\t" + str(flankingEnd1)
).split("\t")[3]

# Feature coordinates are relative to the fetched (padded) sequence.
gbWriter.addFeature(leftPad + pair1Start0 - minStart0 + 1,
                    leftPad + pair1End1 - minStart0,
                    pair1Strand, "pair1")
gbWriter.addFeature(leftPad + pair2Start0 - minStart0 + 1,
                    leftPad + pair2End1 - minStart0,
                    pair2Strand, "pair2")

gbWriter.writeGenbankFile(str(lino) + "_" + matchName + ".gb")

fil.close()