def main():
    """Print the CDS slice of every repeat listed in a repeat file.

    Command line::

        python <script> cdsfile repfile

    ``ctIO.readRep`` fills a dict keyed by locus; each value is iterated as a
    list of dicts whose keys are ``(first, last)`` position tuples.
    ``ctIO.readFasta`` is given the sorted locus list and its result is
    indexed by locus (presumably locus -> CDS string; confirm against ctIO).

    For every locus a ``>locus`` header is written to stdout, followed by one
    line per position dict.  Each line joins ``slice:start`` fields with
    ``#``, where ``start`` is the 1-based offset of the slice.

    Exits with status 0 after the usage message when the argument count is
    wrong, and with status 1 (after reporting the locus and tuple on stderr)
    when a position tuple yields an empty or reversed range.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    cds_path, rep_path = sys.argv[1], sys.argv[2]
    repeats = {}
    ctIO.readRep(rep_path, repeats)
    loci = sorted(repeats.keys())
    cds_by_locus = ctIO.readFasta(cds_path, loci)

    for locus in loci:
        sys.stdout.write('>%s\n' % locus)
        cds = cds_by_locus[locus]
        for positions in repeats[locus]:
            fields = []
            for span in sorted(positions.keys()):
                # Positions are scaled by three: 1-based inclusive units
                # become a 0-based half-open slice (presumably residues to
                # nucleotides -- confirm with the repeat file format).
                first = (span[0] - 1) * 3
                last = span[1] * 3
                if first >= last:
                    sys.stderr.write('%s %s\n' % (locus, span))
                    sys.exit(1)
                # The reported coordinate is the 1-based start of the slice
                # (the original author patched this from start + 3 on
                # 2011-08-25).
                fields.append(cds[first:last] + ':' + str(first + 1))
            sys.stdout.write('#'.join(fields) + '\n')
def main():
    """Collect repeat pairs from a prospero report and pass them to outputRep.

    Command line::

        python <script> pep prospero outputfile [overlap percentage]

    ``pep`` is loaded with ``readFasta``; the result is indexed by locus and
    sliced (presumably locus -> peptide string; confirm against the helper).
    The prospero file is scanned line by line:

    * a line starting with ``using sequence1`` selects the current locus,
      taken from its last whitespace-separated token;
    * a line starting with ``>`` must carry two ``from A to B`` ranges.  They
      are treated as 1-based inclusive coordinates into the current sequence
      and stored as ``{(A, B): subsequence}`` in the locus' list.

    When exactly one extra argument is given it is parsed as a float, and a
    pair is skipped if ``(pos2 - pos3 + 1) / (pos4 - pos1 + 1)`` exceeds it.

    The collected dict and the output file name go to ``outputRep``.

    Exits with status 0 after the usage message when fewer than three
    arguments are supplied.

    Raises:
        ValueError: if the overlap argument is not a number, if a ``>`` line
            does not match the expected layout, or if a ``>`` line appears
            before any ``using sequence1`` line.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        sys.stderr.write(
            'Using python %s pep prospero outputfile [overlap percentage]\n'
            % sys.argv[0])
        sys.exit(0)

    # Parse the optional threshold once rather than on every alignment line.
    max_overlap = float(sys.argv[4]) if len(sys.argv) == 5 else None

    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    seqDict = readFasta(sys.argv[1])
    repDict = {}
    locus = None
    seq = None
    # The context manager closes the report even if parsing fails midway.
    with open(sys.argv[2]) as prospero:
        for line in prospero:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line[0] == '>':
                if locus is None:
                    raise ValueError(
                        "alignment line found before any 'using sequence1' "
                        "line: %r" % line)
                match = pat.match(line)
                if match is None:
                    raise ValueError(
                        'unrecognised alignment line for %s: %r'
                        % (locus, line))
                pos1, pos2, pos3, pos4 = [int(g) for g in match.groups()]
                if max_overlap is not None:
                    overlap = (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0)
                    if overlap > max_overlap:
                        continue
                tmpDict = {}
                tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
                tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
                repDict[locus].append(tmpDict)
    outputRep(repDict, sys.argv[3])
def main():
    """Run the codon statistics pipeline on a sequence file and a repeat file.

    Command line::

        python <script> seq rep

    The function only wires helpers together, in this order:

    1. ``codonSet()`` builds the codon list.
    2. ``readFasta(seq)`` loads the sequences.
    3. ``readRep(rep, dict)`` fills the repeat dict in place.
    4. ``originalSta`` returns a pair of dicts from the repeats and sequences.
    5. ``totalNumberProrep``, ``singlRatioProRep`` and ``repOrNot`` are called
       with those dicts and the repeat file name.

    What each helper computes or writes is not visible here.  The original
    author's notes say the first two compare codons inside repeats with the
    rest of each protein, and the last compares repeat-free proteins with
    repeat proteins whose repeats were removed -- confirm against the helpers.

    Exits with status 0 after the usage message when the argument count is
    wrong.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seq_path, rep_path = sys.argv[1:3]
    codons = codonSet()
    cds_by_locus = readFasta(seq_path)
    repeats = {}
    readRep(rep_path, repeats)
    rep_codons, seq_codons = originalSta(repeats, cds_by_locus)

    # Within-protein comparison (repeat codons versus the other codons).
    codon_counts = totalNumberProrep(rep_codons, seq_codons, rep_path)
    singlRatioProRep(rep_codons, seq_codons, rep_path, codons)

    # Comparison between proteins without repeats and proteins with their
    # repeats removed.
    repOrNot(codon_counts, rep_codons, seq_codons, rep_path, codons)
def main():
    """Parse a prospero report into repeat pairs and write them via outputRep.

    Command line::

        python <script> pep prospero outputfile [overlap percentage]

    The peptide file is read with ``readFasta``; its result is looked up by
    locus and sliced (presumably locus -> peptide string; confirm against the
    helper).  While reading the prospero file:

    * ``using sequence1 ...`` lines set the current locus to their last
      whitespace-separated token and start an empty list for it;
    * ``>`` lines must contain two ``from A to B`` ranges.  Both ranges are
      used as 1-based inclusive coordinates into the current sequence and
      recorded together as ``{(A, B): subsequence}``.

    If exactly one optional argument follows the output file name, it is read
    as a float, and pairs with
    ``(pos2 - pos3 + 1) / (pos4 - pos1 + 1)`` above it are dropped.

    Finally ``outputRep`` receives the collected dict and the output path.

    Exits with status 0 after the usage message when fewer than three
    arguments are supplied.

    Raises:
        ValueError: if the overlap argument is not numeric, if a ``>`` line
            does not have the expected layout, or if a ``>`` line precedes
            every ``using sequence1`` line.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        sys.stderr.write(
            'Using python %s pep prospero outputfile [overlap percentage]\n'
            % sys.argv[0])
        sys.exit(0)

    # The threshold does not change per line, so convert it a single time.
    threshold = None
    if len(sys.argv) == 5:
        threshold = float(sys.argv[4])

    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    seqDict = readFasta(sys.argv[1])
    repDict = {}
    locus = None
    seq = None
    # "with" guarantees the report is closed, including on a parse error.
    with open(sys.argv[2]) as report:
        for line in report:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
                continue
            if line[0] != '>':
                continue
            if locus is None:
                raise ValueError(
                    "alignment line found before any 'using sequence1' "
                    "line: %r" % line)
            match = pat.match(line)
            if match is None:
                raise ValueError(
                    'unrecognised alignment line for %s: %r' % (locus, line))
            pos1 = int(match.group(1))
            pos2 = int(match.group(2))
            pos3 = int(match.group(3))
            pos4 = int(match.group(4))
            if threshold is not None and \
                    (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0) > threshold:
                continue
            tmpDict = {}
            tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
            tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
            repDict[locus].append(tmpDict)
    outputRep(repDict, sys.argv[3])
def main():
    """Write, per locus, the CDS fragments covered by each repeat group.

    Command line::

        python <script> cdsfile repfile

    The repeat file is loaded through ``ctIO.readRep`` into a dict keyed by
    locus.  Each value is walked as a list of dicts keyed by
    ``(first, last)`` tuples.  The CDS file is loaded through
    ``ctIO.readFasta`` together with the sorted locus list, and the result is
    indexed by locus (presumably locus -> CDS string; confirm against ctIO).

    Output on stdout: a ``>locus`` line, then one line per position dict
    holding ``fragment:start`` items joined by ``#``.  ``start`` is the
    1-based offset of the fragment inside the CDS.

    Exit status is 0 after the usage message (wrong argument count) and 1
    when a tuple describes an empty or reversed range; in that case the locus
    and tuple are reported on stderr first.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    rep_by_locus = {}
    ctIO.readRep(sys.argv[2], rep_by_locus)
    ordered_loci = sorted(rep_by_locus)
    cds_lookup = ctIO.readFasta(sys.argv[1], ordered_loci)

    out = sys.stdout
    for locus in ordered_loci:
        out.write('>%s\n' % locus)
        cds = cds_lookup[locus]
        for group in rep_by_locus[locus]:
            pieces = []
            for bounds in sorted(group):
                # Scale by three: 1-based inclusive units become a 0-based
                # half-open slice (presumably residue -> nucleotide
                # coordinates; verify against the repeat file format).
                lo = 3 * (bounds[0] - 1)
                hi = 3 * bounds[1]
                if lo >= hi:
                    sys.stderr.write('%s %s\n' % (locus, bounds))
                    sys.exit(1)
                # Report the 1-based slice start (the author's 2011-08-25
                # patch replaced an earlier start + 3).
                pieces.append(cds[lo:hi] + ':' + str(lo + 1))
            out.write('#'.join(pieces) + '\n')
def main():
    """Drive the codon usage comparison for one sequence file and repeat file.

    Command line::

        python <script> seq rep

    Steps, in call order: ``codonSet()``, ``readFasta(seq)``,
    ``readRep(rep, dict)`` (fills the dict in place), ``originalSta`` (returns
    two dicts), then ``totalNumberProrep``, ``singlRatioProRep`` and
    ``repOrNot``.  The last three all receive the repeat file name as an
    argument.

    The helpers' outputs are not visible from this function.  According to
    the original comments they produce a within-protein comparison of repeat
    and non-repeat codons, followed by a comparison of repeat-free proteins
    against repeat proteins with the repeats deleted -- verify in the helpers.

    Exits with status 0 after the usage message when the argument count is
    wrong.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    rep_file = sys.argv[2]
    codon_names = codonSet()
    sequences = readFasta(sys.argv[1])
    rep_table = {}
    readRep(rep_file, rep_table)
    in_repeat, outside_repeat = originalSta(rep_table, sequences)

    # Repeat codons versus the remaining codons of the same protein.
    totals = totalNumberProrep(in_repeat, outside_repeat, rep_file)
    singlRatioProRep(in_repeat, outside_repeat, rep_file, codon_names)

    # Proteins without repeats versus proteins with their repeats removed.
    repOrNot(totals, in_repeat, outside_repeat, rep_file, codon_names)
def main():
    """Print a FASTA file with duplicate sequences removed.

    Command line::

        python <script> filename

    ``readFasta`` supplies a mapping that is iterated as (name, sequence)
    pairs.  A record is written to stdout as ``>name`` followed by the
    sequence only the first time its sequence is seen.  Which name survives
    for a duplicated sequence therefore depends on the mapping's iteration
    order.

    A banner is always written to stderr.  Exits with status 0 after the
    usage message when the argument count is wrong.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    records = readFasta(sys.argv[1])
    seen = set()
    for name, sequence in records.items():
        if sequence in seen:
            continue
        seen.add(sequence)
        sys.stdout.write('>%s\n%s\n' % (name, sequence))
def main():
    """Split a multi-record FASTA file into one file per record.

    Command line::

        python <script> filename

    ``readFasta`` supplies a mapping that is iterated as (name, sequence)
    pairs.  For each pair, a file named exactly ``name`` is created (or
    overwritten) in the current working directory, containing ``>name`` and
    the sequence on the following line.

    Two banner lines are always written to stderr.  Exits with status 0 after
    the usage message when the argument count is wrong.
    """
    sys.stderr.write("Print the result to files\n")
    # Adjacent literals replace the backslash continuation that used to sit
    # inside the string and leaked into the printed message.
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files with one sequence each\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    seqDict = readFasta(sys.argv[1])
    for key, value in seqDict.items():
        # NOTE(review): the record name is used verbatim as the output path,
        # so a name containing a path separator writes outside the current
        # directory -- confirm the inputs are trusted.
        # "with" closes the handle even if the write fails.
        with open(key, 'w') as fh:
            fh.write('>%s\n%s\n' % (key, value))
def main():
    """Emit each distinct sequence of a FASTA file once, on stdout.

    Command line::

        python <script> filename

    The mapping returned by ``readFasta`` is walked as (name, sequence)
    pairs.  A pair is printed as ``>name`` plus the sequence line unless an
    equal sequence was already printed, so the surviving name for duplicates
    follows the mapping's iteration order.

    Nothing is written to stderr on a normal run.  With a wrong argument
    count, the banner and usage line are written there and the process exits
    with status 0.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("Print the result to screen\n")
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    printed = set()
    for header, body in readFasta(sys.argv[1]).items():
        if body not in printed:
            printed.add(body)
            sys.stdout.write('>%s\n%s\n' % (header, body))
def main():
    """Write every record of a multi-record FASTA file to its own file.

    Command line::

        python <script> filename

    The mapping returned by ``readFasta`` is walked as (name, sequence)
    pairs.  Each pair is saved to a file whose path is the record name itself
    (relative names land in the current working directory), holding
    ``>name`` and then the sequence on the next line.  Existing files are
    overwritten.

    Two banner lines are always written to stderr.  Exits with status 0 after
    the usage message when the argument count is wrong.
    """
    sys.stderr.write("Print the result to files\n")
    # The message used to contain a backslash continuation inside the
    # literal; implicit concatenation keeps the printed text clean.
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files with one sequence each\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    seqDict = readFasta(sys.argv[1])
    for key, value in seqDict.items():
        # NOTE(review): the record name doubles as the file path; names with
        # path separators would escape the current directory -- confirm the
        # inputs are trusted.
        # The context manager releases the handle even when writing raises.
        with open(key, 'w') as fh:
            fh.write('>%s\n%s\n' % (key, value))
def main(): print >>sys.stderr, "Print the result to screen" if len(sys.argv) != 4: print >>sys.stderr, 'Using python %s filename subjS atseq' % sys.argv[0] sys.exit(0) #--------------------------------------------------- subjSDict = readSubjS(sys.argv[2]) atDict = readFasta(sys.argv[3]) at = 1 for line in open(sys.argv[1]): if line[0] == '=': group = line[1:].split()[1] at = 1 #label the following locus is Arabidopsis elif line[0] == '>': if at: locus = (line[1:].rsplit('.', 1))[0] seq = atDict[locus] at = 0 else: locus = line[1:-1] seq = subjSDict[locus] #-------------------------------- else: