def main():
    """Write each repetition group of a repfile to its own FASTA-style file.

    Command line: ``python <script> repfile``.

    ``readRep`` is called with the repfile path and an empty dict and is
    expected to fill that dict in place; the loop below treats it as
    ``{locus: [group, ...]}`` where every group maps a position tuple to a
    sequence string.  Group ``n`` (counted from 1) of a locus is written to
    ``<locus>.<n>.short`` when its longest sequence has at most 30
    characters and to ``<locus>.<n>.long`` otherwise.  Records are written
    in sorted position order as ``><locus>.<n>.<pos[0]>:<pos[1]>`` followed
    by the sequence on the next line.

    A wrong argument count prints the usage line to stderr and exits with
    status 0.
    """
    sys.stderr.write(
        "Transfer repetitions to multiple sequence alignment files. "
        "One geoup in one file. Waiting for alignment.\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)

    midlen = 30  # longest sequence length still classified as "short"
    for locus, valueL in repDict.items():
        for group, groupD in enumerate(valueL, 1):
            # An empty group counts as length 0, i.e. "short".
            longest = max([len(seq) for seq in groupD.values()] or [0])
            suffix = '.short' if longest <= midlen else '.long'
            outname = locus + '.' + str(group) + suffix
            # ``with`` closes the file even when a write fails.
            with open(outname, 'w') as fh:
                for pos in sorted(groupD):
                    posn = ':'.join((str(pos[0]), str(pos[1])))
                    fh.write('>%s.%s.%s\n%s\n'
                             % (locus, str(group), posn, groupD[pos]))
def main():
    """Print the coding sequence of every repeat, one line per repeat group.

    Command line: ``python <script> cdsfile repfile``.

    The repfile is read with ``ctIO.readRep`` and the CDS file with
    ``ctIO.readFasta`` (restricted to the sorted loci of the repfile).  For
    each locus a ``>locus`` header is printed, then one line per repeat
    group.  A position ``(first, last)`` is converted to the slice
    ``[(first - 1) * 3, last * 3)`` of the CDS, i.e. positions are treated
    as 1-based codon indices.  Each entry is ``<slice>:<start + 1>`` and the
    entries of a group are joined with ``#``.

    A position whose slice would be empty is reported on stderr and ends
    the program with status 1.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    cds_path = sys.argv[1]
    rep_path = sys.argv[2]

    repDict = {}
    ctIO.readRep(rep_path, repDict)
    loci = sorted(repDict.keys())
    cdsDict = ctIO.readFasta(cds_path, loci)

    for locus in loci:
        sys.stdout.write('>%s\n' % locus)
        cds = cdsDict[locus]
        for group in repDict[locus]:
            pieces = []
            for span in sorted(group.keys()):
                start = 3 * (span[0] - 1)
                end = 3 * span[1]
                if end <= start:
                    sys.stderr.write('%s %s\n' % (locus, str(span)))
                    sys.exit(1)
                # The reported coordinate is the 1-based nucleotide start
                # (the original notes this as a bug patch of 2011-08-25).
                pieces.append(':'.join((cds[start:end], str(start + 1))))
            sys.stdout.write('#'.join(pieces) + '\n')
def main():
    """Drive the codon statistics for repeats versus the rest of each CDS.

    Command line: ``python <script> seq rep``.

    The work is delegated to helpers defined elsewhere in the file, called
    in this order: ``codonSet``, ``readFasta``, ``readRep``, ``originalSta``,
    ``totalNumberProrep``, ``singlRatioProRep`` and ``repOrNot``.  The
    repfile path is handed to the last three as well.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seq_path = sys.argv[1]
    rep_path = sys.argv[2]

    codonList = codonSet()
    cdsDict = readFasta(seq_path)
    repDict = {}
    readRep(rep_path, repDict)
    codonRepDict, codonSeqDict = originalSta(repDict, cdsDict)

    # Comparison inside proteins that carry repeats: per the original
    # notes, codons within repeats against codons of the remaining
    # sequence (bar graph of counts, heatmap of per-protein ratios).
    codonNumSeq = totalNumberProrep(codonRepDict, codonSeqDict, rep_path)
    singlRatioProRep(codonRepDict, codonSeqDict, rep_path, codonList)

    # Comparison of proteins without repeats against proteins whose
    # repeats were removed (again per the original notes).
    repOrNot(codonNumSeq, codonRepDict, codonSeqDict, rep_path, codonList)
def main():
    """Print in which third of its protein each repeat lies.

    Command line: ``python <script> seq.for.c rep``.

    ``readSeq`` and ``readRep`` are expected to fill ``seqDict`` and
    ``repDict`` in place.  For every locus the sequence length is split
    with ``first = length // 3`` and ``second = 2 * first``.  For every
    repeat position tuple the midpoint ``sum(position) // 2`` is classified
    and one number per repeat is printed to stdout:

    * ``-1`` -- midpoint before ``first`` (N-terminal third)
    * ``0``  -- midpoint between ``first`` and ``second`` (middle third)
    * ``1``  -- midpoint after ``second`` (C-terminal third)

    A midpoint exactly on a boundary is assigned to one of the two
    neighbouring segments by ``randint(0, 1)``.

    A locus that is missing from ``seqDict`` raises ``KeyError``.
    """
    # The banner words used to run together at the line continuations
    # ("proteininto", "bymidpoint"); they are separated by one space now.
    sys.stderr.write(
        "position distribution, divide protein into three equal length "
        "segments, N-, mid-, C- terminal. Detect by midpoint of "
        "repetitions. If located in boundary, random choose\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq.for.c rep\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    seqDict = {}
    readSeq(sys.argv[1], seqDict)
    readRep(sys.argv[2], repDict)

    for key, valueL in repDict.items():
        length = len(seqDict[key])
        # Explicit floor division: same result as the Python 2 integer
        # ``/`` and not dependent on the interpreter's division rules.
        first = length // 3
        second = first * 2
        for valueD in valueL:
            for keys in valueD:
                mid = sum(keys) // 2
                if mid < first:
                    segment = -1
                elif mid == first:
                    segment = -1 if randint(0, 1) else 0
                elif mid < second:
                    segment = 0
                elif mid == second:
                    segment = 0 if randint(0, 1) else 1
                else:
                    segment = 1
                sys.stdout.write('%d\n' % segment)
def main():
    """Drive the codon statistics for repeats versus the rest of each CDS.

    Command line: ``python <script> seq rep``.

    The work is delegated to helpers defined elsewhere in the file, called
    in this order: ``codonSet``, ``readFasta``, ``readRep``, ``originalSta``,
    ``totalNumberProrep``, ``singlRatioProRep`` and ``repOrNot``.  The
    repfile path is handed to the last three as well.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seq_path = sys.argv[1]
    rep_path = sys.argv[2]

    codonList = codonSet()
    cdsDict = readFasta(seq_path)
    repDict = {}
    readRep(rep_path, repDict)
    codonRepDict, codonSeqDict = originalSta(repDict, cdsDict)

    # Comparison inside proteins that carry repeats: per the original
    # notes, codons within repeats against codons of the remaining
    # sequence (bar graph of counts, heatmap of per-protein ratios).
    codonNumSeq = totalNumberProrep(codonRepDict, codonSeqDict, rep_path)
    singlRatioProRep(codonRepDict, codonSeqDict, rep_path, codonList)

    # Comparison of proteins without repeats against proteins whose
    # repeats were removed (again per the original notes).
    repOrNot(codonNumSeq, codonRepDict, codonSeqDict, rep_path, codonList)
def main():
    """Write every repeat group to a FASTA file and align it with t_coffee.

    Command line: ``python <script> repfile``.

    ``readRep`` is expected to fill ``repdict`` in place as
    ``{locus: [group, ...]}`` with each group mapping a position tuple to a
    sequence string.  Group ``i`` (counted from 0) of a locus is written to
    ``<locus><i>.fasta`` with one ``>pos<start>-<end>`` record per position
    in sorted order, and ``t_coffee <file>`` is then run on that file.

    t_coffee is started directly with an argument list rather than through
    a shell command string, so unusual characters in a locus name cannot
    change the command.  A failure to start t_coffee is reported on stderr
    and processing continues with the next group.
    """
    import subprocess  # local import: only this entry point needs it

    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    repdict = {}
    readRep(sys.argv[1], repdict)

    for locus, valueL in repdict.items():
        for index, seqDict in enumerate(valueL):
            filename = locus + str(index) + '.fasta'
            records = ['>pos%s-%s\n%s\n' % (pos[0], pos[1], seqDict[pos])
                       for pos in sorted(seqDict)]
            # ``with`` closes the file even when the write fails.
            with open(filename, 'w') as fh:
                fh.write(''.join(records))
            try:
                subprocess.call(['t_coffee', filename])
            except OSError as exc:
                # os.system only returned a status when the program was
                # missing; keep going instead of aborting the whole run.
                sys.stderr.write('Cannot run t_coffee on %s: %s\n'
                                 % (filename, exc))
def main():
    """Write every repeat group to a FASTA file and align it with t_coffee.

    Command line: ``python <script> repfile``.

    ``readRep`` is expected to fill ``repdict`` in place as
    ``{locus: [group, ...]}`` with each group mapping a position tuple to a
    sequence string.  Group ``i`` (counted from 0) of a locus is written to
    ``<locus><i>.fasta`` with one ``>pos<start>-<end>`` record per position
    in sorted order, and ``t_coffee <file>`` is then run on that file.

    t_coffee is started directly with an argument list rather than through
    a shell command string, so unusual characters in a locus name cannot
    change the command.  A failure to start t_coffee is reported on stderr
    and processing continues with the next group.
    """
    import subprocess  # local import: only this entry point needs it

    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    repdict = {}
    readRep(sys.argv[1], repdict)

    for locus, valueL in repdict.items():
        for index, seqDict in enumerate(valueL):
            filename = locus + str(index) + '.fasta'
            records = ['>pos%s-%s\n%s\n' % (pos[0], pos[1], seqDict[pos])
                       for pos in sorted(seqDict)]
            # ``with`` closes the file even when the write fails.
            with open(filename, 'w') as fh:
                fh.write(''.join(records))
            try:
                subprocess.call(['t_coffee', filename])
            except OSError as exc:
                # os.system only returned a status when the program was
                # missing; keep going instead of aborting the whole run.
                sys.stderr.write('Cannot run t_coffee on %s: %s\n'
                                 % (filename, exc))
def main():
    """Print in which third of its protein each repeat lies.

    Command line: ``python <script> seq.for.c rep``.

    ``readSeq`` and ``readRep`` are expected to fill ``seqDict`` and
    ``repDict`` in place.  For every locus the sequence length is split
    with ``first = length // 3`` and ``second = 2 * first``.  For every
    repeat position tuple the midpoint ``sum(position) // 2`` is classified
    and one number per repeat is printed to stdout:

    * ``-1`` -- midpoint before ``first`` (N-terminal third)
    * ``0``  -- midpoint between ``first`` and ``second`` (middle third)
    * ``1``  -- midpoint after ``second`` (C-terminal third)

    A midpoint exactly on a boundary is assigned to one of the two
    neighbouring segments by ``randint(0, 1)``.

    A locus that is missing from ``seqDict`` raises ``KeyError``.
    """
    # The banner words used to run together at the line continuations
    # ("proteininto", "bymidpoint"); they are separated by one space now.
    sys.stderr.write(
        "position distribution, divide protein into three equal length "
        "segments, N-, mid-, C- terminal. Detect by midpoint of "
        "repetitions. If located in boundary, random choose\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq.for.c rep\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    seqDict = {}
    readSeq(sys.argv[1], seqDict)
    readRep(sys.argv[2], repDict)

    for key, valueL in repDict.items():
        length = len(seqDict[key])
        # Explicit floor division: same result as the Python 2 integer
        # ``/`` and not dependent on the interpreter's division rules.
        first = length // 3
        second = first * 2
        for valueD in valueL:
            for keys in valueD:
                mid = sum(keys) // 2
                if mid < first:
                    segment = -1
                elif mid == first:
                    segment = -1 if randint(0, 1) else 0
                elif mid < second:
                    segment = 0
                elif mid == second:
                    segment = 0 if randint(0, 1) else 1
                else:
                    segment = 1
                sys.stdout.write('%d\n' % segment)
def main():
    """Split repeat groups into low- and high-complexity output files.

    Command line: ``python <script> filename threshold``.

    ``readRep`` is expected to fill ``repDict`` in place as
    ``{locus: [group, ...]}``.  For each group the entropy is the mean of
    ``si(sequence)`` over the group's distinct sequences.  Groups whose
    entropy is at most ``threshold`` are collected with ``saveDict`` into
    ``lcsDict``, all others into ``regularDict``; both dicts are handed to
    ``outputRep`` with the file names ``<basename>.LCSs`` and
    ``<basename>.HCSs``, where ``<basename>`` is the part of ``filename``
    after the last ``/``.

    The threshold is parsed with ``float`` so fractional cut-offs are
    accepted; integer thresholds compare exactly as before.  A group
    without any sequence raises ``ZeroDivisionError``.
    """
    sys.stderr.write(
        "Using the average shannonIndex value of a group sequences to "
        "represent the last entropy.\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        # "filename" and "threshold" used to run together in this message.
        sys.stderr.write(
            'Using python %s filename '
            'threshold(2)[threethe more the high complexity]\n'
            % sys.argv[0])
        sys.exit(0)

    # The three dicts share the same structure.
    repDict = {}
    lcsDict = {}      # low complexity sequences
    regularDict = {}  # regular (high complexity) sequences
    readRep(sys.argv[1], repDict)
    lcs = float(sys.argv[2])

    for locus, valueL in repDict.items():
        for itemD in valueL:
            distinct = set(itemD.values())
            entropy = sum(si(item) for item in distinct) / len(distinct)
            if entropy <= lcs:
                saveDict(lcsDict, locus, itemD)
            else:
                saveDict(regularDict, locus, itemD)

    prefile = sys.argv[1].split('/')[-1]
    outputRep(lcsDict, prefile + '.LCSs')
    outputRep(regularDict, prefile + '.HCSs')
def main():
    """Write each repetition group of a repfile to its own FASTA-style file.

    Command line: ``python <script> repfile``.

    ``readRep`` is called with the repfile path and an empty dict and is
    expected to fill that dict in place; the loop below treats it as
    ``{locus: [group, ...]}`` where every group maps a position tuple to a
    sequence string.  Group ``n`` (counted from 1) of a locus is written to
    ``<locus>.<n>.short`` when its longest sequence has at most 30
    characters and to ``<locus>.<n>.long`` otherwise.  Records are written
    in sorted position order as ``><locus>.<n>.<pos[0]>:<pos[1]>`` followed
    by the sequence on the next line.

    A wrong argument count prints the usage line to stderr and exits with
    status 0.
    """
    sys.stderr.write(
        "Transfer repetitions to multiple sequence alignment files. "
        "One geoup in one file. Waiting for alignment.\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)

    midlen = 30  # longest sequence length still classified as "short"
    for locus, valueL in repDict.items():
        for group, groupD in enumerate(valueL, 1):
            # An empty group counts as length 0, i.e. "short".
            longest = max([len(seq) for seq in groupD.values()] or [0])
            suffix = '.short' if longest <= midlen else '.long'
            outname = locus + '.' + str(group) + suffix
            # ``with`` closes the file even when a write fails.
            with open(outname, 'w') as fh:
                for pos in sorted(groupD):
                    posn = ':'.join((str(pos[0]), str(pos[1])))
                    fh.write('>%s.%s.%s\n%s\n'
                             % (locus, str(group), posn, groupD[pos]))
def main():
    """Print the coding sequence of every repeat, one line per repeat group.

    Command line: ``python <script> cdsfile repfile``.

    The repfile is read with ``ctIO.readRep`` and the CDS file with
    ``ctIO.readFasta`` (restricted to the sorted loci of the repfile).  For
    each locus a ``>locus`` header is printed, then one line per repeat
    group.  A position ``(first, last)`` is converted to the slice
    ``[(first - 1) * 3, last * 3)`` of the CDS, i.e. positions are treated
    as 1-based codon indices.  Each entry is ``<slice>:<start + 1>`` and the
    entries of a group are joined with ``#``.

    A position whose slice would be empty is reported on stderr and ends
    the program with status 1.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    cds_path = sys.argv[1]
    rep_path = sys.argv[2]

    repDict = {}
    ctIO.readRep(rep_path, repDict)
    loci = sorted(repDict.keys())
    cdsDict = ctIO.readFasta(cds_path, loci)

    for locus in loci:
        sys.stdout.write('>%s\n' % locus)
        cds = cdsDict[locus]
        for group in repDict[locus]:
            pieces = []
            for span in sorted(group.keys()):
                start = 3 * (span[0] - 1)
                end = 3 * span[1]
                if end <= start:
                    sys.stderr.write('%s %s\n' % (locus, str(span)))
                    sys.exit(1)
                # The reported coordinate is the 1-based nucleotide start
                # (the original notes this as a bug patch of 2011-08-25).
                pieces.append(':'.join((cds[start:end], str(start + 1))))
            sys.stdout.write('#'.join(pieces) + '\n')
def main():
    """Read a repfile with ``readRep`` and write it back with ``outputRep``.

    Command line: ``python <script> filename``.

    The output name passed to ``outputRep`` is the input name with the
    suffix ``ctIO.test`` appended.  A wrong argument count prints the usage
    line to stderr and exits with status 0.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    source = sys.argv[1]
    repDict = {}
    readRep(source, repDict)
    outputRep(repDict, source + 'ctIO.test')
def main():
    """Convert a merged repetition file to FASTA via ``transfer``.

    Command line: ``python <script> filename``.

    ``readRep`` is expected to fill ``repDict`` in place; the dict is then
    handed to ``transfer``, which according to the banner writes to stdout.

    The usage line is written to stderr so that it can never end up in a
    redirected result; a wrong argument count still exits with status 0.
    """
    sys.stderr.write(
        'used to transfer merged repetition file to fasta, '
        'the output is STDOUT.\n')
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)
    transfer(repDict)
def main():
    """Read a repfile with ``readRep`` and write it back with ``outputRep``.

    Command line: ``python <script> filename``.

    The output name passed to ``outputRep`` is the input name with the
    suffix ``ctIO.test`` appended.  A wrong argument count prints the usage
    line to stderr and exits with status 0.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    source = sys.argv[1]
    repDict = {}
    readRep(source, repDict)
    outputRep(repDict, source + 'ctIO.test')
def main():
    """Convert a merged repetition file to FASTA via ``transfer``.

    Command line: ``python <script> filename``.

    ``readRep`` is expected to fill ``repDict`` in place; the dict is then
    handed to ``transfer``, which according to the banner writes to stdout.

    The usage line is written to stderr so that it can never end up in a
    redirected result; a wrong argument count still exits with status 0.
    """
    sys.stderr.write(
        'used to transfer merged repetition file to fasta, '
        'the output is STDOUT.\n')
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)
    transfer(repDict)
def main():
    """Convert a merged repetition file to FASTA via ``transfer``.

    Command line: ``python <script> filename``.

    ``readRep`` is expected to fill ``repDict`` in place; the dict is then
    handed to ``transfer``, which according to the banner writes to stdout.

    Both banners and the usage line go to stderr so that they can never
    end up in a redirected result; a wrong argument count still exits with
    status 0.
    """
    # "to" and "multiple" used to run together at the line continuation.
    sys.stderr.write(
        "Split a multiple sequence fasta file to multiple files "
        "aligned together using t_coffee\n")
    sys.stderr.write(
        'used to transfer merged repetition file to fasta, '
        'the output is STDOUT.\n')
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)
    transfer(repDict)
def main():
    """Print the mean ``si`` value of every repeat group, one per line.

    Command line: ``python <script> filename``.

    ``readRep`` is expected to fill ``repDict`` in place as
    ``{locus: [group, ...]}``.  For each group the distinct sequences are
    scored with ``si`` (the banner calls it the Shannon index), the scores
    are averaged, and the mean is printed with two decimals.
    """
    sys.stderr.write(
        "Using the average shannonIndex value of a group sequences to "
        "represent the last entropy.\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)

    for groups in repDict.values():
        for group in groups:
            # Duplicated sequences inside a group are scored only once.
            distinct = set(group.values())
            mean_entropy = sum(si(seq) for seq in distinct) / len(distinct)
            sys.stdout.write("%.2f\n" % mean_entropy)
def main():
    """Print the mean ``si`` value of every repeat group, one per line.

    Command line: ``python <script> filename``.

    ``readRep`` is expected to fill ``repDict`` in place as
    ``{locus: [group, ...]}``.  For each group the distinct sequences are
    scored with ``si`` (the banner calls it the Shannon index), the scores
    are averaged, and the mean is printed with two decimals.
    """
    sys.stderr.write(
        "Using the average shannonIndex value of a group sequences to "
        "represent the last entropy.\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)

    for groups in repDict.values():
        for group in groups:
            # Duplicated sequences inside a group are scored only once.
            distinct = set(group.values())
            mean_entropy = sum(si(seq) for seq in distinct) / len(distinct)
            sys.stdout.write("%.2f\n" % mean_entropy)
def _run_psiblast(query, db, out, extra_args):
    """Run one 5-iteration psiblast search; report a failure to start it."""
    import subprocess  # local import keeps this block self-contained

    command = ['psiblast', '-query', query, '-db', db, '-out', out,
               '-num_iterations', '5'] + list(extra_args)
    try:
        # An argument list (no shell) keeps odd characters in locus names
        # or paths from altering the command.
        subprocess.call(command)
    except OSError as exc:
        # os.system merely returned a status when psiblast was missing;
        # report and carry on instead of aborting the whole run.
        sys.stderr.write('Cannot run psiblast for %s: %s\n' % (query, exc))


def main():
    """Search every locus' repeats against its ortholog database.

    Command line: ``python <script> repfile dbpath/``.

    ``readRep`` is expected to fill ``repDict`` in place as
    ``{locus: [group, ...]}`` with each group mapping a position tuple to a
    sequence.  For each locus the database is ``dbpath + locus``; loci for
    which that path does not exist are skipped and counted, and the count
    is printed to stdout at the end.

    For the remaining loci two query files are written,
    ``<locus><label>.short`` and ``<locus><label>.long``, where ``label``
    is ``.LCSs`` or ``.HCSs`` when the repfile name contains that text and
    empty otherwise.  Within a group identical sequences are merged into a
    single record ``><locus>.<group>.<start1>:<start2>...``; sequences of
    at most 30 characters go to the short file, longer ones to the long
    file.  psiblast is then run four times: short and long queries, each
    once with the default report (``.out``) and once with ``-outfmt 7``
    (``.table``).  Short queries additionally get
    ``-evalue 20000 -matrix PAM30 -comp_based_stats 0 -word_size 2``.
    """
    # "orthologs," and "use" used to run together at the continuation.
    sys.stderr.write(
        "To detect the conservation among orthologs, use repetitions as "
        "the query and its related orthologs as db(after makeblastdb). \n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repfile dbpath/\n' % sys.argv[0])
        sys.exit(0)

    repfile = sys.argv[1]
    if 'LCSs' in repfile:
        label = '.LCSs'
    elif 'HCSs' in repfile:
        label = '.HCSs'
    else:
        # Patched 2011-09-22 in the original: files that are neither LCSs
        # nor HCSs need an initial (empty) label.
        label = ''

    noOrtho = 0
    path = sys.argv[2]
    repDict = {}
    readRep(repfile, repDict)

    midlen = 30  # longest sequence still written to the "short" query
    short_opts = ['-evalue', '20000', '-matrix', 'PAM30',
                  '-comp_based_stats', '0', '-word_size', '2']

    for locus, valueL in repDict.items():
        tmppath = path + locus
        if not os.path.exists(tmppath):
            noOrtho += 1
            continue

        short_name = locus + label + '.short'
        long_name = locus + label + '.long'
        # ``with`` closes both query files even if a write fails.
        with open(short_name, 'w') as fhshort:
            with open(long_name, 'w') as fhlong:
                for group, groupD in enumerate(valueL, 1):
                    starts_by_seq = {}
                    for pos in sorted(groupD):
                        starts_by_seq.setdefault(groupD[pos], []).append(
                            str(pos[0]))
                    for seq in sorted(starts_by_seq):
                        target = fhshort if len(seq) <= midlen else fhlong
                        target.write(
                            '>%s.%s.%s\n%s\n'
                            % (locus, str(group),
                               ':'.join(starts_by_seq[seq]), seq))

        _run_psiblast(short_name, tmppath, short_name + '.out', short_opts)
        _run_psiblast(long_name, tmppath, long_name + '.out', [])
        _run_psiblast(short_name, tmppath, short_name + '.table',
                      short_opts + ['-outfmt', '7'])
        _run_psiblast(long_name, tmppath, long_name + '.table',
                      ['-outfmt', '7'])

    sys.stdout.write('%s\n' % noOrtho)
def _relation_symbol(begin, end, domains):
    """Return the tag describing how repeat [begin, end] meets the domains.

    The domains are checked in order and the first one that matches
    decides the tag:

    * ``@`` -- repeat and domain overlap partially
    * ``^`` -- repeat lies inside the domain
    * ``$`` -- domain lies inside the repeat (or both coincide)
    * ``!`` -- no domain matched (also used when there are no domains)
    """
    for domain in domains:
        ds = domain[0]
        de = domain[1]
        if (ds < begin < de and end > de) or (begin < ds < end and end < de):
            return '@'
        if (begin > ds and end <= de) or (begin == ds and end < de):
            return '^'
        if begin <= ds and end >= de:
            return '$'
    return '!'


def main():
    """Tag every repeat with its relation to the locus' InterPro domains.

    Command line: ``python <script> repname interpro``.

    ``readInterpro`` returns a dict that is indexed by locus and whose
    entries are iterated as ``(start, end)`` domain positions.  ``readRep``
    is expected to fill ``repDict`` in place; the original documents its
    shape as::

        {'AT1G62760': [{(25, 31): 'SSLSPSS', (51, 57): 'SSLSPSS'},
                       {(52, 60): 'SLSPSSPPP'}]}

    Each repeat sequence gets ``:<begin><tag>`` appended in place, with the
    tag chosen by ``_relation_symbol``; the annotated dict is then passed
    to ``output``.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repname interpro\n' % sys.argv[0])
        sys.exit(0)

    aDict = readInterpro(sys.argv[2])
    repDict = {}
    readRep(sys.argv[1], repDict)

    for locus, valueL in repDict.items():
        # A locus without domains behaves like an empty domain list:
        # every repeat is tagged '!'.
        domains = aDict[locus] if locus in aDict else ()
        for dictrep in valueL:
            for posset in list(dictrep.keys()):
                begin = posset[0]
                end = posset[1]
                tag = _relation_symbol(begin, end, domains)
                dictrep[posset] += ':' + str(begin) + tag

    output(repDict)
def _relation_symbol(begin, end, domains):
    """Return the tag describing how repeat [begin, end] meets the domains.

    The domains are checked in order and the first one that matches
    decides the tag:

    * ``@`` -- repeat and domain overlap partially
    * ``^`` -- repeat lies inside the domain
    * ``$`` -- domain lies inside the repeat (or both coincide)
    * ``!`` -- no domain matched (also used when there are no domains)
    """
    for domain in domains:
        ds = domain[0]
        de = domain[1]
        if (ds < begin < de and end > de) or (begin < ds < end and end < de):
            return '@'
        if (begin > ds and end <= de) or (begin == ds and end < de):
            return '^'
        if begin <= ds and end >= de:
            return '$'
    return '!'


def main():
    """Tag every repeat with its relation to the locus' InterPro domains.

    Command line: ``python <script> repname interpro``.

    ``readInterpro`` returns a dict that is indexed by locus and whose
    entries are iterated as ``(start, end)`` domain positions.  ``readRep``
    is expected to fill ``repDict`` in place; the original documents its
    shape as::

        {'AT1G62760': [{(25, 31): 'SSLSPSS', (51, 57): 'SSLSPSS'},
                       {(52, 60): 'SLSPSSPPP'}]}

    Each repeat sequence gets ``:<begin><tag>`` appended in place, with the
    tag chosen by ``_relation_symbol``; the annotated dict is then passed
    to ``output``.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repname interpro\n' % sys.argv[0])
        sys.exit(0)

    aDict = readInterpro(sys.argv[2])
    repDict = {}
    readRep(sys.argv[1], repDict)

    for locus, valueL in repDict.items():
        # A locus without domains behaves like an empty domain list:
        # every repeat is tagged '!'.
        domains = aDict[locus] if locus in aDict else ()
        for dictrep in valueL:
            for posset in list(dictrep.keys()):
                begin = posset[0]
                end = posset[1]
                tag = _relation_symbol(begin, end, domains)
                dictrep[posset] += ':' + str(begin) + tag

    output(repDict)
def main():
    """Print a LaTeX report with one section per locus to stdout.

    Options come from ``cmdpara(sys.argv)``; the attributes read here are
    ``seqfile``, ``seqrepfile``, ``repfile``, ``locusfile``, ``anno`` and
    ``interpro``.  Sequences are loaded from ``seqfile`` and/or
    ``seqrepfile``, repeats from ``seqrepfile`` and/or ``repfile``.  The
    loci reported are the lines of ``locusfile`` when given, otherwise the
    keys of the repeat dict (or of the sequence dict when no repeats were
    loaded).  Annotations and InterPro domains are only loaded when
    repeats or a locus file are present.

    For each known locus the sequence is decorated by ``modifySeq`` with
    the marks collected by ``getnewDictDomain`` / ``getnewDictRep`` and
    printed inside a ``\\DNA!...!`` block, followed by the repeat and the
    domain descriptions.  Loci without a sequence are reported on stderr.

    NOTE(review): the line layout of the multi-line LaTeX strings and the
    nesting of the blank-line prints were reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    (options, args) = cmdpara(sys.argv)
    sys.stderr.write("*******Print the result to screen.*******\n")
    emit = sys.stdout.write

    # ---- input --------------------------------------------------------
    isRep = False
    isLoc = False
    if options.seqfile is not None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = True
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = True
    if options.locusfile is not None:
        locusList = [line.strip() for line in open(options.locusfile)]
        isLoc = True
    if not isLoc:
        locusList = repdict.keys() if isRep else seqdict.keys()
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    # ---- report -------------------------------------------------------
    latexHead()
    latexExplain()
    dotted_rule = "." * 100

    for locus in locusList:
        if locus not in seqdict:
            sys.stderr.write("Unknown locus %s\n" % locus)
            continue

        residues = list(seqdict[locus])
        newDict = {}  # marks to apply to the sequence

        hasInterpro = locus in interproDict
        if hasInterpro:
            domainDespList = []
            domainPosL = interproDict[locus].keys()
            getnewDictDomain(interproDict[locus], domainDespList, newDict)
        else:
            domainPosL = []

        if isRep:
            repDespList = []
            repdictSonL = repdict[locus]
            num = len(repdictSonL)
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locus, num, repDespList)

        modifySeq(residues, newDict)

        emit("".join((r"\section{", locus, "}")) + "\n")

        # Escape the characters LaTeX treats specially, in this order.
        annos = annodict[locus]
        for special in ("_", "%", "~", "&"):
            annos = annos.replace(special, "\\" + special)
        # The last two characters of the locus id are dropped for \tair.
        annos = r"\tair{" + locus[:-2] + "} " + annos
        emit(" ".join((r"\anno{", annos, "}")) + "\n")

        emit(r"""
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!""" + "\n")
        emit("".join(residues) + "\n")
        emit(r"""!
\end{minipage}
""" + "\n")

        emit("\n")
        emit(dotted_rule + "\n")
        emit("\n")

        if isRep:
            for repSeq in repDespList:
                emit(repSeq + "\n")
                emit("\n")

        if hasInterpro:
            emit(dotted_rule + "\n")
            emit("\n")
            for domainDesp in domainDespList:
                emit(domainDesp + "\n")
                emit("\n")

        emit(r"\clearpage" + "\n")
        emit("\n")

    latexTail()
def main():
    """Print a LaTeX report with one section per locus to stdout.

    Options come from ``cmdpara(sys.argv)``; the attributes read here are
    ``sort``, ``seqfile``, ``seqrepfile``, ``repfile``, ``locusfile``,
    ``anno`` and ``interpro``.  Sequences are loaded from ``seqfile``
    and/or ``seqrepfile``, repeats from ``seqrepfile`` and/or ``repfile``.
    The loci reported are the lines of ``locusfile`` when given, otherwise
    the keys of the repeat dict (or of the sequence dict when no repeats
    were loaded).  Annotations and InterPro domains are only loaded when
    repeats or a locus file are present.

    For each known locus the sequence is decorated by ``modifySeq`` with
    the marks collected by ``getnewDictDomain`` / ``getnewDictRep`` and
    printed inside a ``\\DNA!...!`` block, followed by the repeat and the
    domain descriptions.  The section title is the locus id plus the
    annotation text before its first ``[``.  Loci without a sequence are
    reported on stderr.

    The original started with a debugging stub that printed ``sort`` or
    ``no sort`` and exited with status 1 in both cases, which made the
    whole report unreachable; the stub is removed here.

    NOTE(review): loci taken from the dict keys are always sorted; loci
    read from ``locusfile`` keep their file order unless ``options.sort``
    is set.  That reading of the unfinished ``sort`` option, the line
    layout of the multi-line LaTeX strings and the nesting of the
    blank-line prints were reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    (options, args) = cmdpara(sys.argv)
    sys.stderr.write("*******Print the result to screen.*******\n")
    emit = sys.stdout.write

    # ---- input --------------------------------------------------------
    isRep = False
    isLoc = False
    if options.seqfile is not None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = True
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = True
    if options.locusfile is not None:
        # ``with`` closes the locus file once it has been read.
        with open(options.locusfile) as handle:
            locusList = [line.strip() for line in handle]
        isLoc = True
    if not isLoc:
        locusList = list(repdict.keys() if isRep else seqdict.keys())
    if options.sort or not isLoc:
        locusList.sort()
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    # ---- report -------------------------------------------------------
    latexHead()
    latexExplain()
    dotted_rule = '.' * 100

    for locus in locusList:
        if locus not in seqdict:
            sys.stderr.write("Unknown locus %s\n" % locus)
            continue

        residues = list(seqdict[locus])
        newDict = {}  # marks to apply to the sequence

        hasInterpro = locus in interproDict
        if hasInterpro:
            domainDespList = []
            domainPosL = interproDict[locus].keys()
            getnewDictDomain(interproDict[locus], domainDespList, newDict)
        else:
            domainPosL = []

        if isRep:
            repDespList = []
            repdictSonL = repdict[locus]
            num = len(repdictSonL)
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locus, num, repDespList)

        modifySeq(residues, newDict)

        # Escape the characters LaTeX treats specially, in this order.
        annos = annodict[locus]
        for special in ('_', '%', '~', '&'):
            annos = annos.replace(special, '\\' + special)

        # Short title: the annotation text in front of the first '['.
        firstBr = annos.find('[')
        shortAnno = annos[:firstBr] if firstBr != -1 else ''
        emit(''.join((r'\section{', locus, ' ', shortAnno, '}')) + '\n')

        # The last two characters of the locus id are dropped for \tair.
        annos = r'\tair{' + locus[:-2] + '} ' + annos
        emit(' '.join((r'\anno{', annos, '}')) + '\n')

        emit(r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!''' + '\n')
        emit(''.join(residues) + '\n')
        emit(r'''!
\end{minipage}
''' + '\n')

        emit('\n')
        emit(dotted_rule + '\n')
        emit('\n')

        if isRep:
            for repSeq in repDespList:
                emit(repSeq + '\n')
                emit('\n')

        if hasInterpro:
            emit(dotted_rule + '\n')
            emit('\n')
            for domainDesp in domainDespList:
                emit(domainDesp + '\n')
                emit('\n')

        emit(r'\clearpage' + '\n')
        emit('\n')

    latexTail()
def _run_psiblast(query, db, out, extra_args):
    """Run one 5-iteration psiblast search; report a failure to start it."""
    import subprocess  # local import keeps this block self-contained

    command = ['psiblast', '-query', query, '-db', db, '-out', out,
               '-num_iterations', '5'] + list(extra_args)
    try:
        # An argument list (no shell) keeps odd characters in locus names
        # or paths from altering the command.
        subprocess.call(command)
    except OSError as exc:
        # os.system merely returned a status when psiblast was missing;
        # report and carry on instead of aborting the whole run.
        sys.stderr.write('Cannot run psiblast for %s: %s\n' % (query, exc))


def main():
    """Search every locus' repeats against its ortholog database.

    Command line: ``python <script> repfile dbpath/``.

    ``readRep`` is expected to fill ``repDict`` in place as
    ``{locus: [group, ...]}`` with each group mapping a position tuple to a
    sequence.  For each locus the database is ``dbpath + locus``; loci for
    which that path does not exist are skipped and counted, and the count
    is printed to stdout at the end.

    For the remaining loci two query files are written,
    ``<locus><label>.short`` and ``<locus><label>.long``, where ``label``
    is ``.LCSs`` or ``.HCSs`` when the repfile name contains that text and
    empty otherwise.  Within a group identical sequences are merged into a
    single record ``><locus>.<group>.<start1>:<start2>...``; sequences of
    at most 30 characters go to the short file, longer ones to the long
    file.  psiblast is then run four times: short and long queries, each
    once with the default report (``.out``) and once with ``-outfmt 7``
    (``.table``).  Short queries additionally get
    ``-evalue 20000 -matrix PAM30 -comp_based_stats 0 -word_size 2``.
    """
    # "orthologs," and "use" used to run together at the continuation.
    sys.stderr.write(
        "To detect the conservation among orthologs, use repetitions as "
        "the query and its related orthologs as db(after makeblastdb). \n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repfile dbpath/\n' % sys.argv[0])
        sys.exit(0)

    repfile = sys.argv[1]
    if 'LCSs' in repfile:
        label = '.LCSs'
    elif 'HCSs' in repfile:
        label = '.HCSs'
    else:
        # Patched 2011-09-22 in the original: files that are neither LCSs
        # nor HCSs need an initial (empty) label.
        label = ''

    noOrtho = 0
    path = sys.argv[2]
    repDict = {}
    readRep(repfile, repDict)

    midlen = 30  # longest sequence still written to the "short" query
    short_opts = ['-evalue', '20000', '-matrix', 'PAM30',
                  '-comp_based_stats', '0', '-word_size', '2']

    for locus, valueL in repDict.items():
        tmppath = path + locus
        if not os.path.exists(tmppath):
            noOrtho += 1
            continue

        short_name = locus + label + '.short'
        long_name = locus + label + '.long'
        # ``with`` closes both query files even if a write fails.
        with open(short_name, 'w') as fhshort:
            with open(long_name, 'w') as fhlong:
                for group, groupD in enumerate(valueL, 1):
                    starts_by_seq = {}
                    for pos in sorted(groupD):
                        starts_by_seq.setdefault(groupD[pos], []).append(
                            str(pos[0]))
                    for seq in sorted(starts_by_seq):
                        target = fhshort if len(seq) <= midlen else fhlong
                        target.write(
                            '>%s.%s.%s\n%s\n'
                            % (locus, str(group),
                               ':'.join(starts_by_seq[seq]), seq))

        _run_psiblast(short_name, tmppath, short_name + '.out', short_opts)
        _run_psiblast(long_name, tmppath, long_name + '.out', [])
        _run_psiblast(short_name, tmppath, short_name + '.table',
                      short_opts + ['-outfmt', '7'])
        _run_psiblast(long_name, tmppath, long_name + '.table',
                      ['-outfmt', '7'])

    sys.stdout.write('%s\n' % noOrtho)
def main():
    """Print each protein sequence with its repeats marked by asterisks.

    Command line: ``python <script> forc repResult [anno] [locus]``.

    ``readseq`` and ``readRep`` are expected to fill ``seqDict`` and
    ``repDict`` in place; the original documents the repeat dict as::

        {'AT1G62760': [{(25, 31): 'SSLSPSS', (51, 57): 'SSLSPSS'},
                       {(52, 60): 'SLSPSSPPP'}]}

    A non-empty third argument is an annotation file read with
    ``readAnnoNew``; a non-empty fourth argument is a file listing the
    loci to report (a locus is kept when it, or it without its last two
    characters, is listed).  For every selected locus, in sorted order,
    the output is: ``> <locus>``, the ``tair`` template line, the
    annotation (with each double backslash turned into a newline), the
    marked sequence, and one line per repeat group made of the
    concatenated ``<repeat>:<start>:<end>`` entries.

    Every occurrence of a repeat string in the sequence is wrapped in
    ``*``.  The replacement runs once per position entry, so a repeat
    string listed under several positions gains one pair of asterisks per
    entry.  A locus missing from ``seqDict`` raises ``KeyError``.
    """
    # "related" and "repetition" used to run together at the continuation.
    sys.stderr.write(
        "Paste the mother sequene and related repetition together\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 3:
        sys.stderr.write(
            'Using python %s forc repResult [anno] [locus] \n' % sys.argv[0])
        sys.exit(0)

    # Template for a per-locus line; it is empty in this source, so the
    # substitution below leaves it empty and a blank line is printed.
    tair = ""

    seqDict = {}
    readseq(sys.argv[1], seqDict)

    isAnno = len(sys.argv) > 3 and bool(sys.argv[3])
    if isAnno:
        annoDict = {}
        readAnnoNew(sys.argv[3], annoDict)

    isLoc = len(sys.argv) > 4 and bool(sys.argv[4])
    if isLoc:
        # A set gives constant-time membership tests; ``with`` closes the
        # locus file once it has been read.
        with open(sys.argv[4]) as handle:
            wanted = set(line.strip() for line in handle)

    repDict = {}
    readRep(sys.argv[2], repDict)

    for locus in sorted(repDict):
        if isLoc and locus not in wanted and locus[:-2] not in wanted:
            continue

        sys.stdout.write('> ' + locus + '\n')
        locusSub = "name=" + locus[:-2]
        sys.stdout.write(tair.replace("name=", locusSub) + '\n')
        if isAnno and locus in annoDict:
            sys.stdout.write(annoDict[locus].replace('\\\\', '\n') + '\n')

        seq = seqDict[locus]
        repoutput = []
        for group in repDict[locus]:
            entries = []
            for pos in sorted(group):
                rep = group[pos]
                entries.append(':'.join([rep, str(pos[0]), str(pos[1])]))
                seq = seq.replace(rep, '*' + rep + '*')
            # Entries of one group are concatenated without a separator,
            # exactly as before.
            repoutput.append(''.join(entries))

        sys.stdout.write(seq + '\n')
        sys.stdout.write('\n'.join(repoutput) + '\n')
def main():
    """Print a LaTeX report with one section per locus to stdout.

    Options come from ``cmdpara(sys.argv)``; the attributes read here are
    ``sort``, ``seqfile``, ``seqrepfile``, ``repfile``, ``locusfile``,
    ``anno`` and ``interpro``.  Sequences are loaded from ``seqfile``
    and/or ``seqrepfile``, repeats from ``seqrepfile`` and/or ``repfile``.
    The loci reported are the lines of ``locusfile`` when given, otherwise
    the keys of the repeat dict (or of the sequence dict when no repeats
    were loaded).  Annotations and InterPro domains are only loaded when
    repeats or a locus file are present.

    For each known locus the sequence is decorated by ``modifySeq`` with
    the marks collected by ``getnewDictDomain`` / ``getnewDictRep`` and
    printed inside a ``\\DNA!...!`` block, followed by the repeat and the
    domain descriptions.  The section title is the locus id plus the
    annotation text before its first ``[``.  Loci without a sequence are
    reported on stderr.

    The original started with a debugging stub that printed ``sort`` or
    ``no sort`` and exited with status 1 in both cases, which made the
    whole report unreachable; the stub is removed here.

    NOTE(review): loci taken from the dict keys are always sorted; loci
    read from ``locusfile`` keep their file order unless ``options.sort``
    is set.  That reading of the unfinished ``sort`` option, the line
    layout of the multi-line LaTeX strings and the nesting of the
    blank-line prints were reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    (options, args) = cmdpara(sys.argv)
    sys.stderr.write("*******Print the result to screen.*******\n")
    emit = sys.stdout.write

    # ---- input --------------------------------------------------------
    isRep = False
    isLoc = False
    if options.seqfile is not None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = True
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = True
    if options.locusfile is not None:
        # ``with`` closes the locus file once it has been read.
        with open(options.locusfile) as handle:
            locusList = [line.strip() for line in handle]
        isLoc = True
    if not isLoc:
        locusList = list(repdict.keys() if isRep else seqdict.keys())
    if options.sort or not isLoc:
        locusList.sort()
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    # ---- report -------------------------------------------------------
    latexHead()
    latexExplain()
    dotted_rule = '.' * 100

    for locus in locusList:
        if locus not in seqdict:
            sys.stderr.write("Unknown locus %s\n" % locus)
            continue

        residues = list(seqdict[locus])
        newDict = {}  # marks to apply to the sequence

        hasInterpro = locus in interproDict
        if hasInterpro:
            domainDespList = []
            domainPosL = interproDict[locus].keys()
            getnewDictDomain(interproDict[locus], domainDespList, newDict)
        else:
            domainPosL = []

        if isRep:
            repDespList = []
            repdictSonL = repdict[locus]
            num = len(repdictSonL)
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locus, num, repDespList)

        modifySeq(residues, newDict)

        # Escape the characters LaTeX treats specially, in this order.
        annos = annodict[locus]
        for special in ('_', '%', '~', '&'):
            annos = annos.replace(special, '\\' + special)

        # Short title: the annotation text in front of the first '['.
        firstBr = annos.find('[')
        shortAnno = annos[:firstBr] if firstBr != -1 else ''
        emit(''.join((r'\section{', locus, ' ', shortAnno, '}')) + '\n')

        # The last two characters of the locus id are dropped for \tair.
        annos = r'\tair{' + locus[:-2] + '} ' + annos
        emit(' '.join((r'\anno{', annos, '}')) + '\n')

        emit(r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!''' + '\n')
        emit(''.join(residues) + '\n')
        emit(r'''!
\end{minipage}
''' + '\n')

        emit('\n')
        emit(dotted_rule + '\n')
        emit('\n')

        if isRep:
            for repSeq in repDespList:
                emit(repSeq + '\n')
                emit('\n')

        if hasInterpro:
            emit(dotted_rule + '\n')
            emit('\n')
            for domainDesp in domainDespList:
                emit(domainDesp + '\n')
                emit('\n')

        emit(r'\clearpage' + '\n')
        emit('\n')

    latexTail()