def main():
    """Split multi-isoform loci into FASTA files and align each with t_coffee.

    Command line: ``python <script> seqfile [locusfile]``.

    * ``seqfile`` is loaded through ``ctIO.readseq`` into a dict keyed by
      sequence name.
    * ``locusfile`` (optional) lists one sequence name per line; without it
      every name loaded from ``seqfile`` is used, in sorted order.

    Names are grouped by the text before their first '.'; a name without a
    '.' forms its own group.  Every group with at least two members is
    written to ``<locus>.fa`` in the current directory, its members are
    appended to the file ``locusHasIsoforms``, and ``t_coffee <locus>.fa``
    is run.  A wrong argument count prints the usage text and exits with
    status 0.

    Raises:
        KeyError: a listed name is absent from the loaded sequences.
    """
    import subprocess  # local import: only this entry point needs it

    err = sys.stderr.write
    err("Print the result to current directory\n")
    argc = len(sys.argv)
    if argc not in (2, 3):
        err('Using python %s seqfile(for.c) [locus](optional)\n'
            % sys.argv[0])
        sys.exit(0)
    #--------------------------------
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    if argc == 2:
        locusL = sorted(seqDict)
    else:
        with open(sys.argv[2]) as locusFile:
            locusL = [line.strip() for line in locusFile]
    locusDict = {}
    for name in locusL:
        if not name:
            # Blank lines would otherwise pile up in a bogus '' group.
            continue
        # partition() keeps a dot-less name whole; find() returned -1 for
        # such names and silently chopped off their last character.
        locus, dot, rest = name.partition('.')
        locusDict.setdefault(locus, []).append(dot + rest)
    #--------------------------------
    with open('locusHasIsoforms', 'w') as fhl:
        for locus, afterL in locusDict.items():
            if len(afterL) < 2:
                continue
            filename = locus + '.fa'
            with open(filename, 'w') as fh:
                for after in afterL:
                    newLoc = locus + after
                    fhl.write('%s\n' % newLoc)
                    fh.write('>%s\n' % newLoc)
                    fh.write('%s\n' % (seqDict[newLoc],))
            # Argument list instead of a shell string: locus names come from
            # input files and must not be interpreted by a shell.
            try:
                subprocess.call(['t_coffee', filename])
            except OSError as exc:
                err('Cannot run t_coffee on %s: %s\n' % (filename, exc))
    #---------------------------
def main():
    """Split multi-isoform loci into FASTA files and align each with t_coffee.

    Command line: ``python <script> seqfile [locusfile]``.

    * ``seqfile`` is loaded through ``ctIO.readseq`` into a dict keyed by
      sequence name.
    * ``locusfile`` (optional) lists one sequence name per line; without it
      every name loaded from ``seqfile`` is used, in sorted order.

    Names are grouped by the text before their first '.'; a name without a
    '.' forms its own group.  Every group with at least two members is
    written to ``<locus>.fa`` in the current directory, its members are
    appended to the file ``locusHasIsoforms``, and ``t_coffee <locus>.fa``
    is run.  A wrong argument count prints the usage text and exits with
    status 0.

    Raises:
        KeyError: a listed name is absent from the loaded sequences.
    """
    import subprocess  # local import: only this entry point needs it

    err = sys.stderr.write
    err("Print the result to current directory\n")
    argc = len(sys.argv)
    if argc not in (2, 3):
        err('Using python %s seqfile(for.c) [locus](optional)\n'
            % sys.argv[0])
        sys.exit(0)
    #--------------------------------
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    if argc == 2:
        locusL = sorted(seqDict)
    else:
        with open(sys.argv[2]) as locusFile:
            locusL = [line.strip() for line in locusFile]
    locusDict = {}
    for name in locusL:
        if not name:
            # Blank lines would otherwise pile up in a bogus '' group.
            continue
        # partition() keeps a dot-less name whole; find() returned -1 for
        # such names and silently chopped off their last character.
        locus, dot, rest = name.partition('.')
        locusDict.setdefault(locus, []).append(dot + rest)
    #--------------------------------
    with open('locusHasIsoforms', 'w') as fhl:
        for locus, afterL in locusDict.items():
            if len(afterL) < 2:
                continue
            filename = locus + '.fa'
            with open(filename, 'w') as fh:
                for after in afterL:
                    newLoc = locus + after
                    fhl.write('%s\n' % newLoc)
                    fh.write('>%s\n' % newLoc)
                    fh.write('%s\n' % (seqDict[newLoc],))
            # Argument list instead of a shell string: locus names come from
            # input files and must not be interpreted by a shell.
            try:
                subprocess.call(['t_coffee', filename])
            except OSError as exc:
                err('Cannot run t_coffee on %s: %s\n' % (filename, exc))
    #---------------------------
def main():
    """Print repetition records together with annotation and sequence.

    Command line: ``python <script> forc repResult [anno] [locus]``.

    * ``forc`` is loaded with ``readseq`` into a dict keyed by locus.
    * ``repResult`` is read line by line; a line starting with '>' opens a
      record for the locus named after the '>'.
    * ``anno`` (optional) is loaded with ``readAnno`` into a dict keyed by
      locus.
    * ``locus`` (optional) lists one locus per line; when given, a record is
      kept only if its locus, or its locus minus the last two characters,
      is listed.

    For every kept record the header ``> <locus>``, the annotation (if
    known) and the sequence (if known) are written to stdout, followed by
    the record's remaining lines unchanged.  Lines that precede the first
    header belong to no record and are skipped.  Fewer than two arguments
    print the usage text and exit with status 0.
    """
    err = sys.stderr.write
    out = sys.stdout.write
    err("Paste the mother sequene and related repetition together\n")
    err("Print the result to screen\n")
    argc = len(sys.argv)
    if argc < 3:
        err('Using python %s forc repResult [anno] [locus] \n' % sys.argv[0])
        sys.exit(0)
    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    # Both trailing arguments are optional, so check the length before
    # indexing; an empty string also counts as "not given".
    annoDict = {}
    isAnno = argc > 3 and bool(sys.argv[3])
    if isAnno:
        readAnno(sys.argv[3], annoDict)
    #--------------------------------------------
    locSet = set()
    isLoc = argc > 4 and bool(sys.argv[4])
    if isLoc:
        with open(sys.argv[4]) as locFile:
            locSet = set(name.strip() for name in locFile)
    #--------------------------------------------
    output = False  # no record is open until the first header is seen
    with open(sys.argv[2]) as repFile:
        for line in repFile:
            if line.startswith('>'):
                locus = line[1:].strip()
                output = (not isLoc or locus in locSet
                          or locus[:-2] in locSet)
                if not output:
                    continue
                #------------------------------------
                out('> %s\n' % locus)
                if isAnno and locus in annoDict:
                    out('%s\n' % (annoDict[locus],))
                if locus in seqDict:
                    out('%s\n' % (seqDict[locus],))
            elif output:
                # The line already carries its own newline.
                out(line)
def main():
    """Locate curated repetitions in their sequences and print positions.

    Command line: ``python <script> seqforc curatedRep``.

    ``seqforc`` is loaded through ``ctIO.readseq`` into a dict keyed by
    locus.  In ``curatedRep`` a line starting with '>' names the current
    locus; any other line containing '*' holds a repetition (the text before
    the first '*', blanks removed).  Lines without '*' are ignored.

    Each repetition is searched for in the current locus' sequence, starting
    just after the previous match, and recorded as ``<rep>:<1-based start>``
    under a key made of as many '*' as the line contains.  Afterwards each
    locus with at least one repetition is written to stdout as ``><locus>``
    followed by one '#'-joined line per key; a locus without repetitions is
    written to stderr.

    Exits with status 0 after a usage error or when a repetition cannot be
    found, and with status 1 when a repetition line precedes every header.

    Raises:
        KeyError: a header names a locus missing from the loaded sequences.
    """
    err = sys.stderr.write
    out = sys.stdout.write
    err("Print the result to screen\n")
    if len(sys.argv) != 3:
        err('Using python %s seqforc curatedRep\n' % sys.argv[0])
        sys.exit(0)
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    repDict = {}
    locus = None
    with open(sys.argv[2]) as repFile:
        for line in repFile:
            if line.startswith('>'):
                locus = line[1:].strip()
                seq = seqDict[locus]
                shift = 0
                repDict[locus] = {}
                continue
            #----------Not locus line-------------
            starPos = line.find('*')
            if starPos == -1:
                continue
            if locus is None:
                # Without a header there is no sequence to search in.
                err('Repetition before any locus header: %s\n'
                    % line.strip())
                sys.exit(1)
            rep = line[:starPos].strip().replace(' ', '')
            repstart = seq.find(rep, shift)
            if repstart == -1:
                err('No such repetition %s\n' % rep)
                # NOTE(review): status 0 on a failed lookup is kept as
                # found; confirm whether callers rely on it.
                sys.exit(0)
            shift = repstart + len(rep)
            key = '*' * line.count('*')
            repDict[locus].setdefault(key, []).append(
                '%s:%d' % (rep, repstart + 1))
    #-------------End reading-----------------
    for locus, sonDict in repDict.items():
        if sonDict:
            out('>%s\n' % locus)
            for value in sonDict.values():
                out('%s\n' % '#'.join(value))
        else:
            err('%s\n' % locus)
def main():
    """Locate curated repetitions in their sequences and print positions.

    Command line: ``python <script> seqforc curatedRep``.

    ``seqforc`` is loaded through ``ctIO.readseq`` into a dict keyed by
    locus.  In ``curatedRep`` a line starting with '>' names the current
    locus; any other line containing '*' holds a repetition (the text before
    the first '*', blanks removed).  Lines without '*' are ignored.

    Each repetition is searched for in the current locus' sequence, starting
    just after the previous match, and recorded as ``<rep>:<1-based start>``
    under a key made of as many '*' as the line contains.  Afterwards each
    locus with at least one repetition is written to stdout as ``><locus>``
    followed by one '#'-joined line per key; a locus without repetitions is
    written to stderr.

    Exits with status 0 after a usage error or when a repetition cannot be
    found, and with status 1 when a repetition line precedes every header.

    Raises:
        KeyError: a header names a locus missing from the loaded sequences.
    """
    err = sys.stderr.write
    out = sys.stdout.write
    err("Print the result to screen\n")
    if len(sys.argv) != 3:
        err('Using python %s seqforc curatedRep\n' % sys.argv[0])
        sys.exit(0)
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    repDict = {}
    locus = None
    with open(sys.argv[2]) as repFile:
        for line in repFile:
            if line.startswith('>'):
                locus = line[1:].strip()
                seq = seqDict[locus]
                shift = 0
                repDict[locus] = {}
                continue
            #----------Not locus line-------------
            starPos = line.find('*')
            if starPos == -1:
                continue
            if locus is None:
                # Without a header there is no sequence to search in.
                err('Repetition before any locus header: %s\n'
                    % line.strip())
                sys.exit(1)
            rep = line[:starPos].strip().replace(' ', '')
            repstart = seq.find(rep, shift)
            if repstart == -1:
                err('No such repetition %s\n' % rep)
                # NOTE(review): status 0 on a failed lookup is kept as
                # found; confirm whether callers rely on it.
                sys.exit(0)
            shift = repstart + len(rep)
            key = '*' * line.count('*')
            repDict[locus].setdefault(key, []).append(
                '%s:%d' % (rep, repstart + 1))
    #-------------End reading-----------------
    for locus, sonDict in repDict.items():
        if sonDict:
            out('>%s\n' % locus)
            for value in sonDict.values():
                out('%s\n' % '#'.join(value))
        else:
            err('%s\n' % locus)
def main():
    """Print repetition records together with annotation and sequence.

    Command line: ``python <script> forc repResult [anno] [locus]``.

    * ``forc`` is loaded with ``readseq`` into a dict keyed by locus.
    * ``repResult`` is read line by line; a line starting with '>' opens a
      record for the locus named after the '>'.
    * ``anno`` (optional) is loaded with ``readAnno`` into a dict keyed by
      locus.
    * ``locus`` (optional) lists one locus per line; when given, a record is
      kept only if its locus, or its locus minus the last two characters,
      is listed.

    For every kept record the header ``> <locus>``, the annotation (if
    known) and the sequence (if known) are written to stdout, followed by
    the record's remaining lines unchanged.  Lines that precede the first
    header belong to no record and are skipped.  Fewer than two arguments
    print the usage text and exit with status 0.
    """
    err = sys.stderr.write
    out = sys.stdout.write
    err("Paste the mother sequene and related repetition together\n")
    err("Print the result to screen\n")
    argc = len(sys.argv)
    if argc < 3:
        err('Using python %s forc repResult [anno] [locus] \n' % sys.argv[0])
        sys.exit(0)
    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    # Both trailing arguments are optional, so check the length before
    # indexing; an empty string also counts as "not given".
    annoDict = {}
    isAnno = argc > 3 and bool(sys.argv[3])
    if isAnno:
        readAnno(sys.argv[3], annoDict)
    #--------------------------------------------
    locSet = set()
    isLoc = argc > 4 and bool(sys.argv[4])
    if isLoc:
        with open(sys.argv[4]) as locFile:
            locSet = set(name.strip() for name in locFile)
    #--------------------------------------------
    output = False  # no record is open until the first header is seen
    with open(sys.argv[2]) as repFile:
        for line in repFile:
            if line.startswith('>'):
                locus = line[1:].strip()
                output = (not isLoc or locus in locSet
                          or locus[:-2] in locSet)
                if not output:
                    continue
                #------------------------------------
                out('> %s\n' % locus)
                if isAnno and locus in annoDict:
                    out('%s\n' % (annoDict[locus],))
                if locus in seqDict:
                    out('%s\n' % (seqDict[locus],))
            elif output:
                # The line already carries its own newline.
                out(line)
def main():
    """Print each locus with its TAIR link, annotation and starred sequence.

    Command line: ``python <script> forc repResult [anno] [locus]``.

    * ``forc`` is loaded with ``readseq`` into a dict keyed by locus.
    * ``repResult`` is loaded with ``readRep`` into a dict keyed by locus.
    * ``anno`` (optional) is loaded with ``readAnnoNew``.
    * ``locus`` (optional) lists one locus per line; when given, a locus is
      reported only if it, or it minus its last two characters, is listed.

    Loci are reported in sorted order.  For each one the following lines go
    to stdout: ``> <locus>``; the TAIR URL with ``name=`` filled in from the
    locus minus its last two characters; the annotation, if known, with
    every double backslash turned into a newline; the sequence with each
    repetition wrapped in '*'; and one line per repetition group made of the
    ``rep:start:end`` entries concatenated in position order.

    Fewer than two arguments print the usage text and exit with status 0.

    Raises:
        KeyError: a locus from ``repResult`` has no sequence in ``forc``.
    """
    err = sys.stderr.write

    def say(text):
        sys.stdout.write('%s\n' % (text,))

    err("Paste the mother sequene and related repetition together\n")
    err("Print the result to screen\n")
    argc = len(sys.argv)
    if argc < 3:
        err('Using python %s forc repResult [anno] [locus] \n' % sys.argv[0])
        sys.exit(0)
    tair = "http://www.arabidopsis.org/servlets/TairObject?name=&type=locus"
    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    annoDict = {}
    isAnno = argc > 3 and bool(sys.argv[3])
    if isAnno:
        readAnnoNew(sys.argv[3], annoDict)
    #--------------------------------------------
    locSet = set()
    isLoc = argc > 4 and bool(sys.argv[4])
    if isLoc:
        with open(sys.argv[4]) as locFile:
            locSet = set(name.strip() for name in locFile)
    #--------------------------------------------
    # Expected layout after readRep (taken from the author's own note):
    #   repDict = {
    #       'AT1G62760': [
    #           {(25, 31): 'SSLSPSS', (51, 57): 'SSLSPSS'},
    #           {(52, 60): 'SLSPSSPPP'},
    #       ]
    #   }
    repDict = {}
    readRep(sys.argv[2], repDict)
    #-------------------------------------------
    for locus in sorted(repDict):
        if isLoc and locus not in locSet and locus[:-2] not in locSet:
            continue
        #------------------------------------
        say('> %s' % locus)
        say(tair.replace("name=", "name=" + locus[:-2]))
        if isAnno and locus in annoDict:
            say(annoDict[locus].replace('\\\\', '\n'))
        # Every locus in repDict is expected to have a sequence; a missing
        # one is an input error and surfaces as KeyError.
        seq = seqDict[locus]
        repoutput = []
        for repitemDict in repDict[locus]:
            pieces = []
            for pos in sorted(repitemDict):
                rep = repitemDict[pos]
                pieces.append(':'.join([rep, str(pos[0]), str(pos[1])]))
                # NOTE(review): replace() re-wraps text that an earlier pass
                # already starred, so a repetition listed at several
                # positions gains one more '*' layer per position -- confirm
                # this is intended.
                seq = seq.replace(rep, '*' + rep + '*')
            repoutput.append(''.join(pieces))
        say(seq)
        say('\n'.join(repoutput))
def main():
    """Write a LaTeX report for the selected loci to stdout.

    Options come from ``cmdpara(sys.argv)``; the attributes read here are
    ``seqfile``, ``seqrepfile``, ``repfile``, ``locusfile``, ``anno`` and
    ``interpro``.

    * Sequences are loaded with ``ctIO.readseq`` or ``ctIO.readseqrep``,
      repetitions with ``ctIO.readseqrep`` or ``ctIO.readRep``.
    * The loci to report are the lines of ``locusfile`` when given;
      otherwise the sorted keys of the repetition table (if one was loaded)
      or of the sequence table.
    * Annotation and InterPro data are loaded only when a repetition or a
      locus file was supplied; otherwise both tables stay empty and a locus
      is reported with an empty annotation.

    Per locus a section heading, the annotation, the sequence as marked up
    by ``modifySeq``, and the repetition and domain descriptions are
    printed, framed by ``latexHead``/``latexExplain`` and ``latexTail``.
    Loci without a sequence are reported on stderr and skipped.

    NOTE(review): a leftover probe that printed 'sort' / 'no sort' and then
    exited with status 1 on both branches made everything below it
    unreachable; it has been removed.  ``options.sort`` is therefore not
    consulted here -- confirm whether it should gate the sorting.
    NOTE(review): the blank line after the repetition list and after the
    domain list is emitted once per list, not once per entry -- confirm
    against the intended layout.
    """
    (options, args) = cmdpara(sys.argv)
    err = sys.stderr.write

    def say(text=''):
        sys.stdout.write('%s\n' % (text,))

    err("*******Print the result to screen.*******\n")
    #-------------------macro-------------------------------------
    isRep = 0
    isLoc = 0
    # Every table gets a default so that no combination of options leaves a
    # name undefined further down.
    seqdict = {}
    repdict = {}
    annodict = {}
    interproDict = {}
    #-------------------------------------------------------------
    if options.seqfile is not None:
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        # A combined file replaces whatever was loaded before it.
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile is not None:
        with open(options.locusfile) as locusFile:
            locusList = [line.strip() for line in locusFile]
        isLoc = 1
    if not isLoc:
        locusList = sorted(repdict if isRep else seqdict)
    if isRep or isLoc:
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        ctIO.readInterpro(options.interpro, interproDict, locusList)
    #----------------------------------------------------------------
    latexHead()
    latexExplain()
    #---------------------------------------------------------------
    for locusId in locusList:
        if locusId not in seqdict:
            err("Unknown locus %s\n" % locusId)
            continue
        seq = list(seqdict[locusId])
        #-------------get newDict--------------------------------
        newDict = {}
        domainDespList = []
        domainPosL = []
        hasInterpro = locusId in interproDict
        if hasInterpro:
            domainPosL = list(interproDict[locusId].keys())
            getnewDictDomain(interproDict[locusId], domainDespList, newDict)
        #------------------------------------------------
        repDespList = []
        if isRep:
            repdictSonL = repdict[locusId]
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locusId, len(repdictSonL), repDespList)
        #----------------get newDict---------------------------
        modifySeq(seq, newDict)
        #---------------------------------------------------------
        # Escape the characters LaTeX treats specially.
        annos = annodict.get(locusId, '')
        for special in '_%~&':
            annos = annos.replace(special, '\\' + special)
        # The heading carries only the annotation text before the first '['.
        firstBr = annos.find('[')
        shortAnno = annos[:firstBr] if firstBr != -1 else ''
        say(''.join((r'\section{', locusId, ' ', shortAnno, '}')))
        annos = r'\tair{' + locusId[:-2] + '} ' + annos
        say(' '.join((r'\anno{', annos, '}')))
        say(r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!''')
        #--without annotation
        say(''.join(seq))
        say(r'''!
\end{minipage}
''')
        #------------------------------------------------
        say()
        say('.' * 100)
        say()
        #---------------Rep------------------------------
        if isRep:
            for repSeq in repDespList:
                say(repSeq)
            say()
        #-------------Domain desp----------------------
        if hasInterpro:
            say('.' * 100)
            say()
            for domainDesp in domainDespList:
                say(domainDesp)
            say()
        say(r'\clearpage')
        say()
    #----------------END of for ---all locus------------------
    latexTail()
def main():
    """Write a LaTeX report for the selected loci to stdout.

    Options come from ``cmdpara(sys.argv)``; the attributes read here are
    ``seqfile``, ``seqrepfile``, ``repfile``, ``locusfile``, ``anno`` and
    ``interpro``.

    * Sequences are loaded with ``ctIO.readseq`` or ``ctIO.readseqrep``,
      repetitions with ``ctIO.readseqrep`` or ``ctIO.readRep``.
    * The loci to report are the lines of ``locusfile`` when given;
      otherwise the keys of the repetition table (if one was loaded) or of
      the sequence table, in dict order.
    * Annotation and InterPro data are loaded only when a repetition or a
      locus file was supplied; otherwise both tables stay empty and a locus
      is reported with an empty annotation.

    Per locus a section heading, the annotation, the sequence as marked up
    by ``modifySeq``, and the repetition and domain descriptions are
    printed, framed by ``latexHead``/``latexExplain`` and ``latexTail``.
    Loci without a sequence are reported on stderr and skipped.

    NOTE(review): the blank line after the repetition list and after the
    domain list is emitted once per list, not once per entry -- confirm
    against the intended layout.
    """
    (options, args) = cmdpara(sys.argv)
    err = sys.stderr.write

    def say(text=""):
        sys.stdout.write("%s\n" % (text,))

    err("*******Print the result to screen.*******\n")
    # -------------------macro-------------------------------------
    isRep = 0
    isLoc = 0
    # Every table gets a default so that no combination of options leaves a
    # name undefined further down.
    seqdict = {}
    repdict = {}
    annodict = {}
    interproDict = {}
    # -------------------------------------------------------------
    if options.seqfile is not None:
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        # A combined file replaces whatever was loaded before it.
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile is not None:
        with open(options.locusfile) as locusFile:
            locusList = [line.strip() for line in locusFile]
        isLoc = 1
    if not isLoc:
        locusList = list(repdict if isRep else seqdict)
    if isRep or isLoc:
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        ctIO.readInterpro(options.interpro, interproDict, locusList)
    # ----------------------------------------------------------------
    latexHead()
    latexExplain()
    # ---------------------------------------------------------------
    for locusId in locusList:
        if locusId not in seqdict:
            err("Unknown locus %s\n" % locusId)
            continue
        seq = list(seqdict[locusId])
        # -------------get newDict--------------------------------
        newDict = {}
        domainDespList = []
        domainPosL = []
        hasInterpro = locusId in interproDict
        if hasInterpro:
            domainPosL = list(interproDict[locusId].keys())
            getnewDictDomain(interproDict[locusId], domainDespList, newDict)
        # ------------------------------------------------
        repDespList = []
        if isRep:
            repdictSonL = repdict[locusId]
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locusId, len(repdictSonL), repDespList)
        # ----------------get newDict---------------------------
        modifySeq(seq, newDict)
        # ---------------------------------------------------------
        say("".join((r"\section{", locusId, "}")))
        # Escape the characters LaTeX treats specially.
        annos = annodict.get(locusId, "")
        for special in "_%~&":
            annos = annos.replace(special, "\\" + special)
        annos = r"\tair{" + locusId[:-2] + "} " + annos
        say(" ".join((r"\anno{", annos, "}")))
        say(r"""
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!""")
        # --without annotation
        say("".join(seq))
        say(r"""!
\end{minipage}
""")
        # ------------------------------------------------
        say()
        say("." * 100)
        say()
        # ---------------Rep------------------------------
        if isRep:
            for repSeq in repDespList:
                say(repSeq)
            say()
        # -------------Domain desp----------------------
        if hasInterpro:
            say("." * 100)
            say()
            for domainDesp in domainDespList:
                say(domainDesp)
            say()
        say(r"\clearpage")
        say()
    # ----------------END of for ---all locus------------------
    latexTail()
def main():
    """Write a LaTeX report for the selected loci to stdout.

    Options come from ``cmdpara(sys.argv)``; the attributes read here are
    ``seqfile``, ``seqrepfile``, ``repfile``, ``locusfile``, ``anno`` and
    ``interpro``.

    * Sequences are loaded with ``ctIO.readseq`` or ``ctIO.readseqrep``,
      repetitions with ``ctIO.readseqrep`` or ``ctIO.readRep``.
    * The loci to report are the lines of ``locusfile`` when given;
      otherwise the sorted keys of the repetition table (if one was loaded)
      or of the sequence table.
    * Annotation and InterPro data are loaded only when a repetition or a
      locus file was supplied; otherwise both tables stay empty and a locus
      is reported with an empty annotation.

    Per locus a section heading, the annotation, the sequence as marked up
    by ``modifySeq``, and the repetition and domain descriptions are
    printed, framed by ``latexHead``/``latexExplain`` and ``latexTail``.
    Loci without a sequence are reported on stderr and skipped.

    NOTE(review): a leftover probe that printed 'sort' / 'no sort' and then
    exited with status 1 on both branches made everything below it
    unreachable; it has been removed.  ``options.sort`` is therefore not
    consulted here -- confirm whether it should gate the sorting.
    NOTE(review): the blank line after the repetition list and after the
    domain list is emitted once per list, not once per entry -- confirm
    against the intended layout.
    """
    (options, args) = cmdpara(sys.argv)
    err = sys.stderr.write

    def say(text=''):
        sys.stdout.write('%s\n' % (text,))

    err("*******Print the result to screen.*******\n")
    #-------------------macro-------------------------------------
    isRep = 0
    isLoc = 0
    # Every table gets a default so that no combination of options leaves a
    # name undefined further down.
    seqdict = {}
    repdict = {}
    annodict = {}
    interproDict = {}
    #-------------------------------------------------------------
    if options.seqfile is not None:
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        # A combined file replaces whatever was loaded before it.
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile is not None:
        with open(options.locusfile) as locusFile:
            locusList = [line.strip() for line in locusFile]
        isLoc = 1
    if not isLoc:
        locusList = sorted(repdict if isRep else seqdict)
    if isRep or isLoc:
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        ctIO.readInterpro(options.interpro, interproDict, locusList)
    #----------------------------------------------------------------
    latexHead()
    latexExplain()
    #---------------------------------------------------------------
    for locusId in locusList:
        if locusId not in seqdict:
            err("Unknown locus %s\n" % locusId)
            continue
        seq = list(seqdict[locusId])
        #-------------get newDict--------------------------------
        newDict = {}
        domainDespList = []
        domainPosL = []
        hasInterpro = locusId in interproDict
        if hasInterpro:
            domainPosL = list(interproDict[locusId].keys())
            getnewDictDomain(interproDict[locusId], domainDespList, newDict)
        #------------------------------------------------
        repDespList = []
        if isRep:
            repdictSonL = repdict[locusId]
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locusId, len(repdictSonL), repDespList)
        #----------------get newDict---------------------------
        modifySeq(seq, newDict)
        #---------------------------------------------------------
        # Escape the characters LaTeX treats specially.
        annos = annodict.get(locusId, '')
        for special in '_%~&':
            annos = annos.replace(special, '\\' + special)
        # The heading carries only the annotation text before the first '['.
        firstBr = annos.find('[')
        shortAnno = annos[:firstBr] if firstBr != -1 else ''
        say(''.join((r'\section{', locusId, ' ', shortAnno, '}')))
        annos = r'\tair{' + locusId[:-2] + '} ' + annos
        say(' '.join((r'\anno{', annos, '}')))
        say(r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!''')
        #--without annotation
        say(''.join(seq))
        say(r'''!
\end{minipage}
''')
        #------------------------------------------------
        say()
        say('.' * 100)
        say()
        #---------------Rep------------------------------
        if isRep:
            for repSeq in repDespList:
                say(repSeq)
            say()
        #-------------Domain desp----------------------
        if hasInterpro:
            say('.' * 100)
            say()
            for domainDesp in domainDespList:
                say(domainDesp)
            say()
        say(r'\clearpage')
        say()
    #----------------END of for ---all locus------------------
    latexTail()