コード例 #1
0
def main():
    """Group sequence ids by locus and align every locus that has isoforms.

    Command line: ``python <script> seqfile [locusfile]``.

    Sequences are loaded from ``seqfile`` through ``ctIO.readseq``.  The ids
    to process are read from ``locusfile`` (one per line) when it is given,
    otherwise the sorted keys of the loaded sequences are used.  Each id is
    split at its first '.' into a locus name and an isoform suffix.  For
    every locus that owns at least two ids, the matching sequences are
    written to ``<locus>.fa`` in the current directory, ``t_coffee`` is run
    on that file, and the ids are recorded in the file ``locusHasIsoforms``.
    """
    print >> sys.stderr, "Print the result to current directory"
    lenS = len(sys.argv)
    if lenS != 3 and lenS != 2:
        print >> sys.stderr, \
            'Using python %s seqfile(for.c) [locus](optional)' % sys.argv[0]
        sys.exit(0)
    #--------------------------------
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    if lenS == 2:
        locusL = sorted(seqDict.keys())
    else:
        locusFh = open(sys.argv[2])
        try:
            locusL = [line.strip() for line in locusFh]
        finally:
            locusFh.close()
    locusDict = {}
    for line in locusL:
        line = line.strip()
        if not line:
            # Blank lines in the locus file carry no id.
            continue
        dot = line.find('.')
        if dot == -1:
            # No isoform suffix: the whole id is the locus.  Slicing with
            # -1 would drop the last character and merge unrelated ids.
            locus, after = line, ''
        else:
            locus, after = line[:dot], line[dot:]
        locusDict.setdefault(locus, []).append(after)
    #--------------------------------
    fhl = open('locusHasIsoforms', 'w')
    try:
        for locus, afterL in locusDict.items():
            if len(afterL) < 2:
                continue
            filename = locus + '.fa'

            fh = open(filename, 'w')
            try:
                for after in afterL:
                    newLoc = locus + after
                    print >> fhl, newLoc
                    print >> fh, '>%s' % newLoc
                    print >> fh, seqDict[newLoc]
            finally:
                # Close before the aligner runs so the file is complete.
                fh.close()
            cmd = 't_coffee ' + filename
            os.system(cmd)
    finally:
        fhl.close()
コード例 #2
0
ファイル: alignIsoformers.py プロジェクト: Tong-Chen/NGS
def main():
    """Write one FASTA file per multi-isoform locus and run t_coffee on it.

    Command line: ``python <script> seqfile [locusfile]``.

    ``ctIO.readseq`` fills a dict from ``seqfile``.  Ids are taken from
    ``locusfile`` (one id per line) if supplied, else from the sorted keys
    of that dict.  The text before the first '.' of an id is its locus, the
    rest (including the dot) is its isoform suffix.  Loci with fewer than
    two ids are skipped; for the others the sequences go to ``<locus>.fa``,
    the ids are listed in ``locusHasIsoforms``, and ``t_coffee <locus>.fa``
    is executed.  All files are created in the current directory.
    """
    print >>sys.stderr, "Print the result to current directory"
    lenS = len(sys.argv)
    if lenS != 3 and lenS != 2:
        print >>sys.stderr, 'Using python %s seqfile(for.c) [locus](optional)' % sys.argv[0]
        sys.exit(0)
    #--------------------------------
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    if lenS == 3:
        idFh = open(sys.argv[2])
        try:
            locusL = [line.strip() for line in idFh]
        finally:
            idFh.close()
    else:
        locusL = sorted(seqDict.keys())
    #--------------------------------
    locusDict = {}
    for line in locusL:
        line = line.strip()
        if line == '':
            # An empty line would otherwise become an empty id.
            continue
        dot = line.find('.')
        if dot < 0:
            # find() returned -1: the id has no suffix.  Using -1 as a slice
            # bound would cut off the last character of the locus name.
            dot = len(line)
        locus = line[:dot]
        after = line[dot:]
        if locus in locusDict:
            locusDict[locus].append(after)
        else:
            locusDict[locus] = [after]
    #--------------------------------
    fhl = open('locusHasIsoforms', 'w')
    try:
        for locus, afterL in locusDict.items():
            if len(afterL) < 2:
                continue
            filename = locus+'.fa'
            fh = open(filename, 'w')
            try:
                for after in afterL:
                    newLoc = locus+after
                    print >>fhl, newLoc
                    print >>fh, '>%s' % newLoc
                    print >>fh, seqDict[newLoc]
            finally:
                # Release the handle even if an id is missing from seqDict.
                fh.close()
            cmd = 't_coffee ' + filename
            os.system(cmd)
    #---------------------------
    finally:
        fhl.close()
コード例 #3
0
def main():
    print >> sys.stderr, "Paste the mother sequene and related\
 repetition together"

    print >> sys.stderr, "Print the result to screen"
    if len(sys.argv) < 3:
        print >> sys.stderr, 'Using python %s forc repResult \
[anno] [locus] ' % sys.argv[0]
        sys.exit(0)

    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    isAnno = 0
    if sys.argv[3]:
        isAnno = 1
        annoDict = {}
        readAnno(sys.argv[3], annoDict)
    #--------------------------------------------
    isLoc = 0
    if sys.argv[4]:
        isLoc = 1
        locL = [locus.strip() for locus in open(sys.argv[4])]
    #--------------------------------------------
    for line in open(sys.argv[2]):
        if line[0] == '>':
            output = 1
            locus = line[1:].strip()
            if isLoc and \
                (locus not in locL) and \
                (locus[:-2] not in locL):
                output = 0
                continue
            #------------------------------------
            print '>', locus
            if isAnno and locus in annoDict:
                print annoDict[locus]
            if locus in seqDict:
                print seqDict[locus]
        else:
            if output:
                print line,
コード例 #4
0
ファイル: getRepFromCurate.py プロジェクト: Tong-Chen/NGS
def main():
    print >>sys.stderr, "Print the result to screen"
    if len(sys.argv) != 3:
        print >>sys.stderr, 'Using python %s seqforc curatedRep' % sys.argv[0]
        sys.exit(0)
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    repDict = {}
    for line in open(sys.argv[2]):
        if line[0] == '>':
            locus = line[1:]
            locus = locus.strip()
            seq = seqDict[locus]
            shift = 0
            repDict[locus] = {}
        else:
            starPos = line.find('*')
            if starPos != -1:
                rep = line[:starPos]
                rep = rep.strip()
                rep = rep.replace(' ','')
                repstart = seq.find(rep, shift)
                if repstart == -1:
                    print >>sys.stderr, 'No such repetition %s'\
                        % rep
                    sys.exit(0)
                shift = repstart + len(rep)
                key = '*' * line.count('*')
                if key not in repDict[locus]:
                    repDict[locus][key] = []
                repDict[locus][key].append(rep+':'+str(repstart+1))
            #---------id rep-------------------
        #----------Not locus line-------------
    #-------------End reading-----------------
    for locus, sonDict in repDict.items():
        if len(sonDict) > 0:
            print '>%s' % locus
            for value in sonDict.values():
                print '#'.join(value)
        else:
            print >>sys.stderr, locus
コード例 #5
0
ファイル: getRepFromCurate.py プロジェクト: yuzhenpeng/NGS
def main():
    print >> sys.stderr, "Print the result to screen"
    if len(sys.argv) != 3:
        print >> sys.stderr, 'Using python %s seqforc curatedRep' % sys.argv[0]
        sys.exit(0)
    seqDict = {}
    ctIO.readseq(sys.argv[1], seqDict)
    repDict = {}
    for line in open(sys.argv[2]):
        if line[0] == '>':
            locus = line[1:]
            locus = locus.strip()
            seq = seqDict[locus]
            shift = 0
            repDict[locus] = {}
        else:
            starPos = line.find('*')
            if starPos != -1:
                rep = line[:starPos]
                rep = rep.strip()
                rep = rep.replace(' ', '')
                repstart = seq.find(rep, shift)
                if repstart == -1:
                    print >>sys.stderr, 'No such repetition %s'\
                        % rep
                    sys.exit(0)
                shift = repstart + len(rep)
                key = '*' * line.count('*')
                if key not in repDict[locus]:
                    repDict[locus][key] = []
                repDict[locus][key].append(rep + ':' + str(repstart + 1))
            #---------id rep-------------------
        #----------Not locus line-------------
    #-------------End reading-----------------
    for locus, sonDict in repDict.items():
        if len(sonDict) > 0:
            print '>%s' % locus
            for value in sonDict.values():
                print '#'.join(value)
        else:
            print >> sys.stderr, locus
コード例 #6
0
ファイル: pasteSeqRepV2.py プロジェクト: Tong-Chen/NGS
def main():
    print >>sys.stderr, "Paste the mother sequene and related\
 repetition together"
    print >>sys.stderr, "Print the result to screen"
    if len(sys.argv) < 3:
        print >>sys.stderr, 'Using python %s forc repResult \
[anno] [locus] ' % sys.argv[0]
        sys.exit(0)
    
    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    isAnno = 0
    if sys.argv[3]:
        isAnno = 1
        annoDict = {}
        readAnno(sys.argv[3], annoDict)
    #--------------------------------------------
    isLoc = 0
    if sys.argv[4]:
        isLoc = 1
        locL = [locus.strip() for locus in open(sys.argv[4])]
    #--------------------------------------------
    for line in open(sys.argv[2]):
        if line[0] == '>':
            output = 1
            locus = line[1:].strip()
            if isLoc and \
                (locus not in locL) and \
                (locus[:-2] not in locL):
                output = 0
                continue
            #------------------------------------
            print '>', locus
            if isAnno and locus in annoDict:
                print annoDict[locus]
            if locus in seqDict:
                print seqDict[locus]
        else:
            if output:
                print line,
コード例 #7
0
def main():
    print >>sys.stderr, "Paste the mother sequene and related\
 repetition together"
    print >>sys.stderr, "Print the result to screen"
    if len(sys.argv) < 3:
        print >>sys.stderr, 'Using python %s forc repResult \
[anno] [locus] ' % sys.argv[0]
        sys.exit(0)
    tair = \
        "http://www.arabidopsis.org/servlets/TairObject?name=&type=locus"
    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    isAnno = 0
    if len(sys.argv) > 3:
        if sys.argv[3]:
            isAnno = 1
            annoDict = {}
            readAnnoNew(sys.argv[3], annoDict)
    #--------------------------------------------
    isLoc = 0
    if len(sys.argv) > 4:
        if sys.argv[4]:
            isLoc = 1
            locL = [locus.strip() for locus in open(sys.argv[4])]
    #--------------------------------------------
    repDict = {}
    '''
    repDict = 
    {
    AT1G62760: 
        [
            {(25, 31): 'SSLSPSS', (51,57): 'SSLSPSS'},
            {(52, 60): 'SLSPSSPPP',},    
        ] 
    }
    '''
    readRep(sys.argv[2], repDict)
    #-------------------------------------------
    repDictKeyL = repDict.keys()
    repDictKeyL.sort()
    for locus in repDictKeyL:
        if isLoc and (locus not in locL) and \
            (locus[:-2] not in locL):
            continue
        #------------------------------------
        print '>', locus
        locusSub = "name=" + locus[:-2]
        print tair.replace("name=", locusSub)
        if isAnno and locus in annoDict:
            print annoDict[locus].replace('\\\\', '\n')
        #if locus in seqDict:
        #    print seqDict[locus]
        seq = seqDict[locus] #this substitute the last one for we
        #know it has this key, if not, wrong
        locusRepL = repDict[locus]
        repoutput = []
        for repitemDIct in locusRepL:
            grep = ''
            posKey = repitemDIct.keys()
            posKey.sort()
            for pos in posKey:
                rep = repitemDIct[pos]
                grep += ':'.join([rep, str(pos[0]), str(pos[1])])
                reps = '*'+rep+'*'
                seq = seq.replace(rep, reps)
            repoutput.append(grep)
        print seq
        print '\n'.join(repoutput)
コード例 #8
0
def main():
    """Entry point of the LaTeX labelling script.

    Options are parsed with ``cmdpara(sys.argv)``.

    NOTE(review): as written, the function prints 'sort' or 'no sort'
    depending on ``options.sort`` and then calls ``sys.exit(1)`` in both
    branches, so none of the code after that check is ever executed.  This
    looks like a leftover debugging stub -- confirm before relying on it.

    The unreachable remainder would load sequences, repetitions, a locus
    list, annotations and InterPro domains through ``ctIO``, and print one
    LaTeX section per locus between ``latexHead()``/``latexExplain()`` and
    ``latexTail()``.
    """
    (options, args) = cmdpara(sys.argv)
    # Both branches terminate the program with exit status 1.
    if options.sort:
        print 'sort'
        sys.exit(1)
    else:
        print 'no sort'
        sys.exit(1)
    # ---- everything below this line is unreachable (see docstring) ----
    print >> sys.stderr, "*******Print the result to screen.*******"

    #-------------------macro-------------------------------------
    # isRep: repetition data was loaded; isLoc: a locus file was given.
    isRep = 0
    isLoc = 0
    #-------------------------------------------------------------
    if options.seqfile != None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    # A combined sequence+repetition file replaces any seqdict read above.
    if options.seqrepfile != None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile != None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile != None:
        locusList = [line.strip() for line in open(options.locusfile)]
        isLoc = 1
    # Without a locus file, process all known loci in sorted order.
    if not isLoc:
        locusList = repdict.keys() if isRep else seqdict.keys()
        locusList.sort()
    # NOTE(review): annodict and interproDict are only bound inside this
    # branch but are used unconditionally in the loop below.
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    #print locusList
    #print repdict.keys()
    #print repdict
    #print isRep
    #sys.exit(1)
    #----------------------------------------------------------------
    latexHead()
    latexExplain()
    #---------------------------------------------------------------
    for id in locusList:
        if id not in seqdict:
            print >> sys.stderr, "Unknown locus %s" % id
        else:
            hasInterpro = 0
            # The sequence is handled as a list of characters until it is
            # joined again right before printing.
            seq = list(seqdict[id])
            #-------------get newDict--------------------------------
            newDict = {}
            if id in interproDict:
                hasInterpro = 1
                domainDespList = []
                domainPosL = interproDict[id].keys()
                getnewDictDomain(interproDict[id], domainDespList, newDict)
            #------------------------------------------------
            if isRep:
                repDespList = []
                repdictSonL = repdict[id]
                num = len(repdictSonL)
                # No domains for this locus: pass an empty position list.
                if not hasInterpro:
                    domainPosL = []
                getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
                getnewDictRepDesp(id, num, repDespList)
            #----------------get newDict---------------------------
            modifySeq(seq, newDict)
            #---------------------------------------------------------
            # Escape characters that are special in LaTeX.
            shortAnno = ''
            annos = annodict[id].replace('_', r'\_')
            annos = annos.replace('%', r'\%')
            annos = annos.replace('~', r'\~')
            annos = annos.replace('&', r'\&')
            # The section title uses the annotation text before the first '['.
            firstBr = annos.find('[')
            if firstBr != -1:
                shortAnno = annos[:firstBr]
            print ''.join((r'\section{', id, ' ', shortAnno, '}'))
            annos = r'\tair{' + id[:-2] + '} ' + annos
            print r'\anno{', annos, '}'

            print r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!'''
            #--without annotation
            seq = ''.join(seq)
            print seq

            print r'''!
\end{minipage}            
'''
            #------------------------------------------------
            print
            print '.' * 100
            print
            #---------------Rep------------------------------
            if isRep:
                for repSeq in repDespList:
                    print repSeq
                    print
            #-------------Domain desp----------------------
            if hasInterpro:
                print '.' * 100
                print
                for domainDesp in domainDespList:
                    print domainDesp
                    print
            print r'\clearpage'
            print
        #----------------End of else ---one locus-------------
    #----------------END of for ---all locus------------------
    latexTail()
コード例 #9
0
ファイル: labelDomainOrRepV1.py プロジェクト: Tong-Chen/NGS
def main():
    (options, args) = cmdpara(sys.argv)
    print >> sys.stderr, "*******Print the result to screen.*******"

    # -------------------macro-------------------------------------
    isRep = 0
    isLoc = 0
    # -------------------------------------------------------------
    if options.seqfile != None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile != None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile != None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile != None:
        locusList = [line.strip() for line in open(options.locusfile)]
        isLoc = 1
    if not isLoc:
        locusList = repdict.keys() if isRep else seqdict.keys()
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    # print locusList
    # print repdict.keys()
    # print repdict
    # print isRep
    # sys.exit(1)
    # ----------------------------------------------------------------
    latexHead()
    latexExplain()
    # ---------------------------------------------------------------
    for id in locusList:
        if id not in seqdict:
            print >> sys.stderr, "Unknown locus %s" % id
        else:
            hasInterpro = 0
            seq = list(seqdict[id])
            # -------------get newDict--------------------------------
            newDict = {}
            if id in interproDict:
                hasInterpro = 1
                domainDespList = []
                domainPosL = interproDict[id].keys()
                getnewDictDomain(interproDict[id], domainDespList, newDict)
            # ------------------------------------------------
            if isRep:
                repDespList = []
                repdictSonL = repdict[id]
                num = len(repdictSonL)
                if not hasInterpro:
                    domainPosL = []
                getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
                getnewDictRepDesp(id, num, repDespList)
            # ----------------get newDict---------------------------
            modifySeq(seq, newDict)
            # ---------------------------------------------------------
            print "".join((r"\section{", id, "}"))
            annos = annodict[id].replace("_", r"\_")
            annos = annos.replace("%", r"\%")
            annos = annos.replace("~", r"\~")
            annos = annos.replace("&", r"\&")
            annos = r"\tair{" + id[:-2] + "} " + annos
            print r"\anno{", annos, "}"

            print r"""
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!"""
            # --without annotation
            seq = "".join(seq)
            print seq

            print r"""!
\end{minipage}            
"""
            # ------------------------------------------------
            print
            print "." * 100
            print
            # ---------------Rep------------------------------
            if isRep:
                for repSeq in repDespList:
                    print repSeq
                    print
            # -------------Domain desp----------------------
            if hasInterpro:
                print "." * 100
                print
                for domainDesp in domainDespList:
                    print domainDesp
                    print
            print r"\clearpage"
            print
        # ----------------End of else ---one locus-------------
    # ----------------END of for ---all locus------------------
    latexTail()
コード例 #10
0
ファイル: labelDomainOrRepV2.py プロジェクト: Tong-Chen/NGS
def main():
    """Entry point of labelDomainOrRepV2.

    Parses the command line with ``cmdpara(sys.argv)``.

    NOTE(review): right after parsing, the function prints 'sort' or
    'no sort' according to ``options.sort`` and calls ``sys.exit(1)`` in
    either case.  All statements after that check are therefore dead code;
    this appears to be a debugging leftover -- confirm with the author.

    The dead part reads sequences, repetitions, loci, annotations and
    InterPro domains via ``ctIO`` and prints a LaTeX section for each
    locus, framed by ``latexHead()``/``latexExplain()`` and ``latexTail()``.
    """
    (options, args) = cmdpara(sys.argv)
    # Either branch ends the program with exit status 1.
    if options.sort:
        print 'sort'
        sys.exit(1)
    else:
        print 'no sort'
        sys.exit(1)
    # ---- not reachable from here on (see docstring) ----
    print >>sys.stderr, "*******Print the result to screen.*******"

    #-------------------macro-------------------------------------
    # Flags: isRep = repetitions loaded, isLoc = locus file supplied.
    isRep = 0
    isLoc = 0
    #-------------------------------------------------------------
    if options.seqfile != None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    # Reading a seqrep file starts again from an empty seqdict.
    if options.seqrepfile != None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile != None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile != None:
        locusList = [line.strip() for line in open(options.locusfile)]
        isLoc = 1
    # No locus file: take every locus of the loaded data, sorted.
    if not isLoc:
        locusList = repdict.keys() if isRep else seqdict.keys()
        locusList.sort()
    # NOTE(review): annodict/interproDict exist only if this branch runs,
    # yet the loop below reads them unconditionally.
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    #print locusList
    #print repdict.keys()
    #print repdict
    #print isRep
    #sys.exit(1)
    #----------------------------------------------------------------
    latexHead()
    latexExplain()
    #---------------------------------------------------------------
    for id in locusList:
        if id not in seqdict:
            print >>sys.stderr, "Unknown locus %s" % id
        else:
            hasInterpro = 0
            # Character list now, joined back into a string before printing.
            seq = list(seqdict[id])
            #-------------get newDict--------------------------------
            newDict = {}
            if id in interproDict:
                hasInterpro = 1
                domainDespList = []
                domainPosL = interproDict[id].keys()
                getnewDictDomain(interproDict[id], domainDespList,
                    newDict)
            #------------------------------------------------
            if isRep:
                repDespList = []
                repdictSonL = repdict[id]
                num = len(repdictSonL)
                # Locus without domains: hand over an empty position list.
                if not hasInterpro:
                    domainPosL = []
                getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
                getnewDictRepDesp(id, num, repDespList)
            #----------------get newDict---------------------------
            modifySeq(seq, newDict)
            #---------------------------------------------------------
            # Backslash-escape LaTeX special characters in the annotation.
            shortAnno = ''
            annos = annodict[id].replace('_', r'\_')
            annos = annos.replace('%', r'\%')
            annos = annos.replace('~', r'\~')
            annos = annos.replace('&', r'\&')
            # Section title: annotation text up to the first '['.
            firstBr = annos.find('[')
            if firstBr != -1:
                shortAnno = annos[:firstBr]
            print ''.join((r'\section{', id, ' ', shortAnno, '}' ))
            annos = r'\tair{' + id[:-2] + '} ' + annos
            print r'\anno{', annos, '}'

            print r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!'''
            #--without annotation
            seq = ''.join(seq)
            print seq

            print r'''!
\end{minipage}            
'''
            #------------------------------------------------
            print
            print '.' * 100
            print
            #---------------Rep------------------------------
            if isRep:
                for repSeq in repDespList:
                    print repSeq
                    print
            #-------------Domain desp----------------------
            if hasInterpro:
                print '.' * 100
                print
                for domainDesp in domainDespList:
                    print domainDesp
                    print
            print r'\clearpage'
            print
        #----------------End of else ---one locus-------------
    #----------------END of for ---all locus------------------
    latexTail()