Example #1
0
def main():
    """Write each repetition group of a repfile to its own sequence file.

    Usage: ``python <script> repfile``.

    ``readRep`` fills ``repDict``; the loops below treat it as a mapping
    ``locus -> [group, ...]`` where every group is a dict keyed by position
    tuples (``pos[0]``, ``pos[1]``) whose values are sequence strings.
    Group number ``g`` (1-based) of a locus is written, sorted by position,
    to ``<locus>.<g>.short`` when its longest sequence has at most 30
    characters and to ``<locus>.<g>.long`` otherwise.  Every record is a
    ``><locus>.<g>.<pos0>:<pos1>`` header line followed by the sequence.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    sys.stderr.write("Transfer repetitions to multiple sequence "
                     "alignment files. One geoup in one file. "
                     "Waiting for alignment.\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    # Groups whose longest member is at most this long go to '.short'.
    midlen = 30
    repDict = {}
    readRep(sys.argv[1], repDict)
    for locus, valueL in repDict.items():
        for group, groupD in enumerate(valueL, 1):
            # "or [0]" keeps the original result (0) for an empty group.
            maxlen = max([len(seq) for seq in groupD.values()] or [0])
            suffix = '.short' if maxlen <= midlen else '.long'
            outname = locus + '.' + str(group) + suffix
            # 'with' guarantees the handle is closed even if a write fails.
            with open(outname, 'w') as fh:
                for pos in sorted(groupD):
                    fh.write('>%s.%s.%s:%s\n%s\n'
                             % (locus, group, pos[0], pos[1], groupD[pos]))
Example #2
0
def main():
    """Print the coding sequence of every repeat, one line per group.

    Usage: ``python <script> cdsfile repfile``.

    For each locus (in sorted order) a ``><locus>`` header is printed,
    followed by one line per repeat group.  A group line joins, with ``#``,
    one ``<slice>:<offset>`` item per position tuple of the group, where the
    slice is ``seq[(p0 - 1) * 3 : p1 * 3]`` of the locus' entry in the dict
    returned by ``ctIO.readFasta`` and the offset is the 1-based start of
    that slice.  A position whose computed start is not below its end is
    reported on stderr and the script exits with status 1.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    ctIO.readRep(sys.argv[2], repDict)
    loci = sorted(repDict.keys())
    cdsDict = ctIO.readFasta(sys.argv[1], loci)

    for locus in loci:
        print('>%s' % locus)
        cds = cdsDict[locus]
        for group in repDict[locus]:
            pieces = []
            for span in sorted(group.keys()):
                # Positions are scaled by 3 (first - 1 and last) to get
                # the slice bounds into the sequence.
                lo = (span[0] - 1) * 3
                hi = span[1] * 3
                if lo >= hi:
                    sys.stderr.write(
                        ' '.join((str(locus), str(span))) + '\n')
                    sys.exit(1)
                # Offset is reported 1-based (patched 2011-08-25; it used
                # to be start + 3).
                pieces.append(':'.join((cds[lo:hi], str(lo + 1))))
            print('#'.join(pieces))
Example #3
0
def main():
    """Run the codon-usage pipeline on a sequence file and a repfile.

    Usage: ``python <script> seq rep``.

    The steps are, in order: ``codonSet``, ``readFasta`` on the sequence
    file, ``readRep`` on the repfile, ``originalSta`` to obtain the two
    codon dictionaries, then ``totalNumberProrep``, ``singlRatioProRep`` and
    ``repOrNot``.  The repfile path is handed to the last three helpers
    (presumably as an output-name prefix, since the banner announces three
    files -- confirm against the helpers).
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seq_path = sys.argv[1]
    rep_path = sys.argv[2]

    codonList = codonSet()
    cdsDict = readFasta(seq_path)
    repDict = {}
    readRep(rep_path, repDict)
    stats = originalSta(repDict, cdsDict)
    codonRepDict, codonSeqDict = stats

    # Original notes: compare within proteins that have repeats -- codons
    # inside repeats versus codons in the remaining sequence (bar graph of
    # counts, heatmap of per-protein codon ratios).
    codonNumSeq = totalNumberProrep(codonRepDict, codonSeqDict, rep_path)
    singlRatioProRep(codonRepDict, codonSeqDict, rep_path, codonList)

    # Original notes: compare proteins without repeats against proteins
    # with repeats after the repeats are removed.
    repOrNot(codonNumSeq, codonRepDict, codonSeqDict, rep_path, codonList)
Example #4
0
def main():
    """Print in which third of its protein each repetition lies.

    Usage: ``python <script> seq.for.c rep``.

    ``readSeq`` fills ``seqDict`` and ``readRep`` fills ``repDict``.  For
    every locus the sequence length is split at ``first = length // 3`` and
    ``second = 2 * first``.  For every position tuple of every repeat group
    the midpoint ``sum(tuple) // 2`` is classified and one number is printed
    per repetition: ``-1`` (before ``first``), ``0`` (between the bounds) or
    ``1`` (after ``second``).  A midpoint that falls exactly on a bound is
    assigned at random to one of the two neighbouring segments.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    # The original backslash continuations glued words together
    # ("proteininto", "bymidpoint"); the separating spaces are restored.
    sys.stderr.write(
        "position distribution, divide protein "
        "into three equal length segments, N-, mid-, C- terminal. Detect by "
        "midpoint of repetitions. If located in boundary, random choose\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq.for.c rep\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    seqDict = {}
    readSeq(sys.argv[1], seqDict)
    readRep(sys.argv[2], repDict)

    for key, valueL in repDict.items():
        # Explicit floor division: same result as the Python 2 '/' on
        # integers, and unchanged if the script is ever run on Python 3.
        first = len(seqDict[key]) // 3
        second = first * 2
        for valueD in valueL:
            for span in valueD:
                # assumes the position tuples hold integers -- TODO confirm
                mid = sum(span) // 2
                if mid < first:
                    print(-1)
                elif mid == first:
                    print(-1 if randint(0, 1) else 0)
                elif mid < second:
                    print(0)
                elif mid == second:
                    print(0 if randint(0, 1) else 1)
                else:
                    print(1)
Example #5
0
def main():
    """Drive the codon statistics helpers for one sequence/repfile pair.

    Usage: ``python <script> seq rep``.

    Calls, in this order, ``codonSet``, ``readFasta`` (sequence file),
    ``readRep`` (repfile), ``originalSta``, ``totalNumberProrep``,
    ``singlRatioProRep`` and ``repOrNot``.  The value returned by
    ``totalNumberProrep`` is forwarded to ``repOrNot``; the repfile path is
    passed to the last three helpers.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    fasta_name, rep_name = sys.argv[1], sys.argv[2]

    codons = codonSet()
    sequences = readFasta(fasta_name)
    repeats = {}
    readRep(rep_name, repeats)
    in_repeat, in_sequence = originalSta(repeats, sequences)

    # Within proteins that carry repeats: codons inside the repeats versus
    # codons elsewhere (original notes mention a bar graph of the counts and
    # a heatmap of the per-protein codon ratios).
    totals = totalNumberProrep(in_repeat, in_sequence, rep_name)
    singlRatioProRep(in_repeat, in_sequence, rep_name, codons)

    # Proteins without repeats versus repeat proteins with the repeats
    # removed (per the original notes).
    repOrNot(totals, in_repeat, in_sequence, rep_name, codons)
Example #6
0
def main():
    """Write each repeat group to a FASTA file and run t_coffee on it.

    Usage: ``python <script> repfile``.

    ``readRep`` fills ``repdict``; it is treated as ``locus -> [group, ...]``
    with every group a dict from position tuples to sequence strings.  Group
    ``i`` (0-based) of a locus is written to ``<locus><i>.fasta`` as records
    ``>pos<p0>-<p1>`` / sequence, sorted by position, and ``t_coffee`` is
    then invoked with that file name as its only argument.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    # Function-scope import so the block does not depend on the file header.
    import subprocess

    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    repdict = {}
    readRep(sys.argv[1], repdict)
    for locus, valueL in repdict.items():
        for i, seqDict in enumerate(valueL):
            filename = locus + str(i) + '.fasta'
            records = ['>pos%s-%s\n%s\n' % (key[0], key[1], seqDict[key])
                       for key in sorted(seqDict)]
            # 'with' closes the file even when writing fails.
            with open(filename, 'w') as fh:
                fh.write(''.join(records))
            # Argument list instead of a shell string: a locus name with
            # spaces or shell metacharacters can no longer alter the command.
            try:
                subprocess.call(['t_coffee', filename])
            except OSError as exc:
                # os.system only returned a status when the program was
                # missing; keep going instead of aborting the whole run.
                sys.stderr.write('Cannot run t_coffee on %s: %s\n'
                                 % (filename, exc))
Example #7
0
def main():
    """Dump every repeat group as FASTA and align it with t_coffee.

    Usage: ``python <script> repfile``.

    The dict filled by ``readRep`` is iterated as ``locus -> list of
    groups``; a group maps position tuples to sequence strings.  The
    ``i``-th group (counting from 0) of a locus goes to
    ``<locus><i>.fasta``: one ``>pos<p0>-<p1>`` header plus sequence line per
    position, in sorted position order.  ``t_coffee <file>`` is run right
    after each file has been written.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    # Imported here so that the block works without touching the file's
    # import section.
    import subprocess

    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    repdict = {}
    readRep(sys.argv[1], repdict)
    for locus, valueL in repdict.items():
        for i, seqDict in enumerate(valueL):
            filename = locus + str(i) + '.fasta'
            chunks = []
            for key in sorted(seqDict):
                chunks.append('>pos%s-%s\n' % (key[0], key[1]))
                chunks.append('%s\n' % seqDict[key])
            # The context manager releases the handle on every exit path.
            with open(filename, 'w') as fh:
                fh.write(''.join(chunks))
            # Pass the file name as a separate argv entry rather than
            # through the shell, so unusual locus names cannot be
            # interpreted as shell syntax.
            try:
                subprocess.call(['t_coffee', filename])
            except OSError as exc:
                # Mirror os.system's non-fatal behaviour when t_coffee
                # cannot be started.
                sys.stderr.write('Cannot run t_coffee on %s: %s\n'
                                 % (filename, exc))
Example #8
0
def main():
    """Classify every repetition as N-, mid- or C-terminal and print it.

    Usage: ``python <script> seq.for.c rep``.

    Sequences come from ``readSeq`` and repetitions from ``readRep``.  Each
    protein is cut at ``first = length // 3`` and ``second = 2 * first``.
    The midpoint of a repetition is ``sum(position tuple) // 2``; the script
    prints ``-1`` when it lies before ``first``, ``0`` when it lies between
    the two bounds and ``1`` when it lies after ``second``.  A midpoint
    exactly on a bound is given to one of the adjacent segments by
    ``randint(0, 1)``.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    # Spaces that the original line continuations swallowed
    # ("proteininto", "bymidpoint") are put back.
    banner = ("position distribution, divide protein "
              "into three equal length segments, N-, mid-, C- terminal. "
              "Detect by midpoint of repetitions. "
              "If located in boundary, random choose")
    sys.stderr.write(banner + '\n')
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq.for.c rep\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    seqDict = {}
    readSeq(sys.argv[1], seqDict)
    readRep(sys.argv[2], repDict)

    for key, valueL in repDict.items():
        # '//' spells out the integer division the Python 2 '/' performed.
        first = len(seqDict[key]) // 3
        second = first * 2
        for valueD in valueL:
            for span in valueD:
                # assumes integer positions in the tuple -- TODO confirm
                mid = sum(span) // 2
                if mid < first:
                    print(-1)
                elif mid == first:
                    print(-1 if randint(0, 1) else 0)
                elif mid < second:
                    print(0)
                elif mid == second:
                    print(0 if randint(0, 1) else 1)
                else:
                    print(1)
Example #9
0
def main():
    """Split repeat groups into low- and high-complexity output files.

    Usage: ``python <script> filename threshold``.

    For every repeat group read by ``readRep`` the value of ``si`` is
    averaged over the group's distinct sequences.  Groups whose average is
    at most ``int(threshold)`` are stored with ``saveDict`` in the
    low-complexity dict, all others in the regular dict.  Both dicts are
    written with ``outputRep`` to ``<basename>.LCSs`` and
    ``<basename>.HCSs``, where the basename is the part of the input path
    after the last ``/``.
    """
    sys.stderr.write("Using the average shannonIndex value "
                     "of a group sequences to represent the last entropy.\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write(
            'Using python %s filename'
            ' threshold(2)[threethe more the high complexity]\n'
            % sys.argv[0])
        sys.exit(0)

    # The three dicts share one structure.
    repDict = {}
    lcsDict = {}        # low complexity sequences
    regularDict = {}    # regular sequences
    readRep(sys.argv[1], repDict)
    threshold = int(sys.argv[2])

    for locus, groups in repDict.items():
        for group in groups:
            distinct = set(group.values())
            mean = sum(si(member) for member in distinct) / len(distinct)
            bucket = lcsDict if mean <= threshold else regularDict
            saveDict(bucket, locus, group)

    prefix = sys.argv[1].rsplit('/', 1)[-1]
    outputRep(lcsDict, prefix + '.LCSs')
    outputRep(regularDict, prefix + '.HCSs')
Example #10
0
def main():
    """Export every repetition group of a repfile for later alignment.

    Usage: ``python <script> repfile``.

    ``readRep`` populates ``repDict``, which is walked as ``locus -> list of
    groups``; each group maps position tuples to sequence strings.  Groups
    are numbered from 1 within their locus.  A group is written to
    ``<locus>.<n>.short`` if none of its sequences is longer than 30
    characters, else to ``<locus>.<n>.long``.  Records are emitted in sorted
    position order as ``><locus>.<n>.<pos0>:<pos1>`` followed by the
    sequence on the next line.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    sys.stderr.write("Transfer repetitions to multiple sequence "
                     "alignment files. One geoup in one file. "
                     "Waiting for alignment.\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s repfile\n' % sys.argv[0])
        sys.exit(0)

    # Length limit (inclusive) for the '.short' files.
    midlen = 30
    repDict = {}
    readRep(sys.argv[1], repDict)
    for locus, valueL in repDict.items():
        for group, groupD in enumerate(valueL, 1):
            lengths = [len(seq) for seq in groupD.values()]
            # An empty group counts as length 0, as in the original loop.
            longest = max(lengths) if lengths else 0
            if longest <= midlen:
                outname = locus + '.' + str(group) + '.short'
            else:
                outname = locus + '.' + str(group) + '.long'
            # The handle is closed by 'with' even if a write raises; the
            # local name no longer shadows the builtin 'file'.
            with open(outname, 'w') as fh:
                for pos in sorted(groupD):
                    header = '>%s.%s.%s:%s' % (locus, group, pos[0], pos[1])
                    fh.write('%s\n%s\n' % (header, groupD[pos]))
Example #11
0
def main():
    """Print the sequence slice behind every repeat of every locus.

    Usage: ``python <script> cdsfile repfile``.

    Loci are processed in sorted order.  After a ``><locus>`` header, each
    repeat group yields one output line: the ``#``-joined list of
    ``<slice>:<1-based slice start>`` items, one per position tuple in
    sorted order.  The slice bounds are ``(p0 - 1) * 3`` and ``p1 * 3``
    into the locus' entry of the dict returned by ``ctIO.readFasta``.  If a
    start is not smaller than its end, the locus and tuple are written to
    stderr and the script exits with status 1.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    cds_name = sys.argv[1]
    rep_name = sys.argv[2]

    repDict = {}
    ctIO.readRep(rep_name, repDict)
    locusL = sorted(repDict.keys())
    cdsDict = ctIO.readFasta(cds_name, locusL)

    def render(locus, sequence, posTuple):
        """Return '<slice>:<start>' for one position, or exit on bad bounds."""
        begin = (posTuple[0] - 1) * 3
        stop = posTuple[1] * 3
        if begin >= stop:
            sys.stderr.write('%s %s\n' % (str(locus), str(posTuple)))
            sys.exit(1)
        # Patched 2011-08-25: the reported start is begin + 1, not begin + 3.
        return sequence[begin:stop] + ':' + str(begin + 1)

    for locus in locusL:
        print('>%s' % locus)
        sequence = cdsDict[locus]
        for posDict in repDict[locus]:
            items = []
            for posTuple in sorted(posDict.keys()):
                items.append(render(locus, sequence, posTuple))
            print('#'.join(items))
Example #12
0
def main():
    """Round-trip a repfile: read it with readRep, write it with outputRep.

    Usage: ``python <script> filename``.  The output name is the input name
    with ``ctIO.test`` appended.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    source = sys.argv[1]
    parsed = {}
    readRep(source, parsed)
    outputRep(parsed, source + 'ctIO.test')
Example #13
0
def main():
    """Convert a merged repetition file to FASTA via ``transfer``.

    Usage: ``python <script> filename``.

    The file is parsed by ``readRep`` and the resulting dict is handed to
    ``transfer``; according to the banner the converted data is written to
    stdout.

    Prints the usage line on stderr and exits with status 0 when the number
    of command-line arguments is wrong.
    """
    sys.stderr.write('used to transfer merged repetition file '
                     'to fasta, the output is STDOUT.\n')
    if len(sys.argv) != 2:
        # stdout is the data channel of this script, so the usage text
        # belongs on stderr (as in the sibling entry points).
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)
    repDict = {}
    readRep(sys.argv[1], repDict)
    transfer(repDict)
Example #14
0
def main():
    """Parse a repfile and write it back out under a test name.

    Usage: ``python <script> filename``.  ``readRep`` loads the file into a
    dict and ``outputRep`` writes that dict to ``<filename>ctIO.test``.
    """
    sys.stderr.write("Print the result to screen\n")
    arguments = sys.argv
    if len(arguments) != 2:
        sys.stderr.write('Using python %s filename\n' % arguments[0])
        sys.exit(0)

    repDict = {}
    readRep(arguments[1], repDict)
    target = arguments[1] + 'ctIO.test'
    outputRep(repDict, target)
Example #15
0
def main():
    """Read a merged repetition file and pass it to ``transfer``.

    Usage: ``python <script> filename``.

    ``readRep`` loads the file into ``repDict`` and ``transfer`` receives
    that dict; the banner states that the FASTA result goes to stdout.

    On a wrong argument count the usage line is written to stderr and the
    script exits with status 0.
    """
    sys.stderr.write('used to transfer merged repetition file '
                     'to fasta, the output is STDOUT.\n')

    if len(sys.argv) != 2:
        # Keep diagnostics off stdout: it carries the converted data and may
        # be redirected into a FASTA file.
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)
    repDict = {}
    readRep(sys.argv[1], repDict)
    transfer(repDict)
Example #16
0
def main():
    """Read a merged repetition file and hand it to ``transfer``.

    Usage: ``python <script> filename``.

    Two banners are written to stderr, then ``readRep`` fills ``repDict``
    and ``transfer(repDict)`` is called; per the second banner the result
    is produced on stdout.

    On a wrong argument count the usage line is written to stderr and the
    script exits with status 0.
    """
    # The original continuation produced "file tomultiple files"; the
    # missing space is restored.
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files aligned together using t_coffee\n")
    sys.stderr.write('used to transfer merged repetition file '
                     'to fasta, the output is STDOUT.\n')
    if len(sys.argv) != 2:
        # Usage goes to stderr so that it never mixes with the data that
        # the script emits on stdout.
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)
    repDict = {}
    readRep(sys.argv[1], repDict)
    transfer(repDict)
Example #17
0
def main():
    """Print the average ``si`` value of every repeat group.

    Usage: ``python <script> filename``.

    For each group read by ``readRep`` the values of ``si`` over the
    group's distinct sequences are summed and divided by the number of
    distinct sequences; the result is printed with two decimals, one line
    per group.
    """
    sys.stderr.write("Using the average shannonIndex value "
                     "of a group sequences to represent the last entropy.\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)

    for groups in repDict.values():
        for group in groups:
            # Identical sequences inside a group are counted once.
            distinct = set(group.values())
            mean = sum(si(member) for member in distinct) / len(distinct)
            print("%.2f" % mean)
Example #18
0
def main():
    """Report one averaged ``si`` score per repeat group on stdout.

    Usage: ``python <script> filename``.

    ``readRep`` loads the groups.  Every group contributes one output line:
    the sum of ``si(sequence)`` over its distinct sequences divided by how
    many distinct sequences it has, formatted as ``%.2f``.
    """
    banner_lines = (
        "Using the average shannonIndex value "
        "of a group sequences to represent the last entropy.",
        "Print the result to screen",
    )
    for text in banner_lines:
        sys.stderr.write(text + '\n')
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    repDict = {}
    readRep(sys.argv[1], repDict)

    for valueL in repDict.values():
        for itemD in valueL:
            unique_seqs = set(itemD.values())
            scores = [si(sequence) for sequence in unique_seqs]
            average = sum(scores) / len(unique_seqs)
            print("%.2f" % average)
Example #19
0
def main():
    """Search every locus' repeats against its ortholog database.

    Usage: ``python <script> repfile dbpath/``.

    For each locus read by ``readRep`` the database path is
    ``dbpath + locus``; loci without such a path are counted and skipped.
    For the others, identical sequences of a repeat group are merged into
    one record ``><locus>.<group>.<start1>:<start2>...`` (group numbers are
    1-based, starts are ``pos[0]`` of each occurrence).  Sequences of at
    most 30 characters go to ``<locus><label>.short``, longer ones to
    ``<locus><label>.long``, where ``label`` is ``.LCSs`` or ``.HCSs`` if
    the repfile name contains that token and empty otherwise.  ``psiblast``
    is then run four times per locus: short and long query with the default
    report (``.out``), then both again with ``-outfmt 7`` (``.table``).  The
    short queries add ``-evalue 20000 -matrix PAM30 -comp_based_stats 0
    -word_size 2``.  Finally the number of skipped loci is printed.

    Prints the usage line and exits with status 0 when the number of
    command-line arguments is wrong.
    """
    # Function-scope import keeps the block independent of the file header.
    import subprocess

    def run_psiblast(query, db, out, extra):
        """Run one psiblast search; report (do not raise) a failed start."""
        cmd = ['psiblast', '-query', query, '-db', db, '-out', out,
               '-num_iterations', '5'] + extra
        # An argument list avoids the shell, so locus or path names with
        # spaces or metacharacters cannot change the command.
        try:
            subprocess.call(cmd)
        except OSError as exc:
            sys.stderr.write('Cannot run psiblast on %s: %s\n' % (query, exc))

    # The original continuation produced "orthologs,use"; the space is
    # restored.
    sys.stderr.write("To detect the conservation among orthologs, "
                     "use repetitions as the query and its related "
                     "orthologs as db(after makeblastdb). \n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repfile dbpath/\n' % sys.argv[0])
        sys.exit(0)

    rep_path = sys.argv[1]
    # Patched 2011-09-22: label defaults to '' for files that are neither
    # 'LCSs' nor 'HCSs'.
    if 'LCSs' in rep_path:
        label = '.LCSs'
    elif 'HCSs' in rep_path:
        label = '.HCSs'
    else:
        label = ''

    midlen = 30
    short_opts = ['-evalue', '20000', '-matrix', 'PAM30',
                  '-comp_based_stats', '0', '-word_size', '2']
    noOrtho = 0
    path = sys.argv[2]
    repDict = {}
    readRep(rep_path, repDict)
    for locus, valueL in repDict.items():
        tmppath = path + locus
        if not os.path.exists(tmppath):
            noOrtho += 1
            continue

        short_name = locus + label + '.short'
        long_name = locus + label + '.long'
        short_recs = []
        long_recs = []
        for group, groupD in enumerate(valueL, 1):
            # Merge identical sequences, remembering every start position.
            starts_by_seq = {}
            for pos in sorted(groupD):
                starts_by_seq.setdefault(groupD[pos], []).append(str(pos[0]))
            for seq in sorted(starts_by_seq):
                record = '>%s.%s.%s\n%s\n' % (
                    locus, group, ':'.join(starts_by_seq[seq]), seq)
                if len(seq) <= midlen:
                    short_recs.append(record)
                else:
                    long_recs.append(record)

        # Both files are always created (possibly empty), as before; 'with'
        # closes them even if a write fails.
        for name, records in ((short_name, short_recs),
                              (long_name, long_recs)):
            with open(name, 'w') as fh:
                fh.writelines(records)

        # Same order as the original: short/long report, short/long table.
        for suffix, fmt in (('.out', []), ('.table', ['-outfmt', '7'])):
            run_psiblast(short_name, tmppath, short_name + suffix,
                         short_opts + fmt)
            run_psiblast(long_name, tmppath, long_name + suffix, fmt)
    print(noOrtho)
Example #20
0
def main():
    """Tag every repeat with its relation to the domains of its locus.

    Usage: ``python <script> repname interpro``.

    ``readInterpro`` returns ``aDict`` (locus -> sequence of domain
    positions, indexed ``[0]`` = start, ``[1]`` = end) and ``readRep`` fills
    ``repDict``, e.g.::

        {AT1G62760: [{(25, 31): 'SSLSPSS', (51, 57): 'SSLSPSS'},
                     {(52, 60): 'SLSPSSPPP'}]}

    Each repeat value gets ``:<begin><symbol>`` appended, where the symbol
    is decided by the first domain that matches one of these tests:

    * ``@`` overlap: the repeat crosses exactly one border of the domain
    * ``^`` rep in domain: the repeat lies inside the domain
    * ``$`` domain in rep: the domain lies inside the repeat
    * ``!`` new: no domain matched, or the locus has no domain entry

    The annotated dict is finally passed to ``output``.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repname interpro\n' % sys.argv[0])
        sys.exit(0)

    aDict = readInterpro(sys.argv[2])
    repDict = {}
    readRep(sys.argv[1], repDict)

    for locus, groups in repDict.items():
        # A locus without domain data behaves like one with no domains.
        domains = aDict[locus] if locus in aDict else ()
        for group in groups:
            # Snapshot the keys: the values are rewritten inside the loop.
            for span in list(group.keys()):
                begin = span[0]
                end = span[1]
                symbol = '!'
                for domain in domains:
                    ds = domain[0]
                    de = domain[1]
                    if (ds < begin < de < end) or (begin < ds < end < de):
                        symbol = '@'
                    elif (ds < begin and end <= de) or \
                            (begin == ds and end < de):
                        symbol = '^'
                    elif begin <= ds and de <= end:
                        symbol = '$'
                    else:
                        # This domain does not touch the repeat; try the
                        # next one.
                        continue
                    # The first matching domain decides.
                    break
                group[span] += ':' + str(begin) + symbol
    output(repDict)
Example #21
0
def main():
    """Annotate repeats with a symbol describing their domain context.

    Usage: ``python <script> repname interpro``.

    ``aDict = readInterpro(...)`` maps a locus to its domain positions
    (``[0]`` start, ``[1]`` end); ``readRep`` fills ``repDict`` with, per
    locus, a list of dicts such as ``{(25, 31): 'SSLSPSS'}``.  The string
    ``:<begin><symbol>`` is appended to every repeat value and the dict is
    then passed to ``output``.  Symbols::

        !  new               (no domain matches / locus without domains)
        @  overlap           (repeat crosses one domain border)
        ^  rep in domain     (repeat inside the domain)
        $  domain in rep     (domain inside the repeat)

    Domains are tested in order and the first match wins.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repname interpro\n' % sys.argv[0])
        sys.exit(0)

    def relation(begin, end, domainPosL):
        """Return the symbol of the first domain related to begin..end."""
        for domainset in domainPosL:
            ds = domainset[0]
            de = domainset[1]
            crosses_end = begin > ds and begin < de and end > de
            crosses_start = begin < ds and end > ds and end < de
            if crosses_end or crosses_start:
                return '@'
            if (begin > ds and end <= de) or (begin == ds and end < de):
                return '^'
            if begin <= ds and end >= de:
                return '$'
        return '!'

    aDict = readInterpro(sys.argv[2])
    repDict = {}
    readRep(sys.argv[1], repDict)

    for locus, valueL in repDict.items():
        if locus in aDict:
            domainPosL = aDict[locus]
        else:
            # No domain entry: every repeat of this locus is marked '!'.
            domainPosL = []
        for dictrep in valueL:
            # Copy the keys first; the loop body replaces the values.
            for posset in list(dictrep.keys()):
                begin = posset[0]
                mark = relation(begin, posset[1], domainPosL)
                dictrep[posset] += ':' + str(begin) + mark
    output(repDict)
Example #22
0
def main():
    """Print a LaTeX report with one section per locus to stdout.

    Options come from ``cmdpara(sys.argv)``.  Depending on which of
    ``seqfile``, ``seqrepfile``, ``repfile`` and ``locusfile`` are set, the
    sequences, repeats and the list of loci are loaded through ``ctIO``.
    For every locus found in ``seqdict`` the function prints a
    ``\\section``, an ``\\anno`` line built from the annotation, the
    sequence (after ``modifySeq`` applied ``newDict``) inside a
    ``\\DNA!...!`` minipage, then the repeat descriptions and the domain
    descriptions, and a ``\\clearpage``.  ``latexHead``/``latexExplain``
    are called before the loop and ``latexTail`` after it.
    """
    (options, args) = cmdpara(sys.argv)
    print >> sys.stderr, "*******Print the result to screen.*******"

    # Flags: isRep = repeat data was loaded, isLoc = a locus list file was
    # given.
    isRep = 0
    isLoc = 0
    # -------------------------------------------------------------
    # NOTE(review): seqdict is only bound when seqfile or seqrepfile is
    # given; with neither, the 'id not in seqdict' test below would raise
    # NameError -- confirm that cmdpara enforces one of them.
    if options.seqfile != None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile != None:
        # Replaces any seqdict loaded from seqfile above.
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile != None:
        # Replaces any repdict loaded from seqrepfile above.
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile != None:
        # One locus per line; the file handle is left to the garbage
        # collector.
        locusList = [line.strip() for line in open(options.locusfile)]
        isLoc = 1
    if not isLoc:
        # Without a locus file, report every locus that has repeats (or,
        # failing that, every sequence).
        locusList = repdict.keys() if isRep else seqdict.keys()
    # NOTE(review): annodict and interproDict are only bound in this
    # branch, but both are read unconditionally inside the loop below.
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    # print locusList
    # print repdict.keys()
    # print repdict
    # print isRep
    # sys.exit(1)
    # ----------------------------------------------------------------
    latexHead()
    latexExplain()
    # ---------------------------------------------------------------
    for id in locusList:
        if id not in seqdict:
            print >> sys.stderr, "Unknown locus %s" % id
        else:
            hasInterpro = 0
            # Work on a list of characters so modifySeq can edit in place.
            seq = list(seqdict[id])
            # newDict is filled by the getnewDict* helpers and consumed by
            # modifySeq.
            newDict = {}
            if id in interproDict:
                hasInterpro = 1
                domainDespList = []
                domainPosL = interproDict[id].keys()
                getnewDictDomain(interproDict[id], domainDespList, newDict)
            # ------------------------------------------------
            if isRep:
                repDespList = []
                # NOTE(review): a locus taken from the locus file but absent
                # from repdict would fail here if repdict is a plain dict --
                # verify.
                repdictSonL = repdict[id]
                num = len(repdictSonL)
                if not hasInterpro:
                    # No domains for this locus: pass an empty position list.
                    domainPosL = []
                getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
                getnewDictRepDesp(id, num, repDespList)
            # Apply the collected entries of newDict to the sequence.
            modifySeq(seq, newDict)
            # ---------------------------------------------------------
            print "".join((r"\section{", id, "}"))
            # Escape the LaTeX special characters _ % ~ & in the annotation.
            annos = annodict[id].replace("_", r"\_")
            annos = annos.replace("%", r"\%")
            annos = annos.replace("~", r"\~")
            annos = annos.replace("&", r"\&")
            # id[:-2] drops the last two characters of the locus id
            # (presumably a '.N' gene-model suffix -- TODO confirm).
            annos = r"\tair{" + id[:-2] + "} " + annos
            print r"\anno{", annos, "}"

            print r"""
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!"""
            # --without annotation
            seq = "".join(seq)
            print seq

            print r"""!
\end{minipage}            
"""
            # Separator line of 100 dots between sequence and descriptions.
            print
            print "." * 100
            print
            # One paragraph per repeat description.
            if isRep:
                for repSeq in repDespList:
                    print repSeq
                    print
            # One paragraph per domain description, after another separator.
            if hasInterpro:
                print "." * 100
                print
                for domainDesp in domainDespList:
                    print domainDesp
                    print
            print r"\clearpage"
            print
        # ----------------End of else ---one locus-------------
    # ----------------END of for ---all locus------------------
    latexTail()
Example #23
0
def main():
    """Print a LaTeX report with one section per locus to stdout.

    NOTE(review): as written, the function prints ``sort`` or ``no sort``
    and exits with status 1 immediately after option parsing (both branches
    of the first ``if`` call ``sys.exit(1)``), so none of the report code
    below is reachable.  This looks like a leftover debugging stub for the
    ``sort`` option -- confirm the intended use of ``options.sort`` before
    removing it.

    The unreachable remainder loads sequences, repeats and the locus list
    through ``ctIO`` according to the options, then prints for every locus
    found in ``seqdict`` a ``\\section`` (locus id plus the annotation text
    before its first ``[``), an ``\\anno`` line, the sequence inside a
    ``\\DNA!...!`` minipage, the repeat descriptions, the domain
    descriptions and a ``\\clearpage``.
    """
    (options, args) = cmdpara(sys.argv)
    # NOTE(review): debugging stub -- every path through this 'if' exits.
    if options.sort:
        print 'sort'
        sys.exit(1)
    else:
        print 'no sort'
        sys.exit(1)
    print >> sys.stderr, "*******Print the result to screen.*******"

    # Flags: isRep = repeat data was loaded, isLoc = a locus list file was
    # given.
    isRep = 0
    isLoc = 0
    #-------------------------------------------------------------
    # NOTE(review): seqdict is only bound when seqfile or seqrepfile is
    # given, yet it is read unconditionally in the loop below.
    if options.seqfile != None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile != None:
        # Replaces any seqdict loaded from seqfile above.
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        isRep = 1
    if options.repfile != None:
        # Replaces any repdict loaded from seqrepfile above.
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        isRep = 1
    if options.locusfile != None:
        # One locus per line, kept in file order.
        locusList = [line.strip() for line in open(options.locusfile)]
        isLoc = 1
    if not isLoc:
        # Without a locus file, use the repeat loci (or all sequences) in
        # sorted order.
        locusList = repdict.keys() if isRep else seqdict.keys()
        locusList.sort()
    # NOTE(review): annodict and interproDict are only bound in this
    # branch, but both are read unconditionally inside the loop below.
    if isRep or isLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    #print locusList
    #print repdict.keys()
    #print repdict
    #print isRep
    #sys.exit(1)
    #----------------------------------------------------------------
    latexHead()
    latexExplain()
    #---------------------------------------------------------------
    for id in locusList:
        if id not in seqdict:
            print >> sys.stderr, "Unknown locus %s" % id
        else:
            hasInterpro = 0
            # Work on a list of characters so modifySeq can edit in place.
            seq = list(seqdict[id])
            # newDict is filled by the getnewDict* helpers and consumed by
            # modifySeq.
            newDict = {}
            if id in interproDict:
                hasInterpro = 1
                domainDespList = []
                domainPosL = interproDict[id].keys()
                getnewDictDomain(interproDict[id], domainDespList, newDict)
            #------------------------------------------------
            if isRep:
                repDespList = []
                # NOTE(review): a locus from the locus file that is missing
                # in repdict would fail here if repdict is a plain dict --
                # verify.
                repdictSonL = repdict[id]
                num = len(repdictSonL)
                if not hasInterpro:
                    # No domains for this locus: pass an empty position list.
                    domainPosL = []
                getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
                getnewDictRepDesp(id, num, repDespList)
            # Apply the collected entries of newDict to the sequence.
            modifySeq(seq, newDict)
            #---------------------------------------------------------
            shortAnno = ''
            # Escape the LaTeX special characters _ % ~ & in the annotation.
            annos = annodict[id].replace('_', r'\_')
            annos = annos.replace('%', r'\%')
            annos = annos.replace('~', r'\~')
            annos = annos.replace('&', r'\&')
            # The section title carries the annotation text up to the first
            # '[' (empty when there is no '[').
            firstBr = annos.find('[')
            if firstBr != -1:
                shortAnno = annos[:firstBr]
            print ''.join((r'\section{', id, ' ', shortAnno, '}'))
            # id[:-2] drops the last two characters of the locus id
            # (presumably a '.N' gene-model suffix -- TODO confirm).
            annos = r'\tair{' + id[:-2] + '} ' + annos
            print r'\anno{', annos, '}'

            print r'''
\noindent\begin{minipage}{\textwidth}
\noindent\rule{\textwidth}{2pt}
\DNA!'''
            #--without annotation
            seq = ''.join(seq)
            print seq

            print r'''!
\end{minipage}            
'''
            # Separator line of 100 dots between sequence and descriptions.
            print
            print '.' * 100
            print
            # One paragraph per repeat description.
            if isRep:
                for repSeq in repDespList:
                    print repSeq
                    print
            # One paragraph per domain description, after another separator.
            if hasInterpro:
                print '.' * 100
                print
                for domainDesp in domainDespList:
                    print domainDesp
                    print
            print r'\clearpage'
            print
        #----------------End of else ---one locus-------------
    #----------------END of for ---all locus------------------
    latexTail()
def main():
    """Search each locus's repetitions against its ortholog BLAST database.

    Usage: ``python <script> repfile dbpath/``

    ``repfile`` is parsed by ``readRep`` into ``{locus: [group, ...]}``,
    where every group maps a position key to a repetition sequence.  For
    each locus for which ``dbpath/<locus>`` exists, the distinct sequences
    of every group are written as FASTA records to two query files in the
    current directory:

    * ``<locus><label>.short`` -- sequences of at most 30 characters
    * ``<locus><label>.long``  -- longer sequences

    ``label`` is ``.LCSs`` or ``.HCSs`` when the repfile name contains that
    tag and is empty otherwise.  Identical sequences inside one group share
    a single record whose header is ``><locus>.<group>.<p1>:<p2>:...``,
    ``pN`` being the first element of each position key.

    Every query file is then searched twice with psiblast: once for the
    default report (``.out``) and once for the tabular report (``.table``,
    ``-outfmt 7``).  A psiblast failure is reported on stderr and does not
    stop the run.  The number of loci skipped because their database path
    does not exist is printed to stdout at the end.
    """
    # Local import: nothing else in this block's scope needs subprocess.
    import subprocess

    sys.stderr.write(
        "To detect the conservation among orthologs,"
        "use repetitions as the query and its related orthologs as db(after "
        "makeblastdb). \n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s repfile dbpath/\n' % sys.argv[0])
        sys.exit(0)
    #-------------------------------------
    repfile, dbdir = sys.argv[1], sys.argv[2]
    if 'LCSs' in repfile:
        label = '.LCSs'
    elif 'HCSs' in repfile:
        label = '.HCSs'
    else:
        # Patched 20110922: repfiles that are neither LCSs nor HCSs used to
        # leave [label] unset and crash; they now simply get no label.
        label = ''
    midlen = 30
    # The short queries need relaxed search settings; the long queries use
    # the psiblast defaults.
    shortOpts = ['-evalue', '20000', '-matrix', 'PAM30',
                 '-comp_based_stats', '0', '-word_size', '2']
    noOrtho = 0
    repDict = {}
    readRep(repfile, repDict)
    for locus, valueL in repDict.items():
        # os.path.join gives the same path as plain concatenation when
        # dbpath ends with '/', and still works when the slash is omitted.
        dbpath = os.path.join(dbdir, locus)
        if not os.path.exists(dbpath):
            noOrtho += 1
            continue
        #-------------------------------
        shortName = locus + label + '.short'
        longName = locus + label + '.long'
        # 'with' closes both query files even if writing fails part-way.
        with open(shortName, 'w') as fhshort:
            with open(longName, 'w') as fhlong:
                for group, groupD in enumerate(valueL, 1):
                    # sequence -> first elements of all its position keys,
                    # collected in sorted key order
                    startsBySeq = {}
                    for pos in sorted(groupD):
                        startsBySeq.setdefault(groupD[pos], []).append(
                            str(pos[0]))
                    #-------------------------------------------
                    for seq in sorted(startsBySeq):
                        fh = fhshort if len(seq) <= midlen else fhlong
                        fh.write('>%s.%s.%s\n%s\n' % (
                            locus, group, ':'.join(startsBySeq[seq]), seq))
                    #--------END one group--------------------------
        # Same four runs, in the same order, as before: short/long default
        # report first, then short/long tabular report.  Arguments are
        # passed as a list so no shell ever interprets the locus name.
        for suffix, fmtOpts in (('.out', []), ('.table', ['-outfmt', '7'])):
            for query, extraOpts in ((shortName, shortOpts), (longName, [])):
                cmd = ['psiblast', '-query', query, '-db', dbpath,
                       '-out', query + suffix, '-num_iterations', '5']
                cmd += extraOpts + fmtOpts
                try:
                    status = subprocess.call(cmd)
                except OSError as err:
                    # e.g. psiblast is not installed / not on PATH
                    sys.stderr.write('Cannot run psiblast: %s\n' % err)
                    continue
                if status != 0:
                    sys.stderr.write(
                        'psiblast exited with status %s for %s\n'
                        % (status, query + suffix))
        #------------END one locus
    print(noOrtho)
Example #25
0
def main():
    """Print every locus's sequence with its repetitions wrapped in '*'.

    Usage: ``python <script> forc repResult [anno] [locus]``

    * ``forc``      -- sequence file, loaded with ``readseq``
    * ``repResult`` -- repetition file, loaded with ``readRep`` into::

          {
          AT1G62760:
              [
                  {(25, 31): 'SSLSPSS', (51, 57): 'SSLSPSS'},
                  {(52, 60): 'SLSPSSPPP',},
              ]
          }

    * ``anno``      -- optional annotation file, loaded with ``readAnnoNew``
    * ``locus``     -- optional file with one locus name per line; when
      given, only loci whose name (or name minus its last two characters)
      is listed are printed

    For each selected locus, in sorted order, the following goes to stdout:
    a ``> <locus>`` header, the TAIR URL for the locus, the annotation (if
    available, with each double backslash turned into a newline), the
    marked-up sequence, and one line per repetition group listing its
    ``rep:first:second`` entries concatenated without a separator.

    Raises:
        KeyError: if a locus of the repetition file has no sequence in
            ``forc`` (deliberate: every locus is expected to be there).
    """
    sys.stderr.write(
        "Paste the mother sequene and related repetition together\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 3:
        sys.stderr.write(
            'Using python %s forc repResult [anno] [locus] \n' % sys.argv[0])
        sys.exit(0)
    tair = \
        "http://www.arabidopsis.org/servlets/TairObject?name=&type=locus"
    seqDict = {}
    readseq(sys.argv[1], seqDict)
    #--------------------------------------------
    # None means "no annotation file was given".
    annoDict = None
    if len(sys.argv) > 3 and sys.argv[3]:
        annoDict = {}
        readAnnoNew(sys.argv[3], annoDict)
    #--------------------------------------------
    # None means "no locus filter"; an empty file yields an empty set,
    # which filters out every locus.
    wanted = None
    if len(sys.argv) > 4 and sys.argv[4]:
        # 'with' closes the list file; a set makes the per-locus lookups
        # below O(1).
        with open(sys.argv[4]) as locusFile:
            wanted = set(line.strip() for line in locusFile)
    #--------------------------------------------
    repDict = {}
    readRep(sys.argv[2], repDict)
    #-------------------------------------------
    for locus in sorted(repDict):
        if wanted is not None and locus not in wanted \
                and locus[:-2] not in wanted:
            continue
        #------------------------------------
        print('> ' + locus)
        print(tair.replace("name=", "name=" + locus[:-2]))
        if annoDict is not None and locus in annoDict:
            print(annoDict[locus].replace('\\\\', '\n'))
        # No membership test on purpose: a missing sequence is an error.
        seq = seqDict[locus]
        repoutput = []
        for repitemDict in repDict[locus]:
            entries = []
            for pos in sorted(repitemDict):
                rep = repitemDict[pos]
                entries.append(':'.join([rep, str(pos[0]), str(pos[1])]))
                # NOTE(review): str.replace marks EVERY occurrence of rep,
                # not just the one at pos, so a repetition listed under
                # several positions ends up wrapped in several layers of
                # '*'.  Kept as is because existing output depends on it.
                seq = seq.replace(rep, '*' + rep + '*')
            repoutput.append(''.join(entries))
        print(seq)
        print('\n'.join(repoutput))
Example #26
0
def main():
    """Write a LaTeX report of marked-up sequences, one section per locus.

    NOTE(review): right after option parsing this function prints 'sort'
    or 'no sort' (depending on ``options.sort``) and calls ``sys.exit(1)``
    in both cases, so none of the report code below that point is ever
    executed.  This looks like a leftover debugging stub -- confirm the
    intended use of ``options.sort`` before removing it.

    What the (currently unreachable) remainder does:

    * loads sequences from ``options.seqfile`` or, together with
      repetitions, from ``options.seqrepfile``; repetitions may also come
      from ``options.repfile``
    * takes the loci from ``options.locusfile`` when given, otherwise the
      sorted keys of the repetition dict (or of the sequence dict when no
      repetitions were loaded)
    * loads annotations and InterPro domains for those loci when either
      repetitions or a locus file were supplied
    * for every locus with a known sequence, lets ``getnewDictDomain`` /
      ``getnewDictRep`` fill a dict that ``modifySeq`` applies to the
      sequence, then prints a section header, the annotation, the sequence
      inside a ``\\DNA!...!`` minipage, the repetition descriptions and
      the domain descriptions, followed by ``\\clearpage``
    * loci without a sequence are reported on stderr and skipped
    """
    options, args = cmdpara(sys.argv)
    # Both outcomes terminate the program with status 1 (see NOTE above).
    print('sort' if options.sort else 'no sort')
    sys.exit(1)
    sys.stderr.write("*******Print the result to screen.*******\n")

    #-------------------input flags--------------------------------
    haveRep = False
    haveLoc = False
    #-------------------------------------------------------------
    if options.seqfile is not None:
        seqdict = {}
        ctIO.readseq(options.seqfile, seqdict)
    if options.seqrepfile is not None:
        seqdict = {}
        repdict = {}
        ctIO.readseqrep(options.seqrepfile, seqdict, repdict)
        haveRep = True
    if options.repfile is not None:
        repdict = {}
        ctIO.readRep(options.repfile, repdict)
        haveRep = True
    if options.locusfile is not None:
        locusList = [entry.strip() for entry in open(options.locusfile)]
        haveLoc = True
    else:
        # No explicit locus list: take every known locus, sorted.
        locusList = sorted(repdict if haveRep else seqdict)
    if haveRep or haveLoc:
        annodict = {}
        ctIO.readAnno(options.anno, annodict, 1, locusList)
        interproDict = {}
        ctIO.readInterpro(options.interpro, interproDict, locusList)

    #----------------------------------------------------------------
    latexHead()
    latexExplain()
    #---------------------------------------------------------------
    dottedRule = '.' * 100
    for locus in locusList:
        if locus not in seqdict:
            sys.stderr.write("Unknown locus %s\n" % locus)
            continue
        # The sequence is handled as a list of characters until it is
        # joined back into a string for printing.
        seq = list(seqdict[locus])
        #-------------collect markup: domains first-------------------
        newDict = {}
        domainPosL = []
        hasInterpro = locus in interproDict
        if hasInterpro:
            domainDespList = []
            domainPosL = interproDict[locus].keys()
            getnewDictDomain(interproDict[locus], domainDespList,
                newDict)
        #-------------collect markup: then repetitions----------------
        if haveRep:
            repDespList = []
            repdictSonL = repdict[locus]
            # Group count is taken before getnewDictRep sees the list.
            num = len(repdictSonL)
            getnewDictRep(repdictSonL, domainPosL, repDespList, newDict)
            getnewDictRepDesp(locus, num, repDespList)
        #-------------apply the collected markup----------------------
        modifySeq(seq, newDict)
        #-------------annotation, escaped for LaTeX-------------------
        annos = annodict[locus]
        for special in ('_', '%', '~', '&'):
            annos = annos.replace(special, '\\' + special)
        firstBr = annos.find('[')
        # The section title only uses the text in front of the first '['.
        shortAnno = annos[:firstBr] if firstBr != -1 else ''
        print(r'\section{' + locus + ' ' + shortAnno + '}')
        print(' '.join((r'\anno{', r'\tair{' + locus[:-2] + '} ' + annos,
            '}')))

        print('\n'.join((
            '',
            r'\noindent\begin{minipage}{\textwidth}',
            r'\noindent\rule{\textwidth}{2pt}',
            r'\DNA!')))
        #--without annotation
        print(''.join(seq))

        # The blanks after \end{minipage} are part of the emitted text.
        print('\n'.join(('!', r'\end{minipage}' + ' ' * 12, '')))
        #------------------------------------------------
        print('')
        print(dottedRule)
        print('')
        #---------------Rep------------------------------
        if haveRep:
            for repSeq in repDespList:
                print(repSeq)
                print('')
        #-------------Domain desp----------------------
        if hasInterpro:
            print(dottedRule)
            print('')
            for domainDesp in domainDespList:
                print(domainDesp)
                print('')
        print(r'\clearpage')
        print('')
        #----------------End of one locus---------------------
    #----------------END of for ---all locus------------------
    latexTail()