Python readFasta Beispiele, ctIO.readFasta Python Beispiele

Beispiel #1

0

Datei anzeigen

def main():
    """Print, for every locus, the CDS slices that correspond to its repeats.

    Command line: ``cdsfile repfile``.  The repeat file is loaded through
    ``ctIO.readRep`` and the CDS file through ``ctIO.readFasta``, which is
    handed the sorted list of loci found in the repeat file.

    Output (stdout): a ``>locus`` line, then one line per repeat group of
    that locus.  A group line is the '#'-joined list of ``slice:offset``
    items, where ``offset`` is the 1-based start of the slice in the CDS.

    Repeat positions are scaled by three before slicing -- presumably
    amino-acid coordinates mapped onto nucleotides; confirm against
    ``ctIO.readRep``.

    Exits with status 0 after the usage message when the argument count is
    wrong, and with status 1 (after reporting the locus and position on
    stderr) when a position yields an empty or reversed range.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    repeatsByLocus = {}
    ctIO.readRep(sys.argv[2], repeatsByLocus)  # fills the dict in place
    locusL = sorted(repeatsByLocus.keys())
    cdsByLocus = ctIO.readFasta(sys.argv[1], locusL)

    for locus in locusL:
        sys.stdout.write('>%s\n' % locus)
        cds = cdsByLocus[locus]
        for group in repeatsByLocus[locus]:
            pieces = []
            for span in sorted(group.keys()):
                # 1-based inclusive position pair -> 0-based half-open slice,
                # three sequence characters per position unit.
                ntStart = 3 * (span[0] - 1)
                ntEnd = 3 * span[1]
                if ntStart >= ntEnd:
                    sys.stderr.write('%s %s\n' % (locus, span))
                    sys.exit(1)
                # The reported offset is start + 1 (an earlier version
                # reported start + 3; patched 2011-08-25).
                pieces.append(cds[ntStart:ntEnd] + ':' + str(ntStart + 1))
            sys.stdout.write('#'.join(pieces) + '\n')

Beispiel #2

0

Datei anzeigen

def main():
    """Collect repeat pairs from a prospero report and hand them to outputRep.

    Command line: ``pep prospero outputfile [overlap percentage]``.

    * ``pep`` is read with ``readFasta`` into a locus -> sequence mapping.
    * ``prospero`` is scanned line by line.  A line starting with
      ``using sequence1`` selects the current locus (its last
      whitespace-separated token).  A line starting with ``>`` must contain
      two ``from A to B`` ranges; each becomes a ``(start, end)`` key mapped
      to the matching slice of the current sequence.
    * When exactly one extra argument is given it is a float threshold: a
      hit is dropped when ``(pos2 - pos3 + 1) / (pos4 - pos1 + 1)`` exceeds
      it.
    * The collected ``{locus: [ {range: slice, ...}, ... ]}`` structure is
      passed to ``outputRep`` together with ``outputfile``.

    Exits with status 0 after the usage message when fewer than three
    arguments are given, and with status 1 when a ``>`` line does not have
    the expected layout.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        sys.stderr.write(
            'Using python %s pep prospero outputfile [overlap percentage]\n'
            % sys.argv[0])
        sys.exit(0)

    # Raw string: '\d' must reach the regex engine untouched.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    seqDict = readFasta(sys.argv[1])

    # Parse the optional threshold once instead of once per hit line.
    maxOverlap = None
    if len(sys.argv) == 5:
        maxOverlap = float(sys.argv[4])

    repDict = {}
    # 'with' guarantees the report is closed even if parsing aborts.
    with open(sys.argv[2]) as prospero:
        for line in prospero:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line.startswith('>'):
                # Relies on a preceding 'using sequence1' line having set
                # locus and seq.
                match = pat.match(line)
                if match is None:
                    sys.stderr.write(
                        'Unrecognized prospero hit line: %s\n'
                        % line.rstrip())
                    sys.exit(1)
                pos1, pos2, pos3, pos4 = [int(g) for g in match.groups()]
                if maxOverlap is not None and \
                        (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0) > maxOverlap:
                    continue
                # Positions are 1-based inclusive; convert to Python slices.
                repDict[locus].append({
                    (pos1, pos2): seq[pos1 - 1:pos2],
                    (pos3, pos4): seq[pos3 - 1:pos4],
                })
    outputRep(repDict, sys.argv[3])

Beispiel #3

0

Datei anzeigen

Datei: codonUsage.py Projekt: yuzhenpeng/NGS

def main():
    """Drive the codon-usage comparison for one sequence/repeat file pair.

    Command line: ``seq rep``.  Exits with status 0 after the usage message
    when the argument count is wrong.

    Steps, in order (all helpers live elsewhere in the project):

    1. ``codonSet()`` supplies the codon list.
    2. ``readFasta`` loads the sequences; ``readRep`` fills a dict with the
       repeats.
    3. ``originalSta`` derives two structures from them, one for repeats
       and one for sequences.
    4. ``totalNumberProrep`` and ``singlRatioProRep`` compare, within
       proteins that have repeats, the codons inside repeats against those
       in the rest of the sequence (per the original author's notes: a bar
       graph of the counts and a heatmap of per-protein codon ratios).
    5. ``repOrNot`` compares proteins without repeats against proteins
       with repeats after the repeats are removed (again per the original
       notes).

    The repeat file name is passed to the three reporting helpers --
    presumably as the basis for output file names; confirm in the helpers.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seqFile, repFile = sys.argv[1], sys.argv[2]

    codons = codonSet()
    cdsBySeq = readFasta(seqFile)
    repeats = {}
    readRep(repFile, repeats)  # fills the dict in place
    repCodons, seqCodons = originalSta(repeats, cdsBySeq)

    # Within-protein comparison: repeat codons versus the remaining codons.
    codonNumSeq = totalNumberProrep(repCodons, seqCodons, repFile)
    singlRatioProRep(repCodons, seqCodons, repFile, codons)

    # Proteins without repeats versus proteins with their repeats removed.
    repOrNot(codonNumSeq, repCodons, seqCodons, repFile, codons)

Beispiel #4

0

Datei anzeigen

Datei: prosperoRep.py Projekt: Tong-Chen/NGS

def main():
    """Parse a prospero report into repeat pairs and pass them to outputRep.

    Command line: ``pep prospero outputfile [overlap percentage]``.

    ``readFasta`` loads ``pep`` as a locus -> sequence mapping.  In the
    prospero report, a ``using sequence1`` line names the current locus
    (last whitespace-separated token) and every ``>`` line carries two
    ``from A to B`` ranges.  Each hit is stored as a dict mapping both
    ``(start, end)`` tuples to the corresponding slice of the current
    sequence, appended to that locus's list.

    With exactly one extra argument (a float), a hit is skipped when
    ``(pos2 - pos3 + 1) / (pos4 - pos1 + 1)`` is greater than it.

    Exit status: 0 after the usage message (too few arguments), 1 when a
    ``>`` line cannot be parsed.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        sys.stderr.write(
            'Using python %s pep prospero outputfile [overlap percentage]\n'
            % sys.argv[0])
        sys.exit(0)

    # Raw string keeps the '\d' escapes intact for the regex engine.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    seqDict = readFasta(sys.argv[1])

    # The threshold is constant for the run, so convert it only once.
    overlapLimit = float(sys.argv[4]) if len(sys.argv) == 5 else None

    repDict = {}
    # The context manager closes the report on every exit path.
    with open(sys.argv[2]) as report:
        for line in report:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
                continue
            if not line.startswith('>'):
                continue
            # From here on locus and seq come from the most recent
            # 'using sequence1' line.
            match = pat.match(line)
            if match is None:
                sys.stderr.write(
                    'Unrecognized prospero hit line: %s\n' % line.rstrip())
                sys.exit(1)
            pos1 = int(match.group(1))
            pos2 = int(match.group(2))
            pos3 = int(match.group(3))
            pos4 = int(match.group(4))
            if overlapLimit is not None:
                overlap = (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0)
                if overlap > overlapLimit:
                    continue
            # 1-based inclusive coordinates -> 0-based half-open slices.
            tmpDict = {}
            tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
            tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
            repDict[locus].append(tmpDict)
    outputRep(repDict, sys.argv[3])

Beispiel #5

0

Datei anzeigen

Datei: getNtFromAa.py Projekt: Tong-Chen/NGS

def main():
    """Write the CDS fragment behind each repeat of each locus to stdout.

    Usage: ``cdsfile repfile``.  ``ctIO.readRep`` populates a dict from the
    repeat file; its sorted keys (the loci) are passed to
    ``ctIO.readFasta`` along with the CDS file.

    For every locus a ``>locus`` header is printed, followed by one line
    per entry of that locus's repeat list.  Each line joins, with '#', one
    ``fragment:offset`` item per position tuple (in sorted order), where
    ``offset`` is the fragment's 1-based start in the CDS.

    Position tuples are multiplied by three to index the CDS -- presumably
    converting amino-acid to nucleotide coordinates (the script is named
    getNtFromAa.py); confirm against the repeat-file format.

    Exit status: 0 after the usage message on a wrong argument count, 1
    after reporting ``locus`` and the offending tuple on stderr when the
    computed range is empty or reversed.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    cdsPath, repPath = sys.argv[1], sys.argv[2]
    repDict = {}
    ctIO.readRep(repPath, repDict)  # populated in place
    loci = sorted(repDict.keys())
    cdsDict = ctIO.readFasta(cdsPath, loci)

    for locus in loci:
        sys.stdout.write('>%s\n' % locus)
        fullSeq = cdsDict[locus]
        for posDict in repDict[locus]:
            fragments = []
            for first, last in ((p[0], p[1]) for p in sorted(posDict.keys())):
                begin = (first - 1) * 3
                stop = last * 3
                if begin >= stop:
                    sys.stderr.write('%s %s\n' % (locus, (first, last)))
                    sys.exit(1)
                # Offset is begin + 1 (was begin + 3 before the 2011-08-25
                # patch).
                fragments.append(':'.join([fullSeq[begin:stop],
                                           str(begin + 1)]))
            sys.stdout.write('#'.join(fragments) + '\n')

Beispiel #6

0

Datei anzeigen

Datei: codonUsage.py Projekt: Tong-Chen/NGS

def main():
    """Run the three codon-usage reports for a sequence file and a repeat file.

    Usage: ``seq rep``.  A wrong argument count prints the usage line to
    stderr and exits with status 0.

    The work is delegated to helpers defined elsewhere in the project and
    performed in this order:

    * ``codonSet()`` -- obtain the codon list;
    * ``readFasta(seq)`` / ``readRep(rep, dict)`` -- load sequences and
      repeats;
    * ``originalSta`` -- build the repeat-side and sequence-side codon
      structures;
    * ``totalNumberProrep`` and ``singlRatioProRep`` -- for proteins with
      repeats, contrast codons inside the repeats with codons in the rest
      of the sequence (the original notes mention a bar graph of the
      counts and a heatmap of each protein's codon ratios);
    * ``repOrNot`` -- contrast proteins that have no repeats with proteins
      whose repeats were removed (per the original notes).

    What each helper writes, and where, is not visible here; the repeat
    file name is passed to all three reporting helpers.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seqPath, repPath = sys.argv[1:]

    codonList = codonSet()
    sequences = readFasta(seqPath)
    repeatTable = {}
    readRep(repPath, repeatTable)  # populated in place
    insideRep, outsideRep = originalSta(repeatTable, sequences)

    # Proteins with repeats: repeat codons against the other codons.
    perSeqCounts = totalNumberProrep(insideRep, outsideRep, repPath)
    singlRatioProRep(insideRep, outsideRep, repPath, codonList)

    # Repeat-free proteins against repeat proteins minus their repeats.
    repOrNot(perSeqCounts, insideRep, outsideRep, repPath, codonList)

Beispiel #7

0

Datei anzeigen

Datei: noRedundantFasta.py Projekt: Tong-Chen/NGS

def main():
    """Print a FASTA file with duplicate sequences removed.

    Usage: ``filename``.  Records come from ``readFasta`` as a
    name -> sequence mapping.  Each distinct sequence is written once to
    stdout as ``>name`` followed by the sequence; among records sharing a
    sequence, the one met first while iterating the mapping is kept.

    A wrong argument count prints the usage line to stderr and exits with
    status 0.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    records = readFasta(sys.argv[1])
    seenSeqs = set()
    for name, sequence in records.items():
        if sequence in seenSeqs:
            continue  # an identical sequence was already printed
        seenSeqs.add(sequence)
        sys.stdout.write('>%s\n%s\n' % (name, sequence))

Beispiel #8

0

Datei anzeigen

Datei: splitToOneSeqFasta.py Projekt: Tong-Chen/NGS

def main():
    """Split a multi-record FASTA file into one file per record.

    Usage: ``filename``.  Records come from ``readFasta`` as a
    name -> sequence mapping; each record is written to a file named after
    the record, containing ``>name`` and the sequence on the next line.

    A wrong argument count prints the usage line to stderr and exits with
    status 0.
    """
    sys.stderr.write("Print the result to files\n")
    # The original literal used a backslash continuation that glued the
    # words together ("tomultiple"); the missing space is restored here.
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files with one sequence each\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    seqDict = readFasta(sys.argv[1])
    for key, value in seqDict.items():
        # NOTE(review): the record name is used verbatim as the output
        # path, so a name containing a path separator writes outside the
        # current directory -- confirm inputs are trusted.
        # 'with' closes the file even if the write fails.
        with open(key, 'w') as fh:
            fh.write('>%s\n%s\n' % (key, value))

Beispiel #9

0

Datei anzeigen

Datei: noRedundantFasta.py Projekt: shycheng/NGS

def main():
    """Emit every distinct sequence of a FASTA file exactly once.

    Usage: ``filename``.  ``readFasta`` supplies a name -> sequence
    mapping; a record is printed to stdout (``>name`` line, then the
    sequence) only if no earlier record in iteration order had the same
    sequence.

    On a wrong argument count the banner and usage line go to stderr and
    the script exits with status 0.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("Print the result to screen\n")
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    printed = set()
    for header, body in readFasta(sys.argv[1]).items():
        if body in printed:
            continue  # duplicate of a sequence already written
        printed.add(body)
        sys.stdout.write('>%s\n%s\n' % (header, body))

Beispiel #10

0

Datei anzeigen

Datei: splitToOneSeqFasta.py Projekt: yuzhenpeng/NGS

def main():
    """Write each record of a multi-record FASTA file to its own file.

    Usage: ``filename``.  ``readFasta`` returns a name -> sequence mapping;
    for every entry a file named after the record is created holding a
    ``>name`` line followed by the sequence.

    On a wrong argument count the usage line is printed to stderr and the
    script exits with status 0.
    """
    sys.stderr.write("Print the result to files\n")
    # Fixes the banner: the original backslash continuation inside the
    # literal dropped the space between "to" and "multiple".
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files with one sequence each\n")

    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    seqDict = readFasta(sys.argv[1])
    for key, value in seqDict.items():
        # NOTE(review): record names become file paths unchanged; a name
        # with a path separator would escape the working directory --
        # confirm inputs are trusted.
        # The context manager closes the handle even when writing raises.
        with open(key, 'w') as fh:
            fh.write('>%s\n%s\n' % (key, value))

Beispiel #11

0

Datei anzeigen

def main():
    print >>sys.stderr, "Print the result to screen"
    if len(sys.argv) != 4:
        print >>sys.stderr, 'Using python %s filename subjS atseq' % sys.argv[0]
        sys.exit(0)
    #---------------------------------------------------
    subjSDict = readSubjS(sys.argv[2])
    atDict = readFasta(sys.argv[3])
    at = 1
    for line in open(sys.argv[1]):
        if line[0] == '=':
            group = line[1:].split()[1]
            at = 1  #label the following locus is Arabidopsis
        elif line[0] == '>':
            if at:
                locus = (line[1:].rsplit('.', 1))[0]
                seq = atDict[locus]
                at = 0
            else:
                locus = line[1:-1]
                seq = subjSDict[locus]
            #--------------------------------
        else: