'seq': seq[ length-orf_start : i + 3 ]} )

    return orf_list
    
# Execute when run as a script
if __name__ == '__main__':
    # Command line: <FASTA file> [<minimum ORF length>]
    #
    # Every message is assembled into ONE string and handed to print(...).
    # A parenthesised single argument prints the same text whether print is a
    # statement (Python 2) or a function (Python 3), so the script runs
    # unchanged under both interpreters.
    if len(sys.argv) < 2:
        print("Usage: python {0} <filename in FASTA format> [<min ORF length>]"
              .format(sys.argv[0]))
    else:
        fileName = sys.argv[1]
        if len(sys.argv) > 2:
            # The optional 2nd argument overrides the default minimum length
            # and must parse as an integer.
            try:
                minlen = int(sys.argv[2])
            except ValueError:
                print("\n\tExpecting an integer to define min ORF length, found "
                      + sys.argv[2])
                # sys.exit is always available, whereas the bare exit() helper
                # is only injected by the site module. A non-zero status lets
                # calling shells/scripts detect the bad argument.
                sys.exit(1)
        else:
            minlen = MIN_ORF_LENGTH

        print("ORF must be at least {0} Base pairs long".format(minlen))

        text = cs190FileUtil.readFastaFile(fileName)

        # Time to start finding ORFs!
        orf_list = find_all_orfs(text, minlen)

        # One summary line per ORF returned by find_all_orfs.
        for orf in orf_list:
            print("Frame {frame} Start {start} End {end} Len {length}".format(
                frame=orf['frame'], start=orf['start'],
                end=orf['end'], length=orf['length']))
Example #2
0
# Take a string and find matches in a Fasta file
#    This can be useful when validating other tools
#    Search is not case sensitive
#
# Usage:
#       python search.py <file> <pattern>
#
# Pattern can be in quotes, or a single string
#
#       python search.py EColiK12.fasta "atggttaaag tttatgcccc" 
#       python search.py EColiK12.fasta  atggttaaagtttatgcccc

import string
import sys
import cs190FileUtil    # Utility to read Fasta files

# Script body: print the 1-based start of every occurrence of <pattern> in the
# sequence read from <file>.  Overlapping occurrences are all reported.
#
# Messages are built as single strings and passed to print(...), which gives
# identical output under Python 2 (print statement) and Python 3 (function).
if len(sys.argv) < 3:
    print("Usage:   python {0} <filename> <pattern>".format(sys.argv[0]))
else:
    text = cs190FileUtil.readFastaFile(sys.argv[1])
    pattern = cs190FileUtil.prepare(sys.argv[2])  # Remove any blanks

    print("Search for {0}".format(pattern))

    if not pattern:
        # An empty pattern compares equal to the empty slice at every
        # position, which would flood the output with meaningless hits.
        print("Pattern is empty; nothing to search for")
    else:
        # str.find does the scan in C instead of slicing the text once per
        # position.  Resuming one character past each hit (not past the whole
        # match) keeps overlapping occurrences in the output.
        pos = text.find(pattern)
        while pos != -1:
            # Print positions starting with 1
            print("Start: {0}".format(pos + 1))
            pos = text.find(pattern, pos + 1)
Example #3
0
def findLongestRepeat(text):
    """Locate the longest substring of ``text`` that occurs at least twice.

    The two occurrences may overlap: the second one only has to start at a
    later index than the first.

    Returns a three-element list ``[first, second, length]`` holding the start
    index of the earlier occurrence, the start index of the later occurrence
    and the length of the repeated substring.  When nothing repeats the result
    is ``[-1, -1, 0]``.  Among equally long repeats the one whose first
    occurrence starts earliest is reported, because a later start position
    must beat the current best by at least one character to replace it.
    """
    goal = 1                      # length a match needs to beat the best so far
    best_first = best_second = -1
    end = len(text)

    for start in range(end):
        # Is there a later copy of the goal-length string beginning at start?
        hit = text.find(text[start:start + goal], start + 1, end)

        # Searches begin at index 1 or later, so a positive result means
        # "found" and -1 means "not found".
        while hit > 0:
            best_first, best_second = start, hit
            goal += 1             # record the hit, then demand one more char

            # A longer copy cannot begin before the shorter one did, so the
            # search resumes from the current hit.
            hit = text.find(text[start:start + goal], hit, end)

    return [best_first, best_second, goal - 1]


# Script body: read the sequence from the FASTA file named on the command line
# and report its longest repeated substring together with both positions.
if (len(sys.argv) != 2):
    print("Usage: python", sys.argv[0], "<filename>")
else:
    text = cs190FileUtil.readFastaFile(sys.argv[1])
    pos, dup, ln = findLongestRepeat(text)
    if ln == 0:
        # findLongestRepeat returns [-1, -1, 0] when nothing repeats; printing
        # those values would announce a zero-length "duplicate" at index -1.
        print("No repeated substring found")
    else:
        print("Found duplicate of length", ln)
        # 0-based start index followed by the repeated text, once for each of
        # the two occurrences.
        print(pos, text[pos:pos + ln])
        print(dup, text[dup:dup + ln])