'seq': seq[ length-orf_start : i + 3 ]} ) return orf_list # Execute when run as a script if __name__ == '__main__': if (len(sys.argv) < 2): print "Usage: python", sys.argv[0], "<filename in FASTA format> [<min ORF length>]" else: fileName = sys.argv[1] if (len(sys.argv) > 2): # 2nd arg should be an integer try: minlen = int(sys.argv[2]) # Convert string to integer except ValueError: # try-except catches errors print "\n\tExpecting an integer to define min ORF length, found", print sys.argv[2] exit() else: minlen = MIN_ORF_LENGTH print "ORF must be at least", minlen, "Base pairs long" text = cs190FileUtil.readFastaFile(fileName) # Time to start finding ORFs! orf_list = find_all_orfs(text, minlen) for orf in orf_list: print "Frame {frame} Start {start} End {end} Len {length}".format( frame=orf['frame'],start=orf['start'],end=orf['end'],length=orf['length'])
# Take a string and find every match in a FASTA file, printing the 1-based
# start position of each match (overlapping matches included).
# This can be useful when validating other tools.
#
# Search is meant to be not case sensitive; this script does no case folding
# itself, so that presumably happens inside cs190FileUtil -- TODO confirm.
#
# Usage:
#   python search.py <file> <pattern>
#
# Pattern can be in quotes, or a single string:
#
#   python search.py EColiK12.fasta "atggttaaag tttatgcccc"
#   python search.py EColiK12.fasta atggttaaagtttatgcccc

import string
import sys
import cs190FileUtil  # Utility to read Fasta files

# Each message is a single pre-formatted string so the output is identical
# whether print is a statement (Python 2) or a function (Python 3).
if len(sys.argv) < 3:
    print("Usage: python %s <filename> <pattern>" % sys.argv[0])
else:
    text = cs190FileUtil.readFastaFile(sys.argv[1])
    pattern = cs190FileUtil.prepare(sys.argv[2])  # Remove any blanks
    print("Search for %s" % (pattern,))

    if not pattern:
        # An empty pattern would "match" at every position of the sequence.
        print("Pattern is empty; nothing to search for")
    else:
        # Let str.find do the scanning instead of slicing at every position.
        # Restarting one character past each hit keeps overlapping matches.
        pos = text.find(pattern)
        while pos != -1:
            # Print positions starting with 1
            print("Start: %d" % (pos + 1))
            pos = text.find(pattern, pos + 1)
def findLongestRepeat(text):
    """Find the longest substring that occurs at least twice in *text*.

    The two occurrences may overlap (the second one only has to start after
    the first one starts).

    Args:
        text: The string to scan.

    Returns:
        A list ``[first_pos, second_pos, length]`` holding the 0-based start
        of the first occurrence, the 0-based start of a later occurrence, and
        the length of the repeated substring. When nothing repeats (including
        for an empty string) the result is ``[-1, -1, 0]``.
    """
    best_len = 0        # Length of the longest repeat found so far
    first_pos = -1
    second_pos = -1
    text_len = len(text)

    # Start at the beginning, and continue for each spot
    for pos in range(text_len):
        # Only a repeat longer than the current best is interesting.
        goal = best_len + 1

        # A second copy must start at pos + 1 or later and still fit in the
        # text. Once that is impossible here it is impossible for every later
        # position too, so stop scanning.
        if pos + goal >= text_len:
            break

        # Look for a later match to the string we are sitting on
        dup = text.find(text[pos:pos + goal], pos + 1)

        # We have a match: can we extend it?
        while dup != -1:
            first_pos, second_pos, best_len = pos, dup, goal
            goal += 1
            # A longer match cannot start before the shorter one did, so the
            # search resumes from the current match position.
            dup = text.find(text[pos:pos + goal], dup)

    return [first_pos, second_pos, best_len]


# Execute only when run as a script, so the function above can be imported
# without triggering the command-line driver.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python", sys.argv[0], "<filename>")
    else:
        text = cs190FileUtil.readFastaFile(sys.argv[1])
        [pos, dup, ln] = findLongestRepeat(text)
        if ln == 0:
            # Nothing repeats: the positions are the -1 placeholders, so
            # printing them would only mislead.
            print("No duplicate found")
        else:
            print("Found duplicate of length", ln)
            print(pos, text[pos:pos + ln])
            print(dup, text[dup:dup + ln])