import argparse
import os
import time

from helpers import read_strings
from snp import Trie

# Command-line driver for BA9B (Implement TrieMatching).
#
#   --sample    build a Trie from two hard-coded patterns and print the result
#               of MatchAll on a hard-coded text.
#   --extra     read data/TrieMatching.txt: the first input line is the text,
#               the remaining input lines are the patterns; the first expected
#               line is a whitespace-separated list of integers. Prints both
#               lengths and every position-wise mismatch.
#   --rosalind  accepted by the parser, but no branch acts on it here.
#
# The elapsed wall-clock time is always reported at the end.
if __name__ == '__main__':
    start = time.time()

    parser = argparse.ArgumentParser('BA9B Implement TrieMatching')
    for flag, description in (
        ('--sample', 'process sample dataset'),
        ('--extra', 'process extra dataset'),
        ('--rosalind', 'process Rosalind dataset'),
    ):
        parser.add_argument(flag,
                            default=False,
                            action='store_true',
                            help=description)
    args = parser.parse_args()

    if args.sample:
        trie = Trie(['ATCG', 'GGGT'])
        print(trie.MatchAll('AATCGGGTTCAATCGGGGT'))

    if args.extra:
        Input, Expected = read_strings('data/TrieMatching.txt', init=0)
        text = Input[0]
        trie = Trie(Input[1:])
        Actual = trie.MatchAll(text)
        Expected = [int(token) for token in Expected[0].split()]
        print(len(Expected), len(Actual))
        # zip stops at the shorter sequence, so only the common prefix is
        # compared; the lengths printed above expose any size mismatch.
        diffs = []
        for expected_value, actual_value in zip(Expected, Actual):
            if expected_value != actual_value:
                diffs.append((expected_value, actual_value))
        print(diffs)

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
# Command-line driver for BA10G (Perform a Multiple Sequence Alignment with a
# Profile HMM). The solver itself is not wired in yet: --sample does nothing
# and --rosalind reads the dataset but leaves Result as None.
#
#   --sample    placeholder, no action.
#   --rosalind  read data/rosalind_<script stem>.txt, print Result and, when a
#               result exists, write it line by line to <script stem>.txt.
#
# The elapsed wall-clock time is always reported at the end.
if __name__ == '__main__':
    start = time.time()
    parser = argparse.ArgumentParser(
        'BA10G Perform a Multiple Sequence Alignment with a Profile HMM ')
    parser.add_argument('--sample',
                        default=False,
                        action='store_true',
                        help='process sample dataset')
    parser.add_argument('--rosalind',
                        default=False,
                        action='store_true',
                        help='process Rosalind dataset')
    args = parser.parse_args()

    if args.sample:
        pass

    if args.rosalind:
        # Script name without extension; used for both the input and the
        # output file name.
        stem = os.path.basename(__file__).split(".")[0]
        Input = read_strings(f'data/rosalind_{stem}.txt')

        Result = None
        print(Result)
        # Iterating None raised TypeError after the output file had already
        # been created/truncated. Only touch the file when there is a result.
        if Result is not None:
            with open(f'{stem}.txt', 'w') as f:
                for line in Result:
                    f.write(f'{line}\n')

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
action='store_true', help='process sample dataset') parser.add_argument('--extra', default=False, action='store_true', help='process extra dataset') parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset') args = parser.parse_args() if args.sample: pass if args.extra: Input, Expected = read_strings('data/....txt', init=0) trie = Trie(Input) Actual = None Expected.sort() print(len(Expected), len(Actual)) diffs = [(e, a) for e, a in zip(Expected, Actual) if e != a] print(diffs) if args.rosalind: Input = read_strings( f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt') Result = None print(Result) with open(f'{os.path.basename(__file__).split(".")[0]}.txt', 'w') as f: for line in Result:
if args.banana: r, p, LCP = SuffixArray('panamabananas$', auxiliary=True, padLCP=True) for edge, _ in SuffixArray2Tree('panamabananas$', r, LCP, trace=args.trace): print(edge) if args.sample: for edge, _ in SuffixArray2Tree('GTAGT$', [5, 2, 3, 0, 4, 1], [0, 0, 0, 2, 0, 1], trace=args.trace): print(edge) if args.extra: Input, Expected = read_strings('data/SuffixTreeFromSuffixArray.txt', init=0) Result = [ edge for edge, _ in SuffixArray2Tree(Input[0], [int(s) for s in Input[1].split(',')], [int(s) for s in Input[2].split(',')], trace=args.trace) ] print(f'Expected {len(Expected)} Edges, actual = {len(Result)}') Result.sort(key=lambda x: f'{len(x):04}{x}') Expected.sort(key=lambda x: f'{len(x):04}{x}') i = 0 j = 0 while i < len(Expected) and j < len(Result): if Expected[i] == Result[j]: i += 1
def FindApproximateMatches(Text,Patterns,d):
    """Placeholder for BA9O approximate multi-pattern matching.

    Parameters:
        Text      the string to search (the sample call passes a DNA string)
        Patterns  list of pattern strings
        d         integer passed as the third argument (1 in the sample call)

    Not implemented yet: the body is empty, so the call returns None.
    """
    pass


def parse_approximate_matching_input(Input):
    """Split the lines of a dataset into (Text, Patterns, d).

    The first line is the text, the last line is the integer d, and every
    line in between contributes patterns. Lines are split on whitespace so
    that both "one pattern per line" and "all patterns on one line" work.
    NOTE(review): the exact dataset layout is not visible here - confirm.

    Raises:
        ValueError  if fewer than two lines are supplied or the last line is
                    not an integer.
    """
    if len(Input) < 2:
        raise ValueError('expected a text line, pattern line(s) and a final line with d')
    patterns = [pattern for line in Input[1:-1] for pattern in line.split()]
    return Input[0], patterns, int(Input[-1])


if __name__=='__main__':
    start = time.time()
    parser = argparse.ArgumentParser('BA9O Find All Approximate Occurrences of a Collection of Patterns in a String ')
    parser.add_argument('--sample',   default=False, action='store_true', help='process sample dataset')
    parser.add_argument('--extra',    default=False, action='store_true', help='process extra dataset')
    parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset')
    args = parser.parse_args()

    if args.sample:
        print (FindApproximateMatches('ACATGCTACTTT', ['ATT', 'GCC', 'GCTA', 'TATT'], 1))

    if args.extra:
        Input,Expected = read_strings('data/MultipleApproximatePatternMatching.txt',init=0)
        # The original call nested the parentheses wrongly
        # (int(Input[1:-1], int(Input[-1]))), which raises TypeError and
        # passes only two arguments.
        Text, Patterns, d = parse_approximate_matching_input(Input)
        Result = FindApproximateMatches(Text, Patterns, d)
        # The solver is still a stub returning None; skip the loop then.
        if Result is not None:
            for a,b in Result:
                print (a,b)

    if args.rosalind:
        stem  = os.path.basename(__file__).split(".")[0]
        Input = read_strings(f'data/rosalind_{stem}.txt')
        Text, Patterns, d = parse_approximate_matching_input(Input)
        Result = FindApproximateMatches(Text, Patterns, d)
        print (Result)
        # Do not create/truncate the output file when there is nothing to write.
        if Result is not None:
            with open(f'{stem}.txt','w') as f:
                for line in Result:
                    f.write(f'{line}\n')

    elapsed = time.time() - start
    minutes = int(elapsed/60)
# NOTE(review): the three comment lines below are commented-out code left at
# the end of the preceding definition, whose header is outside this view.
#pass
#else:
#pass

# Command-line driver for BA9E (Find the Longest Substring Shared by Two
# Strings).
#
#   --sample    print FindLongestRepeat applied to two hard-coded strings.
#   --extra     raise the recursion limit to 2000, read
#               data/LongestSharedSubstring.txt, run FindLongestRepeat on the
#               first two input lines and print expected/actual lengths and
#               values for manual comparison.
#   --rosalind  placeholder, no action.
#
# Relies on `sys` being imported elsewhere in the file. The elapsed wall-clock
# time is always reported at the end.
if __name__ == '__main__':
    start = time.time()

    parser = argparse.ArgumentParser('BA9E Find the Longest Substring Shared by Two Strings')
    for flag, description in (
        ('--sample', 'process sample dataset'),
        ('--extra', 'process extra dataset'),
        ('--rosalind', 'process Rosalind dataset'),
    ):
        parser.add_argument(flag,
                            default=False,
                            action='store_true',
                            help=description)
    args = parser.parse_args()

    if args.sample:
        shared = FindLongestRepeat('TCGGTAGATTGCGCCCACTC',
                                   'AGGGGCTCGCAGTGTAAGAA')
        print(shared)

    if args.extra:
        sys.setrecursionlimit(2000)
        Input, Expected = read_strings('data/LongestSharedSubstring.txt', init=0)
        first, second = Input[0], Input[1]
        Actual = FindLongestRepeat(first, second)
        reference = Expected[0]
        print(len(reference), len(Actual))
        print(reference)
        print(Actual)

    if args.rosalind:
        pass

    elapsed = time.time() - start
    minutes = int(elapsed / 60)
    seconds = elapsed - 60 * minutes
    print(f'Elapsed Time {minutes} m {seconds:.2f} s')
action='store_true', help='process sample dataset') parser.add_argument('--extra', default=False, action='store_true', help='process extra dataset') parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset') args = parser.parse_args() if args.sample: print(FindShortestNonShared('CCAAGCTGCTAGAGG', 'CATGCTGGGCTGGCT')) if args.extra: Input, Expected = read_strings('data/ShortestNonSharedSubstring.txt', init=0) print(Expected[0]) Actual = FindShortestNonShared(Input[0], Input[1]) print(len(Expected[0]), len(Actual)) print(Expected[0]) print(Actual) if args.rosalind: Input = read_strings( f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt') Result = FindShortestNonShared(Input[0], Input[1]) print(Result) with open(f'{os.path.basename(__file__).split(".")[0]}.txt', 'w') as f: f.write(f'{Result}\n') elapsed = time.time() - start
default=False, action='store_true', help='process extra dataset') parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset') args = parser.parse_args() if args.sample: tree = SuffixTree() tree.build('ATAAATG$') for edge in tree.collectEdges(): print(edge) if args.extra: Input, Expected = read_strings('data/SuffixTreeConstruction.txt', init=0) tree = SuffixTree() tree.build(Input[0]) #tree.print() Edges = tree.collectEdges() compare_edges(Edges, Expected) if args.rosalind: Input = read_strings(r'data/rosalind_ba9c.txt') tree = SuffixTree() tree.build(Input[0]) Edges = tree.collectEdges() for e in Edges: print(e)
help='Controls display of probabilities') args = parser.parse_args() if args.sample: Transitions, Emissions = EstimateParameters('yzzzyxzxxx', ['x', 'y', 'z'], 'BBABABABAB', ['A', 'B', 'C']) for row in formatTransition(Transitions, ['A', 'B', 'C'], precision=args.precision): print(row) for row in formatEmission(Emissions, ['A', 'B', 'C'], ['x', 'y', 'z'], precision=args.precision): print(row) if args.extra: Input, Expected = read_strings(f'data/HMMParameterEstimation.txt', init=0) Transitions, Emissions = EstimateParameters(Input[0], Input[2].split(), Input[4], Input[6].split()) for row in formatTransition(Transitions, Input[6].split(), precision=args.precision): print(row) print('--------') for row in formatEmission(Emissions, Input[6].split(), Input[2].split(), precision=args.precision): print(row) if args.rosalind: Input = read_strings(
DOWNRIGHT = 2 LinearSpaceAlignment(0,len(v)+1,0,len(w)+1) if __name__=='__main__': start = time.time() parser = argparse.ArgumentParser('BA5L.py Align Two Strings Using Linear Space') parser.add_argument('--sample', default=False, action='store_true', help='process sample dataset') parser.add_argument('--extra', default=False, action='store_true', help='process extra dataset') parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset') args = parser.parse_args() if args.sample: print (alignUsingLinearSpace('PLEASANTLY','MEANLY')) if args.extra: Input,Expected = read_strings(f'data/linear_space_alignment.txt',init=0) print (alignUsingLinearSpace(Input[0],Input[1])) if args.rosalind: Input = read_strings(f'data/rosalind_{os.path.basename(__file__).split(".")[0]}.txt') Result = None print (Result) with open(f'{os.path.basename(__file__).split(".")[0]}.txt','w') as f: for line in Result: f.write(f'{line}\n') elapsed = time.time() - start minutes = int(elapsed/60) seconds = elapsed - 60*minutes
action='store_true', help='process sample dataset') parser.add_argument('--extra', default=False, action='store_true', help='process extra dataset') parser.add_argument('--rosalind', default=False, action='store_true', help='process Rosalind dataset') args = parser.parse_args() if args.sample: print(FindLongestRepeat('ATATCGTTTTATCGTT')) if args.extra: Input, Expected = read_strings('data/LongestRepeat.txt', init=0) print(Input[0]) Actual = FindLongestRepeat(Input[0]) print(len(Expected[0]), len(Actual)) print(Expected[0]) print(Actual) if args.rosalind: Input = read_strings('data/rosalind_ba9d.txt') Result = FindLongestRepeat(Input[0]) print(Result) with open('ba9d.txt', 'w') as f: f.write(f'{Result}\n') elapsed = time.time() - start minutes = int(elapsed / 60)