def _mean_match_ratio(found_matches, reference_matches):
    """Return the mean per-genome ratio of found to reference match counts.

    Args:
        found_matches: sequence of sized per-genome results produced by the
            algorithm being evaluated.
        reference_matches: sequence of sized per-genome results used as the
            baseline; indexed in step with ``found_matches``.

    Returns:
        The sum of the per-genome ratios divided by ``len(reference_matches)``,
        as a float.  A genome with no reference matches contributes 1.0 when
        the evaluated algorithm also found none and 0.0 otherwise.  When there
        are no genomes at all the result is 1.0 (nothing disagreed).

    Raises:
        IndexError: if ``found_matches`` is longer than ``reference_matches``.
    """
    ratio_sum = 0.0
    for genome_idx in range(len(found_matches)):
        found_count = len(found_matches[genome_idx])
        reference_count = len(reference_matches[genome_idx])
        if reference_count:
            # float() forces true division; under Python 2 a bare `/` on two
            # ints truncates, so any partial accuracy would collapse to 0.
            ratio_sum += float(found_count) / reference_count
        elif not found_count:
            # Neither algorithm found anything: count that as full agreement
            # instead of dividing by zero.
            ratio_sum += 1.0
    if not reference_matches:
        return 1.0
    return ratio_sum / len(reference_matches)


def opencv_chunk_analysis(genomes, chunk_max, total_length):
    """Compare OpenCV chunked matching against Boyer-Moore per chunk size.

    Boyer-Moore is run once and used as the baseline (its accuracy is
    recorded as 1).  The OpenCV matcher is then run for every chunk size in
    ``range(3, chunk_max, 3)`` and one JSON document is printed per chunk
    size.  The search pattern is read from the module-level ``args.pattern``.

    NOTE(review): the original indentation was lost; the report is assumed
    to be emitted inside the chunk-size loop -- confirm against the intended
    output format.

    Args:
        genomes: the texts to search; passed unchanged to both matchers.
        chunk_max: exclusive upper bound of the chunk sizes to try.
        total_length: reported verbatim as ``text_length`` in the output.

    Returns:
        None.  Results are written to stdout as JSON.
    """
    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(args.pattern),
        'substring': args.pattern,
        'text_length': total_length,
    }

    boyermoore_data = {'name': 'boyermoore'}
    opencv_data = {'name': 'opencv'}

    # Time the baseline once; it does not depend on the chunk size.
    # t.msecs is presumably the elapsed time in milliseconds -- TODO confirm.
    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, args.pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    for chunk_size in range(3, chunk_max, 3):
        with Timer() as t:
            opencv_matches = cvmatch.cv_match_index_chunk(
                genomes, args.pattern, chunk_size=chunk_size)
        opencv_data['time'] = t.msecs
        opencv_data['chunk_size'] = chunk_size
        opencv_data['accuracy'] = _mean_match_ratio(opencv_matches, bm_matches)

        analysis['algorithms'] = [boyermoore_data, opencv_data]

        # One JSON document per chunk size.  The parenthesised form prints
        # the same text under Python 2 and is also valid Python 3.
        print(json.dumps(analysis))
def _mean_match_ratio(found_matches, reference_matches):
    """Return the mean per-genome ratio of found to reference match counts.

    Args:
        found_matches: sequence of sized per-genome results produced by the
            algorithm being evaluated.
        reference_matches: sequence of sized per-genome results used as the
            baseline; indexed in step with ``found_matches``.

    Returns:
        The sum of the per-genome ratios divided by ``len(reference_matches)``,
        as a float.  A genome with no reference matches contributes 1.0 when
        the evaluated algorithm also found none and 0.0 otherwise.  When there
        are no genomes at all the result is 1.0 (nothing disagreed).

    Raises:
        IndexError: if ``found_matches`` is longer than ``reference_matches``.
    """
    ratio_sum = 0.0
    for genome_idx in range(len(found_matches)):
        found_count = len(found_matches[genome_idx])
        reference_count = len(reference_matches[genome_idx])
        if reference_count:
            # float() forces true division; under Python 2 a bare `/` on two
            # ints truncates, so any partial accuracy would collapse to 0.
            ratio_sum += float(found_count) / reference_count
        elif not found_count:
            # Neither algorithm found anything: count that as full agreement
            # instead of dividing by zero.
            ratio_sum += 1.0
    if not reference_matches:
        return 1.0
    return ratio_sum / len(reference_matches)


def opencv_chunk_analysis(genomes, chunk_max, total_length):
    """Compare OpenCV chunked matching against Boyer-Moore per chunk size.

    Boyer-Moore is run once and used as the baseline (its accuracy is
    recorded as 1).  The OpenCV matcher is then run for every chunk size in
    ``range(3, chunk_max, 3)`` and one JSON document is printed per chunk
    size.  The search pattern is read from the module-level ``args.pattern``.

    NOTE(review): the original indentation was lost; the report is assumed
    to be emitted inside the chunk-size loop -- confirm against the intended
    output format.

    Args:
        genomes: the texts to search; passed unchanged to both matchers.
        chunk_max: exclusive upper bound of the chunk sizes to try.
        total_length: reported verbatim as ``text_length`` in the output.

    Returns:
        None.  Results are written to stdout as JSON.
    """
    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(args.pattern),
        'substring': args.pattern,
        'text_length': total_length,
    }

    boyermoore_data = {'name': 'boyermoore'}
    opencv_data = {'name': 'opencv'}

    # Time the baseline once; it does not depend on the chunk size.
    # t.msecs is presumably the elapsed time in milliseconds -- TODO confirm.
    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, args.pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    for chunk_size in range(3, chunk_max, 3):
        with Timer() as t:
            opencv_matches = cvmatch.cv_match_index_chunk(
                genomes, args.pattern, chunk_size=chunk_size)
        opencv_data['time'] = t.msecs
        opencv_data['chunk_size'] = chunk_size
        opencv_data['accuracy'] = _mean_match_ratio(opencv_matches, bm_matches)

        analysis['algorithms'] = [boyermoore_data, opencv_data]

        # One JSON document per chunk size.  The parenthesised form prints
        # the same text under Python 2 and is also valid Python 3.
        print(json.dumps(analysis))
title = title + str(count[title]) genomes[title] = genome sorted_genomes = collections.OrderedDict(sorted(genomes.items(), key=lambda t: t[0])) genome_strings = sorted_genomes.values() genome_titles = sorted_genomes.keys() # Parse args if args.algorithm == 'nlogn': for gn in genomes: matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern) print gn, ': Found matches at indices', matches.tolist() elif args.algorithm == 'nlogm': if len(genomes) > 1: matches = fft.fft_match_index_n_sq_log_m(genomes.values(),\ args.pattern[0], args.b) print 'found matches at', matches.tolist() else: for gn in genomes: matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0],args.b) print gn, ': Found matches at indices', matches.tolist() elif args.algorithm == 'boyermoore': for gn in genomes: matches = bm.boyer_moore_match_index(genomes[gn], args.pattern) print gn, ': Found matches at indices', matches.tolist() elif args.algorithm == 'opencv': matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0], args.b) print genomes[genomes.keys()[0]] print genomes.keys(), ': Found matches at indices', matches.tolist()
sorted_genomes = collections.OrderedDict( sorted(genomes.items(), key=lambda t: t[0])) genome_strings = sorted_genomes.values() genome_titles = sorted_genomes.keys() # Parse args if args.algorithm == 'nlogn': for gn in genomes: matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern) print gn, ': Found matches at indices', matches.tolist() elif args.algorithm == 'nlogm': if len(genomes) > 1: matches = fft.fft_match_index_n_sq_log_m(genomes.values(),\ args.pattern[0], args.b) print 'found matches at', matches.tolist() else: for gn in genomes: matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0], args.b) print gn, ': Found matches at indices', matches.tolist() elif args.algorithm == 'boyermoore': for gn in genomes: matches = bm.boyer_moore_match_index(genomes[gn], args.pattern) print gn, ': Found matches at indices', matches.tolist() elif args.algorithm == 'opencv': matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0], args.b) print genomes[genomes.keys()[0]] print genomes.keys(), ': Found matches at indices', matches.tolist()