def opencv_chunk_analysis(genomes, chunk_max, total_length):
    """Benchmark chunked OpenCV matching against a Boyer-Moore baseline.

    Boyer-Moore is run and timed once over ``genomes``; its result is the
    reference and its accuracy is recorded as 1.  The OpenCV matcher is then
    run and timed for every chunk size in ``range(3, chunk_max, 3)``, and one
    JSON document describing both algorithms is printed per chunk size.

    OpenCV accuracy is the mean, over the reference entries, of
    ``len(opencv_matches[k]) / len(bm_matches[k])`` -- a ratio of match
    counts, not a comparison of the match positions themselves.

    Args:
        genomes: texts to search; passed unchanged to both matchers.
        chunk_max: exclusive upper bound of the chunk sizes tried.
        total_length: reported verbatim as ``text_length``.

    The pattern is read from the module-level ``args.pattern``.
    """
    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(args.pattern),
        'substring': args.pattern,
        'text_length': total_length,
    }

    boyermoore_data = {'name': 'boyermoore'}
    opencv_data = {'name': 'opencv'}

    # Reference run: timed once, reused for every chunk size below.
    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, args.pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    reference_count = len(bm_matches)

    for chunk_size in range(3, chunk_max, 3):
        with Timer() as t:
            opencv_matches = cvmatch.cv_match_index_chunk(
                genomes, args.pattern, chunk_size=chunk_size)
        opencv_data['time'] = t.msecs
        opencv_data['chunk_size'] = chunk_size

        ratio_sum = 0.0
        # Both matchers ran over the same ``genomes``, so results are paired
        # by position.
        for found, expected in zip(opencv_matches, bm_matches):
            if len(expected):
                # float() forces true division; under Python 2 ``int / int``
                # floors, which collapsed every partial ratio to 0.
                ratio_sum += float(len(found)) / len(expected)
            elif not len(found):
                # No reference matches: the ratio is undefined.  Count the
                # entry as fully accurate only if OpenCV found nothing too;
                # otherwise it contributes 0.
                ratio_sum += 1.0
        # Guard the mean against an empty reference list.
        if reference_count:
            opencv_data['accuracy'] = ratio_sum / reference_count
        else:
            opencv_data['accuracy'] = 0.0

        analysis['algorithms'] = [boyermoore_data, opencv_data]
        # One compact JSON document per chunk size.  The parenthesised form
        # prints the same under Python 2 and Python 3.
        print(json.dumps(analysis))
def opencv_chunk_analysis(genomes, chunk_max, total_length):
    """Time the chunked OpenCV matcher against Boyer-Moore and print JSON.

    Boyer-Moore runs once over ``genomes`` and acts as the reference (its
    accuracy is stored as 1).  For each chunk size in
    ``range(3, chunk_max, 3)`` the OpenCV matcher is timed, its accuracy is
    computed, and the combined ``analysis`` dictionary is printed as one JSON
    document.

    Accuracy compares match *counts* only: for every reference entry the
    number of OpenCV matches is divided by the number of Boyer-Moore matches,
    and the ratios are averaged over the reference entries.

    Args:
        genomes: texts to search; forwarded unchanged to both matchers.
        chunk_max: exclusive upper bound of the chunk sizes tried.
        total_length: stored verbatim under ``text_length``.

    Reads the search pattern from the module-level ``args.pattern``.
    """
    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(args.pattern),
        'substring': args.pattern,
        'text_length': total_length,
    }

    boyermoore_data = {'name': 'boyermoore'}
    opencv_data = {'name': 'opencv'}

    # Baseline: measured a single time and shared by all chunk sizes.
    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, args.pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    baseline_entries = len(bm_matches)

    for chunk_size in range(3, chunk_max, 3):
        with Timer() as t:
            opencv_matches = cvmatch.cv_match_index_chunk(
                genomes, args.pattern, chunk_size=chunk_size)
        opencv_data['time'] = t.msecs
        opencv_data['chunk_size'] = chunk_size

        accuracy = 0.0
        # Results are paired by position: both matchers received the same
        # ``genomes`` argument.
        for cv_hits, bm_hits in zip(opencv_matches, bm_matches):
            expected = len(bm_hits)
            found = len(cv_hits)
            if expected:
                # Explicit float: Python 2 ``/`` on two ints floors, so the
                # original ratio could only ever be a whole number.
                accuracy += float(found) / expected
            elif found == 0:
                # Nothing to find and nothing found counts as a perfect
                # score; spurious OpenCV hits with no reference score 0.
                accuracy += 1.0
        # Avoid dividing by zero when there are no reference entries at all.
        if baseline_entries:
            opencv_data['accuracy'] = accuracy / baseline_entries
        else:
            opencv_data['accuracy'] = 0.0

        analysis['algorithms'] = [boyermoore_data, opencv_data]
        # Emit one compact JSON document per chunk size.  print(...) with a
        # single argument prints the same on Python 2 and Python 3.
        print(json.dumps(analysis))
    title = title + str(count[title])
    genomes[title] = genome

sorted_genomes = collections.OrderedDict(sorted(genomes.items(),
                                      key=lambda t: t[0]))
genome_strings = sorted_genomes.values()
genome_titles = sorted_genomes.keys()

# Parse args
if args.algorithm == 'nlogn':
    for gn in genomes:
        matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'nlogm':
    if len(genomes) > 1:
        matches = fft.fft_match_index_n_sq_log_m(genomes.values(),\
        args.pattern[0], args.b)
        print 'found matches at', matches.tolist()
    else:
        for gn in genomes:
            matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0],args.b)
            print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'boyermoore':
    for gn in genomes:
        matches = bm.boyer_moore_match_index(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'opencv':
    matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0], args.b)
    print genomes[genomes.keys()[0]]
    print genomes.keys(), ': Found matches at indices', matches.tolist()
# Example #4
# 0
sorted_genomes = collections.OrderedDict(
    sorted(genomes.items(), key=lambda t: t[0]))
genome_strings = sorted_genomes.values()
genome_titles = sorted_genomes.keys()

# Parse args
if args.algorithm == 'nlogn':
    for gn in genomes:
        matches = fft.fft_match_index_n_log_n(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'nlogm':
    if len(genomes) > 1:
        matches = fft.fft_match_index_n_sq_log_m(genomes.values(),\
        args.pattern[0], args.b)
        print 'found matches at', matches.tolist()
    else:
        for gn in genomes:
            matches = fft.fft_match_index_n_log_m(genomes[gn], args.pattern[0],
                                                  args.b)
            print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'boyermoore':
    for gn in genomes:
        matches = bm.boyer_moore_match_index(genomes[gn], args.pattern)
        print gn, ': Found matches at indices', matches.tolist()
elif args.algorithm == 'opencv':
    matches = cvmatch.cv_match_index_chunk(genomes.values(), args.pattern[0],
                                           args.b)
    print genomes[genomes.keys()[0]]
    print genomes.keys(), ': Found matches at indices', matches.tolist()