Ejemplo n.º 1
0
def time_analysis(genomes, total_length, chunk_size='m'):
    """Time each matcher on ``genomes`` and print the results as JSON.

    Boyer-Moore is treated as the reference (its accuracy is fixed at 1).
    Every other matcher is scored by the mean, over genomes, of
    ``len(its matches) / len(Boyer-Moore matches)``. This is a match-count
    ratio, so it can exceed 1 when a matcher reports more matches than the
    reference.

    Args:
        genomes: Sequence of texts to search. Passed whole to
            ``bm.boyer_moore_mult_match_index``,
            ``fft.fft_match_index_n_sq_log_n`` and ``cvmatch.cv_match_index``,
            and one element at a time to ``fft.fft_match_index_n_log_m``.
        total_length: Reported verbatim as ``text_length`` in the output.
        chunk_size: Forwarded to ``fft.fft_match_index_n_log_m``.

    The pattern is read from the module-level ``args.pattern``. Nothing is
    returned; the report is written to stdout.
    """
    def mean_match_ratio(found, reference):
        # Mean per-genome ratio of match counts against the reference.
        # len() checks (not truthiness) so array-like results also work.
        if len(reference) == 0:
            return 1.0  # nothing to compare against, so nothing disagreed
        total = 0.0
        for found_hits, reference_hits in zip(found, reference):
            if len(reference_hits) > 0:
                # float() keeps the ratio fractional under Python 2 division.
                total += float(len(found_hits)) / len(reference_hits)
            elif len(found_hits) == 0:
                # Both matchers agree there is no match in this genome.
                total += 1.0
            # Otherwise the candidate reported matches the reference did
            # not: count the genome as 0 instead of dividing by zero.
        return total / len(reference)

    pattern = args.pattern

    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(pattern),
        'substring': pattern,
        'text_length': total_length,
    }

    boyermoore_data = {'name': 'boyermoore'}
    nlogn_data = {'name': 'nlogn'}
    nlogm_data = {'name': 'nlogm'}
    opencv_data = {'name': 'opencv'}

    # Reference run: every other matcher is scored against these matches.
    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    with Timer() as t:
        nlogn_matches = fft.fft_match_index_n_sq_log_n(genomes, pattern)
    nlogn_data['time'] = t.msecs
    nlogn_data['accuracy'] = mean_match_ratio(nlogn_matches, bm_matches)

    # This matcher takes a single genome, so the whole loop is timed.
    with Timer() as t:
        nlogm_matches = [
            fft.fft_match_index_n_log_m(genome, pattern, chunk_size)
            for genome in genomes
        ]
    nlogm_data['time'] = t.msecs
    nlogm_data['accuracy'] = mean_match_ratio(nlogm_matches, bm_matches)

    # Score OpenCV on its own matches (previously the nlogm matches were
    # reused here, so the reported opencv accuracy was really nlogm's).
    # NOTE(review): assumes cv_match_index returns one match collection per
    # genome, like the other multi-genome matchers - confirm.
    with Timer() as t:
        cv_matches = cvmatch.cv_match_index(genomes, pattern)
    opencv_data['time'] = t.msecs
    opencv_data['accuracy'] = mean_match_ratio(cv_matches, bm_matches)

    analysis['algorithms'] = [
        boyermoore_data,
        nlogn_data,
        nlogm_data,
        opencv_data,
    ]

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(json.dumps(analysis))
Ejemplo n.º 2
0
def k_analysis(genomes):
    """Print one JSON timing report per prefix size of ``genomes``.

    For each ``k`` in ``range(len(genomes))`` the first ``k`` genomes are
    searched with Boyer-Moore (the reference, accuracy fixed at 1), the
    ``fft.fft_match_index_n_sq_log_n`` matcher and the OpenCV matcher, and a
    report tagged with ``k`` is written to stdout. The FFT matcher is scored
    by the mean, over genomes, of
    ``len(its matches) / len(Boyer-Moore matches)``; OpenCV is only timed.

    Args:
        genomes: Sequence of texts to search.

    The pattern is read from the module-level ``args.pattern``. Nothing is
    returned.
    """
    def mean_match_ratio(found, reference):
        # Mean per-genome ratio of match counts against the reference.
        # len() checks (not truthiness) so array-like results also work.
        if len(reference) == 0:
            return 1.0  # nothing to compare against, so nothing disagreed
        total = 0.0
        for found_hits, reference_hits in zip(found, reference):
            if len(reference_hits) > 0:
                # float() keeps the ratio fractional under Python 2 division.
                total += float(len(found_hits)) / len(reference_hits)
            elif len(found_hits) == 0:
                # Both matchers agree there is no match in this genome.
                total += 1.0
            # Otherwise the candidate reported matches the reference did
            # not: count the genome as 0 instead of dividing by zero.
        return total / len(reference)

    pattern = args.pattern

    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(pattern),
        'substring': pattern,
    }

    boyermoore_data = {'name': 'boyermoore'}
    nlogn_data = {'name': 'nlogn'}
    opencv_data = {'name': 'opencv'}

    for k in range(len(genomes)):
        analysis['k'] = k
        # An empty prefix falls back to the first genome alone.
        # NOTE(review): as a result k == 0 and k == 1 time the same input and
        # the full genome list is never measured; kept unchanged because
        # downstream consumers may rely on these k values - confirm intent.
        k_genomes = genomes[:k] if k else [genomes[0]]

        # Reference run: the FFT matcher is scored against these matches.
        with Timer() as t:
            bm_matches = bm.boyer_moore_mult_match_index(k_genomes, pattern)
        boyermoore_data['time'] = t.msecs
        boyermoore_data['accuracy'] = 1

        with Timer() as t:
            nlogn_matches = fft.fft_match_index_n_sq_log_n(k_genomes, pattern)
        nlogn_data['time'] = t.msecs
        nlogn_data['accuracy'] = mean_match_ratio(nlogn_matches, bm_matches)

        # OpenCV is timed only; no accuracy is reported for it here.
        with Timer() as t:
            cvmatch.cv_match_index(k_genomes, pattern)
        opencv_data['time'] = t.msecs

        analysis['algorithms'] = [boyermoore_data, nlogn_data, opencv_data]

        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(json.dumps(analysis))
Ejemplo n.º 3
0
def k_analysis(genomes):
    """Print one JSON timing report per prefix size of ``genomes``.

    For each ``k`` in ``range(len(genomes))`` the first ``k`` genomes are
    searched with Boyer-Moore (the reference, accuracy fixed at 1), the
    ``fft.fft_match_index_n_sq_log_n`` matcher and the OpenCV matcher, and a
    report tagged with ``k`` is written to stdout. The FFT matcher is scored
    by the mean, over genomes, of
    ``len(its matches) / len(Boyer-Moore matches)``; OpenCV is only timed.

    Args:
        genomes: Sequence of texts to search.

    The pattern is read from the module-level ``args.pattern``. Nothing is
    returned.
    """
    def mean_match_ratio(found, reference):
        # Mean per-genome ratio of match counts against the reference.
        # len() checks (not truthiness) so array-like results also work.
        if len(reference) == 0:
            return 1.0  # nothing to compare against, so nothing disagreed
        total = 0.0
        for found_hits, reference_hits in zip(found, reference):
            if len(reference_hits) > 0:
                # float() keeps the ratio fractional under Python 2 division.
                total += float(len(found_hits)) / len(reference_hits)
            elif len(found_hits) == 0:
                # Both matchers agree there is no match in this genome.
                total += 1.0
            # Otherwise the candidate reported matches the reference did
            # not: count the genome as 0 instead of dividing by zero.
        return total / len(reference)

    pattern = args.pattern

    # analysis dictionary holds all data about the algorithms
    analysis = {'substring_length': len(pattern), 'substring': pattern}

    boyermoore_data = {'name': 'boyermoore'}
    nlogn_data = {'name': 'nlogn'}
    opencv_data = {'name': 'opencv'}

    for k in range(len(genomes)):
        analysis['k'] = k
        # An empty prefix falls back to the first genome alone.
        # NOTE(review): as a result k == 0 and k == 1 time the same input and
        # the full genome list is never measured; kept unchanged because
        # downstream consumers may rely on these k values - confirm intent.
        k_genomes = genomes[:k] if k else [genomes[0]]

        # Reference run: the FFT matcher is scored against these matches.
        with Timer() as t:
            bm_matches = bm.boyer_moore_mult_match_index(k_genomes, pattern)
        boyermoore_data['time'] = t.msecs
        boyermoore_data['accuracy'] = 1

        with Timer() as t:
            nlogn_matches = fft.fft_match_index_n_sq_log_n(k_genomes, pattern)
        nlogn_data['time'] = t.msecs
        nlogn_data['accuracy'] = mean_match_ratio(nlogn_matches, bm_matches)

        # OpenCV is timed only; no accuracy is reported for it here.
        with Timer() as t:
            cvmatch.cv_match_index(k_genomes, pattern)
        opencv_data['time'] = t.msecs

        analysis['algorithms'] = [boyermoore_data, nlogn_data, opencv_data]

        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(json.dumps(analysis))
Ejemplo n.º 4
0
def time_analysis(genomes, total_length, chunk_size='m'):
    """Time each matcher on ``genomes`` and print the results as JSON.

    Boyer-Moore is treated as the reference (its accuracy is fixed at 1).
    Every other matcher is scored by the mean, over genomes, of
    ``len(its matches) / len(Boyer-Moore matches)``. This is a match-count
    ratio, so it can exceed 1 when a matcher reports more matches than the
    reference.

    Args:
        genomes: Sequence of texts to search. Passed whole to
            ``bm.boyer_moore_mult_match_index``,
            ``fft.fft_match_index_n_sq_log_n`` and ``cvmatch.cv_match_index``,
            and one element at a time to ``fft.fft_match_index_n_log_m``.
        total_length: Reported verbatim as ``text_length`` in the output.
        chunk_size: Forwarded to ``fft.fft_match_index_n_log_m``.

    The pattern is read from the module-level ``args.pattern``. Nothing is
    returned; the report is written to stdout.
    """
    def mean_match_ratio(found, reference):
        # Mean per-genome ratio of match counts against the reference.
        # len() checks (not truthiness) so array-like results also work.
        if len(reference) == 0:
            return 1.0  # nothing to compare against, so nothing disagreed
        total = 0.0
        for found_hits, reference_hits in zip(found, reference):
            if len(reference_hits) > 0:
                # float() keeps the ratio fractional under Python 2 division.
                total += float(len(found_hits)) / len(reference_hits)
            elif len(found_hits) == 0:
                # Both matchers agree there is no match in this genome.
                total += 1.0
            # Otherwise the candidate reported matches the reference did
            # not: count the genome as 0 instead of dividing by zero.
        return total / len(reference)

    pattern = args.pattern

    # analysis dictionary holds all data about the algorithms
    analysis = {
        'substring_length': len(pattern),
        'substring': pattern,
        'text_length': total_length,
    }

    boyermoore_data = {'name': 'boyermoore'}
    nlogn_data = {'name': 'nlogn'}
    nlogm_data = {'name': 'nlogm'}
    opencv_data = {'name': 'opencv'}

    # Reference run: every other matcher is scored against these matches.
    with Timer() as t:
        bm_matches = bm.boyer_moore_mult_match_index(genomes, pattern)
    boyermoore_data['time'] = t.msecs
    boyermoore_data['accuracy'] = 1

    with Timer() as t:
        nlogn_matches = fft.fft_match_index_n_sq_log_n(genomes, pattern)
    nlogn_data['time'] = t.msecs
    nlogn_data['accuracy'] = mean_match_ratio(nlogn_matches, bm_matches)

    # This matcher takes a single genome, so the whole loop is timed.
    with Timer() as t:
        nlogm_matches = [
            fft.fft_match_index_n_log_m(genome, pattern, chunk_size)
            for genome in genomes
        ]
    nlogm_data['time'] = t.msecs
    nlogm_data['accuracy'] = mean_match_ratio(nlogm_matches, bm_matches)

    # Score OpenCV on its own matches (previously the nlogm matches were
    # reused here, so the reported opencv accuracy was really nlogm's).
    # NOTE(review): assumes cv_match_index returns one match collection per
    # genome, like the other multi-genome matchers - confirm.
    with Timer() as t:
        cv_matches = cvmatch.cv_match_index(genomes, pattern)
    opencv_data['time'] = t.msecs
    opencv_data['accuracy'] = mean_match_ratio(cv_matches, bm_matches)

    analysis['algorithms'] = [
        boyermoore_data,
        nlogn_data,
        nlogm_data,
        opencv_data,
    ]

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(json.dumps(analysis))