Esempio n. 1
0
def test_estimate_functions_1():
    """Check both estimators by positional index into their results.

    Each result is read as a 4-item sequence; the first three items are
    compared exactly and the fourth is compared as a float (within a
    tolerance, except for the N=1024, M=2 case where it must equal 1.0).
    """
    # N=99, M=1024
    by_memory = functions.estimate_optimal_with_N_and_M(99, 1024)
    for index, expected in enumerate((7, 146, 1022)):
        assert by_memory[index] == expected, by_memory[index]
    assert abs(.008 - by_memory[3]) < .001, by_memory[3]

    # N=99, f=0.00701925498897
    by_fp_rate = functions.estimate_optimal_with_N_and_f(99, 0.00701925498897)
    for index, expected in enumerate((7, 145, 1015)):
        assert by_fp_rate[index] == expected, by_fp_rate[index]
    assert abs(.008 - by_fp_rate[3]) < .002, by_fp_rate[3]

    # N=1024, M=2: every item, including the fourth, is compared exactly.
    tiny_memory = functions.estimate_optimal_with_N_and_M(1024, 2)
    for index, expected in enumerate((1, 2, 2, 1.0)):
        assert tiny_memory[index] == expected, tiny_memory[index]

    # An absurdly high false positive rate, exercised only for coverage.
    high_fp_rate = functions.estimate_optimal_with_N_and_f(1024, 0.7)
    for index, expected in enumerate((1, 850, 850)):
        assert high_fp_rate[index] == expected, high_fp_rate[index]
    assert abs(.7 - high_fp_rate[3]) < 0.0022, abs(.7 - high_fp_rate[3])
def _report_estimate(N, constraint_label, constraint_value, result):
    """Write one estimation report to stderr, one labelled value per line.

    N is printed first, then the user-supplied constraint under
    ``constraint_label``, then the ``num_htables``, ``htable_size``,
    ``fp_rate`` and ``mem_use`` attributes of ``result``, in that order.
    """
    rows = (
        ("number of estimated distinct k-mers:  ", N),
        (constraint_label, constraint_value),
        ("optimal number of hash tables:        ", result.num_htables),
        ("optimal size of hash tables:          ", result.htable_size),
        ("estimated false positive rate:        ", result.fp_rate),
        ("estimated usage of memory:            ", result.mem_use),
    )
    for label, value in rows:
        print(label, value, file=sys.stderr)


def main():
    """Parse the command line and report the optimal hash table parameters.

    When ``-M`` is given the estimate is derived from N and M; otherwise,
    when ``-f`` is given, it is derived from N and f. The report goes to
    stderr. If neither option is set, the parser's ``error`` method is
    called with a usage hint (presumably an argparse parser, which prints
    the message and exits -- confirm against ``get_parser``).
    """
    info('estimate_optimal_hash.py', ['counting'])
    # Build the parser once so the same instance can report the error below.
    parser = get_parser()
    args = parser.parse_args()
    N = args.N

    # Truthiness checks are kept on purpose: a value of 0 for -M or -f is
    # treated the same as the option being absent.
    if args.M:
        result = estimate_optimal_with_N_and_M(N, args.M)
        _report_estimate(
            N, "size of memory available to use:      ", args.M, result)
    elif args.f:
        result = estimate_optimal_with_N_and_f(N, args.f)
        _report_estimate(
            N, "desired maximum false positive rate:  ", args.f, result)
    else:
        parser.error(
            'No action requested, add -M (size of memory available to use) '
            'or -f (desired maximum false positive rate)')
Esempio n. 3
0
def test_estimate_functions_namedtup():
    """Check both estimators through the named fields of their results.

    The fields read are ``num_htables``, ``htable_size``, ``mem_use`` and
    ``fp_rate``; failure messages report the value at the matching index.
    """
    # N=99, M=1024
    by_memory = functions.estimate_optimal_with_N_and_M(99, 1024)
    assert by_memory.num_htables == 7, by_memory[0]
    assert by_memory.htable_size == 146, by_memory[1]
    assert by_memory.mem_use == 1022, by_memory[2]
    assert abs(.008 - by_memory.fp_rate) < .001, by_memory[3]

    # N=99, f=0.00701925498897
    by_fp_rate = functions.estimate_optimal_with_N_and_f(99, 0.00701925498897)
    assert by_fp_rate.num_htables == 7, by_fp_rate[0]
    assert by_fp_rate.htable_size == 145, by_fp_rate[1]
    assert by_fp_rate.mem_use == 1015, by_fp_rate[2]
    assert abs(.008 - by_fp_rate.fp_rate) < .002, by_fp_rate[3]