Beispiel #1
0
def test_build_chunks():
    """Check that a build with ``max_memory`` set matches a build without it.

    The same bloom filter files are built into two separate databases, one
    with no ``max_memory`` argument and one with ``max_memory=20``. Both
    databases are then reopened from disk and their graph rows and metadata
    are compared.

    Both databases are removed in a ``finally`` block so that a failing
    assertion does not leave them behind on disk.
    """
    N = 3
    bloomfilter_filepaths = ['bigsi/tests/data/test_kmers.bloom'] * N
    sample_names = generate_sample_names(len(bloomfilter_filepaths))

    bigsi1 = BIGSI.create(db="./db-bigsi-no-max-mem/",
                          m=10,
                          k=9,
                          h=1,
                          force=True)
    # Stays None until the second database exists, so the cleanup below
    # never touches an unbound name.
    bigsi2 = None
    try:
        build(bloomfilter_filepaths, sample_names, bigsi1)

        bigsi2 = BIGSI.create(db="./db-bigsi-max-mem/",
                              m=10,
                              k=9,
                              h=1,
                              force=True)
        build(bloomfilter_filepaths, sample_names, bigsi2,
              max_memory=20)  # 20 bytes

        # Reload both databases from disk and check they are equal.
        bigsi1 = BIGSI("./db-bigsi-no-max-mem/")
        bigsi2 = BIGSI("./db-bigsi-max-mem")
        # NOTE(review): 10 presumably mirrors the m=10 used at creation --
        # confirm before changing either value.
        for i in range(10):
            assert bigsi1.graph[i] == bigsi2.graph[i]
        for k, v in bigsi2.metadata.items():
            assert bigsi1.metadata[k] == v
    finally:
        bigsi1.delete_all()
        if bigsi2 is not None:
            bigsi2.delete_all()
Beispiel #2
0
def test_merge():
    """Check that merging two indexes matches building one combined index.

    Two databases are built from separate k-mer lists, and a third is built
    directly from the concatenated bloom filters with the sample names
    returned by ``combine_samples``. After ``bigsi1.merge(bigsi2)``, the
    first database is reopened and its graph rows and metadata are compared
    against the combined one.

    All databases are removed in a ``finally`` block so that a failing
    assertion does not leave them behind on disk.
    """
    kmers1 = ['AAAAAAAAA'] * 3
    kmers2 = ['AAAAAAAAT'] * 9

    bigsi1 = BIGSI.create(db="./db-bigsi1/", m=10, k=9, h=1, force=True)
    # Stay None until created, so the cleanup below only deletes databases
    # that actually exist.
    bigsi2 = None
    bigsicombined = None
    try:
        blooms1 = [bigsi1.bloom([kmer]) for kmer in kmers1]
        samples1 = [str(i) for i in range(len(kmers1))]
        bigsi1.build(blooms1, samples1)

        bigsi2 = BIGSI.create(db="./db-bigsi2/", m=10, k=9, h=1, force=True)
        blooms2 = [bigsi2.bloom([kmer]) for kmer in kmers2]
        samples2 = [str(i) for i in range(len(kmers2))]
        bigsi2.build(blooms2, samples2)

        combined_samples = combine_samples(samples1, samples2)
        bigsicombined = BIGSI.create(db="./db-bigsi-c/",
                                     m=10,
                                     k=9,
                                     h=1,
                                     force=True)
        # Reopen the freshly created database with mode="c" before building.
        bigsicombined = BIGSI(db="./db-bigsi-c/", mode="c")
        bigsicombined.build(blooms1 + blooms2, combined_samples)

        bigsi1.merge(bigsi2)
        # Reload the merged database from disk before comparing.
        bigsi1 = BIGSI(db="./db-bigsi1/")
        # NOTE(review): 10 presumably mirrors the m=10 used at creation --
        # confirm before changing either value.
        for i in range(10):
            assert bigsi1.graph[i] == bigsicombined.graph[i]
        for k, v in bigsicombined.metadata.items():
            assert bigsi1.metadata[k] == v
    finally:
        bigsi1.delete_all()
        if bigsi2 is not None:
            bigsi2.delete_all()
        if bigsicombined is not None:
            bigsicombined.delete_all()
Beispiel #3
0
def test_inexact_search():
    """Check threshold search results for every entry in ``CONFIGS``.

    Two bloom filters are made from the k-mers of two sequences that differ
    in their final base, using the first config. For each config, an index
    is built over samples "a" and "b" and the test asserts that:

    * constructing ``BIGSI(config)`` before anything is built raises,
    * searching an unrelated sequence at threshold 0.5 returns ``[]``,
    * searching the first sequence at threshold 0.5 reports "a" at 100%
      and "b" at 83.33% (5 of 6 k-mers).

    The index is deleted in a ``finally`` block so that a failing assertion
    for one config does not leave its storage behind.
    """
    # Clear whatever get_storage returns for each config before starting.
    for storage_config in CONFIGS:
        get_storage(storage_config).delete_all()

    # The bloom filters are built once from the first config and then
    # reused for every config in the loop below.
    base_config = CONFIGS[0]
    kmers_1 = seq_to_kmers("ATACACAAT", base_config["k"])
    kmers_2 = seq_to_kmers("ATACACAAC", base_config["k"])
    bloom1 = BIGSI.bloom(base_config, kmers_1)
    bloom2 = BIGSI.bloom(base_config, kmers_2)

    for config in CONFIGS:
        get_storage(config).delete_all()
        # Opening an index over the just-cleared storage is expected to fail.
        with pytest.raises(BaseException):
            BIGSI(config)
        bigsi = BIGSI.build(config, [bloom1, bloom2], ["a", "b"])
        try:
            assert bigsi.search("ACAGTTAAC", 0.5) == []
            assert bigsi.lookup("AAT") == {"AAT": bitarray("10")}

            results = bigsi.search("ATACACAAT", 0.5)
            assert results[0] == {
                "percent_kmers_found": 100.0,
                "num_kmers": 6,
                "num_kmers_found": 6,
                "sample_name": "a",
            }
            # The serialised form pins the key order of the result dict too.
            assert (
                json.dumps(results[0])
                == '{"percent_kmers_found": 100.0, "num_kmers": 6, "num_kmers_found": 6, "sample_name": "a"}'
            )
            assert results[1] == {
                "percent_kmers_found": 83.33,
                "num_kmers": 6,
                "num_kmers_found": 5,
                "sample_name": "b",
            }
        finally:
            bigsi.delete()