예제 #1
0
def test_build_chunks():
    """Check that a memory-capped build produces the same index as an uncapped one.

    The same three Bloom filter files are built into two on-disk indexes, once
    without ``max_memory`` and once with ``max_memory=20``.  Both indexes are
    then reopened from disk and compared row by row and by metadata.
    """
    n_samples = 3
    bloom_paths = ['bigsi/tests/data/test_kmers.bloom'] * n_samples
    names = generate_sample_names(len(bloom_paths))

    # Reference index: built with no memory limit.
    unbounded = BIGSI.create(
        db="./db-bigsi-no-max-mem/", m=10, k=9, h=1, force=True)
    build(bloom_paths, names, unbounded)

    # Same inputs, built with max_memory=20 (20 bytes per the original note).
    bounded = BIGSI.create(
        db="./db-bigsi-max-mem/", m=10, k=9, h=1, force=True)
    build(bloom_paths, names, bounded, max_memory=20)

    # Reopen both from disk so the comparison is against persisted state.
    unbounded = BIGSI("./db-bigsi-no-max-mem/")
    bounded = BIGSI("./db-bigsi-max-mem")

    # Rows 0..9 are compared; both indexes were created with m=10.
    for row in range(10):
        assert unbounded.graph[row] == bounded.graph[row]
    for key, value in bounded.metadata.items():
        assert unbounded.metadata[key] == value

    unbounded.delete_all()
    bounded.delete_all()
예제 #2
0
def test_inexact_search():
    """Check thresholded (0.5) search results for every entry in ``CONFIGS``.

    Two Bloom filters are made from sequences that differ only in their final
    base and are indexed as samples "a" and "b".  The test then checks an
    empty result, a single-k-mer lookup, and the full and partial hits.
    """
    # Start from empty storage for every config.
    for cfg in CONFIGS:
        get_storage(cfg).delete_all()

    # The Bloom filters are created once, using the first config's parameters.
    first_config = CONFIGS[0]
    kmers_a = seq_to_kmers("ATACACAAT", first_config["k"])
    kmers_b = seq_to_kmers("ATACACAAC", first_config["k"])
    bloom_a = BIGSI.bloom(first_config, kmers_a)
    bloom_b = BIGSI.bloom(first_config, kmers_b)

    expected_a = {
        "percent_kmers_found": 100.0,
        "num_kmers": 6,
        "num_kmers_found": 6,
        "sample_name": "a",
    }
    expected_b = {
        "percent_kmers_found": 83.33,
        "num_kmers": 6,
        "num_kmers_found": 5,
        "sample_name": "b",
    }
    expected_a_json = '{"percent_kmers_found": 100.0, "num_kmers": 6, "num_kmers_found": 6, "sample_name": "a"}'

    for cfg in CONFIGS:
        get_storage(cfg).delete_all()

        # Opening an index over emptied storage is expected to fail.
        with pytest.raises(BaseException):
            BIGSI(cfg)

        index = BIGSI.build(cfg, [bloom_a, bloom_b], ["a", "b"])
        assert index.search("ACAGTTAAC", 0.5) == []
        assert index.lookup("AAT") == {"AAT": bitarray("10")}

        hits = index.search("ATACACAAT", 0.5)
        assert hits[0] == expected_a
        # The serialised form is pinned too, which fixes the key order.
        assert json.dumps(hits[0]) == expected_a_json
        assert hits[1] == expected_b

        index.delete()
예제 #3
0
def test_create():
    """Build a one-sample index for each config and check what it reports.

    Checks the index parameters, the sample count, a k-mer lookup, and the
    colour/sample-name mapping in both directions.
    """
    for cfg in CONFIGS:
        get_storage(cfg).delete_all()

        index = BIGSI.build(cfg, [BIGSI.bloom(cfg, ["ATC", "ATA"])], ["1"])

        # Index parameters as reported by the built index.
        assert index.kmer_size == 3
        assert index.bloomfilter_size == 1000
        assert index.num_hashes == 3
        assert index.num_samples == 1

        # The single sample holds "ATC" and sits at colour 0.
        assert index.lookup("ATC") == {"ATC": bitarray("1")}
        assert index.colour_to_sample(0) == "1"
        assert index.sample_to_colour("1") == 0

        index.delete()
예제 #4
0
def test_unique_sample_names():
    """Inserting under an already-used sample name raises ``ValueError``.

    After the rejected insert, the sample count and the lookup results must
    still describe only the original sample.
    """
    expected_lookup = {
        "ATC": bitarray("1"),
        "ATA": bitarray("1"),
        "ATT": bitarray("0"),
    }
    for cfg in CONFIGS:
        get_storage(cfg).delete_all()
        sample_bloom = BIGSI.bloom(cfg, ["ATC", "ATA"])
        index = BIGSI.build(cfg, [sample_bloom], ["1"])

        # "1" is already taken, so a second insert under that name must fail.
        with pytest.raises(ValueError):
            index.insert(sample_bloom, "1")

        assert index.num_samples == 1
        assert index.lookup(["ATC", "ATA", "ATT"]) == expected_lookup
        index.delete()
예제 #5
0
파일: test_cli.py 프로젝트: rpetit3/BIGSI
def test_insert_search_cmd():
    """Exercise the CLI flow: init, build, insert, then search for the new sample.

    A sample named "s3" is inserted after the initial build and must appear
    in the results of a search for a sequence from the test Bloom filter data.
    """
    index = BIGSI.create(m=100, force=True)
    db = index.db

    # Reset the database through the CLI, then re-initialise it.
    hug.test.delete(bigsi.__main__, '', {'db': db})
    hug.test.post(bigsi.__main__, 'init', {'db': db, 'm': 1000})

    n_samples = 3
    bloom_paths = ['bigsi/tests/data/test_kmers.bloom'] * n_samples
    # Random six-character sample names (upper-case letters and digits).
    alphabet = string.ascii_uppercase + string.digits
    names = [
        ''.join(random.choice(alphabet) for _ in range(6))
        for _ in range(n_samples)
    ]
    hug.test.post(bigsi.__main__, 'build', {
        'db': db,
        'bloomfilters': bloom_paths,
        'samples': names
    })

    # hug.test calls return a Response object; its .data holds the payload.
    insert_response = hug.test.post(
        bigsi.__main__, 'insert', {
            'db': db,
            'bloomfilter': 'bigsi/tests/data/test_kmers.bloom',
            'sample': "s3"
        })
    assert insert_response.data.get('result') == 'success'

    seq = 'GATCGTTTGCGGCCACAGTTGCCAGAGATGA'
    search_response = hug.test.get(
        bigsi.__main__, 'search', {'db': db, 'seq': seq})
    assert "s3" in search_response.data.get(seq).get('results')

    hug.test.delete(bigsi.__main__, '', {'db': db})
예제 #6
0
def test_get_bloomfilter(sample, seq):
    """The Bloom filter read back for a sample has the index's filter size.

    ``sample`` and ``seq`` are supplied by the caller (presumably a
    property-based test decorator that is not visible here).
    """
    kmers = seq_to_kmers(seq, 31)
    index = BIGSI.create(m=100, force=True)
    sample_bloom = index.bloom(kmers)
    index.insert(sample_bloom, sample)

    fetched = index.get_bloom_filter(sample)
    expected_size = index.graph.bloomfilter.size
    assert fetched.length() == expected_size

    index.delete_all()
예제 #7
0
def test_get_bloomfilter(seq):
    """The Bloom filter read back after ``build`` has the index's filter size.

    ``seq`` is supplied by the caller (presumably a property-based test
    decorator that is not visible here); the sample name is fixed.
    """
    sample = "1234"
    kmers = seq_to_kmers(seq, 31)
    index = BIGSI.create(m=10, force=True)
    sample_bloom = index.bloom(kmers)
    index.build([sample_bloom], [sample])

    fetched = index.get_bloom_filter(sample)
    expected_size = index.graph.bloomfilter.size
    assert fetched.length() == expected_size

    index.delete_all()
예제 #8
0
def test_cant_build_chunks_if_max_memory_less_than_bf():
    """``build`` must raise ``ValueError`` when ``max_memory`` is too small.

    The index is created with ``m=10`` and the build is attempted with
    ``max_memory=1`` (one byte per the original note), which is expected to
    be rejected.  The on-disk database is removed afterwards whether or not
    the expectation holds, so the test leaves no files behind.
    """
    N = 3
    bloomfilter_filepaths = ['bigsi/tests/data/test_kmers.bloom'] * N
    sample_names = generate_sample_names(len(bloomfilter_filepaths))

    bigsi2 = BIGSI.create(db="./db-bigsi-max-mem/", m=10, k=9, h=1, force=True)
    try:
        with pytest.raises(ValueError):
            build(bloomfilter_filepaths, sample_names, bigsi2,
                  max_memory=1)  # 1 byte (should fail)
    finally:
        # Clean up the database created above, as the sibling build test does.
        bigsi2.delete_all()
예제 #9
0
def test_merge():
    """Merging two single-sample indexes matches one index built from both.

    Sample "a" is built under ``CONFIGS[0]``, sample "b" under ``CONFIGS[1]``,
    and a combined reference under ``CONFIGS[2]``.  After merging the second
    index into the first, a thresholded search must return the same results
    as the reference.
    """
    for cfg in CONFIGS:
        get_storage(cfg).delete_all()

    # Both Bloom filters use the first config's parameters.
    first_config = CONFIGS[0]
    kmers_a = seq_to_kmers("ATACACAAT", first_config["k"])
    kmers_b = seq_to_kmers("ATACACAAC", first_config["k"])
    bloom_a = BIGSI.bloom(first_config, kmers_a)
    bloom_b = BIGSI.bloom(first_config, kmers_b)

    left = BIGSI.build(CONFIGS[0], [bloom_a], ["a"])
    right = BIGSI.build(CONFIGS[1], [bloom_b], ["b"])
    reference = BIGSI.build(CONFIGS[2], [bloom_a, bloom_b], ["a", "b"])

    left.merge(right)

    merged_hits = left.search("ATACACAAT", 0.5)
    reference_hits = reference.search("ATACACAAT", 0.5)
    assert merged_hits == reference_hits

    left.delete()
    right.delete()
    reference.delete()
예제 #10
0
파일: test_cli.py 프로젝트: rpetit3/BIGSI
def test_search_doesnt_required_write_access():
    """Search must work on a read-only database while insert is rejected.

    A small index is built through the CLI, then the graph and metadata files
    are made read-only.  ``insert`` is expected to raise ``OSError``, whereas
    ``search`` must still return results (including a "score" entry when a
    threshold is given).

    Write permission is restored and the database deleted in a ``finally``
    clause, so a failing assertion cannot leave read-only files behind.
    """
    index = BIGSI.create(m=100, force=True)
    f = index.db
    hug.test.delete(bigsi.__main__, '', {'db': f})
    hug.test.post(bigsi.__main__, 'init', {'db': f, 'm': 1000})

    N = 3
    bloomfilter_filepaths = ['bigsi/tests/data/test_kmers.bloom'] * N
    # Random six-character sample names (upper-case letters and digits).
    alphabet = string.ascii_uppercase + string.digits
    samples = [
        ''.join(random.choice(alphabet) for _ in range(6))
        for _ in range(N)
    ]

    hug.test.post(bigsi.__main__, 'build', {
        'db': f,
        'bloomfilters': bloomfilter_filepaths,
        'samples': samples
    })

    # Make the DB read only
    read_only = S_IREAD | S_IRGRP | S_IROTH
    os.chmod(index.graph_filename, read_only)
    os.chmod(index.metadata_filename, read_only)
    try:
        # Writing to a read-only database must fail.
        with pytest.raises(OSError):
            hug.test.post(
                bigsi.__main__, 'insert', {
                    'db': f,
                    'bloomfilter': 'bigsi/tests/data/test_kmers.bloom',
                    'sample': "s3"
                })

        # Search doesn't raise errors
        seq = 'GATCGTTTGCGGCCACAGTTGCCAGAGATGA'
        response = hug.test.get(bigsi.__main__, 'search', {
            'db': f,
            'seq': seq,
            "score": True
        })
        assert response.data.get(seq).get('results') != {}
        # The "score" key is only asserted for the thresholded search below.

        seq = 'GATCGTTTGCGGCCACAGTTGCCAGAGATGAAAG'
        response = hug.test.get(bigsi.__main__, 'search', {
            'db': f,
            'seq': seq,
            'threshold': 0.1,
            "score": True
        })
        assert response.data.get(seq).get('results')
        assert "score" in list(
            response.data.get(seq).get('results').values())[0]
    finally:
        # Delete requires write access, so give it back to the owner first.
        os.chmod(index.graph_filename, S_IWUSR | S_IREAD)
        os.chmod(index.metadata_filename, S_IWUSR | S_IREAD)
        hug.test.delete(bigsi.__main__, '', {
            'db': f,
        })
예제 #11
0
def test_insert():
    """Insert a second sample into a one-sample index and check the result.

    Run for every entry in ``CONFIGS``.  Checks the index parameters, the
    per-k-mer colour bit patterns, and the colour/sample mapping for both
    samples.
    """
    expected_lookup = {
        "ATC": bitarray("11"),
        "ATA": bitarray("10"),
        "ATT": bitarray("01"),
    }
    for cfg in CONFIGS:
        get_storage(cfg).delete_all()

        # Sample "1" is present at build time; sample "2" is inserted after.
        index = BIGSI.build(cfg, [BIGSI.bloom(cfg, ["ATC", "ATA"])], ["1"])
        second_bloom = BIGSI.bloom(cfg, ["ATC", "ATT"])
        index.insert(second_bloom, "2")

        assert index.kmer_size == 3
        assert index.bloomfilter_size == 1000
        assert index.num_hashes == 3
        assert index.num_samples == 2
        assert index.lookup(["ATC", "ATA", "ATT"]) == expected_lookup

        # Colours follow insertion order: "1" -> 0, "2" -> 1.
        assert index.colour_to_sample(0) == "1"
        assert index.sample_to_colour("1") == 0
        assert index.colour_to_sample(1) == "2"
        assert index.sample_to_colour("2") == 1

        index.delete()
예제 #12
0
def test_exact_search():
    """Search with no threshold argument returns only the matching sample.

    Samples "a" and "b" are built from two different sequences.  Each
    sequence must come back as the first hit for its own sample, and an
    unrelated sequence must return no hits.
    """
    # Bloom filters are created once, using the first config's parameters.
    first_config = CONFIGS[0]
    kmers_a = seq_to_kmers("ATACACAAT", first_config["k"])
    kmers_b = seq_to_kmers("ACAGAGAAC", first_config["k"])
    bloom_a = BIGSI.bloom(first_config, kmers_a)
    bloom_b = BIGSI.bloom(first_config, kmers_b)

    def full_hit(sample_name):
        # Expected record for a sample containing all six query k-mers.
        return {
            "percent_kmers_found": 100,
            "num_kmers": 6,
            "num_kmers_found": 6,
            "sample_name": sample_name,
        }

    for cfg in CONFIGS:
        get_storage(cfg).delete_all()
        index = BIGSI.build(cfg, [bloom_a, bloom_b], ["a", "b"])

        assert index.search("ATACACAAT")[0] == full_hit("a")
        assert index.search("ACAGAGAAC")[0] == full_hit("b")
        assert index.search("ACAGTTAAC") == []

        index.delete()
예제 #13
0
파일: test_cli.py 프로젝트: rpetit3/BIGSI
def test_bloom_cmd():
    """The ``bloom`` CLI command writes a Bloom filter with at least one bit set.

    The command is pointed at the test ``.ctx`` file and asked to write its
    output to a file under ``/tmp``.  That file is read back as a bitarray
    and must not be all zeros.  It is removed in a ``finally`` clause so a
    failed read or assertion does not leave it behind.
    """
    Graph = BIGSI.create(m=100, force=True)
    f = '/tmp/test_kmers.bloom'
    try:
        hug.test.post(bigsi.__main__, 'bloom', {
            'db': Graph.db,
            'ctx': 'bigsi/tests/data/test_kmers.ctx',
            'outfile': f
        })
        a = bitarray()
        with open(f, 'rb') as inf:
            a.fromfile(inf)
        assert sum(a) > 0
    finally:
        # Guarded so a missing output file does not mask the real failure.
        if os.path.exists(f):
            os.remove(f)
예제 #14
0
def test_search_for_amino_acid_mutation():
    """Genotype four inserted samples for the rpoB S450X amino-acid variant.

    Two DNA-level variant names (entries 0 and 4 of the names returned for
    rpoB S450X) are turned into probe sets.  The first reference and first
    alternate sequence of each set are inserted as samples "ref1", "alt1",
    "ref2" and "alt2".  The search must call the reference samples "0/0" and
    the alternate samples "1/1" with the expected mutation names.
    """
    kmer_size = 21
    bigsi = BIGSI.create(m=1000, k=kmer_size, force=True)
    variant_search = BIGSIAminoAcidMutationSearch(
        bigsi, "bigsi/tests/data/ref.fasta", "bigsi/tests/data/ref.gb")

    var_name1 = variant_search.aa2dna.get_variant_names(
        "rpoB", "S450X", True)[0]
    var_name2 = variant_search.aa2dna.get_variant_names(
        "rpoB", "S450X", True)[4]
    print(var_name1)
    print(var_name2)

    # Index the reference and alternate probe sequence of each variant
    # as separate samples.
    probes1 = variant_search.create_variant_probe_set(var_name1)
    probes2 = variant_search.create_variant_probe_set(var_name2)
    labelled_seqs = [
        ("ref1", probes1.refs[0]),
        ("alt1", probes1.alts[0]),
        ("ref2", probes2.refs[0]),
        ("alt2", probes2.alts[0]),
    ]
    # All Bloom filters are created before any of them is inserted.
    labelled_blooms = [
        (label, bigsi.bloom(bigsi.seq_to_kmers(probe_seq)))
        for label, probe_seq in labelled_seqs
    ]
    for label, bloom in labelled_blooms:
        bigsi.insert(bloom, label)

    results = variant_search.search_for_amino_acid_variant(
        "rpoB", "S", 450, "X")
    calls = results.get("rpoB_S450X")

    # Reference samples: homozygous reference.  The compared values drop the
    # last character of aa_mut and the last three of the variant name.
    for label, var_name in (("ref1", var_name1), ("ref2", var_name2)):
        call = calls.get(label)
        assert call.get("genotype") == "0/0"
        assert call.get("aa_mut")[:-1] == "S450"
        assert call.get("variant")[:-3] == var_name[:-3]

    # Alternate samples: homozygous alternate with the exact mutation name.
    for label, aa_mut, var_name in (("alt1", "S450K", var_name1),
                                    ("alt2", "S450I", var_name2)):
        call = calls.get(label)
        assert call.get("genotype") == "1/1"
        assert call.get("aa_mut") == aa_mut
        assert call.get("variant") == var_name
예제 #15
0
파일: test_cli.py 프로젝트: rpetit3/BIGSI
def test_build_cmd():
    """Build an index through the CLI and check that searches return results.

    Two searches are run with ``score`` enabled.  The second also passes a
    threshold, and only for that one is the "score" key asserted.
    """
    index = BIGSI.create(m=100, force=True)
    db = index.db

    # Reset the database through the CLI, then re-initialise it.
    hug.test.delete(bigsi.__main__, '', {'db': db})
    hug.test.post(bigsi.__main__, 'init', {'db': db, 'm': 1000})

    n_samples = 3
    bloom_paths = ['bigsi/tests/data/test_kmers.bloom'] * n_samples
    # Random six-character sample names (upper-case letters and digits).
    alphabet = string.ascii_uppercase + string.digits
    names = [
        ''.join(random.choice(alphabet) for _ in range(6))
        for _ in range(n_samples)
    ]
    hug.test.post(bigsi.__main__, 'build', {
        'db': db,
        'bloomfilters': bloom_paths,
        'samples': names
    })

    # TODO: the "score" assertion for this first search is still disabled.
    seq = 'GATCGTTTGCGGCCACAGTTGCCAGAGATGA'
    exact = hug.test.get(bigsi.__main__, 'search', {
        'db': db,
        'seq': seq,
        "score": True
    })
    assert exact.data.get(seq).get('results') != {}

    seq = 'GATCGTTTGCGGCCACAGTTGCCAGAGATGAAAG'
    thresholded = hug.test.get(bigsi.__main__, 'search', {
        'db': db,
        'seq': seq,
        'threshold': 0.1,
        "score": True
    })
    hits = thresholded.data.get(seq).get('results')
    assert hits
    first_hit = list(thresholded.data.get(seq).get('results').values())[0]
    assert "score" in first_hit

    hug.test.delete(bigsi.__main__, '', {'db': db})
예제 #16
0
import bigsi.__main__
import json
from bigsi.tests.base import ST_SEQ
from bigsi.tests.base import ST_KMER
from bigsi.tests.base import ST_SAMPLE_NAME
from bigsi.tests.base import ST_GRAPH
from bigsi import BIGSI
import hypothesis.strategies as st
from hypothesis import given
import random
import tempfile
from bigsi.utils import seq_to_kmers
from bitarray import bitarray
import numpy as np

# Module-level index created at import time; test_bloom_cmd below reads its
# ``db`` attribute.  ``force=True`` is passed to ``BIGSI.create``.
Graph = BIGSI.create(m=100, force=True)


def test_bloom_cmd():
    """The ``bloom`` CLI command writes a Bloom filter with at least one bit set.

    Uses the module-level ``Graph`` index for the ``db`` argument.  The output
    file is read back as a bitarray and must not be all zeros.  It is removed
    in a ``finally`` clause so a failed read or assertion does not leave it
    behind under ``/tmp``.
    """
    f = '/tmp/test_kmers.bloom'
    try:
        hug.test.post(
            bigsi.__main__, 'bloom', {'db': Graph.db,
                                      'ctx': 'bigsi/tests/data/test_kmers.ctx',
                                      'outfile': f})
        a = bitarray()
        with open(f, 'rb') as inf:
            a.fromfile(inf)
        assert sum(a) > 0
    finally:
        # Guarded so a missing output file does not mask the real failure.
        if os.path.exists(f):
            os.remove(f)
예제 #17
0
def test_merge():
    """Merging two on-disk indexes matches an index built from all samples.

    Index 1 holds three samples of one k-mer and index 2 holds nine samples
    of another.  After ``bigsi1.merge(bigsi2)`` the reopened index 1 must
    agree, row by row and in metadata, with a reference index built directly
    from both sets of Bloom filters.
    """
    kmers1 = ['AAAAAAAAA'] * 3
    kmers2 = ['AAAAAAAAT'] * 9

    # First index: one single-k-mer Bloom filter per sample.
    bigsi1 = BIGSI.create(db="./db-bigsi1/", m=10, k=9, h=1, force=True)
    blooms1 = [bigsi1.bloom([kmer]) for kmer in kmers1]
    samples1 = list(map(str, range(len(kmers1))))
    bigsi1.build(blooms1, samples1)

    # Second index, built the same way from the other k-mer.
    bigsi2 = BIGSI.create(db="./db-bigsi2/", m=10, k=9, h=1, force=True)
    blooms2 = [bigsi2.bloom([kmer]) for kmer in kmers2]
    samples2 = list(map(str, range(len(kmers2))))
    bigsi2.build(blooms2, samples2)

    # Reference index: created, reopened with mode="c", then built from
    # every Bloom filter at once.
    combined_samples = combine_samples(samples1, samples2)
    reference = BIGSI.create(
        db="./db-bigsi-c/", m=10, k=9, h=1, force=True)
    reference = BIGSI(db="./db-bigsi-c/", mode="c")
    reference.build(blooms1 + blooms2, combined_samples)

    bigsi1.merge(bigsi2)

    # Reopen the merged index from disk before comparing.
    bigsi1 = BIGSI(db="./db-bigsi1/")
    # Rows 0..9 are compared; every index here was created with m=10.
    for row in range(10):
        assert bigsi1.graph[row] == reference.graph[row]
    for key, value in reference.metadata.items():
        assert bigsi1.metadata[key] == value

    bigsi1.delete_all()
    bigsi2.delete_all()
    reference.delete_all()