def test_build_chunks():
    """Compare an index built with ``max_memory=20`` against one built without it.

    The same bloom filter file is used three times as input for both builds.
    After reopening both databases, graph rows 0..9 and every metadata entry
    of the memory-capped index must equal those of the uncapped index.
    """
    copies = 3
    bloom_paths = ['bigsi/tests/data/test_kmers.bloom'] * copies
    names = generate_sample_names(len(bloom_paths))

    # Reference build: no memory limit passed.
    unbounded = BIGSI.create(
        db="./db-bigsi-no-max-mem/", m=10, k=9, h=1, force=True
    )
    build(bloom_paths, names, unbounded)

    # Same inputs, but with a memory limit.
    bounded = BIGSI.create(
        db="./db-bigsi-max-mem/", m=10, k=9, h=1, force=True
    )
    build(bloom_paths, names, bounded, max_memory=20)  # 20 bytes

    # Reopen both databases from disk before comparing.
    unbounded = BIGSI("./db-bigsi-no-max-mem/")
    bounded = BIGSI("./db-bigsi-max-mem")

    for row in range(10):
        assert unbounded.graph[row] == bounded.graph[row]
    for key, value in bounded.metadata.items():
        assert unbounded.metadata[key] == value

    # Remove the on-disk test databases.
    unbounded.delete_all()
    bounded.delete_all()
def test_merge():
    """Compare a merged index against one built from both inputs at once.

    Two indexes are built separately (3 and 9 samples), the second is merged
    into the first, and the reopened result must match a third index built
    directly from the concatenated bloom filters and combined sample names,
    for graph rows 0..9 and for every metadata entry.
    """
    first_kmers = ['AAAAAAAAA'] * 3
    second_kmers = ['AAAAAAAAT'] * 9

    # First index: one single-kmer bloom filter per sample.
    left = BIGSI.create(db="./db-bigsi1/", m=10, k=9, h=1, force=True)
    left_blooms = [left.bloom([kmer]) for kmer in first_kmers]
    left_samples = list(map(str, range(len(first_kmers))))
    left.build(left_blooms, left_samples)

    # Second index, built the same way from the other kmer list.
    right = BIGSI.create(db="./db-bigsi2/", m=10, k=9, h=1, force=True)
    right_blooms = [right.bloom([kmer]) for kmer in second_kmers]
    right_samples = list(map(str, range(len(second_kmers))))
    right.build(right_blooms, right_samples)

    # Expected result: a single index built from all blooms together.
    merged_names = combine_samples(left_samples, right_samples)
    expected = BIGSI.create(db="./db-bigsi-c/", m=10, k=9, h=1, force=True)
    expected = BIGSI(db="./db-bigsi-c/", mode="c")
    expected.build(left_blooms + right_blooms, merged_names)

    # Merge the second index into the first, then reopen it from disk.
    left.merge(right)
    left = BIGSI(db="./db-bigsi1/")

    for row in range(10):
        assert left.graph[row] == expected.graph[row]
    for key, value in expected.metadata.items():
        assert left.metadata[key] == value

    # Remove the on-disk test databases.
    left.delete_all()
    right.delete_all()
    expected.delete_all()