def test_build_chunks():
    """Check that a memory-capped build produces the same index as an uncapped one.

    Two indexes are created with identical parameters and built from the same
    bloom filter files, one without ``max_memory`` and one with
    ``max_memory=20``. Both are then re-opened by path and their ``graph``
    entries 0..9 and all metadata items are compared.
    """
    N = 3
    bloomfilter_filepaths = ['bigsi/tests/data/test_kmers.bloom'] * N
    sample_names = generate_sample_names(len(bloomfilter_filepaths))

    bigsi1 = BIGSI.create(db="./db-bigsi-no-max-mem/", m=10, k=9, h=1, force=True)
    build(bloomfilter_filepaths, sample_names, bigsi1)

    bigsi2 = BIGSI.create(db="./db-bigsi-max-mem/", m=10, k=9, h=1, force=True)
    build(bloomfilter_filepaths, sample_names, bigsi2, max_memory=20)  # 20 bytes

    # Reload and test equal
    bigsi1 = BIGSI("./db-bigsi-no-max-mem/")
    bigsi2 = BIGSI("./db-bigsi-max-mem")
    try:
        # NOTE(review): range(10) presumably mirrors m=10 passed to create() -- confirm.
        for i in range(10):
            assert bigsi1.graph[i] == bigsi2.graph[i]
        for k, v in bigsi2.metadata.items():
            assert bigsi1.metadata[k] == v
    finally:
        # Clean up even when an assertion above fails, so a failing run does
        # not leave the two indexes behind.
        try:
            bigsi1.delete_all()
        finally:
            bigsi2.delete_all()
def test_cant_build_chunks_if_max_memory_less_than_bf():
    """Check that ``build`` raises ``ValueError`` when ``max_memory`` is 1 byte."""
    N = 3
    bloomfilter_filepaths = ['bigsi/tests/data/test_kmers.bloom'] * N
    sample_names = generate_sample_names(len(bloomfilter_filepaths))

    bigsi2 = BIGSI.create(db="./db-bigsi-max-mem/", m=10, k=9, h=1, force=True)
    try:
        with pytest.raises(ValueError):
            build(bloomfilter_filepaths, sample_names,
                  bigsi2, max_memory=1)  # 1 byte (should fail)
    finally:
        # Remove the index created above so the test does not leave it behind.
        # NOTE(review): assumes delete_all() is usable on a freshly created
        # index whose build was rejected -- confirm.
        bigsi2.delete_all()
def build(
    self,
    bloomfilters: hug.types.multiple,
    samples: hug.types.multiple = [],
    config: hug.types.text = None,
):
    """Build an index from bloom filter files using a configuration.

    Args:
        bloomfilters: Bloom filter file paths to insert.
        samples: Sample names, one per bloom filter. When empty, the bloom
            filter paths themselves are used as the sample names.
        config: Value handed to ``get_config_from_file``; the result is used
            as the configuration mapping.

    Returns:
        Whatever the delegated ``build`` call returns.

    Raises:
        AssertionError: If ``samples`` is non-empty and its length differs
            from ``bloomfilters``.
    """
    config = get_config_from_file(config)

    if not samples:
        # No explicit names given: fall back to the file paths.
        samples = bloomfilters
    else:
        assert len(samples) == len(bloomfilters)

    # The limit is optional; a missing or falsy setting means "no limit passed".
    mem_setting = config.get("max_build_mem_bytes")
    max_memory_bytes = humanfriendly.parse_size(mem_setting) if mem_setting else None

    return build(
        config=config,
        bloomfilter_filepaths=bloomfilters,
        samples=samples,
        max_memory=max_memory_bytes,
    )
def build(
    self,
    db: hug.types.text,
    bloomfilters: hug.types.multiple,
    samples: hug.types.multiple = [],
):
    """Build the index opened from ``db`` out of bloom filter files.

    Args:
        db: Value passed to ``BIGSI(...)`` to open the index.
        bloomfilters: Bloom filter file paths to insert.
        samples: Sample names, one per bloom filter. When empty, the bloom
            filter paths themselves are used as the sample names.

    Returns:
        Whatever the delegated ``build`` call returns.

    Raises:
        AssertionError: If ``samples`` is non-empty and its length differs
            from ``bloomfilters``.
    """
    if not samples:
        # No explicit names given: fall back to the file paths.
        samples = bloomfilters
    else:
        assert len(samples) == len(bloomfilters)

    graph = BIGSI(db)
    return build(
        graph=graph,
        bloomfilter_filepaths=bloomfilters,
        samples=samples,
    )
def build(
    self,
    db: hug.types.text,
    bloomfilters: hug.types.multiple,
    samples: hug.types.multiple = [],
    max_memory: hug.types.text = '',
    lowmem: hug.types.smart_boolean = False,
):
    """Build the index opened from ``db`` out of bloom filter files.

    Args:
        db: Value passed to ``BIGSI(...)`` to open the index.
        bloomfilters: Bloom filter file paths to insert.
        samples: Sample names, one per bloom filter. When empty, the bloom
            filter paths themselves are used as the sample names.
        max_memory: Optional size string parsed with
            ``humanfriendly.parse_size``; an empty string means no limit is
            passed on.
        lowmem: Forwarded unchanged to the delegated ``build`` call.

    Returns:
        Whatever the delegated ``build`` call returns.

    Raises:
        AssertionError: If ``samples`` is non-empty and its length differs
            from ``bloomfilters``.
    """
    if not samples:
        # No explicit names given: fall back to the file paths.
        samples = bloomfilters
    else:
        assert len(samples) == len(bloomfilters)

    max_memory_bytes = humanfriendly.parse_size(max_memory) if max_memory else None

    index = BIGSI(db)
    return build(
        index=index,
        bloomfilter_filepaths=bloomfilters,
        samples=samples,
        max_memory=max_memory_bytes,
        lowmem=lowmem,
    )
def build(
    self,
    bloomfilters: hug.types.multiple = [],
    samples: hug.types.multiple = [],
    from_file: hug.types.text = None,
    config: hug.types.text = None,
):
    """Build an index from bloom filter files using a configuration.

    The bloom filters can be given directly or listed in a tab-separated
    file, but not both.

    Args:
        bloomfilters: Bloom filter file paths to insert.
        samples: Sample names, one per bloom filter. When empty, the bloom
            filter paths themselves are used as the sample names. Ignored
            when ``from_file`` is given.
        from_file: Path to a tab-separated file whose first column is a bloom
            filter path and whose second column is the sample name. Blank
            lines are skipped.
        config: Value handed to ``get_config_from_file``; the result is used
            as the configuration mapping.

    Returns:
        Whatever the delegated ``build`` call returns.

    Raises:
        ValueError: If both ``from_file`` and ``bloomfilters`` are given.
        IndexError: If a non-blank row in ``from_file`` has fewer than two
            columns.
        AssertionError: If the sample names and bloom filters differ in
            number.
    """
    config = get_config_from_file(config)

    if from_file and bloomfilters:
        raise ValueError(
            "You can only specify blooms via from_file or bloomfilters, but not both"
        )
    elif from_file:
        # Rebind to fresh lists so the shared default arguments are never mutated.
        samples = []
        bloomfilters = []
        # newline="" lets the csv module handle line endings itself.
        with open(from_file, "r", newline="") as tsvfile:
            reader = csv.reader(tsvfile, delimiter="\t")
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing
                    # newline at end of file); skip them instead of crashing.
                    continue
                bloomfilters.append(row[0])
                samples.append(row[1])

    if samples:
        assert len(samples) == len(bloomfilters)
    else:
        # No explicit names given: fall back to the file paths.
        samples = bloomfilters

    if config.get("max_build_mem_bytes"):
        max_memory_bytes = humanfriendly.parse_size(
            config["max_build_mem_bytes"])
    else:
        max_memory_bytes = None

    return build(
        config=config,
        bloomfilter_filepaths=bloomfilters,
        samples=samples,
        max_memory=max_memory_bytes,
    )