예제 #1
0
def test_merge():
    """Check lookups on an index merged with a second, identically built one.

    Two Bloom filters are built from overlapping k-mer lists, and for every
    storage backend two indexes are created from them. After
    ``merge_indexes`` each looked-up row is expected to be its two-bit
    pattern repeated twice: "11" for a k-mer in both lists, "10" for one
    only in the first list and "01" for one only in the second.
    """
    num_bits = 250
    num_hashes = 1
    kmer_lists = (
        ["ATC", "ATG", "ATA", "ATT"],
        ["ATC", "ATG", "ATA", "TTT"],
    )
    bloomfilters = [
        BloomFilter(num_bits, num_hashes).update(convert_query_kmers(kmers))
        for kmers in kmer_lists
    ]

    for storage in get_storages():
        # Start every backend from an empty state.
        storage.delete_all()
        create_args = (storage, bloomfilters, num_bits, num_hashes)
        ksi1 = KmerSignatureIndex.create(*create_args)
        ksi2 = KmerSignatureIndex.create(*create_args)
        ksi1.merge_indexes(ksi2)

        single = ksi1.lookup(["ATC"])
        assert single == {"ATC": bitarray("11" * 2)}

        # A repeated query k-mer yields a single key in the result.
        with_repeat = ksi1.lookup(["ATC", "ATC", "ATT"])
        assert with_repeat == {
            "ATC": bitarray("11" * 2),
            "ATT": bitarray("10" * 2),
        }

        all_three = ksi1.lookup(["ATC", "ATC", "ATT", "TTT"])
        assert all_three == {
            "ATC": bitarray("11" * 2),
            "ATT": bitarray("10" * 2),
            "TTT": bitarray("01" * 2),
        }
예제 #2
0
파일: bigsi.py 프로젝트: wangdang511/BIGSI
 def __init__(self, config=None):
     """Open the configured storage and initialise the index components.

     Args:
         config: Configuration handed to ``get_storage``. When ``None``,
             ``DEFAULT_CONFIG`` is used instead.
     """
     effective_config = DEFAULT_CONFIG if config is None else config
     self.config = effective_config
     self.storage = get_storage(effective_config)
     # Each component initialiser is invoked explicitly on the same storage.
     for component in (SampleMetadata, KmerSignatureIndex):
         component.__init__(self, self.storage)
     # TODO: this can be inferred and set at build time
     self.min_unique_kmers_in_query = MIN_UNIQUE_KMERS_IN_QUERY
     # num_samples is not assigned in this method; presumably it is
     # provided by one of the initialisers above -- confirm.
     self.scorer = Scorer(self.num_samples)
예제 #3
0
def test_lookup1():
    """Check per-k-mer lookup rows of an index built from two Bloom filters.

    For every storage backend an index is created from the two filters'
    bitarrays and then re-opened through ``KmerSignatureIndex(storage)``.
    Each looked-up row is expected to hold one bit per filter: "11" for a
    k-mer in both lists, "10" for one only in the first list and "01" for
    one only in the second.
    """
    num_bits = 250
    num_hashes = 3
    kmer_lists = (
        ["ATC", "ATG", "ATA", "ATT"],
        ["ATC", "ATG", "ATA", "TTT"],
    )
    # Query k-mers go through convert_query_kmers first (presumably to
    # canonical form -- confirm against the helper).
    filters = [
        BloomFilter(num_bits, num_hashes).update(convert_query_kmers(kmers))
        for kmers in kmer_lists
    ]
    bloomfilters = [bloom.bitarray for bloom in filters]

    for storage in get_storages():
        # Start every backend from an empty state.
        storage.delete_all()

        KmerSignatureIndex.create(storage, bloomfilters, num_bits, num_hashes)
        # Re-open the freshly written index from storage alone.
        ksi = KmerSignatureIndex(storage)

        single = ksi.lookup(["ATC"])
        assert single == {"ATC": bitarray("11")}

        print(ksi.lookup(["ATC", "ATC", "ATT"]))
        # A repeated query k-mer yields a single key in the result.
        with_repeat = ksi.lookup(["ATC", "ATC", "ATT"])
        assert with_repeat == {
            "ATC": bitarray("11"),
            "ATT": bitarray("10"),
        }

        all_three = ksi.lookup(["ATC", "ATC", "ATT", "TTT"])
        assert all_three == {
            "ATC": bitarray("11"),
            "ATT": bitarray("10"),
            "TTT": bitarray("01"),
        }
예제 #4
0
파일: bigsi.py 프로젝트: wangdang511/BIGSI
 def build(cls, config, bloomfilters, samples):
     """Build an index in the configured storage and return a new instance.

     The build-time storage handle is always closed before returning or
     propagating an error, so a failed build does not leave it open.

     Args:
         cls: Class to instantiate for the result (this takes ``cls`` first,
             so it is presumably declared as a classmethod outside this
             view -- confirm).
         config: Mapping passed to ``get_storage``; must provide ``"m"`` and
             ``"h"``, and may provide ``"low_mem_build"`` (defaults to
             ``False``).
         bloomfilters: Bloom filters checked by ``validate_build_params``
             and passed to ``KmerSignatureIndex.create``.
         samples: Samples checked by ``validate_build_params`` and stored
             via ``SampleMetadata.add_samples``.

     Returns:
         ``cls(config)``, constructed after the build storage is closed.

     Raises:
         Whatever validation, metadata insertion or index creation raises;
         the storage is closed before the exception propagates.
     """
     storage = get_storage(config)
     try:
         validate_build_params(bloomfilters, samples)
         logger.debug("Insert sample metadata")
         SampleMetadata(storage).add_samples(samples)
         logger.debug("Create signature index")
         KmerSignatureIndex.create(
             storage,
             bloomfilters,
             config["m"],
             config["h"],
             config.get("low_mem_build", False),
         )
     finally:
         # LOCK files need to be deleted before the storage is re-initialised
         # by cls(config); closing in ``finally`` also covers a failed build.
         storage.close()
     return cls(config)