Exemple #1
0
def test_build_chunks():
    """Check that building with ``max_memory`` yields the same index as without.

    The same three bloom filter files are built into two separate databases,
    once with no ``max_memory`` argument and once with ``max_memory=20``.
    Both databases are then re-opened from disk and compared before being
    deleted.
    """
    bloom_count = 3
    bloom_paths = ['bigsi/tests/data/test_kmers.bloom'] * bloom_count
    names = generate_sample_names(len(bloom_paths))
    create_kwargs = dict(m=10, k=9, h=1, force=True)

    # Reference index: no memory limit passed to build().
    unbounded = BIGSI.create(db="./db-bigsi-no-max-mem/", **create_kwargs)
    build(bloom_paths, names, unbounded)

    # Same inputs, but with max_memory=20 (20 bytes).
    bounded = BIGSI.create(db="./db-bigsi-max-mem/", **create_kwargs)
    build(bloom_paths, names, bounded, max_memory=20)

    # Re-open both indexes from disk so the comparison uses persisted state.
    unbounded = BIGSI("./db-bigsi-no-max-mem/")
    bounded = BIGSI("./db-bigsi-max-mem")

    # Compare graph entries 0..9 (presumably one per row, given m=10).
    for row in range(10):
        assert unbounded.graph[row] == bounded.graph[row]

    # Every metadata entry of the bounded index must match the reference.
    for key, value in bounded.metadata.items():
        assert unbounded.metadata[key] == value

    unbounded.delete_all()
    bounded.delete_all()
Exemple #2
0
def test_cant_build_chunks_if_max_memory_less_than_bf():
    """Check that ``build`` raises ``ValueError`` when ``max_memory`` is 1.

    Per the test name, a one-byte budget is expected to be smaller than a
    single bloom filter, so the build should be rejected.
    """
    bloom_count = 3
    bloom_paths = ['bigsi/tests/data/test_kmers.bloom'] * bloom_count
    names = generate_sample_names(len(bloom_paths))
    index = BIGSI.create(db="./db-bigsi-max-mem/", m=10, k=9, h=1, force=True)

    # max_memory=1 is one byte; build() is expected to fail.
    with pytest.raises(ValueError):
        build(bloom_paths, names, index, max_memory=1)
Exemple #3
0
    def build(
        self,
        bloomfilters: hug.types.multiple,
        samples: hug.types.multiple = [],
        config: hug.types.text = None,
    ):
        """Build an index from bloom filter files using a config file.

        Args:
            bloomfilters: Bloom filter file paths, forwarded to the
                module-level ``build`` as ``bloomfilter_filepaths``.
            samples: Sample names, one per bloom filter. When empty, the
                bloom filter paths themselves are used as sample names.
            config: Value handed to ``get_config_from_file`` to obtain the
                configuration.

        Returns:
            Whatever the module-level ``build`` returns.

        Raises:
            AssertionError: If ``samples`` is non-empty and its length
                differs from that of ``bloomfilters``.
        """
        settings = get_config_from_file(config)

        if not samples:
            # No explicit names given: fall back to the file paths.
            samples = bloomfilters
        else:
            assert len(samples) == len(bloomfilters)

        # The optional memory cap is stored as a human-readable size string
        # and converted with humanfriendly.parse_size.
        memory_setting = settings.get("max_build_mem_bytes")
        memory_limit = (
            humanfriendly.parse_size(memory_setting) if memory_setting else None
        )

        return build(
            config=settings,
            bloomfilter_filepaths=bloomfilters,
            samples=samples,
            max_memory=memory_limit,
        )
Exemple #4
0
 def build(self,
           db: hug.types.text,
           bloomfilters: hug.types.multiple,
           samples: hug.types.multiple = []):
     """Build bloom filters into the BIGSI database at ``db``.

     Args:
         db: Value passed to ``BIGSI(...)`` to open the target database.
         bloomfilters: Bloom filter file paths.
         samples: Sample names, one per bloom filter. When empty, the
             bloom filter paths are used as sample names.

     Returns:
         Whatever the module-level ``build`` returns.

     Raises:
         AssertionError: If ``samples`` is non-empty and its length differs
             from that of ``bloomfilters``.
     """
     if not samples:
         # No explicit names given: fall back to the file paths.
         samples = bloomfilters
     else:
         assert len(samples) == len(bloomfilters)

     target = BIGSI(db)
     return build(
         graph=target,
         bloomfilter_filepaths=bloomfilters,
         samples=samples,
     )
Exemple #5
0
 def build(self,
           db: hug.types.text,
           bloomfilters: hug.types.multiple,
           samples: hug.types.multiple = [],
           max_memory: hug.types.text = '',
           lowmem: hug.types.smart_boolean = False):
     """Build bloom filters into the BIGSI database at ``db``.

     Args:
         db: Value passed to ``BIGSI(...)`` to open the target database.
         bloomfilters: Bloom filter file paths.
         samples: Sample names, one per bloom filter. When empty, the
             bloom filter paths are used as sample names.
         max_memory: Human-readable size string converted with
             ``humanfriendly.parse_size``. An empty string means no limit
             is passed on (``None``).
         lowmem: Forwarded unchanged to the module-level ``build``.

     Returns:
         Whatever the module-level ``build`` returns.

     Raises:
         AssertionError: If ``samples`` is non-empty and its length differs
             from that of ``bloomfilters``.
     """
     if not samples:
         # No explicit names given: fall back to the file paths.
         samples = bloomfilters
     else:
         assert len(samples) == len(bloomfilters)

     memory_limit = humanfriendly.parse_size(max_memory) if max_memory else None

     target = BIGSI(db)
     return build(
         index=target,
         bloomfilter_filepaths=bloomfilters,
         samples=samples,
         max_memory=memory_limit,
         lowmem=lowmem,
     )
Exemple #6
0
    def build(
        self,
        bloomfilters: hug.types.multiple = [],
        samples: hug.types.multiple = [],
        from_file: hug.types.text = None,
        config: hug.types.text = None,
    ):
        """Build an index from bloom filters given directly or listed in a TSV.

        Args:
            bloomfilters: Bloom filter file paths. Mutually exclusive with
                ``from_file``.
            samples: Sample names, one per bloom filter. When empty (and
                ``from_file`` is not used), the bloom filter paths are used
                as sample names.
            from_file: Path to a tab-separated file whose first column is a
                bloom filter path and whose second column is the sample
                name. Blank lines are ignored. When given, any ``samples``
                argument is replaced by the names read from the file.
            config: Value handed to ``get_config_from_file`` to obtain the
                configuration.

        Returns:
            Whatever the module-level ``build`` returns.

        Raises:
            ValueError: If both ``from_file`` and ``bloomfilters`` are given.
            AssertionError: If ``samples`` is non-empty and its length
                differs from that of ``bloomfilters``.
            IndexError: If a non-blank row of ``from_file`` has fewer than
                two columns.
        """
        config = get_config_from_file(config)

        if from_file and bloomfilters:
            raise ValueError(
                "You can only specify blooms via from_file or bloomfilters, but not both"
            )
        elif from_file:
            samples = []
            bloomfilters = []
            # newline="" lets the csv module handle line endings itself, as
            # the csv documentation recommends for file objects.
            with open(from_file, "r", newline="") as tsvfile:
                for row in csv.reader(tsvfile, delimiter="\t"):
                    if not row:
                        # csv.reader yields [] for blank lines; skip them
                        # instead of failing with IndexError on row[0].
                        continue
                    bloomfilters.append(row[0])
                    samples.append(row[1])

        if samples:
            assert len(samples) == len(bloomfilters)
        else:
            # No explicit names given: fall back to the file paths.
            samples = bloomfilters

        # The optional memory cap is stored as a human-readable size string
        # and converted with humanfriendly.parse_size.
        if config.get("max_build_mem_bytes"):
            max_memory_bytes = humanfriendly.parse_size(
                config["max_build_mem_bytes"])
        else:
            max_memory_bytes = None

        return build(
            config=config,
            bloomfilter_filepaths=bloomfilters,
            samples=samples,
            max_memory=max_memory_bytes,
        )