Example #1
0
def test_build_url():
    """Check that ``build_url`` appends "subpath" to a range of base URLs.

    Every case pairs a base URL (or plain path) with the string expected
    back from ``build_url(base, "subpath")``. The cases are evaluated in
    order and the first mismatch fails the test.
    """
    expectations = [
        # Plain path, without and with a trailing slash.
        ("http://host/path", "http://host/path/subpath"),
        ("http://host/path/", "http://host/path/subpath"),
        # A query string is expected to stay after the appended segment.
        ("http://host/path?a=b", "http://host/path/subpath?a=b"),
        ("http://host/path/?a=b", "http://host/path/subpath?a=b"),
        # A fragment is expected to stay after the appended segment.
        ("http://host/path#a", "http://host/path/subpath#a"),
        # Non-HTTP scheme.
        ("s3://host/path", "s3://host/path/subpath"),
        # No scheme at all: relative and absolute paths.
        ("relative_path/path", "relative_path/path/subpath"),
        ("/absolute_path/path", "/absolute_path/path/subpath"),
        # An already-encoded space is kept; a raw space comes back as %20.
        ("http://host/a%20path", "http://host/a%20path/subpath"),
        ("http://host/a path", "http://host/a%20path/subpath"),
    ]
    for base, joined in expectations:
        assert build_url(base, "subpath") == joined
Example #2
0
                    f"For multiple inputs, multiple input regions must be a sequence of sequence of strings: {regions}"
                )
            input_regions = regions

    assert len(inputs) == len(input_regions)

    tasks = []
    parts = []
    for i, input in enumerate(inputs):
        filename = url_filename(str(input))
        input_region_list = input_regions[i]
        if input_region_list is None:
            # single partition case: make a list so the loop below works
            input_region_list = [None]  # type: ignore
        for r, region in enumerate(input_region_list):
            part_url = build_url(str(output), f"{filename}/part-{r}.zarr")
            output_part = fsspec.get_mapper(part_url, **output_storage_options)
            parts.append(part_url)
            task = dask.delayed(vcf_to_zarr_sequential)(
                input,
                output=output_part,
                region=region,
                chunk_length=chunk_length,
                chunk_width=chunk_width,
                ploidy=ploidy,
                mixed_ploidy=mixed_ploidy,
                truncate_calls=truncate_calls,
            )
            tasks.append(task)
    dask.compute(*tasks)
    return parts