def test_build_url():
    """Check ``build_url`` against a table of base URLs and expected results.

    Every case appends the same component, ``"subpath"``, to a base and
    compares the returned string with the expected URL. Cases are checked
    in order and the first mismatch fails the test.
    """
    component = "subpath"

    # (base, expected) pairs, grouped by the shape of the base URL.
    expectations = [
        # Plain base, without and with a trailing slash.
        ("http://host/path", "http://host/path/subpath"),
        ("http://host/path/", "http://host/path/subpath"),
        # Base carrying a query string: the query ends up after the new path.
        ("http://host/path?a=b", "http://host/path/subpath?a=b"),
        ("http://host/path/?a=b", "http://host/path/subpath?a=b"),
        # Base carrying a fragment: the fragment ends up after the new path.
        ("http://host/path#a", "http://host/path/subpath#a"),
        # Non-HTTP scheme.
        ("s3://host/path", "s3://host/path/subpath"),
        # Scheme-less relative and absolute paths.
        ("relative_path/path", "relative_path/path/subpath"),
        ("/absolute_path/path", "/absolute_path/path/subpath"),
        # An already percent-encoded space is kept as-is; a raw space
        # comes back percent-encoded.
        ("http://host/a%20path", "http://host/a%20path/subpath"),
        ("http://host/a path", "http://host/a%20path/subpath"),
    ]

    for base, expected in expectations:
        actual = build_url(base, component)
        assert actual == expected
f"For multiple inputs, multiple input regions must be a sequence of sequence of strings: {regions}" ) input_regions = regions assert len(inputs) == len(input_regions) tasks = [] parts = [] for i, input in enumerate(inputs): filename = url_filename(str(input)) input_region_list = input_regions[i] if input_region_list is None: # single partition case: make a list so the loop below works input_region_list = [None] # type: ignore for r, region in enumerate(input_region_list): part_url = build_url(str(output), f"{filename}/part-{r}.zarr") output_part = fsspec.get_mapper(part_url, **output_storage_options) parts.append(part_url) task = dask.delayed(vcf_to_zarr_sequential)( input, output=output_part, region=region, chunk_length=chunk_length, chunk_width=chunk_width, ploidy=ploidy, mixed_ploidy=mixed_ploidy, truncate_calls=truncate_calls, ) tasks.append(task) dask.compute(*tasks) return parts