def test_region_partition_large_region(vcf_variants_loaders, temp_dirname): fvars = vcf_variants_loaders("backends/partition")[0] partition_desc = ParquetPartitionDescriptor(["1", "2"], 10000000, root_dirname=temp_dirname) parquet_writer = VariantsParquetWriter(fvars, partition_desc) assert parquet_writer is not None parquet_writer.write_dataset() assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_0")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_0")) assert os.path.exists( os.path.join(temp_dirname, "region_bin=1_0/variants_region_bin_1_0.parquet")) assert os.path.exists( os.path.join(temp_dirname, "region_bin=2_0/variants_region_bin_2_0.parquet"))
def test_region_partition_small_region(vcf_variants_loaders, temp_dirname): fvars = vcf_variants_loaders("backends/partition")[0] partition_desc = ParquetPartitionDescriptor(["1", "2"], 10, root_dirname=temp_dirname) parquet_writer = VariantsParquetWriter(fvars, partition_desc) assert parquet_writer is not None parquet_writer.write_dataset() assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_86558")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_86562")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_86566")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_86569")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_87810")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_90192")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_90595")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=1_122251")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_86558")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_86562")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_86566")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_86569")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_87810")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_90192")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_90595")) assert os.path.exists(os.path.join(temp_dirname, "region_bin=2_122251")) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_90595/variants_region_bin_1_90595.parquet", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_122251/variants_region_bin_2_122251.parquet", ))
def test_region_family_frequency(vcf_variants_loaders, temp_dirname): fvars = vcf_variants_loaders("backends/partition")[0] temp_dirname = temp_dirname partition_desc = ParquetPartitionDescriptor( ["1", "2"], 100000, family_bin_size=100, rare_boundary=30, root_dirname=temp_dirname, ) parquet_writer = VariantsParquetWriter(fvars, partition_desc) assert parquet_writer is not None parquet_writer.write_dataset() assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_8", "frequency_bin=2", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_8", "frequency_bin=2", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_9", "frequency_bin=2", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_9", "frequency_bin=2", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_12", "frequency_bin=3", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_12", "frequency_bin=3", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_8", "frequency_bin=2", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_8", "frequency_bin=2", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_9", "frequency_bin=2", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_9", "frequency_bin=2", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_12", "frequency_bin=3", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_12", "frequency_bin=3", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_9", "frequency_bin=2", "family_bin=6", "variants_region_bin_1_9_frequency_bin_2_family_bin_6.parquet", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_12", "frequency_bin=3", "family_bin=6", "variants_region_bin_2_12_frequency_bin_3_family_bin_6.parquet", ))
def test_all(vcf_variants_loaders, temp_dirname): fvars = vcf_variants_loaders("backends/partition")[0] partition_desc = ParquetPartitionDescriptor( ["1", "2"], 100000, family_bin_size=100, coding_effect_types=[ "missense", "nonsense", "frame-shift", "synonymous", ], rare_boundary=30, root_dirname=temp_dirname, ) parquet_writer = VariantsParquetWriter(fvars, partition_desc) assert parquet_writer is not None parquet_writer.write_dataset() assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_8", "frequency_bin=2", "coding_bin=1", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_8", "frequency_bin=2", "coding_bin=1", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_9", "frequency_bin=2", "coding_bin=1", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_9", "frequency_bin=2", "coding_bin=1", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_12", "frequency_bin=3", "coding_bin=1", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_12", "frequency_bin=3", "coding_bin=1", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_8", "frequency_bin=2", "coding_bin=0", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_8", "frequency_bin=2", "coding_bin=0", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_9", "frequency_bin=2", "coding_bin=0", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_9", "frequency_bin=2", "coding_bin=0", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_12", "frequency_bin=3", "coding_bin=0", "family_bin=6", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_12", "frequency_bin=3", "coding_bin=0", "family_bin=69", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=1_9", "frequency_bin=2", "coding_bin=1", "family_bin=6", "variants_region_bin_1_9_frequency_bin_2_coding_bin_1" "_family_bin_6.parquet", )) assert os.path.exists( os.path.join( temp_dirname, "region_bin=2_12", "frequency_bin=3", "coding_bin=0", "family_bin=6", "variants_region_bin_2_12_frequency_bin_3_coding_bin_0" "_family_bin_6.parquet", ))