def get_vcf_chromosome_map(datasets, chromosome):
    """Map each distinct VCF location to its match for ``chromosome``.

    Args:
        datasets: Iterable of mappings; every entry is read at
            ``['vcfLocations']['SS']``, which must itself be iterable and
            yield hashable VCF locations.
        chromosome: Target chromosome, passed unchanged to
            ``get_matching_chromosome``.

    Returns:
        dict: One key per unique VCF location; the value is whatever
        ``get_matching_chromosome`` returns for the chromosomes reported by
        ``get_vcf_chromosomes`` for that location.
    """
    # Locations shared by several datasets are collapsed first so each VCF
    # is only inspected once.
    unique_locations = {
        location
        for dataset in datasets
        for location in dataset['vcfLocations']['SS']
    }
    return {
        location: get_matching_chromosome(
            get_vcf_chromosomes(location), chromosome
        )
        for location in unique_locations
    }
def get_translated_regions(location):
    """Build region strings for ``location`` from the module-level ``regions``.

    ``regions`` is iterated with ``.items()`` as target chromosome ->
    iterable of region values. Each target chromosome is resolved through
    ``get_matching_chromosome`` against the chromosomes reported by
    ``get_vcf_chromosomes(location)``.

    Args:
        location: VCF location handed to ``get_vcf_chromosomes``.

    Returns:
        list: Strings formatted as ``'<matched chromosome>:<region>'``, in
        the iteration order of ``regions`` and of each region list. Target
        chromosomes whose match is falsy contribute nothing.
    """
    available = get_vcf_chromosomes(location)
    translated = []
    for wanted_name, wanted_regions in regions.items():
        vcf_name = get_matching_chromosome(available, wanted_name)
        if vcf_name:
            translated.extend(
                '{}:{}'.format(vcf_name, region) for region in wanted_regions
            )
    return translated
def get_translated_regions(location):
    """Slice every matched chromosome of ``location`` into bounded regions.

    For each entry of ``CHROMOSOMES`` that ``get_matching_chromosome``
    resolves to a truthy name, the chromosome length (first item of the
    two-item value stored under that name) is divided by 1,000,000 and then
    walked from 0 in steps of at most ``MAX_SLICE_SIZE_MBP``.

    Args:
        location: VCF location handed to ``get_vcf_chromosomes``.

    Returns:
        list: ``('<chromosome>:<start>', size)`` tuples, where ``start`` and
        ``size`` are expressed in the same scaled unit as the length
        (presumably megabase pairs -- confirm against
        ``get_vcf_chromosomes``).
    """
    available = get_vcf_chromosomes(location)
    slices = []
    for wanted_name in CHROMOSOMES:
        vcf_name = get_matching_chromosome(available, wanted_name)
        if vcf_name:
            total, _ = available[vcf_name]
            total_mbp = total / 1000000
            offset = 0
            while offset < total_mbp:
                size = min(MAX_SLICE_SIZE_MBP, total_mbp - offset)
                # Whole-number sizes are stored as ints, so they are
                # rendered without a trailing ".0" when formatted later.
                if int(size) == size:
                    size = int(size)
                label = '{}:{}'.format(vcf_name, offset)
                slices.append((label, size))
                offset += size
    return slices