def single_entry_handling(gene, genomicHGVS):
    """Process a single gene / genomic-HGVS pair into its own VCF file.

    The output file is named ``<gene>-<genomicHGVS>.vcf`` and is placed in
    the directory given by ``json_data['output']``.  The pair is handed to
    ``process_variants`` as a one-element list of ``[gene, genomicHGVS]``.

    Args:
        gene: Gene identifier; used in the file name and in the variant list.
        genomicHGVS: Genomic HGVS string for the variant.
    """
    check_file_status()

    # NOTE(review): the return value is discarded here, whereas other
    # callers in this file treat a truthy result as "gene is invalid".
    # Presumably process_variants re-validates -- confirm.
    vcf_utils.validate_gene(gene, genes_set)

    single_variant = [[gene, genomicHGVS]]

    output_dir = json_data['output']
    vcf_name = '{}-{}'.format(gene, genomicHGVS) + '.vcf'
    destination = os.path.join(output_dir, vcf_name)

    process_variants(single_variant, destination)
def test_validate_gene(self):
    """Check the flag returned by ``vcf_utils.validate_gene``.

    The function is expected to return a falsy value for genes that should
    be accepted and a truthy value for genes that should be rejected.
    """
    accepted_genes = ["ALK", "BRCA1"]
    rejected_genes = ["POLOP", "GHOSH"]

    for symbol in accepted_genes:
        is_invalid = vcf_utils.validate_gene(symbol, genes_set)
        self.assertFalse(is_invalid)

    for symbol in rejected_genes:
        is_invalid = vcf_utils.validate_gene(symbol, genes_set)
        self.assertTrue(is_invalid)
def create_non_substitution_entries(output, others):
    """Write one VCF data line to ``output`` per non-substitution variant.

    Each item of ``others`` is read positionally: index 0 is the gene and
    index 1 is the genomic HGVS string.  Variants whose gene fails
    ``vcf_utils.validate_gene`` are reported on stdout and skipped.  For the
    rest, the HGVS string is routed to one of the ``variant_functions``
    handlers (delins, duplication, deletion, or insertion as the fallback),
    each of which yields a ``(pos, ref, alt)`` triple.

    Args:
        output: Object with a ``write`` method that receives the lines.
        others: Iterable of indexable ``[gene, gHGVS, ...]`` items.
    """
    for item in others:
        # Start every line from a fresh copy of the template entry.
        record = copy.deepcopy(json_data['vcf_entry'])
        gene = item[0]
        gHGVS = item[1]

        if vcf_utils.validate_gene(gene, genes_set):
            print(messages['error_messages']['INVALID_GENE'].format(gene))
            print(messages['error_messages']['VARIANT_SKIPPED'].format(
                gene, gHGVS))
            continue

        chrom = vcf_utils.get_chromosome(gene, gene_chrom_dict)

        # Order matters: 'delins' must be tested before 'del', because a
        # delins string also contains the substring 'del'.
        if 'delins' in gHGVS:
            handler = variant_functions.delins_handling
        elif 'dup' in gHGVS:
            handler = variant_functions.duplication_handling
        elif 'del' in gHGVS and 'ins' not in gHGVS:
            handler = variant_functions.deletion_handling
        else:
            handler = variant_functions.insertion_handling
        pos, ref, alt = handler(gHGVS, chrom, genome)

        for column, value in (('#CHROM', chrom), ('POS', pos),
                              ('REF', ref), ('ALT', alt)):
            record[column] = value

        # Emit the fields in header order, stringified.
        output.write(sep.join(
            str(record[column]) for column in json_data['vcf_header']))
        output.write('\n')
def create_substitution_entries(output, subs):
    """Write one VCF data line to ``output`` per substitution variant.

    For every variant the gene is validated, the reference/alternate bases
    and the position are parsed out of the genomic HGVS string, and the
    reference base is checked with ``vcf_utils.validate_position``.  Any
    variant that fails a check is reported on stdout and skipped; the
    remaining variants are written in ``json_data['vcf_header']`` column
    order, joined by ``sep``.

    Args:
        output: Object with a ``write`` method that receives the lines.
        subs: Iterable of mappings whose ``values()`` yield exactly the gene
            followed by the genomic HGVS string.
    """
    for variant in subs:
        # Start every line from a fresh copy of the template entry.
        entry = copy.deepcopy(json_data['vcf_entry'])

        # NOTE(review): relies on the mapping's value order being
        # (gene, gHGVS) -- confirm against the code that builds ``subs``.
        gene, gHGVS = variant.values()

        if vcf_utils.validate_gene(gene, genes_set):
            print(messages['error_messages']['INVALID_GENE'].format(gene))
            print(messages['error_messages']['VARIANT_SKIPPED'].format(
                gene, gHGVS))
            continue

        # Uppercase letters are taken as the bases, digit runs as the position.
        ref_alt = re.findall(r'[A-Z]', gHGVS)
        position = ''.join(re.findall(r'[0-9]+', gHGVS))

        # A string without two uppercase letters cannot supply both REF and
        # ALT.  Report and skip it like every other invalid variant instead
        # of aborting the whole batch with an IndexError.
        if len(ref_alt) < 2:
            print(messages['error_messages']['INVALID_GENOMIC_HGVS'].format(
                gHGVS))
            print(messages['error_messages']['VARIANT_SKIPPED'].format(
                gene, gHGVS))
            continue
        ref, alt = ref_alt[0], ref_alt[1]

        chromosome = vcf_utils.get_chromosome(gene, gene_chrom_dict)
        is_position_invalid, actual_ref = vcf_utils.validate_position(
            {"chrom": chromosome, "pos": position, "ref": ref}, genome)
        if is_position_invalid:
            print(messages['error_messages']['INVALID_GENOMIC_HGVS'].format(
                gHGVS))
            print(messages['error_messages']['REF_MISMATCH'].format(
                position, ref, actual_ref))
            print(messages['error_messages']['VARIANT_SKIPPED'].format(
                gene, gHGVS))
            continue

        entry['#CHROM'] = chromosome
        entry['POS'] = position
        entry['REF'] = ref
        entry['ALT'] = alt

        # str() keeps this in step with create_non_substitution_entries:
        # str.join raises TypeError on any non-string template default.
        field_values = [str(entry[column])
                        for column in json_data['vcf_header']]
        output.write(sep.join(field_values))
        output.write('\n')