def _get_puzzle_variant(self, gemini_variant, index):
    """Take a gemini variant and return a basic puzzle variant

    For the overview we only need limited variant information, so only
    the VCF core fields, the gemini variant id, the most severe
    consequence, the impact severity, the max frequency and the CADD
    score are filled in.

    Args:
        gemini_variant: The gemini variant row (looked up by column name)
        index (int): The index of the variant

    Returns:
        variant (Variant): A Variant object with limited information
    """
    chrom = gemini_variant['chrom']
    # Remove a literal 'chr' prefix only. str.lstrip('chrCHR') strips any
    # leading run of those characters and would turn e.g. 'hs37d5' into
    # 's37d5'.
    if chrom[:3].lower() == 'chr':
        chrom = chrom[3:]

    variant_dict = {
        'CHROM': chrom,
        'POS': str(gemini_variant['start']),
        'ID': gemini_variant['rs_ids'],
        'REF': gemini_variant['ref'],
        'ALT': gemini_variant['alt'],
        'QUAL': gemini_variant['qual'],
        'FILTER': gemini_variant['filter'],
    }

    variant = Variant(**variant_dict)
    variant['index'] = index

    # Use the gemini id for fast search
    variant.update_variant_id(gemini_variant['variant_id'])

    # Add the most severe consequence
    variant['most_severe_consequence'] = gemini_variant['impact_so']

    # Add the impact severity
    variant['impact_severity'] = gemini_variant['impact_severity']

    # Only set the max frequency when gemini reports a truthy value
    max_freq = gemini_variant['max_aaf_all']
    if max_freq:
        variant.set_max_freq(max_freq)

    #### Check the impact annotations ####
    if gemini_variant['cadd_scaled']:
        variant['cadd_score'] = gemini_variant['cadd_scaled']

    return variant
def _format_variant(self, gemini_variant, individual_objs, index=0):
    """Make a puzzle variant from a gemini variant

    Args:
        gemini_variant (GeminiQueryRow): The gemini variant
        individual_objs (list(dict)): A list of Individuals
        index(int): The index of the variant

    Returns:
        variant (dict): A Variant object
    """
    # Imported locally so the block does not depend on a file-level import
    import re

    chrom = gemini_variant['chrom']
    # Remove a literal 'chr' prefix only. str.lstrip('chrCHR') strips any
    # leading run of those characters and would turn e.g. 'hs37d5' into
    # 's37d5'.
    if chrom[:3].lower() == 'chr':
        chrom = chrom[3:]

    variant_dict = {
        'CHROM': chrom,
        'POS': str(gemini_variant['start']),
        'ID': gemini_variant['rs_ids'],
        'REF': gemini_variant['ref'],
        'ALT': gemini_variant['alt'],
        'QUAL': gemini_variant['qual'],
        'FILTER': gemini_variant['filter'],
    }

    variant = Variant(**variant_dict)
    variant['index'] = index

    # Use the gemini id for fast search
    variant.update_variant_id(gemini_variant['variant_id'])

    # Update the individuals
    individual_genotypes = self._get_genotypes(
        gemini_variant=gemini_variant,
        individual_objs=individual_objs
    )

    for individual in individual_genotypes:
        # Add the genotype calls to the variant
        variant.add_individual(individual)

    for transcript in self._get_transcripts(gemini_variant):
        variant.add_transcript(transcript)

    # Add the most severe consequence
    variant['most_severe_consequence'] = gemini_variant['impact_so']

    for gene in self._get_genes(variant):
        variant.add_gene(gene)

    variant['start'] = int(variant_dict['POS'])

    if self.variant_type == 'sv':
        other_chrom = variant['CHROM']
        # A translocation is written in VCF breakend notation, where the
        # mate position 'chrom:pos' is enclosed by '[' or ']' brackets.
        # Matching the brackets keeps symbolic alleles such as
        # '<DUP:TANDEM>' (which also contain ':') out of this branch and
        # avoids stripping leading letters from the mate contig name.
        breakend = re.search(r'[\[\]](.+):(\d+)[\[\]]', variant_dict['ALT'])
        if breakend:
            other_chrom = breakend.group(1)
            if other_chrom[:3].lower() == 'chr':
                other_chrom = other_chrom[3:]
            # Stored as int, like 'stop' in every other branch
            variant['stop'] = int(breakend.group(2))
            # Set 'infinity' to length if translocation
            variant['sv_len'] = float('inf')
            variant['sv_type'] = 'BND'
        else:
            variant['stop'] = int(gemini_variant['end'])
            variant['sv_len'] = variant['stop'] - variant['start']
            variant['sv_type'] = gemini_variant['sub_type']

        variant['stop_chrom'] = other_chrom

    else:
        # NOTE(review): for insertions (ALT longer than REF) this yields a
        # stop that is smaller than start -- confirm this is intended.
        variant['stop'] = int(variant_dict['POS']) + (
            len(variant_dict['REF']) - len(variant_dict['ALT']))

    variant['cytoband_start'] = get_cytoband_coord(
        chrom=variant['CHROM'],
        pos=variant['start'])

    # 'stop_chrom' is only set for structural variants
    if variant.get('stop_chrom'):
        variant['cytoband_stop'] = get_cytoband_coord(
            chrom=variant['stop_chrom'],
            pos=variant['stop'])

    #### Check the impact annotations ####
    if gemini_variant['cadd_scaled']:
        variant['cadd_score'] = gemini_variant['cadd_scaled']

    # We use the prediction in text
    polyphen = gemini_variant['polyphen_pred']
    if polyphen:
        variant.add_severity('Polyphen', polyphen)

    # We use the prediction in text
    sift = gemini_variant['sift_pred']
    if sift:
        variant.add_severity('SIFT', sift)

    #### Check the frequencies ####
    thousand_g = gemini_variant['aaf_1kg_all']
    if thousand_g:
        variant['thousand_g'] = float(thousand_g)
        variant.add_frequency(name='1000GAF', value=float(thousand_g))

    exac = gemini_variant['aaf_exac_all']
    if exac:
        variant.add_frequency(name='EXaC', value=float(exac))

    esp = gemini_variant['aaf_esp_all']
    if esp:
        variant.add_frequency(name='ESP', value=float(esp))

    max_freq = gemini_variant['max_aaf_all']
    if max_freq:
        variant.set_max_freq(max_freq)

    return variant
def _format_variant(self, gemini_variant, individual_objs, index=0,
                    add_all_info=False):
    """Make a puzzle variant from a gemini variant

    The VCF core fields, variant id, index, most severe consequence,
    impact severity and start/stop coordinates are always set. Structural
    variants ('sv') then get sv specific coordinates, all other variant
    types get transcript, frequency and severity annotations.

    Args:
        gemini_variant (GeminiQueryRow): The gemini variant
        individual_objs (list(dict)): A list of Individuals
        index(int): The index of the variant
        add_all_info(bool): If True, genotype information is added as
            well, and genes are added for structural variants

    Returns:
        variant (dict): A Variant object
    """
    chrom = gemini_variant['chrom']
    # Drop a leading 'chr'/'CHR' prefix from the chromosome name
    if chrom.startswith('chr') or chrom.startswith('CHR'):
        chrom = chrom[3:]

    variant_dict = {
        'CHROM':chrom,
        'POS':str(gemini_variant['start']),
        'ID':gemini_variant['rs_ids'],
        'REF':gemini_variant['ref'],
        'ALT':gemini_variant['alt'],
        'QUAL':gemini_variant['qual'],
        'FILTER':gemini_variant['filter']
    }

    variant = Variant(**variant_dict)

    # Use the gemini id for fast search
    variant.update_variant_id(gemini_variant['variant_id'])
    logger.debug("Creating a variant object of variant {0}".format(
        variant.variant_id))

    variant['index'] = index

    # Add the most severe consequence
    self._add_most_severe_consequence(variant, gemini_variant)

    # Add the impact severity
    self._add_impact_severity(variant, gemini_variant)

    ### POSITION ANNOTATIONS ###
    variant.start = int(gemini_variant['start'])
    variant.stop = int(gemini_variant['end'])

    # Add the sv specific coordinates
    if self.variant_type == 'sv':
        variant.sv_type = gemini_variant['sub_type']
        # NOTE(review): stop was already set from 'end' just above; this
        # repeats the same assignment.
        variant.stop = int(gemini_variant['end'])
        self._add_sv_coordinates(variant)

    else:
        ### Consequence and region annotations
        # Add the transcript information
        self._add_transcripts(variant, gemini_variant)
        # Add the frequency annotations
        self._add_thousand_g(variant, gemini_variant)
        self._add_exac(variant, gemini_variant)
        self._add_gmaf(variant, gemini_variant)

        #### Check the impact annotations ####
        # Only set when gemini reports a truthy CADD score
        if gemini_variant['cadd_scaled']:
            variant.cadd_score = gemini_variant['cadd_scaled']

        # We use the prediction in text
        polyphen = gemini_variant['polyphen_pred']
        if polyphen:
            variant.add_severity('Polyphen', polyphen)

        # We use the prediction in text
        sift = gemini_variant['sift_pred']
        if sift:
            variant.add_severity('SIFT', sift)

    # Add the genes based on the hgnc symbols
    self._add_hgnc_symbols(variant)
    if self.variant_type == 'snv':
        self._add_genes(variant)

    self._add_consequences(variant)

    ### GENOTYPE ANNOTATIONS ###
    # Get the genotype info
    if add_all_info:
        self._add_genotypes(variant, gemini_variant, individual_objs)
        # Genes for structural variants are only added on this path
        if self.variant_type == 'sv':
            self._add_genes(variant)

    return variant
def _format_variant(self, case_id, gemini_variant, individual_objs,
                    index=0, add_all_info=False):
    """Make a puzzle variant from a gemini variant

    The VCF core fields, variant id, index, most severe consequence,
    impact severity and start/stop coordinates are always set. Structural
    variants ('sv') then get sv specific coordinates, all other variant
    types get transcript, frequency and severity annotations.

    Args:
        case_id (str): related case id
        gemini_variant (GeminiQueryRow): The gemini variant
        individual_objs (list(dict)): A list of Individuals
        index(int): The index of the variant
        add_all_info(bool): If True, genotype information is added as
            well, and genes are added for structural variants

    Returns:
        variant (dict): A Variant object
    """
    chrom = gemini_variant['chrom']
    # Drop a leading 'chr'/'CHR' prefix from the chromosome name
    if chrom.startswith('chr') or chrom.startswith('CHR'):
        chrom = chrom[3:]

    variant_dict = {
        'CHROM': chrom,
        'POS': str(gemini_variant['start']),
        'ID': gemini_variant['rs_ids'],
        'REF': gemini_variant['ref'],
        'ALT': gemini_variant['alt'],
        'QUAL': gemini_variant['qual'],
        'FILTER': gemini_variant['filter']
    }

    variant = Variant(**variant_dict)

    # Use the gemini id for fast search
    variant.update_variant_id(gemini_variant['variant_id'])
    logger.debug("Creating a variant object of variant {0}".format(
        variant.variant_id))

    variant['index'] = index

    # Add the most severe consequence
    self._add_most_severe_consequence(variant, gemini_variant)

    # Add the impact severity
    self._add_impact_severity(variant, gemini_variant)

    ### POSITION ANNOTATIONS ###
    variant.start = int(gemini_variant['start'])
    variant.stop = int(gemini_variant['end'])

    # Add the sv specific coordinates
    if self.variant_type == 'sv':
        variant.sv_type = gemini_variant['sub_type']
        # NOTE(review): stop was already set from 'end' just above; this
        # repeats the same assignment.
        variant.stop = int(gemini_variant['end'])
        self._add_sv_coordinates(variant)

    else:
        ### Consequence and region annotations
        # Add the transcript information
        self._add_transcripts(variant, gemini_variant)
        # Add the frequency annotations
        self._add_thousand_g(variant, gemini_variant)
        self._add_exac(variant, gemini_variant)
        self._add_gmaf(variant, gemini_variant)

        #### Check the impact annotations ####
        # Only set when gemini reports a truthy CADD score
        if gemini_variant['cadd_scaled']:
            variant.cadd_score = gemini_variant['cadd_scaled']

        # We use the prediction in text
        polyphen = gemini_variant['polyphen_pred']
        if polyphen:
            variant.add_severity('Polyphen', polyphen)

        # We use the prediction in text
        sift = gemini_variant['sift_pred']
        if sift:
            variant.add_severity('SIFT', sift)

    # Add the genes based on the hgnc symbols
    self._add_hgnc_symbols(variant)
    if self.variant_type == 'snv':
        self._add_genes(variant)

    self._add_consequences(variant)

    ### GENOTYPE ANNOTATIONS ###
    # Get the genotype info; the case id is passed along to the helper
    if add_all_info:
        self._add_genotypes(variant, gemini_variant, case_id, individual_objs)
        # Genes for structural variants are only added on this path
        if self.variant_type == 'sv':
            self._add_genes(variant)

    return variant