Ejemplo n.º 1
0
    def _format_variant(self, gemini_variant, individual_objs, index=0,
                        add_all_info=False):
        """Build a puzzle variant out of a single gemini variant

            Args:
                gemini_variant (GeminiQueryRow): The gemini variant
                individual_objs (list(dict)): A list of Individuals
                index(int): The index of the variant
                add_all_info(bool): If true, _add_genotypes is called for
                    the variant and, for 'sv' variants, _add_genes as well

            Returns:
                variant (dict): A Variant object
        """
        # Chromosome names are kept without a leading 'chr'/'CHR'
        raw_chrom = gemini_variant['chrom']
        has_prefix = raw_chrom.startswith(('chr', 'CHR'))
        chrom = raw_chrom[3:] if has_prefix else raw_chrom

        # The VCF-style core fields are handed to Variant as keywords
        variant = Variant(
            CHROM=chrom,
            POS=str(gemini_variant['start']),
            ID=gemini_variant['rs_ids'],
            REF=gemini_variant['ref'],
            ALT=gemini_variant['alt'],
            QUAL=gemini_variant['qual'],
            FILTER=gemini_variant['filter']
        )

        # Use the gemini id for fast search
        variant.update_variant_id(gemini_variant['variant_id'])
        message = "Creating a variant object of variant {0}"
        logger.debug(message.format(variant.variant_id))

        variant['index'] = index

        # Most severe consequence first, then the impact severity
        self._add_most_severe_consequence(variant, gemini_variant)
        self._add_impact_severity(variant, gemini_variant)

        ### POSITION ANNOTATIONS ###
        variant.start = int(gemini_variant['start'])
        variant.stop = int(gemini_variant['end'])

        if self.variant_type != 'sv':
            ### Consequence and region annotations
            # Transcript information followed by the frequency annotations
            self._add_transcripts(variant, gemini_variant)
            self._add_thousand_g(variant, gemini_variant)
            self._add_exac(variant, gemini_variant)
            self._add_gmaf(variant, gemini_variant)

            #### Check the impact annotations ####
            cadd_scaled = gemini_variant['cadd_scaled']
            if cadd_scaled:
                variant.cadd_score = cadd_scaled

            # The predictions are used in their text form
            predictors = (('Polyphen', 'polyphen_pred'),
                          ('SIFT', 'sift_pred'))
            for label, column in predictors:
                prediction = gemini_variant[column]
                if prediction:
                    variant.add_severity(label, prediction)
        else:
            # Structural variants get their sv specific coordinates
            variant.sv_type = gemini_variant['sub_type']
            # Same value as assigned above; the assignment is repeated
            # for structural variants
            variant.stop = int(gemini_variant['end'])
            self._add_sv_coordinates(variant)

        # Add the genes based on the hgnc symbols
        self._add_hgnc_symbols(variant)
        if self.variant_type == 'snv':
            self._add_genes(variant)

        self._add_consequences(variant)

        ### GENOTYPE ANNOTATIONS ###
        if not add_all_info:
            return variant

        # Get the genotype info
        self._add_genotypes(variant, gemini_variant, individual_objs)
        if self.variant_type == 'sv':
            self._add_genes(variant)

        return variant
Ejemplo n.º 2
0
    def _format_variant(self, gemini_variant, individual_objs, index=0):
        """Make a puzzle variant from a gemini variant

            Args:
                gemini_variant (GeminiQueryRow): The gemini variant
                individual_objs (list(dict)): A list of Individuals
                index(int): The index of the variant

            Returns:
                variant (dict): A Variant object

            Raises:
                ValueError: If the mate position of a translocation ALT
                    can not be converted to an integer
        """
        def strip_chr_prefix(name):
            # Remove exactly one leading 'chr'/'CHR'. str.lstrip('chrCHR')
            # must not be used here: it strips a *set of characters*, so
            # names such as 'hs37d5' or 'HSCHR6_MHC_APD' would lose their
            # first letters.
            if name.startswith(('chr', 'CHR')):
                return name[3:]
            return name

        variant_dict = {
            'CHROM': strip_chr_prefix(gemini_variant['chrom']),
            'POS': str(gemini_variant['start']),
            'ID': gemini_variant['rs_ids'],
            'REF': gemini_variant['ref'],
            'ALT': gemini_variant['alt'],
            'QUAL': gemini_variant['qual'],
            'FILTER': gemini_variant['filter']
        }

        variant = Variant(**variant_dict)
        variant['index'] = index

        # Use the gemini id for fast search
        variant.update_variant_id(gemini_variant['variant_id'])
        # Update the individuals
        individual_genotypes = self._get_genotypes(
            gemini_variant=gemini_variant,
            individual_objs=individual_objs
            )

        for individual in individual_genotypes:
            # Add the genotype calls to the variant
            variant.add_individual(individual)

        for transcript in self._get_transcripts(gemini_variant):
            variant.add_transcript(transcript)

        # Add the most severe consequence
        variant['most_severe_consequence'] = gemini_variant['impact_so']

        for gene in self._get_genes(variant):
            variant.add_gene(gene)

        variant['start'] = int(variant_dict['POS'])

        if self.variant_type == 'sv':
            other_chrom = variant['CHROM']
            alt = variant_dict['ALT']
            # If we have a translocation:
            if ':' in alt:
                if '[' in alt or ']' in alt:
                    # Breakend notation wraps the mate locus in brackets,
                    # e.g. 'N[chr2:321682[' or ']13:123456]T'. Take the
                    # bracket-delimited piece that holds the locus instead
                    # of stripping base letters, which would also eat the
                    # start of contig names like 'GL000220.1'.
                    pieces = alt.replace(']', '[').split('[')
                    mate_locus = next(
                        piece for piece in pieces if ':' in piece)
                else:
                    # No brackets: fall back to trimming flanking bases
                    mate_locus = alt.strip('ACGTN')

                # Split on the last ':' so contig names that themselves
                # contain ':' keep their full name
                mate_chrom, _, mate_position = mate_locus.rpartition(':')
                other_chrom = strip_chr_prefix(mate_chrom)
                # Keep 'stop' an int, like in every other branch, since it
                # is passed on as a position to get_cytoband_coord below
                variant['stop'] = int(mate_position)

                # Set 'infinity' to length if translocation
                variant['sv_len'] = float('inf')
                variant['sv_type'] = 'BND'
            else:
                variant['stop'] = int(gemini_variant['end'])
                variant['sv_len'] = variant['stop'] - variant['start']
                variant['sv_type'] = gemini_variant['sub_type']

            variant['stop_chrom'] = other_chrom

        else:
            # NOTE(review): for insertions (ALT longer than REF) this
            # puts 'stop' before 'start' - confirm that this is intended
            variant['stop'] = int(variant_dict['POS']) + \
                (len(variant_dict['REF']) - len(variant_dict['ALT']))

        variant['cytoband_start'] = get_cytoband_coord(
                                        chrom=variant['CHROM'],
                                        pos=variant['start'])

        # 'stop_chrom' is only set for structural variants
        if variant.get('stop_chrom'):
            variant['cytoband_stop'] = get_cytoband_coord(
                                        chrom=variant['stop_chrom'],
                                        pos=variant['stop'])

        #### Check the impact annotations ####
        if gemini_variant['cadd_scaled']:
            variant['cadd_score'] = gemini_variant['cadd_scaled']

        # We use the prediction in text
        polyphen = gemini_variant['polyphen_pred']
        if polyphen:
            variant.add_severity('Polyphen', polyphen)

        # We use the prediction in text
        sift = gemini_variant['sift_pred']
        if sift:
            variant.add_severity('SIFT', sift)

        #### Check the frequencies ####
        thousand_g = gemini_variant['aaf_1kg_all']
        if thousand_g:
            variant['thousand_g'] = float(thousand_g)
            variant.add_frequency(name='1000GAF', value=float(thousand_g))

        exac = gemini_variant['aaf_exac_all']
        if exac:
            variant.add_frequency(name='EXaC', value=float(exac))

        esp = gemini_variant['aaf_esp_all']
        if esp:
            variant.add_frequency(name='ESP', value=float(esp))

        max_freq = gemini_variant['max_aaf_all']
        if max_freq:
            variant.set_max_freq(max_freq)

        return variant
Ejemplo n.º 3
0
    def _format_variant(self,
                        case_id,
                        gemini_variant,
                        individual_objs,
                        index=0,
                        add_all_info=False):
        """Build a puzzle variant out of a single gemini variant

            Args:
                case_id (str): related case id
                gemini_variant (GeminiQueryRow): The gemini variant
                individual_objs (list(dict)): A list of Individuals
                index(int): The index of the variant
                add_all_info(bool): If true, _add_genotypes is called for
                    the variant and, for 'sv' variants, _add_genes as well

            Returns:
                variant (dict): A Variant object
        """
        # Chromosome names are kept without a leading 'chr'/'CHR'
        raw_chrom = gemini_variant['chrom']
        has_prefix = raw_chrom.startswith(('chr', 'CHR'))
        chrom = raw_chrom[3:] if has_prefix else raw_chrom

        # The VCF-style core fields are handed to Variant as keywords
        variant = Variant(
            CHROM=chrom,
            POS=str(gemini_variant['start']),
            ID=gemini_variant['rs_ids'],
            REF=gemini_variant['ref'],
            ALT=gemini_variant['alt'],
            QUAL=gemini_variant['qual'],
            FILTER=gemini_variant['filter']
        )

        # Use the gemini id for fast search
        variant.update_variant_id(gemini_variant['variant_id'])
        message = "Creating a variant object of variant {0}"
        logger.debug(message.format(variant.variant_id))

        variant['index'] = index

        # Most severe consequence first, then the impact severity
        self._add_most_severe_consequence(variant, gemini_variant)
        self._add_impact_severity(variant, gemini_variant)

        ### POSITION ANNOTATIONS ###
        variant.start = int(gemini_variant['start'])
        variant.stop = int(gemini_variant['end'])

        if self.variant_type != 'sv':
            ### Consequence and region annotations
            # Transcript information followed by the frequency annotations
            self._add_transcripts(variant, gemini_variant)
            self._add_thousand_g(variant, gemini_variant)
            self._add_exac(variant, gemini_variant)
            self._add_gmaf(variant, gemini_variant)

            #### Check the impact annotations ####
            cadd_scaled = gemini_variant['cadd_scaled']
            if cadd_scaled:
                variant.cadd_score = cadd_scaled

            # The predictions are used in their text form
            predictors = (('Polyphen', 'polyphen_pred'),
                          ('SIFT', 'sift_pred'))
            for label, column in predictors:
                prediction = gemini_variant[column]
                if prediction:
                    variant.add_severity(label, prediction)
        else:
            # Structural variants get their sv specific coordinates
            variant.sv_type = gemini_variant['sub_type']
            # Same value as assigned above; the assignment is repeated
            # for structural variants
            variant.stop = int(gemini_variant['end'])
            self._add_sv_coordinates(variant)

        # Add the genes based on the hgnc symbols
        self._add_hgnc_symbols(variant)
        if self.variant_type == 'snv':
            self._add_genes(variant)

        self._add_consequences(variant)

        ### GENOTYPE ANNOTATIONS ###
        if not add_all_info:
            return variant

        # Get the genotype info
        self._add_genotypes(variant, gemini_variant, case_id,
                            individual_objs)
        if self.variant_type == 'sv':
            self._add_genes(variant)

        return variant