Example #1
0
    def _format_variant(self, gemini_variant, individual_objs, index=0,
                        add_all_info=False):
        """Build a puzzle Variant from one gemini query row.

        Args:
            gemini_variant (GeminiQueryRow): The gemini variant
            individual_objs (list(dict)): A list of Individuals
            index (int): The index of the variant
            add_all_info (bool): If True the genotypes are added as well
                (and the genes, when the variant type is 'sv')

        Returns:
            variant (dict): A Variant object
        """
        # Drop a leading 'chr'/'CHR' from the chromosome name
        chrom = gemini_variant['chrom']
        if chrom.startswith(('chr', 'CHR')):
            chrom = chrom[3:]

        variant = Variant(
            CHROM=chrom,
            POS=str(gemini_variant['start']),
            ID=gemini_variant['rs_ids'],
            REF=gemini_variant['ref'],
            ALT=gemini_variant['alt'],
            QUAL=gemini_variant['qual'],
            FILTER=gemini_variant['filter']
        )

        # Use the gemini id for fast search
        variant.update_variant_id(gemini_variant['variant_id'])
        debug_message = "Creating a variant object of variant {0}"
        logger.debug(debug_message.format(variant.variant_id))

        variant['index'] = index

        # Most severe consequence first, then the impact severity
        self._add_most_severe_consequence(variant, gemini_variant)
        self._add_impact_severity(variant, gemini_variant)

        # Position annotations
        variant.start = int(gemini_variant['start'])
        variant.stop = int(gemini_variant['end'])

        if self.variant_type != 'sv':
            # Consequence and region annotations
            self._add_transcripts(variant, gemini_variant)
            self._add_thousand_g(variant, gemini_variant)
            self._add_exac(variant, gemini_variant)
            self._add_gmaf(variant, gemini_variant)

            # Impact annotations; a falsy cadd value is not stored
            cadd_scaled = gemini_variant['cadd_scaled']
            if cadd_scaled:
                variant.cadd_score = cadd_scaled

            # The predictions are used in their text form
            for source, column in (('Polyphen', 'polyphen_pred'),
                                   ('SIFT', 'sift_pred')):
                prediction = gemini_variant[column]
                if prediction:
                    variant.add_severity(source, prediction)
        else:
            # SV specific type and coordinates; stop is assigned again
            # right before the sv coordinates are added
            variant.sv_type = gemini_variant['sub_type']
            variant.stop = int(gemini_variant['end'])
            self._add_sv_coordinates(variant)

        # Add the genes based on the hgnc symbols
        self._add_hgnc_symbols(variant)
        if self.variant_type == 'snv':
            self._add_genes(variant)

        self._add_consequences(variant)

        if not add_all_info:
            return variant

        # Genotype annotations are only added on request
        self._add_genotypes(variant, gemini_variant, individual_objs)
        if self.variant_type == 'sv':
            self._add_genes(variant)

        return variant
Example #2
0
    def _format_variants(self, variant, index, case_obj, add_all_info=False):
        """Return a Variant object

        Build a variant that includes enough information for the variant
        view. For 'sv' variants only the coordinates, the sv type and the
        optional 'OCC' tag are read from the INFO field; for all other
        variants frequencies, cadd score, genetic models and transcripts
        are added.

        Args:
            variant (cython2.Variant): A variant object
            index (int): The index of the variant
            case_obj (puzzle.models.Case): A case object
            add_all_info (bool): If True the genotype calls, compounds,
                gmaf and genes are added as well

        Returns:
            variant_obj (Variant): The formatted variant
        """
        # Create an info dict:
        info_dict = dict(variant.INFO)

        # Drop a leading 'chr'/'CHR' from the chromosome name
        chrom = variant.CHROM
        if chrom.startswith(('chr', 'CHR')):
            chrom = chrom[3:]

        # Only the first alternative allele is used
        variant_obj = Variant(
            CHROM=chrom,
            POS=variant.POS,
            ID=variant.ID,
            REF=variant.REF,
            ALT=variant.ALT[0],
            QUAL=variant.QUAL,
            FILTER=variant.FILTER,
        )
        variant_obj._set_variant_id()

        logger.debug("Creating a variant object of variant {0}".format(
            variant_obj.variant_id))

        variant_obj.index = index
        logger.debug("Updating index to: {0}".format(index))

        ########### Get the coordinates for the variant ##############
        variant_obj.start = variant.start
        variant_obj.stop = variant.end

        # SV variants need to be handled a bit differently since they can
        # be huge; it would take too much power to parse all vep/snpeff
        # entries for these.
        if self.variant_type == 'sv':
            # Fall back to POS when the INFO field has no END tag
            variant_obj.stop = int(info_dict.get('END', variant_obj.POS))
            self._add_sv_coordinates(variant_obj)
            variant_obj.sv_type = info_dict.get('SVTYPE')

            # Special for FindSV software:
            # SV specific tag for number of occurrences
            occurances = info_dict.get('OCC')
            if occurances:
                logger.debug("Updating occurances to: {0}".format(occurances))
                variant_obj['occurances'] = float(occurances)
                variant_obj.add_frequency('OCC', occurances)

        else:
            self._add_thousand_g(variant_obj, info_dict)
            self._add_cadd_score(variant_obj, info_dict)
            self._add_genetic_models(variant_obj, info_dict)
            self._add_transcripts(variant_obj, info_dict)
            self._add_exac(variant_obj, info_dict)

        self._add_hgnc_symbols(variant_obj)

        if add_all_info:
            self._add_genotype_calls(variant_obj, str(variant), case_obj)
            self._add_compounds(variant_obj, info_dict)
            self._add_gmaf(variant_obj, info_dict)
            self._add_genes(variant_obj)

        ##### Add consequences ####
        self._add_consequences(variant_obj, str(variant))
        self._add_most_severe_consequence(variant_obj)
        self._add_impact_severity(variant_obj)
        self._add_rank_score(variant_obj, info_dict)
        variant_obj.set_max_freq()
        return variant_obj
Example #3
0
    def _format_variant(self,
                        case_id,
                        gemini_variant,
                        individual_objs,
                        index=0,
                        add_all_info=False):
        """Build a puzzle Variant from one gemini query row.

        Args:
            case_id (str): related case id
            gemini_variant (GeminiQueryRow): The gemini variant
            individual_objs (list(dict)): A list of Individuals
            index (int): The index of the variant
            add_all_info (bool): If True the genotypes are added as well
                (and the genes, when the variant type is 'sv')

        Returns:
            variant (dict): A Variant object
        """
        # Drop a leading 'chr'/'CHR' from the chromosome name
        chrom = gemini_variant['chrom']
        if chrom.startswith(('chr', 'CHR')):
            chrom = chrom[3:]

        variant = Variant(
            CHROM=chrom,
            POS=str(gemini_variant['start']),
            ID=gemini_variant['rs_ids'],
            REF=gemini_variant['ref'],
            ALT=gemini_variant['alt'],
            QUAL=gemini_variant['qual'],
            FILTER=gemini_variant['filter']
        )

        # Use the gemini id for fast search
        variant.update_variant_id(gemini_variant['variant_id'])
        debug_message = "Creating a variant object of variant {0}"
        logger.debug(debug_message.format(variant.variant_id))

        variant['index'] = index

        # Most severe consequence first, then the impact severity
        self._add_most_severe_consequence(variant, gemini_variant)
        self._add_impact_severity(variant, gemini_variant)

        # Position annotations
        variant.start = int(gemini_variant['start'])
        variant.stop = int(gemini_variant['end'])

        if self.variant_type != 'sv':
            # Consequence and region annotations
            self._add_transcripts(variant, gemini_variant)
            self._add_thousand_g(variant, gemini_variant)
            self._add_exac(variant, gemini_variant)
            self._add_gmaf(variant, gemini_variant)

            # Impact annotations; a falsy cadd value is not stored
            cadd_scaled = gemini_variant['cadd_scaled']
            if cadd_scaled:
                variant.cadd_score = cadd_scaled

            # The predictions are used in their text form
            for source, column in (('Polyphen', 'polyphen_pred'),
                                   ('SIFT', 'sift_pred')):
                prediction = gemini_variant[column]
                if prediction:
                    variant.add_severity(source, prediction)
        else:
            # SV specific type and coordinates; stop is assigned again
            # right before the sv coordinates are added
            variant.sv_type = gemini_variant['sub_type']
            variant.stop = int(gemini_variant['end'])
            self._add_sv_coordinates(variant)

        # Add the genes based on the hgnc symbols
        self._add_hgnc_symbols(variant)
        if self.variant_type == 'snv':
            self._add_genes(variant)

        self._add_consequences(variant)

        if not add_all_info:
            return variant

        # Genotype annotations are only added on request
        self._add_genotypes(variant, gemini_variant, case_id,
                            individual_objs)
        if self.variant_type == 'sv':
            self._add_genes(variant)

        return variant