コード例 #1
0
ファイル: genotype.py プロジェクト: robinandeer/puzzle
    def _add_genotype_calls(self, variant_obj, variant_line, case_obj):
        """Attach a genotype call for every individual of the case.

        The variant line is split on tabs. If it has more than eight
        columns, column 8 is read as the ':' separated genotype format and
        column ``9 + individual.ind_index`` as the call of each individual.
        Lines with eight columns or fewer are left untouched.

        Args:
            variant_obj (puzzle.models.Variant): Receives the calls through
                ``add_individual``
            variant_line (str): A tab separated variant line
            case_obj (puzzle.models.Case): Supplies ``individuals`` and
                ``name``

        """
        columns = variant_line.split('\t')

        # Without columns beyond the eighth there are no genotype calls,
        # hence no individuals to add
        if len(columns) <= 8:
            return

        format_keys = columns[8].split(':')

        for person in case_obj.individuals:
            person_id = person.ind_id
            call_values = columns[9 + person.ind_index].split(':')

            # Pair every format key with this individual's value
            parsed_call = Genotype(**dict(zip(format_keys, call_values)))

            call = puzzle_genotype(
                sample_id=person_id,
                genotype=parsed_call.genotype,
                case_id=case_obj.name,
                phenotype=person.phenotype,
                ref_depth=parsed_call.ref_depth,
                alt_depth=parsed_call.alt_depth,
                genotype_quality=parsed_call.genotype_quality,
                depth=parsed_call.depth_of_coverage,
                supporting_evidence=parsed_call.supporting_evidence,
                pe_support=parsed_call.pe_support,
                sr_support=parsed_call.sr_support,
            )
            variant_obj.add_individual(call)
コード例 #2
0
ファイル: variant_mixin.py プロジェクト: dnil/puzzle
    def _format_variant(self, variant_line, index, case_obj, head):
        """Build a variant object from one variant line of a vcf.

        The line is parsed with ``get_variant_dict``/``get_info_dict`` and
        the resulting variant is populated with coordinates, frequencies,
        scores, genetic models, genotype calls, transcripts, genes and
        compounds.

        Args:
            variant_line: A raw variant line, handed to ``get_variant_dict``
            index: Stored on the variant under the key 'index'
            case_obj (puzzle.models.Case): A case object; each of its
                individuals that is present in the vcf gets a genotype call
            head: Header object providing ``header``, ``individuals``,
                ``vep_columns`` and ``snpeff_columns``

        Returns:
            variant (Variant): The populated variant object

        """
        def strip_chr_prefix(chrom):
            """Remove one leading 'chr' (any case) from a chromosome name."""
            # str.lstrip('chrCHR') treats its argument as a set of characters
            # and mangles names such as 'hs37d5' (-> 's37d5'), so the prefix
            # is removed explicitly instead.
            if chrom[:3].lower() == 'chr':
                return chrom[3:]
            return chrom

        header_line = head.header

        # Ids of the individuals that are present in the vcf file
        vcf_individuals = set(head.individuals)

        variant_columns = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER']

        vep_header = head.vep_columns
        snpeff_header = head.snpeff_columns

        # Create a variant dict:
        variant_dict = get_variant_dict(
            variant_line=variant_line,
            header_line=header_line
        )
        variant_dict['CHROM'] = strip_chr_prefix(variant_dict['CHROM'])
        # Create an info dict:
        info_dict = get_info_dict(
            info_line=variant_dict['INFO']
        )
        # Raw vep annotation, if any:
        vep_string = info_dict.get('CSQ')

        # Raw snpeff annotation, if any:
        snpeff_string = info_dict.get('ANN')

        # Only one annotation source is parsed; vep takes precedence. The
        # transcript section further down branches on the same conditions.
        vep_info = []
        snpeff_info = []
        if vep_string:
            vep_info = get_vep_info(
                vep_string=vep_string,
                vep_header=vep_header
            )
        elif snpeff_string:
            snpeff_info = get_snpeff_info(
                snpeff_string=snpeff_string,
                snpeff_header=snpeff_header
            )

        variant = Variant(
            **{column: variant_dict.get(column, '.')
                for column in variant_columns}
            )

        logger.debug("Creating a variant object of variant %s",
                     variant.get('variant_id'))

        variant['index'] = index
        logger.debug("Updating index to: %s", index)

        variant['start'] = int(variant_dict['POS'])

        if self.variant_type == 'sv':
            other_chrom = variant['CHROM']
            # An ALT with ':' but without '<' is treated as a translocation
            if ':' in variant_dict['ALT'] and '<' not in variant_dict['ALT']:
                other_coordinates = variant_dict['ALT'].strip('ACGTN[]').split(':')
                other_chrom = strip_chr_prefix(other_coordinates[0])
                other_position = other_coordinates[1]
                # NOTE(review): 'stop' stays a string here while the other
                # branches store an int - confirm downstream expectations.
                variant['stop'] = other_position

                # Set 'infinity' to length if translocation
                variant['sv_len'] = float('inf')
            else:
                variant['stop'] = int(info_dict.get('END', variant_dict['POS']))
                variant['sv_len'] = variant['stop'] - variant['start']

            variant['stop_chrom'] = other_chrom

        else:
            variant['stop'] = variant['start'] + \
                (len(variant_dict['REF']) - len(variant_dict['ALT']))

        variant['sv_type'] = info_dict.get('SVTYPE')
        variant['cytoband_start'] = get_cytoband_coord(
                                        chrom=variant['CHROM'],
                                        pos=variant['start'])
        # 'stop_chrom' is only set for structural variants
        if variant.get('stop_chrom'):
            variant['cytoband_stop'] = get_cytoband_coord(
                                        chrom=variant['stop_chrom'],
                                        pos=variant['stop'])

        # It would be easy to update these keys...
        thousand_g = info_dict.get('1000GAF')
        if thousand_g:
            logger.debug("Updating thousand_g to: %s", thousand_g)
            variant['thousand_g'] = float(thousand_g)
            variant.add_frequency('1000GAF', variant.get('thousand_g'))

        # SV specific tag for number of occurances
        occurances = info_dict.get('OCC')
        if occurances:
            logger.debug("Updating occurances to: %s", occurances)
            variant['occurances'] = float(occurances)
            variant.add_frequency('OCC', occurances)

        cadd_score = info_dict.get('CADD')
        if cadd_score:
            logger.debug("Updating cadd_score to: %s", cadd_score)
            variant['cadd_score'] = float(cadd_score)

        rank_score_entry = info_dict.get('RankScore')
        if rank_score_entry:
            # Comma separated annotations; the value after the last ':' of
            # the last annotation is the one that is kept
            rank_score = rank_score_entry.split(',')[-1].split(':')[-1]
            logger.debug("Updating rank_score to: %s", rank_score)
            variant['rank_score'] = float(rank_score)

        genetic_models_entry = info_dict.get('GeneticModels')
        if genetic_models_entry:
            # Collect the '|' separated models of every annotation
            genetic_models = [
                genetic_model
                for family_annotation in genetic_models_entry.split(',')
                for genetic_model in family_annotation.split(':')[-1].split('|')
            ]
            # Log the models themselves; logging rank_score here failed when
            # the line carried no RankScore entry
            logger.debug("Updating genetic_models to: %s", genetic_models)
            variant['genetic_models'] = genetic_models

        # Add genotype calls:
        for individual in case_obj.individuals:
            sample_id = individual.ind_id

            if sample_id in vcf_individuals:

                raw_call = dict(zip(
                    variant_dict['FORMAT'].split(':'),
                    variant_dict[sample_id].split(':'))
                )

                genotype = Genotype(**raw_call)

                variant.add_individual(puzzle_genotype(
                    sample_id=sample_id,
                    genotype=genotype.genotype,
                    case_id=individual.case_name,
                    phenotype=individual.phenotype,
                    ref_depth=genotype.ref_depth,
                    alt_depth=genotype.alt_depth,
                    genotype_quality=genotype.genotype_quality,
                    depth=genotype.depth_of_coverage,
                    supporting_evidence=genotype.supporting_evidence,
                    pe_support=genotype.pe_support,
                    sr_support=genotype.sr_support,
                ))

        # Add transcript information, from the annotation source that was
        # parsed above:
        if vep_string:
            gmaf = None
            for transcript_info in vep_info:
                transcript = self._get_vep_transcripts(transcript_info)
                gmaf_raw = transcript_info.get('GMAF')
                if gmaf_raw:
                    gmaf = float(gmaf_raw.split(':')[-1])
                variant.add_transcript(transcript)

            if gmaf:
                variant.add_frequency('GMAF', gmaf)
                # Use the mapping interface like the rest of this method,
                # so the fallback lands on the same key as '1000GAF' above
                if not variant.get('thousand_g'):
                    variant['thousand_g'] = gmaf

        elif snpeff_string:
            for transcript_info in snpeff_info:
                transcript = self._get_snpeff_transcripts(transcript_info)
                variant.add_transcript(transcript)

        most_severe_consequence = get_most_severe_consequence(
            variant['transcripts']
        )
        if most_severe_consequence:
            variant['most_severe_consequence'] = most_severe_consequence

            variant['impact_severity'] = IMPACT_SEVERITIES.get(most_severe_consequence)

        for gene in self._get_genes(variant):
            variant.add_gene(gene)

        self._add_compounds(variant=variant, info_dict=info_dict)

        return variant