def test_genotype_1_2():
    """
    Check a heterozygote call given as GT '1/2'.

    The genotype string should come back unchanged and the call should be
    flagged as a genotyped heterozygote that has a variant, while being
    neither homozygote reference nor homozygote alternative.
    """
    call = Genotype(GT='1/2')
    assert call.genotype == '1/2'

    # (attribute, expected truthiness), checked in this order
    expected_flags = (
        ('heterozygote', True),
        ('homo_ref', False),
        ('homo_alt', False),
        ('has_variant', True),
        ('genotyped', True),
    )
    for flag, expected in expected_flags:
        assert bool(getattr(call, flag)) is expected, flag
Example #2
0
def test_haploid_no_call():
    """
    Check how a call given as GT '0/.' is interpreted.

    The call is expected to count as genotyped and homozygote reference,
    without being heterozygote, homozygote alternative or having a variant.
    """
    call = Genotype(GT='0/.')
    assert call.genotype == '0/.'

    # (attribute, expected truthiness), checked in this order
    expected_flags = (
        ('heterozygote', False),
        ('homo_ref', True),
        ('homo_alt', False),
        ('has_variant', False),
        ('genotyped', True),
    )
    for flag, expected in expected_flags:
        assert bool(getattr(call, flag)) is expected, flag
def test_homo_alt_2():
    """
    Check a homozygote alternative call given as GT '3/3'.

    has_variant, homo_alt and genotyped should be true; heterozygote and
    homo_ref should be false.
    """
    call = Genotype(GT='3/3')
    assert call.genotype == '3/3'

    # (attribute, expected truthiness), checked in this order
    expected_flags = (
        ('heterozygote', False),
        ('homo_ref', False),
        ('homo_alt', True),
        ('has_variant', True),
        ('genotyped', True),
    )
    for flag, expected in expected_flags:
        assert bool(getattr(call, flag)) is expected, flag
def test_homo_ref():
    """
    Check a homozygote reference call given as GT '0/0'.

    homo_ref and genotyped should be true; heterozygote, homo_alt and
    has_variant should be false.
    """
    call = Genotype(GT='0/0')
    assert call.genotype == '0/0'

    # (attribute, expected truthiness), checked in this order
    expected_flags = (
        ('heterozygote', False),
        ('homo_ref', True),
        ('homo_alt', False),
        ('has_variant', False),
        ('genotyped', True),
    )
    for flag, expected in expected_flags:
        assert bool(getattr(call, flag)) is expected, flag
def test_nocall():
    """
    Check a no call, given as GT './.'.

    A no call means that no information was found at this position for the
    individual, so every question asked of the genotype below is expected
    to be answered with false, including whether it is genotyped.
    """
    no_call = Genotype(GT='./.')
    # The alleles never need inspecting here, since the genotype is defined
    # as 'allele_1/allele_2'
    assert no_call.genotype == './.'

    # None of these flags may be set, checked in this order
    unset_flags = (
        'heterozygote',
        'homo_ref',
        'homo_alt',
        'has_variant',
        'genotyped',
    )
    for flag in unset_flags:
        assert not getattr(no_call, flag), flag
def test_phased_data():
    """
    Check that the class can handle phased data.

    The input is a phased heterozygote given as GT '1|0'. The genotype is
    expected back in the 'allele_1/allele_2' form, the alleles are expected
    in their original order, and the call should be flagged as phased.
    """
    phased_call = Genotype(GT='1|0')

    # The genotype string is reported with '/' although the input used '|'
    assert phased_call.genotype == '1/0'

    assert phased_call.heterozygote
    assert not phased_call.homo_ref
    assert not phased_call.homo_alt
    assert phased_call.has_variant

    # Each allele is available on its own, in input order
    assert phased_call.allele_1 == '1'
    assert phased_call.allele_2 == '0'

    assert phased_call.genotyped
    assert phased_call.phased
Example #7
0
def test_vardict():
    """
    Check a genotype built from a vardict style FORMAT and sample column.

    The colon separated FORMAT keys are paired with the colon separated
    sample values and handed to Genotype as keyword arguments.
    """
    format_keys = "GT:DP:VD:AD:RD:AF:BIAS:PMEAN:PSTD:QUAL:QSTD:SBF:ODDRATIO:MQ:SN:HIAF:ADJAF:NM".split(':')
    sample_values = "0/1:192:4:3,1:77,110:0.0208:2,2:39.8:1:26:1:0.31065:4.25:60:3:0.0171:0:1.5".split(':')
    call = Genotype(**dict(zip(format_keys, sample_values)))

    # An unphased heterozygote is expected
    assert call.genotype == '0/1'
    assert call.heterozygote
    assert not call.homo_ref
    assert not call.homo_alt
    assert call.has_variant
    assert call.allele_1 == '0'
    assert call.allele_2 == '1'
    assert call.genotyped
    assert not call.phased

    # Depths expected for this call
    # NOTE(review): 188 looks like DP - VD (192 - 4); confirm against Genotype
    assert call.depth_of_coverage == 192
    assert call.alt_depth == 4
    assert call.ref_depth == 188
Example #8
0
def test_freebayes():
    """
    Check a genotype built from a freebayes style FORMAT and sample column.

    The colon separated FORMAT keys are paired with the colon separated
    sample values and handed to Genotype as keyword arguments.
    """
    format_keys = "GT:DP:RO:QR:AO:QA:GL".split(':')
    sample_values = "0/1:15:12:352:3:126:-6.17418,0,-19.4464".split(':')
    call = Genotype(**dict(zip(format_keys, sample_values)))

    # An unphased heterozygote is expected
    assert call.genotype == '0/1'
    assert call.heterozygote
    assert not call.homo_ref
    assert not call.homo_alt
    assert call.has_variant
    assert call.allele_1 == '0'
    assert call.allele_2 == '1'
    assert call.genotyped
    assert not call.phased

    # Depths expected for this call (DP=15, AO=3, RO=12 in the sample column)
    assert call.depth_of_coverage == 15
    assert call.alt_depth == 3
    assert call.ref_depth == 12
Example #9
0
    def _add_genotype_calls(self, variant_obj, variant_line, case_obj):
        """Add the genotype calls for the variant

        Args:
            variant_obj (puzzle.models.Variant)
            variant_dict (dict): A variant dictionary
            case_obj (puzzle.models.Case)

        """
        variant_line = variant_line.split('\t')
        #if there is gt calls we have no individuals to add
        if len(variant_line) > 8:
            gt_format = variant_line[8].split(':')
            for individual in case_obj.individuals:
                sample_id = individual.ind_id
                index = individual.ind_index

                gt_call = variant_line[9 + index].split(':')

                raw_call = dict(zip(gt_format, gt_call))

                genotype = Genotype(**raw_call)

                variant_obj.add_individual(
                    puzzle_genotype(
                        sample_id=sample_id,
                        genotype=genotype.genotype,
                        case_id=case_obj.name,
                        phenotype=individual.phenotype,
                        ref_depth=genotype.ref_depth,
                        alt_depth=genotype.alt_depth,
                        genotype_quality=genotype.genotype_quality,
                        depth=genotype.depth_of_coverage,
                        supporting_evidence=genotype.supporting_evidence,
                        pe_support=genotype.pe_support,
                        sr_support=genotype.sr_support,
                    ))
Example #10
0
def get_formated_variant(variant, individuals, family_id, gq_treshold=None):
    """Return a formated variant dictionary

        Take a vcf formated variant and return a dictionary with the
        relevant information.

        If criterias are not fullfilled, eg. no individual has a variant call
        with a genotype quality of at least gq treshold, then an empty
        dictionary is returned.

        Args:
            variant (dict): A variant dictionary. The keys 'CHROM', 'POS',
                'REF', 'ALT' and 'FORMAT' are read, plus one key per
                individual id holding that individual's raw genotype call
            individuals (dict): Maps individual id to an individual object.
                The 'sex' attribute of the object is read, where 1 is
                treated as male
            family_id (str): The family id. Only added to the result when
                it is truthy
            gq_treshold (int): Lowest genotype quality for a call to be
                considered. Any falsy value (None or 0) falls back to 20

        Return:
            formated_variant (dict): A variant dictionary with the keys
                '_id', 'chrom', 'pos', 'ref', 'alt', 'homozygote',
                'hemizygote' and optionally 'family_id', or an empty
                dictionary if no variant call was found

        Raises:
            Exception: If ALT holds more than one allele
            CaseError: If an individual does not exist in the variant
    """
    gq_treshold = gq_treshold or 20

    chrom = variant['CHROM']
    # Remove a literal 'chr' prefix only. str.lstrip('chr') strips every
    # leading 'c', 'h' or 'r' character, which would mangle contig names
    # such as 'hs37d5' into 's37d5'.
    if chrom.startswith('chr'):
        chrom = chrom[3:]
    pos = int(variant['POS'])
    ref = variant['REF']
    alt = variant['ALT']

    if ',' in alt:
        raise Exception("Multi allele calls are not allowed.")

    format_field = variant['FORMAT'].split(':')

    found_variant = False
    found_homozygote = False
    found_hemizygote = False

    for ind_id in individuals:
        ind_obj = individuals[ind_id]

        if ind_id not in variant:
            raise CaseError("Individual {0} from ped does not exist in"
                            " vcf".format(ind_id))

        gt_call = dict(zip(format_field, variant[ind_id].split(':')))
        genotype = Genotype(**gt_call)

        # Only calls that pass the quality treshold and carry a variant count
        if genotype.genotype_quality < gq_treshold:
            continue
        if not genotype.has_variant:
            continue

        logger.debug("Found variant in affected")
        found_variant = True

        # If variant in X or Y and individual is male,
        # we need to check hemizygosity
        if chrom in ('X', 'Y') and ind_obj.sex == 1:
            if not check_par(chrom, pos):
                logger.debug("Found hemizygous variant")
                found_hemizygote = True

        if genotype.homo_alt:
            logger.debug("Found homozygote alternative variant")
            found_homozygote = True

    if not found_variant:
        return {}

    formated_variant = {
        '_id': '_'.join([chrom, str(pos), ref, alt]),
        'chrom': chrom,
        'pos': pos,
        'ref': ref,
        'alt': alt,
        # Hemizygote takes precedence: a variant is never reported as both
        'homozygote': 1 if (found_homozygote and not found_hemizygote) else 0,
        'hemizygote': 1 if found_hemizygote else 0,
    }

    if family_id:
        formated_variant['family_id'] = family_id

    return formated_variant
Example #11
0
def test_haploid_genotype():
    """
    Check how a call with a single allele, GT '1', is reported.

    The genotype is expected in the two allele form with the second allele
    given as '.'.
    """
    single_allele_call = Genotype(GT='1')
    assert single_allele_call.genotype == '1/.'
Example #12
0
def test_phred_likelihoods():
    """
    Check that the PL entry is exposed as a list of integers.

    The comma separated string '60,70,80' is expected back as [60, 70, 80]
    from phred_likelihoods.
    """
    call = Genotype(GT='0/1', PL='60,70,80')
    assert call.phred_likelihoods == [60, 70, 80]