def test_genotype_1_2():
    """
    Heterozygote call made up of two different alternative alleles ('1/2').

    The call should be reported as heterozygote, variant carrying and
    genotyped, and as neither homozygote reference nor homozygote alternative.
    """
    het_call = Genotype(GT='1/2')

    assert het_call.genotype == '1/2'
    assert het_call.heterozygote
    assert not het_call.homo_ref
    assert not het_call.homo_alt
    assert het_call.has_variant
    assert het_call.genotyped
def test_haploid_no_call():
    """
    Call with a reference allele and a missing second allele ('0/.').

    The call should be reported as homozygote reference and genotyped,
    without carrying any variant.
    """
    half_called = Genotype(GT='0/.')

    assert half_called.genotype == '0/.'
    assert not half_called.heterozygote
    assert half_called.homo_ref
    assert not half_called.homo_alt
    assert not half_called.has_variant
    assert half_called.genotyped
def test_homo_alt_2():
    """
    Homozygote alternative call on a higher allele index ('3/3').

    has_variant and homo_alt should be true, heterozygote and homo_ref false.
    """
    hom_alt_call = Genotype(GT='3/3')

    assert hom_alt_call.genotype == '3/3'
    assert not hom_alt_call.heterozygote
    assert not hom_alt_call.homo_ref
    assert hom_alt_call.homo_alt
    assert hom_alt_call.has_variant
    assert hom_alt_call.genotyped
def test_homo_ref():
    """
    Homozygote reference call ('0/0').

    homo_ref and genotyped should be true; heterozygote, homo_alt and
    has_variant should all be false.
    """
    ref_call = Genotype(GT='0/0')

    assert ref_call.genotype == '0/0'
    assert not ref_call.heterozygote
    assert ref_call.homo_ref
    assert not ref_call.homo_alt
    assert not ref_call.has_variant
    assert ref_call.genotyped
def test_nocall():
    """
    A nocall ('./.') means that no information was found on this position
    for the individual.

    Every question asked here (heterozygote, homo_ref, homo_alt, has_variant
    and genotyped) should be answered with False.
    """
    missing_call = Genotype(GT='./.')

    # The alleles are never inspected here; the genotype string is always
    # on the form 'allele_1/allele_2'.
    assert missing_call.genotype == './.'
    assert not missing_call.heterozygote
    assert not missing_call.homo_ref
    assert not missing_call.homo_alt
    assert not missing_call.has_variant
    assert not missing_call.genotyped
def test_phased_data():
    """
    Check that the class can handle phased data, in this case a phased
    heterozygote ('1|0').
    """
    phased_call = Genotype(GT='1|0')

    # The genotype is reported with '/' even though the input used '|'.
    assert phased_call.genotype == '1/0'
    assert phased_call.heterozygote
    assert not phased_call.homo_ref
    assert not phased_call.homo_alt
    assert phased_call.has_variant
    # The alleles keep the order they had in the phased input.
    assert phased_call.allele_1 == '1'
    assert phased_call.allele_2 == '0'
    assert phased_call.genotyped
    assert phased_call.phased
def test_vardict():
    """
    Test a genotype call on the form that vardict produces.

    Builds the keyword arguments by pairing the FORMAT keys with the values
    of one sample column and checks both the genotype flags and the depths.
    """
    vardict_format = (
        "GT:DP:VD:AD:RD:AF:BIAS:PMEAN:PSTD:QUAL:QSTD:SBF:ODDRATIO:MQ:SN:HIAF:"
        "ADJAF:NM"
    )
    sample_column = (
        "0/1:192:4:3,1:77,110:0.0208:2,2:39.8:1:26:1:0.31065:4.25:60:3:"
        "0.0171:0:1.5"
    )
    format_keys = vardict_format.split(':')
    call_values = sample_column.split(':')
    vardict_genotype = Genotype(**dict(zip(format_keys, call_values)))

    assert vardict_genotype.genotype == '0/1'
    assert vardict_genotype.heterozygote
    assert not vardict_genotype.homo_ref
    assert not vardict_genotype.homo_alt
    assert vardict_genotype.has_variant
    assert vardict_genotype.allele_1 == '0'
    assert vardict_genotype.allele_2 == '1'
    assert vardict_genotype.genotyped
    # An unphased call, the separator is '/'.
    assert not vardict_genotype.phased

    assert vardict_genotype.depth_of_coverage == 192
    assert vardict_genotype.alt_depth == 4
    # NOTE(review): 188 equals DP (192) minus VD (4); presumably that is how
    # Genotype derives the reference depth here - confirm against the class.
    assert vardict_genotype.ref_depth == 188
def test_freebayes():
    """
    Test a genotype call on the form that freebayes produces.

    Builds the keyword arguments by pairing the FORMAT keys with the values
    of one sample column and checks both the genotype flags and the depths.
    """
    format_keys = "GT:DP:RO:QR:AO:QA:GL".split(':')
    call_values = "0/1:15:12:352:3:126:-6.17418,0,-19.4464".split(':')
    freebayes_genotype = Genotype(**dict(zip(format_keys, call_values)))

    assert freebayes_genotype.genotype == '0/1'
    assert freebayes_genotype.heterozygote
    assert not freebayes_genotype.homo_ref
    assert not freebayes_genotype.homo_alt
    assert freebayes_genotype.has_variant
    assert freebayes_genotype.allele_1 == '0'
    assert freebayes_genotype.allele_2 == '1'
    assert freebayes_genotype.genotyped
    # An unphased call, the separator is '/'.
    assert not freebayes_genotype.phased

    # The expected depths equal the DP (15), AO (3) and RO (12) fields above.
    assert freebayes_genotype.depth_of_coverage == 15
    assert freebayes_genotype.alt_depth == 3
    assert freebayes_genotype.ref_depth == 12
def _add_genotype_calls(self, variant_obj, variant_line, case_obj): """Add the genotype calls for the variant Args: variant_obj (puzzle.models.Variant) variant_dict (dict): A variant dictionary case_obj (puzzle.models.Case) """ variant_line = variant_line.split('\t') #if there is gt calls we have no individuals to add if len(variant_line) > 8: gt_format = variant_line[8].split(':') for individual in case_obj.individuals: sample_id = individual.ind_id index = individual.ind_index gt_call = variant_line[9 + index].split(':') raw_call = dict(zip(gt_format, gt_call)) genotype = Genotype(**raw_call) variant_obj.add_individual( puzzle_genotype( sample_id=sample_id, genotype=genotype.genotype, case_id=case_obj.name, phenotype=individual.phenotype, ref_depth=genotype.ref_depth, alt_depth=genotype.alt_depth, genotype_quality=genotype.genotype_quality, depth=genotype.depth_of_coverage, supporting_evidence=genotype.supporting_evidence, pe_support=genotype.pe_support, sr_support=genotype.sr_support, ))
def get_formated_variant(variant, individuals, family_id, gq_treshold=None):
    """Return a formated variant line

    Take a vcf formated variant line and return a dictionary with the
    relevant information.

    If criterias are not fullfilled, eg. no individual has a variant call
    with a genotype quality of at least gq_treshold, then an empty dictionary
    is returned.

    Args:
        variant (dict): A variant dictionary. The keys 'CHROM', 'POS', 'REF',
                        'ALT' and 'FORMAT' are read, together with one raw
                        gt call per individual id
        individuals (dict): Individual objects with the individual ids as
                            keys. The attribute sex is read for variants on
                            X and Y
        family_id (str): The family id
        gq_treshold (int): Lowest genotype quality for a call to be
                           considered. Defaults to 20 when None

    Return:
        formated_variant (dict): A variant dictionary

    Raises:
        Exception: If ALT holds more than one allele
        CaseError: If an individual is missing from the variant
    """
    # Only fall back on the default when no treshold was given, an explicit
    # treshold of 0 has to be respected.
    if gq_treshold is None:
        gq_treshold = 20

    # Remove a leading 'chr' prefix only. str.lstrip('chr') strips any of the
    # characters 'c', 'h' and 'r' and would turn 'hs37d5' into 's37d5'.
    chrom = variant['CHROM']
    if chrom.startswith('chr'):
        chrom = chrom[3:]

    pos = int(variant['POS'])
    ref = variant['REF']
    alt = variant['ALT']

    if ',' in alt:
        raise Exception("Multi allele calls are not allowed.")

    format_field = variant['FORMAT'].split(':')

    found_variant = False
    found_homozygote = False
    found_hemizygote = False

    for ind_id in individuals:
        ind_obj = individuals[ind_id]

        if ind_id not in variant:
            raise CaseError("Individual {0} from ped does not exist in"
                            " vcf".format(ind_id))

        gt_call = dict(zip(format_field, variant[ind_id].split(':')))
        genotype = Genotype(**gt_call)

        # Calls below the quality treshold or without variant are ignored
        if genotype.genotype_quality >= gq_treshold and genotype.has_variant:
            logger.debug("Found variant in affected")
            found_variant = True

            # If variant in X or Y and individual is male,
            # we need to check hemizygosity
            if chrom in ('X', 'Y') and ind_obj.sex == 1:
                if not check_par(chrom, pos):
                    logger.debug("Found hemizygous variant")
                    found_hemizygote = True

            if genotype.homo_alt:
                logger.debug("Found homozygote alternative variant")
                found_homozygote = True

    if not found_variant:
        return {}

    formated_variant = {
        '_id': '_'.join([chrom, str(pos), ref, alt]),
        'chrom': chrom,
        'pos': pos,
        'ref': ref,
        'alt': alt,
        'homozygote': 0,
        'hemizygote': 0,
    }

    # Hemizygote takes precedence, a variant is never flagged as both
    if found_hemizygote:
        formated_variant['hemizygote'] = 1
    elif found_homozygote:
        formated_variant['homozygote'] = 1

    if family_id:
        formated_variant['family_id'] = family_id

    return formated_variant
def test_haploid_genotype():
    """
    A haploid call with a single allele ('1') should be reported with the
    second allele as missing.
    """
    single_allele_call = Genotype(GT='1')

    assert single_allele_call.genotype == '1/.'
def test_phred_likelihoods():
    """
    The comma separated PL field should be available as a list of integers.
    """
    call_with_pl = Genotype(GT='0/1', PL='60,70,80')

    assert call_with_pl.phred_likelihoods == [60, 70, 80]