def check_parents(model, individual, variant, family):
    """Use the parents' genotypes to tell inherited variants from de novo ones.

    For the given ``model`` at most one flag in
    ``variant['Inheritance_model']`` is set to False:

    * the ``*_dn`` flag, when the parents' genotypes can explain the
      variant in the child (so it is not de novo);
    * the plain model flag, when the parents that matter are genotyped
      but do not explain the variant (so it has to be de novo).

    When neither condition holds, for example because a parent has no
    call, nothing is changed.

    Args:
        model: 'recessive', 'dominant', 'X_recessive' or 'X_dominant'.
            Any other value leaves ``variant`` untouched.
        individual: Id of the individual whose parents are inspected.
        variant: Variant dictionary holding a 'Genotypes' mapping
            (individual id -> genotype object) and an
            'Inheritance_model' mapping (model name -> bool).
        family: Family object exposing ``individuals`` and
            ``get_phenotype``.
    """
    child = family.individuals[individual]
    sex = child.sex
    calls = variant['Genotypes']

    # The child's own call and the parents' phenotypes are still looked up
    # here, but none of the rules below consult them.
    individual_genotype = calls.get(individual, genotype.Genotype())
    mother_id = child.mother
    mother = calls.get(mother_id, genotype.Genotype())
    mother_phenotype = family.get_phenotype(mother_id)
    father_id = child.father
    father = calls.get(father_id, genotype.Genotype())
    father_phenotype = family.get_phenotype(father_id)

    def clear(flag):
        variant['Inheritance_model'][flag] = False

    def both_parents_called():
        return mother.genotyped and father.genotyped

    if model == 'recessive':
        # One homozygous parent, or two carrier parents, can pass on two
        # copies: not de novo.
        if (mother.homo_alt or father.homo_alt
                or (mother.has_variant and father.has_variant)):
            clear('AR_hom_dn')
        # Both parents are called and cannot explain it: de novo only.
        elif both_parents_called():
            clear('AR_hom')
        return

    if model == 'dominant':
        # A single carrier parent is enough to explain the variant.
        if mother.has_variant or father.has_variant:
            clear('AD_dn')
        elif both_parents_called():
            clear('AD')
        return

    if model == 'X_recessive':
        dn_flag, model_flag = 'XR_dn', 'XR'
    elif model == 'X_dominant':
        dn_flag, model_flag = 'XD_dn', 'XD'
    else:
        return

    if sex == 1:
        # For a male only the mother has to be looked at.
        if mother.has_variant:
            clear(dn_flag)
        elif mother.genotyped:
            clear(model_flag)
    elif sex == 2:
        # For a female, X-linked recessive needs the variant in both
        # parents; X-linked dominant needs it in at least one of them.
        if model == 'X_recessive':
            inherited = mother.has_variant and father.has_variant
        else:
            inherited = mother.has_variant or father.has_variant
        if inherited:
            clear(dn_flag)
        elif both_parents_called():
            clear(model_flag)
def test_nocall():
    """A no-call answers False to every question except ``nocall``.

    A no-call means no information was found at this position for the
    individual. The haploid spelling '.' must behave exactly like the
    diploid './.'.
    """
    diploid_nocall = genotype.Genotype('./.')
    haploid_nocall = genotype.Genotype('.')
    for call in (diploid_nocall, haploid_nocall):
        # The alleles never need inspecting: the genotype string is
        # defined as 'allele_1/allele_2'.
        assert call.genotype == './.'
        assert call.nocall
        for flag in ('heterozygote', 'homo_ref', 'homo_alt', 'has_variant'):
            assert not getattr(call, flag)
def check_dominant(variant, family):
    """Check whether a variant follows the autosomal dominant pattern.

    Every individual in the family is inspected. As soon as one of them
    contradicts the model, both 'AD' and 'AD_dn' in
    ``variant['Inheritance_model']`` are set to False and the function
    returns. Sick individuals that have parents are passed on to
    ``check_parents`` to separate inherited from de novo.

    Args:
        variant: Variant dictionary with 'Genotypes' and
            'Inheritance_model' mappings.
        family: Family object whose ``individuals`` maps ids to
            individuals with ``phenotype`` (1 = healthy, 2 = sick) and
            ``has_parents``.
    """
    def rule_out():
        for flag in ('AD', 'AD_dn'):
            variant['Inheritance_model'][flag] = False

    for individual in family.individuals:
        call = variant['Genotypes'].get(individual, genotype.Genotype())
        member = family.individuals[individual]
        status = member.phenotype

        if status == 1:
            # A healthy individual carrying the variant on one or both
            # alleles contradicts dominance.
            if call.has_variant:
                rule_out()
                return
        elif status == 2:
            # A sick individual that has a call must be heterozygous.
            if call.genotyped and not call.heterozygote:
                rule_out()
                return
            # The sick individual is compatible so far; the parents
            # decide between inherited and de novo.
            if member.has_parents:
                check_parents('dominant', individual, variant, family)
    return
def check_X_dominant(variant, family):
    """Check whether a variant follows the X-linked dominant pattern.

    As soon as one individual contradicts the model, both 'XD' and
    'XD_dn' in ``variant['Inheritance_model']`` are set to False and the
    function returns. Sick carriers that have parents are passed on to
    ``check_parents``.

    Args:
        variant: Variant dictionary with 'Genotypes' and
            'Inheritance_model' mappings.
        family: Family object whose ``individuals`` maps ids to
            individuals with ``affected``, ``sex`` (1 = male,
            2 = female) and ``has_parents``.
    """
    def rule_out():
        for flag in ('XD', 'XD_dn'):
            variant['Inheritance_model'][flag] = False

    for individual in family.individuals:
        call = variant['Genotypes'].get(individual, genotype.Genotype())
        member = family.individuals[individual]

        if member.affected:
            # A sick individual without the variant contradicts the model.
            if call.homo_ref:
                rule_out()
                return
            if call.has_variant and member.has_parents:
                check_parents('X_dominant', individual, variant, family)
            continue

        sex = member.sex
        # Healthy women may be carriers but not homozygous alternative;
        # healthy men may not carry the variant at all.
        if (sex == 2 and call.homo_alt) or (sex == 1 and call.has_variant):
            rule_out()
            return
    return
def check_compound_candidates(variants, family):
    """Reduce a set of variants to those that may take part in a compound pair.

    A variant is dropped when any individual is homozygous alternative for
    it, or when an affected individual is not heterozygous for it. If an
    affected individual is left with fewer than two candidates no pair can
    be formed, so there are no candidates at all.

    The input dictionary is never modified.

    Args:
        variants: Mapping ``variant_id -> variant`` where each variant has
            a 'Genotypes' mapping (individual id -> genotype object with
            ``homo_alt`` and ``heterozygote``). An individual without an
            entry is treated as ``genotype.Genotype()``.
        family: Family object whose ``individuals`` maps ids to
            individuals exposing ``affected``.

    Returns:
        A list with the ids of the remaining candidates, in the order in
        which they appear in ``variants``.
    """
    # Work on a list of ids. The previous implementation removed entries
    # from a dictionary while looping over that same dictionary, which
    # raises RuntimeError in Python 3 whenever a removal actually happens.
    candidates = list(variants)

    for individual in family.individuals:
        affected = family.individuals[individual].affected
        remaining = []
        for variant_id in candidates:
            call = variants[variant_id]['Genotypes'].get(individual)
            if call is None:
                # Only build the default genotype when the individual
                # really lacks a call for this variant.
                call = genotype.Genotype()
            if call.homo_alt:
                continue
            # An affected individual has to be heterozygous for every
            # candidate.
            if affected and not call.heterozygote:
                continue
            remaining.append(variant_id)

        # All candidates must exist in every sick individual, and a pair
        # needs at least two of them.
        if affected and len(remaining) < 2:
            return []
        candidates = remaining

    return candidates
def test_genotype_1_2():
    """A '1/2' call is heterozygous and carries a variant.

    Both alleles are non-reference but differ, so the call counts as a
    heterozygote and not as homozygous alternative.
    """
    call = genotype.Genotype('1/2')
    assert call.genotype == '1/2'
    expected = {
        'nocall': False,
        'heterozygote': True,
        'homo_ref': False,
        'homo_alt': False,
        'has_variant': True,
    }
    for flag, value in expected.items():
        assert bool(getattr(call, flag)) is value
def test_genotype_0():
    """A haploid '0' call is treated as homozygous reference.

    The VCF format allows calls that consist of a single allele; the
    genotype string is then reported as '0/.'.
    """
    call = genotype.Genotype('0')
    assert call.genotype == '0/.'
    assert call.homo_ref
    for flag in ('nocall', 'heterozygote', 'homo_alt', 'has_variant'):
        assert not getattr(call, flag)
def test_genotype_1():
    """A haploid '1' call is treated as homozygous alternative.

    The VCF format allows calls that consist of a single allele; the
    genotype string is then reported as '1/.'.
    """
    call = genotype.Genotype('1')
    assert call.genotype == '1/.'
    for flag in ('homo_alt', 'has_variant'):
        assert getattr(call, flag)
    for flag in ('nocall', 'heterozygote', 'homo_ref'):
        assert not getattr(call, flag)
def test_phased_data():
    """Check that the class can handle phased data, here a heterozygote.

    The phased separator '|' must not leak into the reported genotype,
    which keeps the usual 'allele_1/allele_2' form.
    """
    call = genotype.Genotype('1|0')
    assert call.genotype == '1/0'
    expected = {
        'nocall': False,
        'heterozygote': True,
        'homo_ref': False,
        'homo_alt': False,
        'has_variant': True,
    }
    for flag, value in expected.items():
        assert bool(getattr(call, flag)) is value
def test_homo_alt_2():
    """A '3/3' call is homozygous alternative and carries a variant."""
    call = genotype.Genotype('3/3')
    assert call.genotype == '3/3'
    for flag in ('homo_alt', 'has_variant'):
        assert getattr(call, flag)
    for flag in ('nocall', 'heterozygote', 'homo_ref'):
        assert not getattr(call, flag)
def test_homo_ref():
    """A '0/0' call is homozygous reference: called, but without a variant."""
    call = genotype.Genotype('0/0')
    assert call.genotype == '0/0'
    assert call.homo_ref
    for flag in ('nocall', 'heterozygote', 'homo_alt', 'has_variant'):
        assert not getattr(call, flag)
def setup_class(self):
    """Build family 1: sick son (id 1), healthy father (id 2), healthy mother (id 3).

    Three variants are registered on chromosome 1:

    * ``recessive_dn_variant``: son 1/1, father 0/1, mother 0/0;
    * ``recessive_variant``: son 1/1, both parents 0/1;
    * ``almost_recessive_variant``: son not called, both parents 0/1.
    """
    self.recessive_family = family.Family(family_id = '1')
    sick_son = individual.Individual(
        ind='1', family='1', mother='3', father='2', sex=1, phenotype=2)
    healthy_father = individual.Individual(
        ind='2', family='1', mother='0', father='0', sex=1, phenotype=1)
    healthy_mother = individual.Individual(
        ind='3', family='1', mother='0', father='0', sex=2, phenotype=1)

    def add_calls(variant, son_gt, father_gt, mother_gt):
        # Register one genotype per family member: son, father, mother.
        trio = ((sick_son, son_gt),
                (healthy_father, father_gt),
                (healthy_mother, mother_gt))
        for member, gt in trio:
            member.add_genotype(variant.variant_id, genotype.Genotype(GT=gt))

    # Recessive in the son although the mother does not carry it.
    self.recessive_dn_variant = genetic_variant.Variant(
        chrom = '1', start = 5, stop=5, alternative = 'A',
        reference = 'C', identity = 'rs2230749')
    add_calls(self.recessive_dn_variant, '1/1', '0/1', '0/0')

    # Plain autosomal recessive: both parents are carriers.
    self.recessive_variant = genetic_variant.Variant(
        chrom = '1', start = 10, stop=10, alternative = 'C', reference = 'T')
    add_calls(self.recessive_variant, '1/1', '0/1', '0/1')

    # Looks recessive in the parents, but the son has no call.
    self.almost_recessive_variant = genetic_variant.Variant(
        chrom = '1', start = 20, stop=20, alternative = 'C', reference = 'T')
    add_calls(self.almost_recessive_variant, './.', '0/1', '0/1')

    for member in (healthy_father, sick_son, healthy_mother):
        self.recessive_family.add_individual(member)
    for variant in (self.recessive_dn_variant,
                    self.recessive_variant,
                    self.almost_recessive_variant):
        self.recessive_family.add_variant(variant)

    self.my_healthy_father_model = genetic_models.genetic_models(self.recessive_family)
def cmms_variant(self, splitted_variant_line, individuals):
    """Build a variant dictionary from one split line in the cmms format.

    The columns are paired with ``self.header_line``. For every
    individual the column named 'IDN:<individual>' is parsed: its second
    ':'-separated field is expected to have the form 'key=value' and the
    value is used as the genotype call. A missing column or a field that
    does not have this shape results in a no-call ('./.').

    Args:
        splitted_variant_line: The column values of one variant line.
        individuals: Iterable of individual ids (strings).

    Returns:
        A tuple ``(variant, features_overlapped)``: the variant
        dictionary, extended with a 'Genotypes' mapping, and whatever
        ``self.get_genes`` returns for the 'HGNC_symbol' column.
    """
    variant = dict(zip(self.header_line, splitted_variant_line))
    features_overlapped = self.get_genes(variant['HGNC_symbol'], 'HGNC')

    calls = {}
    variant['Genotypes'] = calls
    for individual in individuals:
        try:
            fields = variant['IDN:' + individual].split(':')
            gt_info = fields[1].split('=')[1]
        except (IndexError, KeyError):
            gt_info = './.'
        calls[individual] = genotype.Genotype(GT=gt_info)

    return variant, features_overlapped
def setup_class(self):
    """Build a trio: sick daughter (id 1), healthy father (id 2), healthy mother (id 3).

    Besides the individuals, one genotype dictionary per family member is
    stored, keyed by variant id, for the variants '1_1_T_A' and
    '1_3_A_C'.
    """
    self.daughter = individual.Individual(
        ind='1', family='1', mother='3', father='2', sex=2, phenotype=2)
    self.father = individual.Individual(
        ind='2', family='1', mother='0', father='0', sex=1, phenotype=1)
    self.mother = individual.Individual(
        ind='3', family='1', mother='0', father='0', sex=2, phenotype=1)

    self.daughter_genotypes = {
        '1_1_T_A': genotype.Genotype(GT='0/1'),
        '1_3_A_C': genotype.Genotype(GT='1/1'),
    }
    self.father_genotypes = {
        '1_1_T_A': genotype.Genotype(GT='0/0'),
        '1_3_A_C': genotype.Genotype(GT='0/1'),
    }
    # The mother has no call for the first variant.
    self.mother_genotypes = {
        '1_1_T_A': genotype.Genotype(GT='./.'),
        '1_3_A_C': genotype.Genotype(GT='0/1'),
    }
def check_X_recessive(variant, family):
    """Check whether a variant follows the X-linked recessive pattern.

    As soon as one individual contradicts the model, both 'XR' and
    'XR_dn' in ``variant['Inheritance_model']`` are set to False and the
    function returns. Sick individuals that are compatible with the model
    and have parents are passed on to ``check_parents``.

    Args:
        variant: Variant dictionary with 'Genotypes' and
            'Inheritance_model' mappings.
        family: Family object whose ``individuals`` maps ids to
            individuals with ``affected``, ``sex`` (1 = male,
            2 = female) and ``has_parents``.
    """
    def rule_out():
        for flag in ('XR', 'XR_dn'):
            variant['Inheritance_model'][flag] = False

    for individual in family.individuals:
        call = variant['Genotypes'].get(individual, genotype.Genotype())
        member = family.individuals[individual]

        if not member.affected:
            # Nobody healthy may be homozygous alternative.
            if call.homo_alt:
                rule_out()
                return
            # A healthy male may not carry the variant at all.
            if member.sex == 1 and call.has_variant:
                rule_out()
                return
            continue

        # From here on the individual is sick.
        if call.homo_ref:
            rule_out()
            return
        # A sick woman that has a call must be homozygous alternative.
        if member.sex == 2 and call.genotyped and not call.homo_alt:
            rule_out()
            return
        if member.has_parents:
            check_parents('X_recessive', individual, variant, family)
    return
def check_recessive(variant, family):
    """Check whether a variant follows the autosomal recessive pattern.

    As soon as one individual contradicts the model, both 'AR_hom' and
    'AR_hom_dn' in ``variant['Inheritance_model']`` are set to False and
    the function returns. Sick individuals that fit the model and have
    parents are passed on to ``check_parents``.

    Args:
        variant: Variant dictionary with 'Genotypes' and
            'Inheritance_model' mappings.
        family: Family object whose ``individuals`` maps ids to
            individuals with ``phenotype`` (1 = healthy, 2 = sick) and
            ``has_parents``.
    """
    def rule_out():
        for flag in ('AR_hom', 'AR_hom_dn'):
            variant['Inheritance_model'][flag] = False

    for individual in family.individuals:
        call = variant['Genotypes'].get(individual, genotype.Genotype())
        member = family.individuals[individual]
        status = member.phenotype

        if status == 1:
            # A healthy individual that is homozygous alternative breaks
            # the model.
            model_broken = call.homo_alt
        else:
            # A sick individual has to be homozygous alternative.
            # NOTE(review): a sick individual without a call is rejected
            # here as well, although an earlier comment said a no-call
            # should not exclude the model - confirm which is intended.
            model_broken = status == 2 and not call.homo_alt

        if model_broken:
            rule_out()
            return

        # The model holds for this sick individual; the parents decide
        # between inherited and de novo.
        if status == 2 and member.has_parents:
            check_parents('recessive', individual, variant, family)
    return
def setup_class(self):
    """Build family 1: sick son (id 1), healthy father (id 2), healthy mother (id 3).

    Two heterozygous variants in the same gene are registered. The son
    carries both, the father only the first and the mother only the
    second. Finally ``check_models`` is run on every variant of the
    family.
    """
    self.recessive_family = family.Family(family_id = '1')
    sick_son = individual.Individual(
        ind='1', family='1', mother='3', father='2', sex=1, phenotype=2)
    healthy_father = individual.Individual(
        ind='2', family='1', mother='0', father='0', sex=1, phenotype=1)
    healthy_mother = individual.Individual(
        ind='3', family='1', mother='0', father='0', sex=2, phenotype=1)

    # Both variants are annotated with the same gene id.
    self.recessive_comp_variant_1 = genetic_variant.Variant(
        chrom = '1', start = 5, stop=5, alternative = 'A', reference = 'C',
        identity = 'rs2230749',
        all_info={'Ensemble_GeneID':'ENSG00000187634;'})
    self.recessive_comp_variant_2 = genetic_variant.Variant(
        chrom = '1', start = 10, stop=10, alternative = 'C', reference = 'T',
        identity = '.',
        all_info={'Ensemble_GeneID':'ENSG00000187634;'})

    def add_calls(variant, son_gt, father_gt, mother_gt):
        # Register one genotype per family member: son, father, mother.
        trio = ((sick_son, son_gt),
                (healthy_father, father_gt),
                (healthy_mother, mother_gt))
        for member, gt in trio:
            member.add_genotype(variant.variant_id, genotype.Genotype(GT=gt))

    add_calls(self.recessive_comp_variant_1, '0/1', '0/1', '0/0')
    add_calls(self.recessive_comp_variant_2, '0/1', '0/0', '0/1')

    for member in (healthy_father, sick_son, healthy_mother):
        self.recessive_family.add_individual(member)
    for variant in (self.recessive_comp_variant_1, self.recessive_comp_variant_2):
        self.recessive_family.add_variant(variant)

    self.my_healthy_father_model = genetic_models.genetic_models(self.recessive_family)

    family_variants = self.recessive_family.variants
    for variant_id in family_variants:
        family_variants[variant_id].check_models()
def setup_class(self):
    """Build family 1: sick son (id 1), healthy father (id 2), healthy mother (id 3).

    Two variants in the same gene are registered:

    * ``homozygote_alternative``: son and father 1/1, mother 1/0;
    * ``recessive_dn``: son 1/1, father not called, mother 0/1.

    Finally ``check_models`` is run on every variant of the family.
    """
    self.recessive_family = family.Family(family_id='1')
    sick_son = individual.Individual(
        ind='1', family='1', mother='3', father='2', sex=1, phenotype=2)
    healthy_father = individual.Individual(
        ind='2', family='1', mother='0', father='0', sex=1, phenotype=1)
    healthy_mother = individual.Individual(
        ind='3', family='1', mother='0', father='0', sex=2, phenotype=1)

    def add_calls(variant, son_gt, father_gt, mother_gt):
        # Register one genotype per family member: son, father, mother.
        trio = ((sick_son, son_gt),
                (healthy_father, father_gt),
                (healthy_mother, mother_gt))
        for member, gt in trio:
            member.add_genotype(variant.variant_id, genotype.Genotype(GT=gt))

    # The healthy father is homozygous alternative, just like the sick son.
    self.homozygote_alternative = genetic_variant.Variant(
        chrom='1', start=5, stop=5, alternative='A', reference='C',
        identity='rs2230749',
        all_info={'Ensemble_GeneID': 'ENSG00000187634;'})
    add_calls(self.homozygote_alternative, '1/1', '1/1', '1/0')

    # Candidate recessive de novo: the father has no call.
    self.recessive_dn = genetic_variant.Variant(
        chrom='1', start=6, stop=6, alternative='C', reference='T',
        identity='.',
        all_info={'Ensemble_GeneID': 'ENSG00000187634;'})
    add_calls(self.recessive_dn, '1/1', './.', '0/1')

    for member in (healthy_father, sick_son, healthy_mother):
        self.recessive_family.add_individual(member)
    for variant in (self.homozygote_alternative, self.recessive_dn):
        self.recessive_family.add_variant(variant)

    self.my_healthy_father_model = genetic_models.genetic_models(
        self.recessive_family)

    family_variants = self.recessive_family.variants
    for variant_id in family_variants:
        family_variants[variant_id].check_models()
def setup_class(self):
    """Build family 2: sick daughter (id 1), sick father (id 2), healthy mother (id 3).

    Two variants on chromosome 1 are registered:

    * ``dominant_variant``: daughter and father 0/1, mother 0/0;
    * ``recessive_variant``: daughter 1/1, both parents 0/1. With a sick
      heterozygous father this one should not pass as recessive.
    """
    self.sick_father_family = family.Family(family_id='2')
    sick_daughter = individual.Individual(
        ind='1', family='2', mother='3', father='2', sex=2, phenotype=2)
    sick_father = individual.Individual(
        ind='2', family='2', mother='0', father='0', sex=1, phenotype=2)
    healthy_mother = individual.Individual(
        ind='3', family='2', mother='0', father='0', sex=2, phenotype=1)

    def add_calls(variant, daughter_gt, father_gt, mother_gt):
        # Register one genotype per family member: daughter, father, mother.
        trio = ((sick_daughter, daughter_gt),
                (sick_father, father_gt),
                (healthy_mother, mother_gt))
        for member, gt in trio:
            member.add_genotype(variant.variant_id, genotype.Genotype(GT=gt))

    # Follows the autosomal dominant pattern only.
    self.dominant_variant = genetic_variant.Variant(
        chrom='1', start=5, stop=5, alternative='A', reference='C',
        identity='rs2230749')
    add_calls(self.dominant_variant, '0/1', '0/1', '0/0')

    # Genotypes that look autosomal recessive.
    self.recessive_variant = genetic_variant.Variant(
        chrom='1', start=10, stop=10, alternative='C', reference='T')
    add_calls(self.recessive_variant, '1/1', '0/1', '0/1')

    for member in (sick_father, sick_daughter, healthy_mother):
        self.sick_father_family.add_individual(member)
    for variant in (self.dominant_variant, self.recessive_variant):
        self.sick_father_family.add_variant(variant)

    self.my_sick_father_model = genetic_models.genetic_models(
        self.sick_father_family)
def check_compounds(variant_1, variant_2, family, phased, intervals):
    """Tell whether two variants can form a compound heterozygous pair.

    Callers are expected to have removed every variant for which some
    individual is homozygous alternative before calling this function.

    Args:
        variant_1: Variant dictionary with a 'Genotypes' mapping and,
            when ``phased`` is true, a 'POS' entry convertible to int.
        variant_2: The second variant, same layout as ``variant_1``.
        family: Family object exposing ``individuals`` (id -> individual
            with ``phenotype``, ``has_parents``, ``mother``, ``father``)
            and ``get_phenotype``.
        phased: Whether phasing information is available.
        intervals: Mapping from individual id to an object with a
            ``find_range([start, stop])`` method; only used when
            ``phased`` is true.

    Returns:
        False as soon as one individual contradicts the compound model,
        otherwise True.
    """
    def share_interval(individual):
        # True when both positions hit at least one common interval of
        # this individual (presumably a phased block - confirm).
        position_1 = int(variant_1['POS'])
        hits_1 = set(intervals[individual].find_range([position_1, position_1]))
        position_2 = int(variant_2['POS'])
        hits_2 = intervals[individual].find_range([position_2, position_2])
        return len(hits_1.intersection(hits_2)) > 0

    def healthy_double_carrier(parent_id):
        # True when the parent is heterozygous for both variants while
        # being unaffected (phenotype 1).
        parent_call_1 = variant_1['Genotypes'].get(parent_id, genotype.Genotype())
        parent_call_2 = variant_2['Genotypes'].get(parent_id, genotype.Genotype())
        parent_phenotype = family.get_phenotype(parent_id)
        return (parent_call_1.heterozygote and parent_call_2.heterozygote
                and parent_phenotype == 1)

    for individual in family.individuals:
        genotype_1 = variant_1['Genotypes'].get(individual, genotype.Genotype())
        genotype_2 = variant_2['Genotypes'].get(individual, genotype.Genotype())
        member = family.individuals[individual]

        if member.phenotype != 2:
            # Someone who is not sick is only informative when carrying
            # both variants.
            if not (genotype_1.has_variant and genotype_2.has_variant):
                continue
            if not phased:
                return False
            # Without a shared interval it is unknown whether the
            # variants sit on the same haplotype; assume the pair is not
            # a compound.
            if not share_interval(individual):
                return False
            # Variants on different alleles in a healthy carrier rule the
            # pair out.
            # NOTE(review): only the case where variant_1 is absent from
            # allele_1 and variant_2 is present on it is tested; the
            # mirrored case passes - confirm this is intended.
            if genotype_1.allele_1 == '0' and genotype_2.allele_1 != '0':
                return False
            continue

        # The individual is affected.
        if phased:
            # A sick individual needs one variant on each allele.
            # NOTE(review): only "both absent from allele_1" is rejected;
            # both present on allele_1 passes - confirm.
            if (share_interval(individual)
                    and genotype_1.allele_1 == '0'
                    and genotype_2.allele_1 == '0'):
                return False
            continue

        if not member.has_parents:
            continue

        # Without phasing, an unaffected parent that is heterozygous for
        # both variants rules the pair out.
        mother_rules_out = healthy_double_carrier(member.mother)
        father_rules_out = healthy_double_carrier(member.father)
        if mother_rules_out or father_rules_out:
            return False

    return True