def get_compound_het_genes(datastore, reference, family, variant_filter=None, quality_filter=None):
    """
    Gene-based inheritance; genes with variants that follow compound het
    inheritance in a family.

    Compound het implies two variants, so every pair of variants within a
    gene is checked with is_family_compound_het_for_combo.

    Args:
        datastore: passed through to get_variants.
        reference: passed to stream_utils.variant_stream_to_gene_stream, which
            groups the variant stream by gene.
        family: family under analysis; this function reads
            `family.individuals` (a mapping of indiv_id to an object with an
            `affected_status` attribute) and calls `family.indiv_id_list()`.
        variant_filter: optional variant filter, used for the initial query
            and applied again per gene.
        quality_filter: optional quality filter forwarded to get_variants.

    Yields:
        (gene_name, variant_list) tuples. variant_list is a real list holding
        each variant (de-duplicated by unique_tuple()) that takes part in at
        least one valid pair. Genes with no valid pair are not yielded.
    """
    # only ask for variants that are het in all affected
    initial_filter = {
        indiv_id: 'ref_alt'
        for indiv_id, individual in family.individuals.items()
        if individual.affected_status == 'affected'
    }

    het_variants = get_variants(
        datastore,
        family,
        initial_filter,
        variant_filter,
        quality_filter,
        indivs_to_consider=family.indiv_id_list(),
    )

    gene_stream = stream_utils.variant_stream_to_gene_stream(het_variants, reference)
    for gene_name, raw_variants in gene_stream:
        variants = search_utils.filter_gene_variants_by_variant_filter(
            raw_variants, gene_name, variant_filter)

        # don't care about genes w less than 2 variants
        if len(variants) < 2:
            continue

        # keyed by unique_tuple() so a variant that appears in several valid
        # pairs is only returned once
        variants_to_return = {}
        for pair in itertools.combinations(variants, 2):
            if is_family_compound_het_for_combo(pair, family):
                for variant in pair:
                    variants_to_return[variant.unique_tuple()] = variant

        if variants_to_return:
            # materialise the values: on Python 3 dict.values() is a view, not
            # the list that the documented contract promises
            yield (gene_name, list(variants_to_return.values()))
def get_variants_in_gene(project, gene_id, variant_filter=None, quality_filter=None):
    """
    Fetch a project's variants in one gene and narrow them with variant_filter.

    Args:
        project: object whose `project_id` is used for the datastore lookup.
        gene_id: gene to look up.
        variant_filter: optional filter; given to the datastore query and then
            to search_utils.filter_gene_variants_by_variant_filter.
        quality_filter: accepted for interface compatibility; not used by
            this function.

    Returns:
        The result of search_utils.filter_gene_variants_by_variant_filter
        applied to the variants returned by the project datastore.
    """
    datastore = get_project_datastore()
    fetched = datastore.get_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    return search_utils.filter_gene_variants_by_variant_filter(fetched, gene_id, variant_filter)
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals in a project that show 'recessive' inheritance of
    'moderate_impact' variants in a gene.

    Args:
        project: object providing `project_id` and `get_individuals()`.
        gene_id: gene to analyse.
        quality_filter: accepted but not forwarded.
            NOTE(review): CohortGeneVariation is always built with an empty
            quality filter ({}) - confirm this is intentional.

    Returns:
        (knockouts, variation): the result of get_individuals_with_inheritance
        for 'recessive', and the CohortGeneVariation it was computed from.
    """
    indiv_id_list = [indiv.indiv_id for indiv in project.get_individuals()]

    # filter out variants > 0.01 AF in any of the reference populations
    # (the threshold itself comes from get_default_variant_filter)
    reference_populations = mall.get_annotator().reference_population_slugs
    variant_filter = get_default_variant_filter('moderate_impact', reference_populations)

    datastore = get_project_datastore(project.project_id)
    gene_variants = datastore.get_project_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, variant_filter)

    variation = CohortGeneVariation(
        get_reference(),
        gene_id,
        gene_variants,
        indiv_id_list,
        quality_filter={},
    )
    return get_individuals_with_inheritance('recessive', variation, indiv_id_list), variation
def get_variants_in_gene(family_group, gene_id, variant_filter=None, quality_filter=None):
    """
    Collect, for every family in a family group, the variants found in a gene.

    Args:
        family_group: object whose `get_families()` yields the families.
        gene_id: gene to look up.
        variant_filter: optional filter; given to the variant store query and
            then to search_utils.filter_gene_variants_by_variant_filter.
        quality_filter: accepted for interface compatibility; not used by
            this function.

    Returns:
        A list with one dict per family, in iteration order, with the keys
        'variants' (each variant's toJSON() output), 'family_id',
        'project_id' and 'family_name'.
    """
    def _entry_for(family):
        # Query the family's own variant store, then re-apply the filter.
        store = get_mall(family.project.project_id).variant_store
        found = list(store.get_variants_in_gene(
            family.project.project_id,
            family.family_id,
            gene_id,
            variant_filter=variant_filter,
        ))
        found = search_utils.filter_gene_variants_by_variant_filter(found, gene_id, variant_filter)
        add_extra_info_to_variants_family(get_reference(), family, found)
        return {
            'variants': [variant.toJSON() for variant in found],
            'family_id': family.family_id,
            'project_id': family.project.project_id,
            'family_name': str(family),
        }

    return [_entry_for(family) for family in family_group.get_families()]
def get_variants_in_gene(project, gene_id, variant_filter=None, quality_filter=None):
    """
    Fetch a project's variants in one gene and narrow them with variant_filter.

    Args:
        project: passed to get_project_datastore; its `project_id` is used
            for the lookup.
        gene_id: gene to look up.
        variant_filter: optional filter; given to the datastore query and then
            to search_utils.filter_gene_variants_by_variant_filter.
        quality_filter: accepted for interface compatibility; not used by
            this function.

    Returns:
        The result of search_utils.filter_gene_variants_by_variant_filter
        applied to the variants returned by the project datastore.
    """
    datastore = get_project_datastore(project)
    fetched = datastore.get_project_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    return search_utils.filter_gene_variants_by_variant_filter(fetched, gene_id, variant_filter)
def get_genes_with_inheritance(datastore, reference, cohort, inheritance_mode, variant_filter=None, quality_filter=None):
    """
    Stream the genes in which at least one cohort individual shows the
    requested inheritance mode.

    Args:
        datastore: passed through to get_genes.
        reference: passed to get_genes and to CohortGeneVariation.
        cohort: object providing `indiv_id_list()`; also passed to get_genes.
        inheritance_mode: mode handed to get_individuals_with_inheritance.
        variant_filter: optional filter used by get_genes and re-applied to
            each gene's variants.
        quality_filter: optional quality filter given to CohortGeneVariation.

    Yields:
        (gene_id, indivs_with_inheritance, gene_variation) for every gene
        whose individuals-with-inheritance result is non-empty.
    """
    gene_stream = get_genes(datastore, reference, cohort, variant_filter)
    for gene_id, raw_variant_list in gene_stream:
        filtered = search_utils.filter_gene_variants_by_variant_filter(
            raw_variant_list, gene_id, variant_filter)
        gene_variation = CohortGeneVariation(
            reference,
            gene_id,
            filtered,
            cohort.indiv_id_list(),
            quality_filter=quality_filter,
        )
        matches = get_individuals_with_inheritance(
            inheritance_mode, gene_variation, cohort.indiv_id_list())

        # skip genes where nobody matches the inheritance mode
        if len(matches) <= 0:
            continue
        yield gene_id, matches, gene_variation
def get_individuals_with_inheritance_in_gene(datastore, reference, cohort, inheritance_mode, gene_id, variant_filter=None, quality_filter=None):
    """
    Evaluate one gene for the requested inheritance mode across a cohort.

    Args:
        datastore: object whose `get_variants_in_gene` is queried with the
            cohort's project_id and cohort_id.
        reference: passed to CohortGeneVariation.
        cohort: object providing `project_id`, `cohort_id` and
            `indiv_id_list()`.
        inheritance_mode: mode handed to get_individuals_with_inheritance.
        gene_id: gene to analyse.
        variant_filter: optional filter used in the datastore query and
            re-applied to the fetched variants.
        quality_filter: optional quality filter given to CohortGeneVariation.

    Returns:
        (indivs_with_inheritance, gene_variation).
    """
    fetched = datastore.get_variants_in_gene(
        cohort.project_id,
        cohort.cohort_id,
        gene_id,
        variant_filter=variant_filter,
    )
    filtered = search_utils.filter_gene_variants_by_variant_filter(
        list(fetched), gene_id, variant_filter)

    gene_variation = CohortGeneVariation(
        reference,
        gene_id,
        filtered,
        cohort.indiv_id_list(),
        quality_filter=quality_filter,
    )
    matches = get_individuals_with_inheritance(
        inheritance_mode, gene_variation, cohort.indiv_id_list())
    return matches, gene_variation
def get_knockouts_in_gene(project, gene_id, gene_variants):
    """
    Find the individuals in a project that show 'recessive' inheritance of
    'moderate_impact' variants in a gene, starting from pre-fetched variants.

    Args:
        project: object providing `get_individuals()`.
        gene_id: gene to analyse.
        gene_variants: variants for the gene, supplied by the caller; they are
            narrowed with the default 'moderate_impact' variant filter.

    Returns:
        (knockouts, variation): the result of get_individuals_with_inheritance
        for 'recessive', and the CohortGeneVariation it was computed from.
        The variation is always built with an empty quality filter ({}).
    """
    indiv_id_list = [indiv.indiv_id for indiv in project.get_individuals()]

    # filter out variants > 0.01 AF in any of the reference populations
    # (the threshold itself comes from get_default_variant_filter)
    reference_populations = mall.get_annotator().reference_population_slugs
    variant_filter = get_default_variant_filter('moderate_impact', reference_populations)
    filtered = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, variant_filter)

    variation = CohortGeneVariation(
        get_reference(),
        gene_id,
        filtered,
        indiv_id_list,
        quality_filter={},
    )
    return get_individuals_with_inheritance('recessive', variation, indiv_id_list), variation
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals in a project that show 'recessive' inheritance of
    'high_impact' variants in a gene.

    Args:
        project: object providing `project_id` and `get_individuals()`.
        gene_id: gene to analyse.
        quality_filter: accepted but not forwarded.
            NOTE(review): CohortGeneVariation is always built with an empty
            quality filter ({}) - confirm this is intentional.

    Returns:
        (knockouts, variation): the result of get_individuals_with_inheritance
        for 'recessive', and the CohortGeneVariation it was computed from.
    """
    indiv_id_list = [indiv.indiv_id for indiv in project.get_individuals()]
    variant_filter = get_default_variant_filter('high_impact')

    datastore = get_project_datastore()
    gene_variants = datastore.get_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, variant_filter)

    variation = CohortGeneVariation(
        get_reference(),
        gene_id,
        gene_variants,
        indiv_id_list,
        quality_filter={},
    )
    return get_individuals_with_inheritance('recessive', variation, indiv_id_list), variation
def get_variants_in_gene(family_group, gene_id, variant_filter=None, quality_filter=None):
    """
    Collect, for every family in a family group, the variants found in a gene.

    Args:
        family_group: object whose `get_families()` yields the families.
        gene_id: gene to look up.
        variant_filter: optional filter; given to the variant store query and
            then to search_utils.filter_gene_variants_by_variant_filter.
        quality_filter: accepted for interface compatibility; not used by
            this function.

    Returns:
        A list with one dict per family, in iteration order, with the keys
        'variants' (each variant's toJSON() output), 'family_id',
        'project_id' and 'family_name'.
    """
    def _entry_for(family):
        # Query the family's own variant store, then re-apply the filter.
        store = get_mall(family.project.project_id).variant_store
        found = list(store.get_variants_in_gene(
            family.project.project_id,
            family.family_id,
            gene_id,
            variant_filter=variant_filter
        ))
        found = search_utils.filter_gene_variants_by_variant_filter(found, gene_id, variant_filter)
        add_extra_info_to_variants_family(get_reference(), family, found)
        return {
            'variants': [variant.toJSON() for variant in found],
            'family_id': family.family_id,
            'project_id': family.project.project_id,
            'family_name': str(family),
        }

    return [_entry_for(family) for family in family_group.get_families()]
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals in a project that show 'recessive' inheritance of
    'high_impact' variants in a gene.

    Args:
        project: object providing `project_id` and `get_individuals()`.
        gene_id: gene to analyse.
        quality_filter: accepted but not forwarded.
            NOTE(review): CohortGeneVariation is always built with an empty
            quality filter ({}) - confirm this is intentional.

    Returns:
        (knockouts, variation): the result of get_individuals_with_inheritance
        for 'recessive', and the CohortGeneVariation it was computed from.
    """
    indiv_id_list = [indiv.indiv_id for indiv in project.get_individuals()]
    variant_filter = get_default_variant_filter('high_impact')

    datastore = get_project_datastore(project.project_id)
    gene_variants = datastore.get_project_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, variant_filter)

    variation = CohortGeneVariation(
        get_reference(),
        gene_id,
        gene_variants,
        indiv_id_list,
        quality_filter={},
    )
    return get_individuals_with_inheritance('recessive', variation, indiv_id_list), variation