コード例 #1
0
ファイル: cohort.py プロジェクト: statcgx/xbrowse
def get_quality_filtered_genotypes(variant, quality_filter):
    """
    Collect the non-reference genotypes of a variant that pass the quality filter.

    Only genotypes with num_alt > 0 are considered, and each of those is kept
    when passes_genotype_filter(genotype, quality_filter) is true.

    Two optional keys in quality_filter can reject the variant as a whole:
        'het_ratio': maximum allowed percentage (passing num_alt == 1
            genotypes * 100 / variant.num_genotypes())
        'hom_alt_ratio': the same limit for passing num_alt == 2 genotypes

    Args:
        variant: object providing num_genotypes() and get_genotypes(); the
            latter yields (indiv_id, genotype) pairs
        quality_filter: container supporting `in` and item lookup (e.g. a dict)

    Returns:
        A list of (indiv_id, genotype) tuples for genotypes that pass quality
        filter, or an empty list if none do or if one of the ratio limits is
        exceeded
    """
    ret = []
    num_het = 0
    num_hom_alt = 0
    num_genotypes = variant.num_genotypes()
    for indiv_id, genotype in variant.get_genotypes():
        if genotype.num_alt > 0 and passes_genotype_filter(genotype, quality_filter):
            if genotype.num_alt == 1:
                num_het += 1
            elif genotype.num_alt == 2:
                num_hom_alt += 1
            ret.append((indiv_id, genotype))

    # The percentages are undefined for a variant without genotypes, so the
    # ratio limits are skipped in that case instead of dividing by zero.
    if num_genotypes:
        if 'het_ratio' in quality_filter:
            if float(num_het) * 100 / num_genotypes > quality_filter['het_ratio']:
                return []

        if 'hom_alt_ratio' in quality_filter:
            if float(num_hom_alt) * 100 / num_genotypes > quality_filter['hom_alt_ratio']:
                return []

    return ret
コード例 #2
0
ファイル: cohort.py プロジェクト: dmyung/xbrowse
def get_quality_filtered_genotypes(variant, quality_filter):
    """
    Return the alt-carrying genotypes of a variant that pass the quality filter.

    A genotype is kept when its num_alt is greater than zero and
    passes_genotype_filter(genotype, quality_filter) is true. The whole variant
    is rejected (empty list) when the share of kept genotypes with
    num_alt == 1 exceeds quality_filter['het_ratio'], or the share with
    num_alt == 2 exceeds quality_filter['hom_alt_ratio']. Both limits are
    optional and are expressed as a percentage of variant.num_genotypes().

    Args:
        variant: object providing num_genotypes() and get_genotypes(); the
            latter yields (indiv_id, genotype) pairs
        quality_filter: container supporting `in` and item lookup (e.g. a dict)

    Returns:
        A list of (indiv_id, genotype) tuples for genotypes that pass quality
        filter, or an empty list if none do
    """
    passing = []
    num_het = 0
    num_hom_alt = 0
    num_genotypes = variant.num_genotypes()
    for indiv_id, genotype in variant.get_genotypes():
        if genotype.num_alt <= 0:
            continue
        if not passes_genotype_filter(genotype, quality_filter):
            continue
        if genotype.num_alt == 1:
            num_het += 1
        elif genotype.num_alt == 2:
            num_hom_alt += 1
        passing.append((indiv_id, genotype))

    if not num_genotypes:
        # No genotypes at all: the ratios cannot be computed (the division
        # below would raise ZeroDivisionError), so the limits are not applied.
        return passing

    # het limit is checked first, then hom-alt, each only if configured
    for ratio_key, count in (('het_ratio', num_het), ('hom_alt_ratio', num_hom_alt)):
        if ratio_key in quality_filter:
            if float(count) * 100 / num_genotypes > quality_filter[ratio_key]:
                return []

    return passing
コード例 #3
0
ファイル: family.py プロジェクト: ericminikel/xbrowse
def passes_quality_filter(variant, quality_filter, indivs_to_consider):
    """
    Check a variant's genotypes against quality_filter for the given individuals.

    Returns True only when the genotype of every individual in
    indivs_to_consider passes passes_genotype_filter; evaluation stops at the
    first genotype that fails, and False is returned. An empty
    indivs_to_consider yields True.
    TODO: this is weird
    """
    return all(
        passes_genotype_filter(variant.get_genotype(indiv_id), quality_filter)
        for indiv_id in indivs_to_consider
    )
コード例 #4
0
def passes_quality_filter(variant, quality_filter, indivs_to_consider):
    """
    Tell whether the variant passes quality_filter for all listed individuals.

    Each individual's genotype is fetched with variant.get_genotype and checked
    with passes_genotype_filter, in the order given by indivs_to_consider.
    Returns False as soon as one genotype fails, True otherwise (including when
    indivs_to_consider is empty).
    TODO: this is weird
    """
    # lazy generator: a genotype is only fetched right before it is checked
    genotypes = (variant.get_genotype(indiv_id) for indiv_id in indivs_to_consider)
    any_failed = any(
        not passes_genotype_filter(genotype, quality_filter)
        for genotype in genotypes
    )
    return not any_failed
コード例 #5
0
def get_de_novo_variants(datastore,
                         reference,
                         family,
                         variant_filter=None,
                         quality_filter=None):
    """
    Yield variants that follow de novo inheritance in family.

    The family's collection is queried with the filter returned by
    inheritance.get_de_novo_filter(family), sorted by 'xpos'. Every result is
    annotated, checked with passes_variant_filter, and then checked per
    individual with passes_genotype_filter. When exactly two parent ids are
    found among the family's individuals, the trio-specific thresholds
    described in the body are layered on top of quality_filter.

    This is a generator, so nothing runs (and nothing is raised) until it is
    iterated.

    Args:
        datastore: provides _make_db_query, _get_family_collection and
            add_annotations_to_variant
        reference: not used by this function
        family: provides project_id, family_id, individuals and get_individuals()
        variant_filter: passed to _make_db_query and passes_variant_filter
        quality_filter: dict of genotype quality thresholds, or None. It is
            handed unchanged to passes_genotype_filter in the non-trio branch;
            in the trio branch None is treated as an empty dict.

    Yields:
        Variant objects that pass all filters.

    Raises:
        ValueError: if no collection is found for the family
        Exception: if the query returns more rows than MONGO_QUERY_RESULTS_LIMIT
            allows
    """
    de_novo_filter = inheritance.get_de_novo_filter(family)
    db_query = datastore._make_db_query(de_novo_filter, variant_filter)

    collection = datastore._get_family_collection(family.project_id,
                                                  family.family_id)
    if not collection:
        raise ValueError(
            "Error: mongodb collection not found for project %s family %s " %
            (family.project_id, family.family_id))

    MONGO_QUERY_RESULTS_LIMIT = 5000
    # ask for a few rows more than the limit so that an oversized result set
    # is detected by the check inside the loop instead of being silently cut
    variant_iter = collection.find(db_query).sort('xpos').limit(
        MONGO_QUERY_RESULTS_LIMIT + 5)

    # get ids of parents in this family (only parents that are themselves
    # members of the family count)
    valid_ids = set(family.individuals)
    parental_ids = set()
    for individual in family.get_individuals():
        for parent_id in (individual.paternal_id, individual.maternal_id):
            if parent_id in valid_ids:
                parental_ids.add(parent_id)

    # loop over all variants returned
    for i, variant_dict in enumerate(variant_iter):
        # NOTE(review): i is zero-based, so this fires on row number
        # MONGO_QUERY_RESULTS_LIMIT + 2 - confirm whether that slack is intended
        if i > MONGO_QUERY_RESULTS_LIMIT:
            raise Exception(
                "MONGO_QUERY_RESULTS_LIMIT of %s exceeded for query: %s" %
                (MONGO_QUERY_RESULTS_LIMIT, db_query))

        variant = Variant.fromJSON(variant_dict)
        datastore.add_annotations_to_variant(variant, family.project_id)
        if not passes_variant_filter(variant, variant_filter)[0]:
            continue

        # handle genotype filters
        if len(parental_ids) != 2:
            # ordinary filters for non-trios
            for indiv_id in de_novo_filter.keys():
                genotype = variant.get_genotype(indiv_id)
                if not passes_genotype_filter(genotype, quality_filter):
                    break
            else:
                # no individual failed the filter
                yield variant
        else:
            # for trios use Mark's recommended filters for de-novo variants:
            # Hard-coded thresholds:
            #   1) Child must have > 10% of combined Parental Read Depth
            #   2) MinimumChildGQscore >= 20
            #   3) MaximumParentAlleleBalance <= 5%
            # Adjustable filters:
            #   Variants should PASS
            #   Child AB should be >= 20

            # compute parental read depth for filter 1
            total_parental_read_depth = 0
            for indiv_id in parental_ids:
                extras = variant.get_genotype(indiv_id).extras
                if extras and 'dp' in extras and extras['dp'] != '.':
                    total_parental_read_depth += int(extras['dp'])
                else:
                    # both parents must have DP to use the parental_read_depth filters
                    total_parental_read_depth = None
                    break

            for indiv_id in de_novo_filter.keys():
                # copy before modifying; quality_filter defaults to None, which
                # has no copy(), so start from an empty filter in that case
                if quality_filter is None:
                    quality_filter_temp = {}
                else:
                    quality_filter_temp = quality_filter.copy()

                if indiv_id in parental_ids:
                    # handle one of the parents
                    quality_filter_temp['max_ab'] = 5
                else:
                    # handle child
                    quality_filter_temp['min_gq'] = 20
                    if total_parental_read_depth is not None:
                        quality_filter_temp[
                            'min_dp'] = total_parental_read_depth * 0.1

                genotype = variant.get_genotype(indiv_id)
                if not passes_genotype_filter(genotype, quality_filter_temp):
                    break
            else:
                # no individual failed its (parent- or child-specific) filter
                yield variant
コード例 #6
0
ファイル: family.py プロジェクト: batsal/xbrowse
def get_de_novo_variants(datastore, reference, family, variant_filter=None, quality_filter=None):
    """
    Yield variants that follow de novo inheritance in family.

    Queries the family's collection with the filter from
    inheritance.get_de_novo_filter(family), sorted by 'xpos'. Each result is
    annotated, checked with passes_variant_filter, and then every individual
    named in the de novo filter is checked with passes_genotype_filter. When
    exactly two parent ids are found among the family's individuals, the
    trio-specific thresholds described in the body are added to a copy of
    quality_filter.

    This is a generator, so nothing runs (and nothing is raised) until it is
    iterated.

    Args:
        datastore: provides _make_db_query, _get_family_collection and
            add_annotations_to_variant
        reference: not used by this function
        family: provides project_id, family_id, individuals and get_individuals()
        variant_filter: passed to _make_db_query and passes_variant_filter
        quality_filter: dict of genotype quality thresholds, or None. It is
            handed unchanged to passes_genotype_filter in the non-trio branch;
            in the trio branch None is treated as an empty dict.

    Yields:
        Variant objects that pass all filters.

    Raises:
        ValueError: if no collection is found for the family
    """
    de_novo_filter = inheritance.get_de_novo_filter(family)
    db_query = datastore._make_db_query(de_novo_filter, variant_filter)

    collection = datastore._get_family_collection(family.project_id, family.family_id)
    if not collection:
        raise ValueError("Error: mongodb collection not found for project %s family %s " % (family.project_id, family.family_id))

    variant_iter = collection.find(db_query).sort('xpos')

    # get ids of parents in this family (only parents that are themselves
    # members of the family count)
    valid_ids = set(family.individuals)
    parental_ids = set()
    for individual in family.get_individuals():
        for parent_id in (individual.paternal_id, individual.maternal_id):
            if parent_id in valid_ids:
                parental_ids.add(parent_id)

    # loop over all variants returned
    for variant_dict in variant_iter:
        variant = Variant.fromJSON(variant_dict)
        datastore.add_annotations_to_variant(variant, family.project_id)
        if not passes_variant_filter(variant, variant_filter)[0]:
            continue

        # handle genotype filters
        if len(parental_ids) != 2:
            # ordinary filters for non-trios
            for indiv_id in de_novo_filter.keys():
                genotype = variant.get_genotype(indiv_id)
                if not passes_genotype_filter(genotype, quality_filter):
                    break
            else:
                # no individual failed the filter
                yield variant
        else:
            # for trios use Mark's recommended filters for de-novo variants:
            # Hard-coded thresholds:
            #   1) Child must have > 10% of combined Parental Read Depth
            #   2) MinimumChildGQscore >= 20
            #   3) MaximumParentAlleleBalance <= 5%
            # Adjustable filters:
            #   Variants should PASS
            #   Child AB should be >= 20

            # compute parental read depth for filter 1
            total_parental_read_depth = 0
            for indiv_id in parental_ids:
                extras = variant.get_genotype(indiv_id).extras
                # '.' is treated as "no depth recorded"; int('.') would raise ValueError
                if extras and 'dp' in extras and extras['dp'] != '.':
                    total_parental_read_depth += int(extras['dp'])
                else:
                    # both parents must have DP to use the parental_read_depth filters
                    total_parental_read_depth = None
                    break

            for indiv_id in de_novo_filter.keys():
                # copy before modifying; quality_filter defaults to None, which
                # has no copy(), so start from an empty filter in that case
                if quality_filter is None:
                    quality_filter_temp = {}
                else:
                    quality_filter_temp = quality_filter.copy()

                if indiv_id in parental_ids:
                    # handle one of the parents
                    quality_filter_temp['max_ab'] = 5
                else:
                    # handle child
                    quality_filter_temp['min_gq'] = 20
                    if total_parental_read_depth is not None:
                        quality_filter_temp['min_dp'] = total_parental_read_depth * 0.1

                genotype = variant.get_genotype(indiv_id)
                if not passes_genotype_filter(genotype, quality_filter_temp):
                    break
            else:
                # no individual failed its (parent- or child-specific) filter
                yield variant