コード例 #1
0
def get_variants_by_family_for_gene(mall,
                                    family_list,
                                    inheritance_mode,
                                    gene_id,
                                    variant_filter=None,
                                    quality_filter=None,
                                    user=None):
    """
    Collect, per family, the variants found in a single gene.

    The search is delegated to get_variants_with_inheritance_mode(), once per
    family, with the variant filter additionally restricted to `gene_id`.

    Args:
        mall: passed through unchanged to get_variants_with_inheritance_mode().
        family_list: iterable of family objects exposing `project_id` and
            `family_id`.
        inheritance_mode: passed through unchanged to the search.
        gene_id: gene the search is restricted to (via `add_gene`).
        variant_filter: optional filter to start from. When None, a fresh
            VariantFilter() is created. The caller's object is not modified.
        quality_filter: passed through unchanged to the search.
        user: passed through to the search as the `user` keyword.

    Returns:
        dict mapping (project_id, family_id) -> list of variants.
    """
    import copy  # function-scope import keeps the block self-contained

    if variant_filter is None:
        variant_filter = VariantFilter()
    else:
        # add_gene() is called for its effect on the filter object; operate on
        # a private copy so the gene restriction does not leak back into the
        # caller's filter (and pile up across repeated calls).
        variant_filter = copy.deepcopy(variant_filter)
    variant_filter.add_gene(gene_id)

    by_family = {}
    for family in family_list:
        family_t = (family.project_id, family.family_id)
        by_family[family_t] = list(
            get_variants_with_inheritance_mode(
                mall,
                family,
                inheritance_mode,
                variant_filter,
                quality_filter,
                user=user,
            ))

    return by_family
コード例 #2
0
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """
    Stream the variants matching `inheritance_mode` for every family of `project`.

    This is a generator: one (family, variant_list) pair is yielded per family
    returned by project.get_families(). Progress is written to stdout; the
    " got N variants" suffix for a family is printed only when the consumer
    resumes the generator after receiving that family's pair.
    """

    # Search specification: the 'moderate_impact' default filter plus
    # reference-population frequency thresholds.
    # This could theoretically differ by project, if there are different reference populations.
    variant_filter = get_default_variant_filter('moderate_impact')
    for population_threshold in (
            ('1kg_wgs_phase3', g1k_freq_threshold),
            ('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold),
            ('exac_v3', exac_freq_threshold),
            ('exac_v3_popmax', exac_popmax_threshold),
    ):
        variant_filter.ref_freqs.append(population_threshold)

    quality_filter = dict(
        vcf_filter='pass',
        min_gq=GQ_threshold,
        min_ab=AB_threshold,
    )

    # Run the inheritance search family by family.
    families = project.get_families()
    for position, family in enumerate(families, start=1):
        progress = "Processing %s - family %s  (%d / %d) .." % (
            inheritance_mode, family.family_id, position, len(families))
        sys.stdout.write(progress)

        matches = get_variants_with_inheritance_mode(
            get_mall(project.project_id),
            family.xfamily(),
            inheritance_mode,
            variant_filter=variant_filter,
            quality_filter=quality_filter,
        )
        variant_list = list(matches)

        yield family, variant_list
        print(" got %d variants" % len(variant_list))
コード例 #3
0
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """
    Lazily produce (family, variant_list) pairs for a project / inheritance combo.

    The function is a generator. For each family from project.get_families()
    it runs get_variants_with_inheritance_mode() with a shared variant filter
    and quality filter, materialises the result as a list and yields it
    together with the family. A progress line is written to stdout per family.
    """

    # Build the search specification once; it is reused for every family.
    # This could theoretically differ by project, if there are different reference populations.
    variant_filter = get_default_variant_filter("moderate_impact")
    reference_thresholds = [
        ("1kg_wgs_phase3", g1k_freq_threshold),
        ("1kg_wgs_phase3_popmax", g1k_popmax_freq_threshold),
        ("exac_v3", exac_freq_threshold),
        ("exac_v3_popmax", exac_popmax_threshold),
    ]
    for entry in reference_thresholds:
        variant_filter.ref_freqs.append(entry)

    quality_filter = {
        "vcf_filter": "pass",
        "min_gq": GQ_threshold,
        "min_ab": AB_threshold,
    }

    families = project.get_families()
    for idx, family in enumerate(families):
        message_args = (inheritance_mode, family.family_id, idx + 1, len(families))
        sys.stdout.write("Processing %s - family %s  (%d / %d) .." % message_args)

        search_results = get_variants_with_inheritance_mode(
            get_mall(project.project_id),
            family.xfamily(),
            inheritance_mode,
            variant_filter=variant_filter,
            quality_filter=quality_filter,
        )
        variant_list = list(search_results)
        yield family, variant_list

        # Runs only once the consumer resumes the generator.
        print(" got %d variants" % len(variant_list))
コード例 #4
0
ファイル: utils.py プロジェクト: ericminikel/xbrowse
def calculate_mendelian_variant_search(search_spec, xfamily):
    """
    Run the search described by `search_spec` on one family.

    The backend call is chosen by `search_spec.search_mode`
    ('standard_inheritance', 'custom_inheritance', 'gene_burden',
    'allele_count' or 'all_variants').

    Returns:
        list of variants, or None when the search mode is none of the above.
    """
    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        return list(get_variants_with_inheritance_mode(
            get_mall(),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'custom_inheritance':
        return list(get_variants_family(
            get_datastore(),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'gene_burden':
        # Gene-level search first, then flatten the genes back into variants.
        genes = get_genes_family(
            get_datastore(),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        return list(stream_utils.gene_stream_to_variant_stream(genes, get_reference()))

    if mode == 'allele_count':
        return list(get_variants_allele_count(
            get_datastore(),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'all_variants':
        return list(get_variants_family(
            get_datastore(),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    # Unrecognised search mode.
    return None
コード例 #5
0
ファイル: utils.py プロジェクト: mattsolo1/seqr
def calculate_mendelian_variant_search(search_spec, xfamily):
    """
    Run the search described by `search_spec` on one family and return a list
    of variants.

    A trace line with the project id, search mode and the spec's attributes
    is written to stderr first. The backend call is selected by
    `search_spec.search_mode`; None is returned for any other mode.
    """
    trace_args = (xfamily.project_id, search_spec.search_mode, search_spec.__dict__)
    sys.stderr.write("     mendelian_variant_search for %s - search mode: %s  %s\n" % trace_args)

    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        return list(get_variants_with_inheritance_mode(
            get_mall(xfamily.project_id),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'custom_inheritance':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'gene_burden':
        # Search at gene level, then expand the genes into their variants.
        genes = get_genes_family(
            get_datastore(xfamily.project_id),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        return list(stream_utils.gene_stream_to_variant_stream(genes, get_reference()))

    if mode == 'allele_count':
        return list(get_variants_allele_count(
            get_datastore(xfamily.project_id),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'all_variants':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            indivs_to_consider=xfamily.indiv_id_list(),
        ))

    # Unrecognised search mode.
    return None
コード例 #6
0
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """
    Stream (family, variant_list) pairs for a project / inheritance combo.

    This is a generator. For inheritance_mode == "all_variants" the family's
    variants are fetched with get_variants(); any other mode goes through
    get_variants_with_inheritance_mode(). A family whose search raises
    ValueError is reported on stdout and skipped.
    """

    # Search specification: 'moderate_impact' default filter plus
    # reference-population frequency thresholds.
    # This could theoretically differ by project, if there are different reference populations.
    variant_filter = get_default_variant_filter('moderate_impact')
    for population_threshold in (
            ('1kg_wgs_phase3', g1k_freq_threshold),
            ('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold),
            ('exac_v3', exac_freq_threshold),
            ('exac_v3_popmax', exac_popmax_threshold),
            ('merck-wgs-3793', merck_wgs_3793_threshold),
    ):
        variant_filter.ref_freqs.append(population_threshold)

    # Only genotype-quality and allele-balance thresholds are applied here;
    # no 'vcf_filter' criterion is set.
    quality_filter = dict(min_gq=GQ_threshold, min_ab=AB_threshold)

    families = project.get_families()
    for position, family in enumerate(families, start=1):
        print("Processing %s - family %s  (%d / %d)" %
              (inheritance_mode, family.family_id, position, len(families)))
        try:
            if inheritance_mode != "all_variants":
                found = get_variants_with_inheritance_mode(
                    get_mall(project.project_id),
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                )
            else:
                found = get_variants(
                    get_datastore(project.project_id),
                    family.xfamily(),
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                    indivs_to_consider=family.indiv_id_list(),
                )
            # The yield stays inside the try block, as in the search itself.
            yield family, list(found)
        except ValueError as e:
            print("Error: %s. Skipping family %s" % (str(e), str(family)))
コード例 #7
0
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """
    Lazily produce the variants for this project / inheritance combo.

    Generator yielding one (family, variant_list) pair per family of
    `project`. "all_variants" is served by get_variants(); every other
    inheritance mode by get_variants_with_inheritance_mode(). If a ValueError
    is raised while handling a family, an error line is printed and the
    family is skipped.
    """

    # Build the search specification shared by all families.
    # This could theoretically differ by project, if there are different reference populations.
    variant_filter = get_default_variant_filter('moderate_impact')
    reference_thresholds = [
        ('1kg_wgs_phase3', g1k_freq_threshold),
        ('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold),
        ('exac_v3', exac_freq_threshold),
        ('exac_v3_popmax', exac_popmax_threshold),
        ('merck-wgs-3793', merck_wgs_3793_threshold),
    ]
    for entry in reference_thresholds:
        variant_filter.ref_freqs.append(entry)

    # No 'vcf_filter' criterion is set; only GQ and AB thresholds apply.
    quality_filter = {'min_gq': GQ_threshold, 'min_ab': AB_threshold}

    families = project.get_families()
    for idx, family in enumerate(families):
        progress_args = (inheritance_mode, family.family_id, idx + 1, len(families))
        print("Processing %s - family %s  (%d / %d)" % progress_args)
        try:
            if inheritance_mode == "all_variants":
                variant_stream = get_variants(
                    get_datastore(project.project_id),
                    family.xfamily(),
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                    indivs_to_consider=family.indiv_id_list(),
                )
            else:
                variant_stream = get_variants_with_inheritance_mode(
                    get_mall(project.project_id),
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                )
            yield family, list(variant_stream)
        except ValueError as e:
            print("Error: %s. Skipping family %s" % (str(e), str(family)))
コード例 #8
0
def get_families_by_gene(mall, family_group, inheritance_mode, variant_filter=None, quality_filter=None):
    """
    Group the families of `family_group` by the genes their variants hit.

    Every family is searched with get_variants_with_inheritance_mode(); each
    gene id in a variant's `coding_gene_ids` is credited with that family.

    Yields:
        (gene_id, sorted list of (project_id, family_id) tuples), one pair
        per gene, in the order the genes were first encountered.
    """
    families_by_gene = {}

    for family in family_group.get_families():
        family_key = (family.project_id, family.family_id)
        hits = get_variants_with_inheritance_mode(
            mall,
            family,
            inheritance_mode,
            variant_filter,
            quality_filter,
        )
        for variant in hits:
            for gene_id in variant.coding_gene_ids:
                families_by_gene.setdefault(gene_id, set()).add(family_key)

    for gene_id in families_by_gene:
        yield gene_id, sorted(families_by_gene[gene_id])
コード例 #9
0
def get_variants_by_family_for_gene(mall, family_list, inheritance_mode, gene_id, variant_filter=None, quality_filter=None):
    """
    Collect, per family, the variants found in a single gene.

    The search is delegated to get_variants_with_inheritance_mode(), once per
    family, with the variant filter additionally restricted to `gene_id`.

    Args:
        mall: passed through unchanged to get_variants_with_inheritance_mode().
        family_list: iterable of family objects exposing `project_id` and
            `family_id`.
        inheritance_mode: passed through unchanged to the search.
        gene_id: gene the search is restricted to (via `add_gene`).
        variant_filter: optional filter to start from. When None, a fresh
            VariantFilter() is created. The caller's object is not modified.
        quality_filter: passed through unchanged to the search.

    Returns:
        dict mapping (project_id, family_id) -> list of variants.
    """
    import copy  # function-scope import keeps the block self-contained

    if variant_filter is None:
        variant_filter = VariantFilter()
    else:
        # add_gene() is called for its effect on the filter object; operate on
        # a private copy so the gene restriction does not leak back into the
        # caller's filter (and pile up across repeated calls).
        variant_filter = copy.deepcopy(variant_filter)
    variant_filter.add_gene(gene_id)

    by_family = {}
    for family in family_list:
        family_t = (family.project_id, family.family_id)
        by_family[family_t] = list(get_variants_with_inheritance_mode(
            mall,
            family,
            inheritance_mode,
            variant_filter,
            quality_filter
        ))

    return by_family
コード例 #10
0
ファイル: views.py プロジェクト: frichter/seqr
def combine_mendelian_families_spec(request):
    """
    Serve the results of a combined (multi-family) Mendelian search.

    Reads `search_hash`, `return_type` and `group_by_variants` from
    request.GET. Depending on those parameters the response is:

      * return_type != 'csv': a JSONResponse with the genes and the search spec;
      * return_type == 'csv' without group_by_variants: a CSV with one row per gene;
      * return_type == 'csv' with group_by_variants: a CSV with one row per
        variant, followed by genotype / GQ / DP columns for every individual
        of the family group that has variant data.

    Raises:
        PermissionDenied: if project.can_view(request.user) is false.
    """

    project, family_group = utils.get_project_and_family_group_for_user(request.user, request.GET)
    if not project.can_view(request.user):
        raise PermissionDenied

    # Look up the cached search spec (and gene results, which may be None) for this hash.
    search_hash = request.GET.get('search_hash')
    search_spec, genes = cache_utils.get_cached_results(project.project_id, search_hash)
    search_spec_obj = MendelianVariantSearchSpec.fromJSON(search_spec)

    # Gene-level output (JSON, or CSV grouped by gene).
    if request.GET.get('return_type') != 'csv' or not request.GET.get('group_by_variants'):
        if genes is None:
            # No gene results in the cache: compute them now.
            genes = api_utils.calculate_combine_mendelian_families(family_group, search_spec)
        api_utils.add_extra_info_to_genes(project, get_reference(), genes)
    
        if request.GET.get('return_type') != 'csv':
            return JSONResponse({
                    'is_error': False,
                    'genes': genes,
                    'search_spec': search_spec,
                    })
        else:
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="family_group_results_{}.csv"'.format(search_hash)
            writer = csv.writer(response)
            writer.writerow(["gene", "# families", "family list", "chrom", "start", "end"])
            for gene in genes:
                # family_id_list entries are (project_id, family_id) pairs; only the family id is exported.
                family_id_list = [family_id for (project_id, family_id) in gene["family_id_list"]]
                # NOTE(review): each row ends with an extra empty 7th field while the header has 6 columns - confirm intended.
                writer.writerow(map(str, [gene["gene_name"], len(family_id_list), " ".join(family_id_list), gene["chr"], gene["start"], gene["end"], ""]))
            return response
    else:
        # download results grouped by variant
        # Individuals (across all families of the group) that get genotype columns.
        indiv_id_list = []
        for family in family_group.get_families():
            indiv_id_list.extend(family.indiv_ids_with_variant_data())

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="results_{}.csv"'.format(search_hash)
        writer = csv.writer(response)
        
        # Header: fixed variant columns, one column per reference population,
        # prediction columns, then genotype / GQ / DP per individual.
        headers = ['genes','chr','pos','ref','alt','worst_annotation' ]
        headers.extend(project.get_reference_population_slugs())
        headers.extend([ 'polyphen','sift','muttaster','fathmm'])
        for indiv_id in indiv_id_list:
            headers.append(indiv_id)
            headers.append(indiv_id+'_gq')
            headers.append(indiv_id+'_dp')
        
        writer.writerow(headers)

        mall = get_mall(project.project_id)
        # (xpos, ref, alt) -> {indiv_id -> variant object returned for that individual's family}
        variant_key_to_individual_id_to_variant = defaultdict(dict)
        # (xpos, ref, alt) -> one representative variant (the last one seen for that key)
        variant_key_to_variant = {}
        for family in family_group.get_families():
            for variant in get_variants_with_inheritance_mode(
                mall,
                family.xfamily(),
                search_spec_obj.inheritance_mode,
                search_spec_obj.variant_filter,
                search_spec_obj.quality_filter,
                ):
                # Variants without any coding gene are left out of the export.
                if len(variant.coding_gene_ids) == 0:
                    continue

                variant_key = (variant.xpos, variant.ref, variant.alt)
                variant_key_to_variant[variant_key] = variant
                # Every individual of this family is pointed at this family's variant object.
                for indiv_id in family.indiv_ids_with_variant_data():
                    variant_key_to_individual_id_to_variant[variant_key][indiv_id] = variant
                    
        # One CSV row per distinct variant, in sorted key order.
        for variant_key in sorted(variant_key_to_individual_id_to_variant.keys()):
            variant = variant_key_to_variant[variant_key]
            individual_id_to_variant = variant_key_to_individual_id_to_variant[variant_key]

            genes = [mall.reference.get_gene_symbol(gene_id) for gene_id in variant.coding_gene_ids]
            fields = []
            fields.append(','.join(genes))
            fields.extend([
                        variant.chr,
                        str(variant.pos),
                        variant.ref,
                        variant.alt,
                        variant.annotation.get('vep_group', '.'),
                        ])
            for ref_population_slug in project.get_reference_population_slugs():
                fields.append(variant.annotation['freqs'][ref_population_slug])
            for field_key in ['polyphen', 'sift', 'muttaster', 'fathmm']:
                fields.append(variant.annotation[field_key])

            for indiv_id in indiv_id_list:
                # NOTE: rebinds `variant` to the per-individual variant; it is None
                # for individuals whose family did not return this variant.
                variant = individual_id_to_variant.get(indiv_id)                    
                genotype = None
                if variant is not None:
                    genotype = variant.get_genotype(indiv_id)

                if genotype is None:
                    # No genotype available: placeholders for genotype, GQ and DP.
                    fields.extend(['.', '.', '.'])
                else:
                    # Render the genotype as allele/allele from the alt-allele count.
                    if genotype.num_alt == 0:
                        fields.append("%s/%s" % (variant.ref, variant.ref))
                    elif genotype.num_alt == 1:
                        fields.append("%s/%s" % (variant.ref, variant.alt))
                    elif genotype.num_alt == 2:
                        fields.append("%s/%s" % (variant.alt, variant.alt))
                    else:
                        fields.append("./.")

                    fields.append(str(genotype.gq) if genotype.gq is not None else '.')
                    fields.append(genotype.extras['dp'] if genotype.extras.get('dp') is not None else '.')    
            writer.writerow(fields)
        return response        
コード例 #11
0
ファイル: utils.py プロジェクト: macarthur-lab/seqr
def calculate_mendelian_variant_search(search_spec, family, user=None):
    """
    Run the search described by `search_spec` on one family.

    The backend call is chosen by `search_spec.search_mode`
    ('standard_inheritance', 'custom_inheritance', 'gene_burden',
    'allele_count' or 'all_variants'). Every returned variant is tagged with
    the family id via variant.set_extra('family_id', ...).

    Args:
        search_spec: search specification; its filters are passed through to
            the backend call.
        family: object providing xfamily(), `project` and `family_id`.
        user: passed through to the backend calls as the `user` keyword.

    Returns:
        list of variants, or None when the search mode is none of the above.
    """
    xfamily = family.xfamily()
    project = family.project
    variants = None
    if search_spec.search_mode == 'standard_inheritance':
        variants = list(get_variants_with_inheritance_mode(
            get_mall(project),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        ))

    elif search_spec.search_mode == 'custom_inheritance':
        variants = list(get_variants_family(
            get_datastore(project),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        ))

    elif search_spec.search_mode == 'gene_burden':
        # Search at gene level, then expand the genes into their variants.
        gene_stream = get_genes_family(
            get_datastore(project),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        )

        variants = list(stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference()))

    elif search_spec.search_mode == 'allele_count':
        variants = list(get_variants_allele_count(
            get_datastore(project),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        ))

    elif search_spec.search_mode == 'all_variants':
        variants = list(get_variants_family(
            get_datastore(project),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            indivs_to_consider=xfamily.indiv_id_list(),
            user=user,
        ))

    if variants is None:
        # Unrecognised search mode: nothing to tag. Without this guard the
        # loop below would raise TypeError on None.
        return None

    for variant in variants:
        variant.set_extra('family_id', family.family_id)

    return variants
コード例 #12
0
def calculate_mendelian_variant_search(search_spec, xfamily):
    """
    Run the search described by `search_spec` on one family and return a list
    of variants.

    A multi-line trace (project id, search mode and the individual filters)
    is written to stderr first. The backend call is selected by
    `search_spec.search_mode`; None is returned for any other mode.
    """
    trace_template = (
        "mendelian_variant_search for %s - search mode: %s \n"
        "variant_filter: %s \ninheritance_mode: %s \nallele_count_filter: %s \nquality_filter: %s \ngenotype_inheritance_filter: %s \n"
    )
    # An absent/falsy variant filter is logged as an empty string.
    if search_spec.variant_filter:
        variant_filter_repr = search_spec.variant_filter.toJSON()
    else:
        variant_filter_repr = ''
    sys.stderr.write(trace_template % (
        xfamily.project_id,
        search_spec.search_mode,
        variant_filter_repr,
        search_spec.inheritance_mode,
        search_spec.allele_count_filter,
        search_spec.quality_filter,
        search_spec.genotype_inheritance_filter,
    ))

    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        return list(get_variants_with_inheritance_mode(
            get_mall(xfamily.project_id),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'custom_inheritance':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'gene_burden':
        # Search at gene level, then expand the genes into their variants.
        genes = get_genes_family(
            get_datastore(xfamily.project_id),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        return list(stream_utils.gene_stream_to_variant_stream(genes, get_reference()))

    if mode == 'allele_count':
        return list(get_variants_allele_count(
            get_datastore(xfamily.project_id),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        ))

    if mode == 'all_variants':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            indivs_to_consider=xfamily.indiv_id_list(),
        ))

    # Unrecognised search mode.
    return None
コード例 #13
0
def calculate_mendelian_variant_search(search_spec, xfamily):
    """
    Run the search described by `search_spec` on one family and return a list
    of variants.

    The inheritance mode is logged to stderr first. The backend call is
    selected by `search_spec.search_mode`; None is returned for any other
    mode.
    """
    # NOTE(review): the log label reads "cohort_variant_search" although this
    # is the mendelian search, and it has no trailing newline - confirm whether
    # that is intended. Kept as-is.
    log_line = "     cohort_variant_search - inheritance_mode: %s" % search_spec.inheritance_mode
    sys.stderr.write(log_line)

    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        return list(get_variants_with_inheritance_mode(
            get_mall(xfamily.project_id),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'custom_inheritance':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'gene_burden':
        # Search at gene level, then expand the genes into their variants.
        genes = get_genes_family(
            get_datastore(xfamily.project_id),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        return list(stream_utils.gene_stream_to_variant_stream(genes, get_reference()))

    if mode == 'allele_count':
        return list(get_variants_allele_count(
            get_datastore(xfamily.project_id),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    if mode == 'all_variants':
        return list(get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        ))

    # Unrecognised search mode.
    return None
コード例 #14
0
def calculate_mendelian_variant_search(search_spec, family, user=None):
    """
    Run the search described by `search_spec` on one family.

    The backend call is chosen by `search_spec.search_mode`
    ('standard_inheritance', 'custom_inheritance', 'gene_burden',
    'allele_count' or 'all_variants'). Every returned variant is tagged with
    the family id via variant.set_extra('family_id', ...).

    Args:
        search_spec: search specification; its filters are passed through to
            the backend call.
        family: object providing xfamily(), `project` and `family_id`.
        user: passed through to the backend calls as the `user` keyword.

    Returns:
        list of variants, or None when the search mode is none of the above.
    """
    xfamily = family.xfamily()
    project = family.project
    variants = None
    if search_spec.search_mode == 'standard_inheritance':
        variants = list(
            get_variants_with_inheritance_mode(
                get_mall(project),
                xfamily,
                search_spec.inheritance_mode,
                variant_filter=search_spec.variant_filter,
                quality_filter=search_spec.quality_filter,
                user=user,
            ))

    elif search_spec.search_mode == 'custom_inheritance':
        variants = list(
            get_variants_family(
                get_datastore(project),
                xfamily,
                genotype_filter=search_spec.genotype_inheritance_filter,
                variant_filter=search_spec.variant_filter,
                quality_filter=search_spec.quality_filter,
                user=user,
            ))

    elif search_spec.search_mode == 'gene_burden':
        # Search at gene level, then expand the genes into their variants.
        gene_stream = get_genes_family(
            get_datastore(project),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            user=user,
        )

        variants = list(
            stream_utils.gene_stream_to_variant_stream(gene_stream,
                                                       get_reference()))

    elif search_spec.search_mode == 'allele_count':
        variants = list(
            get_variants_allele_count(
                get_datastore(project),
                xfamily,
                search_spec.allele_count_filter,
                variant_filter=search_spec.variant_filter,
                quality_filter=search_spec.quality_filter,
                user=user,
            ))

    elif search_spec.search_mode == 'all_variants':
        variants = list(
            get_variants_family(
                get_datastore(project),
                xfamily,
                variant_filter=search_spec.variant_filter,
                quality_filter=search_spec.quality_filter,
                indivs_to_consider=xfamily.indiv_id_list(),
                user=user,
            ))

    if variants is None:
        # Unrecognised search mode: nothing to tag. Without this guard the
        # loop below would raise TypeError on None.
        return None

    for variant in variants:
        variant.set_extra('family_id', family.family_id)

    return variants
コード例 #15
0
    def handle(self, *args, **options):
        """
        Run an inheritance-mode variant search for a list of families and
        write the results to three TSV files in the current working directory.

        Positional args:
            args[0]: project id.
            args[1]: inheritance mode handed to get_variants_with_inheritance_mode().
            args[2]: path of a text file with one family id per line
                (blank lines are ignored).

        Output files:
            family_variants.tsv: one row per (family, variant).
            variants.tsv: one row per distinct variant, with a 0/1 column per family.
            genes.tsv: one row per gene symbol, with a 0/1 column per family.
        """

        project_id = args[0]
        inheritance_mode = args[1]
        fam_list_file_path = args[2]

        project = Project.objects.get(project_id=project_id)
        families = []
        # `with` closes the family list even if a lookup below raises.
        with open(fam_list_file_path) as fam_list_file:
            for line in fam_list_file:
                family_id = line.strip('\n')
                if not family_id:
                    # A blank line carries no family id to look up.
                    continue
                families.append(Family.objects.get(project=project, family_id=family_id))

        # create search spec
        variant_filter = next(f for f in project.get_default_variant_filters() if f['slug'] == 'moderate_impact')['variant_filter']
        quality_filter = {
            'min_gq': 20,
            'min_ab': 25,
        }

        # run MendelianVariantSearch for each family, collect results
        family_results = {}
        for family in families:
            family_results[family] = list(get_variants_with_inheritance_mode(
                get_mall(project_id),
                family.xfamily(),
                inheritance_mode,
                variant_filter=variant_filter,
                quality_filter=quality_filter,
            ))

        # create family_variants.tsv
        with open('family_variants.tsv', 'w') as f:
            writer = csv.writer(f, dialect='excel', delimiter='\t')
            writer.writerow([
                '#family_id',
                'gene',
                'chrom',
                'ref',
                'alt',
                'rsid',
                'annotation',
            ])
            for family in families:
                for variant in family_results[family]:
                    writer.writerow([
                        family.family_id,
                        get_gene_symbol(variant),
                        variant.chr,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id,
                        variant.annotation['vep_group'],
                    ])

        # create variants.tsv
        by_variant = {}    # unique_tuple -> set of family ids carrying the variant
        variant_info = {}  # unique_tuple -> first variant object seen for that tuple
        for family in families:
            for variant in family_results[family]:
                variant_t = variant.unique_tuple()
                variant_info.setdefault(variant_t, variant)
                by_variant.setdefault(variant_t, set()).add(family.family_id)

        with open('variants.tsv', 'w') as f:
            writer = csv.writer(f, dialect='excel', delimiter='\t')
            headers = [
                '#chrom',
                'ref',
                'alt',
                'rsid',
                # The comma after 'gene' was missing, which fused it with the
                # next literal into 'geneannotation' and shifted later columns.
                'gene',
                'annotation',
                'num_families',
            ]
            headers.extend([fam.family_id for fam in families])
            writer.writerow(headers)
            for variant_t in sorted(variant_info.keys()):
                variant = variant_info[variant_t]
                fields = [
                    variant.chr,
                    variant.ref,
                    variant.alt,
                    variant.vcf_id,
                    get_gene_symbol(variant),
                    variant.annotation['vep_group'],
                    str(len(by_variant[variant_t])),
                ]
                for family in families:
                    fields.append('1' if family.family_id in by_variant[variant_t] else '0')
                writer.writerow(fields)

        # create genes.tsv
        by_gene = {}  # gene symbol -> set of family ids with a variant in that gene
        for family in families:
            for variant in family_results[family]:
                gene_symbol = get_gene_symbol(variant)
                by_gene.setdefault(gene_symbol, set()).add(family.family_id)

        with open('genes.tsv', 'w') as f:
            writer = csv.writer(f, dialect='excel', delimiter='\t')
            headers = [
                '#gene',
                'num_families',
            ]
            headers.extend([fam.family_id for fam in families])
            writer.writerow(headers)
            for gene_symbol in sorted(by_gene.keys()):
                fields = [
                    gene_symbol,
                    str(len(by_gene[gene_symbol])),
                ]
                for family in families:
                    fields.append('1' if family.family_id in by_gene[gene_symbol] else '0')
                writer.writerow(fields)
コード例 #16
0
    def handle(self, *args, **options):
        """
        Search a list of families for variants matching an inheritance mode
        and write three tab-separated reports into the current directory.

        Positional args:
            args[0]: project_id of the Project the families belong to
            args[1]: inheritance mode handed to get_variants_with_inheritance_mode
            args[2]: path to a text file listing one family_id per line

        Output files:
            family_variants.tsv: one row per (family, variant) pair
            variants.tsv: one row per unique variant, plus a 1/0 column per family
            genes.tsv: one row per gene symbol, plus a 1/0 column per family
        """

        project_id = args[0]
        inheritance_mode = args[1]
        fam_list_file_path = args[2]

        project = Project.objects.get(project_id=project_id)

        # Read the family list in a context manager so the handle is released,
        # and ignore blank lines (e.g. a trailing newline at the end of the
        # file) instead of looking up a family with an empty id.
        families = []
        with open(fam_list_file_path) as fam_list_file:
            for line in fam_list_file:
                family_id = line.strip()
                if not family_id:
                    continue
                families.append(
                    Family.objects.get(project=project, family_id=family_id))
        family_ids = [fam.family_id for fam in families]

        # create search spec
        variant_filter = next(
            f for f in project.get_default_variant_filters()
            if f['slug'] == 'moderate_impact')['variant_filter']
        quality_filter = {
            'min_gq': 30,
            'min_ab': 25,
        }

        # run MendelianVariantSearch for each family, collect results
        # (the mall depends only on the project, so fetch it once)
        mall = get_mall(project_id)
        family_results = {}
        for family in families:
            family_results[family] = list(
                get_variants_with_inheritance_mode(
                    mall,
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                ))

        # create family_variants.tsv
        with open('family_variants.tsv', 'w') as f:
            writer = csv.writer(f, dialect='excel', delimiter='\t')
            writer.writerow([
                '#family_id',
                'gene',
                'chrom',
                'ref',
                'alt',
                'rsid',
                'annotation',
            ])
            for family in families:
                for variant in family_results[family]:
                    writer.writerow([
                        family.family_id,
                        get_gene_symbol(variant),
                        variant.chr,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id,
                        variant.annotation['vep_group'],
                    ])

        # create variants.tsv
        # by_variant: unique variant tuple -> set of family ids carrying it
        # variant_info: unique variant tuple -> first variant object seen
        by_variant = {}
        variant_info = {}
        for family in families:
            for variant in family_results[family]:
                variant_t = variant.unique_tuple()
                if variant_t not in by_variant:
                    by_variant[variant_t] = set()
                    variant_info[variant_t] = variant
                by_variant[variant_t].add(family.family_id)

        with open('variants.tsv', 'w') as f:
            writer = csv.writer(f, dialect='excel', delimiter='\t')
            # 'gene' and 'annotation' must be separate list items: adjacent
            # string literals without a comma are concatenated into a single
            # column name, which shifts every following header by one.
            headers = [
                '#chrom',
                'ref',
                'alt',
                'rsid',
                'gene',
                'annotation',
                'num_families',
            ]
            headers.extend(family_ids)
            writer.writerow(headers)
            for variant_t in sorted(variant_info):
                variant = variant_info[variant_t]
                carrier_ids = by_variant[variant_t]
                fields = [
                    variant.chr,
                    variant.ref,
                    variant.alt,
                    variant.vcf_id,
                    get_gene_symbol(variant),
                    variant.annotation['vep_group'],
                    str(len(carrier_ids)),
                ]
                fields.extend(
                    '1' if fam_id in carrier_ids else '0'
                    for fam_id in family_ids)
                writer.writerow(fields)

        # create genes.tsv
        # by_gene: gene symbol -> set of family ids with a variant in that gene
        by_gene = {}
        for family in families:
            for variant in family_results[family]:
                gene_symbol = get_gene_symbol(variant)
                if gene_symbol not in by_gene:
                    by_gene[gene_symbol] = set()
                by_gene[gene_symbol].add(family.family_id)

        with open('genes.tsv', 'w') as f:
            writer = csv.writer(f, dialect='excel', delimiter='\t')
            headers = [
                '#gene',
                'num_families',
            ]
            headers.extend(family_ids)
            writer.writerow(headers)
            for gene_symbol in sorted(by_gene):
                carrier_ids = by_gene[gene_symbol]
                fields = [
                    gene_symbol,
                    str(len(carrier_ids)),
                ]
                fields.extend(
                    '1' if fam_id in carrier_ids else '0'
                    for fam_id in family_ids)
                writer.writerow(fields)
コード例 #17
0
ファイル: views.py プロジェクト: rpete/seqr
def combine_mendelian_families_spec(request):
    """
    Return the results of a cached combine-mendelian-families search.

    Reads these GET parameters:
        search_hash: key used to look up the cached search spec / gene results
        return_type: 'csv' for a CSV attachment, anything else for JSON
        group_by_variants: when truthy together with return_type == 'csv',
            the CSV has one row per variant instead of one row per gene

    Raises PermissionDenied when project.can_view(request.user) is false.
    """

    project, family_group = utils.get_project_and_family_group_for_user(request.user, request.GET)
    if not project.can_view(request.user):
        raise PermissionDenied

    search_hash = request.GET.get('search_hash')
    search_spec, genes = cache_utils.get_cached_results(project.project_id, search_hash)
    search_spec_obj = MendelianVariantSearchSpec.fromJSON(search_spec)

    wants_csv = request.GET.get('return_type') == 'csv'

    if not wants_csv or not request.GET.get('group_by_variants'):
        # results grouped by gene: recompute only if nothing was cached
        if genes is None:
            genes = api_utils.calculate_combine_mendelian_families(family_group, search_spec)
        api_utils.add_extra_info_to_genes(project, get_reference(), genes)

        if not wants_csv:
            return JSONResponse({
                'is_error': False,
                'genes': genes,
                'search_spec': search_spec,
            })

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="family_group_results_{}.csv"'.format(search_hash)
        writer = csv.writer(response)
        writer.writerow(["gene", "# families", "family list", "chrom", "start", "end"])
        for gene in genes:
            family_id_list = [family_id for (project_id, family_id) in gene["family_id_list"]]
            # NOTE(review): every row ends with an extra empty column that has
            # no header; kept so the emitted CSV stays unchanged - confirm
            # whether it is intentional.
            writer.writerow([str(value) for value in [
                gene["gene_name"],
                len(family_id_list),
                " ".join(family_id_list),
                gene["chr"],
                gene["start"],
                gene["end"],
                "",
            ]])
        return response

    # download results grouped by variant
    annotation_keys = ['polyphen', 'sift', 'muttaster', 'fathmm']

    # Look these up once so the header and every row are built from the same
    # lists, instead of re-calling the accessors inside the loops.
    ref_population_slugs = list(project.get_reference_population_slugs())
    families = list(family_group.get_families())
    indiv_ids_by_family = [
        (family, list(family.indiv_ids_with_variant_data()))
        for family in families
    ]

    indiv_id_list = []
    for _, family_indiv_ids in indiv_ids_by_family:
        indiv_id_list.extend(family_indiv_ids)

    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="results_{}.csv"'.format(search_hash)
    writer = csv.writer(response)

    headers = ['genes', 'chr', 'pos', 'ref', 'alt', 'worst_annotation']
    headers.extend(ref_population_slugs)
    headers.extend(annotation_keys)
    for indiv_id in indiv_id_list:
        headers.extend([indiv_id, indiv_id + '_gq', indiv_id + '_dp'])
    writer.writerow(headers)

    mall = get_mall(project.project_id)
    # variant key -> {individual id -> variant object returned for that
    # individual's family}
    variant_key_to_individual_id_to_variant = defaultdict(dict)
    # variant key -> variant object used for the per-variant columns
    # (the last family to report the variant wins)
    variant_key_to_variant = {}
    for family, family_indiv_ids in indiv_ids_by_family:
        family_variants = get_variants_with_inheritance_mode(
            mall,
            family.xfamily(),
            search_spec_obj.inheritance_mode,
            search_spec_obj.variant_filter,
            search_spec_obj.quality_filter,
        )
        for variant in family_variants:
            # variants without any coding gene are left out of the report
            if not variant.coding_gene_ids:
                continue

            variant_key = (variant.xpos, variant.ref, variant.alt)
            variant_key_to_variant[variant_key] = variant
            for indiv_id in family_indiv_ids:
                variant_key_to_individual_id_to_variant[variant_key][indiv_id] = variant

    for variant_key in sorted(variant_key_to_individual_id_to_variant):
        variant = variant_key_to_variant[variant_key]
        individual_id_to_variant = variant_key_to_individual_id_to_variant[variant_key]

        gene_symbols = [mall.reference.get_gene_symbol(gene_id) for gene_id in variant.coding_gene_ids]
        fields = [
            ','.join(gene_symbols),
            variant.chr,
            str(variant.pos),
            variant.ref,
            variant.alt,
            variant.annotation.get('vep_group', '.'),
        ]
        for ref_population_slug in ref_population_slugs:
            fields.append(variant.annotation['freqs'][ref_population_slug])
        for field_key in annotation_keys:
            fields.append(variant.annotation[field_key])

        for indiv_id in indiv_id_list:
            # Use a separate name so the row-level `variant` is not rebound
            # while the per-individual columns are being filled in.
            indiv_variant = individual_id_to_variant.get(indiv_id)
            genotype = None
            if indiv_variant is not None:
                genotype = indiv_variant.get_genotype(indiv_id)

            if genotype is None:
                # genotype, gq and dp are all unknown for this individual
                fields.extend(['.', '.', '.'])
                continue

            if genotype.num_alt == 0:
                fields.append("%s/%s" % (indiv_variant.ref, indiv_variant.ref))
            elif genotype.num_alt == 1:
                fields.append("%s/%s" % (indiv_variant.ref, indiv_variant.alt))
            elif genotype.num_alt == 2:
                fields.append("%s/%s" % (indiv_variant.alt, indiv_variant.alt))
            else:
                fields.append("./.")

            fields.append(str(genotype.gq) if genotype.gq is not None else '.')
            depth = genotype.extras.get('dp')
            fields.append(depth if depth is not None else '.')
        writer.writerow(fields)
    return response