Esempio n. 1
0
def get_variants_in_gene(family_group,
                         gene_id,
                         variant_filter=None,
                         quality_filter=None):
    """
    Gather the variants of one gene for every family in a family group.

    For each family returned by ``family_group.get_families()`` the variants
    are read from the variant store of the family's project, passed through
    ``search_utils.filter_gene_variants_by_variant_filter`` and then handed
    to ``add_extra_info_to_variants_project`` (whose return value is ignored,
    so it presumably annotates the variants in place).

    Args:
        family_group: object exposing ``get_families()``.
        gene_id: gene identifier given to the variant store and the filter.
        variant_filter: optional filter forwarded to both the store query
            and the post-filtering step.
        quality_filter: accepted but not used by this function.

    Returns:
        A list with one dict per family holding the keys 'variants'
        (the ``toJSON()`` form of each variant), 'family_id', 'project_id'
        and 'family_name'.
    """
    per_family_results = []
    for family in family_group.get_families():
        variant_store = get_mall(family.project).variant_store
        raw_variants = variant_store.get_variants_in_gene(
            family.project.project_id,
            family.family_id,
            gene_id,
            variant_filter=variant_filter)
        # The store result is materialized into a list before filtering.
        gene_variants = search_utils.filter_gene_variants_by_variant_filter(
            list(raw_variants), gene_id, variant_filter)
        add_extra_info_to_variants_project(get_reference(),
                                           family.project,
                                           gene_variants,
                                           add_family_tags=True,
                                           add_populations=True)

        family_entry = {}
        family_entry['variants'] = [
            variant.toJSON() for variant in gene_variants
        ]
        family_entry['family_id'] = family.family_id
        family_entry['project_id'] = family.project.project_id
        family_entry['family_name'] = str(family)
        per_family_results.append(family_entry)
    return per_family_results
Esempio n. 2
0
def saved_variants(request, project_id, family_id):
    """
    Render the saved-variants page for a single family.

    Responds with 404 when the project or family cannot be found and raises
    PermissionDenied when the requesting user may not view the project.
    The template context carries the project, the family, the saved variants
    serialized as JSON and, when the family has a ``seqr_family``, the URL of
    the corresponding new-style family page (otherwise None).
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # The second element of the result is not used by this view.
    variants, _ = get_saved_variants_for_family(family)

    # TODO: this shouldn't live in API - base should never depend on api
    # TODO: the helper should also get a better name
    add_extra_info_to_variants_project(get_reference(),
                                       project,
                                       variants,
                                       add_family_tags=True,
                                       add_populations=True)

    variants_json = json.dumps([variant.toJSON() for variant in variants])

    if family.seqr_family:
        new_page_url = '/project/{0}/family_page/{1}'.format(
            family.seqr_family.project.guid, family.seqr_family.guid)
    else:
        new_page_url = None

    context = {
        'project': project,
        'family': family,
        'variants_json': variants_json,
        'new_page_url': new_page_url,
    }
    return render(request, 'family/saved_family_variants.html', context)
Esempio n. 3
0
def deprecated_retrieve_saved_variants_json(project, variant_tuples,
                                            create_if_missing):
    """
    Load saved variants through the legacy project and return their details.

    Args:
        project: project whose ``deprecated_project_id`` identifies the
            matching BaseProject; also used to look up Family and Individual
            guids.
        variant_tuples: passed unchanged to
            ``get_variants_from_variant_tuples``.
        create_if_missing: when falsy, variants whose 'created_variant' extra
            is truthy are dropped from the result.

    Returns:
        A list with the ``_variant_details(...)`` result for every remaining
        variant.
    """
    xbrowse_project = BaseProject.objects.get(
        project_id=project.deprecated_project_id)
    # HGMD annotations are only returned for staff users
    staff_user = User.objects.filter(is_staff=True).first()

    variants = get_variants_from_variant_tuples(xbrowse_project,
                                                variant_tuples,
                                                user=staff_user)
    if not create_if_missing:
        kept = []
        for candidate in variants:
            if not candidate.get_extra('created_variant'):
                kept.append(candidate)
        variants = kept
    add_extra_info_to_variants_project(get_reference(),
                                       xbrowse_project,
                                       variants,
                                       add_populations=True)

    # Lookup tables from legacy ids to guids, consumed by _variant_details.
    family_guids = {}
    for family in Family.objects.filter(project=project):
        family_guids[family.family_id] = family.guid

    individual_guids = {}
    for individual in Individual.objects.filter(family__project=project):
        individual_guids[individual.individual_id] = individual.guid

    details = []
    for variant in variants:
        details.append(
            _variant_details(variant.toJSON(), family_guids,
                             individual_guids))
    return details
Esempio n. 4
0
def family_variant_view(request, project_id, family_id):
    """
    Render the detail page for a single variant of a family.

    The variant is identified by the 'xpos', 'ref' and 'alt' query
    parameters of the request.

    Args:
        request: incoming HTTP request; ``request.user`` is checked for view
            permission and ``request.GET`` supplies the variant coordinates.
        project_id: matched against ``Project.project_id`` (404 if missing).
        family_id: matched against ``Family.family_id`` inside the project
            (404 if missing).

    Returns:
        The rendered 'family/family_variant_view.html' page, or a plain
        'Invalid View' response when 'xpos' is absent or not an integer.

    Raises:
        PermissionDenied: if the user may not view the project.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # Only the int() conversion can fail here: TypeError when 'xpos' is
    # missing (None), ValueError when it is not a valid integer. Catching
    # just those keeps unrelated errors (and KeyboardInterrupt/SystemExit)
    # from being silently turned into an 'Invalid View' page.
    try:
        xpos = int(request.GET.get('xpos'))
    except (TypeError, ValueError):
        return HttpResponse('Invalid View')
    ref = request.GET.get('ref')
    alt = request.GET.get('alt')

    # NOTE(review): other callers of get_single_variant guard against a
    # falsy result; here a missing variant would fail below on toJSON() -
    # confirm whether that case needs a dedicated response.
    variant = get_datastore(project).get_single_variant(
        project_id, family_id, xpos, ref, alt)
    add_extra_info_to_variants_project(get_reference(),
                                       project, [variant],
                                       add_family_tags=True,
                                       add_populations=True)

    return render(
        request, 'family/family_variant_view.html', {
            'project': project,
            'family': family,
            'variant_json': json.dumps(variant.toJSON()),
        })
Esempio n. 5
0
def causal_variants(request, project_id):
    """
    Render the causal-variants page of a project.

    Responds with 404 for an unknown project and raises PermissionDenied
    when the requesting user may not view it. The template receives the
    project, its causal variants as JSON and a JSON object mapping each
    family id to that family's ``get_json_obj()`` result.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    variants = get_causal_variants_for_project(project)
    add_extra_info_to_variants_project(get_reference(),
                                       project,
                                       variants,
                                       add_family_tags=True,
                                       add_populations=True)

    variants_json = json.dumps([variant.toJSON() for variant in variants])

    families_by_id = {}
    for family in project.get_families():
        families_by_id[family.family_id] = family.get_json_obj()
    families_json = json.dumps(families_by_id)

    context = {
        'project': project,
        'variants_json': variants_json,
        'families_json': families_json,
    }
    return render(request, 'project/causal_variants.html', context)
Esempio n. 6
0
def _set_saved_variant_json(new_variant_tag_or_note, source_variant_tag_or_note, new_family):
    """
    Store the JSON of the source tag/note's variant on the new tag/note.

    The variant is looked up in the datastore of ``new_family``'s project
    using the xpos/ref/alt of ``source_variant_tag_or_note``. Nothing happens
    when ``new_family`` is None, when the lookup raises (the error is
    logged), or when the lookup returns a falsy value. Otherwise the
    annotated variant JSON is written to
    ``new_variant_tag_or_note.saved_variant_json`` and the object is saved.
    """
    if new_family is None:
        return

    project_id = new_family.project.deprecated_project_id
    project = Project.objects.get(project_id=project_id)
    source = source_variant_tag_or_note
    try:
        variant_info = get_datastore(project).get_single_variant(
            project_id, new_family.family_id, source.xpos, source.ref, source.alt)
    except Exception as e:
        # Best effort: a failed lookup is reported and the record is left as is.
        failure_details = (new_family, source_variant_tag_or_note, e)
        logger.error("Unable to retrieve variant annotations for %s %s: %s" % failure_details)
        return

    if not variant_info:
        return

    add_extra_info_to_variants_project(
        get_reference(), project, [variant_info], add_family_tags=True, add_populations=True)

    new_variant_tag_or_note.saved_variant_json = json.dumps(variant_info.toJSON())
    new_variant_tag_or_note.save()
Esempio n. 7
0
def _deprecated_retrieve_saved_variants_json(project, variant_tuples, create_if_missing):
    """
    Load saved variants through the legacy project and return them as JSON.

    ``project.deprecated_project_id`` selects the BaseProject to query.
    When ``create_if_missing`` is falsy, variants whose 'created_variant'
    extra is truthy are dropped. Returns a list with the ``toJSON()`` form
    of every remaining variant.
    """
    xbrowse_project = BaseProject.objects.get(project_id=project.deprecated_project_id)
    # HGMD annotations are only returned for staff users
    staff_user = User.objects.filter(is_staff=True).first()

    variants = get_variants_from_variant_tuples(xbrowse_project, variant_tuples, user=staff_user)
    if not create_if_missing:
        existing_only = []
        for candidate in variants:
            if not candidate.get_extra('created_variant'):
                existing_only.append(candidate)
        variants = existing_only

    add_extra_info_to_variants_project(get_reference(), xbrowse_project, variants, add_populations=True)

    serialized = []
    for variant in variants:
        serialized.append(variant.toJSON())
    return serialized
Esempio n. 8
0
def _deprecated_retrieve_saved_variants_json(project, variant_tuples, create_if_missing):
    """
    Return the JSON form of saved variants fetched via the legacy project.

    Args:
        project: object whose ``deprecated_project_id`` identifies the
            BaseProject to query.
        variant_tuples: passed unchanged to
            ``get_variants_from_variant_tuples``.
        create_if_missing: when falsy, variants flagged with a truthy
            'created_variant' extra are left out.

    Returns:
        A list of ``variant.toJSON()`` results.
    """
    legacy_project_id = project.deprecated_project_id
    xbrowse_project = BaseProject.objects.get(project_id=legacy_project_id)

    # HGMD annotations are only returned for staff users
    any_staff_user = User.objects.filter(is_staff=True).first()
    variants = get_variants_from_variant_tuples(
        xbrowse_project, variant_tuples, user=any_staff_user)

    if not create_if_missing:
        variants = [v for v in variants if not v.get_extra('created_variant')]

    add_extra_info_to_variants_project(
        get_reference(), xbrowse_project, variants, add_populations=True)
    return [v.toJSON() for v in variants]
Esempio n. 9
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project.

    Collects the project's rare variants in the gene (alt allele count at
    most ``.2 * <individuals with variant data> + 5`` and maximum annotated
    frequency below .01) plus the variants of every knockout individual,
    and renders them with 'project/gene_quicklook.html'.

    Returns a plain "Unauthorized" response when the user may not view the
    project, and 404 when the project does not exist.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    search_details = " - staring gene search for: %s %s \n" % (gene_id, gene)
    sys.stderr.write(project_id + search_details)

    population_slugs = mall.get_annotator().reference_population_slugs
    variant_filter = get_default_variant_filter('all_coding',
                                                population_slugs)

    num_indivs = sum(1 for indiv in project.get_individuals()
                     if indiv.has_variant_data())
    aac_threshold = (.2 * num_indivs) + 5

    rare_variants = []
    gene_variants = project_analysis.get_variants_in_gene(
        project, gene_id, variant_filter=variant_filter)
    for variant in gene_variants:
        alt_allele_count = get_alt_allele_count(variant)
        highest_freq = max(variant.annotation['freqs'].values())
        if not (alt_allele_count <= aac_threshold and highest_freq < .01):
            continue
        rare_variants.append(variant)

    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    # One entry per knockout individual, with that individual's variants.
    knockouts = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for knockout_id in knockout_ids:
        knockout_variants = variation.get_relevant_variants_for_indiv_ids(
            [knockout_id])
        add_extra_info_to_variants_project(get_reference(), project,
                                           knockout_variants)
        serialized_variants = [v.toJSON() for v in knockout_variants]
        knockouts.append({
            'indiv_id': knockout_id,
            'variants': serialized_variants,
        })

    sys.stderr.write("Retrieved %s variants \n" % len(rare_variants))

    context = {'gene': gene}
    context['gene_json'] = json.dumps(gene)
    context['project'] = project
    context['rare_variants_json'] = json.dumps(
        [v.toJSON() for v in rare_variants])
    context['individuals_json'] = json.dumps(
        [indiv.get_json_obj() for indiv in project.get_individuals()])
    context['knockouts_json'] = json.dumps(knockouts)
    return render(request, 'project/gene_quicklook.html', context)
Esempio n. 10
0
def causal_variants(request, project_id):
    """
    Show the causal variants recorded for a project.

    A missing project yields a 404; a user without view permission gets
    PermissionDenied. The rendered context holds the project, the variants
    as a JSON list and the project's families as a JSON object keyed by
    family id.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    variants = get_causal_variants_for_project(project)
    add_extra_info_to_variants_project(
        get_reference(), project, variants, add_family_tags=True, add_populations=True)

    serialized_variants = []
    for variant in variants:
        serialized_variants.append(variant.toJSON())
    variants_json = json.dumps(serialized_variants)

    families_json = json.dumps(
        dict((family.family_id, family.get_json_obj()) for family in project.get_families()))

    return render(request, 'project/causal_variants.html', {
        'project': project,
        'variants_json': variants_json,
        'families_json': families_json,
    })
Esempio n. 11
0
def saved_variants(request, project_id, family_id):
    """
    Show the variants saved for one family.

    A missing project or family yields a 404; a user without view permission
    gets PermissionDenied. The rendered context holds the project, the
    family, the saved variants as JSON and 'new_page_url', which points at
    the new-style family page when ``family.seqr_family`` is set and is None
    otherwise.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # Only the variants are needed; the second value of the pair is ignored.
    lookup_result = get_saved_variants_for_family(family)
    variants, _unused = lookup_result

    # TODO: this shouldn't live in API - base should never depend on api
    # TODO: the helper should also get a better name
    add_extra_info_to_variants_project(
        get_reference(), project, variants, add_family_tags=True, add_populations=True)

    context = {'project': project, 'family': family}
    context['variants_json'] = json.dumps([variant.toJSON() for variant in variants])
    if family.seqr_family:
        context['new_page_url'] = '/project/{0}/family_page/{1}'.format(
            family.seqr_family.project.guid, family.seqr_family.guid)
    else:
        context['new_page_url'] = None

    return render(request, 'family/saved_family_variants.html', context)
Esempio n. 12
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project.

    Renders 'project/gene_quicklook.html' with the gene, the project's rare
    variants in that gene, the project's individuals and, per knockout
    individual, the relevant variants. A variant counts as rare when its alt
    allele count does not exceed ``.2 * n + 5`` (n = individuals with variant
    data) and its highest annotated frequency is below .01.

    Users without view permission receive a plain "Unauthorized" response;
    an unknown project yields a 404.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(
        project_id + " - staring gene search for: %s %s \n" % (gene_id, gene))

    variant_filter = get_default_variant_filter(
        'all_coding', mall.get_annotator().reference_population_slugs)

    individuals_with_data = [
        indiv for indiv in project.get_individuals() if indiv.has_variant_data()
    ]
    num_indivs = len(individuals_with_data)
    aac_threshold = (.2 * num_indivs) + 5

    def _is_rare(candidate):
        # Same order as the inline checks: allele count first, then frequency.
        allele_count = get_alt_allele_count(candidate)
        top_frequency = max(candidate.annotation['freqs'].values())
        return allele_count <= aac_threshold and top_frequency < .01

    rare_variants = [
        candidate
        for candidate in project_analysis.get_variants_in_gene(
            project, gene_id, variant_filter=variant_filter)
        if _is_rare(candidate)
    ]

    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    knockouts = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for individual_id in knockout_ids:
        individual_variants = variation.get_relevant_variants_for_indiv_ids([individual_id])
        add_extra_info_to_variants_project(get_reference(), project, individual_variants)
        knockout_entry = {'indiv_id': individual_id}
        knockout_entry['variants'] = [v.toJSON() for v in individual_variants]
        knockouts.append(knockout_entry)

    sys.stderr.write("Retrieved %s variants \n" % len(rare_variants))

    gene_json = json.dumps(gene)
    rare_variants_json = json.dumps([v.toJSON() for v in rare_variants])
    individuals_json = json.dumps([i.get_json_obj() for i in project.get_individuals()])
    knockouts_json = json.dumps(knockouts)
    return render(request, 'project/gene_quicklook.html', {
        'gene': gene,
        'gene_json': gene_json,
        'project': project,
        'rare_variants_json': rare_variants_json,
        'individuals_json': individuals_json,
        'knockouts_json': knockouts_json,
    })
Esempio n. 13
0
def get_variants_in_gene(family_group, gene_id, variant_filter=None, quality_filter=None):
    """
    Return, family by family, the variants found in a gene.

    Every family of ``family_group`` is queried through the variant store of
    its project; the result is narrowed with
    ``search_utils.filter_gene_variants_by_variant_filter`` and passed to
    ``add_extra_info_to_variants_project`` before being serialized.

    Args:
        family_group: object exposing ``get_families()``.
        gene_id: gene to search in.
        variant_filter: optional filter used for the store query and for the
            post-filtering step.
        quality_filter: accepted but not used by this function.

    Returns:
        A list of dicts, one per family, with the keys 'variants',
        'family_id', 'project_id' and 'family_name'.
    """
    def _variants_for(family):
        # Fetch, filter and annotate the gene's variants for one family.
        fetched = list(get_mall(family.project).variant_store.get_variants_in_gene(
            family.project.project_id, family.family_id, gene_id, variant_filter=variant_filter))
        filtered = search_utils.filter_gene_variants_by_variant_filter(fetched, gene_id, variant_filter)
        add_extra_info_to_variants_project(
            get_reference(), family.project, filtered, add_family_tags=True, add_populations=True)
        return filtered

    variants_by_family = []
    for family in family_group.get_families():
        family_variants = _variants_for(family)
        variants_by_family.append(dict([
            ('variants', [variant.toJSON() for variant in family_variants]),
            ('family_id', family.family_id),
            ('project_id', family.project.project_id),
            ('family_name', str(family)),
        ]))
    return variants_by_family
Esempio n. 14
0
def variants_with_tag(request, project_id, tag=None):
    """
    Show or download the saved variants of a project, optionally by tag.

    Query parameters read from ``request.GET``:
        family: optional family id used to restrict the variants.
        download: when non-empty, a CSV attachment is returned instead of
            the HTML page.

    Args:
        request: incoming HTTP request.
        project_id: matched against ``Project.project_id`` (404 if missing).
        tag: optional URL-quoted tag name; when falsy, all saved variants of
            the project are used.

    Returns:
        Either a 'text/csv' HttpResponse (download) or the rendered
        'project/saved_variants.html' page.

    Raises:
        PermissionDenied: if the user may not view the project.
    """

    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    requested_family_id = request.GET.get('family')
    if tag:
        # NOTE(review): urllib.unquote only exists in Python 2
        # (Python 3 moved it to urllib.parse.unquote).
        tag = urllib.unquote(tag)
        variants = get_variants_by_tag(project, tag, family_id=requested_family_id)
    else:
        variants = get_all_saved_variants_for_project(project, family_id=requested_family_id, user=request.user)
    add_extra_info_to_variants_project(get_reference(), project, variants, add_family_tags=True, add_populations=True)
    variants.sort(key=lambda var: var.xpos)

    if request.GET.get('download', ''):
        response = HttpResponse(content_type='text/csv')
        # When no tag was given, the file name ends in "_None.csv".
        response['Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(project_id, tag)

        header_fields = [
            "chrom", "pos", "ref", "alt",  "tags", "notes", "family", "gene", "effect",
            "1kg_wgs_phase3", "1kg_wgs_phase3_popmax", "exac_v3", "exac_v3_popmax",
            "gnomad_exomes", "gnomad_exomes_popmax", "gnomad_genomes", "gnomad_genomes_popmax",
            "sift", "polyphen", "hgvsc", "hgvsp"
        ]

        # The header reserves genotype columns for 10 individuals; each row
        # below emits one group per individual of the variant's family, so a
        # family with more than 10 individuals produces rows wider than the
        # header.
        genotype_header_fields = ['sample_id', 'GT_genotype', 'filter', 'AD_allele_depth', 'DP_read_depth', 'GQ_genotype_quality', 'AB_allele_balance']
        for i in range(0, 10):
            for h in genotype_header_fields:
                header_fields.append("%s_%d" % (h, i))

        writer = csv.writer(response)
        writer.writerow(header_fields)
        for variant in variants:
            # Skip variants that carry neither a main transcript nor VEP annotations.
            if not (variant and variant.annotation and (variant.annotation.get('main_transcript') or variant.annotation.get("vep_annotation"))):
                continue

            # NOTE(review): this index is read even when 'main_transcript' is
            # present, so the key must exist in both cases - confirm.
            worst_annotation_idx = variant.annotation["worst_vep_annotation_index"]
            worst_annotation = variant.annotation.get('main_transcript') or variant.annotation["vep_annotation"][worst_annotation_idx]

            family_id = variant.extras["family_id"]
            # One Family lookup per exported variant.
            family = Family.objects.get(project=project, family_id=family_id)

            # Seven values per individual, in the order of genotype_header_fields.
            genotype_values = []
            for individual in family.get_individuals():
                genotype_values.append(individual.indiv_id)
                genotype = variant.get_genotype(individual.indiv_id)
                genotype_values.append("/".join(genotype.alleles) if genotype and genotype.alleles else "./.")
                genotype_values.append(genotype.filter if genotype else "")
                genotype_values.append(genotype.extras["ad"] if genotype else "")
                genotype_values.append(genotype.extras["dp"] if genotype else "")
                genotype_values.append(genotype.gq if genotype and genotype.gq is not None else "")
                genotype_values.append(genotype.ab if genotype and genotype.ab is not None else "")


            # Column order must match header_fields above.
            row = [
                variant.chr,
                variant.pos,
                variant.ref,
                variant.alt,
                # NOTE(review): under Python 2 this list comprehension rebinds
                # the function-level name `tag`; it is not read again in this
                # branch, so the effect is harmless here.
                "|".join([tag['tag'] for tag in variant.extras['family_tags']]) if 'family_tags' in variant.extras else '',

                "|".join([note['user']['display_name'] +":"+ note['note'] for note in variant.extras['family_notes']]) if 'family_notes' in variant.extras else '',

                variant.extras["family_id"],
                worst_annotation["gene_symbol"],
                variant.annotation.get("vep_consequence") or "",

                # Frequencies: each column tries the listed key(s) and falls
                # back to an empty cell.
                variant.annotation["freqs"].get("1kg_wgs_phase3") or variant.annotation["freqs"].get("1kg_wgs_AF") or "",
                variant.annotation["freqs"].get("1kg_wgs_phase3_popmax") or variant.annotation["freqs"].get("1kg_wgs_popmax_AF") or "",
                variant.annotation["freqs"].get("exac_v3") or variant.annotation["freqs"].get("exac_v3_AF") or "",
                variant.annotation["freqs"].get("exac_v3_popmax") or variant.annotation["freqs"].get("exac_v3_popmax_AF") or "",
                variant.annotation["freqs"].get("gnomad_exomes_AF") or "",
                variant.annotation["freqs"].get("gnomad_exomes_popmax_AF") or "",
                variant.annotation["freqs"].get("gnomad_genomes_AF") or "",
                variant.annotation["freqs"].get("gnomad_genomes_popmax_AF") or "",
                worst_annotation.get("sift") or "",
                worst_annotation.get("polyphen") or "",
                worst_annotation.get("hgvsc") or "",
                # "%3D" is the URL-encoded form of "=".
                (worst_annotation.get("hgvsp") or "").replace("%3D", "="),
            ] + genotype_values
            # NOTE(review): `unicode` is a Python 2 builtin.
            writer.writerow(map(lambda s: unicode(s).encode('UTF-8'), row))

        return response
    else:
        family_ids = {variant.extras['family_id'] for variant in variants}
        families = get_filtered_families(filters={'project': project, 'family_id__in': family_ids}, fields=['family_id'])

        # Link to the equivalent new-style page when the project has a seqr_project.
        new_page_url = None
        if project.seqr_project:
            new_page_url = '/project/{}/saved_variants'.format(project.seqr_project.guid)
            if requested_family_id:
                family = project.seqr_project.family_set.get(family_id=requested_family_id)
                new_page_url += '/family/{}'.format(family.guid)
            if tag:
                new_page_url += '/{}'.format(tag)

        return render(request, 'project/saved_variants.html', {
            'project': project,
            'tag': tag,
            'new_page_url': new_page_url,
            'variants_json': json.dumps([v.toJSON() for v in variants]),
            'families_json': json.dumps({family.family_id: {
                'project_id': project.project_id,
                'family_id': family.family_id,
                'individuals': family.get_individuals_json(project_id=project.project_id)
            } for family in families})
    })
Esempio n. 15
0
def _gene_quicklook_csv_row(gene, gene_id, variant, individuals_to_include):
    """Build one CSV row (a list of strings) describing `variant` in `gene`.

    The row holds the fixed annotation columns followed by one genotype cell
    per entry of `individuals_to_include`; the cell is empty when that
    individual does not carry an alt allele of the variant.
    """
    annotation = variant.annotation
    worst_annotation_idx = annotation["worst_vep_index_per_gene"][gene_id]
    worst_annotation = annotation["vep_annotation"][worst_annotation_idx]
    freqs = annotation["freqs"]

    genotype_cells = []
    carrier_summaries = []
    for indiv_id in individuals_to_include:
        if indiv_id in variant.genotypes and variant.genotypes[
                indiv_id].num_alt > 0:
            genotype = variant.genotypes[indiv_id]
            allele_string = ">".join(genotype.alleles)
            carrier_summaries.append(indiv_id + ":" + allele_string + "  ")
            genotype_cells.append(allele_string + "   (" +
                                  str(genotype.gq) + ")")
        else:
            genotype_cells.append("")
    all_genotypes_string = "".join(carrier_summaries)

    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
        variant.unique_tuple(), ("", ""))

    cells = [
        gene["symbol"],
        variant.chr,
        variant.pos,
        variant.ref,
        variant.alt,
        variant.vcf_id or "",
        annotation.get("vep_consequence", ""),
        worst_annotation.get("hgvsc", ""),
        # `or ""` keeps a stored None from breaking the string methods below.
        # "%3D" is the URL-encoded form of "=".
        (worst_annotation.get("hgvsp") or "").replace("%3D", "="),
        worst_annotation.get("sift", ""),
        worst_annotation.get("polyphen", ""),
        worst_annotation.get("mutationtaster_pred", ""),
        # Predictions are split on '%3B' and de-duplicated.
        ";".join(
            set((worst_annotation.get("fathmm_pred") or "").split('%3B'))),
        measureset_id,
        clinvar_significance,
        freqs.get("1kg_wgs_phase3", ""),
        freqs.get("1kg_wgs_phase3_popmax", ""),
        freqs.get("exac_v3", ""),
        freqs.get("exac_v3_popmax", ""),
        all_genotypes_string,
    ] + genotype_cells
    return [str(cell) for cell in cells]


def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project, optionally across several projects.

    Query parameters read from ``request.GET``:
        selected_projects: optional comma-separated project ids to search
            instead of just the requested project; every one of them must be
            viewable by the user.
        download: 'knockouts' or 'rare_variants' returns the matching data as
            a CSV attachment; any other non-empty value returns a CSV that
            only contains the header row.

    Args:
        request: incoming HTTP request.
        project_id: matched against ``Project.project_id`` (404 if missing).
        gene_id: gene to summarize, or None to render the empty search page.

    Returns:
        A plain "Unauthorized" response, the 'analysis_unavailable.html'
        page, a 'text/csv' response, or the rendered
        'project/gene_quicklook.html' page. The 'project' entry of the page
        context is always the project named by `project_id`.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if project.project_status == Project.NEEDS_MORE_PHENOTYPES and not request.user.is_staff:
        return render(request, 'analysis_unavailable.html',
                      {'reason': 'Awaiting phenotype data.'})

    # other projects this user can view
    if request.user.is_staff:
        viewable_projects = list(Project.objects.all())
    else:
        viewable_projects = [
            c.project
            for c in ProjectCollaborator.objects.filter(user=request.user)
        ]

    # A real list (not a lazy filter object) so that the emptiness check
    # below is meaningful.
    other_projects = [
        p for p in viewable_projects
        if get_project_datastore(p.project_id).project_collection_is_loaded(
            p.project_id)
    ]

    if other_projects:
        other_projects_json = json.dumps([{
            'project_id': p.project_id,
            'project_name': p.project_name
        } for p in sorted(other_projects, key=lambda p: p.project_id)])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
                'other_projects_json': other_projects_json,
            })

    # Distinct loop names are used below so that `project` and `project_id`
    # keep referring to the requested project for logging and rendering.
    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        projects_to_search = []
        for selected_project_id in projects_to_search_param.split(","):
            selected_project = get_object_or_404(
                Project, project_id=selected_project_id)
            if not selected_project.can_view(request.user):
                return HttpResponse("Unauthorized")
            projects_to_search.append(selected_project)
    else:
        projects_to_search = [project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(
        project_id + " - staring gene search for: %s in projects: %s\n" %
        (gene_id, ",".join([p.project_id for p in projects_to_search]) + "\n"))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    for search_project in projects_to_search:
        project_variants = []
        for variant in project_analysis.get_variants_in_gene(
                search_project, gene_id, variant_filter=variant_filter):
            max_af = max(variant.annotation['freqs'].values())
            # Skip variants that nobody carries, and variants that are not rare.
            if not any(indiv_id
                       for indiv_id, genotype in variant.genotypes.items()
                       if genotype.num_alt > 0):
                continue
            if max_af >= .01:
                continue

            # remember which project each genotyped individual came from
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = search_project.project_id

            # save this variant, or only merge its genotypes when the same
            # variant has already been seen in another project
            variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos,
                                          variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                rare_variant_dict[variant_id].genotypes.update(
                    variant.genotypes)

        add_extra_info_to_variants_project(get_reference(), search_project,
                                           project_variants)
        rare_variants.extend(project_variants)
    sys.stderr.write("Retreived %s rare variants\n" % len(rare_variants))

    # compute knockout individuals
    individ_ids_and_variants = []
    for search_project in projects_to_search:
        knockout_ids, variation = get_knockouts_in_gene(search_project,
                                                        gene_id)
        for indiv_id in knockout_ids:
            variants = variation.get_relevant_variants_for_indiv_ids(
                [indiv_id])
            add_extra_info_to_variants_project(get_reference(),
                                               search_project, variants)
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })
            # The CSV header looks every exported individual up in this map;
            # make sure knockout individuals are present even when no rare
            # variant above listed them.
            indiv_id_to_project_id.setdefault(indiv_id,
                                              search_project.project_id)

    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
                download_csv, gene["transcript_name"])

        # Defaults cover an unrecognised download type: the response then
        # contains only the header row instead of failing on unbound names.
        individuals_to_include = []
        variants_to_export = []
        if download_csv == 'knockouts':
            individuals_to_include = [
                entry["indiv_id"] for entry in individ_ids_and_variants
            ]
            for entry in individ_ids_and_variants:
                variants_to_export.extend(entry["variants"])
        elif download_csv == 'rare_variants':
            # Carriers in first-seen order; the set only speeds up the
            # membership test.
            seen_indiv_ids = set()
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in seen_indiv_ids:
                        seen_indiv_ids.add(indiv_id)
                        individuals_to_include.append(indiv_id)
            variants_to_export = rare_variants

        rows = [
            _gene_quicklook_csv_row(gene, gene_id, variant,
                                    individuals_to_include)
            for variant in variants_to_export
        ]

        header = [
            "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
            "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
            "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
            "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
            "freq_exac_v3_popmax", "all_genotypes"
        ] + [
            i + " (from %s)" % indiv_id_to_project_id[i]
            for i in individuals_to_include
        ]

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response

    # HTML page: serialize each knockout individual's variants in place.
    for individ_id_and_variants in individ_ids_and_variants:
        individ_id_and_variants["variants"] = [
            v.toJSON() for v in individ_id_and_variants["variants"]
        ]

    return render(
        request, 'project/gene_quicklook.html', {
            'gene':
            gene,
            'gene_json':
            json.dumps(gene),
            'project':
            project,
            'rare_variants_json':
            json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json':
            json.dumps([
                i.get_json_obj() for searched in projects_to_search
                for i in searched.get_individuals()
            ]),
            'knockouts_json':
            json.dumps(individ_ids_and_variants),
            'other_projects_json':
            other_projects_json,
        })
Esempio n. 16
0
    def search_for_genes(self,
                         gene_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01):
        """
        Search for genes across project(s) and write the rare coding variants
        that are found to a tab-delimited file.

        Args:
            gene_ids (list): 'ENSG..' gene id strings.
            project_id_list (list): (optional) project ids to narrow down the
                search. When empty or None, all projects are searched.
            output_filename (string): output file name
            max_af (float): AF filter - a variant is skipped when its highest
                annotated frequency is >= this value.
        """
        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        # The context manager closes the file even if a lookup below raises.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            # all rare coding variants
            variant_filter = get_default_variant_filter(
                'all_coding',
                mall.get_annotator().reference_population_slugs)
            print("All Filters: ")
            pprint(variant_filter.toJSON())

            if project_id_list:
                projects = [
                    Project.objects.get(project_id=project_id)
                    for project_id in project_id_list
                ]
            else:
                projects = Project.objects.all()

            print("Max AF threshold: %s" % max_af)
            print("Staring gene search for:\n%s\nin projects:\n%s\n" %
                  (", ".join(gene_ids),
                   ", ".join([p.project_id for p in projects])))

            # Individual records keyed by (project_id, indiv_id). The project
            # is part of the key so that identical individual ids in different
            # projects never share a cache entry.
            indiv_id_cache = {}
            for project in projects:
                project_id = project.project_id
                if get_project_datastore(
                        project_id).project_collection_is_loaded(project_id):
                    print("=====================")
                    print("Searching project %s" % project_id)
                else:
                    print(
                        "Skipping project %s - gene search is not enabled for this project"
                        % project_id)
                    continue

                for gene_id in gene_ids:
                    gene_id = get_gene_id_from_str(gene_id, get_reference())

                    gene = get_reference().get_gene(gene_id)
                    print("-- searching %s for gene %s (%s)" %
                          (project_id, gene["symbol"], gene_id))

                    for variant in project_analysis.get_variants_in_gene(
                            project, gene_id, variant_filter=variant_filter):
                        if max(variant.annotation['freqs'].values()) >= max_af:
                            continue

                        add_extra_info_to_variants_project(
                            get_reference(), project, [variant])

                        worst_annotation_idx = variant.annotation[
                            "worst_vep_index_per_gene"][gene_id]
                        worst_annotation = variant.annotation[
                            "vep_annotation"][worst_annotation_idx]
                        all_genotypes_list = []
                        pass_filter = "N/A"
                        for indiv_id, genotype in variant.genotypes.items():
                            # Look the individual up by the actual id (the
                            # cache used to be queried with a constant string
                            # and therefore never hit).
                            cache_key = (project_id, indiv_id)
                            individual = indiv_id_cache.get(cache_key)
                            if individual is None:
                                individual = Individual.objects.get(
                                    project=project, indiv_id=indiv_id)
                                indiv_id_cache[cache_key] = individual

                            # filter value is stored in the genotypes even
                            # though it's the same for all individuals
                            pass_filter = genotype.filter
                            if genotype.num_alt > 0:
                                if individual.affected == "A":
                                    affected_label = "[Affected]"
                                elif individual.affected == "N":
                                    affected_label = "[-]"
                                else:
                                    affected_label = "[?]"
                                if genotype.ab is not None:
                                    allele_balance = genotype.ab
                                else:
                                    allele_balance = float('NaN')
                                all_genotypes_list.append(
                                    "%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                    (indiv_id, affected_label,
                                     ">".join(genotype.alleles), genotype.gq,
                                     allele_balance))

                        measureset_id, clinvar_significance = get_clinvar_variants(
                        ).get(variant.unique_tuple(), ("", ""))
                        freqs = variant.annotation["freqs"]
                        row = [str(value) for value in [
                            project_id,
                            gene["symbol"],
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            pass_filter,
                            variant.annotation.get("vep_consequence", ""),
                            worst_annotation.get("hgvsc", ""),
                            worst_annotation.get("hgvsp", "").replace(
                                "%3D", "="),
                            worst_annotation.get("sift", ""),
                            worst_annotation.get("polyphen", ""),
                            worst_annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    worst_annotation.get(
                                        "fathmm_pred", "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            freqs.get("1kg_wgs_phase3", ""),
                            freqs.get("1kg_wgs_phase3_popmax", ""),
                            freqs.get("exac_v3", ""),
                            freqs.get("exac_v3_popmax", ""),
                            ", ".join(all_genotypes_list),
                        ]]
                        writer.writerow(row)

        print("Wrote out %s" % output_filename)
Esempio n. 17
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project.

    Renders the gene quicklook page, or - when the request carries a non-empty
    `download` GET parameter - returns a CSV attachment instead:
    `download=knockouts` exports the variants of the knockout individuals and
    `download=rare_variants` exports the project's rare variants. Any other
    value produces a CSV that contains only the header row.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if gene_id is None:
        return render(request, 'project/gene_quicklook.html', {
            'project': project,
            'gene': None,
            'gene_json': None,
            'rare_variants_json': None,
            'individuals_json': None,
            'knockouts_json': None,
        })

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(project_id + " - staring gene search for: %s %s \n" % (gene_id, gene))

    # all rare coding variants
    variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)

    rare_variants = [
        variant
        for variant in project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter)
        if max(variant.annotation['freqs'].values()) < .01
    ]
    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    # compute knockout individuals
    individ_ids_and_variants = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for indiv_id in knockout_ids:
        variants = variation.get_relevant_variants_for_indiv_ids([indiv_id])
        add_extra_info_to_variants_project(get_reference(), project, variants)
        individ_ids_and_variants.append({
            'indiv_id': indiv_id,
            'variants': variants,
        })

    sys.stderr.write("Project-wide gene search retrieved %s rare variants for gene: %s \n" % (len(rare_variants), gene_id))

    download_csv = request.GET.get('download', '')
    if not download_csv:
        # HTML page: variants are serialized to JSON for the template.
        for entry in individ_ids_and_variants:
            entry["variants"] = [v.toJSON() for v in entry["variants"]]

        return render(request, 'project/gene_quicklook.html', {
            'gene': gene,
            'gene_json': json.dumps(gene),
            'project': project,
            'rare_variants_json': json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json': json.dumps([i.get_json_obj() for i in project.get_individuals()]),
            'knockouts_json': json.dumps(individ_ids_and_variants),
        })

    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(download_csv, gene["transcript_name"])

    # Start from empty selections so that an unrecognized `download` value
    # produces a header-only CSV instead of failing on undefined names.
    individuals_to_include = []
    variants_to_export = []
    if download_csv == 'knockouts':
        individuals_to_include = [entry["indiv_id"] for entry in individ_ids_and_variants]
        for entry in individ_ids_and_variants:
            variants_to_export.extend(entry["variants"])
    elif download_csv == 'rare_variants':
        # one genotype column per individual that carries at least one alt allele
        for variant in rare_variants:
            for indiv_id, genotype in variant.genotypes.items():
                if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                    individuals_to_include.append(indiv_id)
        variants_to_export = rare_variants

    def _csv_row(variant):
        # One CSV row: annotation columns followed by one genotype column per
        # individual in individuals_to_include.
        worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
        worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
        genotype_columns = []
        all_genotypes = []
        for indiv_id in individuals_to_include:
            if indiv_id not in variant.genotypes:
                # no call recorded for this individual - keep the column empty
                genotype_columns.append("")
                continue
            genotype = variant.genotypes[indiv_id]
            allele_string = ">".join(genotype.alleles)
            all_genotypes.append(indiv_id + ":" + allele_string + "  ")
            if genotype.num_alt > 0:
                genotype_columns.append(allele_string + "   (" + str(genotype.gq) + ")")
            else:
                genotype_columns.append("")

        measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(variant.unique_tuple(), ("", ""))
        freqs = variant.annotation["freqs"]
        columns = [
            gene["symbol"],
            variant.chr,
            variant.pos,
            variant.ref,
            variant.alt,
            variant.vcf_id or "",
            variant.annotation.get("vep_consequence", ""),
            worst_annotation.get("hgvsc", ""),
            worst_annotation.get("hgvsp", "").replace("%3D", "="),
            worst_annotation.get("sift", ""),
            worst_annotation.get("polyphen", ""),
            worst_annotation.get("mutationtaster_pred", ""),
            ";".join(set(worst_annotation.get("fathmm_pred", "").split('%3B'))),
            measureset_id,
            clinvar_significance,
            freqs.get("1kg_wgs_phase3", ""),
            freqs.get("1kg_wgs_phase3_popmax", ""),
            freqs.get("exac_v3", ""),
            freqs.get("exac_v3_popmax", ""),
            "".join(all_genotypes),
        ] + genotype_columns
        return [str(value) for value in columns]

    header = ["gene", "chr", "pos", "ref", "alt", "rsID", "impact",
              "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id", "clinvar_clinical_sig",
              "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
              "freq_exac_v3", "freq_exac_v3_popmax",
              "all_genotypes"] + individuals_to_include

    # Build every row before writing so a failure leaves the response empty.
    rows = [_csv_row(variant) for variant in variants_to_export]

    writer = csv.writer(response)
    writer.writerow(header)
    for row in rows:
        writer.writerow(row)
    return response
    def search_for_gene(self, search_gene_id, project_id_list, max_af=0.01):
        '''
        Search for a gene across project(s) and write the rare coding variants
        that are found to the tab-delimited file 'results_<search_gene_id>.tsv'.

        Args:
          search_gene_id: Gene ID to search for
          project_id_list: An optional list of project ids to narrow down the
              search to. When empty or None, all projects are searched.
          max_af: a variant is skipped when its highest annotated frequency
              is >= this value
        '''
        gene_id = get_gene_id_from_str(search_gene_id, get_reference())
        gene = get_reference().get_gene(gene_id)

        # Resolve the projects once, before any output is produced. An unknown
        # project id still raises IndexError, but no longer leaves a
        # half-written output file behind.
        if project_id_list:
            projects = [
                Project.objects.filter(project_id=project_id)[0]
                for project_id in project_id_list
            ]
        else:
            projects = list(Project.objects.all())
        project_ids = [p.project_id for p in projects]

        print("Staring gene search for: %s %s in projects: %s\n" % (search_gene_id, gene['gene_id'], ", ".join(project_ids)))
        print("Max AF threshold: %s" % max_af)

        # all rare coding variants
        variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)
        print("All Filters: ")
        pprint(variant_filter.toJSON())

        output_filename = 'results_' + search_gene_id + '.tsv'

        header = ["project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter", "impact",
                  "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id", "clinvar_clinical_sig",
                  "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
                  "freq_exac_v3", "freq_exac_v3_popmax",
                  "all_genotypes"]

        # The context manager closes the file even if a lookup below raises.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            for project in projects:
                project_id = project.project_id
                if get_project_datastore(project_id).project_collection_is_loaded(project_id):
                    print("Running on project %s" % project_id)
                else:
                    print("Skipping project %s - gene search is not enabled for this project" % project_id)
                    continue

                for variant in project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter):
                    if max(variant.annotation['freqs'].values()) >= max_af:
                        continue
                    add_extra_info_to_variants_project(get_reference(), project, [variant])

                    worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    for indiv_id, genotype in variant.genotypes.items():
                        # filter value is stored in the genotypes even though
                        # it's the same for all individuals
                        pass_filter = genotype.filter
                        if genotype.num_alt > 0:
                            if genotype.ab is not None:
                                allele_balance = genotype.ab
                            else:
                                allele_balance = float('NaN')
                            all_genotypes_list.append(
                                "%s[gt:%s GQ:%s AB:%0.3f]" %
                                (indiv_id, ">".join(genotype.alleles), genotype.gq, allele_balance))

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(variant.unique_tuple(), ("", ""))
                    freqs = variant.annotation["freqs"]
                    row = [str(value) for value in [
                        project_id,
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(set(worst_annotation.get("fathmm_pred", "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        freqs.get("1kg_wgs_phase3", ""),
                        freqs.get("1kg_wgs_phase3_popmax", ""),
                        freqs.get("exac_v3", ""),
                        freqs.get("exac_v3_popmax", ""),
                        ", ".join(all_genotypes_list),
                    ]]
                    writer.writerow(row)

        print("Wrote out %s" % output_filename)
Esempio n. 19
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project, optionally searched across several projects.

    GET parameters read by this view:
      selected_projects: comma-separated project ids to search. Every id must
          be among the projects returned by get_loaded_projects_for_user for
          this user, otherwise "Unauthorized" is returned. When absent, only
          the project named by `project_id` is searched.
      download: when non-empty, a CSV attachment is returned instead of the
          rendered page. `knockouts` exports the variants of the knockout
          individuals, `rare_variants` exports the rare variants; any other
          value produces a CSV that contains only the header row.
    """
    main_project = get_object_or_404(Project, project_id=project_id)
    if not main_project.can_view(request.user):
        return HttpResponse("Unauthorized")

    # other projects this user can view
    other_projects = get_loaded_projects_for_user(
        request.user, fields=['project_id', 'project_name'])

    if other_projects:
        other_projects_json = json.dumps([{
            'project_id': p.project_id,
            'project_name': p.project_name
        } for p in sorted(other_projects, key=lambda p: p.project_id.lower())])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': main_project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
                'other_projects_json': other_projects_json,
            })

    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        project_ids = projects_to_search_param.split(",")
        # `or []` keeps this safe when no other projects were returned; the
        # length check below then rejects the request.
        projects_to_search = [
            candidate for candidate in (other_projects or [])
            if candidate.project_id in project_ids
        ]
        if len(projects_to_search) < len(project_ids):
            # If not all the specified project ids are in the other projects list then they are not authorized
            return HttpResponse("Unauthorized")
    else:
        project_ids = [main_project.project_id]
        projects_to_search = [main_project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(
        project_id + " - staring gene search for: %s in projects: %s\n" %
        (gene_id, ",".join([p.project_id for p in projects_to_search]) + "\n"))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    individ_ids_and_variants = []
    for project in projects_to_search:
        all_project_variants = project_analysis.get_variants_in_gene(
            project, gene_id, variant_filter=variant_filter)

        # compute knockout individuals
        knockout_ids, variation = get_knockouts_in_gene(
            project, gene_id, all_project_variants)
        for indiv_id in knockout_ids:
            # Record the project here as well: the map is otherwise filled
            # only for variants that pass the rare-variant filters below, and
            # the CSV header needs an entry for every knockout individual.
            indiv_id_to_project_id[indiv_id] = project.project_id
            variants = variation.get_relevant_variants_for_indiv_ids(
                [indiv_id])
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })

        # compute rare variants
        project_variants = []
        for variant in all_project_variants:
            max_af = max(
                freq for label, freq in variant.annotation['freqs'].items()
                if label != "AF")  # don't filter on within-cohort AF

            # skip variants that nobody carries
            if not any(genotype.num_alt > 0
                       for genotype in variant.genotypes.values()):
                continue
            if max_af >= .01:
                continue

            # add project id to genotypes
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = project.project_id

            # save this variant (or, if it has already been seen in another
            # project, just merge in the genotypes from this project)
            variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos,
                                          variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                rare_variant_dict[variant_id].genotypes.update(
                    variant.genotypes)

        rare_variants.extend(project_variants)

    all_variants = list(rare_variants)
    for entry in individ_ids_and_variants:
        all_variants.extend(entry['variants'])
    # NOTE(review): `project` is the last project from the loop above, so when
    # several projects are searched all variants are annotated against that
    # one project - confirm whether this should be done per project.
    add_extra_info_to_variants_project(get_reference(), project, all_variants)

    download_csv = request.GET.get('download', '')
    if not download_csv:
        # HTML page: variants are serialized to JSON for the template.
        for entry in individ_ids_and_variants:
            entry["variants"] = [v.toJSON() for v in entry["variants"]]

        individ_ids = {i['indiv_id'] for i in individ_ids_and_variants}
        for var in rare_variants:
            individ_ids.update(var.genotypes.keys())
        individuals = Individual.objects.filter(
            indiv_id__in=individ_ids,
            project__project_id__in=project_ids).select_related(
                'project').select_related('family').only(
                    'project__project_id', 'family__family_id',
                    *Individual.INDIVIDUAL_JSON_FIELDS_NO_IDS)

        return render(
            request, 'project/gene_quicklook.html', {
                'gene':
                gene,
                'gene_json':
                json.dumps(gene),
                'project':
                main_project,
                'rare_variants_json':
                json.dumps([v.toJSON() for v in rare_variants]),
                'individuals_json':
                json.dumps([
                    i.get_json_obj(skip_has_variant_data=True)
                    for i in individuals
                ]),
                'knockouts_json':
                json.dumps(individ_ids_and_variants),
                'other_projects_json':
                other_projects_json,
            })

    response = HttpResponse(content_type='text/csv')
    response[
        'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
            download_csv,
            gene.get("symbol") or gene.get("transcript_name"))

    # Start from empty selections so that an unrecognized `download` value
    # produces a header-only CSV instead of failing on undefined names.
    individuals_to_include = []
    variants_to_export = []
    if download_csv == 'knockouts':
        individuals_to_include = [
            entry["indiv_id"] for entry in individ_ids_and_variants
        ]
        for entry in individ_ids_and_variants:
            variants_to_export.extend(entry["variants"])
    elif download_csv == 'rare_variants':
        # one genotype column per individual that carries at least one alt allele
        for variant in rare_variants:
            for indiv_id, genotype in variant.genotypes.items():
                if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                    individuals_to_include.append(indiv_id)
        variants_to_export = rare_variants

    def _csv_row(variant):
        # One CSV row: annotation columns followed by one genotype column per
        # individual in individuals_to_include. Only carriers (num_alt > 0)
        # are listed; everyone else gets an empty genotype column.
        worst_annotation_idx = variant.annotation[
            "worst_vep_index_per_gene"][gene_id]
        worst_annotation = variant.annotation["vep_annotation"][
            worst_annotation_idx]
        genotype_columns = []
        all_genotypes = []
        for indiv_id in individuals_to_include:
            if indiv_id in variant.genotypes and variant.genotypes[
                    indiv_id].num_alt > 0:
                genotype = variant.genotypes[indiv_id]
                allele_string = ">".join(genotype.alleles)
                all_genotypes.append(indiv_id + ":" + allele_string + "  ")
                genotype_columns.append(allele_string + "   (" +
                                        str(genotype.gq) + ")")
            else:
                genotype_columns.append("")

        measureset_id, clinvar_significance = get_reference(
        ).get_clinvar_info(*variant.unique_tuple())
        freqs = variant.annotation["freqs"]
        columns = [
            gene["symbol"],
            variant.chr,
            variant.pos,
            variant.ref,
            variant.alt,
            variant.vcf_id or "",
            variant.annotation.get("vep_consequence", ""),
            worst_annotation.get("hgvsc", ""),
            worst_annotation.get("hgvsp", "").replace("%3D", "="),
            worst_annotation.get("sift", ""),
            worst_annotation.get("polyphen", ""),
            worst_annotation.get("mutationtaster_pred", ""),
            ";".join(
                set(worst_annotation.get("fathmm_pred", "").split('%3B'))),
            measureset_id,
            clinvar_significance,
            freqs.get("1kg_wgs_phase3", ""),
            freqs.get("1kg_wgs_phase3_popmax", ""),
            freqs.get("exac_v3", ""),
            freqs.get("exac_v3_popmax", ""),
            "".join(all_genotypes),
        ] + genotype_columns
        return [str(value) for value in columns]

    header = [
        "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
        "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
        "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
        "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
        "freq_exac_v3_popmax", "all_genotypes"
    ] + [
        indiv_id + " (from %s)" % indiv_id_to_project_id[indiv_id]
        for indiv_id in individuals_to_include
    ]

    # Build every row before writing so a failure leaves the response empty.
    rows = [_csv_row(variant) for variant in variants_to_export]

    writer = csv.writer(response)
    writer.writerow(header)
    for row in rows:
        writer.writerow(row)
    return response
Esempio n. 20
0
def variants_with_tag(request, project_id, tag=None):
    """List the saved variants of a project, as a page or a CSV download.

    Args:
        request: the incoming HTTP request. The optional ``family`` GET
            parameter is passed on as the family filter; a non-empty
            ``download`` GET parameter selects the CSV response.
        project_id: id of the project to look up (404 via
            ``get_object_or_404`` when it does not exist).
        tag: optional URL-quoted tag. When given, only variants returned by
            ``get_variants_by_tag`` are listed; otherwise everything returned
            by ``get_all_saved_variants_for_project``.

    Returns:
        A ``text/csv`` attachment when ``download`` is set, otherwise the
        rendered ``project/saved_variants.html`` template.

    Raises:
        PermissionDenied: if ``project.can_view`` rejects the current user.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    requested_family_id = request.GET.get('family')
    if tag:
        tag = urllib.unquote(tag)
        variants = get_variants_by_tag(project,
                                       tag,
                                       family_id=requested_family_id)
    else:
        variants = get_all_saved_variants_for_project(
            project, family_id=requested_family_id)
    add_extra_info_to_variants_project(get_reference(),
                                       project,
                                       variants,
                                       add_family_tags=True,
                                       add_populations=True)
    variants.sort(key=lambda var: var.xpos)

    if request.GET.get('download', ''):
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = (
            'attachment; filename="{}_{}.csv"'.format(project_id, tag))

        header_fields = [
            "chrom", "pos", "ref", "alt", "tags", "notes", "family", "gene",
            "effect", "1kg_wgs_phase3", "1kg_wgs_phase3_popmax", "exac_v3",
            "exac_v3_popmax", "sift", "polyphen", "hgvsc", "hgvsp"
        ]

        genotype_header_fields = [
            'sample_id', 'GT_genotype', 'filter', 'AD_allele_depth',
            'DP_read_depth', 'GQ_genotype_quality', 'AB_allele_balance'
        ]
        # Only 10 groups of per-individual columns get a header; a family with
        # more individuals still gets its values written, just unlabelled.
        for i in range(0, 10):
            for h in genotype_header_fields:
                header_fields.append("%s_%d" % (h, i))

        writer = csv.writer(response)
        writer.writerow(header_fields)

        # Individuals are looked up once per family instead of once per
        # variant; many variants usually share the same family.
        individuals_by_family_id = {}
        for variant in variants:
            if not (variant and variant.annotation
                    and variant.annotation.get("vep_annotation")):
                continue

            worst_annotation_idx = variant.annotation[
                "worst_vep_annotation_index"]
            worst_annotation = variant.annotation["vep_annotation"][
                worst_annotation_idx]

            family_id = variant.extras["family_id"]
            if family_id not in individuals_by_family_id:
                family = Family.objects.get(project=project,
                                            family_id=family_id)
                individuals_by_family_id[family_id] = list(
                    family.get_individuals())

            genotype_values = []
            for individual in individuals_by_family_id[family_id]:
                genotype = variant.get_genotype(individual.indiv_id)
                # Same column order as genotype_header_fields.
                genotype_values.extend([
                    individual.indiv_id,
                    "/".join(genotype.alleles)
                    if genotype and genotype.alleles else "./.",
                    genotype.filter if genotype else "",
                    genotype.extras["ad"] if genotype else "",
                    genotype.extras["dp"] if genotype else "",
                    genotype.gq
                    if genotype and genotype.gq is not None else "",
                    "%0.3f" % genotype.ab
                    if genotype and genotype.ab is not None else "",
                ])

            # Loop variables are deliberately not called `tag`: under
            # Python 2 a list comprehension variable leaks into the function
            # scope and would overwrite the `tag` argument.
            if 'family_tags' in variant.extras:
                tags_column = "|".join([
                    family_tag['tag']
                    for family_tag in variant.extras['family_tags']
                ])
            else:
                tags_column = ''
            if 'family_notes' in variant.extras:
                notes_column = "|".join([
                    family_note['user']['display_name'] + ":" +
                    family_note['note']
                    for family_note in variant.extras['family_notes']
                ])
            else:
                notes_column = ''

            freqs = variant.annotation["freqs"]
            row_values = [
                variant.chr,
                variant.pos,
                variant.ref,
                variant.alt,
                tags_column,
                notes_column,
                variant.extras["family_id"],
                worst_annotation.get("symbol", ""),
                variant.annotation.get("vep_consequence", ""),
                freqs.get("1kg_wgs_phase3", ""),
                freqs.get("1kg_wgs_phase3_popmax", ""),
                freqs.get("exac_v3", ""),
                freqs.get("exac_v3_popmax", ""),
                worst_annotation.get("sift", ""),
                worst_annotation.get("polyphen", ""),
                worst_annotation.get("hgvsc", ""),
                worst_annotation.get("hgvsp", "").replace("%3D", "="),
            ] + genotype_values

            writer.writerow(
                [unicode(value).encode('UTF-8') for value in row_values])

        return response
    else:
        family_ids = {variant.extras['family_id'] for variant in variants}
        families = get_filtered_families(filters={
            'project': project,
            'family_id__in': family_ids
        },
                                         fields=['family_id'])

        families_by_id = {
            family.family_id: {
                'project_id': project.project_id,
                'family_id': family.family_id,
                'individuals': family.get_individuals_json(
                    project_id=project.project_id)
            }
            for family in families
        }

        return render(
            request, 'project/saved_variants.html', {
                'project': project,
                'tag': tag,
                'variants_json': json.dumps([v.toJSON() for v in variants]),
                'families_json': json.dumps(families_by_id),
            })
Esempio n. 21
0
    def search_for_gene(self, search_gene_id, project_id_list, max_af=0.01):
        '''
        Search for a gene across project(s) and write the matching variants
        to ``results_<search_gene_id>.tsv`` in the current directory.

        Args:
          1. search_gene_id: Gene ID to search for
          2. project_id_list: list of project ids to narrow the search down
             to; when empty, every project is searched
          3. max_af: variants whose highest value in annotation['freqs'] is
             greater than or equal to this threshold are skipped
        '''
        gene_id = get_gene_id_from_str(search_gene_id, get_reference())
        gene = get_reference().get_gene(gene_id)

        print("Staring gene search for: %s %s in projects: %s\n" %
              (search_gene_id, gene['gene_id'], ", ".join(project_id_list)))
        print("Max AF threshold: %s" % max_af)

        # all rare coding variants
        variant_filter = get_default_variant_filter(
            'all_coding',
            mall.get_annotator().reference_population_slugs)
        print("All Filters: ")
        pprint(variant_filter.toJSON())

        output_filename = 'results_' + search_gene_id + '.tsv'

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        # The context manager guarantees the file is closed even when a
        # lookup or the search itself raises part-way through.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            if project_id_list:
                # Crude validation: indexing [0] raises IndexError for an
                # unknown project id before any searching starts.
                for project_id in project_id_list:
                    project = Project.objects.filter(
                        project_id=project_id)[0]  # TODO validate
            else:
                project_id_list = [
                    p.project_id for p in Project.objects.all()
                ]

            for project_id in project_id_list:
                project = Project.objects.filter(project_id=project_id)[0]
                if get_project_datastore(
                        project_id).project_collection_is_loaded(project_id):
                    print("Running on project %s" % project_id)
                else:
                    print(
                        "Skipping project %s - gene search is not enabled for this project"
                        % project_id)
                    continue

                for variant in project_analysis.get_variants_in_gene(
                        project, gene_id, variant_filter=variant_filter):
                    if max(variant.annotation['freqs'].values()) >= max_af:
                        continue
                    add_extra_info_to_variants_project(
                        get_reference(), project, [variant])

                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    for indiv_id, genotype in variant.genotypes.items():
                        # filter value is stored in the genotypes even though
                        # it's the same for all individuals
                        pass_filter = genotype.filter
                        if genotype.num_alt > 0:
                            allele_balance = (genotype.ab
                                              if genotype.ab is not None else
                                              float('NaN'))
                            all_genotypes_list.append(
                                "%s[gt:%s GQ:%s AB:%0.3f]" %
                                (indiv_id, ">".join(genotype.alleles),
                                 genotype.gq, allele_balance))

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                        variant.unique_tuple(), ("", ""))
                    freqs = variant.annotation["freqs"]
                    row_values = [
                        project_id,
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        freqs.get("1kg_wgs_phase3", ""),
                        freqs.get("1kg_wgs_phase3_popmax", ""),
                        freqs.get("exac_v3", ""),
                        freqs.get("exac_v3_popmax", ""),
                        ", ".join(all_genotypes_list),
                    ]
                    writer.writerow([str(value) for value in row_values])

        print("Wrote out %s" % output_filename)
Esempio n. 22
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project

    Collects the rare coding variants and the knockout individuals for a gene,
    across the main project or the projects named in the comma-separated
    ``selected_projects`` GET parameter, then either renders
    ``project/gene_quicklook.html`` or returns a CSV.

    Args:
        request: the incoming HTTP request. The ``download`` GET parameter
            selects a CSV export and must be ``knockouts`` or
            ``rare_variants``.
        project_id: id of the main project (404 via ``get_object_or_404``
            when it does not exist).
        gene_id: gene identifier string, or None to render the empty page.

    Returns:
        An HttpResponse: the rendered page, a ``text/csv`` attachment, the
        plain text "Unauthorized" when the user may not view a requested
        project, or a 400 response for an unrecognised ``download`` value.
    """
    main_project = get_object_or_404(Project, project_id=project_id)
    if not main_project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if main_project.seqr_project and main_project.seqr_project.has_new_search:
        new_page_url = '/variant_search/project/{}'.format(main_project.seqr_project.guid)
    else:
        new_page_url = None

    # other projects this user can view
    other_projects = get_loaded_projects_for_user(request.user, fields=['project_id', 'project_name'])

    if other_projects:
        other_projects_json = json.dumps([
            {'project_id': p.project_id, 'project_name': p.project_name}
            for p in sorted(other_projects, key=lambda p: p.project_id.lower())
        ])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(request, 'project/gene_quicklook.html', {
            'project': main_project,
            'gene': None,
            'gene_json': None,
            'rare_variants_json': None,
            'individuals_json': None,
            'knockouts_json': None,
            'other_projects_json': other_projects_json,
            'new_page_url': new_page_url,
        })

    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        project_ids = projects_to_search_param.split(",")
        projects_to_search = [project for project in other_projects if project.project_id in project_ids]
        if len(projects_to_search) < len(project_ids):
            # If not all the specified project ids are in the other projects list then they are not authorized
            return HttpResponse("Unauthorized")
    else:
        project_ids = [main_project.project_id]
        projects_to_search = [main_project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)

    # all rare coding variants
    variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    individ_ids_and_variants = []
    for project in projects_to_search:
        all_project_variants = project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter)

        # compute knockout individuals
        knockout_ids, variation = get_knockouts_in_gene(project, gene_id, all_project_variants)
        for indiv_id in knockout_ids:
            variants = variation.get_relevant_variants_for_indiv_ids([indiv_id])
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })

        # compute rare variants
        project_variants = []
        for variant in all_project_variants:
            max_af = max([freq for label, freq in variant.annotation['freqs'].items() if label != "AF"])  # don't filter on within-cohort AF

            # skip variants that no individual actually carries
            if not any([indiv_id for indiv_id, genotype in variant.genotypes.items() if genotype.num_alt > 0]):
                continue
            if max_af >= .01:
                continue

            # remember which project each genotyped individual came from
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = project.project_id

            # save this variant, or just merge in its genotypes if the same
            # variant has already been seen in another project
            variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos, variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                seen_genotypes = rare_variant_dict[variant_id].genotypes
                for indiv_id, genotype in variant.genotypes.items():
                    existing_genotype = seen_genotypes.get(indiv_id)
                    if not existing_genotype or existing_genotype.num_alt == -1:
                        seen_genotypes[indiv_id] = genotype
        if project != main_project:
            add_extra_info_to_variants_project(get_reference(), project, project_variants)
        rare_variants.extend(project_variants)

    all_variants = sum([i['variants'] for i in individ_ids_and_variants], rare_variants)
    add_extra_info_to_variants_project(get_reference(), main_project, all_variants, add_family_tags=True)
    download_csv = request.GET.get('download', '')
    if download_csv:
        # Previously an unrecognised value fell through both branches and
        # crashed with a NameError; reject it explicitly instead. The value is
        # not echoed back since it is user-controlled.
        if download_csv not in ('knockouts', 'rare_variants'):
            return HttpResponse("Unsupported download type", status=400)

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(download_csv, gene.get("symbol") or gene.get("transcript_name"))

        # Pick the per-individual columns and the variants to export.
        if download_csv == 'knockouts':
            individuals_to_include = [entry["indiv_id"] for entry in individ_ids_and_variants]
            variants_to_export = []
            for entry in individ_ids_and_variants:
                variants_to_export.extend(entry["variants"])
        else:
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            variants_to_export = rare_variants

        def get_row(variant, worst_annotation):
            # Build one CSV row: fixed annotation columns followed by one
            # genotype column per entry of individuals_to_include.
            if 'clinvar_allele_id' in variant.extras:
                measureset_id = variant.extras['clinvar_allele_id']
                clinvar_significance = variant.extras['clinvar_clinsig']
            else:
                measureset_id, clinvar_significance = get_reference().get_clinvar_info(*variant.unique_tuple())

            genotypes = []
            all_genotypes_string = ""
            for indiv_id in individuals_to_include:
                if indiv_id in variant.genotypes and variant.genotypes[indiv_id].num_alt > 0:
                    genotype = variant.genotypes[indiv_id]
                    allele_string = ">".join(genotype.alleles)
                    all_genotypes_string += indiv_id + ":" + allele_string + "  "
                    genotypes.append(allele_string + "   (" + str(genotype.gq) + ")")
                else:
                    genotypes.append("")

            annotation = variant.annotation
            freqs = annotation["freqs"]
            return [
                gene["symbol"],
                variant.chr,
                variant.pos,
                variant.ref,
                variant.alt,
                variant.vcf_id or annotation.get("rsid") or "",
                annotation.get("vep_consequence") or "",
                worst_annotation.get("hgvsc") or "",
                (worst_annotation.get("hgvsp") or "").replace("%3D", "="),
                annotation.get("sift") or "",
                annotation.get("polyphen") or "",
                annotation.get("mutationtaster_pred") or annotation.get("muttaster") or "",
                (";".join(set((worst_annotation.get("fathmm_pred") or "").split('%3B')))) or annotation.get("fathmm") or "",

                measureset_id or "",
                clinvar_significance or "",

                freqs.get("1kg_wgs_phase3") or freqs.get("1kg_wgs_AF") or "",
                freqs.get("1kg_wgs_phase3_popmax") or freqs.get("1kg_wgs_popmax_AF") or "",
                freqs.get("exac_v3") or freqs.get("exac_v3_AF") or "",
                freqs.get("exac_v3_popmax") or freqs.get("exac_v3_popmax_AF") or "",
                freqs.get("gnomad_exomes_AF") or "",
                freqs.get("gnomad_exomes_popmax_AF") or "",
                freqs.get("gnomad_genomes_AF") or "",
                freqs.get("gnomad_genomes_popmax_AF") or "",
                all_genotypes_string,
            ] + genotypes

        rows = []
        for variant in variants_to_export:
            worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
            worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
            rows.append([str(value) for value in get_row(variant, worst_annotation)])

        header = ["gene", "chr", "pos", "ref", "alt", "rsID", "impact",
                  "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id", "clinvar_clinical_sig",
                  "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
                  "freq_exac_v3", "freq_exac_v3_popmax",
                  "freq_gnomad_exomes", "freq_gnomad_exomes_popmax",
                  "freq_gnomad_genomes", "freq_gnomad_genomes_popmax",
                  "all_genotypes"]
        header += [
            indiv_id + " (from %s)" % indiv_id_to_project_id[indiv_id]
            for indiv_id in individuals_to_include
        ]

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        # Replace the variant objects by their JSON form so the knockout list
        # can be serialized below.
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [v.toJSON() for v in variants]

        individ_ids = {i['indiv_id'] for i in individ_ids_and_variants}
        for var in rare_variants:
            individ_ids.update(var.genotypes.keys())
        individuals = Individual.objects.filter(
            indiv_id__in=individ_ids, project__project_id__in=project_ids
        ).select_related('project').select_related('family').only('project__project_id', 'family__family_id', *Individual.INDIVIDUAL_JSON_FIELDS_NO_IDS)

        return render(request, 'project/gene_quicklook.html', {
            'gene': gene,
            'gene_json': json.dumps(gene),
            'project': main_project,
            'rare_variants_json': json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json': json.dumps([i.get_json_obj(skip_has_variant_data=True) for i in individuals]),
            'knockouts_json': json.dumps(individ_ids_and_variants),
            'other_projects_json': other_projects_json,
            'new_page_url': new_page_url,
        })
Esempio n. 23
0
    def search_for_genes(self,
                         gene_or_variant_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01,
                         knockouts=False,
                         in_clinvar_only=False,
                         include_non_coding=False):
        """
        Search for genes or single variants across project(s) and write the
        hits to a tab-separated file.

        Args:
            gene_or_variant_ids (list): gene id strings (e.g. 'ENSG..'), or
                variant ids of the form 'chrom-pos' / 'chrom-pos-ref-alt'.
            project_id_list (list): ids of the projects to search
            output_filename (string): output file name
            max_af (float): for gene searches, variants whose highest value
                in annotation['freqs'] is >= this threshold are skipped
            knockouts (bool): for gene searches, report the variants of
                knockout individuals instead of all coding variants
            in_clinvar_only (bool): only keep variants whose ClinVar
                significance contains "path"
            include_non_coding (bool): clear the filter's so_annotations
                (ignored when knockouts is set)
        """

        projects = [
            Project.objects.get(project_id=project_id)
            for project_id in project_id_list
        ]

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "gnomad-exomes",
            "gnomad-genomes", "families", "all_genotypes"
        ]

        # Compiled once rather than for every project/id combination.
        chrom_pos_re = re.compile("([0-9XY]{1,2})-([0-9]{1,9})")
        chrom_pos_ref_alt_re = re.compile(
            "([0-9XY]{1,2})-([0-9]{1,9})-([ACTG]+)-([ACTG]+)")

        # The context manager guarantees the file is closed even when the
        # search raises part-way through.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            # all rare coding variants
            if not knockouts:
                variant_filter = get_default_variant_filter(
                    'all_coding',
                    mall.get_annotator().reference_population_slugs)
                if include_non_coding:
                    variant_filter.so_annotations = []
                print("All Filters: ")
                pprint(variant_filter.toJSON())

            print("Starting search for:\n%s\nin projects:\n%s\n" %
                  (", ".join(gene_or_variant_ids), ", ".join(
                      [p.project_id for p in projects])))

            for project in projects:
                project_id = project.project_id
                if get_project_datastore(project).project_collection_is_loaded(
                        project):
                    print("=====================")
                    print("Searching project %s" % project_id)
                else:
                    print(
                        "Skipping project %s - gene search is not enabled for this project"
                        % project_id)
                    continue

                # indiv_id -> Individual, or 'deleted' when the lookup failed
                indiv_cache = {}
                for gene_or_variant_id in gene_or_variant_ids:
                    # The ref/alt pattern starts with the chrom-pos pattern,
                    # so it can only match when chrom_pos_match does too.
                    chrom_pos_match = chrom_pos_re.match(gene_or_variant_id)
                    chrom_pos_ref_alt_match = chrom_pos_ref_alt_re.match(
                        gene_or_variant_id)

                    if chrom_pos_match:
                        chrom = chrom_pos_match.group(1)
                        pos = int(chrom_pos_match.group(2))
                        xpos = genomeloc.get_xpos(chrom, pos)
                        ref = alt = None
                        if chrom_pos_ref_alt_match:
                            ref = chrom_pos_ref_alt_match.group(3)
                            alt = chrom_pos_ref_alt_match.group(4)

                        variant = get_project_datastore(
                            project).get_single_variant(
                                project.project_id, None, xpos, ref, alt)
                        if variant is None:
                            continue
                        variants = [variant]
                        print(
                            "-- searching %s for variant %s-%s-%s: found %s" %
                            (project_id, xpos, ref, alt, variant))
                        worst_annotation_idx = variant.annotation[
                            'worst_vep_annotation_index']
                        print(variant.annotation["vep_annotation"]
                              [worst_annotation_idx])
                        gene_id = variant.annotation["vep_annotation"][
                            worst_annotation_idx]['gene_id']
                        gene = get_reference().get_gene(gene_id)
                    else:
                        gene_id = get_gene_id_from_str(gene_or_variant_id,
                                                       get_reference())
                        gene = get_reference().get_gene(gene_id)
                        print("-- searching %s for gene %s (%s)" %
                              (project_id, gene["symbol"], gene_id))

                        if knockouts:
                            knockout_ids, variation = project_analysis.get_knockouts_in_gene(
                                project, gene_id)
                            variants = variation.get_relevant_variants_for_indiv_ids(
                                knockout_ids)
                        else:
                            variants = project_analysis.get_variants_in_gene(
                                project,
                                gene_id,
                                variant_filter=variant_filter)

                    for variant in variants:
                        # The AF threshold only applies to gene searches; an
                        # explicitly requested variant is always reported.
                        if not chrom_pos_match and max(
                                variant.annotation['freqs'].values()
                        ) >= max_af:
                            continue

                        add_extra_info_to_variants_project(
                            get_reference(), project, [variant])
                        worst_annotation_idx = variant.annotation[
                            "worst_vep_index_per_gene"].get(gene_id)

                        if worst_annotation_idx is not None:
                            worst_annotation = variant.annotation[
                                "vep_annotation"][worst_annotation_idx]
                        else:
                            worst_annotation = None
                        all_genotypes_list = []
                        pass_filter = "N/A"
                        family_ids = set()
                        for indiv_id, genotype in variant.genotypes.items():
                            if indiv_id in indiv_cache:
                                individual = indiv_cache[indiv_id]
                                if individual == 'deleted':
                                    continue
                            else:
                                try:
                                    individual = Individual.objects.get(
                                        project=project, indiv_id=indiv_id)
                                    indiv_cache[indiv_id] = individual
                                except ObjectDoesNotExist:
                                    # this can happen when an individual is
                                    # deleted from the project - from
                                    # postgres, but not from mongo
                                    indiv_cache[indiv_id] = 'deleted'
                                    continue
                                except MultipleObjectsReturned:
                                    # when several families have an
                                    # individual with the same id
                                    individuals = Individual.objects.filter(
                                        project=project, indiv_id=indiv_id)
                                    individual = individuals[0]
                                    indiv_cache[indiv_id] = individual

                            # filter value is stored in the genotypes even
                            # though it's the same for all individuals
                            pass_filter = genotype.filter
                            if genotype.num_alt > 0:
                                family_ids.add(individual.family.family_id)
                                if individual.affected == "A":
                                    affected_label = "[Affected]"
                                elif individual.affected == "N":
                                    affected_label = "[-]"
                                else:
                                    affected_label = "[?]"
                                allele_balance = (genotype.ab
                                                  if genotype.ab is not None
                                                  else float('NaN'))
                                all_genotypes_list.append(
                                    "%s/%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                    (individual.family.family_id, indiv_id,
                                     affected_label,
                                     ">".join(genotype.alleles), genotype.gq,
                                     allele_balance))

                        # nobody (still in the project) carries this variant
                        if not all_genotypes_list:
                            continue

                        measureset_id, clinvar_significance = get_reference(
                        ).get_clinvar_info(*variant.unique_tuple())
                        if in_clinvar_only and (
                                not clinvar_significance
                                or "path" not in clinvar_significance.lower()):
                            continue

                        # An empty dict yields the same "" defaults the
                        # per-field `if worst_annotation else ""` checks gave.
                        annotation = worst_annotation or {}
                        freqs = variant.annotation["freqs"]
                        row_values = [
                            project_id,
                            # NOTE(review): this writes str() of the whole
                            # gene object under the "gene" column; possibly
                            # gene["symbol"] was intended - confirm.
                            gene,
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            pass_filter,
                            variant.annotation.get("vep_consequence", ""),
                            annotation.get("hgvsc", ""),
                            (annotation.get("hgvsp", "") or "").replace(
                                "%3D", "="),
                            annotation.get("sift", ""),
                            annotation.get("polyphen", ""),
                            annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    annotation.get("fathmm_pred",
                                                   "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            freqs.get("1kg_wgs_phase3", ""),
                            freqs.get("1kg_wgs_phase3_popmax", ""),
                            freqs.get("exac_v3", ""),
                            freqs.get("exac_v3_popmax", ""),
                            freqs.get("gnomad-exomes2", ""),
                            freqs.get("gnomad-genomes2", ""),
                            ", ".join(sorted(family_ids)),
                            ", ".join(all_genotypes_list),
                        ]

                        writer.writerow([str(value) for value in row_values])

        print("Wrote out %s" % output_filename)
Esempio n. 24
0
def _gene_quicklook_csv_row(gene, gene_id, variant, individuals_to_include):
    """Build one CSV row (a list of strings) describing `variant` in `gene`.

    The row holds the fixed annotation columns, then an "all_genotypes"
    summary column, then one column per entry of `individuals_to_include`
    (in that order): the individual's alleles and genotype quality when they
    carry at least one alt allele, otherwise an empty string.
    """
    worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][
        gene_id]
    worst_annotation = variant.annotation["vep_annotation"][
        worst_annotation_idx]

    genotype_columns = []
    all_genotypes_parts = []
    for indiv_id in individuals_to_include:
        genotype = variant.genotypes[indiv_id]
        allele_string = ">".join(genotype.alleles)
        all_genotypes_parts.append(indiv_id + ":" + allele_string + "  ")
        if genotype.num_alt > 0:
            genotype_columns.append(allele_string + "   (" +
                                    str(genotype.gq) + ")")
        else:
            genotype_columns.append("")

    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
        variant.unique_tuple(), ("", ""))
    freqs = variant.annotation["freqs"]

    fixed_columns = [
        gene["symbol"],
        variant.chr,
        variant.pos,
        variant.ref,
        variant.alt,
        variant.vcf_id or "",
        variant.annotation.get("vep_consequence", ""),
        worst_annotation.get("hgvsc", ""),
        # `or ""` guards against the key being present with a None value,
        # which would otherwise make .replace() raise AttributeError.
        (worst_annotation.get("hgvsp", "") or "").replace("%3D", "="),
        worst_annotation.get("sift", ""),
        worst_annotation.get("polyphen", ""),
        worst_annotation.get("mutationtaster_pred", ""),
        # fathmm predictions arrive joined by a URL-encoded ';' (%3B);
        # collapse duplicates and re-join with a plain ';'.
        ";".join(set(worst_annotation.get("fathmm_pred", "").split('%3B'))),
        measureset_id,
        clinvar_significance,
        freqs.get("1kg_wgs_phase3", ""),
        freqs.get("1kg_wgs_phase3_popmax", ""),
        freqs.get("exac_v3", ""),
        freqs.get("exac_v3_popmax", ""),
        "".join(all_genotypes_parts),
    ]
    return [str(value) for value in fixed_columns] + genotype_columns


def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project

    Renders 'project/gene_quicklook.html' with the gene, the project's rare
    coding variants in that gene (max reference-population frequency < 0.01
    and carried by at least one individual), and the knockout individuals
    with their relevant variants.

    If the `download` GET parameter is 'knockouts' or 'rare_variants', the
    corresponding table is returned as a CSV attachment instead. Any other
    non-empty `download` value gets a 400 response.

    Args:
        request: the incoming HTTP request.
        project_id: project_id of the Project to look up (404 if missing).
        gene_id: gene identifier string, or None to render an empty page.

    Returns:
        An HttpResponse: the rendered page, a CSV attachment, an
        "Unauthorized" message, or a 400 for an unknown download type.
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if project.project_status == Project.NEEDS_MORE_PHENOTYPES and not request.user.is_staff:
        return render(request, 'analysis_unavailable.html',
                      {'reason': 'Awaiting phenotype data.'})

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
            })

    # Reject unsupported download types up front: previously they fell
    # through to the CSV writer with `rows` / `individuals_to_include`
    # unbound and crashed with an UnboundLocalError. The user-supplied value
    # is deliberately not echoed back in the response.
    download_csv = request.GET.get('download', '')
    if download_csv and download_csv not in ('knockouts', 'rare_variants'):
        return HttpResponse("Unknown download type", status=400)

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(project_id + " - staring gene search for: %s %s \n" %
                     (gene_id, gene))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    rare_variants = []
    for variant in project_analysis.get_variants_in_gene(
            project, gene_id, variant_filter=variant_filter):
        max_af = max(variant.annotation['freqs'].values())
        # Skip variants that no individual actually carries.
        if not any(genotype.num_alt > 0
                   for genotype in variant.genotypes.values()):
            continue
        if max_af < .01:
            rare_variants.append(variant)
    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    # compute knockout individuals
    individ_ids_and_variants = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for indiv_id in knockout_ids:
        variants = variation.get_relevant_variants_for_indiv_ids([indiv_id])
        add_extra_info_to_variants_project(get_reference(), project, variants)
        individ_ids_and_variants.append({
            'indiv_id': indiv_id,
            'variants': variants,
        })

    sys.stderr.write(
        "Project-wide gene search retrieved %s rare variants for gene: %s \n" %
        (len(rare_variants), gene_id))

    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
                download_csv, gene["transcript_name"])

        rows = []
        if download_csv == 'knockouts':
            # One genotype column per knockout individual; one row per
            # (knockout individual, relevant variant) pair.
            individuals_to_include = [
                entry["indiv_id"] for entry in individ_ids_and_variants
            ]
            for entry in individ_ids_and_variants:
                for variant in entry["variants"]:
                    rows.append(
                        _gene_quicklook_csv_row(gene, gene_id, variant,
                                                individuals_to_include))
        else:  # download_csv == 'rare_variants' (validated above)
            # One genotype column per individual carrying any rare variant,
            # in first-seen order.
            individuals_to_include = []
            seen_indiv_ids = set()
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in seen_indiv_ids:
                        seen_indiv_ids.add(indiv_id)
                        individuals_to_include.append(indiv_id)
            for variant in rare_variants:
                rows.append(
                    _gene_quicklook_csv_row(gene, gene_id, variant,
                                            individuals_to_include))

        header = [
            "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
            "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
            "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
            "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
            "freq_exac_v3_popmax", "all_genotypes"
        ] + individuals_to_include

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response

    # HTML page: variants must be converted to plain JSON-able objects
    # before the knockout list can be passed to json.dumps.
    for entry in individ_ids_and_variants:
        entry["variants"] = [v.toJSON() for v in entry["variants"]]

    return render(
        request, 'project/gene_quicklook.html', {
            'gene':
            gene,
            'gene_json':
            json.dumps(gene),
            'project':
            project,
            'rare_variants_json':
            json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json':
            json.dumps(
                [i.get_json_obj() for i in project.get_individuals()]),
            'knockouts_json':
            json.dumps(individ_ids_and_variants),
        })