Ejemplo n.º 1
0
 def handle(self, *args, **options):
     """
     Delete the variant-store and project-datastore data of each project.

     Every positional argument is treated as a project id; the Project row
     itself is only looked up, not deleted.
     """
     for pid in args:
         print("Deleting data from mongodb for project: " + pid)
         project = Project.objects.get(project_id=pid)
         variant_store = get_mall(project).variant_store
         variant_store.delete_project(pid)
         project_store = get_project_datastore(project)
         project_store.delete_project_store(pid)
         print("Done")
Ejemplo n.º 2
0
 def handle(self, *args, **options):
     """
     For each project id given in ``args``, wipe that project's data from
     the variant store and from the project datastore.
     """
     for current_id in args:
         print("Deleting data from mongodb for project: " + current_id)
         project = Project.objects.get(project_id=current_id)

         # Variant store first, then the project-wide store.
         mall_for_project = get_mall(project)
         mall_for_project.variant_store.delete_project(current_id)
         datastore = get_project_datastore(project)
         datastore.delete_project_store(current_id)
         print("Done")
Ejemplo n.º 3
0
def delete_project(project_id, delete_data=False):
    """
    Remove a project together with its individuals and families.

    Args:
        project_id: id used to look up the Project.
        delete_data: when truthy, the project's datastore and variant-store
            data are deleted as well, before the database rows are removed.
    """
    project = Project.objects.get(project_id=project_id)

    if delete_data:
        project_store = get_project_datastore(project)
        project_store.delete_project_store(project_id)
        variant_store = get_mall(project).variant_store
        variant_store.delete_project(project_id)

    # Related rows are removed before the project row itself.
    individuals = project.individual_set.all()
    individuals.delete()
    families = project.family_set.all()
    families.delete()
    project.delete()
Ejemplo n.º 4
0
def delete_project(project_id, delete_data=False):
    """
    Delete the Project identified by ``project_id`` and its dependents.

    With ``delete_data`` set, the project datastore and the variant store are
    cleared for this project first. The project's individuals and families
    are always deleted, followed by the project itself.
    """
    target = Project.objects.get(project_id=project_id)

    if delete_data:
        get_project_datastore(target).delete_project_store(project_id)
        mall_for_project = get_mall(target)
        mall_for_project.variant_store.delete_project(project_id)

    individual_rows = target.individual_set.all()
    individual_rows.delete()
    family_rows = target.family_set.all()
    family_rows.delete()
    target.delete()
Ejemplo n.º 5
0
def delete_project(project_id):
    """
    Delete a project, its datastore/variant-store data, its individuals and
    its families, printing a message before and after.

    Args:
        project_id: id used to look up the Project and to address the stores.
    """
    print("Deleting %s" % project_id)
    project = Project.objects.get(project_id=project_id)

    # Stored variant data goes first, then the database rows.
    project_store = get_project_datastore(project_id)
    project_store.delete_project_store(project_id)
    variant_store = get_mall(project_id).variant_store
    variant_store.delete_project(project_id)

    individuals = project.individual_set.all()
    individuals.delete()
    families = project.family_set.all()
    families.delete()
    project.delete()
    print("Successfully deleted %s" % project_id)
Ejemplo n.º 6
0
def delete_project(project_id):
    """
    Fully remove the project identified by ``project_id``.

    Clears the project datastore and the variant store for this id, deletes
    the project's individuals and families, and finally deletes the project.
    Progress is reported with print().
    """
    print("Deleting %s" % project_id)
    doomed_project = Project.objects.get(project_id=project_id)

    get_project_datastore(project_id).delete_project_store(project_id)
    mall_for_project = get_mall(project_id)
    mall_for_project.variant_store.delete_project(project_id)

    individual_rows = doomed_project.individual_set.all()
    individual_rows.delete()
    family_rows = doomed_project.family_set.all()
    family_rows.delete()
    doomed_project.delete()

    print("Successfully deleted %s" % project_id)
Ejemplo n.º 7
0
def load_project_datastore(project_id, vcf_files=None, start_from_chrom=None, end_with_chrom=None):
    """
    Load this project into the project datastore
    Which allows queries over all variants in a project

    The project's existing store is deleted and re-created, variants are then
    added from the project's VCF files (sorted by path), and finally the
    project collection is marked as loaded.

    Args:
        project_id: id of the Project to load.
        vcf_files: optional collection of VCF file paths. When given, VCF
            files whose path is not in it are skipped.
        start_from_chrom: passed through to add_variants_to_project_from_vcf.
        end_with_chrom: passed through to add_variants_to_project_from_vcf.
    """
    print(date.strftime(datetime.now(), "%m/%d/%Y %H:%M:%S  -- starting load_project_datastore: " + project_id))
    project = Project.objects.get(project_id=project_id)
    get_project_datastore(project_id).delete_project_store(project_id)
    get_project_datastore(project_id).add_project(project_id)
    for vcf_file in sorted(project.get_all_vcf_files(), key=lambda v: v.path()):
        vcf_file_path = vcf_file.path()
        if vcf_files is not None and vcf_file_path not in vcf_files:
            print("Skipping - %(vcf_file_path)s is not in %(vcf_files)s" % locals())
            # Actually skip the file: without this `continue` the message was
            # printed but the VCF was loaded anyway.
            continue

        # Only load genotypes for individuals that are both in the project
        # and present as samples in this VCF.
        project_indiv_ids = [i.indiv_id for i in project.get_individuals()]
        vcf_ids = vcf_file.sample_id_list()
        indiv_id_list = [i for i in project_indiv_ids if i in vcf_ids]
        get_project_datastore(project_id).add_variants_to_project_from_vcf(
            vcf_file.file_handle(),
            project_id,
            indiv_id_list=indiv_id_list,
            start_from_chrom=start_from_chrom,
            end_with_chrom=end_with_chrom
        )

    get_project_datastore(project_id).set_project_collection_to_loaded(project_id)

    print(date.strftime(datetime.now(), "%m/%d/%Y %H:%M:%S  -- load_project_datastore: " + project_id + " is done!"))
Ejemplo n.º 8
0
def project_home(request, project_id):
    """
    Render the home page of a project.

    Responds with a plain "unauthorized" HttpResponse when the requesting
    user may not view the project. Otherwise the project is marked as
    accessed and "project.html" is rendered with the user's authorization
    level and capabilities.
    """
    project = get_object_or_404(Project, project_id=project_id)
    user = request.user
    if not project.can_view(user):
        return HttpResponse("unauthorized")
    project.set_accessed()

    # First matching rule wins: admin, editor, public, viewer.
    if project.can_admin(user):
        auth_level = "admin"
    elif project.can_edit(user):
        auth_level = "editor"
    elif project.is_public:
        auth_level = "public"
    elif project.can_view(user):
        auth_level = "viewer"
    else:
        raise Exception("Authx - how did we get here?!?")

    can_edit = project.can_edit(user)
    is_manager = project.can_admin(user)
    has_gene_search = get_project_datastore(project_id).project_collection_is_loaded(project_id)

    context = {
        "project": project,
        "auth_level": auth_level,
        "can_edit": can_edit,
        "is_manager": is_manager,
        "has_gene_search": has_gene_search,
    }
    return render(request, "project.html", context)
Ejemplo n.º 9
0
def project_home(request, project_id):
    """
    Render the home page of a project.

    Raises PermissionDenied when the requesting user may not view the
    project. Otherwise marks the project as accessed and renders
    'project.html' with the user's authorization level, PhenoTips support
    flag, gene-search availability and, when the project has a
    ``seqr_project``, the URL of its new project page.
    """
    project = get_object_or_404(Project, project_id=project_id)
    user = request.user
    if not project.can_view(user):
        raise PermissionDenied
    project.set_accessed()

    # First matching rule wins: admin, editor, public, viewer.
    if project.can_admin(user):
        auth_level = 'admin'
    elif project.can_edit(user):
        auth_level = 'editor'
    elif project.is_public:
        auth_level = 'public'
    elif project.can_view(user):
        auth_level = 'viewer'
    else:
        raise Exception("Authx - how did we get here?!?")

    # PhenoTips is supported unless the project is listed in the setting.
    excluded_projects = settings.PROJECTS_WITHOUT_PHENOTIPS
    phenotips_supported = excluded_projects is None or project_id not in excluded_projects

    can_edit = project.can_edit(user)
    is_manager = project.can_admin(user)
    has_gene_search = get_project_datastore(project).project_collection_is_loaded(project)
    if project.seqr_project:
        new_page_url = '/project/{}/project_page'.format(project.seqr_project.guid)
    else:
        new_page_url = None

    context = {
        'phenotips_supported': phenotips_supported,
        'project': project,
        'auth_level': auth_level,
        'can_edit': can_edit,
        'is_manager': is_manager,
        'has_gene_search': has_gene_search,
        'new_page_url': new_page_url,
    }
    return render(request, 'project.html', context)
Ejemplo n.º 10
0
def project_home(request, project_id):
    """
    Render the home page of a project.

    Raises PermissionDenied when the requesting user may not view the
    project. Otherwise marks the project as accessed and renders
    'project.html' with the user's authorization level, the PhenoTips
    support flag and whether the project collection is loaded.
    """
    project = get_object_or_404(Project, project_id=project_id)
    user = request.user
    if not project.can_view(user):
        raise PermissionDenied
    project.set_accessed()

    # First matching rule wins: admin, editor, public, viewer.
    if project.can_admin(user):
        auth_level = 'admin'
    elif project.can_edit(user):
        auth_level = 'editor'
    elif project.is_public:
        auth_level = 'public'
    elif project.can_view(user):
        auth_level = 'viewer'
    else:
        raise Exception("Authx - how did we get here?!?")

    # PhenoTips is supported unless the project is listed in the setting.
    without_phenotips = settings.PROJECTS_WITHOUT_PHENOTIPS
    phenotips_supported = without_phenotips is None or project_id not in without_phenotips

    can_edit = project.can_edit(user)
    is_manager = project.can_admin(user)
    has_gene_search = get_project_datastore(project_id).project_collection_is_loaded(project_id)

    context = {
        'phenotips_supported': phenotips_supported,
        'project': project,
        'auth_level': auth_level,
        'can_edit': can_edit,
        'is_manager': is_manager,
        'has_gene_search': has_gene_search,
    }
    return render(request, 'project.html', context)
Ejemplo n.º 11
0
    def handle(self, *args, **options):
        """
        Load CADD scores for all variants in a project, or all variants in the annotator_store.

        Behaviour depends on ``options``:
          * ``cadd_file`` set: scores are loaded from that file through
            load_from_cadd_file and the command finishes.
          * ``project_id`` set: variants of that project's collection that
            have no ``annotation.cadd_phred`` are looked up with fetch().
          * neither: the same is done for the annotator store's own variants.

        In the last two modes each fetched score is written to the matching
        document in ``annotator_store.variants``.
        """
        annotator_store = mall.get_annotator().get_annotator_datastore()
        if options['cadd_file']:
            print("Loading " + options['cadd_file'])
            load_from_cadd_file(options['cadd_file'])
            # Nothing to iterate in this mode. Falling through used to reach
            # the loop below with `variant_collection` unassigned
            # (UnboundLocalError).
            print("Done")
            return

        missing_cadd_query = {'annotation.cadd_phred': {'$exists': False}}
        if options['project_id']:
            print("Loading " + options['project_id'])
            project = Project.objects.get(project_id=options['project_id'])
            project_collection = get_project_datastore(project)._get_project_collection(options['project_id'])
            variant_collection = project_collection.find(missing_cadd_query)
        else:
            variant_collection = annotator_store.variants.find(missing_cadd_query)

        for r in tqdm.tqdm(variant_collection, unit=' variants'):
            chrom, pos = genomeloc.get_chr_pos(r['xpos'])
            cadd_phred = fetch(chrom, pos, r['ref'], r['alt'])
            if cadd_phred is not None:
                result = annotator_store.variants.update(
                    {'xpos': r['xpos'], 'ref': r['ref'], 'alt': r['alt']},
                    {'$set': {'annotation.cadd_phred': cadd_phred}},
                    upsert=False,
                )
                # The variant is expected to already exist in the annotator store.
                assert result['updatedExisting']

        print("Done")
Ejemplo n.º 12
0
def get_variants_in_gene(project, gene_id, variant_filter=None, quality_filter=None):
    """
    Return the project's variants in ``gene_id`` after applying ``variant_filter``.

    The variants are read from the project datastore and then passed through
    search_utils.filter_gene_variants_by_variant_filter.
    NOTE: ``quality_filter`` is accepted but not used by this function.
    """
    datastore = get_project_datastore()
    gene_variants = datastore.get_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    return search_utils.filter_gene_variants_by_variant_filter(gene_variants, gene_id, variant_filter)
Ejemplo n.º 13
0
def mendelian_variant_search(request, project_id, family_id):
    """
    Render the mendelian variant search page for one family of a project.

    Raises PermissionDenied when the user may not view the project. Renders
    'analysis_unavailable.html' when the family has no variant data, or when
    the project still needs phenotypes and the user is not staff.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # Work out whether the analysis has to be refused, and why.
    unavailable_reason = None
    if not family.has_data('variation'):
        unavailable_reason = 'This family does not have any variant data.'
    elif project.project_status == Project.NEEDS_MORE_PHENOTYPES and not request.user.is_staff:
        unavailable_reason = 'Awaiting phenotype data.'
    if unavailable_reason is not None:
        return render(request, 'analysis_unavailable.html', {
            'reason': unavailable_reason
        })

    has_gene_search = get_project_datastore(project_id).project_collection_is_loaded(project_id)

    gene_lists = []
    for project_gene_list in ProjectGeneList.objects.filter(project=project):
        gene_lists.append(project_gene_list.gene_list.toJSON(details=True))

    log_fields = {
        'project_id': project_id,
        'family_id': family_id,
        'has_gene_search': has_gene_search,
    }
    sys.stderr.write("returning mendelian_variant_search page for %(project_id)s %(family_id)s. has_gene_search = %(has_gene_search)s\n " % log_fields)

    context = {
        'gene_lists': json.dumps(gene_lists),
        'project': project,
        'family': family,
        'family_genotype_filters_json': json.dumps(x_inheritance.get_genotype_filters(family.xfamily())),
        'has_gene_search': has_gene_search or get_elasticsearch_dataset(project_id) is not None
    }
    return render(request, 'mendelian_variant_search.html', context)
Ejemplo n.º 14
0
def project_home(request, project_id):
    """
    Render the home page of a project.

    Responds with a plain 'unauthorized' HttpResponse when the requesting
    user may not view the project. Otherwise marks the project as accessed
    and renders 'project.html' with the user's authorization level.
    """
    project = get_object_or_404(Project, project_id=project_id)
    user = request.user
    if not project.can_view(user):
        return HttpResponse('unauthorized')
    project.set_accessed()

    # First matching rule wins: admin, editor, public, viewer.
    if project.can_admin(user):
        auth_level = 'admin'
    elif project.can_edit(user):
        auth_level = 'editor'
    elif project.is_public:
        auth_level = 'public'
    elif project.can_view(user):
        auth_level = 'viewer'
    else:
        raise Exception("Authx - how did we get here?!?")

    can_edit = project.can_edit(user)
    is_manager = project.can_admin(user)
    has_gene_search = get_project_datastore(project_id).project_collection_is_loaded(project_id)

    context = {
        'project': project,
        'auth_level': auth_level,
        'can_edit': can_edit,
        'is_manager': is_manager,
        'has_gene_search': has_gene_search,
    }
    return render(request, 'project.html', context)
Ejemplo n.º 15
0
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals of a project with recessive inheritance in a gene.

    Variants are fetched with the default 'moderate_impact' variant filter
    built from the annotator's reference populations.
    NOTE: ``quality_filter`` is accepted but not used; an empty quality
    filter is passed to CohortGeneVariation.

    Returns:
        (knockouts, variation): the result of
        get_individuals_with_inheritance('recessive', ...) and the
        CohortGeneVariation it was computed from.
    """
    indiv_id_list = []
    for individual in project.get_individuals():
        indiv_id_list.append(individual.indiv_id)

    # Original note: filter out variants > 0.01 AF in any of the reference
    # populations (threshold is defined by get_default_variant_filter).
    population_slugs = mall.get_annotator().reference_population_slugs
    variant_filter = get_default_variant_filter('moderate_impact', population_slugs)

    datastore = get_project_datastore(project.project_id)
    gene_variants = datastore.get_project_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(gene_variants, gene_id, variant_filter)

    variation = CohortGeneVariation(
        get_reference(),
        gene_id,
        gene_variants,
        indiv_id_list,
        quality_filter={},
    )
    knockouts = get_individuals_with_inheritance('recessive', variation, indiv_id_list)
    return knockouts, variation
Ejemplo n.º 16
0
def get_variants_in_gene(project, gene_id, variant_filter=None, quality_filter=None):
    """
    Return the project's variants in ``gene_id`` after applying ``variant_filter``.

    The variants come from the project's datastore and are then passed
    through search_utils.filter_gene_variants_by_variant_filter.
    NOTE: ``quality_filter`` is accepted but not used by this function.
    """
    datastore = get_project_datastore(project)
    gene_variants = datastore.get_project_variants_in_gene(
        project.project_id,
        gene_id,
        variant_filter=variant_filter,
    )
    return search_utils.filter_gene_variants_by_variant_filter(gene_variants, gene_id, variant_filter)
    def transfer_project(self, from_project_id, destination_project_id):
        """
        Transfer the datastore records of one project onto another project id.

        Steps, in order:
          1. Check that every individual id of the 'from' project also exists
             in the destination project (raises Exception otherwise).
          2. Add each VCF file of the 'from' project to the destination project.
          3. Check that the 'from' project has records in the projects and
             families db collections (raises ValueError otherwise).
          4. Re-label any existing destination records as
             '<destination_project_id>_previous' with a date-stamp version,
             then re-label the 'from' records with the destination project id
             and version '2'.
          5. Re-check the destination records (raises ValueError on failure),
             update the family analysis status, and interactively ask whether
             the 'from' project should be deleted.

        NOTE(review): the exact semantics of the `check_that_exists` and
        `update` helpers are not visible here; the descriptions above are
        inferred from their arguments and the surrounding messages - confirm.
        """
        print("From: " + from_project_id)
        print("To: " + destination_project_id)

        from_project = Project.objects.get(project_id=from_project_id)
        destination_project = Project.objects.get(project_id=destination_project_id)
        
        # Make sure individuals are the same: every indiv_id of the 'from'
        # project must also be present in the destination project.
        indivs_missing_from_dest_project = (set(
            [i.indiv_id for i in Individual.objects.filter(project=from_project)]) - set(
            [i.indiv_id for i in Individual.objects.filter(project=destination_project)]))
        if indivs_missing_from_dest_project:
            raise Exception("Individuals missing from dest project: " + str(indivs_missing_from_dest_project))
        

        # update VCFs: attach every VCF path of the 'from' project to the
        # destination project, creating the VCFFile row if it does not exist.
        vcfs = from_project.families_by_vcf().keys()
        for vcf_file_path in vcfs:            
            vcf_file = VCFFile.objects.get_or_create(file_path=os.path.abspath(vcf_file_path))[0]
            sample_management.add_vcf_file_to_project(destination_project, vcf_file)
            print("Added %s to project %s" % (vcf_file, destination_project.project_id))

        # Underlying db handles of the family datastore and project datastore.
        families_db = get_datastore()._db
        projects_db = get_project_datastore()._db

        print("==========")
        print("Checking 'from' Projects and Families:")
        if not check_that_exists(projects_db.projects, {'project_id': from_project_id}, not_more_than_one=True):
            raise ValueError("There needs to be 1 project db in %(from_project_id)s" % locals())
        if not check_that_exists(families_db.families, {'project_id': from_project_id}, not_more_than_one=False):
            raise ValueError("There needs to be atleast 1 family db in %(from_project_id)s" % locals())

        print("==========")
        print("Make Updates:")
        datestamp = datetime.now().strftime("%Y-%m-%d")
        # Move any records that already use the destination id out of the way
        # by re-labelling them '<id>_previous' with today's date as version.
        if check_that_exists(projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True):
            result = update(projects_db.projects, {'project_id': destination_project_id}, {'project_id': destination_project_id+'_previous', 'version': datestamp})
        if check_that_exists(families_db.families, {'project_id': destination_project_id}, not_more_than_one=False):
            result = update(families_db.families, {'project_id': destination_project_id}, {'project_id': destination_project_id+'_previous', 'version': datestamp})

        # Re-label the 'from' records so they now belong to the destination id.
        # (`result` is assigned but never read.)
        result = update(projects_db.projects, {'project_id': from_project_id},        {'project_id': destination_project_id, 'version': '2'})
        result = update(families_db.families, {'project_id': from_project_id},        {'project_id': destination_project_id, 'version': '2'})

        print("==========")
        print("Checking Projects:")
        if not check_that_exists(projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True):
            raise ValueError("After: There needs to be 1 project db in %(destination_project_id)s" % locals())
        if not check_that_exists(families_db.families, {'project_id': destination_project_id}, not_more_than_one=False):
            raise ValueError("After: There needs to be atleast 1 family db in %(destination_project_id)s" % locals())

        update_family_analysis_status(destination_project_id)
        
        print("Data transfer finished.")
        # raw_input is the Python 2 builtin. Only an answer that is exactly
        # 'Y' after stripping deletes the project; anything else, including
        # an empty answer or a lowercase 'y', keeps it.
        i = raw_input("Delete the 'from' project: %s? [Y/n] " % from_project_id)
        if i.strip() == 'Y':
            sample_management.delete_project(from_project_id)
            print("Project %s deleted" % from_project_id)
        else:
            print("Project not deleted")
Ejemplo n.º 18
0
    def handle(self, *args, **options):
        """
        Attach CNV calls from a tab-separated file to a project's variants.

        Options read:
            project_id: id of the Project to update.
            cnv_filename: path of the tab-separated CNV file. Its first line
                is the header; the columns 'chr', 'start', 'end' and 'sample'
                are read from every row.
            bed_files_directory: directory expected to hold one
                '<sample>.bed' file per sample.

        For every row, the individual whose indiv_id starts with the sample
        id (case-insensitive) is looked up; rows without a usable match are
        reported and skipped. The individual's ``cnv_bed_file`` is set to the
        sample's .bed path, and the whole row is stored under
        ``genotypes.<indiv_id>.extras.cnvs`` on every variant of the project
        and family collections whose xpos lies within [start, end].

        Raises:
            ValueError: if the CNV file does not exist.
        """
        project_id = options['project_id']
        print("Loading data into project: " + project_id)
        project = Project.objects.get(project_id=project_id)

        cnv_filename = options['cnv_filename']
        bed_files_directory = options['bed_files_directory']

        if not os.path.isfile(cnv_filename):
            raise ValueError("CNV file %s doesn't exist" % options['cnv_filename'])

        with open(cnv_filename) as f:
            header_fields = f.readline().rstrip('\n').split('\t')
            for line in f:
                fields = line.rstrip('\n').split('\t')
                row_dict = dict(zip(header_fields, fields))

                chrom = "chr" + row_dict['chr']
                start = int(row_dict['start'])
                end = int(row_dict['end'])

                sample_id = row_dict['sample']
                try:
                    i = Individual.objects.get(project=project, indiv_id__istartswith=sample_id)
                except Exception as e:
                    # No match, or an ambiguous one: report and move on to the next row.
                    print("WARNING: %s: %s not found in %s" % (e, sample_id, project))
                    continue

                bed_file_path = os.path.join(bed_files_directory, "%s.bed" % sample_id)
                if not os.path.isfile(bed_file_path):
                    print("WARNING: .bed file not found: " + bed_file_path)

                # Record the path whether or not the file is present yet.
                # This check used to be nested under the "not found" branch,
                # so the path was stored only when the .bed file was missing.
                if i.cnv_bed_file != bed_file_path:
                    print("Setting cnv_bed_file path to %s" % bed_file_path)
                    i.cnv_bed_file = bed_file_path
                    i.save()

                project_collection = get_project_datastore(project)._get_project_collection(project_id)
                family_collection = get_mall(project).variant_store._get_family_collection(project_id, i.family.family_id)

                xpos_range_query = {'$and': [
                    {'xpos': {'$gte': genomeloc.get_single_location(chrom, start)}},
                    {'xpos': {'$lte': genomeloc.get_single_location(chrom, end)}},
                ]}
                cnv_update = {'$set': {'genotypes.%s.extras.cnvs' % i.indiv_id: row_dict}}

                # Falsy collections are dropped before updating.
                for collection in filter(None, [project_collection, family_collection]):
                    collection.update_many(xpos_range_query, cnv_update)

        print("Done")
Ejemplo n.º 19
0
def load_project_datastore(project_id,
                           vcf_files=None,
                           start_from_chrom=None,
                           end_with_chrom=None):
    """
    Load this project into the project datastore
    Which allows queries over all variants in a project

    The project's existing store is deleted and re-created, variants are then
    added from the project's VCF files (sorted by path), and finally the
    project collection is marked as loaded. A 'started' and a 'finished'
    event are inserted into settings.EVENTS_COLLECTION.

    Args:
        project_id: id of the Project to load.
        vcf_files: optional collection of VCF file paths. When given, VCF
            files whose path is not in it are skipped.
        start_from_chrom: passed through to add_variants_to_project_from_vcf;
            also shown in the start message when set.
        end_with_chrom: passed through to add_variants_to_project_from_vcf.
    """
    # Build the message first. In the previous single expression the
    # conditional applied to the whole concatenation, so the entire message
    # became "" whenever start_from_chrom was not given.
    start_message = "%m/%d/%Y %H:%M:%S  -- starting load_project_datastore: " + project_id
    if start_from_chrom:
        start_message += " from chrom: " + start_from_chrom
    print(date.strftime(datetime.now(), start_message))

    settings.EVENTS_COLLECTION.insert({
        'event_type': 'load_project_datastore_started',
        'date': timezone.now(),
        'project_id': project_id
    })

    project = Project.objects.get(project_id=project_id)
    get_project_datastore(project_id).delete_project_store(project_id)
    get_project_datastore(project_id).add_project(project_id)
    for vcf_file in sorted(project.get_all_vcf_files(),
                           key=lambda v: v.path()):
        vcf_file_path = vcf_file.path()
        if vcf_files is not None and vcf_file_path not in vcf_files:
            print("Skipping - %(vcf_file_path)s is not in %(vcf_files)s" %
                  locals())
            # Actually skip the file: without this `continue` the message was
            # printed but the VCF was loaded anyway.
            continue

        # Only load genotypes for individuals that are both in the project
        # and present as samples in this VCF.
        project_indiv_ids = [i.indiv_id for i in project.get_individuals()]
        vcf_ids = vcf_file.sample_id_list()
        indiv_id_list = [i for i in project_indiv_ids if i in vcf_ids]
        get_project_datastore(project_id).add_variants_to_project_from_vcf(
            vcf_file.file_handle(),
            project_id,
            indiv_id_list=indiv_id_list,
            start_from_chrom=start_from_chrom,
            end_with_chrom=end_with_chrom)

    get_project_datastore(project_id).set_project_collection_to_loaded(
        project_id)

    print(
        date.strftime(
            datetime.now(), "%m/%d/%Y %H:%M:%S  -- load_project_datastore: " +
            project_id + " is done!"))

    settings.EVENTS_COLLECTION.insert({
        'event_type': 'load_project_datastore_finished',
        'date': timezone.now(),
        'project_id': project_id
    })
Ejemplo n.º 20
0
def project_home(request, project_id):
    """
    Render the home page of a project.

    Raises PermissionDenied when the requesting user may not view the
    project. Otherwise marks the project as accessed and renders
    'project.html' with the user's authorization level, the PhenoTips
    support flag and whether the project collection is loaded.
    """
    project = get_object_or_404(Project, project_id=project_id)
    user = request.user
    if not project.can_view(user):
        raise PermissionDenied
    project.set_accessed()

    # First matching rule wins: admin, editor, public, viewer.
    if project.can_admin(user):
        auth_level = 'admin'
    elif project.can_edit(user):
        auth_level = 'editor'
    elif project.is_public:
        auth_level = 'public'
    elif project.can_view(user):
        auth_level = 'viewer'
    else:
        raise Exception("Authx - how did we get here?!?")

    # PhenoTips is supported unless the project is listed in the setting.
    without_phenotips = settings.PROJECTS_WITHOUT_PHENOTIPS
    phenotips_supported = (without_phenotips is None
                           or project_id not in without_phenotips)

    can_edit = project.can_edit(user)
    is_manager = project.can_admin(user)
    has_gene_search = get_project_datastore(
        project_id).project_collection_is_loaded(project_id)

    context = {
        'phenotips_supported': phenotips_supported,
        'project': project,
        'auth_level': auth_level,
        'can_edit': can_edit,
        'is_manager': is_manager,
        'has_gene_search': has_gene_search,
    }
    return render(request, 'project.html', context)
Ejemplo n.º 21
0
def _has_gene_search(project):
    """
    Tell whether the project's collection is loaded in the project datastore,
    i.e. whether Gene Search is enabled for it.

    DEPRECATED - will be removed along with mongodb.

    Args:
         project (object): django project; its ``deprecated_project_id`` is
             used to address the datastore.
    """
    legacy_id = project.deprecated_project_id
    datastore = get_project_datastore(legacy_id)
    return datastore.project_collection_is_loaded(legacy_id)
Ejemplo n.º 22
0
def load_project_datastore(project_id, vcf_files=None):
    """
    Load this project into the project datastore
    Which allows queries over all variants in a project

    The project's existing store is deleted and re-created, then variants are
    added from each of the project's VCF files.

    Args:
        project_id: id of the Project to load.
        vcf_files: optional collection of VCF file paths. When given, VCF
            files whose path is not in it are skipped.
    """
    print(
        date.strftime(
            datetime.now(),
            "%m/%d/%Y %H:%M:%S  -- starting load_project_datastore: " +
            project_id))
    project = Project.objects.get(project_id=project_id)
    get_project_datastore(project_id).delete_project_store(project_id)
    get_project_datastore(project_id).add_project(project_id)
    for vcf_file in project.get_all_vcf_files():
        vcf_file_path = vcf_file.path()
        if vcf_files is not None and vcf_file_path not in vcf_files:
            print("Skipping - %(vcf_file_path)s is not in %(vcf_files)s" %
                  locals())
            # Actually skip the file: without this `continue` the message was
            # printed but the VCF was loaded anyway.
            continue

        # Only load genotypes for individuals that are both in the project
        # and present as samples in this VCF.
        project_indiv_ids = [i.indiv_id for i in project.get_individuals()]
        vcf_ids = vcf_file.sample_id_list()
        indiv_id_list = [i for i in project_indiv_ids if i in vcf_ids]
        get_project_datastore(project_id).add_variants_to_project_from_vcf(
            vcf_file.file_handle(), project_id, indiv_id_list=indiv_id_list)

    print(
        date.strftime(
            datetime.now(), "%m/%d/%Y %H:%M:%S  -- load_project_datastore: " +
            project_id + " is done!"))
Ejemplo n.º 23
0
    def update_pop_freqs_in_project_tables(self):
        """
        Rewrite the ``db_freqs.<slug>`` fields of every variant in every
        project collection using the population frequency store.

        Progress is tracked in the local sqlite file
        "reference_populations_project_tables.db": each project gets a row
        with ``started`` / ``finished`` flags, and only projects whose
        ``started`` flag is 0 are processed (presumably so that a re-run does
        not redo projects that were already picked up - confirm).
        """
        frequency_store = mall.get_annotator().get_population_frequency_store()

        conn = sqlite3.connect("reference_populations_project_tables.db", isolation_level=None)
        conn.execute("CREATE TABLE if not exists all_projects(project_id varchar(200), started bool, finished bool)")
        conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS all_projects_idx ON all_projects(project_id)")

        import random

        # "myoseq_v11" is registered first; all other projects follow in random order.
        shuffled_ids = [p.project_id for p in Project.objects.all() if p.project_id != "myoseq_v11"]
        random.shuffle(shuffled_ids)
        for registered_id in ["myoseq_v11"] + shuffled_ids:
            conn.execute("INSERT OR IGNORE INTO all_projects VALUES (?, 0, 0)", (registered_id,))

        # Go through each project and update the variant records
        slugs_to_load = []
        for population_spec in annotator_settings.reference_populations:
            slugs_to_load.append(population_spec["slug"])

        while True:
            pending_rows = list(conn.execute("SELECT project_id FROM all_projects WHERE started=0"))
            print("%d projects remaining" % len(pending_rows))
            if not pending_rows:
                print("Done with all projects")
                break

            (project_id,) = pending_rows[0]
            project_store = get_project_datastore(project_id)

            print("    updating %s " % project_id)
            conn.execute("UPDATE all_projects SET started=1 WHERE project_id=?", (project_id,))

            collection = project_store._get_project_collection(project_id)
            for variant in collection.find():
                xpos, ref, alt = variant["xpos"], variant["ref"], variant["alt"]
                freqs = frequency_store.get_frequencies(xpos, ref, alt)
                # A population missing from the lookup result is stored as 0.
                full_freqs = {}
                for slug in slugs_to_load:
                    full_freqs["db_freqs." + slug] = freqs.get(slug, 0)
                collection.update(
                    {"xpos": xpos, "ref": ref, "alt": alt},
                    {"$set": full_freqs},
                    upsert=False,
                )

            print("     ---> done updating project_id: %s" % project_id)
            conn.execute("UPDATE all_projects SET finished=1 WHERE project_id=?", (project_id,))
Ejemplo n.º 24
0
def _has_gene_search(project):
    """
    Tell whether Gene Search is enabled for this project.

    Returns False when no BaseProject is linked to the project. Otherwise the
    result is truthy when the base project has an elasticsearch index or its
    collection is loaded in the project datastore.

    DEPRECATED - will be removed along with mongodb.

    Args:
         project (object): django project
    """
    try:
        base_project = BaseProject.objects.get(seqr_project=project)
    except ObjectDoesNotExist:
        return False

    # The datastore is consulted only when there is no elasticsearch index.
    has_index = base_project.has_elasticsearch_index()
    if has_index:
        return has_index
    datastore = get_project_datastore(base_project)
    return datastore.project_collection_is_loaded(base_project)
Ejemplo n.º 25
0
def project_home(request, project_id):
    """
    Render the home page of a project.

    Raises PermissionDenied when the requesting user may not view the
    project. Otherwise marks the project as accessed and renders
    'project.html' with the user's authorization level, the PhenoTips
    support flag and whether the project collection is loaded.
    """
    project = get_object_or_404(Project, project_id=project_id)
    user = request.user
    if not project.can_view(user):
        raise PermissionDenied
    project.set_accessed()

    # First matching rule wins: admin, editor, public, viewer.
    if project.can_admin(user):
        auth_level = 'admin'
    elif project.can_edit(user):
        auth_level = 'editor'
    elif project.is_public:
        auth_level = 'public'
    elif project.can_view(user):
        auth_level = 'viewer'
    else:
        raise Exception("Authx - how did we get here?!?")

    # PhenoTips is supported unless the project is listed in the setting.
    excluded_projects = settings.PROJECTS_WITHOUT_PHENOTIPS
    phenotips_supported = excluded_projects is None or project_id not in excluded_projects

    can_edit = project.can_edit(user)
    is_manager = project.can_admin(user)
    has_gene_search = get_project_datastore(project_id).project_collection_is_loaded(project_id)

    context = {
        'phenotips_supported': phenotips_supported,
        'project': project,
        'auth_level': auth_level,
        'can_edit': can_edit,
        'is_manager': is_manager,
        'has_gene_search': has_gene_search,
    }
    return render(request, 'project.html', context)
Ejemplo n.º 26
0
    def update_pop_freqs_in_project_tables(self):
        """
        Refresh the ``db_freqs.<slug>`` fields of all variants in all project
        collections from the population frequency store.

        A local sqlite file, "reference_populations_project_tables.db", keeps
        one row per project with ``started`` / ``finished`` flags. The loop
        repeatedly picks a project whose ``started`` flag is 0 until none are
        left.
        """
        freq_store = mall.get_annotator().get_population_frequency_store()

        tracker = sqlite3.connect("reference_populations_project_tables.db", isolation_level=None)
        tracker.execute("CREATE TABLE if not exists all_projects(project_id varchar(200), started bool, finished bool)")
        tracker.execute("CREATE UNIQUE INDEX IF NOT EXISTS all_projects_idx ON all_projects(project_id)")

        import random
        # "myoseq_v11" is registered first; the remaining projects are shuffled.
        remaining_ids = []
        for p in Project.objects.all():
            if p.project_id != "myoseq_v11":
                remaining_ids.append(p.project_id)
        random.shuffle(remaining_ids)
        for registered_id in ["myoseq_v11"] + remaining_ids:
            tracker.execute("INSERT OR IGNORE INTO all_projects VALUES (?, 0, 0)", (registered_id,))

        # Go through each project and update the variant records
        slugs = [spec['slug'] for spec in annotator_settings.reference_populations]
        while True:
            todo = list(tracker.execute("SELECT project_id FROM all_projects WHERE started=0"))
            print("%d projects remaining" % len(todo))
            if not todo:
                print("Done with all projects")
                break

            (project_id,) = todo[0]
            project_store = get_project_datastore(project_id)

            print("    updating %s " % project_id)
            tracker.execute("UPDATE all_projects SET started=1 WHERE project_id=?", (project_id,))

            collection = project_store._get_project_collection(project_id)
            for record in collection.find():
                freqs = freq_store.get_frequencies(record['xpos'], record['ref'], record['alt'])
                # A population missing from the lookup result is stored as 0.
                new_fields = dict(('db_freqs.' + slug, freqs.get(slug, 0)) for slug in slugs)
                selector = {'xpos': record['xpos'], 'ref': record['ref'], 'alt': record['alt']}
                collection.update(selector, {'$set': new_fields}, upsert=False)

            print("     ---> done updating project_id: %s" % project_id)
            tracker.execute("UPDATE all_projects SET finished=1 WHERE project_id=?", (project_id,))
Ejemplo n.º 27
0
    def handle(self, *args, **options):
        """
        Load CADD scores for all variants in a project, or all variants in the annotator_store.

        Behaviour depends on ``options``:
          * ``cadd_file`` set: scores are loaded from that file through
            load_from_cadd_file and the command finishes.
          * ``project_id`` set: variants of that project's collection that
            have no ``annotation.cadd_phred`` are looked up with fetch().
          * neither: the same is done for the annotator store's own variants.

        In the last two modes each fetched score is written to the matching
        document in ``annotator_store.variants``.
        """
        annotator_store = mall.get_annotator().get_annotator_datastore()
        if options['cadd_file']:
            print("Loading " + options['cadd_file'])
            load_from_cadd_file(options['cadd_file'])
            # Nothing to iterate in this mode. Falling through used to reach
            # the loop below with `variant_collection` unassigned
            # (UnboundLocalError).
            print("Done")
            return

        missing_cadd_query = {'annotation.cadd_phred': {'$exists': False}}
        if options['project_id']:
            print("Loading " + options['project_id'])
            project = Project.objects.get(project_id=options['project_id'])
            project_collection = get_project_datastore(
                project)._get_project_collection(options['project_id'])
            variant_collection = project_collection.find(missing_cadd_query)
        else:
            variant_collection = annotator_store.variants.find(
                missing_cadd_query)

        for r in tqdm.tqdm(variant_collection, unit=' variants'):
            chrom, pos = genomeloc.get_chr_pos(r['xpos'])
            cadd_phred = fetch(chrom, pos, r['ref'], r['alt'])
            if cadd_phred is not None:
                result = annotator_store.variants.update(
                    {
                        'xpos': r['xpos'],
                        'ref': r['ref'],
                        'alt': r['alt']
                    }, {'$set': {
                        'annotation.cadd_phred': cadd_phred
                    }},
                    upsert=False)
                # The variant is expected to already exist in the annotator store.
                assert result['updatedExisting']

        print("Done")
Ejemplo n.º 28
0
def mendelian_variant_search(request, project_id, family_id):
    """
    Serve the mendelian variant search page for one family of a project.

    The project and family are looked up with get_object_or_404. A user who
    may not view the project gets a plain 'unauthorized' response, and a
    family without variant data gets the 'analysis_unavailable.html' page.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)

    if not project.can_view(request.user):
        return HttpResponse('unauthorized')

    if not family.has_data('variation'):
        unavailable_context = {
            'reason': 'This family does not have any variant data.'
        }
        return render(request, 'analysis_unavailable.html', unavailable_context)

    # gene search is only offered once the project collection is loaded
    project_datastore = get_project_datastore(project_id)
    has_gene_search = project_datastore.project_collection_is_loaded(project_id)

    log_fields = {
        'project_id': project_id,
        'family_id': family_id,
        'has_gene_search': has_gene_search,
    }
    sys.stderr.write("Running mendelian_variant_search on %(project_id)s %(family_id)s. has_gene_search = %(has_gene_search)s\n " % log_fields)

    genotype_filters = x_inheritance.get_genotype_filters(family.xfamily())
    page_context = {
        'project': project,
        'family': family,
        'family_genotype_filters_json': json.dumps(genotype_filters),
        'has_gene_search': has_gene_search,
    }
    return render(request, 'mendelian_variant_search.html', page_context)
Ejemplo n.º 29
0
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals in a project that carry recessive variation in a gene.

    The project's variants in `gene_id` are fetched with the default
    'high_impact' variant filter and evaluated for 'recessive' inheritance
    across all of the project's individuals.

    NOTE: `quality_filter` is accepted but not used - an empty quality
    filter is always handed to CohortGeneVariation.

    Returns a (knockouts, variation) tuple: the result of
    get_individuals_with_inheritance and the CohortGeneVariation it was
    computed from.
    """
    all_indiv_ids = [indiv.indiv_id for indiv in project.get_individuals()]
    high_impact_filter = get_default_variant_filter('high_impact')

    datastore = get_project_datastore()
    gene_variants = datastore.get_variants_in_gene(
        project.project_id, gene_id, variant_filter=high_impact_filter)
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, high_impact_filter)

    variation = CohortGeneVariation(
        get_reference(), gene_id, gene_variants, all_indiv_ids, quality_filter={})
    knockouts = get_individuals_with_inheritance(
        'recessive', variation, all_indiv_ids)
    return knockouts, variation
Ejemplo n.º 30
0
def mendelian_variant_search(request, project_id, family_id):
    """
    Serve the mendelian variant search page for one family of a project.

    The project and family are looked up with get_object_or_404.
    PermissionDenied is raised when the user may not view the project, and a
    family without variant data gets the 'analysis_unavailable.html' page.
    The project's gene lists are passed to the template as JSON.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)

    if not project.can_view(request.user):
        raise PermissionDenied

    if not family.has_data('variation'):
        unavailable_context = {
            'reason': 'This family does not have any variant data.'
        }
        return render(request, 'analysis_unavailable.html', unavailable_context)

    # gene search is only offered once the project collection is loaded
    project_datastore = get_project_datastore(project_id)
    has_gene_search = project_datastore.project_collection_is_loaded(project_id)

    gene_lists = []
    for project_gene_list in ProjectGeneList.objects.filter(project=project):
        gene_lists.append(project_gene_list.gene_list.toJSON(details=True))

    log_fields = {
        'project_id': project_id,
        'family_id': family_id,
        'has_gene_search': has_gene_search,
    }
    sys.stderr.write("Running mendelian_variant_search on %(project_id)s %(family_id)s. has_gene_search = %(has_gene_search)s\n " % log_fields)

    gene_lists_json = json.dumps(gene_lists)
    genotype_filters = x_inheritance.get_genotype_filters(family.xfamily())
    page_context = {
        'gene_lists': gene_lists_json,
        'project': project,
        'family': family,
        'family_genotype_filters_json': json.dumps(genotype_filters),
        'has_gene_search': has_gene_search,
    }
    return render(request, 'mendelian_variant_search.html', page_context)
Ejemplo n.º 31
0
def mendelian_variant_search(request, project_id, family_id):
    """
    Serve the mendelian variant search page for one family of a project.

    The project and family are looked up with get_object_or_404.
    PermissionDenied is raised when the user may not view the project, and a
    family without variant data gets the 'analysis_unavailable.html' page.

    The template context also carries 'new_page_url', which points at
    /variant_search/family/<guid> when the family has a seqr_family whose
    project has `has_new_search` set, and is None otherwise.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)

    if not project.can_view(request.user):
        raise PermissionDenied

    if not family.has_data('variation'):
        unavailable_context = {
            'reason': 'This family does not have any variant data.'
        }
        return render(request, 'analysis_unavailable.html', unavailable_context)

    # gene search is only offered once the project collection is loaded
    project_datastore = get_project_datastore(project)
    has_gene_search = project_datastore.project_collection_is_loaded(project)

    gene_lists = []
    for project_gene_list in ProjectGeneList.objects.filter(project=project):
        gene_lists.append(project_gene_list.gene_list.toJSON(details=True))

    log_fields = {
        'project_id': project_id,
        'family_id': family_id,
        'has_gene_search': has_gene_search,
    }
    sys.stderr.write(
        "returning mendelian_variant_search page for %(project_id)s %(family_id)s. has_gene_search = %(has_gene_search)s\n "
        % log_fields)

    gene_lists_json = json.dumps(gene_lists)
    genotype_filters_json = json.dumps(
        x_inheritance.get_genotype_filters(family.xfamily()))

    if family.seqr_family and family.seqr_family.project.has_new_search:
        new_page_url = '/variant_search/family/{0}'.format(
            family.seqr_family.guid)
    else:
        new_page_url = None

    page_context = {
        'gene_lists': gene_lists_json,
        'project': project,
        'family': family,
        'family_genotype_filters_json': genotype_filters_json,
        'has_gene_search': has_gene_search,
        'new_page_url': new_page_url,
    }
    return render(request, 'mendelian_variant_search.html', page_context)
Ejemplo n.º 32
0
def load_project_datastore(project_id):
    """
    Rebuild the project datastore entry for `project_id`.

    The project is first deleted from the project datastore and re-added
    with its reference population slugs. Then the variants of every VCF file
    of the project are added, restricted to the project individuals that
    appear in that file's sample id list.
    """
    project = Project.objects.get(project_id=project_id)

    # start from a clean slate
    get_project_datastore().delete_project(project_id)
    get_project_datastore().add_project(
        project_id, project.get_reference_population_slugs())

    for vcf_file in project.get_all_vcf_files():
        ids_in_project = [indiv.indiv_id for indiv in project.get_individuals()]
        ids_in_vcf = vcf_file.sample_id_list()
        # keep project order; only individuals present in this VCF
        shared_ids = []
        for indiv_id in ids_in_project:
            if indiv_id in ids_in_vcf:
                shared_ids.append(indiv_id)

        get_project_datastore().add_variants_to_project_from_vcf(
            vcf_file.file_handle(), project_id, indiv_id_list=shared_ids)
Ejemplo n.º 33
0
def load_project_datastore(project_id, vcf_files=None, start_from_chrom=None, end_with_chrom=None):
    """
    Load this project into the project datastore
    Which allows queries over all variants in a project

    Any existing project store is deleted first, then variants are added from
    the project's VCF files (in path order) and the project collection is
    marked as loaded. Outside of DEBUG mode, 'started' and 'finished' events
    are recorded in settings.EVENTS_COLLECTION.

    Args:
        project_id: id of the Project to load.
        vcf_files: optional collection of VCF file paths. When given, VCF
            files of the project whose path is not in it are skipped.
        start_from_chrom: passed through to add_variants_to_project_from_vcf.
        end_with_chrom: passed through to add_variants_to_project_from_vcf.
    """
    if not settings.DEBUG: settings.EVENTS_COLLECTION.insert({
        'event_type': 'load_project_datastore_started',
        'date': timezone.now(),
        'project_id': project_id,
    })

    project = Project.objects.get(project_id=project_id)
    get_project_datastore(project).delete_project_store(project_id)
    get_project_datastore(project).add_project(project_id)
    # sorted by path so that files are always loaded in the same order
    for vcf_file in sorted(project.get_all_vcf_files(), key=lambda v:v.path()):
        vcf_file_path = vcf_file.path()
        if vcf_files is not None and vcf_file_path not in vcf_files:
            print("Skipping - %(vcf_file_path)s is not in %(vcf_files)s" % locals())
            # actually skip the file - without this the "skipped" VCF was
            # still loaded below
            continue

        # only load genotypes for project individuals present in this VCF
        project_indiv_ids = [i.indiv_id for i in project.get_individuals()]
        vcf_ids = vcf_file.sample_id_list()
        indiv_id_list = [i for i in project_indiv_ids if i in vcf_ids]
        get_project_datastore(project).add_variants_to_project_from_vcf(
            vcf_file.file_handle(),
            project_id,
            indiv_id_list=indiv_id_list,
            start_from_chrom=start_from_chrom,
            end_with_chrom=end_with_chrom
        )

    get_project_datastore(project).set_project_collection_to_loaded(project_id)

    if not settings.DEBUG: settings.EVENTS_COLLECTION.insert({
        'event_type': 'load_project_datastore_finished',
        'date': timezone.now(),
        'project_id': project_id
    })
Ejemplo n.º 34
0
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals in a project that carry recessive variation in a gene.

    The project's variants in `gene_id` are fetched with the default
    'moderate_impact' variant filter, built with the annotator's reference
    population slugs, and evaluated for 'recessive' inheritance across all
    of the project's individuals.

    NOTE: `quality_filter` is accepted but not used - an empty quality
    filter is always handed to CohortGeneVariation.

    Returns a (knockouts, variation) tuple: the result of
    get_individuals_with_inheritance and the CohortGeneVariation it was
    computed from.
    """
    all_indiv_ids = [indiv.indiv_id for indiv in project.get_individuals()]

    # filter out variants > 0.01 AF in any of the reference populations
    population_slugs = mall.get_annotator().reference_population_slugs
    moderate_impact_filter = get_default_variant_filter(
        'moderate_impact', population_slugs)

    datastore = get_project_datastore(project.project_id)
    gene_variants = datastore.get_project_variants_in_gene(
        project.project_id, gene_id, variant_filter=moderate_impact_filter)
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, moderate_impact_filter)

    variation = CohortGeneVariation(
        get_reference(), gene_id, gene_variants, all_indiv_ids, quality_filter={})
    knockouts = get_individuals_with_inheritance(
        'recessive', variation, all_indiv_ids)
    return knockouts, variation
Ejemplo n.º 35
0
def get_knockouts_in_gene(project, gene_id, quality_filter=None):
    """
    Find the individuals in a project that carry recessive variation in a gene.

    The project's variants in `gene_id` are fetched from the project
    datastore with the default 'high_impact' variant filter and evaluated
    for 'recessive' inheritance across all of the project's individuals.

    NOTE: `quality_filter` is accepted but not used - an empty quality
    filter is always handed to CohortGeneVariation.

    Returns a (knockouts, variation) tuple: the result of
    get_individuals_with_inheritance and the CohortGeneVariation it was
    computed from.
    """
    all_indiv_ids = [indiv.indiv_id for indiv in project.get_individuals()]
    high_impact_filter = get_default_variant_filter('high_impact')

    datastore = get_project_datastore(project.project_id)
    gene_variants = datastore.get_project_variants_in_gene(
        project.project_id, gene_id, variant_filter=high_impact_filter)
    gene_variants = search_utils.filter_gene_variants_by_variant_filter(
        gene_variants, gene_id, high_impact_filter)

    variation = CohortGeneVariation(
        get_reference(), gene_id, gene_variants, all_indiv_ids, quality_filter={})
    knockouts = get_individuals_with_inheritance(
        'recessive', variation, all_indiv_ids)
    return knockouts, variation
    def transfer_project(self, from_project_id, destination_project_id):
        """Move the datastore records of one project under another project's id.

        Steps:
          1. Refuse to continue if the 'from' project has individuals that
             the destination project lacks.
          2. Add the 'from' project's VCF files to the destination project.
          3. In both the projects and the families collections, rename the
             destination records to '<destination>_previous1' (version '1')
             and the 'from' records to the destination id (version '2').
          4. Update the family analysis status of the destination project.
          5. Ask on the console whether to delete the 'from' project; it is
             deleted only when the answer is exactly 'Y'.

        Raises:
            Exception: when individuals are missing from the destination.
        """
        print("From: " + from_project_id)
        print("To: " + destination_project_id)

        source_project = Project.objects.get(project_id=from_project_id)
        target_project = Project.objects.get(project_id=destination_project_id)

        # Make sure individuals are the same
        source_indiv_ids = {
            indiv.indiv_id
            for indiv in Individual.objects.filter(project=source_project)
        }
        target_indiv_ids = {
            indiv.indiv_id
            for indiv in Individual.objects.filter(project=target_project)
        }
        missing_indiv_ids = source_indiv_ids - target_indiv_ids
        if missing_indiv_ids:
            raise Exception("Individuals missing from dest project: " +
                            str(missing_indiv_ids))

        # update VCFs
        for vcf_file_path in source_project.families_by_vcf().keys():
            vcf_file, _created = VCFFile.objects.get_or_create(
                file_path=os.path.abspath(vcf_file_path))
            sample_management.add_vcf_file_to_project(target_project, vcf_file)
            print("Added %s to project %s" %
                  (vcf_file, target_project.project_id))

        families_db = get_datastore(from_project_id)._db
        projects_db = get_project_datastore(from_project_id)._db

        # both projects must be present before anything is renamed
        print("==========")
        print("Checking Projects:")
        for checked_id in (from_project_id, destination_project_id):
            check_that_exists(projects_db.projects,
                              {'project_id': checked_id},
                              not_more_than_one=True)
        print("==========")
        print("Checking Families:")
        for checked_id in (from_project_id, destination_project_id):
            check_that_exists(families_db.families,
                              {'project_id': checked_id},
                              not_more_than_one=False)

        print("==========")
        print("Make Updates:")
        # Order matters: the destination id has to be freed up before the
        # 'from' records can take it over.
        for collection in (projects_db.projects, families_db.families):
            update(collection, {'project_id': destination_project_id}, {
                'project_id': destination_project_id + '_previous1',
                'version': '1'
            })
            update(collection, {'project_id': from_project_id}, {
                'project_id': destination_project_id,
                'version': '2'
            })

        print("==========")
        print("Checking Projects:")
        check_that_exists(projects_db.projects,
                          {'project_id': destination_project_id},
                          not_more_than_one=True)

        print("==========")
        print("Checking Families:")
        check_that_exists(families_db.families,
                          {'project_id': destination_project_id},
                          not_more_than_one=False)

        update_family_analysis_status(destination_project_id)

        print("Data transfer finished.")
        answer = raw_input("Delete the 'from' project: %s? [Y/n] " %
                           from_project_id)
        if answer.strip() != 'Y':
            print("Project not deleted")
        else:
            sample_management.delete_project(from_project_id)
            print("Project %s deleted" % from_project_id)
Ejemplo n.º 37
0
    def search_for_genes(self,
                         gene_or_variant_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01,
                         knockouts=False,
                         in_clinvar_only=False,
                         include_non_coding=False):
        """
        Search for genes or individual variants across project(s) and write
        the matching variants to a tab-delimited file, one row per variant.

        Each entry of gene_or_variant_ids is either a variant id of the form
        'chrom-pos' or 'chrom-pos-ref-alt', which is looked up directly in the
        project datastore, or anything else, which is resolved to a gene id
        with get_gene_id_from_str.

        Projects whose project collection is not loaded are skipped. Variants
        for which no known individual has an alt allele are not written.

        Args:
            gene_or_variant_ids (list): gene ids (eg. 'ENSG..') or variant id strings.
            project_id_list (list): ids of the projects to search
            output_filename (string): output file name
            max_af (float): variants found through a gene search are dropped
                when their highest frequency in annotation['freqs'] is >= this
                value; variants looked up by id are not filtered by frequency.
            knockouts (bool): if True, gene searches return only the variants
                relevant to the individuals reported by
                project_analysis.get_knockouts_in_gene.
            in_clinvar_only (bool): if True, keep only variants whose clinvar
                significance contains "path" (case-insensitive).
            include_non_coding (bool): if True, the so_annotations of the
                'all_coding' variant filter are cleared (only applies when
                knockouts is False).
        """

        projects = [
            Project.objects.get(project_id=project_id)
            for project_id in project_id_list
        ]

        # NOTE(review): the file is only closed at the very end of the method,
        # so the handle is left open if anything below raises.
        outfile = open(output_filename, 'w')

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "gnomad-exomes",
            "gnomad-genomes", "families", "all_genotypes"
        ]

        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(header)

        # all rare coding variants
        # (variant_filter is only defined - and only used - when knockouts is False)
        if not knockouts:
            variant_filter = get_default_variant_filter(
                'all_coding',
                mall.get_annotator().reference_population_slugs)
            # the max_af cutoff is applied per variant further down instead of
            # through variant_filter.set_max_AF(max_af)
            if include_non_coding:
                variant_filter.so_annotations = []
            print("All Filters: ")
            pprint(variant_filter.toJSON())

        print("Starting search for:\n%s\nin projects:\n%s\n" %
              (", ".join(gene_or_variant_ids), ", ".join(
                  [p.project_id for p in projects])))

        for project in projects:
            project_id = project.project_id
            if get_project_datastore(project).project_collection_is_loaded(
                    project):
                print("=====================")
                print("Searching project %s" % project_id)
            else:
                print(
                    "Skipping project %s - gene search is not enabled for this project"
                    % project_id)
                continue

            # per-project cache: indiv_id -> Individual, or the string
            # 'deleted' when the individual could not be found
            indiv_cache = {}
            for gene_or_variant_id in gene_or_variant_ids:
                # re.match anchors at the start only, so anything matching the
                # 4-part pattern also matches the 2-part one
                chrom_pos_match = re.match("([0-9XY]{1,2})-([0-9]{1,9})",
                                           gene_or_variant_id)
                chrom_pos_ref_alt_match = re.match(
                    "([0-9XY]{1,2})-([0-9]{1,9})-([ACTG]+)-([ACTG]+)",
                    gene_or_variant_id)

                if chrom_pos_match or chrom_pos_ref_alt_match:
                    # --- single variant lookup ---
                    chrom = chrom_pos_match.group(1)
                    pos = int(chrom_pos_match.group(2))
                    xpos = genomeloc.get_xpos(chrom, pos)
                    ref = alt = None
                    if chrom_pos_ref_alt_match:
                        ref = chrom_pos_ref_alt_match.group(3)
                        alt = chrom_pos_ref_alt_match.group(4)

                    variant = get_project_datastore(
                        project).get_single_variant(project.project_id, None,
                                                    xpos, ref, alt)
                    if variant is None:
                        continue
                    variants = [variant]
                    print("-- searching %s for variant %s-%s-%s: found %s" %
                          (project_id, xpos, ref, alt, variant))
                    # the gene reported for the variant is the gene of its
                    # worst VEP annotation
                    worst_annotation_idx = variant.annotation[
                        'worst_vep_annotation_index']
                    print(variant.annotation["vep_annotation"]
                          [worst_annotation_idx])
                    gene_id = variant.annotation["vep_annotation"][
                        worst_annotation_idx]['gene_id']
                    gene = get_reference().get_gene(gene_id)
                else:
                    # --- gene search ---
                    gene_id = get_gene_id_from_str(gene_or_variant_id,
                                                   get_reference())
                    gene = get_reference().get_gene(gene_id)
                    print("-- searching %s for gene %s (%s)" %
                          (project_id, gene["symbol"], gene_id))

                    if knockouts:
                        knockout_ids, variation = project_analysis.get_knockouts_in_gene(
                            project, gene_id)
                        variants = variation.get_relevant_variants_for_indiv_ids(
                            knockout_ids)
                    else:
                        variants = project_analysis.get_variants_in_gene(
                            project, gene_id, variant_filter=variant_filter)

                for variant in variants:
                    # frequency cutoff applies to gene searches only
                    # NOTE(review): max() raises ValueError if 'freqs' is
                    # empty - confirm it is always populated
                    if not chrom_pos_match and not chrom_pos_ref_alt_match and max(
                            variant.annotation['freqs'].values()) >= max_af:
                        continue

                    add_extra_info_to_variants_project(get_reference(),
                                                       project, [variant])
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"].get(gene_id)

                    if worst_annotation_idx is not None:
                        worst_annotation = variant.annotation[
                            "vep_annotation"][worst_annotation_idx]
                    else:
                        worst_annotation = None
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    family_ids = set()
                    for indiv_id, genotype in variant.genotypes.items():
                        if indiv_id in indiv_cache:
                            individual = indiv_cache[indiv_id]
                            if individual == 'deleted':
                                continue
                        else:
                            try:
                                individual = Individual.objects.get(
                                    project=project, indiv_id=indiv_id)
                                indiv_cache[indiv_id] = individual
                            except ObjectDoesNotExist:
                                # this can happen when an individual is deleted from the project - from postgres, but not from mongo
                                indiv_cache[indiv_id] = 'deleted'
                                continue
                            except MultipleObjectsReturned:
                                # when several families have an individual with the same id,
                                # the first one returned is used
                                individuals = Individual.objects.filter(
                                    project=project, indiv_id=indiv_id)
                                individual = individuals[0]
                                indiv_cache[indiv_id] = individual

                        pass_filter = genotype.filter  # filter value is stored in the genotypes even though it's the same for all individuals
                        if genotype.num_alt > 0:
                            # only carriers of an alt allele are reported, as
                            # family/indiv[affected status][gt GQ AB]
                            family_ids.add(individual.family.family_id)
                            all_genotypes_list.append(
                                "%s/%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                (individual.family.family_id, indiv_id,
                                 "[Affected]" if individual.affected == "A"
                                 else ("[-]" if individual.affected == "N" else
                                       "[?]"), ">".join(genotype.alleles),
                                 genotype.gq, genotype.ab
                                 if genotype.ab is not None else float('NaN')))

                    # nobody (still) in the project carries this variant
                    if len(all_genotypes_list) == 0:
                        continue

                    measureset_id, clinvar_significance = get_reference(
                    ).get_clinvar_info(*variant.unique_tuple())
                    if in_clinvar_only and (
                            not clinvar_significance
                            or "path" not in clinvar_significance.lower()):
                        continue

                    # values are in the same order as `header` above
                    # NOTE(review): the "gene" column receives str() of the
                    # whole gene object returned by get_gene, not just its
                    # symbol - confirm this is intended
                    row = map(str, [
                        project_id,
                        gene,
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", "")
                        if worst_annotation else "",
                        (worst_annotation.get("hgvsp", "") or "").replace(
                            "%3D", "=") if worst_annotation else "",
                        worst_annotation.get("sift", "")
                        if worst_annotation else "",
                        worst_annotation.get("polyphen", "")
                        if worst_annotation else "",
                        worst_annotation.get("mutationtaster_pred", "")
                        if worst_annotation else "",
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B')))
                        if worst_annotation else "",
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        variant.annotation["freqs"].get("gnomad-exomes2", ""),
                        variant.annotation["freqs"].get("gnomad-genomes2", ""),
                        ", ".join(sorted(list(family_ids))),
                        ", ".join(all_genotypes_list),
                    ])

                    writer.writerow(row)

        outfile.close()
        print("Wrote out %s" % output_filename)
Ejemplo n.º 38
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if project.project_status == Project.NEEDS_MORE_PHENOTYPES and not request.user.is_staff:
        return render(request, 'analysis_unavailable.html',
                      {'reason': 'Awaiting phenotype data.'})

    # other projects this user can view
    if request.user.is_staff:
        other_projects = [p for p in Project.objects.all()]  #  if p != project
    else:
        other_projects = [
            c.project
            for c in ProjectCollaborator.objects.filter(user=request.user)
        ]  # if c.project != project

    other_projects = filter(
        lambda p: get_project_datastore(p.project_id).
        project_collection_is_loaded(p.project_id), other_projects)

    if other_projects:
        other_projects_json = json.dumps([{
            'project_id': p.project_id,
            'project_name': p.project_name
        } for p in sorted(other_projects, key=lambda p: p.project_id)])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
                'other_projects_json': other_projects_json,
            })

    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        projects_to_search = []
        project_ids = projects_to_search_param.split(",")
        for project_id in project_ids:
            project = get_object_or_404(Project, project_id=project_id)
            if not project.can_view(request.user):
                return HttpResponse("Unauthorized")
            projects_to_search.append(project)
    else:
        projects_to_search = [project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(
        project_id + " - staring gene search for: %s in projects: %s\n" %
        (gene_id, ",".join([p.project_id for p in projects_to_search]) + "\n"))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    for project in projects_to_search:
        project_variants = []
        for variant in project_analysis.get_variants_in_gene(
                project, gene_id, variant_filter=variant_filter):
            max_af = max(variant.annotation['freqs'].values())
            if not any([
                    indiv_id
                    for indiv_id, genotype in variant.genotypes.items()
                    if genotype.num_alt > 0
            ]):
                continue
            if max_af >= .01:
                continue

            # add project id to genotypes
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = project.project_id

            # save this variant (or just the genotypes from this variant if the variant if it's been seen already in another project)
            variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos,
                                          variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                rare_variant_dict[variant_id].genotypes.update(
                    variant.genotypes)

        #sys.stderr.write("gene_id: %s, variant: %s\n" % (gene_id, variant.toJSON()['annotation']['vep_annotation']))
        add_extra_info_to_variants_project(get_reference(), project,
                                           project_variants)
        rare_variants.extend(project_variants)
    sys.stderr.write("Retreived %s rare variants\n" % len(rare_variants))

    # compute knockout individuals
    individ_ids_and_variants = []
    for project in projects_to_search:
        knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
        for indiv_id in knockout_ids:
            variants = variation.get_relevant_variants_for_indiv_ids(
                [indiv_id])
            add_extra_info_to_variants_project(get_reference(), project,
                                               variants)
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })
            #sys.stderr.write("%s : %s: Retrieved %s knockout variants\n" % (project.project_id, indiv_id, len(variants), ))

    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
                download_csv, gene["transcript_name"])

        if download_csv == 'knockouts':

            individuals_to_include = [
                individ_id_and_variants["indiv_id"]
                for individ_id_and_variants in individ_ids_and_variants
            ]

            rows = []
            for individ_id_and_variants in individ_ids_and_variants:
                rare_variants = individ_id_and_variants["variants"]
                for variant in rare_variants:
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    genotypes = []
                    all_genotypes_string = ""
                    for indiv_id in individuals_to_include:
                        if indiv_id in variant.genotypes and variant.genotypes[
                                indiv_id].num_alt > 0:
                            genotype = variant.genotypes[indiv_id]
                            allele_string = ">".join(genotype.alleles)
                            all_genotypes_string += indiv_id + ":" + allele_string + "  "
                            genotypes.append(allele_string + "   (" +
                                             str(genotype.gq) + ")")
                        else:
                            genotypes.append("")

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                        variant.unique_tuple(), ("", ""))

                    rows.append(
                        map(str, [
                            gene["symbol"],
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            variant.annotation.get("vep_consequence", ""),
                            worst_annotation.get("hgvsc", ""),
                            worst_annotation.get("hgvsp", "").replace(
                                "%3D", "="),
                            worst_annotation.get("sift", ""),
                            worst_annotation.get("polyphen", ""),
                            worst_annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    worst_annotation.get("fathmm_pred",
                                                         "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3", ""),
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3_popmax", ""),
                            variant.annotation["freqs"].get("exac_v3", ""),
                            variant.annotation["freqs"].get(
                                "exac_v3_popmax", ""),
                            all_genotypes_string,
                        ] + genotypes))
        elif download_csv == 'rare_variants':
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            rows = []
            for variant in rare_variants:
                worst_annotation_idx = variant.annotation[
                    "worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][
                    worst_annotation_idx]
                genotypes = []
                all_genotypes_string = ""
                for indiv_id in individuals_to_include:
                    if indiv_id in variant.genotypes and variant.genotypes[
                            indiv_id].num_alt > 0:
                        genotype = variant.genotypes[indiv_id]
                        allele_string = ">".join(genotype.alleles)
                        all_genotypes_string += indiv_id + ":" + allele_string + "  "
                        genotypes.append(allele_string + "   (" +
                                         str(genotype.gq) + ")")
                    else:
                        genotypes.append("")

                measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                    variant.unique_tuple(), ("", ""))
                rows.append(
                    map(str, [
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        all_genotypes_string,
                    ] + genotypes))

        header = [
            "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
            "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
            "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
            "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
            "freq_exac_v3_popmax", "all_genotypes"
        ] + list(
            map(lambda i: i + " (from %s)" % indiv_id_to_project_id[i],
                individuals_to_include))

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [
                v.toJSON() for v in variants
            ]

        return render(
            request, 'project/gene_quicklook.html', {
                'gene':
                gene,
                'gene_json':
                json.dumps(gene),
                'project':
                project,
                'rare_variants_json':
                json.dumps([v.toJSON() for v in rare_variants]),
                'individuals_json':
                json.dumps([
                    i.get_json_obj() for project in projects_to_search
                    for i in project.get_individuals()
                ]),
                'knockouts_json':
                json.dumps(individ_ids_and_variants),
                'other_projects_json':
                other_projects_json,
            })
Ejemplo n.º 39
0
    def handle(self, *args, **options):
        """
        Load per-sample CNV calls from a tab-separated file into the datastores.

        Reads ``options['project_id']``, ``options['cnv_filename']`` and
        ``options['bed_files_directory']``. The first line of the CNV file is
        used as the header; each following row must provide at least the
        ``chr``, ``start``, ``end`` and ``sample`` columns.

        For every row whose sample can be matched to an Individual of the
        project (case-insensitive prefix match on ``indiv_id``), the whole row
        (a dict of strings keyed by header field) is stored under
        ``genotypes.<indiv_id>.extras.cnvs`` on every document whose ``xpos``
        lies between the locations computed for the row's start and end, in
        the project collection and in the individual's family collection
        (whichever of the two is not None).

        Raises:
            ValueError: if the CNV file does not exist.
        """
        project_id = options['project_id']
        print("Loading data into project: " + project_id)
        project = Project.objects.get(project_id=project_id)

        cnv_filename = options['cnv_filename']
        bed_files_directory = options['bed_files_directory']

        if not os.path.isfile(cnv_filename):
            raise ValueError("CNV file %s doesn't exist" % cnv_filename)

        with open(cnv_filename) as f:
            header_fields = f.readline().rstrip('\n').split('\t')
            for line in f:
                fields = line.rstrip('\n').split('\t')
                row_dict = dict(zip(header_fields, fields))

                chrom = "chr" + row_dict['chr']
                start = int(row_dict['start'])
                end = int(row_dict['end'])

                sample_id = row_dict['sample']
                try:
                    individual = Individual.objects.get(
                        project=project, indiv_id__istartswith=sample_id)
                except Exception as e:
                    # Best effort: a sample that matches no individual (or
                    # more than one) is reported and its row is skipped.
                    print("WARNING: %s: %s not found in %s" %
                          (e, sample_id, project))
                    continue

                bed_file_path = os.path.join(bed_files_directory,
                                             "%s.bed" % sample_id)
                if not os.path.isfile(bed_file_path):
                    print("WARNING: .bed file not found: " + bed_file_path)

                    # NOTE(review): the stored cnv_bed_file path is only
                    # updated when the .bed file is missing on disk. This
                    # nesting looks unintentional - confirm whether the update
                    # should happen regardless of (or only when) the file
                    # exists. Behavior is left unchanged here.
                    if individual.cnv_bed_file != bed_file_path:
                        print("Setting cnv_bed_file path to %s" %
                              bed_file_path)
                        individual.cnv_bed_file = bed_file_path
                        individual.save()

                project_collection = get_project_datastore(
                    project)._get_project_collection(project_id)
                family_collection = get_mall(
                    project).variant_store._get_family_collection(
                        project_id, individual.family.family_id)

                # Compare against None explicitly instead of relying on the
                # truth value of the collection objects: PyMongo 4 collections
                # raise NotImplementedError when evaluated as booleans.
                collections = [
                    collection
                    for collection in (project_collection, family_collection)
                    if collection is not None
                ]
                if not collections:
                    continue

                # The query and the update are the same for both collections,
                # so build them once per row.
                xpos_range_query = {
                    '$and': [{
                        'xpos': {
                            '$gte':
                            genomeloc.get_single_location(chrom, start)
                        }
                    }, {
                        'xpos': {
                            '$lte': genomeloc.get_single_location(chrom, end)
                        }
                    }]
                }
                cnv_update = {
                    '$set': {
                        'genotypes.%s.extras.cnvs' % individual.indiv_id:
                        row_dict
                    }
                }
                for collection in collections:
                    collection.update_many(xpos_range_query, cnv_update)

        print("Done")
Ejemplo n.º 40
0
    def search_for_genes(self,
                         gene_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01):
        """
        Search for coding variants in the given genes across project(s) and
        write them to a tab-separated file, one row per variant.

        Projects for which the project collection is not loaded are skipped.

        Args:
            gene_ids (list): 'ENSG..' gene id strings.
            project_id_list (list): (optional) project ids to narrow down the
                search. If empty or None, all projects are searched.
            output_filename (string): output file name
            max_af (float): AF filter. Variants whose highest value in the
                'freqs' annotation is >= max_af are skipped.
        """
        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        # The context manager guarantees the output file is closed even if a
        # lookup or query below raises.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            # all rare coding variants
            variant_filter = get_default_variant_filter(
                'all_coding',
                mall.get_annotator().reference_population_slugs)
            print("All Filters: ")
            pprint(variant_filter.toJSON())

            if project_id_list:
                projects = [
                    Project.objects.get(project_id=project_id)
                    for project_id in project_id_list
                ]
            else:
                projects = Project.objects.all()

            print("Max AF threshold: %s" % max_af)
            print("Staring gene search for:\n%s\nin projects:\n%s\n" %
                  (", ".join(gene_ids), ", ".join(
                      [p.project_id for p in projects])))

            # Cache of Individual records keyed by (project_id, indiv_id).
            # The project id is part of the key because the same indiv_id may
            # occur in more than one project.
            indiv_id_cache = {}
            for project in projects:
                project_id = project.project_id
                if get_project_datastore(
                        project_id).project_collection_is_loaded(project_id):
                    print("=====================")
                    print("Searching project %s" % project_id)
                else:
                    print(
                        "Skipping project %s - gene search is not enabled for this project"
                        % project_id)
                    continue

                for gene_id in gene_ids:
                    gene_id = get_gene_id_from_str(gene_id, get_reference())

                    gene = get_reference().get_gene(gene_id)
                    print("-- searching %s for gene %s (%s)" %
                          (project_id, gene["symbol"], gene_id))

                    for variant in project_analysis.get_variants_in_gene(
                            project, gene_id, variant_filter=variant_filter):
                        if max(variant.annotation['freqs'].values()) >= max_af:
                            continue

                        add_extra_info_to_variants_project(
                            get_reference(), project, [variant])

                        worst_annotation_idx = variant.annotation[
                            "worst_vep_index_per_gene"][gene_id]
                        worst_annotation = variant.annotation[
                            "vep_annotation"][worst_annotation_idx]
                        all_genotypes_list = []
                        pass_filter = "N/A"
                        for indiv_id, genotype in variant.genotypes.items():
                            cache_key = (project_id, indiv_id)
                            individual = indiv_id_cache.get(cache_key)
                            if individual is None:
                                individual = Individual.objects.get(
                                    project=project, indiv_id=indiv_id)
                                indiv_id_cache[cache_key] = individual

                            # filter value is stored in the genotypes even
                            # though it's the same for all individuals
                            pass_filter = genotype.filter
                            if genotype.num_alt > 0:
                                if individual.affected == "A":
                                    affected_label = "[Affected]"
                                elif individual.affected == "N":
                                    affected_label = "[-]"
                                else:
                                    affected_label = "[?]"

                                # %0.3f needs a float, so a missing allele
                                # balance is rendered as NaN
                                allele_balance = (genotype.ab
                                                  if genotype.ab is not None
                                                  else float('NaN'))
                                all_genotypes_list.append(
                                    "%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                    (indiv_id, affected_label,
                                     ">".join(genotype.alleles), genotype.gq,
                                     allele_balance))

                        measureset_id, clinvar_significance = get_clinvar_variants(
                        ).get(variant.unique_tuple(), ("", ""))
                        row = map(str, [
                            project_id,
                            gene["symbol"],
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            pass_filter,
                            variant.annotation.get("vep_consequence", ""),
                            worst_annotation.get("hgvsc", ""),
                            worst_annotation.get("hgvsp", "").replace(
                                "%3D", "="),
                            worst_annotation.get("sift", ""),
                            worst_annotation.get("polyphen", ""),
                            worst_annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    worst_annotation.get("fathmm_pred",
                                                         "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3", ""),
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3_popmax", ""),
                            variant.annotation["freqs"].get("exac_v3", ""),
                            variant.annotation["freqs"].get(
                                "exac_v3_popmax", ""),
                            ", ".join(all_genotypes_list),
                        ])
                        writer.writerow(row)

        print("Wrote out %s" % output_filename)
Ejemplo n.º 41
0
    def search_for_gene(self, search_gene_id, project_id_list, max_af=0.01):
        """
        Search for coding variants in one gene across project(s).

        Results are written, one variant per row, to the tab-separated file
        'results_<search_gene_id>.tsv' (a relative path). Projects for which
        the project collection is not loaded are skipped.

        Args:
            search_gene_id: Gene ID to search for
            project_id_list: An optional list of project ids to narrow the
                search down to. If empty or None, all projects are searched.
            max_af: Variants whose highest value in the 'freqs' annotation is
                >= max_af are skipped.

        Raises:
            IndexError: if an id in project_id_list matches no project.
        """
        gene_id = get_gene_id_from_str(search_gene_id, get_reference())
        gene = get_reference().get_gene(gene_id)

        # project_id_list is optional, so it may be None here
        print("Staring gene search for: %s %s in projects: %s\n" %
              (search_gene_id, gene['gene_id'],
               ", ".join(project_id_list or [])))
        print("Max AF threshold: %s" % max_af)

        # all rare coding variants
        variant_filter = get_default_variant_filter(
            'all_coding',
            mall.get_annotator().reference_population_slugs)
        print("All Filters: ")
        pprint(variant_filter.toJSON())

        output_filename = 'results_' + search_gene_id + '.tsv'
        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        # The context manager guarantees the output file is closed even if a
        # lookup or query below raises.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            if project_id_list:
                # Resolve every requested project up front, so an unknown id
                # fails (IndexError) before any project has been searched.
                projects = [
                    Project.objects.filter(project_id=project_id)[0]
                    for project_id in project_id_list
                ]
            else:
                projects = list(Project.objects.all())
                project_id_list = [p.project_id for p in projects]

            for project_id, project in zip(project_id_list, projects):
                if get_project_datastore(
                        project_id).project_collection_is_loaded(project_id):
                    print("Running on project %s" % project_id)
                else:
                    print(
                        "Skipping project %s - gene search is not enabled for this project"
                        % project_id)
                    continue

                for variant in project_analysis.get_variants_in_gene(
                        project, gene_id, variant_filter=variant_filter):
                    if max(variant.annotation['freqs'].values()) >= max_af:
                        continue
                    add_extra_info_to_variants_project(
                        get_reference(), project, [variant])

                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    for indiv_id, genotype in variant.genotypes.items():
                        # filter value is stored in the genotypes even though
                        # it's the same for all individuals
                        pass_filter = genotype.filter
                        if genotype.num_alt > 0:
                            # %0.3f needs a float, so a missing allele balance
                            # is rendered as NaN
                            allele_balance = (genotype.ab
                                              if genotype.ab is not None else
                                              float('NaN'))
                            all_genotypes_list.append(
                                "%s[gt:%s GQ:%s AB:%0.3f]" %
                                (indiv_id, ">".join(genotype.alleles),
                                 genotype.gq, allele_balance))

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                        variant.unique_tuple(), ("", ""))
                    row = map(str, [
                        project_id,
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        ", ".join(all_genotypes_list),
                    ])
                    writer.writerow(row)

        print("Wrote out %s" % output_filename)
Ejemplo n.º 42
0
    def search_for_gene(self, search_gene_id, project_id_list, max_af=0.01):
        """
        Search for coding variants in a single gene across project(s).

        One row per matching variant is written to the tab-separated file
        'results_<search_gene_id>.tsv' (a relative path). A project whose
        project collection is not loaded is reported and skipped.

        Args:
            search_gene_id: Gene ID to search for
            project_id_list: An optional list of project ids to restrict the
                search to. All projects are searched when it is empty or None.
            max_af: Upper bound on allele frequency. A variant is skipped when
                the largest value in its 'freqs' annotation is >= max_af.

        Raises:
            IndexError: if project_id_list contains an id that matches no
                project.
        """
        gene_id = get_gene_id_from_str(search_gene_id, get_reference())
        gene = get_reference().get_gene(gene_id)

        # The project list is optional; guard against None before joining it.
        requested_ids = ", ".join(project_id_list or [])
        print("Staring gene search for: %s %s in projects: %s\n" %
              (search_gene_id, gene['gene_id'], requested_ids))
        print("Max AF threshold: %s" % max_af)

        # all rare coding variants
        variant_filter = get_default_variant_filter(
            'all_coding',
            mall.get_annotator().reference_population_slugs)
        print("All Filters: ")
        pprint(variant_filter.toJSON())

        output_filename = 'results_' + search_gene_id + '.tsv'
        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        # Using a context manager so the file is closed on every exit path,
        # including exceptions raised while searching.
        with open(output_filename, 'w') as outfile:
            writer = csv.writer(outfile, delimiter='\t')
            writer.writerow(header)

            if project_id_list:
                # Look up all requested projects before searching any of them:
                # an unknown id raises IndexError here, up front.
                projects = [
                    Project.objects.filter(project_id=project_id)[0]
                    for project_id in project_id_list
                ]
            else:
                projects = list(Project.objects.all())
                project_id_list = [p.project_id for p in projects]

            for project_id, project in zip(project_id_list, projects):
                datastore = get_project_datastore(project_id)
                if not datastore.project_collection_is_loaded(project_id):
                    print(
                        "Skipping project %s - gene search is not enabled for this project"
                        % project_id)
                    continue
                print("Running on project %s" % project_id)

                for variant in project_analysis.get_variants_in_gene(
                        project, gene_id, variant_filter=variant_filter):
                    if max(variant.annotation['freqs'].values()) >= max_af:
                        continue
                    add_extra_info_to_variants_project(
                        get_reference(), project, [variant])

                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    freqs = variant.annotation["freqs"]

                    all_genotypes_list = []
                    pass_filter = "N/A"
                    for indiv_id, genotype in variant.genotypes.items():
                        # filter value is stored in the genotypes even though
                        # it's the same for all individuals
                        pass_filter = genotype.filter
                        if genotype.num_alt > 0:
                            # A missing allele balance is formatted as NaN,
                            # because %0.3f cannot format None.
                            if genotype.ab is not None:
                                allele_balance = genotype.ab
                            else:
                                allele_balance = float('NaN')
                            all_genotypes_list.append(
                                "%s[gt:%s GQ:%s AB:%0.3f]" %
                                (indiv_id, ">".join(genotype.alleles),
                                 genotype.gq, allele_balance))

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                        variant.unique_tuple(), ("", ""))
                    row = map(str, [
                        project_id,
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        freqs.get("1kg_wgs_phase3", ""),
                        freqs.get("1kg_wgs_phase3_popmax", ""),
                        freqs.get("exac_v3", ""),
                        freqs.get("exac_v3_popmax", ""),
                        ", ".join(all_genotypes_list),
                    ])
                    writer.writerow(row)

        print("Wrote out %s" % output_filename)