Example #1
0
    def handle(self, *args, **options):
        """Delete and reload variant data for the requested families of a project.

        The project is looked up from ``options["project_id"]`` and the family
        ids to process are the positional ``args``.  Families are visited VCF
        by VCF, as grouped by ``project.families_by_vcf()``.  Each requested
        family is deleted from the datastore once, then passed to
        ``load_variants_for_family_list`` for every VCF it is listed under.

        If no family ids are given, no family matches and nothing is reloaded.
        """
        project_id = options["project_id"]
        family_ids = args
        project = Project.objects.get(project_id=project_id)

        # family ids for which delete_family has already been called once
        already_deleted_once = set()
        for vcf_file, families in project.families_by_vcf().items():
            families_to_load = []
            for family in families:
                family_id = family.family_id
                print("Checking id: " + family_id)
                if not family_ids or family_id not in family_ids:
                    continue

                # delete this family, but only the first time it is encountered
                if family_id not in already_deleted_once:
                    get_datastore(project_id).delete_family(project_id, family_id)
                    already_deleted_once.add(family_id)

                families_to_load.append(family)

            if not families_to_load:
                # none of the requested families is in this VCF: skip the load
                continue

            # reload the matching families from this VCF
            print("Loading %s %s" % (project_id, families_to_load))
            xbrowse_controls.load_variants_for_family_list(project, families_to_load, vcf_file)
Example #2
0
 def get_data_status(self):
     """Return the variant-data status string for this object.

     Returns 'no_variants' when ``has_variant_data()`` is false, 'not_loaded'
     when the datastore's ``family_exists`` check fails for
     (project_id, cohort_id), and otherwise whatever the datastore's
     ``get_family_status`` reports for that pair.
     """
     if not self.has_variant_data():
         return 'no_variants'

     project_id = self.project.project_id
     if get_datastore(project_id).family_exists(project_id, self.cohort_id):
         return get_datastore(project_id).get_family_status(project_id, self.cohort_id)
     return 'not_loaded'
Example #3
0
def _deprecated_update_vcfffiles(project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record):
    """Record a loaded dataset as a VCFFile and attach it to the matching BaseIndividuals.

    Busts the datastore project cache and the project results cache for the
    BaseProject linked to ``project``.  It then reuses the newest VCFFile
    matching (project, variant-calls dataset type, sample_type,
    elasticsearch_index), or creates one, and updates its ``file_path`` and
    ``loaded_date``.  Finally the VCFFile is added to every BaseIndividual
    whose ``seqr_individual`` is the individual of one of the sample records.

    Args:
        project: seqr project used to look up the BaseProject.
        sample_type: sample type used to select / create the VCFFile.
        elasticsearch_index: index name used to select / create the VCFFile.
        dataset_path: value stored in ``vcf_file.file_path``.
        matched_sample_id_to_sample_record: mapping whose values are sample
            records exposing ``loaded_date`` and ``individual``.

    Raises:
        IndexError: if ``matched_sample_id_to_sample_record`` is empty.
    """
    base_project = BaseProject.objects.get(seqr_project=project)
    get_datastore(base_project).bust_project_cache(base_project.project_id)
    clear_project_results_cache(base_project.project_id)

    vcf_file = VCFFile.objects.filter(
        project=base_project,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_type=sample_type,
        elasticsearch_index=elasticsearch_index).order_by('-pk').first()

    if not vcf_file:
        vcf_file = VCFFile.objects.create(
            project=base_project,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_type=sample_type,
            elasticsearch_index=elasticsearch_index,
        )
        logger.info("Created vcf file: " + str(vcf_file.__dict__))

    # Materialize the values once: on Python 3 dict views cannot be indexed,
    # and the same records are needed again below.
    sample_records = list(matched_sample_id_to_sample_record.values())

    vcf_file.file_path = dataset_path
    # the loaded date is taken from the first sample record
    vcf_file.loaded_date = sample_records[0].loaded_date
    vcf_file.save()

    for sample_record in sample_records:
        for base_indiv in BaseIndividual.objects.filter(seqr_individual=sample_record.individual).only('id'):
            base_indiv.vcf_files.add(vcf_file)
Example #4
0
    def handle(self, *args, **options):
        """Delete and reload variant data for the requested families of a project.

        The project is looked up from ``options["project_id"]`` and the family
        ids to process come from ``options["family_ids"]``.  Families are
        visited VCF by VCF, as grouped by ``project.families_by_vcf()``.  Each
        requested family is deleted from the datastore once, then passed to
        ``load_variants_for_family_list`` for every VCF it is listed under.

        If ``family_ids`` is empty, no family matches and nothing is reloaded.
        """
        project_id = options["project_id"]
        family_ids = options["family_ids"]
        project = Project.objects.get(project_id=project_id)

        # family ids for which delete_family has already been called once
        already_deleted_once = set()
        for vcf_file, families in project.families_by_vcf().items():
            families_to_load = []
            for family in families:
                family_id = family.family_id
                print("Checking id: " + family_id)
                if not family_ids or family_id not in family_ids:
                    continue

                # delete this family, but only the first time it is encountered
                if family_id not in already_deleted_once:
                    get_datastore(project_id).delete_family(
                        project_id, family_id)
                    already_deleted_once.add(family_id)

                families_to_load.append(family)

            if not families_to_load:
                # none of the requested families is in this VCF: skip the load
                continue

            # reload the matching families from this VCF
            print("Loading %s %s" % (project_id, families_to_load))
            xbrowse_controls.load_variants_for_family_list(
                project, families_to_load, vcf_file)
Example #5
0
def update_xbrowse_vcfffiles(project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record):
    """Record a loaded dataset as a VCFFile and attach it to the matching BaseIndividuals.

    Busts the datastore project cache and the project results cache for the
    xbrowse project returned by ``find_matching_xbrowse_model(project)``.  It
    then reuses the newest VCFFile matching (project, variant-calls dataset
    type, sample_type, elasticsearch_index), or creates one, and updates its
    ``file_path`` and ``loaded_date``.  Finally the VCFFile is added to every
    BaseIndividual whose ``seqr_individual_id`` is the ``individual_id`` of
    one of the sample records.

    Args:
        project: seqr project used to look up the xbrowse project.
        sample_type: sample type used to select / create the VCFFile.
        elasticsearch_index: index name used to select / create the VCFFile.
        dataset_path: value stored in ``vcf_file.file_path``.
        matched_sample_id_to_sample_record: mapping whose values are sample
            records exposing ``loaded_date`` and ``individual_id``.

    Raises:
        IndexError: if ``matched_sample_id_to_sample_record`` is empty.
    """
    base_project = find_matching_xbrowse_model(project)
    get_datastore(base_project).bust_project_cache(base_project.project_id)
    clear_project_results_cache(base_project.project_id)

    vcf_file = VCFFile.objects.filter(
        project=base_project,
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_type=sample_type,
        elasticsearch_index=elasticsearch_index).order_by('-pk').first()

    if not vcf_file:
        vcf_file = VCFFile.objects.create(
            project=base_project,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_type=sample_type,
            elasticsearch_index=elasticsearch_index,
        )
        logging.info("Created vcf file: " + str(vcf_file.__dict__))

    # Materialize the values once: on Python 3 dict views cannot be indexed,
    # and the same records are needed again below.
    sample_records = list(matched_sample_id_to_sample_record.values())

    vcf_file.file_path = dataset_path
    # the loaded date is taken from the first sample record
    vcf_file.loaded_date = sample_records[0].loaded_date
    vcf_file.save()

    # single query for all individuals referenced by the sample records
    base_individuals = BaseIndividual.objects.filter(
        seqr_individual_id__in=[s.individual_id for s in sample_records]
    )
    for base_indiv in base_individuals:
        base_indiv.vcf_files.add(vcf_file)
Example #6
0
    def handle(self, *args, **options):
        """Delete and reload variant data for selected families of one project.

        Reads ``project_id`` and ``family_ids`` from ``options``.  For every
        VCF returned by ``project.families_by_vcf()`` the families whose id is
        in ``family_ids`` are collected; each is deleted from the datastore
        the first time it is seen, and the collected families are then
        reloaded from that VCF.  VCFs without any matching family are only
        reported.
        """
        project_id = options["project_id"]
        family_ids = options["family_ids"]
        project = Project.objects.get(project_id=project_id)

        # ids of families whose datastore data was already deleted in this run
        purged_ids = set()
        for vcf_file, families in project.families_by_vcf().items():
            print("Checking families %s in vcf %s" % (families, vcf_file))

            matching_families = []
            for family in families:
                family_id = family.family_id
                if family_ids and family_id in family_ids:
                    print("Processing family: " + family_id)

                    # delete data for this family (first encounter only)
                    if family_id not in purged_ids:
                        print("Deleting variant data for family: " + family_id)
                        get_datastore(project).delete_family(project_id, family_id)
                        purged_ids.add(family_id)

                    matching_families.append(family)

            if not matching_families:
                print("0 matching families found in this VCF")
                continue

            # reload the matching families
            print("Loading %(project_id)s %(families_to_load)s" % {
                "project_id": project_id,
                "families_to_load": matching_families,
            })
            xbrowse_controls.load_variants_for_family_list(project, matching_families, vcf_file)

        print("Finished.")
Example #7
0
 def get_data_status(self):
     """Report the state of this object's variant data as a status string.

     The result is 'no_variants' if ``has_variant_data()`` is false and
     'not_loaded' if the datastore's ``family_exists`` returns a false value
     for (project_id, cohort_id).  In every other case the value returned by
     the datastore's ``get_family_status`` for that pair is passed through.
     """
     if not self.has_variant_data():
         return 'no_variants'

     project_id = self.project.project_id
     is_loaded = get_datastore(project_id).family_exists(project_id, self.cohort_id)
     if not is_loaded:
         return 'not_loaded'
     return get_datastore(project_id).get_family_status(project_id, self.cohort_id)
Example #8
0
def calculate_mendelian_variant_search(search_spec, xfamily):
    """Run the family variant search selected by ``search_spec.search_mode``.

    Handled modes are 'standard_inheritance', 'custom_inheritance',
    'gene_burden', 'allele_count' and 'all_variants'.  Every mode forwards
    ``search_spec.variant_filter`` and ``search_spec.genotype_quality_filter``
    to the underlying search function.

    Returns:
        A list of the variants produced by the search, or None when the
        search mode is not one of the handled values.
    """
    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        matches = get_variants_with_inheritance_mode(
            get_mall(),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        return list(matches)

    if mode == 'custom_inheritance':
        matches = get_variants_family(
            get_datastore(),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        return list(matches)

    if mode == 'gene_burden':
        gene_stream = get_genes_family(
            get_datastore(),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        # flatten the per-gene results into a stream of variants
        return list(stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference()))

    if mode == 'allele_count':
        matches = get_variants_allele_count(
            get_datastore(),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        return list(matches)

    if mode == 'all_variants':
        matches = get_variants_family(
            get_datastore(),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.genotype_quality_filter,
        )
        return list(matches)

    # unrecognised search mode
    return None
    def update_pop_freqs_in_family_tables(self):
        """Rewrite the ``db_freqs.*`` fields of the variants in every family collection.

        Work is tracked in the local sqlite file
        ``reference_populations_family_tables.db``: one row per
        (project_id, family_id) with ``started`` / ``finished`` flags.  Rows
        persist between runs and only rows with ``started=0`` are picked, so a
        family that was started in an earlier run is not processed again.

        For each variant document the frequencies are looked up by
        (xpos, ref, alt).  Every population slug listed in
        ``annotator_settings.reference_populations_to_load`` is written, with
        0 for populations missing from the lookup result.
        """
        population_frequency_store = mall.get_annotator().get_population_frequency_store()

        # isolation_level=None puts the sqlite connection in autocommit mode
        db = sqlite3.connect("reference_populations_family_tables.db", isolation_level=None)
        try:
            # Register every (project, family) pair; rows that already exist
            # keep their started / finished flags (INSERT OR IGNORE).
            db.execute(
                "CREATE TABLE if not exists all_projects(project_id varchar(200), family_id varchar(200), started bool, finished bool)"
            )
            db.execute("CREATE UNIQUE INDEX IF NOT EXISTS all_projects_idx ON all_projects(project_id, family_id)")
            for project in Project.objects.all().order_by("-last_accessed_date"):
                project_id = project.project_id
                datastore = get_datastore(project_id)
                for family_info in datastore._get_family_info(project_id):
                    family_id = family_info["family_id"]
                    db.execute("INSERT OR IGNORE INTO all_projects VALUES (?, ?, 0, 0)", (project_id, family_id))

            population_slugs_to_load = [
                population_spec["slug"] for population_spec in annotator_settings.reference_populations_to_load
            ]

            # Process the pending (project, family) pairs one at a time, in random order
            while True:
                remaining_work = list(
                    db.execute("SELECT project_id, family_id FROM all_projects WHERE started=0 ORDER BY RANDOM()")
                )
                print("%d projects / families remaining" % len(remaining_work))
                if not remaining_work:
                    print("Done with all projects/families")
                    break

                project_id, family_id = remaining_work[0]
                datastore = get_datastore(project_id)
                print("    updating %s / %s" % (project_id, family_id))
                # mark as started before doing the work so the row is not selected again
                db.execute(
                    "UPDATE all_projects SET started=1 WHERE project_id=? AND family_id=?", (project_id, family_id)
                )

                family_collection = datastore._get_family_collection(project_id, family_id)

                for variant_dict in family_collection.find():
                    freqs = population_frequency_store.get_frequencies(
                        variant_dict["xpos"], variant_dict["ref"], variant_dict["alt"]
                    )
                    full_freqs = {
                        "db_freqs." + population_slug: freqs.get(population_slug, 0)
                        for population_slug in population_slugs_to_load
                    }
                    family_collection.update(
                        {"xpos": variant_dict["xpos"], "ref": variant_dict["ref"], "alt": variant_dict["alt"]},
                        {"$set": full_freqs},
                        upsert=False,
                    )

                print("     ---> done updating project_id: %s, family_id: %s" % (project_id, family_id))
                db.execute(
                    "UPDATE all_projects SET finished=1 WHERE project_id=? AND family_id=?", (project_id, family_id)
                )
        finally:
            # always release the sqlite handle, even if a lookup or update fails
            db.close()
Example #10
0
    def get_data_status(self):
        """Return the variant-data status string for this object.

        Checks, in order: "loaded" when ``get_elasticsearch_dataset`` returns
        something other than None for the project id; 'no_variants' when
        ``has_variant_data()`` is false; 'not_loaded' when the datastore's
        ``family_exists`` check fails for (project_id, cohort_id).  Otherwise
        the datastore's ``get_family_status`` value is returned.
        """
        project_id = self.project.project_id

        if get_elasticsearch_dataset(project_id) is not None:
            return "loaded"

        if not self.has_variant_data():
            return 'no_variants'

        if get_datastore(project_id).family_exists(project_id, self.cohort_id):
            return get_datastore(project_id).get_family_status(project_id, self.cohort_id)

        return 'not_loaded'
Example #11
0
def calculate_mendelian_variant_search(search_spec, xfamily):
    """Run the family variant search selected by ``search_spec.search_mode``.

    A trace line with the project id, the search mode and the search spec's
    attributes is written to stderr first.  Handled modes are
    'standard_inheritance', 'custom_inheritance', 'gene_burden',
    'allele_count' and 'all_variants'.  Every mode forwards
    ``search_spec.variant_filter`` and ``search_spec.quality_filter`` to the
    underlying search function.

    Returns:
        A list of the variants produced by the search, or None when the
        search mode is not one of the handled values.
    """
    sys.stderr.write("     mendelian_variant_search for %s - search mode: %s  %s\n" % (xfamily.project_id, search_spec.search_mode, search_spec.__dict__))

    mode = search_spec.search_mode

    if mode == 'standard_inheritance':
        matches = get_variants_with_inheritance_mode(
            get_mall(xfamily.project_id),
            xfamily,
            search_spec.inheritance_mode,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        return list(matches)

    if mode == 'custom_inheritance':
        matches = get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            genotype_filter=search_spec.genotype_inheritance_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        return list(matches)

    if mode == 'gene_burden':
        gene_stream = get_genes_family(
            get_datastore(xfamily.project_id),
            get_reference(),
            xfamily,
            burden_filter=search_spec.gene_burden_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        # flatten the per-gene results into a stream of variants
        return list(stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference()))

    if mode == 'allele_count':
        matches = get_variants_allele_count(
            get_datastore(xfamily.project_id),
            xfamily,
            search_spec.allele_count_filter,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
        )
        return list(matches)

    if mode == 'all_variants':
        matches = get_variants_family(
            get_datastore(xfamily.project_id),
            xfamily,
            variant_filter=search_spec.variant_filter,
            quality_filter=search_spec.quality_filter,
            indivs_to_consider=xfamily.indiv_id_list(),
        )
        return list(matches)

    # unrecognised search mode
    return None
Example #12
0
    def handle(self, *args, **options):
        """Rename individuals of a project according to a tab-separated id map.

        ``args[0]`` is the project id and ``args[1]`` the path of a text file
        with one ``old_id<TAB>new_id`` pair per line (blank lines are
        skipped).  Each individual found under its old id is saved with the
        new id, and the datastore data of its family, if it has one, is
        deleted.

        Raises:
            ValueError: if a non-blank line does not split into exactly two
                tab-separated fields.
        """
        project = Project.objects.get(project_id=args[0])

        # read the mapping inside a context manager so the file is closed
        with open(args[1]) as sample_map_file:
            sample_map = dict(
                line.strip('\n').split('\t')
                for line in sample_map_file
                if line.strip()
            )

        for old_id, new_id in sample_map.items():
            # change actual IDs
            indiv = Individual.objects.get(project=project, indiv_id=old_id)
            indiv.indiv_id = new_id
            indiv.save()

            # datastore: drop the data stored for the individual's family
            if indiv.family:
                mall.get_datastore().delete_family(project.project_id, indiv.family.family_id)
def look_up_individual_loaded_date(source_individual, earliest_loaded_date=False):
    """Retrieve the data-loaded time for the given individual.

    The time is read from the ``_id`` of the first record in the family's
    datastore collection, or, when the collection has no record, from the
    ``_id`` of the family-info record.

    Args:
        source_individual: individual exposing ``project`` and ``family``.
        earliest_loaded_date: when true, the lookup uses the project id with
            a "_previous1" suffix appended.

    Returns:
        The ``generation_time`` of the record's ``_id``.  None is returned
        when the datastore has no family collection, and also when any
        exception occurs (the exception is logged, not raised).
    """
    # decode data loaded time
    loaded_date = None
    try:
        datastore = get_datastore(source_individual.project)

        family_id = source_individual.family.family_id
        project_id = source_individual.project.project_id
        if earliest_loaded_date:
            project_id += "_previous1"  # add suffix

        # datastores without a _get_family_collection method are treated as
        # having no family collection
        family_collection = (
            datastore._get_family_collection(project_id, family_id)
            if hasattr(datastore, '_get_family_collection') else None
        )
        # Compare with None explicitly: MongoDB collection objects do not
        # support truth-value testing in current PyMongo releases.
        if family_collection is None:
            return loaded_date

        record = family_collection.find_one()
        if record:
            # presumably a bson ObjectId, whose generation_time is its creation timestamp
            loaded_date = record['_id'].generation_time
        else:
            family_info_record = datastore._get_family_info(project_id, family_id)
            loaded_date = family_info_record['_id'].generation_time

    except Exception as e:
        # best-effort lookup: log the failure and fall through to return None
        logger.error('Unable to look up loaded_date for %s' % (source_individual,))
        logger.error(e)

    return loaded_date
Example #14
0
def look_up_individual_loaded_date(source_individual, earliest_loaded_date=False):
    """Retrieve the data-loaded time for the given individual.

    The time is read from the ``_id`` of the first record in the family's
    datastore collection, or, when the collection has no record, from the
    ``_id`` of the family-info record.

    Args:
        source_individual: individual exposing ``project`` and ``family``.
        earliest_loaded_date: when true, the lookup uses the project id with
            a "_previous1" suffix appended.

    Returns:
        The ``generation_time`` of the record's ``_id``.  None is returned
        when the datastore has no family collection, and also when any
        exception occurs (the exception is logged, not raised).
    """
    # decode data loaded time
    loaded_date = None
    try:
        datastore = get_datastore(source_individual.project)

        family_id = source_individual.family.family_id
        project_id = source_individual.project.project_id
        if earliest_loaded_date:
            project_id += "_previous1"  # add suffix

        family_collection = datastore._get_family_collection(project_id, family_id)
        # Compare with None explicitly: MongoDB collection objects do not
        # support truth-value testing in current PyMongo releases.
        if family_collection is None:
            return loaded_date

        record = family_collection.find_one()
        if record:
            # presumably a bson ObjectId, whose generation_time is its creation timestamp
            loaded_date = record['_id'].generation_time
            logger.info("%s data-loaded date: %s" % (project_id, loaded_date))
        else:
            family_info_record = datastore._get_family_info(project_id, family_id)
            loaded_date = family_info_record['_id'].generation_time

    except Exception as e:
        # best-effort lookup: log the failure and fall through to return None
        logger.error('Unable to look up loaded_date for %s' % (source_individual,))
        logger.error(e)

    return loaded_date
Example #15
0
def get_variants_from_variant_tuples(project, variant_tuples, user=None):
    """Fetch one variant object per (xpos, ref, alt, family_id) tuple.

    The tuples are grouped by family and each group is looked up with a
    single ``get_multiple_variants`` call.  A tuple for which the datastore
    result is falsy gets a freshly built, annotated ``Variant`` flagged with
    the extra 'created_variant'.  Every returned variant carries the extras
    'family_id' and 'project_id'.

    Returns:
        List of variants, grouped by family.
    """
    datastore = get_datastore(project)
    population_slugs = project.get_reference_population_slugs()

    # bucket the (xpos, ref, alt) keys per family, keeping their input order
    keys_by_family = {}
    for xpos, ref, alt, family_id in variant_tuples:
        keys_by_family.setdefault(family_id, []).append((xpos, ref, alt))

    variants = []
    for family_id, family_keys in keys_by_family.items():
        found = datastore.get_multiple_variants(
            project.project_id,
            family_id,
            family_keys,
            user=user
        )
        for (xpos, ref, alt), variant in zip(family_keys, found):
            if not variant:
                # not in the datastore: build a placeholder and annotate it
                variant = Variant(xpos, ref, alt)
                get_annotator().annotate_variant(variant, population_slugs)
                variant.set_extra('created_variant', True)

            variant.set_extra('family_id', family_id)
            variant.set_extra('project_id', project.project_id)
            variants.append(variant)

    return variants
Example #16
0
def get_saved_variants_for_family(family):
    """
    Look up the variants flagged (via FamilySearchFlag) in this family.

    Returns:
        List of variants that were saved in this family
        List of variant tuples where no variants were in the datastore
    """
    search_flags = FamilySearchFlag.objects.filter(family=family).order_by('-date_saved')

    # distinct (xpos, ref, alt) keys of the flagged variants
    variant_tuples = {(flag.xpos, flag.ref, flag.alt) for flag in search_flags}

    fetched = get_datastore(family.project).get_multiple_variants(
        family.project.project_id,
        family.family_id,
        variant_tuples,
    )

    variants = []
    couldntfind = []
    for variant_t, variant in zip(variant_tuples, fetched):
        if not variant:
            couldntfind.append(variant_t)
            continue
        variants.append(variant)

    return variants, couldntfind
Example #17
0
def get_saved_variants_for_family(family):
    """
    Look up, one by one, the variants flagged (via FamilySearchFlag) in this family.

    Returns:
        List of variants that were saved in this family
        List of variant tuples where no variants were in the datastore
    """
    search_flags = FamilySearchFlag.objects.filter(family=family).order_by('-date_saved')

    # distinct (xpos, ref, alt) keys of the flagged variants
    variant_tuples = {(flag.xpos, flag.ref, flag.alt) for flag in search_flags}

    variants = []
    couldntfind = []
    for variant_t in variant_tuples:
        xpos, ref, alt = variant_t
        variant = get_datastore(family.project.project_id).get_single_variant(
            family.project.project_id, family.family_id, xpos, ref, alt)
        if not variant:
            couldntfind.append(variant_t)
            continue
        variants.append(variant)

    return variants, couldntfind
Example #18
0
def get_variants_from_variant_tuples(project, variant_tuples, user=None):
    """Return a variant object for each (xpos, ref, alt, family_id) tuple.

    Tuples are grouped per family and every group is resolved through one
    ``get_multiple_variants`` call on the project's datastore.  When the
    datastore yields a falsy entry for a key, a new ``Variant`` is created,
    annotated and marked with the extra 'created_variant'.  All variants get
    the extras 'family_id' and 'project_id' before being collected.

    Returns:
        List of variants, grouped by family.
    """
    datastore = get_datastore(project)
    population_slugs = project.get_reference_population_slugs()

    # family_id -> list of (xpos, ref, alt) keys in input order
    grouped_keys = {}
    for xpos, ref, alt, family_id in variant_tuples:
        grouped_keys.setdefault(family_id, []).append((xpos, ref, alt))

    variants = []
    for family_id, keys in grouped_keys.items():
        datastore_hits = datastore.get_multiple_variants(
            project.project_id, family_id, keys, user=user)
        for (xpos, ref, alt), variant in zip(keys, datastore_hits):
            if not variant:
                # not in the datastore: build a placeholder and annotate it
                variant = Variant(xpos, ref, alt)
                get_annotator().annotate_variant(variant, population_slugs)
                variant.set_extra('created_variant', True)

            variant.set_extra('family_id', family_id)
            variant.set_extra('project_id', project.project_id)
            variants.append(variant)

    return variants
Example #19
0
def family_variant_view(request, project_id, family_id):
    """Render the page for a single variant of a family.

    The variant is identified by the GET parameters ``xpos`` (integer),
    ``ref`` and ``alt``.

    Returns:
        The rendered 'family/family_variant_view.html' page, or a plain
        'Invalid View' response when ``xpos`` is missing or not an integer.

    Raises:
        Http404 (via get_object_or_404): unknown project or family.
        PermissionDenied: the user may not view the project.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # Only the int() conversion can fail: TypeError when xpos is absent
    # (None), ValueError when it is not a number.
    try:
        xpos = int(request.GET.get('xpos'))
    except (TypeError, ValueError):
        return HttpResponse('Invalid View')
    ref = request.GET.get('ref')
    alt = request.GET.get('alt')

    variant = get_datastore(project).get_single_variant(
        project_id, family_id, xpos, ref, alt)
    add_extra_info_to_variants_project(get_reference(),
                                       project, [variant],
                                       add_family_tags=True,
                                       add_populations=True)

    return render(
        request, 'family/family_variant_view.html', {
            'project': project,
            'family': family,
            'variant_json': json.dumps(variant.toJSON()),
        })
Example #20
0
def _set_saved_variant_json(new_variant_tag_or_note, source_variant_tag_or_note, new_family):
    if new_family is None:
        return

    project_id = new_family.project.deprecated_project_id
    project = Project.objects.get(project_id=project_id)
    try:
        variant_info = get_datastore(project).get_single_variant(
            project_id,
            new_family.family_id,
            source_variant_tag_or_note.xpos,
            source_variant_tag_or_note.ref,
            source_variant_tag_or_note.alt)
    except Exception as e:
        logger.error("Unable to retrieve variant annotations for %s %s: %s" % (
            new_family, source_variant_tag_or_note, e))
        return

    if variant_info:
        add_extra_info_to_variants_project(get_reference(), project, [variant_info], add_family_tags=True,
                                           add_populations=True)
        variant_json = variant_info.toJSON()

        new_variant_tag_or_note.saved_variant_json = json.dumps(variant_json)
        new_variant_tag_or_note.save()
Example #21
0
def add_family_search_flag(request):
    """Save a FamilySearchFlag for a variant and return the variant as JSON.

    All of the GET parameters ``project_id``, ``family_id``, ``xpos``,
    ``ref``, ``alt``, ``note``, ``flag_type`` and ``flag_inheritance_mode``
    must be present.

    Returns:
        JSONResponse with ``{'is_error': False, 'variant': ...}`` on success,
        or ``{'is_error': True, 'error': message}`` when a parameter is
        missing or ``xpos`` is not an integer.

    Raises:
        Http404 (via get_object_or_404): unknown project or family.
        PermissionDenied: the user may not edit the project.
    """
    required_keys = [
        'project_id', 'family_id', 'xpos', 'ref', 'alt', 'note',
        'flag_type', 'flag_inheritance_mode'
    ]

    # when several parameters are missing, the last one is the one reported
    error = None
    for key in required_keys:
        if request.GET.get(key) is None:
            error = "%s is required" % key
    if error:
        return JSONResponse({
            'is_error': True,
            'error': error,
        })

    project = get_object_or_404(Project,
                                project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family,
                               project=project,
                               family_id=request.GET.get('family_id'))
    if not project.can_edit(request.user):
        raise PermissionDenied

    # report a malformed position as an error response instead of a server error
    try:
        xpos = int(request.GET['xpos'])
    except ValueError:
        return JSONResponse({
            'is_error': True,
            'error': "xpos must be an integer",
        })
    ref = request.GET.get('ref')
    alt = request.GET['alt']
    note = request.GET.get('note')
    flag_type = request.GET.get('flag_type')
    flag_inheritance_mode = request.GET.get('flag_inheritance_mode')

    # todo: more validation - is variant valid?

    flag = FamilySearchFlag(
        user=request.user,
        family=family,
        xpos=xpos,
        ref=ref,
        alt=alt,
        note=note,
        flag_type=flag_type,
        suggested_inheritance=flag_inheritance_mode,
        date_saved=timezone.now(),
    )
    flag.save()

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id, family.family_id, xpos, ref, alt)
    api_utils.add_extra_info_to_variant(get_reference(), family, variant)

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
Example #22
0
def edit_family_cause(request, project_id, family_id):
    """
    View for editing the causal variants of a family.

    - A 'variant' GET parameter of the form 'xpos|ref|alt' adds that variant to
      the family's causal variants.
    - A valid POST replaces the family's causal variants with the posted
      'variants' list and redirects to 'family_home'.
    - Otherwise 'family/edit_cause.html' is rendered with the current causal
      variants.

    Raises:
        PermissionDenied: if the user may not administer the project.
        (get_object_or_404 handles an unknown project or family.)
    """
    error = None

    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_admin(request.user):
        raise PermissionDenied

    if request.GET.get('variant'):
        xpos, ref, alt = request.GET['variant'].split('|')
        CausalVariant.objects.get_or_create(
            family=family,
            xpos=int(xpos),
            ref=ref,
            alt=alt,
        )

    # read after the optional get_or_create so a freshly added variant is included
    causal_variants = list(CausalVariant.objects.filter(family=family))

    if request.method == 'POST':
        form = EditFamilyCauseForm(family, request.POST)
        if form.is_valid():
            CausalVariant.objects.filter(family=family).delete()
            for v_str in request.POST.getlist('variants'):
                xpos, ref, alt = v_str.split('|')
                CausalVariant.objects.create(
                    family=family,
                    xpos=int(xpos),
                    ref=ref,
                    alt=alt,
                )
                # NOTE(review): runs once per posted variant and not at all when
                # no variants are posted - confirm this is intended
                update_xbrowse_model(
                    family,
                    analysis_status='S',
                    causal_inheritance_mode=form.cleaned_data['inheritance_mode'])

            return redirect('family_home',
                            project_id=project.project_id,
                            family_id=family.family_id)
        else:
            error = server_utils.form_error_string(form)
    else:
        form = EditFamilyForm(family)

    variants_json = []
    for causal_variant in causal_variants:
        variant = get_datastore(project).get_single_variant(
            project_id, family_id,
            causal_variant.xpos, causal_variant.ref, causal_variant.alt)
        if variant is not None:
            variants_json.append(variant.toJSON())
        else:
            # the datastore has no record for this variant: keep it on the page
            # with its coordinates instead of failing on None.toJSON()
            variants_json.append({
                'xpos': causal_variant.xpos,
                'ref': causal_variant.ref,
                'alt': causal_variant.alt,
            })

    return render(
        request, 'family/edit_cause.html', {
            'project': project,
            'family': family,
            'error': error,
            'form': form,
            'variants': variants_json,
        })
    def transfer_project(self, from_project_id, destination_project_id):
        """
        Re-label the datastore records of one project with another project's id.

        Steps, in order:
          1. check that every individual of the 'from' project also exists in
             the destination project
          2. add the 'from' project's VCF files to the destination project
          3. re-label existing destination records as
             '<destination_project_id>_previous' (version = today's date)
          4. re-label the 'from' records with the destination project id
          5. ask on the console whether the 'from' project should be deleted

        Raises:
            Exception: if individuals are missing from the destination project.
            ValueError: if an expected project / family record is not found
                before or after the update.
        """
        print("From: " + from_project_id)
        print("To: " + destination_project_id)

        from_project = Project.objects.get(project_id=from_project_id)
        destination_project = Project.objects.get(project_id=destination_project_id)

        # Make sure individuals are the same
        from_indiv_ids = {
            indiv.indiv_id for indiv in Individual.objects.filter(project=from_project)}
        destination_indiv_ids = {
            indiv.indiv_id for indiv in Individual.objects.filter(project=destination_project)}
        indivs_missing_from_dest_project = from_indiv_ids - destination_indiv_ids
        if indivs_missing_from_dest_project:
            raise Exception("Individuals missing from dest project: " + str(indivs_missing_from_dest_project))

        # update VCFs
        for vcf_file_path in from_project.families_by_vcf().keys():
            absolute_path = os.path.abspath(vcf_file_path)
            vcf_file, _ = VCFFile.objects.get_or_create(file_path=absolute_path)
            sample_management.add_vcf_file_to_project(destination_project, vcf_file)
            print("Added %s to project %s" % (vcf_file, destination_project.project_id))

        families_db = get_datastore()._db
        projects_db = get_project_datastore()._db

        print("==========")
        print("Checking 'from' Projects and Families:")
        from_project_found = check_that_exists(
            projects_db.projects, {'project_id': from_project_id}, not_more_than_one=True)
        if not from_project_found:
            raise ValueError("There needs to be 1 project db in %(from_project_id)s" % locals())
        from_families_found = check_that_exists(
            families_db.families, {'project_id': from_project_id}, not_more_than_one=False)
        if not from_families_found:
            raise ValueError("There needs to be atleast 1 family db in %(from_project_id)s" % locals())

        print("==========")
        print("Make Updates:")
        datestamp = datetime.now().strftime("%Y-%m-%d")

        # move any existing destination records out of the way first
        if check_that_exists(projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True):
            update(
                projects_db.projects,
                {'project_id': destination_project_id},
                {'project_id': destination_project_id+'_previous', 'version': datestamp})
        if check_that_exists(families_db.families, {'project_id': destination_project_id}, not_more_than_one=False):
            update(
                families_db.families,
                {'project_id': destination_project_id},
                {'project_id': destination_project_id+'_previous', 'version': datestamp})

        # then hand the 'from' records over to the destination project id
        update(
            projects_db.projects,
            {'project_id': from_project_id},
            {'project_id': destination_project_id, 'version': '2'})
        update(
            families_db.families,
            {'project_id': from_project_id},
            {'project_id': destination_project_id, 'version': '2'})

        print("==========")
        print("Checking Projects:")
        destination_project_found = check_that_exists(
            projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True)
        if not destination_project_found:
            raise ValueError("After: There needs to be 1 project db in %(destination_project_id)s" % locals())
        destination_families_found = check_that_exists(
            families_db.families, {'project_id': destination_project_id}, not_more_than_one=False)
        if not destination_families_found:
            raise ValueError("After: There needs to be atleast 1 family db in %(destination_project_id)s" % locals())

        update_family_analysis_status(destination_project_id)

        print("Data transfer finished.")
        # only an explicit 'Y' deletes; any other answer keeps the project
        answer = raw_input("Delete the 'from' project: %s? [Y/n] " % from_project_id)
        if answer.strip() != 'Y':
            print("Project not deleted")
        else:
            sample_management.delete_project(from_project_id)
            print("Project %s deleted" % from_project_id)
    def handle(self, *args, **options):
        """
        Collect per-gene statistics over all variant tags and print them.

        Fills self.all_gene_lists (gene list name -> gene ids) and
        self.gene_to_gene_lists (gene id -> gene list names), then walks every
        VariantTag, looks its variant up in the datastore and records, per
        Key(gene_id, gene_name), the projects, families, variants and tag names
        seen. The collected dicts are passed to self.print_out.
        """
        #genomicFeatures section
        self.all_gene_lists = defaultdict(set)
        self.gene_to_gene_lists = defaultdict(set)
        for gene_list in GeneList.objects.all():
            print('gene list: [%s]' % gene_list.name)
            # a single pass over the items fills both lookups
            gene_ids = set()
            for g in gene_list.genelistitem_set.all():
                gene_ids.add(g.gene_id)
                self.gene_to_gene_lists[g.gene_id].add(gene_list.name)
            self.all_gene_lists[gene_list.name] = gene_ids

        print("starting... ")
        gene_to_projects = defaultdict(set)
        gene_to_variants = defaultdict(set)
        gene_to_families = defaultdict(set)
        gene_to_variant_tags = defaultdict(set)

        Key = namedtuple('Key', 'gene_id, gene_name')
        for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
            project_tag = variant_tag.project_tag
            project_id = project_tag.project.project_id
            tag_name = project_tag.tag.lower()

            # a tag may have no family (the loop below already allows for that),
            # so do not dereference it unconditionally for the lookup
            family = variant_tag.family
            variant = get_datastore(project_id).get_single_variant(
                project_id,
                family.family_id if family else '',
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )

            if variant is None:
                # the datastore has no record for the tagged variant
                continue

            if variant.gene_ids is not None:
                variant_id = "%s-%s-%s-%s" % (
                    variant.chr, variant.pos, variant.ref, variant.alt)
                for gene_id in variant.gene_ids:
                    gene_name = get_reference().get_gene_symbol(gene_id)
                    key = Key(gene_id, gene_name)
                    gene_to_variants[key].add(variant_id)
                    if family:
                        gene_to_families[key].add(family.family_id)
                    gene_to_variant_tags[key].add(tag_name)
                    gene_to_projects[key].add(project_id.lower())

            # intermediate report whenever the number of genes seen so far is a
            # multiple of 50 (which includes 0)
            if len(gene_to_projects) % 50 == 0:
                self.print_out(gene_to_projects, gene_to_families,
                               gene_to_variants, gene_to_variant_tags)

        self.print_out(gene_to_projects, gene_to_families, gene_to_variants,
                       gene_to_variant_tags)
    def handle(self, *args, **options):
        """
        Collect per-gene statistics over all variant tags and print them.

        Fills self.all_gene_lists (gene list name -> gene ids) and
        self.gene_to_gene_lists (gene id -> gene list names), then walks every
        VariantTag, looks its variant up in the datastore and records, per
        Key(gene_id, gene_name): projects, families, variants, tag names and a
        variant -> family ids mapping. The collected dicts are passed to
        self.print_out.
        """
        #genomicFeatures section
        self.all_gene_lists = defaultdict(set)
        self.gene_to_gene_lists = defaultdict(set)
        for gene_list in GeneList.objects.all():
            print('gene list: [%s]' % gene_list.name)
            # a single pass over the items fills both lookups
            gene_ids = set()
            for g in gene_list.genelistitem_set.all():
                gene_ids.add(g.gene_id)
                self.gene_to_gene_lists[g.gene_id].add(gene_list.name)
            self.all_gene_lists[gene_list.name] = gene_ids

        print("starting... ")
        gene_to_projects = defaultdict(set)
        gene_to_variants = defaultdict(set)
        gene_to_families = defaultdict(set)
        gene_to_variant_tags = defaultdict(set)
        gene_to_variant_and_families = defaultdict(lambda: defaultdict(set))

        Key = namedtuple('Key', 'gene_id, gene_name')
        for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
            project_tag = variant_tag.project_tag
            project_id = project_tag.project.project_id
            tag_name = project_tag.tag.lower()

            # a tag may have no family (the loop below already allows for that),
            # so do not dereference it unconditionally
            family = variant_tag.family
            variant = get_datastore(project_tag.project).get_single_variant(
                project_id,
                family.family_id if family else '',
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )

            if variant is None:
                # the datastore has no record for the tagged variant
                continue

            if variant.gene_ids is not None:
                variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos, variant.ref, variant.alt)
                for gene_id in variant.gene_ids:
                    gene_name = get_reference().get_gene_symbol(gene_id)
                    key = Key(gene_id, gene_name)
                    gene_to_variants[key].add(variant_id)
                    # the lookup creates the variant entry even when there is no family to add
                    families_of_variant = gene_to_variant_and_families[key][variant_id]
                    if family:
                        gene_to_families[key].add(family)
                        families_of_variant.add(family.family_id)
                    gene_to_variant_tags[key].add(tag_name)
                    gene_to_projects[key].add(project_id.lower())

            # intermediate report whenever the number of genes seen so far is a
            # multiple of 50 (which includes 0)
            if len(gene_to_projects) % 50 == 0:
                self.print_out(gene_to_projects, gene_to_families, gene_to_variants, gene_to_variant_tags, gene_to_variant_and_families)

        self.print_out(gene_to_projects, gene_to_families, gene_to_variants, gene_to_variant_tags, gene_to_variant_and_families)
Example #26
0
    def update_pop_freqs_in_family_tables(self):
        """
        Refresh the 'db_freqs.<population>' fields of every variant in every
        family collection.

        Progress is tracked in the local sqlite file
        'reference_populations_family_tables.db': each (project_id, family_id)
        pair gets a row with 'started' / 'finished' flags, and pairs whose
        'started' flag is still 0 are processed one at a time in random order.
        The sqlite connection is closed when the method ends, including when
        an error interrupts the run.
        """
        # Load family tables
        population_frequency_store = mall.get_annotator().get_population_frequency_store()

        # isolation_level=None puts sqlite3 in autocommit mode, so every
        # statement below is persisted immediately
        db = sqlite3.connect("reference_populations_family_tables.db", isolation_level=None)
        try:
            db.execute("CREATE TABLE if not exists all_projects(project_id varchar(200), family_id varchar(200), started bool, finished bool)")
            db.execute("CREATE UNIQUE INDEX IF NOT EXISTS all_projects_idx ON all_projects(project_id, family_id)")
            for project in Project.objects.all().order_by('-last_accessed_date'):
                project_id = project.project_id
                datastore = get_datastore(project_id)
                for family_info in datastore._get_family_info(project_id):
                    family_id = family_info['family_id']
                    # the unique index makes this a no-op for pairs that are already listed
                    db.execute("INSERT OR IGNORE INTO all_projects VALUES (?, ?, 0, 0)", (project_id, family_id))

            # Work through the project / family pairs that have not been started yet
            population_slugs_to_load = [population_spec['slug'] for population_spec in annotator_settings.reference_populations_to_load]
            while True:
                # NOTE(review): the queue is re-read and a random entry picked on
                # every pass - presumably so several processes can share the
                # sqlite file; confirm
                remaining_work = list(db.execute("SELECT project_id, family_id FROM all_projects WHERE started=0 ORDER BY RANDOM()"))
                print("%d projects / families remaining" % len(remaining_work))
                if not remaining_work:
                    print("Done with all projects/families")
                    break

                project_id, family_id = remaining_work[0]
                datastore = get_datastore(project_id)
                print("    updating %s / %s" % (project_id, family_id))
                db.execute("UPDATE all_projects SET started=1 WHERE project_id=? AND family_id=?", (project_id, family_id))

                family_collection = datastore._get_family_collection(project_id, family_id)

                for variant_dict in family_collection.find():
                    freqs = population_frequency_store.get_frequencies(variant_dict['xpos'], variant_dict['ref'], variant_dict['alt'])
                    # populations without a stored frequency are written as 0
                    full_freqs = {'db_freqs.'+population_slug: freqs.get(population_slug, 0) for population_slug in population_slugs_to_load}
                    family_collection.update({'xpos':variant_dict['xpos'], 'ref' :variant_dict['ref'], 'alt': variant_dict['alt']},
                                             {'$set': full_freqs},
                                             upsert=False)

                print("     ---> done updating project_id: %s, family_id: %s" % (project_id, family_id))
                db.execute("UPDATE all_projects SET finished=1 WHERE project_id=? AND family_id=?", (project_id, family_id))
        finally:
            db.close()
Example #27
0
    def handle(self, *args, **options):
        """
        Print the families or the individuals held in the datastore, one
        tab-separated line each.

        The first positional argument selects the listing: 'families' prints
        project id, family id and the comma-joined individuals of the family;
        'individuals' (the default) prints project id and individual id.
        Any other value prints nothing.
        """
        # default display is individuals
        display = args[0] if args else 'individuals'

        if display == 'families':
            for project_id, family_id in get_datastore().get_all_families():
                fields = [
                    project_id,
                    family_id,
                    ",".join(get_datastore().get_individuals_for_family(project_id, family_id))
                ]
                # call form of print: same output on Python 2, valid syntax on Python 3
                print("\t".join(fields))

        elif display == 'individuals':
            for project_id, indiv_id in get_datastore().get_all_individuals():
                print("\t".join([project_id, indiv_id]))
Example #28
0
    def handle(self, *args, **options):
        """
        List what the datastore holds as tab-separated lines on stdout.

        args[0] chooses the listing and defaults to 'individuals':
          - 'families': project id, family id, comma-joined individuals
          - 'individuals': project id, individual id
        Unrecognised values produce no output.
        """
        # default display is individuals
        display = args[0] if args else 'individuals'

        if display == 'families':
            for project_id, family_id in get_datastore().get_all_families():
                individuals = get_datastore().get_individuals_for_family(
                    project_id, family_id)
                fields = [project_id, family_id, ",".join(individuals)]
                # print(...) with a single argument behaves the same on Python 2
                # and also parses on Python 3
                print("\t".join(fields))

        elif display == 'individuals':
            for project_id, indiv_id in get_datastore().get_all_individuals():
                print("\t".join([project_id, indiv_id]))
Example #29
0
def edit_family_cause(request, project_id, family_id):
    """
    Edit the causal variants of a family.

    Behaviour by request:
      * GET parameter 'variant' ('xpos|ref|alt'): the variant is added to the
        family's causal variants before the page is built.
      * valid POST: the family's causal variants are replaced by the posted
        'variants' entries and the user is redirected to 'family_home'.
      * anything else: 'family/edit_cause.html' is rendered.

    Raises:
        PermissionDenied: if the user may not administer the project.
        (get_object_or_404 handles an unknown project or family.)
    """
    error = None

    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_admin(request.user):
        raise PermissionDenied

    if request.GET.get('variant'):
        xpos, ref, alt = request.GET['variant'].split('|')
        CausalVariant.objects.get_or_create(
            family=family,
            xpos=int(xpos),
            ref=ref,
            alt=alt,
        )

    # queried once, after the optional insert above
    causal_variants = list(CausalVariant.objects.filter(family=family))

    if request.method == 'POST':
        form = EditFamilyCauseForm(family, request.POST)
        if form.is_valid():
            CausalVariant.objects.filter(family=family).delete()
            for v_str in request.POST.getlist('variants'):
                xpos, ref, alt = v_str.split('|')
                CausalVariant.objects.create(
                    family=family,
                    xpos=int(xpos),
                    ref=ref,
                    alt=alt,
                )
                # NOTE(review): executed per posted variant, so it is skipped
                # when the POST carries no variants - confirm this is intended
                update_xbrowse_model(
                    family,
                    analysis_status='S',
                    causal_inheritance_mode=form.cleaned_data['inheritance_mode'])

            return redirect('family_home', project_id=project.project_id, family_id=family.family_id)
        else:
            error = server_utils.form_error_string(form)
    else:
        form = EditFamilyForm(family)

    def _variant_json(causal_variant):
        # JSON of the stored variant, or just its coordinates when the
        # datastore returns None for it (avoids calling toJSON() on None)
        variant = get_datastore(project).get_single_variant(
            project_id, family_id, causal_variant.xpos, causal_variant.ref, causal_variant.alt)
        if variant is None:
            return {
                'xpos': causal_variant.xpos,
                'ref': causal_variant.ref,
                'alt': causal_variant.alt,
            }
        return variant.toJSON()

    return render(request, 'family/edit_cause.html', {
        'project': project,
        'family': family,
        'error': error,
        'form': form,
        'variants': [_variant_json(c) for c in causal_variants],
    })
Example #30
0
def add_family_search_flag(request):
    """
    Save a FamilySearchFlag for a variant and return the variant as JSON.

    All parameters come from request.GET: project_id, family_id, xpos, ref,
    alt, note, flag_type and flag_inheritance_mode are all required.

    Returns:
        JSONResponse {'is_error': True, 'error': ...} when a parameter is
        missing, else {'is_error': False, 'variant': ...}.

    Raises:
        PermissionDenied: if the user may not edit the project.
        (get_object_or_404 handles an unknown project or family.)
    """

    # TODO: this view not like the others - refactor to forms

    error = None

    # every key is checked; with several missing keys the last one is reported
    for key in ['project_id', 'family_id', 'xpos', 'ref', 'alt', 'note', 'flag_type', 'flag_inheritance_mode']:
        if request.GET.get(key) is None:
            error = "%s is requred" % key

    if error:
        return JSONResponse({
            'is_error': True,
            'error': error,
        })

    project = get_object_or_404(Project, project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family, project=project, family_id=request.GET.get('family_id'))
    if not project.can_edit(request.user):
        # must be raised: returning the exception class hands Django a
        # non-response object instead of producing a permission error
        raise PermissionDenied

    xpos = int(request.GET['xpos'])
    ref = request.GET.get('ref')
    alt = request.GET['alt']
    note = request.GET.get('note')
    flag_type = request.GET.get('flag_type')
    flag_inheritance_mode = request.GET.get('flag_inheritance_mode')

    # todo: more validation - is variant valid?

    flag = FamilySearchFlag(
        user=request.user,
        family=family,
        xpos=xpos,
        ref=ref,
        alt=alt,
        note=note,
        flag_type=flag_type,
        suggested_inheritance=flag_inheritance_mode,
        # NOTE(review): naive local timestamp - confirm whether an aware one is expected
        date_saved=datetime.datetime.now(),
    )
    flag.save()

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id, family.family_id, xpos, ref, alt)
    api_utils.add_extra_info_to_variant(get_reference(), family, variant)

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
Example #31
0
    def handle(self, *args, **options):
        """
        Delete and reload the variant data of selected families of a project.

        options["project_id"] names the project; the positional args are the
        ids of the families to reload. Families not listed in args are skipped
        (with no args at all, every family is skipped).
        """

        project_id = options["project_id"]
        family_ids = args
        project = Project.objects.get(project_id=project_id)

        # Family ids whose data has already been deleted in this run. If a
        # family shows up under more than one VCF file, deleting it again would
        # throw away what was just loaded from the earlier file.
        already_deleted_once = set()
        for vcf_file, families in project.families_by_vcf().items():
            families_to_load = []
            for family in families:
                family_id = family.family_id
                print("Checking id: " + family_id)
                if not family_ids or family_id not in family_ids:
                    continue

                # delete this family (first encounter only)
                if family_id not in already_deleted_once:
                    get_datastore(project_id).delete_family(project_id, family_id)
                    already_deleted_once.add(family_id)

                families_to_load.append(family)

            # reload the selected families from this VCF file
            print("Loading %(project_id)s %(families_to_load)s" % locals())
            xbrowse_controls.load_variants_for_family_list(project, families_to_load, vcf_file)
Example #32
0
def get_causal_variants_for_project(project):
    """
    Return the datastore variants behind the project's CausalVariant records.

    Each returned variant carries the extras 'family_id' and 'project_id'.
    Causal variants for which the datastore returns nothing are left out.
    """
    # collect the coordinates first, then do the datastore lookups
    causal_records = [
        (cv.xpos, cv.ref, cv.alt, cv.family.family_id)
        for cv in CausalVariant.objects.filter(family__project=project)
    ]

    found = []
    for xpos, ref, alt, family_id in causal_records:
        datastore = get_datastore(project.project_id)
        variant = datastore.get_single_variant(project.project_id, family_id, xpos, ref, alt)
        if not variant:
            continue
        variant.set_extra("family_id", family_id)
        variant.set_extra("project_id", project.project_id)
        found.append(variant)

    return found
Example #33
0
def get_variants_from_note_tuples(project, note_tuples):
    """
    Turn note tuples into variants, one variant per tuple.

    Each tuple is read by index as (xpos, ref, alt, family_id). The variant is
    taken from the project's datastore; when the datastore has none, a new
    Variant is created and annotated with the project's reference populations.
    Every variant gets the extras 'family_id' and 'project_id'.
    """
    result = []
    for note_t in note_tuples:
        datastore = get_datastore(project.project_id)
        xpos, ref, alt, family_id = note_t[0], note_t[1], note_t[2], note_t[3]

        variant = datastore.get_single_variant(project.project_id, family_id, xpos, ref, alt)
        if not variant:
            # not in the datastore: fall back to a freshly annotated variant
            variant = Variant(xpos, ref, alt)
            get_annotator().annotate_variant(variant, project.get_reference_population_slugs())

        variant.set_extra("family_id", family_id)
        variant.set_extra("project_id", project.project_id)
        result.append(variant)
    return result
Example #34
0
def get_variants_from_variant_tuples(project, variant_tuples):
    """
    Return one variant per entry of variant_tuples.

    Entries are indexed as t[0]=xpos, t[1]=ref, t[2]=alt, t[3]=family id.
    Variants missing from the project's datastore are replaced by a new,
    annotated Variant. The extras 'family_id' and 'project_id' are set on
    every variant.
    """
    def _lookup_or_create(t):
        # datastore record if there is one, otherwise an annotated placeholder
        candidate = get_datastore(project.project_id).get_single_variant(
            project.project_id, t[3], t[0], t[1], t[2])
        if candidate:
            return candidate
        candidate = Variant(t[0], t[1], t[2])
        get_annotator().annotate_variant(
            candidate, project.get_reference_population_slugs())
        return candidate

    variants = []
    for t in variant_tuples:
        variant = _lookup_or_create(t)
        variant.set_extra('family_id', t[3])
        variant.set_extra('project_id', project.project_id)
        variants.append(variant)
    return variants
Example #35
0
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """
    Generate the variants for this project / inheritance combo.

    This is a generator: it yields one (family, list of variants) pair per
    family of the project. With inheritance_mode "all_variants" the plain
    variant search is used, otherwise the search for the given inheritance
    mode. A family whose search raises ValueError is reported on stdout and
    skipped.
    """

    # create search specification
    # this could theoretically differ by project, if there are different reference populations
    variant_filter = get_default_variant_filter('moderate_impact')
    frequency_thresholds = (
        ('1kg_wgs_phase3', g1k_freq_threshold),
        ('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold),
        ('exac_v3', exac_freq_threshold),
        ('exac_v3_popmax', exac_popmax_threshold),
        ('merck-wgs-3793', merck_wgs_3793_threshold),
    )
    for population_and_threshold in frequency_thresholds:
        variant_filter.ref_freqs.append(population_and_threshold)

    quality_filter = {
        'min_gq': GQ_threshold,
        'min_ab': AB_threshold,
    }

    # run the search for each family, hand out the results one family at a time
    families = project.get_families()

    for position, family in enumerate(families, 1):
        print("Processing %s - family %s  (%d / %d)" %
              (inheritance_mode, family.family_id, position, len(families)))
        try:
            if inheritance_mode == "all_variants":
                search = get_variants(
                    get_datastore(project.project_id),
                    family.xfamily(),
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                    indivs_to_consider=family.indiv_id_list())
            else:
                search = get_variants_with_inheritance_mode(
                    get_mall(project.project_id),
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                )
            yield family, list(search)
        except ValueError as e:
            print("Error: %s. Skipping family %s" % (str(e), str(family)))
Example #36
0
def saved_variant_transcripts(request, variant_guid):
    """
    Return the transcripts of a saved variant.

    The saved variant is found by guid, the user's CAN_VIEW permission on its
    project is checked, and the variant is then loaded from the datastore of
    the matching xbrowse project.

    Returns:
        JSON response of the form {variant_guid: {'transcripts': ...}}.
    """
    saved_variant = SavedVariant.objects.get(guid=variant_guid)
    check_permissions(saved_variant.project, request.user, CAN_VIEW)

    # TODO when variant search is rewritten for seqr models use that here
    base_project = find_matching_xbrowse_model(saved_variant.project)
    datastore = get_datastore(base_project)
    loaded_variant = datastore.get_single_variant(
        base_project.project_id,
        saved_variant.family.family_id,
        saved_variant.xpos,
        saved_variant.ref,
        saved_variant.alt,
    )

    # NOTE(review): a variant missing from the datastore is not handled here -
    # confirm get_single_variant cannot return None for saved variants
    transcripts = _variant_transcripts(loaded_variant.annotation)
    return create_json_response({variant_guid: {'transcripts': transcripts}})
Example #37
0
def get_causal_variants_for_project(project):
    """
    Look up every causal variant of the project in the datastore.

    Returns a list of the variants that were found, each tagged with the
    extras 'family_id' and 'project_id'; causal variants without a datastore
    record are skipped.
    """
    # (xpos, ref, alt, family_id) of every causal variant in the project
    coordinates = []
    for causal in CausalVariant.objects.filter(family__project=project):
        coordinates.append(
            (causal.xpos, causal.ref, causal.alt, causal.family.family_id))

    variants = []
    for xpos, ref, alt, family_id in coordinates:
        variant = get_datastore(project.project_id).get_single_variant(
            project.project_id, family_id, xpos, ref, alt)
        if variant:
            for extra_name, extra_value in (('family_id', family_id),
                                            ('project_id', project.project_id)):
                variant.set_extra(extra_name, extra_value)
            variants.append(variant)

    return variants
Example #38
0
def cohort_gene_search_variants(request):
    """
    Search a cohort for variants in one gene under an inheritance mode.

    The project and cohort are resolved from request.GET; the search
    parameters (gene_id, inheritance_mode, variant_filter, quality_filter) are
    validated through CohortGeneSearchVariantsForm.

    Returns:
        JSONResponse {'is_error': True, 'error': ...} when the form is invalid,
        else {'is_error': False, 'variants': [...], 'gene_info': ...}.

    Raises:
        PermissionDenied: if the user may not view the project.
    """

    # TODO: this view not like the others - refactor to forms

    project, cohort = get_project_and_cohort_for_user(request.user, request.GET)
    if not project.can_view(request.user):
        # must be raised: returning the exception class hands Django a
        # non-response object instead of producing a permission error
        raise PermissionDenied

    form = api_forms.CohortGeneSearchVariantsForm(request.GET)
    if not form.is_valid():
        # return right away so the search below never runs with unset parameters
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form),
        })

    gene_id = form.cleaned_data['gene_id']
    inheritance_mode = form.cleaned_data['inheritance_mode']
    variant_filter = form.cleaned_data['variant_filter']
    quality_filter = form.cleaned_data['quality_filter']

    # only the gene variation part of the result is used here
    _, gene_variation = cohort_search.get_individuals_with_inheritance_in_gene(
        get_datastore(project.project_id),
        get_reference(),
        cohort.xcohort(),
        inheritance_mode,
        gene_id,
        variant_filter=variant_filter,
        quality_filter=quality_filter
    )

    relevant_variants = gene_variation.get_relevant_variants_for_indiv_ids(cohort.indiv_id_list())

    api_utils.add_extra_info_to_variants_family(get_reference(), cohort, relevant_variants)

    return JSONResponse({
        'is_error': False,
        'variants': [v.toJSON() for v in relevant_variants],
        'gene_info': get_reference().get_gene(gene_id),
    })
Example #39
0
def export_project_variants(request, project_id):
    """
    Export all tagged variants associated to this project
    Args:
        request: the HTTP request; its user must be allowed to view the project
        project_id: id of the project
    Returns:
        A JSON response holding a list with one entry per variant tag:
        'variant' (the datastore variant, or just xpos/ref/alt when the
        datastore has none), 'tag', 'description', 'family' (None when the tag
        has no family) and 'family_status' ('' when the tag has no family)
    Raises:
        PermissionDenied if the user may not view the project
        (get_object_or_404 handles an unknown project)
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        raise PermissionDenied

    # analysis status abbreviation -> first element of its details
    status_description_map = {
        abbrev: details[0] for abbrev, details in ANALYSIS_STATUS_CHOICES
    }

    variants = []
    for project_tag in ProjectTag.objects.filter(project__project_id=project_id):
        for variant_tag in VariantTag.objects.filter(project_tag=project_tag):
            family = variant_tag.family
            variant = get_datastore(project.project_id).get_single_variant(
                project.project_id,
                family.family_id if family else '',
                variant_tag.xpos,
                variant_tag.ref,
                variant_tag.alt,
            )

            if variant is not None:
                variant_json = variant.toJSON()
            else:
                variant_json = {
                    'xpos': variant_tag.xpos,
                    'ref': variant_tag.ref,
                    'alt': variant_tag.alt
                }

            # the family is optional everywhere else in this loop, so it must
            # not be dereferenced unconditionally for the export either
            if family:
                family_status = status_description_map.get(
                    family.analysis_status, 'unknown')
                family_json = family.toJSON()
            else:
                family_status = ''
                family_json = None

            variants.append({
                "variant": variant_json,
                "tag": project_tag.tag,
                "description": project_tag.title,
                "family": family_json,
                "family_status": family_status
            })
    return JSONResponse(variants)
Example #40
0
def cohort_gene_search_variants(request):
    """
    Search a cohort for variants in one gene under an inheritance mode.

    Project and cohort come from request.GET; gene_id, inheritance_mode,
    variant_filter and quality_filter are taken from a validated
    CohortGeneSearchVariantsForm.

    Returns:
        HttpResponse('unauthorized') if the user may not view the project;
        JSONResponse {'is_error': True, 'error': ...} on a form error;
        otherwise JSONResponse {'is_error': False, 'variants': [...],
        'gene_info': ...}.
    """

    # TODO: this view not like the others - refactor to forms

    project, cohort = get_project_and_cohort_for_user(request.user,
                                                      request.GET)
    if not project.can_view(request.user):
        return HttpResponse('unauthorized')

    error = None
    form = api_forms.CohortGeneSearchVariantsForm(request.GET)
    if form.is_valid():
        cleaned = form.cleaned_data
        gene_id = cleaned['gene_id']
        inheritance_mode = cleaned['inheritance_mode']
        variant_filter = cleaned['variant_filter']
        quality_filter = cleaned['quality_filter']
    else:
        error = server_utils.form_error_string(form)

    if error:
        return JSONResponse({'is_error': True, 'error': error})

    search_result = cohort_search.get_individuals_with_inheritance_in_gene(
        get_datastore(project.project_id),
        get_reference(),
        cohort.xcohort(),
        inheritance_mode,
        gene_id,
        variant_filter=variant_filter,
        quality_filter=quality_filter)
    # the first element (individuals with the inheritance) is not needed here
    indivs_with_inheritance, gene_variation = search_result

    indiv_ids = cohort.indiv_id_list()
    relevant_variants = gene_variation.get_relevant_variants_for_indiv_ids(indiv_ids)

    api_utils.add_extra_info_to_variants_family(
        get_reference(), cohort, relevant_variants)

    return JSONResponse({
        'is_error': False,
        'variants': [v.toJSON() for v in relevant_variants],
        'gene_info': get_reference().get_gene(gene_id),
    })
def get_variants_for_inheritance_for_project(project, inheritance_mode):
    """
    Get the variants for this project / inheritance combo.

    Generator yielding (family, list of variants) for each family of the
    project. "all_variants" runs the plain variant search; any other
    inheritance_mode runs the inheritance-mode search. When a family's search
    raises ValueError, the error is printed and that family is skipped.
    """

    # create search specification
    # this could theoretically differ by project, if there are different reference populations
    variant_filter = get_default_variant_filter('moderate_impact')
    ref_freqs = variant_filter.ref_freqs
    ref_freqs.append(('1kg_wgs_phase3', g1k_freq_threshold))
    ref_freqs.append(('1kg_wgs_phase3_popmax', g1k_popmax_freq_threshold))
    ref_freqs.append(('exac_v3', exac_freq_threshold))
    ref_freqs.append(('exac_v3_popmax', exac_popmax_threshold))
    ref_freqs.append(('merck-wgs-3793', merck_wgs_3793_threshold))

    quality_filter = {'min_gq': GQ_threshold, 'min_ab': AB_threshold}

    # run the search family by family
    families = project.get_families()

    for i, family in enumerate(families):
        progress = (inheritance_mode, family.family_id, i+1, len(families))
        print("Processing %s - family %s  (%d / %d)" % progress)
        try:
            if inheritance_mode != "all_variants":
                family_variants = list(get_variants_with_inheritance_mode(
                    get_mall(project.project_id),
                    family.xfamily(),
                    inheritance_mode,
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                ))
            else:
                family_variants = list(get_variants(
                    get_datastore(project.project_id),
                    family.xfamily(),
                    variant_filter=variant_filter,
                    quality_filter=quality_filter,
                    indivs_to_consider=family.indiv_id_list(),
                ))
            yield family, family_variants
        except ValueError as e:
            print("Error: %s. Skipping family %s" % (str(e), str(family)))
Example #42
0
def gather_all_annotated_genes_in_seqr():
    """
    Finds all genes hit by tagged variants in seqr
    Args:
        No arguments
    Returns
        A default dict where each key is a ``Key(gene_id, gene_name)`` named
        tuple (the gene id stored on the variant and the symbol the reference
        reports for it) and each value is the set of lower-cased project ids
        in which a tagged variant touches that gene
    """
    Key = namedtuple('Key', 'gene_id, gene_name')
    gene_to_projects = defaultdict(set)

    for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
        project_id = variant_tag.project_tag.project.project_id

        variant = get_datastore(project_id).get_single_variant(
            project_id,
            variant_tag.family.family_id,
            variant_tag.xpos,
            variant_tag.ref,
            variant_tag.alt,
        )
        # skip tags whose variant lookup comes back empty or carries no genes
        if variant is None or variant.gene_ids is None:
            continue

        for gene_id in variant.gene_ids:
            gene_name = get_reference().get_gene_symbol(gene_id)
            gene_to_projects[Key(gene_id, gene_name)].add(project_id.lower())

    return gene_to_projects
Example #43
0
def get_variants_from_variant_tuples(project, variant_tuples):
    """
    Turn ``(xpos, ref, alt, family_id)`` tuples into variant objects.

    Each tuple is looked up in the project's datastore; when the lookup comes
    back empty a bare Variant is built and annotated with the project's
    reference populations instead. Every returned variant carries
    ``family_id`` and ``project_id`` extras.
    """
    def _resolve(entry):
        datastore = get_datastore(project.project_id)
        xpos, ref, alt, family_id = entry[0], entry[1], entry[2], entry[3]

        found = datastore.get_single_variant(project.project_id, family_id, xpos, ref, alt)
        if not found:
            # not in the datastore: fall back to a freshly annotated variant
            found = Variant(xpos, ref, alt)
            get_annotator().annotate_variant(found, project.get_reference_population_slugs())

        found.set_extra('family_id', family_id)
        found.set_extra('project_id', project.project_id)
        return found

    return [_resolve(entry) for entry in variant_tuples]
Example #44
0
def gather_all_annotated_genes_in_seqr():
    """
    Finds all genes hit by tagged variants in seqr
    Args:
        No arguments
    Returns
        A default dict where each key is a ``Key(gene_id, gene_name)`` named
        tuple (the gene id stored on the variant and the symbol the reference
        reports for it) and each value is the set of lower-cased project ids
        in which a tagged variant touches that gene
    """
    Key = namedtuple('Key', 'gene_id, gene_name')
    gene_to_projects = defaultdict(set)

    for variant_tag in tqdm(VariantTag.objects.filter(), unit=' variants'):
        project = variant_tag.project_tag.project
        project_id = project.project_id

        # this datastore lookup is keyed by the project object itself
        variant = get_datastore(project).get_single_variant(
            project_id,
            variant_tag.family.family_id,
            variant_tag.xpos,
            variant_tag.ref,
            variant_tag.alt,
        )
        # skip tags whose variant lookup comes back empty or carries no genes
        if variant is None or variant.gene_ids is None:
            continue

        for gene_id in variant.gene_ids:
            gene_name = get_reference().get_gene_symbol(gene_id)
            gene_to_projects[Key(gene_id, gene_name)].add(project_id.lower())

    return gene_to_projects
Example #45
0
def add_variant_note(request):
    """
    Save a note on a variant and respond with that variant as JSON.

    The project (and optionally the family) come from the request's GET
    parameters; the note fields are validated through VariantNoteForm.
    Responds with ``{'is_error': True, 'error': ...}`` when the form is
    invalid, otherwise ``{'is_error': False, 'variant': ...}``.
    """
    if 'family_id' in request.GET:
        project, family = get_project_and_family_for_user(request.user, request.GET)
    else:
        family = None
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantNoteForm(project, request.GET)
    if not form.is_valid():
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form),
        })

    cleaned = form.cleaned_data
    note = VariantNote.objects.create(
        user=request.user,
        date_saved=datetime.datetime.now(),
        project=project,
        note=cleaned['note_text'],
        xpos=cleaned['xpos'],
        ref=cleaned['ref'],
        alt=cleaned['alt'],
    )
    if family:
        note.family = family
        note.save()

    # NOTE(review): family is None when no 'family_id' was sent, in which case
    # family.family_id below raises AttributeError - confirm callers always send it
    datastore = get_datastore(project.project_id)
    variant = datastore.get_single_variant(
        project.project_id,
        family.family_id,
        cleaned['xpos'],
        cleaned['ref'],
        cleaned['alt'],
    )
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
Example #46
0
def add_variant_note(request):
    """
    Store a user's note about a variant, then return the variant as JSON.

    Project and (optional) family are resolved from the GET parameters and the
    note itself is validated by VariantNoteForm. An invalid form yields
    ``{'is_error': True, 'error': ...}``; success yields
    ``{'is_error': False, 'variant': ...}``.
    """
    has_family = 'family_id' in request.GET
    if has_family:
        project, family = get_project_and_family_for_user(request.user, request.GET)
    else:
        project, family = utils.get_project_for_user(request.user, request.GET), None

    form = api_forms.VariantNoteForm(project, request.GET)
    if not form.is_valid():
        failure = {
            'is_error': True,
            'error': server_utils.form_error_string(form)
        }
        return JSONResponse(failure)

    fields = form.cleaned_data
    note = VariantNote.objects.create(
        user=request.user,
        date_saved=datetime.datetime.now(),
        project=project,
        note=fields['note_text'],
        xpos=fields['xpos'],
        ref=fields['ref'],
        alt=fields['alt'],
    )
    # the family link is attached in a second save, only when one was supplied
    if family:
        note.family = family
        note.save()

    # NOTE(review): without a 'family_id' parameter family is None and the
    # attribute access below raises AttributeError - confirm this path is unused
    variant = get_datastore(project.project_id).get_single_variant(
        project.project_id,
        family.family_id,
        fields['xpos'],
        fields['ref'],
        fields['alt'],
    )
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    success = {
        'is_error': False,
        'variant': variant.toJSON(),
    }
    return JSONResponse(success)
Example #47
0
def family_variant_annotation(request):
    """
    Return the JSON form of one variant in a family.

    Reads ``project_id``, ``family_id``, ``xpos``, ``ref`` and ``alt`` from
    the query string. Responds with ``{'is_error': True, 'error': <message>}``
    when a parameter is missing, ``xpos`` is not an integer, or the variant
    cannot be found; otherwise ``{'is_error': False, 'variant': ...}``.

    Raises PermissionDenied when the user may not view the project; an
    unknown project or family goes through get_object_or_404.
    """
    # TODO: this view not like the others - refactor to forms

    def _error(message):
        return JSONResponse({
            'is_error': True,
            'error': message,
        })

    required_keys = ['project_id', 'family_id', 'xpos', 'ref', 'alt']
    missing = [key for key in required_keys if request.GET.get(key) is None]
    if missing:
        # format the message; the old code built a ("%s ...", key) tuple instead
        return _error("%s is required" % ', '.join(missing))

    project = get_object_or_404(Project, project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family, project=project, family_id=request.GET.get('family_id'))
    if not project.can_view(request.user):
        # raise rather than return: returning the exception class is not a response
        raise PermissionDenied

    try:
        xpos = int(request.GET['xpos'])
    except ValueError:
        return _error("xpos must be an integer")

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id,
        family.family_id,
        xpos,
        request.GET['ref'],
        request.GET['alt']
    )
    if not variant:
        return _error("Variant does not exist")

    return JSONResponse({
        'variant': variant.toJSON(),
        'is_error': False,
    })
Example #48
0
File: views.py Project: rpete/seqr
def family_variant_annotation(request):
    """
    Return the JSON form of one variant in a family.

    Reads ``project_id``, ``family_id``, ``xpos``, ``ref`` and ``alt`` from
    the query string. Responds with ``{'is_error': True, 'error': <message>}``
    when a parameter is missing, ``xpos`` is not an integer, or the variant
    cannot be found; otherwise ``{'is_error': False, 'variant': ...}``.

    Raises PermissionDenied when the user may not view the project; an
    unknown project or family goes through get_object_or_404.
    """
    # TODO: this view not like the others - refactor to forms

    def _error(message):
        return JSONResponse({
            'is_error': True,
            'error': message,
        })

    required_keys = ['project_id', 'family_id', 'xpos', 'ref', 'alt']
    missing = [key for key in required_keys if request.GET.get(key) is None]
    if missing:
        # format the message; the old code built a ("%s ...", key) tuple instead
        return _error("%s is required" % ', '.join(missing))

    project = get_object_or_404(Project, project_id=request.GET.get('project_id'))
    family = get_object_or_404(Family, project=project, family_id=request.GET.get('family_id'))
    if not project.can_view(request.user):
        # raise rather than return: returning the exception class is not a response
        raise PermissionDenied

    try:
        xpos = int(request.GET['xpos'])
    except ValueError:
        return _error("xpos must be an integer")

    variant = get_datastore(project.project_id).get_single_variant(
        family.project.project_id,
        family.family_id,
        xpos,
        request.GET['ref'],
        request.GET['alt']
    )
    if not variant:
        return _error("Variant does not exist")

    return JSONResponse({
        'variant': variant.toJSON(),
        'is_error': False,
    })
Example #49
0
def edit_variant_tags(request):
    """
    Replace the tags on one variant of a family with the submitted set.

    Existing VariantTag rows for the family and variant are deleted and one
    row is created per submitted project tag. Responds with
    ``{'is_error': True, 'error': ...}`` when the form is invalid, otherwise
    ``{'is_error': False, 'variant': ...}``.
    """
    if 'family_id' in request.GET:
        project, family = get_project_and_family_for_user(request.user, request.GET)
    else:
        family = None
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantTagsForm(project, request.GET)
    if not form.is_valid():
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form),
        })

    cleaned = form.cleaned_data
    locus = {
        'xpos': cleaned['xpos'],
        'ref': cleaned['ref'],
        'alt': cleaned['alt'],
    }

    # drop the current tags first, then recreate from the submitted list
    VariantTag.objects.filter(family=family, **locus).delete()
    for project_tag in cleaned['project_tags']:
        VariantTag.objects.create(project_tag=project_tag, family=family, **locus)

    # NOTE(review): family is None when no 'family_id' was sent, in which case
    # family.family_id below raises AttributeError - confirm callers always send it
    variant = get_datastore(project.project_id).get_single_variant(
        project.project_id,
        family.family_id,
        locus['xpos'],
        locus['ref'],
        locus['alt'],
    )
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
Example #50
0
def family_variant_view(request, project_id, family_id):
    """
    Render the page for a single variant in a family.

    The variant is identified by the ``xpos``, ``ref`` and ``alt`` query
    parameters. Plain-text responses are returned when the user may not view
    the project ('unauthorized'), when ``xpos`` is missing or not an integer
    ('Invalid View'), or when the datastore has no such variant.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        return HttpResponse('unauthorized')

    try:
        xpos = int(request.GET.get('xpos'))
    except (TypeError, ValueError):
        # TypeError: parameter absent (int(None)); ValueError: not a number
        return HttpResponse('Invalid View')
    ref = request.GET.get('ref')
    alt = request.GET.get('alt')

    variant = get_datastore(project_id).get_single_variant(project_id, family_id, xpos, ref, alt)
    if variant is None:
        # nothing to render; avoids an AttributeError on variant.toJSON() below
        return HttpResponse('Variant not found')
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return render(request, 'family/family_variant_view.html', {
        'project': project,
        'family': family,
        'variant_json': json.dumps(variant.toJSON()),
    })
Example #51
0
def family_variant_view(request, project_id, family_id):
    """
    Render the page for a single variant in a family.

    The variant is identified by the ``xpos``, ``ref`` and ``alt`` query
    parameters. Plain-text responses are returned when the user may not view
    the project ('unauthorized'), when ``xpos`` is missing or not an integer
    ('Invalid View'), or when the datastore has no such variant.
    """
    project = get_object_or_404(Project, project_id=project_id)
    family = get_object_or_404(Family, project=project, family_id=family_id)
    if not project.can_view(request.user):
        return HttpResponse('unauthorized')

    try:
        xpos = int(request.GET.get('xpos'))
    except (TypeError, ValueError):
        # TypeError: parameter absent (int(None)); ValueError: not a number
        return HttpResponse('Invalid View')
    ref = request.GET.get('ref')
    alt = request.GET.get('alt')

    variant = get_datastore(project_id).get_single_variant(project_id, family_id, xpos, ref, alt)
    if variant is None:
        # nothing to render; avoids an AttributeError on variant.toJSON() below
        return HttpResponse('Variant not found')
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return render(request, 'family/family_variant_view.html', {
        'project': project,
        'family': family,
        'variant_json': json.dumps(variant.toJSON()),
    })
Example #52
0
def edit_variant_tags(request):
    """
    Replace the tags on one variant of a family with the submitted set.

    Existing VariantTag rows for the family and variant are deleted and one
    row per submitted project tag is created, stamped with the requesting
    user and the current time. Responds with
    ``{'is_error': True, 'error': ...}`` when the form is invalid, otherwise
    ``{'is_error': False, 'variant': ...}``.
    """
    if 'family_id' in request.GET:
        project, family = get_project_and_family_for_user(request.user, request.GET)
    else:
        family = None
        project = utils.get_project_for_user(request.user, request.GET)

    form = api_forms.VariantTagsForm(project, request.GET)
    if not form.is_valid():
        return JSONResponse({
            'is_error': True,
            'error': server_utils.form_error_string(form)
        })

    cleaned = form.cleaned_data
    locus = {
        'xpos': cleaned['xpos'],
        'ref': cleaned['ref'],
        'alt': cleaned['alt'],
    }

    # drop the current tags first, then recreate from the submitted list
    VariantTag.objects.filter(family=family, **locus).delete()
    for project_tag in cleaned['project_tags']:
        VariantTag.objects.create(
            user=request.user,
            date_saved=datetime.datetime.now(),
            project_tag=project_tag,
            family=family,
            **locus
        )

    # NOTE(review): family is None when no 'family_id' was sent, in which case
    # family.family_id below raises AttributeError - confirm callers always send it
    variant = get_datastore(project.project_id).get_single_variant(
        project.project_id,
        family.family_id,
        locus['xpos'],
        locus['ref'],
        locus['alt'],
    )
    add_extra_info_to_variants_family(get_reference(), family, [variant])

    return JSONResponse({
        'is_error': False,
        'variant': variant.toJSON(),
    })
    def transfer_project(self, from_project_id, destination_project_id):
        """
        Make the datastore records of ``from_project_id`` take over the id
        ``destination_project_id``.

        Steps, in order: verify every individual of the source project also
        exists in the destination; attach the source project's VCF files to
        the destination; rename the destination's existing project/family
        datastore records to ``<destination>_previous1`` (version '1') and the
        source's records to the destination id (version '2'); refresh the
        family analysis status; finally ask on the console whether the source
        project should be deleted.

        Raises Exception when the destination lacks individuals present in
        the source project.
        """
        print("From: " + from_project_id)
        print("To: " + destination_project_id)

        from_project = Project.objects.get(project_id=from_project_id)
        destination_project = Project.objects.get(project_id=destination_project_id)

        # Make sure individuals are the same
        source_indiv_ids = set(
            indiv.indiv_id for indiv in Individual.objects.filter(project=from_project))
        dest_indiv_ids = set(
            indiv.indiv_id for indiv in Individual.objects.filter(project=destination_project))
        indivs_missing_from_dest_project = source_indiv_ids - dest_indiv_ids
        if indivs_missing_from_dest_project:
            raise Exception(
                "Individuals missing from dest project: " + str(indivs_missing_from_dest_project))

        # update VCFs
        for vcf_file_path in from_project.families_by_vcf().keys():
            vcf_file, _ = VCFFile.objects.get_or_create(
                file_path=os.path.abspath(vcf_file_path))
            sample_management.add_vcf_file_to_project(destination_project, vcf_file)
            print("Added %s to project %s" % (vcf_file, destination_project.project_id))

        families_db = get_datastore(from_project_id)._db
        projects_db = get_project_datastore(from_project_id)._db

        print("==========")
        print("Checking Projects:")
        for checked_id in (from_project_id, destination_project_id):
            check_that_exists(
                projects_db.projects, {'project_id': checked_id}, not_more_than_one=True)
        print("==========")
        print("Checking Families:")
        for checked_id in (from_project_id, destination_project_id):
            check_that_exists(
                families_db.families, {'project_id': checked_id}, not_more_than_one=False)

        print("==========")
        print("Make Updates:")
        # for each collection: park the old destination records under a
        # "_previous1" id first, then move the source records onto the freed id
        for collection in (projects_db.projects, families_db.families):
            update(
                collection,
                {'project_id': destination_project_id},
                {'project_id': destination_project_id + '_previous1', 'version': '1'})
            update(
                collection,
                {'project_id': from_project_id},
                {'project_id': destination_project_id, 'version': '2'})

        print("==========")
        print("Checking Projects:")
        check_that_exists(
            projects_db.projects, {'project_id': destination_project_id}, not_more_than_one=True)

        print("==========")
        print("Checking Families:")
        check_that_exists(
            families_db.families, {'project_id': destination_project_id}, not_more_than_one=False)

        update_family_analysis_status(destination_project_id)

        print("Data transfer finished.")
        answer = raw_input("Delete the 'from' project: %s? [Y/n] " % from_project_id)
        # only an explicit capital 'Y' deletes; anything else keeps the project
        if answer.strip() != 'Y':
            print("Project not deleted")
        else:
            sample_management.delete_project(from_project_id)
            print("Project %s deleted" % from_project_id)
Example #54
0
def calculate_cohort_gene_search(cohort, search_spec):
    """
    Calculate search results from the params in search_spec
    Should be called after cache is checked - this does all the computation

    Returns a list of dicts, one per gene that has at least two hits in the
    cohort and is known to the reference. Each dict holds 'gene_info',
    'gene_id', 'gene_name', 'num_hits', 'num_unique_variants', 'chr',
    'start', 'end' and 'control_comparison'. The position fields are None
    when the reference has no bounds for the gene.
    """
    xcohort = cohort.xcohort()
    cohort_size = len(xcohort.individuals)
    indiv_id_list = xcohort.indiv_id_list()

    gene_stream = cohort_get_genes_with_inheritance(
        get_datastore(cohort.project.project_id),
        get_reference(),
        xcohort,
        search_spec.inheritance_mode,
        search_spec.variant_filter,
        search_spec.quality_filter,
    )

    genes = []
    for gene_id, indivs_with_inheritance, gene_variation in gene_stream:

        num_hits = len(indivs_with_inheritance)

        # don't return genes with fewer than two hits
        if num_hits < 2:
            continue

        # look the gene up before anything else so that unknown genes are
        # skipped without paying for the control comparison
        xgene = get_reference().get_gene(gene_id)
        if xgene is None:
            continue

        try:
            start_pos, end_pos = get_reference().get_gene_bounds(gene_id)
            chrom, start = genomeloc.get_chr_pos(start_pos)
            end = genomeloc.get_chr_pos(end_pos)[1]
        except KeyError:
            chrom, start, end = None, None, None

        # project-specific control cohort wins over the site-wide default
        control_cohort = cohort.project.default_control_cohort or settings.DEFAULT_CONTROL_COHORT
        control_comparison = population_controls.control_comparison(
            control_cohort,
            gene_id,
            num_hits,
            cohort_size,
            search_spec.inheritance_mode,
            search_spec.variant_filter,
            search_spec.quality_filter
        )

        sys.stderr.write("     cohort_gene_search - found gene: %s, gene_id: %s \n" % (xgene['symbol'], gene_id, ))
        genes.append({
            'gene_info': xgene,
            'gene_id': gene_id,
            'gene_name': xgene['symbol'],
            'num_hits': num_hits,
            'num_unique_variants': len(gene_variation.get_relevant_variants_for_indiv_ids(indiv_id_list)),
            'chr': chrom,
            'start': start,
            'end': end,
            'control_comparison': control_comparison,
        })

    sys.stderr.write("     cohort_gene_search - finished. (cohort_genes_with_inheritance iterator)\n")
    return genes
Example #55
0
    def transfer_project(self, from_project_id, to_project_id):
        """
        Copy the contents of one project into another, creating the
        destination project when it does not exist yet.

        Copied by this method:
          Project          => description, private reference populations
          Family           => scalar fields, plus a matching document in the
                              datastore's ``families`` collection
          FamilyGroup      => slug, name, description
          Individual       => scalar fields and linked vcf_files
          CausalVariant    => per family
          VariantNote      => per project, re-linked to the copied family
          ProjectTag / VariantTag
          ProjectGeneList

        Not touched by this method: ProjectCollaborator, FamilyImageSlide,
        Cohort, FamilySearchFlag, IndividualPhenotype, ProjectPhenotype.
        """
        family_fields = (
            'family_name',
            'short_description',
            'about_family_content',
            'analysis_summary_content',
            'coded_phenotype',
            'post_discovery_omim_number',
            'pedigree_image',
            'pedigree_image_height',
            'pedigree_image_width',
            'analysis_status',
            'analysis_status_date_saved',
            'analysis_status_saved_by',
            'causal_inheritance_mode',
            'internal_case_review_notes',
            'internal_case_review_brief_summary',
        )
        individual_fields = (
            'created_date',
            'affected',
            'phenotips_id',
            'phenotips_data',
            'case_review_status',
            'mean_target_coverage',
            'coverage_status',
            'bam_file_path',
            'vcf_id',
            'gender',
            'in_case_review',
            'nickname',
            'maternal_id',
            'paternal_id',
            'other_notes',
        )

        families_db = get_datastore()._db

        # Project
        from_project = Project.objects.get(project_id=from_project_id)
        to_project, created = Project.objects.get_or_create(project_id=to_project_id)
        if created:
            print("Created project: " + str(to_project))
        to_project.description = from_project.description
        to_project.save()

        # Reference Populations
        for reference_population in from_project.private_reference_populations.all():
            print("Adding private reference population: " + reference_population.slug)
            to_project.private_reference_populations.add(reference_population)
            to_project.save()

        # Family
        to_family_id_to_family = {}  # maps family_id to the to_family object
        for from_f in Family.objects.filter(project=from_project):
            to_f, created = Family.objects.get_or_create(project=to_project, family_id=from_f.family_id)
            if not created:
                print("Matched family ids %s (%s) to %s (%s)" % (
                    from_f.family_id, from_f.short_description, to_f.family_id, to_f.short_description))

            to_family_id_to_family[to_f.family_id] = to_f

            for field_name in family_fields:
                setattr(to_f, field_name, getattr(from_f, field_name))
            to_f.save()

            # register the family in the datastore; coll_name is built from the
            # SOURCE project's id and family id
            family_indiv_ids = [
                indiv.indiv_id
                for indiv in Individual.objects.filter(project=from_project, family=from_f)
            ]
            update(
                families_db.families,
                {'project_id': to_project.project_id, 'family_id': to_f.family_id},
                {
                    "status": "loaded",
                    "family_id": to_f.family_id,
                    "individuals": family_indiv_ids,
                    "coll_name": "family_%s_%s" % (from_project.project_id, from_f.family_id),
                    "project_id": to_project.project_id,
                },
                upsert=True
            )

        # FamilyGroup
        for from_fg in FamilyGroup.objects.filter(project=from_project):
            FamilyGroup.objects.get_or_create(
                project=to_project,
                slug=from_fg.slug,
                name=from_fg.name,
                description=from_fg.description)

        # Individual and CausalVariant, family by family
        for from_family in Family.objects.filter(project=from_project):
            to_family = to_family_id_to_family[from_family.family_id]

            for from_i in Individual.objects.filter(project=from_project, family=from_family):
                to_i, created = Individual.objects.get_or_create(
                    project=to_project, family=to_family, indiv_id=from_i.indiv_id)
                if not created:
                    print("matched existing individual: " + str(from_i.indiv_id) + " in family " + from_family.family_id)

                for field_name in individual_fields:
                    setattr(to_i, field_name, getattr(from_i, field_name))

                for vcf_file in from_i.vcf_files.all():
                    if vcf_file not in to_i.vcf_files.all():
                        to_i.vcf_files.add(vcf_file)

                to_i.save()

            for from_v in CausalVariant.objects.filter(family=from_family):
                CausalVariant.objects.get_or_create(
                    family=to_family,
                    variant_type=from_v.variant_type,
                    xpos=from_v.xpos,
                    ref=from_v.ref,
                    alt=from_v.alt)

        # VariantNote
        # NOTE(review): assumes every note has a family; a note without one
        # would raise AttributeError on from_vn.family.family_id - confirm
        for from_vn in VariantNote.objects.filter(project=from_project):
            if from_vn.family.family_id not in to_family_id_to_family:
                print("Skipping note: " + str(from_vn.toJSON()))
                continue
            VariantNote.objects.get_or_create(
                project=to_project,
                family=to_family_id_to_family[from_vn.family.family_id],
                user=from_vn.user,
                date_saved=from_vn.date_saved,
                note=from_vn.note,
                xpos=from_vn.xpos,
                ref=from_vn.ref,
                alt=from_vn.alt)

        # ProjectTag and the VariantTags hanging off each one
        for from_ptag in ProjectTag.objects.filter(project=from_project):
            to_ptag, created = ProjectTag.objects.get_or_create(
                project=to_project, tag=from_ptag.tag, title=from_ptag.title, color=from_ptag.color)
            for from_vtag in VariantTag.objects.filter(project_tag=from_ptag):
                if from_vtag.family.family_id not in to_family_id_to_family:
                    print("Skipping tag: " + str(from_vtag.xpos))
                    continue
                VariantTag.objects.get_or_create(
                    family=to_family_id_to_family[from_vtag.family.family_id],
                    project_tag=to_ptag,
                    xpos=from_vtag.xpos,
                    ref=from_vtag.ref,
                    alt=from_vtag.alt)

        # ProjectGeneList
        for from_pgl in ProjectGeneList.objects.filter(project=from_project):
            ProjectGeneList.objects.get_or_create(project=to_project, gene_list=from_pgl.gene_list)
Example #56
0
def calculate_mendelian_variant_search(search_spec, family, user=None):
    """
    Run the family variant search described by ``search_spec``.

    ``search_spec.search_mode`` picks the search: 'standard_inheritance',
    'custom_inheritance', 'gene_burden', 'allele_count' or 'all_variants'.
    Returns the matching variants as a list, each tagged with a
    ``family_id`` extra. Any other mode leaves the result as None, so the
    tagging loop raises TypeError.
    """
    xfamily = family.xfamily()
    project = family.project

    def _filters(**extra):
        # keyword arguments common to every search, plus mode-specific ones
        kwargs = dict(extra)
        kwargs['variant_filter'] = search_spec.variant_filter
        kwargs['quality_filter'] = search_spec.quality_filter
        kwargs['user'] = user
        return kwargs

    def _standard_inheritance():
        return get_variants_with_inheritance_mode(
            get_mall(project),
            xfamily,
            search_spec.inheritance_mode,
            **_filters()
        )

    def _custom_inheritance():
        return get_variants_family(
            get_datastore(project),
            xfamily,
            **_filters(genotype_filter=search_spec.genotype_inheritance_filter)
        )

    def _gene_burden():
        gene_stream = get_genes_family(
            get_datastore(project),
            get_reference(),
            xfamily,
            **_filters(burden_filter=search_spec.gene_burden_filter)
        )
        return stream_utils.gene_stream_to_variant_stream(gene_stream, get_reference())

    def _allele_count():
        return get_variants_allele_count(
            get_datastore(project),
            xfamily,
            search_spec.allele_count_filter,
            **_filters()
        )

    def _all_variants():
        return get_variants_family(
            get_datastore(project),
            xfamily,
            **_filters(indivs_to_consider=xfamily.indiv_id_list())
        )

    searches = {
        'standard_inheritance': _standard_inheritance,
        'custom_inheritance': _custom_inheritance,
        'gene_burden': _gene_burden,
        'allele_count': _allele_count,
        'all_variants': _all_variants,
    }
    run_search = searches.get(search_spec.search_mode)
    variants = list(run_search()) if run_search is not None else None

    for variant in variants:
        variant.set_extra('family_id', family.family_id)

    return variants
Example #57
0
def add_dataset_handler(request, project_guid):
    """Create or update samples for the given dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'sampleType':  <"WGS", "WES", or "RNA"> (required)
            'datasetType': <"VARIANTS", or "ALIGN"> (required)
            'elasticsearchIndex': <String>
            'datasetPath': <String>
            'datasetName': <String>
            'ignoreExtraSamplesInCallset': <Boolean>
            'ignoreMissingFamilyMembers': <passed through to add_dataset>
            'mappingFile': { 'uploadedFileId': <Id for temporary uploaded file> }
            'mappingFilePath': <passed through to add_dataset>
        }

        Response body - will contain the following structure:
        {
            'samplesByGuid': { <sampleGuid>: <json for that updated sample>, ... }
            'individualsByGuid': {
                <individual guid>: { 'sampleGuids': [<sample guid>, ...] }, ...
            }
        }
        'individualsByGuid' is only included when at least one of the updated
        samples has a sampleId listed in the created sample ids returned by
        add_dataset.

        If anything fails while adding the dataset or building the response,
        the body is {'errors': [<message>]} and the HTTP status is 400.

    Raises:
        ValueError: if 'sampleType' or 'datasetType' is missing from the
            request body (raised before the try block, so it is not turned
            into a 400 response here).
    """

    logger.info("add_dataset_handler: " + str(request))

    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)

    request_json = json.loads(request.body)

    logger.info("add_dataset_handler: received %s" % pformat(request_json))

    required_fields = ['sampleType', 'datasetType']
    if any(field not in request_json for field in required_fields):
        raise ValueError("request must contain fields: {}".format(
            ', '.join(required_fields)))

    sample_type = request_json['sampleType']
    dataset_type = request_json['datasetType']

    # Optional string fields: strip surrounding whitespace only when a
    # non-empty value was supplied, otherwise keep the value as received.
    elasticsearch_index = request_json.get('elasticsearchIndex')
    if elasticsearch_index:
        elasticsearch_index = elasticsearch_index.strip()
    dataset_path = request_json.get('datasetPath')
    if dataset_path:
        dataset_path = dataset_path.strip()
    dataset_name = request_json.get('datasetName')
    if dataset_name:
        dataset_name = dataset_name.strip()

    ignore_extra_samples_in_callset = request_json.get(
        'ignoreExtraSamplesInCallset')
    ignore_missing_family_members = request_json.get(
        'ignoreMissingFamilyMembers')
    # `or {}` rather than a .get() default: an explicit JSON null for
    # 'mappingFile' would otherwise yield None and crash on the chained .get()
    mapping_file_id = (request_json.get('mappingFile')
                       or {}).get('uploadedFileId')
    mapping_file_path = request_json.get('mappingFilePath')

    try:
        updated_samples, created_sample_ids = add_dataset(
            project=project,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            dataset_path=dataset_path,
            dataset_name=dataset_name,
            max_edit_distance=0,
            ignore_extra_samples_in_callset=ignore_extra_samples_in_callset,
            ignore_missing_family_members=ignore_missing_family_members,
            mapping_file_path=mapping_file_path,
            mapping_file_id=mapping_file_id,
        )

        # update VCFFile records
        if updated_samples:
            if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS:
                base_project = BaseProject.objects.get(seqr_project=project)
                get_datastore(base_project).bust_project_cache(
                    base_project.project_id)
                clear_project_results_cache(base_project.project_id)

                # Reuse the most recently created matching VCFFile, if any.
                vcf_file = VCFFile.objects.filter(
                    project=base_project,
                    dataset_type=dataset_type,
                    sample_type=sample_type,
                    elasticsearch_index=elasticsearch_index).order_by(
                        '-pk').first()

                if not vcf_file:
                    vcf_file = VCFFile.objects.create(
                        project=base_project,
                        dataset_type=dataset_type,
                        sample_type=sample_type,
                        elasticsearch_index=elasticsearch_index,
                    )
                    logger.info("Created vcf file: " + str(vcf_file.__dict__))

                vcf_file.file_path = dataset_path or "{}.vcf.gz".format(
                    elasticsearch_index
                )  # legacy VCFFile model requires non-empty vcf path
                # next(iter(...)) works on both Python 2.6+ and Python 3,
                # unlike the Python-2-only iterator .next() method.
                vcf_file.loaded_date = next(iter(updated_samples)).loaded_date
                vcf_file.save()

                for indiv in [s.individual for s in updated_samples]:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=indiv).only('id'):
                        base_indiv.vcf_files.add(vcf_file)

            elif dataset_type == Sample.DATASET_TYPE_READ_ALIGNMENTS:
                for sample in updated_samples:
                    for base_indiv in BaseIndividual.objects.filter(
                            seqr_individual=sample.individual).only('id'):
                        base_indiv.bam_file_path = sample.dataset_file_path
                        base_indiv.save()

        updated_sample_json = get_json_for_samples(updated_samples,
                                                   project_guid=project_guid)
        response = {
            'samplesByGuid': {s['sampleGuid']: s
                              for s in updated_sample_json}
        }
        # Individuals that received at least one newly created sample.
        updated_individuals = {
            s['individualGuid']
            for s in updated_sample_json if s['sampleId'] in created_sample_ids
        }
        if updated_individuals:
            individuals = Individual.objects.filter(
                guid__in=updated_individuals).prefetch_related(
                    'sample_set', 'family').only('guid')
            response['individualsByGuid'] = {
                ind.guid: {
                    'sampleGuids':
                    [s.guid for s in ind.sample_set.only('guid').all()]
                }
                for ind in individuals
            }

            # Families that were waiting for data move to "analysis in progress".
            for ind in individuals:
                family = ind.family
                if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA:
                    update_seqr_model(family,
                                      analysis_status=Family.
                                      ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

        return create_json_response(response)
    except Exception as e:
        traceback.print_exc()
        # Exceptions have no `.message` attribute on Python 3; fall back to
        # str(e) there, and also when the message is empty.
        error_message = getattr(e, 'message', None) or str(e)
        return create_json_response({'errors': [error_message]},
                                    status=400)
Example #58
0
 def x_variant(self):
     """Fetch the variant at this object's xpos/ref/alt from the datastore.

     The datastore is obtained via get_datastore() for the project id of this
     object's family; the result of get_single_variant() is returned as-is.
     """
     family = self.family
     project_id = family.project.project_id
     datastore = get_datastore(project_id)
     return datastore.get_single_variant(
         project_id,
         family.family_id,
         self.xpos,
         self.ref,
         self.alt,
     )