Example #1
    def handle(self, *args, **options):
        ANALYSIS_TEMPLATES_DIR = os.path.join(settings.BASE_DIR, "analysis",
                                              "data", "analysis_templates")

        user = admin_bot()
        genome_build = GenomeBuild.grch37()  # Doesn't matter for templates

        for filename in glob.glob(f"{ANALYSIS_TEMPLATES_DIR}/*.json"):
            print(filename)
            analysis = analysis_import(user, genome_build, filename)
            analysis.template_type = AnalysisTemplateType.TEMPLATE
            analysis.visible = False
            analysis.save()
            add_public_group_read_permission(analysis)

            analysis_template = AnalysisTemplate.objects.create(
                name=analysis.name, user=user, analysis=analysis)

            analysis_snapshot = analysis.clone()
            analysis_snapshot.template_type = AnalysisTemplateType.SNAPSHOT
            analysis_snapshot.visible = False
            analysis_snapshot.save()
            add_public_group_read_permission(analysis_snapshot)

            analysis_name_template = "%(template)s for %(input)s"
            AnalysisTemplateVersion.objects.create(
                template=analysis_template,
                version=1,
                analysis_name_template=analysis_name_template,
                analysis_snapshot=analysis_snapshot)

            print(f"Created template: {analysis_template}")
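
Once the command has run, each imported template should have a matching version row; a quick check (illustrative only, assuming the models above are importable):

# Illustrative check after the import; field names match the create() calls above.
for template in AnalysisTemplate.objects.all():
    version = AnalysisTemplateVersion.objects.get(template=template, version=1)
    print(template.name, version.analysis_name_template)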
Example #2
def _liftover_variant_tag(variant_tag: VariantTag):
    genome_build = variant_tag.analysis.genome_build
    populate_clingen_alleles_for_variants(genome_build, [variant_tag.variant])
    variant_allele = VariantAllele.objects.get(variant=variant_tag.variant,
                                               genome_build=genome_build)
    allele_source = VariantAlleleSource.objects.create(
        variant_allele=variant_allele)
    create_liftover_pipelines(admin_bot(), allele_source, ImportSource.WEB,
                              genome_build)
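
A minimal sketch of how _liftover_variant_tag might be driven, for example to lift over every existing tag once (the loop is illustrative, not from the source):

# Illustrative driver; VariantTag is the model referenced by the helper above.
for variant_tag in VariantTag.objects.all():
    _liftover_variant_tag(variant_tag)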
Example #3
    def handle(self, *args, **options):
        user = admin_bot()

        count = 0
        lab_changes = Counter()
        for classification in Classification.objects.filter(
                evidence__allele_frequency__isnull=False):
            old_value = classification.get("allele_frequency")
            if old_value is not None:
                # Copy the evidence value into a dict so we can inspect and store notes
                value_obj = dict(
                    classification.evidence.get("allele_frequency"))
                existing_note = value_obj.get("note")
                if existing_note:
                    if "Converted from" in existing_note:
                        continue  # Already run

                try:
                    to_value = float(old_value) / 100
                except ValueError:
                    # Handle known bad entries such as "0..2" and "34%"
                    value = old_value.replace("..", ".").replace("%", "")
                    if old_value == "4.2 (46% in publication)":
                        value = "4.2"

                    try:
                        to_value = float(value) / 100
                    except ValueError:
                        logging.error("Couldn't convert classification: %d",
                                      classification.pk)
                        raise

                value_obj["value"] = to_value
                notes = []
                existing_note = value_obj.get("note")
                if existing_note:
                    notes.append(existing_note)
                notes.append(f"Converted from '{old_value}'%")
                value_obj["note"] = "\n".join(notes)
                patch = {
                    SpecialEKeys.ALLELE_FREQUENCY: value_obj,
                }
                classification.revalidate(user, migration_patch=patch)

                lab_changes[classification.lab.name] += 1
                count += 1

                if count % 100 == 0:
                    print(f"Processed {count} records")

        print("Classifications changed per lab:")
        print(lab_changes)
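
The conversion itself is just percent-to-fraction, plus string clean-ups for the known bad entries; the core logic as a standalone sketch (the helper name is hypothetical):

def _percent_to_fraction(old_value):
    """Convert a stored percentage such as '34%' or '0..2' to a fraction."""
    try:
        return float(old_value) / 100
    except ValueError:
        cleaned = str(old_value).replace("..", ".").replace("%", "")
        return float(cleaned) / 100  # Still raises ValueError if not numeric

assert _percent_to_fraction("34%") == 0.34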
Example #4
    def handle(self, *args, **options):
        user = admin_bot()

        modified_classifications = []
        for classification in Classification.objects.filter(evidence__variant_coordinate__value__icontains=".."):
            modified_classifications.append(str(classification.pk))
            patch = {
                SpecialEKeys.VARIANT_COORDINATE: classification.variant.full_string
            }
            classification.patch_value(patch=patch,
                                       source=SubmissionSource.VARIANT_GRID,
                                       save=True,
                                       user=user)
            classification.revalidate(user)

        print(f"Modified {len(modified_classifications)} classifications: {','.join(modified_classifications)}")
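
A dry-run version of the same JSONField lookup can be useful before patching (a sketch, not from the source):

# Hypothetical dry run: list the affected classifications without patching anything.
for classification in Classification.objects.filter(
        evidence__variant_coordinate__value__icontains=".."):
    print(classification.pk, classification.variant.full_string)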
Example #5
    def create(analysis_template: AnalysisTemplate,
               genome_build: GenomeBuild,
               user: User = None):
        if user is None:
            user = admin_bot()

        template_version = analysis_template.active
        analysis = template_version.analysis_snapshot.clone()
        analysis.user = user
        analysis.genome_build = genome_build
        analysis.annotation_version = AnnotationVersion.latest(genome_build,
                                                               validate=True)
        analysis.template_type = None
        analysis.visible = True
        analysis.name = f"TemplateRun from {analysis_template.name}"  # Will be set in populate arguments
        analysis.save()

        assign_permission_to_user_and_groups(user, analysis)
        return AnalysisTemplateRun.objects.create(
            template_version=template_version, analysis=analysis)
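
A usage sketch for create(), assuming it is a factory method on AnalysisTemplateRun (the enclosing class is not shown above, and the template name is hypothetical):

template = AnalysisTemplate.objects.get(name="Trio analysis")  # Hypothetical name
run = AnalysisTemplateRun.create(template, GenomeBuild.grch37())
print(run.analysis.name)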
Example #6
    def handle(self, *args, **options):
        for genome_build in GenomeBuild.builds_with_annotation():
            variant_qs = Variant.objects.filter(
                Variant.get_contigs_q(genome_build), varianttag__isnull=False)
            populate_clingen_alleles_for_variants(
                genome_build, variant_qs)  # Will add VariantAlleles

            va_collection = VariantAlleleCollectionSource.objects.create(
                genome_build=genome_build)
            records = []
            for va in VariantAllele.objects.filter(
                    variant__in=variant_qs):  # VariantAlleles added above
                records.append(
                    VariantAlleleCollectionRecord(collection=va_collection,
                                                  variant_allele=va))

            if records:
                VariantAlleleCollectionRecord.objects.bulk_create(
                    records, batch_size=2000)
            create_liftover_pipelines(admin_bot(), va_collection,
                                      ImportSource.COMMAND_LINE, genome_build)
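
This is the bulk counterpart of Example #2: all tagged variants for a build go into one VariantAlleleCollectionSource before a single create_liftover_pipelines call. A per-build dry-run count (illustrative only):

# Illustrative dry run, using only calls from the snippet above.
for genome_build in GenomeBuild.builds_with_annotation():
    tagged = Variant.objects.filter(Variant.get_contigs_q(genome_build),
                                    varianttag__isnull=False)
    print(genome_build, tagged.count(), "tagged variants to lift over")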
Example #7
    def handle(self, *args, **options):
        script = __file__
        add_clingen_allele = options["add_clingen_allele"]
        for genome_build in GenomeBuild.builds_with_annotation():
            defaults = {"git_hash": Git(settings.BASE_DIR).hash}
            allele_source, _ = AllClassificationsAlleleSource.objects.get_or_create(
                script=script, genome_build=genome_build, defaults=defaults)
            variants_qs = allele_source.get_variants_qs()
            if variants_qs.count():
                print(
                    f"{genome_build} has variants - creating Allele/ClinGen + liftover"
                )
                populate_clingen_alleles_for_variants(genome_build,
                                                      variants_qs)
                create_liftover_pipelines(admin_bot(), allele_source,
                                          ImportSource.COMMAND_LINE,
                                          genome_build)

                if add_clingen_allele:
                    # Patch those ClinGen alleles into the variant classifications
                    num_added_clingen_allele = 0
                    clingen_allele_key_null = "evidence__%s__isnull" % SpecialEKeys.CLINGEN_ALLELE_ID
                    for vc in Classification.objects.filter(
                            variant__in=variants_qs,
                            **{clingen_allele_key_null: True}):
                        _, evidence_value, _ = get_clingen_allele_and_evidence_value_for_variant(
                            genome_build, vc.variant)
                        vc.patch_value(
                            {SpecialEKeys.CLINGEN_ALLELE_ID: evidence_value},
                            source=SubmissionSource.VARIANT_GRID)
                        vc.save()
                        num_added_clingen_allele += 1

                    print(
                        f"Added {SpecialEKeys.CLINGEN_ALLELE_ID} to {num_added_clingen_allele} classifications"
                    )
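
If the handler above is registered as a management command, it could be invoked like this (the command name is an assumption; add_clingen_allele maps to options["add_clingen_allele"] above):

from django.core.management import call_command

# Hypothetical command name; the option name comes from the code above.
call_command("liftover_classification_variants", add_clingen_allele=True)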
Example #8
    def handle(self, *args, **options):
        filename = options["var_citations_txt"]
        user = admin_bot()

        df = pd.read_csv(filename, sep='\t', index_col=None)
        for col in [ALLELE_ID, VARIATION_ID, CITATION_SOURCE, CITATION_ID]:
            if col not in df.columns:
                msg = f"Expected column '{col}' in tsv from {CITATIONS_URL}"
                raise ValueError(msg)

        logging.info("Deleting existing ClinVarCitations")
        UploadedClinVarCitations.objects.all().delete()

        md5_hash = file_md5sum(filename)
        uploaded_file = UploadedFile.objects.create(
            path=filename,
            import_source=ImportSource.COMMAND_LINE,
            name='ClinVar citations',
            user=user,
            file_type=UploadedFileTypes.CLINVAR_CITATIONS)

        clinvar_citations_collection = ClinVarCitationsCollection.objects.create()
        UploadedClinVarCitations.objects.create(
            md5_hash=md5_hash,
            uploaded_file=uploaded_file,
            clinvar_citations_collection=clinvar_citations_collection)

        existing_citations = {}
        citation = None
        for citation in Citation.objects.all().order_by("pk"):
            existing_citations[citation.unique_code()] = citation

        if citation:
            max_previously_existing_citation_id = citation.pk  # as qs above is in PK order
        else:
            max_previously_existing_citation_id = 0

        citation_sources = invert_dict(dict(CitationSource.choices))
        new_citations_by_key = {}
        for _, row in df.iterrows():
            #print("row: %s" % row)
            cs = row[CITATION_SOURCE]
            citation_source = citation_sources[cs]

            citation = Citation(citation_source=citation_source,
                                citation_id=row[CITATION_ID])

            key = citation.unique_code()
            if key not in existing_citations:
                new_citations_by_key[key] = citation

        # Insert the new citations
        logging.info("Inserting %d citations", len(new_citations_by_key))
        Citation.objects.bulk_create(new_citations_by_key.values(),
                                     batch_size=2000)

        # Update hash
        for citation in Citation.objects.filter(
                pk__gt=max_previously_existing_citation_id):
            existing_citations[citation.unique_code()] = citation

        # Insert ClinVar citations
        rows = []
        for _, row in df.iterrows():
            cs = row[CITATION_SOURCE]
            citation_source = citation_sources[cs]

            wanted_citation = Citation(citation_source=citation_source,
                                       citation_id=row[CITATION_ID])
            citation = existing_citations[
                wanted_citation.unique_code()]  # Will die if not there

            cvc = ClinVarCitation(
                clinvar_citations_collection=clinvar_citations_collection,
                clinvar_variation_id=row[VARIATION_ID],
                clinvar_allele_id=row[ALLELE_ID],
                citation=citation)
            rows.append(cvc)

        logging.info("Read %d records, inserting into DB", len(rows))
        ClinVarCitation.objects.bulk_create(rows, batch_size=2000)
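
A pre-flight check of the TSV, mirroring the column validation above, can catch a bad download before any existing ClinVarCitations are deleted (a sketch; the path is hypothetical and the column constants are assumed to be in scope):

import pandas as pd

df = pd.read_csv("var_citations.txt", sep="\t", index_col=None)  # Hypothetical path
missing = [col for col in (ALLELE_ID, VARIATION_ID, CITATION_SOURCE, CITATION_ID)
           if col not in df.columns]
print("Missing columns:", missing or "none")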