def handle(self, *args, **options):
    ANALYSIS_TEMPLATES_DIR = os.path.join(settings.BASE_DIR, "analysis", "data", "analysis_templates")
    user = admin_bot()
    genome_build = GenomeBuild.grch37()  # Doesn't matter for templates
    for filename in glob.glob(f"{ANALYSIS_TEMPLATES_DIR}/*.json"):
        print(filename)
        analysis = analysis_import(user, genome_build, filename)
        analysis.template_type = AnalysisTemplateType.TEMPLATE
        analysis.visible = False
        analysis.save()
        add_public_group_read_permission(analysis)

        analysis_template = AnalysisTemplate.objects.create(name=analysis.name,
                                                            user=user,
                                                            analysis=analysis)

        analysis_snapshot = analysis.clone()
        analysis_snapshot.template_type = AnalysisTemplateType.SNAPSHOT
        analysis_snapshot.visible = False
        analysis_snapshot.save()
        add_public_group_read_permission(analysis_snapshot)

        analysis_name_template = "%(template)s for %(input)s"
        AnalysisTemplateVersion.objects.create(template=analysis_template,
                                               version=1,
                                               analysis_name_template=analysis_name_template,
                                               analysis_snapshot=analysis_snapshot)
        print(f"Created template: {analysis_template}")
def _liftover_variant_tag(variant_tag: VariantTag):
    genome_build = variant_tag.analysis.genome_build
    populate_clingen_alleles_for_variants(genome_build, [variant_tag.variant])
    variant_allele = VariantAllele.objects.get(variant=variant_tag.variant,
                                               genome_build=genome_build)
    allele_source = VariantAlleleSource.objects.create(variant_allele=variant_allele)
    create_liftover_pipelines(admin_bot(), allele_source, ImportSource.WEB, genome_build)
def handle(self, *args, **options):
    user = admin_bot()
    count = 0
    lab_changes = Counter()
    for classification in Classification.objects.filter(evidence__allele_frequency__isnull=False):
        old_value = classification.get("allele_frequency")
        if old_value is not None:
            # Get out dict - so we can look at and store notes
            value_obj = dict(classification.evidence.get("allele_frequency"))
            existing_note = value_obj.get("note")
            if existing_note:
                if "Converted from" in existing_note:
                    continue  # Already run

            # Convert percentage (0-100) to a fraction (0-1)
            try:
                to_value = float(old_value) / 100
            except ValueError:
                # Someone had entered "0..2"
                # Someone entered '34%'
                value = old_value.replace("..", ".").replace("%", "")
                if old_value == "4.2 (46% in publication)":
                    value = "4.2"
                try:
                    to_value = float(value) / 100
                except ValueError:
                    logging.error("Couldn't convert classification: %d", classification.pk)
                    raise

            value_obj["value"] = to_value
            notes = []
            existing_note = value_obj.get("note")
            if existing_note:
                notes.append(existing_note)
            notes.append(f"Converted from '{old_value}'%")
            value_obj["note"] = "\n".join(notes)

            patch = {
                SpecialEKeys.ALLELE_FREQUENCY: value_obj,
            }
            classification.revalidate(user, migration_patch=patch)
            lab_changes[classification.lab.name] += 1
            count += 1
            if count % 100 == 0:
                print(f"Processed {count} records")

    print("Classifications changed per lab:")
    print(lab_changes)
def handle(self, *args, **options):
    user = admin_bot()
    modified_classifications = []
    for classification in Classification.objects.filter(evidence__variant_coordinate__value__icontains=".."):
        modified_classifications.append(str(classification.pk))
        patch = {
            SpecialEKeys.VARIANT_COORDINATE: classification.variant.full_string
        }
        classification.patch_value(patch=patch,
                                   source=SubmissionSource.VARIANT_GRID,
                                   save=True,
                                   user=user)
        classification.revalidate(user)

    print(f"Modified {len(modified_classifications)} classifications: {','.join(modified_classifications)}")
def create(analysis_template: AnalysisTemplate, genome_build: GenomeBuild, user: User = None):
    if user is None:
        user = admin_bot()
    template_version = analysis_template.active
    analysis = template_version.analysis_snapshot.clone()
    analysis.user = user
    analysis.genome_build = genome_build
    analysis.annotation_version = AnnotationVersion.latest(genome_build, validate=True)
    analysis.template_type = None
    analysis.visible = True
    analysis.name = f"TemplateRun from {analysis_template.name}"  # Will be set in populate arguments
    analysis.save()
    assign_permission_to_user_and_groups(user, analysis)
    return AnalysisTemplateRun.objects.create(template_version=template_version,
                                              analysis=analysis)
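# Usage sketch (an assumption, not from this codebase): if this factory is exposed as
# AnalysisTemplateRun.create(), a caller could start a run for a specific build roughly like:
#
#     analysis_template = AnalysisTemplate.objects.get(name="My template")  # hypothetical name
#     template_run = AnalysisTemplateRun.create(analysis_template, GenomeBuild.grch37())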
def handle(self, *args, **options):
    for genome_build in GenomeBuild.builds_with_annotation():
        variant_qs = Variant.objects.filter(Variant.get_contigs_q(genome_build),
                                            varianttag__isnull=False)
        populate_clingen_alleles_for_variants(genome_build, variant_qs)  # Will add VariantAlleles

        va_collection = VariantAlleleCollectionSource.objects.create(genome_build=genome_build)
        records = []
        for va in VariantAllele.objects.filter(variant__in=variant_qs):  # VariantAlleles added above
            records.append(VariantAlleleCollectionRecord(collection=va_collection, variant_allele=va))
        if records:
            VariantAlleleCollectionRecord.objects.bulk_create(records, batch_size=2000)

        create_liftover_pipelines(admin_bot(), va_collection, ImportSource.COMMAND_LINE, genome_build)
def handle(self, *args, **options):
    script = __file__
    add_clingen_allele = options["add_clingen_allele"]
    for genome_build in GenomeBuild.builds_with_annotation():
        defaults = {"git_hash": Git(settings.BASE_DIR).hash}
        allele_source, _ = AllClassificationsAlleleSource.objects.get_or_create(script=script,
                                                                                genome_build=genome_build,
                                                                                defaults=defaults)
        variants_qs = allele_source.get_variants_qs()
        if variants_qs.count():
            print(f"{genome_build} has variants - creating Allele/ClinGen + liftover")
            populate_clingen_alleles_for_variants(genome_build, variants_qs)
            create_liftover_pipelines(admin_bot(), allele_source, ImportSource.COMMAND_LINE, genome_build)

            if add_clingen_allele:
                # Patch those ClinGen alleles into the variant classifications
                num_added_clingen_allele = 0
                clingen_allele_key_null = "evidence__%s__isnull" % SpecialEKeys.CLINGEN_ALLELE_ID
                for vc in Classification.objects.filter(variant__in=variants_qs,
                                                        **{clingen_allele_key_null: True}):
                    _, evidence_value, _ = get_clingen_allele_and_evidence_value_for_variant(genome_build, vc.variant)
                    vc.patch_value({SpecialEKeys.CLINGEN_ALLELE_ID: evidence_value},
                                   source=SubmissionSource.VARIANT_GRID)
                    vc.save()
                    num_added_clingen_allele += 1

                print(f"Added {SpecialEKeys.CLINGEN_ALLELE_ID} to {num_added_clingen_allele} classifications")
def handle(self, *args, **options): filename = options["var_citations_txt"] user = admin_bot() df = pd.read_csv(filename, sep='\t', index_col=None) for col in [ALLELE_ID, VARIATION_ID, CITATION_SOURCE, CITATION_ID]: if col not in df.columns: msg = f"Expected column '{col}' in tsv from {CITATIONS_URL}" raise ValueError(msg) logging.info("Deleting existing ClinVarCitations") UploadedClinVarCitations.objects.all().delete() md5_hash = file_md5sum(filename) uploaded_file = UploadedFile.objects.create( path=filename, import_source=ImportSource.COMMAND_LINE, name='ClinVar citations', user=user, file_type=UploadedFileTypes.CLINVAR_CITATIONS) clinvar_citations_collection = ClinVarCitationsCollection.objects.create( ) UploadedClinVarCitations.objects.create( md5_hash=md5_hash, uploaded_file=uploaded_file, clinvar_citations_collection=clinvar_citations_collection) existing_citations = {} citation = None for citation in Citation.objects.all().order_by("pk"): existing_citations[citation.unique_code()] = citation if citation: max_previously_existing_citation_id = citation.pk # as qs above is in PK order else: max_previously_existing_citation_id = 0 citation_sources = invert_dict(dict(CitationSource.choices)) new_citations_by_key = {} for _, row in df.iterrows(): #print("row: %s" % row) cs = row[CITATION_SOURCE] citation_source = citation_sources[cs] citation = Citation(citation_source=citation_source, citation_id=row[CITATION_ID]) key = citation.unique_code() if key not in existing_citations: new_citations_by_key[key] = citation # Insert the new citations logging.info("Inserting %d citations", len(new_citations_by_key)) Citation.objects.bulk_create(new_citations_by_key.values(), batch_size=2000) # Update hash for citation in Citation.objects.filter( pk__gt=max_previously_existing_citation_id): existing_citations[citation.unique_code()] = citation # Insert ClinVar citations rows = [] for _, row in df.iterrows(): cs = row[CITATION_SOURCE] citation_source = citation_sources[cs] wanted_citation = Citation(citation_source=citation_source, citation_id=row[CITATION_ID]) citation = existing_citations[ wanted_citation.unique_code()] # Will die if not there cvc = ClinVarCitation( clinvar_citations_collection=clinvar_citations_collection, clinvar_variation_id=row[VARIATION_ID], clinvar_allele_id=row[ALLELE_ID], citation=citation) rows.append(cvc) logging.info("Read %d records, inserting into DB", len(rows)) ClinVarCitation.objects.bulk_create(rows, batch_size=2000)