def handle(self, *args, **options):
    """Import the bundled analysis template JSON files and publish each as version 1.

    Every ``*.json`` file under ``<BASE_DIR>/analysis/data/analysis_templates``
    is imported (owned by the user returned by ``admin_bot()``), saved as a
    hidden TEMPLATE analysis and wrapped in an ``AnalysisTemplate``. The
    analysis is then cloned into a hidden SNAPSHOT analysis which is attached
    to a new ``AnalysisTemplateVersion`` with ``version=1``.
    """
    templates_dir = os.path.join(settings.BASE_DIR, "analysis", "data", "analysis_templates")
    bot_user = admin_bot()
    build = GenomeBuild.grch37()  # Doesn't matter for templates

    def _hide_and_share(analysis_obj, template_type):
        # Same four steps are applied to both the template and its snapshot
        analysis_obj.template_type = template_type
        analysis_obj.visible = False
        analysis_obj.save()
        add_public_group_read_permission(analysis_obj)

    for json_path in glob.glob(f"{templates_dir}/*.json"):
        print(json_path)

        template_analysis = analysis_import(bot_user, build, json_path)
        _hide_and_share(template_analysis, AnalysisTemplateType.TEMPLATE)
        analysis_template = AnalysisTemplate.objects.create(
            name=template_analysis.name,
            user=bot_user,
            analysis=template_analysis,
        )

        snapshot = template_analysis.clone()
        _hide_and_share(snapshot, AnalysisTemplateType.SNAPSHOT)
        AnalysisTemplateVersion.objects.create(
            template=analysis_template,
            version=1,
            analysis_name_template="%(template)s for %(input)s",
            analysis_snapshot=snapshot,
        )

        print(f"Created template: {analysis_template}")
def annotation_scheduler():
    """ This is run on scheduling_single_worker queue to avoid race conditions

        A cache key is used as a lock (expiring after 5 minutes). If the lock is
        obtained, every genome build with annotation has
        _handle_variant_annotation_version called repeatedly on its latest
        variant annotation version until that call returns None. The lock is
        always released afterwards.

        Exceptions are logged via log_traceback() rather than propagated.
        Only Exception subclasses are caught, so KeyboardInterrupt / SystemExit
        are no longer swallowed.
    """
    LOCK_EXPIRE = 60 * 5  # 5 minutes
    lock_id = "annotation-scheduler-lock"

    try:
        # cache.add fails if the key already exists, so it only succeeds for one caller
        if not cache.add(lock_id, "true", LOCK_EXPIRE):
            logging.info("Someone else has %s", lock_id)
            return

        try:
            logging.info("Got the lock for annotation scheduler")
            for genome_build in GenomeBuild.builds_with_annotation():
                annotation_version = AnnotationVersion.latest(genome_build)
                variant_annotation_version = annotation_version.variant_annotation_version
                # Keep going until no further range lock is returned
                while _handle_variant_annotation_version(variant_annotation_version) is not None:
                    pass
        finally:
            logging.info("Releasing lock")
            cache.delete(lock_id)
    except Exception:
        log_traceback()
def get(self, request, *args, **kwargs):
    """Return the serialized annotation of a variant for the latest annotation version.

    Reads ``genome_build_name`` and ``variant_string`` from ``self.kwargs``.

    Raises:
        Http404: when ``Variant.get_from_string`` returns ``None`` for the
            given string and build.
    """
    build_name = self.kwargs['genome_build_name']
    variant_string = self.kwargs['variant_string']

    genome_build = GenomeBuild.get_name_or_alias(build_name)
    variant = Variant.get_from_string(variant_string, genome_build)
    if variant is None:
        raise Http404(variant_string)

    latest_version = VariantAnnotationVersion.latest(genome_build)
    annotation = variant.variantannotation_set.get(version=latest_version)
    return Response(VariantAnnotationSerializer(annotation).data)
def create_analysis_from_template(request, genome_build_name):
    """Create an analysis from a template using the submitted POST form.

    The POST data must contain ``tag_uuid`` and ``<tag_uuid>-analysis_template``
    (the template name); both are removed from the form data and the remaining
    fields are passed to ``populate_arguments`` of the new template run.

    Returns whatever ``view_active_node`` returns for the created analysis.
    """
    form_data = request.POST.dict()
    tag_uuid = form_data.pop("tag_uuid")
    template_name = form_data.pop(f"{tag_uuid}-analysis_template")

    analysis_template = AnalysisTemplate.get_for_user(request.user, template_name)
    genome_build = GenomeBuild.get_name_or_alias(genome_build_name)

    run = AnalysisTemplateRun.create(analysis_template, genome_build, user=request.user)
    run.populate_arguments(form_data)  # everything left in the form is a template argument
    populate_analysis_from_template_run(run)
    return view_active_node(run.analysis, None)
def annotation_versions(request):
    """Render the annotation versions page.

    For each genome build with annotation the template context holds a tuple
    of (VEP command with one option per line, AnnotationVersion queryset
    ordered newest first), keyed by build name.
    """
    anno_versions = {}

    for genome_build in GenomeBuild.builds_with_annotation():
        # Create VariantAnnotationVersion for build if not exists
        try:
            get_variant_annotation_version(genome_build)
        except Exception:
            # Best effort - still show the page for this build, but don't
            # swallow KeyboardInterrupt / SystemExit like a bare except would
            log_traceback()

        qs = AnnotationVersion.objects.filter(genome_build=genome_build).order_by("-annotation_date")
        vep_command = get_vep_command("in.vcf", "out.vcf", genome_build,
                                      genome_build.annotation_consortium)
        # Put each " -option" on its own line for display
        vep_command = " ".join(vep_command).replace(" -", "\n")
        anno_versions[genome_build.name] = (vep_command, qs)

    context = {"annotation_versions": anno_versions}
    return render(request, "annotation/annotation_versions.html", context)
def handle(self, *args, **options):
    """Write a VCF header and ``options["num_variants"]`` random rows to stdout.

    The header comes from ``get_vcf_header_from_contigs`` for GRCh37. Each row
    is (contig, position, '.', ref, alt, '.', '.', '.'), tab separated, and
    rows are emitted sorted by contig then integer position.
    """
    out = sys.stdout
    genome_build = GenomeBuild.get_name_or_alias("GRCh37")
    for header_line in get_vcf_header_from_contigs(genome_build, {}):
        out.write(header_line + '\n')

    def _random_row():
        # Call order (ref, alt, contig, position) matches the original so the
        # sequence of random draws is unchanged
        ref = random_base()
        alt = random_base(not_base=ref)
        chrom = random_contig()
        pos = randrange(MAX_SIZE)
        return [chrom, pos, '.', ref, alt, '.', '.', '.']

    rows = [_random_row() for _ in range(options["num_variants"])]
    rows.sort(key=lambda row: (row[0], int(row[1])))

    for row in rows:
        out.write('\t'.join(str(value) for value in row) + '\n')
def handle(self, *args, **options):
    """Create liftover pipelines for all tagged variants of every annotated build.

    For each build: ClinGen alleles are populated for variants that have a
    VariantTag, the resulting VariantAlleles are recorded in a new
    VariantAlleleCollectionSource, and liftover pipelines are created for
    that collection.
    """
    for genome_build in GenomeBuild.builds_with_annotation():
        tagged_variants = Variant.objects.filter(Variant.get_contigs_q(genome_build),
                                                 varianttag__isnull=False)
        populate_clingen_alleles_for_variants(genome_build, tagged_variants)  # Will add VariantAlleles

        va_collection = VariantAlleleCollectionSource.objects.create(genome_build=genome_build)
        # VariantAlleles added above
        variant_alleles = VariantAllele.objects.filter(variant__in=tagged_variants)
        records = [
            VariantAlleleCollectionRecord(collection=va_collection, variant_allele=variant_allele)
            for variant_allele in variant_alleles
        ]
        if records:
            VariantAlleleCollectionRecord.objects.bulk_create(records, batch_size=2000)

        # NOTE(review): the original indentation was lost in this view; the liftover
        # call is assumed to run whether or not any records were created - confirm.
        create_liftover_pipelines(admin_bot(), va_collection, ImportSource.COMMAND_LINE, genome_build)
def variant_annotation_runs(request):
    """Handle the annotation-run admin actions submitted via POST.

    For each genome build with annotation, one of two actions may be requested
    through a POST key:

    * ``set-non-finished-to-error-<build name>``: revoke the celery task (if
      any) of every run that is neither FINISHED nor ERROR, mark it as
      "Manually failed" and save it.
    * ``retry-annotation-runs-<build name>``: retry every run in ERROR status.

    A summary of what was done is added as an INFO message.

    NOTE(review): the visible source ends after the POST handling - no context
    is built and nothing is returned here; the remainder of the view is
    presumably outside this excerpt, so the unused locals below are kept.
    """
    as_display = dict(AnnotationStatus.choices)
    genome_build_field_counts = {}
    genome_build_summary = {}

    if request.method == "POST":
        for genome_build in GenomeBuild.builds_with_annotation():
            annotation_runs = AnnotationRun.objects.filter(
                annotation_range_lock__version__genome_build=genome_build)
            fail_key = f"set-non-finished-to-error-{genome_build.name}"
            retry_key = f"retry-annotation-runs-{genome_build.name}"
            message = None

            if fail_key in request.POST:
                # Runs already in one of these states are left alone
                finished_statuses = [AnnotationStatus.FINISHED, AnnotationStatus.ERROR]
                num_errored = 0
                for annotation_run in annotation_runs.exclude(status__in=finished_statuses):
                    celery_task = annotation_run.task_id
                    if celery_task:
                        logging.info("Terminating celery job '%s'", celery_task)
                        app.control.revoke(celery_task, terminate=True)  # @UndefinedVariable

                    annotation_run.error_exception = "Manually failed"
                    annotation_run.save()
                    num_errored += 1
                message = f"{genome_build} - set {num_errored} annotation runs to Error"
            elif retry_key in request.POST:
                num_retrying = 0
                for annotation_run in annotation_runs.filter(status=AnnotationStatus.ERROR):
                    annotation_run_retry(annotation_run)
                    num_retrying += 1
                message = f"{genome_build} - retrying {num_retrying} annotation runs."

            if message:
                messages.add_message(request, messages.INFO, message)
def setUpClass(cls):
    """Create the GRCh37 fixtures shared by the tests of this class.

    Sets ``cls.grch37``, ``cls.annotation_version_grch37``,
    ``cls.transcript_version``, ``cls.gene_symbol``, ``cls.trio``,
    ``cls.analysis``, ``cls.sample``, ``cls.gene_list`` and ``cls.tag``, and
    creates one gene-overlapping, tagged variant.
    """
    super().setUpClass()

    user, _ = User.objects.get_or_create(username='******')
    grch37 = GenomeBuild.get_name_or_alias("GRCh37")
    cls.grch37 = grch37

    annotation_version = get_fake_annotation_version(grch37)
    cls.annotation_version_grch37 = annotation_version
    release = annotation_version.gene_annotation_version.gene_annotation_release
    transcript_version = create_fake_transcript_version(grch37, release=release)
    cls.transcript_version = transcript_version
    cls.gene_symbol = transcript_version.gene_version.gene_symbol

    cls.trio = create_fake_trio(user, grch37)
    cls.analysis = Analysis(genome_build=grch37)
    cls.analysis.set_defaults_and_save(user)
    cls.sample = cls.trio.get_samples()[0]

    # Gene List
    cls.gene_list, _ = GeneList.objects.get_or_create(name="fake list",
                                                      user=cls.analysis.user,
                                                      import_status=ImportStatus.SUCCESS)
    GeneListGeneSymbol.objects.get_or_create(gene_list=cls.gene_list, gene_symbol=cls.gene_symbol)

    # Need some overlapping variants so gene list will work
    create_fake_variants(grch37)
    # Note: Variant probably doesn't overlap with gene, just want a random one
    variant = Variant.objects.filter(Variant.get_no_reference_q()).first()
    annotation_run = AnnotationRun.objects.create()
    VariantGeneOverlap.objects.create(
        version=annotation_version.variant_annotation_version,
        annotation_run=annotation_run,
        gene=transcript_version.gene,
        variant=variant,
    )

    # Tag that variant
    cls.tag, _ = Tag.objects.get_or_create(pk="foo")
    VariantTag.objects.create(
        genome_build=grch37,
        analysis=cls.analysis,
        variant=variant,
        tag=cls.tag,
        user=user,
    )
def _add_vep_field_handlers(self):
    """Set up the per-field VEP handling tables for ``self.genome_build``.

    Populates:

    * ``self.field_formatters`` - column name -> callable used to clean /
      convert the raw VEP value.
    * ``self.source_field_to_columns`` - VEP info field -> set of variant grid
      column ids, built from ``ColumnVEPField`` for this build.
    * ``self.ignored_vep_fields`` - copy of ``VEP_NOT_COPIED_FIELDS`` plus the
      prefixes of configured custom sources that prefix their fields.
    * ``self.prediction_pathogenic_values`` - prediction column -> levels
      returned by ``get_damage_or_greater_levels()``.

    Columns whose setup raises (eg custom VEP source not configured) are
    skipped with a warning.
    """
    # TOPMED and 1k genomes can return multiple values - take highest
    format_pick_highest_float = get_clean_and_pick_single_value_func(max, float)
    format_pick_highest_int = get_clean_and_pick_single_value_func(max, int)
    remove_empty_multiples = get_clean_and_pick_single_value_func(join_uniq)
    # COSMIC v90 (5/9/2019) switched to COSV (build independent identifiers)
    extract_cosmic = get_extract_existing_variation("COSV")
    extract_dbsnp = get_extract_existing_variation("rs")

    # Some variants return 2 rsIds, and 2 frequencies eg "0.6764&0.2433" - take max
    self.field_formatters = {
        "af_1kg": format_pick_highest_float,
        "af_uk10k": format_pick_highest_float,
        "cosmic_count": format_pick_highest_int,
        "cosmic_id": extract_cosmic,
        "cosmic_legacy_id": remove_empty_multiples,
        "dbsnp_rs_id": extract_dbsnp,
        'gnomad_popmax': str.upper,  # nfe -> NFE
        "hgnc_id": format_hgnc_id,
        "sift": format_vep_sift_to_choice,
        "variant_class": get_choice_formatter_func(VariantClass.choices),
        'fathmm_pred_most_damaging': get_most_damaging_func(FATHMMPrediction),
        'impact': get_choice_formatter_func(PathogenicityImpact.CHOICES),
        'interpro_domain': remove_empty_multiples,
        'mastermind_count_1_cdna': get_clean_and_pick_single_value_func(operator.itemgetter(0), int),
        'mastermind_count_2_cdna_prot': get_clean_and_pick_single_value_func(operator.itemgetter(1), int),
        'mastermind_count_3_aa_change': get_clean_and_pick_single_value_func(operator.itemgetter(2), int),
        'mutation_assessor_pred_most_damaging': get_most_damaging_func(MutationAssessorPrediction),
        'mutation_taster_pred_most_damaging': get_most_damaging_func(MutationTasterPrediction),
        'polyphen2_hvar_pred_most_damaging': get_most_damaging_func(Polyphen2Prediction),
        # conservation fields are from BigWig, which can return multiple entries
        # for deletions. Higher = more conserved, so for rare disease filtering taking max makes sense
        'phylop_30_way_mammalian': format_pick_highest_float,
        'phylop_46_way_mammalian': format_pick_highest_float,
        'phylop_100_way_vertebrate': format_pick_highest_float,
        'phastcons_30_way_mammalian': format_pick_highest_float,
        'phastcons_46_way_mammalian': format_pick_highest_float,
        'phastcons_100_way_vertebrate': format_pick_highest_float,
        'somatic': format_vep_somatic,
        'topmed_af': format_pick_highest_float,
    }
    if self.genome_build == GenomeBuild.grch38():
        self.field_formatters["gnomad_filtered"] = gnomad_filtered_func

    self.source_field_to_columns = defaultdict(set)
    self.ignored_vep_fields = self.VEP_NOT_COPIED_FIELDS.copy()

    vc = VEPConfig(self.genome_build)

    # Sort to have consistent VCF headers
    for cvf in ColumnVEPField.filter_for_build(self.genome_build).order_by("source_field"):
        try:
            if cvf.vep_custom:  # May not be configured
                prefix = cvf.get_vep_custom_display()
                setting_key = prefix.lower()
                _ = vc[setting_key]  # May throw exception if not setup
                if cvf.source_field_has_custom_prefix:
                    self.ignored_vep_fields.append(prefix)

            self.source_field_to_columns[cvf.vep_info_field].add(cvf.variant_grid_column_id)
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt / SystemExit
            logging.warning("Skipping custom %s due to missing settings", cvf.vep_info_field)

    self.prediction_pathogenic_values = {
        'sift': SIFTPrediction.get_damage_or_greater_levels(),
        'fathmm_pred_most_damaging': FATHMMPrediction.get_damage_or_greater_levels(),
        'mutation_assessor_pred_most_damaging': MutationAssessorPrediction.get_damage_or_greater_levels(),
        'mutation_taster_pred_most_damaging': MutationTasterPrediction.get_damage_or_greater_levels(),
        'polyphen2_hvar_pred_most_damaging': Polyphen2Prediction.get_damage_or_greater_levels(),
    }
def create_manual_variant_entry_from_text(request, genome_build_name, variants_text):
    """Create manual variants from ``variants_text`` for the requesting user.

    The genome build is looked up by name or alias, then the request is
    redirected to the ``manual_variant_entry`` URL.
    """
    build = GenomeBuild.get_name_or_alias(genome_build_name)
    create_manual_variants(
        request.user,
        build,
        variants_text,
    )
    return redirect('manual_variant_entry')
def annotation(request):
    """Gather installation / status details of the annotation data sources.

    Collects per-build annotation details, gene symbol alias counts, ontology
    term / relationship / import information, diagnostic gene list and ClinVar
    citation counts, Human Protein Atlas counts and the Somalier configuration
    check.

    NOTE(review): the visible source ends after the Somalier check without
    building a context or returning a response; the remainder of the view is
    presumably outside this excerpt, so every function-level name is kept.
    """
    # Set Variables to None for uninstalled components, the template will show installation instructions
    ensembl_biomart_transcript_genes = None
    diagnostic_gene_list = None

    build_contigs = get_build_contigs()
    genome_build_annotations = {}
    builds_ok = []
    for genome_build in GenomeBuild.builds_with_annotation():
        details = _get_build_annotation_details(build_contigs, genome_build)
        genome_build_annotations[genome_build.name] = details
        builds_ok.append(details.get("ok", False))

    gene_symbol_alias_counts = get_field_counts(GeneSymbolAlias.objects.all(), "source")
    if gene_symbol_alias_counts:
        # Re-key by the human readable label of each alias source
        gene_symbol_alias_counts = {
            GeneSymbolAliasSource(source).label: count
            for source, count in gene_symbol_alias_counts.items()
        }

    all_ontologies_accounted_for = True

    ontology_services = [
        OntologyService.MONDO,
        OntologyService.OMIM,
        OntologyService.HPO,
        OntologyService.HGNC,
    ]
    # NOTE(review): the original comment here said not to report HGNC (it is just
    # a stub for other items to relate to), yet HGNC is counted - confirm intent.
    ontology_counts = [
        {"service": service,
         "count": OntologyTerm.objects.filter(ontology_service=service).count()}
        for service in ontology_services
    ]

    # Relationship counts are symmetric: A->B plus B->A, stored under both key orders
    ontology_relationship_counts = {}
    for first_index, first_service in enumerate(ontology_services):
        for second_service in ontology_services[first_index:]:
            join_count = OntologyTermRelation.objects.filter(
                source_term__ontology_service=first_service,
                dest_term__ontology_service=second_service).count()
            if first_service != second_service:
                join_count += OntologyTermRelation.objects.filter(
                    source_term__ontology_service=second_service,
                    dest_term__ontology_service=first_service).count()
            ontology_relationship_counts[f"{first_service}{second_service}"] = join_count
            ontology_relationship_counts[f"{second_service}{first_service}"] = join_count

    ontology_imports = []
    import_contexts = [
        "mondo_file",
        "gencc_file",
        "hpo_file",
        "omim_file",
        "biomart_omim_aliases",
        "phenotype_to_genes",
    ]
    for context in import_contexts:
        last_import = OntologyImport.objects.filter(context=context).order_by('-created').first()
        # don't complain about omim_file not being imported as not available to environments without license
        if not (last_import or context == "omim_file"):
            all_ontologies_accounted_for = False
        ontology_imports.append({"context": context, "last_import": last_import})

    diagnostic = GeneListCategory.objects.get(name='Diagnostic')
    diagnostic_gene_list_count = diagnostic.genelist_set.all().count()
    if diagnostic_gene_list_count:
        diagnostic_gene_list = f"{diagnostic_gene_list_count} diagnostic gene lists"

    clinvar_citations = ClinVarCitation.objects.count()
    if clinvar_citations:
        num_cached_clinvar_citations = CachedCitation.objects.count()
        clinvar_citations = f"{clinvar_citations} ClinVar citations ({num_cached_clinvar_citations} cached)"

    hpa_version = HumanProteinAtlasAnnotationVersion.objects.order_by("-annotation_date").first()
    hpa_counts = HumanProteinAtlasAnnotation.objects.filter(version=hpa_version).count()

    somalier = None
    somalier_enabled = settings.SOMALIER.get("enabled")
    if somalier_enabled:
        somalier = _verify_somalier_config()
annotation_run.error_exception = "Manually failed" annotation_run.save() num_errored += 1 message = f"{genome_build} - set {num_errored} annotation runs to Error" elif f"retry-annotation-runs-{genome_build.name}" in request.POST: num_retrying = 0 for annotation_run in annotation_runs.filter( status=AnnotationStatus.ERROR): annotation_run_retry(annotation_run) num_retrying += 1 message = f"{genome_build} - retrying {num_retrying} annotation runs." if message: messages.add_message(request, messages.INFO, message) for genome_build in GenomeBuild.builds_with_annotation(): qs = AnnotationRun.objects.filter( annotation_range_lock__version__genome_build=genome_build) field_counts = get_field_counts(qs, "status") summary_data = Counter() for field, count in field_counts.items(): summary = AnnotationStatus.get_summary_state(field) summary_data[summary] += count genome_build_summary[genome_build.pk] = summary_data genome_build_field_counts[genome_build.pk] = { as_display[k]: v for k, v in field_counts.items() } context = { "genome_build_summary": genome_build_summary,
variant_tag, created = VariantTag.objects.get_or_create( variant_id=variant_id, tag=tag, genome_build=genome_build, location=location, analysis=analysis, user=request.user) if node_id: variant_tag.node_id = node_id variant_tag.save() else: if genome_build_name is None: raise ValueError( "Adding requires either 'analysis_id' or 'genome_build_name'" ) genome_build = GenomeBuild.get_name_or_alias(genome_build_name) variant_tag, created = VariantTag.objects.get_or_create( variant_id=variant_id, tag=tag, analysis=None, location=location, user=request.user, defaults={"genome_build": genome_build}) if created: # Only return new if anything created ret = VariantTagSerializer(variant_tag, context={ "request": request }).data elif op == 'del': # Deletion of tags is for analysis (all users)