def _get_build_annotation_details(build_contigs, genome_build):
    """Collect annotation status details for one genome build into a dict.

    Args:
        build_contigs: Mapping queried with ``.get(genome_build.name)``; the
            result is stored under ``"contigs"``.
        genome_build: Build object; this function reads its ``name``,
            ``settings``, ``reference_fasta`` and ``annotation_consortium``.

    The details gathered here are stored in ``annotation_details``.

    NOTE(review): no ``return`` statement (and no use of ``reference_ok``) is
    visible in this view of the function - it appears to continue beyond what
    is shown here. Confirm against the full file.
    """
    annotation_details = {
        "contigs": build_contigs.get(genome_build.name),
        "annotation_consortium": genome_build.settings["annotation_consortium"],
    }
    reference_ok = False
    try:
        # Accessing reference_fasta may raise; record the error text instead
        # of failing the whole report.
        annotation_details["reference_fasta"] = genome_build.reference_fasta
        reference_ok = True
    except Exception as e:
        annotation_details["reference_fasta_error"] = str(e)

    av = AnnotationVersion.latest(genome_build, validate=False)
    if av is None:
        # Maybe doesn't exist - attempt to create
        try:
            get_variant_annotation_version(genome_build)
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
        # best-effort is intended here, but `except Exception` would be safer.
        except:
            pass
        # Look again in case the call above created one.
        av = AnnotationVersion.latest(genome_build, validate=False)

    if av:
        annotation_details["latest"] = av
        genes_and_transcripts = None
        try:
            # Stats for the build's own annotation consortium...
            genes_and_transcripts = _get_gene_and_transcript_stats(
                genome_build, genome_build.annotation_consortium)
            annotation_details["genes_and_transcripts"] = genes_and_transcripts

            # ...and for every other consortium, keyed by display name.
            annotation_consortia = dict(AnnotationConsortium.choices)
            other_consortia = set(annotation_consortia.keys()) - {
                genome_build.annotation_consortium
            }
            other_gene_annotation = {}
            for other_ac in other_consortia:
                annotation_consortium_display = annotation_consortia[other_ac]
                other_gene_annotation[
                    annotation_consortium_display] = _get_gene_and_transcript_stats(
                        genome_build, other_ac)
            annotation_details["other_consortia"] = other_gene_annotation
        # NOTE(review): any failure above is silently dropped, leaving the
        # gene/transcript keys partially or wholly absent from the dict.
        except:
            pass

        gene_annotation_release = None
        if av.variant_annotation_version:
            if gene_annotation_release := av.variant_annotation_version.gene_annotation_release:
                # Only the file's base name is reported, not its full path.
                annotation_details["gene_annotation_release"] = {
                    "name": str(gene_annotation_release),
                    "filename": os.path.basename(
                        gene_annotation_release.gene_annotation_import.filename),
                }

        # Key is only added when the count is non-zero.
        if gene_annotation_counts := av.get_gene_annotation().count():
            annotation_details[
                "gene_level_annotation"] = f"{gene_annotation_counts} gene annotations."
def get_warnings(self) -> List[str]:
    """Return warning messages about this object's annotation version.

    Returns:
        An empty list when no annotation version is set or when it equals
        the latest one for ``self.genome_build``; otherwise a single message
        naming both versions.
    """
    # Nothing to compare against when no annotation version is set.
    if not self.annotation_version:
        return []

    messages = []
    latest_av = AnnotationVersion.latest(self.genome_build)
    if self.annotation_version != latest_av:
        outdated_message = (
            f"Using AnnotationVersion {self.annotation_version} while most recent version "
            f"for build is : {latest_av}."
        )
        messages.append(outdated_message)
    return messages
def set_defaults_and_save(self, user):
    """Assign the owner and default configuration, then save.

    Sets ``user`` and the latest annotation version for ``self.genome_build``,
    copies column and sort defaults from the user's settings, and saves. If
    the user's settings provide a node count settings collection, its filters
    are applied via ``set_node_count_types`` and the object is saved again.

    Args:
        user: The user who will own this object and whose settings supply
            the defaults.
    """
    self.user = user
    self.annotation_version = AnnotationVersion.latest(self.genome_build)

    # Initial config from user settings
    settings_for_user = UserSettings.get_for_user(user)
    self.custom_columns_collection = settings_for_user.columns
    self.default_sort_by_column = settings_for_user.default_sort_by_column
    self.save()

    node_count_collection = settings_for_user.get_node_count_settings_collection()
    if not node_count_collection:
        return

    node_count_filters = node_count_collection.get_node_count_filters()
    self.set_node_count_types(node_count_filters)
    self.save()
def create(analysis_template: AnalysisTemplate,
           genome_build: GenomeBuild,
           user: User = None):
    """Create an ``AnalysisTemplateRun`` from a template's active version.

    The active version's analysis snapshot is cloned, configured for the
    given build and user, saved, and permissions are assigned before the
    run record is created.

    Args:
        analysis_template: Template whose ``active`` version is used.
        genome_build: Build assigned to the cloned analysis; also used to
            look up the latest annotation version (with ``validate=True``).
        user: Owner of the new analysis. When ``None``, ``admin_bot()`` is
            used.

    Returns:
        The newly created ``AnalysisTemplateRun``.
    """
    owner = admin_bot() if user is None else user

    active_version = analysis_template.active
    cloned = active_version.analysis_snapshot.clone()
    cloned.user = owner
    cloned.genome_build = genome_build
    cloned.annotation_version = AnnotationVersion.latest(genome_build,
                                                         validate=True)
    cloned.template_type = None
    cloned.visible = True
    # Will be set in populate arguments
    cloned.name = f"TemplateRun from {analysis_template.name}"
    cloned.save()

    assign_permission_to_user_and_groups(owner, cloned)
    return AnalysisTemplateRun.objects.create(template_version=active_version,
                                              analysis=cloned)
def handle(self, *args, **options):
    """Create gene annotation versions for one release or for every build.

    When the ``gene_annotation_release`` option is given, a gene annotation
    version is created for that release only. Otherwise each genome build
    with annotation is visited, and one is created for the gene annotation
    release of the build's latest annotation version, unless that version
    already has gene annotation.

    Options read: ``force``, ``gene_annotation_release``, ``missing``.

    Raises:
        ValueError: A GeneAnnotationVersion already exists for the requested
            release and ``force`` was not set.
        InvalidAnnotationVersionError: A build has no annotation version, or
            its latest one lacks a variant annotation version or a gene
            annotation release.
    """
    print(f"Started: {timezone.now()}")
    force = options["force"]
    gar_id = options["gene_annotation_release"]
    # NOTE(review): read but never used in this method - confirm whether the
    # "missing" option is still meant to influence behaviour.
    missing = options["missing"]

    self._validate_has_required_data()
    gene_symbols = set(GnomADGeneConstraint.objects.all().values_list(
        "gene_symbol_id", flat=True))

    if gar_id:
        gene_annotation_release = GeneAnnotationRelease.objects.get(pk=gar_id)
        # `not force` first so the existence query is skipped when forcing.
        if not force and GeneAnnotationVersion.objects.filter(
                gene_annotation_release=gene_annotation_release).exists():
            # The message previously contained a literal, unformatted "{}".
            raise ValueError(
                f"Existing GeneAnnotationVersion for gene_annotation_release={gene_annotation_release} exists! Use --force?"
            )
        self._create_gene_annotation_version(gene_annotation_release,
                                             gene_symbols)
        return

    for genome_build in GenomeBuild.builds_with_annotation():
        av = AnnotationVersion.latest(genome_build, validate=False)
        if av is None:
            # Without validation, latest() may find nothing; fail with a
            # clear error instead of an AttributeError on None below.
            raise InvalidAnnotationVersionError(
                f"No AnnotationVersion exists for genome build {genome_build}"
            )

        if av.gene_annotation_version:
            print(f"Skipping {av} - already has GeneAnnotation")
            continue

        if not av.variant_annotation_version:
            raise InvalidAnnotationVersionError(
                f"AnnotationVersion {av} has no VariantAnnotationVersion set"
            )

        gar = av.variant_annotation_version.gene_annotation_release
        if not gar:
            raise InvalidAnnotationVersionError(
                f"VariantAnnotationVersion {av.variant_annotation_version} needs to be assinged an GeneAnnotationRelease"
            )

        self._create_gene_annotation_version(gar, gene_symbols)
def get_queryset_for_latest_annotation_version(klass, genome_build):
    """Return the queryset for ``klass`` using the build's latest annotation version.

    Args:
        klass: Passed through to ``get_queryset_for_annotation_version``.
        genome_build: Build whose latest ``AnnotationVersion`` is looked up.
    """
    return get_queryset_for_annotation_version(
        klass,
        annotation_version=AnnotationVersion.latest(genome_build),
    )
def get_variant_queryset_for_latest_annotation_version(genome_build):
    """Return the variant queryset for the build's latest annotation version.

    Args:
        genome_build: Build whose latest ``AnnotationVersion`` is looked up.
    """
    latest_version = AnnotationVersion.latest(genome_build)
    variant_qs = get_variant_queryset_for_annotation_version(latest_version)
    return variant_qs
def calculate_needed_stats(run_async=False):
    """ Works out what needs to be calculated and does so

    Unsuccessful ``SampleStats`` rows are deleted first. Then, for every
    genome build with annotation, samples whose stats are missing or do not
    match the build's latest annotation version are found, and a
    ``calculate_vcf_stats`` task is run for each VCF containing such a sample.

    Args:
        run_async: When true, tasks are submitted with ``apply_async()``;
            otherwise each task is applied inline and its outcome is logged.
    """
    logging.info(
        "Deleting Sample Stats where import_status != SUCCESS (leftovers)")
    deleted = SampleStats.objects.exclude(
        import_status=ImportStatus.SUCCESS).delete()
    logging.info(deleted)

    logging.info("Calculating Sample Stats (run_async=%s)", run_async)
    for genome_build in GenomeBuild.builds_with_annotation():
        try:
            annotation_version = AnnotationVersion.latest(genome_build)
        except InvalidAnnotationVersionError:
            logging.info(
                "Skipping calculating sample stats for incomplete annotation version for build %s",
                genome_build)
            continue

        # Stats are stale when none exist for the latest variant / gene /
        # ClinVar versions. Built once and shared by both conditions below;
        # combining Q objects with `|` returns a new Q, so sharing is safe.
        # NOTE(review): the passing-filters condition reuses these same
        # relations, as the original code did - confirm that is intended.
        stale_annotation_stats = (
            ~Q(samplevariantannotationstats__variant_annotation_version=
               annotation_version.variant_annotation_version)
            | ~Q(samplegeneannotationstats__gene_annotation_version=
                 annotation_version.gene_annotation_version)
            | ~Q(sampleclinvarannotationstats__clinvar_version=
                 annotation_version.clinvar_version)
        )
        needs_stats = Q(samplestats__isnull=True) | stale_annotation_stats
        needs_stats_passing_filters = (
            Q(samplestatspassingfilter__isnull=True) | stale_annotation_stats)

        # Passing-filter stats only matter for samples whose VCF has filters.
        qs_filter = needs_stats | (Q(vcf__vcffilter__isnull=False)
                                   & needs_stats_passing_filters)
        samples_qs = Sample.objects.filter(
            qs_filter, vcf__genome_build=genome_build).distinct()

        if run_async:
            logging.info("Launching sample stats jobs asynchronously")

        vcf_qs = VCF.objects.filter(sample__in=samples_qs).distinct()
        logging.info("Build: %s Samples: %d in %d VCFs", genome_build,
                     samples_qs.count(), vcf_qs.count())

        for vcf in vcf_qs:
            task = calculate_vcf_stats.si(
                vcf.pk, annotation_version.pk)  # @UndefinedVariable
            if run_async:
                task.apply_async()
                continue

            result = task.apply()
            if result.successful():
                logging.info("Successfully calculated stats for %s", vcf)
            else:
                logging.error("Died for VCF %s: %s", vcf, result.result)