Ejemplo n.º 1
0
def annotation_scheduler():
    """ This is run on scheduling_single_worker queue to avoid race conditions.

        A cache key is additionally used as a lock: if the key is already present this call
        only logs that someone else holds it and returns.

        With the lock held, for each genome build with annotation, the latest
        variant annotation version is passed to _handle_variant_annotation_version
        repeatedly until that returns None. The lock is always released afterwards.

        Ordinary errors are logged via log_traceback and not re-raised. Interpreter-exit
        signals (SystemExit, KeyboardInterrupt) are allowed to propagate (after the lock
        is released) so the worker can shut down. """
    LOCK_EXPIRE = 60 * 5  # 5 minutes
    lock_id = "annotation-scheduler-lock"

    try:
        # cache.add fails if the key already exists
        if not cache.add(lock_id, "true", LOCK_EXPIRE):
            logging.info("Someone else has %s", lock_id)
            return

        try:
            logging.info("Got the lock for annotation scheduler")
            for genome_build in GenomeBuild.builds_with_annotation():
                annotation_version = AnnotationVersion.latest(genome_build)
                variant_annotation_version = annotation_version.variant_annotation_version
                # Keep going until the handler reports there is nothing left (None)
                while True:
                    range_lock = _handle_variant_annotation_version(
                        variant_annotation_version)
                    if range_lock is None:
                        break
        finally:
            logging.info("Releasing lock")
            cache.delete(lock_id)
    except Exception:
        log_traceback()
Ejemplo n.º 2
0
def wait_for_node(node_id):
    """ Used to build a dependency on a node that's already loading.

        The danger here is that we'll end up taking up a celery worker, waiting for
        the parent to become available... so we have to be careful

        The node is polled with increasing sleeps between checks. The function returns as soon as:
            * the node's status is ready
            * a NodeTask with a celery task exists for the node/version (after waiting on that task)
            * the node is neither ready nor QUEUED and has no celery task (an ERROR event is created)

        If the node is still QUEUED after all the sleeps, it is checked one last time and
        an ERROR event is created if it is still not ready.

        Ordinary errors are logged via log_traceback and not re-raised. SystemExit and
        KeyboardInterrupt propagate so a worker shutdown is not swallowed mid-wait.
     """

    EVENT_NAME = "wait_for_node"

    try:
        TIME_BETWEEN_CHECKS = [5, 5, 10, 10, 30, 30, 60, MINUTE_SECS * 2]
        total_time = 0
        for sleep_time in TIME_BETWEEN_CHECKS:

            # Any exception will exit this task which is ok
            node = AnalysisNode.objects.get(pk=node_id)

            if NodeStatus.is_ready(node.status):
                return

            try:
                node_task = NodeTask.objects.get(node=node,
                                                 version=node.version)
                if node_task.celery_task:
                    wait_for_task(node_task.celery_task)
                    return
            except NodeTask.DoesNotExist:
                pass

            # QUEUED - there won't be a celery task yet
            if node.status == NodeStatus.QUEUED:
                details = f"Waiting on parent node {node_id} which is QUEUED"
                details += f" - waiting for {sleep_time} secs, {total_time} so far!"
                create_event(None,
                             EVENT_NAME,
                             details,
                             severity=LogLevel.WARNING)
            else:
                details = f"Waiting on parent node {node_id} status {node.status} no celery task!"
                create_event(None,
                             EVENT_NAME,
                             details,
                             severity=LogLevel.ERROR)
                return

            logging.info("Sleeping for %d seconds", sleep_time)
            sleep(sleep_time)
            total_time += sleep_time

        # The loop always ends on a sleep, so look once more before declaring a timeout -
        # otherwise the final (longest) wait could never be seen to succeed
        node = AnalysisNode.objects.get(pk=node_id)
        if NodeStatus.is_ready(node.status):
            return

        # Timeout
        details = f"Waited on parent node {node_id} for {total_time} seconds. "
        details += "Didn't become available, dying so we don't cause a deadlock!"
        create_event(None, EVENT_NAME, details, severity=LogLevel.ERROR)
    except Exception:
        log_traceback()
Ejemplo n.º 3
0
def delete_annotation_run(annotation_run_id):
    """ Flag the AnnotationRun with this pk as DELETING (saved first), then delete it.

        Anything raised along the way is logged via log_traceback and re-raised. """
    try:
        run = AnnotationRun.objects.get(pk=annotation_run_id)
        # Persist the DELETING status before the delete itself starts
        run.status = AnnotationStatus.DELETING
        run.save()
        run.delete()
    except BaseException:
        log_traceback()
        raise
Ejemplo n.º 4
0
def create_and_launch_analysis_tasks(analysis_id, run_async=True):
    """ This is run in a single worker queue so that we avoid race conditions

        Gets the update tasks for the analysis (logging and re-raising on any failure)
        then hands each one to execute_task, passing run_async through. """
    try:
        update_tasks = _get_analysis_update_tasks(analysis_id)
    except BaseException:
        log_traceback()
        raise

    for update_task in update_tasks:
        execute_task(update_task, run_async=run_async)
Ejemplo n.º 5
0
def _verify_somalier_config() -> Optional[str]:
    """ Run the configured somalier command (no arguments) and return the first line of its output.

        stderr is merged into stdout, so the line may come from either stream.

        Returns None if the command could not be run, exited with an error, or its output
        could not be decoded - the failure is logged via log_traceback rather than raised.
        SystemExit / KeyboardInterrupt are not swallowed. """
    somalier_cfg = SomalierConfig()
    somalier_bin = somalier_cfg.get_annotation("command")
    try:
        somalier_output = check_output([somalier_bin],
                                       stderr=subprocess.STDOUT)
        # Only the first line of the output is kept
        return somalier_output.decode().split("\n", 1)[0]
    except Exception:
        log_traceback()
        return None
Ejemplo n.º 6
0
class NodeJSONViewMixin(View, ABC):
    """ Abstract view base: looks up an AnalysisNode for the request, then builds data for it.

        Subclasses must implement _get_node and _get_data, and may override _get_redirect. """

    @abstractmethod
    def _get_node(self, request, **kwargs) -> AnalysisNode:
        """ Return the node for this request (receives the keyword arguments given to get_response) """
        pass

    @abstractmethod
    def _get_data(self, request, node: AnalysisNode, *args, **kwargs):
        """ Return the data for node (get_response passes node as a keyword argument) """
        pass

    def _get_redirect(self, request, node: AnalysisNode):
        """ Hook: return something truthy to have get_response return it instead of building data.
            The default returns None (no redirect). """
        return None

    def get_response(self, request, *args, **kwargs):
        """ Get the node, return the redirect if there is one, otherwise collect data with HTTP_200_OK.

            A NonFatalNodeError is logged, and data becomes a message dict flagged "non_fatal",
            with status taken from the exception.

            NOTE(review): the visible code stops inside the error handler and never uses
            data / status - the rest of this method is presumably cut off, confirm against the full file. """
        try:
            node = self._get_node(request, **kwargs)
            # A truthy redirect short-circuits everything else
            if ret := self._get_redirect(request, node):
                return ret
            data = self._get_data(request, *args, node=node, **kwargs)
            status = HTTP_200_OK
        except NonFatalNodeError as e:
            log_traceback()
            data = {"message": str(e), "non_fatal": True}
            status = e.status

            if isinstance(e, NodeNotFoundException):
                # Is the node deleted, or perhaps it was just out of version?
                if e.node_id:
                    # No row with that pk at all => report the node as deleted
                    if not AnalysisNode.objects.filter(pk=e.node_id).exists():
                        data["deleted_nodes"] = [e.node_id]
                    else:
                        # We could potentially redirect to the same page with new version
                        # but client will probably just ditch this and re-request latest anyway
                        pass
Ejemplo n.º 7
0
def annotation_versions(request):
    """ Render annotation/annotation_versions.html, listing annotation versions per genome build.

        For every genome build with annotation the context holds, keyed by build name, a tuple of:
            * an example VEP command (in.vcf -> out.vcf) as display text
            * the build's AnnotationVersion queryset, newest annotation_date first

        A failure while ensuring the build's VariantAnnotationVersion exists is logged and does
        not stop the page rendering. SystemExit / KeyboardInterrupt are not swallowed. """
    anno_versions = {}
    for genome_build in GenomeBuild.builds_with_annotation():
        # Create VariantAnnotationVersion for build if not exists
        try:
            get_variant_annotation_version(genome_build)
        except Exception:
            log_traceback()

        qs = AnnotationVersion.objects.filter(
            genome_build=genome_build).order_by("-annotation_date")
        vep_command = get_vep_command("in.vcf", "out.vcf", genome_build,
                                      genome_build.annotation_consortium)
        # Put each option on its own line for display
        # NOTE(review): this also drops the first "-" of every option - confirm that is intended
        vep_command = " ".join(vep_command).replace(" -", "\n")
        anno_versions[genome_build.name] = (vep_command, qs)

    context = {"annotation_versions": anno_versions}
    return render(request, "annotation/annotation_versions.html", context)
Ejemplo n.º 8
0
def node_cache_task(node_id, version):
    """ Write the cache for a node version into its NodeCache variant collection.

        Does nothing if the node/version no longer exists, or the variant collection is not
        in CREATED status. A node that is not valid, or does not modify its parents, has its
        collection marked SKIPPED.

        Otherwise the collection goes to PROCESSING and then SUCCESS or ERROR depending on
        whether write_cache raised. A node that is not READY is moved to LOADING_CACHE while
        writing, then LOADING (or ERROR) afterwards. """
    logging.info("node_cache_task: %s/%d", node_id, version)

    try:
        node = AnalysisNode.objects.get_subclass(pk=node_id, version=version)
    except AnalysisNode.DoesNotExist:  # @UndefinedVariable
        return  # Node was deleted - this task is obsolete

    variant_collection = NodeCache.objects.get(node_version=node.node_version).variant_collection
    if variant_collection.status != ProcessingStatus.CREATED:
        return

    if not node.is_valid() or not node.modifies_parents():
        logging.debug("Not doing anything for node %s", node.pk)
        variant_collection.status = ProcessingStatus.SKIPPED
        variant_collection.save()
        return

    variant_collection.status = ProcessingStatus.PROCESSING
    variant_collection.save()

    # Node could be READY, we're just generating cache afterwards (node doesn't need but children do)
    if node.status != NodeStatus.READY:
        node.set_node_task_and_status(node_cache_task.request.id,
                                      NodeStatus.LOADING_CACHE)

    try:
        node.write_cache(variant_collection)
    except BaseException:
        # Catch-all on purpose: whatever went wrong is recorded as ERROR below
        log_traceback()
        collection_outcome, node_outcome = ProcessingStatus.ERROR, NodeStatus.ERROR
    else:
        collection_outcome, node_outcome = ProcessingStatus.SUCCESS, NodeStatus.LOADING

    variant_collection.status = collection_outcome
    variant_collection.save()

    # Re-read the status rather than reusing the earlier check
    if node.status != NodeStatus.READY:
        node.status = node_outcome
        node.save()
Ejemplo n.º 9
0
        if hl:
            func, arg = hl
            try:
                match_type, records = func(arg)
                if match_type == PhenotypeMatchTypes.HPO:
                    hpo_list.extend(records)
                elif match_type == PhenotypeMatchTypes.OMIM:
                    omim_alias_list.extend(records)
                elif match_type == PhenotypeMatchTypes.GENE:
                    gene_symbols.extend(records)

                # logging.info("Got exact: %s => %s", text, records)
            except Exception as e:
                msg = f"Error: {e}, func: {func}, arg={arg}"
                logging.error(msg)
                log_traceback()
        #            raise ValueError(msg)

        return hpo_list, omim_alias_list, gene_symbols

    @classmethod
    def _skip_word(cls, lower_text):
        """ Return true to skip a word, throws SkipAllPhenotypeMatchException to skip all.
            Only need to skip >MIN_LENGTH words as will do that later (after exact) """

        # For multi-words where you want to skip components
        SKIP_ALL = {
            "library prep", "to cgf", "set up", "ad pattern", "recurrent eps",
            "rest of"
        }
        if lower_text in SKIP_ALL:
Ejemplo n.º 10
0
def get_evidence_fields_from_preferred_transcript(
        genome_build: GenomeBuild,
        variant: Variant,
        clingen_allele: ClinGenAllele,
        hgvs_matcher: HGVSMatcher,
        transcript_version: TranscriptVersion,
        evidence_transcript_columns,
        ekey_formatters,
        annotation_version) -> AutopopulateData:
    """ Build autopopulate evidence data for a variant on the given transcript version.

        Starts from transcript_autopopulate(transcript_version) then adds, in order:
            * gene symbol
            * (only if variant is truthy) transcript annotation columns listed in evidence_transcript_columns,
              20x gene coverage, phastcons / phylop summaries, search terms. Any ordinary error in
              this step is logged and the rest is still populated.
            * c.HGVS - if this fails, data.message is set and the key is set to an empty dict
            * p.HGVS from clingen_allele (None when there is no allele)
            * gnomAD o/e LOF summary (only when there is one)
            * PubMed search count for the gene symbol

        evidence_transcript_columns maps evidence key -> config with 'col' and 'immutable' entries. """

    gene_symbol_id = transcript_version.gene_version.gene_symbol_id
    data = transcript_autopopulate(transcript_version)
    data[SpecialEKeys.GENE_SYMBOL] = gene_symbol_id

    # Populate from TranscriptVersion data 1st (so can overwrite later)
    if variant:
        try:
            vts = VariantTranscriptSelections(variant, genome_build, annotation_version)
            transcript_data = vts.get_transcript_annotation(transcript_version)

            for evidence_key, transcript_config in evidence_transcript_columns.items():
                variant_column = transcript_config['col']
                immutable = transcript_config['immutable']
                # Getting out of dict directly, not joining via queryset
                transcript_column = variant_column.replace("variantannotation__", "")
                value = transcript_data.get(transcript_column)
                if value is not None:
                    set_evidence(data, evidence_key, value, immutable, ekey_formatters)

            gene_symbol = transcript_version.gene_version.gene_symbol
            data[SpecialEKeys.INTERNAL_SAMPLES_20X_COVERAGE] = get_20x_gene_coverage(gene_symbol)

            phastcons_dict = {
                "phastcons_30_way_mammalian": "30 way mammalian",
                "phastcons_46_way_mammalian": "46 way mammalian",
                "phastcons_100_way_vertebrate": "100 way vertebrate",
            }
            data[SpecialEKeys.PHASTCONS] = get_set_fields_summary(transcript_data, phastcons_dict, phastcons_dict)
            phylop_dict = {
                "phylop_30_way_mammalian": "30 way mammalian",
                "phylop_46_way_mammalian": "46 way mammalian",
                "phylop_100_way_vertebrate": "100 way vertebrate",
            }
            data[SpecialEKeys.PHYLOP] = get_set_fields_summary(transcript_data, phylop_dict, phylop_dict)
            if variant_annotation := vts.variant_annotation:
                data[SpecialEKeys.SEARCH_TERMS] = variant_annotation.get_search_terms()
                if settings.ANNOTATION_PUBMED_SEARCH_TERMS_ENABLED:
                    data[SpecialEKeys.PUBMED_SEARCH_TERMS] = variant_annotation.get_pubmed_search_terms()
        except Exception:
            # Best effort - annotation-derived fields are optional, carry on with whatever was set
            log_traceback()

    try:
        c_hgvs = hgvs_matcher.variant_to_c_hgvs_parts(variant, transcript_version.accession)
        if c_hgvs:
            data[SpecialEKeys.C_HGVS] = c_hgvs.full_c_hgvs
    except Exception as e:
        data.message = f"Could not parse HGVS value {e}"
        data[SpecialEKeys.C_HGVS] = {}

    if clingen_allele:
        p_hgvs = clingen_allele.get_p_hgvs(transcript_version.accession, match_version=False)
    else:
        p_hgvs = None
    data[SpecialEKeys.P_HGVS] = p_hgvs

    gnomad_oe_lof_summary = get_gnomad_oe_lof_summary(transcript_version)
    if gnomad_oe_lof_summary:
        data[SpecialEKeys.GNOMAD_OE_LOF] = gnomad_oe_lof_summary

    gs_count = GeneSymbolPubMedCount.get_for_gene_symbol(gene_symbol_id)
    data[SpecialEKeys.PUBMED_GENE_SEARCH_COUNT] = {"value": gs_count.count,
                                                   "note": f"Retrieved {gs_count.modified.date()}"}
    return data