Example #1
import os
from typing import Dict

from data_refinery_common.logging import get_and_configure_logger
from data_refinery_common.models import (
    ComputationalResult,
    ComputedFile,
    Organism,
    OrganismIndex,
    OriginalFile,
    Pipeline,
    Processor,
    ProcessorJob,
)
from data_refinery_common.utils import get_env_variable, get_env_variable_gracefully
from data_refinery_workers.processors import utils


logger = get_and_configure_logger(__name__)
JOB_DIR_PREFIX = "processor_job_"
GENE_TO_TRANSCRIPT_TEMPLATE = "{gene_id}\t{transcript_id}\n"
GENE_TYPE_COLUMN = 2
S3_TRANSCRIPTOME_INDEX_BUCKET_NAME = get_env_variable_gracefully(
    "S3_TRANSCRIPTOME_INDEX_BUCKET_NAME", False
)
LOCAL_ROOT_DIR = get_env_variable("LOCAL_ROOT_DIR", "/home/user/data_store")
# Removes each occurrence of ; and "
IDS_CLEANUP_TABLE = str.maketrans({";": None, "\"": None})
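# Illustrative usage (not from the original source):
#   'gene_id "ENSG00000139618";'.translate(IDS_CLEANUP_TABLE) == 'gene_id ENSG00000139618'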


def _compute_paths(job_context: Dict) -> Dict:
    """Computes the paths for all the directories used/created by this processor.

    Also computes a couple other path-based properties and adds them to the job_context.
    """
    # All files for the job are in the same directory.
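    # (The original body is truncated here; the lines below are a minimal,
    # hypothetical sketch of the path computation — the job_context key names
    # are assumptions for illustration.)
    job_context["work_dir"] = os.path.join(
        LOCAL_ROOT_DIR, JOB_DIR_PREFIX + str(job_context["job_id"])
    )
    os.makedirs(job_context["work_dir"], exist_ok=True)
    return job_context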

Example #2
import abc
from typing import List

from data_refinery_common import logging
from data_refinery_common.enums import Downloaders
from data_refinery_common.job_lookup import determine_downloader_task
from data_refinery_common.message_queue import send_job
from data_refinery_common.models import (
    DownloaderJob,
    DownloaderJobOriginalFileAssociation,
    Experiment,
    OriginalFile,
    Sample,
    SurveyJob,
)

logger = logging.get_and_configure_logger(__name__)


# In Python 3 the old-style __metaclass__ assignment has no effect, so the
# abstract members below were not actually enforced; subclassing abc.ABC fixes that.
class ExternalSourceSurveyor(abc.ABC):

    def __init__(self, survey_job: SurveyJob):
        self.survey_job = survey_job

    @property
    @abc.abstractmethod
    def source_type(self):
        """Abstract property identifying the external source being surveyed."""

    @abc.abstractmethod
    def discover_experiments_and_samples(self):
        """Abstract method to survey a source."""