from typing import Dict

from data_refinery_common.logging import get_and_configure_logger
from data_refinery_common.models import (
    ComputationalResult,
    ComputedFile,
    Organism,
    OrganismIndex,
    OriginalFile,
    Pipeline,
    Processor,
    ProcessorJob,
)
from data_refinery_common.utils import get_env_variable, get_env_variable_gracefully
from data_refinery_workers.processors import utils

logger = get_and_configure_logger(__name__)

JOB_DIR_PREFIX = "processor_job_"
GENE_TO_TRANSCRIPT_TEMPLATE = "{gene_id}\t{transcript_id}\n"
GENE_TYPE_COLUMN = 2
S3_TRANSCRIPTOME_INDEX_BUCKET_NAME = get_env_variable_gracefully(
    "S3_TRANSCRIPTOME_INDEX_BUCKET_NAME", False
)
LOCAL_ROOT_DIR = get_env_variable("LOCAL_ROOT_DIR", "/home/user/data_store")

# Removes each occurrence of ; and "
IDS_CLEANUP_TABLE = str.maketrans({";": None, '"': None})


def _compute_paths(job_context: Dict) -> str:
    """Computes the paths for all the directories used/created by this processor.

    Also computes a couple of other path-based properties and adds them to the
    job_context.
    """
    # All files for the job are in the same directory.
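
# A minimal sketch of how the module-level constants above behave; the example
# values are hypothetical, and only built-in str.format / str.translate are used:
#
#     >>> GENE_TO_TRANSCRIPT_TEMPLATE.format(gene_id="ENSG01", transcript_id="ENST01")
#     'ENSG01\tENST01\n'
#     >>> 'gene_id "ENSG01";'.translate(IDS_CLEANUP_TABLE)
#     'gene_id ENSG01'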
import abc
from typing import List

from data_refinery_common import logging
from data_refinery_common.enums import Downloaders
from data_refinery_common.job_lookup import determine_downloader_task
from data_refinery_common.message_queue import send_job
from data_refinery_common.models import (
    DownloaderJob,
    DownloaderJobOriginalFileAssociation,
    Experiment,
    OriginalFile,
    Sample,
    SurveyJob,
)

logger = logging.get_and_configure_logger(__name__)


class ExternalSourceSurveyor(metaclass=abc.ABCMeta):
    # ABCMeta enforces that concrete subclasses implement the abstract
    # methods declared below.

    def __init__(self, survey_job: SurveyJob):
        self.survey_job = survey_job

    @abc.abstractproperty
    def source_type(self):
        return

    @abc.abstractmethod
    def discover_experiments_and_samples(self):
        """Abstract method to survey a source."""
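
# A minimal sketch of a concrete surveyor, assuming only the abstract
# interface shown above; the class name and source-type value are hypothetical.
class ExampleSurveyor(ExternalSourceSurveyor):
    def source_type(self):
        return "EXAMPLE"

    def discover_experiments_and_samples(self):
        # A real implementation would query its external source and create
        # Experiment and Sample records for whatever it discovers.
        logger.info("Surveying example source.")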