Exemple #1
0
    def setUpClass(cls):
        """Save a survey job, a batch and one CEL file record for the tests.

        The three rows are written first; the parent class's ``setUpClass``
        is invoked last, once all of them have been saved.
        """
        job = SurveyJob(source_type="ARRAY_EXPRESS")
        job.save()

        batch_fields = {
            "survey_job": job,
            "source_type": "ARRAY_EXPRESS",
            "pipeline_required": "AFFY_TO_PCL",
            "platform_accession_code": "A-AFFY-141",
            "experiment_accession_code": "E-GEOD-59071",
            "experiment_title": "It doesn't really matter.",
            "organism_id": 9606,
            # NOTE(review): name looks masked; presumably "HOMO SAPIENS" -
            # confirm against the upstream fixture.
            "organism_name": "H**O SAPIENS",
            "release_date": "2017-05-05",
            "last_uploaded_date": "2017-05-05",
            "status": BatchStatuses.NEW.value,
        }
        new_batch = Batch(**batch_fields)
        new_batch.save()

        archive_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
        cel_file = File(
            batch=new_batch,
            name="GSM1426072.CEL",
            internal_location="A-AFFY-141/AFFY_TO_PCL",
            raw_format="CEL",
            processed_format="PCL",
            download_url=archive_url,
            size_in_bytes=0,
        )
        cel_file.save()

        super(FilesTestCase, cls).setUpClass()
def init_objects():
    """Create and save an ArrayExpress survey job, batch and CEL file.

    Returns:
        The saved batch (status ``DOWNLOADED``) whose ``files`` attribute
        has been set to a one-element list holding the saved file.
    """
    job = SurveyJob(source_type="ARRAY_EXPRESS")
    job.save()

    batch_fields = {
        "survey_job": job,
        "source_type": "ARRAY_EXPRESS",
        "pipeline_required": "AFFY_TO_PCL",
        "platform_accession_code": "A-AFFY-1",
        "experiment_accession_code": "E-MTAB-3050",
        "experiment_title": "It doesn't really matter.",
        "organism_id": 9606,
        # NOTE(review): name looks masked; presumably "HOMO SAPIENS" -
        # confirm against the upstream fixture.
        "organism_name": "H**O SAPIENS",
        "release_date": "2017-05-05",
        "last_uploaded_date": "2017-05-05",
        "status": BatchStatuses.DOWNLOADED.value,
    }
    downloaded_batch = Batch(**batch_fields)
    downloaded_batch.save()

    archive_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
    cel_file = File(
        batch=downloaded_batch,
        name="CE1234.CEL",
        internal_location="A-AFFY-1/AFFY_TO_PCL/",
        raw_format="CEL",
        processed_format="PCL",
        download_url=archive_url,
        size_in_bytes=0,
    )
    cel_file.save()

    downloaded_batch.files = [cel_file]
    return downloaded_batch
Exemple #3
0
def _insert_salmon_index():
    """Save a PROCESSED transcriptome-index batch for organism 9606.

    Besides the batch this writes its survey job, a ``kmer_size`` key/value
    row ("23") and one file record. Nothing is returned.
    """
    index_job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
    index_job.save()

    index_batch = Batch(
        survey_job=index_job,
        source_type="TRANSCRIPTOME_INDEX",
        pipeline_required="TRANSCRIPTOME_INDEX",
        platform_accession_code="TEST",
        experiment_accession_code="HOMO_SAPIENS",
        experiment_title="It doesn't really matter.",
        organism_id=9606,
        # NOTE(review): name looks masked; presumably "HOMO SAPIENS" -
        # confirm against the upstream fixture.
        organism_name="H**O SAPIENS",
        release_date="2017-11-02",
        last_uploaded_date="2017-11-02",
        status=BatchStatuses.PROCESSED.value,
    )
    index_batch.save()

    BatchKeyValue(key="kmer_size", value="23", batch=index_batch).save()

    gtf_url = ("ftp://ftp.ensembl.org/pub/release-90/gtf/homo_sapiens"
               "/Homo_sapiens.GRCh38.90.gtf.gz")
    File(
        batch=index_batch,
        name="Homo_sapiens_short.gtf.gz",
        internal_location="TEST/TRANSCRIPTOME_INDEX",
        raw_format="gtf.gz",
        processed_format="tar.gz",
        download_url=gtf_url,
        size_in_bytes=2214725074,
    ).save()
Exemple #4
0
    def handle(self, *args, **options):
        """Insert the dummy rows a downloader job depends on, then send one.

        A survey job, a NEW ArrayExpress batch and a single CEL file record
        are saved; a downloader job is then created for that batch and its
        id is passed to ``send_job`` with the ARRAY_EXPRESS downloader.
        """
        # These records stand in for what would already exist by the time
        # a real downloader job is generated.
        job = SurveyJob(source_type="ARRAY_EXPRESS")
        job.save()

        batch_fields = {
            "survey_job": job,
            "source_type": "ARRAY_EXPRESS",
            "pipeline_required": "AFFY_TO_PCL",
            "platform_accession_code": "A-AFFY-141",
            "experiment_accession_code": "E-GEOD-59071",
            "experiment_title": "It doesn't really matter.",
            "organism_id": 9606,
            # NOTE(review): name looks masked; presumably "HOMO SAPIENS" -
            # confirm against the upstream fixture.
            "organism_name": "H**O SAPIENS",
            "release_date": "2017-05-05",
            "last_uploaded_date": "2017-05-05",
            "status": BatchStatuses.NEW.value,
        }
        new_batch = Batch(**batch_fields)
        new_batch.save()

        archive_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
        cel_file = File(
            batch=new_batch,
            name="GSM1426072_CD_colon_active_2.CEL",
            internal_location="A-AFFY-141/AFFY_TO_PCL",
            raw_format="CEL",
            processed_format="PCL",
            download_url=archive_url,
            size_in_bytes=0,
        )
        cel_file.save()

        job_to_send = DownloaderJob.create_job_and_relationships(
            batches=[new_batch])
        send_job(Downloaders["ARRAY_EXPRESS"], job_to_send.id)
    def run_trasnscriptome_processor(self):
        """Insert dummy transcriptome-index rows and send a processor job.

        Saves a survey job, a DOWNLOADED batch, a ``kmer_size`` key/value
        row ("31") and two file records (GTF first, then FASTA). A processor
        job is then created for the batch and passed to ``send_job`` using
        the pipeline named by ``batch.pipeline_required``.
        """
        # These records stand in for what would already exist by the time
        # a real processor job is generated.
        job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
        job.save()

        batch_fields = {
            "survey_job": job,
            "source_type": "TRANSCRIPTOME_INDEX",
            "pipeline_required": "TRANSCRIPTOME_INDEX",
            "platform_accession_code": "EnsemblPlants",
            "experiment_accession_code": "aegilops_tauschii",
            "experiment_title": "It doesn't really matter.",
            "organism_id": 37682,
            "organism_name": "AEGILOPS TAUSCHII",
            "release_date": "2017-11-02",
            "last_uploaded_date": "2017-11-02",
            "status": BatchStatuses.DOWNLOADED.value,
        }
        batch = Batch(**batch_fields)
        batch.save()

        BatchKeyValue(batch=batch, key="kmer_size", value="31").save()

        # (name, download_url, raw_format); saved in this order.
        file_specs = (
            ("aegilops_tauschii_short.gtf.gz",
             "ftp://ftp.ensemblgenomes.org/pub/release-37/plants/gtf"
             "/aegilops_tauschii/Aegilops_tauschii.ASM34733v1.37.gtf.gz",
             "gtf.gz"),
            ("aegilops_tauschii_short.fa.gz",
             "ftp://ftp.ensemblgenomes.org/pub/release-37/plants/fasta"
             "/aegilops_tauschii/dna/Aegilops_tauschii."
             "ASM34733v1.dna.toplevel.fa.gz",
             "fa.gz"),
        )
        for file_name, url, raw_format in file_specs:
            File(
                name=file_name,
                download_url=url,
                raw_format=raw_format,
                processed_format="tar.gz",
                internal_location="EnsemblPlants/TRANSCRIPTOME_INDEX",
                size_in_bytes=-1,
                batch=batch,
            ).save()

        job_to_send = ProcessorJob.create_job_and_relationships(
            batches=[batch])
        logger.info("Queuing a processor job.")
        send_job(ProcessorPipeline[batch.pipeline_required], job_to_send.id)
def init_objects():
    """Create and save a transcriptome-index batch with its two files.

    Saves a survey job, a DOWNLOADED batch, ``length`` ("_short") and
    ``kmer_size`` ("23") key/value rows, then a GTF and a FASTA file record.

    Returns:
        A ``(batch, gtf_file, fasta_file)`` tuple; ``batch.files`` is set to
        ``[gtf_file, fasta_file]``.
    """
    job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
    job.save()

    batch_fields = {
        "survey_job": job,
        "source_type": "TRANSCRIPTOME_INDEX",
        "pipeline_required": "TRANSCRIPTOME_INDEX",
        "platform_accession_code": "EnsemblPlants",
        "experiment_accession_code": "aegilops_tauschii",
        "experiment_title": "It doesn't really matter.",
        "organism_id": 37682,
        "organism_name": "AEGILOPS TAUSCHII",
        "release_date": "2017-11-02",
        "last_uploaded_date": "2017-11-02",
        "status": BatchStatuses.DOWNLOADED.value,
    }
    batch = Batch(**batch_fields)
    batch.save()

    # Key/value rows are written in this order: length, then kmer_size.
    for key, value in (("length", "_short"), ("kmer_size", "23")):
        BatchKeyValue(batch=batch, key=key, value=value).save()

    # Fields shared by both file records.
    shared_fields = {
        "size_in_bytes": -1,
        "processed_format": "tar.gz",
        "internal_location": "EnsemblPlants/TRANSCRIPTOME_INDEX",
        "batch": batch,
    }

    gtf_file = File(
        name="aegilops_tauschii_short.gtf.gz",
        raw_format="gtf.gz",
        download_url=(
            "ftp://ftp.ensemblgenomes.org/pub/release-37/plants/gtf"
            "/aegilops_tauschii/Aegilops_tauschii.ASM34733v1.37.gtf.gz"),
        **shared_fields)
    gtf_file.save()

    fasta_file = File(
        name="aegilops_tauschii_short.fa.gz",
        raw_format="fa.gz",
        download_url=(
            "ftp://ftp.ensemblgenomes.org/pub/release-37/plants/fasta"
            "/aegilops_tauschii/dna/Aegilops_tauschii."
            "ASM34733v1.dna.toplevel.fa.gz"),
        **shared_fields)
    fasta_file.save()

    batch.files = [gtf_file, fasta_file]
    return (batch, gtf_file, fasta_file)
    def insert_objects(self) -> List[Batch]:
        """Save two transcriptome-index batches, each with two file records.

        Both batches share the same field values; the second is a deep copy
        of the first taken before either is saved, so each ``save()`` call
        operates on its own unsaved object. The first batch gets
        ``length="_short"`` / ``kmer_size="23"`` key/value rows, the second
        ``length="_long"`` / ``kmer_size="31"``. Every batch then gets a
        FASTA and a GTF file record whose URLs come from
        ``self.fasta_download_url`` and ``self.gtf_download_url``.

        Reads ``self.survey_job``, ``self.fasta_download_url`` and
        ``self.gtf_download_url``.

        Returns:
            ``[batch1, batch2]``, each with ``files`` set to its
            ``[fasta_file, gtf_file]`` list.
        """
        # The unused ArrayExpress ``download_url`` local that used to be
        # assigned here has been dropped; nothing in this method read it.
        batch1 = Batch(survey_job=self.survey_job,
                       source_type="TRANSCRIPTOME_INDEX",
                       pipeline_required="TRANSCRIPTOME_INDEX",
                       platform_accession_code="EnsemblPlants",
                       experiment_accession_code="AEGILOPS_TAUSCHII",
                       experiment_title="It doesn't really matter.",
                       organism_id=37682,
                       organism_name="AEGILOPS TAUSCHII",
                       release_date="2017-05-05",
                       last_uploaded_date="2017-05-05",
                       status=BatchStatuses.NEW.value)
        # Copy before saving so the second batch is saved as its own object.
        batch2 = copy.deepcopy(batch1)
        batch1.save()
        batch2.save()

        for batch, length, kmer_size in [(batch1, "_short", "23"),
                                         (batch2, "_long", "31")]:
            BatchKeyValue(batch=batch, key="length", value=length).save()
            BatchKeyValue(batch=batch, key="kmer_size", value=kmer_size).save()

            # The length suffix is embedded in both file names.
            file1 = File(size_in_bytes=0,
                         download_url=self.fasta_download_url,
                         raw_format="fa.gz",
                         processed_format="tar.gz",
                         name="Aegilops_tauschii{}.fa.gz".format(length),
                         internal_location="EnsemblPlants/TRANSCRIPTOME_INDEX",
                         batch=batch)
            file2 = File(size_in_bytes=0,
                         download_url=self.gtf_download_url,
                         raw_format="gtf.gz",
                         processed_format="tar.gz",
                         name="Aegilops_tauschii{}.gtf.gz".format(length),
                         internal_location="EnsemblPlants/TRANSCRIPTOME_INDEX",
                         batch=batch)
            file1.save()
            file2.save()
            batch.files = [file1, file2]

        return [batch1, batch2]
Exemple #8
0
def init_objects():
    """Create and save a SALMON batch with two paired FASTQ file records.

    Saves a survey job, a DOWNLOADED batch and the ``_1`` / ``_2`` FASTQ
    files of run ERR003000 (in that order).

    Returns:
        A ``(batch, first_fastq_file, second_fastq_file)`` tuple;
        ``batch.files`` is set to a list of the two files.
    """
    job = SurveyJob(source_type="SALMON")
    job.save()

    batch_fields = {
        "survey_job": job,
        "source_type": "SALMON",
        "pipeline_required": "SALMON",
        "platform_accession_code": "IlluminaGenomeAnalyzerII",
        "experiment_accession_code": "ERX000259",
        "experiment_title": "It doesn't really matter.",
        "organism_id": 9606,
        # NOTE(review): name looks masked; presumably "HOMO SAPIENS" -
        # confirm against the upstream fixture.
        "organism_name": "H**O SAPIENS",
        "release_date": "2017-11-02",
        "last_uploaded_date": "2017-11-02",
        "status": BatchStatuses.DOWNLOADED.value,
    }
    batch = Batch(**batch_fields)
    batch.save()

    # Both reads live in the same remote directory and differ only in the
    # read number embedded in the file name.
    remote_dir = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR003/ERR003000/"
    fastq_files = []
    for read_number in (1, 2):
        file_name = "ERR003000_{}.fastq.gz".format(read_number)
        fastq_file = File(
            batch=batch,
            name=file_name,
            internal_location="IlluminaGenomeAnalyzerII/SALMON",
            raw_format="fastq.gz",
            processed_format="tar.gz",
            download_url=remote_dir + file_name,
            size_in_bytes=2214725074,
        )
        fastq_file.save()
        fastq_files.append(fastq_file)

    batch.files = fastq_files
    first_fastq_file, second_fastq_file = fastq_files
    return (batch, first_fastq_file, second_fastq_file)
    def insert_objects(self) -> List[Batch]:
        """Save two ArrayExpress batches, each with its own CEL file record.

        The second batch is a deep copy of the first taken before either is
        saved, so each ``save()`` call operates on its own unsaved object.
        ``CE1234.CEL`` is created for the first batch and ``CE2345.CEL``
        for the second. Reads ``self.survey_job``.

        Returns:
            A ``([batch, batch2], [file, file2])`` tuple, with each batch's
            ``files`` attribute set to a one-element list holding the file
            created for it.

        NOTE(review): the ``List[Batch]`` return annotation does not match
        the tuple actually returned. It is left unchanged here because the
        file's import block is not visible from this block.
        """
        download_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
        batch = Batch(survey_job=self.survey_job,
                      source_type="ARRAY_EXPRESS",
                      pipeline_required="AFFY_TO_PCL",
                      platform_accession_code="A-AFFY-1",
                      experiment_accession_code="E-MTAB-3050",
                      experiment_title="It doesn't really matter.",
                      organism_id=9606,
                      # NOTE(review): name looks masked; presumably
                      # "HOMO SAPIENS" - confirm against upstream.
                      organism_name="H**O SAPIENS",
                      release_date="2017-05-05",
                      last_uploaded_date="2017-05-05",
                      status=BatchStatuses.NEW.value)
        # Copy before saving so the second batch is saved as its own object.
        batch2 = copy.deepcopy(batch)
        batch.save()
        batch2.save()

        file = File(size_in_bytes=0,
                    download_url=download_url,
                    raw_format="CEL",
                    processed_format="PCL",
                    name="CE1234.CEL",
                    internal_location="A-AFFY-1/AFFY_TO_PCL/",
                    batch=batch)
        file2 = File(size_in_bytes=0,
                     download_url=download_url,
                     raw_format="CEL",
                     processed_format="PCL",
                     name="CE2345.CEL",
                     internal_location="A-AFFY-1/AFFY_TO_PCL/",
                     batch=batch2)
        file.save()
        file2.save()

        batch.files = [file]
        # Fixed: this used to assign [file], attaching the first batch's
        # file to batch2 even though file2 is the one created for batch2.
        batch2.files = [file2]

        return ([batch, batch2], [file, file2])
Exemple #10
0
    def insert_objects(self) -> List[Batch]:
        """Save one SRA/SALMON batch with two paired FASTQ file records.

        Creates a NEW batch for experiment DRX001563 and the ``_1`` / ``_2``
        FASTQ files of run DRR002116, saving the batch first and then the
        files in that order. Reads ``self.survey_job``.

        Returns:
            A ``(batch, [file, file2])`` tuple; ``batch.files`` is set to a
            list of the two files.

        NOTE(review): the ``List[Batch]`` return annotation does not match
        the tuple actually returned. It is left unchanged here because the
        file's import block is not visible from this block.
        """
        # The unused ArrayExpress ``download_url`` local that used to be
        # assigned here has been dropped; both files carry their own URLs.
        batch = Batch(survey_job=self.survey_job,
                      source_type="SRA",
                      pipeline_required="SALMON",
                      platform_accession_code="IlluminaHiSeq2000",
                      experiment_accession_code="DRX001563",
                      experiment_title="It doesn't really matter.",
                      organism_id=9031,
                      organism_name="GALLUS GALLUS",
                      release_date="2013-07-19",
                      last_uploaded_date="2017-09-11",
                      status=BatchStatuses.NEW.value)
        batch.save()

        file = File(
            size_in_bytes=0,
            download_url=
            "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR002/DRR002116/DRR002116_1.fastq.gz",  # noqa
            raw_format="fastq.gz",
            processed_format="tar.gz",
            name="DRR002116_1.fastq.gz",
            internal_location="IlluminaHiSeq2000/SALMON",
            batch=batch)
        file2 = File(
            size_in_bytes=0,
            download_url=
            "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR002/DRR002116/DRR002116_2.fastq.gz",  # noqa
            raw_format="fastq.gz",
            processed_format="tar.gz",
            name="DRR002116_2.fastq.gz",
            internal_location="IlluminaHiSeq2000/SALMON",
            batch=batch)

        file.save()
        file2.save()
        batch.files = [file, file2]
        return (batch, [file, file2])