def setUpClass(cls):
    """Create the survey job, batch and file used by this test class.

    The parent ``setUpClass`` is invoked *before* any row is saved so that
    whatever class-level setup the base class performs is already active
    when the fixture data is written. (If the base class is Django's
    ``TestCase``, that setup includes the class-wide transaction, so the
    rows created here are cleaned up with it -- confirm against the actual
    base class of ``FilesTestCase``.)
    """
    super(FilesTestCase, cls).setUpClass()

    survey_job = SurveyJob(source_type="ARRAY_EXPRESS")
    survey_job.save()

    batch = Batch(
        survey_job=survey_job,
        source_type="ARRAY_EXPRESS",
        pipeline_required="AFFY_TO_PCL",
        platform_accession_code="A-AFFY-141",
        experiment_accession_code="E-GEOD-59071",
        experiment_title="It doesn't really matter.",
        organism_id=9606,
        organism_name="H**O SAPIENS",
        release_date="2017-05-05",
        last_uploaded_date="2017-05-05",
        status=BatchStatuses.NEW.value,
    )
    batch.save()

    cel_file = File(
        size_in_bytes=0,
        download_url="ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip",  # noqa
        raw_format="CEL",
        processed_format="PCL",
        name="GSM1426072.CEL",
        internal_location="A-AFFY-141/AFFY_TO_PCL",
        batch=batch,
    )
    cel_file.save()
def init_objects():
    """Persist an Array Express survey job, one batch and one CEL file.

    The batch is saved with status ``DOWNLOADED`` and, after the file has
    been saved, gets ``files`` assigned to a one-element list holding it.

    Returns:
        The saved ``Batch`` instance.
    """
    job = SurveyJob(source_type="ARRAY_EXPRESS")
    job.save()

    batch_fields = {
        "survey_job": job,
        "source_type": "ARRAY_EXPRESS",
        "pipeline_required": "AFFY_TO_PCL",
        "platform_accession_code": "A-AFFY-1",
        "experiment_accession_code": "E-MTAB-3050",
        "experiment_title": "It doesn't really matter.",
        "organism_id": 9606,
        "organism_name": "H**O SAPIENS",
        "release_date": "2017-05-05",
        "last_uploaded_date": "2017-05-05",
        "status": BatchStatuses.DOWNLOADED.value,
    }
    batch = Batch(**batch_fields)
    batch.save()

    raw_archive_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
    cel_file = File(
        batch=batch,
        name="CE1234.CEL",
        internal_location="A-AFFY-1/AFFY_TO_PCL/",
        raw_format="CEL",
        processed_format="PCL",
        download_url=raw_archive_url,
        size_in_bytes=0,
    )
    cel_file.save()

    batch.files = [cel_file]
    return batch
def _insert_salmon_index():
    """Save the transcriptome-index fixture rows for organism 9606.

    Writes, in order: a ``TRANSCRIPTOME_INDEX`` survey job, a ``PROCESSED``
    batch attached to it, a ``kmer_size`` key/value of ``"23"`` for that
    batch, and the GTF file record belonging to the batch. Nothing is
    returned.
    """
    index_kind = "TRANSCRIPTOME_INDEX"

    job = SurveyJob(source_type=index_kind)
    job.save()

    index_batch = Batch(
        survey_job=job,
        source_type=index_kind,
        pipeline_required=index_kind,
        platform_accession_code="TEST",
        experiment_accession_code="HOMO_SAPIENS",
        experiment_title="It doesn't really matter.",
        organism_id=9606,
        organism_name="H**O SAPIENS",
        release_date="2017-11-02",
        last_uploaded_date="2017-11-02",
        status=BatchStatuses.PROCESSED.value,
    )
    index_batch.save()

    BatchKeyValue(key="kmer_size", value="23", batch=index_batch).save()

    gtf_url = ("ftp://ftp.ensembl.org/pub/release-90/gtf/homo_sapiens"
               "/Homo_sapiens.GRCh38.90.gtf.gz")
    File(
        batch=index_batch,
        name="Homo_sapiens_short.gtf.gz",
        internal_location="TEST/TRANSCRIPTOME_INDEX",
        download_url=gtf_url,
        raw_format="gtf.gz",
        processed_format="tar.gz",
        size_in_bytes=2214725074,
    ).save()
def handle(self, *args, **options):
    """Seed the rows a downloader job depends on, then queue that job.

    A survey job, a ``NEW`` Array Express batch and one CEL file record are
    saved first; a downloader job is then created for the batch and handed
    to ``send_job`` using the ``ARRAY_EXPRESS`` downloader.
    """
    # These records stand in for what would normally exist before a
    # downloader job gets generated.
    job = SurveyJob(source_type="ARRAY_EXPRESS")
    job.save()

    batch_fields = dict(
        survey_job=job,
        source_type="ARRAY_EXPRESS",
        pipeline_required="AFFY_TO_PCL",
        platform_accession_code="A-AFFY-141",
        experiment_accession_code="E-GEOD-59071",
        experiment_title="It doesn't really matter.",
        organism_id=9606,
        organism_name="H**O SAPIENS",
        release_date="2017-05-05",
        last_uploaded_date="2017-05-05",
        status=BatchStatuses.NEW.value,
    )
    batch = Batch(**batch_fields)
    batch.save()

    raw_archive_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa
    cel_file = File(
        batch=batch,
        size_in_bytes=0,
        download_url=raw_archive_url,
        raw_format="CEL",
        processed_format="PCL",
        name="GSM1426072_CD_colon_active_2.CEL",
        internal_location="A-AFFY-141/AFFY_TO_PCL",
    )
    cel_file.save()

    job_to_queue = DownloaderJob.create_job_and_relationships(batches=[batch])
    send_job(Downloaders["ARRAY_EXPRESS"], job_to_queue.id)
def run_trasnscriptome_processor(self):
    """Seed a transcriptome-index batch and queue a processor job for it.

    Saves a survey job, a ``DOWNLOADED`` batch, a ``kmer_size`` property of
    ``"31"`` and two file records (GTF first, then FASTA). A processor job is
    then created for the batch, a log line is emitted, and the job is sent
    to the pipeline named by the batch's ``pipeline_required``.
    """
    # These records stand in for what would normally exist before a
    # processor job gets generated.
    job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
    job.save()

    batch = Batch(
        survey_job=job,
        source_type="TRANSCRIPTOME_INDEX",
        pipeline_required="TRANSCRIPTOME_INDEX",
        platform_accession_code="EnsemblPlants",
        experiment_accession_code="aegilops_tauschii",
        experiment_title="It doesn't really matter.",
        organism_id=37682,
        organism_name="AEGILOPS TAUSCHII",
        release_date="2017-11-02",
        last_uploaded_date="2017-11-02",
        status=BatchStatuses.DOWNLOADED.value,
    )
    batch.save()

    BatchKeyValue(batch=batch, key="kmer_size", value="31").save()

    # (name, raw_format, download_url) for each file; saved in this order.
    file_specs = (
        (
            "aegilops_tauschii_short.gtf.gz",
            "gtf.gz",
            ("ftp://ftp.ensemblgenomes.org/pub/release-37/plants/gtf"
             "/aegilops_tauschii/Aegilops_tauschii.ASM34733v1.37.gtf.gz"),
        ),
        (
            "aegilops_tauschii_short.fa.gz",
            "fa.gz",
            ("ftp://ftp.ensemblgenomes.org/pub/release-37/plants/fasta"
             "/aegilops_tauschii/dna/Aegilops_tauschii."
             "ASM34733v1.dna.toplevel.fa.gz"),
        ),
    )
    for file_name, raw_format, url in file_specs:
        File(
            name=file_name,
            download_url=url,
            raw_format=raw_format,
            processed_format="tar.gz",
            internal_location="EnsemblPlants/TRANSCRIPTOME_INDEX",
            size_in_bytes=-1,
            batch=batch,
        ).save()

    job_to_queue = ProcessorJob.create_job_and_relationships(batches=[batch])
    logger.info("Queuing a processor job.")
    pipeline = ProcessorPipeline[batch.pipeline_required]
    send_job(pipeline, job_to_queue.id)
def init_objects():
    """Persist a transcriptome-index batch with its GTF and FASTA files.

    Saves a survey job, a ``DOWNLOADED`` batch, two batch key/values
    (``length`` = ``"_short"`` then ``kmer_size`` = ``"23"``) and two file
    records (GTF first, then FASTA). The batch's ``files`` attribute is set
    to ``[gtf_file, fasta_file]``.

    Returns:
        A ``(batch, gtf_file, fasta_file)`` tuple.
    """
    job = SurveyJob(source_type="TRANSCRIPTOME_INDEX")
    job.save()

    batch = Batch(
        survey_job=job,
        source_type="TRANSCRIPTOME_INDEX",
        pipeline_required="TRANSCRIPTOME_INDEX",
        platform_accession_code="EnsemblPlants",
        experiment_accession_code="aegilops_tauschii",
        experiment_title="It doesn't really matter.",
        organism_id=37682,
        organism_name="AEGILOPS TAUSCHII",
        release_date="2017-11-02",
        last_uploaded_date="2017-11-02",
        status=BatchStatuses.DOWNLOADED.value,
    )
    batch.save()

    for key, value in (("length", "_short"), ("kmer_size", "23")):
        BatchKeyValue(batch=batch, key=key, value=value).save()

    # Attributes that are the same for both file records.
    common = {
        "size_in_bytes": -1,
        "processed_format": "tar.gz",
        "internal_location": "EnsemblPlants/TRANSCRIPTOME_INDEX",
        "batch": batch,
    }

    gtf_file = File(
        raw_format="gtf.gz",
        name="aegilops_tauschii_short.gtf.gz",
        download_url=(
            "ftp://ftp.ensemblgenomes.org/pub/release-37/plants/gtf"
            "/aegilops_tauschii/Aegilops_tauschii.ASM34733v1.37.gtf.gz"),
        **common
    )
    gtf_file.save()

    fasta_file = File(
        raw_format="fa.gz",
        name="aegilops_tauschii_short.fa.gz",
        download_url=(
            "ftp://ftp.ensemblgenomes.org/pub/release-37/plants/fasta"
            "/aegilops_tauschii/dna/Aegilops_tauschii."
            "ASM34733v1.dna.toplevel.fa.gz"),
        **common
    )
    fasta_file.save()

    batch.files = [gtf_file, fasta_file]
    return (batch, gtf_file, fasta_file)
def insert_objects(self) -> List[Batch]:
    """Create and save two transcriptome-index batches with their files.

    Both batches share the same field values and hang off
    ``self.survey_job``. The first receives ``length="_short"`` and
    ``kmer_size="23"``, the second ``length="_long"`` and
    ``kmer_size="31"``. Each batch gets a FASTA and a GTF file record whose
    URLs come from ``self.fasta_download_url`` / ``self.gtf_download_url``
    and whose names embed the batch's length suffix.

    Returns:
        ``[batch1, batch2]``, each with ``files`` set to its two file
        records.
    """
    batch1 = Batch(
        survey_job=self.survey_job,
        source_type="TRANSCRIPTOME_INDEX",
        pipeline_required="TRANSCRIPTOME_INDEX",
        platform_accession_code="EnsemblPlants",
        experiment_accession_code="AEGILOPS_TAUSCHII",
        experiment_title="It doesn't really matter.",
        organism_id=37682,
        organism_name="AEGILOPS TAUSCHII",
        release_date="2017-05-05",
        last_uploaded_date="2017-05-05",
        status=BatchStatuses.NEW.value,
    )
    # The copy is taken before either save() call; presumably so that both
    # objects are still unsaved and end up as separate rows.
    batch2 = copy.deepcopy(batch1)
    batch1.save()
    batch2.save()

    variants = [(batch1, "_short", "23"), (batch2, "_long", "31")]
    for batch, length, kmer_size in variants:
        BatchKeyValue(batch=batch, key="length", value=length).save()
        BatchKeyValue(batch=batch, key="kmer_size", value=kmer_size).save()

        file1 = File(
            size_in_bytes=0,
            download_url=self.fasta_download_url,
            raw_format="fa.gz",
            processed_format="tar.gz",
            name="Aegilops_tauschii{}.fa.gz".format(length),
            internal_location="EnsemblPlants/TRANSCRIPTOME_INDEX",
            batch=batch,
        )
        file2 = File(
            size_in_bytes=0,
            download_url=self.gtf_download_url,
            raw_format="gtf.gz",
            processed_format="tar.gz",
            name="Aegilops_tauschii{}.gtf.gz".format(length),
            internal_location="EnsemblPlants/TRANSCRIPTOME_INDEX",
            batch=batch,
        )
        file1.save()
        file2.save()
        batch.files = [file1, file2]

    return [batch1, batch2]
def init_objects():
    """Persist a SALMON batch together with its two fastq file records.

    Saves a survey job, a ``DOWNLOADED`` batch and two file records
    (``ERR003000_1`` first, then ``ERR003000_2``), then sets the batch's
    ``files`` attribute to the two files in that order.

    Returns:
        A ``(batch, first_fastq_file, second_fastq_file)`` tuple.
    """
    job = SurveyJob(source_type="SALMON")
    job.save()

    batch = Batch(
        survey_job=job,
        source_type="SALMON",
        pipeline_required="SALMON",
        platform_accession_code="IlluminaGenomeAnalyzerII",
        experiment_accession_code="ERX000259",
        experiment_title="It doesn't really matter.",
        organism_id=9606,
        organism_name="H**O SAPIENS",
        release_date="2017-11-02",
        last_uploaded_date="2017-11-02",
        status=BatchStatuses.DOWNLOADED.value,
    )
    batch.save()

    # (name, download_url) per read file; created and saved in this order.
    read_specs = (
        ("ERR003000_1.fastq.gz",
         ("ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR003/"
          "ERR003000/ERR003000_1.fastq.gz")),
        ("ERR003000_2.fastq.gz",
         ("ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR003/"
          "ERR003000/ERR003000_2.fastq.gz")),
    )
    saved_files = []
    for file_name, url in read_specs:
        fastq = File(
            batch=batch,
            name=file_name,
            download_url=url,
            internal_location="IlluminaGenomeAnalyzerII/SALMON",
            raw_format="fastq.gz",
            processed_format="tar.gz",
            size_in_bytes=2214725074,
        )
        fastq.save()
        saved_files.append(fastq)

    first_fastq_file, second_fastq_file = saved_files
    batch.files = [first_fastq_file, second_fastq_file]
    return (batch, first_fastq_file, second_fastq_file)
def insert_objects(self) -> List[Batch]:
    """Create and save two Array Express batches, each with one CEL file.

    Both batches share the same field values and hang off
    ``self.survey_job``. ``CE1234.CEL`` belongs to the first batch and
    ``CE2345.CEL`` to the second; each batch's ``files`` attribute is set to
    a one-element list holding its own file.

    Returns:
        ``([batch, batch2], [file, file2])``.

    NOTE(review): the ``List[Batch]`` return annotation does not match the
    tuple that is actually returned; the signature is left untouched here so
    callers and imports are unaffected.
    """
    download_url = "ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/GEOD/E-GEOD-59071/E-GEOD-59071.raw.3.zip"  # noqa

    batch = Batch(
        survey_job=self.survey_job,
        source_type="ARRAY_EXPRESS",
        pipeline_required="AFFY_TO_PCL",
        platform_accession_code="A-AFFY-1",
        experiment_accession_code="E-MTAB-3050",
        experiment_title="It doesn't really matter.",
        organism_id=9606,
        organism_name="H**O SAPIENS",
        release_date="2017-05-05",
        last_uploaded_date="2017-05-05",
        status=BatchStatuses.NEW.value,
    )
    # The copy is taken before either save() call; presumably so that both
    # objects are still unsaved and end up as separate rows.
    batch2 = copy.deepcopy(batch)
    batch.save()
    batch2.save()

    file = File(
        size_in_bytes=0,
        download_url=download_url,
        raw_format="CEL",
        processed_format="PCL",
        name="CE1234.CEL",
        internal_location="A-AFFY-1/AFFY_TO_PCL/",
        batch=batch,
    )
    file2 = File(
        size_in_bytes=0,
        download_url=download_url,
        raw_format="CEL",
        processed_format="PCL",
        name="CE2345.CEL",
        internal_location="A-AFFY-1/AFFY_TO_PCL/",
        batch=batch2,
    )
    file.save()
    file2.save()

    batch.files = [file]
    # file2 is the record created with batch=batch2, so it (not ``file``)
    # is the one that belongs in batch2.files.
    batch2.files = [file2]

    return ([batch, batch2], [file, file2])
def insert_objects(self) -> List[Batch]:
    """Create and save one SRA batch with its two fastq file records.

    The batch hangs off ``self.survey_job`` and is saved with status
    ``NEW``. Two file records (``DRR002116_1`` and ``DRR002116_2``) are
    saved for it and assigned to the batch's ``files`` attribute.

    Returns:
        ``(batch, [file, file2])``.

    NOTE(review): the ``List[Batch]`` return annotation does not match the
    tuple that is actually returned; the signature is left untouched here so
    callers and imports are unaffected.
    """
    batch = Batch(
        survey_job=self.survey_job,
        source_type="SRA",
        pipeline_required="SALMON",
        platform_accession_code="IlluminaHiSeq2000",
        experiment_accession_code="DRX001563",
        experiment_title="It doesn't really matter.",
        organism_id=9031,
        organism_name="GALLUS GALLUS",
        release_date="2013-07-19",
        last_uploaded_date="2017-09-11",
        status=BatchStatuses.NEW.value,
    )
    batch.save()

    file = File(
        size_in_bytes=0,
        download_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR002/DRR002116/DRR002116_1.fastq.gz",  # noqa
        raw_format="fastq.gz",
        processed_format="tar.gz",
        name="DRR002116_1.fastq.gz",
        internal_location="IlluminaHiSeq2000/SALMON",
        batch=batch,
    )
    file2 = File(
        size_in_bytes=0,
        download_url="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR002/DRR002116/DRR002116_2.fastq.gz",  # noqa
        raw_format="fastq.gz",
        processed_format="tar.gz",
        name="DRR002116_2.fastq.gz",
        internal_location="IlluminaHiSeq2000/SALMON",
        batch=batch,
    )
    file.save()
    file2.save()

    batch.files = [file, file2]
    return (batch, [file, file2])