def _apply_metadata_to_experiment(experiment: Experiment, metadata: dict):
    """Copy SRA/ENA study metadata onto ``experiment`` in place.

    Always sets the source URL (built from the experiment's accession
    code), the source database ("SRA") and the technology ("RNA-SEQ"),
    then applies whichever optional keys ``metadata`` provides.

    Metadata keys are sometimes present with a value of None. Such
    values (and empty ones) are treated exactly like a missing key, so
    they never overwrite an existing value or a fallback, never mark
    the experiment as published, and never reach ``parse_date`` or
    ``re.match``.

    Args:
        experiment: The experiment to update. It is mutated but not
            saved by this function.
        metadata: Flat mapping of metadata keys to values.

    Returns:
        None.
    """
    experiment.source_url = ENA_URL_TEMPLATE.format(experiment.accession_code)
    experiment.source_database = "SRA"
    experiment.technology = "RNA-SEQ"

    # We don't get platform_accession_code from the API, unfortunately,
    # so it is deliberately left untouched here.

    if not experiment.description:
        experiment.description = "No description."

    # Plain copies: metadata key -> experiment attribute. A bare
    # `key in metadata` check is not enough because the key can be
    # present with a None value, which would clobber the fallback
    # description above or violate a database constraint.
    direct_fields = (
        ("study_title", "title"),
        ("study_abstract", "description"),
        ("lab_name", "submitter_institution"),
        ("experiment_design_description", "protocol_description"),
    )
    for key, attribute in direct_fields:
        value = metadata.get(key)
        if value:
            setattr(experiment, attribute, value)

    # Only claim a publication when there is an actual PubMed ID.
    pubmed_id = metadata.get("pubmed_id")
    if pubmed_id:
        experiment.pubmed_id = pubmed_id
        experiment.has_publication = True

    # Dates are parsed only when a value is really there, so that
    # parse_date is never handed None.
    date_fields = (
        ("study_ena_first_public", "source_first_published"),
        ("study_ena_last_update", "source_last_modified"),
    )
    for key, attribute in date_fields:
        raw_date = metadata.get(key)
        if raw_date:
            setattr(experiment, attribute, parse_date(raw_date))

    # We only want GEO alternate accessions for SRA samples.
    # `or ""` covers a present-but-None external_id, which would make
    # re.match raise TypeError.
    external_id = metadata.get("external_id") or ""
    if re.match(r"^GSE\d{2,6}", external_id) is not None:
        experiment.alternate_accession_code = external_id

    # Rare, but it happens: no protocol came from the design
    # description, so fall back to the library construction protocol
    # and finally to a placeholder, because an empty value would
    # violate our database constraint.
    if not experiment.protocol_description:
        experiment.protocol_description = (
            metadata.get("library_construction_protocol")
            or "Protocol was never provided."
        )

    # Scrape publication title and authorship from Pubmed
    if experiment.pubmed_id:
        pubmed_metadata = utils.get_title_and_authors_for_pubmed_id(
            experiment.pubmed_id
        )
        experiment.publication_title = pubmed_metadata[0]
        experiment.publication_authors = pubmed_metadata[1]
def setUp(self):
    """Build the fixture: one experiment, two samples, two terms, one contribution.

    Stores the experiment on ``self.experiment`` and the contribution on
    ``self.contribution``; everything else is only persisted.
    """

    def make(model, **attributes):
        # Instantiate with no arguments, assign each attribute in the
        # given order, save, and hand the saved instance back.
        instance = model()
        for attribute, value in attributes.items():
            setattr(instance, attribute, value)
        instance.save()
        return instance

    self.experiment = make(
        Experiment,
        accession_code="GSE000",
        alternate_accession_code="E-GEOD-000",
        title="NONONONO",
        description="Boooooourns. Wasabi.",
        technology="RNA-SEQ",
    )

    # Create some samples to attach keywords to, each linked to the
    # experiment right after it is saved.
    for accession_code in ("SRR123", "SRR456"):
        linked_sample = make(
            Sample,
            accession_code=accession_code,
            technology="RNA-SEQ",
            source_database="SRA",
            title="Not important",
        )
        make(
            ExperimentSampleAssociation,
            sample=linked_sample,
            experiment=self.experiment,
        )

    # Create the ontology terms used in the tests.
    ontology_terms = (
        ("PATO:0000122", "length"),
        ("UO:0010012", "thou"),
    )
    for term_id, readable_name in ontology_terms:
        make(
            OntologyTerm,
            ontology_term=term_id,
            human_readable_name=readable_name,
        )

    self.contribution = make(
        Contribution,
        source_name="refinebio_tests",
        methods_url="ccdatalab.org",
    )
def setUp(self):
    """Populate the database with the fixture objects used by the tests.

    Exposes ``self.experiment`` (GSE123), ``self.sample`` (accession
    "789"), ``self.homo_sapiens`` and ``self.danio_rerio``; all other
    objects are only persisted.
    """
    # Saving this for if we have protected endpoints
    # self.superuser = User.objects.create_superuser('john', '*****@*****.**', 'johnpassword')
    # self.client.login(username='******', password='******')
    # self.user = User.objects.create(username="******")

    def make(model, **attributes):
        # Instantiate with no arguments, assign each attribute in the
        # given order, save, and hand the saved instance back.
        instance = model()
        for attribute, value in attributes.items():
            setattr(instance, attribute, value)
        instance.save()
        return instance

    # --- Experiments -------------------------------------------------
    make(
        Experiment,
        accession_code="GSE000",
        alternate_accession_code="E-GEOD-000",
        title="NONONONO",
        description="Boooooourns. Wasabi.",
        technology="RNA-SEQ",
    )
    microarray_experiment = make(
        Experiment,
        accession_code="GSE123",
        title="Hey Ho Let's Go",
        description=(
            "This is a very exciting test experiment. Faygo soda. Blah blah blah."
        ),
        technology="MICROARRAY",
    )
    self.experiment = microarray_experiment

    make(
        ExperimentAnnotation,
        data={"hello": "world", "123": 456},
        experiment=microarray_experiment,
    )

    # --- Organisms ---------------------------------------------------
    # Create 26 test organisms numbered 0-25 for pagination test, so
    # there should be 29 organisms total (with the 3 others below).
    for number in range(26):
        filler_organism = Organism(
            name="TEST_ORGANISM_{}".format(number),
            taxonomy_id=1234 + number,
        )
        filler_organism.save()

    ailuropoda = Organism(
        name="AILUROPODA_MELANOLEUCA", taxonomy_id=9646, is_scientific_name=True
    )
    ailuropoda.save()

    self.homo_sapiens = Organism(
        name="HOMO_SAPIENS", taxonomy_id=9606, is_scientific_name=True
    )
    self.homo_sapiens.save()

    self.danio_rerio = Organism(
        name="DANIO_RERIO", taxonomy_id=1337, is_scientific_name=True
    )
    self.danio_rerio.save()

    # --- Samples -----------------------------------------------------
    # Two processed samples; only the last one ("789") is kept around
    # and wired up to the rest of the fixture.
    for code in ("123", "789"):
        sample = make(
            Sample,
            title=code,
            accession_code=code,
            is_processed=True,
            organism=ailuropoda,
        )
    self.sample = sample

    # Add a QN target for the sample organism.
    qn_result = ComputationalResult()
    qn_result.commands.append("create_qn_target.py")
    qn_result.is_ccdl = True
    qn_result.is_public = True
    qn_result.processor = None
    qn_result.save()

    make(
        ComputationalResultAnnotation,
        result=qn_result,
        data={"organism_id": ailuropoda.id, "is_qn": True},
    )

    ailuropoda.qn_target = qn_result
    ailuropoda.save()

    make(
        SampleAnnotation,
        data={"goodbye": "world", "789": 123},
        sample=sample,
    )

    # --- Original file and the jobs that reference it ----------------
    original_file = make(OriginalFile)
    make(
        OriginalFileSampleAssociation,
        sample=sample,
        original_file=original_file,
    )

    downloader_job = make(DownloaderJob)
    make(
        DownloaderJobOriginalFileAssociation,
        original_file=original_file,
        downloader_job=downloader_job,
    )

    processor_job = make(ProcessorJob)
    make(
        ProcessorJobOriginalFileAssociation,
        original_file=original_file,
        processor_job=processor_job,
    )

    # --- Link the sample to the experiment and update its counters ---
    make(
        ExperimentSampleAssociation,
        sample=sample,
        experiment=microarray_experiment,
    )
    microarray_experiment.num_total_samples = 1
    microarray_experiment.num_processed_samples = 1
    microarray_experiment.save()

    # Two empty computational results, each associated with the sample.
    for _ in range(2):
        plain_result = make(ComputationalResult)
        make(SampleResultAssociation, sample=sample, result=plain_result)

    # --- Processor and transcriptome index ---------------------------
    processor = make(
        Processor,
        name="Salmon Quant",
        version="v9.9.9",
        docker_image="dr_salmon",
        environment='{"some": "environment"}',
    )

    computational_result_short = ComputationalResult(processor=processor)
    computational_result_short.save()

    make(
        OrganismIndex,
        index_type="TRANSCRIPTOME_SHORT",
        organism=self.danio_rerio,
        result=computational_result_short,
        absolute_directory_path=(
            "/home/user/data_store/salmon_tests/TRANSCRIPTOME_INDEX/SHORT"
        ),
        is_public=True,
        s3_url="not_blank",
    )