def _create_result_objects(job_context: Dict) -> Dict:
    """ Store and host the result as a ComputationalResult object. """
    result = ComputationalResult()
    result.commands.append(" ".join(job_context['formatted_command']))
    result.is_ccdl = True
    result.is_public = True
    result.time_start = job_context['time_start']
    result.time_end = job_context['time_end']
    try:
        processor_key = "COMPENDIA"
        result.processor = utils.find_processor(processor_key)
    except Exception as e:
        return utils.handle_processor_exception(job_context, processor_key, e)
    result.save()

    # Write the compendia dataframe to a file, overwriting the previous smash
    job_context['merged_qn'].to_csv(job_context['smash_outfile'], sep='\t', encoding='utf-8')

    compendia_tsv_computed_file = ComputedFile()
    compendia_tsv_computed_file.absolute_file_path = job_context['smash_outfile']
    compendia_tsv_computed_file.filename = job_context['smash_outfile'].split('/')[-1]
    compendia_tsv_computed_file.calculate_sha1()
    compendia_tsv_computed_file.calculate_size()
    compendia_tsv_computed_file.is_smashable = False
    compendia_tsv_computed_file.is_qn_target = False
    compendia_tsv_computed_file.result = result
    compendia_tsv_computed_file.save()

    organism_key = list(job_context['samples'].keys())[0]
    annotation = ComputationalResultAnnotation()
    annotation.result = result
    annotation.data = {
        "organism_id": job_context['samples'][organism_key][0].organism_id,
        "organism_name": job_context['samples'][organism_key][0].organism.name,
        "is_qn": False,
        "is_compendia": True,
        "samples": [sample.accession_code for sample in job_context["samples"][organism_key]],
        "num_samples": len(job_context["samples"][organism_key]),
        "experiment_accessions": [e.accession_code for e in job_context['experiments']]
    }
    annotation.save()

    # Save the related metadata file
    metadata_computed_file = ComputedFile()
    metadata_computed_file.absolute_file_path = job_context['metadata_tsv_paths'][0]
    metadata_computed_file.filename = job_context['metadata_tsv_paths'][0].split('/')[-1]
    metadata_computed_file.calculate_sha1()
    metadata_computed_file.calculate_size()
    metadata_computed_file.is_smashable = False
    metadata_computed_file.is_qn_target = False
    metadata_computed_file.result = result
    metadata_computed_file.save()

    # Create the resulting archive
    final_zip_base = "/home/user/data_store/smashed/" + str(job_context["dataset"].pk) + "_compendia"
    archive_path = shutil.make_archive(final_zip_base, 'zip', job_context["output_dir"])

    # Determine the next compendia version for this organism. Django
    # querysets don't support negative indexing, so with a descending
    # order_by the latest version is element [0].
    organism = job_context['samples'][organism_key][0].organism
    try:
        last_compendia = ComputedFile.objects.filter(
            is_compendia=True,
            compendia_organism=organism).order_by('-compendia_version')[0]
        compendia_version = last_compendia.compendia_version + 1
    except Exception:
        # This is the first compendia for this Organism
        compendia_version = 1

    archive_computed_file = ComputedFile()
    archive_computed_file.absolute_file_path = archive_path
    archive_computed_file.filename = archive_path.split('/')[-1]
    archive_computed_file.calculate_sha1()
    archive_computed_file.calculate_size()
    archive_computed_file.is_smashable = False
    archive_computed_file.is_qn_target = False
    archive_computed_file.result = result
    archive_computed_file.is_compendia = True
    archive_computed_file.compendia_organism = organism
    archive_computed_file.compendia_version = compendia_version
    archive_computed_file.save()

    logger.info("Compendia created!",
                archive_path=archive_path,
                organism_name=organism.name)

    # Upload the result to S3
    key = organism.name + "_" + str(compendia_version) + "_" + str(int(time.time())) + ".zip"
    archive_computed_file.sync_to_s3(S3_COMPENDIA_BUCKET_NAME, key)

    job_context['result'] = result
    job_context['computed_files'] = [compendia_tsv_computed_file,
                                     metadata_computed_file,
                                     archive_computed_file]
    job_context['success'] = True
    return job_context
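
# The try/except in the version lookup above exists only to handle the first
# compendium for an organism. A minimal alternative sketch, assuming the same
# ComputedFile fields: .first() returns None instead of raising when no
# previous compendia exist, so no broad Exception handler is needed.
last_compendia = (
    ComputedFile.objects
    .filter(is_compendia=True, compendia_organism=organism)
    .order_by('-compendia_version')
    .first()
)
compendia_version = last_compendia.compendia_version + 1 if last_compendia else 1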

def test_compendia(self):
    result = ComputationalResult()
    result.save()

    hsc1 = ComputedFile()
    hsc1.absolute_file_path = "/null/1.tsv"
    hsc1.filename = "1.tsv"
    hsc1.sha1 = "abc"
    hsc1.size_in_bytes = 1
    hsc1.is_smashable = False
    hsc1.is_qn_target = False
    hsc1.result = result
    hsc1.is_compendia = True
    hsc1.compendia_organism = self.homo_sapiens
    hsc1.compendia_version = 1
    hsc1.s3_bucket = "dr-compendia"
    hsc1.s3_key = "hsc1.tsv"
    hsc1.save()

    hsc2 = ComputedFile()
    hsc2.absolute_file_path = "/null/2.tsv"
    hsc2.filename = "2.tsv"
    hsc2.sha1 = "abc"
    hsc2.size_in_bytes = 1
    hsc2.is_smashable = False
    hsc2.is_qn_target = False
    hsc2.result = result
    hsc2.is_compendia = True
    hsc2.compendia_organism = self.homo_sapiens
    hsc2.compendia_version = 2
    hsc2.s3_bucket = "dr-compendia"
    hsc2.s3_key = "hsc2.tsv"
    hsc2.save()

    drc1 = ComputedFile()
    drc1.absolute_file_path = "/null/1.tsv"
    drc1.filename = "1.tsv"
    drc1.sha1 = "abc"
    drc1.size_in_bytes = 1
    drc1.is_smashable = False
    drc1.is_qn_target = False
    drc1.result = result
    drc1.is_compendia = True
    drc1.compendia_organism = self.danio_rerio
    drc1.compendia_version = 1
    drc1.s3_bucket = "dr-compendia"
    drc1.s3_key = "drc2.tsv"
    drc1.save()

    response = self.client.get(
        reverse("computed_files", kwargs={"version": API_VERSION}),
        {"is_compendia": True})
    response_json = response.json()["results"]
    self.assertEqual(3, len(response_json))
    # Prove that the download_url field is missing and not None.
    self.assertEqual("NotPresent",
                     response_json[0].get("download_url", "NotPresent"))

    # We don't actually want AWS to generate a temporary URL for
    # us, and it won't unless we're running in the cloud, but if
    # we provide an API Token and use the WithUrl serializer then
    # it will set the download_url field to None rather than
    # generate one.

    # Create a token first
    response = self.client.post(
        reverse("token", kwargs={"version": API_VERSION}),
        json.dumps({"is_activated": True}),
        content_type="application/json",
    )
    token_id = response.json()["id"]

    response = self.client.get(
        reverse("computed_files", kwargs={"version": API_VERSION}),
        {"is_compendia": True},
        HTTP_API_KEY=token_id,
    )
    response_json = response.json()["results"]
    self.assertEqual(3, len(response_json))
    self.assertIsNone(response_json[0]["download_url"])
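
# The three fixtures above differ only in organism, version, path, and S3 key.
# A hypothetical helper (not part of the actual test suite) could cut the
# repetition; the field names match the ComputedFile model used in the test.
def make_compendia_file(result, organism, version, path, s3_key):
    computed_file = ComputedFile()
    computed_file.absolute_file_path = path
    computed_file.filename = path.split("/")[-1]
    computed_file.sha1 = "abc"
    computed_file.size_in_bytes = 1
    computed_file.is_smashable = False
    computed_file.is_qn_target = False
    computed_file.result = result
    computed_file.is_compendia = True
    computed_file.compendia_organism = organism
    computed_file.compendia_version = version
    computed_file.s3_bucket = "dr-compendia"
    computed_file.s3_key = s3_key
    computed_file.save()
    return computed_file

# Example usage, mirroring the first fixture above:
#   hsc1 = make_compendia_file(result, self.homo_sapiens, 1, "/null/1.tsv", "hsc1.tsv")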

def _create_result_objects(job_context: Dict) -> Dict:
    """ Store and host the result as a ComputationalResult object. """
    archive_path = job_context["archive_path"]
    compendia_organism = _get_organisms(job_context["samples"]).first()
    compendia_version = _get_next_compendia_version(compendia_organism)

    result = ComputationalResult()
    result.commands.append(" ".join(job_context["formatted_command"]))
    result.is_ccdl = True
    result.is_public = True
    result.time_start = job_context["time_start"]
    result.time_end = job_context["time_end"]
    try:
        processor_key = "CREATE_QUANTPENDIA"
        result.processor = utils.find_processor(processor_key)
    except Exception as e:
        return utils.handle_processor_exception(job_context, processor_key, e)
    result.save()

    archive_computed_file = ComputedFile()
    archive_computed_file.absolute_file_path = archive_path
    archive_computed_file.filename = FileUtils.get_filename(archive_path)
    archive_computed_file.calculate_sha1()
    archive_computed_file.calculate_size()
    archive_computed_file.is_smashable = False
    archive_computed_file.is_qn_target = False
    archive_computed_file.result = result
    archive_computed_file.is_compendia = True
    archive_computed_file.quant_sf_only = True
    archive_computed_file.compendia_organism = compendia_organism
    archive_computed_file.compendia_version = compendia_version
    archive_computed_file.save()

    compendium_result = CompendiumResult()
    compendium_result.quant_sf_only = True
    compendium_result.result = result
    compendium_result.primary_organism = compendia_organism
    compendium_result.compendium_version = compendia_version
    compendium_result.save()

    logger.info(
        "Quantpendia created! Uploading to S3.",
        job_id=job_context["job_id"],
        archive_path=archive_path,
        organism_name=compendia_organism.name,
        **get_process_stats()
    )

    # Upload the result to S3
    timestamp = str(int(time.time()))
    s3_key = compendia_organism.name + "_" + str(compendia_version) + "_" + timestamp + ".zip"
    uploaded_to_s3 = archive_computed_file.sync_to_s3(S3_COMPENDIA_BUCKET_NAME, s3_key)

    if not uploaded_to_s3:
        raise utils.ProcessorJobError(
            "Failed to upload compendia to S3",
            success=False,
            computed_file_id=archive_computed_file.id,
        )

    if settings.RUNNING_IN_CLOUD:
        archive_computed_file.delete_local_file()

    job_context["result"] = result
    job_context["success"] = True
    return job_context
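
# _get_organisms and _get_next_compendia_version are called above but not
# defined in this section. A minimal sketch of what they might look like,
# assuming job_context["samples"] maps organism names to sample lists (as in
# the older function) and that Organism, ComputedFile, Dict, and QuerySet are
# imported at module top; an illustration, not the project's actual helpers.
def _get_organisms(aggregated_samples: Dict) -> QuerySet:
    # Collect the distinct organism ids across every sample group.
    organism_ids = set()
    for samples in aggregated_samples.values():
        organism_ids |= {sample.organism_id for sample in samples}
    return Organism.objects.filter(id__in=organism_ids)

def _get_next_compendia_version(organism) -> int:
    # Latest quant-sf-only compendium for this organism, or version 1 if none.
    last_compendia = (
        ComputedFile.objects
        .filter(is_compendia=True, quant_sf_only=True, compendia_organism=organism)
        .order_by("-compendia_version")
        .first()
    )
    return last_compendia.compendia_version + 1 if last_compendia else 1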