def create(self, request, *args, **kwargs):
    run_creator = RunCreator(**request.data)
    if run_creator.is_valid():
        run = run_creator.create()
        response = RunSerializerFull(run)
        create_run_task.delay(response.data["id"], request.data["inputs"])
        job_group_notifier_id = str(run.job_group_notifier_id)
        if job_group_notifier_id:
            self._send_notifications(job_group_notifier_id, run)
        return Response(response.data, status=status.HTTP_201_CREATED)
    return Response("Error", status=status.HTTP_400_BAD_REQUEST)
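# The view above assumes RunCreator follows a validate-then-create contract
# (is_valid() followed by create()). The class below is only a hypothetical
# sketch of that contract for illustration; the SketchRunCreator name, field
# names, and validation rule are assumptions, not the actual RunCreator code.
class SketchRunCreator:
    def __init__(self, app=None, inputs=None, name=None, tags=None, **extra):
        self.app = app          # pipeline id the run should execute
        self.inputs = inputs    # pipeline input JSON
        self.name = name
        self.tags = tags or {}
        self.extra = extra      # e.g. output_directory, output_metadata, notify_for_outputs

    def is_valid(self):
        # A real implementation would validate the inputs against the pipeline's
        # schema; this sketch only checks that a pipeline id was supplied.
        return self.app is not None

    def create(self):
        # Assumed to persist and return a Run record; not implemented in this sketch.
        raise NotImplementedError("illustrative sketch only")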
def get_jobs(self):
    files = FileRepository.filter(
        queryset=self.files,
        metadata={"requestId": self.request_id, "igocomplete": True},
    )
    data = [
        {"id": f.file.id, "path": f.file.path, "file_name": f.file.file_name, "metadata": f.metadata}
        for f in files
    ]
    sample_inputs = construct_sample_inputs(data)
    number_of_inputs = len(sample_inputs)
    return [
        (
            RunCreator(
                **{
                    "name": "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                    "app": self.get_pipeline_id(),
                    "inputs": job,
                    "tags": {"requestId": self.request_id},
                }
            ),
        )
        for i, job in enumerate(sample_inputs)
    ]
def get_jobs(self): """ Convert job inputs into serialized jobs :return: list[(serialized job info, Job)] """ self.request_id = get_request_id(self.run_ids, self.request_id) inputs = self.get_sample_inputs() return [ RunCreator( **{ "name": "ACCESS LEGACY MSI M1: %s, %i of %i" % (self.request_id, i + 1, len(inputs)), "app": self.get_pipeline_id(), "inputs": job, "tags": { "requestId": self.request_id, "cmoSampleIds": job["sample_name"], "patientId": "-".join(job["sample_name"][0].split("-")[0:2]), }, }) for i, job in enumerate(inputs) ]
def get_jobs(self): """ From self, retrieve relevant run IDs, build the input JSON for the pipeline, and then submit them as jobs through the RunCreator """ argos_run_ids = self.run_ids input_json = construct_helix_filters_input(argos_run_ids) number_of_runs = len(argos_run_ids) name = "HELIX FILTERS OUTPUTS %s runs [%s,..] " % (number_of_runs, argos_run_ids[0]) app = self.get_pipeline_id() pipeline = Pipeline.objects.get(id=app) pipeline_version = pipeline.version project_prefix = input_json["project_prefix"] input_json["helix_filter_version"] = pipeline_version input_json = self.add_output_file_names(input_json, pipeline_version) tags = { "project_prefix": project_prefix, "argos_run_ids": argos_run_ids } # TODO: Remove purity facets seg files from facets_hisens_seg_files input_json["facets_hisens_seg_files"] = self.remove_purity_files( input_json["facets_hisens_seg_files"]) helix_filters_outputs_job_data = { "app": app, "inputs": input_json, "name": name, "tags": tags } """ If project_prefix and job_group_id, write output to a directory that uses both Going by argos pipeline version id, assuming all runs use the same argos version """ argos_run = Run.objects.get(id=argos_run_ids[0]) argos_pipeline = argos_run.app output_directory = None if project_prefix: tags["project_prefix"] = project_prefix if self.job_group_id: jg = JobGroup.objects.get(id=self.job_group_id) jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f") output_directory = os.path.join(pipeline.output_directory, "argos", project_prefix, argos_pipeline.version, jg_created_date) helix_filters_outputs_job_data[ "output_directory"] = output_directory helix_filters_outputs_job = [ RunCreator(**helix_filters_outputs_job_data) ] return helix_filters_outputs_job
def get_jobs(self): """ From self, retrieve relevant run IDs, build the input JSON for the pipeline, and then submit them as jobs through the RunCreator """ run_ids = self.run_ids input_json = construct_argos_qc_input(run_ids) number_of_runs = len(run_ids) name = "ARGOS QC OUTPUTS %s runs [%s,..] " % (number_of_runs, run_ids[0]) app = self.get_pipeline_id() pipeline = Pipeline.objects.get(id=app) pipeline_version = pipeline.version project_prefix = input_json["project_prefix"] output_directory_prefix = get_output_directory_prefix(self.run_ids) tags = { "tumor_sample_names": input_json["tumor_sample_names"], "normal_sample_names": input_json["normal_sample_names"], } argos_qc_outputs_job_data = { "app": app, "inputs": input_json, "name": name, "notify_for_outputs": ["qc_pdf"], "tags": tags, } """ If project_prefix and job_group_id, write output to a directory that uses both Also use argos version number for output instead of pipeline version that's listed in Beagle """ argos_run = Run.objects.get(id=run_ids[0]) argos_pipeline = argos_run.app output_directory = None if self.output_directory_prefix: project_prefix = self.output_directory_prefix if project_prefix: tags["project_prefix"] = project_prefix output_prefix = output_directory_prefix if output_directory_prefix else project_prefix if self.job_group_id: jg = JobGroup.objects.get(id=self.job_group_id) jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f") output_directory = os.path.join(pipeline.output_directory, "argos", output_prefix, argos_pipeline.version, jg_created_date) argos_qc_outputs_job_data["output_directory"] = output_directory argos_qc_outputs_job = [RunCreator(**argos_qc_outputs_job_data)] return argos_qc_outputs_job
def get_jobs(self): """ Create job entries to pass to Ridgeback """ pipeline_obj = Pipeline.objects.get(id=self.get_pipeline_id()) inputs = self.create_input() name = "DEMO JOB" job = dict(app=pipeline_obj.id, inputs=inputs, name=name, tags={}) serialized_run = RunCreator(**job) jobs = [serialized_run] return jobs
def get_jobs(self):
    sample_inputs = self.get_nucleo_outputs()
    return [
        RunCreator(
            **{
                "name": "ACCESS QC: %s, %i of %i" % (self.request_id, i + 1, len(sample_inputs)),
                "app": self.get_pipeline_id(),
                "inputs": job,
                "tags": {"requestId": self.request_id, "cmoSampleId": job["sample_name"]},
            }
        )
        for i, job in enumerate(sample_inputs)
    ]
def _build_job(self, input_json):
    app = self.get_pipeline_id()
    pipeline = Pipeline.objects.get(id=app)
    pipeline_version = pipeline.version
    request_id = self._get_request_id()
    input_json["output_filename"] = request_id + ".rez.maf"
    tags = {"requestId": request_id}
    # add tags, name
    output_job_data = {
        "app": app,
        "tags": tags,
        "name": "Request ID %s ULTRON PHASE1:CONCAT MAFs run" % request_id,
        "inputs": input_json,
    }
    output_job = RunCreator(**output_job_data)
    return output_job
def test_create_jobs_from_operator_pipeline_deleted(
    self, get_pipeline_id, get_jobs, send_notification, create_run_task, memcache_task_lock, set_for_restart
):
    argos_jobs = list()
    argos_jobs.append(RunCreator(app="cb5d793b-e650-4b7d-bfcd-882858e29cc5", inputs=None, name=None, tags={}))
    set_for_restart.return_value = None
    get_jobs.return_value = argos_jobs
    get_pipeline_id.return_value = None
    create_run_task.return_value = None
    send_notification.return_value = None
    memcache_task_lock.return_value = True
    Run.objects.all().delete()
    operator = OperatorFactory.get_by_model(Operator.objects.get(id=1), request_id="bar")
    create_jobs_from_operator(operator, None)
    self.assertEqual(len(Run.objects.all()), 1)
    self.assertEqual(RunStatus(Run.objects.first().status), RunStatus.FAILED)
def get_jobs(self):
    files = self.files.filter(
        filemetadata__metadata__requestId=self.request_id,
        filemetadata__metadata__igocomplete=True,
    ).all()
    access_jobs = list()  # [RunCreator(app=self.get_pipeline_id(), inputs=inputs})]
    data = list()
    for file in files:
        sample = dict()
        sample["id"] = file.id
        sample["path"] = file.path
        sample["file_name"] = file.file_name
        sample["metadata"] = file.filemetadata_set.first().metadata
        data.append(sample)
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(generate_results(igo_id_group[igo_id]))
    access_inputs = construct_access_jobs(samples)
    number_of_inputs = len(access_inputs)
    for i, job in enumerate(access_inputs):
        name = "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
        access_jobs.append(
            RunCreator(
                **{
                    "name": name,
                    "app": self.get_pipeline_id(),
                    "inputs": job,
                    "tags": {"requestId": self.request_id},
                }
            )
        )
    return access_jobs  # Not returning anything for some reason for inputs; deal with later
def get_jobs(self):
    files = self.files.filter(
        filemetadata__metadata__requestId=self.request_id,
        filemetadata__metadata__igocomplete=True,
    ).all()
    tempo_jobs = list()
    data = list()
    for file in files:
        sample = dict()
        sample["id"] = file.id
        sample["path"] = file.path
        sample["file_name"] = file.file_name
        sample["metadata"] = file.filemetadata_set.first().metadata
        data.append(sample)
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    tempo_inputs, error_samples = construct_tempo_jobs(samples)
    number_of_inputs = len(tempo_inputs)
    for i, job in enumerate(tempo_inputs):
        name = "FLATBUSH: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
        tempo_jobs.append(
            RunCreator(
                **{
                    "app": self.get_pipeline_id(),
                    "inputs": job,
                    "name": name,
                    "tags": {"requestId": self.request_id},
                }
            )
        )
    return tempo_jobs
def _build_job(self, input_json, run_id):
    app = self.get_pipeline_id()
    pipeline = Pipeline.objects.get(id=app)
    pipeline_version = pipeline.version
    project_prefix = get_project_prefix(run_id)
    output_directory = self._get_output_directory(run_id)
    sample_name = input_json["sample_ids"]
    tags = {"sampleNameTumor": sample_name, "project_prefix": project_prefix}
    # add tags, name
    output_job_data = {
        "app": app,
        "tags": tags,
        "name": "Sample %s ULTRON PHASE1 run" % sample_name,
        "output_directory": output_directory,
        "inputs": input_json,
    }
    output_job = RunCreator(**output_job_data)
    return output_job
def get_jobs(self, lab_head_email):
    """
    From self, retrieve relevant run IDs, build the input JSON for the pipeline,
    and then submit them as jobs through the RunCreator
    """
    run_ids = self.get_helix_filter_run_ids(lab_head_email)
    number_of_runs = len(run_ids)
    name = "AION merging %i runs for lab head email %s" % (number_of_runs, lab_head_email)
    app = self.get_pipeline_id()
    pipeline = Pipeline.objects.get(id=app)
    pipeline_version = pipeline.version
    input_json = self.build_input_json(run_ids)
    tags = {"study_id": input_json["study_id"], "num_runs_merged": len(run_ids)}
    print(input_json)
    aion_outputs_job_data = {"app": app, "inputs": input_json, "name": name, "tags": tags}
    aion_outputs_job = [RunCreator(**aion_outputs_job_data)]
    return aion_outputs_job
def get_jobs(self):
    files = FileRepository.filter(
        queryset=self.files,
        metadata={"requestId": self.request_id, "igocomplete": True},
    )
    data = [
        {"id": f.file.id, "path": f.file.path, "file_name": f.file.file_name, "metadata": f.metadata}
        for f in files
    ]
    inputs = construct_inputs(data, self.request_id)
    number_of_inputs = len(inputs)
    return [
        RunCreator(
            **{
                "name": "LEGACY FASTQ Merge: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                "app": self.get_pipeline_id(),
                "output_metadata": {key: metadata[key] for key in METADATA_OUTPUT_FIELDS if key in metadata},
                "inputs": job,
                "tags": {"requestId": self.request_id, "sampleId": metadata["sampleId"]},
            }
        )
        for i, (job, metadata) in enumerate(inputs)
    ]
def get_jobs(self):
    ports = Port.objects.filter(run_id__in=self.run_ids, port_type=PortType.OUTPUT)
    data = [
        {"id": f.id, "path": f.path, "file_name": f.file_name, "metadata": f.filemetadata_set.first().metadata}
        for p in ports
        for f in p.files.all()
    ]
    request_id = data[0]["metadata"]["requestId"]
    (sample_inputs, no_of_errors) = construct_sample_inputs(data, request_id, self.job_group_id)
    if no_of_errors:
        return
    number_of_inputs = len(sample_inputs)
    return [
        RunCreator(
            **{
                "name": "ACCESS LEGACY COLLAPSING M1: %s, %i of %i" % (request_id, i + 1, number_of_inputs),
                "app": self.get_pipeline_id(),
                "inputs": job,
                "tags": {
                    "requestId": request_id,
                    "cmoSampleIds": job["add_rg_ID"],
                    "reference_version": "HG19",
                },
            }
        )
        for i, job in enumerate(sample_inputs)
    ]
def get_jobs(self): """ From self, retrieve relevant run IDs, build the input JSON for the pipeline, and then submit them as jobs through the RunCreator """ run_ids = self.run_ids input_json = construct_copy_outputs_input(run_ids) mapping_file_content, pairing_file_content, data_clinical_content = generate_sample_pairing_and_mapping_files( run_ids) mapping_file = self.write_to_file("sample_mapping.txt", mapping_file_content) pairing_file = self.write_to_file("sample_pairing.txt", pairing_file_content) data_clinical_file = self.write_to_file("sample_data_clinical.txt", data_clinical_content) input_json["meta"] = [mapping_file, pairing_file, data_clinical_file] number_of_runs = len(run_ids) name = "ARGOS COPY OUTPUTS %s runs [%s,..] " % (number_of_runs, run_ids[0]) app = self.get_pipeline_id() pipeline = Pipeline.objects.get(id=app) pipeline_version = pipeline.version project_prefix = input_json["project_prefix"] output_directory_prefix = get_output_directory_prefix(self.run_ids) tags = {"run_ids": run_ids} copy_outputs_job_data = { "app": app, "inputs": input_json, "name": name, "tags": tags } """ If project_prefix and job_group_id, write output to a directory that uses both Also use argos version number for output instead of pipeline version that's listed in Beagle """ argos_run = Run.objects.get(id=run_ids[0]) argos_pipeline = argos_run.app output_directory = None if project_prefix: tags["project_prefix"] = project_prefix if self.job_group_id: output_prefix = output_directory_prefix if output_directory_prefix else project_prefix jg = JobGroup.objects.get(id=self.job_group_id) jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f") output_directory = os.path.join(pipeline.output_directory, "argos", output_prefix, argos_pipeline.version, jg_created_date) copy_outputs_job_data["output_directory"] = output_directory copy_outputs_job = [RunCreator(**copy_outputs_job_data)] return copy_outputs_job
def ready_job(self, pipeline, tempo_inputs, job):
    self._jobs.append(RunCreator(app=pipeline, inputs=job))
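# Hypothetical usage sketch for ready_job() above: an operator would typically call
# it once per constructed input, accumulating RunCreator objects on self._jobs.
# The build_jobs method and variable names are assumptions for illustration only.
def build_jobs(self, pipeline, tempo_inputs):
    self._jobs = []
    for job in tempo_inputs:
        self.ready_job(pipeline, tempo_inputs, job)
    return self._jobs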
def get_jobs(self):
    files = FileRepository.filter(
        queryset=self.files,
        metadata={"requestId": self.request_id, "igocomplete": True},
    )
    argos_jobs = list()
    cnt_tumors = FileRepository.filter(
        queryset=self.files,
        metadata={"requestId": self.request_id, "tumorOrNormal": "Tumor", "igocomplete": True},
    ).count()
    if cnt_tumors == 0:
        cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
        send_notification.delay(cant_do)
        all_normals_event = SetLabelEvent(self.job_group_notifier_id, "all_normals").to_dict()
        send_notification.delay(all_normals_event)
        return argos_jobs
    data = list()
    for f in files:
        sample = dict()
        sample["id"] = f.file.id
        sample["path"] = f.file.path
        sample["file_name"] = f.file.file_name
        sample["metadata"] = f.metadata
        data.append(sample)
    files = list()
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))
    argos_inputs, error_samples = construct_argos_jobs(samples)
    number_of_inputs = len(argos_inputs)
    sample_pairing = ""
    sample_mapping = ""
    pipeline = self.get_pipeline_id()
    try:
        pipeline_obj = Pipeline.objects.get(id=pipeline)
    except Pipeline.DoesNotExist:
        pass
    for i, job in enumerate(argos_inputs):
        tumor_sample_name = job["pair"][0]["ID"]
        for key in ("R1", "R2", "zR1", "zR2"):
            for p in job["pair"][0][key]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([tumor_sample_name, filepath]) + "\n"
                    files.append(filepath)
        normal_sample_name = job["pair"][1]["ID"]
        for key in ("R1", "R2", "zR1", "zR2", "bam"):
            for p in job["pair"][1][key]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([normal_sample_name, filepath]) + "\n"
                    files.append(filepath)
        name = "ARGOS %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
        assay = job["assay"]
        pi = job["pi"]
        pi_email = job["pi_email"]
        sample_pairing += "\t".join([normal_sample_name, tumor_sample_name]) + "\n"
        tags = {
            "requestId": self.request_id,
            "sampleNameTumor": tumor_sample_name,
            "sampleNameNormal": normal_sample_name,
            "labHeadName": pi,
            "labHeadEmail": pi_email,
        }
        argos_jobs.append(RunCreator(app=pipeline, inputs=job, name=name, tags=tags))
    operator_run_summary = UploadAttachmentEvent(
        self.job_group_notifier_id, "sample_pairing.txt", sample_pairing
    ).to_dict()
    send_notification.delay(operator_run_summary)
    mapping_file_event = UploadAttachmentEvent(
        self.job_group_notifier_id, "sample_mapping.txt", sample_mapping
    ).to_dict()
    send_notification.delay(mapping_file_event)
    data_clinical = generate_sample_data_content(
        files,
        pipeline_name=pipeline_obj.name,
        pipeline_github=pipeline_obj.github,
        pipeline_version=pipeline_obj.version,
    )
    sample_data_clinical_event = UploadAttachmentEvent(
        self.job_group_notifier_id, "sample_data_clinical.txt", data_clinical
    ).to_dict()
    send_notification.delay(sample_data_clinical_event)
    self.evaluate_sample_errors(error_samples)
    self.summarize_pairing_info(argos_inputs)
    return argos_jobs
def get_jobs(self): """ From self, retrieve relevant run IDs, build the input JSON for the pipeline, and then submit them as jobs through the RunCreator """ run_ids = self.run_ids input_json = construct_copy_outputs_input(run_ids) mapping_file_content, pairing_file_content, data_clinical_content = generate_sample_pairing_and_mapping_files( run_ids) input_json["meta"] = [ { "class": "File", "basename": "sample_mapping.txt", "contents": mapping_file_content }, { "class": "File", "basename": "sample_pairing.txt", "contents": pairing_file_content }, { "class": "File", "basename": "sample_data_clinical.txt", "contents": data_clinical_content }, ] number_of_runs = len(run_ids) name = "ARGOS COPY OUTPUTS %s runs [%s,..] " % (number_of_runs, run_ids[0]) app = self.get_pipeline_id() pipeline = Pipeline.objects.get(id=app) pipeline_version = pipeline.version project_prefix = input_json["project_prefix"] tags = {"run_ids": run_ids} copy_outputs_job_data = { "app": app, "inputs": input_json, "name": name, "tags": tags } """ If project_prefix and job_group_id, write output to a directory that uses both """ output_directory = None if project_prefix: tags["project_prefix"] = project_prefix if self.job_group_id: jg = JobGroup.objects.get(id=self.job_group_id) jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f") output_directory = os.path.join(pipeline.output_directory, "argos", project_prefix, pipeline_version, jg_created_date) copy_outputs_job_data["output_directory"] = output_directory copy_outputs_job = [RunCreator(**copy_outputs_job_data)] return copy_outputs_job