Example #1
    def _covert_to_cwl_format(val):
        file_obj = copy.deepcopy(val)
        location = file_obj.pop('location', None)
        if location:
            try:
                file_db_object = FileProcessor.get_file_obj(location)
            except FileHelperException as e:
                raise PortProcessorException('File %s not found' % location)
            path = file_db_object.path
            path_obj = Path(path)
            checksum = FileProcessor.get_file_checksum(file_db_object)
            if checksum:
                file_obj['checksum'] = checksum
            size = FileProcessor.get_file_size(file_db_object)
            if size:
                file_obj['size'] = size
            file_obj['basename'] = path_obj.name
            file_obj['nameext'] = path_obj.suffix
            file_obj['nameroot'] = path_obj.stem
            file_obj['path'] = path
        secondary_files = file_obj.pop('secondaryFiles', [])
        secondary_files_value = PortProcessor.process_files(
            secondary_files, PortAction.CONVERT_TO_CWL_FORMAT)
        if secondary_files_value:
            file_obj['secondaryFiles'] = secondary_files_value

        return file_obj
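A minimal usage sketch, not taken from the listing: judging only by the recursive secondaryFiles call above, PortProcessor.process_files appears to map this handler over a list of CWL File dicts, so converting stored files back to CWL format might look like the following (the bid:// value is an illustrative placeholder).

# Hypothetical input: File dicts whose 'location' is a registered bid:// URI.
stored_files = [{'class': 'File', 'location': 'bid://<file-uuid>'}]
# Each returned dict gains path, basename, nameext, nameroot and, when known, checksum and size.
cwl_files = PortProcessor.process_files(stored_files, PortAction.CONVERT_TO_CWL_FORMAT)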
Example #2
 def _register_file(val, size, group_id, metadata, file_list):
     file_obj = copy.deepcopy(val)
     file_obj.pop("basename", None)
     file_obj.pop("nameroot", None)
     file_obj.pop("nameext", None)
     uri = file_obj.pop('location', None)
     checksum = file_obj.pop("checksum", None)
     try:
         file_obj_db = FileProcessor.create_file_obj(uri, size, checksum, group_id, metadata)
     except FileConflictException as e:
         logger.warning(str(e))
         file_obj_db = FileProcessor.get_file_obj(uri)
         # TODO: Check what to do in case the file already exists in the DB. Note: This should never happen
         # raise PortProcessorException(e)
     secondary_files = file_obj.pop('secondaryFiles', [])
     secondary_file_list = []
     secondary_files_obj = PortProcessor.process_files(secondary_files,
                                                       PortAction.REGISTER_OUTPUT_FILES,
                                                       group_id=group_id,
                                                       metadata=metadata,
                                                       file_list=secondary_file_list)
     if secondary_files_obj:
         file_obj['secondaryFiles'] = secondary_files_obj
     file_obj['location'] = FileProcessor.get_bid_from_file(file_obj_db)
     if file_list is not None:
         file_list.append('bid://%s' % FileProcessor.get_bid_from_file(file_obj_db))
         file_list.extend([f['location'] for f in secondary_files_obj])
     return file_obj
Example #3
 def to_db(self):
     if self.port_object:
         self.port_object.name = self.name
         self.port_object.port_type = self.port_type
         self.port_object.schema = self.schema
         self.port_object.secondary_files = self.secondary_files
         self.port_object.db_value = self.db_value
         self.port_object.value = self.value
         self.port_object.save()
         self.port_object.files.set(
             [FileProcessor.get_file_obj(v) for v in self.files])
         self.port_object.notify = self.notify
         self.port_object.save()
     else:
         try:
             run_object = Run.objects.get(id=self.run_id)
         except Run.DoesNotExist:
             raise PortObjectConstructException(
                 "Port save failed. Run with id: %s doesn't exist.")
         new_port = Port(
             run=run_object,
             name=self.name,
             port_type=self.port_type,
             schema=self.schema,
             secondary_files=self.secondary_files,
             db_value=self.db_value,
             value=self.value,
             notify=self.name in run_object.notify_for_outputs,
         )
         new_port.save()
         new_port.files.set(
             [FileProcessor.get_file_obj(v) for v in self.files])
         new_port.save()
         self.port_object = new_port
Example #4
def get_files_from_run(r):
    files = list()
    inp_port = Port.objects.filter(run_id=r.id, name='pair').first()
    for p in inp_port.db_value[0]['R1']:
        files.append(FileProcessor.get_file_path(p['location']))
    for p in inp_port.db_value[0]['R2']:
        files.append(FileProcessor.get_file_path(p['location']))
    for p in inp_port.db_value[0]['zR1']:
        files.append(FileProcessor.get_file_path(p['location']))
    for p in inp_port.db_value[0]['zR2']:
        files.append(FileProcessor.get_file_path(p['location']))
    return files
Example #5
def get_files_from_run(r):
    files = list()
    inp_port = Port.objects.filter(run_id=r.id, name="pair").first()
    for p in inp_port.db_value[0]["R1"]:
        files.append(FileProcessor.get_file_path(p["location"]))
    for p in inp_port.db_value[0]["R2"]:
        files.append(FileProcessor.get_file_path(p["location"]))
    for p in inp_port.db_value[0]["zR1"]:
        files.append(FileProcessor.get_file_path(p["location"]))
    for p in inp_port.db_value[0]["zR2"]:
        files.append(FileProcessor.get_file_path(p["location"]))
    return files
Example #6
 def _fix_locations_in_db(val, file_list):
     """
     Temporary method for fixing Values in DB
     :param val:
     :param file_list:
     :return:
     """
     file_obj = copy.deepcopy(val)
     location = val.get('location')
     if not location:
         location = val.get('path')
     if not location:
         print("Couldn't fix value: %s. File doesn't exist" % file_obj)
         return file_obj
     if location.startswith('/'):
         location = 'juno://%s' % location
     elif PortProcessor.is_uuid(location):
         location = 'bid://%s' % location
     elif not location.startswith('juno://') and not location.startswith('bid://'):
         print("Couldn't fix value: %s" % file_obj)
         return file_obj
     try:
         bid = FileProcessor.get_file_id(location)
     except FileHelperException as e:
         print("Couldn't fix value: %s. File doesn't exist" % file_obj)
         return file_obj
     file_obj['location'] = 'bid://%s' % bid
     if file_obj.get('path'):
         file_obj.pop('path')
     if file_list is not None:
         file_list.append('bid://%s' % bid)
     return file_obj
Example #7
 def test_create_file_obj_bad_file_group(self):
     file_group_id = str(uuid.uuid4())
     with self.assertRaises(Exception) as context:
         FileProcessor.create_file_obj(
             "file:///path/to/file.unknown_data_type", 123345, "sha1$calculated checksum", file_group_id, {}
         )
     self.assertTrue("Invalid FileGroup id: %s" % file_group_id in str(context.exception))
Example #8
 def _send_as_notification(val, job_group):
     uri = val.get('location')
     path = FileProcessor.parse_path_from_uri(uri)
     file_name = os.path.basename(path)
     if job_group:
         event = UploadAttachmentEvent(str(job_group.id), file_name, path, download=True)
         send_notification.delay(event.to_dict())
     else:
         logger.info("Can't upload file: %s. JobGroup not specified", path)
     return val
Example #9
 def ready(self):
     for p in self.inputs:
         CWLPortObject.ready(p)
     samples = set()
     for p in self.inputs:
         for f in p.files:
             file_obj = FileProcessor.get_file_obj(f)
             if file_obj.sample:
                 samples.add(file_obj.sample)
     self.samples = list(samples)
     for p in self.outputs:
         CWLPortObject.ready(p)
     self.status = RunStatus.READY
Example #10
    def _register_file(val, size, group_id, metadata, file_list):
        file_obj = copy.deepcopy(val)
        file_obj.pop("basename", None)
        file_obj.pop("nameroot", None)
        file_obj.pop("nameext", None)
        uri = file_obj.pop("location", None)
        checksum = file_obj.pop("checksum", None)
        try:
            file_obj_db = FileProcessor.create_file_obj(uri, size, checksum, group_id, metadata)
        except FileConflictException as e:
            logger.warning(str(e))
            # TODO: Check what to do in case the file already exists in the DB.
            file_obj_db = FileProcessor.get_file_obj(uri)
            FileProcessor.update_file(file_obj_db, file_obj_db.path, metadata)

        secondary_files = file_obj.pop("secondaryFiles", [])
        secondary_file_list = []
        secondary_files_obj = PortProcessor.process_files(
            secondary_files,
            PortAction.REGISTER_OUTPUT_FILES,
            group_id=group_id,
            metadata=metadata,
            file_list=secondary_file_list,
        )
        if secondary_files_obj:
            file_obj["secondaryFiles"] = secondary_files_obj
        file_obj["location"] = FileProcessor.get_bid_from_file(file_obj_db)
        if file_list is not None:
            file_list.append("bid://%s" % FileProcessor.get_bid_from_file(file_obj_db))
            file_list.extend([f["location"] for f in secondary_files_obj])
        return file_obj
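A minimal usage sketch mirroring the secondaryFiles call above (an assumption, not code from the listing): registering a port's output files and collecting the resulting bid:// URIs. output_value, file_group and the metadata dict below are hypothetical placeholders.

registered_uris = []  # filled in-place with "bid://..." URIs by the handler above
registered_files = PortProcessor.process_files(
    output_value,                       # hypothetical: list of {"class": "File", "location": "file:///..."} dicts
    PortAction.REGISTER_OUTPUT_FILES,
    group_id=str(file_group.id),        # hypothetical FileGroup
    metadata={"requestId": "EXAMPLE-REQUEST"},  # hypothetical metadata
    file_list=registered_uris,
)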
Example #11
def populate_run_samples(apps, _):
    Run = apps.get_model('runner', 'Run')
    for run in Run.objects.all():
        samples = set()
        try:
            run_obj = RunObject.from_db(run.id)
        except Exception:
            print("Run %s can't be migrated" % str(run.id))
            continue
        for p in run_obj.inputs:
            for f in p.files:
                file_obj = FileProcessor.get_file_obj(f)
                if file_obj.sample:
                    samples.add(file_obj.sample)
        run_obj.samples = list(samples)
        run_obj.to_db()
Example #12
 def from_db(cls, port_id):
     try:
         port = Port.objects.get(id=port_id)
     except Port.DoesNotExist:
         raise PortObjectConstructException('Port with id: %s not found' % port_id)
     return cls(str(port.run.id),
                port.name,
                port.port_type,
                port.schema,
                port.secondary_files,
                port.db_value,
                port.value,
                [FileProcessor.get_bid_from_file(f) for f in port.files.all()],
                port_id=port_id,
                notify=port.notify)
Example #13
 def _convert_to_path(val):
     file_obj = copy.deepcopy(val)
     location = file_obj.pop("location", None)
     if not location and val.get("contents"):
         logger.debug("Processing file literal %s", str(val))
         return val
     try:
         path = FileProcessor.get_file_path(location)
     except FileHelperException as e:
         raise PortProcessorException("File %s not found" % location)
     secondary_files = file_obj.pop("secondaryFiles", [])
     secondary_files_value = PortProcessor.process_files(secondary_files, PortAction.CONVERT_TO_PATH)
     if secondary_files_value:
         file_obj["secondaryFiles"] = secondary_files_value
     file_obj["path"] = path
     return file_obj
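Another hedged sketch along the same lines, assuming the same list-mapping behavior as the secondaryFiles recursion in the example above: resolving registered files to local filesystem paths.

# Hypothetical input; the bid:// value is an illustrative placeholder.
stored_files = [{"class": "File", "location": "bid://<file-uuid>"}]
# Each returned dict carries a local "path" resolved through FileProcessor.get_file_path.
path_files = PortProcessor.process_files(stored_files, PortAction.CONVERT_TO_PATH)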
Example #14
 def _update_location_to_bid(val, file_list):
     file_obj = copy.deepcopy(val)
     location = val.get('location')
     if not location and val.get('contents'):
         logger.debug("Processing file literal %s", str(val))
         return val
     bid = FileProcessor.get_file_id(location)
     file_obj['location'] = 'bid://%s' % bid
     secondary_files = file_obj.pop('secondaryFiles', [])
     secondary_file_list = []
     secondary_files_obj = PortProcessor.process_files(secondary_files,
                                                       PortAction.CONVERT_TO_BID,
                                                       file_list=secondary_file_list)
     if secondary_files_obj:
         file_obj['secondaryFiles'] = secondary_files_obj
     if file_obj.get('path'):
         file_obj.pop('path')
     if file_list is not None:
         file_list.append('bid://%s' % bid)
         file_list.extend([f['location'] for f in secondary_files_obj])
     return file_obj
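And the converse direction, again only a sketch assuming the list-mapping behavior suggested by the secondaryFiles call above: rewriting already-registered file locations to bid:// URIs while collecting them. port_value is a hypothetical placeholder.

collected = []  # filled with 'bid://...' URIs for the files and their secondary files
bid_files = PortProcessor.process_files(port_value, PortAction.CONVERT_TO_BID, file_list=collected)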
Example #15
 def test_run_complete_job(
     self, mock_populate_job_group_notifier, mock_get_pipeline, memcache_task_lock, send_notification
 ):
     with open("runner/tests/run/pair-workflow.cwl", "r") as f:
         app = json.load(f)
     with open("runner/tests/run/inputs.json", "r") as f:
         inputs = json.load(f)
     mock_populate_job_group_notifier.return_value = None
     mock_get_pipeline.return_value = app
     memcache_task_lock.return_value = True
     send_notification.return_value = False
     run = RunObjectFactory.from_definition(str(self.run.id), inputs)
     run.to_db()
     operator_run = OperatorRun.objects.first()
     operator_run.runs.add(run.run_obj)
     num_completed_runs = operator_run.num_completed_runs
     complete_job(run.run_id, self.outputs)
     operator_run.refresh_from_db()
     self.assertEqual(operator_run.num_completed_runs, num_completed_runs + 1)
     run_obj = RunObjectFactory.from_db(run.run_id)
     file_obj = File.objects.filter(path=self.outputs["maf"]["location"].replace("file://", "")).first()
     run_obj.to_db()
     for out in run_obj.outputs:
         if out.name == "maf":
             self.assertEqual(out.value["location"], self.outputs["maf"]["location"])
             self.assertEqual(FileProcessor.get_bid_from_file(file_obj), out.db_value["location"])
     port = Port.objects.filter(run_id=run_obj.run_id, name="bams").first()
     self.assertEqual(len(port.files.all()), 4)
     expected_result = (
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam",
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai",
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam",
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai",
     )
     self.assertTrue(port.files.all()[0].path in expected_result)
     self.assertTrue(port.files.all()[1].path in expected_result)
     self.assertTrue(port.files.all()[2].path in expected_result)
     self.assertTrue(port.files.all()[3].path in expected_result)
Example #16
 def test_run_complete_job(self, mock_get_pipeline):
     with open('runner/tests/run/pair-workflow.cwl', 'r') as f:
         app = json.load(f)
     with open('runner/tests/run/inputs.json', 'r') as f:
         inputs = json.load(f)
     mock_get_pipeline.return_value = app
     run = RunObject.from_cwl_definition(str(self.run.id), inputs)
     run.to_db()
     operator_run = OperatorRun.objects.first()
     operator_run.runs.add(run.run_obj)
     num_completed_runs = operator_run.num_completed_runs
     complete_job(run.run_id, self.outputs)
     operator_run.refresh_from_db()
     self.assertEqual(operator_run.num_completed_runs,
                      num_completed_runs + 1)
     run_obj = RunObject.from_db(run.run_id)
     file_obj = File.objects.filter(path=self.outputs['maf']['location'].
                                    replace('file://', '')).first()
     run_obj.to_db()
     for out in run_obj.outputs:
         if out.name == 'maf':
             self.assertEqual(out.value['location'],
                              self.outputs['maf']['location'])
             self.assertEqual(FileProcessor.get_bid_from_file(file_obj),
                              out.db_value['location'])
     port = Port.objects.filter(run_id=run_obj.run_id, name='bams').first()
     self.assertEqual(len(port.files.all()), 4)
     expected_result = (
         '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam',
         '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai',
         '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam',
         '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai'
     )
     self.assertTrue(port.files.all()[0].path in expected_result)
     self.assertTrue(port.files.all()[1].path in expected_result)
     self.assertTrue(port.files.all()[2].path in expected_result)
     self.assertTrue(port.files.all()[3].path in expected_result)
Example #17
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={
                                          "requestId": self.request_id,
                                          "igocomplete": True
                                      })
        argos_jobs = list()

        cnt_tumors = FileRepository.filter(queryset=self.files,
                                           metadata={
                                               "requestId": self.request_id,
                                               "tumorOrNormal": "Tumor",
                                               "igocomplete": True
                                           }).count()
        if cnt_tumors == 0:
            cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
            send_notification.delay(cant_do)
            all_normals_event = SetLabelEvent(self.job_group_notifier_id,
                                              "all_normals").to_dict()
            send_notification.delay(all_normals_event)
            return argos_jobs

        data = list()
        for f in files:
            sample = dict()
            sample["id"] = f.file.id
            sample["path"] = f.file.path
            sample["file_name"] = f.file.file_name
            sample["metadata"] = f.metadata
            data.append(sample)

        files = list()
        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample["metadata"]["sampleId"]
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(build_sample(igo_id_group[igo_id]))

        argos_inputs, error_samples = construct_argos_jobs(samples)
        number_of_inputs = len(argos_inputs)

        sample_pairing = ""
        sample_mapping = ""
        pipeline = self.get_pipeline_id()

        try:
            pipeline_obj = Pipeline.objects.get(id=pipeline)
        except Pipeline.DoesNotExist:
            pass

        for i, job in enumerate(argos_inputs):
            tumor_sample_name = job["pair"][0]["ID"]
            for p in job["pair"][0]["R1"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([tumor_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)
            for p in job["pair"][0]["R2"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([tumor_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)
            for p in job["pair"][0]["zR1"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([tumor_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)
            for p in job["pair"][0]["zR2"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([tumor_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)

            normal_sample_name = job["pair"][1]["ID"]
            for p in job["pair"][1]["R1"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([normal_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)
            for p in job["pair"][1]["R2"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([normal_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)
            for p in job["pair"][1]["zR1"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([normal_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)
            for p in job["pair"][1]["zR2"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([normal_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)

            for p in job["pair"][1]["bam"]:
                filepath = FileProcessor.parse_path_from_uri(p["location"])
                if filepath not in files:
                    sample_mapping += "\t".join([normal_sample_name, filepath
                                                 ]) + "\n"
                    files.append(filepath)

            name = "ARGOS %s, %i of %i" % (self.request_id, i + 1,
                                           number_of_inputs)
            assay = job["assay"]
            pi = job["pi"]
            pi_email = job["pi_email"]

            sample_pairing += "\t".join(
                [normal_sample_name, tumor_sample_name]) + "\n"

            tags = {
                "requestId": self.request_id,
                "sampleNameTumor": tumor_sample_name,
                "sampleNameNormal": normal_sample_name,
                "labHeadName": pi,
                "labHeadEmail": pi_email,
            }
            argos_jobs.append(
                RunCreator(app=pipeline, inputs=job, name=name, tags=tags))

        operator_run_summary = UploadAttachmentEvent(
            self.job_group_notifier_id, "sample_pairing.txt",
            sample_pairing).to_dict()
        send_notification.delay(operator_run_summary)

        mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                   "sample_mapping.txt",
                                                   sample_mapping).to_dict()
        send_notification.delay(mapping_file_event)

        data_clinical = generate_sample_data_content(
            files,
            pipeline_name=pipeline_obj.name,
            pipeline_github=pipeline_obj.github,
            pipeline_version=pipeline_obj.version,
        )
        sample_data_clinical_event = UploadAttachmentEvent(
            self.job_group_notifier_id, "sample_data_clinical.txt",
            data_clinical).to_dict()
        send_notification.delay(sample_data_clinical_event)

        self.evaluate_sample_errors(error_samples)
        self.summarize_pairing_info(argos_inputs)

        return argos_jobs
Example #18
    def get_jobs(self):

        argos_jobs = list()

        if self.request_id:
            files = FileRepository.filter(queryset=self.files,
                                          metadata={
                                              'requestId': self.request_id,
                                              'igocomplete': True
                                          },
                                          filter_redact=True)

            cnt_tumors = FileRepository.filter(queryset=self.files,
                                               metadata={
                                                   'requestId':
                                                   self.request_id,
                                                   'tumorOrNormal': 'Tumor',
                                                   'igocomplete': True
                                               },
                                               filter_redact=True).count()
        elif self.pairing:
            files, cnt_tumors = self.get_files_for_pairs()

        if cnt_tumors == 0:
            cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
            send_notification.delay(cant_do)
            all_normals_event = SetLabelEvent(self.job_group_notifier_id,
                                              'all_normals').to_dict()
            send_notification.delay(all_normals_event)
            return argos_jobs

        data = list()
        for f in files:
            sample = dict()
            sample['id'] = f.file.id
            sample['path'] = f.file.path
            sample['file_name'] = f.file.file_name
            sample['metadata'] = f.metadata
            data.append(sample)

        files = list()
        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample['metadata']['sampleId']
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(build_sample(igo_id_group[igo_id]))

        argos_inputs, error_samples = construct_argos_jobs(
            samples, self.pairing)
        number_of_inputs = len(argos_inputs)

        sample_pairing = ""
        sample_mapping = ""
        pipeline = self.get_pipeline_id()

        try:
            pipeline_obj = Pipeline.objects.get(id=pipeline)
        except Pipeline.DoesNotExist:
            pass

        check_for_duplicates = list()
        for i, job in enumerate(argos_inputs):
            tumor_sample_name = job['pair'][0]['ID']
            for p in job['pair'][0]['R1']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([tumor_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)
            for p in job['pair'][0]['R2']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([tumor_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)
            for p in job['pair'][0]['zR1']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([tumor_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)
            for p in job['pair'][0]['zR2']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([tumor_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)

            normal_sample_name = job['pair'][1]['ID']
            for p in job['pair'][1]['R1']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([normal_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)
            for p in job['pair'][1]['R2']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([normal_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)
            for p in job['pair'][1]['zR1']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([normal_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)
            for p in job['pair'][1]['zR2']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([normal_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)

            for p in job['pair'][1]['bam']:
                filepath = FileProcessor.parse_path_from_uri(p['location'])
                file_str = "\t".join([normal_sample_name, filepath]) + "\n"
                if file_str not in check_for_duplicates:
                    check_for_duplicates.append(file_str)
                    sample_mapping += file_str
                if filepath not in files:
                    files.append(filepath)

            name = "ARGOS %s, %i of %i" % (self.request_id, i + 1,
                                           number_of_inputs)
            assay = job['assay']
            pi = job['pi']
            pi_email = job['pi_email']

            sample_pairing += "\t".join(
                [normal_sample_name, tumor_sample_name]) + "\n"

            argos_jobs.append((APIRunCreateSerializer(
                data={
                    'app': pipeline,
                    'inputs': argos_inputs,
                    'name': name,
                    'tags': {
                        'requestId': self.request_id,
                        'sampleNameTumor': tumor_sample_name,
                        'sampleNameNormal': normal_sample_name,
                        'labHeadName': pi,
                        'labHeadEmail': pi_email
                    }
                }), job))

        operator_run_summary = UploadAttachmentEvent(
            self.job_group_notifier_id, 'sample_pairing.txt',
            sample_pairing).to_dict()
        send_notification.delay(operator_run_summary)

        mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                   'sample_mapping.txt',
                                                   sample_mapping).to_dict()
        send_notification.delay(mapping_file_event)

        data_clinical = generate_sample_data_content(
            files,
            pipeline_name=pipeline_obj.name,
            pipeline_github=pipeline_obj.github,
            pipeline_version=pipeline_obj.version)
        sample_data_clinical_event = UploadAttachmentEvent(
            self.job_group_notifier_id, 'sample_data_clinical.txt',
            data_clinical).to_dict()
        send_notification.delay(sample_data_clinical_event)

        self.evaluate_sample_errors(error_samples)
        self.summarize_pairing_info(argos_inputs)

        return argos_jobs
Example #19
def generate_sample_pairing_and_mapping_files(run_ids):

    sample_pairing = ""
    sample_mapping = ""

    runs = Run.objects.filter(id__in=run_ids)

    request_id_set = set()

    files = list()
    data_clinical = ""

    if runs:
        pipeline = runs[0].app

    for r in runs:
        request_id_set.add(r.tags['requestId'])
        inp_port = Port.objects.filter(run_id=r.id, name='pair').first()
        tumor_sample_name = inp_port.db_value[0]['ID']
        for p in inp_port.db_value[0]['R1']:
            sample_mapping += "\t".join([
                tumor_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[0]['R2']:
            sample_mapping += "\t".join([
                tumor_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[0]['zR1']:
            sample_mapping += "\t".join([
                tumor_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[0]['zR2']:
            sample_mapping += "\t".join([
                tumor_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        normal_sample_name = inp_port.db_value[1]['ID']
        for p in inp_port.db_value[1]['R1']:
            sample_mapping += "\t".join([
                normal_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[1]['R2']:
            sample_mapping += "\t".join([
                normal_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[1]['zR1']:
            sample_mapping += "\t".join([
                normal_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[1]['zR2']:
            sample_mapping += "\t".join([
                normal_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))
        for p in inp_port.db_value[1]['bam']:
            sample_mapping += "\t".join([
                normal_sample_name,
                FileProcessor.get_file_path(p['location'])
            ]) + "\n"
            files.append(FileProcessor.get_file_path(p['location']))

        sample_pairing += "\t".join([normal_sample_name, tumor_sample_name
                                     ]) + "\n"

    if runs:
        data_clinical = generate_sample_data_content(
            files,
            pipeline_name=pipeline.name,
            pipeline_github=pipeline.github,
            pipeline_version=pipeline.version)

    return sample_mapping, sample_pairing, data_clinical
Example #20
 def test_create_file_type_unknown(self):
     file_obj = FileProcessor.create_file_obj(
         'file:///path/to/file.unknown_data_type', 123345,
         'sha1$calculated checksum', str(self.file_group.id), {})
     self.assertEqual(file_obj.file_type, self.file_type_unknown)
Example #21
 def test_create_file_setting_proper_file_type_based_on_extension(self):
     file_obj = FileProcessor.create_file_obj(
         'file:///path/to/file.fastq.gz', 123345,
         'sha1$calculated checksum', str(self.file_group.id), {})
     self.assertEqual(file_obj.file_type, self.file_type_fastq)