Example #1
 def create(self, request, *args, **kwargs):
     serializer = APIRunCreateSerializer(data=request.data,
                                         context={'request': request})
     if serializer.is_valid():
         run = serializer.save()
         response = RunSerializerFull(run)
         create_run_task.delay(response.data['id'], request.data['inputs'])
         job_group_notifier_id = str(run.job_group_notifier_id)
         self._send_notifications(job_group_notifier_id, run)
         return Response(response.data, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
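All of the examples below feed APIRunCreateSerializer a data dict built from the same core keys: 'app' (pipeline id), 'inputs' (pipeline-specific JSON), 'name', and 'tags', plus optional keys such as 'output_directory', 'output_metadata', 'notify_for_outputs', 'job_group_id', 'job_group_notifier_id', and 'resume'. A minimal standalone sketch of building and validating such a payload (the import path and input values are assumptions; the pipeline UUID is the placeholder value from Example #11):

from runner.serializers import APIRunCreateSerializer  # assumed import path

payload = {
    'app': 'cb5d793b-e650-4b7d-bfcd-882858e29cc5',  # pipeline id (placeholder, as in Example #11)
    'inputs': {'some_input': 'value'},               # pipeline-specific inputs (placeholder)
    'name': "EXAMPLE: 1 of 1",
    'tags': {'requestId': 'EXAMPLE_REQUEST'},
}

serializer = APIRunCreateSerializer(data=payload)
if serializer.is_valid():
    run = serializer.save()        # persists a Run, as in Example #1
else:
    errors = serializer.errors     # field-level validation errors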
Example #2
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={'requestId': self.request_id,
                                                'igocomplete': True})
        data = [
            {
                "id": f.file.id,
                "path": f.file.path,
                "file_name": f.file.file_name,
                "metadata": f.metadata
            } for f in files
        ]

        sample_inputs = construct_sample_inputs(data)

        number_of_inputs = len(sample_inputs)

        return [
            (
                APIRunCreateSerializer(
                    data={
                        'name': "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                        'app': self.get_pipeline_id(),
                        'inputs': job,
                        'tags': {'requestId': self.request_id}}
                ),
                job
            )
            for i, job in enumerate(sample_inputs)
        ]
Example #3
    def get_jobs(self, lab_head_email):
        """
        From self, retrieve relevant run IDs, build the input JSON for
        the pipeline, and then submit them as jobs through the
        APIRunCreateSerializer
        """
        run_ids = self.get_helix_filter_run_ids(lab_head_email)
        number_of_runs = len(run_ids)
        name = "AION merging %i runs for lab head email %s" % (number_of_runs,
                                                               lab_head_email)

        app = self.get_pipeline_id()
        pipeline = Pipeline.objects.get(id=app)
        pipeline_version = pipeline.version
        input_json = self.build_input_json(run_ids)
        tags = {
            "study_id": input_json['study_id'],
            "num_runs_merged": len(run_ids)
        }
        print(input_json)

        aion_outputs_job_data = {
            'app': app,
            'inputs': input_json,
            'name': name,
            'tags': tags
        }

        aion_outputs_job = [
            (APIRunCreateSerializer(data=aion_outputs_job_data), input_json)
        ]

        return aion_outputs_job
Example #4
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={
                                          'requestId': self.request_id,
                                          'igocomplete': True
                                      })
        data = [{
            "id": f.file.id,
            "path": f.file.path,
            "file_name": f.file.file_name,
            "metadata": f.metadata
        } for f in files]

        inputs = construct_inputs(data, self.request_id)

        number_of_inputs = len(inputs)

        return [(APIRunCreateSerializer(
            data={
                'name': "LEGACY FASTQ Merge: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                'app': self.get_pipeline_id(),
                'output_metadata': {key: metadata[key] for key in METADATA_OUTPUT_FIELDS if key in metadata},
                'inputs': job,
                'tags': {
                    'requestId': self.request_id,
                    'sampleId': metadata["sampleId"]
                }
            }), job) for i, (job, metadata) in enumerate(inputs)]
Example #5
    def get_jobs(self):
        files = self.files.filter(filemetadata__metadata__requestId=self.request_id, filemetadata__metadata__igocomplete=True).all()
        tempo_jobs = list()

        data = list()
        for file in files:
            sample = dict()
            sample['id'] = file.id
            sample['path'] = file.path
            sample['file_name'] = file.file_name
            sample['metadata'] = file.filemetadata_set.first().metadata
            data.append(sample)

        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample['metadata']['sampleId']
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(build_sample(igo_id_group[igo_id]))

        tempo_inputs, error_samples = construct_tempo_jobs(samples)
        number_of_inputs = len(tempo_inputs)

        for i, job in enumerate(tempo_inputs):
            name = "FLATBUSH: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
            tempo_jobs.append((APIRunCreateSerializer(
                data={'app': self.get_pipeline_id(), 'inputs': job, 'name': name,
                      'tags': {'requestId': self.request_id}}), job))

        return tempo_jobs
Example #6
    def get_jobs(self):
        """
        Convert job inputs into serialized jobs

        :return: list[(serialized job info, Job)]
        """
        sample_inputs = self.get_sample_inputs()

        return [
            (
                APIRunCreateSerializer(
                    data={
                        'name': "ACCESS LEGACY SNV M1: %s, %i of %i" % (self.request_id, i + 1, len(sample_inputs)),
                        'app': self.get_pipeline_id(),
                        'inputs': job,
                        'tags': {
                            'requestId': self.request_id,
                            'cmoSampleIds': job["tumor_sample_names"],
                            'patientId': '-'.join(job["tumor_sample_names"][0].split('-')[0:2])
                        }
                    }
                ),
                job
             )
            for i, job in enumerate(sample_inputs)
        ]
Example #7
    def get_jobs(self):
        """
        From self, retrieve relevant run IDs, build the input JSON for
        the pipeline, and then submit them as jobs through the
        APIRunCreateSerializer
        """
        run_ids = self.run_ids
        input_json = construct_copy_outputs_input(run_ids)

        mapping_file_content, pairing_file_content, data_clinical_content = generate_sample_pairing_and_mapping_files(
            run_ids)
        mapping_file = self.write_to_file("sample_mapping.txt", mapping_file_content)
        pairing_file = self.write_to_file("sample_pairing.txt", pairing_file_content)
        data_clinical_file = self.write_to_file("sample_data_clinical.txt", data_clinical_content)

        input_json['meta'] = [
                mapping_file,
                pairing_file,
                data_clinical_file
        ]

        number_of_runs = len(run_ids)
        name = "ARGOS COPY OUTPUTS %s runs [%s,..] " % (
            number_of_runs, run_ids[0])

        app = self.get_pipeline_id()
        pipeline = Pipeline.objects.get(id=app)
        pipeline_version = pipeline.version
        project_prefix = input_json['project_prefix']

        tags = {"run_ids": run_ids}

        copy_outputs_job_data = {
            'app': app,
            'inputs': input_json,
            'name': name,
            'tags': tags
        }

        """
        If project_prefix and job_group_id, write output to a directory
        that uses both
        """
        output_directory = None
        if project_prefix:
            tags["project_prefix"] = project_prefix
            if self.job_group_id:
                jg = JobGroup.objects.get(id=self.job_group_id)
                jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f")
                output_directory = os.path.join(pipeline.output_directory,
                                                "argos",
                                                project_prefix,
                                                pipeline_version,
                                                jg_created_date)
            copy_outputs_job_data['output_directory'] = output_directory
        copy_outputs_job = [(APIRunCreateSerializer(
            data=copy_outputs_job_data), input_json)]

        return copy_outputs_job
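For reference, with placeholder values the output_directory assembled in Example #7 resolves as follows (all values below are hypothetical, not taken from the examples):

import os

output_directory = os.path.join(
    "/work/pipeline_output",    # pipeline.output_directory (hypothetical)
    "argos",
    "PROJ_12345",               # project_prefix (hypothetical)
    "1.1.2",                    # pipeline_version (hypothetical)
    "20210101_09_30_123456")    # jg.created_date.strftime("%Y%m%d_%H_%M_%f")
# -> "/work/pipeline_output/argos/PROJ_12345/1.1.2/20210101_09_30_123456"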
Example #8
    def get_jobs(self):
        """
        From self, retrieve relevant run IDs, build the input JSON for
        the pipeline, and then submit them as jobs through the
        APIRunCreateSerializer
        """
        argos_run_ids = self.run_ids
        input_json = construct_helix_filters_input(argos_run_ids)
        number_of_runs = len(argos_run_ids)
        name = "HELIX FILTERS OUTPUTS %s runs [%s,..] " % (number_of_runs,
                                                           argos_run_ids[0])

        app = self.get_pipeline_id()
        pipeline = Pipeline.objects.get(id=app)
        pipeline_version = pipeline.version
        project_prefix = input_json['project_prefix']
        input_json['helix_filter_version'] = pipeline_version
        input_json = self.add_output_file_names(input_json, pipeline_version)
        tags = {
            "project_prefix": project_prefix,
            "argos_run_ids": argos_run_ids
        }

        #TODO:  Remove purity facets seg files from facets_hisens_seg_files
        input_json['facets_hisens_seg_files'] = self.remove_purity_files(
            input_json['facets_hisens_seg_files'])

        helix_filters_outputs_job_data = {
            'app': app,
            'inputs': input_json,
            'name': name,
            'tags': tags
        }
        """
        If project_prefix and job_group_id, write output to a directory
        that uses both

        Going by argos pipeline version id, assuming all runs use the same argos version
        """
        argos_run = Run.objects.get(id=argos_run_ids[0])
        argos_pipeline = argos_run.app

        output_directory = None
        if project_prefix:
            tags["project_prefix"] = project_prefix
            if self.job_group_id:
                jg = JobGroup.objects.get(id=self.job_group_id)
                jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f")
                output_directory = os.path.join(pipeline.output_directory,
                                                "argos", project_prefix,
                                                argos_pipeline.version,
                                                jg_created_date)
            helix_filters_outputs_job_data[
                'output_directory'] = output_directory
        helix_filters_outputs_job = [
            (APIRunCreateSerializer(data=helix_filters_outputs_job_data),
             input_json)
        ]
        return helix_filters_outputs_job
Example #9
 def post(self, request):
     run_id = request.data.get('run')
     run = RunObject.from_db(run_id)
     inputs = dict()
     for port in run.inputs:
         inputs[port.name] = port.db_value
     data = dict(app=str(run.run_obj.app.id),
                 inputs=inputs,
                 tags=run.tags,
                 job_group_id=run.job_group.id,
                 job_group_notifier_id=run.job_group_notifier.id,
                 resume=run_id)
     serializer = APIRunCreateSerializer(data=data,
                                         context={'request': request})
     if serializer.is_valid():
         new_run = serializer.save()
         response = RunSerializerFull(new_run)
         create_run_task.delay(response.data['id'], data['inputs'])
         job_group_notifier_id = str(new_run.job_group_notifier_id)
         self._send_notifications(job_group_notifier_id, new_run)
         return Response(response.data, status=status.HTTP_201_CREATED)
     return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
Example #10
 def get_jobs(self):
     """
     Create job entries to pass to Ridgeback
     """
     pipeline_obj = Pipeline.objects.get(id=self.get_pipeline_id())
     inputs = self.create_input()
     name = "DEMO JOB"
     serialized_run = APIRunCreateSerializer(
         data=dict(app=pipeline_obj.id, inputs=inputs, name=name, tags={}))
     return [(serialized_run, inputs)]
Example #11
    def test_create_jobs_from_operator_pipeline_deleted(self, get_pipeline_id, get_jobs, send_notification, create_run_task):
        argos_jobs = list()
        argos_jobs.append((APIRunCreateSerializer(
                data={'app': 'cb5d793b-e650-4b7d-bfcd-882858e29cc5', 'inputs': None, 'name': None, 'tags': {}}), None))
        get_jobs.return_value = argos_jobs
        get_pipeline_id.return_value = None
        create_run_task.return_value = None
        send_notification.return_value = None
        Run.objects.all().delete()

        operator = OperatorFactory.get_by_model(Operator.objects.get(id=1), request_id="bar")
        create_jobs_from_operator(operator, None)
        self.assertEqual(len(Run.objects.all()), 1)
        self.assertEqual(Run.objects.first().status, RunStatus.FAILED)
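Example #11 drives create_jobs_from_operator, which consumes the (serializer, inputs) pairs returned by the get_jobs implementations above. A minimal sketch of that consumption loop, mirroring the is_valid()/save()/delay() pattern of Examples #1 and #9 (hypothetical helper, not the actual implementation):

def run_operator_jobs_sketch(operator):
    """Hypothetical sketch: validate each (serializer, inputs) pair from get_jobs
    and hand valid runs to the task queue."""
    created, failed = [], []
    for serializer, inputs in operator.get_jobs():
        if serializer.is_valid():
            run = serializer.save()
            create_run_task.delay(str(run.id), inputs)  # asynchronous execution, as in Example #1
            created.append(run)
        else:
            failed.append(serializer.errors)            # invalid payloads end up as failed runs
    return created, failed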
Example #12
 def _build_job(self, input_json):
     app = self.get_pipeline_id()
     pipeline = Pipeline.objects.get(id=app)
     pipeline_version = pipeline.version
     sample_name = input_json['sample_names'][0]  # should only be one
     tags = {'sampleNameTumor': sample_name}
     # add tags, name
     output_job_data = {
         'app': app,
         'tags': tags,
         'name': "Sample %s ULTRON PHASE1 run" % sample_name,
         'inputs': input_json
     }
     output_job = (APIRunCreateSerializer(data=output_job_data), input_json)
     return output_job
Example #13
    def get_jobs(self):
        """
        From self, retrieve relevant run IDs, build the input JSON for
        the pipeline, and then submit them as jobs through the
        APIRunCreateSerializer
        """
        run_ids = self.run_ids
        input_json = construct_argos_qc_input(run_ids)
        number_of_runs = len(run_ids)
        name = "ARGOS QC OUTPUTS %s runs [%s,..] " % (
            number_of_runs, run_ids[0])

        app = self.get_pipeline_id()
        pipeline = Pipeline.objects.get(id=app)
        pipeline_version = pipeline.version
        project_prefix = input_json['project_prefix']

        tags = {"tumor_sample_names": input_json['tumor_sample_names'],
                "normal_sample_names": input_json['normal_sample_names']}

        argos_qc_outputs_job_data = {
            'app': app,
            'inputs': input_json,
            'name': name,
            'notify_for_outputs': ['qc_pdf'],
            'tags': tags}

        """
        If project_prefix and job_group_id, write output to a directory
        that uses both
        """
        output_directory = None
        if project_prefix:
            tags["project_prefix"] = project_prefix
            if self.job_group_id:
                jg = JobGroup.objects.get(id=self.job_group_id)
                jg_created_date = jg.created_date.strftime("%Y%m%d_%H_%M_%f")
                output_directory = os.path.join(pipeline.output_directory,
                                                "argos",
                                                project_prefix,
                                                pipeline_version,
                                                jg_created_date)
            argos_qc_outputs_job_data['output_directory'] = output_directory

        argos_qc_outputs_job = [(APIRunCreateSerializer(
            data=argos_qc_outputs_job_data), input_json)]

        return argos_qc_outputs_job
Example #14
 def _build_job(self, input_json):
     app = self.get_pipeline_id()
     pipeline = Pipeline.objects.get(id=app)
     pipeline_version = pipeline.version
     request_id = self._get_request_id()
     input_json['output_filename'] = request_id + ".rez.maf"
     tags = {'requestId': request_id}
     # add tags, name
     output_job_data = {
         'app': app,
         'tags': tags,
         'name': "Request ID %s ULTRON PHASE1:CONCAT MAFs run" % request_id,
         'inputs': input_json}
     output_job = (APIRunCreateSerializer(
         data=output_job_data),
         input_json)
     return output_job
Example #15
    def get_jobs(self):
        files = self.files.filter(
            filemetadata__metadata__requestId=self.request_id,
            filemetadata__metadata__igocomplete=True).all()
        access_jobs = list()

        data = list()
        for file in files:
            sample = dict()
            sample['id'] = file.id
            sample['path'] = file.path
            sample['file_name'] = file.file_name
            sample['metadata'] = file.filemetadata_set.first().metadata
            data.append(sample)

        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample['metadata']['sampleId']
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(generate_results(igo_id_group[igo_id]))

        access_inputs = construct_access_jobs(samples)
        number_of_inputs = len(access_inputs)

        for i, job in enumerate(access_inputs):
            name = "ACCESS M1: %s, %i of %i" % (self.request_id, i + 1,
                                                number_of_inputs)
            access_jobs.append((APIRunCreateSerializer(
                data={
                    'name': name,
                    'app': self.get_pipeline_id(),
                    'inputs': job,
                    'tags': {
                        'requestId': self.request_id
                    }
                }), job))

        return access_jobs
Example #16
    def get_jobs(self):
        ports = Port.objects.filter(run_id__in=self.run_ids,
                                    port_type=PortType.OUTPUT)

        data = [{
            "id": f.id,
            "path": f.path,
            "file_name": f.file_name,
            "metadata": f.filemetadata_set.first().metadata
        } for p in ports for f in p.files.all()]

        request_id = data[0]["metadata"]["requestId"]
        (sample_inputs,
         no_of_errors) = construct_sample_inputs(data, request_id,
                                                 self.job_group_id)

        if no_of_errors:
            return

        number_of_inputs = len(sample_inputs)

        return [(APIRunCreateSerializer(
            data={
                'name': "ACCESS LEGACY COLLAPSING M1: %s, %i of %i" % (request_id, i + 1, number_of_inputs),
                'app': self.get_pipeline_id(),
                'inputs': job,
                'tags': {
                    'requestId': request_id,
                    'cmoSampleIds': job["add_rg_ID"],
                    'reference_version': 'HG19'
                }
            }), job) for i, job in enumerate(sample_inputs)]
Example #17
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={
                                          'requestId': self.request_id,
                                          'igocomplete': True
                                      })
        data = [{
            "id": f.file.id,
            "path": f.file.path,
            "file_name": f.file.file_name,
            "metadata": f.metadata
        } for f in files]

        (sample_inputs,
         no_of_errors) = construct_sample_inputs(data, self.request_id,
                                                 self.job_group_id)

        if no_of_errors:
            return

        number_of_inputs = len(sample_inputs)

        return [(APIRunCreateSerializer(
            data={
                'name': "ACCESS LEGACY COLLAPSING M1: %s, %i of %i" % (self.request_id, i + 1, number_of_inputs),
                'app': self.get_pipeline_id(),
                'inputs': job,
                'tags': {
                    'requestId': self.request_id,
                    'cmoSampleIds': job["add_rg_ID"]
                }
            }), job) for i, job in enumerate(sample_inputs)]
Example #18
    def get_jobs(self, pairing_override=None):
        logger.info("Operator JobGroupNotifer ID %s",
                    self.job_group_notifier_id)
        tmpdir = os.path.join(settings.BEAGLE_SHARED_TMPDIR, str(uuid.uuid4()))
        self.OUTPUT_DIR = tmpdir
        Path(self.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

        recipe_query = self.build_recipe_query()
        assay_query = self.build_assay_query()
        igocomplete_query = Q(metadata__igocomplete=True)
        missing_fields_query = self.filter_out_missing_fields_query()
        q = recipe_query & assay_query & igocomplete_query & missing_fields_query
        files = FileRepository.all()
        tempo_files = FileRepository.filter(queryset=files, q=q)

        self.send_message("""
            Querying database for the following recipes:
                {recipes}

            Querying database for the following assays/bait sets:
                {assays}
            """.format(recipes="\t\n".join(self.get_recipes()),
                       assays="\t\n".join(self.get_assays())))

        exclude_query = self.get_exclusions()
        if exclude_query:
            tempo_files = tempo_files.exclude(exclude_query)
        # replace with run operator logic, most recent pairing
        pre_pairing = self.load_pairing_file(
            PAIRING_FILE_LOCATION)  # pairing.tsv is not in repo
        if pairing_override:
            normal_samples = pairing_override['normal_samples']
            tumor_samples = pairing_override['tumor_samples']
            num_ns = len(normal_samples)
            num_ts = len(tumor_samples)
            if num_ns != num_ts:
                print("Number of tumors and normals not the same; can't pair")
            else:
                for i in range(0, num_ns):
                    tumor_id = tumor_samples[i]
                    normal_id = normal_samples[i]
                    pre_pairing[tumor_id] = normal_id
        patient_ids = set()
        patient_files = dict()
        no_patient_samples = list()
        for entry in tempo_files:
            patient_id = entry.metadata['patientId']
            if patient_id:
                patient_ids.add(patient_id)
                if patient_id not in patient_files:
                    patient_files[patient_id] = list()
                patient_files[patient_id].append(entry)
            else:
                no_patient_samples.append(entry)

        self.patients = dict()
        self.non_cmo_patients = dict()
        for patient_id in patient_files:
            if "C-" in patient_id[:2]:
                self.patients[patient_id] = patient_obj.Patient(
                    patient_id, patient_files[patient_id], pre_pairing)
            else:
                self.non_cmo_patients[patient_id] = patient_obj.Patient(
                    patient_id, patient_files[patient_id])

        input_json = dict()
        # output these strings to file
        input_json['conflict_data'] = self.create_conflict_samples_txt_file()
        input_json['unpaired_data'] = self.create_unpaired_txt_file()
        input_json['mapping_data'] = self.create_mapping_file()
        input_json['pairing_data'] = self.create_pairing_file()
        input_json['tracker_data'] = self.create_tracker_file()

        pickle_file = os.path.join(self.OUTPUT_DIR, "patients_data_pickle")
        with open(pickle_file, 'wb') as fh:
            pickle.dump(self.patients, fh)
        os.chmod(pickle_file, 0o777)
        self.register_tmp_file(pickle_file)

        input_json['pickle_data'] = {
            'class': 'File',
            'location': "juno://" + pickle_file
        }

        beagle_version = __version__
        run_date = datetime.now().strftime("%Y%m%d_%H:%M:%f")

        tags = {"beagle_version": beagle_version, "run_date": run_date}

        app = self.get_pipeline_id()
        pipeline = Pipeline.objects.get(id=app)
        pipeline_version = pipeline.version
        output_directory = pipeline.output_directory

        self.debug_json = input_json

        tempo_mpgen_outputs_job_data = {
            'app': app,
            'inputs': input_json,
            'name': "Tempo mpgen %s" % run_date,
            'tags': tags,
            'output_directory': output_directory
        }

        tempo_mpgen_outputs_job = [
            (APIRunCreateSerializer(data=tempo_mpgen_outputs_job_data),
             input_json)
        ]
        return tempo_mpgen_outputs_job
Example #19
 def ready_job(self, pipeline, tempo_inputs, job):
     self._jobs.append((APIRunCreateSerializer(data={
         'app': pipeline,
         'inputs': tempo_inputs
     }), job))
Example #20
    def get_jobs(self):

        argos_jobs = list()

        if self.request_id:
            files = FileRepository.filter(queryset=self.files,
                                          metadata={
                                              'requestId': self.request_id,
                                              'igocomplete': True
                                          },
                                          filter_redact=True)

            cnt_tumors = FileRepository.filter(queryset=self.files,
                                               metadata={
                                                   'requestId':
                                                   self.request_id,
                                                   'tumorOrNormal': 'Tumor',
                                                   'igocomplete': True
                                               },
                                               filter_redact=True).count()
        elif self.pairing:
            files, cnt_tumors = self.get_files_for_pairs()

        if cnt_tumors == 0:
            cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
            send_notification.delay(cant_do)
            all_normals_event = SetLabelEvent(self.job_group_notifier_id,
                                              'all_normals').to_dict()
            send_notification.delay(all_normals_event)
            return argos_jobs

        data = list()
        for f in files:
            sample = dict()
            sample['id'] = f.file.id
            sample['path'] = f.file.path
            sample['file_name'] = f.file.file_name
            sample['metadata'] = f.metadata
            data.append(sample)

        files = list()
        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample['metadata']['sampleId']
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(build_sample(igo_id_group[igo_id]))

        argos_inputs, error_samples = construct_argos_jobs(
            samples, self.pairing)
        number_of_inputs = len(argos_inputs)

        sample_pairing = ""
        sample_mapping = ""
        pipeline = self.get_pipeline_id()

        try:
            pipeline_obj = Pipeline.objects.get(id=pipeline)
        except Pipeline.DoesNotExist:
            pass

        check_for_duplicates = list()
        for i, job in enumerate(argos_inputs):
            tumor_sample_name = job['pair'][0]['ID']
            normal_sample_name = job['pair'][1]['ID']

            # Collect fastq/bam paths for the tumor and normal samples of this pair,
            # adding each unique sample/path line to the sample mapping content
            for sample_name, pair_sample, file_keys in (
                    (tumor_sample_name, job['pair'][0], ('R1', 'R2', 'zR1', 'zR2')),
                    (normal_sample_name, job['pair'][1], ('R1', 'R2', 'zR1', 'zR2', 'bam'))):
                for key in file_keys:
                    for p in pair_sample[key]:
                        filepath = FileProcessor.parse_path_from_uri(p['location'])
                        file_str = "\t".join([sample_name, filepath]) + "\n"
                        if file_str not in check_for_duplicates:
                            check_for_duplicates.append(file_str)
                            sample_mapping += file_str
                        if filepath not in files:
                            files.append(filepath)

            name = "ARGOS %s, %i of %i" % (self.request_id, i + 1,
                                           number_of_inputs)
            assay = job['assay']
            pi = job['pi']
            pi_email = job['pi_email']

            sample_pairing += "\t".join(
                [normal_sample_name, tumor_sample_name]) + "\n"

            argos_jobs.append((APIRunCreateSerializer(
                data={
                    'app': pipeline,
                    'inputs': job,
                    'name': name,
                    'tags': {
                        'requestId': self.request_id,
                        'sampleNameTumor': tumor_sample_name,
                        'sampleNameNormal': normal_sample_name,
                        'labHeadName': pi,
                        'labHeadEmail': pi_email
                    }
                }), job))

        operator_run_summary = UploadAttachmentEvent(
            self.job_group_notifier_id, 'sample_pairing.txt',
            sample_pairing).to_dict()
        send_notification.delay(operator_run_summary)

        mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                   'sample_mapping.txt',
                                                   sample_mapping).to_dict()
        send_notification.delay(mapping_file_event)

        data_clinical = generate_sample_data_content(
            files,
            pipeline_name=pipeline_obj.name,
            pipeline_github=pipeline_obj.github,
            pipeline_version=pipeline_obj.version)
        sample_data_clinical_event = UploadAttachmentEvent(
            self.job_group_notifier_id, 'sample_data_clinical.txt',
            data_clinical).to_dict()
        send_notification.delay(sample_data_clinical_event)

        self.evaluate_sample_errors(error_samples)
        self.summarize_pairing_info(argos_inputs)

        return argos_jobs