예제 #1
0
    def test_get_results(self):
        """Verify that a sample reports every computed file associated with it.

        Builds one sample, one computational result and two computed files
        (one smashable, one not), links them all to the sample, and expects
        `get_result_files()` to count both files.
        """

        sample = Sample()
        sample.accession_code = 'GSM45588'
        sample.save()

        result = ComputationalResult()
        result.save()

        # (filename, is_smashable) for each file attached to the result.
        file_specs = (
            ("oh_boy.txt", True),
            ("gee_whiz.bmp", False),
        )
        created_files = []
        for filename, smashable in file_specs:
            computed_file = ComputedFile()
            computed_file.filename = filename
            computed_file.result = result
            computed_file.size_in_bytes = 123
            computed_file.is_smashable = smashable
            computed_file.save()
            created_files.append(computed_file)

        # Tie the result to the sample ...
        result_link = SampleResultAssociation()
        result_link.sample = sample
        result_link.result = result
        result_link.save()

        # ... and then each computed file, in creation order.
        for computed_file in created_files:
            file_link = SampleComputedFileAssociation()
            file_link.sample = sample
            file_link.computed_file = computed_file
            file_link.save()

        computed_files = sample.get_result_files()
        self.assertEqual(computed_files.count(), 2)
예제 #2
0
def _run_salmontools(job_context: Dict) -> Dict:
    """Run SalmonTools `extract-unmapped` and record its output.

    Reads from `job_context`:
        "output_directory", "salmontools_directory": path prefixes that are
            concatenated directly with a relative name, so they are
            presumably expected to end with a path separator.
        "input_file_path": first (or only) input file.
        "input_file_path_2": optional; when present the inputs are passed
            with `-1`/`-2`, otherwise the single input is passed with `-r`.
        "salmontools_archive": path of the gzipped tarball to create.
        "job_id", "job", "sample", "pipeline", "computed_files".

    On success a ComputationalResult and a ComputedFile are saved and
    associated with the sample, the result id is appended to the pipeline
    steps, and `job_context["result"]` / `job_context["success"] = True`
    are set.  On failure `job_context["job"].failure_reason` is filled in
    and `job_context["success"]` is set to False.

    Returns:
        The (mutated) `job_context`, or whatever
        `utils.handle_processor_exception` returns when the processor
        lookup fails.
    """

    logger.debug("Running SalmonTools ...")
    unmapped_filename = job_context[
        "output_directory"] + "aux_info/unmapped_names.txt"
    output_prefix = job_context["salmontools_directory"] + "unmapped_by_salmon"

    # Build argv as a list instead of formatting one string and splitting it
    # on whitespace: that way paths containing spaces stay a single argument
    # and paths containing "{" or "}" cannot break a later str.format pass.
    command_args = [
        "salmontools",
        "extract-unmapped",
        "-u",
        unmapped_filename,
        "-o",
        output_prefix,
    ]
    if "input_file_path_2" in job_context:
        command_args += [
            "-1",
            str(job_context["input_file_path"]),
            "-2",
            str(job_context["input_file_path_2"]),
        ]
    else:
        command_args += ["-r", str(job_context["input_file_path"])]

    # Human-readable form used for logging and stored on the result.
    command_str = " ".join(command_args)

    start_time = timezone.now()
    logger.debug(
        "Running the following SalmonTools command: %s",
        command_str,
        processor_job=job_context["job_id"],
    )

    completed_command = subprocess.run(command_args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
    end_time = timezone.now()

    # As of SalmonTools 0.1.0, completed_command.returncode is always 0,
    # (even if error happens).  completed_command.stderr is not totally
    # reliable either, because it will output the following line even
    # when the execution succeeds:
    #  "There were <N> unmapped reads\n"
    # in which "<N>" is the number of lines in input unmapped_names.txt.
    #
    # As a workaround, we are using a regular expression here to test
    # the status of SalmonTools execution.  Any text in stderr that is
    # not in the above format is treated as error message.
    #
    # Undecodable bytes are replaced rather than raising, so that garbage
    # on stderr is reported through the normal failure path below.
    status_str = completed_command.stderr.decode(errors="replace").strip()
    success_pattern = r"^There were \d+ unmapped reads$"
    if not re.match(success_pattern, status_str):
        # Error in salmontools.
        logger.error(
            "Shell call to salmontools failed with error message: %s",
            status_str,
            processor_job=job_context["job_id"],
        )
        job_context["job"].failure_reason = (
            "Shell call to salmontools failed because: " + status_str)
        job_context["success"] = False
        return job_context

    # Zip up the output of salmontools
    try:
        with tarfile.open(job_context["salmontools_archive"],
                          "w:gz") as tar:
            tar.add(job_context["salmontools_directory"], arcname=os.sep)
    except Exception:
        logger.exception(
            "Exception caught while zipping processed directory %s",
            job_context["salmontools_directory"],
            processor_job=job_context["job_id"],
        )
        # NOTE(review): the message says "directory" but is formatted with
        # the archive path; kept as-is to preserve the recorded text.
        failure_template = "Exception caught while zipping salmontools directory {}"
        job_context["job"].failure_reason = failure_template.format(
            job_context["salmontools_archive"])
        job_context["success"] = False
        return job_context

    result = ComputationalResult()
    result.commands.append(command_str)
    result.time_start = start_time
    result.time_end = end_time
    result.is_ccdl = True

    processor_key = "SALMONTOOLS"
    try:
        result.processor = utils.find_processor(processor_key)
    except Exception as e:
        return utils.handle_processor_exception(job_context, processor_key,
                                                e)

    result.save()
    job_context["pipeline"].steps.append(result.id)

    assoc = SampleResultAssociation()
    assoc.sample = job_context["sample"]
    assoc.result = result
    assoc.save()

    # Register the archive as a public, non-smashable QC file of the result.
    computed_file = ComputedFile()
    computed_file.filename = job_context["salmontools_archive"].split(
        "/")[-1]
    computed_file.absolute_file_path = job_context["salmontools_archive"]
    computed_file.calculate_sha1()
    computed_file.calculate_size()
    computed_file.is_public = True
    computed_file.is_smashable = False
    computed_file.is_qc = True
    computed_file.result = result
    computed_file.save()
    job_context["computed_files"].append(computed_file)

    assoc = SampleComputedFileAssociation()
    assoc.sample = job_context["sample"]
    assoc.computed_file = computed_file
    assoc.save()

    job_context["result"] = result
    job_context["success"] = True

    return job_context