Beispiel #1
0
 def _file_object_from_path(file_path):
     """
     Build a RunfolderFile for the supplied path, re-using a known checksum when one is available.

     The checksum is looked up in `checksums`, keyed by the path of the file relative to the parent directory
     of the project's runfolder. If there is no entry for that key, the checksum is obtained by hashing the
     file through the metadata service instead.

     :param file_path: path to the file that should be represented
     :return: a RunfolderFile object for file_path with its checksum set
     """
     runfolder_parent = self.filesystem_service.dirname(project.runfolder_path)
     checksum_key = self.filesystem_service.relpath(file_path, runfolder_parent)
     if checksum_key in checksums:
         known_checksum = checksums[checksum_key]
     else:
         # nothing recorded for this file, so fall back to hashing it
         known_checksum = self.metadata_service.hash_file(file_path)
     return RunfolderFile(file_path, file_checksum=known_checksum)
Beispiel #2
0
 def test_organise_project_file(self):
     """
     Organising a project file should symlink it below the organised project path, keep its location
     relative to the supplied base directory and carry the checksum over to the returned file object.
     """
     source_base = "/foo"
     target_root = "/bar/project"
     relative_names = (
         "a-report-file",
         os.path.join("report-dir", "another-report-file"))
     project_files = []
     for relative_name in relative_names:
         project_files.append(
             RunfolderFile(
                 os.path.join(source_base, relative_name),
                 file_checksum="checksum-for-{}".format(relative_name)))

     # let the mocked file system service resolve paths exactly like os.path does
     self.file_system_service.relpath.side_effect = os.path.relpath
     self.file_system_service.dirname.side_effect = os.path.dirname

     for original_file in project_files:
         organised_file = self.organise_service.organise_project_file(
             original_file, target_root, source_base)
         expected_path = os.path.join(
             target_root,
             os.path.relpath(original_file.file_path, source_base))
         self.assertEqual(expected_path, organised_file.file_path)
         self.assertEqual(original_file.checksum, organised_file.checksum)

     # each symlink must point back to the original file through a relative path
     expected_symlink_calls = [
         mock.call(
             os.path.join("..", "..", "foo", "a-report-file"),
             os.path.join(target_root, "a-report-file")),
         mock.call(
             os.path.join("..", "..", "..", "foo", "report-dir", "another-report-file"),
             os.path.join(target_root, "report-dir", "another-report-file")),
     ]
     self.file_system_service.symlink.assert_has_calls(expected_symlink_calls)
    def dump_project_samplesheet(self, runfolder, project):
        """
        Write a project-specific copy of the runfolder's samplesheet and return it as a RunfolderFile.

        The samplesheet entries are fetched with get_samplesheet for the supplied runfolder. Every entry is kept,
        in its original order, so that the numbering of fastq files (the "_S1_", "_S2_" parts of the file names)
        still refers to the correct samplesheet entry. Entries which do not belong to the supplied project have
        their field values replaced by hashes. The result is written below the project path, in a directory named
        after the runfolder.

        :param runfolder: an instance of Runfolder
        :param project: an instance of Project
        :return: a RunfolderFile object representing the written samplesheet file
        """
        def _belongs_to_project(row):
            """
            Tell whether a samplesheet row belongs to the project, judged by:
                * project name
                * sample id
                * lane

            :param row: the samplesheet entry as a dict
            :return: the result of the project repository's is_sample_in_project check
            """
            project_name = row.get("Sample_Project")
            sample_id = row.get("Sample_ID")
            # e.g. MiSeq SampleSheets may not have the Lane column, so assume 1 if missing
            lane = int(row.get("Lane", "1"))
            return self.project_repository.is_sample_in_project(project, project_name, sample_id, lane)

        def _mask_row(row):
            """
            Copy a samplesheet row, hashing its field values if the row does not belong to the project.
            The "Lane" field and any empty fields are always left as they are.

            :param row: the samplesheet entry as a dict
            :return: an OrderedDict with the same keys, in the same order, as the supplied row
            """
            if _belongs_to_project(row):
                # rows of this project are copied verbatim
                return OrderedDict(row.items())
            return OrderedDict(
                (field,
                 value if field == "Lane" or len(value) == 0 else self.metadata_service.hash_string(value))
                for field, value in row.items())

        # mask all entries not belonging to the project ...
        masked_rows = [_mask_row(row) for row in self.get_samplesheet(runfolder)]
        # ... and write the resulting data to the project-specific location
        samplesheet_path = os.path.join(project.path, runfolder.name, self.SAMPLESHEET_PATH)
        self.metadata_service.write_samplesheet_file(samplesheet_path, masked_rows)
        samplesheet_checksum = self.metadata_service.hash_file(samplesheet_path)
        return RunfolderFile(samplesheet_path, file_checksum=samplesheet_checksum)
def project_report_files(project, multiqc_report=True):
    """
    Build the list of report files expected for a project.

    With multiqc_report set, the list holds the MultiQC html report and data archive located directly in the
    project path. Otherwise it holds the report files under "Summary/<project name>" in the project's runfolder
    path, including two files below the "Plots" directory.

    :param project: an object exposing name, path and runfolder_path
    :param multiqc_report: whether to list the MultiQC report files (default) or the Summary report files
    :return: a list of RunfolderFile objects, each with a checksum of the form "checksum-for-<file path>"
    """
    paths = []
    if multiqc_report:
        base_dir = project.path
        for name_template in ("{}_multiqc_report.html", "{}_multiqc_report_data.zip"):
            paths.append(os.path.join(base_dir, name_template.format(project.name)))
    else:
        base_dir = os.path.join(project.runfolder_path, "Summary", project.name)
        paths.extend(
            os.path.join(base_dir, "report.{}".format(extension))
            for extension in ("html", "xml", "xsl"))
        paths.append(os.path.join(base_dir, "Plots", "file_in_plots.png"))
        paths.append(os.path.join(base_dir, "Plots", "subdir", "file_in_plots_subdir"))

    runfolder_files = []
    for path in paths:
        runfolder_files.append(
            RunfolderFile(path, file_checksum="checksum-for-{}".format(path)))
    return runfolder_files
Beispiel #5
0
    def organise_project_file(
            self,
            project_file,
            organised_project_path,
            project_file_base=None):
        """
        Symlink a project file into the organised project directory.

        The symlink is created below organised_project_path, at the same relative location that the file has
        beneath project_file_base. The link refers to the original file through a relative path.

        :param project_file: a file object exposing file_path and checksum (e.g. a RunfolderFile)
        :param organised_project_path: path to the directory below which the symlink is created
        :param project_file_base: the directory that the location of the file is taken relative to. If not
        supplied (or empty), the directory containing the file is used
        :return: a RunfolderFile object for the symlink, carrying the checksum of project_file
        """
        fs = self.file_system_service
        original_path = project_file.file_path
        if not project_file_base:
            project_file_base = fs.dirname(original_path)

        # the full path to the symlink
        path_below_base = fs.relpath(original_path, project_file_base)
        link_name = os.path.join(organised_project_path, path_below_base)

        # the relative path from the symlink to the original file
        link_dir = fs.dirname(link_name)
        link_target = fs.relpath(original_path, link_dir)

        fs.symlink(link_target, link_name)
        return RunfolderFile(link_name, file_checksum=project_file.checksum)