Beispiel #1
0
 def test_tempfile_not_run_on_prune(self):
     """Check that nothing is left on disk when the only consumer is pruned.

     Temp file job "A" is needed solely by file job "B". "B" is pruned
     before the graph runs, and afterwards neither "B" nor "A" may exist.
     """
     temp_job = ppg.TempFileGeneratingJob("A", lambda: write("A", "A"))
     consumer = ppg.FileGeneratingJob("B", lambda: write("B", "B" + read("A")))
     consumer.depends_on(temp_job)
     consumer.prune()
     ppg.run_pipegraph()
     for filename in ("B", "A"):
         assert not Path(filename).exists()
Beispiel #2
0
 def test_tempfile_still_run_if_needed_for_other(self):
     """Check that a temp file job still serves a consumer that is not pruned.

     Temp file job "A" feeds both "B" and "C". Only "B" is pruned, so "C"
     must be produced with the content read from "A", while "B" and the
     temp file "A" must not exist after the run.
     """
     temp_job = ppg.TempFileGeneratingJob("A", lambda: write("A", "A"))
     pruned = ppg.FileGeneratingJob("B", lambda: write("B", "B" + read("A")))
     kept = ppg.FileGeneratingJob("C", lambda: write("C", "C" + read("A")))
     for consumer in (pruned, kept):
         consumer.depends_on(temp_job)
     pruned.prune()
     ppg.run_pipegraph()
     assert not Path("B").exists()
     kept_output = Path("C")
     assert kept_output.exists()
     assert kept_output.read_text() == "CA"
     assert not Path("A").exists()
def job_download(info):
    """Create the job that downloads the package defined in *info*.

    ``info`` is indexed with the keys ``"repo"``, ``"name"``, ``"version"``
    and ``"url"``. The content of ``info["url"]`` is stored as
    ``/anysnake/bioconductor_download/<repo>/<name>_<version>.tar.gz``.

    Returns the ``ppg.TempFileGeneratingJob`` producing that file, with
    ``ignore_code_changes()`` already applied.

    The job's callback raises ``ValueError`` when the server responds with
    a status code other than 200.
    """
    target_fn = f'/anysnake/bioconductor_download/{info["repo"]}/{info["name"]}_{info["version"]}.tar.gz'

    def download():
        temp_fn = target_fn + "_temp"
        # parents=False: only the per-repo directory is created here, the
        # directories above it have to exist already.
        Path(target_fn).parent.mkdir(exist_ok=True, parents=False)
        # The context manager releases the streamed connection on every
        # exit path, including the error raise below.
        with requests.get(info["url"], stream=True) as r:
            if r.status_code != 200:
                raise ValueError(
                    "Error return on %s %s " % (info["url"], r.status_code)
                )
            with open(temp_fn, "wb") as op:
                for block in r.iter_content(1024 * 1024):
                    op.write(block)
        # Write under a temporary name and move afterwards, so the final
        # filename only ever appears once the download has been written.
        shutil.move(temp_fn, target_fn)

    job = ppg.TempFileGeneratingJob(target_fn, download)
    job.ignore_code_changes()
    return job
Beispiel #4
0
    def test_accepts(self):
        """Build and run a graph whose jobs are created from pathlib.Path ids.

        Every job type used here receives ``pathlib.Path`` objects (partly
        mixed with plain strings) where a filename or job id is expected.
        The final assertions check the contents of the generated files and
        that the files "g", "h", "i" and "j" no longer exist.
        """
        import pathlib

        for fixture in ("aaa", "bbb", "ccc"):
            write(fixture, "hello")
        time_invariant = ppg.FileTimeInvariant(pathlib.Path("aaa"))
        multi_invariant = ppg.MultiFileInvariant([pathlib.Path("bbb"), "ccc"])
        job_b = ppg.FileGeneratingJob(
            pathlib.Path("b"),
            lambda of: write(of, "bb" + read("aaa") + read("bbb") + read("ccc")),
        )
        job_b.depends_on(time_invariant)
        job_b.depends_on(multi_invariant)

        holder = Dummy()

        def write_c_d_e():
            write("c", "cc" + read("g"))
            write("d", "dd" + read("h") + holder.attr)
            write("e", "ee" + read("i") + read("j"))

        job_c = ppg.MultiFileGeneratingJob(
            [pathlib.Path("c"), "d", pathlib.Path("e")], write_c_d_e
        )
        job_c.depends_on(job_b)

        func_invariant = ppg.FunctionInvariant(pathlib.Path("f"), lambda x: x + 1)
        job_c.depends_on(func_invariant)

        param_invariant = ppg.ParameterInvariant(pathlib.Path("c"), "hello")
        job_c.depends_on(param_invariant)

        temp_g = ppg.TempFileGeneratingJob(
            pathlib.Path("g"), lambda: write("g", "gg")
        )
        job_c.depends_on(temp_g)

        def write_h_i():
            write("h", "hh")
            write("i", "ii")

        multi_temp = ppg.MultiTempFileGeneratingJob(
            [pathlib.Path("h"), "i"], write_h_i
        )
        job_c.depends_on(multi_temp)

        def write_j_k():
            write("j", "jjjj")
            write("k", "kkkk")

        temp_plus = ppg.TempFilePlusGeneratingJob(
            pathlib.Path("j"), pathlib.Path("k"), write_j_k
        )
        job_c.depends_on(temp_plus)

        cached_data = ppg.CachedDataLoadingJob(
            pathlib.Path("l"), lambda: write("l", "llll"), lambda res: res
        )
        job_c.depends_on(cached_data)

        cached_attr = ppg.CachedAttributeLoadingJob(
            pathlib.Path("m"), holder, "attr", lambda: "55"
        )
        job_c.depends_on(cached_attr)

        ppg.run_pipegraph()

        assert read("aaa") == "hello"
        assert read("b") == "bbhellohellohello"
        assert read("c") == "ccgg"
        assert read("d") == "ddhh55"
        assert read("e") == "eeiijjjj"
        for temp_name in ("g", "h", "i", "j"):
            assert not os.path.exists(temp_name)
        assert read("k") == "kkkk"
Beispiel #5
0
    def prepare_input(self):
        """Create the job(s) that write this lane's aligner input files.

        The entries returned by ``self.input_strategy()`` are selected
        according to ``self.pairing``:

        * ``"single"``: first file of each entry; raises if any entry holds
          more than one file.
        * ``"paired_as_single"``: all files of all entries, flattened.
        * ``"only_first"`` / ``"only_second"``: first / second file of each
          entry.
        * ``"paired"``: ``(first, second)`` tuples; raises if no entry holds
          more than one file.

        The output filenames come from ``self.get_aligner_input_filenames()``.
        Every callback writes to ``<output>.temp`` and then moves the result
        to its final name.

        Returns:
            A ``ppg.MultiTempFileGeneratingJob``, a ``ppg.TempFileGeneratingJob``
            or a ``ppg.JobList`` of two ``ppg.TempFileGeneratingJob`` objects
            (with an added ``filenames`` attribute), depending on the pairing
            and on what ``self.fastq_processor`` offers.

        Raises:
            PairingError: on a pairing/input mismatch as described above, or
                on an unknown ``self.pairing`` value.
        """
        # input_strategy returns a list of
        # paired fastq files
        # ie. [('A_R1_.fastq1', 'A_R2.fastq', ...), ...]

        input_pairs = self.input_strategy()
        # True as soon as one entry contains more than one file.
        any_r2 = any([len(x) > 1 for x in input_pairs])
        # Single end - works from flat list
        if self.pairing == "single":
            if any_r2:
                raise PairingError(
                    f"{self.name}: paired end lane defined as single end - you need to change the pairing parameter"
                )
            input_filenames = [str(f[0]) for f in input_pairs]
        elif self.pairing == "paired_as_single":
            input_filenames = [str(f) for fl in input_pairs for f in fl]
        elif self.pairing == "only_first":
            input_filenames = [str(f[0]) for f in input_pairs]
        elif self.pairing == "only_second":
            # NOTE(review): f[1] is accessed without a length check here -
            # presumably every entry is expected to hold two files; confirm.
            input_filenames = [str(f[1]) for f in input_pairs]
        elif self.pairing == "paired":
            if not any_r2:
                raise PairingError(
                    f"Paired end lane, but no R2 reads found. Found files: {input_pairs}"
                )
            # NOTE(review): any_r2 only guarantees that one entry has a second
            # file, while f[1] is accessed on every entry - confirm.
            input_filenames = [(str(f[0]), str(f[1])) for f in input_pairs
                               ]  # only the first two files of each entry are kept
        else:
            raise PairingError("unknown pairing")  # pragma: no cover
        # The invariants below need a flat list of files. For "paired" it is
        # rebuilt from input_pairs because input_filenames holds tuples there.
        if self.pairing == "paired":
            flat_input_filenames = [f for fl in input_pairs for f in fl]
        else:
            flat_input_filenames = input_filenames

        # An input strategy may supply its own dependencies; otherwise one
        # FileChecksumInvariant per input file is used.
        if hasattr(self.input_strategy, "dependencies"):
            deps = self.input_strategy.dependencies
        else:
            deps = [ppg.FileChecksumInvariant(f) for f in flat_input_filenames]
        output_filenames = self.get_aligner_input_filenames()

        if self.pairing == "paired":
            if hasattr(self.fastq_processor, "generate_aligner_input_paired"):
                # The processor handles both mates in one call, so a single
                # job produces both output files.

                def prep_aligner_input():
                    import shutil

                    self.fastq_processor.generate_aligner_input_paired(
                        str(output_filenames[0]) + ".temp",
                        str(output_filenames[1]) + ".temp",
                        input_filenames,
                        self.reverse_reads,
                    )
                    shutil.move(
                        str(output_filenames[0]) + ".temp",
                        output_filenames[0])
                    shutil.move(
                        str(output_filenames[1]) + ".temp",
                        output_filenames[1])

                job = ppg.MultiTempFileGeneratingJob(output_filenames,
                                                     prep_aligner_input)
                job.depends_on(
                    self.fastq_processor.get_dependencies(
                        [str(x) for x in output_filenames]))
            else:
                # No paired entry point on the processor: each mate gets its
                # own job, fed with the first or second element of every
                # input tuple.

                def prep_aligner_input_r1():
                    import shutil

                    self.fastq_processor.generate_aligner_input(
                        str(output_filenames[0]) + ".temp",
                        [x[0] for x in input_filenames],
                        self.reverse_reads,
                    )
                    shutil.move(
                        str(output_filenames[0]) + ".temp",
                        output_filenames[0])

                def prep_aligner_input_r2():
                    import shutil

                    self.fastq_processor.generate_aligner_input(
                        str(output_filenames[1]) + ".temp",
                        [x[1] for x in input_filenames],
                        self.reverse_reads,
                    )
                    shutil.move(
                        str(output_filenames[1]) + ".temp",
                        output_filenames[1])

                jobR1 = ppg.TempFileGeneratingJob(output_filenames[0],
                                                  prep_aligner_input_r1)
                jobR2 = ppg.TempFileGeneratingJob(output_filenames[1],
                                                  prep_aligner_input_r2)

                jobR1.depends_on(
                    self.fastq_processor.get_dependencies(
                        str(output_filenames[0])))
                jobR2.depends_on(
                    self.fastq_processor.get_dependencies(
                        str(output_filenames[1])))
                job = ppg.JobList([jobR1, jobR2])
                # needed by downstream code.
                job.filenames = [output_filenames[0], output_filenames[1]]
        else:
            # Every pairing other than "paired" ends up here: all selected
            # files go into the first output file.

            def prep_aligner_input(output_filename):
                import shutil

                self.fastq_processor.generate_aligner_input(
                    str(output_filename) + ".temp", input_filenames,
                    self.reverse_reads)
                shutil.move(str(output_filename) + ".temp", output_filename)

            job = ppg.TempFileGeneratingJob(output_filenames[0],
                                            prep_aligner_input)
            job.depends_on(
                self.fastq_processor.get_dependencies(str(
                    output_filenames[0])))

        # Besides the input dependencies, the job depends on a
        # ParameterInvariant built from the sorted input names, the
        # reverse_reads setting and the processor's class name.
        job.depends_on(
            deps,
            ppg.ParameterInvariant(
                self.name + "input_files",
                tuple(sorted(input_filenames)) +
                (self.reverse_reads, self.fastq_processor.__class__.__name__),
            ),
        )
        return job