def test_basic_prune(self):
    """Pruning one job must not affect unrelated jobs."""
    ppg.FileGeneratingJob("A", lambda: write("A", "A"))
    pruned = ppg.FileGeneratingJob("B", lambda: write("B", "B"))
    pruned.prune()
    ppg.run_pipegraph()
    # the un-pruned job produced its output ...
    assert Path("A").read_text() == "A"
    # ... while the pruned one was skipped entirely
    assert not Path("B").exists()
def test_invalidation_redoes_output(self, new_pipegraph):
    """Changing the cached calc function invalidates the cache and reruns the writer."""
    dummy = Dummy()
    out_file = "out/A"

    def write_attr():
        write(out_file, dummy.a)

    def calc():
        return ", ".join(str(x) for x in range(0, 100))

    cache_job = ppg.CachedAttributeLoadingJob("out/mycalc", dummy, "a", calc)
    ppg.FileGeneratingJob(out_file, write_attr).depends_on(cache_job)
    ppg.run_pipegraph()
    assert read(out_file) == ", ".join(str(x) for x in range(0, 100))

    new_pipegraph.new_pipegraph()

    def calc2():
        return ", ".join(str(x) for x in range(0, 200))

    # the changed function body invalidates the cache - jobB should be redone
    cache_job = ppg.CachedAttributeLoadingJob("out/mycalc", dummy, "a", calc2)
    ppg.FileGeneratingJob(out_file, write_attr).depends_on(cache_job)
    ppg.run_pipegraph()
    assert read(out_file) == ", ".join(str(x) for x in range(0, 200))
def test_invalidation_ignored_does_not_redo_output(self, new_pipegraph):
    """ignore_code_changes() suppresses invalidation; once the job is rebuilt
    without it, code/result consistency is enforced and the output is redone."""
    dummy = Dummy()
    out_file = "out/A"

    def write_attr():
        write(out_file, dummy.a)

    def calc():
        return ", ".join(str(x) for x in range(0, 100))

    job = ppg.CachedAttributeLoadingJob("out/mycalc", dummy, "a", calc)
    ppg.FileGeneratingJob(out_file, write_attr).depends_on(job)
    ppg.run_pipegraph()
    assert read(out_file) == ", ".join(str(x) for x in range(0, 100))

    new_pipegraph.new_pipegraph()

    def calc2():
        return ", ".join(str(x) for x in range(0, 200))

    job = ppg.CachedAttributeLoadingJob("out/mycalc", dummy, "a", calc2)
    job.ignore_code_changes()
    ppg.FileGeneratingJob(out_file, write_attr).depends_on(job)
    ppg.run_pipegraph()
    # the code change was ignored, so the old output survives
    assert read(out_file) == ", ".join(str(x) for x in range(0, 100))

    new_pipegraph.new_pipegraph()
    job = ppg.CachedAttributeLoadingJob("out/mycalc", dummy, "a", calc2)
    ppg.FileGeneratingJob(out_file, write_attr).depends_on(job)
    ppg.run_pipegraph()
    # The new stuff - you either have an explicit ignore_code_changes in our
    # codebase, or we enforce consistency between code and result
    assert read(out_file) == ", ".join(str(x) for x in range(0, 200))
def run_alevin_on_sample(self, lane, genome, method):
    """Run alevin for one lane and attach an alevinQC report job.

    Returns a (alevin_job, qc_job) tuple; the QC job depends on the alevin job.
    """
    out_dir = Path("results/alevin/") / lane.name

    def do_alevin():
        out_dir.mkdir(exist_ok=True, parents=True)
        self.run_alevin(
            out_dir, [lane.get_aligner_input_filenames()], genome, method
        )
        # sentinel marks successful completion for the pipegraph
        (out_dir / "sentinel.txt").write_text("done")

    alevin_job = ppg.FileGeneratingJob(out_dir / "sentinel.txt", do_alevin).depends_on(
        genome.build_index(self), lane.prepare_input()
    )

    def do_qc():
        (out_dir / "QC").mkdir(exist_ok=True)
        import rpy2.robjects as ro

        ro.r("library('alevinQC')")
        ro.r("alevinQCReport")(
            baseDir=str(out_dir.absolute()),
            sampleId=lane.name,
            outputFile="alevinReport.html",
            outputFormat="html_document",
            outputDir=str((out_dir / "QC").absolute()),
            forceOverwrite=True,
        )

    qc_job = ppg.FileGeneratingJob(
        out_dir / "QC" / "alevinReport.html", do_qc
    ).depends_on(alevin_job)
    return alevin_job, qc_job
def test_unpickle_bug_prevents_single_job_from_unpickling(self):
    """A broken ParameterInvariant pickle must only rerun the affected job."""

    def do_a():
        write("out/A", "A")
        append("out/As", "A")

    def do_b():
        write("out/B", "A")
        append("out/Bs", "A")

    ppg.FileGeneratingJob("out/A", do_a)
    job_b = ppg.FileGeneratingJob("out/B", do_b)
    cd = CantDepickle()
    problem_invariant = ppg.ParameterInvariant("C", (cd,))
    job_b.depends_on(problem_invariant)
    ppg.run_pipegraph()
    assert read("out/A") == "A"
    assert read("out/As") == "A"
    assert read("out/B") == "A"
    assert read("out/Bs") == "A"

    print("second run")
    ppg.new_pipegraph(dump_graph=False)
    ppg.FileGeneratingJob("out/A", do_a)
    job_b = ppg.FileGeneratingJob("out/B", do_b)
    problem_invariant = ppg.ParameterInvariant("C", (cd,))
    job_b.depends_on(problem_invariant)
    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()
    # A was untouched ...
    assert read("out/A") == "A"
    assert read("out/As") == "A"
    assert read("out/B") == "A"
    # ... but B got rerun because we could not load the invariant
    assert read("out/Bs") == "AA"
def test_jobs_concurrent_jobs_run_concurrently(self):
    """Two single-core jobs on a two-core coordinator must overlap in time.

    Concurrency is detected from the recorded start/stop times: the job that
    started first must still be running when the second one starts.
    """
    # we'll determine this by the start respective end times..
    ppg.new_pipegraph(
        ppg.resource_coordinators.LocalSystem(max_cores_to_use=2),
        quiet=True,
        dump_graph=False,
    )
    jobA = ppg.FileGeneratingJob("out/A", lambda: write("out/A", "A"))
    jobB = ppg.FileGeneratingJob("out/B", lambda: write("out/B", "B"))
    jobA.cores_needed = 1
    jobB.cores_needed = 1
    ppg.run_pipegraph()
    assert read("out/A") == "A"
    assert read("out/B") == "B"
    # Guard BEFORE comparing start times: the original compared
    # jobA.start_time < jobB.start_time first, so a job that never ran
    # (start_time is None) raised a TypeError and masked the real failure.
    if jobA.start_time is None:
        raise ValueError("JobA did not run")
    if jobB.start_time is None:
        raise ValueError("JobB did not run")
    if jobA.start_time < jobB.start_time:
        first_job, second_job = jobA, jobB
    else:
        first_job, second_job = jobB, jobA
    print(
        "times",
        first_job.start_time,
        first_job.stop_time,
        second_job.start_time,
        second_job.stop_time,
    )
    # overlap: the first job was still running when the second started
    assert first_job.stop_time > second_job.start_time
def test_raises_on_non_dependend_job_injection2(self):
    """Injecting a job that the dependent job never uses must violate the contract."""
    o = Dummy()
    of = "out/A"

    def do_write():
        write(of, o.A + o.B)

    consumer = ppg.FileGeneratingJob(of, do_write)
    ppg.FileGeneratingJob("out/D", lambda: write("out/D", "D"))

    def generate_deps():
        def load_a():
            return "A"

        def load_b():
            return "B"

        dlA = ppg.AttributeLoadingJob("dlA", o, "A", load_a)
        ppg.AttributeLoadingJob("dlB", o, "B", load_b)
        # dlB is created but never wired into the consumer
        consumer.depends_on(dlA)

    gen_job = ppg.DependencyInjectionJob("C", generate_deps)
    consumer.depends_on(gen_job)
    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()
    assert not (os.path.exists(of))  # since the gen job crashed
    # out/D has no relation to the gen job and must still have run
    assert os.path.exists("out/D")
    assert isinstance(gen_job.exception, ppg.JobContractError)
    assert "case 1" in str(gen_job.exception)
def test_raises_on_non_dependend_job_injection2_can_be_ignored(self):
    """With check_for_dependency_injections=False the contract check is skipped.

    Same setup as test_raises_on_non_dependend_job_injection2, but the
    DependencyInjectionJob is told not to verify that every injected job was
    actually wired into the dependent job - so the graph runs through.
    """
    o = Dummy()
    of = "out/A"

    def do_write():
        write(of, o.A)  # + o.B - but B is not in the dependency chain!

    job = ppg.FileGeneratingJob(of, do_write)
    ppg.FileGeneratingJob("out/D", lambda: write("out/D", "D"))

    def generate_deps():
        def load_a():
            return "A"

        def load_b():
            return "B"

        dlA = ppg.AttributeLoadingJob("dlA", o, "A", load_a)
        ppg.AttributeLoadingJob("dlB", o, "B", load_b)
        job.depends_on(dlA)  # dlB is deliberately left unconnected

    gen_job = ppg.DependencyInjectionJob(
        "C", generate_deps, check_for_dependency_injections=False
    )
    job.depends_on(gen_job)
    ppg.run_pipegraph()
    # The gen job did NOT crash here (the check is disabled), so the output
    # exists. The original comment ("since the gen job crashed") was a
    # copy-paste leftover from the raising variant of this test.
    assert os.path.exists(of)
def test_registration_and_pruning(self, new_pipegraph):
    """Only jobs may be registered; prune_qc's filter decides which jobs stay."""
    with pytest.raises(TypeError):
        register_qc("shu")

    jobA = ppg.FileGeneratingJob("a", lambda: Path("a").write_text("hello"))
    register_qc(jobA)
    print(list(get_qc_jobs()))
    assert jobA in list(get_qc_jobs())
    assert not jobA._pruned

    jobc = register_qc(
        ppg.FileGeneratingJob("c", lambda: Path("b").write_text("hello"))
    )

    def check_prune(job):
        return job.job_id.lower()[-1] == "c"

    prune_qc(check_prune)
    assert jobc in list(get_qc_jobs())
    assert not jobc._pruned

    # jobs registered after prune_qc() are pruned unless the filter keeps them
    jobB = register_qc(
        ppg.FileGeneratingJob("b", lambda: Path("b").write_text("hello"))
    )
    assert jobB in list(get_qc_jobs())
    assert jobB._pruned

    jobC = register_qc(
        ppg.FileGeneratingJob("C", lambda: Path("b").write_text("hello"))
    )
    assert not jobC._pruned
    assert len(list(get_qc_jobs())) == 4

    # prune_qc() without a filter prunes everything
    prune_qc()
    assert jobA._pruned
    assert jobB._pruned
    assert jobc._pruned
    assert jobC._pruned
    for registered in get_qc_jobs():
        assert registered._pruned
def test_filegen_invalidated_jobgen_created_filegen_later_also_invalidated(
    self, new_pipegraph
):
    """Invalidating A also invalidates the generated job C that depends on it."""

    def make_a():
        return ppg.FileGeneratingJob(
            "out/A", lambda: writeappend("out/A", "out/Ac", "A")
        )

    def gen():
        c = ppg.FileGeneratingJob(
            "out/C", lambda: writeappend("out/C", "out/Cx", "C")
        )
        c.depends_on(a)

    a = make_a()
    a.depends_on(ppg.ParameterInvariant("p", "p"))
    ppg.JobGeneratingJob("b", gen)
    ppg.run_pipegraph()
    assert read("out/A") == "A"
    assert read("out/Ac") == "A"
    assert read("out/C") == "C"
    assert read("out/Cx") == "C"

    new_pipegraph.new_pipegraph()
    a = make_a()
    # changed parameter -> A is invalid, and so is the generated C
    a.depends_on(ppg.ParameterInvariant("p", "p2"))
    ppg.JobGeneratingJob("b", gen)
    ppg.run_pipegraph()
    assert read("out/Ac") == "AA"
    assert read("out/Cx") == "CC"
def test_basic_prune2(self):
    """A pruned job is skipped even when its dependency ran successfully."""
    upstream = ppg.FileGeneratingJob('A', lambda: write('A', 'A'))
    downstream = ppg.FileGeneratingJob('B', lambda: write('B', 'B'))
    downstream.depends_on(upstream)
    downstream.prune()
    ppg.run_pipegraph()
    assert Path('A').read_text() == 'A'
    assert not Path('B').exists()
def test_older_jobs_added_back_to_new_pipegraph(self, new_pipegraph):
    """Jobs from an already-run graph cannot be wired into a fresh graph."""
    old_job = ppg.FileGeneratingJob("out/A", lambda of: write(of, "a"))
    ppg.util.global_pipegraph.run()
    new_pipegraph.new_pipegraph()
    fresh_job = ppg.FileGeneratingJob("out/B", lambda of: write(of, "b"))
    # wiring in either direction must be rejected
    with pytest.raises(ppg.PyPipeGraphError):
        old_job.depends_on(fresh_job)
    with pytest.raises(ppg.PyPipeGraphError):
        fresh_job.depends_on(old_job)
def test_pruning_final_jobs_directly(self):
    """A pruned FinalJob does not run; ordinary jobs are untouched."""
    ppg.FileGeneratingJob("A", lambda: write("A", "A"))
    ppg.FileGeneratingJob("B", lambda: write("B", "B"))
    final = ppg.FinalJob("shu", lambda: write("C", "C"))
    final.prune()
    ppg.run_pipegraph()
    assert Path("A").read_text() == "A"
    assert Path("B").read_text() == "B"
    assert not Path("C").exists()
def test_pruning_final_jobs_directly(self):
    """A pruned FinalJob must not run even though all other jobs do.

    The two FileGeneratingJob handles were bound to unused locals (``a``/``b``)
    in the original; the bindings are dropped since only the jobs' side
    effects matter here.
    """
    ppg.FileGeneratingJob('A', lambda: write('A', 'A'))
    ppg.FileGeneratingJob('B', lambda: write('B', 'B'))
    c = ppg.FinalJob('shu', lambda: write('C', 'C'))
    c.prune()
    ppg.run_pipegraph()
    assert Path('A').read_text() == 'A'
    assert Path('B').read_text() == 'B'
    assert not Path('C').exists()
def test_ignored_if_generating_within_filegenerating(self):
    """Creating a job inside a running FileGeneratingJob callback is ignored."""

    def load():
        # job creation while a FileGeneratingJob runs - should be a no-op
        ppg.FileGeneratingJob("out/B", lambda: write("out/B", "aa"))
        write("out/C", "c")

    writer = ppg.FileGeneratingJob("out/A", lambda: write("out/A", "aa"))
    creator = ppg.FileGeneratingJob("out/C", load)
    writer.depends_on(creator)
    ppg.run_pipegraph()
    assert read("out/C") == "c"
def test_tempfile_still_run_if_needed_for_other(self):
    """A temp job still runs (and is cleaned up) when an un-pruned consumer needs it."""
    temp = ppg.TempFileGeneratingJob("A", lambda: write("A", "A"))
    pruned_consumer = ppg.FileGeneratingJob("B", lambda: write("B", "B" + read("A")))
    kept_consumer = ppg.FileGeneratingJob("C", lambda: write("C", "C" + read("A")))
    pruned_consumer.depends_on(temp)
    kept_consumer.depends_on(temp)
    pruned_consumer.prune()
    ppg.run_pipegraph()
    assert not Path("B").exists()  # the pruned consumer was skipped
    assert Path("C").exists()
    assert Path("C").read_text() == "CA"
    assert not Path("A").exists()  # the temp file was removed after use
def test_invalidation(self, new_pipegraph):
    """MemMappedDataLoadingJob: unchanged calc -> no rerun; changed calc -> rerun."""
    import numpy

    o = {}

    def calc():
        return numpy.array(range(0, 10), dtype=numpy.uint32)

    def store(value):
        o[0] = value

    def cleanup():
        del o[0]

    dl = ppg.MemMappedDataLoadingJob("out/A", calc, store, numpy.uint32)
    dl.cleanup = cleanup
    of = "out/B"

    def do_write():
        # numpy.memmap is the public name; numpy.core is a private module and
        # numpy.core.memmap (used originally) is the very same class
        assert isinstance(o[0], numpy.memmap)
        write(of, ",".join(str(x) for x in o[0]))
        append("out/C", "a")

    ppg.FileGeneratingJob(of, do_write).depends_on(dl)
    ppg.run_pipegraph()
    assert read("out/B") == "0,1,2,3,4,5,6,7,8,9"
    assert read("out/C") == "a"

    # second run, nothing changed - the writer must not rerun
    new_pipegraph.new_pipegraph()
    dl = ppg.MemMappedDataLoadingJob("out/A", calc, store, numpy.uint32)
    dl.cleanup = cleanup
    ppg.FileGeneratingJob(of, do_write).depends_on(dl)
    ppg.run_pipegraph()
    assert read("out/C") == "a"

    # third run with a changed calc - everything downstream reruns
    new_pipegraph.new_pipegraph()

    def calc2():
        append("out/D", "a")
        return numpy.array(range(0, 12), dtype=numpy.uint32)

    dl = ppg.MemMappedDataLoadingJob("out/A", calc2, store, numpy.uint32)
    ppg.FileGeneratingJob(of, do_write).depends_on(dl)
    dl.cleanup = cleanup
    ppg.run_pipegraph()
    assert read("out/D") == "a"
    assert read("out/B") == "0,1,2,3,4,5,6,7,8,9,10,11"
    assert read("out/C") == "aa"
def test_indirect_cicle(self):
    """A -> C -> B -> A dependency cycle must be detected at run time."""
    ppg.new_pipegraph(quiet=True, dump_graph=False)
    jobA = ppg.FileGeneratingJob("A", lambda: write("A", "A"))
    jobB = ppg.FileGeneratingJob("B", lambda: write("B", "A"))
    jobC = ppg.FileGeneratingJob("C", lambda: write("C", "A"))
    jobC.depends_on(jobB)
    jobB.depends_on(jobA)
    jobA.depends_on(jobC)
    # the original called a bare, undefined `assertRaises` (unittest idiom in a
    # pytest-style test); pytest.raises is what the rest of this file uses
    with pytest.raises(ppg.CycleError):
        ppg.run_pipegraph()
def test_basic_prune3(self):
    """Pruning propagates to downstream jobs but not to siblings."""
    root = ppg.FileGeneratingJob('A', lambda: write('A', 'A'))
    pruned = ppg.FileGeneratingJob('B', lambda: write('B', 'B'))
    below_pruned = ppg.FileGeneratingJob('C', lambda: write('C', 'C'))
    sibling = ppg.FileGeneratingJob('D', lambda: write('D', 'D'))
    pruned.depends_on(root)
    pruned.prune()
    # depending on an already-pruned job is ok - pruning happens after the
    # graph is completely built
    below_pruned.depends_on(pruned)
    sibling.depends_on(root)
    ppg.run_pipegraph()
    assert Path('A').read_text() == 'A'
    assert Path('D').read_text() == 'D'
    assert not Path('B').exists()
    assert not Path('C').exists()
def b():
    """Jobs created by a JobGeneratingJob may depend on pre-existing jobs."""
    jobB = ppg.FileGeneratingJob("out/B", lambda: write("out/B", "B"))
    jobD = ppg.FileGeneratingJob("out/D", lambda: write("out/D", "D"))

    def genA():
        # generated at run time, wired to both pre-existing jobs
        jobC = ppg.FileGeneratingJob("out/C", lambda: write("out/C", "C"))
        jobC.depends_on(jobB)
        jobC.depends_on(jobD)

    generator = ppg.JobGeneratingJob("A", genA)
    jobB.depends_on(generator)
    ppg.run_pipegraph()
    assert read("out/B") == "B"
    assert read("out/C") == "C"
def test_basic_prune3(self):
    """Pruning B skips B and its downstream C; _pruned records the cause."""
    job_a = ppg.FileGeneratingJob("A", lambda: write("A", "A"))
    job_b = ppg.FileGeneratingJob("B", lambda: write("B", "B"))
    job_c = ppg.FileGeneratingJob("C", lambda: write("C", "C"))
    job_d = ppg.FileGeneratingJob("D", lambda: write("D", "D"))
    job_b.depends_on(job_a)
    job_b.prune()
    # ok - pruning is resolved only once the graph is completely built
    job_c.depends_on(job_b)
    job_d.depends_on(job_a)
    ppg.run_pipegraph()
    assert Path("A").read_text() == "A"
    assert Path("D").read_text() == "D"
    assert not Path("B").exists()
    assert not Path("C").exists()
    # downstream jobs record the id of the job whose pruning removed them
    assert job_c._pruned == job_b.job_id
def prerequisite_jobs(self) -> List[Job]:
    """
    Returns a list of global prerequisite jobs.

    Returns a list of global prerequisite jobs necessary for all samples:
    copying and reindexing the reference fasta, creating a dictionary file.

    Returns
    -------
    List[ppg.Job]
        List of jobs that have to be done before the actual mutation calling.
    """

    def copy_genome(gatk_compliant_genome_file):
        # GATK needs a dictionary in the same directory as the genome so we copy
        shutil.copy(
            self.genome.find_file("genome.fasta"), str(gatk_compliant_genome_file)
        )

    def create_index(gatk_compliant_genome_file):
        # create the .fai index file next to the copied genome
        cmd = ["samtools", "faidx", self.gatk_compliant_genome_file]
        with Path(str(gatk_compliant_genome_file) + ".fai.stderr").open("wb") as stderr:
            # capture stderr (samtools' diagnostics stream) into the *.stderr
            # file, consistent with create_dict below; the original captured
            # stdout instead, leaving the named stderr log effectively empty
            subprocess.check_call(cmd, stderr=stderr)

    def create_dict():
        # create the sequence dictionary via GATK's CreateSequenceDictionary
        arguments = [
            "CreateSequenceDictionary",
            "-R",
            str(self.gatk_compliant_genome_file),
        ]
        cmd = self.build_cmd(self.gatk_compliant_genome_file.parent, 1, arguments)
        with Path(str(self.gatk_compliant_genome_file) + ".stderr").open("wb") as stderr:
            subprocess.check_call(cmd, stderr=stderr)

    job1 = ppg.FileGeneratingJob(self.gatk_compliant_genome_file, copy_genome)
    job2 = ppg.FileGeneratingJob(
        str(self.gatk_compliant_genome_file) + ".fai", create_index
    ).depends_on(job1)
    job3 = ppg.FileGeneratingJob(
        self.gatk_compliant_genome_file.parent
        / (self.gatk_compliant_genome_file.stem + ".dict"),
        create_dict,
    ).depends_on([job1, job2])
    return [job1, job2, job3]
def fastq_dump(self):
    """Return a job that runs fasterq-dump for this accession into target_dir.

    Writes the tool's stdout/stderr next to the output and a "sentinel" file
    on success; raises ValueError when the dump fails or the accession is
    invalid (fasterq-dump does not always signal that via its return code).
    """

    def dump():
        import subprocess  # the unused `import os` from the original was dropped

        cmd = [
            self.algo.path / "bin/" "fasterq-dump",
            "-O",
            str(self.target_dir),
            "-t",
            str(self.cache_dir),
            "-e",
            "4",
        ]
        if self.paired:
            cmd.append("-S")  # split paired reads into separate files
        cmd.append(self.accession)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        (self.target_dir / "stdout").write_bytes(stdout)
        (self.target_dir / "stderr").write_bytes(stderr)
        if p.returncode != 0 or b'invalid accession' in stderr:
            raise ValueError("fasterq-dump", p.returncode)
        (self.target_dir / "sentinel").write_text("done")

    return ppg.FileGeneratingJob(self.target_dir / "sentinel", dump)
def prep_fasta(self, input_filenames, output_filename):
    """Normalize input fasta(s) into one file and index it with faidx.

    Headers are truncated at the first space, sequences wrapped at 80 columns.
    Accepts a ppg.Job, a single path, or an iterable of paths.
    """
    if isinstance(input_filenames, ppg.Job):
        filenames = input_filenames.filenames
        deps = input_filenames
    else:
        if isinstance(input_filenames, (str, Path)):
            filenames = [str(input_filenames)]
        else:
            filenames = [str(x) for x in input_filenames]
        deps = PrebuildFileInvariantsExploding(
            str(output_filename) + "_prep_fasta", filenames
        )

    def prep(output_filename):
        import pysam

        with open(output_filename, "wb") as op:
            for fn in filenames:
                for key, seq in iter_fasta(
                    fn, lambda x: x[:x.find(b" ")] if b" " in x else x
                ):
                    op.write(
                        b">%s\n%s\n" % (key, b"\n".join(wrappedIterator(80)(seq)))
                    )
        pysam.faidx(output_filename)

    Path(output_filename).parent.mkdir(exist_ok=True)
    job = ppg.FileGeneratingJob(output_filename, prep)
    job.depends_on(deps)
    self._download_jobs.append(job)
    return job
def plot(self):
    """Render the heatmap - as a pipegraph job under ppg, eagerly otherwise."""
    normed = self.normed_ddf(self.ddf)
    ordered = self.ordered_ddf(normed)
    names = self.handle_names()

    def do_plot():
        p = self.plot_strategy.plot(ordered.df, names, self.plot_options)
        self.plot_strategy.render(str(self.output_filename), p)

    if not ppg.inside_ppg():
        # no pipegraph - plot right away and hand back the filename
        do_plot()
        return self.output_filename

    ppg.util.global_pipegraph.quiet = False
    deps = [
        ordered.load(),
        ppg.FunctionInvariant(
            "mbf_heatmap." + self.plot_strategy.name + "plot_func",
            self.plot_strategy.__class__.plot,
        ),
        ppg.FunctionInvariant(
            "mbf_heatmap" + self.plot_strategy.name + "render_func",
            self.plot_strategy.__class__.render,
        ),
        ppg.ParameterInvariant(
            self.output_filename, freeze((self.names, self.plot_options))
        ),
    ]
    return ppg.FileGeneratingJob(self.output_filename, do_plot).depends_on(deps)
def generate_file(self, filename, write_callback, dependencies, empty_ok=False):
    """Create a FileGeneratingJob for *filename* wired to *dependencies* and
    this object's load job; returns (job, Path(filename))."""
    job = ppg.FileGeneratingJob(filename, write_callback, empty_ok=empty_ok)
    job.depends_on(dependencies)
    job.depends_on(self.load())
    return job, Path(filename)
def write_gct(genes_or_dataframe: Union[Genes, DataFrame],
              output_directory: Path,
              phenotypes: Tuple[str, str],
              columns_a_b: Tuple[List[str], List[str]],
              dependencies: Union[List[Job], None] = None) -> FileGeneratingJob:
    """
    Creates a Job that writes expression data for GSEA at a specified folder.

    A file named input.gct is created.

    Parameters
    ----------
    genes_or_dataframe : Union[Genes, DataFrame]
        Genes object or DataFrame holding the expression values to dump.
    output_directory : Path
        The output directory in which an input.gct file is created.
    phenotypes : Tuple[str, str]
        The phenotype/class names of the groups to be compared.
    columns_a_b : Tuple[List[str], List[str]]
        The DataFrame columns of the relevant expression values.
    dependencies : List[Job], optional
        List of prerequisite jobs, by default None. (The original default was a
        mutable ``[]`` which was mutated via ``append`` - the shared default
        accumulated jobs across calls, and caller-supplied lists were mutated
        too; both are fixed by copying into a local list.)

    Returns
    -------
    FileGeneratingJob
        The job that creates the file.
    """
    # defensive copy: never mutate the caller's list or a shared default
    dependencies = list(dependencies) if dependencies is not None else []
    output_directory.mkdir(parents=True, exist_ok=True)
    outfile = output_directory / "input.gct"
    if isinstance(genes_or_dataframe, Genes):
        dependencies.append(
            genes_or_dataframe.add_annotator(
                mbf_genomics.genes.annotators.Description()
            )
        )

    def __dump():
        df = genes_or_dataframe
        if isinstance(genes_or_dataframe, Genes):
            df = genes_or_dataframe.df.copy()
        elif isinstance(genes_or_dataframe, DataFrame):
            df = df.copy()
        else:
            raise ValueError(
                f"Parameter genes_or_dataframe must be an instance of Genes or DataFrame, was {type(genes_or_dataframe)}."
            )
        with outfile.open("w") as handle:
            # GCT header: version line, then "<rows>\t<columns>"
            handle.write("#1.2\n")
            handle.write(
                f"{len(df)}\t{len(columns_a_b[0]) + len(columns_a_b[1])}\n")
            handle.write("ProbeName\tDescription\t")
            handle.write("\t".join(columns_a_b[0] + columns_a_b[1]))
            handle.write("\n")
            df = df.rename(columns={"gene_stable_id": "NAME"})
            description = [
                f"{x} {y}" for x, y in zip(df["name"], df["description"])
            ]
            df["Description"] = description
            df = df[["NAME", "Description"] + columns_a_b[0] + columns_a_b[1]]
            df = df.fillna(0)
            for _, row in df.iterrows():
                handle.write("\t".join([str(x) for x in row]) + "\n")

    return ppg.FileGeneratingJob(outfile, __dump).depends_on(dependencies)
def register(self, prerequisites):
    """Call the webservice and register this project"""
    sentinel_file = Path("web/registration_sentinel")

    def do_register():
        import requests

        # project name = last path component of the anysnake project path
        project_path = os.environ["ANYSNAKE_PROJECT_PATH"]
        project_name = project_path[project_path.rfind("/") + 1:]
        print("registration for", project_name)
        auth = requests.auth.HTTPBasicAuth("feed", "feed")
        print(
            requests.get(
                "http://mbf.imt.uni-marburg.de/bil2/register?",
                params={"project_name": project_name},
                auth=auth,
            )
        )
        print(
            requests.get(
                "http://mbf.imt.uni-marburg.de/bil2/gbrowse_dump", auth=auth
            )
        )
        sentinel_file.write_text("Done")

    return ppg.FileGeneratingJob(sentinel_file, do_register).depends_on(prerequisites)
def register_qc_biotypes(self):
    """Register a QC job plotting read counts per gene biotype for this sample.

    Builds a horizontal bar chart (one bar per biotype, flipped coordinates)
    from the unstranded per-gene read counts and registers the resulting
    FileGeneratingJob with the QC registry.
    """
    output_filename = self.result_dir / f"{self.name}_reads_per_biotype.png"
    from mbf_genomics.genes import Genes
    from mbf_genomics.genes.anno_tag_counts import GeneUnstranded

    genes = Genes(self.genome)
    anno = GeneUnstranded(self)

    def plot(output_filename):
        print(genes.df.columns)
        # dppd/plotnine chain: sum the count column per biotype, then render.
        # anno.columns[0] is the sample's read-count column added by the
        # annotator dependency below.
        return (
            dp(genes.df)
            .groupby("biotype")
            .summarize((anno.columns[0], lambda x: x.sum(), "read count"))
            .mutate(sample=self.name)
            .p9()
            .theme_bw()
            .annotation_stripes()
            .add_bar("biotype", "read count", stat="identity")
            # format tick labels with 2 significant digits
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            # .turn_x_axis_labels()
            .coord_flip()
            .title(self.name)
            .render(
                output_filename,
                width=6,
                # grow the figure with the number of biotypes shown
                height=2 + len(genes.df.biotype.unique()) * 0.25,
            )
        )

    return register_qc(
        ppg.FileGeneratingJob(output_filename, plot).depends_on(
            genes.add_annotator(anno)
        )
    )
def dump_rsync_list(self):
    """Job writing web/scb/rsync_list.txt: project-relative paths to rsync."""
    output_filename = "web/scb/rsync_list.txt"

    def dump(output_filename):
        collected = set()
        # gather every referenced file from the metadata entries
        for group in self.meta_data:
            for entry in self.meta_data[group]:
                for key in [
                    "table_path",
                    "path_bigbed",
                    "path_table",
                    "path_bam",
                    "path",
                ]:
                    if key in entry:  # genes
                        collected.add(Path(entry[key]).absolute())
                if "path_bam" in entry:
                    # bam files need their index shipped as well
                    collected.add(Path(entry["path_bam"] + ".bai").absolute())
        for fn in Path("web/scb").glob("*"):
            collected.add(fn.absolute())
        output = ""
        # collected.add("/project/web/scb/metadata.json")
        for candidate in sorted([str(p) for p in collected]):
            if candidate.startswith("/project"):
                output += candidate[len("/project/"):]
                output += "\n"
        Path(output_filename).write_text(output)

    return (
        ppg.FileGeneratingJob(output_filename, dump)
        .depends_on(self.deps)
        .depends_on(self.dump_meta_data_json())
    )