def test_split_merge_roundtrip(
        self, tmpdir, pspace_size, max_splits, min_items, n_splits):
    """Splitting, copying each part unchanged, and merging yields the
    original parameter space."""
    workdir = str(tmpdir)
    splitter = Splitter(
        workdir, Param(x=range(pspace_size)), max_splits, min_items)
    splitter.split()

    # Stand-in for real processing: copy every input file verbatim to the
    # corresponding output location.
    for name in os.listdir(splitter.indir):
        data = load_dict_h5(os.path.join(splitter.indir, name))
        save_dict_h5(os.path.join(splitter.outdir, name), data)

    merged_file = os.path.join(workdir, 'result.h5')
    Splitter.merge(splitter.outdir, merged_file)
    merged = load_dict_h5(merged_file)
    assert sorted(merged['x']) == sorted(range(pspace_size))
def create_merge_job(self):
    """Build the job that merges all processed output files into the
    final result file."""
    code = '''
from psyrun.processing import Splitter
Splitter.merge({outdir!r}, {filename!r}, append=False)
'''.format(outdir=self.splitter.outdir, filename=self.result_file)
    # The merge depends on every per-split output file existing.
    out_files = [f for _, f in self.splitter.iter_in_out_files()]
    return Job('merge', self._submit, code, out_files, [self.result_file])
def __init__(self, task):
    """Keep a reference to *task* and set up a Splitter rooted in the
    task's per-name work directory."""
    self.task = task
    work_path = os.path.join(task.workdir, task.name)
    self.splitter = Splitter(
        work_path, task.pspace, task.max_splits, task.min_items)
class DistributeSubtaskCreator(object):
    """Create subtasks for to distribute parameter evaluations.

    Parameters
    ----------
    task : :class:`.TaskDef`
        Task definition to create subtasks for.
    """

    def __init__(self, task):
        # The splitter works in a per-task directory <workdir>/<name>.
        self.splitter = Splitter(
            os.path.join(task.workdir, task.name), task.pspace,
            task.max_splits, task.min_items)
        self.task = task

    @property
    def result_file(self):
        # An explicit result file configured on the task takes precedence;
        # otherwise default to 'result.h5' in the splitter's work directory.
        if self.task.result_file:
            return self.task.result_file
        else:
            return os.path.join(self.splitter.workdir, 'result.h5')

    def _submit(self, code, name, depends_on=None):
        """Submits some code to execute to the task scheduler.

        Parameters
        ----------
        code : str
            Code to execute in job.
        name : str
            Job name.
        depends_on : sequence
            Job IDs that have to finish before the submitted code can be
            executed.

        Returns
        -------
        dict
            Contains the id of the submitted job under the key ``'id'``.
        """
        if depends_on is not None:
            # depends_on may be a mapping of submit results (use its values)
            # or a single job id (wrap it in a list).
            try:
                depends_on = list(depends_on.values())
            except AttributeError:
                depends_on = [depends_on]
        # Wrap the caller's code in a prologue that enables faulthandler
        # (best effort), changes into the task directory, and loads the
        # task definition as ``task`` for the code to use.
        code = '''
try:
    import faulthandler
    faulthandler.enable()
except:
    pass

import os
os.chdir({taskdir!r})

from psyrun.psydoit import TaskDef
task = TaskDef({taskpath!r})
{code}
'''.format(
            # NOTE(review): 'path' does not appear in the template above —
            # confirm whether it is vestigial or the template lost a line.
            path=sys.path,
            taskdir=os.path.abspath(os.path.dirname(self.task.path)),
            taskpath=os.path.abspath(self.task.path),
            code=code)
        # Write the generated script and route its output to a log file,
        # both inside the splitter's work directory.
        codefile = os.path.join(self.splitter.workdir, name + '.py')
        output_filename = os.path.join(self.splitter.workdir, name + '.log')
        with open(codefile, 'w') as f:
            f.write(code)
        # Kill any existing job with the same name before resubmitting.
        for job in self.task.scheduler.get_jobs():
            status = self.task.scheduler.get_status(job)
            if status is not None and name == status.name:
                self.task.scheduler.kill(job)
        return {'id': self.task.scheduler.submit(
            [self.task.python, codefile], output_filename, name,
            depends_on, self.task.scheduler_args)}

    def create_subtasks(self):
        # Build the full job tree, derive fully-qualified names, and visit
        # it to produce doit task dictionaries with up-to-date checks.
        job = self.create_job()
        names = Fullname(job).names
        return ToDoitTask(names, Uptodate(
            job, names, self.task.scheduler).status).visit(job)

    def create_job(self):
        # split -> process (one job per split) -> merge, chained in order.
        split = self.create_split_job()
        process = self.create_process_job()
        merge = self.create_merge_job()
        return JobChain(self.task.name, [split, process, merge])

    def create_split_job(self):
        # Job that splits the parameter space into input files.
        code = '''
from psyrun.processing import Splitter
Splitter({workdir!r}, task.pspace, {max_splits!r}, {min_items!r}).split()
'''.format(
            workdir=self.splitter.workdir,
            max_splits=self.task.max_splits,
            min_items=self.task.min_items)
        # The split depends on the task file itself plus its declared
        # file dependencies (resolved relative to the task file).
        file_dep = [os.path.join(os.path.dirname(self.task.path), f)
                    for f in self.task.file_dep]
        return Job(
            'split', self._submit, code, [self.task.path] + file_dep,
            [f for f, _ in self.splitter.iter_in_out_files()])

    def create_process_job(self):
        # One job per (input, output) file pair; each runs the task's
        # execute function over its input file via a Worker.
        jobs = []
        for i, (infile, outfile) in enumerate(
                self.splitter.iter_in_out_files()):
            code = '''
from psyrun.processing import Worker
Worker(task.mapper, **task.mapper_kwargs).start(
    task.execute, {infile!r}, {outfile!r})
'''.format(infile=infile, outfile=outfile)
            jobs.append(Job(str(i), self._submit, code, [infile], [outfile]))
        group = JobGroup('process', jobs)
        return group

    def create_merge_job(self):
        # Job that merges all per-split outputs into the result file.
        code = '''
from psyrun.processing import Splitter
Splitter.merge({outdir!r}, {filename!r}, append=False)
'''.format(outdir=self.splitter.outdir, filename=self.result_file)
        return Job(
            'merge', self._submit, code,
            [f for _, f in self.splitter.iter_in_out_files()],
            [self.result_file])
def test_n_splits(
        self, tmpdir, pspace_size, max_splits, min_items, n_splits):
    """The splitter's reported split count matches both the expectation
    and the number of generated file pairs."""
    splitter = Splitter(
        str(tmpdir), Param(x=range(pspace_size)), max_splits, min_items)
    file_pairs = list(splitter.iter_in_out_files())
    assert splitter.n_splits == n_splits
    assert len(file_pairs) == n_splits