Ejemplo n.º 1
0
    def test_split_merge_roundtrip(
            self, tmpdir, pspace_size, max_splits, min_items, n_splits):
        """Splitting and then merging should reproduce the full parameter set."""
        sp = Splitter(
            str(tmpdir), Param(x=range(pspace_size)), max_splits, min_items)
        sp.split()

        # Simulate the processing step by copying each split input file
        # verbatim to the corresponding output file.
        for fname in os.listdir(sp.indir):
            src = os.path.join(sp.indir, fname)
            dest = os.path.join(sp.outdir, fname)
            save_dict_h5(dest, load_dict_h5(src))

        merged = os.path.join(str(tmpdir), 'result.h5')
        Splitter.merge(sp.outdir, merged)
        data = load_dict_h5(merged)
        assert sorted(data['x']) == sorted(range(pspace_size))
Ejemplo n.º 2
0
    def create_merge_job(self):
        code = '''
from psyrun.processing import Splitter
Splitter.merge({outdir!r}, {filename!r}, append=False)
        '''.format(outdir=self.splitter.outdir, filename=self.result_file)
        return Job(
            'merge', self._submit, code,
            [f for _, f in self.splitter.iter_in_out_files()],
            [self.result_file])
Ejemplo n.º 3
0
 def __init__(self, task):
     """Store the task and set up a Splitter in its working directory."""
     self.task = task
     workdir = os.path.join(task.workdir, task.name)
     self.splitter = Splitter(
         workdir, task.pspace, task.max_splits, task.min_items)
Ejemplo n.º 4
0
class DistributeSubtaskCreator(object):
    """Create subtasks to distribute parameter evaluations.

    Builds a chain of scheduler jobs (split -> process -> merge) that
    evaluates a task's parameter space in independently scheduled pieces.

    Parameters
    ----------
    task : :class:`.TaskDef`
        Task definition to create subtasks for.
    """

    def __init__(self, task):
        self.splitter = Splitter(
            os.path.join(task.workdir, task.name), task.pspace,
            task.max_splits, task.min_items)
        self.task = task

    @property
    def result_file(self):
        """Path the merged results are written to.

        Uses the task's explicit result file if set; otherwise defaults
        to ``result.h5`` inside the splitter's working directory.
        """
        if self.task.result_file:
            return self.task.result_file
        else:
            return os.path.join(self.splitter.workdir, 'result.h5')

    def _submit(self, code, name, depends_on=None):
        """Submits some code to execute to the task scheduler.

        Any already scheduled job with the same name is killed before
        submission so jobs are not duplicated.

        Parameters
        ----------
        code : str
            Code to execute in job.
        name : str
            Job name.
        depends_on : sequence
            Job IDs that have to finish before the submitted code can be
            executed.

        Returns
        -------
        dict
            Contains the id of the submitted job under the key ``'id'``.
        """
        if depends_on is not None:
            # Accept either a dict of job ids (use its values) or a
            # single job id (wrap it in a list).
            try:
                depends_on = list(depends_on.values())
            except AttributeError:
                depends_on = [depends_on]
        # Wrap the job code so it runs in the task's directory with the
        # task definition loaded as ``task``. faulthandler is enabled on
        # a best-effort basis to get tracebacks on hard crashes.
        # NOTE: removed the dead ``path=sys.path`` format argument — the
        # template has no ``{path}`` placeholder.
        code = '''
try:
    import faulthandler
    faulthandler.enable()
except:
    pass

import os
os.chdir({taskdir!r})

from psyrun.psydoit import TaskDef
task = TaskDef({taskpath!r})
{code}
        '''.format(
            taskdir=os.path.abspath(os.path.dirname(self.task.path)),
            taskpath=os.path.abspath(self.task.path), code=code)
        codefile = os.path.join(self.splitter.workdir, name + '.py')
        output_filename = os.path.join(self.splitter.workdir, name + '.log')
        with open(codefile, 'w') as f:
            f.write(code)

        # Kill stale jobs with the same name before resubmitting.
        for job in self.task.scheduler.get_jobs():
            status = self.task.scheduler.get_status(job)
            if status is not None and name == status.name:
                self.task.scheduler.kill(job)

        return {'id': self.task.scheduler.submit(
            [self.task.python, codefile], output_filename, name, depends_on,
            self.task.scheduler_args)}

    def create_subtasks(self):
        """Create and return the doit subtasks for the whole job chain."""
        job = self.create_job()
        names = Fullname(job).names
        return ToDoitTask(names, Uptodate(
            job, names, self.task.scheduler).status).visit(job)

    def create_job(self):
        """Assemble the split -> process -> merge job chain."""
        split = self.create_split_job()
        process = self.create_process_job()
        merge = self.create_merge_job()
        return JobChain(self.task.name, [split, process, merge])

    def create_split_job(self):
        """Build the job that splits the parameter space into input files."""
        code = '''
from psyrun.processing import Splitter
Splitter({workdir!r}, task.pspace, {max_splits!r}, {min_items!r}).split()
        '''.format(
            workdir=self.splitter.workdir, max_splits=self.task.max_splits,
            min_items=self.task.min_items)
        # The split depends on the task file and its declared file
        # dependencies (resolved relative to the task file's directory).
        file_dep = [os.path.join(os.path.dirname(self.task.path), f)
                    for f in self.task.file_dep]
        return Job(
            'split', self._submit, code, [self.task.path] + file_dep,
            [f for f, _ in self.splitter.iter_in_out_files()])

    def create_process_job(self):
        """Build one worker job per split input file, grouped together."""
        jobs = []
        for i, (infile, outfile) in enumerate(
                self.splitter.iter_in_out_files()):
            code = '''
from psyrun.processing import Worker
Worker(task.mapper, **task.mapper_kwargs).start(
    task.execute, {infile!r}, {outfile!r})
            '''.format(infile=infile, outfile=outfile)
            jobs.append(Job(str(i), self._submit, code, [infile], [outfile]))

        group = JobGroup('process', jobs)
        return group

    def create_merge_job(self):
        """Build the job that merges all processed output files."""
        code = '''
from psyrun.processing import Splitter
Splitter.merge({outdir!r}, {filename!r}, append=False)
        '''.format(outdir=self.splitter.outdir, filename=self.result_file)
        return Job(
            'merge', self._submit, code,
            [f for _, f in self.splitter.iter_in_out_files()],
            [self.result_file])
Ejemplo n.º 5
0
 def test_n_splits(
         self, tmpdir, pspace_size, max_splits, min_items, n_splits):
     """The splitter should report and produce the expected split count."""
     sp = Splitter(
         str(tmpdir), Param(x=range(pspace_size)), max_splits, min_items)
     assert sp.n_splits == n_splits
     file_pairs = list(sp.iter_in_out_files())
     assert len(file_pairs) == n_splits