def test_split_merge_roundtrip(
        self, tmpdir, pspace_size, max_splits, min_items, n_splits):
    """Splitting, copying each part unchanged, and merging yields the
    original parameter space."""
    workdir = str(tmpdir)
    splitter = Splitter(
        workdir, Param(x=range(pspace_size)), max_splits, min_items)
    splitter.split()

    # Stand-in for real processing: copy every input file verbatim to the
    # corresponding output location.
    for name in os.listdir(splitter.indir):
        data = load_dict_h5(os.path.join(splitter.indir, name))
        save_dict_h5(os.path.join(splitter.outdir, name), data)

    merged_file = os.path.join(workdir, 'result.h5')
    Splitter.merge(splitter.outdir, merged_file)
    merged = load_dict_h5(merged_file)
    assert sorted(merged['x']) == sorted(range(pspace_size))
def create_merge_job(self):
    """Build the job that merges all processed output files into the
    final result file."""
    code = '''
from psyrun.processing import Splitter
Splitter.merge({outdir!r}, {filename!r}, append=False)
'''.format(outdir=self.splitter.outdir, filename=self.result_file)
    # The merge depends on every per-split output file existing.
    out_files = [f for _, f in self.splitter.iter_in_out_files()]
    return Job('merge', self._submit, code, out_files, [self.result_file])
def __init__(self, task):
    """Keep a reference to *task* and set up a Splitter rooted in the
    task's per-name work directory."""
    self.task = task
    work_path = os.path.join(task.workdir, task.name)
    self.splitter = Splitter(
        work_path, task.pspace, task.max_splits, task.min_items)
class DistributeSubtaskCreator(object):
    """Create subtasks for to distribute parameter evaluations.

    Parameters
    ----------
    task : :class:`.TaskDef`
        Task definition to create subtasks for.
    """

    def __init__(self, task):
        # The splitter works in a per-task directory <workdir>/<name>.
        self.splitter = Splitter(
            os.path.join(task.workdir, task.name), task.pspace,
            task.max_splits, task.min_items)
        self.task = task

    @property
    def result_file(self):
        # An explicit result file configured on the task takes precedence;
        # otherwise default to 'result.h5' in the splitter's work directory.
        if self.task.result_file:
            return self.task.result_file
        else:
            return os.path.join(self.splitter.workdir, 'result.h5')

    def _submit(self, code, name, depends_on=None):
        """Submits some code to execute to the task scheduler.

        Parameters
        ----------
        code : str
            Code to execute in job.
        name : str
            Job name.
        depends_on : sequence
            Job IDs that have to finish before the submitted code can be
            executed.

        Returns
        -------
        dict
            Contains the id of the submitted job under the key ``'id'``.
        """
        if depends_on is not None:
            # depends_on may be a mapping of submit results (use its values)
            # or a single job id (wrap it in a list).
            try:
                depends_on = list(depends_on.values())
            except AttributeError:
                depends_on = [depends_on]
        # Wrap the caller's code in a prologue that enables faulthandler
        # (best effort), changes into the task directory, and loads the
        # task definition as ``task`` for the code to use.
        code = '''
try:
    import faulthandler
    faulthandler.enable()
except:
    pass

import os
os.chdir({taskdir!r})

from psyrun.psydoit import TaskDef
task = TaskDef({taskpath!r})
{code}
'''.format(
            # NOTE(review): 'path' does not appear in the template above —
            # confirm whether it is vestigial or the template lost a line.
            path=sys.path,
            taskdir=os.path.abspath(os.path.dirname(self.task.path)),
            taskpath=os.path.abspath(self.task.path),
            code=code)
        # Write the generated script and route its output to a log file,
        # both inside the splitter's work directory.
        codefile = os.path.join(self.splitter.workdir, name + '.py')
        output_filename = os.path.join(self.splitter.workdir, name + '.log')
        with open(codefile, 'w') as f:
            f.write(code)
        # Kill any existing job with the same name before resubmitting.
        for job in self.task.scheduler.get_jobs():
            status = self.task.scheduler.get_status(job)
            if status is not None and name == status.name:
                self.task.scheduler.kill(job)
        return {'id': self.task.scheduler.submit(
            [self.task.python, codefile], output_filename, name,
            depends_on, self.task.scheduler_args)}

    def create_subtasks(self):
        # Build the full job tree, derive fully-qualified names, and visit
        # it to produce doit task dictionaries with up-to-date checks.
        job = self.create_job()
        names = Fullname(job).names
        return ToDoitTask(names, Uptodate(
            job, names, self.task.scheduler).status).visit(job)

    def create_job(self):
        # split -> process (one job per split) -> merge, chained in order.
        split = self.create_split_job()
        process = self.create_process_job()
        merge = self.create_merge_job()
        return JobChain(self.task.name, [split, process, merge])

    def create_split_job(self):
        # Job that splits the parameter space into input files.
        code = '''
from psyrun.processing import Splitter
Splitter({workdir!r}, task.pspace, {max_splits!r}, {min_items!r}).split()
'''.format(
            workdir=self.splitter.workdir,
            max_splits=self.task.max_splits,
            min_items=self.task.min_items)
        # The split depends on the task file itself plus its declared
        # file dependencies (resolved relative to the task file).
        file_dep = [os.path.join(os.path.dirname(self.task.path), f)
                    for f in self.task.file_dep]
        return Job(
            'split', self._submit, code, [self.task.path] + file_dep,
            [f for f, _ in self.splitter.iter_in_out_files()])

    def create_process_job(self):
        # One job per (input, output) file pair; each runs the task's
        # execute function over its input file via a Worker.
        jobs = []
        for i, (infile, outfile) in enumerate(
                self.splitter.iter_in_out_files()):
            code = '''
from psyrun.processing import Worker
Worker(task.mapper, **task.mapper_kwargs).start(
    task.execute, {infile!r}, {outfile!r})
'''.format(infile=infile, outfile=outfile)
            jobs.append(Job(str(i), self._submit, code, [infile], [outfile]))
        group = JobGroup('process', jobs)
        return group

    def create_merge_job(self):
        # Job that merges all per-split outputs into the result file.
        code = '''
from psyrun.processing import Splitter
Splitter.merge({outdir!r}, {filename!r}, append=False)
'''.format(outdir=self.splitter.outdir, filename=self.result_file)
        return Job(
            'merge', self._submit, code,
            [f for _, f in self.splitter.iter_in_out_files()],
            [self.result_file])
def test_n_splits(
        self, tmpdir, pspace_size, max_splits, min_items, n_splits):
    """The splitter's reported split count matches both the expectation
    and the number of generated file pairs."""
    splitter = Splitter(
        str(tmpdir), Param(x=range(pspace_size)), max_splits, min_items)
    file_pairs = list(splitter.iter_in_out_files())
    assert splitter.n_splits == n_splits
    assert len(file_pairs) == n_splits