Code example #1
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        file_dir = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(self.tmp_folder, 'map_edge_ids.py')
        util.copy_and_replace(os.path.join(file_dir, 'map_edge_ids.py'),
                              script_path)

        with open(self.config_path) as f:
            config = json.load(f)
            init_block_shape = config['block_shape']
            n_threads = config['n_threads']
            roi = config.get('roi', None)

        # make config for the job
        config = {
            'block_shape': init_block_shape,
            'n_threads': n_threads,
            'roi': roi
        }
        for scale in range(self.max_scale + 1):
            self._prepare_job(scale, config)

        # submit the jobs
        if self.run_local:
            # run the jobs in a process pool (needs python 3's concurrent.futures)
            with futures.ProcessPoolExecutor(self.max_scale + 1) as tp:
                tasks = [
                    tp.submit(self._submit_job, scale, n_threads)
                    for scale in range(self.max_scale + 1)
                ]
                [t.result() for t in tasks]
        else:
            for scale in range(self.max_scale + 1):
                self._submit_job(scale, n_threads)

        # wait till all jobs are finished
        if not self.run_local:
            util.wait_for_jobs('papec')

        # check for results
        processed_scales, times = self._collect_outputs(self.max_scale + 1)
        success = len(processed_scales) == self.max_scale + 1
        if success:
            with open(self.output().path, 'w') as f:
                json.dump({'times': times}, f)
        else:
            log_path = os.path.join(self.tmp_folder,
                                    'map_edge_ids_partial.log')
            with open(log_path, 'w') as f:
                json.dump(
                    {
                        'processed_scales': processed_scales,
                        'times': times
                    }, f)
            raise RuntimeError("MapEdgesTask failed for %i / %i scales," %
                               (len(times), self.max_scale + 1) +
                               "partial results serialized to %s" % log_path)
Code example #2
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        script_path = os.path.join(self.tmp_folder, 'global_problem.py')
        file_dir = os.path.dirname(os.path.abspath(__file__))
        util.copy_and_replace(os.path.join(file_dir, 'global_problem.py'),
                              script_path)

        with open(self.config_path) as f:
            config = json.load(f)
            n_threads = config['n_threads']
            # TODO support computation with roi (read but not used yet)
            roi = config.get('roi', None)

        # prepare the job config
        job_config = {'n_threads': n_threads}
        config_path = os.path.join(self.tmp_folder,
                                   'global_problem_config.json')
        with open(config_path, 'w') as f:
            json.dump(job_config, f)

        command = '%s %s %s %i %s %s' % (script_path, self.path, self.out_key,
                                         self.max_scale, config_path,
                                         self.tmp_folder)
        log_file = os.path.join(self.tmp_folder, 'logs', 'log_global_problem')
        err_file = os.path.join(self.tmp_folder, 'error_logs',
                                'err_global_problem')
        bsub_command = ('bsub -n %i -J global_problem ' % n_threads +
                        '-We %i -o %s -e %s \'%s\'' %
                        (self.time_estimate, log_file, err_file, command))
        if self.run_local:
            subprocess.call([command], shell=True)
        else:
            subprocess.call([bsub_command], shell=True)
            util.wait_for_jobs('papec')

        # check the output
        try:
            res_path = os.path.join(self.tmp_folder, 'global_problem.log')
            with open(res_path) as f:
                res = json.load(f)
                t = res['t']
            print("Global problem finished in", t, "s")
            success = True
        except Exception:
            success = False
            # clean up the output
            rmtree(os.path.join(self.path, self.out_key))

        # raise if the task did not finish successfully
        if not success:
            raise RuntimeError("GlobalProblemTask failed")
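In cluster mode every task blocks on `util.wait_for_jobs('papec')` after submitting through `bsub`, i.e. it waits until the LSF user has no unfinished jobs. The helper is not included here; a plausible sketch polls LSF's `bjobs`, with the poll interval and output parsing as assumptions:

import subprocess
import time


# Hypothetical sketch of production.util.wait_for_jobs: poll bjobs until
# the given user has no pending or running jobs left.
def wait_for_jobs(user, poll_interval=10):
    while True:
        out = subprocess.check_output(['bjobs', '-u', user]).decode().strip()
        # assumption: bjobs reports nothing (or 'No unfinished job found')
        # once all jobs are done
        if out == '' or 'No unfinished job' in out:
            break
        time.sleep(poll_interval)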
Code example #3
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        script_path = os.path.join(self.tmp_folder, 'merge_features.py')
        file_dir = os.path.dirname(os.path.abspath(__file__))
        util.copy_and_replace(os.path.join(file_dir, 'merge_features.py'),
                              script_path)

        with open(self.config_path) as f:
            config = json.load(f)
            block_shape = config['block_shape']
            n_threads = config['n_threads']
            roi = config.get('roi', None)

        # write job config
        job_config = {
            'block_shape': block_shape,
            'n_threads': n_threads,
            'roi': roi
        }
        config_path = os.path.join(self.tmp_folder,
                                   'merge_features_config.json')
        with open(config_path, 'w') as f:
            json.dump(job_config, f)

        # submit job
        command = '%s %s %s %s %s' % (script_path, self.graph_path,
                                      self.out_path, config_path,
                                      self.tmp_folder)
        log_file = os.path.join(self.tmp_folder, 'logs', 'log_merge_features')
        err_file = os.path.join(self.tmp_folder, 'error_logs',
                                'err_merge_features')
        bsub_command = 'bsub -n %i -J merge_features -We %i -o %s -e %s \'%s\'' % (
            n_threads, self.time_estimate, log_file, err_file, command)
        if self.run_local:
            subprocess.call([command], shell=True)
        else:
            subprocess.call([bsub_command], shell=True)
            util.wait_for_jobs('papec')

        # check the output: the worker writes {'t': runtime} on success
        try:
            with open(self.output().path) as f:
                json.load(f)['t']
            success = True
        except Exception:
            success = False

        if not success:
            raise RuntimeError("MergeFeaturesTask failed")
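Like CostsTask below, MergeFeaturesTask decides success purely by opening its output file and indexing the key 't', so the implied contract is that the worker script writes a small JSON runtime log only when it finishes. A sketch of that convention from the worker's side (the actual scripts are not shown, so the structure is assumed):

import json
import time


# Assumed worker-side counterpart of the success check above: time the
# work and dump {'t': runtime} to the log the task will read.
def main(out_log_path):
    t0 = time.time()
    # ... the actual merging / computation happens here ...
    with open(out_log_path, 'w') as f:
        json.dump({'t': time.time() - t0}, f)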
Code example #4
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        script_path = os.path.join(self.tmp_folder, 'make_costs.py')
        file_dir = os.path.dirname(os.path.abspath(__file__))
        util.copy_and_replace(os.path.join(file_dir, 'make_costs.py'),
                              script_path)

        with open(self.config_path) as f:
            config = json.load(f)
            beta = config.get('beta', 0.5)
            weighting_exponent = config.get('weighting_exponent', 1.)
            weight_edges = config.get('weight_multicut_edges', False)

        # write job config
        job_config = {
            'beta': beta,
            'weight_edges': weight_edges,
            'weighting_exponent': weighting_exponent
        }
        config_path = os.path.join(self.tmp_folder, 'make_costs_config.json')
        with open(config_path, 'w') as f:
            json.dump(job_config, f)

        # submit job
        command = '%s %s %s %s %s %s' % (script_path, self.features_path,
                                         self.graph_path, self.out_path,
                                         config_path, self.tmp_folder)
        log_file = os.path.join(self.tmp_folder, 'logs', 'log_costs')
        err_file = os.path.join(self.tmp_folder, 'error_logs', 'err_costs')
        bsub_command = 'bsub -J costs -We %i -o %s -e %s \'%s\'' % (
            self.time_estimate, log_file, err_file, command)
        if self.run_local:
            subprocess.call([command], shell=True)
        else:
            subprocess.call([bsub_command], shell=True)
            util.wait_for_jobs('papec')

        # check the output: the worker writes {'t': runtime} on success
        try:
            with open(self.output().path) as f:
                json.load(f)['t']
            success = True
        except Exception:
            success = False

        if not success:
            raise RuntimeError("CostsTask failed")
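CostsTask only forwards `beta`, `weighting_exponent`, and `weight_edges` to `make_costs.py`, which is not shown. In multicut pipelines these parameters conventionally enter through the log-odds transform of edge probabilities, with `beta` acting as a boundary bias and the exponent damping edge-size weights; the sketch below shows that standard recipe, not necessarily the exact script:

import numpy as np


# The common multicut cost transform that make_costs.py presumably
# implements; this is the textbook recipe, not code from the script.
def probabilities_to_costs(probs, edge_sizes=None,
                           beta=0.5, weighting_exponent=1.):
    # clip to avoid +-inf from the log-odds
    p = np.clip(probs, 0.001, 0.999)
    # positive costs = attractive edges; beta biases the decision boundary
    costs = np.log((1. - p) / p) + np.log((1. - beta) / beta)
    if edge_sizes is not None:
        # weight by relative edge size, damped by the exponent
        w = (edge_sizes / edge_sizes.max()) ** weighting_exponent
        costs *= w
    return costs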
Code example #5
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        file_dir = os.path.dirname(os.path.abspath(__file__))
        util.copy_and_replace(
            os.path.join(file_dir, 'solve_subproblems.py'),
            os.path.join(self.tmp_folder, 'solve_subproblems.py'))

        with open(self.config_path) as f:
            config = json.load(f)
            initial_block_shape = config['block_shape']
            n_threads = config['n_threads']
            roi = config.get('roi', None)

        # get number of blocks
        factor = 2**self.scale
        block_shape = [factor * bs for bs in initial_block_shape]
        shape = z5py.File(self.graph_path).attrs['shape']
        blocking = nifty.tools.blocking([0, 0, 0], shape, block_shape)

        # check if we have a roi and adjust the block list if we do
        if roi is None:
            n_blocks = blocking.numberOfBlocks
            block_list = list(range(n_blocks))
        else:
            block_list = blocking.getBlockIdsOverlappingBoundingBox(
                roi[0], roi[1], [0, 0, 0]).tolist()
            n_blocks = len(block_list)

        # find the actual number of jobs and prepare job configs
        n_jobs = min(n_blocks, self.max_jobs)
        self._prepare_jobs(n_jobs, block_list, initial_block_shape, n_threads)

        # submit the jobs
        if self.run_local:
            # run the jobs in a process pool (needs python 3's concurrent.futures)
            with futures.ProcessPoolExecutor(n_jobs) as tp:
                tasks = [
                    tp.submit(self._submit_job, job_id, n_threads)
                    for job_id in range(n_jobs)
                ]
                [t.result() for t in tasks]
        else:
            for job_id in range(n_jobs):
                self._submit_job(job_id, n_threads)

        # wait till all jobs are finished
        if not self.run_local:
            util.wait_for_jobs('papec')

        # check the job outputs
        processed_blocks, times = self._collect_outputs(block_list)
        assert len(processed_blocks) == len(times)
        success = len(processed_blocks) == n_blocks

        # write output file if we succeed, otherwise write partial
        # success to different file and raise exception
        if success:
            out = self.output()
            # TODO does 'out' support the with statement?
            fres = out.open('w')
            json.dump({'times': times}, fres)
            fres.close()
        else:
            log_path = os.path.join(
                self.tmp_folder,
                'solve_subproblems_s%i_partial.json' % self.scale)
            with open(log_path, 'w') as out:
                json.dump(
                    {
                        'times': times,
                        'processed_blocks': processed_blocks
                    }, out)
            raise RuntimeError("SolveSubproblemTask failed, "
                               "%i / %i blocks processed, "
                               "serialized partial results to %s" %
                               (len(processed_blocks), n_blocks, log_path))
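SolveSubproblemTask (and the initial-features task that follows) derives its block list from `nifty.tools.blocking`, scaling the block shape by 2**scale and, when a ROI is configured, keeping only blocks that overlap its bounding box. A minimal standalone usage of that API with toy shapes:

import nifty.tools

# toy volume and block shape instead of values read from graph attributes
shape = [100, 1024, 1024]
block_shape = [50, 512, 512]
blocking = nifty.tools.blocking([0, 0, 0], shape, block_shape)

# without a roi: process all blocks
block_list = list(range(blocking.numberOfBlocks))

# with a roi: only the blocks overlapping its bounding box
roi = ([0, 0, 0], [50, 512, 512])
block_list = blocking.getBlockIdsOverlappingBoundingBox(
    roi[0], roi[1], [0, 0, 0]).tolist()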
Code example #6
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        file_dir = os.path.dirname(os.path.abspath(__file__))
        util.copy_and_replace(
            os.path.join(file_dir, 'initial_features.py'),
            os.path.join(self.tmp_folder, 'initial_features.py'))

        with open(self.config_path) as f:
            config = json.load(f)
            block_shape = config['block_shape']
            offsets = config['affinity_offsets']
            roi = config.get('roi', None)

        # hardcoded keys
        graph_key = 'graph'
        out_key = 'features'

        # create the output files
        f_graph = z5py.File(self.graph_path, use_zarr_format=False)
        shape = f_graph.attrs['shape']
        ds_graph = f_graph[graph_key]
        n_edges = ds_graph.attrs['numberOfEdges']

        f_out = z5py.File(self.out_path, use_zarr_format=False)
        f_out.require_group('blocks')
        # chunk size = 64**3
        chunk_size = min(262144, n_edges)
        f_out.require_dataset(out_key,
                              dtype='float64',
                              shape=(n_edges, 10),
                              chunks=(chunk_size, 1),
                              compression='gzip')

        # get number of blocks
        blocking = nifty.tools.blocking([0, 0, 0], shape, block_shape)
        # check if we have a roi and adjust the block list if we do
        if roi is None:
            n_blocks = blocking.numberOfBlocks
            block_list = list(range(n_blocks))
        else:
            block_list = blocking.getBlockIdsOverlappingBoundingBox(
                roi[0], roi[1], [0, 0, 0]).tolist()
            n_blocks = len(block_list)
        # find the actual number of jobs and prepare job configs
        n_jobs = min(n_blocks, self.max_jobs)
        self._prepare_jobs(n_jobs, block_list, offsets)

        # submit the jobs
        if self.run_local:
            # run the jobs in a process pool (needs python 3's concurrent.futures)
            with futures.ProcessPoolExecutor(n_jobs) as tp:
                tasks = [
                    tp.submit(self._submit_job, job_id)
                    for job_id in range(n_jobs)
                ]
                [t.result() for t in tasks]
        else:
            for job_id in range(n_jobs):
                self._submit_job(job_id)

        # wait till all jobs are finished
        if not self.run_local:
            util.wait_for_jobs('papec')

        # check the job outputs
        processed_jobs, times = self._collect_outputs(n_jobs)
        assert len(processed_jobs) == len(times)
        success = len(processed_jobs) == n_jobs

        # write output file if we succeed, otherwise write partial
        # success to different file and raise exception
        if success:
            out = self.output()
            # TODO does 'out' support the with statement?
            fres = out.open('w')
            json.dump({'times': times}, fres)
            fres.close()
        else:
            log_path = os.path.join(self.tmp_folder,
                                    'initial_features_partial.json')
            with open(log_path, 'w') as out:
                json.dump({
                    'times': times,
                    'processed_jobs': processed_jobs
                }, out)
            raise RuntimeError(
                "InitialFeatureTask failed, %i / %i jobs processed, "
                "serialized partial results to %s" %
                (len(processed_jobs), n_jobs, log_path))
Code example #7
    def run(self):
        from production import util

        # copy the script to the temp folder and replace the shebang
        script_path = os.path.join(self.tmp_folder, 'reduce_problem.py')
        file_dir = os.path.dirname(os.path.abspath(__file__))
        util.copy_and_replace(os.path.join(file_dir, 'reduce_problem.py'),
                              script_path)

        with open(self.config_path) as f:
            config = json.load(f)
            block_shape = config['block_shape']
            n_threads = config['n_threads']
            roi = config.get('roi', None)

        # prepare the job config
        job_config = {'block_shape': block_shape,
                      'n_threads': n_threads,
                      'roi': roi}
        config_path = os.path.join(self.tmp_folder,
                                   'reduce_problem_config_s%i.json' % self.scale)
        with open(config_path, 'w') as f:
            json.dump(job_config, f)

        command = '%s %s %s %i %s %s' % (script_path, self.graph_path,
                                         self.costs_path, self.scale,
                                         config_path, self.tmp_folder)
        log_file = os.path.join(self.tmp_folder,
                                'logs', 'log_reduce_problem_s%i' % self.scale)
        err_file = os.path.join(self.tmp_folder,
                                'error_logs', 'err_reduce_problem_s%i.err' % self.scale)
        bsub_command = ('bsub -n %i -J reduce_problem ' % n_threads +
                        '-We %i -o %s -e %s \'%s\'' % (self.time_estimate,
                                                       log_file, err_file, command))
        if self.run_local:
            subprocess.call([command], shell=True)
        else:
            subprocess.call([bsub_command], shell=True)
            util.wait_for_jobs('papec')

        ds_graph = z5py.File(self.graph_path)['graph']
        nodes = ds_graph.attrs['numberOfNodes']
        edges = ds_graph.attrs['numberOfEdges']
        # check the output
        try:
            with open(self.output().path) as f:
                res = json.load(f)
                t = res['t']
                new_nodes = res['new_nodes']
                new_edges = res['new_edges']
            print("Reduce problem finished in", t, "s")
            print("Reduced number of nodes from", nodes, "to", new_nodes)
            print("Reduced number of edges from", edges, "to", new_edges)
            success = True
        except Exception:
            success = False

        # raise if the task did not finish successfully
        if not success:
            raise RuntimeError("ReduceProblemTask failed")
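The blockwise tasks (examples #1, #5, and #6) delegate submission to `_prepare_jobs`/`_submit_job` helpers that are not included in these snippets. Judging from the inline submission logic of examples #2, #4, and #7, each `_submit_job` presumably builds the same local/bsub branch per job; a hypothetical reconstruction (names, paths, and argument layout assumed, `os` and `subprocess` imported at module level as in the originals):

    # Hypothetical _submit_job, reconstructed from the inline submission
    # pattern of the other tasks; not taken from the actual code.
    def _submit_job(self, job_id, n_threads):
        script_path = os.path.join(self.tmp_folder, 'solve_subproblems.py')
        config_path = os.path.join(self.tmp_folder,
                                   'job_config_%i.json' % job_id)
        command = '%s %s %s' % (script_path, config_path, self.tmp_folder)
        log_file = os.path.join(self.tmp_folder, 'logs',
                                'log_job_%i' % job_id)
        err_file = os.path.join(self.tmp_folder, 'error_logs',
                                'err_job_%i' % job_id)
        if self.run_local:
            subprocess.call([command], shell=True)
        else:
            bsub_command = ('bsub -n %i -J job_%i ' % (n_threads, job_id) +
                            '-We %i -o %s -e %s \'%s\'' %
                            (self.time_estimate, log_file, err_file, command))
            subprocess.call([bsub_command], shell=True)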