def test_invalid_num_jobs(self):
    """
    recommend_cpu_cores must reject a number of jobs that is a float, negative, or a string
    """
    # (illegal number of jobs, exception expected from recommend_cpu_cores)
    illegal_inputs = ((14035.67, TypeError),
                      (-14035, ValueError),
                      ('not a number', TypeError))

    for bad_num_jobs, expected_error in illegal_inputs:
        with self.assertRaises(expected_error):
            comp_utils.recommend_cpu_cores(bad_num_jobs)
def test_reccomend_cores_few_small_jobs(self):
    """
    13 short jobs should be assigned a single core, with or without a request for MAX_CPU_CORES
    """
    few_jobs = 13

    # First without any request, then while explicitly requesting every core
    for extra_kwargs in ({}, {'requested_cores': MAX_CPU_CORES}):
        recommended = comp_utils.recommend_cpu_cores(few_jobs, lengthy_computation=False,
                                                     **extra_kwargs)
        self.assertEqual(recommended, 1)
def test_recommed_cores_few_large_jobs(self):
    """
    13 lengthy jobs should use all cores except the ones expected to be left free, or the
    requested number of cores when one is supplied
    """
    few_jobs = 13
    # Two cores are expected to stay free on machines with more than 4 cores, one otherwise
    reserved_cores = 2 if MAX_CPU_CORES > 4 else 1

    recommended = comp_utils.recommend_cpu_cores(few_jobs, lengthy_computation=True)
    self.assertEqual(recommended, max(1, MAX_CPU_CORES - reserved_cores))

    all_but_one = MAX_CPU_CORES - 1
    recommended = comp_utils.recommend_cpu_cores(few_jobs,
                                                 requested_cores=all_but_one,
                                                 lengthy_computation=True)
    self.assertEqual(recommended, max(1, all_but_one))
def test_recommend_cores_illegal_min_free_cores(self):
    """
    Asking for MAX_CPU_CORES cores to be kept free must raise a ValueError
    """
    many_jobs = 14035
    with self.assertRaises(ValueError):
        comp_utils.recommend_cpu_cores(many_jobs,
                                       lengthy_computation=False,
                                       min_free_cores=MAX_CPU_CORES)
def test_recommend_cores_changing_min_cores(self):
    """
    For every legal min_free_cores (1 to MAX_CPU_CORES - 1), the recommendation for 14035 short
    jobs must equal the remaining cores, but never less than one
    """
    many_jobs = 14035
    free_cores = 1
    while free_cores < MAX_CPU_CORES:
        recommended = comp_utils.recommend_cpu_cores(many_jobs,
                                                     lengthy_computation=False,
                                                     min_free_cores=free_cores)
        expected = max(1, MAX_CPU_CORES - free_cores)
        self.assertEqual(recommended, expected)
        free_cores += 1
def _unit_compute_fit(self, obj_func, obj_func_args=None, solver_options=None):
    """
    Performs least-squares fitting on self.data using self._guess for initial conditions.

    Results of the computation are captured in self._results

    Parameters
    ----------
    obj_func : callable
        Objective function to minimize on
    obj_func_args : list or other sequence, optional
        Arguments required by obj_func following the guess parameters
        (which should be the first argument) and the response being fit.
        Default - no additional arguments
    solver_options : dict, optional
        Keyword arguments passed onto scipy.optimize.least_squares
        Default - {'jac': 'cs'}
    """
    # Defaults are created per call rather than in the signature so that no mutable object is
    # shared between calls. Any sequence (e.g. a tuple) is accepted for obj_func_args.
    obj_func_args = [] if obj_func_args is None else list(obj_func_args)
    if solver_options is None:
        solver_options = {'jac': 'cs'}

    # At this point data has been read in. Read in the guess as well:
    self._read_guess_chunk()

    if self.verbose and self.mpi_rank == 0:
        print('_unit_compute_fit got:\nobj_func: {}\nobj_func_args: {}\n'
              'solver_options: {}'.format(obj_func, obj_func_args, solver_options))

    # TODO: Generalize this bit. Use Parallel compute instead!
    if self.mpi_size > 1:
        # More than one MPI rank: each rank fits its own data one response at a time
        if self.verbose:
            print('Rank {}: About to start serial computation'
                  '.'.format(self.mpi_rank))

        self._results = list()
        for pulse_resp, pulse_guess in zip(self.data, self._guess):
            curr_results = least_squares(obj_func, pulse_guess,
                                         args=[pulse_resp] + obj_func_args,
                                         **solver_options)
            self._results.append(curr_results)
    else:
        # Single rank: spread the individual fits over local cores via joblib
        cores = recommend_cpu_cores(self.data.shape[0], verbose=self.verbose)
        if self.verbose:
            print('Starting parallel fitting with {} cores'.format(cores))

        values = [joblib.delayed(least_squares)(obj_func, pulse_guess,
                                                args=[pulse_resp] + obj_func_args,
                                                **solver_options)
                  for pulse_resp, pulse_guess in zip(self.data, self._guess)]
        self._results = joblib.Parallel(n_jobs=cores)(values)

    if self.verbose and self.mpi_rank == 0:
        print('Finished computing fits on {} objects. Results of length: {}'
              '.'.format(self.data.shape[0], len(self._results)))
def test_recommend_cores_many_small_jobs(self):
    """
    14035 short jobs should use all cores except the ones expected to be left free, should
    honor a request for 1 or MAX_CPU_CORES cores, and should cap a request for 5000 cores at
    MAX_CPU_CORES
    """
    many_jobs = 14035
    # Two cores are expected to stay free on machines with more than 4 cores, one otherwise
    reserved_cores = 2 if MAX_CPU_CORES > 4 else 1

    # (extra keyword arguments, expected recommendation)
    scenarios = (
        ({}, max(1, MAX_CPU_CORES - reserved_cores)),
        ({'requested_cores': 1}, 1),
        ({'requested_cores': MAX_CPU_CORES}, MAX_CPU_CORES),
        ({'requested_cores': 5000}, MAX_CPU_CORES),
    )

    for extra_kwargs, expected in scenarios:
        recommended = comp_utils.recommend_cpu_cores(many_jobs, lengthy_computation=False,
                                                     **extra_kwargs)
        self.assertEqual(recommended, expected)
def fit_atom_positions_parallel(parm_dict, fitting_parms, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Parameters
    ----------
    parm_dict : dictionary
        Dictionary containing the guess positions, nearest neighbors and original image.
        Note that its 'verbose' entry is set to False in place.
    fitting_parms : dictionary
        Parameters used for atom position fitting
    num_cores : unsigned int (Optional. Default = available logical cores - 2)
        Number of cores to compute with. The value is passed through recommend_cpu_cores.

    Returns
    -------
    results : list of tuples
        Guess and fit coefficients
    """
    parm_dict['verbose'] = False
    all_atom_guesses = parm_dict['atom_pos_guess']
    num_atoms = all_atom_guesses.shape[0]
    t_start = tm.time()
    num_cores = recommend_cpu_cores(num_atoms, requested_cores=num_cores, lengthy_computation=False)

    # itertools.izip only exists on Python 2; the builtin zip is the lazy equivalent on Python 3
    lazy_zip = getattr(itt, 'izip', zip)
    # One (atom index, parm_dict, fitting_parms) tuple per atom
    parm_list = lazy_zip(range(num_atoms), itt.repeat(parm_dict), itt.repeat(fitting_parms))

    if num_cores > 1:
        # Pool.imap rejects a chunksize below 1, which happens with fewer atoms than cores
        chunk = max(1, num_atoms // num_cores)
        pool = mp.Pool(processes=num_cores)
        try:
            results = list(pool.imap(fit_atom_pos, parm_list, chunksize=chunk))
        finally:
            # Stop accepting work even if one of the fits raised
            pool.close()
    else:
        results = [fit_atom_pos(parm) for parm in parm_list]

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(results), num_cores))
    return results
def fit_atom_positions_parallel(parm_dict, fitting_parms, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Parameters
    ----------
    parm_dict : dictionary
        Dictionary containing the guess positions, nearest neighbors and original image.
        Note that its 'verbose' entry is set to False in place.
    fitting_parms : dictionary
        Parameters used for atom position fitting
    num_cores : unsigned int (Optional. Default = available logical cores - 2)
        Number of cores to compute with. The value is passed through recommend_cpu_cores.

    Returns
    -------
    results : list of tuples
        Guess and fit coefficients
    """
    parm_dict['verbose'] = False
    all_atom_guesses = parm_dict['atom_pos_guess']
    num_atoms = all_atom_guesses.shape[0]
    t_start = tm.time()
    num_cores = recommend_cpu_cores(num_atoms, requested_cores=num_cores, lengthy_computation=False)

    # itertools.izip only exists on Python 2; the builtin zip is the lazy equivalent on Python 3
    lazy_zip = getattr(itt, 'izip', zip)
    # One (atom index, parm_dict, fitting_parms) tuple per atom
    parm_list = lazy_zip(range(num_atoms), itt.repeat(parm_dict), itt.repeat(fitting_parms))

    if num_cores > 1:
        # Pool.imap rejects a chunksize below 1, which happens with fewer atoms than cores
        chunk = max(1, num_atoms // num_cores)
        pool = mp.Pool(processes=num_cores)
        try:
            results = list(pool.imap(fit_atom_pos, parm_list, chunksize=chunk))
        finally:
            # Stop accepting work even if one of the fits raised
            pool.close()
    else:
        results = [fit_atom_pos(parm) for parm in parm_list]

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(results), num_cores))
    return results
def do_fit(self, processors=None, solver_type='least_squares', solver_options=None, obj_func=None,
           h5_partial_fit=None, h5_guess=None, override=False):
    """
    Generates the fit for the given dataset and writes back to file

    Parameters
    ----------
    processors : int, optional
        Number of cpu cores the user wishes to run on. The minimum of this and self._maxCpus
        is passed on to recommend_cpu_cores. Default - self._maxCpus
    solver_type : str, optional
        The name of the solver in scipy.optimize to use for the fit. Default - 'least_squares'
    solver_options : dict, optional
        Dictionary of parameters to pass to the solver specified by `solver_type`.
        Default - None, which is treated as an empty dictionary
    obj_func : dict
        Dictionary defining the class and method containing the function to be fit as well as any
        additional function parameters. Must contain the key 'obj_func'.
    h5_partial_fit : h5py.group. optional, default = None
        Datagroup containing (partially computed) fit results. do_fit will resume computation if provided.
    h5_guess : h5py.group. optional, default = None
        Datagroup containing guess results. do_fit will use this if provided.
    override : bool, optional. default = False
        By default, will simply return duplicate results to avoid recomputing or resume computation on a
        group with partial results. When True (or whenever h5_partial_fit or h5_guess is provided),
        previous results are neither returned nor resumed automatically; one of h5_partial_fit or
        h5_guess must then be supplied.

    Returns
    -------
    h5_results : h5py.Dataset object
        Dataset with the fit parameters

    Raises
    ------
    KeyError
        If `solver_type` is not found in scipy.optimize or the objective function is not one of
        Fit_Methods().methods
    TypeError
        If `obj_func` is not provided
    ValueError
        If no compatible Guess / partial Fit could be found or the provided one is not compatible
    """
    # ################## PREPARE THE SOLVER #######################################
    if solver_type not in scipy.optimize.__dict__:
        raise KeyError('Error: Solver "%s" does not exist!. For additional info see scipy.optimize\n' % solver_type)

    if obj_func is None:
        raise TypeError('obj_func must be a dictionary containing the key "obj_func"')

    obj_func_name = obj_func['obj_func']
    if obj_func_name not in Fit_Methods().methods:
        raise KeyError('Error: Objective Functions "%s" is not implemented in pycroscopy.analysis.Fit_Methods'
                       % obj_func_name)

    # The documented default of None stands for "no extra solver options"
    if solver_options is None:
        solver_options = dict()

    # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

    def _get_group_to_resume(legal_groups, provided_partial_fit):
        # Find the group whose 'Fit' dataset is the one the user handed in
        for h5_group in legal_groups:
            if h5_group['Fit'] == provided_partial_fit:
                return h5_group
        return None

    def _resume_fit(fitter, h5_group):
        # Continue from the position recorded in the 'last_pixel' attribute of the partial Fit
        fitter.h5_guess = h5_group['Guess']
        fitter.h5_fit = h5_group['Fit']
        fitter._start_pos = fitter.h5_fit.attrs['last_pixel']

    def _start_fresh_fit(fitter, h5_guess_legal):
        fitter.h5_guess = h5_guess_legal
        fitter._create_fit_datasets()
        fitter._start_pos = 0

    # Prepare the parms dict that will be used for comparison:
    self._parms_dict = solver_options.copy()
    self._parms_dict.update({'solver_type': solver_type})
    self._parms_dict.update(obj_func)

    completed_guess, partial_fit_groups, completed_fits = self._check_for_old_fit()

    # Explicitly providing a dataset to work with counts as an override
    override = override or (h5_partial_fit is not None or h5_guess is not None)

    if not override:
        # First try to simply return completed results
        if len(completed_fits) > 0:
            print('Returned previously computed results at ' + completed_fits[-1].name)
            self.h5_fit = USIDataset(completed_fits[-1])
            # Hand the dataset back (rather than None) as promised in the docstring
            return self.h5_fit
        # Next, attempt to resume automatically:
        elif len(partial_fit_groups) > 0:
            print('Will resume fitting in {}. '
                  'You can supply a dataset using the h5_partial_fit argument'.format(partial_fit_groups[-1].name))
            _resume_fit(self, partial_fit_groups[-1])
        # Finally, attempt to do fresh fitting using completed Guess:
        elif len(completed_guess) > 0:
            print('Will use {} for generating new Fit. '
                  'You can supply a dataset using the h5_guess argument'.format(completed_guess[-1].name))
            _start_fresh_fit(self, completed_guess[-1])
        else:
            raise ValueError('Could not find a compatible Guess to use for Fit. Call do_guess() before do_fit()')
    else:
        if h5_partial_fit is not None:
            h5_group = _get_group_to_resume(partial_fit_groups, h5_partial_fit)
            if h5_group is None:
                raise ValueError('Provided dataset with partial Fit was not found to be compatible')
            _resume_fit(self, h5_group)
        elif h5_guess is not None:
            if h5_guess not in completed_guess:
                raise ValueError('Provided dataset with completed Guess was not found to be compatible')
            _start_fresh_fit(self, h5_guess)
        else:
            raise ValueError('Please provide a completed guess or partially completed Fit to resume')

    # ################## BEGIN THE ACTUAL FITTING #######################################

    print("Using solver %s and objective function %s to fit your data\n" % (solver_type, obj_func_name))

    if processors is None:
        processors = self._maxCpus
    else:
        processors = min(processors, self._maxCpus)
    processors = recommend_cpu_cores(self._max_pos_per_read, processors, verbose=self._verbose)

    time_per_pix = 0
    num_pos = self.h5_main.shape[0] - self._start_pos
    orig_start_pos = self._start_pos

    print('You can abort this computation at any time and resume at a later time!\n'
          '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
          '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n')

    self._get_guess_chunk()
    self._get_data_chunk()

    # NOTE(review): the loop presumably ends because _get_data_chunk sets self.data to None once
    # all positions were read, and _start_pos / _end_pos are advanced by the helpers - confirm
    while self.data is not None:

        t_start = tm.time()

        opt = Optimize(data=self.data, guess=self.guess, parallel=self._parallel)
        temp = opt.computeFit(processors=processors,
                              solver_type=solver_type,
                              solver_options=solver_options,
                              obj_func=obj_func.copy())

        # TODO: need a different .reformatResults to process fitting results
        # reorder to get one numpy array out
        temp = self._reformat_results(temp, obj_func_name)
        self.fit = np.hstack(tuple(temp))

        # Write to file
        self._set_results(is_guess=False)

        # basic timing logs
        tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
        if self._verbose:
            print('Done parallel computing in {} or {} per pixel'.format(format_time(tot_time),
                                                                         format_time(tot_time / self.data.shape[0])))
        if self._start_pos == orig_start_pos:
            time_per_pix = tot_time / self._end_pos  # in seconds
        else:
            time_remaining = (num_pos - self._end_pos) * time_per_pix  # in seconds
            print('Time remaining: ' + format_time(time_remaining))

        # get next batch of data
        self._get_guess_chunk()
        self._get_data_chunk()

    print('Completed computing fit. Writing to file.')

    return USIDataset(self.h5_fit)
def do_guess(self, processors=None, strategy=None, options=None, h5_partial_guess=None, override=False):
    """
    Computes the guess for the given dataset and writes back to file

    Parameters
    ----------
    strategy: string (optional)
        Default is 'Wavelet_Peaks'.
        Can be one of ['wavelet_peaks', 'relative_maximum', 'gaussian_processes'].
        For updated list, run GuessMethods.methods
        NOTE(review): the signature default is None, which is only accepted if None is listed in
        GuessMethods().methods - confirm the intended default
    processors : int (optional)
        Number of cores to use for computing. Default = all available - 2 cores
    options: dict (optional)
        Default, options for wavelet_peaks {"peaks_widths": np.array([10,200]), "peak_step":20}.
        Dictionary of options passed to strategy. For more info see GuessMethods documentation.
        None is treated as an empty dictionary.
    h5_partial_guess : h5py.group. optional, default = None
        Datagroup containing (partially computed) guess results. do_guess will resume computation if provided.
    override : bool, optional. default = False
        By default, will simply return duplicate results to avoid recomputing or resume computation on a
        group with partial results. Set to True to force fresh computation.

    Returns
    -------
    h5_guess : h5py.Dataset
        Dataset containing guesses that can be passed on to do_fit()

    Raises
    ------
    KeyError
        If `strategy` is not one of GuessMethods().methods
    ValueError
        If `h5_partial_guess` is not an h5py.Dataset or is not among the compatible partial datasets
    """
    # A fresh dictionary per call avoids sharing one mutable default between calls
    if options is None:
        options = dict()

    gm = GuessMethods()
    if strategy not in gm.methods:
        raise KeyError('Error: %s is not implemented in pycroscopy.analysis.GuessMethods to find guesses' % strategy)

    # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

    # Prepare the parms dict that will be used for comparison:
    self._parms_dict = options.copy()
    self._parms_dict.update({'strategy': strategy})

    # check for old:
    partial_dsets, completed_dsets = self._check_for_old_guess()

    if len(completed_dsets) == 0 and len(partial_dsets) == 0:
        print('No existing datasets found')
        override = True

    if not override:
        # First try to simply return any completed computation
        if len(completed_dsets) > 0:
            print('Returned previously computed results at ' + completed_dsets[-1].name)
            self.h5_guess = USIDataset(completed_dsets[-1])
            # Hand the dataset back (rather than None) as promised in the docstring
            return self.h5_guess

        # Next attempt to resume automatically if nothing is provided
        if len(partial_dsets) > 0:
            # attempt to use whatever the user provided (if legal)
            target_partial_dset = partial_dsets[-1]
            if h5_partial_guess is not None:
                if not isinstance(h5_partial_guess, h5py.Dataset):
                    raise ValueError('Provided parameter is not an h5py.Dataset object')
                if h5_partial_guess not in partial_dsets:
                    raise ValueError('Provided dataset for partial Guesses is not compatible')
                if self._verbose:
                    print('Provided partial Guess dataset was acceptable')
                target_partial_dset = h5_partial_guess

            # Finally resume from this dataset
            print('Resuming computation in group: ' + target_partial_dset.name)
            self.h5_guess = target_partial_dset
            self._start_pos = target_partial_dset.attrs['last_pixel']

    # No completed / partials available or forced via override:
    # NOTE(review): a fresh computation is only set up when self.h5_guess is still None, so
    # override=True presumably has no effect if h5_guess was already set earlier - confirm
    if self.h5_guess is None:
        if self._verbose:
            print('Starting a fresh computation!')
        self._start_pos = 0
        self._create_guess_datasets()

    # ################## BEGIN THE ACTUAL COMPUTING #######################################

    if processors is None:
        processors = self._maxCpus
    else:
        processors = min(int(processors), self._maxCpus)
    processors = recommend_cpu_cores(self._max_pos_per_read, processors, verbose=self._verbose)

    print("Using %s to find guesses...\n" % strategy)

    time_per_pix = 0
    num_pos = self.h5_main.shape[0] - self._start_pos
    orig_start_pos = self._start_pos

    print('You can abort this computation at any time and resume at a later time!\n'
          '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
          '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n')

    self._get_data_chunk()
    while self.data is not None:

        t_start = tm.time()

        opt = Optimize(data=self.data, parallel=self._parallel)
        temp = opt.computeGuess(processors=processors, strategy=strategy, options=options)

        # reorder to get one numpy array out
        temp = self._reformat_results(temp, strategy)
        self.guess = np.hstack(tuple(temp))

        # Write to file
        self._set_results(is_guess=True)

        # basic timing logs
        tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
        if self._verbose:
            print('Done parallel computing in {} or {} per pixel'.format(format_time(tot_time),
                                                                         format_time(tot_time / self.data.shape[0])))
        if self._start_pos == orig_start_pos:
            time_per_pix = tot_time / self._end_pos  # in seconds
        else:
            time_remaining = (num_pos - self._end_pos) * time_per_pix  # in seconds
            print('Time remaining: ' + format_time(time_remaining))

        # get next batch of data
        self._get_data_chunk()

    print('Completed computing guess')
    print()
    return USIDataset(self.h5_guess)
def test_invalid_min_cores(self):
    """
    A list supplied as min_free_cores must raise a TypeError
    """
    not_an_integer = [4]
    with self.assertRaises(TypeError):
        comp_utils.recommend_cpu_cores(14035, min_free_cores=not_an_integer)
def do_fit(self, processors=None, solver_type='least_squares', solver_options=None, obj_func=None,
           h5_partial_fit=None, h5_guess=None, override=False):
    """
    Generates the fit for the given dataset and writes back to file

    Parameters
    ----------
    processors : int, optional
        Number of cpu cores the user wishes to run on. The minimum of this and self._maxCpus
        is passed on to recommend_cpu_cores. Default - self._maxCpus
    solver_type : str, optional
        The name of the solver in scipy.optimize to use for the fit. Default - 'least_squares'
    solver_options : dict, optional
        Dictionary of parameters to pass to the solver specified by `solver_type`.
        Default - None, which is treated as an empty dictionary
    obj_func : dict
        Dictionary defining the class and method containing the function to be fit as well as any
        additional function parameters. Must contain the key 'obj_func'.
    h5_partial_fit : h5py.group. optional, default = None
        Datagroup containing (partially computed) fit results. do_fit will resume computation if provided.
    h5_guess : h5py.group. optional, default = None
        Datagroup containing guess results. do_fit will use this if provided.
    override : bool, optional. default = False
        By default, will simply return duplicate results to avoid recomputing or resume computation on a
        group with partial results. When True (or whenever h5_partial_fit or h5_guess is provided),
        previous results are neither returned nor resumed automatically; one of h5_partial_fit or
        h5_guess must then be supplied.

    Returns
    -------
    h5_results : h5py.Dataset object
        Dataset with the fit parameters

    Raises
    ------
    KeyError
        If `solver_type` is not found in scipy.optimize or the objective function is not one of
        Fit_Methods().methods
    TypeError
        If `obj_func` is not provided
    ValueError
        If no compatible Guess / partial Fit could be found or the provided one is not compatible
    """
    # ################## PREPARE THE SOLVER #######################################
    if solver_type not in scipy.optimize.__dict__:
        raise KeyError('Error: Solver "%s" does not exist!. For additional info see scipy.optimize\n' % solver_type)

    if obj_func is None:
        raise TypeError('obj_func must be a dictionary containing the key "obj_func"')

    obj_func_name = obj_func['obj_func']
    if obj_func_name not in Fit_Methods().methods:
        raise KeyError('Error: Objective Functions "%s" is not implemented in pycroscopy.analysis.Fit_Methods'
                       % obj_func_name)

    # The documented default of None stands for "no extra solver options"
    if solver_options is None:
        solver_options = dict()

    # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

    def _get_group_to_resume(legal_groups, provided_partial_fit):
        # Find the group whose 'Fit' dataset is the one the user handed in
        for h5_group in legal_groups:
            if h5_group['Fit'] == provided_partial_fit:
                return h5_group
        return None

    def _resume_fit(fitter, h5_group):
        # Continue from the position recorded in the 'last_pixel' attribute of the partial Fit
        fitter.h5_guess = h5_group['Guess']
        fitter.h5_fit = h5_group['Fit']
        fitter._start_pos = fitter.h5_fit.attrs['last_pixel']

    def _start_fresh_fit(fitter, h5_guess_legal):
        fitter.h5_guess = h5_guess_legal
        fitter._create_fit_datasets()
        fitter._start_pos = 0

    # Prepare the parms dict that will be used for comparison:
    self._parms_dict = solver_options.copy()
    self._parms_dict.update({'solver_type': solver_type})
    self._parms_dict.update(obj_func)

    completed_guess, partial_fit_groups, completed_fits = self._check_for_old_fit()

    # Explicitly providing a dataset to work with counts as an override
    override = override or (h5_partial_fit is not None or h5_guess is not None)

    if not override:
        # First try to simply return completed results
        if len(completed_fits) > 0:
            print('Returned previously computed results at ' + completed_fits[-1].name)
            self.h5_fit = USIDataset(completed_fits[-1])
            # Hand the dataset back (rather than None) as promised in the docstring
            return self.h5_fit
        # Next, attempt to resume automatically:
        elif len(partial_fit_groups) > 0:
            print('Will resume fitting in {}. '
                  'You can supply a dataset using the h5_partial_fit argument'.format(partial_fit_groups[-1].name))
            _resume_fit(self, partial_fit_groups[-1])
        # Finally, attempt to do fresh fitting using completed Guess:
        elif len(completed_guess) > 0:
            print('Will use {} for generating new Fit. '
                  'You can supply a dataset using the h5_guess argument'.format(completed_guess[-1].name))
            _start_fresh_fit(self, completed_guess[-1])
        else:
            raise ValueError('Could not find a compatible Guess to use for Fit. Call do_guess() before do_fit()')
    else:
        if h5_partial_fit is not None:
            h5_group = _get_group_to_resume(partial_fit_groups, h5_partial_fit)
            if h5_group is None:
                raise ValueError('Provided dataset with partial Fit was not found to be compatible')
            _resume_fit(self, h5_group)
        elif h5_guess is not None:
            if h5_guess not in completed_guess:
                raise ValueError('Provided dataset with completed Guess was not found to be compatible')
            _start_fresh_fit(self, h5_guess)
        else:
            raise ValueError('Please provide a completed guess or partially completed Fit to resume')

    # ################## BEGIN THE ACTUAL FITTING #######################################

    print("Using solver %s and objective function %s to fit your data\n" % (solver_type, obj_func_name))

    if processors is None:
        processors = self._maxCpus
    else:
        processors = min(processors, self._maxCpus)
    processors = recommend_cpu_cores(self._max_pos_per_read, processors, verbose=self._verbose)

    time_per_pix = 0
    num_pos = self.h5_main.shape[0] - self._start_pos
    orig_start_pos = self._start_pos

    print('You can abort this computation at any time and resume at a later time!\n'
          '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
          '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n')

    self._get_guess_chunk()
    self._get_data_chunk()

    # NOTE(review): the loop presumably ends because _get_data_chunk sets self.data to None once
    # all positions were read, and _start_pos / _end_pos are advanced by the helpers - confirm
    while self.data is not None:

        t_start = tm.time()

        opt = Optimize(data=self.data, guess=self.guess, parallel=self._parallel)
        temp = opt.computeFit(processors=processors,
                              solver_type=solver_type,
                              solver_options=solver_options,
                              obj_func=obj_func.copy())

        # TODO: need a different .reformatResults to process fitting results
        # reorder to get one numpy array out
        temp = self._reformat_results(temp, obj_func_name)
        self.fit = np.hstack(tuple(temp))

        # Write to file
        self._set_results(is_guess=False)

        # basic timing logs
        tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
        if self._verbose:
            print('Done parallel computing in {} or {} per pixel'.format(format_time(tot_time),
                                                                         format_time(tot_time / self.data.shape[0])))
        if self._start_pos == orig_start_pos:
            time_per_pix = tot_time / self._end_pos  # in seconds
        else:
            time_remaining = (num_pos - self._end_pos) * time_per_pix  # in seconds
            print('Time remaining: ' + format_time(time_remaining))

        # get next batch of data
        self._get_guess_chunk()
        self._get_data_chunk()

    print('Completed computing fit. Writing to file.')

    return USIDataset(self.h5_fit)
def do_guess(self, processors=None, strategy=None, options=None, h5_partial_guess=None, override=False):
    """
    Computes the guess for the given dataset and writes back to file

    Parameters
    ----------
    strategy: string (optional)
        Default is 'Wavelet_Peaks'.
        Can be one of ['wavelet_peaks', 'relative_maximum', 'gaussian_processes'].
        For updated list, run GuessMethods.methods
        NOTE(review): the signature default is None, which is only accepted if None is listed in
        GuessMethods().methods - confirm the intended default
    processors : int (optional)
        Number of cores to use for computing. Default = all available - 2 cores
    options: dict (optional)
        Default, options for wavelet_peaks {"peaks_widths": np.array([10,200]), "peak_step":20}.
        Dictionary of options passed to strategy. For more info see GuessMethods documentation.
        None is treated as an empty dictionary.
    h5_partial_guess : h5py.group. optional, default = None
        Datagroup containing (partially computed) guess results. do_guess will resume computation if provided.
    override : bool, optional. default = False
        By default, will simply return duplicate results to avoid recomputing or resume computation on a
        group with partial results. Set to True to force fresh computation.

    Returns
    -------
    h5_guess : h5py.Dataset
        Dataset containing guesses that can be passed on to do_fit()

    Raises
    ------
    KeyError
        If `strategy` is not one of GuessMethods().methods
    ValueError
        If `h5_partial_guess` is not an h5py.Dataset or is not among the compatible partial datasets
    """
    # A fresh dictionary per call avoids sharing one mutable default between calls
    if options is None:
        options = dict()

    gm = GuessMethods()
    if strategy not in gm.methods:
        raise KeyError('Error: %s is not implemented in pycroscopy.analysis.GuessMethods to find guesses' % strategy)

    # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

    # Prepare the parms dict that will be used for comparison:
    self._parms_dict = options.copy()
    self._parms_dict.update({'strategy': strategy})

    # check for old:
    partial_dsets, completed_dsets = self._check_for_old_guess()

    if len(completed_dsets) == 0 and len(partial_dsets) == 0:
        print('No existing datasets found')
        override = True

    if not override:
        # First try to simply return any completed computation
        if len(completed_dsets) > 0:
            print('Returned previously computed results at ' + completed_dsets[-1].name)
            self.h5_guess = USIDataset(completed_dsets[-1])
            # Hand the dataset back (rather than None) as promised in the docstring
            return self.h5_guess

        # Next attempt to resume automatically if nothing is provided
        if len(partial_dsets) > 0:
            # attempt to use whatever the user provided (if legal)
            target_partial_dset = partial_dsets[-1]
            if h5_partial_guess is not None:
                if not isinstance(h5_partial_guess, h5py.Dataset):
                    raise ValueError('Provided parameter is not an h5py.Dataset object')
                if h5_partial_guess not in partial_dsets:
                    raise ValueError('Provided dataset for partial Guesses is not compatible')
                if self._verbose:
                    print('Provided partial Guess dataset was acceptable')
                target_partial_dset = h5_partial_guess

            # Finally resume from this dataset
            print('Resuming computation in group: ' + target_partial_dset.name)
            self.h5_guess = target_partial_dset
            self._start_pos = target_partial_dset.attrs['last_pixel']

    # No completed / partials available or forced via override:
    # NOTE(review): a fresh computation is only set up when self.h5_guess is still None, so
    # override=True presumably has no effect if h5_guess was already set earlier - confirm
    if self.h5_guess is None:
        if self._verbose:
            print('Starting a fresh computation!')
        self._start_pos = 0
        self._create_guess_datasets()

    # ################## BEGIN THE ACTUAL COMPUTING #######################################

    if processors is None:
        processors = self._maxCpus
    else:
        processors = min(int(processors), self._maxCpus)
    processors = recommend_cpu_cores(self._max_pos_per_read, processors, verbose=self._verbose)

    print("Using %s to find guesses...\n" % strategy)

    time_per_pix = 0
    num_pos = self.h5_main.shape[0] - self._start_pos
    orig_start_pos = self._start_pos

    print('You can abort this computation at any time and resume at a later time!\n'
          '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
          '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n')

    self._get_data_chunk()
    while self.data is not None:

        t_start = tm.time()

        opt = Optimize(data=self.data, parallel=self._parallel)
        temp = opt.computeGuess(processors=processors, strategy=strategy, options=options)

        # reorder to get one numpy array out
        temp = self._reformat_results(temp, strategy)
        self.guess = np.hstack(tuple(temp))

        # Write to file
        self._set_results(is_guess=True)

        # basic timing logs
        tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
        if self._verbose:
            print('Done parallel computing in {} or {} per pixel'.format(format_time(tot_time),
                                                                         format_time(tot_time / self.data.shape[0])))
        if self._start_pos == orig_start_pos:
            time_per_pix = tot_time / self._end_pos  # in seconds
        else:
            time_remaining = (num_pos - self._end_pos) * time_per_pix  # in seconds
            print('Time remaining: ' + format_time(time_remaining))

        # get next batch of data
        self._get_data_chunk()

    print('Completed computing guess')
    print()
    return USIDataset(self.h5_guess)
def test_invalid_requested_cores(self):
    """
    A list supplied as requested_cores must raise a TypeError
    """
    not_an_integer = [4]
    with self.assertRaises(TypeError):
        comp_utils.recommend_cpu_cores(14035, requested_cores=not_an_integer)
def test_recommend_cpu_cores_rerouting(self):
    """
    io_utils.recommend_cpu_cores should emit a FutureWarning (checked on Python 3 only) and
    return the same value as comp_utils.recommend_cpu_cores
    """
    num_jobs = 140

    # The warning check is only performed when running on Python 3
    running_python_3 = sys.version_info.major == 3
    if running_python_3:
        with self.assertWarns(FutureWarning):
            io_utils.recommend_cpu_cores(num_jobs)

    from_comp_utils = comp_utils.recommend_cpu_cores(num_jobs)
    from_io_utils = io_utils.recommend_cpu_cores(num_jobs)
    self.assertEqual(from_comp_utils, from_io_utils)
def fit_atom_positions_parallel(self, plot_results=True, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Creates guess_dataset and fit_dataset with the results.

    Parameters
    ----------
    plot_results : optional boolean (default is True)
        Specifies whether to output a visualization of the fitting results
    num_cores : unsigned int (Optional. Default = available logical cores - 2)
        Number of cores to compute with. When not provided, recommend_cpu_cores picks the value;
        an explicitly provided value is used as is.

    Returns
    -------
    fit_dataset: NxM numpy array of tuples where N is the number of atoms fit and M is the number of nearest
        neighbors considered. Each tuple contains the converged values for each gaussian.
        The value names are stored in the dtypes.
    """
    t_start = tm.time()
    if num_cores is None:
        num_cores = recommend_cpu_cores(self.num_atoms, requested_cores=num_cores, lengthy_computation=False)

    print('Setting up guesses')
    self.guess_parms = []
    for i in range(self.num_atoms):
        self.guess_parms.append(self.do_guess(i))

    print('Fitting...')
    # itertools.izip only exists on Python 2; the builtin zip is the lazy equivalent on Python 3
    lazy_zip = getattr(itt, 'izip', zip)
    # One (guess, fitting_parms) pair per atom
    parm_list = lazy_zip(self.guess_parms, itt.repeat(self.fitting_parms))

    if num_cores > 1:
        # Pool.imap rejects a chunksize below 1, which happens when num_cores > num_atoms
        chunk = max(1, self.num_atoms // num_cores)
        pool = mp.Pool(processes=num_cores)
        try:
            self.fitting_results = list(pool.imap(do_fit, parm_list, chunksize=chunk))
        finally:
            # Stop accepting work even if one of the fits raised
            pool.close()
    else:
        self.fitting_results = [do_fit(parm) for parm in parm_list]

    print('Finalizing datasets...')
    self.guess_dataset = np.zeros(shape=(self.num_atoms, self.num_nearest_neighbors + 1),
                                  dtype=self.atom_coeff_dtype)
    self.fit_dataset = np.zeros(shape=self.guess_dataset.shape, dtype=self.guess_dataset.dtype)

    for atom_ind, single_atom_results in enumerate(self.fitting_results):
        # Type of the atom itself followed by the types of its closest neighbors
        atom_types = np.hstack((self.h5_guess['type'][atom_ind],
                                [self.h5_guess['type'][neighbor]
                                 for neighbor in self.closest_neighbors_mat[atom_ind]]))
        type_column = np.vstack(atom_types)

        # Each row is converted to a tuple before being written into the datasets
        atom_data = np.hstack((type_column, single_atom_results))
        self.fit_dataset[atom_ind] = [tuple(element) for element in atom_data]

        single_atom_guess = self.guess_parms[atom_ind]
        atom_guess_data = np.hstack((type_column, single_atom_guess[1]))
        self.guess_dataset[atom_ind] = [tuple(element) for element in atom_guess_data]

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(self.fitting_results), num_cores))

    # if plotting is desired
    if plot_results:
        fig, axis = plt.subplots(figsize=(14, 14))
        # Axes.hold was removed in Matplotlib 3.0 (overplotting is always on there);
        # only call it on older versions that still provide it
        if hasattr(axis, 'hold'):
            axis.hold(True)
        axis.imshow(self.cropped_clean_image, interpolation='none', cmap="gray")
        axis.scatter(self.guess_dataset[:, 0]['y'], self.guess_dataset[:, 0]['x'], color='yellow', label='Guess')
        axis.scatter(self.fit_dataset[:, 0]['y'], self.fit_dataset[:, 0]['x'], color='red', label='Fit')
        axis.legend()
        fig.tight_layout()
        fig.show()

    return self.fit_dataset