Ejemplo n.º 1
0
    def test_invalid_num_jobs(self):
        """Fractional, negative and non-numeric job counts must be rejected."""
        recommend = comp_utils.recommend_cpu_cores

        # A floating point job count is the wrong type
        self.assertRaises(TypeError, recommend, 14035.67)

        # A negative job count is the right type but an illegal value
        self.assertRaises(ValueError, recommend, -14035)

        # A string job count is the wrong type
        self.assertRaises(TypeError, recommend, 'not a number')
Ejemplo n.º 2
0
 def test_reccomend_cores_few_small_jobs(self):
     """A handful of short jobs is expected to be assigned a single core."""
     few_jobs = 13

     # Without an explicit request
     cores = comp_utils.recommend_cpu_cores(few_jobs,
                                            lengthy_computation=False)
     self.assertEqual(cores, 1)

     # Even when every available core is requested
     cores = comp_utils.recommend_cpu_cores(few_jobs,
                                            lengthy_computation=False,
                                            requested_cores=MAX_CPU_CORES)
     self.assertEqual(cores, 1)
Ejemplo n.º 3
0
 def test_recommed_cores_few_large_jobs(self):
     """A handful of lengthy jobs is expected to be spread over many cores."""
     few_jobs = 13
     # Number of cores expected to be left free when nothing is requested
     spare_cores = 2 if MAX_CPU_CORES > 4 else 1

     # Without an explicit request: everything except the spare cores
     cores = comp_utils.recommend_cpu_cores(few_jobs,
                                            lengthy_computation=True)
     self.assertEqual(cores, max(1, MAX_CPU_CORES - spare_cores))

     # With an explicit request for all but one core: request is honored
     wanted = MAX_CPU_CORES - 1
     cores = comp_utils.recommend_cpu_cores(few_jobs,
                                            requested_cores=wanted,
                                            lengthy_computation=True)
     self.assertEqual(cores, max(1, wanted))
Ejemplo n.º 4
0
 def test_recommend_cores_illegal_min_free_cores(self):
     """Asking to keep every available core free must raise a ValueError."""
     self.assertRaises(ValueError,
                       comp_utils.recommend_cpu_cores,
                       14035,
                       lengthy_computation=False,
                       min_free_cores=MAX_CPU_CORES)
Ejemplo n.º 5
0
 def test_recommend_cores_changing_min_cores(self):
     """
     For many short jobs, the recommendation is expected to be all cores
     minus the requested number of free cores (but never fewer than one).
     """
     many_jobs = 14035
     spare_cores = 1
     # Sweep every legal number of free cores: 1 .. MAX_CPU_CORES - 1
     while spare_cores < MAX_CPU_CORES:
         expected = max(1, MAX_CPU_CORES - spare_cores)
         cores = comp_utils.recommend_cpu_cores(many_jobs,
                                                lengthy_computation=False,
                                                min_free_cores=spare_cores)
         self.assertEqual(cores, expected)
         spare_cores += 1
Ejemplo n.º 6
0
    def _unit_compute_fit(self, obj_func, obj_func_args=[],
                          solver_options={'jac': 'cs'}):
        """
        Performs least-squares fitting on self.data using self.guess for
        initial conditions.

        Results of the computation are captured in self._results

        Parameters
        ----------
        obj_func : callable
            Objective function to minimize on
        obj_func_args : list
            Arguments required by obj_func following the guess parameters
            (which should be the first argument)
        solver_options : dict, optional
            Keyword arguments passed onto scipy.optimize.least_squares
        """

        # At this point data has been read in. Read in the guess as well:
        self._read_guess_chunk()

        if self.verbose and self.mpi_rank == 0:
            print('_unit_compute_fit got:\nobj_func: {}\nobj_func_args: {}\n'
                  'solver_options: {}'.format(obj_func, obj_func_args,
                                              solver_options))

        # TODO: Generalize this bit. Use Parallel compute instead!

        if self.mpi_size > 1:
            if self.verbose:
                print('Rank {}: About to start serial computation'
                      '.'.format(self.mpi_rank))

            self._results = list()
            for pulse_resp, pulse_guess in zip(self.data, self._guess):
                curr_results = least_squares(obj_func, pulse_guess,
                                             args=[pulse_resp] + obj_func_args,
                                             **solver_options)
                self._results.append(curr_results)
        else:
            cores = recommend_cpu_cores(self.data.shape[0],
                                        verbose=self.verbose)
            if self.verbose:
                print('Starting parallel fitting with {} cores'.format(cores))

            values = [joblib.delayed(least_squares)(obj_func, pulse_guess,
                                                    args=[pulse_resp] + obj_func_args,
                                                    **solver_options) for
                      pulse_resp, pulse_guess in zip(self.data, self._guess)]
            self._results = joblib.Parallel(n_jobs=cores)(values)

        if self.verbose and self.mpi_rank == 0:
            print(
                'Finished computing fits on {} objects. Results of length: {}'
                '.'.format(self.data.shape[0], len(self._results)))
Ejemplo n.º 7
0
 def test_recommend_cores_many_small_jobs(self):
     """
     Many short jobs: the default leaves a few cores free, explicit requests
     are honored and are capped at the number of available cores.
     """
     many_jobs = 14035
     # Number of cores expected to be left free when nothing is requested
     spare_cores = 2 if MAX_CPU_CORES > 4 else 1

     cores = comp_utils.recommend_cpu_cores(many_jobs,
                                            lengthy_computation=False)
     self.assertEqual(cores, max(1, MAX_CPU_CORES - spare_cores))

     # (requested cores, expected recommendation)
     cases = ((1, 1),
              (MAX_CPU_CORES, MAX_CPU_CORES),
              (5000, MAX_CPU_CORES))
     for wanted, expected in cases:
         cores = comp_utils.recommend_cpu_cores(many_jobs,
                                                requested_cores=wanted,
                                                lengthy_computation=False)
         self.assertEqual(cores, expected)
Ejemplo n.º 8
0
def fit_atom_positions_parallel(parm_dict, fitting_parms, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Note that parm_dict is modified in place: its 'verbose' entry is set to
    False before the fitting starts.

    Parameters
    ----------
    parm_dict : dictionary
        Dictionary containing the guess positions, nearest neighbors and original image
    fitting_parms : dictionary
        Parameters used for atom position fitting
    num_cores : unsigned int (Optional. Default = None)
        Number of cores to compute with. The value is passed to
        recommend_cpu_cores as the requested number of cores.

    Returns
    -------
    results : list of tuples
        Guess and fit coefficients
    """
    parm_dict['verbose'] = False
    all_atom_guesses = parm_dict['atom_pos_guess']
    num_atoms = all_atom_guesses.shape[0]
    t_start = tm.time()
    num_cores = recommend_cpu_cores(num_atoms,
                                    requested_cores=num_cores,
                                    lengthy_computation=False)

    # itertools.izip only exists on Python 2. The builtin zip is its lazy
    # equivalent on Python 3.
    lazy_zip = getattr(itt, 'izip', zip)
    parm_list = lazy_zip(range(num_atoms),
                         itt.repeat(parm_dict), itt.repeat(fitting_parms))

    if num_cores > 1:
        # Pool.imap rejects a chunksize below 1, which the plain division
        # would produce whenever there are fewer atoms than cores.
        chunk = max(1, num_atoms // num_cores)
        pool = mp.Pool(processes=num_cores)
        try:
            results = list(pool.imap(fit_atom_pos, parm_list,
                                     chunksize=chunk))
            pool.close()
        except BaseException:
            # Do not leave worker processes behind if a fit fails
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        results = [fit_atom_pos(parm) for parm in parm_list]

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(
        tot_time, len(results), num_cores))

    return results
Ejemplo n.º 9
0
def fit_atom_positions_parallel(parm_dict, fitting_parms, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Note that parm_dict is modified in place: its 'verbose' entry is set to
    False before the fitting starts.

    Parameters
    ----------
    parm_dict : dictionary
        Dictionary containing the guess positions, nearest neighbors and original image
    fitting_parms : dictionary
        Parameters used for atom position fitting
    num_cores : unsigned int (Optional. Default = None)
        Number of cores to compute with. The value is passed to
        recommend_cpu_cores as the requested number of cores.

    Returns
    -------
    results : list of tuples
        Guess and fit coefficients
    """
    parm_dict['verbose'] = False
    all_atom_guesses = parm_dict['atom_pos_guess']
    num_atoms = all_atom_guesses.shape[0]
    t_start = tm.time()
    num_cores = recommend_cpu_cores(num_atoms, requested_cores=num_cores, lengthy_computation=False)

    # itertools.izip only exists on Python 2. The builtin zip is its lazy equivalent on Python 3.
    lazy_zip = getattr(itt, 'izip', zip)
    parm_list = lazy_zip(range(num_atoms), itt.repeat(parm_dict), itt.repeat(fitting_parms))

    if num_cores > 1:
        # Pool.imap rejects a chunksize below 1, which the plain division would produce
        # whenever there are fewer atoms than cores.
        chunk = max(1, num_atoms // num_cores)
        pool = mp.Pool(processes=num_cores)
        try:
            results = list(pool.imap(fit_atom_pos, parm_list, chunksize=chunk))
            pool.close()
        except BaseException:
            # Do not leave worker processes behind if a fit fails
            pool.terminate()
            raise
        finally:
            pool.join()
    else:
        results = [fit_atom_pos(parm) for parm in parm_list]

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(results), num_cores))

    return results
Ejemplo n.º 10
0
    def do_fit(self,
               processors=None,
               solver_type='least_squares',
               solver_options=None,
               obj_func=None,
               h5_partial_fit=None,
               h5_guess=None,
               override=False):
        """
        Generates the fit for the given dataset and writes back to file

        Parameters
        ----------
        processors : int
            Number of cpu cores the user wishes to run on.  The minimum of this and self._maxCpus is used.
        solver_type : str
            The name of the solver in scipy.optimize to use for the fit
        solver_options : dict, optional. default = None
            Dictionary of parameters to pass to the solver specified by `solver_type`.
            None is treated as an empty dictionary.
        obj_func : dict
            Dictionary defining the class and method containing the function to be fit as well as any
            additional function parameters. Must contain the key 'obj_func'.
        h5_partial_fit : h5py.group. optional, default = None
            Datagroup containing (partially computed) fit results. do_fit will resume computation if provided.
        h5_guess : h5py.group. optional, default = None
            Datagroup containing guess results. do_fit will use this if provided.
        override : bool, optional. default = False
            By default, will simply return duplicate results to avoid recomputing or resume computation on a
            group with partial results. Set to True to force fresh computation. Providing either
            h5_partial_fit or h5_guess implies override=True.

        Returns
        -------
        h5_results : h5py.Dataset object
            Dataset with the fit parameters

        Raises
        ------
        KeyError
            If `solver_type` is not found in scipy.optimize or the objective function is not found in
            Fit_Methods().methods
        ValueError
            If no compatible Guess or partial Fit could be found / was provided
        """

        # ################## PREPARE THE SOLVER #######################################

        legit_solver = solver_type in scipy.optimize.__dict__

        if not legit_solver:
            raise KeyError(
                'Error: Solver "%s" does not exist!. For additional info see scipy.optimize\n'
                % solver_type)

        obj_func_name = obj_func['obj_func']
        legit_obj_func = obj_func_name in Fit_Methods().methods

        if not legit_obj_func:
            raise KeyError(
                'Error: Objective Functions "%s" is not implemented in pycroscopy.analysis.Fit_Methods'
                % obj_func_name)

        # The signature default is None, which cannot be copied or compared
        if solver_options is None:
            solver_options = dict()

        # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

        def _get_group_to_resume(legal_groups, provided_partial_fit):
            # Find the legal group whose 'Fit' entry is the one supplied
            for h5_group in legal_groups:
                if h5_group['Fit'] == provided_partial_fit:
                    return h5_group
            return None

        def _resume_fit(fitter, h5_group):
            # Continue from the position recorded in the 'last_pixel' attribute
            fitter.h5_guess = h5_group['Guess']
            fitter.h5_fit = h5_group['Fit']
            fitter._start_pos = fitter.h5_fit.attrs['last_pixel']

        def _start_fresh_fit(fitter, h5_guess_legal):
            # Create new fit datasets and start from the first position
            fitter.h5_guess = h5_guess_legal
            fitter._create_fit_datasets()
            fitter._start_pos = 0

        # Prepare the parms dict that will be used for comparison:
        self._parms_dict = solver_options.copy()
        self._parms_dict.update({'solver_type': solver_type})
        self._parms_dict.update(obj_func)

        completed_guess, partial_fit_groups, completed_fits = self._check_for_old_fit(
        )

        override = override or (h5_partial_fit is not None
                                or h5_guess is not None)

        if not override:
            # First try to simply return completed results
            if len(completed_fits) > 0:
                print('Returned previously computed results at ' +
                      completed_fits[-1].name)
                self.h5_fit = USIDataset(completed_fits[-1])
                # Hand the dataset back, as promised in the docstring
                return self.h5_fit

            # Next, attempt to resume automatically:
            elif len(partial_fit_groups) > 0:
                print(
                    'Will resume fitting in {}. '
                    'You can supply a dataset using the h5_partial_fit argument'
                    .format(partial_fit_groups[-1].name))
                _resume_fit(self, partial_fit_groups[-1])

            # Finally, attempt to do fresh fitting using completed Guess:
            elif len(completed_guess) > 0:
                print('Will use {} for generating new Fit. '
                      'You can supply a dataset using the h5_guess argument'.
                      format(completed_guess[-1].name))
                _start_fresh_fit(self, completed_guess[-1])

            else:
                raise ValueError(
                    'Could not find a compatible Guess to use for Fit. Call do_guess() before do_fit()'
                )

        else:
            if h5_partial_fit is not None:
                h5_group = _get_group_to_resume(partial_fit_groups,
                                                h5_partial_fit)
                if h5_group is None:
                    raise ValueError(
                        'Provided dataset with partial Fit was not found to be compatible'
                    )
                _resume_fit(self, h5_group)

            elif h5_guess is not None:
                if h5_guess not in completed_guess:
                    raise ValueError(
                        'Provided dataset with completed Guess was not found to be compatible'
                    )
                _start_fresh_fit(self, h5_guess)

            else:
                raise ValueError(
                    'Please provide a completed guess or partially completed Fit to resume'
                )

        # ################## BEGIN THE ACTUAL FITTING #######################################

        print("Using solver %s and objective function %s to fit your data\n" %
              (solver_type, obj_func_name))

        if processors is None:
            processors = self._maxCpus
        else:
            processors = min(processors, self._maxCpus)
        processors = recommend_cpu_cores(self._max_pos_per_read,
                                         processors,
                                         verbose=self._verbose)

        time_per_pix = 0
        num_pos = self.h5_main.shape[0] - self._start_pos
        orig_start_pos = self._start_pos

        print(
            'You can abort this computation at any time and resume at a later time!\n'
            '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
            '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n'
        )

        self._get_guess_chunk()
        self._get_data_chunk()

        # The loop ends once the chunk readers leave self.data as None
        while self.data is not None:

            t_start = tm.time()

            opt = Optimize(data=self.data,
                           guess=self.guess,
                           parallel=self._parallel)
            temp = opt.computeFit(processors=processors,
                                  solver_type=solver_type,
                                  solver_options=solver_options,
                                  obj_func=obj_func.copy())

            # TODO: need a different .reformatResults to process fitting results
            # reorder to get one numpy array out
            temp = self._reformat_results(temp, obj_func_name)
            self.fit = np.hstack(tuple(temp))

            # Write to file
            self._set_results(is_guess=False)

            # basic timing logs
            tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
            if self._verbose:
                print('Done parallel computing in {} or {} per pixel'.format(
                    format_time(tot_time),
                    format_time(tot_time / self.data.shape[0])))
            if self._start_pos == orig_start_pos:
                # First chunk: use it to estimate the time per position
                time_per_pix = tot_time / self._end_pos  # in seconds
            else:
                time_remaining = (num_pos -
                                  self._end_pos) * time_per_pix  # in seconds
                print('Time remaining: ' + format_time(time_remaining))

            # get next batch of data
            self._get_guess_chunk()
            self._get_data_chunk()

        print('Completed computing fit. Writing to file.')

        return USIDataset(self.h5_fit)
Ejemplo n.º 11
0
    def do_guess(self,
                 processors=None,
                 strategy=None,
                 options=None,
                 h5_partial_guess=None,
                 override=False):
        """
        Computes the guess for the given dataset and writes back to file

        Parameters
        ----------
        strategy: string (optional)
            Can be one of ['wavelet_peaks', 'relative_maximum', 'gaussian_processes'].
            For updated list, run GuessMethods.methods
            A KeyError is raised if the strategy is not found in GuessMethods().methods
        processors : int (optional)
            Number of cores to use for computing. Default = self._maxCpus. The value is capped at
            self._maxCpus and then passed through recommend_cpu_cores.
        options: dict (optional)
            Dictionary of options passed to strategy. For more info see GuessMethods documentation.
            Default = None, which is treated as an empty dictionary.
        h5_partial_guess : h5py.group. optional, default = None
            Datagroup containing (partially computed) guess results. do_guess will resume computation if provided.
        override : bool, optional. default = False
            By default, will simply return duplicate results to avoid recomputing or resume computation on a
            group with partial results. Set to True to force fresh computation.

        Returns
        -------
        h5_guess : h5py.Dataset
            Dataset containing guesses that can be passed on to do_fit()

        Raises
        ------
        KeyError
            If `strategy` is not found in GuessMethods().methods
        ValueError
            If `h5_partial_guess` is not an h5py.Dataset or is not among the compatible partial datasets
        """
        gm = GuessMethods()
        if strategy not in gm.methods:
            raise KeyError(
                'Error: %s is not implemented in pycroscopy.analysis.GuessMethods to find guesses'
                % strategy)

        # A fresh dictionary per call instead of a shared mutable default
        if options is None:
            options = dict()

        # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

        # Prepare the parms dict that will be used for comparison:
        self._parms_dict = options.copy()
        self._parms_dict.update({'strategy': strategy})

        # check for old:
        partial_dsets, completed_dsets = self._check_for_old_guess()

        if len(completed_dsets) == 0 and len(partial_dsets) == 0:
            print('No existing datasets found')
            override = True

        if not override:
            # First try to simply return any completed computation
            if len(completed_dsets) > 0:
                print('Returned previously computed results at ' +
                      completed_dsets[-1].name)
                self.h5_guess = USIDataset(completed_dsets[-1])
                # Hand the dataset back, as promised in the docstring
                return self.h5_guess

            # Next attempt to resume automatically if nothing is provided
            if len(partial_dsets) > 0:
                # attempt to use whatever the user provided (if legal)
                target_partial_dset = partial_dsets[-1]
                if h5_partial_guess is not None:
                    if not isinstance(h5_partial_guess, h5py.Dataset):
                        raise ValueError(
                            'Provided parameter is not an h5py.Dataset object')
                    if h5_partial_guess not in partial_dsets:
                        raise ValueError(
                            'Provided dataset for partial Guesses is not compatible'
                        )
                    if self._verbose:
                        print('Provided partial Guess dataset was acceptable')
                    target_partial_dset = h5_partial_guess

                # Finally resume from this dataset
                print('Resuming computation in group: ' +
                      target_partial_dset.name)
                self.h5_guess = target_partial_dset
                self._start_pos = target_partial_dset.attrs['last_pixel']

        # No completed / partials available or forced via override:
        if self.h5_guess is None:
            if self._verbose:
                print('Starting a fresh computation!')
            self._start_pos = 0
            self._create_guess_datasets()

        # ################## BEGIN THE ACTUAL COMPUTING #######################################

        if processors is None:
            processors = self._maxCpus
        else:
            processors = min(int(processors), self._maxCpus)
        processors = recommend_cpu_cores(self._max_pos_per_read,
                                         processors,
                                         verbose=self._verbose)

        print("Using %s to find guesses...\n" % strategy)

        time_per_pix = 0
        num_pos = self.h5_main.shape[0] - self._start_pos
        orig_start_pos = self._start_pos

        print(
            'You can abort this computation at any time and resume at a later time!\n'
            '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
            '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n'
        )

        self._get_data_chunk()
        # The loop ends once the chunk reader leaves self.data as None
        while self.data is not None:

            t_start = tm.time()

            opt = Optimize(data=self.data, parallel=self._parallel)
            temp = opt.computeGuess(processors=processors,
                                    strategy=strategy,
                                    options=options)

            # reorder to get one numpy array out
            temp = self._reformat_results(temp, strategy)
            self.guess = np.hstack(tuple(temp))

            # Write to file
            self._set_results(is_guess=True)

            # basic timing logs
            tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
            if self._verbose:
                print('Done parallel computing in {} or {} per pixel'.format(
                    format_time(tot_time),
                    format_time(tot_time / self.data.shape[0])))
            if self._start_pos == orig_start_pos:
                # First chunk: use it to estimate the time per position
                time_per_pix = tot_time / self._end_pos  # in seconds
            else:
                time_remaining = (num_pos -
                                  self._end_pos) * time_per_pix  # in seconds
                print('Time remaining: ' + format_time(time_remaining))

            # get next batch of data
            self._get_data_chunk()

        print('Completed computing guess')
        print()
        return USIDataset(self.h5_guess)
Ejemplo n.º 12
0
 def test_invalid_min_cores(self):
     """A list passed as min_free_cores must raise a TypeError."""
     self.assertRaises(TypeError,
                       comp_utils.recommend_cpu_cores,
                       14035,
                       min_free_cores=[4])
Ejemplo n.º 13
0
    def do_fit(self, processors=None, solver_type='least_squares', solver_options=None, obj_func=None,
               h5_partial_fit=None, h5_guess=None, override=False):
        """
        Generates the fit for the given dataset and writes back to file

        Parameters
        ----------
        processors : int
            Number of cpu cores the user wishes to run on.  The minimum of this and self._maxCpus is used.
        solver_type : str
            The name of the solver in scipy.optimize to use for the fit
        solver_options : dict, optional. default = None
            Dictionary of parameters to pass to the solver specified by `solver_type`.
            None is treated as an empty dictionary.
        obj_func : dict
            Dictionary defining the class and method containing the function to be fit as well as any
            additional function parameters. Must contain the key 'obj_func'.
        h5_partial_fit : h5py.group. optional, default = None
            Datagroup containing (partially computed) fit results. do_fit will resume computation if provided.
        h5_guess : h5py.group. optional, default = None
            Datagroup containing guess results. do_fit will use this if provided.
        override : bool, optional. default = False
            By default, will simply return duplicate results to avoid recomputing or resume computation on a
            group with partial results. Set to True to force fresh computation. Providing either
            h5_partial_fit or h5_guess implies override=True.

        Returns
        -------
        h5_results : h5py.Dataset object
            Dataset with the fit parameters

        Raises
        ------
        KeyError
            If `solver_type` is not found in scipy.optimize or the objective function is not found in
            Fit_Methods().methods
        ValueError
            If no compatible Guess or partial Fit could be found / was provided
        """

        # ################## PREPARE THE SOLVER #######################################

        legit_solver = solver_type in scipy.optimize.__dict__

        if not legit_solver:
            raise KeyError('Error: Solver "%s" does not exist!. For additional info see scipy.optimize\n' % solver_type)

        obj_func_name = obj_func['obj_func']
        legit_obj_func = obj_func_name in Fit_Methods().methods

        if not legit_obj_func:
            raise KeyError('Error: Objective Functions "%s" is not implemented in pycroscopy.analysis.Fit_Methods' %
                           obj_func_name)

        # The signature default is None, which cannot be copied or compared
        if solver_options is None:
            solver_options = dict()

        # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

        def _get_group_to_resume(legal_groups, provided_partial_fit):
            # Find the legal group whose 'Fit' entry is the one supplied
            for h5_group in legal_groups:
                if h5_group['Fit'] == provided_partial_fit:
                    return h5_group
            return None

        def _resume_fit(fitter, h5_group):
            # Continue from the position recorded in the 'last_pixel' attribute
            fitter.h5_guess = h5_group['Guess']
            fitter.h5_fit = h5_group['Fit']
            fitter._start_pos = fitter.h5_fit.attrs['last_pixel']

        def _start_fresh_fit(fitter, h5_guess_legal):
            # Create new fit datasets and start from the first position
            fitter.h5_guess = h5_guess_legal
            fitter._create_fit_datasets()
            fitter._start_pos = 0

        # Prepare the parms dict that will be used for comparison:
        self._parms_dict = solver_options.copy()
        self._parms_dict.update({'solver_type': solver_type})
        self._parms_dict.update(obj_func)

        completed_guess, partial_fit_groups, completed_fits = self._check_for_old_fit()

        override = override or (h5_partial_fit is not None or h5_guess is not None)

        if not override:
            # First try to simply return completed results
            if len(completed_fits) > 0:
                print('Returned previously computed results at ' + completed_fits[-1].name)
                self.h5_fit = USIDataset(completed_fits[-1])
                # Hand the dataset back, as promised in the docstring
                return self.h5_fit

            # Next, attempt to resume automatically:
            elif len(partial_fit_groups) > 0:
                print('Will resume fitting in {}. '
                      'You can supply a dataset using the h5_partial_fit argument'.format(partial_fit_groups[-1].name))
                _resume_fit(self, partial_fit_groups[-1])

            # Finally, attempt to do fresh fitting using completed Guess:
            elif len(completed_guess) > 0:
                print('Will use {} for generating new Fit. '
                      'You can supply a dataset using the h5_guess argument'.format(completed_guess[-1].name))
                _start_fresh_fit(self, completed_guess[-1])

            else:
                raise ValueError('Could not find a compatible Guess to use for Fit. Call do_guess() before do_fit()')

        else:
            if h5_partial_fit is not None:
                h5_group = _get_group_to_resume(partial_fit_groups, h5_partial_fit)
                if h5_group is None:
                    raise ValueError('Provided dataset with partial Fit was not found to be compatible')
                _resume_fit(self, h5_group)

            elif h5_guess is not None:
                if h5_guess not in completed_guess:
                    raise ValueError('Provided dataset with completed Guess was not found to be compatible')
                _start_fresh_fit(self, h5_guess)

            else:
                raise ValueError('Please provide a completed guess or partially completed Fit to resume')

        # ################## BEGIN THE ACTUAL FITTING #######################################

        print("Using solver %s and objective function %s to fit your data\n" % (solver_type, obj_func_name))

        if processors is None:
            processors = self._maxCpus
        else:
            processors = min(processors, self._maxCpus)
        processors = recommend_cpu_cores(self._max_pos_per_read, processors, verbose=self._verbose)

        time_per_pix = 0
        num_pos = self.h5_main.shape[0] - self._start_pos
        orig_start_pos = self._start_pos

        print('You can abort this computation at any time and resume at a later time!\n'
              '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
              '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n')

        self._get_guess_chunk()
        self._get_data_chunk()

        # The loop ends once the chunk readers leave self.data as None
        while self.data is not None:

            t_start = tm.time()

            opt = Optimize(data=self.data, guess=self.guess, parallel=self._parallel)
            temp = opt.computeFit(processors=processors, solver_type=solver_type, solver_options=solver_options,
                                  obj_func=obj_func.copy())

            # TODO: need a different .reformatResults to process fitting results
            # reorder to get one numpy array out
            temp = self._reformat_results(temp, obj_func_name)
            self.fit = np.hstack(tuple(temp))

            # Write to file
            self._set_results(is_guess=False)

            # basic timing logs
            tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
            if self._verbose:
                print('Done parallel computing in {} or {} per pixel'.format(format_time(tot_time),
                                                                             format_time(
                                                                                 tot_time / self.data.shape[0])))
            if self._start_pos == orig_start_pos:
                # First chunk: use it to estimate the time per position
                time_per_pix = tot_time / self._end_pos  # in seconds
            else:
                time_remaining = (num_pos - self._end_pos) * time_per_pix  # in seconds
                print('Time remaining: ' + format_time(time_remaining))

            # get next batch of data
            self._get_guess_chunk()
            self._get_data_chunk()

        print('Completed computing fit. Writing to file.')

        return USIDataset(self.h5_fit)
Ejemplo n.º 14
0
    def do_guess(self, processors=None, strategy=None, options=None, h5_partial_guess=None, override=False):
        """
        Computes guesses for the main dataset one chunk of positions at a time. Previously completed results
        are returned as-is and partially computed results are resumed unless ``override`` is set.

        Parameters
        ----------
        processors : int (optional)
            Number of cores to use for computing. Default = self._maxCpus. Values are capped at self._maxCpus
            and then passed through recommend_cpu_cores().
        strategy: string (optional)
            Name of the guess method. Must be one of GuessMethods().methods, otherwise a KeyError is raised.
            For updated list, run GuessMethods.methods
            NOTE(review): the signature default is None - confirm whether a named default (e.g. wavelet peaks)
            was intended.
        options: dict (optional)
            Dictionary of options passed to strategy. For more info see GuessMethods documentation.
            Default = None, which is treated as an empty dictionary.
        h5_partial_guess : h5py.Dataset. optional, default = None
            Dataset containing (partially computed) guess results. do_guess will resume computation from this
            dataset if it is one of the compatible partial datasets found for these parameters.
        override : bool, optional. default = False
            By default, will simply return duplicate results to avoid recomputing or resume computation on a
            group with partial results. Set to True to force fresh computation.

        Returns
        -------
        h5_guess : USIDataset
            Dataset containing guesses that can be passed on to do_fit()

        Raises
        ------
        KeyError
            If strategy is not one of GuessMethods().methods
        ValueError
            If h5_partial_guess is not an h5py.Dataset or is not among the compatible partial datasets
        """
        gm = GuessMethods()
        if strategy not in gm.methods:
            raise KeyError('Error: %s is not implemented in pycroscopy.analysis.GuessMethods to find guesses' %
                           strategy)

        # A fresh dictionary per call: a dict() literal in the signature would be shared between all calls
        # (and instances) and is handed to the optimizer below.
        if options is None:
            options = dict()

        # ################## CHECK FOR DUPLICATES AND RESUME PARTIAL #######################################

        # Prepare the parms dict that will be used for comparison:
        self._parms_dict = options.copy()
        self._parms_dict.update({'strategy': strategy})

        # check for old:
        partial_dsets, completed_dsets = self._check_for_old_guess()

        if len(completed_dsets) == 0 and len(partial_dsets) == 0:
            print('No existing datasets found')
            override = True

        if not override:
            # First try to simply return any completed computation
            if len(completed_dsets) > 0:
                print('Returned previously computed results at ' + completed_dsets[-1].name)
                self.h5_guess = USIDataset(completed_dsets[-1])
                # Hand the dataset back (instead of None) so that this path matches the documented return
                return self.h5_guess

            # Next attempt to resume automatically if nothing is provided
            if len(partial_dsets) > 0:
                # attempt to use whatever the user provided (if legal)
                target_partial_dset = partial_dsets[-1]
                if h5_partial_guess is not None:
                    if not isinstance(h5_partial_guess, h5py.Dataset):
                        raise ValueError('Provided parameter is not an h5py.Dataset object')
                    if h5_partial_guess not in partial_dsets:
                        raise ValueError('Provided dataset for partial Guesses is not compatible')
                    if self._verbose:
                        print('Provided partial Guess dataset was acceptable')
                    target_partial_dset = h5_partial_guess

                # Finally resume from this dataset
                print('Resuming computation in group: ' + target_partial_dset.name)
                self.h5_guess = target_partial_dset
                self._start_pos = target_partial_dset.attrs['last_pixel']

        # No completed / partials available or forced via override:
        # NOTE(review): a fresh computation only starts when self.h5_guess is still None - confirm that
        # override=True behaves as intended on an instance that already holds a guess dataset.
        if self.h5_guess is None:
            if self._verbose:
                print('Starting a fresh computation!')
            self._start_pos = 0
            self._create_guess_datasets()

        # ################## BEGIN THE ACTUAL COMPUTING #######################################

        if processors is None:
            processors = self._maxCpus
        else:
            processors = min(int(processors), self._maxCpus)
        processors = recommend_cpu_cores(self._max_pos_per_read, processors, verbose=self._verbose)

        print("Using %s to find guesses...\n" % strategy)

        time_per_pix = 0
        num_pos = self.h5_main.shape[0] - self._start_pos
        orig_start_pos = self._start_pos

        print('You can abort this computation at any time and resume at a later time!\n'
              '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
              '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n')

        self._get_data_chunk()
        # self.data is refreshed by _get_data_chunk(); the loop ends once it is set to None
        while self.data is not None:

            t_start = tm.time()

            opt = Optimize(data=self.data, parallel=self._parallel)
            temp = opt.computeGuess(processors=processors, strategy=strategy, options=options)

            # reorder to get one numpy array out
            temp = self._reformat_results(temp, strategy)
            self.guess = np.hstack(tuple(temp))

            # Write to file
            self._set_results(is_guess=True)

            # basic timing logs
            tot_time = np.round(tm.time() - t_start, decimals=2)  # in seconds
            if self._verbose:
                print('Done parallel computing in {} or {} per pixel'.format(format_time(tot_time),
                                                                             format_time(tot_time / self.data.shape[0])))
            # NOTE(review): the per-pixel estimate divides by the absolute end position rather than by the
            # number of positions processed in this chunk - confirm this is intended when resuming.
            if self._start_pos == orig_start_pos:
                time_per_pix = tot_time / self._end_pos  # in seconds
            else:
                time_remaining = (num_pos - self._end_pos) * time_per_pix  # in seconds
                print('Time remaining: ' + format_time(time_remaining))

            # get next batch of data
            self._get_data_chunk()

        print('Completed computing guess')
        print()
        return USIDataset(self.h5_guess)
Ejemplo n.º 15
0
 def test_invalid_requested_cores(self):
     # A list passed as requested_cores is expected to be rejected with a TypeError
     self.assertRaises(TypeError,
                       comp_utils.recommend_cpu_cores,
                       14035,
                       requested_cores=[4])
Ejemplo n.º 16
0
 def test_recommend_cpu_cores_rerouting(self):
     # The io_utils entry point is expected to emit a FutureWarning;
     # assertWarns is only exercised when running under Python 3
     running_py3 = sys.version_info.major == 3
     if running_py3:
         with self.assertWarns(FutureWarning):
             io_utils.recommend_cpu_cores(140)

     # Both entry points must recommend the same number of cores
     direct_result = comp_utils.recommend_cpu_cores(140)
     rerouted_result = io_utils.recommend_cpu_cores(140)
     self.assertEqual(direct_result, rerouted_result)
    def fit_atom_positions_parallel(self, plot_results=True, num_cores=None):
        """
        Fits the positions of N atoms in parallel

        Parameters
        ----------
        plot_results : optional boolean (default is True)
            Specifies whether to output a visualization of the fitting results

        num_cores : unsigned int (Optional. Default = value returned by recommend_cpu_cores)
            Number of cores to compute with. A value of 1 or less runs all fits serially in this process.
            A user-supplied value is used as-is (it is not passed through recommend_cpu_cores).

        Creates guess_dataset and fit_dataset with the results.

        Returns
        -------

        fit_dataset: NxM numpy array of tuples where N is the number of atoms fit and M is the number of nearest
            neighbors considered. Each tuple contains the converged values for each gaussian.
            The value names are stored in the dtypes.
        """

        t_start = tm.time()
        if num_cores is None:
            num_cores = recommend_cpu_cores(self.num_atoms, requested_cores=num_cores, lengthy_computation=False)

        print('Setting up guesses')
        self.guess_parms = []
        for i in range(self.num_atoms):
            self.guess_parms.append(self.do_guess(i))

        print('Fitting...')
        # Pair every guess with the (shared) fitting parameters. The built-in zip works on both Python 2 and 3,
        # whereas itertools.izip only exists on Python 2. zip stops once the guesses are exhausted.
        parm_list = zip(self.guess_parms, itt.repeat(self.fitting_parms))
        if num_cores > 1:
            # imap rejects chunk sizes below 1, which would happen whenever num_cores > num_atoms
            chunk = max(1, int(self.num_atoms / num_cores))
            pool = mp.Pool(processes=num_cores)
            try:
                jobs = pool.imap(do_fit, parm_list, chunksize=chunk)
                self.fitting_results = list(jobs)
                pool.close()
            finally:
                # Stops idle workers after a clean run and outstanding workers if a fit raised,
                # so that worker processes are never left behind
                pool.terminate()
                pool.join()
        else:
            self.fitting_results = [do_fit(parm) for parm in parm_list]

        print('Finalizing datasets...')
        self.guess_dataset = np.zeros(shape=(self.num_atoms, self.num_nearest_neighbors + 1),
                                      dtype=self.atom_coeff_dtype)
        self.fit_dataset = np.zeros(shape=self.guess_dataset.shape, dtype=self.guess_dataset.dtype)

        for atom_ind, single_atom_results in enumerate(self.fitting_results):
            # type of the atom itself followed by the types of its nearest neighbors
            types = np.hstack((self.h5_guess['type'][atom_ind],
                               [self.h5_guess['type'][neighbor] for neighbor in self.closest_neighbors_mat[atom_ind]]))
            # prepend the type column to the fitted coefficients and convert each row to a tuple
            # so that it can be assigned into the compound-dtype dataset
            atom_data = np.hstack((np.vstack(types), single_atom_results))
            atom_data = [tuple(element) for element in atom_data]
            self.fit_dataset[atom_ind] = atom_data

            # same packing for the guess; element 1 of each guess entry holds the guess coefficients
            single_atom_guess = self.guess_parms[atom_ind]
            atom_guess_data = np.hstack((np.vstack(types), single_atom_guess[1]))
            atom_guess_data = [tuple(element) for element in atom_guess_data]
            self.guess_dataset[atom_ind] = atom_guess_data

        tot_time = np.round(tm.time() - t_start)
        print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(self.fitting_results), num_cores))

        # if plotting is desired
        if plot_results:
            fig, axis = plt.subplots(figsize=(14, 14))
            # No axis.hold(True): Axes.hold was removed in Matplotlib 3.0 and successive plotting calls
            # already draw onto the same axes
            axis.imshow(self.cropped_clean_image, interpolation='none', cmap="gray")
            axis.scatter(self.guess_dataset[:, 0]['y'], self.guess_dataset[:, 0]['x'], color='yellow', label='Guess')
            axis.scatter(self.fit_dataset[:, 0]['y'], self.fit_dataset[:, 0]['x'], color='red', label='Fit')
            axis.legend()
            fig.tight_layout()
            fig.show()

        return self.fit_dataset