Esempio n. 1
0
    def PolyDiff(self, u, x):
        """Estimate derivatives of sampled data from local Chebyshev fits.

        For every interior sample ``j`` a Chebyshev polynomial of degree
        ``self.cheb_degree`` is fitted to the ``2 * self.cheb_width`` samples
        with indices ``j - cheb_width`` up to, but not including,
        ``j + cheb_width``. The fitted polynomial is then differentiated and
        evaluated at ``x[j]`` for every order from 1 to ``self.diff_order``.

        Keyword arguments:
        u -- values of some function (flattened before use)
        x -- corresponding x-coordinates where u is evaluated (flattened)

        Returns a NumPy array of shape
        ``(len(x) - 2 * self.cheb_width, self.diff_order)``. Row ``i`` holds
        the derivatives at ``x[i + self.cheb_width]``; column ``k`` holds the
        derivative of order ``k + 1``.

        Note: the first and last ``self.cheb_width`` samples get no row in the
        result, because a polynomial derivative is only trusted near the
        middle of the points it was fitted to.
        """
        values = u.flatten()
        coords = x.flatten()

        half_width = self.cheb_width
        n_interior = len(coords) - 2 * half_width

        # One row per interior sample, one column per derivative order
        derivatives = np.zeros((n_interior, self.diff_order))

        for row in range(n_interior):
            centre = row + half_width
            # Half-open window [centre - half_width, centre + half_width)
            window = slice(centre - half_width, centre + half_width)
            local_fit = Cheb.fit(coords[window], values[window],
                                 self.cheb_degree)
            for order in range(1, self.diff_order + 1):
                derivative = local_fit.deriv(m=order)
                derivatives[row, order - 1] = derivative(coords[centre])

        return derivatives
Esempio n. 2
0
 def background(self, two_theta, intensities):
     """Estimate the background intensity with a Chebyshev polynomial.

     The intensities are passed through ``self.smooth_data`` and the
     result, together with ``two_theta``, through ``self.remove_peaks``
     (presumably stripping the peak regions -- confirm against that
     helper). A degree-5 Chebyshev polynomial is fitted to the reduced
     data and evaluated at every position in ``two_theta``.

     Arguments
     ---------
     - two_theta : Positions at which the pattern was measured.

     - intensities : Measured intensity at each position.

     Returns
     -------
     NumPy array with the fitted background at each ``two_theta``.
     """
     n_deg = 5
     red_two_theta, red_intensities = self.remove_peaks(
         two_theta, self.smooth_data(intensities))
     # ``fit`` is a classmethod, so no placeholder instance is needed
     background_fit = Chebyshev.fit(red_two_theta, red_intensities, n_deg)
     # Evaluate on the full (unreduced) grid so peaks get a background too
     return np.array(background_fit(two_theta))
def _normalize(spectra):
    """Normalise every spectrum by a per-chip quadratic Chebyshev fit.

    Steps, in order:

    1. Flux and error values are copied out of ``spectra``; the error
       columns are reindexed to the flux wavelengths.
    2. Pixels with non-finite flux, or non-finite/negative error, get
       flux 1 and error ``ERROR_LIM``.
    3. Fit weights are ``1 / (var**2 + error**2)`` where ``var`` is 0 for
       the pixel indices listed in ``pixlist.txt`` and ``ERROR_LIM**2``
       everywhere else.
    4. For each star and each wavelength range in ``CHIPS`` a degree-2
       Chebyshev polynomial is fitted, and flux and error are divided by
       it.
    5. Pixels whose normalised error exceeds 0.3 get flux 1 and error
       ``ERROR_LIM``.
    6. Only wavelengths strictly inside some ``CHIPS`` range are kept.

    Parameters
    ----------
    spectra
      Object with an ``index`` (one entry per star) and ``flux`` /
      ``error`` tables whose columns are wavelengths.

    Returns
    -------
    pandas.DataFrame
      Normalised flux and error concatenated along the columns under
      the keys ``'flux'`` and ``'error'``.
    """
    # Function-scope import: the NumPy aliases formerly reachable through
    # the scipy root namespace (sp.isnan, sp.loadtxt, ...) are deprecated.
    import numpy as np

    stars = spectra.index
    wavelengths = spectra.flux.columns.values.copy()
    flux = spectra.flux.values.copy()
    error = spectra.error.reindex(columns=wavelengths).values.copy()

    #TODO: Should negative fluxes be zero'd too?
    bad_flux = np.isnan(flux) | np.isinf(flux)
    bad_error = np.isnan(error) | np.isinf(error) | (error < 0)
    bad = bad_flux | bad_error

    flux[bad] = 1
    error[bad] = ERROR_LIM

    #TODO: Where does pixlist come from?
    pixlist = np.loadtxt('pixlist.txt', dtype=int)
    var = np.full_like(error, ERROR_LIM**2)
    var[:, pixlist] = 0
    # NOTE(review): ``var`` already holds ERROR_LIM**2, so squaring it again
    # looks unintended; numerics deliberately left unchanged -- confirm.
    inv_var = 1 / (var**2 + error**2)

    # Defaults for pixels that no chip covers
    norm_flux = np.full_like(flux, 1)
    norm_error = np.full_like(error, ERROR_LIM)
    for star in range(len(stars)):
        for left, right in CHIPS.values():
            in_chip = (left < wavelengths) & (wavelengths < right)
            #TODO: Why are we using Chebyshev polynomials rather than smoothing splines?
            #TODO: Why are we using three polynomials rather than one? Are spectra discontinuous between chips?
            #TODO: Is the denominator being zero/negative ever an issue?
            fit = Chebyshev.fit(x=wavelengths[in_chip],
                                y=flux[star][in_chip],
                                w=inv_var[star][in_chip],
                                deg=2)

            # Evaluate the fitted continuum once and reuse it
            continuum = fit(wavelengths[in_chip])
            norm_flux[star, in_chip] = flux[star][in_chip] / continuum
            norm_error[star, in_chip] = error[star][in_chip] / continuum

    #TODO: Why is the unreliability threshold different from the limit value?
    unreliable = (norm_error > .3)
    norm_flux[unreliable] = 1
    norm_error[unreliable] = ERROR_LIM

    # In the original, the masking is done in the parallax fitting code.
    # Gonna do it earlier here to save a bit of memory.
    mask = np.any(
        np.vstack([(lower < wavelengths) & (wavelengths < upper)
                   for lower, upper in CHIPS.values()]), axis=0)

    norm_flux = pd.DataFrame(norm_flux[:, mask], stars, wavelengths[mask])
    norm_error = pd.DataFrame(norm_error[:, mask], stars, wavelengths[mask])

    # ``axis`` must be given by keyword on current pandas versions
    return pd.concat({'flux': norm_flux, 'error': norm_error}, axis=1)
Esempio n. 4
0
import numpy as np
from numpy.polynomial.chebyshev import Chebyshev, chebval
import matplotlib.pyplot as plt

# Example 1: degree-90 Chebyshev fit through a noisy cosine

np.random.seed(0)  # fixed seed so the noise is reproducible

x = np.linspace(-1, 1, 2000)
y = np.cos(x) + 0.3 * np.random.rand(2000)
p = Chebyshev.fit(x, y, 90)

# Draw the fitted curve on a coarser grid than the raw samples
t = np.linspace(-1, 1, 200)
plt.plot(x, y, 'r.')
plt.plot(t, p(t), 'k-', linewidth=3)
plt.savefig("random.png")
plt.close()

# Example 2: 4th-order Chebyshev fit of log10 on [0.5, 1)

x = np.arange(0.5, 1, 1e-6)
y = np.log10(x)

fit = Chebyshev.fit(x, y, 4)

plt.plot(x, y)
plt.plot(x, fit(x), linestyle='--', label="Chebyshev 4th Order Fit")
plt.legend()
plt.savefig("cheby_fit.png")
Esempio n. 5
0
class NativeRefinement(BaseRefinement):
    """Refinement built directly on NumPy/SciPy fitting routines.

    The background is modelled with a Chebyshev polynomial (kept in
    ``spline``), peaks are fitted with ``XRDPeak`` objects and unit-cell
    parameters are optimized with a Nelder-Mead search.
    """
    # Callable background model; ``None`` until ``refine_background()``
    # has been run.
    spline = None

    @staticmethod
    def _reflection_window(scattering_lengths, intensities, reflection):
        """Return the (q, I) samples strictly inside ``reflection.qrange``."""
        left = reflection.qrange[0]
        right = reflection.qrange[1]
        idx = np.where(np.logical_and(left < scattering_lengths,
                                      scattering_lengths < right))
        return scattering_lengths[idx], intensities[idx]

    def _observed_peak_positions(self, scattering_lengths, intensities, phase):
        """Return the q of maximum intensity for each reflection of ``phase``."""
        peak_list = []
        for reflection in phase.reflection_list:
            if contains_peak(scattering_lengths, reflection.qrange):
                xs, ys = self._reflection_window(
                    scattering_lengths, intensities, reflection)
                peak_list.append(xs[ys.argmax()])
        return peak_list

    def _diagnostic_peak_areas(self, scattering_lengths, intensities):
        """Return an array with each phase's diagnostic-reflection area."""
        areas = [
            peak_area(scattering_lengths, intensities,
                      phase.diagnostic_reflection.qrange)
            for phase in self.phases
        ]
        return np.array(areas)

    def peak_rms_error(self, phase, unit_cell=None, peak_list=None):
        """Root-mean-square distance between observed and predicted peaks.

        Every observed peak position is compared with the closest
        predicted position (the ``q`` attribute of the entries returned
        by ``phase.predicted_peaks``).

        Parameters
        ----------
        phase
          Phase whose predicted peaks are used. Must be one of
          ``self.phases``.
        unit_cell : optional
          Forwarded to ``phase.predicted_peaks``.
        peak_list : optional
          Observed peak positions. When omitted, the centers of the
          peaks previously fitted for ``phase`` are used.

        Returns
        -------
        rms_error : float
          Square root of the mean squared distance.

        Raises
        ------
        exceptions.RefinementError
          If there are no observed peaks to compare.
        """
        predicted_peaks = phase.predicted_peaks(unit_cell=unit_cell)
        phase_idx = self.phases.index(phase)
        if peak_list is None:
            actual_peaks = [p.center() for p in self._peak_list[phase_idx]]
        else:
            actual_peaks = peak_list
        # Distance from each observed peak to its nearest predicted peak
        diffs = [min(abs(p.q - actual_peak) for p in predicted_peaks)
                 for actual_peak in actual_peaks]
        if not diffs:
            # Nothing to average over
            raise exceptions.RefinementError()
        return math.sqrt(sum(diff**2 for diff in diffs) / len(diffs))

    def refine_unit_cells(self, scattering_lengths, intensities, quiet=True):
        """Residual least squares refinement of the unit-cell
        parameters.

        For each phase the observed peak positions are taken as the q of
        maximum intensity inside every reflection's ``qrange``. The cell
        parameters are then varied with Nelder-Mead to minimize
        ``peak_rms_error`` and, on success, written back to
        ``phase.unit_cell``.

        Parameters
        ----------
        scattering_lengths : np.ndarray
          Scattering vector lengths, q.
        intensities : np.ndarray
          Intensity at each q. Same shape as ``scattering_lengths``.
        quiet : bool
          When false, the optimizer prints its convergence messages.

        Returns
        -------
        residual_error
          Sum over all phases of the RMS peak-position error after
          refinement.

        Raises
        ------
        exceptions.RefinementError
          If the optimizer reports failure for any phase.
        """
        residual_error = 0
        for phase in self.phases:
            peak_list = self._observed_peak_positions(
                scattering_lengths, intensities, phase)

            # Objective to minimize: RMS error for a trial unit cell
            def objective(cell_parameters):
                # Work on a temporary cell so the phase is left untouched
                # until the optimization has succeeded
                unit_cell = type(phase.unit_cell)()
                unit_cell.set_cell_parameters_from_list(cell_parameters)
                return self.peak_rms_error(phase=phase,
                                           unit_cell=unit_cell,
                                           peak_list=peak_list)

            initial_parameters = phase.unit_cell.cell_parameters
            result = scipy.optimize.minimize(fun=objective,
                                             x0=initial_parameters,
                                             method='Nelder-Mead',
                                             options={'disp': not quiet})
            if not result.success:
                # Optimization failed for some reason
                raise exceptions.RefinementError(result.message)
            # Optimization was successful, so set new parameters
            phase.unit_cell.set_cell_parameters_from_list(result.x)
            residual_error += self.peak_rms_error(phase=phase,
                                                  peak_list=peak_list)
        return residual_error

    def refine_phase_fractions(self, scattering_lengths, intensities):
        """Calculate the relative strengths of each phase in a diffractogram.

        The simplest approach is to calculate the peak area for each
        phases's diagnostic reflection. The fraction is then the ratio
        of each phases's reflection over the sum of all phases. This
        makes the assumption that the phases behave similarly and that
        the structure factor of the diagnostic reflections are also
        similar. By default this method does not remove the
        background.

        Parameters
        ----------
        scattering_lengths : np.ndarray
          Independent variable for the diffractogram.
        intensities : np.ndarray
          Dependent variable for the diffractogram. Must be the same
          shape as ``scattering_lengths``.

        Returns
        -------
        phase_fractions : np.ndarray
          The relative weight of each phase as determined by the
          diffraction pattern.

        """
        areas = self._diagnostic_peak_areas(scattering_lengths, intensities)
        # Divide by the total area to get relative phase fractions
        return areas / np.sum(areas)

    def refine_scale_factor(self, scattering_lengths, intensities):
        """Calculate the absolute strengths of each phase in a diffractogram.

        The simplest approach is to calculate the peak area for each
        phases's diagnostic reflection. The factor is then the sum of
        each phases's reflection over all phases. This makes the
        assumption that the phases behave similarly and that the
        structure factor of the diagnostic reflections are also
        similar. By default this method does not remove the
        background.

        Parameters
        ----------
        scattering_lengths : np.ndarray
          Independent variable for the diffractogram.
        intensities : np.ndarray
          Dependent variable for the diffractogram. Must be the same
          shape as ``scattering_lengths``.

        Returns
        -------
        total_area
          Sum of the diagnostic-reflection peak areas of all phases.

        """
        areas = self._diagnostic_peak_areas(scattering_lengths, intensities)
        return np.sum(areas)

    def refine_background(self, scattering_lengths, intensities, s=None, k=4):
        """Fit a Chebyshev polynomial to the background data.

        The ``qrange`` of every reflection in ``self.phases`` and
        ``self.background_phases`` is removed from the data, the
        remainder is smoothed with a Savitzky-Golay filter and a
        degree-10 Chebyshev polynomial is fitted to it. The fitted
        model is stored in ``self.spline``.

        Arguments
        ---------
        - scattering_lengths : Array of scattering vector lengths, q.

        - intensities : Array of intensity values at each q position

        - s : Currently unused. Was the smoothing factor of an earlier
            smoothing-spline implementation; kept so existing callers
            keep working.

        - k : Currently unused. Was the degree of that spline; kept so
            existing callers keep working.

        Returns
        -------
        The fitted background evaluated at every position in
        ``scattering_lengths``.
        """
        # Remove pre-indexed peaks for background fitting
        phase_list = self.phases + self.background_phases
        q, I = scattering_lengths, intensities
        for phase in phase_list:
            for reflection in phase.reflection_list:
                q, I = remove_peak_from_df(x=q, y=I, xrange=reflection.qrange)
        # Smoothing for background fitting
        smoothI = savgol_filter(I, window_length=15, polyorder=5)
        # ``fit`` is a classmethod, so no placeholder instance is needed
        self.spline = Chebyshev.fit(q, smoothI, 10)
        # Extrapolate the background for the whole pattern
        return self.spline(scattering_lengths)

    def fwhm(self, phase_idx=0):
        """Use the previously fitted peaks to describe full-width and
        half-maximum.

        Not implemented yet: always raises ``NotImplementedError``.
        """
        # Intended implementation once this is enabled:
        #   phase = self.scan.phases[phase_idx]
        #   peak = self.peak(phase.diagnostic_reflection, phase_idx=phase_idx)
        #   return peak.fwhm()
        raise NotImplementedError()

    @property
    def has_background(self):
        """Returns true if a background model has been fitted."""
        return self.spline is not None

    def background(self, x):
        """Evaluate the fitted background model at ``x``.

        Raises ``exceptions.RefinementError`` when no background has
        been fitted yet.
        """
        if self.spline is None:
            raise exceptions.RefinementError("Please run `refine_background()` first")
        return self.spline(x)

    def refine_displacement(self):
        """Not implemented yet."""
        pass

    def details(self):
        """Return a short human-readable description of this refinement."""
        return "Native refinement"

    def peak_list_by_phase(self):
        """List of fitted peaks organized by phase.

        Raises ``exceptions.RefinementError`` when ``fit_peaks()`` has
        not been run yet.
        """
        peak_list = getattr(self, '_peak_list', None)
        if peak_list is None:
            msg = "Peak's not fit, please run {}.fit_peaks() first.".format(self)
            raise exceptions.RefinementError(msg)
        return peak_list

    @property
    def peak_list(self):
        """List of fitted peaks across all phases"""
        # Flatten the per-phase lists into a single list
        full_list = []
        for phase_peaks in self.peak_list_by_phase():
            full_list += phase_peaks
        return full_list

    def peak(self, reflection, phase_idx=0):
        """Returns the fitted peak that belongs to ``reflection``.

        Raises ``ValueError`` when no peak matches and ``IndexError``
        when more than one does.
        """
        peak_list = self.peak_list_by_phase()[phase_idx]
        peaks = [peak for peak in peak_list if peak.reflection == reflection]
        # Check for sanity
        if len(peaks) < 1:
            raise ValueError(
                'Peak for reflection {} was not found'.format(reflection)
            )
        elif len(peaks) > 1:
            raise IndexError('Mutliple peaks found for {}'.format(reflection))
        # Sanity checks passed so return the only value
        return peaks[0]

    def fit_peaks(self, two_theta, intensities):
        """
        Use least squares refinement to fit gaussian/Cauchy/etc functions
        to the predicted reflections.

        Each reflection of each phase in ``self.phases`` whose
        ``qrange`` contains a peak is fitted, trying the methods
        ``'gaussian'`` and ``'cauchy'`` in that order. A
        ``RuntimeWarning`` is issued for reflections that no method can
        fit, and when there are no phases at all.

        Parameters
        ----------
        two_theta : np.ndarray
          Positions of the data points. They are compared directly
          against each reflection's ``qrange``; the parameter keeps its
          historical name so keyword callers keep working.
        intensities : np.ndarray
          Intensity at each position.

        Returns
        -------
        list
          One list of fitted peaks per phase, in the order of
          ``self.phases``. Also stored as ``self._peak_list``.
        """
        scattering_lengths = two_theta
        self._peak_list = []
        # fitMethods = ['pseudo-voigt', 'gaussian', 'cauchy', 'estimated']
        fitMethods = ['gaussian', 'cauchy']
        # Check if there are phases present
        if len(self.phases) == 0:
            msg = '{this} has no phases. Nothing to fit'.format(this=self)
            warnings.warn(msg, RuntimeWarning)
        # Step through each reflection in the relevant phases and find the peak
        for phase in self.phases:
            phase_peak_list = []
            # Only this phase's own reflections belong in its peak list
            for reflection in phase.reflection_list:
                if not contains_peak(scattering_lengths, reflection.qrange):
                    continue
                xs, ys = self._reflection_window(
                    scattering_lengths, intensities, reflection)
                # Try each fit method until one works
                for method in fitMethods:
                    newPeak = XRDPeak(reflection=reflection, method=method)
                    try:
                        newPeak.fit(x=xs, y=ys)
                    except exceptions.PeakFitError:
                        # Try next fit
                        continue
                    phase_peak_list.append(newPeak)
                    break
                else:
                    # No successful fit could be found.
                    msg = "RefinementWarning: peak could not be fit for {}."
                    msg = msg.format(reflection)
                    warnings.warn(msg, RuntimeWarning)
            self._peak_list.append(phase_peak_list)
        return self._peak_list

    def predict(self, scattering_lengths):
        """Predict intensity values from the given scattering lengths, q.

        The prediction is the fitted background (if any) plus the
        contribution of every fitted peak.

        Arguments
        ---------
        - scattering_lengths : Iterable with scattering_lengths
          (x-values) that will be used to predict the diffraction
          intensities.
        """
        q = scattering_lengths
        # Float accumulator so integer q values do not break the in-place sums
        predicted = np.zeros_like(q, dtype=float)
        # Calculate background
        if self.spline is not None:
            predicted += self.spline(q)
        # Calculate peak fittings
        for peak in self.peak_list:
            predicted += peak.predict(q)
        return predicted

    # def plot(self, two_theta, intensities, ax=None):

    #     # Create new axes if necessary
    #     if ax is None:
    #         ax = plots.new_axes()
    #     # Check if background has been fit
    #     if self.spline is not None:
    #         # Plot background
    #         q = x
    #         background = self.spline(q)
    #         ax.plot(q, background)
    #     # Highlight peaks
    #     self.highlight_peaks(ax=ax)
    #     # Plot peak fittings
    #     peaks = []
    #     for peak in self.peak_list:
    #         # dataframes.append(peak.dataframe(background=spline))
    #         peaks.append(peak.predict())
    #         # peak.plot_overall_fit(ax=ax, background=spline)
    #     if peaks:
    #         predicted = pandas.concat(peaks)
    #         if self.spline:
    #             predicted = predicted + self.spline(predicted.index)
    #         predicted.plot(ax=ax)
    #     return ax

    def highlight_peaks(self, ax):
        """Shade the ``qrange`` of every reflection on ``ax``.

        Reflections of ``self.phases`` get one color per phase (the
        colors repeat when there are more phases than colors);
        reflections of ``self.background_phases`` are drawn in grey.
        Returns ``ax``.
        """
        color_list = [
            'green',
            'blue',
            'red',
            'orange'
        ]

        def draw_peaks(ax, phase, color):
            """Highlight the expected peak corresponding to this phase."""
            alpha = 0.15
            # Highlight each peak in this phase
            for reflection in phase.reflection_list:
                qrange = reflection.qrange
                ax.axvspan(qrange[0], qrange[1],
                           color=color, alpha=alpha)
        # Highlight phases
        for idx, phase in enumerate(self.phases):
            # Wrap around instead of failing when phases outnumber colors
            color = color_list[idx % len(color_list)]
            draw_peaks(ax=ax, phase=phase, color=color)
        # Highlight background
        for phase in self.background_phases:
            draw_peaks(ax=ax, phase=phase, color='grey')
        return ax

    def confidence(self):
        """Return the confidence of this refinement; always 1."""
        return 1