def solve_perturbation(kg, fg, tol=1.0e-8, maxit=100):
    '''
    Solves system of equation using the perturbation method proposed by
    prof. Ivo Babuska. See (Strouboulis, Babuska and Copps, 2000).

    The system ``kg . x = fg`` is first scaled symmetrically with
    ``T = diag(1 / sqrt(diag(kg)))``.  The scaled matrix is perturbed by
    ``tol`` on its diagonal and the solution of the perturbed system is
    corrected iteratively until the Euclidean norm of the residual of the
    ORIGINAL system drops to ``tol`` or below.

    Parameters
    ----------
    kg : 2-D array
        System matrix. Its diagonal is passed to ``sqrt`` and inverted.
    fg : 1-D array
        Right-hand side.
    tol : float
        Both the diagonal perturbation and the residual-norm tolerance.
    maxit : int
        Iteration budget. As in the original code, ``maxit + 1`` correction
        steps are allowed before giving up.

    Returns
    -------
    array
        The solution of the un-scaled system.

    Raises
    ------
    SystemExit
        If the iteration does not converge within the budget.
    '''
    # symmetric diagonal scaling of the system
    T = diag(1.0 / npsqrt(diag(kg)))
    A = dot(T, dot(kg, T))
    # perturbed (regularised) matrix used for every solve
    A_ = A + eye(len(kg)) * tol
    b = dot(T, fg)
    sol_ = solve(A_, b)

    it = 0
    # At least one correction step is always performed, so `sol` is always
    # bound on return (the previous `while error > tol` form skipped the loop
    # entirely for tol >= 1 and then failed with a NameError).
    while True:
        res_ = b - dot(A, sol_)
        sol_ += solve(A_, res_)

        # residual of the original, un-scaled system
        sol = dot(T, sol_)
        res = fg - dot(kg, sol)
        error = npsqrt(dot(res, res))
        print(' -> Iteration %d: error norm =%.8e' % (it, error))
        it += 1

        # `<=` (rather than `not >`) keeps a NaN residual from being
        # reported as convergence
        if error <= tol:
            break
        if it > maxit:
            print('Perturbation method did not converged in %d iterations!'
                  % maxit)
            # non-zero status: failing to converge is not a clean exit
            raise SystemExit(1)

    print('Perturbation method converged')
    return sol
def energy2time(e, r=0, d1=3.75, d2=5, d3=35):
    """Convert energies to flight times over three consecutive distances.

    Distances are in centimeters, energies are in eV and times are in ns.

    Parameters
    ----------
    e : iterable of float
        Energies. Entries that are not strictly greater than ``r`` are
        dropped, so the result may be shorter than the input.
    r : float
        Energy offset subtracted in the ``d2`` and ``d3`` terms
        (presumably a retardation energy -- confirm with the caller).
        ``r == 0`` takes a separate branch because the ``d2`` term divides
        by ``r``.
    d1, d2, d3 : float
        Lengths of the three sections.

    Returns
    -------
    numpy array
        One time per retained energy.
    """
    # speed of light converted from m/s to cm/ns
    C_cmPns = c * 100. * 1e-9

    if r == 0:
        # no offset: the whole path is flown at the initial energy
        return nparray([(d1 + d2 + d3) / C_cmPns * npsqrt(e_mc2 / (2. * en))
                        for en in e if en > 0])

    # d1 at energy en, d3 at energy (en - r), d2 in between
    return nparray([d1 / C_cmPns * npsqrt(e_mc2 / (2. * en))
                    + d3 / C_cmPns * npsqrt(e_mc2 / (2. * (en - r)))
                    + d2 / C_cmPns * npsqrt(2) * (e_mc2 / r)
                    * (npsqrt(en / e_mc2) - npsqrt((en - r) / e_mc2))
                    for en in e if en > r])
def sqrt(a):
    """Square root that accepts both shaped objects and plain values.

    Objects without a ``shape`` attribute go straight to ``npsqrt``.
    Objects with a ``shape`` are converted with ``ctf.to_nparray`` first;
    the result is wrapped again with ``ctf.from_nparray`` unless the input
    holds a single value (zero dimensions or a total size of one), in which
    case the numpy result is returned as is.
    """
    if not hasattr(a, 'shape'):
        return npsqrt(a)

    dims = a.shape
    holds_single_value = len(dims) == 0 or prod(dims) == 1
    root = npsqrt(ctf.to_nparray(a))
    if holds_single_value:
        return root
    return ctf.from_nparray(root)
def stetson_kindex(fmags, ferrs):
    '''
    This calculates the Stetson K index (robust measure of the kurtosis).

    Requires finite mags and errs.

    Parameters
    ----------
    fmags : np.array
        The magnitudes/fluxes.
    ferrs : np.array or None
        The measurement errors. If None, a constant error of 0.005 is used
        for every element.

    Returns
    -------
    float
        The Stetson K index, or nan if there are fewer than 10 detections.
    '''

    # use a fill in value for the errors if they're none
    # (this used to reference an undefined name `mags` and raised NameError)
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        stetsonk = (npsum(npabs(sigma_i)) /
                    (npsqrt(npsum(sigma_i * sigma_i))) * (ndet**(-0.5)))

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan
def __init__(self, id, neighs, data, variance="false"):
    """
    @type id: integer
    @param id: Id of the polygon/area

    @type neighs: list
    @param neighs: Neighborhood ids

    @type data: list.
    @param data: Data releated to the area. When a variance matrix is
        present, ``data`` holds the ``n + 1`` data values followed by the
        ``n * (n + 1) / 2`` entries of the lower triangle of the symmetric
        ``n x n`` variance matrix, row by row.

    @type variance: boolean
    @keyword variance: Boolean indicating if the data have variance matrix.
        The legacy string ``"false"`` and the boolean ``False`` both mean
        "no variance matrix"; any other value means the matrix is present.
    """
    self.id = id
    self.neighs = neighs

    # accept the documented boolean as well as the legacy "false" string
    if variance in ("false", False):
        self.data = data
        return

    # len(data) - 1 == n * (n + 3) / 2, solved for n; truncated to int
    # because npidentity / range / slicing all need an integer
    n = int((npsqrt(9 + 8 * (len(data) - 1)) - 3) / 2)
    self.var = npmatrix(npidentity(n))

    # the triangle starts right after the n + 1 data values
    index = n + 1
    for i in range(n):
        for j in range(i + 1):
            # fill both halves so the matrix stays symmetric
            self.var[i, j] = data[index]
            self.var[j, i] = data[index]
            index += 1
    self.data = data[0:n + 1]
def isPrime(n):
    """Return True if ``n`` is a prime number, False otherwise.

    Uses trial division by every integer ``m`` with ``m * m <= n``.
    Values below 2 (0, 1 and negatives) are not prime; the previous
    implementation returned True for them.
    """
    if n < 2:
        return False

    m = 2
    # integer comparison instead of a float square root, so the bound stays
    # exact for large integers
    while m * m <= n:
        if n % m == 0:
            return False
        m += 1
    return True
def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False):
    '''This calculates the Stetson index for the magseries, based on consecutive
    pairs of observations.

    Based on Nicole Loncke's work for her Planets and Life certificate at
    Princeton.

    This requires finite times, mags, and errs.

    If weightbytimediff is True, the Stetson index for any pair of mags will be
    reweighted by the difference in times between them using the scheme in
    Fruth+ 2012 and Zhange+ 2003 (as seen in Sokolovsky+ 2017).

    w_i = exp(- (t_i+1 - t_i)/ delta_t )

    where delta_t is the median of the consecutive time differences.

    Returns the Stetson J index as a float, or nan if there are fewer than 10
    detections.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        # Nicole's clever trick to advance indices by 1 and do x_i*x_(i+1)
        sigma_j = nproll(sigma_i, 1)

        if weightbytimediff:

            # one weight per consecutive pair, so the first (wrapped-around)
            # element of the sigma arrays is dropped to match
            difft = npdiff(ftimes)
            deltat = npmedian(difft)

            weights_i = npexp(-difft / deltat)
            products = (weights_i * sigma_i[1:] * sigma_j[1:])

        else:

            # ignore first elem since it's actually x_0*x_n
            products = (sigma_i * sigma_j)[1:]

        stetsonj = (npsum(npsign(products) * npsqrt(npabs(products)))) / ndet

        return stetsonj

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson J index')
        return npnan
def stetson_kindex(fmags, ferrs):
    '''Compute the Stetson K index, a robust measure of the kurtosis.

    Parameters
    ----------

    fmags,ferrs : np.array
        The mag/flux time-series to process; no non-finite elements allowed.
        `ferrs` may be None, in which case a constant error of 0.005 is
        assumed for every measurement.

    Returns
    -------

    float
        The Stetson K variability index (nan if there are fewer than 10
        measurements).

    '''

    # substitute a constant error if none were provided
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    # bail out early when there is too little data
    if ndet <= 9:
        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan

    # normalized residuals about the median
    prefactor = ndet/(ndet - 1)
    residuals = fmags - npmedian(fmags)
    scaled = prefactor*residuals/ferrs

    # K = (sum |s| / sqrt(sum s^2)) / sqrt(N)
    abs_total = npsum(npabs(scaled))
    root_sum_sq = npsqrt(npsum(scaled*scaled))
    return abs_total/root_sum_sq * (ndet**(-0.5))
def EuclideanDistance(j, ImageColumn, ImageIn, ImageRow, InitialCluster,
                      NumberOfBands, NumberOfClusters):
    """Assign every pixel of one image row to its nearest cluster centre.

    For each of the ``ImageColumn`` pixels in ``ImageIn`` the Euclidean
    distance to every row of ``InitialCluster`` is computed over the first
    ``NumberOfBands`` bands. Every cluster whose distance equals the minimum
    (ties included) gets the pixel counted and its band values accumulated.
    Pixels whose minimum distance is exactly 0 are skipped, as in the
    original code.

    Parameters
    ----------
    j : any
        Passed through unchanged as the last element of the result.
    ImageColumn : int
        Number of pixels (rows of ``ImageIn``) to process.
    ImageIn : 2-D array
        Pixel values, indexed as ``ImageIn[pixel, band]``.
    ImageRow : any
        Unused; kept for interface compatibility.
    InitialCluster : 2-D array
        Cluster centres, one row per cluster.
    NumberOfBands, NumberOfClusters : int
        Sizes of the band and cluster axes.

    Returns
    -------
    list
        ``[Cluster, CountClusterPixels, EuclideanDistanceResultant,
        MeanCluster, j]``. ``MeanCluster`` holds per-cluster band SUMS, not
        averages; the division is left to the caller.
    """
    Cluster = zeros((1, ImageColumn, NumberOfClusters))
    CountClusterPixels = zeros((NumberOfClusters, 1))
    MeanCluster = zeros((NumberOfClusters, NumberOfBands))
    EuclideanDistanceResultant = zeros((1, ImageColumn, NumberOfClusters))

    for k in range(ImageColumn):
        pixel = ImageIn[k, 0:NumberOfBands]

        # broadcasting replaces the deprecated matlib.repmat tiling
        EuclideanDistanceResultant[0, k, :] = npsqrt(
            npsum(nppower(pixel - InitialCluster, 2), axis=1))

        DistanceNearestCluster = min(EuclideanDistanceResultant[0, k, :])
        if DistanceNearestCluster == 0:
            # a pixel sitting exactly on a centre is not accumulated
            continue

        for l in range(NumberOfClusters):
            if DistanceNearestCluster == EuclideanDistanceResultant[0, k, l]:
                CountClusterPixels[l] = CountClusterPixels[l] + 1
                # accumulate all bands of this pixel at once
                MeanCluster[l, :] += ImageIn[k, 0:NumberOfBands]
                Cluster[0, k, l] = l

    return [Cluster, CountClusterPixels, EuclideanDistanceResultant,
            MeanCluster, j]
def CohenEffectSize(group1=group1, group2=group2):
    """
    Compute Cohen's d between two groups, feature by feature.

    Parameters:
    -----------
    group1: NumPy array
        dimension is n_samples * n_features
    group2: NumPy array
        dimension is n_samples * n_features

    Both defaults are bound to the module-level ``group1`` / ``group2``
    objects at definition time.

    Return: Cohen's d, i.e. the difference of the group means (taken along
    axis 0) divided by the square root of the pooled variance.
    """
    size1 = len(group1)
    size2 = len(group2)

    mean_gap = group1.mean(axis=0) - group2.mean(axis=0)

    # NOTE(review): .var() is called without ddof while the weights are
    # (n - 1) -- confirm which variance estimator is intended.
    weighted1 = (size1 - 1) * group1.var(axis=0)
    weighted2 = (size2 - 1) * group2.var(axis=0)
    pooled_var = (weighted1 + weighted2) / (size1 + size2 - 2)

    return mean_gap / npsqrt(pooled_var)
def traptransit_fit_magseries(times, mags, errs,
                              transitparams,
                              sigclip=10.0,
                              plotfit=False,
                              magsarefluxes=False,
                              verbose=True):
    '''This fits a trapezoid transit model to a magnitude time series.

    transitparams = [transitperiod (time),
                     transitepoch (time),
                     transitdepth (flux or mags),
                     transitduration (phase),
                     ingressduration (phase)]

    for magnitudes -> transitdepth should be < 0
    for fluxes     -> transitdepth should be > 0

    if transitepoch is None, this function will do an initial spline fit to find
    an approximate minimum of the phased light curve using the given period.

    the transitdepth provided is checked against the value of magsarefluxes. if
    magsarefluxes = True, the transitdepth is forced to be > 0; if magsarefluxes
    = False, the transitdepth is forced to be < 0.

    NOTE: transitparams is modified in place when the epoch is determined
    automatically or when the sign of the depth has to be flipped.

    Returns a dict with the keys 'fittype', 'fitinfo', 'fitchisq',
    'fitredchisq', 'fitplotfile' and 'magseries'. If the fit fails, the
    numeric entries are nan/None.

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # check the transitparams
    transitperiod, transitepoch, transitdepth = transitparams[0:3]

    # check if we have a transitepoch to use
    if transitepoch is None:

        if verbose:
            LOGWARNING('no transitepoch given in transitparams, '
                       'trying to figure it out automatically...')

        # do a spline fit to figure out the approximate min of the LC
        try:
            spfit = spline_fit_magseries(times, mags, errs, transitperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            transitepoch = spfit['fitinfo']['fitepoch']

        # if the spline-fit fails, try a savgol fit instead
        except Exception:
            sgfit = savgol_fit_magseries(times, mags, errs, transitperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            transitepoch = sgfit['fitinfo']['fitepoch']

        # if everything failed, then bail out and ask for the transitepoch
        # (the return inside this finally block deliberately swallows any
        # exception raised by the fallback fit)
        finally:

            if transitepoch is None:

                LOGERROR(
                    "couldn't automatically figure out the transit epoch, "
                    "can't continue. please provide it in transitparams.")

                # assemble the returndict
                returndict = {
                    'fittype': 'traptransit',
                    'fitinfo': {
                        'initialparams': transitparams,
                        'finalparams': None,
                        'finalparamerrs': None,
                        'leastsqfit': None,
                        'fitmags': None,
                        'fitepoch': None,
                    },
                    'fitchisq': npnan,
                    'fitredchisq': npnan,
                    'fitplotfile': None,
                    'magseries': {
                        'phase': None,
                        'times': None,
                        'mags': None,
                        'errs': None,
                        'magsarefluxes': magsarefluxes,
                    },
                }

                return returndict

            else:

                # check the case when there are more than one transitepochs
                # returned; plain Python floats have no .size, so treat them
                # as a single value
                if getattr(transitepoch, 'size', 1) > 1:
                    if verbose:
                        LOGWARNING(
                            "could not auto-find a single minimum in LC for "
                            "transitepoch, using the first one returned")
                    transitparams[1] = transitepoch[0]

                else:

                    # collapse 0-d / 1-element arrays to a plain scalar so the
                    # %-format below and the fit both get a number
                    if hasattr(transitepoch, 'item'):
                        transitepoch = transitepoch.item()

                    if verbose:
                        LOGWARNING(
                            'using automatically determined transitepoch = %.5f'
                            % transitepoch)
                    transitparams[1] = transitepoch

    # next, check the transitdepth and fix it to the form required
    # (transitdepth is a scalar; it used to be indexed with [2] here, which
    # raised as soon as the sign had to be flipped)
    if magsarefluxes:
        if transitdepth < 0.0:
            transitparams[2] = -transitdepth

    else:
        if transitdepth > 0.0:
            transitparams[2] = -transitdepth

    # finally, do the fit
    try:
        leastsqfit = spleastsq(transits.trapezoid_transit_residual,
                               transitparams,
                               args=(stimes, smags, serrs),
                               full_output=True)
    except Exception:
        leastsqfit = None

    # if the fit succeeded, then we can return the final parameters
    if leastsqfit and leastsqfit[-1] in (1, 2, 3, 4):

        finalparams = leastsqfit[0]
        covxmatrix = leastsqfit[1]

        # calculate the chisq and reduced chisq
        fitmags, phase, ptimes, pmags, perrs = transits.trapezoid_transit_func(
            finalparams, stimes, smags, serrs)
        fitchisq = npsum(
            ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs))
        fitredchisq = fitchisq / (len(pmags) - len(finalparams) - 1)

        # get the residual variance and calculate the formal 1-sigma errs on the
        # final parameters
        residuals = leastsqfit[2]['fvec']
        residualvariance = (npsum(residuals * residuals) /
                            (pmags.size - finalparams.size))
        if covxmatrix is not None:
            covmatrix = residualvariance * covxmatrix
            stderrs = npsqrt(npdiag(covmatrix))
        else:
            LOGERROR('covxmatrix not available, fit probably failed!')
            stderrs = None

        if verbose:
            LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                    (fitchisq, fitredchisq))

        # get the fit epoch
        fperiod, fepoch = finalparams[:2]

        # assemble the returndict
        returndict = {
            'fittype': 'traptransit',
            'fitinfo': {
                'initialparams': transitparams,
                'finalparams': finalparams,
                'finalparamerrs': stderrs,
                'leastsqfit': leastsqfit,
                'fitmags': fitmags,
                'fitepoch': fepoch,
            },
            'fitchisq': fitchisq,
            'fitredchisq': fitredchisq,
            'fitplotfile': None,
            'magseries': {
                'phase': phase,
                'times': ptimes,
                'mags': pmags,
                'errs': perrs,
                'magsarefluxes': magsarefluxes,
            },
        }

        # make the fit plot if required
        if plotfit and isinstance(plotfit, str):

            _make_fit_plot(phase, pmags, perrs, fitmags,
                           fperiod, ptimes.min(), fepoch,
                           plotfit,
                           magsarefluxes=magsarefluxes)

            returndict['fitplotfile'] = plotfit

        return returndict

    # if the leastsq fit failed, return nothing
    else:

        LOGERROR('trapezoid-fit: least-squared fit to the light curve failed!')

        # assemble the returndict
        returndict = {
            'fittype': 'traptransit',
            'fitinfo': {
                'initialparams': transitparams,
                'finalparams': None,
                'finalparamerrs': None,
                'leastsqfit': leastsqfit,
                'fitmags': None,
                'fitepoch': None,
            },
            'fitchisq': npnan,
            'fitredchisq': npnan,
            'fitplotfile': None,
            'magseries': {
                'phase': None,
                'times': None,
                'mags': None,
                'errs': None,
                'magsarefluxes': magsarefluxes,
            },
        }

        return returndict
def gaussianeb_fit_magseries(times, mags, errs,
                             ebparams,
                             sigclip=10.0,
                             plotfit=False,
                             magsarefluxes=False,
                             verbose=True):
    '''This fits a double inverted gaussian EB model to a magnitude time series.

    ebparams = [period (time),
                epoch (time),
                pdepth (mags),
                pduration (phase),
                psdepthratio,
                secondaryphase]

    period is the period in days

    epoch is the time of minimum in JD

    pdepth is the depth of the primary eclipse
      - for magnitudes -> ebdepth should be < 0
      - for fluxes     -> ebdepth should be > 0

    pduration is the length of the primary eclipse in phase

    psdepthratio is the ratio of the secondary eclipse depth to that of the
    primary eclipse.

    secondaryphase is the phase at which the minimum of the secondary eclipse is
    located. This effectively parameterizes eccentricity.

    if epoch is None, this function will do an initial spline fit to find an
    approximate minimum of the phased light curve using the given period.

    the pdepth provided is checked against the value of magsarefluxes. if
    magsarefluxes = True, the ebdepth is forced to be > 0; if magsarefluxes
    = False, the ebdepth is forced to be < 0.

    NOTE: ebparams is modified in place when the epoch is determined
    automatically or when the sign of the depth has to be flipped.

    Returns a dict with the keys 'fittype', 'fitinfo', 'fitchisq',
    'fitredchisq', 'fitplotfile' and 'magseries'. If the fit fails, the
    numeric entries are nan/None.

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # check the ebparams
    ebperiod, ebepoch, ebdepth = ebparams[0:3]

    # check if we have a ebepoch to use
    if ebepoch is None:

        if verbose:
            LOGWARNING('no ebepoch given in ebparams, '
                       'trying to figure it out automatically...')

        # do a spline fit to figure out the approximate min of the LC
        try:
            spfit = spline_fit_magseries(times, mags, errs, ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = spfit['fitinfo']['fitepoch']

        # if the spline-fit fails, try a savgol fit instead
        except Exception:
            sgfit = savgol_fit_magseries(times, mags, errs, ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = sgfit['fitinfo']['fitepoch']

        # if everything failed, then bail out and ask for the ebepoch
        # (the return inside this finally block deliberately swallows any
        # exception raised by the fallback fit)
        finally:

            if ebepoch is None:

                LOGERROR("couldn't automatically figure out the eb epoch, "
                         "can't continue. please provide it in ebparams.")

                # assemble the returndict
                returndict = {
                    'fittype': 'gaussianeb',
                    'fitinfo': {
                        'initialparams': ebparams,
                        'finalparams': None,
                        'finalparamerrs': None,
                        'leastsqfit': None,
                        'fitmags': None,
                        'fitepoch': None,
                    },
                    'fitchisq': npnan,
                    'fitredchisq': npnan,
                    'fitplotfile': None,
                    'magseries': {
                        'phase': None,
                        'times': None,
                        'mags': None,
                        'errs': None,
                        'magsarefluxes': magsarefluxes,
                    },
                }

                return returndict

            else:

                # plain Python floats have no .size, so treat them as a
                # single value
                if getattr(ebepoch, 'size', 1) > 1:
                    if verbose:
                        LOGWARNING('could not auto-find a single minimum '
                                   'for ebepoch, using the first one returned')
                    ebparams[1] = ebepoch[0]

                else:

                    # collapse 0-d / 1-element arrays to a plain scalar so the
                    # %-format below and the fit both get a number
                    if hasattr(ebepoch, 'item'):
                        ebepoch = ebepoch.item()

                    if verbose:
                        LOGWARNING(
                            'using automatically determined ebepoch = %.5f'
                            % ebepoch)
                    ebparams[1] = ebepoch

    # next, check the ebdepth and fix it to the form required
    # (ebdepth is a scalar; it used to be indexed with [2] here, which raised
    # as soon as the sign had to be flipped)
    if magsarefluxes:
        if ebdepth < 0.0:
            ebparams[2] = -ebdepth

    else:
        if ebdepth > 0.0:
            ebparams[2] = -ebdepth

    # finally, do the fit
    try:
        leastsqfit = spleastsq(eclipses.invgauss_eclipses_residual,
                               ebparams,
                               args=(stimes, smags, serrs),
                               full_output=True)
    except Exception:
        leastsqfit = None

    # if the fit succeeded, then we can return the final parameters
    if leastsqfit and leastsqfit[-1] in (1, 2, 3, 4):

        finalparams = leastsqfit[0]
        covxmatrix = leastsqfit[1]

        # calculate the chisq and reduced chisq
        fitmags, phase, ptimes, pmags, perrs = eclipses.invgauss_eclipses_func(
            finalparams, stimes, smags, serrs)
        fitchisq = npsum(
            ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs))
        fitredchisq = fitchisq / (len(pmags) - len(finalparams) - 1)

        # get the residual variance and calculate the formal 1-sigma errs on the
        # final parameters
        residuals = leastsqfit[2]['fvec']
        residualvariance = (npsum(residuals * residuals) /
                            (pmags.size - finalparams.size))
        if covxmatrix is not None:
            covmatrix = residualvariance * covxmatrix
            stderrs = npsqrt(npdiag(covmatrix))
        else:
            LOGERROR('covxmatrix not available, fit probably failed!')
            stderrs = None

        if verbose:
            LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                    (fitchisq, fitredchisq))

        # get the fit epoch
        fperiod, fepoch = finalparams[:2]

        # assemble the returndict
        returndict = {
            'fittype': 'gaussianeb',
            'fitinfo': {
                'initialparams': ebparams,
                'finalparams': finalparams,
                'finalparamerrs': stderrs,
                'leastsqfit': leastsqfit,
                'fitmags': fitmags,
                'fitepoch': fepoch,
            },
            'fitchisq': fitchisq,
            'fitredchisq': fitredchisq,
            'fitplotfile': None,
            'magseries': {
                'phase': phase,
                'times': ptimes,
                'mags': pmags,
                'errs': perrs,
                'magsarefluxes': magsarefluxes,
            },
        }

        # make the fit plot if required
        if plotfit and isinstance(plotfit, str):

            _make_fit_plot(phase, pmags, perrs, fitmags,
                           fperiod, ptimes.min(), fepoch,
                           plotfit,
                           magsarefluxes=magsarefluxes)

            returndict['fitplotfile'] = plotfit

        return returndict

    # if the leastsq fit failed, return nothing
    else:

        LOGERROR('eb-fit: least-squared fit to the light curve failed!')

        # assemble the returndict
        returndict = {
            'fittype': 'gaussianeb',
            'fitinfo': {
                'initialparams': ebparams,
                'finalparams': None,
                'finalparamerrs': None,
                'leastsqfit': leastsqfit,
                'fitmags': None,
                'fitepoch': None,
            },
            'fitchisq': npnan,
            'fitredchisq': npnan,
            'fitplotfile': None,
            'magseries': {
                'phase': None,
                'times': None,
                'mags': None,
                'errs': None,
                'magsarefluxes': magsarefluxes,
            },
        }

        return returndict
def fit_nls(
    df_data,
    md=None,
    out=None,
    var_fix=None,
    df_init=None,
    verbose=True,
    uq_method=None,
    **kwargs,
):
    r"""Fit a model with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS), and
    return an executable model with those frozen best-fit levels. Optionally,
    fit a distribution on the parameters to quantify parametric uncertainty.

    Note: This is a *synonym* for eval_nls(); see the documentation for
    eval_nls() for keyword argument options available beyond those listed here.

    Args:
        df_data (DataFrame): Data for estimating best-fit variable levels.
            Variables not found in df_data optimized for fitting.
        md (gr.Model): Model to analyze. All model variables
            selected for fitting must be bounded or random. Deterministic
            variables may have semi-infinite bounds.
        out (list or None): Outputs whose variance matrices are pooled when
            uq_method = "linpool". Defaults to md.out.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides n_restart
        n_restart (int): Number of restarts to try; the first try is at
            the nominal conditions of the model. Returned model will use
            the least-error parameter set among restarts tested.
        n_maxiter (int): Optimizer maximum iterations
        verbose (bool): Print best-fit parameters to console?
        uq_method (str OR None): If string, select method to quantify parameter
            uncertainties. If None, provide best-fit values only. Methods:
            uq_method = "linpool": assume normal errors; linearly approximate
                parameter effects; equally pool variance matrices for each
                output

    Returns:
        gr.Model: Model for evaluation with best-fit variables frozen to
            optimized levels.

    Raises:
        ValueError: If md is None, if no free variables remain after fitting,
            or if uq_method is not recognized.

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_windowed
        >>> from grama.models import make_trajectory_linear
        >>> X = gr.Intention()
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>> md_fitted = (
        >>>     df_trajectory_windowed
        >>>     >> gr.ft_nls(
        >>>         md=md_trajectory,
        >>>         uq_method="linpool",
        >>>     )
        >>> )

    """
    ## Check invariants
    # (must come first: the `out` default below dereferences md)
    if md is None:
        raise ValueError("Must provide model md")

    ## Check `out` invariants
    if out is None:
        out = md.out
        print("... fit_nls setting out = {}".format(out))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in md.var_det:
        wid = md.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    # list form for DataFrame column selection; a set is not a supported
    # indexer in current pandas
    var_fix_cols = list(var_fix)

    ## Run eval_nls to fit model parameter values
    df_fit = eval_nls(
        md,
        df_data=df_data,
        var_fix=var_fix,
        df_init=df_init,
        append=True,
        verbose=verbose,
        **kwargs,
    )
    ## Select best-fit values
    df_sorted = df_fit.sort_values(by="mse", axis=0)
    df_best = df_sorted.iloc[[0]].reset_index(drop=True)
    if verbose:
        print(df_sorted)

    ## Determine variables that were fitted
    var_fitted = list(set(md.var).intersection(set(df_best.columns)))
    var_remain = list(set(md.var).difference(set(var_fitted)))

    if len(var_remain) == 0:
        raise ValueError("Resulting model is constant!")

    ## Assemble and return fitted model
    if md.name is None:
        name = "(Fitted Model)"
    else:
        name = md.name + " (Fitted)"

    ## Calibrate parametric uncertainty, if requested
    if uq_method == "linpool":
        ## Precompute data
        df_nom = eval_nominal(md, df_det="nom")
        df_base = tran_outer(
            df_data,
            concat((df_best[var_fitted], df_nom[var_fix_cols]), axis=1),
        )
        df_pred = eval_df(md, df=df_base)
        df_grad = eval_grad_fd(md, df_base=df_base, var=var_fitted)

        ## Pool variance matrices
        n_obs = df_data.shape[0]
        n_fitted = len(var_fitted)
        Sigma_pooled = zeros((n_fitted, n_fitted))

        for output in out:
            ## Approximate sigma_sq
            sigma_sq = npsum(
                nppow(df_data[output].values - df_pred[output].values, 2)
            ) / (n_obs - n_fitted)
            ## Approximate (pseudo)-inverse hessian
            var_grad = list(map(lambda v: "D" + output + "_D" + v, var_fitted))
            Z = df_grad[var_grad].values
            Hinv = pinv(Z.T.dot(Z), hermitian=True)

            ## Add variance matrix to pooled Sigma
            Sigma_pooled = Sigma_pooled + sigma_sq * Hinv / n_fitted

        ## Check model for identifiability
        kappa_out = cond(Sigma_pooled)
        if kappa_out > 1e10:
            warn(
                "Model is locally unidentifiable as measured by the "
                + "condition number of the pooled covariance matrix; "
                + "kappa = {}".format(kappa_out),
                RuntimeWarning,
            )

        ## Convert to std deviations and correlation
        sigma_comp = npsqrt(diag(Sigma_pooled))
        corr_mat = Sigma_pooled / (
            atleast_2d(sigma_comp).T.dot(atleast_2d(sigma_comp))
        )
        corr_data = []
        I, J = triu_indices(n_fitted, k=1)
        for i, j in zip(I, J):
            corr_data.append([var_fitted[i], var_fitted[j], corr_mat[i, j]])
        df_corr = DataFrame(data=corr_data, columns=["var1", "var2", "corr"])

        ## Assemble marginals
        marginals = {}
        for ind, var_ in enumerate(var_fitted):
            marginals[var_] = {
                "dist": "norm",
                "loc": df_best[var_].values[0],
                "scale": sigma_comp[ind],
            }

        ## Construct model with Gaussian copula
        if len(var_fix) > 0:
            md_res = (
                Model(name)
                >> cp_function(
                    lambda x: df_nom[var_fix_cols].values,
                    var=set(var_remain).difference(var_fix),
                    out=var_fix,
                    name="Fix variable levels",
                )
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )
        else:
            md_res = (
                Model(name)
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )

    ## Return deterministic model
    elif uq_method is None:
        md_res = (
            Model(name)
            >> cp_function(
                lambda x: df_best[var_fitted].values,
                var=var_remain,
                out=var_fitted,
                name="Fix variable levels",
            )
            >> cp_md_det(md=md)
        )

    else:
        raise ValueError(
            "uq_method option {} not recognized".format(uq_method))

    return md_res
def _fourier_fit_failure_dict(fourierorder, initialfit,
                              ptimes, phase, pmags, perrs, magsarefluxes):
    '''Assemble the result dict returned when the Fourier fit fails.'''

    return {
        'fittype': 'fourier',
        'fitinfo': {
            'fourierorder': fourierorder,
            'finalparams': None,
            'finalparamerrs': None,
            'initialfit': initialfit,
            'fitmags': None,
            'fitperiod': None,
            'fitepoch': None,
            'actual_fitepoch': None,
        },
        'fitchisq': npnan,
        'fitredchisq': npnan,
        'fitplotfile': None,
        'magseries': {
            'times': ptimes,
            'phase': phase,
            'mags': pmags,
            'errs': perrs,
            'magsarefluxes': magsarefluxes
        }
    }


def fourier_fit_magseries(
        times, mags, errs, period,
        fourierorder=None,
        fourierparams=None,
        fix_period=True,
        scale_errs_redchisq_unity=True,
        sigclip=3.0,
        magsarefluxes=False,
        plotfit=False,
        ignoreinitfail=True,
        verbose=True,
        curve_fit_kwargs=None,
):
    '''This fits a Fourier series to a mag/flux time series.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit a Fourier cosine series to.

    period : float
        The period to use for the Fourier fit.

    fourierorder : None or int
        If this is an int, will be interpreted as the Fourier order of the
        series to fit to the input mag/flux times-series. If this is None and
        `fourierparams` is specified, `fourierparams` will be used directly to
        generate the fit Fourier series. If `fourierparams` is also None, this
        function will try to fit a Fourier cosine series of order 3 to the
        mag/flux time-series.

    fourierparams : list of floats or None
        If this is specified as a list of floats, it must be of the form below::

            [fourier_amp1, fourier_amp2, fourier_amp3,...,fourier_ampN,
             fourier_phase1, fourier_phase2, fourier_phase3,...,fourier_phaseN]

        to specify a Fourier cosine series of order N. If this is None and
        `fourierorder` is specified, the Fourier order specified there will be
        used to construct the Fourier cosine series used to fit the input
        mag/flux time-series. If both are None, this function will try to fit a
        Fourier cosine series of order 3 to the input mag/flux time-series.

    fix_period : bool
        If True, will fix the period with fitting the sinusoidal function to the
        phased light curve.

    scale_errs_redchisq_unity : bool
        If True, the standard errors on the fit parameters will be scaled to
        make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg
        for the ``scipy.optimize.curve_fit`` function to False.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to the
        mag/flux time-series and writes the plot to the path specified here.

    ignoreinitfail : bool
        If this is True, ignores the initial failure to find a set of optimized
        Fourier parameters using the global optimization function and proceeds
        to do a least-squares fit anyway.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    curve_fit_kwargs : dict or None
        If not None, this should be a dict containing extra kwargs to pass to
        the scipy.optimize.curve_fit function.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'fourier',
                'fitinfo':{
                    'finalparams': the list of final model fit params,
                    'finalparamerrs': list of errs for each model fit param,
                    'fitmags': the model fit mags,
                    'fitperiod': the fit period if this wasn't set to fixed,
                    'fitepoch': this is times.min() for this fit type,
                    'actual_fitepoch': time of minimum light from fit model
                    ... other fit function specific keys ...
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if plotfit is a str,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }

        NOTE: the returned value of 'fitepoch' in the 'fitinfo' dict returned by
        this function is the time value of the first observation since this is
        where the LC is folded for the fit procedure. To get the actual time of
        minimum epoch as calculated by a spline fit to the phased LC, use the
        key 'actual_fitepoch' in the 'fitinfo' dict.

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    phase, pmags, perrs, ptimes, mintime = (
        get_phased_quantities(stimes, smags, serrs, period)
    )

    # get the fourier order either from the scalar order kwarg...
    if fourierorder and fourierorder > 0 and not fourierparams:

        fourieramps = [0.6] + [0.2] * (fourierorder - 1)
        fourierphas = [0.1] + [0.1] * (fourierorder - 1)
        fourierparams = fourieramps + fourierphas

    # or from the fully specified coeffs vector
    elif not fourierorder and fourierparams:

        fourierorder = int(len(fourierparams) / 2)

    else:
        LOGWARNING('specified both/neither Fourier order AND Fourier coeffs, '
                   'using default Fourier order of 3')
        fourierorder = 3
        fourieramps = [0.6] + [0.2] * (fourierorder - 1)
        fourierphas = [0.1] + [0.1] * (fourierorder - 1)
        fourierparams = fourieramps + fourierphas

    if verbose:
        LOGINFO('fitting Fourier series of order %s to '
                'mag series with %s observations, '
                'using period %.6f, folded at %.6f' % (fourierorder,
                                                       len(phase),
                                                       period,
                                                       mintime))

    # initial minimize call to find global minimum in chi-sq
    initialfit = spminimize(_fourier_chisq,
                            fourierparams,
                            args=(phase, pmags, perrs))

    # if the initial fit didn't succeed (and we're not told to ignore that),
    # we can't proceed
    if not (initialfit.success or ignoreinitfail):

        LOGERROR('initial Fourier fit did not succeed, '
                 'reason: %s, returning scipy OptimizeResult'
                 % initialfit.message)

        return _fourier_fit_failure_dict(fourierorder, initialfit,
                                         ptimes, phase, pmags, perrs,
                                         magsarefluxes)

    if verbose:
        LOGINFO('initial fit done, refining...')

    leastsqparams = initialfit.x

    try:

        # the period is the first fit parameter, followed by the amplitudes
        # and the phases
        curvefit_params = npconcatenate((nparray([period]),
                                         leastsqparams))

        # set up the bounds for the fit parameters: the Fourier coefficients
        # are unbounded; the period is either pinned to a tiny interval
        # around the input value or only required to be non-negative
        if fix_period:
            period_lower, period_upper = period - 1.0e-7, period + 1.0e-7
        else:
            period_lower, period_upper = 0.0, npinf

        curvefit_bounds = (
            [period_lower] + [-npinf] * (2 * fourierorder),
            [period_upper] + [npinf] * (2 * fourierorder),
        )

        curvefit_func = partial(
            sinusoidal.fourier_curvefit_func,
            zerolevel=npmedian(smags),
            epoch=mintime,
            fixed_period=period if fix_period else None,
        )

        # a single call covers both the with- and without-extra-kwargs cases
        extra_kwargs = curve_fit_kwargs if curve_fit_kwargs is not None else {}

        finalparams, covmatrix = curve_fit(
            curvefit_func,
            stimes, smags,
            p0=curvefit_params,
            sigma=serrs,
            bounds=curvefit_bounds,
            absolute_sigma=(not scale_errs_redchisq_unity),
            **extra_kwargs
        )

    except Exception:
        LOGEXCEPTION("curve_fit returned an exception")
        finalparams, covmatrix = None, None

    # if the leastsq fit did not succeed, return the failure dict
    if finalparams is None or covmatrix is None:

        LOGERROR(
            'fourier-fit: least-squared fit to the light curve failed'
        )

        return _fourier_fit_failure_dict(fourierorder, initialfit,
                                         ptimes, phase, pmags, perrs,
                                         magsarefluxes)

    # the fit succeeded, so we can return the final parameters

    # this is the fit period
    fperiod = finalparams[0]

    phase, pmags, perrs, ptimes, mintime = (
        get_phased_quantities(stimes, smags, serrs, fperiod)
    )

    # calculate the chisq and reduced chisq
    fitmags = _fourier_func(finalparams[1:], phase, pmags)

    fitchisq = npsum(
        ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs)
    )

    # NOTE(review): this is used as the degrees of freedom. Fixing the period
    # removes a free parameter, which would normally ADD one degree of
    # freedom rather than subtract one -- left unchanged, confirm intent.
    n_free_params = len(pmags) - len(finalparams)
    if fix_period:
        n_free_params -= 1

    fitredchisq = fitchisq / n_free_params

    # NOTE(review): stderrs includes the error on the period as its first
    # element, while 'finalparams' below excludes the period.
    stderrs = npsqrt(npdiag(covmatrix))

    if verbose:
        LOGINFO(
            'final fit done. chisq = %.5f, reduced chisq = %.5f' %
            (fitchisq, fitredchisq)
        )

    # figure out the time of light curve minimum (i.e. the fit epoch)
    # this is when the fit mag is maximum (i.e. the faintest)
    # or if magsarefluxes = True, then this is when fit flux is minimum
    if not magsarefluxes:
        fitmagminind = npwhere(fitmags == npmax(fitmags))
    else:
        fitmagminind = npwhere(fitmags == npmin(fitmags))
    if len(fitmagminind[0]) > 1:
        fitmagminind = (fitmagminind[0][0], )

    # assemble the returndict
    returndict = {
        'fittype': 'fourier',
        'fitinfo': {
            'fourierorder': fourierorder,
            # return coeffs only for backwards compatibility with
            # existing functions that use the returned value of
            # fourier_fit_magseries
            'finalparams': finalparams[1:],
            'finalparamerrs': stderrs,
            'initialfit': initialfit,
            'fitmags': fitmags,
            'fitperiod': finalparams[0],
            # the 'fitepoch' is just the minimum time here
            'fitepoch': mintime,
            # the actual fit epoch is calculated as the time of minimum
            # light OF the fit model light curve
            'actual_fitepoch': ptimes[fitmagminind]
        },
        'fitchisq': fitchisq,
        'fitredchisq': fitredchisq,
        'fitplotfile': None,
        'magseries': {
            'times': ptimes,
            'phase': phase,
            'mags': pmags,
            'errs': perrs,
            'magsarefluxes': magsarefluxes
        },
    }

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str):

        make_fit_plot(phase, pmags, perrs, fitmags,
                      fperiod, mintime, mintime,
                      plotfit,
                      magsarefluxes=magsarefluxes)

        returndict['fitplotfile'] = plotfit

    return returndict
def normalize_v3(arr):
    """Scale every row of ``arr`` to (approximately) unit length.

    ``arr`` is indexed as ``arr[:, 0]``, ``arr[:, 1]`` and ``arr[:, 2]``, i.e.
    one 3-component vector per row. A constant of 1e-6 is added to each row
    length before dividing, so all-zero rows come back as zeros instead of
    NaN; as a side effect non-zero rows end up very slightly shorter than 1.

    Returns a new array with the same shape as ``arr``.
    """
    x_comp, y_comp, z_comp = arr[:, 0], arr[:, 1], arr[:, 2]
    row_lengths = npsqrt(x_comp ** 2 + y_comp ** 2 + z_comp ** 2) + 0.000001
    # divide the transposed array so the per-row lengths broadcast correctly
    columns_first = arr.T
    return divide(columns_first, row_lengths).T
def pdw_worker(task):
    '''
    Parallel worker: computes the string length at one trial frequency.

    The work item is a sequence laid out as follows::

        task[0] = frequency for this worker
        task[1] = times array
        task[2] = mags array
        task[3] = fold_time
        task[4] = j_range (not used by the worker; kept so the task layout
                  stays unchanged for callers)
        task[5] = keep_threshold_1
        task[6] = keep_threshold_2
        task[7] = phasebinsize

    we don't need errs for the worker.

    Returns a tuple ``(period, string_length, good_flag)``. If anything goes
    wrong, the exception is logged and ``(period, nan, False)`` is returned,
    where ``period`` is NaN if it could not even be computed from the
    frequency.

    '''

    frequency = task[0]
    times, modmags = task[1], task[2]
    fold_time = task[3]
    keep_threshold_1 = task[5]
    keep_threshold_2 = task[6]
    phasebinsize = task[7]

    # bind this before the try block: the except branch returns it, and it
    # would otherwise be undefined if 1.0/frequency itself raises
    period = npnan

    try:

        period = 1.0 / frequency

        # use the common phaser to phase and sort the mag
        phased = phase_magseries(times,
                                 modmags,
                                 period,
                                 fold_time,
                                 wrap=False,
                                 sort=True)

        # bin in phase if requested, this turns this into a sort of PDM method
        if phasebinsize is not None and phasebinsize > 0:
            bphased = pwd_phasebin(phased['phase'],
                                   phased['mags'],
                                   binsize=phasebinsize)
            phase_sorted = bphased[0]
            mod_mag_sorted = bphased[1]
        else:
            phase_sorted = phased['phase']
            mod_mag_sorted = phased['mags']

        # squared distance between each point and its predecessor in phase
        rolledmags = nproll(mod_mag_sorted, 1)
        rolledphases = nproll(phase_sorted, 1)
        strings = (
            (rolledmags - mod_mag_sorted) * (rolledmags - mod_mag_sorted) +
            (rolledphases - phase_sorted) * (rolledphases - phase_sorted)
        )

        # the first element pairs the first and last points; redo it with the
        # phase difference shifted by one full cycle to close the string
        strings[0] = (
            ((mod_mag_sorted[0] - mod_mag_sorted[-1]) *
             (mod_mag_sorted[0] - mod_mag_sorted[-1])) +
            ((phase_sorted[0] - phase_sorted[-1] + 1) *
             (phase_sorted[0] - phase_sorted[-1] + 1))
        )

        # now calculate the string length
        strlen = npsum(npsqrt(strings))

        p_goodflag = bool(keep_threshold_1 < strlen < keep_threshold_2)

        return (period, strlen, p_goodflag)

    except Exception:

        LOGEXCEPTION('error in DWP')
        return (period, npnan, False)
def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False):
    '''Computes the Stetson J variability index from consecutive observations.

    Based on Nicole Loncke's work for her Planets and Life certificate at
    Princeton in 2014.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    weightbytimediff : bool
        If True, the contribution of each consecutive pair is multiplied by a
        weight that depends on the time gap between the two observations,
        following the scheme in Fruth+ 2012 and Zhang+ 2003 (as seen in
        Sokolovsky+ 2017)::

            w_i = exp(- (t_i+1 - t_i)/ delta_t )

        where `delta_t` is the median of the consecutive time differences.

    Returns
    -------

    float
        The calculated Stetson J variability index, or NaN (after logging an
        error) if there are 9 or fewer measurements.

    '''

    ndet = len(fmags)

    # too few points: bail out early
    if ndet <= 9:
        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson J index')
        return npnan

    # scaled residuals about the median magnitude
    center = npmedian(fmags)
    scale = (ndet / (ndet - 1))
    residuals = scale * (fmags - center) / ferrs

    # shifting by one element lines up x_(i-1) with x_i
    shifted = nproll(residuals, 1)

    if weightbytimediff:
        gaps = npdiff(ftimes)
        typical_gap = npmedian(gaps)
        pair_weights = npexp(-gaps / typical_gap)
        products = (pair_weights * residuals[1:] * shifted[1:])
    else:
        # drop the first element: it pairs the first point with the last one
        products = (residuals * shifted)[1:]

    signed_roots = npsign(products) * npsqrt(npabs(products))
    return (npsum(signed_roots)) / ndet
def normalize_v3(arr):
    '''
    Return the rows of ``arr`` rescaled to (approximately) unit length.

    Each row is treated as a 3-component vector (columns 0, 1 and 2 are
    read). The row lengths are offset by 1e-6 before the division, which
    keeps all-zero rows at zero rather than turning them into NaN.
    '''
    squared_lengths = arr[:, 0]**2 + arr[:, 1]**2 + arr[:, 2]**2
    lens = npsqrt(squared_lengths)
    lens = lens + 0.000001
    # work on the transpose so that ``lens`` broadcasts along the rows
    scaled = divide(arr.T, lens)
    return scaled.T
def macf_period_find(
        times,
        mags,
        errs,
        fillgaps=0.0,
        filterwindow=11,
        forcetimebin=None,
        maxlags=None,
        maxacfpeaks=10,
        smoothacf=21,  # set for Kepler-type LCs, see details below
        smoothfunc=_smooth_acf_savgol,
        smoothfunckwargs=None,
        magsarefluxes=False,
        sigclip=3.0,
        verbose=True,
        periodepsilon=0.1,  # doesn't do anything, for consistent external API
        nworkers=None,  # doesn't do anything, for consistent external API
        startp=None,  # doesn't do anything, for consistent external API
        endp=None,  # doesn't do anything, for consistent external API
        autofreq=None,  # doesn't do anything, for consistent external API
        stepsize=None,  # doesn't do anything, for consistent external API
):
    '''This finds periods using the McQuillan+ (2013a, 2014) ACF method.

    The kwargs from `periodepsilon` to `stepsize` don't do anything but are used
    to present a consistent API for all periodbase period-finders to an outside
    driver (e.g. the one in the checkplotserver).

    Parameters
    ----------

    times,mags,errs : np.array
        The input magnitude/flux time-series to run the period-finding for.

    fillgaps : 'noiselevel' or float
        This sets what to use to fill in gaps in the time series. If this is
        'noiselevel', will smooth the light curve using a point window size of
        `filterwindow` (this should be an odd integer), subtract the smoothed LC
        from the actual LC and estimate the RMS. This RMS will be used to fill
        in the gaps. Other useful values here are 0.0, and npnan.

    filterwindow : int
        The light curve's smoothing filter window size to use if
        `fillgaps='noiselevel'`.

    forcetimebin : None or float
        This is used to force a particular cadence in the light curve other than
        the automatically determined cadence. This effectively rebins the light
        curve to this cadence. This should be in the same time units as `times`.

    maxlags : None or int
        This is the maximum number of lags to calculate. If None, will calculate
        all lags.

    maxacfpeaks : int
        This is the maximum number of ACF peaks to use when finding the highest
        peak and obtaining a fit period.

    smoothacf : int
        This is the number of points to use as the window size when smoothing
        the ACF with the `smoothfunc`. This should be an odd integer value. If
        this is None, will not smooth the ACF, but this will probably lead to
        finding spurious peaks in a generally noisy ACF.

        For Kepler, a value between 21 and 51 seems to work fine. For ground
        based data, much larger values may be necessary: between 1001 and 2001
        seem to work best for the HAT surveys. This is dependent on cadence, RMS
        of the light curve, the periods of the objects you're looking for, and
        finally, any correlated noise in the light curve. Make a plot of the
        smoothed/unsmoothed ACF vs. lag using the result dict of this function
        and the `plot_acf_results` function above to see the identified ACF
        peaks and what kind of smoothing might be needed.

        The value of `smoothacf` will also be used to figure out the interval to
        use when searching for local peaks in the ACF: this interval is 1/2 of
        the `smoothacf` value. If no smoothing is applied (`smoothacf` is None,
        zero, or not a positive int), a search interval of 1 lag is used.

    smoothfunc : Python function
        This is the function that will be used to smooth the ACF. This should
        take at least one kwarg: 'windowsize'. Other kwargs can be passed in
        using a dict provided in `smoothfunckwargs`. By default, this uses a
        Savitsky-Golay filter, a Gaussian filter is also provided but not
        used. Another good option would be an actual low-pass filter (generated
        using scipy.signal?) to remove all high frequency noise from the ACF.

    smoothfunckwargs : dict or None
        The dict of optional kwargs to pass in to the `smoothfunc`. This dict
        is copied, not modified.

    magsarefluxes : bool
        If your input measurements in `mags` are actually fluxes instead of
        mags, set this to True.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        If True, will indicate progress and report errors.

    Returns
    -------

    dict
        Returns a dict with results. dict['bestperiod'] is the estimated best
        period and dict['fitperiodrms'] is its estimated error. Other
        interesting things in the output include:

        - dict['acfresults']: all results from calculating the ACF. in
          particular, the unsmoothed ACF might be of interest:
          dict['acfresults']['acf'] and dict['acfresults']['lags'].

        - dict['lags'] and dict['acf'] contain the ACF after smoothing was
          applied.

        - dict['periods'] and dict['lspvals'] can be used to construct a
          pseudo-periodogram.

        - dict['naivebestperiod'] is obtained by multiplying the lag at the
          highest ACF peak with the cadence. This is usually close to the fit
          period (dict['fitbestperiod']), which is calculated by doing a fit to
          the lags vs. peak index relation as in McQuillan+ 2014.

        If the linear fit fails, a warning is logged, the fit-related items
        (dict['fitbestperiod'], dict['fitperiodrms'], dict['periodfitcoeffs'],
        dict['periodfitcovar']) are filled with NaN, and dict['bestperiod']
        falls back to dict['naivebestperiod'].

    '''

    # get the ACF
    acfres = autocorr_magseries(
        times,
        mags,
        errs,
        maxlags=maxlags,
        fillgaps=fillgaps,
        forcetimebin=forcetimebin,
        sigclip=sigclip,
        magsarefluxes=magsarefluxes,
        filterwindow=filterwindow,
        verbose=verbose
    )

    xlags = acfres['lags']

    # smooth the ACF if requested
    if smoothacf and isinstance(smoothacf, int) and smoothacf > 0:

        if smoothfunckwargs is None:
            sfkwargs = {'windowsize': smoothacf}
        else:
            sfkwargs = smoothfunckwargs.copy()
            sfkwargs.update({'windowsize': smoothacf})

        xacf = smoothfunc(acfres['acf'], **sfkwargs)

        # the peak search interval is half the smoothing window
        searchinterval = int(smoothacf / 2)

    else:

        # no smoothing: report the kwargs as given and fall back to the
        # narrowest peak search interval, since there is no smoothing window
        # to derive it from (smoothacf may be None here)
        sfkwargs = smoothfunckwargs
        xacf = acfres['acf']
        searchinterval = 1

    # get the relative peak heights and fit best lag
    peakres = _get_acf_peakheights(xlags, xacf,
                                   npeaks=maxacfpeaks,
                                   searchinterval=searchinterval)

    # this is the best period's best ACF peak height
    bestlspval = peakres['bestpeakheight']

    try:

        # get the fit best lag from a linear fit to the peak index vs time(peak
        # lag) function as in McQuillan+ (2014)
        fity = npconcatenate((
            [0.0, peakres['bestlag']],
            peakres['relpeaklags'][peakres['relpeaklags'] > peakres['bestlag']]
        ))
        fity = fity * acfres['cadence']
        fitx = nparange(fity.size)

        fitcoeffs, fitcovar = nppolyfit(fitx, fity, 1, cov=True)

        # fit best period is the gradient of fit
        fitbestperiod = fitcoeffs[0]
        bestperiodrms = npsqrt(fitcovar[0, 0])  # from the covariance matrix

    except Exception:

        # don't re-raise here: the NaN placeholders below make the function
        # fall back to the naive best period further down
        LOGWARNING('linear fit to time at each peak lag '
                   'value vs. peak number failed, '
                   'naively calculated ACF period may not be accurate')
        fitcoeffs = nparray([npnan, npnan])
        fitcovar = nparray([[npnan, npnan], [npnan, npnan]])
        fitbestperiod = npnan
        bestperiodrms = npnan

    # calculate the naive best period using delta_tau = lag * cadence
    naivebestperiod = peakres['bestlag'] * acfres['cadence']

    if fitbestperiod < naivebestperiod:
        LOGWARNING('fit bestperiod = %.5f may be an alias, '
                   'naively calculated bestperiod is = %.5f' %
                   (fitbestperiod, naivebestperiod))

    if npisfinite(fitbestperiod):
        bestperiod = fitbestperiod
    else:
        bestperiod = naivebestperiod

    return {
        'bestperiod': bestperiod,
        'bestlspval': bestlspval,
        'nbestpeaks': maxacfpeaks,
        # for compliance with the common pfmethod API
        'nbestperiods': npconcatenate([
            [fitbestperiod],
            peakres['relpeaklags'][1:maxacfpeaks] * acfres['cadence']
        ]),
        'nbestlspvals': peakres['maxacfs'][:maxacfpeaks],
        'lspvals': xacf,
        'periods': xlags * acfres['cadence'],
        'acf': xacf,
        'lags': xlags,
        'method': 'acf',
        'naivebestperiod': naivebestperiod,
        'fitbestperiod': fitbestperiod,
        'fitperiodrms': bestperiodrms,
        'periodfitcoeffs': fitcoeffs,
        'periodfitcovar': fitcovar,
        'kwargs': {
            'maxlags': maxlags,
            'maxacfpeaks': maxacfpeaks,
            'fillgaps': fillgaps,
            'filterwindow': filterwindow,
            'smoothacf': smoothacf,
            'smoothfunckwargs': sfkwargs,
            'magsarefluxes': magsarefluxes,
            'sigclip': sigclip
        },
        'acfresults': acfres,
        'acfpeaks': peakres
    }
def normalize_v3(arr):
    '''
    Normalize a numpy array of 3 component vectors shape=(n,3).

    Every row of ``arr`` is divided by its Euclidean length (computed from
    columns 0, 1 and 2) and the result is returned as a new array.

    Rows of length zero cannot be normalized; they are returned unchanged
    (all zeros) instead of being turned into NaN by a 0/0 division. Rows of
    non-zero length are divided by their exact length.
    '''
    lens = npsqrt(arr[:, 0]**2 + arr[:, 1]**2 + arr[:, 2]**2)
    # (lens == 0) contributes 1 only for zero-length rows, so those are
    # divided by 1 while all other divisors are left untouched
    safe_lens = lens + (lens == 0)
    return divide(arr.T, safe_lens).T
def get_length(vector):
    """
    Return the Euclidean length of ``vector``.

    The length is the square root of the sum of the element-wise products
    ``vector * vector``.
    """
    squared_components = vector * vector
    squared_length = npsum(squared_components)
    return npsqrt(squared_length)
def sqrt(x):
    """Return the square root of ``x`` by delegating to ``npsqrt``."""
    root = npsqrt(x)
    return root
def normalize(vector):
    """
    Return ``vector`` divided by its Euclidean norm.

    The norm is the square root of the sum of ``vector * vector``. No special
    handling is done for a norm of zero.
    """
    squared_components = vector * vector
    scale = npsqrt(npsum(squared_components))
    unit_vector = vector / scale
    return unit_vector
def _gaussianeb_failed_result(ebparams, magsarefluxes):
    '''Builds the result dict `gaussianeb_fit_magseries` returns on failure.'''

    return {
        'fittype': 'gaussianeb',
        'fitinfo': {
            'initialparams': ebparams,
            'finalparams': None,
            'finalparamerrs': None,
            'fitmags': None,
            'fitepoch': None,
        },
        'fitchisq': npnan,
        'fitredchisq': npnan,
        'fitplotfile': None,
        'magseries': {
            'phase': None,
            'times': None,
            'mags': None,
            'errs': None,
            'magsarefluxes': magsarefluxes,
        },
    }


def gaussianeb_fit_magseries(
        times,
        mags,
        errs,
        ebparams,
        param_bounds=None,
        scale_errs_redchisq_unity=True,
        sigclip=10.0,
        plotfit=False,
        magsarefluxes=False,
        verbose=True,
        curve_fit_kwargs=None,
):
    '''This fits a double inverted gaussian EB model to a magnitude time series.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit the EB model to.

    ebparams : list of float
        This is a list containing the eclipsing binary parameters::

            ebparams = [period (time),
                        epoch (time),
                        pdepth (mags),
                        pduration (phase),
                        psdepthratio,
                        secondaryphase]

        `period` is the period in days.

        `epoch` is the time of primary minimum in JD.

        `pdepth` is the depth of the primary eclipse:

        - for magnitudes -> `pdepth` should be < 0
        - for fluxes     -> `pdepth` should be > 0

        `pduration` is the length of the primary eclipse in phase.

        `psdepthratio` is the ratio of the secondary eclipse depth to that of
        the primary eclipse.

        `secondaryphase` is the phase at which the minimum of the secondary
        eclipse is located. This effectively parameterizes eccentricity.

        If `epoch` is None, this function will do an initial spline fit to find
        an approximate minimum of the phased light curve using the given period.

        The `pdepth` provided is checked against the value of
        `magsarefluxes`. if `magsarefluxes = True`, the `pdepth` is forced to be
        > 0; if `magsarefluxes = False`, the `pdepth` is forced to be < 0.

        NOTE: this list is updated in place with the automatically determined
        epoch and the sign-corrected depth, so it must be a mutable sequence.

    param_bounds : dict or None
        This is a dict of the upper and lower bounds on each fit
        parameter. Should be of the form::

            {'period':         (lower_bound_period, upper_bound_period),
             'epoch':          (lower_bound_epoch, upper_bound_epoch),
             'pdepth':         (lower_bound_pdepth, upper_bound_pdepth),
             'pduration':      (lower_bound_pduration, upper_bound_pduration),
             'psdepthratio':   (lower_bound_psdepthratio,
                                upper_bound_psdepthratio),
             'secondaryphase': (lower_bound_secondaryphase,
                                upper_bound_secondaryphase)}

        - To indicate that a parameter is fixed, use 'fixed' instead of a tuple
          providing its lower and upper bounds as tuple.

        - To indicate that a parameter has no bounds, don't include it in the
          param_bounds dict.

        If this is None, the default value of this kwarg will be::

            {'period':(0.0,np.inf),       # period is between 0 and inf
             'epoch':(0.0, np.inf),       # epoch is between 0 and inf
             'pdepth':(-np.inf,np.inf),   # pdepth is between -np.inf and np.inf
             'pduration':(0.0,1.0),       # pduration is between 0.0 and 1.0
             'psdepthratio':(0.0,1.0),    # psdepthratio is between 0.0 and 1.0
             'secondaryphase':(0.0,1.0)}  # secondaryphase is between 0.0 and 1.0

    scale_errs_redchisq_unity : bool
        If True, the standard errors on the fit parameters will be scaled to
        make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg
        for the ``scipy.optimize.curve_fit`` function to False.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to the
        mag/flux time-series and writes the plot to the path specified here.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    curve_fit_kwargs : dict or None
        If not None, this should be a dict containing extra kwargs to pass to
        the scipy.optimize.curve_fit function.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'gaussianeb',
                'fitinfo':{
                    'initialparams':the initial EB params provided,
                    'finalparams':the final model fit EB params,
                    'finalparamerrs':formal errors in the params,
                    'fitmags': the model fit mags,
                    'fitepoch': the epoch of minimum light for the fit,
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if fitplot is not None,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }

        If the epoch can't be determined automatically or the fit fails, the
        same dict is returned with None for the fit products and NaN for the
        chi-sq values.

    '''

    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # check the ebparams
    ebperiod, ebepoch, ebdepth = ebparams[0:3]

    # check if we have a ebepoch to use
    if ebepoch is None:

        if verbose:
            LOGWARNING('no ebepoch given in ebparams, '
                       'trying to figure it out automatically...')

        # do a spline fit to figure out the approximate min of the LC
        try:
            spfit = spline_fit_magseries(times, mags, errs, ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = spfit['fitinfo']['fitepoch']

        # if the spline-fit fails, try a savgol fit instead
        except Exception:
            try:
                sgfit = savgol_fit_magseries(times, mags, errs, ebperiod,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes,
                                             verbose=verbose)
                ebepoch = sgfit['fitinfo']['fitepoch']
            except Exception:
                # both fits failed; handled by the None check below
                ebepoch = None

        # if everything failed, then bail out and ask for the ebepoch
        if ebepoch is None:
            LOGERROR("couldn't automatically figure out the eb epoch, "
                     "can't continue. please provide it in ebparams.")
            return _gaussianeb_failed_result(ebparams, magsarefluxes)

        if ebepoch.size > 1:
            if verbose:
                LOGWARNING('could not auto-find a single minimum '
                           'for ebepoch, using the first one returned')
            ebparams[1] = ebepoch[0]

        else:
            autoepoch = ebepoch.item()
            if verbose:
                LOGWARNING(
                    'using automatically determined ebepoch = %.5f'
                    % autoepoch
                )
            ebparams[1] = autoepoch

    # next, check the ebdepth and fix it to the form required: positive for
    # fluxes, negative for mags (ebdepth is a scalar, so just flip its sign)
    if magsarefluxes:
        if ebdepth < 0.0:
            ebparams[2] = -ebdepth
    else:
        if ebdepth > 0.0:
            ebparams[2] = -ebdepth

    # finally, do the fit
    try:

        # set up the fit parameter bounds
        if param_bounds is None:

            curvefit_bounds = (
                nparray([0.0, 0.0, -npinf, 0.0, 0.0, 0.0]),
                nparray([npinf, npinf, npinf, 1.0, 1.0, 1.0])
            )
            fitfunc_fixed = {}

        else:

            # figure out the bounds
            lower_bounds = []
            upper_bounds = []
            fitfunc_fixed = {}

            for ind, key in enumerate(('period', 'epoch', 'pdepth',
                                       'pduration', 'psdepthratio',
                                       'secondaryphase')):

                # handle fixed parameters: pin them with a very narrow
                # bounds interval around the provided value
                if (key in param_bounds and
                        isinstance(param_bounds[key], str) and
                        param_bounds[key] == 'fixed'):

                    lower_bounds.append(ebparams[ind] - 1.0e-7)
                    upper_bounds.append(ebparams[ind] + 1.0e-7)
                    fitfunc_fixed[key] = ebparams[ind]

                # handle parameters with lower and upper bounds
                elif key in param_bounds and isinstance(param_bounds[key],
                                                        (tuple, list)):

                    lower_bounds.append(param_bounds[key][0])
                    upper_bounds.append(param_bounds[key][1])

                # handle no parameter bounds
                else:

                    lower_bounds.append(-npinf)
                    upper_bounds.append(npinf)

            # generate the bounds sequence in the required format
            curvefit_bounds = (
                nparray(lower_bounds),
                nparray(upper_bounds)
            )

        #
        # set up the curve fit function
        #
        curvefit_func = partial(eclipses.invgauss_eclipses_curvefit_func,
                                zerolevel=npmedian(smags),
                                fixed_params=fitfunc_fixed)

        #
        # run the fit
        #
        extra_fit_kwargs = (
            curve_fit_kwargs if curve_fit_kwargs is not None else {}
        )

        finalparams, covmatrix = curve_fit(
            curvefit_func,
            stimes, smags,
            p0=ebparams,
            sigma=serrs,
            bounds=curvefit_bounds,
            absolute_sigma=(not scale_errs_redchisq_unity),
            **extra_fit_kwargs
        )

    except Exception:
        LOGEXCEPTION("curve_fit returned an exception")
        finalparams, covmatrix = None, None

    # if the leastsq fit failed, return nothing
    if finalparams is None or covmatrix is None:

        LOGERROR('eb-fit: least-squared fit to the light curve failed!')
        return _gaussianeb_failed_result(ebparams, magsarefluxes)

    # the fit succeeded: calculate the chisq and reduced chisq
    fitmags, phase, ptimes, pmags, perrs = eclipses.invgauss_eclipses_func(
        finalparams,
        stimes, smags, serrs
    )
    fitchisq = npsum(
        ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs)
    )
    fitredchisq = fitchisq / (len(pmags) -
                              len(finalparams) - len(fitfunc_fixed))

    stderrs = npsqrt(npdiag(covmatrix))

    if verbose:
        LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                (fitchisq, fitredchisq))

    # get the fit epoch
    fperiod, fepoch = finalparams[:2]

    # assemble the returndict
    returndict = {
        'fittype': 'gaussianeb',
        'fitinfo': {
            'initialparams': ebparams,
            'finalparams': finalparams,
            'finalparamerrs': stderrs,
            'fitmags': fitmags,
            'fitepoch': fepoch,
        },
        'fitchisq': fitchisq,
        'fitredchisq': fitredchisq,
        'fitplotfile': None,
        'magseries': {
            'phase': phase,
            'times': ptimes,
            'mags': pmags,
            'errs': perrs,
            'magsarefluxes': magsarefluxes,
        },
    }

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str):

        make_fit_plot(phase, pmags, perrs, fitmags,
                      fperiod, ptimes.min(), fepoch,
                      plotfit,
                      magsarefluxes=magsarefluxes)

        returndict['fitplotfile'] = plotfit

    return returndict
def Chip_Classify(ImageLocation,SaveLocation,ImageFile,NumberOfClusters,InitialCluster):
    """Assign every pixel of an image chip to its nearest cluster centre and
    write per-cluster statistics to disk.

    The function works in three serial passes over the image:

    1. For each pixel, the Euclidean distance to every cluster centre is
       computed and the pixel is recorded under the nearest cluster(s).
    2. The total sum of squared errors (TSSE) of the pixels with respect to
       their cluster centre is accumulated per cluster.
    3. The per-band mean and standard deviation of each cluster are computed.

    Parameters
    ----------
    ImageLocation
        Not used anywhere in the body.
    SaveLocation
        Prefix (converted with ``str``) that is prepended to the names of the
        final output files.
    ImageFile : str
        Path of the image to classify. It is read with ``imread`` and also
        opened with ``rio.open`` to obtain the raster profile. Pieces of this
        string (the characters from 32 before the end up to 3 or 4 before the
        end) are reused to build the output file names.
    NumberOfClusters : int
        Number of cluster centres.
    InitialCluster
        Cluster centres; reshaped to ``(NumberOfClusters, -1)``.
        NOTE(review): the second dimension presumably has to equal the number
        of bands used below -- confirm with the caller.

    Returns
    -------
    None
        All results are written to files: the intermediate dumps
        ``big.loop.serial``, ``small.loop.serial`` and
        ``cluster.summary.serial`` (via ``savez``, relative to the current
        working directory), and the ``ImageDisplay_*``, ``ClusterCount*`` and
        ``Stats_*`` outputs under ``SaveLocation``. The directory
        ``/tmp/ChipS`` is created if it does not exist.
    """
    ticOverall = time.time()
    # Reshape InitialCluster so that each row is one cluster centre
    InitialCluster = array(InitialCluster).reshape((NumberOfClusters,-1))
    ImageIn = imread(ImageFile)
    # keep the raster profile of the input so the classified output can be
    # written with the same settings, but with an int8 data type
    with rio.open(ImageFile) as gtf_img:
        Info = gtf_img.profile
        Info.update(dtype=rio.int8)
    ImageRow, ImageColumn, NumberOfBands = ImageIn.shape
    # with more than 8 bands, the last band is left out of all computations
    if NumberOfBands > 8:
        NumberOfBands = NumberOfBands - 1
    # preallocate the per-pixel and per-cluster accumulators
    Cluster = zeros((ImageRow, ImageColumn, NumberOfClusters))
    CountClusterPixels = zeros((NumberOfClusters, 1))
    MeanCluster = zeros((NumberOfClusters, NumberOfBands))
    EuclideanDistanceResultant = zeros((ImageRow, ImageColumn, NumberOfClusters))
    directory = '/tmp/ChipS'
    if not os.path.exists(directory):
        os.makedirs(directory)
    print('starting big loop')
    tic = time.time()
    for j in range(0,ImageRow):
        for k in range(0, ImageColumn):
            temp = ImageIn[j, k, 0:NumberOfBands]
            # distance from this pixel to every cluster centre
            EuclideanDistanceResultant[j, k, :] = npsqrt(npsum(nppower((matlib.repmat(temp, NumberOfClusters, 1)) - InitialCluster, 2), axis=1))
            DistanceNearestCluster = min(EuclideanDistanceResultant[j, k, :])
            for l in range(0, NumberOfClusters):
                # pixels whose nearest distance is exactly 0 are never assigned
                if DistanceNearestCluster != 0:
                    # every cluster tied for the minimum distance is updated
                    if DistanceNearestCluster == EuclideanDistanceResultant[j, k, l]:
                        CountClusterPixels[l] = CountClusterPixels[l] + 1
                        # MeanCluster accumulates band sums; it is not divided
                        # by the pixel count anywhere in this function
                        for m in range(0, NumberOfBands):
                            MeanCluster[l, m] = MeanCluster[l, m] + ImageIn[j, k, m]
                        # NOTE(review): the stored value is the 0-based cluster
                        # index, so an assignment to cluster 0 stores 0 and is
                        # indistinguishable from "not assigned" -- confirm that
                        # this is intended
                        Cluster[j, k, l] = l
    print('\n')
    print('\nfinished big loop')
    # per-pixel sum of the stored cluster indices; computed before Cluster is
    # modified in the summary loop further down
    ImageDisplay = npsum(Cluster, axis = 2)
    print("Execution time: " + str(time.time() - tic))
    savez("big.loop.serial",Cluster=Cluster, CountClusterPixels=CountClusterPixels, EuclideanDistanceResultant=EuclideanDistanceResultant, MeanCluster=MeanCluster)
    ClusterPixelCount = count_nonzero(Cluster, axis = 2)
    print("Non-zero cluster pixels: " + str(ClusterPixelCount))
    # Calculate TSSE within clusters
    TsseCluster = zeros((1, NumberOfClusters))
    CountTemporalUnstablePixel = 0
    # TSSECluster Serial
    print("Starting TSSE Cluster computation (Serial version)\n")
    tic = time.time()
    for j in range(0, ImageRow):
        for k in range(0, ImageColumn):
            # largest stored cluster index for this pixel
            FlagSwitch = int(max(Cluster[j, k, :]))
            # store the SSE related to each pixel; a maximum of 0 is counted as
            # a temporally unstable pixel (this covers unassigned pixels and,
            # because index 0 is stored as 0, pixels assigned only to cluster 0)
            if FlagSwitch == 0:
                CountTemporalUnstablePixel = CountTemporalUnstablePixel + 1
            else:
                # NOTE(review): an earlier variant indexed with FlagSwitch - 1;
                # verify which indexing is the intended one
                TsseCluster[0,FlagSwitch] = TsseCluster[0,FlagSwitch] + npsum(nppower((squeeze(ImageIn[j, k, 0:NumberOfBands]) - transpose(InitialCluster[FlagSwitch, :])),2))
    Totalsse = npsum(TsseCluster)
    print("Execution time: " + str(time.time() - tic))
    savez("small.loop.serial",CountTemporalUnstablePixel=CountTemporalUnstablePixel,TsseCluster=TsseCluster)
    # get data for final stats:
    # calculate the spatial mean and standard deviation of each cluster
    ClusterMeanAllBands = zeros((NumberOfClusters, NumberOfBands))
    ClusterSdAllBands = zeros((NumberOfClusters, NumberOfBands))
    print('finished small loop')
    # Cluster Summary Serial
    tic = time.time()
    FinalClusterMean = zeros(NumberOfBands)
    FinalClusterSd = zeros(NumberOfBands)
    for i in range(0, NumberOfClusters):
        # turn the stored index i into a 0/1 mask for this cluster
        # NOTE(review): Temp is a slice of Cluster, so this presumably modifies
        # Cluster in place; for i == 0 the comparison also matches every
        # unassigned (zero) entry -- confirm that this is intended
        Temp = Cluster[:, :, i]
        Temp[Temp == i] = 1
        MaskedClusterAllBands = Temp[:,:,None]*ImageIn[:, :, 0:NumberOfBands]
        for j in range(0, NumberOfBands):
            Temp = MaskedClusterAllBands[:, :, j]
            # statistics are taken over the non-zero masked values only
            TempNonZero = Temp[npnonzero(Temp)]
            # the NaN-filtered values are computed but not used below
            TempNonzeronan = TempNonZero[~npisnan(TempNonZero)]
            # warnings are promoted to errors inside this block so that a
            # RuntimeWarning raised by npmean/npstd can be caught; in that case
            # both statistics are set to 0 (presumably this is for an empty
            # selection -- TODO confirm)
            with warnings.catch_warnings():
                warnings.filterwarnings('error')
                try:
                    FinalClusterMean[j] = npmean(TempNonZero)
                    FinalClusterSd[j] = npstd(TempNonZero)
                except RuntimeWarning:
                    FinalClusterMean[j] = 0
                    FinalClusterSd[j] = 0
        ClusterMeanAllBands[i, :] = FinalClusterMean[:]
        ClusterSdAllBands[i, :] = FinalClusterSd[:]
    print("Execution time: " + str(time.time() - tic))
    savez("cluster.summary.serial",ClusterMeanAllBands=ClusterMeanAllBands,ClusterSdAllBands=ClusterSdAllBands)
    # output names reuse a fixed-width piece of the input file name
    filename = str(SaveLocation) + 'ImageDisplay_' + ImageFile[len(ImageFile)-32:len(ImageFile)-3] + 'mat'
    print('Got filename. Now save the data')
    print(filename)
    save(filename, ImageDisplay)
    filename = str(SaveLocation) + 'ClusterCount' + str(NumberOfClusters) + '_' + ImageFile[len(ImageFile)-32:len(ImageFile)-4] + '.tif'
    # write the classified image as band 1, using the profile of the input
    with rio.open(filename, 'w', **Info) as dst:
        dst.write(int8(ImageDisplay), 1)
    filename = str(SaveLocation) + 'Stats_' + ImageFile[len(ImageFile)-32:len(ImageFile)-3] + 'mat'
    # all statistics are passed to savez as one positional list
    savez(filename, [MeanCluster, CountClusterPixels, ClusterPixelCount, ClusterMeanAllBands, ClusterSdAllBands, Totalsse])
    print('done!')
    print(time.time()-ticOverall)
def dworetsky_period_find(time, mag, err, init_p, end_p, f_step, verbose=False):
    '''
    This is the super-slow naive version taken from my thesis work.

    Uses the string length method in Dworetsky 1983 to calculate the period of a
    time-series of magnitude measurements and associated magnitude
    errors. Searches in linear frequency space (which obviously doesn't
    correspond to a linear period space).

    PARAMETERS:

    time: series of times at which mags were measured (usually some form of JD)
    mag: timeseries of magnitudes (np.array)
    err: associated errs per magnitude measurement (np.array)
    init_p, end_p: interval to search for periods between (both ends inclusive)
    f_step: step in frequency [days^-1] to use
    verbose: if True, prints the number of frequencies and the frequency range

    RETURNS:

    tuple of the following form:

    (periods (np.array),
     string_lengths (np.array),
     good_period_mask (boolean array))

    Every element of good_period_mask is set explicitly: True if the string
    length is below either keep threshold, False otherwise.

    '''

    # rescale the mags to the range [-0.25, 0.25]
    mod_mag = (mag - npmin(mag)) / (2.0 * (npmax(mag) - npmin(mag))) - 0.25
    fold_time = npmin(time)  # fold at the first time element

    init_f = 1.0 / end_p
    end_f = 1.0 / init_p

    n_freqs = npceil((end_f - init_f) / f_step)

    if verbose:
        print('searching %s frequencies between %s and %s days^-1...' %
              (n_freqs, init_f, end_f))

    # npceil returns a float, but array sizes and range() need an int
    n_steps = int(n_freqs)

    out_periods = npempty(n_steps, dtype=float)
    out_strlens = npempty(n_steps, dtype=float)
    p_goodflags = npempty(n_steps, dtype=bool)

    j_range = len(mag) - 1

    # the keep thresholds only depend on the errs and the number of points,
    # so calculate them once instead of at every trial frequency
    epsilon = 2.0 * npmean(err)
    delta_l = 0.34 * (epsilon - 0.5 * (epsilon**2)) * (len(time) -
                                                        npsqrt(10.0 / epsilon))
    keep_threshold_1 = 1.6 + 1.2 * delta_l

    l = 0.212 * len(time)
    sig_l = len(time) / 37.5
    keep_threshold_2 = l + 4.0 * sig_l

    trial_f = init_f

    for i in range(n_steps):

        period = 1.0 / trial_f

        # phase the light curve at this period and sort it by phase
        cycles = (time - fold_time) / period
        phase = cycles - npfloor(cycles)
        phase_sort_ind = npargsort(phase)

        phase_sorted = phase[phase_sort_ind]
        mod_mag_sorted = mod_mag[phase_sort_ind]

        # now calculate the string length
        strlen = 0.0

        for j in range(j_range):
            strlen += npsqrt((mod_mag_sorted[j + 1] - mod_mag_sorted[j])**2 +
                             (phase_sorted[j + 1] - phase_sorted[j])**2)

        # close the string: connect the last point to the first point of the
        # next cycle
        strlen += npsqrt((mod_mag_sorted[0] - mod_mag_sorted[-1])**2 +
                         (phase_sorted[0] - phase_sorted[-1] + 1)**2)

        # always write the flag: the array comes from npempty, so elements
        # that are not assigned would contain arbitrary values
        # NOTE(review): pdw_worker flags a period as good when
        # keep_threshold_1 < strlen < keep_threshold_2 instead -- confirm which
        # of the two criteria is the intended one
        p_goodflags[i] = bool((strlen < keep_threshold_1) or
                              (strlen < keep_threshold_2))

        out_periods[i] = period
        out_strlens[i] = strlen

        trial_f += f_step

    return (out_periods, out_strlens, p_goodflags)
def sqrt(x):
    r"""Compute the square root of ``x``.

    This is a thin wrapper that forwards ``x`` to ``npsqrt`` and returns the
    result unchanged.
    """
    result = npsqrt(x)
    return result
def _pdw_failed_result():
    '''Builds the placeholder dict returned when no period search can be run.'''

    return {
        'bestperiod': npnan,
        'beststrlen': npnan,
        'bestflag': npnan,
        'nbeststrlens': None,
        'nbestperiods': None,
        'nbestflags': None,
        'strlens': None,
        'periods': None,
        'goodflags': None
    }


def pdw_period_find(times,
                    mags,
                    errs,
                    autofreq=True,
                    init_p=None,
                    end_p=None,
                    f_step=1.0e-4,
                    phasebinsize=None,
                    sigclip=10.0,
                    nworkers=None,
                    verbose=False):
    '''This is the parallel version of the Dworetsky string-length period finder.

    Uses the string length method in Dworetsky 1983 to calculate the period of
    a time-series of magnitude measurements and associated magnitude errors.
    Non-finite points are dropped, the magnitudes are optionally sigma-clipped
    around their median, and one task per trial frequency is farmed out to
    pdw_worker through a multiprocessing pool.

    PARAMETERS:

    times: series of times at which mags were measured (usually some form of JD)
    mags: timeseries of magnitudes (np.array)
    errs: associated errs per magnitude measurement (np.array)
    autofreq: if True, the frequency grid comes from get_frequency_grid; if
              False, it is np.arange(1/end_p, 1/init_p, f_step)
    init_p, end_p: interval to search for periods between. If init_p is not
                   given, 0.1 is used; if end_p is not given, the time span of
                   the (clipped) observations is used.
    f_step: step in frequency [days^-1] to use when autofreq is False
    phasebinsize: passed through unchanged to pdw_worker (presumably the
                  phase-bin width used to speed up the calculation -- confirm
                  against pdw_worker)
    sigclip: if truthy, keep only points whose magnitude lies within
             sigclip * 1.483 * MAD of the median magnitude
    nworkers: number of worker processes; falls back to NCPUS when unset or
              larger than NCPUS
    verbose: currently unused by this function

    RETURNS:

    dict with the keys 'bestperiod', 'beststrlen', 'bestflag',
    'nbestperiods', 'nbeststrlens', 'nbestflags' (the five entries with the
    shortest string lengths), and 'periods', 'strlens', 'goodflags' (one entry
    per trial frequency). If fewer than 10 usable points remain, or the
    frequency grid is empty, the three 'best*' scalars are nan and every other
    value is None.

    '''

    # remove nans
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[find], mags[find], errs[find]

    # all three arrays were cut with the same mask, so one length check
    # covers them; doing it before any min/max avoids crashing on empty input
    if len(ftimes) <= 9:
        LOGERROR('no good detections for these times and mags, skipping...')
        return _pdw_failed_result()

    # sigclip next
    if sigclip:

        # get the median and stdev = 1.483 x MAD
        median_mag = np.median(fmags)
        stddev_mag = (np.median(np.abs(fmags - median_mag))) * 1.483

        sigind = (np.abs(fmags - median_mag)) < (sigclip * stddev_mag)
        stimes, smags, serrs = ftimes[sigind], fmags[sigind], ferrs[sigind]

        LOGINFO('sigclip = %s: before = %s observations, '
                'after = %s observations' %
                (sigclip, len(times), len(stimes)))

    else:

        stimes, smags, serrs = ftimes, fmags, ferrs

    # make sure there are enough points to calculate a spectrum
    if len(stimes) <= 9:
        LOGERROR('no good detections for these times and mags, skipping...')
        return _pdw_failed_result()

    # get the frequencies to use
    if init_p:
        endf = 1.0 / init_p
    else:
        # default start period is 0.1 day
        endf = 1.0 / 0.1

    if end_p:
        startf = 1.0 / end_p
    else:
        # default end period is length of time series
        startf = 1.0 / (stimes.max() - stimes.min())

    if autofreq:
        # this gets an automatic grid of frequencies to use
        frequencies = get_frequency_grid(stimes,
                                         minfreq=startf,
                                         maxfreq=endf)
    else:
        # use the caller-provided frequency step
        frequencies = np.arange(startf, endf, f_step)

    # an empty grid would otherwise blow up when unpacking the results
    if frequencies.size == 0:
        LOGERROR('empty frequency grid between %s and %s days^-1, '
                 'skipping...' % (startf, endf))
        return _pdw_failed_result()

    if autofreq:
        LOGINFO('using autofreq with %s frequency points, '
                'start P = %.3f, end P = %.3f' %
                (frequencies.size,
                 1.0 / frequencies.max(),
                 1.0 / frequencies.min()))
    else:
        LOGINFO('using %s frequency points, start P = %.3f, end P = %.3f' %
                (frequencies.size, 1.0 / endf, 1.0 / startf))

    # set up some internal stuff. Everything below is derived from the
    # sigma-clipped arrays so that the clipping actually affects the search.
    mag_min = npmin(smags)
    mod_mags = (smags - mag_min) / (2.0 * (npmax(smags) - mag_min)) - 0.25

    fold_time = npmin(stimes)  # fold at the first time element
    j_range = len(smags) - 1
    epsilon = 2.0 * npmean(serrs)
    delta_l = 0.34 * (epsilon - 0.5 * (epsilon**2)) * (
        len(stimes) - npsqrt(10.0 / epsilon))
    keep_threshold_1 = 1.6 + 1.2 * delta_l
    l = 0.212 * len(stimes)
    sig_l = len(stimes) / 37.5
    keep_threshold_2 = l + 4.0 * sig_l

    # generate the tasks
    tasks = [(x, stimes, mod_mags, fold_time, j_range,
              keep_threshold_1, keep_threshold_2, phasebinsize)
             for x in frequencies]

    # fire up the pool and farm out the tasks
    if (not nworkers) or (nworkers > NCPUS):
        nworkers = NCPUS
        LOGINFO('using %s workers...' % nworkers)

    pool = Pool(nworkers)
    try:
        strlen_results = pool.map(pdw_worker, tasks)
    finally:
        # always shut the workers down, even if a task raised
        pool.close()
        pool.join()

    # each worker result unpacks into (period, string length, good flag)
    periods, strlens, goodflags = zip(*strlen_results)
    periods, strlens, goodflags = (np.array(periods),
                                   np.array(strlens),
                                   np.array(goodflags))

    # shortest string lengths first
    strlensort = npargsort(strlens)
    nbeststrlens = strlens[strlensort[:5]]
    nbestperiods = periods[strlensort[:5]]
    nbestflags = goodflags[strlensort[:5]]

    return {
        'bestperiod': nbestperiods[0],
        'beststrlen': nbeststrlens[0],
        'bestflag': nbestflags[0],
        'nbeststrlens': nbeststrlens,
        'nbestperiods': nbestperiods,
        'nbestflags': nbestflags,
        'strlens': strlens,
        'periods': periods,
        'goodflags': goodflags
    }
def doornik_hansen(data):
    """
    Perform the Doornik-Hansen test
    (https://doi.org/10.1111/j.1468-0084.2008.00537.x)

    This computes and transforms multivariate variants of the skewness
    and kurtosis, then computes a chi-square statistic on the results.

    Parameters
    ----------
    data : anything accepted by ``pandas.DataFrame``
        Observations in rows, variables in columns. Row and column labels
        are irrelevant to the result.

    Returns
    -------
    tuple ``(DH, AS, PO, PA)``
        ``DH`` is the sum of squares of the transformed skewness (z1) and
        transformed kurtosis (z2); ``AS`` is
        ``n/6 * sum(skew**2) + n/24 * sum((kurt - 3)**2)``; ``PO`` and ``PA``
        are the upper-tail chi-square probabilities of ``DH`` and ``AS`` with
        ``2 * p`` degrees of freedom, ``p`` being the number of variables.

    Raises
    ------
    ValueError
        If the correlation matrix of the data is rank deficient.

    Notes
    -----
    The arithmetic relies on true division (e.g. the ``1 / 3`` exponent).
    """
    data = pandas.DataFrame(data)

    n = len(data)
    p = len(data.columns)

    # R is the correlation matrix, a scaling of the covariance matrix
    # R has dimensions dim * dim
    R = corrcoef(data.transpose())
    L, V = eigh(R)

    # replace each eigenvalue by its inverse square root, zeroing the
    # (numerically) non-positive ones
    for i in range(p):
        if L[i] <= 1e-12:
            L[i] = 0
        elif L[i] > 1e-12:
            L[i] = 1 / sqrt(L[i])
    n_kept = int((L != 0).sum())
    L = diag(L)

    # A rank-deficient correlation matrix is rejected outright; the test is
    # not re-run on a reduced set of variables.
    if matrix_rank(R) < p:
        raise ValueError(
            "NOTE:Due that some eigenvalue resulted zero, "
            "a new data matrix was created. Initial number "
            "of variables = %d, were reduced to = %d" % (p, n_kept))

    # Standardise every column. Broadcasting the per-column Series keeps the
    # operation aligned on the frame's own labels, so inputs with named
    # columns or a custom index work too.
    Z = (data - data.mean()) / data.std(ddof=0)
    Zp = Z.dot(V)
    Zpp = Zp.dot(L)
    st = Zpp.dot(transpose(V))

    # skew is the multivariate skewness (dimension dim)
    # kurt is the multivariate kurtosis (dimension dim)
    skew = mean(power(st, 3), axis=0)
    kurt = mean(power(st, 4), axis=0)

    # Transform the skewness into a standard normal z1
    n2 = n * n
    b = 3 * (n2 + 27 * n - 70) * (n + 1) * (n + 3)
    b /= (n - 2) * (n + 5) * (n + 7) * (n + 9)
    w2 = -1 + sqrt(2 * (b - 1))
    d = 1 / sqrt(log(sqrt(w2)))
    y = skew * sqrt((w2 - 1) * (n + 1) * (n + 3) / (12 * (n - 2)))
    # Use numpy log/sqrt as math versions dont have array input
    z1 = d * nplog(y + npsqrt(y * y + 1))

    # Transform the kurtosis into a standard normal z2
    d = (n - 3) * (n + 1) * (n2 + 15 * n - 4)
    a = (n - 2) * (n + 5) * (n + 7) * (n2 + 27 * n - 70) / (6 * d)
    c = (n - 7) * (n + 5) * (n + 7) * (n2 + 2 * n - 5) / (6 * d)
    k = (n + 5) * (n + 7) * (n * n2 + 37 * n2 + 11 * n - 313) / (12 * d)
    al = a + (skew**2) * c
    chi = (kurt - 1 - (skew**2)) * k * 2
    z2 = (((chi / (2 * al))**(1 / 3)) - 1 + 1 / (9 * al)) * npsqrt(9 * al)
    excess_kurt = kurt - 3

    # omnibus normality statistic
    DH = z1.dot(z1.transpose()) + z2.dot(z2.transpose())
    AS = (n / 6 * skew.dot(skew.transpose()) +
          n / 24 * excess_kurt.dot(excess_kurt.transpose()))

    # degrees of freedom
    v = 2 * p

    # p-values; the survival function keeps precision in the upper tail
    # where 1 - cdf would round to zero
    PO = chi2.sf(DH, v)
    PA = chi2.sf(AS, v)

    return DH, AS, PO, PA