# Assumes numpy as np, numpy.ma as ma, matplotlib.pyplot as plt and a
# get_degree() helper are available in the enclosing module.
def degree_distribution(A, networkName, directed=True):
    binNum = 30
    if directed:
        (kin, kout) = get_degree(A)
        bins = np.linspace(0, np.log10(np.max(kin)), num=binNum)
        digitized = np.digitize(np.log10(kin), bins)
        bin_counts = np.asarray([digitized.tolist().count(i) for i in range(0, len(bins))])
        bin_counts = ma.log10(bin_counts)
        # fit the in-degree line
        a, b = ma.polyfit(bins, bin_counts, 1, full=False)
        print('best fit in-degree line:\ny = {:.2f} + {:.2f}x'.format(b, a))
        yfit = [b + a * xi for xi in bins]
        fig, axs = plt.subplots(2, 1)
        axs[0].scatter(bins, bin_counts)
        axs[0].plot(bins, yfit, color="orange")
        axs[0].set_title('in-degree distribution')
        axs[0].set_xlabel('Degree (d) log base 10', fontsize="small")
        axs[0].set_ylabel('Frequency log base 10', fontsize="small")
        axs[0].set_ylim(bottom=0)

        bins = np.linspace(0, np.log10(np.max(kout)), num=binNum)
        digitized = np.digitize(np.log10(kout), bins)
        bin_counts = np.asarray([digitized.tolist().count(i) for i in range(0, len(bins))])
        bin_counts = ma.log10(bin_counts)
        # fit the out-degree line
        a, b = ma.polyfit(bins, bin_counts, 1, full=False)
        print('best fit out-degree line:\ny = {:.2f} + {:.2f}x'.format(b, a))
        yfit = [b + a * xi for xi in bins]
        axs[1].scatter(bins, bin_counts)
        axs[1].plot(bins, yfit, color="orange")
        axs[1].set_title('out-degree distribution')
        axs[1].set_xlabel('Degree (d) log base 10', fontsize="small")
        axs[1].set_ylabel('Frequency log base 10', fontsize="small")
        plt.subplots_adjust(hspace=0.01)
        plt.tight_layout()
        plt.savefig(networkName + 'degree.pdf')
        plt.close()
    if not directed:
        (kin, kout) = get_degree(A)
        print(kin.shape)
        # bin the statistics
        bins = np.linspace(0, np.log10(np.max(kin)), num=binNum)
        digitized = np.digitize(np.log10(kin), bins)
        bin_counts = np.asarray([digitized.tolist().count(i) for i in range(0, len(bins))])
        bin_counts = ma.log10(bin_counts)
        # fit the line
        a, b = ma.polyfit(bins, bin_counts, 1, full=False)
        print('best fit line:\ny = {:.2f} + {:.2f}x'.format(b, a))
        yfit = [b + a * xi for xi in bins]
        plt.scatter(bins, bin_counts)
        plt.plot(bins, yfit, color="orange")
        plt.title('degree distribution')
        plt.xlabel('Degree (d) log base 10', fontsize="small")
        plt.ylabel('Frequency log base 10', fontsize="small")
        plt.ylim(bottom=0)
        # plt.xscale('log')
        # plt.yscale('log')
        plt.tight_layout()
        plt.savefig(networkName + 'degree.pdf')
        plt.close()
def my_polyfit(x, y, deg, degatt=0):
    """Outlier-resistant wrapper around ma.polyfit.

    First fits a polynomial of order `degatt`, masks points whose residual
    exceeds three times the median residual, then refits with order `deg`.
    Returns the final coefficients and the boolean mask of points kept.
    """
    k = ma.polyfit(x, y, degatt)
    res = my_poly(k, x)          # my_poly: evaluate the coefficients k at x
    resid = ma.abs(res - y)
    medresid = ma.median(resid, axis=0)
    if y.ndim != 1:
        mask = ma.logical_not(ma.sum((resid > 3 * medresid), axis=1).astype('bool'))
    else:
        mask = (resid < 3 * medresid)
    y = y[mask]
    x = x[mask]
    k = ma.polyfit(x, y, deg)
    return (k, mask)
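# A minimal, self-contained usage sketch (not from the original source). It
# assumes my_poly(k, x) simply evaluates the coefficients returned by
# ma.polyfit (highest power first) at x, i.e. the same as np.polyval; the data
# below are invented to show the 3x-median-residual outlier rejection.
import numpy as np
import numpy.ma as ma

def my_poly(k, x):
    # assumed helper: evaluate coefficients (highest power first) at x
    return np.polyval(k, x)

rng = np.random.default_rng(0)
x = np.linspace(0, 10, 100)
y = 1.5 * x + 2.0 + rng.normal(0, 0.1, x.size)
y[40] += 50.0                       # inject one gross outlier

k, keep = my_polyfit(x, y, deg=1)   # order-0 first pass, then a linear refit
print(k)                            # roughly [1.5, 2.0]
print(keep.sum(), "of", x.size, "points kept")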
def rate_of_change_polyfit(data, period, dates, fit_length=1):
    '''This function calculates a rate of change in the same way as the old
    code did, using successive lines of best fit.'''
    import aux_functions
    import numpy as np
    import numpy.ma as ma

    seconds_in_day = 8.64e4
    tolerance = 0.95
    numbers = np.array([aux_functions.datetime_to_float(ddt) for ddt in dates])
    dates_within = [ddt for ddt in dates
                    if ddt >= period[0] and ddt <= period[1]]
    output_series = ma.masked_array(np.zeros(len(dates_within)),
                                    mask=np.zeros(len(dates_within), dtype='bool'))
    for (idate, ddt) in enumerate(dates_within):
        number = aux_functions.datetime_to_float(ddt)
        index_period = np.where(
            np.logical_and(numbers >= number - fit_length,
                           numbers <= number + fit_length))
        numbers_period = numbers[index_period]
        data_period = data[index_period]
        if np.sum(data_period.mask) / data_period.shape[0] > tolerance:
            output_series.mask.itemset(idate, True)
        else:
            rate_of_change_instance = ma.polyfit(numbers_period, data_period, 1)[0] / seconds_in_day
            output_series.data.itemset(idate, rate_of_change_instance)
    return output_series
def baseline_and_deglitch(orig_spec, ww=300, sigma_cut=4., poly_n=2., filt_width=7.):
    """
    (1) Calculate a rolling standard deviation (s) in a window of 2*ww pixels
    (2) Mask out portions of the spectrum where s is more than sigma_cut times
        the median value for s.
    (3) Fit and subtract-out a polynomial of order poly_n
        (currently hard-coded to 2)
    (4) Median filter (with a filter width of filt_width) to remove the
        single-channel spikes seen.
    """
    ya = rolling_window(orig_spec, ww * 2)
    # Calculate standard dev and pad the output
    stds = my_pad.pad(np.std(ya, -1), (ww - 1, ww), mode='edge')
    # Figure out which bits of the spectrum have signal/glitches
    med_std = np.median(stds)
    std_std = np.std(stds)
    sigma_x_bar = med_std / np.sqrt(ww)
    sigma_s = (1. / np.sqrt(2.)) * sigma_x_bar
    # Mask out signal for baseline
    masked = ma.masked_where(stds > med_std + sigma_cut * sigma_s, orig_spec)
    xx = np.arange(masked.size)
    ya = ma.polyfit(xx, masked, 2)
    baseline = ya[0] * xx ** 2 + ya[1] * xx + ya[2]
    sub = orig_spec - baseline
    # Filter out glitches in baseline-subtracted version
    final = im.median_filter(sub, filt_width)[::filt_width]
    return (final)
def fit_baseline(masked, xx, ndeg=2):
    """
    Fit a polynomial baseline of arbitrary (ndeg) order.
    """
    ya = ma.polyfit(xx, masked, ndeg)
    basepoly = np.poly1d(ya)
    return (basepoly)
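# A short usage sketch for fit_baseline (illustrative only; the spectrum and
# the masking threshold below are invented): mask the bright line channels,
# fit an order-2 baseline, then evaluate the returned np.poly1d object to
# subtract it.
import numpy as np
import numpy.ma as ma

# Hypothetical spectrum: quadratic baseline plus noise plus one bright line
xx = np.arange(500)
spec = 1e-5 * (xx - 250) ** 2 + np.random.normal(0, 0.05, xx.size)
spec[240:260] += 5.0                       # fake emission feature

# Mask the bright channels so they do not bias the baseline fit
masked = ma.masked_where(spec > 1.0, spec)

basepoly = fit_baseline(masked, xx, ndeg=2)
baseline_subtracted = spec - basepoly(xx)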
def ptc_irffpairs(imagelist, *coor, **kargs):
    """
    TODO: Need to be finished !! NHA

    Produce a PTC plot for pairs of flat fields (ff) taken at the same level.
    The pairs of ff should have the same light level.
    The first 2 images must be bias.

    To eliminate the FPN, the 'shotnoise' image is computed as the
    subtraction of two debiased flat field images.

    Optional kargs arguments:
    FACTOR (default = 2.0)
    FIRST_FITTING (default = False)
    LIMIT (default = False)
    VERBOSE (default = False)

    TODO: add Rotation by 90deg as an option
    """
    x1, x2, y1, y2 = imagelist[0].get_windowcoor(*coor)

    # Define empty lists to store values
    signal = []
    stddev = []
    variance = []

    oddimageindex = list(range(1, len(imagelist), 2))
    evenimageindex = list(range(0, len(imagelist), 2))

    # For all pairs, compute signal, std and variance
    for odd, even in zip(oddimageindex, evenimageindex):
        ff1 = imagelist[odd]
        ff2 = imagelist[even]
        ffmean = (ff1 + ff2) / 2.0
        shotnoise = ff1 - ff2
        signal.append(np.mean(ffmean[x1:x2, y1:y2]))
        variance.append(np.var(shotnoise[x1:x2, y1:y2]) / 2.0)
        stddev.append(np.std(shotnoise[x1:x2, y1:y2]) / np.sqrt(2.0))
        print(f"Signal: {signal[-1]} Variance: {variance[-1]}")

    coefts = ma.polyfit(signal, variance, 1)
    # coefts = ma.polyfit(signal[:-3], variance[:-3], 1)
    polyts = coefts[0] * np.array(signal) + coefts[1]
    print(1 / coefts[0], coefts[1])

    # plot variance vs. signal
    plt.figure()
    # plot(meansig, masked_variance, 'b.')
    plt.plot(signal, variance, 'b.')
    plt.plot(signal, polyts, 'r-')
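# Neither PTC routine here is finished, but the 1/coefts[0] they print is the
# standard photon-transfer gain estimate: for shot-noise-limited pixels the
# variance in ADU^2 grows as signal/gain, so the reciprocal of the fitted
# slope approximates the conversion gain. A tiny synthetic illustration
# (all numbers invented):
import numpy as np
import numpy.ma as ma

# Synthetic photon-transfer data: Var[ADU^2] = Signal/gain + read_noise^2
true_gain, read_noise = 2.5, 5.0
signal = np.linspace(100.0, 40000.0, 20)
variance = signal / true_gain + read_noise ** 2

slope, intercept = ma.polyfit(signal, variance, 1)
print("estimated gain [e-/ADU]:", 1.0 / slope)            # ~2.5
print("estimated read noise [ADU]:", np.sqrt(intercept))  # ~5.0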
def get_poly_baseline(mspec, k_est, debug=True, **kwargs):
    """
    Fit for the best polynomial baseline according to an information criterion.

    Search polynomial fits from order 0 to order 6; both the BIC and the AIC
    are computed and the minimum-AIC order is selected. This fit should also
    have an rms error within a factor of two of the estimated noise (k_est).
    If this is not the case then the baseline fit is most likely bad.
    """
    d = np.arange(0, 7)
    rms_err = np.zeros(d.shape)
    all_polys = []
    #k_est = 0.2/np.sqrt(7)
    xx = np.arange(mspec.size)
    yy = mspec
    for i in range(len(d)):
        p = ma.polyfit(xx, yy, d[i])
        # P is the module's polynomial class (coefficients lowest power first,
        # hence the reversal of the polyfit output)
        basepoly = P(p[::-1])
        all_polys.append(basepoly)
        rms_err[i] = np.sqrt(np.sum((basepoly(xx) - yy) ** 2) / len(yy))
    BIC = np.sqrt(len(yy)) * rms_err / k_est + 1 * d * np.log(len(yy))
    AIC = np.sqrt(len(yy)) * rms_err / k_est + 2 * d + 2 * d * (d + 1) / (len(yy) - d - 1)

    if debug:
        fig = plt.figure(figsize=(12, 5))
        ax = fig.add_subplot(311)
        ax.plot(d, rms_err, '-k', label='rms-err')
        ax.legend(loc=2)
        ax.axhline(k_est * 2., ls=":", color='r')
        ax.axhline(k_est, color='red')
        ax.axhline(k_est / 2., ls=':', color='r')
        ax = fig.add_subplot(312)
        ax.plot(d, BIC, '-k', label='BIC')
        ax.legend(loc=2)
        ax = fig.add_subplot(313)
        ax.plot(d, AIC, '-r', label='AIC')
        ax.legend(loc=2)
        plt.savefig(kwargs["outdir"] + "/debugplot.png")
        plt.close(fig)

    # pick the order that minimises the AIC
    best_poly_order = np.argmin(AIC)
    best_poly = all_polys[best_poly_order]
    return (best_poly(xx))
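# A rough usage sketch (not from the original source). It assumes the
# module-level P used above is numpy.polynomial.Polynomial and that mspec is a
# 1-D masked spectrum; the per-channel noise estimate k_est below is a simple
# MAD-of-differences heuristic, invented here for illustration.
import numpy as np
import numpy.ma as ma

# Synthetic masked spectrum: gentle linear drift plus noise
xx = np.arange(1000)
mspec = ma.masked_invalid(0.3 + 2e-4 * xx + np.random.normal(0, 0.05, xx.size))

# Crude channel-to-channel noise estimate (MAD of first differences)
diffs = np.abs(np.diff(mspec.compressed()))
k_est = 1.4826 * np.median(diffs) / np.sqrt(2.0)

baseline = get_poly_baseline(mspec, k_est, debug=False)
residual = mspec - baseline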
def ptcloglog(imagelist, *coor, **kargs):
    """
    TODO: Need to be finished!!

    Produce a PTC plot, on log-log axes, for flat fields at different light levels.
    The first image must be a bias.
    The FPN is not eliminated. This curve should illustrate the different
    regions of a typical detector.
    """
    signal = []
    variance = []
    stddev = []

    x1, x2, y1, y2 = imagelist[0].get_windowcoor(*coor)
    print("No error")

    # Read in bias
    bias = imagelist[0]

    # For all images, compute signal, std and variance
    for image in imagelist[1:]:
        ff = image
        ff = ff - bias
        signal.append(np.mean(ff[x1:x2, y1:y2]))
        variance.append(np.var(ff[x1:x2, y1:y2]))
        stddev.append(np.std(ff[x1:x2, y1:y2]))
        print(f"Signal: {signal[-1]} Variance: {variance[-1]}")

    coefts = ma.polyfit(signal[:-3], variance[:-3], 1)
    polyts = coefts[0] * np.array(signal) + coefts[1]
    print(1 / coefts[0], coefts[1])

    # plot variance vs. signal
    plt.figure()
    # plot(meansig, masked_variance, 'b.')
    # plot(signal, variance, 'b.')
    # plot(signal[:-3], polyts[:-3], 'r-')
    # Plot the curves
    plt.loglog(signal, stddev, 'r')
def masked_polyfit(myTime, data, order, mask, firstNan, fit=False):
    '''
    Polynomial fit of each column of `data` against `myTime` using the
    standard (masked) polyfit. Columns without enough valid points are
    left masked in the returned coefficient array.
    '''
    maskedY = ma.masked_array(data, mask=mask)
    full = np.where(firstNan > order)[0]
    # initialize coef array
    # coef = np.zeros((order+1, data.shape[1]))
    coef = ma.zeros((order + 1, data.shape[1])) + np.nan
    coef.mask = np.ones(coef.shape)
    coef.mask[:, full] = 0
    if full.size > 0:
        print('full shape', full.shape)
        print('time shape', myTime.shape)
        print('mask shape', mask.shape)
        coef[:, full] = ma.polyfit(myTime, maskedY[:, full], order)
    if fit:
        fitX = np.tile(myTime, (data.shape[1], 1))
        fit = np.polyval(coef, fitX.T)
        return coef, fit
    return coef
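# A small usage sketch (my reading of the interface, not from the original
# source): data holds one time series per column, mask flags bad samples, and
# firstNan gives the number of valid samples per column, so only columns with
# more samples than the polynomial order get fitted.
import numpy as np
import numpy.ma as ma

t = np.linspace(0.0, 10.0, 50)
data = np.column_stack([2.0 * t + 1.0, -0.5 * t + 3.0])  # two synthetic series
mask = np.zeros_like(data, dtype=bool)
mask[10:15, 0] = True                                    # pretend a few samples are bad
first_nan = np.array([50, 50])                           # valid samples per column

coef = masked_polyfit(t, data, order=1, mask=mask, firstNan=first_nan)
print(coef)  # approximately [[2.0, -0.5], [1.0, 3.0]]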
def two_layer_treatment(mvp_dict, x_sills, x_ranges):
    """Estimate g-prime and interface depth

    Inputs
    ------
    mvp_dict: dict
        Used throughout this file
    x_sills: 2-tuple of floats
        x position of the start and end of the sill in kilometres
    x_ranges: 2-tuple of floats
        x position of location where output is applicable

    For my transect:
        first sill: x_sills = (48, 58), x_ranges = (0, 80)
        second sill: x_sills = (75, 90), x_ranges = (65, 200)

    Returns
    -------
    rho_interface : float
        Density of the "two-layer" interface
    gprime : 1D array
        g * delta_rho/rho
    interface_depth : 1D array
        Depth in metres of "two-layer" interface
    """
    # Simplify names of commonly used variables
    prho = mvp_dict['prho'].copy()
    z_c = mvp_dict['z_c'].copy()
    x = mvp_dict['dist_flat']
    bottom = np.array(mvp_dict['bottom'])

    sill_inds = np.where(np.logical_and(x > x_sills[0], x < x_sills[1]))[0]

    # Find interface of mode-1 wave for each profile in sill_inds
    rho_interfaces = np.zeros_like(sill_inds, dtype='float')
    for i, ind in enumerate(sill_inds):
        # Interpolate horizontal mode structure against density
        f = interp1d(mvp_dict['hori_0'][ind, :], prho[ind, :])
        # Find density of zero crossing of horizontal structure
        rho_interfaces[i] = f(0)

    rho_interface = np.nanmean(rho_interfaces)

    # Find depth of rho_interface along transect
    x_in, y_in, z_in = x, z_c, prho
    interface_depth = get_contour(x_in, y_in, z_in, rho_interface)

    # Preallocate results to keep
    gprime = np.full_like(x_in, np.nan)

    # Find average density in each layer using linear fit
    for i, rho_i in enumerate(mvp_dict['prho']):
        top_layer_inds = z_c <= interface_depth[i]
        bot_layer_inds = z_c > interface_depth[i]

        top_z = z_c[top_layer_inds]
        bot_z = z_c[bot_layer_inds]

        top_rho = prho[i, :][top_layer_inds]
        bot_rho = prho[i, :][bot_layer_inds]

        if (~top_rho.mask).sum() < 3 or (~bot_rho.mask).sum() < 3:
            # Not enough values to do linear fit
            continue

        p_top = ma.polyfit(top_z, top_rho, 1)
        p_bot = ma.polyfit(bot_z, bot_rho, 1)

        top_rho_avg = np.polyval(p_top, interface_depth[i] / 2)
        bot_rho_avg = np.polyval(p_bot, (bottom[i] + interface_depth[i]) / 2)

        gprime[i] = 9.81 * (bot_rho_avg - top_rho_avg) / bot_rho_avg

    # Remove output where inappropriate
    inappropriate_inds = np.logical_or(x < x_ranges[0], x > x_ranges[1])
    gprime[inappropriate_inds] = np.nan
    interface_depth[inappropriate_inds] = np.nan

    return rho_interface, gprime, interface_depth
def qqplot(data, distrib=ssd.norm, alpha=0.4, beta=0.4, fsp=None,
           plot_line=True, **kwargs):
    """
    Returns a quantile-quantile plot with theoretical quantiles in
    abscissae, and experimental quantiles in ordinates.
    The experimental quantiles are estimated through the equation:

    .. math::
        q_i = \\frac{i-\\alpha}{n-\\alpha-\\beta}

    where :math:`i` is the rank order statistic, :math:`n` the number of
    unmasked data and :math:`\\alpha` and :math:`\\beta` two parameters
    between 0 and 1. The default :math:`\\alpha=\\beta=0.4` gives
    approximately unbiased estimates of the quantiles.

    Parameters
    ----------
    data : array
        Input data
    distrib : {norm, function}, optional
        Theoretical distribution used to compute the expected quantiles.
        If None, use a normal distribution.
    alpha : {float} optional
        Coefficient for the computation of plotting positions.
    beta : {float} optional
        Coefficient for the computation of plotting positions.
    fsp : {None, Subplot}, optional
        Subplot where to plot. If None, use the current axes.
    plot_line : {True, False}
        Whether to compute a regression line.

    Returns
    -------
    plotted : :class:`matplotlib.lines.Line2D`
        Plotted data
    lines : :class:`matplotlib.lines.Line2D`
        Plotted regression line
    (a, b) : tuple
        Slope and intercept of the regression line.

    Notes
    -----
    * The ``distrib`` parameter must be a function with a :meth:`.ppf` method.
    * The input data is ravelled beforehand.

    See Also
    --------
    scipy.stats.mstats.mquantiles
        Computes quantiles from a population.
    """
    data = np.ravel(data)
    qq = qqcalc(data, distrib=distrib, alpha=alpha, beta=beta)
    #
    if fsp is None:
        fsp = pyplot.gca()
    if not len(kwargs):
        kwargs.update(marker="o", c="k", ls="")
    plotted = fsp.plot(qq, data, **kwargs)
    #
    if plot_line:
        (a, b) = ma.polyfit(qq, data, 1)
        xlims = fsp.get_xlim()
        regline = np.polyval((a, b), xlims)
        lines = fsp.plot(xlims, regline, "k:")
        return (plotted, lines, (a, b))
    return plotted
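# A small usage sketch (not part of the original module). For a normally
# distributed sample the fitted QQ regression line has slope close to the
# sample's standard deviation and intercept close to its mean.
import numpy as np
import matplotlib.pyplot as plt

sample = np.random.normal(loc=2.0, scale=3.0, size=200)
plotted, lines, (a, b) = qqplot(sample)
print("slope (~ std):", a, "  intercept (~ mean):", b)
plt.show()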
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

data = np.loadtxt('faulty_data.dat')
plt.plot(data[:, 0], data[:, 1], 'o')

x = data[:, 0]
y = data[:, 1]

# Flag the obviously bad measurements (here, y > 35) and build masked arrays
mask = y > 35
y_m = ma.masked_array(y, mask)
x_m = ma.masked_array(x, mask)

# Quadratic fit to all the data versus the same fit with the bad points masked out
fit_orig = np.polyfit(x, y, 2)
fit_masked = ma.polyfit(x_m, y_m, 2)

x_i = np.linspace(-6, 6, 35)
plt.plot(x_i, np.polyval(fit_orig, x_i), label='original')
plt.plot(x_i, np.polyval(fit_masked, x_i), label='masked')
plt.legend()
plt.show()
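# Continuing the snippet above (a quick check, not part of the original
# example): ma.polyfit simply drops the masked samples before fitting, so the
# masked fit agrees with an ordinary np.polyfit restricted to the good points.
good = ~mask
assert np.allclose(fit_masked, np.polyfit(x[good], y[good], 2))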
def run(self, **keyval):
    """ Method to calculate the continuum from the given masked spectrum.
        If search=True is given as an argument then the algorithm will
        iterate through the different order polynomials to find the best
        fit, based on noise level.

        Parameters
        ----------
        keyval : dictionary
            Dictionary containing the keyword value pair arguments

        Returns
        -------
        numpy array containing the best fit continuum

        Notes
        -----
        Arguments for the run method:

        - search : bool, whether or not to search for the best fit. Default: False
        - deg : int, the degree of polynomial to use. Default: 1
    """
    # set up the data elements
    args = {"x": self.x,
            "y": ma.fix_invalid(self.y, fill_value=0.0),
            # invert the mask: a masked array uses True for bad values, while
            # the fit expects weights, so bad points get weight 0 (False) and
            # good points weight 1 (True)
            "w": ~self.y.mask}
    # get the given arguments
    search = False
    noise = None
    maxchisq = 3.0
    if "search" in keyval:
        search = keyval["search"]
    if "noise" in keyval:
        noise = keyval["noise"]
    if "chisq" in keyval:
        maxchisq = keyval["chisq"]
    for arg in ["deg"]:
        if arg in keyval:
            args[arg] = keyval[arg]
    # if searching for the best fit
    # limited to 3rd order as 4th and 5th order could fit weak wide lines
    if search:
        chisq = {0: 1000., 1: 1000., 2: 1000., 3: 1000.}
        # iterate over each possible order
        for order in chisq:
            args["deg"] = order
            pfit = np.polyfit(**args)
            if len(pfit) == 1:
                numpar = 1
            else:
                # find the number of free parameters
                # note that if a coefficient is << the max coefficient
                # it is not considered a free parameter as it has very
                # little effect on the fit
                numpar = 0
                mxpar = max(abs(pfit))
                for i in range(len(pfit)):
                    if abs(mxpar / pfit[i]) < 1000.:
                        numpar += 1
            fit = np.polyval(pfit, self.x)
            chisq[order] = (stats.reducedchisquared(self.y, fit, numpar, noise), numpar)
        # find the best fit, based on number of free parameters and chisq
        mv = 1000.
        order = 0
        for k in chisq:
            if chisq[k][0] < mv and (mv - chisq[k][0]) / mv > 0.2:
                mv = chisq[k][0]
                order = k
        if mv > maxchisq:
            logging.warning("No good fit for continuum found")
            return None
        args["deg"] = order
        logging.info("Using polynomial fit of order %i with chi^2 of %f" % (order, mv))
        # do the final fit
        pfit = np.polyfit(**args)
        fit = np.polyval(pfit, self.x)
    else:
        # do the fit with the given parameters
        pfit = ma.polyfit(**args)
        fit = np.polyval(pfit, self.x)
    return fit