def plotting(file, ext):
    """Save one smoothed sleep/eat activity plot per calendar month.

    Reads a CSV with a two-row column header (date, activity) and an index of
    ``HH:MM:SS`` strings. For every month present in the column dates, the
    "Sleep" and "Eat" columns are summed across that month, divided by half
    the number of selected columns, LOWESS-smoothed, and written to
    ``b2_TimeSeries/Activity_<month number>.<ext>``.

    Parameters
    ----------
    file : str or file-like
        Anything ``pandas.read_csv`` accepts.
    ext : str
        Image extension (without the leading dot) used for the saved figures.

    Returns
    -------
    None
    """
    ts = pd.read_csv(file, header=[0, 1], index_col=[0])
    ts.index = [dt.datetime.strptime(x, "%H:%M:%S").time() for x in ts.index]
    # Re-assign the index instead of ``inplace=True``: the in-place form of
    # MultiIndex.set_levels was removed in pandas 2.0.
    ts.columns = ts.columns.set_levels(
        [dt.datetime.strptime(x, "%Y-%m-%d").date() for x in ts.columns.levels[0].values],
        level=0,
    )

    # Fractional hour of each row; used as the x variable for the smoothing.
    xtime = [int(x.hour) + int(x.minute) / 60 for x in ts.index]
    # One datetime per minute of a day; used as the plotted x axis.
    timeind = pd.date_range("00:00", "23:59", freq="min").to_pydatetime()

    # Date of every column (not just the unique level values), so the month
    # mask lines up with the columns whatever their order or count per date.
    col_dates = ts.columns.get_level_values(0)

    for monthnum in sorted({d.month for d in col_dates}):
        motxt = time.strftime("%B", time.strptime(str(monthnum), "%m"))
        moflag = [d.month == monthnum for d in col_dates]
        onemo = ts.loc[:, moflag]

        # Half the selected columns == number of days when every date has
        # exactly a "Sleep" and an "Eat" column.
        ndays = len(onemo.columns) / 2
        sleepy = onemo.xs("Sleep", level=1, axis=1).sum(axis=1) / ndays
        eaty = onemo.xs("Eat", level=1, axis=1).sum(axis=1) / ndays

        fig = plt.figure(figsize=(18, 6))
        ax = fig.add_subplot(111)

        # Sleep
        filtereds = lowess(sleepy, xtime, is_sorted=True, frac=0.025, it=0)
        ax.plot(timeind, filtereds[:, 1], "b", linewidth=2, label="Sleeping")
        ax.fill_between(timeind, 0, filtereds[:, 1], alpha=0.3, facecolor="b")

        # Eat
        filterede = lowess(eaty, xtime, is_sorted=True, frac=0.025, it=0)
        ax.plot(timeind, filterede[:, 1], "orange", linewidth=2, label="Eating")
        ax.fill_between(timeind, 0, filterede[:, 1], alpha=0.3, facecolor="orange")

        # Axis formatting
        xax = ax.get_xaxis()
        xax.set_major_locator(mdates.HourLocator(byhour=range(0, 24, 2)))
        xax.set_major_formatter(mdates.DateFormatter("%H:%M"))
        ax.set_title("Activity Fraction at a Given Time of Day", fontsize="xx-large")
        # NOTE(review): x is passed as the string "16:00" on a date axis;
        # confirm the label lands where intended with the matplotlib in use.
        ax.text("16:00", 0.9, motxt, fontsize="xx-large", color="k", fontweight="bold")
        ax.legend(fontsize="x-large")
        ax.set_ylim(0, 1.1)
        fig.autofmt_xdate()

        filename = "b2_TimeSeries/Activity_" + str(monthnum) + "." + ext
        fig.savefig(filename)
def loess(x, y, frac=0.2, it=None, scatter=True):
    """Return the statsmodels LOWESS fit of ``y`` on ``x``.

    Both inputs are copied into arrays and ordered by ``x`` before fitting.
    ``it`` is forwarded only when it is not ``None`` (the original comment
    notes that setting it helps when NaNs appear in the output). ``scatter``
    is accepted but not used by this function.

    Returns the array produced by ``lowess``.
    """
    from statsmodels.nonparametric.smoothers_lowess import lowess

    x_arr = np.array(x)
    order = x_arr.argsort()
    y_sorted = np.array(y)[order]
    x_sorted = x_arr[order]

    extra = {} if it is None else {"it": it}
    return lowess(y_sorted, x_sorted, frac=frac, **extra)
def test_iter(self):
    """Compare lowess output for ``it=0`` and ``it=3`` with the reference CSV."""
    rfile = os.path.join(rpath, "test_lowess_iter.csv")
    # Use a context manager so the fixture handle is closed deterministically.
    with open(rfile, "rb") as fh:
        test_data = np.genfromtxt(fh, delimiter=",", names=True)

    expected_lowess_no_iter = np.array([test_data["x"], test_data["out_0"]]).T
    expected_lowess_3_iter = np.array([test_data["x"], test_data["out_3"]]).T

    actual_lowess_no_iter = lowess(test_data["y"], test_data["x"], it=0)
    actual_lowess_3_iter = lowess(test_data["y"], test_data["x"], it=3)

    assert_almost_equal(expected_lowess_no_iter, actual_lowess_no_iter, decimal=testdec)
    assert_almost_equal(expected_lowess_3_iter, actual_lowess_3_iter, decimal=testdec)
def test_frac(self):
    """Compare lowess output for ``frac=2/3`` and ``frac=1/5`` with the reference CSV."""
    rfile = os.path.join(rpath, "test_lowess_frac.csv")
    # Use a context manager so the fixture handle is closed deterministically.
    with open(rfile, "rb") as fh:
        test_data = np.genfromtxt(fh, delimiter=",", names=True)

    expected_lowess_23 = np.array([test_data["x"], test_data["out_2_3"]]).T
    expected_lowess_15 = np.array([test_data["x"], test_data["out_1_5"]]).T

    actual_lowess_23 = lowess(test_data["y"], test_data["x"], frac=2.0 / 3)
    actual_lowess_15 = lowess(test_data["y"], test_data["x"], frac=1.0 / 5)

    # The 2/3 case is checked to one decimal less than the 1/5 case.
    assert_almost_equal(expected_lowess_23, actual_lowess_23, decimal=testdec - 1)
    assert_almost_equal(expected_lowess_15, actual_lowess_15, decimal=testdec)
def test_iter(self):
    """Compare lowess output for ``it=0`` and ``it=3`` with the reference CSV."""
    rfile = os.path.join(rpath, 'test_lowess_iter.csv')
    # Use a context manager so the fixture handle is closed deterministically.
    with open(rfile, 'rb') as fh:
        test_data = np.genfromtxt(fh, delimiter=',', names=True)

    expected_lowess_no_iter = np.array([test_data['x'], test_data['out_0']]).T
    expected_lowess_3_iter = np.array([test_data['x'], test_data['out_3']]).T

    actual_lowess_no_iter = lowess(test_data['y'], test_data['x'], it=0)
    actual_lowess_3_iter = lowess(test_data['y'], test_data['x'], it=3)

    assert_almost_equal(expected_lowess_no_iter, actual_lowess_no_iter, decimal=testdec)
    assert_almost_equal(expected_lowess_3_iter, actual_lowess_3_iter, decimal=testdec)
def test_frac(self):
    """Compare lowess output for ``frac=2/3`` and ``frac=1/5`` with the reference CSV."""
    rfile = os.path.join(rpath, 'test_lowess_frac.csv')
    # Use a context manager so the fixture handle is closed deterministically.
    with open(rfile, 'rb') as fh:
        test_data = np.genfromtxt(fh, delimiter=',', names=True)

    expected_lowess_23 = np.array([test_data['x'], test_data['out_2_3']]).T
    expected_lowess_15 = np.array([test_data['x'], test_data['out_1_5']]).T

    actual_lowess_23 = lowess(test_data['y'], test_data['x'], frac=2. / 3)
    actual_lowess_15 = lowess(test_data['y'], test_data['x'], frac=1. / 5)

    # The 2/3 case is checked to one decimal less than the 1/5 case.
    assert_almost_equal(expected_lowess_23, actual_lowess_23, decimal=testdec - 1)
    assert_almost_equal(expected_lowess_15, actual_lowess_15, decimal=testdec)
def test_delta(self):
    """Compare lowess output for three ``delta`` settings with the reference CSV."""
    rfile = os.path.join(rpath, "test_lowess_delta.csv")
    # Use a context manager so the fixture handle is closed deterministically.
    with open(rfile, "rb") as fh:
        test_data = np.genfromtxt(fh, delimiter=",", names=True)

    expected_lowess_del0 = np.array([test_data["x"], test_data["out_0"]]).T
    expected_lowess_delRdef = np.array([test_data["x"], test_data["out_Rdef"]]).T
    expected_lowess_del1 = np.array([test_data["x"], test_data["out_1"]]).T

    actual_lowess_del0 = lowess(test_data["y"], test_data["x"], frac=0.1)
    actual_lowess_delRdef = lowess(
        test_data["y"], test_data["x"], frac=0.1,
        delta=0.01 * np.ptp(test_data["x"]),
    )
    actual_lowess_del1 = lowess(
        test_data["y"], test_data["x"], frac=0.1, delta=1.0 + 1e-10
    )

    assert_almost_equal(expected_lowess_del0, actual_lowess_del0, decimal=testdec)
    assert_almost_equal(expected_lowess_delRdef, actual_lowess_delRdef, decimal=testdec)
    # This case is pinned to 10 decimals rather than ``testdec``.
    assert_almost_equal(expected_lowess_del1, actual_lowess_del1, decimal=10)
def compute_normalization(G_auto_conf, gctrack, conf, bdy):
    """Fit a per-row LOWESS GC trend and return GC- and profile-normalized data.

    Parameters
    ----------
    G_auto_conf : 2-D array
        One row per cell (unpacked as ``ncells, nbins``).
    gctrack, conf : arrays
        ``gctrack[0:auto_bdy][conf[0:auto_bdy]]`` is used as the lowess x
        variable for every row.
    bdy : indexable
        ``bdy[21][1]`` gives ``auto_bdy``.

    Returns
    -------
    (gceffect, gcnormprofile, normprofile)
        The fitted trend per row, the data divided by the mean-normalized
        trend, and that result divided by the across-row average profile.
        NaNs in the normalizers are replaced as in the code below.
    """
    auto_bdy = bdy[21][1]
    # Single-argument print() calls behave identically on Python 2 and 3.
    print(G_auto_conf.shape)
    print("this takes time....")
    sys.stdout.flush()
    t0 = time.time()

    gceffect = np.zeros_like(G_auto_conf)
    ncells, nbins = G_auto_conf.shape

    # The x variable is the same for every cell, so build it once.
    gc_conf = gctrack[0:auto_bdy][conf[0:auto_bdy]]

    for cell in range(ncells):
        if cell % 5 == 0:
            # Progress on a single line, without a trailing newline.
            sys.stdout.write("%s cells " % (cell,))
            sys.stdout.flush()
        gceffect[cell, :] = lowess(G_auto_conf[cell], gc_conf,
                                   frac=0.05, return_sorted=False)
    print("")
    print("%s mins" % ((time.time() - t0) / 60,))

    gcnorm = gceffect / gceffect.mean(axis=1)[:, np.newaxis]
    # Where the trend is undefined, leave the data unchanged (divide by 1).
    gcnan = np.isnan(gcnorm)
    gcnorm[gcnan] = 1
    gcnormprofile = G_auto_conf / gcnorm

    avgprofile = (gcnormprofile / gcnormprofile.mean(axis=1)[:, np.newaxis]).mean(axis=0)
    normprofile = gcnormprofile / avgprofile
    # Fall back to the GC-normalized value where the profile division gave NaN.
    nannorm = np.isnan(normprofile)
    normprofile[nannorm] = gcnormprofile[nannorm]
    return gceffect, gcnormprofile, normprofile
def plot_spectras(df, outfile=None):
    """
    Function to plot spectras.

    Plots the row-wise median of ``df`` as points and overlays a LOWESS fit
    of that median against ``df.index`` on log-log axes.

    Parameters
    ----------
    df: pd.DataFrame
        dataframe containing spectras
    outfile (optional): string
        filepath for saving plot

    Returns:
    --------
    Pyplot figure and optionally saves figure to file
    """
    # plot data
    spectra_fig = plt.figure(1)
    median = df.median(axis=1)  # computed once, used for both curves
    plt.plot(median, 'k.', alpha=.05, label='median data with QC flag 0')

    # plot loess smoothed line (the first 40 fitted points are not drawn)
    smoothed = lowess(median.values, df.index, is_sorted=True, frac=0.01, it=0)
    plt.plot(smoothed[40:, 0], smoothed[40:, 1], 'b', label='lowess fit')

    # tweak plot
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('f (Hz)')
    plt.ylabel('spectra (T)')
    plt.legend()
    plt.tight_layout()

    # save plot if desired
    if outfile:
        plt.savefig(outfile, dpi=300, bbox_inches='tight')

    # Return the figure, as the docstring has always promised.
    return spectra_fig
def apply_lowess_filter(x, y):
    """Plot a LOWESS fit of ``y`` on ``x`` on the current pyplot axes.

    ``x`` is passed to lowess as already sorted. The curve is labelled
    "LOWESS"; nothing is returned.
    """
    # The difference between lowess and loess seems pretty subtle, and
    # possibly more by convention than anything else.
    from statsmodels.nonparametric.smoothers_lowess import lowess

    fit = lowess(y, x, is_sorted=True, frac=0.025, it=0)
    fit_x = fit[:, 0]
    fit_y = fit[:, 1]
    plt.plot(fit_x, fit_y, label="LOWESS")
def smooth(y, x='notDefined', frac=0.05):
    """Return the LOWESS-smoothed values of ``y``.

    Parameters
    ----------
    y : sequence
        Values to smooth.
    x : sequence, optional
        Positions of ``y``. When left at the ``'notDefined'`` sentinel (or
        ``None``), ``range(len(y))`` is used.
    frac : float
        Passed to ``lowess`` as ``frac``.

    Returns
    -------
    The second column of the ``lowess`` result (the fitted values).
    """
    # Import unconditionally: the old ``if lowess not in sys.modules`` guard
    # read the local name before it was bound and raised UnboundLocalError.
    from statsmodels.nonparametric.smoothers_lowess import lowess

    # Compare against the sentinel only for strings, so an array ``x`` is
    # never put through an element-wise ``==``.
    if x is None or (isinstance(x, str) and x == 'notDefined'):
        x = range(len(y))
    filtered = lowess(y, x, frac=frac)
    return filtered[:, 1]
def csv_each_gridcell(obs_data, annual_obs, rcp85, rebased_26, rebased_45,
                      rebased_60, rebased_85):
    '''
    Produce a csv file for each gridcell containing observations,
    lowess-smoothed observations, and the four RCP scenarios for each
    1x1 lat/lon gridcell on the globe. Add a header line that includes
    the header metadata.

    The files are written into a hard-coded output directory, which becomes
    the process working directory. The header text is read from the
    module-level ``gridcell_metadata['full_name']``.
    '''
    # The output directory never changes, so switch to it once up front
    # rather than once per gridcell.
    os.chdir(
        '/Users/hausfath/Desktop/Climate Science/Carbon Brief/Warming Map/csvs/'
    )
    n = 0
    for lat in range(0, 180):
        for lon in range(0, 360):
            obs = pd.DataFrame({
                'year': obs_data['years'],
                'obs_anoms': annual_obs[lat, lon].round(2),
                'uncertainty': obs_data['unc'][lat, lon].round(2)
            })
            model = pd.DataFrame({
                'year': rcp85['years'],
                'rcp26': rebased_26[lat, lon].round(2),
                'rcp45': rebased_45[lat, lon].round(2),
                'rcp60': rebased_60[lat, lon].round(2),
                'rcp85': rebased_85[lat, lon].round(2)
            })
            result = pd.merge(obs, model, how='outer', on=['year'])

            # Bandwidth: 10 divided by the index span of the valid observations.
            # NOTE(review): a cell with no valid observations (indices are
            # None) or a single valid one (zero span) fails here - confirm
            # the inputs rule that out.
            first_valid = result['obs_anoms'].first_valid_index()
            last_valid_obs = result['obs_anoms'].last_valid_index()
            bwidth = 10. / (last_valid_obs - first_valid)
            smoothed_data = lowess(result['obs_anoms'], result['year'],
                                   is_sorted=True, frac=bwidth)
            smooth = pd.DataFrame({
                'year': smoothed_data[:, 0],
                'smoothed_anoms': smoothed_data[:, 1].round(2)
            })
            result = pd.merge(result, smooth, how='outer', on=['year'])
            result = result[[
                'year', 'obs_anoms', 'smoothed_anoms', 'uncertainty',
                'rcp26', 'rcp45', 'rcp60', 'rcp85'
            ]]

            header_lines = gridcell_metadata['full_name'][n]
            lat_label = obs_data['lats'][lat]
            lon_label = obs_data['lons'][lon]
            # Single-argument print() works on both Python 2 and 3.
            print('Saving gridcell ' + str(n) + ' of 64800')
            result.to_csv('gridcell_' + str(lat_label) + '_' + str(lon_label) + '.csv',
                          header=True, index=True, index_label=header_lines,
                          encoding='utf-8')
            n += 1
def select_rdark(data, rdark_list_selection='intercept', pixel_range=(0, 1000),
                 lowess_frac=0.5, pixel_idx=100):
    """Pick a single dark value from the sorted finite values of ``data``.

    Parameters
    ----------
    data : numpy array
        Input values; non-finite entries are discarded before sorting.
    rdark_list_selection : str
        ``'intercept'``: ``b`` as returned by
        ``acolite.shared.regression.lsqfity`` on the selected slice of sorted
        values. ``'smooth'``: first value of a LOWESS fit of that slice.
        ``'absolute_index'``: the sorted value at ``pixel_idx``.
        Anything else: the smallest finite value.
    pixel_range : two-item sequence
        Start/stop positions into the sorted values; the stop is capped at
        the number of finite values. The argument is never modified.
    lowess_frac : float
        ``frac`` for the ``'smooth'`` option.
    pixel_idx : int
        Index used by the ``'absolute_index'`` option.

    Returns
    -------
    The selected value.
    """
    from numpy import isfinite, sort, linspace, nanmin

    dsorted = sort(data[isfinite(data)], axis=None)

    # Work on local bounds: writing the capped stop back into ``pixel_range``
    # used to corrupt the shared default list and the caller's own list.
    start = pixel_range[0]
    stop = nanmin((len(dsorted), pixel_range[1]))
    rdark_list = dsorted[start:stop]

    if rdark_list_selection == 'intercept':
        # Imported only where needed so the other options do not require it.
        import acolite as pp
        xi = linspace(start, stop, num=stop - start)
        m, b, r, sm, sb = pp.shared.regression.lsqfity(xi, rdark_list)
        rdark_sel = b
    elif rdark_list_selection == 'smooth':
        from statsmodels.nonparametric.smoothers_lowess import lowess
        xi = linspace(start, stop, num=stop - start)
        rdark_smooth = lowess(rdark_list, xi, frac=lowess_frac)[:, 1]
        rdark_sel = rdark_smooth[0]
    elif rdark_list_selection == 'absolute_index':
        rdark_sel = dsorted[pixel_idx]
    else:
        rdark_sel = dsorted[0]
    return rdark_sel
def plot(self, eid, xlab='sec'):
    """Plot the values of one EID with a line fit and a LOWESS curve.

    Parameters
    ----------
    eid
        Rows whose ``'EID'`` equals this value are selected.
    xlab : str
        ``'sec'`` plots against ``iteration * TAU``; ``'turn'`` plots against
        the iteration number. Any other value is treated as ``'sec'``.

    Returns
    -------
    (fig, ax)
    """
    jj = self['EID'] == eid
    y = self['Value'][jj]
    it = self['iteration'][jj]
    t = it * TAU
    par, err = fit_line(t, y)

    if xlab == 'turn':
        x = it
        # NOTE(review): this scales every element of par/err by TAU; if
        # par[0] is an intercept it presumably should not be scaled - confirm
        # against fit_line's return layout.
        par, err = (TAU * e for e in [par, err])
    else:
        x = t
        xlab = 'sec'

    # Fit the smoother in the same x units that are plotted; previously it
    # was always fitted (and drawn) in seconds, so it was misplaced on the
    # 'turn' axis. Every 100th sample is used for the fit.
    fit = lowess(y[::100], x[::100])

    fig, ax = plt.subplots(1, 1)
    ax.plot(x, y, '.')
    ax.plot(fit[:, 0], fit[:, 1], '-k')
    ax.plot(x, par[0] + x * par[1], '-r',
            label=r'$slp = {:4.2e} \pm {:4.2e}$ [u/{}]'.format(
                par[1], err[1], xlab))
    ax.set_ylabel(r'$\sum_i(\vec s_i, \bar n_{})$'.format(eid))
    ax.set_xlabel(xlab)
    ax.ticklabel_format(axis='y', style='sci', scilimits=(0, 0), useMathText=True)
    ax.legend()
    return fig, ax
def predict(self, s_dimension_value, vol=False, window=3, frac=0.2):
    """Return a cached LOWESS-based prediction for a dimension value.

    Parameters
    ----------
    s_dimension_value
        Reshaped to ``(-1, 5)`` integers and passed to ``self.value``.
    vol : bool
        When true, return ``(pred, stand, real)``; otherwise only ``pred``.
    window : int
        Part of the cache key (formatted with ``%d``).
    frac : float
        ``frac`` passed to ``lowess``.

    Returns
    -------
    ``pred`` (last fitted value) or ``(pred, stand, real)`` where ``stand`` is
    the standard deviation of the fitted values and ``real`` the last
    observed value. All are 0 when the series is empty.
    """
    # ``vol`` changes the shape of the result and ``frac`` changes its value,
    # so both belong in the key; without them a scalar cached by a vol=False
    # call was handed back to a vol=True caller.
    k = "predict/@%s/%s%d/%s/%s" % (self.ts, s_dimension_value, window,
                                     frac, bool(vol))
    res_p = get_Cache(k)
    if res_p is not None:
        return res_p

    s_dimension_value = np.array(s_dimension_value, dtype=int).reshape(-1, 5)
    ser = self.value(s_dimension_value, window=True)

    if ser.size == 0:
        pred, stand, real = 0, 0, 0
    else:
        ser_len = ser.shape[0]
        x = np.arange(ser_len).tolist()
        y = ser[:, 1]
        filtered = lowess(y, x, is_sorted=True, frac=frac, it=2)
        pred = filtered[:, 1][-1]
        real = ser[-1, 1]
        stand = 0 if ser_len <= 1 else np.std(filtered[:, 1])

    res_pred = (pred, stand, real) if vol else pred
    set_Cache(k, res_pred)
    return res_pred
def _plot_smoothed_proportion(
    self,
    ax: plt.Axes,
    clusters: Sequence[Any],
    y_offset: Mapping[Any, float],
    alpha: float = 0.8,
) -> Tuple[Mapping[Any, np.ndarray], Mapping[Any, PolyCollection]]:
    """Draw a LOWESS-smoothed band for each cluster around its offset.

    For every cluster the row ``self._cmat.loc[clust]`` is linearly
    interpolated onto a dense grid spanning the column range, smoothed with
    lowess, and drawn as a filled band between ``y_offset[clust] + value``
    and ``y_offset[clust] - value``.

    Returns the smoothed values and the ``fill_between`` handles, each keyed
    by cluster.
    """
    columns = self._cmat.columns
    start_t, end_t = columns.min(), columns.max()
    x = np.array(columns)

    # Dense evaluation grid: 100 points per unit of the column range, plus one.
    n_grid = int(1 + (end_t - start_t) * 100)
    e = np.linspace(start_t, end_t, n_grid)

    smoothed_proportion = {}
    handles = {}
    for clust in clusters:
        dense = interp1d(x, self._cmat.loc[clust])(e)
        lo = lowess(dense, e, frac=0.3, is_sorted=True, return_sorted=False)
        smoothed_proportion[clust] = lo

        centre = y_offset[clust]
        handles[clust] = ax.fill_between(
            e,
            centre + lo,
            centre - lo,
            color=self.cmap[clust],
            label=clust,
            alpha=alpha,
            edgecolor=None,
        )
    return smoothed_proportion, handles
def plot(self, df, dataset_name, k, eps, l, estimator, yscale):
    """Plot a LOWESS-smoothed curve for every column of ``df`` after the first.

    The first column is the x variable (passed to lowess as already sorted).
    Each remaining column is smoothed and drawn with the label
    ``k=<column name>``. The figure is shown with ``plt.show()``.

    Parameters
    ----------
    df : DataFrame
        Column 0 is x; columns 1.. are the series.
    dataset_name : str
        The part after the last backslash is used in the title.
    k, eps, l
        Passed to the title format; only ``eps`` appears in the title text.
    estimator : str
        The part before the first ``'.'`` becomes the y label.
    yscale
        ``'log'`` switches the y axis to a log scale.
    """
    sampling_freq = 100
    x_val = df.iloc[:, 0]
    n_points = len(x_val)
    # lowess requires frac <= 1, so cap it for inputs shorter than
    # ``sampling_freq`` points.
    frac = min(1.0, float(sampling_freq) / n_points) if n_points else 1.0

    for i in range(1, len(df.columns)):
        y_val = df.iloc[:, i]
        filtered = lowess(y_val, x_val, is_sorted=True, frac=frac, it=3)
        plt.plot(filtered[:, 0], filtered[:, 1], linewidth=1,
                 label='k={0}'.format(df.columns[i]))

    if yscale == 'log':
        # Base 10 is the default; the ``basey`` keyword was removed in
        # matplotlib 3.5.
        plt.yscale('log')
    plt.ylabel(estimator.split('.')[0])
    plt.xlabel(df.columns[0])
    plt.title('{0}: (eps={2})'.format(
        dataset_name.split('\\')[-1], k, eps, l))
    plt.legend()
    plt.grid(True, lw=0.5, ls='--', c='.75')
    plt.margins(0.005)
    plt.show()
def ResidFitted(fitted_model, residuals=None, fits=None, ax=None):
    """
    Draw a residuals-vs-fitted scatter with a lowess line and a zero line,
    annotating the three largest absolute residuals with their index label.

    Parameters
    ---------------------------------------------------------
    fitted_model: A fitted linear regression model from the statsmodels package.
                  Class: <statsmodels.regression.linear_model.OLS>
    residuals: A pandas series of the OLS residuals
               (defaults to ``fitted_model.resid``)
    fits: A pandas series of the fitted values from the OLS model
          (defaults to ``fitted_model.fittedvalues``)
    ax: A specific matplotlib axis. Used if creating subplots

    Returns
    ---------------------------------------------------------
    ax: A matplotlib axis object

    By: Jason Sadowski
    Date: 2019-11-19
    """
    if residuals is None:
        residuals = fitted_model.resid
    if fits is None:
        fits = fitted_model.fittedvalues

    largest = abs(residuals).sort_values(ascending=False)[:3]
    trend = lowess(residuals, fits)

    if ax is None:
        fig, ax = plt.subplots()

    ax.scatter(fits, residuals, edgecolors='k', facecolors='none')
    ax.plot(trend[:, 0], trend[:, 1], color='r')
    ax.set_ylabel('Residuals')
    ax.set_xlabel('Fitted Values')
    ax.set_title('Residuals vs. Fitted')
    ax.plot([min(fits), max(fits)], [0, 0], color='k', linestyle=':')
    for label in largest.index:
        ax.annotate(label, xy=(fits[label], residuals[label]))
    return ax
def residualsVsFitted(results, axes=None):
    """Draw a residuals-vs-fitted plot for a fitted model.

    Parameters
    ----------
    results
        Object exposing ``resid`` and ``fittedvalues`` (pandas Series).
    axes : matplotlib Axes, optional
        Axes to draw on. When omitted, global rcParams (font size, figure
        size) are updated, a new figure is created and ``plt.show()`` is
        called at the end.

    Returns
    -------
    None
    """
    residuals = results.resid
    fitted = results.fittedvalues
    smoothed = lowess(residuals, fitted)
    top3 = abs(residuals).sort_values(ascending=False)[:3]

    # Identity test instead of ``== None``; the drawing code is shared by both
    # modes instead of being duplicated per branch.
    standalone = axes is None
    if standalone:
        plt.rcParams.update({'font.size': 16})
        plt.rcParams["figure.figsize"] = (8, 7)
        fig, ax = plt.subplots()
    else:
        ax = axes

    ax.scatter(fitted, residuals, edgecolors='k', facecolors='none')
    ax.plot(smoothed[:, 0], smoothed[:, 1], color='r')
    ax.set_ylabel('Residuals')
    ax.set_xlabel('Fitted Values')
    ax.set_title('Residuals vs. Fitted')
    ax.plot([min(fitted), max(fitted)], [0, 0], color='k', linestyle=':', alpha=.3)
    # Label the three largest absolute residuals with their index.
    for i in top3.index:
        ax.annotate(i, xy=(fitted[i], residuals[i]))

    if standalone:
        plt.show()
def separating_threshold(bins, ys):
    """Find a pair of peaks in the smoothed curve separated by a deep minimum.

    ``ys`` is LOWESS-smoothed over ``bins`` and peaks are located with
    ``find_peaks(distance=2)``. The highest peak with x below 100 is paired
    with each later peak in turn; the first pair whose void parameter
    ``1 - min / sqrt(peak1 * peak2)`` exceeds 0.7 is returned.

    Returns
    -------
    (first_peak_index, second_peak_index, minimum_index, void, smoothed)
        Indices refer to rows of ``smoothed`` (the lowess output).

    Raises
    ------
    ValueError
        If fewer than two peaks exist, no peak has x below 100, or no pair
        passes the void threshold.
    """
    # F = 0.1: different to review paper, see line 120:
    # https://github.com/ellesec/burstanalysis/blob/master/Burst_detection_methods/logisi_pasq_method.R
    smoothed = lowess(ys, bins, 0.1, it=0, delta=0.0, is_sorted=True)
    grid = smoothed[:, 0]
    curve = smoothed[:, 1]

    # Distance set according to supplementary information, Pasquale et al. 2010
    peaks = find_peaks(curve, distance=2)[0]
    if peaks.size <= 1:
        raise ValueError("Didn't find a burst")

    # Peaks within the first 100 ms; at least one is required.
    early_peaks = peaks[np.where(grid[peaks] < 100)[0]]
    if early_peaks.size == 0:
        raise ValueError("Didn't find a burst")

    # Position (within the peak array) and curve index of the tallest early peak.
    anchor_pos = np.argmax(curve[early_peaks])
    anchor = early_peaks[anchor_pos]

    void_thresh = 0.7
    for later in peaks[anchor_pos + 1:]:
        # Local minimum between the anchor peak and this later peak.
        trough = anchor + np.argmin(curve[anchor:later + 1])
        void = 1 - curve[trough] / np.sqrt(curve[anchor] * curve[later])
        if void > void_thresh:
            return anchor, later, trough, void, smoothed

    raise ValueError("Didn't find a burst")
def scaleLocationPlot(results, axes=None):
    """Draw a scale-location plot for a fitted model.

    Plots the square root of the absolute internally studentized residuals
    against the fitted values, with a lowess line, and labels the three
    largest points with their index.

    Parameters
    ----------
    results
        Object exposing ``fittedvalues``, ``resid`` and ``get_influence()``.
    axes : matplotlib Axes, optional
        Axes to draw on. When omitted a new figure is created and
        ``plt.show()`` is called at the end.

    Returns
    -------
    None
    """
    fitted = results.fittedvalues
    student_residuals = results.get_influence().resid_studentized_internal
    sqrt_student_residuals = pd.Series(np.sqrt(np.abs(student_residuals)))
    # Re-use the residual index so the labels match the original observations.
    sqrt_student_residuals.index = results.resid.index
    smoothed = lowess(sqrt_student_residuals, fitted)
    top3 = abs(sqrt_student_residuals).sort_values(ascending=False)[:3]

    # Identity test instead of ``== None``; drawing code shared by both modes.
    standalone = axes is None
    if standalone:
        fig, ax = plt.subplots()
    else:
        ax = axes

    ax.scatter(fitted, sqrt_student_residuals, edgecolors='k', facecolors='none')
    ax.plot(smoothed[:, 0], smoothed[:, 1], color='r')
    # Raw string: same text, without relying on invalid escape sequences.
    ax.set_ylabel(r'$\sqrt{|Studentized \ Residuals|}$')
    ax.set_xlabel('Fitted Values')
    ax.set_title('Scale-Location')
    ax.set_ylim(0, max(sqrt_student_residuals) + 0.1)
    for i in top3.index:
        ax.annotate(i, xy=(fitted[i], sqrt_student_residuals[i]))

    if standalone:
        plt.show()
def lowess(data, frac=0.15, it=0):
    """Smooth ``data`` against its positions 0..len(data)-1.

    Thin wrapper around ``smoothers_lowess.lowess``; returns only the fitted
    values (second column of the result).
    """
    positions = list(range(len(data)))
    fit = smoothers_lowess.lowess(endog=data, exog=positions, frac=frac, it=it)
    return fit[:, 1]
def bounds_peaks(self):
    """Finds max/min bounds by tracing along peak locations then smoothing

    Positive peaks of ``self._resp_trace`` and of its negation are located
    with ``find_peaks(height=0)``. The trace values at those peaks, scaled by
    1.3, are linearly interpolated over every sample and LOWESS-smoothed.

    Returns
    -------
    (lim_pos, lim_neg)
        Smoothed upper and lower envelopes, one value per sample.
    """
    trace = self._resp_trace
    n_samples = len(trace)

    peaks_pos, _ = find_peaks(trace, height=0)
    peaks_neg, _ = find_peaks(-1 * trace, height=0)

    xx = np.linspace(0, n_samples - 1, n_samples)
    conn_pos = np.interp(xx, peaks_pos,
                         trace[peaks_pos] + (0.3 * trace[peaks_pos]))
    conn_neg = np.interp(xx, peaks_neg,
                         trace[peaks_neg] + (0.3 * trace[peaks_neg]))

    # The smoothing fraction grows with the trace length; cap it at 1 because
    # lowess rejects frac > 1 (reached beyond roughly 3333 samples).
    smooth = min(1.0, n_samples * 30e-5)
    lim_pos = lowess(conn_pos, xx, is_sorted=True, frac=smooth)[:, 1]
    lim_neg = lowess(conn_neg, xx, is_sorted=True, frac=smooth)[:, 1]
    return lim_pos, lim_neg
def plot_volume(self, lw=1, smoothing=False):
    """Show the market volume chart: inventory, bids and asks per iteration.

    Parameters
    ----------
    lw : line width used for all three curves.
    smoothing : when true, the inventory curve is LOWESS-smoothed and
        labelled 'inventory (loess)'; otherwise the raw excess volume is drawn.
    """
    plt.title('Market Volume')
    plt.xlabel('Iteration')
    plt.ylabel('Quantity')

    if smoothing:
        plt.plot(
            self.info.iterations,
            lowess(self.info.excess_volume(), self.info.iterations,
                   return_sorted=False),
            color='black', lw=lw, label='inventory (loess)',
        )
    else:
        plt.plot(self.info.iterations, self.info.excess_volume(),
                 color='black', lw=lw, label='inventory')

    # Bid and ask totals share everything but the side, colour and label.
    for side, colour, label in (('bid', 'green', 'bids'), ('ask', 'red', 'asks')):
        plt.plot(self.info.iterations, self.info.sum_quantity(side),
                 color=colour, lw=lw, label=label)

    plt.legend()
    plt.show()
def component_wise_fit_LLE(inframe, column, frac=0.2, method='lowess'):
    '''
    Fit a smooth curve of ``column`` against the 'Row' index level, one
    component (first index level after swapping) at a time, and plot each.

    'method' can be 'lowess' or 'LLE'.

    Returns a frame with ``column``, ``column + '_means'`` (the fit) and
    ``column + '_LLEresids'`` (data minus fit).

    Raises ValueError for any other ``method``.
    '''
    from statsmodels.nonparametric.smoothers_lowess import lowess

    # Reject unknown methods up front; previously this surfaced as a
    # NameError on ``means`` inside the loop.
    if method not in ('lowess', 'LLE'):
        raise ValueError("method must be 'lowess' or 'LLE', got %r" % (method,))

    frame = inframe.swaplevel('Row', 'Identifier')
    # Map single-letter identifiers to integers ('a' -> 0, 'b' -> 1, ...).
    frame['Identifier'] = frame['Identifier'].map(ord) - 97

    for comp in frame.index.levels[0]:
        cframe = frame.loc[[comp]]
        # Single-argument print() works on both Python 2 and 3.
        print('Component:  %s' % (comp,))
        # Colour index: identifier number wrapped to 12 palette entries.
        cnum = cframe['Identifier'][0] % 12
        rows = cframe.index.get_level_values('Row')

        if method == 'LLE':
            LLE = KernelReg(frame.loc[comp][column], rows, 'c', bw='cv_ls')
            means, mfx = LLE.fit()
        else:
            LLE = lowess(cframe[column], rows, it=10, missing='none', frac=frac)
            means = LLE[:, 1]

        frame.loc[[comp], column + '_means'] = means
        plt.plot(frame.loc[comp][column], rows, 's',
                 color=Paired.hex_colors[cnum], zorder=1)
        plt.plot(means, rows, '-', lw=3,
                 color=Paired.hex_colors[cnum], zorder=2)

    frame[column + '_LLEresids'] = frame[column] - frame[column + '_means']
    return frame[[column, column + '_means', column + '_LLEresids']]
def smooth1(y, x):
    """Return the sorted LOWESS fit of ``y`` on a slightly jittered ``x``.

    Gaussian noise scaled by 1e-12 is added to ``x`` before fitting with
    ``frac=2/3``, no robustifying iterations and ``delta=1.0``.
    """
    jitter = 1e-12 * np.random.randn(len(x))
    return lowess(y, x + jitter, frac=2.0 / 3, it=0, delta=1.0,
                  return_sorted=True)
def smoothChunks(mvt, chunkSize):
    """LOWESS-smooth ``mvt`` in consecutive chunks of ``chunkSize`` samples.

    Each chunk is smoothed independently against its own sample positions;
    negative fitted values are clipped to zero.

    Parameters
    ----------
    mvt : array
        Values to smooth; chunked along the first axis.
    chunkSize : int
        Number of samples per chunk (the last chunk may be shorter).

    Returns
    -------
    Array of the same shape as ``mvt`` holding the smoothed values.
    """
    smoothed = np.zeros(mvt.shape)
    nPts = mvt.shape[0]
    nChunks = int(math.ceil(float(nPts) / chunkSize))

    for chunk in range(nChunks):
        # Single-argument print() works on both Python 2 and 3.
        print('Smoothing chunk {} of {}.'.format(chunk, nChunks - 1))
        start_pos = chunkSize * chunk
        end_pos = min(chunkSize * (chunk + 1), nPts)
        print('start: {}; end: {}'.format(start_pos, end_pos))

        mvtChunk = mvt[start_pos:end_pos]
        smoothChunk = smoo.lowess(mvtChunk, range(len(mvtChunk)), it=2,
                                  frac=0.005, return_sorted=False)
        smoothChunk[smoothChunk < 0.] = 0.
        smoothed[start_pos:end_pos] = smoothChunk
    return smoothed
def test_simple(self):
    """Check default lowess on a noisy line against stored R output."""
    # standard normal noise
    noise = np.array([
        -0.76741118, -0.30754369, 0.39950921, -0.46352422, -1.67081778,
        0.6595567, 0.66367639, -2.04388585, 0.8123281, 1.45977518,
        1.21428038, 1.29296866, 0.78028477, -0.2402853, -0.21721302,
        0.24549405, 0.25987014, -0.90709034, -1.45688216, -0.31780505,
    ])
    # R output
    out = [
        -0.6260344553, 0.565071712, 1.759627189, 2.9579633258,
        4.1560636154, 5.3473396937, 6.522298218, 7.708159388,
        8.8759055519, 9.9409758603, 10.8981138458, 11.7851424728,
        12.6188717297, 13.4098497374, 14.1516996585, 14.9180658147,
        15.6956600199, 16.4783034134, 17.2617441531, 18.0459201716,
    ]

    x = np.arange(20, dtype='float32')
    y = x + noise

    actual_lowess = lowess(y, x)
    expected_lowess = np.array([x, out]).T
    assert_almost_equal(expected_lowess, actual_lowess)
def Sf(time, flux, ferr, **kwargs):
    """Divide ``flux`` and ``ferr`` by an iteratively clipped LOWESS trend.

    Keyword options: ``sort_data`` (passed to lowess as ``is_sorted``,
    default True), ``fraction_rate`` (``frac``, default 0.03), ``iterations``
    (``it``, default 3) and ``points_window`` (default 13; three times this
    is the ``SavGol`` window).

    Up to 40 passes are made. After the first pass, values previously set to
    NaN are filled by nearest-neighbour interpolation before refitting. A
    pass stops the loop when the residual standard deviation drops below the
    reference scatter; otherwise points beyond 3 standard deviations are set
    to NaN for the next pass.

    Returns ``(flux / trend, ferr / trend)`` using the last fitted trend.
    """
    is_sorted = kwargs.get('sort_data', True)
    frac = kwargs.get('fraction_rate', 0.03)
    it = kwargs.get('iterations', 3)
    rmswin = kwargs.get('points_window', 13)
    svgwin = int(rmswin * 3)

    flux_orig = np.copy(flux)
    work = np.copy(flux)

    # Reference scatter estimated from the Savitzky-Golay filtered flux.
    flux_savgol = SavGol(flux, win=svgwin)
    target_scatter = Scatter(flux_savgol / np.nanmedian(flux_savgol),
                             remove_outliers=True, win=rmswin)

    sigma = np.ones(40) * 3.
    for i in range(len(sigma)):
        if i > 0:
            # Fill the points clipped on the previous pass.
            valid = np.logical_not(np.isnan(work))
            positions = np.arange(len(work))
            fill = interp1d(positions[valid], work[valid], kind='nearest',
                            bounds_error=False, fill_value='extrapolate')
            work = fill(positions)

        filtered = lowess(work, time, is_sorted=is_sorted, frac=frac, it=it)
        flux_filter = filtered[:, 1]

        std = np.std(work - flux_filter)
        if std < target_scatter:
            break

        outliers = np.where(abs(work - flux_filter) > sigma[i] * std)[0]
        np.put(work, outliers, np.nan)

    return flux_orig / flux_filter, ferr / flux_filter
def add_lowess(ax, lines_idx=0, frac=.2, **lowess_kwargs):
    """
    Add Lowess line to a plot.

    Parameters
    ----------
    ax : matplotlib Axes instance
        The Axes to which to add the plot
    lines_idx : int
        This is the line on the existing plot to which you want to add
        a smoothed lowess line.
    frac : float
        The fraction of the points to use when doing the lowess fit.
    lowess_kwargs
        Additional keyword arguments are passed to lowess.

    Returns
    -------
    fig : matplotlib Figure instance
        The figure that holds the instance.
    """
    line = ax.get_lines()[lines_idx]
    # Use the public accessors rather than the private ``_x``/``_y`` caches;
    # ``orig=False`` returns the same processed arrays and refreshes them
    # first if they are stale.
    y0 = line.get_ydata(orig=False)
    x0 = line.get_xdata(orig=False)
    lres = lowess(y0, x0, frac=frac, **lowess_kwargs)
    ax.plot(lres[:, 0], lres[:, 1], 'r', lw=1.5)
    return ax.figure
def evaluate_MAP(qty, weights, bins, smooth='kde', lowess_frac=0.3,
                 bw_method='scott', vb=False):
    """Return the location of the maximum of a weighted histogram of ``qty``.

    Parameters
    ----------
    qty, weights, bins
        Passed to ``np.histogram``.
    smooth : str
        ``'lowess'``: maximum of a LOWESS fit of the histogram.
        ``'kde'``: maximum of a weighted Gaussian KDE evaluated on the bin
        edges. Anything else: the edge following the tallest bin.
    lowess_frac : float
        ``frac`` for the lowess option.
    bw_method
        Bandwidth method for the KDE option.
    vb : bool
        When equal to ``True``, plot the normalized histogram, the smoothed
        curve (if any) and the selected value.

    Returns
    -------
    The selected x value.
    """
    post, xaxis = np.histogram(qty, weights=weights, bins=bins)
    # NOTE(review): despite the name this is the left edge plus a full mean
    # bin width, not half of it - confirm that is intended.
    xaxis_centers = xaxis[:-1] + np.diff(xaxis).mean()

    use_lowess = smooth == 'lowess'
    use_kde = smooth == 'kde'

    if use_lowess:
        a = lowess(post, xaxis_centers, frac=lowess_frac)
        MAP = a[np.argmax(a[:, 1]), 0]
    elif use_kde:
        a = gaussian_kde(qty, bw_method=bw_method, weights=weights)
        MAP = xaxis[np.argmax(a.evaluate(xaxis))]
    else:
        MAP = xaxis[np.argmax(post) + 1]

    if vb == True:
        areapost = np.trapz(x=xaxis_centers, y=post)
        plt.plot(xaxis_centers, post / areapost)
        if use_lowess:
            plt.plot(a[:, 0], a[:, 1] / areapost)
        elif use_kde:
            plt.plot(xaxis, a.pdf(xaxis))
        plt.plot([MAP, MAP], plt.ylim())
        plt.show()
    return MAP
def add_lowess(ax, lines_idx=0, frac=.2, **lowess_kwargs):
    """
    Add Lowess line to a plot.

    Parameters
    ----------
    ax : matplotlib Axes instance
        The Axes to which to add the plot
    lines_idx : int
        This is the line on the existing plot to which you want to add
        a smoothed lowess line.
    frac : float
        The fraction of the points to use when doing the lowess fit.
    lowess_kwargs
        Additional keyword arguments are passed to lowess.

    Returns
    -------
    fig : matplotlib Figure instance
        The figure that holds the instance.
    """
    line = ax.get_lines()[lines_idx]
    # Use the public accessors rather than the private ``_x``/``_y`` caches;
    # ``orig=False`` returns the same processed arrays and refreshes them
    # first if they are stale.
    y0 = line.get_ydata(orig=False)
    x0 = line.get_xdata(orig=False)
    lres = lowess(y0, x0, frac=frac, **lowess_kwargs)
    ax.plot(lres[:, 0], lres[:, 1], 'r', lw=1.5)
    return ax.figure
def plot_spectrum(self, ax=None, save=False, filtered=False, frac=0.025, fill=True):
    """Plot the spectrum stored in ``self.data_points``.

    Parameters
    ----------
    ax : matplotlib Axes, optional
        Axes to draw on, so the plot can be embedded in an external figure.
        A new figure is created when omitted.
    save : bool
        When true, save the figure to ``self.filename`` minus its last four
        characters.
    filtered : bool
        When true, draw a LOWESS-smoothed curve instead of the raw data
        (useful for the whole, noisy spectrum).
    frac : float
        ``frac`` for the LOWESS smoothing.
    fill : bool
        When true, shade the two regions starting at ``self.peaks[0]`` and
        ``self.peaks[1]``, each ``self.spacing`` wide.
    """
    if ax is None:
        plt.figure()
        ax = plt.gca()

    ax.set_title("Spectrum")
    ax.set_xlabel("Wavelength (nm)")
    ax.set_ylabel("Normalized Intensity (AU)")

    xs = [pt[0] for pt in self.data_points]
    ys = [pt[1] for pt in self.data_points]

    if not filtered:
        ax.plot(xs, ys, 'b--', label=self.dataname)
    else:
        smoothed = lowess(ys, xs, is_sorted=True, frac=frac, it=0)
        ax.plot(smoothed[:, 0], smoothed[:, 1], label='filtered data')

    # fill in the integral regions defined by peaks and spacing
    if fill:
        fill_point_1 = self.get_range(self.peaks[0], self.peaks[0] + self.spacing)
        fill_point_2 = self.get_range(self.peaks[1], self.peaks[1] + self.spacing)
        fill_xs_1 = [pt[0] for pt in fill_point_1]
        fill_ys_1 = [pt[1] for pt in fill_point_1]
        fill_xs_2 = [pt[0] for pt in fill_point_2]
        fill_ys_2 = [pt[1] for pt in fill_point_2]
        ax.fill_between(fill_xs_1, fill_ys_1, color='lightblue', label='Region I')
        ax.fill_between(fill_xs_2, fill_ys_2, color='orange', label='Region II')

    ax.set_ylim(ymin=0)
    ax.margins(0.05)
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    if save:
        # Axes objects have no savefig(); save through the owning figure.
        ax.figure.savefig(self.filename[:-4])
def get_l_res(arr):
    """calculates residuals from a lowess model for timecourse designs

    Each row of ``arr`` is fitted against ``self.tpoints`` (taken from the
    enclosing scope) and the fitted values are subtracted from the row.
    Returns the residual rows as an array.
    """
    return np.array([
        row - lowess(row, self.tpoints, it=1)[:, 1]
        for row in arr
    ])
def resid_fit(lm):
    """Draw Residuals vs. Fitted Values Plot.

    Plots the residuals against the fitted values with a dotted zero line
    and a lowess line, annotates the three observations with the largest
    absolute residuals, and returns the ``plt`` module.
    """
    model_values = select_model_type(lm)

    # Scatter point coordinates
    fitted = model_values.get_fitted_values()
    residuals = model_values.get_residuals()

    # Smoothing line: columns of the lowess result are (x, fitted y)
    grid, yhat = lowess(residuals, fitted).T

    # Positions of the three largest absolute residuals, for annotation
    top_3 = np.abs(residuals).argsort()[-3:]

    # Zero line, lowess line, then the scatter itself
    x_span = [fitted.min(), fitted.max()]
    plt.plot(x_span, [0, 0], "k:")
    plt.plot(grid, yhat, "r-")
    plt.plot(fitted, residuals, "o", mec=edge_col, markeredgewidth=1,
             fillstyle="none")

    for point in top_3:
        plt.annotate(point, xy=(fitted[point], residuals[point]), color="r")

    plt.title("Residual vs. Fitted", fontsize=title_size)
    plt.xlabel("Fitted values")
    plt.ylabel("Residuals")
    return plt
def plot_doppler(x, tx_carrier_freq, beacon_carrier_freq):
    """Plot the Doppler shift (in km/h) with outliers removed and a LOWESS fit.

    The shift is half the difference between the TX and beacon carrier
    column differences (column 0 minus column 1), converted from bins to
    km/h with the constants below. Points flagged by ``is_outlier`` are
    dropped before plotting and smoothing. Draws on a new pyplot figure.
    """
    plt.figure()

    tx_delta = tx_carrier_freq[:, 0] - tx_carrier_freq[:, 1]
    beacon_delta = beacon_carrier_freq[:, 0] - beacon_carrier_freq[:, 1]
    dop_bin = (tx_delta - beacon_delta) / 2.0

    # 50 km/h doppler = 433e6 * ((3e8 + 50/3.6) / 3e8 - 1)
    #                 = 50/3.6 / 3e8 * 433e6
    hz_per_bin = 2.4e6 / 16384
    dop = dop_bin * hz_per_bin * 3e8 / 433e6 * 3.6

    keep = ~is_outlier(dop)
    dopx = x[keep]
    dop_kept = dop[keep]
    dop_smooth = lowess(dop_kept, dopx, is_sorted=True, frac=0.025, it=0)

    plt.plot(dopx, dop_kept, 'r', linewidth=0.2, alpha=0.5)
    plt.plot(dop_smooth[:, 0], dop_smooth[:, 1], 'b')
    plt.ylabel('Doppler shift (km/h)')
    plt.xlabel('TX timestamp at RX0 (s)')
    plt.grid()
    plt.tight_layout()
def loweesfilter():
    """Return the LOWESS fit of the module-level ``acc_read_variable["ax"]``.

    The x variable is the sample position, 0 up to the length of
    ``acc_read_variable["time"]``; ``frac`` is 0.01.
    """
    n_samples = acc_read_variable["time"].shape[0]
    positions = np.arange(n_samples)
    return lowess(acc_read_variable["ax"], positions, frac=0.01)
def test_simple(self):
    """Check default lowess on a noisy line against stored R output."""
    # standard normal noise
    noise = np.array([
        -0.76741118, -0.30754369, 0.39950921, -0.46352422, -1.67081778,
        0.6595567, 0.66367639, -2.04388585, 0.8123281, 1.45977518,
        1.21428038, 1.29296866, 0.78028477, -0.2402853, -0.21721302,
        0.24549405, 0.25987014, -0.90709034, -1.45688216, -0.31780505,
    ])
    # R output
    out = [
        -0.6260344553, 0.565071712, 1.759627189, 2.9579633258,
        4.1560636154, 5.3473396937, 6.522298218, 7.708159388,
        8.8759055519, 9.9409758603, 10.8981138458, 11.7851424728,
        12.6188717297, 13.4098497374, 14.1516996585, 14.9180658147,
        15.6956600199, 16.4783034134, 17.2617441531, 18.0459201716,
    ]

    x = np.arange(20, dtype='float32')
    y = x + noise

    actual_lowess = lowess(y, x)
    expected_lowess = np.array([x, out]).T
    assert_almost_equal(expected_lowess, actual_lowess)
def local_fit(y):
    """
    LOWESS fit of the data (set to 1 week fraction).
    Gives better view than rolling avg

    Returns the fitted values, fitted against the positions 0..len(y)-1.
    """
    positions = np.arange(len(y))
    fit = lowess(y, positions, frac=1 / 7.)
    return fit[:, 1]
def regression_plot(Z, X, band_names=None, visible_only=True, figsize=(12, 7)):
    """
    Produce a figure with a plot for each image band that displays the
    relationship between depth and radiance and gives a visual representation
    of the regression carried out in the `slopes` and `regressions` methods.

    Notes
    -----
    This method doesn't come directly from Lyzenga 1978 but the author of this
    code found it helpful.

    Parameters
    ----------
    Z : np.ma.MaskedArray
        Array of depth values repeated for each band so that Z.shape==X.shape.
        The mask needs to be the same too so that Z.mask==X.mask for all the
        bands. A single-band Z is repeated across the bands here.
    X : np.ma.MaskedArray
        The array of log transformed radiance values from equation B1 of
        Lyzenga 1978.
    band_names : list of str, optional
        Subplot titles; defaults to 'Band1', 'Band2', ...
    visible_only : bool
        Use a 2x3 subplot grid when true, 2x4 otherwise.
    figsize : tuple
        Passed to ``plt.subplots``.

    Returns
    -------
    figure
        A matplotlib figure.
    """
    nbands = X.shape[-1]
    if band_names is None:
        band_names = ['Band' + str(i + 1) for i in range(nbands)]

    if np.atleast_3d(Z).shape[-1] == 1:
        Z = np.repeat(np.atleast_3d(Z), nbands, 2)

    ncols = 3 if visible_only else 4
    fig, axs = plt.subplots(2, ncols, figsize=figsize)

    regs = regressions(Z, X)
    for i, ax in enumerate(axs.flatten()):
        # Spare subplots beyond the number of bands are left empty.
        if i >= nbands:
            continue
        slp, incpt, rval = regs[:, i]
        x, y = equalize_array_masks(Z[..., i], X[..., i])
        # Nothing to draw with fewer than two unmasked points.
        if x.count() < 2:
            continue
        x, y = x.compressed(), y.compressed()

        ax.scatter(x, y, alpha=0.1, edgecolor='none', c='gold')

        smth = lowess(y, x, frac=0.2)
        ax.plot(smth.T[0], smth.T[1], c='black', alpha=0.5, linestyle='--')

        reglabel = "m=%.2f, r=%.2f" % (slp, rval)
        ax.plot(x, incpt + slp * x, c='brown', label=reglabel, alpha=1.0)

        ax.set_title(band_names[i])
        ax.set_xlabel(r'Depth (m)')
        ax.set_ylabel(r'$X_i$')
        ax.legend(fancybox=True, framealpha=0.5)

    plt.tight_layout()
    return fig
def test_simple(self):
    """Compare default lowess output with the reference CSV."""
    rfile = os.path.join(rpath, "test_lowess_simple.csv")
    # Use a context manager so the fixture handle is closed deterministically.
    with open(rfile, "rb") as fh:
        test_data = np.genfromtxt(fh, delimiter=",", names=True)

    expected_lowess = np.array([test_data["x"], test_data["out"]]).T
    actual_lowess = lowess(test_data["y"], test_data["x"])

    assert_almost_equal(expected_lowess, actual_lowess, decimal=testdec)
def test_delta(self):
    """Compare lowess output with stored references for three ``delta`` values.

    ``test_lowess_delta.csv`` provides the columns ``out_0`` (no ``delta``
    argument passed), ``out_Rdef`` (``delta`` set to 1% of the x range) and
    ``out_1`` (``delta`` just above 1.0). All fits use ``frac=0.1``.
    """
    rfile = os.path.join(rpath, 'test_lowess_delta.csv')
    # Context manager so the handle is released even if parsing fails.
    with open(rfile, 'rb') as fh:
        test_data = np.genfromtxt(fh, delimiter=',', names=True)

    x, y = test_data['x'], test_data['y']

    expected_lowess_del0 = np.array([x, test_data['out_0']]).T
    expected_lowess_delRdef = np.array([x, test_data['out_Rdef']]).T
    expected_lowess_del1 = np.array([x, test_data['out_1']]).T

    actual_lowess_del0 = lowess(y, x, frac=0.1)
    actual_lowess_delRdef = lowess(y, x, frac=0.1,
                                   delta=0.01 * np.ptp(x))
    actual_lowess_del1 = lowess(y, x, frac=0.1, delta=1.0 + 1e-10)

    assert_almost_equal(expected_lowess_del0, actual_lowess_del0,
                        decimal=testdec)
    assert_almost_equal(expected_lowess_delRdef, actual_lowess_delRdef,
                        decimal=testdec)
    # This case is pinned to 10 decimals rather than testdec.
    assert_almost_equal(expected_lowess_del1, actual_lowess_del1,
                        decimal=10)
def generate(name, fname, x='x', y='y', out='out', kwargs=None, decimal=7):
    """Run a single file-driven lowess comparison.

    Parameters
    ----------
    name : str
        Stored on ``assert_almost_equal.description``.
    fname : str
        CSV file name, resolved relative to ``rpath``.
    x, y, out : str
        Column names for the exogenous values, the endogenous values and
        the expected fitted values.
    kwargs : dict or callable, optional
        Extra keyword arguments for ``lowess``. A callable is invoked with
        the loaded data and must return the keyword dict.
    decimal : int
        Precision passed to ``assert_almost_equal``.
    """
    if kwargs is None:
        kwargs = {}

    csv_path = os.path.join(rpath, fname)
    data = np.genfromtxt(csv_path, delimiter=',', names=True)
    assert_almost_equal.description = name

    # Keyword arguments may depend on the data that was just loaded.
    if callable(kwargs):
        kwargs = kwargs(data)

    result = lowess(data[y], data[x], **kwargs)
    expect = np.column_stack((data[x], data[out]))
    assert_almost_equal(result, expect, decimal)
def test_simple(self):
    """Check lowess with default settings against ``test_lowess_simple.csv``.

    The file's ``out`` column holds the expected fitted values; agreement
    is required to ``testdec`` decimal places.
    """
    rfile = os.path.join(rpath, 'test_lowess_simple.csv')
    # Close the file deterministically instead of leaking the handle.
    with open(rfile, 'rb') as fh:
        test_data = np.genfromtxt(fh, delimiter=',', names=True)

    expected_lowess = np.array([test_data['x'], test_data['out']]).T
    actual_lowess = lowess(test_data['y'], test_data['x'])

    assert_almost_equal(expected_lowess, actual_lowess, decimal=testdec)
def DeTrend(TheCandData,TheMDI):
    """Fit a lowess curve to the data and subtract it.

    The default lowess smoothing is used. Works on 1-D vectors as well as
    on 3-D grids, where the first axis is the one being smoothed and the
    remaining two are looped over (presumably latitude and longitude --
    confirm against the caller).

    Parameters
    ----------
    TheCandData : np.ndarray
        Data to detrend. It is modified IN PLACE and also returned.
    TheMDI : float
        Missing data indicator; only values strictly greater than it are
        treated as valid.

    Returns
    -------
    np.ndarray
        The same array object as `TheCandData`. For grids, any series with
        12 or fewer valid points is set entirely to `TheMDI`.
    """
    if len(np.shape(TheCandData)) < 2:
        gots = np.where(TheCandData > TheMDI)[0]
        # With no valid points there is nothing to fit; leave the vector
        # untouched rather than handing lowess an empty series.
        if gots.size > 0:
            valid = TheCandData[gots]
            los = lowess(valid, np.arange(valid.size))[:, 1]
            TheCandData[gots] = valid - los
    else:
        n_lat = TheCandData.shape[1]
        n_lon = TheCandData.shape[2]
        for ltt in range(n_lat):
            for lnn in range(n_lon):
                gots = np.where(TheCandData[:, ltt, lnn] > TheMDI)[0]
                if len(gots) > 12:
                    valid = TheCandData[gots, ltt, lnn]
                    los = lowess(valid, np.arange(valid.size))[:, 1]
                    TheCandData[gots, ltt, lnn] = valid - los
                else:
                    # Too few points for a meaningful fit: blank the series.
                    TheCandData[:, ltt, lnn] = TheMDI

    return TheCandData  # DETREND
def generate(name, fname, x='x', y='y', out='out', kwargs=None, decimal=7):
    """Build one file-driven lowess comparison without running the assertion.

    Parameters
    ----------
    name : str
        Stored as the ``description`` attribute of the returned assertion.
    fname : str
        CSV file name, resolved relative to ``rpath``.
    x, y, out : str
        Column names for the exogenous values, the endogenous values and
        the expected fitted values.
    kwargs : dict or callable, optional
        Extra keyword arguments for ``lowess``. A callable is invoked with
        the loaded data and must return the keyword dict. ``None`` (the
        default) means no extra arguments.
    decimal : int
        Precision bound into the returned assertion.

    Returns
    -------
    tuple
        ``(assertion, result, expect)`` where ``assertion`` is
        ``assert_almost_equal`` with ``decimal`` pre-bound.
    """
    # A fresh dict per call instead of a shared mutable default.
    if kwargs is None:
        kwargs = {}

    data = np.genfromtxt(
        os.path.join(rpath, fname), delimiter=',', names=True)

    assert_equal_at_testdec = partial(
        assert_almost_equal, decimal=decimal)
    assert_equal_at_testdec.description = name

    # Keyword arguments may depend on the data that was just loaded.
    if callable(kwargs):
        kwargs = kwargs(data)

    result = lowess(data[y], data[x], **kwargs)
    expect = np.array([data[x], data[out]]).T

    return assert_equal_at_testdec, result, expect
def stl_loess(ts, loess_frac=0.2):
    """Return the readings with seasonality and a LOWESS trend removed.

    Parameters
    ----------
    ts : pandas.DataFrame
        Frame with an ``OriginalReading`` column. Gaps in that column are
        filled by interpolation and the filled column is stored back on
        ``ts``; apart from that the frame is not modified.
    loess_frac : float
        ``frac`` argument passed to ``lowess`` when estimating the trend.

    Returns
    -------
    numpy.ndarray
        ``readings - seasonal - trend``, where the seasonal component comes
        from ``seasonal_decompose`` and the trend from ``lowess``. Similar
        to the decomposition's own residual, but without the missing
        values at the ends.
    """
    # Fill NAs first. Assigning the column back keeps the fill visible to
    # the caller without relying on chained in-place modification.
    ts["OriginalReading"] = ts["OriginalReading"].interpolate()

    # Private copy: the arithmetic below must not write through to the
    # frame's own buffer, otherwise a second call would see altered data.
    readings = ts["OriginalReading"].values.copy()

    # NOTE(review): newer statsmodels releases appear to call this argument
    # `period` instead of `freq` -- confirm against the installed version.
    x_stl = sm.tsa.seasonal_decompose(readings, freq=95)

    # Apply loess filter to the raw readings to get the trend.
    trend_loess = lowess(readings, ts.index, frac=loess_frac)[:, 1]

    # Remove seasonality, then the trend.
    remainder = readings - x_stl.seasonal - trend_loess
    return remainder
def test_iter(self):
    """Check the robustifying iterations of lowess against R output.

    A straight line with Cauchy noise is smoothed once with ``it=0`` and
    once with ``it=3``; both results must match the reference values.
    """
    x = np.arange(20, dtype='float32')

    # cauchy noise
    noise = np.array([
        1.86299605, -0.10816866, 1.87761229, -3.63442237, 0.30249022,
        1.03560416, 0.21163349, 1.14167809, -0.00368175, -2.08808987,
        0.13065417, -1.8052207, 0.60404596, -2.30908204, 1.7081412,
        -0.54633243, -0.93107948, 1.79023999, 1.05822445, -1.04530564,
    ])
    y = x + noise

    # R output, no iterations
    fitted_no_iter = [
        0.6264479483, 1.5008396363, 2.3861761926, 3.2716390242,
        4.1397266375, 4.9926614002, 5.9062225, 6.8541464784,
        7.8163358136, 8.6684661827, 9.5321215273, 10.4655376106,
        11.469691774, 12.612670578, 13.8080457514, 14.9355218409,
        16.0491183613, 17.1604998952, 18.2739171976, 19.3834268539,
    ]
    # R output, three iterations
    fitted_3_iter = [
        1.1091939965, 1.9662338415, 2.8223436958, 3.6741660675,
        4.5153163696, 5.3483205165, 6.2127611584, 7.0371035909,
        7.8823844068, 8.7036783127, 9.5698728732, 10.5011237563,
        11.4924301926, 12.6180333554, 13.8056705213, 14.9280791108,
        16.0363681325, 17.1426206341, 18.2516511313, 19.3581200948,
    ]

    expected_lowess_no_iter = np.column_stack((x, fitted_no_iter))
    expected_lowess_3_iter = np.column_stack((x, fitted_3_iter))

    actual_lowess_no_iter = lowess(y, x, it=0)
    actual_lowess_3_iter = lowess(y, x, it=3)

    assert_almost_equal(expected_lowess_no_iter, actual_lowess_no_iter)
    assert_almost_equal(expected_lowess_3_iter, actual_lowess_3_iter)
def smoothEstimates(points):
    """Smooth a dated series with LOWESS over roughly its 10 nearest points.

    Parameters
    ----------
    points : sequence
        A pair ``[dates, values]``. ``dates`` holds objects that provide
        ``timetuple()`` (e.g. ``datetime.datetime``). ``lowess`` is called
        with ``is_sorted=True``, so the dates are expected to be in
        ascending order already.

    Returns
    -------
    list
        ``[dates, smoothed_values]``. The dates are rebuilt from epoch
        seconds as naive local ``datetime.datetime`` objects. An empty
        input yields ``[[], []]``.
    """
    dates, values = points

    # Dates become epoch timestamps so lowess can treat them as numbers.
    stamps = [time.mktime(d.timetuple()) for d in dates]
    if not stamps:
        # Nothing to smooth; also avoids dividing by zero below.
        return [[], []]

    # Aim for the closest 10 points per local fit. With fewer than 10
    # points the ratio would exceed 1, which is outside the range lowess
    # accepts for frac, so cap it.
    frac = min(1.0, 10.0 / len(stamps))

    fitted = smooth.lowess(values, stamps, frac=frac, is_sorted=True)

    # Convert the epoch timestamps back into datetime objects.
    smoothed_dates = [datetime.datetime.fromtimestamp(row[0]) for row in fitted]
    smoothed_values = [row[1] for row in fitted]

    return [smoothed_dates, smoothed_values]
def plot_residuals(self, Hz_fraction=15, fname=None, bbox_inches='tight', **kwargs):
    """Plot the reduced residuals against frequency with a LOWESS overlay.

    The smoothing fraction is ``Hz_fraction`` divided by the span of
    ``self.f``, capped at 1. The smoothed residuals are stored on
    ``self.residuals_lowess``.

    Parameters
    ----------
    Hz_fraction : float
        Numerator of the smoothing fraction, in the units of ``self.f``.
    fname : str, optional
        If given, the figure is laid out tightly and saved to this path.
    bbox_inches : str
        Passed to ``fig.savefig``.
    **kwargs
        Further keyword arguments passed to ``fig.savefig``.
    """
    freq_span = np.max(self.f) - np.min(self.f)
    frac = min(Hz_fraction / freq_span, 1)

    self.residuals_lowess = lowess(
        self.reduced_residuals,
        self.f,
        frac=frac,
        return_sorted=False,
    )

    fig, ax = plt.subplots()
    ax.plot(self.f, self.reduced_residuals, 'b.', alpha=0.5)
    ax.plot(self.f, self.residuals_lowess, 'g-', linewidth=2)
    ax.set_xlabel("Frequency [Hz]")
    ax.set_ylabel("Reduced Residual")

    if fname is None:
        return
    fig.tight_layout()
    fig.savefig(fname, bbox_inches=bbox_inches, **kwargs)
def test_lowess(self):
    """Check ``smooth_lowess`` against a row-by-row call to ``lowess``.

    Skipped when ``skip_lowess`` is set.
    """
    if skip_lowess:
        raise SkipTest

    frac = 0.5
    it = 1

    # Build the reference on a copy, one row at a time.
    data = self.s.data.copy()
    for row in data:
        row[:] = lowess(
            endog=row,
            exog=self.s.axes_manager[-1].axis,
            frac=frac,
            it=it,
            is_sorted=True,
            return_sorted=False,
        )

    self.s.smooth_lowess(
        smoothing_parameter=frac,
        number_of_iterations=it,
    )
    nose.tools.assert_true(np.allclose(data, self.s.data))
def lowess_fit(spec, lams, frac=0.05, it=5):
    '''Fit a spectrum with a Locally Weighted Scatterplot Smoothing (LOWESS).

    Thin wrapper around statsmodels.nonparametric.smoothers_lowess.lowess().

    :Args:
        spec: 1-D numpy array
            The input spectrum.
        lams: 1-D numpy array
            The corresponding wavelength array. lowess is told the data
            are already sorted (is_sorted=True).
        frac: float [default:0.05]
            Between 0 and 1. The fraction of the data used when estimating
            each y-value. Passed straight to lowess.
        it: int [default:5]
            The number of residual-based reweightings to perform. Passed
            straight to lowess.

    :Returns:
        out: 1-D array
            The fitted values. If the spectrum is entirely NaN, an all-NaN
            array shaped like spec is returned and lowess is not called.

    :Notes:
        The LOWESS technique is described in:
        Cleveland, W.S. (1979) Robust Locally Weighted Regression and
        Smoothing Scatterplots. Journal of the American Statistical
        Association 74 (368): 829-836.

        Missing values are dropped by lowess (missing='drop') and the
        result is returned in the input order (return_sorted=False).
    '''
    # No signal at all: skip the fit, which also avoids a warning.
    if np.all(np.isnan(spec)):
        return np.zeros_like(spec) * np.nan

    return lowess(
        spec,
        lams,
        frac=frac,
        it=it,
        is_sorted=True,
        missing='drop',
        return_sorted=False,
    )
def bike_scatter(df, cols):
    """Scatter 'cnt' against each requested column with a lowess overlay.

    One new figure is created per column: a faint scatter of the data and,
    in red, a lowess fit computed with frac = 0.3.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'cnt' column and every column named in `cols`.
    cols : iterable of str
        Columns to use as the x variable, one figure each.

    Returns
    -------
    str
        Always 'Done'.
    """
    import matplotlib.pyplot as plt
    import statsmodels.nonparametric.smoothers_lowess as lw

    for col in cols:
        # Lowess fit first; column 0 holds x, column 1 the fitted values.
        fit = lw.lowess(df['cnt'], df[col], frac = 0.3)
        fit_x = fit[:, 0]
        fit_y = fit[:, 1]

        # Fresh, cleared figure with a single axes for this column.
        fig = plt.figure(figsize=(8, 6))
        fig.clf()
        ax = fig.gca()

        df.plot(kind = 'scatter', x = col, y = 'cnt', ax = ax, alpha = 0.05)
        plt.plot(fit_x, fit_y, axes = ax, color = 'red')

        title = 'Number of bikes vs. ' + col
        ax.set_xlabel(col)
        ax.set_ylabel('Number of bikes')
        ax.set_title(title)

    return 'Done'
def _noise_estimate_spectrum(spectrum, nb_split=20):
    """Private function to estimate the noise in a spectrum.

    The spectrum is cut into `nb_split` equal sections, the variance of
    each section is regressed on its mean with lowess, and the fitted
    variance at the mean closest to zero gives the noise estimate.

    Parameters
    ----------
    spectrum : ndarray, shape (n_samples)
        Spectrum from which the noise has to be estimated. Trailing
        samples that do not fill a whole section are discarded.

    nb_split : int, option (default=20)
        The number of regions splitting each spectrum

    Returns
    -------
    sigma : float,
        The estimate of the noise standard deviation.

    """
    # Trim the tail so the spectrum divides evenly into the sections.
    leftover = spectrum.size % nb_split
    if leftover > 0:
        spectrum = spectrum[:-leftover]

    sections = np.array(np.split(spectrum, nb_split))

    # Per-section statistics.
    mean_sec = np.array([np.mean(sec) for sec in sections])
    var_sec = np.array([np.var(sec) for sec in sections])

    # Variance as a smooth function of mean intensity.
    out = lowess(var_sec, mean_sec, frac=.9, it=0)
    mean_reg, var_reg = out[:, 0], out[:, 1]

    # Value for a zero mean intensity, or the nearest to zero.
    idx_null_mean = _find_nearest(mean_reg, 0.)

    return np.sqrt(var_reg[idx_null_mean])
def correct(inputs, fasta, frac_n=0.1, frac_r=0.0001, lowess_iter=3, lowess_frac=0.1):
    """
    GC-correct input bed lines.
    GC correction takes place with a local regression (LOWESS) on
    GC perc vs number of reads. Each bin that passes `filter_bin` has its
    value divided by the LOWESS fit for that bin; every other bin gets 0.

    :param inputs: list of BedLine namedtuples
    :param fasta: instance of pyfaidx.Fasta
    :param frac_n: maximal fraction on N-bases per bin
    :param frac_r: minimum fraction of reads per bin
    :param lowess_iter: amount of iterations of LOWESS function
    :param lowess_frac: fraction of input data used for LOWESS function
    :return: corrected BedLines, one per input line and in the same order
    """
    # Evaluate the filter once per bin so both passes below agree on
    # exactly which bins take part in the regression.
    keep = [filter_bin(line, fasta, frac_n, frac_r) for line in inputs]

    reads = np.array(
        [line.value for line, ok in zip(inputs, keep) if ok],
        dtype=float)
    gcs = np.array(
        [get_gc_for_bin(fasta, line.chromosome, line)
         for line, ok in zip(inputs, keep) if ok],
        dtype=float)

    if len(reads) == 0:
        # No bin survived the filter: nothing to regress on.
        fitted = iter(())
    else:
        if lowess_frac * len(reads) < 4:
            # need at least four data points
            warnings.warn("Too few data points for lowess. Raising lowess_frac")
            # Capped at 1 because lowess takes a fraction of the data.
            lowess_frac = min(1.0, 4.0 / len(reads))
            delta = 0  # remove delta in this case
        else:
            delta = 0.01 * len(gcs)

        # return_sorted=False keeps the fit aligned with the input order.
        fitted = iter(
            statlow.lowess(reads, gcs, return_sorted=False, delta=delta,
                           frac=lowess_frac, it=lowess_iter).tolist())

    corrected_lines = []
    for line, ok in zip(inputs, keep):
        if ok:
            corr_val = float(line.value) / next(fitted)
        else:
            corr_val = 0
        corrected_lines.append(
            BedLine(line.chromosome, line.start, line.end, corr_val))

    return corrected_lines
def test_lowess(self, parallel):
    """Check ``smooth_lowess`` against a row-by-row call to ``lowess``.

    Skipped when statsmodels cannot be imported. Agreement is required
    within ``self.rtol`` / ``self.atol``.
    """
    pytest.importorskip("statsmodels")
    from statsmodels.nonparametric.smoothers_lowess import lowess

    frac, it = 0.5, 1

    # Build the reference one row at a time.
    data = np.asanyarray(self.s.data, dtype='float')
    for row in data:
        row[:] = lowess(
            endog=row,
            exog=self.s.axes_manager[-1].axis,
            frac=frac,
            it=it,
            is_sorted=True,
            return_sorted=False,
        )

    self.s.smooth_lowess(
        smoothing_parameter=frac,
        number_of_iterations=it,
        show_progressbar=None,
        parallel=parallel,
    )
    np.testing.assert_allclose(
        self.s.data, data, rtol=self.rtol, atol=self.atol)
def test_options(self):
    """Exercise the ``is_sorted``, ``missing`` and ``return_sorted`` options.

    Also checks that integer input gives float output and that NaNs are
    either dropped (default) or rejected (``missing='raise'``).
    """
    rfile = os.path.join(rpath, 'test_lowess_simple.csv')
    # Context manager so the handle is released even if parsing fails.
    with open(rfile, 'rb') as fh:
        test_data = np.genfromtxt(fh, delimiter=',', names=True)
    y, x = test_data['y'], test_data['x']
    expected_lowess = np.array([test_data['x'], test_data['out']]).T

    # check skip sorting
    actual_lowess1 = lowess(y, x, is_sorted=True)
    assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)

    # check skip missing
    actual_lowess = lowess(y, x, is_sorted=True, missing='none')
    assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)

    # check order/index, returns yfitted only
    actual_lowess = lowess(y[::-1], x[::-1], return_sorted=False)
    assert_almost_equal(actual_lowess, actual_lowess1[::-1, 1], decimal=13)

    # check integer input
    actual_lowess = lowess(np.round(y).astype(int), x, is_sorted=True)
    actual_lowess1 = lowess(np.round(y), x, is_sorted=True)
    assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
    assert_(actual_lowess.dtype is np.dtype(float))

    # this will also have duplicate x
    actual_lowess = lowess(y, np.round(x).astype(int), is_sorted=True)
    actual_lowess1 = lowess(y, np.round(x), is_sorted=True)
    assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
    assert_(actual_lowess.dtype is np.dtype(float))

    # check with nans, this changes the arrays
    y[[5, 6]] = np.nan
    x[3] = np.nan
    mask_valid = np.isfinite(x) & np.isfinite(y)
    # Smoothing with NaNs present (dropped by default) must agree with
    # smoothing the explicitly filtered data. The earlier form compared
    # an array with itself and therefore could never fail.
    actual_lowess = lowess(y, x, is_sorted=True)
    actual_lowess1 = lowess(y[mask_valid], x[mask_valid], is_sorted=True)
    assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
    assert_raises(ValueError, lowess, y, x, missing='raise')
def run_lowess(X, Y, frac=0.75, missing="none"):
    """
    Y ~ X lowess.

    Entries flagged by `utils.where_null` are overwritten with NaN in both
    inputs (in place) before the fit.

    Parameters:
    -----------
    X: X values
    Y: Y values
    frac: fraction of data used to estimate each y-value.
    missing: how lowess should handle missing values; passed through
        unchanged. The default here is "none".

    Returns the fitted Y values in the order of the inputs
    (return_sorted=False).
    """
    for values in (X, Y):
        values[utils.where_null(values)] = np.nan

    # Lowess takes Y values first
    return lowess(Y, X, return_sorted=False, frac=frac, missing=missing)
def test_simple(self):
    """Check default lowess on a noisy straight line against fixed values."""
    x = np.arange(20, dtype='float32')

    # standard normal noise
    noise = np.array([
        -0.76741118, -0.30754369, 0.39950921, -0.46352422, -1.67081778,
        0.6595567, 0.66367639, -2.04388585, 0.8123281, 1.45977518,
        1.21428038, 1.29296866, 0.78028477, -0.2402853, -0.21721302,
        0.24549405, 0.25987014, -0.90709034, -1.45688216, -0.31780505,
    ])
    y = x + noise

    # Expected fitted values for x = 0, 1, ..., 19.
    fitted = [
        -0.58337912, 0.61951246, 1.82221628, 3.02536876, 4.22667951,
        5.42387723, 6.60834945, 7.7797691, 8.91824348, 9.94997506,
        10.89697569, 11.78746276, 12.62356492, 13.41538492, 14.15745254,
        14.92343948, 15.70019862, 16.48167846, 17.26380699, 18.0466769,
    ]
    expected_lowess = np.column_stack((np.arange(20.0), fitted))

    actual_lowess = lowess(y, x)
    assert_almost_equal(expected_lowess, actual_lowess)
def compile_lowess(self):
    """Write a lowess-smoothed companion file for every predictions file.

    For each ``<wordclass>.txt`` in ``PREDICTIONS_DIR`` whose name does not
    contain "lowess", the tab-separated numeric rows are read, rows whose
    fourth field is not positive are discarded, and the remainder is passed
    through ``self.sample_data_points``. Field 3 is then smoothed against
    field 0 and the result is written to ``<wordclass>_lowess.txt`` as
    ``x<TAB>y`` lines rounded to 3 significant digits, with lines that
    round to the same text written only once.
    """
    filenames = [f for f in os.listdir(PREDICTIONS_DIR)
                 if f.endswith(".txt") and "lowess" not in f]

    for f in filenames:
        wordclass = f.split(".")[0]

        # Context manager: the file is closed even if a field fails to
        # parse. Blank lines are skipped rather than crashing float('').
        with open(os.path.join(PREDICTIONS_DIR, f), "r") as fh:
            data_points = [[float(c) for c in line.strip().split("\t")]
                           for line in fh if line.strip()]

        data_points = self.sample_data_points(
            [d for d in data_points if d[3] > 0])

        x = numpy.array([d[0] for d in data_points])
        y = numpy.array([d[3] for d in data_points])
        results = lowess(y, x, frac=LOWESS_FRACTION, it=LOWESS_ITERATIONS)

        outfile = os.path.join(PREDICTIONS_DIR, "%s_lowess.txt" % wordclass)
        with open(outfile, "w") as fh:
            seen = set()
            for r in results:
                sig = "%0.3g\t%0.3g\n" % (r[0], r[1])
                # Rounding collapses neighbours; emit each line once.
                if sig not in seen:
                    fh.write(sig)
                    seen.add(sig)