def continuum_fit(wave, flux, error, iteration_num):
    """Estimate the continuum of a spectrum on the input wavelength grid.

    A LOWESS curve (``f=0.02``, ``iter=1``) is fitted to ``flux`` versus
    ``wave``.  Points where that curve is exactly zero are dropped, the curve
    is rescaled by a 41-sample running median of ``flux / curve`` and the
    result is interpolated back onto ``wave``.

    Parameters
    ----------
    wave, flux, error : arrays supporting boolean-mask indexing
        Wavelengths, fluxes and flux errors.  ``error`` is only masked and
        printed; it does not enter the fit.
    iteration_num
        Unused; kept so existing callers keep working.

    Returns
    -------
    The continuum evaluated at every element of ``wave``.

    Notes
    -----
    The intermediate values are printed for debugging, as in the original.
    NOTE(review): if the dropped points include the first or last wavelength,
    evaluating the interpolator on ``wave`` is expected to fail as
    out-of-range -- confirm whether that case can occur.
    """
    wave_fit = wave
    flux_fit = flux
    error_fit = error

    print('continum_fit_line17, wave_fit\n', wave_fit)
    print('continum_fit_line18, flux_fit\n', flux_fit)
    print('continum_fit_line19, error_fit\n', error_fit)

    fit_lo = lowess(wave_fit, flux_fit, f=0.02, iter=1)

    # Drop points where the smooth curve is exactly zero so the ratio below
    # never divides by zero.
    index_overflow = (fit_lo != 0.0)
    flux_fit = flux_fit[index_overflow]
    wave_fit = wave_fit[index_overflow]
    error_fit = error_fit[index_overflow]
    # The curve itself must be masked as well; otherwise its length no longer
    # matches flux_fit / wave_fit as soon as a single point is dropped.
    fit_lo = fit_lo[index_overflow]

    print('continum_fit_line24, fit_lo\n', fit_lo)
    print('continum_fit_line25, index_overflow\n', index_overflow)
    print('continum_fit_line26, flux_fit\n', flux_fit)
    print('continum_fit_line27, wave_fit\n', wave_fit)
    print('continum_fit_line28, error_fit\n', error_fit)

    # Running median of the data/curve ratio (the original comment also
    # mentions 51 as a kernel size).
    ddd = signal.medfilt(flux_fit / fit_lo, kernel_size=41)
    fit_lo = fit_lo * ddd

    f = interpolate.interp1d(wave_fit, fit_lo)
    fit_final = f(wave)

    print('continum_fit_line36, ddd\n', ddd)
    print('continum_fit_line37, fit_lo\n', fit_lo)
    print('continum_fit_line40, f\n', f)
    print('continum_fit_line40, fit_final\n', fit_final)

    return fit_final
def PGRainSlim(xlimmax=None, ylimmax=None, outFile=None, fileformat=None,
               RainRate5mm=None, PGtip=None, slope=None, intercept=None):
    """Save a single-panel scatter plot of potential gradient vs. rain rate.

    The panel shows the raw points, the straight line
    ``slope * x + intercept`` and a LOWESS curve (fraction 0.5) through the
    points sorted by rain rate.  The figure is written to
    ``Plots/Ensemble/<outFile>.<fileformat>`` and then closed.

    The x axis always spans -0.1 to 5; ``xlimmax`` only controls how far the
    straight line is drawn.  ``ylimmax`` is the second y limit (the axis is
    inverted after the limits are set).
    """
    plt.clf()
    figure = plt.figure()
    axes = figure.add_subplot(111)

    # Raw data and axis cosmetics.
    axes.scatter(RainRate5mm, PGtip)
    axes.set_xlabel("Rain Rate (mm/hr)")
    axes.set_ylabel("Potential Gradient (V/m)")
    axes.grid()
    axes.set_xlim(-.1, 5)
    axes.set_ylim(-200, ylimmax)
    axes.invert_yaxis()

    # Linear-regression line.
    line_x = np.arange(-.1, xlimmax + 0.3, 0.2)
    axes.plot(line_x, line_x * slope + intercept)

    # LOWESS curve over the (rain rate, PG) pairs sorted by rain rate; a
    # machine epsilon is added to both coordinates before smoothing.
    ordered = np.array(sorted(zip(RainRate5mm, PGtip)))
    tiny = sys.float_info.epsilon
    rate_sorted = ordered[:, 0]
    pg_sorted = ordered[:, 1]
    axes.plot(rate_sorted, lowess(rate_sorted + tiny, pg_sorted + tiny, 0.5))

    # Make the plotting area square in display units.
    left, right = axes.get_xlim()
    bottom, top = axes.get_ylim()
    axes.set_aspect(np.abs((right - left) / (top - bottom)))

    plt.savefig('Plots/Ensemble/' + outFile + "." + fileformat)
    plt.close(figure)
def testInvalidX(self):
    '''
    Call lowess with a range of invalid x arguments and check that each
    one raises LowessError.

    Every case runs in its own subTest so that a failure names the
    offending input and the remaining cases are still exercised.
    '''
    validX = self.knownResults['x']

    xNaN = validX.copy()
    xNaN[5] = np.nan  # np.NaN was removed in NumPy 2.0

    # Cast to object first so that storing a non-float value does not rely
    # on implicit upcasting in Series.__setitem__.
    xNonNumeric = validX.astype(object)
    xNonNumeric[5] = 'invalid'
    xBool = validX.astype(object)
    xBool[5] = True

    # Series.append was removed in pandas 2.0; concat is the replacement.
    xLong = pd.concat([validX, pd.Series([0.1], index=['a'])])

    xIndex = validX.copy()
    xIndex.index = [i + 100 for i in range(len(xIndex))]

    invalids = [
        ('numpy array', validX.to_numpy()),
        ('string', 'a'),
        ('bool', True),
        ('contains NaN', xNaN),
        ('contains non-numeric', xNonNumeric),
        ('contains bool', xBool),
        ('too long', xLong),
        ('shifted index', xIndex),
    ]
    for label, x in invalids:
        with self.subTest(case=label):
            with self.assertRaises(LowessError):
                lowess.lowess(x, self.knownResults['y'],
                              bandwidth=0.2, polynomialDegree=1)
def testOrder(self):
    '''
    Shuffle x and y and check that the smoothed values, compared label by
    label, are unchanged.

    x and y are shuffled independently, as before, but with fixed seeds so
    that a failure can be reproduced.
    '''
    x = self.knownResults['x']
    y = self.knownResults['y']

    result = lowess.lowess(x, y, bandwidth=0.5, polynomialDegree=1)
    result2 = lowess.lowess(x.sample(frac=1, random_state=1),
                            y.sample(frac=1, random_state=2),
                            bandwidth=0.5, polynomialDegree=1)

    # Symmetric relative difference per index label.
    eql = all(
        abs((result[i] - result2[i]) / (result[i] + result2[i])) < 1e-10
        for i in result.index)
    self.assertTrue(eql, 'Results changed when the rows were shuffled.')
def testInvalidBandwidth(self):
    '''
    Call lowess with each invalid bandwidth and check that LowessError is
    raised.

    Each value runs in its own subTest so that a failure reports which
    bandwidth was accepted and the remaining values are still checked.
    '''
    invalids = [-0.1, 1.2, '0.7', True, [0.1], None, 1, 0]
    for bw in invalids:
        with self.subTest(bandwidth=bw):
            with self.assertRaises(LowessError):
                lowess.lowess(self.knownResults['x'],
                              self.knownResults['y'],
                              bandwidth=bw, polynomialDegree=1)
def testInvalidPolynomialDegree(self):
    '''
    Call lowess with each invalid polynomialDegree and check that
    LowessError is raised.

    Each value runs in its own subTest so that a failure reports which
    degree was accepted and the remaining values are still checked.
    '''
    invalids = [-1, 1.2, '1', True, [1], None,
                len(self.knownResults) + 1]
    for dg in invalids:
        with self.subTest(polynomialDegree=dg):
            with self.assertRaises(LowessError):
                lowess.lowess(self.knownResults['x'],
                              self.knownResults['y'],
                              bandwidth=0.2, polynomialDegree=dg)
def testDuplicateIndex(self):
    '''
    Call lowess on data whose index holds a duplicated label and check
    that LowessError is raised.
    '''
    labels = self.knownResults.index
    # Relabel the second row with the first row's label.
    duplicated = self.knownResults.rename({labels[1]: labels[0]})
    with self.assertRaises(LowessError):
        lowess.lowess(duplicated['x'], duplicated['y'],
                      bandwidth=0.2, polynomialDegree=1)
def test_lowess():
    """
    1-d local regression: sample sin(x) at 100 standard-normal points and
    check that the estimate at 10 points in [-1, 1] matches sin to one
    decimal, for both kernels with and without robust fitting.
    """
    np.random.seed(1984)
    cases = [(kernel, robust)
             for kernel in [lo.epanechnikov, lo.tri_cube]
             for robust in [True, False]]
    for kernel, robust in cases:
        # A fresh sample is drawn for every configuration.
        samples = np.random.randn(100)
        values = np.sin(samples)
        query = np.linspace(-1, 1, 10)
        estimate = lo.lowess(samples, values, query,
                             kernel=kernel, l=1.0, robust=robust)
        expected = np.sin(query)
        npt.assert_array_almost_equal(estimate, expected, decimal=1)
def test_lowess2d():
    """
    2-d local regression: fit -sin(x) + 0.5*cos(y) from 100 random points
    and compare with the true function on a regular grid over
    [-1, 1) x [-1, 1), for two bandwidths, both kernels, and with and
    without robust fitting.
    """
    # NOTE(review): no random seed is set here, so the sample differs from
    # run to run.
    cases = [(bandwidth, kernel, robust)
             for bandwidth in [0.2, 1.0]
             for kernel in [lo.epanechnikov, lo.tri_cube]
             for robust in [True, False]]
    for bandwidth, kernel, robust in cases:
        points = np.random.randn(2, 100)
        values = -1 * np.sin(points[0]) + 0.5 * np.cos(points[1])

        # Evaluation grid flattened to a (2, n) array of coordinates.
        grid = np.mgrid[-1:1:.1, -1:1:.1]
        query = np.vstack([grid[0].ravel(), grid[1].ravel()])

        estimate = lo.lowess(points, values, query,
                             kernel=kernel, l=bandwidth, robust=robust)
        expected = -1 * np.sin(query[0]) + 0.5 * np.cos(query[1])
        npt.assert_array_almost_equal(estimate, expected, decimal=1)
def testValidData(self):
    '''
    Call lowess with valid data and check that no exception is raised.

    If one is raised, the test fails and the exception is included in the
    failure message instead of being discarded.
    '''
    try:
        lowess.lowess(self.knownResults['x'], self.knownResults['y'],
                      bandwidth=0.2, polynomialDegree=1)
    except Exception as err:
        self.fail('lowess raised on valid data: {!r}'.format(err))
def testValidData(self):
    '''
    Call lowess with valid data and check that no exception is raised.

    An unexpected exception is logged as a warning and then fails the
    test.
    '''
    try:
        lowess.lowess(self.knownResults['x'], self.knownResults['y'],
                      bandwidth=0.2, polynomialDegree=1)
    except Exception:
        # The message needs a placeholder for the argument; without one the
        # logging module reports a formatting error instead of the warning.
        logger.warning("Unexpected error: %s", sys.exc_info()[0])
        raised = True
    else:
        raised = False
    self.assertFalse(raised, 'lowess raised an exception on valid data')
def SLAM1(semitones):
    """Apply the SLAM1 stylization to a sequence of semitone values.

    Steps, as implemented below:

    1. Sequences longer than 100 values are decimated by an integer step.
    2. Sequences longer than 10 values are smoothed with LOWESS over a
       time axis running from 0 towards 1; shorter ones are used as-is.
    3. The style starts with the registers (``relst2register``) of the
       first and last smoothed values.
    4. If the largest excursion above the higher boundary, or below the
       lower boundary, exceeds 2, the register of that extremum is
       appended, followed by '1', '2' or '3' for a position in the first
       30 %, the middle, or the last 30 % of the time axis.

    Returns a tuple ``(style, smooth)``.  An empty input is not handled:
    indexing the first element raises IndexError, as before.
    """
    display = False  # set to True to plot the raw and smoothed curves

    # First, smooth the semitone curve using LOWESS.
    if 100 < len(semitones):
        r = int(len(semitones) / 100.0)
        semitones = list(np.array(semitones)[::r])
    t = np.array(range(len(semitones))) / float(len(semitones))
    if 10 < len(semitones):
        import lowess
        smooth = lowess.lowess(t, semitones)
    else:
        smooth = semitones

    start = smooth[0]
    stop = smooth[-1]
    style = relst2register(start)
    style += relst2register(stop)

    # Identify the prominence: either the maximum or the minimum.
    upper = max(start, stop)
    lower = min(start, stop)
    maxdiffpositive = np.max([x - upper for x in smooth])
    maxdiffnegative = np.abs(np.min([x - lower for x in smooth]))
    if maxdiffpositive > maxdiffnegative:
        # The maximum is further from the boundaries than the minimum is.
        extremum = maxdiffpositive
        posextremum = np.argmax(smooth)
    else:
        extremum = maxdiffnegative
        posextremum = np.argmin(smooth)

    if extremum > 2:
        style += relst2register(smooth[posextremum])
        if t[posextremum] < 0.3:
            style += '1'
        elif t[posextremum] < 0.7:
            style += '2'
        else:
            style += '3'

    style = ''.join(style)

    if display:
        # pl.hold() no longer exists in current matplotlib; successive
        # plot() calls draw on the same axes by default.
        pl.plot(semitones, 'b')
        pl.plot(smooth, 'r')
        pl.title(style)
        pl.show()

    return (style, smooth)
def testKnownResults(self):
    '''
    Test the function against known STATA results.

    For polynomial degrees 0 and 1 and bandwidths 0.1 .. 0.9 the largest
    symmetric relative difference to the stored Stata column must be below
    1e-5.  Columns that do not match are listed in the failure message.
    '''
    mismatched = []
    for deg in [0, 1]:
        for i in range(1, 10):
            bwidth = 0.1 * i
            tmp = lowess.lowess(self.knownResults['x'],
                                self.knownResults['y'],
                                bandwidth=bwidth, polynomialDegree=deg)
            col = 'y_Stata_{}_{}'.format(i, deg)
            expected = self.knownResults[col]
            worst = max(abs((expected - tmp) / (expected + tmp)))
            if not worst < 1e-5:
                mismatched.append(col)
    self.assertFalse(
        mismatched,
        'Results differ from STATA for: {}'.format(', '.join(mismatched)))
def test_lowess3d():
    """
    Test local linear regression in 3d with lowess.

    The function -sin(x) + 0.5*cos(y) + cos(z) is sampled on a regular
    10 x 10 x 10 grid over [0, 1) and estimated at two random points; the
    estimates must match the true values to one decimal.
    """
    # Seed the generator so the two sampling points are reproducible (the
    # original drew and discarded 2006 numbers with randn instead).
    np.random.seed(2006)
    xyz = np.mgrid[0:1:.1, 0:1:.1, 0:1:.1]
    x, y, z = xyz[0].ravel(), xyz[1].ravel(), xyz[2].ravel()
    xyz = np.vstack([x, y, z])
    # w = f(x,y,z)
    w = -1 * np.sin(x) + 0.5 * np.cos(y) + np.cos(z)

    # Random sample of two x,y,z combinations (each coordinate in [0, 1)):
    xyz0 = np.vstack([np.random.rand(2),
                      np.random.rand(2),
                      np.random.rand(2)])

    # lowess is used to find the values at these sampling points:
    w0 = lo.lowess(xyz, w, xyz0)

    # Evaluate f(x,y,z) at the same sampling points:
    w0actual = (-1 * np.sin(xyz0[0]) + 0.5 * np.cos(xyz0[1])
                + np.cos(xyz0[2]))
    npt.assert_array_almost_equal(w0, w0actual, decimal=1)
def testLowess(x, y):
    """Plot the raw data together with eight LOWESS fits and save the figure.

    The smoothing fraction starts at 0.005 and grows by ``i * 0.015`` on
    the i-th iteration (i = 0..7).  The figure is saved to
    ``./imgs/lowess``.
    """
    from lowess import lowess

    smoothy = []
    f = []
    base = 0.005
    for i in range(8):
        base = base + i*0.015
        f.append(base)
        smoothy.append(lowess(x, y, f=base, iter=1))

    pl.clf()
    pl.plot(x, y, label='Raw Data')
    for fraction, curve in zip(f, smoothy):
        pl.plot(x, curve, label='Smooth('+str(fraction)+')')
    pl.legend()
    pl.savefig('./imgs/lowess')
    # Function-call form prints the same text on Python 2 and parses on
    # Python 3.
    print("Successfully created picture file lowess.png")
def testLowess(x, y):
    """Draw ``y`` against ``x`` with eight LOWESS curves and save the plot.

    Smoothing fractions are accumulated from 0.005 by adding ``i * 0.015``
    at step i (i = 0..7); each fit uses a single iteration.  The plot is
    written to ``./imgs/lowess``.
    """
    from lowess import lowess

    f = []
    smoothy = []
    base = 0.005
    for i in range(8):
        base = base + i * 0.015
        f.append(base)
        smoothy.append(lowess(x, y, f=base, iter=1))

    pl.clf()
    pl.plot(x, y, label='Raw Data')
    for fraction, curve in zip(f, smoothy):
        pl.plot(x, curve, label='Smooth(' + str(fraction) + ')')
    pl.legend()
    pl.savefig('./imgs/lowess')
    # A print statement is a syntax error on Python 3; the call form works
    # on both major versions.
    print("Successfully created picture file lowess.png")
def test_lowess3d():
    """
    3-d local linear regression: sample -sin(x) + 0.5*cos(y) + cos(z) on
    a regular 10 x 10 x 10 grid over [0, 1) and check the lowess estimate
    at two random points to one decimal.
    """
    # NOTE(review): no random seed is set, so the two query points differ
    # between runs.
    grid = np.mgrid[0:1:.1, 0:1:.1, 0:1:.1]
    gx = grid[0].ravel()
    gy = grid[1].ravel()
    gz = grid[2].ravel()
    coords = np.vstack([gx, gy, gz])

    # Known function values on the grid.
    values = -1 * np.sin(gx) + 0.5 * np.cos(gy) + np.cos(gz)

    # Two query points, each coordinate drawn uniformly from [0, 1).
    query = np.vstack([np.random.rand(2),
                       np.random.rand(2),
                       np.random.rand(2)])

    estimate = lo.lowess(coords, values, query)

    # True function values at the query points.
    expected = (-1 * np.sin(query[0]) + 0.5 * np.cos(query[1])
                + np.cos(query[2]))
    npt.assert_array_almost_equal(estimate, expected, decimal=1)
def test_lowess2d():
    """
    2-d local regression with a fixed seed: fit -sin(x) + 0.5*cos(y) from
    100 random points and compare with the true function on a regular grid
    (to zero decimals) for every combination of bandwidth, kernel, robust
    flag and polynomial degree.
    """
    np.random.seed(1977)
    # Same nesting order as four nested loops, so the random draws are
    # consumed in the same sequence.
    cases = [(bandwidth, kernel, robust, deg)
             for bandwidth in [1.0, 2.0]
             for kernel in [lo.epanechnikov, lo.tri_cube]
             for robust in [True, False]
             for deg in [1, 3]]
    for bandwidth, kernel, robust, deg in cases:
        points = np.random.randn(2, 100)
        values = -1 * np.sin(points[0]) + 0.5 * np.cos(points[1])

        # Evaluation grid flattened to a (2, n) array of coordinates.
        grid = np.mgrid[-1:1:.1, -1:1:.1]
        query = np.vstack([grid[0].ravel(), grid[1].ravel()])

        estimate = lo.lowess(points, values, query, deg=deg,
                             kernel=kernel, l=bandwidth, robust=robust)
        expected = -1 * np.sin(query[0]) + 0.5 * np.cos(query[1])
        npt.assert_array_almost_equal(estimate, expected, decimal=0)
def _smooth_impl_mass(self, bandwidth):
    """Smooth the average deviation along the mass dimension with LOWESS.

    Each angular bin is processed on its own.  ``bandwidth`` is half of
    the smoothing window, expressed in mass bins.

    Returns a new array shaped like ``self.average_deviation``.
    """
    angle_bin_count = self.nominal.shape[1]
    smoothed = np.empty_like(self.average_deviation)

    for angle_index in range(angle_bin_count):
        row = self.average_deviation[angle_index]
        mass_positions = range(len(row))
        # NOTE(review): the weights come from the whole
        # average_deviation_unc2 array, not from the slice for this
        # angular bin -- confirm this is what lowess expects.
        smoothed[angle_index] = lowess(
            mass_positions, row, bandwidth,
            weights=1 / self.average_deviation_unc2)

    return smoothed
def testInvalidY(self):
    '''
    Call lowess with a range of invalid y arguments and check that each
    one raises LowessError.

    Every case runs in its own subTest so that a failure names the
    offending input and the remaining cases are still exercised.
    '''
    validY = self.knownResults['y']

    yNaN = validY.copy()
    yNaN[5] = np.nan  # np.NaN was removed in NumPy 2.0

    # Cast to object first so that storing a non-float value does not rely
    # on implicit upcasting in Series.__setitem__.
    yNonNumeric = validY.astype(object)
    yNonNumeric[5] = 'invalid'
    # NOTE(review): this stores the string 'True', whereas testInvalidX
    # stores the bool True -- confirm which was intended.
    yBool = validY.astype(object)
    yBool[5] = 'True'

    # Series.append was removed in pandas 2.0; concat is the replacement.
    yLong = pd.concat([validY, pd.Series([0.1], index=['a'])])

    invalids = [
        ('numpy array', validY.to_numpy()),
        ('string', 'a'),
        ('bool', True),
        ('contains NaN', yNaN),
        ('contains non-numeric', yNonNumeric),
        ("contains 'True'", yBool),
        ('too long', yLong),
    ]
    for label, y in invalids:
        with self.subTest(case=label):
            with self.assertRaises(LowessError):
                lowess.lowess(self.knownResults['x'], y,
                              bandwidth=0.2, polynomialDegree=1)
# Benchmark: time four smoothers on the same noisy 1-D data set.
# 2000 sorted abscissae drawn uniformly from [0, 4*pi); y is f(x) plus
# unit-variance Gaussian noise; x0 holds 100 evenly spaced evaluation points.
x = 4*np.pi*np.random.rand(2000)
x = np.sort(x)
y = f(x) + 1.0 * np.random.randn(x.shape[0])
x0 = np.linspace(0,4*np.pi,100)

# Scikit-Learn: kernel ridge regression with an RBF kernel
# (x[:,None] turns the 1-D arrays into single-column 2-D arrays)
t0 = time()
clf = KernelRidge(kernel='rbf', gamma=0.1, degree=5)
clf.fit(x[:,None], y)
f_kernelridge = clf.predict(x0[:,None])
print("Scikit-Learn: ", time()-t0)

# Lowess GitHub library, evaluated at x0
t0 = time()
f_lowess = lowess(x, y, x0, deg=2, l=0.5)
print("Lowess GitHub library: ", time()-t0)

# Statsmodels (note the y-before-x argument order in this call); the result
# is read back as columns 0 and 1 of the returned array
t0 = time()
res = statslowess(y, x, return_sorted=True, frac=0.1, it=0)
x_stats = res[:,0]
f_stats = res[:,1]
print("Statsmodels: ", time()-t0)

# Loess from PyPI, evaluated at x0
t0 = time()
x_loess, f_loess, w_loess = loess_1d(x, y, degree=2, frac=0.1, x0=x0)
print("Loess for PyPI: ", time()-t0)

# Raw data as small dots
plt.plot(x, y, '.', markersize=1)
if __name__ == "__main__":
    # Smooth a noisy sine signal using LOWESS, varying the bandwidth used
    # to select the set of local points for the regression.  The smaller
    # bandwidths will fit noise.  The larger bandwidths will miss the
    # oscillating signal.
    import os

    # Seed the random number generator so results are reproducible.
    np.random.seed((1, 2, 3))

    # Generate some noisy data.
    x = np.arange(-10, 5, 0.1)
    y = np.sin(x) - 0.5 + np.random.random(len(x))

    # Create a Pandas DataFrame with the data.
    df = pd.DataFrame({'x': x, 'Raw': y})

    # Smooth the data; bandwidths are kept as strings so the column labels
    # show them exactly as written.
    for i in ['0.05', '0.1', '0.3', '0.5', '0.7']:
        label = 'bandwidth={}'.format(i)
        df[label] = lowess.lowess(df['x'], df['Raw'],
                                  bandwidth=float(i), polynomialDegree=1)

    # Plot the data next to the script.  Only the extension is swapped; a
    # plain str.replace would also rewrite '.py' inside directory names.
    plot(df, 'x', os.path.splitext(sys.argv[0])[0] + '.png')
def fit_lowess(x, t=None, span=0.3, iter=4):
    """Split ``x`` into a LOWESS baseline and the residual signal.

    When ``t`` is not given, a time axis of ``len(x)`` evenly spaced
    points from 0 to 1 is used.  ``span`` and ``iter`` are passed to
    ``lowess.lowess`` as ``f`` and ``iter``.

    Returns a dict with keys ``'baseline'`` (the smooth curve),
    ``'signal'`` (``x`` minus the baseline) and ``'t'`` (the time axis
    actually used).
    """
    time_axis = np.linspace(0, 1, len(x)) if t is None else t
    trend = lowess.lowess(time_axis, x, f=span, iter=iter)
    residual = x - trend
    return {'baseline': trend, 'signal': residual, 't': time_axis}
def fit_lowess(x, t=None, span=0.3, iter=4):
    """Estimate a LOWESS baseline of ``x`` and subtract it.

    ``t`` defaults to ``len(x)`` evenly spaced points between 0 and 1.
    ``span`` is forwarded as the ``f`` argument and ``iter`` as the
    ``iter`` argument of ``lowess.lowess``.

    Returns a dict holding the baseline, the baseline-subtracted signal
    and the time axis under the keys 'baseline', 'signal' and 't'.
    """
    if t is not None:
        axis = t
    else:
        axis = np.linspace(0, 1, len(x))

    result = {}
    result['baseline'] = lowess.lowess(axis, x, f=span, iter=iter)
    result['signal'] = x - result['baseline']
    result['t'] = axis
    return result
if __name__ == "__main__":
    # Smooth a noisy quadratic signal using LOWESS, varying the degree of
    # the polynomial used in the regression.  The 0th and 1st order
    # polynomials will miss the large-scale curvature.  The higher order
    # polynomials will overfit (high frequency oscillations).
    import os

    # Seed the random number generator so results are reproducible.
    np.random.seed((1, 2, 3))

    # Generate some noisy data.
    x = np.arange(-10, 10, 0.1)
    y = 1.0 + 2.0 * x + 0.3 * x**2 + 20.0 * np.random.random(len(x))

    # Create a Pandas DataFrame with the data.
    df = pd.DataFrame({'x': x, 'Raw': y})

    # Smooth the data with polynomial degrees 0 to 4.
    for i in range(5):
        label = 'polynomialDegree={}'.format(i)
        df[label] = lowess.lowess(df['x'], df['Raw'],
                                  bandwidth=0.3, polynomialDegree=i)

    # Plot the data next to the script.  Only the extension is swapped; a
    # plain str.replace would also rewrite '.py' inside directory names.
    plot(df, 'x', os.path.splitext(sys.argv[0])[0] + '.png')
# Collect every poll record from `sondages` into a list and build a DataFrame.
sondages_array = []
for sondage in sondages:
    sondages_array.append(sondage)
sondages_df = pandas.DataFrame(sondages_array)

SMOOTHING = 23  # window passed to movingaverage()
STD_ERROR = 1.35
CANDIDATES = ['Fillon2', 'LePen', 'Melenchon', 'Macron']

for CANDIDATE in CANDIDATES:
    # Days before the reference date, negated so that time increases to the right
    plot_x = (sondages_df['DaysBefore'].values.astype(int) * -1)
    # Poll figures for this candidate
    values_candidate = sondages_df[CANDIDATE].values.astype(float)
    middle_moving = movingaverage(values_candidate, SMOOTHING)
    # LOWESS estimate; this is what is now used as the candidate's average
    yest = lowess(plot_x, values_candidate, f=0.8)
    movingaverage_candidate = yest
    # Old version (moving average padded at both ends with linear ramps)
    # np.concatenate(
    #     (np.linspace(
    #         np.average(values_candidate[:SMOOTHING // 2 - 1]),
    #         middle_moving[0],
    #         num=(SMOOTHING // 2), endpoint=True),
    #      middle_moving,
    #      np.linspace(
    #         np.average(values_candidate[- 1 * (SMOOTHING // 2 - 1):]),
    #         middle_moving[-1],
    #         num=(SMOOTHING // 2), endpoint=True))
    # )
RainRateBinLimit[0] = 0.5*PGRRsort[(len(PGRR)/(bincount+30*k)), 0] for i in range(1,bincount+30*(k)): RainRateBinLimit[i] = 0.5*(PGRRsort[(len(PGRR)/(bincount+30*k)*i), 0]-PGRRsort[(len(PGRR)/(bincount+30*k)*(i-1)), 0])+PGRRsort[(len(PGRR)/(bincount+30*k)*(i-1)), 0] print(RainRateBinLimit) ############################################################################ else: sys.exit("Please select either the Mean (1) or Median (2) case.") print("Bin Counts", PGTipPosition) #Calculation of the linear regression model along with statistical parameters. slope[k], intercept[k], r_value[k], p_value[k], std_err[k] = stats.linregress(RainRateBinLimit, yvalue) #print("RainRateBinLimit", RainRateBinLimit) #print("yvalue", yvalue) for m in xrange(len(lowess(RainRateBinLimit+eps, yvalue+eps, 1/2))): lowessval[k,m] = lowess(RainRateBinLimit+eps, yvalue+eps, 1/2)[m] if loop == 10: PGRainEnsembleMulti(np.max(RainRateBinLimit)+0.2, np.max(yvalue)+0.2, "PGEnsembleMulti" + str(amethod) + str(bincount), "png", RainRateBinLimit, yvalue, lowessval) print(slope, intercept, r_value, p_value, std_err) print("P-Value: ", p_value) print("R^2 Value: ", r_value**2) print("Standard Error: ", std_err) #print(lowessval[0,:]) PGEnsembleData = zip(RainRateBinLimit, PGTipBinMedianFinal) with open("processeddata/PGEnsembleData.csv", "wb") as output:
def PGRainFull(xlimmax=None, ylimmax=None, outFile=None, fileformat=None,
               RainRate5mm=None, TimeTip5mm=None, timekeep=None, PG=None,
               PGtip=None, slope=None, intercept=None, p_value=None,
               r_value=None, pearson_cor=None, std_err=None, mann_wht=None):
    """Save a 2 x 2 figure summarising potential gradient against rain rate.

    Panels:

    * top right    -- scatter of ``PGtip`` vs ``RainRate5mm`` with the line
      ``slope * x + intercept`` and a LOWESS curve (fraction 0.5);
    * top left     -- ``PG`` against ``timekeep`` over the time span of
      ``TimeTip5mm``;
    * bottom right -- ``TimeTip5mm`` against ``RainRate5mm``;
    * bottom left  -- text panel with ``outFile`` and the rounded statistics
      (``p_value``, ``r_value**2``, ``pearson_cor[1]``, ``std_err``,
      ``mann_wht``).

    ``xlimmax`` / ``ylimmax`` are the upper axis limits for rain rate and
    potential gradient.  The figure is written to
    ``plots/new/<outFile>.<fileformat>`` and closed.
    """
    def square(axes):
        # Make the plotting area square in display units.
        x0, x1 = axes.get_xlim()
        y0, y1 = axes.get_ylim()
        axes.set_aspect(np.abs((x1-x0)/(y1-y0)))

    plt.clf()
    fig = plt.figure()
    #plt.suptitle("Raindrop Charge: " + outFile)

    # PG vs. rain rate
    pgrain = fig.add_subplot(222)
    pgrain.scatter(RainRate5mm, PGtip)
    pgrain.set_xlabel("Rain Rate (mm/hr)")
    pgrain.set_ylabel("Potential Gradient (V/m)")
    pgrain.grid()
    pgrain.set_xlim(-.1, xlimmax)
    pgrain.set_ylim(-1050, ylimmax)
    pgrain.invert_yaxis()
    line_x = np.arange(-.1, xlimmax+0.3, 0.2)
    pgrain.plot(line_x, line_x*slope+intercept)

    PGRainsort = np.array(sorted(zip(RainRate5mm, PGtip)))
    eps = sys.float_info.epsilon
    # 0.5 rather than 1/2: under Python 2 integer division 1/2 evaluates to
    # 0, which is not a usable smoothing fraction.
    pgrain.plot(PGRainsort[:,0],
                lowess(PGRainsort[:,0]+eps, PGRainsort[:,1]+eps, 0.5))
    square(pgrain)

    # PG plot
    pg = fig.add_subplot(221)
    pg.plot(timekeep, PG)
    pg.set_xlabel("Time (hrs)")
    pg.set_xlim(np.min(TimeTip5mm), np.max(TimeTip5mm))
    pg.set_ylim(-1050, ylimmax)
    pg.invert_yaxis()
    #pg.axes.get_yaxis().set_visible(False)
    pg.grid()
    square(pg)

    # Rain plot
    rain = fig.add_subplot(224)
    rain.plot(RainRate5mm, TimeTip5mm)
    rain.set_ylabel("Time (hrs)")
    rain.set_ylim(np.min(TimeTip5mm), np.max(TimeTip5mm))
    rain.set_xlim(-.1, xlimmax)
    rain.grid()
    square(rain)

    # Info plot: labels in the left column, values in the right column
    info = fig.add_subplot(223)
    info.axis('off')
    info.text(-0.1, .9, '$Year and Day$', fontsize=15)
    info.text(-0.1, .75, '$P-Value$: ', fontsize=15)
    info.text(-0.1, .6, '$R^2$: ', fontsize=15)
    info.text(-0.1, .45, "$Pearson's Cor$: ", fontsize=15)
    info.text(-0.1, .3, "$Standard Error$: ", fontsize=15)
    info.text(-0.1, .15, "$Mann-Whitney$: ", fontsize=15)

    info.text(0.6, .9, outFile, fontsize=15)
    info.text(0.6, .75, round(p_value,7), fontsize=15)
    info.text(0.6, .6, round(r_value**2,5), fontsize=15)
    info.text(0.6, .45, round(pearson_cor[1],5), fontsize=15)
    info.text(0.6, .3, round(std_err,5), fontsize=15)
    info.text(0.6, .15, round(mann_wht,5), fontsize=15)
    square(info)

    plt.tight_layout(pad=0.4, w_pad=-0.5, h_pad=0.5)
    plt.savefig('plots/new/' + outFile + "." + fileformat)
    plt.close(fig)
def PGRainFull(xlimmax=None, ylimmax=None, outFile=None, fileformat=None,
               RainRate5mm=None, TimeTip5mm=None, timekeep=None, PG=None,
               PGtip=None, slope=None, intercept=None, p_value=None,
               r_value=None, pearson_cor=None, std_err=None, mann_wht=None):
    """Plot 3 subplots all of which complement the main focus, i.e. (1) PG
    vs. Rain Rate along with side plots for (2) Rain Rate and (3) PG between
    the times that charged rain was detected. Statistical information was
    also added in the remaining quadrant to fill the white space but can
    easily be removed if necessary.

    The main panel also carries the regression line
    ``slope * x + intercept`` and, when it can be computed, a LOWESS curve
    (fraction 0.5).  ``xlimmax`` / ``ylimmax`` are the upper axis limits for
    rain rate and potential gradient.  The figure is written to
    ``Plots/new_v4/<outFile>.<fileformat>`` and closed.  Returns None.
    """
    def square(axes):
        # Make the plotting area square in display units.
        x0, x1 = axes.get_xlim()
        y0, y1 = axes.get_ylim()
        axes.set_aspect(np.abs((x1-x0)/(y1-y0)))

    plt.clf()
    fig = plt.figure()
    #plt.suptitle("Raindrop Charge: " + outFile)

    # PG vs. rain rate
    pgrain = fig.add_subplot(222)
    pgrain.scatter(RainRate5mm, PGtip)
    pgrain.set_xlabel("Rain Rate (mm/hr)")
    pgrain.set_ylabel("Potential Gradient (V/m)")
    pgrain.grid()
    pgrain.set_xlim(-.1, xlimmax)
    pgrain.set_ylim(-1050, ylimmax)
    pgrain.invert_yaxis()
    line_x = np.arange(-.1, xlimmax+0.3, 0.2)
    pgrain.plot(line_x, line_x*slope+intercept)

    PGRainsort = np.array(sorted(zip(RainRate5mm, PGtip)))
    eps = sys.float_info.epsilon
    try:
        # 0.5 rather than 1/2: under Python 2 integer division 1/2
        # evaluates to 0, which is not a usable smoothing fraction.
        pgrain.plot(PGRainsort[:,0],
                    lowess(PGRainsort[:,0]+eps, PGRainsort[:,1]+eps, 0.5))
    except Exception:
        # Best effort: the curve is skipped when the fit fails.  Exception
        # (not a bare except) so Ctrl-C and interpreter exit still propagate.
        print("LOWESS: Singular Matrix!")
    square(pgrain)

    # PG plot
    pg = fig.add_subplot(221)
    pg.plot(timekeep, PG)
    pg.set_xlabel("Time (hrs)")
    pg.set_xlim(np.min(TimeTip5mm), np.max(TimeTip5mm))
    pg.set_ylim(-1050, ylimmax)
    pg.invert_yaxis()
    #pg.axes.get_yaxis().set_visible(False)
    pg.grid()
    square(pg)

    # Rain plot
    rain = fig.add_subplot(224)
    rain.plot(RainRate5mm, TimeTip5mm)
    rain.set_ylabel("Time (hrs)")
    rain.set_ylim(np.min(TimeTip5mm), np.max(TimeTip5mm))
    rain.set_xlim(-.1, xlimmax)
    rain.grid()
    square(rain)

    # Info plot: labels in the left column, values in the right column
    info = fig.add_subplot(223)
    info.axis('off')
    info.text(-0.1, .9, '$Year and Day$', fontsize=15)
    info.text(-0.1, .75, '$P-Value$: ', fontsize=15)
    info.text(-0.1, .6, '$R^2$: ', fontsize=15)
    info.text(-0.1, .45, "$Pearson's Cor$: ", fontsize=15)
    info.text(-0.1, .3, "$Standard Error$: ", fontsize=15)
    info.text(-0.1, .15, "$Mann-Whitney$: ", fontsize=15)

    info.text(0.6, .9, outFile, fontsize=15)
    info.text(0.6, .75, round(p_value,7), fontsize=15)
    info.text(0.6, .6, round(r_value**2,5), fontsize=15)
    info.text(0.6, .45, round(pearson_cor[1],5), fontsize=15)
    info.text(0.6, .3, round(std_err,5), fontsize=15)
    info.text(0.6, .15, round(mann_wht,5), fontsize=15)
    square(info)

    plt.tight_layout(pad=0.4, w_pad=-0.5, h_pad=0.5)
    plt.savefig('Plots/new_v4/' + outFile + "." + fileformat)
    plt.close(fig)

    return
# Switches for the averaging options (read elsewhere in the script)
weightedAverageActivated = True
correlatedActivated = True

## First option:
## local regression
F_FOR_CANDIDAT = 0.3  # LOWESS fraction for candidates not listed in `petits`
F_FOR_PETIT = 0.67    # LOWESS fraction for candidates listed in `petits`
# One value per candidate: the last point of that candidate's LOWESS curve
LR_AVG = pandas.Series(index=candidats)
for candidat in candidats:
    # x = np.array(df_clean['DaysBefore'])
    # Evenly spaced abscissa in [0, 1], one point per row of df_clean
    x = np.linspace(0.0, 1.0, num=len(df_clean))
    y = np.array(df_clean[candidat])
    f = F_FOR_PETIT if candidat in petits else F_FOR_CANDIDAT
    candidat_lowess = lowess(x, y, f=f)
    LR_AVG[candidat] = candidat_lowess[-1]
    # plt.plot(x,candidat_lowess, label=candidat)
# plt.legend(loc='best')
# plt.show()

## Second option:
## Weight by time, sample size (hl means half-life)
TIME_HL_FACTOR = 4 # By how much the weight will be divided
TIME_HL_DURATION = 5 # In days
INSTITUTE_HL_PENALTY = 4 # By how much the weight is divided
# for each new survey by the same polling company
FACTOR_SAMPLESIZE = 0.4
# Time
STD_ERROR = get_deviation(21, 1000) / 2 print('STD_ERROR = ' + str(STD_ERROR)) RED_DATE = date(2017, 4, 23) - timedelta(60, 0, 0) CANDIDATES = ['Fillon', 'LePen', 'Melenchon', 'Macron'] PLOT = False for CANDIDATE in CANDIDATES: # Convertir les jours en nombre négatifs plot_x = (sondages_df['DayOfSurvey'].values.astype('datetime64')) plot_x_numbers = (sondages_df['DaysBefore'].values.astype(int) * -1) # Chiffres des sondages values_candidate = sondages_df[CANDIDATE].values.astype(float) # Moyenne movingaverage_candidate = lowess(plot_x_numbers, values_candidate, f=0.3) # Deviation from moyenne dev_f_moyenne = abs(values_candidate - movingaverage_candidate) # Moyenne of deviation from moyenne dev_movingaverage = lowess(plot_x_numbers, dev_f_moyenne, f=0.8) # Viz if PLOT: # plt.plot(plot_x, values_candidate, 'k.') # plt.plot(plot_x, movingaverage_candidate, 'r') plt.plot(plot_x, dev_f_moyenne, 'b.') plt.plot(plot_x, dev_movingaverage, 'g') # plt.fill_between(plot_x, movingaverage_candidate + STD_ERROR, movingaverage_candidate - STD_ERROR) # Resultats historiques with open('raw/resultats.csv') as resultats_file:
x.append(float(arr[0])) y.append(float(arr[1])) stdev.append(float(arr[3])) i+=1 #get the average coverage ave += float(arr[1])*float(arr[2]) count += int(arr[2]) if i >=100 and i<=300: maxCov = max(maxCov,float(arr[1])) normalize.close() maxCov = math.ceil(maxCov) ave = ave / count x1 = numpy.array(x[100:301], numpy.float) y1 = numpy.array(y[100:301], numpy.float) result = lowess(x1,y1,f=.15).tolist() result = y[0:100]+result+y[301:401] corfactor = [] for i in range(401): if result[i] != 0 : corfactor.append(ave/result[i]) else: corfactor.append(3) continue if corfactor[i]>3: corfactor[i] = 3 if corfactor[i]<1/3 : corfactor[i] = 1/3 fig, ax1 = plt.subplots() ax1.plot(x,y,'b-') ax1.set_xlabel('GC %')