def lyap_from_dist(distances, debug_plot=True, a=0, b=100):
    """
    Estimates the largest Lyapunov exponent from average distances.

    References:
        M. T. Rosenstein, J. J. Collins, and C. J. De Luca, "A practical method for
        calculating largest Lyapunov exponents from small data sets," Physica D:
        Nonlinear Phenomena, vol. 65, no. 1, pp. 117-134, 1993.

    :param distances: average trajectory distances
    :param debug_plot: if True, plot the distances together with the fitted line
    :param a: first index of the fitting window
    :param b: last index of the fitting window
    :return: an estimate of the largest Lyapunov exponent (a positive exponent is a
             strong indicator of chaos)
    """
    x = np.arange(len(distances))[a:b]
    y = distances[a:b]
    reg = lr(x, y)
    yr = reg.slope * x + reg.intercept
    if debug_plot:
        plt.scatter(x, y, alpha=0.7, s=50)
        plt.plot(x, yr, lw=1, c='r')
        plt.title("Average Distances Evolution (Lyapunov = %.2f)" % reg.slope)
        plt.show(block=False)
    return reg.slope
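# A minimal usage sketch for lyap_from_dist (not from the original code): the
# synthetic divergence curve below grows linearly (slope ~0.05) before
# saturating, as in Rosenstein's method, and the fit is restricted to the early
# linear region via a and b. Assumes numpy/scipy are imported as the function
# above expects.
import numpy as np
from scipy.stats import linregress as lr

steps = np.arange(300)
distances = np.minimum(0.05 * steps, 5.0) + np.random.normal(0.0, 0.1, steps.size)

lyap = lyap_from_dist(distances, debug_plot=False, a=0, b=100)  # fit only the early linear part
print("Estimated largest Lyapunov exponent:", lyap)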
def fit_one_over_f(self, fmin=0, fmax=None, filters=[60], filters_bw=[10], plot=True):
    '''
    Returns A, alpha fit parameters to A/f^alpha.
    Linear fit of a log-log plot over the frequency range [fmin, fmax].
    filters: a list of frequencies (Hz) to filter out (with their harmonics) for the fit.
    filters_bw: a list of bandwidths (Hz) corresponding to each filter frequency.
    plot: if True, will plot the fit curve on the figure.
    '''
    argmin, argmax = self._get_argmin_argmax(fmin, fmax)
    f = self.f[argmin:argmax]
    Vn = self.Vn[argmin:argmax]
    for i in range(len(filters)):
        freq0 = filters[i]
        freq = freq0
        j = 1
        while freq < f[-1]:  # harmonics
            freq = freq0 * j
            j += 1
            # find indices where frequency is outside bandwidth of filter center frequency
            where, = np.where(abs(f - freq) > filters_bw[i] / 2)
            f = f[where]
            Vn = Vn[where]
    # popt, pcov = curve_fit(one_over_f, f, Vn, p0=[1e-5, .5],
    #                        bounds=([-np.inf, .4], [np.inf, .6]))
    # return popt
    m, b, _, _, _ = lr(np.log(f), np.log(Vn))
    if plot and self.ax is not None:
        # plot the fitted A/f^alpha curve against frequency
        self.ax['loglog'].loglog(self.f, np.exp(b) * self.f**m)
    return np.exp(b), -m
def crear_plot(self, sub_df, corr):
    y_start, y_end = (min(sub_df[y][sub_df['alpha'] == alpha_max]),
                      max(sub_df[y][sub_df['alpha'] == alpha_max]))
    y_start -= 0.05 * (y_end - y_start)
    y_end += 0.05 * (y_end - y_start)
    scatter = figure(plot_height=400, plot_width=400,
                     tools='reset,box_zoom,pan,wheel_zoom,lasso_select,undo,redo',
                     sizing_mode='scale_width', output_backend="webgl",
                     toolbar_location='above', y_range=(y_start, y_end))
    hover = HoverTool(tooltips="""
        <div><span style="font-size: 17px; font-weight: bold;">@municipio</span></div>
        <div><span style="font-size: 12px;">@provincia (@autonomia), @poblacion</span></div>
        <div><span style="font-size: 14px; font-weight: bold;">@partido</span>
             <span style="font-size: 12px;">@porcentaje</span></div>
    """)
    scatter.add_tools(hover)
    scatter.scatter(x=x, y=y, source=sub_df, color='color', alpha='alpha', **scatter_kwargs)
    if corr:
        # Add the correlations here.
        for var_subor_i in self.subordinada_obj:
            for var_indep_i in self.indepe_obj:
                si_df = sub_df[(sub_df[var_indepe] == var_indep_i) &
                               (sub_df[var_subord] == var_subor_i)]
                if len(si_df) > 1:  # guard: otherwise this fails for Ceuta and Melilla
                    x_vals, y_vals = si_df[x].values, si_df[y].values
                    if corr_type == 'lineal':
                        def f(x, m, b):
                            return m * x + b
                        m, b, r, p, err = lr(x_vals, y_vals)
                        text_label = "r² = %.2f" % r**2
                    elif corr_type == 'exp':
                        def f(x, m, b):
                            return np.power(m * np.log10(x) + b, 10)
                        popt, pcor = cf(f, x_vals, y_vals)
                        m, b = popt
                        ss_res = np.sum((y_vals - f(x_vals, m, b)) ** 2)
                        ss_tot = np.sum((y_vals - np.mean(y_vals)) ** 2)
                        r_squared = 1 - (ss_res / ss_tot)
                        text_label = "r² = %.2f" % r_squared
                    x_arr = np.linspace(min(x_vals), max(x_vals), 100)
                    scatter.line(x_arr, [f(x_i, m, b) for x_i in x_arr],
                                 color=si_df['color'].iloc[0])
                    r_label = Label(x=1.05 * max(x_vals), y=f(max(x_vals), m, b),
                                    text=text_label, text_align='left',
                                    text_color=si_df['color'].iloc[0], render_mode='css')
                    scatter.add_layout(r_label)
                    if f(max(x_vals), m, b) > y_end:
                        y_end = f(max(x_vals), m, b)
                    if f(max(x_vals), m, b) < y_start:
                        y_start = f(max(x_vals), m, b)
    scatter.y_range = Range1d(y_start, y_end)
    return scatter
def RegLineal(A, B, C, D, stringAB, stringCD, title, xlab, ylab):
    plt.figure(figsize=(5, 4))
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.title(title)
    xx = np.linspace(0, 100, 500)
    # scatter of initial vs final responses when the initial response is manipulated
    plt.scatter(A, B, label=stringAB, s=2, color='red', alpha=0.8)
    # least-squares linear regression to find the parameters of the fitted line
    slopeAB, interceptAB, r_value, p_value, std_errAB = lr(A, B)
    plt.plot(xx, slopeAB * xx + interceptAB, color='red', alpha=0.8)
    # scatter of initial vs final responses when the initial response is NOT manipulated
    plt.scatter(C, D, label=stringCD, s=2, color='green', alpha=0.8)
    # least-squares linear regression to find the parameters of the fitted line
    slopeCD, interceptCD, r_valueNM, p_valueNM, std_errCD = lr(C, D)
    plt.plot(xx, slopeCD * xx + interceptCD, color='green', alpha=0.8)
    plt.legend(loc='upper left')
    plt.tight_layout()
    return slopeAB, interceptAB, std_errAB, slopeCD, interceptCD, std_errCD
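# A hypothetical call sketch for RegLineal (the synthetic 0-100 responses below
# are invented for illustration): A/B are initial/final responses for the
# manipulated group, C/D for the non-manipulated group.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress as lr

rng = np.random.default_rng(0)
Ti = rng.uniform(0, 100, 200)                 # initial responses, manipulated group
Tf = 0.6 * Ti + 20 + rng.normal(0, 8, 200)    # final responses, manipulated group
TNMi = rng.uniform(0, 100, 200)               # initial responses, non-manipulated group
TNMf = 0.9 * TNMi + rng.normal(0, 8, 200)     # final responses, non-manipulated group

fit_params = RegLineal(Ti, Tf, TNMi, TNMf,
                       'manipulated', 'non-manipulated',
                       'Initial vs final responses', 'Initial response', 'Final response')
plt.show()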
def LinearRegression_Map(v, p, ndimlat, ndimlon):
    p_array = np.zeros((ndimlat, ndimlon))
    r_array = np.zeros((ndimlat, ndimlon))
    m_array = np.zeros((ndimlat, ndimlon))
    for i in range(ndimlon):
        for j in range(ndimlat):
            x = p
            y = v[:, j, i]
            # use a separate name for the p-value so it does not overwrite the
            # predictor argument `p` on later iterations
            m, b, r, pval, e = lr(x, y)
            m_array[j, i] = m
            r_array[j, i] = r
            p_array[j, i] = pval
    return [m_array, r_array, p_array]
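# A minimal usage sketch for LinearRegression_Map, assuming v is a
# (time, lat, lon) field and p is the time series regressed onto it at every
# grid point; the synthetic data below is purely illustrative.
import numpy as np
from scipy.stats import linregress as lr

ntime, ndimlat, ndimlon = 120, 10, 20
index = np.random.normal(size=ntime)                  # e.g. a climate index
field = 0.5 * index[:, None, None] + np.random.normal(size=(ntime, ndimlat, ndimlon))

slope_map, r_map, p_map = LinearRegression_Map(field, index, ndimlat, ndimlon)
print(slope_map.shape, r_map.shape, p_map.shape)      # (10, 20) each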
def lr_wrapper():
    """
    Wraps the linregress function so it can take its arguments from a dataframe.
    """
    data = df[wb:wt]  # slice the data window
    slope, _, rvalue, pvalue, stderr = lr(y=data[color], x=data[alpha_name])
    ppf = norm.ppf((1 - ci) / 2), norm.ppf(ci + (1 - ci) / 2)
    xbar = np.mean(data[alpha_name])
    # calculate the bounds of the confidence interval
    ci_top = xbar + stderr * ppf[1]
    ci_bot = xbar + stderr * ppf[0]
    if (ci_top > 0) and (ci_bot < 0):
        ci_include_0 = "purple"
    else:
        ci_include_0 = "yellow"
    return rvalue**2, ci_bot, ci_top, ci_include_0
def get_kolton_width(self, rmin_px=5, dr_px=1, overlap=0.):
    """
    Kolton's width: for each window size, remove a local linear fit of u(z) in
    every window and average the residual width (via self.window_averaging)
    over all windows of that size.
    """
    N = len(self.u)
    possible_window_sizes = np.arange(rmin_px, N, dr_px)
    kwidth = []
    for window_size in possible_window_sizes:
        parts = 0
        kwidth_per_window = 0
        window_step = int(np.ceil(window_size * (1 - overlap)))
        for i in range(0, N - window_size, window_step):
            m, b, _, _, _ = lr(self.z[i:i + window_size], self.u[i:i + window_size])
            kwidth_per_window += self.window_averaging(
                self.u[i:i + window_size] - self.z[i:i + window_size] * m - b)
            parts += 1
        kwidth.append(kwidth_per_window / parts)
    self.rkw = possible_window_sizes * self.umperpix
    self.kolton_width = np.array(kwidth)
    self.get_zeta_kw()
def plot_mobility(self, l=0, u=-1, Rxx_channel=0, Rxy_channel=1):
    '''
    Makes a plot of the Hall coefficient and carrier mobility vs gate voltage.
    The voltage channel measuring Rxy is by default 1, and Rxx is 0.
    For now we assume a geometrical factor of 1, so Rxx = rho_xx.

    mu = R_H / <R_xx>, where <R_xx> is the average value of R_xx
    R_H = Rxy / B

    l: lower index of the fit range
    u: upper index of the fit range
    Rxx_channel: channel number for Rxx
    Rxy_channel: channel number for Rxy
    '''
    from scipy.stats import linregress as lr
    slopes = np.array([])
    mobility = np.array([])
    Rxx = self.R2D[Rxx_channel]
    Rxy = self.R2D[Rxy_channel]
    for i in range(Rxy.shape[1]):
        slope, intercept, _, _, _ = lr(self.B[l:u], Rxy[l:u, i])
        slopes = np.append(slopes, slope)
        mobility = np.append(mobility, slope / Rxx[l:u, i].mean())
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
    ax1.plot(self.Vg, abs(slopes))
    ax2.plot(self.Vg, abs(mobility) * 100**2)  # convert m^2/(V s) to cm^2/(V s)
    ax1.set_xlabel('Vg (V)', fontsize=20)
    ax2.set_xlabel('Vg (V)', fontsize=20)
    ax1.set_ylabel(r'Hall Coefficient ($\rm \Omega/T$)', fontsize=16)
    ax2.set_ylabel(r'Carrier mobility ($\rm{cm^2/V\cdot s}$)', fontsize=16)
    fig.tight_layout()
    self.Hall_coefficient = abs(slopes)
    self.mobility = abs(mobility) * 100**2
    return fig, ax1, ax2
def Hurst(seq, vectoriza=None):
    """
    Estimates the Hurst exponent of a sequence via rescaled-range (R/S) analysis.
    """
    N = len(seq)
    try:
        X = vectoriza(seq)
    except Exception:
        X = seq
    mn = X.mean()
    Y = X - mn
    Z = Y.cumsum()
    R, S, E = zeros(Z.size), zeros(Z.size), zeros(Z.size)
    Em = zeros(E.size)
    # start at 2 so the prefix std is non-zero and R/S is well defined
    for i in range(2, len(Z)):
        R[i] = Z[:i].max() - Z[:i].min()
        S[i] = seq[:i].std()
        E[i] = R[i] / S[i]
    # expanding mean of the rescaled range
    for i in range(2, len(E)):
        Em[i] = E[:i].mean()
    # skip the leading entries that are still zero before taking logs
    lnEm = log(Em[3:])
    lnX = log(range(3, len(Em)))
    m, b, r, p, s = lr(lnX, lnEm)
    return m, r, p
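# A hypothetical usage sketch for Hurst on white noise (purely illustrative):
# rescaled-range analysis of an uncorrelated sequence should give an exponent
# near 0.5, often somewhat above for short series. Assumes the surrounding
# module provides zeros, log and lr, as the function body does.
from numpy.random import normal

noise = normal(size=2048)
H, r, p = Hurst(noise)
print("Hurst exponent: %.2f (r = %.2f, p = %.3g)" % (H, r, p))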
def heatmap_correlaciones(df, var_x_ht, var_y_ht, var_x, var_y, cmap='div', corr_type='lineal'):
    x, y, z = [], [], []
    opts_x = sorted(list(dict.fromkeys(df[var_x_ht].values)))
    opts_y = sorted(list(dict.fromkeys(df[var_y_ht].values)))[::-1]
    # opts_x = ['PSOE', 'PP']
    # opts_y = ['Euskadi', 'Cataluña']
    for opt_x in opts_x:
        for opt_y in opts_y:
            df_xy = df[(df[var_x_ht] == opt_x) & (df[var_y_ht] == opt_y)]
            x.append(opt_x)
            y.append(opt_y)
            try:
                x_vals, y_vals = df_xy[var_x].values, df_xy[var_y].values
                if corr_type == 'lineal':
                    m, b, r, p, err = lr(x_vals, y_vals)
                    z.append(r ** 2)
                elif corr_type == 'exp':
                    def f(x, m, b):
                        return np.power(m * np.log10(x) + b, 10)
                    popt, pcor = cf(f, x_vals, y_vals)
                    m, b = popt
                    ss_res = np.sum((y_vals - f(x_vals, m, b)) ** 2)
                    ss_tot = np.sum((y_vals - np.mean(y_vals)) ** 2)
                    r_squared = 1 - (ss_res / ss_tot)
                    z.append(r_squared)
            except Exception:
                z.append(np.NaN)
    df_correlacion = pd.DataFrame({var_x_ht: x, var_y_ht: y, 'z': z})
    p = figure(plot_height=150, plot_width=400, x_range=opts_x, y_range=opts_y,
               tools='hover', sizing_mode='scale_width',
               tooltips=[(var_x_ht, '@' + var_x_ht), (var_y_ht, '@' + var_y_ht), ('r', '@z')])
    if cmap == 'seq':
        colors = ['#d1eeea', '#a8dbd9', '#85c4c9', '#68abb8', '#4f90a6', '#3b738f', '#2a5674']
    elif cmap == 'div':
        colors = ['#009B9E', '#42B7B9', '#A7D3D4', '#F1F1F1', '#E4C1D9', '#D691C1', '#C75DAB']
    else:
        colors = cmap
    mapper = LinearColorMapper(palette=colors, low=df_correlacion.z.min(),
                               high=df_correlacion.z.max(), nan_color="#bbbbbb")
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "8pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 0.8  # ~pi/4
    p.rect(source=df_correlacion, x=var_x_ht, y=var_y_ht, width=1, height=1,
           fill_color={'field': 'z', 'transform': mapper}, line_color=None)
    color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
                         ticker=BasicTicker(desired_num_ticks=len(colors)),
                         formatter=PrintfTickFormatter(format="%.3f"),
                         label_standoff=6, border_line_color=None, location=(0, 0))
    p.add_layout(color_bar, 'right')
    return p
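# A hypothetical usage sketch for heatmap_correlaciones: the toy DataFrame and
# column names below are invented, and the bokeh imports mirror the objects the
# function already relies on (a bokeh version accepting plot_height/plot_width
# is assumed).
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import LinearColorMapper, ColorBar, BasicTicker, PrintfTickFormatter
from scipy.stats import linregress as lr
from scipy.optimize import curve_fit as cf

rng = np.random.default_rng(1)
df_demo = pd.DataFrame({
    'partido': rng.choice(['PSOE', 'PP'], 200),
    'autonomia': rng.choice(['Euskadi', 'Cataluña'], 200),
    'poblacion': rng.uniform(1e3, 1e6, 200),
    'porcentaje': rng.uniform(0, 60, 200),
})

p = heatmap_correlaciones(df_demo, var_x_ht='partido', var_y_ht='autonomia',
                          var_x='poblacion', var_y='porcentaje', corr_type='lineal')
show(p)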
ss_res_443_original = np.sum(res_443_original**2)
ss_tot_443_original = np.sum((kd_pred_443_original - np.mean(kd_pred_443_original))**2)
r2_443_original = 1 - (ss_res_443_original / ss_tot_443_original)

## MAE and MAPE statistics
mae_443_original = mae(kd_ref_443_original, kd_pred_443_original)
mape_443_original = mean_abs_perc_error(kd_ref_443_original, kd_pred_443_original)

## MSE and RMSE statistics
mse_443_original = mse(kd_ref_443_original, kd_pred_443_original)
rmse_443_original = np.sqrt(mse_443_original)

## Linear regression with the scipy package as a cross-check
sl_443_original, inter_443_original, r_443_original, p_443_original, stderr_443_original = lr(
    kd_ref_443_original, kd_pred_443_original)

## Best-fit curve for plotting
ref_443_original = fun_reg(x_fit, fit_443_original[0], fit_443_original[1])
ref_443_original = ref_443_original.rename(x_fit, axis='rows')

## Text for the plot with the statistics and regression results
eq_443_original = 'y = 0.6396x + 0.1799'
stat_443_original = 'R² = 0.7104; MAPE = 17.65%; RMSE = 0.2042 m^-1; n = 40'

## Plotting the data scatter and the regression results
plot_ajuste(kd_ref_443_original, kd_pred_443_original, ref_443_original, eq_443_original,
            stat_443_original, [0, 2.5], [0, 2.5],
# (tail of the parser() helper; its beginning is not shown in this excerpt)
            pre_price = float(stock_price)
            pre_rm = float(r_m)
        except BaseException:
            print('skipping one line!')
    r_risk.pop(0)
    rm_rate.pop(0)
    return r_risk, rm_rate


ibm_r, SP500_rate = parser('ibm.csv')
us_risk_free_rate = 0.77 / 365
SP500_rate[:] = [x * 100 - us_risk_free_rate for x in SP500_rate]
ibm_r[:] = [x * 100 - us_risk_free_rate for x in ibm_r]
slope_us, intercept_us, _, _, _ = lr(SP500_rate, ibm_r)
print(slope_us, intercept_us)
# cross-check of beta: cov(market, stock) / var(market)
print(np.cov(SP500_rate, ibm_r)[0, 1] / np.var(SP500_rate))

swisscom_r, SMI_rate = parser('swisscom.csv')
swiss_risk_free_rate = -0.8
SMI_rate[:] = [x * 100 - swiss_risk_free_rate for x in SMI_rate]
# excess return of Swisscom relative to the Swiss risk-free rate
swisscom_r[:] = [x * 100 - swiss_risk_free_rate for x in swisscom_r]
slope_swi, intercept_swi, _, _, _ = lr(SMI_rate, swisscom_r)
print(slope_swi, intercept_swi)
# cross-check of beta: cov(market, stock) / var(market)
print(np.cov(SMI_rate, swisscom_r)[0, 1] / np.var(SMI_rate))
# plt.scatter(['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-90', '91-100'],
#             TNMfin, label='Manipulated')
# scatter of the averages computed by Milton, with the labels changed (lucas)
plt.scatter(np.arange(5, 105, 10), Tfin, label='Manipulated', color='blue', alpha=0.8)
plt.scatter(np.arange(5, 105, 10), TNMfin, label='Non manipulated', color='orange', alpha=0.8)

# least-squares linear regression to find the parameters of the fitted line,
# using the bin averages as data (not even sure why I did this) (lucas)
xx = np.linspace(0, 100, 500)
slope, intercept, r_value, p_value, std_err = lr(np.arange(5, 105, 10), Tfin)
plt.plot(xx, slope * xx + intercept, color='blue', alpha=0.8)
slopeNM, interceptNM, r_valueNM, p_valueNM, std_errNM = lr(np.arange(5, 105, 10), TNMfin)
plt.plot(xx, slopeNM * xx + interceptNM, color='orange', alpha=0.8)

# scatter of initial vs final responses when the initial response is manipulated
plt.scatter(Ti, Tf, label='Ti vs Tf(manipulados)', s=2, color='red', alpha=0.8)
# least-squares linear regression to find the parameters of the fitted line
slope, intercept, r_value, p_value, std_err = lr(Ti, Tf)
plt.plot(xx, slope * xx + intercept, color='red', alpha=0.8)

# scatter of initial vs final responses when the initial response is NOT manipulated
plt.scatter(TNMi, TNMf, label='TNMi vs TNMf(no manipulados)', s=2,
N = 1000

# set up matrices to capture all of the estimates
samplePoints = np.empty(shape=(0, n))
allX = np.empty(shape=(0, n))
allY = np.empty(shape=(0, n))
allYHat = np.empty(shape=(0, n))
allCoeffs = np.empty(shape=(0, 2))

for k in range(0, N):
    X = normal(10, 1, n)
    Y = b0 + b1 * X + normal(0, stdE, n)
    allX = np.vstack([allX, X])
    allY = np.vstack([allY, Y])
    linReg = lr(X, Y)
    coeffs = np.array([linReg.intercept, linReg.slope])
    allCoeffs = np.vstack([allCoeffs, coeffs])
    yHat = coeffs[0] + coeffs[1] * X
    allYHat = np.vstack([allYHat, yHat])

plt.plot(allX, allY, '.', color='grey', markersize=1)
for i in range(0, N):
    plt.plot(allX[i], allYHat[i], color='b')

pdb.set_trace()

# for k in range(0, N+1):
#     ##########################
#     # SIMULATE DATASETS
plt.xlabel('Rrs(560) / Rrs(665) + Rrs(704) [-]', fontsize=14)
plt.ylabel('a(560) [m-1]', fontsize=14)
plt.xlim([0, 3])
plt.ylim([0, 1])
plt.title('Ajuste QAA v6 - Passo 2', fontsize=14)
plt.show()

plt.subplot(122)
plt.text(4.5, 0.30, nn_eq)
plt.text(4.5, 0.15, nn_r)
plt.plot(nn_ref, 'r--')
plt.scatter(rrs_ratio, n_data_t, c='k')
plt.xlabel('e^(rrs(665) / rrs(704)) [-]', fontsize=14)
plt.ylabel('Slope bbp (n) [-]', fontsize=14)
plt.xlim([3, 6])
plt.ylim([0, 3])
plt.title('Ajuste QAA v6 - Passo 4', fontsize=14)
plt.legend(['Ajuste', 'Estações'], loc=1, fontsize=14)
plt.show()

###############################################################################
############################### R² TEST ######################################
###############################################################################

from scipy.stats import linregress as lr

slope, intercept, r_value, p_value, str_err = lr(rrs_ratio, n_data_t[0])

r2_teste = r_value**2
r2_teste2 = determination_coef(rrs_ratio, n_data_t[0])
## Coefficient of determination R²
res_443 = kd_pred_443 - fun_reg(kd_ref_443, fit_443[0], fit_443[1])
ss_res_443 = np.sum(res_443**2)
ss_tot_443 = np.sum((kd_pred_443 - np.mean(kd_pred_443))**2)
r2_443 = 1 - (ss_res_443 / ss_tot_443)

## MAE and MAPE statistics
mae_443 = mae(kd_ref_443, kd_pred_443)
mape_443 = mean_abs_perc_error(kd_ref_443, kd_pred_443)

## MSE and RMSE statistics
mse_443 = mse(kd_ref_443, kd_pred_443)
rmse_443 = np.sqrt(mse_443)

## Linear regression with the scipy package as a cross-check
sl_443, inter_443, r_443, p_443, stderr_443 = lr(kd_ref_443, kd_pred_443)

## Best-fit curve for plotting
ref_443 = fun_reg(x_fit, fit_443[0], fit_443[1])
ref_443 = ref_443.rename(x_fit, axis='rows')

## Text for the plot with the statistics and regression results
eq_443 = 'y = 0,7067x + 0,4500'
stat_443 = 'R² = 0,1796, MAPE = 0,0%; RMSE = 0,3560; n = 22'

## Plotting the data scatter and the regression results
plot_ajuste(kd_ref_443, kd_pred_443, ref_443, eq_443, stat_443, [0, 1.6], [0, 1.6],
# Convert the length-M vectors into Mx1 matrices, similar to using numpy.reshape(M, 1)
X = np.c_[df['GDPC']]
y = np.c_[df['LI']]

cl = ['South Africa', 'Portugal', 'Czech Republic', 'Italy', 'France',
      'United States', 'Switzerland', 'Norway', 'Luxembourg']
yf = [-20, -8, 5, -8, -8, -20, -15, 10, 5]

LRM = lm.LinearRegression()
LRM.fit(X, y)
theta = [LRM.intercept_[0], LRM.coef_[0][0]]

m, b, r, p, dm = lr(df['GDPC'].values, df['LI'].values)
beta = [b, m]

x = np.linspace(0, 1e5, 100)

fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(df.GDPC.values, df.LI.values, color='blue', s=10)
ax.plot(x, theta[0] + theta[1] * x, color='red', ls='-', lw=1.5)
ax.set_xlabel('GDP per Capita [USD]', fontproperties=bf)
ax.set_ylabel('Life Happiness Index', fontproperties=bf)
ax.set_ylim(4.5, 8.5)
kwargs = dict(xycoords='data', textcoords='offset points', fontproperties=nf,