def _load_turbine_pairs(list_path):
    """Read [lead, behind] turbine-name pairs from a comma-separated list file.

    The first line of the file is treated as a header and skipped.
    """
    with open(list_path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)
        return [[row[0], row[1]] for row in csv_reader]


def _fit_sector(turbine_pairs, angle_lower, angle_higher, upstream, downstream):
    """Call get_ransac for every pair of one wind sector and pool its samples.

    Items 2 and 3 of each get_ransac result are appended to the shared
    ``upstream`` / ``downstream`` lists; the list of results is returned.
    """
    fits = []
    for lead, behind in turbine_pairs:
        fit = get_ransac(lead, behind, angle_lower, angle_higher)
        fits.append(fit)
        upstream.extend(fit[2])
        downstream.extend(fit[3])
    return fits


def main():
    """Fit, save and plot per-pair RANSAC lines for three wind sectors.

    For each sector the turbine pairs are read from a list file, a RANSAC
    result is obtained per pair via ``get_ransac``, and the results are saved
    with ``save``. All per-pair lines are then drawn together with a Jensen
    wake-model line and one RANSAC fit over the pooled samples.
    """
    import matplotlib.patches as mpatches

    print('Starting RANSAC Robust Estimation...')
    print('')
    version = '1'

    # (list file, lower angle, upper angle, name used when saving, line colour)
    sectors = [
        ('turbine_list_226.txt', 224, 228, '226_degree', 'cornflowerblue'),
        ('turbine_list_106.txt', 104, 106, '106_degree', 'darkmagenta'),
        ('turbine_list_46.txt', 44, 46, '46_degree', 'orangered'),
    ]

    upstream_array = []
    downstream_array = []
    sector_results = []
    for list_path, angle_lower, angle_higher, save_name, colour in sectors:
        pairs = _load_turbine_pairs(list_path)
        fits = _fit_sector(pairs, angle_lower, angle_higher,
                           upstream_array, downstream_array)
        sector_results.append((pairs, fits, save_name, colour))

    # save data for rapid graphing later
    path = './saved_data' + version
    for _, fits, save_name, _ in sector_results:
        save(path, fits, save_name)

    # Jensen wake model: downstream speed = factor * upstream speed.
    # NOTE(review): rd is presumably the rotor radius in metres - confirm.
    Ct = 0.8
    rd = 56
    kw = 0.04
    turbine_distance = 778.7
    factor = 1 - (1 - math.sqrt(1 - Ct)) / (1 + kw * turbine_distance / rd) ** 2
    x = np.linspace(0, 25, 26)
    jensen_speed = factor * x

    # One line per turbine pair, coloured by wind sector.
    for pairs, fits, _, colour in sector_results:
        for pair, fit in zip(pairs, fits):
            plt.plot(fit[0], fit[1], color=colour, linewidth=2,
                     label=pair[0] + ' & ' + pair[1])

    plt.xlabel("Upstream Wind Turbine Corrected Wind Speed (m/s)")
    plt.ylabel("Downstream Wind Turbine Corrected Wind Speed (m/s)")
    plt.title("RANSAC for 30 Turbine Wake Single-Wake Relationships")

    # Explicit legend handles: one entry per sector instead of one per pair.
    blue_patch = mpatches.Patch(color='cornflowerblue', label='South West Wind Dir')
    magenta_patch = mpatches.Patch(color='darkmagenta', label='South East Wind Dir')
    orange_patch = mpatches.Patch(color='orangered', label='North East')
    black = mpatches.Patch(color='black', label='Jensen Prediction')
    plt.legend(handles=[blue_patch, magenta_patch, orange_patch, black])
    plt.plot(x, jensen_speed, color='black')

    # create general RANSAC over the samples pooled from every pair
    upstream_array = np.array(upstream_array)
    downstream_array = np.array(downstream_array)
    save(path, upstream_array, 'upstream_array')
    save(path, downstream_array, 'downstream_array')

    ransac = linear_model.RANSACRegressor()
    ransac.fit(upstream_array, downstream_array)
    general_ransac_x = np.arange(upstream_array.min(),
                                 upstream_array.max())[:, np.newaxis]
    general_ransac_y = ransac.predict(general_ransac_x)
    plt.plot(general_ransac_x, general_ransac_y, color='violet', linewidth=5,
             label='RANSAC regressor')
    plt.grid()
    plt.show()
def _fit_pairs_from_list(list_path, distances, lead_power_all, behind_power_all):
    """Fit and report a RANSAC line for every turbine pair named in a list file.

    For each (lead, behind) row the pair distance is appended to ``distances``
    and the pair's power samples are appended to the two pooled lists.
    """
    with open(list_path, "r") as list_file:
        reader = csv.reader(list_file, delimiter=",")
        next(reader, None)  # skip the header row
        for row in reader:
            lead = row[0]
            behind = row[1]
            distances.append(get_distances(lead, behind))

            df = pd.read_csv('Dataframe_' + lead + '_' + behind + '.csv')
            lead_power = np.asarray(df[lead + '_Grd_Prod_Pwr_Avg']).reshape(-1, 1)
            behind_power = np.asarray(df[behind + '_Grd_Prod_Pwr_Avg']).reshape(-1, 1)
            lead_power_all.extend(lead_power)
            behind_power_all.extend(behind_power)

            ransac = linear_model.RANSACRegressor()
            ransac.fit(lead_power, behind_power)
            print('RANSAC estimator coefficient: ' + str(ransac.estimator_.coef_))


def get_ransac():
    """Plot a pooled RANSAC power fit against the Jensen-predicted power line.

    Reads the turbine pairs listed in the 46, 226 and 106 degree list files,
    prints a per-pair RANSAC coefficient, fits one RANSAC model to all pooled
    (lead power, behind power) samples, and plots it together with the Jensen
    line converted from wind speed to power through the power curve.
    """
    # NOTE(review): the original wind-speed grid had 20 entries against 21
    # power values, which make_interp_spline rejects. 13.0 is appended on the
    # assumption that the grid is uniform in 0.5 steps - confirm against the
    # turbine's published power curve.
    powercurve_windspeed = np.array([
        3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10, 10.5,
        11, 11.5, 12, 12.5, 13
    ])
    powercurve_windspeed_new = np.linspace(powercurve_windspeed.min(),
                                           powercurve_windspeed.max(), 3000)
    powercurve_power = [
        7, 53, 123, 208, 309, 427, 567, 732, 927, 1149, 1401, 1688, 2006,
        2348, 2693, 3011, 3252, 3388, 3436, 3448, 3450
    ]
    # k=1: piecewise-linear interpolation of the power curve
    spl = make_interp_spline(powercurve_windspeed, powercurve_power, k=1)
    power_smooth = spl(powercurve_windspeed_new)

    Turbine_As_power = []
    Turbine_Bs_power = []
    distances = []
    for list_path in ('turbine_list_46.txt', 'turbine_list_226.txt',
                      'turbine_list_106.txt'):
        _fit_pairs_from_list(list_path, distances, Turbine_As_power,
                             Turbine_Bs_power)

    jensen_line = get_smoothed_jensen_line(distances)

    # One RANSAC fit over the samples of every pair.
    Turbine_As_power = np.asarray(Turbine_As_power).reshape(-1, 1)
    Turbine_Bs_power = np.asarray(Turbine_Bs_power).reshape(-1, 1)
    ransac = linear_model.RANSACRegressor()
    ransac.fit(Turbine_As_power, Turbine_Bs_power)
    general_ransac_x = np.arange(Turbine_As_power.min(),
                                 Turbine_As_power.max())[:, np.newaxis]
    general_ransac_y = ransac.predict(general_ransac_x)

    # The fit is a straight line, so its two end points are enough to draw it.
    x_1 = general_ransac_x[0]
    x_2 = general_ransac_x[-1]
    y_1 = general_ransac_y[0]
    y_2 = general_ransac_y[-1]

    # Convert both axes of the Jensen line through the power curve.
    jensen_x_axis = []
    jensen_y_axis = []
    for upstream_value, downstream_value in zip(jensen_line[0], jensen_line[1]):
        jensen_x_axis.append(return_power_value(
            upstream_value, powercurve_windspeed_new, power_smooth))
        jensen_y_axis.append(return_power_value(
            downstream_value, powercurve_windspeed_new, power_smooth))

    print(str(ransac.estimator_.coef_))

    plt.figure(1)
    plt.plot([x_1, x_2], [y_1, y_2], color='red', linewidth=2,
             label='RANSAC regressor')
    plt.plot(jensen_x_axis, jensen_y_axis, color='green', linewidth=1,
             label='Jensen Prediction')
    plt.legend(loc="upper left")
    plt.xlabel('Turbine A Power (kW)')
    plt.ylabel('Turbine B Power (kW)')
    plt.title('Jensen-Predicted Power versus RANSAC of Turbine Data ')
    plt.grid()
    plt.show()
coef=True, random_state=0) # 将上面产生的样本点中的前50个设为异常点(外点) # 即:让前50个点偏离原来的位置,模拟错误的测量带来的误差 n_outliers = 50 np.random.seed(int(time.time()) % 100) X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1)) y[:n_outliers] = -3 + 0.5 * np.random.normal(size=n_outliers) # 用普通线性模型拟合X,y model = linear_model.LinearRegression() model.fit(X, y) # 使用RANSAC算法拟合X,y model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression()) model_ransac.fit(X, y) inlier_mask = model_ransac.inlier_mask_ outlier_mask = np.logical_not(inlier_mask) # 使用一般回归模型和RANSAC算法分别对测试数据做预测 line_X = np.arange(-5, 5) line_y = model.predict(line_X[:, np.newaxis]) line_y_ransac = model_ransac.predict(line_X[:, np.newaxis]) print("真实数据参数:", coef) print("线性回归模型参数:", model.coef_) print("RANSAC算法参数: ", model_ransac.estimator_.coef_) plt.plot(X[inlier_mask], y[inlier_mask], '.g', label='Inliers') plt.plot(X[outlier_mask], y[outlier_mask], '.r', label='Outliers')
def mx_RANSACRegressor(train_x, train_y):
    """Return a default-configured RANSAC regressor fitted to the training data."""
    # fit() hands back the estimator itself, so the fitted model is returned directly.
    return linear_model.RANSACRegressor().fit(train_x, train_y)
class image_converter:
    """Threshold camera frames for bright areas and fit a line to each of the
    two largest bright contours.

    Subscribes to the raw RGB camera topic and publishes the HSV-masked,
    grey-thresholded and BGR-masked images, plus the input frame with the two
    fitted lines drawn on it.
    """

    def __init__(self):
        self.image_pub_yuv = rospy.Publisher("/image_processing/lines_from_yuv", Image, queue_size=1)
        self.image_pub_bgr = rospy.Publisher("/image_processing/lines_from_bgr", Image, queue_size=1)
        self.image_pub_hsv = rospy.Publisher("/image_processing/lines_from_hsv", Image, queue_size=1)
        self.image_pub_lined = rospy.Publisher("/image_processing/lined", Image, queue_size=1)

        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("/app/camera/rgb/image_raw", Image, self.callback, queue_size=1)

    def callback(self, data):
        """Process one camera frame; called for every message on the image topic."""
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)
            # Without a decoded frame there is nothing to process.
            return

        # hsv colorspace: keep pixels whose value channel is at least 200
        hsv = cv2.cvtColor(cv_image, cv2.COLOR_BGR2HSV)
        lower_bound = np.array([0, 0, 200])
        upper_bound = np.array([255, 255, 255])
        maskhsv = cv2.inRange(hsv, lower_bound, upper_bound)
        reshsv = cv2.bitwise_and(hsv, hsv, mask=maskhsv)

        # greyscale, binarised at 245
        gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
        bi_gray_max = 255
        bi_gray_min = 245
        _, thresh1 = cv2.threshold(gray, bi_gray_min, bi_gray_max, cv2.THRESH_BINARY)

        # bgr: keep pixels with every channel in [200, 255]
        light = np.array([255, 255, 255])
        dark = np.array([200, 200, 200])
        maskbgr = cv2.inRange(cv_image, dark, light)
        resbgr = cv2.bitwise_and(cv_image, cv_image, mask=maskbgr)

        try:
            # no good encoding found; bgr at least delivers visible output
            self.image_pub_hsv.publish(self.bridge.cv2_to_imgmsg(reshsv, encoding="bgr8"))
            self.image_pub_yuv.publish(self.bridge.cv2_to_imgmsg(thresh1, "mono8"))
            self.image_pub_bgr.publish(self.bridge.cv2_to_imgmsg(resbgr, encoding="bgr8"))
        except CvBridgeError as e:
            print(e)

        # findContours returns 3 values in OpenCV 3 and 2 values in OpenCV 2/4;
        # the contour list is the second-to-last item in every version.
        contours = cv2.findContours(thresh1, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

        # Two largest contours by area; zero-area contours are never selected.
        ranked = sorted(
            ((cv2.contourArea(contour), index) for index, contour in enumerate(contours)),
            key=lambda item: (-item[0], item[1]))
        lane_contours = [contours[index] for area, index in ranked[:2] if area > 0]
        if len(lane_contours) < 2:
            print("Fewer than two lane contours found; skipping frame")
            return

        print(lane_contours[0].shape)
        print(type(lane_contours[0]))

        # Fit y as a function of x for each contour and read off slope/intercept.
        probe_x = np.array([[0], [100]])
        lines = []
        for points in lane_contours:
            lane_model = linear_model.RANSACRegressor()
            lane_model.fit(points[:, :, 0], points[:, :, 1])
            predicted = lane_model.predict(probe_x)
            intercept = predicted.item(0)
            slope = (predicted.item(1) - intercept) / 100
            lines.append((slope, intercept))
        (m1, b1), (m2, b2) = lines

        print("m1: " + str(m1) + " b1: " + str(b1))
        print("m2: " + str(m2) + " b2: " + str(b2))

        width = int(cv_image.shape[1])
        cv2.line(cv_image, (0, int(b1)), (width, int(b1 + width * m1)), (0, 0, 255), thickness=2, lineType=cv2.LINE_AA)
        cv2.line(cv_image, (0, int(b2)), (width, int(b2 + width * m2)), (0, 0, 255), thickness=2, lineType=cv2.LINE_AA)

        try:
            self.image_pub_lined.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8"))
        except CvBridgeError as e:
            print(e)
target = dataset.SalePrice train = dataset.drop(['SalePrice'], axis=1) train = dataset.drop(['Id'], axis=1) x_train, x_test, y_train, y_test = train_test_split(train, target, test_size=0.2, random_state=2) #All models regressors = [ linear_model.Ridge(), linear_model.Lasso(), linear_model.BayesianRidge(), linear_model.RANSACRegressor(), svm.SVR(), ensemble.GradientBoostingRegressor(), tree.DecisionTreeRegressor(), ensemble.RandomForestRegressor(), xgb.XGBRegressor(colsample_bytree=0.2, gamma=0.0, learning_rate=0.05, max_depth=6, min_child_weight=1.5, n_estimators=7200, reg_alpha=0.9, reg_lambda=0.6, subsample=0.2, seed=42, silent=1)
def _fit_extinction(self, ws, spec, z=0.):
    """
    Fit the extinction law of a spectrum.

    The spectrum and the regridded templates are high-pass filtered (median
    filter subtraction followed by smoothing); a positive lasso fit of the
    filtered templates to the filtered spectrum gives template weights, which
    are applied to the unfiltered templates to predict the unreddened
    spectrum. The ratio of the median-filtered data to the median-filtered
    prediction is then fitted with the law named by self.extinction_law.

    Params
    ------
    self
    ws (array)
    spec (array)
    z=0. (float)

    Set Attr
    --------
    self.reddening_ratio (median-filtered data over median-filtered prediction)
    self.extinction_params
        (slope, intercept) for 'linear'; curve_fit best-fit parameters for
        'calzetti00'

    Raises
    ------
    Exception if self.extinction_law is neither 'linear' nor 'calzetti00'.
    """
    if not np.all(ws == self.ws):
        self.regrid(ws=ws, z=z)

    window = 299

    def median_of(arr):
        return ss.medfilt(arr, kernel_size=window)

    normalized = spec / np.mean(spec)

    # high-pass filter both the spectrum and the templates, then smooth them
    spec_filtered = smooth(normalized - median_of(normalized), n=10)
    temps_filtered = np.array([
        smooth(template - median_of(template), n=10)
        for template in self.temps_regrid
    ])

    # best-fit template combination on the filtered data
    fit, __ = linear_reg(spec_filtered, temps_filtered, type='lasso',
                         alpha=1.e-6, positive=True, max_iter=10000)
    unreddened = np.dot(self.temps_regrid.T, fit.coef_) + fit.intercept_

    # empirical reddening ratio
    self.reddening_ratio = median_of(normalized) / median_of(unreddened)

    law = self.extinction_law
    if law not in ('linear', 'calzetti00'):
        raise Exception("extinction_law not recognized")

    if law == 'linear':
        # robust straight-line fit of the ratio against wavelength
        robust = linear_model.RANSACRegressor()
        robust.fit(X=self.ws_regrid.reshape(-1, 1),
                   y=self.reddening_ratio.reshape(-1, 1))
        line = robust.estimator_
        self.extinction_params = (line.coef_[0], line.intercept_)
        return

    # least-squares fit of the extinction curve, seeded with the current params
    best, __ = so.curve_fit(self.extinction_curve,
                            xdata=self.ws_regrid,
                            ydata=self.reddening_ratio,
                            p0=self.extinction_params)
    self.extinction_params = best
def get_transform(self, the_date, band, mask="L2", sub=10, nv=200, lw=2,
                  odir='figures', apply_model=True, plausible=True):
    """Fit a robust linear TOA -> BOA reflectance model for one band and date.

    A RANSAC linear regression predicting BOA from TOA reflectance is fitted
    on the selected pixels, and a scatter plot with the fitted line is saved
    as ``<odir>/GFZ_<site>_<tile>_<date>_<band>.scatter.pdf``.

    Parameters
    ----------
    the_date : datetime-like key into ``self.l2a_datasets``
    band : band name, e.g. "B02"
    mask : "L2" selects pixels where the L2A product mask equals 10; any
        other value selects pixels by TOA reflectance validity only. In both
        cases pixels with invalid BOA values (0 or > 20000) are excluded.
    sub : stride used to subsample the selected pixels
    nv : number of steps along the plotted regression line
    lw : line width of the plotted regression line
    odir : output directory, created if missing
    apply_model : if true, also apply the model to the pixels that were NOT
        selected and return them in an image-shaped array
    plausible : if true, overlay the emulator training set on the plot

    Returns
    -------
    None when no data set exists for the date, otherwise the tuple
    ``(model_ransac, retval)``. Both are None if RANSAC raised ValueError;
    ``retval`` is also None when ``apply_model`` is false.

    Raises
    ------
    ValueError if ``mask == "L2"`` and no mask resolution is known for ``band``.
    """
    # ensure odir exists
    if not os.path.exists(odir):
        os.makedirs(odir)
    date_label = the_date.strftime("%Y-%m-%d %H:%M:%S")
    fname = odir + '/' + 'GFZ_%s_%s_%s_%s' % (self.site, self.tile,
                                              date_label, band)
    print(fname)

    toa_set = self.get_l1c_data(the_date)
    boa_set = self.l2a_datasets[the_date]
    if toa_set is None or boa_set is None:
        print("No TILEs found for %s" % the_date)
        return None

    toa_file = toa_set[TOA_list.index(band)]
    boa_file = boa_set[GFZ_BOA_list.index(band)]
    print(toa_file)
    print(boa_file)
    g = gdal.Open(toa_file)
    toa_rho = g.ReadAsArray()
    g = gdal.Open(boa_file)
    boa_rho = g.ReadAsArray()

    # Values of 0 or above 20000 are treated as invalid reflectances.
    mask_toa = np.logical_or(toa_rho == 0, toa_rho > 20000)
    mask_boa = np.logical_or(boa_rho == 0, boa_rho > 20000)

    if mask == "L2":
        # relevant MSK set to 10 (clear); relevant PMSL set to 1
        print("Using L2A product mask")
        if band in ["B02", "B03", "B04", "B08"]:
            msk_file, pml_file = boa_set.msk_10m, boa_set.pml2a_10m
        elif band in ["B05", "B06", "B07", "B11", "B12", "B8A"]:
            msk_file, pml_file = boa_set.msk_20m, boa_set.pml2a_20m
        elif band in ["B01"]:  # 60m
            msk_file, pml_file = boa_set.msk_60m, boa_set.pml2a_60m
        else:
            raise ValueError("No L2A mask resolution known for band %s" % band)
        g = gdal.Open(msk_file)
        c1 = g.ReadAsArray()
        # The PML2A layer is read but not applied; the combined test
        # (c1 == 10 and c2 == 1) was disabled in the original code.
        g = gdal.Open(pml_file)
        c2 = g.ReadAsArray()
        if msk_file == boa_set.msk_20m:
            print("%s %s" % (c1.shape, c2.shape))
        clear = c1 == 10
    else:
        # No product mask: the original referenced the undefined `c1` here.
        # NOTE(review): TOA validity is used as the selection instead -
        # confirm this is the intended fallback.
        clear = ~mask_toa

    valid = np.logical_and(clear, ~mask_boa)

    toa_rho = toa_rho / 10000.
    boa_rho = boa_rho / 10000.
    x = boa_rho[valid][::sub]
    y = toa_rho[valid][::sub]
    print("Masked data")

    vmin = np.min([0.0, np.min(x), np.min(y)])
    vmax = np.max([1.0, np.max(x), np.max(y)])
    print("%s %s" % (vmin, vmax))
    line_X = np.arange(vmin, vmax, (vmax - vmin) / nv)
    ns = x.size
    xlim = ylim = [vmin, vmax]

    # robust linear model fit
    print("Fitting linear model")
    model = linear_model.LinearRegression(n_jobs=-1)
    hplot(x, y, new=True, xlim=xlim, ylim=ylim)
    xyrange = xlim, ylim
    plt.xlim(xlim)
    plt.plot(xyrange[0], xyrange[1], 'g--', label='1:1 line')

    retval = None
    try:
        print("Fitting RANSAC model")
        # TOA is the predictor and BOA the target.
        model_ransac = linear_model.RANSACRegressor(model)
        model_ransac.fit(y.reshape(ns, 1), x)

        print("RANSAC model predictions")
        line_y_ransac = model_ransac.predict(line_X[:, np.newaxis])
        # Axes are swapped on the plot: BOA on x, TOA on y.
        plt.plot(line_y_ransac, line_X, color='red', linestyle='-',
                 linewidth=lw, label='RANSAC regressor')

        # Predictions at TOA = 0 and TOA = 1 give intercept a and slope b - a.
        a, b = model_ransac.predict(np.array([0., 1.])[:, np.newaxis])
        plt.title(date_label +
                  '\nBOA(%s) = %.3f + %.3f TOA(%s)' % (band, a, b - a, band) +
                  '\nTOA(%s) = %.3f + %.3f BOA(%s)' % (band, a / (a - b),
                                                       1. / (b - a), band))

        if apply_model:
            approx_boa_rho = model_ransac.predict(
                toa_rho[~valid].flatten()[:, np.newaxis])
            retval = np.zeros_like(toa_rho)
            retval[~valid] = approx_boa_rho
    except ValueError:
        print("RANSAC failed")
        model_ransac = None
        retval = None

    plt.xlabel('BOA reflectance Band %s' % band)
    plt.ylabel('TOA reflectance Band %s' % band)
    if vmax > 1:
        plt.plot(xlim, [1.0, 1.0], 'k--', label='TOA reflectance == 1')

    if plausible:
        boa_emu, toa_emu = load_emulator_training_set()
        plt.plot(boa_emu, toa_emu[band], '+', markersize=3, c="cyan",
                 label="Plausible")
    plt.legend(loc='best')
    plt.savefig(fname + '.scatter.pdf')
    plt.close()
    print("Saved %s.scatter.pdf" % fname)
    return model_ransac, retval
def fit(self, X, y, deg=None):
    """Fit a RANSAC regressor on the Vandermonde (polynomial) expansion of X.

    Parameters
    ----------
    X : 1-D array of sample positions (``np.vander`` requires 1-D input)
    y : target values
    deg : polynomial degree; when given it replaces ``self.deg``, otherwise
        the current ``self.deg`` is used (previously the argument was
        accepted but ignored)

    The fitted regressor is stored in ``self.model``.
    """
    if deg is not None:
        self.deg = deg
    # random_state=0 for consistency in consecutive runs
    self.model = linear_model.RANSACRegressor(random_state=0)
    self.model.fit(np.vander(X, N=self.deg + 1), y)
# Corrupted copies of the data: every third sample is overwritten with a
# constant, either in the targets (y_*) or in the inputs (X_*).
y_errors = y.copy()
y_errors[::3] = 3

X_errors = X.copy()
X_errors[::3] = 3

# Same corruption pattern with a larger constant.
y_errors_large = y.copy()
y_errors_large[::3] = 10

X_errors_large = X.copy()
X_errors_large[::3] = 10

# Estimators to compare, with a fixed seed for the randomised ones.
estimators = [('OLS', linear_model.LinearRegression()),
              ('Theil-Sen', linear_model.TheilSenRegressor(random_state=42)),
              ('RANSAC', linear_model.RANSACRegressor(random_state=42)), ]
# Per-estimator plot styling, keyed by the names used in `estimators`.
colors = {'OLS': 'turquoise', 'Theil-Sen': 'gold', 'RANSAC': 'lightgreen'}
linestyle = {'OLS': '-', 'Theil-Sen': '-.', 'RANSAC': '--'}
lw = 3

x_plot = np.linspace(X.min(), X.max())
# One figure per data set: the clean data and the four corrupted variants.
for title, this_X, this_y in [
        ('Modeling Errors Only', X, y),
        ('Corrupt X, Small Deviants', X_errors, y),
        ('Corrupt y, Small Deviants', X, y_errors),
        ('Corrupt X, Large Deviants', X_errors_large, y),
        ('Corrupt y, Large Deviants', X, y_errors_large)]:
    plt.figure(figsize=(5, 4))
    plt.plot(this_X[:, 0], this_y, 'b+')

    for name, estimator in estimators:
        # Each estimator is fitted on degree-3 polynomial features.
        model = make_pipeline(PolynomialFeatures(3), estimator)
print("Set up KFolds...")
n_splits = 5
kf = KFold(n_splits=n_splits)
kf.get_n_splits(X)

# One column per fold. predictions0 holds each fold model's predictions on the
# full matrix X, so it needs X.shape[0] rows (it was allocated with
# test.shape[0] rows, which only works when both sets have the same length).
# predictions1 holds the predictions on `test`.
predictions0 = np.zeros((X.shape[0], n_splits))
predictions1 = np.zeros((test.shape[0], n_splits))
score = 0

print("Starting ", n_splits, "-fold CV loop...")
# Out-of-fold predictions: every row is predicted by the one model that did
# not see it during fitting.
oof_predictions = np.zeros(X.shape[0])
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_valid = X[train_index, :], X[test_index, :]
    y_train, y_valid = y[train_index], y[test_index]

    clf = linear_model.RANSACRegressor()
    clf.fit(X_train, y_train)

    pred0 = clf.predict(X)
    pred1 = clf.predict(test)
    oof_predictions[test_index] = clf.predict(X_valid)
    predictions0[:, fold] = pred0
    predictions1[:, fold] = pred1
    # NOTE: both the accumulated score and the printed score are computed on
    # the training part of the fold, not on the held-out part.
    score += r2_score(y_train, clf.predict(X_train))
    print('Fold %d: Score %f' % (fold, clf.score(X_train, y_train)))

# Average the per-fold predictions.
prediction0 = predictions0.mean(axis=1)
prediction1 = predictions1.mean(axis=1)
score /= n_splits
oof_score = r2_score(y, oof_predictions)
elif var == 'Δ Interaction energy': val = job.document['shear_25nN-Etotal'][0] - job.document[ 'shear_5nN-Etotal'][0] elif var == 'Interaction energy QQ': val = job.document['shear_15nN-qq'][0] elif var == 'Δ Interaction energy QQ': val = job.document['shear_25nN-qq'][0] - job.document[ 'shear_5nN-qq'][0] elif var == 'Interaction energy LJ': val = job.document['shear_15nN-lj'][0] elif var == 'Δ Interaction energy LJ': val = job.document['shear_25nN-lj'][0] - job.document[ 'shear_5nN-lj'][0] data[i].append(val) data = np.array(data) correlation_matrix = np.empty([len(variables), len(variables)]) for i, var1 in enumerate(variables): for j, var2 in enumerate(variables): ransac = linear_model.RANSACRegressor(random_state=92) X = np.array([[item] for item in data[i]]) ransac.fit(X, data[j]) inlier_mask = ransac.inlier_mask_ outlier_mask = np.logical_not(inlier_mask) _, _, r_ransac, _, _ = stats.linregress(X[inlier_mask].flatten(), data[j][inlier_mask]) correlation_matrix[i, j] = r_ransac np.save('correlation-matrix-ransac', correlation_matrix)
# print df.head()
X = df[['RM']].values
y = df['MEDV'].values
# print X
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
import numpy as np

model = linear_model.LinearRegression()
# The `residual_metric` argument was removed from RANSACRegressor. The callable
# previously passed here (sum of absolute residuals per sample) is what the
# default loss computes, so it is simply omitted.
ransac = linear_model.RANSACRegressor(model,
                                      max_trials=100,
                                      min_samples=50,
                                      residual_threshold=5.0,
                                      random_state=0)
# model.fit(X, y)
ransac.fit(X, y)


def lin_regplot(X, y, model):
    """Scatter the samples in blue and draw the RANSAC predictions in red.

    NOTE(review): the ``model`` argument is not used; the line is always
    drawn from the module-level ``ransac`` estimator. Kept as is because
    callers may pass an unfitted estimator here.
    """
    plt.scatter(X, y, c='blue')
    plt.plot(X, ransac.predict(X), color='red')
    return None

# lin_regplot(X, y, model)
# plt.xlabel('Average number of rooms [RM] (standardized)')
# plt.ylabel('Price in $1000\'s [MEDV] (standardized)')
# plt.show()
def calculateGradientTallFormat(self):
    """Estimate a time-gradient per subject from tall-format data.

    For every unique value in ``self.id`` a straight line
    ``x = x_bl + dx_dt * t`` is fitted to that subject's ``self.xi`` values
    against ``self.ti`` (shifted so the first visit is at t = 0), using
    ordinary least squares. A RANSAC fit is used instead when the internal
    ``useRobustFittingIfSufficientData`` switch is on and the subject has at
    least four valid points.

    Sets
    ----
    self.x          mean value per subject
    self.x_id       the unique subject ids
    self.dxdt       fitted gradient per subject (NaN with fewer than 2 points)
    self.t_interval follow-up length per subject

    Author: Neil Oxtoby, UCL, Nov 2015
    Project: Biomarker Ecology (trajectories from cross-sectional data)
    Team: Progression Of Neurodegenerative Disease
    """
    useRobustFittingIfSufficientData = False
    rbo = 'on' if useRobustFittingIfSufficientData else 'off'

    id_u = np.unique(self.id)
    # NaN-initialised so subjects that cannot be fitted are marked as missing
    # instead of carrying whatever np.empty happened to allocate.
    x_ = np.full(id_u.shape, np.nan)
    x_bl = np.full(x_.shape, np.nan)     # linear fit intercept
    dxdt_ = np.full(x_.shape, np.nan)    # linear fit gradient
    sigma_x = np.full(x_.shape, np.nan)  # linear fit residuals
    t_range = np.full(x_.shape, np.nan)  # followup length

    for ki, subject_id in enumerate(id_u):
        rowz = self.id == subject_id
        x_i = self.xi[rowz]
        t_i = self.ti[rowz]
        # * Remove missing (NaN)
        nums = ~np.isnan(x_i) & ~np.isnan(t_i)
        x_i = x_i[nums]
        t_i = t_i[nums]
        t_i = t_i - np.min(t_i)  # shift to zero (so x_bl is intercept)

        # * Mean biomarker value and follow-up length
        x_[ki] = np.mean(x_i)
        t_range[ki] = np.max(t_i) - np.min(t_i)

        # A line needs at least two points.
        if len(x_i) < 2:
            continue

        t_column = t_i.values.reshape(-1, 1)
        x_column = x_i.values.reshape(-1, 1)
        if len(x_i) >= 4 and rbo == 'on':
            # Robust linear model fit: RANSAC algorithm. The slope and
            # intercept live on the wrapped estimator, not on the wrapper.
            model_poly1 = linear_model.RANSACRegressor()
            model_poly1.fit(t_column, x_column)
            fitted_line = model_poly1.estimator_
        else:
            # Non-robust (not enough data points, or forced)
            model_poly1 = linear_model.LinearRegression()
            model_poly1.fit(t_column, x_column)
            fitted_line = model_poly1

        # * Gradient and intercept; ravel()[0] extracts the single value from
        #   the array attributes produced by fitting a column target.
        dxdt_[ki] = np.ravel(fitted_line.coef_)[0]
        x_bl[ki] = np.ravel(fitted_line.intercept_)[0]
        # * Residuals standard deviation
        residuals = x_i.values - model_poly1.predict(t_column).T
        sigma_x[ki] = np.std(residuals)

    # x_bl and sigma_x are computed but, as before, not stored on self.
    self.x = x_
    self.x_id = id_u
    self.dxdt = dxdt_
    self.t_interval = t_range
# Scatter plot of the raw samples.
fig = plt.figure(figsize=(12, 6), dpi=100)
plt.scatter(X, y, marker='.')
plt.xlabel('X', weight="bold", fontsize=16)
plt.ylabel('Y', weight="bold", fontsize=16)
# -

# Train linear regression, RANSAC and Theil-Sen; the number of "outlier" points is increased afterwards.

# +
lr = linear_model.LinearRegression()
lr.fit(X, y)

# Train RANSAC
ransac = linear_model.RANSACRegressor()
ransac.fit(X, y)

# Predicted data for plotting later
line_X = np.arange(X.min(), X.max())[:, np.newaxis]
line_y = lr.predict(line_X)
line_y_ransac = ransac.predict(line_X)

lw = 2
# Samples with both fitted lines overlaid.
fig = plt.figure(figsize=(12, 6), dpi=100)
plt.scatter(X, y, marker='.')
plt.plot(line_X, line_y, color='navy', linewidth=lw, label='Lineal')
plt.plot(line_X, line_y_ransac, color='tomato', linewidth=lw, label='RANSAC')
plt.legend(loc='lower right', title="Regresión", fontsize=14).get_title().set_fontsize(15)
def _new_tracking_ransac():
    """Build the RANSAC regressor configuration used for position tracking."""
    return linear_model.RANSACRegressor(linear_model.LinearRegression(),
                                        residual_threshold=7.0,
                                        min_samples=3,
                                        max_trials=100)


def _translate_rotate_organ(positions, hook_key, organ_key):
    """Carry an organ position to the current frame relative to the mouth hook.

    The angle between the hook and the organ (both taken relative to the
    previous mouth-hook position) is applied to the hook's current offset,
    which is rescaled to the organ's previous distance and added to the
    current mouth-hook position. Returns the (x, y) estimate.
    """
    mouth_prev = np.asarray(positions["MouthHook"][-2], dtype=float)[:2]
    mouth_now = np.asarray(positions["MouthHook"][-1], dtype=float)[:2]
    hook_r1 = np.asarray(positions[hook_key][-2], dtype=float)[:2] - mouth_prev
    hook_r2 = np.asarray(positions[hook_key][-1], dtype=float)[:2] - mouth_now
    organ_r1 = np.asarray(positions[organ_key][-1], dtype=float)[:2] - mouth_prev

    hook_norm = np.linalg.norm(hook_r1)
    organ_norm = np.linalg.norm(organ_r1)
    cos_theta = np.dot(hook_r1, organ_r1) / hook_norm / organ_norm
    # z-component of the 2-D cross product, written out because np.cross on
    # 2-element vectors is deprecated.
    cross_z = hook_r1[0] * organ_r1[1] - hook_r1[1] * organ_r1[0]
    sin_theta = cross_z / hook_norm / organ_norm
    rotation = np.array([[cos_theta, -sin_theta],
                         [sin_theta, cos_theta]])

    organ_r2 = rotation.dot(hook_r2) * organ_norm / np.linalg.norm(hook_r2)
    organ_now = organ_r2 + mouth_now
    return organ_now[0], organ_now[1]


def infer(inference_method, time_lag, bufferPredictedPositions, X_model_ransac, Y_model_ransac, bp):
    """Infer the position of a dorsal organ from buffered body-part positions.

    Parameters
    ----------
    inference_method : "ransac_track" or "simple_translate_rotate"
    time_lag : number of buffered frames used to fit / apply the RANSAC models
    bufferPredictedPositions : dict mapping body-part name to a sequence of
        per-frame positions
    X_model_ransac, Y_model_ransac : RANSAC models from the previous call, or
        None before the first fit
    bp : "LeftDorsalOrgan" or "RightDorsalOrgan"

    Returns
    -------
    (x, y, X_model_ransac, Y_model_ransac). With "ransac_track" x and y are -1
    when no prediction could be made yet. Returns None for an unknown
    ``inference_method``, or for an unknown ``bp`` with
    "simple_translate_rotate".

    Raises
    ------
    ValueError if ``bp`` is unknown with "ransac_track".
    """
    if inference_method == "ransac_track":
        if bp not in ("LeftDorsalOrgan", "RightDorsalOrgan"):
            raise ValueError("unsupported body part for ransac_track: %r" % (bp,))

        # Columns: the organ itself, then mouth hook, left hook, right hook.
        bufferAll = np.hstack((
            np.vstack(bufferPredictedPositions[bp]),
            np.vstack(bufferPredictedPositions["MouthHook"]),
            np.vstack(bufferPredictedPositions["LeftMHhook"]),
            np.vstack(bufferPredictedPositions["RightMHhook"]),
        ))
        n_frames = np.shape(bufferAll)[0]

        X_pred = [-1]
        Y_pred = [-1]
        no_models = X_model_ransac is None and Y_model_ransac is None
        both_models = X_model_ransac is not None and Y_model_ransac is not None

        if no_models and n_frames == time_lag:
            # First time enough frames are buffered: fit, nothing to predict yet.
            data_posterior = bufferAll[:time_lag + 1]
        elif both_models and n_frames > time_lag:
            data_present_frame = bufferAll[-time_lag - 1:-1]
            # ravel keeps the result 1-D even for a single row, so the [-1]
            # lookup below is always valid.
            X_pred = np.ravel(X_model_ransac.predict(data_present_frame))
            Y_pred = np.ravel(Y_model_ransac.predict(data_present_frame))
            # Create model for the next frame
            data_posterior = bufferAll[-time_lag:]
        else:
            data_posterior = None

        if data_posterior is not None:
            X_model_ransac = _new_tracking_ransac()
            Y_model_ransac = _new_tracking_ransac()
            X_model_ransac.fit(data_posterior, data_posterior[:, 0].reshape(-1, 1))
            Y_model_ransac.fit(data_posterior, data_posterior[:, 1].reshape(-1, 1))

        return X_pred[-1], Y_pred[-1], X_model_ransac, Y_model_ransac

    if inference_method == "simple_translate_rotate":
        if bp == "LeftDorsalOrgan":
            hook_key = "LeftMHhook"
        elif bp == "RightDorsalOrgan":
            hook_key = "RightMHhook"
        else:
            return None
        x_now, y_now = _translate_rotate_organ(bufferPredictedPositions, hook_key, bp)
        return x_now, y_now, X_model_ransac, Y_model_ransac

    return None
def evaluate(self, nexus):
    """Run the sizing loop and store the converged sizing variables on ``nexus``.

    The method proceeds in four stages:

    1. If ``nexus.optimization_problem`` is set, the optimization inputs
       (column 1 divided by column 3 of ``optimization_problem.inputs``) are
       scaled and stored on ``nexus.problem_inputs``.
    2. Unless ``self.initial_step`` is ``'Default'``, previously written sizing
       data is read back and a regressor selected by ``self.initial_step`` is
       fitted per output column to predict the initial guess ``y``.
    3. ``y`` is iterated with the method named by ``self.update_method``
       (``'successive_substitution'``, ``'newton-raphson'`` or ``'broyden'``),
       with optional backtracking, until ``max(|err|) <= self.tolerance`` or the
       function-evaluation counter exceeds ``self.maximum_iterations``.
    4. Converged values are optionally written to file and the iteration
       statistics are stored on ``nexus``.

    Args:
        nexus: object carrying ``optimization_problem``, ``results``,
            ``total_number_of_iterations`` and a ``sizing_loop`` attribute
            container; it is passed through to the update methods.

    Returns:
        The same ``nexus`` object, with ``number_of_iterations``,
        ``total_number_of_iterations``, ``sizing_loop.converged``,
        ``sizing_loop.norm_error``, ``sizing_loop.max_error``,
        ``sizing_loop.output_error`` and ``sizing_variables`` updated.

    NOTE(review): ``scaled_inputs`` is only bound when
    ``nexus.optimization_problem`` is not None, but it is read whenever
    ``self.initial_step != 'Default'`` or ``self.write_residuals`` is true;
    those combinations look like they raise NameError -- confirm.
    """
    if nexus.optimization_problem != None:  #make it so you can run sizing without an optimization problem
        unscaled_inputs = nexus.optimization_problem.inputs[:, 1]  #use optimization problem inputs here
        input_scaling = nexus.optimization_problem.inputs[:, 3]
        scaled_inputs = unscaled_inputs / input_scaling
        problem_inputs = []
        for value in scaled_inputs:
            problem_inputs.append(value)  #writing to file is easier when you use list
        nexus.problem_inputs = problem_inputs
        opt_flag = 1  #tells if you're running an optimization case or not-used in writing outputs
    else:
        opt_flag = 0

    #unpack inputs
    tol = self.tolerance  #percentage difference in mass and energy between iterations
    h = self.iteration_options.h  #NOTE(review): not read again in this method
    y = self.default_y
    max_iter = self.maximum_iterations
    scaling = self.default_scaling
    sizing_evaluation = self.sizing_evaluation
    iteration_options = self.iteration_options
    err = [1000]  #initialize error

    #initialize
    converged = 0  #marker to tell if it's converged
    i = 0  #function evals

    #determine the initial step
    min_norm = 1000.
    if self.initial_step != 'Default':
        data_inputs, data_outputs, read_success = read_sizing_inputs(
            self, scaled_inputs)
        if read_success:
            min_norm, i_min_dist = find_min_norm(scaled_inputs, data_inputs)
            if min_norm < iteration_options.max_initial_step:  #make sure data is close to current guess
                #fall back to the single nearest stored point when a table lookup is
                #requested, the nearest point is already very close, or there is too
                #little data for a surrogate
                if self.initial_step == 'Table' or min_norm < iteration_options.min_surrogate_step or len(
                        data_outputs[:, 0]
                ) < iteration_options.min_surrogate_length:
                    regr = neighbors.KNeighborsRegressor(n_neighbors=1)
                else:
                    #NOTE(review): an initial_step value matching none of the
                    #branches below leaves regr unbound -- confirm callers only
                    #pass the names listed here
                    print('running surrogate method')
                    if self.initial_step == 'SVR':
                        #for SVR, can optimize parameters C and eps for closest point
                        print('optimizing svr parameters')
                        x = [2., -1.]  #initial guess for 10**C, 10**eps
                        out = sp.optimize.minimize(check_svr_accuracy,
                                                   x,
                                                   method='Nelder-Mead',
                                                   args=(data_inputs,
                                                         data_outputs,
                                                         i_min_dist))
                        t2 = time.time()  #NOTE(review): not read again in this method
                        c_out = 10**out.x[0]
                        eps_out = 10**out.x[1]
                        #clamp the optimized hyper-parameters
                        if c_out > 1E10:
                            c_out = 1E10
                        if eps_out < 1E-8:
                            eps_out = 1E-8
                        regr = svm.SVR(C=c_out, epsilon=eps_out)
                    elif self.initial_step == 'GradientBoosting':
                        regr = ensemble.GradientBoostingRegressor()
                    elif self.initial_step == 'ExtraTrees':
                        regr = ensemble.ExtraTreesRegressor()
                    elif self.initial_step == 'RandomForest':
                        regr = ensemble.RandomForestRegressor()
                    elif self.initial_step == 'Bagging':
                        regr = ensemble.BaggingRegressor()
                    elif self.initial_step == 'GPR':
                        gp_kernel_RQ = RationalQuadratic(length_scale=1.0,
                                                         alpha=1.0)
                        regr = gaussian_process.GaussianProcessRegressor(
                            kernel=gp_kernel_RQ, normalize_y=True)
                    elif self.initial_step == 'RANSAC':
                        regr = linear_model.RANSACRegressor()
                    elif self.initial_step == 'Neighbors':
                        n_neighbors = min(iteration_options.n_neighbors,
                                          len(data_outputs))
                        if iteration_options.neighbors_weighted_distance == True:
                            regr = neighbors.KNeighborsRegressor(
                                n_neighbors=n_neighbors, weights='distance')
                        else:
                            regr = neighbors.KNeighborsRegressor(
                                n_neighbors=n_neighbors)

                #now run the fits/guesses
                iteration_options.number_of_surrogate_calls += 1
                y = []
                input_for_regr = scaled_inputs.reshape(1, -1)
                #one independent fit per output column
                for j in range(len(data_outputs[0, :])):
                    y_surrogate = regr.fit(data_inputs, data_outputs[:, j])
                    y.append(y_surrogate.predict(input_for_regr)[0])
                    if y[j] > self.max_y[j] or y[j] < self.min_y[j]:
                        #a prediction left the allowed range: discard every
                        #prediction made so far and redo all columns with a
                        #plain k-nearest-neighbours regressor
                        print('sizing variable range violated, val = ', y[j],
                              ' j = ', j)
                        n_neighbors = min(iteration_options.n_neighbors,
                                          len(data_outputs))
                        regr_backup = neighbors.KNeighborsRegressor(
                            n_neighbors=n_neighbors)
                        y = []
                        for j in range(len(data_outputs[0, :])):
                            y_surrogate = regr_backup.fit(
                                data_inputs, data_outputs[:, j])
                            y.append(
                                y_surrogate.predict(input_for_regr)[0])
                        break
                y = np.array(y)

    # initialize previous sizing values
    y_save = 1 * y  #save values to detect oscillation
    y_save2 = 3 * y
    norm_dy2 = 1  #used to determine if it's oscillating; if so, do a successive_substitution iteration
    nr_start = 0  #flag to switch between methods; if you do nr too early, sizing diverges

    #now start running the sizing loop
    while np.max(np.abs(err)) > tol:
        #save the previous iterations for backtracking
        iteration_options.err_save2 = 1. * np.array(
            iteration_options.err_save)
        iteration_options.err_save = err

        if self.update_method == 'successive_substitution':
            err, y, i = self.successive_substitution_update(
                y, err, sizing_evaluation, nexus, scaling, i,
                iteration_options)
        elif self.update_method == 'newton-raphson':
            if i == 0:
                nr_start = 0
            #use fixed-point steps while the error is above newton_raphson_tolerance,
            #below max_newton_raphson_tolerance, or too few iterations have run
            if np.max(
                    np.abs(err)
            ) > self.iteration_options.newton_raphson_tolerance or np.max(
                    np.abs(err)
            ) < self.iteration_options.max_newton_raphson_tolerance or i < self.iteration_options.min_fix_point_iterations:
                err, y, i = self.successive_substitution_update(
                    y, err, sizing_evaluation, nexus, scaling, i,
                    iteration_options)
            else:
                if nr_start == 0:
                    #first Newton-Raphson step starts from y_save2, not y
                    err, y, i = self.newton_raphson_update(
                        y_save2, err, sizing_evaluation, nexus, scaling, i,
                        iteration_options)
                    nr_start = 1
                else:
                    err, y, i = self.newton_raphson_update(
                        y, err, sizing_evaluation, nexus, scaling, i,
                        iteration_options)
                    nr_start = 1
        elif self.update_method == 'broyden':
            if (np.max(np.abs(err)) >
                    self.iteration_options.newton_raphson_tolerance
                    or np.max(np.abs(err)) <
                    self.iteration_options.max_newton_raphson_tolerance or
                    i < self.iteration_options.min_fix_point_iterations
                ) and nr_start == 0:
                if i > 1:  #obtain this value so you can get an ok value initialization from the Jacobian w/o finite differincing
                    err_save = iteration_options.err_save
                err, y, i = self.successive_substitution_update(
                    y, err, sizing_evaluation, nexus, scaling, i,
                    iteration_options)
                nr_start = 0  #in case broyden update diverges
            else:
                if nr_start == 0:
                    if self.iteration_options.initialize_jacobian == 'newton-raphson':
                        err, y, i = self.newton_raphson_update(
                            y_save2, err, sizing_evaluation, nexus, scaling,
                            i, iteration_options)
                    else:
                        #from http://www.jnmas.org/jnmas2-5.pdf
                        #diagonal secant estimate used as the initial inverse Jacobian
                        D = np.diag(
                            (y - y_save2) /
                            (err - self.iteration_options.err_save))
                        self.iteration_options.y_save = y_save
                        self.iteration_options.Jinv = D
                        err, y, i = self.broyden_update(
                            y, err, sizing_evaluation, nexus, scaling, i,
                            iteration_options)
                    nr_start = 1
                else:
                    err, y, i = self.broyden_update(
                        y, err, sizing_evaluation, nexus, scaling, i,
                        iteration_options)

        y = self.stay_inbounds(y_save, y)
        dy = y - y_save
        dy2 = y - y_save2
        norm_dy = np.linalg.norm(dy)  #NOTE(review): dy/norm_dy are not read again in this method
        norm_dy2 = np.linalg.norm(dy2)

        if self.iteration_options.backtracking.backtracking_flag == True:
            err_save = iteration_options.err_save
            backtracking = iteration_options.backtracking
            back_thresh = backtracking.threshhold
            i_back = 0
            min_err_back = 1000.
            #candidate (y, err) pairs; the one with the smallest error norm wins
            y_back_list = [y]
            err_back_list = [err]
            norm_err_back_list = [np.linalg.norm(err)]
            while np.linalg.norm(err) > back_thresh * np.linalg.norm(
                    err_save) and i_back < backtracking.max_steps:  #while?
                print('backtracking')
                print('err, err_save = ', np.linalg.norm(err),
                      np.linalg.norm(err_save))
                #shrink the step from y_save towards y by multiplier**(i_back + 1)
                p = y - y_save
                backtrack_y = y_save + p * (backtracking.multiplier**
                                            (i_back + 1))
                err, y_back, i = self.successive_substitution_update(
                    backtrack_y, err, sizing_evaluation, nexus, scaling, i,
                    iteration_options)
                #the trial point backtrack_y (not the returned y_back) is recorded
                y_back_list.append(backtrack_y)
                err_back_list.append(err)
                norm_err_back_list.append(np.linalg.norm(err))
                #NOTE(review): min_err_back is not read again in this method
                min_err_back = min(np.linalg.norm(err_back_list),
                                   min_err_back)
                i_back += 1
            i_min_back = np.argmin(norm_err_back_list)
            y = y_back_list[i_min_back]
            err = err_back_list[i_min_back]

        #keep track of previous iterations, as they're used to transition between methods + for saving results
        y_save2 = 1. * y_save
        y_save = 1. * y
        print('err = ', err)

        if self.write_residuals:  #write residuals at every iteration
            write_sizing_residuals(self, y_save, scaled_inputs, err)

        if i > max_iter:  #stop once the function-evaluation budget is exceeded
            print("###########sizing loop did not converge##########")
            break

    if i < max_iter and not np.isnan(
            err).any() and opt_flag == 1:  #write converged values to file
        converged = 1
        #check how close inputs are to what we already have
        if converged and (
                min_norm > self.iteration_options.min_write_step
                or i > self.write_threshhold
        ):  #now output to file, writing when it's either not a FD step, or it takes a long time to converge
            #make sure they're in right format
            #use y_save2, as it makes derivatives consistent
            write_sizing_outputs(self, y_save2, problem_inputs)

    nexus.total_number_of_iterations += i
    nexus.number_of_iterations = i  #function calls
    results = nexus.results  #NOTE(review): not read again in this method
    print('number of function calls=', i)
    print('number of iterations total=', nexus.total_number_of_iterations)

    nexus.sizing_loop.converged = converged
    nexus.sizing_loop.norm_error = np.linalg.norm(err)
    nexus.sizing_loop.max_error = max(err)
    nexus.sizing_loop.output_error = err  #save in case you want to write this
    nexus.sizing_variables = y_save2
    return nexus
def computeFreqSlope(self, event, fmin=11, fmax=16, method="hilbert",
                     beforePadding=2, afterPadding=2, doubleFilt=True,
                     keep_curve=True):
    """Estimate the frequency slope of ``event`` and store it on the event.

    The result is written to ``event.properties["slope"]``; nothing is
    returned. Any ``method`` other than the two below leaves the event
    untouched.

    Args:
        event: event providing ``channel``, ``timeStart()``, ``duration()``
            and a ``properties`` mapping.
        fmin, fmax: lower and upper edge of the analysed frequency band.
        method: ``"stransform"`` fits a first-degree polynomial to the
            spectral centroid of the S-transform over time; ``"hilbert"``
            fits a RANSAC line to the smoothed instantaneous frequency of the
            band-passed analytic signal.
        beforePadding, afterPadding: amount of signal read before and after
            the centre of the event (``"hilbert"`` only); presumably seconds.
        doubleFilt: apply the band-pass filter a second time
            (``"hilbert"`` only).
        keep_curve: also store the smoothed instantaneous-frequency curve in
            ``event.properties["slopeCurve"]`` (``"hilbert"`` only).
    """
    if method == "stransform":
        # The filters need the signal to be at least 3*nbTaps
        nbTaps = 1001.0
        fs = self.getChannelFreq(event.channel)
        duration = max((nbTaps*3.0+1)/float(fs), event.duration())
        data = self.read([event.channel], event.timeStart(), duration)
        signal = data[event.channel].signal

        # Zero-pad to at least one second of samples; only the first N
        # (unpadded) samples are used for the regression below.
        nbMinSamples = int(self.getChannelFreq(event.channel))
        N = len(signal)
        if N < nbMinSamples:
            signal = np.concatenate((signal, np.zeros(nbMinSamples - N)))

        X, fX = computeST(signal, fs, fmin=fmin-1, fmax=fmax+1)
        Y = abs(np.transpose(X))

        regx = np.arange(N)/float(fs)
        regy = []
        for i in range(N):
            try:
                # Amplitude-weighted mean frequency at sample i.
                result = trapz(fX*Y[:, i], fX)/float(trapz(Y[:, i], fX))
                regy.append(result)
            except Exception:
                # Dump the context that helps diagnose the failure, then
                # propagate the original error unchanged.
                print((fX.shape, Y.shape, fX.shape))
                print((i, self.recordsInfo[-1].startTime,
                       self.recordsInfo[-1].duration,
                       event.timeStart(), event.duration()))
                raise

        assert(not np.any(np.isnan(regy)))
        assert(not np.any(np.isinf(regy)))

        z = np.polyfit(regx, regy, deg=1)
        event.properties["slope"] = z[0]

    elif method == "hilbert":
        if event.channel == "":
            channel = self.getChannelLabels()[0]
        else:
            channel = event.channel

        fs = self.getChannelFreq(channel)
        data = self.read([channel],
                         event.timeStart() + event.duration()/2.0 - beforePadding,
                         afterPadding+beforePadding)

        signal = data[channel].signal
        signal -= np.mean(signal)

        # Bring the sampling rate down towards 100 Hz. For channels sampled
        # below 100 Hz the factor would be 0, which used to crash both
        # decimate() and the division below; such signals are left as-is.
        decimate_factor = int(fs/100)
        if decimate_factor >= 1:
            signal = decimate(signal, decimate_factor, zero_phase=True)
            fs /= decimate_factor

        bandPassFilter = Filter(fs)
        order = int(len(signal)/3)-1
        if order % 2 == 0:
            order -= 1  # Need to be an odd number
        order = min(3001, order)  # No need for the order to be higher than 3001
        bandPassFilter.create(low_crit_freq=fmin, high_crit_freq=fmax,
                              order=order, btype="bandpass", ftype="FIR",
                              useFiltFilt=True)

        signal = bandPassFilter.applyFilter(signal)
        if doubleFilt:
            signal = bandPassFilter.applyFilter(signal)

        analytic_signal = hilbert(signal)
        instantaneous_phase = np.unwrap(np.angle(analytic_signal))
        instantaneous_frequency = (np.diff(instantaneous_phase)
                                   / (2.0*np.pi) * fs)
        freq = running_mean(np.abs(instantaneous_frequency), int(fs/4))

        # Regress only on the half second centred on the middle of the window.
        regy = freq[int(len(freq)/2-fs/4):int(len(freq)/2+fs/4)]
        regx = np.arange(len(regy))/float(fs)

        ransac = linear_model.RANSACRegressor()
        ransac.fit(regx[:, np.newaxis], regy)
        event.properties["slope"] = ransac.estimator_.coef_[0]
        if keep_curve:
            event.properties["slopeCurve"] = freq
# Robustly fit linear model with Huber Regressor algorithm hr_estimator = linear_model.HuberRegressor() hr_grid = {'epsilon': [1.1, 1.2, 1.3, 1.5]} hr_model = utils.grid_search_best_model(hr_estimator, hr_grid, X_train, y_train, scoring=scoring) rutils.plot_model_2d_regression(hr_model, X_train, y_train, title="HuberRegression") rutils.regression_performance(hr_model, X_test, y_test) # Robustly fit linear model with RANSAC algorithm ransac_estimator = linear_model.RANSACRegressor() ransac_grid = {'max_trials': [100, 150]} ransac_model = utils.grid_search_best_model(ransac_estimator, ransac_grid, X_train, y_train, scoring=scoring) inlier_mask = ransac_model.inlier_mask_ rutils.plot_model_2d_regression(ransac_model, X_train, y_train, title="RANSAC") rutils.plot_data_2d_regression(X_train[inlier_mask], y_train[inlier_mask], new_window=False, color='yellowgreen') rutils.plot_data_2d_regression(X_train[np.logical_not(inlier_mask)], y_train[np.logical_not(inlier_mask)], color='gold',
def getCentralPlane(image, model, inputLayerNums=24, cuda=True,
                    predImageSaveDir=None, spacingZYX=None):
    '''
    Fit the central plane of an image volume from a model's predicted masks.

    image: image array; axes 1 and 2 give the in-plane size each predicted
        layer is resized back to
    model: model used for the prediction
    inputLayerNums: number of layers sampled from the volume before it is
        fed to the model
    cuda: whether the model lives on the GPU; if True the caller must already
        have moved ``model`` there
    predImageSaveDir: directory the predicted probability maps are saved to;
        nothing is saved when None
    spacingZYX: spacing of ``image`` along (z, y, x); when given, the points
        are scaled by it before fitting, otherwise index coordinates are used

    Returns ``(w, b)`` describing the plane
    ``w[0] * z + w[1] * y + w[2] * x + b = 0``, or ``(None, None)`` when
    fewer than 8 layers yield enough confident pixels.
    '''
    # Pre-process a copy so the caller's array is never modified.
    volume, indices = imagePreProcess(image.copy(),
                                      normalizeRange=(0, 300),
                                      inputLayerNums=inputLayerNums)

    # Inference only: no_grad stops the forward pass from keeping the
    # intermediate results that would be needed for gradients.
    with torch.no_grad():
        # numpy -> float tensor, plus a batch axis: (Z, H, W) -> (1, Z, H, W)
        batch = torch.from_numpy(volume).to(torch.float).unsqueeze(0)
        if cuda:
            batch = batch.cuda()
        # Drop the batch axis again and hand the result back to numpy.
        prediction = model(batch).squeeze(0)
        if cuda:
            prediction = prediction.cpu()
        prediction = prediction.numpy()

    saveEnabled = predImageSaveDir is not None
    if saveEnabled and not os.path.exists(predImageSaveDir):
        os.makedirs(predImageSaveDir)

    originalSize = (image.shape[1], image.shape[2])
    points = []
    validLayerNums = 0
    # layerNo is the position in the prediction, index the layer's
    # coordinate in the original volume.
    for layerNo, index in enumerate(indices):
        # Every layer has its own mask; resize it back to the original size.
        layerPred = transform.resize(prediction[layerNo],
                                     originalSize,
                                     order=1,
                                     mode='constant',
                                     cval=0,
                                     anti_aliasing=True,
                                     preserve_range=True)
        if saveEnabled:
            # Save the predicted probability map as an 8-bit image.
            saveImage = (np.round(layerPred * 255)).astype(np.uint8)
            cv2.imwrite(
                os.path.join(predImageSaveDir, '{}.png'.format(index)),
                saveImage)

        # Confident pixels as rows [(y1, x1), (y2, x2), ...]
        confident = np.array(np.where(layerPred > 0.99)).T
        if confident.shape[0] < 100:
            # Fewer than 100 pixels above 0.99: give up on this layer.
            continue

        # Keep a random 100 of them so that no layer outweighs the others.
        chosen = confident[np.random.choice(confident.shape[0],
                                            size=100,
                                            replace=False)]
        if spacingZYX is not None:
            points.extend([index * spacingZYX[0],
                           point[0] * spacingZYX[1],
                           point[1] * spacingZYX[2]] for point in chosen)  # (z, y, x) order
        else:
            points.extend([index, point[0], point[1]] for point in chosen)  # (z, y, x) order
        validLayerNums += 1

    if validLayerNums < 8:
        print('符合要求的层数太少:', validLayerNums)
        return None, None

    points = np.array(points)
    # The plane is usually close to parallel to the zy plane, so fitting it
    # as x = w[0] * z + w[1] * y + b behaves better.
    zy = points[:, :2]  # (z, y) coordinates
    x = points[:, 2]  # x coordinate

    # RANSAC regression of x on (z, y) yields the plane equation.
    planeFit = linear_model.RANSACRegressor(linear_model.LinearRegression())
    planeFit.fit(zy, x)
    coef = planeFit.estimator_.coef_  # coef[0] multiplies z, coef[1] multiplies y
    intercept = planeFit.estimator_.intercept_

    # Rearranged to w[0] * z + w[1] * y + w[2] * x + b = 0; w is also the
    # normal vector, in zyx order.
    w = [-coef[0], -coef[1], 1]
    b = -intercept
    if spacingZYX is not None:
        w = [wi * spacing for wi, spacing in zip(w, spacingZYX)]
    return w, b
def line_fit_odds_ratio(glist,attribute):
    """Plot the log-log odds ratio of a degree distribution for each graph.

    For every graph in ``glist`` one subplot shows the scattered
    ``log10(odds + 1)`` against ``log10(k + 1)`` together with a fitted
    line. Two further subplots show the fitted slopes and intercepts per
    graph. The figure is saved as ``<attribute>_ransac_fitting.png`` and
    then closed.

    Args:
        glist: sequence of graphs exposing a ``name`` attribute. The subplot
            grid is 2 x 3, so at most four graphs fit alongside the two
            summary panels.
        attribute: ``'social'``, ``'hashtags'``, ``'urls'`` or any other
            attribute name; it selects which ``cal`` helper produces the
            per-node frequency list.

    Raises:
        ValueError: if ``glist`` is empty (``min()`` of an empty sequence).
    """
    fig = plt.figure(figsize=(12, 9), facecolor='white')
    # FigureCanvas.set_window_title was removed in Matplotlib 3.6; the
    # manager method works on old and new releases alike.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Odds Ratio for: '+attribute)

    cols = 3
    rows = 2
    print(rows)

    slopeList = []
    interList = []
    panel = 0
    for panel, G in enumerate(glist, start=1):
        if attribute=='social':
            lista = cal.get_degree_frequency_list(G)
        elif attribute=='hashtags' or attribute=='urls':
            lista = cal.get_attribute_degree_frequency_list(G, attribute)
        else:
            lista = cal.get_unique_attribute_degree_frequency_list(G,attribute)

        # Empirical CDF over the observed values; the last bin (cdf == 1) is
        # dropped because it would make the odds infinite.
        data = np.bincount(np.asarray(lista))
        cdf = (data.cumsum(0) / float(data.sum()))[:-1].copy()
        ccdf = 1 - cdf
        odds = cdf / ccdf

        logA = np.log10(np.arange(len(odds)) + 1)
        logB = np.log10(odds + 1)

        ax = plt.subplot(rows, cols, panel)
        plt.scatter(logA, logB,c='orange', marker='+', label='odds_ratio')

        xAxis = np.reshape(logA, (-1, 1))
        yAxis = np.reshape(logB, (-1, 1))
        # min_samples equals the sample count, so every trial fits all points.
        ransac = linear_model.RANSACRegressor(min_samples=xAxis.shape[0],max_trials=1000)
        ransac.fit(xAxis, yAxis)
        line_y_ransac = ransac.predict(xAxis)

        # Work on flat views so slope, intercept and r are plain scalars.
        fitted = np.ravel(line_y_ransac)
        slope = float((fitted[1] - fitted[0]) / (logA[1] - logA[0]))
        intercept = float(fitted[1] - slope * logA[1])
        # pearsonr needs 1-D input on several SciPy releases.
        # NOTE(review): this correlates x with the fitted line (not with the
        # data) and the legend calls it 'Rsquare' -- confirm that is intended.
        r_value, p_value = scipy.stats.pearsonr(logA, fitted)
        r_value = float(r_value)

        slopeList.append((G.name, slope))
        interList.append((G.name, intercept))

        plt.plot(xAxis, line_y_ransac, c='blue',linestyle='dashed',linewidth=2.0,
                 label='slope:' + str(round(slope, 3)) + ' - intercept:' + str(
                     round(intercept, 3)) + ' - Rsquare:' + str(round(r_value, 3)))
        plt.xlabel(attribute)
        plt.ylabel('odds ratio')
        plt.title('Odds Ratio for: '+attribute+' of '+G.name)
        plt.legend()

    # Axis limits for the two summary panels.
    ymin=min(n[1] for n in interList)-1
    ymax=max(n[1] for n in interList)+1
    xmin=0
    xmax=(max(n[1] for n in slopeList)+1)
    print(ymin,ymax,xmin,xmax)

    panel += 1
    ax = plt.subplot(rows, cols, panel)
    plt.scatter(*zip(*slopeList),marker='x',s=100, c='black')
    ax.grid()
    plt.xticks(rotation=45)
    plt.xlabel('Day')
    plt.ylabel('Slope')
    ax.set_ylim((xmin, xmax))

    panel += 1
    ax = plt.subplot(rows, cols, panel)
    plt.scatter(*zip(*interList),marker ='x',s=100, c='purple')
    ax.grid()
    ax.set_ylim((ymin, ymax))
    plt.xlabel('Day')
    plt.ylabel('intercept')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.savefig(attribute + '_ransac_fitting.png')
    # plt.show()
    plt.clf()
    # Release the figure; otherwise each call leaves one more open figure.
    plt.close(fig)
# if ( data_posterior_all == None ):
#     data_posterior_all = data_posterior
# else:
#     data_posterior_all = np.concatenate( (data_posterior_all, data_posterior), axis=0)
# if ( X_all == None ):
#     X_all = X
# else:
#     X_all = np.concatenate( (X_all, X), axis=0)
# if ( Y_all == None ):
#     Y_all = Y
# else:
#     Y_all = np.concatenate( (Y_all, Y), axis=0)

# Maximum distance between predicted and stored location for a sample to be
# selected below.
error_thresh = 5

# One robust linear model per coordinate, both fitted on the same features.
X_model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(),
                                              residual_threshold=7.0,
                                              min_samples=5,
                                              max_trials=100)
Y_model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(),
                                              residual_threshold=7.0,
                                              min_samples=5,
                                              max_trials=100)
X_model_ransac.fit(data_posterior_all, X_all)
Y_model_ransac.fit(data_posterior_all, Y_all)

X_pred = np.squeeze(X_model_ransac.predict(data_posterior_all))
Y_pred = np.squeeze(Y_model_ransac.predict(data_posterior_all))
# Round to whole units. np.round keeps these as arrays; map(round, ...) is an
# iterator on Python 3 and cannot be subtracted from an array.
X_pred = np.round(X_pred)
Y_pred = np.round(Y_pred)

X_diff = X_pred - X_all
Y_diff = Y_pred - Y_all
error_loc = np.sqrt(X_diff*X_diff + Y_diff*Y_diff)

# Samples whose prediction is within the threshold AND whose stored X is -1.
# NOTE(review): -1.0 looks like a "missing label" sentinel -- confirm.
ind_1 = np.where(error_loc <= error_thresh)
ind_2 = np.where(X_all == -1.0)
ind = np.intersect1d(ind_1, ind_2)
"D:/FYP-Developments/Dataset-Kaggale/MedianRejectionSamplingData/train.csv" ) features = df_adv[[ 'Agencia_ID', 'Canal_ID', 'Ruta_SAK', 'Cliente_ID', 'Producto_ID' ]] lables = df_adv['Demanda_uni_equil'] print("Getting data complete") # Create linear regression object #regr = linear_model.LinearRegression(fit_intercept=True, normalize=True, copy_X=True, n_jobs=1) # Create Basian Ridge object #regr = linear_model.BayesianRidge() regr = linear_model.RANSACRegressor(linear_model.LinearRegression()) print("fitting") # Train the model using the training sets regr.fit(features, lables) print("fitting done") # The coefficients #print('Coefficients: \n', regr.coef_) # The intercept #print('Intercept: \n', regr.intercept_) df_adv = pd.read_csv( "D:/FYP-Developments/Dataset-Kaggale/MedianRejectionSamplingData/test.csv")
def _points_near_median(points, radius):
    """Return the rows of ``points`` strictly closer than ``radius`` to their per-axis median."""
    median = np.median(points, axis=0)
    squared_distance = np.sum((median - points) ** 2, axis=1)
    return points[squared_distance < radius ** 2]


def _ransac_inliers(points):
    """Fit y as a linear function of x with RANSAC and return the inlier rows of ``points``."""
    model = linear_model.RANSACRegressor(linear_model.LinearRegression())
    model.fit(points[:, :1], points[:, 1])
    return points[model.inlier_mask_]


def process(im):
    """Locate the intersection of the two lines bounding the white strips.

    The frame is shown in a window (blocking until a key is pressed), white
    horizontal strips are detected, a line is fitted through their left ends
    and another through their right ends, and the offset of the lines'
    intersection from the image centre is computed. The detections are drawn
    onto ``im`` in place.

    Args:
        im: 3-channel image array of shape (rows, columns, 3).

    Returns:
        ``(im, Dx, Dy)``: the annotated image and the integer offsets
        ``-(intersection - centre)`` along x and y.

    Raises:
        ValueError: if no strip wider than the minimum width is found, or no
            strip end lies close enough to the median position.
    """
    cv2.imshow('img', im)
    cv2.waitKey()

    # numpy images are indexed (row, column): shape[0] is the height.
    height, width = im.shape[:2]
    radius = 250  # px, strip ends further than this from the median are ignored
    bw_width = 170  # px, minimum strip width

    # 1. filter the white color
    lower = np.array([170, 170, 170])
    upper = np.array([255, 255, 255])
    mask = cv2.inRange(im, lower, upper)

    # 2. erode the frame with a wide, one-pixel-high kernel
    erodeSize = max(1, int(width / 30))
    erodeStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (erodeSize, 1))
    erode = cv2.erode(mask, erodeStructure, anchor=(-1, -1))

    # 3. find contours and draw the green lines on the white strips
    # findContours returns 3 values on OpenCV 3 and 2 on OpenCV 4; the
    # contour list is the second-to-last item in both.
    contours = cv2.findContours(erode, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_NONE)[-2]
    leftEnds = []
    rightEnds = []
    for contour in contours:
        bx, by, bw, bh = cv2.boundingRect(contour)
        if bw > bw_width:
            cv2.line(im, (bx, by), (bx + bw, by), (0, 255, 0), 2)
            leftEnds.append([bx, by])
            rightEnds.append([bx + bw, by])
            cv2.circle(im, (int(bx), int(by)), 5, (0, 250, 250), 2)  # left end
            cv2.circle(im, (int(bx + bw), int(by)), 5, (250, 250, 0), 2)  # right end

    if not leftEnds:
        raise ValueError('no white strip wider than %d px found' % bw_width)
    leftEnds = np.asarray(leftEnds)
    rightEnds = np.asarray(rightEnds)

    # 4. keep only the points bounded within the circle around the median
    boundedLeft = _points_near_median(leftEnds, radius)
    boundedRight = _points_near_median(rightEnds, radius)
    if len(boundedLeft) == 0 or len(boundedRight) == 0:
        raise ValueError('no strip end within %d px of the median' % radius)

    # 5. RANSAC: left line first, then right line
    inliersLeft = _ransac_inliers(boundedLeft)
    inliersRight = _ransac_inliers(boundedRight)

    # draw RANSAC selected circles
    for element in inliersRight:
        cv2.circle(im, (int(element[0]), int(element[1])), 10, (250, 250, 250), 2)
    for element in inliersLeft:
        cv2.circle(im, (int(element[0]), int(element[1])), 10, (100, 100, 250), 2)

    # 6. Calculate the intersection point of the bounding lines
    # fitLine only accepts int32/float32 points and returns a 4x1 array:
    # unit direction vector followed by a point on the line.
    vx, vy, x0, y0 = cv2.fitLine(inliersLeft.astype(np.float32),
                                 cv2.DIST_L2, 0, 0.01, 0.01).ravel()
    vx_R, vy_R, x0_R, y0_R = cv2.fitLine(inliersRight.astype(np.float32),
                                         cv2.DIST_L2, 0, 0.01, 0.01).ravel()
    # get m*x+b
    m_L, b_L = lineCalc(vx, vy, x0, y0)
    m_R, b_R = lineCalc(vx_R, vy_R, x0_R, y0_R)
    # calculate intersection
    intersectionX, intersectionY = lineIntersect(m_R, b_R, m_L, b_L)

    # 7. draw the bounding lines and the intersection point, but only when
    # the intersection lies in the upper half of the frame
    m = radius * 10
    if intersectionY < height / 2:
        cv2.circle(im, (int(intersectionX), int(intersectionY)), 10, (0, 0, 255), 15)
        cv2.line(im,
                 (int(x0 - m * vx), int(y0 - m * vy)),
                 (int(x0 + m * vx), int(y0 + m * vy)),
                 (255, 0, 0), 3)
        cv2.line(im,
                 (int(x0_R - m * vx_R), int(y0_R - m * vy_R)),
                 (int(x0_R + m * vx_R), int(y0_R + m * vy_R)),
                 (255, 0, 0), 3)

    # 8. calculating the direction vector
    POVx = width / 2  # camera POV - center of the screen
    POVy = height / 2  # camera POV - center of the screen
    Dx = -int(intersectionX - POVx)  # regular x,y axis coordinates
    Dy = -int(intersectionY - POVy)  # regular x,y axis coordinates

    print('DELTA (x,y from POV):' + str(Dx) + ',' + str(Dy))
    return im, Dx, Dy
# nslim for fitting nslimfit = 10 # adjust omegab # omega_b = (omega_b/omega0jh)*omega0 print(' Omega Bar =', omega_b, omega_b / omega0, 'Omega0') # pick up stars at CR mres = 2.0 lres = 0.0 indxcr = np.where(np.fabs((omega_b - omegaphis)) < domega) print(' np CR=', np.shape(indxcr)) if len(lzs[indxcr]) > nslimfit: # robust fit ransac_cr = linear_model.RANSACRegressor() y = lzs[indxcr].reshape(-1, 1) X = jrs[indxcr].reshape(-1, 1) ransac_cr.fit(X, y) # Predict data of estimated models line_ycr = np.linspace(jrrange[0], jrrange[1], npline).reshape(-1, 1) line_Xcr = ransac_cr.predict(line_ycr) else: line_ycr = np.linspace(jrrange[0], jrrange[1], npline).reshape(-1, 1) line_Xcr = np.zeros_like(line_ycr) - 1.0 # 4:1 resonance mres = 4.0 lres = 1.0 indx41 = np.where( np.fabs(mres * (omega_b - omegaphis) - lres * omegars) < domega)
def ransac_linear_regress(correspondence):
    """Fit a line through 2-D correspondences with RANSAC.

    Args:
        correspondence: sequence of pairs; element 0 of each pair is used as
            the input value and element 1 as the target.

    Returns:
        A 4-tuple ``(coef, intercept, total, inliers)``: the coefficient
        array and intercept of the final estimator, the number of points
        considered, and how many of them RANSAC kept as inliers (residual
        threshold 1.6).
    """
    inputs = np.reshape([pair[0] for pair in correspondence], (-1, 1))
    targets = [pair[1] for pair in correspondence]

    model = linear_model.RANSACRegressor(residual_threshold=1.6)
    model.fit(inputs, targets)

    line = model.estimator_
    kept = model.inlier_mask_
    return line.coef_, line.intercept_, len(kept), sum(kept)
def _plot_turbine_pair_figures(list_path, first_figure):
    """Draw one figure per (lead, behind) turbine pair listed in the CSV file ``list_path``."""
    with open(list_path, "r") as list_file:
        turbine_list = csv.reader(list_file, delimiter=",")
        next(turbine_list)  # skip the header row
        for figure_number, row in enumerate(turbine_list, start=first_figure):
            plt.figure(figure_number)
            lead = row[0]
            behind = row[1]
            # The result is not needed here; the call is kept in case
            # get_distances has side effects (e.g. logging).
            get_distances(lead, behind)

            df = pd.read_csv('Dataframe_' + lead + '_' + behind + '.csv')
            plt.scatter(df['jensen_' + behind + '_windspeed'],
                        df['jensen_' + behind + '_power'],
                        marker='x',
                        s=1,
                        color="blue",
                        label='Turbine Actual Data')
            plt.title(
                'Turbine B jensen predicted windspeed vs Turbine B Predicted Jensen power '
                + lead + ' ' + behind)
            plt.xlabel('Turbine B Predicted Jensen Windspeed (m/s)')
            plt.ylabel('Turbine B Predicted Power (kW)')
            # Explicit True: a bare plt.grid() toggles, so calling it twice
            # switched the grid back off.
            plt.grid(True)

            # Robust linear fit of downstream power against upstream power.
            Turbine_A_power = np.asarray(
                df[lead + '_Grd_Prod_Pwr_Avg']).reshape(-1, 1)
            Turbine_B_power = np.asarray(
                df[behind + '_Grd_Prod_Pwr_Avg']).reshape(-1, 1)
            ransac = linear_model.RANSACRegressor()
            ransac.fit(Turbine_A_power, Turbine_B_power)
            print('RANSAC estimator coefficient: ' +
                  str(ransac.estimator_.coef_))

            line_X = np.arange(Turbine_A_power.min(),
                               Turbine_A_power.max())[:, np.newaxis]
            line_y_ransac = ransac.predict(line_X)
            plt.plot(line_X,
                     line_y_ransac,
                     color='red',
                     linewidth=2,
                     label='RANSAC regressor')
            plt.legend(loc="upper left")


def get_graphs_Turbine_B_Speed_Power():
    """Plot Jensen wind speed vs. power and a RANSAC power fit for every turbine pair.

    Pairs are read from ``turbine_list_46.txt``, ``turbine_list_226.txt``
    and ``turbine_list_106.txt`` (header row skipped). Each pair gets its
    own figure, numbered from 1, 50 and 100 respectively, built from
    ``Dataframe_<lead>_<behind>.csv``. Nothing is returned and the figures
    are neither shown nor saved here.
    """
    # (pair list, number of the first figure drawn for that list)
    for list_path, first_figure in (('turbine_list_46.txt', 1),
                                    ('turbine_list_226.txt', 50),
                                    ('turbine_list_106.txt', 100)):
        _plot_turbine_pair_figures(list_path, first_figure)
# Each CSV file holds (true depth, observed depth) rows; reduce every file to
# one point: mean true depth, mean observed depth and error spread.
for csv_file in csv_files:
    with open(csv_file) as f:
        reader = csv.reader(f)
        (true_depths, observed_depths) = zip(*[(float(row[0]), float(row[1])) for row in reader])
    errs = [a - b for (a, b) in zip(true_depths, observed_depths)]
    true_mean = np.mean(true_depths)
    observed_mean = np.mean(observed_depths)
    err_mean = np.mean(errs, axis=0)
    err_stddev = np.std(errs, axis=0)
    xs.append(true_mean)
    ys.append(observed_mean)
    #es.append(err_mean)
    es.append(err_stddev)

#plt.errorbar(xs, ys, es, linestyle='None', marker='^')
plt.scatter(xs, es)

# Robust line through (mean true depth, error standard deviation).
model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(), min_samples=2, residual_threshold=0.1)
X = np.array(xs).reshape((len(xs), 1))
Y = np.array(es)
model_ransac.fit(X, Y)
line_y_ransac = model_ransac.predict(X)

# coef_/intercept_ are 2-D/1-D on old scikit-learn releases and 1-D/scalar on
# current ones for a 1-D target; ravel() reads the single value either way.
slope = np.ravel(model_ransac.estimator_.coef_)[0]
intercept = np.ravel(model_ransac.estimator_.intercept_)[0]
line_label = "{0} x + {1}".format(slope, intercept)

plt.plot(X, line_y_ransac, "r--", label=line_label)
# print() call form: valid on Python 3 as well as Python 2.
print(line_label)
plt.grid()
plt.xlabel("Distance [m]")
plt.ylabel("Standard Deviation [m]")
# plt.legend(prop={'size': '8'})
plt.show()
n_informative=1, noise=10, coef=True, random_state=0) # Add outlier data np.random.seed(0) X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1)) y[:n_outliers] = -3 + 10 * np.random.normal(size=n_outliers) # Fit line using all data lr = linear_model.LinearRegression() lr.fit(X, y) # Robustly fit linear model with RANSAC algorithm ransac = linear_model.RANSACRegressor() ransac.fit(X, y) inlier_mask = ransac.inlier_mask_ outlier_mask = np.logical_not(inlier_mask) # Predict data of estimated models line_X = np.arange(X.min(), X.max())[:, np.newaxis] #line_X= np.array([X.min(),X.max()])[:,np.newaxis] line_y = lr.predict(line_X) line_y_ransac = ransac.predict(line_X) # Compare estimated coefficients print("Estimated coefficients (true, linear regression, RANSAC):") print(coef, lr.coef_, ransac.estimator_.coef_) lw = 2
def _read_timestamp_indexed_csv(path):
    """Read *path* with pandas and move its ``timestamp`` column into the index."""
    frame = pd.read_csv(path)
    frame.index = frame['timestamp']
    return frame.drop('timestamp', axis=1)


def _power_to_windspeed(power, measured_windspeed, curve_windspeed,
                        curve_power, rated_power):
    """Map power readings to wind speeds through a sampled power curve.

    For readings below *rated_power* the result is the ``curve_windspeed``
    entry whose ``curve_power`` value is nearest to the reading (the lower
    index wins a tie). Readings at or above *rated_power* cannot be inverted
    through the curve, so the measured wind speed is used instead.
    ``curve_power`` must be sorted ascending and hold at least two samples.
    """
    power = np.asarray(power, dtype=float)
    measured = np.asarray(measured_windspeed, dtype=float)

    # Bracket each reading between two neighbouring curve samples ...
    right = np.clip(np.searchsorted(curve_power, power), 1, len(curve_power) - 1)
    left = right - 1
    # ... and keep whichever neighbour is closer (strict '<' keeps the left
    # one on ties, matching a first-minimum linear scan).
    right_is_closer = (np.abs(curve_power[right] - power)
                       < np.abs(curve_power[left] - power))
    nearest = np.where(right_is_closer, right, left)

    return np.where(power < rated_power, curve_windspeed[nearest], measured)


def get_ransac(lead, behind, angle_lower, angle_higher):
    """Fit a RANSAC line of downstream vs. upstream corrected wind speed.

    Loads ``WindSpeed_Average.csv``, ``WindDir_Data.csv``, ``power.csv`` and
    ``curtailed_setting.csv`` from the working directory, joins them on their
    ``timestamp`` column and keeps the rows where both turbines produce
    power, neither is derated, and the lead turbine's wind direction lies in
    ``[angle_lower, angle_higher)``. Wind speeds of both turbines are then
    re-derived from their power output via the power curve before fitting.

    Args:
        lead: Name of the upstream turbine, used as a column-name prefix.
        behind: Name of the downstream turbine, used as a column-name prefix.
        angle_lower: Inclusive lower bound on the lead wind direction.
        angle_higher: Exclusive upper bound on the lead wind direction.

    Returns:
        ``[line_X, line_y_ransac, upstream, downstream]``: the x grid and
        predicted y values of the fitted line, followed by the corrected
        upstream and downstream wind speeds as ``(n, 1)`` arrays.

    Raises:
        ValueError: If no row survives the filtering.
    """
    rated_power = 3450  # top of the power curve below; no inversion above it

    df = _read_timestamp_indexed_csv("WindSpeed_Average.csv")
    df['WindSpeed_Mean'] = df.mean(axis=1)

    print('Upstream: ' + lead + ' Downstream: ' + behind + ' Wind Angle Between: ' + str(angle_lower) + ' & ' + str(angle_higher))

    df_dir = _read_timestamp_indexed_csv("WindDir_Data.csv")
    df_power = _read_timestamp_indexed_csv("power.csv")
    df_curtailed = _read_timestamp_indexed_csv("curtailed_setting.csv")

    # Power curve (per the original note: Vestas V112, from a 3rd-party site).
    # The original listed 20 wind speeds for these 21 power values, but
    # make_interp_spline needs x and y of equal length. 13.0 m/s is the next
    # 0.5 m/s step -- NOTE(review): confirm against the turbine data sheet.
    powercurve_windspeed = np.linspace(3.0, 13.0, 21)
    powercurve_power = [7, 53, 123, 208, 309, 427, 567, 732, 927, 1149, 1401,
                        1688, 2006, 2348, 2693, 3011, 3252, 3388, 3436, 3448,
                        3450]
    # Resample the curve (linear, k=1) onto a fine grid for nearest lookups.
    powercurve_windspeed_new = np.linspace(powercurve_windspeed.min(),
                                           powercurve_windspeed.max(), 3000)
    spl = make_interp_spline(powercurve_windspeed, powercurve_power, k=1)
    power_smooth = spl(powercurve_windspeed_new)

    # Inner-join everything on the timestamp index.
    final_df = (
        df.merge(df_dir, left_index=True, right_index=True)
        .merge(df_curtailed, left_index=True, right_index=True)
        .merge(df_power, left_index=True, right_index=True)
    )

    upstream_turbine_power = lead + '_Grd_Prod_Pwr_Avg'
    upstream_turbine_windspeed = lead + '_Amb_WindSpeed_Avg'
    downstream_turbine_power = behind + '_Grd_Prod_Pwr_Avg'
    downstream_turbine_windspeed = behind + '_Amb_WindSpeed_Avg'
    lead_wind_dir = lead + '_Amb_WindDir_Abs_Avg'

    keep = (
        # '> 0' also drops null values, which compare False
        (final_df[upstream_turbine_power] > 0)
        & (final_df[downstream_turbine_power] > 0)
        # drop curtailed samples
        & (final_df[lead + '_Grd_Prod_Pwr_InternalDerateStat'] < 4)
        & (final_df[behind + '_Grd_Prod_Pwr_InternalDerateStat'] < 4)
        # wind direction window of interest
        & (final_df[lead_wind_dir] >= angle_lower)
        & (final_df[lead_wind_dir] < angle_higher)
    )
    power_df = final_df.loc[keep][[
        upstream_turbine_power,
        downstream_turbine_power,
        upstream_turbine_windspeed,
        downstream_turbine_windspeed,
        lead_wind_dir,
        'WindSpeed_Mean',
    ]].copy()

    print('Sample size' + str(len(power_df)))
    if power_df.empty:
        raise ValueError(
            'No samples for ' + lead + ' -> ' + behind + ' with wind direction in ['
            + str(angle_lower) + ', ' + str(angle_higher) + ')')

    print('Correcting upstream wind speed measurements via power curve')
    upstream_windspeed_corrected = _power_to_windspeed(
        power_df[upstream_turbine_power],
        power_df[upstream_turbine_windspeed],
        powercurve_windspeed_new, power_smooth, rated_power,
    ).reshape(-1, 1)

    print('Correcting downstream wind speed measurements via power curve')
    print('')
    downstream_windspeed_corrected = _power_to_windspeed(
        power_df[downstream_turbine_power],
        power_df[downstream_turbine_windspeed],
        powercurve_windspeed_new, power_smooth, rated_power,
    ).reshape(-1, 1)

    print('Calculating regression...')
    ransac = linear_model.RANSACRegressor()
    ransac.fit(upstream_windspeed_corrected, downstream_windspeed_corrected)
    print('RANSAC estimator coefficient: ' + str(ransac.estimator_.coef_))
    print('')
    print('')
    print('')

    # Evaluate the fitted line at unit steps across the upstream speed range.
    line_X = np.arange(upstream_windspeed_corrected.min(),
                       upstream_windspeed_corrected.max())[:, np.newaxis]
    line_y_ransac = ransac.predict(line_X)

    return [line_X, line_y_ransac, upstream_windspeed_corrected, downstream_windspeed_corrected]