def ransac_linear_regression(X, y, draw=False):
    """Fit a line to ``(X, y)`` with RANSAC and return its ``predict`` method.

    RANSAC is tried first; if it raises ``ValueError`` an ordinary
    ``LinearRegression`` is fitted instead.

    Args:
        X: Training features, passed unchanged to ``fit``/``predict``/``score``.
        y: Training targets.
        draw: When true, plot the samples and the fitted line with matplotlib.

    Returns:
        The fitted estimator's bound ``predict`` method when the fit is
        accepted (score above 0, or at least 80% of the samples flagged as
        RANSAC inliers). ``None`` when neither estimator could be fitted or
        when the fit is rejected.
    """
    from sklearn.linear_model import RANSACRegressor
    from sklearn.linear_model import LinearRegression

    try:
        reg = RANSACRegressor(random_state=0).fit(X, y)
    except ValueError:
        try:
            reg = LinearRegression().fit(X, y)
        except Exception:
            # Best effort: the caller treats None as "no usable regression".
            return None

    prediction = reg.predict

    if draw:
        plt.xlim([0, 1920])
        plt.ylim([0, 1000])
        plt.scatter(X, y, color='yellowgreen', marker='.', label='Inliers')
        line_y = prediction(X)
        plt.plot(X, line_y, color='navy', label='Linear regressor')
        plt.show()

    score = reg.score(X, y)
    if score > 0:
        return prediction

    # Only the RANSAC estimator exposes an inlier mask; the LinearRegression
    # fallback does not, so it can only be accepted on its score.
    inlier_mask = getattr(reg, "inlier_mask_", None)
    if inlier_mask is None:
        logger.debug(
            "Regression score was too low ({}), not accepting result.".format(
                round(score, 2)))
        return None

    n_inliers = np.sum(inlier_mask)
    n_samples = inlier_mask.shape[0]
    if n_inliers >= 0.8 * n_samples:
        return prediction

    logger.debug(
        "Regression score was too low ({}) and only {}% inliers, not accepting result."
        .format(round(score, 2), 100 * round(n_inliers / n_samples, 2)))
    return None
def RANSAC_Regressor():
    """Print the mean cross-validated R² of a RANSAC-wrapped linear regression.

    Uses the names ``X`` and ``y`` from the enclosing scope, runs 16-fold
    cross-validation with ``scoring='r2'`` and prints the mean fold score.
    Returns nothing.
    """
    # The base estimator is only a template: RANSAC fits clones of it, and
    # cross_val_score fits clones of the RANSAC wrapper, so fitting or scoring
    # either object beforehand has no effect on the printed result.
    # NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed
    # in 1.2; it is kept here on the assumption that the file targets an older
    # release — confirm.
    rs = RANSACRegressor(LinearRegression(normalize=True))
    rs_score = cross_val_score(rs, X, y, cv=16, scoring='r2')
    print("RANSAC_Regressor : ", rs_score.mean())
def test_ransac_score():
    """RANSAC scores exactly 1 on the inliers and below 1 on the outliers.

    The data is the constant line y = 0 over 100 samples, except for the
    first two targets which are set to 1 and 100.
    """
    n_samples = 100
    features = np.arange(n_samples).reshape(-1, 1)
    targets = np.zeros(n_samples)
    targets[:2] = [1, 100]

    model = RANSACRegressor(LinearRegression(),
                            min_samples=2,
                            residual_threshold=0.5,
                            random_state=0)
    model.fit(features, targets)

    # Samples 2.. lie on the line; samples 0 and 1 were perturbed above.
    assert_equal(model.score(features[2:], targets[2:]), 1)
    assert_less(model.score(features[:2], targets[:2]), 1)
def test_ransac_score():
    """Check ``RANSACRegressor.score`` on an all-zero line with two outliers."""
    outlier_count = 2

    X = np.arange(100)[:, np.newaxis]
    y = np.zeros(100)
    # The first two samples are moved off the line (to 1 and 100).
    y[0], y[1] = 1, 100

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=0.5,
        random_state=0,
    ).fit(X, y)

    clean_score = ransac_estimator.score(X[outlier_count:], y[outlier_count:])
    assert_equal(clean_score, 1)

    outlier_score = ransac_estimator.score(X[:outlier_count], y[:outlier_count])
    assert_less(outlier_score, 1)
def test_only_fit_contains_sample_weight():
    """Autologging records a training score when only ``fit`` takes ``sample_weight``.

    ``RANSACRegressor.fit`` accepts ``sample_weight`` but ``score`` does not.
    ``score`` is replaced by a stub that records its arguments and returns 0,
    and the test checks that fitting inside an MLflow run calls it exactly
    once with ``(X, y)`` and that the logged params, metrics, tags and
    artifacts match the model.
    """
    mlflow.sklearn.autolog()

    from sklearn.linear_model import RANSACRegressor

    assert "sample_weight" in _get_arg_names(RANSACRegressor.fit)
    assert "sample_weight" not in _get_arg_names(RANSACRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y):  # pylint: disable=unused-argument
        mock_obj(X, y)
        return 0

    # The stub must be a drop-in replacement for the real method.
    assert inspect.signature(
        RANSACRegressor.score) == inspect.signature(mock_score)

    # The patch is applied on the class, so it must be undone afterwards or
    # every later user of RANSACRegressor in the same process gets the stub.
    original_score = RANSACRegressor.score
    RANSACRegressor.score = mock_score
    try:
        model = RANSACRegressor()
        X, y = get_iris()

        with mlflow.start_run() as run:
            model.fit(X, y)
            mock_obj.assert_called_once_with(X, y)

        run_id = run.info.run_id
        params, metrics, tags, artifacts = get_run_data(run_id)
        assert params == truncate_dict(
            stringify_dict_values(model.get_params(deep=True)))
        # model.score is still the stub here, so the expected metric is 0.
        assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
        assert tags == get_expected_class_tags(model)
        assert MODEL_DIR in artifacts
        assert_predict_equal(load_model_by_run_id(run_id), model, X)
    finally:
        RANSACRegressor.score = original_score
def make_forecast(local_array, local_mf_forecast_horizon_days, local_days_in_focus_frame):
    """Forecast each row of ``local_array`` with a RANSAC/ARD regression on time.

    Each row is divided by its own maximum (or by 1 when that maximum is 0),
    regressed against the normalised day index, and the prediction for the
    forecast window is multiplied back by the row maximum.

    Args:
        local_array: 2-D array indexed as ``[series, day]``; the number of
            columns must match ``local_days_in_focus_frame``.
        local_mf_forecast_horizon_days: Number of days to forecast.
        local_days_in_focus_frame: Number of days in each series.

    Returns:
        Array of forecasts, one row per series; its shape is printed before
        returning.
    """
    horizon = local_mf_forecast_horizon_days

    # simple normalization: day indices scaled by the largest index
    day_index = np.array(range(local_days_in_focus_frame))
    days = day_index / np.amax(day_index)

    # float32 work buffer: column 0 holds the days, column 1 the current series
    x_y_data = np.zeros(shape=(days.shape[0], 2), dtype=np.dtype('float32'))
    x_y_data[:, 0] = days
    x = x_y_data[:, 0].reshape(-1, 1)

    # NOTE(review): `days` is already normalised when the horizon (in days) is
    # added, so these inputs lie far outside the fitted range — confirm that
    # this extrapolation is intended.
    forecast_days = (days + horizon)[-horizon:].reshape(-1, 1)

    forecasts = []
    for series_index in range(local_array.shape[0]):
        x_y_data[:, 1] = local_array[series_index, :]
        y = x_y_data[:, 1].reshape(-1, )
        y_max = np.amax(y)
        # divide by the series maximum, or by 1 when that maximum is 0
        scale = y_max * (y_max != 0) + 1 * (y_max == 0)
        y = np.divide(y, scale)

        ransac = RANSACRegressor(base_estimator=ARDRegression(),
                                 min_samples=29,
                                 max_trials=2000,
                                 random_state=0,
                                 loss='squared_loss',
                                 residual_threshold=2.0)
        regression = ransac.fit(x, y)
        score = regression.score(x, y)
        print('time_serie, score of RANdom SAmple Consensus algorithm',
              series_index, score)

        forecasts.append(regression.predict(forecast_days))

    local_forecast = np.array(forecasts)

    # simple denormalization: scale every forecast row by its series maximum
    local_array_max = np.amax(local_array, axis=1)
    row_scale = local_array_max.reshape(local_array_max.shape[0], 1)
    local_forecast = local_forecast * row_scale
    print('local_forecast shape:', local_forecast.shape)
    return local_forecast
def fnRANSACRegressor(self, year, avgTemp, predictYear):
    """Fit RANSAC on year vs. average temperature and predict ``predictYear``.

    10% of the samples are held out (``random_state=42``) for scoring.
    ``year`` is indexed with ``[:, np.newaxis]`` to form a single feature
    column; ``predictYear`` is passed to ``predict`` unchanged.

    Returns:
        Tuple ``(score on the held-out samples, prediction for predictYear)``.
    """
    years_fit, years_eval, temps_fit, temps_eval = train_test_split(
        year, avgTemp, test_size=0.1, random_state=42)

    model = RANSACRegressor()
    model.fit(years_fit[:, np.newaxis], temps_fit)

    held_out_score = model.score(years_eval[:, np.newaxis], temps_eval)
    forecast = model.predict(predictYear)
    return (held_out_score, forecast)
def _cfunc_ransac(x, y):
    """
    Score a random sample consensus (RANSAC) regression of y on x.

    The regressor is fitted and scored on the same data, with a fixed
    random state of 21.

    Args:
        x: (list<float>) independent property (x-axis)
        y: (list<float>) dependent property (y-axis)

    Returns:
        (float) RANSAC score
    """
    from sklearn.linear_model import RANSACRegressor

    # Turn the flat x sequence into a single feature column.
    x_column = np.expand_dims(np.array(x), axis=1)
    model = RANSACRegressor(random_state=21)
    model.fit(x_column, y)
    return model.score(x_column, y)
def _align_ransac_inner(self, sp, mzs, ints):
    """Fit a linear m/z correction for one spectrum against the target spectrum.

    Sample peaks are matched to ``self.target_spectrum`` with ``join_by_mz``.
    With more than 10 matches, the 100 highest-weighted matches are used to
    fit ``target mz = M * sample mz + C`` with RANSAC. Otherwise the identity
    mapping is returned.

    Args:
        sp: Spectrum identifier, echoed back under the ``'sp'`` key.
        mzs: Sample m/z values.
        ints: Sample intensities, one per entry of ``mzs``.

    Returns:
        dict with keys ``sp``, ``M``, ``C`` and ``score``; when a fit was
        made it also has ``inliers``, ``align_peaks``, ``align_min`` and
        ``align_max``.
    """
    sample_peaks = pd.DataFrame({'sample_mz': mzs, 'sample_ints': ints})
    hits = join_by_mz(
        self.target_spectrum,
        'mz',
        sample_peaks,
        'sample_mz',
        self.analyzer,
        self.align_sigma_1,
    )

    # Too few matched peaks: report the identity mapping with score 0.
    if len(hits) <= 10:
        return {'sp': sp, 'M': 1, 'C': 0, 'score': 0}

    # Rescale sample intensities by the median target/sample ratio, then
    # weight each match by log-intensity times the accuracy term below.
    intensity_scale = np.median(hits.ints / hits.sample_ints)
    scaled_ints = hits.sample_ints * intensity_scale
    ints_accuracy = 0.5 - (scaled_ints / (scaled_ints + 1))
    hits['weight'] = np.log(hits.sample_ints) * ints_accuracy

    ranked = hits.sort_values('weight', ascending=False, ignore_index=True)
    hits = ranked.iloc[:100]

    X = hits.sample_mz.values.reshape(-1, 1)
    y = hits.mz.values
    bins = np.histogram_bin_edges(X, 2)
    threshold = peak_width(X[:, 0], self.analyzer, self.jitter_sigma_1)

    def spans_both_halves(X_subset, y_subset):
        # Require subsets include values from both the higher and lower end
        # of the mass range, i.e. no histogram bin is empty.
        return np.histogram(X_subset, bins)[0].all()

    ransac = RANSACRegressor(
        min_samples=max(0.1, 3 / len(X)),
        residual_threshold=threshold,
        is_data_valid=spans_both_halves,
        loss='absolute_loss',
        stop_probability=1,
    )
    ransac.fit(X, y)

    return {
        'sp': sp,
        'M': ransac.estimator_.coef_[0],
        'C': ransac.estimator_.intercept_,
        'score': ransac.score(X, y),
        'inliers': np.count_nonzero(ransac.inlier_mask_),
        'align_peaks': len(hits),
        'align_min': hits.mz.min(),
        'align_max': hits.mz.max(),
    }
def run(self, trainingDasaset, plotting):
    """Train a RANSAC regressor to predict the ``int_rate`` column.

    Args:
        trainingDasaset: DataFrame containing an ``int_rate`` column; all
            other columns are used as features.
        plotting: When equal to ``True``, hold out 20% of the rows, print and
            return the score on them. Otherwise fit on all rows and write
            predictions for the two hard-coded CSV inputs to the output files.

    Returns:
        The held-out score in the ``plotting`` case, otherwise 0.
    """
    y = trainingDasaset['int_rate']
    X = trainingDasaset.drop(columns=['int_rate'])
    clf = RANSACRegressor(random_state=42)

    # Explicit comparison kept on purpose: only values equal to True select
    # the evaluation branch.
    if plotting == True:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=1)
        clf.fit(X_train, y_train)
        print(
            "###################################RANSACRegressor#############################"
        )
        accuracy = clf.score(X_test, y_test)
        print("score:" + str(accuracy))
        return accuracy

    clf.fit(X, y)

    # (input CSV, output CSV) pairs, processed in this order.
    prediction_jobs = (
        ("./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/SiameseTrainingData.csv",
         "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictions.csv"),
        ("./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/OverallTestingData.csv",
         "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictionsTestData.csv"),
    )
    for input_csv, output_csv in prediction_jobs:
        testData = pd.read_csv(input_csv)
        predictions = clf.predict(testData)
        np.savetxt(output_csv, predictions, delimiter=",")

    return 0
def model_linear3(frame):
    """Fit a RANSAC-wrapped linear regression and report its test performance.

    Outliers influence plain linear regression significantly; RANSAC selects
    only inliers when fitting the model.

    The target is the last column of ``frame`` and the features are all
    columns except the last seven. 30% of the rows are held out for testing;
    the split is not seeded, so results vary between calls.

    Args:
        frame: pandas DataFrame to use in modelling.

    Returns:
        Tuple ``(score, error, execution time)``: the score and mean absolute
        error on the held-out rows, and the wall-clock fit time in seconds.
    """
    print("\n\n************************************************************")
    print("MODEL Linear regression ransac")

    target = frame.iloc[:, -1]
    features = frame.iloc[:, :-7]
    print("features=", features.columns)

    # split train and test set
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3)

    ransac = RANSACRegressor(LinearRegression(),
                             max_trials=100,
                             min_samples=50,
                             loss='absolute_loss',
                             residual_threshold=5.0,
                             random_state=0)

    # Only the fit itself is timed.
    fit_started = time.time()
    ransac.fit(X_train, y_train)
    execution = time.time() - fit_started

    y_pred = ransac.predict(X_test)
    l_reg = ransac.score(X_test, y_test)

    # show results of regression model
    print("score linear ransac: %.4f" % l_reg)
    regression_error = mean_absolute_error(y_test, y_pred)
    print("regression_error: %.4f" % regression_error)

    return l_reg, regression_error, execution
- Adjusted R² """ print('R² linear reg.: %.2f' % lin.score(X, Y)) #R² print('MSE linear reg.: %.2f' % mean_squared_error(y_test, y_predlin)) # MSE print('R²: %.2f' % ridge.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_ridge)) # MSE print(lasso.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_lasso)) # MSE print('R²: %.2f' % elast.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predelast)) print('R²: %.2f' % ransac.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predransac)) print('R²: %.2f' % ts.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predts)) print('R²: %.2f' % huber.score(X, Y)) #R² print('MSE: %.2f' % mean_squared_error(y_test, y_predhuber)) """# Classification models""" # 5.2.1 Logistic Regression log = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial') pred_log = log.fit(X_train, y_train).predict( X_test
def process_scan(self):
    """Process the latest lidar scan and publish the floor estimate.

    Reads ``self.current_message``, converts the range readings to Cartesian
    ``x``/``z`` coordinates, fits a ground line with RANSAC over the readings
    whose ``x`` lies between the ``GROUND_PLANE_ESTIMATION_*_DISTANCE``
    limits, rotates the readings by the fitted inclination, and publishes a
    ``lidar_data`` message carrying ``lidar_height``,
    ``floor_inclination_degrees`` and ``visible_floor_distance`` on
    ``self.lidar_proc``.

    Returns ``None`` in all cases. It returns early, without publishing,
    when there is no message, no finite reading, no reading in the ground
    window, when RANSAC raises ``ValueError``, or when the score check fails.
    """
    # if there are no laser scans bail out
    if self.current_message is None:
        rospy.logwarn('no lidar scans received')
        return

    # to hold message to be published
    ld = lidar_data()

    # get last message
    msg = self.current_message

    # copy range readings from message to numpy array
    range_readings = np.array(msg.ranges)

    # get angle ranges from message
    angles = np.arange(msg.angle_min, msg.angle_max, msg.angle_increment)

    # patch warning: for some reason the simulation and physical units return messages of different sizes
    min_length = min(len(angles), len(range_readings))
    range_readings = range_readings[:min_length]
    angles = angles[:min_length]

    # polar to Cartesian coordinates change. Notice x, z flipping (because angle is measured towards the z axis,
    # and not towards x axis)
    z, x = polar2cart(range_readings, angles)

    # TODO: ugly patch!!
    # One axis is sign-flipped depending on whether the source is simulated.
    if self.simulated_radar:
        x = -x
    else:
        z = -z

    # restrict to places where we have finite readings
    finite_indices = np.isfinite(x) & np.isfinite(z)
    x, z = x[finite_indices], z[finite_indices]

    # sort by x
    sorted_indices = np.argsort(x)
    x, z = x[sorted_indices], z[sorted_indices]

    # check that both x and z are not empty
    if len(x) * len(z) == 0:
        rospy.logerr("invalid laser scan readings")
        return

    # locations assumed to be the ground
    ground_indices = np.where(
        (x > LidarProcess.GROUND_PLANE_ESTIMATION_MIN_DISTANCE)
        & (x < LidarProcess.GROUND_PLANE_ESTIMATION_MAX_DISTANCE))
    ground_x, ground_z = x[ground_indices], z[ground_indices]
    if min(len(ground_x), len(ground_z)) == 0:
        rospy.logwarn('unable to detect ground readings')
        return

    # estimate ground line using RANSAC
    try:
        ransac = RANSACRegressor(random_state=0).fit(
            ground_x.reshape(-1, 1), ground_z)
    except ValueError as e:
        rospy.logwarn('RANSAC error {}'.format(e))
        return

    # keep ransac score
    ransac_score = np.abs(ransac.score(ground_x.reshape(-1, 1), ground_z))

    # if readings do not fit a line then report error
    # NOTE(review): the scan is rejected when the absolute score is ABOVE the
    # threshold; if score() is R^2 (higher is better) this looks inverted —
    # confirm the intended meaning of GROUND_PLANE_ESTIMATION_THRESHOLD.
    if ransac_score > LidarProcess.GROUND_PLANE_ESTIMATION_THRESHOLD:
        rospy.logwarn(
            'unable to detect ground. score is {}'.format(ransac_score))
        return

    # estimate line at all x's
    l = ransac.predict(x.reshape(-1, 1))

    # record lidar height (minus sign because the lidar is above the floor)
    ld.lidar_height = -ransac.predict([[0]])[0]

    # floor angle
    # x is sorted, so x[0] and x[-1] are the smallest and largest x readings.
    inclination = np.arctan((l[0] - l[-1]) / (x[-1] - x[0]))
    ld.floor_inclination_degrees = np.rad2deg(inclination)

    # rotation matrix to make ground level
    R = np.array([[np.cos(inclination), -np.sin(inclination)],
                  [np.sin(inclination),
                   np.cos(inclination)]])

    # transform readings
    # T has two rows: rotated x in row 0, rotated z in row 1.
    T = np.matmul(R, np.vstack((x, z)))

    # subtract ground height
    T[1, :] += ld.lidar_height

    # get real heights (after the rotations, etc.)
    # heights is a view of T's second row, so the assignment below also
    # zeroes the corresponding entries of T.
    heights = T[1, :]

    # disregard the ground indices
    # if something bad happened there then hopefully we'll see it in the ransac score
    heights[ground_indices] = 0

    # difference between successive x measurements. stick a zero at the beginning to preserve the length of the
    # vector
    diff_x = np.append([0], np.abs(np.diff(T[0, :])))

    # where are there either obstacles or gaps in X or large heights
    bad_indices = np.where(
        np.bitwise_or(heights > LidarProcess.MAX_HEIGHT_ABOVE_FLOOR,
                      diff_x > LidarProcess.MAX_FLOOR_X_DIFF))[0]

    # if none found
    if len(bad_indices) == 0:
        ld.visible_floor_distance = 1.0
    else:
        # find first place with obstacles or gaps
        # (the reading just before it, clamped to index 0)
        first_idx = max([0, np.min(bad_indices) - 1])
        # floor is visible where there is no large gap in x and no obstacle in z
        ld.visible_floor_distance = T[0][first_idx]

    if self.visualize:
        from matplotlib import pylab as plt
        from drawnow import drawnow, figure

        def visualize():
            #plt.figure(1)
            # plt.subplot(1, 1, 1)
            #plt.scatter(x, z)
            #plt.scatter(ground_x, ground_z, color='blue')
            #plt.plot(x, ransac.predict(x.reshape(-1, 1)), color='red')
            #plt.figure(1)
            # plt.subplot(1, 1, 1)
            #plt.plot(range_readings)
            #plt.figure(2)
            #plt.plot(angles)
            #plt.plot(x, ransac.predict(x.reshape(-1, 1)), color='red')
            plt.figure(1)
            # plt.subplot(1, 1, 1)
            plt.scatter(T[0, :], T[1, :])
            # plt.scatter(x[1:], diff_z, color='green')
            # plt.scatter(T[0], T[1], color='green')
            # mark the reported visible floor distance on the x axis
            plt.plot(ld.visible_floor_distance, 0, 'bx')
            plt.xlim([-0.5, 6])
            plt.ylim([-1, 3])

        drawnow(visualize)

    # publish results
    print("FLoor : " + str(ld.visible_floor_distance))
    self.lidar_proc.publish(ld)
def fit_RANSAC(features_train, labels_train, features_pred):
    """Fit a RANSAC regressor and predict labels for ``features_pred``.

    Also prints the regressor's score on the training data.

    Args:
        features_train: Training features.
        labels_train: Training labels.
        features_pred: Features to predict labels for.

    Returns:
        The predicted labels for ``features_pred``.
    """
    model = RANSACRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)

    # Single-argument print() gives the same output on Python 2 and 3; the
    # double space matches what the old two-item print statement emitted.
    train_score = model.score(features_train, labels_train)
    print("RANSAC - coefficient of determination R^2 of the prediction:  {}"
          .format(train_score))
    return labels_pred
# predicted_values = model.predict(housing_data_input[:10])
# actual_values = housing_data_output[:10]
# print(np.sqrt((predicted_values -actual_values) ** 2))

# Fit a RANSAC-wrapped linear regression on the full housing data.
ransac = RANSACRegressor(
    LinearRegression(),
    min_samples=50,
    max_trials=100,
    residual_threshold=5.0,
)
ransac.fit(housing_data_input, housing_data_output)

# Split the samples and targets by the mask RANSAC produced during fitting.
inlier_mask = ransac.inlier_mask_
outlier_mask = ~inlier_mask
inliers = housing_data_input[inlier_mask]
outliers = housing_data_input[outlier_mask]
inlier_targets = housing_data_output[inlier_mask]
outlier_targets = housing_data_output[outlier_mask]

# Check the accuracy score on each group separately.
print("Inliers RANSAC Model score: ", ransac.score(inliers, inlier_targets))
print("Outliers RANSAC Model score: ", ransac.score(outliers, outlier_targets))

# One scatter of inlier feature values against the inlier targets per column.
for column_label in housing_data_input.columns:
    plt.title(column_label)
    plt.scatter(inliers[column_label],
                inlier_targets,
                label="inlier original data")
    # plt.scatter(inliers[column_label], model.predict(inliers), label = "inlier fitted data")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset and drop rows with missing values.
train = pd.read_csv("C:/Users/HP/Desktop/train (1).csv").dropna()
test = pd.read_csv("C:/Users/HP/Desktop/test (2).csv").dropna()

# Features are every column except the last, so the target is the last
# column. For a two-column file this is the same column as index 1, which was
# hard-coded before; with more columns, index 1 would also be a feature.
X_train = train.iloc[:, :-1].to_numpy()
y_train = train.iloc[:, -1].to_numpy()
X_test = test.iloc[:, :-1].to_numpy()
y_test = test.iloc[:, -1].to_numpy()

# RANSAC Regressor
model = RANSACRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# score() of the regressor on the held-out file (kept under the name
# `accuracy`).
accuracy = model.score(X_test, y_test)

plt.plot(X_train, model.predict(X_train), color='r')
plt.show()

print(accuracy)
is_model_valid=None, max_trials=100, stop_n_inliers=inf, stop_score=inf, stop_probability=0.99, residual_metric=None, loss='absolute_loss', random_state=None) rg_2 = TheilSenRegressor(fit_intercept=True, copy_X=True, max_subpopulation=10000.0, n_subsamples=None, max_iter=300, tol=0.001, random_state=None, n_jobs=1, verbose=False) rg_3 = HuberRegressor(epsilon=1.35, max_iter=100, alpha=0.0001, warm_start=False, fit_intercept=True, tol=1e-05) rg_1.fit(X_train, Y_train) rg_2.fit(X_train, Y_train) rg_3.fit(X_train, Y_train) rg_1.score(X_test, Y_test) rg_2.score(X_test, Y_test) rg_3.score(X_test, Y_test)
regressor.fit(X_train, y_train)

# Plain linear regression scored on the held-out split.
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in
# 1.2; it is kept on the assumption that the script targets an older release —
# confirm.
lr = LinearRegression(normalize=True)
lr.fit(X_train, y_train)
lr_score = lr.score(X_test, y_test)
print("LR : ", lr_score)

# LR with cross_val_score
lr_scores_cv = cross_val_score(lr, X, Y, cv=10, scoring='r2')
print("LR with cv :", lr_scores_cv.mean())

# RANSAC_Regressor RS
# RANSAC fits clones of the estimator it is given, so `lr` is only a template
# here and does not need to be re-created or re-fitted first.
rs = RANSACRegressor(lr)
rs.fit(X, Y)
rs_score = cross_val_score(rs, X, Y, cv=10, scoring='r2')
print("RS: ", rs_score.mean())

# Ridge_and_Lasso -> RG
# RidgeCV picks alpha from the listed candidates.
rg = RidgeCV(alphas=(1.0, 0.1, 0.01, 0.005, 0.0025, 0.001, 0.00025),
             normalize=True)
rg.fit(X, Y)
rg_scores = cross_val_score(rg, X, Y, cv=10, scoring='r2')
score = rg_scores.mean()
print("RG : ", score)