def test_ransac_final_model_fit_sample_weight():
    """Final RANSAC estimator must match a weighted refit on the inliers."""
    X, y = make_regression(n_samples=1000, random_state=10)

    # Normalised integer weights drawn from {1, 2, 3}.
    raw_weights = check_random_state(42).randint(1, 4, size=y.shape[0])
    sample_weight = raw_weights / raw_weights.sum()

    ransac = RANSACRegressor(base_estimator=LinearRegression(),
                             random_state=0)
    ransac.fit(X, y, sample_weight=sample_weight)

    # Refit a plain linear model on the inlier subset with the same weights.
    inliers = ransac.inlier_mask_
    reference = LinearRegression()
    reference.fit(X[inliers], y[inliers],
                  sample_weight=sample_weight[inliers])

    assert_allclose(ransac.estimator_.coef_, reference.coef_, atol=1e-12)
def test_ransac_no_valid_model():
    """Rejecting every candidate model must raise and count 5 invalid models."""

    def is_model_valid(estimator, X, y):
        # Veto every fitted candidate.
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        is_model_valid=is_model_valid,
        max_trials=5,
    )

    expected_message = "RANSAC could not find a valid consensus set"
    with pytest.raises(ValueError, match=expected_message):
        ransac_estimator.fit(X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
def test_ransac_default_residual_threshold():
    """With no explicit residual_threshold the known outliers are rejected."""
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       random_state=0)

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, y)

    # Ground truth / reference inlier mask: everything except `outliers`
    expected_mask = np.ones_like(ransac_estimator.inlier_mask_,
                                 dtype=np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, expected_mask)
def get_models(models=None):
    """Populate and return a dict of named linear regression models.

    Args:
        models: optional dict to extend in place. When omitted a fresh dict
            is created on every call (the previous ``dict()`` default was
            created once and shared between calls).

    Returns:
        The dict mapping a display name to an unfitted estimator.
    """
    if models is None:
        models = {}

    # linear models
    models['linear regression'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['elastic net'] = ElasticNet()
    models['huber regressor'] = HuberRegressor()
    # NOTE: the 'lars' (Lars) entry was commented out in the original list.
    models['lasso lars'] = LassoLars()
    models['passive aggressive regressor'] = PassiveAggressiveRegressor(
        max_iter=1000, tol=1e-3)
    # The key spelling ('ranscac') is kept as-is: callers may look it up.
    models['ranscac regressor'] = RANSACRegressor(min_samples=4)
    models['sgd regressor'] = SGDRegressor(max_iter=5000, tol=1e-3)

    print('Defined %d models' % len(models))
    return models
def test_ransac_exceed_max_skips():
    """Rejecting every subset must trip `max_skips` after 4 invalid-data skips."""

    def is_data_valid(X, y):
        # Every random subset is declared invalid.
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        is_data_valid=is_data_valid,
        max_trials=5,
        max_skips=3,
    )

    expected_message = "RANSAC skipped more iterations than `max_skips`"
    assert_raises_regexp(ValueError, expected_message,
                         ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_resid_thresh_no_inliers():
    """residual_threshold=0.0 leaves no inliers, so fit must raise ValueError."""
    # When residual_threshold=0.0 there are no inliers and a
    # ValueError with a message should be raised
    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=0.0,
        random_state=0,
        max_trials=5,
    )

    expected_message = "RANSAC could not find a valid consensus set"
    assert_raises_regexp(ValueError, expected_message,
                         ransac_estimator.fit, X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 5
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_sparse_csc():
    """Fitting on a CSC sparse copy of X must recover the known inlier mask."""
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(sparse.csc_matrix(X), y)

    # Reference mask: all samples are inliers except the known outliers.
    expected_mask = np.ones_like(ransac_estimator.inlier_mask_,
                                 dtype=np.bool_)
    expected_mask[outliers] = False

    assert_equal(ransac_estimator.inlier_mask_, expected_mask)
def test_ransac_residual_metric():
    """Custom `residual_metric` callables warn and give the default predictions."""

    def abs_residual(dy):
        return np.sum(np.abs(dy), axis=1)

    def squared_residual(dy):
        return np.sum(dy**2, axis=1)

    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    shared = dict(min_samples=2, residual_threshold=5, random_state=0)
    ransac_default = RANSACRegressor(base_estimator, **shared)
    ransac_abs = RANSACRegressor(base_estimator,
                                 residual_metric=abs_residual, **shared)
    ransac_squared = RANSACRegressor(base_estimator,
                                     residual_metric=squared_residual,
                                     **shared)

    # multi-dimensional
    ransac_default.fit(X, yyy)
    assert_warns(DeprecationWarning, ransac_abs.fit, X, yyy)
    assert_warns(DeprecationWarning, ransac_squared.fit, X, yyy)
    assert_array_almost_equal(ransac_default.predict(X),
                              ransac_abs.predict(X))
    assert_array_almost_equal(ransac_default.predict(X),
                              ransac_squared.predict(X))

    # one-dimensional
    ransac_default.fit(X, y)
    assert_warns(DeprecationWarning, ransac_squared.fit, X, y)
    assert_array_almost_equal(ransac_default.predict(X),
                              ransac_squared.predict(X))
def test_ransac_score():
    """Score is 1 on the flat inlier samples and below 1 on the two outliers."""
    X = np.arange(100).reshape(-1, 1)

    # All-zero targets except for the first two samples.
    y = np.zeros((100, ))
    y[0] = 1
    y[1] = 100

    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=0.5,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    inlier_score = ransac_estimator.score(X[2:], y[2:])
    outlier_score = ransac_estimator.score(X[:2], y[:2])
    assert_equal(inlier_score, 1)
    assert_less(outlier_score, 1)
def _cfunc_ransac(x, y):
    """
    Fit a RANSAC regressor to the data and return its score on that data.

    Args:
        x: (list<float>) independent property (x-axis)
        y: (list<float>) dependent property (y-axis)

    Returns:
        (float) value of ``RANSACRegressor.score`` on the fitted data
    """
    from sklearn.linear_model import RANSACRegressor

    # scikit-learn expects one row per sample, so add a feature axis.
    features = np.array(x)[:, None]

    regressor = RANSACRegressor(random_state=21)
    regressor.fit(features, y)
    return regressor.score(features, y)
def test_ransac_is_data_valid():
    """`is_data_valid` receives min_samples-sized subsets; rejecting all raises.

    The data comes from a seeded ``RandomState`` so the test is reproducible
    and does not touch the global NumPy random state.
    """

    def is_data_valid(X, y):
        # With min_samples=2 each candidate subset holds exactly two rows.
        assert_equal(X.shape[0], 2)
        assert_equal(y.shape[0], 2)
        return False

    rng = np.random.RandomState(0)
    X = rng.rand(10, 2)
    y = rng.rand(10, 1)

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5,
                                       is_data_valid=is_data_valid,
                                       random_state=0)

    assert_raises(ValueError, ransac_estimator.fit, X, y)
def test_ransac_min_n_samples():
    """Equivalent `min_samples` spellings agree; invalid values raise."""
    base_estimator = LinearRegression()

    def build(**overrides):
        # Every estimator shares the threshold and seed; only min_samples varies.
        return RANSACRegressor(base_estimator, residual_threshold=5,
                               random_state=0, **overrides)

    by_count = build(min_samples=2)
    by_fraction = build(min_samples=2. / X.shape[0])
    negative = build(min_samples=-1)
    non_integer_above_one = build(min_samples=5.2)
    float_count = build(min_samples=2.0)
    default = build()
    more_than_available = build(min_samples=X.shape[0] + 1)

    for estimator in (by_count, by_fraction, float_count, default):
        estimator.fit(X, y)

    for estimator in (by_fraction, float_count, default):
        assert_array_almost_equal(by_count.predict(X),
                                  estimator.predict(X))

    for estimator in (negative, non_integer_above_one, more_than_available):
        with pytest.raises(ValueError):
            estimator.fit(X, y)
def fit_one_line(lines):
    """Fit one RANSAC line through the end points of all given segments.

    Each element of ``lines`` is reshaped to four values and unpacked as
    ``x1, y1, x2, y2``; both end points contribute one sample each.

    Returns:
        ``np.array([coef_, intercept_])`` of the fitted inner estimator.
    """
    xs, ys = [], []
    for segment in lines:
        x1, y1, x2, y2 = segment.reshape(4)
        xs.extend((x1, x2))
        ys.extend((y1, y2))

    # Column vectors: one feature, one target.
    X = np.array(xs).reshape(-1, 1)
    Y = np.array(ys).reshape(-1, 1)

    model = RANSACRegressor().fit(X, Y)
    fitted = model.estimator_
    return np.array([fitted.coef_, fitted.intercept_])
def test_ransac_exceed_max_skips():
    """Rejecting every subset must trip `max_skips` after 4 invalid-data skips."""

    def is_data_valid(X, y):
        # Every random subset is declared invalid.
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        is_data_valid=is_data_valid,
        max_trials=5,
        max_skips=3,
    )

    expected_message = "RANSAC skipped more iterations than `max_skips`"
    with pytest.raises(ValueError, match=expected_message):
        ransac_estimator.fit(X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
def fit_poly(pts):
    """Label each point as inlier (1) or outlier (-1) of a robust linear fit.

    Column 1 of ``pts`` is regressed on columns 0 and 2 with a
    ``PolynomialFeatures(1)`` + ``RANSACRegressor`` pipeline. A point is
    labelled ``-1`` when its squared residual exceeds ten times the mean
    squared residual, otherwise ``1``.

    Args:
        pts: 2-D array indexed as ``pts[:, 0]``, ``pts[:, 1]``, ``pts[:, 2]``.

    Returns:
        list of ``1`` / ``-1``, one per row of ``pts``. If the fit fails,
        every point is labelled ``1``.
    """
    model = make_pipeline(PolynomialFeatures(1), RANSACRegressor())
    try:
        features = np.c_[pts[:, 0], pts[:, 2]]
        model.fit(features, pts[:, 1][:, np.newaxis])
    except Exception:
        # Best effort: keep the "everything is an inlier" fallback, but no
        # longer swallow KeyboardInterrupt / SystemExit like a bare except.
        return [1] * pts.shape[0]

    # The target was fitted as a column, so predictions come back as (n, 1).
    y_hat = np.ravel(model.predict(features))
    squared_error = (y_hat - pts[:, 1]) ** 2
    cutoff = 10 * np.mean(squared_error)
    return [-1 if term > cutoff else 1 for term in squared_error]
def compare_models():
    """Score several regressors, each behind a StandardScaler, via `c.train_score`."""
    named_estimators = [
        ("SGDRegressor", SGDRegressor(loss='squared_loss', penalty='l2')),
        ("RANSACRegressor", RANSACRegressor(LinearRegression())),
        ("ElasticNet", ElasticNet()),
        ("Ridge", Ridge()),
        ("DecisionTreeRegressor", DecisionTreeRegressor()),
        ("RandomForestRegressor", RandomForestRegressor()),
        ("GradientBoostingRegressor", GradientBoostingRegressor()),
    ]

    for label, regressor in named_estimators:
        pipeline = make_pipeline(StandardScaler(), regressor)
        c.train_score(label, pipeline, X_train, y_train)
def _fit_line(self, X, Y, line_type):
    """
    Fits a robust line (robust to outliers) using RANSAC Regressor and
    returns two points from the line.

    Args:
        X: samples passed to ``RANSACRegressor.fit``; indexed as ``X[i][0]``.
        Y: targets passed to ``RANSACRegressor.fit``.
        line_type: ``'vertical'`` or ``'horizontal'``; selects the order in
            which the X values and the predictions are placed in the result.

    Returns:
        ``np.array`` of four values built from the first and last sample:
        ``[X, pred, X, pred]`` for ``'vertical'`` and ``[pred, X, pred, X]``
        for ``'horizontal'`` (predictions truncated to ``int``).

    Raises:
        ValueError: if ``line_type`` is neither supported value. This is now
            checked before fitting, so a bad argument no longer costs a fit.
    """
    if line_type not in ('vertical', 'horizontal'):
        raise ValueError("Argument line_type only takes the values 'horizontal' and 'vertical'")

    model = RANSACRegressor()
    model.fit(X, Y)
    pred = model.predict(X).astype(int)

    first_x, last_x = X[0][0], X[-1][0]
    first_pred, last_pred = pred[0][0], pred[-1][0]

    if line_type == 'vertical':
        # if vertical predict y coordinates
        return np.array([first_x, first_pred, last_x, last_pred])
    # if horizontal predict x coordinates
    return np.array([first_pred, first_x, last_pred, last_x])
def test_ransac_is_model_valid():
    """`is_model_valid` sees min_samples-sized subsets; rejecting all raises."""

    def is_model_valid(estimator, X, y):
        # With min_samples=2 each candidate subset holds exactly two rows.
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    settings = dict(
        min_samples=2,
        residual_threshold=5,
        is_model_valid=is_model_valid,
        random_state=0,
    )
    ransac_estimator = RANSACRegressor(LinearRegression(), **settings)

    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)
def _align_ransac_inner(self, sp, mzs, ints):
    """Fit a RANSAC linear map from sample m/z values to target m/z values.

    The sample peaks are joined against ``self.target_spectrum`` with
    ``join_by_mz``. With 10 or fewer hits a neutral result
    (``M=1, C=0, score=0``) is returned. Otherwise the hits are weighted,
    the 100 highest-weight hits are kept, and a ``RANSACRegressor`` is
    fitted on them.

    Returns:
        dict with keys ``sp``, ``M`` (slope), ``C`` (intercept) and ``score``;
        on the fitted path also ``inliers``, ``align_peaks``, ``align_min``
        and ``align_max``.
    """
    sample_peaks = pd.DataFrame({'sample_mz': mzs, 'sample_ints': ints})
    hits = join_by_mz(
        self.target_spectrum,
        'mz',
        sample_peaks,
        'sample_mz',
        self.analyzer,
        self.align_sigma_1,
    )

    if len(hits) <= 10:
        return {'sp': sp, 'M': 1, 'C': 0, 'score': 0}

    # Rescale sample intensities by the median target/sample ratio before
    # deriving the per-hit weight.
    scaled_ints = hits.sample_ints * np.median(hits.ints / hits.sample_ints)
    ints_accuracy = 0.5 - (scaled_ints / (scaled_ints + 1))
    hits['weight'] = np.log(hits.sample_ints) * ints_accuracy
    ranked = hits.sort_values('weight', ascending=False, ignore_index=True)
    hits = ranked.iloc[:100]

    X = hits.sample_mz.values.reshape(-1, 1)
    y = hits.mz.values

    bins = np.histogram_bin_edges(X, 2)
    threshold = peak_width(X[:, 0], self.analyzer, self.jitter_sigma_1)

    def spans_both_bins(X_subset, y_subset):
        # Require subsets include values from both the higher and lower end
        # of the mass range
        return np.histogram(X_subset, bins)[0].all()

    ransac = RANSACRegressor(
        min_samples=max(0.1, 3 / len(X)),
        residual_threshold=threshold,
        is_data_valid=spans_both_bins,
        loss='absolute_loss',
        stop_probability=1,
    )
    ransac.fit(X, y)

    fitted = ransac.estimator_
    return {
        'sp': sp,
        'M': fitted.coef_[0],
        'C': fitted.intercept_,
        'score': ransac.score(X, y),
        'inliers': np.count_nonzero(ransac.inlier_mask_),
        'align_peaks': len(hits),
        'align_min': hits.mz.min(),
        'align_max': hits.mz.max(),
    }
def S_RANSAC(x_points, y_points, y_min, y_max):
    """Fit x as a degree-2 polynomial of y with RANSAC and sample the curve.

    Returns:
        Predicted x values for every integer step in
        ``np.arange(y_min, y_max)``, or ``None`` when fitting raises
        ``ValueError``.
    """
    targets = np.array(x_points)
    features = np.array(y_points)
    features = features.reshape(len(features), 1)

    pipeline = make_pipeline(PolynomialFeatures(2),
                             RANSACRegressor(random_state=42))
    try:
        pipeline.fit(features, targets)
    except ValueError:
        # Fitting failed: report "no curve".
        return None

    line_Y = np.arange(y_min, y_max)
    return pipeline.predict(line_Y[:, np.newaxis])
def icp(a, na, b, nb, chronos=None):
    """Estimate a rotation from matched normals and apply it to ``a``.

    For every row of ``b`` the nearest row of ``a`` is looked up with a
    KD-tree. The cross products between the matched normals of ``a`` and the
    normals ``nb`` are fed to a ``RANSACRegressor`` (constant input, so it
    yields one robust consensus vector). That vector is turned into a
    rotation matrix about its direction with angle ``arcsin(norm) / 2``.

    Args:
        a, b: point arrays (presumably ``(n, 3)`` and ``(m, 3)`` - confirm).
        na, nb: normals associated with ``a`` and ``b`` (presumably unit
            length, same layout as the points - confirm).
        chronos: optional dict that receives the elapsed seconds under the
            keys ``"Nearest neighbors"`` and ``"RANSAC"``. A fresh dict is
            used when omitted (the former ``{}`` default was shared between
            calls).

    Returns:
        ``(b_rot, R)``: the rotated points and the 3x3 rotation matrix.
    """
    from sklearn.linear_model import RANSACRegressor
    from sklearn.neighbors import KDTree

    if chronos is None:
        chronos = {}

    def _record(chrono_name, chronostart):
        # Store and print the time elapsed since `chronostart`.
        chrono = timer() - chronostart
        chronos[chrono_name] = chrono
        print("{} : {} ms".format(chrono_name, 1000. * chrono))

    kdt = KDTree(a)
    chronostart = timer()
    _, nnidx = kdt.query(b)
    _record("Nearest neighbors", chronostart)

    normals_b_in_a = na[nnidx[:, 0], :]
    rotvec = np.cross(normals_b_in_a, nb, axis=-1)

    ransac = RANSACRegressor()
    chronostart = timer()
    # Constant feature column: the regressor can only learn an intercept,
    # i.e. a single consensus rotation vector.
    ransac.fit(np.zeros((len(rotvec), 1)), rotvec)
    bestrotvec = ransac.predict([[0]])[0]
    _record("RANSAC", chronostart)

    norm = np.linalg.norm(bestrotvec)
    if norm == 0:
        # Zero rotation vector: the angle is 0, so the rotation is the
        # identity. Avoids dividing by zero and returning a NaN matrix.
        R = np.eye(3)
    else:
        theta = np.arcsin(norm) / 2
        ux, uy, uz = bestrotvec / norm
        costh = np.cos(theta)
        ncosth = 1 - costh
        sinth = np.sin(theta)
        # Axis-angle rotation matrix about (ux, uy, uz).
        R = np.array([
            [costh + ux * ux * ncosth,
             ux * uy * ncosth - uz * sinth,
             ux * uz * ncosth + uy * sinth],
            [uy * ux * ncosth + uz * sinth,
             costh + uy * uy * ncosth,
             uy * uz * ncosth - ux * sinth],
            [uz * ux * ncosth - uy * sinth,
             uz * uy * ncosth + ux * sinth,
             costh + uz * uz * ncosth],
        ])

    # NOTE(review): the result is named b_rot but the rotation is applied to
    # `a`, not `b` - kept as in the original; confirm this is intended.
    b_rot = R.dot(a.T).T
    return b_rot, R
def ransacregressor(X_train, X_test, y_train, y_test):
    """Fit a RANSAC linear regressor, print train/test MSE and R^2, return it."""
    from sklearn.linear_model import LinearRegression, RANSACRegressor

    ransac = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        residual_threshold=5.0,
        random_state=1,
    )
    ransac.fit(X_train, y_train)
    print('RANSAC Regressor')

    y_train_pred = ransac.predict(X_train)
    y_test_pred = ransac.predict(X_test)

    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    print('MSE train: %.3f, test: %.3f' % (mse_train, mse_test))

    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    print('R^2 train: %.3f, test: %.3f' % (r2_train, r2_test))

    return ransac
def get_models_multioutput(models=None):
    """Populate and return a dict of named multi-output regression models.

    Every estimator is wrapped in ``MultiOutputRegressor``.

    Changes from the previous version:
      * ``models`` defaults to ``None``; the former ``dict()`` default was
        created once and shared between calls.
      * ``ElasticNet`` receives ``alpha`` / ``l1_ratio`` by keyword, since
        ``l1_ratio`` is keyword-only in current scikit-learn.
      * The ``'svmr*'`` entries are wrapped like all the others; they were
        bare ``SVR`` instances before.

    Args:
        models: optional dict to extend in place.

    Returns:
        The dict mapping a short name to an unfitted estimator.
    """
    if models is None:
        models = {}

    # linear models
    models['lr'] = MultiOutputRegressor(LinearRegression())
    alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for a in alpha:
        models['lasso-' + str(a)] = MultiOutputRegressor(Lasso(alpha=a))
    for a in alpha:
        models['ridge-' + str(a)] = MultiOutputRegressor(Ridge(alpha=a))
    for a1 in alpha:
        for a2 in alpha:
            name = 'en-' + str(a1) + '-' + str(a2)
            models[name] = MultiOutputRegressor(
                ElasticNet(alpha=a1, l1_ratio=a2))
    models['huber'] = MultiOutputRegressor(HuberRegressor())
    models['lars'] = MultiOutputRegressor(Lars())
    models['llars'] = MultiOutputRegressor(LassoLars())
    models['pa'] = MultiOutputRegressor(
        PassiveAggressiveRegressor(max_iter=1000, tol=1e-3))
    # The key spelling ('ranscac') is kept as-is: callers may look it up.
    models['ranscac'] = MultiOutputRegressor(RANSACRegressor())
    models['sgd'] = MultiOutputRegressor(SGDRegressor(max_iter=1000, tol=1e-3))
    models['theil'] = MultiOutputRegressor(TheilSenRegressor())

    # non-linear models
    n_neighbors = range(1, 21)
    for k in n_neighbors:
        models['knn-' + str(k)] = MultiOutputRegressor(
            KNeighborsRegressor(n_neighbors=k))
    models['cart'] = MultiOutputRegressor(DecisionTreeRegressor())
    models['extra'] = MultiOutputRegressor(ExtraTreeRegressor())
    models['svml'] = MultiOutputRegressor(SVR(kernel='linear'))
    models['svmp'] = MultiOutputRegressor(SVR(kernel='poly'))
    c_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for c in c_values:
        models['svmr' + str(c)] = MultiOutputRegressor(SVR(C=c))

    # ensemble models
    n_trees = 100
    models['ada'] = MultiOutputRegressor(
        AdaBoostRegressor(n_estimators=n_trees))
    models['bag'] = MultiOutputRegressor(
        BaggingRegressor(n_estimators=n_trees))
    models['rf'] = MultiOutputRegressor(
        RandomForestRegressor(n_estimators=n_trees))
    models['et'] = MultiOutputRegressor(
        ExtraTreesRegressor(n_estimators=n_trees))
    models['gbm'] = MultiOutputRegressor(
        GradientBoostingRegressor(n_estimators=n_trees))

    print('Defined %d models' % len(models))
    return models
def get_model_from_name(model_name):
    """Return a new, unfitted estimator for ``model_name``.

    The lookup table maps each name to a zero-argument factory, so only the
    requested estimator is built. Previously every estimator in the table
    was constructed on each call, which made one failing constructor break
    the lookup of every other name.

    Args:
        model_name: one of the keys below; the two XGBoost names are only
            registered when ``xgb_installed`` is truthy.

    Returns:
        A freshly constructed estimator instance.

    Raises:
        KeyError: if ``model_name`` is not a registered name.
    """
    model_factories = {
        # Classifiers
        'LogisticRegression': lambda: LogisticRegression(n_jobs=-2),
        'RandomForestClassifier': lambda: RandomForestClassifier(n_jobs=-2),
        'RidgeClassifier': lambda: RidgeClassifier(),
        'GradientBoostingClassifier': lambda: GradientBoostingClassifier(),
        'ExtraTreesClassifier': lambda: ExtraTreesClassifier(n_jobs=-1),
        'AdaBoostClassifier': lambda: AdaBoostClassifier(n_estimators=10),
        'SGDClassifier': lambda: SGDClassifier(n_jobs=-1),
        'Perceptron': lambda: Perceptron(n_jobs=-1),
        'PassiveAggressiveClassifier': lambda: PassiveAggressiveClassifier(),

        # Regressors
        # NOTE: a 'DeepLearningRegressor' (KerasRegressor) entry was
        # commented out in the original table and stays disabled.
        'LinearRegression': lambda: LinearRegression(n_jobs=-2),
        'RandomForestRegressor': lambda: RandomForestRegressor(n_jobs=-2),
        'Ridge': lambda: Ridge(),
        'ExtraTreesRegressor': lambda: ExtraTreesRegressor(n_jobs=-1),
        'AdaBoostRegressor': lambda: AdaBoostRegressor(n_estimators=10),
        'RANSACRegressor': lambda: RANSACRegressor(),
        'GradientBoostingRegressor':
            lambda: GradientBoostingRegressor(presort=False),
        'Lasso': lambda: Lasso(),
        'ElasticNet': lambda: ElasticNet(),
        'LassoLars': lambda: LassoLars(),
        'OrthogonalMatchingPursuit': lambda: OrthogonalMatchingPursuit(),
        'BayesianRidge': lambda: BayesianRidge(),
        'ARDRegression': lambda: ARDRegression(),
        'SGDRegressor': lambda: SGDRegressor(shuffle=False),
        'PassiveAggressiveRegressor':
            lambda: PassiveAggressiveRegressor(shuffle=False),

        # Clustering
        'MiniBatchKMeans': lambda: MiniBatchKMeans(n_clusters=8),
    }

    if xgb_installed:
        model_factories['XGBClassifier'] = lambda: xgb.XGBClassifier(
            colsample_bytree=0.8, min_child_weight=5, subsample=1.0,
            learning_rate=0.1, n_estimators=200, nthread=-1)
        model_factories['XGBRegressor'] = lambda: xgb.XGBRegressor(
            nthread=-1, n_estimators=200)

    return model_factories[model_name]()
def train_RANSACRegressionModel(X, y, base_estimator=None, min_samples=None, residual_threshold=None, is_data_valid=None, is_model_valid=None, max_trials=100, stop_n_inliers=inf, stop_score=inf, stop_probability=0.99, residual_metric=None, random_state=None):
    """
    Build a RANSACRegressor from the given settings and fit it on (X, y).

    Every keyword argument is forwarded unchanged to ``RANSACRegressor``.
    Returns whatever ``fit`` returns.
    """
    settings = dict(
        base_estimator=base_estimator,
        min_samples=min_samples,
        residual_threshold=residual_threshold,
        is_data_valid=is_data_valid,
        is_model_valid=is_model_valid,
        max_trials=max_trials,
        stop_n_inliers=stop_n_inliers,
        stop_score=stop_score,
        stop_probability=stop_probability,
        residual_metric=residual_metric,
        random_state=random_state,
    )
    return RANSACRegressor(**settings).fit(X, y)
def test_ransac_custom_base_estimator():
    """A RANSAC-wrapped decision tree assembles into the expected if-expression."""
    estimator = RANSACRegressor(base_estimator=DecisionTreeRegressor(),
                                random_state=1)
    estimator.fit([[1], [2], [3]], [1, 2, 3])

    actual = RANSACModelAssembler(estimator).assemble()

    # Expected tree: feature 0 <= 2.5 -> 2.0, otherwise 3.0
    split_condition = ast.CompExpr(
        ast.FeatureRef(0),
        ast.NumVal(2.5),
        ast.CompOpType.LTE)
    expected = ast.IfExpr(split_condition, ast.NumVal(2.0), ast.NumVal(3.0))

    assert cmp_exprs(actual, expected)
def getTransformMatrix(coords1, coords2):
    """
    Returns transformation matrix using the RANSAC algorithm on paired nodes

    The fitted coefficients and intercept are stacked into an affine block
    and a final homogeneous row ``[0, ..., 0, 1]`` is appended. The row is
    sized from the fitted coefficients, so 3-D input still gets the usual
    ``[0, 0, 0, 1]`` row.

    Parameters
    ----------
    coords1 : list of lists
        List containing coordinates of nodes in the lower resolution scan
    coords2 : list of lists
        List containing coordinates of nodes in the higher resolution scan

    Returns
    -------
    transMat : ndarray
        Affine block ``[coef_ | intercept_]`` with the homogeneous row
        appended.
    inliers : ndarray of bool
        ``inlier_mask_`` of the fitted regressor.
    """
    reg = RANSACRegressor(random_state=0).fit(coords1, coords2)

    affine = np.column_stack(
        [reg.estimator_.coef_, reg.estimator_.intercept_])

    # Homogeneous bottom row: zeros for each input column, then a one.
    bottom_row = np.zeros(affine.shape[1])
    bottom_row[-1] = 1

    # np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
    transMat = np.vstack([affine, bottom_row])
    inliers = reg.inlier_mask_
    return transMat, inliers
def test_ransac_is_data_valid():
    """`is_data_valid` sees min_samples-sized subsets; rejecting all raises."""

    def is_data_valid(X, y):
        # With min_samples=2 each candidate subset holds exactly two rows.
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    # Seeded local generator keeps the data reproducible.
    rng = np.random.RandomState(0)
    X = rng.rand(10, 2)
    y = rng.rand(10, 1)

    settings = dict(
        min_samples=2,
        residual_threshold=5,
        is_data_valid=is_data_valid,
        random_state=0,
    )
    ransac_estimator = RANSACRegressor(LinearRegression(), **settings)

    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)
def test_ransac_multi_dimensional_targets():
    """Fitting on a three-column target must recover the known inlier mask."""
    ransac_estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                       residual_threshold=5, random_state=0)

    # 3-D target values
    stacked_targets = np.column_stack([y, y, y])

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, stacked_targets)

    # Ground truth / reference inlier mask
    expected_mask = np.ones_like(ransac_estimator.inlier_mask_,
                                 dtype=np.bool_)
    expected_mask[outliers] = False

    assert_equal(ransac_estimator.inlier_mask_, expected_mask)
def test_ransac_residuals_threshold_no_inliers():
    """residual_threshold=nan leaves no inliers, so fit must raise ValueError."""
    # When residual_threshold=nan there are no inliers and a
    # ValueError with a message should be raised
    settings = dict(
        min_samples=2,
        residual_threshold=float("nan"),
        random_state=0,
        max_trials=5,
    )
    ransac_estimator = RANSACRegressor(LinearRegression(), **settings)

    expected_message = "RANSAC could not find a valid consensus set"
    with pytest.raises(ValueError, match=expected_message):
        ransac_estimator.fit(X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 5
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 0