def test_ridge_classifier_cv_store_cv_values(scoring):
    """Check the shape of ``cv_values_`` for both 1-d and 2-d targets."""
    x = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = np.array([1, 1, 1, -1, -1])
    n_samples = x.shape[0]
    alphas = [1e-1, 1e0, 1e1]
    n_alphas = len(alphas)
    scoring_ = make_scorer(scoring) if callable(scoring) else scoring
    clf = RidgeClassifierCV(alphas=alphas, cv=None, store_cv_values=True,
                            scoring=scoring_)

    # 1-d target -> a single output column
    n_targets = 1
    clf.fit(x, y)
    assert clf.cv_values_.shape == (n_samples, n_targets, n_alphas)

    # 2-d target -> one output column per target
    y = np.array([[1, 1, 1, -1, -1],
                  [1, -1, 1, -1, 1],
                  [-1, -1, 1, -1, -1]]).transpose()
    n_targets = y.shape[1]
    clf.fit(x, y)
    assert clf.cv_values_.shape == (n_samples, n_targets, n_alphas)
def linear_models(x_train, y_train):
    """Fit five sklearn linear classifiers on the training data.

    Prints the training accuracy of each model and returns the five fitted
    classifiers in a fixed order: (LogisticRegression,
    PassiveAggressiveClassifier, RidgeClassifierCV, SGDClassifier, Perceptron).
    """
    from sklearn.linear_model import (LogisticRegression,
                                      PassiveAggressiveClassifier,
                                      RidgeClassifierCV,
                                      SGDClassifier,
                                      Perceptron)

    # (label, estimator) pairs; fitted and reported in this order.
    models = [
        ('LogisticRegression', LogisticRegression(C=1.2, random_state=0, max_iter=1500)),
        ('PassiveAggressiveClassifier', PassiveAggressiveClassifier()),
        ('RidgeClassifierCV', RidgeClassifierCV()),
        ('SGDClassifier', SGDClassifier()),
        ('Perceptron', Perceptron()),
    ]
    for _, clf in models:
        clf.fit(x_train, y_train)
    for label, clf in models:
        print(label + ' training accuracy: ', clf.score(x_train, y_train))
    return tuple(clf for _, clf in models)
def run(train_file, test_file, num_seq, n_jobs):
    """Train a ridge classifier on Aho-Corasick substring-match features.

    Loads train/test data, builds an automaton from randomly generated
    feature strings, extracts feature vectors for both splits, fits a
    RidgeClassifierCV, and prints the test-set accuracy.
    """
    print("Load train data")
    y, s = load_data(train_file)

    print("Generate random features")
    ss = generate_features(s, num_seq)

    print("Generate automaton")
    A = ahocorasick.Automaton()
    for idx, f in enumerate(ss):
        A.add_word(f, (idx, f))
    A.make_automaton()

    # NOTE: "Feautre" typo kept — runtime output must stay unchanged.
    print("Extract Feautre Vectors of train data")
    fvec = create_fvec(s, A, n_jobs)

    print("Learn classifier")
    cls = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    cls.fit(fvec, y)

    print("Load test data")
    y_test, s_test = load_data(test_file)

    print("Extract Feature Vector of test data")
    fvec_test = create_fvec(s_test, A, n_jobs)

    print("Predict")
    print(cls.score(fvec_test, y_test))
class TestServinator(unittest.TestCase):
    """Tests for the servinator model-serving helpers."""

    def setUp(self):
        # Train a small classifier on iris so every test has a fitted model.
        data = load_iris()
        x_train, x_test, y_train, y_test = train_test_split(
            data.data, data.target, test_size=0.2, random_state=67,
            stratify=data.target)
        self.model = RidgeClassifierCV(normalize=True, scoring='logloss',
                                       cv=3, class_weight='balanced')
        self.model.fit(x_train, y_train)
        self.test_data = x_test
        self.test_target = y_test
        model_backend = ModelBackend(self.model)
        # self.app = servinator('test', model_backend).test_client()

    def test_json_to_model_input(self):
        # Raw Kickstarter-style record; the trailing newline + '; "' inside
        # the full_text field is intentional and part of the fixture.
        raw_json = '''[{"end_date": "2014-10-08T14:52:44-04:00", "location": "Louisville, KY", "pledged": "70.0", "goal": "10000.0", "category": "Food", "author": "Joe Banet", "backers": "4", "blurb": "Krazy Joe's soon to be famous kimchi and bourbon barrels which have long ago been enjoyed come together at last. Bourbon aged kimchi!", "title": "Krazy Joe's Bourbon Barrel Kimchi", "full_text": "I like kimchi. I like to make kimchi. I think I'm pretty good at it. My goal is to create a Bourbon barrel aged kimchi and share it with the world. This is just a start to company that could greatly expand and diversify into many products that all have one common denominator...kimchi. Thank you for your interest and support! 
; "}]'''
        expected_data = {
            'author': {0: 'joe banet'},
            'backers': {0: '4'},
            'blurb': {0: 'krazy joes soon to be famous kimchi and bourbon barrels which have long ago been enjoyed come together at last bourbon aged kimchi'},
            'day': {0: 8},
            'dayofweek': {0: 2},
            'dayofyear': {0: 281},
            'full_text': {0: 'i like kimchi i like to make kimchi i think im pretty good at it my goal is to create a bourbon barrel aged kimchi and share it with the world this is just a start to company that could greatly expand and diversify into many products that all have one common denominator kimchi thank you for your interest and support'},
            'goal': {0: '10000.0'},
            'hour': {0: 18},
            'loc1': {0: ''},
            'loc2': {0: 'louisville'},
            'loc3': {0: 'ky'},
            'minute': {0: 52},
            'month': {0: 10},
            'pledged': {0: '70.0'},
            'title': {0: 'krazy joes bourbon barrel kimchi'},
            'weekday': {0: 2},
            'weekofyear': {0: 41},
            'year': {0: 2014}}
        df_json = _json_to_model_input(raw_json)
        df_expected = pd.DataFrame(expected_data)
        # Sort columns so ordering differences don't break the comparison.
        df_json = df_json.loc[:, sorted(df_json.columns.values)]
        df_expected = df_expected.loc[:, sorted(df_expected.columns.values)]
        self.assertTrue(df_expected.equals(df_json))

    def test_e2e(self):
        '''test full load and predict of training data and assert it matches offline result'''
        pass
def test_ridge_classifier_with_scoring(filter_, scoring, cv):
    """Non-regression test for #14672.

    RidgeClassifierCV must accept any scoring / cross-validation combination.
    """
    scorer = make_scorer(scoring) if callable(scoring) else scoring
    model = RidgeClassifierCV(scoring=scorer, cv=cv)
    # Smoke test: fit followed by predict must not raise.
    model.fit(filter_(X_iris), y_iris).predict(filter_(X_iris))
def _fit_estimator(self, rocket, X, y):
    """Fit one rocket transform + ridge classifier pair.

    Returns a two-element list: the fitted pipeline, and the transformed
    training data (or None unless ``save_transformed_data`` is set).
    """
    features = rocket.fit_transform(X)
    ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    ridge.fit(features, y)
    saved = features if self.save_transformed_data else None
    return [make_pipeline(rocket, ridge), saved]
def ridge_classification(X_train, X_test, y_train, y_test):
    """Preprocess the splits, fit a RidgeClassifierCV, and plot the results."""
    X_train, X_test = preprocess(X_train, X_test)
    from sklearn.linear_model import RidgeClassifierCV
    clf = RidgeClassifierCV()
    clf.fit(X_train, y_train)
    # Round + flatten mirrors the other classifiers' post-processing here.
    y_pred = np.round(clf.predict(X_test)).flatten()
    plot_model(clf, X_train, y_train, y_test, y_pred, "RidgeClassifierCV")
def do_rcv(X_test, X_train, Y_train):
    """Fit a RidgeClassifierCV on the training data and predict labels for X_test.

    Returns the predicted labels for ``X_test``.
    """
    # Ridge classification with built-in leave-one-out CV over the default
    # alpha grid. (The previous comment claimed a hinge loss / l2 penalty,
    # which describes SGDClassifier, not RidgeClassifierCV.)
    clf = RidgeClassifierCV()
    print("starts fitting")
    print(clf.fit(X_train, Y_train))
    print("finished fitting, starts predictions")
    Y_pred = clf.predict(X_test)
    print("finished predictions")
    return Y_pred
def _fit_estimator(self, rocket, X, y):
    """Fit rocket -> sparse-safe scaler -> ridge on one transform.

    Returns the fitted pipeline plus the transformed training data
    (None unless ``save_transformed_data`` is set).
    """
    features = rocket.fit_transform(X)
    # with_mean=False keeps the scaler valid for sparse feature matrices.
    scaler = StandardScaler(with_mean=False)
    scaler.fit(features, y)
    ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    ridge.fit(scaler.transform(features), y)
    return [
        make_pipeline(rocket, scaler, ridge),
        features if self.save_transformed_data else None,
    ]
def ridge(X, X_train, X_val, y_train, y_val, X_test, y_test):
    """Fit a RidgeClassifierCV, persist it, and return train/val/test F1.

    Accuracies are also computed by ``model_performance`` but only the
    F1 scores are returned (matching the original contract).
    """
    clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1])
    clf.fit(X_train, y_train)
    save_model(clf, X.shape)
    tr_f1, val_f1, test_f1, tr_acc, val_acc, test_acc = model_performance(
        clf, X_train, y_train, X_val, y_val, X_test, y_test)
    return tr_f1, val_f1, test_f1
def agent(path="./", dataset="", ratio=False, seg=0.75, folder="temp"):
    """Run one PAAStat + RidgeClassifierCV experiment on a UCR-style dataset.

    Loads <dataset>_TRAIN.ts / <dataset>_TEST.ts from ``path``, transforms
    both splits, fits the classifier, and writes accuracy/timing results to
    ``./<folder>/<dataset>.csv``.
    """
    current_process().name = dataset
    start1 = time.time()

    train_x, train_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TRAIN.ts")
    test_x, test_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TEST.ts")
    print(f"{dataset}: Train Shape {train_x.shape}")
    print(f"{dataset}: Test Shape {test_x.shape}")

    # Transform both splits; scaler statistics come from the train split only.
    scaler = StandardScaler()
    transform_time1 = time.time()
    mod_train = PAAStat(paa_=ratio, seg_=seg).transform(train_x.values)
    mod_train = scaler.fit(mod_train).transform(mod_train)
    mod_test = PAAStat(paa_=ratio, seg_=seg).transform(test_x.values)
    mod_test = scaler.transform(mod_test)
    transform_time2 = time.time()

    model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    train_time1 = time.time()
    model.fit(mod_train, train_y)
    preds = model.predict(mod_test)
    train_time2 = time.time()

    acc1 = accuracy_score(preds, test_y) * 100
    end1 = time.time()
    # NOTE: "Transfrom_time" typo kept in output strings and CSV column names.
    print(
        f"Dataset: {dataset}, AccuracyRidge: {acc1}, Time taken: {(end1 - start1)/60}, "
        f"Transfrom_time: {(transform_time2-transform_time1)/60}, train_time: {(train_time2-train_time1)/60}"
    )

    results = pd.DataFrame({
        'Dataset': dataset,
        'AccuracyRidge': [acc1],
        'Time (min)': [(end1 - start1) / 60],
        'Transfrom_time (min)': [(transform_time2 - transform_time1) / 60],
        'train_time (min)': [(train_time2 - train_time1) / 60]
    })
    temp_path = './' + folder
    if not os.path.exists(temp_path):
        os.mkdir(temp_path)
    results.to_csv(os.path.join(temp_path + f'/{dataset}.csv'), index=False)
def _test_ridge_classifiers(filter_):
    """Smoke-test RidgeClassifier and RidgeClassifierCV on the iris data."""
    n_classes = np.unique(y_iris).shape[0]
    n_features = X_iris.shape[1]

    for clf in (RidgeClassifier(), RidgeClassifierCV()):
        clf.fit(filter_(X_iris), y_iris)
        assert clf.coef_.shape == (n_classes, n_features)
        predicted = clf.predict(filter_(X_iris))
        assert np.mean(y_iris == predicted) > .79

    # Explicit CV object should reach at least 80% training accuracy.
    clf = RidgeClassifierCV(cv=KFold(5))
    clf.fit(filter_(X_iris), y_iris)
    predicted = clf.predict(filter_(X_iris))
    assert np.mean(y_iris == predicted) >= 0.8
def test_class_weights_cv():
    """Class weights for the cross-validated ridge classifier."""
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = RidgeClassifierCV(class_weight=None, alphas=[.01, .1, 1])
    clf.fit(X, y)

    # Giving class 1 a tiny weight flips the prediction for this point.
    clf = RidgeClassifierCV(class_weight={1: 0.001}, alphas=[.01, .1, 1, 10])
    clf.fit(X, y)
    assert_array_equal(clf.predict([[-.2, 2]]), np.array([-1]))
def test_ridge_regression_custom_scoring(filter_, cv):
    """Custom scoring works; ties are broken by keeping the first alpha tried."""
    def _dummy_score(y_test, y_pred):
        # Constant score makes every alpha tie.
        return 0.42

    alphas = np.logspace(-2, 2, num=5)
    clf = RidgeClassifierCV(alphas=alphas,
                            scoring=make_scorer(_dummy_score),
                            cv=cv)
    clf.fit(filter_(X_iris), y_iris)
    assert clf.best_score_ == pytest.approx(0.42)
    # With all scores tied, the first alpha must win.
    assert clf.alpha_ == pytest.approx(alphas[0])
def train_model(X, Y):
    """Train three classifiers on the (sparse) feature matrix X and labels Y.

    Returns the fitted models as (GradientBoosting, RidgeCV, LogisticRegression).
    Python 2 print statements replaced with print() so the block runs on
    Python 3 like the rest of the file.
    """
    print("Training LR...")
    modelLR = LogisticRegression(penalty='l1', C=100, tol=1e-10)
    modelLR.fit(X.toarray(), Y)

    print("Training RC...")
    modelRC = RidgeClassifierCV(alphas=[0.1, 1., 10.])
    modelRC.fit(X.toarray(), Y)

    print("Training GBC...")
    modelGBC = GradientBoostingClassifier(subsample=0.5, max_depth=6,
                                          n_estimators=50)
    modelGBC.fit(X.toarray(), Y)

    return modelGBC, modelRC, modelLR
def generate_model(X, y, model='linear', regularizer='ridge'):
    """Build, fit, and return a classifier selected by ``model``.

    ``regularizer`` is only consulted for the 'linear' model; unsupported
    combinations raise ValueError (same messages as before).
    """
    if model == 'linear':
        if regularizer != 'ridge':
            raise ValueError("Unknown Regularizer")
        clf = RidgeClassifierCV()
    elif model == 'logistic':
        clf = LogisticRegressionCV()
    elif model == 'svm':
        clf = SGDClassifier()
    else:
        raise ValueError("Unexpected Model Type")
    clf.fit(X, y)
    return clf
def main():
    """Featurize the credit-score data, cross-validate, and fit a final model.

    Python 2 print statements replaced with print() calls so the block runs
    on Python 3 like the rest of the file; behavior is otherwise unchanged.
    """
    train_input = pd.read_csv('../input/train.csv')
    # Test rows lack the delinquency label, so featurize train+test together.
    test_input = pd.read_csv('../input/test.csv')
    data = pd.concat([train_input, test_input])

    featurizer = CreditScoreFeaturizer()
    print("Transforming dataset into features...")
    # Create matrix of features from the raw dataset, then re-split.
    X = featurizer.fit_transform(data)
    X_train = X[:len(train_input)]
    X_test = X[len(train_input):]

    # Any model could be swapped in here (e.g. LogisticRegression(C=10)).
    model = RidgeClassifierCV(alphas=[0.1, 1., 10.])

    # Target variable.
    y = train_input.SeriousDlqin2yrs

    print("Cross validating...")
    print(np.mean(cross_val_score(model, X_train, y, scoring='roc_auc')))

    print("Training final model...")
    model = model.fit(X_train, y)
class ROCKET():
    """ROCKET time-series classifier: random kernels + cross-validated ridge."""

    def __init__(self, num_kernels=100):
        self.num_kernels = num_kernels

    def train(self, X_train, Y_train):
        """Generate kernels from X_train, transform it, and fit the ridge."""
        # Throwaway call on dummy data first — presumably a warm-up to trigger
        # JIT compilation of generate/apply kernels; TODO confirm.
        dummy_kernels = generate_kernels(100, 10)
        apply_kernels(np.zeros_like(X_train)[:, 1:], dummy_kernels)

        input_length = X_train.shape[1]
        self.kernels = generate_kernels(input_length, self.num_kernels)
        X_transform = apply_kernels(X_train, self.kernels)
        self.classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                            normalize=True)
        return self.classifier.fit(X_transform, Y_train)

    def test(self, X_test, Y_test):
        """Score the fitted classifier on the transformed test data."""
        X_transform = apply_kernels(X_test, self.kernels)
        return self.classifier.score(X_transform, Y_test)
class _RidgeClassifierCVImpl:
    """Thin wrapper delegating to the underlying ``Op`` estimator."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; y is forwarded only when provided."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
class Classifier_Rocket:
    """ROCKET classifier: 10k random convolutional kernels + ridge CV."""

    def __init__(self, output_directory, input_shape, nb_classes, verbose):
        if verbose:
            print("[Rocket] Creating Rocket classifier")
        self.verbose = verbose
        self.output_directory = output_directory
        self.input_shape = input_shape
        self.nb_classes = nb_classes

    def fit(self, Ximg_train, yimg_train):
        """Generate kernels from the training data and fit the ridge classifier."""
        start_time = time.time()

        if self.verbose:
            print('[Rocket] Generating kernels')
        self.kernels = generate_kernels(Ximg_train.shape[1], 10000,
                                        Ximg_train.shape[2])

        if self.verbose:
            print('[Rocket] Applying kernels')
        X_training_transform = apply_kernels(Ximg_train, self.kernels)

        if self.verbose:
            print('[Rocket] Training')
        self.classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                            normalize=True)
        self.classifier.fit(X_training_transform, yimg_train)

        self.duration = time.time() - start_time
        if self.verbose:
            print('[Rocket] Training done!, took {}s'.format(self.duration))

    def predict(self, Ximg, yimg):
        """Transform Ximg with the fitted kernels, predict, and persist metrics."""
        if self.verbose:
            print('[Rocket] Predicting')
        X_test_transform = apply_kernels(Ximg, self.kernels)
        model_metrics, conf_mat, y_true, y_pred = predict_model(
            self.classifier, X_test_transform, yimg, self.output_directory)
        df_metrics = calculate_metrics(y_true, y_pred, self.duration)
        df_metrics.to_csv(self.output_directory + 'df_metrics.csv', index=False)
        if self.verbose:
            print('[Rocket] Prediction done!')
        return model_metrics, conf_mat
def run(training_data, test_data, num_runs=10, num_kernels=10_000):
    """Benchmark ROCKET over ``num_runs`` repetitions.

    ``training_data``/``test_data`` are 2-d arrays whose first column is the
    class label and whose remaining columns are the time series.

    Returns (results, timings): per-run accuracies, and a (4, num_runs)
    array of timings (rows: training transform, test transform, training, test).
    """
    results = np.zeros(num_runs)
    timings = np.zeros([4, num_runs])

    # FIX: np.int was removed in NumPy 1.24 — the builtin int is equivalent.
    Y_training, X_training = training_data[:, 0].astype(int), training_data[:, 1:]
    Y_test, X_test = test_data[:, 0].astype(int), test_data[:, 1:]

    for i in range(num_runs):
        input_length = X_training.shape[1]
        kernels = generate_kernels(input_length, num_kernels)

        # -- transform training ------------------------------------------------
        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------
        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------
        time_a = time.perf_counter()
        classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                       normalize=True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        timings[2, i] = time_b - time_a

        # -- test --------------------------------------------------------------
        time_a = time.perf_counter()
        results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        timings[3, i] = time_b - time_a

    return results, timings
def predict_by_core(root, X_test, y_test):
    """Fit a ridge classifier on the tree's leaf cores and score it on X_test.

    Each non-empty leaf contributes its core vector ``leaf.w`` as a sample,
    labelled with the majority class of ``leaf.y``. Returns (accuracy, leaves).
    (A commented-out Keras baseline previously lived here and was removed.)
    """
    leaves = [node for node in LevelOrderIter(root)
              if node.is_leaf and len(node.y) > 0]
    X_arch_core = np.array(
        [leaf.w.squeeze() for leaf in leaves if len(leaf.y) > 0])

    # Majority label per leaf.
    y_arch_core = []
    for leaf in leaves:
        if len(leaf.y) > 0:
            unique_y, count_y = np.unique(leaf.y, return_counts=True)
            y_arch_core.append(unique_y[np.argmax(count_y)])
    y_arch_core = np.array(y_arch_core)

    model = RidgeClassifierCV()
    model.fit(X_arch_core, y_arch_core)
    accuracy = model.score(X_test, y_test)
    return accuracy, leaves
def _train_probas_for_estimator(self, y, idx):
    """Out-of-bag weighted vote counts for transform ``idx``.

    Fits a ridge classifier on a bootstrap subsample of the transformed data
    and accumulates its weighted predictions on the out-of-bag rows.
    Returns (results, oob): an (n_instances, n_classes) array and the list of
    out-of-bag row indices.
    """
    # Per-estimator deterministic seed derived from self.random_state.
    rs = 255 if self.random_state == 0 else self.random_state
    rs = None if self.random_state is None else rs * 37 * (idx + 1)
    rng = check_random_state(rs)

    # Bootstrap sample (with replacement); everything not drawn is OOB.
    subsample = rng.choice(self.n_instances_, size=self.n_instances_)
    oob = [n for n in range(self.n_instances_) if n not in subsample]

    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    clf.fit(self.transformed_data_[idx].iloc[subsample], y[subsample])
    preds = clf.predict(self.transformed_data_[idx].iloc[oob])

    results = np.zeros((self.n_instances_, self.n_classes_))
    for row, pred in zip(oob, preds):
        results[row][self._class_dictionary[pred]] += self.weights_[idx]
    return results, oob
def CITE_RIDGE(ax, targCell, numFactors=10, RNA=False):
    """Fit a ridge classifier to the CITE data and plot the ``numFactors``
    markers with the largest coefficients for ``targCell``.

    Returns the DataFrame of (Marker, Coefficient) rows that was plotted.
    FIX: removed a dead ``RidgeClassifierCV()`` assignment that was
    immediately overwritten by the ``cv=5`` instance below.
    """
    if RNA:
        RIDGE_DF = importRNACITE()
    else:
        RIDGE_DF = importCITE()

    # NOTE(review): filtering by all unique CellType2 values is a no-op as
    # written — presumably a hook for restricting cell types later.
    cellToI = RIDGE_DF.CellType2.unique()
    RIDGE_DF = RIDGE_DF.loc[(RIDGE_DF["CellType2"].isin(cellToI)), :]
    cellTypeCol = RIDGE_DF.CellType2.values

    # Drop label/identifier columns; everything left is a marker feature.
    RIDGE_DF = RIDGE_DF.loc[:, ((RIDGE_DF.columns != 'CellType1') &
                                (RIDGE_DF.columns != 'CellType2') &
                                (RIDGE_DF.columns != 'CellType3') &
                                (RIDGE_DF.columns != 'Cell'))]
    factors = RIDGE_DF.columns
    X = StandardScaler().fit_transform(RIDGE_DF.values)

    le = LabelEncoder()
    le.fit(cellTypeCol)
    y = le.transform(cellTypeCol)

    ridgeMod = RidgeClassifierCV(cv=5)
    ridgeMod.fit(X, y)

    # Coefficients of the one-vs-rest row for the target cell type.
    TargCoefs = ridgeMod.coef_[np.where(le.classes_ == targCell), :].ravel()
    TargCoefsDF = pd.DataFrame({"Marker": factors,
                                "Coefficient": TargCoefs}).sort_values(by="Coefficient")
    TargCoefsDF = TargCoefsDF.tail(numFactors)

    sns.barplot(data=TargCoefsDF, x="Marker", y="Coefficient", ax=ax, color='k')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
    if RNA:
        ax.set(title="RIDGE Coefficients - RNA")
    else:
        ax.set(title="RIDGE Coefficients - Surface Markers")
    return TargCoefsDF
def main():
    """Featurize the credit data, cross-validate a ridge model, then train and
    submit a bagged logistic-regression ensemble.

    FIX: Python 2 print statements -> print(), and ``xrange`` -> ``range``,
    so the block runs on Python 3 like the rest of the file.
    """
    train_input = pd.read_csv('train.csv')
    test_input = pd.read_csv('test.csv')
    data = pd.concat([train_input, test_input])

    featurizer = CreditScoreFeaturizer()
    print("Transforming dataset into features...")
    # Create matrix of features from the raw dataset, then re-split.
    X = featurizer.fit_transform(data)
    X_train = X[:len(train_input)]
    X_test = X[len(train_input):]

    # Any model could be swapped in here (e.g. LogisticRegression(C=10)).
    model = RidgeClassifierCV(alphas=[0.1, 1., 10.])

    # Target variable.
    y = train_input.SeriousDlqin2yrs

    print("Cross validating...")
    print(np.mean(cross_val_score(model, X_train, y, scoring='roc_auc', cv=10)))

    print("Training final model...")
    model = model.fit(X_train, y)

    # Final submission model: a bag of logistic regressions.
    n_models = 5
    bag_size = 0.70
    models = [LogisticRegression(C=10) for _ in range(n_models)]
    model = Bagging(models, bag_size)
    model.fit(X_train, y)

    print("Create predictions on submission set...")
    create_submission(model, X_test, test_input)
def _ridgeclassifiercv(*, train, test, x_predict=None, metrics,
                       alphas=(0.1, 1.0, 10.0), fit_intercept=True,
                       normalize=False, scoring=None, cv=None,
                       class_weight=None, store_cv_values=False):
    """For for info visit :
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeClassifierCV.html#sklearn.linear_model.RidgeClassifierCV

    Fits a RidgeClassifierCV on ``train`` and evaluates on ``test`` with the
    metric named by ``metrics``. Returns (model_name, accuracy, y_predict),
    where y_predict is None unless ``x_predict`` is given.

    FIX: an unrecognized ``metrics`` value previously fell through and raised
    an opaque NameError on ``accuracy``; it now raises a clear ValueError.
    """
    model = RidgeClassifierCV(alphas=alphas, fit_intercept=fit_intercept,
                              normalize=normalize, scoring=scoring, cv=cv,
                              class_weight=class_weight,
                              store_cv_values=store_cv_values)
    model.fit(train[0], train[1])
    model_name = 'RidgeClassifierCV'
    y_hat = model.predict(test[0])

    if metrics == 'f1_score':
        accuracy = f1_score(test[1], y_hat)
    elif metrics == 'jaccard_score':
        accuracy = jaccard_score(test[1], y_hat)
    elif metrics == 'accuracy_score':
        accuracy = accuracy_score(test[1], y_hat)
    else:
        raise ValueError(f"Unsupported metrics value: {metrics!r}")

    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def test_rocket_on_gunpoint():
    """Test of Rocket on gun point."""
    X_training, Y_training = load_gunpoint(split="train", return_X_y=True)

    # 'fit' infers data dimensions and generates the random kernels.
    rocket = Rocket(num_kernels=10_000)
    rocket.fit(X_training)

    # Transformed training data has num_kernels * 2 features per example.
    X_training_transform = rocket.transform(X_training)
    np.testing.assert_equal(X_training_transform.shape,
                            (len(X_training), 20_000))

    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                   normalize=True)
    classifier.fit(X_training_transform, Y_training)

    X_test, Y_test = load_gunpoint(split="test", return_X_y=True)
    X_test_transform = rocket.transform(X_test)
    np.testing.assert_equal(X_test_transform.shape, (len(X_test), 20_000))

    # On GunPoint this setup should be perfectly accurate.
    predictions = classifier.predict(X_test_transform)
    accuracy = accuracy_score(predictions, Y_test)
    assert accuracy == 1.0
def test_minirocket_multivariate_on_basic_motions():
    """Test of MiniRocketMultivariate on basic motions."""
    X_training, Y_training = load_basic_motions(split="train", return_X_y=True)

    # 'fit' infers data dimensions and generates the random kernels.
    minirocket = MiniRocketMultivariate()
    minirocket.fit(X_training)

    # Feature count is the nearest multiple of 84 below 10,000.
    X_training_transform = minirocket.transform(X_training)
    np.testing.assert_equal(X_training_transform.shape,
                            (len(X_training), 9_996))

    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                   normalize=True)
    classifier.fit(X_training_transform, Y_training)

    X_test, Y_test = load_basic_motions(split="test", return_X_y=True)
    X_test_transform = minirocket.transform(X_test)
    np.testing.assert_equal(X_test_transform.shape, (len(X_test), 9_996))

    # On BasicMotions this setup should be perfectly accurate.
    predictions = classifier.predict(X_test_transform)
    accuracy = accuracy_score(predictions, Y_test)
    assert accuracy == 1.0
def test_multirocket_on_gunpoint():
    """Test of MultiRocket on gun point."""
    X_training, Y_training = load_gunpoint(split="train", return_X_y=True)

    # 'fit' infers data dimensions and generates the random kernels.
    multirocket = MultiRocket()
    multirocket.fit(X_training)

    # Feature count: nearest multiple of 4*84=336 below 50,000 (2*4*6_250).
    X_training_transform = multirocket.transform(X_training)
    np.testing.assert_equal(X_training_transform.shape,
                            (len(X_training), 49_728))

    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                   normalize=True)
    classifier.fit(X_training_transform, Y_training)

    X_test, Y_test = load_gunpoint(split="test", return_X_y=True)
    X_test_transform = multirocket.transform(X_test)
    np.testing.assert_equal(X_test_transform.shape, (len(X_test), 49_728))

    # On GunPoint this setup should be better than 99% accurate.
    predictions = classifier.predict(X_test_transform)
    accuracy = accuracy_score(predictions, Y_test)
    assert accuracy > 0.99
def Classify(Circles, Rects, model_name='RG'):
    """Fit a linear separator between circle (label 0) and rect (label 1) shapes.

    Prints the training accuracy and returns the line drawn from the model's
    intercept and coefficients via ``draw_classifier_helper``.
    """
    X, y = [], []
    for shape in Circles:
        X.append(shape.position)
        y.append(0)
    for shape in Rects:
        X.append(shape.position)
        y.append(1)
    X = np.array(X)
    y = np.array(y)

    # Re-centre coordinates on (400, 300) — presumably the canvas centre.
    X[:, 0] -= 400
    X[:, 1] -= 300

    if model_name == 'RG':
        model = RidgeClassifierCV()
    if model_name == 'LR':
        model = LogisticRegression()
    model.fit(X, y)

    coef = model.coef_
    intercept = model.intercept_
    print(model.score(X, y))
    return draw_classifier_helper([intercept[0], coef[0][0], coef[0][1]])
def sklearn_ridge_cv(Xtrain, Ytrain, Xtest, Ytest, *args, **kwargs):
    """Fit a RidgeClassifierCV on the train split and return test accuracy.

    Extra positional/keyword arguments are accepted for interface
    compatibility but ignored.
    """
    classifier = RidgeClassifierCV(fit_intercept=True)
    classifier.fit(Xtrain, Ytrain)
    return classifier.score(Xtest, Ytest)
# Notebook cells comparing several classifiers on the same train/test split.
lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
lr.fit(X_train, Y_train)
Y_lr = lr.predict(X_test)
print(accuracy_score(Y_test, Y_lr))

# In[14]: SVC with an RBF kernel
svc = SVC(C=1.0, kernel='rbf')
svc.fit(X_train, Y_train)
Y_SVC = svc.predict(X_test)
accuracy_score(Y_test, Y_SVC)  # notebook cell: value displayed, not printed

# In[15]: ridge classifier over a small alpha grid
rc = RidgeClassifierCV(alphas=(0.1, 1.0, 10.0))
rc.fit(X_train, Y_train)
Y_rc = rc.predict(X_test)
rc.decision_function(X_test)

# In[16]: decision tree
tr = DecisionTreeClassifier(criterion='gini')
tr.fit(X_train, Y_train)
Y_tr = tr.predict(X_test)
accuracy_score(Y_test, Y_tr)

# In[17]: random forest
rf = RandomForestClassifier(n_estimators=10, criterion='gini')
rf.fit(X_train, Y_train)
Y_rf = rf.predict(X_test)
# NOTE(review): fragment of a per-run benchmark loop (cf. the `run` benchmark
# above): it times the test transform, RidgeClassifierCV training, and scoring
# into `_timings` rows 1-3, then stores the mean accuracy per dataset.
# The enclosing `for` header (defining `i`, `kernels`, `X_training_transform`)
# is outside this view, so the original text is preserved verbatim.
time_b = time.perf_counter() _timings[0, i] = time_b - time_a # -- transform test ---------------------------------------------------- time_a = time.perf_counter() X_test_transform = apply_kernels(X_test, kernels) time_b = time.perf_counter() _timings[1, i] = time_b - time_a # -- training ---------------------------------------------------------- time_a = time.perf_counter() classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True) classifier.fit(X_training_transform, Y_training) time_b = time.perf_counter() _timings[2, i] = time_b - time_a # -- test -------------------------------------------------------------- time_a = time.perf_counter() _results[i] = classifier.score(X_test_transform, Y_test) time_b = time.perf_counter() _timings[3, i] = time_b - time_a print("Done.") # -- store results --------------------------------------------------------- results.loc[dataset_name, "accuracy_mean"] = _results.mean()
# NOTE(review): Python 2 script fragment comparing regressors/classifiers with
# k-fold CV; it fits several models per fold and prints a mean-squared-error
# style score for each. `gdc`, `lr`, and `et` are defined outside this view,
# and the exact extent of the `for` loop body cannot be recovered from this
# collapsed text, so the original is preserved verbatim. Uses the long-removed
# pandas `.ix` indexer and the old `KFold(n, n_folds=...)` signature — porting
# to Python 3 / modern pandas would need `.loc`/`.iloc` and `KFold(n_splits=)`.
rgr = RadiusNeighborsRegressor() forest = RandomForestRegressor(n_estimators = 100, n_jobs = 2, oob_score=True) adaboost = AdaBoostRegressor() nb = GaussianNB() rd = RidgeClassifierCV() kf = KFold(report.shape[0], n_folds = 5) for train_index, test_index in kf: #print("TRAIN:", train_index, "TEST:", test_index) X_train, X_test = variables.ix[list(train_index),], variables.ix[list(test_index),] y_train = report['survey_participant'].ix[list(train_index),] y_test = report['survey_participant'].ix[list(test_index),] forest.fit(X_train,y_train) adaboost.fit(X_train,y_train) gdc.fit(X_train, y_train) rd.fit(X_train, y_train) rgr.fit(X_train, y_train) nb.fit(X_train, y_train) lr.fit(X_train, y_train) et.fit(X_train, y_train) #print forest.feature_importances_ y_hat = list(gdc.predict(X_test)) print 'GDC', sum((y_hat-y_test)**2)/float(len(y_test)) y_hat = list(rd.predict(X_test)) print 'RD', sum((y_hat-y_test)**2)/float(len(y_test)) y_hat = list(et.predict(X_test)) print 'ET', sum((y_hat-y_test)**2)/float(len(y_test)) y_hat = list(lr.predict(X_test)) print 'LR', sum((y_hat-y_test)**2)/float(len(y_test)) y_hat = list(forest.predict(X_test)) print 'RFRegressor', sum(((y_hat)-y_test)**2)/float(len(y_test))
if __name__ == "__main__":
    # Generate some fake data, split, and scale.
    X, y = make_classification(n_samples=1000, n_informative=5,
                               n_redundant=6, random_state=4)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=4)
    scaler = StandardScaler().fit(X_train)
    X_train_standard = scaler.transform(X_train)
    X_test_standard = scaler.transform(X_test)

    # Specify classifiers.
    ridge = RidgeClassifierCV(alphas=np.logspace(-3, 1, 20))
    lasso = LogisticRegressionCV(Cs=np.logspace(-3, 1, num=20))
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)

    # Train: linear models on standardized features, forest on raw ones.
    ridge.fit(X_train_standard, y_train)
    lasso.fit(X_train_standard, y_train)
    forest.fit(X_train, y_train)

    # Predicted values.
    ridge_preds = ridge.predict(X_test_standard)
    lasso_preds = lasso.predict(X_test_standard)
    forest_preds = forest.predict(X_test)

    # Confusion matrices.
    c1 = confusion_matrix(y_test, ridge_preds)
    c2 = confusion_matrix(y_test, lasso_preds)
    c3 = confusion_matrix(y_test, forest_preds)

    # Collect predictions to compare results in a plot.
    preds = [ridge_preds, lasso_preds, forest_preds]