Example #1
0
def test_ridge_classifier_cv_store_cv_values(scoring):
    """Check the shape of ``cv_values_`` for both 1-D and 2-D targets."""
    features = np.array(
        [[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    labels = np.array([1, 1, 1, -1, -1])

    alpha_grid = [1e-1, 1e0, 1e1]

    # Wrap a callable metric in a scorer; string names pass through as-is.
    if callable(scoring):
        scorer = make_scorer(scoring)
    else:
        scorer = scoring

    clf = RidgeClassifierCV(alphas=alpha_grid,
                            cv=None,
                            store_cv_values=True,
                            scoring=scorer)

    # 1-D target: exactly one output column is expected.
    clf.fit(features, labels)
    assert clf.cv_values_.shape == (features.shape[0], 1, len(alpha_grid))

    # 2-D target: one output column per target.
    labels = np.array([[1, 1, 1, -1, -1],
                       [1, -1, 1, -1, 1],
                       [-1, -1, 1, -1, -1]]).transpose()
    clf.fit(features, labels)
    assert clf.cv_values_.shape == (
        features.shape[0], labels.shape[1], len(alpha_grid))
Example #2
0
def linear_models(x_train, y_train):
    """Fit five sklearn linear classifiers and print their training accuracy.

    Returns the fitted models in the fixed order: LogisticRegression,
    PassiveAggressiveClassifier, RidgeClassifierCV, SGDClassifier,
    Perceptron.
    """
    from sklearn.linear_model import (LogisticRegression,
                                      PassiveAggressiveClassifier,
                                      RidgeClassifierCV, SGDClassifier,
                                      Perceptron)

    models = [
        ('LogisticRegression',
         LogisticRegression(C=1.2, random_state=0, max_iter=1500)),
        ('PassiveAggressiveClassifier', PassiveAggressiveClassifier()),
        ('RidgeClassifierCV', RidgeClassifierCV()),
        ('SGDClassifier', SGDClassifier()),
        ('Perceptron', Perceptron()),
    ]

    # Train every model, then report all scores together.
    for _, estimator in models:
        estimator.fit(x_train, y_train)

    for label, estimator in models:
        print(label + ' training accuracy: ',
              estimator.score(x_train, y_train))

    return tuple(estimator for _, estimator in models)
Example #3
0
def run(train_file, test_file, num_seq, n_jobs):
    """Train and evaluate a ridge classifier on Aho-Corasick substring features.

    Parameters
    ----------
    train_file, test_file : paths understood by ``load_data``.
    num_seq : number of random features generated from the training sequences.
    n_jobs : parallelism forwarded to ``create_fvec``.

    Prints progress and the final test-set accuracy.
    """
    print("Load train data")
    y, s = load_data(train_file)

    print("Generate random features")
    ss = generate_features(s, num_seq)

    print("Generate automaton")
    A = ahocorasick.Automaton()
    for idx, f in enumerate(ss):
        A.add_word(f, (idx, f))
    A.make_automaton()

    print("Extract Feature Vectors of train data")
    fvec = create_fvec(s, A, n_jobs)

    print("Learn classifier")
    # ``normalize=True`` was deprecated in scikit-learn 1.0 and removed in
    # 1.2; per the deprecation notice, scale explicitly in a pipeline instead
    # (with_mean=False keeps sparse feature matrices safe).
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    cls = make_pipeline(
        StandardScaler(with_mean=False),
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)),
    )
    cls.fit(fvec, y)

    print("Load test data")
    y_test, s_test = load_data(test_file)
    print("Extract Feature Vector of test data")
    fvec_test = create_fvec(s_test, A, n_jobs)

    print("Predict")
    print(cls.score(fvec_test, y_test))
Example #4
0
class TestServinator(unittest.TestCase):
    """Tests for the servinator model-serving helpers."""

    def setUp(self):
        # Train a small model on iris purely to have a fitted estimator for
        # the ModelBackend wrapper; the particular data is irrelevant here.
        data = load_iris()
        x_train, x_test, y_train, y_test = train_test_split(data.data,
                                                            data.target,
                                                            test_size=0.2,
                                                            random_state=67,
                                                            stratify=data.target)

        # NOTE(review): 'logloss' is not a recognized sklearn scoring string
        # (the valid name is 'neg_log_loss'), and ``normalize=`` was removed
        # from RidgeClassifierCV in scikit-learn 1.2 -- this setup can only
        # run against an old, pinned scikit-learn; confirm the pin.
        self.model = RidgeClassifierCV(normalize=True,
                                       scoring='logloss',
                                       cv=3,
                                       class_weight='balanced')
        self.model.fit(x_train, y_train)
        self.test_data = x_test
        self.test_target = y_test

        model_backend = ModelBackend(self.model)

        # self.app = servinator('test', model_backend).test_client()

    def test_json_to_model_input(self):
        # As shown by the fixture below, _json_to_model_input lower-cases
        # text fields, strips punctuation, splits the location, and expands
        # the end_date timestamp into calendar features.
        raw_json = '''[{"end_date": "2014-10-08T14:52:44-04:00", "location": "Louisville, KY", "pledged": "70.0", "goal": "10000.0", "category": "Food", "author": "Joe Banet", "backers": "4", "blurb": "Krazy Joe's soon to be famous kimchi and bourbon barrels which have long ago been enjoyed come together at last. Bourbon aged kimchi!", "title": "Krazy Joe's Bourbon Barrel Kimchi", "full_text": "I like kimchi. I like to make kimchi. I think I'm pretty good at it. My goal is to create a Bourbon barrel aged kimchi and share it with the world. This is just a start to company that could greatly expand and diversify into many products that all have one common denominator...kimchi. Thank you for your interest and support! ; "}]'''

        expected_data = {
                 'author': {0: 'joe banet'},
                 'backers': {0: '4'},
                 'blurb': {0: 'krazy joes soon to be famous kimchi and bourbon barrels which have long ago been enjoyed come together at last bourbon aged kimchi'},
                 'day': {0: 8},
                 'dayofweek': {0: 2},
                 'dayofyear': {0: 281},
                 'full_text': {0: 'i like kimchi i like to make kimchi i think im pretty good at it my goal is to create a bourbon barrel aged kimchi and share it with the world this is just a start to company that could greatly expand and diversify into many products that all have one common denominator kimchi thank you for your interest and support'},
                 'goal': {0: '10000.0'},
                 'hour': {0: 18},
                 'loc1': {0: ''},
                 'loc2': {0: 'louisville'},
                 'loc3': {0: 'ky'},
                 'minute': {0: 52},
                 'month': {0: 10},
                 'pledged': {0: '70.0'},
                 'title': {0: 'krazy joes bourbon barrel kimchi'},
                 'weekday': {0: 2},
                 'weekofyear': {0: 41},
                 'year': {0: 2014}}

        df_json = _json_to_model_input(raw_json)
        df_expected = pd.DataFrame(expected_data)

        # Sort columns on both sides so ordering differences cannot fail
        # the equality check.
        df_json = df_json.loc[:, sorted(df_json.columns.values)]
        df_expected = df_expected.loc[:, sorted(df_expected.columns.values)]

        self.assertTrue(df_expected.equals(df_json))


    def test_e2e(self):
        '''test full load and predict of training data and assert it matches
        offline result'''
        pass
Example #5
0
def test_ridge_classifier_with_scoring(filter_, scoring, cv):
    """Non-regression smoke test for #14672: RidgeClassifierCV must accept
    every combination of scoring and cross-validation without raising."""
    if callable(scoring):
        scorer = make_scorer(scoring)
    else:
        scorer = scoring
    model = RidgeClassifierCV(scoring=scorer, cv=cv)
    # fit/predict simply must not raise.
    model.fit(filter_(X_iris), y_iris).predict(filter_(X_iris))
Example #6
0
 def _fit_estimator(self, rocket, X, y):
     """Fit a single ensemble member.

     ``X`` is transformed with the given ``rocket`` kernel transformer and a
     cross-validated ridge classifier is fit on the result.

     Returns a two-element list: the fitted (rocket, ridge) pipeline and,
     when ``self.save_transformed_data`` is set, the transformed features
     (otherwise ``None``).
     """
     transformed_x = rocket.fit_transform(X)
     # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV in
     # scikit-learn 1.2 -- this only runs on older versions; confirm pinning.
     ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
     ridge.fit(transformed_x, y)
     return [
         make_pipeline(rocket, ridge),
         transformed_x if self.save_transformed_data else None,
     ]
def ridge_classification(X_train, X_test, y_train, y_test):
    """Preprocess the splits, fit a RidgeClassifierCV, and plot its results."""
    from sklearn.linear_model import RidgeClassifierCV

    X_train, X_test = preprocess(X_train, X_test)

    model = RidgeClassifierCV()
    model.fit(X_train, y_train)

    predictions = np.round(model.predict(X_test)).flatten()
    plot_model(model, X_train, y_train, y_test, predictions,
               "RidgeClassifierCV")
Example #8
0
def do_rcv(X_test, X_train, Y_train):
    # creating a classifier of loss function "hinge" and penalty function "l2"
    clf = RidgeClassifierCV()
    print "starts fitting"
    print clf.fit(X_train, Y_train)
    print "finished fitting, starts predictions"
    Y_pred = clf.predict(X_test)
    print "finished predictions"
    return Y_pred
Example #9
0
 def _fit_estimator(self, rocket, X, y):
     """Fit a single ensemble member with explicit feature scaling.

     ``X`` is transformed with ``rocket``, scaled without mean-centering
     (so sparse-style data stays safe), and used to fit a cross-validated
     ridge classifier.

     Returns a two-element list: the fitted (rocket, scaler, ridge)
     pipeline and, when ``self.save_transformed_data`` is set, the
     transformed features (otherwise ``None``).
     """
     transformed_x = rocket.fit_transform(X)
     scaler = StandardScaler(with_mean=False)
     scaler.fit(transformed_x, y)
     ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
     ridge.fit(scaler.transform(transformed_x), y)
     return [
         make_pipeline(rocket, scaler, ridge),
         transformed_x if self.save_transformed_data else None,
     ]
Example #10
0
def ridge(X, X_train, X_val, y_train, y_val, X_test, y_test):
    """Train a RidgeClassifierCV on a small alpha grid, persist it, and
    return the train/validation/test F1 scores."""
    clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1])
    clf.fit(X_train, y_train)

    # Persist alongside the full-data shape for later reloading.
    save_model(clf, X.shape)

    (tr_f1, val_f1, test_f1,
     tr_acc, val_acc, test_acc) = model_performance(
         clf, X_train, y_train, X_val, y_val, X_test, y_test)
    return tr_f1, val_f1, test_f1
Example #11
0
def agent(path="./", dataset="", ratio=False, seg=0.75, folder="temp"):
    """Run the PAAStat + ridge pipeline on one UCR-style dataset.

    Loads ``{dataset}_TRAIN.ts`` / ``{dataset}_TEST.ts`` from ``path``,
    transforms both splits with PAAStat, standardizes the features, fits a
    cross-validated ridge classifier, and writes a one-row CSV of accuracy
    and timing results to ``./{folder}/{dataset}.csv``.
    """
    current_process().name = dataset

    start1 = time.time()
    train_x, train_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TRAIN.ts")
    test_x, test_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TEST.ts")

    print(f"{dataset}: Train Shape {train_x.shape}")
    print(f"{dataset}: Test Shape {test_x.shape}")

    scaler = StandardScaler()

    transform_time1 = time.time()

    mod_train = PAAStat(paa_=ratio, seg_=seg).transform(train_x.values)
    mod_train = scaler.fit(mod_train).transform(mod_train)

    mod_test = PAAStat(paa_=ratio, seg_=seg).transform(test_x.values)
    mod_test = scaler.transform(mod_test)

    transform_time2 = time.time()
    # The features are already standardized above, so the redundant
    # ``normalize=True`` (deprecated in scikit-learn 1.0, removed in 1.2)
    # has been dropped for compatibility with modern scikit-learn.
    model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    train_time1 = time.time()
    model.fit(mod_train, train_y)
    preds = model.predict(mod_test)
    train_time2 = time.time()

    acc1 = accuracy_score(preds, test_y) * 100

    end1 = time.time()
    print(
        f"Dataset: {dataset}, AccuracyRidge: {acc1}, Time taken: {(end1 - start1)/60}, "
        f"Transfrom_time: {(transform_time2-transform_time1)/60}, train_time: {(train_time2-train_time1)/60}"
    )

    # NOTE: the 'Transfrom_time' spelling is kept deliberately -- the CSV
    # column name is part of the output schema downstream consumers read.
    results = pd.DataFrame({
        'Dataset':
        dataset,
        'AccuracyRidge': [acc1],
        'Time (min)': [(end1 - start1) / 60],
        'Transfrom_time (min)': [(transform_time2 - transform_time1) / 60],
        'train_time (min)': [(train_time2 - train_time1) / 60]
    })

    temp_path = './' + folder
    # exist_ok avoids the check-then-create race under multiprocessing.
    os.makedirs(temp_path, exist_ok=True)
    results.to_csv(os.path.join(temp_path + f'/{dataset}.csv'), index=False)
Example #12
0
def _test_ridge_classifiers(filter_):
    """Smoke-check RidgeClassifier and RidgeClassifierCV on the iris data."""
    expected_coef_shape = (np.unique(y_iris).shape[0], X_iris.shape[1])

    for model in (RidgeClassifier(), RidgeClassifierCV()):
        model.fit(filter_(X_iris), y_iris)
        assert model.coef_.shape == expected_coef_shape
        predictions = model.predict(filter_(X_iris))
        assert np.mean(y_iris == predictions) > .79

    # An explicit CV splitter object must also be accepted.
    model = RidgeClassifierCV(cv=KFold(5))
    model.fit(filter_(X_iris), y_iris)
    predictions = model.predict(filter_(X_iris))
    assert np.mean(y_iris == predictions) >= 0.8
Example #13
0
def test_class_weights_cv():
    """Class weights must influence the cross-validated ridge classifier."""
    X = np.array(
        [[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    # Unweighted fit: a plain smoke test.
    model = RidgeClassifierCV(class_weight=None, alphas=[.01, .1, 1])
    model.fit(X, y)

    # A tiny weight on class 1 should flip the prediction to -1.
    model = RidgeClassifierCV(class_weight={1: 0.001}, alphas=[.01, .1, 1, 10])
    model.fit(X, y)
    assert_array_equal(model.predict([[-.2, 2]]), np.array([-1]))
Example #14
0
def test_ridge_regression_custom_scoring(filter_, cv):
    """A constant custom scorer ties all alphas; the first one must win."""

    def constant_score(y_test, y_pred):
        # Same score for every alpha, forcing a tie.
        return 0.42

    alphas = np.logspace(-2, 2, num=5)
    model = RidgeClassifierCV(alphas=alphas,
                              scoring=make_scorer(constant_score),
                              cv=cv)
    model.fit(filter_(X_iris), y_iris)

    assert model.best_score_ == pytest.approx(0.42)
    # The tie-breaking strategy keeps the first alpha tried.
    assert model.alpha_ == pytest.approx(alphas[0])
def train_model(X, Y):
    print "Training LR..."
    modelLR = LogisticRegression(penalty='l1', C=100, tol=1e-10)
    modelLR.fit(X.toarray(), Y)
    
    print "Training RC..."
    modelRC = RidgeClassifierCV(alphas=[ 0.1, 1., 10. ])
    modelRC.fit(X.toarray(), Y)
    
    print "Training GBC..."
    modelGBC = GradientBoostingClassifier(subsample=0.5, max_depth=6, n_estimators=50)
    modelGBC.fit(X.toarray(), Y)

    
    return modelGBC, modelRC, modelLR
Example #16
0
def generate_model(X, y, model='linear', regularizer='ridge'):
    """Fit and return a classifier selected by name.

    ``model`` chooses the family ('linear', 'logistic', or 'svm'); for the
    linear family ``regularizer`` must be 'ridge'.  Any other combination
    raises ValueError.
    """
    if model == 'linear' and regularizer == 'ridge':
        clf = RidgeClassifierCV()
    elif model == 'linear':
        raise ValueError("Unknown Regularizer")
    elif model == 'logistic':
        clf = LogisticRegressionCV()
    elif model == 'svm':
        clf = SGDClassifier()
    else:
        raise ValueError("Unexpected Model Type")

    clf.fit(X, y)
    return clf
Example #17
0
def main():
  """Credit-scoring pipeline: featurize train+test together, report a
  cross-validated AUC for a ridge model, then fit it on the training rows."""
  train_input = pd.read_csv('../input/train.csv')
  test_input = pd.read_csv('../input/test.csv')
  # Concatenated so the featurizer sees the full value distribution.
  data = pd.concat([train_input, test_input])
  # We don't have data on whether person is delinquent

  featurizer = CreditScoreFeaturizer() # Create our own features

  print "Transforming dataset into features..."
  ##Create matrix of features from raw dataset
  X = featurizer.fit_transform(data)
  # Split back into the original train/test rows by position.
  X_train = X[:len(train_input)]
  X_test = X[len(train_input):]

  ## Use any model that we might find appropriate
  model = RidgeClassifierCV(alphas=[ 0.1, 1., 10. ])

  ##Create the object and set relevant parameters
  #model = LogisticRegression(C=10) # Can also switch different models (e.g. Ridge)

  ##Set target variable y
  y = train_input.SeriousDlqin2yrs

  print "Cross validating..."
  print np.mean(cross_val_score(model, X_train, y, scoring='roc_auc')) # Scoring metric is now AUC

  print "Training final model..."
  model = model.fit(X_train, y)
Example #18
0
class ROCKET():
    """ROCKET time-series classifier: random convolutional kernels feeding a
    cross-validated ridge classifier."""

    def __init__(self, num_kernels=100):
        # Number of random kernels generated at train time.
        self.num_kernels = num_kernels

    def train(self, X_train, Y_train):
        """Generate kernels sized to ``X_train``, transform it, and fit the
        ridge classifier.  Returns the fitted classifier."""

        # Presumably a throwaway warm-up call to trigger (numba) compilation
        # of the kernel routines before the real work -- TODO confirm.
        _ = generate_kernels(100, 10)
        apply_kernels(np.zeros_like(X_train)[:, 1:], _)

        input_length = X_train.shape[1]

        self.kernels = generate_kernels(input_length, self.num_kernels)
        X_transform = apply_kernels(X_train, self.kernels)
        # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV
        # in scikit-learn 1.2; this requires an older pinned version.
        self.classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                            normalize=True)

        return self.classifier.fit(X_transform, Y_train)

    def test(self, X_test, Y_test):
        """Transform ``X_test`` with the trained kernels and return the
        classifier's accuracy score."""
        X_transform = apply_kernels(X_test, self.kernels)
        results = self.classifier.score(X_transform, Y_test)

        return results
Example #19
0
class _RidgeClassifierCVImpl:
    """Thin adapter that delegates fitting and inference to ``Op``."""

    def __init__(self, **hyperparams):
        # Keep the raw hyperparameters around and build the wrapped model.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        # Forward to the wrapped estimator, passing y only when provided.
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
class Classifier_Rocket:
    """ROCKET classifier wrapper: random kernels + cross-validated ridge."""

    def __init__(self, output_directory, input_shape, nb_classes, verbose):
        # Only bookkeeping here; kernels and the ridge model are built in fit().
        if verbose:
            print("[Rocket] Creating Rocket classifier")

        self.verbose = verbose
        self.output_directory = output_directory
        self.input_shape = input_shape
        self.nb_classes = nb_classes

    def fit(self, Ximg_train, yimg_train):
        """Generate kernels for the training data, transform it, and fit the
        ridge classifier; records the elapsed time in ``self.duration``."""
        start_time = time.time()
        if self.verbose:
            print('[Rocket] Generating kernels')
        self.kernels = generate_kernels(Ximg_train.shape[1], 10000,
                                        Ximg_train.shape[2])

        if self.verbose:
            print('[Rocket] Applying kernels')
        X_training_transform = apply_kernels(Ximg_train, self.kernels)

        if self.verbose:
            print('[Rocket] Training')
        # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV
        # in scikit-learn 1.2; this requires an older pinned version.
        self.classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                            normalize=True)
        self.classifier.fit(X_training_transform, yimg_train)

        self.duration = time.time() - start_time
        if self.verbose:
            print('[Rocket] Training done!, took {}s'.format(self.duration))

    def predict(self, Ximg, yimg):
        """Transform ``Ximg``, evaluate via ``predict_model``, and write a
        metrics CSV to the output directory.

        Returns (model_metrics, conf_mat).
        """
        if self.verbose:
            print('[Rocket] Predicting')
        X_test_transform = apply_kernels(Ximg, self.kernels)

        model_metrics, conf_mat, y_true, y_pred = predict_model(
            self.classifier, X_test_transform, yimg, self.output_directory)

        df_metrics = calculate_metrics(y_true, y_pred, self.duration)
        df_metrics.to_csv(self.output_directory + 'df_metrics.csv',
                          index=False)

        if self.verbose:
            print('[Rocket] Prediction done!')
        return model_metrics, conf_mat
Example #21
0
def run(training_data, test_data, num_runs=10, num_kernels=10_000):
    """Benchmark ROCKET over ``num_runs`` repetitions.

    Both arrays are expected with the class label in column 0 and the series
    values in the remaining columns.  Returns the per-run accuracies and a
    (4, num_runs) array of timings.
    """
    results = np.zeros(num_runs)
    timings = np.zeros(
        [4, num_runs])  # training transform, test transform, training, test

    # NOTE(review): ``np.int`` was removed in NumPy 1.24 -- use plain ``int``
    # when upgrading.
    Y_training, X_training = training_data[:,
                                           0].astype(np.int), training_data[:,
                                                                            1:]
    Y_test, X_test = test_data[:, 0].astype(np.int), test_data[:, 1:]

    for i in range(num_runs):
        # Fresh random kernels each run.
        input_length = X_training.shape[1]
        kernels = generate_kernels(input_length, num_kernels)

        # -- transform training ------------------------------------------------

        time_a = time.perf_counter()
        X_training_transform = apply_kernels(X_training, kernels)
        time_b = time.perf_counter()
        timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------

        time_a = time.perf_counter()
        # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV
        # in scikit-learn 1.2; requires an older pinned version.
        classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                       normalize=True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        timings[2, i] = time_b - time_a

        # -- test --------------------------------------------------------------

        time_a = time.perf_counter()
        results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        timings[3, i] = time_b - time_a

    return results, timings
Example #22
0
def predict_by_core(root, X_test, y_test):
    """Score a ridge classifier trained on the tree's leaf "core" vectors.

    Each non-empty leaf of ``root`` contributes its weight vector ``leaf.w``
    as one training sample, labelled with the majority class of the samples
    assigned to that leaf.  Returns the classifier's accuracy on
    ``(X_test, y_test)`` and the list of non-empty leaves.
    """
    leaves = [
        node for node in LevelOrderIter(root)
        if node.is_leaf and len(node.y) > 0
    ]

    # ``leaves`` already excludes empty nodes, so the inner guards below are
    # redundant but harmless.
    X_arch_core = np.array(
        [leaf.w.squeeze() for leaf in leaves if len(leaf.y) > 0])
    y_arch_core = []
    for leaf in leaves:
        if len(leaf.y) > 0:
            # Majority label among the samples assigned to this leaf.
            unique_y, count_y = np.unique(leaf.y, return_counts=True)
            amax = np.argmax(count_y)
            y_arch_core.append(unique_y[amax])
    y_arch_core = np.array(y_arch_core)

    model = RidgeClassifierCV()
    model.fit(X_arch_core, y_arch_core)
    accuracy = model.score(X_test, y_test)

    # Earlier Keras-based alternative kept for reference:
    #	print ('# samples: %d' %(len(y_arch_core)))
    #
    #	model = Sequential()
    #	model.add(Dense(3, input_dim=X_arch_core.shape[1]))
    #	model.add(Activation('relu'))
    #	model.add(Dense(3))
    #	model.add(Activation('relu'))
    #
    #	# For a multi-class classification problem
    #	model.compile(optimizer='rmsprop',
    #	              loss='categorical_crossentropy',
    #	              metrics=['accuracy'])
    #
    #	# Convert labels to categorical one-hot encoding
    #	one_hot_y = to_categorical(y_arch_core, num_classes=len(np.unique(y_arch_core)))
    #	# Train the model, iterating on the data in batches of 32 samples
    #	model.fit(X_arch_core, one_hot_y, epochs=10, batch_size=32)
    #
    #	y_pred = model.predict(X_test, batch_size=100)
    #	y_pred1D = y_pred.argmax(1)
    #
    #	print ('Accuracy on validation data: %.2f' %(accuracy))

    return accuracy, leaves
Example #23
0
    def _train_probas_for_estimator(self, y, idx):
        """Out-of-bag class-vote estimates for ensemble member ``idx``.

        A bootstrap subsample is drawn with a seed derived from
        ``self.random_state`` and ``idx``; a ridge classifier is fit on the
        subsample and its predictions on the out-of-bag instances are
        accumulated (weighted by ``self.weights_[idx]``) into an
        (n_instances, n_classes) array.  Returns that array and the list of
        out-of-bag indices.
        """
        # Derive a per-estimator seed; 0 is remapped so the multiplication
        # below cannot collapse to the same seed for every idx.
        rs = 255 if self.random_state == 0 else self.random_state
        rs = None if self.random_state is None else rs * 37 * (idx + 1)
        rng = check_random_state(rs)

        # Bootstrap sample (with replacement); everything not drawn is OOB.
        indices = range(self.n_instances_)
        subsample = rng.choice(self.n_instances_, size=self.n_instances_)
        oob = [n for n in indices if n not in subsample]

        # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV
        # in scikit-learn 1.2; requires an older pinned version.
        clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
        clf.fit(self.transformed_data_[idx].iloc[subsample], y[subsample])
        preds = clf.predict(self.transformed_data_[idx].iloc[oob])

        results = np.zeros((self.n_instances_, self.n_classes_))
        for n, pred in enumerate(preds):
            results[oob[n]][self._class_dictionary[pred]] += self.weights_[idx]

        return results, oob
Example #24
0
def CITE_RIDGE(ax, targCell, numFactors=10, RNA=False):
    """Fit a ridge classifier to the CITE data and plot the ``numFactors``
    markers whose coefficients for ``targCell`` are largest.

    Parameters
    ----------
    ax : matplotlib axes to draw the bar plot on.
    targCell : cell-type label whose coefficient row is plotted.
    numFactors : number of top markers to keep.
    RNA : load the RNA CITE table instead of the surface-marker table.

    Returns the DataFrame of the plotted markers and coefficients.
    """
    if RNA:
        RIDGE_DF = importRNACITE()
    else:
        RIDGE_DF = importCITE()
    # Filtering on every unique CellType2 keeps all rows; retained so it is
    # easy to restrict to a subset of cell types later.
    cellToI = RIDGE_DF.CellType2.unique()
    RIDGE_DF = RIDGE_DF.loc[(RIDGE_DF["CellType2"].isin(cellToI)), :]
    cellTypeCol = RIDGE_DF.CellType2.values
    # Drop label/metadata columns so only marker measurements remain.
    RIDGE_DF = RIDGE_DF.loc[:, ((RIDGE_DF.columns != 'CellType1') &
                                (RIDGE_DF.columns != 'CellType2') &
                                (RIDGE_DF.columns != 'CellType3') &
                                (RIDGE_DF.columns != 'Cell'))]
    factors = RIDGE_DF.columns
    X = StandardScaler().fit_transform(RIDGE_DF.values)

    le = LabelEncoder()
    le.fit(cellTypeCol)
    y = le.transform(cellTypeCol)

    # The original code built a throwaway RidgeClassifierCV() first and then
    # discarded it; only this cross-validated fit is ever used.
    ridgeMod = RidgeClassifierCV(cv=5)
    ridgeMod.fit(X, y)
    TargCoefs = ridgeMod.coef_[np.where(le.classes_ == targCell), :].ravel()
    TargCoefsDF = pd.DataFrame({
        "Marker": factors,
        "Coefficient": TargCoefs
    }).sort_values(by="Coefficient")
    # tail() keeps the largest coefficients after the ascending sort.
    TargCoefsDF = TargCoefsDF.tail(numFactors)
    sns.barplot(data=TargCoefsDF,
                x="Marker",
                y="Coefficient",
                ax=ax,
                color='k')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
    if RNA:
        ax.set(title="RIDGE Coefficients - RNA")
    else:
        ax.set(title="RIDGE Coefficients - Surface Markers")
    return TargCoefsDF
def main():
  """End-to-end credit-scoring run: featurize, cross-validate a ridge model,
  then train a bagged logistic-regression ensemble and write a submission."""
  train_input = pd.read_csv('train.csv')
  test_input = pd.read_csv('test.csv')
  # Concatenated so the featurizer sees the full value distribution.
  data = pd.concat([train_input, test_input])

  featurizer = CreditScoreFeaturizer()

  print "Transforming dataset into features..."
  ##Create matrix of features from raw dataset
  X = featurizer.fit_transform(data)
  # Split back into the original train/test rows by position.
  X_train = X[:len(train_input)]
  X_test = X[len(train_input):]

  ## Use any model that we might find appropriate
  model = RidgeClassifierCV(alphas=[ 0.1, 1., 10. ])

  ##Create the object and set relevant parameters
  #model = LogisticRegression(C=10)

  ##Set target variable y
  y = train_input.SeriousDlqin2yrs

  print "Cross validating..."
  print np.mean(cross_val_score(model, X_train, y, scoring='roc_auc', cv=10))

  print "Training final model..."
  model = model.fit(X_train, y)


  # NOTE(review): the fitted ridge model above is discarded here -- ``model``
  # is rebound to a bagged ensemble of logistic regressions, so the previous
  # fit is wasted work.  Confirm which model was intended for submission.
  n_models=5
  bag_size=0.70

  models = [LogisticRegression(C=10) for _ in xrange(n_models)]
  model = Bagging(models, bag_size)

  #Fit Final Model
  model.fit(X_train, y)

  print "Create predictions on submission set..."
  create_submission(model, X_test, test_input)
Example #26
0
def _ridgeclassifiercv(*,
                       train,
                       test,
                       x_predict=None,
                       metrics,
                       alphas=(0.1, 1.0, 10.0),
                       fit_intercept=True,
                       normalize=False,
                       scoring=None,
                       cv=None,
                       class_weight=None,
                       store_cv_values=False):
    """Fit a RidgeClassifierCV and evaluate it on the held-out split.

    Parameters
    ----------
    train, test : (X, y) pairs for fitting and evaluation.
    x_predict : optional features to predict after evaluation.
    metrics : one of 'f1_score', 'jaccard_score', 'accuracy_score'.
    Remaining keyword arguments are forwarded to RidgeClassifierCV.

    Returns ``(model_name, accuracy, y_predict_or_None)``.

    For more info visit :
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeClassifierCV.html#sklearn.linear_model.RidgeClassifierCV
    """
    # NOTE(review): ``normalize=`` was removed from RidgeClassifierCV in
    # scikit-learn 1.2; it is kept here for signature compatibility but only
    # works with an older pinned scikit-learn.
    model = RidgeClassifierCV(alphas=alphas,
                              fit_intercept=fit_intercept,
                              normalize=normalize,
                              scoring=scoring,
                              cv=cv,
                              class_weight=class_weight,
                              store_cv_values=store_cv_values)
    model.fit(train[0], train[1])
    model_name = 'RidgeClassifierCV'
    y_hat = model.predict(test[0])

    if metrics == 'f1_score':
        accuracy = f1_score(test[1], y_hat)
    elif metrics == 'jaccard_score':
        accuracy = jaccard_score(test[1], y_hat)
    elif metrics == 'accuracy_score':
        accuracy = accuracy_score(test[1], y_hat)
    else:
        # Previously an unrecognized name crashed later with NameError;
        # fail fast with a clear message instead.
        raise ValueError("unknown metrics: {!r}".format(metrics))

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
Example #27
0
def test_rocket_on_gunpoint():
    """Test of Rocket on gun point."""
    # load training data
    X_training, Y_training = load_gunpoint(split="train", return_X_y=True)

    # 'fit' ROCKET -> infer data dimensions, generate random kernels
    ROCKET = Rocket(num_kernels=10_000)
    ROCKET.fit(X_training)

    # transform training data
    X_training_transform = ROCKET.transform(X_training)

    # test shape of transformed training data -> (number of training
    # examples, num_kernels * 2)
    np.testing.assert_equal(X_training_transform.shape,
                            (len(X_training), 20_000))

    # fit classifier
    # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV in
    # scikit-learn 1.2; this test only runs against an older pinned version.
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                   normalize=True)
    classifier.fit(X_training_transform, Y_training)

    # load test data
    X_test, Y_test = load_gunpoint(split="test", return_X_y=True)

    # transform test data
    X_test_transform = ROCKET.transform(X_test)

    # test shape of transformed test data -> (number of test examples,
    # num_kernels * 2)
    np.testing.assert_equal(X_test_transform.shape, (len(X_test), 20_000))

    # predict (alternatively: 'classifier.score(X_test_transform, Y_test)')
    predictions = classifier.predict(X_test_transform)
    accuracy = accuracy_score(predictions, Y_test)

    # test predictions (on Gunpoint, should be 100% accurate)
    assert accuracy == 1.0
Example #28
0
def test_minirocket_multivariate_on_basic_motions():
    """Test of MiniRocketMultivariate on basic motions."""
    # load training data
    X_training, Y_training = load_basic_motions(split="train", return_X_y=True)

    # 'fit' MINIROCKET -> infer data dimensions, generate random kernels
    minirocket = MiniRocketMultivariate()
    minirocket.fit(X_training)

    # transform training data
    X_training_transform = minirocket.transform(X_training)

    # test shape of transformed training data -> (number of training
    # examples, nearest multiple of 84 < 10,000)
    np.testing.assert_equal(X_training_transform.shape,
                            (len(X_training), 9_996))

    # fit classifier
    # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV in
    # scikit-learn 1.2; this test only runs against an older pinned version.
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                   normalize=True)
    classifier.fit(X_training_transform, Y_training)

    # load test data
    X_test, Y_test = load_basic_motions(split="test", return_X_y=True)

    # transform test data
    X_test_transform = minirocket.transform(X_test)

    # test shape of transformed test data -> (number of test examples,
    # nearest multiple of 84 < 10,000)
    np.testing.assert_equal(X_test_transform.shape, (len(X_test), 9_996))

    # predict (alternatively: 'classifier.score(X_test_transform, Y_test)')
    predictions = classifier.predict(X_test_transform)
    accuracy = accuracy_score(predictions, Y_test)

    # test predictions (on BasicMotions, should be 100% accurate)
    assert accuracy == 1.0
Example #29
0
def test_multirocket_on_gunpoint():
    """Test of MultiRocket on gun point."""
    # load training data
    X_training, Y_training = load_gunpoint(split="train", return_X_y=True)

    # 'fit' MultiRocket -> infer data dimensions, generate random kernels
    multirocket = MultiRocket()
    multirocket.fit(X_training)

    # transform training data
    X_training_transform = multirocket.transform(X_training)

    # test shape of transformed training data -> (number of training
    # examples, nearest multiple of 4*84=336 < 50,000 (2*4*6_250))
    np.testing.assert_equal(X_training_transform.shape,
                            (len(X_training), 49_728))

    # fit classifier
    # NOTE(review): ``normalize=True`` was removed from RidgeClassifierCV in
    # scikit-learn 1.2; this test only runs against an older pinned version.
    classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                   normalize=True)
    classifier.fit(X_training_transform, Y_training)

    # load test data
    X_test, Y_test = load_gunpoint(split="test", return_X_y=True)

    # transform test data
    X_test_transform = multirocket.transform(X_test)

    # test shape of transformed test data -> (number of test examples,
    # nearest multiple of 4*84=336 < 50,000 (2*4*6_250))
    np.testing.assert_equal(X_test_transform.shape, (len(X_test), 49_728))

    # predict (alternatively: 'classifier.score(X_test_transform, Y_test)')
    predictions = classifier.predict(X_test_transform)
    accuracy = accuracy_score(predictions, Y_test)

    # test predictions (on Gunpoint, should be > 99% accurate)
    assert accuracy > 0.99
Example #30
0
def Classify(Circles, Rects, model_name='RG'):
    """Fit a linear classifier separating circle positions (label 0) from
    rectangle positions (label 1) and return the drawn decision boundary.

    Parameters
    ----------
    Circles, Rects : iterables of shapes exposing a 2-element ``position``.
    model_name : 'RG' for RidgeClassifierCV, 'LR' for LogisticRegression;
        any other value raises ValueError.
    """
    X = np.array([shape.position for shape in Circles] +
                 [shape.position for shape in Rects])
    y = np.array([0] * len(Circles) + [1] * len(Rects))

    # Re-center the coordinates (presumably the midpoint of an 800x600
    # canvas -- confirm against the drawing code).
    X[:, 0] -= 400
    X[:, 1] -= 300

    if model_name == 'RG':
        model = RidgeClassifierCV()
    elif model_name == 'LR':
        model = LogisticRegression()
    else:
        # Previously an unknown name crashed with NameError at fit time.
        raise ValueError("unknown model_name: %r" % (model_name,))

    model.fit(X, y)
    coef = model.coef_
    intercept = model.intercept_
    print(model.score(X, y))
    return draw_classifier_helper([intercept[0], coef[0][0], coef[0][1]])
Example #31
0
def sklearn_ridge_cv(Xtrain, Ytrain, Xtest, Ytest, *args, **kwargs):
    """Fit RidgeClassifierCV on the training split and return test accuracy.

    Extra positional/keyword arguments are accepted and ignored.
    """
    model = RidgeClassifierCV(fit_intercept=True)
    model.fit(Xtrain, Ytrain)
    return model.score(Xtest, Ytest)
# Exported-notebook cells (see the "# In[..]:" markers) comparing several
# classifiers on the same train/test split.

# Multinomial logistic regression.
lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
lr.fit(X_train, Y_train)
Y_lr = lr.predict(X_test)
print(accuracy_score(Y_test, Y_lr))

# In[14]:

# RBF-kernel support vector classifier; the bare accuracy_score expression
# is the notebook cell's displayed value.
svc = SVC(C=1.0, kernel='rbf')
svc.fit(X_train, Y_train)
Y_SVC = svc.predict(X_test)
accuracy_score(Y_test, Y_SVC)

# In[15]:

# Cross-validated ridge classifier over three alpha values.
rc = RidgeClassifierCV(alphas=(0.1, 1.0, 10.0))
rc.fit(X_train, Y_train)
Y_rc = rc.predict(X_test)
rc.decision_function(X_test)

# In[16]:

# Gini decision tree.
tr = DecisionTreeClassifier(criterion='gini')
tr.fit(X_train, Y_train)
Y_tr = tr.predict(X_test)
accuracy_score(Y_test, Y_tr)

# In[17]:

# 10-tree random forest.
rf = RandomForestClassifier(n_estimators=10, criterion='gini')
rf.fit(X_train, Y_train)
Y_rf = rf.predict(X_test)
        time_b = time.perf_counter()
        _timings[0, i] = time_b - time_a

        # -- transform test ----------------------------------------------------

        time_a = time.perf_counter()
        X_test_transform = apply_kernels(X_test, kernels)
        time_b = time.perf_counter()
        _timings[1, i] = time_b - time_a

        # -- training ----------------------------------------------------------

        time_a = time.perf_counter()
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                       normalize=True)
        classifier.fit(X_training_transform, Y_training)
        time_b = time.perf_counter()
        _timings[2, i] = time_b - time_a

        # -- test --------------------------------------------------------------

        time_a = time.perf_counter()
        _results[i] = classifier.score(X_test_transform, Y_test)
        time_b = time.perf_counter()
        _timings[3, i] = time_b - time_a

    print("Done.")

    # -- store results ---------------------------------------------------------

    results.loc[dataset_name, "accuracy_mean"] = _results.mean()
Example #34
0
# Candidate models; note this mixes regressors (RadiusNeighbors, random
# forest, AdaBoost) with classifiers (GaussianNB, RidgeClassifierCV).
rgr = RadiusNeighborsRegressor()
forest = RandomForestRegressor(n_estimators = 100, n_jobs = 2, oob_score=True)
adaboost = AdaBoostRegressor()
nb = GaussianNB()
rd = RidgeClassifierCV()
# 5-fold CV over the report rows (old-style KFold(n, n_folds) API).
kf = KFold(report.shape[0], n_folds = 5)

for train_index, test_index in kf:
    #print("TRAIN:", train_index, "TEST:", test_index)
    # NOTE(review): ``gdc``, ``lr`` and ``et`` are not defined in this chunk;
    # presumably created earlier in the file -- verify.
    # ``.ix`` indexing was removed in pandas 1.0; this requires old pandas.
    X_train, X_test = variables.ix[list(train_index),], variables.ix[list(test_index),]
    y_train = report['survey_participant'].ix[list(train_index),]
    y_test = report['survey_participant'].ix[list(test_index),]
    forest.fit(X_train,y_train)
    adaboost.fit(X_train,y_train)
    gdc.fit(X_train, y_train)
    rd.fit(X_train, y_train)
    rgr.fit(X_train, y_train)
    nb.fit(X_train, y_train)
    lr.fit(X_train, y_train)
    et.fit(X_train, y_train)
    #print forest.feature_importances_
    # Report per-model mean squared error on the held-out fold (printed,
    # not collected).
    y_hat = list(gdc.predict(X_test))
    print 'GDC', sum((y_hat-y_test)**2)/float(len(y_test))
    y_hat = list(rd.predict(X_test))
    print 'RD', sum((y_hat-y_test)**2)/float(len(y_test))
    y_hat = list(et.predict(X_test))
    print 'ET', sum((y_hat-y_test)**2)/float(len(y_test))
    y_hat = list(lr.predict(X_test))
    print 'LR', sum((y_hat-y_test)**2)/float(len(y_test))
    y_hat = list(forest.predict(X_test))
    print 'RFRegressor', sum(((y_hat)-y_test)**2)/float(len(y_test))
if __name__ == "__main__":

    # generate some fake data, split, and scale
    X, y = make_classification(n_samples=1000, n_informative=5, n_redundant=6, random_state=4)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
    scaler = StandardScaler().fit(X_train)
    X_train_standard = scaler.transform(X_train)
    X_test_standard = scaler.transform(X_test)

    # specify classifiers
    ridge = RidgeClassifierCV(alphas=np.logspace(-3, 1, 20))
    lasso = LogisticRegressionCV(Cs=np.logspace(-3, 1, num=20))
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)

    # train the classifiers
    ridge.fit(X_train_standard, y_train)
    lasso.fit(X_train_standard, y_train)
    forest.fit(X_train, y_train)

    # predicted values
    ridge_preds = ridge.predict(X_test_standard)
    lasso_preds = lasso.predict(X_test_standard)
    forest_preds = forest.predict(X_test)

    # confusion matrices
    c1 = confusion_matrix(y_test, ridge_preds)
    c2 = confusion_matrix(y_test, lasso_preds)
    c3 = confusion_matrix(y_test, forest_preds)

    # build a plot to compare results
    preds = [ridge_preds, lasso_preds, forest_preds]