Example #1
class ROCKET:
    def __init__(self, num_kernels=100):

        self.num_kernels = num_kernels

    def train(self, X_train, Y_train):

        # warm-up call on a tiny dummy input to trigger (e.g. Numba) compilation
        _ = generate_kernels(100, 10)
        apply_kernels(np.zeros_like(X_train)[:, 1:], _)

        input_length = X_train.shape[1]

        self.kernels = generate_kernels(input_length, self.num_kernels)
        X_transform = apply_kernels(X_train, self.kernels)
        self.classifier = RidgeClassifierCV(alphas=10**np.linspace(-3, 3, 10),
                                            normalize=True)

        return self.classifier.fit(X_transform, Y_train)

    def test(self, X_test, Y_test):

        X_transform = apply_kernels(X_test, self.kernels)
        results = self.classifier.score(X_transform, Y_test)

        return results
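A minimal smoke test for the class above, assuming generate_kernels and apply_kernels from the ROCKET reference implementation are in scope; the random walks below are stand-in data, not a real benchmark:

import numpy as np

rng = np.random.default_rng(0)
X_train = rng.standard_normal((60, 150)).cumsum(axis=1)  # fake univariate series
Y_train = rng.integers(0, 2, size=60)
X_test = rng.standard_normal((20, 150)).cumsum(axis=1)
Y_test = rng.integers(0, 2, size=20)

model = ROCKET(num_kernels=1000)
model.train(X_train, Y_train)      # fits kernels and the RidgeClassifierCV
print(model.test(X_test, Y_test))  # mean accuracy on the held-out set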
Example #2
def classify(train, test):
    X, y = train.iloc[:, 0:-1], train.iloc[:, -1]
    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                            normalize=True).fit(X, y)
    preds = clf.predict(test.iloc[:, 0:-1])

    return accuracy_score(preds, test.iloc[:, -1])
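RidgeClassifierCV picks its regularisation strength from the alphas grid using efficient built-in cross-validation (leave-one-out by default). A short sketch, using standard scikit-learn attributes, showing how to inspect what was chosen:

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifierCV

X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)).fit(X, y)
print(clf.alpha_)       # alpha selected by the internal CV
print(clf.best_score_)  # CV score achieved at that alpha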
Example #3
def linear_models(x_train, y_train):
    from sklearn.linear_model import LogisticRegression
    classifier1 = LogisticRegression(C=1.2, random_state=0, max_iter=1500)
    classifier1.fit(x_train, y_train)

    from sklearn.linear_model import PassiveAggressiveClassifier
    classifier2 = PassiveAggressiveClassifier()
    classifier2.fit(x_train, y_train)

    from sklearn.linear_model import RidgeClassifierCV
    classifier3 = RidgeClassifierCV()
    classifier3.fit(x_train, y_train)

    from sklearn.linear_model import SGDClassifier
    classifier4 = SGDClassifier()
    classifier4.fit(x_train, y_train)

    from sklearn.linear_model import Perceptron
    classifier5 = Perceptron()
    classifier5.fit(x_train, y_train)

    print('LogisticRegression training accuracy: ',
          classifier1.score(x_train, y_train))
    print('PassiveAggressiveClassifier training accuracy: ',
          classifier2.score(x_train, y_train))
    print('RidgeClassifierCV training accuracy: ',
          classifier3.score(x_train, y_train))
    print('SGDClassifier training accuracy: ',
          classifier4.score(x_train, y_train))
    print('Perceptron training accuracy: ',
          classifier5.score(x_train, y_train))

    return classifier1, classifier2, classifier3, classifier4, classifier5
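Note that all five scores above are computed on the training data, so they measure fit rather than generalisation. A hedged variant, assuming x and y are the full arrays you would otherwise pass in, that scores on a held-out split instead:

from sklearn.model_selection import train_test_split

x_tr, x_val, y_tr, y_val = train_test_split(x, y, test_size=0.2, random_state=0)
for clf in linear_models(x_tr, y_tr):
    print(type(clf).__name__, 'validation accuracy:', clf.score(x_val, y_val))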
Example #4
def run(train_file, test_file, num_seq, n_jobs):
    print("Load train data")
    y, s = load_data(train_file)

    print("Generate random features")
    ss = generate_features(s, num_seq)

    print("Generate automaton")
    A = ahocorasick.Automaton()
    for idx, f in enumerate(ss):
        A.add_word(f, (idx, f))
    A.make_automaton()

    print("Extract Feature Vectors of train data")
    fvec = create_fvec(s, A, n_jobs)

    print("Learn classifier")
    cls = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    cls.fit(fvec, y)

    print("Load test data")
    y_test, s_test = load_data(test_file)
    print("Extract Feature Vector of test data")
    fvec_test = create_fvec(s_test, A, n_jobs)

    print("Predict")
    print(cls.score(fvec_test, y_test))
Example #5
def main():
  train_input = pd.read_csv('../input/train.csv')
  test_input = pd.read_csv('../input/test.csv') 
  data = pd.concat([train_input, test_input])
  # We don't have data on whether a person is delinquent

  featurizer = CreditScoreFeaturizer() # Create our own features
  
  print("Transforming dataset into features...")
  ##Create matrix of features from raw dataset
  X = featurizer.fit_transform(data)
  X_train = X[:len(train_input)]
  X_test = X[len(train_input):]

  ## Use any model that we might find appropriate
  model = RidgeClassifierCV(alphas=[ 0.1, 1., 10. ])

  ##Create the object and set relevant parameters
  #model = LogisticRegression(C=10) # Can also switch different models (e.g. Ridge)

  ##Set target variable y
  y = train_input.SeriousDlqin2yrs

  print("Cross validating...")
  print(np.mean(cross_val_score(model, X_train, y, scoring='roc_auc')))  # Scoring metric is now AUC

  print("Training final model...")
  model = model.fit(X_train, y)
Example #6
class TestServinator(unittest.TestCase):
    def setUp(self):
        data = load_iris()
        x_train, x_test, y_train, y_test = train_test_split(data.data,
                                                            data.target,
                                                            test_size=0.2,
                                                            random_state=67,
                                                            stratify=data.target)

        self.model = RidgeClassifierCV(normalize=True,
                                       scoring='logloss',
                                       cv=3,
                                       class_weight='balanced')
        self.model.fit(x_train, y_train)
        self.test_data = x_test
        self.test_target = y_test

        model_backend = ModelBackend(self.model)

        # self.app = servinator('test', model_backend).test_client()

    def test_json_to_model_input(self):
        raw_json = '''[{"end_date": "2014-10-08T14:52:44-04:00", "location": "Louisville, KY", "pledged": "70.0", "goal": "10000.0", "category": "Food", "author": "Joe Banet", "backers": "4", "blurb": "Krazy Joe's soon to be famous kimchi and bourbon barrels which have long ago been enjoyed come together at last. Bourbon aged kimchi!", "title": "Krazy Joe's Bourbon Barrel Kimchi", "full_text": "I like kimchi. I like to make kimchi. I think I'm pretty good at it. My goal is to create a Bourbon barrel aged kimchi and share it with the world. This is just a start to company that could greatly expand and diversify into many products that all have one common denominator...kimchi. Thank you for your interest and support! ; "}]'''

        expected_data = {
                 'author': {0: 'joe banet'},
                 'backers': {0: '4'},
                 'blurb': {0: 'krazy joes soon to be famous kimchi and bourbon barrels which have long ago been enjoyed come together at last bourbon aged kimchi'},
                 'day': {0: 8},
                 'dayofweek': {0: 2},
                 'dayofyear': {0: 281},
                 'full_text': {0: 'i like kimchi i like to make kimchi i think im pretty good at it my goal is to create a bourbon barrel aged kimchi and share it with the world this is just a start to company that could greatly expand and diversify into many products that all have one common denominator kimchi thank you for your interest and support'},
                 'goal': {0: '10000.0'},
                 'hour': {0: 18},
                 'loc1': {0: ''},
                 'loc2': {0: 'louisville'},
                 'loc3': {0: 'ky'},
                 'minute': {0: 52},
                 'month': {0: 10},
                 'pledged': {0: '70.0'},
                 'title': {0: 'krazy joes bourbon barrel kimchi'},
                 'weekday': {0: 2},
                 'weekofyear': {0: 41},
                 'year': {0: 2014}}

        df_json = _json_to_model_input(raw_json)
        df_expected = pd.DataFrame(expected_data)

        # Sort them for comparison
        df_json = df_json.loc[:, sorted(df_json.columns.values)]
        df_expected = df_expected.loc[:, sorted(df_expected.columns.values)]

        self.assertTrue(df_expected.equals(df_json))


    def test_e2e(self):
        '''test full load and predict of training data and assert it matches
        offline result'''
        pass
Example #7
 def _fit_estimator(self, rocket, X, y):
     transformed_x = rocket.fit_transform(X)
     ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
     ridge.fit(transformed_x, y)
     return [
         make_pipeline(rocket, ridge),
         transformed_x if self.save_transformed_data else None,
     ]
Example #8
def test_ridge_classifier_with_scoring(filter_, scoring, cv):
    # non-regression test for #14672
    # check that RidgeClassifierCV works with all sort of scoring and
    # cross-validation
    scoring_ = make_scorer(scoring) if callable(scoring) else scoring
    clf = RidgeClassifierCV(scoring=scoring_, cv=cv)
    # Smoke test to check that fit/predict does not raise error
    clf.fit(filter_(X_iris), y_iris).predict(filter_(X_iris))
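For context, make_scorer wraps a plain metric function so that RidgeClassifierCV can evaluate it during its internal cross-validation. A self-contained sketch with a metric that works with decision-function-based classifiers:

from sklearn.datasets import load_iris
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import balanced_accuracy_score, make_scorer

X_iris, y_iris = load_iris(return_X_y=True)
clf = RidgeClassifierCV(scoring=make_scorer(balanced_accuracy_score), cv=5)
print(clf.fit(X_iris, y_iris).score(X_iris, y_iris))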
Example #9
def do_rcv(X_test, X_train, Y_train):
    # ridge classifier with built-in cross-validation (squared loss, l2 penalty)
    clf = RidgeClassifierCV()
    print("starts fitting")
    print(clf.fit(X_train, Y_train))
    print("finished fitting, starts predictions")
    Y_pred = clf.predict(X_test)
    print("finished predictions")
    return Y_pred
Example #10
def ridge_classification(X_train, X_test, y_train, y_test):
    X_train, X_test = preprocess(X_train, X_test)
    from sklearn.linear_model import RidgeClassifierCV
    classifier = RidgeClassifierCV()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    y_pred = np.round(y_pred).flatten()
    plot_model(classifier, X_train, y_train, y_test, y_pred,
               "RidgeClassifierCV")
Example #11
 def __init__(self,
              alphas=(0.1, 1.0, 10.0),
              fit_intercept=True,
              normalize=False,
              scoring=None,
              cv=None,
              class_weight=None):
     _RidgeClassifierCV.__init__(self, alphas, fit_intercept, normalize,
                                 scoring, cv, class_weight)
     BaseWrapperClf.__init__(self)
Example #12
 def _fit_estimator(self, rocket, X, y):
     transformed_x = rocket.fit_transform(X)
     scaler = StandardScaler(with_mean=False)
     scaler.fit(transformed_x, y)
     ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
     ridge.fit(scaler.transform(transformed_x), y)
     return [
         make_pipeline(rocket, scaler, ridge),
         transformed_x if self.save_transformed_data else None,
     ]
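Compared with Example #7, this variant rescales the ROCKET features before the ridge step; with_mean=False keeps the scaling valid for sparse inputs. The scale-then-ridge tail can be built on its own, as a sketch:

import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# prepend any feature transformer (e.g. a Rocket instance) to this tail
clf = make_pipeline(
    StandardScaler(with_mean=False),
    RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)),
)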
Example #13
def ridge_classfier_cv_selected_feature():
    raw_frame = thal_data()
    x = raw_frame.drop(['sugar', 'age', 'cardiographic', 'angina', 'slope',
                        'thal', 'log_cholestoral'], axis=1)
    y = raw_frame['thal']
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,
                                                        random_state=5)
    clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(x_train, y_train)
    global train_score
    train_score.append(clf.score(x_train, y_train))
    global test_score
    test_score.append(clf.score(x_test, y_test))
Example #14
def ridge_classfier_cv_withlog():
    raw_frame = thal_data()
    x = raw_frame.drop(['thal', 'pressure', 'cholestoral', 'age', 'heart_rate'],
                       axis=1)
    y = raw_frame['thal']
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,
                                                        random_state=5)
    clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(x_train, y_train)
    global train_score
    train_score.append(clf.score(x_train, y_train))
    global test_score
    test_score.append(clf.score(x_test, y_test))
Example #15
def ridge(X, X_train, X_val, y_train, y_val, X_test, y_test):

    model = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1])
    model.fit(X_train, y_train)

    sh = X.shape
    save_model(model, sh)

    tr_f1, val_f1, test_f1, tr_acc, val_acc, test_acc = model_performance(
        model, X_train, y_train, X_val, y_val, X_test, y_test)
    return tr_f1, val_f1, test_f1
Example #17
def agent(path="./", dataset="", ratio=False, seg=0.75, folder="temp"):

    current_process().name = dataset

    start1 = time.time()
    train_x, train_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TRAIN.ts")
    test_x, test_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TEST.ts")

    print(f"{dataset}: Train Shape {train_x.shape}")
    print(f"{dataset}: Test Shape {test_x.shape}")

    scaler = StandardScaler()

    transform_time1 = time.time()

    mod_train = PAAStat(paa_=ratio, seg_=seg).transform(train_x.values)
    mod_train = scaler.fit(mod_train).transform(mod_train)

    mod_test = PAAStat(paa_=ratio, seg_=seg).transform(test_x.values)
    mod_test = scaler.transform(mod_test)

    transform_time2 = time.time()
    model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    train_time1 = time.time()
    model.fit(mod_train, train_y)
    preds = model.predict(mod_test)
    train_time2 = time.time()

    acc1 = accuracy_score(preds, test_y) * 100

    end1 = time.time()
    print(
        f"Dataset: {dataset}, AccuracyRidge: {acc1}, Time taken: {(end1 - start1)/60}, "
        f"Transform_time: {(transform_time2-transform_time1)/60}, train_time: {(train_time2-train_time1)/60}"
    )

    results = pd.DataFrame({
        'Dataset': dataset,
        'AccuracyRidge': [acc1],
        'Time (min)': [(end1 - start1) / 60],
        'Transform_time (min)': [(transform_time2 - transform_time1) / 60],
        'train_time (min)': [(train_time2 - train_time1) / 60]
    })

    temp_path = './' + folder
    if not os.path.exists(temp_path):
        os.mkdir(temp_path)
    results.to_csv(os.path.join(temp_path + f'/{dataset}.csv'), index=False)
Example #18
 def ridge(y, pred, i, wt=1):
     b = y[:, i].ravel()
     c = (pred[:, i] > 0.4).ravel()
     print('score before (class {})'.format(i), f1_score(b, c))
     clf = RidgeClassifierCV(alphas=[0.001, 0.01, 0.1, 1], class_weight={0: 1, 1: wt}, normalize=True).fit(pred, b)
     #clf = RidgeClassifierCV(alphas=[0.001, 0.01, 0.1, 1], class_weight='balanced', cv=10).fit(pred, b)
     ri_pred = clf.predict(pred)
     print('score after (class {})'.format(i), f1_score(b, ri_pred))
     #print('score after (class {})'.format(i), clf.score(pred, b))
     f1_before.append(f1_score(b, c))
     f1_after.append(f1_score(b, ri_pred))
     return clf
Example #19
def train_model(X, Y):
    print("Training LR...")
    modelLR = LogisticRegression(penalty='l1', C=100, tol=1e-10)
    modelLR.fit(X.toarray(), Y)

    print("Training RC...")
    modelRC = RidgeClassifierCV(alphas=[0.1, 1., 10.])
    modelRC.fit(X.toarray(), Y)

    print("Training GBC...")
    modelGBC = GradientBoostingClassifier(subsample=0.5, max_depth=6, n_estimators=50)
    modelGBC.fit(X.toarray(), Y)

    return modelGBC, modelRC, modelLR
Example #20
def test_ridge_regression_custom_scoring(filter_, cv):
    # check that custom scoring is working as expected
    # check the tie breaking strategy (keep the first alpha tried)

    def _dummy_score(y_test, y_pred):
        return 0.42

    alphas = np.logspace(-2, 2, num=5)
    clf = RidgeClassifierCV(alphas=alphas,
                            scoring=make_scorer(_dummy_score),
                            cv=cv)
    clf.fit(filter_(X_iris), y_iris)
    assert clf.best_score_ == pytest.approx(0.42)
    # In case of tie score, the first alphas will be kept
    assert clf.alpha_ == pytest.approx(alphas[0])
Example #21
 def __init__(self,
              num_features=10000,
              max_dilations_per_kernel=32,
              random_state=None,
              alphas=np.logspace(-3, 3, 13),
              normalize=True,
              memory=None,
              verbose=False,
              scoring=None,
              class_weight=None,
              **kwargs):
     """
     MiniRocketClassifier is recommended for up to 10k time series.
     For a larger dataset, you can use MINIROCKET (in Pytorch).
     scoring = None --> defaults to accuracy.
     """
     self.steps = [('minirocketmultivariate',
                    MiniRocketMultivariate(
                        num_features=num_features,
                        max_dilations_per_kernel=max_dilations_per_kernel,
                        random_state=random_state)),
                   ('ridgeclassifiercv',
                    RidgeClassifierCV(alphas=alphas,
                                      normalize=normalize,
                                      scoring=scoring,
                                      class_weight=class_weight,
                                      **kwargs))]
     self.num_features, self.max_dilations_per_kernel, self.random_state = num_features, max_dilations_per_kernel, random_state
     self.alphas, self.normalize, self.scoring, self.class_weight, self.kwargs = alphas, normalize, scoring, class_weight, kwargs
     self.memory = memory
     self.verbose = verbose
     self._validate_steps()
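Because the class above subclasses sklearn's Pipeline, it inherits fit/predict/score. A hypothetical usage sketch; X_train and the other arrays are assumed to exist, with X_train shaped (n_samples, n_channels, seq_len) as MiniRocketMultivariate expects:

clf = MiniRocketClassifier(num_features=10_000)
clf.fit(X_train, y_train)            # hypothetical data
print(clf.score(X_valid, y_valid))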
Example #22
def agent(path, dataset, seg, folder, paa=True):

    start = time.time()
    train_x, train_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TRAIN.ts")
    test_x, test_y = load_from_tsfile_to_dataframe(
        f"{path}/{dataset}/{dataset}_TEST.ts")

    print(f"{dataset}: Train Shape {train_x.shape}")
    print(f"{dataset}: Test Shape {test_x.shape}")

    model = Pipeline([('data_transform', PAAStat(paa_=paa, seg_=seg)),
                      ('model',
                       RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                                         normalize=True,
                                         class_weight='balanced'))])

    model.fit(train_x.values, train_y)
    preds = model.predict(test_x.values)
    acc1 = accuracy_score(preds, test_y) * 100

    end = time.time()

    results = pd.DataFrame({
        'Dataset': dataset,
        'AccuracyRidge': [acc1],
        'Time': [end - start]
    })
    print(results)
    temp_path = './' + folder
    if not os.path.exists(temp_path):
        os.mkdir(temp_path)
    results.to_csv(os.path.join(temp_path + f'/{dataset}.csv'), index=False)
Example #23
    def fit(self, X, y):
        """Build a pipeline containing the ROCKET transformer and RidgeClassifierCV.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self.classifier = rocket_pipeline = make_pipeline(
            Rocket(
                num_kernels=self.num_kernels,
                random_state=self.random_state,
                n_jobs=self.n_jobs,
            ),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )
        rocket_pipeline.fit(X, y)

        self._is_fitted = True
        return self
Example #24
    def _fit(self, X, y):
        """Build a pipeline containing the ROCKET transformer and RidgeClassifierCV.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        self._pipeline = rocket_pipeline = make_pipeline(
            Rocket(
                num_kernels=self.num_kernels,
                random_state=self.random_state,
                n_jobs=self._threads_to_use,
            ),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )
        rocket_pipeline.fit(X, y)

        return self
Example #25
class RocketClassifier(sklearn.pipeline.Pipeline):
    def __init__(self,
                 num_kernels=10_000,
                 normalize_input=True,
                 random_state=None,
                 alphas=np.logspace(-3, 3, 7),
                 normalize_features=True,
                 memory=None,
                 verbose=False,
                 scoring=None,
                 class_weight=None,
                 **kwargs):
        """
        RocketClassifier is recommended for up to 10k time series.
        For a larger dataset, you can use ROCKET (in Pytorch).
        scoring = None --> defaults to accuracy.

        Rocket args:
            num_kernels     : int, number of random convolutional kernels (default 10,000)
            normalize_input : boolean, whether or not to normalise the input time series per instance (default True)
            random_state    : int (ignored unless int due to compatibility with Numba), random seed (optional, default None)

        """
        self.steps = [('rocket',
                       Rocket(num_kernels=num_kernels,
                              normalise=normalize_input,
                              random_state=random_state)),
                      ('ridgeclassifiercv',
                       RidgeClassifierCV(alphas=alphas,
                                         normalize=normalize_features,
                                         scoring=scoring,
                                         class_weight=class_weight,
                                         **kwargs))]
        store_attr()
        self._validate_steps()
Example #26
def init_classifiers(seed):
    return {
        'AdaBoostClassifier': AdaBoostClassifier(random_state=seed),
        'BaggingClassifier': BaggingClassifier(random_state=seed),
        'ExtraTreesClassifier': ExtraTreesClassifier(random_state=seed),
        'GradientBoostingClassifier': GradientBoostingClassifier(random_state=seed),
        'RandomForestClassifier': RandomForestClassifier(random_state=seed),
        'XGBClassifier': xgb.XGBClassifier(),
        'LogisticRegression': LogisticRegression(random_state=seed),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(random_state=seed),
        'RidgeClassifier': RidgeClassifier(random_state=seed),
        'RidgeClassifierCV': RidgeClassifierCV(),
        'SGDClassifier': SGDClassifier(random_state=seed),
        #'KNeighborsClassifier': KNeighborsClassifier(),
        #'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'MLPClassifier': MLPClassifier(random_state=seed),
        'DecisionTreeClassifier': DecisionTreeClassifier(random_state=seed),
        'ExtraTreeClassifier': ExtraTreeClassifier(random_state=seed)
    }
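A quick way to exercise the dictionary above, assuming X_train/y_train and a held-out X_val/y_val are already defined:

for name, clf in init_classifiers(seed=42).items():
    clf.fit(X_train, y_train)
    print(f"{name}: {clf.score(X_val, y_val):.3f}")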
Example #27
    def get_logistic_regression_coefs_l2(self,
                                         category,
                                         clf=RidgeClassifierCV()):
        ''' Computes l2-penalized logistic regression score.
        Parameters
        ----------
        category : str
            category name to score

        Returns
        -------
            (coefficient array, accuracy, majority class baseline accuracy)
        '''
        try:
            from sklearn.cross_validation import cross_val_predict
        except ImportError:
            from sklearn.model_selection import cross_val_predict
        y = self._get_mask_from_category(category)
        X = TfidfTransformer().fit_transform(self._X)
        clf.fit(X, y)
        y_hat = cross_val_predict(clf, X, y)
        acc, baseline = self._get_accuracy_and_baseline_accuracy(y, y_hat)
        return clf.coef_[0], acc, baseline
Example #28
def classify_connectivity(X,
                          y,
                          sss,
                          classifier_name,
                          n_jobs=-1,
                          subjects=None):
    """ Returns 100 shuffle split scores
    """
    if classifier_name == 'logreg_l1':
        classifier = LogisticRegression(penalty='l1',
                                        dual=False,
                                        random_state=42)
    elif classifier_name == 'logreg_l2':
        classifier = LogisticRegression(penalty='l2', random_state=42)
    elif classifier_name == 'ridge':
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 7))
    elif classifier_name == 'svc_l2':
        classifier = LinearSVC(penalty='l2', random_state=42)
    elif classifier_name == 'svc_l1':
        classifier = LinearSVC(penalty='l1', dual=False, random_state=42)

    p = Parallel(n_jobs=n_jobs, verbose=5)(
        delayed(train_and_test)(classifier, X, y, train, test, subjects)
        for train, test in sss)
    return np.asarray(p)
Example #29
class RocketClassifier(sklearn.pipeline.Pipeline):
    """Time series classification using ROCKET features and a linear classifier"""

    def __init__(self, num_kernels=10_000, normalize_input=True, random_state=None,
                 alphas=np.logspace(-3, 3, 7), normalize_features=True, memory=None, verbose=False, scoring=None, class_weight=None, **kwargs):
        """
        RocketClassifier is recommended for up to 10k time series.
        For a larger dataset, you can use ROCKET (in Pytorch).
        scoring = None --> defaults to accuracy.

        Rocket args:
            num_kernels     : int, number of random convolutional kernels (default 10,000)
            normalize_input : boolean, whether or not to normalise the input time series per instance (default True)
            random_state    : Optional random seed (default None)

        """
        try:
            import sktime
            from sktime.transformations.panel.rocket import Rocket
        except ImportError:
            raise ImportError("You need to install sktime to be able to use RocketClassifier")

        self.steps = [('rocket', Rocket(num_kernels=num_kernels, normalise=normalize_input, random_state=random_state)),
                      ('ridgeclassifiercv', RidgeClassifierCV(alphas=alphas, normalize=normalize_features, scoring=scoring,
                                                              class_weight=class_weight, **kwargs))]
        store_attr()
        self._validate_steps()
Example #30
    def _train_probas_for_estimator(self, y, idx):
        rs = 255 if self.random_state == 0 else self.random_state
        rs = (None if self.random_state is None else
              (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max)
        rng = check_random_state(rs)

        indices = range(self.n_instances_)
        subsample = rng.choice(self.n_instances_, size=self.n_instances_)
        oob = [n for n in indices if n not in subsample]

        results = np.zeros((self.n_instances_, self.n_classes_))
        if len(oob) == 0:
            return results, 1, oob

        clf = make_pipeline(
            StandardScaler(with_mean=False),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)),
        )
        clf.fit(self.transformed_data_[idx].iloc[subsample], y[subsample])
        preds = clf.predict(self.transformed_data_[idx].iloc[oob])

        weight = clf.steps[1][1].best_score_

        for n, pred in enumerate(preds):
            results[oob[n]][self._class_dictionary[pred]] += weight

        return results, weight, oob
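Each subsample's out-of-bag votes are weighted by that estimator's best_score_ from the ridge CV, so better transforms count for more. Normalising the accumulated rows then yields per-instance class probabilities; a sketch of that final step (the helper name is hypothetical):

import numpy as np

def oob_votes_to_probas(results, n_classes):
    # rows that received no OOB votes fall back to a uniform distribution
    sums = results.sum(axis=1, keepdims=True)
    return np.where(sums > 0, results / np.maximum(sums, 1e-12), 1.0 / n_classes)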
Example #31
def generate_submission(models):
    X = pd.concat([
        pd.read_csv(inp)[full_labels] for inp in [
            "../models/{}/train_meta_probs.csv".format(model)
            for model in models
        ]
    ],
                  axis=1)
    X_test = pd.concat([
        pd.read_csv(inp)[full_labels] for inp in
        ["../models/{}/test_meta_probs.csv".format(model) for model in models]
    ],
                       axis=1)
    col_names = [
        "{}_{}".format(i, j)
        for i in ["model_{}".format(k) for k in range(len(models))]
        for j in full_labels
    ]
    X.columns, X_test.columns = col_names, col_names
    folds = get_folds()

    print("===Ridge===")
    ridge_cv = RidgeClassifierCV(alphas=[
        0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 15, 20, 30, 40, 50, 70, 100
    ],
                                 cv=folds).fit(X, y)
    print("best alpha value is: {}".format(ridge_cv.alpha_))
    ridge_model = RidgeClassifier(alpha=ridge_cv.alpha_).fit(X, y)
    print(accuracy_score(y, ridge_model.predict(X)))
    test_df['label'] = pd.Series(
        ridge_model.predict(X_test)).map(full_num_label_mapping)
    test_df['label'] = test_df['label'].map(lambda x: "unknown"
                                            if x not in labels else x)
    test_df.to_csv("ridge_on_{}_models.csv".format(len(models)), index=False)
Example #32
def get_classifier(clf_name):
    svmClf = SVC(gamma='auto')
    advancedSvmClf = SVC(gamma='auto',
                         kernel='rbf',
                         probability=True,
                         class_weight='balanced',
                         C=120000000)
    ridgeClf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    randomForestClf = RandomForestClassifier(n_estimators=200,
                                             max_depth=7,
                                             random_state=0)
    gradientBoostClf = GradientBoostingClassifier(n_estimators=200,
                                                  max_depth=7,
                                                  random_state=0)
    xgbClf = XGBClassifier(n_estimators=500, max_depth=5)
    adaBoostClf = AdaBoostClassifier()
    lgbmClf = LGBMClassifier(n_estimators=1000)

    if clf_name == 'svm':
        return svmClf
    elif clf_name == 'advanced_svm':
        return advancedSvmClf
    elif clf_name == 'ridge':
        return ridgeClf
    elif clf_name == 'random_forest':
        return randomForestClf
    elif clf_name == 'gradient_boosting':
        return gradientBoostClf
    elif clf_name == 'xgb':
        return xgbClf
    elif clf_name == 'ada_boost':
        return adaBoostClf
    elif clf_name == 'lgbm':
        return lgbmClf
Example #34
def main():
    start_time = time.time()
    train_feats, train_labels, test_feats = get_data(TRAIN_FILE, TEST_FILE)

    # try RidgeClassifier with manual cross validation (k-fold)
    lr = RidgeClassifier().fit(train_feats, train_labels)
    cv_preds = cross_validation.cross_val_predict(lr, train_feats, train_labels, cv=10)
    print("cross validation accuracy:", metrics.accuracy_score(train_labels, cv_preds))

    # try automatic RidgeClassifierCV (k-fold)
    lrcv = RidgeClassifierCV(cv=10).fit(train_feats, train_labels)
    print("built in ridge cv accuracy:", lrcv.score(train_feats, train_labels))

    # use cross validated model to predict test labels
    preds = lrcv.predict(test_feats).astype(str)
    for i in range(preds.shape[0]):
        if preds[i] == '1': preds[i] = 'TRUE'
        else: preds[i] = 'FALSE'
    np.savetxt("attractiveness_predictions.csv", preds, fmt="%s", newline="\n")

    print("time taken:", time.time()-start_time, "seconds")
Example #35
def main():
  train_input = pd.read_csv('train.csv')
  test_input = pd.read_csv('test.csv')
  data = pd.concat([train_input, test_input])

  featurizer = CreditScoreFeaturizer()

  print("Transforming dataset into features...")
  ##Create matrix of features from raw dataset
  X = featurizer.fit_transform(data)
  X_train = X[:len(train_input)]
  X_test = X[len(train_input):]

  ## Use any model that we might find appropriate
  model = RidgeClassifierCV(alphas=[ 0.1, 1., 10. ])

  ##Create the object and set relevant parameters
  #model = LogisticRegression(C=10)

  ##Set target variable y
  y = train_input.SeriousDlqin2yrs

  print("Cross validating...")
  print(np.mean(cross_val_score(model, X_train, y, scoring='roc_auc', cv=10)))

  print("Training final model...")
  model = model.fit(X_train, y)


  n_models=5
  bag_size=0.70

  models = [LogisticRegression(C=10) for _ in range(n_models)]
  model = Bagging(models, bag_size)

  #Fit Final Model
  model.fit(X_train, y)

  print("Create predictions on submission set...")
  create_submission(model, X_test, test_input)
Example #37
class GClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self):
        pass
    
    def fit(self, X, y):
        trainx1, trainx2 = zip(*X)
        self.count_vect = CountVectorizer(analyzer='word', ngram_range=(1, 2))
        self.count_vect.fit(list(trainx1)+list(trainx2))
        X_train_counts1 = self.count_vect.transform(trainx1)
        X_train_counts2 = self.count_vect.transform(trainx2)
        X_train_counts = np.concatenate((X_train_counts1.toarray(),X_train_counts2.toarray()),axis=1)
        self.clf = RidgeClassifierCV().fit(X_train_counts, y)
        return self
    
    
    def predict(self, X):
        testx1, testx2 = zip(*X)
        X_test_counts1 = self.count_vect.transform(testx1)
        X_test_counts2 = self.count_vect.transform(testx2)
        X_test_counts = np.concatenate((X_test_counts1.toarray(),X_test_counts2.toarray()),axis=1)
        return self.clf.predict(X_test_counts)
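A usage sketch for the pair classifier above; X is a sequence of (text1, text2) tuples and the labels are purely illustrative:

X = [("how do i sort a list", "sorting a list in python"),
     ("install numpy with pip", "pip install numpy"),
     ("open a file in python", "best pizza in town"),
     ("what is a dict", "weather forecast tomorrow")]
y = [1, 1, 0, 0]  # hypothetical: 1 = related pair, 0 = unrelated

clf = GClassifier().fit(X, y)
print(clf.predict([("sort an array", "array sorting")]))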
Example #38
def sklearn_ridge_cv(Xtrain,Ytrain,Xtest,Ytest,*args,**kwargs):
    clf = RidgeClassifierCV(fit_intercept=True)
    clf.fit(Xtrain,Ytrain)
    return clf.score(Xtest,Ytest)
Example #39
def print_cm(cm, labels):
    # Nicely print the confusion matrix
    print(" " * 4, end=" ")
    for label in labels:
        print(" %s" % label, end=" ")
    print()

    for i, label1 in enumerate(labels):
        print(label1, end=" ")
        for j, label2 in enumerate(labels):
            print("%4d" % cm[i, j], end=" ")
        print()



from sklearn.linear_model import RidgeClassifierCV
clf = RidgeClassifierCV().fit(X_train, y_train)
print("Accuracy = ", clf.score(X_test, y_test))

print_cm(confusion_matrix(y_test,
                          clf.predict(X_test),
                          labels=populations), populations)

# Plot coefficients
coef = np.mean(np.abs(clf.coef_), axis=0)

f, ax = plt.subplots(figsize=(10, 4))
plt.bar(range(coef.size), coef)
# ppl.bar(ax, left=range(coef.size), height=coef, xticklabels=None,
#             annotate=False)
plt.savefig("ridge.png")
Example #40
if __name__ == "__main__":

    # generate some fake data, split, and scale
    X, y = make_classification(n_samples=1000, n_informative=5, n_redundant=6, random_state=4)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
    scaler = StandardScaler().fit(X_train)
    X_train_standard = scaler.transform(X_train)
    X_test_standard = scaler.transform(X_test)

    # specify classifiers
    ridge = RidgeClassifierCV(alphas=np.logspace(-3, 1, 20))
    lasso = LogisticRegressionCV(Cs=np.logspace(-3, 1, num=20))
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)

    # train the classifiers
    ridge.fit(X_train_standard, y_train)
    lasso.fit(X_train_standard, y_train)
    forest.fit(X_train, y_train)

    # predicted values
    ridge_preds = ridge.predict(X_test_standard)
    lasso_preds = lasso.predict(X_test_standard)
    forest_preds = forest.predict(X_test)

    # confusion matrices
    c1 = confusion_matrix(y_test, ridge_preds)
Example #41
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import GradientBoostingClassifier

gdc = GradientBoostingClassifier()
lr = LogisticRegression()
clf = svm.SVR()
et = ExtraTreesClassifier()
rgr = RadiusNeighborsRegressor()
forest = RandomForestRegressor(n_estimators = 100, n_jobs = 2, oob_score=True)
adaboost = AdaBoostRegressor()
nb = GaussianNB()
rd = RidgeClassifierCV()
kf = KFold(n_splits=5)

for train_index, test_index in kf.split(variables):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = variables.iloc[train_index], variables.iloc[test_index]
    y_train = report['survey_participant'].iloc[train_index]
    y_test = report['survey_participant'].iloc[test_index]
    forest.fit(X_train, y_train)
    adaboost.fit(X_train, y_train)
    gdc.fit(X_train, y_train)
    rd.fit(X_train, y_train)
    rgr.fit(X_train, y_train)
    nb.fit(X_train, y_train)
    lr.fit(X_train, y_train)
    et.fit(X_train, y_train)