from sklearn.linear_model import RidgeClassifier

def fit_logistic_regression(self, X=None, y=None):
    # Despite its name, this fits a RidgeClassifier; when no arguments are
    # given it falls back to the module-level training data.
    if X is None:
        X = review_train_logged
    if y is None:
        y = review_train['has_reviewed'].to_numpy().astype(int)

    model = RidgeClassifier().fit(X, y)
    print(model.score(X, y))  # 0.68 accuracy in the original run
    model.get_params()        # inspect the hyper-parameters if needed
    return model
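
# A minimal, self-contained sketch of the same RidgeClassifier usage on
# synthetic data (review_train_logged / review_train belong to the original
# project and are not reproduced here); the held-out split is an addition so
# the reported accuracy is not measured on the training set.
import numpy as np
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(500, 10))                            # stand-in features
y_demo = (X_demo[:, 0] + 0.5 * X_demo[:, 1] > 0).astype(int)   # stand-in labels

X_tr, X_te, y_tr, y_te = train_test_split(
    X_demo, y_demo, test_size=0.2, random_state=0)

clf = RidgeClassifier().fit(X_tr, y_tr)
print(clf.score(X_te, y_te))   # held-out accuracy
print(clf.get_params())        # e.g. {'alpha': 1.0, ...}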
Example #2
# Imports needed by this example; `RidgeRegression` is assumed to be
# scikit-learn's Ridge imported under an alias, and `Model` is the project's
# own base class (not shown here), which presumably provides
# map_str_to_number() and plot_confusion_matrix().
import json
import pickle
from math import sqrt

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge as RidgeRegression, RidgeClassifier
from sklearn.metrics import mean_squared_error, r2_score


class Ridge(Model):

    # X holds the features, Y the labels; the fitted model and its
    # predictions are stored after fit()/predict()
    X = None
    Y = None
    predictions = None
    model = None
    def __init__(self,
                 X=None,
                 Y=None,
                 label_headers=None,
                 alpha=1,
                 type='regressor',
                 cfg=False):

        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        self.type = type
        self.cfg = cfg

        self.mapping_dict = None
        self.label_headers = label_headers

        if self.type == 'regressor':
            self.model = RidgeRegression(alpha=alpha)
        else:
            self.model = RidgeClassifier(alpha=alpha)

    def fit(self, X=None, Y=None):
        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        if self.type == 'classifier':
            # map_str_to_number is presumably provided by the Model base class;
            # it converts string class labels to numeric codes before fitting
            self.map_str_to_number(self.Y)

        print('Ridge training started............')
        self.model.fit(self.X, self.Y)
        print('Ridge training completed..........')

        return self.model

    def predict(self, test_features):
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        print('Prediction completed..........')
        return self.predictions

    def save(self, filename='ridge_model.pkl'):
        # Optionally dump the hyper-parameters alongside the pickled model
        if self.cfg:
            with open('ridge_configs.txt', 'w') as f:
                f.write(json.dumps(self.model.get_params()))
        with open(filename, 'wb') as f:
            pickle.dump(self.model, f)

    def featureImportance(self):
        # Coefficient magnitudes serve as a rough feature-importance measure;
        # coef_[0] is the coefficient row for the (first) target or class
        return self.model.coef_[0]

    def getAccuracy(self, test_labels, predictions, origin=0, hitmissr=0.8):
        correct = 0
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            # Exact-match accuracy for classification
            for i in range(len(df)):
                if df.values[i] == test_labels.values[i]:
                    correct = correct + 1
        else:
            # For regression, count a prediction as a hit when its relative
            # error (scaled by the predicted value) is at most 1 - hitmissr
            for i in range(len(df)):
                if 1 - abs(df.values[i] - test_labels.values[i]) / abs(
                        df.values[i]) >= hitmissr:
                    correct = correct + 1
        return float(correct) / len(df)

    def getConfusionMatrix(self, test_labels, predictions, label_headers):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            # plot_confusion_matrix is presumably provided by the Model base class
            index = 0
            for label_header in label_headers:
                classes = test_labels[label_header].unique()
                title = 'Normalized confusion matrix for Ridge (' + label_header + ')'
                # .ix was removed from pandas; use positional .iloc instead
                self.plot_confusion_matrix(test_labels.iloc[:, index],
                                           df.iloc[:, index],
                                           classes=classes,
                                           normalize=True,
                                           title=title)
                index = index + 1
        else:
            return 'No Confusion Matrix for Regression'

    def getRSquare(self, test_labels, predictions, mode='single'):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            if mode == 'multiple':
                errors = r2_score(test_labels,
                                  df,
                                  multioutput='variance_weighted')
            else:
                errors = r2_score(test_labels, df)
            return errors
        else:
            return 'No RSquare for Classification'

    def getMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = mean_squared_error(test_labels, df)
            return errors
        else:
            return 'No MSE for Classification'

    def getMAPE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = np.mean(np.abs(
                (test_labels - df.values) / test_labels)) * 100
            return errors.values[0]
        else:
            return 'No MAPE for Classification'

    def getRMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = sqrt(mean_squared_error(test_labels, df))
            return errors
        else:
            return 'No RMSE for Classification'
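
# Hypothetical usage sketch for the Ridge wrapper above (assumes the rest of
# the module, including its Model base class, imports correctly); the
# synthetic regression data below is illustrative only.
if __name__ == '__main__':
    rng = np.random.default_rng(1)
    X_df = pd.DataFrame(rng.normal(size=(200, 3)), columns=['f0', 'f1', 'f2'])
    y_df = pd.DataFrame({'target': 2.0 * X_df['f0'] - X_df['f2']
                         + rng.normal(scale=0.1, size=200)})

    ridge = Ridge(X=X_df, Y=y_df, label_headers=['target'],
                  alpha=0.5, type='regressor')
    ridge.fit()
    preds = ridge.predict(X_df)

    print('RMSE:', ridge.getRMSE(y_df, preds))
    print('R^2 :', ridge.getRSquare(y_df, preds))
    ridge.save('ridge_model.pkl')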