Beispiel #1
0
    def define_ruleset_model(self, model_name, status):
        '''
            Fit a RIPPER rule set on the predictions of a previously saved
            model (GBT or SVM, per the original description) and save it.

            The pickled model at ``models/model_<model_name>.pkl`` (name
            lower-cased) is loaded and used to predict labels for
            ``self.X_transformed``. A RIPPER classifier is then fitted on
            those predicted labels with ``status`` as the positive class.
            The learned rule set is printed, and the fitted classifier is
            pickled to ``models/ruleset_<model_name>_model_<status>.pkl``
            for reuse.

            Args:
                model_name: Name of the saved model; lower-cased to build
                    both the input and the output file names.
                status: Label handed to RIPPER as ``pos_class``; it is also
                    embedded in the output file name via ``str(status)``.
        '''
        model_key = model_name.lower()

        # NOTE: unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        # The context manager closes the handle even if loading fails.
        with open('models/model_' + model_key + '.pkl', 'rb') as model_file:
            model = pickle.load(model_file)

        # Labels produced by the saved model become the RIPPER training target.
        y_predicted = model.predict(self.X_transformed)

        clf = lw.RIPPER()
        clf.fit(self.X_transformed,
                y_predicted,
                pos_class=status,
                random_state=42)
        print(clf.ruleset_.out_pretty())

        ruleset_path = ('models/ruleset_' + model_key + '_model_' +
                        str(status) + '.pkl')
        with open(ruleset_path, 'wb') as mod:
            pickle.dump(clf, mod)
def rule_based_classifier(training_data):
    """Fit a RIPPER rule-based classifier on ``training_data`` and save it.

    The features and the target are extracted with the ``util`` helpers, the
    data is split with ``test_size=0.7`` (so the classifier is fitted on the
    remaining 30% of the rows), the learned rule set is printed, and the
    fitted model is handed to ``util.save_data_model`` under the name
    ``'rule_based_classifier'``.

    Args:
        training_data: Data set passed to ``util.drop_target_variable`` and
            ``util.retrieve_target_variable``.

    Returns:
        None.
    """
    print('Generating the data model for a rule based classifier . . .\n')

    features = util.drop_target_variable(training_data)
    target = util.retrieve_target_variable(training_data)

    # Only the training partition is used; the held-out part is discarded.
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.7, random_state=1)

    ripper = lw.RIPPER()
    ripper.fit(X_train, y_train)
    print(ripper.ruleset_.out_pretty())

    print(
        'The data model for rule based classifier has been generated successfully!\n'
    )
    util.save_data_model(ripper, 'rule_based_classifier')
def RIPPER(FeatureMatrix, Labels, *, test_size=0.1, random_state=None,
           pos_class='1'):
    """Train a RIPPER classifier on a sparse feature matrix and evaluate it.

    The sparse matrix is converted to a sparse-backed DataFrame, split into
    train and test partitions, and a ``wittgenstein.RIPPER`` model is fitted
    on the training part. Predictions are then made for the test part.

    Args:
        FeatureMatrix: SciPy sparse matrix of features (one row per sample).
        Labels: Labels aligned with the rows of ``FeatureMatrix``.
        test_size: Share of samples held out for testing. Defaults to 0.1,
            the value that was previously hard-coded.
        random_state: Seed forwarded to ``train_test_split`` only. The
            default ``None`` keeps the original unseeded split; pass an int
            for a reproducible partition.
        pos_class: Label treated as the positive class by RIPPER. Defaults
            to ``'1'``, the value that was previously hard-coded.

    Returns:
        A ``(expected, predicted)`` tuple: the held-out labels and the
        model's predictions for the held-out samples.
    """
    features = pandas.DataFrame.sparse.from_spmatrix(FeatureMatrix)

    XTrain, XTest, LabelTrain, LabelTest = train_test_split(
        features,
        Labels,
        test_size=test_size,
        random_state=random_state)

    # Train the model on the training partition.
    clf = wittgenstein.RIPPER()
    clf.fit(XTrain, LabelTrain, class_feat=None, pos_class=pos_class)

    # Evaluate on the held-out partition.
    expected = LabelTest
    predicted = clf.predict(XTest)

    return (expected, predicted)
Beispiel #4
0
# Targets are the state labels of subject 1.
y = sub1_state_labels

# Hold out 20% of the samples for testing, stratified on the state labels.
(sub1_X_train,
 sub1_X_test,
 sub1_y_train,
 sub1_y_test) = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

"""
Q10 - Train training dataset using "RIPPER" model.
"""
# One RIPPER model per brain state, each treating its own state as the
# positive class. Every model gets a different random state.
pre_data_model = lw.RIPPER(random_state=42)
med_data_model = lw.RIPPER(random_state=36)
post_data_model = lw.RIPPER(random_state=28)

# Fit with 'Pre' as the positive class.
pre_data_model.fit(sub1_X_train, sub1_y_train, pos_class='Pre')

# Fit with 'Med' as the positive class.
med_data_model.fit(sub1_X_train, sub1_y_train, pos_class='Med')
# Split the RIPPER data set 80/20. It is split as a single frame (no separate
# X/y) because the class column is named later via class_feat in fit().
ripper_train, ripper_test = train_test_split(ripper_dataset,
                                             test_size=0.2,
                                             random_state=123)
# Report the shapes of the decision-tree and RIPPER partitions.
# NOTE(review): x_train / x_test are defined outside this view.
print("")
print("Train size for Decision Tree" + " ------------>  " + str(x_train.shape))
print("")
print("Test size for Decision Tree" + "  ------------>  " + str(x_test.shape))
print("")
print("")
print("Train size for Ripper" + " ------------>  " + str(ripper_train.shape))
print("")
print("Test size for Ripper" + "  ------------>  " + str(ripper_test.shape))
print("")
# Part e: fit RIPPER and predict on the test split.

ripper_clf = lw.RIPPER()
# The timer spans both fit and predict, so ripper_run_time covers the two.
ripper_start_time = time.time()
ripper_clf.fit(ripper_train, class_feat="target", random_state=123)
ripper_predict = ripper_clf.predict(ripper_test)
ripper_run_time = time.time() - ripper_start_time

# Part f: decision tree with the entropy criterion, depth limited to 5.

print("Decision Tree with Entropy")
print("")
entropy_data = DecisionTreeClassifier(criterion="entropy",
                                      random_state=123,
                                      max_depth=5)
# Timer is started here; the matching end measurement is not in this view.
entropy_start_time = time.time()
# The name is rebound to the value returned by fit().
entropy_data = entropy_data.fit(x_train, y_train)
entropy_pred_data = entropy_data.predict(x_test)
# Benchmark six classifiers on every data set.
# -------------------- #
for dataset in datasets:

    # Recover the name of the global variable bound to this entry by
    # identity, so the report can show a readable data set name.
    dataset_name = [n for n in globals() if globals()[n] is dataset][0]
    print('Dataset: ', dataset_name)

    # Load the data set and separate the features from the 'class' column.
    dataset = pd.read_csv(dataset)
    X = dataset.drop(columns='class')
    y = dataset['class']
    print('Size: ', len(X))
    print('-------------------')

    # Score each classifier; compute_scores returns a (mean, std) pair.
    m1, s1 = compute_scores(
        DecisionTreeClassifier(criterion="entropy", splitter='random'), X, y)
    m2, s2 = compute_scores(lw.RIPPER(), X, y)  # lw.IREP()
    m3, s3 = compute_scores(KNeighborsClassifier(n_neighbors=5), X, y)
    m4, s4 = compute_scores(GaussianNB(), X, y)
    m5, s5 = compute_scores(svm.SVC(C=1), X, y)
    m6, s6 = compute_scores(
        AdaBoostClassifier(n_estimators=100, random_state=0), X, y)
    ms = [m1, m2, m3, m4, m5, m6]
    ss = [s1, s2, s3, s4, s5, s6]

    # Tabulate the scores as percentages, with a two-standard-deviation band.
    results = pd.DataFrame(
        data={
            'CV score': [round(100 * i, 2) for i in ms],
            '+-2std': [round(200 * i, 2) for i in ss],
        },
        index=[
            'Decision-tree',
            'Rule-based',
            'K-neighbours',
            'Naive Bayes',
            'Support Vector Machine',
            'Adaboost',
        ],
    )
    print(results)
    # Exercise D #

    # Train/test split: features are sub_df without the 'target' column, and
    # 20% of the rows are held out for testing (fixed seed).
    # NOTE(review): `target` is defined outside this view - presumably sub_df['target']; confirm.
    trainData, testData, trainTarget, testTarget = train_test_split(sub_df.drop(['target'], axis=1), target,
                                                                    test_size=0.2, random_state=0)

    # Exercise E #

    # Work on copies so the RIPPER run cannot modify the original split.
    ripperTrainData    = trainData.copy()
    ripperTestData     = testData.copy()
    ripperTrainTarget  = trainTarget.copy()
    ripperTestTarget   = testTarget.copy()

    ripper = lw.RIPPER()                                              # Create the RIPPER classifier.

    # Only the fit call is timed; scoring happens after the end timestamp.
    # NOTE(review): time() is called bare - presumably `from time import time`; confirm.
    ripperStartTime  = time()                                         # Timestamp before fitting.
    ripper.fit(ripperTrainData, ripperTrainTarget)                    # Fit RIPPER on the training copy.
    ripperEndTime    = time()                                         # Timestamp after fitting.
    ripperScore      = ripper.score(ripperTestData, ripperTestTarget) # Score on the test copy (reported below as accuracy).


    print("\nElapsed time for ripper algorithm is:", ripperEndTime - ripperStartTime)
    print("Accuracy of Ripper algorithm is:", ripperScore)

    # Exercise F #

    # Separate copies of the same split for the tree model; the rest of this
    # exercise lies beyond the visible part of the file.
    treeTrainData   = trainData.copy()
    treeTestData    = testData.copy()