Ejemplo n.º 1
0
 def ovrSVM(self, X, Y, X_test):
     """Train a one-vs-rest linear SVM on (X, Y) and predict labels for X_test."""
     # n_jobs must be passed by keyword: in current scikit-learn the
     # estimator is OneVsRestClassifier's only positional parameter, so a
     # positional 100 raises a TypeError.
     ovrClassifier = OneVsRestClassifier(LinearSVC(C=100, random_state=0),
                                         n_jobs=100)
     # print is a function in Python 3; the statement form is a SyntaxError.
     print(ovrClassifier.get_params())
     ovrClassifier.fit(X, Y)
     Y_predict = ovrClassifier.predict(X_test)
     return Y_predict
Ejemplo n.º 2
0
def fit_model(X_train, Y_train):
    """Tune a one-vs-rest XGBoost classifier with randomized search.

    :param X_train: training feature matrix
    :param Y_train: training labels (cast to int before fitting)
    :return: the fitted RandomizedSearchCV object
    """
    base_clf = OneVsRestClassifier(XGBClassifier())

    # Column/row subsampling ratios explored by the search; the
    # `estimator__` prefix routes them to the wrapped XGBClassifier.
    search_space = {
        'estimator__colsample_bylevel': [0.7, 0.8],
        'estimator__colsample_bytree': [0.7, 0.8],
        'estimator__colsample_bynode': [0.7, 0.8],
        'estimator__subsample': [0.7, 0.8]
        # 'estimator__max_depth': [6, 8, 10, 12, 14],
        # 'estimator__n_estimators': [100],
        # 'estimator__min_child_weight': [1, 2, 3, 5, 7]
    }
    print(base_clf.get_params())

    search = RandomizedSearchCV(estimator=base_clf,
                                param_distributions=search_space,
                                n_jobs=-1,
                                n_iter=15,
                                cv=5,
                                verbose=1)

    # Labels must be integer-coded for the classifier.
    search.fit(X_train, Y_train.astype('int'))

    print(search.best_score_)
    print(search.best_params_)
    print('test')

    return search
Ejemplo n.º 3
0
def SVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Fit a one-vs-rest RBF SVM (with an inner grid search over
    `Parameters`) on the estimation set and predict the validation set.

    :return: dict with predictions, estimator parameters and the fitted model
    """
    base = SVC(kernel='rbf', probability=True)
    tuned = GridSearchCV(base, Parameters)
    clf = OneVsRestClassifier(tuned)
    clf.fit(XEstimate, class_labels)

    return {
        "Yvalidate": clf.predict(XValidate),
        "EstParameters": clf.get_params(),
        "clf": clf,
    }
Ejemplo n.º 4
0
class CustomSVCImplementation(ModelImplementation):
    """One-vs-rest wrapper around scikit-learn's SVC.

    With no parameters a default linear, probability-calibrated,
    class-weight-balanced SVC is used; otherwise the given parameters are
    forwarded to SVC directly.
    """

    def __init__(self, log: Log = None, **params: Optional[dict]):
        super().__init__(log)
        if params:
            self.inner_model = SVC(**params)
        else:
            # Default configuration when no parameters were supplied.
            self.inner_model = SVC(kernel='linear',
                                   probability=True,
                                   class_weight='balanced')
        self.params = params
        self.model = OneVsRestClassifier(self.inner_model)
        self.classes = None

    def fit(self, train_data):
        """Fit the model on a dataset.

        :param train_data: data to train the model
        """
        self.classes = np.unique(train_data.target)
        self.model.fit(train_data.features, train_data.target)
        return self.model

    def predict(self,
                input_data,
                is_fit_pipeline_stage: Optional[bool] = None):
        """Predict class labels.

        :param input_data: data with features to process
        :param is_fit_pipeline_stage: is this fit or predict stage for pipeline
        """
        return self.model.predict(input_data.features)

    def predict_proba(self, input_data):
        """Predict class-membership probabilities.

        :param input_data: data with features to process
        """
        return self.model.predict_proba(input_data.features)

    def get_params(self):
        """Return the parameters that can be optimized for this operation."""
        return self.model.get_params()

    @property
    def classes_(self):
        # Classes seen during fit (None before fit is called).
        return self.classes
Ejemplo n.º 5
0
def prepare_data_frame_and_build_model(visu=True):
    """Train a one-vs-rest logistic regression on TF-IDF features and
    report multi-label evaluation metrics plus a few sample predictions.

    :param visu: when True, print the aggregate evaluation scores
    """
    df, mb, labels = prepare_data_frame(load_raw=False)

    tfidf_vect = TfidfVectorizer(max_features=50000)

    x_train, x_test, y_train, y_test = train_test_split(df[['clean_x', 'title_not_modified']], labels)
    x_train_tf_idf = tfidf_vect.fit_transform(x_train['clean_x'])
    x_test_tf_idf = tfidf_vect.transform(x_test['clean_x'])

    clf = OneVsRestClassifier(LogisticRegression())
    # Alternatives tried: RandomForest (score 0.60939, very slow),
    # SVC, DecisionTree (score 0.471, slow fit).
    clf.fit(x_train_tf_idf, y_train)

    print("Classifier parameters :")
    print(clf.get_params())

    # `seuil` ("threshold") is presumably a module-level decision
    # threshold defined elsewhere -- TODO confirm.
    threshold_decision = np.vectorize(lambda t: 1 if t > seuil else 0)
    y_pred = threshold_decision(clf.predict_proba(x_test_tf_idf))
    f1score = f1_score(y_test, y_pred, average='micro')
    eval1 = hamming_score(y_test, y_pred)
    eval2 = true_positive(y_test, y_pred)
    eval3 = false_positive(y_test, y_pred)
    eval4 = true_negative(y_test, y_pred)
    eval5 = false_negative(y_test, y_pred)

    for i in range(10):
        # Bug fix: randint's upper bound is inclusive, so
        # randint(0, len(x_test)) could index one past the end.
        random_pos = random.randrange(len(x_test))
        y_p = predict(overview=x_test['clean_x'].values[random_pos], multilabel_binarizer=mb, classifier=clf,
                      tfidf_vect=tfidf_vect)
        print("Title : ", x_test['title_not_modified'].values[random_pos], x_test['clean_x'].values[random_pos])
        print('Predicted : ', y_p[0])
        print('Actual :', mb.inverse_transform(y_test)[random_pos])
        print("__________________________________________________")
    for i in range(5):
        random_pos = random.randrange(len(x_test))
        # NOTE(review): the tfidf_vect / tfidf_matrix arguments look
        # swapped (the matrix is passed as the vectorizer and vice
        # versa) -- verify against get_nearest_films' signature.
        get_nearest_films(overview=x_test['clean_x'].values[random_pos], title=x_test['title_not_modified']
                          .values[random_pos], tfidf_vect=x_train_tf_idf,
                          tfidf_matrix=tfidf_vect, df=x_train)
    if visu:
        print("Hamming SCORE ", eval1)
        print('F1 SCORE ', f1score)
        print("Taux de vrai positifs ", eval2)
        print("Taux de faux positifs ", eval3)
        print("Taux de vrai négatifs ", eval4)
        print("Taux de faux négatifs ", eval5)
Ejemplo n.º 6
0
class Classifier(object):
    """Classifier base class. Uses OneVsRest for multiclass problems."""

    def __init__(self, clf, x_train, y_train):
        # Wrap in one-vs-rest only when there are more than two classes;
        # a binary problem uses the estimator directly.
        n_classes = len(set(y_train))
        if n_classes > 2:
            self.clf = OneVsRestClassifier(clf)
        else:
            self.clf = clf
        self.clf.fit(x_train, y_train)

    def __call__(self, x_val):
        """Return class-membership probabilities for x_val."""
        return self.clf.predict_proba(x_val)

    def describe(self):
        """Return the estimator's non-callable parameters as a dict.

        Bug fix: dict.iteritems() does not exist in Python 3 -- use items().
        """
        return dict(
            (k, v)
            for k, v in self.clf.get_params().items()
            if not callable(v))
Ejemplo n.º 7
0
def SVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Grid-search an RBF SVM inside a one-vs-rest wrapper, fit it on the
    estimation set and predict labels for the validation set.

    :return: dict with predictions, estimator parameters and the fitted model
    """
    svcClassifier = SVC(kernel='rbf', probability=True)
    gridSearcher = GridSearchCV(svcClassifier, Parameters)
    clf = OneVsRestClassifier(gridSearcher)
    # Bug fix: get_params is a method -- without the call parentheses only
    # the bound-method object was printed.
    print(clf.get_params())
    clf.fit(XEstimate, class_labels)
    Yvalidate = clf.predict(XValidate)
    EstParameters = clf.get_params()
    # Debug statistic: the smallest per-sample maximum class probability
    # (a rough confidence floor over the validation set).
    mini = 1
    for probs in clf.predict_proba(XValidate):
        mini = min(max(probs), mini)
    print(clf.get_params())
    return {"Yvalidate": Yvalidate, "EstParameters": EstParameters, "clf": clf}
Ejemplo n.º 8
0
class CustomSVC:
    """Thin wrapper around a one-vs-rest linear SVC.

    The underlying model is built on the first call to fit(); before that
    `fitted_model` and `classes_` are None.
    """

    def __init__(self):
        self.fitted_model = None
        self.classes_ = None

    def fit(self, train_data: np.array, target_data: np.array):
        """Fit a fresh one-vs-rest SVC and record the observed classes."""
        base = SVC(kernel='linear', probability=True, class_weight='balanced')
        self.fitted_model = OneVsRestClassifier(base)
        self.classes_ = np.unique(target_data)
        self.fitted_model.fit(train_data, target_data)
        return self.fitted_model

    def predict(self, data_to_predict: np.array):
        """Predict class labels for the given data."""
        return self.fitted_model.predict(data_to_predict)

    def predict_proba(self, data_to_predict: np.array):
        """Predict class-membership probabilities for the given data."""
        return self.fitted_model.predict_proba(data_to_predict)

    def get_params(self):
        """Expose the wrapped estimator's parameters."""
        return self.fitted_model.get_params()
Ejemplo n.º 9
0
    def train(self, input, output, nb_validation_split=1, shuffle_dataset=[True], kernel=['rbf'], degree=[3], gamma=['auto'], c=[1.0]):
        """Grid-search a one-vs-rest SVC over the given hyper-parameter lists.

        :param input: feature matrix
        :param output: target matrix; output.shape[1] is passed to the
            MlUtils cross-validation builder
        :param nb_validation_split: number of CV splits for MlUtils
        :param shuffle_dataset: shuffle flag(s) for the CV splitter
        :param kernel: candidate SVC kernels for the grid search
        :param degree: candidate polynomial degrees
        :param gamma: candidate kernel coefficients
        :param c: candidate regularization strengths
        :return: (classifier, grid_result) -- `classifier` is the unfitted
            template; the fitted models live inside `grid_result`

        NOTE(review): the mutable list defaults are shared across calls --
        harmless here since they are never mutated, but fragile. The
        list-valued kwargs handed to SVC below are placeholders only; the
        grid search replaces them via the `estimator__*` grid entries.
        """
        classifier = OneVsRestClassifier(SVC(kernel=kernel, degree=degree, gamma=gamma, coef0=0.0,
                 tol=1e-3, C=c, shrinking=True,
                 verbose=1, max_iter=-1))

        # Held-out split; note x_test / y_test are currently unused.
        x_train, x_test, y_train, y_test = train_test_split(input, output, test_size=0.33)

        # TODO : degree parameter should only be used when kernel is poly
        param_grid = dict(estimator__C=c, estimator__gamma=gamma, estimator__kernel=kernel, estimator__degree=degree)

        cv = MlUtils.get_cross_validation(nb_validation_split, shuffle_dataset, output.shape[1])

        print(classifier.get_params().keys())

        grid_search = GridSearchCV(classifier, param_grid, cv=cv)
        grid_result = grid_search.fit(x_train, y_train)

        MlUtils.print_gridsearch_results(grid_result)

        return classifier, grid_result
Ejemplo n.º 10
0

start_time = time.time()
# One-vs-rest RBF SVM; the grid below tunes C and gamma of the wrapped
# estimator (hence the `estimator__` prefix).
model = OneVsRestClassifier(svm.SVC(kernel='rbf', cache_size=500))
param_grid = {
    "estimator__C": [0.005, 0.05, 500],
    "estimator__gamma": [0.001, 0.01, 1, 10, 100, 1000]
}
# NOTE(review): `score_func` was removed from modern scikit-learn; current
# versions expect scoring=make_scorer(f1_score). Confirm the installed
# sklearn version before changing.
clf_grid = GridSearchCV(model, param_grid=param_grid, score_func=f1_score)
clf_grid.fit(train_X_ch2, train_y)
print("--- %s seconds ---" % (time.time() - start_time))

# Bug fix: the snippet referenced an undefined `clf`; the fitted search
# object is `clf_grid`.
clf_grid.best_estimator_
clf_grid.best_params_
clf_grid.get_params()

clf2 = OneVsRestClassifier(svm.SVC(kernel='rbf', C=1000, gamma=0.001))

print("--- %s seconds ---" % (time.time() - start_time))
clf2.fit(train_X_ch2, train_y)
prediction = clf2.predict(test_X_ch2)
result = prediction.tolist()
misc.writeResult("./result/submission_ch2_fit_2000_1000_0.0001_lemma.csv", result)

# Bug fix: predictions on the PCA features must come from the model just
# fitted on them (`clf2`), not the undefined `clf`.
clf2 = OneVsRestClassifier(svm.SVC(kernel='rbf', C=1000, gamma=0.001))
clf2.fit(train_X_pca, train_y)
prediction = clf2.predict(test_X_pca)
result = prediction.tolist()
misc.writeResult("./result/submission_pca_fit_2000_1000_0.0001.csv", result)
Ejemplo n.º 11
0

start_time = time.time()
# One-vs-rest RBF SVM; the grid below tunes C and gamma of the wrapped
# estimator (hence the `estimator__` prefix).
model = OneVsRestClassifier(svm.SVC(kernel='rbf', cache_size=500))
param_grid = {
    "estimator__C": [0.005, 0.05, 500],
    "estimator__gamma": [0.001, 0.01, 1, 10, 100, 1000]
}
# NOTE(review): `score_func` was removed from modern scikit-learn; current
# versions expect scoring=make_scorer(f1_score). Confirm the installed
# sklearn version before changing.
clf_grid = GridSearchCV(model, param_grid=param_grid, score_func=f1_score)
clf_grid.fit(train_X_ch2, train_y)
print("--- %s seconds ---" % (time.time() - start_time))

# Bug fix: the snippet referenced an undefined `clf`; the fitted search
# object is `clf_grid`.
clf_grid.best_estimator_
clf_grid.best_params_
clf_grid.get_params()

clf2 = OneVsRestClassifier(svm.SVC(kernel='rbf', C=1000, gamma=0.001))

print("--- %s seconds ---" % (time.time() - start_time))
clf2.fit(train_X_ch2, train_y)
prediction = clf2.predict(test_X_ch2)
result = prediction.tolist()
misc.writeResult("./result/submission_ch2_fit_2000_1000_0.0001_lemma.csv", result)

# Bug fix: predictions on the PCA features must come from the model just
# fitted on them (`clf2`), not the undefined `clf`.
clf2 = OneVsRestClassifier(svm.SVC(kernel='rbf', C=1000, gamma=0.001))
clf2.fit(train_X_pca, train_y)
prediction = clf2.predict(test_X_pca)
result = prediction.tolist()
misc.writeResult("./result/submission_pca_fit_2000_1000_0.0001.csv", result)
Ejemplo n.º 12
0
# scoring = "arrucary"
# Baseline: grid-search an XGBoost classifier with accuracy scoring over
# the predefined folds `kf` on the resampled training data.
grid_no_up = GridSearchCV(XGB,
                          param_grid=params_xgb,
                          cv=kf,
                          scoring='accuracy').fit(X_train_resampled,
                                                  y_train_resampled)

print(grid_no_up.best_score_)
print(grid_no_up.best_params_)
print(grid_no_up.cv_results_)

# Use OneVsRestClassifier
# Multi-class softmax objective wrapped in a one-vs-rest scheme.
xgb_ovr_clf = OneVsRestClassifier(
    xgboost.XGBClassifier(objective="multi:softmax", num_class=3))
# Get params' key
pprint(xgb_ovr_clf.get_params())
# another way to print params
xgb_ovr_clf.estimator.get_params().keys()
# Set ranges of parameters
# booster types: booster = ['gbtree']
random_grid = {
    'estimator__n_estimators': [100, 200, 300],  # Number of trees
    'estimator__max_depth': [6, 8, 10],  # Maximum number of levels in tree
    'estimator__validate_parameters': [
        True
    ],  # When set to True, XGBoost will perform validation of input parameters to check whether a parameter is used or not.
    'estimator__min_child_weight': [
        1, 2, 3
    ],  # the minimum weight (or number of samples if all samples have a weight of 1) required in order to create a new node in the tree.
    #Smaller weight, smaller samples. If too big, will result in overfitiing
    'estimator__gamma':
# Now, we go ahead and train the hyper parameters of the classifier using GridSearch and CV.

# In[335]:

# Hyper-parameter grid for the one-vs-rest SGD classifier; the
# `estimator__` prefix routes each entry to the wrapped SGDClassifier.
tuned_parameters = [{
    'estimator__alpha': [0.001, 0.01, 0.1, 0.5],
    'estimator__penalty': ['l1', 'l2', 'elasticnet'],
    'estimator__loss': ['log', 'modified_huber']
}]
scores = ['precision']  #['precision', 'recall']  # typo fix; list is unused below
# NOTE(review): SGDClassifier's `n_iter` was renamed `max_iter` in
# scikit-learn >= 0.21 -- confirm the installed version.
estimator = OneVsRestClassifier(
    SGDClassifier(random_state=0,
                  learning_rate='optimal',
                  class_weight='balanced',
                  n_iter=100))
estimator.get_params().keys()
gsearch = GridSearchCV(estimator,
                       tuned_parameters,
                       cv=5,
                       scoring='average_precision')
gsearch.fit(X_train, Y_train)
# print is a function in Python 3; the statement form is a SyntaxError.
print(gsearch.best_score_)
print(gsearch.best_params_)

# # Top Informative Features for Each Aspect
# We train our model using the optimized paramters obtain from the GridSearch. Now, we can find a the most informative features in for each category.

# In[336]:

best_classifier = OneVsRestClassifier(
    SGDClassifier(alpha=0.001,
Ejemplo n.º 14
0
def linearKernel(parameters):
    """5-fold evaluation of a linear one-vs-rest SVM on the glass dataset.

    For each outer fold, an inner 80/20 split selects the best C from the
    module-level `cvalues`, a fresh model is refit on the full training
    folds with that C, and test accuracy is reported.

    :param parameters: unused (kept for interface compatibility)
    """
    test_accuracy = []
    test_accuracy_with_params = []
    k_test_fold = StratifiedKFold(5)
    for (train, test) in (k_test_fold.split(normalized_glass_features,
                                            glass_type)):

        # Held-out test fold
        test_dataset_features = normalized_glass_features[test]
        test_dataset_types = glass_type[test]

        # Remaining folds used for training / validation
        training_dataset_features = normalized_glass_features[train]
        training_dataset_types = glass_type[train]

        # 80/20 split of the training folds; the 20% is the validation set
        (training_features, validation_features, training_glassType,
         validation_glassType) = train_test_split(training_dataset_features,
                                                  training_dataset_types,
                                                  train_size=0.80,
                                                  random_state=1)

        validation_acuracies = []
        # Bug fix: time.clock() was removed in Python 3.8;
        # perf_counter() is the documented replacement for elapsed time.
        start_time_training = time.perf_counter()

        # Train one model per candidate C; score it on the validation set
        for cvalue in cvalues:
            classifier = OneVsRestClassifier(SVC(kernel="linear", C=cvalue))
            classifier.fit(training_features, training_glassType)
            validation_true, validation_pred = validation_glassType, classifier.predict(
                validation_features)
            accuracy_Validationset = metrics.accuracy_score(
                validation_true, validation_pred)
            validation_acuracies.append(
                (classifier.get_params().get('estimator__C'),
                 accuracy_Validationset))
        # Ascending by accuracy: the best (C, accuracy) pair ends up last
        validation_acuracies.sort(key=lambda val: val[1])
        print("sorted validation_acuracies :", validation_acuracies)

        print(" optimal hyperparameter with accuracy is :",
              validation_acuracies[-1])

        # Refit on all training folds using the selected C
        classifier_1 = OneVsRestClassifier(
            SVC(kernel="linear", C=validation_acuracies[-1][0]))
        classifier_1.fit(training_dataset_features, training_dataset_types)

        end_time_training = time.perf_counter()
        print("Time taken to train for one fold for linear kernel is :",
              (end_time_training - start_time_training))

        test_true, test_pred = test_dataset_types, classifier_1.predict(
            test_dataset_features)
        accuracy_test = metrics.accuracy_score(test_true, test_pred)
        print("accuracy_test  :", accuracy_test)
        test_accuracy_with_params.append(
            (classifier_1.get_params().get('estimator__C'), accuracy_test))
        test_accuracy.append(accuracy_test)
    print("Test Accuracies for all fold with params :",
          test_accuracy_with_params)
    print("Test Accuracies for all fold:", test_accuracy)
    average_accuracy = sum(test_accuracy) / len(test_accuracy)
    print("average accuracy for linear SVM is :", average_accuracy)
Ejemplo n.º 15
0
Archivo: SVMs.py Proyecto: pkumusic/HCE
 def ovrSVM(self, X, Y, X_test):
     """Train a one-vs-rest linear SVM on (X, Y) and predict labels for X_test."""
     # n_jobs must be passed by keyword: in current scikit-learn the
     # estimator is OneVsRestClassifier's only positional parameter, so a
     # positional 100 raises a TypeError.
     ovrClassifier = OneVsRestClassifier(LinearSVC(C=100, random_state=0),
                                         n_jobs=100)
     # print is a function in Python 3; the statement form is a SyntaxError.
     print(ovrClassifier.get_params())
     ovrClassifier.fit(X, Y)
     Y_predict = ovrClassifier.predict(X_test)
     return Y_predict