Code example #1
import pickle

from sklearn.svm import LinearSVC

# FeatureSelection and MachineLearning are project-local helper classes;
# their imports are not shown in the original snippet.

def dataset3_svc_sgd(dataset):

    x_original = dataset.loc[:, dataset.columns != 'labels']
    labels = dataset['labels']

    fselect = FeatureSelection(dataset, x_original, labels)

    model_lsvc = LinearSVC(C=0.1, penalty="l1", dual=False)

    # model = LinearSVC with L1 penalty
    X_fit_model, X_transf_model, column_selected, feature_importances, feature_importances_DF, dataset_features = \
        fselect.select_from_model_feature_elimination(model=model_lsvc)

    ml = MachineLearning(X_transf_model, labels, classes=['pos', 'neg'])

    filename = 'crmapp/ml_models/dataset3_all_'

    print('best model sgd')
    best_sgd_model = ml.train_best_model('sgd')

    print('score_test_set_sgd')
    print(ml.score_testset(best_sgd_model))

    print('roc curve')
    ml.plot_roc_curve(best_sgd_model)

    pickle.dump(best_sgd_model, open(filename + 'sgd_model_15092020.sav',
                                     'wb'))
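
A minimal sketch of how the saved model could be loaded back for scoring. The path mirrors the filename prefix built above; X_new is a hypothetical feature matrix containing the same columns kept by the feature selection step.

import pickle

# Reload the persisted SGD model from disk.
with open('crmapp/ml_models/dataset3_all_sgd_model_15092020.sav', 'rb') as f:
    loaded_model = pickle.load(f)

# X_new is hypothetical; it must have the same selected features as the training data.
# predictions = loaded_model.predict(X_new)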
Code example #2
from sklearn.feature_selection import mutual_info_classif
from sklearn.svm import LinearSVC

def dataset3_univariate_svm(dataset):

    x_original = dataset.loc[:, dataset.columns != 'labels']
    labels = dataset['labels']

    fselect = FeatureSelection(dataset, x_original, labels)

    model_lsvc = LinearSVC(C=0.1, penalty="l1", dual=False)  # defined but not used in this example

    # Select KBest with mutual_info_classif
    X_fit_univariate, X_transf_univariate, column_selected, scores, dataset_features = \
        fselect.univariate(score_func=mutual_info_classif, mode='k_best', param=500)

    ml = MachineLearning(X_transf_univariate, labels, classes=['pos', 'neg'])

    filename = 'crmapp/ml_models/dataset3_all_'

    # test the models
    print('best model svm')
    best_svm_model = ml.train_best_model('svm')

    print('score_test_set_svm')
    print(ml.score_testset(best_svm_model))

    print('roc curve')
    ml.plot_roc_curve(best_svm_model)
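
For reference, a minimal plain scikit-learn sketch of the univariate k-best step that fselect.univariate(...) presumably wraps here, assuming x_original and labels as defined in the function above.

from sklearn.feature_selection import SelectKBest, mutual_info_classif

# Keep the 500 features with the highest mutual information scores.
selector = SelectKBest(score_func=mutual_info_classif, k=500)
X_transf = selector.fit_transform(x_original, labels)
column_selected = selector.get_support(indices=True)  # indices of the kept columns
scores = selector.scores_                             # per-feature mutual information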
Code example #3
import pickle

from sklearn.ensemble import ExtraTreesClassifier

def dataset1_tree_svm(dataset):

    x_original = dataset.loc[:, dataset.columns != 'labels']
    labels = dataset['labels']

    fselect = FeatureSelection(dataset, x_original, labels)

    model_tree = ExtraTreesClassifier(n_estimators=50)

    # Select from model: tree classifier with 50 estimators
    X_fit_model, X_transf_model, column_selected, feature_importances, feature_importances_DF, dataset_features = \
        fselect.select_from_model_feature_elimination(model=model_tree)

    ml = MachineLearning(X_transf_model, labels, classes=['pos', 'neg'])

    filename = 'crmapp/ml_models/dataset1_all_'

    # test the models
    print('best model svm')
    best_svm_model = ml.train_best_model('svm')

    print('score_test_set_svm')
    print(ml.score_testset(best_svm_model))

    print('roc curve')
    ml.plot_roc_curve(best_svm_model)

    pickle.dump(best_svm_model, open(filename + 'svm_model_15092020.sav',
                                     'wb'))
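
For reference, a minimal plain scikit-learn sketch of the model-based selection that select_from_model_feature_elimination(...) presumably wraps, assuming x_original and labels as defined in the function above.

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

# Fit the tree ensemble, then keep the features whose impurity-based
# importance exceeds the default threshold (the mean importance).
tree = ExtraTreesClassifier(n_estimators=50).fit(x_original, labels)
selector = SelectFromModel(tree, prefit=True)
X_transf = selector.transform(x_original)
feature_importances = tree.feature_importances_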
Code example #4
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

def test_feature_selection(dataset):
    x_original = dataset.loc[:, dataset.columns != 'labels']
    labels = dataset['labels']

    fselect = FeatureSelection(dataset, x_original, labels)
    """
    # Select KBest with mutual_info_classif
    X_fit_univariate, X_transf_univariate, column_selected, scores, dataset_features = \
        fselect.univariate(score_func=mutual_info_classif, mode='k_best', param=1000)
    

    # Select Percentile with f_classif
    X_fit_univariate, X_transf_univariate, column_selected, scores, dataset_features = \
        fselect.univariate(score_func=f_classif, mode='percentile', param=0.6)

    # Select only the features with a p-value below 0.05 (fpr mode)
    X_fit_univariate, X_transf_univariate, column_selected, scores, dataset_features = \
        fselect.univariate(score_func=f_classif, mode='fpr', param=0.05)

    # shape of the transformed dataset
    print(X_transf_univariate.shape)
    # columns selected by highest score
    print(fselect.features_scores(x_original, scores, column_selected, False))
 


    # Recursive feature elimination
    # estimator = SVC with linear kernel, 5-fold cross-validation
    X_fit_rfe, X_transf_rfe, column_selected, ranking, dataset_features = \
        fselect.recursive_feature_elimination(cross_validation=True, cv=5)
    

    # shape of the transformed dataset
    print(X_transf_rfe.shape)
    # names of the selected columns
    print(dataset.columns[column_selected])
    # scores
    score_methods(x_original, X_transf_rfe, labels)
    """
    # Select from model

    # L1-based feature selection: linear_model.LogisticRegression / svm.LinearSVC for classification.
    # With SVMs and logistic regression, the parameter C controls the sparsity:
    # the smaller C, the fewer features selected (see the sketch after this example).
    model_lsvc = LinearSVC(C=1, penalty="l1", dual=False)
    model_lr = LogisticRegression(C=0.1, penalty="l2", dual=False)
    model_tree = ExtraTreesClassifier(n_estimators=50)
    """

    # Select from model
    #model= Tree classifier. 50 estiamtors
    X_fit_model, X_transf_model,column_selected,feature_importances,feature_importances_DF,dataset_features= \
        fselect.select_from_model_feature_elimination(model=model_tree)

    #model= logistic regression

    X_fit_model, X_transf_model,column_selected,feature_importances,feature_importances_DF,dataset_features= \
        fselect.select_from_model_feature_elimination( model=model_lr)
        """
    #model= linearsvs
    X_fit_model, X_transf_model,column_selected,feature_importances,feature_importances_DF,dataset_features= \
        fselect.select_from_model_feature_elimination(model=model_lsvc)
    """
    print('original shape', dataset.shape)
    print('reduce shape', fselect.dataset.shape)
    print('dataset reduced with column names\n', fselect.dataset.head(3))
    print('feature importances\n',feature_importances_DF)
    print('scores')
    score_methods(x_original,X_transf_model,labels)
    print(fselect.dataset)
            """

    return fselect.dataset
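
A minimal sketch of the L1-based route noted in the comments above, assuming x_original and labels as defined in the function: with penalty="l1" the LinearSVC learns sparse coefficients, SelectFromModel keeps only the features with non-zero weights, and a smaller C keeps fewer of them.

from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

# Sparse L1 fit: smaller C -> more zero coefficients -> fewer features kept.
lsvc = LinearSVC(C=0.1, penalty="l1", dual=False).fit(x_original, labels)
selector = SelectFromModel(lsvc, prefit=True)
X_sparse = selector.transform(x_original)
print(x_original.shape, '->', X_sparse.shape)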