def stackedmodel_p(train, query, train_cols):
    # instantiating the base models and the meta-classifier
    rf = RandomForestClassifier(n_estimators=150)
    hybrid_model_AB = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=10)

    lrd = LogisticRegression(solver='lbfgs', max_iter=300)
    clf_stackd = StackingClassifier(classifiers=[rf, hybrid_model_AB], meta_classifier=lrd,
                                    use_probas=True, use_features_in_secondary=True)

    # cleaning data by applying preprocessing
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])

    #fitting the model
    print(clf_stackd.fit(train[train_cols], train['malicious']))

    #cross-validating and evaluating the performance of model
    scores = cv.cross_val_score(clf_stackd, train[train_cols], train['malicious'], cv=30)
    print('Estimated score Random Forest & AdaBoost: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))

    #predicting the target query from the model
    query['result'] = clf_stackd.predict(query[train_cols])

    #printing the predicted results
    print(query[['URL', 'result']])
    return query['result']
def stackedmodel_n(train, query, train_cols):
    # instantiating the base models and the meta-classifier
    rf = RandomForestClassifier(n_estimators=150)
    tree = DecisionTreeClassifier(min_impurity_decrease=0)

    lrb = LogisticRegression(solver='lbfgs', max_iter=300)
    clf_stackb = StackingClassifier(classifiers=[rf, tree], meta_classifier=lrb,
                                    use_probas=True, use_features_in_secondary=True)

    # cleaning data by applying preprocessing
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])

    #fitting the model
    print(clf_stackb.fit(train[train_cols], train['malicious']))

    #cross-validating and evaluating the performance of model
    scores = cv.cross_val_score(clf_stackb, train[train_cols], train['malicious'], cv=30)
    print('Estimated score Random Forest & Decision Tree: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))

    #predicting the target query from the model
    query['result'] = clf_stackb.predict(query[train_cols])

    #printing the predicted results
    print(query[['URL', 'result']])
    return query['result']
def stackedmodel_e(train, query, train_cols):
    # instantiating the base models and the meta-classifier
    rf = RandomForestClassifier(n_estimators=150)
    gnb = GaussianNB()
    tree = DecisionTreeClassifier(min_impurity_decrease=0)
    mlp = MLPClassifier(alpha=0.001, hidden_layer_sizes=(100, 100, 100), max_iter=900)
    hybrid_model_AB = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=10)
    clf = svm.SVC(probability=True)  # probability=True so the stack can call predict_proba

    lrm = LogisticRegression(solver='lbfgs', max_iter=1000)
    clf_stackm = StackingClassifier(classifiers=[rf, gnb, tree, mlp, hybrid_model_AB, clf],
                                    meta_classifier=lrm, use_probas=True,
                                    use_features_in_secondary=True)

    # cleaning data by applying preprocessing
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])

    #fitting the model
    print(clf_stackm.fit(train[train_cols], train['malicious']))

    #cross-validating and evaluating the performance of model
    scores = cv.cross_val_score(clf_stackm, train[train_cols], train['malicious'], cv=30)
    print('Estimated score Random Forest, Gaussian Naive Bayes, Decision Tree, MLP, AdaBoost & SVM: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))

    #predicting the target query from the model
    query['result'] = clf_stackm.predict(query[train_cols])

    #printing the predicted results
    print(query[['URL', 'result']])
    return query['result']
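The three stacking helpers above rely on names that the snippet never imports. A minimal sketch of the assumed imports and a hypothetical call, assuming mlxtend's StackingClassifier (which accepts classifiers, meta_classifier, use_probas and use_features_in_secondary), cv aliased to sklearn.model_selection, and pandas DataFrames with a 'malicious' label column and a 'URL' column:

# Assumed imports (not part of the original snippet)
import pandas as pd
from sklearn import preprocessing, svm
from sklearn import model_selection as cv          # provides cross_val_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from mlxtend.classifier import StackingClassifier

# Hypothetical usage; file names and column layout are illustrative only.
# train = pd.read_csv('train_features.csv')
# query = pd.read_csv('query_features.csv')
# train_cols = [c for c in train.columns if c not in ('URL', 'malicious')]
# predictions = stackedmodel_p(train, query, train_cols)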
Example #4
def modelfit(alg, dtrain, dtest, predictors, target, IDcol, filename):
    # Fit the algorithm on the data
    alg.fit(dtrain[predictors], dtrain[target])

    # Predict training set:
    dtrain_predictions = alg.predict(dtrain[predictors])

    # Perform cross-validation:
    cv_score = cross_validate.cross_val_score(alg, dtrain[predictors], dtrain[target], cv=20,
                                              scoring='neg_mean_squared_error')
    cv_score = np.sqrt(np.abs(cv_score))

    # Print model report:
    print("\nModel Report")
    print("RMSE : %.4g" % np.sqrt(metrics.mean_squared_error(dtrain[target].values, dtrain_predictions)))
    print("CV Score : Mean - %.4g | Std - %.4g | Min - %.4g | Max - %.4g" % (
        np.mean(cv_score), np.std(cv_score), np.min(cv_score), np.max(cv_score)))

    # Predict on testing data:
    dtest[target] = alg.predict(dtest[predictors])

    # Export submission file:
    IDcol.append(target)
    submission = pd.DataFrame({x: dtest[x] for x in IDcol})
    submission.to_csv(filename, index=False)
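A hypothetical call to modelfit, assuming cross_validate is aliased to sklearn.model_selection, metrics is sklearn.metrics, dtrain/dtest are pandas DataFrames, and using a GradientBoostingRegressor purely as an example estimator; the 'target' and 'ID' column names are placeholders:

# Assumed imports (not part of the original snippet)
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection as cross_validate
from sklearn.ensemble import GradientBoostingRegressor

# Hypothetical usage; column and file names are illustrative only.
# dtrain = pd.read_csv('train.csv')
# dtest = pd.read_csv('test.csv')
# predictors = [c for c in dtrain.columns if c not in ('target', 'ID')]
# gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.05)
# modelfit(gbr, dtrain, dtest, predictors, 'target', ['ID'], 'submission_gbr.csv')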
Example #5
def forest_classifier(train, query, train_cols):
    # instantiating and fitting the model
    rf = RandomForestClassifier(n_estimators=150)
    print(rf.fit(train[train_cols], train['firmware_bool']))

    # cross-validating and evaluating the performance of the model
    scores = cv.cross_val_score(rf,
                                train[train_cols],
                                train['firmware_bool'],
                                cv=30)
    print('Estimated score RandomForestClassifier: %0.5f (+/- %0.5f)' %
          (scores.mean(), scores.std() / 2))

    # predicting and printing the results for the query set
    query['result'] = rf.predict(query[train_cols])
    print(query[['url', 'result']])
Example #6
def cross_validation():
    # load the dataset: features in all columns but the last, label in the last
    dataset = load_data()
    print(dataset)
    row, col = dataset.shape
    X = dataset[:, :col - 1]
    y = dataset[:, -1]

    # fit an RBF-kernel SVM and estimate its accuracy with 5-fold cross-validation
    clf = SVC(kernel='rbf', C=1000)
    clf.fit(X, y)
    scores = cs.cross_val_score(clf, X, y, cv=5)
    print("Accuracy: %0.2f (+- %0.2f)" % (scores.mean(), scores.std()))
    return clf
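cross_validation() depends on a load_data helper and two names that are not imported in the snippet. A minimal sketch of the assumptions: cs aliased to sklearn.model_selection, SVC from sklearn.svm, and a purely hypothetical load_data stand-in that returns a NumPy array whose last column is the label:

# Assumed imports (not part of the original snippet)
import numpy as np
from sklearn.svm import SVC
from sklearn import model_selection as cs

def load_data(path='dataset.csv'):
    # Hypothetical stand-in for the real loader: features in every column
    # but the last, label in the last column.
    return np.loadtxt(path, delimiter=',')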
Example #7
def svm_classifier(train, query, train_cols):  # feature data, query feature data, feature columns with string fields removed
    clf = svm.SVC()  # the classifier

    # cleaning data by applying preprocessing
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])

    # fitting the model
    print(clf.fit(train[train_cols], train['firmware_bool']))

    # cross-validating and evaluating the performance of the model
    scores = cv.cross_val_score(clf,
                                train[train_cols],
                                train['firmware_bool'],
                                cv=30)
    print('Estimated score SVM: %0.5f (+/- %0.5f)' %
          (scores.mean(), scores.std() / 2))

    # predicting and printing the results for the query set
    query['result'] = clf.predict(query[train_cols])
    print(query[['url', 'result']])
def svm_classifier(train, query, train_cols):
    clf = svm.SVC()

    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])

    print(clf.fit(train[train_cols], train['malicious']))
    scores = cv.cross_val_score(clf, train[train_cols], train['malicious'], cv=30)
    print('Estimated score SVM: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))

    query['result'] = clf.predict(query[train_cols])

    print(query[['URL', 'result']])
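One caveat that applies to all of the classifiers above: preprocessing.scale standardizes the train and query frames independently, so the two sets are centred and scaled with different statistics. A possible refinement (a sketch, not part of the original code) is to fit a StandardScaler on the training features and reuse it for the query set:

from sklearn.preprocessing import StandardScaler

def scale_features(train, query, train_cols):
    # Fit the scaler on the training features only, then apply the same
    # transform to the query features so both share one mean and variance.
    scaler = StandardScaler()
    train[train_cols] = scaler.fit_transform(train[train_cols])
    query[train_cols] = scaler.transform(query[train_cols])
    return train, query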