class CumlSVMFitter(FitterBase):
    """Train and tune an ``SVC`` classifier on cudf data frames.

    The hyper-parameter search space is taken from ``opt`` (converted with
    ``asdict``) and explored with ``fmin`` using ``tpe.suggest``. The best
    parameters found are stored in ``self.opt_params`` and are used by
    :meth:`train` whenever no explicit ``params`` are passed.
    """

    def __init__(self, label='label', metric='error', opt: SVMOpt = None, max_eval=100):
        """Store the search configuration.

        Args:
            label: Name of the target column in the data frames.
            metric: Metric name forwarded to the base class; the value
                ``'auc'`` additionally disables the integer cast applied
                to predictions before scoring.
            opt: Search-space description; a default ``SVMOpt()`` is
                created when omitted.
            max_eval: Number of evaluations handed to ``fmin``.
        """
        super().__init__(label, metric, max_eval)
        self.opt = opt if opt is not None else SVMOpt()
        self.clf = None

    def _eval_loss(self, eval_df):
        """Score the current classifier on ``eval_df``.

        Predictions are cast to ``int`` unless the metric is ``'auc'``.
        """
        y_pred = self.clf.predict(eval_df.drop(columns=[self.label]))
        if self.metric != 'auc':
            y_pred = y_pred.astype(int)
        return self.get_loss(eval_df[self.label], y_pred)

    def train(self, train_df, eval_df, params=None):
        """Fit a fresh ``SVC`` on ``train_df`` and return its loss on ``eval_df``.

        Args:
            train_df: Training data including the label column.
            eval_df: Evaluation data including the label column.
            params: Keyword arguments for ``SVC``; when ``None`` the
                parameters stored in ``self.opt_params`` are used.

        Returns:
            Whatever ``self.get_loss`` returns for the evaluation split.
        """
        train_df, eval_df = cudf.DataFrame(train_df), cudf.DataFrame(eval_df)
        x_train, y_train = train_df.drop(columns=[self.label]), train_df[self.label]
        x_eval, y_eval = eval_df.drop(columns=[self.label]), eval_df[self.label]
        # Copy so the classifier never shares a dict with the caller or
        # with the stored search result.
        use_params = deepcopy(self.opt_params if params is None else params)
        self.clf = SVC(**use_params)
        self.clf.fit(X=x_train, y=y_train)
        preds = self.clf.predict(X=x_eval)
        return self.get_loss(y_pred=preds, y=y_eval)

    def search(self, train_df, eval_df):
        """Search hyper-parameters on a single train/eval split.

        The result of ``fmin`` is stored in ``self.opt_params``.
        NOTE(review): if the space uses ``hp.choice``, ``fmin`` reports
        choice indices rather than values -- confirm against ``SVMOpt``.
        """
        self.opt_params = {}

        def train_impl(params):
            self.train(train_df, eval_df, params)
            return self._eval_loss(eval_df)

        self.opt_params = fmin(train_impl, asdict(self.opt), algo=tpe.suggest,
                               max_evals=self.max_eval)

    def search_k_fold(self, k_fold, data):
        """Search hyper-parameters using the mean loss over ``k_fold`` splits.

        Args:
            k_fold: Splitter providing ``split(data)`` that yields
                ``(train_id, eval_id)`` positional index pairs.
            data: Full data set including the label column.
        """
        self.opt_params = {}

        def train_impl_nfold(params):
            loss = []
            for train_id, eval_id in k_fold.split(data):
                train_df = data.iloc[train_id, :]
                eval_df = data.iloc[eval_id, :]
                self.train(train_df, eval_df, params)
                loss.append(self._eval_loss(eval_df))
            return np.mean(loss)

        self.opt_params = fmin(train_impl_nfold, asdict(self.opt), algo=tpe.suggest,
                               max_evals=self.max_eval)

    def train_k_fold(self, k_fold, train_data, test_data, params=None, drop_test_y=True):
        """Train one model per fold and collect out-of-fold and test predictions.

        Args:
            k_fold: Splitter providing ``split(train_data)`` and ``n_splits``.
            train_data: Training data including the label column.
            test_data: Test data; its label column is dropped first when
                ``drop_test_y`` is true.
            params: Forwarded to :meth:`train`.
            drop_test_y: Whether ``test_data`` still contains the label.

        Returns:
            ``(train_pred, test_pred, acc_result)``: out-of-fold
            probabilities (column 1 of ``predict_proba``) for the training
            rows, the fold-averaged probabilities for the test rows, and
            the list of per-fold losses.
        """
        acc_result = []
        # Both buffers must start at zero: ``test_pred`` is accumulated with
        # ``+=`` below, so uninitialised memory (``np.empty``) would leak
        # into the average; rows of ``train_pred`` that no fold selects for
        # evaluation would otherwise hold garbage as well.
        train_pred = cudf.Series(np.zeros(train_data.shape[0]))
        test_pred = cudf.Series(np.zeros(test_data.shape[0]))
        if drop_test_y:
            dtest = test_data.drop(columns=self.label)
        else:
            dtest = test_data
        for train_id, eval_id in k_fold.split(train_data):
            train_df = train_data.iloc[train_id, :]
            eval_df = train_data.iloc[eval_id, :]
            self.train(train_df, eval_df, params)
            train_pred[eval_id] = self.clf.predict_proba(
                eval_df.drop(columns=self.label)).iloc[:, 1].values
            acc_result.append(self._eval_loss(eval_df))
            test_pred += self.clf.predict_proba(dtest).iloc[:, 1]
        # Average the per-fold test probabilities.
        test_pred /= k_fold.n_splits
        return train_pred, test_pred, acc_result
else: C = 10 gamma = 0.01 clf = SVC(probability=True, C=C, gamma=gamma) else: clf = LogisticRegression() #normal case clf.fit(X_train, y_train) #save classifier filename = './data/detectors/LR_' + attack_method + '_' + detector + '_' + mode + '_' + net + '.sav' pickle.dump(clf, open(filename, 'wb')) print('Evaluating classifier...') prediction = clf.predict(X_test) prediction_pr = clf.predict_proba(X_test)[:, 1] benign_rate = 0 benign_guesses = 0 ad_guesses = 0 ad_rate = 0 for i in range(len(prediction)): if prediction[i] == 0: benign_guesses += 1 if y_test[i] == 0: benign_rate += 1 else: ad_guesses += 1 if y_test[i] == 1: ad_rate += 1