def single_experiment_false_fraction(data, gamma, nu):
    """Run one SVDD anomaly-detection experiment and report error/quality metrics.

    Splits ``data`` into normal/anomaly parts, subsamples the anomalies so they
    make up a ``nu`` fraction of the training mixture, fits an RBF SVDD, and
    evaluates it on held-out test folds.

    Parameters
    ----------
    data : DataFrame-like dataset accepted by ``split_anomaly_normal_data``.
    gamma : RBF kernel coefficient passed to the SVDD.
    nu : expected anomaly fraction in (0, 1); also sets the SVDD's ``C``.

    Returns
    -------
    tuple of 8 floats:
        (false_anomaly, false_normal, auc_score,
         slice_score, smote_score, vc_score, support_score, kernel_score)
    """
    # SVDD convention: C = 1 / (n * nu) lets up to a nu-fraction of points
    # become outliers.
    C = 1. / len(data) / nu
    model = SVDD(kernel='rbf', C=C, gamma=gamma)

    normal_data, anomaly_data = split_anomaly_normal_data(data)

    # Subsample anomalies so they form a nu-fraction of the combined set.
    anomaly_elements_count = int(len(normal_data) * nu / (1. - nu))
    # random.sample requires a sequence; a pandas Index is not one on modern
    # versions, so materialize it first.
    rows = sample(list(anomaly_data.index), anomaly_elements_count)
    # .loc replaces DataFrame.ix, which was deprecated and removed in pandas 1.0.
    anomaly_data = anomaly_data.loc[rows]

    normal_train, normal_validate, normal_test = split_data_set(normal_data, 3)
    anomaly_train, anomaly_validate, anomaly_test = split_data_set(anomaly_data, 3)

    # No hyper-parameter search here, so fold the validation splits back into
    # the training data.
    anomaly_train = concatenate([anomaly_train, anomaly_validate])
    normal_train = concatenate([normal_train, normal_validate])

    model.fit(np.concatenate([anomaly_train, normal_train]))

    anomaly_prediction = model.decision_function(anomaly_test)
    normal_prediction = model.decision_function(normal_test)

    # decision_function < 0 means "predicted anomaly".
    false_anomaly = mean(normal_prediction < 0)   # normals flagged as anomalies
    false_normal = mean(anomaly_prediction > 0)   # anomalies flagged as normal

    prediction = concatenate([anomaly_prediction, normal_prediction])
    true_labels = array([1] * len(anomaly_prediction)
                        + [-1] * len(normal_prediction))
    # Negate scores so a higher value means "more anomalous" for the PR-AUC.
    auc_score = average_precision_score(true_labels, -1 * prediction)

    train_data = concatenate([anomaly_train, normal_train])
    slice_score = slice_probability_metric(model, train_data)
    support_score = support_vectors_metric(model, train_data, nu)
    smote_score = validate_classifier_by_random_points(model, train_data,
                                                       (1. - nu) / nu)
    vc_score = combinatorial_dimension_metric(model, train_data)
    kernel_score = kernel_metric(model, train_data)

    return false_anomaly, false_normal, auc_score, \
        slice_score, smote_score, vc_score, support_score, kernel_score
def validate_gamma(train, test_normal, test_anomaly, gamma):
    """Fit an RBF SVDD with the given ``gamma`` and score it on held-out data.

    Parameters
    ----------
    train : array-like of training samples.
    test_normal : array-like of held-out normal samples.
    test_anomaly : array-like of held-out anomalous samples.
    gamma : RBF kernel coefficient passed to the SVDD.

    Returns
    -------
    tuple (normal_data_error, anomaly_data_error, auc_score):
        false-anomaly rate on normals, false-normal rate on anomalies,
        and the precision-recall AUC over both test sets.
    """
    # C = 1 / (0.1 * n) allows at most ~10% of training points to become
    # outliers (SVDD convention).
    C = 1.0 / (0.1 * len(train))
    clf = SVDD(kernel='rbf', gamma=gamma, C=C)
    clf.fit(train)

    normal_data_prediction = clf.decision_function(test_normal)
    anomaly_data_prediction = clf.decision_function(test_anomaly)

    # decision_function < 0 means "predicted anomaly".
    normal_data_error = np.mean(normal_data_prediction < 0)
    anomaly_data_error = np.mean(anomaly_data_prediction > 0)

    # Label normals +1 and anomalies -1 to match the decision-function sign.
    true_labels = [1] * len(test_normal) + [-1] * len(test_anomaly)
    decision_values = np.concatenate(
        [normal_data_prediction, anomaly_data_prediction], axis=0)
    precision, recall, _ = precision_recall_curve(true_labels, decision_values)
    auc_score = auc(recall, precision)

    return normal_data_error, anomaly_data_error, auc_score