import copy

import numpy as np
from sklearn import svm
from sklearn.model_selection import KFold

# Project-local helpers (MySmote, gen_with_vae, fill_with_eucli_distance,
# compute_TP_TN_FP_TN, compute_classification_indicators,
# get_kdd99_big_classification, DataType) are assumed to be defined or
# imported elsewhere in this module.


def gen_with_smote(target_class, target_num, data_name, discrete_num_map):
    """Generate `target_num` synthetic samples of `target_class` with SMOTE."""
    smote = MySmote(data_name, target_class=target_class, data_map=discrete_num_map)
    smote_predict = smote.predict(target_len=target_num, data_map=discrete_num_map)
    return smote_predict
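
# Hedged sketch of the interpolation MySmote is assumed to perform on the
# continuous attributes (illustration only, not the project's implementation;
# the real class also handles discrete columns, which it emits as 'None' so
# that the VAE output can fill them in, as the classifiers below do).
def _smote_interpolation_sketch(minority, target_len, k=5, seed=None):
    rng = np.random.default_rng(seed)
    minority = np.asarray(minority, dtype=float)
    synthetic = []
    for _ in range(target_len):
        i = rng.integers(len(minority))
        # distances from the chosen sample to every other minority sample
        d = np.linalg.norm(minority - minority[i], axis=1)
        neighbors = np.argsort(d)[1:k + 1]  # k nearest, excluding the sample itself
        j = rng.choice(neighbors)
        gap = rng.random()
        # new point on the segment between the sample and a random neighbor
        synthetic.append(minority[i] + gap * (minority[j] - minority[i]))
    return np.asarray(synthetic)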
def multi_classify(data_train_total, positive, negatives, pos_len, negs_len,
                   data_map, data_name, expend=False, using_kdd99=False):
    """One-vs-rest SVM evaluation, optionally expanding each minority class
    with VAE-generated (and, for KDD99, SMOTE-generated) samples first."""
    if expend:
        print('-' * 16 + 'expending' + '-' * 16)
        for negative, neg_len in zip(negatives, negs_len):
            # generate enough samples to balance this class against the positive one
            neg_len = pos_len - neg_len
            vae_predict = gen_with_vae(negative, neg_len, data_name)
            if using_kdd99:
                smote = MySmote(data_name, target_class=negative, data_map=data_map)
                smote_predict = smote.predict(target_len=neg_len, data_map=data_map)
                # SMOTE leaves discrete attributes as 'None'; fill them from the VAE output
                for l in range(len(smote_predict)):
                    for i in range(len(smote_predict[l].attr_list)):
                        if smote_predict[l][i] == 'None':
                            smote_predict[l][i] = vae_predict[l][i]
            else:
                smote_predict = vae_predict
            data_predict = []
            for p in smote_predict:
                res = fill_with_eucli_distance(data_train_total, p, data_map)
                data_predict.append(res)
            data_train_total.extend(data_predict)
    y = [d.data_class for d in data_train_total]
    if using_kdd99:
        y = [get_kdd99_big_classification(c) for c in y]
    # debug: class distribution after the (optional) expansion
    print({big_class: y.count(big_class) for big_class in set(y)})
    data_train_total = [d.discrete_to_num(data_map=data_map).attr_list
                        for d in data_train_total]
    x = np.array(data_train_total).astype(float)
    kf = KFold(n_splits=5, shuffle=True)
    ones = copy.deepcopy(negatives) + [positive]
    if using_kdd99:
        ones = list(set(get_kdd99_big_classification(c) for c in ones))
    ones.sort()
    for one in ones:
        negs = [o for o in ones if o != one]
        print('{} vs others'.format(one))
        TP, TN, FP, FN = 0, 0, 0, 0
        acc = 0.0
        for i_train, i_test in kf.split(X=x, y=y):  # y is ignored by plain KFold
            train_x = [x[i] for i in i_train]
            train_y = [y[i] for i in i_train]
            test_x = [x[i] for i in i_test]
            test_y = [y[i] for i in i_test]
            clf = svm.SVC(kernel='linear')
            clf.fit(train_x, train_y)
            predict_y = list(clf.predict(test_x))
            TP_k, TN_k, FP_k, FN_k = compute_TP_TN_FP_TN(
                class_test=test_y, class_predict=predict_y,
                positive=one, negative=negs)
            TP += TP_k
            TN += TN_k
            FP += FP_k
            FN += FN_k
            # accumulate the per-fold indicator so acc / 5 below is a 5-fold mean
            acc += compute_classification_indicators(TP_k, TN_k, FP_k, FN_k)[0]
        print('acc : {:>2.3f}'.format(acc / 5))
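
# Hedged sketch of the counting that compute_TP_TN_FP_TN is assumed to perform
# for the one-vs-rest loop above: `positive` is the class under test and
# `negative` the remaining label(s). Illustration only; the project's helper
# may validate labels differently. `negative` is kept for signature parity but
# is implied by "not positive" here.
def _confusion_counts_sketch(class_test, class_predict, positive, negative):
    TP = TN = FP = FN = 0
    for truth, pred in zip(class_test, class_predict):
        if truth == positive:
            if pred == positive:
                TP += 1
            else:
                FN += 1
        else:
            if pred == positive:
                FP += 1
            else:
                TN += 1
    return TP, TN, FP, FN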
def binary_classify(data_train_total, positive, negative, pos_len, neg_len,
                    expend, data_map, data_name, using_kdd99=False, vae_only=False):
    """Positive-vs-negative SVM evaluation, optionally expanding the minority
    (negative) class with VAE-generated (and, for KDD99, SMOTE-generated) samples."""
    if expend:
        print('-' * 16 + 'expending' + '-' * 16)
        # generate enough samples to balance the negative class against the positive one
        neg_len = pos_len - neg_len
        # vae_predict = gen_with_multi_vae(negative, neg_len, data_name)
        vae_predict = gen_with_vae(negative, neg_len, data_name)
        if using_kdd99:
            smote = MySmote(data_name, target_class=negative, data_map=data_map)
            smote_predict = smote.predict(target_len=neg_len, data_map=data_map)
            # SMOTE leaves discrete attributes as 'None'; fill them from the VAE output
            for l in range(len(smote_predict)):
                for i in range(len(smote_predict[l].attr_list)):
                    if smote_predict[l][i] == 'None':
                        smote_predict[l][i] = vae_predict[l][i]
        else:
            smote_predict = vae_predict
        data_predict = []
        for p in smote_predict:
            res = fill_with_eucli_distance(data_train_total, p, data_map)
            data_predict.append(res)
        data_train_total.extend(data_predict)
    y = [d.data_class for d in data_train_total]
    if using_kdd99:
        positive = get_kdd99_big_classification(positive)
        negative = get_kdd99_big_classification(negative)
        y = [get_kdd99_big_classification(c) for c in y]
    if vae_only:
        # VAE-only runs classify on the continuous attributes alone
        data_train_total = [d.discrete_to_num(data_map=data_map).to_list([DataType.CONTINUOUS])
                            for d in data_train_total]
    else:
        data_train_total = [d.discrete_to_num(data_map=data_map).attr_list
                            for d in data_train_total]
    x = np.array(data_train_total).astype(float)
    # debug: class distribution after the (optional) expansion
    print({k: y.count(k) for k in set(y)})
    kf = KFold(n_splits=5, shuffle=True)
    args = []
    for i_train, i_test in kf.split(X=x, y=y):  # y is ignored by plain KFold
        train_x = [x[i] for i in i_train]
        train_y = [y[i] for i in i_train]
        test_x = [x[i] for i in i_test]
        test_y = [y[i] for i in i_test]
        clf = svm.SVC(kernel='linear', probability=True,
                      random_state=np.random.RandomState(0))
        # alternatives tried: svm.SVC(), GaussianNB(), tree.DecisionTreeClassifier()
        clf.fit(train_x, train_y)
        predict_y = list(clf.predict(test_x))
        temp = compute_classification_indicators(
            *compute_TP_TN_FP_TN(test_y, predict_y, positive, negative))
        # running sum of the seven indicators across the folds
        if not args:
            args = list(temp)
        else:
            args = [a + t for a, t in zip(args, temp)]
    print(u'acc+: {:>2.3f}, acc-: {:>2.3f}, accuracy: {:>2.3f}, '
          u'precision: {:>2.3f}, recall: {:>2.3f}, F1: {:>2.3f}, '
          u'G-mean: {:>2.3f}'.format(*[a / 5 for a in args]))
    print('')
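
# Hedged sketch of the seven indicators binary_classify prints (acc+, acc-,
# accuracy, precision, recall, F1, G-mean), with the ordering inferred from
# the format string above. The project's compute_classification_indicators
# may order or guard edge cases differently; zero denominators are mapped to
# 0.0 here as an assumption.
def _classification_indicators_sketch(TP, TN, FP, FN):
    acc_pos = TP / (TP + FN) if TP + FN else 0.0  # acc+: recall on the positive class
    acc_neg = TN / (TN + FP) if TN + FP else 0.0  # acc-: specificity
    total = TP + TN + FP + FN
    accuracy = (TP + TN) / total if total else 0.0
    precision = TP / (TP + FP) if TP + FP else 0.0
    recall = acc_pos
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    g_mean = (acc_pos * acc_neg) ** 0.5
    return acc_pos, acc_neg, accuracy, precision, recall, f1, g_mean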