# overlay benign and malicious histograms (plt.hold is a no-op since
# matplotlib 2.0 and was removed in 3.0, so it is dropped here)
plt.bar(bins[:-1], height=benign_hist, width=binwidth, color=colours[0],
        label='Benign', alpha=0.6, edgecolor=colours[0], lw=3)
plt.bar(bins[:-1], height=malicious_hist, width=binwidth, color=colours[i+1],
        label='Malicious (Scenario ' + str(scenario) + ')',
        alpha=0.6, edgecolor=colours[i+1], lw=3)
plt.legend(loc='best')
plt.xlim(bins[0] - binwidth, bins[-1] + binwidth)
plt.xticks(bins, ["{:0.03f}".format(x) for x in bins.flatten()], rotation=45)
plt.savefig('iforest-scores' + str(scenario) + '.pdf', bbox_inches='tight')
plt.clf()
plt.close()

# plot my own PR curve
precision, recall, ap = pr_curve(y_true, all_values)
print('Scenario:', scenario, ap)
plt.figure()
plt.plot(recall, precision,
         label='AUC (Scenario ' + str(scenario) + ')={0:0.3f}'.format(ap),
         color=colours[i+1])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.legend()
plt.savefig('iforest-pr' + str(scenario) + '.pdf', bbox_inches='tight')
plt.clf()
plt.close()
"""
# plot rug data points
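
# --- Hedged sketch (not from the original project) ---------------------------
# The snippet above calls a project-local pr_curve(y_true, scores) that
# returns (precision, recall, average_precision). A minimal sketch of such a
# helper, assuming scikit-learn is available; the project's own implementation
# may differ.
from sklearn.metrics import precision_recall_curve, average_precision_score

def pr_curve(y_true, scores):
    """Binary precision-recall curve plus average precision (AP)."""
    precision, recall, _ = precision_recall_curve(y_true, scores)
    ap = average_precision_score(y_true, scores)
    return precision, recall, ap
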
def test(**kwargs):
    opt.parse(kwargs)

    if opt.device is not None:
        opt.device = torch.device(opt.device)
    elif opt.gpus:
        opt.device = torch.device(0)
    else:
        opt.device = torch.device('cpu')

    pretrain_model = load_pretrain_model(opt.pretrain_model_path)

    model = AGAH(opt.bit, opt.tag_dim, opt.num_label, opt.emb_dim,
                 lambd=opt.lambd, pretrain_model=pretrain_model).to(opt.device)

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    load_model(model, path)
    FEATURE_MAP = torch.load(os.path.join(path, 'feature_map.pth')).to(opt.device)

    model.eval()

    images, tags, labels = load_data(opt.data_path, opt.dataset)

    x_query_data = Dataset(opt, images, tags, labels, test='image.query')
    x_db_data = Dataset(opt, images, tags, labels, test='image.db')
    y_query_data = Dataset(opt, images, tags, labels, test='text.query')
    y_db_data = Dataset(opt, images, tags, labels, test='text.db')

    x_query_dataloader = DataLoader(x_query_data, opt.batch_size, shuffle=False)
    x_db_dataloader = DataLoader(x_db_data, opt.batch_size, shuffle=False)
    y_query_dataloader = DataLoader(y_query_data, opt.batch_size, shuffle=False)
    y_db_dataloader = DataLoader(y_db_data, opt.batch_size, shuffle=False)

    qBX = generate_img_code(model, x_query_dataloader, opt.query_size, FEATURE_MAP)
    qBY = generate_txt_code(model, y_query_dataloader, opt.query_size, FEATURE_MAP)
    rBX = generate_img_code(model, x_db_dataloader, opt.db_size, FEATURE_MAP)
    rBY = generate_txt_code(model, y_db_dataloader, opt.db_size, FEATURE_MAP)

    query_labels, db_labels = x_query_data.get_labels()
    query_labels = query_labels.to(opt.device)
    db_labels = db_labels.to(opt.device)

    p_i2t, r_i2t = pr_curve(qBX, rBY, query_labels, db_labels)
    p_t2i, r_t2i = pr_curve(qBY, rBX, query_labels, db_labels)

    K = [1, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
    pk_i2t = p_topK(qBX, rBY, query_labels, db_labels, K)
    pk_t2i = p_topK(qBY, rBX, query_labels, db_labels, K)

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    np.save(os.path.join(path, 'P_i2t.npy'), p_i2t.numpy())
    np.save(os.path.join(path, 'R_i2t.npy'), r_i2t.numpy())
    np.save(os.path.join(path, 'P_t2i.npy'), p_t2i.numpy())
    np.save(os.path.join(path, 'R_t2i.npy'), r_t2i.numpy())
    np.save(os.path.join(path, 'P_at_K_i2t.npy'), pk_i2t.numpy())
    np.save(os.path.join(path, 'P_at_K_t2i.npy'), pk_t2i.numpy())

    mapi2t = calc_map_k(qBX, rBY, query_labels, db_labels)
    mapt2i = calc_map_k(qBY, rBX, query_labels, db_labels)

    print('...test MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))
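
# --- Hedged sketch (not from the original project) ---------------------------
# pr_curve here ranks database hash codes by Hamming distance rather than
# scoring a binary classifier. A minimal sketch, assuming {-1, +1} hash codes
# and multi-hot labels (two items count as relevant if they share any label);
# the project's helper (and its p_topK variant) may differ:
import torch

def pr_curve(qB, rB, query_labels, db_labels):
    """Precision/recall averaged over queries, at Hamming radii 0..n_bits."""
    n_bits = qB.shape[1]
    gnd = (query_labels @ db_labels.t() > 0).float()  # relevance matrix
    hamm = 0.5 * (n_bits - qB @ rB.t())               # Hamming distances
    P, R = [], []
    for d in range(n_bits + 1):
        retrieved = (hamm <= d).float()               # hits within radius d
        n_ret = retrieved.sum(dim=1)
        n_rel_ret = (retrieved * gnd).sum(dim=1)
        P.append((n_rel_ret / n_ret.clamp(min=1)).mean())
        R.append((n_rel_ret / gnd.sum(dim=1).clamp(min=1)).mean())
    return torch.stack(P), torch.stack(R)
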
def test(**kwargs):
    opt.parse(kwargs)

    if opt.device is not None:
        opt.device = torch.device(opt.device)
    elif opt.gpus:
        opt.device = torch.device(0)
    else:
        opt.device = torch.device('cpu')

    with torch.no_grad():
        model = CPAH(opt.image_dim, opt.text_dim, opt.hidden_dim, opt.bit,
                     opt.num_label).to(opt.device)

        path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit) + str(opt.proc)
        load_model(model, path)

        model.eval()

        images, tags, labels = load_data(opt.data_path, opt.dataset)

        i_query_data = Dataset(opt, images, tags, labels, test='image.query')
        i_db_data = Dataset(opt, images, tags, labels, test='image.db')
        t_query_data = Dataset(opt, images, tags, labels, test='text.query')
        t_db_data = Dataset(opt, images, tags, labels, test='text.db')

        i_query_dataloader = DataLoader(i_query_data, opt.batch_size, shuffle=False)
        i_db_dataloader = DataLoader(i_db_data, opt.batch_size, shuffle=False)
        t_query_dataloader = DataLoader(t_query_data, opt.batch_size, shuffle=False)
        t_db_dataloader = DataLoader(t_db_data, opt.batch_size, shuffle=False)

        qBX = generate_img_code(model, i_query_dataloader, opt.query_size)
        qBY = generate_txt_code(model, t_query_dataloader, opt.query_size)
        rBX = generate_img_code(model, i_db_dataloader, opt.db_size)
        rBY = generate_txt_code(model, t_db_dataloader, opt.db_size)

        query_labels, db_labels = i_query_data.get_labels()
        query_labels = query_labels.to(opt.device)
        db_labels = db_labels.to(opt.device)

        # K = [1, 10, 100, 1000]
        # p_top_k(qBX, rBY, query_labels, db_labels, K, tqdm_label='I2T')
        # pr_curve2(qBY, rBX, query_labels, db_labels)

        p_i2t, r_i2t = pr_curve(qBX, rBY, query_labels, db_labels, tqdm_label='I2T')
        p_t2i, r_t2i = pr_curve(qBY, rBX, query_labels, db_labels, tqdm_label='T2I')
        p_i2i, r_i2i = pr_curve(qBX, rBX, query_labels, db_labels, tqdm_label='I2I')
        p_t2t, r_t2t = pr_curve(qBY, rBY, query_labels, db_labels, tqdm_label='T2T')

        K = [1, 10, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000,
             2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]
        pk_i2t = p_top_k(qBX, rBY, query_labels, db_labels, K, tqdm_label='I2T')
        pk_t2i = p_top_k(qBY, rBX, query_labels, db_labels, K, tqdm_label='T2I')
        pk_i2i = p_top_k(qBX, rBX, query_labels, db_labels, K, tqdm_label='I2I')
        pk_t2t = p_top_k(qBY, rBY, query_labels, db_labels, K, tqdm_label='T2T')

        mapi2t = calc_map_k(qBX, rBY, query_labels, db_labels)
        mapt2i = calc_map_k(qBY, rBX, query_labels, db_labels)
        mapi2i = calc_map_k(qBX, rBX, query_labels, db_labels)
        mapt2t = calc_map_k(qBY, rBY, query_labels, db_labels)

        pr_dict = {'pi2t': p_i2t.cpu().numpy(), 'ri2t': r_i2t.cpu().numpy(),
                   'pt2i': p_t2i.cpu().numpy(), 'rt2i': r_t2i.cpu().numpy(),
                   'pi2i': p_i2i.cpu().numpy(), 'ri2i': r_i2i.cpu().numpy(),
                   'pt2t': p_t2t.cpu().numpy(), 'rt2t': r_t2t.cpu().numpy()}

        pk_dict = {'k': K,
                   'pki2t': pk_i2t.cpu().numpy(), 'pkt2i': pk_t2i.cpu().numpy(),
                   'pki2i': pk_i2i.cpu().numpy(), 'pkt2t': pk_t2t.cpu().numpy()}

        map_dict = {'mapi2t': float(mapi2t.cpu().numpy()),
                    'mapt2i': float(mapt2i.cpu().numpy()),
                    'mapi2i': float(mapi2i.cpu().numpy()),
                    'mapt2t': float(mapt2t.cpu().numpy())}

        print('   Test MAP: MAP(i->t) = {:3.4f}, MAP(t->i) = {:3.4f}, '
              'MAP(i->i) = {:3.4f}, MAP(t->t) = {:3.4f}'
              .format(mapi2t, mapt2i, mapi2i, mapt2t))

        path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit) + str(opt.proc)
        write_pickle(os.path.join(path, 'pr_dict.pkl'), pr_dict)
        write_pickle(os.path.join(path, 'pk_dict.pkl'), pk_dict)
        write_pickle(os.path.join(path, 'map_dict.pkl'), map_dict)
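
# --- Hedged sketch (not from the original project) ---------------------------
# p_top_k above is also a project utility. A minimal sketch of mean
# precision@K over Hamming-ranked retrieval, under the same {-1, +1} code and
# multi-hot label assumptions as the pr_curve sketch (tqdm_label is omitted):
import torch

def p_top_k(qB, rB, query_labels, db_labels, Ks):
    """Mean precision at each cutoff in Ks, averaged over queries."""
    n_bits = qB.shape[1]
    hamm = 0.5 * (n_bits - qB @ rB.t())               # (n_query, n_db)
    gnd = (query_labels @ db_labels.t() > 0).float()  # relevance matrix
    order = hamm.argsort(dim=1)                       # ranking per query
    gnd_sorted = gnd.gather(1, order)                 # relevance in rank order
    return torch.stack([gnd_sorted[:, :k].mean(dim=1).mean() for k in Ks])
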
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)

yp = model.predict(X_dev)
yp = np.argmax(yp, axis=1)
y_dev = np.argmax(y_dev, axis=1)

# performance on the dev set
print_classfication_report('Keras', y_dev, yp, stem='keras_dev')
pr_curve(y_dev, yp, num_classes, 'keras_dev')

# A prediction is made for every sample, and the prediction for a whole block
# is obtained through consensus voting. blocks_consensus reports the
# block-level prediction distribution, to make sure there is no bimodal
# distribution.
blocks_pred, blocks_consensus = score_model(model, X_test)

# write the csv file
write_data(blocks_pred, 'khan_speaker_labels_MLP.csv')
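
# --- Hedged sketch (not from the original project) ---------------------------
# score_model above is assumed to majority-vote per-sample softmax predictions
# into one label per block, returning the winning labels plus the vote
# distributions so a bimodal split is easy to spot. A minimal sketch, assuming
# X_test is an iterable of (n_samples, n_features) blocks:
import numpy as np

def score_model(model, blocks):
    preds, consensus = [], []
    for block in blocks:
        votes = np.argmax(model.predict(block), axis=1)  # per-sample labels
        counts = np.bincount(votes)
        preds.append(int(np.argmax(counts)))             # majority vote
        consensus.append(counts / counts.sum())          # vote distribution
    return preds, consensus
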
                      if gid // 100 in BENIGN_SCENARIOS or gid // 100 == scenario]

all_values = np.array([feat_value for gid, feat_value
                       in all_feature_values]).reshape(-1, 1)
y_true = [1 if gid // 100 in MALICIOUS_SCENARIOS else 0
          for gid, feat_value in all_feature_values]

anomaly_scores = []
for value in all_values.flatten():
    if value in pmf:
        anomaly_scores.append(-pmf[value])
    else:
        # unseen value: fall back to an add-one (Laplace) smoothed probability
        prob = (0.0 + 1.0) / (total + nevents)
        anomaly_scores.append(-prob)

# plot my own PR curve
precision, recall, ap = pr_curve(y_true, anomaly_scores)
print('Scenario:', scenario, ap)
plt.figure()
plt.plot(recall, precision,
         label='AUC (Scenario ' + str(scenario) + ')={0:0.3f}'.format(ap),
         color=colours[i+1])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.legend()
plt.savefig('pr-' + feat_name + str(scenario) + '.pdf', bbox_inches='tight')
plt.clf()
plt.close()

# print(all_values.flatten()[300:400])
# print(anomaly_scores[300:400])
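
# --- Hedged sketch (not from the original project) ---------------------------
# The unseen-value branch above is add-one (Laplace) smoothing:
# P(unseen) = (0 + 1) / (total + nevents). A small helper that makes the
# seen/unseen split explicit (the name is illustrative, not from the source):
def anomaly_score(value, pmf, total, nevents):
    """Negative (smoothed) probability of a value, used as an anomaly score."""
    if value in pmf:
        return -pmf[value]
    return -(0.0 + 1.0) / (total + nevents)
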
Alb_dec = dc()
StateVector = SVC(gamma=2, kernel='linear', C=C)
knn = KNeighborsClassifier(3)

# train the classifiers on the training set
StateVector.fit(X_train, y_train)
Alb_dec.fit(X_train, y_train)
knn.fit(X_train, y_train)

# build the precision-recall curve
pred_tra = Alb_dec.predict(X_train)

# compute predictions for the test set
pred = Alb_dec.predict(X_test)
pred_SVC = StateVector.predict(X_test)
pred_knn = knn.predict(X_test)

u.pr_curve(pred, y_test, pred_SVC, pred_knn)  # precision-recall curve
# u.mostra_predizioni(pred.size, pred, pred_SVC, y_test, X_test)

print("Test-set prediction accuracy (decision tree): %0.1f%%"
      % (accuracy_score(pred, y_test) * 100))
print("Test-set prediction accuracy (SVC): %0.1f%%"
      % (accuracy_score(pred_SVC, y_test) * 100))
print("Test-set prediction accuracy (knn): %0.1f%%"
      % (accuracy_score(pred_knn, y_test) * 100))

"""Suggestions for a new flower to try:
4.9,3.1,1.5,0.1  Iris-setosa
5.6,2.8,4.9,2.0  Iris-virginica
6.3,2.3,4.4,1.3  Iris-versicolor"""
nuovo_fiore = [[4.9, 3.1, 1.5, 0.1]]
nuovo_test_a = Alb_dec.predict(nuovo_fiore)
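
# --- Hedged extension (not in the original) -----------------------------------
# The snippet only queries the decision tree on the suggested flower; the same
# check can be run against all three trained classifiers:
for name, clf in [('decision tree', Alb_dec), ('SVC', StateVector), ('knn', knn)]:
    print('%s -> %s' % (name, clf.predict(nuovo_fiore)[0]))  # expected: Iris-setosa
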
X_train = np.vstack(train_blocks)
n_classes = len(np.unique(train_labels))

ind = 12
indices_ci = plot_feature_importance(X_train, train_labels)

# scaler = StandardScaler().fit(X_train[:, indices_ci[:ind]])
# train_blocks = np.array([scaler.transform(block[:, indices_ci[:ind]])
#                          for block in train_blocks])
# test_blocks = np.array([scaler.transform(block[:, indices_ci[:ind]])
#                         for block in test_blocks])

# Decided to use all features, as each of them carries some unique information.
scaler = StandardScaler().fit(X_train)
train_blocks = np.array([scaler.transform(block) for block in train_blocks])
test_blocks = np.array([scaler.transform(block) for block in test_blocks])

X_train_blocks, X_dev_blocks, y_train_blocks, y_dev_blocks = train_test_split(
    train_blocks, train_blocks_labels, test_size=0.2, random_state=455)

speakers = run_model(X_train_blocks, y_train_blocks)

yp = score_models(speakers, X_dev_blocks)
print_classfication_report('gmm', y_dev_blocks, yp, stem='gmm_dev')
pr_curve(y_dev_blocks, yp, n_classes, 'gmm_dev')
plt.close('all')

blocks_pred = score_models(speakers, test_blocks)
write_data(blocks_pred, 'khan_speaker_labels_GMM.csv')
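
# --- Hedged sketch (not from the original project) ---------------------------
# score_models above is assumed to pick, for each block, the speaker GMM with
# the highest log-likelihood. A minimal sketch, assuming speakers is a list of
# fitted sklearn GaussianMixture models indexed by class label:
import numpy as np

def score_models(speakers, blocks):
    preds = []
    for block in blocks:
        loglik = [gmm.score(block) for gmm in speakers]  # mean log-likelihood
        preds.append(int(np.argmax(loglik)))             # best-scoring speaker
    return np.array(preds)
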
# KN Grid Search----------------------------------------------------------------
metrics = ['euclidean', 'manhattan']
weights = ['uniform', 'distance']
numNeighbors = [12, 15, 17]
param_grid = dict(metric=metrics, weights=weights, n_neighbors=numNeighbors)

CV_kn = GridSearchCV(clf1, param_grid=param_grid, cv=cv,
                     n_jobs=3, verbose=3, scoring='f1_weighted')
CV_kn = CV_kn.fit(X_train, y_train)
print(CV_kn.best_estimator_)
kn = CV_kn.best_estimator_

y_pred_train_kn = cross_val_predict(kn, X_train, y_train, cv=cv)
print_classfication_report(kn, y_train, y_pred_train_kn, stem='kn')
pr_curve(y_train, y_pred_train_kn, n_components, 'kn')

y_pred_dev_kn = kn.predict(X_dev)
print_classfication_report(kn, y_dev, y_pred_dev_kn, stem='kn_dev')
pr_curve(y_dev, y_pred_dev_kn, n_components, 'kn_dev')

# KN Grid Search (backward-compatibility bug in scikit)--------------------------
metrics = ['mahalanobis']
weights = ['uniform', 'distance']
numNeighbors = [13, 15, 17]
param_grid = dict(metric=metrics, weights=weights, n_neighbors=numNeighbors)

CV_km = GridSearchCV(clf1a, param_grid=param_grid, cv=cv,
                     n_jobs=3, verbose=3, scoring='f1_weighted')
CV_km = CV_km.fit(X_train, y_train)
print(CV_km.best_estimator_)
km = CV_km.best_estimator_
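
# --- Hedged note (not from the original) ---------------------------------------
# The "backward-compatibility bug" likely refers to the mahalanobis metric
# needing its inverse covariance matrix passed explicitly; recent scikit-learn
# versions raise at fit time otherwise. A sketch of how clf1a could be
# constructed (the VI computation is an assumption, not from the source):
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

VI = np.linalg.inv(np.cov(X_train, rowvar=False))  # inverse covariance matrix
clf1a = KNeighborsClassifier(metric='mahalanobis', metric_params={'VI': VI})
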
def test(self):
    self.ImgNet.eval().cuda()
    self.TxtNet.eval().cuda()

    re_BI, re_BT, re_LT, qu_BI, qu_BT, qu_LT = generate_hashes_from_dataloader(
        self.database_loader, self.test_loader, self.ImgNet, self.TxtNet,
        self.cfg.LABEL_DIM)

    qu_BI = self.get_each_5th_element(qu_BI)
    re_BI = self.get_each_5th_element(re_BI)
    qu_LI = self.get_each_5th_element(qu_LT)
    re_LI = self.get_each_5th_element(re_LT)

    p_i2t, r_i2t = pr_curve(qu_BI, re_BT, qu_LI, re_LT, tqdm_label='I2T')
    p_t2i, r_t2i = pr_curve(qu_BT, re_BI, qu_LT, re_LI, tqdm_label='T2I')
    p_i2i, r_i2i = pr_curve(qu_BI, re_BI, qu_LI, re_LI, tqdm_label='I2I')
    p_t2t, r_t2t = pr_curve(qu_BT, re_BT, qu_LT, re_LT, tqdm_label='T2T')

    K = [1, 10, 50] + list(range(100, 1000, 100)) + list(range(1000, 10001, 1000))
    pk_i2t = p_top_k(qu_BI, re_BT, qu_LI, re_LT, K, tqdm_label='I2T')
    pk_t2i = p_top_k(qu_BT, re_BI, qu_LT, re_LI, K, tqdm_label='T2I')
    pk_i2i = p_top_k(qu_BI, re_BI, qu_LI, re_LI, K, tqdm_label='I2I')
    pk_t2t = p_top_k(qu_BT, re_BT, qu_LT, re_LT, K, tqdm_label='T2T')

    MAP_I2T = calc_map_k(qu_BI, re_BT, qu_LI, re_LT, self.cfg.MAP_K)
    MAP_T2I = calc_map_k(qu_BT, re_BI, qu_LT, re_LI, self.cfg.MAP_K)
    MAP_I2I = calc_map_k(qu_BI, re_BI, qu_LI, re_LI, self.cfg.MAP_K)
    MAP_T2T = calc_map_k(qu_BT, re_BT, qu_LT, re_LT, self.cfg.MAP_K)
    MAPS = (MAP_I2T, MAP_T2I, MAP_I2I, MAP_T2T)

    pr_dict = {'pi2t': p_i2t.cpu().numpy(), 'ri2t': r_i2t.cpu().numpy(),
               'pt2i': p_t2i.cpu().numpy(), 'rt2i': r_t2i.cpu().numpy(),
               'pi2i': p_i2i.cpu().numpy(), 'ri2i': r_i2i.cpu().numpy(),
               'pt2t': p_t2t.cpu().numpy(), 'rt2t': r_t2t.cpu().numpy()}

    pk_dict = {'k': K,
               'pki2t': pk_i2t.cpu().numpy(), 'pkt2i': pk_t2i.cpu().numpy(),
               'pki2i': pk_i2i.cpu().numpy(), 'pkt2t': pk_t2t.cpu().numpy()}

    map_dict = {'mapi2t': float(MAP_I2T.cpu().numpy()),
                'mapt2i': float(MAP_T2I.cpu().numpy()),
                'mapi2i': float(MAP_I2I.cpu().numpy()),
                'mapt2t': float(MAP_T2T.cpu().numpy())}

    self.logger.info('mAP I->T: %.3f, mAP T->I: %.3f, '
                     'mAP I->I: %.3f, mAP T->T: %.3f' % MAPS)

    write_pickle(osp.join(self.cfg.MODEL_DIR, self.path, 'pr_dict.pkl'), pr_dict)
    write_pickle(osp.join(self.cfg.MODEL_DIR, self.path, 'pk_dict.pkl'), pk_dict)
    write_pickle(osp.join(self.cfg.MODEL_DIR, self.path, 'map_dict.pkl'), map_dict)
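
# --- Hedged sketch (not from the original project) ---------------------------
# calc_map_k is the remaining assumed helper: mAP over the top-k
# Hamming-ranked results (k = self.cfg.MAP_K above). A minimal sketch under
# the same {-1, +1} code and multi-hot label conventions as the earlier
# sketches:
import torch

def calc_map_k(qB, rB, query_labels, db_labels, k=None):
    """Mean average precision over the top-k Hamming-ranked database items."""
    n_bits = qB.shape[1]
    k = k if k is not None else rB.shape[0]
    hamm = 0.5 * (n_bits - qB @ rB.t())                   # Hamming distances
    gnd_all = (query_labels @ db_labels.t() > 0).float()  # relevance matrix
    aps = []
    for i in range(qB.shape[0]):
        gnd = gnd_all[i][hamm[i].argsort()][:k]           # top-k relevance
        n_rel = gnd.sum()
        if n_rel == 0:
            aps.append(torch.tensor(0.0))
            continue
        ranks = torch.arange(1, gnd.numel() + 1, dtype=torch.float)
        aps.append(((gnd.cumsum(0) / ranks) * gnd).sum() / n_rel)
    return torch.stack(aps).mean()
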