    # plt.hold() was removed in matplotlib 3.0; overlaying is now the default
    plt.bar(bins[:-1], benign_hist, width=binwidth, color=colours[0],
            label='Benign', alpha=0.6, edgecolor=colours[0], lw=3)
    plt.bar(bins[:-1], malicious_hist, width=binwidth,
            color=colours[i+1],
            label='Malicious (Scenario ' + str(scenario) + ')',
            alpha=0.6, edgecolor=colours[i+1], lw=3)
    plt.legend(loc='best')
    plt.xlim(bins[0]-binwidth, bins[-1]+binwidth)
    plt.xticks(bins, ["{:0.03f}".format(x) for x in bins.flatten()], rotation=45)
    plt.savefig('iforest-scores' + str(scenario) + '.pdf', bbox_inches='tight')
    plt.clf()
    plt.close()
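    # A sketch of the elided setup the bars above rely on (an assumption,
    # since this excerpt starts mid-function):
    #   bins = np.linspace(all_scores.min(), all_scores.max(), 21)
    #   binwidth = bins[1] - bins[0]
    #   benign_hist, _ = np.histogram(benign_scores, bins=bins)
    #   malicious_hist, _ = np.histogram(malicious_scores, bins=bins)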

    # plot my own PR curve
    precision, recall, ap = pr_curve(y_true, all_values)
    print('Scenario:', scenario, ap)
    plt.figure()
    plt.plot(recall, precision, label='AUC (Scenario ' + str(scenario) +\
                                      ')={0:0.3f}'.format(ap), color=colours[i+1])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.legend()
    plt.savefig('iforest-pr' + str(scenario) + '.pdf', bbox_inches='tight')
    plt.clf()
    plt.close()

"""
# plot rug data points
Beispiel #2
0
def test(**kwargs):
    opt.parse(kwargs)

    if opt.device is not None:
        opt.device = torch.device(opt.device)
    elif opt.gpus:
        opt.device = torch.device(0)
    else:
        opt.device = torch.device('cpu')

    pretrain_model = load_pretrain_model(opt.pretrain_model_path)

    model = AGAH(opt.bit,
                 opt.tag_dim,
                 opt.num_label,
                 opt.emb_dim,
                 lambd=opt.lambd,
                 pretrain_model=pretrain_model).to(opt.device)

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    load_model(model, path)
    FEATURE_MAP = torch.load(os.path.join(path,
                                          'feature_map.pth')).to(opt.device)

    model.eval()

    images, tags, labels = load_data(opt.data_path, opt.dataset)

    x_query_data = Dataset(opt, images, tags, labels, test='image.query')
    x_db_data = Dataset(opt, images, tags, labels, test='image.db')
    y_query_data = Dataset(opt, images, tags, labels, test='text.query')
    y_db_data = Dataset(opt, images, tags, labels, test='text.db')

    x_query_dataloader = DataLoader(x_query_data,
                                    opt.batch_size,
                                    shuffle=False)
    x_db_dataloader = DataLoader(x_db_data, opt.batch_size, shuffle=False)
    y_query_dataloader = DataLoader(y_query_data,
                                    opt.batch_size,
                                    shuffle=False)
    y_db_dataloader = DataLoader(y_db_data, opt.batch_size, shuffle=False)

    qBX = generate_img_code(model, x_query_dataloader, opt.query_size,
                            FEATURE_MAP)
    qBY = generate_txt_code(model, y_query_dataloader, opt.query_size,
                            FEATURE_MAP)
    rBX = generate_img_code(model, x_db_dataloader, opt.db_size, FEATURE_MAP)
    rBY = generate_txt_code(model, y_db_dataloader, opt.db_size, FEATURE_MAP)

    query_labels, db_labels = x_query_data.get_labels()
    query_labels = query_labels.to(opt.device)
    db_labels = db_labels.to(opt.device)

    p_i2t, r_i2t = pr_curve(qBX, rBY, query_labels, db_labels)
    p_t2i, r_t2i = pr_curve(qBY, rBX, query_labels, db_labels)

    K = [1, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
    pk_i2t = p_topK(qBX, rBY, query_labels, db_labels, K)
    pk_t2i = p_topK(qBY, rBX, query_labels, db_labels, K)

    path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit)
    np.save(os.path.join(path, 'P_i2t.npy'), p_i2t.numpy())
    np.save(os.path.join(path, 'R_i2t.npy'), r_i2t.numpy())
    np.save(os.path.join(path, 'P_t2i.npy'), p_t2i.numpy())
    np.save(os.path.join(path, 'R_t2i.npy'), r_t2i.numpy())
    np.save(os.path.join(path, 'P_at_K_i2t.npy'), pk_i2t.numpy())
    np.save(os.path.join(path, 'P_at_K_t2i.npy'), pk_t2i.numpy())

    mapi2t = calc_map_k(qBX, rBY, query_labels, db_labels)
    mapt2i = calc_map_k(qBY, rBX, query_labels, db_labels)
    print('...test MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))
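For reference, a minimal sketch of what a Hamming-ranking mAP like calc_map_k typically computes (the function below is an assumption, not the project's actual helper; it presumes ±1 codes and multi-hot label rows):

import torch

def calc_map_sketch(qB, rB, q_labels, r_labels):
    """mAP over Hamming-ranked retrieval for {-1,+1} hash codes."""
    n_bits = qB.shape[1]
    hamm = 0.5 * (n_bits - qB.float() @ rB.float().t())          # pairwise Hamming distances
    rel = (q_labels.float() @ r_labels.float().t() > 0).float()  # relevant if any label is shared
    aps = []
    for i in range(qB.shape[0]):
        r = rel[i][hamm[i].argsort()]                            # relevance in ranked order
        if r.sum() == 0:
            continue
        ranks = torch.arange(1, r.numel() + 1, dtype=torch.float32)
        aps.append(((r.cumsum(0) / ranks) * r).sum() / r.sum())  # average precision
    return torch.stack(aps).mean()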
Example #3
def test(**kwargs):
    opt.parse(kwargs)

    if opt.device is not None:
        opt.device = torch.device(opt.device)
    elif opt.gpus:
        opt.device = torch.device(0)
    else:
        opt.device = torch.device('cpu')

    with torch.no_grad():
        model = CPAH(opt.image_dim, opt.text_dim, opt.hidden_dim, opt.bit, opt.num_label).to(opt.device)

        path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit) + str(opt.proc)
        load_model(model, path)

        model.eval()

        images, tags, labels = load_data(opt.data_path, opt.dataset)

        i_query_data = Dataset(opt, images, tags, labels, test='image.query')
        i_db_data = Dataset(opt, images, tags, labels, test='image.db')
        t_query_data = Dataset(opt, images, tags, labels, test='text.query')
        t_db_data = Dataset(opt, images, tags, labels, test='text.db')

        i_query_dataloader = DataLoader(i_query_data, opt.batch_size, shuffle=False)
        i_db_dataloader = DataLoader(i_db_data, opt.batch_size, shuffle=False)
        t_query_dataloader = DataLoader(t_query_data, opt.batch_size, shuffle=False)
        t_db_dataloader = DataLoader(t_db_data, opt.batch_size, shuffle=False)

        qBX = generate_img_code(model, i_query_dataloader, opt.query_size)
        qBY = generate_txt_code(model, t_query_dataloader, opt.query_size)
        rBX = generate_img_code(model, i_db_dataloader, opt.db_size)
        rBY = generate_txt_code(model, t_db_dataloader, opt.db_size)

        query_labels, db_labels = i_query_data.get_labels()
        query_labels = query_labels.to(opt.device)
        db_labels = db_labels.to(opt.device)

        #K = [1, 10, 100, 1000]
        #p_top_k(qBX, rBY, query_labels, db_labels, K, tqdm_label='I2T')
        # pr_curve2(qBY, rBX, query_labels, db_labels)

        p_i2t, r_i2t = pr_curve(qBX, rBY, query_labels, db_labels, tqdm_label='I2T')
        p_t2i, r_t2i = pr_curve(qBY, rBX, query_labels, db_labels, tqdm_label='T2I')
        p_i2i, r_i2i = pr_curve(qBX, rBX, query_labels, db_labels, tqdm_label='I2I')
        p_t2t, r_t2t = pr_curve(qBY, rBY, query_labels, db_labels, tqdm_label='T2T')

        K = [1, 10, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]
        pk_i2t = p_top_k(qBX, rBY, query_labels, db_labels, K, tqdm_label='I2T')
        pk_t2i = p_top_k(qBY, rBX, query_labels, db_labels, K, tqdm_label='T2I')
        pk_i2i = p_top_k(qBX, rBX, query_labels, db_labels, K, tqdm_label='I2I')
        pk_t2t = p_top_k(qBY, rBY, query_labels, db_labels, K, tqdm_label='T2T')

        mapi2t = calc_map_k(qBX, rBY, query_labels, db_labels)
        mapt2i = calc_map_k(qBY, rBX, query_labels, db_labels)
        mapi2i = calc_map_k(qBX, rBX, query_labels, db_labels)
        mapt2t = calc_map_k(qBY, rBY, query_labels, db_labels)

        pr_dict = {'pi2t': p_i2t.cpu().numpy(), 'ri2t': r_i2t.cpu().numpy(),
                   'pt2i': p_t2i.cpu().numpy(), 'rt2i': r_t2i.cpu().numpy(),
                   'pi2i': p_i2i.cpu().numpy(), 'ri2i': r_i2i.cpu().numpy(),
                   'pt2t': p_t2t.cpu().numpy(), 'rt2t': r_t2t.cpu().numpy()}

        pk_dict = {'k': K,
                   'pki2t': pk_i2t.cpu().numpy(),
                   'pkt2i': pk_t2i.cpu().numpy(),
                   'pki2i': pk_i2i.cpu().numpy(),
                   'pkt2t': pk_t2t.cpu().numpy()}

        map_dict = {'mapi2t': float(mapi2t.cpu().numpy()),
                    'mapt2i': float(mapt2i.cpu().numpy()),
                    'mapi2i': float(mapi2i.cpu().numpy()),
                    'mapt2t': float(mapt2t.cpu().numpy())}

        print('   Test MAP: MAP(i->t) = {:3.4f}, MAP(t->i) = {:3.4f}, MAP(i->i) = {:3.4f}, MAP(t->t) = {:3.4f}'.format(mapi2t, mapt2i, mapi2i, mapt2t))

        path = 'checkpoints/' + opt.dataset + '_' + str(opt.bit) + str(opt.proc)
        write_pickle(os.path.join(path, 'pr_dict.pkl'), pr_dict)
        write_pickle(os.path.join(path, 'pk_dict.pkl'), pk_dict)
        write_pickle(os.path.join(path, 'map_dict.pkl'), map_dict)
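The pr_curve helper used here is project code; as a hedged sketch, precision and recall for hash codes are commonly swept over Hamming radii like this (the name, signature, and ±1 code assumption are illustrative):

import torch

def pr_curve_sketch(qB, rB, q_labels, r_labels):
    """Precision/recall swept over Hamming radii for {-1,+1} hash codes."""
    n_bits = qB.shape[1]
    hamm = 0.5 * (n_bits - qB.float() @ rB.float().t())
    rel = (q_labels.float() @ r_labels.float().t() > 0).float()
    P, R = [], []
    for radius in range(n_bits + 1):
        retrieved = (hamm <= radius).float()     # all db items within this radius
        tp = (retrieved * rel).sum()
        P.append((tp / retrieved.sum().clamp(min=1)).item())
        R.append((tp / rel.sum().clamp(min=1)).item())
    return torch.tensor(P), torch.tensor(R)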
Example #4
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

history = model.fit(X_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)
yp = model.predict(X_dev)

yp = np.argmax(yp, axis=1)
y_dev = np.argmax(y_dev, axis=1)

# performance on the dev set
print_classfication_report('Keras', y_dev, yp, stem='keras_dev')
pr_curve(y_dev, yp, num_classes, 'keras_dev')

# prediction is done for every sample, and the prediction for a whole
# block is obtained through consensus voting;
# blocks_consensus describes the block-level prediction distribution,
# as a check that it is not bimodal
blocks_pred, blocks_consensus = score_model(model, X_test)
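# A minimal sketch of consensus (majority) voting over a block's per-sample
# predictions (an assumption about what score_model does internally):
def consensus_vote(sample_preds):
    values, counts = np.unique(sample_preds, return_counts=True)
    return values[np.argmax(counts)]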

# write the CSV file
write_data(blocks_pred, 'khan_speaker_labels_MLP.csv')
Example #5
    # (the head of this comprehension was truncated in the original excerpt;
    #  `feature_values` as an iterable of (gid, feat_value) pairs is an assumption)
    all_feature_values = [(gid, feat_value)
                          for gid, feat_value in feature_values
                          if gid // 100 in BENIGN_SCENARIOS or
                             gid // 100 == scenario]
    all_values = np.array([feat_value
                           for gid, feat_value in all_feature_values]).reshape(-1, 1)
    y_true = [1 if gid // 100 in MALICIOUS_SCENARIOS else 0
              for gid, feat_value in all_feature_values]
    anomaly_scores = []
    for value in all_values.flatten():
        if value in pmf:
            anomaly_scores.append(-pmf[value])
        else:
            # additive (Laplace) smoothing for values unseen in training
            prob = 1.0 / (total + nevents)
            anomaly_scores.append(-prob)
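    # A sketch of the same computation with scikit-learn (an assumption,
    # not the project's own pr_curve helper):
    #   from sklearn.metrics import precision_recall_curve, average_precision_score
    #   precision, recall, _ = precision_recall_curve(y_true, anomaly_scores)
    #   ap = average_precision_score(y_true, anomaly_scores)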

    # plot my own PR curve
    precision, recall, ap = pr_curve(y_true, anomaly_scores)
    print('Scenario:', scenario, ap)
    plt.figure()
    plt.plot(recall, precision, label='AUC (Scenario ' + str(scenario) + \
                                      ')={0:0.3f}'.format(ap), color=colours[i+1])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.legend()
    plt.savefig('pr-' + feat_name + str(scenario) + '.pdf', bbox_inches='tight')
    plt.clf()
    plt.close()

#print all_values.flatten()[300:400]
#print anomaly_scores[300:400]
Example #6
Alb_dec = dc()
StateVector = SVC(gamma=2, kernel='linear', C=C)  # note: gamma is ignored with a linear kernel
knn = KNeighborsClassifier(3)

StateVector.fit(X_train, y_train)      # train the classifiers on the training set
Alb_dec.fit(X_train, y_train)
knn.fit(X_train, y_train)

# build the precision-recall curve

pred_tra = Alb_dec.predict(X_train)    # compute predictions for the training and test sets
pred = Alb_dec.predict(X_test)
pred_SVC = StateVector.predict(X_test)
pred_knn = knn.predict(X_test)

u.pr_curve(pred, y_test, pred_SVC, pred_knn)  # precision-recall curve

# u.mostra_predizioni(pred.size, pred, pred_SVC, y_test, X_test)

print("Prediction accuracy on the test set (decision tree): %0.1f%%" % (accuracy_score(pred, y_test)*100))
print("Prediction accuracy on the test set (SVC): %0.1f%%" % (accuracy_score(pred_SVC, y_test)*100))
print("Prediction accuracy on the test set (knn): %0.1f%%" % (accuracy_score(pred_knn, y_test)*100))


"""suggestion per nuovo fiore da cercare 4.9,3.1,1.5,0.1 Iris-setosa
                                         5.6,2.8,4.9,2.0 Iris-virginica
                                         6.3,2.3,4.4,1.3 Iris-versicolor"""
nuovo_fiore=[[4.9,3.1,1.5,0.1]]

nuovo_test_a =Alb_dec.predict(nuovo_fiore)
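# Sketch: the same query against the other two fitted classifiers
# (the variable names below are new; everything else is defined above):
nuovo_test_s = StateVector.predict(nuovo_fiore)
nuovo_test_k = knn.predict(nuovo_fiore)
print('tree:', nuovo_test_a, 'svc:', nuovo_test_s, 'knn:', nuovo_test_k)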
Example #7
X_train = np.vstack(train_blocks)
n_classes = len(np.unique(train_labels))

ind = 12
indices_ci = plot_feature_importance(X_train, train_labels)

# scaler = StandardScaler().fit(X_train[:,indices_ci[:ind]])
#
# train_blocks = np.array([scaler.transform(block[:,indices_ci[:ind]]) for block in train_blocks])
# test_blocks  = np.array([scaler.transform(block[:,indices_ci[:ind]]) for block in test_blocks])
# Decided to use all features, as each carries some unique information

scaler = StandardScaler().fit(X_train)

train_blocks = np.array([scaler.transform(block) for block in train_blocks])
test_blocks = np.array([scaler.transform(block) for block in test_blocks])

X_train_blocks, X_dev_blocks, y_train_blocks, y_dev_blocks = train_test_split(
    train_blocks, train_blocks_labels, test_size=0.2, random_state=455)

speakers = run_model(X_train_blocks, y_train_blocks)

yp = score_models(speakers, X_dev_blocks)

print_classfication_report('gmm', y_dev_blocks, yp, stem='gmm_dev')
pr_curve(y_dev_blocks, yp, n_classes, 'gmm_dev')
plt.close('all')

blocks_pred = score_models(speakers, test_blocks)
write_data(blocks_pred, 'khan_speaker_labels_GMM.csv')

# KN Grid Search---------------------------------------------------------------
metrics       = ['euclidean','manhattan']
weights       = ['uniform','distance']
numNeighbors  = [12,15,17]
param_grid    = dict(metric=metrics, weights=weights, n_neighbors=numNeighbors)

CV_kn = GridSearchCV(clf1, param_grid=param_grid, cv=cv, n_jobs=3, verbose=3, scoring='f1_weighted')
CV_kn = CV_kn.fit(X_train, y_train)
print(CV_kn.best_estimator_)
kn = CV_kn.best_estimator_

y_pred_train_kn = cross_val_predict(kn, X_train, y_train, cv=cv)
print_classfication_report(kn, y_train, y_pred_train_kn, stem='kn')
pr_curve(y_train, y_pred_train_kn, n_components, 'kn')

y_pred_dev_kn = kn.predict(X_dev)
print_classfication_report(kn, y_dev, y_pred_dev_kn, stem='kn_dev')
pr_curve(y_dev, y_pred_dev_kn, n_components, 'kn_dev')

# KN Grid Search (backward-compatibility bug in scikit-learn)----------------------------------------------
metrics       = ['mahalanobis']
weights       = ['uniform','distance']
numNeighbors  = [13,15,17]
param_grid    = dict(metric=metrics, weights=weights, n_neighbors=numNeighbors)

CV_km = GridSearchCV(clf1a, param_grid=param_grid, cv=cv, n_jobs=3, verbose=3, scoring='f1_weighted')
CV_km = CV_km.fit(X_train, y_train)
print(CV_km.best_estimator_)
km = CV_km.best_estimator_
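# Note: for metric='mahalanobis', scikit-learn requires the covariance
# matrix to be supplied explicitly; a sketch of the missing piece:
#   param_grid['metric_params'] = [{'V': np.cov(X_train, rowvar=False)}]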
Example #9
    def test(self):
        self.ImgNet.eval().cuda()
        self.TxtNet.eval().cuda()

        re_BI, re_BT, re_LT, qu_BI, qu_BT, qu_LT = generate_hashes_from_dataloader(
            self.database_loader, self.test_loader, self.ImgNet, self.TxtNet,
            self.cfg.LABEL_DIM)

        qu_BI = self.get_each_5th_element(qu_BI)
        re_BI = self.get_each_5th_element(re_BI)
        qu_LI = self.get_each_5th_element(qu_LT)
        re_LI = self.get_each_5th_element(re_LT)

        p_i2t, r_i2t = pr_curve(qu_BI, re_BT, qu_LI, re_LT, tqdm_label='I2T')
        p_t2i, r_t2i = pr_curve(qu_BT, re_BI, qu_LT, re_LI, tqdm_label='T2I')
        p_i2i, r_i2i = pr_curve(qu_BI, re_BI, qu_LI, re_LI, tqdm_label='I2I')
        p_t2t, r_t2t = pr_curve(qu_BT, re_BT, qu_LT, re_LT, tqdm_label='T2T')

        K = [1, 10, 50] + list(range(100, 1000, 100)) + list(
            range(1000, 10001, 1000))
        pk_i2t = p_top_k(qu_BI, re_BT, qu_LI, re_LT, K, tqdm_label='I2T')
        pk_t2i = p_top_k(qu_BT, re_BI, qu_LT, re_LI, K, tqdm_label='T2I')
        pk_i2i = p_top_k(qu_BI, re_BI, qu_LI, re_LI, K, tqdm_label='I2I')
        pk_t2t = p_top_k(qu_BT, re_BT, qu_LT, re_LT, K, tqdm_label='T2T')

        MAP_I2T = calc_map_k(qu_BI, re_BT, qu_LI, re_LT, self.cfg.MAP_K)
        MAP_T2I = calc_map_k(qu_BT, re_BI, qu_LT, re_LI, self.cfg.MAP_K)
        MAP_I2I = calc_map_k(qu_BI, re_BI, qu_LI, re_LI, self.cfg.MAP_K)
        MAP_T2T = calc_map_k(qu_BT, re_BT, qu_LT, re_LT, self.cfg.MAP_K)
        MAPS = (MAP_I2T, MAP_T2I, MAP_I2I, MAP_T2T)

        pr_dict = {
            'pi2t': p_i2t.cpu().numpy(),
            'ri2t': r_i2t.cpu().numpy(),
            'pt2i': p_t2i.cpu().numpy(),
            'rt2i': r_t2i.cpu().numpy(),
            'pi2i': p_i2i.cpu().numpy(),
            'ri2i': r_i2i.cpu().numpy(),
            'pt2t': p_t2t.cpu().numpy(),
            'rt2t': r_t2t.cpu().numpy()
        }

        pk_dict = {
            'k': K,
            'pki2t': pk_i2t.cpu().numpy(),
            'pkt2i': pk_t2i.cpu().numpy(),
            'pki2i': pk_i2i.cpu().numpy(),
            'pkt2t': pk_t2t.cpu().numpy()
        }

        map_dict = {
            'mapi2t': float(MAP_I2T.cpu().numpy()),
            'mapt2i': float(MAP_T2I.cpu().numpy()),
            'mapi2i': float(MAP_I2I.cpu().numpy()),
            'mapt2t': float(MAP_T2T.cpu().numpy())
        }

        self.logger.info(
            'mAP I->T: %.3f, mAP T->I: %.3f, mAP I->I: %.3f, mAP T->T: %.3f' %
            MAPS)

        write_pickle(osp.join(self.cfg.MODEL_DIR, self.path, 'pr_dict.pkl'),
                     pr_dict)
        write_pickle(osp.join(self.cfg.MODEL_DIR, self.path, 'pk_dict.pkl'),
                     pk_dict)
        write_pickle(osp.join(self.cfg.MODEL_DIR, self.path, 'map_dict.pkl'),
                     map_dict)
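For reference, a minimal sketch of precision@K over Hamming-ranked retrieval, mirroring how p_top_k is called above (illustrative only; assumes ±1 codes and multi-hot label rows, not the project's actual helper):

import torch

def p_top_k_sketch(qB, rB, q_labels, r_labels, K):
    """Mean precision@k over Hamming-ranked retrieval, one value per k in K."""
    n_bits = qB.shape[1]
    hamm = 0.5 * (n_bits - qB.float() @ rB.float().t())          # pairwise Hamming distances
    rel = (q_labels.float() @ r_labels.float().t() > 0).float()  # 1 where query/db share a label
    order = hamm.argsort(dim=1)                                  # ranked db indices per query
    rel_sorted = rel.gather(1, order)                            # relevance in ranked order
    return torch.stack([rel_sorted[:, :k].mean(dim=1).mean() for k in K])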