def get_nb_parameters_sentiment_analysis():
    """Grid-search MultinomialNB's ``alpha`` on the sentiment dataset.

    Loads the sentiment dataset with ``mode='original'``, fits a
    ``GridSearchCV`` over the candidate ``alpha`` values on the first
    X/Y pair returned by the loader (scoring: ``precision_macro``),
    prints the score of every candidate, prints a classification report
    for the last X/Y pair returned by the loader, and returns the
    winning parameters.

    Returns:
        The ``best_params_`` attribute of the fitted grid search.
    """
    train_X, train_Y, _, _, test_X, test_Y = load_sentiment_dataset(
        mode='original')

    # Wider grid kept for reference:
    # [{'alpha': [100, 10, 5, 4, 3, 2, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}]
    param_grid = [{'alpha': [1, 0.1]}]

    for metric in ['precision']:
        print("# Tuning hyperparameters for %s" % metric)

        search = GridSearchCV(MultinomialNB(),
                              param_grid,
                              scoring='%s_macro' % metric)
        search.fit(train_X, train_Y)

        print("Best parameters set found on development set:")
        print()
        print(search.best_params_)
        print()
        print("Grid scores on development set:")
        print()

        # One row per candidate: mean score, std of the score, parameters.
        results = search.cv_results_
        rows = zip(results['mean_test_score'],
                   results['std_test_score'],
                   results['params'])
        for avg, spread, candidate in rows:
            print("%0.3f (+/-%0.03f) for %r" % (avg, spread * 2, candidate))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        predictions = search.predict(test_X)
        print(classification_report(test_Y, predictions))
        print()

    return search.best_params_
def get_dataset(dataset_name,
                is_binarized,
                is_resized,
                is_grayscale,
                sentiment_mode='original',
                classification_type='binary'):
    """Load the dataset called ``dataset_name`` through its loader function.

    Args:
        dataset_name: One of "sentiment_analysis", "mnist", "cifar-10",
            "fashion_mnist" or "stanford40".
        is_binarized: Forwarded to the MNIST and Fashion-MNIST loaders.
        is_resized: Forwarded to the MNIST, Fashion-MNIST and CIFAR-10
            loaders.
        is_grayscale: Forwarded to the CIFAR-10 loader.
        sentiment_mode: Forwarded as ``mode`` to the sentiment loader.
        classification_type: Forwarded to the sentiment loader.

    Returns:
        A tuple whose length depends on the dataset:
        * "sentiment_analysis": (data_X, data_Y, cv_X, cv_Y, test_X, test_Y)
        * "stanford40": (data_X, data_Y, test_X, test_Y, data_X_A, test_X_A)
        * all others: (data_X, data_Y, test_X, test_Y)

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
            (Previously this case printed a message and then failed with
            an UnboundLocalError on the final return.)
    """
    if dataset_name == "sentiment_analysis":
        data_X, data_Y, cv_X, cv_Y, test_X, test_Y = load_sentiment_dataset(
            mode=sentiment_mode, classification_type=classification_type)
        return data_X, data_Y, cv_X, cv_Y, test_X, test_Y

    if dataset_name == 'stanford40':
        (data_X, data_Y, test_X, test_Y,
         data_X_A, test_X_A) = load_stanford40_dataset()
        return data_X, data_Y, test_X, test_Y, data_X_A, test_X_A

    if dataset_name == "mnist":
        data_X, data_Y, test_X, test_Y = load_mnist_dataset(
            is_binarized, is_resized)
    elif dataset_name == 'cifar-10':
        data_X, data_Y, test_X, test_Y = load_cifar10_dataset(
            is_grayscale, is_resized)
    elif dataset_name == 'fashion_mnist':
        data_X, data_Y, test_X, test_Y = load_fashionmnist(
            is_binarized, is_resized)
    else:
        # Fail loudly instead of falling through to a return of
        # variables that were never assigned.
        raise ValueError(
            "Not implemented yet: dataset %r" % (dataset_name, ))

    return data_X, data_Y, test_X, test_Y
def get_svm_parameters_sentiment_analysis():
    """Grid-search SVC hyperparameters on the sentiment dataset.

    Loads the sentiment dataset with the loader's default arguments,
    fits a ``GridSearchCV`` over the candidate SVC settings on the first
    X/Y pair returned by the loader (scoring: ``precision_macro``),
    prints the score of every candidate, prints a classification report
    for the last X/Y pair returned by the loader, and returns the
    winning parameters.

    Returns:
        The ``best_params_`` attribute of the fitted grid search.
    """
    train_X, train_Y, _, _, test_X, test_Y = load_sentiment_dataset()

    # Wider grid kept for reference:
    # [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    #  {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    param_grid = [{'kernel': ['linear'], 'C': [1, 10]}]

    for metric in ['precision']:
        print("# Tuning hyperparameters for %s" % metric)

        search = GridSearchCV(svm.SVC(),
                              param_grid,
                              scoring='%s_macro' % metric)
        search.fit(train_X, train_Y)

        print("Best parameters set found on development set:")
        print()
        print(search.best_params_)
        print()
        print("Grid scores on development set:")
        print()

        # One row per candidate: mean score, std of the score, parameters.
        results = search.cv_results_
        rows = zip(results['mean_test_score'],
                   results['std_test_score'],
                   results['params'])
        for avg, spread, candidate in rows:
            print("%0.3f (+/-%0.03f) for %r" % (avg, spread * 2, candidate))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        predictions = search.predict(test_X)
        print(classification_report(test_Y, predictions))
        print()

    return search.best_params_
#import secml
from secml.ml.features import CNormalizerMinMax
from secml.ml.classifiers.sklearn import c_classifier_sklearn
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.peval.metrics import CMetricAccuracy
from secml.adv.attacks.evasion import CAttackEvasionPGDLS
from secml.adv.attacks.evasion import CAttackEvasionPGD
from secml.adv.attacks.poisoning.c_attack_poisoning import CAttackPoisoning  #This is an Abstract class
from secml.adv.attacks.poisoning.c_attack_poisoning_svm import CAttackPoisoningSVM  #Only works on binary-classification SVM
from secml.ml.kernels import CKernelRBF
from secml.data import CDataset
from secml.array import CArray
from fns import load_sentiment_dataset

# Load the sentiment splits with binary labels.
tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset(
    classification_type='binary')

# Report the distinct label values found in the te_Y targets (computed
# on the raw arrays, before they are wrapped as CArray).
all_classes = list(np.unique(te_Y))
print(all_classes)

# Wrap every split in secml's CArray container.
tr_X = CArray(tr_X)
tr_Y = CArray(tr_Y)
cv_X = CArray(cv_X)
cv_Y = CArray(cv_Y)
te_X = CArray(te_X)
te_Y = CArray(te_Y)

# Build one CDataset per split.
ds_tr_secml = CDataset(tr_X, tr_Y)
#print(ds_tr_secml.classes, ds_tr_secml.num_classes, ds_tr_secml.num_features, ds_tr_secml.num_samples)
ds_te_secml = CDataset(te_X, te_Y)
ds_cv_secml = CDataset(cv_X, cv_Y)

# Fit the min-max normalizer on the training features only, then apply
# the same fitted transform to the other two splits.
normalizer = CNormalizerMinMax()
ds_tr_secml.X = normalizer.fit_transform(ds_tr_secml.X)
ds_te_secml.X = normalizer.transform(ds_te_secml.X)
ds_cv_secml.X = normalizer.transform(ds_cv_secml.X)
return math.pow(val, params['degree']) elif(kernel == 'sigmoid'): val = (np.dot(xi, xj) * gamma) + params['coef0'] return np.tanh(val) else: print("Invalid") return None def compute_probs_2nd_method(kernel, data_X, data_Y): clf_list = get_classifiers_list(kernel, data_X, data_Y) no_sampled_parameters = len(clf_list) probs = np.empty((no_sampled_parameters, data_X.shape[0])) index = 0 for classifier in clf_list: clf = classifier['clf'] new_data_X = classifier['data_X'] new_data_Y = classifier['data_Y'] clf.fit(new_data_X, new_data_Y) #Works for 2-class classification only tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset() probs = compute_probs('linear', tr_X, tr_Y) waic = calculate_waic(probs) print(waic)
def fit_model_to_initial_dataset(dataset_name,
                                 classifier,
                                 model_name,
                                 is_resized,
                                 is_grayscale,
                                 pca=None):
    """Fit ``classifier`` on a small initial dataset and return it.

    The initial data depends on ``dataset_name``:

    * 'mnist' / 'fashion_mnist': targets come from
      ``datasets.load_digits()``; the features are all-zero arrays of
      shape (10, 10) when ``is_resized`` is true, else (28, 28), flattened
      per sample (the step that resized the digit images is disabled).
    * 'cifar-10': data returned by ``load_svhn(is_grayscale)``, flattened
      per sample.
    * 'sentiment_analysis': two random feature rows, with the feature
      count taken from the loader's cv_X, labelled 0 and 1. The
      classifier is fitted on these with ``classifier.fit`` and returned
      immediately; ``model_name`` and ``pca`` are ignored on this path.

    For the image datasets the data is split with ``train_test_split``
    and at most the first 2500 training samples are used.

    Args:
        dataset_name: Name of the dataset family (see above).
        classifier: Model object; must provide ``fit`` (for 'svm', 'knn',
            'naive_bayes' and the sentiment path) or ``train_model``
            (for 'dt', 'lr').
        model_name: 'svm', 'knn', 'naive_bayes', 'dt' or 'lr'. Any other
            value leaves the classifier untouched on the image paths.
        is_resized: Selects the image shape for the MNIST-style path.
        is_grayscale: Forwarded to ``load_svhn``.
        pca: Optional transformer with ``fit_transform``; when given, it
            is fitted on the whole training split before truncation
            (only used for 'svm', 'knn' and 'naive_bayes').

    Returns:
        The same ``classifier`` object that was passed in.

    Raises:
        ValueError: If ``model_name`` requires training data but
            ``dataset_name`` is not one of the supported names.
    """
    initial_size = 2500  # cap on the number of initial training samples
    split = None

    if dataset_name in ('mnist', 'fashion_mnist'):
        digits = datasets.load_digits()
        image_shape = (10, 10) if is_resized else (28, 28)
        n_samples = len(digits.images)
        # NOTE: the cv2.resize loop that copied the digit images into
        # `new_images` is disabled, so the features stay all-zero.
        new_images = np.zeros((n_samples, ) + image_shape)
        data_images = new_images.reshape((n_samples, -1))
        split = train_test_split(data_images, digits.target)
    elif dataset_name == 'cifar-10':
        train_X, train_Y = load_svhn(is_grayscale)
        print(train_X.shape, train_Y.shape)
        train_X_new = train_X.reshape((train_X.shape[0], -1))
        print(train_X_new.shape)
        split = train_test_split(train_X_new, train_Y)
    elif dataset_name == 'sentiment_analysis':
        # Only the feature count of cv_X is needed; the initial model is
        # fitted on two random rows, one per class.
        _, _, cv_X, _, _, _ = load_sentiment_dataset()
        data_X = np.random.rand(2, cv_X.shape[1])
        data_Y = np.array([0, 1])
        classifier.fit(data_X, data_Y)
        return classifier

    fits_directly = model_name in ('svm', 'knn', 'naive_bayes')
    uses_train_model = model_name in ('dt', 'lr')
    if not (fits_directly or uses_train_model):
        # Unrecognised model names have always been a no-op.
        return classifier

    if split is None:
        # Previously this surfaced as an UnboundLocalError on d_X.
        raise ValueError(
            "No initial dataset available for dataset %r" % (dataset_name, ))
    d_X, t_X, d_Y, t_Y = split

    if fits_directly:
        if pca is not None:
            # Fit the projection on the full training split, then truncate.
            data = pca.fit_transform(d_X)
            print(data.shape)
            data = data[:initial_size]
        else:
            data = d_X[:initial_size]
        classifier.fit(data, d_Y[:initial_size])
    else:
        classifier.train_model(d_X[:initial_size], d_Y[:initial_size],
                               t_X, t_Y)
    return classifier