Example #1
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
from fns import load_sentiment_dataset

def get_nb_parameters_sentiment_analysis():
    tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset(
        mode='original')
    #tuned_parameters = [{'alpha' : [100, 10, 5, 4, 3, 2, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]}]
    tuned_parameters = [{'alpha': [1, 0.1]}]
    scores = ['precision']
    for score in scores:
        print("# Tuning hyperparameters for %s" % score)
        clf = GridSearchCV(MultinomialNB(),
                           tuned_parameters,
                           scoring='%s_macro' % score)
        clf.fit(tr_X, tr_Y)
        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
        print()
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = te_Y, clf.predict(te_X)
        print(classification_report(y_true, y_pred))
        print()
        parameters = clf.best_params_
    return parameters
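A minimal usage sketch (MultinomialNB and load_sentiment_dataset as imported above; score() is shown only as a sanity check):

best = get_nb_parameters_sentiment_analysis()
tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset(mode='original')
final_clf = MultinomialNB(**best).fit(tr_X, tr_Y)
print(final_clf.score(te_X, te_Y))  # mean accuracy on the held-out test split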
Example #2
def get_dataset(dataset_name,
                is_binarized,
                is_resized,
                is_grayscale,
                sentiment_mode='original',
                classification_type='binary'):
    if dataset_name == "sentiment_analysis":
        data_X, data_Y, cv_X, cv_Y, test_X, test_Y = load_sentiment_dataset(
            mode=sentiment_mode, classification_type=classification_type)
        return data_X, data_Y, cv_X, cv_Y, test_X, test_Y
    if dataset_name == "mnist":
        data_X, data_Y, test_X, test_Y = load_mnist_dataset(
            is_binarized, is_resized)
    elif dataset_name == 'cifar-10':
        data_X, data_Y, test_X, test_Y = load_cifar10_dataset(
            is_grayscale, is_resized)
        #print("Dataset loaded")
    elif dataset_name == 'fashion_mnist':
        data_X, data_Y, test_X, test_Y = load_fashionmnist(
            is_binarized, is_resized)
    elif dataset_name == 'stanford40':
        data_X, data_Y, test_X, test_Y, data_X_A, test_X_A = load_stanford40_dataset()
        return data_X, data_Y, test_X, test_Y, data_X_A, test_X_A
    else:
        raise ValueError("Dataset '%s' is not implemented yet" % dataset_name)
    return data_X, data_Y, test_X, test_Y
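A minimal usage sketch (flag values are illustrative):

data_X, data_Y, test_X, test_Y = get_dataset('mnist',
                                             is_binarized=False,
                                             is_resized=False,
                                             is_grayscale=False)
print(data_X.shape, test_X.shape)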
Example #3
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from fns import load_sentiment_dataset

def get_svm_parameters_sentiment_analysis():
    tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset()
    #tuned_parameters = [{'kernel':['rbf'], 'gamma':[1e-3, 1e-4], 'C' : [1, 10, 100, 1000]}, {'kernel' : ['linear'], 'C' : [1, 10, 100, 1000]}]
    tuned_parameters = [{'kernel': ['linear'], 'C': [1, 10]}]
    scores = ['precision']
    for score in scores:
        print("# Tuning hyperparameters for %s" % score)
        clf = GridSearchCV(svm.SVC(),
                           tuned_parameters,
                           scoring='%s_macro' % score)
        clf.fit(tr_X, tr_Y)
        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
        print()
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = te_Y, clf.predict(te_X)
        print(classification_report(y_true, y_pred))
        print()
        parameters = clf.best_params_
    return parameters
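As with the Naive Bayes helper in Example #1, the returned dictionary can be unpacked into a fresh estimator; a minimal sketch:

best = get_svm_parameters_sentiment_analysis()
tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset()
final_clf = svm.SVC(**best).fit(tr_X, tr_Y)
print(final_clf.score(te_X, te_Y))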
Example #4
#import secml
import numpy as np
from secml.ml.features import CNormalizerMinMax
from secml.ml.classifiers.sklearn import c_classifier_sklearn
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.peval.metrics import CMetricAccuracy
from secml.adv.attacks.evasion import CAttackEvasionPGDLS
from secml.adv.attacks.evasion import CAttackEvasionPGD
from secml.adv.attacks.poisoning.c_attack_poisoning import CAttackPoisoning  # abstract base class
from secml.adv.attacks.poisoning.c_attack_poisoning_svm import CAttackPoisoningSVM  # works only on binary-classification SVMs
from secml.ml.kernels import CKernelRBF
from secml.data import CDataset
from secml.array import CArray
from fns import load_sentiment_dataset

tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset(
    classification_type='binary')
all_classes = list(np.unique(te_Y))
print(all_classes)
tr_X, tr_Y = CArray(tr_X), CArray(tr_Y)
cv_X, cv_Y = CArray(cv_X), CArray(cv_Y)
te_X, te_Y = CArray(te_X), CArray(te_Y)

ds_tr_secml = CDataset(tr_X, tr_Y)
#print(ds_tr_secml.classes, ds_tr_secml.num_classes, ds_tr_secml.num_features, ds_tr_secml.num_samples)
ds_te_secml = CDataset(te_X, te_Y)
ds_cv_secml = CDataset(cv_X, cv_Y)

normalizer = CNormalizerMinMax()
ds_tr_secml.X = normalizer.fit_transform(ds_tr_secml.X)
ds_te_secml.X = normalizer.transform(ds_te_secml.X)
ds_cv_secml.X = normalizer.transform(ds_cv_secml.X)
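From here, the usual secml workflow (a sketch with illustrative hyperparameters, assuming a secml version where fit takes (x, y)) is to train a classifier on the normalized data and score it:

clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
clf.fit(ds_tr_secml.X, ds_tr_secml.Y)
y_pred = clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
print("Test accuracy:", metric.performance_score(y_true=ds_te_secml.Y, y_pred=y_pred))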
Example #5
import math
import numpy as np
from fns import load_sentiment_dataset

# Reconstructed head: the opening of this kernel helper was truncated in the source
def kernel_value(kernel, xi, xj, gamma, params):
    if kernel == 'poly':
        val = (np.dot(xi, xj) * gamma) + params['coef0']
        return math.pow(val, params['degree'])
    elif kernel == 'sigmoid':
        val = (np.dot(xi, xj) * gamma) + params['coef0']
        return np.tanh(val)
    else:
        print("Invalid")
        return None

    
def compute_probs_2nd_method(kernel, data_X, data_Y):
    clf_list = get_classifiers_list(kernel, data_X, data_Y)
    no_sampled_parameters = len(clf_list)

    probs = np.empty((no_sampled_parameters, data_X.shape[0]))
    index = 0

    for classifier in clf_list:
        clf = classifier['clf']
        new_data_X = classifier['data_X']
        new_data_Y = classifier['data_Y']

        clf.fit(new_data_X, new_data_Y)
        # Works for 2-class classification only. The loop body was truncated in
        # the source; a plausible completion (assuming the classifiers expose
        # predict_proba) stores each sample's probability of its true label:
        p = clf.predict_proba(data_X)
        probs[index] = p[np.arange(data_Y.shape[0]), data_Y]
        index += 1
    return probs

tr_X, tr_Y, cv_X, cv_Y, te_X, te_Y = load_sentiment_dataset()

# compute_probs and calculate_waic come from elsewhere in the original source file
probs = compute_probs('linear', tr_X, tr_Y)
waic = calculate_waic(probs)

print(waic)
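calculate_waic is not shown in this example; a minimal sketch of the standard WAIC computation, assuming probs holds the predictive probability of each sample's true label under each sampled model:

import numpy as np

def calculate_waic(probs):
    # probs: array of shape (n_models, n_samples)
    log_probs = np.log(probs)
    lppd = np.sum(np.log(np.mean(probs, axis=0)))  # log pointwise predictive density
    p_waic = np.sum(np.var(log_probs, axis=0))     # effective number of parameters
    return -2.0 * (lppd - p_waic)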
Example #6
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from fns import load_sentiment_dataset
# load_svhn is assumed to come from the same project helpers as load_sentiment_dataset

def fit_model_to_initial_dataset(dataset_name,
                                 classifier,
                                 model_name,
                                 is_resized,
                                 is_grayscale,
                                 pca=None):
    if dataset_name in ('mnist', 'fashion_mnist'):
        digits = datasets.load_digits()
        if is_resized:
            image_shape = (10, 10)
        else:
            image_shape = (28, 28)
        n_samples = len(digits.images)
        new_images = np.zeros((n_samples, ) + image_shape)
        # With the resize loop below commented out, new_images stays all-zero,
        # so the model is fitted on zero vectors of the target shape
        #for i in range(len(digits.images)):
        #    new_images[i] = cv2.resize(digits.images[i], dsize=image_shape, interpolation=cv2.INTER_CUBIC)
        # data_images = digits.images.reshape((n_samples, -1))
        data_images = new_images.reshape((n_samples, -1))
        d_X, t_X, d_Y, t_Y = train_test_split(data_images, digits.target)

    elif dataset_name == 'cifar-10':
        # Note: despite the branch name, this loads the SVHN training set
        train_X, train_Y = load_svhn(is_grayscale)
        print(train_X.shape, train_Y.shape)
        n_samples = train_X.shape[0]
        train_X_new = train_X.reshape((n_samples, -1))
        print(train_X_new.shape)
        d_X, t_X, d_Y, t_Y = train_test_split(train_X_new, train_Y)

    elif dataset_name == 'sentiment_analysis':
        _, _, cv_X, cv_Y, _, _ = load_sentiment_dataset()
        #data_X, data_Y = np.empty((2, cv_X.shape[1])), np.empty((2, ))
        #index = 0
        #for i in range(cv_Y.shape[0]):
        #    if(cv_Y[i] == 0):
        #        data_X[index] = cv_X[i]
        #        data_Y[index] = cv_Y[i]
        #        index = index + 1
        #        break
        #if(index == 3):
        #    break
        #for i in range(cv_Y.shape[0]):
        #    if(cv_Y[i] == 1):
        #        data_X[index] = cv_X[i]
        #        data_Y[index] = cv_Y[i]
        #        index = index + 1
        #        break
        #if(index == 5):
        #    break
        #data_Y = data_Y.astype('int')
        # Two random feature vectors labelled 0 and 1, just to initialize the
        # classifier with the right input dimensionality
        data_X = np.random.rand(2, cv_X.shape[1])
        data_Y = np.array([0, 1])
        classifier.fit(data_X, data_Y)
        return classifier

    if model_name in ('svm', 'knn', 'naive_bayes'):
        if pca is not None:
            data = pca.fit_transform(d_X)
            #print(pca.explained_variance_ratio_)
            print(data.shape)
            #a = input()
            data = data[:2500]
            d_Y = d_Y[:2500]
        else:
            data = d_X[:2500]
            d_Y = d_Y[:2500]
        classifier.fit(data, d_Y)
    #elif(model_name == 'knn'):
    #    classifier.fit(d_X, d_Y)
    elif (model_name == "dt" or model_name == "lr"):
        d_X = d_X[:2500]
        d_Y = d_Y[:2500]
        classifier.train_model(d_X, d_Y, t_X, t_Y)
    ##print("The size of the Initial dataset on which Model A is trained is: ", d_X.shape)
    #print("Fitted model A to the initial dataset")
    return classifier
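A minimal usage sketch (the model and flag values are illustrative):

from sklearn.svm import SVC

clf = fit_model_to_initial_dataset('mnist',
                                   classifier=SVC(),
                                   model_name='svm',
                                   is_resized=False,
                                   is_grayscale=False)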