Beispiel #1
0
def run_vsm():
    """Fit an SVM on the MNIST training split and return its recorded metrics.

    The data comes from ``datasets.mnist.load_data()``. Only the training
    images are cast to floating point, and only the training split is passed
    to ``fit``; the test split is used solely for the size printout.

    Returns:
        tuple: ``(model_SVM.error_acc, model_SVM.error_loss)`` exactly as
        left on the model by ``fit`` (presumably accuracy and loss
        histories -- confirm against the ``SVM`` class).
    """
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24; ``np.float64`` yields the same dtype on every version.
    x_train = x_train.astype(np.float64)
    print('Dimensiones del set de entrenamiento ', x_train.shape)
    print(x_train.shape[0], 'ejemplos de entrenamiento')
    print(x_test.shape[0], 'ejemplos para probar')

    model_SVM = SVM(eta=0.01, epochs=2000, batch_size=60, use_bias=True, lambda_L2=0.5)
    model_SVM.fit(x_train, y_train)
    return model_SVM.error_acc, model_SVM.error_loss
Beispiel #2
0
def run_vsm(x_train, y_train, x_test, y_test, eta):
    """Fit an SVM with the given ``eta`` and report how it did on the test set.

    Every hyper-parameter except ``eta`` is fixed here. After fitting on the
    supplied train/test data, the model's ``error_pres`` value is printed next
    to the number of test labels.

    Returns:
        tuple: ``(error_acc, error_loss, error_pres)`` read from the fitted
        model (their exact meaning is defined by the ``SVM`` class).
    """
    hyperparameters = dict(
        eta=eta,
        epochs=1200,
        batch_size=50,
        use_bias=True,
        lambda_L2=0.05,
    )
    model_SVM = SVM(**hyperparameters)
    model_SVM.fit(x_train, y_train, x_test, y_test)

    test_size = len(y_test)
    print("Con el test dado: ", model_SVM.error_pres, " de ", test_size,
          " bien")

    metrics = (model_SVM.error_acc, model_SVM.error_loss, model_SVM.error_pres)
    return metrics
def test1(training_data, portionOfDataSet: float, test_ratio: float):
    """Evaluate SVM, GaussianNB, KNN and RF on a single hold-out split.

    The leading ``portionOfDataSet`` fraction of rows is read from the
    "ttext" (text) and "Disaster" (label) columns of ``training_data``,
    vectorized with ``vectorize_data1`` (Bag of Words, per the original
    author) and split once with ``train_test_split`` using
    ``test_size=test_ratio`` and ``random_state=0``. No cross validation is
    done. Each classifier's confusion matrix, classification report and
    accuracy on the test part are printed; nothing is returned.
    """
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    row_count = int(len(training_data.index) * portionOfDataSet)
    X = []  # raw text of each selected row
    y = []  # matching class label: disaster or not
    for row in range(row_count):
        X.append(str(training_data["ttext"][row]))
        y.append(int(training_data["Disaster"][row]))

    print("Vectorizing Data...")
    vectorized_data = vectorize_data1(X)

    print("Spliting data into training and testing data...")
    X_train, X_test, y_train, y_test = train_test_split(
        vectorized_data, y, test_size=test_ratio, random_state=0)

    def train_and_report(training_banner, results_banner, build_classifier):
        # One train -> predict -> print-metrics round for a single classifier.
        print(training_banner)
        predictions = build_classifier(X_train, y_train).predict(X_test)
        print(results_banner)
        print(confusion_matrix(y_test, predictions))
        print(classification_report(y_test, predictions))
        print(accuracy_score(y_test, predictions))

    train_and_report("Training SVM Model...", "SVM Results:", SVM)
    train_and_report("Training GaussianNB Model...", "GaussianNB Results:", NB)
    train_and_report("Training KNN Model...", "KNN Results:", KNN)
    train_and_report("Training RF Model...", "RF Results:", RF)
Beispiel #4
0
def run_fit(dataset):
    """Train a SoftMax classifier and an SVM on ``dataset`` and return both.

    Args:
        dataset: object whose ``load_data()`` returns
            ``((x_train, y_train), (x_test, y_test))``.

    Returns:
        tuple: ``(model_SMC, model_SVM)``, both already fitted with the same
        hyper-parameters on the flattened train and test data.
    """
    (x_train, y_train), (x_test, y_test) = dataset.load_data()
    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24; ``np.float64`` yields the same dtype on every version.
    # NOTE(review): only x_train is cast; x_test keeps its original dtype --
    # confirm that ``flattening`` does not rely on both being float.
    x_train = x_train.astype(np.float64)

    # Turn each image into a one-dimensional vector (done by ``flattening``).
    X, Y = flattening(x_train, y_train)
    X_t, Y_t = flattening(x_test, y_test)

    print('Dimensiones del set de entrenamiento ', x_train.shape)
    print(x_train.shape[0], 'ejemplos de entrenamiento')
    print(x_test.shape[0], 'ejemplos para probar')

    print("\n====SoftMax====")
    model_SMC = SMC(eta=0.002, epochs=400, batch_size=50, lambda_L2=0.001)
    model_SMC.fit(X, Y, X_t, Y_t)

    print("\n==Support Vector Machine==")
    model_SVM = SVM(eta=0.002, epochs=400, batch_size=50, lambda_L2=0.001)
    model_SVM.fit(X, Y, X_t, Y_t)

    return model_SMC, model_SVM
Beispiel #5
0
def make_model():
    """
    Train the global model and optionally persist it.

    An ``SVM`` classifier (the classifier choice is hard-wired here) is
    wrapped in a ``ModelGenerator`` and trained. After training, the
    ``store`` field of the request form is read, and the model is stored
    only when that field equals the string ``'True'``.

    :return: the literal string ``"makemodel"``
    """
    generator = ModelGenerator(SVM())
    generator.train()

    # The form is read only after training, and the flag arrives as text.
    store_requested = request.form['store'] == 'True'
    if store_requested:
        generator.store_model()
    return "makemodel"
Beispiel #6
0
import data_tools as dtt
from classifier import SVM
from classifier import DecisionTree


# Load the training pair and the test pair through the project's data tools.
x_train, y_train = dtt.load_data('train.csv', 'train')
X, x_test = dtt.load_data('test.csv', 'test')

# Convert to numeric
x_train_num = dtt.convert_categorical(x_train)
x_test_num = dtt.convert_categorical(x_test)

# Apply one of the classifier algorithms
classifier = input('Enter classifier: dt or svm ')
if classifier == 'svm':
    kernel_name = input('Select kernel function for SVM: linear, poly, rbf or sigmoid: ')
    # Predict using SVM classification
    print('Training and validating via SVM')
    y_test = SVM(kernel_name, x_train, y_train, x_test, x_train_num, x_test_num)
elif classifier == 'dt':
    # Predict using Decision Tree classification
    print('Training and validating via Decision Tree')
    y_test = DecisionTree(x_train_num, y_train, x_test_num, 'gini')
else:
    # Fail fast on an unrecognised choice: otherwise y_test is never assigned
    # and the save step below would die with a NameError.
    raise SystemExit('Unknown classifier %r: expected "dt" or "svm"' % classifier)

# Save results to file; report success only once the write has returned.
dtt.merge_to_file(X, y_test)
print('The prediction result is saved to file in job_match_'+classifier+'.csv')
                b = dense_flow(str)
                #b.append(count)
                count_final = np.array([count], dtype='int')

                b = np.concatenate((b, count_final), axis=0)

                #print(b)

                if Flag == False and not feature_vector:
                    feature_vector = np.array([b])
                    Flag = True

                else:
                    if (b.shape[0] == feature_vector.shape[1]):
                        feature_vector = np.vstack((feature_vector, b))

                print(feature_vector.shape)
                is_first = False

        print(feature_vector.shape)
        np.save("./data/feature_vector_KTH_model.npy", feature_vector)
    else:
        feature_vector = np.load('./data/feature_vector_KTH_9parts.npy')

    #print(feature_vector.shape)
    SVM(feature_vector)
    #desicion_tree_classifier(feature_vector)
    #MLP(feature_vector)
    #feature_vector.dump("feature_matrix.dat")
    #mat2 = numpy.load("my_matrix.dat")
Beispiel #8
0
from utils import *

# Switch to the assignment folder so the relative 'data/...' paths resolve.
assignment_dir = (os.path.expanduser("~") +
                  '/OneDrive/Academy/the U/Assignment/AssignmentSln/ML-04-Bayes')
os.chdir(assignment_dir)

# Load both splits; only element [2] of what get_data returns is used here.
data_train = get_data('data/train.liblinear')[2]
data_test = get_data('data/test.liblinear')[2]

# --- SVM: one fit on the training split, then predict on the test split ---
r, c, n_epoch = 0.1, 10, 15
print("SVM: r = %s    c = %s    epoch = %s" % (r, c, n_epoch))
clf = SVM()
with suppress_stdout():  # presumably silences fit()'s own output
    clf.fit(data=data_train, n_epoch=n_epoch, r=r, c=c)
clf.predict(data_test)
print('------------------------------------------------------')

# --- Logistic: a single fit/predict with fixed settings (no cross validation) ---
r, sigma, n_epoch = 1, 1000, 6
# The run is skipped unless 2 * r / sigma**2 does not exceed 1.
if 2 * r / sigma**2 <= 1:
    print("Logistic: r = %s   sigma = %s    n_epoch = %s" %
          (r, sigma, n_epoch))
    clf = Logistic()
    clf.fit(data=data_train, n_epoch=n_epoch, r=r, sigma=sigma)
    clf.predict(data_test)
Beispiel #9
0
from classifier import SVM, Logistic, NB, Tree, SVMTree
from utils import cv, get_data

# Grid search by cross validation: every hyper-parameter combination gets a
# fresh classifier, which is handed to ``cv`` together with its settings.

# --- SVM grid (original author's note: best r = 0.1, best c = 10) ---
print("Starting Cross Validation for SVM")
svm_r_grid = [1e0, 1e-1, 1e-2, 1e-3, 1e-4]
svm_c_grid = [1e1, 1e0, 1e-1, 1e-2, 1e-3, 1e-4]
for r in svm_r_grid:
    for c in svm_c_grid:
        for n_epoch in range(1, 30):
            print("SVM: r = %s    c = %s    epoch = %s" % (r, c, n_epoch))
            clf = SVM()
            cv(clf, r=r, c=c, n_epoch=n_epoch)

# --- Logistic grid (original author's note: best r = 1, best sigma = 1) ---
print("Starting Cross Validation for Logistic")
logistic_r_grid = [1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
logistic_sigma_grid = [1e4, 1e3, 1e2, 1e1, 1e0, 1e-1]
for r in logistic_r_grid:
    for sigma in logistic_sigma_grid:
        for n_epoch in range(1, 31, 5):
            print("Logistic: r = %s   sigma = %s    n_epoch = %s" %
                  (r, sigma, n_epoch))
            clf = Logistic()
            cv(clf, r=r, sigma=sigma, n_epoch=n_epoch)

# --- Naive Bayes grid (original author's note: best smooth = 1) ---
print("Starting Cross Validation for NB")
nb_smooth_grid = [2, 1.5, 1, 0.5]
for smooth in nb_smooth_grid:
    print('NB: smooth = %s' % smooth)
    clf = NB()
    cv(clf, smooth=smooth)
    # Load datasets
    data_train = Instances()
    data_train.load_from_file('dataset/train_200/train_eGeMAPS.arff')

    data_unlabelled = Instances()
    data_unlabelled.load_from_file('dataset/train_200/unlabelled_eGeMAPS.arff')

    data_test = Instances()
    data_test.load_from_file('feature_extraction/arff/aibo_test_eGeMAPS.arff')

    # raters = Raters(data_test=data_test, learning_proc='dal', agreement_lvl=3, ordered=True, order_updated=True)
    raters = Raters(data_test=data_test, learning_proc='al')

    # Initialise SVM classifier with particular configuration
    complexity = 0.07432544468767006
    svm_cls = SVM(complexity=complexity, prob_enabled=True, norm_type='std', resample_type='over')
    svm_cls.train(data_train)

    ssl_iterations = 25
    num_instances_to_label = 200

    uar = svm_cls.score('uar', data_test) # Performance score before AL
    n_annotations = 0

    uar_values = []
    annotation_count_vals = []

    # print uar
    # print n_annotations
    # print
    uar_values.append(uar)
def test2(training_data, folds: int, portionOfDataSet: float):
    """ Implemented with cross validation
    """
    X = []  # training data
    y = []  # Class labels. Disaster or not disaster
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    for i in range(int(len(training_data.index) * portionOfDataSet)):
        X.append(str(training_data["ttext"][i]))
        y.append(int(training_data["Donation"][i]))

    print("Vectorizing Data...")
    vectorized_data = vectorize_data1(X)
    y = numpy.array(y)
    """ Split the data into training and testing data. 
    Notre the true_X_test and true_y_test are never going to be used for training. 
    """

    kf = KFold(n_splits=folds)
    kf.get_n_splits(vectorized_data)

    i = 1
    results = []
    for train_indexes, test_indexes in kf.split(vectorized_data):
        """ train_indexes contains the list of numpy array of indexes for training and testing data. 
        The first element of train_indexes is the training indexes and the second is the testing. 
        """
        X_train, X_test = vectorized_data[train_indexes], vectorized_data[
            test_indexes]
        y_train, y_test = y[train_indexes], y[test_indexes]

        print("SVM Iteration {} of {}...".format(i, folds))
        clf = SVM(X_train, y_train)

        y_pred = clf.predict(X_test)
        print("SVM Results:")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))
        print(accuracy_score(y_test, y_pred))
        results.append(accuracy_score(y_test, y_pred))
        i += 1
    print(results)

    for train_indexes, test_indexes in kf.split(vectorized_data):
        """ train_indexes contains the list of numpy array of indexes for training and testing data. 
        The first element of train_indexes is the training indexes and the second is the testing. 
        """
        X_train, X_test = vectorized_data[train_indexes], vectorized_data[
            test_indexes]
        y_train, y_test = y[train_indexes], y[test_indexes]

        print("RF Iteration {} of {}...".format(i, folds))
        clf = RF(X_train, y_train)

        y_pred = clf.predict(X_test)
        print("RF Results:")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))
        print(accuracy_score(y_test, y_pred))
        results.append(accuracy_score(y_test, y_pred))

    for train_indexes, test_indexes in kf.split(vectorized_data):
        """ train_indexes contains the list of numpy array of indexes for training and testing data. 
        The first element of train_indexes is the training indexes and the second is the testing. 
        """
        X_train, X_test = vectorized_data[train_indexes], vectorized_data[
            test_indexes]
        y_train, y_test = y[train_indexes], y[test_indexes]

        print("KNN Iteration {} of {}...".format(i, folds))
        clf = KNN(X_train, y_train)

        y_pred = clf.predict(X_test)
        print("KNN Results:")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))
        print(accuracy_score(y_test, y_pred))
        results.append(accuracy_score(y_test, y_pred))

    for train_indexes, test_indexes in kf.split(vectorized_data):
        """ train_indexes contains the list of numpy array of indexes for training and testing data. 
        The first element of train_indexes is the training indexes and the second is the testing. 
        """
        X_train, X_test = vectorized_data[train_indexes], vectorized_data[
            test_indexes]
        y_train, y_test = y[train_indexes], y[test_indexes]

        print("NB Iteration {} of {}...".format(i, folds))
        clf = NB(X_train, y_train)

        y_pred = clf.predict(X_test)
        print("NB Results:")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))
        print(accuracy_score(y_test, y_pred))
        results.append(accuracy_score(y_test, y_pred))
    """