def run_vsm():
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
    # np.float was removed in NumPy 1.24; use the explicit dtype instead
    x_train = x_train.astype(np.float64)
    print('Training set dimensions ', x_train.shape)
    print(x_train.shape[0], 'training examples')
    print(x_test.shape[0], 'test examples')
    model_SVM = SVM(eta=0.01, epochs=2000, batch_size=60, use_bias=True, lambda_L2=0.5)
    model_SVM.fit(x_train, y_train)
    return model_SVM.error_acc, model_SVM.error_loss
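# A minimal usage sketch for run_vsm above, assuming matplotlib is available;
# plotting the returned accuracy and loss histories is illustrative only, not
# part of the original snippet.
import matplotlib.pyplot as plt

acc_history, loss_history = run_vsm()
plt.plot(loss_history, label='loss')
plt.plot(acc_history, label='accuracy error')
plt.legend()
plt.show()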
def run_vsm(x_train, y_train, x_test, y_test, eta):
    model_SVM = SVM(eta=eta, epochs=1200, batch_size=50, use_bias=True, lambda_L2=0.05)
    model_SVM.fit(x_train, y_train, x_test, y_test)
    print("On the given test set: ", model_SVM.error_pres, " of ", len(y_test), " correct")
    return model_SVM.error_acc, model_SVM.error_loss, model_SVM.error_pres
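# A minimal sketch of a learning-rate sweep over the run_vsm variant above,
# assuming the data arrays are already loaded; the eta values are placeholders.
for eta in (0.1, 0.01, 0.001):
    acc, loss, pres = run_vsm(x_train, y_train, x_test, y_test, eta)
    print("eta =", eta, "-> correct on test:", pres)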
def test1(training_data, portionOfDataSet: float, test_ratio: float):
    """
    Test 1 runs SVM, NB, KNN, RF without cross validation.
    Uses the "Bag of Words" method to vectorize the data.
    """
    X = []  # training data
    y = []  # class labels: disaster or not disaster
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    for i in range(int(len(training_data.index) * portionOfDataSet)):
        X.append(str(training_data["ttext"][i]))
        y.append(int(training_data["Disaster"][i]))
    print("Vectorizing Data...")
    vectorized_data = vectorize_data1(X)
    print("Splitting data into training and testing data...")
    X_train, X_test, y_train, y_test = train_test_split(
        vectorized_data, y, test_size=test_ratio, random_state=0)

    print("Training SVM Model...")
    clf_svm = SVM(X_train, y_train)
    y_pred_svm = clf_svm.predict(X_test)
    print("SVM Results:")
    print(confusion_matrix(y_test, y_pred_svm))
    print(classification_report(y_test, y_pred_svm))
    print(accuracy_score(y_test, y_pred_svm))

    print("Training GaussianNB Model...")
    clf_NB = NB(X_train, y_train)
    y_pred_NB = clf_NB.predict(X_test)
    print("GaussianNB Results:")
    print(confusion_matrix(y_test, y_pred_NB))
    print(classification_report(y_test, y_pred_NB))
    print(accuracy_score(y_test, y_pred_NB))

    print("Training KNN Model...")
    clf_KNN = KNN(X_train, y_train)
    y_pred_KNN = clf_KNN.predict(X_test)
    print("KNN Results:")
    print(confusion_matrix(y_test, y_pred_KNN))
    print(classification_report(y_test, y_pred_KNN))
    print(accuracy_score(y_test, y_pred_KNN))

    print("Training RF Model...")
    clf_RF = RF(X_train, y_train)
    y_pred_RF = clf_RF.predict(X_test)
    print("RF Results:")
    print(confusion_matrix(y_test, y_pred_RF))
    print(classification_report(y_test, y_pred_RF))
    print(accuracy_score(y_test, y_pred_RF))
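# A hypothetical invocation of test1, assuming the tweets live in a CSV with
# "ttext" and "Disaster" columns; the file name and both ratios are placeholders.
import pandas as pd

training_data = pd.read_csv("tweets.csv")
test1(training_data, portionOfDataSet=1.0, test_ratio=0.2)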
def run_fit(dataset):
    (x_train, y_train), (x_test, y_test) = dataset.load_data()
    # np.float was removed in NumPy 1.24; use the explicit dtype instead
    x_train = x_train.astype(np.float64)
    # flatten each image into a one-dimensional vector
    X, Y = flattening(x_train, y_train)
    X_t, Y_t = flattening(x_test, y_test)
    print('Training set dimensions ', x_train.shape)
    print(x_train.shape[0], 'training examples')
    print(x_test.shape[0], 'test examples')
    print("\n====SoftMax====")
    model_SMC = SMC(eta=0.002, epochs=400, batch_size=50, lambda_L2=0.001)
    model_SMC.fit(X, Y, X_t, Y_t)
    print("\n==Support Vector Machine==")
    model_SVM = SVM(eta=0.002, epochs=400, batch_size=50, lambda_L2=0.001)
    model_SVM.fit(X, Y, X_t, Y_t)
    return model_SMC, model_SVM
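# A sketch of the flattening helper that run_fit relies on, assuming it only
# reshapes each image to a 1-D vector and passes labels through; the real
# helper may also normalise pixel values or one-hot encode the labels.
def flattening(x, y):
    x_flat = x.reshape(x.shape[0], -1)  # e.g. (N, 28, 28) -> (N, 784)
    return x_flat, y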
def make_model():
    """
    Trains the global model.
    The POST body should contain the flag for storing the data as a form
    field, e.g. store=True.
    The classifier to be used for training is specified in this method.
    :return:
    """
    classifier = SVM()
    model_gen = ModelGenerator(classifier)
    model_gen.train()
    if request.form['store'] == 'True':
        model_gen.store_model()
    return "makemodel"
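# A hedged client-side sketch for triggering the endpoint above. Only the
# form field name "store" comes from the handler; the host, port, and the
# "/makemodel" route are assumptions.
import requests

resp = requests.post("http://localhost:5000/makemodel", data={"store": "True"})
print(resp.text)  # the handler returns the string "makemodel"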
import data_tools as dtt
from classifier import SVM
from classifier import DecisionTree

x_train, y_train = dtt.load_data('train.csv', 'train')
X, x_test = dtt.load_data('test.csv', 'test')

# Convert categorical features to numeric
x_train_num = dtt.convert_categorical(x_train)
x_test_num = dtt.convert_categorical(x_test)

# Apply one of the classifier algorithms
classifier = input('Enter classifier: dt or svm ')
if classifier == 'svm':
    kernel_name = input('Select kernel function for SVM: linear, poly, rbf or sigmoid: ')
    # Predict using SVM classification
    print('Training and validating via SVM')
    y_test = SVM(kernel_name, x_train, y_train, x_test, x_train_num, x_test_num)
elif classifier == 'dt':
    # Predict using Decision Tree classification
    print('Training and validating via Decision Tree')
    y_test = DecisionTree(x_train_num, y_train, x_test_num, 'gini')

# Save results to file
print('The prediction result is saved to file in job_match_' + classifier + '.csv')
dtt.merge_to_file(X, y_test)
        b = dense_flow(str)
        # b.append(count)
        count_final = np.array([count], dtype='int')
        # append the class count/label as the last entry of the descriptor
        b = np.concatenate((b, count_final), axis=0)
        # print(b)
        if not Flag and not feature_vector:
            # first sample: start the feature matrix with a single row
            feature_vector = np.array([b])
            Flag = True
        else:
            # stack only when the descriptor length matches the matrix width
            if b.shape[0] == feature_vector.shape[1]:
                feature_vector = np.vstack((feature_vector, b))
                print(feature_vector.shape)
                is_first = False
    print(feature_vector.shape)
    np.save("./data/feature_vector_KTH_model.npy", feature_vector)
else:
    # reuse a previously extracted feature matrix
    feature_vector = np.load('./data/feature_vector_KTH_9parts.npy')
    # print(feature_vector.shape)

SVM(feature_vector)
# desicion_tree_classifier(feature_vector)
# MLP(feature_vector)
# feature_vector.dump("feature_matrix.dat")
# mat2 = numpy.load("my_matrix.dat")
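# A hedged sketch of what an SVM(feature_vector) entry point like the one
# called above typically does: treat the last column (the count appended in
# the loop) as the target and the rest as features. The sklearn usage and
# split ratio are assumptions, not the project's implementation.
from sklearn import svm
from sklearn.model_selection import train_test_split

def svm_sketch(feature_vector):
    X, y = feature_vector[:, :-1], feature_vector[:, -1]
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
    clf = svm.SVC(kernel='rbf')
    clf.fit(X_tr, y_tr)
    print("Test accuracy:", clf.score(X_te, y_te))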
from utils import *

os.chdir(os.path.expanduser("~") +
         '/OneDrive/Academy/the U/Assignment/AssignmentSln/ML-04-Bayes')

# Get train/test data
data_train = get_data('data/train.liblinear')[2]
data_test = get_data('data/test.liblinear')[2]

# Fit SVM with the hyper-parameters picked by cross validation
r = 0.1
c = 10
n_epoch = 15
print("SVM: r = %s c = %s epoch = %s" % (r, c, n_epoch))
clf = SVM()
with suppress_stdout():
    clf.fit(data=data_train, n_epoch=n_epoch, r=r, c=c)
clf.predict(data_test)
print('------------------------------------------------------')

# Fit Logistic with the hyper-parameters picked by cross validation
r = 1
sigma = 1000
n_epoch = 6
if 2 * r / sigma**2 <= 1:
    print("Logistic: r = %s sigma = %s n_epoch = %s" % (r, sigma, n_epoch))
    clf = Logistic()
    clf.fit(data=data_train, n_epoch=n_epoch, r=r, sigma=sigma)
    clf.predict(data_test)
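# suppress_stdout is imported from utils above; a common implementation of
# such a helper is the context-manager sketch below, which temporarily
# redirects sys.stdout to os.devnull (an assumption about the project's code).
import contextlib
import os
import sys

@contextlib.contextmanager
def suppress_stdout():
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout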
from classifier import SVM, Logistic, NB, Tree, SVMTree
from utils import cv, get_data

# Cross validation for SVM
print("Starting Cross Validation for SVM")
for r in [1e0, 1e-1, 1e-2, 1e-3, 1e-4]:  # best r = 0.1
    for c in [1e1, 1e0, 1e-1, 1e-2, 1e-3, 1e-4]:  # best c = 10
        for n_epoch in range(1, 30):
            print("SVM: r = %s c = %s epoch = %s" % (r, c, n_epoch))
            clf = SVM()
            cv(clf, r=r, c=c, n_epoch=n_epoch)

# Cross validation for Logistic
print("Starting Cross Validation for Logistic")
for r in [1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]:  # best r = 1
    for sigma in [1e4, 1e3, 1e2, 1e1, 1e0, 1e-1]:  # best sigma = 1
        for n_epoch in range(1, 31, 5):
            print("Logistic: r = %s sigma = %s n_epoch = %s" % (r, sigma, n_epoch))
            clf = Logistic()
            cv(clf, r=r, sigma=sigma, n_epoch=n_epoch)

# Cross validation for NB
print("Starting Cross Validation for NB")
for smooth in [2, 1.5, 1, 0.5]:  # best smooth = 1
    print('NB: smooth = %s' % smooth)
    clf = NB()
    cv(clf, smooth=smooth)
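# The cv helper imported above is project-specific. As a generic, hedged
# sketch of what k-fold cross validation over a labelled array looks like
# (the factory signature, data layout, and accuracy scoring are assumptions):
import numpy as np

def cv_sketch(make_clf, X, y, k=5, **fit_params):
    idx = np.random.permutation(len(y))
    folds = np.array_split(idx, k)
    scores = []
    for i in range(k):
        test = folds[i]
        train = np.concatenate([folds[j] for j in range(k) if j != i])
        clf = make_clf()
        clf.fit(X[train], y[train], **fit_params)
        scores.append(np.mean(clf.predict(X[test]) == y[test]))
    return float(np.mean(scores))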
# Load datasets
data_train = Instances()
data_train.load_from_file('dataset/train_200/train_eGeMAPS.arff')
data_unlabelled = Instances()
data_unlabelled.load_from_file('dataset/train_200/unlabelled_eGeMAPS.arff')
data_test = Instances()
data_test.load_from_file('feature_extraction/arff/aibo_test_eGeMAPS.arff')

# raters = Raters(data_test=data_test, learning_proc='dal', agreement_lvl=3, ordered=True, order_updated=True)
raters = Raters(data_test=data_test, learning_proc='al')

# Initialise the SVM classifier with a particular configuration
complexity = 0.07432544468767006
svm_cls = SVM(complexity=complexity, prob_enabled=True, norm_type='std', resample_type='over')
svm_cls.train(data_train)

ssl_iterations = 25
num_instances_to_label = 200
uar = svm_cls.score('uar', data_test)  # performance score before AL

n_annotations = 0
uar_values = []
annotation_count_vals = []
# print uar
# print n_annotations
# print
uar_values.append(uar)
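# The active-learning loop that consumes the counters above is not shown in
# this snippet. A hedged sketch of one plausible iteration follows; the
# select_to_label and label_with helpers are hypothetical names, and only
# train/score are methods actually used in the snippet above.
for _ in range(ssl_iterations):
    batch = select_to_label(svm_cls, data_unlabelled, num_instances_to_label)  # hypothetical
    data_train = label_with(raters, data_train, batch)  # hypothetical
    svm_cls.train(data_train)
    n_annotations += num_instances_to_label
    uar_values.append(svm_cls.score('uar', data_test))
    annotation_count_vals.append(n_annotations)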
def test2(training_data, folds: int, portionOfDataSet: float):
    """
    Test 2 runs SVM, RF, KNN, NB with k-fold cross validation.
    """
    X = []  # training data
    y = []  # class labels
    print("Using {} % of data set...".format(portionOfDataSet * 100))
    print("Parsing text...")
    for i in range(int(len(training_data.index) * portionOfDataSet)):
        X.append(str(training_data["ttext"][i]))
        y.append(int(training_data["Donation"][i]))
    print("Vectorizing Data...")
    vectorized_data = vectorize_data1(X)
    y = numpy.array(y)

    # Split the data with k-fold cross validation. Note that each fold's
    # test split is held out and never used for training that fold's model.
    kf = KFold(n_splits=folds)
    kf.get_n_splits(vectorized_data)

    results = []
    for name, model in [("SVM", SVM), ("RF", RF), ("KNN", KNN), ("NB", NB)]:
        i = 1
        for train_indexes, test_indexes in kf.split(vectorized_data):
            # kf.split yields a pair of index arrays per fold: the first
            # selects the training rows, the second the testing rows.
            X_train, X_test = vectorized_data[train_indexes], vectorized_data[test_indexes]
            y_train, y_test = y[train_indexes], y[test_indexes]
            print("{} Iteration {} of {}...".format(name, i, folds))
            clf = model(X_train, y_train)
            y_pred = clf.predict(X_test)
            print("{} Results:".format(name))
            print(confusion_matrix(y_test, y_pred))
            print(classification_report(y_test, y_pred))
            print(accuracy_score(y_test, y_pred))
            results.append(accuracy_score(y_test, y_pred))
            i += 1
    print(results)
    return results
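# A small follow-up sketch: summarising the per-fold accuracies that test2
# returns, assuming the same CSV loading as for test1; the file name and
# fold count are placeholders, and numpy is imported as in the snippet above.
import pandas as pd
import numpy

training_data = pd.read_csv("tweets.csv")
accs = test2(training_data, folds=5, portionOfDataSet=1.0)
print("Mean accuracy: %.3f (std %.3f)" % (numpy.mean(accs), numpy.std(accs)))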