def main(): """Simple main program to test the methods of the file. """ feature_x, feature_y, classifier = "", "", "" data_training, data_validation = data.extract_data("diabetes.csv") while classifier == "" or classifier != "svc" and classifier != "knn": classifier = input( "ENTER CLASSIFIER\nSVC or KNN?\n(Enter 'q' to exit)\n>").lower() if classifier == "q": exit(0) if classifier != "svc" and classifier != "knn": print("\n\nInput is not a valid classifier, try again..") feature_string = "ENTER {} FEATURE\npregnant\nglucose\npressure\ntriceps\ninsulin\nmass\npedigree\nage\n(Enter 'q' to exit)\n>" while feature_x == "" or feature_x not in list(data_training): feature_x = input("\n" + feature_string.format("FIRST")).lower() if feature_x == "q": exit(0) if feature_x not in list(data_training): print("\n\nInput is not a valid feature, try again..") while feature_y == "" or feature_y not in list(data_training): feature_y = input("\n" + feature_string.replace( feature_x + "\n", "").format("SECOND")).lower() if feature_y == "q": exit(0) if feature_y not in list(data_training): print("\n\nInput is not a valid feature, try again..") plot_diabetes( data_training, data_validation, fitting.fit(data_training, data_validation, classifier, feature_x, feature_y), feature_x, feature_y).show()
def extract_data(size=256):
    # Thin wrapper around the data module: load the raw samples, then run the
    # standard preprocessing (mean subtraction, saving to disk).
    print("Extracting data..")
    X, y = data.extract_data(size=size)
    print("Preprocessing data..")
    X, y, nb_samples, num_categories = data.preprocess_data(
        X, y, save=True, subtract_mean=True)
    return X, y, nb_samples, num_categories
def __init__(self, labels=('earn', 'acq', 'crude', 'corn')):
    print('Prepare data')
    self._train_set_tot, self._test_set_tot = data.extract_data()
    # Keep only the first 10 elements of each training sample.
    self._train_set_tot = [_item[:10] for _item in self._train_set_tot]
    self._labels = labels
    self._nb_labels = len(labels)
    # Number of training/test samples to draw per label.
    self._nb_train = (152, 114, 76, 38)
    self._nb_test = (40, 25, 15, 10)
    self.train_set = []
    self.test_set = []
    self.train_labels = []
    self.test_labels = []
    self._sample_data()
def run(epochs=500, training_percentage=0.4, validation_percentage=0.1,
        extract=True, cont=True, size=256, top_k=5):
    '''Does the routine required to get the data, put them in the needed format
    and start training the model. Saves weights whenever the model produces a
    better validation result and keeps track of the best loss.'''
    if extract:
        print("Extracting data..")
        X, y = data.extract_data(size=size)

        print("Preprocessing data..")
        X, y, nb_samples, num_categories = data.preprocess_data(
            X, y, save=True, subtract_mean=True)
    else:
        print("Loading data..")
        h5f = h5py.File('data.hdf5', 'r')
        nb_samples = h5f['nb_samples'].value
        num_categories = h5f['n_categories'].value
        h5f.close()

    print("Number of categories: {}".format(num_categories))
    print("Number of samples: {}".format(nb_samples))

    data_ids = np.arange(start=0, stop=nb_samples)
    val_ids = data.produce_validation_indices(
        data_ids, nb_samples * validation_percentage)
    train_ids = data.produce_train_indices(
        dataset_indx=data_ids,
        number_of_samples=nb_samples * training_percentage,
        val_indx=val_ids)

    # X_train, y_train, X_test, y_test = data.split_data(X, y, split_ratio=split)
    X_train, y_train, X_val, y_val = data.load_dataset_bit_from_hdf5(
        train_ids, val_ids, only_train=False)
    X_val = X_val / 255  # scale pixel values to [0, 1]

    print("Building and Compiling model..")
    model = m.get_model(n_outputs=num_categories, input_size=size)
    if cont:
        # model.load_weights_until_layer("pre_trained_weights/latest_model_weights.hdf5", 26)
        model.load_weights("pre_trained_weights/latest_model_weights.hdf5")
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=["accuracy"])

    print("Training..")
    best_performance = np.inf
    for i in range(epochs):
        # Resample a fresh training subset every epoch, excluding validation ids.
        train_ids = data.produce_train_indices(
            dataset_indx=data_ids, number_of_samples=15000, val_indx=val_ids)
        X_train, y_train = data.load_dataset_bit_from_hdf5(
            train_ids, val_ids, only_train=True)
        X_train = X_train / 255
        X_train = data.augment_data(X_train)

        # fit the model on the batches generated by datagen.flow()
        metadata = model.fit(X_train, y_train, validation_data=[X_val, y_val],
                             batch_size=64, nb_epoch=1, verbose=1, shuffle=True,
                             class_weight=None, sample_weight=None)
        current_loss = metadata.history['loss'][-1]
        current_val_loss = metadata.history['val_loss'][-1]
        preds = model.predict_proba(X_val, batch_size=64)
        print("Loss: {}".format(current_loss))
        print("Val_loss: {}".format(current_val_loss))

        top_k_error = get_top_n_error(preds, y_val, top_k)
        print("Top {} error: {}".format(top_k, top_k_error))

        # Save the best weights seen so far, plus the latest weights each epoch.
        if current_val_loss < best_performance:
            model.save_weights("pre_trained_weights/model_weights.hdf5",
                               overwrite=True)
            best_performance = current_val_loss
            print("Saving weights..")
        model.save_weights("pre_trained_weights/latest_model_weights.hdf5",
                           overwrite=True)
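# get_top_n_error is called above but not defined in this snippet. A minimal
# sketch of such a helper, assuming preds holds per-class probabilities and
# y_true is one-hot encoded (names and behaviour are assumptions, not the
# project's original implementation):
import numpy as np


def get_top_n_error(preds, y_true, n):
    # A prediction counts as correct if the true class is among the n
    # highest-probability classes; return the fraction that are not.
    top_n = np.argsort(preds, axis=1)[:, -n:]
    true_classes = np.argmax(y_true, axis=1)
    hits = sum(1 for row, t in zip(top_n, true_classes) if t in row)
    return 1.0 - hits / len(true_classes)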
import argparse
import pickle
import time

from sklearn import model_selection
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn import svm

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dir', type=str, default='.',
        help='directory to read vehicle/non-vehicle image files from')
    FLAGS, unparsed = parser.parse_known_args()

    # extract_data is assumed to be defined elsewhere in this project; it
    # returns the train/test split plus the fitted feature scaler.
    X_train, y_train, X_test, y_test, scaler = extract_data(FLAGS.dir)

    svc = svm.SVC()
    t = time.time()
    svc.fit(X_train, y_train)
    t2 = time.time()
    print('Training took {} seconds and produced an accuracy of {}'.format(
        t2 - t, round(svc.score(X_test, y_test), 3)))

    # Persist the classifier and the scaler for later inference.
    with open('clf.pkl', 'wb') as fid:
        pickle.dump(svc, fid)
    with open('scaler.pkl', 'wb') as fid:
        pickle.dump(scaler, fid)
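# Usage sketch (an assumption, not part of the original file): the pickled
# classifier and scaler can later be loaded back for inference like this.
import pickle

with open('clf.pkl', 'rb') as fid:
    clf = pickle.load(fid)
with open('scaler.pkl', 'rb') as fid:
    X_scaler = pickle.load(fid)

# features would come from the same extraction pipeline used during training:
# scaled = X_scaler.transform(features)
# predictions = clf.predict(scaled)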
import os

# Select a non-interactive backend before anything imports pyplot, so the app
# can render plots without a display.
import matplotlib
matplotlib.use('Agg')

import visualize
import fitting
import data
from flask import Flask, render_template, request, url_for

classifiers = ['knn', 'svc']
features = [
    "pregnant", "glucose", "pressure", "triceps", "insulin", "mass",
    "pedigree", "age"
]
data_training, data_validation = data.extract_data("diabetes.csv")

app = Flask(__name__)
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0


@app.route("/")
def root():
    return render_template('frontpage.html')


@app.route("/plot")
def plot(error=False):
    plt = visualize.plot_diabetes(
        data_training, data_validation,
        fitting.fit(data_training, data_validation, classifiers[0],
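# The /plot route above is cut off mid-call. A generic sketch (an assumption,
# not the project's original code) of how a figure rendered under the Agg
# backend is typically written to a static file so Flask can serve it:
import os

import matplotlib
matplotlib.use('Agg')            # must run before pyplot is imported anywhere
import matplotlib.pyplot as plt


def save_demo_plot(path="static/plot.png"):
    # Render a small figure headlessly and write it where Flask can serve it.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [0, 1, 4])
    fig.savefig(path)
    plt.close(fig)
    return path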
import numpy as np
import tensorflow as tf
import tflearn
from os import listdir

import data


def classify():
    sess = tf.Session()

    # Load the pre-extracted features for the balanced training set.
    path = "./trainingFeatures/bal_train/"
    filenames = [path + f for f in listdir(path)]
    features, labels = data.extract_data(filenames)
    features = np.array(features)

    total = 0
    i = 0
    sumTab = np.zeros(len(features))
    colors = np.zeros(len(features))
    indiceTab = np.zeros(len(features))
    finalFeatures = []
    finalLabels = []

    # Mark samples whose label list contains class 0 and record per-sample sums
    # (used only by the optional scatter plot below).
    for f in features:
        for label in labels[i]:
            if label == 0:
                colors[i] = 1
        sumTab[i] = sum(sum(f))
        indiceTab[i] = i
        i += 1

    # Keep only samples with at least 10 frames and flatten the first 10 frames
    # into a single feature vector.
    for i in range(len(features)):
        temp = []
        if len(features[i]) >= 10:
            for j in range(10):
                for value in features[i][j]:
                    temp.append(value)
            finalFeatures.append(np.array(temp))
            finalLabels.append(labels[i])
    finalFeatures = np.array(finalFeatures)
    finalLabels = np.array(finalLabels)

    # plt.scatter(indiceTab, sumTab, c=colors)
    # plt.show()

    X = finalFeatures

    # Build one-hot targets: column 0 if the sample contains class 0, else column 1.
    outputs = np.zeros((len(finalLabels), 2))
    i = 0
    for labels in finalLabels:
        for label in labels:
            if label == 0:
                outputs[i][0] = 1
                break
            else:
                outputs[i][1] = 1
        i += 1
    print(outputs)
    Y = outputs

    # Small fully connected network on the flattened feature vectors.
    net = tflearn.input_data(shape=[None, 1280])
    net = tflearn.fully_connected(net, 64)
    net = tflearn.fully_connected(net, 64)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net)

    model = tflearn.DNN(net, tensorboard_verbose=3)
    model.fit(X, Y, n_epoch=10, validation_set=0.3, show_metric=True,
              batch_size=16)

    # Evaluate on the held-out evaluation set, prepared the same way.
    path = "./trainingFeatures/eval/"
    filenames = [path + f for f in listdir(path)]
    features, labels = data.extract_data(filenames)

    finalFeatures = []
    finalLabels = []
    for i in range(len(features)):
        temp = []
        if len(features[i]) >= 10:
            for j in range(10):
                for value in features[i][j]:
                    temp.append(value)
            finalFeatures.append(np.array(temp))
            finalLabels.append(labels[i])
    finalFeatures = np.array(finalFeatures)

    outputs = np.zeros((len(finalLabels), 2))
    i = 0
    for labels in finalLabels:
        for label in labels:
            if label == 0:
                outputs[i][0] = 1
                break
            else:
                outputs[i][1] = 1
        i += 1

    predict = np.array(model.predict(finalFeatures))
    # Turn the softmax scores into hard 0/1 predictions.
    for i in range(len(predict)):
        if predict[i][0] > predict[i][1]:
            predict[i][0] = 1
            predict[i][1] = 0
        else:
            predict[i][0] = 0
            predict[i][1] = 1

    totalTrue = 0
    total = 0
    for i in range(len(predict)):
        if outputs[i][0] == predict[i][0]:
            totalTrue += 1
        total += 1
    print(totalTrue / total)
import argparse

from sklearn.metrics import accuracy_score, classification_report

parser = argparse.ArgumentParser()
parser.add_argument('--car', type=str, default='image0004.png',
                    help='car image')
parser.add_argument('--non', type=str, default='image0001.png',
                    help='non-car image')
FLAGS, unparsed = parser.parse_known_args()

# Single-image sanity check, kept disabled:
# features_car = extract_features([FLAGS.car])
# features_noncar = extract_features([FLAGS.non])
# features_car_scaled = X_scaler.transform(features_car)
# features_noncar_scaled = X_scaler.transform(features_noncar)
# prediction = clf.predict(features_car_scaled)
# if prediction == 1:
#     print('Correct prediction of Car')
# else:
#     print('Incorrect prediction of Car')
# prediction = clf.predict(features_noncar_scaled)
# if prediction == 0:
#     print('Correct prediction of Non-Car')
# else:
#     print('Incorrect prediction of Non-Car')

# extract_data, clf and X_scaler are assumed to be defined or loaded earlier,
# e.g. the pickled classifier and scaler saved during training.
X_train, y_train, X_test, y_test = extract_data()
scaled_X_test = X_scaler.transform(X_test)
predictions = clf.predict(scaled_X_test)

print('Accuracy on Test Set: {:.2f}%'.format(
    accuracy_score(y_test, predictions) * 100))
print("\nDetailed classification report:")
print(classification_report(y_test, predictions))