def test_knn():
    # file_path = 'trail_forest_results/results/trained_models/deepnn_subset_full_exc_001.h5'
    model = create_trail_model()
    # model = load_model(file_path)
    reset_weights(model)
    rep_layer = Model(inputs=model.input, outputs=model.get_layer(index=7).output)
    print(rep_layer.summary())

    data_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                       train_subsets=[],
                       test_subsets=['/001/'])
    data2 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                    train_subsets=[],
                    test_subsets=['/002/'])

    # Shuffle Data
    train_df = data_001.test_set[2]
    random.shuffle(train_df)
    train_df, test_df = split_sequence(train_df, 90, 30)
    baseline = data2.test_set[2]

    knn_model = KModel(train_df, test_df, model=rep_layer, baseline=None)
    knn_model.train_full_model()
    return knn_model
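# split_sequence() is defined elsewhere in the project. A minimal sketch of what
# a call like split_sequence(train_df, 90, 30) could do, assuming it alternates
# contiguous blocks of rows between the two splits (90 rows to train, then 30 to
# test) so temporally adjacent frames stay in the same split; the name and block
# semantics below are assumptions, not the project's actual implementation.
def split_sequence_sketch(rows, train_block=90, test_block=30):
    train, test = [], []
    i = 0
    while i < len(rows):
        train.extend(rows[i:i + train_block])   # next block goes to train
        i += train_block
        test.extend(rows[i:i + test_block])     # following block goes to test
        i += test_block
    return train, test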
def save_cvs():
    model_exc_001 = load_model(
        '/Users/jesusnavarro/Desktop/trail_project/results/trained_models/deepnn_subset_001.h5.h5')
    model_exc_001_rep = Model(inputs=model_exc_001.input,
                              outputs=model_exc_001.get_layer(index=7).output)

    dataset_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                          train_subsets=[], test_subsets=['/001/'])
    dataset_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                          train_subsets=[], test_subsets=['/002/'])

    # Map one-hot labels to direction names, and direction names to integer ids;
    # dataset 002 ids are offset by 3 so both datasets stay distinguishable in
    # the shared output file.
    key = {str(np.array([1, 0, 0])): 'right',
           str(np.array([0, 1, 0])): 'center',
           str(np.array([0, 0, 1])): 'left'}
    csv_001_key = {'left': 0, 'center': 1, 'right': 2}
    csv_002_key = {'left': 3, 'center': 4, 'right': 5}

    x_001, y_001, paths_001 = dataset_001.test_set
    x_002, y_002, paths_002 = dataset_002.test_set

    # Flattened representation-layer outputs for both datasets
    int_001_output = model_exc_001_rep.predict(x_001)
    int_001_output = int_001_output.reshape(x_001.shape[0], -1)
    int_002_output = model_exc_001_rep.predict(x_002)
    int_002_output = int_002_output.reshape(x_002.shape[0], -1)

    # One line per image: path \t label \t [v1,v2,...]
    csvData = []
    for int_out, paths, y in zip(int_001_output, paths_001, y_001):
        dir_class = key[str(y)]
        label = csv_001_key[dir_class]
        tmp_str = ''
        for val in int_out:
            tmp_str += str(val) + ','
        tmp_str = paths + '\t' + str(label) + '\t' + '[' + tmp_str[:-1] + ']' + '\n'
        csvData.append(tmp_str)

    for int_out, paths, y in zip(int_002_output, paths_002, y_002):
        dir_class = key[str(y)]
        label = csv_002_key[dir_class]
        tmp_str = ''
        for val in int_out:
            tmp_str += str(val) + ','
        tmp_str = paths + '\t' + str(label) + '\t' + '[' + tmp_str[:-1] + ']' + '\n'
        csvData.append(tmp_str)

    f = open('/Users/jesusnavarro/Desktop/trail_project/Pickledata/visualize_001_002_nn_exc_001.txt', 'w+')
    for line in csvData:
        print(line)
        f.write(line)
    f.close()
    return None
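# The file written by save_cvs() is tab-separated: image path, integer class id,
# and the bracketed, comma-separated representation vector. A possible reader for
# that format (the helper name is an illustration, not part of the project):
def read_visualize_file(path='/Users/jesusnavarro/Desktop/trail_project/Pickledata/visualize_001_002_nn_exc_001.txt'):
    rows = []
    with open(path) as f:
        for line in f:
            img_path, label, vec = line.rstrip('\n').split('\t')
            values = [float(v) for v in vec.strip('[]').split(',')]
            rows.append((img_path, int(label), values))
    return rows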
def train_model(self, model):
    """
    Train model and set data attributes.

    The model and datasets (test, train, calibration) are saved to
    'workspace.pickle'. Training data is randomized and training is done
    using .flow(), where inputs are numpy arrays.
    """
    # Get dataframe of paths
    dataset = DataSet(self.root, self.train_set, self.test_set, type='list')
    paths_dataframe = dataset.train_set[2]
    train_df, validation_df, calibration_df = split_data(paths_dataframe)

    # Set new parameters for train and calibration set
    test_df = dataset.test_set[2]
    random.shuffle(test_df)
    self.__test_set = test_df
    self.__train_set = train_df
    self.__validation_set = validation_df
    self.__calibration_set = calibration_df

    train_images, train_labels = read_img_io(train_df)
    # Randomize the data
    train_images, train_labels = shuffle(train_images, train_labels)
    validation_images, validation_labels = read_img_io(validation_df)

    train_datagen = ImageDataGenerator(rescale=1, shear_range=0.2,
                                       zoom_range=0.2, rotation_range=18)
    valid_datagen = ImageDataGenerator()
    train_set = train_datagen.flow(train_images, train_labels, batch_size=32, shuffle=True)
    valid_set = valid_datagen.flow(validation_images, validation_labels, batch_size=32, shuffle=True)

    model.fit_generator(train_set,
                        steps_per_epoch=train_images.shape[0] // 32,
                        epochs=5,
                        validation_data=valid_set,
                        validation_steps=validation_images.shape[0] // 32)

    # Save to current workspace
    with open('workspace.pickle', 'wb+') as handle:
        pickle.dump([model, train_df, test_df, calibration_df, validation_df], handle)
    return model
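# split_data() is defined elsewhere; a minimal sketch of a three-way split of the
# path rows into train / validation / calibration. The 70/20/10 ratios and the
# helper name are assumptions for illustration only (relies on the random module
# already used above).
def split_data_sketch(rows, train_frac=0.7, val_frac=0.2):
    rows = list(rows)
    random.shuffle(rows)
    n_train = int(len(rows) * train_frac)
    n_val = int(len(rows) * val_frac)
    train = rows[:n_train]
    validation = rows[n_train:n_train + n_val]
    calibration = rows[n_train + n_val:]
    return train, validation, calibration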
def knn_increase_data():
    model = load_from_url()
    rep_layer = Model(inputs=model.input, outputs=model.get_layer(index=7).output)

    data_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/', train_subsets=[],
                       test_subsets=['/002/'], type='nl', loc='gs')

    datasets = ['/001/', '/002/', '/003/', '/004/', '/005/', '/006/',
                '/007/', '/008/', '/009/', '/010/', '/011/']
    accuracy_list = {}

    for ds in datasets:
        print(ds)
        data_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/', train_subsets=[],
                           test_subsets=[ds], type='nl', loc='gs')
        accuracy_list[ds] = []
        data_array = data_001.test_set
        d2 = data_002.test_set

        list_to_randomize = []
        list_test = []
        for (x, y) in zip(data_array[0], data_array[1]):
            list_to_randomize.append([x, y])
        random.shuffle(list_to_randomize)

        n = 10
        batch_size = len(list_to_randomize) // n
        remainder = len(list_to_randomize) - batch_size * n
        print(batch_size)

        for (x, y) in zip(d2[0], d2[1]):
            list_to_randomize.append([x, y])

        # Extract data to test (001 dataset, up to batch_size * n + remainder)
        x_001_randarr = np.array([item[0] for item in list_to_randomize[0: n * batch_size + remainder - 1]])
        y_001_randarr = np.array([item[1] for item in list_to_randomize[0: n * batch_size + remainder - 1]])
        x_002_list = [item[0] for item in list_to_randomize[n * batch_size + remainder:]]  # used for ref. point
        y_002_list = [item[1] for item in list_to_randomize[n * batch_size + remainder:]]

        clf = KNeighborsClassifier()  # create KNN object

        # Train with dataset 2
        x_002_arr = np.array(x_002_list)
        int_output = rep_layer.predict(x_002_arr)
        int_output = int_output.reshape(x_002_arr.shape[0], -1)
        clf.fit(int_output, np.array(y_002_list))

        init_loss = knn_accuracy(clf, x_001_randarr, y_001_randarr, rep_layer)  # Test on 001
        accuracy_list[ds].append(init_loss)

        z = 1
        for i in range(10):
            print("Fitting on batch number:", z)
            x_test_list = [item[0] for item in list_to_randomize[0:(i + 1) * batch_size - 1 + remainder * (i // 9)]] + \
                          [item[0] for item in list_to_randomize[n * batch_size + remainder:]]
            y_test_list = [item[1] for item in list_to_randomize[0:(i + 1) * batch_size - 1 + remainder * (i // 9)]] + \
                          [item[1] for item in list_to_randomize[n * batch_size + remainder:]]
            x = np.array(x_test_list)
            y = np.array(y_test_list)
            print(x.shape, y.shape)

            int_output = rep_layer.predict(x)
            int_output = int_output.reshape(x.shape[0], -1)
            clf.fit(int_output, y)

            accuracy = knn_accuracy(clf, x_001_randarr, y_001_randarr, rep_layer)
            print(accuracy)
            accuracy_list[ds].append(accuracy)
            z += 1

    # Note: a gs:// path is not resolved by the built-in open(); a GCS-aware
    # file API is needed for this write to succeed.
    with open('gs://data-daisy/knn_test.pickle', 'wb+') as handle:
        pickle.dump(accuracy_list, handle)

    return accuracy_list
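# knn_accuracy() is not shown in this file. Based on how it is called above, a
# plausible sketch: push the images through the representation layer, flatten
# the features, and score the fitted classifier on them. The name and use of
# clf.score() are assumptions, not the project's actual implementation.
def knn_accuracy_sketch(clf, x, y, rep_layer):
    features = rep_layer.predict(x)
    features = features.reshape(x.shape[0], -1)
    return clf.score(features, y)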
model_exc_001 = load_model(
    '/Users/jesusnavarro/Desktop/trail_project/results/trained_models/deepnn_subset_full_exc_001.h5')

dataset_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                      train_subsets=[], test_subsets=['/001/'])
dataset_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                      train_subsets=[], test_subsets=['/002/'])

key = {str(np.array([1, 0, 0])): 'right',
       str(np.array([0, 1, 0])): 'center',
       str(np.array([0, 0, 1])): 'left'}
csv_001_key = {'left': 0, 'center': 1, 'right': 2}
csv_002_key = {'left': 3, 'center': 4, 'right': 5}

x_001, y_001, paths_001 = dataset_001.test_set
x_002, y_002, paths_002 = dataset_002.test_set
        continue
    variables_to_restore.append(var)
print(variables_to_restore)

train_op = tf.train.AdamOptimizer(1e-4).minimize(loss, var_list=trainable_var_list)
load_fn = slim.assign_from_checkpoint_fn("./ckpt/vgg_16.ckpt", variables_to_restore)
saver = tf.train.Saver(tf.global_variables())

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    load_fn(sess)

    batch_size = 20
    train_steps = 1000
    epochs = 10

    trainset = DataSet("./data/train_data.txt")
    testset = DataSet("./data/test_data.txt")

    for epoch in range(epochs):
        for i in range(train_steps):
            batch_data, batch_labels = trainset.next_batch(batch_size)
            _, train_step_loss, train_step_acc = sess.run(
                [train_op, loss, accuracy],
                feed_dict={
                    x: batch_data,
                    y: batch_labels
                })
            print("Loss: ", train_step_loss)
            print("Acc: ", train_step_acc)

        batch_test_data, batch_test_labels = testset.next_batch(100)
        test_step_loss, test_step_acc = sess.run(
            [loss, accuracy],
            feed_dict={x: batch_test_data, y: batch_test_labels})
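        # A hedged continuation (not part of the original snippet), assuming the
        # script reports per-epoch test metrics and checkpoints with the `saver`
        # created above; the checkpoint path is an assumption.
        print("Epoch %d test loss: %f, test acc: %f"
              % (epoch, test_step_loss, test_step_acc))
        saver.save(sess, "./ckpt/vgg_16_finetuned.ckpt", global_step=epoch)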
import os

import numpy as np
import tensorflow as tf

from core.distillation_model2 import Distillation_Model2
from core.dataset import DataSet

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

learn_rate = 1e-7
ckpt_file = "./ckpt/train_model.ckpt"

trainset = DataSet('./data/train_data.txt')
testset = DataSet('./data/test_data.txt')

input_data = tf.placeholder(shape=[None, 224, 224, 3], dtype=tf.float32, name='input_data')
label = tf.placeholder(shape=[None, 5], dtype=tf.float32, name="label")

with tf.device("/gpu:0"):
    model = Distillation_Model2(input_data)
    y_ = model.fc10
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=y_))
    correct_prediction = tf.equal(tf.argmax(label, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    train_op = tf.train.AdamOptimizer(learn_rate).minimize(loss)

saver = tf.train.Saver(tf.global_variables())

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    init_op = tf.global_variables_initializer()
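    # The script is truncated after creating init_op. A hedged continuation,
    # modeled on the fine-tuning loop above; the batch size, step count, and
    # epoch count here are assumptions, not values from this file.
    sess.run(init_op)
    for epoch in range(10):
        for step in range(1000):
            batch_data, batch_labels = trainset.next_batch(20)
            _, step_loss, step_acc = sess.run(
                [train_op, loss, accuracy],
                feed_dict={input_data: batch_data, label: batch_labels})
        saver.save(sess, ckpt_file, global_step=epoch)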
import numpy as np
from keras.applications.vgg16 import VGG16

from core.dataset import DataSet
from core.knn import reset_weights
from utils.data_processing import rep_layer_ouptut

vgg16_rep_layer = VGG16(weights=None, include_top=False, pooling='max',
                        input_shape=(101, 101, 3))
reset_weights(vgg16_rep_layer)

data_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                   train_subsets=[], test_subsets=['/001/'], location='local')
data_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                   train_subsets=[], test_subsets=['/002/'], location='local')

print(vgg16_rep_layer.summary())

d1_df = data_001.test_set[2]
d2_df = data_002.test_set[2]

x1, y1 = rep_layer_ouptut(d1_df, vgg16_rep_layer)
x2, y2 = rep_layer_ouptut(d2_df, vgg16_rep_layer)

path = '/Users/jesusnavarro/Desktop/vgg16_tests/npz_data/vgg16_output_top_off_pooling_resized.npz'
np.savez(path, x1=x1, y1=y1, x2=x2, y2=y2)

print(vgg16_rep_layer.summary())
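# A possible follow-up, consistent with the KNN experiments elsewhere in the
# project: reload the saved features and check how a nearest-neighbour
# classifier fit on the /002/ representations transfers to /001/. The classifier
# choice and this step are assumptions, not part of the script above.
from sklearn.neighbors import KNeighborsClassifier

npz = np.load(path)
x1_flat = npz['x1'].reshape(len(npz['x1']), -1)
x2_flat = npz['x2'].reshape(len(npz['x2']), -1)

clf = KNeighborsClassifier()
clf.fit(x2_flat, npz['y2'])
print(clf.score(x1_flat, npz['y1']))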