def finalTest(size_training, size_test, hidden_layers, lambd, num_iterations):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

    # Setup the parameters you will use for this exercise
    input_layer_size = 784  # 28x28 Input Images of Digits
    num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)
    layers = [input_layer_size] + hidden_layers + [num_labels]
    num_of_hidden_layers = len(hidden_layers)

    # Fill the randInitializeWeights.py in order to initialize the neural network weights.
    Theta = randInitializeWeights(layers)

    # Unroll parameters into a flat vector for the optimizer
    nn_weights = unroll_params(Theta)

    res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards,
                        args=(layers, images_training, labels_training, num_labels, lambd),
                        maxfun=num_iterations, factr=1., disp=True)
    Theta = roll_params(res[0], layers)

    print "\nTesting Neural Network... \n"

    pred_training = predict(Theta, images_training)
    print '\nAccuracy on training set: ' + str(mean(labels_training == pred_training) * 100)

    pred = predict(Theta, images_test)
    print '\nAccuracy on test set: ' + str(mean(labels_test == pred) * 100)

    # Display the test images the network misclassified
    temp = (labels_test == pred)
    indexes_false = []
    for i in range(size_test):
        if temp[i] == 0:
            indexes_false.append(i)

    displayData(images_test[indexes_false, :])
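# The snippet above relies on unroll_params / roll_params to convert between the
# list of per-layer weight matrices and the flat 1-D vector that
# scipy.optimize.fmin_l_bfgs_b requires. Those helpers are not shown; the sketch
# below is one plausible implementation, assuming each Theta[i] has shape
# (layers[i + 1], layers[i] + 1), i.e. one bias column per layer.
import numpy as np

def unroll_params(Theta):
    # concatenate every weight matrix into a single 1-D vector
    return np.concatenate([t.flatten() for t in Theta])

def roll_params(nn_weights, layers):
    # rebuild the list of weight matrices from the flat vector
    Theta, offset = [], 0
    for i in range(len(layers) - 1):
        rows, cols = layers[i + 1], layers[i] + 1
        Theta.append(nn_weights[offset:offset + rows * cols].reshape(rows, cols))
        offset += rows * cols
    return Theta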
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str)
    parser.add_argument('--config', type=str)
    parser.add_argument('--data', type=str)
    parser.add_argument('--split', type=int, default=2)
    args = parser.parse_args()

    # restore configuration
    constants = restore_constants(args.config)

    # get image data
    image_size = tuple(constants.IMAGE_SIZE[:-1])
    get_next, _ = read_dataset(args.data, image_size, int(1e4),
                               constants.BATCH_SIZE, constants.EPOCH)

    # make network
    reconstruct, generate_from_latent, train = build(constants)

    sess = tf.Session()
    sess.__enter__()

    # restore parameters
    saver = tf.train.Saver()
    saver.restore(sess, args.model)

    train_iterator = get_next()
    batch_images = np.array([next(train_iterator)[0]], dtype=np.float32) / 255.0

    # reconstruction
    reconst, latent = reconstruct(batch_images)

    latent_range = np.linspace(-3.0, 3.0, num=20)
    latent_in_page = int(constants.LATENT_SIZE / args.split)
    for page in range(args.split):
        image_rows = []
        for i in range(latent_in_page):
            index = page * latent_in_page + i
            # change specific element of latent variable
            tiled_latent = np.tile(latent[0].copy(), (20, 1))
            tiled_latent[:, index] = latent_range
            # reconstruct from latent variable
            reconst = generate_from_latent(tiled_latent)
            # tiling reconstructed images
            reconst_images = np.array(reconst * 255, dtype=np.uint8)
            reconst_tiled_images = tile_images(reconst_images, row=1)
            image_rows.append(reconst_tiled_images)
        # show reconstructed images
        image_rows = tile_images(np.array(image_rows), row=latent_in_page)
        cv2.imshow('test{}'.format(page), image_rows)

    cv2.imshow('reconstructed', reconst[0])
    cv2.imshow('original', batch_images[0])

    while cv2.waitKey(10) < 10:
        time.sleep(0.1)
def finalTest(size_training, size_test, hidden_layers, dropout=0.2, nb_epoch=50, batch_size=128):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

    X_train = images_training.reshape(60000, 784)
    X_test = images_test.reshape(10000, 784)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    Y_train = np_utils.to_categorical(labels_training, 10)
    Y_test = np_utils.to_categorical(labels_test, 10)

    # Setup the parameters you will use for this exercise
    input_layer_size = 784  # 28x28 Input Images of Digits
    num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)

    model = Sequential()
    model.add(Dense(784, input_dim=input_layer_size, init='he_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(dropout))
    for layer in hidden_layers:
        model.add(Dense(layer, init='he_normal'))
        model.add(Activation('relu'))
        model.add(Dropout(dropout))
    model.add(Dense(10, init='he_normal'))
    model.add(Activation('softmax'))

    rms = RMSprop()
    model.compile(loss='categorical_crossentropy', optimizer=rms)

    history = model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
                        show_accuracy=True, verbose=2, validation_data=(X_test, Y_test))

    plt.figure(1)
    plt.plot(history.history['val_acc'])
    plt.plot(history.history['acc'])
    plt.show()

    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    print(history.history['val_acc'])
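# Note: the function above uses the old Keras 0.x/1.x API (init=, nb_epoch=,
# show_accuracy=). A rough Keras 2 equivalent of the same model is sketched
# below; it reuses X_train / Y_train / X_test / Y_test from above and is a
# modernized variant for reference, not the snippet's own code.
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop

model = Sequential()
model.add(Dense(784, input_dim=784, kernel_initializer='he_normal', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, kernel_initializer='he_normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
history = model.fit(X_train, Y_train, batch_size=128, epochs=50, verbose=2,
                    validation_data=(X_test, Y_test))
# accuracy curves live in history.history['acc'] / ['val_acc'] on older Keras 2
# releases and in ['accuracy'] / ['val_accuracy'] on newer ones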
def read_galaxy(self, itype, gn, sgn, centre):
    """ For a given galaxy (defined by its GroupNumber and SubGroupNumber)
    extract the coordinates and mass of all particles of a selected type.
    Coordinates are then wrapped around the centre to account for periodicity. """

    data = {}

    # Load data, then mask to selected GroupNumber and SubGroupNumber.
    gns = read_dataset(itype, 'GroupNumber')
    sgns = read_dataset(itype, 'SubGroupNumber')
    mask = np.logical_and(gns == gn, sgns == sgn)
    if itype == 1:
        # Particle type 1 (dark matter) has a single fixed particle mass, read separately.
        data['mass'] = read_dataset_dm_mass()[mask] * u.g.to(u.Msun)
    else:
        data['mass'] = read_dataset(itype, 'Mass')[mask] * u.g.to(u.Msun)
    data['coords'] = read_dataset(itype, 'Coordinates')[mask] * u.cm.to(u.Mpc)

    # Periodic wrap coordinates around centre.
    boxsize = self.boxsize / self.h
    data['coords'] = np.mod(data['coords'] - centre + 0.5 * boxsize, boxsize) + centre - 0.5 * boxsize

    return data
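# The np.mod wrap above maps every coordinate into a box of side boxsize centred
# on the galaxy, so particles sitting across the periodic boundary end up next to
# the centre instead of ~boxsize away. A tiny worked example with made-up numbers:
import numpy as np

boxsize = 100.0             # Mpc, illustrative only
centre = np.array([99.0])   # galaxy centre near the box edge
coords = np.array([[0.2]])  # particle just across the boundary
wrapped = np.mod(coords - centre + 0.5 * boxsize, boxsize) + centre - 0.5 * boxsize
print(wrapped)              # ~[[100.2]]: now 1.2 Mpc from the centre, not 98.8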
def read_galaxy(self, itype, gn, sgn):
    """ For a given galaxy (defined by its GroupNumber and SubGroupNumber)
    extract the temperature, density and star formation rate of all gas particles. """

    data = {}

    # Load data.
    for att in ['GroupNumber', 'SubGroupNumber', 'Temperature', 'Density',
                'StarFormationRate']:
        data[att] = read_dataset(itype, att)

    # Mask to selected GroupNumber and SubGroupNumber.
    mask = np.logical_and(data['GroupNumber'] == gn,
                          data['SubGroupNumber'] == sgn)
    for att in data.keys():
        data[att] = data[att][mask]

    return data
import csv
import numpy
from numpy import zeros

from mix_gmm_em import mix_gmm_em
from read_dataset import read_dataset
from math import log
from classifier import classify_gmm

# parameters for the lab (can be changed for the bonus question)
size_features = 8      # number of features retained from the PCA (max: 128)
size_training = 20000  # number of samples retained for training
size_test = 9000       # number of samples retained for testing
K_max = 30             # maximum complexity of the mixture - number of GDs / digit (class)

# arrays to store results
results = zeros((K_max + 1, 2))

# reading of the dataset
images, labels_training, images_test, labels_test = read_dataset(size_training, size_test, size_features)

# reading of the features extracted from the dataset
features_test = numpy.array(list(csv.reader(open("test_data.csv", "rb"), delimiter=','))).astype('float')          # loading the PCA features of the test data set
features_training = numpy.array(list(csv.reader(open("training_data.csv", "rb"), delimiter=','))).astype('float')  # loading the PCA features of the training data set
features_test = features_test[:size_test, :size_features]              # only the first "size_features" features are kept for the test set
features_training = features_training[:size_training, :size_features]  # only the first "size_features" features are kept for the training set

# arrays containing the model for the mixture
mean_mix = zeros((10, K_max, size_features))  # mean values for the Gaussians
var_mix = zeros((10, K_max, size_features))   # variance for the Gaussians
alpha_mix = zeros((10, K_max))                # mixture weights for the Gaussians

# array for the Expectation step of the EM
W = zeros((10, K_max, size_training))
ax2.scatter(data_art[:, 0], data_art[:, 1], c=labels_pred_art)
ax2.set_xlabel('1st dimension')
ax2.set_ylabel('2nd dimension')
ax2.set_title("Visualization of the clusters produced by k-means algorithm")
#plt.show()

##############################################
## PART B: MNIST dataset
##############################################

# Number of instances and number of principal components (features)
n_instances = 1000
pca_features = 8

# Get the labels of each digit
images, labels_mnist = read_dataset(n_instances, pca_features)

# Create the dataset (data_mnist) that will be used in clustering
# load the PCA features of the test data set
data_mnist = array(list(csv.reader(open("test_data.csv", "rb"), delimiter=','))).astype('float')
data_mnist = data_mnist[:n_instances, :pca_features]  # only the 8 first features are kept

# Plot 2 out of 8 dimensions of the dataset - colors correspond to true labels
# (Hint: experiment with different combinations of dimensions)
# Only for illustration purposes
fig3 = plt.figure(3)
ax3 = fig3.add_subplot(111)
ax3.scatter(data_mnist[:, 0], data_mnist[:, 1], c=labels_mnist)
ax3.set_xlabel('1st dimension')
ax3.set_ylabel('2nd dimension')
ax3.set_title("Visualization of the dataset (2D)")
from read_dataset import read_dataset
from imputation import *
from reduc_dim import *
from predict_xgboost import *
from clustering_missing import *

print("preprocessing...")
data_train = read_dataset('train.csv')
data_test_ini = read_dataset('test.csv')

#data2_train_modified = mean_imputation(data_train)
#data_test_modified = mean_imputation(data_test_ini)
data_train_modified = cluster_missing(data_train)
data_test_modified = cluster_missing(data_test_ini)

print("processing...")
s1, y1 = Xgb_and_Lgb(data_train, data_test_ini)
#s2, y2 = Xgb_and_Lgb(data_modified, data_modified)
#predict_xgboost_k_fold(data_train, data_test_ini, 5)
#predict_lgboost_k_fold(data_train, data_test_ini, 5)
#predict_xgboost_k_fold(data_modified, data_modified, 5)
#predict_lgboost_k_fold(data_modified, data_modified, 5)
ratio_of_valid_set = float(input(
    'What ratio of dataset should be used for validation (0.0, 1.0): '))  # (0.0, 1.0)
ratio_of_test_set = float(input(
    'What ratio of dataset should be used for testing (0.0, 1.0): '))  # (0.0, 1.0)
random_state = int(float(input('Enter an integer for seeding random numbers: ')))

# read dataset
print('reading dataset ...\n')
trainX, validX, testX, trainY, validY, testY = read_dataset(
    train_ratio=ratio_of_train_set,
    valid_ratio=ratio_of_valid_set,
    test_ratio=ratio_of_test_set,
    random_state=random_state)

som = None
if train_load == 1:
    #print(som_shape, type_distance, type_of_neighbourhood, max_epochs, initial_learning_rate, ratio_of_train_set, ratio_of_valid_set, ratio_of_test_set)

    # select some samples as random weights for initialization
    initial_sample_indexes = np.random.permutation(trainX.shape[0])
    number_of_neurons = som_shape[0] * som_shape[1] * som_shape[2]

    # create & train SOM
    som = SOM(shape=som_shape,
              number_of_feature=trainX.shape[1],
              distance_measure_str=type_distance,
              topology=type_of_neighbourhood,
from backwards import backwards
from checkNNCost import checkNNCost
from checkNNGradients import checkNNGradients
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient

# ================================ Step 1: Loading and Visualizing Data ================================

print("\nLoading and visualizing Data ...\n")

# Reading of the dataset
# You are free to reduce the number of samples retained for training, in order to reduce the computational cost
size_training = 60000  # number of samples retained for training
size_test = 10000      # number of samples retained for testing

images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

# Randomly select 100 data points to display
random_instances = list(range(size_training))
random.shuffle(random_instances)
displayData(images_training[random_instances[0:100], :])

input('Program paused. Press enter to continue!!!')

# ================================ Step 2: Setting up Neural Network Structure & Initialize NN Parameters ================================

print("\nSetting up Neural Network Structure ...\n")

# Setup the parameters you will use for this exercise
input_layer_size = 784  # 28x28 Input Images of Digits
num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)
from jordan import *

if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from read_dataset import read_dataset
    from errors import mse

    # read dataset, keep only column 5
    col_i = read_dataset(ith_col=5)

    # targets are the same series shifted forward by one step (one-step-ahead prediction)
    trainX = col_i[0:10000, :]
    validX = col_i[10000:13000, :]
    testX = col_i[13000:23000, :]
    trainY = col_i[1:10001, :]
    validY = col_i[10001:13001, :]
    testY = col_i[13001:23001, :]

    # transpose so each column is one sample
    trainX = trainX.T
    validX = validX.T
    testX = testX.T
    trainY = trainY.T
    validY = validY.T
    testY = testY.T

    # shape of network
    n = [trainX.shape[0], 4, trainY.shape[0]]
    activations = ['tanh', 'tanh']

    jordan = Jordan(input_size=1,
                    output_size=1,
                    hidden_units=4,
from read_dataset import read_dataset
from proTras import proTras

dsName = 'ecoli2.dat'
data = read_dataset(dsName)
proTras(data)
import os

from matplotlib import gridspec
import cv2

import read_dataset as data
import hough_circles as hough
import visualization as vis

root = "test"
# make sure the image directory exists before reading from it
assert os.path.exists(root), "directory '{}' not found".format(root)

images = data.read_dataset(root, "png")
print(len(images))

for img in images:
    circle = hough.detect_inner_circle(img)
    # draw the detected circle and its centre on the image (cv2.circle draws in place)
    circling1 = cv2.circle(img, (circle[0], circle[1]), circle[2], (0, 255, 0), 2)
    circling2 = cv2.circle(img, (circle[0], circle[1]), 2, (0, 255, 0), 3)
    print(circling1)
    print(circling2)

vis.plot_images_grid(images[:30], 5, 6)
if __name__ == '__main__':
    # Import packages
    import numpy as np
    from measures import purity_measure, rand_index, f_measure
    import time
    import copy
    import sys
    from read_dataset import read_dataset
    import json
    import threading
    from sklearn.cluster import KMeans

    # read dataset
    print('reading dataset ...\n')
    trainX, validX, testX, trainY, validY, testY = read_dataset(
        train_ratio=0.80, valid_ratio=0.10, test_ratio=0.10, random_state=0)

    # for different numbers of clusters
    for k in [2, 3, 4, 9, 12, 16, 20, 24, 27, 36, 64]:
        # K-means clustering
        kmeans = KMeans(n_clusters=k, random_state=0).fit(trainX)
        y_train_pre = kmeans.predict(X=trainX)
        y_valid_pre = kmeans.predict(X=validX)
        y_test_pre = kmeans.predict(X=testX)

        y_train_pre = y_train_pre.reshape(y_train_pre.shape[0], 1)
        y_valid_pre = y_valid_pre.reshape(y_valid_pre.shape[0], 1)
        y_test_pre = y_test_pre.reshape(y_test_pre.shape[0], 1)

        print(
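# The fragment above is cut off before the imported measures are applied. The
# repo's purity_measure is not shown here; the helper below is a generic sketch
# of cluster purity (fraction of points assigned to their cluster's majority
# class) that could be evaluated on, e.g., (trainY, y_train_pre).
import numpy as np

def purity(y_true, y_pred):
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    total = 0
    for cluster in np.unique(y_pred):
        members = y_true[y_pred == cluster].astype(int)
        # count of the most common ground-truth label inside this cluster
        total += np.bincount(members).max()
    return total / float(len(y_true))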
def main():
    date = datetime.now().strftime('%Y%m%d%H%M%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir', type=str, default=date)
    parser.add_argument('--data', type=str)
    args = parser.parse_args()

    # get image data
    image_size = tuple(constants.IMAGE_SIZE[:-1])
    print('reading')
    get_next, get_test = read_dataset(args.data, image_size, int(1e5),
                                      constants.BATCH_SIZE, constants.EPOCH)

    # make network
    reconstruct, generate, train = build(constants)

    sess = tf.Session()
    sess.__enter__()
    sess.run(tf.global_variables_initializer())

    train_iterator = get_next()
    test_iterator = get_test()

    # start training
    count = 0
    for batch_images in train_iterator:
        batch_images = np.array(batch_images, dtype=np.float32) / 255.0
        loss = train(batch_images, keep_prob=constants.KEEP_PROB, beta=constants.BETA)
        print('loss {}:'.format(count), loss)
        count += 1

        # visualize
        # if count % 100 == 0:
        #     test_images = next(test_iterator)
        #     test_images = np.array(test_images, dtype=np.float32) / 255.0
        #     # reconstruction
        #     reconst, latent = reconstruct(test_images)
        #     # show reconstructed images
        #     reconst_images = np.array(reconst * 255, dtype=np.uint8)
        #     reconst_tiled_images = tile_images(reconst_images)
        #     # cv2.imshow('test', reconst_tiled_images)
        #     # show original images
        #     original_images = np.array(test_images * 255, dtype=np.uint8)
        #     original_tiled_images = tile_images(original_images)
        #     cv2.imshow('original', original_tiled_images)
        #     if cv2.waitKey(10) > 0:
        #         pass

    # save model
    print('saving model...')
    modeldir = 'saved_models/' + args.modeldir
    if not os.path.exists(modeldir):
        os.makedirs(modeldir)
    saver = tf.train.Saver()
    saver.save(sess, modeldir + '/model.ckpt')

    # save configuration as json
    dump_constants(constants, modeldir + '/constants.json')
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient

# ================================ Step 1: Loading and Visualizing Data ================================

print "\nLoading and visualizing Data ...\n"

# Reading of the dataset
# You are free to reduce the number of samples retained for training, in order to reduce the computational cost
# TODO: change this
#size_training = 60000  # number of samples retained for training
size_training = 5000    # number of samples retained for training
#size_test = 10000      # number of samples retained for testing
size_test = 5000        # number of samples retained for testing

images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

# Randomly select 100 data points to display
random_instances = range(size_training)
random.shuffle(random_instances)
displayData(images_training[random_instances[0:100], :])

raw_input('Program paused. Press enter to continue!!!')

# ================================ Step 2: Setting up Neural Network Structure & Initialize NN Parameters ================================

print "\nSetting up Neural Network Structure ...\n"

# Setup the parameters you will use for this exercise
input_layer_size = 784  # 28x28 Input Images of Digits
num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)
""" import torchvision.transforms as transforms from infogan import Generator, Discriminator, Q, D_Q_commonlayer from encoder import Encoder from trainer import Trainer from read_dataset import read_dataset from read_result import read_result epoch = 20 batch_size = 100 img_size = 64 c_size = 1 z_size = 99 dataloader = read_dataset('../pic', img_size, batch_size) version = input('result version:') c_loss_weight = 0.3 RF_loss_weight = 0.7 generator_loss_weight = 0.7 path = './result_' + version + '/arg_' + version + '.txt' f = open(path, 'a+') arg='epoch='+str(epoch)+'\n'+'batch_size='+str(batch_size)+'\n'+'img_size='+str(img_size)+'\n'+\ 'c_size='+str(c_size)+'\n'+'z_size='+str(z_size)+'\n'+'RF_loss_weight=generator_loss_weight='+str(RF_loss_weight)+'\n'+'c_loss_weight='+str(c_loss_weight)+'\n' f.write(arg + '\n') f.close() unloader = transforms.ToPILImage() encoder = Encoder(c_size, z_size)