Example #1
def finalTest(size_training, size_test, hidden_layers, lambd, num_iterations):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)
    # Setup the parameters you will use for this exercise
    input_layer_size = 784        # 28x28 Input Images of Digits
    num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)
    layers = [input_layer_size] + hidden_layers + [num_labels]
    num_of_hidden_layers = len(hidden_layers)
    # Fill the randInitializeWeights.py in order to initialize the neural network weights.
    Theta = randInitializeWeights(layers)

    # Unroll parameters
    nn_weights = unroll_params(Theta)
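    # Minimize the cost with L-BFGS-B, using backwards() as the gradient function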
    res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards, args=(layers, images_training, labels_training, num_labels, lambd), maxfun = num_iterations, factr = 1., disp = True)
    Theta = roll_params(res[0], layers)

    print "\nTesting Neural Network... \n"

    pred_training = predict(Theta, images_training)
    print '\nAccuracy on training set: ' + str(mean(labels_training == pred_training) * 100)

    pred = predict(Theta, images_test)
    print '\nAccuracy on test set: ' + str(mean(labels_test == pred) * 100)

    # Display the test images that the algorithm misclassified
    temp = (labels_test == pred)
    indexes_false = []
    for i in range(size_test):
        if temp[i] == 0:
            indexes_false.append(i)

    displayData(images_test[indexes_false, :])
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str)
    parser.add_argument('--config', type=str)
    parser.add_argument('--data', type=str)
    parser.add_argument('--split', type=int, default=2)
    args = parser.parse_args()

    # restore configuration
    constants = restore_constants(args.config)

    # get image data
    image_size = tuple(constants.IMAGE_SIZE[:-1])
    get_next, _ = read_dataset(args.data, image_size, int(1e4),
                               constants.BATCH_SIZE, constants.EPOCH)

    # make network
    reconstruct, generate_from_latent, train = build(constants)

    sess = tf.Session()
    sess.__enter__()

    # restore parameters
    saver = tf.train.Saver()
    saver.restore(sess, args.model)

    train_iterator = get_next()
    batch_images = np.array([next(train_iterator)[0]], dtype=np.float32) / 255.0

    # reconstruction
    reconst, latent = reconstruct(batch_images)
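    # sweep each latent dimension over [-3, 3] in 20 steps to visualize its effect,
    # splitting the dimensions across args.split display windows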
    latent_range = np.linspace(-3.0, 3.0, num=20)
    latent_in_page = int(constants.LATENT_SIZE / args.split)

    for page in range(args.split):
        image_rows = []
        for i in range(latent_in_page):
            index = page * latent_in_page + i
            # change specific element of latent variable
            tiled_latent = np.tile(latent[0].copy(), (20, 1))
            tiled_latent[:,index] = latent_range

            # reconstruct from the modified latent variable (kept in a separate
            # variable so the original reconstruction shown later is not overwritten)
            sweep_reconst = generate_from_latent(tiled_latent)

            # tiling reconstructed images
            sweep_images = np.array(sweep_reconst * 255, dtype=np.uint8)
            reconst_tiled_images = tile_images(sweep_images, row=1)
            image_rows.append(reconst_tiled_images)

        # show reconstructed images
        image_rows = tile_images(np.array(image_rows), row=latent_in_page)
        cv2.imshow('test{}'.format(page), image_rows)

    cv2.imshow('reconstructed', reconst[0])
    cv2.imshow('original', batch_images[0])

    while cv2.waitKey(10) < 10:
        time.sleep(0.1)
Example #3
def finalTest(size_training,
              size_test,
              hidden_layers,
              dropout=0.2,
              nb_epoch=50,
              batch_size=128):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(
        size_training, size_test)

    X_train = images_training.reshape(size_training, 784)
    X_test = images_test.reshape(size_test, 784)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # one-hot encode the digit labels for the 10-way softmax output
    Y_train = np_utils.to_categorical(labels_training, 10)
    Y_test = np_utils.to_categorical(labels_test, 10)

    # Setup the parameters you will use for this exercise
    input_layer_size = 784  # 28x28 Input Images of Digits
    num_labels = 10  # 10 labels, from 0 to 9 (one label for each digit)

    model = Sequential()

    model.add(Dense(784, input_dim=input_layer_size, init='he_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(dropout))

    for layer in hidden_layers:
        model.add(Dense(layer, init='he_normal'))
        model.add(Activation('relu'))
        model.add(Dropout(dropout))

    model.add(Dense(10, init='he_normal'))
    model.add(Activation('softmax'))

    rms = RMSprop()
    model.compile(loss='categorical_crossentropy', optimizer=rms)

    history = model.fit(X_train,
                        Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        show_accuracy=True,
                        verbose=2,
                        validation_data=(X_test, Y_test))

    plt.figure(1)
    plt.plot(history.history['val_acc'])
    plt.plot(history.history['acc'])
    plt.show()

    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    print(history.history['val_acc'])
Example #4
def finalTest(size_training, size_test, hidden_layers, dropout=0.2, nb_epoch=50, batch_size=128):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

    X_train = images_training.reshape(size_training, 784)
    X_test = images_test.reshape(size_test, 784)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    Y_train = np_utils.to_categorical(labels_training, 10)
    Y_test = np_utils.to_categorical(labels_test, 10)

    # Setup the parameters you will use for this exercise
    input_layer_size = 784        # 28x28 Input Images of Digits
    num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)

    model = Sequential()

    model.add(Dense(784, input_dim=input_layer_size, init='he_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(dropout))

    for layer in hidden_layers:
        model.add(Dense(layer, init='he_normal'))
        model.add(Activation('relu'))
        model.add(Dropout(dropout))

    model.add(Dense(10, init='he_normal'))
    model.add(Activation('softmax'))

    rms = RMSprop()
    model.compile(loss='categorical_crossentropy', optimizer=rms)

    history = model.fit(X_train, Y_train,
              batch_size=batch_size, nb_epoch=nb_epoch,
              show_accuracy=True, verbose=2,
              validation_data=(X_test, Y_test))

    plt.figure(1)
    plt.plot(history.history['val_acc'])
    plt.plot(history.history['acc'])
    plt.show()

    score = model.evaluate(X_test, Y_test,
                           show_accuracy=True, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    print(history.history['val_acc'])
Example #5
    def read_galaxy(self, itype, gn, sgn, centre):
        """ For a given galaxy (defined by its GroupNumber and SubGroupNumber)
        extract the coordinates and mass of all particles of a selected type.
        Coordinates are then wrapped around the centre to account for periodicity. """

        data = {}

        # Load data, then mask to selected GroupNumber and SubGroupNumber.
        gns = read_dataset(itype, 'GroupNumber')
        sgns = read_dataset(itype, 'SubGroupNumber')
        mask = np.logical_and(gns == gn, sgns == sgn)
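        # itype 1 is the dark matter particle type, which has a single shared particle mass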
        if itype == 1:
            data['mass'] = read_dataset_dm_mass()[mask] * u.g.to(u.Msun)
        else:
            data['mass'] = read_dataset(itype, 'Mass')[mask] * u.g.to(u.Msun)
        data['coords'] = read_dataset(itype, 'Coordinates')[mask] * u.cm.to(
            u.Mpc)

        # Periodic wrap coordinates around centre.
        boxsize = self.boxsize / self.h
        data['coords'] = np.mod(data['coords'] - centre + 0.5 * boxsize,
                                boxsize) + centre - 0.5 * boxsize

        return data
Example #6
    def read_galaxy(self, itype, gn, sgn):
        """ For a given galaxy (defined by its GroupNumber and SubGroupNumber)
        extract the temperature, density and star formation rate of all gas particles. """

        data = {}

        # Load data.
        for att in [
                'GroupNumber', 'SubGroupNumber', 'Temperature', 'Density',
                'StarFormationRate'
        ]:
            data[att] = read_dataset(itype, att)

        # Mask to selected GroupNumber and SubGroupNumber.
        mask = np.logical_and(data['GroupNumber'] == gn,
                              data['SubGroupNumber'] == sgn)
        for att in data.keys():
            data[att] = data[att][mask]

        return data
Example #7
import csv

import numpy
from numpy import zeros

from mix_gmm_em import mix_gmm_em
from read_dataset import read_dataset
from math import log
from classifier import classify_gmm

# parameters for the lab (can be changed for the bonus question)
size_features = 8  # number of features retained from the PCA (max: 128)
size_training = 20000  #number of samples retained for training
size_test = 9000  #number of samples retained for testing
K_max = 30  # maximum complexity of the mixture - number of GDs / digit (class)

# arrays to store results
results = zeros((K_max+1, 2))

# reading of the dataset
images, labels_training, images_test, labels_test = read_dataset(size_training, size_test, size_features)

# reading of the features extracted from dataset
features_test = numpy.array(list(csv.reader(open("test_data.csv","rb"),delimiter=','))).astype('float') #loading the PCA features of the test data set
features_training = numpy.array(list(csv.reader(open("training_data.csv","rb"),delimiter=','))).astype('float') #loading the PCA features of the training data set
features_test = features_test[:size_test, :size_features]  # only the first "size_features" features are kept for the test set
features_training = features_training[:size_training, :size_features]  # only the first "size_features" features are kept for the training set

# arrays containing the model for the mixture
mean_mix = zeros((10, K_max, size_features))  # mean values for the Gaussians
var_mix = zeros((10, K_max, size_features))  # variance for the Gaussians
alpha_mix = zeros((10, K_max))  # mixture weights for the Gaussians

# array for the Expectation step of the EM
W = zeros((10, K_max, size_training))
Example #8
import csv

import numpy
from numpy import zeros

from mix_gmm_em import mix_gmm_em
from read_dataset import read_dataset
from math import log
from classifier import classify_gmm

# parameters for the lab (can be changed for the bonus question)
size_features = 8  # number of features retained from the PCA (max: 128)
size_training = 20000  #number of samples retained for training
size_test = 9000  #number of samples retained for testing
K_max = 30  # maximum complexity of the mixture - number of GDs / digit (class)

# arrays to store results
results = zeros((K_max + 1, 2))

# reading of the dataset
images, labels_training, images_test, labels_test = read_dataset(
    size_training, size_test, size_features)

# reading of the features extracted from dataset
# loading the PCA features of the test data set
features_test = numpy.array(
    list(csv.reader(open("test_data.csv", "rb"), delimiter=','))).astype('float')
# loading the PCA features of the training data set
features_training = numpy.array(
    list(csv.reader(open("training_data.csv", "rb"), delimiter=','))).astype('float')
features_test = features_test[:size_test, :size_features]  # only the first "size_features" features are kept for the test set
features_training = features_training[:size_training, :size_features]  # only the first "size_features" features are kept for the training set

# arrays containing the model for the mixture
mean_mix = zeros((10, K_max, size_features))  # mean values for the Gaussians
Example #9
ax2.scatter(data_art[:,0],data_art[:,1], c=labels_pred_art)
ax2.set_xlabel('1st dimension')
ax2.set_ylabel('2nd dimension')
ax2.set_title("Visualization of the clusters produced by the k-means algorithm")
#plt.show()


##############################################
## PART B: MNIST dataset
##############################################
# Number of instances and number of principal components (features)
n_instances = 1000
pca_features = 8

# Get the labels of each digit
images, labels_mnist = read_dataset(n_instances, pca_features)

# Create the dataset (data_mnist) that will be used in clustering
# load the PCA features of the test data set
data_mnist = array(list(csv.reader(open("test_data.csv","rb"),delimiter=','))).astype('float')
data_mnist = data_mnist[:n_instances,:pca_features]  # only 8 first features are kept

# Plot 2 out of 8 dimensions of the dataset - colors correspond to true labels
# (Hint: experiment with different combinations of dimensions)
# Only for illustration purposes
fig3 = plt.figure(3)
ax3 = fig3.add_subplot(111)
ax3.scatter(data_mnist[:,0],data_mnist[:,1], c=labels_mnist)
ax3.set_xlabel('1st dimension')
ax3.set_ylabel('2nd dimension')
ax3.set_title("Visualization of the dataset (2D)")
Example #10
from read_dataset import read_dataset
from imputation import *
from reduc_dim import *
from predict_xgboost import *
from clustering_missing import *

print("preprocessing...")
data_train = read_dataset('train.csv')
data_test_ini = read_dataset('test.csv')
#data2_train_modified = mean_imputation(data_train)
#data_test_modified = mean_imputation(data_test_ini)
data_train_modified = cluster_missing(data_train)
data_test_modified = cluster_missing(data_test_ini)

print("processing...")
s1, y1 = Xgb_and_Lgb(data_train, data_test_ini)
#s2,y2=Xgb_and_Lgb(data_modified,data_modified)
#predict_xgboost_k_fold(data_train,data_test_ini,5)
#predict_lgboost_k_fold(data_train,data_test_ini,5)
#predict_xgboost_k_fold(data_modified,data_modified,5)
#predict_lgboost_k_fold(data_modified,data_modified,5)
Example #11
        ratio_of_valid_set = float(
            input(
                'What ratio of dataset should be used for validation(0.0, 1.0): '
            ))  # (0.0, 1.0)
        ratio_of_test_set = float(
            input(
                'What ratio of dataset should be used for testing(0.0, 1.0): ')
        )  # (0.0, 1.0)
        random_state = int(
            float(input('Enter an integer for seeding random numbers: ')))

    # read dataset
    print('reading dataset ...\n')
    trainX, validX, testX, trainY, validY, testY = read_dataset(
        train_ratio=ratio_of_train_set,
        valid_ratio=ratio_of_valid_set,
        test_ratio=ratio_of_test_set,
        random_state=random_state)

    som = None
    if train_load == 1:
        #print(som_shape, type_distance, type_of_neighbourhood, max_epochs, initial_learning_rate, ratio_of_train_set, ratio_of_valid_set, ratio_of_test_set)
        # select some samples as random weights for initialization
        initial_sample_indexes = np.random.permutation(trainX.shape[0])
        number_of_neurons = som_shape[0] * som_shape[1] * som_shape[2]

        # create & train SOM
        som = SOM(shape=som_shape,
                  number_of_feature=trainX.shape[1],
                  distance_measure_str=type_distance,
                  topology=type_of_neighbourhood,
Example #12
from backwards import backwards
from checkNNCost import checkNNCost
from checkNNGradients import checkNNGradients
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient



# ================================ Step 1: Loading and Visualizing Data ================================
print("\nLoading and visualizing Data ...\n")

#Reading of the dataset
# You are free to reduce the number of samples retained for training, in order to reduce the computational cost
size_training = 60000     # number of samples retained for training
size_test     = 10000     # number of samples retained for testing
images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

# Randomly select 100 data points to display
random_instances = list(range(size_training))
random.shuffle(random_instances)
displayData(images_training[random_instances[0:100],:])

input('Program paused. Press enter to continue!!!')

# ================================ Step 2: Setting up Neural Network Structure &  Initialize NN Parameters ================================
print("\nSetting up Neural Network Structure ...\n")

# Setup the parameters you will use for this exercise
input_layer_size   = 784        # 28x28 Input Images of Digits
num_labels         = 10         # 10 labels, from 0 to 9 (one label for each digit) 
Example #13
from jordan import *

if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from read_dataset import read_dataset
    from errors import mse

    # read a single column (ith_col=5) of the dataset
    col_i = read_dataset(ith_col=5)
    trainX = col_i[0:10000, :]
    validX = col_i[10000:13000, :]
    testX = col_i[13000:23000, :]
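    # targets: the same column shifted forward by one step (next-value prediction)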
    trainY = col_i[1:10001, :]
    validY = col_i[10001:13001, :]
    testY = col_i[13001:23001, :]

    # transpose
    trainX = trainX.T
    validX = validX.T
    testX = testX.T
    trainY = trainY.T
    validY = validY.T
    testY = testY.T

    # shape of network
    n = [trainX.shape[0], 4, trainY.shape[0]]
    activations = ['tanh', 'tanh']

    jordan = Jordan(input_size=1,
                    output_size=1,
                    hidden_units=4,
Example #14
from read_dataset import read_dataset
from proTras import proTras

dsName = 'ecoli2.dat'
data = read_dataset(dsName)

proTras(data)
Example #15
import os
from matplotlib import gridspec
import cv2
import read_dataset as data
import hough_circles as hough
import visualization as vis

root = "test"
assert os.path.exists(root), "dataset directory '{}' not found".format(root)
images = data.read_dataset(root, "png")
print(len(images))

for img in images:
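    # detect_inner_circle is assumed to return (x_centre, y_centre, radius)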
    circle = hough.detect_inner_circle(img)
    circling1 = cv2.circle(img, (circle[0], circle[1]), circle[2], (0, 255, 0), 2)
    circling2 = cv2.circle(img, (circle[0], circle[1]), 2, (0, 255, 0), 3)
    print(circling1)
    print(circling2)

vis.plot_images_grid(images[:30], 5, 6)
Example #16
if __name__ == '__main__':

    # Import packages
    import numpy as np
    from measures import purity_measure, rand_index, f_measure
    import time
    import copy
    import sys
    from read_dataset import read_dataset
    import json
    import threading
    from sklearn.cluster import KMeans

    # read dataset
    print('reading dataset ...\n')
    trainX, validX, testX, trainY, validY, testY = read_dataset(
        train_ratio=0.80, valid_ratio=0.10, test_ratio=0.10, random_state=0)

    # for different number of clusters
    for k in [2, 3, 4, 9, 12, 16, 20, 24, 27, 36, 64]:
        # Kmeans clustering
        kmeans = KMeans(n_clusters=k, random_state=0).fit(trainX)

        y_train_pre = kmeans.predict(X=trainX)
        y_valid_pre = kmeans.predict(X=validX)
        y_test_pre = kmeans.predict(X=testX)

        y_train_pre = y_train_pre.reshape(y_train_pre.shape[0], 1)
        y_valid_pre = y_valid_pre.reshape(y_valid_pre.shape[0], 1)
        y_test_pre = y_test_pre.reshape(y_test_pre.shape[0], 1)

        print(
Example #17
def main():
    date = datetime.now().strftime('%Y%m%d%H%M%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir', type=str, default=date)
    parser.add_argument('--data', type=str)
    args = parser.parse_args()

    # get image data
    image_size = tuple(constants.IMAGE_SIZE[:-1])
    print('reading')
    get_next, get_test = read_dataset(args.data, image_size, int(1e5),
                                      constants.BATCH_SIZE, constants.EPOCH)

    # make network
    reconstruct, generate, train = build(constants)

    sess = tf.Session()
    sess.__enter__()
    sess.run(tf.global_variables_initializer())

    train_iterator = get_next()
    test_iterator = get_test()

    # start training
    count = 0
    for batch_images in train_iterator:
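        # scale pixel values from [0, 255] to [0.0, 1.0]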
        batch_images = np.array(batch_images, dtype=np.float32) / 255.0
        loss = train(batch_images, keep_prob=constants.KEEP_PROB,
                     beta=constants.BETA)
        print('loss {}:'.format(count), loss)
        count += 1

        # visualize
    #    if count % 100 == 0:
    #        test_images = next(test_iterator)
    #        test_images = np.array(test_images, dtype=np.float32) / 255.0
    #        # reconstruction
    #        reconst, latent = reconstruct(test_images)

    #        # show reconstructed images
    #        reconst_images = np.array(reconst * 255, dtype=np.uint8)
    #        reconst_tiled_images = tile_images(reconst_images)
    ##        cv2.imshow('test', reconst_tiled_images)

    #        # show original images
    #        original_images = np.array(test_images * 255, dtype=np.uint8)
    #        original_tiled_images = tile_images(original_images)
    #        cv2.imshow('original', original_tiled_images)

    #        if cv2.waitKey(10) > 0:
    #            pass

    # save model
    print('saving model...')
    modeldir = 'saved_models/' + args.modeldir
    if not os.path.exists(modeldir):
        os.makedirs(modeldir)
    saver = tf.train.Saver()
    saver.save(sess, modeldir + '/model.ckpt')
    # save configuration as json
    dump_constants(constants, modeldir + '/constants.json')
Example #18
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient



# ================================ Step 1: Loading and Visualizing Data ================================
print "\nLoading and visualizing Data ...\n"

#Reading of the dataset
# You are free to reduce the number of samples retained for training, in order to reduce the computational cost
# TODO: change this
#size_training = 60000     # number of samples retained for training
size_training = 5000     # number of samples retained for training
#size_test     = 10000     # number of samples retained for testing
size_test     = 5000     # number of samples retained for testing
images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)


# Randomly select 100 data points to display
random_instances = range(size_training)
random.shuffle(random_instances)
displayData(images_training[random_instances[0:100],:])

raw_input('Program paused. Press enter to continue!!!')

# ================================ Step 2: Setting up Neural Network Structure &  Initialize NN Parameters ================================
print "\nSetting up Neural Network Structure ...\n"

# Setup the parameters you will use for this exercise
input_layer_size   = 784        # 28x28 Input Images of Digits
num_labels         = 10         # 10 labels, from 0 to 9 (one label for each digit) 
Example #19
"""

import torchvision.transforms as transforms

from infogan import Generator, Discriminator, Q, D_Q_commonlayer
from encoder import Encoder
from trainer import Trainer
from read_dataset import read_dataset
from read_result import read_result

epoch = 20
batch_size = 100
img_size = 64
c_size = 1
z_size = 99
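# read_dataset presumably wraps the images under '../pic' in a DataLoader-style iterator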
dataloader = read_dataset('../pic', img_size, batch_size)
version = input('result version:')

c_loss_weight = 0.3
RF_loss_weight = 0.7
generator_loss_weight = 0.7

path = './result_' + version + '/arg_' + version + '.txt'
f = open(path, 'a+')
arg = ('epoch=' + str(epoch) + '\n'
       + 'batch_size=' + str(batch_size) + '\n'
       + 'img_size=' + str(img_size) + '\n'
       + 'c_size=' + str(c_size) + '\n'
       + 'z_size=' + str(z_size) + '\n'
       + 'RF_loss_weight=generator_loss_weight=' + str(RF_loss_weight) + '\n'
       + 'c_loss_weight=' + str(c_loss_weight) + '\n')

f.write(arg + '\n')
f.close()
unloader = transforms.ToPILImage()
encoder = Encoder(c_size, z_size)