Example #1
def _drawReconstructions(encoder, decoder, dataDir, listOfFiles, imageID):
	# HARD CODE. Make sure the data files are in increasing order in main.py.
	if imageID > 1000 and imageID < 2001:
		datafiledir = dataDir + '/' + listOfFiles[1]
	elif imageID > 2000 and imageID < 3001:
		datafiledir = dataDir + '/' + listOfFiles[2]
	elif imageID > 3000 and imageID < 4001:
		datafiledir = dataDir + '/' + listOfFiles[3]
	elif imageID > 4000 and imageID < 5001:
		datafiledir = dataDir + '/' + listOfFiles[4]
	elif imageID > 5000 and imageID < 6001:
		datafiledir = dataDir + '/' + listOfFiles[5]
	elif imageID > 6000 and imageID < 7001:
		datafiledir = dataDir + '/' + listOfFiles[6]
	else:
		datafiledir = dataDir + '/' + listOfFiles[0]
	img = imageID % 1000

	# Draw first image
	data = LoadData(datafiledir, 'train')
	inputs = data['inputs_train']
	numSamples = inputs.shape[0]
	numFeatures = np.prod(inputs.shape[1:])
	inputsFlat = inputs.reshape(numSamples, numFeatures)
	myImage = inputsFlat[img,:]
	print myImage.shape
	myImage = myImage.reshape(1,numFeatures)
	encoded_img = encoder.predict(myImage)
	decoded_img = decoder.predict(encoded_img)
	decoded_img = decoded_img.reshape(1,128,128,3)
	ShowImage(decoded_img[0])
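
The chained range checks above can be collapsed into a single index computation. A minimal sketch of that alternative, assuming the files are ordered train_1_1000 through train_6001_7000 as in the main() example further down (a hypothetical helper, not part of the original):

def _selectDataFile(dataDir, listOfFiles, imageID):
    # Images 1-1000 map to listOfFiles[0], 1001-2000 to listOfFiles[1], and so on;
    # anything outside 1-7000 falls back to the first file, mirroring the else branch above.
    if 1 <= imageID <= 7000:
        fileIdx = (imageID - 1) // 1000
    else:
        fileIdx = 0
    return dataDir + '/' + listOfFiles[fileIdx]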
Example #2
def LoadAllTrainData(dataDir, listOfFiles, prefix=None, binarize=True):
	allImages = np.empty(shape=(0,0))
	allLabels = np.empty(shape=(0,0))

	for datafile in listOfFiles:
		datafiledir = dataDir + datafile
		if prefix is not None:
			datafiledir = dataDir + prefix + datafile
		print 'Reading ',datafiledir, '...'
		data = LoadData(datafiledir, 'train', prefix)
		images = data['inputs_train']
		if allImages.shape == (0,0):
			allImages = images
		else:
			allImages = np.concatenate((allImages,images))
		if prefix != "VGG16_":
			labels = data['targets_train']
			assert labels.shape[1] == 2
			labels = labels[:,1]
			if allLabels.shape == (0,0):
				allLabels = labels
			else:
				allLabels = np.hstack((allLabels,labels))
			
	if binarize:
		allLabels = _convertAllLabels(allLabels)

	return allImages, allLabels
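
Every example in this collection relies on a LoadData helper that is not listed, and its signature clearly differs between projects (some calls return a dict of arrays, others return six train/valid/test arrays, others read a .mat file). A minimal sketch of the dict-returning .npz variant used here, inferred from the keys the callers access (an assumption, not the real helper):

import numpy as np

def LoadData(path, split, prefix=None):
    # Assumed behaviour: open the .npz archive and expose its arrays as a dict, so
    # callers can index data['inputs_train'], data['targets_train'], data['inputs_test'], etc.
    # The split and prefix arguments are accepted but ignored in this sketch.
    archive = np.load(path if path.endswith('.npz') else path + '.npz')
    return {key: archive[key] for key in archive.files}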
Example #3
def run_public_test_on(class_name):

    if class_name == 'knn':
        res_1 = open('bg1knn.dump', 'rb')  # binary mode for pickled data
        clf = pickle.load(res_1)
        res_1.close()
        print "knn done"
    elif class_name == 'lr':
        res_2 = open('bg2lr.dump', 'rb')
        clf = pickle.load(res_2)
        res_2.close()
        print "LR done"
    elif class_name == 'svm':
        res_3 = open('bg3svm.dump', 'rb')
        clf = pickle.load(res_3)
        res_3.close()
        print "svm done"
    elif class_name == 'nn':
        res_4 = open('bestNet.dump', 'rb')
        clf = pickle.load(res_4)
        res_4.close()
        print "net done"
    else:
        raise ValueError('Unknown classifier name: ' + class_name)
    validation_set = LoadData('public_test_images.mat', False, False)
    fixed_valid = fix_pixels(validation_set)
    fin_pred = clf.predict_proba(fixed_valid)
    fin_labels = [(np.argmax(ar, axis=0) + 1) for ar in fin_pred]
    create_csv(fin_labels, 'res_csv.csv')
Example #4
def DoKNN(dataDir, listOfFiles):
    numClasses = 8
    classes = range(1, numClasses + 1)

    # Scale data
    scaler = StandardizeData(dataDir, listOfFiles)

    # Do PCA
    ipca, var = DoIncrementalPCA(dataDir,
                                 listOfFiles,
                                 scaler,
                                 numComponents=100)
    print "Variance explained: ", np.sum(var)
    for i in range(0, len(listOfFiles)):
        print listOfFiles[i]
        datafiledir = dataDir + '/' + listOfFiles[i]
        data = LoadData(datafiledir, 'train')
        inputs = data['inputs_train']
        targets = data['targets_train']
        numSamples = inputs.shape[0]
        numFeatures = np.prod(inputs.shape[1:])
        assert numSamples == targets.shape[0]
        targets = targets[:, 1]

        # Transform data
        inputs = inputs.reshape(numSamples, numFeatures)
        inputs = TransformData(inputs, ipca, scaler)
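
The loop above ends right after the batch is transformed, before any nearest-neighbour model is fit. A hedged sketch of one way the per-batch fit could continue, assuming scikit-learn's KNeighborsClassifier (a hypothetical continuation, not the original code):

from sklearn.neighbors import KNeighborsClassifier

def _fitKNNOnBatch(inputs, targets, numNeighbors=5):
    # Fit k-NN on one PCA-transformed batch and report its training accuracy;
    # a real evaluation would score on a held-out split instead.
    knn = KNeighborsClassifier(n_neighbors=numNeighbors)
    knn.fit(inputs, targets)
    print('Training accuracy: {:.3f}'.format(knn.score(inputs, targets)))
    return knn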
Example #5
def DoSVM_new(dataDir, listOfFiles):
    # Do SVM on HOG features
    numClasses = 8
    classes = range(1, numClasses + 1)

    # Scale data
    #scaler = StandardizeData(dataDir, listOfFiles)

    newData = np.empty((0, ))

    #for i in range(0, len(listOfFiles)):
    for i in range(0, 1):
        print listOfFiles[i]
        datafiledir = dataDir + '/' + listOfFiles[i]
        data = LoadData(datafiledir, 'train')
        inputs = data['inputs_train']
        #targets = data['targets_train']
        #numSamples = inputs.shape[0]
        #numFeatures = np.prod(inputs.shape[1:])
        #targets = targets[:,1]

        inputs = color.rgb2gray(inputs)
        for img in inputs:
            print '.',
            sys.stdout.flush()
            if newData.shape[0] == 0:
                newData = hog(img)
            else:
                newimg = hog(img)
                newData = np.vstack((newData, newimg))
        print ''
    print newData.shape
    #np.savez('hogimages', img=newData)

    ShowImage(newData[0], gray=1)
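
Growing newData with np.vstack inside the loop copies the whole accumulated array on every image. A short sketch of the usual alternative, collecting the per-image descriptors in a list and stacking once at the end (uses the same skimage hog call as above):

# Hypothetical rewrite of the accumulation loop above:
descriptors = [hog(img) for img in inputs]  # one HOG vector per grayscale image
newData = np.vstack(descriptors)            # single concatenation at the end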
Example #6
def PredictOnValidation(clf, dataDir, listOfFiles, valSetIndices, numClasses, \
      scaler, ipca, rbf_feature, predict = 1):
    assert isinstance(clf, SGDClassifier)
    assert len(valSetIndices) == len(listOfFiles)

    # Predict on set
    classes = range(1, numClasses + 1)
    predictions = np.empty((0, ))
    trueLabels = np.empty((0, ))
    for i in range(0, len(listOfFiles)):
        valSetIdx = valSetIndices[i]
        print listOfFiles[i]
        datafiledir = dataDir + '/' + listOfFiles[i]
        data = LoadData(datafiledir, 'train')
        inputs = data['inputs_train']
        targets = data['targets_train']
        numSamples = inputs.shape[0]
        numFeatures = np.prod(inputs.shape[1:])
        assert numSamples == targets.shape[0]
        targets = targets[:, 1]

        # Transform data and get validation set
        inputs = inputs.reshape(numSamples, numFeatures)
        inputs = TransformData(inputs, ipca, scaler)
        inputs = inputs[valSetIdx, :]
        targets = targets[valSetIdx]
        inputs = rbf_feature.fit_transform(inputs)

        # Get predictions
        if not predict:
            valPredictions = clf.decision_function(inputs)
            targets = label_binarize(targets, classes=classes)
        else:
            valPredictions = clf.predict(inputs)

        if predictions.shape[0] == 0:
            predictions = valPredictions
        else:
            if not predict:
                predictions = np.vstack((predictions, valPredictions))
            else:
                predictions = np.hstack((predictions, valPredictions))
        if trueLabels.shape[0] == 0:
            trueLabels = targets
        else:
            if not predict:
                trueLabels = np.vstack((trueLabels, targets))
            else:
                trueLabels = np.hstack((trueLabels, targets))

    print trueLabels.shape, predictions.shape
    assert len(trueLabels) == len(predictions)
    total = len(trueLabels)
    correct = np.sum((predictions == trueLabels).astype(int))
    for i in range(0, len(predictions)):
        if predictions[i] == trueLabels[i]:
            print predictions[i]
    # Use float division so the accuracy is not truncated to 0 under Python 2.
    return float(correct) / total
Example #7
def q35_plot(model, forward):
    _, _, inputs_test, _, _, _ = LoadData('../toronto_face.npz')
    prediction = Softmax(forward(model, inputs_test)['y'])
    largest = np.argmax(prediction, axis=1)
    idx = []
    for i in range(prediction.shape[0]):
        if prediction[i, largest[i]] < 0.5:
            idx.append(i)
    plotExample(np.transpose(inputs_test[idx][0:8, :]), 3, 4)
Example #8
def q35():
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
    target_test = LoadData('../toronto_face.npz')
    train_inaccurate = [173]
    valid_inaccurate = [170, 173]
    test_inaccurate = [275]
    print(target_test[275])
    blah = inputs_test[275].reshape(48, 48)
    plt.imshow(blah, cmap=plt.cm.gray)
    # Save before show(), otherwise the saved figure is blank.
    plt.savefig('./plots/inaccuracy/test_' + str(275) + '.png')
    plt.show()
Example #9
def GetIntensityStats(dataDir, listOfFiles, statsType='avg', filterData='sobel'):
	# filterData = {'', 'gauss', 'sobel'}
	# statsType = {'avg', 'max', 'min'}

	numClasses = 8
	R_intensity = dict()
	for i in range(1,9):
		R_intensity[i] = np.empty((0,))

	classPopulation = defaultdict(int) # Number of samples from each class

	for i in range(0, len(listOfFiles)):
		print listOfFiles[i]
		datafiledir = dataDir + '/' + listOfFiles[i]
		d = LoadData(datafiledir, 'train')
		inputs = SobelFilter(d['inputs_train'])  # NOTE: the filterData argument is not used; Sobel is always applied
		targets = d['targets_train']
		targets = targets[:,1]
		assert inputs.shape[0] == targets.shape[0]

		for j in range(1,numClasses+1):
			# Get samples from class 'j'
			idxSamples = np.where(targets == j)[0]
			classSamples = inputs[idxSamples]
			numSamples = len(idxSamples)
			imgSize = np.prod(classSamples[:,:,:].shape[1:])

			r = np.reshape(classSamples[:,:,:], (numSamples, imgSize))
			red = np.empty((0,))

			if statsType == 'avg':
				red = np.hstack((red, np.mean(r, axis=1)))
			elif statsType == 'max':
				red = np.hstack((red, np.amax(r, axis=1)))
			elif statsType == 'min':
				red = np.hstack((red, np.amin(r, axis=1)))

			R_intensity[j] = np.hstack((R_intensity[j], red))
			classPopulation[j] += len(classSamples)

	for i in range(1,numClasses+1):
		print i, ':', len(R_intensity[i])

	for i in range(1,numClasses+1):
		classPop = classPopulation[i]
		R_intensity[i] = (np.mean(R_intensity[i]), np.std(R_intensity[i]))

	#print classPopulation
	print R_intensity
	return R_intensity
Example #10
def _getAllLabels(dataDir, listOfFiles):
	allLabels = np.empty(shape=(0,0))
	for datafile in listOfFiles:
		print 'Reading', datafile, '...'
		datafiledir = dataDir + '/' + datafile
		data = LoadData(datafiledir, 'train')
		labels = data['targets_train']
		assert labels.shape[1] == 2
		labels = labels[:,1]
		if allLabels.shape == (0,0):
			allLabels = labels
		else:
			allLabels = np.hstack((allLabels,labels))
	return allLabels
Example #11
def FilterImage(trainingSetFile, img):
	# Function is used to test out filters
	data = LoadData(trainingSetFile, 'train')
	inputs = data['inputs_train']
	numSamples = inputs.shape[0]
	numFeatures = np.prod(inputs.shape[1:])
	myImage = inputs[img]

#========= ndimage sobel edge detector
	myImageGray = _rgb2gray(myImage)
	sx = ndimage.sobel(myImageGray, axis=0, mode='constant')
	sy = ndimage.sobel(myImageGray, axis=1, mode='constant')
	sob = np.hypot(sx, sy)
	ShowImage(sob)
Example #12
def DoAutoEncoder(dataDir, listOfFiles):
	# Initialize
	d_file = dataDir + '/' + listOfFiles[0]
	d = LoadData(d_file, 'train')
	inputs = d['inputs_train']
	losses = np.empty((0,))
	numSamples = inputs.shape[0]
	numFeatures = np.prod(inputs.shape[1:])
	autoencoder, encoder, decoder = _initAutoEncoder(numFeatures, 100)
	early_stop = EarlyStopping(min_delta=0.00000001, patience=2, mode='min')

	# Do training
	for i in range(10):
		print '=============== Epoch %d ===============' % i
		for j in range(len(listOfFiles)):
			d_file = dataDir + '/' + listOfFiles[j]
			d = LoadData(d_file, 'train')
			inputs = d['inputs_train']
			assert numSamples == inputs.shape[0]
			assert numFeatures == np.prod(inputs.shape[1:])
			inputs = inputs.reshape(numSamples, numFeatures)
			rnd_idx = np.arange(inputs.shape[0])
			np.random.shuffle(rnd_idx)
			inputs = inputs[rnd_idx]
			hist = autoencoder.fit(inputs, inputs, batch_size=10, shuffle=True, \
									 nb_epoch=10, validation_split=0.2, callbacks=[early_stop])
			losses = np.hstack((losses, hist.history['loss']))

	np.savez('loss', loss=losses)
	plt.plot(losses)
	plt.show()

	print "Saving model..."
	autoencoder.save('my_model.h5')

	_drawReconstructions(encoder, decoder, dataDir, listOfFiles, 1)
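
This example and Example #26 build the model through an _initAutoEncoder helper that is not listed. A minimal sketch of the assumed structure, a single hidden-layer dense autoencoder written against the Keras 1.x API implied by the nb_epoch argument above (the real architecture and loss may differ):

from keras.layers import Input, Dense
from keras.models import Model

def _initAutoEncoder(numFeatures, encodingDim):
    # Encoder maps the flattened image to a small code; decoder reconstructs it.
    inputLayer = Input(shape=(numFeatures,))
    encoded = Dense(encodingDim, activation='relu')(inputLayer)
    decoded = Dense(numFeatures, activation='sigmoid')(encoded)

    autoencoder = Model(input=inputLayer, output=decoded)
    encoder = Model(input=inputLayer, output=encoded)

    # Stand-alone decoder that reuses the trained output layer.
    encodedInput = Input(shape=(encodingDim,))
    decoder = Model(input=encodedInput, output=autoencoder.layers[-1](encodedInput))

    autoencoder.compile(optimizer='adadelta', loss='mse')
    return autoencoder, encoder, decoder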
Example #13
def NetworkUncertainty():
    """
    Plot some examples where the neural network is not confident of the
    classification output (the top score is below some threshold)
    """
    train_X, valid_X, test_X, \
    train_t, valid_t, test_t = LoadData('../toronto_face.npz')
    train_inaccurate = [173]
    valid_inaccurate = [170, 173]
    test_inaccurate = [275]
    print(test_t[275])
    blah = test_X[275].reshape(48, 48)
    plt.imshow(blah, cmap=plt.cm.gray)
    # Save before show(), otherwise the saved figure is blank.
    plt.savefig('./plots/inaccuracy/test_' + str(275) + '.png')
    plt.show()
Example #14
def main():
    """Trains a NN."""
    model_fname = 'nn_model.npz'
    stats_fname = 'nn_stats.npz'

    # Hyper-parameters. Modify them if needed.
    num_hiddens = [16, 32]
    eps = 0.01
    momentum = 0.5
    num_epochs = 1000
    batch_size = 100

    # Input-output dimensions.
    num_inputs = 2304
    num_outputs = 7

    # Initialize model.
    model = InitNN(num_inputs, num_hiddens, num_outputs)

    # Uncomment to reload trained model here.
    # model = Load(model_fname)

    # Check gradient implementation.
    print('Checking gradients...')
    x = np.random.rand(10, 48 * 48) * 0.1
    CheckGrad(model, NNForward, NNBackward, 'W3', x)
    CheckGrad(model, NNForward, NNBackward, 'b3', x)
    CheckGrad(model, NNForward, NNBackward, 'W2', x)
    CheckGrad(model, NNForward, NNBackward, 'b2', x)
    CheckGrad(model, NNForward, NNBackward, 'W1', x)
    CheckGrad(model, NNForward, NNBackward, 'b1', x)

    # Train model.
    stats = Train(model, NNForward, NNBackward, NNUpdate, eps, momentum,
                  num_epochs, batch_size)

    print(model['W1'].shape)
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = LoadData('toronto_face.npz')

    ip = inputs_test[16:30, :]
    print(ip.T.shape)
    ShowMeans(ip.T, 5)
Example #15
def StandardizeData(dataDir, listOfFiles):
	# Standardizes data to have 0 mean and 1 variance
	print "Scaling data..."
	scaler = StandardScaler()
	for i in range(0, len(listOfFiles)):
		print listOfFiles[i]
		datafiledir = dataDir + '/' + listOfFiles[i]
		data = LoadData(datafiledir, 'train')
		inputs = data['inputs_train']
		targets = data['targets_train']
		numSamples = inputs.shape[0]
		numFeatures = np.prod(inputs.shape[1:])

		assert numSamples == targets.shape[0]
		targets = targets[:,1]
		inputs = inputs.reshape(numSamples, numFeatures)
		# Normalize data
		scaler.partial_fit(inputs, y=targets)
	return scaler
Example #16
def make_data_for_prepro():
    accuracys = []
    training_sett, train_set_labelts, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)
    # kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    # logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1.2, fit_intercept=True,
    #                                                                          intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
    #                                                                          max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    # svm_class = svm.SVC(kernel='rbf', C=50, shrinking = False,decision_function_shape='ovr', tol=0.001, max_iter=-1)

    standard_train_inputs = standard_data(training_sett)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_sett)
    fixed_valid = fix_pixels(validation_set)

    # garbored_train_set = gabor_filter(training_sett)
    # garbored_valid_set = gabor_filter(validation_set)

    data_list = [(training_sett, validation_set),
                 (standard_train_inputs, standard_valid_inputs),
                 (fixed_train_set, fixed_valid)
                 ]  #,(garbored_train_set,garbored_valid_set)]
    for (t, v) in data_list:

        # accuracys.append(knn(t, train_set_labelts, v, validation_set_labels, False))
        # accuracys.append(logistic_regression(t,train_set_labelts , v, validation_set_labels, False))
        # accuracys.append(run_svm(t, train_set_labelts, v, validation_set_labels, False))
        net_clf = net_class(t, train_set_labelts, v, validation_set_labels,
                            False)
        net_preds = []
        for in_data in v:
            net_preds.append(net_clf.activate(in_data))
        accuracys.append(get_acc(net_preds, validation_set_labels, True))
        print "done iter"

    create_csv(accuracys, 'barplot_pre_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_preprocess(ax, accuracys)
Example #17
def DoIncrementalPCA(dataDir, listOfFiles, scaler, numComponents=10):
	# Do PCA on data
	print "Doing PCA..."
	ipca = IncrementalPCA(n_components=numComponents, batch_size=10)
	for i in range(0, len(listOfFiles)):
		print listOfFiles[i]
		datafiledir = dataDir + '/' + listOfFiles[i]
		data = LoadData(datafiledir, 'train')
		inputs = data['inputs_train']
		targets = data['targets_train']
		numSamples = inputs.shape[0]
		numFeatures = np.prod(inputs.shape[1:])

		assert numSamples == targets.shape[0]
		targets = targets[:,1]
		inputs = inputs.reshape(numSamples, numFeatures)
		# Normalize data
		inputs = scaler.transform(inputs)
		ipca.partial_fit(inputs, y=targets)

	return ipca, ipca.explained_variance_ratio_
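
Examples #4 and #6 pass the fitted scaler and IncrementalPCA objects into a TransformData helper that is not listed. A minimal sketch of what it is assumed to do, mirroring the fit order used above (an assumption, not the real helper):

def TransformData(inputs, ipca, scaler):
    # Standardize with the fitted StandardScaler first, then project onto the
    # IncrementalPCA components, matching the scaler.transform -> partial_fit order above.
    return ipca.transform(scaler.transform(inputs))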
Example #18
def DoNN(dataDir, listOfFiles):
	# First convert all the labels using one-hot encoding
	allLabels = _getAllLabels(dataDir, listOfFiles)
	oneHotLabels = _convertAllLabels(allLabels)

	# Defaults: lbfgs, 200 batch, RELU, constant learning rate,
	#			max_iter=200, tol=1e-4
	i = 0
	clf = MLPClassifier(hidden_layer_sizes=(100,), warm_start=True)
	for datafile in listOfFiles:
		print "Training on", datafile
		datafiledir = dataDir + '/' + datafile

		# Load data
		data = LoadData(datafiledir, 'train')
		inputs = data['inputs_train']
		assert len(inputs.shape) == 4

		# Flatten the inputs so it's 1D
		num_samples = inputs.shape[0]
		num_features = inputs.shape[1] * inputs.shape[2] * inputs.shape[3]
		flatInputs = inputs.reshape(num_samples, num_features)

		# Get correct subset of labels
		start = i * num_samples
		end = i * num_samples + num_samples
		labels = oneHotLabels[start:end,:]
		assert labels.shape[0] == num_samples
		assert num_samples == 1000 # Hard code for now

		# Debugger
		#name = 'labels_' + str(i) + '.out'
		#np.savetxt(name, oneHotLabels[start:end,:], delimiter=',')

		# Fit on this file's inputs and the corresponding one-hot labels
		clf.fit(flatInputs, labels)
		i += 1
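
Both this example and Example #2 convert the integer labels with a _convertAllLabels helper that is not listed. A minimal sketch of the assumed behaviour, one-hot encoding class labels 1..8 with plain NumPy (the real helper might use sklearn's LabelBinarizer instead):

def _convertAllLabels(allLabels, numClasses=8):
    # Map integer class labels 1..numClasses to one-hot rows.
    allLabels = np.asarray(allLabels, dtype=int)
    return np.eye(numClasses)[allLabels - 1]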
Example #19
def main(debug=0):

    dataDir = 'Data/NPZ_data'
    listOfTrainingSetFiles = ['train_1_1000.npz', 'train_1001_2000.npz', \
           'train_2001_3000.npz', 'train_3001_4000.npz', \
           'train_4001_5000.npz', 'train_5001_6000.npz', \
           'train_6001_7000.npz']
    gist_data_file = "Data/gist_data"
    labelsFile = "Data/train.csv"

    # dt = DataTable('Data/val', 'Data/train.csv', 'train')
    # dt.processImages()
    # dt = DataTable('Data/test_128', 'Data/train.csv', 'test')
    # dt.processImages()

    # listOfTrainingSetFiles = ['train_6001_7000.npz']

    # Get the distribution of training set labels
    #PlotHistogramOfLabels(dataDir, listOfTrainingSetFiles)

    # Train Model
    # print "Loading Data"
    # images = scio.loadmat(gist_data_file)['gist']
    # _,labels = GetAllLabels(labelsFile)

    # Get VGG16 features
    # model = Model()
    # VGG16 = model.VGG16_extract()
    # for file in listOfTrainingSetFiles:
    # images, labels = LoadAllTrainData('Data/NPZ_data/', [file], "224_")
    # images = images.astype(np.float32)
    # for i in [0,100,200,300,400,500,600,700,800,900]:
    # feature = VGG16.predict(preprocess_input(images[i:i+100]))
    # np.savez_compressed("4096_{}_{}".format(file,i), inputs_train=feature, targets_train=[-1])
    # testImages = LoadData('Data/NPZ_data/224_test_1_2000.npz', 'test')
    # testImages =  testImages['inputs_test'].astype(np.float32)
    # for i in [0,100,200,300,400,500,600,700,800,900]:
    # feature = VGG16.predict(preprocess_input(testImages[i:i+100]))
    # np.savez_compressed("4096_224_test_1_970_{}.npz".format(i), inputs_test=feature)
    # feature = VGG16.predict(preprocess_input(testImages[i+1000:i+1100]))
    # np.savez_compressed("4096_224_test_1_970_{}.npz".format(i+1000), inputs_test=feature)

    # for file in listOfTrainingSetFiles:
    # allImages = np.empty(shape=(0,0))
    # for i in [0,100,200,300,400,500,600,700,800,900]:
    # data = LoadData("4096_{}_{}.npz".format(file,i), 'train')
    # images = data['inputs_train']
    # if allImages.shape == (0,0):
    # allImages = images
    # else:
    # allImages = np.concatenate((allImages,images))
    # print "Saving ", file
    # print allImages.shape
    # np.savez_compressed("VGG16_"+file, inputs_train=allImages, targets_train=[-1])

    # for file in ['4096_224_test_1_970_{}.npz']:
    # allImages = np.empty(shape=(0,0))
    # for i in [0,100,200,300,400,500,600,700,800,900,1000,1100,1200,1300,1400,1500,1600,1700,1800,1900]:
    # data = LoadData(file.format(i), 'test')
    # images = data['inputs_test']
    # if allImages.shape == (0,0):
    # allImages = images
    # else:
    # allImages = np.concatenate((allImages,images))
    # print "Saving ", file
    # print allImages.shape
    # np.savez_compressed("VGG16_test_1_970.npz", inputs_test=allImages)

    # print "Processing Data"
    # images, _ = LoadAllTrainData('Data/NPZ_data/', listOfTrainingSetFiles, "VGG16_")
    # data = dict()
    # images = images.reshape(-1, 7*7*512)
    # data["inputs_train"] = images[:-88].astype(np.float32)
    # data["targets_train"] = labels[:-88].astype(np.float32)
    # data["inputs_val"] = images[-88:].astype(np.float32)
    # data["targets_val"] = labels[-88:].astype(np.float32)
    # # model = Model(batchSize=128, trainingIterations=100, kernel_1=[5, 5, 3, 32],kernel_2 = [5, 5, 32, 64],linear_hidden_size = 256)
    # model = Model(batchSize=128, trainingIterations=1621,linear_hidden_size = 256)
    # model.createModelVGGTop()
    # model.train(data)

    # # Find incorrect validation data
    # listOfTrainingSetFiles = ['train_6001_7000.npz']
    # images, _ = LoadAllTrainData('Data/NPZ_data/', listOfTrainingSetFiles, "VGG16_")
    # data = dict()
    # print(images[0,:,:,0])
    # test = np.mean(images,axis=(1,2,3))
    # print test.shape
    # np.savetxt("./test.txt", test, delimiter='\n')
    # images = images.reshape(-1, 7*7*512)
    # data["inputs_train"] = images[:-88].astype(np.float32)
    # data["targets_train"] = labels[:-88].astype(np.float32)
    # data["inputs_val"] = images[-88:].astype(np.float32)
    # data["targets_val"] = labels[-88:].astype(np.float32)
    # model = Model(batchSize=128, trainingIterations=5401,linear_hidden_size = 256)
    # model.createModelVGGTop()
    # model.inference("./results/tmp/valid/checkpoint-3240",data["inputs_train"][0:100])
    # print(np.argmax(data["targets_train"][0:100], 1))

    # testImages = LoadData('Data/NPZ_data/VGG16_test_1_970.npz', 'test')
    # testImages = testImages["inputs_test"]
    # print(testImages[0,:,:,0])

    # # Test VGG16
    # listOfTrainingSetFiles = ['train_6001_7000.npz']
    # images, _ = LoadAllTrainData('Data/NPZ_data/', listOfTrainingSetFiles, "VGG16_")
    # testImages = LoadData('Data/NPZ_data/VGG16_test_1_2000.npz', 'test')
    # testImages = testImages['inputs_test'].reshape(-1,7*7*512)
    # images = testImages[0:100]
    # # ShowImage(images[1000],"Test")
    # images = images.reshape(-1,7*7*512)
    # for i in range(10):
    # ShowImage(data["inputs_val"][i,:,:,:],"before_{}".format(i))
    # test = np.array([imresize(data["inputs_val"][0],(224,224),interp="lanczos")])
    # print(images[-88])
    # print test.shape
    # for i,img in enumerate(data["inputs_val"]):
    # if i==0:
    # continue
    # test = np.concatenate((test,[imresize(img,(224,224),interp="lanczos")]))
    # test = test.astype(np.float32)
    # for i in range(10):
    # ShowImage(test[i,:,:,:],i)
    # print(test[0])
    # print test.shape
    # model = Model()
    # # VGG16 = model.VGG16_vanilla()
    # VGG16 = model.VGG16_test()
    # preds = VGG16.predict(images)
    # print(decode_predictions(preds))

    # Get inference results
    testImages = LoadData('Data/NPZ_data/VGG16_test_1_2000.npz', 'test')
    testImages = testImages['inputs_test'].reshape(-1, 7 * 7 * 512)
    model = Model(linear_hidden_size=512)
    model.createModelVGGTop()
    model.inference(
        "./results/VGG16/VGGTop_Hidden_512_WD_0.0/valid/checkpoint-1620",
        testImages)
Example #20
def Train(model, forward, backward, update, eps, momentum, num_epochs,
          batch_size, network, num_hiddens):
    """Trains a simple MLP.

    Args:
        model:           Dictionary of model weights.
        forward:         Forward prop function.
        backward:        Backward prop function.
        update:          Update weights function.
        eps:             Learning rate.
        momentum:        Momentum.
        num_epochs:      Number of epochs to run training for.
        batch_size:      Mini-batch size, -1 for full batch.
        network:         Network name (used only by the commented-out plotting calls).
        num_hiddens:     Hidden layer sizes (used only by the commented-out plotting calls).

    Returns:
        stats:           Dictionary of training statistics.
            - train_ce:       Training cross entropy.
            - valid_ce:       Validation cross entropy.
            - train_acc:      Training accuracy.
            - valid_acc:      Validation accuracy.
    """
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = LoadData('../toronto_face.npz')
    rnd_idx = np.arange(inputs_train.shape[0])
    
    train_ce_list = []
    valid_ce_list = []
    train_acc_list = []
    valid_acc_list = []
    v = { }
    v['W1'], v['W2'] , v['W3'] = np.zeros(model['W1'].shape), np.zeros(model['W2'].shape), np.zeros(model['W3'].shape)
    v['b1'], v['b2'] , v['b3'] = np.zeros(model['b1'].shape), np.zeros(model['b2'].shape), np.zeros(model['b3'].shape)  
    num_train_cases = inputs_train.shape[0]
    if batch_size == -1:
        batch_size = num_train_cases
    num_steps = int(np.ceil(num_train_cases / float(batch_size)))
    for epoch in range(num_epochs):
        np.random.shuffle(rnd_idx)
        inputs_train = inputs_train[rnd_idx]
        target_train = target_train[rnd_idx]
        for step in range(num_steps):
            # Forward prop.
            start = step * batch_size
            end = min(num_train_cases, (step + 1) * batch_size)
            x = inputs_train[start: end]
            t = target_train[start: end]

            var = forward(model, x)
            prediction = Softmax(var['y'])
            #print(prediction)
            train_ce = -np.sum(t * np.log(prediction)) / x.shape[0]
            train_acc = (np.argmax(prediction, axis=1) ==
                         np.argmax(t, axis=1)).astype('float').mean()

            #print(('Epoch {:3d} Step {:2d} Train CE {:.5f} '
            #       'Train Acc {:.5f}').format(
            #    epoch, step, train_ce, train_acc))

            # Compute error.
            error = (prediction - t) / x.shape[0]
            
            # Backward prop.
            backward(model, error, var)

            # Update weights.
            update(model, eps, momentum, v)
#        for i in range(len(prediction)):
#            if np.all( prediction[i] > 0.1 ):
#                plt.matshow( x[i].reshape(48,48) ,fignum = np.argmax(prediction[i]) , cmap=plt.cm.gray)
#                plt.show()    
        
        valid_ce, valid_acc = Evaluate(
            inputs_valid, target_valid, model, forward, batch_size=batch_size)
        print(('Epoch {:3d} '
               'Validation CE {:.5f} '
               'Validation Acc {:.5f}\n').format(
            epoch, valid_ce, valid_acc))
        train_ce_list.append((epoch, train_ce))
        train_acc_list.append((epoch, train_acc))
        valid_ce_list.append((epoch, valid_ce))
        valid_acc_list.append((epoch, valid_acc))
        

    #DisplayPlot1(train_ce_list, valid_ce_list, 'Cross Entropy', network,'CE' , num_hiddens , number=0)
    #DisplayPlot1(train_acc_list, valid_acc_list, 'Accuracy', network, 'AC', num_hiddens , number=1)    
    #DisplayPlot(train_ce_list, valid_ce_list, 'Cross Entropy',network,'CE' , eps , momentum, batch_size , number=0)
    #DisplayPlot(train_acc_list, valid_acc_list, 'Accuracy',network, 'AC', eps , momentum, batch_size , number=1)
    
    
    var = forward(model, inputs_train)
    prediction = Softmax(var['y'])

    
    print(prediction.shape)
    print(target_train.shape)
    print(np.argmax(prediction, axis=1)+1)
    print(max(np.argmax(target_train, axis=1)))
    
    
    print()
    train_ce, train_acc = Evaluate(
        inputs_train, target_train, model, forward, batch_size=batch_size)
    valid_ce, valid_acc = Evaluate(
        inputs_valid, target_valid, model, forward, batch_size=batch_size)
    test_ce, test_acc = Evaluate(
        inputs_test, target_test, model, forward, batch_size=batch_size)

    print('CE: Train %.5f Validation %.5f Test %.5f' %
          (train_ce, valid_ce, test_ce))
    print('Acc: Train {:.5f} Validation {:.5f} Test {:.5f}'.format(
        train_acc, valid_acc, test_acc))

    stats = { 
        'train_ce': train_ce_list,
        'valid_ce': valid_ce_list,
        'train_acc': train_acc_list,
        'valid_acc': valid_acc_list
    }
    
#    if prediction < 0.2:
#        plt.matshow(model['W1'][:,:,0,i] , cmap=plt.cm.gray)
#        plt.show()
#    train_acc = (np.argmax(prediction, axis=1) ==
#                         np.argmax(t, axis=1)).astype('float')
    
    return model, stats 
Example #21
def q2():
    # Question 4.2 and 4.3
    K = 7
    iters = 10
    minVary = 0.01
    randConst = 100.0

    # load data
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData(
        '../toronto_face.npz')

    # Train a MoG model with 7 components on all training data, i.e., inputs_train,
    # with both original initialization and kmeans initialization.

    p, mu, vary, log_likelihood = mogEM(inputs_train, K, iters, randConst, minVary)
    ShowMeans(mu, 1)
    ShowMeans(vary, 2)
    print p
Example #22
def main():
    """Trains a NN."""
    model_fname = 'nn_model.npz'
    stats_fname = 'nn_stats.npz'

    # Hyper-parameters. Modify them if needed.
    num_hiddens = [16, 32]  #16
    #    eps = 0.01
    #    momentum = 0.0
    #    num_epochs = 1000
    #    batch_size = 100
    eps = 0.1
    momentum = 0.9
    num_epochs = 100
    batch_size = 100

    # Input-output dimensions.
    num_inputs = 2304
    num_outputs = 7

    # Initialize model.
    model = InitNN(num_inputs, num_hiddens, num_outputs)

    # Uncomment to reload trained model here.
    #    model = Load(model_fname) #load a trained model
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
    target_test = LoadData('toronto_face.npz')

    #    inputs_train1, inputs_valid1, inputs_test1, target_train1, target_valid1, \
    #    target_test1 = LoadData('test.npz')

    #    img = Image.open("testImage.png")
    #    arr = np.array(img)
    #    np.savez("test", arr)
    #    inputs_train1 = arr.T/255

    #Uncomment to plot low softmax probabilities (high uncertainity images)
    #    prediction_stats = NNForward(model, inputs_test)
    #    prediction = Softmax(prediction_stats['y'])
    #    plot_uncertain_images(inputs_test, target_test, prediction)

    #    prediction_stats = NNForward(model, inputs_train1)
    #    prediction = Softmax(prediction_stats['y'])
    #    plot_uncertain_images(inputs_train1, target_test, prediction)

    # Check gradient implementation.
    print('Checking gradients...')

    x = np.random.rand(10, 48 * 48) * 0.1
    CheckGrad(model, NNForward, NNBackward, 'W3', x)
    CheckGrad(model, NNForward, NNBackward, 'b3', x)
    CheckGrad(model, NNForward, NNBackward, 'W2', x)
    CheckGrad(model, NNForward, NNBackward, 'b2', x)
    CheckGrad(model, NNForward, NNBackward, 'W1', x)
    CheckGrad(model, NNForward, NNBackward, 'b1', x)

    # Train model.
    stats = Train(model, NNForward, NNBackward, NNUpdate, eps, momentum,
                  num_epochs, batch_size)

    # Save the trained model.
    Save(model_fname, model)
Example #23
def main():
    K = 7
    iters = 20
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData(
        '../toronto_face.npz')
    means = KMeans(inputs_train, K, iters)
    ShowMeans(means, 0)
Example #24
def main():
    """Trains a NN."""
    model_fname = 'nn_model.npz'
    stats_fname = 'nn_stats.npz'

    # Hyper-parameters. Modify them if needed.
    num_hiddens = [16, 8]
    eps = 0.01
    momentum = 0.8
    num_epochs = 250
    batch_size = 100

    # Input-output dimensions.
    num_inputs = 2304
    num_outputs = 7

    # Initialize model.
    model = InitNN(num_inputs, num_hiddens, num_outputs)

    # Uncomment to reload trained model here.
    model = Load('nn_model.npz')
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData(
        '../toronto_face.npz')
    var = NNForward(model, inputs_train[144, :])
    y = var['y']
    y = Softmax(y)
    t = target_train[144, :]
    PlotProbabilities(y, t)

    # Check gradient implementation.
    print('Checking gradients...')
    x = np.random.rand(10, 48 * 48) * 0.1
    CheckGrad(model, NNForward, NNBackward, 'W3', x)
    CheckGrad(model, NNForward, NNBackward, 'b3', x)
    CheckGrad(model, NNForward, NNBackward, 'W2', x)
    CheckGrad(model, NNForward, NNBackward, 'b2', x)
    CheckGrad(model, NNForward, NNBackward, 'W1', x)
    CheckGrad(model, NNForward, NNBackward, 'b1', x)
Example #25
    lX = standard_data(X)
    for vd, vt in zip(lX, Y):
        vtarr = [int(i == vt - 1) for i in range(0, 7)]
        vds.addSample(vd, vtarr)
    ttrainer = BackpropTrainer(nnet,
                               vds,
                               learningrate=0.005,
                               momentum=0,
                               weightdecay=0.05,
                               batchlearning=False,
                               verbose=True)
    ttstresult = percentError(ttrainer.testOnClassData(), Y)

    print " Classification rate for the trained Neural net is: %5.2f%%" % (
        100 - ttstresult)
    res_f.close()
    return ttrainer.testOnClassData()


if __name__ == '__main__':

    ttraining_set, ttrain_set_labels, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    training_set, train_set_labels, ids = LoadData('labeled_images.mat', True,
                                                   False)
    # net_class(training_set, train_set_labels, validation_set, validation_set_labels)
    net_class(ttraining_set, ttrain_set_labels, validation_set,
              validation_set_labels)

    # load_net_and_check_errorate(validation_set, validation_set_labels)
Example #26
def _DoAutoEncoder(dataDir, listOfFiles):
	# Get first dataset
	print listOfFiles[0]
	datafiledir = dataDir + '/' + listOfFiles[0]
	data = LoadData(datafiledir, 'train')
	inputs = data['inputs_train']
	numSamples = inputs.shape[0]
	numFeatures = np.prod(inputs.shape[1:])

	# Flatten inputs
	inputs = inputs.reshape(numSamples, numFeatures)

	# Initialize model
	autoencoder, encoder, decoder = _initAutoEncoder(numFeatures, 100)

	# Fit first data set
	early_stop = EarlyStopping(min_delta=0.000001)
	hist = autoencoder.fit(inputs, inputs, batch_size=20, shuffle=True, \
							 nb_epoch=70, validation_split=0.2, callbacks=[early_stop])
	tmp = np.array(hist.history['loss'])
	print (hist.history['loss'])

	# Save model
	#autoencoder.save('my_model_test.h5')

	# Plot loss
	plt.figure()
	plt.plot(tmp)
	plt.show()

	# Fit the rest
	for i in range(1,len(listOfFiles)):
	#	autoencoder = load_model('my_model_test.h5')
		print listOfFiles[i]
		datafiledir = dataDir + '/' + listOfFiles[i]
		data = LoadData(datafiledir, 'train')
		inputs = data['inputs_train']
		assert numSamples == inputs.shape[0]
		assert numFeatures == np.prod(inputs.shape[1:])
		inputs = inputs.reshape(numSamples, numFeatures)
		hist = autoencoder.fit(inputs, inputs, batch_size=20, shuffle=True, \
								 nb_epoch=70, validation_split=0.2, callbacks=[early_stop])
		tmp = np.hstack((tmp, hist.history['loss']))
		print(hist.history['loss'])
		plt.figure()
		plt.plot(tmp)
		plt.show()

	np.savez('loss', loss=tmp)

	print "Saving model..."
	autoencoder.save('my_model_test.h5')

	# Draw first image
	datafiledir = dataDir + '/' + listOfFiles[0]
	data = LoadData(datafiledir, 'train')
	inputs = data['inputs_train']
	inputsFlat = inputs.reshape(numSamples, numFeatures)
	firstImage = inputsFlat[0,:]
	print firstImage.shape
	firstImage = firstImage.reshape(1,numFeatures)
	encoded_img = encoder.predict(firstImage)
	decoded_img = decoder.predict(encoded_img)
	decoded_img = decoded_img.reshape(1,128,128,3)
	ShowImage(decoded_img[0])
Example #27
def main():
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData(
        '../toronto_face.npz')
    find_uncertain(inputs_test, 33, 33, 'NN')
Example #28
    l2a = tf.nn.relu(
        tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1], padding='SAME') + b2)

    l2 = tf.nn.max_pool(l2a,
                        ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1],
                        padding='SAME')

    l3 = tf.reshape(l2, [-1, w3.get_shape().as_list()[0]])

    pyx = tf.matmul(l3, w3) + b3
    return pyx


inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
    target_test = LoadData('data/toronto_face.npz')

dataDim = {'h': 48, 'w': 48, 'c': 1}
nfilters = [8, 16]
fsize = 5
num_outpus = 7
eps = 0.001
num_epochs = 40
batch_size = 100

# HxWxC input img
trX = inputs_train.reshape(-1, dataDim['h'], dataDim['w'], dataDim['c'])
trY = target_train

valX = inputs_valid.reshape(-1, dataDim['h'], dataDim['w'], dataDim['c'])
valY = target_valid
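
The model fragment above uses convolution and bias variables (w2, b2, w3, b3, plus the w1 and pooled l1 that were cut off) whose definitions are not shown. A hedged sketch of how they are typically created in this style of TensorFlow 1.x code, sized from the nfilters, fsize and dataDim values defined above (an assumption about the shapes, not the original initialization):

import tensorflow as tf

def init_weights(shape):
    # Small random-normal initialization, the common TF1 tutorial pattern.
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

w1 = init_weights([fsize, fsize, dataDim['c'], nfilters[0]])  # first conv filters
b1 = tf.Variable(tf.zeros([nfilters[0]]))
w2 = init_weights([fsize, fsize, nfilters[0], nfilters[1]])   # second conv filters
b2 = tf.Variable(tf.zeros([nfilters[1]]))
w3 = init_weights([nfilters[1] * 12 * 12, num_outpus])        # fully connected; 48 -> 24 -> 12 after two 2x2 pools
b3 = tf.Variable(tf.zeros([num_outpus]))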
Example #29
def Train(model, forward, backward, update, eps, momentum, num_epochs,
          batch_size):
    """Trains a simple MLP.

    Args:
        model:           Dictionary of model weights.
        forward:         Forward prop function.
        backward:        Backward prop function.
        update:          Update weights function.
        eps:             Learning rate.
        momentum:        Momentum.
        num_epochs:      Number of epochs to run training for.
        batch_size:      Mini-batch size, -1 for full batch.

    Returns:
        stats:           Dictionary of training statistics.
            - train_ce:       Training cross entropy.
            - valid_ce:       Validation cross entropy.
            - train_acc:      Training accuracy.
            - valid_acc:      Validation accuracy.
    """
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = LoadData('../toronto_face.npz')
    rnd_idx = np.arange(inputs_train.shape[0])
    train_ce_list = []
    valid_ce_list = []
    train_acc_list = []
    valid_acc_list = []
    num_train_cases = inputs_train.shape[0]
    if batch_size == -1:
        batch_size = num_train_cases
    num_steps = int(np.ceil(num_train_cases / float(batch_size)))
    for epoch in range(num_epochs):
        np.random.shuffle(rnd_idx)
        inputs_train = inputs_train[rnd_idx]
        target_train = target_train[rnd_idx]
        for step in range(num_steps):
            # Forward prop.
            start = step * batch_size
            end = min(num_train_cases, (step + 1) * batch_size)
            x = inputs_train[start:end]
            t = target_train[start:end]

            var = forward(model, x)
            prediction = Softmax(var['y'])

            train_ce = -np.sum(t * np.log(prediction)) / x.shape[0]
            train_acc = (np.argmax(prediction, axis=1) == np.argmax(
                t, axis=1)).astype('float').mean()
            print(('Epoch {:3d} Step {:2d} Train CE {:.5f} '
                   'Train Acc {:.5f}').format(epoch, step, train_ce,
                                              train_acc))

            # Compute error.
            error = (prediction - t) / x.shape[0]

            # Backward prop.
            backward(model, error, var)

            # Update weights.
            update(model, eps, momentum)

        valid_ce, valid_acc = Evaluate(inputs_valid,
                                       target_valid,
                                       model,
                                       forward,
                                       batch_size=batch_size)
        print(('Epoch {:3d} '
               'Validation CE {:.5f} '
               'Validation Acc {:.5f}\n').format(epoch, valid_ce, valid_acc))
        train_ce_list.append((epoch, train_ce))
        train_acc_list.append((epoch, train_acc))
        valid_ce_list.append((epoch, valid_ce))
        valid_acc_list.append((epoch, valid_acc))
        DisplayPlot(train_ce_list, valid_ce_list, 'Cross Entropy', number=0)
        DisplayPlot(train_acc_list, valid_acc_list, 'Accuracy', number=1)

    print()
    train_ce, train_acc = Evaluate(inputs_train,
                                   target_train,
                                   model,
                                   forward,
                                   batch_size=batch_size)
    valid_ce, valid_acc = Evaluate(inputs_valid,
                                   target_valid,
                                   model,
                                   forward,
                                   batch_size=batch_size)
    test_ce, test_acc = Evaluate(inputs_test,
                                 target_test,
                                 model,
                                 forward,
                                 batch_size=batch_size)
    print('CE: Train %.5f Validation %.5f Test %.5f' %
          (train_ce, valid_ce, test_ce))
    print('Acc: Train {:.5f} Validation {:.5f} Test {:.5f}'.format(
        train_acc, valid_acc, test_acc))

    stats = {
        'train_ce': train_ce_list,
        'valid_ce': valid_ce_list,
        'train_acc': train_acc_list,
        'valid_acc': valid_acc_list
    }

    return model, stats
Example #30
def main():
    a = np.array([[[1], [2]], [[2], [3]], [[3], [2]], [[4], [3]]])
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, \
        target_test = LoadData('../toronto_face.npz')
    print(inputs_train.shape)
    print(a.shape)