def test(depth, p, dataset, num_epochs=200, seed=None):
    if seed is None:
        seed = 0

    np.random.seed(seed)

    data = None
    if dataset == "mnist":
        data = mnist.load().astype(np.float32)
    elif dataset == "cifar10":
        data = cifar10.load().astype(np.float32)

    num_observations, input_dim = data.shape
    data_split_index = int(num_observations * 0.9)
    training_data_iterator = DataIterator(batch_size, data[:data_split_index],
                                          data[:data_split_index])
    validation_data_iterator = DataIterator(batch_size,
                                            data[data_split_index:],
                                            data[data_split_index:])

    # make net
    net = Network(input_dim, input_dim, hidden_layers=[1000] * depth, p=p)
    losses = net.train(training_data_iterator,
                       validation_data_iterator,
                       num_epochs=num_epochs)
    net.close()

    return losses
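A minimal usage sketch for the helper above; Network, DataIterator, the dataset loaders and the module-level batch_size are assumed to be defined in the surrounding file, so this only illustrates the call signature.

# Hypothetical call: a 3-layer dropout network trained briefly on MNIST.
losses = test(depth=3, p=0.5, dataset="mnist", num_epochs=10, seed=42)
print(losses)
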
def create_train_test_split(digits,
                            n_testimages,
                            transpose_training=True,
                            transpose_test=False):
    # load mnist data
    x_train, y_train, x_test, y_test = mnist.load()

    # pick the images of the first digit from training and test data
    d1_train = pick_digit_from_data(x_train, y_train, digits[0])
    d1_test = pick_digit_from_data(x_test, y_test, digits[0])[0:n_testimages]

    # pick the images of the second digit from training and test data
    d2_train = pick_digit_from_data(x_train, y_train, digits[1])
    d2_test = pick_digit_from_data(x_test, y_test, digits[1])[0:n_testimages]

    # Transpose the data if the corresponding flags are set to True
    if transpose_training:
        d1_train = d1_train.T
        d2_train = d2_train.T

    if transpose_test:
        d1_test = d1_test.T
        d2_test = d2_test.T

    return d1_train, d1_test, d2_train, d2_test
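A hedged usage sketch; pick_digit_from_data and mnist.load are assumed to behave as in the snippet above.

# Hypothetical usage: a 3-vs-8 split with 200 test images per digit.
d3_train, d3_test, d8_train, d8_test = create_train_test_split(
    digits=(3, 8), n_testimages=200)
print(d3_train.shape, d8_test.shape)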
Example #3
def process_data(task, train_size=60000, test_size=10000, val_perc=0.1):
    '''
    Creates the datasets to be used in the logistic regression task.
    '''
    if task == 'logistic_regression':
        excluded = [0, 1, 4, 5, 6, 7, 8, 9]
        true_class = 2
    else:
        raise ValueError("Only the 'logistic_regression' task is supported here.")
    X_train, Y_train, X_test, Y_test = mnist.load()

    train_data, test_data = (X_train, Y_train), (X_test, Y_test)
    train_data, test_data = partition_dataset(train_data, test_data,
                                              train_size, test_size)
    for digit in excluded:
        train_data, test_data = remove_digit(train_data, test_data, digit)

    train_data, val_data = create_validation_set(train_data, val_perc)
    train_data = normalize_data(train_data)
    val_data = normalize_data(val_data)
    test_data = normalize_data(test_data)
    train_data = append_ones(train_data)
    val_data = append_ones(val_data)
    test_data = append_ones(test_data)

    if task == 'logistic_regression':
        train_data = binary_class(train_data, true_class)
        val_data = binary_class(val_data, true_class)
        test_data = binary_class(test_data, true_class)
    return train_data, val_data, test_data
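A minimal sketch of how process_data might be called, assuming the helper functions it uses (partition_dataset, remove_digit, create_validation_set, normalize_data, append_ones, binary_class) are defined in the surrounding module.

# Hypothetical usage for the "digit 2 vs. rest" logistic regression task.
train_data, val_data, test_data = process_data('logistic_regression')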
Example #4
def load_binary_dataset(class1: int, class2: int):
    """
    Loads, prunes, and splits the dataset into train and validation sets.
    """
    train_size = 20000
    val_size = 10000
    X_train, Y_train, X_val, Y_val = mnist.load()

    # First 20000 images from train set
    X_train, Y_train = X_train[:train_size], Y_train[:train_size]
    # First 10000 images from the test set, used as validation
    X_val, Y_val = X_val[:val_size], Y_val[:val_size]
    X_train, Y_train = binary_prune_dataset(
        class1, class2, X_train, Y_train
    )
    X_val, Y_val = binary_prune_dataset(
        class1, class2, X_val, Y_val
    )
    # Reshape to (N, 1)
    Y_train = Y_train.reshape(-1, 1)
    Y_val = Y_val.reshape(-1, 1)

    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")

    return X_train, Y_train, X_val, Y_val
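A short usage sketch, assuming binary_prune_dataset and mnist.load behave as used above.

# Hypothetical usage: a 2-vs-3 binary classification split.
X_train, Y_train, X_val, Y_val = load_binary_dataset(2, 3)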
Example #5
def get_mnist():
    """
    Load the MNIST data
    """
    mnist.init()
    x_train, t_train, x_test, t_test = mnist.load()
    print("Loaded MNIST data")
    return x_train, t_train, x_test, t_test
Example #6
def main():
    training_data, validation_data, test_data = mnist.load()
    net = mlp.MLP([784, 36, 10])
    epochs = 500
    mini_batch_size = 10
    learning_rate = 0.5
    lmbda = 5.0
    drop_prob = 0.5
    net.sgd(training_data, epochs, mini_batch_size, test_data, learning_rate, lmbda, drop_prob)
Example #7
def main():
    training_data, validation_data, test_data = mnist.load()
    net = mlp.MLP([784, 36, 10])
    epochs = 500
    mini_batch_size = 10
    learning_rate = 0.5
    lmbda = 5.0
    drop_prob = 0.5
    net.sgd(training_data, epochs, mini_batch_size, test_data, learning_rate,
            lmbda, drop_prob)
Example #8
def load_data():
    mnist = {}
    mnist["training_images"], mnist["training_labels"], mnist[
        "test_images"], mnist["test_labels"] = load()
    mnist["training_images"] = mnist["training_images"].reshape(
        (60000, 1, 28, 28))
    mnist["test_images"] = mnist["test_images"].reshape((10000, 1, 28, 28))
    mnist["training_labels"] = one_hot(mnist["training_labels"])
    mnist["test_labels"] = one_hot(mnist["test_labels"])
    return mnist
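A small sketch of what the returned dictionary looks like; load() and one_hot() are assumed to behave as used above, with one_hot producing one column per class.

# Hypothetical usage:
data = load_data()
print(data["training_images"].shape)  # (60000, 1, 28, 28)
print(data["training_labels"].shape)  # (60000, 10), assuming 10-class one-hot labels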
Example #9
def get_mnist(num_train):
    x_train, y_train, x_test, y_test = mnist.load()
    x_train, y_train = shuffle(x_train, y_train)
    x_train, y_train = x_train[:num_train], y_train[:num_train]
    y_train = one_hot(y_train, 10)
    y_test = one_hot(y_test, 10)
    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=.3)
    return x_train, y_train, x_val, y_val, x_test, y_test
Example #10
def test_feedforward_network_n2048(benchmark):
    global training_data
    if not training_data:
        training_data = mnist.load("train")
    net = feedforward.Network([28 * 28, 2048, 10])

    @benchmark
    def train():
        net.stochastic_gradient_descent(training_data,
                                        1000,
                                        epochs=10,
                                        mini_batch_size=10,
                                        learning_rate=3.0)
Example #11
def init(digit1, digit2, num_train, num_test):
    # Download dataset
    if not os.path.isfile("mnist.pkl"):
        mnist.init()

    # Load whole dataset into memory
    x_train, t_train, x_test, t_test = mnist.load()

    # Subset training data
    if num_train > 0:
        indices1 = [
            i for i, j in enumerate(t_train)
            if ((j == digit1) or (j == digit2))
        ]
        x = x_train[indices1, :]
        y = t_train[indices1]
        y = y.astype(int)
        y[y == digit1] = -1
        y[y == digit2] = 1
        ind1 = np.random.choice(np.arange(y.size), num_train)
        x = x[ind1, :]
        y = y[ind1]
    else:
        x = None
        y = None

    # Subset test data
    if num_test > 0:
        indices2 = [
            i for i, j in enumerate(t_test) if ((j == digit1) or (j == digit2))
        ]
        xtest = x_test[indices2, :]
        ytest = t_test[indices2]
        ytest = ytest.astype(int)
        ytest[ytest == digit1] = -1
        ytest[ytest == digit2] = 1
        ind2 = np.random.choice(np.arange(ytest.size), num_test)
        xtest = xtest[ind2, :]
        ytest = ytest[ind2]
    else:
        xtest = None
        ytest = None

    # Return
    return (x, y, xtest, ytest)
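A usage sketch for init, assuming mnist.init and mnist.load work as above; the returned labels are in {-1, +1}.

# Hypothetical usage: a 3-vs-5 subset with 2000 training and 500 test points.
x, y, xtest, ytest = init(digit1=3, digit2=5, num_train=2000, num_test=500)
print(x.shape, set(y.tolist()))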
Example #12
def format():
    traindata, trainlabels, testdata, testlabels = mnist.load()

    trainlabels = list(trainlabels)

    # group images by label and scale pixel values from [0, 255] to [0, 1]
    trainlabeled = [[], [], [], [], [], [], [], [], [], []]
    for i in range(len(traindata)):
        trainlabeled[trainlabels[i]].append(traindata[i] / 255)

    testlabels = list(testlabels)

    # group images by label and scale pixel values from [0, 255] to [0, 1]
    testlabeled = [[], [], [], [], [], [], [], [], [], []]
    for i in range(len(testdata)):
        testlabeled[testlabels[i]].append(testdata[i] / 255)

    return trainlabeled, testlabeled
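A quick sketch showing how the per-digit lists returned by format() might be inspected.

# Hypothetical usage: count the training images available per digit.
trainlabeled, testlabeled = format()
for digit, images in enumerate(trainlabeled):
    print(digit, len(images))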
Example #13
def main():
    if len(sys.argv) < 3:
        print("Usage: python3 clustering.py [n] [p]")
        return

    n_proj = int(sys.argv[1])
    p = float(sys.argv[2])

    x_train, t_train, _, _ = mnist.load()
    n, m = 360, 784
    data = np.zeros((n, m), dtype='float')
    labels = np.zeros((n, 1), dtype='int')
    cnt, idx = 0, 0
    while cnt < 300:
        if t_train[idx] == 3 or t_train[idx] == 8 or t_train[idx] == 9:
            data[cnt, :] = (x_train[idx, :] / LA.norm(x_train[idx, :], 1))
            labels[cnt] = t_train[idx]
            cnt += 1
        idx += 1
    while cnt < 360:
        if t_train[idx] != 3 and t_train[idx] != 8 and t_train[idx] != 9:
            data[cnt, :] = (x_train[idx, :] / LA.norm(x_train[idx, :], 1))
            labels[cnt] = t_train[idx]
            cnt += 1
        idx += 1

    W_PCA = optimization.PCA(data, n_proj)

    data_proj_PCA = data.dot(W_PCA)
    data_mean = np.zeros((data.shape[1]), dtype='float')
    for i in range(data.shape[1]):
        data_mean[i] = np.mean(data[:, i])

    gm = optimization.gen_mean(data, data_mean, p)
    _, W_PCAGM = optimization.PCAGM(data, gm, n_proj, W_PCA, p)
    data_proj_PCAGM = data.dot(W_PCAGM)

    k = 3
    prec_1 = make_clustering(data_proj_PCA, labels, k)
    prec_2 = make_clustering(data_proj_PCAGM, labels, k)

    print("Precision de la clasificacion (PCA): ", prec_1)
    print("Precision de la clasificacion (PCA GM): ", prec_2)
Example #14
def load_full_mnist():
    """
    Loads and splits the dataset into train and validation sets.
    """
    train_size = 20000
    test_size = 10000
    X_train, Y_train, X_val, Y_val = mnist.load()

    # First 20000 images from train set
    X_train, Y_train = X_train[:train_size], Y_train[:train_size]
    # Last 10000 images from the test set, used as validation
    X_val, Y_val = X_val[-test_size:], Y_val[-test_size:]
    # Reshape to (N, 1)
    Y_train = Y_train.reshape(-1, 1)
    Y_val = Y_val.reshape(-1, 1)

    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")

    return X_train, Y_train, X_val, Y_val
Example #15
def load_split_tasks(n):
    tc = 3000
    vc = 1000
    nclass = 10
    ds, vds = mnist.load(shuffle=False, train_count=tc, val_count=vc)
    sds = []
    for c in range(nclass):
        sds.append((ds[tc * c:tc * (c + 1)], vds[vc * c:vc * (c + 1)]))
    np.random.shuffle(sds)
    tasks = []
    for g in range(0, nclass, n):
        cds = sds[g:g + n]
        if len(cds) == n:
            ct, cv = zip(*cds)
            ct = np.array([i for j in ct for i in j])
            cv = np.array([i for j in cv for i in j])
            print(ct.shape, cv.shape)
            tasks.append((ct, cv))
            x, y = map(np.array, zip(*ct))
    return tasks
Example #16
def total_mmv():
    traindata, _, _, _ = mnist.load()

    scale = []
    for t in traindata:
        scale.append(t / 255)

    mmv_total = {
        'mean': np.array([]),
        'median': np.array([]),
        'var': np.array([]),
        'std': np.array([])
    }

    mmv_total['mean'] = np.mean(scale, axis=0)
    mmv_total['median'] = np.median(scale, axis=0)
    mmv_total['var'] = np.var(scale, axis=0)
    mmv_total['std'] = np.std(scale, axis=0)

    return mmv_total
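A small sketch of how total_mmv might be used, assuming each row of the training data is a flattened 28x28 image.

# Hypothetical usage: reshape the per-pixel mean back into image form.
stats = total_mmv()
mean_image = stats['mean'].reshape(28, 28)
print(mean_image.shape, stats['std'].shape)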
Example #17

def create_train_test_split(digits,
                            n_testimages,
                            transpose_training=True,
                            transpose_test=False):

    x_train, y_train, x_test, y_test = mnist.load()

    d1_train = pick_digit_from_data(x_train, y_train, digits[0])
    d1_test = pick_digit_from_data(x_test, y_test, digits[0])[0:n_testimages]

    d2_train = pick_digit_from_data(x_train, y_train, digits[1])
    d2_test = pick_digit_from_data(x_test, y_test, digits[1])[0:n_testimages]

    if transpose_training:
        d1_train = d1_train.T
        d2_train = d2_train.T

    if transpose_test:
        d1_test = d1_test.T
        d2_test = d2_test.T

    return d1_train, d1_test, d2_train, d2_test
Example #18
def load_full_mnist(val_percentage: float):
    """
    Loads and splits the dataset into train, validation and test.
    """
    train_size = 20000
    test_size = 2000
    X_train, Y_train, X_test, Y_test = mnist.load()

    # First 20000 images from train set
    X_train, Y_train = X_train[:train_size], Y_train[:train_size]
    # Last 2000 images from test set
    X_test, Y_test = X_test[-test_size:], Y_test[-test_size:]
    # Reshape to (N, 1)
    Y_train = Y_train.reshape(-1, 1)
    Y_test = Y_test.reshape(-1, 1)
    X_train, Y_train, X_val, Y_val = train_val_split(X_train, Y_train,
                                                     val_percentage)
    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")
    print(f"Test shape: X: {X_test.shape}, Y: {Y_test.shape}")

    return X_train, Y_train, X_val, Y_val, X_test, Y_test
Example #19
def load_binary_dataset(class1: int, class2: int, val_percentage: float):
    """
    Loads, prunes and splits the dataset into train, validation and test.
    """
    train_size = 20000
    test_size = 2000
    X_train, Y_train, X_test, Y_test = mnist.load()

    # First 20000 images from train set
    X_train, Y_train = X_train[:train_size], Y_train[:train_size]
    # Last 2000 images from test set
    X_test, Y_test = X_test[-test_size:], Y_test[-test_size:]
    X_train, Y_train = binary_prune_dataset(class1, class2, X_train, Y_train)
    X_test, Y_test = binary_prune_dataset(class1, class2, X_test, Y_test)
    # Reshape to (N, 1)
    Y_train = Y_train.reshape(-1, 1)
    Y_test = Y_test.reshape(-1, 1)
    X_train, Y_train, X_val, Y_val = train_val_split(X_train, Y_train,
                                                     val_percentage)
    print(f"Train shape: X: {X_train.shape}, Y: {Y_train.shape}")
    print(f"Validation shape: X: {X_val.shape}, Y: {Y_val.shape}")
    print(f"Test shape: X: {X_test.shape}, Y: {Y_test.shape}")

    return X_train, Y_train, X_val, Y_val, X_test, Y_test
Example #20
def mnist_preprocess(data):
    data['data'] /= 255.
    return data

# Logger setup
logger = Logger('MNIST AE',
                train_log_mode='TRAIN_LOSS_ONLY',
                test_log_mode='TEST_LOSS_ONLY')

# Configure GPU Device
if args.gpu >= 0:
    cuda.check_cuda_available()
xp = cuda.cupy if args.gpu >= 0 else np

# loading dataset
dataset = mnist.load()
dim = dataset['train']['data'][0].size
N_train = len(dataset['train']['target'])
N_test = len(dataset['test']['target'])
train_data_dict = {'data':dataset['train']['data'].reshape(N_train, dim).astype(np.float32)}
test_data_dict = {'data':dataset['test']['data'].reshape(N_test, dim).astype(np.float32)}
train_data = DataFeeder(train_data_dict, batchsize=args.batch)
test_data = DataFeeder(test_data_dict, batchsize=args.valbatch)

train_data.hook_preprocess(mnist_preprocess)
test_data.hook_preprocess(mnist_preprocess)


# Model Setup
h_units = 1200
model = models.AutoencoderModel(
Example #21
File: task4.py  Project: larsXYZ/TDT4265_A1
import mnist
import numpy as np
import one_hot_encoding as ohe
import matplotlib.pyplot as plt
import pickle

#Data settings
training_data_size = 55000
validation_data_size = 4000
testing_data_size = 1000

#Loading data from MNIST dataset
X_train, Y_train, X_test, Y_test = mnist.load()

# Scale pixel values from [0, 255] to [0, 1]
X_train = X_train/255
X_test = X_test/255

#Performing the "bias trick"
X_train = np.concatenate((X_train,np.ones([60000,1])), axis=1)
X_test = np.concatenate((X_test,np.ones([10000,1])), axis=1)

#Selecting training data and validation data
training_data_input = X_train[0:training_data_size,:].copy()
training_data_output = Y_train[0:training_data_size].copy()
validation_data_input = X_train[training_data_size:training_data_size+validation_data_size].copy()
validation_data_output = Y_train[training_data_size:training_data_size+validation_data_size].copy()
testing_data_input = X_test[-testing_data_size:].copy()
testing_data_output = Y_test[-testing_data_size:].copy()

#One hot encode the wanted results
Example #22

# goal: use weights/biases learned in rbm_matlab_mnist_general.py to get two things:
# 1. the low dimensional representation of each example
# 2. the reconstruction of each example
# this is basically going to be a translation of the non-backprop part in backprop.m

import numpy as np
import mnist
from rbm_matlab_mnist_general import random_mini_batches
from pylab import imshow, cm, show
import os

## load mnist data, make batches
# read data
x_train, t_train, x_test, t_test = mnist.load()

# scale data
x_train = x_train / 255
x_test = x_test / 255

# batch input data
batchdata = random_mini_batches(
    x_train, mini_batch_size=100)  # list of batches of input data
numbatches = len(batchdata)

## load weights
home = os.getenv('HOME')
vishid = np.load(home + '/Deep_Learning_Examples/RBM/vishid.npy')
hidrecbiases = np.load(home + '/Deep_Learning_Examples/RBM/hidrecbiases.npy')
visbiases = np.load(home + '/Deep_Learning_Examples/RBM/visbiases.npy')
hidpen = np.load(home + '/Deep_Learning_Examples/RBM/hidpen.npy')
penrecbiases = np.load(home + '/Deep_Learning_Examples/RBM/penrecbiases.npy')
Example #23
#     ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
#     ax.set_title('Training: %i' % label)

# flatten the images
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
X_train2, X_test2, y_train2, y_test2 = train_test_split(data,
                                                        digits.target,
                                                        test_size=0.5,
                                                        shuffle=False)
# print(X_train2[1])
# print(y_train2[1])
# print(X_test2[1])
# print(y_test2[1])

X_train, y_train, X_test, y_test = mnist.load()
X_train = (X_train[1:limit, :] / 16.0).astype(numpy.uint8).astype(
    numpy.float64)
X_test = (X_test[1:limit, :] / 16.0).astype(numpy.uint8).astype(numpy.float64)
y_train = y_train[1:limit]
y_test = y_test[1:limit]

# print(X_train[2])
# print(y_train[2])
# print(X_test[1])
# print(y_test)

# Create a classifier: a support vector classifier
clf = svm.SVC(gamma='scale')

# Learn the digits on the train subset
Example #24
File: Task3.py  Project: SByfuglien/TDT4265
    def __init__(self):
        self.X_train, self.Y_train, self.X_test, self.Y_test = mnist.load()
        self.X_val = None
        self.Y_val = None
        self.X_train_vanilla = self.X_train
        self.Y_train_vanilla = self.Y_train
Example #25
def vectorize(i: int) -> np.ndarray:
    vec = np.zeros((10, 1))
    vec[i] = 1
    return vec
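A quick sanity check of vectorize as a sketch (np is assumed to already be imported in this file): vectorize(3) should return a (10, 1) column with a single 1 at row 3.

print(vectorize(3).ravel())  # expected: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]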


epochs = 1
mini_batch_size = 20
eta = 0.4
NN_struct = []
input_layer = 784
output_layer = 10
hidden_layers = [100]
NN_struct.append(input_layer)
for layer in hidden_layers:
    NN_struct.append(layer)
NN_struct.append(output_layer)
train_images, train_labels, test_images, test_labels = load()
train_labels = np.array([vectorize(i) for i in train_labels])
test_labels = np.array([vectorize(i) for i in test_labels])
train_data = np.array(list(zip(train_images, train_labels)))
test_data = np.array(list(zip(test_images, test_labels)))
network = Network([784, 30, 10])
network.train_SGD(train_data, epochs, mini_batch_size,
                  eta, test_data=test_data)
print(f"Saving object via pickle with parameters:\nepochs - {epochs}\n"
      f"minibatch size - {mini_batch_size}\neta - {eta}")
network.save(f"SNN_{epochs}_{mini_batch_size}_{eta}_HL_{hidden_layers}.pkl")
print("Done.")
Example #26
File: classif.py  Project: vivien98/AdvML
import torch.nn.functional as F
import torch.nn as nn
import torch
from torch import optim
import mnist
import numpy as np

trainingData, trainingLabels, testData, testLabels = mnist.load()
trainingData = trainingData / 255 > 0.5
testData = testData / 255 > 0.5
trainingData = trainingData.astype(float)
testData = testData.astype(float)

trainingSize = 60000
actualTrainingSize = 50000
validSize = 10000
testSize = 10000
numEpochs = 10


class regressionClassifier(nn.Module):
    def __init__(self):
        super(regressionClassifier, self).__init__()
        self.output = nn.Linear(784, 10)

    def forward(self, x):
        x = self.output(x)
        print(str(x.size()))
        #x = F.softmax(x,dim = 1 )
        return x  #nn.LogSoftmax(dim1)
Example #27
File: train.py  Project: hunse/mnist-nengo
    # architecture two
    shapes = [(28, 28), 1000, 500, 200]
    rf_shapes = [(9, 9), None, None]
    rates = [1., 1., 1.]

n_layers = len(shapes) - 1
assert len(rf_shapes) == n_layers
assert len(rates) == n_layers

# --- define our rate neuron model
neuron = ('softlif', dict(
    sigma=0.01, tau_rc=0.02, tau_ref=0.002, gain=1, bias=1, amp=1. / 63.04))
neuron_fn = neurons.get_theano_fn(*neuron)

# --- load the data
train, valid, test = mnist.load(
    normalize=True, shuffle=True, spaun=args.spaun)
train_images, test_images = train[0], test[0]

# --- pretrain with SGD backprop
n_epochs = 15
batch_size = 100

deep = DeepAutoencoder()
data = train_images
for i in range(n_layers):
    vis_func = None if i == 0 else neuron_fn

    # create autoencoder for the next layer
    auto = Autoencoder(
        shapes[i], shapes[i+1], rf_shape=rf_shapes[i],
        vis_func=vis_func, hid_func=neuron_fn)
Example #28
# coding: utf-8

import numpy as np
import pickle
import mnist
import func
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = mnist.load(normalize=True,
                                                  one_hot_label=True)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
Example #29
File: view.py  Project: hunse/mnist-nengo
    args = parser.parse_args()

    if not os.path.exists(args.loadfile):
        raise IOError("Cannot find '%s'" % args.loadfile)

    data = np.load(args.loadfile)
    if all(a in data for a in ['weights', 'biases', 'Wc', 'bc']):
        # Static network params file
        if 'neuron' in data:
            _, neuron_params = data['neuron']
        else:
            neuron_params = dict(sigma=0.01, tau_rc=0.02, tau_ref=0.002,
                                 gain=1, bias=1, amp=1. / 63.04)

        # --- load the testing data
        _, _, [images, labels] = mnist.load(
            normalize=True, shuffle=True, spaun=args.spaun)
        assert np.unique(labels).size == data['bc'].size

        # --- compute the error
        neuron = ('softlif', dict(neuron_params))
        errors = compute_static_error(data, images, labels, neuron)
        print("----- Static network with softlif -----")
        print("Static error: %0.2f%%" % (100 * errors.mean()))

        neuron = ('lif', dict(neuron_params))
        neuron[1].pop('sigma')
        errors = compute_static_error(data, images, labels, neuron)
        print("----- Static network with lif -----")
        print("Static error: %0.2f%%" % (100 * errors.mean()))
        view_static(data, images, labels, neuron)
Example #30
        random_state=None,
        verbose=True,
        stopping_criterion=None,  # 'edv', 'tie'
        edv_threshold=0.25,
        tie_threshold=0.25,
        #=======================================================================
        # sparse=True,
        #=======================================================================
        sparse=False,
        minimum_sparseness=0.25,
        maximum_sparseness=0.75,
        early_stopping=False,
        validation_fraction=0.1,
        tol=0.0001,
        n_iter_no_change=10,
        metric='rmse',
        prob_skip_connection=0.0)  # 0.35

    return estimator


if __name__ == '__main__':

    scale = True
    X_train, y_train, X_test, y_test = load(scale)

    for run in range(1, 1 + 1):
        print('MNIST: SLM run', run)
        estimator = get_estimator()
        fit_and_predict(estimator, X_train, y_train, X_test, y_test)
Example #31
parser.add_argument('--max_epochs', type=int, default=1000)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('output_dir', type=str)
parser.add_argument('network_structure', type=str)
args = parser.parse_args()

os.makedirs(args.output_dir)

# Set up the network:
print "Setting up network..."

dimensions = [28*28] + [int(x) for x in args.network_structure.split("x")] + [10]
net = models.BatchTrainedModel(models.perceptron_model(dimensions))

print "Loading MNIST..."
(train_x, train_y), (test_x, test_y) = mnist.load()
print "Done Loading MNIST."

print "%d training examples" % train_x.shape[0]

print "Training..."
graph_f = open("%s/graph.tsv" % args.output_dir, "w")
for i in xrange(args.max_epochs+1):
  print "Ran for", i, "epochs"
  net.write("%s/epoch_%04d.hdf5" % (args.output_dir, i))
  train_error = net.error_rate(train_x, train_y)
  test_error = net.error_rate(test_x, test_y)
  graph_f.write("%d\t%f\t%f\n" % (i, train_error, test_error))
  graph_f.flush()
  print "Train Error rate =", train_error
  print "Test Error rate =", test_error
Example #32
import mnist
import numpy as np
import NeuralNetwork as nn
import random
import pygame
import math

# Get data from MNIST
# NOTE: I did not write the code to retrieve the data, code taken from:
# https://github.com/hsjeong5/MNIST-for-Numpy
imgTrain, lblTrain, imgTest, lblTest = mnist.load()

# Initialize input, hidden and output layers
il = np.zeros((784, 1))
hl1 = np.zeros((50, 1))
hl2 = np.zeros((50, 1))
middleLayer = np.zeros((2, 1))
hl3 = np.zeros((50, 1))
hl4 = np.zeros((50, 1))
ol = np.zeros((784, 1))

layers = [il, hl1, middleLayer, hl4, ol]

# Initializes the network, loading the weights and biases from the files
#
network = nn.NeuralNetwork(layers,
                           learningRate=0.0003,
                           weightImportFile="EncoderWeights.txt",
                           biasImportFile="EncoderBiases.txt")

Example #33
def load_data():
    global training_data, test_data
    training_data, test_data = mnist.load()


def train(input_dim=INPUT_DIM,
          batch_size=BATCH_SIZE,
          n_features_first=N_FEATURES_FIRST,
          critic_iters=CRITIC_ITERS,
          lambda_reg=LAMBDA,
          learning_rate=1e-4,
          iterations=ITERS,
          fixed_noise_size=FIXED_NOISE_SIZE,
          n_features_reduction_factor=2,
          gen_fix_layer_1=False,
          gen_fix_layer_2=False,
          gen_fix_layer_3=False,
          gen_fix_layer_4=False,
          disc_fix_layer_1=False,
          disc_fix_layer_2=False,
          disc_fix_layer_3=False,
          disc_fix_layer_4=False,
          architecture='DCGAN',
          init_method='He',
          BN_layers_trainable=True,
          load_saved=True):
    """
    - this is the function used to train a GAN model on MNIST, with the configuration given by the parameters
    - the function computes losses, auto-saves the model every 100 steps, and automatically resumes training where
      it stopped (when load_saved=True)

    :param input_dim:
    :param batch_size:
    :param n_features_first:
    :param critic_iters:
    :param lambda_reg:
    :param learning_rate:
    :param iterations:
    :param fixed_noise_size:
    :param n_features_reduction_factor: integer, e.g. 1: use the same number of feature-maps everywhere, 2: halve the
           number of feature-maps in every step
    :param architecture: right now only supports 'WGANGP' and 'DCGAN', defaults to 'DCGAN'
    :param init_method: the method with which the variables are initialized, support: 'uniform', 'normal',
           'truncated_normal' (each using std given by xavier initializer), 'normal1', 'truncated_normal1' (each using
           std 1), 'normal_BN', 'uniform_BN', 'normal_BN_shift', 'He', defaults to 'He'
    :param BN_layers_trainable: shall the BN layers be trainable
    :param load_saved:
    :return:
    """

    # -------------------------------------------------------
    # setting for sending emails and getting statistics
    send = settings.send_email
    get_stats = settings.get_statistics

    # -------------------------------------------------------
    # architecture default
    if architecture not in ['WGANGP']:
        architecture = 'DCGAN'
    if architecture == 'DCGAN':
        lambda_reg = None

    # -------------------------------------------------------
    # init_method default
    if init_method not in [
            'normal', 'truncated_normal', 'normal1', 'truncated_normal1',
            'normal_BN', 'uniform_BN', 'normal_BN_shift', 'He',
            'LayerDistribution'
    ]:
        init_method = 'uniform'

    # -------------------------------------------------------
    # create unique folder name
    dir1 = 'partly_fixed2/'
    directory = dir1+str(input_dim)+'_'+str(batch_size)+'_'+str(n_features_first)+'_'+str(critic_iters)+'_'+\
                str(lambda_reg)+'_'+str(learning_rate)+'_'+str(n_features_reduction_factor)+'_'+\
                str(gen_fix_layer_1)+'_'+str(gen_fix_layer_2)+'_'+str(gen_fix_layer_3)+'_'+str(gen_fix_layer_4)+'_' + \
                str(disc_fix_layer_1) + '_' + str(disc_fix_layer_2) + '_' + str(disc_fix_layer_3) + '_' + \
                str(disc_fix_layer_4) + '_' + \
                str(architecture)+'_'+str(init_method)+'_'+str(BN_layers_trainable)+'_'+str(BN)+'/'
    samples_dir = directory + 'samples/'
    model_dir = directory + 'model/'

    # create directories if they don't exist
    if not os.path.isdir(dir1):
        call(['mkdir', dir1])

    if not os.path.isdir(directory):
        load_saved = False
        print 'make new directory:', directory
        print
        call(['mkdir', directory])
        call(['mkdir', samples_dir])
        call(['mkdir', model_dir])

    # if directories already exist, but model wasn't saved so far, set load_saved to False
    if 'training_progress.csv' not in os.listdir(directory):
        load_saved = False

    # -------------------------------------------------------
    # initialize a TF session
    config = tf.ConfigProto()
    if N_CPUS_TF is None:
        number_cpus_tf = settings.number_cpus
    else:
        number_cpus_tf = N_CPUS_TF
    config.intra_op_parallelism_threads = number_cpus_tf
    config.inter_op_parallelism_threads = number_cpus_tf
    session = tf.Session(config=config)

    # -------------------------------------------------------
    # convenience function to build the model
    def build_model(gen_fix_layer_1_b=gen_fix_layer_1,
                    gen_fix_layer_2_b=gen_fix_layer_2,
                    gen_fix_layer_3_b=gen_fix_layer_3,
                    gen_fix_layer_4_b=gen_fix_layer_4,
                    disc_fix_layer_1_b=disc_fix_layer_1,
                    disc_fix_layer_2_b=disc_fix_layer_2,
                    disc_fix_layer_3_b=disc_fix_layer_3,
                    disc_fix_layer_4_b=disc_fix_layer_4):
        with tf.name_scope('placeholders'):
            x_true = tf.placeholder(tf.float32, [None, 28, 28, 1])
            z = tf.placeholder(tf.float32, [None, input_dim])

        x_generated = generator(
            z,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            fix_layer_1=gen_fix_layer_1_b,
            fix_layer_2=gen_fix_layer_2_b,
            fix_layer_3=gen_fix_layer_3_b,
            fix_layer_4=gen_fix_layer_4_b,
            architecture=architecture,
            init_method=init_method)

        d_true = discriminator(
            x_true,
            reuse=False,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            fix_layer_1=disc_fix_layer_1_b,
            fix_layer_2=disc_fix_layer_2_b,
            fix_layer_3=disc_fix_layer_3_b,
            fix_layer_4=disc_fix_layer_4_b,
            architecture=architecture,
            init_method=init_method)

        d_generated = discriminator(
            x_generated,
            reuse=True,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            fix_layer_1=disc_fix_layer_1_b,
            fix_layer_2=disc_fix_layer_2_b,
            fix_layer_3=disc_fix_layer_3_b,
            fix_layer_4=disc_fix_layer_4_b,
            architecture=architecture,
            init_method=init_method)

        if architecture == 'DCGAN':
            with tf.name_scope('loss'):
                g_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=d_generated, labels=tf.ones_like(d_generated)))
                d_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_generated,
                                                                                labels=tf.zeros_like(d_generated))) +\
                         tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_true,
                                                                                labels=tf.ones_like(d_true)))
                d_loss = d_loss / 2.

            with tf.name_scope('g_optimizer'):
                g_optimizer = tf.train.AdamOptimizer(learning_rate=2 *
                                                     learning_rate,
                                                     beta1=0.5)

                g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='generator')
                # make BN layers trainable or not, depending on BN_layers_trainable
                g_vars2 = []
                not_to_include = []
                if gen_fix_layer_1_b:
                    not_to_include += [
                        'generator/fully_connected/BatchNorm/beta:0'
                    ]
                if gen_fix_layer_2_b:
                    not_to_include += [
                        'generator/Conv2d_transpose/BatchNorm/beta:0'
                    ]
                if gen_fix_layer_3_b:
                    not_to_include += [
                        'generator/Conv2d_transpose_1/BatchNorm/beta:0'
                    ]
                if disc_fix_layer_1_b:
                    not_to_include += ['discriminator/Conv/BatchNorm/beta:0']
                if disc_fix_layer_2_b:
                    not_to_include += ['discriminator/Conv_1/BatchNorm/beta:0']
                if disc_fix_layer_3_b:
                    not_to_include += ['discriminator/Conv_2/BatchNorm/beta:0']
                for v in g_vars:
                    if v.name not in not_to_include:
                        g_vars2 += [v]
                if not BN_layers_trainable:
                    g_vars = g_vars2
                g_train = g_optimizer.minimize(g_loss, var_list=g_vars)
            with tf.name_scope('d_optimizer'):
                d_optimizer = tf.train.AdamOptimizer(learning_rate=2 *
                                                     learning_rate,
                                                     beta1=0.5)

                d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='discriminator')
                # make BN layers trainable or not, depending on BN_layers_trainable
                d_vars2 = []
                for v in d_vars:
                    if v.name not in not_to_include:
                        d_vars2 += [v]
                if not BN_layers_trainable:
                    d_vars = d_vars2
                d_train = d_optimizer.minimize(d_loss, var_list=d_vars)

        else:  # WGAN-GP
            with tf.name_scope('regularizer'):
                epsilon = tf.random_uniform([batch_size, 1, 1, 1], 0.0, 1.0)
                x_hat = epsilon * x_true + (1 - epsilon) * x_generated

                d_hat = discriminator(
                    x_hat,
                    reuse=True,
                    n_features_first=n_features_first,
                    n_features_reduction_factor=n_features_reduction_factor,
                    fix_layer_1=disc_fix_layer_1_b,
                    fix_layer_2=disc_fix_layer_2_b,
                    fix_layer_3=disc_fix_layer_3_b,
                    fix_layer_4=disc_fix_layer_4_b,
                    architecture=architecture,
                    init_method=init_method)

                gradients = tf.gradients(d_hat, x_hat)[0]
                ddx = tf.sqrt(tf.reduce_sum(gradients**2, axis=[1, 2]))
                d_regularizer = tf.reduce_mean((ddx - 1.0)**2)

            with tf.name_scope('loss'):
                g_loss = -tf.reduce_mean(d_generated)
                wasserstein_dist = tf.reduce_mean(d_true) - tf.reduce_mean(
                    d_generated)
                d_loss = -wasserstein_dist + lambda_reg * d_regularizer

            with tf.name_scope('g_optimizer'):
                g_optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate, beta1=0, beta2=0.9)

                g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='generator')
                g_train = g_optimizer.minimize(g_loss, var_list=g_vars)
            with tf.name_scope('d_optimizer'):
                d_optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate, beta1=0, beta2=0.9)

                d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='discriminator')
                d_train = d_optimizer.minimize(d_loss, var_list=d_vars)

        # initialize variables using uniform xavier init method, see tensorflow documentation
        session.run(tf.global_variables_initializer())

        if architecture == 'DCGAN':
            return x_true, z, x_generated, g_loss, d_loss, g_train, d_train, g_vars, d_vars
        else:  # WGANGP
            return x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, d_train, g_vars, d_vars

    # -------------------------------------------------------
    # build the model
    if (init_method in ['uniform', 'He', 'normal']) or load_saved:
        if architecture == 'DCGAN':
            x_true, z, x_generated, g_loss, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )
        else:  # WGANGP
            x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )
    else:  # not load_saved and not 'uniform'
        # build model with all variables trainable to be able to change weights
        if architecture == 'DCGAN':
            x_true, z, x_generated, g_loss, d_loss, g_train, d_train, \
            g_vars, d_vars = build_model(False, False, False, False,False, False, False, False)
        else:  # WGANGP
            x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, \
            d_train, g_vars, d_vars = build_model(False, False, False, False, False, False, False, False)

        # change the weights as wanted
        saver = tf.train.Saver(max_to_keep=1)
        trainable_vars = tf.trainable_variables()
        if get_stats:
            import matplotlib.pyplot as plt
        for v in trainable_vars:
            print 'change weights of: ' + str(v.name)
            weights = session.run(v)
            # if 'BatchNorm' in v.name: #delete
            #     print 'BN weights:' #delete
            #     print weights #delete
            #     print #delete

            if init_method == 'truncated_normal':  # using xavier init method, see tensorflow documentation
                max_abs_val = np.max(np.abs(weights))
                session.run(
                    tf.assign(v,
                              value=tf.truncated_normal(v.shape,
                                                        mean=0.0,
                                                        stddev=max_abs_val /
                                                        np.sqrt(3))))
            elif init_method == 'normal1':
                session.run(
                    tf.assign(v,
                              value=tf.random_normal(v.shape,
                                                     mean=0.0,
                                                     stddev=1.0)))
            elif init_method == 'truncated_normal1':
                session.run(
                    tf.assign(v,
                              value=tf.truncated_normal(v.shape,
                                                        mean=0.0,
                                                        stddev=1.0)))
            elif init_method == 'uniform_BN':
                max_abs_val = np.max(np.abs(weights))
                if 'BatchNorm' in v.name:
                    session.run(
                        tf.assign(v,
                                  value=tf.random_uniform(v.shape,
                                                          minval=-last_val,
                                                          maxval=last_val)))
                last_val = max_abs_val
            elif init_method == 'normal_BN':
                max_abs_val = np.max(np.abs(weights))
                if 'BatchNorm' in v.name:
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=0.0,
                                                         stddev=last_val /
                                                         np.sqrt(3))))
                else:
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=0.0,
                                                         stddev=max_abs_val /
                                                         np.sqrt(3))))
                last_val = max_abs_val
            elif init_method == 'normal_BN_shift':
                max_abs_val = np.max(np.abs(weights))
                if 'BatchNorm' in v.name:
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-last_val,
                                                         stddev=last_val)))
                else:
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=0.0,
                                                         stddev=max_abs_val /
                                                         np.sqrt(3))))
                last_val = max_abs_val
            elif init_method == 'LayerDistribution':
                if v.name == 'generator/fully_connected/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=0.0,
                                                         stddev=0.037907723)))
                elif v.name == 'generator/Conv2d_transpose/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-0.007851141,
                                                         stddev=0.034838371)))
                elif v.name == 'generator/Conv2d_transpose_1/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-0.001966879,
                                                         stddev=0.037020162)))
                elif v.name == 'generator/Conv2d_transpose_2/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-0.121885814,
                                                         stddev=0.294095486)))
                elif v.name == 'discriminator/Conv/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-0.005809855,
                                                         stddev=0.044240803)))
                elif v.name == 'discriminator/Conv_1/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-0.000329115,
                                                         stddev=0.03293338)))
                elif v.name == 'discriminator/Conv_2/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=-0.000697783,
                                                         stddev=0.028810507)))
                elif v.name == 'discriminator/fully_connected/weights:0':
                    session.run(
                        tf.assign(v,
                                  value=tf.random_normal(v.shape,
                                                         mean=0.000849896,
                                                         stddev=0.074863143)))

            if get_stats:
                weights_new = session.run(v)
                f = plt.figure()
                plt.hist(np.reshape(weights_new, newshape=(-1, )),
                         bins=100,
                         density=True)
                f.savefig(fname=directory +
                          v.name.replace('/', '_').replace(':', '') + '.png')
                plt.close(f)

        saver.save(sess=session, save_path=model_dir + 'saved_model')
        print
        print 'weights were initialized with: ' + init_method
        print

        # load new session, so that no conflict with names in the name_scopes
        session.close()
        tf.reset_default_graph()
        session = tf.Session(config=config)

        # load the model with the perturbed weights, but now s.t. the correct variables are trainable
        if architecture == 'DCGAN':
            x_true, z, x_generated, g_loss, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )
        else:  # WGANGP
            x_true, z, x_generated, g_loss, wasserstein_dist, d_loss, g_train, d_train, g_vars, d_vars = build_model(
            )

        # restore the model with the correctly initialized weights
        saver = tf.train.Saver(max_to_keep=1)
        saver.restore(sess=session, save_path=model_dir + 'saved_model')
        print 'loaded model with weights initialized with: ' + init_method
        print

    # -------------------------------------------------------
    # FK: For saving samples, taken from IWGAN
    fixed_noise = np.random.normal(size=(fixed_noise_size,
                                         input_dim)).astype('float32')

    def generate_image(frame):
        samples = session.run(x_generated, feed_dict={
            z: fixed_noise
        }).squeeze()
        # print samples.shape
        save_images.save_images(samples.reshape((fixed_noise_size, 28, 28)),
                                samples_dir + 'iteration_{}.png'.format(frame))

    # -------------------------------------------------------
    # FK: for saving the model create a saver
    saver = tf.train.Saver(max_to_keep=1)
    iterations_trained = 0
    if architecture == 'DCGAN':
        training_progress = pd.DataFrame(
            data=None, index=None, columns=['iteration', 'time', 'd_loss'])
    else:  # WGAN-GP
        training_progress = pd.DataFrame(
            data=None,
            index=None,
            columns=['iteration', 'time', 'Wasserstein_dist', 'd_loss'])

    # restore the model:
    if load_saved:
        saver.restore(sess=session, save_path=model_dir + 'saved_model')
        iterations_trained = int(np.loadtxt(fname=model_dir +
                                            'iterations.csv'))
        tp_app = pd.read_csv(filepath_or_buffer=directory +
                             'training_progress.csv',
                             index_col=0,
                             header=0)
        training_progress = pd.concat([training_progress, tp_app],
                                      axis=0,
                                      ignore_index=True)
        print 'loaded training progress, and the model, which was already trained for {} iterations'.format(
            iterations_trained)
        print training_progress
        print

    # if the network is already trained completely, set send to false
    if iterations_trained == iterations:
        send = False

    # -------------------------------------------------------
    # FK: print and get model summary
    n_params_gen = model_summary(var_list=g_vars)[0]
    print
    n_params_disc = model_summary(var_list=d_vars)[0]
    print

    # -------------------------------------------------------
    # FK: print model config to file
    model_config = [[
        'input_dim', 'batch_size', 'n_features_first', 'critic_iters',
        'lambda_reg', 'learning_rate', 'fixed_noise_size',
        'n_features_reduction_factor', 'gen_fix_layer_1', 'gen_fix_layer_2',
        'gen_fix_layer_3', 'gen_fix_layer_4', 'disc_fix_layer_1',
        'disc_fix_layer_2', 'disc_fix_layer_3', 'disc_fix_layer_4',
        'architecture', 'init_method', 'BN_layers_trainable',
        'n_trainable_params_gen', 'n_trainable_params_disc'
    ],
                    [
                        input_dim, batch_size, n_features_first, critic_iters,
                        lambda_reg, learning_rate, fixed_noise_size,
                        n_features_reduction_factor, gen_fix_layer_1,
                        gen_fix_layer_2, gen_fix_layer_3, gen_fix_layer_4,
                        disc_fix_layer_1, disc_fix_layer_2, disc_fix_layer_3,
                        disc_fix_layer_4, architecture, init_method,
                        BN_layers_trainable, n_params_gen, n_params_disc
                    ]]
    model_config = np.transpose(model_config)
    model_config = pd.DataFrame(data=model_config)
    model_config.to_csv(path_or_buf=directory + 'model_config.csv')
    print 'saved model configuration'
    print

    # -------------------------------------------------------
    # FK: get the MNIST data loader
    train_gen, dev_gen, test_gen = mnist.load(batch_size, batch_size)

    # create an infinite generator
    def inf_train_gen():
        while True:
            for images, targets in train_gen():
                yield images

    gen = inf_train_gen()

    # -------------------------------------------------------
    # training loop
    print model_config
    print
    t = time.time()  # get start time

    # for average times:
    if get_stats:
        t1s = np.zeros((iterations - iterations_trained))
        t2s = np.zeros((iterations - iterations_trained))
        t3s = np.zeros((iterations - iterations_trained))
        t4s = np.zeros((iterations - iterations_trained))

    for i in xrange(iterations - iterations_trained):
        z_train = np.random.randn(batch_size, input_dim)
        if get_stats:
            tt1 = time.time()
        session.run(g_train, feed_dict={z: z_train})
        if get_stats:
            tt1 = time.time() - tt1

        # loop for critic training
        for j in xrange(critic_iters):
            # FK: insert the following 3 lines s.t. not the same batch is used for all 5 discriminator updates
            if get_stats:
                tt = time.time()
            batch = gen.next()
            images = batch.reshape([-1, 28, 28, 1])
            z_train = np.random.randn(batch_size, input_dim)
            if get_stats:
                print '\ncomputation time to get true batch and random vector: {}'.format(
                    time.time() - tt)
                tt = time.time()
            session.run(d_train, feed_dict={x_true: images, z: z_train})
            if get_stats:
                t1 = time.time() - tt + tt1
                t1s[i] = t1
                print 'computation time to train for 1 iteration (minimize disc and gen one step): t1 = {}'.format(
                    t1)
                tt = time.time()
                session.run(d_loss, feed_dict={x_true: images, z: z_train})
                session.run(g_loss, feed_dict={z: z_train})
                t2 = time.time() - tt
                t2s[i] = t2
                print 'computation time to compute the disc. and gen. loss once: t2 = {}'.format(
                    t2)
                tt = time.time()
                session.run(x_generated, feed_dict={z: z_train})
                t3 = time.time() - tt
                t3s[i] = t3
                print 'computation time to compute x_generated: t3 = {}'.format(
                    t3)
                print 't1/t2 = {}'.format(t1 / t2)
                # list_ = session.run(g_optimizer.compute_gradients(g_loss, var_list=g_vars), feed_dict={z: z_train})
                # print 'number of gradients computed: {}'.format(2*len(list_))

        # print the current iteration
        print('iteration={}/{}'.format(i + iterations_trained + 1, iterations))

        # all 100 steps compute the losses and elapsed times, and generate images
        if (i + iterations_trained) % 100 == 99:
            # get time for last 100 iterations
            elapsed_time = time.time() - t

            # generate sample images from fixed noise
            generate_image(i + iterations_trained + 1)
            print 'generated images'

            # compute and save losses on dev set
            if architecture == 'DCGAN':
                dev_d_loss = []
                for images_dev, _ in dev_gen():
                    images_dev = images_dev.reshape([-1, 28, 28, 1])
                    z_train_dev = np.random.randn(batch_size, input_dim)
                    _dev_d_loss = session.run(d_loss,
                                              feed_dict={
                                                  x_true: images_dev,
                                                  z: z_train_dev
                                              })
                    dev_d_loss.append(_dev_d_loss)
                tp_app = pd.DataFrame(data=[[
                    i + iterations_trained + 1, elapsed_time,
                    np.mean(dev_d_loss)
                ]],
                                      index=None,
                                      columns=['iteration', 'time', 'd_loss'])
                training_progress = pd.concat([training_progress, tp_app],
                                              axis=0,
                                              ignore_index=True)
                training_progress.to_csv(path_or_buf=directory +
                                         'training_progress.csv')
            else:  # WGAN-GP
                dev_W_dist = []
                dev_d_loss = []
                for images_dev, _ in dev_gen():
                    images_dev = images_dev.reshape([-1, 28, 28, 1])
                    z_train_dev = np.random.randn(batch_size, input_dim)
                    _dev_W_dist = session.run(wasserstein_dist,
                                              feed_dict={
                                                  x_true: images_dev,
                                                  z: z_train_dev
                                              })
                    _dev_d_loss = session.run(d_loss,
                                              feed_dict={
                                                  x_true: images_dev,
                                                  z: z_train_dev
                                              })
                    dev_W_dist.append(_dev_W_dist)
                    dev_d_loss.append(_dev_d_loss)
                tp_app = pd.DataFrame(data=[[
                    i + iterations_trained + 1, elapsed_time,
                    np.mean(dev_W_dist),
                    np.mean(dev_d_loss)
                ]],
                                      index=None,
                                      columns=[
                                          'iteration', 'time',
                                          'Wasserstein_dist', 'd_loss'
                                      ])
                training_progress = pd.concat([training_progress, tp_app],
                                              axis=0,
                                              ignore_index=True)
                training_progress.to_csv(path_or_buf=directory +
                                         'training_progress.csv')
            print 'saved training progress'
            print

            # save model
            saver.save(sess=session, save_path=model_dir + 'saved_model')
            # save number of iterations trained
            np.savetxt(fname=model_dir + 'iterations.csv',
                       X=[i + iterations_trained + 1])
            print 'saved model after training iteration {}'.format(
                i + iterations_trained + 1)

            # fix new start time
            t = time.time()

    # average times:
    if get_stats:
        print '\n\naverage times over {} iterations:'.format(
            iterations - iterations_trained)
        print 'computation time to train for 1 iteration (minimize disc and gen one step): t1 = {}'.format(
            np.mean(t1s))
        print 'computation time to compute the disc. and gen. loss once: t2 = {}'.format(
            np.mean(t2s))
        print 'computation time to compute x_generated: t3 = {}'.format(
            np.mean(t3s))
        if architecture == 'WGANGP':
            print 'computation time to compute gradient regularization term: t4 = {}'.format(
                np.mean(t4s))
        print 't1/t2 = {}'.format(np.mean(t1s) / np.mean(t2s))
        print

    # -------------------------------------------------------
    # after training close the session
    session.close()
    tf.reset_default_graph()

    # -------------------------------------------------------
    # when training is done send email
    if send:
        subject = 'GAN (MNIST) partly fixed training finished'
        body = 'to download the results of this model use (in the terminal):\n\n'
        body += 'scp -r [email protected]:/cluster/home/fkrach/MasterThesis/MTCode1/' + directory + ' .'
        files = [
            directory + 'model_config.csv',
            directory + 'training_progress.csv',
            samples_dir + 'iteration_{}.png'.format(iterations)
        ]
        send_email.send_email(subject=subject, body=body, file_names=files)

    return directory
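A hypothetical invocation of train(), assuming the module-level constants (INPUT_DIM, BATCH_SIZE, N_FEATURES_FIRST, CRITIC_ITERS, LAMBDA, ITERS, FIXED_NOISE_SIZE, BN, N_CPUS_TF) and the settings/send_email helpers are defined as in the surrounding script; written in the same Python 2 style as the code above.

out_dir = train(architecture='WGANGP', init_method='He', load_saved=True)
print 'results written to', out_dir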