Example #1
def should_success_calculate_for_multiple_neurons():
    network = MultipleLayersModel([
        Layer(input_dimension=1,
              output_dimension=3,
              activation_function=LinearFunction(),
              weights_initializer=ConstShapeInitializer(
                  np.asarray([[1., 2., 3.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([1., 2.,
                                                                   3.]))),
        Layer(input_dimension=3,
              output_dimension=1,
              activation_function=LinearFunction(2.),
              weights_initializer=ConstShapeInitializer(
                  np.asarray([[1.], [2.], [3.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([1.])))
    ])
    X = np.asarray([[0.], [1.]])
    Y = np.asarray([[0.], [2.]])
    gradient = ApproximateGradient()
    square_error = SquareError()
    network_gradient = gradient(network, X, Y, square_error)
    expected = np.asarray([[
        np.asarray([[224.00000444, 448.0000166, 672.00003605]]),
        np.asarray([344.00000857, 688.0000326, 1032.00007197])
    ],
                           [
                               np.asarray([[568.00002073], [1136.00008012],
                                           [1704.00017987]]),
                               np.asarray([344.00000834])
                           ]])
    equals(expected, network_gradient)
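A note on the ApproximateGradient object exercised above: its implementation is not shown on this page, but a gradient of this kind is normally obtained by finite differences. The sketch below is a minimal, self-contained NumPy version (central differences on a toy squared error); it is an illustration only, not the project's ApproximateGradient class.

import numpy as np

def approx_grad(f, params, eps=1e-6):
    # Central-difference estimate of df/dparams for a scalar-valued f.
    grad = np.zeros_like(params)
    for i in range(params.size):
        step = np.zeros_like(params)
        step[i] = eps
        grad[i] = (f(params + step) - f(params - step)) / (2 * eps)
    return grad

# Toy check: error(w) = (w*x - y)**2 with x=1, y=2 has derivative 2*(w*x - y)*x,
# so at w=3 the estimate should be close to 2.
x, y = 1., 2.
print(approx_grad(lambda w: (w[0] * x - y) ** 2, np.asarray([3.])))  # ~[2.]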
Example #2
    def test_next_layer(self):
        with self.assertRaises(AssertionError):
            Layer(15).next_layer('')

        x = Layer(1)
        y = Layer(2)
        x.next_layer(y)
        self.assertIs(x.next, y)
Example #3
    def test_prev_layer(self):
        with self.assertRaises(AssertionError):
            Layer(15).prev_layer('')

        x = Layer(1)
        y = Layer(2)
        y.prev_layer(x)
        self.assertIs(y.prev, x)
Example #4
    def init_model_params(self, dim_x):
        print 'M1 model params initialize'
        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden'] # [500, 500, 500]
        self.type_px = self.hyper_params['type_px']
        def relu(x): return x*(x>0) + 0.01 * x
        def softplus(x): return T.log(T.exp(x) + 1)
        activation = {'tanh': T.tanh, 'relu': relu, 'softplus': softplus, 'sigmoid': T.nnet.sigmoid, 'none': None}
        nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f= activation['none']

        # Recognize model
        self.recognize_layers = [Layer((dim_x, n_hidden[0]), function=nonlinear_q)]
        if len(n_hidden) > 1:
            self.recognize_layers += [Layer(shape, function=nonlinear_q) for shape in zip(n_hidden[:-1], n_hidden[1:])]
        self.recognize_mean_layer = Layer((n_hidden[-1], dim_z), function=None)
        self.recognize_log_sigma_layer = Layer((n_hidden[-1], dim_z), function=None, w_zero=True, b_zero=True)


        # Generate Model
        self.generate_layers = [Layer((dim_z, n_hidden[0]), function=nonlinear_p)]
        if len(n_hidden) > 1:
            self.generate_layers += [Layer(shape, function=nonlinear_p) for shape in zip(n_hidden[:-1], n_hidden[1:])]
        self.generate_mean_layer = Layer((n_hidden[-1], dim_x), function=output_f)
        self.generate_log_sigma_layer = Layer((n_hidden[-1], dim_x), function=None, b_zero=True)


        self.model_params_ = (
            [param for layer in self.generate_layers for param in layer.params] +
            self.recognize_mean_layer.params +
            self.recognize_log_sigma_layer.params +
            [param for layer in self.recognize_layers for param in layer.params] +
            self.generate_mean_layer.params
        )

        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_sigma_layer.params
Example #5
def main():
    xor = MLP()
    xor.add_layer(Layer(2))
    xor.add_layer(Layer(2))
    xor.add_layer(Layer(1))

    xor.init_network()

    xor.patterns = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]

    print xor.train(xor.patterns)
    for inp, target in xor.patterns:
        tolerance = 0.1
        computed = xor.forward(inp)
        error = abs(computed[0] - target[0])
        print 'input: %s target: %s, output: %s, error: %.4f' % (inp,
            target, computed, error)
Example #6
def should_success_calculate_for_multiple_examples():
    network = MultipleLayersModel([
        Layer(input_dimension=1,
              output_dimension=1,
              activation_function=LinearFunction(),
              weights_initializer=ConstShapeInitializer(np.asarray([[1.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([2.]))),
        Layer(input_dimension=1,
              output_dimension=1,
              activation_function=LinearFunction(2.),
              weights_initializer=ConstShapeInitializer(np.asarray([[3.]])),
              biases_initializer=ConstShapeInitializer(np.asarray([0.])))
    ])
    X = np.asarray([[0.], [1.]])
    Y = np.asarray([[0.], [2.]])
    gradient = ApproximateGradient()
    square_error = SquareError()
    network_gradient = gradient(network, X, Y, square_error)
    expected = np.asarray(
        [[np.asarray([[192.0000359518781]]),
          np.asarray([336.0000719681011])],
         [np.asarray([[288.0000519667192]]),
          np.asarray([112.00000793110121])]])
    equals(expected, network_gradient)
Example #7
def should_be_success_calculate_output():
    layer = Layer(
        input_dimension=2,
        output_dimension=3,
        activation_function=LinearFunction(),
        weights_initializer=ConstShapeInitializer(
            np.asarray([
                [1., 2., 3.],
                [1., 2., 3.]
            ])
        ),
        biases_initializer=ConstShapeInitializer(
            np.asarray(
                [1., 2., 3.]
            )
        )
    )
    expected = np.asarray([4., 8., 12.])
    equals(expected, layer([1, 2]))
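For reference, the expected vector is just the affine map applied by the layer: with the constant weights and biases above, x·W + b = [1+2+1, 2+4+2, 3+6+3] = [4, 8, 12]. A plain NumPy restatement of that arithmetic (assuming, as the test implies, that Layer computes x·W + b and LinearFunction() leaves the result unchanged):

import numpy as np

x = np.asarray([1., 2.])
W = np.asarray([[1., 2., 3.],
                [1., 2., 3.]])
b = np.asarray([1., 2., 3.])
print(x @ W + b)  # [ 4.  8. 12.]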
Example #8
from mlp import MultipleLayersModel, Layer
from initializers import UniformInitializer, ConstShapeInitializer

import numpy as np

__all__ = ['gradient_teacher_test']


def function(x):
    return 2 * x


network = MultipleLayersModel([
    Layer(input_dimension=1,
          output_dimension=1,
          activation_function=LinearFunction(),
          weights_initializer=ConstShapeInitializer(np.asarray([[1.]])),
          biases_initializer=ConstShapeInitializer(np.asarray([2.]))),
    Layer(input_dimension=1,
          output_dimension=1,
          activation_function=LinearFunction(2.),
          weights_initializer=ConstShapeInitializer(np.asarray([[3.]])),
          biases_initializer=ConstShapeInitializer(np.asarray([0.])))
])


def gradient_teacher_test():
    uniform = UniformInitializer(seed=2019)
    inputs = uniform((5, 1))
    outputInitializer = ConstShapeInitializer(
        [function(value) for value in inputs])
Example #9
    def test_init_weights(self):
        x = Layer(1)
        x.init_weights()
        self.assertIsNone(x.weights)

        x = Layer(1)
        y = Layer(1)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0]])

        x = Layer(1)
        y = Layer(2)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0, 0]])

        x = Layer(2)
        y = Layer(1)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0], [0]])

        x = Layer(2)
        y = Layer(2)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0, 0], [0, 0]])
Example #10
    expected_value = np.zeros(len(files))
    expected_value[random_index] = 1
    
    return input_data, expected_value


def update_ema(current, average):
    """Update the exponential moving average."""
    return ERROR_RATE * abs(current) + (1 - ERROR_RATE) * average
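# Hypothetical usage note: with, say, ERROR_RATE = 0.1 (the constant is defined
# elsewhere in the original file), current = 2.0 and average = 1.0 give
# 0.1 * 2.0 + 0.9 * 1.0 = 1.1, so a single large error only nudges the running
# average upward.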


if __name__ == '__main__':
    input_files = sorted(os.listdir(INPUT_DIR))
    input_data, expected_value = get_input_data(input_files)

    hidden = Layer(NUM_HIDDEN_NODES, input_data.shape[0], expit)
    output = Layer(len(input_files), NUM_HIDDEN_NODES, softmax)

    average_errors = np.ones(len(input_files))
    accepted_errors = np.full(len(input_files), ACCEPTED_ERROR)

    while not np.all(np.less(average_errors, accepted_errors)):
        # get a random date
        input_data, expected_value = get_input_data(input_files)

        # process inputs
        outputs = output.process(hidden.process(input_data))

        # calculate errors
        output.errors = expected_value - outputs
        hidden.errors = expit_prime(hidden.h) * np.dot(output.errors, output.weights)
Example #11
# Extract configuration
if path.isdir(options.configure) is True:
    raise Exception(options.configure + ': Is a directory.')
if path.exists(options.configure) is False:
    raise Exception(options.configure + ': No such file or directory.')
with open(options.configure, 'r') as yfile:
    cfg = yaml.load(yfile, Loader=yaml.BaseLoader)

# Load data set
X_train, y_train, X_test, y_test = preprocessing(cfg, csv2data(dataset_path))

# Build the network
nn = NeuralNetwork(error=options.error)
w_seed = int(cfg['weights_seed'])
b_seed = int(cfg['bias_seed'])
nn.add_layer(Layer(n_input=X_train.shape[1]), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=14, activation='tanh'), weights_seed=w_seed, bias_seed=b_seed)
nn.add_layer(Layer(n_input=y_train.shape[1], activation='softmax'), weights_seed=w_seed, bias_seed=b_seed)

# Train
mses, cees = nn.train(X_train, y_train, X_test, y_test, learning_rate=float(cfg['learning_rate']), max_epochs=int(cfg['epoch']), mini_batch_size=int(cfg['mini_batch_size']))

if (options.plot is True):
    nn.plot(mses, cees, learning_rate=float(cfg['learning_rate']), mini_batch_size=int(cfg['mini_batch_size']))

nn.save()
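Note that the configuration is read with yaml.BaseLoader, so every value comes back as a string and has to be cast explicitly (int(...), float(...)) where it is used. A minimal illustrative configuration, written here as the dict yaml.load would return (the concrete values are made up; only the keys read by this script are shown):

cfg_example = {
    'weights_seed': '42',
    'bias_seed': '42',
    'learning_rate': '0.01',
    'epoch': '100',
    'mini_batch_size': '32',
}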
Example #12
def main():
    imres = MLP()
    num_points = 784
    imres.add_layer(Layer(num_points))
    imres.add_layer(Layer(20))
    imres.add_layer(Layer(10))

    imres.add_bias()
    imres.init_network()

    imres.step = 0.001
    imres.moment = imres.step / 10
    imres.verbose = True
    target_error = 0.01

    imres.patterns = []
    imres._patterns = []
    imres.test_patterns = []
    imres._test_patterns = []

    def norm(inp):
        def fn(x):
            return x / 255

        return map(fn, inp)

    mn = MNIST('./mnist/data/')
    samples, labels = mn.load_testing()
    for i in range(100):
        outvect = [0] * 10
        outvect[labels[i]] = 1
        imres.patterns.append((samples[i], outvect))
        imres._patterns.append((samples[i], labels[i], outvect))

    for i in range(100, 200):
        outvect = [0] * 10
        outvect[labels[i]] = 1
        imres.test_patterns.append((samples[i], outvect))
        imres._test_patterns.append((samples[i], labels[i], outvect))

    print 'Training samples: %d' % len(imres.patterns)
    print 'Testing samples: %d' % len(imres.test_patterns)
    print 'Target error: %.4f' % target_error

    final_err, steps = imres.train_target(imres.patterns,
                                          target_error=target_error)

    print 'Training done in %d steps with final error of %.6f' % (steps,
                                                                  final_err)

    print '----- Detailed test output -----'
    total_tests = len(imres._test_patterns)
    total_fails = 0
    for inp, num, target in imres._test_patterns:
        computed = imres.run(inp)
        error = abs(computed[0] - target[0])
        computed = map(lambda x: round(x, 4), computed)
        maxn = computed[0]
        pos = 0
        for i in range(len(computed)):
            if computed[i] > maxn:
                maxn = computed[i]
                pos = i

        if num != pos:
            total_fails += 1
        print 'in: %d, out: %d' % (num, pos)
        print 'target: %s \noutput: %s' % (target, computed)

    print '-----'
    print 'Testing done - %d of %d samples classified incorrectly' % (
        total_fails, total_tests)
Example #13
    def init_model_params(self, dim_x, dim_y):
        print 'M2 model params initialize'

        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden']  # [500, 500, 500]
        n_hidden_recognize = n_hidden
        n_hidden_generate = n_hidden[::-1]

        self.type_px = self.hyper_params['type_px']

        activation = {
            'tanh': T.tanh,
            'relu': self.relu,
            'softplus': self.softplus,
            'sigmoid': T.nnet.sigmoid,
            'none': self.identify,
        }

        self.nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        self.nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f = activation['none']

        # Recognize model
        self.recognize_layers = [
            Layer(param_shape=(dim_x, n_hidden_recognize[0]),
                  function=self.identify,
                  nonbias=True),
            Layer(param_shape=(dim_y, n_hidden_recognize[0]),
                  function=self.identify)
        ]
        if len(n_hidden_recognize) > 1:
            self.recognize_layers += [
                Layer(param_shape=shape,
                      function=self.nonlinear_q) for shape in zip(
                          n_hidden_recognize[:-1], n_hidden_recognize[1:])
            ]
        self.recognize_mean_layer = Layer(param_shape=(n_hidden_recognize[-1],
                                                       dim_z),
                                          function=self.identify)
        self.recognize_log_var_layer = Layer(
            param_shape=(n_hidden_recognize[-1], dim_z),
            function=self.identify,
            w_zero=True,
            b_zero=True)

        # Generate Model
        self.generate_layers = [
            Layer((dim_z, n_hidden_generate[0]),
                  function=self.identify,
                  nonbias=True),
            Layer((dim_y, n_hidden_generate[0]), function=self.identify),
        ]
        if len(n_hidden) > 1:
            self.generate_layers += [
                Layer(param_shape=shape, function=self.nonlinear_p)
                for shape in zip(n_hidden_generate[:-1], n_hidden_generate[1:])
            ]
        self.generate_mean_layer = Layer(param_shape=(n_hidden_generate[-1],
                                                      dim_x),
                                         function=output_f)
        self.generate_log_var_layer = Layer(param_shape=(n_hidden_generate[-1],
                                                         dim_x),
                                            function=self.identify,
                                            b_zero=True)

        # Add all parameters
        self.model_params_ = ([
            param for layer in self.recognize_layers for param in layer.params
        ] + self.recognize_mean_layer.params +
                              self.recognize_log_var_layer.params + [
                                  param for layer in self.generate_layers
                                  for param in layer.params
                              ] + self.generate_mean_layer.params)

        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_var_layer.params
Example #14
class M2_VAE(Base_VAE):
    def __init__(self,
                 hyper_params=None,
                 optimize_params=None,
                 model_params=None):
        super(M2_VAE, self).__init__(hyper_params,
                                     optimize_params,
                                     model_params,
                                     model_name='M2')

    def init_model_params(self, dim_x, dim_y):
        print 'M2 model params initialize'

        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden']  # [500, 500, 500]
        n_hidden_recognize = n_hidden
        n_hidden_generate = n_hidden[::-1]

        self.type_px = self.hyper_params['type_px']

        activation = {
            'tanh': T.tanh,
            'relu': self.relu,
            'softplus': self.softplus,
            'sigmoid': T.nnet.sigmoid,
            'none': self.identify,
        }

        self.nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        self.nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f = activation['none']

        # Recognize model
        self.recognize_layers = [
            Layer(param_shape=(dim_x, n_hidden_recognize[0]),
                  function=self.identify,
                  nonbias=True),
            Layer(param_shape=(dim_y, n_hidden_recognize[0]),
                  function=self.identify)
        ]
        if len(n_hidden_recognize) > 1:
            self.recognize_layers += [
                Layer(param_shape=shape,
                      function=self.nonlinear_q) for shape in zip(
                          n_hidden_recognize[:-1], n_hidden_recognize[1:])
            ]
        self.recognize_mean_layer = Layer(param_shape=(n_hidden_recognize[-1],
                                                       dim_z),
                                          function=self.identify)
        self.recognize_log_var_layer = Layer(
            param_shape=(n_hidden_recognize[-1], dim_z),
            function=self.identify,
            w_zero=True,
            b_zero=True)

        # Generate Model
        self.generate_layers = [
            Layer((dim_z, n_hidden_generate[0]),
                  function=self.identify,
                  nonbias=True),
            Layer((dim_y, n_hidden_generate[0]), function=self.identify),
        ]
        if len(n_hidden) > 1:
            self.generate_layers += [
                Layer(param_shape=shape, function=self.nonlinear_p)
                for shape in zip(n_hidden_generate[:-1], n_hidden_generate[1:])
            ]
        self.generate_mean_layer = Layer(param_shape=(n_hidden_generate[-1],
                                                      dim_x),
                                         function=output_f)
        self.generate_log_var_layer = Layer(param_shape=(n_hidden_generate[-1],
                                                         dim_x),
                                            function=self.identify,
                                            b_zero=True)

        # Add all parameters
        self.model_params_ = ([
            param for layer in self.recognize_layers for param in layer.params
        ] + self.recognize_mean_layer.params +
                              self.recognize_log_var_layer.params + [
                                  param for layer in self.generate_layers
                                  for param in layer.params
                              ] + self.generate_mean_layer.params)

        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_var_layer.params

    def recognize_model(self, X, Y):
        for i, layer in enumerate(self.recognize_layers):
            if i == 0:
                layer_out = layer.fprop(X)
            elif i == 1:
                layer_out += layer.fprop(Y)
                layer_out = self.nonlinear_q(layer_out)
            else:
                layer_out = layer.fprop(layer_out)

        q_mean = self.recognize_mean_layer.fprop(layer_out)
        q_log_var = self.recognize_log_var_layer.fprop(layer_out)

        return {
            'q_mean': q_mean,
            'q_log_var': q_log_var,
        }

    def generate_model(self, Z, Y):
        for i, layer in enumerate(self.generate_layers):
            if i == 0:
                layer_out = layer.fprop(Z)
            elif i == 1:
                layer_out += layer.fprop(Y)
                layer_out = self.nonlinear_p(layer_out)
            else:
                layer_out = layer.fprop(layer_out)

        p_mean = self.generate_mean_layer.fprop(layer_out)
        p_log_var = self.generate_log_var_layer.fprop(layer_out)

        return {'p_mean': p_mean, 'p_log_var': p_log_var}

    def encode(self, x, y):
        if self.encode_main is None:
            X = T.matrix()
            Y = T.matrix()
            self.encode_main = theano.function(inputs=[X, Y],
                                               outputs=self.recognize_model(
                                                   X, Y)['q_mean'])
        return self.encode_main(x, y)

    def decode(self, z, y):
        if self.decode_main is None:
            Z = T.matrix()
            Y = T.matrix()
            self.decode_main = theano.function(inputs=[Z, Y],
                                               outputs=self.generate_model(
                                                   Z, Y)['p_mean'])
        return self.decode_main(z, y)

    def get_expr_lbound(self, X, Y):
        n_samples = X.shape[0]

        recognized_zs = self.recognize_model(X, Y)
        q_mean = recognized_zs['q_mean']
        q_log_var = recognized_zs['q_log_var']

        eps = self.rng_noise.normal(avg=0., std=1., size=q_mean.shape).astype(
            theano.config.floatX)
        # T.exp(0.5 * q_log_var) = std
        # z = mean_z + std * epsilon
        z_tilda = q_mean + T.exp(0.5 * q_log_var) * eps

        generated_x = self.generate_model(z_tilda, Y)
        p_mean = generated_x['p_mean']
        p_log_var = generated_x['p_log_var']

        if self.type_px == 'gaussian':
            log_p_x_given_z = (-0.5 * np.log(2 * np.pi) - 0.5 * p_log_var -
                               0.5 * (X - p_mean)**2 / (2 * T.exp(p_log_var)))
        elif self.type_px == 'bernoulli':
            # log_p_x_given_z = X * T.log(p_mean) + (1 - X) * T.log(1 - p_mean)
            log_p_x_given_z = -T.nnet.binary_crossentropy(p_mean, X)

        logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_log_var)
        logpz = -0.5 * (np.log(2 * np.pi) + q_mean**2 + T.exp(q_log_var))
        # logqz = - 0.5 * T.sum(np.log(2 * np.pi) + 1 + q_log_var, axis=1)
        # logpz = - 0.5 * T.sum(np.log(2 * np.pi) + q_mean ** 2 + T.exp(q_log_var), axis=1)
        D_KL = T.sum(logpz - logqz)
        recon_error = T.sum(log_p_x_given_z)

        return D_KL, recon_error
        # return log_p_x_given_z, logpz, logqz

    def fit(self, x_datas, y_labels):
        X = T.matrix()
        Y = T.matrix()
        self.rng_noise = RandomStreams(self.hyper_params['rng_seed'])
        self.init_model_params(dim_x=x_datas.shape[1], dim_y=y_labels.shape[1])

        D_KL, recon_error = self.get_expr_lbound(X, Y)
        L = D_KL + recon_error

        print 'start fitting'
        gparams = T.grad(cost=L, wrt=self.model_params_)

        optimizer = {
            'sgd': self.sgd,
            'adagrad': self.adagrad,
            'adadelta': self.adaDelta,
            'rmsprop': self.rmsProp,
            'adam': self.adam
        }

        updates = optimizer[self.hyper_params['optimizer']](
            self.model_params_, gparams, self.optimize_params)
        self.hist = self.early_stopping(
            # self.hist = self.optimize(
            X,
            Y,
            x_datas,
            y_labels,
            self.optimize_params,
            L,
            updates,
            self.rng,
            D_KL,
            recon_error,
        )

    def optimize(self, X, Y, x_datas, y_labels, hyper_params, cost, updates,
                 rng, D_KL, recon_error):
        n_iters = hyper_params['n_iters']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']

        train_x = x_datas[:50000]
        valid_x = x_datas[50000:]

        train_y = y_labels[:50000]
        valid_y = y_labels[50000:]

        train = theano.function(inputs=[X, Y],
                                outputs=[cost, D_KL, recon_error],
                                updates=updates)

        validate = theano.function(inputs=[X, Y],
                                   outputs=[cost, D_KL, recon_error])

        n_samples = train_x.shape[0]
        cost_history = []

        total_cost = 0
        total_dkl = 0
        total_recon_error = 0
        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)
            for j in xrange(0, n_samples, minibatch_size):
                cost, D_KL, recon_error = train(
                    train_x[ixs[j:j + minibatch_size]],
                    train_y[ixs[j:j + minibatch_size]])
                # print np.sum(hoge(train_x[:1])[0])
                total_cost += cost
                total_dkl += D_KL
                total_recon_error += recon_error

            if np.mod(i, n_mod_history) == 0:
                num = n_samples / minibatch_size
                print(
                    '%d epoch train D_KL error: %.3f, Reconstruction error: %.3f, total error: %.3f'
                    % (i, total_dkl / num, total_recon_error / num,
                       total_cost / num))
                total_cost = 0
                total_dkl = 0
                total_recon_error = 0
                valid_error, valid_dkl, valid_recon_error = validate(
                    valid_x, valid_y)
                print '\tvalid D_KL error: %.3f, Reconstruction error: %.3f, total error: %.3f' % (
                    valid_dkl, valid_recon_error, valid_error)
                cost_history.append((i, valid_error))
        return cost_history

    def early_stopping(self, X, Y, x_datas, y_labels, hyper_params, cost,
                       updates, rng, D_KL, recon_error):
        minibatch_size = hyper_params['minibatch_size']

        train_x = x_datas[:50000]
        valid_x = x_datas[50000:]

        train_y = y_labels[:50000]
        valid_y = y_labels[50000:]

        train = theano.function(inputs=[X, Y],
                                outputs=[cost, D_KL, recon_error],
                                updates=updates)

        validate = theano.function(
            inputs=[X, Y],
            outputs=cost,
        )

        n_samples = train_x.shape[0]
        cost_history = []
        best_params = None
        valid_best_error = -np.inf
        best_epoch = 0
        patience = 5000
        patience_increase = 2
        improvement_threshold = 1.005

        done_looping = False

        for i in xrange(1000000):
            if done_looping: break
            ixs = rng.permutation(n_samples)
            for j in xrange(0, n_samples, minibatch_size):
                cost, D_KL, recon_error = train(
                    train_x[ixs[j:j + minibatch_size]],
                    train_y[ixs[j:j + minibatch_size]])

                iter = i * (n_samples / minibatch_size) + j / minibatch_size

                if (iter + 1) % 50 == 0:
                    valid_error = 0.
                    for _ in xrange(3):
                        valid_error += validate(valid_x, valid_y)
                    valid_error /= 3
                    if i % 100 == 0:
                        print 'epoch %d, minibatch %d/%d, valid total error: %.3f' % (
                            i, j / minibatch_size + 1,
                            n_samples / minibatch_size, valid_error)
                    cost_history.append((i * j, valid_error))
                    if valid_error > valid_best_error:
                        if valid_error > valid_best_error * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_params = self.model_params_
                        valid_best_error = valid_error
                        best_epoch = i

                if patience <= iter:
                    done_looping = True
                    break
        self.model_params_ = best_params
        print 'epoch %d, minibatch %d/%d, valid total error: %.3f' % (
            best_epoch, j / minibatch_size + 1, n_samples / minibatch_size,
            valid_best_error)
        return cost_history
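In get_expr_lbound above, logpz - logqz collapses to the familiar closed-form Gaussian term, so the quantity called D_KL is the negative of KL(q(z|x, y) ‖ N(0, I)) and L = D_KL + recon_error is the variational lower bound being maximized. A stand-alone NumPy check of that identity, with arbitrary test values:

import numpy as np

q_mean, q_log_var = 0.3, -0.7
logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_log_var)
logpz = -0.5 * (np.log(2 * np.pi) + q_mean**2 + np.exp(q_log_var))
closed_form = 0.5 * (1 + q_log_var - q_mean**2 - np.exp(q_log_var))
assert np.isclose(logpz - logqz, closed_form)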
Example #15
class M1_GVAE(object):
    def __init__(self,
                 hyper_params=None,
                 sgd_params=None,
                 adagrad_params=None,
                 model_params=None):

        if (sgd_params is not None) and (adagrad_params is not None):
            raise ValueError('Error: select only one algorithm')

        self.hyper_params = hyper_params
        self.sgd_params = sgd_params
        self.adagrad_params = adagrad_params
        self.model_params = model_params

        self.rng = np.random.RandomState(hyper_params['rng_seed'])

        self.model_params_ = None
        self.decode_main = None
        self.encode_main = None

    def init_model_params(self, dim_x):
        print 'M1 model params initialize'
        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden']  # [500, 500, 500]
        self.type_px = self.hyper_params['type_px']

        def relu(x):
            return x * (x > 0) + 0.01 * x

        def softplus(x):
            return T.log(T.exp(x) + 1)

        activation = {
            'tanh': T.tanh,
            'relu': relu,
            'softplus': softplus,
            'sigmoid': T.nnet.sigmoid,
            'none': None
        }
        nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f = activation['none']

        # Recognize model
        self.recognize_layers = [
            Layer((dim_x, n_hidden[0]), function=nonlinear_q)
        ]
        if len(n_hidden) > 1:
            self.recognize_layers += [
                Layer(shape, function=nonlinear_q)
                for shape in zip(n_hidden[:-1], n_hidden[1:])
            ]
        self.recognize_mean_layer = Layer((n_hidden[-1], dim_z), function=None)
        self.recognize_log_sigma_layer = Layer((n_hidden[-1], dim_z),
                                               function=None,
                                               w_zero=True,
                                               b_zero=True)

        # Generate Model
        self.generate_layers = [
            Layer((dim_z, n_hidden[0]), function=nonlinear_p)
        ]
        if len(n_hidden) > 1:
            self.generate_layers += [
                Layer(shape, function=nonlinear_p)
                for shape in zip(n_hidden[:-1], n_hidden[1:])
            ]
        self.generate_mean_layer = Layer((n_hidden[-1], dim_x),
                                         function=output_f)
        self.generate_log_sigma_layer = Layer((n_hidden[-1], dim_x),
                                              function=None,
                                              b_zero=True)

        self.model_params_ = ([
            param for layer in self.generate_layers for param in layer.params
        ] + self.recognize_mean_layer.params +
                              self.recognize_log_sigma_layer.params + [
                                  param for layer in self.recognize_layers
                                  for param in layer.params
                              ] + self.generate_mean_layer.params)

        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_sigma_layer.params

    def generate_model(self, Z):

        for i, layer in enumerate(self.generate_layers):
            if i == 0:
                layer_out = layer.fprop(Z)
            else:
                layer_out = layer.fprop(layer_out)

        p_mean = self.generate_mean_layer.fprop(layer_out)
        p_log_var = self.generate_log_sigma_layer.fprop(layer_out)

        return {
            # 'mu': 0.5 * (T.tanh(p_mean) + 1), # 0 <= mu <= 1
            # 'log_sigma': 3 * T.tanh(p_log_var) - 1, # -4 <= log sigma **2 <= 2
            # 'mu': T.clip(p_mean, 0., 1.),
            # 'log_sigma': T.clip(p_log_var, -4., 2.)
            'mu': p_mean,
            'log_sigma': p_log_var
        }

    def recognize_model(self, X):

        for i, layer in enumerate(self.recognize_layers):
            if i == 0:
                layer_out = layer.fprop(X)
            else:
                layer_out = layer.fprop(layer_out)

        q_mean = self.recognize_mean_layer.fprop(layer_out)
        q_log_var = self.recognize_log_sigma_layer.fprop(layer_out)

        return {
            'mu': q_mean,
            # 'log_sigma': 3 * T.tanh(q_log_var) - 1,
            # 'log_sigma': T.clip(q_log_var, -4., 2.)
            'log_sigma': q_log_var
        }

    def decode(self, z):
        if self.decode_main is None:
            Z = T.matrix()
            self.decode_main = theano.function(
                inputs=[Z], outputs=self.generate_model(Z)['mu'])
        return self.decode_main(z)

    def encode(self, x):
        if self.encode_main is None:
            X = T.matrix()
            self.encode_main = theano.function(
                inputs=[X], outputs=self.recognize_model(X)['mu'])
        return self.encode_main(x)

    def get_expr_lbound(self, X):
        n_mc_sampling = self.hyper_params['n_mc_sampling']
        n_samples = X.shape[0]
        dim_z = self.hyper_params['dim_z']

        stats_z = self.recognize_model(X)
        q_mean = stats_z['mu']
        q_log_var = stats_z['log_sigma']

        eps = self.rng_noise.normal(size=(n_mc_sampling, n_samples, dim_z))
        z_tilda = q_mean + T.exp(0.5 * q_log_var) * eps

        stats_x = self.generate_model(z_tilda)
        p_mean = stats_x['mu']
        p_log_var = stats_x['log_sigma']

        if self.type_px == 'gaussian':
            log_p_x_given_z = (-0.5 * np.log(2 * np.pi) - 0.5 * p_log_var -
                               0.5 * (X - p_mean)**2 / (2 * T.exp(p_log_var)))
        elif self.type_px == 'bernoulli':
            log_p_x_given_z = X * T.log(p_mean) + (1 - X) * T.log(1 - p_mean)

        logqz = -0.5 * T.sum(np.log(2 * np.pi) + 1 + q_log_var)
        logpz = -0.5 * T.sum(np.log(2 * np.pi) + q_mean**2 + T.exp(q_log_var))
        consts = []

        return (T.sum(log_p_x_given_z) / n_mc_sampling +
                (logpz - logqz)) / n_samples, consts

    def fit(self, x_datas):
        X = T.matrix()
        self.rng_noise = RandomStreams(self.hyper_params['rng_seed'])
        self.init_model_params(dim_x=x_datas.shape[1])

        lbound, consts = self.get_expr_lbound(X)
        cost = -lbound

        print 'start fitting'
        self.hist = self.adam_calc(x_datas, cost, consts, X,
                                   self.model_params_, self.adagrad_params,
                                   self.rng)

    def adagrad_calc(self, x_datas, cost, consts, X, model_params,
                     hyper_params, rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        hs = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX)) for param in model_params
        ]

        gparams = T.grad(cost=cost, wrt=model_params, consider_constant=consts)
        updates = [(param, param - learning_rate / (T.sqrt(h)) * gparam)
                   for param, gparam, h in zip(model_params, gparams, hs)]
        updates += [(h, h + gparam**2) for gparam, h in zip(gparams, hs)]

        train = theano.function(inputs=[X], outputs=cost, updates=updates)

        validate = theano.function(inputs=[X], outputs=cost)

        n_samples = x_datas.shape[0]
        cost_history = []

        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])
            # print minibatch_cost

            if np.mod(i, n_mod_history) == 0:
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history

    def adam_calc(self, x_datas, cost, consts, X, model_params, hyper_params,
                  rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        rs = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX)) for param in model_params
        ]
        vs = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX)) for param in model_params
        ]
        ts = [
            theano.shared(
                np.ones(param.get_value(borrow=True).shape).astype(
                    theano.config.floatX)) for param in model_params
        ]

        gnma = 0.999
        beta = 0.9
        weight_decay = 1000 / 50000.

        gparams = T.grad(cost=cost, wrt=model_params, consider_constant=consts)

        updates = [(param, param - learning_rate /
                    (T.sqrt(r / (1 - gnma**t))) * v / (1 - beta**t))
                   for param, r, v, t in zip(model_params, rs, vs, ts)]
        updates += [
            (r, gnma * r + (1 - gnma) * (gparam - weight_decay * param)**2)
            for param, gparam, r in zip(model_params, gparams, rs)
        ]
        updates += [(v,
                     beta * v + (1 - beta) * (gparam - weight_decay * param))
                    for param, gparam, v in zip(model_params, gparams, vs)]
        updates += [(t, t + 1) for t in ts]

        train = theano.function(inputs=[X], outputs=cost, updates=updates)

        validate = theano.function(inputs=[X], outputs=cost)

        n_samples = x_datas.shape[0]
        cost_history = []

        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])
            # print minibatch_cost

            if np.mod(i, n_mod_history) == 0:
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history
Example #16
    def test_init_weights(self):
        x = Layer(1)
        x.init_weights()
        self.assertIsNone(x.weights)

        cnf = lambda: 0

        x = Layer(1, cnf)
        y = Layer(1, cnf)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0]])

        x = Layer(1, cnf)
        y = Layer(2, cnf)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0, 0]])

        x = Layer(2, cnf)
        y = Layer(1, cnf)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0], [0]])

        x = Layer(2, cnf)
        y = Layer(2, cnf)
        x.next_layer(y)
        x.init_weights()
        self.assertEqual(x.weights, [[0, 0], [0, 0]])
Example #17
    def init_model_params(self, dim_x):
        print 'M1 model params initialize'
        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden']  # [500, 500, 500]
        self.type_px = self.hyper_params['type_px']

        def relu(x):
            return x * (x > 0) + 0.01 * x

        def softplus(x):
            return T.log(T.exp(x) + 1)

        activation = {
            'tanh': T.tanh,
            'relu': relu,
            'softplus': softplus,
            'sigmoid': T.nnet.sigmoid,
            'none': None
        }
        nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f = activation['none']

        # Recognize model
        self.recognize_layers = [
            Layer((dim_x, n_hidden[0]), function=nonlinear_q)
        ]
        if len(n_hidden) > 1:
            self.recognize_layers += [
                Layer(shape, function=nonlinear_q)
                for shape in zip(n_hidden[:-1], n_hidden[1:])
            ]
        self.recognize_mean_layer = Layer((n_hidden[-1], dim_z), function=None)
        self.recognize_log_sigma_layer = Layer((n_hidden[-1], dim_z),
                                               function=None,
                                               w_zero=True,
                                               b_zero=True)

        # Generate Model
        self.generate_layers = [
            Layer((dim_z, n_hidden[0]), function=nonlinear_p)
        ]
        if len(n_hidden) > 1:
            self.generate_layers += [
                Layer(shape, function=nonlinear_p)
                for shape in zip(n_hidden[:-1], n_hidden[1:])
            ]
        self.generate_mean_layer = Layer((n_hidden[-1], dim_x),
                                         function=output_f)
        self.generate_log_sigma_layer = Layer((n_hidden[-1], dim_x),
                                              function=None,
                                              b_zero=True)

        self.model_params_ = ([
            param for layer in self.generate_layers for param in layer.params
        ] + self.recognize_mean_layer.params +
                              self.recognize_log_sigma_layer.params + [
                                  param for layer in self.recognize_layers
                                  for param in layer.params
                              ] + self.generate_mean_layer.params)

        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_sigma_layer.params
Example #18
nn_path = options.model
if path.isdir(nn_path) is True:
    raise Exception(nn_path + ': Is a directory.')
if path.exists(nn_path) is False:
    raise Exception(nn_path + ': No such file or directory.')
"""
The format of the save is as following
[layer:[activation,n_input,neurons,weights,biases]]
"""
nn_load = np.load(nn_path, allow_pickle=True)
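# A file in this format could be produced, for instance, like the sketch below
# (illustrative only, not the project's nn.save implementation; the layer sizes
# are made up):
#
#     layers = [
#         # [activation, n_input, neurons, weights, biases]
#         ['tanh', 4, 14, np.zeros((4, 14)), np.zeros(14)],
#         ['softmax', 14, 3, np.zeros((14, 3)), np.zeros(3)],
#     ]
#     np.save(nn_path, np.asarray(layers, dtype=object))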

nn = NeuralNetwork()

# Use all provided dataset
cfg['batch_size'] = 1

# Load data set
_, _, X_test, y_test = preprocessing(cfg, csv2data(dataset_path))

for x in nn_load:
    activation = x[0]
    weights = x[3]
    bias = x[4]
    nn.add_layer(Layer(activation=activation, weights=weights, bias=bias))

y_predict = nn.feed_forward(X_test)

print('MSE: %f' % (nn.mean_squarred_error(y_predict, y_test)))
print('CEE: %f' % (nn.cross_entropy_error(y_predict, y_test)))
print('ACCURACY: %f' % (nn.accuracy(y_predict, y_test)))
Example #19
def create_tab(function_text, model, learning_rate=1e-3):
    return {
        'function': function_text,
        'model': model,
        'gradient': Gradient(),
        'error': SquareError(),
        'teacher': GradientTeacher(),
        'learning_rate': learning_rate
    }


tabs = [
    create_tab(function_text="2 * x",
               model=MultipleLayersModel([
                   Layer(input_dimension=1,
                         output_dimension=1,
                         activation_function=LinearFunction()),
                   Layer(input_dimension=1,
                         output_dimension=1,
                         activation_function=LinearFunction())
               ])),
    create_tab(function_text="50 * x",
               learning_rate=1e-4,
               model=MultipleLayersModel([
                   Layer(input_dimension=1,
                         output_dimension=1,
                         activation_function=LinearFunction()),
                   Layer(input_dimension=1,
                         output_dimension=1,
                         activation_function=LinearFunction())
               ])),
Example #20
class M1_GVAE(object):
    def __init__(
        self,
        hyper_params=None,
        sgd_params=None,
        adagrad_params=None,
        model_params=None
    ):

        if (sgd_params is not None) and (adagrad_params is not None):
            raise ValueError('Error: select only one algorithm')

        self.hyper_params = hyper_params
        self.sgd_params = sgd_params
        self.adagrad_params = adagrad_params
        self.model_params = model_params

        self.rng = np.random.RandomState(hyper_params['rng_seed'])

        self.model_params_ = None
        self.decode_main = None
        self.encode_main = None



    def init_model_params(self, dim_x):
        print 'M1 model params initialize'
        dim_z = self.hyper_params['dim_z']
        n_hidden = self.hyper_params['n_hidden'] # [500, 500, 500]
        self.type_px = self.hyper_params['type_px']
        def relu(x): return x*(x>0) + 0.01 * x
        def softplus(x): return T.log(T.exp(x) + 1)
        activation = {'tanh': T.tanh, 'relu': relu, 'softplus': softplus, 'sigmoid': T.nnet.sigmoid, 'none': None}
        nonlinear_q = activation[self.hyper_params['nonlinear_q']]
        nonlinear_p = activation[self.hyper_params['nonlinear_p']]
        if self.type_px == 'bernoulli':
            output_f = activation['sigmoid']
        elif self.type_px == 'gaussian':
            output_f= activation['none']

        # Recognize model
        self.recognize_layers = [Layer((dim_x, n_hidden[0]), function=nonlinear_q)]
        if len(n_hidden) > 1:
            self.recognize_layers += [Layer(shape, function=nonlinear_q) for shape in zip(n_hidden[:-1], n_hidden[1:])]
        self.recognize_mean_layer = Layer((n_hidden[-1], dim_z), function=None)
        self.recognize_log_sigma_layer = Layer((n_hidden[-1], dim_z), function=None, w_zero=True, b_zero=True)


        # Generate Model
        self.generate_layers = [Layer((dim_z, n_hidden[0]), function=nonlinear_p)]
        if len(n_hidden) > 1:
            self.generate_layers += [Layer(shape, function=nonlinear_p) for shape in zip(n_hidden[:-1], n_hidden[1:])]
        self.generate_mean_layer = Layer((n_hidden[-1], dim_x), function=output_f)
        self.generate_log_sigma_layer = Layer((n_hidden[-1], dim_x), function=None, b_zero=True)


        self.model_params_ = (
            [param for layer in self.generate_layers for param in layer.params] +
            self.recognize_mean_layer.params +
            self.recognize_log_sigma_layer.params +
            [param for layer in self.recognize_layers for param in layer.params] +
            self.generate_mean_layer.params
        )

        if self.type_px == 'gaussian':
            self.model_params_ += self.generate_log_sigma_layer.params

    def generate_model(self, Z):

        for i, layer in enumerate(self.generate_layers):
            if i == 0:
                layer_out = layer.fprop(Z)
            else:
                layer_out = layer.fprop(layer_out)

        p_mean = self.generate_mean_layer.fprop(layer_out)
        p_log_var = self.generate_log_sigma_layer.fprop(layer_out)

        return {
            # 'mu': 0.5 * (T.tanh(p_mean) + 1), # 0 <= mu <= 1
            # 'log_sigma': 3 * T.tanh(p_log_var) - 1, # -4 <= log sigma **2 <= 2
            # 'mu': T.clip(p_mean, 0., 1.),
            # 'log_sigma': T.clip(p_log_var, -4., 2.)
            'mu': p_mean,
            'log_sigma': p_log_var
        }

    def recognize_model(self, X):

        for i, layer in enumerate(self.recognize_layers):
            if i == 0:
                layer_out = layer.fprop(X)
            else:
                layer_out = layer.fprop(layer_out)

        q_mean = self.recognize_mean_layer.fprop(layer_out)
        q_log_var = self.recognize_log_sigma_layer.fprop(layer_out)

        return {
            'mu': q_mean,
            # 'log_sigma': 3 * T.tanh(q_log_var) - 1,
            # 'log_sigma': T.clip(q_log_var, -4., 2.)
            'log_sigma': q_log_var
        }

    def decode(self, z):
        if self.decode_main is None:
            Z = T.matrix()
            self.decode_main = theano.function(
                inputs=[Z],
                outputs=self.generate_model(Z)['mu']
            )
        return self.decode_main(z)

    def encode(self, x):
        if self.encode_main is None:
            X = T.matrix()
            self.encode_main = theano.function(
                inputs=[X],
                outputs=self.recognize_model(X)['mu']
            )
        return self.encode_main(x)

    def get_expr_lbound(self, X):
        n_mc_sampling = self.hyper_params['n_mc_sampling']
        n_samples = X.shape[0]
        dim_z = self.hyper_params['dim_z']

        stats_z = self.recognize_model(X)
        q_mean = stats_z['mu']
        q_log_var = stats_z['log_sigma']

        eps = self.rng_noise.normal(size=(n_mc_sampling, n_samples, dim_z))
        z_tilda = q_mean + T.exp(0.5 * q_log_var) * eps

        stats_x = self.generate_model(z_tilda)
        p_mean = stats_x['mu']
        p_log_var = stats_x['log_sigma']

        if self.type_px == 'gaussian':
            log_p_x_given_z = (
                -0.5 * np.log(2 * np.pi) - 0.5 * p_log_var - 0.5 * (X - p_mean) ** 2 / (2 * T.exp(p_log_var))
            )
        elif self.type_px == 'bernoulli':
            log_p_x_given_z = X * T.log(p_mean) + (1 - X) * T.log(1 - p_mean)

        logqz = - 0.5 * T.sum(np.log(2 * np.pi) + 1 + q_log_var)
        logpz = - 0.5 * T.sum(np.log(2 * np.pi) + q_mean ** 2 + T.exp(q_log_var))
        consts = []

        return (T.sum(log_p_x_given_z) / n_mc_sampling + (logpz - logqz)) / n_samples, consts


    def fit(self, x_datas):
        X = T.matrix()
        self.rng_noise = RandomStreams(self.hyper_params['rng_seed'])
        self.init_model_params(dim_x=x_datas.shape[1])

        lbound, consts = self.get_expr_lbound(X)
        cost = -lbound

        print 'start fitting'
        self.hist = self.adam_calc(
            x_datas,
            cost,
            consts,
            X,
            self.model_params_,
            self.adagrad_params,
            self.rng
        )

    def adagrad_calc(self, x_datas, cost, consts, X, model_params, hyper_params, rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        hs = [theano.shared(np.ones(
                    param.get_value(borrow=True).shape
                ).astype(theano.config.floatX))
            for param in model_params]

        gparams = T.grad(
            cost=cost,
            wrt=model_params,
            consider_constant=consts
        )
        updates = [(param, param - learning_rate / (T.sqrt(h)) * gparam)
                    for param, gparam, h in zip(model_params, gparams, hs)]
        updates += [(h, h + gparam ** 2) for gparam, h in zip(gparams, hs)]

        train = theano.function(
            inputs=[X],
            outputs=cost,
            updates=updates
        )

        validate = theano.function(
            inputs=[X],
            outputs=cost
        )

        n_samples = x_datas.shape[0]
        cost_history = []

        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])
            # print minibatch_cost

            if np.mod(i, n_mod_history) == 0:
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history


    def adam_calc(self, x_datas, cost, consts, X, model_params, hyper_params, rng):
        n_iters = hyper_params['n_iters']
        learning_rate = hyper_params['learning_rate']
        minibatch_size = hyper_params['minibatch_size']
        n_mod_history = hyper_params['n_mod_history']
        calc_history = hyper_params['calc_history']

        rs = [theano.shared(np.ones(
                    param.get_value(borrow=True).shape
                ).astype(theano.config.floatX))
            for param in model_params]
        vs = [theano.shared(np.ones(
                    param.get_value(borrow=True).shape
                ).astype(theano.config.floatX))
            for param in model_params]
        ts = [theano.shared(np.ones(
                    param.get_value(borrow=True).shape
                ).astype(theano.config.floatX))
            for param in model_params]

        gnma = 0.999
        beta = 0.9
        weight_decay = 1000 / 50000.

        gparams = T.grad(
            cost=cost,
            wrt=model_params,
            consider_constant=consts
        )


        updates = [(param, param - learning_rate / (T.sqrt(r / (1 - gnma ** t))) * v / (1 - beta ** t))
                    for param, r, v, t  in zip(model_params, rs, vs, ts)]
        updates += [(r, gnma * r + (1- gnma) * (gparam - weight_decay * param) ** 2) for param, gparam, r in zip(model_params, gparams, rs)]
        updates += [(v, beta * v + (1- beta) * (gparam - weight_decay * param)) for param, gparam, v in zip(model_params, gparams, vs)]
        updates += [(t, t + 1) for t in ts]


        train = theano.function(
            inputs=[X],
            outputs=cost,
            updates=updates
        )

        validate = theano.function(
            inputs=[X],
            outputs=cost
        )

        n_samples = x_datas.shape[0]
        cost_history = []

        for i in xrange(n_iters):
            ixs = rng.permutation(n_samples)[:minibatch_size]
            minibatch_cost = train(x_datas[ixs])
            # print minibatch_cost

            if np.mod(i, n_mod_history) == 0:
                print '%d epoch error: %f' % (i, minibatch_cost)
                if calc_history == 'minibatch':
                    cost_history.append((i, minibatch_cost))
                else:
                    cost_history.append((i, validate(x_datas[ixs])))
        return cost_history