Code Example #1
def adam(learning_rate, parameters, grads, inputs, cost):
    g_shared = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k) for k, p in parameters.iteritems()]
    gs_up = [(gs, g) for gs, g in zip(g_shared, grads)]

    f_grad_shared = theano.function(inputs, cost, updates=gs_up)
    lr0 = 0.0002
    b1 = 0.1
    b2 = 0.001
    e = 1e-8
    updates = []
    i = theano.shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - b1 ** i_t
    fix2 = 1. - b2 ** i_t
    lr_t = lr0 * (T.sqrt(fix2) / fix1)

    for p, g in zip(parameters.values(), g_shared):
        m = theano.shared(p.get_value() * floatX(0.))
        v = theano.shared(p.get_value() * floatX(0.))
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))

    f_update = theano.function([learning_rate], [], updates=updates, on_unused_input='ignore')

    return f_grad_shared, f_update
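
A minimal usage sketch for the pair returned above (not from the source project; the symbolic inputs, the ordered dict of shared parameters, the gradient list and the cost are assumed to be defined by the surrounding training script, and the names below are placeholders):

lr = T.scalar(name='lr')                              # symbolic learning rate (this Adam variant ignores it)
f_grad_shared, f_update = adam(lr, tparams, grads, [x, y], cost)
for x_batch, y_batch in minibatches:                  # hypothetical minibatch iterator
    batch_cost = f_grad_shared(x_batch, y_batch)      # forward/backward pass, stores grads in g_shared
    f_update(0.0002)                                  # applies one Adam step (the lr argument is unused here)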
Code Example #2
    def create_param(param, shape):
        """
        Helper method to create Theano shared variables for
        Layer parameters and to initialize them.

        param: one of three things:
            - a numpy array with the initial parameter values
            - a Theano shared variable
            - a function or callable that takes the desired
              shape of the parameter array as its single
              argument.

        shape: the desired shape of the parameter array.
        """
        if isinstance(param, np.ndarray):
            if param.shape != shape:
                raise RuntimeError("parameter array has shape %s, should be %s" % (param.shape, shape))
            return theano.shared(param)

        elif isinstance(param, theano.compile.SharedVariable):
            # cannot check shape here, the shared variable might not be initialized correctly yet.
            return param

        elif hasattr(param, '__call__'):
            arr = param(shape)
            if not isinstance(arr, np.ndarray):
                raise RuntimeError("cannot initialize parameters: the provided callable did not return a numpy array")

            return theano.shared(utils.floatX(arr))

        else:
            raise RuntimeError("cannot initialize parameters: 'param' is not a numpy array, a Theano shared variable, or a callable")
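
A sketch of how such a helper is typically called from a layer constructor (illustrative only; `n_in`, `n_out` and the attribute name are placeholders). Any of the three accepted forms works:

self.W = self.create_param(np.random.randn(n_in, n_out).astype('float32'), (n_in, n_out))        # numpy array
self.W = self.create_param(theano.shared(utils.floatX(np.zeros((n_in, n_out)))), (n_in, n_out))  # shared variable
self.W = self.create_param(lambda shape: np.random.randn(*shape) * 0.01, (n_in, n_out))          # callable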
Code Example #3
    def prep_image(self, im):
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.repeat(im, 3, axis=2)
        h, w, _ = im.shape
        if h < w:
            im = skimage.transform.resize(im,
                                          (self.IMAGE_W, w * self.IMAGE_W / h),
                                          preserve_range=True)
        else:
            im = skimage.transform.resize(im,
                                          (h * self.IMAGE_W / w, self.IMAGE_W),
                                          preserve_range=True)

        # Central crop
        h, w, _ = im.shape
        im = im[h // 2 - self.IMAGE_W // 2:h // 2 + self.IMAGE_W // 2,
                w // 2 - self.IMAGE_W // 2:w // 2 + self.IMAGE_W // 2]

        rawim = np.copy(im).astype('uint8')

        # Shuffle axes to c01
        im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1)

        # Convert RGB to BGR
        im = im[::-1, :, :]

        im = im - self.MEAN_VALUES
        return rawim, floatX(im[np.newaxis])
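
An illustrative call (the helper instance and file name are hypothetical): the method returns both a uint8 centre crop for display and the network-ready array.

rawim, im = helper.prep_image(skimage.io.imread('photo.jpg'))
# rawim: (IMAGE_W, IMAGE_W, 3) uint8 crop for plotting
# im:    (1, 3, IMAGE_W, IMAGE_W) float array, channels first, BGR, mean-subtracted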
Code Example #4
File: updates.py Project: 19rick96/DMN
def adam(loss_or_grads, params, learning_rate=0.001, beta1=0.9,
         beta2=0.999, epsilon=1e-8):
    """Adam updates
    Adam updates implemented as in [1]_.
    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate
    beta1 : float
        Exponential decay rate for the first moment estimates.
    beta2 : float
        Exponential decay rate for the second moment estimates.
    epsilon : float
        Constant for numerical stability.
    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    Notes
    -----
    The paper [1]_ includes an additional hyperparameter lambda. This is only
    needed to prove convergence of the algorithm and has no practical use
    (personal communication with the authors), it is therefore omitted here.
    References
    ----------
    .. [1] Kingma, Diederik, and Jimmy Ba (2014):
           Adam: A Method for Stochastic Optimization.
           arXiv preprint arXiv:1412.6980.
    """
    all_grads = get_or_compute_grads(loss_or_grads, params)
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    t = t_prev + 1
    a_t = learning_rate*T.sqrt(one-beta2**t)/(one-beta1**t)

    for param, g_t in zip(params, all_grads):
        value = param.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1*m_prev + (one-beta1)*g_t
        v_t = beta2*v_prev + (one-beta2)*g_t**2
        step = a_t*m_t/(T.sqrt(v_t) + epsilon)

        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - step

    updates[t_prev] = t
    return updates
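
A sketch of wiring the returned update dictionary into a compiled training function (the loss expression, parameter list and input variables are assumed to be defined elsewhere):

updates = adam(loss, params, learning_rate=0.001)
train_fn = theano.function([input_var, target_var], loss, updates=updates)
# each call train_fn(x_batch, y_batch) now performs one Adam step on all params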
Code Example #5
File: init.py Project: Jakobularius/nntools
    def sample(self, shape):
        if len(shape) != 2:
            raise RuntimeError("sparse initializer only works with shapes of length 2")

        w = floatX(np.zeros(shape))
        n_inputs, n_outputs = shape
        size = int(self.sparsity * n_inputs) # fraction of the number of inputs

        for k in xrange(n_outputs):
            indices = np.arange(n_inputs)
            np.random.shuffle(indices)
            indices = indices[:size]
            values = floatX(np.random.normal(0.0, self.std, size=size))
            w[indices, k] = values

        return w
Code Example #6
File: updates.py Project: pyx123/QA
def adamax(loss_or_grads,
           params,
           learning_rate=0.002,
           beta1=0.9,
           beta2=0.999,
           epsilon=1e-8):
    """Adamax updates
    Adamax updates implemented as in [1]_. This is a variant of the Adam
    algorithm based on the infinity norm.
    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate
    beta1 : float
        Exponential decay rate for the first moment estimates.
    beta2 : float
        Exponential decay rate for the weighted infinity norm estimates.
    epsilon : float
        Constant for numerical stability.
    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    References
    ----------
    .. [1] Kingma, Diederik, and Jimmy Ba (2014):
           Adam: A Method for Stochastic Optimization.
           arXiv preprint arXiv:1412.6980.
    """
    all_grads = get_or_compute_grads(loss_or_grads, params)
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    t = t_prev + 1
    a_t = learning_rate / (one - beta1**t)

    for param, g_t in zip(params, all_grads):
        value = param.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1 * m_prev + (one - beta1) * g_t
        u_t = T.maximum(beta2 * u_prev, abs(g_t))
        step = a_t * m_t / (u_t + epsilon)

        updates[m_prev] = m_t
        updates[u_prev] = u_t
        updates[param] = param - step

    updates[t_prev] = t
    return updates
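
To make the per-parameter expressions concrete, here is one Adamax step on a single scalar written out in plain Python (illustrative values, not from the source; it mirrors m_t, u_t and the bias-corrected step above):

beta1, beta2, lr, eps = 0.9, 0.999, 0.002, 1e-8
p, g = 1.0, 0.5            # parameter value and its gradient
m, u, t = 0.0, 0.0, 1      # first moment, weighted infinity norm, step counter
m = beta1 * m + (1 - beta1) * g               # m_t
u = max(beta2 * u, abs(g))                    # u_t: exponentially weighted infinity norm
step = lr / (1 - beta1 ** t) * m / (u + eps)  # a_t * m_t / (u_t + eps)
p = p - step                                  # p becomes 0.998 with these numbers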
Code Example #7
    def sample(self, shape):
        if len(shape) != 2:
            raise RuntimeError(
                "sparse initializer only works with shapes of length 2")

        w = floatX(np.zeros(shape))
        n_inputs, n_outputs = shape
        size = int(self.sparsity * n_inputs)  # fraction of number of inputs

        for k in range(n_outputs):
            indices = np.arange(n_inputs)
            get_rng().shuffle(indices)
            indices = indices[:size]
            values = floatX(get_rng().normal(0.0, self.std, size=size))
            w[indices, k] = values

        return w
Code Example #8
    def get_output_for(self, input, deterministic=False, *args, **kwargs):
        if deterministic or self.p == 0:
            return input
        else:
            retain_prob = 1 - self.p
            if self.rescale:
                input /= retain_prob

            return input * utils.floatX(_srng.binomial(input.shape, p=retain_prob, dtype='int32'))
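
A rough numpy analogue of the expression above ("inverted" dropout, purely illustrative): units are kept with probability 1 - p and the survivors are rescaled so the expected activation matches the deterministic pass, which is why nothing has to be rescaled at test time.

import numpy as np
p = 0.5
x = np.ones((4, 3), dtype='float32')
mask = np.random.binomial(1, 1 - p, size=x.shape).astype('float32')
y = (x / (1 - p)) * mask    # E[y] == x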
Code Example #9
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
Code Example #10
File: updates.py Project: 19rick96/DMN
def adamax(loss_or_grads, params, learning_rate=0.002, beta1=0.9,
           beta2=0.999, epsilon=1e-8):
    """Adamax updates
    Adamax updates implemented as in [1]_. This is a variant of the Adam
    algorithm based on the infinity norm.
    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate
    beta1 : float
        Exponential decay rate for the first moment estimates.
    beta2 : float
        Exponential decay rate for the weighted infinity norm estimates.
    epsilon : float
        Constant for numerical stability.
    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    References
    ----------
    .. [1] Kingma, Diederik, and Jimmy Ba (2014):
           Adam: A Method for Stochastic Optimization.
           arXiv preprint arXiv:1412.6980.
    """
    all_grads = get_or_compute_grads(loss_or_grads, params)
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    t = t_prev + 1
    a_t = learning_rate/(one-beta1**t)

    for param, g_t in zip(params, all_grads):
        value = param.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1*m_prev + (one-beta1)*g_t
        u_t = T.maximum(beta2*u_prev, abs(g_t))
        step = a_t*m_t/(u_t + epsilon)

        updates[m_prev] = m_t
        updates[u_prev] = u_t
        updates[param] = param - step

    updates[t_prev] = t
    return updates
Code Example #11
def sgd(learning_rate, parameters, grads, inputs, cost):
    g_shared = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k) for k, p in parameters.iteritems()]
    gs_up = [(gs, g) for gs, g in zip(g_shared, grads)]

    f_grad_shared = theano.function(inputs, cost, updates=gs_up, profile=False)

    p_up = [(p, p - learning_rate * g) for p, g in zip(parameters.itervalues(), g_shared)]
    f_update = theano.function([learning_rate], [], updates=p_up, profile=False)

    return f_grad_shared, f_update
Code Example #12
    def iter(self):
        
        if self.shuffle:
            self.seqs, self.targets = shuffle(self.seqs, self.targets)

        for i in range(0, len(self.seqs), self.size):
            xmb, ymb = self.seqs[i:i+self.size], self.targets[i:i+self.size]
            xmb = padded(xmb)
            ymb = floatX(ymb)
            yield xmb, ymb
Code Example #13
def rmsprop(learning_rate, parameters, grads, inputs, cost):
    zipped_grads = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k)
                    for k, p in parameters.iteritems()]
    running_grads = [theano.shared(p.get_value() * floatX(0.), name='%s_rgrad' % k)
                     for k, p in parameters.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * floatX(0.), name='%s_rgrad2' % k)
                      for k, p in parameters.iteritems()]

    zg_up = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg_up = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2_up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inputs, cost, updates=zg_up + rg_up + rg2_up, profile=False)

    updir = [theano.shared(p.get_value() * floatX(0.), name='%s_updir' % k) for k, p in parameters.iteritems()]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / T.sqrt(rg2 - rg ** 2 + 1e-4)) for ud, zg, rg, rg2 in
                 zip(updir, zipped_grads, running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(parameters.itervalues(), updir_new)]
    f_update = theano.function([learning_rate], [], updates=updir_new + param_up, on_unused_input='ignore', profile=False)

    return f_grad_shared, f_update
Code Example #14
    def sample(self, shape):
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")

        flat_shape = (shape[0], np.prod(shape[1:]))
        a = get_rng().normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return floatX(self.gain * q)
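
A quick numpy sanity check of the idea (illustrative): for a 2-D shape, the sampled matrix has orthonormal rows (or columns, whichever is the shorter side).

import numpy as np
a = np.random.normal(0.0, 1.0, (64, 256))
u, _, v = np.linalg.svd(a, full_matrices=False)
q = u if u.shape == (64, 256) else v          # here v, with shape (64, 256)
print(np.allclose(q.dot(q.T), np.eye(64)))    # True: rows are orthonormal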
Code Example #15
def adadelta(learning_rate, parameters, grads, inputs, cost):
    zipped_grads = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k)
                    for k, p in parameters.iteritems()]
    running_up2 = [theano.shared(p.get_value() * floatX(0.), name='%s_rup2' % k)
                   for k, p in parameters.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * floatX(0.), name='%s_rgrad2' % k)
                      for k, p in parameters.iteritems()]

    zg_up = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2_up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inputs, cost, updates=zg_up + rg2_up, profile=False)

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in
             zip(zipped_grads, running_up2, running_grads2)]
    ru2_up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(parameters.itervalues(), updir)]

    f_update = theano.function([learning_rate], [], updates=ru2_up + param_up, on_unused_input='ignore', profile=False)

    return f_grad_shared, f_update
Code Example #16
File: init.py Project: Jakobularius/nntools
    def sample(self, shape):
        if self.range is None:
            # no range given, use the Glorot et al. approach
            if len(shape) != 2:
                raise RuntimeError("uniform initializer without parameters only works with shapes of length 2")

            n_inputs, n_outputs = shape
            m = np.sqrt(6.0 / (n_inputs + n_outputs))
            range = (-m, m)

        elif isinstance(self.range, Number):
            range = (-self.range, self.range)

        else:
            range = self.range

        return floatX(np.random.uniform(low=range[0], high=range[1], size=shape))
Code Example #17
def padded(seqs, pad_back=True, is_int=False, pad_by=None):
    if not pad_by:
        pad_by = [0]
    lens = map(len, seqs)
    max_len = max(lens)
    seqs_padded = []
    for seq, seq_len in zip(seqs, lens):
        n_pad = max_len - seq_len
        if pad_back:
            seq = seq + pad_by * n_pad
        else:
            seq = pad_by * n_pad + seq
        seqs_padded.append(seq)

    if is_int:
        return intX(seqs_padded)
    else:
        return floatX(seqs_padded)
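
For example (a sketch; intX/floatX simply cast the nested list to the configured integer or float dtype):

padded([[1, 2, 3], [4], [5, 6]], is_int=True)
# -> [[1, 2, 3],
#     [4, 0, 0],
#     [5, 6, 0]]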
Code Example #18
    def iter(self):

        if self.shuffle:
            self.seqs, self.targets = shuffle(self.seqs, self.targets)

        for x_chunk, y_chunk in iter_data(self.seqs,
                                          self.targets,
                                          size=self.size * 20):
            sort = np.argsort([len(x) for x in x_chunk])
            x_chunk = [x_chunk[idx] for idx in sort]
            y_chunk = [y_chunk[idx] for idx in sort]
            # print range(len(x_chunk))[::self.size]
            mb_chunks = [[
                x_chunk[idx:idx + self.size], y_chunk[idx:idx + self.size]
            ] for idx in range(len(x_chunk))[::self.size]]
            mb_chunks = shuffle(mb_chunks)
            for xmb, ymb in mb_chunks:
                xmb = padded(xmb)
                # print xmb.shape
                ymb = floatX(ymb)
                yield xmb, ymb
Code Example #19
File: updates.py Project: ishalyminov/xtrack2
 def get_updates(self, params, cost):
     updates = []
     grads = T.grad(cost, params)
     grads = clip_norms(grads, self.clipnorm)
     i = theano.shared(floatX(0.))
     i_t = i + 1.
     fix1 = 1. - self.b1**(i_t)
     fix2 = 1. - self.b2**(i_t)
     lr_t = self.lr * (T.sqrt(fix2) / fix1)
     for p, g in zip(params, grads):
         m = theano.shared(p.get_value() * 0.)
         v = theano.shared(p.get_value() * 0.)
         m_t = (self.b1 * g) + ((1. - self.b1) * m)
         v_t = (self.b2 * T.sqr(g)) + ((1. - self.b2) * v)
         g_t = m_t / (T.sqrt(v_t) + self.e)
         g_t = self.regularizer.gradient_regularize(p, g_t)
         p_t = p - (lr_t * g_t)
         p_t = self.regularizer.weight_regularize(p_t)
         updates.append((m, m_t))
         updates.append((v, v_t))
         updates.append((p, p_t))
     updates.append((i, i_t))
     return updates
Code Example #20
File: updates.py Project: ishalyminov/xtrack2
 def get_updates(self, params, cost):
     updates = []
     grads = T.grad(cost, params)
     grads = clip_norms(grads, self.clipnorm)
     i = theano.shared(floatX(0.))
     i_t = i + 1.
     fix1 = 1. - self.b1**(i_t)
     fix2 = 1. - self.b2**(i_t)
     lr_t = self.lr * (T.sqrt(fix2) / fix1)
     for p, g in zip(params, grads):
         m = theano.shared(p.get_value() * 0.)
         v = theano.shared(p.get_value() * 0.)
         m_t = (self.b1 * g) + ((1. - self.b1) * m)
         v_t = (self.b2 * T.sqr(g)) + ((1. - self.b2) * v)
         g_t = m_t / (T.sqrt(v_t) + self.e)
         g_t = self.regularizer.gradient_regularize(p, g_t)
         p_t = p - (lr_t * g_t)
         p_t = self.regularizer.weight_regularize(p_t)
         updates.append((m, m_t))
         updates.append((v, v_t))
         updates.append((p, p_t))
     updates.append((i, i_t))
     return updates
Code Example #21
    def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1 * self.l**(t - 1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t * m + (1 - b1_t) * g
            v_t = self.b2 * v + (1 - self.b2) * g**2
            m_c = m_t / (1 - self.b1**t)
            v_c = v_t / (1 - self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((t, t + 1.))
        return updates
Code Example #22
File: data.py Project: fyabc/TheanoProject
def load_cifar10_data(data_dir=None, one_file=None):
    """

    Args:
        data_dir: directory of the CIFAR-10 data.
        one_file: whether the data is stored in a single file.

    Returns:
        A dict, which contains train data and test data.
        Shapes of data (the training set is doubled by horizontal-flip augmentation):
            x_train:    (100000, 3, 32, 32)
            y_train:    (100000,)
            x_test:     (10000, 3, 32, 32)
            y_test:     (10000,)
    """

    def process(x):
        x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
        x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

        # subtract per-pixel mean
        pixel_mean = np.mean(x[0:train_size], axis=0)
        # pickle.dump(pixel_mean, open("cifar10-pixel_mean.pkl","wb"))
        x -= pixel_mean

        return x

    data_dir = data_dir or C['data_dir']
    one_file = one_file or C['one_file']

    if not os.path.exists(data_dir):
        raise Exception("CIFAR-10 dataset cannot be found. Please download the dataset from "
                        "'https://www.cs.toronto.edu/~kriz/cifar.html'.")

    train_size = 50000

    if one_file:
        train, test = f_open(data_dir)
        x_train, y_train = train
        x_test, y_test = test

        x_train = process(x_train)
        x_test = process(x_test)
    else:
        xs = []
        ys = []
        for j in range(5):
            d = f_open(data_dir + '/data_batch_%d' % (j + 1))
            xs.append(d['data'])
            ys.append(d['labels'])

        d = f_open(data_dir + '/test_batch')
        xs.append(d['data'])
        ys.append(d['labels'])

        x = np.concatenate(xs) / np.float32(255)
        y = np.concatenate(ys)

        x = process(x)

        x_train = x[0:train_size, :, :, :]
        y_train = y[0:train_size]

        x_test = x[train_size:, :, :, :]
        y_test = y[train_size:]

    # create mirrored images
    x_train_flip = x_train[:, :, :, ::-1]
    y_train_flip = y_train
    x_train = np.concatenate((x_train, x_train_flip), axis=0)
    y_train = np.concatenate((y_train, y_train_flip), axis=0)

    return {
        'x_train': floatX(x_train),
        'y_train': y_train.astype('int32'),
        'x_test': floatX(x_test),
        'y_test': y_test.astype('int32')
    }
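
A hypothetical call (the directory path is a placeholder), checking the shapes promised by the docstring:

data = load_cifar10_data('/path/to/cifar-10-batches-py')
print(data['x_train'].shape)   # (100000, 3, 32, 32) after the horizontal-flip augmentation
print(data['y_train'].shape)   # (100000,)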
Code Example #23
 def sample(self, shape):
     return floatX(get_rng().uniform(
         low=self.range[0], high=self.range[1], size=shape))
Code Example #24
 def sample(self, shape):
     return floatX(get_rng().normal(self.mean, self.std, size=shape))
Code Example #25
    def __init__(self, num_input, num_hidden, input_layers=None, name="lstm"):
        """
        LSTM layer

        Arguments:
            num_input: previous layer's size
            num_hidden: hidden neurons' size
            input_layers: previous layer
        """
        self.name = name
        self.num_input = num_input
        self.num_hidden = num_hidden

        if len(input_layers) >= 2:
            self.X = T.concatenate(
                [input_layer.output() for input_layer in input_layers], axis=1)
        else:
            self.X = input_layers[0].output()

        self.h0 = theano.shared(floatX(np.zeros(num_hidden)))
        self.s0 = theano.shared(floatX(np.zeros(num_hidden)))

        self.W_gx = self._random_weights((num_input, num_hidden),
                                         name=self.name + "W_gx")
        self.W_ix = self._random_weights((num_input, num_hidden),
                                         name=self.name + "W_ix")
        self.W_fx = self._random_weights((num_input, num_hidden),
                                         name=self.name + "W_fx")
        self.W_ox = self._random_weights((num_input, num_hidden),
                                         name=self.name + "W_ox")

        self.W_gh = self._random_weights((num_hidden, num_hidden),
                                         name=self.name + "W_gh")
        self.W_ih = self._random_weights((num_hidden, num_hidden),
                                         name=self.name + "W_ih")
        self.W_fh = self._random_weights((num_hidden, num_hidden),
                                         name=self.name + "W_fh")
        self.W_oh = self._random_weights((num_hidden, num_hidden),
                                         name=self.name + "W_oh")

        self.b_g = self._zeros(num_hidden, name=self.name + "b_g")
        self.b_i = self._zeros(num_hidden, name=self.name + "b_i")
        self.b_f = self._zeros(num_hidden, name=self.name + "b_f")
        self.b_o = self._zeros(num_hidden, name=self.name + "b_o")

        self.params = [
            self.W_gx,
            self.W_ix,
            self.W_ox,
            self.W_fx,
            self.W_gh,
            self.W_ih,
            self.W_oh,
            self.W_fh,
            self.b_g,
            self.b_i,
            self.b_f,
            self.b_o,
        ]

        self.output()
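
A hypothetical construction (assuming the enclosing class is called LSTM; the layer names and sizes are placeholders), stacking this layer on top of an embedding layer with 128 inputs and 256 hidden units:

lstm = LSTM(num_input=128, num_hidden=256, input_layers=[embedding_layer], name="lstm1")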
Code Example #26
 def update(self):
     current = self.get_value()
     updated = current * self.decay
     self.set_value(floatX(updated))
Code Example #27
 def sample(self, shape):
     return floatX(np.ones(shape) * self.val)
Code Example #28
 def _reset_state(self):
     self.h0 = theano.shared(floatX(np.zeros(self.num_hidden)))
     self.s0 = theano.shared(floatX(np.zeros(self.num_hidden)))
Code Example #29
 def __new__(self, value, **kwargs):
     variable = theano.shared(floatX(value))
     for k, v in kwargs.items():
         setattr(variable, k, v)
     variable.update = MethodType(self.update, variable)
     return variable
Code Example #30
 def __new__(self, value, **kwargs):
     variable = theano.shared(floatX(value))
     for k, v in kwargs.items():
         setattr(variable, k, v)
     variable.update = MethodType(self.update, variable)
     return variable
Code Example #31
 def _zeros(self, shape, name=""):
     return theano.shared(floatX(np.zeros(shape)), name=name)
Code Example #32
def run(cl_weight, con_layers, sty_layers, photopath, artpath):
    def build_and_load_model():
        def build_model(theano_input):
            net = {}
            order = [
                'input', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2',
                'pool2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'pool3',
                'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'pool4', 'conv5_1',
                'conv5_2', 'conv5_3', 'conv5_4'
            ]
            net['input'] = InputLayer(theano_input, (1, 3, IMAGE_W, IMAGE_W))
            net['conv1_1'] = ConvLayer(net['input'],
                                       64,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv1_2'] = ConvLayer(net['conv1_1'],
                                       64,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool1'] = PoolLayer(net['conv1_2'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv2_1'] = ConvLayer(net['pool1'],
                                       128,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv2_2'] = ConvLayer(net['conv2_1'],
                                       128,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool2'] = PoolLayer(net['conv2_2'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv3_1'] = ConvLayer(net['pool2'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv3_2'] = ConvLayer(net['conv3_1'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv3_3'] = ConvLayer(net['conv3_2'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv3_4'] = ConvLayer(net['conv3_3'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool3'] = PoolLayer(net['conv3_4'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv4_1'] = ConvLayer(net['pool3'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv4_2'] = ConvLayer(net['conv4_1'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv4_3'] = ConvLayer(net['conv4_2'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv4_4'] = ConvLayer(net['conv4_3'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool4'] = PoolLayer(net['conv4_4'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv5_1'] = ConvLayer(net['pool4'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv5_2'] = ConvLayer(net['conv5_1'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv5_3'] = ConvLayer(net['conv5_2'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv5_4'] = ConvLayer(net['conv5_3'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool5'] = PoolLayer(net['conv5_4'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            return net, order

        # build it
        net, order = build_model(T.tensor4())
        # load it
        values = pickle.load(open('./data/vgg19_normalized.pkl',
                                  'rb'))['param values']
        set_all_param_values(net, values, order)
        return net

    net = build_and_load_model()

    # select the layer to use
    layers = con_layers + sty_layers
    layers = {k: net[k] for k in layers}

    ###############################################################
    # get the images
    ###############################################################
    imageHelper = ImageHelper(IMAGE_W=600)
    photo, art = imageHelper.prep_photo_and_art(photo_path=photopath,
                                                art_path=artpath)

    input_im_theano = T.tensor4()
    # compute layer activations for photo and artwork
    outputs = get_outputs(layers, {net['input']: input_im_theano})
    # these features are constant; they serve as the reference for the loss
    photo_features = {
        k: theano.shared(output.eval({input_im_theano: photo}))
        for k, output in zip(layers.keys(), outputs)
    }
    art_features = {
        k: theano.shared(output.eval({input_im_theano: art}))
        for k, output in zip(layers.keys(), outputs)
    }

    ###############################################################
    # calculate loss and grads
    ###############################################################
    # Get expressions for layer activations for generated image
    generated_image = theano.shared(
        floatX(np.random.uniform(-128, 128, (1, 3, IMAGE_W, IMAGE_W))))
    gen_features = get_outputs(layers, {net['input']: generated_image})
    gen_features = {k: v for k, v in zip(layers.keys(), gen_features)}

    def gram_matrix(x):
        x = x.flatten(ndim=3)
        g = T.tensordot(x, x, axes=([2], [2]))
        return g

    def content_loss(P, X, layer):
        p = P[layer]
        x = X[layer]

        loss = 1. / 2 * ((x - p)**2).sum()
        return loss

    def style_loss(A, X, layer):
        a = A[layer]
        x = X[layer]

        A = gram_matrix(a)
        G = gram_matrix(x)

        N = a.shape[1]
        M = a.shape[2] * a.shape[3]

        loss = 1. / (4 * N**2 * M**2) * ((G - A)**2).sum()
        return loss

    def total_variation_loss(x):
        return (((x[:, :, :-1, :-1] - x[:, :, 1:, :-1])**2 +
                 (x[:, :, :-1, :-1] - x[:, :, :-1, 1:])**2)**1.25).sum()

    # Define loss function
    losses = []

    # content loss
    losses.append(cl_weight *
                  content_loss(photo_features, gen_features, con_layers[0]))

    # style loss
    for style_layer in sty_layers:
        losses.append(0.2e6 *
                      style_loss(art_features, gen_features, style_layer))

    # total variation penalty
    losses.append(0.1e-7 * total_variation_loss(generated_image))

    total_loss = sum(losses)

    grad = T.grad(total_loss, generated_image)
    # Theano functions to evaluate loss and gradient
    f_loss = theano.function([], total_loss)
    f_grad = theano.function([], grad)

    ###############################################################
    # start to optimize
    ###############################################################
    # Helper functions to interface with scipy.optimize
    def eval_loss(x0):
        x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W)))
        generated_image.set_value(x0)
        return f_loss().astype('float64')

    def eval_grad(x0):
        x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W)))
        generated_image.set_value(x0)
        return np.array(f_grad()).flatten().astype('float64')

    x0 = generated_image.get_value().astype('float64')
    xs = []
    xs.append(x0)

    # Optimize, saving the result periodically
    for i in range(8):
        scipy.optimize.fmin_l_bfgs_b(eval_loss,
                                     x0.flatten(),
                                     fprime=eval_grad,
                                     maxfun=40)
        x0 = generated_image.get_value().astype('float64')
        xs.append(floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W))))

    return imageHelper.deprocess(xs[-1])
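
As a concrete reference for gram_matrix above, the same computation in plain numpy (illustrative shapes):

import numpy as np
x = np.random.randn(1, 64, 37, 37).astype('float32')   # a (batch, channels, H, W) feature map
flat = x.reshape(1, 64, -1)                             # flatten(ndim=3): (1, 64, H*W)
gram = np.tensordot(flat, flat, axes=([2], [2]))        # (1, 64, 1, 64); entry [0, i, 0, j] is the inner product of feature maps i and j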
Code Example #33
updates = d_updates + g_updates

print('Compiling')
t = time()
_train_g = theano.function([X, Z, Y], cost, updates=g_updates)
_train_d = theano.function([X, Z, Y], cost, updates=d_updates)
_gen = theano.function([Z, Y], gX)
print('%.2f seconds to compile theano functions' % (time() - t))

tr_idxs = np.arange(len(trX))
trX_vis = np.asarray([[trX[i] for i in py_rng.sample(tr_idxs[trY == y], 20)]
                      for y in range(10)]).reshape(200, -1)
trX_vis = trX_vis.reshape(-1, npx, npx)
grayscale_grid_vis(trX_vis, (10, 20), 'samples/cond_dcgan_etl_test.png')

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))
sample_ymb = floatX(
    OneHot(
        np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny))


def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
Code Example #34
 def update(self):
     current = self.get_value()
     updated = current * self.decay
     self.set_value(floatX(updated))
Code Example #35
 def _random_weights(self, shape, name=None):
     # return theano.shared(floatX(np.random.randn(*shape) * 0.01), name=name)
     return theano.shared(floatX(
         np.random.uniform(size=shape, low=-1, high=1)),
                          name=name)
Code Example #36
 def eval_loss(x0):
     x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W)))
     generated_image.set_value(x0)
     return f_loss().astype('float64')
Code Example #37
def runSGD(cl_weight, con_layers, sty_layers, photopath, artpath):
    def build_and_load_model():
        def build_model(theano_input):
            net = {}
            order = [
                'input', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2',
                'pool2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'pool3',
                'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'pool4', 'conv5_1',
                'conv5_2', 'conv5_3', 'conv5_4'
            ]
            net['input'] = InputLayer(theano_input, (1, 3, IMAGE_W, IMAGE_W))
            net['conv1_1'] = ConvLayer(net['input'],
                                       64,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv1_2'] = ConvLayer(net['conv1_1'],
                                       64,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool1'] = PoolLayer(net['conv1_2'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv2_1'] = ConvLayer(net['pool1'],
                                       128,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv2_2'] = ConvLayer(net['conv2_1'],
                                       128,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool2'] = PoolLayer(net['conv2_2'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv3_1'] = ConvLayer(net['pool2'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv3_2'] = ConvLayer(net['conv3_1'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv3_3'] = ConvLayer(net['conv3_2'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv3_4'] = ConvLayer(net['conv3_3'],
                                       256,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool3'] = PoolLayer(net['conv3_4'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv4_1'] = ConvLayer(net['pool3'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv4_2'] = ConvLayer(net['conv4_1'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv4_3'] = ConvLayer(net['conv4_2'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv4_4'] = ConvLayer(net['conv4_3'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool4'] = PoolLayer(net['conv4_4'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            net['conv5_1'] = ConvLayer(net['pool4'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv5_2'] = ConvLayer(net['conv5_1'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv5_3'] = ConvLayer(net['conv5_2'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['conv5_4'] = ConvLayer(net['conv5_3'],
                                       512,
                                       3,
                                       rng,
                                       flip_filters=False)
            net['pool5'] = PoolLayer(net['conv5_4'],
                                     poolsize=(2, 2),
                                     mode='average_exc_pad')
            return net, order

        # build it
        net, order = build_model(T.tensor4())
        # load it
        values = pickle.load(open('./data/vgg19_normalized.pkl',
                                  'rb'))['param values']
        set_all_param_values(net, values, order)
        return net

    net = build_and_load_model()

    layers = con_layers + sty_layers
    layers = {k: net[k] for k in layers}
    # select the layer to use
    # layers = ['conv4_2', 'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
    # layers = {k: net[k] for k in layers}

    ###############################################################
    # get the images
    ###############################################################
    imageHelper = ImageHelper(IMAGE_W=600)
    photo, art = imageHelper.prep_photo_and_art(photo_path=photopath,
                                                art_path=artpath)

    input_im_theano = T.tensor4()
    # compute layer activations for photo and artwork
    outputs = get_outputs(layers, {net['input']: input_im_theano})
    # these features are constant; they serve as the reference for the loss
    photo_features = {
        k: theano.shared(output.eval({input_im_theano: photo}))
        for k, output in zip(layers.keys(), outputs)
    }
    art_features = {
        k: theano.shared(output.eval({input_im_theano: art}))
        for k, output in zip(layers.keys(), outputs)
    }

    ###############################################################
    # calculate loss and grads
    ###############################################################
    # Get expressions for layer activations for generated image
    generated_image = theano.shared(
        floatX(np.random.uniform(-128, 128, (1, 3, IMAGE_W, IMAGE_W))))
    gen_features = get_outputs(layers, {net['input']: generated_image})
    gen_features = {k: v for k, v in zip(layers.keys(), gen_features)}

    def gram_matrix(x):
        x = x.flatten(ndim=3)
        g = T.tensordot(x, x, axes=([2], [2]))
        return g

    def content_loss(P, X, layer):
        p = P[layer]
        x = X[layer]

        loss = 1. / 2 * ((x - p)**2).sum()
        return loss

    def style_loss(A, X, layer):
        a = A[layer]
        x = X[layer]

        A = gram_matrix(a)
        G = gram_matrix(x)

        N = a.shape[1]
        M = a.shape[2] * a.shape[3]

        loss = 1. / (4 * N**2 * M**2) * ((G - A)**2).sum()
        return loss

    def total_variation_loss(x):
        return (((x[:, :, :-1, :-1] - x[:, :, 1:, :-1])**2 +
                 (x[:, :, :-1, :-1] - x[:, :, :-1, 1:])**2)**1.25).sum()

    # Define loss function
    losses = []

    # content loss
    losses.append(cl_weight *
                  content_loss(photo_features, gen_features, 'conv4_2'))

    # style loss
    for style_layer in sty_layers:
        losses.append(0.2e6 *
                      style_loss(art_features, gen_features, style_layer))

    # total variation penalty
    losses.append(0.1e-7 * total_variation_loss(generated_image))

    total_loss = sum(losses)

    grad = T.grad(total_loss, generated_image)

    ###############################################################
    # start to optimize
    ###############################################################

    def RMSprop(cost, params, lr=0.8, rho=0.95, epsilon=1e-6):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, g in zip(params, grads):
            acc = theano.shared(p.get_value() * 0.)
            acc_new = rho * acc + (1 - rho) * g**2
            gradient_scaling = T.sqrt(acc_new + epsilon)
            g = g / gradient_scaling
            updates.append((acc, acc_new))
            updates.append((p, p - lr * g))
        return updates

    updates = RMSprop(cost=total_loss, params=[generated_image])

    train_model = theano.function([], total_loss, updates=updates)

    for i in range(3000):
        print train_model()

    ###############################################################
    # display result
    ###############################################################

    xout = generated_image.get_value().astype('float64')
    plt.figure(figsize=(4, 4))
    plt.gca().xaxis.set_visible(False)
    plt.gca().yaxis.set_visible(False)
    plt.imshow(imageHelper.deprocess(xout))
    plt.show()
    return imageHelper.deprocess(xout)
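
A hypothetical invocation (paths and weights are placeholders), mirroring run() above but optimizing the generated image with the in-graph RMSprop loop instead of L-BFGS:

result = runSGD(cl_weight=0.001,
                con_layers=['conv4_2'],
                sty_layers=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'],
                photopath='photo.jpg', artpath='style.jpg')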
Code Example #38
 def eval_grad(x0):
     x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W)))
     generated_image.set_value(x0)
     return np.array(f_grad()).flatten().astype('float64')
Code Example #39
File: init.py Project: Jakobularius/nntools
 def sample(self, shape):
     return floatX(np.random.normal(self.avg, self.std, size=shape))