def adam(learning_rate, parameters, grads, inputs, cost): g_shared = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k) for k, p in parameters.iteritems()] gs_up = [(gs, g) for gs, g in zip(g_shared, grads)] f_grad_shared = theano.function(inputs, cost, updates=gs_up) lr0 = 0.0002 b1 = 0.1 b2 = 0.001 e = 1e-8 updates = [] i = theano.shared(floatX(0.)) i_t = i + 1. fix1 = 1. - b1 ** i_t fix2 = 1. - b2 ** i_t lr_t = lr0 * (T.sqrt(fix2) / fix1) for p, g in zip(parameters.values(), g_shared): m = theano.shared(p.get_value() * floatX(0.)) v = theano.shared(p.get_value() * floatX(0.)) m_t = (b1 * g) + ((1. - b1) * m) v_t = (b2 * T.sqr(g)) + ((1. - b2) * v) g_t = m_t / (T.sqrt(v_t) + e) p_t = p - (lr_t * g_t) updates.append((m, m_t)) updates.append((v, v_t)) updates.append((p, p_t)) updates.append((i, i_t)) f_update = theano.function([learning_rate], [], updates=updates, on_unused_input='ignore') return f_grad_shared, f_update
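# --- Usage sketch (assumption, not part of the original source): wiring the adam() helper
# above into a tiny least-squares model. It assumes the Python 2 environment the snippet
# targets (it calls dict.iteritems()), that floatX casts to theano.config.floatX, and that
# `parameters` is an ordered name -> shared-variable mapping so the two iterations over it
# line up with `grads`.
import numpy as np
import theano
import theano.tensor as T
from collections import OrderedDict

x = T.matrix('x')
y = T.vector('y')
W = theano.shared(np.zeros((5, 1), dtype=theano.config.floatX), name='W')
b = theano.shared(np.asarray(0., dtype=theano.config.floatX), name='b')
parameters = OrderedDict([('W', W), ('b', b)])
pred = T.dot(x, W).flatten() + b
cost = T.mean((pred - y) ** 2)
grads = T.grad(cost, list(parameters.values()))
lr = T.scalar('lr')

f_grad_shared, f_update = adam(lr, parameters, grads, [x, y], cost)
for step in range(100):
    batch_x = np.random.randn(32, 5).astype(theano.config.floatX)
    batch_y = np.random.randn(32).astype(theano.config.floatX)
    cost_val = f_grad_shared(batch_x, batch_y)  # forward/backward pass; stores grads in g_shared
    f_update(0.)  # applies the Adam step; the lr argument is unused by this variant (it uses lr0)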
def create_param(param, shape): """ Helper method to create Theano shared variables for Layer parameters and to initialize them. param: one of three things: - a numpy array with the initial parameter values - a Theano shared variable - a function or callable that takes the desired shape of the parameter array as its single argument. shape: the desired shape of the parameter array. """ if isinstance(param, np.ndarray): if param.shape != shape: raise RuntimeError("parameter array has shape %s, should be %s" % (param.shape, shape)) return theano.shared(param) elif isinstance(param, theano.compile.SharedVariable): # cannot check shape here, the shared variable might not be initialized correctly yet. return param elif hasattr(param, '__call__'): arr = param(shape) if not isinstance(arr, np.ndarray): raise RuntimeError("cannot initialize parameters: the provided callable did not return a numpy array") return theano.shared(utils.floatX(arr)) else: raise RuntimeError("cannot initialize parameters: 'param' is not a numpy array, a Theano shared variable, or a callable")
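# --- Usage sketch (assumption): the three kinds of `param` accepted by create_param() above.
# utils.floatX is the casting helper the function itself relies on.
import numpy as np
import theano

shape = (100, 50)
w1 = create_param(np.random.randn(*shape).astype(theano.config.floatX), shape)        # from an ndarray
w2 = create_param(theano.shared(np.zeros(shape, dtype=theano.config.floatX)), shape)  # passed through
w3 = create_param(lambda shp: np.random.uniform(-0.05, 0.05, size=shp), shape)        # from a callable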
def prep_image(self, im): if len(im.shape) == 2: im = im[:, :, np.newaxis] im = np.repeat(im, 3, axis=2) h, w, _ = im.shape if h < w: im = skimage.transform.resize(im, (self.IMAGE_W, w * self.IMAGE_W / h), preserve_range=True) else: im = skimage.transform.resize(im, (h * self.IMAGE_W / w, self.IMAGE_W), preserve_range=True) # Central crop h, w, _ = im.shape im = im[h // 2 - self.IMAGE_W // 2:h // 2 + self.IMAGE_W // 2, w // 2 - self.IMAGE_W // 2:w // 2 + self.IMAGE_W // 2] rawim = np.copy(im).astype('uint8') # Shuffle axes to c01 im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1) # Convert RGB to BGR im = im[::-1, :, :] im = im - self.MEAN_VALUES return rawim, floatX(im[np.newaxis])
def adam(loss_or_grads, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): """Adam updates Adam updates implemented as in [1]_. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float Learning rate beta1 : float Exponential decay rate for the first moment estimates. beta2 : float Exponential decay rate for the second moment estimates. epsilon : float Constant for numerical stability. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression Notes ----- The paper [1]_ includes an additional hyperparameter lambda. This is only needed to prove convergence of the algorithm and has no practical use (personal communication with the authors), it is therefore omitted here. References ---------- .. [1] Kingma, Diederik, and Jimmy Ba (2014): Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980. """ all_grads = get_or_compute_grads(loss_or_grads, params) t_prev = theano.shared(utils.floatX(0.)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) t = t_prev + 1 a_t = learning_rate*T.sqrt(one-beta2**t)/(one-beta1**t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) m_t = beta1*m_prev + (one-beta1)*g_t v_t = beta2*v_prev + (one-beta2)*g_t**2 step = a_t*m_t/(T.sqrt(v_t) + epsilon) updates[m_prev] = m_t updates[v_prev] = v_t updates[param] = param - step updates[t_prev] = t return updates
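# --- Usage sketch (assumption): compiling a training function from the OrderedDict of updates
# returned by adam() above, on a small softmax classifier; the model itself is illustrative and
# not taken from the original file.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.ivector('y')
W = theano.shared(np.zeros((784, 10), dtype=theano.config.floatX), name='W')
b = theano.shared(np.zeros((10,), dtype=theano.config.floatX), name='b')
probs = T.nnet.softmax(T.dot(x, W) + b)
loss = T.nnet.categorical_crossentropy(probs, y).mean()

updates = adam(loss, [W, b], learning_rate=0.001)   # maps each shared variable to its new value
train_fn = theano.function([x, y], loss, updates=updates)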
def sample(self, shape): if len(shape) != 2: raise RuntimeError("sparse initializer only works with shapes of length 2") w = floatX(np.zeros(shape)) n_inputs, n_outputs = shape size = int(self.sparsity * n_inputs) # fraction of the number of inputs for k in xrange(n_outputs): indices = np.arange(n_inputs) np.random.shuffle(indices) indices = indices[:size] values = floatX(np.random.normal(0.0, self.std, size=size)) w[indices, k] = values return w
def adamax(loss_or_grads, params, learning_rate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8): """Adamax updates Adamax updates implemented as in [1]_. This is a variant of the Adam algorithm based on the infinity norm. Parameters ---------- loss_or_grads : symbolic expression or list of expressions A scalar loss expression, or a list of gradient expressions params : list of shared variables The variables to generate update expressions for learning_rate : float Learning rate beta1 : float Exponential decay rate for the first moment estimates. beta2 : float Exponential decay rate for the weighted infinity norm estimates. epsilon : float Constant for numerical stability. Returns ------- OrderedDict A dictionary mapping each parameter to its update expression References ---------- .. [1] Kingma, Diederik, and Jimmy Ba (2014): Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980. """ all_grads = get_or_compute_grads(loss_or_grads, params) t_prev = theano.shared(utils.floatX(0.)) updates = OrderedDict() # Using theano constant to prevent upcasting of float32 one = T.constant(1) t = t_prev + 1 a_t = learning_rate / (one - beta1**t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable) m_t = beta1 * m_prev + (one - beta1) * g_t u_t = T.maximum(beta2 * u_prev, abs(g_t)) step = a_t * m_t / (u_t + epsilon) updates[m_prev] = m_t updates[u_prev] = u_t updates[param] = param - step updates[t_prev] = t return updates
def sample(self, shape): if len(shape) != 2: raise RuntimeError( "sparse initializer only works with shapes of length 2") w = floatX(np.zeros(shape)) n_inputs, n_outputs = shape size = int(self.sparsity * n_inputs) # fraction of number of inputs for k in range(n_outputs): indices = np.arange(n_inputs) get_rng().shuffle(indices) indices = indices[:size] values = floatX(get_rng().normal(0.0, self.std, size=size)) w[indices, k] = values return w
def get_output_for(self, input, deterministic=False, *args, **kwargs): if deterministic or self.p == 0: return input else: retain_prob = 1 - self.p if self.rescale: input /= retain_prob return input * utils.floatX(_srng.binomial(input.shape, p=retain_prob, dtype='int32'))
def gen_samples(n, nbatch=128): samples = [] labels = [] n_gen = 0 for i in range(n // nbatch): ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny)) zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz))) xmb = _gen(zmb, ymb) samples.append(xmb) labels.append(np.argmax(ymb, axis=1)) n_gen += len(xmb) n_left = n - n_gen ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny)) zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz))) xmb = _gen(zmb, ymb) samples.append(xmb) labels.append(np.argmax(ymb, axis=1)) return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
def sgd(learning_rate, parameters, grads, inputs, cost): g_shared = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k) for k, p in parameters.iteritems()] gs_up = [(gs, g) for gs, g in zip(g_shared, grads)] f_grad_shared = theano.function(inputs, cost, updates=gs_up, profile=False) p_up = [(p, p - learning_rate * g) for p, g in zip(parameters.itervalues(), g_shared)] f_update = theano.function([learning_rate], [], updates=p_up, profile=False) return f_grad_shared, f_update
def iter(self): if self.shuffle: self.seqs, self.targets = shuffle(self.seqs, self.targets) for i in range(0, len(self.seqs), self.size): xmb, ymb = self.seqs[i:i+self.size], self.targets[i:i+self.size] xmb = padded(xmb) ymb = floatX(ymb) yield xmb, ymb
def rmsprop(learning_rate, parameters, grads, inputs, cost): zipped_grads = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k) for k, p in parameters.iteritems()] running_grads = [theano.shared(p.get_value() * floatX(0.), name='%s_rgrad' % k) for k, p in parameters.iteritems()] running_grads2 = [theano.shared(p.get_value() * floatX(0.), name='%s_rgrad2' % k) for k, p in parameters.iteritems()] zg_up = [(zg, g) for zg, g in zip(zipped_grads, grads)] rg_up = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] rg2_up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)] f_grad_shared = theano.function(inputs, cost, updates=zg_up + rg_up + rg2_up, profile=False) updir = [theano.shared(p.get_value() * floatX(0.), name='%s_updir' % k) for k, p in parameters.iteritems()] updir_new = [(ud, 0.9 * ud - 1e-4 * zg / T.sqrt(rg2 - rg ** 2 + 1e-4)) for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)] param_up = [(p, p + udn[1]) for p, udn in zip(parameters.itervalues(), updir_new)] f_update = theano.function([learning_rate], [], updates=updir_new + param_up, on_unused_input='ignore', profile=False) return f_grad_shared, f_update
def sample(self, shape): if len(shape) < 2: raise RuntimeError("Only shapes of length 2 or more are " "supported.") flat_shape = (shape[0], np.prod(shape[1:])) a = get_rng().normal(0.0, 1.0, flat_shape) u, _, v = np.linalg.svd(a, full_matrices=False) # pick the one with the correct shape q = u if u.shape == flat_shape else v q = q.reshape(shape) return floatX(self.gain * q)
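# --- Usage sketch (assumption): sample() above is the draw method of an Orthogonal-style
# initializer (as in lasagne.init.Orthogonal); the class name and its `gain` attribute are
# assumptions about the enclosing class.
import numpy as np

W = Orthogonal(gain=1.0).sample((256, 128))
print(W.shape)                                               # (256, 128)
print(np.allclose(np.dot(W.T, W), np.eye(128), atol=1e-5))   # True: columns are orthonormal when gain == 1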
def adadelta(learning_rate, parameters, grads, inputs, cost): zipped_grads = [theano.shared(p.get_value() * floatX(0.), name='%s_grad' % k) for k, p in parameters.iteritems()] running_up2 = [theano.shared(p.get_value() * floatX(0.), name='%s_rup2' % k) for k, p in parameters.iteritems()] running_grads2 = [theano.shared(p.get_value() * floatX(0.), name='%s_rgrad2' % k) for k, p in parameters.iteritems()] zg_up = [(zg, g) for zg, g in zip(zipped_grads, grads)] rg2_up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)] f_grad_shared = theano.function(inputs, cost, updates=zg_up + rg2_up, profile=False) updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)] ru2_up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)] param_up = [(p, p + ud) for p, ud in zip(parameters.itervalues(), updir)] f_update = theano.function([learning_rate], [], updates=ru2_up + param_up, on_unused_input='ignore', profile=False) return f_grad_shared, f_update
def sample(self, shape): if self.range is None: # no range given, use the Glorot et al. approach if len(shape) != 2: raise RuntimeError("uniform initializer without parameters only works with shapes of length 2") n_inputs, n_outputs = shape m = np.sqrt(6.0 / (n_inputs + n_outputs)) range = (-m, m) elif isinstance(self.range, Number): range = (-self.range, self.range) else: range = self.range return floatX(np.random.uniform(low=range[0], high=range[1], size=shape))
def padded(seqs, pad_back=True, is_int=False, pad_by=None): if not pad_by: pad_by = [0] lens = [len(seq) for seq in seqs] max_len = max(lens) seqs_padded = [] for seq, seq_len in zip(seqs, lens): n_pad = max_len - seq_len if pad_back: seq = seq + pad_by * n_pad else: seq = pad_by * n_pad + seq seqs_padded.append(seq) if is_int: return intX(seqs_padded) else: return floatX(seqs_padded)
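# --- Usage sketch (assumption): padding a ragged batch of token-id sequences with the padded()
# helper above; intX and floatX are the casting helpers used throughout this code base.
seqs = [[3, 7, 9, 2], [5, 1], [8]]
xmb = padded(seqs, is_int=True)                         # pads with 0 at the back -> shape (3, 4)
xmb_front = padded(seqs, pad_back=False, is_int=True)   # same lengths, padding placed in front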
def iter(self): if self.shuffle: self.seqs, self.targets = shuffle(self.seqs, self.targets) for x_chunk, y_chunk in iter_data(self.seqs, self.targets, size=self.size * 20): sort = np.argsort([len(x) for x in x_chunk]) x_chunk = [x_chunk[idx] for idx in sort] y_chunk = [y_chunk[idx] for idx in sort] # print range(len(x_chunk))[::self.size] mb_chunks = [[ x_chunk[idx:idx + self.size], y_chunk[idx:idx + self.size] ] for idx in range(len(x_chunk))[::self.size]] mb_chunks = shuffle(mb_chunks) for xmb, ymb in mb_chunks: xmb = padded(xmb) # print xmb.shape ymb = floatX(ymb) yield xmb, ymb
def get_updates(self, params, cost): updates = [] grads = T.grad(cost, params) grads = clip_norms(grads, self.clipnorm) i = theano.shared(floatX(0.)) i_t = i + 1. fix1 = 1. - self.b1**(i_t) fix2 = 1. - self.b2**(i_t) lr_t = self.lr * (T.sqrt(fix2) / fix1) for p, g in zip(params, grads): m = theano.shared(p.get_value() * 0.) v = theano.shared(p.get_value() * 0.) m_t = (self.b1 * g) + ((1. - self.b1) * m) v_t = (self.b2 * T.sqr(g)) + ((1. - self.b2) * v) g_t = m_t / (T.sqrt(v_t) + self.e) g_t = self.regularizer.gradient_regularize(p, g_t) p_t = p - (lr_t * g_t) p_t = self.regularizer.weight_regularize(p_t) updates.append((m, m_t)) updates.append((v, v_t)) updates.append((p, p_t)) updates.append((i, i_t)) return updates
def __call__(self, params, cost): updates = [] grads = T.grad(cost, params) grads = clip_norms(grads, self.clipnorm) t = theano.shared(floatX(1.)) b1_t = self.b1 * self.l**(t - 1) for p, g in zip(params, grads): g = self.regularizer.gradient_regularize(p, g) m = theano.shared(p.get_value() * 0.) v = theano.shared(p.get_value() * 0.) m_t = b1_t * m + (1 - b1_t) * g v_t = self.b2 * v + (1 - self.b2) * g**2 m_c = m_t / (1 - self.b1**t) v_c = v_t / (1 - self.b2**t) p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e) p_t = self.regularizer.weight_regularize(p_t) updates.append((m, m_t)) updates.append((v, v_t)) updates.append((p, p_t)) updates.append((t, t + 1.)) return updates
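# The update computed by __call__ above, written out (b1, b2, e, lr and the decay l are the
# attributes used in the code; regularizer hooks omitted). This is the standard Adam step with
# beta1 annealed by l:
#     b1_t  = b1 * l**(t - 1)
#     m_t   = b1_t * m + (1 - b1_t) * g
#     v_t   = b2 * v + (1 - b2) * g**2
#     m_hat = m_t / (1 - b1**t)
#     v_hat = v_t / (1 - b2**t)
#     p_t   = p - lr * m_hat / (sqrt(v_hat) + e)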
def load_cifar10_data(data_dir=None, one_file=None): """ Args: data_dir: directory of the CIFAR-10 data. one_file: is the data in a single file? Returns: A dict, which contains train data and test data. Shapes of data: x_train: (100000, 3, 32, 32) y_train: (100000,) x_test: (10000, 3, 32, 32) y_test: (10000,) """ def process(x): x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:])) x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2) # subtract per-pixel mean pixel_mean = np.mean(x[0:train_size], axis=0) # pickle.dump(pixel_mean, open("cifar10-pixel_mean.pkl","wb")) x -= pixel_mean return x data_dir = data_dir or C['data_dir'] one_file = one_file or C['one_file'] if not os.path.exists(data_dir): raise Exception("CIFAR-10 dataset can not be found. Please download the dataset from " "'https://www.cs.toronto.edu/~kriz/cifar.html'.") train_size = 50000 if one_file: train, test = f_open(data_dir) x_train, y_train = train x_test, y_test = test x_train = process(x_train) x_test = process(x_test) else: xs = [] ys = [] for j in range(5): d = f_open(data_dir + '/data_batch_%d' % (j + 1)) xs.append(d['data']) ys.append(d['labels']) d = f_open(data_dir + '/test_batch') xs.append(d['data']) ys.append(d['labels']) x = np.concatenate(xs) / np.float32(255) y = np.concatenate(ys) x = process(x) x_train = x[0:train_size, :, :, :] y_train = y[0:train_size] x_test = x[train_size:, :, :, :] y_test = y[train_size:] # create mirrored images x_train_flip = x_train[:, :, :, ::-1] y_train_flip = y_train x_train = np.concatenate((x_train, x_train_flip), axis=0) y_train = np.concatenate((y_train, y_train_flip), axis=0) return { 'x_train': floatX(x_train), 'y_train': y_train.astype('int32'), 'x_test': floatX(x_test), 'y_test': y_test.astype('int32') }
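# --- Usage sketch (assumption): calling load_cifar10_data() above with the multi-batch CIFAR-10
# layout; the directory name is illustrative, and when one_file is not given it falls back to the
# module-level config C.
data = load_cifar10_data(data_dir='./cifar-10-batches-py')
print(data['x_train'].shape)   # (100000, 3, 32, 32): 50000 images plus their horizontal flips
print(data['y_train'].shape)   # (100000,)
print(data['x_test'].shape)    # (10000, 3, 32, 32)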
def sample(self, shape): return floatX(get_rng().uniform( low=self.range[0], high=self.range[1], size=shape))
def sample(self, shape): return floatX(get_rng().normal(self.mean, self.std, size=shape))
def __init__(self, num_input, num_hidden, input_layers=None, name="lstm"): """ LSTM layer Arguments: num_input: previous layer's size num_hidden: hidden neurons' size input_layers: previous layer """ self.name = name self.num_input = num_input self.num_hidden = num_hidden if len(input_layers) >= 2: self.X = T.concatenate( [input_layer.output() for input_layer in input_layers], axis=1) else: self.X = input_layers[0].output() self.h0 = theano.shared(floatX(np.zeros(num_hidden))) self.s0 = theano.shared(floatX(np.zeros(num_hidden))) self.W_gx = self._random_weights((num_input, num_hidden), name=self.name + "W_gx") self.W_ix = self._random_weights((num_input, num_hidden), name=self.name + "W_ix") self.W_fx = self._random_weights((num_input, num_hidden), name=self.name + "W_fx") self.W_ox = self._random_weights((num_input, num_hidden), name=self.name + "W_ox") self.W_gh = self._random_weights((num_hidden, num_hidden), name=self.name + "W_gh") self.W_ih = self._random_weights((num_hidden, num_hidden), name=self.name + "W_ih") self.W_fh = self._random_weights((num_hidden, num_hidden), name=self.name + "W_fh") self.W_oh = self._random_weights((num_hidden, num_hidden), name=self.name + "W_oh") self.b_g = self._zeros(num_hidden, name=self.name + "b_g") self.b_i = self._zeros(num_hidden, name=self.name + "b_i") self.b_f = self._zeros(num_hidden, name=self.name + "b_f") self.b_o = self._zeros(num_hidden, name=self.name + "b_o") self.params = [ self.W_gx, self.W_ix, self.W_ox, self.W_fx, self.W_gh, self.W_ih, self.W_oh, self.W_fh, self.b_g, self.b_i, self.b_f, self.b_o, ] self.output()
def update(self): current = self.get_value() updated = current * self.decay self.set_value(floatX(updated))
def sample(self, shape): return floatX(np.ones(shape) * self.val)
def _reset_state(self): self.h0 = theano.shared(floatX(np.zeros(self.num_hidden))) self.s0 = theano.shared(floatX(np.zeros(self.num_hidden)))
def __new__(self, value, **kwargs): variable = theano.shared(floatX(value)) for k, v in kwargs.items(): setattr(variable, k, v) variable.update = MethodType(self.update, variable) return variable
def _zeros(self, shape, name=""): return theano.shared(floatX(np.zeros(shape)), name=name)
def run(cl_weight, con_layers, sty_layers, photopath, artpath): def build_and_load_model(): def build_model(theano_input): net = {} order = [ 'input', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4' ] net['input'] = InputLayer(theano_input, (1, 3, IMAGE_W, IMAGE_W)) net['conv1_1'] = ConvLayer(net['input'], 64, 3, rng, flip_filters=False) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, rng, flip_filters=False) net['pool1'] = PoolLayer(net['conv1_2'], poolsize=(2, 2), mode='average_exc_pad') net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, rng, flip_filters=False) net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, rng, flip_filters=False) net['pool2'] = PoolLayer(net['conv2_2'], poolsize=(2, 2), mode='average_exc_pad') net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, rng, flip_filters=False) net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, rng, flip_filters=False) net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, rng, flip_filters=False) net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3, rng, flip_filters=False) net['pool3'] = PoolLayer(net['conv3_4'], poolsize=(2, 2), mode='average_exc_pad') net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, rng, flip_filters=False) net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, rng, flip_filters=False) net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, rng, flip_filters=False) net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3, rng, flip_filters=False) net['pool4'] = PoolLayer(net['conv4_4'], poolsize=(2, 2), mode='average_exc_pad') net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, rng, flip_filters=False) net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, rng, flip_filters=False) net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, rng, flip_filters=False) net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3, rng, flip_filters=False) net['pool5'] = PoolLayer(net['conv5_4'], poolsize=(2, 2), mode='average_exc_pad') return net, order # build it net, order = build_model(T.tensor4()) # load it values = pickle.load(open('./data/vgg19_normalized.pkl', 'rb'))['param values'] set_all_param_values(net, values, order) return net net = build_and_load_model() # select the layer to use layers = con_layers + sty_layers layers = {k: net[k] for k in layers} ############################################################### # get the images ############################################################### imageHelper = ImageHelper(IMAGE_W=600) photo, art = imageHelper.prep_photo_and_art(photo_path=photopath, art_path=artpath) input_im_theano = T.tensor4() # compute layer activations for photo and artwork outputs = get_outputs(layers, {net['input']: input_im_theano}) # these features are constant which is the reference for loss photo_features = { k: theano.shared(output.eval({input_im_theano: photo})) for k, output in zip(layers.keys(), outputs) } art_features = { k: theano.shared(output.eval({input_im_theano: art})) for k, output in zip(layers.keys(), outputs) } ############################################################### # calculate loss and grads ############################################################### # Get expressions for layer activations for generated image generated_image = theano.shared( floatX(np.random.uniform(-128, 128, (1, 3, IMAGE_W, IMAGE_W)))) gen_features = get_outputs(layers, {net['input']: generated_image}) gen_features = {k: v for k, v in zip(layers.keys(), gen_features)}
def gram_matrix(x): x = x.flatten(ndim=3) g = T.tensordot(x, x, axes=([2], [2])) return g def content_loss(P, X, layer): p = P[layer] x = X[layer] loss = 1. / 2 * ((x - p)**2).sum() return loss def style_loss(A, X, layer): a = A[layer] x = X[layer] A = gram_matrix(a) G = gram_matrix(x) N = a.shape[1] M = a.shape[2] * a.shape[3] loss = 1. / (4 * N**2 * M**2) * ((G - A)**2).sum() return loss def total_variation_loss(x): return (((x[:, :, :-1, :-1] - x[:, :, 1:, :-1])**2 + (x[:, :, :-1, :-1] - x[:, :, :-1, 1:])**2)**1.25).sum() # Define loss function losses = [] # content loss losses.append(cl_weight * content_loss(photo_features, gen_features, con_layers[0])) # style loss for style_layer in sty_layers: losses.append(0.2e6 * style_loss(art_features, gen_features, style_layer)) # total variation penalty losses.append(0.1e-7 * total_variation_loss(generated_image)) total_loss = sum(losses) grad = T.grad(total_loss, generated_image) # Theano functions to evaluate loss and gradient f_loss = theano.function([], total_loss) f_grad = theano.function([], grad) ############################################################### # start to optimize ############################################################### # Helper functions to interface with scipy.optimize def eval_loss(x0): x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W))) generated_image.set_value(x0) return f_loss().astype('float64') def eval_grad(x0): x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W))) generated_image.set_value(x0) return np.array(f_grad()).flatten().astype('float64') x0 = generated_image.get_value().astype('float64') xs = [] xs.append(x0) # Optimize, saving the result periodically for i in range(8): scipy.optimize.fmin_l_bfgs_b(eval_loss, x0.flatten(), fprime=eval_grad, maxfun=40) x0 = generated_image.get_value().astype('float64') xs.append(floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W)))) return imageHelper.deprocess(xs[-1])
updates = d_updates + g_updates print('Compiling') t = time() _train_g = theano.function([X, Z, Y], cost, updates=g_updates) _train_d = theano.function([X, Z, Y], cost, updates=d_updates) _gen = theano.function([Z, Y], gX) print('%.2f seconds to compile theano functions' % (time() - t)) tr_idxs = np.arange(len(trX)) trX_vis = np.asarray([[trX[i] for i in py_rng.sample(tr_idxs[trY == y], 20)] for y in range(10)]).reshape(200, -1) trX_vis = trX_vis.reshape(-1, npx, npx) grayscale_grid_vis(trX_vis, (10, 20), 'samples/cond_dcgan_etl_test.png') sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz))) sample_ymb = floatX( OneHot( np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny)) def gen_samples(n, nbatch=128): samples = [] labels = [] n_gen = 0 for i in range(n / nbatch): ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny)) zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz))) xmb = _gen(zmb, ymb) samples.append(xmb) labels.append(np.argmax(ymb, axis=1))
def _random_weights(self, shape, name=None): # return theano.shared(floatX(np.random.randn(*shape) * 0.01), name=name) return theano.shared(floatX( np.random.uniform(size=shape, low=-1, high=1)), name=name)
def eval_loss(x0): x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W))) generated_image.set_value(x0) return f_loss().astype('float64')
def runSGD(cl_weight, con_layers, sty_layers, photopath, artpath): def build_and_load_model(): def build_model(theano_input): net = {} order = [ 'input', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4' ] net['input'] = InputLayer(theano_input, (1, 3, IMAGE_W, IMAGE_W)) net['conv1_1'] = ConvLayer(net['input'], 64, 3, rng, flip_filters=False) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, rng, flip_filters=False) net['pool1'] = PoolLayer(net['conv1_2'], poolsize=(2, 2), mode='average_exc_pad') net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, rng, flip_filters=False) net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, rng, flip_filters=False) net['pool2'] = PoolLayer(net['conv2_2'], poolsize=(2, 2), mode='average_exc_pad') net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, rng, flip_filters=False) net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, rng, flip_filters=False) net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, rng, flip_filters=False) net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3, rng, flip_filters=False) net['pool3'] = PoolLayer(net['conv3_4'], poolsize=(2, 2), mode='average_exc_pad') net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, rng, flip_filters=False) net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, rng, flip_filters=False) net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, rng, flip_filters=False) net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3, rng, flip_filters=False) net['pool4'] = PoolLayer(net['conv4_4'], poolsize=(2, 2), mode='average_exc_pad') net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, rng, flip_filters=False) net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, rng, flip_filters=False) net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, rng, flip_filters=False) net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3, rng, flip_filters=False) net['pool5'] = PoolLayer(net['conv5_4'], poolsize=(2, 2), mode='average_exc_pad') return net, order # build it net, order = build_model(T.tensor4()) # load it values = pickle.load(open('./data/vgg19_normalized.pkl', 'rb'))['param values'] set_all_param_values(net, values, order) return net net = build_and_load_model() layers = con_layers + sty_layers layers = {k: net[k] for k in layers} # select the layer to use # layers = ['conv4_2', 'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'] # layers = {k: net[k] for k in layers} ############################################################### # get the images ############################################################### imageHelper = ImageHelper(IMAGE_W=600) photo, art = imageHelper.prep_photo_and_art(photo_path=photopath, art_path=artpath) input_im_theano = T.tensor4() # compute layer activations for photo and artwork outputs = get_outputs(layers, {net['input']: input_im_theano}) # these features are constant which is the reference for loss photo_features = { k: theano.shared(output.eval({input_im_theano: photo})) for k, output in zip(layers.keys(), outputs) } art_features = { k: theano.shared(output.eval({input_im_theano: art})) for k, output in zip(layers.keys(), outputs) } ############################################################### # calculate loss and grads ############################################################### # Get expressions for layer activations for generated image generated_image = theano.shared( floatX(np.random.uniform(-128, 128, (1, 3, IMAGE_W, IMAGE_W)))) gen_features = get_outputs(layers, {net['input']: generated_image}) gen_features = {k: v for k, v in zip(layers.keys(), gen_features)}
def gram_matrix(x): x = x.flatten(ndim=3) g = T.tensordot(x, x, axes=([2], [2])) return g def content_loss(P, X, layer): p = P[layer] x = X[layer] loss = 1. / 2 * ((x - p)**2).sum() return loss def style_loss(A, X, layer): a = A[layer] x = X[layer] A = gram_matrix(a) G = gram_matrix(x) N = a.shape[1] M = a.shape[2] * a.shape[3] loss = 1. / (4 * N**2 * M**2) * ((G - A)**2).sum() return loss def total_variation_loss(x): return (((x[:, :, :-1, :-1] - x[:, :, 1:, :-1])**2 + (x[:, :, :-1, :-1] - x[:, :, :-1, 1:])**2)**1.25).sum() # Define loss function losses = [] # content loss losses.append(cl_weight * content_loss(photo_features, gen_features, 'conv4_2')) # style loss for style_layer in sty_layers: losses.append(0.2e6 * style_loss(art_features, gen_features, style_layer)) # total variation penalty losses.append(0.1e-7 * total_variation_loss(generated_image)) total_loss = sum(losses) grad = T.grad(total_loss, generated_image) ############################################################### # start to optimize ############################################################### def RMSprop(cost, params, lr=0.8, rho=0.95, epsilon=1e-6): grads = T.grad(cost=cost, wrt=params) updates = [] for p, g in zip(params, grads): acc = theano.shared(p.get_value() * 0.) acc_new = rho * acc + (1 - rho) * g**2 gradient_scaling = T.sqrt(acc_new + epsilon) g = g / gradient_scaling updates.append((acc, acc_new)) updates.append((p, p - lr * g)) return updates updates = RMSprop(cost=total_loss, params=[generated_image]) train_model = theano.function([], total_loss, updates=updates) for i in range(3000): print train_model() ############################################################### # display result ############################################################### xout = generated_image.get_value().astype('float64') plt.figure(figsize=(4, 4)) plt.gca().xaxis.set_visible(False) plt.gca().yaxis.set_visible(False) plt.imshow(imageHelper.deprocess(xout)) plt.show() return imageHelper.deprocess(xout)
def eval_grad(x0): x0 = floatX(x0.reshape((1, 3, IMAGE_W, IMAGE_W))) generated_image.set_value(x0) return np.array(f_grad()).flatten().astype('float64')
def sample(self, shape): return floatX(np.random.normal(self.avg, self.std, size=shape))