def local_1msigmoid(node):
    """
    1 - sigm(x) -> sigm(-x)

    """
    if node.op == tensor.sub:
        sub_l, sub_r = node.inputs
        if len(sub_r.clients) > 1:
            return  # graph is using both sigm and 1 - sigm
        if sub_r.owner and sub_r.owner.op == sigmoid:
            try:
                val_l = opt.get_scalar_constant_value(sub_l)
            except tensor.NotScalarConstantError:
                return
            if np.allclose(np.sum(val_l), 1):
                out = sigmoid(-sub_r.owner.inputs[0])
                copy_stack_trace([sub_r, node.outputs[0]], out)
                return [out]


register_local_1msigmoid = False
# This is False because the Stabilize pattern above is looking for 1 - sigm.
# Also, Canonizer turns neg into *(-1), so this optimization might set off an
# unwanted chain of rewrites. On the other hand, this transformation can be
# seen as pushing normal arithmetic either below or above the sigmoidal
# nonlinearity, so if the canonicalized form had anything to say about that,
# it would be a consideration... leaving it False for now.

if register_local_1msigmoid:
    opt.register_canonicalize(local_1msigmoid)
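# Quick numeric check of the identity this rewrite relies on,
# 1 - sigm(x) == sigm(-x). A standalone sketch: it assumes only a working
# Theano install, not the optimizer machinery above.
import numpy as np
import theano
import theano.tensor as tt

x = tt.vector('x')
f = theano.function([x], 1.0 - tt.nnet.sigmoid(x))
g = theano.function([x], tt.nnet.sigmoid(-x))
xs = np.linspace(-5, 5, 11).astype(theano.config.floatX)
assert np.allclose(f(xs), g(xs))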
if 0:
    # This code is if'd out because it is not complete, and it isn't
    # obviously a good idea anyway. The motivation here was to identify the
    # last exp() node in the SciPy2010 article, which was not optimized away
    # at the time of publication, so the example is actually not numerically
    # stable, even though it should be.
    @opt.register_stabilize
    @gof.local_optimizer([tensor.mul])
    def local_sigm_gest(node):
        print("CANONICALIZE")
        print(sigm_canonicalize(node))

    def sigm_canonicalize(node):
        mul = tensor.mul
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.opt import register_canonicalize
from Data.load_imagenet import denormalize


class ConsiderConstant(theano.compile.ViewOp):
    def grad(self, args, g_outs):
        # Rescale each gradient by the square root of its global mean
        # absolute value, normalized per spatial position.
        return [g_out * T.sqrt(T.mean(T.abs_(g_out))) /
                (1.0 + T.sqrt(T.mean(T.abs_(g_out), axis=(0, 1),
                                     keepdims=True)))
                for g_out in g_outs]
        # return [g_out * 100.0 / (1.0 + T.abs_(g_out)) for g_out in g_outs]


consider_constant = ConsiderConstant()
register_canonicalize(theano.gof.OpRemove(consider_constant),
                      name='consider_norm')


def decoder(z_reconstruction, z_sampled, numLatent, numHidden, mb_size,
            image_width):
    c = [2048, 2048, 512, 3]
    layers = []
    layers += [HiddenLayer(num_in=numLatent, num_out=c[0] * 4 * 4,
                           activation='relu', batch_norm=True)]
    layers += [DeConvLayer(in_channels=c[0], out_channels=c[1], kernel_len=5,
                           activation='relu', batch_norm=True,
                           unflatten_input=(mb_size, c[0], 4, 4))]  # 8x8
    layers += [DeConvLayer(in_channels=c[1], out_channels=c[2], kernel_len=5,
                           activation='relu', batch_norm=True)]  # 16x16
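# Hypothetical sanity check for the ConsiderConstant op above (independent of
# the truncated decoder sketch): the op is an identity in the forward pass
# (and OpRemove strips it at canonicalization), while theano.grad is routed
# through the custom grad(), which rescales the gradient.
x_check = T.tensor4('x_check')
y_check = T.sum(consider_constant(x_check) ** 2)
gx = theano.grad(y_check, x_check)
f_check = theano.function([x_check], gx)
print(f_check(np.ones((2, 3, 4, 4), dtype=theano.config.floatX)))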
class GradClip(theano.compile.ViewOp):
    def __init__(self, clip_lower_bound, clip_upper_bound):
        self.clip_lower_bound = clip_lower_bound
        self.clip_upper_bound = clip_upper_bound
        assert self.clip_upper_bound >= self.clip_lower_bound

    def grad(self, args, g_outs):
        return [T.clip(g_out, self.clip_lower_bound, self.clip_upper_bound)
                for g_out in g_outs]


grad_clip = GradClip(-2., 2.)
register_canonicalize(theano.gof.OpRemove(grad_clip), name='grad_clip')


class RNNTheano:
    def __init__(self, word_dim, hidden_dim=100, bptt_truncate=-1):
        self.num_Changed = 0
        self.vocab_limit = 3500
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate
        self.no_Change = []
        # The exact method of importing data will be determined later.
        # For now I'll just focus on the basic algorithm.
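# Usage sketch for the grad_clip op above (assumes a working Theano install
# and the definitions in this snippet): the forward pass is untouched, while
# the gradient flowing back through the wrapped variable is clipped to
# [-2, 2].
import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.asarray(5.0, dtype=theano.config.floatX), name='w')
loss = grad_clip(w) ** 2                 # forward value: w**2 = 25
g = theano.grad(loss, w)                 # raw gradient 2*w = 10, clipped to 2
print(theano.function([], [loss, g])())  # should print values near [25., 2.]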
import numpy as np
import theano
from theano import tensor as T
from theano.tensor.opt import register_canonicalize


# consider_constant op by benanne
# https://gist.github.com/benanne/9212037
class ConsiderConstant(theano.compile.ViewOp):
    """Treats its input as a constant when computing gradients."""
    def grad(self, args, g_outs):
        return [T.zeros(g_out.shape, dtype=theano.config.floatX)
                for g_out in g_outs]


consider_constant = ConsiderConstant()
register_canonicalize(theano.gof.OpRemove(consider_constant),
                      name='remove_consider_constant_op')


# gradient reversal layer by Daniel Renshaw
# http://stackoverflow.com/users/127480/daniel-renshaw
# thanks to him, but I don't know if it works
class MultiplyGradient(theano.gof.Op):
    view_map = {0: [0]}
    __props__ = ('hp_lambda',)

    def __init__(self, hp_lambda=1):
        """Multiplies the gradient by hp_lambda when computing grads."""
        super(MultiplyGradient, self).__init__()
        self.hp_lambda = hp_lambda
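    # The original snippet ends mid-class above. What follows is a minimal,
    # hypothetical completion in the usual shape of a custom Theano Op
    # (make_node/perform/grad), not the original author's code: identity on
    # the forward pass, gradient scaled by hp_lambda on the way back.
    def make_node(self, x):
        return theano.gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        # Identity forward pass; view_map marks the output as a view.
        output_storage[0][0] = inputs[0]

    def grad(self, inputs, output_gradients):
        return [self.hp_lambda * output_gradients[0]]


# Quick check of the completed op: forward is identity, gradient is scaled.
x = T.scalar('x')
y = MultiplyGradient(hp_lambda=0.5)(x) ** 2
print(theano.function([x], theano.grad(y, x))(3.0))  # 0.5 * 2x at x=3 -> 3.0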