def __init__(self, input=None, target=None, regularize=True):
    """Build the symbolic graph for an affine regression layer.

    :param input: symbolic input matrix; a fresh ``T.matrix('input')`` is
        created when None
    :param target: symbolic target matrix; a fresh ``T.matrix('target')``
        is created when None
    :param regularize: when True, add the regularization term to the cost
    """
    super(RegressionLayer, self).__init__()  # boilerplate
    # MODEL CONFIGURATION
    self.regularize = regularize
    # ACQUIRE/MAKE INPUT AND TARGET
    # Use `is None`, not truthiness: symbolic tensor variables do not
    # support reliable boolean coercion, and a falsy-but-valid argument
    # must not be silently replaced by a fresh variable.
    if input is None:
        input = T.matrix('input')
    if target is None:
        target = T.matrix('target')
    # HYPER-PARAMETERS
    self.stepsize = T.scalar()  # a stepsize for gradient descent
    # PARAMETERS
    self.w = T.matrix()  # the linear transform to apply to our input points
    self.b = T.vector()  # biases, making our transform affine instead of linear
    # REGRESSION MODEL
    self.activation = T.dot(input, self.w) + self.b
    self.prediction = self.build_prediction()
    # CLASSIFICATION COST
    self.classification_cost = self.build_classification_cost(target)
    # REGULARIZATION COST
    self.regularization = self.build_regularization()
    # TOTAL COST: classification cost, optionally plus regularization
    self.cost = self.classification_cost
    if self.regularize:
        self.cost = self.cost + self.regularization
    # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
    self.grad_w, self.grad_b, grad_act = T.grad(
        self.cost, [self.w, self.b, self.prediction])
    print('grads', self.grad_w, self.grad_b)
    # INTERFACE METHODS
    # `update` performs one gradient-descent step on (w, b) and reports
    # the cost and gradients; `apply` runs the forward pass only.
    self.update = M.Method(
        [input, target],
        [self.cost, self.grad_w, self.grad_b, grad_act],
        updates={self.w: self.w - self.stepsize * self.grad_w,
                 self.b: self.b - self.stepsize * self.grad_b})
    self.apply = M.Method(input, self.prediction)
def __init__(self, args, cost, params, gradients=None, stepsize=None, WEIRD_STUFF=True):
    """Build gradient-descent update Methods for `params` minimizing `cost`.

    :param args: symbolic inputs required to compute `cost`
    :param cost: scalar symbolic expression to minimize
    :param params: list of symbolic parameters to update
    :param gradients: precomputed gradients of `cost` w.r.t. `params`;
        computed via T.grad when None
    :param stepsize: the step to take in (negative) gradient direction
    :type stepsize: None, scalar value, or scalar TensorVariable
    :param WEIRD_STUFF: selects the workaround path for numeric stepsizes
        (see TODO below); semantics of the non-workaround path are unclear
    """
    super(StochasticGradientDescent, self).__init__()
    self.WEIRD_STUFF = WEIRD_STUFF
    # stepsize_init stays None unless the workaround path stores the
    # numeric initial value separately from the symbolic variable.
    self.stepsize_init = None
    if stepsize is None:
        # no stepsize given: leave it as a free symbolic scalar
        self.stepsize = (T.dscalar())
    elif isinstance(stepsize, T.TensorVariable):
        # caller supplied a symbolic stepsize: use it directly
        self.stepsize = stepsize
    else:
        # caller supplied a numeric stepsize
        if self.WEIRD_STUFF:
            # TODO: why is this necessary? why does the else clause not work?
            # self.stepsize = module.Member(T.dscalar(), init = stepsize)
            # HACK: keep the stepsize symbolic and remember the numeric
            # value on the side instead of baking it in as a constant.
            self.stepsize = (T.dscalar())
            self.stepsize_init = stepsize
        else:
            # self.stepsize = module.Member(T.value(stepsize))
            self.stepsize = (T.constant(stepsize))  # work!
    # the stepsize must be a scalar whichever branch produced it
    if self.stepsize.ndim != 0:
        raise ValueError('stepsize must be a scalar', stepsize)
    self.params = params
    if gradients is None:
        self.gparams = T.grad(cost, self.params)
    else:
        self.gparams = gradients
    # standard SGD rule: p <- p - stepsize * grad(p)
    self.updates = dict((p, p - self.stepsize * g)
                        for p, g in zip(self.params, self.gparams))
    # `step` updates only; `step_cost` also returns the (pre-update) cost
    self.step = module.Method(args, [], updates=self.updates)
    self.step_cost = module.Method(args, cost, updates=self.updates)
def __init__(self):
    """Tied-weight autoencoder: tanh encoder and decoder share one weight
    matrix (decoder uses its transpose), trained by a fixed-step (0.01)
    gradient-descent Method on the squared reconstruction error."""
    super(M, self).__init__()
    inputs = T.matrix('x')  # input batch, also the reconstruction target
    self.w = module.Member(T.matrix('w'))  # shared encoder/decoder weights
    self.a = module.Member(T.vector('a'))  # hidden-layer bias
    self.b = module.Member(T.vector('b'))  # output-layer bias
    # encode, decode with the transposed weights, and score
    self.hid = T.tanh(T.dot(inputs, self.w) + self.a)
    self.out = T.tanh(T.dot(self.hid, self.w.T) + self.b)
    self.err = 0.5 * T.sum((self.out - inputs) ** 2)
    # one gradient-descent step over all trainable members
    trainable = [self.w, self.a, self.b]
    grads = T.grad(self.err, trainable)
    step_rule = dict((p, p - 0.01 * g) for p, g in zip(trainable, grads))
    self.step = module.Method([inputs], self.err, updates=step_rule)
def __init__(
        self,
        window_size,
        n_quadratic_filters,
        activation_function,
        reconstruction_cost_function,
        tie_weights=False,
):
    """Build a convolutional MLP over a window of `window_size` inputs.

    Each window position gets a quadratic autoencoder (QDAA); positions
    1..window_size-1 reuse the parameters of position 0, making the layer
    convolutional across the window. The concatenated hidden codes feed a
    second QDAA, whose hidden code feeds an N-class output module.
    Exposes `pretraining_update` (layerwise reconstruction costs) and
    `finetuning_update` (supervised classification cost) Methods.

    :param window_size: number of input matrices in the window
    :param n_quadratic_filters: quadratic filter count for each QDAA
    :param activation_function: nonlinearity, passed through to QDAA
    :param reconstruction_cost_function: QDAA reconstruction cost
    :param tie_weights: passed through to QDAA
    """
    super(ConvolutionalMLP, self).__init__()
    # self.lr = module.Member(T.scalar())
    self.lr = (T.scalar())  # symbolic learning rate used in all updates
    self.inputs = [T.dmatrix() for i in range(window_size)]
    self.targ = T.lvector()  # integer class targets

    # Position 0 owns the parameters; later positions share them.
    self.input_representations = []
    self.input_representations.append(
        QDAA(input=self.inputs[0],
             tie_weights=tie_weights,
             n_quadratic_filters=n_quadratic_filters,
             activation_function=activation_function,
             reconstruction_cost_function=reconstruction_cost_function))
    first = self.input_representations[0]
    for i in self.inputs[1:]:
        self.input_representations.append(
            QDAA(input=i,
                 tie_weights=tie_weights,
                 n_quadratic_filters=n_quadratic_filters,
                 activation_function=activation_function,
                 reconstruction_cost_function=reconstruction_cost_function,
                 _w1=first.w1,
                 _w2=first.w2,
                 _b1=first.b1,
                 _b2=first.b2,
                 _qfilters=first.qfilters))
    # sanity check: weight sharing must be by identity, not by copy
    assert self.input_representations[-1].w1 is first.w1

    self.input_representation = T.concatenate(
        [i.hidden for i in self.input_representations], axis=1)
    self.hidden = QDAA(
        input=self.input_representation,
        tie_weights=tie_weights,
        n_quadratic_filters=n_quadratic_filters,
        activation_function=activation_function,
        reconstruction_cost_function=reconstruction_cost_function)
    self.output = Module_Nclass(x=self.hidden.hidden, targ=self.targ)

    # ---- layerwise pretraining on reconstruction costs ----
    input_pretraining_params = [
        first.w1, first.w2, first.b1, first.b2] + first.qfilters
    hidden_pretraining_params = [
        self.hidden.w1, self.hidden.w2,
        self.hidden.b1, self.hidden.b2] + self.hidden.qfilters
    input_pretraining_cost = sum(i.ncost for i in self.input_representations)
    hidden_pretraining_cost = self.hidden.ncost
    input_pretraining_gradients = T.grad(input_pretraining_cost,
                                         input_pretraining_params)
    hidden_pretraining_gradients = T.grad(hidden_pretraining_cost,
                                          hidden_pretraining_params)
    # BUGFIX: zip() returns an iterator on Python 3, so `zip(..) + zip(..)`
    # raises TypeError; materialize both before concatenating.
    pretraining_pairs = (
        list(zip(input_pretraining_params, input_pretraining_gradients)) +
        list(zip(hidden_pretraining_params, hidden_pretraining_gradients)))
    pretraining_updates = dict(
        (p, p - self.lr * g) for p, g in pretraining_pairs)
    self.pretraining_update = module.Method(
        self.inputs,
        [input_pretraining_cost, hidden_pretraining_cost],
        pretraining_updates)

    # ---- supervised fine-tuning on the classification cost ----
    # (only the encoder halves w1/b1 plus the output layer are tuned)
    finetuning_params = \
        [first.w1, first.b1] + first.qfilters + \
        [self.hidden.w1, self.hidden.b1] + self.hidden.qfilters + \
        [self.output.w, self.output.b]
    finetuning_cost = self.output.cost
    finetuning_gradients = T.grad(finetuning_cost, finetuning_params)
    finetuning_updates = dict(
        (p, p - self.lr * g)
        for p, g in zip(finetuning_params, finetuning_gradients))
    self.finetuning_update = module.Method(self.inputs + [self.targ],
                                           self.output.cost,
                                           finetuning_updates)