def __init__(self, shape, bias=True, wr=0, eta=1e-2, momentum=0.9, gamma=1e+0,
             scale=1., minibatch_size=10, seed=99):
    """
    shape : tuple of integers. Input dimension and the number of classes.
    bias : bool. Whether to use a bias term.
    wr : float. The L2-regularization parameter.
    eta : float. Learning rate.
    momentum : float. Momentum coefficient.
    gamma : float. Margin parameter of the smoothed hinge loss.
    minibatch_size : integer. Minibatch size used to calculate the stochastic gradient.
    seed : integer. Seed for the random module.
    """
    super(SVM, self).__init__(eta, scale, minibatch_size, seed)
    self.show_param(shape, wr, eta, momentum, scale, minibatch_size, seed)

    # input symbols.
    self.Z = T.matrix(dtype=theano.config.floatX)
    self.Y = T.ivector()
    self.symbols = [self.Z, self.Y]

    # parameters.
    W = L.linear_param(shape[0], shape[1], scale=5e-2)
    b = L.zeros_param(shape[1])
    if bias:
        self.params = [b, W]
    else:
        self.params = [W]

    # functions.
    A = L.FullConnect(self.Z, self.params)  # (n, K), K is the number of classes.
    margin = A[T.arange(self.Y.shape[0]), self.Y][:, None] - A  # (n, K)
    self.loss = T.mean(T.sum(T.nnet.softplus(gamma - margin), axis=1))
    self.pred = T.argmax(A, axis=1)

    if wr > 0:
        self.wr = wr
        if bias:
            self.reg = 0.5 * wr * T.sum(self.params[1]**2)
        else:
            self.reg = 0.5 * wr * T.sum(self.params[0]**2)
    else:
        self.wr = 0
        self.reg = 0

    self.sgrad = T.grad(cost=self.loss + self.reg, wrt=self.params)

    # compile.
    self.compile()

    # optimizer.
    self.optimizer = AGD(self, eta=eta, momentum=momentum)
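# Illustrative sketch (not part of the class): the loss built above is a
# softplus-smoothed multiclass hinge loss. For scores A = Z W + b of shape
# (n, K), the margin of sample i against class k is A[i, y_i] - A[i, k], and
# the loss averages sum_k softplus(gamma - margin) over the minibatch.
# The NumPy version below mirrors that computation for sanity checking;
# the function name `numpy_svm_loss` is illustrative only.
import numpy as np

def numpy_svm_loss(A, y, gamma=1.0):
    """A: (n, K) class scores, y: (n,) integer labels."""
    n = A.shape[0]
    margin = A[np.arange(n), y][:, None] - A        # (n, K)
    softplus = np.logaddexp(0.0, gamma - margin)    # stable log(1 + exp(.))
    return softplus.sum(axis=1).mean()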
def __init__(self, shape, wr=0, eta=1e-2, momentum=0.9, scale=1.,
             minibatch_size=10, eval_iters=1000, seed=99, log_level=DEBUG):
    """
    shape : tuple of integers. Layer sizes, from the input dimension to the output dimension.
    wr : float. The L2-regularization parameter.
    eta : float. Learning rate.
    momentum : float. Momentum coefficient.
    minibatch_size : integer. Minibatch size used to calculate the stochastic gradient.
    seed : integer. Seed for the random module.
    """
    super(MLPBlock, self).__init__(eta, scale, minibatch_size, eval_iters, seed, log_level)
    self.show_param(shape, wr, eta, momentum, scale, minibatch_size, eval_iters, seed)

    # input symbols.
    self.Z = T.matrix(dtype=theano.config.floatX)
    self.Y = T.matrix(dtype=theano.config.floatX)
    self.symbols = [self.Z, self.Y]

    # parameters.
    self.params = []
    for l in range(len(shape) - 1):
        b = L.zeros_param(shape[l + 1])
        W = L.linear_param(shape[l], shape[l + 1], scale=5e-2)
        self.params.extend([b, W])

    # functions.
    Z = self.Z
    for l in range(0, len(shape) - 1):
        b = self.params[2 * l]
        W = self.params[2 * l + 1]
        Z = L.Act(L.FullConnect(Z, [b, W]), 'relu')
    self.output = Z
    self.loss = L.Loss(self.output, self.Y, 'squared_error')

    if wr > 0:
        self.wr = wr
        val = 0
        for l in range(1, len(self.params), 2):
            val += T.sum(self.params[l]**2)
        self.reg = 0.5 * wr * val
    else:
        logger.log(
            ERROR,
            'non-positive regularization parameter is given: {0}'.format(wr))
        sys.exit(-1)

    self.sgrad = T.grad(cost=self.loss + self.reg, wrt=self.params)

    # compile.
    self.compile()

    # optimizer.
    self.optimizer = AGD(self, eta=eta, momentum=momentum)
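# Illustrative sketch (not part of the class): the graph above is a plain
# feed-forward network with a ReLU activation after every fully connected
# layer and a squared-error loss on the output. The NumPy forward pass below
# mirrors that computation; the parameter layout follows
# self.params = [b0, W0, b1, W1, ...], the function names are illustrative,
# and the exact reduction used by L.Loss(..., 'squared_error') is an assumption.
import numpy as np

def numpy_mlp_forward(Z, params):
    """Z: (n, d) inputs; params: [b0, W0, b1, W1, ...] as NumPy arrays."""
    for l in range(len(params) // 2):
        b, W = params[2 * l], params[2 * l + 1]
        Z = np.maximum(Z @ W + b, 0.0)   # FullConnect followed by ReLU
    return Z

def numpy_squared_error(output, Y):
    # assumed reduction: mean over samples of the per-sample squared error.
    return np.mean(np.sum((output - Y) ** 2, axis=1))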
def __init__(self, shape, bias=True, wr=0, eta=1e-2, momentum=0.9, scale=1.,
             minibatch_size=10, eval_iters=1000, seed=99, log_level=DEBUG):
    """
    shape : tuple of integers. Input dimension and the number of classes.
    bias : bool. Whether to use a bias term.
    wr : float. The L2-regularization parameter.
    eta : float. Learning rate.
    momentum : float. Momentum coefficient.
    minibatch_size : integer. Minibatch size used to calculate the stochastic gradient.
    seed : integer. Seed for the random module.
    """
    super(LogReg, self).__init__(eta, scale, minibatch_size, eval_iters, seed, log_level)
    self.show_param(shape, wr, eta, momentum, scale, minibatch_size, eval_iters, seed)

    # input symbols.
    self.Z = T.matrix(dtype=theano.config.floatX)
    self.Y = T.ivector()
    self.symbols = [self.Z, self.Y]

    # parameters.
    W = L.linear_param(shape[0], shape[1], scale=5e-2)
    b = L.zeros_param(shape[1])
    if bias:
        self.params = [b, W]
    else:
        self.params = [W]

    # functions.
    output = L.Act(L.FullConnect(self.Z, self.params), u'softmax')
    self.pred = T.argmax(output, axis=1)
    self.pred_proba = output
    self.loss = L.Loss(output, self.Y)

    if wr > 0:
        self.wr = wr
        if bias:
            self.reg = 0.5 * wr * T.sum(self.params[1]**2)
        else:
            self.reg = 0.5 * wr * T.sum(self.params[0]**2)
    else:
        logger.log(
            ERROR,
            'non-positive regularization parameter is given: {0}'.format(wr))
        sys.exit(-1)

    self.sgrad = T.grad(cost=self.loss + self.reg, wrt=self.params)

    # compile.
    self.compile()

    # optimizer.
    self.optimizer = AGD(self, eta=eta, momentum=momentum)
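# Illustrative sketch (not part of the class): the model above is multinomial
# logistic regression, i.e. a softmax over a single affine map, with argmax
# for hard predictions. The NumPy version below mirrors the prediction path;
# the function names are illustrative, and treating L.Loss(output, self.Y)
# as cross-entropy is an assumption based on the softmax output.
import numpy as np

def numpy_logreg_predict(Z, W, b):
    """Z: (n, d) inputs, W: (d, K) weights, b: (K,) bias."""
    A = Z @ W + b
    A -= A.max(axis=1, keepdims=True)                  # for numerical stability
    P = np.exp(A) / np.exp(A).sum(axis=1, keepdims=True)
    return P.argmax(axis=1), P                         # pred, pred_proba

def numpy_cross_entropy(P, y):
    """P: (n, K) predicted probabilities, y: (n,) integer labels."""
    return -np.mean(np.log(P[np.arange(len(y)), y]))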
def __init__(self, shape, wr=0, eta=1e-2, momentum=0.9, scale=1.,
             minibatch_size=10, eval_iters=1000, seed=99, log_level=DEBUG):
    """
    shape : tuple of integers. Layer sizes, from the input dimension to the output dimension.
    wr : float. The L2-regularization parameter.
    eta : float. Learning rate.
    momentum : float. Momentum coefficient.
    minibatch_size : integer. Minibatch size used to calculate the stochastic gradient.
    seed : integer. Seed for the random module.
    """
    super(MLPBlock2, self).__init__(eta, scale, minibatch_size, eval_iters, seed, log_level)
    self.show_param(shape, wr, eta, momentum, scale, minibatch_size, eval_iters, seed)

    # input symbols.
    self.X = T.matrix(dtype=theano.config.floatX)
    self.Z = T.matrix(dtype=theano.config.floatX)
    self.Y = T.matrix(dtype=theano.config.floatX)
    self.symbols = [self.X, self.Z, self.Y]

    # parameters.
    self.params = []
    for l in range(len(shape) - 1):
        b = L.zeros_param(shape[l + 1])
        W = L.linear_param(shape[l], shape[l + 1], scale=5e-2)
        b2 = L.zeros_param(shape[l + 1])
        W2 = L.linear_param(shape[l], shape[l + 1], scale=5e-2)
        self.params.extend([b, W, b2, W2])

    # functions.
    normalize = False  # test
    X = self.X
    if normalize:
        Z = L.normalize(self.Z, 1e-4) * float(shape[0])
        Z2 = L.normalize(self.X, 1e-4) * float(shape[0])
    else:
        Z = self.Z
        Z2 = self.X
    for l in range(0, len(shape) - 1):
        b = self.params[4 * l]
        W = self.params[4 * l + 1]
        b2 = self.params[4 * l + 2]
        W2 = self.params[4 * l + 3]
        if l == len(shape) - 2:
            Z = L.Act(L.FullConnect(Z, [b, W]), 'tanh')
            Z2 = L.Act(L.FullConnect(Z2, [b2, W2]), 'tanh')
        else:
            Z = L.Act(L.FullConnect(Z, [b, W]), 'relu')
            Z2 = L.Act(L.FullConnect(Z2, [b2, W2]), 'relu')

    self.output_1 = Z   # receives the output of the previous layer.
    self.output_2 = Z2  # receives the input data directly.
    self.output = Z + Z2
    self.loss = L.Loss(self.output, self.Y, 'inner_prod')
    # self.loss = L.Loss(self.output, self.Y, 'huber')
    # self.loss = L.Loss(self.output, self.Y, 'abs')
    # self.loss = L.Loss(self.output, self.Y, 'squared_error')

    if wr > 0:
        self.wr = wr
        val = 0
        for l in range(1, len(self.params), 2):
            val += T.sum(self.params[l]**2)
        self.reg = 0.5 * wr * val
    else:
        logger.log(
            ERROR,
            'non-positive regularization parameter is given: {0}'.format(wr))
        sys.exit(-1)

    self.sgrad = T.grad(cost=self.loss + self.reg, wrt=self.params)

    # compile.
    self.compile()

    # optimizer.
    self.optimizer = AGD(self, eta=eta, momentum=momentum)
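# Illustrative sketch (not part of the class): MLPBlock2 runs two parallel
# stacks of layers, one fed with the block input self.Z and one fed with the
# raw data self.X, applies ReLU on hidden layers and tanh on the last layer,
# and sums the two branch outputs. The NumPy forward pass below mirrors that
# wiring under the assumption that FullConnect computes Z @ W + b; the
# function name is illustrative, and the exact form of the 'inner_prod'
# loss used by L.Loss is not reproduced here.
import numpy as np

def numpy_mlpblock2_forward(X, Z, params):
    """X, Z: (n, d) inputs; params: [b0, W0, b0_2, W0_2, ...] per layer."""
    n_layers = len(params) // 4
    for l in range(n_layers):
        b, W, b2, W2 = params[4 * l:4 * l + 4]
        act = np.tanh if l == n_layers - 1 else (lambda a: np.maximum(a, 0.0))
        Z = act(Z @ W + b)     # branch fed from the previous block's output
        X = act(X @ W2 + b2)   # branch fed from the raw input data
    return Z + X               # summed branch outputs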