Example #1
class Linear(Module):
    '''
    Linear Module
    '''
    def __init__(self, in_features, out_features):
        '''
        Initializes the weight, the bias, and their gradient accumulators
        '''
        super(Linear, self).__init__()
        self.weight = FloatTensor(out_features, in_features)
        self.bias = FloatTensor(out_features).view(-1, 1)
        self.reset_parameters()
        self.bias_grad = FloatTensor(self.bias.size()).zero_()
        self.weight_grad = FloatTensor(self.weight.size()).zero_()
        self.previous_x = None

    def reset_parameters(self):
        '''
        Initializes the weight and bias from a uniform distribution. Taken from Lecture 5 of the Deep Learning course
        '''
        std = 1 / math.sqrt(self.weight.size(1))
        self.weight.uniform_(-std, std)
        self.bias.uniform_(-std, std)

    def forward(self, x):
        '''
        Computes forward step of the Linear module
        '''
        self.previous_x = x
        return self.weight.matmul(x) + self.bias

    def backward(self, *gradwrtoutput):
        '''
        Computes backward step of the Linear module
        '''
        # keepdim keeps the (out_features, 1) shape expected by bias_grad
        self.bias_grad.add_(gradwrtoutput[0].sum(1, keepdim=True))
        self.weight_grad.add_(gradwrtoutput[0].matmul(self.previous_x.t()))

        return self.weight.t().matmul(gradwrtoutput[0])

    def step(self, eta):
        '''
        Performs one gradient step on the weight and bias with learning rate eta
        '''
        self.weight = self.weight - eta * self.weight_grad
        self.bias = self.bias - eta * self.bias_grad

    def grad_zero(self):
        '''
        Resets the accumulated gradients to zero after a gradient step
        '''
        self.bias_grad.zero_()
        self.weight_grad.zero_()

    def param(self):
        '''
        Returns (parameter, gradient) pairs for the weight and the bias
        '''
        return [(self.weight, self.weight_grad), (self.bias, self.bias_grad)]
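
A minimal usage sketch for the layer above. It assumes FloatTensor is torch.FloatTensor, Module is a no-op base class, math is imported, and that inputs are stored column-wise with shape (in_features, batch_size); none of these details are spelled out in the snippet itself.

# usage sketch (assumes the Linear class above, `import math`,
# and `from torch import FloatTensor`)
layer = Linear(in_features=3, out_features=2)
x = FloatTensor(3, 5).normal_()           # 5 samples stored as columns
y = layer.forward(x)                      # shape (2, 5)
grad_y = FloatTensor(y.size()).normal_()  # stand-in for an upstream gradient
grad_x = layer.backward(grad_y)           # shape (3, 5)
layer.step(eta=0.01)                      # one SGD update
layer.grad_zero()                         # clear the accumulated gradients
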
Example #2
class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.epsilon = 200
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Tensor(out_features, in_features)
        self.dweight = Tensor(self.weight.size())
        if bias:
            self.bias = Tensor(out_features)
            self.dbias = Tensor(self.bias.size())
        else:
            self.bias = None
            self.dbias = None
        self.previous_input = Tensor()
        self.current_output = Tensor()
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.epsilon)
        self.weight.uniform_(0, stdv)
        if self.bias is not None:
            self.bias.uniform_(0, stdv)

    def reset_gradient(self):
        self.dweight.zero_()
        if self.dbias is not None:
            self.dbias.zero_()

    def forward(self, input):
        self.previous_input = input
        output = self.weight.mv(input)
        if self.bias is not None:
            output = output + self.bias
        self.current_output = output
        return output

    def backward(self, input):
        # the derivative of the tanh activation is folded into this layer's
        # backward pass, so `input` is the gradient w.r.t. the activation output
        dl_ds = dtanh(self.current_output) * input
        dl_dx = self.weight.t().mv(dl_ds)
        self.dweight.add_(
            dl_ds.view(-1, 1).mm(self.previous_input.view(1, -1)))
        if self.dbias is not None:
            self.dbias.add_(dl_ds)

        return dl_dx

    def update_parameters(self, eta):
        self.weight = self.weight - eta * self.dweight
        if self.bias is not None:
            self.bias = self.bias - eta * self.dbias

    def parameters(self):
        return self.weight, self.bias
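
Example #2 works on one sample vector at a time (mv rather than a batched matmul) and folds the derivative of a tanh activation into its own backward pass through a helper dtanh that is not shown. A definition consistent with that usage is sketched below; it is an assumption, not code from the original project.

def dtanh(x):
    # derivative of tanh evaluated at the pre-activation x: 1 - tanh(x)^2
    return 1 - x.tanh().pow(2)
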
Example #3
class Linear(Module):
    '''
    Assumptions:
        Linear equation : Y = X * W + b.
        Data layout     : rows represent samples, columns represent features.
    '''
    def __init__(self, input_dim, output_dim, bias=True, initOption='Normal'):
        super(Linear, self).__init__()
        self.name = 'Linear'

        self.input_dim, self.output_dim = input_dim, output_dim

        self.w = FloatTensor(input_dim, output_dim)
        self.gradW = FloatTensor(input_dim, output_dim)

        if bias:
            self.b = FloatTensor(output_dim)
            self.gradB = FloatTensor(output_dim)
        else:
            self.b = None
            self.gradB = None

        self.initOption = initOption
        self.initParameters()

    def initParameters(self):
        '''
        Different methods for parameter initialization.
        '''
        if self.initOption == 'Normal':
            self.w.normal_()
        elif self.initOption == 'Zero':
            self.w.zero_()
        elif self.initOption == 'He':
            # 'He initialization' is recommended for layers with a ReLU activation
            self.w.normal_().mul_(math.sqrt(2 / self.input_dim))
        elif self.initOption == 'Xavier':
            # 'Xavier initialization' is recommended for layers with a tanh activation
            self.w.normal_().mul_(
                math.sqrt(2 / (self.input_dim + self.output_dim)))

        self.gradW.fill_(0)

        if self.b is not None:
            self.b.normal_()
            self.gradB.fill_(0)

    def forward(self, input):
        '''
        Forward Pass: 
            Y = X * W + b.
        '''
        self.input = input

        if self.b is not None:
            self.output = self.input.matmul(self.w).add(self.b)  # Broadcast
        else:
            self.output = self.input.matmul(self.w)
        return self.output

    def backward(self, gradwrtoutput):
        '''
        Backpropagation, with gradwrtoutput of shape (batch_size, output_dim):
            dW = X^T * dL/dY
            db = (dL/dY)^T * 1   (sum over the batch)
            dX = dL/dY * W^T.
        '''
        self.gradW.add_(self.input.t().matmul(gradwrtoutput))

        if self.b is not None:
            self.gradB.add_(gradwrtoutput.sum(0))
        return gradwrtoutput.matmul(self.w.t())

    def zero_grad(self):
        '''
        Set gradient to 0.
        '''
        self.gradW.zero_()

        if self.b is not None:
            self.gradB.zero_()

    def param(self):
        '''
        Return parameters.
        '''
        if self.b is not None:
            return [(self.w, self.gradW), (self.b, self.gradB)]
        else:
            return [(self.w, self.gradW)]
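
A short usage sketch for the row-major convention of Example #3, assuming FloatTensor is torch.FloatTensor and that the surrounding mini framework provides the Module base class; these assumptions are not part of the snippet.

# usage sketch (assumes the Linear class above, `import math`,
# and `from torch import FloatTensor`)
fc = Linear(input_dim=4, output_dim=3, initOption='Xavier')
X = FloatTensor(8, 4).normal_()        # 8 samples stored as rows
Y = fc.forward(X)                      # shape (8, 3)
dY = FloatTensor(Y.size()).normal_()   # stand-in for an upstream gradient
dX = fc.backward(dY)                   # shape (8, 4)
for p, g in fc.param():                # manual SGD step over (param, grad) pairs
    p.add_(-0.01 * g)
fc.zero_grad()
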
Example #4
class Linear(Module):

    # one fully-connected layer

    def __init__(self, in_dim, out_dim, eps=1., method='xavier'):
        super(Linear, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        # define weight, bias and their gradient
        self.w = FloatTensor(out_dim, in_dim)
        self.dw = FloatTensor(out_dim, in_dim)
        self.b = FloatTensor(out_dim)
        self.db = FloatTensor(out_dim)

        # initialization: defaulted as Xavier
        if method == 'zero':
            self.w = self.w.fill_(0)
            self.b = self.b.fill_(0)
        elif method == 'normal':
            self.w = self.w.normal_(mean=0, std=eps)
            self.b = self.b.normal_(mean=0, std=eps)
        else:
            temp_std = 1. / math.sqrt((self.in_dim + self.out_dim) / 2)
            self.w = self.w.normal_(mean=0, std=temp_std)
            self.b = self.b.normal_(mean=0, std=temp_std)

        # zero gradient intialization
        self.dw = self.dw.zero_()
        self.db = self.db.zero_()

    def forward(self, x):

        # y = w * x + b

        self.input = x.clone()
        self.output = self.w.matmul(self.input) + self.b
        # equivalent: self.output = self.w @ self.input + self.b

        return self.output

    def backward(self, gradwrtoutput):

        temp_wt = self.w.t()

        # dw = dL/dy * x
        temp_dw = gradwrtoutput.view(-1, 1).mm(self.input.view(1, -1))
        self.dw.add_(temp_dw)

        # db = dL/dy
        temp_db = gradwrtoutput.clone()
        self.db.add_(temp_db)

        # dx = w.T * dL/dy
        temp_dx = temp_wt.matmul(gradwrtoutput)

        return temp_dx

    def param(self):
        return [self.w, self.dw, self.b, self.db]

    def zero_grad(self):
        self.dw.zero_()
        self.db.zero_()
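
Example #4 leaves the parameter update to the caller and returns a flat [w, dw, b, db] list from param(). A hedged usage sketch, again assuming torch.FloatTensor, `import math`, and a Module base class from the surrounding project:

# usage sketch (assumes the Linear class above, `import math`,
# and `from torch import FloatTensor`)
fc = Linear(in_dim=4, out_dim=2, method='xavier')
x = FloatTensor(4).normal_()           # a single input sample
y = fc.forward(x)                      # shape (2,)
dy = FloatTensor(2).normal_()          # stand-in for an upstream gradient
dx = fc.backward(dy)                   # shape (4,)
w, dw, b, db = fc.param()              # flat list of parameters and gradients
for p, g in ((w, dw), (b, db)):
    p.add_(-0.01 * g)                  # manual SGD step
fc.zero_grad()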