Example #1
File: model.py Project: ebanner/ml
 def predict(self, X):
     """Return the probability of x belonging to either class"""
     
     hidden = sigmoid(self.Wh @ X + self.bh)
     scores = self.Ws @ hidden + self.bs
     probs = softmax_vectorized(scores)
     
     return probs.argmax(axis=0)
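Neither sigmoid nor softmax_vectorized is defined in these snippets. A minimal sketch consistent with how they are used here (scores of shape (C, N), with the softmax taken down the class axis) could be:

    import numpy as np

    def sigmoid(Z):
        """Elementwise logistic function"""
        return 1 / (1 + np.exp(-Z))

    def softmax_vectorized(scores):
        """Column-wise softmax over a (C, N) array of class scores

        Shifting by each column's max keeps np.exp from overflowing
        """
        shifted = scores - scores.max(axis=0, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=0, keepdims=True)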
Example #2
File: units.py Project: ebanner/ml
    def forward(self, scores, ys):
        """Compute softmax probabilities and the cross-entropy loss of
        `scores` against the integer labels `ys`"""
        self.ys = ys
        self.probs = softmax_vectorized(np.array(scores))
        # Probability assigned to each example's correct class
        y_hats = self.probs[self.ys, range(len(self.ys))]

        # Loss
        losses = -np.log(y_hats)
        loss = losses.sum()
        
        return loss, losses, self.probs
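As a usage sketch (the enclosing class isn't shown, so the name and no-argument constructor below are assumptions): scores is a (C, N) array of class scores and ys holds the N integer labels, so self.probs[self.ys, range(len(self.ys))] picks out the probability of each example's correct class:

    unit = SoftmaxLossUnit()           # hypothetical name for the class above
    scores = np.array([[2.0, 0.5],
                       [1.0, 1.5],
                       [0.1, 0.3]])    # shape (C=3, N=2)
    ys = np.array([0, 1])              # correct class for each column
    loss, losses, probs = unit.forward(scores, ys)
    # losses[i] == -log(probs[ys[i], i]); loss is their sum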
Example #3
File: model.py Project: ebanner/ml
    def forward_backward_prop(self, X=None, ys=None, rollout=None,
            Whh=None, bhh=None, Wxh=None, bxh=None, Ws=None, bs=None,
            hidden=None, predict=False):
        """Perform forward and backward prop over a single training example
        
        Returns loss and gradients
        
        """
        # Hidden and input weights
        Whh = Whh if isinstance(Whh, np.ndarray) else self.Whh
        bhh = bhh if isinstance(bhh, np.ndarray) else self.bhh
        Wxh = Wxh if isinstance(Wxh, np.ndarray) else self.Wxh
        bxh = bxh if isinstance(bxh, np.ndarray) else self.bxh

        # Softmax weights
        Ws = Ws if isinstance(Ws, np.ndarray) else self.Ws
        bs = bs if isinstance(bs, np.ndarray) else self.bs

        # Initial hidden state
        hidden = hidden if isinstance(hidden, np.ndarray) else self.hidden

        # Where to start in the sequence and how far to go
        rollout = self.rollout if rollout is None else rollout

        # Convert X and ys to dictionaries?
        if predict:
            X = {t+1:X[:, [t]] for t in range(rollout)}
            ys = {t+1:1 for t in range(rollout)}
        else:
            X, ys = {}, {}

        # Forward pass!
        dWhh, dbhh = np.zeros_like(Whh), np.zeros_like(bhh)
        dWxh, dbxh = np.zeros_like(Wxh), np.zeros_like(bxh)
        dWs, dbs = np.zeros_like(Ws), np.zeros_like(bs)

        loss = 0.
        hiddens = {0: hidden}
        dhiddens, dhiddens_downstream, dhiddens_local = {}, {rollout:np.zeros((self.H, 1))}, {}
        scores, probs = {}, {}
        for t in range(1, rollout+1):
            # Get the next input in the sequence (X and ys are already
            # filled in when predicting)
            if not predict:
                X[t], ys[t] = next(self.input)

            # Previous hidden layer and input at time t
            Z = (Whh @ hiddens[t-1] + bhh) + (Wxh @ X[t] + bxh)
            hiddens[t] = np.tanh(Z)
            
            # Softmax
            scores[t] = Ws @ hiddens[t] + bs
            probs[t] = softmax_vectorized(scores[t])
            y_hat = probs[t][ys[t]]

            # Loss
            loss += -np.log(y_hat).sum()

        # Add regularization
        loss += self.regularizer * 0.5*(np.sum(Whh**2) + np.sum(bhh**2) +
                                        np.sum(Wxh**2) + np.sum(bxh**2) +
                                        np.sum(Ws**2) + np.sum(bs**2))
        if predict:
            return hiddens[rollout], scores
        
        # Backpropagate!
        for t in range(rollout, 0, -1):
            # Scores: the softmax/cross-entropy gradient is the probabilities
            # with 1 subtracted at the true class; copy so the stored probs
            # aren't mutated
            dscores = probs[t].copy()
            dscores[ys[t], 0] -= 1

            # Softmax weights
            dbs += dscores
            dWs += dscores @ hiddens[t].T

            dhiddens_local[t] = Ws.T @ dscores
            dhiddens[t] = dhiddens_local[t] + dhiddens_downstream[t] # Karpathy optimization
            
            dZ = tanh_grad(hiddens[t]) * dhiddens[t]

            # Input and hidden weights
            dbxh += dZ
            dWxh += dZ @ X[t].T
            dbhh += dZ
            dWhh += dZ @ hiddens[t-1].T
            
            # Set up incoming hidden weight gradient for previous time step
            dhiddens_downstream[t-1] = Whh.T @ dZ
        
        # Regularization
        #
        # Hidden and input weights
        dWhh += (self.regularizer*Whh)
        dbhh += (self.regularizer*bhh)
        dWxh += (self.regularizer*Wxh)
        dbxh += (self.regularizer*bxh)
        
        # Softmax weights
        dWs += (self.regularizer*Ws)
        dbs += (self.regularizer*bs)
        
        # Log additional info?
        if self.inspect:
            self.xs, self.ys = str(X), str(ys)
            self.scores, self.probs = scores, probs
            self.loss = loss
            self.dWhh, self.dbhh, self.dWxh, self.dbxh = dWhh, dbhh, dWxh, dbxh
            self.dWs, self.dbs = dWs, dbs
            self.hiddens = hiddens
            self.dhiddens = dhiddens
            self.dhiddens_local, self.dhiddens_downstream = dhiddens_local, dhiddens_downstream
        
        return State(loss, Gradients(dWhh, dbhh, dWxh, dbxh, dWs, dbs), hiddens[rollout])
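tanh_grad is applied to hiddens[t] = tanh(Z), i.e. to the activation itself, so the derivative can be computed without keeping Z around. A one-liner matching that usage:

    def tanh_grad(h):
        """Derivative of tanh in terms of the activation h = tanh(Z):
        d tanh(Z)/dZ = 1 - tanh(Z)**2 = 1 - h**2
        """
        return 1 - h**2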
Example #4
    def forward_backward_prop(self, X=None, ys=None, rollout=None, train_index=None,
            Whh=None, bhh=None, Wxh=None, bxh=None, Ws=None, bs=None,
            hidden=None, predict=False):
        """Perform forward and backward prop over a single training example
        
        Returns loss and gradients
        
        """
        # Hidden and input weights
        Whh = Whh if isinstance(Whh, np.ndarray) else self.Whh
        bhh = bhh if isinstance(bhh, np.ndarray) else self.bhh
        Wxh = Wxh if isinstance(Wxh, np.ndarray) else self.Wxh
        bxh = bxh if isinstance(bxh, np.ndarray) else self.bxh

        # Softmax weights
        Ws = Ws if isinstance(Ws, np.ndarray) else self.Ws
        bs = bs if isinstance(bs, np.ndarray) else self.bs

        # Initial hidden state
        hidden = hidden if isinstance(hidden, np.ndarray) else self.hidden

        # Where to start in the sequence and how far to go
        rollout = self.rollout if rollout is None else rollout
        train_index = self.train_index if train_index is None else train_index

        # Get next portion of sequence to train on
        if not isinstance(X, np.ndarray):
            X = self.X_train[:, train_index:train_index+rollout] 
            ys = self.ys_train[train_index:train_index+rollout]
            
            # Got to the end and need to wrap around?
            if train_index+rollout > self.T:
                rollover_index = (train_index+rollout) % self.T

                X = np.hstack([X, self.X_train[:, :rollover_index]])
                ys = np.hstack([ys, self.ys_train[:rollover_index]])

        # Prepend a zero column/label so X and ys are 1-indexed by time step
        X, ys = np.hstack([np.zeros((self.N, 1)), X]), np.hstack([np.zeros(1, dtype=int), ys])

        # Forward pass!
        dWhh, dbhh = np.zeros_like(Whh), np.zeros_like(bhh)
        dWxh, dbxh = np.zeros_like(Wxh), np.zeros_like(bxh)
        dWs, dbs = np.zeros_like(Ws), np.zeros_like(bs)
        
        loss = 0.
        hiddens = {0: hidden}
        dhiddens, dhiddens_downstream, dhiddens_local = {}, {rollout:np.zeros((self.H, 1))}, {}
        scores, probs = {}, {}
        for t in range(1, rollout+1):
            # Previous hidden layer and input at time t
            Z = (Whh @ hiddens[t-1] + bhh) + (Wxh @ X[:,[t]] + bxh)
            hiddens[t] = np.tanh(Z)
            
            # Softmax
            scores[t] = Ws @ hiddens[t] + bs
            probs[t] = softmax_vectorized(scores[t])
            y_hat = probs[t][ys[t]]

            # Loss
            loss += -np.log(y_hat).sum()

        # Add regularization
        loss += self.regularizer * 0.5*(np.sum(Whh**2) + np.sum(bhh**2) +
                                        np.sum(Wxh**2) + np.sum(bxh**2) +
                                        np.sum(Ws**2) + np.sum(bs**2))
        if predict:
            return hiddens[rollout], scores
        
        # Backpropagate!
        for t in range(rollout, 0, -1):
            # Scores: the softmax/cross-entropy gradient is the probabilities
            # with 1 subtracted at the true class; copy so the stored probs
            # aren't mutated
            dscores = probs[t].copy()
            dscores[ys[t], 0] -= 1

            # Softmax weights
            dbs += dscores
            dWs += dscores @ hiddens[t].T

            dhiddens_local[t] = Ws.T @ dscores
            dhiddens[t] = dhiddens_local[t] + dhiddens_downstream[t] # Karpathy optimization
            
            dZ = tanh_grad(hiddens[t]) * dhiddens[t]

            # Input and hidden weights
            dbxh += dZ
            dWxh += dZ @ X[:,[t]].T
            dbhh += dZ
            dWhh += dZ @ hiddens[t-1].T
            
            # Set up incoming hidden weight gradient for previous time step
            dhiddens_downstream[t-1] = Whh.T @ dZ
        
        # Regularization
        #
        # Hidden and input weights
        dWhh += (self.regularizer*Whh)
        dbhh += (self.regularizer*bhh)
        dWxh += (self.regularizer*Wxh)
        dbxh += (self.regularizer*bxh)
        
        # Softmax weights
        dWs += (self.regularizer*Ws)
        dbs += (self.regularizer*bs)
        
        # Log additional info?
        if self.inspect:
            self.xs, self.ys = str(X[:, 1:]), str(ys[1:])
            self.scores, self.probs = scores, probs
            self.loss = loss
            self.dWhh, self.dbhh, self.dWxh, self.dbxh = dWhh, dbhh, dWxh, dbxh
            self.dWs, self.dbs = dWs, dbs
            self.hiddens = hiddens
            self.dhiddens = dhiddens
            self.dhiddens_local, self.dhiddens_downstream = dhiddens_local, dhiddens_downstream
        
        return State(loss, Gradients(dWhh, dbhh, dWxh, dbxh, dWs, dbs), hiddens[rollout])
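A sketch of how this variant might be driven from a training loop. State and Gradients aren't shown, so the field names below (state.loss, state.gradients, state.hidden, g.dWhh, ...) are assumptions matching the constructor calls above, as are the learning_rate attribute and the plain-SGD update rule:

    def train_step(model):
        # One truncated-BPTT step over the next `rollout` time steps
        state = model.forward_backward_prop()

        # Plain SGD on every parameter (field names are assumed)
        g = state.gradients
        for param, grad in [(model.Whh, g.dWhh), (model.bhh, g.dbhh),
                            (model.Wxh, g.dWxh), (model.bxh, g.dbxh),
                            (model.Ws, g.dWs), (model.bs, g.dbs)]:
            param -= model.learning_rate * grad  # in-place ndarray update

        # Carry the final hidden state over and advance the training window
        model.hidden = state.hidden
        model.train_index = (model.train_index + model.rollout) % model.T

        return state.loss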