Example #1
 def batch_train(self, anc_idx, pos_idx, neg_idx, learn_rate=1e-3):
     """Perform a batch update of all parameters based on the given sets
     of anchor, positive example, and negative example indices.
     """
     # Force incoming LUT indices to the right type (i.e. np.uint32)
     anc_idx = anc_idx.astype(np.uint32)
     pos_idx = pos_idx[:, np.newaxis]
     pn_idx = np.hstack((pos_idx, neg_idx)).astype(np.uint32)
     # Column 0 holds the positive example (sign +1); the remaining
     # columns hold the sampled negative examples (sign -1)
     pn_sign = -1.0 * np.ones(pn_idx.shape)
     pn_sign[:, 0] = 1.0
     L = np.zeros((1,))
     # Do feedforward and backprop through the predictor/predictee tables
     w2v_ff_bp(anc_idx, pn_idx, pn_sign, self.params['Wa'], \
               self.params['Wc'], self.params['b'], self.grads['Wa'], \
               self.grads['Wc'], self.grads['b'], L, 1)
     L = L[0]
     # Apply gradients to (touched only) look-up-table parameters
     a_mod_idx = np.unique(anc_idx)
     c_mod_idx = np.unique(pn_idx)
     ag_update_2d(a_mod_idx, self.params['Wa'], self.grads['Wa'], \
             self.moms['Wa'], learn_rate)
     ag_update_2d(c_mod_idx, self.params['Wc'], self.grads['Wc'], \
             self.moms['Wc'], learn_rate)
     ag_update_1d(c_mod_idx, self.params['b'], self.grads['b'], \
             self.moms['b'], learn_rate)
     return L
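The heavy lifting above happens inside the compiled helper w2v_ff_bp, which is not shown in these excerpts. The sketch below is a pure-NumPy stand-in rather than the project's implementation: assuming standard skip-gram negative sampling, it illustrates the loss and gradients that the call appears to accumulate into L and the grads arrays (the trailing argument of w2v_ff_bp, here 1, is omitted because its meaning is not visible in these excerpts).
 import numpy as np

 def w2v_ff_bp_sketch(anc_idx, pn_idx, pn_sign, Wa, Wc, b, dWa, dWc, db, L):
     """Hypothetical reference for w2v_ff_bp: logistic loss on anchor/context
     dot products, with sign +1 marking the positive column and -1 the
     sampled negatives; gradients are accumulated in place."""
     for i in range(anc_idx.shape[0]):
         a = anc_idx[i]
         for j in range(pn_idx.shape[1]):
             c = pn_idx[i, j]
             y = 1.0 if pn_sign[i, j] > 0 else 0.0      # 1 = positive pair
             score = np.dot(Wa[a], Wc[c]) + b[c]
             p = 1.0 / (1.0 + np.exp(-score))           # sigmoid(score)
             L[0] += -np.log(p) if y > 0 else -np.log(1.0 - p)
             g = p - y                                  # d(loss)/d(score)
             dWa[a] += g * Wc[c]
             dWc[c] += g * Wa[a]
             db[c] += g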
Example #2
 def apply_grad(self, learn_rate=1e-2):
     """Apply the current accumulated gradients, with adagrad."""
     # Gather the LUT rows touched since the last update, as tracked in self.grad_idx
     nz_idx = np.asarray([i for i in self.grad_idx]).astype(np.uint32)
     ag_update_2d(nz_idx, self.params['W'], self.grads['W'], \
                  self.moms['W'], learn_rate)
     self.grad_idx = set()
     return
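ag_update_2d (and ag_update_1d in the later examples) are also compiled helpers that are not shown here. A minimal sketch of a row-sparse AdaGrad step, under the assumption that the moms arrays hold accumulated squared gradients and that applied gradients are cleared afterwards:
 import numpy as np

 def ag_update_2d_sketch(row_idx, W, dW, mW, learn_rate, eps=1e-6):
     """Hypothetical stand-in for ag_update_2d: an AdaGrad step applied
     only to the rows listed in row_idx, leaving all other rows untouched."""
     for r in row_idx:
         mW[r] += dW[r] ** 2                                # accumulate squared grads
         W[r] -= learn_rate * dW[r] / np.sqrt(mW[r] + eps)  # scaled step
         dW[r] = 0.0                                        # clear the applied gradient
Restricting the update to the touched rows is what keeps sparse LUT training cheap: the cost of each apply_grad call scales with the number of distinct indices seen since the last call, not with the full table size.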
Example #5
 def apply_grad(self, learn_rate=1e-2):
     """Apply the current accumulated gradients, with adagrad."""
     # Keep only valid LUT keys; indices at or above self.key_count are ignored
     nz_idx = self.grad_idx[self.grad_idx < self.key_count]
     ag_update_2d(nz_idx, self.params['W'], self.grads['W'], \
                  self.moms['W'], learn_rate)
     ag_update_1d(nz_idx, self.params['b'], self.grads['b'], \
                  self.moms['b'], learn_rate)
     self.grad_idx = []
     return
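This variant keeps self.grad_idx as an integer array rather than a set, so out-of-range entries (indices at or above self.key_count) are dropped with a boolean mask before both the 2-d table W and its 1-d bias vector b are updated. Under the same assumptions as the sketch after Example #2, a 1-d counterpart for ag_update_1d might look like:
 import numpy as np

 def ag_update_1d_sketch(idx, b, db, mb, learn_rate, eps=1e-6):
     """Hypothetical stand-in for ag_update_1d: a per-entry AdaGrad step
     on a 1-d bias vector, restricted to the touched indices."""
     for k in idx:
         mb[k] += db[k] ** 2
         b[k] -= learn_rate * db[k] / np.sqrt(mb[k] + eps)
         db[k] = 0.0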
Example #7
 def apply_grad(self, learn_rate=1e-2):
     """Apply the current accumulated gradients, with adagrad."""
     nz_idx = np.asarray([i for i in self.grad_idx]).astype(np.uint32)
     # Information from the word LUT should not pass through this
     # layer when source_dim < 5. In this case, we assume that we
     # will do prediction using only the context-adaptive biases.
     if self.do_rescale:
         m_rate = learn_rate if (self.source_dim >= 5) else 0.0
         ag_update_2d(nz_idx, self.params['Wm'], self.grads['Wm'], \
                      self.moms['Wm'], m_rate)
     # No context-adaptive bias term should be applied if self.bias_dim
     # is < 5. I.e. only information coming up from the word LUT, and
     # possibly rescaled by this layer, should be used in prediction.
     b_rate = learn_rate if (self.bias_dim >= 5) else 0.0
     ag_update_2d(nz_idx, self.params['Wb'], self.grads['Wb'], \
                  self.moms['Wb'], b_rate)
     self.grad_idx = set()
     return
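Here the learning rate doubles as an on/off switch: the call structure stays the same for Wm and Wb, but passing a rate of 0.0 leaves the corresponding parameters untouched when source_dim or bias_dim falls below the threshold of 5. A toy, self-contained illustration of that gating pattern (plain row-wise SGD, not the project's ag_update_2d):
 import numpy as np

 def gated_row_update(row_idx, W, dW, rate):
     """Toy row-wise update used only to illustrate rate gating: with
     rate == 0.0 the rows of W are left exactly as they were."""
     for r in row_idx:
         W[r] -= rate * dW[r]

 W = np.ones((4, 3))
 dW = np.ones((4, 3))
 use_bias_block = False                      # plays the role of (bias_dim >= 5)
 b_rate = 1e-2 if use_bias_block else 0.0
 gated_row_update(np.array([0, 2]), W, dW, b_rate)   # rate 0.0: W is unchanged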