Code Example #1
def main(args):
    # Create model.
    model = TwoLayerNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    img = np.zeros((args.batch_size, 784))
    label = np.zeros((args.batch_size, ))

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()

        def loss_func(*params):
            f = model.forward(img, 'train')
            return model.loss(f, label)

        if args.only_forward:
            loss = loss_func()
            loss.asnumpy()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(loss_func,
                                                    argnum=range(
                                                        len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
            for g in grad_arrays:
                g.get_data(minpy.array_variants.ArrayType.MXNET).wait_to_read()
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
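
All of the examples in this listing follow the same basic pattern: wrap the forward pass and loss computation in a function, hand it to MinPy's grad_and_loss, and call the returned function to obtain the gradients together with the loss value. The sketch below distills that pattern into a minimal, self-contained softmax-regression example; it is illustrative only, the import paths (minpy.numpy, minpy.numpy.random, minpy.core.grad_and_loss) are assumed from MinPy's documented layout, and none of it is taken verbatim from the projects listed here.

# Minimal, self-contained sketch of the grad_and_loss pattern shared by the
# examples in this listing (assumed imports; adjust to your MinPy install).
import minpy.numpy as np
import minpy.numpy.random as random
from minpy.core import grad_and_loss

num_samples, num_features, num_classes = 256, 500, 5

# Toy data: random inputs and one-hot labels.
x = random.rand(num_samples, num_features) - 0.5
label = np.zeros((num_samples, num_classes))
label[np.arange(num_samples), random.randint(0, num_classes, num_samples)] = 1
w = random.rand(num_features, num_classes) - 0.5

def train_loss(w, x):
    # Softmax regression forward pass followed by cross-entropy loss.
    a = np.exp(np.dot(x, w))
    prob = a / np.sum(a, axis=1, keepdims=True)
    return -np.sum(label * np.log(prob)) / num_samples

# grad_and_loss turns train_loss into a function that returns (gradient, loss).
# By default it differentiates with respect to the first argument only; pass an
# index, a list, or a range of indices to select other arguments.
grad_function = grad_and_loss(train_loss)

for i in range(100):
    dw, loss = grad_function(w, x)   # dw has the same shape as w
    w -= 0.1 * dw                    # plain gradient descent step
    if i % 10 == 0:
        print('Iter {}, training loss {}'.format(i, loss))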
Code Example #2
File: mlp_minpy_gpu.py  Project: HrWangChengdu/minpy
def main(args):
    # Create model.
    model = TwoLayerNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v["shape"])

    img = np.zeros((args.batch_size, 784))
    label = np.zeros((args.batch_size,))

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()

        def loss_func(*params):
            f = model.forward(img, "train")
            return model.loss(f, label)

        if args.only_forward:
            loss = loss_func()
            loss.asnumpy()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(loss_func, argnum=range(len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
            for g in grad_arrays:
                g.get_data(minpy.array_variants.ArrayType.MXNET).wait_to_read()
    dur = time.time() - start
    print("Per Loop Time: %.6f" % (dur / (args.num_loops - num_cold)))
Code Example #3
    def _step(self, batch):
        """
        Make a single gradient update. This is called by train() and should not
        be called manually.
        """

        # Compute loss and gradient
        def loss_func(*params):  # pylint: disable=unused-argument
            """
            Loss function that calculates the loss.
            """

            # `params` is not read directly here; the model reads the same arrays
            # from self.model.params, which are passed positionally below so that
            # autograd can compute gradients with respect to them.
            predict = self.model.forward_batch(batch, mode='train')
            return self.model.loss_batch(batch, predict)

        param_arrays = list(self.model.params.values())
        param_keys = list(self.model.params.keys())
        grad_and_loss_func = core.grad_and_loss(loss_func,
                                                argnum=range(
                                                    len(param_arrays)))
        grad_arrays, loss = grad_and_loss_func(*param_arrays)
        grads = dict(zip(param_keys, grad_arrays))

        self.loss_history.append(loss.asnumpy())

        # Perform a parameter update
        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config
Code Example #4
File: solver.py  Project: HrWangChengdu/minpy
    def _step(self, batch):
        """
        Make a single gradient update. This is called by train() and should not
        be called manually.
        """
        # Compute loss and gradient
        def loss_func(*params): # pylint: disable=unused-argument
            """
            Loss function that calculates the loss.
            """

            # `params` is not read directly here; the model reads the same arrays
            # from self.model.params, which are passed positionally below so that
            # autograd can compute gradients with respect to them.
            predict = self.model.forward_batch(batch, mode='train')
            return self.model.loss_batch(batch, predict)

        param_arrays = list(self.model.params.values())
        param_keys = list(self.model.params.keys())
        grad_and_loss_func = core.grad_and_loss(
            loss_func, argnum=range(len(param_arrays)))
        grad_arrays, loss = grad_and_loss_func(*param_arrays)
        grads = dict(zip(param_keys, grad_arrays))

        self.loss_history.append(loss.asnumpy())

        # Perform a parameter update
        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config
Code Example #5
File: solver.py  Project: xlong88/minpy
    def _step(self):
        """
        Make a single gradient update. This is called by train() and should not
        be called manually.
        """
        # Make a minibatch of training data
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        # Compute loss and gradient
        def loss_func(*params):
            # `params` is not read directly here; the model reads the same arrays
            # from self.model.params, which are passed positionally below so that
            # autograd can compute gradients with respect to them.
            predict = self.model.forward(X_batch)
            return self.model.loss(predict, y_batch)

        param_arrays = list(self.model.params.values())
        param_keys = list(self.model.params.keys())
        grad_and_loss_func = core.grad_and_loss(loss_func, argnum=range(len(param_arrays)))
        grad_arrays, loss = grad_and_loss_func(*param_arrays)
        grads = dict(zip(param_keys, grad_arrays))

        self.loss_history.append(loss.asnumpy())

        # Perform a parameter update
        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config
Code Example #6
File: deparser.py  Project: wddabc/minpy
    def train_on_batch(self, tokens, oracle_actions):
        """
        Make a single gradient update. This is called by train() and should not
        be called manually.
        """

        # Compute loss and gradient
        def loss_func(*params):
            """
            Loss function that calculates the loss.
            """

            # `params` is not read directly here; the model reads the same arrays
            # from self.model.params, which are passed positionally below so that
            # autograd can compute gradients with respect to them.
            return self.model.parse(tokens, oracle_actions=oracle_actions)

        param_arrays = list(self.model.params.values())
        param_keys = list(self.model.params.keys())
        grad_and_loss_func = core.grad_and_loss(loss_func,
                                                argnum=range(
                                                    len(param_arrays)))
        grad_arrays, loss = grad_and_loss_func(*param_arrays)
        grads = dict(zip(param_keys, grad_arrays))

        # Perform a parameter update
        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config
        return loss
Code Example #7
def main(args):
    # Create model.
    model = RNNNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    data = np.zeros(
        (args.batch_size, args.input_size))  # Data of only one time step.
    label = np.zeros((args.batch_size, ))

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()

        def loss_func(*params):
            f = model.forward(data, 'train')
            return model.loss(f, label)

        if args.only_forward:
            loss = loss_func()
            loss.wait_to_read()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(loss_func,
                                                    argnum=range(
                                                        len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
            for g in grad_arrays:
                g.wait_to_read()

    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
Code Example #8
File: rnn_minpy_cpu.py  Project: HrWangChengdu/minpy
def main(args):
    # Create model.
    model = RNNNet(args)
    for k, v in model.param_configs.items():
        model.params[k] = np.zeros(v['shape'])

    data = np.zeros((args.batch_size, args.input_size)) # Data of only one time step.
    label = np.zeros((args.batch_size,), dtype=np.int)

    for l in range(args.num_loops):
        if l == num_cold:
            start = time.time()
        def loss_func(*params):
            f = model.forward(data, 'train')
            return model.loss(f, label)
        if args.only_forward:
            loss = loss_func()
            loss.asnumpy()
        else:
            param_arrays = list(model.params.values())
            param_keys = list(model.params.keys())
            grad_and_loss_func = core.grad_and_loss(
                loss_func, argnum=range(len(param_arrays)))
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
    dur = time.time() - start
    print('Per Loop Time: %.6f' % (dur / (args.num_loops - num_cold)))
Code Example #9
    def loss(self, X, y=None):
        if y is None:
            return self._forward(X, *self.param)
        else:
            backprop = grad_and_loss(self._softmax_loss,
                                     range(2, len(self.param) + 2))
            return backprop(X, y, *self.param)
Code Example #10
File: fc_net_minpy.py  Project: dsqx71/minpy
  def loss_and_derivative(self, X, y=None):
    """
    Compute loss and gradient for the fully-connected net.

    Input / output: Same as TwoLayerNet above.
    """

    X_plain = np.reshape(X, (X.shape[0], -1))
    mode = 'test' if y is None else 'train'

    if self.dropout_param is not None:
      self.dropout_param['mode'] = mode   

    if self.use_batchnorm:
      for bn_param in self.bn_params:
        bn_param['mode'] = mode

    params_array = self.pack_params()

    def train_loss(*args):
      X = args[0]
      y = args[1]

      res = X
      for l in xrange(self.num_layers):
        prev_res = res
        res = affine_forward(prev_res, args[self.w_idx(l)], args[self.b_idx(l)])

        if l < (self.num_layers - 1):
          if self.use_batchnorm:
            res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                    args[self.bn_bt_idx(l)], self.bn_params[l])
          res = relu_forward(res)
          if self.use_dropout:
            res = dropout_forward(res, self.dropout_param)

      scores = res

      if mode == 'test':
        return scores

      #loss, _ = softmax_loss(scores, y)
      loss = svm_loss(scores, y)
      return loss

    if y is None:
      return train_loss(X_plain, y, *params_array)

    grad_function = grad_and_loss(train_loss, range(self.data_target_cnt, self.data_target_cnt + len(params_array)))
    grads_array, loss = grad_function(X_plain, y, *params_array)

    grads = {}

    for i, grad in enumerate(grads_array):
      grads[self.param_keys[i]] = grad
    return loss, grads
Code Example #11
    def loss_and_derivative(self, X, y=None):
        """
    Compute loss and gradient for a minibatch of data.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """

        # Note: types of X, y are mxnet.ndarray
        def train_loss(X, y, W1, W2, b1, b2):
            l1 = affine_relu_forward(X, W1, b1)
            l2 = affine_forward(l1, W2, b2)
            scores = l2

            if y is None:
                return scores

            #[TODO]: softmax is not supported yet
            # loss, d_scores = softmax_loss(scores, y)
            loss = svm_loss(scores, y)
            loss_with_reg = loss + np.sum(W1**2) * 0.5 * self.reg + np.sum(
                W2**2) * 0.5 * self.reg

            return loss_with_reg

        self.params_array = []
        params_list_name = ['W1', 'W2', 'b1', 'b2']
        for param_name in params_list_name:
            self.params_array.append(self.params[param_name])

        X_plain = np.reshape(X, (X.shape[0], -1))
        if y is None:
            return train_loss(X_plain, y, *self.params_array)

        grad_function = grad_and_loss(train_loss, range(2, 6))

        grads_array, loss = grad_function(X_plain, y, *self.params_array)

        grads = {}
        for i in range(len(params_list_name)):
            grads[params_list_name[i]] = grads_array[i]

        return loss, grads
Code Example #12
    def _forward_backward(self, loss_func):
        param_arrays = list(self.params.values())
        param_keys = list(self.params.keys())
        grad_and_loss_func = core.grad_and_loss(loss_func, argnum=range(len(param_arrays)))
        grad_arrays, loss = grad_and_loss_func(*param_arrays)
        grads = dict(zip(param_keys, grad_arrays))
        if self.config.grad_clip:
            for k, v in grads.iteritems():
                grads[k] = numpy.clip(v, -self.config.clip_magnitude, self.config.clip_magnitude)

        return grads
Code Example #13
File: fc_net_minpy.py  Project: dsqx71/minpy
  def loss_and_derivative(self, X, y=None):
    """
    Compute loss and gradient for a minibatch of data.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """  
    # Note: types of X, y are mxnet.ndarray
    def train_loss(X, y, W1, W2, b1, b2):
      l1 = affine_relu_forward(X, W1, b1)
      l2 = affine_forward(l1, W2, b2)
      scores = l2

      if y is None:
        return scores
   
      #[TODO]: softmax is not supported yet
      # loss, d_scores = softmax_loss(scores, y)
      loss = svm_loss(scores, y)
      loss_with_reg = loss + np.sum(W1 ** 2) * 0.5 * self.reg + np.sum(W2 ** 2) * 0.5 * self.reg

      return loss_with_reg 

    self.params_array = []
    params_list_name = ['W1', 'W2', 'b1', 'b2']
    for param_name in params_list_name:
      self.params_array.append(self.params[param_name])

    X_plain = np.reshape(X, (X.shape[0], -1))
    if y is None:
      return train_loss(X_plain, y, *self.params_array)

    grad_function = grad_and_loss(train_loss, range(2, 6))

    grads_array, loss = grad_function(X_plain, y, *self.params_array)

    grads = {}
    for i in range(len(params_list_name)):
      grads[params_list_name[i]] = grads_array[i]

    return loss, grads
Code Example #14
File: fc_net_minpy.py  Project: HrWangChengdu/CS231n
  def loss(self, X, y=None):
    """
    Compute loss and gradient for the fully-connected net.

    Input / output: Same as TwoLayerNet above.
    """
    mode = 'test' if y is None else 'train'

    if self.dropout_param is not None:
      self.dropout_param['mode'] = mode   

    if self.use_batchnorm:
      for bn_param in self.bn_params:
        bn_param['mode'] = mode

    # TODO: add bn_options and dropout option
    assert not (self.use_batchnorm or self.use_dropout)

    # args is [X, Y, W[0], ..., W[n-1], b[0], ..., b[n-1]]
    # type of (args) is list.
    def train_loss(*args):
      last_layer_output = args[0]

      for l in xrange(self.num_layers):
        if l < (self.num_layers - 1):
          # TODO: last_layer_output is mutated in this code
          # TODO: rewrite last_layer_output 
          last_layer_output, _ = affine_relu_forward(last_layer_output, 
            args[2 + l], args[2 + self.num_layers + l]) 
        else:
          last_layer_output, _ = affine_forward(last_layer_output, 
            args[2 + l], args[2 + self.num_layers + l]) 

      scores = last_layer_output 

      if mode == 'test':
        return scores

      loss, _ = softmax_loss(scores, y)
      return loss

    grad_function = grad_and_loss(train_loss, range(2, 2+2*self.num_layers))

    #TODO: define self.WeightAndBiasArray
    grads_array, loss = grad_function(X, y, *self.WeightAndBiasArray)
    grads = {}

    for l in xrange(self.num_layers - 1, -1, -1):
      grads[self.GetWeightName(l)] = grads_array[l]
      grads[self.GetBiasName(l)] = grads_array[l + self.num_layers]

    return loss, grads
Code Example #15
def test_policy():
    @minpy.wrap_policy(minpy.OnlyNumPyPolicy())
    def gaussian_cluster_generator(num_samples=10000,
                                   num_features=500,
                                   num_classes=5):
        # with minpy.OnlyNumPyPolicy():
        mu = np.random.rand(num_classes, num_features)
        sigma = np.ones((num_classes, num_features)) * 0.1
        num_cls_samples = num_samples / num_classes
        x = np.zeros((num_samples, num_features))
        y = np.zeros((num_samples, num_classes))
        for i in range(num_classes):
            cls_samples = np.random.normal(mu[i, :], sigma[i, :],
                                           (num_cls_samples, num_features))
            x[i * num_cls_samples:(i + 1) * num_cls_samples] = cls_samples
            y[i * num_cls_samples:(i + 1) * num_cls_samples, i] = 1
        return x, y

    def predict(w, x):
        a = np.exp(np.dot(x, w))
        a_sum = np.sum(a, axis=1, keepdims=True)
        prob = a / a_sum
        return prob

    def train_loss(w, x):
        prob = predict(w, x)
        loss = -np.sum(label * np.log(prob)) / num_samples
        return loss

    """Use Minpy's auto-grad to derive a gradient function off loss"""
    grad_function = grad_and_loss(train_loss)

    # Using gradient descent to fit the correct classes.
    def train(w, x, loops):
        for i in range(loops):
            dw, loss = grad_function(w, x)
            if i % 10 == 0:
                print('Iter {}, training loss {}'.format(i, loss))
            # gradient descent
            w -= 0.1 * dw

    # Initialize training data.
    num_samples = 10000
    num_features = 500
    num_classes = 5
    data, label = gaussian_cluster_generator(num_samples, num_features,
                                             num_classes)

    # Initialize training weight and train
    weight = random.randn(num_features, num_classes)
    train(weight, data, 100)
Code Example #16
File: test_policy.py  Project: HrWangChengdu/minpy
def test_policy():
    @minpy.wrap_policy(minpy.OnlyNumPyPolicy())
    def gaussian_cluster_generator(num_samples=10000, num_features=500, num_classes=5):
        # with minpy.OnlyNumPyPolicy():
        mu = np.random.rand(num_classes, num_features)
        sigma = np.ones((num_classes, num_features)) * 0.1
        num_cls_samples = num_samples / num_classes
        x = np.zeros((num_samples, num_features))
        y = np.zeros((num_samples, num_classes))
        for i in range(num_classes):
            cls_samples = np.random.normal(mu[i,:], sigma[i,:], (num_cls_samples, num_features))
            x[i*num_cls_samples:(i+1)*num_cls_samples] = cls_samples
            y[i*num_cls_samples:(i+1)*num_cls_samples,i] = 1
        return x, y
    
    def predict(w, x):
        a = np.exp(np.dot(x, w))
        a_sum = np.sum(a, axis=1, keepdims=True)
        prob = a / a_sum
        return prob
    
    def train_loss(w, x):
        prob = predict(w, x)
        loss = -np.sum(label * np.log(prob)) / num_samples
        return loss
    
    """Use Minpy's auto-grad to derive a gradient function off loss"""
    grad_function = grad_and_loss(train_loss)
    
    # Using gradient descent to fit the correct classes.
    def train(w, x, loops):
        for i in range(loops):
            dw, loss = grad_function(w, x)
            if i % 10 == 0:
                print('Iter {}, training loss {}'.format(i, loss))
            # gradient descent
            w -= 0.1 * dw
    
    # Initialize training data.
    num_samples = 10000
    num_features = 500
    num_classes = 5
    data, label = gaussian_cluster_generator(num_samples, num_features, num_classes)
    
    # Initialize training weight and train
    weight = random.randn(num_features, num_classes)
    train(weight, data, 100)
Code Example #17
    def loss_and_derivative(self, X, y=None):
        # symbol's init func takes input size.
        if self.symbol_func == None:
            self.set_mxnet_symbol(X)

        params_array = self.pack_params()

        #TODO(Haoran): isolate this part out for user
        #if so, loss_and_derivative function should be inherited from super mxnet model class
        def train_loss(*args):
            inputs = args[0]
            softmax_label = args[1]
            probs = self.symbol_func(**self.make_mxnet_weight_dict(
                inputs, softmax_label, args[self.data_target_cnt:len(args)]))
            if softmax_label is None:
                return probs

            samples_num = X.shape[0]
            targets = np.zeros((samples_num, self.num_classes))
            targets[np.arange(samples_num), softmax_label] = 1
            loss = -np.sum(targets * np.log(probs)) / samples_num
            for i in self.get_index_reg_weight():
                loss = loss + np.sum(0.5 * args[i]**2 * self.reg)

            return loss

        if y is None:
            return train_loss(X, y, *params_array)

        grad_function = core.grad_and_loss(
            train_loss,
            range(self.data_target_cnt,
                  self.data_target_cnt + len(params_array)))
        grads_array, loss = grad_function(X, y, *params_array)

        grads = {}
        for i, grad in enumerate(grads_array):
            grads[self.param_keys[i]] = grad

        return loss, grads
Code Example #18
File: cnn_minpy.py  Project: colinsongf/minpy
    def loss_and_derivative(self, X, y=None):
        # symbol's init func takes input size.
        if self.symbol_func == None:
            self.set_mxnet_symbol(X)

        params_array = self.pack_params()

        #TODO(Haoran): isolate this part out for user
        #if so, loss_and_derivative function should be inherited from super mxnet model class
        def train_loss(*args):
            inputs = args[0]
            softmax_label = args[1]
            probs = self.symbol_func(**self.make_mxnet_weight_dict(
                inputs, softmax_label, args[self.data_target_cnt:len(args)]))
            if softmax_label is None:
                return probs

            samples_num = X.shape[0]
            targets = np.zeros((samples_num, self.num_classes))
            targets[np.arange(samples_num), softmax_label] = 1
            loss = -np.sum(targets * np.log(probs)) / samples_num
            for i in self.get_index_reg_weight():
                loss = loss + np.sum(0.5 * args[i]**2 * self.reg)

            return loss

        if y is None:
            return train_loss(X, y, *params_array)

        grad_function = core.grad_and_loss(train_loss, range(
            self.data_target_cnt, self.data_target_cnt + len(params_array)))
        grads_array, loss = grad_function(X, y, *params_array)

        grads = {}
        for i, grad in enumerate(grads_array):
            grads[self.param_keys[i]] = grad

        return loss, grads
Code Example #19
    def loss_and_derivative(self, X, y=None):
        """
    Compute loss and gradient for the fully-connected net.

    Input / output: Same as TwoLayerNet above.
    """

        X_plain = np.reshape(X, (X.shape[0], -1))
        mode = 'test' if y is None else 'train'

        if self.dropout_param is not None:
            self.dropout_param['mode'] = mode

        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        params_array = self.pack_params()

        def train_loss(*args):
            X = args[0]
            y = args[1]

            res = X
            for l in xrange(self.num_layers):
                prev_res = res
                res = affine_forward(prev_res, args[self.w_idx(l)],
                                     args[self.b_idx(l)])

                if l < (self.num_layers - 1):
                    if self.use_batchnorm:
                        res = batchnorm_forward(res, args[self.bn_ga_idx(l)],
                                                args[self.bn_bt_idx(l)],
                                                self.bn_params[l])
                    res = relu_forward(res)
                    if self.use_dropout:
                        res = dropout_forward(res, self.dropout_param)

            scores = res

            if mode == 'test':
                return scores

            #loss, _ = softmax_loss(scores, y)
            loss = svm_loss(scores, y)
            return loss

        if y is None:
            return train_loss(X_plain, y, *params_array)

        grad_function = grad_and_loss(
            train_loss,
            range(self.data_target_cnt,
                  self.data_target_cnt + len(params_array)))
        grads_array, loss = grad_function(X_plain, y, *params_array)

        grads = {}

        for i, grad in enumerate(grads_array):
            grads[self.param_keys[i]] = grad
        return loss, grads
Code Example #20
File: multiple_derivative.py  Project: lryta/minpy
def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    error = np.count_nonzero(
        np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0


xshape = (256, 500)
wshape = (500, 250)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5

#training_gradient_fun_0 = grad(training_loss, 0)
grad_arg0 = grad_and_loss(training_loss, 0)
grad, loss = grad_arg0(weights, inputs)
print('1st arg\'s grad by single grad func', grad)

grad_arg1 = grad_and_loss(training_loss, 1)
grad, loss = grad_arg1(weights, inputs)
print('2nd arg\'s grad by single grad func', grad)

grad_args = grad_and_loss(training_loss, [0, 1])
grads, loss = grad_args(weights, inputs)
print('1st arg\'s grad by single grad func', grads[0])
print('2nd arg\'s grad by single grad func', grads[1])
Code Example #21
File: basic.py  Project: LubyRuffy/hexo-practice-code
mx.nd.array(numpy.array([1, 2, 3]))
type(numpy.array([1, 2, 3]))
type(c)
np.ones((2, 3))
np.ones([2, 3])
mx.nd.ones([2, 3])
mx.nd.ones([2, 3]).asnumpy()


def foo(x):
    return (5 * (x**2) + 3 * x + 2)


print(foo(4))
d_foo = grad(foo)
d_l_foo = grad_and_loss(foo)
d_foo(4)
d_l_foo(4)

# Symbol
a = mx.sym.Variable('a')
b = mx.sym.Variable('b')
c = a + b
# elemental wise times
d = a * b
# matrix multiplication
e = mx.sym.dot(a, b)
f = mx.sym.Reshape(d + e, shape=(1, 4))
# broadcast
g = mx.sym.broadcast_to(f, shape=(2, 4))
mx.viz.plot_network(symbol=g)
Code Example #22
File: test_autograd.py  Project: robingong/minpy
# preparation
N, D, H = 4, 5, 6
x = np.random.randn(N, D)
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)
out, cache = rnn_step_forward(x, h, Wx, Wh, b)
dnext_h = np.random.randn(*out.shape)

# test MinPy
start = time.time()
rnn_step_forward_loss = lambda x, h, Wx, Wh, b, dnext_h: minpy_rnn_step_forward(
    x, h, Wx, Wh, b) * nm(dnext_h)
grad_loss_function = wraps('numpy')(grad_and_loss(rnn_step_forward_loss,
                                                  xrange(5)))
grad_arrays = grad_loss_function(x, h, Wx, Wh, b, dnext_h)[0]
end = time.time()
print "MinPy total time elapsed:", end - start

# test NumPy
start = time.time()
out, cache = rnn_step_forward(x, h, Wx, Wh, b)
dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)
out *= dnext_h  # to agree with MinPy calculation
end = time.time()
print "NumPy total time elapsed:", end - start

print
print "Result Check:"
print 'dx error: ', rel_error(dx, grad_arrays[0])
Code Example #23
File: caffe_convolution.py  Project: zhxxhit/minpy

def predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias):
    #return f( data=[('x', inputs)], weight=[('fc_weight', weights)], ctx=mx.cpu())
    return f(x=inputs,
             fc_0_weight=fc_weight,
             fc_1_bias=fc_bias,
             conv_0_weight=conv_weight,
             conv_1_bias=conv_bias)


def training_loss(inputs, targets, fc_weight, fc_bias, conv_weight, conv_bias):
    preds = predict(inputs, fc_weight, fc_bias, conv_weight, conv_bias)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))


training_gradient_fun = core.grad_and_loss(training_loss, range(2, 6))

lr = 1e-5
for i in range(100):
    grads, loss = training_gradient_fun(inputs, targets, fc_weight, fc_bias,
                                        conv_weight, conv_bias)
    #print('Training gradient: {}'.format(gr))
    fc_weight -= grads[0] * lr
    fc_bias -= grads[1] * lr
    conv_weight -= grads[2] * lr
    conv_bias -= grads[3] * lr
    if i % 10 == 0:
        print('Trained loss: {}'.format(loss))
Code Example #24
    def train(self):
        """Trains the model for `num_episodes` iterations.

        On each iteration, runs an episode (see `.run_episode()`) to generate three matrices of
        observations, labels and rewards (xs, ys, rs) containing data for the _entire_ episode.
        Then the parameter gradients are found using these episode matrices.

        Specifically, auto-grad is performed on `loss_func`, which does a single forward pass
        with the episode's observations `xs` then computes the loss using the output of the forward
        pass and the episode's labels `ys` and discounted rewards `rs`.

        This two-step approach of generating episode data then doing a single forward/backward pass
        is done to conserve memory during the auto-grad computation.
        """

        # Accumulate gradients since updates are only performed every `update_every` iterations.
        grad_buffer = self._init_grad_buffer()

        for episode_number in xrange(1, self.num_episodes):
            episode_start = time.time()

            # Generate an episode of training data.
            xs, ys, rs = self.run_episode()

            # Performs a forward pass and computes loss using an entire episode's data.
            def loss_func(*params):
                ps = self.model.forward(xs)
                return self.model.loss(ps, ys, rs)

            # Compute gradients with auto-grad on `loss_func` (duplicated from `Solver`).
            param_arrays = list(self.model.params.values())
            param_keys = list(self.model.params.keys())
            grad_and_loss_func = core.grad_and_loss(loss_func, argnum=range(len(param_arrays)))
            backward_start = time.time()
            grad_arrays, loss = grad_and_loss_func(*param_arrays)
            backward_time = time.time() - backward_start
            grads = dict(zip(param_keys, grad_arrays))

            # Accumulate gradients until an update is performed.
            for k, v in grads.iteritems():
                grad_buffer[k] += v

            # Misc. diagnostic info.
            self.loss_history.append(loss.asnumpy())
            episode_time = time.time() - episode_start
            if self.verbose:
                print('Backward pass complete (%.2fs)' % backward_time)
            if self.verbose or episode_number % self.print_every == 0:
                print('Episode %d complete (%.2fs), loss: %s, reward: %s, running reward: %s' %
                      (episode_number, episode_time, loss, self.episode_reward, self.running_reward))

            # Perform parameter update and reset the `grad_buffer` when appropriate.
            if episode_number % self.update_every == 0:
                for p, w in self.model.params.items():
                    dw = grad_buffer[p]
                    config = self.optim_configs[p]
                    next_w, next_config = self.update_rule(w, dw, config)
                    self.model.params[p] = next_w
                    self.optim_configs[p] = next_config
                    grad_buffer[p] = np.zeros_like(w)

            # Save model parameters to `save_dir` when appropriate.
            if episode_number % self.save_every == 0:
                if self.verbose:
                    print('Saving model parameters...')
                file_name = os.path.join(self.save_dir, 'params_%d.p' % episode_number)
                with open(file_name, 'w') as f:
                    pickle.dump({k: v.asnumpy() for k, v in self.model.params.iteritems()}, f)
                if self.verbose:
                    print('Wrote parameter file %s' % file_name)
Code Example #25
File: test_autograd.py  Project: ZihengJiang/minpy
def test_autograd():
    @convert_args
    def minpy_rnn_step_forward(x, prev_h, Wx, Wh, b):
        next_h = mp.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
        return next_h
    
    
    def rel_error(x, y):
      """ returns relative error """
      return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
    
    
    def rnn_step_forward(x, prev_h, Wx, Wh, b):
        next_h = np.tanh(prev_h.dot(Wh) + x.dot(Wx) + b)
        cache = next_h, prev_h, x, Wx, Wh
        return next_h, cache
    
    
    def rnn_step_backward(dnext_h, cache):
        dx, dprev_h, dWx, dWh, db = None, None, None, None, None
        # Load values from rnn_step_forward
        next_h, prev_h, x, Wx, Wh = cache
        # Gradients of loss wrt tanh
        dtanh = dnext_h * (1 - next_h * next_h)  # (N, H)
        # Gradients of loss wrt x
        dx = dtanh.dot(Wx.T)
        # Gradients of loss wrt prev_h
        dprev_h = dtanh.dot(Wh.T)
        # Gradients of loss wrt Wx
        dWx = x.T.dot(dtanh)  # (D, H)
        # Gradients of loss wrt Wh
        dWh = prev_h.T.dot(dtanh)
        # Gradients of loss wrt b. Note we broadcast b in practice. Thus result of
        # matrix ops are just sum over columns
        db = dtanh.sum(axis=0)  # == np.ones([N, 1]).T.dot(dtanh)[0, :]
        return dx, dprev_h, dWx, dWh, db
    
    
    # preparation
    N, D, H = 4, 5, 6
    x = np.random.randn(N, D)
    h = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)
    out, cache = rnn_step_forward(x, h, Wx, Wh, b)
    dnext_h = np.random.randn(*out.shape)
    
    # test MinPy
    start = time.time()
    rnn_step_forward_loss = lambda x, h, Wx, Wh, b, dnext_h: minpy_rnn_step_forward(x, h, Wx, Wh, b) * nm(dnext_h)
    grad_loss_function = return_numpy(grad_and_loss(rnn_step_forward_loss, range(5)))
    grad_arrays = grad_loss_function(x, h, Wx, Wh, b, dnext_h)[0]
    end = time.time()
    print("MinPy total time elapsed:", end - start)
    
    # test NumPy
    start = time.time()
    out, cache = rnn_step_forward(x, h, Wx, Wh, b)
    dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)
    out *= dnext_h # to agree with MinPy calculation
    end = time.time()
    print("NumPy total time elapsed:", end - start)
    
    print()
    print("Result Check:")
    print('dx error: ', rel_error(dx, grad_arrays[0]))
    print('dprev_h error: ', rel_error(dprev_h, grad_arrays[1]))
    print('dWx error: ', rel_error(dWx, grad_arrays[2]))
    print('dWh error: ', rel_error(dWh, grad_arrays[3]))
    print('db error: ', rel_error(db, grad_arrays[4]))
Code Example #26
  def loss(self, features, captions):
    """
    Compute training-time loss for the RNN. We input image features and
    ground-truth captions for those images, and use an RNN (or LSTM) to compute
    loss and gradients on all parameters.
    
    Inputs:
    - features: Input image features, of shape (N, D)
    - captions: Ground-truth captions; an integer array of shape (N, T) where
      each element is in the range 0 <= y[i, t] < V
      
    Returns a tuple of:
    - loss: Scalar loss
    - grads: Dictionary of gradients parallel to self.params
    """
    # Cut captions into two pieces: captions_in has everything but the last word
    # and will be input to the RNN; captions_out has everything but the first
    # word and this is what we will expect the RNN to generate. These are offset
    # by one relative to each other because the RNN should produce word (t+1)
    # after receiving word t. The first element of captions_in will be the START
    # token, and the first element of captions_out will be the first word.
    captions_in = captions[:, :-1]
    captions_out = captions[:, 1:]

    # You'll need this 
    mask = (captions_out != self._null)

    # Weight and bias for the affine transform from image features to initial
    # hidden state
    W_proj, b_proj = self.params['W_proj'], self.params['b_proj']
    
    # Word embedding matrix
    W_embed = self.params['W_embed']

    # Input-to-hidden, hidden-to-hidden, and biases for the RNN
    Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']

    # Weight and bias for the hidden-to-vocab transformation.
    W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']
    
    loss, grads = 0.0, {}
    
    grad_function = grad_and_loss(self.rnnNet, xrange(8))
    grad_array, loss = grad_function(W_proj, b_proj, W_embed, Wx, Wh, b, W_vocab, b_vocab,
                    features, captions_in, captions_out, mask)
    
    # In the backward pass we need the gradient of the loss with respect to all
    # model parameters; grads[k] should give the gradient for self.params[k].
    # grad_array is ordered like the first eight arguments of self.rnnNet above.
    grads['W_proj'] = grad_array[0]
    grads['b_proj'] = grad_array[1]
    grads['W_embed'] = grad_array[2]
    grads['Wx'] = grad_array[3]
    grads['Wh'] = grad_array[4]
    grads['b'] = grad_array[5]
    grads['W_vocab'] = grad_array[6]
    grads['b_vocab'] = grad_array[7]
    return loss, grads
Code Example #27

# preparation
N, D, H = 4, 5, 6
x = np.random.randn(N, D)
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)
out, cache = rnn_step_forward(x, h, Wx, Wh, b)
dnext_h = np.random.randn(*out.shape)

# test MinPy
start = time.time()
rnn_step_forward_loss = lambda x, h, Wx, Wh, b, dnext_h: minpy_rnn_step_forward(x, h, Wx, Wh, b) * nm(dnext_h)
grad_loss_function = return_numpy(grad_and_loss(rnn_step_forward_loss, range(5)))
grad_arrays = grad_loss_function(x, h, Wx, Wh, b, dnext_h)[0]
end = time.time()
print("MinPy total time elapsed:", end - start)

# test NumPy
start = time.time()
out, cache = rnn_step_forward(x, h, Wx, Wh, b)
dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)
out *= dnext_h # to agree with MinPy calculation
end = time.time()
print("NumPy total time elapsed:", end - start)

print()
print("Result Check:")
print('dx error: ', rel_error(dx, grad_arrays[0]))
Code Example #28
def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    error = np.count_nonzero(
        np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0


xshape = (256, 500)
wshape = (500, 250)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
weights = random.rand(*wshape) - 0.5

#training_gradient_fun_0 = grad(training_loss, 0)
grad_arg0 = grad_and_loss(training_loss, 0)
grad, loss = grad_arg0(weights, inputs)
print('1st arg\'s grad by single grad func', grad)

grad_arg1 = grad_and_loss(training_loss, 1)
grad, loss = grad_arg1(weights, inputs)
print('2nd arg\'s grad by single grad func', grad)

grad_args = grad_and_loss(training_loss, [0, 1])
grads, loss = grad_args(weights, inputs)
print('1st arg\'s grad by single grad func', grads[0])
print('2nd arg\'s grad by single grad func', grads[1])
Code Example #29
File: test.py  Project: colinsongf/minpy
    y = np.dot(x, w)
    prob = softmax(x=y, softmax_label=softmax_label)
    return prob

#util.plot_data(x, t)
#util.plot_data(x, predict(w, x))

'''
for i in range(1):
    prob = predict(w, x)
    #print prob
    dy = t - prob
    dw = np.dot(x.T, dy) / 10000
    w -= 0.1 * dw

print w

#util.plot_data(x, predict(w, x))

'''
def loss(w, x):
    prob = predict(w, x)
    return -np.sum(np.log(prob) * t) / 10000  + 0.5 * w * w

gl = grad_and_loss(loss)

for i in range(10):
    dw, loss = gl(w, x)
    print loss
    w -= 0.1 * dw
Code Example #30
# Predict the class using multinomial logistic regression (softmax regression).
def predict(w, x):
    a = np.exp(np.dot(x, w))
    a_sum = np.sum(a, axis=1, keepdims=True)
    prob = a / a_sum
    return prob


def train_loss(w, x):
    prob = predict(w, x)
    loss = -np.sum(label * np.log(prob)) / num_samples
    return loss


"""Use Minpy's auto-grad to derive a gradient function off loss"""
grad_function = grad_and_loss(train_loss)


# Using gradient descent to fit the correct classes.
def train(w, x, loops):
    for i in range(loops):
        dw, loss = grad_function(w, x)
        if i % 10 == 0:
            print('Iter {}, training loss {}'.format(i, loss))
        # gradient descent
        w -= 0.1 * dw


# Initialize training data.
num_samples = 10000
num_features = 500
Code Example #31
File: test_autograd.py  Project: sxjscience/minpy
def test_autograd():
    @convert_args
    def minpy_rnn_step_forward(x, prev_h, Wx, Wh, b):
        next_h = mp.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
        return next_h

    def rel_error(x, y):
        """ returns relative error """
        return np.max(
            np.abs(x - y) / (np.maximum(1e-8,
                                        np.abs(x) + np.abs(y))))

    def rnn_step_forward(x, prev_h, Wx, Wh, b):
        next_h = np.tanh(prev_h.dot(Wh) + x.dot(Wx) + b)
        cache = next_h, prev_h, x, Wx, Wh
        return next_h, cache

    def rnn_step_backward(dnext_h, cache):
        dx, dprev_h, dWx, dWh, db = None, None, None, None, None
        # Load values from rnn_step_forward
        next_h, prev_h, x, Wx, Wh = cache
        # Gradients of loss wrt tanh
        dtanh = dnext_h * (1 - next_h * next_h)  # (N, H)
        # Gradients of loss wrt x
        dx = dtanh.dot(Wx.T)
        # Gradients of loss wrt prev_h
        dprev_h = dtanh.dot(Wh.T)
        # Gradients of loss wrt Wx
        dWx = x.T.dot(dtanh)  # (D, H)
        # Gradients of loss wrt Wh
        dWh = prev_h.T.dot(dtanh)
        # Gradients of loss wrt b. Note we broadcast b in practice. Thus result of
        # matrix ops are just sum over columns
        db = dtanh.sum(axis=0)  # == np.ones([N, 1]).T.dot(dtanh)[0, :]
        return dx, dprev_h, dWx, dWh, db

    # preparation
    N, D, H = 4, 5, 6
    x = np.random.randn(N, D)
    h = np.random.randn(N, H)
    Wx = np.random.randn(D, H)
    Wh = np.random.randn(H, H)
    b = np.random.randn(H)
    out, cache = rnn_step_forward(x, h, Wx, Wh, b)
    dnext_h = np.random.randn(*out.shape)

    # test MinPy
    start = time.time()
    rnn_step_forward_loss = lambda x, h, Wx, Wh, b, dnext_h: minpy_rnn_step_forward(
        x, h, Wx, Wh, b) * nm(dnext_h)
    grad_loss_function = return_numpy(
        grad_and_loss(rnn_step_forward_loss, range(5)))
    grad_arrays = grad_loss_function(x, h, Wx, Wh, b, dnext_h)[0]
    end = time.time()
    print("MinPy total time elapsed:", end - start)

    # test NumPy
    start = time.time()
    out, cache = rnn_step_forward(x, h, Wx, Wh, b)
    dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)
    out *= dnext_h  # to agree with MinPy calculation
    end = time.time()
    print("NumPy total time elapsed:", end - start)

    print()
    print("Result Check:")
    print('dx error: ', rel_error(dx, grad_arrays[0]))
    print('dprev_h error: ', rel_error(dprev_h, grad_arrays[1]))
    print('dWx error: ', rel_error(dWx, grad_arrays[2]))
    print('dWh error: ', rel_error(dWh, grad_arrays[3]))
    print('db error: ', rel_error(db, grad_arrays[4]))
Code Example #32
# set_context(gpu(0)) # set the global context as gpu(0)

# Predict the class using multinomial logistic regression (softmax regression).
def predict(w, x):
    a = np.exp(np.dot(x, w))
    a_sum = np.sum(a, axis=1, keepdims=True)
    prob = a / a_sum
    return prob

def train_loss(w, x):
    prob = predict(w, x)
    loss = -np.sum(label * np.log(prob)) / num_samples
    return loss

"""Use Minpy's auto-grad to derive a gradient function off loss"""
grad_function = grad_and_loss(train_loss)

# Using gradient descent to fit the correct classes.
def train(w, x, loops):
    for i in range(loops):
        dw, loss = grad_function(w, x)
        if i % 10 == 0:
            print('Iter {}, training loss {}'.format(i, loss))
        # gradient descent
        w -= 0.1 * dw

# Initialize training data.
num_samples = 10000
num_features = 500
num_classes = 5
data, label = make_data(num_samples, num_features, num_classes)