Example 1
def evaluate(net, data_iter):
    loss, acc, n = 0., 0., 0.
    steps = len(data_iter)
    for data, label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        output = net(data)
        acc += accuracy(output, label)
        loss += nd.mean(softmax_cross_entropy(output, label)).asscalar()
    return loss/steps, acc/steps
Example 2
def predict(net, data, label):
    data = nd.array(data)
    label = nd.array(label)
    hidden = net.begin_state(func=mx.nd.zeros,batch_size = data.shape[0],ctx=mx.cpu())
    dd = nd.array(data.reshape((data.shape[0],5,11)).swapaxes(0,1))
    output,hidden = net(dd,hidden)
    output = output.reshape((5,data.shape[0],1))
    output = nd.sum(output,axis=0)/5
    l = nd.argmax(output, axis=1)
    res = nd.mean(l==label)
    return res.asscalar()
Example 3
def train(net, trainer, img_dir, img_attr_file, img_landmark_file, ctx, batch_size, epochs, out_model_file, lr_schedule):
    loss_softmax = gluon.loss.SoftmaxCrossEntropyLoss()
    loss_weighted_cross_entropy = WeightedCrossEntropyLoss()
    loss_l2 = gluon.loss.L2Loss()
    # loss_hinge = gluon.loss.HingeLoss()
    for epoch in range(epochs):
        data_iter = get_data(img_dir, img_attr_file, img_landmark_file, ctx, batch_size)
        # total_loss = 0
        loss_iter = 0
        for i, (data, label) in enumerate(data_iter):
            if not data:
                break
            with autograd.record():
                d = data[0]
                img_files = data[1]
                l, vs, classifier_output, _, _ = net(d)

                label, vis, landmarks = label
                # print(vis.shape)
                vis_data = [vis[:, k] for k in range(vis.shape[1])]
                loss_landmark = loss_l2(l, landmarks) / 100
                loss_attr = loss_weighted_cross_entropy(classifier_output, label)
                loss = loss_landmark + loss_attr
                for v, d in zip(vs, vis_data):
                    loss = loss + loss_softmax(v, d)

            # print(loss)
            loss.backward()
            loss_iter += nd.mean(loss).asscalar()
            trainer.step(batch_size, ignore_stale_grad=True)

            if (i + 1) % 40 == 0:
                print(img_files)
                print(l)
                print(landmarks)
                print(loss_landmark)
                print(loss_attr)
                print('epoch: %d, iter: %d, loss: %f' % (epoch, i + 1, loss_iter))
                loss_iter = 0
        # halve the learning rate every epoch
        lr_schedule.learning_rate /= 2.0
        net.collect_params().save(out_model_file + '_' + str(epoch))
Example 4
def evaluate_rnn(loss_func, data_iterator, model, hidden, ctx=[mx.cpu()]):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc = nd.array([0])
    total_loss = 0.0
    n = 0.
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()
    for batch in data_iterator:
        data, label, batch_size = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            out = model(X, hidden)
            
            acc += nd.sum(out.argmax(axis=1)==y).copyto(mx.cpu())
            cur_loss = loss_func(out, y).copyto(mx.cpu())
            total_loss += nd.mean(cur_loss).asscalar()
            
            n += y.size
        acc.wait_to_read() # don't push too many operators into backend
    return acc.asscalar() / n, total_loss / n
Example 5
 def evaluate_accuracy(self, data_iterator, net):
     """
     compute top-1 accuracy
     :param data_iterator: 
     :param net: 
     :return: 
     """
     loss = utils.AverageMeter()
     acc = mx.metric.Accuracy()
     for idx, (d, l) in enumerate(data_iterator):
         data = d.as_in_context(self.ctx[0])
         label = l.as_in_context(self.ctx[0])
         output = net(data)
         _loss = self.get_loss(output, label)
         curr_loss = nd.mean(_loss).asscalar()
         loss.update(curr_loss, data.shape[0])
         predictions = nd.argmax(output, axis=1)
         acc.update(preds=predictions, labels=label)
         utils.view_bar(idx + 1, len(data_iterator))  # view_bar
     return acc.get()[1], loss.avg
Example 6
def trim(epoch, gradients, net, lr, f, byz, b=20):
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    param_list = byz(epoch, param_list, net, lr, f)

    sorted_array = nd.sort(nd.concat(*param_list, dim=1), axis=-1)
    n = len(param_list)
    q = f
    m = n - b * 2
    trim_nd = nd.mean(sorted_array[:, b:(b + m)], axis=-1, keepdims=1)
    idx = 0

    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(
            param.data() - lr *
            trim_nd[idx:(idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
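The update above applies a coordinate-wise trimmed mean of the concatenated worker gradients. A minimal standalone sketch of just that step, with a toy 2x5 matrix and b = 1 (values chosen for illustration, not taken from the function above):

from mxnet import nd

# rows are gradient coordinates, columns are workers
grads = nd.array([[1., 100., 2., 3., -50.],
                  [0., 1., 2., 3., 4.]])
b = 1  # number of entries trimmed from each end
sorted_g = nd.sort(grads, axis=-1)
trimmed_mean = nd.mean(sorted_g[:, b:grads.shape[1] - b], axis=-1)
print(trimmed_mean.asnumpy())  # [2. 2.] -- the outliers 100 and -50 are discarded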
Example 7
def evaluate_loss(data_iterator, net, ctx=[mx.cpu()]):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    acc = nd.array([0])
    n = 0.
    if isinstance(data_iterator, mx.io.MXDataIter) or isinstance(
            data_iterator, mx.image.ImageIter):
        data_iterator.reset()
    for batch in data_iterator:
        data, label, batch_size = _get_batch(batch, ctx)
        for X, y in zip(data, label):
            y = y.astype('float32')
            y0 = net(X)
            #acc += nd.sum( (y0-y)*(y0-y) ).copyto(mx.cpu())
            acc += nd.mean(0.5 * (y0 - y) * (y0 - y), axis=1).copyto(
                mx.cpu()).sum()  #mean along dim of L2Loss
            n += y.shape[0]
        acc.wait_to_read()  # don't push too many operators into backend

    return acc.asscalar() / n  #mean of L2Loss
Example 8
def test_compute_quantile_loss() -> None:
    y_true = nd.ones(shape=(10, 10, 10))
    y_pred = nd.zeros(shape=(10, 10, 10, 2))

    quantiles = [0.5, 0.9]

    loss = QuantileLoss(quantiles)

    correct_qt_loss = [1.0, 1.8]

    for idx, q in enumerate(quantiles):
        assert (
            nd.mean(
                loss.compute_quantile_loss(
                    nd.ndarray, y_true, y_pred[:, :, :, idx], q
                )
            )
            - correct_qt_loss[idx]
            < 1e-5
        ), f"computing quantile loss at quantile {q} fails!"
Example 9
def partial_trim(epoch, v, net, f):
    # apply partial knowledge trimmed mean attack

    vi_shape = v[0].shape

    #first compute the distribution parameters
    all_grads = nd.concat(*v, dim=1)
    adv_grads = all_grads[:, :f]
    e_mu = nd.mean(adv_grads, axis=1)  # mean
    e_sigma = nd.sqrt(
        nd.sum(nd.square(nd.subtract(adv_grads, e_mu.reshape(-1, 1))), axis=1)
        / f)  # standard deviation

    for i in range(f):
        # apply attack to compromised worker devices with randomness
        v[i] = (
            e_mu - nd.multiply(e_sigma, nd.sign(e_mu)) *
            (3. + nd.random.uniform(shape=e_sigma.shape))).reshape(vi_shape)

    return v
Example 10
def evaluate(loader, net, ctx, loss):
    """
    Evaluate the loss function
    :param loader: data loader to be used in evaluation
    :param net: network
    :param ctx: prediction context
    :param loss: loss function
    """
    epoch_loss = 0
    weight_updates = 0
    for i, (X) in enumerate(loader):
        X_U_cont, X_U_emb, X_I_cont, X_I_emb, X_I_neg_cont, X_I_neg_emb = (x.as_in_context(ctx) for x in X)

        # Forward  pass: loss depends on both positive and negative predictions
        pos_pred = net(X_U_cont, X_U_emb, X_I_cont, X_I_emb)
        neg_pred = net(X_U_cont, X_U_emb, X_I_neg_cont, X_I_neg_emb)
        l = loss(pos_pred, neg_pred)
        epoch_loss += nd.mean(l).asscalar()
        weight_updates += 1
    return epoch_loss / weight_updates
Example 11
def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr,
        'momentum': 0.99,
        'wd': wd
    })

    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        print(epoch)
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        count = 0
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            count += 1
            train_loss += nd.mean(loss).asscalar()
            train_acc += utils.accuracy(output, label)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_acc = utils.evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
Example 12
    def hybrid_forward(self, F, output, *args, **kwargs):
        """
        Masks the outputs to the sequence lengths and returns the cross entropy loss
        output is a (batch x max_name_length x log_probabilities) tensor of name predictions for each graph
        """
        (label, _), data_encoder = args
        loss = nd.pick(output, label.values, axis=2)

        # Masking output to max(where_RNN_emitted_PAD_token, length_of_label)
        output_preds = F.argmax(output, axis=2).asnumpy()
        output_lengths = []
        for row in output_preds:
            end_token_idxs = np.where(row == data_encoder.all_node_name_subtokens['__PAD__'])[0]
            if len(end_token_idxs):
                output_lengths.append(int(min(end_token_idxs)) + 1)
            else:
                output_lengths.append(output.shape[1])
        output_lengths = F.array(output_lengths, ctx=output.context)
        mask_lengths = F.maximum(output_lengths, label.value_lengths)
        loss = F.SequenceMask(loss, use_sequence_length=True, sequence_length=mask_lengths, axis=1)
        return nd.mean(-loss, axis=0, exclude=True)
Example 13
    def get_returns(self, discount_factor=0.99):
        """
        Calculate the return for every state. This is defined as the discounted 
        sum of rewards after visiting the state. 

        Args:
            discount_factor (float) : determines how much we care about distant 
                                        rewards (1.0) vs immediate rewards (0.).

        Returns:
            normalized_returns (array of float) : the returns, from which the mean is
                                                  subtracted to reduce the variance.
        """
        returns = []
        curr_sum = 0.
        for r in reversed(self.rewards):
            curr_sum = r + discount_factor * curr_sum
            returns.append(curr_sum)
        returns.reverse()
        normalized_returns = nd.array(returns) - nd.mean(nd.array(returns))
        return normalized_returns
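A standalone trace of the same backward recursion, using made-up rewards and a discount of 0.5 for illustration:

from mxnet import nd

rewards, discount = [1.0, 0.0, 1.0], 0.5
returns, curr_sum = [], 0.0
for r in reversed(rewards):
    curr_sum = r + discount * curr_sum
    returns.append(curr_sum)
returns.reverse()  # [1.25, 0.5, 1.0]
normalized = nd.array(returns) - nd.mean(nd.array(returns))
print(normalized.asnumpy())  # mean-centered returns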
Example 14
    def _evaluate_accuracy(self, X, Y, batch_size=64):
        data_loader = self.generate_batch(X, Y, batch_size, shuffled=False)

        softmax_loss = gluon.loss.SoftmaxCrossEntropyLoss()

        num_batches = len(X) // batch_size

        metric = mx.metric.Accuracy()
        loss_avg = 0.
        for i, (data, label) in enumerate(data_loader):
            data = data.as_in_context(self.model_ctx)
            label = label.as_in_context(self.model_ctx)
            output = self.model(data)
            predictions = nd.argmax(output, axis=1)
            loss = softmax_loss(output, label)
            metric.update(preds=predictions, labels=label)
            loss_avg = loss_avg * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1)

            if i + 1 == num_batches:
                break
        return metric.get()[1], loss_avg
Example 15
def bulyan(epoch, gradients, net, lr, byz, f=0):

    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in gradients
    ]
    param_list = byz(epoch, param_list, net, f, lr, np.arange(len(param_list)))

    k = len(param_list) - f - 2
    dist = mx.nd.zeros((len(param_list), len(param_list)))
    for i in range(0, len(param_list)):
        for j in range(0, i):
            dist[i][j] = nd.norm(param_list[i] - param_list[j])
            dist[j][i] = dist[i][j]

    sorted_dist = mx.nd.sort(dist)
    sum_dist = mx.nd.sum(sorted_dist[:, :k + 1], axis=1)
    bulyan_list = []
    bul_client_list = np.ones(len(param_list)) * (-1)
    for i in range(len(param_list) - 2 * f):
        chosen = int(nd.argmin(sum_dist).asscalar())
        sum_dist[chosen] = 10**8
        bul_client_list[i] = chosen
        bulyan_list.append(param_list[chosen])
        for j in range(len(sum_dist)):
            sum_dist[j] = sum_dist[j] - dist[j][chosen]
    sorted_array = nd.sort(nd.concat(*bulyan_list, dim=1), axis=-1)
    trim_nd = nd.mean(sorted_array[:, f:(len(bulyan_list) - f)],
                      axis=-1,
                      keepdims=1)

    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req == 'null':
            continue
        param.set_data(
            param.data() - lr *
            trim_nd[idx:(idx + param.data().size)].reshape(param.data().shape))
        idx += param.data().size
    return trim_nd, bul_client_list
Example 16
def train(net, train_data, valid_data, num_epochs, batch_size, ctx, trainer,
          loss_func, lr_period, lr_decay, filename):
    prev_time = datetime.datetime.now()

    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)

        for data, label in train_data:
            label = label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = loss_func(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += accuracy(output, label)

        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)

        if valid_data is not None:
            valid_acc = evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, " %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data)))

        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

        net.save_params(filename)
Example 17
def vali_loss_cal(data_iter, net):
    data_iter.reset()
    moving_loss = 0
    smoothing_constant = .01
    for i, batch in enumerate(data_iter):
        #print(data.shape)
        #print(label.shape)
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if i == 0 else
                       (1 - smoothing_constant) * moving_loss +
                       smoothing_constant * curr_loss)

    return moving_loss
Example 18
def evaluate_accuracy(data_iterator, net, ctx, loss_fun, num_classes):
    """
    This function is used for evaluating accuracy of
    a given data iterator. (Either Train/Test data)
    It takes in the loss function used too!
    """
    acc = mx.metric.Accuracy()
    loss_avg = 0.
    for i, (data, labels) in enumerate(data_iterator):
        data = data.as_in_context(ctx)  #.reshape((-1,784))
        labels = labels.as_in_context(ctx)
        output = net(data)
        loss = loss_fun(output, labels)
        preds = []
        if (num_classes == 2):
            preds = (nd.sign(output) + 1) / 2
            preds = preds.reshape(-1)
        else:
            preds = nd.argmax(output, axis=1)
        acc.update(preds=preds, labels=labels)
        loss_avg = loss_avg * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1)
    return acc.get()[1], loss_avg
Example 19
def record_loss(losses, loss_names, summary_writer, step=0, exp=''):
    '''
    record a list of losses to summary_writer.

    Parameters:
    ----------
    losses: list of mxnet.ndarray
      the array is 1-D, length is batch size
    loss_names: list of string
      names of the losses; must have the same length as losses
    summary_writer: mxboard.SummaryWriter
    step: int
      training step
    exp: string
      record to which figure
    '''
    assert len(losses) == len(loss_names), (
        'losses and loss_names must have the same length')

    for i, L in enumerate(losses):
        loss_name = loss_names[i]
        summary_writer.add_scalar(exp, (loss_name, nd.mean(L).asnumpy()), step)
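A hypothetical usage sketch, assuming an mxboard SummaryWriter and two invented per-batch loss arrays:

from mxboard import SummaryWriter
from mxnet import nd

sw = SummaryWriter(logdir='./logs')
l2_batch = nd.array([0.30, 0.55, 0.10])  # per-sample L2 losses for one batch (made up)
ce_batch = nd.array([1.20, 0.80, 0.95])  # per-sample cross-entropy losses (made up)
record_loss([l2_batch, ce_batch], ['l2', 'cross_entropy'], sw, step=10, exp='train')
sw.close()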
Example 20
def train(epoch):
    #   print(epoch)
    train_loss = 0.

    for batch_idx, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        #        print(data)
        label = label.as_in_context(ctx)
        batch_size = data.shape[0]
        #        print(batch_idx,batch_size)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)
        train_loss += nd.mean(loss).asscalar()
        if batch_idx % 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_data.dataset),
                100. * batch_idx / len(train_data),
                train_loss / len(train_data)))
    test()
Example 21
 def pointEvaluator(self,
                    nnModel,
                    testX,
                    testX2,
                    testY,
                    lossFunc,
                    mode='Normal'):
     assert mode in set(['Normal', 'logTransform'])
     pred = self.predict(nnModel, testX, testX2)
     validPred = pred.asnumpy()
     validTrue = testY
     if (mode == 'logTransform'):
         validPred = np.exp(validPred) - 1
         validTrue = np.exp(validTrue) - 1
     # The loss
     loss = nd.mean(
         lossFunc(pred, nd.array(testY, dtype='float32',
                                 ctx=self.dataCtx))).asscalar()
     # The evaluation metrics
     validND, validSMAPE, validNRMSE = ND(validPred, validTrue), SMAPE(
         validPred, validTrue), NRMSE(validPred, validTrue)
     return loss, validND, validSMAPE, validNRMSE
Example 22
    def forward(self, is_train, req, in_data, out_data, aux):
        x = in_data[0]
        gamma = in_data[1]
        beta = in_data[2]
        moving_mean = in_data[3]
        moving_var = in_data[4]
        # print(x.sum())
        y = out_data[0]

        if is_train:
            mean = nd.mean(x, axis=(0, 2, 3))
            var = nd.array(np.var(x.asnumpy(), axis=(0, 2, 3)))
            #print(moving_mean ,self.momentum, mean)
            moving_mean = moving_mean * self.momentum + mean * (1 -
                                                                self.momentum)
            moving_var = moving_var * self.momentum + var * (1 - self.momentum)
            self.assign(in_data[3], req[0], moving_mean)
            self.assign(in_data[4], req[0], moving_var)

        else:
            mean = moving_mean
            var = moving_var

        quan_gamma = self.quantize(gamma / (nd.sqrt(var + self.eps)))
        quan_beta = self.quantize(beta -
                                  mean * gamma / nd.sqrt(var + self.eps))

        y = nd.BatchNorm(x,
                         gamma=quan_gamma,
                         beta=quan_beta,
                         moving_mean=nd.zeros(shape=moving_mean.shape),
                         moving_var=nd.ones(shape=moving_var.shape),
                         eps=self.eps,
                         momentum=self.momentum,
                         fix_gamma=self.fix_gamma,
                         name=self.name)

        self.assign(out_data[0], req[0], mx.nd.array(y))
Example 23
def _leave_one_out_gradient_estimator(h, f, zero_mean_h=False):
  """Estimate gradient of f using score function and control variate h.

  Optimal scaling of control variate is given by: a = Cov(h, f) / Var(h).
  """
  if h.ndim > f.ndim:
    # expand parameter dimension (last dimension summed over in f)
    f = nd.expand_dims(f, f.ndim)
  grad_f = h * f
  if zero_mean_h:
    cov_h_f = _leave_one_out_mean(h * grad_f)
    var_h = _leave_one_out_mean(h * h)
  else:
    cov_h_f = _held_out_covariance(h, grad_f)
    var_h = _held_out_covariance(h, h)
  # sampling zero for low-variance score functions is probable, so add EPSILON!
  optimal_a = cov_h_f / (EPSILON + var_h)
  if h.ndim == 2:
    # If no batch dim: nd.Embedding removes batch dim for batches of size 1
    keepdims = True
  else:
    keepdims = False
  return nd.mean(grad_f - optimal_a * h, 0, keepdims=keepdims)
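For reference, a minimal sketch of the optimal scaling named in the docstring, a = Cov(h, f) / Var(h), estimated across the sample axis with plain ndarray ops (this is not the _held_out_covariance / _leave_one_out_mean machinery used above):

from mxnet import nd

EPSILON = 1e-8  # assumed small constant, standing in for the module-level EPSILON

def naive_optimal_scale(h, grad_f):
    h_c = h - nd.mean(h, axis=0, keepdims=True)
    f_c = grad_f - nd.mean(grad_f, axis=0, keepdims=True)
    cov_h_f = nd.mean(h_c * f_c, axis=0)  # biased sample covariance
    var_h = nd.mean(h_c * h_c, axis=0)    # biased sample variance
    return cov_h_f / (EPSILON + var_h)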
Example 24
 def mask_loss(self, mask_pred, mask_eoc, mask_target, matches, bt_target):
     samples = (matches >= 0)
     pos_num = samples.sum(axis=-1).asnumpy().astype('int')
     rank = (-matches).argsort(axis=-1)
     losses = []
     for i in range(mask_pred.shape[0]):
         if pos_num[i] == 0:
             losses.append(nd.zeros(shape=(1, ), ctx=mask_pred.context))
             continue
         idx = rank[i, :pos_num[i]]
         pos_bboxe = nd.take(bt_target[i], idx)
         area = (pos_bboxe[:, 3] - pos_bboxe[:, 1]) * (pos_bboxe[:, 2] -
                                                       pos_bboxe[:, 0])
         weight = self.gt_weidth * self.gt_height / area
         mask_gt = mask_target[i, matches[i, idx], :, :]
         mask_preds = nd.dot(nd.take(mask_eoc[i], idx), mask_pred[i])
         _, h, w = mask_preds.shape
         # mask_preds = self.global_aware(mask_preds)
         mask_preds = nd.sigmoid(mask_preds)
         mask_preds = self.crop(pos_bboxe, h, w, mask_preds)
         loss = self.SBCELoss(mask_preds, mask_gt) * weight
         losses.append(nd.mean(loss))
     return nd.concat(*losses, dim=0)
Example 25
    def forward(self, x):
        embeds = self.embed(x)  # batch * time step * embedding
        x_i = embeds.expand_dims(1)
        x_i = nd.repeat(x_i, repeats=self.sentence_length,
                        axis=1)  # batch * time step * time step * embedding
        x_j = embeds.expand_dims(2)
        x_j = nd.repeat(x_j, repeats=self.sentence_length,
                        axis=2)  # batch * time step * time step * embedding
        x_full = nd.concat(
            x_i, x_j, dim=3)  # batch * time step * time step * (2 * embedding)
        # New input data
        _x = x_full.reshape((-1, 2 * self.emb_dim))

        # Network for attention
        _attn = self.attn(_x)
        _att = _attn.reshape((-1, self.sentence_length, self.sentence_length))
        _att = nd.sigmoid(_att)
        att = nd.softmax(_att, axis=1)

        _x = self.g_fc1(_x)  # (batch * time step * time step) * hidden_dim
        _x = self.g_fc2(_x)  # (batch * time step * time step) * hidden_dim
        # sum all sentence_length * sentence_length pairwise results into a single sentence representation

        x_g = _x.reshape(
            (-1, self.sentence_length, self.sentence_length, self.hidden_dim))

        _inflated_att = _att.expand_dims(axis=-1)
        _inflated_att = nd.repeat(_inflated_att,
                                  repeats=self.hidden_dim,
                                  axis=3)

        x_q = nd.multiply(_inflated_att, x_g)

        sentence_rep = nd.mean(x_q.reshape(shape=(-1, self.sentence_length**2,
                                                  self.hidden_dim)),
                               axis=1)
        return sentence_rep, att
Example 26
def train(data_iter):
    lstm = OCRLSTM()
    lstm.collect_params().initialize(mx.init.Xavier(), ctx=mx.cpu())

    loss = gluon.loss.CTCLoss(layout='NTC', label_layout='NT')
    trainer = gluon.Trainer(lstm.collect_params(), 'sgd',
                            {'learning_rate': 0.001})
    state = lstm.begin_state(batch_size=1)
    global_step = 0

    for epoch in range(100):
        print("epoch ", epoch)
        for sample in data_iter:
            data = sample[0]
            label = sample[1]
            train_loss = .0
            with autograd.record():
                # print("data ",state)
                output, state = lstm(data, state)
                # output = nd.expand_dims(output, axis=1)
                output = output.transpose((1, 0, 2))
                # label = nd.expand_dims(label, axis=1)
                # label = label.reshape((1,4))
                # print("output ", output.shape, label.shape)
                L = loss(output, label)
            L.backward()
            train_loss = nd.mean(L).asscalar()
            # sw.add_scalar(tag="loss",value=train_loss,global_step=global_step)
            global_step = global_step + 1
            # if epoch == 1 :
            #     sw.add_graph(net)
            trainer.step(1, ignore_stale_grad=True)

            if (epoch % 100 == 0):
                print('train_loss %.4f' % (train_loss))
                # print('output max', output.argmax(axis=2))
            predict(data, state)
Example 27
    def pick_the_best_function(self):
        def accuracy(y_hat, y):
            # note that y_hat's shape here must match the shape of y
            return nd.mean(y_hat.argmax(
                axis=1).reshape(y.shape) == y).asscalar()

        def evaluate_accuracy(data_iter, net, ctx):
            acc = 0.
            for batch_X, batch_y in data_iter:
                batch_X = batch_X.as_in_context(ctx)
                batch_y = batch_y.as_in_context(ctx)
                batch_y = batch_y.reshape((-1, 1))
                batch_y_hat = net(batch_X)
                acc += accuracy(batch_y_hat, batch_y)
            return acc / len(data_iter)

        for e in range(self.__epochs):
            train_loss = 0.
            train_acc = 0.
            for self.__batch_X, self.__batch_y in self.__train_data_iter:
                self.__batch_X = self.__batch_X.as_in_context(self.__ctx)
                self.__batch_y = self.__batch_y.reshape(
                    (-1, 1)).as_in_context(self.__ctx)
                with autograd.record():
                    self.__batch_y_hat = self.__net(self.__batch_X)
                    loss = self.__softmax_cross_entropy(
                        self.__batch_y_hat, self.__batch_y)
                loss.backward()
                self.__trainer.step(self.__batch_size)

                train_loss += nd.mean(loss).asscalar()
                train_acc += accuracy(self.__batch_y_hat, self.__batch_y)
            test_acc = evaluate_accuracy(self.__test_data_iter, self.__net,
                                         self.__ctx)
            print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" %
                  (e, train_loss / len(self.__train_data_iter),
                   train_acc / len(self.__train_data_iter), test_acc))
Example 28
def train_step(model, optimizer, data, epoch):

    running_loss = 0.0
    global update_count
    N = data.shape[0]
    idxlist = list(range(N))
    np.random.shuffle(idxlist)
    training_steps = len(range(0, N, args.batch_size))

    with trange(training_steps) as t:
        for batch_idx, start_idx in zip(t, range(0, N, args.batch_size)):
            t.set_description("epoch: {}".format(epoch + 1))

            end_idx = min(start_idx + args.batch_size, N)
            X_inp = data[idxlist[start_idx:end_idx]]
            X_inp = nd.array(X_inp.toarray()).as_in_context(ctx)

            if args.constant_anneal:
                anneal = args.anneal_cap
            else:
                anneal = min(args.anneal_cap, update_count / total_anneal_steps)
            update_count += 1

            with autograd.record():
                if model.__class__.__name__ == "MultiVAE":
                    X_out, mu, logvar = model(X_inp)
                    loss = vae_loss_fn(X_inp, X_out, mu, logvar, anneal)
                    train_step.anneal = anneal
                elif model.__class__.__name__ == "MultiDAE":
                    X_out = model(X_inp)
                    loss = -nd.mean(nd.sum(nd.log_softmax(X_out) * X_inp, -1))
            loss.backward()
            trainer.step(X_inp.shape[0])
            running_loss += loss.asscalar()
            avg_loss = running_loss / (batch_idx + 1)

            t.set_postfix(loss=avg_loss)
Example 29
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        dx = in_grad[0]
        dgamma = in_grad[1]
        dbeta = in_grad[2]

        x = in_data[0]
        gamma = in_data[1]
        beta = in_data[2]

        y = out_data[0]
        dy = out_grad[0]

        mean = nd.mean(x, axis=(0, 2, 3))
        var = nd.array(np.var(x.asnumpy(), axis=(0, 2, 3)))

        quan_gamma = gamma
        quan_beta = beta

        x.attach_grad(), gamma.attach_grad(), beta.attach_grad()
        with autograd.record():
            y = nd.BatchNorm(x,
                             gamma=quan_gamma,
                             beta=quan_beta,
                             moving_mean=mean,
                             moving_var=var,
                             eps=self.eps,
                             momentum=self.momentum,
                             fix_gamma=self.fix_gamma,
                             name=self.name)

        dx, dgamma, dbeta = autograd.grad(y, [x, quan_gamma, quan_beta],
                                          dy,
                                          retain_graph=True)
        self.assign(in_grad[0], req[0], dx)
        self.assign(in_grad[1], req[0], dgamma)
        self.assign(in_grad[2], req[0], dbeta)
Example 30
def train_step(model, train_loader, trainer, metric, epoch, zero_padding):

    metric.reset()
    train_steps = len(train_loader)
    running_loss = 0.0
    with trange(train_steps) as t:
        for batch_idx, (data, target) in zip(t, train_loader):
            t.set_description("epoch %i" % (epoch + 1))

            X = data.as_in_context(ctx)
            y = target.as_in_context(ctx)

            with autograd.record():
                y_pred = model(X)
                loss = criterion(y_pred, y)
            loss.backward()
            if zero_padding:
                p_zero_padding(model)

            trainer.step(X.shape[0])
            running_loss += nd.mean(loss).asscalar()
            avg_loss = running_loss / (batch_idx + 1)
            metric.update(preds=nd.argmax(y_pred, axis=1), labels=y)
            t.set_postfix(acc=metric.get()[1], loss=avg_loss)
Example 31
def cgc_filter(gradients, net, f, byz):
    """Gets rid of the largest f gradients away from the norm"""
    cgc_method = cfg['cgc_method']
    if cgc_method == 'by-layer':
        output = cgc_by_layer(gradients, f)
    else:
        output = multiply_norms(gradients, f)

    # X is a 2d list of nd array
    param_list = [
        nd.concat(*[xx.reshape((-1, 1)) for xx in x], dim=0) for x in output
    ]
    byz(param_list, f)
    mean_nd = nd.mean(nd.concat(*param_list, dim=1), axis=-1)
    grad_collect = []
    idx = 0
    for j, (param) in enumerate(net.collect_params().values()):
        if param.grad_req != 'null':
            # mapping back to the collection of ndarray
            # append to list for uploading to cloud
            grad_collect.append(mean_nd[idx:(idx + param.data().size)].reshape(
                param.data().shape))
            idx += param.data().size
    return grad_collect
Example 32
def validate(val_data, net, criterion, num_parts, ctx):
    loss = 0.0
    for data, label in val_data:
        data_list = gluon.utils.split_and_load(data, ctx)
        label_list = gluon.utils.split_and_load(label, ctx)

        losses = []
        accurays = []
        for i in range(opt.num_gpus):
            outputs = [X for X in net(data_list[i])]
            temp_loss = sum([criterion(X, label_list[i])
                             for X in outputs]) / num_parts
            losses.append(temp_loss)
            temp_acc = sum([
                nd.mean(X.argmax(
                    axis=1) == label_list[i].astype('float32')).asscalar()
                for X in outputs
            ]) / num_parts
            accurays.append(temp_acc)

        loss_list = [l.mean().asscalar() for l in losses]
        loss += sum(loss_list) / len(loss_list)

    return loss / len(val_data), sum(accurays) / len(accurays)
Example 33
def accuracy(output, label):
    return nd.mean(output.argmax(axis=1) == label).asscalar()
Example 34
def accuracy(output, labels):
    return nd.mean(nd.argmax(output, axis=1) == labels).asscalar()
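A quick sanity check of the helper above with toy logits and labels (values invented for illustration): two of the three argmax predictions match, so the result is 2/3.

from mxnet import nd

logits = nd.array([[2.0, 0.1],
                   [0.2, 1.0],
                   [0.3, 0.9]])
labels = nd.array([0, 1, 0])
print(accuracy(logits, labels))  # ~0.6667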
Example 35
def train(epochs, ctx):
    """Training function."""
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)

    opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
    if opt.optimizer == 'sgd':
        opt_options['momentum'] = 0.9
    if opt.optimizer == 'adam':
        opt_options['epsilon'] = 1e-7
    trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
                            opt_options,
                            kvstore=opt.kvstore)
    if opt.lr_beta > 0.0:
        # Jointly train class-specific beta.
        # See "sampling matters in deep embedding learning" paper for details.
        beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
        trainer_beta = gluon.Trainer([beta], 'sgd',
                                     {'learning_rate': opt.lr_beta, 'momentum': 0.9},
                                     kvstore=opt.kvstore)

    loss = MarginLoss(margin=opt.margin, nu=opt.nu)

    best_val = 0.0
    for epoch in range(epochs):
        tic = time.time()
        prev_loss, cumulative_loss = 0.0, 0.0

        # Learning rate schedule.
        trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
        logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
        if opt.lr_beta > 0.0:
            trainer_beta.set_learning_rate(get_lr(opt.lr_beta, epoch, steps, opt.factor))
            logging.info('Epoch %d beta learning rate=%f', epoch, trainer_beta.learning_rate)

        # Inner training loop.
        for i in range(200):
            batch = train_data.next()
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            with ag.record():
                for x, y in zip(data, label):
                    a_indices, anchors, positives, negatives, _ = net(x)

                    if opt.lr_beta > 0.0:
                        L = loss(anchors, positives, negatives, beta, y[a_indices])
                    else:
                        L = loss(anchors, positives, negatives, opt.beta, None)

                    # Store the loss and do backward after we have done forward
                    # on all GPUs for better speed on multiple GPUs.
                    Ls.append(L)
                    cumulative_loss += nd.mean(L).asscalar()

                for L in Ls:
                    L.backward()

            # Update.
            trainer.step(batch.data[0].shape[0])
            if opt.lr_beta > 0.0:
                trainer_beta.step(batch.data[0].shape[0])

            if (i+1) % opt.log_interval == 0:
                logging.info('[Epoch %d, Iter %d] training loss=%f' % (
                    epoch, i+1, cumulative_loss - prev_loss))
                prev_loss = cumulative_loss

        logging.info('[Epoch %d] training loss=%f'%(epoch, cumulative_loss))
        logging.info('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))

        names, val_accs = test(ctx)
        for name, val_acc in zip(names, val_accs):
            logging.info('[Epoch %d] validation: %s=%f'%(epoch, name, val_acc))

        if val_accs[0] > best_val:
            best_val = val_accs[0]
            logging.info('Saving %s.' % opt.save_model_prefix)
            net.save_params('%s.params' % opt.save_model_prefix)
    return best_val
Example 36
            metric.update([real_label, ], [output, ])

            # train with fake image
            fake_image = g_net(noise)
            output = d_net(fake_image.detach()).reshape((-1, 1))
            errD_fake = loss(output, fake_label)
            errD = errD_real + errD_fake
            errD.backward()
            metric.update([fake_label, ], [output, ])

        d_trainer.step(BATCH_SIZE)
        # update G
        with autograd.record():
            fake_image = g_net(noise)
            output = d_net(fake_image).reshape(-1, 1)
            errG = loss(output, real_label)
            errG.backward()

        g_trainer.step(BATCH_SIZE)

        # print log information every 100 batches
        if i % 100 == 0:
            name, acc = metric.get()
            logging.info('discriminator loss = %f, generator loss = %f, \
                          binary training acc = %f at iter %d epoch %d',
                         nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, i, epoch)
        if i == 0:
            save_image(fake_image, epoch, IMAGE_SIZE, BATCH_SIZE, OUTPUT_DIR)

    metric.reset()
Example 37
def square_loss(yhat, y):
    return nd.mean((yhat - y) ** 2)
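A quick check with toy values: the squared errors are 1 and 0, so the mean is 0.5.

from mxnet import nd

print(square_loss(nd.array([1.0, 2.0]), nd.array([0.0, 2.0])).asscalar())  # 0.5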
Example 38
 for datas, labels in train_data:
     #data, label, batch_size = _get_batch(batch, ctx)  
     #batch_size = batch.shape[0]
     #pdb.set_trace()
     trainNum += datas.asnumpy().shape[0]
     labels = labels.astype('float32').as_in_context(ctx) 
     with autograd.record(): #each sample each time
         yhats = net(datas.as_in_context(ctx))
         #losses = [ loss_func(yhat, label) for yhat,label in zip(yhats,labels)] 
         loss = loss_func(yhats,labels)
     #pdb.set_trace()
     #for loss in losses:
     #    loss.backward()
     loss.backward()
     trainer.step(batch_size)
     train_loss += nd.mean(loss).asscalar()
     train_acc += accuracy(yhats,labels)
     batchNum += 1
     cur_time = datetime.datetime.now()
     h, remainder = divmod((cur_time - prev_time).seconds, 3600)
     m, s = divmod(remainder, 60)
     time_str = "Time %02d:%02d:%02d" % (h, m, s)
     if valid_data is not None and 0 == (batchNum%check_freq):
         valid_acc = evaluate_accuracy(valid_data, net, ctx)
         epoch_str = ("Epoch %d. Batch %d Loss: %f, Train acc %f, Valid acc %f, "
                      % (epoch, batchNum, train_loss / trainNum,
                         train_acc / trainNum, valid_acc))
         logging.info(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
     elif 0 == (batchNum%print_freq):
         epoch_str = ("Epoch %d. Batch %d Loss: %f, Train acc %f, "
                      % (epoch, batchNum, train_loss / trainNum,
Example 39
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]


epochs = 10
smoothing_constant = .01

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0))
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))
Example 40
# state = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
for e in range(epochs):
    ############################
    # Attenuate the learning rate by a factor of 2 every 100 epochs.
    ############################
    if ((e+1) % 100 == 0):
        learning_rate = learning_rate / 2.0
    h = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
    c = nd.zeros(shape=(batch_size, num_hidden), ctx=ctx)
    for i in range(num_batches):
        data_one_hot = train_data[i]
        label_one_hot = train_label[i]
        with autograd.record():
            outputs, h, c = gru_rnn(data_one_hot, h, c)
            loss = average_ce_loss(outputs, label_one_hot)
            loss.backward()
        SGD(params, learning_rate)

        ##########################
        #  Keep a moving average of the losses
        ##########################
        if (i == 0) and (e == 0):
            moving_loss = nd.mean(loss).asscalar()
        else:
            moving_loss = .99 * moving_loss + .01 * nd.mean(loss).asscalar()

    print("Epoch %s. Loss: %s" % (e, moving_loss))
    print(sample("The Time Ma", 1024, temperature=.1))
    print(sample("The Medical Man rose, came to the lamp,", 1024, temperature=.1))
Example 41
def cross_entropy(yhat, y):
    return - nd.mean(nd.sum(y * nd.log(yhat), axis=0, exclude=True))
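A hypothetical usage sketch: yhat holds per-class probabilities (e.g. a softmax output) and y holds one-hot labels; with axis=0, exclude=True the sum runs over every axis except the batch axis before the mean is taken.

from mxnet import nd

yhat = nd.softmax(nd.random.normal(shape=(4, 3)), axis=1)  # 4 samples, 3 classes
y = nd.one_hot(nd.array([0, 2, 1, 0]), depth=3)            # one-hot labels
print(cross_entropy(yhat, y).asscalar())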