Example #1
    def forward(self,
                src_seq,
                tgt_seq,
                src_valid_length=None,
                tgt_valid_length=None):  #pylint: disable=arguments-differ
        """Generate the prediction given the src_seq and tgt_seq.

        This is used in training an NMT model.

        Parameters
        ----------
        src_seq : NDArray
        tgt_seq : NDArray
        src_valid_length : NDArray or None
        tgt_valid_length : NDArray or None

        Returns
        -------
        outputs : NDArray
            Shape (batch_size, tgt_length, tgt_word_num)
        additional_outputs : list of list
            Additional outputs of encoder and decoder, e.g., the attention weights
        """
        src_valid_length = nd.cast(src_valid_length, dtype='float32')
        tgt_valid_length = nd.cast(tgt_valid_length, dtype='float32')
        additional_outputs = []
        encoder_outputs, encoder_additional_outputs = self.encode(
            src_seq, valid_length=src_valid_length)
        decoder_states = self.decoder.init_state_from_encoder(
            encoder_outputs, encoder_valid_length=src_valid_length)
        outputs, _, decoder_additional_outputs =\
            self.decode_seq(tgt_seq, decoder_states, tgt_valid_length)
        additional_outputs.append(encoder_additional_outputs)
        additional_outputs.append(decoder_additional_outputs)
        return outputs, additional_outputs
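A minimal usage sketch for the forward pass above (the `model` instance, vocabulary size and tensor shapes are illustrative assumptions, not taken from the source):

import mxnet as mx
from mxnet import nd

# Toy batch: 2 sentences, source length 5, target length 6 (hypothetical sizes).
src_seq = nd.random.randint(0, 100, shape=(2, 5))
tgt_seq = nd.random.randint(0, 100, shape=(2, 6))
src_valid_length = nd.array([5, 3])
tgt_valid_length = nd.array([6, 4])

# `model` is assumed to be an encoder-decoder network exposing the forward above.
outputs, additional_outputs = model(src_seq, tgt_seq, src_valid_length, tgt_valid_length)
# outputs: (batch_size, tgt_length, tgt_word_num); additional_outputs collects the
# attention weights returned by encoder and decoder.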
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    if opt.use_pretrained_base:
        net.deconv_layers.initialize(ctx=ctx)
        net.final_layer.initialize(ctx=ctx)
    else:
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss()
    metric = HeatmapAccuracy()

    best_val_score = 1

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        for i, batch in enumerate(train_data):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                        for yhat, y, w in zip(outputs, label, weight)]
            for l in loss:
                l.backward()
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i+1)%opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'%(
                             epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                             loss_val / (i+1), trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info('Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n'%(
                     epoch, int(i*batch_size / time_elapsed), int(time_elapsed), loss_val / (i+1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
        trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))

    return net
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    if opt.use_pretrained_base:
        net.deconv_layers.initialize(ctx=ctx)
        net.final_layer.initialize(ctx=ctx)
    else:
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss()
    metric = HeatmapAccuracy()

    best_val_score = 1

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        for i, batch in enumerate(train_data):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                        for yhat, y, w in zip(outputs, label, weight)]
            ag.backward(loss)
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i+1)%opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'%(
                             epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                             loss_val / (i+1), trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info('Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n'%(
                     epoch, int(i*batch_size / time_elapsed), int(time_elapsed), loss_val / (i+1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
        trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))

    return net
Example #4
    def hybrid_forward(self, F, score_gt, kernel_gt, score_pred,
                       training_masks, *args, **kwargs):

        # cal ohem mask
        selected_masks = []
        for i in range(score_gt.shape[0]):
            # cal for text region
            selected_mask = self._ohem_single(score_gt[i:i + 1],
                                              score_pred[i:i + 1],
                                              training_masks[i:i + 1])
            selected_masks.append(selected_mask)
        selected_masks = F.concat(*selected_masks, dim=0)

        s1, s2, s3, s4, s5, s6 = F.split(kernel_gt,
                                         num_outputs=6,
                                         axis=3,
                                         squeeze_axis=True)
        s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred, C_pred = F.split(
            score_pred, num_outputs=7, axis=1, squeeze_axis=True)

        self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)
        # for text map
        eps = 1e-5
        intersection = F.sum(score_gt * C_pred * selected_masks, axis=1)
        union = F.sum(score_gt * selected_masks, axis=1) + F.sum(
            C_pred * selected_masks, axis=1) + eps
        C_dice_loss = 1. - F.mean((2 * intersection / union))

        # loss for kernel
        kernel_dices = []
        for s, s_pred in zip(
            [s1, s2, s3, s4, s5, s6],
            [s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred]):
            kernel_mask = F.where(C_pred > 0.5, F.ones_like(s_pred),
                                  F.zeros_like(s_pred))
            kernel_mask = F.cast(kernel_mask, dtype='float32')
            kernel_mask = F.cast(F.logical_or(kernel_mask, score_gt),
                                 dtype='float32')
            s = F.cast(s, dtype='float32')
            kernel_intersection = F.sum(s * s_pred * training_masks *
                                        kernel_mask,
                                        axis=1)
            kernel_union = F.sum(
                training_masks * s * kernel_mask, axis=1) + F.sum(
                    training_masks * s_pred * kernel_mask, axis=1) + eps
            kernel_dice = 1. - F.mean(
                (2. * kernel_intersection / kernel_union))
            kernel_dices.append(kernel_dice)
        kernel_dice_loss = F.mean(F.array(kernel_dices))

        self.kernel_loss = kernel_dice_loss
        self.C_loss = C_dice_loss

        loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss

        return loss
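For reference, the dice terms computed above reduce to the following, averaged over the batch (a sketch of the formula implied by the code, with P the predicted map, G the ground truth, M the selected/training mask and eps the stabilizer):

$$L_{dice}(P, G, M) = 1 - \frac{2\sum_i P_i G_i M_i}{\sum_i P_i M_i + \sum_i G_i M_i + \epsilon},
\qquad L = \lambda\, L_{text} + (1 - \lambda)\, L_{kernel}$$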
Example #5
    def forward(self, pred, label, valid_length):  # pylint: disable=arguments-differ
        """

        Parameters
        ----------
        pred : Symbol or NDArray
            Shape (batch_size, length, V)
        label : Symbol or NDArray
            Shape (batch_size, length)
        valid_length : Symbol or NDArray
            Shape (batch_size, )
        Returns
        -------
        loss : Symbol or NDArray
            Shape (batch_size,)
        """
        if self._sparse_label:
            sample_weight = nd.cast(nd.expand_dims(nd.ones_like(label), axis=-1), dtype=np.float32)
        else:
            sample_weight = nd.ones_like(label)
        sample_weight = nd.SequenceMask(sample_weight,
                                       sequence_length=valid_length,
                                       use_sequence_length=True,
                                       axis=1)
        return super(SoftmaxCEMaskedLoss, self).forward(pred, label, sample_weight)
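The masking above relies on nd.SequenceMask; a small standalone sketch of its effect (toy shapes, independent of the loss class):

import mxnet as mx
from mxnet import nd

sample_weight = nd.ones((2, 4, 1))   # (batch_size, length, 1)
valid_length = nd.array([2, 3])      # number of valid steps per sample

masked = nd.SequenceMask(sample_weight,
                         sequence_length=valid_length,
                         use_sequence_length=True,
                         axis=1)
# Positions beyond each sample's valid length are zeroed out, so padded time
# steps contribute nothing to the masked softmax cross-entropy above.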
def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size):
    """
    Calculate cross entropy loss while ignoring padding.

    :param logits: Tensor of size [batch_size, length_logits, vocab_size]
    :param labels: Tensor of size [batch_size, length_labels]
    :param smoothing: Label smoothing constant, used to determine the on and off values
    :param vocab_size: int size of the vocabulary
    :return: a float32 tensor with shape
    [batch_size, max(length_logits, length_labels)]
    """
    logits, labels = _pad_tensors_to_same_length(logits, labels)

    confidence = 1.0 - smoothing
    low_confidence = (1.0 - confidence) / float(vocab_size - 1)
    soft_targets = nd.one_hot(indices=nd.cast(labels, dtype='int32'),
                              depth=vocab_size,
                              on_value=confidence,
                              off_value=low_confidence)
    softmax_cross_entropy = mx.gluon.loss.SoftmaxCrossEntropyLoss(
        axis=-1, sparse_label=False, from_logits=True)
    xentropy = softmax_cross_entropy(logits, soft_targets)

    normalizing_constant = -(confidence * np.log(confidence) +
                             float(vocab_size - 1) * low_confidence *
                             np.log(low_confidence + 1e-20))
    xentropy = xentropy - normalizing_constant

    return xentropy
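A hedged usage sketch for the function above (toy tensors; assumes `_pad_tensors_to_same_length` pads both inputs along the length axis as its name suggests):

import mxnet as mx
from mxnet import nd

vocab_size = 6
logits = nd.log_softmax(nd.random.uniform(shape=(2, 4, vocab_size)), axis=-1)  # from_logits=True expects log-probabilities
labels = nd.array([[1, 2, 3, 0], [4, 5, 0, 0]])  # 0 used as the padding id in this toy setup

xentropy = padded_cross_entropy_loss(logits, labels, smoothing=0.1, vocab_size=vocab_size)
# xentropy is the label-smoothed cross entropy shifted by the normalizing constant;
# padded positions are normally masked out by a separate weight tensor before averaging.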
Example #7
def translate_file(model,
                   subtokenizer,
                   input_file,
                   output_file=None,
                   print_all_translations=True):
    """Translate lines in file, and save to output file if specified.

      Args:
        model: the translation model used to generate the translations.
        subtokenizer: Subtokenizer object for encoding and decoding source and
           translated lines.
        input_file: file containing lines to translate
        output_file: file that stores the generated translations.
        print_all_translations: If true, all translations are printed to stdout.

      Raises:
        ValueError: if output file is invalid.
      """
    print("Begin translate file from: %s" % input_file)
    batch_size = _DECODE_BATCH_SIZE

    sorted_inputs, sorted_keys = _get_sorted_inputs(input_file)
    num_decode_batches = (len(sorted_inputs) - 1) // batch_size + 1

    def get_batch(idx):
        if idx == (num_decode_batches - 1):
            ret = sorted_inputs[idx * batch_size:]  # take all remaining inputs in the final batch
            leng = len(ret)
        else:
            ret = sorted_inputs[idx * batch_size:idx * batch_size + batch_size]
            leng = len(ret)

        max_length = 0
        for j in xrange(leng):
            ret[j] = _encode_and_add_eos(ret[j], subtokenizer)
            if max_length < len(ret[j]):
                max_length = len(ret[j])

        for k in xrange(leng):
            ret[k] = ret[k] + np.zeros(max_length - len(ret[k])).tolist()

        return nd.array(ret, ctx=ctx)

    translations = []
    for i in xrange(num_decode_batches):
        print("\t Tranlate batch %d of %d" % (i, num_decode_batches))
        output = model(get_batch(i))
        output = output['outputs']
        output = nd.cast(output, dtype='int32')
        for j in xrange(len(output)):
            translation = _trim_and_decode(output[j].asnumpy().tolist(),
                                           subtokenizer)
            translations.append(translation)

    if output_file is not None:
        print("Finished translation, writing the translated file.")
        with open(output_file, 'w') as f:
            for index in xrange(len(sorted_keys)):
                f.write("%s\n" % translations[sorted_keys[index]])
Example #8
    def _likelihood(self, init, append, connect, end, action_0, actions,
                    iw_ids, log_p_sigma, batch_size, iw_size):

        # decompose action:
        action_type, node_type, edge_type, append_pos, connect_pos = \
            actions[:, 0], actions[:, 1], actions[:, 2], actions[:, 3], actions[:, 4]
        _log_mask = lambda _x, _mask: _mask * nd.log(_x + 1e-10) + (
            1 - _mask) * nd.zeros_like(_x)

        # init
        init = init.reshape([batch_size * iw_size, self.N_A])
        index = nd.stack(nd.arange(action_0.shape[0],
                                   ctx=action_0.context,
                                   dtype='int32'),
                         action_0,
                         axis=0)
        loss_init = nd.log(nd.gather_nd(init, index) + 1e-10)

        # end
        loss_end = _log_mask(end, nd.cast(action_type == 2, 'float32'))

        # append
        index = nd.stack(append_pos, node_type, edge_type, axis=0)
        loss_append = _log_mask(nd.gather_nd(append, index),
                                nd.cast(action_type == 0, 'float32'))

        # connect
        index = nd.stack(connect_pos, edge_type, axis=0)
        loss_connect = _log_mask(nd.gather_nd(connect, index),
                                 nd.cast(action_type == 1, 'float32'))

        # sum up results
        log_p_x = loss_end + loss_append + loss_connect
        log_p_x = fn.squeeze(
            fn.SegmentSumFn(iw_ids,
                            batch_size * iw_size)(fn.unsqueeze(log_p_x, -1)),
            -1)
        log_p_x = log_p_x + loss_init

        # reshape
        log_p_x = log_p_x.reshape([batch_size, iw_size])
        log_p_sigma = log_p_sigma.reshape([batch_size, iw_size])
        l = log_p_x - log_p_sigma
        l = fn.logsumexp(l, axis=1) - math.log(float(iw_size))
        return l
Example #9
 def hybrid_forward(self, F, xcos_theta, xphi_theta, target):
     self.it += 1
     batch_size = target.size  # number of samples; target has shape (B,)
     oh_target = target.one_hot(xcos_theta.shape[1])
     self.lamb = max(self.LambdaMin, self.LambdaMax / (1 + 0.1 * self.it))
     # because indexing is not differentiable in mxnet, we must do this
     output = xcos_theta - \
              oh_target * xcos_theta[range(0, batch_size), target].reshape(-1, 1) / (1 + self.lamb) + \
              oh_target * xphi_theta[range(0, batch_size), target].reshape(-1, 1) / (1 + self.lamb)
     loss = nd.softmax_cross_entropy(output, nd.cast(target, 'float32'))  # (B,Classnum)
     return loss
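Written out, the indexing trick above only rewrites the target-class logit (with λ = self.lamb; a sketch of the algebra, not text from the source):

$$f_{y_i} = \cos\theta_{y_i} - \frac{\cos\theta_{y_i}}{1+\lambda} + \frac{\phi(\theta_{y_i})}{1+\lambda}
= \frac{\lambda\,\cos\theta_{y_i} + \phi(\theta_{y_i})}{1+\lambda},$$

while every other logit stays at cos θ_j; since λ decays with the iteration counter, the output anneals from plain softmax toward the angular-margin form.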
Example #10
    def hybrid_forward(self, F, score_gt, kernel_gt, score_pred,
                       training_masks, *args, **kwargs):
        s1, s2, s3, s4, s5, s6 = F.split(kernel_gt,
                                         num_outputs=6,
                                         axis=3,
                                         squeeze_axis=True)
        s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred, C_pred = F.split(
            score_pred, num_outputs=7, axis=1, squeeze_axis=True)

        self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)
        # classification loss
        eps = 1e-5
        intersection = F.sum(score_gt * C_pred * training_masks, axis=1)
        union = F.sum(training_masks * score_gt, axis=1) + F.sum(
            training_masks * C_pred, axis=1) + eps
        C_dice_loss = 1. - F.mean((2 * intersection / union))
        # loss for kernel
        kernel_dices = []
        for s, s_pred in zip(
            [s1, s2, s3, s4, s5, s6],
            [s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred]):
            kernel_mask = F.where((C_pred * training_masks > 0.5),
                                  F.ones_like(C_pred), F.zeros_like(C_pred))
            kernel_mask = F.cast(F.logical_or(kernel_mask, score_gt),
                                 dtype='float32')

            s = F.cast(s, dtype='float32')
            kernel_intersection = F.sum(s * s_pred * kernel_mask, axis=1)
            kernel_union = F.sum(s * kernel_mask, axis=1) + F.sum(
                s_pred * kernel_mask, axis=1) + eps
            kernel_dice = 1. - F.mean(
                (2. * kernel_intersection / kernel_union))
            kernel_dices.append(kernel_dice.asscalar())
        kernel_dice_loss = F.mean(F.array(kernel_dices))
        # print("kernel_loss:", kernel_dice_loss)
        self.C_loss = C_dice_loss
        self.kernel_loss = kernel_dice_loss
        loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss

        return loss
Example #11
    def _rnn_test(self, X, NX, NX_rep, NX_cum, h):
        # note: one partition for one molecule
        X_avg = fn.SegmentSumFn(NX_rep, NX.shape[0])(X) / nd.cast(
            fn.unsqueeze(NX, 1), 'float32')
        X_curr = nd.take(X, indices=NX_cum - 1)
        X = nd.concat(X_avg, X_curr, dim=1)  # size: [NX, F_in * 2]

        # rnn
        X = fn.unsqueeze(X, axis=1)
        X, h = self.rnn(X, h)

        X = fn.squeeze(X, axis=1)
        return X, h
def _gather_beams(nested, beam_indices, batch_size, new_beam_size, cache=None):
    """Gather beams from nested structure of tensors.

    Each tensor in nested represents a batch of beams, where beam refers to a
    single search state (beam search involves searching through multiple states
    in parallel).

    This function is used to gather the top beams, specified by
    beam_indices, from the nested tensors.

    Args:
      nested: Nested structure (tensor, list, tuple or dict) containing tensors
        with shape [batch_size, beam_size, ...].
      beam_indices: int32 tensor with shape [batch_size, new_beam_size]. Each
       value in beam_indices must be between [0, beam_size), and are not
       necessarily unique.
      batch_size: int size of batch
      new_beam_size: int number of beams to be pulled from the nested tensors.

    Returns:
      Nested structure containing tensors with shape
        [batch_size, new_beam_size, ...]
    """
    batch_pos = np.arange(0, batch_size * new_beam_size)
    batch_pos = nd.array(batch_pos, ctx=ctx, dtype='int32') / new_beam_size
    batch_pos = nd.reshape(batch_pos, (batch_size, new_beam_size))
    beam_indices = nd.cast(beam_indices, dtype='int32')

    coordinates = nd.stack(batch_pos, beam_indices, axis=2)
    m = coordinates.shape[0]
    n = coordinates.shape[1]
    coordinates_tmp = nd.zeros(shape=(m, 2, n), ctx=ctx)
    for i in xrange(m):
        coordinates_tmp[i] = coordinates[i].T

    coordinates_new = nd.ones(shape=(2, m, n), ctx=ctx)
    for i in xrange(m):
        coordinates_new[0][i] = coordinates_tmp[i][0]
        coordinates_new[1][i] = coordinates_tmp[i][1]

    if cache is None:
        for i in xrange(len(nested)):
            nested[i] = nd.gather_nd(nested[i], coordinates_new)
        return nested
    else:
        cache = map_structure(lambda t: nd.gather_nd(t, coordinates_new),
                              cache)
        return cache
Example #13
    def _rnn_train(self, X, NX, NX_rep, graph_to_rnn, rnn_to_graph, NX_cum):
        X_avg = fn.SegmentSumFn(NX_rep, NX.shape[0])(X) / nd.cast(
            fn.unsqueeze(NX, 1), 'float32')
        X_curr = nd.take(X, indices=NX_cum - 1)
        X = nd.concat(X_avg, X_curr, dim=1)

        # rnn
        X = nd.take(
            X,
            indices=graph_to_rnn)  # batch_size, iw_size, length, num_features
        batch_size, iw_size, length, num_features = X.shape
        X = X.reshape([batch_size * iw_size, length, num_features])
        X = self.rnn(X)

        X = X.reshape([batch_size, iw_size, length, -1])
        X = nd.gather_nd(X, indices=rnn_to_graph)

        return X
 def _initialize(self, force_reinit=True, ctx=mx.cpu(), dtype='float32'):
     for k, v in self.collect_params().items():
         if 'conv' in k:
             if 'weight' in k:
                 if 'first' in k or 'output' in k or 'fc' in k or 'squeeze' in k or 'excitation' in k:
                     v.initialize(mx.init.Normal(0.01), force_reinit=force_reinit, ctx=ctx)
                 elif 'transpose' in k:
                     v.initialize(mx.init.Normal(0.01), force_reinit=force_reinit, ctx=ctx)
                     v.set_data(nd.cast(generate_transpose_conv_kernel(v.shape[0]), dtype=dtype))
                     v.grad_req = 'null'
                 else:
                     v.initialize(mx.init.Normal(1.0 / v.shape[1]), force_reinit=force_reinit, ctx=ctx)
             if 'bias' in k:
                 v.initialize(mx.init.Constant(0), force_reinit=force_reinit, ctx=ctx)
         elif 'batchnorm' in k:
             if 'gamma' in k:
                 v.initialize(mx.init.Constant(1), force_reinit=force_reinit, ctx=ctx)
             if 'beta' in k:
                 v.initialize(mx.init.Constant(0.0001), force_reinit=force_reinit, ctx=ctx)
             if 'running' in k:
                 v.initialize(mx.init.Constant(0), force_reinit=force_reinit, ctx=ctx)
    def forward(self, X, NX, NX_rep, X_end=None):
        # segment mean for X
        if X_end is None:
            X_end = fn.SegmentSumFn(NX_rep, NX.shape[0])(X) / nd.cast(
                fn.unsqueeze(NX, 1), 'float32')
        X = nd.concat(X, X_end[NX_rep, :], dim=1)

        X_h = nd.relu(self.linear_h(X)).reshape([-1, self.F_h])
        X_h_end = nd.relu(self.linear_h_t(X_end)).reshape([-1, self.F_h])

        X_x = nd.exp(self.linear_x(X_h)).reshape(
            [-1, self.k, self.N_B + self.N_B * self.N_A])
        X_x_end = nd.exp(self.linear_x_t(X_h_end)).reshape([-1, self.k, 1])

        X_sum = nd.sum(fn.SegmentSumFn(NX_rep, NX.shape[0])(X_x),
                       -1,
                       keepdims=True) + X_x_end
        X_sum_gathered = X_sum[NX_rep, :, :]

        X_softmax = X_x / X_sum_gathered
        X_softmax_end = X_x_end / X_sum

        if self.k > 1:
            pi = fn.unsqueeze(nd.softmax(self.linear_pi(X_end), axis=1), -1)
            pi_gathered = pi[NX_rep, :, :]

            X_softmax = nd.sum(X_softmax * pi_gathered, axis=1)
            X_softmax_end = nd.sum(X_softmax_end * pi, axis=1)
        else:
            X_softmax = fn.squeeze(X_softmax, 1)
            X_softmax_end = fn.squeeze(X_softmax_end, 1)

        # generate output
        connect, append = X_softmax[:, :self.N_B], X_softmax[:, self.N_B:]
        append = append.reshape([-1, self.N_A, self.N_B])
        end = fn.squeeze(X_softmax_end, -1)

        return append, connect, end
Example #16
    def train(self):

        self.net.collect_params().reset_ctx(self.ctx)

        trainer = gluon.Trainer(
            params=self.net.collect_params(),
            optimizer='sgd',
            optimizer_params={
                'learning_rate': self.lr,
                'wd': self.wd,
                'momentum': self.momentum
            },
            update_on_kvstore=(False if self.use_amp else None))

        if self.use_amp:
            amp.init_trainer(trainer)

        lr_decay = self.lr_decay
        lr_steps = sorted(
            [float(ls) for ls in self.lr_decay_epoch.split(',') if ls.strip()])

        mbox_loss = SSDMultiBoxLoss()
        ce_metric = mx.metric.Loss('CrossEntropy')
        smoothl1_metric = mx.metric.Loss('SmoothL1')

        logging.info('Start training from scratch...')

        for epoch in range(self.epoch):
            while lr_steps and epoch > lr_steps[0]:
                new_lr = trainer.learning_rate * lr_decay
                lr_steps.pop(0)
                trainer.set_learning_rate(new_lr)
                logging.info("Epoch {} Set learning rate to {}".format(
                    epoch, new_lr))
            ce_metric.reset()
            smoothl1_metric.reset()
            tic = time.time()
            btic = time.time()
            # reset because saving params may change the context
            self.net.collect_params().reset_ctx(self.ctx)
            self.net.hybridize(static_alloc=True, static_shape=True)
            for i, batch in enumerate(self.train_data):
                data = [d.data[0] for d in batch]
                box_targets = [d.label[0] for d in batch]
                cls_targets = [
                    nd.cast(d.label[1], dtype='float32') for d in batch
                ]

                with autograd.record():
                    cls_preds = []
                    box_preds = []
                    for x in data:
                        cls_pred, box_pred, _ = self.net(x)
                        cls_preds.append(cls_pred)
                        box_preds.append(box_pred)
                    sum_loss, cls_loss, box_loss = mbox_loss(
                        cls_preds, box_preds, cls_targets, box_targets)
                    if self.use_amp:
                        with amp.scale_loss(sum_loss, trainer) as scaled_loss:
                            autograd.backward(scaled_loss)
                    else:
                        autograd.backward(sum_loss)
                # since we have already normalized the loss, we don't want to normalize
                # by batch-size anymore
                trainer.step(1)
                ce_metric.update(0, [l * self.batch_size for l in cls_loss])
                smoothl1_metric.update(0,
                                       [l * self.batch_size for l in box_loss])
                if i > 0 and i % 50 == 0:
                    name1, loss1 = ce_metric.get()
                    name2, loss2 = smoothl1_metric.get()
                    logging.info('Epoch {} Batch {} Speed: {:.3f} samples/s, {}={:.3f}, {}={:.3f}'.\
                           format(epoch, i, self.batch_size/(time.time()-btic), name1, loss1, name2, loss2))

                btic = time.time()
            map_name, mean_ap = self.validation()
            val_msg = '\n'.join(
                ['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)])
            logging.info('[Epoch {}] Validation: \n{}'.format(epoch, val_msg))
            self.save_params(epoch)
def test_cast():
    x = create_vector(size=LARGE_X // 4)
    x = nd.tile(x, 4)
    y = nd.cast(x, np.int32)
    assert y.dtype == np.int32
    assert y[-1] == LARGE_X // 4 - 1
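For reference, a tiny standalone sketch of the float-to-int behaviour of nd.cast used in the test above (values chosen for illustration):

import numpy as np
import mxnet as mx
from mxnet import nd

x = nd.array([0.0, 1.7, 2.9])
y = nd.cast(x, dtype=np.int32)
print(y.asnumpy())   # [0 1 2] -- fractional parts are truncated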
Example #18
def train(opt):

    batch_size = opt.batch_size
    num_joints = opt.num_joints

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    model_name = opt.model

    kwargs = {
        'ctx': ctx,
        'num_joints': num_joints,
        'pretrained': opt.use_pretrained,
        'pretrained_base': opt.use_pretrained_base,
        'pretrained_ctx': ctx
    }

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)

    input_size = [int(i) for i in opt.input_size.split(',')]
    train_dataset, train_data, train_batch_fn = get_data_loader(
        opt, batch_size, num_workers, input_size)

    num_training_samples = len(train_dataset)
    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=0,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])

    # optimizer = 'sgd'
    # optimizer_params = {'wd': opt.wd, 'momentum': 0.9, 'lr_scheduler': lr_scheduler}
    optimizer = 'adam'
    optimizer_params = {'wd': opt.wd, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    if opt.use_pretrained_base:
        if model_name.startswith('simple'):
            net.deconv_layers.initialize(ctx=ctx)
            net.final_layer.initialize(ctx=ctx)
        elif model_name.startswith('mobile'):
            net.upsampling.initialize(ctx=ctx)
    else:
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss()
    metric = HeatmapAccuracy()

    best_val_score = 1

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        for i, batch in enumerate(train_data):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [
                    nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                    for yhat, y, w in zip(outputs, label, weight)
                ]
            ag.backward(loss)
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i + 1) % opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info(
                    'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'
                    % (epoch, i, batch_size * opt.log_interval /
                       (time.time() - btic), loss_val / (i + 1),
                       trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info(
            'Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n' %
            (epoch, int(i * batch_size / time_elapsed), int(time_elapsed),
             loss_val / (i + 1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params' %
                                (save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states' %
                                (save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params' %
                            (save_dir, model_name, opt.num_epochs - 1))
        trainer.save_states('%s/%s-%d.states' %
                            (save_dir, model_name, opt.num_epochs - 1))

    return net
Example #19
def train(net, train_data, val_data, eval_metric, ctx, args):
    """Training pipeline"""
    net.collect_params().reset_ctx(ctx)

    if args.horovod:
        hvd.broadcast_parameters(net.collect_params(), root_rank=0)
        trainer = hvd.DistributedTrainer(
                        net.collect_params(), 'sgd',
                        {'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.momentum})
    else:
        trainer = gluon.Trainer(
                    net.collect_params(), 'sgd',
                    {'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.momentum},
                    update_on_kvstore=(False if args.amp else None))

    if args.amp:
        amp.init_trainer(trainer)

    # lr decay policy
    lr_decay = float(args.lr_decay)
    lr_steps = sorted([float(ls) for ls in args.lr_decay_epoch.split(',') if ls.strip()])

    mbox_loss = gcv.loss.SSDMultiBoxLoss()
    ce_metric = mx.metric.Loss('CrossEntropy')
    smoothl1_metric = mx.metric.Loss('SmoothL1')

    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = args.save_prefix + '_train.log'
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)
    logger.info(args)
    logger.info('Start training from [Epoch {}]'.format(args.start_epoch))
    best_map = [0]

    for epoch in range(args.start_epoch, args.epochs):
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = trainer.learning_rate * lr_decay
            lr_steps.pop(0)
            trainer.set_learning_rate(new_lr)
            logger.info("[Epoch {}] Set learning rate to {}".format(epoch, new_lr))
        ce_metric.reset()
        smoothl1_metric.reset()
        tic = time.time()
        btic = time.time()
        net.hybridize(static_alloc=True, static_shape=True)

        for i, batch in enumerate(train_data):
            if args.dali:
                # dali iterator returns a mxnet.io.DataBatch
                data = [d.data[0] for d in batch]
                box_targets = [d.label[0] for d in batch]
                cls_targets = [nd.cast(d.label[1], dtype='float32') for d in batch]

            else:
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                cls_targets = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
                box_targets = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)

            with autograd.record():
                cls_preds = []
                box_preds = []
                for x in data:
                    cls_pred, box_pred, _ = net(x)
                    cls_preds.append(cls_pred)
                    box_preds.append(box_pred)
                sum_loss, cls_loss, box_loss = mbox_loss(
                    cls_preds, box_preds, cls_targets, box_targets)
                if args.amp:
                    with amp.scale_loss(sum_loss, trainer) as scaled_loss:
                        autograd.backward(scaled_loss)
                else:
                    autograd.backward(sum_loss)
            # since we have already normalized the loss, we don't want to normalize
            # by batch-size anymore
            trainer.step(1)

            if (not args.horovod or hvd.rank() == 0):
                local_batch_size = int(args.batch_size // (hvd.size() if args.horovod else 1))
                ce_metric.update(0, [l * local_batch_size for l in cls_loss])
                smoothl1_metric.update(0, [l * local_batch_size for l in box_loss])
                if args.log_interval and not (i + 1) % args.log_interval:
                    name1, loss1 = ce_metric.get()
                    name2, loss2 = smoothl1_metric.get()
                    logger.info('[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'.format(
                        epoch, i, args.batch_size/(time.time()-btic), name1, loss1, name2, loss2))
                btic = time.time()

        if (not args.horovod or hvd.rank() == 0):
            name1, loss1 = ce_metric.get()
            name2, loss2 = smoothl1_metric.get()
            logger.info('[Epoch {}] Training cost: {:.3f}, {}={:.3f}, {}={:.3f}'.format(
                epoch, (time.time()-tic), name1, loss1, name2, loss2))
            if (epoch % args.val_interval == 0) or (args.save_interval and epoch % args.save_interval == 0):
                # consider reducing the frequency of validation to save time
                map_name, mean_ap = validate(net, val_data, ctx, eval_metric)
                val_msg = '\n'.join(['{}={}'.format(k, v) for k, v in zip(map_name, mean_ap)])
                logger.info('[Epoch {}] Validation: \n{}'.format(epoch, val_msg))
                current_map = float(mean_ap[-1])
            else:
                current_map = 0.
            save_params(net, best_map, current_map, epoch, args.save_interval, args.save_prefix)
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss(weight=2.0)
    metric = HeatmapAccuracy()

    best_ap = 0

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        train_data_desc = tqdm(train_data, dynamic_ncols=True)
        for i, batch in enumerate(train_data_desc):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [
                    nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                    for yhat, y, w in zip(outputs, label, weight)
                ]
            ag.backward(loss)
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i + 1) % opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info(
                    'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'
                    % (epoch, i, batch_size * opt.log_interval /
                       (time.time() - btic), loss_val / (i + 1),
                       trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info(
            'Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n' %
            (epoch, int(i * batch_size / time_elapsed), int(time_elapsed),
             loss_val / (i + 1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params' %
                                (save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states' %
                                (save_dir, model_name, epoch))
        if (epoch + 1) % 2 == 0:
            res = validate(val_data, val_dataset, net, context, opt)[0]
            logger.info(res)
            if res['AP'] > best_ap:
                best_ap = res['AP']
                net.save_parameters(
                    f'{save_dir}/best-{round(best_ap, 3)}.params')
                if os.path.islink(f'{save_dir}/final.params'):
                    os.remove(f'{save_dir}/final.params')
                os.symlink(f'./best-{round(best_ap, 3)}.params',
                           f'{save_dir}/final.params')

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params' %
                            (save_dir, model_name, opt.num_epochs - 1))
        trainer.save_states('%s/%s-%d.states' %
                            (save_dir, model_name, opt.num_epochs - 1))

    return net
Example #21
    def calulation(self, input_str, ko_dict, en_dict, en_rev_dict, ctx):
        """
        inference 코드 
        """
        # add START and END tokens before and after the input
        input_str = [
            [
                'START',
            ] + mecab.morphs(input_str.strip()) + [
                'END',
            ],
        ]
        X = encoding_and_padding(input_str,
                                 ko_dict,
                                 max_seq=self.max_seq_length)
        #string to embed
        inputs = F.array(X, ctx=ctx)

        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        #encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
        next_h = F.random.normal(0, 1, (1, self.n_hidden), ctx=ctx)
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            if j == 0:
                enouts = enout
                next_hs = next_h
            else:
                enouts = F.concat(enouts, enout, dim=1)
                next_hs = F.concat(next_hs, next_h, dim=1)
        #masking with 0 using length
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # because take only supports dim 0
        # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
        next_hs = next_hs.take(in_sent_last_idx)

        # embed 'START' to use as the decoder's initial input
        Y_init = F.array([
            [
                en_dict['START'],
            ],
        ], ctx=ctx)
        Y_init = F.cast(self.embedding(Y_init), dtype='float32')
        deout = Y_init[:, 0, :]

        # iterate over the output sequence length
        for i in range(self.out_seq_len):
            if self.attention:
                #print(deout.shape)
                deout, att_weight = self.apply_attention(
                    F=F, inputs=deout, hidden=next_hs, encoder_outputs=enouts)
                if i == 0:
                    att_weights = att_weight
                else:
                    att_weights = F.concat(att_weights, att_weight, dim=0)
            deout, (next_hs, ) = self.decoder(deout, [
                next_hs,
            ])
            # expand/restore dimensions to apply batchnorm
            deout = F.expand_dims(deout, axis=1)
            deout = self.batchnorm(deout)
            #reduce dim
            deout = deout[:, 0, :]
            # derive the output for the next sequence step after 'START'
            deout_sm = self.dense(deout)
            #print(deout_sm.shape)
            deout = F.one_hot(F.argmax(F.softmax(deout_sm, axis=1), axis=1),
                              depth=self.vocab_size)
            #print(deout.shape)
            # convert into a form the decoder accepts (apply embedding and match dimensions)
            deout = F.argmax(deout, axis=1)
            deout = F.expand_dims(deout, axis=0)
            deout = F.cast(self.embedding(deout)[:, 0, :], dtype='float32')
            gen_char = en_rev_dict[F.argmax(deout_sm,
                                            axis=1).asnumpy()[0].astype('int')]
            if gen_char == '__PAD__' or gen_char == 'END':
                break
            else:
                if i == 0:
                    ret_seq = [
                        gen_char,
                    ]
                else:
                    ret_seq += [
                        gen_char,
                    ]
        return (" ".join(ret_seq), att_weights)
Example #22
    def hybrid_forward(self, F, inputs, outputs, initial_hidden_state,
                       batch_size_seq):
        # token value 2 == END tag index (marks the end of a sentence)
        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        outputs = F.cast(outputs, dtype='float32')
        out_sent_last_idx = F.argmax(F.where(outputs == self.end_idx,
                                             F.ones_like(outputs),
                                             F.zeros_like(outputs)),
                                     axis=1)
        #encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')

        next_h = initial_hidden_state
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            if j == 0:
                enouts = enout
                next_hs = next_h
            else:
                enouts = F.concat(enouts, enout, dim=1)
                next_hs = F.concat(next_hs, next_h, dim=1)
        #masking with 0 using length
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # because take only supports dim 0
        # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
        next_hs = next_hs.take(in_sent_last_idx +
                               (batch_size_seq * self.max_seq_length))
        embeddinged_out = F.cast(self.embedding(outputs), dtype='float32')

        #decoder GRU with attention
        for i in range(self.out_seq_len):
            # unroll the GRUCell for out_seq_len steps and accumulate the outputs
            p_outputs = F.slice_axis(embeddinged_out,
                                     axis=1,
                                     begin=i,
                                     end=i + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            # p_outputs = outputs[:,i,:]
            # done this way (rather than the slicing commented above) to support hybridize
            if self.attention:
                p_outputs, _ = self.apply_attention(F=F,
                                                    inputs=p_outputs,
                                                    hidden=next_hs,
                                                    encoder_outputs=enouts)
            deout, (next_hs, ) = self.decoder(p_outputs, [
                next_hs,
            ])
            if i == 0:
                deouts = deout
            else:
                deouts = F.concat(deouts, deout, dim=1)
        # reshape from 2-dim to 3-dim
        deouts = F.reshape(deouts, (-1, self.out_seq_len, self.n_hidden))
        #0 padding
        deouts = F.transpose(deouts, (1, 0, 2))
        deouts = F.SequenceMask(deouts,
                                sequence_length=out_sent_last_idx + 1,
                                use_sequence_length=True)
        deouts = F.transpose(deouts, (1, 0, 2))

        deouts = self.batchnorm(deouts)
        deouts_fc = self.dense(deouts)
        return (deouts_fc)
Example #23
    def _train_loop(self, train_data, val_data, train_eval_data):
        # fix seed for mxnet, numpy and python builtin random generator.
        gutils.random.seed(self._cfg.train.seed)
        # loss and metric
        mbox_loss = SSDMultiBoxLoss()
        ce_metric = mx.metric.Loss('CrossEntropy')
        smoothl1_metric = mx.metric.Loss('SmoothL1')

        # lr decay policy
        lr_decay = float(self._cfg.train.lr_decay)
        lr_steps = sorted([float(ls) for ls in self._cfg.train.lr_decay_epoch])

        self._logger.info('Start training from [Epoch %d]',
                          max(self._cfg.train.start_epoch, self.epoch))

        self.net.collect_params().reset_ctx(self.ctx)
        for self.epoch in range(max(self._cfg.train.start_epoch, self.epoch),
                                self._cfg.train.epochs):
            epoch = self.epoch
            while lr_steps and epoch >= lr_steps[0]:
                new_lr = self.trainer.learning_rate * lr_decay
                lr_steps.pop(0)
                self.trainer.set_learning_rate(new_lr)
                self._logger.info("[Epoch {}] Set learning rate to {}".format(
                    epoch, new_lr))
            ce_metric.reset()
            smoothl1_metric.reset()
            tic = time.time()
            btic = time.time()
            self.net.hybridize(static_alloc=True, static_shape=True)

            for i, batch in enumerate(train_data):
                if self._cfg.train.dali:
                    # dali iterator returns a mxnet.io.DataBatch
                    data = [d.data[0] for d in batch]
                    box_targets = [d.label[0] for d in batch]
                    cls_targets = [
                        nd.cast(d.label[1], dtype='float32') for d in batch
                    ]
                else:
                    data = gluon.utils.split_and_load(batch[0],
                                                      ctx_list=self.ctx,
                                                      batch_axis=0,
                                                      even_split=False)
                    cls_targets = gluon.utils.split_and_load(batch[1],
                                                             ctx_list=self.ctx,
                                                             batch_axis=0,
                                                             even_split=False)
                    box_targets = gluon.utils.split_and_load(batch[2],
                                                             ctx_list=self.ctx,
                                                             batch_axis=0,
                                                             even_split=False)

                with autograd.record():
                    cls_preds = []
                    box_preds = []
                    for x in data:
                        cls_pred, box_pred, _ = self.net(x)
                        cls_preds.append(cls_pred)
                        box_preds.append(box_pred)
                    sum_loss, cls_loss, box_loss = mbox_loss(
                        cls_preds, box_preds, cls_targets, box_targets)
                    if self._cfg.ssd.amp:
                        with amp.scale_loss(sum_loss,
                                            self.trainer) as scaled_loss:
                            autograd.backward(scaled_loss)
                    else:
                        autograd.backward(sum_loss)
                # since we have already normalized the loss, we don't want to normalize
                # by batch-size anymore
                self.trainer.step(1)

                if not self._cfg.horovod or hvd.rank() == 0:
                    local_batch_size = int(
                        self._cfg.train.batch_size //
                        (hvd.size() if self._cfg.horovod else 1))
                    ce_metric.update(0,
                                     [l * local_batch_size for l in cls_loss])
                    smoothl1_metric.update(
                        0, [l * local_batch_size for l in box_loss])
                    if self._cfg.train.log_interval and not (
                            i + 1) % self._cfg.train.log_interval:
                        name1, loss1 = ce_metric.get()
                        name2, loss2 = smoothl1_metric.get()
                        self._logger.info(
                            '[Epoch %d][Batch %d], Speed: %f samples/sec, %s=%f, %s=%f',
                            epoch, i,
                            self._cfg.train.batch_size / (time.time() - btic),
                            name1, loss1, name2, loss2)
                    btic = time.time()

            if not self._cfg.horovod or hvd.rank() == 0:
                name1, loss1 = ce_metric.get()
                name2, loss2 = smoothl1_metric.get()
                self._logger.info('[Epoch %d] Training cost: %f, %s=%f, %s=%f',
                                  epoch, (time.time() - tic), name1, loss1,
                                  name2, loss2)
                if (epoch % self._cfg.valid.val_interval == 0) or \
                    (self._cfg.save_interval and epoch % self._cfg.save_interval == 0):
                    # consider reducing the frequency of validation to save time
                    map_name, mean_ap = self._evaluate(val_data)
                    val_msg = '\n'.join([
                        '{}={}'.format(k, v)
                        for k, v in zip(map_name, mean_ap)
                    ])
                    self._logger.info('[Epoch %d] Validation: \n%s', epoch,
                                      str(val_msg))
                    current_map = float(mean_ap[-1])
                    if current_map > self._best_map:
                        cp_name = os.path.join(self._logdir,
                                               'best_checkpoint.pkl')
                        self._logger.info(
                            '[Epoch %d] Current best map: %f vs previous %f, saved to %s',
                            self.epoch, current_map, self._best_map, cp_name)
                        self.save(cp_name)
                        self._best_map = current_map
                if self._reporter:
                    self._reporter(epoch=epoch, map_reward=current_map)
            self._time_elapsed += time.time() - btic
        # map on train data
        map_name, mean_ap = self._evaluate(train_eval_data)
        return {
            'train_map': float(mean_ap[-1]),
            'valid_map': self._best_map,
            'time': self._time_elapsed
        }
Example #24
def train():
    """Training function."""
    trainer = gluon.Trainer(model.collect_params(), args.optimizer,
                            {'learning_rate': args.lr, 'beta2': 0.98, 'epsilon': 1e-9})

    train_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(), btf.Stack())
    test_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(), btf.Stack(), btf.Stack())
    target_val_lengths = list(map(lambda x: x[-1], data_val_lengths))
    target_test_lengths = list(map(lambda x: x[-1], data_test_lengths))
    if args.bucket_scheme == 'constant':
        bucket_scheme = ConstWidthBucket()
    elif args.bucket_scheme == 'linear':
        bucket_scheme = LinearWidthBucket()
    elif args.bucket_scheme == 'exp':
        bucket_scheme = ExpWidthBucket(bucket_len_step=1.2)
    else:
        raise NotImplementedError
    train_batch_sampler = FixedBucketSampler(lengths=data_train_lengths,
                                             batch_size=args.batch_size,
                                             num_buckets=args.num_buckets,
                                             ratio=args.bucket_ratio,
                                             shuffle=True,
                                             use_average_length=True,
                                             bucket_scheme=bucket_scheme)
    logging.info('Train Batch Sampler:\n{}'.format(train_batch_sampler.stats()))
    train_data_loader = DataLoader(data_train,
                                   batch_sampler=train_batch_sampler,
                                   batchify_fn=train_batchify_fn,
                                   num_workers=8)

    val_batch_sampler = FixedBucketSampler(lengths=target_val_lengths,
                                           batch_size=args.test_batch_size,
                                           num_buckets=args.num_buckets,
                                           ratio=args.bucket_ratio,
                                           shuffle=False,
                                           use_average_length=True,
                                           bucket_scheme=bucket_scheme)
    logging.info('Valid Batch Sampler:\n{}'.format(val_batch_sampler.stats()))
    val_data_loader = DataLoader(data_val,
                                 batch_sampler=val_batch_sampler,
                                 batchify_fn=test_batchify_fn,
                                 num_workers=8)
    test_batch_sampler = FixedBucketSampler(lengths=target_test_lengths,
                                            batch_size=args.test_batch_size,
                                            num_buckets=args.num_buckets,
                                            ratio=args.bucket_ratio,
                                            shuffle=False,
                                            use_average_length=True,
                                            bucket_scheme=bucket_scheme)
    logging.info('Test Batch Sampler:\n{}'.format(test_batch_sampler.stats()))
    test_data_loader = DataLoader(data_test,
                                  batch_sampler=test_batch_sampler,
                                  batchify_fn=test_batchify_fn,
                                  num_workers=8)

    if args.bleu == 'tweaked':
        bpe = True
        split_compound_word = True
        tokenized = True
    elif args.bleu == '13a' or args.bleu == 'intl':
        bpe = False
        split_compound_word = False
        tokenized = False
    else:
        raise NotImplementedError

    best_valid_bleu = 0.0
    step_num = 0
    warmup_steps = args.warmup_steps
    grad_interval = args.num_accumulated
    model.collect_params().setattr('grad_req', 'add')
    average_start = (len(train_data_loader) // grad_interval) * (args.epochs - args.average_start)
    average_param_dict = None
    model.collect_params().zero_grad()
    for epoch_id in range(args.epochs):
        log_avg_loss = 0
        log_wc = 0
        loss_denom = 0
        step_loss = 0
        log_start_time = time.time()
        for batch_id, (src_seq, tgt_seq, src_valid_length, tgt_valid_length) \
                in enumerate(train_data_loader):
            src_valid_length = nd.cast(src_valid_length, dtype='float32')
            tgt_valid_length = nd.cast(tgt_valid_length, dtype='float32')
            if batch_id % grad_interval == 0:
                step_num += 1
                new_lr = args.lr / math.sqrt(args.num_units) \
                         * min(1. / math.sqrt(step_num), step_num * warmup_steps ** (-1.5))
                trainer.set_learning_rate(new_lr)
            src_wc = src_valid_length.sum().asscalar()
            tgt_wc = tgt_valid_length.sum().asscalar()
            loss_denom += tgt_wc - tgt_valid_length.shape[0]
            if src_seq.shape[0] > len(ctx):
                src_seq_list, tgt_seq_list, src_valid_length_list, tgt_valid_length_list \
                    = [gluon.utils.split_and_load(seq, ctx, batch_axis=0, even_split=False)
                       for seq in [src_seq, tgt_seq, src_valid_length, tgt_valid_length]]
            else:
                src_seq_list = [src_seq.as_in_context(ctx[0])]
                tgt_seq_list = [tgt_seq.as_in_context(ctx[0])]
                src_valid_length_list = [src_valid_length.as_in_context(ctx[0])]
                tgt_valid_length_list = [tgt_valid_length.as_in_context(ctx[0])]

            Ls = []
            with mx.autograd.record():
                for src_seq, tgt_seq, src_valid_length, tgt_valid_length \
                        in zip(src_seq_list, tgt_seq_list,
                               src_valid_length_list, tgt_valid_length_list):
                    out, _ = model(src_seq, tgt_seq[:, :-1],
                                   src_valid_length, tgt_valid_length - 1)
                    smoothed_label = label_smoothing(tgt_seq[:, 1:])
                    ls = loss_function(out, smoothed_label, tgt_valid_length - 1).sum()
                    Ls.append((ls * (tgt_seq.shape[1] - 1)) / args.batch_size)
            for L in Ls:
                L.backward()
            if batch_id % grad_interval == grad_interval - 1 or\
                    batch_id == len(train_data_loader) - 1:
                if average_param_dict is None:
                    average_param_dict = {k: v.data(ctx[0]).copy() for k, v in
                                          model.collect_params().items()}
                trainer.step(float(loss_denom) / args.batch_size)
                param_dict = model.collect_params()
                param_dict.zero_grad()
                if step_num > average_start:
                    alpha = 1. / max(1, step_num - average_start)
                    for name, average_param in average_param_dict.items():
                        average_param[:] += alpha * (param_dict[name].data(ctx[0]) - average_param)
            step_loss += sum([L.asscalar() for L in Ls])
            if batch_id % grad_interval == grad_interval - 1 or\
                    batch_id == len(train_data_loader) - 1:
                log_avg_loss += step_loss / loss_denom * args.batch_size
                loss_denom = 0
                step_loss = 0
            log_wc += src_wc + tgt_wc
            if (batch_id + 1) % (args.log_interval * grad_interval) == 0:
                wps = log_wc / (time.time() - log_start_time)
                logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, ppl={:.4f}, '
                             'throughput={:.2f}K wps, wc={:.2f}K'
                             .format(epoch_id, batch_id + 1, len(train_data_loader),
                                     log_avg_loss / args.log_interval,
                                     np.exp(log_avg_loss / args.log_interval),
                                     wps / 1000, log_wc / 1000))
                log_start_time = time.time()
                log_avg_loss = 0
                log_wc = 0
        mx.nd.waitall()
        valid_loss, valid_translation_out = evaluate(val_data_loader, ctx[0])
        valid_bleu_score, _, _, _, _ = compute_bleu([val_tgt_sentences], valid_translation_out,
                                                    tokenized=tokenized, tokenizer=args.bleu,
                                                    split_compound_word=split_compound_word,
                                                    bpe=bpe)
        logging.info('[Epoch {}] valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
                     .format(epoch_id, valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
        test_loss, test_translation_out = evaluate(test_data_loader, ctx[0])
        test_bleu_score, _, _, _, _ = compute_bleu([test_tgt_sentences], test_translation_out,
                                                   tokenized=tokenized, tokenizer=args.bleu,
                                                   split_compound_word=split_compound_word,
                                                   bpe=bpe)
        logging.info('[Epoch {}] test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'
                     .format(epoch_id, test_loss, np.exp(test_loss), test_bleu_score * 100))
        write_sentences(valid_translation_out,
                        os.path.join(args.save_dir, 'epoch{:d}_valid_out.txt').format(epoch_id))
        write_sentences(test_translation_out,
                        os.path.join(args.save_dir, 'epoch{:d}_test_out.txt').format(epoch_id))
        if valid_bleu_score > best_valid_bleu:
            best_valid_bleu = valid_bleu_score
            save_path = os.path.join(args.save_dir, 'valid_best.params')
            logging.info('Save best parameters to {}'.format(save_path))
            model.save_params(save_path)
        save_path = os.path.join(args.save_dir, 'epoch{:d}.params'.format(epoch_id))
        model.save_params(save_path)
    save_path = os.path.join(args.save_dir, 'average.params')
    mx.nd.save(save_path, average_param_dict)
    if args.average_checkpoint:
        for j in range(args.num_averages):
            params = mx.nd.load(os.path.join(args.save_dir,
                                             'epoch{:d}.params'.format(args.epochs - j - 1)))
            alpha = 1. / (j + 1)
            for k, v in model._collect_params_with_prefix().items():
                for c in ctx:
                    v.data(c)[:] += alpha * (params[k].as_in_context(c) - v.data(c))
    elif args.average_start > 0:
        for k, v in model.collect_params().items():
            v.set_data(average_param_dict[k])
    else:
        model.load_params(os.path.join(args.save_dir, 'valid_best.params'), ctx)
    valid_loss, valid_translation_out = evaluate(val_data_loader, ctx[0])
    valid_bleu_score, _, _, _, _ = compute_bleu([val_tgt_sentences], valid_translation_out,
                                                tokenized=tokenized, tokenizer=args.bleu, bpe=bpe,
                                                split_compound_word=split_compound_word)
    logging.info('Best model valid Loss={:.4f}, valid ppl={:.4f}, valid bleu={:.2f}'
                 .format(valid_loss, np.exp(valid_loss), valid_bleu_score * 100))
    test_loss, test_translation_out = evaluate(test_data_loader, ctx[0])
    test_bleu_score, _, _, _, _ = compute_bleu([test_tgt_sentences], test_translation_out,
                                               tokenized=tokenized, tokenizer=args.bleu, bpe=bpe,
                                               split_compound_word=split_compound_word)
    logging.info('Best model test Loss={:.4f}, test ppl={:.4f}, test bleu={:.2f}'
                 .format(test_loss, np.exp(test_loss), test_bleu_score * 100))
    write_sentences(valid_translation_out,
                    os.path.join(args.save_dir, 'best_valid_out.txt'))
    write_sentences(test_translation_out,
                    os.path.join(args.save_dir, 'best_test_out.txt'))
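The learning-rate update inside the loop above follows the Transformer warmup-then-decay schedule: lr / sqrt(num_units) * min(1/sqrt(step), step * warmup_steps^-1.5). A minimal sketch of that schedule in isolation (the function name is illustrative; the arguments mirror the script's `args`):

import math

def transformer_lr(step_num, base_lr, num_units, warmup_steps):
    # linear warmup for the first `warmup_steps` updates, then 1/sqrt(step_num) decay
    return base_lr / math.sqrt(num_units) * min(1.0 / math.sqrt(step_num),
                                                step_num * warmup_steps ** (-1.5))

# e.g. transformer_lr(1, 2.0, 512, 4000) is tiny, the peak is reached at step 4000,
# and the rate decays as 1/sqrt(step_num) afterwards

The parameter averaging further down uses the same incremental-mean idea: after `average_start`, each averaged parameter is updated as average += (param - average) / max(1, step_num - average_start).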
def test_cast():
    x = create_2d_tensor(rows=SMALL_Y, columns=LARGE_X)
    y = nd.cast(x, np.int32)
    assert y.dtype == np.int32
    assert y[-1][-1] == SMALL_Y-1
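The test above exercises `nd.cast` through large-tensor helpers; the cast itself is a plain dtype conversion, and float-to-int casts drop the fractional part. A small self-contained version without those helpers:

import numpy as np
from mxnet import nd

x = nd.array([[0.7, 1.2], [2.9, 3.0]])     # float32 by default
y = nd.cast(x, np.int32)                    # fractional parts are dropped
assert y.dtype == np.int32
assert y.asnumpy().tolist() == [[0, 1], [2, 3]]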
Example #26
def clip_pass_gradient(x, l=-1., u=1.):
    """Clip x to [l, u] in the forward pass; the gradient passes through unchanged."""
    clip_up = nd.cast(x > u, "float32")
    clip_low = nd.cast(x < l, "float32")
    return x + nd.stop_gradient((u - x) * clip_up +
                                (l - x) * clip_low)
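`clip_pass_gradient` clips the forward value to [l, u] but, because the correction term is wrapped in `nd.stop_gradient`, the backward pass behaves as if no clipping happened (a straight-through estimator). A quick check of that behaviour under autograd, reusing the definition above:

from mxnet import nd, autograd

x = nd.array([-2.0, 0.5, 3.0])
x.attach_grad()
with autograd.record():
    y = clip_pass_gradient(x, l=-1.0, u=1.0)
y.backward()
print(y)        # [-1.  0.5  1.]  -- values are clipped
print(x.grad)   # [ 1.  1.  1.]   -- gradient passes straight through the clip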
Example #27
def min_between(arr1, arr2):
    # element-wise minimum of two NDArrays, computed via NumPy and returned as float64
    return nd.cast(
        nd.min(nd.array([arr1.asnumpy(), arr2.asnumpy()]), axis=0), dtype="float64"
    )
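`min_between` copies both arrays to the host via `asnumpy()` before taking the minimum, which forces synchronization. When only an element-wise minimum is needed, `nd.minimum` does the same work directly on NDArrays; a hedged alternative sketch (the function name is illustrative):

from mxnet import nd

def min_between_nd(arr1, arr2):
    # element-wise minimum without leaving the NDArray side (no host copy)
    return nd.cast(nd.minimum(arr1, arr2), dtype="float64")

a = nd.array([1.0, 4.0, 2.0])
b = nd.array([3.0, 0.5, 2.5])
print(min_between_nd(a, b))   # [1.  0.5 2. ] as float64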