Example #1
 def __call__(self, epoch: int, sym: mxnet.symbol.Symbol,
              arg: Dict[str, np.ndarray], aux: Dict[str, np.ndarray]):
     # Only checkpoint every `self._frequency` epochs.
     if epoch % self._frequency != 0:
         return
     # Write the MXNet checkpoint files into the directory managed by Ray Tune.
     with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
         save_checkpoint(os.path.join(checkpoint_dir, self._filename),
                         epoch, sym, arg, aux)
Example #2
    def save_best_model(self):
        # best_param holds (epoch, sym, arg_params, aux_params), matching
        # save_checkpoint's positional arguments after the prefix.
        if self.best_param is None or self.best_acc == 0:
            print('No Best Model')
            return

        from mxnet.model import save_checkpoint
        save_checkpoint("%s[ACC-%0.5f E%d]" %
                        (self.path, self.best_acc, self.best_param[0]), *self.best_param)
Example #3
 def _callback(epoch, sym, arg, aux):
     if epoch % period == 4:
         save_checkpoint(os.path.join(os.environ['ROOT_DIR'], prefix),
                         epoch, sym, arg, aux)
         # save_checkpoint wrote <prefix>-symbol.json and <prefix>-%04d.params
         # under ROOT_DIR; push both files to S3.
         symbol_fp = '%s-symbol.json' % prefix
         param_fp = '%s-%04d.params' % (prefix, epoch)
         setup_upload_from_s3(symbol_fp, recursive=False)
         setup_upload_from_s3(param_fp, recursive=False)
Example #4
    def save_best_model(self):
        if self.best_param is None or self.best_acc == 0:
            print('No Best Model')
            return

        from mxnet.model import save_checkpoint
        save_checkpoint("%s[ACC-%0.5f E%d]" %
                        (self.path, self.best_acc, self.best_param[0]), *self.best_param)
Example #5
 def _callback(iter_no, sym, arg, aux):
     """The checkpoint function."""
     # The mean/std folding (active in Example #6) is disabled in this project.
     #if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
     #    print("save model with mean/std")
     #    num_classes = len(arg['bbox_pred_bias'].asnumpy()) // 4
     #    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (1, num_classes))
     #    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (1, num_classes))
     #    arg['bbox_pred_weight'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T
     #    arg['bbox_pred_bias'] = arg['bbox_pred_bias'] * mx.nd.array(np.squeeze(stds)) + \
     #                                   mx.nd.array(np.squeeze(means))
     save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
Example #6
 def _callback(iter_no, sym, arg, aux):
     """The checkpoint function."""
     if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
         # Fold the precomputed bbox-regression means/stds back into the
         # bbox_pred weights and bias, so the saved model outputs
         # unnormalized box deltas at inference time.
         print("save model with mean/std")
         num_classes = len(arg['bbox_pred_bias'].asnumpy()) // 4
         means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (1, num_classes))
         stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (1, num_classes))
         arg['bbox_pred_weight'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T
         arg['bbox_pred_bias'] = arg['bbox_pred_bias'] * mx.nd.array(np.squeeze(stds)) + \
                                 mx.nd.array(np.squeeze(means))
     save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
Example #7
 def do(self, loop):
     logging.info("Saving model to %s @ %d" % (self.prefix, loop.status['epochs']))
     save_checkpoint(self.prefix, loop.status['epochs'],
                     loop.sym, loop.model.arg_params, loop.model.aux_params)
Example #8
def main(args):
    learning_rate = args.lr
    epoches = args.epoches
    batch_size = args.batch_size
    num_hidden = args.num_hidden
    num_embed = args.num_embed
    num_lstm_layer = args.num_lstm_layer
    freq_val = args.freq_val
    val_flag = args.freq_val > 0
    ctx = mx.cpu(0) if args.gpu is None else mx.gpu(int(args.gpu))
    prefix = args.prefix
    period = args.period

    with open(config.text_root, 'r') as f:
        captions = json.load(f)
    buckets = [10, 20, 30]
    # buckets = None
    train_data = caption_dataIter(captions=captions,
                                  batch_size=batch_size,
                                  mode='train')
    val_data = caption_dataIter(captions=captions,
                                batch_size=batch_size,
                                mode='val')

    ##########################################################################
    ########################### custom train process #########################
    ##########################################################################

    cnn_shapes = {'image_data': (batch_size, 3, 224, 224)}
    cnn_sym = vgg16_fc7('image_data')
    cnn_exec = cnn_sym.simple_bind(ctx=ctx, is_train=False, **cnn_shapes)
    lstm = caption_module(num_lstm_layer=num_lstm_layer,
                          seq_len=train_data.sent_length + 2,
                          vocab_size=train_data.vocab_size,
                          num_hidden=num_hidden,
                          num_embed=num_embed,
                          batch_size=batch_size)
    lstm_shapes = {
        'image_feature': (batch_size, 4096),
        'word_data': (batch_size, train_data.sent_length + 2),
        'softmax_label': (batch_size, train_data.sent_length + 2)
    }

    lstm_exec = lstm.simple_bind(ctx=ctx, is_train=True, **lstm_shapes)

    # init params
    pretrain = mx.nd.load(config.vgg_pretrain)
    init_cnn(cnn_exec, pretrain)

    # init optimizer
    optimizer = mx.optimizer.create('adam')
    optimizer.lr = learning_rate
    updater = mx.optimizer.get_updater(optimizer)

    # init metric
    perplexity = mx.metric.Perplexity(ignore_label=-1)
    perplexity.reset()

    # callback
    params = callbacks(nbatch=0, eval_metric=perplexity, epoch=0)
    speedometer = mx.callback.Speedometer(batch_size=batch_size, frequent=20)
    for epoch in range(epoches):
        for i, batch in enumerate(train_data):

            # cnn forward, get image_feature
            cnn_exec.arg_dict['image_data'] = batch.data[0]
            cnn_exec.forward()
            image_feature = cnn_exec.outputs[0]

            # lstm forward
            lstm_exec.arg_dict['image_feature'] = image_feature
            lstm_exec.arg_dict['word_data'] = batch.data[1]
            lstm_exec.arg_dict['softmax_label'] = batch.label

            lstm_exec.forward(is_train=True)
            print(batch.label)
            params.eval_metric.update(labels=batch.label,
                                      preds=lstm_exec.outputs)
            lstm_exec.backward()
            params.epoch = epoch
            params.nbatch += 1
            speedometer(params)
            # Update only the learnable parameters; skip the data/label inputs.
            for j, name in enumerate(lstm.list_arguments()):
                if name not in lstm_shapes:
                    updater(j, lstm_exec.grad_dict[name],
                            lstm_exec.arg_dict[name])
        train_data.reset()
        params.nbatch = 0

        if val_flag and epoch % freq_val == 0:
            for i, batch in enumerate(val_data):

                # cnn forward, get image_feature
                cnn_exec.arg_dict['image_data'] = batch.data[0]
                cnn_exec.forward()
                image_feature = cnn_exec.outputs[0]

                # lstm forward
                lstm_exec.arg_dict['image_feature'] = image_feature
                lstm_exec.arg_dict['word_data'] = batch.data[1]
                lstm_exec.arg_dict['softmax_label'] = batch.label

                lstm_exec.forward(is_train=False)
                params.eval_metric.update(labels=batch.label,
                                          preds=lstm_exec.outputs)
                params.epoch = epoch
                params.nbatch += 1
                speedometer(params)
            params.nbatch = 0
            val_data.reset()
        if period:
            save_checkpoint(prefix=prefix,
                            epoch=epoch,
                            symbol=lstm,
                            arg_params=lstm_exec.arg_dict,
                            aux_params=lstm_exec.aux_dict)
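
A note on the periodic saving in the examples above (not part of the source code): MXNet also provides a built-in epoch-end callback, mx.callback.do_checkpoint(prefix, period), which has the same (epoch, sym, arg, aux) signature as the hand-written callbacks here and calls save_checkpoint internally. A minimal, self-contained sketch; the toy network, data, and the 'toy' prefix are illustrative assumptions, not from the examples:

import mxnet as mx

# Build a tiny throwaway network and data iterator just to drive fit().
data = mx.sym.Variable('data')
net = mx.sym.FullyConnected(data=data, num_hidden=2)
net = mx.sym.SoftmaxOutput(data=net, name='softmax')

train_iter = mx.io.NDArrayIter(mx.nd.random.uniform(shape=(10, 4)),
                               mx.nd.array([0, 1] * 5),
                               batch_size=5)

mod = mx.mod.Module(net)
# do_checkpoint returns an epoch_end_callback that calls save_checkpoint
# at the end of every `period`-th epoch.
mod.fit(train_iter,
        num_epoch=4,
        epoch_end_callback=mx.callback.do_checkpoint('toy', period=2))

With period=2 this writes toy-symbol.json plus toy-0002.params and toy-0004.params, the same file layout the examples above upload or reload.
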
Example #9
 def checkpoint_if_only_best(self, eval_metric, sym, arg, aux):
     # Always save as epoch 0, so the single "best" checkpoint is overwritten
     # whenever the metric improves.
     if self.is_best(eval_metric, update_value=True):
         save_checkpoint(self._prefix, 0, sym, arg, aux)
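
To consume any of the checkpoints written above, the matching loader is mx.model.load_checkpoint(prefix, epoch), which reads <prefix>-symbol.json and <prefix>-%04d.params and returns the symbol plus the arg/aux parameter dicts. A minimal sketch; the 'toy' prefix, epoch number, and input shape are illustrative assumptions:

import mxnet as mx

# Counterpart to save_checkpoint: recover the symbol and parameter dicts.
sym, arg_params, aux_params = mx.model.load_checkpoint('toy', 4)

# Rebuild a module for inference and load the saved weights into it.
# allow_missing=True because the label variable is not stored in the checkpoint.
mod = mx.mod.Module(symbol=sym, label_names=None)
mod.bind(data_shapes=[('data', (5, 4))], for_training=False)
mod.set_params(arg_params, aux_params, allow_missing=True)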