else:
    ctx = mx.gpu(args.gpu)

encoder, decoder, one_step_ahead_decoder = get_gnmt_encoder_decoder(
    hidden_size=args.num_hidden,
    dropout=args.dropout,
    num_layers=args.num_layers,
    num_bi_layers=args.num_bi_layers)
model = NMTModel(src_vocab=src_vocab,
                 tgt_vocab=tgt_vocab,
                 encoder=encoder,
                 decoder=decoder,
                 one_step_ahead_decoder=one_step_ahead_decoder,
                 embed_size=args.num_hidden,
                 prefix='gnmt_')
model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
static_alloc = True
model.hybridize(static_alloc=static_alloc)
logging.info(model)

translator = BeamSearchTranslator(model=model,
                                  beam_size=args.beam_size,
                                  scorer=nlp.model.BeamSearchScorer(
                                      alpha=args.lp_alpha, K=args.lp_k),
                                  max_length=args.tgt_max_len + 100)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
                                                       args.lp_alpha,
                                                       args.lp_k))

loss_function = MaskedSoftmaxCELoss()
loss_function.hybridize(static_alloc=static_alloc)
    dropout=args.dropout,
    num_layers=args.num_layers,
    num_heads=args.num_heads,
    max_src_length=max(src_max_len, 500),
    max_tgt_length=max(tgt_max_len, 500),
    scaled=args.scaled)
model = NMTModel(src_vocab=src_vocab,
                 tgt_vocab=tgt_vocab,
                 encoder=encoder,
                 decoder=decoder,
                 share_embed=args.dataset not in ('TOY', 'IWSLT2015'),
                 embed_size=args.num_units,
                 tie_weights=args.dataset not in ('TOY', 'IWSLT2015'),
                 embed_initializer=None,
                 prefix='transformer_')
model.initialize(init=mx.init.Xavier(magnitude=args.magnitude), ctx=ctx)
static_alloc = True
model.hybridize(static_alloc=static_alloc)
logging.info(model)

translator = BeamSearchTranslator(model=model,
                                  beam_size=args.beam_size,
                                  scorer=nlp.model.BeamSearchScorer(
                                      alpha=args.lp_alpha, K=args.lp_k),
                                  max_length=200)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size,
                                                       args.lp_alpha,
                                                       args.lp_k))

label_smoothing = LabelSmoothing(epsilon=args.epsilon, units=len(tgt_vocab))
label_smoothing.hybridize(static_alloc=static_alloc)
if args.tgt_max_len > 0:
    tgt_max_len = args.tgt_max_len
else:
    tgt_max_len = max_len[1]
encoder, decoder = get_transformer_encoder_decoder(units=args.num_units,
                                                   hidden_size=args.hidden_size,
                                                   dropout=args.dropout,
                                                   num_layers=args.num_layers,
                                                   num_heads=args.num_heads,
                                                   max_src_length=max(src_max_len, 500),
                                                   max_tgt_length=max(tgt_max_len, 500),
                                                   scaled=args.scaled)
model = NMTModel(src_vocab=src_vocab, tgt_vocab=tgt_vocab, encoder=encoder, decoder=decoder,
                 share_embed=args.dataset != 'TOY', embed_size=args.num_units,
                 tie_weights=args.dataset != 'TOY', embed_initializer=None, prefix='transformer_')
model.initialize(init=mx.init.Xavier(magnitude=args.magnitude), ctx=ctx)
static_alloc = True
model.hybridize(static_alloc=static_alloc)
logging.info(model)

translator = BeamSearchTranslator(model=model, beam_size=args.beam_size,
                                  scorer=nlp.model.BeamSearchScorer(alpha=args.lp_alpha,
                                                                    K=args.lp_k),
                                  max_length=200)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size, args.lp_alpha, args.lp_k))

label_smoothing = LabelSmoothing(epsilon=args.epsilon, units=len(tgt_vocab))
label_smoothing.hybridize(static_alloc=static_alloc)

loss_function = SoftmaxCEMaskedLoss(sparse_label=False)
loss_function.hybridize(static_alloc=static_alloc)
Exemple #4
0
def main(_argv):

    if FLAGS.num_gpus > 0:  # only supports 1 GPU
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()

    key_flags = FLAGS.get_key_flags_for_module(sys.argv[0])
    print('\n'.join(f.serialize() for f in key_flags))

    # are we using features or do we include the CNN?
    if FLAGS.feats_model is None:
        backbone_net = get_model(FLAGS.backbone, pretrained=True, ctx=ctx).features
        cnn_model = FrameModel(backbone_net, 11)  # hardcoded the number of classes
        if FLAGS.backbone_from_id:
            if os.path.exists(os.path.join('models', 'vision', 'experiments', FLAGS.backbone_from_id)):
                files = os.listdir(os.path.join('models', 'vision', 'experiments', FLAGS.backbone_from_id))
                files = [f for f in files if f[-7:] == '.params']
                if len(files) > 0:
                    files = sorted(files, reverse=True)  # put latest model first
                    model_name = files[0]
                    cnn_model.load_parameters(os.path.join('models', 'vision', 'experiments', FLAGS.backbone_from_id, model_name), ctx=ctx)
                    print('Loaded backbone params: {}'.format(os.path.join('models', 'vision', 'experiments', FLAGS.backbone_from_id, model_name)))
            else:
                raise FileNotFoundError('{}'.format(os.path.join('models', 'vision', 'experiments', FLAGS.backbone_from_id)))

        if FLAGS.freeze_backbone:
            for param in cnn_model.collect_params().values():
                param.grad_req = 'null'

        cnn_model = TimeDistributed(cnn_model.backbone)

        src_embed = cnn_model

        transform_test = transforms.Compose([
            transforms.Resize(FLAGS.data_shape + 32),
            transforms.CenterCrop(FLAGS.data_shape),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

    else:
        from mxnet.gluon import nn  # need to do this to force no use of Embedding on src
        src_embed = nn.HybridSequential(prefix='src_embed_')
        with src_embed.name_scope():
            src_embed.add(nn.Dropout(rate=0.0))

        transform_train = None
        transform_test = None

    # setup the data
    data_train = TennisSet(split='train', transform=transform_train, captions=True, max_cap_len=FLAGS.tgt_max_len,
                           every=FLAGS.every, feats_model=FLAGS.feats_model)
    data_val = TennisSet(split='val', transform=transform_test, captions=True, vocab=data_train.vocab,
                         every=FLAGS.every, inference=True, feats_model=FLAGS.feats_model)
    data_test = TennisSet(split='test', transform=transform_test, captions=True, vocab=data_train.vocab,
                          every=FLAGS.every, inference=True, feats_model=FLAGS.feats_model)

    test_tgt_sentences = data_test.get_captions(split=True)
    write_sentences(test_tgt_sentences, os.path.join('models', 'captioning', 'experiments', FLAGS.model_id, 'test_gt.txt'))

    # load embeddings for tgt_embed
    if FLAGS.emb_file:
        word_embs = nlp.embedding.TokenEmbedding.from_file(file_path=os.path.join('data', FLAGS.emb_file))
        data_test.vocab.set_embedding(word_embs)

        input_dim, output_dim = data_test.vocab.embedding.idx_to_vec.shape
        tgt_embed = gluon.nn.Embedding(input_dim, output_dim)
        tgt_embed.initialize(ctx=ctx)
        tgt_embed.weight.set_data(data_test.vocab.embedding.idx_to_vec)
    else:
        tgt_embed = None

    # setup the model
    encoder, decoder = get_gnmt_encoder_decoder(cell_type=FLAGS.cell_type,
                                                hidden_size=FLAGS.num_hidden,
                                                dropout=FLAGS.dropout,
                                                num_layers=FLAGS.num_layers,
                                                num_bi_layers=FLAGS.num_bi_layers)
    model = NMTModel(src_vocab=None, tgt_vocab=data_test.vocab, encoder=encoder, decoder=decoder,
                     embed_size=FLAGS.emb_size, prefix='gnmt_', src_embed=src_embed, tgt_embed=tgt_embed)

    model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
    static_alloc = True
    model.hybridize(static_alloc=static_alloc)
    print(model)

    if os.path.exists(os.path.join('models', 'captioning', 'experiments', FLAGS.model_id)):
        files = os.listdir(os.path.join('models', 'captioning', 'experiments', FLAGS.model_id))
        files = [f for f in files if f[-7:] == '.params']
        if len(files) > 0:
            files = sorted(files, reverse=True)  # put latest model first
            model_name = files[0]
            if model_name == 'valid_best.params':
                model_name = files[1]
            model.load_parameters(os.path.join('models', 'captioning', 'experiments', FLAGS.model_id, model_name), ctx=ctx)
            print('Loaded model params: {}'.format(os.path.join('models', 'captioning', 'experiments', FLAGS.model_id, model_name)))

    # setup the beam search
    translator = BeamSearchTranslator(model=model, beam_size=FLAGS.beam_size,
                                      scorer=nlp.model.BeamSearchScorer(alpha=FLAGS.lp_alpha, K=FLAGS.lp_k),
                                      max_length=FLAGS.tgt_max_len + 100)
    print('Use beam_size={}, alpha={}, K={}'.format(FLAGS.beam_size, FLAGS.lp_alpha, FLAGS.lp_k))

    # run the training
    train_data_loader, val_data_loader, test_data_loader = get_dataloaders(data_train, data_val, data_test)

    # load and evaluate the best model
    if os.path.exists(os.path.join('models', 'captioning', 'experiments', FLAGS.model_id, 'valid_best.params')):
        model.load_parameters(os.path.join('models', 'captioning', 'experiments', FLAGS.model_id, 'valid_best.params'))

    preds_path = os.path.join('models', 'captioning', 'experiments', FLAGS.model_id, 'best_test_out.txt')
    if not os.path.exists(preds_path):
        _, test_translation_out = evaluate(test_data_loader, model, translator, data_train, ctx)
    else:
        test_translation_out = read_sentences(preds_path)

    str_ = ''
    nlgeval = NLGEval()
    metrics_dict = nlgeval.compute_metrics([[' '.join(sent) for sent in test_tgt_sentences]],
                                           [' '.join(sent) for sent in test_translation_out])

    for k, v in metrics_dict.items():
        str_ += ', test ' + k + '={:.4f}'.format(float(v))
    print(str_)

    write_sentences(test_translation_out, preds_path)
                                     for i, ele in enumerate(data_val)])
data_test = gluon.data.SimpleDataset([(ele[0], ele[1], len(ele[0]), len(ele[1]), i)
                                      for i, ele in enumerate(data_test)])
if args.gpu is None:
    ctx = mx.cpu()
    print('Use CPU')
else:
    ctx = mx.gpu(args.gpu)

encoder, decoder = get_gnmt_encoder_decoder(hidden_size=args.num_hidden,
                                            dropout=args.dropout,
                                            num_layers=args.num_layers,
                                            num_bi_layers=args.num_bi_layers)
model = NMTModel(src_vocab=src_vocab, tgt_vocab=tgt_vocab, encoder=encoder, decoder=decoder,
                 embed_size=args.num_hidden, prefix='gnmt_')
model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
static_alloc = True
model.hybridize(static_alloc=static_alloc)
logging.info(model)

translator = BeamSearchTranslator(model=model, beam_size=args.beam_size,
                                  scorer=nlp.model.BeamSearchScorer(alpha=args.lp_alpha,
                                                                    K=args.lp_k),
                                  max_length=args.tgt_max_len + 100)
logging.info('Use beam_size={}, alpha={}, K={}'.format(args.beam_size, args.lp_alpha, args.lp_k))


loss_function = SoftmaxCEMaskedLoss()
loss_function.hybridize(static_alloc=static_alloc)

Exemple #6
0
def main(_argv):

    os.makedirs(os.path.join('models', 'captioning', 'experiments',
                             FLAGS.model_id),
                exist_ok=True)

    if FLAGS.num_gpus > 0:  # only supports 1 GPU
        ctx = mx.gpu()
    else:
        ctx = mx.cpu()

    # Set up logging
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    log_file_path = os.path.join('models', 'captioning', 'experiments',
                                 FLAGS.model_id, 'log.txt')
    log_dir = os.path.dirname(log_file_path)
    if log_dir and not os.path.exists(log_dir):
        os.makedirs(log_dir)
    fh = logging.FileHandler(log_file_path)
    logger.addHandler(fh)

    key_flags = FLAGS.get_key_flags_for_module(sys.argv[0])
    logging.info('\n'.join(f.serialize() for f in key_flags))

    # set up tensorboard summary writer
    tb_sw = SummaryWriter(log_dir=os.path.join(log_dir, 'tb'),
                          comment=FLAGS.model_id)

    # are we using features or do we include the CNN?
    if FLAGS.feats_model is None:
        backbone_net = get_model(FLAGS.backbone, pretrained=True,
                                 ctx=ctx).features
        cnn_model = FrameModel(backbone_net,
                               11)  # hardcoded the number of classes
        if FLAGS.backbone_from_id:
            if os.path.exists(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)):
                files = os.listdir(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id))
                files = [f for f in files if f[-7:] == '.params']
                if len(files) > 0:
                    files = sorted(files,
                                   reverse=True)  # put latest model first
                    model_name = files[0]
                    cnn_model.load_parameters(os.path.join(
                        'models', 'vision', 'experiments',
                        FLAGS.backbone_from_id, model_name),
                                              ctx=ctx)
                    logging.info('Loaded backbone params: {}'.format(
                        os.path.join('models', 'vision', 'experiments',
                                     FLAGS.backbone_from_id, model_name)))
            else:
                raise FileNotFoundError('{}'.format(
                    os.path.join('models', 'vision', 'experiments',
                                 FLAGS.backbone_from_id)))

        if FLAGS.freeze_backbone:
            for param in cnn_model.collect_params().values():
                param.grad_req = 'null'

        cnn_model = TimeDistributed(cnn_model.backbone)

        src_embed = cnn_model

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(FLAGS.data_shape),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=0.4,
                                         contrast=0.4,
                                         saturation=0.4),
            transforms.RandomLighting(0.1),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        transform_test = transforms.Compose([
            transforms.Resize(FLAGS.data_shape + 32),
            transforms.CenterCrop(FLAGS.data_shape),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

    else:
        from mxnet.gluon import nn  # need to do this to force no use of Embedding on src
        src_embed = nn.HybridSequential(prefix='src_embed_')
        with src_embed.name_scope():
            src_embed.add(nn.Dropout(rate=0.0))

        transform_train = None
        transform_test = None

    # setup the data
    data_train = TennisSet(split='train',
                           transform=transform_train,
                           captions=True,
                           max_cap_len=FLAGS.tgt_max_len,
                           every=FLAGS.every,
                           feats_model=FLAGS.feats_model)
    data_val = TennisSet(split='val',
                         transform=transform_test,
                         captions=True,
                         vocab=data_train.vocab,
                         every=FLAGS.every,
                         inference=True,
                         feats_model=FLAGS.feats_model)
    data_test = TennisSet(split='test',
                          transform=transform_test,
                          captions=True,
                          vocab=data_train.vocab,
                          every=FLAGS.every,
                          inference=True,
                          feats_model=FLAGS.feats_model)

    val_tgt_sentences = data_val.get_captions(split=True)
    test_tgt_sentences = data_test.get_captions(split=True)
    write_sentences(
        val_tgt_sentences,
        os.path.join('models', 'captioning', 'experiments', FLAGS.model_id,
                     'val_gt.txt'))
    write_sentences(
        test_tgt_sentences,
        os.path.join('models', 'captioning', 'experiments', FLAGS.model_id,
                     'test_gt.txt'))

    # load embeddings for tgt_embed
    if FLAGS.emb_file:
        word_embs = nlp.embedding.TokenEmbedding.from_file(
            file_path=os.path.join('data', FLAGS.emb_file))
        data_train.vocab.set_embedding(word_embs)

        input_dim, output_dim = data_train.vocab.embedding.idx_to_vec.shape
        tgt_embed = gluon.nn.Embedding(input_dim, output_dim)
        tgt_embed.initialize(ctx=ctx)
        tgt_embed.weight.set_data(data_train.vocab.embedding.idx_to_vec)
    else:
        tgt_embed = None

    # setup the model
    encoder, decoder = get_gnmt_encoder_decoder(
        cell_type=FLAGS.cell_type,
        hidden_size=FLAGS.num_hidden,
        dropout=FLAGS.dropout,
        num_layers=FLAGS.num_layers,
        num_bi_layers=FLAGS.num_bi_layers)
    model = NMTModel(src_vocab=None,
                     tgt_vocab=data_train.vocab,
                     encoder=encoder,
                     decoder=decoder,
                     embed_size=FLAGS.emb_size,
                     prefix='gnmt_',
                     src_embed=src_embed,
                     tgt_embed=tgt_embed)

    model.initialize(init=mx.init.Uniform(0.1), ctx=ctx)
    static_alloc = True
    model.hybridize(static_alloc=static_alloc)
    logging.info(model)

    start_epoch = 0
    if os.path.exists(
            os.path.join('models', 'captioning', 'experiments',
                         FLAGS.model_id)):
        files = os.listdir(
            os.path.join('models', 'captioning', 'experiments',
                         FLAGS.model_id))
        files = [f for f in files if f[-7:] == '.params']
        if len(files) > 0:
            files = sorted(files, reverse=True)  # put latest model first
            model_name = files[0]
            if model_name == 'valid_best.params':
                model_name = files[1]
            start_epoch = int(model_name.split('.')[0]) + 1
            model.load_parameters(os.path.join('models', 'captioning',
                                               'experiments', FLAGS.model_id,
                                               model_name),
                                  ctx=ctx)
            logging.info('Loaded model params: {}'.format(
                os.path.join('models', 'captioning', 'experiments',
                             FLAGS.model_id, model_name)))

    # setup the beam search
    translator = BeamSearchTranslator(model=model,
                                      beam_size=FLAGS.beam_size,
                                      scorer=nlp.model.BeamSearchScorer(
                                          alpha=FLAGS.lp_alpha, K=FLAGS.lp_k),
                                      max_length=FLAGS.tgt_max_len + 100)
    logging.info('Use beam_size={}, alpha={}, K={}'.format(
        FLAGS.beam_size, FLAGS.lp_alpha, FLAGS.lp_k))

    # setup the loss function
    loss_function = MaskedSoftmaxCELoss()
    loss_function.hybridize(static_alloc=static_alloc)

    # run the training
    train(data_train, data_val, data_test, model, loss_function,
          val_tgt_sentences, test_tgt_sentences, translator, start_epoch, ctx,
          tb_sw)