Example 1
 def __init__(self, weight=None, batch_axis=0, customize_loss=False, **kwargs):  # pylint: disable=unused-argument
     super(BertForQALoss, self).__init__(
         weight=None, batch_axis=0, **kwargs)
     self.customize_loss = customize_loss
     if self.customize_loss:
         self.loss = loss.SoftmaxCELoss(sparse_label=False)
     else:
         self.loss = loss.SoftmaxCELoss()
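
For context, the customize_loss flag above only switches the label format that SoftmaxCELoss expects. A minimal standalone sketch of the difference (the toy scores and labels here are made up for illustration):

from mxnet import nd
from mxnet.gluon import loss

pred = nd.array([[2.0, 0.5, 0.3], [0.1, 1.5, 0.2]])      # unnormalized class scores
sparse_ce = loss.SoftmaxCELoss()                          # default: integer class indices
dense_ce = loss.SoftmaxCELoss(sparse_label=False)         # one-hot / probability labels
print(sparse_ce(pred, nd.array([0, 1])))
print(dense_ce(pred, nd.array([[1, 0, 0], [0, 1, 0]])))   # matches the sparse result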
Example 2
 def __init__(self, emb_size, vocab_size, hidden_size=256, num_layers=2, dropout=.2,
              bidir=True, latent_size=64, **kwargs):
     super(VAE_LSTM, self).__init__(**kwargs)
     with self.name_scope():
         self.latent_size = latent_size
         # closed-form KL divergence of a diagonal Gaussian from N(0, I);
         # note that this lambda returns the *negative* of D_KL
         self.kl_div = lambda mean, logv: 0.5 * nd.sum(
             1 + logv - mean.square() - logv.exp())
         # self.kl_div = lambda mu, sg: (-0.5 * nd.sum(sg - mu*mu - nd.exp(sg) + 1, 1)).mean().squeeze()
         self.ce_loss = loss.SoftmaxCELoss()
         self.encoder = VAEEncoder(vocab_size=vocab_size, emb_size=emb_size, hidden_size=hidden_size,
                                   num_layers=num_layers, dropout=dropout, bidir=bidir)
         self.decoder = VAEDecoder(vocab_size=vocab_size, emb_size=emb_size, hidden_size=hidden_size,
                                   num_layers=num_layers, dropout=dropout, bidir=bidir)
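
The two lambdas differ in sign: the standard closed form for KL(N(mu, diag(exp(logv))) || N(0, I)) is -0.5 * sum(1 + logv - mu^2 - exp(logv)), so the active lambda returns the negative of D_KL and the sign has to be handled wherever the ELBO is assembled. A quick standalone check with made-up values:

from mxnet import nd

mean = nd.array([[0.5, -0.3]])
logv = nd.array([[0.1, -0.2]])                    # log variance
kl = -0.5 * nd.sum(1 + logv - mean.square() - logv.exp())
print(kl)                                         # non-negative, as a KL term should be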
Example 3
 def __init__(self,
              xlnet_base,
              start_top_n=None,
              end_top_n=None,
              is_eval=False,
              units=768,
              prefix=None,
              params=None):
     super(XLNetForQA, self).__init__(prefix=prefix, params=params)
     with self.name_scope():
         self.xlnet = xlnet_base
         self.start_top_n = start_top_n
         self.end_top_n = end_top_n
         self.loss = loss.SoftmaxCELoss()
         self.start_logits = PoolerStartLogits()
         self.end_logits = PoolerEndLogits(units=units, is_eval=is_eval)
         self.eval = is_eval
         self.answer_class = XLNetPoolerAnswerClass(units=units)
         self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss()
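
The answerability head above pairs SigmoidBinaryCrossEntropyLoss with raw logits. A toy sketch of that loss in isolation (the values and the label convention are assumptions for illustration, not taken from the source):

from mxnet import nd
from mxnet.gluon import loss

cls_loss = loss.SigmoidBinaryCrossEntropyLoss()   # applies the sigmoid itself (from_sigmoid=False)
logits = nd.array([[0.8], [-1.2]])                # per-example answerability scores
labels = nd.array([[1.0], [0.0]])                 # assumed: 1 = unanswerable
print(cls_loss(logits, labels))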
Example 4
def main(args):
    # initial setup: configure logging
    log_path = os.path.join(args.log_root, '{}.log'.format(args.model_name))
    logger = config_logger(log_path)

    # args.gpu == 0 selects the CPU; N > 0 selects gpu(N - 1)
    gpu_idx = args.gpu
    if not gpu_idx:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(gpu_idx - 1)
    logger.info("Using ctx: {}".format(ctx))

    # Loading vocab and model
    ch_bert, ch_vocab = gluonnlp.model.get_model(
        args.bert_model,
        dataset_name=args.ch_bert_dataset,
        pretrained=True,
        ctx=ctx,
        use_pooler=False,
        use_decoder=False,
        use_classifier=False)
    model = BertClass(bert=ch_bert, max_seq_len=args.max_seq_len, ctx=ctx)
    logger.info("Model Creating Completed.")

    # init or load params for model
    if args.istrain:
        model.output_dense.initialize(init.Xavier(), ctx)
    else:
        model.load_parameters(args.model_params_path, ctx=ctx)
    logger.info("Parameters Initing and Loading Completed")

    model.hybridize()

    if args.istrain:
        # Loading dataloader
        assiant = DatasetAssiantTransformer(ch_vocab=ch_vocab,
                                            max_seq_len=args.max_seq_len)
        dataset = ClassDataset(args.train_file_path)
        train_dataset, dev_dataset = train_valid_split(dataset,
                                                       valid_ratio=0.1)
        train_dataiter = ClassDataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         assiant=assiant,
                                         shuffle=True).dataiter
        dev_dataiter = ClassDataLoader(dev_dataset,
                                       batch_size=args.batch_size,
                                       assiant=assiant,
                                       shuffle=True).dataiter
        logger.info("Data Loading Completed")
    else:
        assiant = DatasetAssiantTransformer(ch_vocab=ch_vocab,
                                            max_seq_len=args.max_seq_len,
                                            istrain=args.istrain)
        test_dataset = ClassTestDataset(args.test_file_path)
        test_dataiter = ClassDataLoader(test_dataset,
                                        batch_size=args.batch_size,
                                        assiant=assiant,
                                        shuffle=True).dataiter

    # build trainer
    finetune_trainer = gluon.Trainer(ch_bert.collect_params(), args.optimizer,
                                     {"learning_rate": args.finetune_lr})
    trainer = gluon.Trainer(model.collect_params("dense*"), args.optimizer,
                            {"learning_rate": args.train_lr})

    loss_func = gloss.SoftmaxCELoss()

    if args.istrain:
        logger.info("## Trainning Start ##")
        train_and_valid(ch_bert=ch_bert,
                        model=model,
                        ch_vocab=ch_vocab,
                        train_dataiter=train_dataiter,
                        dev_dataiter=dev_dataiter,
                        trainer=trainer,
                        finetune_trainer=finetune_trainer,
                        epochs=args.epochs,
                        loss_func=loss_func,
                        ctx=ctx,
                        lr=args.train_lr,
                        batch_size=args.batch_size,
                        params_save_step=args.params_save_step,
                        params_save_path_root=args.params_save_path_root,
                        eval_step=args.eval_step,
                        log_step=args.log_step,
                        check_step=args.check_step,
                        logger=logger,
                        num_train_examples=len(train_dataset),
                        warmup_ratio=args.warmup_ratio)
    else:
        predict(ch_bert=ch_bert,
                model=model,
                ch_vocab=ch_vocab,
                test_dataiter=test_dataiter,
                logger=logger,
                ctx=ctx)
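
train_and_valid is not shown here, but with the two trainers built over disjoint parameter sets, each update presumably steps both so the BERT body and the dense head learn at their own rates. A minimal sketch of such an update step, with the batch layout assumed rather than taken from the source:

from mxnet import autograd

for inputs, token_types, valid_len, labels in train_dataiter:  # assumed batch layout
    with autograd.record():
        out = model(inputs, token_types, valid_len)
        l = loss_func(out, labels).mean()
    l.backward()
    finetune_trainer.step(1)   # ch_bert parameters at finetune_lr
    trainer.step(1)            # "dense*" head parameters at train_lr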
Example 5
 def __init__(self):
     super(MyLoss3, self).__init__()
     self.loss1 = loss.SoftmaxCELoss()
     self.loss2 = loss.SigmoidBCELoss()
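
Only the constructor survives in this excerpt; a hypothetical hybrid_forward combining the two heads (the argument names and the unweighted sum are illustrative assumptions, not the source's code) could look like:

 def hybrid_forward(self, F, cls_pred, cls_label, bin_pred, bin_label):
     # unweighted sum of a multi-class term and a binary term (assumed weighting)
     return self.loss1(cls_pred, cls_label) + self.loss2(bin_pred, bin_label)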
Example 6
 def __init__(self):
     super(MyLoss2, self).__init__()
     self.loss = loss.SoftmaxCELoss()
Example 7
 def __init__(self, weight=None, batch_axis=0, **kwargs):  # pylint: disable=unused-argument
     super(BertForQALoss, self).__init__(weight=None,
                                         batch_axis=0,
                                         **kwargs)
     self.loss = loss.SoftmaxCELoss()
Example 8
dataset = gdata.ArrayDataset(features, labels)
# read random mini-batches
train_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
test_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)  # note: reuses the same dataset as "test" data

for X, y in train_iter:
    print(X, y)
    break

# ReLU network; a sigmoid + dropout variant is kept commented out below
net = nn.Sequential()
# net.add(nn.Dense(5, activation="sigmoid"), nn.Dropout(0.1),nn.Dense(3))
net.add(nn.Dense(5, activation="relu"), nn.Dense(3))
net.initialize(init.Normal(sigma=0.5))

loss = gloss.SoftmaxCELoss()  # softmax cross-entropy loss
trainer = gluon.Trainer(net.collect_params(), "sgd", {
    "learning_rate": 0.015,
    'wd': 1
})


def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype("float32")
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n
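
The snippet defines the data iterators, network, loss, trainer, and an accuracy helper, but stops short of a training loop; a sketch wiring them together (the epoch count is an assumption):

from mxnet import autograd

num_epochs = 10                                   # assumed
for epoch in range(num_epochs):
    for X, y in train_iter:
        with autograd.record():
            l = loss(net(X), y).sum()
        l.backward()
        trainer.step(batch_size)
    print('epoch %d, test acc %.3f'
          % (epoch + 1, evaluate_accuracy(test_iter, net)))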

Example 9
import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
net = nn.Sequential()
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
loss = gloss.SoftmaxCELoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
              None, trainer)
for x, y in test_iter:
    break
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(x).argmax(axis=1).asnumpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(x[0:9], titles[0:9])