# XLNet sentence-classification fine-tuning: build the backbone and the classifier.
# `args`, `ctxs`, `task` and the local `model` module (which provides get_model)
# are defined earlier in the script.
import warnings

import mxnet as mx
from mxnet import gluon

warnings.warn('model_parameters is not set. '
              'Randomly initialized model will be used for inference.')
get_pretrained = True

get_model_params = {
    'name': args.model_name,
    'dataset_name': args.dataset,
    'pretrained': get_pretrained,
    'ctx': ctxs,
    'use_decoder': False,
    'dropout': args.dropout,
    'attention_dropout': args.attention_dropout
}
xlnet_base, vocab, tokenizer = model.get_model(**get_model_params)

# initialize the rest of the parameters
initializer = mx.init.Normal(0.02)

do_regression = not task.class_labels
if do_regression:
    # regression task: no class labels, predict a single score
    num_classes = 1
    loss_function = gluon.loss.L2Loss()
else:
    num_classes = len(task.class_labels)
    loss_function = gluon.loss.SoftmaxCELoss()

# reuse the XLNetClassifier class with num_classes=1 for regression
model = XLNetClassifier(xlnet_base,
                        units=xlnet_base._net._units,
                        dropout=0.1,
                        num_classes=num_classes)
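# The branch above only swaps num_classes and the loss function. A minimal,
# self-contained sketch (dummy shapes and values, not the script's own training
# loop) of how the two loss functions consume model outputs:
import mxnet as mx
from mxnet import gluon

ce_loss = gluon.loss.SoftmaxCELoss()
logits = mx.nd.random.uniform(shape=(4, 3))   # 4 examples, 3 classes
class_ids = mx.nd.array([0, 2, 1, 1])         # sparse class labels
print(ce_loss(logits, class_ids).mean())      # classification branch

l2_loss = gluon.loss.L2Loss()
scores = mx.nd.random.uniform(shape=(4, 1))   # one score per example
targets = mx.nd.random.uniform(shape=(4, 1))  # regression targets
print(l2_loss(scores, targets).mean())        # regression branch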
# Transformer-XL evaluation: argument parsing, model construction and data loading.
# `parser` (an argparse.ArgumentParser) is created earlier in the script.
import time

import mxnet as mx
import gluonnlp as nlp

parser.add_argument('--batch_size', type=int, default=64,
                    help='Batch size for evaluation.')
parser.add_argument('--gpu', type=int, help='GPU id')
args = parser.parse_args()

start_time = time.time()

# Model
from transformer.model import get_model

with open(args.vocab_file, 'r') as f:
    vocab = nlp.Vocab.from_json(f.read())
ctx = mx.gpu(args.gpu) if args.gpu is not None else mx.cpu()
model, vocab = get_model('transformerxl', vocab=vocab,
                         dataset_name=args.dataset, clamp_len=args.clamp_len)
model.initialize(ctx=ctx)
model.load_parameters(args.parameter_file, ignore_extra=False)
model.hybridize()
print(model)

# Data
if args.dataset == 'wt103':
    val_dataset, test_dataset = [
        nlp.data.WikiText103(segment=segment, skip_empty=False,
                             bos=vocab.bos_token, eos=vocab.eos_token)
        for segment in ['val', 'test']
    ]
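# The evaluation loop itself follows in the full script. As a rough sketch only
# (seq_len and last_batch here are assumptions, and a real Transformer-XL
# evaluation would also carry the model's memory states across segments), the
# corpus can be cut into fixed-length BPTT batches with GluonNLP's standard helper:
bptt_batchify = nlp.data.batchify.CorpusBPTTBatchify(
    vocab, seq_len=70, batch_size=args.batch_size, last_batch='discard')
val_data = bptt_batchify(val_dataset)
for data, target in val_data:
    # data and target are (seq_len, batch_size) token-id batches,
    # with target shifted one position ahead of data
    break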