pretrained=get_pretrained,
                             ctx=ctx,
                             use_pooler=True,
                             use_decoder=False,
                             use_classifier=False)

# Pick the task head and the matching loss from the task's label set.
if task.class_labels:
    # Classification: one output unit per class label.
    model = BERTClassifier(bert,
                           num_classes=len(task.class_labels),
                           dropout=0.1)
    # Only initialize the new head when no saved model parameters were given.
    if not model_parameters:
        model.classifier.initialize(ctx=ctx, init=mx.init.Normal(0.02))
    loss_function = gluon.loss.SoftmaxCELoss()
else:
    # No class labels means regression, e.g. STS-B, where
    # STSBTask().class_labels returns None.
    model = BERTRegression(bert, dropout=0.1)
    # Only initialize the new head when no saved model parameters were given.
    if not model_parameters:
        model.regression.initialize(ctx=ctx, init=mx.init.Normal(0.02))
    loss_function = gluon.loss.L2Loss()

# Output directory taken from the command-line arguments.
output_dir = args.output_dir

# Optionally load saved parameters. The two sources are independent: BERT
# encoder weights are loaded first, then parameters for the whole model.
if pretrained_bert_parameters:
    logging.info('loading bert params from %s', pretrained_bert_parameters)
    # ignore_extra=True tolerates entries in the file that the encoder
    # itself does not define (see Gluon Block.load_parameters).
    model.bert.load_parameters(
        pretrained_bert_parameters, ignore_extra=True, ctx=ctx)

if model_parameters:
    logging.info('loading model params from %s', model_parameters)
    model.load_parameters(model_parameters, ctx=ctx)
###############################################################################
#                              Hybridize the model                            #
###############################################################################

seq_length = args.seq_length

if args.task == 'classification':
    bert, _ = get_model(name=args.model_name,
                        dataset_name=args.dataset_name,
                        pretrained=False,
                        use_pooler=True,
                        use_decoder=False,
                        use_classifier=False,
                        seq_length=args.seq_length)
    net = BERTClassifier(bert, num_classes=2, dropout=args.dropout)
elif args.task == 'regression':
    bert, _ = get_model(name=args.model_name,
                        dataset_name=args.dataset_name,
                        pretrained=False,
                        use_pooler=True,
                        use_decoder=False,
                        use_classifier=False,
                        seq_length=args.seq_length)
    net = BERTRegression(bert, dropout=args.dropout)
elif args.task == 'question_answering':
    bert, _ = get_model(name=args.model_name,
                        dataset_name=args.dataset_name,
                        pretrained=False,
                        use_pooler=False,
                        use_decoder=False,