# Engine object whose transform_veracity() is applied to each data split.
training_engine = TrainingEngine()
    # Run every split through the engine. Each call returns a pair that is
    # unpacked into an X value and an M value (presumably inputs and mask,
    # judging by the names — TODO confirm against transform_veracity).
    trX, trM = training_engine.transform_veracity(trX)
    vaX, vaM = training_engine.transform_veracity(vaX)
    if submit:
        # The test split is transformed only when `submit` is truthy.
        teX, teM = training_engine.transform_veracity(teX)

    n_train = len(trY)
    n_valid = len(vaY)
    # args.n_batch scaled by n_gpu; max(..., 1) leaves it unscaled when
    # n_gpu is 0.
    n_batch_train = args.n_batch * max(n_gpu, 1)
    # Passed to the optimizer as t_total: number of full batches in the
    # training set (floor division drops a trailing partial batch) times
    # args.n_iter.
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    # The task head is specified as the tuple ('classification', 3);
    # presumably a 3-class classifier — confirm against DoubleHeadModel.
    dh_model = DoubleHeadModel(args, clf_token, ('classification', 3), vocab,
                               n_ctx)

    # reduction='none' makes the criterion return one loss value per element
    # instead of a reduced scalar.
    criterion = nn.CrossEntropyLoss(reduction='none')
    # Optimizer over all model parameters; every hyperparameter comes from
    # `args` except t_total, which is the update count computed above.
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    # The same criterion object is passed for both criterion arguments,
    # together with args.lm_coef and the optimizer.
    # NOTE(review): the model head above is 'classification' while the loss
    # helper is MultipleChoiceLossCompute — confirm this pairing is intended.
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion,
                                                 args.lm_coef, model_opt)
    # Loader object; its load_openai_pretrained_model() is called next with
    # dh_model.transformer.
    openAIModel = OpenAIModel()
    openAIModel.load_openai_pretrained_model(dh_model.transformer,
                                             n_ctx=n_ctx,
    # Combine the three per-example components of each split via
    # transform_roc. Each call returns a pair unpacked into an X value and an
    # M value (presumably inputs and mask — TODO confirm against
    # transform_roc).
    trX, trM = transform_roc(trX1, trX2, trX3)
    vaX, vaM = transform_roc(vaX1, vaX2, vaX3)
    if submit:
        # The test split is transformed only when `submit` is truthy.
        teX, teM = transform_roc(teX1, teX2, teX3)

    n_train = len(trY)
    n_valid = len(vaY)
    # args.n_batch scaled by n_gpu; max(..., 1) leaves it unscaled when
    # n_gpu is 0.
    n_batch_train = args.n_batch * max(n_gpu, 1)
    # Passed to the optimizer as t_total: number of full batches in the
    # training set (floor division drops a trailing partial batch) times
    # args.n_iter.
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    dh_model = DoubleHeadModel(args, clf_token, 'multiple_choice', vocab,
                               n_ctx)

    # Per-element losses (no reduction). `reduction='none'` is the supported
    # spelling of the deprecated `reduce=False` flag and yields the same
    # unreduced output.
    criterion = nn.CrossEntropyLoss(reduction='none')
    # Optimizer over all model parameters. The trailing comments appear to
    # record example values for each setting — verify against the argument
    # defaults before relying on them.
    model_opt = OpenAIAdam(
        params=dh_model.parameters(),
        lr=args.lr,  # 6.25e-5
        schedule=args.lr_schedule,  # warmup_linear
        warmup=args.lr_warmup,  # 0.002
        t_total=n_updates_total,  # 748
        b1=args.b1,  # 0.9
        b2=args.b2,  # 0.999
        e=args.e,  # 1e-8
        l2=args.l2,  # 0.01
        vector_l2=args.vector_l2,
        max_grad_norm=args.max_grad_norm  # 1
    )
    # The same criterion object is passed for both criterion arguments,
    # together with args.lm_coef and the optimizer.
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion,
                                                 args.lm_coef, model_opt)
    load_openai_pretrained_model(dh_model.transformer,
                                 n_ctx=n_ctx,