# Build the double-head model. The task-head spec is ('classification', 3)
# -- presumably a 3-way classifier; confirm against DoubleHeadModel.
dh_model = DoubleHeadModel(args, clf_token, ('classification', 3), vocab,
                               n_ctx)

    # reduction='none' makes the criterion return unreduced, per-element
    # losses; any averaging is left to whoever consumes them.
    criterion = nn.CrossEntropyLoss(reduction='none')
    # Optimizer hyperparameters all come from `args`; `n_updates_total` is
    # computed before this excerpt and passed as the schedule length t_total.
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    # The same criterion object is passed for both criterion arguments
    # (presumably the LM loss and the task loss -- verify against
    # MultipleChoiceLossCompute); args.lm_coef and the optimizer follow.
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion,
                                                 args.lm_coef, model_opt)
    # Load the pretrained OpenAI weights into the transformer sub-module only.
    openAIModel = OpenAIModel()
    openAIModel.load_openai_pretrained_model(dh_model.transformer,
                                             n_ctx=n_ctx,
                                             n_special=n_special)

    # Move the model to the target device, then wrap it in DataParallel.
    # From here on `dh_model` refers to the wrapper, not the bare model.
    dh_model.to(device)
    dh_model = nn.DataParallel(dh_model)

    # Progress counters, both starting at zero.
    n_updates = 0
    n_epochs = 0
    # For every dataset except 'stsb', trYt is simply an alias of trY.
    # The 'stsb' case is not assigned in this excerpt.
    if dataset != 'stsb':
        trYt = trY
    # When `submit` is set, write the state dict of the DataParallel-wrapped
    # model to <save_dir>/<desc>/state_of_module. No training step has run
    # between the weight load above and this save.
    if submit:
        path = os.path.join(save_dir, desc, 'state_of_module')
        torch.save(dh_model.state_dict(), make_path(path))
예제 #2
0
        teX, teM = transform_roc(teX1, teX2, teX3)

    # Dataset sizes, taken from the label arrays.
    n_train = len(trY)
    n_valid = len(vaY)
    # Batch size scales with n_gpu; max(n_gpu, 1) keeps it at 8 when n_gpu
    # is 0.
    n_batch_train = 8 * max(n_gpu, 1)
    # Whole batches per pass over the training set, times the 3 passes run by
    # the epoch loop at the bottom of this block.
    n_updates_total = (n_train // n_batch_train) * 3

    dh_model = Model(clf_token, 'multiple_choice', vocab, n_ctx)

    # reduction='none' is the supported spelling of the deprecated
    # reduce=False: the criterion returns unreduced, per-element losses.
    criterion = nn.CrossEntropyLoss(reduction='none')
    # torch.optim.Adam is the optimizer class; the lowercase torch.optim.adam
    # is the module that defines it and is not callable. parameters() must be
    # called so the optimizer receives an iterable of parameters rather than
    # the bound method object.
    model_opt = torch.optim.Adam(
        dh_model.parameters(),
        lr=6.25e-5,
    )

    # The same criterion object is passed for both criterion arguments, with
    # a fixed coefficient of 0.5 as the third argument (presumably the LM
    # loss weight -- verify against MultipleChoiceLossCompute).
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion, 0.5,
                                                 model_opt)

    # Move the model to the target device, then wrap it in DataParallel.
    # From here on `dh_model` refers to the wrapper, not the bare model.
    dh_model.to(device)
    dh_model = nn.DataParallel(dh_model)

    # Progress counters, both starting at zero.
    n_updates = 0
    n_epochs = 0
    # For every dataset except 'stsb', trYt is simply an alias of trY.
    # The 'stsb' case is not assigned in this block.
    if dataset != 'stsb':
        trYt = trY
    # When `submit` is set, write the state dict of the DataParallel-wrapped
    # model to <save_dir>/<desc>/best_params before any epoch has run.
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        torch.save(dh_model.state_dict(), make_path(path))
    best_score = 0
    # Run three epochs; run_epoch() is defined outside this block.
    for i in range(3):
        print("running epoch", i)
        run_epoch()