Ejemplo n.º 1
0
    compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion,
                                                 args.lm_coef, model_opt)
    load_openai_pretrained_model(dh_model.transformer,
                                 n_ctx=n_ctx,
                                 n_special=n_special)

    dh_model.to(device)
    dh_model = nn.DataParallel(dh_model)

    n_updates = 0
    n_epochs = 0
    if dataset != 'stsb':
        trYt = trY
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        torch.save(dh_model.state_dict(), make_path(path))
    best_score = 0
    for i in range(args.n_iter):
        print("running epoch", i)
        run_epoch()
        n_epochs += 1
        log(save_dir, desc)
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        dh_model.load_state_dict(torch.load(path))
        predict(dataset, args.submission_dir)
        if args.analysis:
            rocstories_analysis(
                data_dir, os.path.join(args.submission_dir, 'ROCStories.tsv'),
                os.path.join(log_dir, 'rocstories.jsonl'))
Ejemplo n.º 2
0
    dh_model = DoubleHeadModel(**meta['dh_model'])
    if args.snapshot_dir is not None:
        dh_model.to(device)
        dh_model = nn.DataParallel(dh_model)
        print("Loading snapshot...")
        snapshot_dict = torch.load(
            os.path.join(args.snapshot_dir, 'best_params'))
        if args.snapshot_mode == 'transformer_only':
            model_dict = dh_model.state_dict()
            model_dict.update({
                k: v
                for k, v in snapshot_dict.items() if 'task_head' not in k
            })
            snapshot_dict = model_dict
        dh_model.load_state_dict(snapshot_dict)
    else:
        load_openai_pretrained_model(dh_model.transformer,
                                     n_ctx=n_ctx,
                                     n_special=n_special,
                                     n_transfer=args.n_transfer)
        dh_model.to(device)
        dh_model = nn.DataParallel(dh_model)

    n_train = len(trY)
    n_valid = len(vaY)

    n_batch_train = args.n_batch * max(n_gpu, 1)
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    criterion = nn.CrossEntropyLoss(reduce=False)
        torch.save(dh_model.state_dict(), 'model_state')

    else:
        n_vocab = len(voc)
        max_len = 140
        n_special = 2
        n_ctx = max_len + 2
        vocab = n_vocab + n_special + n_ctx
        n_batch_train = args.n_batch * max(n_gpu, 1)
        start_token = n_vocab
        clf_token = n_vocab + 1
        dh_model = DoubleHeadModel(args, clf_token, ('classification', 2), vocab, n_ctx)
        dh_model.to(device)
        dh_model = nn.DataParallel(dh_model)

        dh_model.load_state_dict(torch.load('model_state'))

    # Estimation

    dh_model.eval()
    testing_samples_per_class = 100
    test_data = tweets_pos[:testing_samples_per_class] + tweets_neg[:testing_samples_per_class]
    correct_predictions = 0
    teX1 = generate_encodings(test_data)
    teX, teM = transform_tweet(teX1)
    logits = iter_predict(teX, teM)
    prediction = argmax(logits)
    print(prediction)
    answers = np.append(np.zeros(testing_samples_per_class, dtype=int), np.ones(testing_samples_per_class, dtype=int))
    num_mistakes = sum(abs(prediction - answers))
    print("Estimated accuracy:", (1 - num_mistakes/(testing_samples_per_class*2)))
def main():
    """Classify a headerless TSV file with a saved ``DoubleHeadModel``.

    Command-line flags (read from ``sys.argv``):
        -i/--input_file     TSV loaded through ``load_headerless_tsv`` (required).
        -o/--output_file    Path of the tab-separated prediction file (required).
        --n_batch           Batch size; multiplied by ``max(n_gpu, 1)`` before
                            being handed to ``predict``.
        --skip_preprocess   Forwarded to ``encode_dataset``.
        --sentence_pair     Load the input as sentence pairs, register the
                            ``_delimiter_`` token and cap ``max_len`` at
                            ``n_ctx // 2 - 2``.
        --force_delimiter   Register the ``_delimiter_`` token even without
                            ``--sentence_pair``.
        --encoder_path,
        --bpe_path          Files passed to ``TextEncoder``.
        --model_dir         Directory holding ``meta.json`` and ``best_params``
                            (required).
        --mc_dropout_iter   Number of extra ``predict`` passes run with
                            ``enable_dropout=True`` (default 0 = none).

    Files written (``<base>`` is ``--output_file`` without its extension):
        * ``--output_file``: header-less TSV with columns text, label,
          prediction.
        * ``<base>_output.npy`` / ``<base>_probs.npy``: raw ``predict`` output
          and its ``np_softmax``. Both are written with ``np.savetxt``, so they
          are plain text despite the ``.npy`` suffix.
        * ``<base>_class<i>_output.npy`` / ``<base>_class<i>_probs.npy``: one
          pair per index of the last axis of the stacked MC-dropout outputs
          (only when ``--mc_dropout_iter`` > 0).

    The accuracy of the arg-max predictions against the encoded labels is
    printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_file', required=True)
    parser.add_argument('-o', '--output_file', required=True)
    parser.add_argument('--n_batch', type=int, default=8)
    parser.add_argument('--skip_preprocess', action='store_true')
    parser.add_argument('--sentence_pair', action='store_true')
    parser.add_argument('--force_delimiter', action='store_true')
    parser.add_argument('--encoder_path', type=str, default='model/encoder_bpe_40000.json')
    parser.add_argument('--bpe_path', type=str, default='model/vocab_40000.bpe')
    parser.add_argument('--model_dir', required=True)
    parser.add_argument('--mc_dropout_iter', type=int, default=0)
    args = parser.parse_args()

    # Use a context manager so the metadata file handle is closed right away
    # instead of being left for the garbage collector.
    meta_path = os.path.join(args.model_dir, 'meta.json')
    with open(meta_path, 'r', encoding='utf8') as meta_file:
        meta = json.load(meta_file)

    text_encoder = TextEncoder(args.encoder_path, args.bpe_path)
    encoder = text_encoder.encoder
    # Vocabulary size must be taken BEFORE the special tokens are appended.
    n_vocab = len(text_encoder.encoder)

    # Special tokens get the next free ids, in this exact order.
    encoder['_start_'] = len(encoder)
    if args.sentence_pair or args.force_delimiter:
        encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    n_special = 2 + int('_delimiter_' in encoder)
    n_ctx = meta['dh_model']['n_ctx']
    max_len = meta['encoder']['max_len']
    if args.sentence_pair:
        max_len = min(max_len, n_ctx // 2 - 2)

    texts, labels = load_headerless_tsv(args.input_file, sentence_pair=args.sentence_pair)
    ((X, Y),) = encode_dataset(*[(texts, labels)],
                               encoder=text_encoder,
                               skip_preprocess=args.skip_preprocess)

    # The second return value is not needed for prediction here.
    X, _mask = transform_classification(X, max_len, encoder['_start_'], clf_token,
                                        n_vocab, n_special, n_ctx,
                                        encoder.get('_delimiter_'))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    n_batch_train = args.n_batch * max(n_gpu, 1)

    meta['dh_model']['cfg'] = dotdict(meta['dh_model']['cfg'])
    dh_model = DoubleHeadModel(**meta['dh_model'])
    dh_model.to(device)
    dh_model = torch.nn.DataParallel(dh_model)

    # On a CPU-only host, remap every stored tensor to the CPU; otherwise let
    # torch restore tensors to the devices they were saved from.
    map_location = device if device.type == 'cpu' else None
    params_path = os.path.join(args.model_dir, 'best_params')
    # NOTE(review): torch.load unpickles the checkpoint -- only point
    # --model_dir at files from a trusted source.
    dh_model.load_state_dict(torch.load(params_path, map_location=map_location))

    # Arguments shared by the deterministic pass and every MC-dropout pass.
    predict_kwargs = dict(X=X,
                          submission_dir=None,
                          filename=None,
                          pred_fn=lambda x: x,
                          label_decoder=None,
                          dh_model=dh_model,
                          n_batch_train=n_batch_train,
                          device=device)

    prediction_output = predict(**predict_kwargs)

    # assumes predict returns one row per sample and one column per class
    # -- TODO confirm against predict()
    predictions = np.argmax(prediction_output, axis=1)
    if isinstance(texts, tuple):
        df = pd.DataFrame({'question': texts[0], 'text': texts[1],
                           'label': labels, 'prediction': predictions})
    else:
        df = pd.DataFrame({'text': texts, 'label': labels, 'prediction': predictions})
    # NOTE(review): in sentence-pair mode the 'question' column is built above
    # but is not part of `columns`, so it never reaches the output file --
    # confirm this is intended.
    df.to_csv(args.output_file,
              index=False,
              sep='\t',
              header=False,
              columns=['text', 'label', 'prediction'],
              float_format='%.0f')

    accuracy = accuracy_score(Y, predictions) * 100.
    print('Accuracy: {}%'.format(accuracy))

    basename = os.path.splitext(args.output_file)[0]

    np.savetxt(basename + '_output.npy', prediction_output)
    np.savetxt(basename + '_probs.npy', np_softmax(prediction_output))

    # Extra passes with dropout enabled; zero iterations leaves the list empty.
    mc_runs = [predict(**predict_kwargs, enable_dropout=True)
               for _ in tqdm(range(args.mc_dropout_iter))]
    if not mc_runs:
        return

    mc_outputs = np.asarray(mc_runs)
    mc_probs = np.zeros(mc_outputs.shape)
    for run_idx, run_output in enumerate(mc_outputs):
        mc_probs[run_idx, ...] = np_softmax(run_output)

    # Reverse the axes so the former last axis comes first; one file pair is
    # then written per index of that axis (named "class<i>").
    transpose_dims = (2, 1, 0)
    mc_outputs = mc_outputs.transpose(transpose_dims)
    mc_probs = mc_probs.transpose(transpose_dims)
    for class_idx in range(mc_outputs.shape[0]):
        class_output_file = '{}_class{}_{}'.format(basename, class_idx, 'output.npy')
        np.savetxt(class_output_file, mc_outputs[class_idx, ...])
        class_probs_file = '{}_class{}_{}'.format(basename, class_idx, 'probs.npy')
        np.savetxt(class_probs_file, mc_probs[class_idx, ...])