Example #1
def transformer_predict(input_file: str, text_encoder: TextEncoder,
                        device: int):
    if device > -1:
        device_name = "cuda"
    else:
        device_name = "cpu"

    print(input_file)
    n_ctx = 512

    transformer = TransformerModel(DEFAULT_CONFIG,
                                   n_ctx=n_ctx,
                                   requires_grad=False)
    load_openai_pretrained_model(transformer, n_ctx=n_ctx)

    with open(input_file) as f:
        sentences = f.readlines()

    encoded_sentences = text_encoder.encode(sentences)

    masks = [
        np.concatenate((np.ones(len(s)), np.zeros(n_ctx - len(s))))
        for s in encoded_sentences
    ]

    input_tensor = torch.LongTensor([
        pad_sequence_to_length(s, desired_length=512)
        for s in encoded_sentences
    ])
    if device_name == "cuda":
        input_tensor = input_tensor.cuda()

    batch_size, num_timesteps = input_tensor.size()

    positional_encodings = get_range_vector(num_timesteps, device) + n_ctx

    batch_tensor = torch.stack(
        [input_tensor,
         positional_encodings.expand(batch_size, num_timesteps)],
        dim=-1)

    if device_name == "cuda":
        transformer = transformer.cuda()

    transformer_embeddings = transformer(batch_tensor)

    np.save("openai_transformer_test_input.npy",
            batch_tensor.data.cpu().numpy())
    np.save("openai_transformer_test_output.npy",
            transformer_embeddings.data.cpu().numpy())
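A hedged sketch of how the fixture files written above might be consumed later, for instance in a regression test (the file names come from the code above; everything else is illustrative):

import numpy as np

test_input = np.load("openai_transformer_test_input.npy")        # stacked token/position ids
expected_output = np.load("openai_transformer_test_output.npy")  # saved transformer embeddings
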
def load_openai_gpt(n_special=1, n_ctx=512):
    text_encoder = TextEncoder("pytorch-openai-transformer-lm/model/encoder_bpe_40000.json",
                               "pytorch-openai-transformer-lm/model/vocab_40000.bpe")
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)
    vocab = n_vocab + n_special + n_ctx

    args = DEFAULT_CONFIG
    lm_model = LMModel(args, vocab, n_ctx, return_probs=True)
    load_openai_pretrained_model(lm_model.transformer, n_ctx=n_ctx, n_special=n_special,
                                 path="pytorch-openai-transformer-lm/model/",
                                 path_names="pytorch-openai-transformer-lm/")
    # lm_model.to(device)
    lm_model.return_probs = False
    lm_model.eval()
    return lm_model, text_encoder
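For orientation, a minimal usage sketch for a loader like this, assuming the LMModel from pytorch-openai-transformer-lm, whose forward pass takes a LongTensor of shape [batch, sequence, 2] pairing token ids with position ids (positions index embedding rows after the token vocabulary and special tokens). Everything below is illustrative, not taken from the example above.

import torch

lm_model, text_encoder = load_openai_gpt(n_special=1, n_ctx=512)

# Encode one sentence to BPE token ids.
token_ids = text_encoder.encode(["the quick brown fox jumps over the lazy dog"])[0]
n_vocab = len(text_encoder.encoder)

# Pair each token id with its position id (offset past vocab + specials).
x = torch.LongTensor([[[tok, n_vocab + 1 + i] for i, tok in enumerate(token_ids)]])

with torch.no_grad():
    lm_logits = lm_model(x)  # logits over the extended vocabulary
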
Example #3
    def __init__(self):
        # initialize lm and text encoder and everything

        # set up the encoder to turn words into indices
        encoder_path = 'model/encoder_bpe_40000.json'
        bpe_path = 'model/vocab_40000.bpe'
        self.text_encoder = TextEncoder(encoder_path, bpe_path)

        self.nvocab = len(self.text_encoder.encoder)
        nctx = 512  # context length, i.e. the number of positional embeddings
        vocab = self.nvocab + nctx

        # set up pretrained openai model
        args = DEFAULT_CONFIG
        self.lm_model = LMModel(args, vocab, nctx, return_probs = True)
        load_openai_pretrained_model(self.lm_model.transformer, n_ctx=nctx, n_special=0)
        self.lm_model.eval() # this line puts the model in eval mode so we don't do dropout :) 


        # set up spacy for pos tagging
        self.nlp = spacy.load('en', disable=['ner', 'textcat', 'parser'])
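
A hedged sketch of a helper this class could expose using the spaCy pipeline configured above; the method name is hypothetical and not taken from the source.

    def pos_tags(self, text):
        # The tagger component is kept enabled above, so token.pos_ is populated.
        doc = self.nlp(text)
        return [(tok.text, tok.pos_) for tok in doc]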
Example #4
    model_stepwise = StepwiseClassifierModel(args, n_classifier=args.n_classes, vocab_count=args.vocab_count, extra_block=args.extra_block)

    model_opt = OpenAIAdam(model_stepwise.parameters(),
                           lr=args.lr, schedule=args.lr_schedule, 
                           warmup=args.lr_warmup, t_total=n_updates_total,
                           b1=args.b1, b2=args.b2, e=args.e,
                           l2=args.l2, vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
                           
    epoch_start, epoch_max, loss_best = -1, args.n_epoch, None

    if args.checkpoint is None:
      load_openai_pretrained_model(
        model_stepwise.transformer, 
        n_special=args.tokens_special,  n_ctx=n_ctx,   # n_ctx adjusts embedding size to include positional
        path=pretrained_model_path+'/',
        path_names=os.path.join('.', 'orig', 'pytorch-openai-transformer-lm')+'/',
      )

    model_stepwise.to(device)

    if torch.cuda.device_count() > 1:  # https://pytorch.org/tutorials/beginner/blitz/data_parallel_tutorial.html
      print("Let's use", torch.cuda.device_count(), "GPUs!")
      model_stepwise = nn.DataParallel(model_stepwise)
      

    os.makedirs('./checkpoints', exist_ok=True)
      
    if args.checkpoint is not None:
      checkpoint = torch.load(args.checkpoint, map_location=lambda storage, loc: storage)
      epoch_start = checkpoint['epoch']
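
The snippet ends by restoring only the epoch counter; a checkpoint in ./checkpoints would typically also need model and optimizer state to resume. A hedged sketch of a matching save call (the file name and every key other than 'epoch' are assumptions, not taken from the source):

import os
import torch

def save_checkpoint(model, optimizer, epoch, loss_best, path='./checkpoints'):
    # Hypothetical counterpart to the loading code above.
    torch.save({
        'epoch': epoch,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'loss_best': loss_best,
    }, os.path.join(path, 'model_%04d.pth' % epoch))
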
Example #5
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    compute_loss_fct = MultipleChoiceLossCompute(criterion,
                                                 criterion,
                                                 args.lm_coef,
                                                 model_opt)
    load_openai_pretrained_model(dh_model.transformer, n_ctx=n_ctx, n_special=n_special)

    dh_model.to(device)
    dh_model = nn.DataParallel(dh_model)

    n_updates = 0
    n_epochs = 0
    if dataset != 'stsb':
        trYt = trY
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        torch.save(dh_model.state_dict(), make_path(path))
    best_score = 0
    for i in range(args.n_iter):
        print("running epoch", i)
        run_epoch()
    model_opt = OpenAIAdam(list(model.parameters()) + list(clf_head.parameters()) + list(lm_head.parameters()),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    compute_loss_fct = LossCompute(criterion,
                                   criterion,
                                   args.lm_coef,
                                   model_opt)
    load_openai_pretrained_model(model, n_ctx=n_ctx, n_special=n_special)

    model.to(device)
    lm_head.to(device)
    clf_head.to(device)

    n_updates = 0
    n_epochs = 0
    if dataset != 'stsb':
        trYt = trY
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        torch.save(model.state_dict(), make_path(path))
    best_score = 0
    for i in range(args.n_iter):
        print("running epoch", i)
    criterion = nn.CrossEntropyLoss(reduction="none")
    model_opt = OpenAIAdam(lm_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)
    compute_loss_fct = LMLossCompute(criterion, model_opt)
    load_openai_pretrained_model(lm_model.transformer,
                                 n_ctx=n_ctx,
                                 n_special=n_special,
                                 n_vocab=n_vocab)
    lm_model.to(device)
    lm_model = nn.DataParallel(lm_model)
    n_updates = 0
    n_epochs = 0
    if submit:
        path = os.path.join(save_dir, desc, 'best_params')
        print(path)
        torch.save(lm_model.state_dict(), make_path(path))
    best_score = 0

    for i in range(args.n_iter):
        print("running epoch", i)
        run_epoch()
        n_epochs += 1
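
None of the snippets above show run_epoch. As a rough, hedged sketch of the training-loop shape for the first of the three snippets (dh_model with MultipleChoiceLossCompute): the variable names, the output order of dh_model, and the assumption that the loss-compute wrapper also drives backward() and the optimizer step follow the reference pytorch-openai-transformer-lm train.py, but are not taken verbatim from the code above.

def run_epoch():
    global n_updates
    dh_model.train()
    for xmb, mmb, ymb in iter_data(trX, trM, trYt,
                                   n_batch=n_batch_train, truncate=True):
        XMB = torch.tensor(xmb, dtype=torch.long).to(device)
        YMB = torch.tensor(ymb, dtype=torch.long).to(device)
        MMB = torch.tensor(mmb).to(device)
        lm_logits, clf_logits = dh_model(XMB)
        # Assumed: the loss-compute object calls backward() and steps the optimizer.
        compute_loss_fct(XMB, YMB, MMB, clf_logits, lm_logits)
        n_updates += 1
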
Example #8
        dh_model.to(device)
        dh_model = nn.DataParallel(dh_model)
        print("Loading snapshot...")
        snapshot_dict = torch.load(
            os.path.join(args.snapshot_dir, 'best_params'))
        if args.snapshot_mode == 'transformer_only':
            model_dict = dh_model.state_dict()
            model_dict.update({
                k: v
                for k, v in snapshot_dict.items() if 'task_head' not in k
            })
            snapshot_dict = model_dict
        dh_model.load_state_dict(snapshot_dict)
    else:
        load_openai_pretrained_model(dh_model.transformer,
                                     n_ctx=n_ctx,
                                     n_special=n_special,
                                     n_transfer=args.n_transfer)
        dh_model.to(device)
        dh_model = nn.DataParallel(dh_model)

    n_train = len(trY)
    n_valid = len(vaY)

    n_batch_train = args.n_batch * max(n_gpu, 1)
    n_updates_total = (n_train // n_batch_train) * args.n_iter

    criterion = nn.CrossEntropyLoss(reduction="none")
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
def main(args):
    init(args)

    # Constants
    n_ctx = args.n_ctx
    data_dir = args.data_dir

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device", device, "n_gpu", n_gpu)

    text_encoder = TextEncoder(args.encoder_path, args.bpe_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)
    text_encoder.decoder[len(encoder)] = '_start_'
    encoder['_start_'] = len(encoder)
    text_encoder.decoder[len(encoder)] = '_delimiter_'
    encoder['_delimiter_'] = len(encoder)
    text_encoder.decoder[len(encoder)] = '_classify_'
    encoder['_classify_'] = len(encoder)

    n_special = 3  # XD: useless for language modeling task
    vocab = n_vocab + n_special + n_ctx

    lm_model = LMModel(args,
                       vocab,
                       n_ctx,
                       return_probs=True,
                       doc_embed=args.doc_model)
    load_openai_pretrained_model(lm_model.transformer,
                                 n_ctx=n_ctx,
                                 n_special=n_special)
    if args.checkpoint != "none":
        checkpoint = torch.load(args.checkpoint, map_location='cpu')
        state_dict = checkpoint["state_dict"]
        # Strip the 'module.' prefix (7 characters) that DataParallel adds to
        # parameter names when the checkpoint was saved from the wrapped model.
        for key in list(state_dict.keys()):
            state_dict[key[7:]] = state_dict[key]
            del state_dict[key]
        pos_emb_mask = torch.zeros(1, 1, vocab)
        pos_emb_mask[:, :, -n_ctx:] = -1e12  # mask all positional-embedding slots in the output distribution
        state_dict['pos_emb_mask'] = pos_emb_mask
        lm_model.load_state_dict(state_dict)
    lm_model.to(device)
    lm_model = DataParallelModel(lm_model)

    train_bar = get_loader(os.path.join(data_dir, "val_encoded.jsonl"),
                           n_gpu,
                           encoder,
                           num_workers=1,
                           shuffle=True,
                           max_size=args.n_iter)
    srcs, hyps, refs = [], [], []
    with torch.no_grad():
        lm_model.eval()
        for i, (pad_output, mask_output) in enumerate(tqdm(train_bar), 1):
            src_strs, tgt_strs, gen_strs = generate_outputs(
                lm_model, pad_output, mask_output, text_encoder, device,
                args.beam, args.gen_len, args.k, args.decoding_strategy)
            srcs.extend(src_strs)
            hyps.extend(gen_strs)
            refs.extend(tgt_strs)

    for i in range(len(hyps)):
        print("*" * 50)
        print("Source: {}".format(srcs[i]))
        print('Hypothesis: {}'.format(hyps[i]))
        print("Reference: {}".format(refs[i]))
def main(args):
    # Constants
    n_ctx = args.n_ctx
    desc = args.desc

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device", device, "n_gpu", n_gpu)

    text_encoder = TextEncoder(args.encoder_path, args.bpe_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)

    encoder['_start_'] = len(encoder)
    encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    n_special = 3

    print("Loading dataset...")
    test_loader = get_loader(args.data_file,
                             args.n_batch,
                             encoder,
                             num_workers=1,
                             shuffle=False,
                             subset=args.subset)

    vocab = n_vocab + n_special + n_ctx
    dh_model = LMModel(args,
                       vocab=vocab,
                       n_ctx=n_ctx,
                       doc_embed=args.doc_model)

    print("Loading model...")
    load_openai_pretrained_model(dh_model.transformer,
                                 n_ctx=n_ctx,
                                 n_special=n_special,
                                 path="./model/",
                                 path_names="./")
    if args.checkpoint != "none":
        checkpoint = torch.load(args.checkpoint, map_location='cpu')
        state_dict = checkpoint["state_dict"]
        for key in list(state_dict.keys()):
            state_dict[key[7:]] = state_dict[key]
            del state_dict[key]
        pos_emb_mask = torch.zeros(1, 1, vocab)
        pos_emb_mask[:, :, -n_ctx:] = -1e12  # mask all positional-embedding slots in the output distribution
        state_dict['pos_emb_mask'] = pos_emb_mask
        dh_model.load_state_dict(state_dict)

    dh_model.to(device)
    dh_model = DataParallelModel(dh_model)

    stop_words = []
    if args.stop_words is not None:
        with open(args.stop_words) as f:
            for line in f:
                stop_words.append(line.strip())
    evaluate_model(dh_model, test_loader, text_encoder, device, args.beam,
                   args.gen_len, args.k, args.decoding_strategy,
                   args.save_file, args.gen_dir, args.tgt_dir, args.max_len,
                   stop_words, args)
def main(args):
    init(args)

    # Constants
    n_ctx = args.n_ctx
    save_dir = os.path.join(args.output_dir, args.experiment_name, "checkpoints")
    desc = args.desc
    data_dir = args.data_dir
    log_dir = os.path.join(args.output_dir, args.experiment_name, "logs")
    train_log_interval = args.train_log_interval
    val_log_interval = args.val_log_interval
    beam = args.beam
    gen_len = args.gen_len
    k = args.k
    decoding_strategy = args.decoding_strategy
    accum_iter = args.accum_iter

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    print("device", device, "n_gpu", n_gpu)
    logger = Logger(log_dir)

    text_encoder = TextEncoder(args.encoder_path, args.vocab_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)
    encoder['_start_'] = len(encoder)
    encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    n_special = 3

    print("Loading dataset...")
    train_loader = get_loader(os.path.join(data_dir, "train_encoded.jsonl"), args.n_batch, encoder, num_workers=3, shuffle=True)
    val_loader = get_loader(os.path.join(data_dir, "val_encoded.jsonl"), n_gpu, encoder, num_workers=0, shuffle=False, max_size=args.num_val_examples)
    print("Train length: {}, Validation length: {}".format(len(train_loader), len(val_loader)))

    vocab = n_vocab + n_special + n_ctx
    n_updates_total = (len(train_loader) // args.accum_iter) * (args.num_epochs_dat + args.num_epochs_ft)

    dh_model = LMModel(args, vocab=vocab, n_ctx=n_ctx, doc_embed=args.doc_model)

    criterion = nn.CrossEntropyLoss(reduction="none")
    model_opt = OpenAIAdam(dh_model.parameters(),
                           lr=args.lr,
                           schedule=args.lr_schedule,
                           warmup=args.lr_warmup,
                           t_total=n_updates_total,
                           b1=args.b1,
                           b2=args.b2,
                           e=args.e,
                           l2=args.l2,
                           vector_l2=args.vector_l2,
                           max_grad_norm=args.max_grad_norm)

    lm_loss = LMLoss(criterion)
    summary_loss = SummaryLoss(criterion)

    print("Loading Model")
    if args.use_pretrain:
        load_openai_pretrained_model(dh_model.transformer, n_ctx=n_ctx, n_special=n_special, path="./model/", path_names="./")
    start_iter, running_loss = load_checkpoint(args.checkpoint, dh_model, model_opt, vocab, n_ctx)

    dh_model.to(device)
    dh_model = DataParallelModel(dh_model)
    lm_loss = DataParallelCriterion(lm_loss)
    summary_loss = DataParallelCriterion(summary_loss)

    for i in range(args.num_epochs_dat):
        start_iter, running_loss = run_epoch(
            start_iter, running_loss, dh_model, lm_loss, model_opt,
            train_loader, val_loader, train_log_interval, val_log_interval,
            device, beam, gen_len, k, decoding_strategy, accum_iter,
            "DAT Training Epoch [{}/{}]".format(i + 1, args.num_epochs_dat),
            save_dir, logger, text_encoder,
            show_progress=args.show_progress, summary_loss=summary_loss)
    for i in range(args.num_epochs_ft):
        start_iter, running_loss = run_epoch(
            start_iter, running_loss, dh_model, summary_loss, model_opt,
            train_loader, val_loader, train_log_interval, val_log_interval,
            device, beam, gen_len, k, decoding_strategy, accum_iter,
            "FT Training Epoch [{}/{}]".format(i + 1, args.num_epochs_ft),
            save_dir, logger, text_encoder,
            show_progress=args.show_progress)
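
The earlier main() functions strip a 'module.' prefix when restoring checkpoints, which suggests checkpoints are written from the DataParallel-wrapped model under a 'state_dict' key. A hedged sketch of a save call consistent with that (the file name is an assumption):

    # Hypothetical save, mirroring the 'module.'-prefix handling above.
    torch.save({'state_dict': dh_model.state_dict()},
               os.path.join(save_dir, 'checkpoint_best.pt'))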