Example no. 1
def predict_token(tokens_tensors):
    # Load pre-trained model (weights)
    model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
    model.eval()

    # If you have a GPU, put everything on cuda
    if torch.cuda.is_available():
        for i, tt in enumerate(tokens_tensors):
            tokens_tensors[i] = tokens_tensors[i].to('cuda')
        model.to('cuda')

    with torch.no_grad():
        # Predict all tokens
        mems = None
        all_predictions = []
        for i, tt in enumerate(tokens_tensors):
            #predictions, mems = model(tt)
            # We can re-use the memory cells in a subsequent call to attend a longer context
            #predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
            predictions, mems = model(tt, mems=mems)
            all_predictions.append(predictions)

    # get the predicted last token
    predicted_index = torch.argmax(all_predictions[-1][0, -1, :]).item()
    predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
    #assert predicted_token == 'who'
    return predicted_token
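
A minimal driver for predict_token, sketched under two assumptions the snippet leaves implicit: the Transformer-XL classes come from pytorch_pretrained_bert (the package suggested by the paths in Example no. 6), and a module-level tokenizer exists, since the function reads one without defining it.

import torch
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLLMHeadModel

# predict_token relies on a module-level tokenizer, so create one here
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')

text = "who was jim henson ?"
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
print(predict_token([torch.tensor([indexed_tokens])]))

Example no. 2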
import argparse

import torch
# assuming pytorch_pretrained_bert, which provides TransfoXLLMHeadModel
from pytorch_pretrained_bert import TransfoXLLMHeadModel


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size',default=1,type=int,help='Batch size for inference')

    parser.add_argument('--model_name',default='transfo-xl-wt103',type=str,
                        help='Pre-trained model name')
    parser.add_argument('--max_seq_length',default=128,type=int,
                        help='Maximum total input sequence length after tokenization')

    args = parser.parse_args()

    input_ids = torch.zeros([args.batch_size,args.max_seq_length],dtype=torch.long)

    model = TransfoXLLMHeadModel.from_pretrained(args.model_name)
    torch.onnx.export(model,input_ids,'transfoxll_'+'batch'+str(args.batch_size)+'.onnx')


if __name__ == '__main__':
    main()
Example no. 3
import torch
# assuming pytorch_pretrained_bert for the Transformer-XL classes used below
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLLMHeadModel


def TextGenerator(line):
    tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
    line_tokenized = tokenizer.tokenize(line)
    line_indexed = tokenizer.convert_tokens_to_ids(line_tokenized)
    tokens_tensor = torch.tensor([line_indexed])
    max_predictions = 30
    mems = None
    l = []
    for i in range(max_predictions):
        predictions, mems = model(tokens_tensor, mems=mems)
        # topk returns (values, indices); [1][1] selects the second most likely token
        predicted_index_tensor = torch.topk(predictions[0, -1, :], 5)[1][1]
        predicted_index = predicted_index_tensor.item()
        predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
        print(predicted_token)
        l.append(predicted_token)
        tokens_tensor = torch.cat(
            (tokens_tensor, predicted_index_tensor.reshape(1, 1)), dim=1)
    return " ".join(l)
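
A quick, hypothetical call to the generator above. Note that it appends the second-ranked token at each step rather than the most likely one, and it reloads the model on every call, so it is slow.

print(TextGenerator("The meaning of life is"))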
Example no. 4
    def __init__(self, args):
        super().__init__()

        if args.transformerxl_model_dir is not None:
            model_name = args.transformerxl_model_dir
            dict_file = model_name
            print("Loading Transformer XL model from {}".format(model_name))
        else:
            model_name = args.transformerxl_model_name
            dict_file = model_name

        # Load pre-trained model tokenizer (vocabulary)
        self.tokenizer = TransfoXLTokenizer.from_pretrained(dict_file)

        self.vocab = list(self.tokenizer.idx2sym)
        self._init_inverse_vocab()
        self.eos_id = self.inverse_vocab[self.EOS_SYMBOL]
        self.unk_symbol = self.UNK_SYMBOL

        # Load pre-trained model (weights)
        self.txl_model = TransfoXLLMHeadModel.from_pretrained(model_name)
        self.txl_model.eval()
        print(self.txl_model.config)
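
This __init__ is a fragment lifted out of its class: the class statement and the base class that provide EOS_SYMBOL, UNK_SYMBOL, _init_inverse_vocab and inverse_vocab are not shown. A hypothetical minimal skeleton (names and values here are assumptions, not part of the original) that would make the fragment self-contained:

from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLLMHeadModel


class TransformerXLConnector:
    # hypothetical stand-ins for members the fragment expects to inherit
    EOS_SYMBOL = '<eos>'
    UNK_SYMBOL = '<unk>'

    def _init_inverse_vocab(self):
        self.inverse_vocab = {word: i for i, word in enumerate(self.vocab)}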
Example no. 5
import argparse
import logging
import math
import time

import torch
# assuming pytorch_pretrained_bert, whose TransfoXLLMHeadModel exposes reset_length()
# and the forward(input_ids, target, mems) signature used below
from pytorch_pretrained_bert import TransfoXLCorpus, TransfoXLLMHeadModel

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    parser = argparse.ArgumentParser(
        description='PyTorch Transformer Language Model')
    parser.add_argument('--model_name',
                        type=str,
                        default='transfo-xl-wt103',
                        help='pretrained model name')
    parser.add_argument('--split',
                        type=str,
                        default='test',
                        choices=['all', 'valid', 'test'],
                        help='which split to evaluate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='batch size')
    parser.add_argument('--tgt_len',
                        type=int,
                        default=128,
                        help='number of tokens to predict')
    parser.add_argument('--ext_len',
                        type=int,
                        default=0,
                        help='length of the extended context')
    parser.add_argument('--mem_len',
                        type=int,
                        default=1600,
                        help='length of the retained previous hidden states (memory)')
    parser.add_argument('--clamp_len',
                        type=int,
                        default=1000,
                        help='max positional embedding index')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        help='Do not use CUDA even though CUDA is available')
    parser.add_argument('--work_dir',
                        type=str,
                        required=True,
                        help='path to the work_dir')
    parser.add_argument('--no_log',
                        action='store_true',
                        help='do not log the eval result')
    parser.add_argument('--same_length',
                        action='store_true',
                        help='set same length attention with masking')
    parser.add_argument('--server_ip',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    parser.add_argument('--server_port',
                        type=str,
                        default='',
                        help="Can be used for distant debugging.")
    args = parser.parse_args()
    assert args.ext_len >= 0, 'extended context length must be non-negative'

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    logger.info("device: {}".format(device))

    # Load a pre-processed dataset
    # You can also build the corpus yourself using the TransfoXLCorpus methods
    # The pre-processing involves computing word frequencies to prepare the adaptive input and softmax,
    # and tokenizing the dataset
    # The pre-processed corpus is a conversion (using the conversion script)
    corpus = TransfoXLCorpus.from_pretrained(args.model_name)
    ntokens = len(corpus.vocab)

    va_iter = corpus.get_iterator('valid',
                                  args.batch_size,
                                  args.tgt_len,
                                  device=device,
                                  ext_len=args.ext_len)
    te_iter = corpus.get_iterator('test',
                                  args.batch_size,
                                  args.tgt_len,
                                  device=device,
                                  ext_len=args.ext_len)

    # Load a pre-trained model
    model = TransfoXLLMHeadModel.from_pretrained(args.model_name)
    model = model.to(device)

    logger.info(
        'Evaluating with bsz {} tgt_len {} ext_len {} mem_len {} clamp_len {}'.
        format(args.batch_size, args.tgt_len, args.ext_len, args.mem_len,
               args.clamp_len))

    model.reset_length(args.tgt_len, args.ext_len, args.mem_len)
    if args.clamp_len > 0:
        model.clamp_len = args.clamp_len
    if args.same_length:
        model.same_length = True

    ###############################################################################
    # Evaluation code
    ###############################################################################
    def evaluate(eval_iter):
        # Turn on evaluation mode which disables dropout.
        model.eval()
        total_len, total_loss = 0, 0.
        start_time = time.time()
        with torch.no_grad():
            mems = None
            for idx, (data, target, seq_len) in enumerate(eval_iter):
                ret = model(data, target, mems)
                loss, mems = ret
                loss = loss.mean()
                total_loss += seq_len * loss.item()
                total_len += seq_len
            total_time = time.time() - start_time
        logger.info('Time : {:.2f}s, {:.2f}ms/segment'.format(
            total_time, 1000 * total_time / (idx + 1)))
        return total_loss / total_len

    # Run on test data.
    if args.split == 'all':
        test_loss = evaluate(te_iter)
        valid_loss = evaluate(va_iter)
    elif args.split == 'valid':
        valid_loss = evaluate(va_iter)
        test_loss = None
    elif args.split == 'test':
        test_loss = evaluate(te_iter)
        valid_loss = None

    def format_log(loss, split):
        log_str = '| {0} loss {1:5.2f} | {0} ppl {2:9.3f} '.format(
            split, loss, math.exp(loss))
        return log_str

    log_str = ''
    if valid_loss is not None:
        log_str += format_log(valid_loss, 'valid')
    if test_loss is not None:
        log_str += format_log(test_loss, 'test')

    logger.info('=' * 100)
    logger.info(log_str)
    logger.info('=' * 100)


if __name__ == '__main__':
    main()
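
Example no. 6
The snippet below begins mid-script: its imports, the tokenizer, and the two input texts are not shown. The setup below is a sketch that reconstructs them, assuming the snippet follows the library's standard Transformer-XL walkthrough (an assumption consistent with the token ids and the predicted token 'who' in the comments).

import torch
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLModel, TransfoXLLMHeadModel

tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1)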
print(tokenizer.tokenize("who was jim henson ?"))
tokenized_text_2 = tokenizer.tokenize(text_2)

indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1); print(indexed_tokens_1)  # [2517, 11, 1666, 12034, 788]
print(tokenizer.convert_tokens_to_ids(tokenizer.tokenize("who was jim henson ?")))  # [52, 11, 24, 24, 788]; also case sensitive
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])

##################################################################
## TransfoXLModel
model = TransfoXLModel.from_pretrained('/Users/coder352/datasets/WordVec/pytorch_pretrained_bert/transfo-xl-wt103')
model.eval()

with torch.no_grad():
    hidden_states_1, mems_1 = model(tokens_tensor_1)  # Predict hidden states features for each layer
    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)  # We can re-use the memory cells in a subsequent call to attend a longer context

##################################################################
## TransfoXLLMHeadModel
model = TransfoXLLMHeadModel.from_pretrained('/Users/coder352/datasets/WordVec/pytorch_pretrained_bert/transfo-xl-wt103/')
model.eval()

with torch.no_grad():
    predictions_1, mems_1 = model(tokens_tensor_1)  # Predict all tokens
    predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)  # We can re-use the memory cells in a subsequent call to attend a longer context

## get the predicted last token
predicted_index = torch.argmax(predictions_2[0, -1, :]).item(); print(predicted_index)  # 52
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index]); print(predicted_token)  # ['who']
Example no. 7
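Like the previous example, this snippet omits its preamble. A sketch of the assumed setup (imports, tokenizer, and the two example texts from the library's documented Transformer-XL walkthrough):

import torch
from pytorch_pretrained_bert import TransfoXLTokenizer, TransfoXLModel, TransfoXLLMHeadModel

tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1)
tokenized_text_2 = tokenizer.tokenize(text_2)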
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)

# Convert inputs to PyTorch tensors
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])

# Load pre-trained model (weights)
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
model.eval()

with torch.no_grad():
    # Predict hidden states features for each layer
    hidden_states_1, mems_1 = model(tokens_tensor_1)
    # We can re-use the memory cells in a subsequent call to attend a longer context
    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)

# Load pre-trained model (weights)
model = TransfoXLLMHeadModel.from_pretrained('transfo-xl-wt103')
model.eval()

with torch.no_grad():
    # Predict all tokens
    predictions_1, mems_1 = model(tokens_tensor_1)
    # We can re-use the memory cells in a subsequent call to attend a longer context
    predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)

# get the predicted last token
predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
# assert predicted_token == 'who'