def openAIGPTModel(*args, **kwargs): """ OpenAIGPTModel is the basic OpenAI GPT Transformer model based on identical stacked masked self-attention blocks and pre-trained on large scale dataset using language modeling signal. Example: # Load the tokenizer >>> import torch >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTTokenizer', 'openai-gpt') # Prepare tokenized input >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer" >>> tokenized_text = tokenizer.tokenize(text) >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) >>> tokens_tensor = torch.tensor([indexed_tokens]) # Load openAIGPTModel >>> model = torch.hub.load('huggingface/pytorch-transformers', 'openAIGPTModel', 'openai-gpt') >>> model.eval() # Predict hidden states features for each layer >>> with torch.no_grad(): hidden_states = model(tokens_tensor) """ model = OpenAIGPTModel.from_pretrained(*args, **kwargs) return model
def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_config_file, pytorch_dump_folder_path): # Construct model if openai_config_file == "": config = OpenAIGPTConfig() else: config = OpenAIGPTConfig.from_json_file(openai_config_file) model = OpenAIGPTModel(config) # Load weights from numpy load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path) # Save pytorch-model pytorch_weights_dump_path = pytorch_dump_folder_path + '/' + WEIGHTS_NAME pytorch_config_dump_path = pytorch_dump_folder_path + '/' + CONFIG_NAME print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) torch.save(model.state_dict(), pytorch_weights_dump_path) print("Save configuration file to {}".format(pytorch_config_dump_path)) with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: f.write(config.to_json_string())