def gpt_neo(prompt, model, max_length=100, temp=0.9):
    if model == "1.3B":
        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    elif model == "2.7B":
        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
    elif model == "125M":
        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
    elif model == "350M":
        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-350M")
        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-350M")
    else:
        # Raise instead of printing: the original fell through and hit a
        # NameError on the undefined tokenizer below.
        raise ValueError("That is not a valid model")
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    gen_tokens = model.generate(input_ids, do_sample=True, temperature=temp, max_length=max_length)
    gen_text = tokenizer.batch_decode(gen_tokens)[0]
    return gen_text
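# A minimal usage sketch for gpt_neo above (assumed, not part of the original
# snippet); it presumes GPTNeoForCausalLM and GPT2Tokenizer are imported from
# transformers as in the later snippets.
if __name__ == "__main__":
    print(gpt_neo("The meaning of life is", "125M", max_length=50))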
def create_and_check_forward_and_backwards(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
    model = GPTNeoForCausalLM(config)
    model.to(torch_device)
    result = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
    self.parent.assertEqual(result.loss.shape, ())
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
    result.loss.backward()
def _load_gpt_j_split_ckpt(ckpt_dir, config=GPT_J_CONFIG):
    model = GPTNeoForCausalLM.from_pretrained(
        pretrained_model_name_or_path=None,
        config=config,
        state_dict=SplitCheckpoint(ckpt_dir),
    )
    return model
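# Hypothetical invocation of _load_gpt_j_split_ckpt above; SplitCheckpoint and
# GPT_J_CONFIG come from the snippet's surrounding (unshown) module, and the
# checkpoint directory below is a placeholder.
# gpt_j_model = _load_gpt_j_split_ckpt("./gpt-j-6B-split-ckpt/")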
def test_lm_generate_gpt_neo(self):
    for checkpointing in [True, False]:
        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing)
        model.to(torch_device)
        input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)  # The dog
        # fmt: off
        # The dog-eared copy of the book, which is a collection of essays by the late author,
        expected_output_ids = [464, 3290, 12, 3380, 4866, 286, 262, 1492, 11, 543, 318, 257, 4947, 286, 27126, 416, 262, 2739, 1772, 11]
        # fmt: on
        output_ids = model.generate(input_ids, do_sample=False)
        self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
def __init__(self):
    super(CustomGPTNeo, self).__init__()
    in_dim = 50257
    hidden_dim1 = 450
    num_tokens = 1790
    hidden_dim2 = 1
    out_dim = 1
    self.gptneo = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    self.regressor = MLP(in_dim, hidden_dim1, hidden_dim2, out_dim, num_tokens)
def test_gpt_neo_sample(self):
    tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model.to(torch_device)

    torch.manual_seed(0)
    tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True)
    input_ids = tokenized.input_ids.to(torch_device)
    output_ids = model.generate(input_ids, do_sample=True)
    output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    EXPECTED_OUTPUT_STR = "Today is a nice day and if you don’t get the memo here is what you can"
    self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config_json = json.load(open(config_file, "r"))
    config = GPTNeoConfig(
        hidden_size=config_json["n_embd"],
        num_layers=config_json["n_layer"],
        num_heads=config_json["n_head"],
        attention_types=config_json["attention_types"],
        max_position_embeddings=config_json["n_positions"],
        resid_dropout=config_json["res_dropout"],
        embed_dropout=config_json["embed_dropout"],
        attention_dropout=config_json["attn_dropout"],
    )
    print(f"Building PyTorch model from configuration: {config}")
    model = GPTNeoForCausalLM(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_gpt_neo(model, config, tf_checkpoint_path)

    # Save pytorch-model
    print(f"Save PyTorch model to {pytorch_dump_path}")
    model.save_pretrained(pytorch_dump_path)
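# Assumed invocation sketch for convert_tf_checkpoint_to_pytorch above; the
# three paths are placeholders, not files shipped with the snippet.
# convert_tf_checkpoint_to_pytorch(
#     tf_checkpoint_path="./mesh_tf_ckpt/model.ckpt",
#     config_file="./mesh_tf_ckpt/config.json",
#     pytorch_dump_path="./gpt_neo_pytorch",
# )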
def __init__(self):
    super(CustomGPTNeo, self).__init__()
    in_dim = 50257
    hidden_dim1 = 2048
    num_tokens = 45
    dim_flat = hidden_dim1 * num_tokens
    hidden_dim2 = 500
    out_dim = 1
    self.gptneo = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    self.layer1 = nn.Sequential(nn.Linear(in_dim, hidden_dim1), nn.ReLU(inplace=True))
    self.layer2 = nn.Sequential(nn.Linear(hidden_dim1, hidden_dim2),
                                nn.ReLU(inplace=True),
                                nn.Flatten(),
                                nn.Linear(hidden_dim2, out_dim))
def test_batch_generation(self):
    model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model.to(torch_device)

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    tokenizer.padding_side = "left"

    # Define PAD Token = EOS Token = 50256
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

    # use different length sentences to test batching
    sentences = [
        "Hello, my dog is a little",
        "Today, I am",
    ]

    inputs = tokenizer(sentences, return_tensors="pt", padding=True)
    input_ids = inputs["input_ids"].to(torch_device)

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs["attention_mask"].to(torch_device),
    )

    inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
    output_non_padded = model.generate(input_ids=inputs_non_padded)

    num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
    inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
    output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)

    batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
    padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)

    expected_output_sentence = [
        "Hello, my dog is a little bit of a kitty. She is a very sweet and loving",
        "Today, I am going to talk about the best way to get a job in the",
    ]
    self.assertListEqual(expected_output_sentence, batch_out_sentence)
    self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
def predict_text(input: str) -> str:
    logging.info("Getting model weights environment variable")
    model_weights_path = os.getenv(MODEL_WEIGHTS_PATH_ENV_VARIABLE)

    logging.info(f"Loading model weights from file: {model_weights_path}")
    model = GPTNeoForCausalLM.from_pretrained(model_weights_path)
    tokenizer = GPT2Tokenizer.from_pretrained(model_weights_path)
    logging.info(f"Loaded model weights from file: {model_weights_path}")

    input_ids = tokenizer(input, return_tensors="pt").input_ids

    logging.info("Generating text")
    gen_tokens = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.9,
        max_length=200,
    )
    gen_text = tokenizer.batch_decode(gen_tokens)[0]
    logging.info(f"Generated text: {gen_text}")
    return gen_text
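# Assumed usage sketch for predict_text above: the weights path is a
# placeholder, and MODEL_WEIGHTS_PATH_ENV_VARIABLE is the constant from the
# snippet's own (unshown) module scope.
# os.environ[MODEL_WEIGHTS_PATH_ENV_VARIABLE] = "/path/to/gpt-neo-weights"
# print(predict_text("Once upon a time"))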
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
print("Loading, please wait...")  # it takes a while to load the AI

from transformers import GPTNeoForCausalLM, GPT2Tokenizer
import os
import random
import sys

MODEL_NAME = 'EleutherAI/gpt-neo-125M'
MODEL = GPTNeoForCausalLM.from_pretrained(MODEL_NAME)
TOKENIZER = GPT2Tokenizer.from_pretrained(MODEL_NAME)

safemode = False  # TODO: Make safe mode actually do something
temperature = 1.0

# help message for /help
HELP = f"""
[AIC help text]
/help - show this help
/remember <thing> - add something to memory
/forget <thing> - remove something from memory
/memory - view the memory
/delete <thing> - erase something from the story
/rename <old> <new> - rename a character or object
/safemode - toggle safe mode (censor things you might not want to see)
from transformers import GPTNeoForCausalLM, AutoTokenizer
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("model")
args = parser.parse_args()

model = GPTNeoForCausalLM.from_pretrained(args.model).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(args.model)

while True:
    text = input("\n\nInput text to prompt the model: ")
    text = str(text)
    if len(text) == 0:
        continue
    ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")

    # add the length of the prompt tokens to match with the mesh-tf generation
    max_length = 400 + ids.shape[1]

    gen_tokens = model.generate(
        ids,
        do_sample=True,
        min_length=max_length,
        max_length=max_length,
        temperature=0.9,
        use_cache=True,
    )
    gen_text = tokenizer.batch_decode(gen_tokens)[0]
    print(gen_text)  # assumed completion: the snippet cut off here, but the loop needs visible output
# How to load these weights as storage for azure functions
# https://docs.microsoft.com/en-us/azure/azure-functions/storage-considerations
# Map Azure Functions to storage account
# https://docs.microsoft.com/en-us/azure/azure-functions/scripts/functions-cli-mount-files-storage-linux

# huggingface_hub.hf_hub_download("EleutherAI/gpt-neo-1.3B", "pytorch_model.bin")
file_path = "./1.3B/EleutherAI__gpt-neo-1.3B.a4a110859b10643e414fbb4c171cae4b6b9c7e49"

# print("Downloading...")
# # TODO: Can I get a specific repo or should I just download latest??
# file_path = huggingface_hub.snapshot_download("EleutherAI/gpt-neo-1.3B", cache_dir="./1.3B/")
# print(f"Output directory: {file_path}")

from transformers import GPTNeoForCausalLM, GPT2Tokenizer

model = GPTNeoForCausalLM.from_pretrained(file_path)
tokenizer = GPT2Tokenizer.from_pretrained(file_path)

prompt = "I like to have my AI write for me."
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

gen_tokens = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.9,
    max_length=200,
)
gen_text = tokenizer.batch_decode(gen_tokens)[0]
print(gen_text)
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '9994'
os.environ['RANK'] = "0"
os.environ['LOCAL_RANK'] = "0"
os.environ['WORLD_SIZE'] = "1"

import pandas as pd
import torch
from torch.utils.data import Dataset, random_split
from transformers import GPT2Tokenizer, TrainingArguments, Trainer, GPTNeoForCausalLM

torch.manual_seed(42)

tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B",
                                          bos_token='<|startoftext|>',
                                          eos_token='<|endoftext|>',
                                          pad_token='<|pad|>')
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B").cuda()
model.resize_token_embeddings(len(tokenizer))

descriptions = pd.read_csv('netflix_titles.csv')['description']
max_length = max([len(tokenizer.encode(description)) for description in descriptions])
print("Max length: {}".format(max_length))


class NetflixDataset(Dataset):
    def __init__(self, txt_list, tokenizer, max_length):
        self.input_ids = []
        self.attn_masks = []
        self.labels = []
        for txt in txt_list:
            # Assumed reconstruction: the snippet is cut off mid-call; these
            # are the usual arguments for padding every example to max_length.
            encodings_dict = tokenizer('<|startoftext|>' + txt + '<|endoftext|>',
                                       truncation=True,
                                       max_length=max_length,
                                       padding="max_length")
from flask import Flask, render_template
from flask_socketio import SocketIO, emit

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'  # Flask reads 'SECRET_KEY'; the original 'SECRET KEY' is silently ignored
socketio = SocketIO(app)

print("{0}OK!{1}".format(colors.GREEN, colors.END))

# Start transformers and create pipeline
if(not vars.model in ["InferKit", "Colab"]):
    if(not vars.noai):
        print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
        from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM

        # If custom GPT Neo model was chosen
        if(vars.model == "NeoCustom"):
            model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth)
            tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth)
            # Is CUDA available? If so, use GPU, otherwise fall back to CPU
            if(vars.hascuda and vars.usegpu):
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0)
            else:
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
        # If custom GPT2 model was chosen
        elif(vars.model == "GPT2Custom"):
            model = GPT2LMHeadModel.from_pretrained(vars.custmodpth)
            tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth)
            # Is CUDA available? If so, use GPU, otherwise fall back to CPU
            if(vars.hascuda and vars.usegpu):
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0)
            else:
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
         "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
         "researchers was the fact that the unicorns spoke perfect English."


def predict_text(prompt, max_length=100):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    gen_tokens = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.9,
        max_length=max_length,
    )
    gen_text = tokenizer.batch_decode(gen_tokens)[0]
    print(gen_text)
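# The snippet defines predict_text but never calls it; the line below is an
# assumed usage example, not part of the original, exercising the unicorn
# prompt defined above.
predict_text(prompt)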
def model(self):
    return GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B").to(torch_device)
import logging
import os

import azure.functions as func
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
# from analytics.predict import predict_text

# MOUNT_DIR = "/gptneo"
MOUNT_DIR = "TextAnalysisExtension/GPTNeo"
MODEL_PATH = "1.3B/EleutherAI__gpt-neo-1.3B.a4a110859b10643e414fbb4c171cae4b6b9c7e49"
FILE_PATH = os.path.join(MOUNT_DIR, MODEL_PATH)

# TODO: Figure out what plan will support this kind of workload
# https://docs.microsoft.com/en-us/azure/azure-functions/dedicated-plan
# Answer - none of them. going with Azure Container Instances to run this
model = GPTNeoForCausalLM.from_pretrained(FILE_PATH)
tokenizer = GPT2Tokenizer.from_pretrained(FILE_PATH)


# This function has an activity trigger so that it can be called from the orchestration function
def main(text: str) -> str:
    logging.info("predicting text")
    predicted = predict_text(text)
    return predicted


def predict_text(text: str) -> str:
    logging.info(f"predicting text from {text}")
    try:
        input_ids = tokenizer(text, return_tensors="pt").input_ids
        gen_tokens = model.generate(
            input_ids,
            do_sample=True,
            temperature=0.9,
            max_length=200,
        )
        gen_text = tokenizer.batch_decode(gen_tokens)[0]
        return gen_text
    except Exception as exc:  # assumed completion: the original snippet is cut off inside the try block
        logging.error(f"generation failed: {exc}")
        raise