class ImageGPT(pl.LightningModule):
    """Lightning module owning a GPT2 network, a frozen centroid table and a
    cross-entropy criterion.

    Args:
        centroids: Value handed to ``np.load``; the loaded array is wrapped in
            a parameter with ``requires_grad=False``.
        embed_dim: Forwarded to ``GPT2`` as ``embed_dim``.
        num_heads: Forwarded to ``GPT2`` as ``num_heads``.
        num_layers: Forwarded to ``GPT2`` as ``num_layers``.
        num_pixels: ``GPT2`` receives ``num_pixels * num_pixels`` as
            ``num_positions``.
        num_vocab: Forwarded to ``GPT2`` as ``num_vocab``.
        num_classes: Forwarded to ``GPT2`` as ``num_classes``.
        classify: Stored unchanged on the instance.
        learning_rate: Stored unchanged on the instance.
        steps: Stored unchanged on the instance.
        warmup_steps: Stored unchanged on the instance.
        **kwargs: Accepted but not used by this constructor.
    """

    def __init__(
        self,
        centroids,
        embed_dim=16,
        num_heads=2,
        num_layers=8,
        num_pixels=28,
        num_vocab=16,
        num_classes=10,
        classify=False,
        learning_rate=3e-3,
        steps=10_000,
        warmup_steps=500,
        **kwargs,
    ):
        super().__init__()
        # Must run inside __init__: Lightning collects the constructor
        # arguments from this frame.
        self.save_hyperparameters()

        # Plain configuration values, kept exactly as given.
        self.classify = classify
        self.learning_rate = learning_rate
        self.steps = steps
        self.warmup_steps = warmup_steps

        # One position per pixel of a num_pixels x num_pixels grid.
        num_positions = num_pixels * num_pixels
        self.gpt = GPT2(
            embed_dim=embed_dim,
            num_heads=num_heads,
            num_layers=num_layers,
            num_positions=num_positions,
            num_vocab=num_vocab,
            num_classes=num_classes,
        )

        # Registered as a parameter but excluded from gradient updates.
        codebook = torch.from_numpy(np.load(centroids))
        self.centroids = nn.Parameter(codebook, requires_grad=False)

        self.criterion = nn.CrossEntropyLoss()
def __init__(self, hparams):
    """Build the GPT2 network and the loss from a hyperparameter container.

    Args:
        hparams: Container of hyperparameters. The attributes ``embed_dim``,
            ``num_heads``, ``num_layers``, ``num_pixels``, ``num_vocab`` and
            ``num_classes`` are read from ``self.hparams`` after it has been
            registered.
    """
    super(ImageGPT, self).__init__()

    # Register through the supported API instead of assigning to
    # `self.hparams`: newer PyTorch Lightning releases deprecated and then
    # removed the `hparams` setter. `self.hparams` still offers attribute
    # access to every entry afterwards.
    self.save_hyperparameters(hparams)

    self.gpt = GPT2(
        embed_dim=self.hparams.embed_dim,
        num_heads=self.hparams.num_heads,
        num_layers=self.hparams.num_layers,
        # One position per pixel of a num_pixels x num_pixels grid.
        num_positions=self.hparams.num_pixels * self.hparams.num_pixels,
        num_vocab=self.hparams.num_vocab,
        num_classes=self.hparams.num_classes,
    )
    self.criterion = nn.CrossEntropyLoss()
def model_fn(data, training):
    """Build the masked next-token loss and the optimizer spec for one batch.

    Args:
        data: Mapping with a ``"tokens"`` entry (indexed as a 2-D tensor,
            sliced along axis 1) and a ``"length"`` entry (per-row token
            counts used to build the padding mask).
        training: Value cast to ``tf.float32`` and multiplied by 0.05 to get
            the dropout rate, so a false/zero value disables dropout.

    Returns:
        An ``estimator.ModelSpec`` carrying the loss, a gradient-descent
        optimizer and the model weights (both as trainable and as import
        variables).

    Reads the module-level ``config`` to construct the model.
    """
    model = GPT2(config, name="gpt2")

    # Shift by one: position t of `inputs` is trained to predict token t + 1.
    inputs = data["tokens"][:, :-1]
    labels = data["tokens"][:, 1:]

    # Read the time dimension while `labels` is still 2-D. After the reshape
    # below it is rank 1, and `labels.shape[1]` would index out of range.
    target_len = tf.shape(labels)[1]

    dropout = tf.cast(training, tf.float32) * 0.05
    logits = model(inputs, use_2d=True, attention_dropout=dropout, dropout=dropout)

    # NOTE(review): presumably `use_2d=True` makes the model return logits
    # flattened over batch and time, which is why the labels are flattened
    # as well -- confirm against GPT2.
    labels = tf.reshape(labels, [-1])
    loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

    # A row with `length` tokens has `length - 1` prediction targets; the
    # remaining positions are excluded from the average.
    mask = tf.sequence_mask(data["length"] - 1, maxlen=target_len)
    mask = tf.reshape(mask, [-1])
    mask = tf.cast(mask, loss.dtype)
    loss = tf.reduce_sum(mask * loss) / tf.reduce_sum(mask)

    lr = tf.Variable(1e-4, name="lr")
    model_spec = estimator.ModelSpec(
        loss=loss,
        optimizer=tf.train.GradientDescentOptimizer(lr),
        trainable_variables=model.weights,
        import_variables=model.weights)
    return model_spec
import json
import os

import tensorflow as tf

import estimator
from gpt2 import GPT2

tf.enable_eager_execution()

# Model in eager mode
model_path = "model"
with open(os.path.join(model_path, "hparams.json")) as hparams_file:
    config = json.load(hparams_file)

model = GPT2(config, name="gpt2")
# Call the model once on an empty int32 batch so its variables exist before
# the saved weights are loaded.
x = tf.zeros([0, 0], dtype=tf.int32)
_ = model(x)  # build model
model.load_weights(os.path.join(model_path, "weights.h5"))


def _data_builder(file_path, batch_size, pad_size):
    """Start a repeating dataset of tokenised lines read from ``file_path``.

    Every line is split on single spaces and the pieces are parsed as int32
    token ids; each element becomes ``{"tokens": ..., "length": ...}`` where
    ``length`` is the number of tokens on that line.

    NOTE(review): in the visible portion ``batch_size`` and ``pad_size`` are
    not used and nothing is returned -- the function appears to continue
    beyond what is shown here.
    """
    data = tf.data.TextLineDataset(file_path)
    data = data.repeat()

    def _map(line):
        # tf.strings.split works on a batch of strings, so wrap the single
        # line in a length-1 batch and take the flat values of the result.
        batched = tf.expand_dims(line, 0)
        pieces = tf.strings.split(batched, " ").values
        tokens = tf.strings.to_number(pieces, tf.int32)
        length = tf.shape(tokens)[0]
        return {"tokens": tokens, "length": length}

    data = data.map(_map)
# Script fragment. It parses the command-line arguments and echoes four of
# them, unpickles the dataset from `args.input_data` and the index table from
# `args.index_file`, and takes the (lower, upper) pair stored at position
# `int(args.index)`. It then loads the spaCy "en_core_web_sm" pipeline and a
# GPT2 wrapper, and resumes from a previously pickled result at
# `args.output_format + args.index + ".pkl"` when that file exists (otherwise
# it starts from an empty dict).
# NOTE(review): each `pickle.load(open(...))` never closes its file handle
# explicitly; a `with open(...)` block would close it deterministically.
# pickle executes arbitrary code on load, so these files must be trusted.
# NOTE(review): `get_probabilities` is cut off in this view (the final `for`
# header has no visible body), so its result is not documented here. The
# visible part splits each article on single spaces, asks the model for
# probabilities with topk=20, and prepares one empty result list per article.
args = parser.parse_args() print(args.index) print(args.index_file) print(args.input_data) print(args.output_format) data = pickle.load(open(args.input_data, "rb")) # print("Length ", len(data)) indices = pickle.load(open(args.index_file, "rb")) print(indices) lower, upper = indices[int(args.index)] print(lower, upper) nlp = spacy.load("en_core_web_sm") model = GPT2(device="cuda", location="./path/to/saved/model/") if os.path.exists(args.output_format + args.index + ".pkl"): output = pickle.load(open(args.output_format + args.index + ".pkl", "rb")) else: output = {} def get_probabilities(articles): """ Given a batch of articles (can be any strings) run a forward pass on GPT2 and obtain word probabilities for the same """ article_splits = [article.split(" ") for article in articles] payload = model.get_probabilities(articles, topk=20) res = [[] for i in range(len(articles))] for t, article in enumerate(articles):
from gpt2 import GPT2

gpt2 = GPT2(0)

# --- Text generation demo -------------------------------------------------
# Alternative prompt, kept for reference:
#history = """My name is Jonas and I have been hunting dinosaurs my entire life. One day during a tough hunt I met my wife Lilly. She was the love of my life and when I saw her the first time I could not focus on the triceratops who was just attacking me. He"""
history = """The conference was over and Peter was on his way home. He """
text = gpt2.generate_text(history, 100)

print(f"length hist: {len(history)}")
print(f"hist: {history}")
# Plain concatenation on purpose: it raises TypeError if generate_text ever
# returns something that is not a string.
print("text: " + text)

# --- Sentence scoring demo ------------------------------------------------
sentence1 = "the book is on the desk."
sentence2 = "he built the love with his bare ears."
prob1, prob2 = (gpt2.score_probability(s) for s in (sentence1, sentence2))

# `!s` forces str() on the scores, matching explicit str() conversion even for
# objects whose __format__ differs from their __str__.
print(f"Prob1: {prob1!s} prob2: {prob2!s}")
# Fragment. The leading `if save_model_on_epoch: ... return model` is the tail
# of a function whose header is outside this view: when `save_model_on_epoch`
# is truthy it saves `model.state_dict()` to
# `<output_dir>/<output_prefix>-<epoch>.pt`, and `return model` follows.
# The rest is script-level code: it instantiates GPT2, loads a state dict from
# "./gpt2-pytorch_model.bin", and renames every key containing "mlp" by
# replacing "mlp" with "feedforward". Keys are collected first and renamed in
# a second pass, so the dict is never modified while it is being iterated.
# NOTE(review): `model_dict` is assigned but not used in the visible portion.
# NOTE(review): `"mlp" in key` / `key.replace(...)` match the substring
# anywhere in the key, not just a whole path component -- confirm no other
# parameter name contains "mlp".
# NOTE(review): `torch.load` unpickles the file; only load trusted checkpoints.
if save_model_on_epoch: torch.save( model.state_dict(), os.path.join(output_dir, f"{output_prefix}-{epoch}.pt"), ) return model # print("\nloading dataset\n") # dataset = CSVTwitter("<|tweet|>", truncate=True, gpt2_type="gpt2") # gpt2_type = "gpt2" # print("\ndataset loaded\n") model_g = GPT2() model_dict = model_g.state_dict() #currently with random initialization state_dict = torch.load("./gpt2-pytorch_model.bin") #pretrained weights old_keys = [] new_keys = [] for key in state_dict.keys(): if "mlp" in key: #The hugging face state dict references the feedforward network as mlp, need to replace to `feedforward` be able to reuse these weights new_key = key.replace("mlp", "feedforward") new_keys.append(new_key) old_keys.append(key) for old_key, new_key in zip(old_keys, new_keys): state_dict[new_key] = state_dict.pop(old_key)