Code example #1
File: image_gpt.py  Project: bzantium/image-gpt
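# Imports needed to run this excerpt (the GPT2 import path is an assumption
# based on the project layout):
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl
from gpt2 import GPT2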
class ImageGPT(pl.LightningModule):
    def __init__(
        self,
        centroids,
        embed_dim=16,
        num_heads=2,
        num_layers=8,
        num_pixels=28,
        num_vocab=16,
        num_classes=10,
        classify=False,
        learning_rate=3e-3,
        steps=10_000,
        warmup_steps=500,
        **kwargs,
    ):
        super(ImageGPT, self).__init__()
        self.save_hyperparameters()
        self.gpt = GPT2(
            embed_dim=embed_dim,
            num_heads=num_heads,
            num_layers=num_layers,
            num_positions=num_pixels * num_pixels,
            num_vocab=num_vocab,
            num_classes=num_classes,
        )

        self.centroids = nn.Parameter(torch.from_numpy(np.load(centroids)),
                                      requires_grad=False)
        self.criterion = nn.CrossEntropyLoss()
        self.classify = classify
        self.learning_rate = learning_rate
        self.steps = steps
        self.warmup_steps = warmup_steps
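A minimal usage sketch, assuming a saved centroids .npy file (as loaded in __init__ above) and a hypothetical DataLoader of pixel-token sequences:

model = ImageGPT(centroids="centroids.npy")  # "centroids.npy" is a placeholder path
trainer = pl.Trainer(max_steps=10_000)
trainer.fit(model, train_loader)  # train_loader is a hypothetical DataLoader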
Code example #2
File: module.py  Project: jkooy/set-gpt
class ImageGPT(pl.LightningModule):  # enclosing class restored for context; the base class is an assumption mirroring example #1
    def __init__(self, hparams):
        super(ImageGPT, self).__init__()
        self.hparams = hparams
        self.gpt = GPT2(
            embed_dim=self.hparams.embed_dim,
            num_heads=self.hparams.num_heads,
            num_layers=self.hparams.num_layers,
            num_positions=self.hparams.num_pixels * self.hparams.num_pixels,
            num_vocab=self.hparams.num_vocab,
            num_classes=self.hparams.num_classes,
        )

        self.criterion = nn.CrossEntropyLoss()
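This variant reads every setting from a single hparams object; a minimal sketch of constructing one (the values mirror the defaults in example #1 and are assumptions here):

from argparse import Namespace

hparams = Namespace(embed_dim=16, num_heads=2, num_layers=8,
                    num_pixels=28, num_vocab=16, num_classes=10)
model = ImageGPT(hparams)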
Code example #3
File: gpt2-test.py  Project: amirekhlasi/tpu
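Note: config and the custom estimator module used below are defined at file scope in the same script; example #4 excerpts that part of the file.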
def model_fn(data, training):
    model = GPT2(config, name="gpt2")
    inputs = data["tokens"][:, :-1]  # shift by one token for next-token prediction
    labels = data["tokens"][:, 1:]
    dropout = tf.cast(training, tf.float32) * 0.05  # 0.05 when training, 0.0 otherwise
    logits = model(inputs,
                   use_2d=True,
                   attention_dropout=dropout,
                   dropout=dropout)
    # Build the padding mask while labels is still [batch, seq-1]; flattening
    # labels first would make labels.shape[1] invalid.
    mask = tf.sequence_mask(data["length"] - 1, maxlen=labels.shape[1])
    mask = tf.reshape(mask, [-1])
    labels = tf.reshape(labels, [-1])
    loss = tf.keras.losses.sparse_categorical_crossentropy(labels,
                                                           logits,
                                                           from_logits=True)
    mask = tf.cast(mask, loss.dtype)
    loss = tf.reduce_sum(mask * loss) / tf.reduce_sum(mask)
    lr = tf.Variable(1e-4, name="lr")
    model_spec = estimator.ModelSpec(
        loss=loss,
        optimizer=tf.train.GradientDescentOptimizer(lr),
        trainable_variables=model.weights,
        import_variables=model.weights)
    return model_spec
Code example #4
File: gpt2-test.py  Project: amirekhlasi/tpu
import tensorflow as tf
import estimator
from gpt2 import GPT2
import os
import json

tf.enable_eager_execution()

# Model in eager mode
model_path = "model"
with open(os.path.join(model_path, "hparams.json")) as f:
    config = json.load(f)
model = GPT2(config, name="gpt2")
x = tf.zeros([0, 0], dtype=tf.int32)  # empty dummy batch, used only to create the weights
_ = model(x)  # build model

model.load_weights(os.path.join(model_path, "weights.h5"))


def _data_builder(file_path, batch_size, pad_size):
    data = tf.data.TextLineDataset(file_path)
    data = data.repeat()

    def _map(x):
        x = tf.expand_dims(x, 0)
        tokens = tf.strings.split(x, " ").values
        tokens = tf.strings.to_number(tokens, tf.int32)
        length = tf.shape(tokens)[0]
        return {"tokens": tokens, "length": length}

    data = data.map(_map)
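    # Hedged completion -- the source excerpt is truncated here. Presumably
    # the otherwise-unused batch_size and pad_size arguments are for padded
    # batching (an assumption):
    data = data.padded_batch(
        batch_size, padded_shapes={"tokens": [pad_size], "length": []})
    return data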
Code example #5
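# Imports needed for this excerpt (the argparse parser definition is omitted
# in the source, and the GPT2 import path is an assumption):
import os
import pickle
import argparse
import spacy
from gpt2 import GPT2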
args = parser.parse_args()
print(args.index)
print(args.index_file)
print(args.input_data)
print(args.output_format)

data = pickle.load(open(args.input_data, "rb"))
print("Length ", len(data))
indices = pickle.load(open(args.index_file, "rb"))
print(indices)
lower, upper = indices[int(args.index)]
print(lower, upper)

nlp = spacy.load("en_core_web_sm")

model = GPT2(device="cuda", location="./path/to/saved/model/")

if os.path.exists(args.output_format + args.index + ".pkl"):
    output = pickle.load(open(args.output_format + args.index + ".pkl", "rb"))
else:
    output = {}


def get_probabilities(articles):
    """
    Given a batch of articles (can be any strings) run a forward pass on GPT2 and obtain word probabilities for the same
    """
    article_splits = [article.split(" ") for article in articles]
    payload = model.get_probabilities(articles, topk=20)
    res = [[] for _ in range(len(articles))]
    for t, article in enumerate(articles):
Code example #6
from gpt2 import GPT2

gpt2 = GPT2(0)
#history = """My name is Jonas and I have been hunting dinosaurs my entire life. One day during a tough hunt I met my wife Lilly. She was the love of my life and when I saw her the first time I could not focus on the triceratops who was just attacking me. He"""
history = """The conference was over and Peter was on his way home. He """
text = gpt2.generate_text(history, 100)
print("length hist: " + str(len(history)))
print("hist: " + history)
print("text: " + text)

sentence1 = "the book is on the desk."  # a well-formed, plausible sentence
sentence2 = "he built the love with his bare ears."  # a semantically odd sentence

prob1 = gpt2.score_probability(sentence1)
prob2 = gpt2.score_probability(sentence2)

print("Prob1: " + str(prob1) + " prob2: " + str(prob2))
Code example #7
        if save_model_on_epoch:
            torch.save(
                model.state_dict(),
                os.path.join(output_dir, f"{output_prefix}-{epoch}.pt"),
            )
    return model


# print("\nloading dataset\n")

# dataset = CSVTwitter("<|tweet|>", truncate=True, gpt2_type="gpt2")
# gpt2_type = "gpt2"

# print("\ndataset loaded\n")

model_g = GPT2()

model_dict = model_g.state_dict()  # currently with random initialization
state_dict = torch.load("./gpt2-pytorch_model.bin")  # pretrained weights

old_keys = []
new_keys = []
for key in state_dict.keys():
    if "mlp" in key:  #The hugging face state dict references the feedforward network as mlp, need to replace to `feedforward` be able to reuse these weights
        new_key = key.replace("mlp", "feedforward")
        new_keys.append(new_key)
        old_keys.append(key)

for old_key, new_key in zip(old_keys, new_keys):
    state_dict[new_key] = state_dict.pop(old_key)
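
# The excerpt ends here; a plausible next step (an assumption, not shown in
# the source) is to load the remapped weights into the model:
model_g.load_state_dict(state_dict, strict=False)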