def openAIGPTModel(*args, **kwargs):
    """
    OpenAIGPTModel is the basic OpenAI GPT Transformer model, built from a
    stack of identical masked self-attention blocks and pre-trained on a
    large-scale corpus with a language modeling objective.

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTTokenizer', 'openai-gpt')

        #  Prepare tokenized input
        >>> text = "Who was Jim Henson ? Jim Henson was a puppeteer"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> tokens_tensor = torch.tensor([indexed_tokens])

        # Load openAIGPTModel
        >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'openAIGPTModel', 'openai-gpt')
        >>> model.eval()

        # Compute the hidden states for the input tokens
        >>> with torch.no_grad():
        ...     hidden_states = model(tokens_tensor)
    """
    model = OpenAIGPTModel.from_pretrained(*args, **kwargs)
    return model
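For a quick local sanity check of what this entry point returns, here is a minimal sketch without torch.hub (assuming the pytorch_pretrained_bert package is installed; the base model returns the final-layer hidden states as a single tensor):

```python
# Minimal sketch: load the pre-trained weights directly instead of via torch.hub
# (assumes the pytorch_pretrained_bert package is available).
import torch
from pytorch_pretrained_bert import OpenAIGPTModel, OpenAIGPTTokenizer

tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
model = OpenAIGPTModel.from_pretrained('openai-gpt')
model.eval()

tokens = tokenizer.tokenize("Who was Jim Henson ? Jim Henson was a puppeteer")
ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
with torch.no_grad():
    hidden_states = model(ids)    # shape: [1, seq_len, n_embd] (n_embd = 768)
print(hidden_states.shape)
```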
def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_config_file, pytorch_dump_folder_path):
    """Convert an original OpenAI GPT (TensorFlow/numpy) checkpoint into a PyTorch checkpoint."""
    # Construct model
    if openai_config_file == "":
        config = OpenAIGPTConfig()
    else:
        config = OpenAIGPTConfig(openai_config_file)
    model = OpenAIGPTModel(config)

    # Load weights from numpy
    load_tf_weights_in_openai_gpt(model, openai_checkpoint_folder_path)

    # Save pytorch-model
    pytorch_weights_dump_path = pytorch_dump_folder_path + '/' + WEIGHTS_NAME
    pytorch_config_dump_path = pytorch_dump_folder_path + '/' + CONFIG_NAME
    print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print("Save configuration file to {}".format(pytorch_config_dump_path))
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())
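A minimal command-line wrapper around the conversion function could look like the sketch below; the flag names are illustrative, not the package's actual conversion CLI.

```python
# Hypothetical CLI wrapper around convert_openai_checkpoint_to_pytorch
# (argument names are illustrative).
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--openai_checkpoint_folder_path", required=True,
                        help="Folder containing the original OpenAI GPT numpy checkpoint.")
    parser.add_argument("--openai_config_file", default="",
                        help="Optional JSON config; defaults to the stock OpenAIGPTConfig.")
    parser.add_argument("--pytorch_dump_folder_path", required=True,
                        help="Output folder for the PyTorch weights and config files.")
    args = parser.parse_args()
    convert_openai_checkpoint_to_pytorch(args.openai_checkpoint_folder_path,
                                         args.openai_config_file,
                                         args.pytorch_dump_folder_path)
```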
Example 3
def extractOpenAI():
    # Dump the GPT token embedding matrix and vocabulary to plain-text files.
    model = OpenAIGPTModel.from_pretrained(modelPath[args.model])
    embeddings = model.tokens_embed
    print(embeddings.num_embeddings)
    print(embeddings.weight.size())
    tokenizer = OpenAIGPTTokenizer.from_pretrained(modelPath[args.model])
    weight = embeddings.weight.detach().numpy()

    # One line per token: "<token> <dim_0> <dim_1> ... <dim_767>"
    with open(programmingalpha.openAI768 + "embeddings.txt", "w") as f:
        for i in range(len(weight)):
            token = tokenizer.decoder[i]
            vec_str = [token] + [str(x) for x in weight[i]]
            f.write(" ".join(vec_str) + "\n")

    with open(programmingalpha.openAI768 + "vocab.txt", "w") as f:
        for i in range(len(weight)):
            token = tokenizer.decoder[i]
            f.write(token + "\n")
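The embeddings.txt file written above has one token per line followed by its 768 vector components; here is a minimal sketch for reading it back into a {token: vector} dict (assuming tokens contain no spaces, which holds for GPT's BPE vocabulary):

```python
# Minimal sketch: load the dumped embeddings back into a {token: vector} dict.
import numpy as np

def load_embeddings(path):
    token_vecs = {}
    with open(path) as f:
        for line in f:
            parts = line.rstrip("\n").split(" ")
            token, vec = parts[0], np.array(parts[1:], dtype=np.float32)
            token_vecs[token] = vec
    return token_vecs
```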
Example 4
def __init__(self, config, num_labels):
    super(OpenAIGPTForClassification, self).__init__(config)
    self.transformer = OpenAIGPTModel(config)
    self.classifier = nn.Linear(config.n_embd, num_labels)
    self.classifier.weight.data.uniform_(-0.1, 0.1)
    self.classifier.bias.data.zero_()
Example 5
class OpenAIGPTForClassification(OpenAIGPTPreTrainedModel):
    """OpenAI GPT model with a Language Modeling and a Multiple Choice head ("Improving Language Understanding by Generative Pre-Training").

    OpenAI GPT use a single embedding matrix to store the word and special embeddings.
    Special tokens embeddings are additional tokens that are not pre-trained: [SEP], [CLS]...
    Special tokens need to be trained during the fine-tuning if you use them.
    The number of special embeddings can be controled using the `set_num_special_tokens(num_special_tokens)` function.

    The embeddings are ordered as follow in the token embeddings matrice:
        [0,                                                         ----------------------
         ...                                                        -> word embeddings
         config.vocab_size - 1,                                     ______________________
         config.vocab_size,
         ...                                                        -> special embeddings
         config.vocab_size + config.n_special - 1]                  ______________________

    where total_tokens_embeddings can be obtained as config.total_tokens_embeddings and is:
        total_tokens_embeddings = config.vocab_size + config.n_special
    You should use the associate indices to index the embeddings.

    Params:
        config: a OpenAIGPTConfig class instance with the configuration to build a new model

    Inputs:
        `input_ids`: a torch.LongTensor of shape [batch_size, num_choices, sequence_length] with the BPE token
            indices selected in the range [0, total_tokens_embeddings[
        `mc_token_ids`: a torch.LongTensor of shape [batch_size, num_choices] with the index of the token from
            which we should take the hidden state to feed the multiple choice classifier (usually last token of the sequence)
        `position_ids`: an optional torch.LongTensor with the same shape as input_ids
            with the position indices (selected in the range [0, config.n_positions - 1[.
        `token_type_ids`: an optional torch.LongTensor with the same shape as input_ids
            You can use it to add a third type of embedding to each input token in the sequence
            (the previous two being the word and position embeddings).
            The input, position and token_type embeddings are summed inside the Transformer before the first
            self-attention block.
        `lm_labels`: optional language modeling labels: torch.LongTensor of shape [batch_size, num_choices, sequence_length]
            with indices selected in [-1, 0, ..., total_tokens_embeddings]. All labels set to -1 are ignored (masked), the loss
            is only computed for the labels set in [0, ..., total_tokens_embeddings]
        `multiple_choice_labels`: optional multiple choice labels: torch.LongTensor of shape [batch_size]
            with indices selected in [0, ..., num_choices].

    Outputs:
        if `lm_labels` and `multiple_choice_labels` are not `None`:
            Outputs a tuple of losses with the language modeling loss and the multiple choice loss.
        else: a tuple with
            `lm_logits`: the language modeling logits as a torch.FloatTensor of size [batch_size, num_choices, sequence_length, total_tokens_embeddings]
            `multiple_choice_logits`: the multiple choice logits as a torch.FloatTensor of size [batch_size, num_choices]

    Example usage:
    ```python
    # Already been converted into BPE token ids
    input_ids = torch.LongTensor([[[31, 51, 99], [15, 5, 0]]])  # (bsz, number of choice, seq length)
    mc_token_ids = torch.LongTensor([[2], [1]]) # (bsz, number of choice)

    config = modeling_openai.OpenAIGPTConfig()

    model = modeling_openai.OpenAIGPTLMHeadModel(config)
    lm_logits, multiple_choice_logits = model(input_ids, mc_token_ids)
    ```
    """
    def __init__(self, config, num_labels):
        super(OpenAIGPTForClassification, self).__init__(config)
        self.transformer = OpenAIGPTModel(config)
        self.classifier = nn.Linear(config.n_embd, num_labels)
        self.classifier.weight.data.uniform_(-0.1, 0.1)
        self.classifier.bias.data.zero_()

    def set_num_special_tokens(self, num_special_tokens):
        """ Update input and output embeddings with new embedding matrice
            Make sure we are sharing the embeddings
        """
        self.transformer.set_num_special_tokens(num_special_tokens)

    def forward(self,
                input_ids,
                input_mask,
                labels=None,
                token_type_ids=None,
                position_ids=None):
        # Run the Transformer: [batch_size, seq_length, n_embd]
        hidden_states = self.transformer(input_ids, position_ids,
                                         token_type_ids)
        # Index of the last non-padded token in each sequence
        input_mask_sel = (input_mask.sum(dim=1) - 1).long()
        input_mask_sel = input_mask_sel.unsqueeze(dim=1).unsqueeze(
            dim=1).repeat(1, 1, hidden_states.size(-1))
        # Gather the hidden state of that last token: [batch_size, n_embd]
        sentence_hidden = hidden_states.gather(index=input_mask_sel, dim=1)
        sentence_hidden = sentence_hidden.squeeze(dim=1)
        # Classification head over the pooled hidden state
        logits = self.classifier(sentence_hidden)
        return logits
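A minimal end-to-end sketch of using this classifier on a padded batch; the 'openai-gpt' tokenizer, the randomly initialized config, and num_labels=2 are illustrative assumptions, not part of the original snippet:

```python
# Minimal sketch: classify one padded batch with OpenAIGPTForClassification.
# The tokenizer choice and num_labels=2 are illustrative assumptions.
import torch
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTConfig

tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
texts = ["jim henson was a puppeteer", "who was jim henson ?"]
batch = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(t)) for t in texts]

max_len = max(len(ids) for ids in batch)
input_ids = torch.tensor([ids + [0] * (max_len - len(ids)) for ids in batch])
input_mask = torch.tensor([[1] * len(ids) + [0] * (max_len - len(ids)) for ids in batch])

# Randomly initialized weights; fine for checking shapes, not for real predictions.
model = OpenAIGPTForClassification(OpenAIGPTConfig(), num_labels=2)
model.eval()
with torch.no_grad():
    logits = model(input_ids, input_mask)   # shape: [batch_size, num_labels]
```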
Example 6
        word_to_idx = pickle.load(f)

    train_val_dataset = FlowerDataset(
        img_folder=FLOWERS_DATA_ROOT + 'jpg',
        text_folder=FLOWERS_DATA_ROOT + 'train_val',
        word_to_idx=word_to_idx,
        idx_to_word=idx_to_word,
    )

    # Pick the text encoder according to the command-line flags
    if args.use_skip_thought:
        model = BayesianUniSkip('data/skip_thoughts', word_to_idx.keys())
    elif args.use_bert:
        model = BertModel.from_pretrained('bert-base-uncased')
        model.eval()
    elif args.use_gpt:
        model = OpenAIGPTModel.from_pretrained('openai-gpt')
        model.eval()
    else:
        model = RnnEncoder(dict_size=len(word_to_idx),
                           embed_size=args.embed_size,
                           hidden_dim=args.hidden_dim,
                           drop_prob=0.5)
    generator = Generator()
    discriminator = Discriminator()

    dataloader = torch.utils.data.DataLoader(train_val_dataset,
                                             batch_size=1,
                                             shuffle=True)
    model = model.to(device)
    generator = generator.to(device)
    discriminator = discriminator.to(device)
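The snippet does not show how the chosen encoder turns a caption into a sentence embedding; here is a plausible sketch for the GPT branch, where mean pooling over the final hidden states is an assumption rather than what Trainer.embed_text actually does:

```python
# Hypothetical sketch: sentence embedding from OpenAIGPTModel by mean pooling.
# The pooling strategy is an assumption; the project's Trainer.embed_text is not shown.
import torch
from pytorch_pretrained_bert import OpenAIGPTModel, OpenAIGPTTokenizer

gpt_tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
gpt_model = OpenAIGPTModel.from_pretrained('openai-gpt').eval()

def embed_caption(caption):
    ids = torch.tensor([gpt_tokenizer.convert_tokens_to_ids(
        gpt_tokenizer.tokenize(caption))])
    with torch.no_grad():
        hidden = gpt_model(ids)              # [1, seq_len, 768]
    return hidden.mean(dim=1).squeeze(0)     # [768]

sent_emb = embed_caption("a purple flower with long thin petals")
```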
Example 7
def inception_score(imgs,
                    model_file,
                    cuda=True,
                    batch_size=32,
                    resize=False,
                    splits=1):
    """Computes the inception score of the generated images imgs

    imgs -- Torch dataset of (3xHxW) numpy images normalized in the range [-1, 1]
    cuda -- whether or not to run on GPU
    batch_size -- batch size for feeding into Inception v3
    splits -- number of splits
    """
    N = len(imgs)

    assert batch_size > 0
    assert N > batch_size

    device = torch.device('cuda' if cuda else 'cpu')

    # Set up dtype
    if cuda:
        dtype = torch.cuda.FloatTensor
    else:
        if torch.cuda.is_available():
            print(
                "WARNING: You have a CUDA device, so you should probably set cuda=True"
            )
        dtype = torch.FloatTensor

    # Set up dataloader
    dataloader = torch.utils.data.DataLoader(imgs,
                                             batch_size=batch_size,
                                             drop_last=True)

    # Load generator and embeddings
    if args.use_skip_thought:
        model = BayesianUniSkip('data/skip_thoughts', imgs.word_to_idx.keys())
        for param in model.parameters():
            param.requires_grad = False
    elif args.use_bert:
        model = BertModel.from_pretrained('bert-base-uncased')
        model.eval()
    elif args.use_gpt:
        model = OpenAIGPTModel.from_pretrained('openai-gpt')
        model.eval()
    else:
        model = RnnEncoder(dict_size=len(imgs.word_to_idx),
                           embed_size=args.embed_size,
                           hidden_dim=args.rnn_hidden_dim,
                           drop_prob=0.5)

    generator = Generator().to(device)
    trainer = Trainer(dataloader, model, generator, None, None, None, None,
                      device, None)
    trainer.load_model(model_file)
    trainer.rnn_encoder.eval()
    trainer.generator.eval()

    # Load inception model
    inception_model = inception_v3(pretrained=True,
                                   transform_input=False).type(dtype)
    inception_model.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear').type(dtype)

    def get_pred(x):
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x, dim=1).data.cpu().numpy()

    # Get predictions (drop_last=True, so only full batches are scored)
    n_scored = (N // batch_size) * batch_size
    preds = np.zeros((n_scored, 1000))

    for i, batch in enumerate(dataloader, 0):
        print("Calculating Inception Score... iter: {} / {}  ".format(
            i, N // batch_size),
              end='\r')
        # batch = batch.type(dtype)
        # batchv = Variable(batch)
        imgs, caps, cap_lens, fake_caps, fake_cap_lens = trainer.prepare_data(
            batch)

        # Text embedding
        sent_emb, fake_sent_emb = trainer.embed_text(caps, cap_lens, fake_caps,
                                                     fake_cap_lens, batch_size)

        batch_size_i = caps.size()[0]
        sampled = torch.randn((batch_size_i, generator.z_size)).to(device)
        batchv = generator(sent_emb, sampled)

        preds[i * batch_size:i * batch_size + batch_size_i] = get_pred(batchv)
    print()
    # Now compute the mean kl-div
    split_scores = []

    for k in range(splits):
        part = preds[k * (n_scored // splits):(k + 1) * (n_scored // splits), :]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return np.mean(split_scores), np.std(split_scores)
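A hedged usage sketch for this function; the dataset split, checkpoint path, and hyper-parameters below are placeholders, and the comment restates the score being computed:

```python
# Hypothetical usage: the 'test' split and checkpoint path are placeholders.
# Inception Score = exp( E_x[ KL( p(y|x) || p(y) ) ] ), averaged over `splits` folds.
val_dataset = FlowerDataset(
    img_folder=FLOWERS_DATA_ROOT + 'jpg',
    text_folder=FLOWERS_DATA_ROOT + 'test',
    word_to_idx=word_to_idx,
    idx_to_word=idx_to_word,
)
mean_is, std_is = inception_score(val_dataset,
                                  model_file='checkpoints/generator.pth',
                                  cuda=torch.cuda.is_available(),
                                  batch_size=32,
                                  resize=True,
                                  splits=10)
print("Inception Score: {:.3f} +/- {:.3f}".format(mean_is, std_is))
```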