Example #1
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a pretrained model by supplying
        * the name of a remote model on s3 ("gpt2" ...)
        * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
        * OR a local path of a model trained via FARM ("some_dir/farm_model")
        :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
        :type pretrained_model_name_or_path: str
        """

        gpt2 = cls()
        if "farm_lm_name" in kwargs:
            gpt2.name = kwargs["farm_lm_name"]
        else:
            gpt2.name = pretrained_model_name_or_path
        # We need to differentiate between loading a model in FARM format and one in Pytorch-Transformers format
        farm_lm_config = Path(
            pretrained_model_name_or_path) / "language_model_config.json"
        if os.path.exists(farm_lm_config):
            # FARM style
            gpt2_config = GPT2Config.from_pretrained(farm_lm_config)
            farm_lm_model = Path(
                pretrained_model_name_or_path) / "language_model.bin"
            gpt2.model = GPT2Model.from_pretrained(farm_lm_model,
                                                   config=gpt2_config,
                                                   **kwargs)
            gpt2.language = gpt2.model.config.language
        else:
            # Pytorch-transformer Style
            gpt2.model = GPT2Model.from_pretrained(
                str(pretrained_model_name_or_path), **kwargs)
            gpt2.language = cls._get_or_infer_language_from_name(
                language, pretrained_model_name_or_path)
        return gpt2
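The docstring lists the three accepted inputs. A minimal usage sketch of this loader (the wrapper class is called GPT2 here only to match the cls() instance name in the snippet; the directory is the placeholder path from the docstring):

# remote model name -- takes the Pytorch-Transformers branch
gpt2_lm = GPT2.load("gpt2")
# local FARM checkpoint -- detected via language_model_config.json
gpt2_lm = GPT2.load("some_dir/farm_model")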
Example #2
    def __init__(
            self,
            config,
            class_labels,
            pretrained_model_path,
            dropout=0.1,
            freeze_pretrained_part=True,
            reinitialize=False,
            n_layers=6,
    ):
        super().__init__(config, class_labels)

        if reinitialize:
            logger.info('resetting model weights')
            config = GPT2Config.from_json_file(pretrained_model_path + '/config.json')
            config = config.to_dict()
            config['n_layer'] = n_layers
            config = GPT2Config.from_dict(config)
            self.gpt2 = GPT2Model(config)
        else:
            self.gpt2 = GPT2Model.from_pretrained(pretrained_model_path)

        self.dropout = torch.nn.Dropout(dropout)
        self.fc = torch.nn.Linear(self.gpt2.config.n_embd, self.output_dim)
        if freeze_pretrained_part:
            for param in self.gpt2.parameters():
                param.requires_grad = False
Example #3
    def __init__(self, config):  # NPI added functionality
        super(GPT2WithNPI, self).__init__(config)  # NPI added functionality

        # self.npi = npi # NPI added functionality
        # self.prediction_indices = prediction_indices # NPI added functionality

        GPT2Model.__init__(self, config)  # NPI added functionality
        pass
Example #4
def build_models(text_encoder_type):
    # build model ############################################################
    text_encoder_type = text_encoder_type.casefold()
    if text_encoder_type not in ('rnn', 'transformer'):
        raise ValueError('Unsupported text_encoder_type')

    if text_encoder_type == 'rnn':
        text_encoder = RNN_ENCODER(dataset.n_words,
                                   nhidden=cfg.TEXT.EMBEDDING_DIM)
    image_encoder = CNN_ENCODER(cfg.TEXT.EMBEDDING_DIM)

    labels = Variable(torch.LongTensor(range(batch_size)))
    start_epoch = 0
    if cfg.TRAIN.NET_E:
        if text_encoder_type == 'rnn':
            state_dict = torch.load(cfg.TRAIN.NET_E)
            text_encoder.load_state_dict(state_dict)
        elif text_encoder_type == 'transformer':
            text_encoder = GPT2Model.from_pretrained(cfg.TRAIN.NET_E)
            # output_hidden_states = True )
        print('Load ', cfg.TRAIN.NET_E)
        #
        name = cfg.TRAIN.NET_E.replace('text_encoder', 'image_encoder')
        state_dict = torch.load(name)
        image_encoder.load_state_dict(state_dict)
        print('Load ', name)

        istart = cfg.TRAIN.NET_E.rfind('_') + 8
        iend = cfg.TRAIN.NET_E.rfind('.')
        start_epoch = cfg.TRAIN.NET_E[istart:iend]
        start_epoch = int(start_epoch) + 1
    else:
        if text_encoder_type == 'rnn':
            print('Training RNN from scratch')
        elif text_encoder_type == 'transformer':
            # don't initialize the weights of these huge models from scratch...
            print('Training Transformer starting from pretrained model')
            text_encoder = GPT2Model.from_pretrained(TRANSFORMER_ENCODER)
            # output_hidden_states = True )
        print('Training CNN starting from ImageNet pretrained Inception-v3')

    print('start_epoch', start_epoch)

    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
        image_encoder = image_encoder.cuda()
        labels = labels.cuda()

    return text_encoder, image_encoder, labels, start_epoch
Example #5
 def __init__(self, config, num_output_labels=4):
     config.output_attentions = True
     super(GPT2ClassificationModel, self).__init__(config)
     self.transformer = GPT2Model(config)
     self.CNN_Max = nn.Sequential(
         # Defining a 2D convolution layer
         nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=1),
         nn.BatchNorm2d(4),
         nn.ReLU(inplace=True),
         nn.MaxPool2d(kernel_size=2, stride=2),
         # Defining another 2D convolution layer
         nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
         nn.BatchNorm2d(4),
         nn.ReLU(inplace=True),
         nn.MaxPool2d(kernel_size=2, stride=2),
     )
     self.CNN_Avg = nn.Sequential(
         # Defining a 2D convolution layer
         nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=1),
         nn.BatchNorm2d(4),
         nn.ReLU(inplace=True),
         nn.AvgPool2d(kernel_size=2, stride=2),
         # Defining another 2D convolution layer
         nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
         nn.BatchNorm2d(4),
         nn.ReLU(inplace=True),
         nn.AvgPool2d(kernel_size=2, stride=2),
     )
     self.ff_layers = nn.Sequential(nn.Linear(256, 10),
                                    nn.Linear(10, num_output_labels))
     self.final_softmax = nn.Softmax(dim=1)
     self.init_weights()
Example #6
 def __init__(self, hidden_size: int, num_classes: int, max_seq_len: int,
              gpt_model_name: str, cache_dir: str):
     super(SimpleGPT2SequenceClassifier, self).__init__()
     self.gpt2model = GPT2Model.from_pretrained(
         gpt_model_name, cache_dir=cache_dir
     )
     self.fc1 = nn.Linear(hidden_size, num_classes)
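Only the constructor is shown above. The sketch below shows one plausible way such a sequence classifier can be used, assuming the hidden state of the last position is pooled for classification (an assumption, since the original forward method is not part of the snippet); hidden_size=768 and num_classes=2 correspond to the "gpt2" checkpoint and a binary task:

import torch
import torch.nn as nn
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2 = GPT2Model.from_pretrained("gpt2")
fc1 = nn.Linear(768, 2)                      # hidden_size=768, num_classes=2

enc = tokenizer("an example sentence", return_tensors="pt")
with torch.no_grad():
    hidden = gpt2(**enc)[0]                  # [batch, seq_len, 768]
logits = fc1(hidden[:, -1, :])               # pool the last position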
Example #7
 def create_and_check_gpt2_weight_initialization(self, config, *args):
     model = GPT2Model(config)
     model_std = model.config.initializer_range / math.sqrt(2 * model.config.n_layer)
     for key in model.state_dict().keys():
         if "c_proj" in key and "weight" in key:
             self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001)
             self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01)
Example #8
def test_openai_gpt2():
    from transformers import GPT2Model, GPT2Tokenizer

    input_text = "Here is some text to encode"
    pt_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    pt_model = GPT2Model.from_pretrained("gpt2", return_dict=True)
    pt_outputs = pt_model(**pt_tokenizer([input_text], return_tensors="pt"))

    task = build_task({
        "class": "lm",
        "params": {
            "data_pipeline.class": "GPT2DataPipeline",
            "max_len": 50,
            "begin_of_sentence": "eos"
        }
    })

    model_cfgs = get_hyper_parameters("gpt2_117m")
    model = task.build_model(model_cfgs)
    restore_checkpoint_if_possible_v2(model, "117M", model_name="OpenAIGPT2")
    input_ids = task._data_pipeline.process(input_text)
    tf_inputs = {
        "trg_input": tf.convert_to_tensor([input_ids], tf.int64),
        "trg_length": tf.convert_to_tensor([len(input_ids)], tf.int64)
    }
    _, gen_init = model.get_symbols_to_logits_fn(tf_inputs, is_training=False, is_inference=False)
    tf_outputs = model.get_decoder_output(gen_init["decoder_input"],
                                          cache=gen_init["decoder_internal_cache"],
                                          is_training=False)
    assert_equal_numpy(pt_outputs.last_hidden_state.detach().numpy(), tf_outputs[:, :-1].numpy(), 5e-4)
Example #9
    def from_pretrained(self, args):

        # loading from pre-trained
        encoder_path = args.output_dir + "/encoder/"
        decoder_path = args.output_dir + "/decoder/"
        vae_path = args.output_dir + "/vae/vae.weights"
        tokenizer_path = args.output_dir + "/tokenizer/"
        logger.info("gpt2_config: " + str(self.gpt2_config))
        self.gpt2_config.vocab_size = self.gpt2_config.vocab_size + 2
        self.encoder = GPT2Model.from_pretrained(
            encoder_path,
            from_tf=bool('.ckpt' in encoder_path),
            config=self.gpt2_config)
        self.decoder = GPT2LMHeadModel.from_pretrained(
            decoder_path,
            from_tf=bool('.ckpt' in decoder_path),
            config=self.gpt2_config)
        self.tokenizer = GPT2Tokenizer.from_pretrained(
            tokenizer_path, do_lower_case=args.do_lower_case)
        self.vae.load_state_dict(torch.load(vae_path))

        # set up for evaluating
        self.encoder.eval()
        self.decoder.eval()
        self.vae.eval()

        # load training args
        training_args = torch.load(
            os.path.join(args.output_dir, 'training_args.bin'))
        logger.info("training_args: " + str(training_args))

        return
Example #10
    def __init__(self, config, **kwargs):
        super().__init__(config)
        self.args = kwargs['args']
        self.config = config

        # core gpt2 and lm head
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # mention detection output index
        self.mc_cl2idx = {'<N>': 0, '<M>': 1, '</M>': 2}
        self.mc_idx2cl = {v: k for k, v in self.mc_cl2idx.items()}
        self.cl_head = nn.Linear(config.n_embd,
                                 3)  # head for 3 classes in mention detection

        # attention parameters in coref2qr mechanism
        if self.args.coref_attn_share_between_layer:
            self.c_attn = Conv1D(3 * config.n_embd, config.n_embd)
        else:
            self.c_attn = nn.ModuleList([
                Conv1D(3 * config.n_embd, config.n_embd)
                for _ in range(self.config.n_layer + 1)
            ])

        # binary classification for rewriting or not
        if self.args.use_binary_cls:
            self.binary_cls1 = nn.Linear(config.n_embd, config.n_embd)
            self.binary_cls2 = nn.Linear(
                config.n_embd, 2,
                bias=False)  # output layer for rewrite or not

        self.init_weights()
Example #11
    def initialize_model(self, args):

        # load pretrained model and tokenizer for GPT2 encoder and decoder
        encoder_path = args.gpt2_model_name_or_path
        decoder_path = args.gpt2_model_name_or_path
        tokenizer_path = args.gpt2_model_name_or_path
        self.encoder = GPT2Model.from_pretrained(
            encoder_path,
            from_tf=bool('.ckpt' in encoder_path),
            config=self.gpt2_config)
        self.decoder = GPT2LMHeadModel.from_pretrained(
            decoder_path,
            from_tf=bool('.ckpt' in decoder_path),
            config=self.gpt2_config)
        self.tokenizer = GPT2Tokenizer.from_pretrained(
            tokenizer_path, do_lower_case=args.do_lower_case)

        # add [SOS] and [PAD] to tokenizer
        self.tokenizer.add_special_tokens(
            {"additional_special_tokens": ["[PAD]", "[SOS]"]})
        self.encoder.resize_token_embeddings(len(self.tokenizer))
        self.decoder.resize_token_embeddings(len(self.tokenizer))
        logger.info("tokenizer size: " + str(self.tokenizer.__len__()))
        logger.info("tokenizer.decode [50256, 50257, 50258]: " +
                    str(self.tokenizer.decode([50256, 50257, 50258])))

        # No controlled initialization for VAE
        logger.info("cautions: no init VAE")

        return
Example #12
 def __init__(self, config):
     super().__init__(config)
     self.num_labels = config.num_labels
     self.transformers = GPT2Model(config)
     self.dropout = nn.Dropout(0.1)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.init_weights()
Example #13
    def create_and_check_gpt2_model_past_large_inputs(
        self, config, input_ids, input_mask, head_mask, token_type_ids, *args
    ):
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # first forward pass
        outputs = model(input_ids, token_type_ids=token_type_ids, use_cache=True)

        output, past = outputs.to_tuple()

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_token_types = ids_tensor([self.batch_size, 3], self.type_vocab_size)

        # append to next input_ids and token_type_ids
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)

        output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
        output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"]
        self.parent.assertTrue(output_from_past.shape[1] == next_tokens.shape[1])

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
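The test checks the core property of GPT-2's cache: feeding only the new tokens plus the saved key/value states must reproduce the tail of the full-sequence output. A standalone sketch of the same pattern with a pretrained model and a recent transformers release (older releases name the keyword past, as in this test; newer ones use past_key_values, as in Example #21 below):

import torch
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2").eval()

ids = tokenizer("Here is some text", return_tensors="pt").input_ids
next_ids = tokenizer(" to encode", return_tensors="pt").input_ids
with torch.no_grad():
    out = model(ids, use_cache=True)
    past = out.past_key_values                         # cached keys/values per layer
    incremental = model(next_ids, past_key_values=past).last_hidden_state
    full = model(torch.cat([ids, next_ids], dim=-1)).last_hidden_state

# the incremental pass matches the tail of the full pass up to numerical noise
assert torch.allclose(incremental, full[:, -next_ids.shape[1]:], atol=1e-3)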
Example #14
def get_attentions():
    model_name = request.args.get('model')
    source = request.args.get('source')
    target = request.args.get('target')

    if model_name == 'XLM':
        model_version = 'xlm-mlm-ende-1024'
        model = XLMModel.from_pretrained(model_version, output_attentions=True)
        tokenizer = XLMTokenizer.from_pretrained(model_version)
    elif model_name == 'GPT-2':
        model_version = 'gpt2'
        model = GPT2Model.from_pretrained(model_version, output_attentions=True)
        tokenizer = GPT2Tokenizer.from_pretrained(model_version)
    else:
        # BERT
        model_version = 'bert-base-uncased'
        model = BertModel.from_pretrained(model_version, output_attentions=True)
        tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=True)

    inputs = tokenizer.encode_plus(source, target, return_tensors='pt', add_special_tokens=True)
    token_type_ids = inputs['token_type_ids']
    input_ids = inputs['input_ids']
    attention = model(input_ids, token_type_ids=token_type_ids)[-1]
    input_id_list = input_ids[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_id_list)
    return {'attention': format_attention(attention)[0].tolist(), 'source': tokens, 'target': tokens}
Example #15
    def __init__(self, config):
        super(HFGpt, self).__init__(config)
        args = self.args

        self.hidden_dim = args["hidden_dim"]
        self.num_layers = 1  # Needed for initalize_h()
        self.batch_size = config["processor"]["params"]["batch_size"]

        self.encoders = config["processor"]["params"]["label_encoder"]
        self.num_classes = config["processor"]["params"]["num_classes"]
        self.num_outputs = len(self.num_classes)

        self.teacher_enforced = args["teacher_enforced"]
        self.in_seq_len = args["inp_seq_len"]
        self.out_seq_len = args["out_seq_len"]
        self.vocab_size = args["vocab_size"]
        self.model_name_or_path = args["model_name_or_path"]
        self.initializer_range = args["initializer_range"]
        self.logger.debug(self.args)

        # Shared for all input
        self.encoder_decoder = GPT2Model.from_pretrained(
            self.model_name_or_path)

        # For each output
        self.out_decoder = torch.nn.ModuleList()

        for i in range(self.num_outputs):
            clss = torch.nn.Linear(self.hidden_dim, self.num_classes[i])
            # Common init way in most sota models
            clss.weight.data.normal_(mean=0.0, std=self.initializer_range)
            self.out_decoder.append(clss)

        # Print statistics
        self.initialize()
Example #16
 def __init__(self, cfg, clf_token, task_head_type, vocab=40990, n_ctx=512):
     super(DoubleHeadModel, self).__init__()
     #self.transformer = TransformerModel(cfg, vocab=vocab, n_ctx=n_ctx)
     self.transformer = GPT2Model.from_pretrained('gpt2')
     self.lm_head = LMHead(self.transformer, cfg)
     if isinstance(task_head_type, str):
         if task_head_type == 'multiple_choice':
             self.task_head = MultipleChoiceHead(clf_token, cfg)
         elif task_head_type == 'similarity':
             self.task_head = SimilarityHead(clf_token, cfg)
         elif task_head_type == 'inference':
             # the three classes correspond to entailment, contradiction and neutral.
             self.task_head = ClfHead(clf_token, cfg, 3)
         else:
             raise ValueError(
                 "task_head_type is expected to be 'multiple_choice' "
                 "'similarity', 'inference' or ('classification', n_class) "
                 f"got {task_head_type}.")
     elif isinstance(task_head_type, collections.abc.Sequence) and len(task_head_type) == 2 and \
             task_head_type[0] == 'classification':
         n_class = task_head_type[1]
         self.task_head = ClfHead(clf_token, cfg, n_class)
     else:
          raise ValueError(
              "task_head_type is expected to be 'multiple_choice' "
              "'similarity', 'inference' or ('classification', n_class) "
              f"got {task_head_type}.")
Example #17
    def __init__(self, freeze_bert, tokenizer, device, bidirectional):
        super(GPT2LSTMLogRegCRF, self).__init__()
        # Instantiating the GPT-2 model object
        self.gpt2_layer = GPT2Model.from_pretrained('gpt2', output_hidden_states=True, output_attentions=False)
        
        # Freeze GPT-2 layers: if True, freeze the pretrained GPT-2 weights
        if freeze_bert:
            for p in self.gpt2_layer.parameters():
                p.requires_grad = False

        self.tokenizer = tokenizer
        self.device = device
        self.bidirectional = bidirectional

        self.dropout = nn.Dropout(0.5)

        # lstm layer
        self.lstm_layer = nn.LSTM(input_size=768, hidden_size = 512, num_layers = 1, bidirectional=bidirectional, batch_first=True)

        # log reg
        if bidirectional:
            self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
        else:
            self.hidden2tag = nn.Linear(512, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)

        # crf (coarse)
        self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
        # crf (fine)
        self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)
Example #18
def main():
    options = parse_args()
    input_path = Path(options.input)
    if input_path.is_dir():
        input_path = input_path / "model.pt"
    checkpoint = torch.load(input_path, map_location="cpu")
    converted_state = {
        rename_key(key): reshape_weight(key, value)
        for key, value in checkpoint["state_dict"].items()
    }
    gpt2 = GPT2Model.from_pretrained("gpt2-medium")
    # The vocab is smaller than the actual gpt2 one, therefore it is padded with zeros
    # to match it. The zeros will be unused.
    gpt2_vocab_size = gpt2.wte.weight.size(0)
    vocab_size = converted_state["wte.weight"].size(0)
    pad_size = gpt2_vocab_size - vocab_size
    converted_state["wte.weight"] = F.pad(
        converted_state["wte.weight"], [0, 0, 0, pad_size], mode="constant", value=0.0
    )

    # Some of gpt2's weights are not present in the converted checkpoint; those
    # will be trained in the downstream task. As long as every key in the converted
    # state matches an actual key of the GPT-2 model (no unexpected keys), this is fine.
    incompatible_keys = gpt2.load_state_dict(converted_state, strict=False)
    assert (
        len(incompatible_keys.unexpected_keys) == 0
    ), "Unexpected keys in the model: {}".format(incompatible_keys.unexpected_keys)

    gpt2.save_pretrained(options.output)
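The comments above capture the essential trick of this conversion: zero-pad the smaller embedding matrix to GPT-2's vocabulary size and load with strict=False so that keys absent from the converted checkpoint keep their pretrained values. The isolated sketch below reproduces just that step (the 50000-row matrix is a made-up stand-in for a converted wte.weight):

import torch
import torch.nn.functional as F
from transformers import GPT2Model

gpt2 = GPT2Model.from_pretrained("gpt2-medium")
gpt2_vocab_size = gpt2.wte.weight.size(0)            # 50257 for GPT-2

small_wte = torch.zeros(50000, gpt2.wte.weight.size(1))
pad_size = gpt2_vocab_size - small_wte.size(0)

# pad the rows with zeros so the shapes match; the padded rows stay unused
padded = F.pad(small_wte, [0, 0, 0, pad_size], mode="constant", value=0.0)

# strict=False tolerates keys present in gpt2 but absent from the converted state
incompatible = gpt2.load_state_dict({"wte.weight": padded}, strict=False)
assert len(incompatible.unexpected_keys) == 0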
Example #19
    def __init__(self, config):
        print("************ THIS MODEL COMES FROM CS224N PROJECT ************")
        super().__init__(config)
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        self.init_weights()
Example #20
    def __init__(self, config: Munch):
        r""" Init a new GPT2 synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munched config class.
        """
        super(GPT2LMSynapse, self).__init__(config=config)
        if config is None:
            config = GPT2LMSynapse.build_config()

        # Build hugging face config.
        huggingface_config = GPT2Config(
            vocab_size=bittensor.__vocab_size__,
            n_embd=bittensor.__network_dim__,
            n_layer=config.synapse.n_layer,
            n_head=config.synapse.n_head,
            n_inner=config.synapse.n_inner,
            activation_function=config.synapse.activation_function,
            resid_pdrop=config.synapse.resid_pdrop,
            embd_pdrop=config.synapse.embd_pdrop,
            attn_pdrop=config.synapse.attn_pdrop,
            layer_norm_epsilon=config.synapse.layer_norm_epsilon,
            initializer_range=config.synapse.initializer_range,
            summary_type=config.synapse.summary_type,
            summary_use_proj=config.synapse.summary_use_proj,
            summary_activation=config.synapse.summary_activation,
            summary_proj_to_labels=config.synapse.summary_proj_to_labels,
            summary_first_dropout=config.synapse.summary_first_dropout,
        )

        # encoder_layer: encodes tokenized sequences to network dim.
        # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.transformer = GPT2Model(huggingface_config)

        # pooler_layer: pools the hidden units for use by the pkm dendrite rpc query.
        # [batch_size, bittensor.__network_dim__, sequence_len] -> [batch_size, bittensor.__network_dim__]
        self.pooler = GPT2Pooler(huggingface_config)

        # router: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, bittensor.__network_dim__] -> topk * [batch_size, bittensor.__network_dim__]
        self.router = PKMRouter(config, query_dim=bittensor.__network_dim__)

        # hidden_layer: transforms context and encoding to network_dim hidden units.
        # [batch_size, sequence_dim, 2 * bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.hidden_layer = nn.Linear(bittensor.__network_dim__,
                                      bittensor.__network_dim__)

        # target_layer: maps from hidden layer to vocab dimension for each token. Used by MLM loss.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__vocab_size__]
        self.target_layer = nn.Linear(bittensor.__network_dim__,
                                      bittensor.__vocab_size__,
                                      bias=False)

        # Loss function: MLM cross-entropy loss.
        # predicted: [batch_size, sequence_len, 1], targets: [batch_size, sequence_len, 1] -> [1]
        self.loss_fct = nn.CrossEntropyLoss()

        self.to(self.device)
Example #21
    def create_and_check_gpt2_model_attention_mask_past(
            self, config, input_ids, input_mask, head_mask, token_type_ids,
            *args):
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # create attention mask
        attn_mask = torch.ones(input_ids.shape,
                               dtype=torch.long,
                               device=torch_device)
        half_seq_length = self.seq_length // 2
        attn_mask[:, half_seq_length:] = 0

        # first forward pass
        output, past = model(input_ids, attention_mask=attn_mask).to_tuple()

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # change a random masked slice from input_ids
        random_seq_idx_to_change = ids_tensor(
            (1, ), half_seq_length).item() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, 1),
                                              config.vocab_size).squeeze(-1)
        input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens

        # append to next input_ids and attn_mask
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        attn_mask = torch.cat(
            [
                attn_mask,
                torch.ones((attn_mask.shape[0], 1),
                           dtype=torch.long,
                           device=torch_device)
            ],
            dim=1,
        )

        # get two different outputs
        output_from_no_past = model(
            next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
        output_from_past = model(next_tokens,
                                 past_key_values=past,
                                 attention_mask=attn_mask)["last_hidden_state"]

        # select random slice
        random_slice_idx = ids_tensor((1, ), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(
            torch.allclose(output_from_past_slice,
                           output_from_no_past_slice,
                           atol=1e-3))
Example #22
    def __init__(self, config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.v_head = ValueHead(config)

        self.init_weights()
Example #23
 def from_pretrained(model_id_or_path: str,
                     device: Optional[torch.device] = None,
                     backend: Optional[str] = None):
     torch_model = TorchGPT2Model.from_pretrained(model_id_or_path)
     model = GPT2Model.from_torch(torch_model, device, backend)
     model.config = torch_model.config
     model._torch_model = torch_model  # keep a reference so the torch model is not destroyed
     return model
Example #24
 def __init__(self, config):
     super().__init__(config)
     # config.num_labels = 1
     config.num_labels = le.classes_.shape[0]
     self.transformer = GPT2Model(config)
     self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
     self.multiple_choice_head = SequenceSummary(config)
     self.init_weights()
Example #25
    def __init__(self, config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.cls_head = SequenceSummary(config)

        self.init_weights()
Example #26
def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, full,
                                       gpt2_config_file,
                                       pytorch_dump_folder_path):
    #putting requirements here so users can see usage info before it errors out on missing modules
    from io import open
    from shutil import copyfile
    import logging
    logging.basicConfig(level=logging.INFO)
    from pathlib import Path
    import torch
    #WEIGHTS_NAME = "pytorch_model.bin"
    #CONFIG_NAME = "config.json"
    from transformers import (
        CONFIG_NAME,
        WEIGHTS_NAME,
        GPT2Config,
        GPT2Model,
        load_tf_weights_in_gpt2,
    )
    gpt2_checkpoint_path = Path(gpt2_checkpoint_path)
    print(gpt2_checkpoint_path.name)

    if pytorch_dump_folder_path == '':
        prefix = '32BIT-' if full else '16BIT-'
        pytorch_dump_folder_path = 'pytorch-' + prefix + gpt2_checkpoint_path.name
    pytorch_dump_folder_path = Path(pytorch_dump_folder_path)

    pytorch_dump_folder_path.mkdir(exist_ok=True)

    # Construct model
    if gpt2_config_file == "":
        # This doesn't seem to work. We will use the hparams.json file that seems
        # to be included in the checkpoint directory instead.
        # config = GPT2Config()
        gpt2_config_file = gpt2_checkpoint_path / 'hparams.json'

    config = GPT2Config.from_json_file(gpt2_config_file)
    model = GPT2Model(config)

    # Load weights from numpy
    load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path)
    if not full:
        model.half()

    # Save pytorch-model
    pytorch_weights_dump_path = pytorch_dump_folder_path / WEIGHTS_NAME
    pytorch_config_dump_path = pytorch_dump_folder_path / CONFIG_NAME
    print("Save PyTorch model to {}".format(str(pytorch_weights_dump_path)))

    torch.save(model.state_dict(), pytorch_weights_dump_path)

    print("Save configuration file to: " + str(pytorch_config_dump_path))
    with pytorch_config_dump_path.open("w", encoding="utf-8") as f:
        f.write(config.to_json_string())

    copyfile(gpt2_checkpoint_path / 'vocab.bpe',
             pytorch_dump_folder_path / 'merges.txt')
    copyfile(gpt2_checkpoint_path / 'encoder.json',
             pytorch_dump_folder_path / 'vocab.json')
Example #27
 def __init__(self):
     super(GPT24QUAC, self).__init__()
     '''
     Load the pre-trained GPT-2 base model
     '''
     self.gpt2 = GPT2Model.from_pretrained("gpt2")
     self.config = self.gpt2.config
     self.head = nn.Linear(self.config.n_embd, 2, bias=True)
     self.loss_func = nn.CrossEntropyLoss()
Example #28
    def __init__(self, hidden_size: int, num_classes: int):
        super(GPT2ForTokenClassification, self).__init__()

        self.gpt2model = GPT2Model.from_pretrained('gpt2')

        # GPT2ClassificationHead()
        self.num_labels = num_classes

        self.fc1 = nn.Linear(hidden_size, num_classes)
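Again only the constructor is shown. In contrast to the sequence classifier in Example #6, a token classifier typically applies the linear layer to every position; a minimal sketch under that assumption (hidden_size=768 matches the "gpt2" checkpoint, num_classes=5 is arbitrary):

import torch
import torch.nn as nn
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2 = GPT2Model.from_pretrained("gpt2")
fc1 = nn.Linear(768, 5)

enc = tokenizer("tag every token here", return_tensors="pt")
with torch.no_grad():
    hidden = gpt2(**enc)[0]              # [batch, seq_len, 768]
logits = fc1(hidden)                     # per-token logits: [batch, seq_len, 5]
preds = logits.argmax(dim=-1)            # one label id per token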
Example #29
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--hidden_layer_num', type=int,
                        help="Number 0..48 of the layer to get hidden states from")
    parser.add_argument('--batch_size', type=int, default=32)

    args = parser.parse_args()

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
    config = GPT2Config.from_pretrained('gpt2-medium',
                                        output_hidden_states=True)
    gpt2 = GPT2Model.from_pretrained('gpt2-medium', config=config).cuda()
    logging.getLogger("transformers.tokenization_utils").setLevel(
        logging.ERROR)

    for subsample in ["train", "test"]:
        if not os.path.isdir(subsample):
            os.mkdir(subsample)

        df = pd.read_csv('{}.csv'.format(subsample))
        if os.path.isfile(f'{subsample}_gpt2.pkl'):
            print("Loading token ids...", file=sys.stderr)
            tokens = joblib.load(f'{subsample}_gpt2.pkl')
        else:
            print("Transforming texts to token ids...", file=sys.stderr)
            tokens = [tokenizer.encode(x) for x in tqdm(df.texts)]
            joblib.dump(tokens, f'{subsample}_gpt2.pkl')
        dataset = DiscourseDataset(tokens, pad_token_id=0, max_len=config.n_positions)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size)
        gpt2.eval()
        mean_results, max_results = list(),  list()
        with torch.no_grad():
            for num, (token_ids, attention_ids) in enumerate(tqdm(dataloader), 1):
                _, _, hidden_states = gpt2(token_ids.cuda(),
                                           attention_mask=attention_ids.cuda())
                hidden_states_cpu = [x.cpu().numpy() for x in hidden_states]
                del hidden_states
                gc.collect()

                output = hidden_states_cpu[args.hidden_layer_num]
                del hidden_states_cpu

                sentence_lens = attention_ids.sum(1).cpu().numpy()

                output_zero_padding = output.transpose([2, 0, 1]) * attention_ids.cpu().numpy()
                output_zero_padding = output_zero_padding.transpose([1, 2, 0])

                mean_result = (output_zero_padding.sum(1).T / sentence_lens).T
                max_result = np.array([matrix[:length].max(0) for matrix, length in zip(output_zero_padding, sentence_lens)])

                mean_results.append(mean_result)
                max_results.append(max_result)

                torch.cuda.empty_cache()

        np.save(f'{subsample}/gpt2_mean_embeddings_layer_{args.hidden_layer_num}', np.vstack(mean_results))
        np.save(f'{subsample}/gpt2_max_embeddings_layer_{args.hidden_layer_num}', np.vstack(max_results))
Example #30
    def __init__(self, config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.debias_head = nn.functional.linear
        self.multiple_choice_head = SequenceSummary(config)

        self.init_weights()