import torch
from transformers import FunnelBaseModel, FunnelConfig, FunnelModel, load_tf_weights_in_funnel


def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, base_model):
    # Initialise PyTorch model
    config = FunnelConfig.from_json_file(config_file)
    print(f"Building PyTorch model from configuration: {config}")
    model = FunnelBaseModel(config) if base_model else FunnelModel(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_funnel(model, config, tf_checkpoint_path)

    # Save pytorch-model
    print(f"Save PyTorch model to {pytorch_dump_path}")
    torch.save(model.state_dict(), pytorch_dump_path)
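
A minimal usage sketch of the converter above; the paths are placeholders for illustration, not files referenced by the source:

if __name__ == "__main__":
    # Hypothetical paths; point these at a real Funnel TF checkpoint and its JSON config.
    convert_tf_checkpoint_to_pytorch(
        tf_checkpoint_path="./funnel_tf/model.ckpt",
        config_file="./funnel_tf/config.json",
        pytorch_dump_path="./funnel_pt/pytorch_model.bin",
        base_model=False,  # True exports the encoder-only FunnelBaseModel instead of FunnelModel
    )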
Example #2
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length],
                                    vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                        self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        fake_token_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size],
                                         self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length],
                                      self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)
            fake_token_labels = ids_tensor([self.batch_size, self.seq_length],
                                           1)

        config = FunnelConfig(
            vocab_size=self.vocab_size,
            block_sizes=self.block_sizes,
            num_decoder_layers=self.num_decoder_layers,
            d_model=self.d_model,
            n_head=self.n_head,
            d_head=self.d_head,
            d_inner=self.d_inner,
            hidden_act=self.hidden_act,
            hidden_dropout=self.hidden_dropout,
            attention_dropout=self.attention_dropout,
            activation_dropout=self.activation_dropout,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            return_dict=True,
        )

        return (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            fake_token_labels,
        )
Example #3
import torch
from transformers import FunnelConfig, FunnelForPreTraining, load_tf_weights_in_funnel


def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file,
                                     pytorch_dump_path):
    # Initialise PyTorch model
    config = FunnelConfig.from_json_file(config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = FunnelForPreTraining(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_funnel(model, config, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
    def get_config(self):
        return FunnelConfig(
            vocab_size=self.vocab_size,
            block_sizes=self.block_sizes,
            num_decoder_layers=self.num_decoder_layers,
            d_model=self.d_model,
            n_head=self.n_head,
            d_head=self.d_head,
            d_inner=self.d_inner,
            hidden_act=self.hidden_act,
            hidden_dropout=self.hidden_dropout,
            attention_dropout=self.attention_dropout,
            activation_dropout=self.activation_dropout,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
        )
from transformers import BertConfig, ElectraConfig, FunnelConfig, XLMRobertaConfig


def define_config(name):
    if name in [
            "bert-base-multilingual-cased",
            "sangrimlee/bert-base-multilingual-cased-korquad",
            "kykim/bert-kor-base", "monologg/kobert"
    ]:
        return BertConfig.from_pretrained(name)
    elif name in [
            "monologg/koelectra-base-v3-discriminator",
            "kykim/electra-kor-base"
    ]:
        return ElectraConfig.from_pretrained(name)
    elif name in ["xlm-roberta-large"]:
        return XLMRobertaConfig.from_pretrained(name)
    elif name in ["kykim/funnel-kor-base"]:
        return FunnelConfig.from_pretrained(name)
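
A short usage sketch for define_config; note that a name outside the branches above falls through and returns None:

# Illustrative call: fetches the pretrained configuration for the Funnel checkpoint handled above.
config = define_config("kykim/funnel-kor-base")
print(type(config).__name__)  # FunnelConfig
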
class Funnel_T5_VAE_Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of :class:`~transformer_vae.T5_VAE_Model`.
    It is used to instantiate a Funnel-T5-VAE model according to the specified arguments, defining the model architecture.
    Instantiating a configuration with the defaults will yield a configuration similar to that of the `funnel-t5-vae-base` architecture.

    To be able to use `transformer.trainer.Trainer` we need some specific training logic & config in the model.

    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
    outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.

    Arguments:
        latent_size (:obj:`int`, `optional`, defaults to 1,000):
            Number of dimensions to use for the sequence's latent code.
        funnel_name (:obj:`str`, `optional`, defaults to funnel-transformer/intermediate):
            Name of the Funnel Transformer model to use as the encoder.
        vae_encoder_model (:obj:`str`, `optional`, defaults to ''):
            Name of the model to encode T5 hidden states into latent codes.
        vae_decoder_model (:obj:`str`, `optional`, defaults to ''):
            Name of the model to decode latent codes into T5 hidden states.
        set_seq_size (:obj:`int`, `optional`, defaults to 60):
            NOTE: Every input sequence must be padded to be equal to this length.
        t5_name (:obj:`str`, `optional`, defaults to t5-base):
            Name of the Transformer model to use as a decoder.
        critic_name (:obj:`str`, `optional`, defaults to ''):
            Name of the Transformer model to use as an advisor on interpolations.
        *** Training Args ***
        reg_schedule_k (:obj:`float`, `optional`, defaults to 0.0025):
            Multiplied by global_step in a sigmoid, more gradually increase regulariser loss weight.
        reg_schedule_b (:obj:`float`, `optional`, defaults to 6.25):
            Added to global step in sigmoid, further delays increase in regulariser loss weight.
        use_extra_logs (:obj:`bool`, `optional`, defaults to False):
            Store extra logs during each training inference.
        gradient_checkpoint_encoder (:obj:`bool`, `optional`, defaults to False):
            Checkpoint gradients in the model.
            Currently this just checkpoints after the encoder + VAE.
        funnel_block_sizes (:obj:`str`, `optional`, defaults to ''):
            Size of each Funnel Encoder block, sequence is halved between each block.
            Example specification: 1_1_1
        *** End ***

        TODO: Add extra models to condition on the latent
    """
    model_type = "transformer_vae"
    is_composition = True

    def __init__(
        self,
        latent_size=1_000,
        funnel_name="funnel-transformer/intermediate",
        t5_name="t5-base",
        vae_encoder_model='',
        vae_decoder_model='',
        critic_type='',
        critic_name='',
        set_seq_size=60,
        decoder_start_token_id=0,
        dont_use_reg_loss=False,
        reg_schedule_k=0.0025,
        reg_schedule_b=6.25,
        use_extra_logs=False,
        cache_dir=None,
        n_latent_tokens=5,  # set to -1 for full sequence
        funnel_block_sizes='',
        num_decoder_layers=0,
        num_decoder_heads=0,
        attention_window_size=0,
        attention_window_overlap=0,
        gradient_checkpoint_encoder=False,
        decoder_grad_chk_pnt_rate=0,
        skip_upsample=False,
        **kwargs,
    ):
        assertIn(vae_encoder_model, VAE_ENCODER_MODELS.keys(),
                 "Unexpected VAE encoder.")
        assertIn(vae_decoder_model, VAE_DECODER_MODELS.keys(),
                 "Unexpected VAE decoder.")

        super().__init__(**kwargs)

        self.set_seq_size = set_seq_size

        # VAE
        self.vae_encoder_model = vae_encoder_model
        self.vae_decoder_model = vae_decoder_model
        if set_seq_size < n_latent_tokens:
            logger.warning(
                f'set_seq_size is smaller than n_latent_tokens; reducing n_latent_tokens from {n_latent_tokens} to {set_seq_size}.'
            )
            n_latent_tokens = set_seq_size
        self.latent_size = latent_size
        self.n_latent_tokens = n_latent_tokens
        self.skip_upsample = skip_upsample

        # funnel encoder model
        if 'funnel' not in kwargs:
            self.funnel = AutoConfig.from_pretrained(funnel_name,
                                                     cache_dir=cache_dir)
            if funnel_block_sizes:
                self.funnel.block_sizes = [
                    int(i) for i in funnel_block_sizes.split('_')
                ]
            self.funnel.decoder_start_token_id = decoder_start_token_id
            self.funnel.n_positions = set_seq_size
        else:
            self.funnel = FunnelConfig(**kwargs.pop('funnel'))
        pooling_division = 2**(len(self.funnel.block_sizes) - 1)
        self.encoded_seq_size = math.ceil(self.funnel.n_positions /
                                          pooling_division)
        self.gradient_checkpoint_encoder = gradient_checkpoint_encoder

        # T5 decoder model
        if 't5' not in kwargs:
            self.t5 = AutoConfig.from_pretrained(t5_name, cache_dir=cache_dir)
            if num_decoder_layers:
                self.t5.num_layers = num_decoder_layers
            if num_decoder_heads:
                self.t5.num_heads = num_decoder_heads
            self.t5.decoder_start_token_id = decoder_start_token_id
            self.t5.n_positions = self.funnel.n_positions
            assertEqual(self.t5.model_type, "t5",
                        "Need t5 model type for transformer_decoder.")
        else:
            self.t5 = T5Config(**kwargs.pop('t5'))
        assertEqual(self.funnel.d_model, self.t5.d_model,
                    "Funnel & T5 transformers have different dimensions.")
        self.decoder_grad_chk_pnt_rate = decoder_grad_chk_pnt_rate
        assert (attention_window_size < set_seq_size
                ), 'Attention window must be smaller than set sequence size.'
        self.attention_window_size = attention_window_size
        self.attention_window_overlap = attention_window_overlap
        if attention_window_size:
            assert (
                set_seq_size % attention_window_size != 0
            ), 'When doing an alternating attention pattern the sequence size cannot be divisible by the window size, as no alternations would be possible.'
            self.attention_window_overlap = set_seq_size % attention_window_size

        # extra training losses
        self.use_reg_loss = not dont_use_reg_loss
        if dont_use_reg_loss:
            logger.warning(
                "Regularisation loss is turned off, you are training an Autoencoder (not a VAE)."
            )
        self.reg_schedule_k = reg_schedule_k
        self.reg_schedule_b = reg_schedule_b
        self.use_extra_logs = use_extra_logs

        # critic model
        self.critic = None
        if critic_name:
            self.critic_type = critic_type
            if 'critic' not in kwargs:
                self.critic = AutoConfig.from_pretrained(critic_name,
                                                         cache_dir=cache_dir)
            else:
                self.critic = FunnelConfig(**kwargs.pop('critic'))
            assertEqual(self.t5.d_model, self.critic.d_model,
                        "Funnel & T5 transformers have different dimensions.")

        # misc
        self.use_cache = getattr(self.funnel, "use_cache", False)
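
A minimal sketch of instantiating the configuration class above with a few of the documented arguments; the values are illustrative and assume the surrounding VAE_ENCODER_MODELS / VAE_DECODER_MODELS registries accept the empty-string defaults:

config = Funnel_T5_VAE_Config(
    funnel_name="funnel-transformer/intermediate",
    t5_name="t5-base",
    set_seq_size=60,
    n_latent_tokens=5,
    latent_size=1_000,
)
# encoded_seq_size = ceil(n_positions / 2 ** (len(block_sizes) - 1)), as computed in __init__ above
print(config.encoded_seq_size)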
Example #7
    # create data objects
    dataset_gen = LineByLineTextDataset(tokenizer=bpe_tokenizer,
                                        file_path=input_path,
                                        block_size=block_size)
    dataset_gen_val = LineByLineTextDataset(tokenizer=bpe_tokenizer,
                                            file_path=input_path_val,
                                            block_size=block_size)

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=bpe_tokenizer, mlm=True, mlm_probability=mlm_probability)

    # create model
    config = FunnelConfig(
        vocab_size=bpe_tokenizer.vocab_size,
        max_position_embeddings=max_len + 10,
        n_head=num_attention_heads,
        block_sizes=block_sizes,
        type_vocab_size=1,
    )
    model = FunnelForMaskedLM(config=config)

    _pretty_print(f"Number of model parameters : {model.num_parameters()}")

    model_path = os.path.join(output_path, "lm")
    training_args = TrainingArguments(
        output_dir=model_path,
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=val_batch_size,
        evaluation_strategy="steps",