Example #1
def convert_wav2vec2_checkpoint(checkpoint_path,
                                pytorch_dump_folder_path,
                                config_path=None,
                                dict_path=None,
                                is_finetuned=True):
    """
    Copy/paste/tweak the model's weights to the Transformers design.
    """
    if config_path is not None:
        config = Wav2Vec2Config.from_pretrained(config_path)
    else:
        config = Wav2Vec2Config()

    if is_finetuned:
        hf_wav2vec = Wav2Vec2ForCTC(config)
    else:
        hf_wav2vec = Wav2Vec2Model(config)

    if is_finetuned:
        model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [checkpoint_path], arg_overrides={"data": dict_path})
    else:
        model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [checkpoint_path])

    model = model[0].eval()

    recursively_load_weights(model, hf_wav2vec, is_finetuned)

    hf_wav2vec.save_pretrained(pytorch_dump_folder_path)
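Conversion scripts like this are normally driven from the command line. A minimal argparse wrapper around the function above could look like the following sketch; the flag names are assumptions for illustration, not necessarily those of the original script.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint_path", required=True, help="path to the fairseq checkpoint")
    parser.add_argument("--pytorch_dump_folder_path", required=True, help="output directory for the converted model")
    parser.add_argument("--config_path", default=None, help="optional Wav2Vec2Config to start from")
    parser.add_argument("--dict_path", default=None, help="optional fairseq dictionary (fine-tuned models)")
    parser.add_argument("--not_finetuned", action="store_true", help="convert a pretrained-only model")
    args = parser.parse_args()

    convert_wav2vec2_checkpoint(
        args.checkpoint_path,
        args.pytorch_dump_folder_path,
        args.config_path,
        args.dict_path,
        is_finetuned=not args.not_finetuned,
    )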
Example #2
    def prepare_config_and_inputs(self):
        input_values = floats_tensor([self.batch_size, self.seq_length],
                                     self.vocab_size)
        attention_mask = random_attention_mask(
            [self.batch_size, self.seq_length])

        config = Wav2Vec2Config(
            do_stable_layer_norm=self.do_stable_layer_norm,
            hidden_size=self.hidden_size,
            feat_extract_norm=self.feat_extract_norm,
            feat_extract_dropout=self.feat_extract_dropout,
            feat_extract_activation=self.feat_extract_activation,
            conv_dim=self.conv_dim,
            conv_stride=self.conv_stride,
            conv_kernel=self.conv_kernel,
            conv_bias=self.conv_bias,
            num_conv_pos_embeddings=self.num_conv_pos_embeddings,
            num_conv_pos_embedding_groups=self.num_conv_pos_embedding_groups,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            hidden_dropout_prob=self.hidden_dropout_prob,
            intermediate_size=self.intermediate_size,
            layer_norm_eps=self.layer_norm_eps,
            hidden_act=self.hidden_act,
            initializer_range=self.initializer_range,
            vocab_size=self.vocab_size,
        )

        return config, input_values, attention_mask
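To see what such a config drives, here is a small self-contained forward pass. The tiny values below are illustrative choices to keep the model fast to build, not the tester's actual attributes.

import torch
from transformers import Wav2Vec2Config, Wav2Vec2Model

config = Wav2Vec2Config(
    hidden_size=32,
    num_hidden_layers=2,
    num_attention_heads=2,
    intermediate_size=64,
    conv_dim=(32, 32),
    conv_stride=(4, 4),
    conv_kernel=(8, 8),
    num_conv_pos_embeddings=16,
    num_conv_pos_embedding_groups=2,
)
model = Wav2Vec2Model(config).eval()
input_values = torch.randn(2, 1024)  # (batch_size, raw-audio seq_length)
with torch.no_grad():
    hidden_states = model(input_values).last_hidden_state
print(hidden_states.shape)  # (2, downsampled length, 32)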
Example #3
    def test_processor_from_local_directory_from_repo(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = Wav2Vec2Config()
            processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")

            # save in new folder
            model_config.save_pretrained(tmpdirname)
            processor.save_pretrained(tmpdirname)

            processor = AutoProcessor.from_pretrained(tmpdirname)

        self.assertIsInstance(processor, Wav2Vec2Processor)
Example #4
    def test_processor_from_local_directory_from_model_config(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = Wav2Vec2Config(processor_class="Wav2Vec2Processor")
            model_config.save_pretrained(tmpdirname)
            # copy relevant files
            copyfile(SAMPLE_VOCAB, os.path.join(tmpdirname, "vocab.json"))
            # create an empty sample feature-extractor config
            with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "w") as f:
                f.write("{}")

            processor = AutoProcessor.from_pretrained(tmpdirname)

        self.assertIsInstance(processor, Wav2Vec2Processor)
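The processor_class hint above is what lets AutoProcessor resolve Wav2Vec2Processor. The processor itself is just a tokenizer plus a feature extractor and can also be assembled by hand, as the conversion script in Example #9 does. A minimal sketch, where the vocab path is illustrative:

from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor, Wav2Vec2Processor

tokenizer = Wav2Vec2CTCTokenizer("vocab.json")  # illustrative path to a CTC-style vocab
feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1, sampling_rate=16000, padding_value=0.0, do_normalize=True
)
processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)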
Example #5
def convert_wav2vec2_checkpoint(checkpoint_path, pytorch_dump_folder_path, dict_path=None):
    """
    Copy/paste/tweak the model's weights to the Transformers design.
    """
    hf_wav2vec = Wav2Vec2ForCTC(Wav2Vec2Config())

    model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
        [checkpoint_path], arg_overrides={"data": dict_path}
    )
    model = model[0].eval()

    recursively_load_weights(model, hf_wav2vec)

    hf_wav2vec.save_pretrained(pytorch_dump_folder_path)
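After save_pretrained runs, the dump folder behaves like any local checkpoint. A quick reload sanity check (path illustrative):

from transformers import Wav2Vec2ForCTC

model = Wav2Vec2ForCTC.from_pretrained("path/to/pytorch_dump_folder")
model.eval()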
Example #6
    def test_feature_extractor_from_local_directory_from_config(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = Wav2Vec2Config()

            # remove feature_extractor_type to make sure config.json alone is enough to load the feature extractor locally
            config_dict = AutoFeatureExtractor.from_pretrained(
                SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR).to_dict()
            config_dict.pop("feature_extractor_type")
            config = Wav2Vec2FeatureExtractor(**config_dict)  # unpack as kwargs, not positionally

            # save in new folder
            model_config.save_pretrained(tmpdirname)
            config.save_pretrained(tmpdirname)

            config = AutoFeatureExtractor.from_pretrained(tmpdirname)

        self.assertIsInstance(config, Wav2Vec2FeatureExtractor)
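The pop-and-rebuild step above relies on Wav2Vec2FeatureExtractor round-tripping through plain keyword arguments. A standalone sketch of that contract:

from transformers import Wav2Vec2FeatureExtractor

feature_extractor = Wav2Vec2FeatureExtractor()
config_dict = feature_extractor.to_dict()
config_dict.pop("feature_extractor_type", None)

# the dict must be unpacked with ** rather than passed as a single positional argument
rebuilt = Wav2Vec2FeatureExtractor(**config_dict)
assert rebuilt.sampling_rate == feature_extractor.sampling_rate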
Example #7
    def get_config(self):
        return Wav2Vec2Config(
            hidden_size=self.hidden_size,
            feat_extract_norm=self.feat_extract_norm,
            feat_extract_dropout=self.feat_extract_dropout,
            feat_extract_activation=self.feat_extract_activation,
            conv_dim=self.conv_dim,
            conv_stride=self.conv_stride,
            conv_kernel=self.conv_kernel,
            conv_bias=self.conv_bias,
            num_conv_pos_embeddings=self.num_conv_pos_embeddings,
            num_conv_pos_embedding_groups=self.num_conv_pos_embedding_groups,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            hidden_dropout_prob=self.hidden_dropout_prob,
            intermediate_size=self.intermediate_size,
            layer_norm_eps=self.layer_norm_eps,
            hidden_act=self.hidden_act,
            initializer_range=self.initializer_range,
            vocab_size=self.vocab_size,
        )
Example #8
    def test_feature_extractor_from_local_directory_from_config(self):
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_config = Wav2Vec2Config()

            # remove feature_extractor_type to make sure config.json alone is enough to load the feature extractor locally
            config_dict = AutoFeatureExtractor.from_pretrained(SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR).to_dict()

            config_dict.pop("feature_extractor_type")
            config = Wav2Vec2FeatureExtractor(**config_dict)

            # save in new folder
            model_config.save_pretrained(tmpdirname)
            config.save_pretrained(tmpdirname)

            config = AutoFeatureExtractor.from_pretrained(tmpdirname)

            # make sure private variable is not incorrectly saved
            dict_as_saved = json.loads(config.to_json_string())
            self.assertTrue("_processor_class" not in dict_as_saved)

        self.assertIsInstance(config, Wav2Vec2FeatureExtractor)
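The private-variable assertion can also be checked directly on disk. A standalone sketch, assuming FEATURE_EXTRACTOR_NAME resolves to preprocessor_config.json:

import json
import os
import tempfile

from transformers import Wav2Vec2FeatureExtractor

with tempfile.TemporaryDirectory() as tmpdirname:
    Wav2Vec2FeatureExtractor().save_pretrained(tmpdirname)
    with open(os.path.join(tmpdirname, "preprocessor_config.json")) as f:
        saved = json.load(f)
    assert "_processor_class" not in saved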
Example #9
def convert_wav2vec2_checkpoint(checkpoint_path,
                                pytorch_dump_folder_path,
                                config_path=None,
                                dict_path=None,
                                is_finetuned=True):
    """
    Copy/paste/tweak the model's weights to the Transformers design.
    """
    if config_path is not None:
        config = Wav2Vec2Config.from_pretrained(config_path)
    else:
        config = Wav2Vec2Config()

    if is_finetuned:
        if dict_path:
            target_dict = Dictionary.load(dict_path)

            config.bos_token_id = target_dict.bos_index
            config.eos_token_id = target_dict.eos_index
            config.pad_token_id = target_dict.pad_index
            config.vocab_size = len(target_dict.symbols)
            vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
            if not os.path.isdir(pytorch_dump_folder_path):
                logger.error(f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) should be a directory")
                return
            os.makedirs(pytorch_dump_folder_path, exist_ok=True)
            with open(vocab_path, "w", encoding="utf-8") as vocab_handle:
                json.dump(target_dict.indices, vocab_handle)
            tokenizer = Wav2Vec2CTCTokenizer(
                vocab_path,
                unk_token=target_dict.unk_word,
                pad_token=target_dict.pad_word,
                bos_token=target_dict.bos_word,
                eos_token=target_dict.eos_word,
                word_delimiter_token="|",
                do_lower_case=False,
            )
            # models whose feature extractor uses layer norm were trained with attention masks
            return_attention_mask = config.feat_extract_norm == "layer"
            feature_extractor = Wav2Vec2FeatureExtractor(
                feature_size=1,
                sampling_rate=16000,
                padding_value=0,
                do_normalize=True,
                return_attention_mask=return_attention_mask,
            )
            processor = Wav2Vec2Processor(feature_extractor=feature_extractor,
                                          tokenizer=tokenizer)
            processor.save_pretrained(pytorch_dump_folder_path)

        hf_wav2vec = Wav2Vec2ForCTC(config)
    else:
        hf_wav2vec = Wav2Vec2Model(config)

    if is_finetuned:
        model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [checkpoint_path], arg_overrides={"data": dict_path})
    else:
        model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [checkpoint_path])

    model = model[0].eval()

    recursively_load_weights(model, hf_wav2vec, is_finetuned)

    hf_wav2vec.save_pretrained(pytorch_dump_folder_path)
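A converted checkpoint can be smoke-tested end to end together with the processor saved alongside it. In this sketch the paths and the random waveform are illustrative stand-ins:

import torch

from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

processor = Wav2Vec2Processor.from_pretrained("path/to/pytorch_dump_folder")
model = Wav2Vec2ForCTC.from_pretrained("path/to/pytorch_dump_folder").eval()

speech = torch.randn(16000).numpy()  # stand-in for one second of 16 kHz audio
inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    logits = model(inputs.input_values).logits
predicted_ids = torch.argmax(logits, dim=-1)
print(processor.batch_decode(predicted_ids))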