Example #1

# Imports for this conversion script; `distilhubert` comes from s3prl, and
# `convert_config` / `recursively_load_weights` are helpers defined elsewhere
# in the same script.
from s3prl.hub import distilhubert
from transformers import HubertConfig, HubertModel, Wav2Vec2FeatureExtractor

def convert_hubert_checkpoint(pytorch_dump_folder_path, config_path=None):
    """
    Copy/paste/tweak model's weights to transformers design.
    """
    model = distilhubert().model.model

    if config_path is not None:
        config = HubertConfig.from_pretrained(config_path)
    else:
        config = convert_config(model)
    model = model.eval()

    feature_extractor = Wav2Vec2FeatureExtractor(
        feature_size=1,
        sampling_rate=16000,
        padding_value=0,
        do_normalize=False,
        return_attention_mask=False,
    )
    hf_model = HubertModel(config)

    recursively_load_weights(model, hf_model)

    feature_extractor.save_pretrained(pytorch_dump_folder_path)
    hf_model.save_pretrained(pytorch_dump_folder_path)
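
A minimal, hypothetical CLI wrapper for the converter above; the original
script wires these arguments up with argparse, and the flag names here simply
mirror the function signature.

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--pytorch_dump_folder_path", required=True,
                        help="Directory to save the converted model to.")
    parser.add_argument("--config_path", default=None,
                        help="Optional path to an existing HubertConfig.")
    args = parser.parse_args()
    convert_hubert_checkpoint(args.pytorch_dump_folder_path, args.config_path)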
Example #2

# Imports for the s3prl downstream-task converter; `SUPPORTED_MODELS` is a
# module-level constant defined elsewhere in the same script.
import torch
from transformers import (HubertConfig, HubertForSequenceClassification,
                          Wav2Vec2FeatureExtractor)

def convert_s3prl_checkpoint(base_model_name, config_path, checkpoint_path,
                             model_dump_path):
    """
    Copy/paste/tweak model's weights to transformers design.
    """
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    if checkpoint["Config"]["downstream_expert"]["modelrc"][
            "select"] not in SUPPORTED_MODELS:
        raise NotImplementedError(
            f"The supported s3prl models are {SUPPORTED_MODELS}")

    downstream_dict = checkpoint["Downstream"]

    hf_config = HubertConfig.from_pretrained(config_path)
    hf_model = HubertForSequenceClassification.from_pretrained(
        base_model_name, config=hf_config)
    hf_feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
        base_model_name, return_attention_mask=True, do_normalize=False)

    # Copy the layer-weighting and downstream head weights into the HF model.
    if hf_config.use_weighted_layer_sum:
        hf_model.layer_weights.data = checkpoint["Featurizer"]["weights"]

    hf_model.projector.weight.data = downstream_dict["projector.weight"]
    hf_model.projector.bias.data = downstream_dict["projector.bias"]
    hf_model.classifier.weight.data = downstream_dict[
        "model.post_net.linear.weight"]
    hf_model.classifier.bias.data = downstream_dict[
        "model.post_net.linear.bias"]

    hf_feature_extractor.save_pretrained(model_dump_path)
    hf_model.save_pretrained(model_dump_path)
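
A hedged usage sketch for the converter above: convert, then reload the dump
with the standard from_pretrained API. The base model id, config path, and
checkpoint path below are illustrative stand-ins, not values from the source.

from transformers import (HubertForSequenceClassification,
                          Wav2Vec2FeatureExtractor)

dump_path = "./hubert-seq-clf"  # hypothetical output directory
convert_s3prl_checkpoint(
    base_model_name="facebook/hubert-base-ls960",  # example Hub checkpoint
    config_path="config.json",                     # placeholder config path
    checkpoint_path="downstream.ckpt",             # placeholder s3prl ckpt
    model_dump_path=dump_path,
)

model = HubertForSequenceClassification.from_pretrained(dump_path)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(dump_path)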
Example #3

# Imports for the fairseq Hubert converter; `recursively_load_weights` and
# `logger` are defined elsewhere in the same script.
import json
import os

import fairseq
from fairseq.data import Dictionary
from transformers import (HubertConfig, HubertForCTC, HubertModel,
                          Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor,
                          Wav2Vec2Processor)


def convert_hubert_checkpoint(checkpoint_path,
                              pytorch_dump_folder_path,
                              config_path=None,
                              dict_path=None,
                              is_finetuned=True):
    """
    Copy/paste/tweak the model's weights into the transformers design.
    """
    if config_path is not None:
        config = HubertConfig.from_pretrained(config_path)
    else:
        config = HubertConfig()

    if is_finetuned:
        if dict_path:
            target_dict = Dictionary.load(dict_path)

            # Important: swap the bos and pad token ids, since the CTC blank
            # symbol is <pad> and not <s> as in fairseq.
            config.bos_token_id = target_dict.pad_index
            config.pad_token_id = target_dict.bos_index
            config.eos_token_id = target_dict.eos_index
            config.vocab_size = len(target_dict.symbols)
            vocab_path = os.path.join(pytorch_dump_folder_path, "vocab.json")
            if not os.path.isdir(pytorch_dump_folder_path):
                logger.error(
                    f"--pytorch_dump_folder_path ({pytorch_dump_folder_path}) "
                    "should be a directory")
                return
            os.makedirs(pytorch_dump_folder_path, exist_ok=True)
            with open(vocab_path, "w", encoding="utf-8") as vocab_handle:
                json.dump(target_dict.indices, vocab_handle)
            tokenizer = Wav2Vec2CTCTokenizer(
                vocab_path,
                unk_token=target_dict.unk_word,
                pad_token=target_dict.pad_word,
                bos_token=target_dict.bos_word,
                eos_token=target_dict.eos_word,
                word_delimiter_token="|",
                do_lower_case=False,
            )
            return_attention_mask = config.feat_extract_norm == "layer"
            feature_extractor = Wav2Vec2FeatureExtractor(
                feature_size=1,
                sampling_rate=16000,
                padding_value=0,
                do_normalize=True,
                return_attention_mask=return_attention_mask,
            )
            processor = Wav2Vec2Processor(feature_extractor=feature_extractor,
                                          tokenizer=tokenizer)
            processor.save_pretrained(pytorch_dump_folder_path)

        hf_wav2vec = HubertForCTC(config)
    else:
        hf_wav2vec = HubertModel(config)

    if is_finetuned:
        model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [checkpoint_path],
            arg_overrides={"data": "/".join(dict_path.split("/")[:-1])})
    else:
        model, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
            [checkpoint_path])

    model = model[0].eval()

    recursively_load_weights(model, hf_wav2vec, is_finetuned)

    hf_wav2vec.save_pretrained(pytorch_dump_folder_path)
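
A hedged round-trip sketch for the fine-tuned path above: convert, reload the
dump, and run a forward pass on dummy audio. All paths below are placeholders.

import torch
from transformers import HubertForCTC

convert_hubert_checkpoint(
    checkpoint_path="hubert_finetuned.pt",    # placeholder fairseq checkpoint
    pytorch_dump_folder_path="./hubert-ctc",  # must be an existing directory
    dict_path="dict.ltr.txt",                 # placeholder fairseq dictionary
    is_finetuned=True,
)

model = HubertForCTC.from_pretrained("./hubert-ctc")
dummy_audio = torch.randn(1, 16000)  # one second of audio at 16 kHz
with torch.no_grad():
    logits = model(dummy_audio).logits
print(logits.shape)  # (batch, frames, vocab_size)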