assert np.array_equal(
        test_labels_acoustic, test_labels_linguistic
    ), "Labels for acoustic and linguistic datasets are not the same!"
    # NOTE(review): this sanity check disappears under `python -O`; consider an
    # explicit raise instead of `assert` if the check must always run.
    """Choosing hardware"""
    # Prefer the GPU when CUDA is available, otherwise fall back to CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == "cuda":
        print(
            "Using GPU. Setting default tensor type to torch.cuda.FloatTensor")
        # Newly created tensors will default to CUDA float32.
        # NOTE(review): torch.set_default_tensor_type is deprecated in recent
        # PyTorch in favour of set_default_dtype/set_default_device — confirm
        # the project's torch version before modernising.
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
    else:
        print("Using CPU. Setting default tensor type to torch.FloatTensor")
        torch.set_default_tensor_type("torch.FloatTensor")
    """Converting model to specified hardware and format"""
    acoustic_cfg_json = json.load(
        open(args.acoustic_model.replace(".torch", ".json"), "r"))
    acoustic_cfg = AcousticConfig.from_json(acoustic_cfg_json)

    acoustic_model = CNN(acoustic_cfg)
    acoustic_model.float().to(device)
    try:
        acoustic_model.load_state_dict(torch.load(args.acoustic_model))
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit. A checkpoint saved on another device typically fails the
        # plain load, so retry with an explicit map_location.
        print(
            "Failed to load model from {} without device mapping. Trying to load with mapping to {}"
            .format(args.acoustic_model, device))
        acoustic_model.load_state_dict(
            torch.load(args.acoustic_model, map_location=device))
    # Load the linguistic model's config the same way as the acoustic one:
    # weights path with .json extension. `with` closes the handle that the
    # original `json.load(open(...))` left dangling.
    with open(args.linguistic_model.replace(".torch", ".json"), "r") as cfg_file:
        linguistic_cfg_json = json.load(cfg_file)
    linguistic_cfg = LinguisticConfig.from_json(linguistic_cfg_json)
            params["n_layers"] = np.random.randint(1, 4)
            params["hidden_dim"] = np.random.randint(10, 100)
            params["dropout"] = 0.5 + np.random.rand() * 0.4
            params["dropout2"] = 0.5 + np.random.rand() * 0.45
            params["reg_ratio"] = np.random.rand() * 0.0015
            params["batch_size"] = np.random.randint(26, 256)
            params["bidirectional"] = bool(np.random.randint(0, 2))
            cfg = AcousticLLDConfig(**params)
            model = RNN(cfg)

        elif args.model_type == "acoustic-spectrogram":
            test_features, test_labels, val_features, val_labels, train_features, train_labels = load_spectrogram_dataset(
            )
            params["fc_size"] = np.random.randint(10, 200)
            params["dropout"] = 0.3 + np.random.rand() * 0.6
            cfg = AcousticSpectrogramConfig(**params)
            model = CNN(cfg)

        else:
            raise Exception(
                "model_type parameter has to be one of [linguistic|acoustic-lld|acoustic-spectrogram]"
            )

        # Report how many samples landed in each split as a sanity check.
        print(
            "Subsets sizes: test_features:{}, test_labels:{}, val_features:{}, val_labels:{}, train_features:{}, train_labels:{}"
            .format(test_features.shape[0], test_labels.shape[0],
                    val_features.shape[0], val_labels.shape[0],
                    train_features.shape[0], train_labels.shape[0]))
        """Converting model to specified hardware and format"""
        # float() is in-place for nn.Module; .to() is reassigned for clarity.
        model.float()
        model = model.to(get_device())
# Example #3
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_type", type=str, default="linguistic")
    args = parser.parse_args()

    # Table-driven selection: each supported model_type maps to its
    # (config class, dataset-loading function, network class).
    setups = {
        "linguistic": (LinguisticConfig, load_linguistic_dataset, RNN),
        "acoustic-lld": (AcousticLLDConfig, load_acoustic_features_dataset, RNN),
        "acoustic-spectrogram": (AcousticSpectrogramConfig, load_spectrogram_dataset, CNN),
    }
    if args.model_type not in setups:
        raise Exception(
            "model_type parameter has to be one of [linguistic|acoustic-lld|acoustic-spectrogram]"
        )
    config_cls, load_dataset, net_cls = setups[args.model_type]

    # Same order as the original branches: config first, then data, then model.
    cfg = config_cls()
    test_features, test_labels, val_features, val_labels, train_features, train_labels = load_dataset()
    model = net_cls(cfg)

    # Report split sizes before training starts.
    print(
        "Subsets sizes: test_features:{}, test_labels:{}, val_features:{}, val_labels:{}, train_features:{}, train_labels:{}"
        .format(test_features.shape[0], test_labels.shape[0],
                val_features.shape[0], val_labels.shape[0],
                train_features.shape[0], train_labels.shape[0]))
    """Running training"""
    # NOTE(review): this call looks truncated/garbled — `dtype=np.int16` is not
    # a plausible run_training argument and appears to belong to an audio
    # recording call (cf. the sd.wait()/WAV-write code that follows); recover
    # the full argument list from the original script before relying on this.
    run_training(model, cfg, test_features, test_labels, train_features,
                         dtype=np.int16)
    sd.wait()  # Wait until recording is finished
    print("Recording finished")

    write(TMP_FILENAME, SAMPLE_RATE, myrecording)  # Save as WAV file

    # Local import defers loading the heavy DeepSpeech dependency until needed.
    from deepspeech_generator import speech_to_text
    # Transcribe the just-recorded WAV; the graph, alphabet, language model and
    # trie are all expected inside the args.deepspeech directory.
    transcription = speech_to_text(join(args.deepspeech, "output_graph.pbmm"),
                                   join(args.deepspeech, "alphabet.txt"),
                                   join(args.deepspeech, "lm.binary"),
                                   join(args.deepspeech, "trie"), TMP_FILENAME)
    print(transcription)
    """Converting model to specified hardware and format"""
    acoustic_cfg_json = json.load(
        open(args.acoustic_model.replace(".torch", ".json"), "r"))
    acoustic_cfg = AcousticSpectrogramConfig.from_json(acoustic_cfg_json)

    acoustic_model = CNN(acoustic_cfg)
    acoustic_model.float().to("cpu")

    try:
        acoustic_model.load_state_dict(torch.load(args.acoustic_model))
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit. A GPU-saved checkpoint fails the plain load on a
        # CPU-only host, so retry with an explicit CPU mapping.
        print(
            "Failed to load model from {} without device mapping. Trying to load with mapping to {}"
            .format(args.acoustic_model, "cpu"))
        acoustic_model.load_state_dict(
            torch.load(args.acoustic_model, map_location="cpu"))

    # Disable dropout/batch-norm training behaviour for inference.
    acoustic_model.eval()