Beispiel #1
0
def _load_annotated_dataset(datasets_dir, name, features):
    """Load merged features and Audacity labels for one combined dataset.

    Reads ``<datasets_dir>/<name>/<name>_combined.wav`` and the matching
    ``.txt`` label file through ``Util`` and returns
    ``(timestamps, labels, x, y)``.
    """
    base = datasets_dir + "/" + name + "/" + name + "_combined"
    timestamps, feature_vectors = Util.read_merged_features(
        base + ".wav", features)
    labels = Util.read_audacity_labels(base + ".txt")
    # The third value returned by get_annotated_data_x_y is not used here.
    x, y, _ = Util.get_annotated_data_x_y(
        timestamps, feature_vectors, labels)
    return timestamps, labels, x, y


def get_features(features, datasets_dir, pca=False):
    """Load, scale and optionally PCA-reduce the three annotated datasets.

    For each of the gtzan, labrosa and mirex datasets the merged feature
    vectors and Audacity labels are read from ``datasets_dir``. A single
    ``StandardScaler`` is fitted on the concatenation of all three feature
    matrices, pickled to ``pickled/scaler.pickle`` (relative to the current
    working directory) and applied to each dataset. When ``pca`` is truthy,
    a 20-component ``PCA`` is fitted on the scaled, concatenated data and
    applied to each dataset as well.

    Args:
        features: Feature specification forwarded to
            ``Util.read_merged_features``.
        datasets_dir: Directory containing the ``gtzan``, ``labrosa`` and
            ``mirex`` sub-directories.
        pca: If truthy, reduce every feature matrix to 20 PCA components.

    Returns:
        dict with the keys ``x_<name>``, ``y_<name>``, ``labels_<name>`` and
        ``timestamps_<name>`` for each of the three dataset names.
        ``labels_<name>`` holds the labels as returned by
        ``Util.read_audacity_labels``.
    """
    names = ("gtzan", "labrosa", "mirex")
    # Fitting order used for both the scaler and the PCA.
    fit_order = ("labrosa", "gtzan", "mirex")

    timestamps = {}
    labels = {}
    x = {}
    y = {}
    for name in names:
        timestamps[name], labels[name], x[name], y[name] = (
            _load_annotated_dataset(datasets_dir, name, features))

    scaler = StandardScaler()
    scaler.fit(np.concatenate([x[name] for name in fit_order]))
    # Pickle streams are binary data: the file must be opened in 'wb' mode,
    # otherwise pickle.dump fails on Python 3 (and may corrupt the file on
    # platforms that translate newlines in text mode).
    with open("pickled/scaler.pickle", 'wb') as f:
        pickle.dump(scaler, f)
    for name in names:
        x[name] = scaler.transform(x[name])

    if pca:
        # Separate local name so the boolean parameter is not shadowed.
        reducer = PCA(n_components=20)
        reducer.fit(np.concatenate([x[name] for name in fit_order]))
        for name in names:
            x[name] = reducer.transform(x[name])

    data = {}
    for name in names:
        data["x_" + name] = x[name]
        data["y_" + name] = y[name]
        data["labels_" + name] = labels[name]
    for name in names:
        data["timestamps_" + name] = timestamps[name]

    return data
Beispiel #2
0
def read_features(features, wavfile, scale=False):
    """Read the merged feature vectors of a wav file, optionally scaled.

    Args:
        features: Feature specification forwarded to
            ``Util.read_merged_features``.
        wavfile: Path of the wav file whose features are read.
        scale: If truthy, load the pickled scaler and apply its
            ``transform`` to the feature vectors.

    Returns:
        Tuple ``(timestamps, feature_vectors)``.
    """
    timestamps, feature_vectors = Util.read_merged_features(wavfile, features)
    if not scale:
        return timestamps, feature_vectors

    # NOTE(review): this absolute path differs from the relative
    # "pickled/scaler.pickle" that get_features writes to -- confirm both
    # refer to the same file in the deployed layout.
    # Pickle streams are binary data: open in 'rb' mode so that loading
    # works on Python 3 as well. Only load pickles from a trusted location.
    with open("/opt/speech-music-discrimination/pickled/scaler.pickle",
              'rb') as f:
        scaler = pickle.load(f)
    return timestamps, scaler.transform(feature_vectors)