Exemple #1
0
def read_to_melspecgram():
    """
    Converts the CREMA-D waveforms into log-mel spectrograms saved on disk.

    Every file in the "AudioWAV" folder under dbc.CRE_DB_PATH is loaded,
    processed with process_wav(noisy=True), displayed, and then written with
    np.save into dbc.PROCESS_DB_PATH. The output filename is built from
    dbc.CRE_MEL_SPEC_FN using a running sample id and the sample's label.
    Nothing is kept in memory between samples, which trades RAM for disk
    space compared to read_data() and load_data().
    """
    audio_dir = os.path.join(dbc.CRE_DB_PATH, "AudioWAV")
    votes_csv = os.path.join(dbc.CRE_DB_PATH, "tabulatedVotes.csv")
    label_map = get_label_map(votes_csv)

    # No sample is ever skipped, so the enumerate index doubles as the id.
    for sample_id, wav_filename in enumerate(os.listdir(audio_dir)):
        # Load the raw waveform
        wav = load_wav(os.path.join(audio_dir, wav_filename))

        # Turn it into a log-mel spectrogram and show it
        melspecgram = process_wav(wav, noisy=True)
        display_melspecgram(melspecgram)

        # The label map is keyed by the filename without its extension
        stem = os.path.splitext(wav_filename)[0]
        emo_label = repr_label(label_map[stem])

        # Persist the spectrogram for later use
        out_name = dbc.CRE_MEL_SPEC_FN.format(
            id=sample_id, emo_label=emo_label)
        np.save(os.path.join(dbc.PROCESS_DB_PATH, out_name), melspecgram)
Exemple #2
0
def read_data():
    """
    Reads the RAVDESS database into np.array.

    Every file inside the folders "Actor_01" .. "Actor_<NUM_ACTORS>" under
    dbc.RAV_DB_PATH is loaded with load_wav(), trimmed with
    remove_first_last_sec() and paired with the label that get_label() derives
    from its filename.

    Sample output:
        (array([ 3.0517578e-05,  3.0517578e-05,  3.0517578e-05, ...,
        0.0000000e+00, -3.0517578e-05,  0.0000000e+00], dtype=float32), 0)

    :return: Tuple of (samples, labels) where the samples are a np.array and the
    labels are an array of integers. When the trimmed waveforms differ in
    length, samples is a 1-D object array holding one waveform per entry.
    """
    samples = []
    labels = []

    for actor in range(1, NUM_ACTORS + 1):
        actor_foldername = "Actor_{:02d}".format(actor)
        actor_path = os.path.join(dbc.RAV_DB_PATH, actor_foldername)
        print("Processing actor:", actor_foldername)

        for sample_filename in os.listdir(actor_path):
            # Read the sample and remove the first and last second
            sample_path = os.path.join(actor_path, sample_filename)
            wav = load_wav(sample_path)
            samples.append(remove_first_last_sec(wav, RAV_SR))

            # Read the label
            label = get_label(
                sample_filename, "-", RAV_EMO_INDEX, RAV_EMOTION_MAP)
            labels.append(label)

    try:
        samples_arr = np.array(samples)
    except ValueError:
        # NumPy >= 1.24 raises instead of implicitly building a ragged array.
        # Build the 1-D object array explicitly so that every waveform keeps
        # its own length, matching what older NumPy versions returned.
        samples_arr = np.empty(len(samples), dtype=object)
        for idx, sample in enumerate(samples):
            samples_arr[idx] = sample

    return samples_arr, np.array(labels)
Exemple #3
0
def read_data():
    """
    Reads the CREMA-D database into np.array.

    Every file in the "AudioWAV" folder under dbc.CRE_DB_PATH is loaded with
    load_wav(). Its label is looked up in the map that get_label_map() builds
    from "tabulatedVotes.csv", keyed by the filename without its extension.

    Sample output:
        (array([-0.00228882, -0.00204468, -0.00180054, ...,  0.        ,
        0.        ,  0.        ], dtype=float32), 2)

    :return: Tuple of (samples, labels) where the samples are a np.array and the
    labels are an array of integers. When the waveforms differ in length,
    samples is a 1-D object array holding one waveform per entry.
    :raises KeyError: If a file in "AudioWAV" has no entry in the label map.
    """
    samples = []
    labels = []
    wav_folder = os.path.join(dbc.CRE_DB_PATH, "AudioWAV")
    labels_path = os.path.join(dbc.CRE_DB_PATH, "tabulatedVotes.csv")
    label_map = get_label_map(labels_path)

    for sample_filename in os.listdir(wav_folder):
        print("Processing file:", sample_filename)

        # Read the sample
        sample_path = os.path.join(wav_folder, sample_filename)
        samples.append(load_wav(sample_path))

        # Read the label
        sample_name = os.path.splitext(sample_filename)[0]
        label = label_map[sample_name]
        labels.append(label)

    try:
        samples_arr = np.array(samples)
    except ValueError:
        # NumPy >= 1.24 raises instead of implicitly building a ragged array.
        # Build the 1-D object array explicitly so that every waveform keeps
        # its own length, matching what older NumPy versions returned.
        samples_arr = np.empty(len(samples), dtype=object)
        for idx, sample in enumerate(samples):
            samples_arr[idx] = sample

    return samples_arr, np.array(labels)
Exemple #4
0
def read_to_melspecgram():
    """
    Converts the RAVDESS waveforms into log-mel spectrograms saved on disk.

    For each folder "Actor_01" .. "Actor_<NUM_ACTORS>" under dbc.RAV_DB_PATH,
    every file is loaded, trimmed with remove_first_last_sec(), processed with
    process_wav(), displayed, and written with np.save into
    dbc.PROCESS_DB_PATH. The output filename is built from dbc.RAV_MEL_SPEC_FN
    using a running sample id and the sample's label. This is the
    low-RAM alternative to read_data() and load_data(): it trades RAM for
    disk space.
    """
    # Running id shared across all actors so that output names never collide
    next_id = 0

    for actor_num in range(1, NUM_ACTORS + 1):
        actor_dir_name = "Actor_{:02d}".format(actor_num)
        actor_dir = os.path.join(dbc.RAV_DB_PATH, actor_dir_name)
        print("Processing actor:", actor_dir_name)

        for wav_filename in os.listdir(actor_dir):
            # Load the waveform, then drop its first and last second
            raw_wav = load_wav(os.path.join(actor_dir, wav_filename))
            trimmed_wav = remove_first_last_sec(raw_wav, RAV_SR)

            # Turn it into a log-mel spectrogram and show it
            melspecgram = process_wav(trimmed_wav)
            display_melspecgram(melspecgram)

            # The label is encoded in the "-"-separated filename
            emo_label = repr_label(get_label(
                wav_filename, "-", RAV_EMO_INDEX, RAV_EMOTION_MAP))

            # Persist the spectrogram for later use
            out_name = dbc.RAV_MEL_SPEC_FN.format(
                id=next_id, emo_label=emo_label)
            np.save(os.path.join(dbc.PROCESS_DB_PATH, out_name), melspecgram)
            next_id += 1
Exemple #5
0
def read_data():
    """
    Reads the TESS database into np.array.

    Every file in the "data" folder under dbc.TES_DB_PATH gets a label that
    get_label() derives from its "_"-separated filename. Samples whose label
    is falsy are reported and skipped without being loaded; the rest are
    loaded with load_wav().

    Sample output:
        (array([ 3.0517578e-05,  3.0517578e-05,  3.0517578e-05, ...,
        0.0000000e+00, -3.0517578e-05,  0.0000000e+00], dtype=float32), 0)

    :return: Tuple of (samples, labels) where the samples are a np.array of
             waveforms and the labels are an array of integers. Because audio
             lengths vary, samples is a 1-D object array holding one waveform
             per entry whenever the waveforms differ in length.
    """
    samples = []
    labels = []
    wav_folder = os.path.join(dbc.TES_DB_PATH, "data")

    for sample_filename in os.listdir(wav_folder):
        print("Processing file:", sample_filename)

        # Read the label and if it's empty, then drop the sample
        # NOTE(review): `not label` is also true for a label equal to 0 -
        # confirm get_label() never returns 0 for a sample that should be kept.
        label = get_label(sample_filename, "_", TES_EMO_INDEX, TES_EMOTION_MAP)
        if not label:
            print("Not using sample:", sample_filename)
            continue

        # Read the sample
        sample_path = os.path.join(wav_folder, sample_filename)
        samples.append(load_wav(sample_path))

        labels.append(label)

    try:
        samples_arr = np.array(samples)
    except ValueError:
        # NumPy >= 1.24 raises instead of implicitly building a ragged array.
        # Build the 1-D object array explicitly so that every waveform keeps
        # its own length, matching what older NumPy versions returned.
        samples_arr = np.empty(len(samples), dtype=object)
        for idx, sample in enumerate(samples):
            samples_arr[idx] = sample

    return samples_arr, np.array(labels)
Exemple #6
0
def read_to_melspecgram():
    """
    Converts the TESS waveforms into log-mel spectrograms saved on disk.

    Every file in the "data" folder under dbc.TES_DB_PATH is loaded,
    processed with process_wav(), displayed, and written with np.save into
    dbc.PROCESS_DB_PATH. The output filename is built from dbc.TES_MEL_SPEC_FN
    using a running sample id and the sample's label. This is the
    low-RAM alternative to read_data() and load_data(): it trades RAM for
    disk space.
    """
    audio_dir = os.path.join(dbc.TES_DB_PATH, "data")

    # No sample is ever skipped, so the enumerate index doubles as the id.
    for sample_id, wav_filename in enumerate(os.listdir(audio_dir)):
        print("Processing file:", wav_filename)

        # Load the raw waveform
        wav = load_wav(os.path.join(audio_dir, wav_filename))

        # No outlier check because all samples are within 3 stds
        # Turn the waveform into a log-mel spectrogram and show it
        melspecgram = process_wav(wav)
        display_melspecgram(melspecgram)

        # The label is parsed from the filename with its extension removed
        stem = os.path.splitext(wav_filename)[0]
        emo_label = repr_label(
            get_label(stem, "_", TES_EMO_INDEX, TES_EMOTION_MAP))

        # Persist the spectrogram for later use
        out_name = dbc.TES_MEL_SPEC_FN.format(
            id=sample_id, emo_label=emo_label)
        np.save(os.path.join(dbc.PROCESS_DB_PATH, out_name), melspecgram)
Exemple #7
0
def read_to_melspecgram():
    """
    Converts the waveforms under dbc.IEM_DB_PATH into log-mel spectrograms
    saved on disk.

    The data is laid out as data/S<session>/<performance>/<sample files>, with
    the labels of each performance in labels/S<session>/<performance>.txt.
    Every sample is loaded, processed with process_wav(noisy=True), displayed,
    and written with np.save into dbc.PROCESS_DB_PATH. The output filename is
    built from dbc.IEM_MEL_SPEC_FN using a running sample id and the sample's
    label. This is the low-RAM alternative to read_data() and load_data(): it
    trades RAM for disk space.
    """
    # Running id shared across all sessions so that output names never collide
    next_id = 0
    data_root = os.path.join(dbc.IEM_DB_PATH, "data")
    labels_root = os.path.join(dbc.IEM_DB_PATH, "labels")

    for sess_num in range(1, NUM_SESS + 1):
        sess_name = "S{}".format(sess_num)
        print("Processing session:", sess_name)

        sess_data_dir = os.path.join(data_root, sess_name)
        sess_labels_dir = os.path.join(labels_root, sess_name)

        for perform in os.listdir(sess_data_dir):
            perform_dir = os.path.join(sess_data_dir, perform)
            perform_labels_file = os.path.join(
                sess_labels_dir, "{filename}.txt".format(filename=perform))
            print("Processing performance:", perform)

            # One label file covers every sample of the performance
            perform_label_map = get_label_map(perform_labels_file)

            for wav_filename in os.listdir(perform_dir):
                # Load the raw waveform
                wav = load_wav(os.path.join(perform_dir, wav_filename))

                # Turn it into a log-mel spectrogram and show it
                melspecgram = process_wav(wav, noisy=True)
                display_melspecgram(melspecgram)

                # The label map is keyed by the filename without its extension
                stem = os.path.splitext(wav_filename)[0]
                emo_label = repr_label(perform_label_map[stem])

                # Persist the spectrogram for later use
                out_name = dbc.IEM_MEL_SPEC_FN.format(
                    id=next_id, emo_label=emo_label)
                np.save(
                    os.path.join(dbc.PROCESS_DB_PATH, out_name), melspecgram)
                next_id += 1
Exemple #8
0
def read_data():
    """
    Reads the IEMOCAP database into np.array.

    The data is laid out as data/S<session>/<performance>/<sample files> under
    dbc.IEM_DB_PATH, with the labels of each performance in
    labels/S<session>/<performance>.txt. Each sample's label is looked up by
    its filename without the extension; samples whose label is falsy are
    reported and skipped without being loaded.

    Sample output:
        (array([ 3.0517578e-05,  3.0517578e-05,  3.0517578e-05, ...,
        0.0000000e+00, -3.0517578e-05,  0.0000000e+00], dtype=float32), 0)

    :return: Tuple of (samples, labels) where the samples are a np.array and the
    labels are an array of integers. When the waveforms differ in length,
    samples is a 1-D object array holding one waveform per entry.
    :raises KeyError: If a sample has no entry in its performance's label map.
    """
    samples = []
    labels = []
    data_path = os.path.join(dbc.IEM_DB_PATH, "data")
    labels_path = os.path.join(dbc.IEM_DB_PATH, "labels")

    for num_sess in range(1, NUM_SESS + 1):
        sess_foldername = "S{}".format(num_sess)
        print("Processing session:", sess_foldername)

        sess_data_path = os.path.join(data_path, sess_foldername)
        sess_labels_path = os.path.join(labels_path, sess_foldername)

        for perform in os.listdir(sess_data_path):
            perform_data_path = os.path.join(sess_data_path, perform)
            perform_labels_path = os.path.join(
                sess_labels_path, "{filename}.txt".format(filename=perform))
            print("Processing performance:", perform)

            # One label file covers every sample of the performance
            perform_label_map = get_label_map(perform_labels_path)

            for sample_filename in os.listdir(perform_data_path):
                # Read the label and if it's empty, then drop the sample
                # NOTE(review): `not label` is also true for a label equal to
                # 0 - confirm the label map never holds 0 for a sample that
                # should be kept.
                sample_name = os.path.splitext(sample_filename)[0]
                label = perform_label_map[sample_name]
                if not label:
                    print("Not using sample:", sample_filename)
                    continue

                # Read the sample
                sample_path = os.path.join(perform_data_path, sample_filename)
                samples.append(load_wav(sample_path))

                labels.append(label)

    try:
        samples_arr = np.array(samples)
    except ValueError:
        # NumPy >= 1.24 raises instead of implicitly building a ragged array.
        # Build the 1-D object array explicitly so that every waveform keeps
        # its own length, matching what older NumPy versions returned.
        samples_arr = np.empty(len(samples), dtype=object)
        for idx, sample in enumerate(samples):
            samples_arr[idx] = sample

    return samples_arr, np.array(labels)