def read_to_melspecgram():
    """
    Convert the CREMA-D waveforms into log-mel spectrograms stored on disk.

    This is an alternative to read_data() and load_data() for when keeping the
    whole database in memory uses too much RAM: it trades RAM for disk space.
    Every spectrogram is displayed and then written with np.save() into
    dbc.PROCESS_DB_PATH, under a name built from dbc.CRE_MEL_SPEC_FN with a
    running id and the string representation of the sample's label.
    """
    audio_dir = os.path.join(dbc.CRE_DB_PATH, "AudioWAV")
    votes_csv = os.path.join(dbc.CRE_DB_PATH, "tabulatedVotes.csv")
    label_map = get_label_map(votes_csv)

    # The id of a sample is its position in the directory listing.
    for sample_id, wav_filename in enumerate(os.listdir(audio_dir)):
        # Load the waveform and turn it into a log-mel spectrogram
        waveform = load_wav(os.path.join(audio_dir, wav_filename))
        melspecgram = process_wav(waveform, noisy=True)

        # Show the spectrogram
        display_melspecgram(melspecgram)

        # The label map is keyed by the filename without its extension
        sample_name, _ = os.path.splitext(wav_filename)
        label_repr = repr_label(label_map[sample_name])

        # Store the spectrogram so it can be loaded later
        out_filename = dbc.CRE_MEL_SPEC_FN.format(
            id=sample_id, emo_label=label_repr)
        np.save(os.path.join(dbc.PROCESS_DB_PATH, out_filename), melspecgram)
def read_data():
    """
    Reads the RAVDESS database into np.array.

    Every waveform has its first and last second removed before it is stored.

    Sample output:
        (array([ 3.0517578e-05,  3.0517578e-05,  3.0517578e-05, ...,
        0.0000000e+00, -3.0517578e-05,  0.0000000e+00], dtype=float32), 0)

    :return: Tuple of (samples, labels) where the samples are a np.array and
        the labels are an array of integers. If NumPy refuses to combine the
        waveforms into one regular array (NumPy >= 1.24 raises ValueError for
        sequences of unequal length), samples is instead a 1-D object array
        with one waveform per element.
    """
    samples = []
    labels = []

    for actor in range(1, NUM_ACTORS + 1):
        actor_foldername = "Actor_{:02d}".format(actor)
        actor_path = os.path.join(dbc.RAV_DB_PATH, actor_foldername)
        print("Processing actor:", actor_foldername)

        for sample_filename in os.listdir(actor_path):
            # Read the sample and remove the first and last second
            sample_path = os.path.join(actor_path, sample_filename)
            wav = load_wav(sample_path)
            samples.append(remove_first_last_sec(wav, RAV_SR))

            # Read the label
            label = get_label(
                sample_filename, "-", RAV_EMO_INDEX, RAV_EMOTION_MAP)
            labels.append(label)

    try:
        sample_array = np.array(samples)
    except ValueError:
        # Waveforms of different lengths cannot form a regular array on
        # recent NumPy; keep them as separate elements of an object array.
        sample_array = np.empty(len(samples), dtype=object)
        for index, wav in enumerate(samples):
            sample_array[index] = wav

    return sample_array, np.array(labels)
def read_data():
    """
    Reads the CREMA-D database into np.array.

    Sample output:
        (array([-0.00228882, -0.00204468, -0.00180054, ...,  0.        ,
        0.        ,  0.        ], dtype=float32), 2)

    :return: Tuple of (samples, labels) where the samples are a np.array and
        the labels are an array of integers. If NumPy refuses to combine the
        waveforms into one regular array (NumPy >= 1.24 raises ValueError for
        sequences of unequal length), samples is instead a 1-D object array
        with one waveform per element.
    """
    samples = []
    labels = []
    wav_folder = os.path.join(dbc.CRE_DB_PATH, "AudioWAV")
    labels_path = os.path.join(dbc.CRE_DB_PATH, "tabulatedVotes.csv")
    label_map = get_label_map(labels_path)

    for sample_filename in os.listdir(wav_folder):
        print("Processing file:", sample_filename)

        # Read the sample
        sample_path = os.path.join(wav_folder, sample_filename)
        samples.append(load_wav(sample_path))

        # Read the label; the map is keyed by the filename without extension
        sample_name = os.path.splitext(sample_filename)[0]
        labels.append(label_map[sample_name])

    try:
        sample_array = np.array(samples)
    except ValueError:
        # Waveforms of different lengths cannot form a regular array on
        # recent NumPy; keep them as separate elements of an object array.
        sample_array = np.empty(len(samples), dtype=object)
        for index, wav in enumerate(samples):
            sample_array[index] = wav

    return sample_array, np.array(labels)
def read_to_melspecgram():
    """
    Convert the RAVDESS waveforms into log-mel spectrograms stored on disk.

    This is an alternative to read_data() and load_data() for when keeping the
    whole database in memory uses too much RAM: it trades RAM for disk space.
    Each waveform loses its first and last second, is converted, displayed and
    finally written with np.save() into dbc.PROCESS_DB_PATH, under a name built
    from dbc.RAV_MEL_SPEC_FN with a running id and the string representation
    of the sample's label.
    """
    next_id = 0

    for actor_num in range(1, NUM_ACTORS + 1):
        actor_dir_name = "Actor_{:02d}".format(actor_num)
        actor_dir = os.path.join(dbc.RAV_DB_PATH, actor_dir_name)
        print("Processing actor:", actor_dir_name)

        for wav_filename in os.listdir(actor_dir):
            # Load the waveform and cut off its first and last second
            raw_wav = load_wav(os.path.join(actor_dir, wav_filename))
            trimmed_wav = remove_first_last_sec(raw_wav, RAV_SR)

            # Turn the waveform into a log-mel spectrogram and show it
            melspecgram = process_wav(trimmed_wav)
            display_melspecgram(melspecgram)

            # The label is parsed from the "-"-separated filename
            emotion = get_label(
                wav_filename, "-", RAV_EMO_INDEX, RAV_EMOTION_MAP)
            label_repr = repr_label(emotion)

            # Store the spectrogram so it can be loaded later
            out_filename = dbc.RAV_MEL_SPEC_FN.format(
                id=next_id, emo_label=label_repr)
            np.save(
                os.path.join(dbc.PROCESS_DB_PATH, out_filename), melspecgram)
            next_id += 1
def read_data():
    """
    Reads the TESS database into np.array.

    Samples whose label comes back empty are dropped.

    Sample output:
        (array([ 3.0517578e-05,  3.0517578e-05,  3.0517578e-05, ...,
        0.0000000e+00, -3.0517578e-05,  0.0000000e+00], dtype=float32), 0)

    :return: Tuple of (samples, labels) where the samples are a np.array and
        the labels are an array of integers. If NumPy refuses to combine the
        waveforms into one regular array (NumPy >= 1.24 raises ValueError for
        sequences of unequal length), samples is instead a 1-D object array
        with one waveform per element.
    """
    samples = []
    labels = []
    wav_folder = os.path.join(dbc.TES_DB_PATH, "data")

    for sample_filename in os.listdir(wav_folder):
        print("Processing file:", sample_filename)

        # Parse the label from the name without its extension, the same way
        # read_to_melspecgram() does, so the extension cannot end up attached
        # to the "_"-separated field that get_label() inspects.
        # NOTE(review): presumably get_label() splits on "_" and picks field
        # TES_EMO_INDEX - confirm against its implementation.
        sample_name = os.path.splitext(sample_filename)[0]
        label = get_label(sample_name, "_", TES_EMO_INDEX, TES_EMOTION_MAP)

        # Drop the sample when the label is empty.
        # NOTE(review): `not label` is also true for an integer label of 0 -
        # confirm that 0 is never a valid label here.
        if not label:
            print("Not using sample:", sample_filename)
            continue

        # Read the sample
        sample_path = os.path.join(wav_folder, sample_filename)
        samples.append(load_wav(sample_path))
        labels.append(label)

    try:
        sample_array = np.array(samples)
    except ValueError:
        # Waveforms of different lengths cannot form a regular array on
        # recent NumPy; keep them as separate elements of an object array.
        sample_array = np.empty(len(samples), dtype=object)
        for index, wav in enumerate(samples):
            sample_array[index] = wav

    return sample_array, np.array(labels)
def read_to_melspecgram():
    """
    Convert the TESS waveforms into log-mel spectrograms stored on disk.

    This is an alternative to read_data() and load_data() for when keeping the
    whole database in memory uses too much RAM: it trades RAM for disk space.
    Every spectrogram is displayed and then written with np.save() into
    dbc.PROCESS_DB_PATH, under a name built from dbc.TES_MEL_SPEC_FN with a
    running id and the string representation of the sample's label.
    """
    audio_dir = os.path.join(dbc.TES_DB_PATH, "data")

    # The id of a sample is its position in the directory listing.
    for sample_id, wav_filename in enumerate(os.listdir(audio_dir)):
        print("Processing file:", wav_filename)

        # Load the waveform
        waveform = load_wav(os.path.join(audio_dir, wav_filename))

        # No outlier check because all samples are within 3 stds

        # Turn the waveform into a log-mel spectrogram and show it
        melspecgram = process_wav(waveform)
        display_melspecgram(melspecgram)

        # The label is parsed from the "_"-separated name without extension
        sample_name, _ = os.path.splitext(wav_filename)
        emotion = get_label(
            sample_name, "_", TES_EMO_INDEX, TES_EMOTION_MAP)
        label_repr = repr_label(emotion)

        # Store the spectrogram so it can be loaded later
        out_filename = dbc.TES_MEL_SPEC_FN.format(
            id=sample_id, emo_label=label_repr)
        np.save(os.path.join(dbc.PROCESS_DB_PATH, out_filename), melspecgram)
def read_to_melspecgram():
    """
    Convert the IEMOCAP waveforms into log-mel spectrograms stored on disk.

    This is an alternative to read_data() and load_data() for when keeping the
    whole database in memory uses too much RAM: it trades RAM for disk space.
    The database is walked session by session and performance by performance;
    the labels of a performance come from "<performance>.txt" in the matching
    labels folder. Every spectrogram is displayed and then written with
    np.save() into dbc.PROCESS_DB_PATH, under a name built from
    dbc.IEM_MEL_SPEC_FN with a running id and the string representation of the
    sample's label.
    """
    data_root = os.path.join(dbc.IEM_DB_PATH, "data")
    labels_root = os.path.join(dbc.IEM_DB_PATH, "labels")
    next_id = 0

    for session in range(1, NUM_SESS + 1):
        session_name = "S{}".format(session)
        print("Processing session:", session_name)
        session_data_dir = os.path.join(data_root, session_name)
        session_labels_dir = os.path.join(labels_root, session_name)

        for performance in os.listdir(session_data_dir):
            performance_dir = os.path.join(session_data_dir, performance)
            labels_file = os.path.join(
                session_labels_dir,
                "{filename}.txt".format(filename=performance))
            print("Processing performance:", performance)
            label_map = get_label_map(labels_file)

            for wav_filename in os.listdir(performance_dir):
                # Load the waveform
                waveform = load_wav(
                    os.path.join(performance_dir, wav_filename))

                # Turn the waveform into a log-mel spectrogram and show it
                melspecgram = process_wav(waveform, noisy=True)
                display_melspecgram(melspecgram)

                # The label map is keyed by the filename without extension
                sample_name, _ = os.path.splitext(wav_filename)
                label_repr = repr_label(label_map[sample_name])

                # Store the spectrogram so it can be loaded later
                out_filename = dbc.IEM_MEL_SPEC_FN.format(
                    id=next_id, emo_label=label_repr)
                np.save(
                    os.path.join(dbc.PROCESS_DB_PATH, out_filename),
                    melspecgram)
                next_id += 1
def read_data():
    """
    Reads the IEMOCAP database into np.array.

    The database is walked session by session and performance by performance;
    the labels of a performance come from "<performance>.txt" in the matching
    labels folder. Samples whose label is empty are dropped.

    Sample output:
        (array([ 3.0517578e-05,  3.0517578e-05,  3.0517578e-05, ...,
        0.0000000e+00, -3.0517578e-05,  0.0000000e+00], dtype=float32), 0)

    :return: Tuple of (samples, labels) where the samples are a np.array and
        the labels are an array of integers. If NumPy refuses to combine the
        waveforms into one regular array (NumPy >= 1.24 raises ValueError for
        sequences of unequal length), samples is instead a 1-D object array
        with one waveform per element.
    """
    samples = []
    labels = []
    data_path = os.path.join(dbc.IEM_DB_PATH, "data")
    labels_path = os.path.join(dbc.IEM_DB_PATH, "labels")

    for num_sess in range(1, NUM_SESS + 1):
        sess_foldername = "S{}".format(num_sess)
        print("Processing session:", sess_foldername)
        sess_data_path = os.path.join(data_path, sess_foldername)
        sess_labels_path = os.path.join(labels_path, sess_foldername)

        for perform in os.listdir(sess_data_path):
            perform_data_path = os.path.join(sess_data_path, perform)
            perform_labels_path = os.path.join(
                sess_labels_path, "{filename}.txt".format(filename=perform))
            print("Processing performance:", perform)
            perform_label_map = get_label_map(perform_labels_path)

            for sample_filename in os.listdir(perform_data_path):
                # Read the label and if it's empty, then drop the sample.
                # NOTE(review): `not label` is also true for an integer label
                # of 0 - confirm that 0 is never a valid label here.
                sample_name = os.path.splitext(sample_filename)[0]
                label = perform_label_map[sample_name]
                if not label:
                    print("Not using sample:", sample_filename)
                    continue

                # Read the sample
                sample_path = os.path.join(perform_data_path, sample_filename)
                samples.append(load_wav(sample_path))
                labels.append(label)

    try:
        sample_array = np.array(samples)
    except ValueError:
        # Waveforms of different lengths cannot form a regular array on
        # recent NumPy; keep them as separate elements of an object array.
        sample_array = np.empty(len(samples), dtype=object)
        for index, wav in enumerate(samples):
            sample_array[index] = wav

    return sample_array, np.array(labels)