def _augment_and_label(spec_file, annotation_file):
    """Load one spectrogram, run the augmentation chain on it and build its label.

    Returns the augmented spectrogram together with the label produced by
    ``preprocessing.get_label`` for that spectrogram.
    """
    spec = np.load(spec_file)
    # The deformation step also returns the stretching rate it used; the
    # label has to be generated with that same rate.
    spec, stretching_rate = pitch_time_deformation_spec(spec)
    spec = random_filter_spec(spec)
    spec = random_loudness_spec(spec)
    label = preprocessing.get_label(
        annotation_file, spec.shape[1], stretching_rate=stretching_rate)
    return spec, label


def training_data_processing(spec_file, annotation_file, mean, std, spec_file2=None, annotation_file2=None):
    """Build one training example (mel bands and label) from stored spectrograms.

    The spectrogram in ``spec_file`` is loaded, passed through
    ``pitch_time_deformation_spec``, ``random_filter_spec`` and
    ``random_loudness_spec``, and labelled from ``annotation_file``. When
    ``spec_file2`` is given, a second spectrogram goes through the same steps
    (labelled from ``annotation_file2``) and both are combined with
    ``block_mixing_spec``. The result is converted with
    ``preprocessing.get_scaled_mel_bands`` and normalized with ``mean`` and
    ``std``.

    Args:
        spec_file: Path of the main spectrogram, readable by ``np.load``.
        annotation_file: Annotation used to label the main spectrogram.
        mean: Forwarded to ``preprocessing.normalize``.
        std: Forwarded to ``preprocessing.normalize``.
        spec_file2: Optional path of a second spectrogram to mix in.
        annotation_file2: Annotation for the second spectrogram; only used
            when ``spec_file2`` is given.

    Returns:
        Tuple ``(mels, label)``.
    """
    spec, label = _augment_and_label(spec_file, annotation_file)

    if spec_file2 is not None:
        # The main file is fully processed before the second one, so the
        # augmentation helpers are invoked in the same order as before.
        spec2, label2 = _augment_and_label(spec_file2, annotation_file2)
        spec, label = block_mixing_spec(spec, spec2, label, label2)

    mels = preprocessing.get_scaled_mel_bands(spec)
    mels = preprocessing.normalize(mels, mean, std)
    return mels, label
def validation_data_processing(spec_file, annotation_file, mean, std):
    """Build one validation example (mel bands and label) without augmentation.

    Args:
        spec_file: Path of the spectrogram, readable by ``np.load``.
        annotation_file: Annotation handed to ``preprocessing.get_label``.
        mean: Forwarded to ``preprocessing.normalize``.
        std: Forwarded to ``preprocessing.normalize``.

    Returns:
        Tuple ``(mels, label)``.
    """
    raw_spec = np.load(spec_file)
    scaled_bands = preprocessing.get_scaled_mel_bands(raw_spec)
    normalized_bands = preprocessing.normalize(scaled_bands, mean, std)

    # The label length follows the second axis of the normalized bands; no
    # deformation is applied here, hence a stretching rate of 1.
    target = preprocessing.get_label(
        annotation_file, normalized_bands.shape[1], stretching_rate=1)
    return normalized_bands, target
def savespec_and_get_bands(file):
    """Compute and save the spectrogram of an audio file and return its mel bands.

    The audio is loaded with ``utils.load_audio``. If it has more than 200
    elements, its spectrogram is computed and handed to ``utils.save_matrix``
    under the input path with a trailing ``.wav`` removed, and the scaled mel
    bands are returned. Otherwise a message is printed and nothing is saved.

    Args:
        file: Path of the audio file.

    Returns:
        Tuple ``(length, bands)`` where ``length`` is ``bands.shape[1]``, or
        ``(0, [])`` when the audio has 200 elements or fewer.
    """
    audio = utils.load_audio(file)
    if len(audio) <= 200:
        print("empty file: " + file)
        return 0, []

    spec = preprocessing.get_spectrogram(audio)
    bands = preprocessing.get_scaled_mel_bands(spec)
    length = bands.shape[1]

    # Remove only a trailing ".wav". A plain str.replace would also strip
    # ".wav" from directory names or from the middle of the file name and
    # write the matrix to an unintended location.
    suffix = ".wav"
    output_path = file[:-len(suffix)] if file.endswith(suffix) else file
    utils.save_matrix(spec, output_path)
    return length, bands
def test_data_processing(spec):
    """Turn a spectrogram into normalized mel bands and return them transposed.

    Args:
        spec: Spectrogram passed to ``preprocessing.get_scaled_mel_bands``.

    Returns:
        The ``.T`` attribute of the normalized mel bands.
    """
    from smd.data import preprocessing

    scaled_bands = preprocessing.get_scaled_mel_bands(spec)
    # NOTE(review): ``mean`` and ``std`` are not parameters or locals of this
    # function; they are presumably defined at module level outside this
    # view -- confirm, otherwise this line raises NameError.
    normalized_bands = preprocessing.normalize(scaled_bands, mean, std)
    return normalized_bands.T