def preprocess(self, x):
    """
    Turn an incoming microphone payload into model input.

    ``x`` is handed to ``processing_utils.decode_base64_to_file`` and the
    resulting file is then interpreted according to ``self.preprocessing``:

    * ``"mfcc"`` -- return whatever
      ``processing_utils.generate_mfcc_features_from_audio_file`` produces
      for that file.
    * anything else -- read the file with ``scipy.io.wavfile.read`` and
      return only the sample data (the sample rate is discarded).
    """
    # Keep the decoded file object bound for the whole call: if it is a
    # temporary file (not visible from here), dropping the reference early
    # could remove it before it is read.
    decoded_file = processing_utils.decode_base64_to_file(x)
    wav_path = decoded_file.name

    if self.preprocessing != "mfcc":
        _rate, samples = scipy.io.wavfile.read(wav_path)
        return samples

    return processing_utils.generate_mfcc_features_from_audio_file(wav_path)
def embed(self, x):
    """
    Build a flat embedding vector for one audio sample.

    The representation of ``x`` is selected by ``self.type``:

    * ``"file"``  -- ``x.name`` is passed as a filename to
      ``processing_utils.generate_mfcc_features_from_audio_file``.
    * ``"numpy"`` -- ``x`` is unpacked as ``(sample_rate, signal)`` and both
      are passed to the same helper (with ``wav_filename=None``).
    * ``"mfcc"``  -- ``x`` is resampled along axis 1 with
      ``scipy.signal.resample``.

    In every case the target length is 1,000 frames and the result is
    returned flattened. Any other ``self.type`` raises ``ValueError``.
    """
    # NOTE(review): preprocess() reads self.preprocessing while this method
    # reads self.type -- confirm the enclosing class defines both.
    target_frames = 1000
    kind = self.type

    if kind == "file":
        features = processing_utils.generate_mfcc_features_from_audio_file(
            x.name, downsample_to=target_frames
        )
        return features.flatten()

    if kind == "numpy":
        sample_rate, signal = x
        features = processing_utils.generate_mfcc_features_from_audio_file(
            wav_filename=None,
            sample_rate=sample_rate,
            signal=signal,
            downsample_to=target_frames,
        )
        return features.flatten()

    if kind == "mfcc":
        resampled = scipy.signal.resample(x, target_frames, axis=1)
        return resampled.flatten()

    raise ValueError(
        f"Unknown type: {self.type!s}. Please choose from: 'numpy', 'mfcc', 'file'."
    )