def load(self, saveDataAsH5=True):
    h5Path = os.path.join(self.inPath, "data.h5")
    if os.path.isfile(h5Path):
        # Reuse cached spectrogram slices if they have already been computed
        h5f = h5py.File(h5Path, "r")
        self.x = h5f["x"][:]
        self.y = h5f["y"][:]
        h5f.close()
    else:
        # Walk the noisy/ directory and pair each file with its clean counterpart.
        # In the future this should be a generator with yields, to use less memory.
        count = 0
        for dirPath, dirNames, fileNames in os.walk(os.path.join(self.inPath, "noisy")):
            for fileName in filter(lambda f: (f.endswith(".mp3") or f.endswith(".wav")) and not f.startswith("."), fileNames):
                audio, sampleRate = conversion.loadAudioFile(os.path.join(self.inPath, "noisy", fileName))
                noisy_spectrogram, _ = conversion.audioFileToSpectrogram(audio, self.fftWindowSize)
                audio, sampleRate = conversion.loadAudioFile(os.path.join(self.inPath, "clean", fileName))
                clean_spectrogram, _ = conversion.audioFileToSpectrogram(audio, self.fftWindowSize)
                # Zero-pad the shorter spectrogram so both have the same shape
                if noisy_spectrogram.shape[1] < clean_spectrogram.shape[1]:
                    padded = np.zeros(clean_spectrogram.shape)
                    padded[:noisy_spectrogram.shape[0], :noisy_spectrogram.shape[1]] = noisy_spectrogram
                    noisy_spectrogram = padded
                elif clean_spectrogram.shape[1] < noisy_spectrogram.shape[1]:
                    padded = np.zeros(noisy_spectrogram.shape)
                    padded[:clean_spectrogram.shape[0], :clean_spectrogram.shape[1]] = clean_spectrogram
                    clean_spectrogram = padded
                # Chop into fixed-size slices so every example in a batch has the same shape
                dim = SLICE_SIZE
                noisySlices = chop(noisy_spectrogram, dim)
                cleanSlices = chop(clean_spectrogram, dim)
                count += 1
                if len(cleanSlices) > 0:
                    self.x.extend(noisySlices)
                    self.y.extend(cleanSlices)
                    console.info(count, "Created spectrogram for", fileName, "with", len(cleanSlices), "slices")
        # Add a "channels" dimension to please the network
        self.x = np.array(self.x)[:, :, :, np.newaxis]
        self.y = np.array(self.y)[:, :, :, np.newaxis]
        console.info("Train data shape: x:", self.x.shape, " y:", self.y.shape)
        # Save to file if asked
        if saveDataAsH5:
            h5f = h5py.File(h5Path, "w")
            h5f.create_dataset("x", data=self.x)
            h5f.create_dataset("y", data=self.y)
            h5f.close()
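# `chop` and SLICE_SIZE are defined elsewhere in the project; the sketch below is only
# an illustration of the assumed behaviour (the name `chopSketch` is hypothetical):
# split a spectrogram into non-overlapping dim x dim tiles, discarding the remainder,
# so that every training example has the same shape. Assumes numpy is imported as np,
# as in the surrounding file.
def chopSketch(matrix, scale):
    slices = []
    for time in range(matrix.shape[1] // scale):
        for freq in range(matrix.shape[0] // scale):
            # One scale x scale tile of the spectrogram
            tile = matrix[freq * scale:(freq + 1) * scale,
                          time * scale:(time + 1) * scale]
            slices.append(tile)
    return slices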
def isolateVocals(self, path, fftWindowSize, phaseIterations=10):
    console.log("Attempting to isolate vocals from", path)
    audio, sampleRate = conversion.loadAudioFile(path)
    spectrogram, phase = conversion.audioFileToSpectrogram(audio, fftWindowSize=fftWindowSize)
    console.log("Retrieved spectrogram; processing...")
    # Pad the spectrogram up to the network's grid size, then add batch and channel dimensions
    expandedSpectrogram = conversion.expandToGrid(spectrogram, self.peakDownscaleFactor)
    expandedSpectrogramWithBatchAndChannels = expandedSpectrogram[np.newaxis, :, :, np.newaxis]
    # Predict the clean spectrogram, then strip the batch and channel dimensions
    predictedSpectrogramWithBatchAndChannels = self.model.predict(expandedSpectrogramWithBatchAndChannels)
    predictedSpectrogram = predictedSpectrogramWithBatchAndChannels[0, :, :, 0]
    # Crop the padding back off so the output matches the input spectrogram's shape
    newSpectrogram = predictedSpectrogram[:spectrogram.shape[0], :spectrogram.shape[1]]
    console.log("Processed spectrogram; reconverting to audio")
    newAudio = conversion.spectrogramToAudioFile(newSpectrogram, sampleRate, fftWindowSize=fftWindowSize, phaseIterations=phaseIterations)
    pathParts = os.path.split(path)
    fileNameParts = os.path.splitext(pathParts[1])
    outputFileNameBase = os.path.join(pathParts[0], fileNameParts[0] + "_unet")
    console.log("Converted to audio; writing to", outputFileNameBase)
    conversion.saveAudioFile(newAudio, outputFileNameBase + ".wav", sampleRate)
    conversion.saveSpectrogram(newSpectrogram, outputFileNameBase + ".png")
    conversion.saveSpectrogram(spectrogram, os.path.join(pathParts[0], fileNameParts[0]) + ".png")
    console.log("Vocal isolation complete")
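# The padding step above exists because the U-Net downsamples the spectrogram several
# times, so both dimensions must be multiples of peakDownscaleFactor. The function below
# is a minimal sketch of that assumed behaviour, not the project's actual
# conversion.expandToGrid implementation (the name `expandToGridSketch` is hypothetical):
# round each dimension up to the next multiple of gridSize and zero-pad, so the
# prediction can later be cropped back to the original size.
def expandToGridSketch(spectrogram, gridSize):
    newY = int(np.ceil(spectrogram.shape[0] / gridSize) * gridSize)
    newX = int(np.ceil(spectrogram.shape[1] / gridSize) * gridSize)
    padded = np.zeros((newY, newX))
    padded[:spectrogram.shape[0], :spectrogram.shape[1]] = spectrogram
    return padded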