Code Example #1
    def infer(self,
              path,
              fft_window_size,
              phase_iterations=10,
              learn_phase=False,
              channels=1):
        console.log("Attempting to isolate vocals from", path)
        audio, sample_rate = conversion.load_audio_file(path)
        spectrogram = conversion.audio_file_to_spectrogram(
            audio,
            fft_window_size=fft_window_size,
            learn_phase=self.config.learn_phase)
        console.log("Retrieved spectrogram; processing...")

        spectrogram, new_spectrogram = self.process_spectrogram(
            spectrogram, channels)

        console.log("reconverting to audio")

        # save original spectrogram as image
        path_parts = os.path.split(path)
        filename_parts = os.path.splitext(path_parts[1])

        conversion.save_spectrogram(
            spectrogram,
            os.path.join(path_parts[0], filename_parts[0]) + ".png")

        # save network output
        self.save_audio(new_spectrogram,
                        fft_window_size,
                        phase_iterations,
                        sample_rate,
                        path,
                        vocal=not self.config.instrumental,
                        learn_phase=learn_phase)

        # save difference (the complementary stem)
        self.save_audio(spectrogram - new_spectrogram,
                        fft_window_size,
                        phase_iterations,
                        sample_rate,
                        path,
                        vocal=self.config.instrumental,
                        learn_phase=learn_phase)

        console.log("Vocal isolation complete")
Code Example #2
    def isolate_vocals(self, path, fft_window_size, phase_iterations=10):
        console.log("Attempting to isolate vocals from", path)
        start_time = time.time()
        audio, sample_rate = conversion.load_audio(path)
        spectrogram, phase = conversion.audio_to_spectrogram(
            audio, fft_window_size=fft_window_size, sr=sample_rate)
        # spectrogram, phase = conversion.isolate_vocal_simple(audio, fft_window_size=fft_window_size, sr=sample_rate)
        console.log("Retrieved spectrogram; processing...")

        expanded_spectrogram = conversion.expand_to_grid(
            spectrogram, self.peak_downscale_factor)
        # add batch and channel axes for the network
        expanded_spectrogram_with_batch_channels = \
            expanded_spectrogram[np.newaxis, :, :, np.newaxis]
        predicted_spectrogram_with_batch_channels = self.model.predict(
            expanded_spectrogram_with_batch_channels)
        # strip the batch and channel axes again
        predicted_spectrogram = predicted_spectrogram_with_batch_channels[0, :, :, 0]
        # crop away the padding added by expand_to_grid
        new_spectrogram = predicted_spectrogram[:spectrogram.shape[0],
                                                :spectrogram.shape[1]]
        console.log("Processed spectrogram; reconverting to audio")

        new_audio = conversion.spectrogram_to_audio(
            new_spectrogram,
            fft_window_size=fft_window_size,
            phase_iterations=phase_iterations)
        path_parts = os.path.split(path)
        filename_parts = os.path.splitext(path_parts[1])
        output_filename_base = os.path.join(path_parts[0],
                                            filename_parts[0] + "_acapella")
        console.log("Converted to audio; writing to", output_filename_base)

        conversion.save_audio(new_audio, output_filename_base + ".wav",
                              sample_rate)
        conversion.save_spectrogram(new_spectrogram,
                                    output_filename_base + ".png")
        conversion.save_spectrogram(
            spectrogram,
            os.path.join(path_parts[0], filename_parts[0]) + ".png")
        # console.log("Vocal isolation complete 👌")
        print('execution time: {}'.format(time.time() - start_time))
        return new_audio
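
conversion.expand_to_grid is called above but not shown. A plausible sketch, assuming it zero-pads the spectrogram so both dimensions become multiples of the network's downscale factor (pooling and upsampling layers need this); the slice back to spectrogram.shape afterwards undoes the padding:

import numpy as np

def expand_to_grid(spectrogram, grid_size):
    # Round each dimension up to the next multiple of grid_size
    # and zero-pad; the caller crops the network output back down.
    new_h = int(np.ceil(spectrogram.shape[0] / grid_size)) * grid_size
    new_w = int(np.ceil(spectrogram.shape[1] / grid_size)) * grid_size
    expanded = np.zeros((new_h, new_w), dtype=spectrogram.dtype)
    expanded[:spectrogram.shape[0], :spectrogram.shape[1]] = spectrogram
    return expanded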
Code Example #3
    def save_audio(self,
                   spectrogram,
                   fft_window_size,
                   phase_iterations,
                   sample_rate,
                   path,
                   vocal=True,
                   learn_phase=False):
        part = "_vocal" if vocal else "_instrumental"
        new_audio = conversion.spectrogram_to_audio_file(
            spectrogram,
            fft_window_size=fft_window_size,
            phase_iterations=phase_iterations,
            learn_phase=learn_phase)
        path_parts = os.path.split(path)
        filename_parts = os.path.splitext(path_parts[1])
        output_filename_base = os.path.join(path_parts[0],
                                            filename_parts[0] + part)
        console.log("Converted to audio; writing to",
                    output_filename_base + ".wav")

        conversion.save_audio_file(new_audio, output_filename_base + ".wav",
                                   sample_rate)
        conversion.save_spectrogram(spectrogram, output_filename_base + ".png")
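
conversion.spectrogram_to_audio_file is not shown in the excerpt; the phase_iterations parameter suggests Griffin-Lim-style phase reconstruction. A minimal sketch of that idea using librosa, offered as an assumption about the implementation rather than the repo's actual code:

import numpy as np
import librosa

def griffin_lim(magnitude, fft_window_size, phase_iterations=10):
    # magnitude is expected to have 1 + fft_window_size // 2 rows.
    # Start from random phase, then alternate between the time and
    # STFT domains, keeping the known magnitudes at every step.
    angles = np.exp(2j * np.pi * np.random.rand(*magnitude.shape))
    for _ in range(phase_iterations):
        audio = librosa.istft(magnitude * angles)
        rebuilt = librosa.stft(audio, n_fft=fft_window_size)
        angles = np.exp(1j * np.angle(rebuilt))
    return librosa.istft(magnitude * angles)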
Code Example #4
                        mashup = np.maximum(acapella, instrumental)
                        # chop into slices so everything's the same size in a batch
                        dim = SLICE_SIZE
                        mashup_slices = chop(mashup, dim)
                        acapella_slices = chop(acapella, dim)
                        count += 1
                        self.x.extend(mashup_slices)
                        self.y.extend(acapella_slices)
                console.info("Created", count, "mashups for key", k, "with",
                             len(self.x), "total slices so far")
            # Add a "channels" channel to please the network
            self.x = np.array(self.x)[:, :, :, np.newaxis]
            self.y = np.array(self.y)[:, :, :, np.newaxis]
            # Save to file if asked
            if as_h5:
                h5f = h5py.File(h5_path, "w")
                h5f.create_dataset("x", data=self.x)
                h5f.create_dataset("y", data=self.y)
                h5f.close()


if __name__ == "__main__":
    # Simple testing code to use while developing
    console.h1("Loading Data")
    d = Data(sys.argv[1], 1536)
    console.h1("Writing Sample Data")
    # drop the channels axis added above before saving
    conversion.save_spectrogram(d.x[0].squeeze(), "x_sample_0.png")
    conversion.save_spectrogram(d.y[0].squeeze(), "y_sample_0.png")
    audio = conversion.spectrogram_to_audio(d.x[0].squeeze(), 1536)
    conversion.save_audio(audio, "x_sample.wav", 22050)
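
chop is used above but not defined in the excerpt. A plausible sketch, assuming it tiles a 2-D spectrogram into non-overlapping dim-by-dim squares and drops any ragged remainder:

def chop(matrix, scale):
    # Tile the matrix into scale x scale squares, discarding the
    # remainder along either axis so every slice has the same shape.
    slices = []
    for time in range(matrix.shape[1] // scale):
        for freq in range(matrix.shape[0] // scale):
            slices.append(matrix[freq * scale:(freq + 1) * scale,
                                 time * scale:(time + 1) * scale])
    return slices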
Code Example #5
        track_names = [name.encode("utf8") for name in self.track_names]
        names.create_dataset(name="track", data=track_names)
        for track in self.track_names:
            mashup.create_dataset(name=track.encode("utf8"),
                                  data=self.mashup[track])
            vocal.create_dataset(name=track.encode("utf8"),
                                 data=self.vocal[track])
            instrumental.create_dataset(name=track.encode("utf8"),
                                        data=self.instrumental[track])
        h5f.close()


if __name__ == "__main__":
    # Simple testing code to use while developing
    console.h1("Loading Data")
    d = Data()
    console.h1("Writing Sample Data")
    if not os.path.exists("sample"):
        os.mkdir("sample")
    n = 5
    if len(sys.argv) > 2:
        n = int(sys.argv[2])
    for i in range(n):
        conversion.save_spectrogram(d.x[i].squeeze(),
                                    "sample/x_sample_{}.png".format(i))
        conversion.save_spectrogram(d.y[i].squeeze(),
                                    "sample/y_sample_{}.png".format(i))
        audio = conversion.spectrogram_to_audio_file(d.x[i].squeeze(), 1536)
        conversion.save_audio_file(audio, "sample/x_sample_{}.wav".format(i),
                                   22050)
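
Reading the cache written above is symmetric. A sketch using h5py's standard API; the file name and the group names ("names", "mashup", "vocal", "instrumental") are inferred from the snippet, which does not show where the file and groups are created:

import h5py

with h5py.File("data.h5", "r") as h5f:  # file name is an assumption
    tracks = [name.decode("utf8") for name in h5f["names"]["track"]]
    for track in tracks:
        mix = h5f["mashup"][track][()]  # read the dataset as a NumPy array
        vocal = h5f["vocal"][track][()]
        instrumental = h5f["instrumental"][track][()]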