import time
from os.path import join

from madmom.audio.signal import Signal


def load_and_rhythm_preprocess(audio_dir, max_samples=-1):
    print('...loading and preprocessing files from folder')

    audio_files = get_list_of_files(audio_dir)

    if max_samples > 0:
        audio_files = audio_files[:max_samples]

    num_files = len(audio_files)

    processed = []
    for ind, file_name in enumerate(audio_files):
        t_start = time.time()

        # entries starting with '..' are treated as ready-made paths;
        # bare file names are joined with the audio directory
        if file_name.startswith('..'):
            path = file_name
        else:
            path = join(audio_dir, file_name)
        signal = Signal(path, start=0, stop=30)
        processed.append(rhythm_features_for_signal(signal))

        t_stop = time.time()
        print('finished processing file {}/{} in {:4.3f}s'.format(
            ind + 1, num_files, t_stop - t_start))

    return processed
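A minimal usage sketch (hypothetical directory name; get_list_of_files and rhythm_features_for_signal are project helpers assumed to be defined elsewhere):

# Hypothetical usage: preprocess the first 100 files of a folder.
features = load_and_rhythm_preprocess('data/audio', max_samples=100)
print(len(features))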
Example 2
def _audio_to_segments(filename, sample_rate, num_samples):
    """Loads, and splits an audio into N segments.
  
  Args:
    filename: A path to the audio.
    sample_rate: Sampling rate of the audios. If the sampling rate is different 
      with an audio's original sampling rate, then it re-samples the audio.
    num_samples: Number of samples one segment contains.
    
  Returns:
    A list of numpy arrays; segments.
  """
    # Load the first 29 seconds of the audio file as a numpy array
    sig = Signal(filename,
                 sample_rate=sample_rate,
                 dtype=np.float32,
                 stop=29,
                 num_channels=1)

    # Split the signal into segments
    total_samples = sig.shape[0]
    n_segment = total_samples // num_samples
    segments = [
        sig[i * num_samples:(i + 1) * num_samples] for i in range(n_segment)
    ]

    return segments
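A usage sketch under assumed inputs (hypothetical file name): with stop=29 the loader reads at most 29 seconds, so one-second segments at 22050 Hz yield up to 29 arrays.

# Hypothetical usage: split a clip into 1-second segments at 22050 Hz.
segments = _audio_to_segments('clip.wav', sample_rate=22050, num_samples=22050)
print(len(segments), segments[0].shape)  # up to 29 segments of shape (22050,)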
Example 3
def get_onset(wav_path, onset_path):
    # NOTE: onset_path was an undefined name in the original snippet; it is
    # made an explicit parameter here
    y, sr = librosa.core.load(wav_path, sr=None)
    # 25th-order Butterworth high-pass at 100 Hz (scipy.signal imported as `signal`)
    sos = signal.butter(25, 100, btype='highpass', fs=sr, output='sos')
    wav_data = signal.sosfilt(sos, y)
    # `normalize` was also undefined; librosa.util.normalize is assumed
    wav_data = librosa.util.normalize(wav_data)

    sodf = SpectralOnsetProcessor(onset_method='complex_flux',
                                  fps=50,
                                  filterbank=LogarithmicFilterbank,
                                  fmin=100,
                                  num_bands=24,
                                  norm=True)
    from madmom.audio.signal import Signal
    onset_strength = sodf(Signal(data=wav_data, sample_rate=sr))
    onset_strength = librosa.util.normalize(onset_strength)
    # hop length matching the 50 fps of the onset detection function
    h_length = int(librosa.time_to_samples(1. / 50, sr=sr))

    onset_times = librosa.onset.onset_detect(onset_envelope=onset_strength,
                                             sr=sr,
                                             hop_length=h_length,
                                             units='time',
                                             pre_max=5,
                                             post_max=5,
                                             pre_avg=5,
                                             post_avg=5)
    with open(onset_path, 'w') as f:
        for x in onset_times:
            f.write(f"{x}\n")
    return onset_times
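A usage sketch (hypothetical paths; onset_path was made an explicit parameter above):

# Hypothetical usage: detect onsets and write one time per line.
onset_times = get_onset('track.wav', 'track.onsets.txt')
print(f'found {len(onset_times)} onsets')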
Example 4
def _audio_to_segments(filename, sample_rate, n_samples, center=False):
    """Loads an audio file and splits it into N segments.

    Args:
      filename: Path to the audio file.
      sample_rate: Target sampling rate. If it differs from the file's
        original sampling rate, the audio is resampled.
      n_samples: Number of samples per segment.
      center: If True, trim leftover samples evenly from both ends before
        slicing.

    Returns:
      A list of numpy arrays (the segments).
    """
    # Load an audio file as a numpy array
    sig = Signal(filename, sample_rate=sample_rate, dtype=np.float32)

    total_samples = sig.shape[0]
    n_segment = total_samples // n_samples

    if center:
        remainder = total_samples % n_samples
        if remainder:
            # take the center samples; the guard avoids sig[0:-0], which
            # would yield an empty array when remainder == 0
            sig = sig[remainder // 2:-remainder // 2]

    # Split the signal into segments
    segments = [
        sig[i * n_samples:(i + 1) * n_samples] for i in range(n_segment)
    ]

    return segments
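A brief worked example of the centering, with hypothetical numbers: for a 10.5 s file at 16 kHz, total_samples = 168000 and n_samples = 16000, so remainder = 8000 and roughly 0.25 s is trimmed from each end before slicing into 10 segments.

# Hypothetical usage: centered 1-second segments at 16 kHz.
segments = _audio_to_segments('clip.wav', sample_rate=16000,
                              n_samples=16000, center=True)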
Example 5
def preprocess_x(filename):
    sig = Signal(filename, sample_rate=44100, num_channels=1)
    # one spectrogram per frame size (fs here is a frame size, not a sample rate)
    D = [preprocess_sig(sig, fs) for fs in [2048, 1024, 4096]]
    D = np.dstack(D)
    # pad left and right with 7 copies of the first/last frame
    s = np.repeat(D[:1], 7, axis=0)
    e = np.repeat(D[-1:], 7, axis=0)
    D = np.concatenate((s, D, e))
    return D
Example 6
def setUp(self):
    self.sbs_50 = SemitoneBandpassSpectrogram(sample_file, fps=50)
    self.sbs_10 = SemitoneBandpassSpectrogram(sample_file, fps=10)
    self.sbs_22050 = SemitoneBandpassSpectrogram(sample_file_22050,
                                                 fps=50,
                                                 fmin=2637,
                                                 fmax=4200)
    data = Signal(sample_file)
    self.sbs_10_from_signal = SemitoneBandpassSpectrogram(data, fps=10)
Example 7
def setUp(self):
    self.clp_50 = CLPChroma(sample_file, fps=50)
    self.clp_10 = CLPChroma(sample_file, fps=10)
    self.clp_22050 = CLPChroma(sample_file_22050,
                               fps=50,
                               fmin=2637,
                               fmax=4200)
    data = Signal(sample_file)
    self.clp_10_from_signal = CLPChroma(data, fps=10)
Example 8
def load_and_rhythm_preprocess(audio_dir, num_samples=-1):
    # num_samples limits how many files are processed (-1 = all)
    audio_files = [f for f in listdir(audio_dir) if isfile(join(audio_dir, f))]

    if num_samples > 0:
        audio_files = audio_files[:num_samples]

    processed = []
    for file_name in audio_files:
        path = join(audio_dir, file_name)
        signal = Signal(path)
        processed.append(rhythm_features_for_signal(signal))
    return processed
Example 9
def test_types(self):
    result = segment_axis(np.arange(10), 4, 2)
    self.assertIsInstance(result, np.ndarray)
    # the np.int / np.float aliases were removed from NumPy; use the builtins
    self.assertTrue(result.dtype == int)
    result = segment_axis(np.arange(10, dtype=float), 4, 2)
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == float)
    # test with a Signal
    from madmom.audio.signal import Signal
    signal = Signal(pj(AUDIO_PATH, 'sample.wav'))
    result = segment_axis(signal, 4, 2)
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == np.int16)
Example 10
def get_spectrogram(path,
                    sample_rate=None,
                    fps=None,
                    window=np.hanning,
                    fft_sizes=[1024],
                    filtered=True,
                    filterbank=LogarithmicFilterbank,
                    num_bands=12,
                    fmin=30,
                    fmax=17000):
    '''
        path: single file path
        filtered: generate a FilteredSpectrogram or a plain one

        returns a numpy array shaped (Frequencies, Timeframes, Channels)
        (log-spaced (Filtered)Spectrogram from madmom; one channel per FFT size)
    '''
    spectros = []
    # sample_rate=None keeps the original sample rate;
    # only take 30 s snippets to align the data
    signal = Signal(path, sample_rate=sample_rate, start=0, stop=30)
    frames = FramedSignal(signal, fps=fps)
    for fft_size in fft_sizes:
        stft = ShortTimeFourierTransform(frames,
                                         window=window,
                                         fft_size=fft_size,
                                         circular_shift=True)
        spectro = LogarithmicSpectrogram(stft)
        if filtered:
            filtered_spectro = FilteredSpectrogram(spectro,
                                                   filterbank=filterbank,
                                                   num_bands=num_bands,
                                                   fmin=fmin,
                                                   fmax=fmax)
            spectros.append(filtered_spectro)
        else:
            spectros.append(spectro)

    # zero-pad all spectrograms to the same number of frequency bins,
    # then stack them as channels
    num_frequencies = max(spectro.shape[1] for spectro in spectros)
    num_channels = len(spectros)
    num_timestamps = spectros[0].shape[0]

    final_spectro = np.zeros([num_frequencies, num_timestamps, num_channels])
    for channel, spectro in enumerate(spectros):
        final_spectro[:spectro.shape[1], :, channel] = spectro.T
    return final_spectro
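A usage sketch (hypothetical file name), assuming FFT sizes at least as large as the default 2048-sample frames; the channel count equals the number of FFT sizes:

# Hypothetical usage: a 2-channel log-filtered spectrogram at 100 fps.
spec = get_spectrogram('track.wav', fps=100, fft_sizes=[2048, 4096])
print(spec.shape)  # (num_frequencies, num_timeframes, 2)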
Example 11
def test_fft_window(self):
    # use a signal
    from madmom.audio.signal import Signal
    signal = Signal(sample_file)
    # scale the signal to float in the range -1..1
    scaling = float(np.iinfo(signal.dtype).max)
    scaled_signal = signal / scaling
    # calculate the STFTs of both signals
    result = ShortTimeFourierTransform(signal)
    scaled_result = ShortTimeFourierTransform(scaled_signal)
    # both STFTs must be the same
    self.assertTrue(np.allclose(result, scaled_result))
    # if no window is given, a uniformly scaled rectangular one should be used
    result = ShortTimeFourierTransform(signal, window=None)
    self.assertTrue(np.allclose(result.fft_window,
                                np.ones(2048, dtype=float) / scaling))
    scaled_result = ShortTimeFourierTransform(scaled_signal, window=None)
    self.assertTrue(scaled_result.fft_window is None)
Example 12
def get_spectrogram(path,
                    filtered=True,
                    window=np.hanning,
                    fft_size=1024,
                    sample_rate=None):
    '''
        path: single file path
        filtered: generate a FilteredSpectrogram or a plain one

        returns a madmom (Filtered)Spectrogram shaped (Timeframes, Frequencies)
        (log-spaced)
    '''
    # sample_rate=None takes original sample_rate
    signal = Signal(path, sample_rate=sample_rate)
    frames = FramedSignal(signal)
    stft = ShortTimeFourierTransform(frames, window=window, fft_size=fft_size)
    spectro = LogarithmicSpectrogram(stft)
    if filtered:
        return FilteredSpectrogram(spectro)
    else:
        return spectro
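A usage sketch (hypothetical file name), showing the unfiltered variant:

# Hypothetical usage: plain logarithmic spectrogram at the original sample rate.
spec = get_spectrogram('track.wav', filtered=False, fft_size=2048)
print(spec.shape)  # (num_timeframes, num_frequencies)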
Example 13
def pre_process_cwt(onsets_images_dir, non_onsets_images_dir, audio_files, ann_files):
    # NOTE: the arguments are immediately overridden below; both image
    # directories point to the same folder and the file lists are rebuilt
    onsets_images_dir = 'dataset_transformed'
    non_onsets_images_dir = 'dataset_transformed'

    dataset_dir = 'dataset'
    audio_files = list_audio_files(dataset_dir)
    ann_files = list_annotation_files(dataset_dir)
    frame_size = 1024
    sample_rate = 44100
    # frame duration in seconds (~23.2 ms for 1024 samples at 44.1 kHz)
    t = frame_size / sample_rate
    # t = 0.09287981859410431 seconds for frame_size = 4096

    time = np.arange(frame_size, dtype=np.float16)
    scales = np.arange(1,81) # scaleogram with 80 rows

    print(f'There are {str(len(audio_files))} audio files and {str(len(ann_files))} annotation files')

    i = 0
    for audio_file in audio_files:
        file_name = basename(audio_file)
        print(f'Pre-processing file {str(i+1)}/{str(len(audio_files))}: {file_name}')

        # Read audio file
        sig = Signal(audio_file, sample_rate, num_channels = 1)

        # Split audio signal into frames of same size
        frames = FramedSignal(sig, frame_size, hop_size = frame_size)
        print(f'There are {str(len(frames))} frames')

        # Read onset annotations for current audio file
        onset_file = ann_files[i]
        onsets = np.loadtxt(onset_file)
        print(f'Onsets read from {onset_file}')
        number_of_onsets = len(onsets)
        print(f'There are {str(number_of_onsets)} onsets')

        # Check if we already generated the correct amount of frames for that file before
        matching_files = glob.glob('dataset_transformed/' + '*'+ file_name + '*')
        if len(matching_files) > 0:
            if len(frames) == len(matching_files):
                print(f'Skipping file {i + 1}/{len(audio_files)}: {file_name}')
                i += 1
                continue

        start = 0
        end = t
        f = 0
        onsets_found_this_file = 0
        for frame in frames:
            # Check if the frame contains an onset
            start = f * t
            end = start + t
            f += 1
            has_onset = False
            for onset in onsets:
                if start <= onset <= end:
                    has_onset = True
                    onsets_found_this_file += 1

            if has_onset:
                print(f'There is an onset within the range: {start} to {end} s')
            else:
                print(f'There are no onsets within the range: {start} to {end} s')

            # Apply CWT
            cwt = scg.CWT(time, frame, scales, wavelet='cmor1.5-1.0')
            # print(cwt.coefs.shape)

            # Get scaleogram
            ax = scg.cws(cwt, yaxis = 'frequency', wavelet = 'cmor1.5-1.0', cbar = None, coi = False)

            # ['cgau1 :\tComplex Gaussian wavelets', 'cgau2 :\tComplex Gaussian wavelets', 
            # 'cgau3 :\tComplex Gaussian wavelets', 'cgau4 :\tComplex Gaussian wavelets', 
            # 'cgau5 :\tComplex Gaussian wavelets', 'cgau6 :\tComplex Gaussian wavelets', 
            # 'cgau7 :\tComplex Gaussian wavelets', 'cgau8 :\tComplex Gaussian wavelets', 
            # 'cmor1.5-1.0 :\tComplex Morlet wavelets', 'fbsp1-1.5-1.0 :\tFrequency B-Spline wavelets',
            #  'gaus1 :\tGaussian', 'gaus2 :\tGaussian', 'gaus3 :\tGaussian', 'gaus4 :\tGaussian', 
            #  'gaus5 :\tGaussian', 'gaus6 :\tGaussian', 'gaus7 :\tGaussian', 'gaus8 :\tGaussian', 
            #  'mexh :\tMexican hat wavelet', 'morl :\tMorlet wavelet', 'shan1.5-1.0 :\tShannon wavelets']

            # Remove axis from image
            plt.subplots_adjust(bottom = 0, top = 1, left = 0, right = 1)
            # plt.show()

            # Get image from matplot and process it
            fig = plt.gcf()
            plot_img_np = get_img_from_fig(fig)
            image = Image.fromarray(plot_img_np).convert('RGB').resize((15,80)) # TODO try PIL.Image.LANCZOS

            # Save image, prefixing the label (1 = onset, 0 = no onset)
            label = '1' if has_onset else '0'
            image.save(join(onsets_images_dir, f'{label}-{file_name}-F{f}.png'))

            plt.close()

        if number_of_onsets != onsets_found_this_file:
            print(f'Expected {number_of_onsets} onsets but found {onsets_found_this_file}. Exiting...')
            exit()

        i += 1
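The inner onset loop above scans every annotated onset for every frame. A vectorized sketch of the same labeling (my own, not part of the original code), assuming the onset times are sorted:

import numpy as np

def frames_with_onsets(onsets, n_frames, t):
    # label[k] = 1 iff some onset falls in [k*t, k*t + t]
    onsets = np.sort(np.atleast_1d(onsets))
    starts = np.arange(n_frames) * t
    # count onsets < start (side='left') vs. onsets <= start + t
    # (side='right'); a frame is positive exactly when the counts differ
    lo = np.searchsorted(onsets, starts, side='left')
    hi = np.searchsorted(onsets, starts + t, side='right')
    return (hi > lo).astype(int)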
Example 14
def get_cwt_dataset(split_file):
    audio_files = list_audio_files('dataset')
    ann_files = list_annotation_files('dataset')

    split = np.loadtxt(split_file, dtype = str)

    frame_size = 1024
    sample_rate = 44100
    # NOTE: t = 0.01 assumes a 10 ms hop, but hop_size below is frame_size/2
    # (~11.6 ms at 44.1 kHz), so the frame/onset alignment drifts slightly
    t = 0.01
    time = np.arange(frame_size, dtype=np.float16)
    scales = np.arange(1, 81)  # scaleogram with 80 rows

    i = 0
    train_features, train_labels = [], []  # spectrograms
    validation_features, validation_labels = [], []  # spectrograms
    for audio_file in audio_files:
        file_name = basename(audio_file)
        print(f'Pre-processing file {str(i+1)}/{str(len(audio_files))}: {file_name}')

        # Read audio file
        sig = Signal(audio_file, sample_rate, num_channels = 1)

        frames = FramedSignal(sig, frame_size, hop_size = frame_size/2)

        # Read onset annotations for current audio file
        onset_file = ann_files[i]
        onsets = np.loadtxt(onset_file)
        print(f'Onsets read from {onset_file}')
        number_of_onsets = len(onsets)
        print(f'There are {str(number_of_onsets)} onsets')

        start = 0
        end = t
        f = 0

        for frame in frames:
            cwt = scg.CWT(time, frame, scales, wavelet='cmor1.5-1.0')
            # NOTE: the CWT coefficients are complex; astype(np.uint8) keeps
            # only the real part (with a ComplexWarning), cast to uint8
            rgb_frame = Image.fromarray(cwt.coefs.astype(np.uint8)).convert('RGB').resize((15, 80), Image.LANCZOS)
            rgb_frame = np.asarray(rgb_frame)

            # Check if contains onset
            start = f * t
            end = start + t
            f += 1
            label = 0
            for onset in onsets:
                if start <= onset <= end:
                    label = 1

            if audio_file in split:
                validation_features.append(rgb_frame)
                validation_labels.append(label)
            else:
                train_features.append(rgb_frame)
                train_labels.append(label)

        i += 1
        if i == 10:
            break  # NOTE: debug leftover; only the first 10 files are processed

    # Post process
    train_features = np.array(train_features)
    validation_features = np.array(validation_features)
    train_features = train_features.astype('float32') / 255.
    validation_features = validation_features.astype('float32') / 255.

    train_labels = np.array(train_labels, dtype=int)
    validation_labels = np.array(validation_labels, dtype=int)

    return train_features, train_labels, validation_features, validation_labels
Example 15
def get_ffts_dataset(split_file):
    audio_files = list_audio_files('dataset')
    ann_files = list_annotation_files('dataset')

    split = np.loadtxt(split_file, dtype = str)

    frame_sizes = [2048, 1024, 4096]
    sample_rate = 44100
    t = 0.01

    i = 0
    train_features, train_labels = [], []  # spectrograms
    validation_features, validation_labels = [], []  # spectrograms
    for audio_file in audio_files:
        file_name = basename(audio_file)
        print(f'Pre-processing file {str(i+1)}/{str(len(audio_files))}: {file_name}')

        # Read audio file
        sig = Signal(audio_file, sample_rate, num_channels = 1)

        all_spectrograms = []
        for frame_size in frame_sizes:
            frames = FramedSignal(sig, frame_size, fps=100, hop_size=441)
            stft = ShortTimeFourierTransform(frames)
            filt = FilteredSpectrogram(stft, filterbank=MelFilterbank,
                                       num_bands=80, fmin=27.5, fmax=16000,
                                       norm_filters=True, unique_filters=False)
            log_filt = LogarithmicSpectrogram(filt, log=np.log, add=np.spacing(1))
            all_spectrograms.append(log_filt.T.astype(np.uint8))

        # Stack the three spectrograms along a new (channel) axis
        final_spectrogram = np.dstack(all_spectrograms)

        # Read onset annotations for current audio file
        onset_file = ann_files[i]
        onsets = np.loadtxt(onset_file)
        print(f'Onsets read from {onset_file}')
        number_of_onsets = len(onsets)
        print(f'There are {str(number_of_onsets)} onsets')

        start = 0
        end = t + 0.14
        f = 0
        for a in range(7, final_spectrogram.shape[1] - 7):
            # 15-frame context window (slice end is exclusive, hence a + 8)
            final_frame = final_spectrogram[:, a - 7:a + 8]

            # Check if the middle of the window contains an onset
            start = f * t
            end = start + (t * 15)
            f += 1
            label = 0

            onset_frame_start = start + (t * 5)
            onset_frame_end = onset_frame_start + (t * 5)

            for onset in onsets:
                if onset_frame_start <= onset <= onset_frame_end:
                    label = 1

            if audio_file in split:
                validation_features.append(final_frame)
                validation_labels.append(label)
            else:
                train_features.append(final_frame)
                train_labels.append(label)

        i += 1

    # Post process
    train_features = np.array(train_features)
    validation_features = np.array(validation_features)
    train_features = train_features.astype('float32') / 255.
    validation_features = validation_features.astype('float32') / 255.

    train_labels = np.array(train_labels, dtype=int)
    validation_labels = np.array(validation_labels, dtype=int)

    return train_features, train_labels, validation_features, validation_labels
Example 16
def get_cqt_dataset(split_file):
    audio_files = list_audio_files('dataset')
    ann_files = list_annotation_files('dataset')

    split = np.loadtxt(split_file, dtype = str)

    sample_rate = 44100
    # octave_resolution = 8.7
    octave_resolution = 80
    minimum_frequency = 27.5
    maximum_frequency = 16000
    time_resolution = 100
    cqt_kernel = zaf.cqtkernel(44100, octave_resolution, minimum_frequency, maximum_frequency)
    t = 0.01

    i = 0
    train_features, train_labels = [], []  # spectrograms
    validation_features, validation_labels = [], []  # spectrograms
    for audio_file in audio_files:
        file_name = basename(audio_file)
        print(f'Pre-processing file {str(i+1)}/{str(len(audio_files))}: {file_name}')

        # Read audio file
        sig = Signal(audio_file, sample_rate, num_channels = 1)

        cqt_spectrogram = zaf.cqtspectrogram(sig, 44100, time_resolution, cqt_kernel)
        # cqt_spectrogram = zaf.cqtchromagram(sig, 44100, time_resolution, octave_resolution, cqt_kernel)

        # Read onset annotations for current audio file
        onset_file = ann_files[i]
        onsets = np.loadtxt(onset_file)
        print(f'Onsets read from {onset_file}')
        number_of_onsets = len(onsets)
        print(f'There are {str(number_of_onsets)} onsets')

        start = 0
        end = t + 0.14
        f = 0

        for a in range(cqt_spectrogram.shape[1] - 15):
            frame = cqt_spectrogram[:, a:a + 15]
            # Pillow cannot build an image from float64 data; cast to float32
            rgb_frame = Image.fromarray(frame.astype(np.float32)).convert('RGB')
            rgb_frame = np.asarray(rgb_frame)

            # Check if contains onset
            start = f * t
            end = start + t + 0.14
            f += 1
            label = 0
            for onset in onsets:
                if start <= onset <= end:
                    label = 1

            if audio_file in split:
                validation_features.append(rgb_frame)
                validation_labels.append(label)
            else:
                train_features.append(rgb_frame)
                train_labels.append(label)

        i += 1

    # Post process
    train_features = np.array(train_features)
    validation_features = np.array(validation_features)
    train_features = train_features.astype('float32') / 255.
    validation_features = validation_features.astype('float32') / 255.

    train_labels = np.array(train_labels, dtype=int)
    validation_labels = np.array(validation_labels, dtype=int)

    return train_features, train_labels, validation_features, validation_labels
Example 17
def getPCPHistogram(filename, fs=8192, show=False):
    """Computes 12-bin pitch-class histograms (plain, HPSS and deep chroma)."""

    def accumulate(chromagram):
        # sum the chroma frames, plainly and weighted by spectral flatness
        # (the original Python 2 style map() calls broke under Python 3,
        # where map returns an iterator; plain list arithmetic is used instead)
        hist = [0.0] * 12
        hist_f = [0.0] * 12
        for frame in chromagram:
            flat = flatness(frame)
            hist = [h + w for h, w in zip(hist, frame)]
            hist_f = [h + w * flat for h, w in zip(hist_f, frame)]
        total = sum(hist)
        hist = [x / total for x in hist]
        total_f = sum(hist_f)
        hist_f = [x / total_f for x in hist_f]
        return hist, hist_f

    res = {}

    sig = Signal(filename, num_channels=1)
    fsig = FramedSignal(sig, frame_size=fs)
    stft = ShortTimeFourierTransform(fsig)
    spec = Spectrogram(stft)
    chroma = PitchClassProfile(spec, num_classes=12)

    hist, hist_f = accumulate(chroma)
    # rotate so the histogram starts at C (bin 9 of the raw profile)
    res['standard'] = [hist[i - 9] for i in range(12)]
    res['standard_f'] = [hist_f[i - 9] for i in range(12)]

    hpss = HarmonicPercussiveSourceSeparation()
    h, _ = hpss.process(spec)
    chroma = PitchClassProfile(h, num_classes=12)

    hist, hist_f = accumulate(chroma)
    res['hpss'] = [hist[i - 9] for i in range(12)]
    res['hpss_f'] = [hist_f[i - 9] for i in range(12)]

    dcp = DeepChromaProcessor()
    deepchroma = dcp(filename)

    # the deep chroma profile is kept in its native bin order
    res['deep'], res['deep_f'] = accumulate(deepchroma)

    if show:
        plt.subplot(131)
        plt.barh(range(12), res['standard'])
        plt.subplot(132)
        plt.barh(range(12), res['hpss'])
        plt.subplot(133)
        plt.barh(range(12), res['deep'])
        plt.show()
    return res
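A usage sketch (hypothetical file name):

# Hypothetical usage: compare the plain, HPSS and deep-chroma histograms.
profiles = getPCPHistogram('track.wav', show=True)
print(profiles['standard'])  # 12 pitch-class weights, rotated to start at C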
Example 18
musicFile = 'YourInputFile.wav'  # Input Music File
outFile = 'YourOutputFile.wav'  # Output Music File
playFile = False  # Play output File immediately

chordOn = True  # Add the chord sound to output
cmRatio = 0.3  # Chord and music file sound ratio

##### Chord Detection #####

dcp = DeepChromaProcessor()
decode = DeepChromaChordRecognitionProcessor()

chroma = dcp(musicFile)
output = decode(chroma)

sig = Signal(musicFile, num_channels=1)

synthComplex(chordResult=output,
             fname=outFile,
             music=musicFile,
             frate=sig.sample_rate,
             cmRatio=cmRatio,
             chordOn=chordOn)

##### Print Results #####

print('\n######## Results ########\n')
for start, end, label in output:
    print('start time', start, 'end time', end, 'result', label)
Example 19
def pre_process_fft(onsets_images_dir, non_onsets_images_dir, audio_files, ann_files):
    frame_sizes = [2048, 1024, 4096]
    sample_rate = 44100
    t = 0.01

    i = 0
    for audio_file in audio_files:
        file_name = basename(audio_file)
        print(f'Pre-processing file {str(i+1)}/{str(len(audio_files))}: {file_name}')

        # Read audio file
        sig = Signal(audio_file, sample_rate, num_channels = 1)

        all_spectrograms = []
        for frame_size in frame_sizes:
            frames = FramedSignal(sig, frame_size, fps=100, hop_size=441)
            stft = ShortTimeFourierTransform(frames)
            filt = FilteredSpectrogram(stft, filterbank=MelFilterbank,
                                       num_bands=80, fmin=27.5, fmax=16000,
                                       norm_filters=True, unique_filters=False)
            log_filt = LogarithmicSpectrogram(filt, log=np.log, add=np.spacing(1))
            all_spectrograms.append(log_filt.T.astype(np.uint8))

        # Stack the three spectrograms along a new (channel) axis
        final_spectrogram = np.dstack(all_spectrograms)

        # Read onset annotations for current audio file
        onset_file = ann_files[i]
        onsets = np.loadtxt(onset_file)
        print(f'Onsets read from {onset_file}')
        number_of_onsets = len(onsets)
        print(f'There are {str(number_of_onsets)} onsets')

        # Re-frame the signal (note: frame_size here is whatever value was
        # left over from the loop above, i.e. 4096)
        frames = FramedSignal(sig, frame_size, fps=100, hop_size=441)
        print(f'There are {len(frames)} frames')

        # Check if we already generated the correct amount of frames for that file before
        matching_files = glob.glob('dataset_transformed/' + '*'+ file_name + '*')
        if len(matching_files) > 0:
            if len(frames) == len(matching_files):
                print(f'Skipping file {i + 1}/{len(audio_files)}: {file_name}')
                i += 1
                continue

        start = 0
        end = t + 0.14
        f = 0
        onsets_found_this_file = 0

        for a in range(final_spectrogram.shape[1] - 15):
            final_frame = final_spectrogram[:, a:a + 15]

            # Check if the window contains an onset
            start = f * t
            end = start + t + 0.14
            f += 1
            has_onset = False
            for onset in onsets:
                if start <= onset <= end:
                    has_onset = True
                    onsets_found_this_file += 1

            image = Image.fromarray(final_frame)

            # Save image, prefixing the label (1 = onset, 0 = no onset)
            if has_onset:
                image.save(join(onsets_images_dir, f'1-{file_name}-F{f}.png'))
            else:
                image.save(join(non_onsets_images_dir, f'0-{file_name}-F{f}.png'))

        i += 1
Example 20
def preprocess_x(filename):
    sig = Signal(filename, sample_rate=44100, num_channels=1)
    frame_sizes = [1024, 2048, 4096]
    # one spectrogram per frame size, concatenated along the feature axis
    D = [preprocess_sig(sig, fs) for fs in frame_sizes]
    return np.hstack(D)
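preprocess_sig is not shown in any of these examples. A plausible stand-in, consistent with how it is called (a Signal plus a frame size, returning a frames-by-bands array); this is an assumption, not the project's actual implementation:

# Hypothetical sketch of preprocess_sig: a log-filtered spectrogram at a
# fixed fps, so spectrograms from different frame sizes align frame-for-frame.
from madmom.audio.signal import FramedSignal
from madmom.audio.stft import ShortTimeFourierTransform
from madmom.audio.spectrogram import FilteredSpectrogram, LogarithmicSpectrogram
from madmom.audio.filters import LogarithmicFilterbank

def preprocess_sig(sig, frame_size, fps=100):
    frames = FramedSignal(sig, frame_size=frame_size, fps=fps)
    stft = ShortTimeFourierTransform(frames)
    filt = FilteredSpectrogram(stft, filterbank=LogarithmicFilterbank,
                               num_bands=12, fmin=30, fmax=17000)
    return LogarithmicSpectrogram(filt)  # shape: (num_frames, num_bands)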