Example #1
 def test_db_float_conversions(self):
     self.assertEqual(db_to_float(10), 10)
     self.assertEqual(db_to_float(0), 1)
     self.assertEqual(ratio_to_db(1), 0)
     self.assertEqual(ratio_to_db(10), 10)
     self.assertEqual(3, db_to_float(ratio_to_db(3)))
     self.assertEqual(12, ratio_to_db(db_to_float(12)))
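These assertions match an older pydub in which db_to_float and ratio_to_db treated decibels as power ratios by default (10 dB corresponds to a factor of 10). A minimal sketch consistent with the test above:

import math

def db_to_float(db):
    # power convention: 10 dB is a factor of 10
    return 10 ** (db / 10.0)

def ratio_to_db(ratio):
    # inverse of db_to_float under the same convention
    return 10 * math.log10(ratio)

Current pydub defaults to amplitude ratios (20 * log10), which is why Example #12 below asserts db_to_float(20) == 10 and ratio_to_db(10) == 20.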
Example #3
def expand_commercial_silence(audiofile,
                              commercial_list_sample,
                              db_cutoff=20,
                              step=50,
                              distance=5000):
    silence_threshold = db_to_float(db_cutoff)

    commercial_list = []
    noncommercial_list = []
    for s, e in commercial_list_sample:
        start = search_for_silence(sample_to_msec(s),
                                   audiofile,
                                   distance=-distance,
                                   step=-step,
                                   threshold=silence_threshold)
        end = search_for_silence(sample_to_msec(e),
                                 audiofile,
                                 step=step,
                                 distance=distance,
                                 threshold=silence_threshold)
        # figure out how close to a multiple of 30 this is
        pct = (end - start) / 1000.0 / max(
            [round((end - start) / 1000.0 / 30.0) * 30, 30])
        if abs(1.0 - pct) < 0.03:
            print(
                f'{pct} {(end-start)/1000} {start} {sample_to_msec(s)-start} {end} {end-sample_to_msec(e)}'
            )
            commercial_list.append((start, end))
        else:
            print(
                f'noncommercial {pct} {(end-start)/1000} {start} {sample_to_msec(s)-start} {end} {end-sample_to_msec(e)}'
            )
            noncommercial_list.append((start, end))
    return commercial_list, noncommercial_list
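The snippet above relies on two helpers that are not shown. Hypothetical stand-ins sketching the assumed behavior (the real project's versions may differ, including the 44100 Hz frame rate and the rms test):

def sample_to_msec(sample, frame_rate=44100):
    # hypothetical: convert a sample index to a millisecond offset
    return int(sample * 1000.0 / frame_rate)

def search_for_silence(position, audiofile, distance=5000, step=50,
                       threshold=10.0):
    # hypothetical: walk outward from `position` in `step`-ms increments
    # (negative step/distance searches backwards), up to `distance` ms, and
    # return the first offset whose 1-second window drops below `threshold`
    for offset in range(position, position + distance, step):
        window = audiofile[offset:offset + 1000]
        if window.rms < threshold:
            return offset
    return position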
Example #4
def finding_silent_second(audio_segment,
                          min_silence_len=200,
                          silence_thresh=-16,
                          seek_step=1):
    seg_len = len(audio_segment)

    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []

    # convert silence threshold to a float value (so we can compare it to rms)
    silence_thresh = db_to_float(
        silence_thresh) * audio_segment.max_possible_amplitude

    # check successive (1 sec by default) chunk of sound for silence
    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
    last_slice_start = seg_len - min_silence_len
    slice_starts = range(0, last_slice_start + 1, seek_step)

    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts = itertools.chain(slice_starts, [last_slice_start])

    for i in slice_starts:
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms <= silence_thresh:
            return i

    # no silent slice was found
    return None
Example #5
def detect_silence(audio_segment,
                   min_silence_len=1000,
                   silence_thresh=-16,
                   seek_step=1,
                   disable_tqdm=True):
    seg_len = len(audio_segment)

    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []

    # convert silence threshold to a float value (so we can compare it to rms)
    silence_thresh = db_to_float(
        silence_thresh) * audio_segment.max_possible_amplitude

    # find silence and add start and end indices to the to_cut list
    silence_starts = []

    # check successive (1 sec by default) chunk of sound for silence
    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
    last_slice_start = seg_len - min_silence_len
    slice_starts = range(0, last_slice_start + 1, seek_step)

    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts = itertools.chain(slice_starts, [last_slice_start])

    for i in tqdm(slice_starts, desc='slicing silences', disable=disable_tqdm):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms <= silence_thresh:
            silence_starts.append(i)

    # short circuit when there is no silence
    if not silence_starts:
        return []

    # combine the silence we detected into ranges (start ms - end ms)
    silent_ranges = []

    prev_i = silence_starts.pop(0)
    current_range_start = prev_i

    for silence_start_i in silence_starts:
        continuous = (silence_start_i == prev_i + seek_step)

        # sometimes two small blips are enough for one particular slice to be
        # non-silent, despite the silence all running together. Just combine
        # the two overlapping silent ranges.
        silence_has_gap = silence_start_i > (prev_i + min_silence_len)

        if not continuous and silence_has_gap:
            silent_ranges.append(
                [current_range_start, prev_i + min_silence_len])
            current_range_start = silence_start_i
        prev_i = silence_start_i

    silent_ranges.append([current_range_start, prev_i + min_silence_len])

    return silent_ranges
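A minimal usage sketch for the function above (the filename is hypothetical):

from pydub import AudioSegment

audio = AudioSegment.from_wav("speech.wav")  # hypothetical input file

# find stretches of at least 1 second that sit 16 dB below full scale
for start_ms, end_ms in detect_silence(audio, min_silence_len=1000,
                                       silence_thresh=-16):
    print(f"silence from {start_ms} ms to {end_ms} ms")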
Example #7
def normalize(seg, headroom=0.1):
    peak_sample_val = seg.max

    # if the max is 0, this audio segment is silent, and can't be normalized
    if peak_sample_val == 0:
        return seg

    # gain needed to bring the peak to `headroom` dB below full scale
    target_peak = seg.max_possible_amplitude * db_to_float(-headroom)
    needed_boost = ratio_to_db(target_peak / peak_sample_val)
    return seg.apply_gain(needed_boost)
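A quick usage sketch (the filenames are hypothetical):

from pydub import AudioSegment

quiet = AudioSegment.from_wav("quiet_recording.wav")  # hypothetical file
louder = normalize(quiet, headroom=0.1)  # peak ends up 0.1 dB below full scale
louder.export("normalized.wav", format="wav")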
Example #9
  def get_normalize_gain(self, headroom=0.1):
    # cribbed from pydub's code
    peak_sample_val = self._seg.max

    # if the max is 0, this audio segment is silent, and can't be normalized
    if peak_sample_val == 0:
      return 0

    target_peak = self._seg.max_possible_amplitude * db_to_float(-headroom)

    return ratio_to_db(target_peak / peak_sample_val)
Example #10
def filter_silence(audio_file):
    from pydub import AudioSegment
    from pydub.utils import db_to_float
    sound_file = AudioSegment.from_wav(audio_file)
    average_loudness = sound_file.rms
    print(average_loudness)
    silence_threshold = average_loudness * db_to_float(-1)
    print(silence_threshold)
    # filter out the silence
    audio_chunks = (ms for ms in sound_file if ms.rms > silence_threshold)
    return audio_chunks
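The generator yields 1 ms slices, so the caller still has to concatenate them; the same reduce pattern used in the surrounding examples works (filenames are hypothetical):

from functools import reduce

chunks = filter_silence("input.wav")  # hypothetical file
audio_without_silence = reduce(lambda a, b: a + b, chunks)
audio_without_silence.export("no_silence.wav", format="wav")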
Example #11
def main():
    audio = AudioSegment.from_wav("track2.wav")
    average_loudness = audio.rms
    audio = audio[:7500]
    print(average_loudness)
    silence_threshold = average_loudness * db_to_float(-10)
    print(silence_threshold)

    parts = (ms for ms in audio if ms.rms > silence_threshold)
    print("t")
    audio = reduce(lambda a, b: a + b, parts)
    print("e")
    audio.export("test.wav", format="wav")
Example #12
File: test.py Project: jiaaro/pydub
 def test_db_float_conversions(self):
     self.assertEqual(db_to_float(20), 10)
     self.assertEqual(db_to_float(10, using_amplitude=False), 10)
     self.assertEqual(db_to_float(0), 1)
     self.assertEqual(ratio_to_db(1), 0)
     self.assertEqual(ratio_to_db(10), 20)
     self.assertEqual(ratio_to_db(10, using_amplitude=False), 10)
     self.assertEqual(3, db_to_float(ratio_to_db(3)))
     self.assertEqual(12, ratio_to_db(db_to_float(12)))
     self.assertEqual(3, db_to_float(ratio_to_db(3, using_amplitude=False), using_amplitude=False))
     self.assertEqual(12, ratio_to_db(db_to_float(12, using_amplitude=False), using_amplitude=False))
Example #14
def remove_silence(audio):
	# consider anything that is 30 decibels quieter than
	# the average volume of the podcast to be silence
	average_loudness = audio.rms
	silence_threshold = average_loudness * db_to_float(-30)

	# filter out the silence
	audio_parts = (ms for ms in audio if ms.rms > silence_threshold)

	# combine all the chunks back together
	try:
		audio_without_silence = reduce(lambda a, b: a + b, audio_parts)
	except TypeError:
		# reduce() raises TypeError when the generator is empty (all silence)
		audio_without_silence = audio

	return audio_without_silence
Example #15
def detect_silence_at_beginning_and_end(audio_segment,
                                        min_silence_len=1000,
                                        silence_thresh=-16,
                                        seek_step=1):
    seg_len = len(audio_segment)

    # you can't have a silent portion of a sound that is longer than the sound
    if seg_len < min_silence_len:
        return []

    # convert silence threshold to a float value (so we can compare it to rms)
    silence_thresh = (db_to_float(silence_thresh) *
                      audio_segment.max_possible_amplitude)

    # check successive (1 sec by default) chunk of sound for silence
    # try a chunk at every "seek step" (or every chunk for a seek step == 1)
    last_slice_start = seg_len - min_silence_len
    slice_starts = range(0, last_slice_start + 1, seek_step)

    # guarantee last_slice_start is included in the range
    # to make sure the last portion of the audio is searched
    if last_slice_start % seek_step:
        slice_starts = itertools.chain(slice_starts, [last_slice_start])

    # materialize so it can be indexed and iterated in reverse below
    # (reversed() and [-1] would fail on an itertools.chain object)
    slice_starts = list(slice_starts)

    song_start = 0
    song_end = seg_len
    for i in slice_starts:
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms > silence_thresh:
            if i == 0:
                song_start = 0
            else:
                song_start = i + min_silence_len
            break
    else:
        return [[0, 0], [song_end, song_end]]

    for i in reversed(slice_starts):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms > silence_thresh:
            if i == slice_starts[-1]:
                song_end = seg_len
            else:
                song_end = i
            break

    return [[0, song_start], [song_end, seg_len]]
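A usage sketch that trims the detected leading and trailing silence (the filenames are hypothetical):

from pydub import AudioSegment

track = AudioSegment.from_wav("track.wav")  # hypothetical file
ranges = detect_silence_at_beginning_and_end(track)
if ranges:
    lead, trail = ranges
    # keep everything between the leading and the trailing silence
    track[lead[1]:trail[0]].export("trimmed.wav", format="wav")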
Example #16
import matplotlib.pyplot as plt

from pydub import AudioSegment
from pydub.utils import db_to_float


def view(fn, thresh, silence_len):
    audio = AudioSegment.from_wav(fn)
    audio_volume = []
    for i in range(len(audio)):
        audio_volume.append(audio[i:i + silence_len].rms)

    # plt.figure(1)
    print(max(audio_volume))
    plt.axhline(db_to_float(thresh) * audio.max_possible_amplitude)
    plt.plot(audio_volume)
    plt.show()
Example #17
import numpy as np

from pydub import AudioSegment
from pydub.utils import (db_to_float, get_array_type, get_frame_width,
                         get_min_max_value)


def generate_random_noise(duration, gain, frame_width, sample_rate):
    bit_depth = 8 * frame_width
    minval, maxval = get_min_max_value(bit_depth)
    sample_width = get_frame_width(bit_depth)
    array_type = get_array_type(bit_depth)

    gain = db_to_float(gain)
    sample_count = int(sample_rate * (duration / 1000.0))

    data = ((np.random.rand(sample_count, 1) * 2) - 1.0) * maxval * gain

    return AudioSegment(data=data.astype(array_type).tobytes(),
                        metadata={
                            "channels": 1,
                            "sample_width": sample_width,
                            "frame_rate": sample_rate,
                            "frame_width": sample_width,
                        })
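A quick usage sketch: one second of noise at -12 dB of full scale, 16-bit (frame_width=2), 44.1 kHz; all values here are arbitrary choices:

noise = generate_random_noise(duration=1000, gain=-12.0,
                              frame_width=2, sample_rate=44100)
noise.export("noise.wav", format="wav")  # hypothetical output path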
Example #18
def detect_silence(audio_segment, min_silence_len=50, silence_thresh=25, koef=3000):
    seg_len = len(audio_segment)
    if seg_len < min_silence_len:
        return []
    silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude
    silence_thresh = silence_thresh // koef

    silence_starts = []

    last_slice_start = seg_len - min_silence_len
    for i in range(last_slice_start + 1):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms < silence_thresh:
            silence_starts.append(i)
    if not silence_starts:
        return []

    silent_ranges = []
    prev_i = silence_starts.pop(0)
    current_range_start = prev_i

    for silence_start_i in silence_starts:
        if silence_start_i != prev_i + 1:
            silent_ranges.append([current_range_start, prev_i])
            current_range_start = silence_start_i
        prev_i = silence_start_i

    silent_ranges.append([current_range_start, prev_i + min_silence_len])

    # drop any range shorter than min_silence_len; building a new list avoids
    # the index-skipping bug of popping from a list while iterating over it
    silent_ranges = [r for r in silent_ranges if r[1] - r[0] >= min_silence_len]

    return silent_ranges
Example #19
def detect_silence(audio_segment,
                   min_silence_len=60,
                   silence_thresh=20,
                   koef=200):
    seg_len = len(audio_segment)
    if seg_len < min_silence_len:
        return []
    silence_thresh = db_to_float(
        silence_thresh) * audio_segment.max_possible_amplitude
    silence_thresh = silence_thresh // koef
    silence_starts = []

    slice_starts = seg_len - min_silence_len
    for i in range(slice_starts + 1):
        audio_slice = audio_segment[i:i + min_silence_len]
        if audio_slice.rms < silence_thresh:
            silence_starts.append(i)
    if not silence_starts:
        return []

    silent_ranges = []
    prev_i = silence_starts.pop(0)
    current_range_start = prev_i

    for silence_start_i in silence_starts:
        if silence_start_i - prev_i >= min_silence_len:
            silent_ranges.append([current_range_start, prev_i])
            current_range_start = silence_start_i
        prev_i = silence_start_i

    silent_ranges.append([current_range_start, prev_i + min_silence_len])
    silent_ranges1 = []
    for e in range(len(silent_ranges)):
        if silent_ranges[e][1] - silent_ranges[e][0] >= min_silence_len:
            silent_ranges1.append(silent_ranges[e])

    return silent_ranges1
Example #20
import os
from functools import reduce

from pydub import AudioSegment
from pydub.utils import db_to_float

# the opening of this snippet was cut off; the helper below is a plausible
# reconstruction from the lines that survived
def getFiles(directory):
	directoryFiles = []
	os.chdir(directory)
	for files in os.listdir("."):
		if files.endswith(".aiff"):
			directoryFiles += [files]
	return directoryFiles

originalDir = "/media/xicombd/Storage/Sound/UniIowa/Piano"
finalDir = originalDir
allFiles = getFiles(originalDir)

# Converts the chosen files
if not os.path.exists(finalDir):
    os.makedirs(finalDir)
os.chdir(finalDir)
for files in allFiles:
	print(files)
	# Let's load up the audio we need...
	track = AudioSegment.from_file(originalDir + "/" + files, format="aiff")
	# Let's consider anything that is 30 decibels quieter than
	# the average volume of the podcast to be silence
	average_loudness = track.rms
	silence_threshold = average_loudness * db_to_float(-1)
	# filter out the silence
	track_parts = (ms for ms in track if ms.rms > silence_threshold)
	# combine all the chunks back together
	track = reduce(lambda a, b: a + b, track_parts)
	# save the result
	fileName = files.split('.aiff')[0]
	track.export(fileName + ".wav", format="wav")

Example #21
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 23 14:20:56 2017

silence detector 

@author: Yan Jin
"""

from pydub import AudioSegment
from pydub.utils import db_to_float
from functools import reduce

audio = AudioSegment.from_wav(
    '/Users/mac/Downloads/avec2017/300_P/300_AUDIO.wav')

# the average volume of the audio
average_loudness = audio.rms

# anything that is 30 decibels quieter than the rms is considered silence
silence_threshold = average_loudness * db_to_float(-30)

# filter out the silence
audio_silence = (ms for ms in audio if ms.rms > silence_threshold)

# combine all the chunks together
#audio_no_silence = reduce(lambda a, b: a+b, audio_silence)
Example #22
        stop = i
        # print("pass")
        if start != stop:
            chuck_start_end_list.append([start, stop])
    # build = chunk_list[i]
    #     build.export(("test/test{0}.mp3".format(str(i))), format="mp3")
    # build.export("test2.mp3", format="mp3")


    print(chuck_start_end_list)
    print("Chuck start stop list length: " + str(len(chuck_start_end_list)))
    print("silence threshold:\t" + str(silence_threshold))

    # For testing only -- prints segment sizes, showing larger segment
    large_chunks = 0
    for chunk in chuck_start_end_list:
        size = (chunk[1] - chunk[0])

        if size > 32:
            print("----------" + str(size))
            large_chunks += 1
        else:
            print(size)
    print(large_chunks)

if __name__ == '__main__':
    start_time = time.time()
    main()
    print("time:\t\t\t\t%s seconds" % str(time.time() - start_time))
    print(db_to_float(10))