def generate_utilities_read_file_audio(filename):
    """Write a short burst of white noise to *filename* and return its raw data.

    A 20 ms white-noise segment is generated at the module RATE with a
    bit depth of NUM_BYTES * BITS_PER_BYTE, exported as WAV, and the
    segment's private raw sample buffer is returned.
    """
    noise_ms = 20
    generator = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    )
    segment = generator.to_audio_segment(duration=noise_ms)
    # Persist to disk as well as returning the raw bytes.
    segment.export(filename, format="wav")
    return segment._data
def add_audio(fn, d):
    """Overlay *d* milliseconds of white noise onto the video file *fn*.

    Exports the noise to a temporary mp3, mixes it with the video,
    saves the combined result as a new mp4, and deletes both the
    original video and the temporary noise file.
    """
    print("ADDING SOUND...")
    noise_segment = WhiteNoise().to_audio_segment(duration=d)
    noise_path = define_name("whiteNoise", ".mp3")
    noise_segment.export(noise_path, format="mp3")
    # Mix video and generated sound, then write the combined clip.
    combined = movie(fn) + music(noise_path)
    out_path = define_name("noiseVid", ".mp4")
    combined.save(out_path)
    # Clean up the inputs now that the combined file exists.
    for leftover in (fn, noise_path):
        os.remove(leftover)
def generate_record_multivoice_noise(filename):
    """Build a noisy two-voice WAV file at *filename*.

    Concatenates English and Spanish TTS, doubles the speech until it
    reaches the minimum sample length, pads it with silence on both
    sides, overlays quiet white noise, and exports the result.

    Returns:
        (start, end) byte offsets of the speech region in the file.
    """
    file_path, _ = path.splitext(filename)

    # Two voices back to back.
    speech = (generate_speech("Hello", "en", file_path)
              + generate_speech("¿Cómo se va?", "es", file_path))

    # Double the clip until it is long enough.
    min_len = MIN_SAMPLE_LENGTH * MILLISECONDS_PER_SECOND + LARGER_MARGIN
    while len(speech) < min_len:
        speech = speech + speech

    # Surround the speech with identical silent padding.
    pad = AudioSegment.silent(
        duration=MAX_SAMPLE_LENGTH * MILLISECONDS_PER_SECOND + MARGIN
    ).set_frame_rate(RATE)
    audio = pad + speech + pad

    # Quiet white noise (-30 dB) over the whole clip.
    hiss = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    ).to_audio_segment(duration=len(audio)) - 30
    audio = audio.overlay(hiss)

    audio.export(filename, format="wav")

    start = milliseconds_to_bytes(len(pad))
    end = milliseconds_to_bytes(len(pad) + len(speech))
    return (start, end)
def generate_diarization_noise(filename):
    """Build a noisy two-speaker WAV at *filename* for diarization tests.

    Generates the same phrase in English and Spanish, separates them
    with a stretch of silence, overlays quiet white noise, and exports
    the result.

    Returns:
        ((start1, end1), (start2, end2)) byte offsets of each utterance.
    """
    file_path, _ = path.splitext(filename)

    # Same sentence in two languages.
    first = generate_speech("Hello, how are you?", 'en', file_path)
    second = generate_speech("Hello, how are you?", 'es', file_path)

    # Gap between the two speakers.
    gap = AudioSegment.silent(
        duration=MAX_SILENCE_LENGTH * MILLISECONDS_PER_SECOND - MARGIN
    ).set_frame_rate(RATE)
    audio = first + gap + second

    # Quiet white noise (-30 dB) over the whole clip.
    hiss = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    ).to_audio_segment(duration=len(audio)) - 30
    audio = audio.overlay(hiss)

    audio.export(filename, format="wav")

    # Byte offsets of the two utterances within the exported file.
    start1 = 0
    end1 = milliseconds_to_bytes(len(first))
    start2 = end1 + milliseconds_to_bytes(len(gap))
    end2 = start2 + milliseconds_to_bytes(len(second))
    return ((start1, end1), (start2, end2))
def test_loudness(self):
    """Generated waveforms should come out at their expected dBFS levels."""
    measured = {
        "sine": Sine(440).to_audio_segment().dBFS,
        "square": Square(440).to_audio_segment().dBFS,
        "white_noise": WhiteNoise().to_audio_segment().dBFS,
    }
    self.assertAlmostEqual(measured["sine"], -3.0, places=1)
    self.assertAlmostEqual(measured["square"], 0.0, places=1)
    self.assertAlmostEqual(measured["white_noise"], -5, places=0)
def test_with_smoke(self):
    """Smoke test: every generator should render a segment without raising."""
    generators = (
        Sine(440),
        Square(440),
        Triangle(440),
        Pulse(440, duty_cycle=0.75),
        Sawtooth(440, duty_cycle=0.75),
        WhiteNoise(),
    )
    for generator in generators:
        generator.to_audio_segment()
def add_noise(aud_seg: AudioSegment, volume: float, **kwargs):
    """Overlay white noise of the given loudness onto an audio segment.

    Args:
        aud_seg: audio segment to alter.
        volume: loudness of the generated white noise in dBFS.
            Note that this is a weird measurement where 0 is the max;
            the more negative, the quieter it is.

    Returns:
        A new segment with the noise mixed in.
    """
    hiss = WhiteNoise().to_audio_segment(duration=len(aud_seg), volume=volume)
    return aud_seg.overlay(hiss)
class AudioAddWhiteNoiseTransform(object):
    """Callable transform that mixes white noise into an audio segment.

    The generator is created once at the configured sample rate; each
    call renders noise matching the input's duration at ``volume`` dBFS
    and overlays it.
    """

    def __init__(self, volume=-20.0, sample_rate=44100):
        # One reusable generator; only the duration varies per call.
        self.volume = volume
        self.white_noise_generator = WhiteNoise(sample_rate=sample_rate)

    def __call__(self, audio):
        """Return *audio* overlaid with noise of the same length."""
        hiss = self.white_noise_generator.to_audio_segment(
            duration=len(audio), volume=self.volume)
        return audio.overlay(hiss)
def generate_diarization_single_noise(filename):
    """Build a noisy single-speaker WAV at *filename*.

    Generates one English utterance, overlays quiet white noise of the
    same duration, and exports the mix.

    Returns:
        (start, end) byte offsets of the utterance (start is always 0).
    """
    file_path, _ = path.splitext(filename)

    utterance = generate_speech("Hello, how are you?", 'en', file_path)

    # Quiet white noise (-30 dB) matching the speech length.
    hiss = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    ).to_audio_segment(duration=len(utterance)) - 30
    mixed = utterance.overlay(hiss)

    mixed.export(filename, format="wav")
    return (0, milliseconds_to_bytes(len(utterance)))
def padding(wav, white_noise_duration):
    """Pad the audio file *wav* with white noise before and after it.

    ``white_noise_duration`` is a two-element list of pad lengths in
    seconds, e.g. ``[0, 1]``.  If either entry is 0, the input is first
    converted to 44.1 kHz stereo via ffmpeg and the single non-zero pad
    is stitched once before and once after it; otherwise both pads are
    generated and stitched around the input in both orders.  Two padded
    WAVs are written next to the input.

    Returns:
        list[str]: paths of the two padded files that were written.
    """
    # print("WAV FILE: " + wav)
    for x in white_noise_duration:
        if x == 0:
            # One pad is zero-length: generate only the non-zero pad.
            wav_files = []
            padded_fname = wav.rsplit(".", 1)[0]  # input path minus extension
            # print("PADDED NAME: " + padded_fname)
            silence_duration = max(white_noise_duration)
            # print(padded_fname+"_whitenoise.wav")
            # convert sampling rate, bits per sample, audio channel
            subprocess.call([
                "ffmpeg",
                "-i",
                wav,
                "-ar",
                "44100",
                "-ac",
                "2",
                padded_fname + "_converted.wav",
                "-y",
            ])
            # white noise duration should be a list e.g [0,1]
            # generate white noise wav file
            wn = WhiteNoise().to_audio_segment(duration=silence_duration * 1000)
            wn.export(
                padded_fname + "_whitenoise.wav",
                format="wav",
                parameters=["-ar", "16000"],
            )
            # stitch white noise wav file to specific audio wav file
            # before
            new_wav_before = AudioSegment.from_wav(
                padded_fname + "_whitenoise.wav") + AudioSegment.from_wav(
                    padded_fname + "_converted.wav")
            new_wav_before.export(
                padded_fname + "_padded" + "_" +
                str(white_noise_duration[1]) + "_" +
                str(white_noise_duration[0]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )
            # after
            new_wav_after = AudioSegment.from_wav(
                padded_fname + "_converted.wav") + AudioSegment.from_wav(
                    padded_fname + "_whitenoise.wav")
            new_wav_after.export(
                padded_fname + "_padded" + "_" +
                str(white_noise_duration[0]) + "_" +
                str(white_noise_duration[1]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )
            # remove white noise wav file
            os.remove(padded_fname + "_whitenoise.wav")
            os.remove(padded_fname + "_converted.wav")
            wav_files.append(padded_fname + "_padded" + "_" +
                             str(white_noise_duration[1]) + "_" +
                             str(white_noise_duration[0]) + ".wav")
            wav_files.append(padded_fname + "_padded" + "_" +
                             str(white_noise_duration[0]) + "_" +
                             str(white_noise_duration[1]) + ".wav")
            break
        else:
            # Both pads are non-zero: generate each pad separately.
            wav_files = []
            padded_fname = (wav.rsplit(".", 1)[0]).split("/")[-1]  # bare name
            # print("PADDED FILENAME: " + padded_fname)
            # NOTE(review): this local shadows any module-level `path`
            # (e.g. `from os import path`) — confirm that is intended.
            path = (wav.rsplit(".", 1)[0]).rsplit("/", 1)[0]
            # print("PATH: "+ path)
            fn = (wav.rsplit(".", 1)[0]).rsplit("/", 1)[1]
            # print("FILENAME: " + fn)
            # white noise duration should be a list e.g [0,1]
            # generate white noise wav file
            # wn_0 = AudioSegment.silent(duration=white_noise_duration[0] * 1000)
            wn_0 = WhiteNoise().to_audio_segment(
                duration=white_noise_duration[0] * 1000)
            wn_0.export(wav + "_whitenoise_0.wav",
                        format="wav",
                        parameters=["-ar", "16000"])
            # wn_1 = AudioSegment.silent(duration=white_noise_duration[1] * 1000)
            wn_1 = WhiteNoise().to_audio_segment(
                duration=white_noise_duration[1] * 1000)
            wn_1.export(wav + "_whitenoise_1.wav",
                        format="wav",
                        parameters=["-ar", "16000"])
            # stitch white noise wav file to specific audio wav file
            new_wav = (AudioSegment.from_wav(wav + "_whitenoise_0.wav") +
                       AudioSegment.from_wav(wav) +
                       AudioSegment.from_wav(wav + "_whitenoise_1.wav"))
            new_wav.export(
                path + "/" + padded_fname + "_padded" + "_" +
                str(white_noise_duration[0]) + "_" +
                str(white_noise_duration[1]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )
            # after
            new_wav_reverse = (
                AudioSegment.from_wav(wav + "_whitenoise_1.wav") +
                AudioSegment.from_wav(wav) +
                AudioSegment.from_wav(wav + "_whitenoise_0.wav"))
            new_wav_reverse.export(
                path + "/" + padded_fname + "_padded" + "_" +
                str(white_noise_duration[1]) + "_" +
                str(white_noise_duration[0]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )
            # remove white noise wav file
            os.remove(wav + "_whitenoise_0.wav")
            os.remove(wav + "_whitenoise_1.wav")
            wav_files.append(path + "/" + padded_fname + "_padded" + "_" +
                             str(white_noise_duration[0]) + "_" +
                             str(white_noise_duration[1]) + ".wav")
            wav_files.append(path + "/" + padded_fname + "_padded" + "_" +
                             str(white_noise_duration[1]) + "_" +
                             str(white_noise_duration[0]) + ".wav")
            # If adding to one folder, specify the path of folder!
            # new_wav.export("output_/"+fn+"_padded"+"_"+str(white_noise_duration[0])+"_"+str(white_noise_duration[1])+".wav", format="wav", parameters=["-ar", "16000"])
            # new_wav_reverse.export("output_/"+fn+"_padded"+"_"+str(white_noise_duration[1])+"_"+str(white_noise_duration[0])+".wav", format="wav", parameters=["-ar", "16000"])
            break
    return wav_files
def voice(a):
    """Speak the text *a* as per-token sounds overlaid on a quiet noise bed.

    Converts the text to IPA, merges two-character digraphs into single
    symbols, strips stress marks, then overlays a pre-recorded clip for
    each known token onto near-silent white noise and plays the result
    twice (the second time re-sampled to shift pitch).
    """
    #a=input("Write:")
    word = ipa.convert(a)
    word = list(word)
    w_size = len(word)
    # Two-character IPA sequences that should be treated as one symbol.
    w_corr = ["aɪ", "aʊ", "eɪ", "oʊ", "ɔɪ", "eə", "ɪə", "ʊə", "dʒ", "tʃ", "əʊ"]
    t_word = []
    t = 0  # count of digraph merges performed
    for i in range(w_size - 1):
        t_word = word[i] + word[i + 1]
        # NOTE(review): range(10) skips the last of the 11 w_corr entries
        # ("əʊ" never matches) — confirm whether that is intended.
        for j in range(10):
            if (t_word == w_corr[j]):
                word[i] = w_corr[j]
                t = t + 1
                # Shift the remainder left to swallow the merged character.
                for k in range(w_size - i - 2):
                    word[i + 1 + k] = word[i + 2 + k]
    word = word[0:w_size - t]  # drop the now-duplicated tail entries
    quote = "ˈ"  # IPA primary-stress mark
    y = 0  # count of stress marks removed
    for i in range(len(word)):
        if (word[i] == quote):
            y = y + 1
            # Shift everything after the mark one slot to the left.
            for k in range(len(word) - i - 1):
                word[i + k] = word[i + 1 + k]
    word = word[0:len(word) - y]
    print(word)
    dur = (len(word) / 5) * 1200  # bed length in ms, scaled by token count
    #sound = AudioSegment.silent(duration=dur)
    sound = WhiteNoise().to_audio_segment(duration=dur) - 70  # near-silent bed
    pos = 0  # overlay position in ms, advanced 120 ms per token
    for i in range(len(word)):
        pos = pos + 120
        if ((word[i] == " ") | (word[i] == "*")):
            word[i] = "_"
        # Tokens that have a pre-recorded clip on disk.
        vowel = [
            'su', 'his', 'her', 'their', 'ella', 'el', 'ellos', 'ellas', 'it'
        ]
        # NOTE(review): this loop repeats the identical membership test
        # len(vowel) times, and the first sound1 assignment (with +10 dB)
        # is immediately overwritten — confirm intended behavior.
        for j in range(len(vowel)):
            if word[i] in vowel:
                sound1 = AudioSegment.from_mp3(
                    '/home/david/Escritorio/Proyectos/lou/' + str(word[i]) +
                    '.wav') + 10
                sound1 = AudioSegment.from_mp3(
                    '/home/david/Escritorio/Proyectos/lou/' + str(word[i]) +
                    '.wav')
                sound = sound.overlay(sound1, position=pos)
    play(sound)
    octaves = 0.09
    new_sample_rate = int(sound.frame_rate * (2.0**octaves))
    # Re-interpret the same raw data at a higher frame rate: pitch shift.
    lowpitch_sound = sound._spawn(sound.raw_data,
                                  overrides={'frame_rate': new_sample_rate})
    play(lowpitch_sound)
def generate_white_noise(self, noise_duration, reduction=10):
    """Generate white noise attenuated by *reduction* dB.

    Args:
        noise_duration: length of the noise in milliseconds.
        reduction: gain reduction in dB applied to the raw noise
            (default 10).

    Returns:
        A white-noise AudioSegment at the source's sample rate.
    """
    noise = WhiteNoise().to_audio_segment(
        duration=noise_duration).set_frame_rate(
            int(self.audio_info['sample_rate']))
    # Bug fix: the reduction parameter was previously ignored — the code
    # always subtracted a hard-coded 10 dB. The default keeps the old
    # behavior for callers that did not pass `reduction`.
    return noise - reduction
async def distort(audiof: FileAudioSegment):
    """Return the track overlaid with white noise of matching duration."""
    track = audiof.track
    hiss = WhiteNoise().to_audio_segment(duration=len(track))
    return track.overlay(hiss)
def extract_audio(label_list, audio_seg, base_file, label_dict, file):
    """Export a labelled clip plus augmented (noisy / fx-overlaid) variants.

    Writes the label metadata as JSON under new_data/, then exports the
    clean clip to new_data/default, one white-noise-mixed copy per level
    in WN_PCT to new_data/noise<pct>, and one copy overlaid with a random
    sound effect from ./sound_fx to new_data/fx_overlay.

    Note: mutates *label_dict* by adding an entry for this file.
    """
    unique_speaker = set(map(lambda x: x['speaker'], label_list))
    l = len(audio_seg)  # NOTE(review): only used by the commented-out code below
    dir_name = "{}/{}/".format(base_file, 'new_data')
    audio_db = audio_seg.dBFS
    if not os.path.exists(dir_name):
        os.mkdir(dir_name)
    label_dict['{}.wav'.format(file)] = {
        'labels': label_list,
        'no_speakers': len(unique_speaker)}
    with open("{}/{}/{}.json".format(base_file, 'new_data', file), 'w') as outfile:
        json.dump(label_dict, outfile)
    # Export default file
    default_file_name = "{}/{}/{}.wav".format(
        base_file, 'new_data/default', file)
    if not os.path.exists("{}/{}/".format(base_file, 'new_data/default')):
        os.mkdir("{}/{}/".format(base_file, 'new_data/default'))
    audio_seg.export(default_file_name, format='wav')
    # Export white noise file
    for pct in WN_PCT:
        noise_file_name = "{}/{}/{}_noise{}.wav".format(
            base_file, 'new_data/noise' + str(pct), file, pct)
        if not os.path.exists("{}/{}/".format(base_file, 'new_data/noise' + str(pct))):
            os.mkdir("{}/{}/".format(base_file, 'new_data/noise' + str(pct)))
        # Noise gain scales with the clip's own loudness: pct=100 -> 1x dBFS,
        # smaller pct -> stronger gain (audio_db is negative).
        pct = pct / 100
        wn_db = (1 + (1 - pct)) * audio_db
        noise = WhiteNoise().to_audio_segment(duration=len(audio_seg)).apply_gain(wn_db)
        noise_audio_seg = audio_seg.overlay(noise)
        noise_audio_seg.export(noise_file_name, format='wav')
    # Export random overlay file
    sound_effect_list = os.listdir('sound_fx')
    rnd_fx = sound_effect_list[random.randrange(
        len(sound_effect_list))]
    print(rnd_fx)
    random_effect = AudioSegment.from_file('sound_fx/' + rnd_fx)
    fx_audio_seg = audio_seg.overlay(
        (random_effect).apply_gain(audio_db * 0.4), loop=True)
    fx_file_name = "{}/{}/{}_fx_overlay.wav".format(
        base_file, 'new_data/fx_overlay', file)
    if not os.path.exists("{}/{}/".format(base_file, 'new_data/fx_overlay')):
        os.mkdir("{}/{}/".format(base_file, 'new_data/fx_overlay'))
    fx_audio_seg.export(fx_file_name, format='wav')
    # # Export random overlay + WN file
    # overlay_noise_file_name = "{}/{}/{}_noise_overlay.wav".format(
    #     base_file, 'new_data/noise_overlay', file)
    # if not os.path.exists("{}/{}/".format(base_file, 'new_data/noise_overlay')):
    #     os.mkdir("{}/{}/".format(base_file,
    #                              'new_data/noise_overlay'))
    # wn_db = (1 + (1 - 0.5)) * audio_db
    # noise = WhiteNoise().to_audio_segment(duration=len(audio_seg)).apply_gain(wn_db)
    # fx_audio_seg_noise = fx_audio_seg.overlay(noise)
    # fx_audio_seg_noise.export(overlay_noise_file_name, format='wav')
    # # Well, rip
    # extreme_seg = audio_seg
    # extreme_file_name = "{}/{}/{}_extreme.wav".format(
    #     base_file, 'new_data/extreme', file)
    # if not os.path.exists("{}/{}/".format(base_file, 'new_data/extreme')):
    #     os.mkdir("{}/{}/".format(base_file,
    #                              'new_data/extreme'))
    # for _ in range(3):
    #     rnd_fx = sound_effect_list[random.randrange(
    #         len(sound_effect_list))]
    #     random_effect = AudioSegment.from_file('sound_fx/' + rnd_fx)
    #     extreme_seg = extreme_seg.overlay(
    #         (random_effect * math.ceil(l / len(random_effect))).apply_gain(audio_db * 0.6))
    # wn_db = (1 + (1 - 0.5)) * audio_db
    # noise = WhiteNoise().to_audio_segment(duration=len(audio_seg)).apply_gain(wn_db)
    # extreme_seg = extreme_seg.overlay(noise)
    # extreme_seg.export(extreme_file_name, format='wav')
    print('File {} exported'.format(file))
from pydub import AudioSegment #to save whitenoise to audio_segment from pydub.generators import WhiteNoise #to generate white noise from pydub.playback import play #needed to play the audio segement #from threading import Thread #for async background #whitenoise duration duration = 5000 #duration in millisec wn = WhiteNoise().to_audio_segment(duration=duration)-60 #def play_white_noise(segment,duration): #""" play whitenoise for given duration """ play(wn) #instantiate thread #white_noise = Thread(target=play_white_noise, args=(wn,duration)) #start the thread #white_noise.start()
def pad_tokens(sig=None):
    """Pad or trim an audio file to a fixed sample length and export it.

    Short sounds are centered between stretches of quiet white noise;
    long sounds have their edges trimmed symmetrically. The result is
    exported as WAV — to "tmp.wav2" when *sig* is given, otherwise to a
    "pad_"/"trim_"-prefixed copy of FLAGS.i.

    Args:
        sig: optional path of the input file; falls back to FLAGS.i.
            When given, the target length is fixed at one second;
            otherwise it is FLAGS.l milliseconds.
    """
    source = sig if sig else FLAGS.i
    sound = AudioSegment.from_file(source)
    samples = numpy.array(sound.get_array_of_samples())
    file_export_name = None

    # Target length in samples.
    if sig:
        desired_length = sound.frame_rate * int(1000 / 1000)
    else:
        desired_length = sound.frame_rate * int(FLAGS.l / 1000)

    if len(samples) <= desired_length:
        # One second of quiet (-60 dBFS) white noise to draw padding from.
        wn = numpy.array(
            WhiteNoise(sound.frame_rate).to_audio_segment(
                (1000), -60).get_array_of_samples())
        #wn = numpy.zeros(sound.frame_rate)
        deficit = desired_length - len(samples)
        # Extra sample (odd deficit) goes on the left.
        left_samples = int(numpy.ceil(deficit / 2))
        right_samples = int(numpy.floor(deficit / 2))
        padded_sound = numpy.concatenate(
            [wn[0:left_samples], samples, wn[0:right_samples]])
        # Convert back to AudioSegment then export.
        audio_segment = AudioSegment(
            padded_sound.tobytes(),
            frame_rate=sound.frame_rate,
            sample_width=sound.sample_width,
            channels=1,
        )
        if sig is None:
            file_export_name = "pad_" + os.path.basename(FLAGS.i)
    else:
        # Trim edges for sounds longer than our desired length.
        excess = len(samples) - desired_length
        trim_left = int(numpy.ceil(excess / 2))
        trim_right = int(numpy.floor(excess / 2))
        # Bug fix: the previous slice `samples[trim_left:-trim_right]`
        # produced an EMPTY array when trim_right == 0 (excess of exactly
        # one sample), since -0 == 0. Slice against the explicit end index.
        trimmed_sound = samples[trim_left:len(samples) - trim_right]
        # Convert back to AudioSegment then export.
        audio_segment = AudioSegment(
            trimmed_sound.tobytes(),
            frame_rate=sound.frame_rate,
            sample_width=sound.sample_width,
            channels=1,
        )
        if sig is None:
            file_export_name = "trim_" + os.path.basename(FLAGS.i)

    # Export completed file.
    if sig:
        file_handle = audio_segment.export("tmp.wav2", format="wav")
    else:
        file_handle = audio_segment.export(file_export_name, format="wav")
# NOTE(review): this fragment references names defined outside this view
# (song, duration, original_song_name, counters, noise limits/volumes) —
# it appears to be the body of a per-song recognition-robustness loop.

# Pick a random slice of the song of the configured duration.
start_point = random.randint(0, len(song) - duration)
end_point = start_point + duration
print("Sliced song is from %s to %s." % (start_point, end_point))
# Saves the sliced song to a file on disk.
sliced_song = song[start_point:end_point]
sliced_song.export("tmp1.mp3", format="mp3")
print("The sliced song has been saved to tmp1.mp3 temporarily.")
# Attempts to recognize.
if recognize_from_file("tmp1.mp3", original_song_name):
    cut_count += 1
# Creates a strong noise.
noise_duration = random.randint(0, duration // strong_noise_max)
noise = WhiteNoise().to_audio_segment(noise_duration)
decreased_noise = noise - strong_noise_volume
# Adds noise to the sound at a random position.
start_point = random.randint(0, duration - noise_duration)
noise_song = sliced_song.overlay(decreased_noise, position=start_point)
noise_song.export("tmp2.mp3", format="mp3")
# Attempts to recognize.
if recognize_from_file("tmp2.mp3", original_song_name):
    strong_noise_count += 1
# Creates a weak noise.
noise_duration = random.randint(0, duration // weak_noise_max)
noise = WhiteNoise().to_audio_segment(noise_duration)
decreased_noise = noise - weak_noise_volume
def apply_gain_to_file(src_file_obj: AudioSegment, gained_file_path, gained_file_format):
    """Mix gained white noise into *src_file_obj* and export the result.

    White noise matching the source duration is gained by
    GAIN_DENORM_VAL, overlaid onto the source, and written to
    *gained_file_path* in *gained_file_format*.

    Raises:
        AssertionError: if the output file was not created.
    """
    hiss = WhiteNoise().to_audio_segment(duration=len(src_file_obj))
    mixed = src_file_obj.overlay(hiss.apply_gain(GAIN_DENORM_VAL))
    mixed.export(gained_file_path, format=gained_file_format)
    assert os.path.exists(gained_file_path)
def main(args):
    """Build a positive (dog-bark) training set from UrbanSound clips.

    For each audio file under data/dog_bark, reads its CSV of
    (start, end) bark annotations (in seconds), slices the annotated
    regions into fixed-length segments, pads a short final segment with
    quiet white noise, and exports every segment louder than a dBFS
    threshold into graph/positive.
    """
    urbansound_folder = args.urbansound_dir
    urbansound_dogbark_data_folder = urbansound_folder + os.sep + 'data/dog_bark'
    urbansound_graph_folder = urbansound_folder + os.sep + 'graph'
    urbansound_dogbark_graph_folder = urbansound_graph_folder + os.sep + 'positive'
    urbansound_other_graph_folder = urbansound_graph_folder + os.sep + 'negative'
    if not os.path.exists(urbansound_graph_folder):
        os.mkdir(urbansound_graph_folder)
    if not os.path.exists(urbansound_dogbark_graph_folder):
        os.mkdir(urbansound_dogbark_graph_folder)
    if not os.path.exists(urbansound_other_graph_folder):
        os.mkdir(urbansound_other_graph_folder)
    # Negative-class source folders (not used in the portion shown here).
    urbansound_other_data_folders = [urbansound_folder + os.sep + 'data/air_conditioner', urbansound_folder + os.sep + 'data/car_horn', \
        urbansound_folder + os.sep + 'data/children_playing', urbansound_folder + os.sep + 'data/drilling', \
        urbansound_folder + os.sep + 'data/engine_idling', urbansound_folder + os.sep + 'data/gun_shot', \
        urbansound_folder + os.sep + 'data/jackhammer', urbansound_folder + os.sep + 'data/siren', \
        urbansound_folder + os.sep + 'data/street_music']
    SECOND_MS = 500  #1000
    SEGMENT_MS = 500  #2000
    ASSIGNED_SAMPLERATE = 44100
    ESC50_AUDIO_START_POS = 500
    POSITIVE_SAMPLE_DB_TH = -40.0  # segments quieter than this are dropped
    print('creating positive training set ..')
    idx = 0
    for file in os.listdir(urbansound_dogbark_data_folder):
        filename, extension = os.path.splitext(file)
        if extension == '.wav' or extension == '.ogg' or extension == '.mp3' or extension == '.flac' or extension == '.aif' or extension == '.aiff':
            # open sound file, normalized to mono 16-bit at a fixed rate
            audiopath = urbansound_dogbark_data_folder + os.sep + file
            print(audiopath)
            audio = AudioSegment.from_file(audiopath).set_frame_rate(
                ASSIGNED_SAMPLERATE).set_channels(1).set_sample_width(2)[:]
            # open csv file of (start, end) annotation pairs in seconds
            csvpath = urbansound_dogbark_data_folder + os.sep + filename + '.csv'
            csv = open(csvpath, 'r')
            lines = csv.readlines()
            for line in lines:
                start = float(line.split(',')[0]) * SECOND_MS
                end = float(line.split(',')[1]) * SECOND_MS
                # tolerance for accepting a trailing partial segment
                chunk1 = (end - start) / 10
                current = start
                while 1:
                    outfile = urbansound_dogbark_graph_folder + os.sep + str(
                        idx) + '_dogbark.wav'
                    idx += 1
                    audioclip = audio[current:current + SEGMENT_MS]
                    if len(audioclip) != SEGMENT_MS:
                        # Final short clip: top it up with quiet noise.
                        lack = SEGMENT_MS - len(
                            audioclip) + 100  # 100 for default crossfade
                        noiseclip = WhiteNoise().to_audio_segment(
                            duration=lack, volume=-50)
                        lastclip = audioclip.append(noiseclip)
                        if lastclip.dBFS > POSITIVE_SAMPLE_DB_TH:
                            lastclip.export(outfile, format='wav')
                        break
                    else:
                        if audioclip.dBFS > POSITIVE_SAMPLE_DB_TH:
                            audioclip.export(outfile, format='wav')
                    current += SEGMENT_MS
                    chunk2 = end - current
                    if chunk2 < chunk1:
                        break
                    # if current > end:
                    #     break
            csv.close()
def __init__(self, volume=-20.0, sample_rate=44100):
    """Store the noise loudness (dBFS) and prepare a reusable generator."""
    self.volume = volume
    generator = WhiteNoise(sample_rate=sample_rate)
    self.white_noise_generator = generator
def main(args):
    """Build dog-bark training and test sets (Python 2 variant).

    Creates positive samples from annotated UrbanSound dog_bark clips,
    negative samples from the other UrbanSound classes, a test set from
    the ESC-50 corpus, and additional negatives from the building-106
    kitchen recordings.  All outputs are fixed-length WAV segments.
    """
    urbansound_folder = args.urbansound_dir
    urbansound_dogbark_data_folder = urbansound_folder + os.sep + 'data/dog_bark'
    urbansound_graph_folder = urbansound_folder + os.sep + 'graph'
    urbansound_dogbark_graph_folder = urbansound_graph_folder + os.sep + 'positive'
    urbansound_other_graph_folder = urbansound_graph_folder + os.sep + 'negative'
    esc50_folder = args.esc50_dir
    esc50_graph_folder = esc50_folder + os.sep + 'graph'
    esc50_dogbark_graph_folder = esc50_graph_folder + os.sep + 'positive'
    esc50_other_graph_folder = esc50_graph_folder + os.sep + 'negative'
    building_106_kitchen_folder = args.kitchen106_dir
    building_106_kitchen_graph_folder = building_106_kitchen_folder + os.sep + 'graph'
    building_106_kitchen_other_graph_folder = building_106_kitchen_graph_folder + os.sep + 'negative'
    print esc50_dogbark_graph_folder
    print building_106_kitchen_other_graph_folder
    # Create every output folder up front (mode 0755).
    if not os.path.exists(urbansound_graph_folder):
        os.mkdir(urbansound_graph_folder, 0755)
    if not os.path.exists(urbansound_dogbark_graph_folder):
        os.mkdir(urbansound_dogbark_graph_folder, 0755)
    if not os.path.exists(urbansound_other_graph_folder):
        os.mkdir(urbansound_other_graph_folder, 0755)
    if not os.path.exists(esc50_graph_folder):
        os.mkdir(esc50_graph_folder, 0755)
    if not os.path.exists(esc50_dogbark_graph_folder):
        os.mkdir(esc50_dogbark_graph_folder, 0755)
    if not os.path.exists(esc50_other_graph_folder):
        os.mkdir(esc50_other_graph_folder, 0755)
    if not os.path.exists(building_106_kitchen_graph_folder):
        os.mkdir(building_106_kitchen_graph_folder, 0755)
    if not os.path.exists(building_106_kitchen_other_graph_folder):
        os.mkdir(building_106_kitchen_other_graph_folder, 0755)
    # Negative-class source folders.
    urbansound_other_data_folders = [urbansound_folder + os.sep + 'data/air_conditioner', urbansound_folder + os.sep + 'data/car_horn', \
        urbansound_folder + os.sep + 'data/children_playing', urbansound_folder + os.sep + 'data/drilling', \
        urbansound_folder + os.sep + 'data/engine_idling', urbansound_folder + os.sep + 'data/gun_shot', \
        urbansound_folder + os.sep + 'data/jackhammer', urbansound_folder + os.sep + 'data/siren', \
        urbansound_folder + os.sep + 'data/street_music']
    building_106_kitchen_other_data_folders = [building_106_kitchen_folder + os.sep + 'training_segments/bag', \
        building_106_kitchen_folder + os.sep + 'training_segments/blender', building_106_kitchen_folder + os.sep + 'training_segments/cornflakes_bowl', \
        building_106_kitchen_folder + os.sep + 'training_segments/cornflakes_eating', building_106_kitchen_folder + os.sep + 'training_segments/cup', \
        building_106_kitchen_folder + os.sep + 'training_segments/dish_washer', building_106_kitchen_folder + os.sep + 'training_segments/electric_razor', \
        building_106_kitchen_folder + os.sep + 'training_segments/flatware_sorting', building_106_kitchen_folder + os.sep + 'training_segments/food_processor', \
        building_106_kitchen_folder + os.sep + 'training_segments/hair_dryer', building_106_kitchen_folder + os.sep + 'training_segments/microwave', \
        building_106_kitchen_folder + os.sep + 'training_segments/microwave_bell', building_106_kitchen_folder + os.sep + 'training_segments/microwave_door', \
        building_106_kitchen_folder + os.sep + 'training_segments/plates_sorting', building_106_kitchen_folder + os.sep + 'training_segments/stirring_cup', \
        building_106_kitchen_folder + os.sep + 'training_segments/toaster_up_down', building_106_kitchen_folder + os.sep + 'training_segments/toilet_button', \
        building_106_kitchen_folder + os.sep + 'training_segments/toilet_flush', building_106_kitchen_folder + os.sep + 'training_segments/tooth', \
        building_106_kitchen_folder + os.sep + 'training_segments/vacuum_cleaner', building_106_kitchen_folder + os.sep + 'training_segments/washing_machine', \
        building_106_kitchen_folder + os.sep + 'training_segments/water_boiler', building_106_kitchen_folder + os.sep + 'training_segments/water_tap']
    SECOND_MS = 1000
    SEGMENT_MS = 2000
    ASSIGNED_SAMPLERATE = 44100
    ESC50_AUDIO_START_POS = 500
    POSITIVE_SAMPLE_DB_TH = -40.0  # segments quieter than this are dropped
    print 'creating positive training set ..'
    idx = 0
    for file in os.listdir(urbansound_dogbark_data_folder):
        filename, extension = os.path.splitext(file)
        if extension == '.wav' or extension == '.ogg' or extension == '.mp3' or extension == '.flac' or extension == '.aif' or extension == '.aiff':
            # open sound file, normalized to mono 16-bit at a fixed rate
            audiopath = urbansound_dogbark_data_folder + os.sep + file
            print audiopath
            audio = AudioSegment.from_file(audiopath).set_frame_rate(
                ASSIGNED_SAMPLERATE).set_channels(1).set_sample_width(2)[:]
            # open csv file of (start, end) annotation pairs in seconds
            csvpath = urbansound_dogbark_data_folder + os.sep + filename + '.csv'
            csv = open(csvpath, 'r')
            lines = csv.readlines()
            for line in lines:
                start = float(line.split(',')[0]) * SECOND_MS
                end = float(line.split(',')[1]) * SECOND_MS
                # tolerance for accepting a trailing partial segment
                chunk1 = (end - start) / 10
                current = start
                while 1:
                    outfile = urbansound_dogbark_graph_folder + os.sep + str(
                        idx) + '_dogbark.wav'
                    idx += 1
                    audioclip = audio[current:current + SEGMENT_MS]
                    if len(audioclip) != SEGMENT_MS:
                        # Final short clip: top it up with quiet noise.
                        lack = SEGMENT_MS - len(
                            audioclip) + 100  # 100 for default crossfade
                        noiseclip = WhiteNoise().to_audio_segment(
                            duration=lack, volume=-50)
                        lastclip = audioclip.append(noiseclip)
                        if lastclip.dBFS > POSITIVE_SAMPLE_DB_TH:
                            lastclip.export(outfile, format='wav')
                        break
                    else:
                        if audioclip.dBFS > POSITIVE_SAMPLE_DB_TH:
                            audioclip.export(outfile, format='wav')
                    current += SEGMENT_MS
                    chunk2 = end - current
                    if chunk2 < chunk1:
                        break
                    # if current > end:
                    #     break
            csv.close()
    print 'creating negative training set ..'
    idx = 0
    for other_data_folder in urbansound_other_data_folders:
        for file in os.listdir(other_data_folder):
            filename, extension = os.path.splitext(file)
            if extension == '.wav' or extension == '.ogg' or extension == '.mp3' or extension == '.flac' or extension == '.aif' or extension == '.aiff':
                # open sound file
                audiopath = other_data_folder + os.sep + file
                print audiopath
                try:
                    audio = AudioSegment.from_file(audiopath).set_frame_rate(
                        ASSIGNED_SAMPLERATE).set_channels(1).set_sample_width(
                            2)[:]
                    num_segment = len(audio) / SEGMENT_MS
                    for i in range(0, num_segment):
                        if i % 4 == 0:  # less sample :)
                            outfile = urbansound_other_graph_folder + os.sep + str(
                                idx) + '_other.wav'
                            idx += 1
                            audio[i * SEGMENT_MS:(i + 1) * SEGMENT_MS].export(
                                outfile, format='wav')
                except:
                    print 'failed to load this one ^^^^^'
    print 'creating test set ..'
    idx = 0
    # ESC-50 metadata: column 0 is the file name, column 3 is the class.
    csvpath = esc50_folder + os.sep + 'meta' + os.sep + 'esc50.csv'
    csv = open(csvpath, 'r')
    lines = csv.readlines()
    for line in lines[1:]:
        filename = line.split(',')[0]
        audiopath = esc50_folder + os.sep + 'audio' + os.sep + filename
        print audiopath
        audio = AudioSegment.from_file(audiopath)[:]
        audio = audio.set_frame_rate(ASSIGNED_SAMPLERATE)
        audio = audio.set_channels(1)
        if line.split(',')[3] == 'dog':
            outfile = esc50_dogbark_graph_folder + os.sep + str(
                idx) + '_dogbark.wav'
        else:
            outfile = esc50_other_graph_folder + os.sep + str(
                idx) + '_other.wav'
        idx += 1
        audio[ESC50_AUDIO_START_POS:ESC50_AUDIO_START_POS + SEGMENT_MS].export(
            outfile, format='wav')
    csv.close()
    print 'creating more negative samples'
    idx = 0
    for other_data_folder in building_106_kitchen_other_data_folders:
        for file in os.listdir(other_data_folder):
            filename, extension = os.path.splitext(file)
            if extension == '.wav' or extension == '.ogg' or extension == '.mp3' or extension == '.flac' or extension == '.aif' or extension == '.aiff':
                # open sound file
                audiopath = other_data_folder + os.sep + file
                print audiopath
                try:
                    audio = AudioSegment.from_file(audiopath).set_frame_rate(
                        ASSIGNED_SAMPLERATE).set_channels(1).set_sample_width(
                            2)[:]
                    outfile = building_106_kitchen_other_graph_folder + os.sep + str(
                        idx) + '_other.wav'
                    idx += 1
                    audio[0:SEGMENT_MS].export(outfile, format='wav')
                except:
                    print 'failed to load this one ^^^^^'