Example #1
0
def generate_utilities_read_file_audio(filename, duration_in_ms=20):
    """Generate a short burst of white noise, save it as a WAV file,
    and return the raw sample bytes.

    Args:
        filename: path of the WAV file to write.
        duration_in_ms: length of the generated noise in milliseconds
            (default 20, preserving the original behavior).

    Returns:
        The raw audio data (bytes) of the generated segment.
    """
    # generate noise at the project-wide sample rate / bit depth
    noise = WhiteNoise(sample_rate=RATE, bit_depth=NUM_BYTES *
                       BITS_PER_BYTE).to_audio_segment(duration=duration_in_ms)

    # save
    noise.export(filename, format="wav")

    # use the public raw_data accessor instead of the private _data attribute
    return noise.raw_data
Example #2
0
def add_audio(fn, d):
    """Merge ``d`` milliseconds of white noise into the video file ``fn``.

    Exports a temporary MP3 of white noise, combines it with the video,
    saves the result as a new MP4, then deletes the intermediates.
    """
    print("ADDING SOUND...")
    noise_segment = WhiteNoise().to_audio_segment(duration=d)
    noise_name = define_name("whiteNoise", ".mp3")
    noise_segment.export(noise_name, format="mp3")
    clip = movie(fn)
    track = music(noise_name)
    combined = clip + track
    out_name = define_name("noiseVid", ".mp4")
    combined.save(out_name)
    # clean up the source video and the temporary noise file
    os.remove(fn)
    os.remove(noise_name)
Example #3
0
def generate_record_multivoice_noise(filename):
    """Build a noisy two-voice sample, save it to ``filename`` as WAV,
    and return the (start, end) byte offsets of the speech region.
    """
    file_path, file_extension = path.splitext(filename)

    # generate text-to-speech in two languages and concatenate them
    speech = (generate_speech("Hello", "en", file_path) +
              generate_speech("¿Cómo se va?", "es", file_path))

    # keep doubling the clip until it reaches the minimum required length
    required = MIN_SAMPLE_LENGTH * MILLISECONDS_PER_SECOND + LARGER_MARGIN
    while len(speech) < required:
        speech = speech + speech

    # surround the speech with silence on both sides
    silence = AudioSegment.silent(
        duration=MAX_SAMPLE_LENGTH * MILLISECONDS_PER_SECOND +
        MARGIN).set_frame_rate(RATE)
    audio = silence + speech + silence

    # overlay white noise, attenuated by 30 dB, across the whole clip
    noise = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    ).to_audio_segment(duration=len(audio))
    audio = audio.overlay(noise - 30)

    # save
    audio.export(filename, format="wav")

    # byte offsets delimiting the speech inside the exported file
    start = milliseconds_to_bytes(len(silence))
    end = milliseconds_to_bytes(len(silence) + len(speech))

    return (start, end)
Example #4
0
def generate_diarization_noise(filename):
    """Create a noisy two-speaker diarization sample and save it as WAV.

    Returns:
        ((start1, end1), (start2, end2)) byte offsets for each utterance.
    """
    file_path, file_extension = path.splitext(filename)

    # generate the same sentence in two languages
    speech1 = generate_speech("Hello, how are you?", 'en', file_path)
    speech2 = generate_speech("Hello, how are you?", 'es', file_path)

    # insert a silent gap between the two utterances
    silence = AudioSegment.silent(
        duration=MAX_SILENCE_LENGTH * MILLISECONDS_PER_SECOND -
        MARGIN).set_frame_rate(RATE)
    audio = speech1 + silence + speech2

    # overlay white noise, attenuated by 30 dB, over the full clip
    noise = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    ).to_audio_segment(duration=len(audio))
    audio = audio.overlay(noise - 30)

    # save
    audio.export(filename, format="wav")

    # byte offsets of each speaker's segment
    start1 = 0
    end1 = milliseconds_to_bytes(len(speech1))
    start2 = end1 + milliseconds_to_bytes(len(silence))
    end2 = start2 + milliseconds_to_bytes(len(speech2))

    return ((start1, end1), (start2, end2))
Example #5
0
    def test_loudness(self):
        """Each generator's output should sit at its expected dBFS level."""
        sine_level = Sine(440).to_audio_segment().dBFS
        square_level = Square(440).to_audio_segment().dBFS
        noise_level = WhiteNoise().to_audio_segment().dBFS

        self.assertAlmostEqual(sine_level, -3.0, places=1)
        self.assertAlmostEqual(square_level, 0.0, places=1)
        self.assertAlmostEqual(noise_level, -5, places=0)
Example #6
0
    def test_with_smoke(self):
        """Smoke test: every generator produces a segment without raising."""
        for generator in (Sine(440), Square(440), Triangle(440)):
            generator.to_audio_segment()

        for generator in (Pulse(440, duty_cycle=0.75),
                          Sawtooth(440, duty_cycle=0.75)):
            generator.to_audio_segment()

        WhiteNoise().to_audio_segment()
def add_noise(aud_seg: AudioSegment, volume: float, **kwargs):
    """Add white noise of given volume to audio segment

    Args:
        aud_seg: audio segment to alter
        volume: volume of generated white noise in dBFS
            (0 dBFS is the maximum; more negative values are quieter)
    """
    noise = WhiteNoise().to_audio_segment(
        duration=len(aud_seg), volume=volume)
    return aud_seg.overlay(noise)
Example #8
0
class AudioAddWhiteNoiseTransform(object):
    """Callable transform that overlays white noise onto an audio segment.

    Attributes:
        volume: loudness of the generated noise in dBFS (0 is the maximum;
            more negative is quieter).
        white_noise_generator: pydub ``WhiteNoise`` generator configured
            with the requested sample rate.
    """
    def __init__(self, volume=-20.0, sample_rate=44100):
        self.volume = volume
        self.white_noise_generator = WhiteNoise(sample_rate=sample_rate)

    def __call__(self, audio):
        """Return ``audio`` mixed with white noise spanning its full length."""
        audio_white_noise = self.white_noise_generator.to_audio_segment(
            duration=len(audio), volume=self.volume)
        return audio.overlay(audio_white_noise)
Example #9
0
def generate_diarization_single_noise(filename):
    """Create a noisy single-speaker sample, save it as WAV, and return
    the (start, end) byte offsets of the speech.
    """
    file_path, file_extension = path.splitext(filename)

    # generate text-to-speech
    speech = generate_speech("Hello, how are you?", 'en', file_path)

    # overlay white noise, attenuated by 30 dB, over the utterance
    noise = WhiteNoise(
        sample_rate=RATE,
        bit_depth=NUM_BYTES * BITS_PER_BYTE,
    ).to_audio_segment(duration=len(speech))
    audio = speech.overlay(noise - 30)

    # save
    audio.export(filename, format="wav")

    # the speech spans the entire exported file
    return (0, milliseconds_to_bytes(len(speech)))
Example #10
0
def padding(wav, white_noise_duration):
    """Pad a WAV file with white noise and export the padded variants.

    Args:
        wav: path to the input WAV file.
        white_noise_duration: two-element list of durations in seconds,
            e.g. [0, 1]. Only the FIRST element is ever inspected (both
            branches end in ``break``): if it is 0 the input is first
            resampled via ffmpeg and a single noise segment is stitched
            before/after it; otherwise two noise segments are generated
            and stitched around the original file.

    Returns:
        List of the two exported padded file paths.
    """
    # print("WAV FILE: " + wav)
    for x in white_noise_duration:
        if x == 0:
            wav_files = []
            padded_fname = wav.rsplit(".", 1)[0]
            # print("PADDED NAME: " + padded_fname)
            # use the non-zero duration as the noise length
            silence_duration = max(white_noise_duration)
            # print(padded_fname+"_whitenoise.wav")

            # convert sampling rate, bits per sample, audio channel
            subprocess.call([
                "ffmpeg",
                "-i",
                wav,
                "-ar",
                "44100",
                "-ac",
                "2",
                padded_fname + "_converted.wav",
                "-y",
            ])

            # white noise duration should be a list e.g [0,1]
            # generate white noise wav file (duration given in seconds,
            # converted here to milliseconds for pydub)
            wn = WhiteNoise().to_audio_segment(duration=silence_duration *
                                               1000)
            wn.export(
                padded_fname + "_whitenoise.wav",
                format="wav",
                parameters=["-ar", "16000"],
            )

            # stitch white noise wav file to specific audio wav file
            # before
            new_wav_before = AudioSegment.from_wav(
                padded_fname +
                "_whitenoise.wav") + AudioSegment.from_wav(padded_fname +
                                                           "_converted.wav")
            new_wav_before.export(
                padded_fname + "_padded" + "_" + str(white_noise_duration[1]) +
                "_" + str(white_noise_duration[0]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )

            # after
            new_wav_after = AudioSegment.from_wav(
                padded_fname +
                "_converted.wav") + AudioSegment.from_wav(padded_fname +
                                                          "_whitenoise.wav")
            new_wav_after.export(
                padded_fname + "_padded" + "_" + str(white_noise_duration[0]) +
                "_" + str(white_noise_duration[1]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )

            # remove white noise wav file
            os.remove(padded_fname + "_whitenoise.wav")
            os.remove(padded_fname + "_converted.wav")
            wav_files.append(padded_fname + "_padded" + "_" +
                             str(white_noise_duration[1]) + "_" +
                             str(white_noise_duration[0]) + ".wav")
            wav_files.append(padded_fname + "_padded" + "_" +
                             str(white_noise_duration[0]) + "_" +
                             str(white_noise_duration[1]) + ".wav")
            break
        else:
            wav_files = []
            padded_fname = (wav.rsplit(".", 1)[0]).split("/")[-1]
            # print("PADDED FILENAME: " + padded_fname)
            # NOTE(review): this rebinds the name 'path' locally — if the
            # module imports os.path as 'path', it is shadowed from here on.
            path = (wav.rsplit(".", 1)[0]).rsplit("/", 1)[0]
            # print("PATH: "+ path)
            fn = (wav.rsplit(".", 1)[0]).rsplit("/", 1)[1]
            # print("FILENAME: " + fn)

            # white noise duration should be a list e.g [0,1]
            # generate white noise wav file
            # wn_0 = AudioSegment.silent(duration=white_noise_duration[0] * 1000)
            wn_0 = WhiteNoise().to_audio_segment(
                duration=white_noise_duration[0] * 1000)
            wn_0.export(wav + "_whitenoise_0.wav",
                        format="wav",
                        parameters=["-ar", "16000"])

            # wn_1 = AudioSegment.silent(duration=white_noise_duration[1] * 1000)
            wn_1 = WhiteNoise().to_audio_segment(
                duration=white_noise_duration[1] * 1000)
            wn_1.export(wav + "_whitenoise_1.wav",
                        format="wav",
                        parameters=["-ar", "16000"])

            # stitch white noise wav file to specific audio wav file
            new_wav = (AudioSegment.from_wav(wav + "_whitenoise_0.wav") +
                       AudioSegment.from_wav(wav) +
                       AudioSegment.from_wav(wav + "_whitenoise_1.wav"))
            new_wav.export(
                path + "/" + padded_fname + "_padded" + "_" +
                str(white_noise_duration[0]) + "_" +
                str(white_noise_duration[1]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )

            # after
            new_wav_reverse = (
                AudioSegment.from_wav(wav + "_whitenoise_1.wav") +
                AudioSegment.from_wav(wav) +
                AudioSegment.from_wav(wav + "_whitenoise_0.wav"))
            new_wav_reverse.export(
                path + "/" + padded_fname + "_padded" + "_" +
                str(white_noise_duration[1]) + "_" +
                str(white_noise_duration[0]) + ".wav",
                format="wav",
                parameters=["-ar", "16000"],
            )

            # remove white noise wav file
            os.remove(wav + "_whitenoise_0.wav")
            os.remove(wav + "_whitenoise_1.wav")

            wav_files.append(path + "/" + padded_fname + "_padded" + "_" +
                             str(white_noise_duration[0]) + "_" +
                             str(white_noise_duration[1]) + ".wav")
            wav_files.append(path + "/" + padded_fname + "_padded" + "_" +
                             str(white_noise_duration[1]) + "_" +
                             str(white_noise_duration[0]) + ".wav")

            # If adding to one folder, specify the path of folder!
            # new_wav.export("output_/"+fn+"_padded"+"_"+str(white_noise_duration[0])+"_"+str(white_noise_duration[1])+".wav", format="wav", parameters=["-ar", "16000"])
            # new_wav_reverse.export("output_/"+fn+"_padded"+"_"+str(white_noise_duration[1])+"_"+str(white_noise_duration[0])+".wav", format="wav", parameters=["-ar", "16000"])

            break
    return wav_files
Example #11
0
def voice(a):
    """Convert text ``a`` to IPA, merge multi-character phonemes, then
    synthesize speech by overlaying per-phoneme audio clips onto a quiet
    white-noise bed and playing the result (plus a pitch-shifted copy).
    """
    #a=input("Write:")
    word = ipa.convert(a)
    word = list(word)

    w_size = len(word)
    # two-character IPA sequences that must be treated as one phoneme
    w_corr = ["aɪ", "aʊ", "eɪ", "oʊ", "ɔɪ", "eə", "ɪə", "ʊə", "dʒ", "tʃ", "əʊ"]

    # merge adjacent characters that form a digraph, shifting the rest
    # of the list left; 't' counts how many slots were consumed
    t_word = []
    t = 0
    for i in range(w_size - 1):
        t_word = word[i] + word[i + 1]

        # NOTE(review): only the first 10 of the 11 entries in w_corr are
        # checked here ("əʊ" is never matched) — confirm if intentional
        for j in range(10):
            if (t_word == w_corr[j]):
                word[i] = w_corr[j]
                t = t + 1
                for k in range(w_size - i - 2):
                    word[i + 1 + k] = word[i + 2 + k]

    word = word[0:w_size - t]

    # strip IPA primary-stress marks, again shifting left and trimming
    quote = "ˈ"
    y = 0
    for i in range(len(word)):
        if (word[i] == quote):
            y = y + 1
            for k in range(len(word) - i - 1):
                word[i + k] = word[i + 1 + k]

    word = word[0:len(word) - y]
    print(word)
    # base duration scales with phoneme count (ms)
    dur = (len(word) / 5) * 1200
    #sound  = AudioSegment.silent(duration=dur)
    # very quiet white noise serves as the mixing bed
    sound = WhiteNoise().to_audio_segment(duration=dur) - 70
    pos = 0
    for i in range(len(word)):
        # each phoneme clip is placed 120 ms after the previous one
        pos = pos + 120
        if ((word[i] == " ") | (word[i] == "*")):
            word[i] = "_"

        vowel = [
            'su', 'his', 'her', 'their', 'ella', 'el', 'ellos', 'ellas', 'it'
        ]
        # NOTE(review): this boosted sound1 is immediately overwritten by
        # the unconditional load below — the +10 gain never takes effect
        for j in range(len(vowel)):
            if word[i] in vowel:
                sound1 = AudioSegment.from_mp3(
                    '/home/david/Escritorio/Proyectos/lou/' + str(word[i]) +
                    '.wav') + 10

        sound1 = AudioSegment.from_mp3(
            '/home/david/Escritorio/Proyectos/lou/' + str(word[i]) + '.wav')
        sound = sound.overlay(sound1, position=pos)

    play(sound)
    octaves = 0.09

    # raising the frame rate by ~0.09 octaves shifts the pitch up
    new_sample_rate = int(sound.frame_rate * (2.0**octaves))

    lowpitch_sound = sound._spawn(sound.raw_data,
                                  overrides={'frame_rate': new_sample_rate})
    play(lowpitch_sound)
 def generate_white_noise(self, noise_duration, reduction=10):
     """Generate attenuated white noise at this track's sample rate.

     Args:
         noise_duration: length of the noise in milliseconds.
         reduction: attenuation in dB to apply to the noise (default 10).

     Returns:
         A white-noise AudioSegment reduced by ``reduction`` dB.
     """
     noise = WhiteNoise().to_audio_segment(
         duration=noise_duration).set_frame_rate(
             int(self.audio_info['sample_rate']))
     # honor the 'reduction' parameter instead of the hard-coded 10 dB
     return noise - reduction
Example #13
0
File: audio.py Project: B1Z0N/iahr
async def distort(audiof: FileAudioSegment):
    """Return the track overlaid with full-volume white noise of equal length."""
    full_length_noise = WhiteNoise().to_audio_segment(
        duration=len(audiof.track))
    return audiof.track.overlay(full_length_noise)
def extract_audio(label_list, audio_seg, base_file, label_dict, file):
    """Export augmented copies of ``audio_seg`` plus a JSON label file.

    Writes, under ``base_file``/new_data: the label JSON, the unmodified
    segment, one white-noise-mixed copy per percentage in WN_PCT, and one
    copy overlaid with a random sound effect from the 'sound_fx' folder.

    Args:
        label_list: list of dicts each carrying at least a 'speaker' key.
        audio_seg: pydub AudioSegment to augment and export.
        base_file: root output directory.
        label_dict: dict the labels are merged into before JSON dump.
        file: base name (without extension) used for all outputs.
    """
    unique_speaker = set(map(lambda x: x['speaker'], label_list))

    l = len(audio_seg)

    dir_name = "{}/{}/".format(base_file, 'new_data')

    # overall loudness of the source, in dBFS (negative number)
    audio_db = audio_seg.dBFS

    if not os.path.exists(dir_name):
        os.mkdir(dir_name)

    label_dict['{}.wav'.format(file)] = {
        'labels': label_list, 'no_speakers': len(unique_speaker)}

    with open("{}/{}/{}.json".format(base_file, 'new_data', file), 'w') as outfile:
        json.dump(label_dict, outfile)

    # Export default file

    default_file_name = "{}/{}/{}.wav".format(
        base_file, 'new_data/default', file)

    if not os.path.exists("{}/{}/".format(base_file, 'new_data/default')):
        os.mkdir("{}/{}/".format(base_file, 'new_data/default'))

    audio_seg.export(default_file_name, format='wav')

    # Export white noise file

    for pct in WN_PCT:
        noise_file_name = "{}/{}/{}_noise{}.wav".format(
            base_file, 'new_data/noise' + str(pct), file, pct)

        if not os.path.exists("{}/{}/".format(base_file, 'new_data/noise' + str(pct))):
            os.mkdir("{}/{}/".format(base_file,
                                     'new_data/noise' + str(pct)))

        pct = pct / 100
        # noise gain derived from source loudness; since audio_db is
        # negative dBFS, higher pct means less negative (louder) noise
        wn_db = (1 + (1 - pct)) * audio_db

        noise = WhiteNoise().to_audio_segment(duration=len(audio_seg)).apply_gain(wn_db)
        noise_audio_seg = audio_seg.overlay(noise)
        noise_audio_seg.export(noise_file_name, format='wav')

    # Export random overlay file

    sound_effect_list = os.listdir('sound_fx')

    rnd_fx = sound_effect_list[random.randrange(
        len(sound_effect_list))]

    print(rnd_fx)

    random_effect = AudioSegment.from_file('sound_fx/' + rnd_fx)

    # loop the effect so it covers the whole segment
    fx_audio_seg = audio_seg.overlay(
        (random_effect).apply_gain(audio_db * 0.4), loop=True)

    fx_file_name = "{}/{}/{}_fx_overlay.wav".format(
        base_file, 'new_data/fx_overlay', file)

    if not os.path.exists("{}/{}/".format(base_file, 'new_data/fx_overlay')):
        os.mkdir("{}/{}/".format(base_file, 'new_data/fx_overlay'))

    fx_audio_seg.export(fx_file_name, format='wav')

    # # Export random overlay + WN file

    # overlay_noise_file_name = "{}/{}/{}_noise_overlay.wav".format(
    #     base_file, 'new_data/noise_overlay', file)

    # if not os.path.exists("{}/{}/".format(base_file, 'new_data/noise_overlay')):
    #     os.mkdir("{}/{}/".format(base_file,
    #                              'new_data/noise_overlay'))

    # wn_db = (1 + (1 - 0.5)) * audio_db

    # noise = WhiteNoise().to_audio_segment(duration=len(audio_seg)).apply_gain(wn_db)
    # fx_audio_seg_noise = fx_audio_seg.overlay(noise)
    # fx_audio_seg_noise.export(overlay_noise_file_name, format='wav')

    # # Well, rip

    # extreme_seg = audio_seg

    # extreme_file_name = "{}/{}/{}_extreme.wav".format(
    #     base_file, 'new_data/extreme', file)

    # if not os.path.exists("{}/{}/".format(base_file, 'new_data/extreme')):
    #     os.mkdir("{}/{}/".format(base_file,
    #                              'new_data/extreme'))

    # for _ in range(3):
    #     rnd_fx = sound_effect_list[random.randrange(
    #         len(sound_effect_list))]

    #     random_effect = AudioSegment.from_file('sound_fx/' + rnd_fx)

    #     extreme_seg = extreme_seg.overlay(
    #         (random_effect * math.ceil(l / len(random_effect))).apply_gain(audio_db * 0.6))

    # wn_db = (1 + (1 - 0.5)) * audio_db
    # noise = WhiteNoise().to_audio_segment(duration=len(audio_seg)).apply_gain(wn_db)
    # extreme_seg = extreme_seg.overlay(noise)
    # extreme_seg.export(extreme_file_name, format='wav')

    print('File {} exported'.format(file))
Example #15
0
from pydub import AudioSegment           #to save white noise to an audio segment
from pydub.generators import WhiteNoise  #to generate white noise
from pydub.playback import play          #needed to play the audio segment
#from threading import Thread             #for async background playback


# white-noise duration
duration = 5000  #duration in milliseconds
# generate the noise and attenuate it by 60 dB so it plays quietly
wn = WhiteNoise().to_audio_segment(duration=duration)-60


#def play_white_noise(segment,duration):
    #""" play whitenoise for given duration """
play(wn)

#instantiate thread
#white_noise = Thread(target=play_white_noise, args=(wn,duration))

#start the thread
#white_noise.start()
Example #16
0
def pad_tokens(sig=None):
    """Pad or trim an audio file to a fixed sample length and export it.

    If the sound is shorter than the target length, quiet white noise is
    prepended/appended; if longer, equal amounts are trimmed from each
    edge. With ``sig`` given, the target is one second of samples and the
    result goes to "tmp.wav2"; otherwise FLAGS.i / FLAGS.l control the
    input file and target length (ms), and the output name is prefixed
    with "pad_" or "trim_".

    Args:
        sig: optional path to the input audio file; falls back to FLAGS.i.
    """
    if sig:
        sound = AudioSegment.from_file(sig)
    else:
        sound = AudioSegment.from_file(FLAGS.i)

    samples = numpy.array(sound.get_array_of_samples())
    file_export_name = None

    # Target number of samples: frame_rate * seconds.
    if sig:
        desired_length = sound.frame_rate  # 1000 ms target
    else:
        desired_length = sound.frame_rate * int(FLAGS.l / 1000)

    if len(samples) <= desired_length:
        # One second of quiet (-60 dBFS) white noise to draw padding from.
        # NOTE(review): if more than one second of padding is needed per
        # side, the pad slices below come up short — confirm inputs are
        # close to the target length.
        wn = numpy.array(
            WhiteNoise(sound.frame_rate).to_audio_segment(
                (1000), -60).get_array_of_samples())
        #wn = numpy.zeros(sound.frame_rate)

        # Samples padded to beginning
        left_samples = int(numpy.ceil(abs(len(samples) - desired_length) / 2))
        pad_left = wn[0:left_samples]

        # Samples padded to end
        right_samples = int(numpy.floor(
            abs(len(samples) - desired_length) / 2))
        pad_right = wn[0:right_samples]

        # Concatenate arrays of samples
        padded_sound = numpy.concatenate([pad_left, samples, pad_right])

        # Convert back to AudioSegment then export
        audio_segment = AudioSegment(
            padded_sound.tobytes(),
            frame_rate=sound.frame_rate,
            sample_width=sound.sample_width,
            channels=1,
        )
        if sig is None:
            file_export_name = "pad_" + os.path.basename(FLAGS.i)
    else:
        # Trim edges for sounds longer than our desired length.
        trim_left = int(numpy.ceil(abs(len(samples) - desired_length) / 2))
        trim_right = int(numpy.floor(abs(len(samples) - desired_length) / 2))
        # BUG FIX: the original used samples[trim_left:-trim_right], which
        # evaluates to an EMPTY slice when trim_right == 0 (i.e. the excess
        # is exactly one sample), because -0 == 0. Use an explicit end index.
        trimmed_sound = samples[trim_left:len(samples) - trim_right]

        # Convert back to AudioSegment then export
        audio_segment = AudioSegment(
            trimmed_sound.tobytes(),
            frame_rate=sound.frame_rate,
            sample_width=sound.sample_width,
            channels=1,
        )
        if sig is None:
            file_export_name = "trim_" + os.path.basename(FLAGS.i)

    # Export completed file
    if sig:
        audio_segment.export("tmp.wav2", format="wav")
    else:
        audio_segment.export(file_export_name, format="wav")
Example #17
0
    # pick a random slice of the song of the requested duration
    start_point = random.randint(0, len(song) - duration)
    end_point = start_point + duration
    print("Sliced song is from %s to %s." % (start_point, end_point))

    # Saves the sliced song to a file on disk.
    sliced_song = song[start_point:end_point]
    sliced_song.export("tmp1.mp3", format="mp3")
    print("The sliced song has been saved to tmp1.mp3 temporarily.")

    # Attempts to recognize.
    if recognize_from_file("tmp1.mp3", original_song_name):
        cut_count += 1

    # Creates a strong noise.
    noise_duration = random.randint(0, duration // strong_noise_max)
    noise = WhiteNoise().to_audio_segment(noise_duration)
    # attenuate the noise by the configured strong-noise volume (dB)
    decreased_noise = noise - strong_noise_volume

    # Adds noise to the sound.
    start_point = random.randint(0, duration - noise_duration)
    noise_song = sliced_song.overlay(decreased_noise, position=start_point)
    noise_song.export("tmp2.mp3", format="mp3")

    # Attempts to recognize.
    if recognize_from_file("tmp2.mp3", original_song_name):
        strong_noise_count += 1

    # Creates a weak noise.
    noise_duration = random.randint(0, duration // weak_noise_max)
    noise = WhiteNoise().to_audio_segment(noise_duration)
    # attenuate the noise by the configured weak-noise volume (dB)
    decreased_noise = noise - weak_noise_volume
Example #18
0
def apply_gain_to_file(src_file_obj: AudioSegment, gained_file_path, gained_file_format):
    """Mix gain-adjusted white noise into ``src_file_obj`` and export it.

    Raises AssertionError if the output file was not written.
    """
    full_length_noise = WhiteNoise().to_audio_segment(
        duration=len(src_file_obj)).apply_gain(GAIN_DENORM_VAL)
    mixed = src_file_obj.overlay(full_length_noise)
    mixed.export(gained_file_path, format=gained_file_format)
    assert os.path.exists(gained_file_path)
Example #19
0
def main(args):
    """Slice dog-bark recordings from the UrbanSound dataset into
    fixed-length positive training clips.

    For each audio file in data/dog_bark, reads its CSV of (start, end)
    annotations, cuts SEGMENT_MS windows from each annotated region, pads
    short tail clips with quiet white noise, and exports windows louder
    than POSITIVE_SAMPLE_DB_TH into graph/positive.
    """
    urbansound_folder = args.urbansound_dir
    urbansound_dogbark_data_folder = urbansound_folder + os.sep + 'data/dog_bark'
    urbansound_graph_folder = urbansound_folder + os.sep + 'graph'
    urbansound_dogbark_graph_folder = urbansound_graph_folder + os.sep + 'positive'
    urbansound_other_graph_folder = urbansound_graph_folder + os.sep + 'negative'

    if not os.path.exists(urbansound_graph_folder):
        os.mkdir(urbansound_graph_folder)
    if not os.path.exists(urbansound_dogbark_graph_folder):
        os.mkdir(urbansound_dogbark_graph_folder)
    if not os.path.exists(urbansound_other_graph_folder):
        os.mkdir(urbansound_other_graph_folder)

    # negative-class source folders (declared here; used later/elsewhere)
    urbansound_other_data_folders = [urbansound_folder + os.sep + 'data/air_conditioner',
                                     urbansound_folder + os.sep + 'data/car_horn', \
                                     urbansound_folder + os.sep + 'data/children_playing',
                                     urbansound_folder + os.sep + 'data/drilling', \
                                     urbansound_folder + os.sep + 'data/engine_idling',
                                     urbansound_folder + os.sep + 'data/gun_shot', \
                                     urbansound_folder + os.sep + 'data/jackhammer',
                                     urbansound_folder + os.sep + 'data/siren', \
                                     urbansound_folder + os.sep + 'data/street_music']

    # NOTE(review): SECOND_MS is used as a seconds->ms multiplier below but
    # is set to 500, not 1000 — confirm the halving is intentional.
    SECOND_MS = 500  #1000
    SEGMENT_MS = 500  #2000
    ASSIGNED_SAMPLERATE = 44100
    ESC50_AUDIO_START_POS = 500
    POSITIVE_SAMPLE_DB_TH = -40.0

    print('creating positive training set ..')
    idx = 0

    for file in os.listdir(urbansound_dogbark_data_folder):
        filename, extension = os.path.splitext(file)
        if extension == '.wav' or extension == '.ogg' or extension == '.mp3' or extension == '.flac' or extension == '.aif' or extension == '.aiff':
            # open sound file, resampled to mono 16-bit at the target rate
            audiopath = urbansound_dogbark_data_folder + os.sep + file
            print(audiopath)
            audio = AudioSegment.from_file(audiopath).set_frame_rate(
                ASSIGNED_SAMPLERATE).set_channels(1).set_sample_width(2)[:]
            # open csv file of "start,end" annotation rows (in seconds)
            csvpath = urbansound_dogbark_data_folder + os.sep + filename + '.csv'
            csv = open(csvpath, 'r')
            lines = csv.readlines()
            for line in lines:
                start = float(line.split(',')[0]) * SECOND_MS
                end = float(line.split(',')[1]) * SECOND_MS
                # stop slicing once less than 10% of the region remains
                chunk1 = (end - start) / 10
                current = start
                while 1:
                    outfile = urbansound_dogbark_graph_folder + os.sep + str(
                        idx) + '_dogbark.wav'
                    idx += 1
                    audioclip = audio[current:current + SEGMENT_MS]
                    if len(audioclip) != SEGMENT_MS:
                        # short tail clip: pad with quiet white noise
                        lack = SEGMENT_MS - len(
                            audioclip) + 100  # 100 for default crossfade
                        noiseclip = WhiteNoise().to_audio_segment(
                            duration=lack, volume=-50)
                        lastclip = audioclip.append(noiseclip)
                        if lastclip.dBFS > POSITIVE_SAMPLE_DB_TH:
                            lastclip.export(outfile, format='wav')
                        break
                    else:
                        # keep only clips loud enough to contain a bark
                        if audioclip.dBFS > POSITIVE_SAMPLE_DB_TH:
                            audioclip.export(outfile, format='wav')
                    current += SEGMENT_MS
                    chunk2 = end - current
                    if chunk2 < chunk1:
                        break
                # if current > end:
                # break
            csv.close()
Example #20
0
 def __init__(self, volume=-20.0, sample_rate=44100):
     """Store the noise volume (dBFS) and build a WhiteNoise generator
     at the given sample rate."""
     self.volume = volume
     self.white_noise_generator = WhiteNoise(sample_rate=sample_rate)
# Constants shared by all dataset-building stages.
_SECOND_MS = 1000
_SEGMENT_MS = 2000
_ASSIGNED_SAMPLERATE = 44100
_ESC50_AUDIO_START_POS = 500
_POSITIVE_SAMPLE_DB_TH = -40.0
# Audio container formats we accept as input.
_AUDIO_EXTENSIONS = ('.wav', '.ogg', '.mp3', '.flac', '.aif', '.aiff')


def _ensure_dir(path):
    # Create *path* with mode 0755 unless it already exists.
    if not os.path.exists(path):
        os.mkdir(path, 0o755)


def _load_mono_audio(path):
    # Load any supported audio file as mono, 16-bit, 44.1 kHz.
    return AudioSegment.from_file(path).set_frame_rate(
        _ASSIGNED_SAMPLERATE).set_channels(1).set_sample_width(2)[:]


def _create_positive_set(data_folder, out_folder):
    """Cut annotated dog-bark regions into fixed-length positive samples.

    Each audio file in *data_folder* has a same-named ``.csv`` whose rows
    start with ``start_seconds,end_seconds``.  Every annotated region is
    sliced into 2-second clips; a too-short tail clip is padded with quiet
    white noise.  Only clips louder than the dBFS threshold are written to
    *out_folder*.
    """
    idx = 0
    for fname in os.listdir(data_folder):
        base, extension = os.path.splitext(fname)
        if extension not in _AUDIO_EXTENSIONS:
            continue
        audiopath = data_folder + os.sep + fname
        print(audiopath)
        audio = _load_mono_audio(audiopath)
        csvpath = data_folder + os.sep + base + '.csv'
        # "with" guarantees the annotation file is closed even if the
        # slicing below raises (the original leaked the handle on error).
        with open(csvpath, 'r') as annotations:
            lines = annotations.readlines()
        for line in lines:
            start = float(line.split(',')[0]) * _SECOND_MS
            end = float(line.split(',')[1]) * _SECOND_MS
            # Stop slicing once less than 10% of the region remains.
            min_tail = (end - start) / 10
            current = start
            while 1:
                outfile = out_folder + os.sep + str(idx) + '_dogbark.wav'
                # NOTE: idx advances even when a clip is too quiet to be
                # exported, so numbering may contain gaps (original
                # behaviour, preserved).
                idx += 1
                audioclip = audio[current:current + _SEGMENT_MS]
                if len(audioclip) != _SEGMENT_MS:
                    # Tail clip: pad with -50 dB white noise; the extra
                    # 100 ms compensates for append()'s default crossfade.
                    lack = _SEGMENT_MS - len(audioclip) + 100
                    noiseclip = WhiteNoise().to_audio_segment(
                        duration=lack, volume=-50)
                    lastclip = audioclip.append(noiseclip)
                    if lastclip.dBFS > _POSITIVE_SAMPLE_DB_TH:
                        lastclip.export(outfile, format='wav')
                    break
                if audioclip.dBFS > _POSITIVE_SAMPLE_DB_TH:
                    audioclip.export(outfile, format='wav')
                current += _SEGMENT_MS
                if end - current < min_tail:
                    break


def _create_negative_set(data_folders, out_folder):
    """Export every 4th 2-second segment of non-dog audio as a negative."""
    idx = 0
    for folder in data_folders:
        for fname in os.listdir(folder):
            _, extension = os.path.splitext(fname)
            if extension not in _AUDIO_EXTENSIONS:
                continue
            audiopath = folder + os.sep + fname
            print(audiopath)
            try:
                audio = _load_mono_audio(audiopath)
                # int(... // ...) keeps integer semantics under both
                # Python 2 and 3 (plain "/" would yield a float in Py3
                # and break range()).
                num_segment = int(len(audio) // _SEGMENT_MS)
                for i in range(0, num_segment):
                    if i % 4 == 0:  # less sample :)
                        outfile = out_folder + os.sep + str(
                            idx) + '_other.wav'
                        idx += 1
                        audio[i * _SEGMENT_MS:(i + 1) * _SEGMENT_MS].export(
                            outfile, format='wav')
            except Exception:
                # Some files are unreadable; report and keep going
                # (narrowed from a bare except that also caught
                # KeyboardInterrupt/SystemExit).
                print('failed to load this one ^^^^^')


def _create_test_set(esc50_folder, dogbark_folder, other_folder):
    """Split ESC-50 into dog-bark and other 2-second test clips.

    Reads ``meta/esc50.csv`` (header skipped); column 0 is the audio file
    name, column 3 the category label.
    """
    idx = 0
    csvpath = esc50_folder + os.sep + 'meta' + os.sep + 'esc50.csv'
    with open(csvpath, 'r') as meta:
        lines = meta.readlines()
    for line in lines[1:]:  # skip the header row
        fname = line.split(',')[0]
        audiopath = esc50_folder + os.sep + 'audio' + os.sep + fname
        print(audiopath)
        audio = AudioSegment.from_file(audiopath)[:]
        audio = audio.set_frame_rate(_ASSIGNED_SAMPLERATE)
        audio = audio.set_channels(1)
        if line.split(',')[3] == 'dog':
            outfile = dogbark_folder + os.sep + str(idx) + '_dogbark.wav'
        else:
            outfile = other_folder + os.sep + str(idx) + '_other.wav'
        idx += 1
        audio[_ESC50_AUDIO_START_POS:_ESC50_AUDIO_START_POS +
              _SEGMENT_MS].export(outfile, format='wav')


def _create_kitchen_negatives(data_folders, out_folder):
    """Export the first 2 seconds of each kitchen recording as a negative."""
    idx = 0
    for folder in data_folders:
        for fname in os.listdir(folder):
            _, extension = os.path.splitext(fname)
            if extension not in _AUDIO_EXTENSIONS:
                continue
            audiopath = folder + os.sep + fname
            print(audiopath)
            try:
                audio = _load_mono_audio(audiopath)
                outfile = out_folder + os.sep + str(idx) + '_other.wav'
                idx += 1
                audio[0:_SEGMENT_MS].export(outfile, format='wav')
            except Exception:
                print('failed to load this one ^^^^^')


def main(args):
    """Build positive/negative/test datasets for a dog-bark detector.

    Expects *args* (e.g. an argparse namespace) to provide the attributes
    ``urbansound_dir``, ``esc50_dir`` and ``kitchen106_dir``.  Creates the
    ``graph/positive`` and ``graph/negative`` output trees and fills them
    with 2-second wav clips.
    """
    urbansound_folder = args.urbansound_dir
    urbansound_dogbark_data_folder = urbansound_folder + os.sep + 'data/dog_bark'
    urbansound_graph_folder = urbansound_folder + os.sep + 'graph'
    urbansound_dogbark_graph_folder = urbansound_graph_folder + os.sep + 'positive'
    urbansound_other_graph_folder = urbansound_graph_folder + os.sep + 'negative'
    esc50_folder = args.esc50_dir
    esc50_graph_folder = esc50_folder + os.sep + 'graph'
    esc50_dogbark_graph_folder = esc50_graph_folder + os.sep + 'positive'
    esc50_other_graph_folder = esc50_graph_folder + os.sep + 'negative'
    building_106_kitchen_folder = args.kitchen106_dir
    building_106_kitchen_graph_folder = building_106_kitchen_folder + os.sep + 'graph'
    building_106_kitchen_other_graph_folder = building_106_kitchen_graph_folder + os.sep + 'negative'

    print(esc50_dogbark_graph_folder)
    print(building_106_kitchen_other_graph_folder)

    # Parent folders precede children so os.mkdir succeeds in order.
    for folder in (urbansound_graph_folder,
                   urbansound_dogbark_graph_folder,
                   urbansound_other_graph_folder,
                   esc50_graph_folder,
                   esc50_dogbark_graph_folder,
                   esc50_other_graph_folder,
                   building_106_kitchen_graph_folder,
                   building_106_kitchen_other_graph_folder):
        _ensure_dir(folder)

    urbansound_other_data_folders = [
        urbansound_folder + os.sep + 'data/' + name
        for name in ('air_conditioner', 'car_horn', 'children_playing',
                     'drilling', 'engine_idling', 'gun_shot', 'jackhammer',
                     'siren', 'street_music')]

    building_106_kitchen_other_data_folders = [
        building_106_kitchen_folder + os.sep + 'training_segments/' + name
        for name in ('bag', 'blender', 'cornflakes_bowl', 'cornflakes_eating',
                     'cup', 'dish_washer', 'electric_razor',
                     'flatware_sorting', 'food_processor', 'hair_dryer',
                     'microwave', 'microwave_bell', 'microwave_door',
                     'plates_sorting', 'stirring_cup', 'toaster_up_down',
                     'toilet_button', 'toilet_flush', 'tooth',
                     'vacuum_cleaner', 'washing_machine', 'water_boiler',
                     'water_tap')]

    print('creating positive training set ..')
    _create_positive_set(urbansound_dogbark_data_folder,
                         urbansound_dogbark_graph_folder)

    print('creating negative training set ..')
    _create_negative_set(urbansound_other_data_folders,
                         urbansound_other_graph_folder)

    print('creating test set ..')
    _create_test_set(esc50_folder, esc50_dogbark_graph_folder,
                     esc50_other_graph_folder)

    print('creating more negative samples')
    _create_kitchen_negatives(building_106_kitchen_other_data_folders,
                              building_106_kitchen_other_graph_folder)