def add_replay_gain(cls, filenames, progress=None):
    """Adds ReplayGain values to a list of filename strings.

    All the filenames must be of this AudioFile type.
    Raises ValueError if some problem occurs during ReplayGain application.
    """

    tracks = [track for track in open_files(filenames)
              if isinstance(track, cls)]
    if len(tracks) > 0:
        for (track, track_gain, track_peak, album_gain,
             album_peak) in calculate_replay_gain(tracks, progress):
            metadata = track.get_metadata()
            if metadata is None:
                metadata = WavPackAPEv2([])
            metadata["replaygain_track_gain"] = ApeTagItem.string(
                "replaygain_track_gain", u"%+1.2f dB" % (track_gain))
            metadata["replaygain_track_peak"] = ApeTagItem.string(
                "replaygain_track_peak", u"%1.6f" % (track_peak))
            metadata["replaygain_album_gain"] = ApeTagItem.string(
                "replaygain_album_gain", u"%+1.2f dB" % (album_gain))
            metadata["replaygain_album_peak"] = ApeTagItem.string(
                "replaygain_album_peak", u"%1.6f" % (album_peak))
            track.set_metadata(metadata)
def add_replay_gain(cls, filenames):
    """Adds ReplayGain values to a list of filename strings.

    All the filenames must be of this AudioFile type.
    Raises ValueError if some problem occurs during ReplayGain application.
    """

    tracks = [track for track in open_files(filenames)
              if isinstance(track, cls)]
    if len(tracks) > 0:
        for (track, track_gain, track_peak, album_gain,
             album_peak) in calculate_replay_gain(tracks):
            metadata = track.get_metadata()
            if metadata is None:
                metadata = WavPackAPEv2([])
            metadata["replaygain_track_gain"] = ApeTagItem.string(
                "replaygain_track_gain", u"%+1.2f dB" % (track_gain))
            metadata["replaygain_track_peak"] = ApeTagItem.string(
                "replaygain_track_peak", u"%1.6f" % (track_peak))
            metadata["replaygain_album_gain"] = ApeTagItem.string(
                "replaygain_album_gain", u"%+1.2f dB" % (album_gain))
            metadata["replaygain_album_peak"] = ApeTagItem.string(
                "replaygain_album_peak", u"%1.6f" % (album_peak))
            track.set_metadata(metadata)
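# All of the examples here consume the same result shape:
# audiotools.calculate_replay_gain yields one five-element tuple per input
# track. A minimal, self-contained sketch of that contract (the input path
# "example.flac" is a placeholder, not taken from the examples above):

import audiotools

track = audiotools.open("example.flac")  # placeholder path
for (audiofile, track_gain, track_peak,
     album_gain, album_peak) in audiotools.calculate_replay_gain([track]):
    # gains are in dB, peaks are linear fractions of full scale
    print("%s: track %+.2f dB (peak %.6f), album %+.2f dB (peak %.6f)" %
          (audiofile.filename, track_gain, track_peak,
           album_gain, album_peak))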
from typing import Dict, Iterable  # needed for the annotations below

import audiotools


def compute_gain(self, fnames: Iterable[str],
                 album: bool = True) -> Dict[str, Dict[str, float]]:
    fnames = list(fnames)
    audio_files = audiotools.open_files(fnames)
    if len(audio_files) != len(fnames):
        raise Exception("Could not load some files")

    rginfo = {}
    tag_order = (
        "replaygain_track_gain",
        "replaygain_track_peak",
        "replaygain_album_gain",
        "replaygain_album_peak",
    )
    # calculate_replay_gain yields (AudioFile, track_gain, track_peak,
    # album_gain, album_peak); map the four numbers onto the tag names.
    for rg in audiotools.calculate_replay_gain(audio_files):
        rginfo[rg[0].filename] = dict(zip(tag_order, rg[1:]))
    return rginfo
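# A hypothetical call site for compute_gain above ("helper" stands in for
# whatever object the method is bound to, and the file names are placeholders):

info = helper.compute_gain(["01-intro.flac", "02-theme.flac"])
# info maps each filename to its four ReplayGain values, e.g.
#   info["01-intro.flac"]["replaygain_track_gain"]  -> gain in dB (float)
#   info["01-intro.flac"]["replaygain_track_peak"]  -> peak, fraction of full scale
print(info)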
def get_redirect_url(self, *args, **kwargs):
    album = get_object_or_404(Album, pk=kwargs['pk'])
    filenames = []
    album_tracks = album.track_set.all()
    for track in album_tracks:
        filenames.append(
            audiotools.Filename.from_unicode(track.get_fullpath()))
    at_files = audiotools.open_files(filenames, False)
    rg_list = audiotools.calculate_replay_gain(at_files)
    for index, rg in enumerate(rg_list):
        track = album_tracks[index]
        track.rg_gain = rg[1]
        track.rg_peak = rg[2]
        track.save()
        if index == 0:
            album.rg_gain = rg[3]
            album.rg_peak = rg[4]
            album.save()
    return super(AlbumRecalculateRGView, self).get_redirect_url(kwargs['pk'])
def mixFilesInSpeakerPath(spInd, folder):
    speakerPath = os.path.join(rootPathCleanCorpus, folder)
    wavFileList = glob.glob(os.path.join(speakerPath, '*.wav'))
    print 'Starting speaker %s...' % (folder)
    for (ind, wavFile) in enumerate(wavFileList):
        try:
            (fs, samples) = scipy.io.wavfile.read(wavFile)
            #samples = samples.astype(np.float64)/65536.0  # 64 Bit wav files
            samples = samples.astype(np.float64)/16384.0  # 16 Bit wav files
            #print 'Speech snippet %s read.' % (wavFile)

            tmp_file = os.path.dirname(wavFile)
            root_dir_name = os.path.basename(tmp_file)
            root_filename = str(os.path.splitext(os.path.basename(wavFile))[0]) + '_out' + str('.wav')
            print '*** Root Name file: ' + str(root_filename)

            #read annotation
            #with open(wavFile.replace("wav", "ano")) as f:
            #    anoList = [int(line.rstrip()) for line in list(f)]
            #if len(anoList) != len(samples):
            #    print 'Mismatch in size between annotation and track!'

            #get replaygain stats of current file
            file_rplgain = list(audiotools.calculate_replay_gain([
                audiotools.open(wavFile)
            ]))[0][1]

            #calculate gain to ref file and normalize accordingly
            gain = file_rplgain - ref_rplgain
            normSignal = samples * (10**(gain/20.0))

            if random.random() < probOfNoise:
                #mix with noise of same size
                noise = getRandomFadedNoise(len(normSignal))
                #calculate the random SNR
                randomSNR = snrMin + (snrMax - snrMin) * random.random()
                #amplify signal by reducing noise
                noise /= 10**(randomSNR/20)
                #normSignal *= 10**(randomSNR/20);
                normSignal += noise

            # CONVOLVING NOISE-MIXED SPEECH SIGNALS WITH THE IMPULSE RESPONSE SIGNALS
            irTrain1 = random.choice(impResps)
            irTrain2 = np.asarray(irTrain1)
            irTrain = irTrain2.flatten()
            #print "irTrain Type: " + str(type(irTrain))
            #print "irTrain Length: " + str(len(irTrain))
            #print "normSignal Length: " + str(len(normSignal))
            convolvedSignal1 = scipy.signal.fftconvolve(normSignal, irTrain)[:len(normSignal)]

            outputDir = os.path.join(outputPath1, root_dir_name)
            outputFile1 = os.path.join(outputDir, root_filename)
            #print '*** Output File Name: ' + str(outputFile1)
            if not os.path.exists(outputDir):
                os.makedirs(outputDir)
            #shutil.copyfile(wavFile.replace("wav", "ano"), outputFile1.replace("wav", "ano"))
            scipy.io.wavfile.write(outputFile1, wantedFs, convolvedSignal1)

            #f = open(outputFile1.replace("wav", "ano"), 'w')
            #for (ind, line) in enumerate(anoList):
            #    if ind == (len(anoList) - 1):
            #        #no \n at end of file
            #        f.write("%i" % (line))
            #    else:
            #        f.write("%i\n" % (line))
            #f.close()
        except ValueError:
            print "Value Error"
    print 'Speaker %s done' % (folder)


if __name__ == '__main__':
    cacheNoiseFiles()
    cacheImpulseResponses()

    #replaygain val of reference file
    ref_rplgain = list(audiotools.calculate_replay_gain([
        audiotools.open(replay_gain_ref_path)
    ]))[0][1]

    #get folder names (folders = speakers)
    all_speaker_names = os.walk(rootPathCleanCorpus).next()[1]
    print '%d speakers detected.' % (len(all_speaker_names))

    #USING SINGLE PROCESS
    for (ind, speaker) in enumerate(all_speaker_names):
        mixFilesInSpeakerPath(ind, speaker)

    # UTILIZING MULTIPLE PROCESSES via joblib.
    #results = Parallel(n_jobs=numJobs)(delayed(mixFilesInSpeakerPath)(ind, speaker)
    #                                   for (ind, speaker) in enumerate(all_speaker_names))

    print 'All done.'
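# The normalization step above hinges on converting a dB difference into a
# linear amplitude factor. A self-contained sketch of just that arithmetic,
# with made-up ReplayGain values:

ref_rplgain = -8.5   # made-up adjustment (dB) for the reference file
file_rplgain = -4.2  # made-up adjustment (dB) for the current file

gain_db = file_rplgain - ref_rplgain  # +4.3 dB relative to the reference
scale = 10.0 ** (gain_db / 20.0)      # ~1.64, linear amplitude factor
# Multiplying the samples by `scale` matches the file to the reference
# loudness: its remaining suggested adjustment becomes ref_rplgain.
print(scale)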
def main():
    # look for command line arguments
    args = sys.argv[1:]
    logging = False  # initialise so later checks don't hit an undefined name
    if '-h' in args or '--help' in args or '-?' in args:
        helptext()
        sys.exit(0)
    if '-l' in args:
        logging = True
        logfilename = args[args.index('-l') + 1]
        trimlogfile(logfilename)
        logfile = open(logfilename, 'a')
        oldstdout = sys.stdout
        oldstderr = sys.stderr
        logsplit = LogFileSplitter(sys.stdout, logfile)
        sys.stdout = logsplit
        sys.stderr = logsplit
        print('Logging started.')
    if '-c' in args:
        configfile = args[args.index('-c') + 1]
        if not os.path.isfile(configfile):
            print(configfile + ' does not exist, create it? (Y/N):')
            if not raw_input().lower() == 'y':
                print('OK, config file will not be created')
                if logging:
                    sys.stdout = oldstdout
                    sys.stderr = oldstderr
                    logfile.close()
                sys.exit(0)
    else:
        configfile = '/etc/ssp_sermon_podcast.xml'
    if ('--config' in args) or not os.path.isfile(configfile):
        edit_config_file(configfile)
        print('Config file created and will be used on next run.')
        if logging:
            sys.stdout = oldstdout
            sys.stderr = oldstderr
            logfile.close()
        sys.exit(0)

    #load the config file
    if os.path.isfile(configfile):
        try:
            configET = ET.parse(configfile)
        except:
            print('Can\'t parse config file ' + configfile)
            sys.exit(1)
        config = configET.getroot()
    if not ((config.tag == 'config') and
            (config.attrib['description'] ==
             'Seriously Simple Podcasting Sermon Podcast settings')):
        print(configfile + ' is not a SSP Sermon Podcast config file.')
        if logging:
            sys.stdout = oldstdout
            sys.stderr = oldstderr
            logfile.close()
        sys.exit(1)

    #get the settings from the config
    settings = config.find('settings')

    #open the wordpress object
    wordpress_url = settings.find('wordpress_url').text
    if wordpress_url.endswith('/'):
        xmlrpc_url = wordpress_url + 'xmlrpc.php'
    else:
        xmlrpc_url = wordpress_url + '/xmlrpc.php'
    wp = Client(xmlrpc_url, settings.find('wordpress_user').text,
                decryptpassword(settings.find('wordpress_pass').text))

    #get a list of podcast objects
    allpodcasts = []
    interval = 20
    offset = 0
    while True:
        podcastbatch = wp.call(GetPosts({'post_type': 'podcast',
                                         'number': interval,
                                         'offset': offset}))
        if len(podcastbatch) == 0:
            break
        allpodcasts.extend(podcastbatch)
        offset += interval
    print('Retrieved ' + str(len(allpodcasts)) + ' podcasts from WordPress site.')

    #get the series settings from the config and find out which series will be podcast
    allseriesconfigs = config.findall('series_config')
    termids_to_podcast = []
    for seriesconfig in allseriesconfigs:
        termids_to_podcast.append(seriesconfig.attrib['term_id'])

    #get a list of series from the blog
    listofseriesterms = []
    interval = 20
    offset = 0
    while True:
        termsbatch = wp.call(GetTerms('series', {'number': interval,
                                                 'offset': offset}))
        if len(termsbatch) == 0:
            break
        listofseriesterms.extend(termsbatch)
        offset += interval
    print('Found ' + str(len(listofseriesterms)) + ' podcast series on the WordPress site.')

    #find out the hierarchy of the series so we can do the lowest children first
    termpriority = {}
    term_parents = {}
    for term in listofseriesterms:
        term_parents[term.id] = term.parent
        order = 0
        parentid = term.parent
        while parentid != '0':
            order += 1
            for parentterm in listofseriesterms:
                if parentid == parentterm.id:
                    parentid = parentterm.parent
                    break
        termpriority[term.id] = order

    #so the order to approach term.ids is
    termid_order = []
    for termid, order in sorted(termpriority.iteritems(),
                                key=lambda x: x[1], reverse=True):
        termid_order.append(termid)
    print('This is the order the series terms will be published:')
    print(', '.join(termid_order))

    #find which series config the posts should be published with (if any)
    podcasts_to_do = {}
    extension = extension_dot(settings.findtext('source_audio_type'))
    for termid in termid_order:
        if termid in termids_to_podcast:
            for podcast in allpodcasts:
                #check whether the podcast is flagged to be published and has a date:
                date_recorded = get_post_custom_field(podcast, 'date_recorded')
                if get_post_custom_field(podcast, 'publish_now') and date_recorded:
                    podcast_termids = ['0']
                    for podcastterm in podcast.terms:
                        podcast_termids.append(podcastterm.id)
                    for podcast_termid in podcast_termids:
                        if podcast_termid in term_parents:
                            podcast_termids.append(term_parents[podcast_termid])
                    if termid in podcast_termids and not podcast.id in podcasts_to_do:
                        #work out what the start of the source file name will be:
                        termid_seriesconfig = seriescfg_from_term_id(
                            allseriesconfigs, termid)
                        source_date_format = termid_seriesconfig.find(
                            'source_date_format').text
                        date_recorded_format = settings.find(
                            'date_recorded_format').text
                        sourcefile_name_start = getdatetime(
                            date_recorded,
                            user_format=date_recorded_format
                        ).strftime(source_date_format) + termid_seriesconfig.findtext(
                            'source_file_code', default='')
                        sourcepath = termid_seriesconfig.findtext('source_path')
                        #and does it exist?
                        directorylist = []
                        if os.path.exists(sourcepath):
                            #this seems to timeout sometimes, so will loop if need be:
                            retrycount = 3
                            while retrycount:
                                try:
                                    directorylist = os.listdir(sourcepath)
                                    retrycount = 0
                                except OSError as errmsg:
                                    print(errmsg)
                                    retrycount -= 1
                                    if retrycount:
                                        print('Retrying directory list...')
                        for filename in directorylist:
                            if filename[:len(sourcefile_name_start)] == sourcefile_name_start:
                                if extension:
                                    extposn = -len(extension)
                                # check the None case first so extposn is only
                                # used once it has been set:
                                if extension == None or filename[extposn:] == extension:
                                    ordered_podcast_termids = []
                                    for termid_again in termid_order:
                                        if termid_again in podcast_termids:
                                            ordered_podcast_termids.append(termid_again)
                                    ordered_podcast_termids.append('0')
                                    podcasts_to_do[podcast.id] = [
                                        podcast,
                                        termid_seriesconfig,
                                        os.path.abspath(os.path.join(sourcepath, filename)),
                                        ordered_podcast_termids]
    print('There are ' + str(len(podcasts_to_do)) + ' podcasts to process in this pass.')

    if len(podcasts_to_do) != 0:
        listofposttags = []
        interval = 20
        offset = 0
        while True:
            termsbatch = wp.call(GetTerms('post_tag', {'number': interval,
                                                       'offset': offset}))
            if len(termsbatch) == 0:
                break
            listofposttags.extend(termsbatch)
            offset += interval
        posttagsdict = {}
        for posttag in listofposttags:
            posttagsdict[posttag.name.lower()] = posttag
        print('Retrieved ' + str(len(posttagsdict)) + ' post tags from WordPress site.')

        #iterate over the podcasts
        for podcast_id, podcast_and_config in podcasts_to_do.iteritems():
            #open the audio file
            print('\n')
            print('Now processing file ' + podcast_and_config[2])
            backuppodcast = copy.deepcopy(podcast_and_config[0])
            try:
                sourceaudio = audiotools.open(podcast_and_config[2])
                sourcepcm = sourceaudio.to_pcm()
                #calculate its loudness
                loudness = audiotools.calculate_replay_gain([sourceaudio])
                for loudnesstuple in loudness:
                    gain = loudnesstuple[1]
                    peak = loudnesstuple[2]
                if peak == 0:
                    print('This audio file is silent, ignoring it.')
                    continue

                #mix it to the specified number of channels
                gaincorrection = 0
                if settings.findtext('audiochannels') == '1':
                    print('Converting to mono.')
                    sourcepcm_mixed = audiotools.pcmconverter.Averager(sourcepcm)
                elif settings.findtext('audiochannels') == '2':
                    print('Converting to stereo.')
                    sourcepcm_mixed = audiotools.pcmconverter.Downmixer(sourcepcm)
                    if sourceaudio.channels() == 1:
                        gaincorrection = 6.0
                else:
                    sourcepcm_mixed = sourcepcm

                #adjust the gain to the users' preference instead of replaygain's target -20
                target_loudness = float(
                    settings.findtext('target_loudness', default='-24'))
                newgain = gain + (target_loudness + 20.0) + gaincorrection
                newpeak = 1.0 / (10.0**(newgain / 20.0))
                if (peak / (10.0**(gaincorrection / 20.0))) > newpeak:
                    newpeak = peak / (10.0**(gaincorrection / 20.0))
                print('Normalising for gain: ' + str(round(newgain, 2)) +
                      'dB, peak = ' +
                      str(round((20.0 * log10(peak / (10.0**(gaincorrection / 20.0)))), 2)) +
                      'dBFS.')

                #normalise the audio to the target loudness
                sourcepcm_normalised = audiotools.replaygain.ReplayGainReader(
                    sourcepcm_mixed, newgain, newpeak)
                try:
                    bitspersample = int(settings.findtext('bitspersample'))
                except:
                    bitspersample = None
                if bitspersample:
                    print('Quantising to ' + str(bitspersample) + '-bit.')
                    sourcepcm_resampled = audiotools.pcmconverter.BPSConverter(
                        sourcepcm_normalised, bitspersample)
                else:
                    # pass through unquantised so sourcepcm_resampled is always defined
                    sourcepcm_resampled = sourcepcm_normalised

                #make some tempfiles:
                process_tempfile = tempfile.mkstemp(
                    suffix='.wav', prefix='sermon_process_tempfile')
                processed_tempfile = tempfile.mkstemp(
                    suffix='.wav', prefix='sermon_processed_tempfile')
                encoded_tempfile = tempfile.mkstemp(
                    suffix=extension_dot(settings.findtext('encoded_audio_type')),
                    prefix='sermon_encoded_tempfile')
                print('tempfiles: ' + process_tempfile[1] + ', ' +
                      processed_tempfile[1] + ', ' + encoded_tempfile[1])

                #write the audio back out to a wave file for processing
                audiotools.WaveAudio.from_pcm(process_tempfile[1], sourcepcm_resampled)
                sourcepcm_normalised.close()
                sourcepcm_mixed.close()
                sourcepcm.close()
                sourceaudio = None
                audioparams = getaudioparams(sourcepcm_resampled)
                sourcepcm_resampled.close()

                subprocess_args = [settings.findtext('processing_utility')]
                for argsubelement in settings.findall('processing_utility_arg'):
                    subprocess_args.append(
                        Template(argsubelement.text).substitute(audioparams))
                tempstring = settings.findtext('processing_utility_infile')
                if tempstring:
                    subprocess_args.append(tempstring)
                subprocess_args.append(process_tempfile[1])
                tempstring = settings.findtext('processing_utility_outfile')
                if tempstring:
                    subprocess_args.append(tempstring)
                subprocess_args.append(processed_tempfile[1])
                print('Now processing audio ...')
                print(subprocess.Popen(subprocess_args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       universal_newlines=True).communicate()[0])
                os.remove(process_tempfile[1])

                processedfile = audiotools.open(processed_tempfile[1])
                audioparams = getaudioparams(processedfile.to_pcm())
                subprocess_args = [settings.findtext('encoding_utility')]
                for argsubelement in settings.findall('encoding_utility_arg'):
                    subprocess_args.append(
                        Template(argsubelement.text).substitute(audioparams))
                tempstring = settings.findtext('encoding_utility_infile')
                if tempstring:
                    subprocess_args.append(tempstring)
                subprocess_args.append(processed_tempfile[1])
                tempstring = settings.findtext('encoding_utility_outfile')
                if tempstring:
                    subprocess_args.append(tempstring)
                subprocess_args.append(encoded_tempfile[1])
                print('Now encoding audio ...')
                print(subprocess.Popen(subprocess_args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT,
                                       universal_newlines=True).communicate()[0])
                os.remove(processed_tempfile[1])

                wp_details = make_podcast_dict(podcast_and_config, wp, settings)
                wp_details['post_status'] = 'publish'
                wp_details['publish_now'] = ''
                updated_podcast = publish_post(podcast_and_config, wp_details,
                                               wp, settings)
                podcast_and_config[0] = updated_podcast
                updated_details = make_podcast_dict(podcast_and_config, wp,
                                                    settings, final_pass=True)

                try:
                    imageurl = urllib2.urlopen(updated_details['image'])
                    podcastimage = imageurl.read()
                except:
                    podcastimage = False
                try:
                    audioimage = [audiotools.Image.new(podcastimage, u'Artwork', 0)]
                except:
                    audioimage = []

                outputmetadata = audiotools.MetaData(
                    track_name=updated_details['title'],
                    track_number=int(updated_details['episode_number']),
                    album_name=updated_details['series'],
                    artist_name=updated_details['preacher'],
                    copyright=updated_details['copyright'],
                    publisher=updated_details['publisher'],
                    year=updated_details['date'].strftime('%Y'),
                    date=updated_details['date_recorded'],
                    comment=updated_details['content'],
                    images=audioimage)
                outputfile = audiotools.open(encoded_tempfile[1])
                outputfile.set_metadata(outputmetadata)
                outputfile_seconds = int(outputfile.seconds_length())
                outputfile_name = updated_details['output_file_template'] + \
                    extension_dot(settings.findtext('encoded_audio_type'))
                outputfile_size = ftp_encodedfile(encoded_tempfile[1],
                                                  outputfile_name,
                                                  podcast_and_config[1])
                if outputfile_size == None:
                    raise Exception('FTP appears not to have worked.')
                print('\n')
                print('Output file size = ' + str(outputfile_size))
                print('Output file duration = ' + str(outputfile_seconds))
                print('\n')
                os.remove(encoded_tempfile[1])

                urlpath = podcast_and_config[1].findtext('download_path')
                if not urlpath[-1] == '/':
                    urlpath = urlpath + '/'
                updated_details['audio_file'] = urlpath + outputfile_name
                updated_details['filesize_raw'] = str(outputfile_size)
                mins = str(outputfile_seconds / 60)
                secs = str(outputfile_seconds % 60)
                if len(secs) == 1:
                    secs = '0' + secs
                updated_details['duration'] = mins + ':' + secs

                #put the preacher in as a tag:
                updated_details['tags'] = []
                if updated_details['preacher'].lower() in posttagsdict:
                    updated_details['tags'].append(
                        posttagsdict[updated_details['preacher'].lower()])
                else:
                    tag = WordPressTerm()
                    tag.taxonomy = 'post_tag'
                    tag.name = updated_details['preacher']
                    tag.id = wp.call(NewTerm(tag))
                    updated_details['tags'].append(tag)
                    posttagsdict[tag.name.lower()] = tag

                #put the book(s) of the bible in as tags:
                #This bit is really messy and I should try to write my own scripture
                #regular expressions, but in the interest of speed:
                listofpassages = scriptures.extract(updated_details['bible_passage'])
                if 'song of songs' in updated_details['bible_passage'].lower():
                    listofpassages.append(('Song of Songs', 1, 1, 1, 1))
                for passage in listofpassages:
                    book = passage[0]
                    if book[:4] == 'III ':
                        bookname = '3 ' + book[4:]
                    elif book[:3] == 'II ':
                        bookname = '2 ' + book[3:]
                    elif book[:2] == 'I ':
                        bookname = '1 ' + book[2:]
                    elif book == 'Song of Solomon':
                        bookname = 'Song of Songs'
                    else:
                        bookname = book
                    if bookname.lower() in posttagsdict:
                        updated_details['tags'].append(posttagsdict[bookname.lower()])
                    else:
                        tag = WordPressTerm()
                        tag.taxonomy = 'post_tag'
                        tag.name = bookname
                        tag.id = wp.call(NewTerm(tag))
                        updated_details['tags'].append(tag)
                        posttagsdict[tag.name.lower()] = tag

                finalpost = publish_post(podcast_and_config, updated_details,
                                         wp, settings)
                print('Final Post details are as follows:\n')
                for field, contents in finalpost.struct.iteritems():
                    try:
                        if type(contents) == types.StringType:
                            print(field + ' : ' + contents)
                        elif type(contents) == types.ListType:
                            for subcontents in contents:
                                print(field + ' : ' + str(subcontents))
                        elif type(contents) == types.DictType:
                            for subfield, subcontents in contents.iteritems():
                                print(field + ' : ' + subfield + ' : ' + str(subcontents))
                        elif type(contents) == types.UnicodeType:
                            print(field + ' : ' + contents.encode('ascii', 'ignore'))
                        else:
                            print(field + ' : ' + str(contents))
                    except:
                        print('Can\'t print field')

            except Exception as message:
                print('ERROR: Exception raised while processing that podcast:')
                print(message)
                print('Attempting to restore original post prior to modification...')
                try:
                    if wp.call(EditPost(backuppodcast.id, backuppodcast)):
                        print('Post restored.')
                    else:
                        print('Unable to restore original post.')
                except Exception as message:
                    print('Unable to restore original post: ')
                    print(message)
                try:
                    os.remove(encoded_tempfile[1])
                except:
                    pass
                try:
                    os.remove(processed_tempfile[1])
                except:
                    pass
                try:
                    os.remove(process_tempfile[1])
                except:
                    pass

    if logging:
        # only valid when logging was enabled, since logsplit exists only then
        logsplit.write('Completed with normal exit\n\n\n')
        sys.stdout = oldstdout
        sys.stderr = oldstderr
        logfile.close()
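# The loudness adjustment in main() re-targets ReplayGain (treated as a
# -20 dBFS target, per the comment in the code) to the user's preferred
# level. A standalone sketch of that arithmetic, with made-up numbers:

gain = -6.3              # made-up ReplayGain adjustment for the file (dB)
gaincorrection = 0.0     # 6.0 only when a mono source is converted to stereo
target_loudness = -24.0  # user's target loudness from the config (dBFS)

newgain = gain + (target_loudness + 20.0) + gaincorrection  # -10.3 dB
newpeak = 1.0 / (10.0 ** (newgain / 20.0))                  # ~3.27
# newpeak is the input peak that would land exactly at full scale once
# newgain is applied; it is the peak value handed to ReplayGainReader
# alongside the gain.
print(newgain, newpeak)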
def audioeval(speechFile, referenceFile, noiseFile, root_dir_name,
              output_root_directory, ir_noise_file):
    "This function evaluates a single audio file."
    print 'Noise File Path: ' + str(noiseFile)
    print 'IR-Noise File Path: ' + str(ir_noise_file)

    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # ReplayGain calculation of reference
    ref = audiotools.open(referenceFile)
    ref_replay_gain = audiotools.calculate_replay_gain([ref])
    ref_track_gain = list(list(ref_replay_gain)[0])[1]
    #print ref_track_gain

    # ReplayGain calculation of example speech file
    speech = audiotools.open(speechFile)
    speech_replay_gain = audiotools.calculate_replay_gain([speech])
    speech_track_gain = list(list(speech_replay_gain)[0])[1]
    #print speech_track_gain

    # Normalization of example speech file
    (rate_speech, data_speech) = wav.read(speechFile)
    gain = ref_track_gain - speech_track_gain
    data_normalized = numpy.asarray(data_speech * math.pow(10, (-(gain) / 20)),
                                    dtype=numpy.int16)
    normalizedFile = "speech_normalized.wav"
    wav.write(normalizedFile, rate_speech, data_normalized)

    # Loudness test of normalized example speech
    test = audiotools.open(normalizedFile)
    test_replay_gain = audiotools.calculate_replay_gain([test])
    test_track_gain = list(list(test_replay_gain)[0])[1]
    #print test_track_gain

    # Randomly choosing one noise file from the pool
    # (here one is fixed, waiting for implementation later)
    # Using pydub to get the lengths of the normalized speech file and the noise file.
    speech_normalized = pydub.AudioSegment.from_wav(normalizedFile)
    # All the noise files were converted to 16-bit int format beforehand; the
    # directory location is passed in so a different noise file can be chosen
    # at random for each speech file.
    noise = pydub.AudioSegment.from_wav(noiseFile)
    speech_normalized_length = speech_normalized.duration_seconds
    noise_length = noise.duration_seconds

    # Selecting a random start point in the noise file to get a segment of the
    # required length (pydub does things in milliseconds)
    start = random.randrange(0, int(noise_length - speech_normalized_length) * 1000)
    noise_segmented = noise[start:int(start + speech_normalized_length * 1000)]
    noise_segmented.export("noise_segmented.wav", format="wav")

    # Linear fading of the sharply segmented noise segment:
    # 1 sec fade in, 1 sec fade out (how long is good? 1 sec?)
    noise_faded = noise_segmented.fade_in(1000).fade_out(1000)
    noise_faded.export("noise_faded.wav", format="wav")

    # Picking a random signal-to-noise ratio (SNR)
    SNR_ratio = random.randint(-2, 20)
    #print "SNR_ratio: " + str(SNR_ratio)

    # loudness in dBFS (decibels relative to full scale)
    # (all peak measurements will be negative numbers)
    speech_dB = speech_normalized.dBFS
    noise_dB = noise_segmented.dBFS
    #print "loudness of speech: " + str(speech_dB)
    #print "loudness of noise: " + str(noise_dB)

    # Change the amplitude (generally, loudness) of the speech by the SNR
    # ratio relative to the noise. Gain is specified in dB.
    gain = SNR_ratio - (speech_dB - noise_dB)
    #print "gain: " + str(gain)
    speech_SNRed = speech_normalized.apply_gain(gain)
    #print "loudness of adjusted speech: " + str(speech_SNRed.dBFS)
    # check SNR
    #print "check SNR: " + str(speech_SNRed.dBFS - noise_dB)

    # Mix the two tracks by adding the respective samples.
    # (If the overlaid AudioSegment is longer than this one, the result will be
    # truncated.) Since the sample values increase through the summation, they
    # may exceed the maximum imposed by the data type; how does this API deal
    # with that problem?
    noisy_speech = speech_SNRed.overlay(noise_segmented)
    noisy_speech.export("noisy_speech.wav", format="wav")

    # Draw an impulse response from the pool (...waiting to implement) and
    # peak-normalize it to 0 dB (= 1) by dividing the IR vector by its maximum value.
    (rate_IR, data_IR) = wav.read(ir_noise_file)
    # data_IR.dtype is int16; change it into float64
    data_IR = data_IR.astype(numpy.float64) / 65536.0
    data_IR = data_IR / data_IR.max()

    # Convolve the speech with the normalized IR
    (rate_noisy_speech, data_noisy_speech) = wav.read("noisy_speech.wav")
    speech_convolved = numpy.convolve(data_IR, data_noisy_speech)

    #print "Root Directory Name: " + str(root_dir_name)
    output_directory = os.path.join(output_root_directory, root_dir_name)
    #print output_directory
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    #speech_convolved_file = output_directory + '/' + str(os.path.splitext(speechFile)[0]) + "_convolved.wav"
    speech_convolved_file_name = os.path.basename(speechFile)
    #print "Speech File Name: " + str(speech_convolved_file_name)
    speech_convolved_file = os.path.join(output_directory, speech_convolved_file_name)
    print "Final output file path: " + str(speech_convolved_file)

    # Cut the convolved track to its original length if prolonged, and store the result
    wav.write(speech_convolved_file, rate_noisy_speech,
              speech_convolved[:data_noisy_speech.size])

    # MFCC CODE *********** COMMENTED OUT **************
    # MFCC feature extraction
    # Do the default parameters (frame size etc.) work for you?
    #(rate, sig) = wav.read(speech_convolved_file)
    #mfcc_feat = mfcc(sig, rate)
    #print "MFCC Shape:"
    #print mfcc_feat.shape
    #print mfcc_feat
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    ## Cleanup code which deletes the generated intermediate files goes here.
    return
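# The SNR adjustment above is plain dB arithmetic: shift the speech by the
# difference between the desired SNR and the current one. A self-contained
# sketch with made-up dBFS levels:

speech_dB = -23.0  # made-up loudness of the normalized speech (dBFS)
noise_dB = -30.0   # made-up loudness of the noise segment (dBFS)
SNR_ratio = 10     # desired signal-to-noise ratio (dB)

current_snr = speech_dB - noise_dB  # 7 dB before any adjustment
gain = SNR_ratio - current_snr      # +3 dB applied to the speech
# After apply_gain(gain): (speech_dB + gain) - noise_dB == SNR_ratio
print(gain)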