def setUp(self):
    """
    Create the converter, open the two input samples and make sure that
    no previously converted file remains at the output path.

    """
    self._converter = AudioSppasPresenter()
    self._sample_1 = signals.open(TestExport._sample_path_1)
    self._sample_2 = signals.open(TestExport._sample_path_2)

    # Path of the file written by the tests: start without it.
    converted_path = os.path.join(samples, "converted.wav")
    self._sample_path_new = converted_path
    if os.path.exists(converted_path):
        os.remove(converted_path)
def extract_channels(settings, factor=0, channel=0, p=None):
    """
    Extract one channel of each sound file of the settings.

    Each dictionary of settings is updated in place: the extracted channel
    is stored under the key 'channel' and, only if factor is positive, a
    volume coefficient is stored under the key 'factor'.

    @param settings: a list of dictionaries with extracted information from
    a settings file. Each one is read with the key 'file'.
    @param factor: coefficient to apply to the volume (not used if <= 0)
    @param channel: index of the channel to extract
    @param p is a progress dialog (or None)

    """
    total = len(settings)

    for i, channelparameter in enumerate(settings):
        if p:
            p.update(float(i)/total, "Channel " + str(i+1) + " of " + str(total))

        audio = signals.open(channelparameter['file'])
        try:
            # CHANNEL EXTRACTION
            idx = audio.extract_channel(channel)
            extracted = audio.get_channel(idx)
            channelparameter['channel'] = extracted

            # RMS EXTRACTION
            if factor > 0:
                rms = audioutils.get_rms(extracted.frames, audio.get_sampwidth())
                rmswanted = audioutils.mel2db(audioutils.db2mel(rms)*factor)
                channelparameter['factor'] = rmswanted/rms
        finally:
            # Release the file even if the extraction or the rms
            # estimation failed.
            audio.close()
def FileSelected(self, filename):
    """
    Show information of a sound file.

    The file is opened, its properties are read, then it is closed. If
    anything fails, every field displays NO_INFO_LABEL instead and the
    error is logged.

    @param filename (string) name of the sound file

    """
    try:
        logging.info('PropertyPanel. File: '+filename)
        _audio = signals.open(filename)
        try:
            duration = float(_audio.get_duration())
            value_list = [
                filename,
                '%.2f' % duration,
                str(_audio.get_framerate()),
                str(_audio.get_sampwidth()),
                str(_audio.get_nchannels()),
                str(_audio.get_minvolume()),
                str(_audio.get_meanvolume()),
                str(_audio.get_maxvolume()),
            ]
        finally:
            # The file is only needed to read its properties: release it.
            _audio.close()
    except Exception as e:
        value_list = [NO_INFO_LABEL] * len(LABEL_LIST)
        logging.info(" ... Error reading %s: %s" % (filename,e))

    for i, value in enumerate(value_list):
        # SetLabel(value) is deprecated (it only works on windows);
        # ChangeValue also works on linux.
        self._values[i].ChangeValue(value)
    self.SetForegroundColour(self._prefs.GetValue('M_FG_COLOUR'))
def test_WriteFrames(self):
    """
    Check that a written audio file keeps the parameters of its source.

    For sample 1 (written as "newFile.wav") and sample 4 (written as
    "newFile.aiff"), the audio is written twice: with signals.save(), then
    with signals.save_fragment() on all the frames of the sample. Each
    written file is opened again to compare its framerate, sample width,
    number of channels and number of frames with the ones of the sample.

    """
    def check_written(filename, source):
        # Compare the written file with its source, then close and delete
        # it in any case, so that a failure leaves nothing behind.
        try:
            newFile = signals.open(filename)
            try:
                self.assertEqual(newFile.get_framerate(), source.get_framerate())
                self.assertEqual(newFile.get_sampwidth(), source.get_sampwidth())
                self.assertEqual(newFile.get_nchannels(), source.get_nchannels())
                self.assertEqual(newFile.get_nframes(), source.get_nframes())
            finally:
                newFile.close()
        finally:
            if os.path.exists(filename):
                os.remove(filename)

    def roundtrip(filename, source):
        # Whole audio saved at once
        signals.save(filename, source)
        check_written(filename, source)

        # All the frames saved as a fragment
        source.rewind()
        signals.save_fragment(filename, source, source.read_frames(source.get_nframes()))
        check_written(filename, source)

    roundtrip("newFile.wav", self._sample_1)
    roundtrip("newFile.aiff", self._sample_4)
def test_Export_Change_Framerate(self):
    """
    Export sample 2 with the converter, then check that the written file
    exists, has the framerate and sample width of the converter, a single
    channel, and the same frames/framerate ratio as the sample.

    """
    converter = self._converter
    target = self._sample_path_new

    converted = converter.export(self._sample_path_2, target)
    self.assertEqual(converted, True)
    self.assertEqual(os.path.exists(target), True)

    newaudio = signals.open(target)
    self.assertEqual(newaudio.get_framerate(), converter.get_framerate())
    self.assertEqual(newaudio.get_sampwidth(), converter.get_sampwidth())
    self.assertEqual(newaudio.get_nchannels(), 1)

    # Number of frames divided by the framerate, for both files
    source = self._sample_2
    new_ratio = newaudio.get_nframes()/newaudio.get_framerate()
    source_ratio = source.get_nframes()/source.get_framerate()
    self.assertEqual(new_ratio, source_ratio)
def test_Save(self):
    """
    Save an audio made of channel 0 of sample 1, open the saved file and
    compare its frames with the frames read from the sample.

    """
    source = self._sample_1
    channel = source.get_channel(source.extract_channel(0))

    # Write an audio with this single channel, then read it back
    audio = Audio()
    audio.append_channel(channel)
    signals.save(TestChannel._sample_path_new, audio)
    savedaudio = signals.open(TestChannel._sample_path_new)

    source.rewind()
    frames = source.read_frames(source.get_nframes())
    savedframes = savedaudio.read_frames(source.get_nframes())

    self.assertEqual(len(frames), len(savedframes))
    self.assertEqual(frames, savedframes)
def export(self, inputname, outputname):
    """
    Create a new wav file with requested parameters.

    A file is written only if the sample width, the number of channels or
    the framerate of the input file differs from the requested one. In
    that case, channel 0 of the input is converted to the requested
    framerate and sample width, and saved in outputname.

    @param inputname (string) name of the inputfile
    @param outputname (string) name of the outputfile
    @return True if a converted file was written, False if nothing had to
    be changed (no file is written)
    @raise NameError if the sample width or the framerate of the input
    file is lower than the requested one

    """
    toconvert = False
    channel = None

    audio = signals.open(inputname)
    try:
        sampwidth = audio.get_sampwidth()
        if sampwidth < self._reqSamplewidth:
            raise NameError("The sample width of ("+str(sampwidth)+") of the given file is not appropriate. " + str(self._reqSamplewidth) + " bytes required")

        framerate = audio.get_framerate()
        if framerate < self._reqFramerate:
            raise NameError("The framerate of ("+str(framerate)+") of the given file is not appropriate. " + str(self._reqFramerate) + " Hz required")

        if self._reqSamplewidth != sampwidth:
            toconvert = True
            if self._logfile:
                self._logfile.print_message("The sample width of ("+str(sampwidth)+") of the given file is not appropriate. Sample width is changed to " + str(self._reqSamplewidth) + " bytes", indent=3, status=1)

        nchannels = audio.get_nchannels()
        if self._reqChannels != nchannels:
            toconvert = True
            if self._logfile:
                self._logfile.print_message("The number of channels of ("+str(nchannels)+") of the given file is not appropriate. Number of channels is changed to " + str(self._reqChannels) + " channels", indent=3, status=1)

        if self._reqFramerate != framerate:
            toconvert = True
            if self._logfile:
                self._logfile.print_message("The framerate of ("+str(framerate)+") of the given file is not appropriate. Framerate is changed to " + str(self._reqFramerate) + " Hz", indent=3, status=1)

        if toconvert is True:
            # Get the expected channel while the input is still open
            idx = audio.extract_channel(0)
            channel = audio.get_channel(idx)
    finally:
        # No more need of input data: close it, whether a conversion is
        # required or not, and also when an error is raised.
        audio.close()

    if toconvert is True:
        # Do the job (do not modify the initial channel).
        formatter = ChannelFormatter(channel)
        formatter.set_framerate(self._reqFramerate)
        formatter.set_sampwidth(self._reqSamplewidth)
        formatter.convert()

        # Save the converted channel
        audio_out = Audio()
        audio_out.append_channel(formatter.channel)
        signals.save(outputname, audio_out)

    return toconvert
def run_alignment(self, inputwav, basename, outputalign):
    """
    Execute the external program julius to align.

    The duration of the wav file is given to run_basic(); a duration of 0.
    is used if the file cannot be opened or its duration cannot be read.

    @param inputwav is the wav input file name.
    @param basename is the name of the phon file
    @param outputalign is the output file name.

    """
    duration = 0.
    try:
        wavspeech = signals.open(inputwav)
        try:
            duration = wavspeech.get_duration()
        finally:
            # Only the duration is needed: do not keep the file open.
            wavspeech.close()
    except Exception:
        # Deliberate best effort: without a readable wav file, the
        # duration already assigned (0. by default) is used.
        pass

    self.run_basic(duration, basename, outputalign)
def run(self, audiofile, trsfile, output_dir, output_ext="TextGrid"):
    """
    Split an audio file into multiple small audio file.

    The tracks are given by a tier of the transcription whose name
    contains 'name' (if several tiers match, the last one is used). For
    each track, a wav file and a transcription file are written in
    output_dir; both are named with the label of the track.

    @param audiofile is the audio input file name
    @param trsfile is the transcription input file name
    @param output_dir is a directory name to save output tracks (one per
    unit). It is created if it does not exist.
    @param output_ext (default TextGrid) is the extension of the
    transcription files to write
    @raise Exception if no tier name contains 'name'
    @raise ValueError if a track begins after the end of the audio

    """
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    audiospeech = signals.open(audiofile)
    try:
        transcription = annotationdata.io.read(trsfile)

        tracks_tier = None
        for tier in transcription:
            if "name" in tier.GetName().lower():
                tracks_tier = tier
        if tracks_tier is None:
            raise Exception("Expected tier not found: a tier name must contain 'name'")

        list_transcription = TrsUtils.Split(transcription, tracks_tier)
        names = [a.GetLabel().GetValue() for a in tracks_tier if not a.GetLabel().IsEmpty()]

        # Positions of the tracks, in frames. They have to be estimated
        # before the transcription is shifted.
        framerate = audiospeech.get_framerate()
        trstracks = []
        for trs in list_transcription:
            begin = int(trs.GetBegin() * framerate)
            end = int(trs.GetEnd() * framerate)
            trstracks.append((begin, end))
            TrsUtils.Shift(trs, trs.GetBegin())

        chunks = []
        nframes = audiospeech.get_nframes()
        for from_pos, to_pos in trstracks:
            if nframes < from_pos:
                raise ValueError("Position %d not in range(%d)" % (from_pos, nframes))
            audiospeech.set_pos(from_pos)
            chunks.append(audiospeech.read_frames(to_pos - from_pos))

        for name, chunk, trs in zip(names, chunks, list_transcription):
            # A chunk is a sequence of frames, not an audio instance: it is
            # saved as a fragment of the input audio.
            signals.save_fragment(os.path.join(output_dir, name + ".wav"), audiospeech, chunk)
            annotationdata.io.write(os.path.join(output_dir, name + "." + output_ext), trs)
    finally:
        # Close the input only once all the fragments are written.
        audiospeech.close()
def diagnosis(self, inputname):
    """
    Check if a file has to be converted to correspond to the requirements.

    @param inputname (string) name of the inputfile
    @return True if the sample width, the number of channels or the
    framerate of the file differs from the requested one (i.e. the file
    has to be converted), False if the file corresponds to the
    requirements.

    """
    audio = signals.open(inputname)
    try:
        differences = [
            self._reqSamplewidth != audio.get_sampwidth(),
            self._reqChannels != audio.get_nchannels(),
            self._reqFramerate != audio.get_framerate(),
        ]
    finally:
        # Close the file even if one of its parameters cannot be read.
        audio.close()

    return any(differences)
# ----------------------------------------------------------------------------
# Verify and extract args:
# ----------------------------------------------------------------------------

parser = ArgumentParser(
    usage="%s -w file [options]" % os.path.basename(PROGRAM),
    description="... a script to get information about a wav file.")
parser.add_argument(
    "-w", metavar="file", required=True,
    help='Input wav file name')
parser.add_argument(
    "-f", metavar="value", default=0.01, type=float,
    help='Frame duration to estimate rms (default: 0.01)')

# Without any argument, ask for the help message.
if len(sys.argv) <= 1:
    sys.argv.append('-h')

args = parser.parse_args()

# ----------------------------------------------------------------------------

wavspeech = signals.open(args.w)
wavspeech.frameduration = args.f

# Each information is estimated only when its line is printed. A label and
# its value are separated by a whitespace, added to the one ending the label.
report = (
    ("Wav file name: ", lambda: args.w),
    ("Duration in seconds: ", wavspeech.get_duration),
    ("Frame rate: ", wavspeech.get_framerate),
    ("Sample width: ", wavspeech.get_sampwidth),
    ("Channels: ", wavspeech.get_nchannels),
    ("Volume min: ", wavspeech.get_minvolume),
    ("Volume max: ", wavspeech.get_maxvolume),
    ("Volume mean: ", wavspeech.get_meanvolume),
)
for label, getter in report:
    print("%s %s" % (label, getter()))

# ----------------------------------------------------------------------------
def setUp(self):
    """Open the two sample audio files used by the tests."""
    case = TestChannel
    self._sample_1 = signals.open(case._sample_path_1)
    self._sample_2 = signals.open(case._sample_path_2)
# Bounds of the mix: each bound can be given either in seconds (-bs, -es) or
# in number of frames (-bf, -ef). The four values are parsed as float and
# default to 0.
parser.add_argument("-bs", default=0, metavar="value", type=float, help='The position (in seconds) when begins the mix, don\'t use with -bf')
parser.add_argument("-es", default=0, metavar="value", type=float, help='The position (in seconds) when ends the mix, don\'t use with -ef')
parser.add_argument("-bf", default=0, metavar="value", type=float, help='The position (in number of frames) when begins the mix, don\'t use with -bs')
parser.add_argument("-ef", default=0, metavar="value", type=float, help='The position (in number of frames) when ends the mix, don\'t use with -es')

# ----------------------------------------------------------------------------

# Without any argument, ask for the help message.
if len(sys.argv) <= 1:
    sys.argv.append('-h')

args = parser.parse_args()

# ----------------------------------------------------------------------------

audio_out = Audio()
audio = signals.open(args.w)

# The same bound cannot be given in both units.
# NOTE: the tests below rely on truthiness, so a bound explicitly set to 0
# is handled like a bound which is not given.
if args.bf and args.bs:
    print "bf option and bs option can't be used at the same time !"
    sys.exit(1)
if args.ef and args.es:
    print "ef option and es option can't be used at the same time !"
    sys.exit(1)

# Begin of the mix, in number of frames: -bf is used as it is, -bs is
# multiplied by the framerate of the input, 0 is used if none is given.
# NOTE(review): begin is a float in the first two cases (type=float) -
# confirm that the code using it accepts a float position.
if args.bf:
    begin = args.bf
elif args.bs:
    begin = args.bs*audio.get_framerate()
else:
    begin = 0
parser.add_argument(
    "-o",
    metavar="file",
    required=True,
    help='Audio Output file name')

# ----------------------------------------------------------------------------

# Without any argument, ask for the help message.
if len(sys.argv) <= 1:
    sys.argv.append('-h')

args = parser.parse_args()

# ----------------------------------------------------------------------------

# Every channel of every input file is given to the mixer.
mixer = ChannelsMixer()
for inputFile in args.w:
    audio = signals.open(inputFile)
    for i in xrange(audio.get_nchannels()):
        idx = audio.extract_channel(i)
        audio.rewind()
        mixer.append_channel(audio.get_channel(idx))

# Save the converted channel: the result of the mix, as the only channel
# of the output audio.
newchannel = mixer.mix()
audio_out = Audio()
audio_out.append_channel(newchannel)
signals.save(args.o, audio_out)

# ----------------------------------------------------------------------------
def setUp(self):
    """
    Open the four sample audio files used by the tests: the file of
    _sample_path_N is assigned to _sample_N, for N from 1 to 4.

    """
    for number in (1, 2, 3, 4):
        path = getattr(TestInformation, "_sample_path_%d" % number)
        setattr(self, "_sample_%d" % number, signals.open(path))
def run(
    self,
    audiofile,
    trsinputfile=None,
    trstieridx=None,
    ntracks=None,
    diroutput=None,
    tracksext=None,
    listoutput=None,
    textgridoutput=None,
):
    """
    Perform an IPU segmentation from a wav file.

    The tracks are either got from an annotated file (trsinputfile which
    does not end with "txt") or found with self.split(). They are then
    written in the requested outputs.

    - audiofile is the sound input file name. Its extension must be one of
      signals.extensions.
    - trsinputfile is a transcription (or 'None'). A name ending with
      "txt" is given to self.set_trs() and the silences are still searched
      with self.split(); any other name is given to
      self.get_from_transcription() and self.split() is not called.
    - trstieridx is given to self.get_from_transcription() with
      trsinputfile (presumably the index of the tier to use - to confirm)
    - ntracks expected number of tracks
    - diroutput is a directory name to save output tracks (one per unit).
      If it is None while self.dirtracks is True, the name of the audio
      file without its extension, followed by "-tracks", is used.
    - tracksext is the track extension (used with the diroutput option).
      If it is None, "TextGrid" is used when self.save_as_trs is True and
      "txt" otherwise.
    - listoutput is a file name to save the IPU segmentation result (this
      file contains the begin time and end time of each unit, and the wav
      duration). If it is None while tracks are written and a logfile is
      set, "index.txt" in diroutput is used.
    - textgridoutput is a file name to save the IPU segmentation result.

    Return a tuple with self.silence and the number of tracks.

    Raise an Exception if the format of the audio file is not recognized,
    if the audio file cannot be opened, if the tracks cannot be got from
    the transcription or if self.split() fails.

    """
    fileName, fileExtension = os.path.splitext(audiofile)

    # Set input
    if fileExtension.lower() in signals.extensions:
        try:
            self.audiospeech = signals.open(audiofile)
        except Exception as e:
            raise Exception("Input error.\n" + str(e) + "\n")
        # Auto-adjust volume
        # (only if volume_cap is 0: it is then set to the min volume plus
        # one fifth of the difference between the mean and min volumes)
        if self.volume_cap == 0:
            minv = self.audiospeech.get_minvolume()
            meanv = self.audiospeech.get_meanvolume()
            step = int((meanv - minv) / 5.0)
            self.volume_cap = minv + step
    else:
        raise Exception("Input error: unrecognized file format\n")

    self.bornestart = 0
    self.borneend = 0

    # Only channel 0 of the audio file is used.
    idx = self.audiospeech.extract_channel(0)
    channel = self.audiospeech.get_channel(idx)
    self.audiosil = ChannelSil(channel, self.min_length)
    # Silence detection is here:
    # ###########################

    # Fix transcription units if a transcription is given
    trstracks = None
    sil = True
    self.trsinput = None
    if trsinputfile:
        if trsinputfile.lower().endswith("txt"):
            self.set_trs(trsinputfile)
        else:
            try:
                # Get tracks and silences from an annotated file
                trstracks = self.get_from_transcription(trsinputfile, trstieridx)
                self.audiosil.set_silence(self.silence)
                # Do not find silences automatically!
                sil = False
            except Exception as e:
                raise Exception("Input transcription error. " + str(e) + "\n")

    # sil is still True if no transcription was given or if it is a txt file.
    if sil is True:
        try:
            self.split(ntracks)
        except Exception as e:
            raise Exception("Error while executing Split.\n" + str(e) + "\n")

    # save output
    # ###############################################################

    # Tracks which were not got from a transcription are asked to audiosil.
    if trstracks is None:
        trstracks = self.audiosil.tracks()

    # Write silences/units into a transcription file
    if textgridoutput is not None:
        self.write_textgrid(textgridoutput, trstracks)

    # Write speech into track files with a given file extension
    if diroutput is not None or self.dirtracks is True:
        if diroutput is None:
            diroutput = fileName + "-tracks"
        if self.logfile is not None:
            self.logfile.print_message(str(len(self.trsunits)) + " units to write.", indent=3)
            self.logfile.print_message(str(len(self.silence)) + " silences.", indent=3)

        # Automatically Activate the list output file
        if listoutput is None and self.logfile is not None:
            listoutput = os.path.join(diroutput, "index.txt")
            self.logfile.print_message(listoutput, indent=3)

        # Fix output files format (txt or TextGrid)
        if tracksext is None:
            tracksext = "TextGrid" if self.save_as_trs is True else "txt"

        # An extension known by annotationdata.io (except "txt") is written
        # with units created from the tracks; otherwise self.trsunits is used.
        if "." + tracksext.strip().lower() in annotationdata.io.extensions and tracksext != "txt":
            trs = self.create_trsunits(trstracks)
            audiosilpres = AudioSilencePresenter(self.audiosil)
            audiosilpres.write_tracks(
                trstracks, diroutput, ext=tracksext, trsunits=trs, trsnames=self.trsnames, logfile=self.logfile
            )
        else:
            audiosilpres = AudioSilencePresenter(self.audiosil)
            audiosilpres.write_tracks(
                trstracks,
                diroutput,
                ext=tracksext,
                trsunits=self.trsunits,
                trsnames=self.trsnames,
                logfile=self.logfile,
            )

    # Write silences boundaries (in seconds) into a file
    if listoutput:
        self.write_list(listoutput, trstracks)

    # #####################################################################
    # self.restaure_default()

    # trstracks can only be None here if self.audiosil.tracks() returned
    # None; a result without any length is counted as 0 track.
    if trstracks is None:
        nbtracks = 0
    else:
        try:
            nbtracks = len(trstracks)
        except Exception:
            nbtracks = 0
    return (self.silence, nbtracks)
def setUp(self):
    """Open the two sample audio files used by the tests."""
    case = TestChannelFragmentExtracter
    self._sample_1 = signals.open(case._sample_path_1)
    self._sample_2 = signals.open(case._sample_path_2)
parser.add_argument("-w", metavar="file", nargs='+', required=True, help='Audio Input file names') # ---------------------------------------------------------------------------- if len(sys.argv) <= 1: sys.argv.append('-h') args = parser.parse_args() # ---------------------------------------------------------------------------- equalizer = ChannelsEqualizer() file = signals.open(args.w[0]) sampwidth = file.get_sampwidth() framerate = file.get_framerate() for inputFile in args.w: audio = signals.open(inputFile) if audio.get_sampwidth() != sampwidth: print "Input files must have the same sample width !" sys.exit(1) if audio.get_framerate() != framerate: print "Input files must have the same framerate !" sys.exit(1) idx = audio.extract_channel(1) equalizer.append_channel(audio.get_channel(idx)) equalizer.equalize()
# Verify and extract args: # ---------------------------------------------------------------------------- parser = ArgumentParser(usage="%s -o output file [options]" % os.path.basename(PROGRAM), description="A script to apply high-pass filter (development version)") parser.add_argument("-i", metavar="file", required=True, help='Audio Input file name') parser.add_argument("-o", metavar="file", required=True, help='Audio Output file name') if len(sys.argv) <= 1: sys.argv.append('-h') args = parser.parse_args() # ---------------------------------------------------------------------------- audioin = signals.open( args.i ) SAMPLE_RATE = audioin.get_framerate() # ---------------------------------------------------------------------------- # IIR filter coefficients freq = 2000 # Hz r = 0.98 a1 = -2.0 * r * math.cos(freq / (SAMPLE_RATE / 2.0) * math.pi) a2 = r * r filter = [a1, a2] print filter n = audioin.get_nframes() original = struct.unpack('%dh' % n, audioin.read_frames(n)) original = [s / 2.0**15 for s in original]
def setUp(self):
    """Open the two sample audio files used by the tests."""
    case = TestMonoFragment
    self._sample_1 = signals.open(case._sample_path_1)
    self._sample_2 = signals.open(case._sample_path_2)
def setUp(self):
    """
    Open the three sample audio files used by the tests: the file of
    _sample_path_N is assigned to _sample_N, for N from 1 to 3.

    """
    for number in (1, 2, 3):
        path = getattr(TestAudioUtils, "_sample_path_%d" % number)
        setattr(self, "_sample_%d" % number, signals.open(path))