def __append_input(self, base_fn, base_ext):
    """Store a base file name as input if one of its files exists.

    Each extension of base_ext is appended in turn to base_fn. As soon as
    one of the resulting file names is reported as existing by
    sppasFileUtils and base_fn is not already in the list of inputs,
    base_fn is appended to that list.

    :param base_fn: (str) File name without extension
    :param base_ext: (list) Extensions to try, in order
    :returns: (bool) True if base_fn was appended, False otherwise

    """
    for extension in base_ext:
        candidate = sppasFileUtils(base_fn + extension)
        if not candidate.exists():
            continue
        if base_fn in self._inputs:
            # Already registered: nothing to add for this extension.
            continue
        self._inputs.append(base_fn)
        return True

    return False
def extract_aligned(self, trstier, nametier):
    """Extract tracks and silences from a time-aligned transcription tier.

    Consecutive silence annotations are merged into one silence interval.
    Every other annotation becomes a track and its label value is stored
    in self._units. When a tier of names is given, a track is kept only
    if an annotation of that tier is found for its time span; the name is
    made unique among the names already stored in self._names.

    :param trstier: tier with the time-aligned transcription
    :param nametier: tier with the names of the tracks, or None
    :returns: a tuple with tracks and silences lists

    """
    tracks = []
    silences = []
    self._units = []
    self._names = []

    nb_ann = trstier.GetSize()
    pos = 0
    while pos < nb_ann:
        ann = trstier[pos]
        begin = ann.GetLocation().GetBegin().GetMidpoint()
        end = ann.GetLocation().GetEnd().GetMidpoint()

        if ann.GetLabel().IsSilence():
            # Concatenate all the silences between 2 tracks
            while (pos + 1) < nb_ann and trstier[pos + 1].GetLabel().IsSilence():
                pos += 1
                end = trstier[pos].GetLocation().GetEnd().GetMidpoint()
            silences.append([begin, end])
            pos += 1
            continue

        tracks.append([begin, end])
        self._units.append(ann.GetLabel().GetValue())

        if nametier is not None:
            found = nametier.Find(begin, end, True)
            if len(found) == 0:
                # No name for this track: it is ignored.
                tracks.pop()
                self._units.pop()
            else:
                cleaner = sppasFileUtils(found[0].GetLabel().GetValue())
                basename = cleaner.clear_whitespace()
                if len(basename) == 0:
                    basename = "unnamed_track"
                # Take care of duplicated names: add a numbered suffix
                unique_name = basename
                rank = 2
                while unique_name in self._names:
                    unique_name = u"%s_%.06d" % (basename, rank)
                    rank += 1
                self._names.append(unique_name)

        pos += 1

    return tracks, silences
def fix_workingdir(inputaudio=""):
    """Fix the working directory to store temporarily the data.

    A directory named by sppasFileUtils.set_random() is created. If an
    audio file name is given, the file is copied into that directory under
    the name returned by sppasFileUtils.format() for its base name.

    :param inputaudio: (str) Audio file name. An empty string (the default)
        means that there is no audio file to copy.
    :returns: (str) Name of the created working directory
    :raises: OSError/IOError if the directory can't be created or the
        audio file can't be copied

    """
    sf = sppasFileUtils()
    workdir = sf.set_random()
    # Ask for another name as long as the proposed one is already used.
    while os.path.exists(workdir) is True:
        workdir = sf.set_random()
    os.mkdir(workdir)

    # With the default empty name there is nothing to copy, and
    # shutil.copy("") would raise: only copy a really given audio file.
    if len(inputaudio) > 0:
        audio_file = os.path.basename(inputaudio)
        formatted_audio_file = sppasFileUtils(audio_file).format()
        shutil.copy(inputaudio, os.path.join(workdir, formatted_audio_file))

    return workdir
def extract_aligned(self, trstier, nametier):
    """Extract tracks and silences from a time-aligned transcription tier.

    Consecutive silence annotations are merged into one silence interval.
    Every other annotation becomes a track and its label value is stored
    in self._units. When a tier of names is given, a track is kept only
    if an annotation of that tier is found for its time span. Its name is
    stored in self._names: an empty name is replaced by "unnamed_track"
    and a name already in use gets a numbered suffix, so that two tracks
    never share the same name.

    :param trstier: tier with the time-aligned transcription
    :param nametier: tier with the names of the tracks, or None
    :returns: a tuple with tracks and silences lists

    """
    trstracks = []
    silences = []
    self._units = []
    self._names = []

    last = trstier.GetSize()
    i = 0
    while i < last:
        ann = trstier[i]
        start = ann.GetLocation().GetBegin().GetMidpoint()
        end = ann.GetLocation().GetEnd().GetMidpoint()

        if ann.GetLabel().IsSilence():
            # Concatenate all the silences between 2 tracks
            while (i + 1) < last and trstier[i + 1].GetLabel().IsSilence():
                i += 1
                end = trstier[i].GetLocation().GetEnd().GetMidpoint()
            silences.append([start, end])
            i += 1
            continue

        trstracks.append([start, end])
        self._units.append(ann.GetLabel().GetValue())

        if nametier is not None:
            aname = nametier.Find(start, end, True)
            if len(aname) == 0:
                # No name for this track: it is ignored.
                trstracks.pop()
                self._units.pop()
            else:
                sf = sppasFileUtils(aname[0].GetLabel().GetValue())
                filename = sf.clear_whitespace()
                if len(filename) == 0:
                    filename = "unnamed_track"
                # Duplicated names would designate the same track:
                # add a numbered suffix until the name is a new one.
                new_name = filename
                idx = 2
                while new_name in self._names:
                    new_name = u"%s_%.06d" % (filename, idx)
                    idx += 1
                self._names.append(new_name)

        i += 1

    return trstracks, silences
def fix_audioinput(inputaudioname):
    """Fix the audio file name that will be used.

    The name is submitted to sppasFileUtils.format(); as documented by the
    original author, the expected result is an only-ascii-based file name
    without whitespace. If the formatted name differs from the given one,
    the audio file is copied under the formatted name.

    :param inputaudioname: (str) Audio file name
    :returns: (str) The audio file name to use

    """
    formatted_name = sppasFileUtils(inputaudioname).format()

    # The given name already fits the requirements: use it as it is.
    if formatted_name == inputaudioname:
        return formatted_name

    shutil.copy(inputaudioname, formatted_name)
    return formatted_name
def fix_audioinput(inputaudioname):
    """Return an audio file name that can be used safely.

    The given name is formatted with sppasFileUtils.format() (per the
    original documentation: only-ascii-based and without whitespace).
    When formatting changes the name, the audio file is duplicated under
    the new name; otherwise nothing is copied.

    :param inputaudioname: (str) Audio file name
    :returns: (str) The formatted audio file name

    """
    formatter = sppasFileUtils(inputaudioname)
    usable_name = formatter.format()
    name_changed = usable_name != inputaudioname
    if name_changed:
        # Keep the original file untouched and work on a copy.
        shutil.copy(inputaudioname, usable_name)
    return usable_name
def read(self, filename):
    """Read an ANT file and fill the Transcription.

    The file is opened as a zip archive and its whole content is extracted
    into a directory named by sppasFileUtils.set_random(). The extracted
    "annotation.xml" is then read with sppasANTX and the result is given
    to self.set().

    :param filename: (str) Name of the ANT file
    :raises: IOError/OSError if the file can't be opened, and the zipfile
        error for an invalid archive

    """
    # The context manager closes the archive even if the extraction fails.
    with zipfile.ZipFile(filename, 'r') as zf:
        unzip_dir = sppasFileUtils().set_random()
        zf.extractall(unzip_dir)

    # NOTE(review): the extraction directory is never removed here;
    # confirm whether a caller is in charge of cleaning it.
    antx_filename = os.path.join(unzip_dir, "annotation.xml")
    antx = sppasANTX()
    antx.read(antx_filename)
    self.set(antx)
def _get_filename(self, filename, extensions):
    """Return a file name corresponding to one of the extensions.

    The extension of the given file name is replaced in turn by each of
    the expected ones. The first name that sppasFileUtils reports as
    existing and that is a regular file is returned.

    :param filename: input file name
    :param extensions: the list of expected extension
    :returns: a file name of the first existing file with an expected
        extension or None

    """
    for suffix in extensions:
        candidate = os.path.splitext(filename)[0] + suffix
        found = sppasFileUtils(candidate).exists()
        if found is None:
            continue
        if os.path.isfile(found):
            return found

    return None
def _get_filename(filename, extensions):
    """Return a file name corresponding to one of the extensions.

    The extension of the given file name is replaced in turn by each of
    the expected ones. The first name that sppasFileUtils reports as
    existing and that is a regular file is returned.

    :param filename: input file name
    :param extensions: the list of expected extension
    :returns: a file name of the first existing file with an expected
        extension or None

    """
    stem = os.path.splitext(filename)[0]

    for suffix in extensions:
        found = sppasFileUtils(stem + suffix).exists()
        if found is None:
            continue
        if os.path.isfile(found):
            return found

    return None
def fix_workingdir(inputaudio=""):
    """Fix the working directory to store temporarily the data.

    Without audio file name, the directory is named by
    sppasFileUtils.set_random(). Otherwise its name is the audio file name
    without extension followed by "-temp", and by a number if a file or a
    directory with that name already exists.

    :param inputaudio: (str) Audio file name
    :returns: (str) Name of the created working directory
    :raises: OSError if the directory can't be created

    """
    if len(inputaudio) == 0:
        # Notice that the following generates a directory that the
        # aligners won't be able to access under Windows.
        # No problem with MacOS or Linux.
        sf = sppasFileUtils()
        workdir = sf.set_random()
        while os.path.exists(workdir) is True:
            workdir = sf.set_random()
    else:
        basedir = os.path.splitext(inputaudio)[0] + "-temp"
        workdir = basedir
        # A previous run may have left its directory behind: os.mkdir
        # would fail on it, so look for the first free numbered name.
        i = 1
        while os.path.exists(workdir) is True:
            workdir = basedir + str(i)
            i += 1

    os.mkdir(workdir)
    return workdir
def fix_workingdir(inputaudio=""):
    """Create the working directory to store temporarily the data.

    With an audio file name, the directory is named after it: the name
    without extension followed by "-temp", then by the first number
    (1, 2, ...) giving a name that does not exist yet if needed. Without
    audio file name, sppasFileUtils.set_random() names the directory.

    :param inputaudio: (str) Audio file name
    :returns: (str) Name of the created working directory

    """
    if len(inputaudio) > 0:
        stem = os.path.splitext(inputaudio)[0]
        workdir = stem + "-temp"
        suffix = 0
        while os.path.exists(workdir):
            suffix += 1
            workdir = stem + "-temp" + str(suffix)
    else:
        # Notice that the following generates a directory that the
        # aligners won't be able to access under Windows.
        # No problem with MacOS or Linux.
        namer = sppasFileUtils()
        workdir = namer.set_random()
        while os.path.exists(workdir):
            workdir = namer.set_random()

    os.mkdir(workdir)
    return workdir
def run_ipusegmentation(self, stepidx):
    """Execute the SPPAS-IPUSegmentation program.

    Each ".wav" file of the list of files is segmented:

    - an existing segmentation with another extension is first exported
      into the expected output format;
    - if no file with the output name exists, the segmentation is
      performed, with the ".txt" transcription if there is one;
    - if it exists, it is only used to split into tracks when the
      'dirtracks' option is True; otherwise the file is left as it is.

    :param stepidx: index of the step in the parameters
    :returns: number of files processed successfully

    """
    def log(message, **options):
        # The log file is optional: all the messages go through this
        # guard, so none of them can fail when there is no log file.
        if self._logfile is not None:
            self._logfile.print_message(message, **options)

    # Initializations
    step = self.parameters.get_step(stepidx)
    stepname = self.parameters.get_step_name(stepidx)
    files_processed_success = 0
    self._progress.set_header(stepname)
    self._progress.update(0, "")

    # Get the list of input file names, with the ".wav" extension
    filelist = self.set_filelist(".wav")
    if len(filelist) == 0:
        return 0
    total = len(filelist)

    # Create annotation instance
    try:
        seg = sppasIPUseg(self._logfile)
    except Exception as e:
        log("%s\n" % str(e), indent=1, status=4)
        return 0

    # Extensions of a possibly existing IPU-seg (in any format).
    # It does not depend on the file: it is fixed once for all.
    ext = [known for known in sppas.src.annotationdata.aio.extensions_in
           if known not in ('.txt', '.hz', '.PitchTier')]

    # Execute the annotation for each file in the list
    for i, f in enumerate(filelist):

        # Fix the default values, then the options
        seg.reset()
        seg.fix_options(step.get_options())

        # Indicate the file to be processed
        log(stepname+" of file "+f, indent=1)
        self._progress.set_text(os.path.basename(f)+" ("+str(i+1)+"/"+str(total)+")")

        # Fix input/output file name
        outname = os.path.splitext(f)[0] + self.parameters.get_output_format()

        # Is there already an existing IPU-seg (in any format)!
        existoutname = self._get_filename(f, ext)

        # It's existing... but not in the expected format: convert!
        if existoutname is not None and existoutname != outname:
            log('Export '+existoutname, indent=2)
            log('into '+outname, indent=2)
            try:
                t = sppas.src.annotationdata.aio.read(existoutname)
                sppas.src.annotationdata.aio.write(outname, t)
            except Exception:
                # Best effort only: if the export failed, outname is not
                # existing and the segmentation is performed below.
                pass

        # Execute annotation
        tgfname = sppasFileUtils(outname).exists()
        if tgfname is None:
            # No already existing IPU seg., but perhaps a txt.
            txtfile = self._get_filename(f, [".txt"])
            if txtfile:
                log("A transcription was found, perform Silence/Speech segmentation time-aligned with a transcription %s" % txtfile, indent=2, status=3)
            else:
                log("No transcription was found, perform Silence/Speech segmentation only.", indent=2, status=3)
            try:
                seg.run(f, trsinputfile=txtfile, ntracks=None, diroutput=None, tracksext=None, trsoutput=outname)
                files_processed_success += 1
                log(outname, indent=2, status=0)
            except Exception as e:
                log("%s for file %s\n" % (str(e), outname), indent=2, status=-1)

        elif seg.get_option('dirtracks') is True:
            log("A time-aligned transcription was found, split into multiple files", indent=2, status=3)
            try:
                seg.run(f, trsinputfile=tgfname, ntracks=None, diroutput=None, tracksext=None, trsoutput=None)
                files_processed_success += 1
                log(tgfname, indent=2, status=0)
            except Exception as e:
                log("%s for file %s\n" % (str(e), tgfname), indent=2, status=-1)

        else:
            log("because a previous segmentation is existing.", indent=2, status=2)

        # Indicate progress
        self._progress.set_fraction(float((i+1))/float(total))
        if self._logfile is not None:
            self._logfile.print_newline()

    # Indicate completed!
    self._progress.update(1, "Completed (%d files successfully over %d files).\n" % (files_processed_success, total))
    self._progress.set_header("")

    return files_processed_success
# -*- coding: utf8 -*- import unittest import os import shutil from ..label.label import Label from ..ptime.point import TimePoint from ..ptime.interval import TimeInterval from ..annotation import Annotation from ..aio.praat import TextGrid from sppas.src.utils.fileutils import sppasFileUtils # --------------------------------------------------------------------------- TEMP = sppasFileUtils().set_random() DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") # --------------------------------------------------------------------------- class TestTextGrid(unittest.TestCase): """ Test reader/writers of TextGrid files from Praat. """ def setUp(self): if os.path.exists(TEMP) is False: os.mkdir(TEMP) def tearDown(self):
src.resources.tests.test_vocab.py ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ import unittest import os.path import shutil from sppas import RESOURCES_PATH from sppas.src.utils.fileutils import sppasFileUtils from sppas.src.utils.makeunicode import u from ..vocab import sppasVocabulary # --------------------------------------------------------------------------- TEMP = sppasFileUtils().set_random() VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "vocab.txt") ITA = os.path.join(RESOURCES_PATH, "vocab", "ita.vocab") VOCAB_TEST = os.path.join(TEMP, "vocab.txt") # --------------------------------------------------------------------------- class TestVocabulary(unittest.TestCase): def setUp(self): if os.path.exists(TEMP) is False: os.mkdir(TEMP) def tearDown(self):