Beispiel #1
0
# Logging is set up unless the "quiet" option was given.
if not args.quiet:
    setup_logging(1, None)

# ----------------------------------------------------------------------------
# Automatic Phonetization is here:
# ----------------------------------------------------------------------------

# Flag forwarded to the phonetizer; the "nounk" option switches it off.
unkopt = not args.nounk

# Optional mapping file: None when no (or an empty) "map" option was given.
mapfile = args.map or None

if args.i:
    # An input file was given: run the file-based annotation on it.
    p = sppasPhon(args.dict, mapfile)
    p.set_unk(unkopt)
    p.set_usestdtokens(False)
    p.run(args.i, args.o)
else:
    # No input file: phonetize the standard input, one line at a time.
    pdict = DictPron(args.dict, unkstamp=UNKSTAMP, nodump=False)
    # Build the mapping table only once (empty when there is no map file).
    maptable = Mapping() if mapfile is None else Mapping(mapfile)
    phonetizer = DictPhon(pdict, maptable)
    for line in sys.stdin:
        # Parenthesised single-argument print: same output with Python 2,
        # and no longer a syntax error with Python 3.
        print(phonetizer.phonetize(line, unkopt))

# ----------------------------------------------------------------------------
Beispiel #2
0
    def run_phonetization(self, stepidx):
        """
        Execute the SPPAS-Phonetization program.

        For each ".wav" file of the file list, the matching "-token" file is
        searched and, when found, the annotation is run on it with a "-phon"
        file name (same base name as the audio file) as output.
        Failures are reported to the log file (if any) and never raised.

        @param stepidx index used to get the step and its name from the parameters
        @return number of files processed successfully

        """
        # Initializations
        step = self.parameters.get_step(stepidx)
        stepname = self.parameters.get_step_name(stepidx)
        files_processed_success = 0
        self._progress.set_header(stepname)
        self._progress.update(0, "")

        # Get the list of input file names, with the ".wav" (or ".wave") extension
        filelist = self.set_filelist(".wav")  # ,not_start=["track_"])
        if not filelist:
            return 0
        total = len(filelist)

        # Create annotation instance; without it nothing can be processed
        try:
            self._progress.set_text("Loading resources...")
            p = sppasPhon(step.get_langresource(), logfile=self._logfile)
        except Exception as e:
            if self._logfile is not None:
                self._logfile.print_message("%s\n" % e, indent=1, status=4)
            return 0

        # Execute the annotation for each file in the list
        for i, f in enumerate(filelist):

            # fix the default values
            p.fix_options(step.get_options())

            # Indicate the file to be processed
            self._progress.set_text(os.path.basename(f) + " (" + str(i + 1) + "/" + str(total) + ")")
            if self._logfile is not None:
                self._logfile.print_message(stepname + " of file " + f, indent=1)

            # Candidate input extensions: the configured output format first,
            # then every multi-tiers extension, all with the "-token" suffix.
            out_format = self.parameters.get_output_format()
            ext = ['-token' + out_format]
            ext.extend('-token' + multi_ext
                       for multi_ext in annotationdata.io.extensions_out_multitiers)

            inname = self._get_filename(f, ext)
            if inname is not None:

                # Fix output file name
                outname = os.path.splitext(f)[0] + '-phon' + out_format

                # Execute annotation; an error on one file must not stop the others
                try:
                    p.run(inname, outname)
                except Exception as e:
                    import traceback
                    # Parenthesised single-argument print: same output with
                    # Python 2, and not a syntax error with Python 3.
                    print(traceback.format_exc())
                    if self._logfile is not None:
                        self._logfile.print_message("%s for file %s\n" % (str(e), outname), indent=2, status=-1)
                else:
                    files_processed_success += 1
                    if self._logfile is not None:
                        self._logfile.print_message(outname, indent=2, status=0)

            else:
                if self._logfile is not None:
                    self._logfile.print_message("Failed to find a file with tokenization. Read the documentation for details.", indent=2, status=2)

            # Indicate progress
            self._progress.set_fraction(float((i + 1)) / float(total))
            if self._logfile is not None:
                self._logfile.print_newline()

        # Indicate completed!
        self._progress.update(1, "Completed (%d files successfully over %d files).\n" % (files_processed_success, total))
        self._progress.set_header("")

        return files_processed_success