Esempio n. 1
0
    def __init__(self, logfile=None):
        """
        Constructor.

        Initializes the sppasBase part of the object, then creates the
        Intsint instance stored in the 'intsint' member.

        @param logfile is forwarded unchanged to the sppasBase constructor
        (optional, None by default).

        """
        sppasBase.__init__(self, logfile)

        # One Intsint object is created per instance and kept as a member.
        self.intsint = Intsint()
Esempio n. 2
0
    def __init__(self, logfile=None):
        """
        Constructor.

        Initializes the sppasBase part of the object, creates the Momel
        instance stored in the 'momel' member and sets the default value
        of the PAS_TRAME member.

        @param logfile (sppasLog) is a log file utility class member.

        """
        sppasBase.__init__(self, logfile)

        # One Momel object is created per instance and kept as a member.
        self.momel = Momel()
        # NOTE(review): "pas de trame" is French for "frame step"; the unit
        # (presumably milliseconds) is not visible here -- TODO confirm.
        self.PAS_TRAME = 10.
Esempio n. 3
0
    def __init__(self, resourcefile, logfile=None):
        """
        Create a new sppasRepetition instance.

        @param resourcefile (str) is either the lemma dictionary or the list
        of stop-words. The name of the other resource is deduced from it by
        swapping the file extension.
        @param logfile is an object with a print_message() method used to
        report disabled features; if None, messages are printed instead.

        Attention: the extension of the resource file name is very important:
        must be ".stp" for stop-words and ".lem" for lemmas (case-sensitive)!

        """
        sppasBase.__init__(self, logfile)

        # Members
        self._use_lemmatize = True   # Lemmatize the input
        self._use_stopwords = True   # Add specific stopwords of the input
        self._empan  = 5             # Detection length (nb of IPUs; 1=current IPU)
        self._alpha  = 0.5           # Specific stop-words threshold coefficient
        self.logfile = logfile
        self.lemmatizer = None
        self.stopwords  = None

        def report(message):
            # Send an informative message to the log file if there is one,
            # to the standard output otherwise.
            if logfile is not None:
                logfile.print_message(message, indent=2, status=3)
            else:
                # Parenthesized single argument: prints the same text with
                # the Python 2 print statement and the Python 3 function.
                print(" ... ... [ INFO ] " + message)

        # Create the lemmatizer instance.
        # Any failure (bad file name, unreadable file...) simply disables
        # the lemmatization: this resource is optional.
        try:
            lemmafile = resourcefile.replace(".stp", ".lem")
            self.lemmatizer = LemmaDict(lemmafile)
        except Exception:
            self._use_lemmatize = False

        # An empty dictionary is as useless as a missing one.
        if self._use_lemmatize is False or self.lemmatizer.get_size() == 0:
            report("Lemmatization disabled.")
            self._use_lemmatize = False

        # Create the list of stop words (list of non-relevant words).
        # The list must be loaded from the ".stp" file, even if the given
        # resource file is the ".lem" one.
        try:
            stopfile = resourcefile.replace(".lem", ".stp")
            self.stopwords = WordsList(filename=stopfile, nodump=True)
            if self.stopwords.get_size() == 0:
                self._use_stopwords = False
        except Exception:
            # NOTE(review): on failure an empty list is used and
            # _use_stopwords is left to True -- kept as it was; confirm
            # this is intended.
            self.stopwords = WordsList()

        if self._use_stopwords is False:
            report("StopWords disabled.")
Esempio n. 4
0
    def __init__(self, model, modelL1=None, logfile=None):
        """
        Create a new sppasAlign instance.

        Initializes the sppasBase part of the object, then delegates the
        whole set-up to fix_segmenter() and reset().

        @param model (str) the acoustic model directory name of the language of the text
        @param modelL1 (str) the acoustic model directory name of the mother language of the speaker
        (optional, None by default)
        @param logfile (sppasLog) is forwarded to the sppasBase constructor

        """
        sppasBase.__init__(self, logfile)

        # Members: self.alignio
        # NOTE(review): presumably created by fix_segmenter() -- its body is
        # not visible here, TODO confirm.
        self.fix_segmenter( model,modelL1 )
        # reset() is called after the segmenter is fixed; keep this order.
        self.reset()
Esempio n. 5
0
    def __init__(self, model, logfile=None):
        """
        Create a new sppasChunks instance.

        The options of this annotation are initialized with the values
        reported by the Chunks instance created from the model.

        @param model (str) the acoustic model directory name of the language of the text
        @param logfile (sppasLog) is forwarded to the sppasBase constructor

        """
        sppasBase.__init__(self, logfile)

        finder = Chunks(model)
        self.chunks = finder

        options = self._options
        options["clean"] = True  # Remove temporary files

        # Every other option starts with the value given by the matching
        # getter of the Chunks instance.
        defaults = (
            ("silences", finder.get_silences),
            ("anchors", finder.get_anchors),
            ("ngram", finder.get_ngram_init),
            ("ngrammin", finder.get_ngram_min),
            ("windelay", finder.get_windelay_init),
            ("windelaymin", finder.get_windelay_min),
            ("chunksize", finder.get_chunk_maxsize),
        )
        for key, getter in defaults:
            options[key] = getter()
Esempio n. 6
0
    def __init__(self, dictfilename, mapfile=None, logfile=None):
        """
        Constructor.

        @param dictfilename (str) is the pronunciation dictionary file name
        (HTK-ASCII format, utf8). It is given to set_dict().
        @param mapfile (str) is the filename of a mapping table. It is used
        to generate new pronunciations by mapping phonemes of the dictionary.
        If None, no mapping table is created.
        @param logfile (sppasLog) is a log file utility class member.

        """
        sppasBase.__init__(self, logfile)

        # Mapping table: only created if a file name was given.
        self.maptable = Mapping(mapfile) if mapfile is not None else None

        # Pronunciation dictionary (the mapping table is assigned before).
        self.set_dict(dictfilename)

        # List of options to configure this automatic annotation.
        # A new dictionary is assigned: any option set before is discarded.
        self._options = {
            'phonunk': False,       # Phonetize missing tokens
            'usestdtokens': False,  # Phonetize standard spelling
        }