Beispiel #1
0
    def generateName(self):
        """Generate one name, character by character, from n-gram statistics.

        Starting from ``INITIAL_CHAR``, every step feeds the scorer with the
        occurrence counts of each usable prefix length, reads back the
        ``(character, score)`` pairs and lets ``discretepick`` choose the index
        of the next character.  Generation stops when ``TERMINAL_CHAR`` is
        picked or once ``maxNameLength_`` characters have been generated.

        Returns:
            A ``(namePerplexity, name)`` tuple.  ``name`` has the leading
            ``INITIAL_CHAR`` stripped and ``namePerplexity`` is
            ``perplx(probabilityOfName, len(name))`` for that stripped name.

        Raises:
            InvalidGeneratedWord: if no character can follow the current
                prefix.  The partial name passed to the exception still
                starts with ``INITIAL_CHAR``.
        """
        name = INITIAL_CHAR
        probabilityOfName = 1.0
        while len(name) < self.maxNameLength_ + 1:
            minNgramLength, maxNGramLength = self.prefixCounter_.rangeTuple()
            # Do not try to use prefixes longer than what was already generated.
            maxNGramLength = min(maxNGramLength, len(name))

            # Longest usable prefix.  Bound up front so that the error message
            # below is well defined even when the range of lengths is empty.
            prefix = name[-maxNGramLength:] if maxNGramLength > 0 else ''

            # Compute weights to choose the next character.  The scorer is
            # reset as soon as its scores have been read, so no learned state
            # leaks into the next step or the next call, whichever way this
            # iteration ends (normal step, terminal character, or exception).
            try:
                for length in range(minNgramLength, maxNGramLength + 1):
                    currentPrefix = name[-length:] if length > 0 else ''
                    totalOccurences = self.prefixCounter_.countPrefixOccurences(
                        length, currentPrefix)
                    perCharOccurences = list(
                        self.prefixCounter_.perCharacterPrefixOccurences(
                            length, currentPrefix))

                    for char, charOccurences in perCharOccurences:
                        self.scorer_.learn(length, char, totalOccurences,
                                           charOccurences)

                scored = list(self.scorer_.scores())
            finally:
                self.scorer_.reset()

            # zip(*[]) yields nothing to unpack, so the "no scores" case has to
            # be handled explicitly to reach the InvalidGeneratedWord below.
            if scored:
                transitionCharacters, transitionScores = map(list, zip(*scored))
            else:
                transitionCharacters, transitionScores = [], []

            # Discard the possibility to finish the name if not enough
            # characters were generated.  `name` still carries INITIAL_CHAR,
            # hence the +1, mirroring the loop condition.
            # NOTE(review): assumes minNameLength_ counts generated characters
            # only, like maxNameLength_ - confirm against the constructor.
            if (len(name) < self.minNameLength_ + 1
                    and TERMINAL_CHAR in transitionCharacters):
                i = transitionCharacters.index(TERMINAL_CHAR)
                del transitionCharacters[i]
                del transitionScores[i]

            # If there is no meaningful character to follow the prefix, raise
            # an exception.
            if not transitionCharacters:
                namePerplexity = perplx(probabilityOfName, len(name))
                raise InvalidGeneratedWord(
                    ("No character to transition from \"%s\"" % prefix), name,
                    namePerplexity)

            # Pick the next character and extend the name.
            i = discretepick(transitionScores)
            character = transitionCharacters[i]

            if character == TERMINAL_CHAR:
                break

            name += character
            # Normalise the picked score into a probability.
            probabilityOfName *= transitionScores[i] / sum(transitionScores)

        # Drop the leading INITIAL_CHAR before reporting the result.
        name = name[1:]
        namePerplexity = perplx(probabilityOfName, len(name))

        return (namePerplexity, name)
Beispiel #2
0
    def generateName( self ):
        """Generate one name, character by character, from n-gram statistics.

        Starting from ``INITIAL_CHAR``, every step feeds the scorer with the
        occurrence counts of each usable prefix length, reads back the
        ``(character, score)`` pairs and lets ``discretepick`` choose the index
        of the next character.  Generation stops when ``TERMINAL_CHAR`` is
        picked or once ``maxNameLength_`` characters have been generated.

        Returns:
            A ``(namePerplexity, name)`` tuple.  ``name`` has the leading
            ``INITIAL_CHAR`` stripped and ``namePerplexity`` is
            ``perplx(probabilityOfName, len(name))`` for that stripped name.

        Raises:
            InvalidGeneratedWord: if no character can follow the current
                prefix.  The partial name passed to the exception still
                starts with ``INITIAL_CHAR``.
        """
        name = INITIAL_CHAR
        probabilityOfName = 1.0
        while len(name) < self.maxNameLength_ + 1:
            minNgramLength, maxNGramLength = self.prefixCounter_.rangeTuple()
            #Do not try to use prefixes longer than what was already generated
            maxNGramLength = min( maxNGramLength, len(name) )

            #Longest usable prefix. Bound before the loop so the error message
            #below never refers to an unassigned variable when the range of
            #lengths is empty.
            prefix = name[-maxNGramLength:] if maxNGramLength > 0 else ''

            #Compute weights to choose next character. The scorer is reset as
            #soon as its scores were read, so that no learned state survives
            #this step, whether it ends normally, on the terminal character or
            #with an exception.
            try:
                for length in range(minNgramLength,maxNGramLength+1):
                    currentPrefix = name[-length:] if length > 0 else ''
                    totalOccurences = self.prefixCounter_.countPrefixOccurences( length, currentPrefix )
                    perCharOccurences = list( self.prefixCounter_.perCharacterPrefixOccurences( length, currentPrefix ) )

                    for char, charOccurences in perCharOccurences:
                        self.scorer_.learn( length, char, totalOccurences, charOccurences )

                scored = list( self.scorer_.scores() )
            finally:
                self.scorer_.reset()

            #zip(*[]) gives nothing to unpack: handle the "no scores" case
            #explicitly so that InvalidGeneratedWord is raised below instead
            #of a ValueError.
            if scored:
                transitionCharacters, transitionScores = map( list, zip( *scored ) )
            else:
                transitionCharacters, transitionScores = [], []

            #Discard the possibility to finish the name if not enough characters were generated.
            #`name` still carries INITIAL_CHAR, hence the +1 (same as in the loop condition).
            #NOTE(review): assumes minNameLength_ counts generated characters only,
            #like maxNameLength_ - confirm against the constructor.
            if len(name) < self.minNameLength_ + 1 and TERMINAL_CHAR in transitionCharacters:
                i = transitionCharacters.index( TERMINAL_CHAR )
                del transitionCharacters[i]
                del transitionScores[i]

            #If there is no meaningful character to follow the prefix, raise an exception
            if not transitionCharacters:
                namePerplexity = perplx( probabilityOfName, len(name) )
                raise InvalidGeneratedWord( ("No character to transition from \"%s\""%prefix), name, namePerplexity )

            #Pick next character and build name
            i = discretepick( transitionScores )
            character = transitionCharacters[i]

            if character == TERMINAL_CHAR:
                break

            name += character
            #Normalise the picked score into a probability
            probabilityOfName *= transitionScores[i] / sum(transitionScores)

        #Drop the leading INITIAL_CHAR before reporting the result
        name = name[1:]
        namePerplexity = perplx( probabilityOfName, len(name) )

        return ( namePerplexity, name )
Beispiel #3
0
    def generateName(self):
        """Generate one name by repeatedly following the transition table.

        Starting from ``INITIAL_CHAR``, each step looks up the
        ``(character, score)`` transitions for the last ``nGramLength_``
        characters (or for the empty prefix when ``nGramLength_`` is not
        positive) and lets ``discretepick`` choose the index of the next
        character.  Generation stops when ``TERMINAL_CHAR`` is picked or once
        ``maxNameLength_`` characters have been generated.

        Returns:
            A ``(namePerplexity, name)`` tuple.  ``name`` has the leading
            ``INITIAL_CHAR`` stripped and ``namePerplexity`` is
            ``perplx(probabilityOfName, len(name))`` for that stripped name.

        Raises:
            InvalidGeneratedWord: if the prefix has no transition at all, or
                only the end-of-word transition while the name is still
                shorter than ``minNameLength_``.  The partial name passed to
                the exception still starts with ``INITIAL_CHAR``.
        """
        name = INITIAL_CHAR
        probabilityOfName = 1.0
        while len(name) < self.maxNameLength_ + 1:
            # Compute weights to choose the next character.
            prefix = name[-self.nGramLength_:] if self.nGramLength_ > 0 else ""
            transitions = list(self.transitionTable_.transitionsForPrefix(prefix))

            # If there is no meaningful character to follow the prefix, raise
            # an exception (this also guards the unpacking below).
            if not transitions:
                namePerplexity = perplx(probabilityOfName, len(name))
                raise InvalidGeneratedWord(('No character to transition from "%s"' % prefix), name, namePerplexity)

            transitionCharacters, transitionScores = map(list, zip(*transitions))

            # Discard the possibility to finish the name if not enough
            # characters were generated.  `name` still carries INITIAL_CHAR,
            # hence the +1, mirroring the loop condition.
            # NOTE(review): assumes minNameLength_ counts generated characters
            # only, like maxNameLength_ - confirm against the constructor.
            if len(name) < self.minNameLength_ + 1 and TERMINAL_CHAR in transitionCharacters:
                i = transitionCharacters.index(TERMINAL_CHAR)
                del transitionCharacters[i]
                del transitionScores[i]

                if not transitionCharacters:
                    namePerplexity = perplx(probabilityOfName, len(name))
                    raise InvalidGeneratedWord(
                        ('Only end-of-word transition from "%s"' % prefix), name, namePerplexity
                    )

            # Pick the next character and extend the name.
            i = discretepick(transitionScores)
            character = transitionCharacters[i]
            if character == TERMINAL_CHAR:
                break

            name += character
            # Normalise the picked score into a probability.
            probabilityOfName *= transitionScores[i] / sum(transitionScores)

        # Drop the leading INITIAL_CHAR before reporting the result.
        name = name[1:]
        namePerplexity = perplx(probabilityOfName, len(name))

        return (namePerplexity, name)
Beispiel #4
0
    def generateName( self ):
        """Generate one name by repeatedly following the transition table.

        Starting from ``INITIAL_CHAR``, each step looks up the
        ``(character, score)`` transitions for the last ``nGramLength_``
        characters (or for the empty prefix when ``nGramLength_`` is not
        positive) and lets ``discretepick`` choose the index of the next
        character.  Generation stops when ``TERMINAL_CHAR`` is picked or once
        ``maxNameLength_`` characters have been generated.

        Returns:
            A ``(namePerplexity, name)`` tuple.  ``name`` has the leading
            ``INITIAL_CHAR`` stripped and ``namePerplexity`` is
            ``perplx(probabilityOfName, len(name))`` for that stripped name.

        Raises:
            InvalidGeneratedWord: if the prefix has no transition at all, or
                only the end-of-word transition while the name is still
                shorter than ``minNameLength_``.  The partial name passed to
                the exception still starts with ``INITIAL_CHAR``.
        """
        name = INITIAL_CHAR
        probabilityOfName = 1.0
        while len(name) < self.maxNameLength_ + 1:
            #Compute weights to choose next character
            prefix = name[-self.nGramLength_:] if self.nGramLength_ > 0 else ''

            transitions = list( self.transitionTable_.transitionsForPrefix( prefix ) )

            #If there is no meaningful character to follow the prefix, raise an exception
            #(this also guards the unpacking below)
            if not transitions:
                namePerplexity = perplx( probabilityOfName, len(name) )
                raise InvalidGeneratedWord( ("No character to transition from \"%s\""%prefix), name, namePerplexity )

            transitionCharacters, transitionScores = map( list, zip( *transitions ) )

            #Discard the possibility to finish the name if not enough characters were generated.
            #`name` still carries INITIAL_CHAR, hence the +1 (same as in the loop condition).
            #NOTE(review): assumes minNameLength_ counts generated characters only,
            #like maxNameLength_ - confirm against the constructor.
            if len(name) < self.minNameLength_ + 1 and TERMINAL_CHAR in transitionCharacters:
                i = transitionCharacters.index( TERMINAL_CHAR )
                del transitionCharacters[i]
                del transitionScores[i]

                if not transitionCharacters:
                    namePerplexity = perplx( probabilityOfName, len(name) )
                    raise InvalidGeneratedWord( ("Only end-of-word transition from \"%s\""%prefix), name, namePerplexity )

            #Pick next character and build name
            i = discretepick( transitionScores )
            character = transitionCharacters[i]
            if character == TERMINAL_CHAR:
                break

            name += character
            #Normalise the picked score into a probability
            probabilityOfName *= transitionScores[i] / sum(transitionScores)

        #Drop the leading INITIAL_CHAR before reporting the result
        name = name[1:]
        namePerplexity = perplx( probabilityOfName, len(name) )

        return (namePerplexity, name)