def generateName(self):
    """Generate one name, one character at a time.

    The working string starts as ``INITIAL_CHAR``. At every step the scorer
    is fed the occurrence counts of each usable prefix length (never longer
    than what has been generated so far), the candidate characters and
    their scores are read back, and ``discretepick`` selects the index of
    the next character. Generation ends when ``TERMINAL_CHAR`` is picked or
    when the working string reaches ``maxNameLength_ + 1`` characters.

    The scorer is reset as soon as its scores have been read, on every
    path (normal step, terminal character, exception), so no learned state
    is carried over into the next step or the next call.

    Returns:
        A ``(namePerplexity, name)`` tuple, where ``name`` has had its
        first character (the ``INITIAL_CHAR`` marker) stripped and the
        perplexity is computed by ``perplx`` from the accumulated
        probability and the stripped name's length.

    Raises:
        InvalidGeneratedWord: when no character is available to follow
            the current prefix (including when the scorer yields nothing).
    """
    name = INITIAL_CHAR
    probabilityOfName = 1.0
    while len(name) < self.maxNameLength_ + 1:
        minNgramLength, maxNGramLength = self.prefixCounter_.rangeTuple()
        # Do not try to use prefixes longer than what was already generated.
        maxNGramLength = min(maxNGramLength, len(name))

        # Bound up front so the error message below is always well defined,
        # even when the n-gram range is empty and the loop never runs.
        prefix = ''
        try:
            # Compute weights to choose the next character.
            for length in range(minNgramLength, maxNGramLength + 1):
                prefix = name[-length:] if length > 0 else ''
                totalOccurences = self.prefixCounter_.countPrefixOccurences(
                    length, prefix)
                perCharOccurences = list(
                    self.prefixCounter_.perCharacterPrefixOccurences(
                        length, prefix))
                for char, charOccurences in perCharOccurences:
                    self.scorer_.learn(
                        length, char, totalOccurences, charOccurences)
            # Snapshot the scores before the scorer is cleared.
            transitions = list(self.scorer_.scores())
        finally:
            # Always clear the scorer: previously it was skipped when the
            # loop ended on TERMINAL_CHAR or on an exception, leaving stale
            # scores behind for the next call.
            self.scorer_.reset()

        # Unzipping an empty sequence cannot be unpacked into two names,
        # so handle the "no candidate at all" case explicitly.
        if transitions:
            transitionCharacters, transitionScores = map(
                list, zip(*transitions))
        else:
            transitionCharacters, transitionScores = [], []

        # Discard the possibility to finish the name if not enough
        # characters were generated.
        # NOTE(review): `name` still contains INITIAL_CHAR here while the
        # maximum-length test adds 1 for it - confirm whether
        # minNameLength_ is meant to include the marker.
        if len(name) < self.minNameLength_:
            if TERMINAL_CHAR in transitionCharacters:
                i = transitionCharacters.index(TERMINAL_CHAR)
                del transitionCharacters[i]
                del transitionScores[i]

        # If there is no meaningful character to follow the prefix, raise.
        # (Per the original author's note this should not happen when the
        # minimum n-gram length is 0, since the empty prefix is then used.)
        if not transitionCharacters:
            namePerplexity = perplx(probabilityOfName, len(name))
            raise InvalidGeneratedWord(
                ("No character to transition from \"%s\"" % prefix),
                name, namePerplexity)

        # Pick the next character and extend the name.
        i = discretepick(transitionScores)
        character = transitionCharacters[i]
        if character == TERMINAL_CHAR:
            # The terminal draw itself is not folded into the probability.
            break
        name += character
        probabilityOfName *= transitionScores[i] / sum(transitionScores)

    # Drop the leading INITIAL_CHAR marker.
    name = name[1:]
    namePerplexity = perplx(probabilityOfName, len(name))
    return (namePerplexity, name)
def generateName( self ):
    """Build a single name by repeatedly drawing the next character.

    Starting from ``INITIAL_CHAR``, each iteration teaches the scorer the
    occurrence counts for every prefix length in the counter's range
    (capped by the number of characters generated so far), reads the
    resulting ``(character, score)`` pairs and lets ``discretepick`` choose
    an index among the scores. The loop stops on ``TERMINAL_CHAR`` or once
    the working string holds ``maxNameLength_ + 1`` characters.

    The scorer is cleared in a ``finally`` clause right after its scores
    are read, so it is left clean whether the step completes, the terminal
    character is drawn, or an exception is raised.

    Returns:
        ``(namePerplexity, name)``: the name without its first character
        (the ``INITIAL_CHAR`` marker) and the value ``perplx`` yields for
        the accumulated probability and that name's length.

    Raises:
        InvalidGeneratedWord: if nothing can follow the current prefix,
            which now also covers a scorer that returns no scores at all.
    """
    name = INITIAL_CHAR
    probabilityOfName = 1.0
    while len(name) < self.maxNameLength_ + 1:
        minNgramLength, maxNGramLength = self.prefixCounter_.rangeTuple()
        # Do not try to use prefixes longer than what was already generated.
        maxNGramLength = min( maxNGramLength, len(name) )

        # Default value: keeps the error message usable when the range
        # below is empty and `prefix` would otherwise never be assigned.
        prefix = ''
        try:
            # Compute weights to choose the next character.
            for length in range( minNgramLength, maxNGramLength + 1 ):
                prefix = name[-length:] if length > 0 else ''
                totalOccurences = self.prefixCounter_.countPrefixOccurences( length, prefix )
                perCharOccurences = list( self.prefixCounter_.perCharacterPrefixOccurences( length, prefix ) )
                for char, charOccurences in perCharOccurences:
                    self.scorer_.learn( length, char, totalOccurences, charOccurences )
            # Materialize the scores before the scorer is reset.
            transitions = list( self.scorer_.scores() )
        finally:
            # Reset unconditionally; the original only reset at the end of
            # a completed step, so a `break` or a raise left stale state.
            self.scorer_.reset()

        # zip(*[]) yields nothing, which cannot be unpacked into two lists;
        # fall back to empty lists so the check below reports the problem.
        if transitions:
            transitionCharacters, transitionScores = map( list, zip( *transitions ) )
        else:
            transitionCharacters, transitionScores = [], []

        # Discard the possibility to finish the name if not enough
        # characters were generated.
        # NOTE(review): `name` includes INITIAL_CHAR at this point, unlike
        # the max-length test which adds 1 for it - confirm the intended
        # meaning of minNameLength_.
        if len(name) < self.minNameLength_:
            if TERMINAL_CHAR in transitionCharacters:
                i = transitionCharacters.index( TERMINAL_CHAR )
                del transitionCharacters[i]
                del transitionScores[i]

        # If there is no meaningful character to follow the prefix, raise.
        # (Per the original author's note this should not happen when the
        # minimum n-gram length is 0, since the empty prefix is then used.)
        if not transitionCharacters:
            namePerplexity = perplx( probabilityOfName, len(name) )
            raise InvalidGeneratedWord( ("No character to transition from \"%s\""%prefix), name, namePerplexity )

        # Pick the next character and extend the name.
        i = discretepick( transitionScores )
        character = transitionCharacters[i]
        if character == TERMINAL_CHAR:
            # The terminal draw is not multiplied into the probability.
            break
        name += character
        probabilityOfName *= transitionScores[i] / sum(transitionScores)

    # Strip the leading INITIAL_CHAR marker.
    name = name[1:]
    namePerplexity = perplx( probabilityOfName, len(name) )
    return ( namePerplexity, name )
def generateName(self):
    """Generate one name from the transition table.

    The working string starts as ``INITIAL_CHAR``. Each step looks up the
    transitions for the last ``nGramLength_`` characters (or the empty
    prefix when ``nGramLength_`` is not positive), lets ``discretepick``
    choose an index among the scores and appends the matching character.
    The loop ends when ``TERMINAL_CHAR`` is chosen or when the working
    string reaches ``maxNameLength_ + 1`` characters.

    Returns:
        A ``(namePerplexity, name)`` tuple; ``name`` is the working string
        without its first character.

    Raises:
        InvalidGeneratedWord: when the prefix has no transition at all, or
            when its only transition is ``TERMINAL_CHAR`` while the working
            string is still shorter than ``minNameLength_``.
    """

    def invalidWord(template):
        # Build the exception from the current state of the generation.
        perplexity = perplx(probabilityOfName, len(name))
        return InvalidGeneratedWord(template % prefix, name, perplexity)

    name = INITIAL_CHAR
    probabilityOfName = 1.0
    while len(name) < self.maxNameLength_ + 1:
        # Look up the candidate characters for the current prefix.
        order = self.nGramLength_
        prefix = name[-order:] if order > 0 else ""
        candidates = list(self.transitionTable_.transitionsForPrefix(prefix))
        if not candidates:
            raise invalidWord('No character to transition from "%s"')

        symbols, weights = (list(column) for column in zip(*candidates))

        # While the working string is too short, ending the word is not allowed.
        # NOTE(review): `name` still includes INITIAL_CHAR here - confirm
        # whether minNameLength_ is meant to account for it.
        if len(name) < self.minNameLength_ and TERMINAL_CHAR in symbols:
            cut = symbols.index(TERMINAL_CHAR)
            symbols.pop(cut)
            weights.pop(cut)
            # The candidate list can only become empty through the removal above.
            if not symbols:
                raise invalidWord('Only end-of-word transition from "%s"')

        # Pick the next character and extend the name.
        choice = discretepick(weights)
        picked = symbols[choice]
        if picked == TERMINAL_CHAR:
            break
        name = name + picked
        probabilityOfName *= weights[choice] / sum(weights)

    # Drop the leading marker character before reporting.
    generated = name[1:]
    return (perplx(probabilityOfName, len(generated)), generated)
def generateName( self ):
    """Produce a name by walking the transition table character by character.

    Generation starts from ``INITIAL_CHAR``. At each step the transitions
    of the current prefix (the last ``nGramLength_`` characters, or the
    empty string when ``nGramLength_`` is not positive) are fetched, an
    index is drawn with ``discretepick`` from their scores, and the
    corresponding character is appended. Drawing ``TERMINAL_CHAR`` stops
    the loop, as does reaching ``maxNameLength_ + 1`` characters.

    Returns:
        ``(namePerplexity, name)`` where ``name`` has lost its first
        character and the perplexity comes from ``perplx``.

    Raises:
        InvalidGeneratedWord: if the prefix has no transitions, or only the
            end-of-word transition while the working string is still
            shorter than ``minNameLength_``.
    """
    generated = INITIAL_CHAR
    likelihood = 1.0
    while len(generated) < self.maxNameLength_ + 1:
        # Prefix used to look up the possible next characters.
        context = generated[-self.nGramLength_:] if self.nGramLength_ > 0 else ''
        options = list( self.transitionTable_.transitionsForPrefix( context ) )

        # Nothing at all can follow this prefix.
        if not options:
            perplexity = perplx( likelihood, len(generated) )
            message = "No character to transition from \"%s\"" % context
            raise InvalidGeneratedWord( message, generated, perplexity )

        letters, scores = map( list, zip( *options ) )

        # Ending the word is not allowed while the working string is too short.
        # NOTE(review): `generated` still includes INITIAL_CHAR here -
        # confirm whether minNameLength_ is meant to account for it.
        tooShort = len(generated) < self.minNameLength_
        if tooShort and TERMINAL_CHAR in letters:
            position = letters.index( TERMINAL_CHAR )
            del letters[position], scores[position]
        if tooShort and not letters:
            perplexity = perplx( likelihood, len(generated) )
            message = "Only end-of-word transition from \"%s\"" % context
            raise InvalidGeneratedWord( message, generated, perplexity )

        # Pick the next character and extend the name.
        drawn = discretepick( scores )
        letter = letters[drawn]
        if letter == TERMINAL_CHAR:
            break
        generated = generated + letter
        likelihood *= scores[drawn] / sum(scores)

    # Remove the leading marker character.
    finalName = generated[1:]
    finalPerplexity = perplx( likelihood, len(finalName) )
    return (finalPerplexity, finalName)