def _digestTokens(tokens, db_con):
    """Resolve every input token against the lexicon and decorate it for output.

    Parameters:
        tokens: the raw token sequence to be processed.
        db_con: an open database connection, passed through to the lookup helpers.

    Returns a tuple of:
        words_details: a list of (lexicon_entry, prefix, suffix, slot) tuples,
            one per word, in input order.
        A single space-joined string of the decorated words.
        A flag that is True when Pastalie handling should be enforced (either a
            Pastalie prefix was seen, or the dialect is Pastalie without being
            prefix-only).

    Raises:
        ContentError: if a token cannot be resolved to any lexicon entry.
    """
    (pastalie, pastalie_prefix_only, words, prefixes, suffixes, slots) = _sanitizePastalie(tokens)
    pastalie_prefix_valid = False #Enforces Pastalie when a Pastalie prefix is present.

    word_list = lookup.readWords(words, db_con)
    decorated_words = []
    words_details = []
    for (w, p, s, l) in zip(words, prefixes, suffixes, slots):
        lexicon_entry = word_list.get(w.lower())
        if lexicon_entry is None:
            if w.isdigit(): #It's a number; synthesize a lexicon entry for it.
                lexicon_entry = ([w, w, w, 8, 1, None, ''],)
            elif p: #Reattach the prefix, since it may be a song or a mistakenly capitalized word.
                #elif (not if): a successfully synthesized numeric entry must not be
                #clobbered by a prefix re-lookup that may return None.
                song_check = p.lower() + w.lower()
                p = None
                lexicon_entry = lookup.readWords((song_check,), db_con).get(song_check)
            if lexicon_entry is None:
                raise ContentError("unknown word in input: %(word)s" % {
                 'word': w,
                })
        elif pastalie and p:
            pastalie_prefix_valid = True
            pastalie_entry = 0
            for (i, l_e) in enumerate(lexicon_entry):
                if l_e[4] % _DIALECT_SHIFT == _DLCT_PASTALIE: #Favour Pastalie forms.
                    pastalie_entry = i
            #Duplicate the best candidate, mark it as a noun, and use it to replace the list.
            #Bug fix: index with pastalie_entry (the favoured Pastalie form), not the
            #loop's final index i, which previously made the selection above dead code.
            new_entry = lexicon_entry[pastalie_entry][:]
            new_entry[3] = 4
            lexicon_entry = (new_entry,)
        else:
            if not s and w in _COLLIDING_EMOTION_VERBS: #Handle exceptions where Emotion Verbs match basic words.
                basic_form = w.replace('.', '')
                l_e = lookup.readWords((basic_form,), db_con).get(basic_form)
                if l_e: #Just in case this fails somehow.
                    #Prepend the basic form so it takes precedence as the display entry.
                    lexicon_entry = tuple([l_e[0]] + list(lexicon_entry))
        decorated_words.append(_decorateWord(lexicon_entry[0][0], p, s, l, False))
        words_details.append((lexicon_entry, p, s, l))
    return (words_details, ' '.join(decorated_words), pastalie_prefix_valid or (pastalie and not pastalie_prefix_only))
def _divideAndCapitaliseLine(words, db_con):
    """Split a word sequence into lines, normalising each recognised word.

    A recognised word of the ES(I) syntax class terminates the current line
    (when non-empty) and starts a new one. Unrecognised words are lowercased
    and collected for the caller.

    Returns a tuple of (completed lines plus the trailing partial line,
    the set of unrecognised words).
    """
    completed = []
    current = []
    unrecognised = set()
    es_i_values = lookup.SYNTAX_CLASS_REV['ES(I)']
    plain_words = lookup.readWords(tuple(set(words)), db_con)
    for token in words:
        (resolved, syntax_class, dialect, syllables) = _readWord(token.lower(), plain_words, db_con)
        if syntax_class <= 0:
            #Unknown word: keep it lowercased and report it back.
            lowered = resolved.lower()
            current.append(lowered)
            unrecognised.add(lowered)
            continue
        if current and syntax_class in es_i_values: #Trailing ES(I) begins a new line.
            completed.append(' '.join(current))
            current = []
        current.append(resolved)
    return (completed + [' '.join(current)], unrecognised)
def _dissectSyllables(words, db_con):
    """Split words into lines and produce an uppercased syllable stream.

    Recognised words contribute their syllables uppercased, every non-final
    syllable tagged with a trailing 'x'. Unrecognised words are emitted
    lowercased in the line, uppercased whole in the syllable stream, and
    collected for the caller. An ES(I)-class word closes the current line.

    Returns a tuple of (completed lines plus the trailing partial line,
    the syllable list, the set of unrecognised words).
    """
    completed = []
    current = []
    syllable_stream = []
    unrecognised = set()
    es_i_values = lookup.SYNTAX_CLASS_REV['ES(I)']
    plain_words = lookup.readWords(tuple(set(words)), db_con)
    for token in words:
        (resolved, syntax_class, dialect, syllables) = _readWord(token, plain_words, db_con)
        if syntax_class > 0:
            if current and syntax_class in es_i_values: #Trailing ES(I) begins a new line.
                completed.append(' '.join(current))
                current = []
            current.append(resolved)
            #Every syllable but the last gets an 'x' continuation marker.
            for fragment in syllables[:-1]:
                syllable_stream.append(fragment.upper() + 'x')
            syllable_stream.append(syllables[-1].upper())
        else:
            current.append(resolved.lower())
            syllable_stream.append(resolved.upper())
            unrecognised.add(resolved)
    return (completed + [' '.join(current)], syllable_stream, unrecognised)
def _applyPersistentEmotionSounds(es_i, es_ii, es_iii, words, db_con):
    """Prepend persistent Emotion Sounds to a line that does not open with ES(I).

    The three sound components are injected at the very start of the first
    line only when its opening recognised word is not of the ES(I) syntax
    class. An ES(I)-class word otherwise closes the current line; unknown
    words are lowercased and collected for the caller.

    Returns a tuple of (the full space-joined input line, the completed lines
    plus the trailing partial line, the set of unrecognised words).
    """
    completed = []
    current = []
    full_line = []
    unrecognised = set()
    es_i_values = lookup.SYNTAX_CLASS_REV['ES(I)']
    plain_words = lookup.readWords(tuple(set(words)), db_con)
    for token in words:
        (resolved, syntax_class, dialect, syllables) = _readWord(token, plain_words, db_con)
        if syntax_class > 0:
            if current and syntax_class in es_i_values: #Trailing ES(I) begins a new line.
                completed.append(' '.join(current))
                current = []
            if not (current or completed) and syntax_class not in es_i_values:
                #First word of the first line is not ES(I): inject the persistent sounds.
                current = [es_i, es_ii, es_iii]
            current.append(resolved)
            full_line.append(resolved)
        else:
            lowered = resolved.lower()
            current.append(lowered)
            full_line.append(lowered)
            unrecognised.add(lowered)
    return (' '.join(full_line), completed + [' '.join(current)], unrecognised)