def parse_word_clip(word, entryText):
    """Extract the pronunciation word and audio clip URL from entry text.

    The entry text is cleansed with ``cparser.cleanse_text`` and then searched
    for the text enclosed by the pronunciation URL markers and by the
    pronunciation word markers. Only the first match of each is used.

    Args:
        word: Search word. It is accepted for interface parity but is not
            consulted by the extraction.
        entryText: Raw dictionary entry text.

    Returns:
        A two-item list ``[pronunciationWord, pronunciationURL]``. An item is
        ``DICT_UNICODE_EMPTY_STR`` when its marker is not found.
    """
    _FUNC_NAME_ = "parse_word_clip"
    debug_template = "{0} :: {1}"

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, debug_template.format("entryText", type(entryText)))

    cleaned_text = cparser.cleanse_text(entryText, DICT_CLEAN_TEXT_PATTERNS, DICT_CLEAN_INNER_TEXT_PATTERNS, DICT_CLEAN_OUTER_TEXT_PATTERNS)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, debug_template.format("sourceText", type(cleaned_text)))

    # First enclosed URL path, expanded into the full audio URL
    clip_paths = [] + cparser.find_enclosed_text(DICT_MARKER_PRONUNCIATION_URL[0], DICT_MARKER_PRONUNCIATION_URL[1], cleaned_text)
    clip_url = DICT_AUDIO_URL.format(PATH=clip_paths[0]) if clip_paths else DICT_UNICODE_EMPTY_STR

    # First enclosed pronunciation word
    clip_words = [] + cparser.find_enclosed_text(DICT_MARKER_PRONUNCIATION_WORD[0], DICT_MARKER_PRONUNCIATION_WORD[1], cleaned_text)
    clip_word = clip_words[0] if clip_words else DICT_UNICODE_EMPTY_STR

    return [clip_word, clip_url]
def save_evaluation_practice_words(self, practiceMode, saveEnabled):
    """Append the active practice words to the practice file for the mode.

    Words already present in the file are not written again, and a word that
    occurs more than once in ``self.activePracticeWords`` is written once.

    Args:
        practiceMode: ``"test"`` or ``"revise"`` (case-insensitive). It
            selects the practice file; only the ``"test"`` file name is
            formatted with ``self.contestList``.
        saveEnabled: When false, nothing is read or written.

    Raises:
        ValueError: If saving is requested with words to save and
            ``practiceMode`` is neither ``"test"`` nor ``"revise"``.
    """
    _FUNC_NAME_ = "save_evaluation_practice_words"

    if not saveEnabled or len(self.activePracticeWords) == 0:
        return

    selectedMode = practiceMode.lower()
    if selectedMode == "test":
        practiceFileName = (SB_DATA_DIR + SB_PRACTICE_WORD_FILE).format(LISTID=self.contestList)
    elif selectedMode == "revise":
        practiceFileName = SB_DATA_DIR + SB_REVISION_WORD_FILE
    else:
        # Previously this fell through to an unbound practiceFileName
        raise ValueError("Unsupported practice mode: {0}".format(practiceMode))

    # Get previously saved practice words
    savedWords = set()
    if os.path.isfile(practiceFileName) and os.path.getsize(practiceFileName) > 0:
        # Use of splitlines() avoids the newline character from being stored in the word list
        savedWords.update(cfile.read(practiceFileName).splitlines())

    # Collect practice words that are not already saved
    newWords = []
    for word in self.activePracticeWords:
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("word", type(word)))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, word)

        if word not in savedWords:
            # Remember the word so a repeat later in the list is not saved twice
            savedWords.add(word)
            newWords.append(word)

    if newWords:
        cfile.append(practiceFileName, SB_NEWLINE.join(newWords) + SB_NEWLINE)
def get_dictionary_entry(connectionPool, word):
    """Download the dictionary entry for a word and return it as unicode text.

    Args:
        connectionPool: Object whose ``urlopen('GET', url)`` returns a
            response exposing a ``data`` attribute.
        word: Word to look up. Spaces are replaced with ``%20`` in the URL.

    Returns:
        The response data. A byte string (``str``) is decoded as UTF-8; any
        other type is returned unchanged.
    """
    _FUNC_NAME_ = "get_dictionary_entry"

    # Download dictionary entry
    dictEntryURL = DICT_ENTRY_URL.format(WORD=word).replace(" ", "%20")
    dictEntryURL = dictEntryURL.encode('utf-8')     # Handle URL strings in ascii
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("dictEntryURL", type(dictEntryURL)))
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, dictEntryURL)

    dictEntryResponse = connectionPool.urlopen('GET', dictEntryURL)
    responseData = dictEntryResponse.data
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("dictEntryResponse.data", type(responseData)))

    responseText = responseData
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("responseText", type(responseText)))

    # Convert entry text to unicode
    if isinstance(responseData, str):
        responseText = unicode(responseData, 'utf-8')
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("responseText", type(responseText)))

    return responseText
def play(fileName, audioOutput, loopCount, loopDelaySec):
    """Play an audio file through an ffmpeg shell command, optionally looping.

    Args:
        fileName: Path of the audio file. It is inserted into the shell
            command as-is.
        audioOutput: Output selector passed to ``get_audio_output``.
        loopCount: Number of times the command is executed.
        loopDelaySec: Seconds to sleep between executions (not after the last).

    Errors raised while playing are reported through ``coutput.print_err``
    and are not propagated. Interrupts such as Ctrl+C are propagated.
    """
    # Reference:
    # https://realpython.com/playing-and-recording-sound-python/#playing-audio-files
    # https://askubuntu.com/questions/115369/how-to-play-mp3-files-from-the-command-line
    # https://www.ffmpeg.org/ffplay.html
    # https://www.ffmpeg.org/ffmpeg-devices.html#Examples-8

    # Use aplay -L to find audio output device. e.g. HDMI is plughw
    # NOTE(review): fileName is not shell-quoted; names containing spaces or
    # shell metacharacters will break (or alter) the command - confirm callers
    # only pass safe names.
    playCommand = "ffmpeg -f alsa {outputdevice} -loglevel quiet -i {filename} 2>/dev/null".format(
        outputdevice=get_audio_output(audioOutput), filename=fileName)

    try:
        coutput.print_watcher("fileName")

        for loopIndex in range(0, loopCount):
            coutput.print_debug("Executing play")
            coutput.print_watcher("playCommand")
            os.system(playCommand)

            # Pause between repetitions, but not after the final one
            if loopIndex != (loopCount - 1):
                time.sleep(loopDelaySec)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit can
        # still stop a long playback loop
        coutput.print_err("Unable to play audio from " + fileName)
        coutput.print_watcher("sys.exc_info()")
def override_definitions(self, source, entry_word, overrides): _FUNC_NAME_ = "SimplifiedWordEntry.override_definitions" if len(overrides) > 0: self.source = source self.entry_word = entry_word # Remove duplicate definitions for override in overrides: # Handle overrides that are marked special by the application using a prefix e.g. * override_text = re.sub(ur'(^[^\(a-zA-Z0-9]|[\. ]+$)', DICT_UNICODE_EMPTY_STR, override, flags=re.IGNORECASE) coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'override') coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'override_text') for definition in self.definitions: definition_text = re.sub(ur'(^[^\(a-zA-Z0-9]|[\. ]+$)', DICT_UNICODE_EMPTY_STR, definition, flags=re.IGNORECASE) coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'definition') coutput.print_watcher(MOD_ERR_DEBUG, _FUNC_NAME_, 'definition_text') if definition_text == override_text: self.definitions.remove(definition) coutput.print_debug(MOD_ERR_DEBUG, _FUNC_NAME_, "Removed duplicate definition") # Override definitions self.definitions = overrides + self.definitions
def get_dictionary_source():
    """Return the dictionary source reported by ``cdict`` after initialization.

    Calls ``initialize_source()`` first, logs the type and value of
    ``cdict.DICT_SOURCE_NAME`` for debugging, and returns
    ``cdict.get_dictionary_source()``.
    """
    _FUNC_NAME_ = "get_dictionary_source"
    initialize_source()

    # Reference the attribute directly instead of eval()-ing its name
    sourceName = cdict.DICT_SOURCE_NAME
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("cdict.DICT_SOURCE_NAME", type(sourceName)))
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, sourceName)

    return cdict.get_dictionary_source()
def get_dictionary_source():
    """Initialize the dictionary source and return its identifier.

    Runs ``initialize_source()``, writes the type and value of
    ``cdict.DICT_SOURCE_NAME`` to the debug log, then returns the result of
    ``cdict.get_dictionary_source()``.
    """
    _FUNC_NAME_ = "get_dictionary_source"
    initialize_source()

    coutput.print_debug(
        ERR_DEBUG, _FUNC_NAME_,
        "{0} :: {1}".format("cdict.DICT_SOURCE_NAME", type(cdict.DICT_SOURCE_NAME)))
    # Log the value directly; eval() of a constant expression string is unnecessary
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, cdict.DICT_SOURCE_NAME)

    return cdict.get_dictionary_source()
def download(connectionPool, sourceURL, targetFileName):
    """Download ``sourceURL`` and write the response data to a file.

    Args:
        connectionPool: Object whose ``request('GET', url)`` returns a
            response exposing a ``data`` attribute.
        sourceURL: URL to fetch.
        targetFileName: Path of the file to (over)write in binary mode.
    """
    _FUNC_NAME_ = "download"

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("sourceURL", type(sourceURL)))
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, sourceURL)

    fileData = connectionPool.request('GET', sourceURL).data

    # The context manager closes the file even if the write fails
    with open(targetFileName, "wb") as targetFile:
        targetFile.write(fileData)
def cleanse_dictionary_entry(entryXML):
    """Strip selected inline markup tags from dictionary entry XML.

    The opening and closing forms of the ``d_link``, ``fw``, ``it`` and ``un``
    tags are removed; the text they enclose is kept.

    Args:
        entryXML: Entry XML as a byte string (``str``) or another type that
            supports ``encode('utf-8')``.

    Returns:
        The cleansed XML. A byte string result is decoded as UTF-8 before it
        is returned.
    """
    _FUNC_NAME_ = "cleanse_dictionary_entry"

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("entryXML", type(entryXML)))

    # Handle XML cleansing in ascii
    if isinstance(entryXML, str):
        cleansedXML = entryXML
    else:
        cleansedXML = entryXML.encode('utf-8')
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("cleansedXML", type(cleansedXML)))

    cleanseTagList = ['d_link', 'fw', 'it', 'un']
    for tag in cleanseTagList:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("tag", type(tag)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, tag)

        openingTag = "<{0}>".format(tag)
        closingTag = "</{0}>".format(tag)
        cleansedXML = cleansedXML.replace(openingTag, DICT_ASCII_EMPTY_STR).replace(closingTag, DICT_ASCII_EMPTY_STR)

    # Convert XML to unicode
    if isinstance(cleansedXML, str):
        return unicode(cleansedXML, 'utf-8')
    return cleansedXML
def parse_word_definition(word, entryText):
    """Collect the definitions enclosed by the definition markers in entry text.

    The entry text is cleansed with ``cparser.cleanse_text``; then, for every
    marker pair in ``DICT_MARKER_DEFINITION``, the enclosed texts found by
    ``cparser.find_enclosed_text`` are accumulated in marker order.

    Args:
        word: Search word. It is accepted for interface parity but is not
            consulted by the extraction.
        entryText: Raw dictionary entry text.

    Returns:
        List of definition texts (empty when no marker matches).
    """
    _FUNC_NAME_ = "parse_word_definition"
    wordDefinitions = []

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("entryText", type(entryText)))

    sourceText = cparser.cleanse_text(entryText, DICT_CLEAN_TEXT_PATTERNS,
                                      DICT_CLEAN_INNER_TEXT_PATTERNS,
                                      DICT_CLEAN_OUTER_TEXT_PATTERNS)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("sourceText", type(sourceText)))

    for marker in DICT_MARKER_DEFINITION:
        wordDefinitions = wordDefinitions + cparser.find_enclosed_text(marker[0], marker[1], sourceText)

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("wordDefinitions", type(wordDefinitions)))
    # Log the list directly; eval() of the variable name is unnecessary
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, wordDefinitions)

    return wordDefinitions
def play_url(connectionPool, sourceURL, audioOutput, loopCount, loopDelay):
    """Download an audio URL to a temporary file, play it, and clean up.

    Only URLs containing ``.mp3`` or ``.wav`` are played; any other URL is
    reported as an error. The audio output is switched with
    ``set_audio_output(audioOutput)`` before playing and set back to
    ``'auto'`` afterwards, also when downloading or playing fails.

    Args:
        connectionPool: Connection object passed through to ``download``.
        sourceURL: URL of the audio clip.
        audioOutput: Output selector for ``set_audio_output`` and ``play``.
        loopCount: Passed to ``play``.
        loopDelay: Passed to ``play``.

    Errors are reported through ``coutput.print_err`` and are not propagated.
    """
    try:
        coutput.print_debug("Executing set_audio_output")
        set_audio_output(audioOutput)

        try:
            if '.mp3' in sourceURL or '.wav' in sourceURL:
                tempFileName = "dlfile_ts{TIMESTAMP}_rnd{RAND}.tmp".format(
                    TIMESTAMP=time.strftime("%Y%m%d%H%M%S"), RAND=str(uuid.uuid4()))

                download(connectionPool, sourceURL, tempFileName)
                try:
                    play(tempFileName, audioOutput, loopCount, loopDelay)
                finally:
                    # The file exists once download() returned; remove it even if play() fails
                    delete(tempFileName)
            else:
                coutput.print_err("Unable to play audio from " + sourceURL)
        finally:
            # Always hand the audio output back, not only on the success path
            set_audio_output('auto')
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate
        coutput.print_err("Unable to play audio from " + sourceURL)
def lookup_word(connectionPool, pronAudioOutput, pronLoopCount, pronLoopDelaySec, word, *lookupSource):
    """Look up a word and display its entry from one or more dictionary sources.

    Args:
        connectionPool: Connection object passed to the dictionary sources.
        pronAudioOutput: Passed through to ``display_dictionary_entry``.
        pronLoopCount: Passed through to ``display_dictionary_entry``.
        pronLoopDelaySec: Passed through to ``display_dictionary_entry``.
        word: Word to look up.
        *lookupSource: Optional source selector; only the first value is used.
            Omitted: the entry from ``fetch_dictionary_entry`` is displayed.
            ``'all'``: every source in ``PRIORITIZED_DICT_SOURCES`` is displayed.
            A key of ``DICT_SOURCES`` (case-insensitive): that source only.
            Anything else prints an error message.
    """
    _FUNC_NAME_ = "lookup_word"

    if len(lookupSource) == 0:
        dictEntry = fetch_dictionary_entry(connectionPool, word)
        currentDefinitions = dictEntry[1]
        source = dictEntry[2]
        currentClipWord = dictEntry[3]
        currentClipURL = dictEntry[4]
        pronSource = dictEntry[5]
        display_dictionary_entry(connectionPool, pronAudioOutput, pronLoopCount, pronLoopDelaySec, word, currentDefinitions, source, currentClipWord, currentClipURL, pronSource)
        return

    requestedSource = lookupSource[0].lower()

    if requestedSource == 'all' or requestedSource in DICT_SOURCES:
        if requestedSource == 'all':
            dictSources = [] + PRIORITIZED_DICT_SOURCES
        else:
            dictSources = [DICT_SOURCES[requestedSource]]

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("dictSources", type(dictSources)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, dictSources)

        for dictSource in dictSources:
            # Definition and pronunciation come from the same source here
            source = dictSource.get_dictionary_source()
            pronSource = source

            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("source", type(source)))
            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, source)

            dictEntryText = dictSource.get_dictionary_entry(connectionPool, word)
            currentDefinitions = dictSource.parse_word_definition(word, dictEntryText)

            # Take the first two items only: some parse_word_clip implementations
            # return a third item (pronunciation text) after the word and URL
            currentClipWord, currentClipURL = dictSource.parse_word_clip(word, dictEntryText)[:2]

            display_dictionary_entry(connectionPool, pronAudioOutput, pronLoopCount, pronLoopDelaySec, word, currentDefinitions, source, currentClipWord, currentClipURL, pronSource)
    else:
        print("")
        displayMessage = "ERROR: Unable to lookup {WORD}. Dictionary source {SOURCE} not supported".format(WORD=word, SOURCE=lookupSource[0])
        coutput.print_color(ERROR_TEXT_COLOR, displayMessage)
        print("")
def get_dictionary_entry(connectionPool, word):
    """Download the dictionary entry for a word and return it as unicode text.

    Args:
        connectionPool: Object whose ``urlopen('GET', url)`` returns a
            response exposing a ``data`` attribute.
        word: Word to look up. Spaces are replaced with ``%20`` in the URL.

    Returns:
        The response data, decoded as UTF-8 when it is a byte string
        (``str``). An empty unicode string is returned when the request fails
        with ``urllib3.exceptions.MaxRetryError``.
    """
    _FUNC_NAME_ = "get_dictionary_entry"

    # Download dictionary entry
    dictEntryURL = DICT_ENTRY_URL.format(WORD=word).replace(" ", "%20")
    dictEntryURL = dictEntryURL.encode('utf-8')     # Handle URL strings in ascii
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("dictEntryURL", type(dictEntryURL)))
    # Log the URL directly; eval() of the variable name is unnecessary
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, dictEntryURL)

    try:
        dictEntryResponse = connectionPool.urlopen('GET', dictEntryURL)
        coutput.print_debug(
            ERR_DEBUG, _FUNC_NAME_,
            "{0} :: {1}".format("dictEntryResponse.data", type(dictEntryResponse.data)))
        responseText = dictEntryResponse.data
    except urllib3.exceptions.MaxRetryError:
        # Treat an unreachable source as an empty entry
        responseText = ""

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("responseText", type(responseText)))

    # Convert entry text to unicode
    if isinstance(responseText, str):
        responseText = unicode(responseText, 'utf-8')
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("responseText", type(responseText)))

    return responseText
def parse_word_clip(word, entryText):
    """Return the pronunciation word and audio URL found in entry text.

    After cleansing the entry text with ``cparser.cleanse_text``, the first
    text enclosed by the pronunciation URL markers is formatted into
    ``DICT_AUDIO_URL`` and the first text enclosed by the pronunciation word
    markers is taken as the pronunciation word.

    Args:
        word: Search word. It is accepted for interface parity but is not
            consulted by the extraction.
        entryText: Raw dictionary entry text.

    Returns:
        ``[pronunciationWord, pronunciationURL]``; each item defaults to
        ``DICT_UNICODE_EMPTY_STR`` when nothing is found.
    """
    _FUNC_NAME_ = "parse_word_clip"

    found_word = DICT_UNICODE_EMPTY_STR
    found_url = DICT_UNICODE_EMPTY_STR

    coutput.print_debug(
        ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("entryText", type(entryText)))

    cleansed = cparser.cleanse_text(
        entryText,
        DICT_CLEAN_TEXT_PATTERNS,
        DICT_CLEAN_INNER_TEXT_PATTERNS,
        DICT_CLEAN_OUTER_TEXT_PATTERNS)

    coutput.print_debug(
        ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("sourceText", type(cleansed)))

    # Audio clip path -> full audio URL
    url_open, url_close = DICT_MARKER_PRONUNCIATION_URL[0], DICT_MARKER_PRONUNCIATION_URL[1]
    url_hits = [] + cparser.find_enclosed_text(url_open, url_close, cleansed)
    if url_hits:
        found_url = DICT_AUDIO_URL.format(PATH=url_hits[0])

    # Pronunciation word
    word_open, word_close = DICT_MARKER_PRONUNCIATION_WORD[0], DICT_MARKER_PRONUNCIATION_WORD[1]
    word_hits = [] + cparser.find_enclosed_text(word_open, word_close, cleansed)
    if word_hits:
        found_word = word_hits[0]

    return [found_word, found_url]
def fetch_dictionary_entry(connectionPool, word):
    """Find definitions and a pronunciation clip for a word across sources.

    Sources in ``PRIORITIZED_DICT_SOURCES`` are queried in order. The first
    source that yields definitions supplies the definitions, and the first
    source that yields a non-empty clip word supplies the pronunciation; the
    two may come from different sources. The search stops once both are found.

    Args:
        connectionPool: Connection object passed to each source.
        word: Word to look up.

    Returns:
        ``[word, wordDefinitions, wordDefinitionSource, pronunciationWord,
        pronunciationURL, pronunciationSource]``. Items that were not found
        are left as an empty list or empty string.
    """
    _FUNC_NAME_ = "fetch_dictionary_entry"

    wordDefinitionSource = ""
    wordDefinitions = []
    wordDefinitionFound = False

    pronunciationSource = ""
    pronunciationWord = ""
    pronunciationURL = ""
    wordPronunciationFound = False

    for dictSource in PRIORITIZED_DICT_SOURCES:
        dictEntryText = dictSource.get_dictionary_entry(connectionPool, word)

        if not wordDefinitionFound:
            currentDefinitions = dictSource.parse_word_definition(word, dictEntryText)
            if len(currentDefinitions) > 0:
                wordDefinitionSource = dictSource.get_dictionary_source()
                wordDefinitions = currentDefinitions
                wordDefinitionFound = True

            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("wordDefinitionSource", type(wordDefinitionSource)))
            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, wordDefinitionSource)

        if not wordPronunciationFound:
            # Take the first two items only: some parse_word_clip implementations
            # return a third item (pronunciation text) after the word and URL
            currentClipWord, currentClipURL = dictSource.parse_word_clip(word, dictEntryText)[:2]
            if currentClipWord != "":
                pronunciationSource = dictSource.get_dictionary_source()
                pronunciationWord, pronunciationURL = currentClipWord, currentClipURL
                wordPronunciationFound = True

            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("pronunciationSource", type(pronunciationSource)))
            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, pronunciationSource)

        if wordDefinitionFound and wordPronunciationFound:
            break

    return [word, wordDefinitions, wordDefinitionSource, pronunciationWord, pronunciationURL, pronunciationSource]
def _get_inflections(self, root):
    """ Returns a generator of Inflections found in root.

    inflection nodes that have <il>also</il> (or <il>or</il>) will have their
    inflected form added to the previous inflection entry.

    Two kinds of nodes are read:
      * "in" nodes: <il> gives the label, <if> the form, <sound>/<wav> the
        audio fragment and <pr> the pronunciation;
      * "uro" nodes: <ure> gives the form, <sound>/<wav> the audio fragment
        and <pr> the pronunciation (no label).

    The spelling of a form is the form text with "*" removed. A <sound>
    element without a <wav> child is ignored.
    """
    _FUNC_NAME_ = "CollegiateDictionary._get_inflections"

    dict_helper = MWDictionaryEntry()

    for node in root.findall("in"):
        label, forms, spellings, sound_fragments, sound_urls, pronunciations = None, [], [], [], [], []
        for child in node:
            coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("child.tag", child.tag))
            if child.tag == 'il':
                if child.text in ['also', 'or']:
                    pass  # next form will be added to prev inflection-list
                else:
                    # A new label starts a new inflection; emit the one collected so far
                    if label is not None or forms != []:
                        yield Inflection(label, forms, spellings, sound_fragments, sound_urls, pronunciations)
                    label, forms, spellings, sound_fragments, sound_urls, pronunciations = child.text, [], [], [], [], []
            if child.tag == 'if':
                forms.append(child.text)
                # child.text is None when the element has no leading text
                spellings.append((child.text or "").replace("*", ""))
            if child.tag == 'sound':
                wav = child.find("wav")
                if wav is not None:
                    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("child.find(\"wav\").text", wav.text))
                    sound_fragments.append(wav.text)
                    sound_urls.append(dict_helper.build_sound_url(wav.text))
            if child.tag == 'pr':
                pronunciations.append(child.text)
        if label is not None or forms != []:
            yield Inflection(label, forms, spellings, sound_fragments, sound_urls, pronunciations)

    for node in root.findall("uro"):
        label, forms, spellings, sound_fragments, sound_urls, pronunciations = None, [], [], [], [], []
        for child in node:
            if child.tag == 'ure':
                forms.append(child.text)
                # child.text is None when the element has no leading text
                spellings.append((child.text or "").replace("*", ""))
            if child.tag == 'sound':
                wav = child.find("wav")
                if wav is not None:
                    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("child.find(\"wav\").text", wav.text))
                    sound_fragments.append(wav.text)
                    sound_urls.append(dict_helper.build_sound_url(wav.text))
            if child.tag == 'pr':
                pronunciations.append(child.text)
        if label is not None or forms != []:
            yield Inflection(label, forms, spellings, sound_fragments, sound_urls, pronunciations)
def parse_word_definition(word, entryText):
    """Extract the definitions enclosed by the definition markers in entry text.

    The entry text is first cleansed with ``cparser.cleanse_text``. Every
    marker pair in ``DICT_MARKER_DEFINITION`` is then applied in order and
    the enclosed texts returned by ``cparser.find_enclosed_text`` are
    concatenated.

    Args:
        word: Search word. It is accepted for interface parity but is not
            consulted by the extraction.
        entryText: Raw dictionary entry text.

    Returns:
        List of definition texts; empty when no marker matches.
    """
    _FUNC_NAME_ = "parse_word_definition"
    debugFormat = "{0} :: {1}"

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, debugFormat.format("entryText", type(entryText)))
    sourceText = cparser.cleanse_text(entryText, DICT_CLEAN_TEXT_PATTERNS, DICT_CLEAN_INNER_TEXT_PATTERNS, DICT_CLEAN_OUTER_TEXT_PATTERNS)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, debugFormat.format("sourceText", type(sourceText)))

    wordDefinitions = []
    for marker in DICT_MARKER_DEFINITION:
        wordDefinitions = wordDefinitions + cparser.find_enclosed_text(marker[0], marker[1], sourceText)

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, debugFormat.format("wordDefinitions", type(wordDefinitions)))
    # The list is logged directly rather than through eval() of its name
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, wordDefinitions)

    return wordDefinitions
def __init__(self, listID, mode, selection):
    """Load the word list and select the active range of words.

    Args:
        listID: Contest list identifier. It is substituted into
            ``SB_WORD_MULTI_FILES`` to find the word files, which are read in
            sorted file-name order.
        mode: Selection mode (case-insensitive):
            ``"chapter"`` - ``selection`` is a 1-based chapter number of
            ``SB_CHAPTER_SIZE`` words;
            ``"count"`` - ``selection`` is ``"start"`` or ``"start-end"``
            using 1-based word positions;
            ``"word"`` - ``selection`` is ``"word"`` or ``"first-last"``; the
            process exits with status 1 when a word is not in the list;
            any other value - ``selection`` is a sample size and that many
            distinct word indexes are drawn at random.
        selection: Mode-specific selection string, split on ``"-"``.

    For the range modes the end of the range is limited to the last word of
    the list before ``activeWordIndexList`` is built, so the list never
    contains an index past the end of ``wordList``.
    """
    _FUNC_NAME_ = "__init__"

    self.contestList = listID
    self.wordList = []

    wordFileDir = SB_WORD_MULTI_FILES.format(WORD_FILE_PATTERN=listID)
    for wordFileName in sorted(glob.glob(wordFileDir)):
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "wordFileName :: {0}".format(wordFileName))
        # Use of splitlines() avoids the newline character from being stored in the word list
        self.wordList = self.wordList + cfile.read(wordFileName).splitlines()

    rangeSelection = selection.split("-")
    selectionMode = mode.lower()
    lastWordIndex = len(self.wordList) - 1
    self.activeChapter = "0"

    if selectionMode == "chapter":
        self.activeChapter = int(rangeSelection[0])
        self.activeRangeStart = (self.activeChapter - 1) * SB_CHAPTER_SIZE
        self.activeRangeEnd = self.activeRangeStart + SB_CHAPTER_SIZE - 1

    elif selectionMode == "count":
        self.activeRangeStart = int(rangeSelection[0]) - 1
        if len(rangeSelection) > 1:
            self.activeRangeEnd = int(rangeSelection[1]) - 1
        else:
            self.activeRangeEnd = lastWordIndex

    elif selectionMode == "word":
        self.activeRangeStart = self.get_word_index(rangeSelection[0])
        if self.activeRangeStart < 0:
            print("ERROR: Unable to locate '{0}' in word list".format(rangeSelection[0]))
            exit(1)

        if len(rangeSelection) > 1:
            self.activeRangeEnd = self.get_word_index(rangeSelection[1])
            if self.activeRangeEnd < 0:
                print("ERROR: Unable to locate '{0}' in word list".format(rangeSelection[1]))
                exit(1)
        else:
            self.activeRangeEnd = lastWordIndex

    else:
        # Random sample: there is no contiguous range
        self.activeRangeStart = -1
        self.activeRangeEnd = -1

        sampleSize = int(rangeSelection[0])
        if sampleSize > self.word_count():
            sampleSize = self.word_count()

        self.activeWordIndexList = random.sample(xrange(0, self.word_count()), sampleSize)

    if selectionMode in ("chapter", "count", "word"):
        # Clamp the range BEFORE building the index list; building it first
        # left indexes beyond the end of the word list in activeWordIndexList
        if self.activeRangeEnd > lastWordIndex:
            self.activeRangeEnd = lastWordIndex
        self.activeWordIndexList = list(range(self.activeRangeStart, self.activeRangeEnd + 1))

    self.activeWord = SB_EMPTY_STRING
    self.activeEntry = SB_EMPTY_STRING
    self.activeDefinition = []
    self.activePronunciation = SB_EMPTY_STRING
    self.activePronunciationWord = SB_EMPTY_STRING

    self.activeTestDate = SB_EMPTY_STRING
    self.activeTestScore = SB_EMPTY_STRING
    self.activeTestValuations = []
    self.activePracticeWords = []
def _definition_first_text(element):
    """Return the data of element's first child when it is a text node, else None."""
    firstChild = element.firstChild
    if firstChild is not None and firstChild.nodeType == firstChild.TEXT_NODE:
        return firstChild.data
    return None


def _definition_strip_markers(text):
    """Remove everything through the first colon, then from the last remaining colon on."""
    text = re.sub("^[^:]*:", DICT_ASCII_EMPTY_STR, text)
    return re.sub(":[^:]*$", DICT_ASCII_EMPTY_STR, text)


def _definition_as_unicode(text):
    """Decode a byte string as UTF-8; return any other value unchanged."""
    if isinstance(text, str):
        return unicode(text, 'utf-8')
    return text


def _definition_entry_matches(entryElement, searchWord):
    """Tell whether any <hw>, <ure> or <if> text of the entry equals searchWord (ignoring case and "*")."""
    target = searchWord.lower()
    for tagName in ('hw', 'ure', 'if'):
        for element in entryElement.getElementsByTagName(tagName):
            data = _definition_first_text(element)
            if data is not None and data.replace("*", DICT_ASCII_EMPTY_STR).lower() == target:
                return True
    return False


def _definition_collect(entryElement, funcName):
    """Return the definitions found in the <dt> elements (and their <sx> children) of an entry."""
    definitions = []

    for dtElement in entryElement.getElementsByTagName('dt'):
        dtData = _definition_first_text(dtElement)
        if dtData is not None:
            dtText = _definition_strip_markers(dtData)
            if dtText != DICT_ASCII_EMPTY_STR:
                coutput.print_debug(ERR_DEBUG, funcName, "{0} :: {1}".format("dtText", type(dtText)))
                coutput.print_debug(ERR_DEBUG, funcName, dtText)
                definitions.append(_definition_as_unicode(dtText))

        # Process <sx> elements: their non-empty texts form one combined definition
        sxTexts = []
        for sxElement in dtElement.getElementsByTagName('sx'):
            sxData = _definition_first_text(sxElement)
            if sxData is not None:
                sxText = _definition_strip_markers(sxData)
                if sxText != DICT_ASCII_EMPTY_STR:
                    sxTexts.append(sxText)

        if sxTexts:
            # join() avoids the dangling ", " that was left when the last <sx> was skipped
            definitions.append(_definition_as_unicode(", ".join(sxTexts)))

    return definitions


def parse_word_definition(word, entryXML):
    """Extract the definitions of a word from dictionary entry XML.

    The XML is cleansed with ``cleanse_dictionary_entry`` and parsed with
    ``minidom``. Definitions are taken from the <dt> elements (and the <sx>
    elements inside them) of every <entry> whose <hw>, <ure> or <if> text
    equals ``word`` (case-insensitive, "*" ignored). When no such entry
    yields a definition, the definitions of all entries are returned.

    Elements without a leading text node are skipped.

    Args:
        word: Word whose definitions are wanted.
        entryXML: Dictionary entry XML.

    Returns:
        List of definition texts; byte strings are decoded as UTF-8.
    """
    _FUNC_NAME_ = "parse_word_definition"
    searchWord = word

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("entryXML", type(entryXML)))

    sourceXML = cleanse_dictionary_entry(entryXML)

    # Handle XML cleansing in ascii
    if isinstance(sourceXML, unicode):
        sourceXML = sourceXML.encode('utf-8')
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("sourceXML", type(sourceXML)))

    dictEntryXML = minidom.parseString(sourceXML)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("dictEntryXML", type(dictEntryXML)))

    entryElements = dictEntryXML.getElementsByTagName('entry')

    # Process <entry> elements that match the search word
    wordDefinition = []
    for entryElement in entryElements:
        if _definition_entry_matches(entryElement, searchWord):
            wordDefinition.extend(_definition_collect(entryElement, _FUNC_NAME_))

    # Scan all entries without matching, if no definitions were retrieved
    if len(wordDefinition) == 0:
        for entryElement in entryElements:
            wordDefinition.extend(_definition_collect(entryElement, _FUNC_NAME_))

    # Handle word definitions in unicode
    return wordDefinition
def _clip_first_text(element):
    """Return the data of element's first child when it is a text node, else None."""
    firstChild = element.firstChild
    if firstChild is not None and firstChild.nodeType == firstChild.TEXT_NODE:
        return firstChild.data
    return None


def parse_word_clip(word, entryXML):
    """Locate the pronunciation word and audio clip URL in dictionary entry XML.

    The XML is cleansed with ``cleanse_dictionary_entry``, parsed with
    ``minidom`` and searched in up to four passes; a later pass only runs
    when no audio clip has been found yet:

      1. <uro> elements whose <ure> text equals ``word``;
      2. <in> elements whose <if> text equals ``word``;
      3. <entry> elements with a <hw> text equal to ``word``;
      4. the first <wav> of any entry, paired with that entry's first
         non-empty <hw>.

    "*" characters are removed from candidate words before comparing.
    Elements without a leading text node are skipped.

    Args:
        word: Word whose audio clip is wanted.
        entryXML: Dictionary entry XML.

    Returns:
        ``[audioClipWord, audioClipURL]``, or two ``DICT_UNICODE_EMPTY_STR``
        values when no audio clip is found.
    """
    _FUNC_NAME_ = "parse_word_clip"
    searchWord = word

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("entryXML", type(entryXML)))

    sourceXML = cleanse_dictionary_entry(entryXML)

    # Handle XML cleansing in ascii
    if isinstance(sourceXML, unicode):
        sourceXML = sourceXML.encode('utf-8')
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("sourceXML", type(sourceXML)))

    dictEntryXML = minidom.parseString(sourceXML)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("dictEntryXML", type(dictEntryXML)))

    entryElements = dictEntryXML.getElementsByTagName('entry')

    # Pass #1: Process <uro> tag to locate matching entry
    # Pass #2: Process <in> tag to locate matching entry
    # Both passes have the same shape and differ only in the tag names.
    for groupTag, wordTag in (('uro', 'ure'), ('in', 'if')):
        wordFound = False
        audioClipFound = False
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR

        for entryElement in entryElements:
            for groupElement in entryElement.getElementsByTagName(groupTag):

                # Process first populated word element to get root word
                for wordElement in groupElement.getElementsByTagName(wordTag):
                    wordData = _clip_first_text(wordElement)
                    if wordData is not None:
                        audioClipWord = wordData.replace("*", DICT_UNICODE_EMPTY_STR).strip()
                        if audioClipWord != DICT_UNICODE_EMPTY_STR:
                            break

                # Process first populated <wav> element to get audio clip
                for wavElement in groupElement.getElementsByTagName('wav'):
                    wavData = _clip_first_text(wavElement)
                    if wavData is not None:
                        audioClip = wavData.strip()
                        if audioClip != DICT_UNICODE_EMPTY_STR:
                            break

                # NOTE(review): audioClipWord/audioClip are not reset per group,
                # so a value from an earlier group can carry over - kept as is.
                if audioClipWord == searchWord:
                    wordFound = True
                    if audioClip != DICT_UNICODE_EMPTY_STR:
                        audioClipFound = True

                if wordFound:
                    break

            if wordFound:
                break

        if audioClipFound:
            break

    # Pass #3: Process <hw> tag to locate matching entry, if no match found
    if not audioClipFound:
        wordFound = False
        audioClipFound = False
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR

        for entryElement in entryElements:
            for hwElement in entryElement.getElementsByTagName('hw'):
                hwData = _clip_first_text(hwElement)
                if hwData is not None:
                    audioClipWord = hwData.replace("*", DICT_UNICODE_EMPTY_STR).strip()
                    if audioClipWord == searchWord:
                        wordFound = True

                        # Process <wav> elements to get audio clip
                        for wavElement in entryElement.getElementsByTagName('wav'):
                            wavData = _clip_first_text(wavElement)
                            if wavData is not None:
                                audioClip = wavData.strip()
                                if audioClip != DICT_UNICODE_EMPTY_STR:
                                    audioClipFound = True
                                    break

                if wordFound:
                    break

            if wordFound:
                break

    # Pass #4: Process <wav> tag to locate first entry, if no match found
    if not audioClipFound:
        wordFound = False
        audioClipFound = False
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR

        for entryElement in entryElements:
            for wavElement in entryElement.getElementsByTagName('wav'):
                wavData = _clip_first_text(wavElement)
                if wavData is not None:
                    audioClip = wavData.strip()
                    if audioClip != DICT_UNICODE_EMPTY_STR:
                        audioClipFound = True

                        # Process <hw> elements to get root word
                        for hwElement in entryElement.getElementsByTagName('hw'):
                            hwData = _clip_first_text(hwElement)
                            if hwData is not None:
                                audioClipWord = hwData.replace("*", DICT_UNICODE_EMPTY_STR).strip()
                                if audioClipWord != DICT_UNICODE_EMPTY_STR:
                                    # Was "wordFound == True", a comparison with no effect
                                    wordFound = True
                                    break

                if audioClipFound:
                    break

            if audioClipFound:
                break

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("searchWord", type(searchWord)))
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, searchWord)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("audioClipWord", type(audioClipWord)))
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, audioClipWord)
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("audioClip", type(audioClip)))
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, audioClip)

    # Return audioClipWord and audioClip, if found
    if audioClipFound:
        # Determine audio clip folder
        # Reference: http://www.dictionaryapi.com/info/faq-audio-image.htm
        if re.match('^bix.*', audioClip):
            audioClipFolder = "bix"
        elif re.match('^gg.*', audioClip):
            audioClipFolder = "gg"
        elif re.match('^[0-9].*', audioClip):
            audioClipFolder = "number"
        else:
            audioClipFolder = audioClip[0:1]

        # Determine audio clip URL
        audioClipURL = DICT_AUDIO_URL.format(FOLDER=audioClipFolder, CLIP=audioClip)

        return [audioClipWord, audioClipURL]
    else:
        return [DICT_UNICODE_EMPTY_STR, DICT_UNICODE_EMPTY_STR]
def cleanse_text(rawText, rawTextPatterns, rawInnerTextPatterns, rawOuterTextPatterns):
    """Remove unwanted text from ``rawText`` using three kinds of patterns.

    All substitutions are regular-expression based and use ``re.DOTALL``.

    Args:
        rawText: Text to cleanse.
        rawTextPatterns: Patterns whose matches are removed entirely.
        rawInnerTextPatterns: ``(start, end)`` pattern pairs; the text between
            the two is removed while the matched start and end are kept.
        rawOuterTextPatterns: ``(start, end)`` pattern pairs; the matched
            start and end are removed while the text between them is kept.

    Returns:
        The cleansed text.
    """
    _FUNC_NAME_ = "cleanse_text"

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("rawText", type(rawText)))

    cleansedText = rawText
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("cleansedText", type(cleansedText)))

    # Cleanse text patterns
    for pattern in rawTextPatterns:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("pattern", type(pattern)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, pattern)
        cleansedText = re.sub(pattern, UNICODE_EMPTY_STR, cleansedText, flags=re.DOTALL)

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("cleansedText", type(cleansedText)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, cleansedText)

    # Cleanse inner text surrounded by text patterns
    for enclosure in rawInnerTextPatterns:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("enclosure", type(enclosure)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, enclosure)
        # Groups 1 and 2 capture the start and end so only the inner text is dropped
        pattern = r'(' + enclosure[0] + r').*?(' + enclosure[1] + r')'
        cleansedText = re.sub(pattern, r'\g<1>\g<2>', cleansedText, flags=re.DOTALL)

    # Cleanse outer text patterns preserving enclosed contents
    for enclosure in rawOuterTextPatterns:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("enclosure", type(enclosure)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, enclosure)
        pattern = enclosure[0] + r'(.*?)' + enclosure[1]
        cleansedText = re.sub(pattern, r'\g<1>', cleansedText, flags=re.DOTALL)

    return cleansedText
def parse_word_clip(word, entryXML):
    """Find the audio clip, spelling and pronunciation for ``word``.

    The entries returned by ``dictionary.lookup`` are searched in up to three
    passes; a later pass only runs when no audio clip has been found yet:

    1. an entry whose headword spelling equals ``word``;
    2. an inflection whose spelling equals ``word``;
    3. the first entry that carries any audio at all.

    Args:
        word: The word to search for.
        entryXML: Entry text handed to ``dictionary.lookup``; a ``unicode``
            value is encoded to UTF-8 first (Python 2).

    Returns:
        ``[audioClipWord, audioClip, audioClipPron]``. All three are
        ``DICT_UNICODE_EMPTY_STR`` when no clip was found or
        ``api.WordNotFoundException`` was raised. ``audioClipPron`` is only
        populated by pass 1. Byte ``str`` values are decoded to ``unicode``
        before returning.
    """
    _FUNC_NAME_ = "parse_word_clip"
    searchWord = word

    # print_watcher receives expressions as *strings*; presumably it resolves
    # them in this function's scope, so local names here must not be renamed
    # without updating the strings -- TODO confirm against coutput.
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entryXML')
    sourceXML = entryXML
    if isinstance(sourceXML, unicode):
        sourceXML = sourceXML.encode('utf-8')
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'sourceXML')

    dictionary = api.CollegiateDictionary(DICT_KEY)

    wordFound = False
    audioClipFound = False
    audioClip = DICT_UNICODE_EMPTY_STR
    audioClipWord = DICT_UNICODE_EMPTY_STR
    audioClipPron = DICT_UNICODE_EMPTY_STR

    try:
        # Pass #1: Find matching headword spelling
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Start of Pass #1")

        # lookup() is called afresh in every pass; presumably it returns a
        # one-shot iterator -- TODO confirm.
        entries = dictionary.lookup(searchWord, sourceXML)
        for entry in entries:
            coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entry.spelling')
            if searchWord == entry.spelling:
                # Only the first audio item of the matching entry is used.
                for audio in entry.audio:
                    audioClipWord = entry.spelling
                    audioClipPron = entry.pronunciation
                    wordFound = True
                    audioClip = audio
                    audioClipFound = True
                    if wordFound: break
            if wordFound: break

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "End of Pass #1")
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipFound')
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordFound')

        # Pass #2: Find matching inflection
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Start of Pass #2")
        if audioClipFound == False:
            # Start from a clean slate before searching the inflections.
            wordFound = False
            audioClip = DICT_UNICODE_EMPTY_STR
            audioClipWord = DICT_UNICODE_EMPTY_STR
            audioClipPron = DICT_UNICODE_EMPTY_STR

            entries = dictionary.lookup(searchWord, sourceXML)
            # NOTE(review): the nesting level of the two `if wordFound: break`
            # statements below was reconstructed -- confirm they sit on the
            # inflection and entry loops as intended.
            for entry in entries:
                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'entry.spelling')
                for inflection in entry.inflections:
                    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'inflection.spellings')
                    for spelling in inflection.spellings:
                        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'searchWord')
                        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'spelling')
                        if searchWord == spelling:
                            audioClipWord = spelling
                            wordFound = True
                            coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'inflection.sound_urls')
                            # Take the first sound URL only; a matching
                            # inflection without sound URLs leaves
                            # audioClipFound False, so pass 3 still runs.
                            for sound_url in inflection.sound_urls:
                                coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'sound_url')
                                audioClip = sound_url
                                audioClipFound = True
                                break
                    if wordFound: break
                if wordFound: break

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "End of Pass #2")
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipFound')
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordFound')

        # Pass #3: Find pronunciation for first entry, if no match found
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Start of Pass #3")
        if audioClipFound == False:
            wordFound = False
            audioClip = DICT_UNICODE_EMPTY_STR
            audioClipWord = DICT_UNICODE_EMPTY_STR
            audioClipPron = DICT_UNICODE_EMPTY_STR

            entries = dictionary.lookup(searchWord, sourceXML)
            # Fallback: accept the first audio item of the first entry that
            # has one, whatever its spelling.
            for entry in entries:
                for audio in entry.audio:
                    audioClipWord = entry.spelling
                    wordFound = True
                    audioClip = audio
                    audioClipFound = True
                    if wordFound: break
                if wordFound: break

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "End of Pass #3")
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipFound')
        coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'wordFound')

    except api.WordNotFoundException:
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR
        audioClipPron = DICT_UNICODE_EMPTY_STR

    # A spelling may have matched without any audio; report nothing then.
    if not audioClipFound:
        audioClip = DICT_UNICODE_EMPTY_STR
        audioClipWord = DICT_UNICODE_EMPTY_STR
        audioClipPron = DICT_UNICODE_EMPTY_STR

    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'searchWord')
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClipWord')
    coutput.print_watcher(ERR_DEBUG, _FUNC_NAME_, 'audioClip')

    # Return audioClipWord and audioClip, if found
    # Byte strings are decoded so callers always receive unicode (Python 2).
    if isinstance(audioClipWord, str):
        audioClipWord = unicode(audioClipWord, 'utf-8')
    if isinstance(audioClip, str):
        audioClip = unicode(audioClip, 'utf-8')
    if isinstance(audioClipPron, str):
        audioClipPron = unicode(audioClipPron, 'utf-8')

    return [audioClipWord, audioClip, audioClipPron]
def override_entry(self, source, entry_word, overrides):
    """Apply override lines to this entry.

    Lines starting with ``#!`` are info lines of the form ``#!Name: value``;
    info lines with an empty value are ignored. Every other non-empty line
    is an override definition. Info lines update the matching attribute
    (etymology, pronunciation, respelling, usage, examples); any other info
    key is appended to ``self.definitions`` as ``"key: value"``. Override
    definitions are placed in front of the existing definitions, and existing
    definitions that duplicate an override are removed first.

    Args:
        source: Source label used when the overrides carry no ``#!Source``
            line; also recorded on the respelling.
        entry_word: Value stored in ``self.entry_word``.
        overrides: Sequence of override lines. Nothing happens when empty.
    """
    if len(overrides) == 0:
        return

    self.entry_word = entry_word

    overrideInfo = {}
    overrideDefinitions = []

    # Split the lines into "#!Name: value" info lines and plain definitions.
    for override in overrides:
        if override.startswith('#!'):
            override_name = override.split(':')[0].strip()
            override_value = re.sub('^#![a-zA-Z0-9]+: ', DICT_UNICODE_EMPTY_STR, override).strip()
            if override_value != DICT_UNICODE_EMPTY_STR:
                overrideInfo[override_name] = override_value
        elif override != DICT_UNICODE_EMPTY_STR:
            overrideDefinitions.append(override)

    # Process info lines
    for key in overrideInfo:
        if key == '#!Etymology':
            self.etymology = overrideInfo[key]

        elif key == '#!AudioURL':
            if self.pronunciation is None:
                self.pronunciation = WordPronunciation(overrideInfo[key])
            else:
                self.pronunciation.audio_url = overrideInfo[key]

            # NOTE(review): applied whether the pronunciation was just created
            # or already existed -- confirm this matches the intended nesting.
            if '#!Word' in overrideInfo:
                self.pronunciation.form = overrideInfo['#!Word']
                self.pronunciation.spelling = overrideInfo['#!Word']

        elif key == '#!Respelling':
            if self.respelling is None:
                self.respelling = WordRespelling(overrideInfo[key], source)
            else:
                self.respelling.source = self.respelling.source + ';' + source
                self.respelling.text = overrideInfo[key]

            # NOTE(review): same nesting assumption as for '#!AudioURL'.
            if '#!Word' in overrideInfo:
                self.respelling.form = overrideInfo['#!Word']
                self.respelling.spelling = overrideInfo['#!Word']

        elif key == '#!Sentence':
            self.usage = [overrideInfo[key]] + self.usage

        elif key == '#!Examples':
            self.examples = overrideInfo[key]

        else:
            self.definitions.append("{}: {}".format(key, overrideInfo[key]))

    # Process #!Source info lines
    if '#!Source' in overrideInfo:
        altSource = overrideInfo['#!Source']
    else:
        altSource = source

    if self.source == DICT_UNICODE_EMPTY_STR:
        self.source = altSource
    else:
        self.source = self.source + ';' + altSource

    # Process definitions
    # Remove duplicate definitions
    # Comparison ignores one leading marker character (e.g. the "*" prefix the
    # application uses for special overrides) and trailing dots/spaces.
    normalizer = r'(^[^\(a-zA-Z0-9]|[\. ]+$)'
    for override in overrideDefinitions:
        override_text = re.sub(normalizer, DICT_UNICODE_EMPTY_STR, override, flags=re.IGNORECASE)
        coutput.print_watcher('override')
        coutput.print_watcher('override_text')

        # Iterate over a snapshot: removing from the list being iterated
        # shifts the remaining items and silently skips the element that
        # follows each removal, leaving some duplicates behind.
        for definition in list(self.definitions):
            definition_text = re.sub(normalizer, DICT_UNICODE_EMPTY_STR, definition, flags=re.IGNORECASE)
            coutput.print_watcher('definition')
            coutput.print_watcher('definition_text')
            if definition_text == override_text:
                self.definitions.remove(definition)
                coutput.print_debug("Removed duplicate definition")

    # Override definitions
    self.definitions = overrideDefinitions + self.definitions
def play_legacy(fileName, audioOutput, loopCount, loopDelaySec):
    """Play an audio file ``loopCount`` times through pygame, best effort.

    Failures are reported through ``coutput.print_err`` instead of being
    raised. The audio output is switched back to ``'auto'`` once it has been
    set, whether or not playback succeeded.

    Reference:
        https://www.pygame.org/docs/ref/mixer.html#pygame.mixer.init
        http://techqa.info/programming/question/27745134/how-can-i-extract-the-metadata-and-bitrate-info-from-a-audio/video-file-in-python

    Args:
        fileName: Path of the audio file to play.
        audioOutput: Output selector passed to ``set_audio_output``.
        loopCount: Number of times the file is played.
        loopDelaySec: Pause in seconds between two consecutive plays.
    """
    # Interval between playback-finished checks; keeps the wait loop from
    # spinning a CPU core at 100%.
    pollIntervalSec = 0.01

    try:
        # Enable for RaspberryPi
        coutput.print_debug("Executing set_audio_output")
        set_audio_output(audioOutput)

        try:
            coutput.print_debug("Executing mediainfo")
            fileInfo = mediainfo(fileName)

            # print_watcher receives expressions as strings, so the local
            # names `fileName` and `fileInfo` must stay as they are.
            coutput.print_watcher("fileName")
            coutput.print_watcher("fileInfo['sample_rate']")
            coutput.print_watcher("fileInfo['bits_per_sample']")
            coutput.print_watcher("fileInfo['channels']")

            for loopIndex in range(0, loopCount):
                # Syntax: init(frequency=22050, size=-16, channels=2, buffer=4096)
                pygame.mixer.init()

                coutput.print_debug("Executing pygame.mixer.music.load")
                pygame.mixer.music.load(fileName)

                coutput.print_debug("Executing pygame.mixer.music.play")
                pygame.mixer.music.play()
                while pygame.mixer.music.get_busy():
                    time.sleep(pollIntervalSec)

                # introduce delay to ensure that the end of the audio is not
                # clipped during playback
                time.sleep(0.06)

                coutput.print_debug("Executing pygame.mixer.stop")
                pygame.mixer.stop()
                coutput.print_debug("Executing pygame.mixer.quit")
                pygame.mixer.quit()

                # No trailing pause after the last repetition.
                if loopIndex != (loopCount - 1):
                    time.sleep(loopDelaySec)
        finally:
            # Restore the default output even when playback failed.
            set_audio_output('auto')

    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still
        # interrupt playback instead of being reported as a playback error.
        coutput.print_err("Unable to play audio from " + fileName)
        coutput.print_watcher("sys.exc_info()")
def play(fileName, audioOutput, loopCount, loopDelaySec):
    """Play an audio file ``loopCount`` times through pygame.

    Errors propagate to the caller. The audio output is switched back to
    ``'auto'`` once it has been set, whether or not playback succeeded.

    Reference:
        https://www.pygame.org/docs/ref/mixer.html#pygame.mixer.init
        http://techqa.info/programming/question/27745134/how-can-i-extract-the-metadata-and-bitrate-info-from-a-audio/video-file-in-python

    Args:
        fileName: Path of the audio file to play.
        audioOutput: Output selector passed to ``set_audio_output``.
        loopCount: Number of times the file is played.
        loopDelaySec: Pause in seconds between two consecutive plays.
    """
    _FUNC_NAME_ = "play"

    # Interval between playback-finished checks; keeps the wait loop from
    # spinning a CPU core at 100%.
    pollIntervalSec = 0.01

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Executing set_audio_output")
    set_audio_output(audioOutput)

    try:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Executing mediainfo")
        fileInfo = mediainfo(fileName)

        # todo: Print filename as part of debug message
        for title in ("sample_rate", "bits_per_sample", "channels"):
            coutput.print_debug(
                ERR_DEBUG, _FUNC_NAME_,
                "{TITLE} [{VALUE}]".format(TITLE=title, VALUE=fileInfo[title]))

        for loopIndex in range(0, loopCount):
            # Syntax: init(frequency=22050, size=-16, channels=2, buffer=4096)
            pygame.mixer.init()

            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Executing pygame.mixer.music.load")
            pygame.mixer.music.load(fileName)

            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Executing pygame.mixer.music.play")
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy():
                time.sleep(pollIntervalSec)

            # introduce delay to ensure that the end of the audio is not
            # clipped during playback
            time.sleep(0.06)

            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Executing pygame.mixer.stop")
            pygame.mixer.stop()
            coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "Executing pygame.mixer.quit")
            pygame.mixer.quit()

            # No trailing pause after the last repetition.
            if loopIndex != (loopCount - 1):
                time.sleep(loopDelaySec)
    finally:
        # Restore the default output even when playback raised.
        set_audio_output('auto')
def set_word_entries(self):
    """Parse ``self.entry_raw_text`` and append one word entry per <entry>.

    For every <entry> element a ``cdict.WordEntry`` is created from its <ew>
    element, filled from the child elements selected by ``nameFilter``, and
    appended to ``self.word_entries``. Inflections, run-on entries and
    variants (<in>, <dro>, <uro>, <vr>) are detached from the entry first and
    then attached to the word entry as ``cdict.WordInflection`` objects.
    """
    soup = BeautifulSoup(self.entry_raw_text, self.config.parser)
    # NOTE(review): the pattern is unanchored; if find_all() applies it with
    # search semantics it also selects tags that merely contain one of these
    # names (e.g. <pr_alt>) -- confirm that is intended.
    nameFilter = re.compile(r'(hw|fl|pr|et|sound|def|cx|art)')

    for entry in soup.find_all('entry'):
        # The string below is a no-op expression statement kept as in-code
        # reference for the <entry> element grammar.
        """ <!ELEMENT entry (((subj?, art?, formula?, table?), hw, (pr?, pr_alt?, pr_ipa?, pr_wod?, sound?)*, (ahw, (pr, pr_alt?, pr_ipa?, pr_wod?, sound?)?)*, vr?), (fl?, in*, lb*, ((cx, (ss | us)*) | et)*, sl*), (dx | def)*, (list? | (uro*, dro*, ((pl, pt, sa?) | (note) | quote+)*)))> """

        # Capture and exclude miscellaneous entries from main entry:
        # * inflections <in>
        # * defined run-on entries <dro>
        # * undefined run-on entries <uro>
        # * variants <vr>
        miscElements = entry.find_all(['in', 'dro', 'uro', 'vr'])
        # Detach the same elements from the tree so the main-entry scan below
        # does not pick up their <pr>/<sound>/<def> children. The list built
        # by the comprehension is discarded; only the side effect matters.
        [x.extract() for x in entry.findAll(['in', 'dro', 'uro', 'vr'])]

        coutput.print_debug("Process all <ew> elements")
        # NOTE(review): wordEntry is only (re)bound here; an <entry> without
        # <ew> would reuse the previous entry's object or raise NameError on
        # the first iteration -- confirm <ew> is always present.
        for element in entry.find_all('ew'):
            elementText = element.get_text().strip()
            wordEntry = cdict.WordEntry(self.config.name, elementText)

        # NOTE(review): this loop is assumed to be a sibling of the <ew> loop
        # above, not nested in it -- confirm.
        for element in entry.find_all(nameFilter):
            elementText = element.get_text().strip()

            # print_watcher receives expressions as strings; the local names
            # used here must stay in sync with those strings.
            coutput.print_watcher('element.name')
            coutput.print_watcher('elementText')

            if element.name == 'hw':
                coutput.print_debug("Process <hw> element")
                wordEntry.head_word = elementText

            elif element.name == 'fl':
                coutput.print_debug("Process <fl> element")
                wordEntry.functional_label = elementText

            elif element.name == 'et':
                coutput.print_debug("Process <et> element")
                wordEntry.etymology = elementText

            elif element.name == 'pr':
                wordEntry.respelling = self.build_respelling(
                    element, wordEntry.entry_word)

            elif element.name == 'sound':
                wordEntry.pronunciation = self.build_pronunciation(
                    element, wordEntry.entry_word)

            elif element.name == 'art':
                wordEntry.illustrations.extend(
                    self.build_illustrations(element, wordEntry.entry_word))

            elif element.name == 'def':
                coutput.print_debug("Process <def> element")
                wordEntry.senses.extend(self.build_senses(element))

            elif element.name == 'cx':
                # Process cross-entry <cx> elements as inflections
                wordEntry.inflections.append(
                    self.build_cross_entries(element, wordEntry.entry_word))

        # Process previously captured misc. elements from main entry as inflections
        for miscElement in miscElements:
            # First scan: create the inflection from its form element and
            # label it by element type (<if> keeps the default label).
            # NOTE(review): winf is only bound inside this loop; a misc
            # element without <if>/<ure>/<drp>/<va> would reuse a stale winf
            # or raise NameError -- confirm.
            for element in miscElement.find_all(['if', 'ure', 'drp', 'va']):
                elementText = element.get_text().strip()
                winf = cdict.WordInflection(elementText)
                # '*' characters are stripped from the stored spelling.
                winf.spelling = elementText.replace('*', '')

                if element.name == 'ure':
                    winf.functional_label = "undefined run-on entry"
                elif element.name == 'drp':
                    winf.functional_label = "defined run-on phrase"
                elif element.name == 'va':
                    winf.functional_label = "variant form"

            # Second scan: attach label, respelling, pronunciation and senses.
            # NOTE(review): assumed to be a sibling of the loop above -- confirm.
            for element in miscElement.find_all(
                    ['il', 'sound', 'pr', 'def']):
                DEBUG_VAR = "element.name"
                coutput.print_debug("{0} :: {1}".format(
                    DEBUG_VAR, eval(DEBUG_VAR)))
                elementText = element.get_text().strip()
                DEBUG_VAR = "elementText"
                coutput.print_debug("{0} :: {1}".format(
                    DEBUG_VAR, eval(DEBUG_VAR)))

                if element.name == 'il':
                    winf.functional_label = elementText

                elif element.name == 'pr':
                    winf.respelling = self.build_respelling(
                        element, winf.form)

                elif element.name == 'sound':
                    winf.pronunciation = self.build_pronunciation(
                        element, winf.form)

                elif element.name == 'def':
                    winf.senses.extend(self.build_senses(element))

            wordEntry.inflections.append(winf)

        coutput.print_watcher('wordEntry')
        self.word_entries.append(wordEntry)
def run_revision(spellBee):
    """Run an interactive revision session over the active word list.

    Each word cue is shown in turn and the user grades themselves with a
    single key press:

    * ``y`` - counted as correct, move to the next word
    * ``n`` - counted as incorrect, logged as a practice word, move on
    * ``x`` - end the session
    * ``v`` - print the active word list, then repeat the word
    * ``h`` - show about/help information, then repeat the word
    * any other key - repeat the word

    When the session ends the score is logged and the evaluation result is
    displayed.

    Args:
        spellBee: Spelling bee session object holding the active word list.
    """
    _FUNC_NAME_ = "run_revision"

    spellBee.print_active_word_list()
    display_help("revise")
    cinput.get_keypress("\nReady to revise? Press any key when ready ... ")

    testDate = time.strftime('%a %d-%b-%Y %H:%M:%S')
    testTotalCount = spellBee.active_word_count()
    testCorrectCount = 0

    spellBee.reset_test_result()

    position = 0
    while True:
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "activeWordIndexList :: {0}".format(spellBee.activeWordIndexList))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "activeWordIndex :: {0}".format(position))

        # Stop once the cursor has left the active word list.
        if not (0 <= position < len(spellBee.activeWordIndexList)):
            break

        wordIndex = spellBee.activeWordIndexList[position]
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "wordIndex :: {0}".format(wordIndex))

        # Lookup word definition
        spellBee.lookup_dictionary_by_index(wordIndex)
        spellBee.display_word_cue(SB_STUDY_WORD_DEFN_TITLE.format(INDEX=wordIndex + 1, WORD=spellBee.activeWord))
        command = cinput.get_keypress("Enter response: ").lower()

        # E[x]it test
        if command == "x":
            break

        # Re[v]iew active word list
        if command == "v":
            # Single-argument print(...) behaves the same as the Python 2
            # print statement.
            print(SB_EMPTY_STRING)
            spellBee.print_active_word_list()
            continue

        # Display [h]elp and statistics
        if command == "h":
            print(SB_EMPTY_STRING)
            spellBee.display_about()
            display_help("revise")
            continue

        # [R]epeat question as default action
        if command not in ("y", "n"):
            continue

        # Process the self-graded response
        answeredCorrectly = (command == "y")
        if answeredCorrectly:
            testValuation = SB_RIGHT_SYMBOL + " " + spellBee.activeWord
            testCorrectCount += 1
        else:
            testValuation = SB_WRONG_SYMBOL + " " + spellBee.activeWord
            spellBee.log_practice_word(spellBee.activeWord)

        # Display valuation
        # Handle display text in ascii
        coutput.print_color('green' if answeredCorrectly else 'red',
                            " " * 50 + testValuation.encode('utf-8'))

        # Save valuation
        spellBee.log_test_valuation(testValuation)

        # Move to next word
        position += 1

    spellBee.log_test_result(testDate, str(testCorrectCount) + "/" + str(testTotalCount))
    print("\nYour revision is complete. Displaying results...")

    spellBee.display_evaluation_result('revise', SB_TEST_SAVE_RESULT, True)
def run_test(spellBee):
    """Run an interactive spelling test over the active word list.

    Each word cue is shown in turn and the user types the spelling. The
    inputs ``x``, ``r`` and ``h`` (case-insensitive) are commands: exit the
    test, repeat the word, and show about/help information followed by a
    repeat. Anything else is valuated as a spelling attempt; incorrect
    attempts are logged as practice words. When the test ends the score is
    logged and the evaluation result is displayed.

    Args:
        spellBee: Spelling bee session object holding the active word list.
    """
    _FUNC_NAME_ = "run_test"

    spellBee.display_about()
    display_help("test")
    cinput.get_keypress("\nReady for the test? Press any key when ready ... ")

    testDate = time.strftime('%a %d-%b-%Y %H:%M:%S')
    testTotalCount = spellBee.active_word_count()
    testCorrectCount = 0

    spellBee.reset_test_result()

    # Disable saving practice words if :
    #   saving is disabled for test results, or
    #   the test is based on practice, revision or wild card lists
    savePracticeWordsEnabled = SB_TEST_SAVE_PRACTICE
    if SB_TEST_SAVE_RESULT == False:
        savePracticeWordsEnabled = False
    elif ('practice' in spellBee.contestList.lower()
            or 'revision' in spellBee.contestList.lower()
            or '*' in spellBee.contestList):
        savePracticeWordsEnabled = False

    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "SB_TEST_SAVE_PRACTICE :: {0}".format(SB_TEST_SAVE_PRACTICE))
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "savePracticeWordsEnabled :: {0}".format(savePracticeWordsEnabled))

    position = 0
    while True:
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "activeWordIndexList :: {0}".format(spellBee.activeWordIndexList))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "activeWordIndex :: {0}".format(position))

        # Stop once the cursor has left the active word list.
        if not (0 <= position < len(spellBee.activeWordIndexList)):
            break

        wordIndex = spellBee.activeWordIndexList[position]
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "wordIndex :: {0}".format(wordIndex))

        # Lookup word definition
        spellBee.lookup_dictionary_by_index(wordIndex)
        spellBee.display_word_cue(SB_PRACTICE_WORD_DEFN_TITLE.format(INDEX=wordIndex + 1))
        userResponse = cinput.get_input("Enter spelling: ")
        command = userResponse.lower()

        # E[x]it test
        if command == "x":
            break

        # [R]epeat question
        if command == "r":
            continue

        # Display [h]elp and statistics
        if command == "h":
            # Single-argument print(...) behaves the same as the Python 2
            # print statement.
            print(SB_EMPTY_STRING)
            spellBee.display_about()
            display_help("test")
            continue

        # Anything else is a spelling attempt.
        if spellBee.valuate_test_response(userResponse, spellBee.activeWord, SB_TEST_MODE):
            # Process correct response
            answeredCorrectly = True
            testValuation = SB_RIGHT_SYMBOL + " " + userResponse
            testCorrectCount += 1
        else:
            # Process incorrect response
            answeredCorrectly = False
            testValuation = SB_WRONG_SYMBOL + " " + userResponse
            spellBee.log_practice_word(spellBee.activeWord)

        # Indicate correct form of the answer, if different from the response
        if userResponse != spellBee.activeWord:
            testValuation += " (" + spellBee.activeWord + ")"

        # Display valuation
        # Handle display text in ascii
        coutput.print_color('green' if answeredCorrectly else 'red',
                            " " * 50 + testValuation.encode('utf-8'))

        # Save valuation
        spellBee.log_test_valuation(testValuation)

        # Move to next word
        position += 1

    spellBee.log_test_result(testDate, str(testCorrectCount) + "/" + str(testTotalCount))
    print("\nYour test is complete. Displaying results...")

    spellBee.display_evaluation_result('test', SB_TEST_SAVE_RESULT, savePracticeWordsEnabled)
def cleanse_text(rawText, rawTextPatterns, rawInnerTextPatterns, rawOuterTextPatterns):
    """Clean ``rawText`` with three successive families of regex substitutions.

    Every substitution is applied with ``re.DOTALL`` so ``.`` also matches
    newlines.

    Args:
        rawText: Text to cleanse.
        rawTextPatterns: Iterable of regex patterns; each match is replaced
            with ``UNICODE_EMPTY_STR``.
        rawInnerTextPatterns: Iterable of ``(opening, closing)`` regex pairs;
            the text between a pair is dropped, the delimiters are kept.
        rawOuterTextPatterns: Iterable of ``(opening, closing)`` regex pairs;
            the delimiters are dropped, the text between them is kept.

    Returns:
        The cleansed text.
    """
    _FUNC_NAME_ = "cleanse_text"

    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("rawText", type(rawText)))

    cleansedText = rawText
    coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("cleansedText", type(cleansedText)))

    # Pass 1: delete every match of the plain patterns.
    for pattern in rawTextPatterns:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("pattern", type(pattern)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, pattern)
        cleansedText = re.sub(pattern, UNICODE_EMPTY_STR, cleansedText, flags=re.DOTALL)

        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("cleansedText", type(cleansedText)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, cleansedText)

    # Pass 2: keep the delimiters, drop what they enclose. The lazy `.*?`
    # stops at the nearest closing delimiter.
    for enclosure in rawInnerTextPatterns:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("enclosure", type(enclosure)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, enclosure)
        pattern = r'(' + enclosure[0] + r').*?(' + enclosure[1] + r')'
        cleansedText = re.sub(pattern, r'\g<1>\g<2>', cleansedText, flags=re.DOTALL)

    # Pass 3: drop the delimiters, keep what they enclose.
    for enclosure in rawOuterTextPatterns:
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format("enclosure", type(enclosure)))
        coutput.print_debug(ERR_DEBUG, _FUNC_NAME_, enclosure)
        pattern = enclosure[0] + r'(.*?)' + enclosure[1]
        cleansedText = re.sub(pattern, r'\g<1>', cleansedText, flags=re.DOTALL)

    return cleansedText
def lookup_dictionary_by_word(self, word):
    """Load the definition and pronunciation clip for ``word`` into this object.

    Sets ``self.activeWord`` (stripped ``word``), ``self.activeEntry``,
    ``self.activeDefinition``, ``self.activePronunciation`` and
    ``self.activePronunciationWord``.

    The definition is taken from the first available source: an override
    definition file, the offline entry file, or a download of the dictionary
    entry (which is then saved offline). The pronunciation is resolved the
    same way: an override clip, the offline clip, or a download of the clip
    referenced by the entry. Missing definitions, missing audio and audio
    whose word form differs from the active word are appended to the error
    log file.

    Args:
        word: The word to look up.
    """
    _FUNC_NAME_ = "lookup_dictionary_by_word"

    # NOTE(review): this debug statement indexes self.wordList[0] and so
    # raises IndexError when the word list is empty -- confirm acceptable.
    DEBUG_VAR="self.wordList[0]"
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(DEBUG_VAR, type(self.wordList[0])))
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, eval(DEBUG_VAR))

    DEBUG_VAR="word"
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(DEBUG_VAR, type(word)))
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, eval(DEBUG_VAR))

    self.activeWord = word.strip()

    DEBUG_VAR="self.activeWord"
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(DEBUG_VAR, type(self.activeWord)))
    coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, eval(DEBUG_VAR))

    # Setup connection and error logging
    connectionPool = urllib3.PoolManager(10, headers=SB_USER_AGENT)
    errorFileName = SB_DATA_DIR + SB_ERR_LOG

    # Check offline for dictionary entry
    self.activeEntry = SB_EMPTY_STRING
    self.activeDefinition = []

    # NOTE(review): these two file names are built from the raw `word`, while
    # the pronunciation file names below use the stripped self.activeWord;
    # surrounding whitespace in `word` would become underscores here --
    # confirm whether that difference is intended.
    overrideDefnFileName = SB_DICT_OVERRIDE_DIR + SB_DICT_OVERRIDE_DEFN.format(WORD=word).replace(" ", "_")
    offlineEntryFileName = SB_DICT_OFFLINE_DIR + SB_DICT_OFFLINE_ENTR.format(WORD=word).replace(" ", "_")

    # Check for dictionary definition override
    if os.path.isfile(overrideDefnFileName) and os.path.getsize(overrideDefnFileName) > 0:
        # The entry becomes a placeholder string; the definition lines come
        # straight from the override file.
        self.activeEntry = unicode("[Dictionary Definition Override]", 'utf-8')
        self.activeDefinition = cfile.read(overrideDefnFileName).splitlines()

        DEBUG_VAR="self.activeEntry"
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(DEBUG_VAR, type(self.activeEntry)))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, eval(DEBUG_VAR))

    # Check primary source for dictionary entry
    # Offline entry files of 100 bytes or less are treated as missing.
    elif os.path.isfile(offlineEntryFileName) and os.path.getsize(offlineEntryFileName) > 100:
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "offlineEntryFile size :: {0}".format(os.path.getsize(offlineEntryFileName)))
        self.activeEntry = cfile.read(offlineEntryFileName)
        self.activeDefinition = cdict.parse_word_definition(self.activeWord, self.activeEntry)

        DEBUG_VAR="self.activeEntry"
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(DEBUG_VAR, type(self.activeEntry)))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, eval(DEBUG_VAR))

    else:
        # Download dictionary entry
        self.activeEntry = cdict.get_dictionary_entry(connectionPool, self.activeWord)

        DEBUG_VAR="self.activeEntry"
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(DEBUG_VAR, type(self.activeEntry)))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, eval(DEBUG_VAR))

        # Save dictionary entry offline
        cfile.write(offlineEntryFileName, self.activeEntry)

        # Retrieve word definition
        self.activeDefinition = cdict.parse_word_definition(self.activeWord, self.activeEntry)

        # NOTE(review): the missing-definition check is assumed to belong to
        # the download branch only -- confirm.
        if len(self.activeDefinition) == 0:
            # Log missing definition error
            errorText = unicode("ERROR:Missing Definition:{0}\n", 'utf-8')
            errorText = errorText.format(self.activeWord)
            cfile.append(errorFileName, errorText)

    # Check offline for word pronunciation
    self.activePronunciation = SB_EMPTY_STRING
    self.activePronunciationWord = SB_EMPTY_STRING

    overrideProncnFileName = SB_DICT_OVERRIDE_DIR + SB_DICT_OVERRIDE_CLIP.format(WORD=self.activeWord).replace(" ", "_")
    offlineProncnFileName = SB_DICT_OFFLINE_DIR + SB_DICT_OFFLINE_CLIP.format(WORD=self.activeWord).replace(" ", "_")

    # Check for dictionary pronunciation override
    if os.path.isfile(overrideProncnFileName) and os.path.getsize(overrideProncnFileName) > 0:
        self.activePronunciation = overrideProncnFileName
        self.activePronunciationWord = self.activeWord

    # Check primary source for dictionary entry and pronunciation
    # Both offline files must exist and exceed their minimum sizes
    # (entry > 100 bytes, clip > 1000 bytes).
    elif os.path.isfile(offlineEntryFileName) and os.path.getsize(offlineEntryFileName) > 100 and os.path.isfile(offlineProncnFileName) and os.path.getsize(offlineProncnFileName) > 1000:
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "offlineProncnFile size :: {0}".format(os.path.getsize(offlineProncnFileName)))
        self.activePronunciation = offlineProncnFileName

        # Retrieve pronunciation audio clip word form and filename
        # Only the word form is used here; the clip itself is already offline.
        [wordClipForm, wordClipURL] = cdict.parse_word_clip(self.activeWord, self.activeEntry)
        self.activePronunciationWord = wordClipForm

    else:
        # Retrieve pronunciation audio clip word form and filename
        [wordClipForm, wordClipURL] = cdict.parse_word_clip(self.activeWord, self.activeEntry)

        # Save pronunciation offline
        if wordClipURL == SB_EMPTY_STRING:
            # Log missing audio error
            errorText = unicode("ERROR:Missing Audio:{0}\n", 'utf-8')
            errorText = errorText.format(self.activeWord)
            cfile.append(errorFileName, errorText)
        else:
            # Download audio clip
            cfile.download(connectionPool, wordClipURL, offlineProncnFileName)

            self.activePronunciation = offlineProncnFileName
            self.activePronunciationWord = wordClipForm

    # Log audio mismatch error
    # Compare letters only, case-insensitively, so punctuation, spacing and
    # capitalisation differences do not count as a mismatch.
    wordToken = re.sub('[^a-zA-Z]', SB_EMPTY_STRING, self.activeWord.lower())
    pronunciationToken = re.sub('[^a-zA-Z]', SB_EMPTY_STRING, self.activePronunciationWord.lower())
    if self.activePronunciation != SB_EMPTY_STRING and wordToken != pronunciationToken:
        errorText = unicode("ERROR:Audio Mismatch:{0}\n", 'utf-8')
        errorText = errorText.format(self.activeWord)
        cfile.append(errorFileName, errorText)

    # Close connection
    # NOTE(review): not reached when an exception is raised above -- consider
    # try/finally.
    connectionPool.clear()