# Preview the override text produced for each word in the override list file.
# For every word: download its dictionary entry, save it as "<word>.html",
# read that saved copy back, parse it and print the generated override.
# NOTE(review): the original indentation was lost; every statement after the
# ``for`` header is assumed to belong to the loop body - confirm.
listFile = 'data/spelling_bee_overrides.txt'
connectionPool = urllib3.PoolManager(10, headers=SB_USER_AGENT)

# Visual separator printed around each section of output.
bannerLine = "+++++++++++++++++++++++++++++++++++++++++++"

for activeWord in cfile.read(listFile).splitlines():
    print(bannerLine)
    print(activeWord)
    print(bannerLine)
    #activeWord=u'cephalalgia'

    # Output names derived from the word; overrideFile is only used by the
    # disabled write at the bottom of the loop.
    overrideFile = activeWord + u".dat"
    saveFile = activeWord + u".html"

    ##### Online Entry #####
    activeEntry = dictAssist.download_entry(connectionPool, activeWord)
    cfile.write(saveFile, activeEntry)

    ##### Offline Entry #####
    # Parse from the saved file rather than from the in-memory download.
    connectionData = cfile.read(saveFile)
    wordDictionary = cdictapi.DictionaryEntry(dictConfig, activeWord, connectionData)

    print(len(wordDictionary.word_entries))
    print(bannerLine)
    print(wordDictionary.simplified_word_entry)
    print(bannerLine)

    overrideData = wordDictionary.simplified_word_entry.generate_override()
    print(overrideData)
    #cfile.write(overrideFile, overrideData)
# Fragment: finish building the text entry for the current word, record the
# word in the list file, create the definition and pronunciation files when
# they are missing (or too small), then keep clicking the page's Next button.
# NOTE(review): the original indentation was lost and the start of this code
# is outside the visible source; the nesting below is inferred - confirm.

# Append the examples, related words and "(label) definition" lines.
wordEntry = wordEntry + APP_NEWLINE + "#!Examples: " + wordExamples
wordEntry = wordEntry + APP_NEWLINE + "#!Related: " + wordRelated
wordEntry = wordEntry + APP_NEWLINE + "({}) {}".format(wordFuncLabel, wordDefinition)
print("\nEntry for {}: ".format(displayWord))
print(wordEntry)
print("\nAdding to word list file: " + APP_LIST_FILE)
cfile.append(APP_LIST_FILE, listWord)
# Definition file: an existing file larger than 100 bytes is left untouched.
APP_DICT_ENTR_FILE = APP_DICT_DIR + cfile.cleanse_filename(APP_DICT_ENTR.format(WORD=listWord))
if os.path.isfile(APP_DICT_ENTR_FILE) and os.path.getsize(APP_DICT_ENTR_FILE) > 100:
    print("Definition file {} exists. Skipping.".format(APP_DICT_ENTR_FILE))
else:
    print("Creating definition file: " + APP_DICT_ENTR_FILE)
    cfile.write(APP_DICT_ENTR_FILE, wordEntry)
# Pronunciation file: same rule, downloaded from wordAudioURL when needed.
APP_DICT_CLIP_FILE = APP_DICT_DIR + cfile.cleanse_filename(APP_DICT_CLIP.format(WORD=listWord))
if os.path.isfile(APP_DICT_CLIP_FILE) and os.path.getsize(APP_DICT_CLIP_FILE) > 100:
    print("Pronunciation file {} exists. Skipping.".format(APP_DICT_CLIP_FILE))
else:
    print("Creating pronunciation file: " + APP_DICT_CLIP_FILE)
    cfile.download(connectionPool, wordAudioURL, APP_DICT_CLIP_FILE)
# No exit condition is visible here: the loop runs until one of its calls
# raises (presumably when the Next button can no longer be found - verify).
while True:
    nextButton = browser.find_element_by_id("nextButton")
    nextButton.click()
    print("Clicked Next button")
    # Wait for the element matching the CSS selector, then pause.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".col-sm-9.col-md-9")))
    time.sleep(APP_WAIT_DELAY)
# Fragment: cleanse the raw list page, then extract the "basic" and
# "challenge" word lists from it and write each to its own output file.
# NOTE(review): the original indentation was lost and this code starts
# mid-script; all statements are assumed to sit at one level - confirm,
# in particular for the final connectionPool.clear().
cleansedText = cparser.cleanse_text(listRawText, SB_CLEAN_TEXT_PATTERNS, SB_CLEAN_INNER_TEXT_PATTERNS, SB_CLEAN_OUTER_TEXT_PATTERNS)
listLang = listID.lower().replace(' ', '-')

# One row per word list: (list type used in the file name, opening pattern
# of the outer-text pair that isolates that section of the page).
wordListSections = [
    ("basic", r'.*<div class="section word study">'),
    ("challenge", r'.*<div class="section word challenge">'),
]

for listType, sectionOpenPattern in wordListSections:
    listFileName = SB_WORD_LIST_OUT[args.contestYear].format(YEAR=listYear, SEQ=listSeq, LANG=listLang, TYPE=listType)

    # Narrow the cleansed text to the section, then collect its <li> items.
    sectionOuterTextPatterns = [[sectionOpenPattern, r'</div>.*']]
    sectionText = cparser.cleanse_text(cleansedText, SB_CLEAN_TEXT_PATTERNS, SB_CLEAN_INNER_TEXT_PATTERNS, sectionOuterTextPatterns)
    words = cparser.find_enclosed_text(r'<li>\s*', r'\s*</li>', sectionText)

    print("Writing " + listFileName)
    cfile.write(listFileName, coutput.multiline_text(words))

connectionPool.clear()
def lookup_dictionary_by_word(self, word):
    """Load the definition and pronunciation clip for ``word``.

    The definition comes from the first source that applies:

    1. a non-empty override definition file,
    2. an offline dictionary entry file larger than 100 bytes,
    3. a downloaded dictionary entry, which is then saved offline.

    The pronunciation clip is resolved the same way: a non-empty override
    clip, an offline clip larger than 1000 bytes (only when the offline
    entry file is also present), or a download of the clip URL parsed from
    the entry.

    Results are stored on the instance (``activeWord``, ``activeEntry``,
    ``activeDefinition``, ``activePronunciation`` and
    ``activePronunciationWord``); nothing is returned.  Missing definitions,
    missing audio and audio/word mismatches are appended to the error log
    at ``SB_DATA_DIR + SB_ERR_LOG``.

    Args:
        word: Word to look up.  Surrounding whitespace is stripped, and the
            stripped form is used for every file name and lookup.
    """
    _FUNC_NAME_ = "lookup_dictionary_by_word"

    def debug_value(label, value):
        # Two debug lines per value: "<label> :: <type>", then the value.
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(label, type(value)))
        coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, value)

    # Guarded so that tracing cannot fail the lookup when the list is empty.
    if self.wordList:
        debug_value("self.wordList[0]", self.wordList[0])
    debug_value("word", word)

    self.activeWord = word.strip()
    debug_value("self.activeWord", self.activeWord)

    # Setup connection and error logging
    connectionPool = urllib3.PoolManager(10, headers=SB_USER_AGENT)
    errorFileName = SB_DATA_DIR + SB_ERR_LOG

    try:
        # Check offline for dictionary entry
        self.activeEntry = SB_EMPTY_STRING
        self.activeDefinition = []

        # File names are built from the stripped word so they agree with
        # the pronunciation file names below.
        overrideDefnFileName = SB_DICT_OVERRIDE_DIR + SB_DICT_OVERRIDE_DEFN.format(WORD=self.activeWord).replace(" ", "_")
        offlineEntryFileName = SB_DICT_OFFLINE_DIR + SB_DICT_OFFLINE_ENTR.format(WORD=self.activeWord).replace(" ", "_")

        if os.path.isfile(overrideDefnFileName) and os.path.getsize(overrideDefnFileName) > 0:
            # Check for dictionary definition override
            self.activeEntry = unicode("[Dictionary Definition Override]", 'utf-8')
            self.activeDefinition = cfile.read(overrideDefnFileName).splitlines()
            debug_value("self.activeEntry", self.activeEntry)

        elif os.path.isfile(offlineEntryFileName) and os.path.getsize(offlineEntryFileName) > 100:
            # Check primary source for dictionary entry
            coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "offlineEntryFile size :: {0}".format(os.path.getsize(offlineEntryFileName)))
            self.activeEntry = cfile.read(offlineEntryFileName)
            self.activeDefinition = cdict.parse_word_definition(self.activeWord, self.activeEntry)
            debug_value("self.activeEntry", self.activeEntry)

        else:
            # Download dictionary entry
            self.activeEntry = cdict.get_dictionary_entry(connectionPool, self.activeWord)
            debug_value("self.activeEntry", self.activeEntry)

            # Save dictionary entry offline
            cfile.write(offlineEntryFileName, self.activeEntry)

            # Retrieve word definition
            self.activeDefinition = cdict.parse_word_definition(self.activeWord, self.activeEntry)

        # NOTE(review): the original indentation was lost; this check is
        # assumed to apply to all three sources above - confirm.
        if not self.activeDefinition:
            # Log missing definition error
            errorText = unicode("ERROR:Missing Definition:{0}\n", 'utf-8')
            errorText = errorText.format(self.activeWord)
            cfile.append(errorFileName, errorText)

        # Check offline for word pronunciation
        self.activePronunciation = SB_EMPTY_STRING
        self.activePronunciationWord = SB_EMPTY_STRING

        overrideProncnFileName = SB_DICT_OVERRIDE_DIR + SB_DICT_OVERRIDE_CLIP.format(WORD=self.activeWord).replace(" ", "_")
        offlineProncnFileName = SB_DICT_OFFLINE_DIR + SB_DICT_OFFLINE_CLIP.format(WORD=self.activeWord).replace(" ", "_")

        offlineEntryAvailable = os.path.isfile(offlineEntryFileName) and os.path.getsize(offlineEntryFileName) > 100

        if os.path.isfile(overrideProncnFileName) and os.path.getsize(overrideProncnFileName) > 0:
            # Check for dictionary pronunciation override
            self.activePronunciation = overrideProncnFileName
            self.activePronunciationWord = self.activeWord

        elif offlineEntryAvailable and os.path.isfile(offlineProncnFileName) and os.path.getsize(offlineProncnFileName) > 1000:
            # Check primary source for dictionary entry and pronunciation
            coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "offlineProncnFile size :: {0}".format(os.path.getsize(offlineProncnFileName)))
            self.activePronunciation = offlineProncnFileName

            # Retrieve pronunciation audio clip word form and filename
            [wordClipForm, wordClipURL] = cdict.parse_word_clip(self.activeWord, self.activeEntry)
            self.activePronunciationWord = wordClipForm

        else:
            # Retrieve pronunciation audio clip word form and filename
            [wordClipForm, wordClipURL] = cdict.parse_word_clip(self.activeWord, self.activeEntry)

            # Save pronunciation offline
            if wordClipURL == SB_EMPTY_STRING:
                # Log missing audio error
                errorText = unicode("ERROR:Missing Audio:{0}\n", 'utf-8')
                errorText = errorText.format(self.activeWord)
                cfile.append(errorFileName, errorText)
            else:
                # Download audio clip
                cfile.download(connectionPool, wordClipURL, offlineProncnFileName)

                self.activePronunciation = offlineProncnFileName
                self.activePronunciationWord = wordClipForm

        # Log audio mismatch error: compare letters only, ignoring case,
        # and only when a clip was actually resolved.
        wordToken = re.sub('[^a-zA-Z]', SB_EMPTY_STRING, self.activeWord.lower())
        pronunciationToken = re.sub('[^a-zA-Z]', SB_EMPTY_STRING, self.activePronunciationWord.lower())
        if self.activePronunciation != SB_EMPTY_STRING and wordToken != pronunciationToken:
            errorText = unicode("ERROR:Audio Mismatch:{0}\n", 'utf-8')
            errorText = errorText.format(self.activeWord)
            cfile.append(errorFileName, errorText)
    finally:
        # Close connection, also when a read, parse or download step raises.
        connectionPool.clear()
# Fragment: handle one colon-separated log entry - fetch the dictionary data
# for its word and save a definition and/or pronunciation override,
# depending on which error pattern the entry's third field matches.
# NOTE(review): the original indentation was lost and the enclosing loop that
# supplies ``entry`` is outside the visible source; nesting is inferred.
logValues = entry.split(':')
# Field 1 is the word; field 2 is the text matched against the error regexes.
word = logValues[1]
wordEntry = cdict.fetch_dictionary_entry(connectionPool, word)
# NOTE(review): this fragment traces with SB_ERR_DEBUG although its other
# constants carry the SDO_ prefix - confirm the intended flag.
coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry')
# Flags recording which override could not be produced for this word.
SDO_ERR_DEFN_MISSING = False
SDO_ERR_CLIP_MISSING = False
print unicode("Word: {0}\t{1}", 'utf-8').format(word, logValues[2])
# Definition override: wordEntry[1] is written out as multi-line text.
if SDO_ERR_DEFN_REGEX_PATTERN.match(logValues[2]):
    coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[1]')
    if len(wordEntry[1]) > 0:
        # The message is printed before the file is actually written.
        print ">> Downloaded definition override"
        cfile.write(SDO_OVERRIDE_DEFN_FILE.format(WORD=word), coutput.multiline_text(wordEntry[1]))
    else:
        SDO_ERR_DEFN_MISSING = True
        coutput.print_color('yellow', "WARNING: Definition override not available")
# Pronunciation override: wordEntry[4] is the download source passed to
# cfile.download; an empty string means no clip is available.
if SDO_ERR_AUDIO_REGEX_PATTERN.match(logValues[2]):
    coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[4]')
    if wordEntry[4] != "":
        print ">> Downloaded pronunciation override"
        cfile.download(connectionPool, wordEntry[4], SDO_OVERRIDE_PRON_FILE.format(WORD=word))
    else:
        SDO_ERR_CLIP_MISSING = True
        coutput.print_color('yellow', "WARNING: Pronunciation override not available")
# Log errors
# Start of the error line for this word; the rest of the logging code is
# outside the visible source.
errorText = unicode("ERROR:{0}:", 'utf-8').format(word)
# Fragment: read the list of logged entries and, for each word, make sure a
# saved dictionary entry file exists, parse it and begin handling the
# definition override.
# NOTE(review): the original indentation was lost and the code is cut off
# inside the last ``if``; the nesting below is inferred - confirm.
connectionPool = urllib3.PoolManager(10, headers=SDO_USER_AGENT)
logEntries = cfile.read(SDO_LIST_FILE).splitlines()
print "Downloading overrides ..."
for entry in logEntries:
    coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'entry')
    # Entries are colon-separated: field 1 is the word, field 2 is the text
    # matched against the error regexes.
    logValues = entry.split(':')
    word = logValues[1]
    # Download the entry only when no saved copy exists yet.
    if not os.path.isfile(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word)):
        cfile.write(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word), dictAssist.download_entry(connectionPool, word))
    # Always parse from the saved file.
    wordEntry = cfile.read(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word))
    wordDictionary = cdictapi.DictionaryEntry(dictConfig, word, wordEntry)
    coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'wordEntry')
    # Flags recording which override could not be produced for this word.
    SDO_ERR_DEFN_MISSING = False
    SDO_ERR_CLIP_MISSING = False
    print unicode("\nWord: {0}\t{1}", 'utf-8').format(word, logValues[2])
    if SDO_ERR_DEFN_REGEX_PATTERN.match(logValues[2]):
        coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[1]')
        # NOTE(review): wordEntry is whatever cfile.read returned for the
        # saved file; if that is text, wordEntry[1] is a single character
        # and this test looks like a leftover from an older data layout -
        # verify against cfile.read and DictionaryEntry.
        if len(wordDictionary.word_entries) > 0 and len(wordEntry[1]) > 0:
            print ">> Downloaded definition override"
################################################################ # Main Program ################################################################ connectionPool = urllib3.PoolManager(10, headers=SDO_USER_AGENT) wordList = cfile.read(SDO_CONF_FILE).splitlines() print u"Downloading overrides ..." for word in wordList: print u"Word: " + word wordEntry = cdict.fetch_dictionary_entry(connectionPool, word) if len(wordEntry[1]) > 0: cfile.write(SDO_OVERRIDE_DEFN_FILE.format(WORD=word), coutput.multiline_text(wordEntry[1])) else: coutput.print_color('yellow', "WARNING: Definition override not available") if wordEntry[4] != "": cfile.download(connectionPool, wordEntry[4], SDO_OVERRIDE_PRON_FILE.format(WORD=word)) else: coutput.print_color('yellow', "WARNING: Pronunciation override not available") connectionPool.clear() ######################################################################## # Debugging Commands ######################################################################## '''