Ejemplo n.º 1
0
# Diagnostic script: for every word in the list file, download its dictionary
# entry, save it as "<word>.html", parse the saved copy and print the
# generated override text.
listFile = 'data/spelling_bee_overrides.txt'
connectionPool = urllib3.PoolManager(10, headers=SB_USER_AGENT)

bannerLine = "+++++++++++++++++++++++++++++++++++++++++++"
wordLines = cfile.read(listFile).splitlines()

for activeWord in wordLines:

    # Banner naming the word being processed
    print(bannerLine)
    print(activeWord)
    print(bannerLine)

    # File names derived from the word; overrideFile is only used by the
    # disabled write at the bottom of the loop
    saveFile = activeWord + u".html"
    overrideFile = activeWord + u".dat"

    # Online entry: download the entry and keep a copy on disk
    activeEntry = dictAssist.download_entry(connectionPool, activeWord)
    cfile.write(saveFile, activeEntry)

    # Offline entry: parse the copy that was just saved
    connectionData = cfile.read(saveFile)
    wordDictionary = cdictapi.DictionaryEntry(
        dictConfig, activeWord, connectionData)

    # Number of parsed entries, followed by the simplified entry
    print(len(wordDictionary.word_entries))
    print(bannerLine)
    print(wordDictionary.simplified_word_entry)
    print(bannerLine)

    # Show the override text; writing it to overrideFile is left disabled
    overrideData = wordDictionary.simplified_word_entry.generate_override()
    print(overrideData)
Ejemplo n.º 2
0
            # Extend the entry text with an examples line, a related-words line and
            # a "(label) definition" line, each preceded by APP_NEWLINE
            wordEntry = wordEntry + APP_NEWLINE + "#!Examples: " + wordExamples
            wordEntry = wordEntry + APP_NEWLINE + "#!Related: " + wordRelated
            wordEntry = wordEntry + APP_NEWLINE + "({}) {}".format(wordFuncLabel, wordDefinition)

            # Show the assembled entry
            print("\nEntry for {}: ".format(displayWord))
            print(wordEntry)

            # Record the word in the word list file
            print("\nAdding to word list file: " + APP_LIST_FILE)
            cfile.append(APP_LIST_FILE, listWord)

            # Write the definition file unless one larger than 100 bytes already exists
            APP_DICT_ENTR_FILE = APP_DICT_DIR + cfile.cleanse_filename(APP_DICT_ENTR.format(WORD=listWord))
            if os.path.isfile(APP_DICT_ENTR_FILE) and os.path.getsize(APP_DICT_ENTR_FILE) > 100:
                print("Definition file {} exists. Skipping.".format(APP_DICT_ENTR_FILE))
            else:
                print("Creating definition file: " + APP_DICT_ENTR_FILE)
                cfile.write(APP_DICT_ENTR_FILE, wordEntry)

            # Download the pronunciation clip unless one larger than 100 bytes already exists
            APP_DICT_CLIP_FILE = APP_DICT_DIR + cfile.cleanse_filename(APP_DICT_CLIP.format(WORD=listWord))
            if os.path.isfile(APP_DICT_CLIP_FILE) and os.path.getsize(APP_DICT_CLIP_FILE) > 100:
                print("Pronunciation file {} exists. Skipping.".format(APP_DICT_CLIP_FILE))
            else:
                print("Creating pronunciation file: " + APP_DICT_CLIP_FILE)
                cfile.download(connectionPool, wordAudioURL, APP_DICT_CLIP_FILE)

            # Advance to the next page: click Next, wait for the content column to
            # be present, then pause for APP_WAIT_DELAY seconds.
            # NOTE(review): the exit condition of this loop is not visible in this fragment.
            while True:

                nextButton = browser.find_element_by_id("nextButton")
                nextButton.click()
                print("Clicked Next button")
                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".col-sm-9.col-md-9")))
                time.sleep(APP_WAIT_DELAY)
Ejemplo n.º 3
0
    
    # Clean the raw list text with the shared SB_CLEAN_* pattern sets
    cleansedText = cparser.cleanse_text(listRawText, SB_CLEAN_TEXT_PATTERNS, SB_CLEAN_INNER_TEXT_PATTERNS, SB_CLEAN_OUTER_TEXT_PATTERNS)

    # Language slug for the output file name: lower case, spaces replaced by hyphens
    listLang = listID.lower().replace(' ', '-')

    # Process basic word list
    listType = "basic"
    listFileName = SB_WORD_LIST_OUT[args.contestYear].format(YEAR=listYear, SEQ=listSeq, LANG=listLang, TYPE=listType)

    # Outer patterns targeting the "section word study" div.
    # NOTE(review): presumably cleanse_text drops the text matched by each
    # outer pattern pair, leaving the div content; verify against cparser.
    sectionOuterTextPatterns = [
    [r'.*<div class="section word study">', r'</div>.*']
    ]
    sectionText = cparser.cleanse_text(cleansedText, SB_CLEAN_TEXT_PATTERNS, SB_CLEAN_INNER_TEXT_PATTERNS, sectionOuterTextPatterns)
    # Collect the text enclosed by each <li> ... </li> pair and write it out
    words = cparser.find_enclosed_text(r'<li>\s*', r'\s*</li>', sectionText)
    print "Writing " + listFileName
    cfile.write(listFileName, coutput.multiline_text(words))

    # Process challenge word list (same steps, "section word challenge" div)
    listType = "challenge" 
    listFileName = SB_WORD_LIST_OUT[args.contestYear].format(YEAR=listYear, SEQ=listSeq, LANG=listLang, TYPE=listType)

    sectionOuterTextPatterns = [
    [r'.*<div class="section word challenge">', r'</div>.*']
    ]
    sectionText = cparser.cleanse_text(cleansedText, SB_CLEAN_TEXT_PATTERNS, SB_CLEAN_INNER_TEXT_PATTERNS, sectionOuterTextPatterns)
    words = cparser.find_enclosed_text(r'<li>\s*', r'\s*</li>', sectionText)
    print "Writing " + listFileName
    cfile.write(listFileName, coutput.multiline_text(words))

connectionPool.clear()
Ejemplo n.º 4
0
    def lookup_dictionary_by_word(self, word):
        """Load the definition and pronunciation clip for ``word``.

        The word is stripped of surrounding whitespace and stored in
        ``self.activeWord``.  The definition is taken from the first source
        that applies: a non-empty override file, an offline entry file larger
        than 100 bytes, or a fresh download (which is then saved offline).
        The pronunciation clip is resolved the same way: a non-empty override
        clip, an offline clip larger than 1000 bytes (with a usable offline
        entry), or a download of the clip URL parsed from the entry.

        Results are stored on the instance in ``activeEntry``,
        ``activeDefinition``, ``activePronunciation`` and
        ``activePronunciationWord``; nothing is returned.  Missing
        definitions, missing audio and word/audio mismatches are appended to
        the error log at ``SB_DATA_DIR + SB_ERR_LOG``.

        Args:
            word: The word to look up; surrounding whitespace is ignored.
        """
        _FUNC_NAME_ = "lookup_dictionary_by_word"

        def debug_value(label, value):
            # Trace the type of a value, then the value itself, under its label
            coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "{0} :: {1}".format(label, type(value)))
            coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, value)

        # Guarded so that an empty word list cannot abort the lookup from a debug trace
        if self.wordList:
            debug_value("self.wordList[0]", self.wordList[0])
        debug_value("word", word)

        self.activeWord = word.strip()
        debug_value("self.activeWord", self.activeWord)

        # Setup connection and error logging
        connectionPool = urllib3.PoolManager(10, headers=SB_USER_AGENT)
        errorFileName = SB_DATA_DIR + SB_ERR_LOG

        try:
            # Check offline for dictionary entry
            self.activeEntry = SB_EMPTY_STRING
            self.activeDefinition = []

            # File names are built from the stripped word so that they agree
            # with the pronunciation file names further down
            overrideDefnFileName = SB_DICT_OVERRIDE_DIR + SB_DICT_OVERRIDE_DEFN.format(WORD=self.activeWord).replace(" ", "_")
            offlineEntryFileName = SB_DICT_OFFLINE_DIR + SB_DICT_OFFLINE_ENTR.format(WORD=self.activeWord).replace(" ", "_")

            # Check for dictionary definition override
            if os.path.isfile(overrideDefnFileName) and os.path.getsize(overrideDefnFileName) > 0:
                self.activeEntry = u"[Dictionary Definition Override]"
                self.activeDefinition = cfile.read(overrideDefnFileName).splitlines()
                debug_value("self.activeEntry", self.activeEntry)

            # Check primary source for dictionary entry
            elif os.path.isfile(offlineEntryFileName) and os.path.getsize(offlineEntryFileName) > 100:
                coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "offlineEntryFile size :: {0}".format(os.path.getsize(offlineEntryFileName)))
                self.activeEntry = cfile.read(offlineEntryFileName)
                self.activeDefinition = cdict.parse_word_definition(self.activeWord, self.activeEntry)
                debug_value("self.activeEntry", self.activeEntry)

            else:
                # Download dictionary entry
                self.activeEntry = cdict.get_dictionary_entry(connectionPool, self.activeWord)
                debug_value("self.activeEntry", self.activeEntry)

                # Save dictionary entry offline
                cfile.write(offlineEntryFileName, self.activeEntry)

                # Retrieve word definition
                self.activeDefinition = cdict.parse_word_definition(self.activeWord, self.activeEntry)
                if not self.activeDefinition:
                    # Log missing definition error
                    cfile.append(errorFileName, u"ERROR:Missing Definition:{0}\n".format(self.activeWord))

            # Check offline for word pronunciation
            self.activePronunciation = SB_EMPTY_STRING
            self.activePronunciationWord = SB_EMPTY_STRING

            overrideProncnFileName = SB_DICT_OVERRIDE_DIR + SB_DICT_OVERRIDE_CLIP.format(WORD=self.activeWord).replace(" ", "_")
            offlineProncnFileName = SB_DICT_OFFLINE_DIR + SB_DICT_OFFLINE_CLIP.format(WORD=self.activeWord).replace(" ", "_")

            # Check for dictionary pronunciation override
            if os.path.isfile(overrideProncnFileName) and os.path.getsize(overrideProncnFileName) > 0:
                self.activePronunciation = overrideProncnFileName
                self.activePronunciationWord = self.activeWord

            # Check primary source for dictionary entry and pronunciation
            elif (os.path.isfile(offlineEntryFileName) and os.path.getsize(offlineEntryFileName) > 100
                    and os.path.isfile(offlineProncnFileName) and os.path.getsize(offlineProncnFileName) > 1000):
                coutput.print_debug(SB_ERR_DEBUG, _FUNC_NAME_, "offlineProncnFile size :: {0}".format(os.path.getsize(offlineProncnFileName)))

                self.activePronunciation = offlineProncnFileName

                # Only the clip word form is needed; the clip is already on disk
                wordClipForm, _ = cdict.parse_word_clip(self.activeWord, self.activeEntry)
                self.activePronunciationWord = wordClipForm

            else:
                # Retrieve pronunciation audio clip word form and URL
                wordClipForm, wordClipURL = cdict.parse_word_clip(self.activeWord, self.activeEntry)

                if wordClipURL == SB_EMPTY_STRING:
                    # Log missing audio error
                    cfile.append(errorFileName, u"ERROR:Missing Audio:{0}\n".format(self.activeWord))
                else:
                    # Download audio clip and save it offline
                    cfile.download(connectionPool, wordClipURL, offlineProncnFileName)

                    self.activePronunciation = offlineProncnFileName
                    self.activePronunciationWord = wordClipForm

            # Log audio mismatch error: compare the word and the clip word form
            # on their lower-cased ASCII letters only
            wordToken = re.sub('[^a-zA-Z]', SB_EMPTY_STRING, self.activeWord.lower())
            pronunciationToken = re.sub('[^a-zA-Z]', SB_EMPTY_STRING, self.activePronunciationWord.lower())
            if self.activePronunciation != SB_EMPTY_STRING and wordToken != pronunciationToken:
                cfile.append(errorFileName, u"ERROR:Audio Mismatch:{0}\n".format(self.activeWord))

        finally:
            # Close connection, also when the lookup fails part-way
            connectionPool.clear()
    # Log entries are colon-separated: field 1 is the word, field 2 is the
    # error description matched against the SDO_ERR_* patterns below
    logValues = entry.split(':')
    
    word = logValues[1]
    wordEntry = cdict.fetch_dictionary_entry(connectionPool, word)
    coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry')

    # Set when the corresponding override could not be obtained for this word
    SDO_ERR_DEFN_MISSING = False
    SDO_ERR_CLIP_MISSING = False
    
    print unicode("Word: {0}\t{1}", 'utf-8').format(word, logValues[2])

    # Definition error: write wordEntry[1] as the definition override when it is non-empty
    if SDO_ERR_DEFN_REGEX_PATTERN.match(logValues[2]):
        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[1]')
        if len(wordEntry[1]) > 0:
            print ">> Downloaded definition override"
            cfile.write(SDO_OVERRIDE_DEFN_FILE.format(WORD=word), coutput.multiline_text(wordEntry[1]))
        else:
            SDO_ERR_DEFN_MISSING = True
            coutput.print_color('yellow', "WARNING: Definition override not available")
 
    # Audio error: download wordEntry[4] as the pronunciation override when it is non-empty
    if SDO_ERR_AUDIO_REGEX_PATTERN.match(logValues[2]):
        coutput.print_watcher(SB_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[4]')
        if wordEntry[4] != "":
            print ">> Downloaded pronunciation override"
            cfile.download(connectionPool, wordEntry[4], SDO_OVERRIDE_PRON_FILE.format(WORD=word))
        else:
            SDO_ERR_CLIP_MISSING = True
            coutput.print_color('yellow', "WARNING: Pronunciation override not available")

    # Log errors
    # NOTE(review): only the "ERROR:<word>:" prefix is built here; the rest of
    # the error line is outside this fragment.
    errorText = unicode("ERROR:{0}:", 'utf-8').format(word)
Ejemplo n.º 6
0
# Download overrides for the words named in the entries of SDO_LIST_FILE.
connectionPool = urllib3.PoolManager(10, headers=SDO_USER_AGENT)

logEntries = cfile.read(SDO_LIST_FILE).splitlines()

print "Downloading overrides ..."

for entry in logEntries:
    coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'entry')

    # Entries are colon-separated: field 1 is the word, field 2 is the error
    # description matched against the SDO_ERR_* patterns below
    logValues = entry.split(':')

    word = logValues[1]

    # Download the raw entry only when no saved copy exists yet
    if not os.path.isfile(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word)):
        cfile.write(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word),
                    dictAssist.download_entry(connectionPool, word))

    # Parse the saved copy; wordEntry is the raw text of the entry file
    wordEntry = cfile.read(SDO_OVERRIDE_ENTRY_FILE.format(WORD=word))
    wordDictionary = cdictapi.DictionaryEntry(dictConfig, word, wordEntry)
    coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'wordEntry')

    # Set when the corresponding override could not be obtained for this word
    SDO_ERR_DEFN_MISSING = False
    SDO_ERR_CLIP_MISSING = False

    print unicode("\nWord: {0}\t{1}", 'utf-8').format(word, logValues[2])

    if SDO_ERR_DEFN_REGEX_PATTERN.match(logValues[2]):
        coutput.print_watcher(SDO_ERR_DEBUG, _FUNC_NAME_, 'wordEntry[1]')
        # NOTE(review): wordEntry is the value returned by cfile.read above, so
        # wordEntry[1] indexes into the raw entry text rather than a parsed
        # definition list. This looks like a leftover; confirm the intended check.
        if len(wordDictionary.word_entries) > 0 and len(wordEntry[1]) > 0:
            print ">> Downloaded definition override"
Ejemplo n.º 7
0
################################################################
# Main Program
################################################################

# For every word listed in SDO_CONF_FILE, fetch its dictionary entry and save
# the definition and pronunciation overrides that the entry provides.
connectionPool = urllib3.PoolManager(10, headers=SDO_USER_AGENT)

wordList = cfile.read(SDO_CONF_FILE).splitlines()

print(u"Downloading overrides ...")
for word in wordList:
    print(u"Word: " + word)
    wordEntry = cdict.fetch_dictionary_entry(connectionPool, word)

    # Element 1 of the entry is written out as the definition override
    if len(wordEntry[1]) == 0:
        coutput.print_color('yellow', "WARNING: Definition override not available")
    else:
        definitionFile = SDO_OVERRIDE_DEFN_FILE.format(WORD=word)
        cfile.write(definitionFile, coutput.multiline_text(wordEntry[1]))

    # Element 4 of the entry is downloaded as the pronunciation override
    if wordEntry[4] == "":
        coutput.print_color('yellow', "WARNING: Pronunciation override not available")
    else:
        cfile.download(connectionPool, wordEntry[4],
                       SDO_OVERRIDE_PRON_FILE.format(WORD=word))

# Release the pooled connections once every word has been processed
connectionPool.clear()


########################################################################
# Debugging Commands
########################################################################
'''