Ejemplo n.º 1
0
 def test_word_extractor(self):
     # Sample mixing sentence punctuation, hyphen-joined words and a run
     # of bare symbols; only the eight words below are expected back,
     # in their original order.
     sample = "What? Wait! Stop now. We-are-the-champs. & $%^&*()!@"
     wanted = "What Wait Stop now We are the champs".split()

     found = WordExtractor().get_words_from_text(sample)

     self.assertListEqual(found, wanted)
Ejemplo n.º 2
0
from database import MySqlDataSouce
from wordextractor import WordExtractor
import time

# Tag and status strings. Their consumers are not visible in this part of
# the file; presumably language/word-class tags and per-word result codes
# -- TODO confirm against the code that uses them.
ENGLISH = "EN"
DEUTSCH = "DE"
NOUN = "NOUN"
SUCCESS = "SUCCESS"
ERROR_RATE_LIMIT = "ERROR RATE LIMIT"
ERROR_CLIENT_POOL ="ERROR CLIENT POOL"
ERROR_UKNOWN = "ERROR UKNOWN"
# Presumably pause lengths in seconds (use not visible here) -- TODO confirm.
THROTTLE_SECONDS = 10
EXCEPTION_SLEEP_SECONDS = 60
# Passed to time.sleep() by the main loop once error_count exceeds 5.
ERROR_COUNT_SLEEP_SECONDS = 500

# Module-level collaborators, created once at import time.
# NOTE(review): LeoFetcher and LeoParser are not imported by the import
# lines at the top of this file -- confirm they are brought into scope
# somewhere, otherwise these two lines raise NameError.
extractor = WordExtractor()
fetcher = LeoFetcher()
dao = MySqlDataSouce()
parser = LeoParser()

# Read the entire word list file into memory; the with-block closes it.
with open('resources/words.txt', 'r') as hall:
    data = hall.read()

# Split the raw text into the words the main loop iterates over.
words = extractor.get_words_from_text(data)
# Counter checked against a threshold of 5 at the top of each loop pass.
error_count = 0
# Walk the extracted words one at a time.
for word in words:

    # Back off once error_count exceeds 5: report the pause, reset the
    # counter, then sleep before carrying on with the current word.
    if error_count > 5:
        # str() is required: ERROR_COUNT_SLEEP_SECONDS is an int, and
        # concatenating it directly onto a str raises TypeError.
        print("Error count reached sleeping " + str(ERROR_COUNT_SLEEP_SECONDS) + " seconds")
        error_count = 0
        time.sleep(ERROR_COUNT_SLEEP_SECONDS)