def add_words(self, filename):
    """Read terms from *filename*, add each new qualified term, and report counts.

    Each line is cleaned/qualified; terms that fail qualification or whose
    slug was already seen are collected in ``skipped``. A configurable delay
    (``config.add_word_interval``) throttles successive ``add`` calls.
    """
    print("Adding words from {}...".format(filename))
    added, skipped = set(), []
    with open(filename) as f:
        for term in f.readlines():
            term = clean_and_qualify_term(term)
            if term:
                slug = slugify(term)
                if slug not in added:
                    # Throttle so we don't flood the downstream add() target.
                    time.sleep(config.add_word_interval)
                    added.add(slug)
                    add(term)
                else:
                    # Duplicate slug — already added this run.
                    skipped.append(term)
            else:
                # Term failed cleaning/qualification.
                skipped.append(term)
    # BUG FIX: was a Python 2 `print "..."` statement, which is a syntax
    # error in Python 3 and inconsistent with the print() call above.
    print("Added {} terms, skipped {}".format(len(added), len(skipped)))
def add_words(bucket, key):
    """Read a word list from S3 (``bucket``/``key``) and enqueue new terms.

    Each line is cleaned/qualified; new hashslugs are sent to the 'search'
    queue via ``tasks.write_message``. Terms that fail qualification or
    duplicate an already-seen hashslug are collected in ``skipped``.
    """
    contents = config.s3.Object(bucket, key).get()
    words = contents['Body'].read().splitlines()
    added, skipped = set(), []
    for term in words:
        term = clean_and_qualify_term(term)
        if term:
            slug = hashslug(term)
            if slug not in added:
                added.add(slug)
                message = {'word': term, 'hashslug': slug}
                tasks.write_message('search', message)
            else:
                # Duplicate hashslug — already enqueued this run.
                skipped.append(term)
        else:
            # Term failed cleaning/qualification.
            skipped.append(term)
    # BUG FIX: was a Python 2 `print "..."` statement — a syntax error
    # under Python 3.
    print("Added {} terms, skipped {}".format(len(added), len(skipped)))
def test_disqualify():
    """Every word in the disqualified fixture must be rejected by clean_and_qualify_term."""
    with codecs.open("serapis/tests/data/words_disqualified.txt", "r", "utf-8") as wordlist:
        for word in wordlist.readlines():
            failure_msg = "Word '{}' falsely marked as valid".format(word.strip())
            assert not clean_and_qualify_term(word), failure_msg
def test_disqualify():
    """Check that no word from the disqualified fixture passes clean_and_qualify_term."""
    fixture = "serapis/tests/data/words_disqualified.txt"
    with codecs.open(fixture, 'r', 'utf-8') as wordlist:
        for word in wordlist.readlines():
            assert not clean_and_qualify_term(
                word), "Word '{}' falsely marked as valid".format(word.strip())