def cleanup_keywords(keywords):
    """
    Normalize an iterable of keyword strings.

    Each keyword is stripped of surrounding whitespace, split on ``;``,
    ``,`` and ``|`` when it bundles several values, freed of a lone
    double quote, and converted to titlecase unless it matches
    ``ALL_CAPS``. Keywords that ``ALPHANUM_RE`` does not match are
    dropped, as are keywords left empty by the cleanup.

    :param keywords: iterable of keyword strings.
    :returns: sorted list of the unique cleaned keywords.
    """
    # Order matters only for which separator triggers the first split; the
    # recursive call takes care of any separators left in the parts.
    separators = (u";", u",", u"|")

    cleaned_keywords = set()
    for kw in keywords:
        kw = kw.strip()
        # NOTE(review): ALPHANUM_RE is defined elsewhere in the module;
        # presumably it rejects keywords without alphanumeric content.
        if not ALPHANUM_RE.match(kw):
            continue

        # If the keyword contains a semicolon, comma or pipe we split it
        # and re-process every part.
        separator = next((sep for sep in separators if sep in kw), None)
        if separator is not None:
            cleaned_keywords.update(cleanup_keywords(kw.split(separator)))
            continue

        # If the keyword contains one and only one quote char we remove it.
        if kw.count(u'"') == 1:
            # Re-strip: dropping the quote can expose whitespace that the
            # initial strip() could not reach (e.g. 'foo "' -> 'foo '),
            # which would otherwise yield padded near-duplicates.
            kw = kw.replace(u'"', u'').strip()
            if not kw:
                continue

        # Keywords matching ALL_CAPS are kept verbatim rather than
        # titlecased.
        if not ALL_CAPS.match(kw):
            kw = titlecase(kw)
        cleaned_keywords.add(kw)

    return sorted(cleaned_keywords)
def test_all_caps_regex():
    """Test - the all-capitals regex matches an upper-case phrase"""
    from titlecase import ALL_CAPS

    sample = 'THIS IS ALL CAPS'
    matched = ALL_CAPS.match(sample)
    # A regex match object is always truthy, so "matched" means "not None".
    assert matched is not None
def test_all_caps_regex_2():
    """Test - the all-capitals regex matches a phrase containing a number"""
    from titlecase import ALL_CAPS

    sample = 'THIS IS ALL CAPS JOHHNY7'
    matched = ALL_CAPS.match(sample)
    # A regex match object is always truthy, so "matched" means "not None".
    assert matched is not None