예제 #1
0
def cleanup_keywords(keywords):
    """
    Normalize an iterable of keyword strings.

    Each keyword is stripped of surrounding whitespace and skipped when it
    does not match ``ALPHANUM_RE``. Keywords containing ``;``, ``,`` or ``|``
    are split and re-processed recursively. A lone double-quote character is
    removed, and the keyword is passed through ``titlecase`` unless it
    matches ``ALL_CAPS``. Duplicates and empty strings are dropped.

    :param keywords: iterable of keyword strings.
    :returns: sorted list of the distinct cleaned keywords.
    """

    # Checked in this order; only the first separator found is split on here,
    # any remaining ones are handled by the recursive call.
    separators = (u";", u",", u"|")

    cleaned_keywords = set()
    for kw in keywords:

        kw = kw.strip()

        if not ALPHANUM_RE.match(kw):
            continue

        # A keyword holding a separator is really several keywords: split it
        # and re-process the pieces.
        separator = next((sep for sep in separators if sep in kw), None)
        if separator is not None:
            cleaned_keywords.update(cleanup_keywords(kw.split(separator)))
            continue

        # If the keyword contains one and only one quote char we remove it
        if kw.count(u'"') == 1:
            # Strip again: dropping the quote can expose whitespace
            # (e.g. u'foo "' -> u'foo '), which would otherwise survive as a
            # near-duplicate of u'foo'.
            kw = kw.replace(u'"', u'').strip()
            if not kw:
                continue

        # Keywords matching ALL_CAPS are kept verbatim instead of titlecased.
        if not ALL_CAPS.match(kw):
            kw = titlecase(kw)

        cleaned_keywords.add(kw)

    return sorted(cleaned_keywords)
예제 #2
0
파일: tests.py 프로젝트: ixc/titlecase.py
def test_all_caps_regex():
    """An all-uppercase phrase is matched by the ALL_CAPS pattern."""
    from titlecase import ALL_CAPS
    phrase = 'THIS IS ALL CAPS'
    matched = bool(ALL_CAPS.match(phrase))
    assert matched is True
예제 #3
0
def test_all_caps_regex():
    """An all-uppercase phrase is matched by the ALL_CAPS pattern."""
    from titlecase import ALL_CAPS
    phrase = 'THIS IS ALL CAPS'
    matched = bool(ALL_CAPS.match(phrase))
    assert matched is True
예제 #4
0
def test_all_caps_regex_2():
    """An all-uppercase phrase containing a number is matched by ALL_CAPS."""
    from titlecase import ALL_CAPS
    phrase = 'THIS IS ALL CAPS JOHHNY7'
    matched = bool(ALL_CAPS.match(phrase))
    assert matched is True