Python get_stoplist Examples

Programming Language: Python

Namespace/Package Name: justext.utils

Method/Function: get_stoplist

Examples at hotexamples.com: 8

Python get_stoplist - 8 examples found. These are the top-rated real-world Python examples of justext.utils.get_stoplist, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: external.py Project: zanachka/trafilatura

def try_justext(tree, url, target_language):
    '''Second safety net: fall back to the generic jusText algorithm.

    Args:
        tree: document passed unchanged to ``custom_justext``.
        url: only used to label the error log entry.
        target_language: key looked up in ``JUSTEXT_LANGUAGES``; ``None``
            or an unknown key selects the default ``JT_STOPLIST``.

    Returns:
        A ``body`` element holding one ``p`` child per non-boilerplate
        paragraph, or ``None`` when ``custom_justext`` raises
        ``ValueError``.
    '''
    body = etree.Element('body')
    # Choose the stoplist: language-specific when the language is known,
    # otherwise the default one.
    if target_language is None or target_language not in JUSTEXT_LANGUAGES:
        stoplist = JT_STOPLIST
    else:
        stoplist = get_stoplist(JUSTEXT_LANGUAGES[target_language])
    # Run the extraction; only ValueError is handled here.
    try:
        paragraphs = custom_justext(tree, stoplist)
    except ValueError as err:  # not an XML element: HtmlComment
        LOGGER.error('justext %s %s', err, url)
        return None
    # Keep only real content (duplicate filtering is currently disabled).
    kept = list(filter(lambda par: not par.is_boilerplate, paragraphs))
    for par in kept:
        node = etree.Element('p')
        node.text = par.text
        body.append(node)
    return body

Example #2

Show file

File: test_utils.py Project: pombredanne/jusText

    def test_get_real_stoplist(self):
        """A stoplist requested for "Slovak" must not be empty."""
        slovak_words = get_stoplist("Slovak")
        word_count = len(slovak_words)

        tools.assert_true(word_count > 0)

Example #3

Show file

File: external.py Project: zanachka/trafilatura

def jt_stoplist_init():
    '''Retrieve and return the content of all JusText stoplists.

    Returns:
        A single ``set`` merging the entries of ``get_stoplist(language)``
        for every language yielded by ``get_stoplists()``.
    '''
    return {
        entry
        for language in get_stoplists()
        for entry in get_stoplist(language)
    }

Example #4

Show file

 def test_get_missing_stoplist(self):
     """Requesting the stoplist "Klingon" must raise ValueError."""
     unknown_language = "Klingon"
     with pytest.raises(ValueError):
         get_stoplist(unknown_language)

Example #5

Show file

    def test_get_real_stoplist(self):
        """A stoplist requested for "Slovak" must contain at least one entry."""
        slovak_words = get_stoplist("Slovak")
        word_count = len(slovak_words)

        assert word_count > 0

Example #6

Show file

File: test_utils.py Project: anukat2015/jusText

 def test_get_missing_stoplist(self):
     """``get_stoplist`` is expected to raise ValueError for "Klingon"."""
     missing_language = "Klingon"
     with pytest.raises(ValueError):
         get_stoplist(missing_language)

Example #7

Show file

File: test_utils.py Project: anukat2015/jusText

    def test_get_real_stoplist(self):
        """The "Slovak" stoplist is expected to be non-empty."""
        language = "Slovak"
        loaded = get_stoplist(language)

        assert len(loaded) > 0

Example #8

Show file

File: test_utils.py Project: Almad/jusText

    def test_get_real_stoplist(self):
        """The "Slovak" stoplist is expected to hold at least one word."""
        language = "Slovak"
        loaded = get_stoplist(language)

        tools.assert_true(len(loaded) > 0)