Beispiel #1
0
def test_keyword_in_context_ignore_case():
    """Check that a case-sensitive KWIC search respects the keyword's case.

    Every match returned with ``ignore_case=False`` must equal the keyword
    exactly, and a lowercase 'clinton' search is expected to return nothing.
    """
    kwic_options = dict(ignore_case=False, window_width=50, print_only=False)
    for keyword in ('All', 'all'):
        hits = list(text_utils.keyword_in_context(TEXT, keyword, **kwic_options))
        for _, matched, _ in hits:
            assert matched == keyword
    # A case-sensitive search for lowercase 'clinton' should come back empty.
    no_hits = list(text_utils.keyword_in_context(TEXT, 'clinton', **kwic_options))
    assert no_hits == []
Beispiel #2
0
 def test_keyword_in_context_unicode(self):
     """Check KWIC extraction around a keyword containing a non-ASCII letter.

     The observed and expected lists are compared as a whole. Pairing them
     with ``zip`` stops at the shorter sequence, so an empty (or over-long)
     result would otherwise pass without any assertion failing.
     """
     text = 'No llores porque ya se terminó, sonríe porque sucedió.'
     observed = list(text_utils.keyword_in_context(
         text, 'terminó', print_only=False))
     expected = [('No llores porque ya se ', 'terminó', ', sonríe porque sucedió.')]
     self.assertEqual(observed, expected)
Beispiel #3
0
 def test_keyword_in_context_unicode(self):
     """Verify the (pre, keyword, post) triple for an accented keyword.

     The full lists are compared directly rather than item by item through
     ``zip``: ``zip`` truncates to the shorter input, which would let a
     missing or surplus match slip through unchecked.
     """
     text = 'No llores porque ya se terminó, sonríe porque sucedió.'
     observed = list(text_utils.keyword_in_context(
         text, 'terminó', print_only=False))
     expected = [('No llores porque ya se ', 'terminó', ', sonríe porque sucedió.')]
     self.assertEqual(observed, expected)
Beispiel #4
0
def test_keyword_in_context_unicode():
    """Check that an accented keyword is found and returned unchanged.

    The sample sentence contains the keyword, so at least one match is
    required; each returned match must equal the keyword.
    """
    keyword = 'terminó'
    results = list(text_utils.keyword_in_context(
        'No llores porque ya se terminó, sonríe porque sucedió.',
        keyword,
        print_only=False))
    # Without this check the loop below asserts nothing when no match is
    # returned, and the test would pass vacuously.
    assert results
    for pre, kw, post in results:
        assert kw == keyword
Beispiel #5
0
 def test_keyword_in_context_width(self):
     """Check the context windows returned for ``window_width=10``.

     The observed list is compared with the expected list as a whole.
     Iterating ``zip(observed, expected)`` stops at the shorter sequence, so
     an empty result or extra matches would not be detected.
     """
     observed = list(text_utils.keyword_in_context(
         TEXT, 'clinton', ignore_case=True, window_width=10, print_only=False))
     expected = [
         ('when Bill ', 'Clinton', ' was elect'),
         ('d Hillary ', 'Clinton', ' have pled')]
     self.assertEqual(observed, expected)
Beispiel #6
0
 def test_keyword_in_context_width(self):
     """Verify the 10-character context on each side of every match.

     Whole-list equality is used instead of a ``zip`` loop, because ``zip``
     truncates to the shorter input and would hide missing or surplus
     matches.
     """
     observed = list(text_utils.keyword_in_context(
         TEXT, 'clinton', ignore_case=True, window_width=10, print_only=False))
     expected = [
         ('when Bill ', 'Clinton', ' was elect'),
         ('d Hillary ', 'Clinton', ' have pled')]
     self.assertEqual(observed, expected)
Beispiel #7
0
 def test_keyword_in_context(self):
     """Check the case-insensitive KWIC results for ``window_width=50``.

     The observed list is compared with the expected list as a whole.
     Iterating ``zip(observed, expected)`` stops at the shorter sequence, so
     an empty result or extra matches would not be detected.
     """
     observed = list(text_utils.keyword_in_context(
         TEXT, 'clinton', ignore_case=True, window_width=50, print_only=False))
     expected = [
         ('rtunes has been stark. Two decades ago, when Bill ', 'Clinton', ' was elected president, the 400 highest-earning ta'),
         ('  While Democrats like Bernie Sanders and Hillary ', 'Clinton', ' have pledged to raise taxes on these voters, virt')]
     self.assertEqual(observed, expected)
Beispiel #8
0
 def test_keyword_in_context(self):
     """Verify the (pre, keyword, post) triples for a case-insensitive search.

     Whole-list equality is used instead of a ``zip`` loop, because ``zip``
     truncates to the shorter input and would hide missing or surplus
     matches.
     """
     observed = list(text_utils.keyword_in_context(
         TEXT, 'clinton', ignore_case=True, window_width=50, print_only=False))
     expected = [
         ('rtunes has been stark. Two decades ago, when Bill ', 'Clinton', ' was elected president, the 400 highest-earning ta'),
         ('  While Democrats like Bernie Sanders and Hillary ', 'Clinton', ' have pledged to raise taxes on these voters, virt')]
     self.assertEqual(observed, expected)
Beispiel #9
0
def test_keyword_in_context_window_width():
    """Check that neither context side exceeds the requested window width."""
    for window_width in (10, 20):
        kwic = text_utils.keyword_in_context(
            TEXT,
            'clinton',
            ignore_case=True,
            window_width=window_width,
            print_only=False,
        )
        for before, _, after in list(kwic):
            # Both the left and the right context are bounded by the window.
            assert len(before) <= window_width
            assert len(after) <= window_width
Beispiel #10
0
 def test_keyword_in_context_ignore_case(self):
     """Check that a case-sensitive KWIC search respects the keyword's case.

     Every match returned with ``ignore_case=False`` must equal the keyword
     exactly, and a lowercase 'clinton' search is expected to return nothing.
     """
     kwic_options = dict(ignore_case=False, window_width=50, print_only=False)
     for keyword in ('All', 'all'):
         hits = list(
             text_utils.keyword_in_context(TEXT, keyword, **kwic_options))
         for _, matched, _ in hits:
             self.assertEqual(matched, keyword)
     # A case-sensitive search for lowercase 'clinton' should come back empty.
     no_hits = list(
         text_utils.keyword_in_context(TEXT, 'clinton', **kwic_options))
     self.assertEqual(no_hits, [])
Beispiel #11
0
 def test_keyword_in_context_case(self):
     """Check that a case-sensitive search for lowercase 'clinton' is empty.

     The lists are compared directly. Looping over ``zip(observed, expected)``
     with an empty ``expected`` never runs the loop body, so the test would
     make no assertion at all.
     """
     observed = list(
         text_utils.keyword_in_context(TEXT,
                                       'clinton',
                                       ignore_case=False,
                                       window_width=50,
                                       print_only=False))
     expected = []
     self.assertEqual(observed, expected)
Beispiel #12
0
def test_keyword_in_context_keyword():
    """Check that case-insensitive matches equal the keyword once lowercased."""
    kwic_options = dict(ignore_case=True, window_width=50, print_only=False)
    for keyword in ("clinton", "all"):
        hits = list(
            text_utils.keyword_in_context(TEXT, keyword, **kwic_options))
        for _, matched, _ in hits:
            # The keywords are lowercase, so lowercasing the match suffices.
            assert matched.lower() == keyword
Beispiel #13
0
 def test_keyword_in_context_keyword(self):
     """Check that case-insensitive matches equal the keyword once lowercased."""
     kwic_options = dict(ignore_case=True, window_width=50, print_only=False)
     for keyword in ('clinton', 'all'):
         hits = list(
             text_utils.keyword_in_context(TEXT, keyword, **kwic_options))
         for _, matched, _ in hits:
             # The keywords are lowercase, so lowercasing the match suffices.
             self.assertEqual(matched.lower(), keyword)
Beispiel #14
0
def test_plaintext_functionality(text):
    """Exercise text preprocessing and keyword-in-context on plain text.

    The first 100 characters of the preprocessed text must be lowercase and
    contain only alphanumerics or whitespace. Every KWIC match for 'America'
    must equal the keyword and carry unicode context strings.
    """
    cleaned = preprocess_text(text, lowercase=True, no_punct=True)
    sample = cleaned[:100]
    for char in sample:
        if char.isalpha():
            assert char.islower()
    for char in sample:
        assert char.isalnum() or char.isspace()

    keyword = 'America'
    matches = text_utils.keyword_in_context(
        text, keyword, window_width=35, print_only=False)
    for before, matched, after in matches:
        assert matched == keyword
        assert isinstance(before, compat.unicode_)
        assert isinstance(after, compat.unicode_)
Beispiel #15
0
def test_plaintext_functionality(text):
    """Exercise the preprocessing helpers and keyword-in-context on plain text.

    The text is whitespace-normalized, stripped of punctuation and lowercased;
    the result must be lowercase and contain only alphanumerics or whitespace.
    Every KWIC match for "America" in the raw text must equal the keyword and
    carry unicode context strings.
    """
    preprocessed_text = preprocessing.normalize_whitespace(text)
    # Chain on the previous step's output; passing ``text`` here would throw
    # away the whitespace normalization done just above.
    preprocessed_text = preprocessing.remove_punctuation(preprocessed_text)
    preprocessed_text = preprocessed_text.lower()
    assert all(char.islower() for char in preprocessed_text if char.isalpha())
    assert all(char.isalnum() or char.isspace() for char in preprocessed_text)
    keyword = "America"
    kwics = text_utils.keyword_in_context(text,
                                          keyword,
                                          window_width=35,
                                          print_only=False)
    for pre, kw, post in kwics:
        assert kw == keyword
        assert isinstance(pre, compat.unicode_)
        assert isinstance(post, compat.unicode_)
Beispiel #16
0
def test_keyword_in_context(text, keyword, ignore_case, window_width,
                            has_results):
    """Check keyword-in-context results for one combination of arguments.

    Verifies that matches are present or absent as ``has_results`` says, that
    each match equals the keyword (case-insensitively when ``ignore_case`` is
    True), and that neither context side exceeds ``window_width``.
    """
    kwic = text_utils.keyword_in_context(
        text,
        keyword,
        ignore_case=ignore_case,
        window_width=window_width,
        print_only=False,
    )
    results = list(kwic)
    # Matches must be present exactly when the caller expects them.
    assert bool(results) == bool(has_results)
    for before, matched, after in results:
        # Compare the match with the keyword, folding case only on request.
        if ignore_case is True:
            actual_kw, wanted_kw = matched.lower(), keyword.lower()
        else:
            actual_kw, wanted_kw = matched, keyword
        assert actual_kw == wanted_kw
        # Each context side is bounded by the window width.
        assert len(before) <= window_width
        assert len(after) <= window_width
Beispiel #17
0
 def test_keyword_in_context_case(self):
     """Check that a case-sensitive search for lowercase 'clinton' is empty.

     The lists are compared directly. Looping over ``zip(observed, expected)``
     with an empty ``expected`` never runs the loop body, so the test would
     make no assertion at all.
     """
     observed = list(text_utils.keyword_in_context(
         TEXT, 'clinton', ignore_case=False, window_width=50, print_only=False))
     expected = []
     self.assertEqual(observed, expected)