コード例 #1
0
ファイル: test_scrubber.py プロジェクト: vbaira/Lexos
 def test_keep_words_normal(self):
     """Exercise keep_words with ordinary keep lists on the shared fixture."""
     sample = self.test_string

     # Single keep words, including one expected to leave nothing behind.
     assert keep_words(sample, "is") == " is"
     assert keep_words(sample, "Test") == "Test"
     assert keep_words(sample, "here") == " here"
     assert keep_words(sample, "missing") == ""

     # Empty and blank keep lists are compared against the "missing" result.
     assert (keep_words(sample, "")
             == keep_words(sample, "missing"))
     assert (keep_words(sample, " ")
             == keep_words(sample, ""))

     # Repeated words and multi-word keep lists.
     assert keep_words(sample, "text") == " text text"
     assert (keep_words(sample, "Test, here, is")
             == "Test is here")
     assert (keep_words(sample, "Test,missing,text")
             == "Test text text")

     # Space- and newline-separated lists must agree with the comma form.
     assert (keep_words(sample, "Test missing text")
             == keep_words(sample, "Test,missing,text"))
     assert (keep_words(sample, "Test\nmissing\ntext")
             == keep_words(sample, "Test,missing,text"))

     # The expected output drops the capitalised "Word" for keep word "word".
     assert (keep_words("Word word word word gone word", "word")
             == " word word word word")

     # Keeping every word of the text returns the text itself.
     assert (keep_words(sample, sample)
             == sample)

     # Keeping two words is compared with removing the other three.
     assert (keep_words(sample, "is, this")
             == remove_stopwords(sample, "Test, text, here"))

     # Unicode whitespace separators between words.
     unicode_text = "Test\u1680unicode\u205Fwhite\u2007spaces\u2001now"
     assert (keep_words(unicode_text, "unicode, white, now")
             == "\u1680unicode\u205Fwhite\u2001now")

     # Mixed tabs, newlines and runs of spaces.
     spaced_text = (
         "Test\nsome\t\tkeep words\n\nwhitespace\tpreservation\nwith  this"
         "\t sentence \n now")
     kept = "Test, keep, whitespace, with, this, now"
     assert (keep_words(spaced_text, kept)
             == "Test\t\tkeep\n\nwhitespace\nwith  this\t \n now")
コード例 #2
0
ファイル: test_scrubber.py プロジェクト: WheatonCS/Lexos
 def test_keep_words_normal(self):
     """Exercise keep_words with ordinary keep lists on the shared fixture."""
     sample = self.test_string

     # Single keep words, including one expected to leave nothing behind.
     assert keep_words(sample, "is") == " is"
     assert keep_words(sample, "Test") == "Test"
     assert keep_words(sample, "here") == " here"
     assert keep_words(sample, "missing") == ""

     # Empty and blank keep lists are compared against the "missing" result.
     assert (keep_words(sample, "")
             == keep_words(sample, "missing"))
     assert (keep_words(sample, " ")
             == keep_words(sample, ""))

     # Repeated words and multi-word keep lists.
     assert keep_words(sample, "text") == " text text"
     assert (keep_words(sample, "Test, here, is")
             == "Test is here")
     assert (keep_words(sample, "Test,missing,text")
             == "Test text text")

     # Space- and newline-separated lists must agree with the comma form.
     assert (keep_words(sample, "Test missing text")
             == keep_words(sample, "Test,missing,text"))
     assert (keep_words(sample, "Test\nmissing\ntext")
             == keep_words(sample, "Test,missing,text"))

     # The expected output drops the capitalised "Word" for keep word "word".
     assert (keep_words("Word word word word gone word", "word")
             == " word word word word")

     # Keeping every word of the text returns the text itself.
     assert (keep_words(sample, sample)
             == sample)

     # Keeping two words is compared with removing the other three.
     assert (keep_words(sample, "is, this")
             == remove_stopwords(sample, "Test, text, here"))

     # Unicode whitespace separators between words.
     unicode_text = "Test\u1680unicode\u205Fwhite\u2007spaces\u2001now"
     assert (keep_words(unicode_text, "unicode, white, now")
             == "\u1680unicode\u205Fwhite\u2001now")

     # Mixed tabs, newlines and runs of spaces.
     spaced_text = (
         "Test\nsome\t\tkeep words\n\nwhitespace\tpreservation\nwith  this"
         "\t sentence \n now")
     kept = "Test, keep, whitespace, with, this, now"
     assert (keep_words(spaced_text, kept)
             == "Test\t\tkeep\n\nwhitespace\nwith  this\t \n now")
コード例 #3
0
ファイル: test_scrubber.py プロジェクト: vbaira/Lexos
 def test_remove_stopwords_normal(self):
     """Exercise remove_stopwords with ordinary stop lists on the fixture."""
     sample = self.test_string

     assert remove_stopwords(sample, "is") == (
         "This a 'long' story. It time for this long story to end "
         "to-night. end.")

     # The expected strings treat "This" and "this" as distinct stop words.
     assert remove_stopwords(sample, "This") == (
         " is a 'long' story. It is time for this long story to end "
         "to-night. end.")
     assert remove_stopwords(sample, "this") == (
         "This is a 'long' story. It is time for long story to end "
         "to-night. end.")
     assert remove_stopwords(sample, "This,this") == (
         " is a 'long' story. It is time for long story to end "
         "to-night. end.")

     # Stop list mixing comma, newline and comma-space separators.
     assert remove_stopwords(sample, "is,this\na, for") == (
         "This 'long' story. It time long story to end to-night. end.")

     # The expected strings keep tokens with attached punctuation or quotes
     # ("story.", "'long'", "to-night.").
     assert remove_stopwords(sample, "story") == (
         "This is a 'long' story. It is time for this long to end "
         "to-night. end.")
     assert remove_stopwords(sample, "long,to") == (
         "This is a 'long' story. It is time for this story end "
         "to-night. end.")

     # Irregular spacing around the removed words.
     spaced_text = "  Weird \t\t spacing\n\t\nhere   \tin\n\n\nthis\n \t text"
     stop_list = "Weird, here, in, text"
     assert (remove_stopwords(spaced_text, stop_list)
             == "  \t\t spacing\n\t   \n\n\nthis\n \t")
コード例 #4
0
ファイル: test_scrubber.py プロジェクト: WheatonCS/Lexos
 def test_remove_stopwords_normal(self):
     """Exercise remove_stopwords with ordinary stop lists on the fixture."""
     sample = self.test_string

     assert remove_stopwords(sample, "is") == (
         "This a 'long' story. It time for this long story to end "
         "to-night. end.")

     # The expected strings treat "This" and "this" as distinct stop words.
     assert remove_stopwords(sample, "This") == (
         " is a 'long' story. It is time for this long story to end "
         "to-night. end.")
     assert remove_stopwords(sample, "this") == (
         "This is a 'long' story. It is time for long story to end "
         "to-night. end.")
     assert remove_stopwords(sample, "This,this") == (
         " is a 'long' story. It is time for long story to end "
         "to-night. end.")

     # Stop list mixing comma, newline and comma-space separators.
     assert remove_stopwords(sample, "is,this\na, for") == (
         "This 'long' story. It time long story to end to-night. end.")

     # The expected strings keep tokens with attached punctuation or quotes
     # ("story.", "'long'", "to-night.").
     assert remove_stopwords(sample, "story") == (
         "This is a 'long' story. It is time for this long to end "
         "to-night. end.")
     assert remove_stopwords(sample, "long,to") == (
         "This is a 'long' story. It is time for this story end "
         "to-night. end.")

     # Irregular spacing around the removed words.
     spaced_text = "  Weird \t\t spacing\n\t\nhere   \tin\n\n\nthis\n \t text"
     stop_list = "Weird, here, in, text"
     assert (remove_stopwords(spaced_text, stop_list)
             == "  \t\t spacing\n\t   \n\n\nthis\n \t")
コード例 #5
0
ファイル: test_scrubber.py プロジェクト: vbaira/Lexos
 def test_remove_stopwords_edge(self):
     """Exercise remove_stopwords with empty, blank and boundary inputs."""
     sample = self.test_string

     # Stop lists containing no words are expected to leave the text as is.
     assert remove_stopwords(sample, "") == sample
     assert remove_stopwords(sample, " ") == sample
     assert remove_stopwords("test\nstring", "\n") == "test\nstring"

     # Stop list with a leading newline before the word.
     assert remove_stopwords("test\nstring", "\nstring") == "test"

     # Texts consisting only of the stop word plus surrounding whitespace.
     assert remove_stopwords("test", "test") == ""
     assert remove_stopwords("   test   ", "test") == "     "
     assert remove_stopwords("\ntest\n", "test") == "\n"

     # Removing every word, and a stop list that repeats one word.
     assert remove_stopwords("Test this code", "Test,this,code") == ""
     assert (remove_stopwords("Another test", "test, test, test")
             == "Another")

     # Newline-separated stop list that includes a punctuated token.
     assert remove_stopwords(sample, "This\nend.\nfor") == (
         " is a 'long' story. It is time this long story to end to-night.")

     # Space-separated list must agree with the comma-separated form.
     assert (remove_stopwords(sample, "This long story")
             == remove_stopwords(sample, "This,long,story"))

     # A lone period as the stop list is expected to change nothing.
     assert remove_stopwords(sample, ".") == sample
コード例 #6
0
ファイル: test_scrubber.py プロジェクト: WheatonCS/Lexos
 def test_remove_stopwords_edge(self):
     """Exercise remove_stopwords with empty, blank and boundary inputs."""
     sample = self.test_string

     # Stop lists containing no words are expected to leave the text as is.
     assert remove_stopwords(sample, "") == sample
     assert remove_stopwords(sample, " ") == sample
     assert remove_stopwords("test\nstring", "\n") == "test\nstring"

     # Stop list with a leading newline before the word.
     assert remove_stopwords("test\nstring", "\nstring") == "test"

     # Texts consisting only of the stop word plus surrounding whitespace.
     assert remove_stopwords("test", "test") == ""
     assert remove_stopwords("   test   ", "test") == "     "
     assert remove_stopwords("\ntest\n", "test") == "\n"

     # Removing every word, and a stop list that repeats one word.
     assert remove_stopwords("Test this code", "Test,this,code") == ""
     assert (remove_stopwords("Another test", "test, test, test")
             == "Another")

     # Newline-separated stop list that includes a punctuated token.
     assert remove_stopwords(sample, "This\nend.\nfor") == (
         " is a 'long' story. It is time this long story to end to-night.")

     # Space-separated list must agree with the comma-separated form.
     assert (remove_stopwords(sample, "This long story")
             == remove_stopwords(sample, "This,long,story"))

     # A lone period as the stop list is expected to change nothing.
     assert remove_stopwords(sample, ".") == sample