예제 #1
0
파일: test_web.py 프로젝트: krisk24/pattern
 def test_collapse_tabs(self):
     # Each pair is (input, expected result of web.collapse_tabs(input)).
     # The expectations show an interior run of tabs becoming one space,
     # while leading/trailing tabs and the trailing newline are dropped.
     cases = (
         ("\t\t\t", ""),
         ("\t..\t", ".."),
         (".\t\t.", ". ."),
         (".\t\n", "."),
     )
     for a, b in cases:
         self.assertEqual(web.collapse_tabs(a), b)
     # Assert preserve indentation: with indentation=True the leading tabs
     # are expected to survive while the trailing tab/newline are removed.
     self.assertEqual(web.collapse_tabs("\t\t .\t\n", indentation=True), "\t\t .")
     # Parenthesized single-argument form prints the same text on Python 2
     # and is valid syntax on Python 3 (the bare print statement is not).
     print("pattern.web.collapse_tabs()")
예제 #2
0
 def test_collapse_tabs(self):
     # Assert collapse multiple tabs to 1 space.
     # Pairs are (input, expected output of the default call); per these
     # expectations tabs at either end of the string do not leave a space.
     for a, b in (("\t\t\t", ""), ("\t..\t", ".."), (".\t\t.", ". ."),
                  (".\t\n", ".")):
         self.assertEqual(web.collapse_tabs(a), b)
     # Assert preserve indentation (leading tabs kept when indentation=True).
     self.assertEqual(web.collapse_tabs("\t\t .\t\n", indentation=True),
                      "\t\t .")
     # Use the call form so the line parses on Python 3 as well; on Python 2
     # a single parenthesized string prints exactly as the statement did.
     print("pattern.web.collapse_tabs()")
def cleanup(text, remove_punctuation=True):
    """Return *text* with punctuation optionally removed and whitespace flattened.

    Steps, in order:

    1. If ``remove_punctuation`` is true, delete every run of characters
       that are not ASCII letters, ASCII digits or whitespace. Note that
       this also removes non-ASCII letters (e.g. accented characters).
    2. Pass the text through ``collapse_linebreaks(..., threshold=1)`` and
       turn each remaining ``'\\n'`` into a space.
    3. Pass the result through ``collapse_tabs`` and ``collapse_spaces``
       with ``indentation=False`` and ``replace=' '``.
    4. Strip leading and trailing whitespace.

    The ``collapse_*`` helpers are defined outside this block; their exact
    behavior is assumed from their names and arguments -- confirm against
    their definitions.

    Args:
        text: The string to clean.
        remove_punctuation: When true (the default), apply step 1.

    Returns:
        The cleaned string.
    """
    if remove_punctuation:
        # Raw string: in a normal literal `\s` is an invalid string escape
        # (DeprecationWarning on Python 3.6+, SyntaxWarning on 3.12+).
        # `\s` already matches '\n', so the class needs no separate newline.
        text = re.sub(r'[^A-Za-z0-9\s]+', '', text)
    text = collapse_linebreaks(text, threshold=1).replace('\n', ' ')
    text = collapse_tabs(text, indentation=False, replace=' ')
    text = collapse_spaces(text, indentation=False, replace=' ')
    return text.strip()