Example #1
0
    def test_inspect_wikipedia_text(self):
        """Check that Inspector finds the Wikipedia text files.

        Runs the inspection twice over ``tests/src/source_files``: once
        restricted to ``*.txt`` and once with the catch-all ``*`` pattern.
        Both runs must report the same set of matching files.
        """
        inspector = Inspector('tests/src/source_files', self._wikipedia_text_data_classes, match_pattern='*.txt')
        self.assertEqual(self._wikipedia_text_data_classes, inspector.get_data_classes())

        # Compare order-insensitively with sorted(): list.sort() sorts in place
        # and returns None, so comparing its results would always pass.
        self.assertEqual(sorted(self._wikipedia_text_files),
                         sorted(inspector.get_match_files(WikipediaTextFileContentMatch)))

        # Matching against every file ('*') must give the same result
        # (presumably slower, since more files are inspected).
        inspector = Inspector('tests/src/source_files', self._wikipedia_text_data_classes, match_pattern='*')
        self.assertEqual(sorted(self._wikipedia_text_files),
                         sorted(inspector.get_match_files(WikipediaTextFileContentMatch)))
Example #2
0
    def test_inspect_britannica_html(self):
        """Check that Inspector finds the Britannica HTML files.

        Runs the inspection twice over ``tests/src/source_files``: once
        restricted to ``*.html`` and once with the catch-all ``*`` pattern.
        Both runs must report the same set of matching files.
        """
        inspector = Inspector('tests/src/source_files', self._britannica_html_data_classes, match_pattern='*.html')
        self.assertEqual(self._britannica_html_data_classes, inspector.get_data_classes())

        # Compare order-insensitively with sorted(): list.sort() sorts in place
        # and returns None, so comparing its results would always pass.
        self.assertEqual(sorted(self._britannica_html_files),
                         sorted(inspector.get_match_files(BritannicaHTMLFileContentMatch)))

        # Matching against every file ('*') must give the same result
        # (presumably slower, since more files are inspected).
        inspector = Inspector('tests/src/source_files', self._britannica_html_data_classes, match_pattern='*')
        self.assertEqual(sorted(self._britannica_html_files),
                         sorted(inspector.get_match_files(BritannicaHTMLFileContentMatch)))
Example #3
0
    def test_inspect_wikipedia_text(self):
        """Check that Inspector finds the Wikipedia text files.

        Runs the inspection twice over ``tests/src/source_files``: once
        restricted to ``*.txt`` and once with the catch-all ``*`` pattern.
        Both runs must report the same set of matching files.
        """
        inspector = Inspector('tests/src/source_files',
                              self._wikipedia_text_data_classes,
                              match_pattern='*.txt')
        self.assertEqual(self._wikipedia_text_data_classes,
                         inspector.get_data_classes())

        # Compare order-insensitively with sorted(): list.sort() sorts in place
        # and returns None, so comparing its results would always pass.
        self.assertEqual(
            sorted(self._wikipedia_text_files),
            sorted(inspector.get_match_files(WikipediaTextFileContentMatch)))

        # Matching against every file ('*') must give the same result
        # (presumably slower, since more files are inspected).
        inspector = Inspector('tests/src/source_files',
                              self._wikipedia_text_data_classes,
                              match_pattern='*')
        self.assertEqual(
            sorted(self._wikipedia_text_files),
            sorted(inspector.get_match_files(WikipediaTextFileContentMatch)))
Example #4
0
    def test_inspect_britannica_html(self):
        """Check that Inspector finds the Britannica HTML files.

        Runs the inspection twice over ``tests/src/source_files``: once
        restricted to ``*.html`` and once with the catch-all ``*`` pattern.
        Both runs must report the same set of matching files.
        """
        inspector = Inspector('tests/src/source_files',
                              self._britannica_html_data_classes,
                              match_pattern='*.html')
        self.assertEqual(self._britannica_html_data_classes,
                         inspector.get_data_classes())

        # Compare order-insensitively with sorted(): list.sort() sorts in place
        # and returns None, so comparing its results would always pass.
        self.assertEqual(
            sorted(self._britannica_html_files),
            sorted(inspector.get_match_files(BritannicaHTMLFileContentMatch)))

        # Matching against every file ('*') must give the same result
        # (presumably slower, since more files are inspected).
        inspector = Inspector('tests/src/source_files',
                              self._britannica_html_data_classes,
                              match_pattern='*')
        self.assertEqual(
            sorted(self._britannica_html_files),
            sorted(inspector.get_match_files(BritannicaHTMLFileContentMatch)))