def test_html2():
    """Test all document errors, as long as the document is valid.

    Loads ``data/testfiles/miscerrors.html``, runs the CSS, title, document,
    TOC, anchor and unicode checks on it, and asserts the findings recorded
    on the checker and on the source file.
    """
    from sourcefile import SourceFile

    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/miscerrors.html")
    assert myfile.tree

    x = KXhtml()
    x.check_css(myfile)
    x.check_title(myfile)
    x.check_document(myfile)
    x.epub_toc(myfile)
    x.check_anchors(myfile)
    x.check_unicode(myfile)

    # CSS
    assert x.cssutils_errors == []
    assert x.sel_unchecked == []
    assert len(x.sel_unused) == 2
    assert '.large' in x.sel_unused
    assert '.pagenum' in x.sel_unused
    assert x.classes_undefined == [[17, 'asdfgh']]

    # Title
    assert x.good_format == False
    assert x.title == 'no title — no author'
    # None is a singleton: compare by identity, not equality.
    assert x.author is None

    # Encoding
    assert myfile.encoding == 'utf-8'
    assert x.meta_encoding == myfile.encoding
    assert len(x.encoding_errors) == 0

    # TOC - not tested here
    assert len(x.toc) == 2

    # Languages
    assert x.document_lang == "fr"
    assert x.document_xmllang == "en"

    # h1
    assert x.num_h1 == 2

    # sup stars
    assert len(x.stars_in_sup) == 2

    # Inline style
    assert len(x.inline_style) == 2
    assert x.inline_style[0][1] == 'div'
    assert x.inline_style[0][2] == 'text-indent:2em'
    assert x.inline_style[1][1] == 'span'
    assert x.inline_style[1][2] == 'margin-left: 1em;'

    # Something after <sup> tag
    assert len(x.text_after_sup) == 2
    assert x.text_after_sup == [37, 38]

    # Empty lines at the end
    assert myfile.ending_empty_lines == 5
def test_html3():
    """Test document with no error.

    Runs every checker pass over ``data/testfiles/noerror.html`` and asserts
    that no problem is recorded, while the title, author, TOC and language
    values match the expected ones.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/noerror.html")
    assert source.tree

    checker = KXhtml()
    checker.check_css(source)
    checker.check_title(source)
    checker.check_document(source)
    checker.epub_toc(source)
    checker.check_anchors(source)
    checker.check_unicode(source)

    # CSS
    assert checker.cssutils_errors == []
    assert checker.sel_unchecked == []
    assert len(checker.sel_unused) == 0
    assert len(checker.classes_undefined) == 0

    # Title
    assert checker.good_format == True
    assert checker.title == 'Voyage à Cayenne, Vol. 1'
    assert checker.author == 'L. A. Pitou'

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == source.encoding
    assert len(checker.encoding_errors) == 0

    # TOC: (entry index, expected field 0, expected field 1)
    assert len(checker.toc) == 10
    expected_toc = (
        (0, 0, 'one header'),
        (3, 3, 'lvl 4-1'),
        (8, 2, 'other'),
        (9, 3, 'lvl 4-3 on 2 lines'),
    )
    for position, first, second in expected_toc:
        assert checker.toc[position][0] == first
        assert checker.toc[position][1] == second

    # Languages
    assert checker.document_lang == "fr"
    assert checker.document_xmllang == "fr"

    # h1
    assert checker.num_h1 == 1

    # sup stars
    assert len(checker.stars_in_sup) == 0

    # Inline style
    assert len(checker.inline_style) == 0

    # Something after <sup> tag
    assert len(checker.text_after_sup) == 0

    assert source.ending_empty_lines == 1
def test_html3():
    """Test document with no error.

    Runs every checker pass over ``data/testfiles/noerror.html`` and asserts
    that no problem is recorded, while the title, author, TOC and language
    values match the expected ones.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/noerror.html")
    assert source.tree

    checker = KXhtml()
    checker.check_css(source)
    checker.check_title(source)
    checker.check_document(source)
    checker.epub_toc(source)
    checker.check_anchors(source)
    checker.check_unicode(source)

    # CSS
    assert checker.cssutils_errors == []
    assert checker.sel_unchecked == []
    assert len(checker.sel_unused) == 0
    assert len(checker.classes_undefined) == 0

    # Title
    assert checker.good_format == True
    assert checker.title == 'Voyage à Cayenne, Vol. 1'
    assert checker.author == 'L. A. Pitou'

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == source.encoding
    assert len(checker.encoding_errors) == 0

    # TOC: (entry index, expected field 0, expected field 1)
    assert len(checker.toc) == 10
    expected_toc = (
        (0, 0, 'one header'),
        (3, 3, 'lvl 4-1'),
        (8, 2, 'other'),
        (9, 3, 'lvl 4-3 on 2 lines'),
    )
    for position, first, second in expected_toc:
        assert checker.toc[position][0] == first
        assert checker.toc[position][1] == second

    # Languages
    assert checker.document_lang == "fr"
    assert checker.document_xmllang == "fr"

    # h1
    assert checker.num_h1 == 1

    # sup stars
    assert len(checker.stars_in_sup) == 0

    # Inline style
    assert len(checker.inline_style) == 0

    # Something after <sup> tag
    assert len(checker.text_after_sup) == 0

    assert source.ending_empty_lines == 1
def test_html2():
    """Test all document errors, as long as the document is valid.

    Loads ``data/testfiles/miscerrors.html``, runs the CSS, title, document,
    TOC, anchor and unicode checks on it, and asserts the findings recorded
    on the checker and on the source file.
    """
    from sourcefile import SourceFile

    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/miscerrors.html")
    assert myfile.tree

    x = KXhtml()
    x.check_css(myfile)
    x.check_title(myfile)
    x.check_document(myfile)
    x.epub_toc(myfile)
    x.check_anchors(myfile)
    x.check_unicode(myfile)

    # CSS
    assert x.cssutils_errors == []
    assert x.sel_unchecked == []
    assert len(x.sel_unused) == 2
    assert '.large' in x.sel_unused
    assert '.pagenum' in x.sel_unused
    assert x.classes_undefined == [[17, 'asdfgh']]

    # Title
    assert x.good_format == False
    assert x.title == 'no title — no author'
    # None is a singleton: compare by identity, not equality.
    assert x.author is None

    # Encoding
    assert myfile.encoding == 'utf-8'
    assert x.meta_encoding == myfile.encoding
    assert len(x.encoding_errors) == 0

    # TOC - not tested here
    assert len(x.toc) == 2

    # Languages
    assert x.document_lang == "fr"
    assert x.document_xmllang == "en"

    # h1
    assert x.num_h1 == 2

    # sup stars
    assert len(x.stars_in_sup) == 2

    # Inline style
    assert len(x.inline_style) == 2
    assert x.inline_style[0][1] == 'div'
    assert x.inline_style[0][2] == 'text-indent:2em'
    assert x.inline_style[1][1] == 'span'
    assert x.inline_style[1][2] == 'margin-left: 1em;'

    # Something after <sup> tag
    assert len(x.text_after_sup) == 2
    assert x.text_after_sup == [37, 38]

    # Empty lines at the end
    assert myfile.ending_empty_lines == 5
def test_html2():
    """Check ``data/testfiles/nocharset.html``.

    Expects the document check to report no meta encoding, the file to be
    read as utf-8, and no encoding errors to be recorded.
    """
    from sourcefile import SourceFile

    myfile = SourceFile()
    myfile.load_xhtml("data/testfiles/nocharset.html")
    assert myfile.tree

    x = KXhtml()
    x.check_document(myfile)

    # None is a singleton: compare by identity, not equality.
    assert x.meta_encoding is None
    assert myfile.encoding == 'utf-8'
    assert len(x.encoding_errors) == 0
def test_html2():
    """Check ``data/testfiles/nocharset.html``.

    Expects the document check to report no meta encoding, the file to be
    read as utf-8, and no encoding errors to be recorded.
    """
    from sourcefile import SourceFile

    myfile = SourceFile()
    myfile.load_xhtml("data/testfiles/nocharset.html")
    assert myfile.tree

    x = KXhtml()
    x.check_document(myfile)

    # None is a singleton: compare by identity, not equality.
    assert x.meta_encoding is None
    assert myfile.encoding == 'utf-8'
    assert len(x.encoding_errors) == 0
def test_html1():
    """Check ``data/testfiles/badcharset.html``.

    Expects a declared meta encoding of iso-8859-1 while the file itself is
    read as utf-8, exactly one encoding error, and ``ending_empty_lines``
    equal to 1.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    source.load_xhtml("data/testfiles/badcharset.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Declared vs. actual encoding
    assert checker.meta_encoding == 'iso-8859-1'
    assert source.encoding == 'utf-8'
    assert len(checker.encoding_errors) == 1

    assert source.ending_empty_lines == 1
def test_html1():
    """Check ``data/testfiles/badcharset.html``.

    Expects a declared meta encoding of iso-8859-1 while the file itself is
    read as utf-8, exactly one encoding error, and ``ending_empty_lines``
    equal to 1.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    source.load_xhtml("data/testfiles/badcharset.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Declared vs. actual encoding
    assert checker.meta_encoding == 'iso-8859-1'
    assert source.encoding == 'utf-8'
    assert len(checker.encoding_errors) == 1

    assert source.ending_empty_lines == 1
def main(url):
    """Scan every file under *url* and print a profanity report.

    Walks the tree rooted at ``url`` with ``os.walk``, builds a
    ``SourceFile`` for each file and parses it, printing the words found per
    file. Afterwards prints aggregate statistics over all scanned files.

    A file that fails to parse is reported and still counted in the
    statistics. A tree that contains no files produces an all-zero summary
    rather than raising.

    Args:
        url: Root directory to scan.
    """
    from collections import Counter

    files = []
    for root, _directories, filenames in os.walk(url):
        for filename in filenames:
            source_file = SourceFile(os.path.join(root, filename))
            files.append(source_file)
            try:
                print("Parsing " + source_file.fullpath)
                source_file.parse()
                if len(source_file.profanewords) > 0:
                    for index, word in enumerate(source_file.profanewords):
                        print("Line " + str(source_file.profanelines[index] + 1) + ": " + word)
                    print("Found " + str(len(source_file.profanewords))
                          + " words for a score of " + str(source_file.profanityscore))
                    print()
            except Exception as ex:
                # Top-level boundary: report the failure and keep scanning.
                print("Failed to parse file: ", ex)

    # Calculate and display statistics
    # max() raises ValueError on an empty sequence, so guard the case where
    # the walk found no files at all.
    if files:
        mostprofanefile = max(files, key=lambda curfile: len(curfile.profanewords))
    else:
        mostprofanefile = None

    mostprofanewords = []
    for source_file in files:
        word = source_file.favoriteprofaneword()
        if word is not None:
            mostprofanewords.append(word)

    if mostprofanewords:
        mostcommonprofaneword = Counter(mostprofanewords).most_common(1)[0][0]
    else:
        mostcommonprofaneword = "N/A"

    print()
    print("Total files scanned: " + str(len(files)))
    # profanewordcount is indexed 1..4, labelled Mild .. Very Strong below.
    print("Words found: "
          + str(sum(f.profanewordcount[1] for f in files)) + " Mild, "
          + str(sum(f.profanewordcount[2] for f in files)) + " Medium, "
          + str(sum(f.profanewordcount[3] for f in files)) + " Strong, "
          + str(sum(f.profanewordcount[4] for f in files)) + " Very Strong")

    totalprofanityscore = sum(f.profanityscore for f in files)
    if totalprofanityscore > 0:
        # A positive total implies at least one file, so mostprofanefile is set.
        print("Most profane file: " + str(mostprofanefile.fullpath)
              + " with " + str(len(mostprofanefile.profanewords))
              + " words for a score of " + str(mostprofanefile.profanityscore))
    print("Most common word: " + mostcommonprofaneword)
    print("Total score: " + str(totalprofanityscore))
def test_encoding1():
    """No encoding.

    Loads ``data/testfiles/noencoding.html`` and expects the file to be read
    as utf-8 with no meta encoding reported by the document check.
    """
    from sourcefile import SourceFile

    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/noencoding.html")
    assert myfile.tree

    x = KXhtml()
    x.check_document(myfile)

    # Encoding
    assert myfile.encoding == 'utf-8'
    # None is a singleton: compare by identity, not equality.
    assert x.meta_encoding is None
def prepare_bc(self): bc_dir = self.get_bc_dir_of_project() bitcode_files = get_files_in_dir( bc_dir, ext='.bc', search_spaces=self.arguments.search_spaces) if len(bitcode_files) == 0: print 'There is no bitcode in:', bc_dir exit(0) print 'Total number of c files : {}'.format(len(bitcode_files)) for bitcode_file in bitcode_files: sf = SourceFile('', arguments=self.arguments) sf.emit_llvm_ll_and_functions(bitcode_file)
def test_encoding1():
    """No encoding.

    Loads ``data/testfiles/noencoding.html`` and expects the file to be read
    as utf-8 with no meta encoding reported by the document check.
    """
    from sourcefile import SourceFile

    myfile = SourceFile()
    assert myfile
    myfile.load_xhtml("data/testfiles/noencoding.html")
    assert myfile.tree

    x = KXhtml()
    x.check_document(myfile)

    # Encoding
    assert myfile.encoding == 'utf-8'
    # None is a singleton: compare by identity, not equality.
    assert x.meta_encoding is None
def update(self, tests_root, url_base, new_rev, committed_changes=None, local_changes=None,
           remove_missing_local=False):
    """Apply committed and local changes to this manifest.

    Args:
        tests_root: Passed through to ``SourceFile`` for every changed path.
        url_base: Passed through to ``SourceFile``; also stored on
            ``self.url_base`` at the end.
        new_rev: Stored on ``self.rev`` when it is not None.
        committed_changes: Optional iterable of ``(rel_path, status)`` pairs.
            Each path is removed from the manifest; paths whose status is
            ``"modified"`` are re-added from their ``SourceFile``.
        local_changes: Optional mapping of ``rel_path`` to status. Paths with
            status ``"modified"`` contribute the items not already committed
            for that path; any other status records the path as deleted.
        remove_missing_local: When true, committed paths that are absent from
            ``local_changes`` are also recorded as locally deleted.
    """
    if local_changes is None:
        local_changes = {}

    if committed_changes is not None:
        for committed_path, committed_status in committed_changes:
            self.remove_path(committed_path)
            if committed_status != "modified":
                continue
            # Read the committed version only when a local change also
            # exists for this path.
            committed_file = SourceFile(tests_root, committed_path, url_base,
                                        use_committed=committed_path in local_changes)
            self.extend(committed_file.manifest_items())

    self.local_changes = LocalChanges(self)

    seen_local_paths = set()
    for local_path, local_status in local_changes.iteritems():
        seen_local_paths.add(local_path)
        if local_status != "modified":
            self.local_changes.add_deleted(local_path)
            continue
        already_committed = self._committed_with_path(local_path)
        working_file = SourceFile(tests_root, local_path, url_base,
                                  use_committed=False)
        working_items = set(working_file.manifest_items())
        self.local_changes.extend(working_items - already_committed)

    if remove_missing_local:
        for missing_path in self._committed_paths() - seen_local_paths:
            self.local_changes.add_deleted(missing_path)

    self.update_reftests()

    if new_rev is not None:
        self.rev = new_rev
    self.url_base = url_base
def test_encoding2():
    """validly declared us-ascii encoding, read as utf-8.

    Loads ``data/testfiles/asciiencoding.html`` and expects no encoding
    error even though the declared and detected encodings differ.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/asciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 0
def test_encoding4():
    """invalid encoding.

    Loads ``data/testfiles/inv-encoding.html`` and expects two encoding
    errors for a declared meta encoding of ``ascii``.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/inv-encoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'ascii'
    assert len(checker.encoding_errors) == 2  # invalid + different encodings
def test_encoding3():
    """declared ascii but contains unicode.

    Loads ``data/testfiles/notasciiencoding.html`` and expects exactly one
    encoding error for a declared meta encoding of ``us-ascii``.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/notasciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 1
def test_encoding2():
    """validly declared us-ascii encoding, read as utf-8.

    Loads ``data/testfiles/asciiencoding.html`` and expects no encoding
    error even though the declared and detected encodings differ.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/asciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 0
def test_encoding3():
    """declared ascii but contains unicode.

    Loads ``data/testfiles/notasciiencoding.html`` and expects exactly one
    encoding error for a declared meta encoding of ``us-ascii``.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/notasciiencoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'us-ascii'
    assert len(checker.encoding_errors) == 1
def test_encoding4():
    """invalid encoding.

    Loads ``data/testfiles/inv-encoding.html`` and expects two encoding
    errors for a declared meta encoding of ``ascii``.
    """
    from sourcefile import SourceFile

    source = SourceFile()
    assert source
    source.load_xhtml("data/testfiles/inv-encoding.html")
    assert source.tree

    checker = KXhtml()
    checker.check_document(source)

    # Encoding
    assert source.encoding == 'utf-8'
    assert checker.meta_encoding == 'ascii'
    assert len(checker.encoding_errors) == 2  # invalid + different encodings
def process_sample(source_file, args, project_dir, compile_arguments, type=''):
    """Run the analysis selected by *type* on one sample.

    Args:
        source_file: Passed to ``SourceFile`` as ``source_file`` for the
            ``'ast'``/``'bc'`` analyses, or to ``BitCodeFile`` as
            ``file_info`` for the ``'pdg'``/``'as'`` analyses.
        args: Forwarded as ``arguments`` to whichever handler is built.
        project_dir: Forwarded to ``SourceFile`` only.
        compile_arguments: Forwarded to ``SourceFile`` only.
        type: Analysis kind: ``'ast'``, ``'bc'``, ``'pdg'`` or ``'as'``. Any
            other value performs no analysis.

    Returns:
        ``int(success)`` from ``BitCodeFile.analyze()`` for ``'pdg'``/``'as'``;
        ``1`` in every other case, including a failed ``'ast'``/``'bc'``
        analysis.
    """
    if type in ('ast', 'bc'):
        source_file_obj = SourceFile(source_file=source_file,
                                     arguments=args,
                                     project_dir=project_dir,
                                     compile_arguments=compile_arguments,
                                     analysis_type=type)
        try:
            source_file_obj.analyze()
        except Exception:
            # Best-effort: a failing sample must not abort the run. Catching
            # Exception rather than using a bare ``except`` lets
            # KeyboardInterrupt and SystemExit still propagate.
            pass

    if type in ('pdg', 'as'):
        file_handler = BitCodeFile(file_info=source_file,
                                   arguments=args,
                                   analysis_type=type)
        success = file_handler.analyze()
        return int(success)

    return 1
def update(self, tests_root, url_base, new_rev, committed_changes=None, local_changes=None,
           remove_missing_local=False):
    """Apply committed and local changes to this manifest.

    Args:
        tests_root: Passed through to ``SourceFile`` for every changed path.
        url_base: Passed through to ``SourceFile``; also stored on
            ``self.url_base`` at the end.
        new_rev: Stored on ``self.rev`` when it is not None.
        committed_changes: Optional iterable of ``(rel_path, status)`` pairs.
            Each path is removed from the manifest; paths whose status is
            ``"modified"`` are re-added from their ``SourceFile``.
        local_changes: Optional mapping of ``rel_path`` to status. Paths with
            status ``"modified"`` contribute the items not already committed
            for that path; any other status records the path as deleted.
        remove_missing_local: When true, committed paths that are absent from
            ``local_changes`` are also recorded as locally deleted.
    """
    if local_changes is None:
        local_changes = {}

    if committed_changes is not None:
        for committed_path, committed_status in committed_changes:
            self.remove_path(committed_path)
            if committed_status != "modified":
                continue
            # Read the committed version only when a local change also
            # exists for this path.
            committed_file = SourceFile(tests_root, committed_path, url_base,
                                        use_committed=committed_path in local_changes)
            self.extend(committed_file.manifest_items())

    self.local_changes = LocalChanges(self)

    seen_local_paths = set()
    for local_path, local_status in local_changes.iteritems():
        seen_local_paths.add(local_path)
        if local_status != "modified":
            self.local_changes.add_deleted(local_path)
            continue
        already_committed = self._committed_with_path(local_path)
        working_file = SourceFile(tests_root, local_path, url_base,
                                  use_committed=False)
        working_items = set(working_file.manifest_items())
        self.local_changes.extend(working_items - already_committed)

    if remove_missing_local:
        for missing_path in self._committed_paths() - seen_local_paths:
            self.local_changes.add_deleted(missing_path)

    self.update_reftests()

    if new_rev is not None:
        self.rev = new_rev
    self.url_base = url_base
def check_parsed(path, f):
    """Lint the parsed form of *path* for testharness script includes.

    Args:
        path: Path handed to ``SourceFile`` (together with ``repo_root``) and
            interpolated into the error descriptions.
        f: Unused by this check.

    Returns:
        A list of ``(error_type, description, None)`` tuples. A file that
        cannot be parsed yields a single ``PARSE-FAILED`` entry; otherwise
        one entry is added for each duplicated or missing
        ``testharness.js`` / ``testharnessreport.js`` include.
    """
    source_file = SourceFile(repo_root, path, "/")

    errors = []

    if source_file.root is None:
        return [("PARSE-FAILED", "Unable to parse file %s" % path, None)]

    if source_file.testharness_nodes:
        if len(source_file.testharness_nodes) > 1:
            errors.append(("MULTIPLE-TESTHARNESS",
                           "%s more than one <script src='/resources/testharness.js'>" % path,
                           None))

        if not source_file.testharnessreport_nodes:
            errors.append(("MISSING-TESTHARNESSREPORT",
                           "%s missing <script src='/resources/testharnessreport.js'>" % path,
                           None))

    if source_file.testharnessreport_nodes:
        if len(source_file.testharnessreport_nodes) > 1:
            errors.append(("MULTIPLE-TESTHARNESSREPORT",
                           "%s more than one <script src='/resources/testharnessreport.js'>" % path,
                           None))

        if not source_file.testharness_nodes:
            # The script path is lowercase, matching the MULTIPLE-TESTHARNESS
            # message above.
            errors.append(("MISSING-TESTHARNESS",
                           "%s missing <script src='/resources/testharness.js'>" % path,
                           None))

    return errors
def make_new():
    """Build a ``SourceFile`` for the enclosing scope's ``path``.

    Uses ``tests_root``, ``path`` and ``manifest`` from the surrounding
    scope; the import is done lazily at call time.
    """
    from sourcefile import SourceFile

    constructor_args = (tests_root, path, manifest.url_base)
    return SourceFile(*constructor_args)