def validate(filename):
    """
    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .

    Every error reported for the file is logged through ``LOG.error`` and
    every warning through ``LOG.warning``, each with its line, column and
    HTML-unescaped message.

    :param filename: the filename to validate
    """
    try:
        # Python 3.4+. ``HTMLParser.unescape`` was removed in Python 3.9, so
        # the module-level helper is preferred whenever it exists.
        from html import unescape
    except ImportError:
        # fallback for Python 2:
        import HTMLParser
        unescape = HTMLParser.HTMLParser().unescape
    from py_w3c.validators.html.validator import HTMLValidator

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warning
    template = u'line: {0}; col: {1}; message: {2}'
    for log, messages in ((LOG.error, vld.errors), (LOG.warning, vld.warnings)):
        for err in messages:
            log(template.format(err['line'], err['col'],
                                unescape(err['message'])))
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned by the server
        expected_string='',
        # The number of validation errors expected
        expected_errors=0):
    """Fetch ``url`` and check its status, content and validator error count.

    Asserts a 200 status and, when ``expected_string`` is given, that it
    occurs in the response text. The text is then sent to the HTML validator;
    if the number of reported errors differs from ``expected_errors`` the
    errors are printed, the page is saved next to the test and an
    ``AssertionError`` is raised. Warnings are only printed.
    """
    self.get(url)
    assert self.status == 200
    if expected_string:
        assert expected_string in self.text

    checker = HTMLValidator()
    checker.validate_fragment(self.text)

    error_count = len(checker.errors)
    if error_count != expected_errors:
        print('Errors for {}: {}'.format(url, error_count))
        pprint(checker.errors)
        # Save the HTML to make fixing the errors easier.
        dump_name = url.replace('/', '-') + '.html'
        with open(dump_name, 'w') as dump:
            dump.write(self.text.replace('\r\n', '\n'))
        assert False

    if checker.warnings:
        print('Warnings for {}: {}'.format(url, len(checker.warnings)))
        pprint(checker.warnings)
def is_socket_response_html(self):
    """
    Validate socket response for html syntax

    ``self.reply`` is decoded and sent to the validator as a fragment.

    :return: True when the validation ran and reported no errors,
        otherwise False
    """
    val = HTMLValidator()
    # NOTE(review): py_w3c appears to return a "validation happened" flag from
    # ``validate_fragment`` and to collect the findings on ``val.errors`` --
    # confirm against the installed version. Both are checked so that invalid
    # HTML is not reported as valid.
    validated = val.validate_fragment(self.reply.decode())
    return bool(validated) and not val.errors
def _test_url(self, url):
    """
    Test a single URL.

    Fetches ``url`` with the test client (following redirects), asserts a 200
    status and a non-empty body, and -- when the response is ``text/html``
    and ``self.validate_html`` is set -- asserts that the HTML validator
    reports no errors. Validator warnings are logged, not asserted.
    """
    logger.debug('Fetching URL %s', url)
    response = self.client.get(url, follow=True)
    # Assert return status
    # (``assertEquals`` is a deprecated alias that no longer exists in
    # Python 3.12, hence ``assertEqual``.)
    self.assertEqual(response.status_code, 200)
    # Assert non-empty content
    self.assertTrue(response.content)
    # Optionally, validate HTML
    # Only the part of the header before the first ';' is the MIME type.
    mimetype = response['Content-Type'].split(';')[0]
    if mimetype == 'text/html' and self.validate_html:
        logger.debug('Validating %s', url)
        vld = HTMLValidator()
        vld.validate_fragment(response.content)
        self.assertFalse(vld.errors,
                         u'HTML validation error for %s' % url)
        logger.warning(u'HTML validation: %s', vld.warnings)
def validate(filename):
    """
    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .

    Every error reported for the file is logged through ``LOG.error`` and
    every warning through ``LOG.warning``. The position comes from the
    ``line``/``col`` keys when present and otherwise from ``lastLine`` and
    ``firstColumn``-``lastColumn``.

    :param filename: the filename to validate
    """
    try:
        # Python 3.4+. ``HTMLParser.unescape`` was removed in Python 3.9, so
        # the module-level helper is preferred whenever it exists.
        from html import unescape
    except ImportError:
        try:
            from html.parser import HTMLParser
        except ImportError:
            # fallback for Python 2:
            from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape  # for unescaping WC3 messages
    from py_w3c.validators.html.validator import HTMLValidator

    def describe(err):
        # Build the log line for one validator message.
        line = err.get('line') or err['lastLine']
        col = err.get('col') or '{}-{}'.format(err['firstColumn'],
                                               err['lastColumn'])
        return u'line: {0}; col: {1}; message: {2}'.format(
            line, col, unescape(err['message']))

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warning
    for err in vld.errors:
        LOG.error(describe(err))
    for err in vld.warnings:
        LOG.warning(describe(err))
def validate(filename):
    """
    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .

    Errors are pretty-printed and logged through ``LOG.error``; warnings are
    logged through ``LOG.warning``. Messages that carry a ``lastLine`` key are
    logged with their position, all others with the message text only.

    :param filename: the filename to validate
    """
    try:
        # Python 3.4+. ``HTMLParser.unescape`` was removed in Python 3.9, so
        # the module-level helper is preferred whenever it exists.
        from html import unescape
    except ImportError:
        # Python3 html parser is in different spot
        from html.parser import HTMLParser
        unescape = HTMLParser().unescape  # for unescaping WC3 messages
    from py_w3c.validators.html.validator import HTMLValidator

    def describe(err):
        # Build the log line for one validator message.
        message = unescape(err['message'])
        if "lastLine" in err:
            return "line: {0}; col: {1}; message: {2}".format(
                err['lastLine'], err['lastColumn'], message)
        return "message: {0}".format(message)

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warning
    for err in vld.errors:
        pprint.pprint(err)
        LOG.error(describe(err))
    for err in vld.warnings:
        # Warnings used to be emitted at error level; log them as warnings.
        LOG.warning(describe(err))
def isValidHTML(result):
    """Return True when validating ``result`` yields no relevant errors.

    Validator errors are filtered through ``is_really_error``; whatever
    remains is pretty-printed before False is returned.
    """
    checker = HTMLValidator()
    checker.validate_fragment(result)
    remaining = [err for err in checker.errors if is_really_error(err)]
    if remaining:
        pprint.pprint(remaining)
        return False
    return True
def testW3pages(self):
    """Run every route in ``self.testRoutes`` through the HTML validator.

    Validation errors are printed per page; nothing is asserted here.
    """
    from py_w3c.validators.html.validator import HTMLValidator

    for route in self.testRoutes:
        page = route[0]
        checker = HTMLValidator()
        response = self.testapp.get(page)
        checker.validate_fragment(response)
        if checker.errors:
            report = pprint.pformat(checker.errors)
            print("Errors in page {}\n{}\n{}".format(page, "_"*60, report))
def parse(self, response):
    """Append the response text to ``page.html`` and record validation errors.

    The file is validated after writing, and the reported errors are appended
    to ``validatePage.json`` as indented JSON.
    """
    page_path = "page.html"
    # NOTE(review): both files are opened in append mode, so repeated calls
    # accumulate content -- confirm this is intended.
    with open(page_path, "a") as page:
        page.write(response.text)

    checker = HTMLValidator()
    checker.validate_file(page_path)

    report = json.dumps(checker.errors, indent=4)
    with open("validatePage.json", "a") as out:
        out.write(report)
def setUp(self):
    """Start an in-memory server and prepare the per-test fixtures.

    Sets ``token``, ``server``, ``db`` (subscribed to ``test_schema``),
    ``cells`` and ``validator`` on the test instance.
    """
    self.token = token = genToken()
    self.server = server = InMemServer(auth_token=token)
    server.start()
    self.db = db = server.connect(token)
    db.subscribeToSchema(test_schema)
    self.cells = Cells(db)
    self.validator = HTMLValidator()
def run(self):
    """Validate each file in ``self.files`` relative to ``self.project_path``.

    The path of every validated file is printed. On the first file with
    validation errors the report is shown and an ``Exception`` is raised.
    """
    checker = HTMLValidator()
    for name in self.files:
        target = os.path.join(self.project_path, name)
        checker.validate_file(target)
        print(target)
        if not checker.errors:
            continue
        self.show_error_report(checker.errors)
        raise Exception(
            "Failed to validate HTML file: {}".format(name))
def check_template(name):
    """Assert that ``/template/<name>`` validates with no errors or warnings.

    The failure message lists the validator messages, one per line.
    """
    response = app.get('/template/%s' % name)
    checker = HTMLValidator()
    checker.validate_fragment(response.data)

    # Collect both message lists before asserting on either.
    error_texts = [item['message'] for item in checker.errors]
    warning_texts = [item['message'] for item in checker.warnings]

    error_report = '%s contained errors:\n%s' % (name, '\n'.join(error_texts))
    eq_(len(error_texts), 0, msg=error_report)
    warning_report = '%s contained warnings:\n%s' % (
        name, '\n'.join(warning_texts))
    eq_(len(warning_texts), 0, msg=warning_report)
def check_url(self, url):
    """Assert that the page at ``url`` validates with no errors or warnings.

    When the validator reports anything, the response body and the findings
    are printed before the assertions run.
    """
    response = self.client.get(url)
    checker = HTMLValidator()
    checker.validate_fragment(response.content)

    errors, warnings = checker.errors, checker.warnings
    if errors or warnings:
        print(response.content)
    if errors:
        print("ERRORS: %s" % json.dumps(errors, indent=4))
    if warnings:
        print("WARNINGS: %s" % json.dumps(warnings, indent=4))

    self.assertEqual(errors, [])
    self.assertEqual(warnings, [])
def validate_html(fragment_string):
    """Validate an HTML fragment wrapped in the module's page template.

    Returns a ``(status, detail)`` tuple:

    * ``('error', 'not unicode')`` when the argument is not exactly ``unicode``
    * ``('error', errors)`` when the validator reports errors
    * ``('warning', warnings)`` when it reports only warnings
    * ``('unchecked', exception)`` when validating raised
    * ``('passed', None)`` otherwise
    """
    if type(fragment_string) is not unicode:
        return ('error', 'not unicode')

    try:
        checker = HTMLValidator()
        document = html_template_head + fragment_string + html_template_tail
        checker.validate_fragment(document)
        if checker.errors:
            return ('error', checker.errors)
        if checker.warnings:
            return ('warning', checker.warnings)
    except Exception as exc:
        return ('unchecked', exc)
    return ('passed', None)
class HTMLValidationThread(threading.Thread):
    """Worker thread that validates queued HTML and reports the findings.

    Jobs are read from ``validationQueue``. A job equal to the string
    ``"die"`` stops the thread; every other job is indexed by ``'html'``,
    ``'currentURL'`` and ``'time'``.
    """

    def __init__(self, validationQueue, siteReporter, verboseOutput, exceptions):
        threading.Thread.__init__(self)
        self.exceptions = exceptions
        self.verboseOutput = verboseOutput
        self.siteReporter = siteReporter
        self.queue = validationQueue
        endpoint = "http://validator.forion.com/check"
        self.validatorURL = endpoint
        self.htmlValidator = HTMLValidator(validator_url=endpoint,
                                           charset="UTF-8")

    def run(self):
        """Process jobs until a ``"die"`` job arrives or a job fails."""
        report = self.siteReporter.addValidationMessageToValidationReport
        while True:
            try:
                job = self.queue.get()
                is_text = type(job) == str or type(job) == unicode
                if is_text and job == "die":
                    self.queue.task_done()
                    sys.exit(0)
                try:
                    page_url = job['currentURL']
                    if self.verboseOutput:
                        print(" [" + self.getName() + "] " + page_url)
                    self.htmlValidator.validate_fragment(job['html'])
                    report(page_url,
                           self.htmlValidator.errors,
                           self.htmlValidator.warnings,
                           job['time'])
                    self.queue.task_done()
                except ValidationFault as errorMsg:
                    # The fault itself is reported in place of the findings.
                    report(job['currentURL'], errorMsg, "", "")
                    self.queue.task_done()
            except Exception:
                # Hand the failure to the owner of ``exceptions`` and stop.
                self.exceptions.append(sys.exc_info())
                self.queue.task_done()
                sys.exit(0)
def __init__(self, validationQueue, siteReporter, verboseOutput, exceptions):
    """Initialise the thread and create its validator client.

    The four arguments are stored unchanged as ``queue``, ``siteReporter``,
    ``verboseOutput`` and ``exceptions``; ``htmlValidator`` is bound to the
    URL kept in ``validatorURL`` with a UTF-8 charset.
    """
    threading.Thread.__init__(self)

    # Collaborators handed in by the caller.
    self.exceptions = exceptions
    self.verboseOutput = verboseOutput
    self.siteReporter = siteReporter
    self.queue = validationQueue

    # Validation service endpoint and the client bound to it.
    endpoint = "http://validator.forion.com/check"
    self.validatorURL = endpoint
    self.htmlValidator = HTMLValidator(validator_url=endpoint,
                                       charset="UTF-8")
def test_excel(self):
    """test XLS, XLSX parsing"""
    fixtures = {
        "Revised.Haplogroups.1000G.20140205.xlsx": {
            "contents": [
                "Continent",
                "Population",
                "ID",
                "Macrohaplogroup",
                "Haplogroup",
                "Informative SNPs",
                "NA19239",
                "NA19256",
                "E1b1a1a1g1a2",
            ]
        },
        "lclarke_phase1_sequence_stats_20120330.xls": {
            "contents": [
                "Breakdown of data generated by project, technology, submitting centre",
                "92219554043",
                "90363687334"
            ]
        }
    }
    # Markup that must appear in HTML output and must not appear otherwise.
    markup = ['<div>', '<tbody>', '<th>', '<td>', '<tr>']
    checker = HTMLValidator()

    for workbook_name, expected in fixtures.items():
        workbook_path = os.path.join(BASE_DIR, "excel", workbook_name)
        with open(workbook_path, mode="rb") as excel:
            for wants_html in (False, True):
                body, _ = extract_excel(excel, as_html=wants_html)
                if not wants_html:
                    assert not any(tag in body for tag in markup)
                else:
                    checker.validate_fragment(body)
                    assert all(tag in body for tag in markup)
                    # Known-acceptable validator messages are not failures.
                    serious_errors = [
                        err for err in checker.errors
                        if err["message"] not in ACCEPTABLE_ERROR_MESSAGES
                    ]
                    assert not serious_errors
                    print(checker.warnings)
                # Both output modes must contain the expected cell values.
                assert all(text in body for text in expected["contents"])
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned by the server
        expected_string='',
        # The number of validation errors expected. If None, no validation is performed.
        expected_errors=None,
        # An optional dictionary of query parameters.
        # NOTE(review): not referenced anywhere in this body.
        params=None,
        # The expected status code from the request.
        expected_status=200,
        # All additional keyword arguments are passed to the ``post`` method.
        **kwargs):
    """POST to ``url`` and check status, content and validator error count.

    On any failed assertion the first 200 characters of the response are
    printed, the response is saved to ``<url with '/' -> '-'>.html`` and the
    ``AssertionError`` is re-raised.
    """
    try:
        self.post(url, **kwargs)
        assert self.status == expected_status
        if expected_string:
            assert expected_string in self.text
        if expected_errors is None:
            return

        checker = HTMLValidator()
        checker.validate_fragment(self.text)
        error_count = len(checker.errors)
        if error_count != expected_errors:
            print('Errors for {}: {}'.format(url, error_count))
            pprint(checker.errors)
            assert False
        if checker.warnings:
            print('Warnings for {}: {}'.format(url, len(checker.warnings)))
            pprint(checker.warnings)
    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that ``self.text`` is already encoded as utf-8.
        print(self.text[:200])
        dump_name = url.replace('/', '-') + '.html'
        with open(dump_name, 'wb') as dump:
            dump.write(self.text.replace('\r\n', '\n'))
        raise
def test_relatorio_html(self):
    """The report built from the downloaded operations must be valid HTML.

    Downloads the operations file, keeps the last 150 rows, runs the custody
    and tax calculations and validates the rendered report.
    """
    from src.dropbox_files import download_dropbox_file
    download_dropbox_file()
    df = get_operations_dataframe()
    df = df.tail(150)
    calcula_custodia(df)
    calculo_ir = CalculoIr(df=df)
    calculo_ir.calcula()

    from py_w3c.validators.html.validator import HTMLValidator
    vld = HTMLValidator()
    assert vld.validate_fragment(relatorio_html(calculo_ir))
    # NOTE(review): py_w3c appears to return a "validation happened" flag
    # from ``validate_fragment`` and to collect findings on ``vld.errors`` --
    # confirm. Without this check invalid HTML would still pass.
    assert not vld.errors, vld.errors
class LandingTestCase(APITestCase):
    """Checks for the landing page markup and the web manifest."""

    def setUp(self):
        self.url = reverse('landing:main_page')
        self.validator = HTMLValidator()

    def ignore(self, error_message):
        """
        Ignoring minor errors in W3C specifications
        """
        tolerated = "for attribute “media” on element “link”: Expected a CSS media feature but saw"
        return tolerated in error_message

    def test_valid_html(self):
        """
        Checking that we have valid html
        """
        response = self.client.get(self.url)
        markup = str(response.content, encoding='utf8')
        self.validator.validate_fragment(markup)

        filtered_errors = []
        for msg in self.validator.errors:
            if self.ignore(msg['message']):
                continue
            filtered_errors.append(msg)

        # Empty lists/dicts evaluate to False
        self.assertFalse(filtered_errors)

    def test_valid_manifest(self):
        """
        Testing that manifest is valid and accessible
        """
        manifest_path = finders.find('manifest.json')
        with open(manifest_path, 'r') as manifest:
            parsed = json.load(manifest)
            self.assertTrue(parsed)
def test_relatorio_html(self):
    """The report built from a small fixed set of operations must be valid HTML."""
    data = [{
        'ticker': 'MAXR11',
        'qtd': 100,
        'data': datetime.date(2019, 3, 11),
        'preco': 100,
        'aquisicao_via': 'HomeBroker'
    }, {
        'ticker': 'PETR4',
        'qtd': 100,
        'data': datetime.date(2019, 4, 11),
        'preco': 100,
        'aquisicao_via': 'HomeBroker'
    }, {
        'ticker': 'XPLG11',
        'qtd': 100,
        'data': datetime.date(2019, 4, 12),
        'preco': 200,
        'aquisicao_via': 'HomeBroker'
    }, {
        'ticker': 'XPLG11',
        'qtd': -50,
        'data': datetime.date(2019, 5, 12),
        'preco': 220,
        'aquisicao_via': 'HomeBroker'
    }]
    df = create_testing_dataframe(data)
    custodia = calcula_custodia(df)
    calculo_ir = CalculoIr(df=df)
    calculo_ir.calcula()

    from py_w3c.validators.html.validator import HTMLValidator
    vld = HTMLValidator()
    assert vld.validate_fragment(
        relatorio_html(custodia, calculo_ir, datetime.date.today(), False))
    # NOTE(review): py_w3c appears to return a "validation happened" flag
    # from ``validate_fragment`` and to collect findings on ``vld.errors`` --
    # confirm. Without this check invalid HTML would still pass.
    assert not vld.errors, vld.errors
else: dump = sys.argv[2] # Validation Type vtype = 'HTML' # Specify a file path for storing results pt = '../target/' # Create a filename to store the results in fn = 'Results_' + vtype + '_' + time.strftime('%m%d%y_%H%M%S') + '.txt' # Create validator object vld = HTMLValidator() # Validate URL vld.validate(url) # Capture validator errors e = vld.errors # Capture validator warnings w = vld.warnings # Create a line separator sep = ''.join(['=']*80) # Output the results res = ''
class CellsHTMLTests(unittest.TestCase):
    """Render each cell type and run the produced HTML through the validator."""

    @classmethod
    def setUpClass(cls):
        configureLogging(preamble="cells_html_test", level=logging.INFO)
        cls._logger = logging.getLogger(__name__)

    def setUp(self):
        self.token = token = genToken()
        self.server = server = InMemServer(auth_token=token)
        server.start()
        self.db = db = server.connect(token)
        db.subscribeToSchema(test_schema)
        self.cells = Cells(db)
        self.validator = HTMLValidator()

    def tearDown(self):
        self.server.stop()

    # -- assertions ---------------------------------------------------------

    def assertHTMLValid(self, html):
        """Fail when ``html`` has more than two validator errors or still
        contains the ``__identity__`` placeholder."""
        self.validator.validate_fragment(html)
        # NOTE(review): up to two validator errors are tolerated -- presumably
        # the ones every bare fragment triggers; confirm.
        found = self.validator.errors
        if len(found) > 2:
            raise AssertionError(
                'INVALID HTML:\n\n %s\n' % html + str(found))
        if '__identity__' in html:
            raise AssertionError(
                "Html shouldn't contain __identity__ - should already be replaced."
            )

    def assertHTMLNotEmpty(self, html):
        if html == "":
            raise AssertionError("Cell does not produce any HTML!")

    # -- helpers ------------------------------------------------------------

    def _assertRendered(self, html):
        """Apply the non-empty and validity checks to rendered HTML."""
        self.assertHTMLNotEmpty(html)
        self.assertHTMLValid(html)

    def _renderAndCheck(self, cell, attachCells=False):
        """Recalculate ``cell`` (optionally attached to ``self.cells``) and
        check its contents."""
        if attachCells:
            cell.cells = self.cells
        cell.recalculate()
        self._assertRendered(cell.contents)

    # -- tests --------------------------------------------------------------

    def test_card_html_valid(self):
        self._renderAndCheck(Card("Some text body"))

    def test_card_title_html_valid(self):
        self._renderAndCheck(CardTitle("Some title body"))

    def test_modal_html_valid(self):
        self._renderAndCheck(Modal("Title", "Modal Message"))

    def test_octicon_html_valid(self):
        self._renderAndCheck(Octicon("which-example"))

    def test_badge_html_valid(self):
        self._renderAndCheck(Badge("Some inner content here"))

    @unittest.skip("skipping: cell.recalculate() fails")
    def test_collapsible_panel_html_valid(self):
        self._renderAndCheck(
            CollapsiblePanel("Inner panel content", "Other content", True))

    def test_text_html_valid(self):
        self._renderAndCheck(Text("This is some text"))

    @unittest.skip("skipping: cell.recalculate() DB fails")
    def test_padding_html_valid(self):
        self._renderAndCheck(Padding())

    def test_span_html_valid(self):
        self._renderAndCheck(Span("Some spanned text"))

    def test_sequence_html_valid(self):
        children = [Text("Element One"), Text("Element two")]
        self._renderAndCheck(Sequence(children))

    def test_columns_html_valid(self):
        children = [Text("Element One"), Text("Element Two")]
        self._renderAndCheck(Columns(children))

    def test_lg_pending_download_html_valid(self):
        self._renderAndCheck(LargePendingDownloadDisplay())

    def test_code_html_valid(self):
        self._renderAndCheck(Code("function(){console.log('hi');}"))

    def test_contextual_display_html_valid(self):
        self._renderAndCheck(ContextualDisplay(object), attachCells=True)

    def test_subscribed_html_valid(self):
        child = Text("Subscribed Text")
        # TODO: does this makes sense?
        self._renderAndCheck(Subscribed(child), attachCells=True)

    def test_header_bar_html_valid(self):
        leftItems = [Text("Left One"), Text("Left Two")]
        centerItems = [Text("Center item")]
        rightItems = [
            Text("Right One"),
            Text("Right Two"),
            Text("Right Three")
        ]
        self._renderAndCheck(HeaderBar(leftItems, centerItems, rightItems))

    def test_main_html_valid(self):
        self._renderAndCheck(Main(Text("This is a child cell")))

    def test_tabs_html_valid(self):
        cell = Tabs(Tab1=Card("Tab1 Content"), Tab2=Card("Tab2 Content"))
        cell.recalculate()
        # TODO: placeholder text can't be in element <ul>
        html = cell.contents.replace(" ____header_0__  ____header_1__",
                                     "<li>_content</li>")
        self._assertRendered(html)

    @unittest.skip("Skipping until we refactor init")
    def test_table_html_valid(self):
        pass

    def test_dropdown_html_valid(self):
        # CURRENTLY FAILING
        vals = [1, 2, 3, 4]

        def func(x):
            return x + 1

        self._renderAndCheck(Dropdown("title", vals, func), attachCells=True)

    def test_container_html_valid(self):
        self._renderAndCheck(Container(Text("Child cell")))

    def test_scrollable_html_valid(self):
        self._renderAndCheck(Scrollable(Text("Child cell")))

    def test_root_cell_html_valid(self):
        # Only validity is checked here; there is no non-empty assertion.
        cell = RootCell()
        cell.recalculate()
        self.assertHTMLValid(cell.contents)

    def test_traceback_html_valid(self):
        self._renderAndCheck(Traceback("Some traceback information here"))

    def test_subscribed_sequence_html_valid(self):
        cell = SubscribedSequence(
            lambda: Thing.lookupAll(k=0),
            lambda thing: Span("X: %s, K: %s" % (thing.x, thing.k)))
        self._renderAndCheck(cell, attachCells=True)

    def test_plot_html_valid(self):
        self._renderAndCheck(Plot("Some plot data subscriptions here"),
                             attachCells=True)

    def test_popover_html_valid(self):
        cell = Popover(Text("This is the content"),
                       Text("This is the title"),
                       Text("This is the detail"))
        self._renderAndCheck(cell, attachCells=True)

    def test_sheet_html_valid(self):
        cell = Sheet(["col1", "col2"], 10, lambda x: range(10))
        self._renderAndCheck(cell, attachCells=True)

    def test_grid_html_valid(self):
        columns = ['One', 'Two', 'Three']
        rows = ['Thing1', 'Thing2', 'Thing3']
        cell = Grid(lambda: columns,
                    lambda: rows,
                    lambda x: x,
                    lambda rowLabel: rowLabel,
                    lambda x: x)
        self._renderAndCheck(cell, attachCells=True)

    def test_codeeditor_html_valid(self):
        self._renderAndCheck(CodeEditor(), attachCells=True)

    def test_expands_html_valid(self):
        self._renderAndCheck(Expands(Text("closed"), Text("open")),
                             attachCells=True)

    def test_loadcontentfromurl_html_valid(self):
        cell = LoadContentsFromUrl("url")
        cell._identity = "id"
        self._renderAndCheck(cell)

    def test_button_html_valid(self):
        self._renderAndCheck(Button(content="", f=lambda x: x),
                             attachCells=True)

    def test_buttongroup_html_valid(self):
        b1 = Button(content="", f=lambda x: x)
        b2 = Button(content="", f=lambda x: x)
        self._renderAndCheck(ButtonGroup([b1, b2]), attachCells=True)

    def test_clickable_html_valid(self):
        self._renderAndCheck(Clickable(Text("content"), f=lambda x: x),
                             attachCells=True)

    def test_singlelinetextbox_html_valid(self):
        class MockSlot():
            def __init__(self):
                pass

            def get(self):
                return "inputValue"

        self._renderAndCheck(SingleLineTextBox(MockSlot()), attachCells=True)

    def test_async_dropdown_initial_html_valid(self):
        def handler():
            return Text("RESULT")

        # Initial render
        self._renderAndCheck(AsyncDropdown('Untitled', handler),
                             attachCells=True)

    def test_async_dropdown_changed_html_valid(self):
        def handler():
            return Text("RESULT")

        cell = AsyncDropdown('Untitled', handler)
        cell.cells = self.cells
        # Async changed render
        cell.recalculate()
        cell.slot.set(True)
        self._assertRendered(cell.contents)

    def test_circle_loeader_html_valid(self):
        self._renderAndCheck(CircleLoader())
def main():
    """Build the site into ``./build/initial`` and optionally post-process it.

    Steps, in order: wipe the previous build, copy media files, generate all
    pages into ``page_dict``, work out which pages are not reachable from the
    index, write every page to disk, and then (unless disabled on the command
    line) run the ``idupree_websitepy`` build and tests. Behaviour is switched
    by literal flags looked up in ``sys.argv``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed copy of this function -- verify block boundaries
    (in particular the final ``--deploy`` block) against the original file.
    """
    build_dir = "./build/initial"
    # clear old build products:
    if os.path.exists(build_dir):
        shutil.rmtree(build_dir)
    ensure_dir(os.path.join(build_dir, "media"))
    media_dir = "./media/"
    media_subdirs = os.listdir(media_dir)
    # Entries of every media subdirectory are copied flat into
    # <build_dir>/media, i.e. the subdirectory level is dropped.
    for media_subdir in media_subdirs:
        media_filenames = os.listdir(os.path.join(media_dir, media_subdir))
        for media_filename in media_filenames:
            source = os.path.join(media_dir, media_subdir, media_filename)
            destination = os.path.join(build_dir, "media", media_filename)
            if os.path.isdir (source):
                shutil.copytree(source, destination)
            else:
                shutil.copy(source, destination)
    # The favicon is additionally placed at the build root.
    shutil.copy( os.path.join(build_dir, "media/favicon.ico"), os.path.join(build_dir, "favicon.ico"))
    shutil.copytree( os.path.join("./media/hexy"), os.path.join(build_dir, "hexy"))
    # Maps output path -> file contents; filled in by the page generators.
    page_dict = {}
    utils.checked_insert(page_dict, css.domain_relative_url(), css.build())
    utils.checked_insert(page_dict, "/robots.txt", '''''')
    bars.add_home_page(page_dict)
    blog.add_pages(page_dict)
    javascript.add_files(page_dict)
    category_pages.add_category_pages(page_dict)
    game_pages.add_game_pages(page_dict)
    comics.add_comic_pages(page_dict)
    rss.add_feed(page_dict)
    redirects.add_redirects(page_dict)
    reached_pages = {}
    orphaned_pages = {}
    # (path, description) pairs; the paths listed here are treated as reached
    # even though nothing links to them from the index.
    orphaned_pages_display = [
        ("/stories/the-console-of-the-time-cops", "The Console of the Time Cops, a short story"),
        ("/2013-04-29-lasercake-talk-script", "The script for my 2013-04-29 Lasercake talk"),
        ("/some-thoughts-about-undyne-the-character-from-the-game-undertale", "Some thoughts about Undyne, the character from the game Undertale"),
        ("/the-morality-of-legend-of-korra", "A post about the morality of Legend of Korra"),
    ]
    def reach_page (path):
        # Mark `path` as reached and recurse into every href found in its
        # contents (with ".html" appended to the link target).
        if path in page_dict and path not in reached_pages:
            reached_pages [path] = True
            for destination in re.finditer ('href="(.+?)"', page_dict [path]):
                reach_page (destination.group (1) + ".html")
    reach_page ("/index.html")
    for (path, _) in orphaned_pages_display:
        reach_page (path+".html")
    def find_orphaned_pages ():
        # Every .html page not reached so far is recorded as orphaned (path
        # without the ".html" suffix) and then marked reached, together with
        # whatever it links to.
        for path,contents in page_dict.items():
            if path.endswith (".html") and path not in reached_pages:
                reach_page (path)
                orphaned_pages [path [0:-5]] = True
                orphaned_pages_display.append ((path [0:-5], path [0:-5]))
    find_orphaned_pages ()
    print ("Orphaned pages:")
    print (orphaned_pages)
    category_pages.add_secrets (page_dict, orphaned_pages_display)
    for path,contents in page_dict.items():
        # W3C validation is switched off by the constant `False` below.
        if False and path.endswith(".html"):
            from py_w3c.validators.html.validator import HTMLValidator
            vld = HTMLValidator()
            vld.validate_fragment(contents)
            print(vld.errors)
            print(vld.warnings)
        assert(path[0] == '/')
        # Report ".301" entries whose target is a site-relative path with no
        # matching generated ".html" page.
        if path .endswith (".301") and contents [0] == '/' and contents != "/" and "." not in contents and contents + ".html" not in page_dict:
            print (path + " redirects to nonexistent " + contents)
        buildpath = build_dir + path
        ensure_dir(os.path.dirname(buildpath))
        with open(buildpath, "w", encoding='utf-8') as f:
            f.write(contents)
        if "--no-jshint" not in sys.argv and path.endswith (".js"):
            print('jshinting ' +buildpath)
            subprocess.run(['jshint', buildpath])
        if "--accessibility" in sys.argv and path.endswith (".html"):
            subprocess.run(['pa11y', "--ignore", 'warning;notice', "file://" + os.path.abspath (buildpath)])
    # TODO real cmdline processing
    if '--no-idupree-websitepy' not in sys.argv:
        import idupree_websitepy.build
        import idupree_websitepy.tests
        config = idupree_websitepy.build.Config(
            site_source_dir = build_dir,
            build_output_dir = './build/idupree_websitepy_output/',
            doindexfrom = ['/', "/harry-potter-and-the-methods-of-rationality-commentary", "/the-morality-of-legend-of-korra", "/some-thoughts-about-undyne-the-character-from-the-game-undertale","/stories/the-console-of-the-time-cops","/2013-04-29-lasercake-talk-script", ],
            butdontindexfrom = [],
            error_on_missing_resource = False,
            error_on_broken_internal_link = False,
            canonical_scheme_and_domain = utils.canonical_scheme_and_domain,
            list_of_compilation_source_files = ['build.py'],
            published_as_is = (lambda path: bool(re.search(r'\.(txt|asc|pdf|rss|atom|zip|tar\.(gz|bz2|xz)|appcache|cpp|hs|js\.mem)$|'+ r'^/favicon.ico$|^/atom\.xml$|^/media/affirmative-consent-poster\.png$|^/media/colby_comic.*\.png$|^/media/interval_optimized_1_hour.ogg$', path))),
            test_host = 'localhost',
            test_port = 84,
            test_host_header = 'www.elidupree.com',
            test_canonical_origin = utils.canonical_scheme_and_domain,
            # Expected HTTP status per path, handed to the test run below.
            test_status_codes = {
                '/': 200,
                "/blog": 200,
                "/comics": 200,
                "/stories": 200,
                "/games": 200,
                "/voldemorts-children": 200,
                "/voldemorts-children/archive": 200,
                "/voldemorts-children/5": 200,
                "/games/green-caves": 200,
                "/blog/tags/gender": 200,
                "/blog/page/3": 200,
                "/blog/page/3/chronological": 200,
                "/stories/not-what-i-am": 200,
                "/stories/not-what-i-am/discussion": 200,
                "/games/pac-asteroids": 200,
                '/blog/happy-tau-day': 200,
                '/hexy': 200,
                "/main/blog": 301,
                "/main/posts/1-the-epic-first-post": 301,
                "/EoHS": 301,
                '/sdhofhnkfjdsdsf': 404,
                "/blog/gibberish-gibberish": 404,
                "/404": 404,
            }
        )
        idupree_websitepy.build.build(config)
        # nginx is reloaded via sudo/systemctl, followed by a one second
        # pause before the tests run.
        subprocess.check_call(['/usr/bin/sudo', '/bin/systemctl', 'reload-or-try-restart', 'nginx.service'])
        time.sleep(1)
        if '--no-idupree-websitepy-tests' not in sys.argv:
            # tests not working?
            # sudo systemctl restart validatornu.service
            # and wait 3 minutes
            idupree_websitepy.tests.test(config)
    # A marker file is written when deployment was requested.
    if "--deploy" in sys.argv:
        with open(build_dir + "/deploy_ready", 'w') as deploy_file:
            deploy_file.write ("yes")
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned by the server
        expected_string='',
        # The number of validation errors expected. If None, no validation is performed.
        expected_errors=None,
        # An optional dictionary of query parameters.
        # NOTE(review): not referenced anywhere in this body.
        params=None,
        # The expected status code from the request.
        expected_status=200,
        # All additional keyword arguments are passed to the ``post`` method.
        **kwargs):
    """POST to ``url`` and check status, content and validator error count.

    A failed assertion saves the response to ``<url with '/' -> '-'>.html``
    and is re-raised. A ``RuntimeError`` whose first argument starts with
    ``'ticket '`` is treated as a web2py ticket: the traceback page is fetched
    through the admin interface and saved to ``..._traceback.html``. Every
    ``RuntimeError`` is re-raised.
    """
    page_stem = url.replace('/', '-')
    try:
        self.post(url, **kwargs)
        assert self.status == expected_status
        if expected_string:
            assert expected_string in self.text
        if expected_errors is None:
            return

        checker = HTMLValidator()
        checker.validate_fragment(self.text)
        error_count = len(checker.errors)
        if error_count != expected_errors:
            print('Errors for {}: {}'.format(url, error_count))
            pprint(checker.errors)
            assert False
        if checker.warnings:
            print('Warnings for {}: {}'.format(url, len(checker.warnings)))
            pprint(checker.warnings)

    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that ``self.text`` is already encoded as utf-8.
        validation_file = page_stem + '.html'
        with open(validation_file, 'wb') as dump:
            dump.write(self.text.replace('\r\n', '\n'))
        print('Validation failure saved to {}.'.format(validation_file))
        raise

    except RuntimeError as e:
        # Provide special handling for web2py exceptions by saving the
        # resulting traceback.
        ticket_prefix = 'ticket '
        message = e.args[0]
        if message.startswith(ticket_prefix):
            # Create a client to access the admin interface.
            admin_client = WebClient('http://127.0.0.1:8000/admin/',
                                     postbacks=True)
            # Log in.
            credentials = {'password': self.web2py_server.password}
            admin_client.post('', data=credentials)
            assert admin_client.status == 200
            # Get the error.
            error_code = message[len(ticket_prefix):]
            admin_client.get('default/ticket/' + error_code)
            assert admin_client.status == 200
            # Save it to a file.
            traceback_file = page_stem + '_traceback.html'
            with open(traceback_file, 'wb') as dump:
                dump.write(admin_client.text.replace('\r\n', '\n'))
            print('Traceback saved to {}.'.format(traceback_file))
        raise
import nose
import os
import re
import copy
from py_w3c.validators.html.validator import HTMLValidator

files = []
# Maps the path of each validated file to a copy of the validator that
# checked it.
results = {}

# Walk the current directory and validate every file whose name ends in
# ".htm" or ".html". This runs at import time.
for root, dirs, file in os.walk('.'):
    for name in file:
        if re.search('.*\.html?$',name):
            validator = HTMLValidator(charset="utf8")
            filepath = os.path.join(root,name)
            validator.validate_file(filepath)
            # A shallow copy is stored per file.
            results[filepath] = copy.copy(validator)

def validation(errortype):
    # `errortype` selects which validator list is inspected and must be
    # 'errors' or 'warnings'; anything else raises NotImplementedError.
    if errortype not in ('errors','warnings'):
        raise NotImplementedError(errortype + ' is not implemented at Validator Object')
    count = 0
    for path,result in results.items():
        obj = None
        # Pick the list matching `errortype` for this file.
        if errortype == 'errors':
            obj = result.errors
        elif errortype == 'warnings':
            obj = result.warnings
with open(csv_file, 'rU') as f: reader = csv.reader(f) csv_hash = {rows[0]: rows[1] for rows in reader} for lib_name, url in csv_hash.items(): try: url = url.replace(" ", "") lib_name = lib_name.replace(" ", "_") # Create folder for your current library's data print "Creating folder for " + lib_name subdirectory = lib_name dir_path = "./" + directory + "/" + subdirectory if not os.path.exists(dir_path): os.makedirs(dir_path) # W3C Query/Response vld = HTMLValidator() vld.validate(url) w3c_warnings = convert(vld.warnings) save(w3c_warnings, 'w3c_warnings', lib_name, directory, subdirectory) w3c_errors = convert(vld.errors) save(w3c_errors, 'w3c_errors', lib_name, directory, subdirectory) w3c_warnings = solrize_w3c(w3c_warnings, "w3c_warnings") w3c_errors = solrize_w3c(w3c_errors, "w3c_errors") w3c = dict(w3c_warnings, **w3c_errors) w3c = flatten(w3c) time.sleep(1)
def validate(
        self,
        # The relative URL to validate.
        url,
        # An optional string that, if provided, must be in the text returned by the server. If this is a list of strings, all of the provided strings must be in the text returned by the server.
        expected_string='',
        # The number of validation errors expected. If None, no validation is performed.
        expected_errors=None,
        # The expected status code from the request.
        expected_status=200,
        # All additional keyword arguments are passed to the ``post`` method.
        **kwargs):
    """POST to ``url`` and check the status, the text and (optionally) the HTML validity of the response.

    Returns the response text on success, or ``''`` when the request raised an ``HTTPError``
    whose code equals ``expected_status``. On an ``AssertionError`` the response text is saved to
    ``<url with '/' replaced by '-'>.html`` before re-raising; on a ``RuntimeError`` whose message
    starts with ``'ticket '`` the traceback page is fetched from the admin interface and saved to
    ``<...>_traceback.html`` before re-raising.
    """
    try:
        try:
            self.post(url, **kwargs)
        except HTTPError as e:
            # If this was the expected result, return.
            if e.code == expected_status:
                # Since this is an error of some type, these parameters must be empty, since they can't be checked.
                assert not expected_string
                assert not expected_errors
                return ''
            else:
                raise
        assert self.status == expected_status
        if expected_string:
            if isinstance(expected_string, str):
                assert expected_string in self.text
            else:
                # Assume ``expected_string`` is a list of strings.
                assert all(string in self.text for string in expected_string)

        if expected_errors is not None:
            vld = HTMLValidator()
            vld.validate_fragment(self.text)
            if len(vld.errors) != expected_errors:
                print('Errors for {}: {}'.format(url, len(vld.errors)))
                pprint(vld.errors)
                assert False
            if vld.warnings:
                print('Warnings for {}: {}'.format(url, len(vld.warnings)))
                pprint(vld.warnings)

        return self.text

    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that ``self.text`` is already encoded as utf-8.
        validation_file = url.replace('/', '-') + '.html'
        with open(validation_file, 'wb') as f:
            f.write(_html_prep(self.text))
        print('Validation failure saved to {}.'.format(validation_file))
        raise

    except RuntimeError as e:
        # Provide special handling for web2py exceptions by saving the
        # resulting traceback. An exception without a string message is
        # simply re-raised instead of failing with an ``IndexError`` here.
        message = e.args[0] if e.args else ''
        if isinstance(message, str) and message.startswith('ticket '):
            # Create a client to access the admin interface.
            admin_client = WebClient('{}/admin/'.format(
                self.web2py_server_address), postbacks=True)
            # Log in.
            admin_client.post(
                '', data={'password': self.web2py_server.password})
            assert admin_client.status == 200
            # Get the error.
            error_code = message[len('ticket '):]
            admin_client.get('default/ticket/' + error_code)
            assert admin_client.status == 200
            # Save it to a file.
            traceback_file = url.replace('/', '-') + '_traceback.html'
            with open(traceback_file, 'wb') as f:
                f.write(_html_prep(admin_client.text))
            print('Traceback saved to {}.'.format(traceback_file))
        raise
def setUp(self):
    """Store the reversed ``landing:main_page`` URL and a new ``HTMLValidator`` on the instance."""
    landing_url = reverse('landing:main_page')
    self.url = landing_url
    self.validator = HTMLValidator()
def validate(self,
             # The relative URL to validate.
             url,
             # An optional string that, if provided, must be in the text returned by the server. If this is a list of strings, all of the provided strings must be in the text returned by the server.
             expected_string='',
             # The number of validation errors expected. If None, no validation is performed.
             expected_errors=None,
             # The expected status code from the request.
             expected_status=200,
             # All additional keyword arguments are passed to the ``post`` method.
             **kwargs):
    """POST to ``url`` and check the status, the text and (optionally) the HTML validity of the response.

    Returns the response text on success (decoded from utf-8 when not running on Python 3), or
    ``''`` when the request raised an ``HTTPError`` whose code equals ``expected_status``. On an
    ``AssertionError`` the response text is saved to ``<url with '/' replaced by '-'>.html``
    before re-raising; on a ``RuntimeError`` whose message starts with ``'ticket '`` the traceback
    page is fetched from the admin interface and saved to ``<...>_traceback.html`` before
    re-raising.
    """
    try:
        try:
            self.post(url, **kwargs)
        except HTTPError as e:
            # If this was the expected result, return.
            if e.code == expected_status:
                # Since this is an error of some type, these parameters must be empty, since they can't be checked.
                assert not expected_string
                assert not expected_errors
                return ''
            else:
                raise
        assert self.status == expected_status
        if expected_string:
            if isinstance(expected_string, str):
                assert expected_string in self.text
            else:
                # Assume ``expected_string`` is a list of strings.
                assert all(string in self.text for string in expected_string)

        if expected_errors is not None:
            vld = HTMLValidator()
            vld.validate_fragment(self.text)
            if len(vld.errors) != expected_errors:
                print('Errors for {}: {}'.format(url, len(vld.errors)))
                pprint(vld.errors)
                assert False
            if vld.warnings:
                print('Warnings for {}: {}'.format(url, len(vld.warnings)))
                pprint(vld.warnings)

        return self.text if six.PY3 else self.text.decode('utf-8')

    except AssertionError:
        # Save the HTML to make fixing the errors easier. Note that ``self.text`` is already encoded as utf-8.
        validation_file = url.replace('/', '-') + '.html'
        with open(validation_file, 'wb') as f:
            f.write(_html_prep(self.text))
        print('Validation failure saved to {}.'.format(validation_file))
        raise

    except RuntimeError as e:
        # Provide special handling for web2py exceptions by saving the
        # resulting traceback. An exception without a string message is
        # simply re-raised instead of failing with an ``IndexError`` here.
        message = e.args[0] if e.args else ''
        if isinstance(message, str) and message.startswith('ticket '):
            # Create a client to access the admin interface.
            admin_client = WebClient('http://127.0.0.1:8000/admin/',
                                     postbacks=True)
            # Log in.
            admin_client.post('', data={'password': self.web2py_server.password})
            assert admin_client.status == 200
            # Get the error.
            error_code = message[len('ticket '):]
            admin_client.get('default/ticket/' + error_code)
            assert admin_client.status == 200
            # Save it to a file.
            traceback_file = url.replace('/', '-') + '_traceback.html'
            with open(traceback_file, 'wb') as f:
                f.write(_html_prep(admin_client.text))
            print('Traceback saved to {}.'.format(traceback_file))
        raise
def validate(self):
    """Validate ``self.url`` with a new ``HTMLValidator`` and store its ``errors`` in ``self.result.w3c``."""
    checker = HTMLValidator()
    checker.validate(self.url)
    self.result.w3c = checker.errors
'/home/user/workspace/Problem Statement Two/StatementTwo/files/25 errors - a1 (1).html', 'w') txt.write(text.get(0.0, Tkinter.END)) txt.close() def key(event): if event.char == '\x1b': root.destroy() else: val = text.index(Tkinter.INSERT).split('.') lineNum.config(text=str(val[0])) colNum.config(text=str(val[1])) vld = HTMLValidator(doctype="HTML 4.01 Strict") vld.validate_file( '/home/user/workspace/Problem Statement Two/StatementTwo/files/25 errors - a1 (1).html' ) errors = vld.errors locationList = [] for error in range(len(errors)): locationList.append(errors[error]['line']) print locationList txt = open( '/home/user/workspace/Problem Statement Two/StatementTwo/files/25 errors - a1 (1).html', 'r') lineNum = 1 errorIndex = 0 for line in txt:
import sys
import time

from py_w3c.validators.html.validator import HTMLValidator

# The first line of the ``CNAME`` file is used as the host name to validate.
with open('CNAME', 'r') as f:
    first_line = f.readline().splitlines()

# An empty file (or an empty first line) gives no host name; stop with a clear
# message instead of an ``IndexError`` or a request to ``http:///``.
if not first_line or not first_line[0]:
    sys.exit('CNAME file is empty; no host name to validate')
cname = first_line[0]

vld = HTMLValidator()
vld.validate("http://%s/" % cname)

# Report every validation error and exit with a non-zero status.
if vld.errors:
    for error in vld.errors:
        # Single-argument ``print(...)`` behaves the same as a statement and as a function.
        print("#%s: %s" % (error['line'], error['message']))
    sys.exit(1)

print('')
print('W3C validation passed')
def test_deve_retornar_relatorio_html_com_todos_os_meses(self):
    """Build an HTML report from eight operations and check it mentions 05/2019 and 11/2019.

    The two months asserted on are the ones whose operation has a negative ``qtd``.
    """
    # One tuple per operation: (ticker, qtd, data, preco).
    operations = (
        ('MAXR11', 100, datetime.date(2019, 3, 11), 100),
        ('PETR4', 100, datetime.date(2019, 4, 11), 100),
        ('XPLG11', 100, datetime.date(2019, 4, 12), 200),
        ('XPLG11', -50, datetime.date(2019, 5, 12), 220),
        ('MAXR11', 100, datetime.date(2019, 6, 11), 100),
        ('PETR4', 100, datetime.date(2019, 10, 11), 100),
        ('XPLG11', 100, datetime.date(2019, 10, 12), 200),
        ('XPLG11', -50, datetime.date(2019, 11, 12), 220),
    )
    data = [
        {
            'ticker': ticker,
            'qtd': quantity,
            'data': trade_date,
            'preco': price,
            'aquisicao_via': 'HomeBroker',
        }
        for ticker, quantity, trade_date, price in operations
    ]

    df = create_testing_dataframe(data)
    calcula_custodia(df)
    calculo_ir = CalculoIr(df=df)
    calculo_ir.calcula()

    from py_w3c.validators.html.validator import HTMLValidator

    html = relatorio_html(calculo_ir)
    # NOTE(review): this asserts only on the return value of ``validate_fragment``;
    # confirm that value reflects validation errors rather than just a completed request.
    fragment_ok = HTMLValidator().validate_fragment(html)
    assert fragment_ok
    for month_label in ('MES : 05/2019', 'MES : 11/2019'):
        assert month_label in html