def validate_task(url):
    """Validate the document at ``url`` with Schemato.

    Returns whatever ``Schemato(url).validate()`` returns. If ``validate()``
    raises, the exception is printed and a ``{"msg": <text>}`` dict is
    returned instead. Errors raised while constructing ``Schemato(url)``
    are not caught and propagate to the caller.
    """
    sc = Schemato(url)
    try:
        res = sc.validate()
    except Exception as e:  # broad catch kept: any failure becomes a message
        # Parenthesised single-argument print behaves the same on Python 2
        # (print statement) and Python 3 (print function).
        print(e)
        # ``e.message`` does not exist on Python 3 and is empty on Python 2
        # for exceptions built with several arguments; fall back to str(e).
        res = {"msg": getattr(e, "message", None) or str(e)}
    # The result was previously computed but discarded.
    return res
def test_html_parse(self):
    """Validate the saved Fox News page passed in as raw HTML text.

    For the result whose ``classname`` is ``"ParselyPageValidator"``, the
    error list must contain the invalid parsely-page field message and
    hold exactly one error. Results from other validators are ignored.

    NOTE(review): if no ``ParselyPageValidator`` result is returned, the
    loop body never runs and the test passes without checking anything.
    """
    # The context manager closes the fixture file even if read() raises;
    # the handle was previously left open.
    with open("test_documents/foxnews.html", "r") as fixture:
        text = fixture.read()
    scm = Schemato(text, url="http://foxnews.com", loglevel="ERROR")
    results = scm.validate()
    for res in results:
        if res.classname == "ParselyPageValidator":
            self.assertTrue(
                "ttle - invalid parsely-page field"
                in [a.string for a in res.errors])
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(res.errors), 1)
def test_html_parse(self):
    """Validate the saved Fox News page passed in as raw HTML text.

    For the result whose ``classname`` is ``"ParselyPageValidator"``, the
    error list must contain the invalid parsely-page field message and
    hold exactly one error. Results from other validators are ignored.

    NOTE(review): if no ``ParselyPageValidator`` result is returned, the
    loop body never runs and the test passes without checking anything.
    """
    # The context manager closes the fixture file even if read() raises;
    # the handle was previously left open.
    with open("test_documents/foxnews.html", "r") as fixture:
        text = fixture.read()
    scm = Schemato(text, url="http://foxnews.com", loglevel="ERROR")
    results = scm.validate()
    for res in results:
        if res.classname == "ParselyPageValidator":
            self.assertTrue(
                "ttle - invalid parsely-page field"
                in [a.string for a in res.errors])
            # assertEqual reports the actual count when the check fails.
            self.assertEqual(len(res.errors), 1)
def test_schema_errors(self):
    """Check the schema.org errors reported for ``schema_errors.html``.

    For the result whose ``classname`` is ``'SchemaOrgSchemaDef'``, at
    least one error must be present and every reported error string must
    appear in the expected table. Other results are skipped.
    """
    validation = Schemato("test_documents/schema_errors.html").validate()

    # (source line, line number, message) for each expected error.
    rows = [
        ('<a itemprop="copyrightNotice"', 63,
         'copyrightNotice - invalid member of NewsArticle'),
        ('<meta itemprop="tickerSymbol" content="NYSE NYT"/>', 74,
         'tickerSymbol - invalid member of Organization'),
        ('itemprop="createdBy"', 47,
         'createdBy - invalid member of NewsArticle'),
        ('<a itemprop="usageTerms"', 78,
         'usageTerms - invalid member of NewsArticle'),
        ('itemtype="http://schema.org/UserComment"', 111,
         'UserComment - invalid class'),
        ('itemtype="http://schema.org/UserComment"', 111,
         'UserComment - invalid class'),
        ('itemtype="http://schema.org/UserComment"', 111,
         'UserComment - invalid class'),
    ]
    expected = {
        'classname': 'SchemaOrgSchemaDef',
        'errors': [
            {'line': line, 'num': num, 'string': message, 'level': 'Error'}
            for line, num, message in rows
        ],
        'namespace': 'http://schema.org/',
        'warnings': [],
    }
    # Only the message strings are compared; build the list once.
    known_messages = [entry['string'] for entry in expected['errors']]

    for outcome in validation:
        if outcome.classname != 'SchemaOrgSchemaDef':
            continue
        self.assertTrue(len(outcome.errors) > 0)
        for problem in outcome.errors:
            self.assertTrue(problem.string in known_messages)
def test_schema_errors(self):
    """Check the schema.org errors reported for ``schema_errors.html``.

    For the result whose ``classname`` is ``'SchemaOrgSchemaDef'``, at
    least one error must be present and every reported error string must
    appear in the expected table. Other results are skipped.
    """
    validation = Schemato("test_documents/schema_errors.html").validate()

    # (source line, line number, message) for each expected error.
    rows = [
        ('<a itemprop="copyrightNotice"', 63,
         'copyrightNotice - invalid member of NewsArticle'),
        ('<meta itemprop="tickerSymbol" content="NYSE NYT"/>', 74,
         'tickerSymbol - invalid member of Organization'),
        ('itemprop="createdBy"', 47,
         'createdBy - invalid member of NewsArticle'),
        ('<a itemprop="usageTerms"', 78,
         'usageTerms - invalid member of NewsArticle'),
        ('itemtype="http://schema.org/UserComment"', 111,
         'UserComment - invalid class'),
        ('itemtype="http://schema.org/UserComment"', 111,
         'UserComment - invalid class'),
        ('itemtype="http://schema.org/UserComment"', 111,
         'UserComment - invalid class'),
    ]
    expected = {
        'classname': 'SchemaOrgSchemaDef',
        'errors': [
            {'line': line, 'num': num, 'string': message, 'level': 'Error'}
            for line, num, message in rows
        ],
        'namespace': 'http://schema.org/',
        'warnings': [],
    }
    # Only the message strings are compared; build the list once.
    known_messages = [entry['string'] for entry in expected['errors']]

    for outcome in validation:
        if outcome.classname != 'SchemaOrgSchemaDef':
            continue
        self.assertTrue(len(outcome.errors) > 0)
        for problem in outcome.errors:
            self.assertTrue(problem.string in known_messages)
def test_schema_no_errors(self):
    """Validate ``test_documents/schema.html``; every result must have length 0."""
    for outcome in Schemato("test_documents/schema.html").validate():
        self.assertTrue(len(outcome) == 0)
def assert_no_validation_errors(self, doc):
    """Fail unless validating ``doc`` yields no warnings and no errors.

    ``doc`` is handed to ``Schemato`` unchanged. Each validation result
    must have empty ``warnings`` and ``errors`` collections.

    Raises:
        AssertionError: if any result carries a warning or an error.
    """
    results = Schemato(doc).validate()
    offending = [
        r for r in results
        if len(r.warnings) != 0 or len(r.errors) != 0
    ]
    # Raised explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``, and so the failure says how many results failed.
    if offending:
        raise AssertionError(
            "%d validation result(s) contain warnings or errors"
            % len(offending))
def test_schema_no_errors(self):
    """Validate ``test_documents/schema.html``; every result must have length 0."""
    for outcome in Schemato("test_documents/schema.html").validate():
        self.assertTrue(len(outcome) == 0)
def assert_no_validation_errors(self, doc):
    """Fail unless validating ``doc`` yields no warnings and no errors.

    ``doc`` is handed to ``Schemato`` unchanged. Each validation result
    must have empty ``warnings`` and ``errors`` collections.

    Raises:
        AssertionError: if any result carries a warning or an error.
    """
    results = Schemato(doc).validate()
    offending = [
        r for r in results
        if len(r.warnings) != 0 or len(r.errors) != 0
    ]
    # Raised explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``, and so the failure says how many results failed.
    if offending:
        raise AssertionError(
            "%d validation result(s) contain warnings or errors"
            % len(offending))