def test_can_get_default_violations_values(self):
    config = Config()
    config.REQUIRED_META_TAGS = ['description']

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = RequiredMetaTagsValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('absent.meta.tags')
    expect(violations_values['absent.meta.tags']).to_length(2)
    expect(violations_values['absent.meta.tags']).to_be_like({
        'value': config.REQUIRED_META_TAGS,
        'description': config.get_description('REQUIRED_META_TAGS')
    })
def test_update_pages_score(self):
    config = Config()
    config.MAX_PAGE_SCORE = 15000000

    self.db.query(Page).delete()
    self.sync_cache.redis.delete('pages-score')

    page1 = PageFactory.create(score=3)
    page2 = PageFactory.create(score=0)

    # page1 is incremented three times, page2 only once
    for i in range(3):
        self.sync_cache.increment_page_score(page1.id)
    self.sync_cache.increment_page_score(page2.id)

    # database scores are unchanged until update_pages_score runs
    expect(page1.score).to_equal(3)
    expect(page2.score).to_equal(0)

    Page.update_pages_score(self.db, self.sync_cache, config)
    self.db.flush()

    self.db.refresh(page1)
    self.db.refresh(page2)

    expect(page1.score).to_equal(6)
    expect(page2.score).to_equal(1)
def test_can_get_default_violations_values(self):
    config = Config()
    config.MAX_TITLE_SIZE = 70

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = TitleValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('page.title.size')
    expect(violations_values['page.title.size']).to_length(2)
    expect(violations_values['page.title.size']).to_be_like({
        'value': config.MAX_TITLE_SIZE,
        'description': config.get_description('MAX_TITLE_SIZE')
    })
def test_can_get_default_violations_values(self):
    config = Config()
    config.MAX_HEADING_HIEARARCHY_SIZE = 150

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = HeadingHierarchyValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('page.heading_hierarchy.size')
    expect(violations_values['page.heading_hierarchy.size']).to_length(2)
    expect(violations_values['page.heading_hierarchy.size']).to_equal({
        'value': config.MAX_HEADING_HIEARARCHY_SIZE,
        'description': config.get_description('MAX_HEADING_HIEARARCHY_SIZE')
    })
def test_query_string_without_params(self):
    config = Config()
    config.FORCE_CANONICAL = False

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html><head></head></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = LinkWithRelCanonicalValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.head': [{}]}
    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_can_get_default_violations_values(self):
    config = Config()
    config.FORCE_CANONICAL = False

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = LinkWithRelCanonicalValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('absent.meta.canonical')
    expect(violations_values['absent.meta.canonical']).to_length(2)
    expect(violations_values['absent.meta.canonical']).to_be_like({
        'value': config.FORCE_CANONICAL,
        'description': config.get_description('FORCE_CANONICAL')
    })
def test_can_get_default_violations_values(self):
    config = Config()
    config.SCHEMA_ORG_ITEMTYPE = [
        'http://schema.org/WebPage',
        'http://schema.org/AboutPage',
    ]

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = SchemaOrgItemTypeValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('invalid.schema.itemtype')
    expect(violations_values['invalid.schema.itemtype']).to_length(2)
    expect(violations_values['invalid.schema.itemtype']).to_equal({
        'value': config.SCHEMA_ORG_ITEMTYPE,
        'description': config.get_description('SCHEMA_ORG_ITEMTYPE')
    })
def test_authenticate_user_not_valid_for_this_app(self):
    self.db.query(User).delete()
    UserFactory(email='*****@*****.**')

    mock_response = Mock(
        code=200,
        body='{"issued_to": "222", "email": "*****@*****.**"}'
    )

    def handle_request(url, handler, proxy_host, proxy_port):
        handler(mock_response)

    fetch_mock = Mock()
    fetch_mock.side_effect = handle_request

    config = Config()
    config.GOOGLE_CLIENT_ID = '000'
    access_token = '111'

    User.authenticate(
        access_token, fetch_mock, self.db, config, callback=self.stop
    )
    response = self.wait()

    expect(response.get('status')).to_equal(401)
    expect(response.get('reason')).to_equal(
        "Token's client ID does not match app's."
    )
def test_can_get_default_violations_values(self):
    config = Config()
    config.BLACKLIST_DOMAIN = ['a.com']

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = BlackListValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('blacklist.domains')
    expect(violations_values['blacklist.domains']).to_length(2)
    expect(violations_values['blacklist.domains']).to_be_like({
        'value': config.BLACKLIST_DOMAIN,
        'description': config.get_description('BLACKLIST_DOMAIN')
    })
def test_page_without_head_tag(self):
    config = Config()
    config.FORCE_CANONICAL = False

    page = PageFactory.create(url='http://globo.com/1?item=test')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = LinkWithRelCanonicalValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.head': None}
    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_can_get_default_violations_values(self):
    config = Config()
    config.MAX_IMAGE_ALT_SIZE = 70

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = ImageAltValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('invalid.images.alt_too_big')
    expect(violations_values['invalid.images.alt_too_big']).to_length(2)
    expect(violations_values['invalid.images.alt_too_big']).to_be_like({
        'value': config.MAX_IMAGE_ALT_SIZE,
        'description': config.get_description('MAX_IMAGE_ALT_SIZE')
    })
def test_authenticate_unauthorized_user(self):
    self.db.query(User).delete()

    mock_response = Mock(
        code=200,
        body='{"issued_to": "000", "email": "*****@*****.**"}'
    )

    def handle_request(url, handler, proxy_host, proxy_port):
        handler(mock_response)

    fetch_mock = Mock()
    fetch_mock.side_effect = handle_request

    config = Config()
    config.GOOGLE_CLIENT_ID = '000'
    access_token = '111'

    User.authenticate(
        access_token, fetch_mock, self.db, config, callback=self.stop
    )
    response = self.wait()

    expect(response.get('status')).to_equal(403)
    expect(response.get('reason')).to_equal('Unauthorized user')
def test_can_get_default_violations_values(self):
    config = Config()
    config.METATAG_DESCRIPTION_MAX_SIZE = 300

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = MetaTagsValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('page.metatags.description_too_big')
    expect(violations_values['page.metatags.description_too_big']).to_length(2)
    expect(violations_values['page.metatags.description_too_big']).to_be_like({
        'value': config.METATAG_DESCRIPTION_MAX_SIZE,
        'description': config.get_description('METATAG_DESCRIPTION_MAX_SIZE')
    })
def test_can_get_default_violations_values(self):
    config = Config()
    config.MAX_JS_KB_PER_PAGE_AFTER_GZIP = 70
    config.MAX_JS_REQUESTS_PER_PAGE = 5

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = JSRequestsValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('total.size.js')
    expect(violations_values).to_include('total.requests.js')

    expect(violations_values['total.size.js']).to_length(2)
    expect(violations_values['total.requests.js']).to_length(2)

    expect(violations_values['total.size.js']).to_be_like({
        'value': config.MAX_JS_KB_PER_PAGE_AFTER_GZIP,
        'description': config.get_description('MAX_JS_KB_PER_PAGE_AFTER_GZIP')
    })
    expect(violations_values['total.requests.js']).to_be_like({
        'value': config.MAX_JS_REQUESTS_PER_PAGE,
        'description': config.get_description('MAX_JS_REQUESTS_PER_PAGE')
    })
def test_can_validate_title_size_with_domain(self):
    self.db.query(Key).delete()
    self.db.query(KeysCategory).delete()

    config = Config()
    config.MAX_TITLE_SIZE = 70

    key = Key(name='page.title.size')

    domain = DomainFactory.create(name='globo.com', url='http://globo.com')
    page = PageFactory.create(domain=domain, url='http://globo.com/a.html')

    self.sync_cache.redis.delete('violations-prefs-%s' % domain.name)

    # the domain-level preference (10) must override the default value (70)
    DomainsViolationsPrefsFactory.create(
        domain=domain, key=key, value='10'
    )

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[],
        cache=self.sync_cache
    )

    title = 'a' * 80
    content = '<html><title>%s</title></html>' % title

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    reviewer.violation_definitions = {
        'page.title.size': {'default_value': 70, 'key': key},
    }

    validator = TitleValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.title_count': 1,
        'page.title': title
    }
    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.title.size',
        value={'max_size': 10, 'page_url': page.url},
        points=10
    )
def test_can_validate_image_requests_on_globo_html(self):
    config = Config()
    config.MAX_IMG_REQUESTS_PER_PAGE = 50
    config.MAX_KB_SINGLE_IMAGE = 6
    config.MAX_IMG_KB_PER_PAGE = 100

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = self.get_file('globo.html')

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = ImageRequestsValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.images': [
            (
                'some_image.jpg',
                Mock(status_code=200, text=self.get_file('2x2.png'))
            ) for i in xrange(60)
        ],
        'total.size.img': 106,
    }

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.requests.img',
            value={'total': 60, 'limit': 10},
            points=50
        ))

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='single.size.img',
            value={
                'limit': 6,
                'over_max_size': set([('some_image.jpg', 6.57421875)])
            },
            points=0.57421875
        ))
def test_can_validate_css_requests_on_globo_html(self):
    config = Config()
    config.MAX_CSS_REQUESTS_PER_PAGE = 1
    config.MAX_CSS_KB_PER_PAGE_AFTER_GZIP = 0.0

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = self.get_file('globo.html')

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = CSSRequestsValidator(reviewer)

    css = {
        'url': 'some_style.css',
        'status': 200,
        'content': '#id{display:none}',
        'html': None
    }
    validator.get_response = Mock(return_value=css)

    validator.add_violation = Mock()
    validator.review.data = {
        'total.requests.css': 7,
        'total.size.css.gzipped': 0.05
    }

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.requests.css',
            value={'over_limit': 6, 'total_css_files': 7},
            points=30
        ))

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.size.css',
            value=0.05,
            points=0
        ))
def test_can_get_violation_definitions(self):
    config = Config()
    config.MAX_JS_REQUESTS_PER_PAGE = 1
    config.MAX_JS_KB_PER_PAGE_AFTER_GZIP = 0.03

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = self.get_file('globo.html')

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = JSRequestsValidator(reviewer)

    definitions = validator.get_violation_definitions()

    expect(definitions).to_length(2)
    expect('total.size.js' in definitions).to_be_true()
    expect('total.requests.js' in definitions).to_be_true()

    total_size_message = validator.get_total_size_message(0.03)
    requests_js_message = validator.get_requests_js_message({
        'total_js_files': 7,
        'over_limit': 6
    })

    expect(total_size_message).to_equal(
        'There\'s 0.03kb of JavaScript in this page and that adds '
        'up to more download time slowing down the page rendering '
        'to the user.'
    )
    expect(requests_js_message).to_equal(
        'This page has 7 JavaScript request (6 over limit). Having too '
        'many requests impose a tax in the browser due to handshakes.'
    )
def test_can_get_next_jobs_count(self):
    config = Config()
    config.REVIEW_EXPIRATION_IN_SECONDS = 100

    for x in range(3):
        PageFactory.create()

    next_job_list = Page.get_next_jobs_count(self.db, config)
    expect(next_job_list).to_equal(3)

    for x in range(2):
        PageFactory.create()

    next_job_list = Page.get_next_jobs_count(self.db, config)
    expect(next_job_list).to_equal(5)
def test_can_validate_without_meta_tags(self):
    config = Config()

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = OpenGraphValidator(reviewer)
    validator.add_violation = Mock()
    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_can_validate_last_modified(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = LastModifiedValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.last_modified': None
    }
    validator.review.facts = {
        'page.last_modified': None
    }

    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.last_modified.not_found',
        value=page.url,
        points=50
    )
def test_validate(self):
    page = PageFactory.create(url='http://globo.com/')

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = RobotsValidator(reviewer)
    response = Mock(status_code=200, text='key:value')
    validator.review.data['robots.response'] = response

    validator.add_violation = Mock()
    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='robots.sitemap.not_found',
            value=None,
            points=100
        ))

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='robots.disallow.not_found',
            value=None,
            points=100
        ))
def test_can_load_url_with_empty_headers(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = '<html></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content),
    }

    reviewer.responses[page.url] = result
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = LastModifiedFacter(reviewer)
    facter.add_fact = Mock()
    facter.get_facts()

    expect(facter.review.data).to_length(0)
    expect(facter.review.data).to_be_like({})
    expect(facter.add_fact.called).to_be_false()
def test_can_validate_css_requests_empty_html(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[],
        cache=self.sync_cache
    )

    reviewer.violation_definitions = {
        'total.requests.css': {'default_value': 1},
        'total.size.css': {'default_value': 0.0},
    }

    result = {
        'url': page.url,
        'status': 200,
        'content': None,
        'html': None
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = CSSRequestsValidator(reviewer)
    validator.add_violation = Mock()

    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_can_validate_css_requests_on_globo_html(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[],
        cache=self.sync_cache
    )

    reviewer.violation_definitions = {
        'total.requests.css': {'default_value': 1},
        'total.size.css': {'default_value': 0.0},
    }

    content = self.get_file('globo.html')

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = CSSRequestsValidator(reviewer)

    css = {
        'url': 'some_style.css',
        'status': 200,
        'content': '#id{display:none}',
        'html': None
    }
    validator.get_response = Mock(return_value=css)

    validator.add_violation = Mock()
    validator.review.data = {
        'total.requests.css': 7,
        'total.size.css.gzipped': 0.05
    }

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.requests.css',
            value={'over_limit': 6, 'total_css_files': 7},
            points=30
        ))

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.size.css',
            value=0.05,
            points=0
        ))
def test_get_robots_from_root_domain(self):
    page = PageFactory.create(url="http://www.globo.com")

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    facter = RobotsFacter(reviewer)
    facter.async_get = Mock()
    facter.add_fact = Mock()
    facter.get_facts()

    robots_url = 'http://www.globo.com/robots.txt'

    expect(facter.review.data).to_length(1)
    expect(facter.review.data['robots.response']).to_equal(None)

    facter.async_get.assert_called_once_with(
        robots_url, facter.handle_robots_loaded
    )

    response = Mock(status_code=200, text='', headers={})
    facter.handle_robots_loaded(robots_url, response)

    expect(facter.review.data['robots.response']).to_equal(response)

    expect(facter.add_fact.call_args_list).to_include(
        call(
            key='robots.url',
            value=robots_url,
        ))
def test_add_violation_when_sitemap_with_good_link(self):
    page = PageFactory.create(url='http://globo.com')

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(
            status_code=200,
            text='data',
            url='http://g1.globo.com/%C3%BCmlat.php&q=name'
        )
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 20
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': [
            'http://g1.globo.com/%C3%BCmlat.php&q=name'
        ]
    }

    validator.add_violation = Mock()
    validator.flush = Mock()
    validator.validate()

    expect(validator.add_violation.call_count).to_equal(0)
    expect(validator.flush.call_count).to_equal(1)
def test_add_violation_when_sitemap_has_links_that_do_not_need_to_be_encoded(
        self):
    page = PageFactory.create(url='http://globo.com')

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=200, text='data')
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 20
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': ['http://g1.globo.com/1.html']
    }

    validator.add_violation = Mock()
    validator.validate()

    expect(validator.add_violation.call_count).to_equal(0)
def test_handle_sitemap_url_loaded(self):
    page = PageFactory.create(url="http://g1.globo.com/")

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )
    reviewer.enqueue = Mock()

    content = self.get_file('url_sitemap.xml')
    response = Mock(status_code=200, text=content)

    facter = SitemapFacter(reviewer)
    facter.async_get = Mock()
    facter.get_facts()

    facter.handle_sitemap_loaded("http://g1.globo.com/sitemap.xml", response)

    expect(
        facter.review.data['sitemap.files.size']["http://g1.globo.com/sitemap.xml"]
    ).to_equal(0.296875)
    expect(
        facter.review.data['sitemap.urls']["http://g1.globo.com/sitemap.xml"]
    ).to_equal(set(['http://domain.com/1.html', 'http://domain.com/2.html']))
    expect(facter.review.facts['total.size.sitemap']['value']).to_equal(0.296875)
    expect(facter.review.facts['total.size.sitemap.gzipped']['value']).to_equal(0.1494140625)
    expect(facter.review.data['total.size.sitemap']).to_equal(0.296875)
    expect(facter.review.data['total.size.sitemap.gzipped']).to_equal(0.1494140625)
    expect(
        facter.review.data['sitemap.files.urls']["http://g1.globo.com/sitemap.xml"]
    ).to_equal(2)
    expect(facter.review.facts['total.sitemap.urls']['value']).to_equal(2)
def test_add_violation_when_404(self):
    page = PageFactory.create(url='http://globo.com')

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=404, text=None)
    }

    validator.add_violation = Mock()
    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='sitemap.not_found',
        value='http://g1.globo.com/sitemap.xml',
        points=100
    )
def test_validate(self):
    config = Config()

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html><body></body></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = SchemaOrgItemTypeValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.body': [{}]}
    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(key='absent.schema.itemscope', value=None, points=10))
    expect(validator.add_violation.call_args_list).to_include(
        call(key='absent.schema.itemtype', value=None, points=10))
def test_can_validate_no_title_tag(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    content = '<html></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = TitleValidator(reviewer)
    validator.add_violation = Mock()
    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.title.not_found',
        value=page.url,
        points=50
    )
def test_can_validate_with_headers(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = LastModifiedValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10)
    }
    validator.review.facts = {
        'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10)
    }

    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_can_get_facts(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = '<html><body class="test"></body></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = BodyFacter(reviewer)
    facter.add_fact = Mock()
    facter.get_facts()

    expect(facter.review.data).to_length(1)
    expect(facter.review.data).to_include('page.body')
    expect(facter.review.data['page.body'][0].tag).to_equal('body')
    expect(facter.add_fact.called).to_be_false()
def test_add_violation_when_sitemap_is_too_large(self):
    page = PageFactory.create(url='http://globo.com')

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10241
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=200, text='data')
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': []
    }

    validator.add_violation = Mock()
    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='total.size.sitemap',
        value={
            'url': 'http://g1.globo.com/sitemap.xml',
            'size': 10.0009765625
        },
        points=10
    )
def test_can_validate_page_with_metatag_description_too_long(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[],
        cache=self.sync_cache
    )

    reviewer.violation_definitions = {
        'page.metatags.description_too_big': {'default_value': 300},
    }

    validator = MetaTagsValidator(reviewer)
    validator.add_violation = Mock()

    validator.review.data['meta.tags'] = [
        {'content': 'X' * 301, 'property': 'name', 'key': 'description'},
    ]
    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.metatags.description_too_big',
        value={'max_size': 300},
        points=20
    )

    validator.add_violation = Mock()
    validator.review.data['meta.tags'] = [
        {'content': 'X' * 300, 'property': 'name', 'key': 'description'},
    ]
    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_validate(self):
    config = Config()

    page = PageFactory.create(url='http://globo.com/1?item=test')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html><head></head></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = LinkWithRelCanonicalValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.head': [{}]}
    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(key='absent.meta.canonical', value=None, points=30))
def test_handle_url_loaded(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = self.get_file('globo.html')

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = LinkFacter(reviewer)
    facter.async_get = Mock()
    facter.get_facts()

    facter.handle_url_loaded(page.url, response)

    expect(facter.review.data).to_include('page.links')
    data = set([(page.url, response)])
    expect(facter.review.data['page.links']).to_equal(data)
def test_handle_url_loaded_with_empty_content(self):
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = ''

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': content
    }

    reviewer.responses[page.url] = result
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = JSFacter(reviewer)
    facter.async_get = Mock()
    facter.get_facts()

    facter.handle_url_loaded(page.url, response)

    expect(facter.review.data).to_include('total.size.js')
    expect(facter.review.data['total.size.js']).to_equal(0)
    expect(facter.review.data).to_include('total.size.js.gzipped')
    expect(facter.review.data['total.size.js.gzipped']).to_equal(0)
def test_authenticate(self, datetime_mock):
    dt = datetime(2014, 2, 14, 15, 0, 30)
    datetime_mock.now.return_value = dt

    self.db.query(User).delete()
    UserFactory(email='*****@*****.**')

    mock_response = Mock(
        code=200,
        body='{"issued_to": "000", "email": "*****@*****.**"}'
    )

    def handle_request(url, handler, proxy_host, proxy_port):
        handler(mock_response)

    fetch_mock = Mock()
    fetch_mock.side_effect = handle_request

    config = Config()
    config.GOOGLE_CLIENT_ID = '000'
    access_token = '111'

    User.authenticate(
        access_token, fetch_mock, self.db, config, callback=self.stop
    )
    response = self.wait()

    expect(response).to_be_like({
        'status': 200,
        'user': {
            'is_superuser': True,
            'fullname': u'Marcelo Jorge Vieira',
            'last_login': dt,
            'email': u'*****@*****.**'
        }
    })

    loaded_user = User.by_email('*****@*****.**', self.db)
    expect(loaded_user.last_login).to_equal(dt)
def test_can_validate_title_size(self):
    config = Config()
    config.MAX_TITLE_SIZE = 70

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[],
        cache=self.sync_cache
    )

    title = 'a' * 80
    content = '<html><title>%s</title></html>' % title

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    reviewer.violation_definitions = {
        'page.title.size': {'default_value': 70},
    }

    validator = TitleValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.title_count': 1,
        'page.title': title
    }
    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.title.size',
        value={'max_size': 70, 'page_url': page.url},
        points=10
    )
def test_can_validate_page_without_required_meta_tag(self):
    config = Config()
    config.REQUIRED_META_TAGS = ['description']

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[],
        cache=self.sync_cache
    )

    content = '<html></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    reviewer.violation_definitions = {
        'absent.meta.tags': {'default_value': ['description']},
    }

    validator = RequiredMetaTagsValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'meta.tags': [{'key': None}]
    }

    validator.validate()

    for tag in reviewer.config.REQUIRED_META_TAGS:
        validator.add_violation.assert_called_with(
            key='absent.meta.tags',
            value=[tag],
            points=20
        )
def test_can_remove_old_requests(self):
    self.db.query(Request).delete()

    config = Config()
    config.DAYS_TO_KEEP_REQUESTS = 1

    for i in range(4):
        RequestFactory.create(
            url='http://m.com/page-%d' % i,
            domain_name='m.com',
            status_code=200,
            completed_date=date.today() - timedelta(days=i)
        )

    Request.delete_old_requests(self.db, config)

    requests = self.db.query(Request).all()
    expect(requests).to_length(1)
def test_can_validate(self):
    config = Config()
    config.BLACKLIST_DOMAIN = ['a.com']

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<a href="http://a.com/test1">A</a>' \
              '<a href="http://b.com/test2">B</a>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = BlackListValidator(reviewer)
    validator.review.data = {
        'page.all_links': [
            {'href': 'http://a.com/test1'},
            {'href': 'http://b.com/test2'}
        ]
    }
    validator.add_violation = Mock()

    validator.validate()

    validator.add_violation.assert_called_once_with(
        points=100,
        key='blacklist.domains',
        value=['http://a.com/test1']
    )
def main():
    parser = SearchProvider.argparser()
    args = parser.parse_args()
    try:
        config = Config()
        if args.conf:
            config = config.load(args.conf[0])
        search_providers = load_classes(default=[config['SEARCH_PROVIDER']])
        if isinstance(search_providers, list) and len(search_providers) == 1:
            search_provider = search_providers.pop()
            search_provider.main()
        else:
            logging.error('Could not instantiate search provider!')
            sys.exit(1)
    except ConfigurationError:
        logging.error('Could not load config! Use --conf conf_file')
        sys.exit(1)
    except KeyError:
        logging.error('Could not parse config! Check its contents')
        sys.exit(1)
def test_remove_old_reviews(self):
    self.db.query(Violation).delete()
    self.db.query(Fact).delete()
    self.db.query(Key).delete()
    self.db.query(Review).delete()
    self.db.query(Page).delete()

    config = Config()
    config.NUMBER_OF_REVIEWS_TO_KEEP = 4

    dt = datetime(2013, 12, 11, 10, 9, 8)

    page1 = PageFactory.create()
    ReviewFactory.create(
        page=page1, is_active=True, completed_date=dt,
        number_of_violations=1, number_of_facts=1
    )

    page2 = PageFactory.create()
    ReviewFactory.create(page=page2, is_active=True, completed_date=dt)

    for x in range(6):
        dt = datetime(2013, 12, 11, 10, 10, x)
        ReviewFactory.create(
            page=page1, is_active=False, completed_date=dt,
            number_of_violations=2, number_of_facts=1
        )

    self.db.flush()

    reviews = self.db.query(Review).all()
    expect(reviews).to_length(8)

    violations = self.db.query(Violation).all()
    expect(violations).to_length(13)

    facts = self.db.query(Fact).all()
    expect(facts).to_length(7)

    Review.delete_old_reviews(self.db, config, page1)

    reviews = self.db.query(Review).all()
    expect(reviews).to_length(6)

    violations = self.db.query(Violation).all()
    expect(violations).to_length(9)

    facts = self.db.query(Fact).all()
    expect(facts).to_length(5)
def test_authenticate_invalid_token(self):
    self.db.query(User).delete()
    UserFactory(email='*****@*****.**')

    mock_response = Mock(
        code=400,
        body=dumps({
            "error": "invalid_token",
            "error_description": "Invalid Value"
        })
    )

    def handle_request(url, handler, proxy_host, proxy_port):
        handler(mock_response)

    fetch_mock = Mock()
    fetch_mock.side_effect = handle_request

    config = Config()
    config.GOOGLE_CLIENT_ID = '000'
    access_token = '111'

    User.authenticate(
        access_token, fetch_mock, self.db, config, callback=self.stop
    )
    response = self.wait()

    expect(response).to_be_like({
        'status': 400,
        'reason': 'Error',
        'details': '{"error_description":"Invalid Value", '
                   '"error":"invalid_token"}'
    })
def test_can_validate_single_image_html(self):
    config = Config()
    config.MAX_IMG_REQUESTS_PER_PAGE = 50
    config.MAX_KB_SINGLE_IMAGE = 6
    config.MAX_IMG_KB_PER_PAGE = 100

    page = PageFactory.create(url="http://globo.com")
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = "<html><img src='/some_image.jpg'/></html>"

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = ImageRequestsValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.images': [
            (
                'http://globo.com/some_image.jpg',
                Mock(status_code=200, text='bla')
            )
        ],
        'total.size.img': 60,
    }

    validator.validate()

    expect(validator.add_violation.called).to_be_false()
def test_force_canonical(self):
    config = Config()
    config.FORCE_CANONICAL = True

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html><head></head></html>'

    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }

    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = LinkWithRelCanonicalValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.head': [{}]}
    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='absent.meta.canonical',
            value=None,
            points=30
        ))
def test_can_validate_image_404(self):
    config = Config()
    config.MAX_IMG_REQUESTS_PER_PAGE = 50
    config.MAX_KB_SINGLE_IMAGE = 6
    config.MAX_IMG_KB_PER_PAGE = 100

    page = PageFactory.create(url="http://globo.com")
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = ImageRequestsValidator(reviewer)
    validator.add_violation = Mock()

    img_url = 'http://globo.com/some_image.jpg'
    validator.review.data = {
        'page.images': [
            (img_url, Mock(status_code=404, text=None))
        ],
        'total.size.img': 60,
    }

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='broken.img',
            value=set(['http://globo.com/some_image.jpg']),
            points=50
        ))