Beispiel #1
0
    def test_can_get_domains_details(self):
        # Clear the table first so the lengths asserted below are exact.
        self.db.query(Domain).delete()
        expect(Domain.get_domains_details(self.db)).to_length(0)

        main_domain = DomainFactory.create(name='domain-1.com',
                                           url='http://domain-1.com/')
        other_domain = DomainFactory.create(name='domain-2.com',
                                            url='http://domain-2.com/')
        # A third domain built from the factory defaults only.
        DomainFactory.create()

        main_page_a = PageFactory.create(domain=main_domain)
        main_page_b = PageFactory.create(domain=main_domain)
        other_page = PageFactory.create(domain=other_domain)

        # (domain, page, number_of_violations) for each active review.
        review_rows = (
            (main_domain, main_page_a, 20),
            (main_domain, main_page_b, 10),
            (other_domain, other_page, 30),
        )
        for owner, reviewed_page, violation_total in review_rows:
            ReviewFactory.create(domain=owner,
                                 page=reviewed_page,
                                 is_active=True,
                                 number_of_violations=violation_total)

        # (status_code, response_time) recorded under the first domain's name.
        request_rows = (
            (200, 0.25),
            (304, 0.35),
            (400, 0.25),
            (403, 0.35),
            (404, 0.25),
        )
        for status, elapsed in request_rows:
            RequestFactory.create(status_code=status,
                                  domain_name=main_domain.name,
                                  response_time=elapsed)

        details = Domain.get_domains_details(self.db)
        expect(details).to_length(3)

        first = details[0]
        expect(first).to_length(10)
        expect(first['url']).to_equal('http://domain-1.com/')
        expect(first['name']).to_equal('domain-1.com')
        expect(first['violationCount']).to_equal(30)
        expect(first['pageCount']).to_equal(2)
        expect(first['reviewCount']).to_equal(2)
        expect(first['reviewPercentage']).to_equal(100.0)
        expect(first['errorPercentage']).to_equal(60.0)
        expect(first['is_active']).to_be_true()
        expect(first['averageResponseTime']).to_equal(0.3)
Beispiel #2
0
    def save_request(self, url, response):
        """Store a ``Request`` row for ``url`` and publish a notification.

        Nothing happens when ``response`` is falsy, or when the domain name
        extracted from ``url`` is not among the names returned by
        ``Domain.get_domain_names``.
        """
        if not response:
            return

        elapsed = response.request_time
        final_url = response.effective_url
        code = response.status_code

        domain_name, _domain_url = get_domain_from_url(url)
        known_names = Domain.get_domain_names(self.db)
        if domain_name not in known_names:
            return

        self.db.add(Request(
            domain_name=domain_name,
            url=url,
            effective_url=final_url,
            status_code=int(code),
            response_time=elapsed,
            completed_date=datetime.now().date(),
            review_url=self.page_url
        ))

        self.cache.increment_requests_count()

        message = dumps({'type': 'new-request', 'url': str(url)})
        self.publish(message)
Beispiel #3
0
    def get(self, domain_name):
        """Write the domain's violation counts, grouped by category, as JSON.

        Responds with 404 when no domain named ``domain_name`` exists.
        The payload carries the domain id/name/URL, one entry per category
        and the sum of all category counts under ``total``.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)

        if not domain:
            self.set_status(404, 'Domain %s not found' % domain_name)
            return

        violation_defs = self.application.violation_definitions

        # Guard against a falsy (e.g. None) result so the handler answers
        # with zero violations instead of failing on ``.get``.
        grouped_violations = self.girl.get(
            'violation_count_by_category_for_domains') or {}

        violations = [
            {
                'categoryId': item['category_id'],
                'categoryName': violation_defs[item['key_name']]['category'],
                'count': item['violation_count']
            }
            for item in grouped_violations.get(domain.id, [])
        ]
        total = sum(entry['count'] for entry in violations)

        result = {
            "domainId": domain.id,
            'domainName': domain.name,
            'domainURL': domain.url,
            'total': total,
            'violations': violations
        }

        self.write_json(result)
Beispiel #4
0
    def get(self, domain_name):
        """Write one page of the domain's active reviews as JSON.

        Query arguments: ``term`` (optional page filter), ``current_page``
        (default 1) and ``page_size`` (default 10). Responds with 404 when no
        domain named ``domain_name`` exists, or with the status and reason
        found under ``error`` when the search provider returns one.
        """
        term = self.get_argument('term', None)
        current_page = int(self.get_argument('current_page', 1))
        page_size = int(self.get_argument('page_size', 10))

        domain = Domain.get_domain_by_name(domain_name, self.db)

        if not domain:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            # Every other exit of this handler finishes the request
            # explicitly; do the same here so the 404 is sent as well.
            self.finish()
            return

        reviews = yield self.application.search_provider.get_domain_active_reviews(
            domain=domain,
            current_page=current_page,
            page_size=page_size,
            page_filter=term,
        )

        if 'error' in reviews:
            self.set_status(reviews['error']['status_code'],
                            reviews['error']['reason'])
            self.finish()
            return

        # The provider may omit the total; it is filled in from the cache
        # only when no filter term was supplied.
        if 'reviewsCount' not in reviews:
            if not term:
                reviews[
                    'reviewsCount'] = yield self.cache.get_active_review_count(
                        domain)
            else:
                reviews['reviewsCount'] = None

        self.write_json(reviews)
        self.finish()
Beispiel #5
0
    def save_request(self, url, response):
        """Store a ``Request`` row for ``url`` and publish a notification.

        Nothing happens when ``response`` is falsy, or when
        ``self.domain_name`` is not among the names returned by
        ``Domain.get_domain_names``.
        """
        if not response:
            return

        elapsed = response.request_time
        final_url = response.effective_url
        code = response.status_code

        known_names = Domain.get_domain_names(self.db)
        if self.domain_name not in known_names:
            return

        self.db.add(Request(
            domain_name=self.domain_name,
            url=url,
            effective_url=final_url,
            status_code=int(code),
            response_time=elapsed,
            completed_date=datetime.now().date(),
            review_url=self.page_url
        ))

        # The published message carries the UTF-8 encoded form of the URL.
        encoded_url = url.encode('utf-8')
        message = dumps({'type': 'new-request', 'url': str(encoded_url)})
        self.publish(message)
Beispiel #6
0
    def get(self, domain_name):
        """Write the domain's violation preferences as a JSON list.

        Responds with 404 when no domain named ``domain_name`` exists.
        Preferences whose key has no entry in the application's violation
        definitions are left out of the result.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            return

        prefs = DomainsViolationsPrefs.get_domains_violations_prefs_by_domain(
            self.db, domain.name
        )
        definitions = self.application.violation_definitions

        entries = []
        for pref in prefs:
            pref_key = pref.get('key')
            definition = definitions.get(pref_key)
            if definition is not None:
                entries.append({
                    'key': pref_key,
                    'title': definition.get('default_value_description'),
                    'category': definition.get('category'),
                    'value': pref.get('value'),
                    'default_value': definition.get('default_value'),
                    'unit': definition.get('unit')
                })

        self.write_json(entries)
Beispiel #7
0
    def get_count(self, key, domain_name, expiration, get_count_method):
        """Return an integer count, served from redis when already cached.

        On a miss the count comes from ``Page.get_page_count`` when there is
        no domain, otherwise from ``get_count_method(domain)``; it is then
        stored with ``setex`` using ``expiration``.

        ``domain_name`` may be a name or a ``Domain`` instance.
        """
        lookup_key = '%s-%s' % (self.get_domain_name(domain_name), key)
        cached = self.redis.get(lookup_key)
        if cached is not None:
            return int(cached)

        # Resolve a plain name into a Domain; Domain instances and falsy
        # values pass through untouched.
        domain = domain_name
        if domain and not isinstance(domain, Domain):
            domain = Domain.get_domain_by_name(domain_name, self.db)

        if domain is None:
            fresh = Page.get_page_count(self.db)
        else:
            fresh = get_count_method(domain)
        fresh = int(fresh)

        # The key is rebuilt from the resolved domain before storing.
        store_key = '%s-%s' % (self.get_domain_name(domain), key)
        self.redis.setex(store_key, expiration, value=fresh)

        return fresh
Beispiel #8
0
    def test_can_insert_default_violations_values_for_all_domains(self):
        # One domain starts out with a single custom preference ...
        DomainsViolationsPrefsFactory.create(
            domain=Domain(name='globo.com'),
            key=Key(name='some.random.fact'),
            value='whatever')

        # ... and three more domains are created without any.
        for index in range(3):
            DomainFactory.create(name='g%d.com' % index)

        prefs_before = DomainsViolationsPrefs.get_domains_violations_prefs(
            self.db)
        expect(prefs_before).to_length(1)

        default_violations_values = {
            'page.title.size': 100,
            'total.requests.img': 5,
        }

        # Keys are created in the same order as the defaults are listed.
        violation_definitions = {}
        for key_name in ('page.title.size', 'total.requests.img'):
            violation_definitions[key_name] = {
                'key': KeyFactory.create(name=key_name),
                'default_value': default_violations_values[key_name]
            }

        DomainsViolationsPrefs.insert_default_violations_values_for_all_domains(
            self.db, default_violations_values, violation_definitions,
            self.cache)

        prefs_after = DomainsViolationsPrefs.get_domains_violations_prefs(
            self.db)
        expect(prefs_after).to_length(4)

        # Every domain gets the defaults; globo.com also keeps its own entry.
        expected = dict(
            ('g%d.com' % index, dict(default_violations_values))
            for index in range(3)
        )
        expected['globo.com'] = dict(default_violations_values)
        expected['globo.com']['some.random.fact'] = 'whatever'

        expect(prefs_after).to_be_like(expected)
Beispiel #9
0
    def get(self, domain_name, key_category_id):
        """Write the domain's top violations for one category as JSON.

        Responds with 404 when no domain named ``domain_name`` exists. The
        number of entries requested from the cache comes from the
        ``TOP_CATEGORY_VIOLATIONS_LIMIT`` configuration value.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, 'Domain %s not found' % domain_name)
            return

        definitions = self.application.violation_definitions
        limit = self.application.config.get('TOP_CATEGORY_VIOLATIONS_LIMIT')

        top_violations = yield self.cache.get_top_in_category_for_domain(
            domain, key_category_id, limit)

        # Each cache entry is a (key name, count) pair.
        violations = [
            {'title': definitions[key_name]['title'], 'count': count}
            for key_name, count in top_violations
        ]

        self.write_json({
            "domainId": domain.id,
            'domainName': domain.name,
            'domainURL': domain.url,
            'categoryId': key_category_id,
            'violations': violations
        })
Beispiel #10
0
    def post(self, domain_name):
        """Flip the ``is_active`` flag of the named domain.

        Responds with 404 when no domain named ``domain_name`` exists.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)

        if domain:
            domain.is_active = not domain.is_active
            return

        self.set_status(404, 'Domain %s not found' % domain_name)
Beispiel #11
0
    def test_domains_violations_prefs_str(self):
        # Create a preference, load it back by id and check its str() form.
        created = DomainsViolationsPrefsFactory.create(
            domain=Domain(name='globo.com'),
            key=Key(name='some.random.fact'),
            value='whatever',
        )

        reloaded = self.db.query(DomainsViolationsPrefs).get(created.id)
        text = str(reloaded)

        expect(text).to_be_like('some.random.fact (globo.com): whatever')
Beispiel #12
0
    def test_can_get_active_domains(self):
        # Only the domain created as active should come back.
        self.db.query(Domain).delete()

        active_domain = DomainFactory(is_active=True)
        DomainFactory(is_active=False)

        found = Domain.get_active_domains(self.db)

        expect(found).to_length(1)
        only_match = found[0]
        expect(only_match.id).to_equal(active_domain.id)
Beispiel #13
0
    def test_get_domain_names(self):
        # Start from an empty table, then create exactly the expected names.
        self.db.query(Domain).delete()

        expected_names = ['g1.globo.com', 'globoesporte.globo.com']
        for name in expected_names:
            DomainFactory.create(name=name)

        names = Domain.get_domain_names(self.db)

        expect(names).to_be_like(expected_names)
Beispiel #14
0
    def test_can_get_active_domains(self):
        # With one active and one inactive domain, only the active one is
        # expected in the result.
        self.db.query(Domain).delete()

        enabled = DomainFactory(is_active=True)
        DomainFactory(is_active=False)

        result = Domain.get_active_domains(self.db)
        expect(result).to_length(1)

        returned_id = result[0].id
        expect(returned_id).to_equal(enabled.id)
Beispiel #15
0
    def test_can_set_domain_to_inactive(self):
        # POSTing to change-status on an active domain must deactivate it.
        domain = DomainFactory.create(
            url="http://www.domain.com",
            name="domain.com",
            is_active=True,
        )
        status_path = '/domains/%s/change-status/' % domain.name

        response = yield self.authenticated_fetch(
            status_path, method='POST', body='')
        expect(response.code).to_equal(200)

        reloaded = Domain.get_domain_by_name(domain.name, self.db)
        expect(reloaded.is_active).to_be_false()
Beispiel #16
0
    def post(self, domain_name):
        """Flip the domain's ``is_active`` flag.

        Responds with 404 when no domain named ``domain_name`` exists. When
        the domain ends up inactive, the cache is asked to delete the limit
        usage recorded for the domain's URL.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)

        if not domain:
            not_found = self._('Domain %s not found') % domain_name
            self.set_status(404, not_found)
            return

        was_active = domain.is_active
        domain.is_active = not was_active

        # A domain that was active before the toggle is inactive now.
        if was_active:
            yield self.cache.delete_limit_usage_by_domain(domain.url)
Beispiel #17
0
    def test_can_set_domain_to_active(self):
        # POSTing to change-status on an inactive domain must activate it.
        domain = DomainFactory.create(
            url="http://www.domain.com",
            name="domain.com",
            is_active=False,
        )
        endpoint = self.get_url(r'/domains/%s/change-status/' % domain.name)

        response = yield self.http_client.fetch(
            endpoint, method='POST', body='')
        expect(response.code).to_equal(200)

        reloaded = Domain.get_domain_by_name(domain.name, self.db)
        expect(reloaded.is_active).to_be_true()
Beispiel #18
0
    def test_can_convert_to_dict(self):
        # to_dict() should expose the domain name, key name and value.
        pref = DomainsViolationsPrefsFactory.create(
            domain=Domain(name='globo.com'),
            key=Key(name='some.random.fact'),
            value='whatever',
        )

        expected = {
            'domain': 'globo.com',
            'key': 'some.random.fact',
            'value': 'whatever',
        }
        expect(pref.to_dict()).to_be_like(expected)
Beispiel #19
0
    def test_can_create_domains_violations_prefs(self):
        # Create a preference and verify the row loaded back by id.
        created = DomainsViolationsPrefsFactory.create(
            domain=Domain(name='globo.com'),
            key=Key(name='some.random.fact'),
            value='whatever',
        )

        reloaded = self.db.query(DomainsViolationsPrefs).get(created.id)

        expect(reloaded.domain.name).to_equal('globo.com')
        expect(reloaded.key.name).to_equal('some.random.fact')
        expect(reloaded.value).to_equal('whatever')
Beispiel #20
0
    def test_can_set_domain_to_inactive(self):
        # An authenticated POST to change-status flips an active domain off.
        active_domain = DomainFactory.create(
            url="http://www.domain.com", name="domain.com", is_active=True)

        path = '/domains/%s/change-status/' % active_domain.name
        response = yield self.authenticated_fetch(path, method='POST', body='')

        expect(response.code).to_equal(200)

        stored = Domain.get_domain_by_name(active_domain.name, self.db)
        expect(stored.is_active).to_be_false()
Beispiel #21
0
    def test_get_domain_names(self):
        # With the table emptied first, exactly the two created names are
        # expected back.
        self.db.query(Domain).delete()

        first_name = "g1.globo.com"
        second_name = "globoesporte.globo.com"
        DomainFactory.create(name=first_name)
        DomainFactory.create(name=second_name)

        found_names = Domain.get_domain_names(self.db)

        expect(found_names).to_be_like([first_name, second_name])
Beispiel #22
0
    def test_can_get_domains_violations_prefs(self):
        # A single stored preference should come back keyed by domain name,
        # then by key name.
        DomainsViolationsPrefsFactory.create(
            domain=Domain(name='globo.com'),
            key=Key(name='some.random.fact'),
            value='whatever',
        )

        prefs = DomainsViolationsPrefs.get_domains_violations_prefs(self.db)

        expected = {'globo.com': {'some.random.fact': 'whatever'}}
        expect(prefs).to_be_like(expected)
Beispiel #23
0
    def get_next_jobs_count(cls, db, config):
        """Return the number of pages that belong to active domains.

        ``config`` is accepted but not used by this method.
        """
        from holmes.models import Domain

        active_ids = [domain.id for domain in Domain.get_active_domains(db)]

        count_query = db.query(sa.func.count(Page.id))
        count_query = count_query.filter(Page.domain_id.in_(active_ids))
        return count_query.scalar()
Beispiel #24
0
    def test_can_get_domains_details(self):
        # Begin with no domains at all, which must yield no details.
        self.db.query(Domain).delete()
        expect(Domain.get_domains_details(self.db)).to_length(0)

        domain = DomainFactory.create(
            name='domain-1.com', url='http://domain-1.com/')
        domain2 = DomainFactory.create(
            name='domain-2.com', url='http://domain-2.com/')
        DomainFactory.create()

        # Two pages on the first domain, one on the second.
        owners = (domain, domain, domain2)
        pages = [PageFactory.create(domain=owner) for owner in owners]

        # One active review per page, with these violation counts.
        violation_counts = (20, 10, 30)
        for owner, page, violations in zip(owners, pages, violation_counts):
            ReviewFactory.create(
                domain=owner,
                page=page,
                is_active=True,
                number_of_violations=violations)

        # Five requests under the first domain's name.
        status_codes = (200, 304, 400, 403, 404)
        response_times = (0.25, 0.35, 0.25, 0.35, 0.25)
        for status_code, response_time in zip(status_codes, response_times):
            RequestFactory.create(
                status_code=status_code,
                domain_name=domain.name,
                response_time=response_time)

        details = Domain.get_domains_details(self.db)
        expect(details).to_length(3)

        head = details[0]
        expect(head).to_length(10)
        expect(head['url']).to_equal('http://domain-1.com/')
        expect(head['name']).to_equal('domain-1.com')
        expect(head['violationCount']).to_equal(30)
        expect(head['pageCount']).to_equal(2)
        expect(head['reviewCount']).to_equal(2)
        expect(head['reviewPercentage']).to_equal(100.0)
        expect(head['errorPercentage']).to_equal(60.0)
        expect(head['is_active']).to_be_true()
        expect(head['averageResponseTime']).to_equal(0.3)
Beispiel #25
0
        def handle(has_key):
            """Increment the counter, or seed it when the key is missing.

            Uses ``key``, ``increment``, ``callback``, ``domain_name`` and
            ``get_default_method`` from the enclosing scope.
            """
            if has_key:
                self.redis.incrby(key, increment, callback=callback)
                return

            # Only the seeding path needs the Domain object, so the lookup
            # is skipped entirely when the counter already exists.
            domain = domain_name
            if domain and not isinstance(domain, Domain):
                domain = Domain.get_domain_by_name(domain_name, self.db)

            if domain is None:
                value = Page.get_page_count(self.db) + increment - 1
            else:
                value = get_default_method(domain) + increment - 1

            self.redis.set(key, value, callback=callback)
Beispiel #26
0
    def test_can_get_pages_per_domain(self):
        # Two pages on the first domain, three on the second, none on the
        # third; only domains with pages are expected in the mapping.
        first_domain = DomainFactory.create()
        second_domain = DomainFactory.create()
        DomainFactory.create()

        for owner, how_many in ((first_domain, 2), (second_domain, 3)):
            for _unused in range(how_many):
                PageFactory.create(domain=owner)

        counts = Domain.get_pages_per_domain(self.db)

        expected = {first_domain.id: 2, second_domain.id: 3}
        expect(counts).to_be_like(expected)
Beispiel #27
0
        def handle(has_key):
            """Increment the counter, or seed it when the key is missing.

            Uses ``key``, ``increment``, ``callback``, ``domain_name`` and
            ``get_default_method`` from the enclosing scope.
            """
            if has_key:
                self.redis.incrby(key, increment, callback=callback)
                return

            # The Domain object is only needed to compute the seed value, so
            # it is resolved here rather than before the branch.
            domain = domain_name
            if domain and not isinstance(domain, Domain):
                domain = Domain.get_domain_by_name(domain_name, self.db)

            if domain is None:
                value = Page.get_page_count(self.db) + increment - 1
            else:
                value = get_default_method(domain) + increment - 1

            self.redis.set(key, value, callback=callback)
Beispiel #28
0
    def get(self, key_name):
        """Write one page of reviews for a violation key as JSON.

        Query arguments: ``current_page`` (default 1), ``page_size``
        (default 10), ``domain_filter`` and ``page_filter`` (both optional).
        Responds with 404 for an unknown domain filter or an unknown
        violation key, or with the status and reason found under ``error``
        when the search provider returns one.
        """
        current_page = int(self.get_argument('current_page', 1))
        page_size = int(self.get_argument('page_size', 10))
        domain_filter = self.get_argument('domain_filter', None)
        page_filter = self.get_argument('page_filter', None)

        domain = None
        if domain_filter is not None:
            domain = Domain.get_domain_by_name(domain_filter, self.db)
            if not domain:
                not_found = self._('Domain %s not found') % domain_filter
                self.set_status(404, not_found)
                self.finish()
                return

        definitions = self.application.violation_definitions
        if key_name not in definitions:
            self.set_status(404, self._('Invalid violation key %s') % key_name)
            self.finish()
            return

        definition = definitions[key_name]
        title = definition['title']
        key_id = definition['key'].id

        search = self.application.search_provider
        violation = yield search.get_by_violation_key_name(
            key_id=key_id,
            current_page=current_page,
            page_size=page_size,
            domain=domain,
            page_filter=page_filter,
        )

        if 'error' in violation:
            error = violation['error']
            self.set_status(error['status_code'], error['reason'])
            self.finish()
            return

        # The provider may omit the total; it is counted from the database
        # only when neither filter is in effect.
        if 'reviewsCount' not in violation:
            if domain or page_filter:
                violation['reviewsCount'] = None
            else:
                violation['reviewsCount'] = Review.count_by_violation_key_name(
                    self.db, key_id)

        violation['title'] = title

        self.write_json(violation)
        self.finish()
Beispiel #29
0
    def get(self, domain_name):
        """Write the details of one domain as JSON.

        The statistics come from the ``domains_details`` material entry
        whose ``name`` equals ``domain_name``. Responds with 404 when the
        domain does not exist or has no such entry.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            return

        # Pick the first details entry recorded for this domain, if any.
        data = None
        for entry in self.girl.get('domains_details') or []:
            if entry['name'] == domain_name:
                data = entry
                break

        if not data:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            return

        domain_json = {
            "id": domain.id,
            "name": domain.name,
            "url": domain.url,
            "pageCount": data.get('pageCount', 0),
            "reviewCount": data.get('reviewCount', 0),
            "violationCount": data.get('violationCount', 0),
            "reviewPercentage": data.get('reviewPercentage', 0),
            "is_active": domain.is_active,
            "errorPercentage": data.get('errorPercentage', 0),
            "averageResponseTime": data.get('averageResponseTime', 0),
            "homepageId": "",
            "homepageReviewId": "",
        }

        # The homepage identifiers stay empty strings unless a homepage with
        # the corresponding uuid is available.
        homepage = domain.get_homepage(self.db)
        if homepage and homepage.uuid:
            domain_json["homepageId"] = str(homepage.uuid)
        if homepage and homepage.last_review_uuid:
            domain_json["homepageReviewId"] = str(homepage.last_review_uuid)

        self.write_json(domain_json)
Beispiel #30
0
    def get(self, domain_name):
        """Write the domain's violations-per-day data as JSON.

        Responds with 404 when no domain named ``domain_name`` exists.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)

        if not domain:
            not_found = self._('Domain %s not found') % domain_name
            self.set_status(404, not_found)
            return

        self.write_json({
            "id": domain.id,
            "name": domain.name,
            "url": domain.url,
            "violations": domain.get_violations_per_day(self.db)
        })
Beispiel #31
0
    def test_can_get_pages_per_domain(self):
        # Three domains are created; pages go to the first two only.
        domain_a = DomainFactory.create()
        domain_b = DomainFactory.create()
        DomainFactory.create()

        page_owners = [domain_a] * 2 + [domain_b] * 3
        for owner in page_owners:
            PageFactory.create(domain=owner)

        result = Domain.get_pages_per_domain(self.db)

        expect(result).to_be_like({domain_a.id: 2, domain_b.id: 3})
Beispiel #32
0
    def get(self, domain_name):
        """Write the domain's violations-per-day data as JSON.

        Responds with 404 when no domain named ``domain_name`` exists.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)

        if not domain:
            reason = 'Domain %s not found' % domain_name
            self.set_status(404, reason)
            return

        payload = {
            "id": domain.id,
            "name": domain.name,
            "url": domain.url,
        }
        payload["violations"] = domain.get_violations_per_day(self.db)

        self.write_json(payload)
Beispiel #33
0
    def increment_count(self, key, domain_name, get_default_method, increment=1):
        """Increment the counter stored under ``<domain>-<key>`` in redis.

        When the key already exists it is incremented by ``increment``.
        Otherwise it is seeded from ``Page.get_page_count`` (no domain) or
        from ``get_default_method(domain)``, plus ``increment - 1``.

        ``domain_name`` may be a name or a ``Domain`` instance.
        """
        key = '%s-%s' % (self.get_domain_name(domain_name), key)

        if self.has_key(key):
            self.redis.incrby(key, increment)
            return

        # Only the seeding path needs the Domain object, so the lookup is
        # skipped entirely when the counter already exists.
        domain = domain_name
        if domain and not isinstance(domain, Domain):
            domain = Domain.get_domain_by_name(domain_name, self.db)

        if domain is None:
            value = Page.get_page_count(self.db) + increment - 1
        else:
            value = get_default_method(domain) + increment - 1

        self.redis.set(key, value)
Beispiel #34
0
    def add_domain(cls, url, db, publish_method, config, girl,
                   default_violations_values, violation_definitions, cache):
        """Return the ``Domain`` for ``url``, creating it when it is missing.

        An existing domain is matched by name, with or without a trailing
        slash. When a new domain is created, materials are expired, a
        ``new-domain`` message is published, default violation preferences
        are inserted and a limiter is added or updated for the domain URL.
        """
        from holmes.models import Domain, DomainsViolationsPrefs
        from holmes.material import expire_materials

        domain_name, domain_url = get_domain_from_url(url)

        name_matches = or_(
            Domain.name == domain_name,
            Domain.name == domain_name.rstrip('/'),
            Domain.name == "%s/" % domain_name)
        existing = db.query(Domain).filter(name_matches).all()

        domain = existing[0] if existing else None
        if domain:
            return domain

        domain = Domain(url=domain_url,
                        url_hash=hashlib.sha512(domain_url).hexdigest(),
                        name=domain_name)
        db.add(domain)
        db.flush()

        expire_materials(girl)

        announcement = dumps({
            'type': 'new-domain',
            'domainUrl': str(domain_url)
        })
        publish_method(announcement)

        DomainsViolationsPrefs.insert_default_violations_values_for_domain(
            db, domain, default_violations_values.keys(),
            violation_definitions, cache)

        from holmes.models import Limiter
        Limiter.add_or_update_limiter(
            db, domain_url, config.DEFAULT_NUMBER_OF_CONCURRENT_CONNECTIONS)

        return domain
Beispiel #35
0
    def test_can_get_violations_per_domain(self):
        # Violations of active, complete reviews are expected to be summed
        # per domain; the third domain has no reviews at all.
        first_domain = DomainFactory.create()
        second_domain = DomainFactory.create()
        DomainFactory.create()

        # Two pages on the first domain, three on the second.
        owners = (first_domain, first_domain,
                  second_domain, second_domain, second_domain)
        pages = [PageFactory.create(domain=owner) for owner in owners]

        # One review per page, in page creation order.
        violation_counts = (40, 20, 15, 25, 50)
        for owner, page, violations in zip(owners, pages, violation_counts):
            ReviewFactory.create(domain=owner,
                                 page=page,
                                 number_of_violations=violations,
                                 is_active=True,
                                 is_complete=True)

        totals = Domain.get_violations_per_domain(self.db)

        expected = {first_domain.id: 60, second_domain.id: 90}
        expect(totals).to_be_like(expected)
Beispiel #36
0
    def get(self, domain_name):
        """Write the domain's statistics, read from the cache, as JSON.

        Responds with 404 when no domain named ``domain_name`` exists. The
        error and review percentages are rounded to two decimals and are 0
        when their denominator is not positive.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, 'Domain %s not found' % domain_name)
            return

        def percentage(part, whole):
            # Share of ``part`` in ``whole``; 0 when ``whole`` is not positive.
            if whole > 0:
                return round(float(part) / whole * 100, 2)
            return 0

        cache = self.cache

        pages = yield cache.get_page_count(domain)
        reviews = yield cache.get_active_review_count(domain)
        violations = yield cache.get_violation_count(domain)

        bad_requests = yield cache.get_bad_request_count(domain)
        good_requests = yield cache.get_good_request_count(domain)
        error_percentage = percentage(
            bad_requests, good_requests + bad_requests)

        response_time_avg = yield cache.get_response_time_avg(domain)
        status_code_info = Request.get_status_code_info(domain_name, self.db)

        review_percentage = percentage(reviews, pages)

        self.write_json({
            "id": domain.id,
            "name": domain.name,
            "url": domain.url,
            "pageCount": pages,
            "reviewCount": reviews,
            "violationCount": violations,
            "reviewPercentage": review_percentage,
            "is_active": domain.is_active,
            "statusCodeInfo": status_code_info,
            "errorPercentage": error_percentage,
            "averageResponseTime": response_time_avg,
        })
Beispiel #37
0
    def get(self, key_name):
        """Write one page of reviews for a violation key as JSON.

        Query arguments: ``current_page`` (default 1), ``page_size``
        (default 10), ``domain_filter`` and ``page_filter`` (both optional).
        Responds with 404 for an unknown domain filter or an unknown
        violation key, or with the status and reason found under ``error``
        when the search provider returns one.
        """
        current_page = int(self.get_argument("current_page", 1))
        page_size = int(self.get_argument("page_size", 10))
        domain_filter = self.get_argument("domain_filter", None)
        page_filter = self.get_argument("page_filter", None)

        domain = None
        if domain_filter is not None:
            domain = Domain.get_domain_by_name(domain_filter, self.db)
            if not domain:
                self.set_status(404, self._("Domain %s not found") % domain_filter)
                self.finish()
                return

        known_violations = self.application.violation_definitions
        if key_name not in known_violations:
            self.set_status(404, self._("Invalid violation key %s") % key_name)
            self.finish()
            return

        entry = known_violations[key_name]
        violation_title = entry["title"]
        key_id = entry["key"].id

        search_kwargs = dict(
            key_id=key_id,
            current_page=current_page,
            page_size=page_size,
            domain=domain,
            page_filter=page_filter,
        )
        violation = yield self.application.search_provider.get_by_violation_key_name(**search_kwargs)

        if "error" in violation:
            failure = violation["error"]
            self.set_status(failure["status_code"], failure["reason"])
            self.finish()
            return

        # The provider may omit the total; it is counted from the database
        # only when no domain or page filter is in effect.
        if "reviewsCount" not in violation:
            unfiltered = not (domain or page_filter)
            if unfiltered:
                violation["reviewsCount"] = Review.count_by_violation_key_name(self.db, key_id)
            else:
                violation["reviewsCount"] = None

        violation["title"] = violation_title

        self.write_json(violation)
        self.finish()
Beispiel #38
0
    def get_next_job_list(cls, db, expiration, current_page=1, page_size=200):
        """Return one page of pages from active domains, highest score first.

        :param db: session used for the queries.
        :param expiration: accepted for interface compatibility; not used here.
        :param current_page: 1-based page number of the slice to return.
        :param page_size: number of rows per page.
        :returns: the slice of the query holding ``(uuid, url, score,
            last_review_date)`` rows for the requested page.
        """
        from holmes.models import Domain

        first_row = page_size * (current_page - 1)
        last_row = first_row + page_size

        domain_ids = [domain.id for domain in Domain.get_active_domains(db)]

        query = db.query(
            Page.uuid,
            Page.url,
            Page.score,
            Page.last_review_date
        )
        query = query.filter(Page.domain_id.in_(domain_ids))
        query = query.order_by(Page.score.desc())

        return query[first_row:last_row]
Beispiel #39
0
    def post(self, domain_name):
        """Update a domain's violation preferences from the JSON request body.

        Returns without writing anything when ``validate_superuser()`` is
        falsy, and responds with 404 when no domain matches ``domain_name``.
        Otherwise the decoded body is handed to
        ``DomainsViolationsPrefs.update_by_domain`` and a confirmation
        message is written as JSON.
        """
        if not self.validate_superuser():
            return

        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            return

        preferences = loads(self.request.body)
        DomainsViolationsPrefs.update_by_domain(
            self.db, self.cache, domain, preferences)

        confirmation = {}
        confirmation['reason'] = self._('Preferences successfully saved!')
        confirmation['description'] = self._('Preferences successfully saved!')
        self.write_json(confirmation)
Beispiel #40
0
    def get(self, domain_name, key_category_id):
        """Write the top violations of one key category for a domain as JSON.

        Responds with 404 when either the domain or the key category cannot
        be found. The violations come from the
        ``top_violations_in_category_for_domains`` entry read through
        ``self.girl``; a missing entry yields an empty list.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            return

        key_category = KeysCategory.get_by_id(self.db, key_category_id)
        if not key_category:
            self.set_status(
                404, self._('Key category %s not found') % key_category_id)
            return

        violation_defs = self.application.violation_definitions

        cached = self.girl.get('top_violations_in_category_for_domains') or {}
        domain_entries = cached.get(domain_name, {})
        category_entries = domain_entries.get(key_category.id, [])

        violations = [
            {
                'title': self._(violation_defs[entry['key_name']]['title']),
                'count': entry['count'],
                'key': entry['key_name'],
            }
            for entry in category_entries
        ]

        self.write_json({
            "domainId": domain.id,
            'domainName': domain.name,
            'domainURL': domain.url,
            # Echoes the id exactly as received in the URL.
            'categoryId': key_category_id,
            'violations': violations
        })
Beispiel #41
0
    def insert_default_violations_values_for_all_domains(
            cls, db, default_violations_values, violation_definitions, cache):
        """Insert, for every domain, the default preference keys it lacks.

        A domain with stored preferences only receives the default keys that
        are absent from them; a domain without any receives every default
        key. The insertion itself is delegated to
        ``insert_default_violations_values_for_domain``.
        """
        from holmes.models import Domain

        stored_prefs = DomainsViolationsPrefs.get_domains_violations_prefs(db)

        for domain in Domain.get_all_domains(db):
            domain_prefs = stored_prefs.get(domain.name, None)

            if not domain_prefs:
                missing_keys = default_violations_values.keys()
            else:
                default_keys = set(default_violations_values.keys())
                missing_keys = default_keys - set(domain_prefs.keys())

            DomainsViolationsPrefs.insert_default_violations_values_for_domain(
                db, domain, missing_keys, violation_definitions, cache)
Beispiel #42
0
    def get_count(self, key, domain_name, expiration, get_count_method):
        """Return a cached counter, computing and storing it on a cache miss.

        :param key: suffix of the cache key.
        :param domain_name: a domain name or a ``Domain`` instance; a name is
            resolved through ``Domain.get_domain_by_name``.
        :param expiration: expiry passed to ``setex`` for the stored value.
        :param get_count_method: callable receiving the resolved domain and
            returning the count.
        :returns: the count as an ``int``.
        """
        cached = self.redis.get(
            '%s-%s' % (self.get_domain_name(domain_name), key))
        if cached is not None:
            return int(cached)

        domain = domain_name
        if domain and not isinstance(domain, Domain):
            domain = Domain.get_domain_by_name(domain_name, self.db)

        # Without a resolved domain the total page count is used instead.
        if domain is not None:
            count = get_count_method(domain)
        else:
            count = Page.get_page_count(self.db)
        count = int(count)

        # The key is rebuilt from the resolved domain before storing.
        cache_key = '%s-%s' % (self.get_domain_name(domain), key)
        self.redis.setex(cache_key, expiration, value=count)

        return count
Beispiel #43
0
    def increment_count(self,
                        key,
                        domain_name,
                        get_default_method,
                        increment=1):
        """Increment a cached counter, seeding it first when it is missing.

        When the key already exists it is incremented in place and nothing
        else happens. The domain is only resolved on a cache miss, so an
        increment on an existing key costs no database lookup.

        :param key: suffix of the cache key.
        :param domain_name: a domain name or a ``Domain`` instance; a name is
            resolved through ``Domain.get_domain_by_name``.
        :param get_default_method: callable receiving the resolved domain and
            returning the value used to seed a missing key.
        :param increment: amount to add to the counter.
        """
        key = '%s-%s' % (self.get_domain_name(domain_name), key)

        if self.has_key(key):
            self.redis.incrby(key, increment)
            return

        domain = domain_name
        if domain and not isinstance(domain, Domain):
            domain = Domain.get_domain_by_name(domain_name, self.db)

        # Without a resolved domain the total page count is the seed.
        if domain is None:
            current = Page.get_page_count(self.db)
        else:
            current = get_default_method(domain)

        # NOTE(review): the "- 1" presumably compensates for the seed value
        # already including the item being counted -- confirm with callers.
        self.redis.set(key, current + increment - 1)
Beispiel #44
0
    def get(self, domain_name):
        """Write a page of the domain's active reviews as JSON.

        Query arguments read from the request: ``term`` (optional, passed as
        ``url_starts_with``), ``current_page`` (default 1) and ``page_size``
        (default 10). Responds with 404 when the domain is unknown.

        ``reviewCount`` is the length of the returned page when ``term`` is
        given, otherwise the cached active review count of the domain.
        """
        term = self.get_argument('term', None)
        current_page = int(self.get_argument('current_page', 1))
        page_size = int(self.get_argument('page_size', 10))

        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, 'Domain %s not found' % domain_name)
            return

        reviews = domain.get_active_reviews(
            self.db,
            url_starts_with=term,
            current_page=current_page,
            page_size=page_size
        )

        if not term:
            review_count = yield self.cache.get_active_review_count(domain)
        else:
            review_count = len(reviews)

        pages = [
            {
                "url": review.url,
                "uuid": str(review.uuid),
                "violationCount": review.violations_count,
                "completedAt": review.last_review_date,
                "reviewId": str(review.last_review_uuid)
            }
            for review in reviews
        ]

        self.write_json({
            'reviewCount': review_count,
            'pages': pages,
        })
Beispiel #45
0
    def test_can_get_violations_per_domain(self):
        """Violation counts of reviews are summed per domain id."""
        first_domain = DomainFactory.create()
        second_domain = DomainFactory.create()
        DomainFactory.create()  # a third domain that gets no pages or reviews

        # One (domain, number_of_violations) pair per page/review to create.
        fixtures = (
            (first_domain, 40),
            (first_domain, 20),
            (second_domain, 15),
            (second_domain, 25),
            (second_domain, 50),
        )

        # All pages are created before any review.
        pages = [PageFactory.create(domain=owner) for owner, _ in fixtures]

        for (owner, violations), page in zip(fixtures, pages):
            ReviewFactory.create(
                domain=owner,
                page=page,
                number_of_violations=violations,
                is_active=True,
                is_complete=True
            )

        violations_per_domain = Domain.get_violations_per_domain(self.db)

        expect(violations_per_domain).to_be_like({
            first_domain.id: 60,
            second_domain.id: 90
        })
Beispiel #46
0
    def get(self, domain_name):
        """Write the domain's violation counts grouped by category as JSON.

        Responds with 404 when the domain is unknown. The grouped counts come
        from the ``violation_count_by_category_for_domains`` entry read
        through ``self.girl``; a missing entry yields an empty list and a
        total of zero.
        """
        domain = Domain.get_domain_by_name(domain_name, self.db)
        if not domain:
            self.set_status(404, self._('Domain %s not found') % domain_name)
            return

        violation_defs = self.application.violation_definitions

        cached = self.girl.get('violation_count_by_category_for_domains') or {}

        violations = []
        for entry in cached.get(domain.id, []):
            key_name = entry['key_name']
            category_id = entry['category_id']
            count = entry['violation_count']

            violations.append({
                'categoryId': category_id,
                'categoryName': self._(violation_defs[key_name]['category']),
                'count': count
            })

        total = sum(violation['count'] for violation in violations)

        self.write_json({
            "domainId": domain.id,
            'domainName': domain.name,
            'domainURL': domain.url,
            'total': total,
            'violations': violations
        })
    def insert_default_violations_values_for_all_domains(
        cls, db, default_violations_values, violation_definitions, cache):
        """Give every domain the default violation preference keys it lacks.

        For a domain that already has preferences, only the default keys not
        present in them are inserted; for a domain with none, all default
        keys are. Insertion is delegated to
        ``insert_default_violations_values_for_domain``.
        """
        from holmes.models import Domain

        prefs_by_domain = DomainsViolationsPrefs.get_domains_violations_prefs(db)

        for domain in Domain.get_all_domains(db):
            current_prefs = prefs_by_domain.get(domain.name, None)

            if not current_prefs:
                keys_to_insert = default_violations_values.keys()
            else:
                keys_to_insert = (
                    set(default_violations_values.keys())
                    - set(current_prefs.keys())
                )

            DomainsViolationsPrefs.insert_default_violations_values_for_domain(
                db, domain, keys_to_insert, violation_definitions, cache)
Beispiel #48
0
 def _verify_workers_limits(self, url, avg_links_per_page=10):
     """Return ``LimiterModel.has_limit_to_work`` for ``url`` over the active domains."""
     domains = Domain.get_active_domains(self.db)
     return LimiterModel.has_limit_to_work(
         self.db,
         domains,
         url,
         avg_links_per_page
     )
Beispiel #49
0
    def load_all_domains_violations_prefs(self):
        """Ask the cache for the violation preferences of every domain.

        The values returned by the cache are discarded; the calls are made
        for their effect on the cache.
        """
        from holmes.models import Domain

        domain_names = [
            domain.name for domain in Domain.get_all_domains(self.db)
        ]
        for name in domain_names:
            self.cache.get_domain_violations_prefs(name)
Beispiel #50
0
    def load_all_domains_violations_prefs(self):
        """Request each domain's violation preferences through the cache.

        Nothing is returned; the results of the individual cache calls are
        ignored.
        """
        from holmes.models import Domain

        names = [item.name for item in Domain.get_all_domains(self.db)]
        for domain_name in names:
            self.cache.get_domain_violations_prefs(domain_name)
Beispiel #51
0
 def get_domain(self, domain_name):
     """Resolve ``domain_name`` to a ``Domain``.

     Falsy values and ``Domain`` instances are returned unchanged; anything
     else is looked up by name, which may yield ``None``.
     """
     if not domain_name or isinstance(domain_name, Domain):
         return domain_name
     return Domain.get_domain_by_name(domain_name, self.db)
Beispiel #52
0
    def test_can_get_domain_by_name(self):
        """A created domain can be loaded back through its name."""
        created = DomainFactory.create()

        found = Domain.get_domain_by_name(created.name, self.db)

        expect(found.id).to_equal(created.id)
Beispiel #53
0
 def get_domain(self, domain_name):
     """Return the ``Domain`` for ``domain_name``.

     A falsy argument or an existing ``Domain`` instance is handed back
     as is; any other value is looked up by name through the model.
     """
     already_resolved = not domain_name or isinstance(domain_name, Domain)
     if already_resolved:
         return domain_name
     return Domain.get_domain_by_name(domain_name, self.db)
Beispiel #54
0
    def test_invalid_domain_returns_None(self):
        """Looking up a name that was never created yields a null result."""
        missing = Domain.get_domain_by_name('domain-details.com', self.db)

        expect(missing).to_be_null()
Beispiel #55
0
    def get_next_job(cls, db, expiration, cache, lock_expiration, avg_links_per_page=10):
        """Pick the next page to review and take its job lock.

        For every active domain, up to ``number_of_workers`` pages are
        fetched ordered by descending score. The per-domain lists are then
        interleaved round-robin and the first candidate that both passes
        ``Limiter.has_limit_to_work`` and yields a lock from
        ``cache.has_next_job_lock`` is returned.

        :param db: session used for the queries.
        :param expiration: accepted for interface compatibility; not used here.
        :param cache: object providing ``has_next_job_lock(url, expiration)``.
        :param lock_expiration: expiration handed to ``has_next_job_lock``.
        :param avg_links_per_page: forwarded to ``Limiter.has_limit_to_work``.
        :returns: ``None`` when no candidate could be locked, otherwise a
            dict with the keys ``page`` (uuid as string), ``url``, ``score``
            and ``lock``.
        """
        from holmes.models import Settings, Worker, Domain, Limiter  # Avoid circular dependency

        settings = Settings.instance(db)
        number_of_workers = len(db.query(Worker).all())

        active_domains = Domain.get_active_domains(db)
        active_domains_ids = [item.id for item in active_domains]

        pages_by_domain = {}
        for domain_id in active_domains_ids:
            pages = db \
                .query(
                    Page.uuid,
                    Page.url,
                    Page.score,
                    Page.last_review_date
                ) \
                .filter(Page.domain_id == domain_id) \
                .order_by(Page.score.desc())[:number_of_workers]
            if pages:
                pages_by_domain[domain_id] = pages

        # Interleave the per-domain lists so that no single domain
        # monopolises the head of the candidate list.
        pages_in_need_of_review = []
        current_domain = 0
        while pages_by_domain:
            # list() keeps this indexable on Python 3, where keys() is a view.
            domain_ids = list(pages_by_domain.keys())
            if current_domain >= len(domain_ids):
                current_domain = 0

            domain_id = domain_ids[current_domain]
            domain_pages = pages_by_domain[domain_id]
            pages_in_need_of_review.append(domain_pages.pop(0))

            if domain_pages:
                current_domain += 1
            else:
                # Removing the exhausted domain shifts the following one into
                # this position, so the cursor must stay put or that domain
                # would lose its turn.
                del pages_by_domain[domain_id]

        if not pages_in_need_of_review:
            return None

        if settings.lambda_score > 0 and settings.lambda_score > pages_in_need_of_review[0].score:
            cls.update_pages_score_by(settings, settings.lambda_score, db)

        for candidate in pages_in_need_of_review:
            if not Limiter.has_limit_to_work(db, active_domains, candidate.url, avg_links_per_page):
                continue

            lock = cache.has_next_job_lock(candidate.url, lock_expiration)

            # A non-None lock means this candidate is ours.
            if lock is not None:
                return {
                    'page': str(candidate.uuid),
                    'url': candidate.url,
                    'score': candidate.score,
                    'lock': lock
                }

        return None
Beispiel #56
0
    def fill_job_bucket(self,
                        expiration,
                        look_ahead_pages=1000,
                        avg_links_per_page=10.0):
        """Top up the ``next-job-bucket`` with pages that are due for review.

        Runs under the ``next-job-fill-bucket-lock`` lock; when the lock
        cannot be acquired (``LockTimeout``) the call logs and returns.

        For every active domain, up to ``look_ahead_pages`` pages that were
        never reviewed or whose last review is older than ``expiration``
        seconds are fetched. They are then taken round-robin across domains
        and added through ``add_next_job_bucket`` until the bucket holds
        ``look_ahead_pages`` entries or no candidates remain. A page is
        skipped when any limiter bucket matching its URL has no capacity
        left; capacity is only consumed for pages that are actually added.

        :param expiration: age in seconds after which a review is stale.
        :param look_ahead_pages: per-domain fetch limit and bucket size cap.
        :param avg_links_per_page: forwarded to ``get_limiter_buckets``.
        """
        try:
            with Lock('next-job-fill-bucket-lock', redis=self.redis):
                logging.info('Refilling job bucket. Lock acquired...')
                expired_time = datetime.utcnow() - timedelta(
                    seconds=expiration)

                active_domains = Domain.get_active_domains(self.db)

                if not active_domains:
                    return

                active_domains_ids = [item.id for item in active_domains]

                # Entries are (limit, available) pairs, as unpacked below.
                limiter_buckets = self.get_limiter_buckets(
                    active_domains, avg_links_per_page)

                pages_by_domain = []

                for domain_id in active_domains_ids:
                    # "== None" is intentional: SQLAlchemy turns it into
                    # an IS NULL clause.
                    pages = self.db \
                        .query(
                            Page.uuid,
                            Page.url,
                            Page.score,
                            Page.last_review_date
                        ) \
                        .filter(Page.domain_id == domain_id) \
                        .filter(or_(
                            Page.last_review_date == None,  # noqa: E711
                            Page.last_review_date <= expired_time
                        ))[:look_ahead_pages]

                    if pages:
                        pages_by_domain.append(pages)

                logging.debug(
                    'Total of %d pages found to add to redis.',
                    sum(len(pages) for pages in pages_by_domain))

                bucket_size = int(self.redis.zcard('next-job-bucket'))
                added = 0
                current_domain = 0
                while bucket_size + added < look_ahead_pages and pages_by_domain:
                    if current_domain >= len(pages_by_domain):
                        current_domain = 0

                    domain_pages = pages_by_domain[current_domain]
                    item = domain_pages.pop(0)

                    logging.debug('Available Limit Buckets: %s',
                                  limiter_buckets)

                    # Find every limiter that applies first, so capacity is
                    # not consumed from some of them when another one ends
                    # up rejecting the page.
                    matching = [
                        index
                        for index, (limit, _) in enumerate(limiter_buckets)
                        if limit.matches(item.url)
                    ]
                    has_limit = all(
                        limiter_buckets[index][1] > 0 for index in matching)

                    if has_limit:
                        for index in matching:
                            limit, available = limiter_buckets[index]
                            limiter_buckets[index] = (limit, available - 1)

                        self.add_next_job_bucket(item.uuid, item.url)
                        added += 1

                    if domain_pages:
                        current_domain += 1
                    else:
                        # No pages left for this domain: drop its list. The
                        # next domain slides into this index, so the cursor
                        # stays where it is to avoid skipping that domain.
                        del pages_by_domain[current_domain]

                logging.debug('ADDED A TOTAL of %d ITEMS TO REDIS...', added)

        except LockTimeout:
            logging.info("Can't acquire lock. Moving on...")
Beispiel #57
0
    def test_invalid_domain_returns_None(self):
        """A name with no matching domain resolves to a null value."""
        unknown_name = 'domain-details.com'

        result = Domain.get_domain_by_name(unknown_name, self.db)

        expect(result).to_be_null()
Beispiel #58
0
    def test_can_get_domain_by_name(self):
        """Fetching by name returns the domain that was just created."""
        expected = DomainFactory.create()

        actual = Domain.get_domain_by_name(expected.name, self.db)

        expect(actual.id).to_equal(expected.id)