Example #1
0
    def run(self, execution, page, validator, response):
        """Record an HTTP error status for ``page`` as a ``Result``.

        Stores ``execution``, ``page`` and ``validator`` on the instance.
        When ``response.status`` is 400 or higher, a ``Result`` whose
        description lists that status code under ``response.url`` is created
        and saved; otherwise nothing is persisted.
        """
        self.execution, self.page, self.validator = execution, page, validator

        # Statuses below 400 are not reported.
        if response.status < 400:
            return

        report = {"page": response.url, "errors": [{"error": response.status}]}
        Result(
            description=report,
            execution=self.execution,
            page=self.page,
            validator=self.validator,
        ).save()
Example #2
0
    def run(self, execution, page, validator):
        """Check the stored response's markup with Tidy and save a ``Result``.

        Stores ``execution``, ``page`` and ``validator`` on the instance, runs
        ``tidy_document`` over ``self.response.body`` and saves a ``Result``
        whose description holds one entry per non-blank line of Tidy's
        report.  A ``Result`` is saved even when the report is empty.

        NOTE(review): ``self.response`` is read but never assigned in this
        method; it must already be set on the instance before the call.
        """
        self.execution = execution
        self.page = page
        self.validator = validator

        _document, errors = tidy_document(self.response.body, options={'numeric-entities': 1})

        # A plain split('\n') yields an empty string for an empty report and
        # after a trailing newline; blank lines are skipped so they are not
        # stored as errors.
        errors_json = {
            "page": self.response.url,
            "errors": [{"error": line} for line in errors.splitlines() if line.strip()],
        }

        result = Result(description=errors_json, execution=self.execution, page=self.page, validator=self.validator)
        result.save()
Example #3
0
    def handleError(self, reason):
        """Append a failure message to this page's ``Result`` and persist it.

        Looks up the ``Result`` for the current execution/page/validator.  If
        no such row exists, a new ``Result`` is started with an empty error
        list for ``self.response.url``.  The text returned by
        ``reason.getErrorMessage()`` is appended and the row is saved inside a
        manually managed transaction.

        Args:
            reason: object exposing ``getErrorMessage()`` (presumably a
                Twisted ``Failure`` -- confirm against the caller).

        Raises:
            Any exception from the lookup other than ``Result.DoesNotExist``,
            or from saving; the transaction is rolled back before re-raising.
        """
        with transaction.commit_manually():
            try:
                try:
                    result = Result.objects.get(execution=self.execution, page=self.page, validator=self.validator)
                    errors_json = result.description
                except Result.DoesNotExist:
                    # Only "no row yet" means "create one"; other lookup
                    # failures must not be mistaken for a missing row.
                    result = Result(execution=self.execution, page=self.page, validator=self.validator)
                    errors_json = { "page": self.response.url, "errors": [] }

                errors_json['errors'].append({ "error": reason.getErrorMessage() })

                result.description = errors_json
                result.save()
            except BaseException:
                # A manually managed block must end with an explicit commit or
                # rollback, otherwise the original error gets masked.
                transaction.rollback()
                raise
            else:
                transaction.commit()
Example #4
0
    def process_exception(self, request, exception, spider):
        """Record a failed request as an error on the referring page.

        Does nothing unless the spider has a ``BrokenLinksValidator``.
        Otherwise the page is looked up in ``spider.pages`` by the request's
        ``Referer`` header, the matching ``Result`` is fetched (or a new one
        is started for ``request.url``), ``str(exception)`` is appended to its
        error list and the ``Result`` is saved.

        Returns:
            Always ``None``.
        """
        if 'BrokenLinksValidator' in spider.validators:
            validator = spider.validators['BrokenLinksValidator']
            # .get() so that a request without a Referer header is handled
            # like one whose referrer is not in spider.pages (page is None)
            # instead of raising KeyError.
            referer = request.headers.get('Referer')
            page = spider.pages.get(referer)
            try:
                result = Result.objects.get(execution=spider.execution, page=page, validator=validator)
            except ObjectDoesNotExist:
                errors_json = { "page": request.url, "errors": [] }
                result = Result(description=errors_json, execution=spider.execution, page=page, validator=validator)

            result.description['errors'].append({ "error": str(exception) })
            result.save()

        return None
Example #5
0
    def process_response(self, request, response, spider):
        """Check a response's markup with Tidy and save the report.

        Runs only when the spider has a ``W3CValidator`` and the response's
        host is in ``spider.allow_domains``.  In that case ``tidy_document``
        is run over ``response.body`` and a ``Result`` is saved with one entry
        per non-blank line of Tidy's report (a ``Result`` is saved even when
        the report is empty).

        Returns:
            The ``response`` object, unchanged, in every case.
        """
        domain = urlparse(response.url).netloc
        if 'W3CValidator' in spider.validators and domain in spider.allow_domains:
            validator = spider.validators['W3CValidator']
            page = spider.pages.get(response.url)

            _document, errors = tidy_document(response.body, options={ 'numeric-entities': 1 })

            # A plain split('\n') yields an empty string for an empty report
            # and after a trailing newline; blank lines are skipped so they
            # are not stored as errors.
            # NOTE: de-duplicating messages across pages via spider.errors was
            # sketched here earlier but is not active.
            errors_json = {
                "page": response.url,
                "errors": [{ "error": line } for line in errors.splitlines() if line.strip()],
            }

            result = Result(description=errors_json, execution=spider.execution, page=page, validator=validator)
            result.save()

        return response
Example #6
0
    def process_response(self, request, response, spider):
        """Flag responses whose download took longer than the configured limit.

        Runs only when the spider has a ``DownloadTimeValidator`` and the
        response's host is in ``spider.allow_domains``.  The elapsed time is
        the current time minus ``request.meta['__start_time']``; when it
        exceeds the configured ``time`` field, the timing is logged and a
        ``Result`` describing the overrun is saved.

        Returns:
            The ``response`` object, unchanged, in every case.
        """
        host = urlparse(response.url).netloc
        if 'DownloadTimeValidator' not in spider.validators or host not in spider.allow_domains:
            return response

        validator = spider.validators['DownloadTimeValidator']
        page = spider.pages.get(response.url)

        started = request.meta['__start_time']
        finished = time()
        threshold = float(spider.parameters["DownloadTimeValidator"]["fields"]["time"])
        elapsed = finished - started

        if elapsed > threshold:
            timing = str(finished) + " - " + str(started) + " = " + str(elapsed)
            log.msg("R1 " + request.url + "   " + timing, level=log.INFO)
            report = {
                "page": response.url,
                "errors": [{ "error": "Download Time exceded (> " + str(threshold) + ")"}],
            }
            Result(description=report, execution=spider.execution, page=page, validator=validator).save()

        return response
Example #7
0
    def process_response(self, request, response, spider):
        """Spell-check the text of an HTML response and save the findings.

        Runs only when the spider has a ``SpellingValidator``, the response's
        host is in ``spider.allow_domains`` and the ``Content-Type`` header
        contains ``text/html``.  The body is stripped of markup with
        ``Cleaner``, passed to ``self.check_errors`` together with the
        configured language and ``en_GB``, and a ``Result`` with one entry per
        reported error is saved.

        Returns:
            The ``response`` object, unchanged, in every case.
        """
        domain = urlparse(response.url).netloc
        if 'SpellingValidator' in spider.validators and domain in spider.allow_domains:
            # A response without a Content-Type header is treated as non-HTML
            # instead of raising KeyError.
            content_type = response.headers.get('Content-Type') or ''
            if 'text/html' in content_type:
                validator = spider.validators['SpellingValidator']
                page = spider.pages.get(response.url)

                cleaner = Cleaner(scripts=True, embedded=True, meta=True, page_structure=True, links=True, style=True, processing_instructions=True, annoying_tags=True,
                        remove_tags = ['a', 'ul', 'li', 'table', 'tr', 'td', 'div', 'span', 'img', 'p', 'h1', 'h2', 'h3', 'strong', 'body', 'br'])
                text = cleaner.clean_html(response.body)

                lang = spider.parameters["SpellingValidator"]["fields"]["language"]
                errors = self.check_errors({lang, "en_GB"}, text)
                errors_json = {
                    "page": response.url,
                    "errors": [{ "error": error } for error in errors],
                }

                result = Result(description=errors_json, execution=spider.execution, page=page, validator=validator)
                result.save()

        return response
Example #8
0
    def dataReceived(self, body):
        """Check a received image against the configured size limits.

        Feeds ``body`` to a fresh ``ImageFile.Parser`` and stores the image's
        width and height on the instance.  The ``Result`` for the current
        execution/page/validator is fetched (or a new one is started for
        ``self.page.url``).  An error is appended when ``self.filesize``
        exceeds the configured ``size`` (in KiB), and another when either
        dimension exceeds the configured ``width``/``height``.  The ``Result``
        is saved only if its error list is non-empty.

        If the parser cannot identify an image from ``body``, nothing is
        checked or saved.
        """
        parser = ImageFile.Parser()
        parser.feed(body)
        if parser.image is None:
            # The parser has no image yet (too little data, or not an image);
            # reading .size would raise AttributeError.
            return
        self.width, self.height = parser.image.size

        try:
            result = Result.objects.get(execution=self.spider.execution, page=self.page, validator=self.validator)
        except ObjectDoesNotExist:
            errors_json = { "page": self.page.url, "errors": [] }
            result = Result(description=errors_json, execution=self.spider.execution, page=self.page, validator=self.validator)

        limits = self.spider.parameters["ImagesValidator"]["fields"]
        width = int(limits["width"])
        height = int(limits["height"])
        size = int(limits["size"])

        # The configured size is in KiB; self.filesize is compared in bytes.
        if self.filesize > size * 1024:
            result.description['errors'].append({ "url": self.url, "error": "TamaƱo del fichero demasiado grande." })
        if self.width > width or self.height > height:
            result.description['errors'].append({ "url": self.url, "error": "Imagen demasiado grande." })

        # Saved whenever the list is non-empty, including errors that were
        # already on a fetched Result.
        if result.description["errors"]:
            result.save()