def get_file(filename=None):
    """
    Build a path below the 'tests/checker/data' directory.

    @param filename: optional entry name inside the data directory
    @return: path of the entry when filename is given and non-empty,
        otherwise the data directory itself; converted with str_text
    """
    path = os.path.join("tests", "checker", "data")
    if filename:
        path = os.path.join(path, filename)
    return str_text(path)
def log_url(self, url_data):
    """
    Log URL data in custom XML format.

    Writes one <urldata> element and flushes the output afterwards.
    Each child element is written only if the corresponding part is
    enabled via has_part() and, where checked below, the url_data
    attribute holds a usable value.

    @param url_data: checked URL object whose attributes are logged
    """
    self.xml_starttag(u'urldata')
    if self.has_part('url'):
        self.xml_tag(u"url", str_text(url_data.base_url))
    if url_data.name and self.has_part('name'):
        self.xml_tag(u"name", str_text(url_data.name))
    if url_data.parent_url and self.has_part('parenturl'):
        # position of the link inside the parent document
        position = {
            u'line': u"%s" % url_data.line,
            u'column': u"%s" % url_data.column,
        }
        self.xml_tag(u"parent", str_text(url_data.parent_url), attrs=position)
    if url_data.base_ref and self.has_part('base'):
        self.xml_tag(u"baseref", str_text(url_data.base_ref))
    if self.has_part("realurl"):
        self.xml_tag(u"realurl", str_text(url_data.url))
    if self.has_part("extern"):
        extern_flag = 1 if url_data.extern else 0
        self.xml_tag(u"extern", u"%d" % extern_flag)
    # negative download time or size is treated as "not available"
    if url_data.dltime >= 0 and self.has_part("dltime"):
        self.xml_tag(u"dltime", u"%f" % url_data.dltime)
    if url_data.size >= 0 and self.has_part("dlsize"):
        self.xml_tag(u"dlsize", u"%d" % url_data.size)
    if url_data.checktime and self.has_part("checktime"):
        self.xml_tag(u"checktime", u"%f" % url_data.checktime)
    if self.has_part("level"):
        self.xml_tag(u"level", u"%d" % url_data.level)
    if url_data.info and self.has_part('info'):
        self.xml_starttag(u"infos")
        for entry in url_data.info:
            self.xml_tag(u"info", entry)
        self.xml_endtag(u"infos")
    if url_data.modified and self.has_part('modified'):
        self.xml_tag(u"modified", self.format_modified(url_data.modified))
    if url_data.warnings and self.has_part('warning'):
        self.xml_starttag(u"warnings")
        for warn_tag, warn_text in url_data.warnings:
            # the tag attribute is only written for tagged warnings
            warn_attrs = {"tag": warn_tag} if warn_tag else {}
            self.xml_tag(u"warning", warn_text, warn_attrs)
        self.xml_endtag(u"warnings")
    if self.has_part("result"):
        # the result text, if any, becomes an attribute of <valid>
        result_attrs = {"result": url_data.result} if url_data.result else {}
        valid_flag = 1 if url_data.valid else 0
        self.xml_tag(u"valid", u"%d" % valid_flag, result_attrs)
    self.xml_endtag(u'urldata')
    self.flush()
def direct(self, url, resultlines, parts=None, recursionlevel=0,
           confargs=None, url_encoding=None):
    """
    Check url with expected result.

    Queues the URL on a test aggregate, runs the check and fails the
    test if the logger recorded differences.

    @param url: URL to check; must be a str_text instance
    @param resultlines: expected output, handed to the logger as 'expected'
    @param parts: optional logger parts, handed over as 'parts' if not None
    @param recursionlevel: stored as 'recursionlevel' in the config arguments
    @param confargs: optional dict of config arguments; it is copied, so
        the caller's dict is left unmodified
    @param url_encoding: passed through to get_url_from()
    """
    assert isinstance(url, str_text), repr(url)
    # Copy instead of assigning into the caller's dict: the original code
    # silently added 'recursionlevel' to the object passed in.
    confargs = {} if confargs is None else dict(confargs)
    confargs['recursionlevel'] = recursionlevel
    logargs = {'expected': resultlines}
    if parts is not None:
        logargs['parts'] = parts
    aggregate = get_test_aggregate(confargs, logargs)
    # initial URL has recursion level zero
    url_reclevel = 0
    url_data = get_url_from(url, url_reclevel, aggregate,
                            url_encoding=url_encoding)
    aggregate.urlqueue.put(url_data)
    linkcheck.director.check_urls(aggregate)
    diff = aggregate.config['logger'].diff
    if diff:
        lines = [u"Differences found testing %s" % url]
        # The first two diff entries are skipped; presumably these are the
        # "---"/"+++" header lines of a unified diff (confirm with logger).
        lines.extend(x.rstrip() for x in diff[2:])
        self.fail_unicode(str_text(os.linesep).join(lines))
def __bytes__(self):
    """
    Get URL info in encoded form.

    @return: text form of this object (str_text(self)) passed through
        the encode() method of the configured output logger
    """
    text = str_text(self)
    logger = self.aggregate.config['logger']
    return logger.encode(text)
def end_output(self, linknumber=-1, **kwargs):
    """
    Stores differences between expected and result in self.diff.

    Both self.expected and self.result are replaced by their normalized
    versions before a unified diff between them is appended line by
    line to self.diff. The linknumber and keyword arguments are unused.
    """
    self.expected = self.normalize(self.expected)
    self.result = self.normalize(self.result)
    delta = difflib.unified_diff(
        self.expected,
        self.result,
        fromfile="expected",
        tofile="result",
        lineterm="",
    )
    for entry in delta:
        if isinstance(entry, str_text):
            self.diff.append(entry)
        else:
            # The ---, +++ and @@ lines from diff format are ascii encoded.
            # Make them unicode.
            self.diff.append(str_text(entry, "ascii", "replace"))
def check_content(self):
    """Check content of URL.

    Content plugins run when the content can be retrieved. Exceptions
    listed in ExcList are turned into a warning instead of propagating.

    @return: True if content checking is enabled, the URL is valid and
        allows_recursion() is true; else False
    """
    if not (self.do_check_content and self.valid):
        return False
    # check content and recursion
    try:
        if self.can_get_content():
            self.aggregate.plugin_manager.run_content_plugins(self)
        if self.allows_recursion():
            return True
    except tuple(ExcList):
        reason = self.handle_exception()
        text = _("could not get content: %(msg)s") % {"msg": str_text(reason)}
        self.add_warning(text, tag=WARN_URL_ERROR_GETTING_CONTENT)
    return False
def file_test(self, filename, confargs=None):
    """
    Check <filename> with expected result in <filename>.result.

    Fails the test if the logger recorded differences or if internal
    errors were counted during the check.

    @param filename: name of the test file, resolved with get_url() and
        get_resultlines()
    @param confargs: optional config arguments for the test aggregate
    """
    url = self.get_url(filename)
    confargs = {} if confargs is None else confargs
    logargs = dict(expected=self.get_resultlines(filename))
    aggregate = get_test_aggregate(confargs, logargs, logger=self.logger)
    aggregate.urlqueue.put(get_url_from(url, 0, aggregate, extern=(0, 0)))
    linkcheck.director.check_urls(aggregate)
    logger = aggregate.config['logger']
    differences = logger.diff
    if differences:
        # report the checked URL followed by the diff lines
        self.fail_unicode(str_text(os.linesep).join([url] + differences))
    error_count = logger.stats.internal_errors
    if error_count:
        self.fail_unicode("%d internal errors occurred!" % error_count)
def handle_exception(self):
    """
    An exception occurred. Log it and set the cache flag.

    The exception is read from sys.exc_info(), so this must be called
    while an exception is being handled.

    @return: message of the form "<exception name>: <error message>",
        shortened with strformat.limit() to a length of 240
    """
    etype, evalue = sys.exc_info()[:2]
    log.debug(LOG_CHECK, "Error in %s: %s %s", self.url, etype, evalue,
              exception=True)
    # note: etype must be the exact class, not a subclass
    # The args check guards against exceptions raised without arguments;
    # indexing args[0] unconditionally would raise IndexError from inside
    # this error handler.
    if (etype in ExcNoCacheList
            or (etype == socket.error and evalue.args
                and evalue.args[0] == errno.EBADF)
            or not evalue):
        # EBADF occurs when operating on an already closed socket
        self.caching = False
    # format unicode message "<exception name>: <error message>"
    errmsg = str_text(etype.__name__)
    uvalue = strformat.unicode_safe(evalue)
    if uvalue:
        errmsg += u": %s" % uvalue
    # limit length to 240
    return strformat.limit(errmsg, length=240)
def local_check(self):
    """
    Local check function can be overridden in subclasses.

    Runs the connection check, content type and size detection and the
    connection plugins. Exceptions listed in ExcList are converted into
    an invalid result via set_result() instead of propagating.
    """
    log.debug(LOG_CHECK, "Checking %s", str_text(self))
    # strict extern URLs should not be checked
    assert not self.extern[1], 'checking strict extern URL'
    # check connection
    log.debug(LOG_CHECK, "checking connection")
    try:
        self.check_connection()
        self.set_content_type()
        self.add_size_info()
        self.aggregate.plugin_manager.run_connection_plugins(self)
    except tuple(ExcList) as exc:
        value = self.handle_exception()
        # make nicer error msg for unknown hosts
        # exc.args may be empty for socket errors raised without arguments;
        # check it first so the handler itself cannot fail with IndexError.
        # NOTE(review): -2 is presumably the resolver's "name not known"
        # code on the target platform -- confirm.
        if isinstance(exc, socket.error) and exc.args and exc.args[0] == -2:
            value = _('Hostname not found')
        elif isinstance(exc, UnicodeError):
            # idna.encode(host) failed
            value = _('Bad hostname %(host)r: %(msg)s') % {
                'host': self.host,
                'msg': str_text(value),
            }
        self.set_result(unicode_safe(value), valid=False)
def local_check(self):
    """
    Disable content checks.

    Only writes a debug log entry for this object; no checks are run.
    """
    description = str_text(self)
    log.debug(LOG_CHECK, "Checking %s", description)
def indent(text, indent_string=" "):
    """
    Indent each line of text with the given indent string.

    @param text: value to indent; converted with str_text and split
        into lines with splitlines()
    @param indent_string: prefix put in front of every line
    @return: the prefixed lines joined with os.linesep
    """
    indented = []
    for line in str_text(text).splitlines():
        indented.append("%s%s" % (indent_string, line))
    return os.linesep.join(indented)
def write_real(self, url_data):
    """
    Write url_data.url.

    The "realurl" part label and its spacing are written first, then
    the URL itself on the same line using the colorreal color.
    """
    label = self.part("realurl")
    padding = self.spaces("realurl")
    self.write(label + padding)
    self.writeln(str_text(url_data.url), color=self.colorreal)
def writeln(self, s=u"", **args):
    """
    Write string to output descriptor plus a newline.

    @param s: text to write; os.linesep is appended to it
    @param args: keyword arguments passed through unchanged to write()
    """
    newline = str_text(os.linesep)
    line = u"%s%s" % (s, newline)
    self.write(line, **args)