Ejemplo n.º 1
0
 def _check_redirect_to_other_host(self, url):
     """Report an error when ``url`` redirects to a different hostname.

     Nothing happens when ``self._redirect_url()`` yields a falsy value, or
     when ``url`` and the redirect target have equal hostnames according to
     ``url_utils.hostname_from_url``. Otherwise a two-line message is passed
     to ``error_utils.exit_with_message``.
     """
     target = self._redirect_url()
     if target:
         source_host = url_utils.hostname_from_url(url)
         target_host = url_utils.hostname_from_url(target)
         same_host = source_host == target_host
         if not same_host:
             headline = "%s redirects to other host %s" % (url, target)
             advice = "\nPlease provide non-redirecting URL"
             # NOTE(review): exit_with_message presumably terminates the
             # program -- confirm against error_utils.
             error_utils.exit_with_message(headline + advice)
Ejemplo n.º 2
0
 def _analyze(self, link):
     """Classify ``link`` as internal or external to ``self.base_url``.

     When ``self.base_href`` is set, the link is first resolved against it.
     A link with no hostname, or with the same hostname as
     ``self.base_url``, is internal: it is made absolute against
     ``self.base_url`` and then passed through ``url_utils.relative_url``.
     Any other link is external and returned as resolved so far.

     Returns a ``(kind, link)`` tuple, where ``kind`` is ``"internal"`` or
     ``"external"``.
     """
     site_host = url_utils.hostname_from_url(self.base_url)
     resolved = link
     if self.base_href:
         resolved = url_utils.make_absolute_url(self.base_href, resolved)
     resolved_host = url_utils.hostname_from_url(resolved)

     is_internal = not resolved_host or resolved_host == site_host
     if not is_internal:
         return "external", resolved

     absolute = url_utils.make_absolute_url(self.base_url, resolved)
     return "internal", url_utils.relative_url(absolute)
Ejemplo n.º 3
0
 def _analyze(self, link):
     """Classify ``link`` as internal or external to ``self.base_url``.

     The link is first passed through ``url_utils.prepend_missing_scheme``
     together with ``self.base_url``. When ``self.base_href_tag`` is set,
     the link is then resolved against it. A link with no hostname, or with
     the same hostname as ``self.base_url``, is internal: it is made
     absolute against ``self.base_url`` and then passed through
     ``url_utils.relative_url``. Any other link is external.

     Returns a ``(kind, link)`` tuple, where ``kind`` is ``"internal"`` or
     ``"external"``.
     """
     resolved = url_utils.prepend_missing_scheme(link, self.base_url)
     if self.base_href_tag:
         resolved = url_utils.make_absolute_url(self.base_href_tag, resolved)
     resolved_host = url_utils.hostname_from_url(resolved)
     site_host = url_utils.hostname_from_url(self.base_url)

     is_internal = not resolved_host or resolved_host == site_host
     if not is_internal:
         return "external", resolved

     absolute = url_utils.make_absolute_url(self.base_url, resolved)
     return "internal", url_utils.relative_url(absolute)
Ejemplo n.º 4
0
 def __init__(self, page_start, sqlite_file, pages_list_file, config_to_save):
     """Store the constructor arguments and derive the start page's host.

     ``page_host`` is computed from ``page_start`` with
     ``url_utils.hostname_from_url``. ``pages_list`` is the result of
     ``self._parse_pages_list_file(pages_list_file)`` when a truthy
     ``pages_list_file`` is given, and ``False`` otherwise. ``conn`` starts
     out as ``None``.
     """
     self.page_start = page_start
     self.page_host = url_utils.hostname_from_url(page_start)
     self.sqlite_file = sqlite_file
     if pages_list_file:
         self.pages_list = self._parse_pages_list_file(pages_list_file)
     else:
         self.pages_list = False
     self.config_to_save = config_to_save
     self.conn = None
    def _get_resource_id(self, url, is_truncated):
        """Return the ``devtools_resource`` row id for ``url``, inserting if absent.

        ``url_utils.internal_relative_url`` is called with ``url`` and the
        hostname of ``self.url``. A result of ``False`` marks the resource as
        external and keeps ``url`` unchanged; any other result replaces
        ``url`` and marks the resource as internal.

        The row is looked up by ``(url, is_truncated, is_external)``. When no
        row matches, one is inserted and its ``lastrowid`` is returned. No
        commit is issued in this method.
        """
        host = url_utils.hostname_from_url(self.url)
        internal_path = url_utils.internal_relative_url(url, host)
        if internal_path is False:
            is_external = 1
        else:
            is_external = 0
            url = internal_path
        params = (url, is_truncated, is_external)

        cursor = self.conn.cursor()
        cursor.execute(
            "SELECT id FROM devtools_resource WHERE url = ? AND is_truncated = ? AND is_external = ?",
            params)
        row = cursor.fetchone()
        if not row:
            cursor.execute(
                "INSERT INTO devtools_resource (url, is_truncated, is_external) VALUES (?, ?, ?)",
                params)
            return cursor.lastrowid
        return row[0]
Ejemplo n.º 6
0
 def __init__(self):
     """Initialise host, pages list and connection state.

     ``page_host`` is the hostname of ``config.start_url`` as computed by
     ``url_utils.hostname_from_url``. ``pages_list`` is whatever
     ``self._parse_pages_list_file()`` returns. ``conn`` starts out as
     ``None``.
     """
     start_host = url_utils.hostname_from_url(config.start_url)
     self.page_host = start_host
     # page_host is assigned before parsing, as in the original ordering;
     # the parser may rely on it (not visible from here).
     parsed_pages = self._parse_pages_list_file()
     self.pages_list = parsed_pages
     self.conn = None