def _get_remote_projects(self):
    """Fetch the mirror's simple index page and return the set of
    project names it links to.

    Raises self.UpstreamError when the remote does not answer 200.
    """
    # Enforce a timeout floor of 30 seconds (60 on a replica): the
    # project list can be quite large and the master might take a
    # while to process it.
    floor = 60 if self.xom.is_replica() else 30
    timeout = max(self.timeout, floor)
    response = self.httpget(
        self.mirror_url, allow_redirects=True,
        extra_headers={"Accept": "text/html"}, timeout=timeout)
    if response.status_code != 200:
        raise self.UpstreamError(
            "URL %r returned %s %s",
            self.mirror_url, response.status_code, response.reason)
    page = HTMLPage(response.text, response.url)
    baseurl = URL(response.url)
    basehost = baseurl.replace(path='')
    names = set()
    for link in page.links:
        # strip trailing slashes so basename yields the project name
        candidate = URL(link.url).asfile()
        if (candidate.is_valid_http_url()
                and candidate.path.startswith(baseurl.path)
                and basehost == candidate.replace(path='')):
            names.add(candidate.basename)
    return names
def _get_remote_projects(self):
    """Fetch the mirror's simple index page and return the set of
    project names it links to.

    Raises self.UpstreamError when the remote does not answer 200.
    """
    headers = {"Accept": "text/html"}
    # Use a minimum timeout of 30 seconds (60 when running as replica,
    # consistent with the sibling implementation above): the project
    # list can be quite large and the master might take a while to
    # process it, so an unbounded/default timeout is not appropriate.
    if self.xom.is_replica():
        timeout = max(self.timeout, 60)
    else:
        timeout = max(self.timeout, 30)
    response = self.httpget(
        self.mirror_url, allow_redirects=True, extra_headers=headers,
        timeout=timeout)
    if response.status_code != 200:
        # include response.reason so failures are diagnosable, not just
        # the bare status code
        raise self.UpstreamError(
            "URL %r returned %s %s",
            self.mirror_url, response.status_code, response.reason)
    page = HTMLPage(response.text, response.url)
    projects = set()
    baseurl = URL(response.url)
    basehost = baseurl.replace(path='')
    for link in page.links:
        newurl = URL(link.url)
        # remove trailing slashes, so basename works correctly
        newurl = newurl.asfile()
        if not newurl.is_valid_http_url():
            continue
        # keep only links under the index URL on the same host
        if not newurl.path.startswith(baseurl.path):
            continue
        if basehost != newurl.replace(path=''):
            continue
        projects.add(newurl.basename)
    return projects