예제 #1
0
 def getExternalFile(self, url, output_path, headers=None):
     """Stream the content at ``url`` into the file at ``output_path``.

     Args:
         url: URL to fetch with an HTTP GET.
         output_path: Destination file path. When falsy, no request is made
             and the method returns ``None``.
         headers: Optional mapping of extra request headers. Entries from
             ``self.HEADERS`` override same-named entries. The mapping passed
             in is copied and never modified.

     Returns:
         A ``(output_path, response)`` tuple after a successful transfer, or
         ``None`` when ``output_path`` is falsy.

     Raises:
         RuntimeError: If the response status code is not 200.
     """
     import copy

     host = urlsplit(url).netloc
     if not output_path:
         return None

     if not headers:
         request_headers = self.HEADERS
     else:
         # Merge into a shallow copy: updating the caller's mapping in place
         # would leak self.HEADERS into an object this method does not own.
         request_headers = copy.copy(headers)
         request_headers.update(self.HEADERS)

     session = self.getExternalSession(host)
     r = session.get(url, headers=request_headers, stream=True, verify=certifi.where())
     if r.status_code != 200:
         file_error = "File transfer failed: [%s]" % output_path
         url_error = 'HTTP GET Failed for url: %s' % url
         host_error = "Host %s responded:\n\n%s" % (host, r.text)
         raise RuntimeError('%s\n\n%s\n%s' % (file_error, url_error, host_error))

     total = 0
     start = datetime.datetime.now()
     logging.debug("Transferring file %s to %s" % (url, output_path))
     with open(output_path, 'wb') as data_file:
         # Write chunk by chunk so the whole body is never held in memory.
         for chunk in r.iter_content(chunk_size=DEFAULT_CHUNK_SIZE):
             data_file.write(chunk)
             total += len(chunk)
     elapsed = datetime.datetime.now() - start
     summary = get_transfer_summary(total, elapsed)
     logging.info("File [%s] transfer successful. %s" % (output_path, summary))
     return output_path, r
예제 #2
0
    def getExternalUrl(self, url):
        """Return ``url`` as a fully qualified URL where a server can be inferred.

        A URL whose path starts with ``self.store_base`` and which itself
        starts with that prefix (no scheme or host) is prefixed with the
        store's server URI. Any other URL missing a scheme or a network
        location is prefixed with the scheme and host of the catalog's server
        URI. Everything else is returned unchanged.

        Args:
            url: The URL or server-relative path to qualify.

        Returns:
            The possibly-prefixed URL string.
        """
        parsed = urlsplit(url)

        if not parsed.path.startswith(self.store_base):
            if parsed.scheme and parsed.netloc:
                return url
            # Only the scheme and host of the catalog URI are reused.
            catalog_parts = urlsplit(self.catalog.get_server_uri())
            return catalog_parts.scheme + "://" + catalog_parts.netloc + url

        is_bare_path = url.startswith(self.store_base)
        origin = parsed.scheme + "://" + parsed.netloc
        if origin == self.store.get_server_uri() or is_bare_path:
            if is_bare_path:
                url = self.store.get_server_uri() + url
        return url
예제 #3
0
 def getHatracStore(self, url):
     """Return the Hatrac store that serves ``url``, or ``None`` if it is not a Hatrac URL.

     Args:
         url: A fully qualified URL or a server-relative path.

     Returns:
         ``self.store`` when ``url`` is a path starting with
         ``self.store_base`` or a full URL on the store's own server;
         ``None`` when the URL path does not start with ``self.store_base``.

     Raises:
         DerivaDownloadConfigurationError: If the URL path starts with
             ``self.store_base`` but names a server other than the one
             returned by ``self.store.get_server_uri()``.
     """
     urlparts = urlsplit(url)
     if not urlparts.path.startswith(self.store_base):
         return None
     if url.startswith(self.store_base):
         return self.store

     server_uri = urlparts.scheme + "://" + urlparts.netloc
     # Call the accessor once; formatting the bound method itself would put
     # its repr in the error message instead of the expected server URI.
     expected_uri = self.store.get_server_uri()
     if server_uri == expected_uri:
         return self.store

     # Open question from the original author: do we need to deal with the
     # possibility of a fully qualified URL referencing a different hatrac host?
     raise DerivaDownloadConfigurationError(
         "Got a reference to a Hatrac server [%s] that is different from the expected Hatrac server: %s" % (
             server_uri, expected_uri))
예제 #4
0
    def headForHeaders(self, url, raise_for_status=False):
        """Issue an HTTP HEAD for ``url`` and return the response headers.

        The request goes through the object returned by
        ``self.getHatracStore(url)`` when that is truthy. Otherwise it goes
        through the session from ``self.getExternalSession`` for the URL's
        hostname. ``self.HEADERS`` is sent as the request headers either way.

        Args:
            url: The URL to probe.
            raise_for_status: When true, call ``raise_for_status()`` on the
                response before reading its headers.

        Returns:
            The ``headers`` attribute of the response.
        """
        hatrac = self.getHatracStore(url)
        if hatrac:
            response = hatrac.head(url, headers=self.HEADERS)
        else:
            external = self.getExternalSession(urlsplit(url).hostname)
            response = external.head(url, headers=self.HEADERS)

        if raise_for_status:
            response.raise_for_status()
        return response.headers
예제 #5
0
    def process(self):
        """Resolve the ``target_url`` parameter and look up credentials for its host.

        Reads ``target_url`` from ``self.parameters``, substitutes
        ``self.envars`` into it with ``str.format`` when any are set, and
        strips surrounding spaces. The URL is then split (defaulting the
        scheme to ``https``) into ``self.scheme``, ``self.netloc`` and
        ``self.path``. ``self.credentials`` is set to the result of
        ``get_credential`` for the scheme-and-host URL, or an empty dict when
        that result is falsy.

        Returns:
            ``self.outputs``.

        Raises:
            DerivaDownloadConfigurationError: If ``target_url`` is missing or
                empty.
        """
        param_name = "target_url"
        raw_url = self.parameters.get(param_name)
        if not raw_url:
            raise DerivaDownloadConfigurationError(
                "%s is missing required parameter '%s' from %s" %
                (self.__class__.__name__, param_name,
                 PROCESSOR_PARAMS_KEY))

        resolved = raw_url.format(**self.envars) if self.envars else raw_url
        parts = urlsplit(resolved.strip(" "), "https")

        self.scheme = parts.scheme.lower()
        self.netloc = parts.netloc
        self.path = parts.path.strip("/")

        # Credentials are looked up by scheme and host only.
        host = urlunsplit((self.scheme, parts.netloc, "", "", ""))
        found = get_credential(host)
        if not found:
            logging.info("Unable to locate credential entry for: %s" % host)
        self.credentials = found if found else dict()

        return self.outputs