Esempio n. 1
0
    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked

        - start_info: message to print (LOG.info) when transfer begins, cache is
          validated or or an error occurs; it can also be a callable.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing request headers and other metadata will be saved along side
          the target_file. (This is required for `use_cache` in future
          downloads)
          
        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)

        if not P.exists(target_file):
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simpy the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        req = None
        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url,
                                        headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization',
                           _create_http_basicauth_header(username, password))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False  # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        bs = 1024 * 8  # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        with open(target_file + '.part', 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))

                # Hook 2: transferring
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)

                if len(data) == 0:
                    break

                f.write(data)
                total_bytes_transferred += chunk_size

        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            custom_dict = props and props.custom or {
            }  # reuse existing props.custom
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True
Esempio n. 2
0
    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked

        - start_info: message to print (LOG.info) when transfer begins, cache is
          validated or or an error occurs; it can also be a callable.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing request headers and other metadata will be saved along side
          the target_file. (This is required for `use_cache` in future
          downloads)
          
        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)
            
        if not P.exists(target_file):
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simpy the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        req = None
        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url, headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)
            
        if auth:
            username, password = auth
            req.add_header('Authorization', _create_http_basicauth_header(
                username, password
            ))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        bs = 1024*8 # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        with open(target_file + '.part', 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str((
                        total_bytes_transferred, total_size))

                # Hook 2: transferring
                self.hook_transferring(chunk_size, total_bytes_transferred)
                
                data = u.read(bs)
                chunk_size = len(data)
                
                if len(data) == 0:
                    break
                
                f.write(data)
                total_bytes_transferred += chunk_size

        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()
        
        # save the new headers and other properties
        if save_properties:
            custom_dict = props and props.custom or {} # reuse existing props.custom
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True
Esempio n. 3
0
def _rollover_log(logfile, maxsize=(2<<20)):
    """Move $logfile to $logfile.old if its size exceeds `maxsize`"""
    if exists(logfile):
        filesize = os.stat(logfile)[stat.ST_SIZE]
        if filesize >= maxsize:
            sh.mv(logfile, logfile+'.old')
Esempio n. 4
0
def _rollover_log(logfile, maxsize=(2 << 20)):
    """Move $logfile to $logfile.old if its size exceeds `maxsize`"""
    if exists(logfile):
        filesize = os.stat(logfile)[stat.ST_SIZE]
        if filesize >= maxsize:
            sh.mv(logfile, logfile + '.old')