Beispiel #1
0
    def get_remote_index_last_download_attempt_time(self, remote_repository):
        """Tell when a download of the remote index was last *attempted* (UTC)

        An attempt does not imply that the index was actually transferred; an
        unmodified ETag short-circuits the download.

        Returns None when no URL properties can be loaded for the index file
        (as in, 'pypm sync' or an equivalent was never run in the first
        place).
        """
        # The originally downloaded index file sits in the same directory as
        # the local index.
        index_dir = dirname(self.get_local_index_path(remote_repository))
        index_path = join(index_dir, remote_repository.REMOTE_INDEX_FILENAME)

        loaded = URLGetter.URLProperties(index_path).load()
        if not loaded:
            return None
        return BareDateTime.to_datetime(loaded.custom['last_attempt_utc'])
Beispiel #2
0
    def get_remote_index_last_download_attempt_time(self, remote_repository):
        """UTC datetime of the most recent *attempt* to download the index

        The attempt may have ended without a transfer, because an unmodified
        ETag lets the download be skipped.

        None is returned when no URL properties are available for the index
        file (as in, 'pypm sync' or an equivalent was never run in the first
        place).
        """
        local_index_dir = P.dirname(
            self.get_local_index_path(remote_repository))
        saved_props = URLProperties(
            P.join(local_index_dir,
                   remote_repository.REMOTE_INDEX_FILENAME)).load()
        # No saved properties means there is no recorded attempt.
        return (BareDateTime.to_datetime(saved_props.custom['last_attempt_utc'])
                if saved_props else None)
Beispiel #3
0
    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Data is streamed into `target_file + '.part'` and only moved over
        `target_file` once the transfer has finished.

        Appropriate hook functions are automatically invoked
        (`hook_initialize`, `hook_transferring`, `hook_completed`).

        - start_info: message to print (LOG.info) when transfer begins, cache
          is validated or an error occurs. A string is formatted with
          `status` ('Get' or 'Hit'); a callable is called with the status and
          its return value, if truthy, is logged.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing the response headers and other metadata will be saved
          along side the target_file. (This is required for `use_cache` in
          future downloads)

        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened; False when the
        server answered 304 for a cache-validating request. Any other
        HTTPError is re-raised.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)

        if not P.exists(target_file):
            # nothing on disk that could be revalidated
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simply the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url,
                                        headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization',
                           _create_http_basicauth_header(username, password))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False  # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        try:
            bs = 1024 * 8  # block size; from urllib.py:urlretrieve
            headers = u.info()

            # detect total size of the file to be downloaded
            if 'Content-Length' in headers:
                total_size = int(headers['Content-Length'])
                assert total_size >= 0
            else:
                total_size = None

            total_bytes_transferred = 0
            chunk_size = 0

            # Hook 1: initialize
            self.hook_initialize(total_size)

            with open(target_file + '.part', 'wb') as f:
                while True:
                    if total_size:
                        assert total_bytes_transferred <= total_size, str(
                            (total_bytes_transferred, total_size))

                    # Hook 2: transferring (first call reports a 0-byte chunk)
                    self.hook_transferring(chunk_size,
                                           total_bytes_transferred)

                    data = u.read(bs)
                    chunk_size = len(data)

                    if not data:
                        break

                    f.write(data)
                    total_bytes_transferred += chunk_size
        finally:
            # always release the response, even when a hook, the read or the
            # write fails part-way through
            u.close()

        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            # reuse existing props.custom when there is one
            custom_dict = (props.custom if props else None) or {}
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True
Beispiel #4
0
    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        The payload is first written to `target_file + '.part'`; that file is
        moved onto `target_file` only after the whole body has been read.

        Appropriate hook functions are automatically invoked
        (`hook_initialize`, `hook_transferring`, `hook_completed`).

        - start_info: message to print (LOG.info) when transfer begins, cache
          is validated or an error occurs; it can also be a callable. A
          string is formatted with `status` ('Get' or 'Hit'); a callable
          receives the status and its truthy return value is logged.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing the response headers and other metadata will be saved
          along side the target_file. (This is required for `use_cache` in
          future downloads)

        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened. False is returned
        when a cache-validating request is answered with 304; every other
        HTTPError propagates to the caller.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)

        if not P.exists(target_file):
            # without a local copy there is nothing to validate
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simply the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(
                    self.url, headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization', _create_http_basicauth_header(
                username, password
            ))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False  # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        # From here on the response is open; make sure it is closed no matter
        # how the transfer ends.
        try:
            bs = 1024 * 8  # block size; from urllib.py:urlretrieve
            headers = u.info()

            # detect total size of the file to be downloaded
            if 'Content-Length' in headers:
                total_size = int(headers['Content-Length'])
                assert total_size >= 0
            else:
                total_size = None

            total_bytes_transferred = 0
            chunk_size = 0

            # Hook 1: initialize
            self.hook_initialize(total_size)

            with open(target_file + '.part', 'wb') as f:
                while True:
                    if total_size:
                        assert total_bytes_transferred <= total_size, str((
                            total_bytes_transferred, total_size))

                    # Hook 2: transferring
                    self.hook_transferring(chunk_size,
                                           total_bytes_transferred)

                    data = u.read(bs)
                    chunk_size = len(data)

                    if not data:
                        break  # end of response body

                    f.write(data)
                    total_bytes_transferred += chunk_size
        finally:
            u.close()

        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            # reuse existing props.custom when there is one
            custom_dict = (props.custom if props else None) or {}
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True
Beispiel #5
0
    def get(self,
            target_file,
            info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked
        (`hook_initialize`, `hook_transferring`, `hook_completed`).

        - info: message to print (LOG.info) if transfer actually happens

        - use_cache: if True and saved properties with an ETag exist, the
          request carries `If-None-Match` so the server can report that the
          file has not changed. For this to work effectively, the previous
          ETag must be made available in the properties file that is created
          when `save_properties` flag is set to True in the *past* download
          call. If no ETag was saved, a plain request is made instead.
          This method does not itself check that `target_file` exists.
          NOTE(review): a 304 reply is not caught here; with the default
          urllib2 opener it presumably surfaces as HTTPError from `urlopen`
          -- confirm that callers handle it.

        - save_properties: if True, a file named .$target_file.urlprops
          containing the response headers and other metadata is saved along
          side the target_file. (This is required for `use_cache` in future
          downloads)

        - auth: (username, password) -- optional http basic auth data
        """
        assert target_file

        urlprops = self.URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value for last_attempt_utc *now* so we don't
            # have to deal with when an exception arises later.
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        if use_cache and props:
            # The saved header name may be spelled 'ETag' or 'etag', or be
            # missing altogether; indexing ['etag'] directly raised KeyError.
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = urllib2.Request(
                    self.url, headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = urllib2.Request(self.url)
        else:
            req = urllib2.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization',
                           _create_http_basicauth_header(username, password))

        # Set User-Agent
        # XXX: urllib2.py does not expose its default user-agent string; so
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % urllib2.__version__
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        req.add_header('User-Agent', pypm_user_agent)

        u = urllib2.urlopen(req)
        try:
            bs = 1024 * 8  # from urllib.py:urlretrieve
            headers = u.info()

            # detect total size of the file to be downloaded
            if 'Content-Length' in headers:
                total_size = int(headers['Content-Length'])
                assert total_size >= 0
            else:
                total_size = None

            total_bytes_transferred = 0
            chunk_size = 0

            self.hook_initialize(total_size)

            if info:
                LOG.info(info)

            with open(target_file, 'wb') as f:
                while True:
                    if total_size:
                        assert total_bytes_transferred <= total_size, str(
                            (total_bytes_transferred, total_size))

                    self.hook_transferring(chunk_size,
                                           total_bytes_transferred)

                    data = u.read(bs)
                    chunk_size = len(data)

                    # An empty read marks end of body; `not data` works for
                    # both str and bytes, unlike a comparison against ''.
                    if not data:
                        break

                    f.write(data)
                    total_bytes_transferred += chunk_size
        finally:
            # release the response even if a hook, read or write fails
            u.close()

        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            # reuse existing props.custom when there is one
            custom_dict = (props.custom if props else None) or {}
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)
Beispiel #6
0
    def get(self, target_file, info=None,
            use_cache=False, save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked
        (`hook_initialize`, `hook_transferring`, `hook_completed`).

        - info: message to print (LOG.info) if transfer actually happens

        - use_cache: if True and saved properties with an ETag exist, the
          request carries `If-None-Match` so the server can report that the
          file has not changed. For this to work effectively, the previous
          ETag must be made available in the properties file that is created
          when `save_properties` flag is set to True in the *past* download
          call. Without a saved ETag a warning is logged and a plain request
          is made. This method does not itself check that `target_file`
          exists.
          NOTE(review): a 304 reply is not caught here; it presumably
          surfaces as HTTPError from `urlopen` -- confirm that callers
          handle it.

        - save_properties: if True, a file named .$target_file.urlprops
          containing the response headers and other metadata is saved along
          side the target_file. (This is required for `use_cache` in future
          downloads)

        - auth: (username, password) -- optional http basic auth data
        """
        assert target_file

        urlprops = self.URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value for last_attempt_utc *now* so we don't
            # have to deal with when an exception arises later.
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        if use_cache and props:
            # the saved header name may be spelled either way
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(
                    self.url, headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization', _create_http_basicauth_header(
                username, password
            ))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        u = six.moves.urlopen(req)
        try:
            bs = 1024 * 8  # from urllib.py:urlretrieve
            headers = u.info()

            # detect total size of the file to be downloaded
            if 'Content-Length' in headers:
                total_size = int(headers['Content-Length'])
                assert total_size >= 0
            else:
                total_size = None

            total_bytes_transferred = 0
            chunk_size = 0

            self.hook_initialize(total_size)

            if info:
                LOG.info(info)

            with open(target_file, 'wb') as f:
                while True:
                    if total_size:
                        assert total_bytes_transferred <= total_size, str((
                            total_bytes_transferred, total_size))

                    self.hook_transferring(chunk_size,
                                           total_bytes_transferred)

                    data = u.read(bs)
                    chunk_size = len(data)

                    if not data:
                        break  # end of response body

                    f.write(data)
                    total_bytes_transferred += chunk_size
        finally:
            # release the response even if a hook, read or write fails
            u.close()

        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            # reuse existing props.custom when there is one
            custom_dict = (props.custom if props else None) or {}
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)