Exemple #1
0
    def file_hash(self):
        """ Hash the expected file.

        An empty SHA-1 hash object is returned when ``self.path`` is falsy, when
        the resolved path does not exist, or when it points to a directory.

        :return: The hash object.
        """
        if not self.path:
            return sha1()

        resolved = Path(self.path).resolve()
        if not resolved.exists() or resolved.is_dir():
            return sha1()

        # Bare-name lookup: this calls the ``file_hash`` function, not this method.
        return file_hash(resolved, self.BLOCK_SIZE)
Exemple #2
0
    def file_hash(self):
        """ Return the hash of the file.

        When ``self.path`` is falsy, does not exist once resolved, or is a
        directory, a fresh (empty) SHA-1 hash object is returned instead.

        :return: The hash object (not the digest or the hex digest!) of the file.
        """
        candidate = Path(self.path).resolve() if self.path else None
        hashable = candidate is not None and candidate.exists() and not candidate.is_dir()

        if hashable:
            # Bare-name lookup: this calls the ``file_hash`` function, not this method.
            return file_hash(candidate, self.BLOCK_SIZE)

        return sha1()
Exemple #3
0
    def _update_file_cache(source_file: CachedFile, target_file: Path):
        """ Deprecated. Refresh the cache record of a file and persist it.

        Stores the hex digest of ``target_file``, the current time
        (``datetime.now()``) and the file size reported by ``stat()`` on
        ``source_file``, then adds the record to the session obtained from
        ``get_session()`` and commits.

        :param source_file: the cached file record to update.
        :param target_file: the path of the file on disk to describe.
        :return: None.
        """
        db = get_session()

        # Attributes are assigned one by one, in this order, as each value is computed.
        digest = file_hash(target_file).hexdigest()
        source_file.hash = digest
        source_file.last_download = datetime.now()
        stat_result = target_file.stat()
        source_file.size = stat_result.st_size

        db.add(source_file)
        db.commit()
Exemple #4
0
    def get_file(self,
                 cached_file_id: int,
                 source_url: str,
                 target_file: Path,
                 refresh_interval: timedelta,
                 url_params=None,
                 show_progress=False,
                 force_download=False) -> Optional[dict]:
        """ Download a file either using the FTP downloader or the generic downloader.

        URLs whose scheme is ``ftp`` go through ``self._fetch_ftp_file``; every
        other URL goes through ``self._fetch_generic_file`` together with
        ``self.headers`` and ``self.auth``. Any exception raised along the way is
        printed and turned into a ``None`` result.

        :param cached_file_id: the id of the cached file.
        :param source_url: the url from which to get the file.
        :param target_file: the path to which the file should be downloaded.
        :param refresh_interval: the maximum age of the file.
        :param url_params: optional query parameters (generic downloader only).
        :param show_progress: whether to show a tqdm progress bar.
        :param force_download: whether to force download regardless of file presence.
        :return: A dict with the keys ``id``, ``hash``, ``last_download`` and
            ``size`` when the fetcher returned a truthy value, otherwise ``None``.
        """
        # Keyword options shared by both fetchers.
        fetch_opts = {
            'show_progress': show_progress,
            'force_download': force_download,
        }

        try:
            if up.urlparse(source_url).scheme == 'ftp':
                downloaded = self._fetch_ftp_file(source_url,
                                                  target_file,
                                                  refresh_interval,
                                                  **fetch_opts)
            else:
                downloaded = self._fetch_generic_file(source_url,
                                                      target_file,
                                                      refresh_interval,
                                                      url_params,
                                                      headers=self.headers,
                                                      auth=self.auth,
                                                      **fetch_opts)

            if not downloaded:
                return None

            return {
                'id': cached_file_id,
                'hash': file_hash(target_file).hexdigest(),
                'last_download': datetime.now(),
                'size': target_file.stat().st_size
            }

        except Exception as ex:
            # Best-effort boundary: report the failure and signal "nothing updated".
            print(f'Could not download {source_url}: {ex}')
            return None
Exemple #5
0
def test_update_cache_file(session, tmp_path):
    """Check that ``_update_file_cache`` stores hash, download time and size.

    A small file is written under ``tmp_path``, a cached-file record pointing to
    it is updated through the extractor, and the record is then re-read from the
    session to verify the stored values.
    """
    payload = b'hello world'

    # delete=False keeps the file on disk once the handle is closed; the
    # context manager ensures the handle is closed even if the write fails.
    with NamedTemporaryFile(dir=tmp_path, delete=False) as tf:
        tf.write(payload)

    api: models.API = APIFactory(name='my nice api')
    extractor = DefaultExtractor(api)

    cached_file: models.CachedFile = CachedFileFactory(path=tf.name)
    extractor._update_file_cache(cached_file, Path(tf.name))

    # Fetch the record again through the session rather than trusting the
    # in-memory object that was passed to the extractor.
    cached_file = session.query(models.CachedFile).get(cached_file.id)

    assert cached_file.path == tf.name
    assert cached_file.hash == file_hash(Path(tf.name)).hexdigest()
    assert cached_file.last_download is not None
    assert cached_file.size == len(payload)