def file_hash(self):
    """
    Hash the file referenced by ``self.path``.

    An empty SHA-1 hash object is returned when no path is set, or when the
    resolved path does not exist or points at a directory.

    :return: The hash object.
    """
    if not self.path:
        return sha1()

    resolved = Path(self.path).resolve()
    if not resolved.exists() or resolved.is_dir():
        return sha1()

    # The bare name resolves to the free function ``file_hash`` (presumably
    # the module-level helper), not to this method.
    return file_hash(resolved, self.BLOCK_SIZE)
def file_hash(self):
    """
    Compute the hash of the file stored at ``self.path``.

    Falls back to a fresh, empty SHA-1 object when ``self.path`` is unset or
    when the resolved path is missing or is a directory.

    :return: The hash object of the file -- the object itself, not its
        digest or hex digest.
    """
    if self.path:
        candidate = Path(self.path).resolve()
        can_be_hashed = candidate.exists() and not candidate.is_dir()
        if can_be_hashed:
            # Calls the free function ``file_hash`` (presumably the
            # module-level helper), reading the file in BLOCK_SIZE chunks.
            return file_hash(candidate, self.BLOCK_SIZE)

    return sha1()
def _update_file_cache(source_file: CachedFile, target_file: Path):
    """
    Deprecated.

    Refresh the ``hash``, ``last_download`` and ``size`` attributes of
    ``source_file`` from ``target_file`` and commit the record through the
    session returned by ``get_session()``.

    :param source_file: the cached-file record to update.
    :param target_file: the file on disk that the record describes.
    :return: None.
    """
    db_session = get_session()

    digest = file_hash(target_file)
    source_file.hash = digest.hexdigest()
    source_file.last_download = datetime.now()

    file_stats = target_file.stat()
    source_file.size = file_stats.st_size

    db_session.add(source_file)
    db_session.commit()
def get_file(self, cached_file_id: int, source_url: str, target_file: Path, refresh_interval: timedelta,
             url_params=None, show_progress=False, force_download=False) -> Optional[dict]:
    """
    Fetch a file with the FTP downloader or the generic downloader.

    URLs whose scheme is ``ftp`` go through ``_fetch_ftp_file``; every other
    URL goes through ``_fetch_generic_file`` together with ``self.headers``
    and ``self.auth``. Any exception raised along the way is printed and
    turned into a ``None`` result.

    :param cached_file_id: the id of the cached file, echoed back in the result.
    :param source_url: the url from which to get the file.
    :param target_file: the path to which the file should be downloaded.
    :param refresh_interval: the maximum age of the file (forwarded to the fetcher).
    :param url_params: optional query parameters (generic downloader only).
    :param show_progress: whether to show a progress bar (forwarded to the fetcher).
    :param force_download: whether to force download regardless of file presence.
    :return: A dict with the ``id``, ``hash``, ``last_download`` and ``size``
        of the cached file when the fetcher reports a truthy result,
        otherwise ``None``.
    """
    try:
        shared_options = {'show_progress': show_progress, 'force_download': force_download}

        scheme = up.urlparse(source_url).scheme
        if scheme == 'ftp':
            downloaded = self._fetch_ftp_file(source_url, target_file, refresh_interval, **shared_options)
        else:
            downloaded = self._fetch_generic_file(
                source_url, target_file, refresh_interval, url_params,
                headers=self.headers, auth=self.auth, **shared_options)

        if not downloaded:
            return None

        # Describe the file as it now exists on disk.
        hex_digest = file_hash(target_file).hexdigest()
        downloaded_at = datetime.now()
        byte_count = target_file.stat().st_size
        return {
            'id': cached_file_id,
            'hash': hex_digest,
            'last_download': downloaded_at,
            'size': byte_count,
        }
    except Exception as ex:
        # Best effort: report the failure and signal "nothing updated".
        print(f'Could not download {source_url}: {ex}')
        return None
def test_update_cache_file(session, tmp_path):
    """
    Check that ``_update_file_cache`` stores the hash, download time and
    size of a file on its cached-file record.
    """
    payload = b'hello world'
    # delete=False keeps the file on disk after the handle is closed.
    with NamedTemporaryFile(dir=tmp_path, delete=False) as handle:
        handle.write(payload)
    file_name = handle.name

    api: models.API = APIFactory(name='my nice api')
    extractor = DefaultExtractor(api)
    record: models.CachedFile = CachedFileFactory(path=file_name)

    extractor._update_file_cache(record, Path(file_name))

    # Reload the record through the session before checking its fields.
    record = session.query(models.CachedFile).get(record.id)
    assert record.path == file_name
    assert record.hash == file_hash(Path(file_name)).hexdigest()
    assert record.last_download is not None
    assert record.size == len(payload)