def concurrent_download(self, dest_dir, job):
    """Concurrently download the output of a completed Glacier job.

    :param dest_dir: destination directory to write the file to
    :param job: boto.glacier.job.Job object to download. Archive-retrieval
        jobs carry a ``source_path`` attribute; inventory-retrieval jobs do
        not, and are instead written out as a CSV named after the job id.
    :return: path to the downloaded file
    :raises IceItException: if the job hasn't completed yet
    :raises DownloadArchiveError: if the download still fails after all retries
    """
    if not job.completed:
        raise IceItException("Job '%s' hasn't completed. Unable to download" % job.id)

    try:
        dest_dir = os.path.join(dest_dir, os.path.dirname(job.source_path))
        dest_path = os.path.join(dest_dir, os.path.basename(job.source_path))
    except AttributeError:
        # Inventory retrieval jobs have no source_path: dump the archive
        # listing as CSV and return early.
        inventory = job.get_output()
        log.debug("Retrieved: %s" % inventory)
        dest_path = os.path.join(dest_dir, job.id)
        with open(dest_path, 'w') as f:
            # write, not writelines: each argument is one complete string
            f.write("ArchiveId,CreationDate,Size\n")
            for item in inventory['ArchiveList']:
                f.write("%s,%s,%s\n" % (item['ArchiveId'], item['CreationDate'], item['Size']))
        return dest_path

    log.debug("Will download file to %s" % dest_path)

    if not os.path.exists(dest_dir):
        log.debug("Creating destination path %s" % dest_dir)
        os.makedirs(dest_dir)

    downloader = ConcurrentDownloader(job=job)
    log.info("Downloading file from Glacier for job %s to '%s'" % (job.id, dest_path))

    max_retries = 5
    attempt = 1
    # BUG FIX: the original had no loop around the download, so a failed
    # attempt slept and then fell off the end of the function returning
    # None — the retry counter and back-off were dead code.
    while True:
        try:
            # dest_path already incorporates dest_dir; the original
            # re-joined them, duplicating the directory for relative paths.
            downloader.download(dest_path)
            return dest_path
        except DownloadArchiveError as e:
            if attempt >= max_retries:
                log.error("Tried and failed to download file '%s' %d times." % (dest_path, attempt))
                raise e

            log.info("Received error while trying to download '%s' from glacier. Will "
                     "retry %d more times after sleeping a while..." % (dest_path, max_retries-attempt))
            attempt += 1
            # exponential back-off on failure
            sleep(2**attempt)
def download(vault, job_id, filename='archive.tar.gz.gpg'):
    """Actually download the archive from Glacier.

    :param vault: Glacier vault the retrieval job belongs to
    :param job_id: id of the retrieval job to fetch
    :param filename: local filename to write the archive to
    :return: whatever ConcurrentDownloader.download() returns
    """
    job = vault.get_job(job_id)
    downloader = ConcurrentDownloader(job, CHUNK_SIZE * MB)
    return downloader.download(filename)