def _write_archive_retrieval_job(f, job, part_size, encryptor=None):
    """Download a completed archive-retrieval *job* into the file object *f*.

    :param f: writable file object that receives the archive bytes
    :param job: glacier job whose output is downloaded
    :param part_size: archives larger than this are fetched concurrently;
        also used as the per-part size for the concurrent download
    :param encryptor: optional encryptor; when given, the download is staged
        in a temporary file instead of being written to *f* directly
    """
    if encryptor:
        # Stage the download in a temp file so the encryptor can process it.
        destfile = tempfile.NamedTemporaryFile()
    else:
        destfile = f

    if job.archive_size > part_size:
        # Large archive: download parts in parallel.
        # NOTE(review): this passes destfile.name, which assumes *f* has a
        # .name attribute when no encryptor is given — confirm callers always
        # pass a real named file here.
        downloader = ConcurrentDownloader(
            job=job, part_size=part_size, num_threads=DEFAULT_NUM_THREADS
        )
        downloader.download(destfile.name)
    else:
        # Small archive: single request.
        destfile.write(job.get_output().read())

    # Make sure that the file now exactly matches the downloaded archive,
    # even if the file existed before and was longer.
    try:
        # BUG FIX: original used the Python-2-only `except IOError, e`
        # syntax, which is a SyntaxError on Python 3.
        destfile.truncate(job.archive_size)
    except IOError as e:
        # Allow ESPIPE, since the "file" couldn't have existed
        # before in this case.
        if e.errno != errno.ESPIPE:
            raise
def concurrent_download(self, dest_dir, job):
    """
    Concurrently download a job

    :param dest_dir: Destination directory to write the file to
    :param job: boto.glacier.job.Job object (with a source_path attribute)
        to download
    :return: Path to downloaded file
    :raises IceItException: if the job hasn't completed yet
    :raises DownloadArchiveError: after max_retries failed download attempts
    """
    if not job.completed:
        raise IceItException("Job '%s' hasn't completed. Unable to download" % job.id)

    try:
        dest_dir = os.path.join(dest_dir, os.path.dirname(job.source_path))
        dest_path = os.path.join(dest_dir, os.path.basename(job.source_path))
    except AttributeError:
        # Inventory retrieval jobs have no source_path: dump the archive
        # list to a CSV named after the job id instead.
        inventory = job.get_output()
        log.debug("Retrieved: %s" % inventory)
        dest_path = os.path.join(dest_dir, job.id)
        with open(dest_path, 'w') as f:
            f.writelines("ArchiveId,CreationDate,Size\n")
            for item in inventory['ArchiveList']:
                f.writelines("%s,%s,%s\n" % (item['ArchiveId'], item['CreationDate'], item['Size']))
        return dest_path

    log.debug("Will download file to %s" % dest_path)
    if not os.path.exists(dest_dir):
        log.debug("Creating destination path %s" % dest_dir)
        os.makedirs(dest_dir)

    downloader = ConcurrentDownloader(job=job)
    log.info("Downloading file from Glacier for job %s to '%s'" % (job.id, dest_path))

    max_retries = 5
    attempt = 1
    # BUG FIX: the original had retry bookkeeping but no loop — after the
    # first failure it slept once, fell off the end, and returned None.
    while True:
        try:
            # BUG FIX: dest_path already includes dest_dir; the original
            # joined them again, duplicating the directory prefix when
            # dest_dir is a relative path.
            downloader.download(dest_path)
            return dest_path
        except DownloadArchiveError as e:
            if attempt >= max_retries:
                log.error("Tried and failed to download file '%s' %d times." % (dest_path, attempt))
                raise e
            log.info("Received error while trying to download '%s' from glacier. Will "
                     "retry %d more times after sleeping a while..." % (dest_path, max_retries - attempt))
            attempt += 1
            # exponential back-off on failure
            sleep(2 ** attempt)
def retrieve_archive(archiveid, vault, jobid=None):
    """Retrieve an archive from a Glacier vault and download it locally.

    :param archiveid: id of the archive to retrieve
    :param vault: name of the vault holding the archive
    :param jobid: optional id of an already-initiated retrieval job; the
        download only proceeds when this is given
    """
    glacier_backend = GlacierBackend(vault)
    job = glacier_backend.retrieve_archive(archiveid, jobid)
    if jobid is not None:
        sdb = SimpleDB()
        r = sdb.list_archives(vault)
        try:
            filename = [a['ArchiveDescription'] for a in r if a['ArchiveId'] == archiveid][0]
        except (IndexError, KeyError):
            # BUG FIX: narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Only "archive missing from the
            # inventory" should fall back to a generated name.
            log.warning("Archive not found in SimpleDB inventory")
            filename = archiveid[:8] + '.out'
        log.info("Output filename is {}".format(filename))
        cd = ConcurrentDownloader(job, part_size=4194304, num_threads=8)
        cd.download(filename)
def download_archive(archiveID, vault, outputfile, layer2):
    """Initiate an archive retrieval job and download it once it completes.

    Blocks, polling Glacier every `job_check_delay` seconds, until the
    retrieval job finishes (this can take hours).

    :param archiveID: id of the archive to retrieve
    :param vault: name of the vault holding the archive
    :param outputfile: local path the archive is written to
    :param layer2: boto glacier layer2 connection used to access the vault
    """
    vault_object = layer2.get_vault(vault)
    # BUG FIX: corrected the typo "Retreiving" in the job description.
    job = vault_object.retrieve_archive(archiveID, description="Retrieving archive")
    job_id = job.id
    print(job_id)
    # Re-fetch the job each cycle so job.completed reflects current state.
    while not job.completed:
        job = vault_object.get_job(job_id)
        time.sleep(job_check_delay)
    # (The original used a redundant while/else here; the loop has no
    # `break`, so the else-body always ran — flattened for clarity.)
    downloader = ConcurrentDownloader(job)
    downloader.download(outputfile)
def download(vault, job_id, filename='archive.tar.gz.gpg'):
    """Fetch the retrieval job from the vault and download its archive.

    :param vault: vault the job belongs to
    :param job_id: id of the completed retrieval job
    :param filename: local path to write the archive to
    :return: whatever ConcurrentDownloader.download returns
    """
    job = vault.get_job(job_id)
    downloader = ConcurrentDownloader(job, CHUNK_SIZE * MB)
    return downloader.download(filename)