def upload(source_dir, logger=None):
    """Upload the directory tree at ``source_dir``, retrying on failure.

    The upload goes through ``put.ArvPutCollectionWriter`` built from a
    ``put.ResumeCache``, so each retry is constructed from the same cache.

    Args:
        source_dir: Directory to upload; it is made absolute before use.
        logger: Logger for progress and errors.  Defaults to the
            ``"arvados"`` logger when ``None``.

    Returns:
        The value returned by the collection writer's ``finish()``.

    Raises:
        KeyboardInterrupt: Re-raised immediately, without retrying.
        Exception: The last upload error, once the backoff delay would
            exceed 256 seconds.
    """
    if logger is None:
        logger = logging.getLogger("arvados")

    source_dir = os.path.abspath(source_dir)

    # When TASK_WORK is set, keep the checkpoint in the current task's tmpdir;
    # otherwise use the cache path derived from the source directory.
    if 'TASK_WORK' in os.environ:
        resume_cache = put.ResumeCache(
            os.path.join(arvados.current_task().tmpdir,
                         "upload-output-checkpoint"))
    else:
        resume_cache = put.ResumeCache(
            put.ResumeCache.make_path(Args(source_dir)))

    reporter = put.progress_writer(machine_progress)
    bytes_expected = put.expected_bytes_for([source_dir])
    backoff = 1

    while True:
        try:
            out = put.ArvPutCollectionWriter.from_cache(
                resume_cache, reporter, bytes_expected)
            out.do_queued_work()
            out.write_directory_tree(source_dir, max_manifest_depth=0)
            return out.finish()
        except KeyboardInterrupt:
            logger.critical("caught interrupt signal 2")
            # Bare ``raise`` keeps the original traceback intact.
            raise
        except Exception:
            logger.exception("caught exception:")
            # Delay doubles before each retry: 2, 4, ..., 256 seconds.
            backoff *= 2
            if backoff > 256:
                logger.critical("Too many upload failures, giving up")
                raise
            logger.warning("Sleeping for %s seconds before trying again",
                           backoff)
            time.sleep(backoff)
def test_cache_persistent(self):
    # Data saved through one ResumeCache must be readable through a second
    # ResumeCache opened on the same path after the first one is closed.
    expected = ['test', 'list']
    cache_path = os.path.join(self.make_tmpdir(), 'cache')

    writer = arv_put.ResumeCache(cache_path)
    writer.save(expected)
    writer.close()

    self.last_cache = arv_put.ResumeCache(cache_path)
    reloaded = self.last_cache.load()
    self.assertEqual(expected, reloaded)
def test_resume_cache_with_current_stream_locators(self, keep_client_head):
    # A cache whose saved state carries '_current_stream_locators' can be
    # closed and then reopened from the same filename.
    # keep_client_head is presumably a mock injected by a patch decorator
    # that is outside this view -- confirm against the enclosing class.
    keep_client_head.side_effect = [True]
    state = {
        '_current_stream_locators': [
            '098f6bcd4621d373cade4e832627b4f6+4',
            '1f253c60a2306e0ee12fb6ce0c587904+6',
        ],
    }
    with tempfile.NamedTemporaryFile() as tmp:
        self.last_cache = arv_put.ResumeCache(tmp.name)
    self.last_cache.save(state)
    self.last_cache.close()

    reopened = arv_put.ResumeCache(self.last_cache.filename)
    self.assertNotEqual(None, reopened)
def test_destroy_cache(self):
    # After destroy(), the same path must open again without a lock
    # conflict, and the destroyed cache object must refuse to load.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    cache_path = tmp.name
    try:
        cache = arv_put.ResumeCache(cache_path)
        cache.save('test')
        cache.destroy()
        try:
            arv_put.ResumeCache(cache_path)
        except arv_put.ResumeCacheConflict:
            self.fail("could not load cache after destroying it")
        with self.assertRaises(ValueError):
            cache.load()
    finally:
        # delete=False above means the file is ours to remove.
        if os.path.exists(cache_path):
            os.unlink(cache_path)
def test_cache_stays_locked(self):
    # After save(), opening a second ResumeCache on the same path is
    # still expected to raise ResumeCacheConflict.
    with tempfile.NamedTemporaryFile() as tmp:
        self.last_cache = arv_put.ResumeCache(tmp.name)
        cache_path = tmp.name
    self.last_cache.save('test')
    with self.assertRaises(arv_put.ResumeCacheConflict):
        arv_put.ResumeCache(cache_path)
def test_restart_cache(self):
    # After restart(), load() is expected to raise ValueError and a second
    # ResumeCache on the same path is expected to hit a lock conflict.
    cache_path = os.path.join(self.make_tmpdir(), 'cache')
    cache = arv_put.ResumeCache(cache_path)
    cache.save('test')
    cache.restart()

    with self.assertRaises(ValueError):
        cache.load()
    with self.assertRaises(arv_put.ResumeCacheConflict):
        arv_put.ResumeCache(cache_path)
def test_multiple_cache_writes(self):
    # The most recent save() wins: load() must return only the last
    # object written.
    longer_first = ['long', 'long', 'list']
    expected = ['short', 'list']
    with tempfile.NamedTemporaryFile() as tmp:
        self.last_cache = arv_put.ResumeCache(tmp.name)
    # Write the longer object first so that leftover bytes from it would
    # show up if the cache file were not truncated by the second save.
    self.last_cache.save(longer_first)
    self.last_cache.save(expected)
    reloaded = self.last_cache.load()
    self.assertEqual(expected, reloaded)
def test_resume_cache_with_finished_streams_error_on_head(
        self, keep_client_head):
    # check_cache() must not let an error from the HEAD request escape
    # when the saved state carries '_finished_streams'.
    # keep_client_head is presumably a mock injected by a patch decorator
    # that is outside this view -- confirm against the enclosing class.
    keep_client_head.side_effect = Exception('Locator not found')
    thing = {}
    thing['_finished_streams'] = [[
        '.',
        [
            '098f6bcd4621d373cade4e832627b4f6+4',
            '1f253c60a2306e0ee12fb6ce0c587904+6',
        ],
    ]]
    with tempfile.NamedTemporaryFile() as cachefile:
        self.last_cache = arv_put.ResumeCache(cachefile.name)
    self.last_cache.save(thing)
    self.last_cache.close()
    resume_cache = arv_put.ResumeCache(self.last_cache.filename)
    self.assertNotEqual(None, resume_cache)
    # Call directly: any exception that escapes check_cache() fails the
    # test.  Passing the call's *result* to assertRaises(None, ...) would
    # assert nothing, and newer unittest versions reject None as the
    # expected exception type with a TypeError.
    resume_cache.check_cache()
def setUp(self):
    # Authorize as the 'active' test user and create a ResumeCache on a
    # temporary file for each test.
    super(ArvadosPutCollectionWriterTest, self).setUp()
    run_test_server.authorize_with('active')
    # delete=False: the file is not removed when the with block closes it.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        cache_path = tmp.name
        self.cache = arv_put.ResumeCache(cache_path)
        self.cache_filename = cache_path
def test_cache_is_locked(self):
    # While one ResumeCache is open on a path, opening a second one on the
    # same path is expected to raise ResumeCacheConflict.
    with tempfile.NamedTemporaryFile() as tmp:
        # Bound to a name so the first cache stays alive during the check.
        first_cache = arv_put.ResumeCache(tmp.name)
        with self.assertRaises(arv_put.ResumeCacheConflict):
            arv_put.ResumeCache(tmp.name)
def test_empty_cache(self):
    # load() on a cache that has never been saved to is expected to raise
    # ValueError.
    with tempfile.NamedTemporaryFile() as tmp:
        empty_cache = arv_put.ResumeCache(tmp.name)
    with self.assertRaises(ValueError):
        empty_cache.load()
def test_basic_cache_storage(self):
    # Round trip: what save() stores, load() must return unchanged.
    expected = ['test', 'list']
    with tempfile.NamedTemporaryFile() as tmp:
        self.last_cache = arv_put.ResumeCache(tmp.name)
    self.last_cache.save(expected)
    reloaded = self.last_cache.load()
    self.assertEqual(expected, reloaded)