def test_corrupt_download(self):
    # Checks that the worker reports a sha1 mismatch (return code 20) when
    # the file produced during the fake gsutil run does not hash to the
    # requested sha1.
    sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
    q = Queue.Queue()
    out_q = Queue.Queue()
    ret_codes = Queue.Queue()
    tmp_dir = tempfile.mkdtemp()
    # mkdtemp() leaves cleanup to the caller, so remove the directory even
    # when an assertion below fails.
    try:
        output_filename = os.path.join(tmp_dir, 'lorem_ipsum.txt')
        q.put((sha1_hash, output_filename))
        q.put((None, None))

        def _write_bad_file():
            # Writes content whose sha1 differs from sha1_hash.
            with open(output_filename, 'w') as f:
                f.write('foobar')

        # Two successful (return code 0) gsutil results; the second one is
        # given _write_bad_file, presumably run as that call's side effect.
        self.gsutil.add_expected(0, '', '')
        self.gsutil.add_expected(0, '', '', _write_bad_file)
        download_from_google_storage._downloader_worker_thread(
            1, q, True, self.base_url, self.gsutil, out_q, ret_codes,
            True, False)
        self.assertTrue(q.empty())

        # 8843d7f9... is the sha1 of the 'foobar' payload written above.
        msg = ('1> ERROR remote sha1 (%s) does not match expected sha1 (%s).'
               % ('8843d7f92416211de9ebb963ff4ce28125932878', sha1_hash))
        # Compare queue snapshots rather than calling the blocking get():
        # a missing message then fails the test instead of hanging it, and
        # the comparison also proves that nothing extra was emitted.
        self.assertEqual(
            list(out_q.queue),
            ['1> Downloading %s...' % output_filename, msg])
        self.assertEqual(list(ret_codes.queue), [(20, msg)])
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_download_worker_skips_not_found_file(self):
    # When the fake gsutil returns an error for 'ls', the worker must make
    # no further gsutil calls and must report the failure on both the
    # stdout queue and the return-code queue.
    digest = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
    remote_path = '%s/%s' % (self.base_url, digest)
    local_path = os.path.join(self.base_path, 'uploaded_lorem_ipsum.txt')
    out_q = Queue.Queue()

    # One work item, then the (None, None) entry (presumably the marker
    # that tells the worker to stop -- confirm against the worker).
    for work_item in ((digest, local_path), (None, None)):
        self.queue.put(work_item)

    # Return error when 'ls' is called.
    self.gsutil.add_expected(1, '', '')

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, out_q,
        self.ret_codes, True, False)

    paths = (remote_path, local_path)
    self.assertEqual(
        list(out_q.queue),
        ['0> Failed to fetch file %s for %s, skipping. [Err: ]' % paths])
    self.assertEqual(
        self.gsutil.history,
        [('check_call', ('ls', remote_path))])
    self.assertEqual(
        list(self.ret_codes.queue),
        [(1, 'Failed to fetch file %s for %s. [Err: ]' % paths)])
def test_download_worker_single_file(self):
    # Happy path for one file: the worker issues 'ls' then 'cp' (plus a
    # 'stat' on non-Windows platforms), prints one "Downloading" line and
    # records no return codes.
    digest = self.lorem_ipsum_sha1
    remote_path = '%s/%s' % (self.base_url, digest)
    local_path = os.path.join(self.base_path, 'uploaded_lorem_ipsum.txt')
    out_q = Queue.Queue()

    def _copy_fixture():
        # Stands in for the download by copying the local fixture file.
        return shutil.copyfile(self.lorem_ipsum, local_path)

    self.gsutil.add_expected(0, '', '')  # ls
    self.gsutil.add_expected(0, '', '', _copy_fixture)  # cp

    for work_item in ((digest, local_path), (None, None)):
        self.queue.put(work_item)

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, out_q,
        self.ret_codes, True, False)

    gsutil_calls = [
        ('check_call', ('ls', remote_path)),
        ('check_call', ('cp', remote_path, local_path)),
    ]
    if sys.platform != 'win32':
        # The extra 'stat' call is only expected off Windows.
        gsutil_calls.append(
            ('check_call',
             ('stat',
              'gs://sometesturl/7871c8e24da15bad8b0be2c36edc9dc77e37727f')))

    self.assertEqual(
        list(out_q.queue), ['0> Downloading %s...' % local_path])
    self.assertEqual(self.gsutil.history, gsutil_calls)
    self.assertEqual(list(self.ret_codes.queue), [])
def test_download_worker_single_file(self):
    # Downloads a single file through the worker and checks the recorded
    # gsutil calls, the progress output and the (empty) return codes.
    wanted_sha1 = self.lorem_ipsum_sha1
    gs_url = '%s/%s' % (self.base_url, wanted_sha1)
    dest = os.path.join(self.base_path, 'uploaded_lorem_ipsum.txt')

    # Canned gsutil results, consumed in order: first 'ls', then 'cp'.
    # The 'cp' entry copies the lorem ipsum fixture to the destination.
    ls_result = (0, '', '')
    cp_result = (0, '', '', lambda: shutil.copyfile(self.lorem_ipsum, dest))
    self.gsutil.add_expected(*ls_result)
    self.gsutil.add_expected(*cp_result)

    self.queue.put((wanted_sha1, dest))
    self.queue.put((None, None))
    captured_stdout = Queue.Queue()
    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, captured_stdout,
        self.ret_codes, True, False)

    stat_call = (
        'check_call',
        ('stat', 'gs://sometesturl/7871c8e24da15bad8b0be2c36edc9dc77e37727f'))
    # 'stat' is only expected when not running on Windows.
    trailing_calls = [] if sys.platform == 'win32' else [stat_call]
    wanted_history = [
        ('check_call', ('ls', gs_url)),
        ('check_call', ('cp', gs_url, dest)),
    ] + trailing_calls

    self.assertEqual(
        list(captured_stdout.queue), ['0> Downloading %s...' % dest])
    self.assertEqual(self.gsutil.history, wanted_history)
    self.assertEqual(list(self.ret_codes.queue), [])
def test_download_worker_skips_not_found_file(self):
    # A failing 'ls' must stop the worker from going any further: only the
    # 'ls' call is recorded, and the failure is reported on stdout and in
    # the return codes.
    wanted_sha1 = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
    gs_url = '%s/%s' % (self.base_url, wanted_sha1)
    dest = os.path.join(self.base_path, 'uploaded_lorem_ipsum.txt')

    self.queue.put((wanted_sha1, dest))
    self.queue.put((None, None))
    captured_stdout = Queue.Queue()

    # Return error when 'ls' is called.
    self.gsutil.add_expected(1, '', '')

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, captured_stdout,
        self.ret_codes, True)

    stdout_line = (
        '0> Failed to fetch file %s for %s, skipping. [Err: ]'
        % (gs_url, dest))
    failure = (
        1, 'Failed to fetch file %s for %s. [Err: ]' % (gs_url, dest))

    self.assertEqual(list(captured_stdout.queue), [stdout_line])
    self.assertEqual(
        self.gsutil.history, [('check_call', ('ls', gs_url))])
    self.assertEqual(list(self.ret_codes.queue), [failure])
def test_download_worker_skips_file(self):
    # Queues rootfolder_text.txt with the given sha1 and expects a no-op:
    # no gsutil calls and no output.
    digest = 'e6c4fbd4fe7607f3e6ebf68b2ea4ef694da7b4fe'
    local_path = os.path.join(self.base_path, 'rootfolder_text.txt')
    out_q = Queue.Queue()

    for work_item in ((digest, local_path), (None, None)):
        self.queue.put(work_item)

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, out_q,
        self.ret_codes, True, False)

    # dfgs does not output anything in the no-op case.
    printed = list(out_q.queue)
    self.assertEqual(printed, [])
    self.assertEqual(self.gsutil.history, [])
def test_download_worker_skips_file(self):
    # Queues rootfolder_text.txt with the given sha1 and expects the worker
    # to print a "Skipping." message and make no gsutil calls.
    wanted_sha1 = 'e6c4fbd4fe7607f3e6ebf68b2ea4ef694da7b4fe'
    dest = os.path.join(self.base_path, 'rootfolder_text.txt')
    captured_stdout = Queue.Queue()

    self.queue.put((wanted_sha1, dest))
    self.queue.put((None, None))

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, captured_stdout,
        self.ret_codes, True)

    skip_line = '0> File %s exists and SHA1 matches. Skipping.' % dest
    self.assertEqual(list(captured_stdout.queue), [skip_line])
    self.assertEqual(self.gsutil.history, [])
def test_download_worker_single_file(self):
    # Runs the worker on a single file and checks that it calls 'ls' and
    # then 'cp -q', prints one "Downloading" line and records no return
    # codes.
    digest = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
    remote_path = '%s/%s' % (self.base_url, digest)
    local_path = os.path.join(self.base_path, 'uploaded_lorem_ipsum.txt')
    out_q = Queue.Queue()

    for work_item in ((digest, local_path), (None, None)):
        self.queue.put(work_item)

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, out_q,
        self.ret_codes)

    ls_call = ('check_call', ('ls', remote_path))
    cp_call = ('check_call', ('cp', '-q', remote_path, local_path))

    self.assertEqual(
        list(out_q.queue), ['0> Downloading %s...' % local_path])
    self.assertEqual(self.gsutil.history, [ls_call, cp_call])
    self.assertEqual(list(self.ret_codes.queue), [])
def test_download_worker_single_file(self):
    # Single-file download: expects an 'ls' followed by a quiet 'cp' in the
    # gsutil history, a "Downloading" progress line, and an empty
    # return-code queue.
    wanted_sha1 = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
    gs_url = '%s/%s' % (self.base_url, wanted_sha1)
    dest = os.path.join(self.base_path, 'uploaded_lorem_ipsum.txt')

    self.queue.put((wanted_sha1, dest))
    self.queue.put((None, None))
    captured_stdout = Queue.Queue()

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, False, self.base_url, self.gsutil, captured_stdout,
        self.ret_codes)

    wanted_history = [
        ('check_call', gsutil_args)
        for gsutil_args in (('ls', gs_url), ('cp', '-q', gs_url, dest))
    ]
    wanted_stdout = ['0> Downloading %s...' % dest]
    wanted_ret_codes = []

    self.assertEqual(list(captured_stdout.queue), wanted_stdout)
    self.assertEqual(self.gsutil.history, wanted_history)
    self.assertEqual(list(self.ret_codes.queue), wanted_ret_codes)
def test_download_extract_archive(self):
    # Builds subfolder.tar.gz from the 'subfolder' fixture directory,
    # removes the directory, runs the worker on the archive, and checks
    # that the directory and its text file exist again afterwards.
    archive_path = os.path.join(self.base_path, 'subfolder.tar.gz')
    extract_dir = os.path.join(self.base_path, 'subfolder')
    extracted_file = os.path.join(extract_dir, 'subfolder_text.txt')

    # Generate a gzipped tarfile
    with tarfile.open(archive_path, 'w:gz') as archive:
        archive.add(extract_dir, arcname='subfolder')
    shutil.rmtree(extract_dir)

    digest = download_from_google_storage.get_sha1(archive_path)
    remote_path = '%s/%s' % (self.base_url, digest)
    out_q = Queue.Queue()
    for work_item in ((digest, archive_path), (None, None)):
        self.queue.put(work_item)

    download_from_google_storage._downloader_worker_thread(
        0, self.queue, True, self.base_url, self.gsutil, out_q,
        self.ret_codes, True, True, delete=False)

    gsutil_calls = [
        ('check_call', ('ls', remote_path)),
        ('check_call', ('cp', remote_path, archive_path)),
    ]
    if sys.platform != 'win32':
        # The extra 'stat' call is only expected off Windows.
        gsutil_calls.append(
            ('check_call', ('stat', 'gs://sometesturl/%s' % digest)))

    printed = [
        '0> Downloading %s...' % archive_path,
        '0> Extracting 3 entries from %s to %s' % (archive_path, extract_dir),
    ]

    self.assertEqual(list(out_q.queue), printed)
    self.assertEqual(self.gsutil.history, gsutil_calls)
    self.assertEqual(list(self.ret_codes.queue), [])
    self.assertTrue(os.path.exists(extract_dir))
    self.assertTrue(os.path.exists(extracted_file))
def test_download_extract_archive(self):
    # Archive round trip: tar up the 'subfolder' fixture, delete the
    # original directory, let the worker process the tarball, then confirm
    # the output, the gsutil history and the re-created files.
    tarball = os.path.join(self.base_path, 'subfolder.tar.gz')
    unpack_dir = os.path.join(self.base_path, 'subfolder')
    unpacked_file = os.path.join(unpack_dir, 'subfolder_text.txt')

    # Generate a gzipped tarfile
    with tarfile.open(tarball, 'w:gz') as tar_handle:
        tar_handle.add(unpack_dir, arcname='subfolder')
    shutil.rmtree(unpack_dir)

    wanted_sha1 = download_from_google_storage.get_sha1(tarball)
    gs_url = '%s/%s' % (self.base_url, wanted_sha1)

    self.queue.put((wanted_sha1, tarball))
    self.queue.put((None, None))
    captured_stdout = Queue.Queue()
    download_from_google_storage._downloader_worker_thread(
        0, self.queue, True, self.base_url, self.gsutil, captured_stdout,
        self.ret_codes, True, True, delete=False)

    stat_call = ('check_call', ('stat', 'gs://sometesturl/%s' % wanted_sha1))
    # 'stat' is only expected when not running on Windows.
    trailing_calls = [] if sys.platform == 'win32' else [stat_call]
    wanted_history = [
        ('check_call', ('ls', gs_url)),
        ('check_call', ('cp', gs_url, tarball)),
    ] + trailing_calls

    wanted_stdout = ['0> Downloading %s...' % tarball]
    wanted_stdout.append(
        '0> Extracting 3 entries from %s to %s' % (tarball, unpack_dir))

    self.assertEqual(list(captured_stdout.queue), wanted_stdout)
    self.assertEqual(self.gsutil.history, wanted_history)
    self.assertEqual(list(self.ret_codes.queue), [])
    for restored_path in (unpack_dir, unpacked_file):
        self.assertTrue(os.path.exists(restored_path))