def isolate_and_archive(trees, isolate_server, namespace):
  """Isolates and uploads a bunch of isolated trees.

  Args:
    trees: list of pairs (Options, working directory) that describe what tree
        to isolate. Options are processed by 'process_isolate_options'.
    isolate_server: URL of the Isolate Server to upload to.
    namespace: namespace to upload to.

  Returns a dict {target name -> isolate hash or None}, where target name is
  the name of the *.isolated file without its extension (e.g.
  'base_unittests').

  There are multiple failure modes:
    * If the upload fails due to a server or network error, returns None.
    * If some *.isolate file is invalid (but the rest are fine and were
      successfully uploaded), returns a dict where the value of the entry
      corresponding to the invalid *.isolate file is None.
  """
  if not trees:
    return {}

  # Helper generator to avoid materializing the full (huge) list of files
  # until the very end (in upload_tree).
  def emit_files(root_dir, files):
    for path, meta in files.iteritems():
      yield (os.path.join(root_dir, path), meta)

  # Process all *.isolate files; this involves parsing, file system traversal
  # and hashing. The result is a list of generators that produce files to
  # upload and the mapping {target name -> hash of *.isolated file} to return
  # from this function.
  files_generators = []
  isolated_hashes = {}
  with tools.Profiler('Isolate'):
    for opts, cwd in trees:
      target_name = os.path.splitext(os.path.basename(opts.isolated))[0]
      try:
        complete_state, files, isolated_hash = prepare_for_archival(opts, cwd)
        files_generators.append(emit_files(complete_state.root_dir, files))
        isolated_hashes[target_name] = isolated_hash[0]
        print('%s %s' % (isolated_hash[0], target_name))
      except Exception:
        logging.exception('Exception when isolating %s', target_name)
        isolated_hashes[target_name] = None

  # All bad? Nothing to upload.
  if all(v is None for v in isolated_hashes.itervalues()):
    return isolated_hashes

  # Now upload all necessary files at once.
  with tools.Profiler('Upload'):
    try:
      isolateserver.upload_tree(
          base_url=isolate_server,
          infiles=itertools.chain(*files_generators),
          namespace=namespace)
    except Exception:
      logging.exception('Exception while uploading files')
      return None

  return isolated_hashes
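A minimal usage sketch for isolate_and_archive(), assuming 'trees' already
holds (Options, working directory) pairs built via process_isolate_options();
the server URL below is a placeholder:

# Sketch only: 'trees' is assumed to have been prepared elsewhere.
results = isolate_and_archive(
    trees, 'https://example-isolate-server.appspot.com', 'default-gzip')
if results is None:
  # The upload itself failed (server or network error).
  logging.error('Upload failed')
elif any(h is None for h in results.itervalues()):
  # Some *.isolate files were invalid; their entries are None, the rest
  # were uploaded successfully.
  bad = sorted(name for name, h in results.iteritems() if h is None)
  logging.warning('Failed to isolate: %s', ', '.join(bad))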
class IsolateServerStorageSmokeTest(unittest.TestCase):
  """Tests public API of Storage class using file system as a store."""

  def setUp(self):
    super(IsolateServerStorageSmokeTest, self).setUp()
    self.tempdir = tempfile.mkdtemp(prefix=u'isolateserver')
    self.server = isolateserver_mock.MockIsolateServer()

  def tearDown(self):
    try:
      self.server.close_start()
      file_path.rmtree(self.tempdir)
      self.server.close_end()
    finally:
      super(IsolateServerStorageSmokeTest, self).tearDown()

  def run_synchronous_push_test(self, namespace):
    storage = isolateserver.get_storage(self.server.url, namespace)

    # Items to upload.
    items = [isolateserver.BufferItem('item %d' % i) for i in xrange(10)]

    # Storage is empty, all items are missing.
    missing = dict(storage.get_missing_items(items))
    self.assertEqual(set(items), set(missing))

    # Push, one by one.
    for item, push_state in missing.iteritems():
      storage.push(item, push_state)

    # All items are there now.
    self.assertFalse(dict(storage.get_missing_items(items)))

  def test_synchronous_push(self):
    self.run_synchronous_push_test('default')

  def test_synchronous_push_gzip(self):
    self.run_synchronous_push_test('default-gzip')

  def run_upload_items_test(self, namespace):
    storage = isolateserver.get_storage(self.server.url, namespace)

    # Items to upload.
    items = [isolateserver.BufferItem('item %d' % i) for i in xrange(10)]

    # Do it.
    uploaded = storage.upload_items(items)
    self.assertEqual(set(items), set(uploaded))

    # All items are there now.
    self.assertFalse(dict(storage.get_missing_items(items)))

    # Now ensure upload_items skips existing items.
    more = [isolateserver.BufferItem('more item %d' % i) for i in xrange(10)]

    # Only |more| should have been uploaded.
    uploaded = storage.upload_items(items + more)
    self.assertEqual(set(more), set(uploaded))

  def test_upload_items(self):
    self.run_upload_items_test('default')

  def test_upload_items_gzip(self):
    self.run_upload_items_test('default-gzip')

  def run_push_and_fetch_test(self, namespace):
    storage = isolateserver.get_storage(self.server.url, namespace)

    # Upload items.
    items = [isolateserver.BufferItem('item %d' % i) for i in xrange(10)]
    uploaded = storage.upload_items(items)
    self.assertEqual(set(items), set(uploaded))

    # Fetch them all back into a local memory cache.
    cache = isolateserver.MemoryCache()
    queue = isolateserver.FetchQueue(storage, cache)

    # Start fetching.
    pending = set()
    for item in items:
      pending.add(item.digest)
      queue.add(item.digest)

    # Wait for the fetches to complete.
    while pending:
      fetched = queue.wait(pending)
      pending.discard(fetched)

    # Ensure the fetched data matches what was pushed.
    self.assertEqual(
        [i.buffer for i in items],
        [cache.read(i.digest) for i in items])

  def test_push_and_fetch(self):
    self.run_push_and_fetch_test('default')

  def test_push_and_fetch_gzip(self):
    self.run_push_and_fetch_test('default-gzip')

  if sys.maxsize == (2**31) - 1:
    def test_archive_multiple_huge_file(self):
      self.server.discard_content()
      # Create multiple files totaling over 2.5gb. This test exists to stress
      # the virtual address space on 32-bit systems. Use real files since the
      # data wouldn't fit in memory by definition.
      # Sadly, this makes the test very slow, so it is only run on 32-bit
      # platforms; it is known to work on 64-bit platforms anyway.
      #
      # It's a fairly slow test, well over 15 seconds.
      files = {}
      size = 512 * 1024 * 1024
      for i in xrange(5):
        name = '512mb_%d.%s' % (i, isolateserver.ALREADY_COMPRESSED_TYPES[0])
        p = os.path.join(self.tempdir, name)
        with open(p, 'wb') as f:
          # Write 512mb.
          h = hashlib.sha1()
          data = os.urandom(1024)
          for _ in xrange(size / 1024):
            f.write(data)
            h.update(data)
        os.chmod(p, 0600)
        files[p] = {
          'h': h.hexdigest(),
          'm': 0600,
          's': size,
        }
        if sys.platform == 'win32':
          files[p].pop('m')
      # upload_tree() is a thin wrapper around Storage.
      isolateserver.upload_tree(self.server.url, files.items(), 'testing')
      expected = {
        'testing': {f['h']: '<skipped>' for f in files.itervalues()},
      }
      self.assertEqual(expected, self.server.contents)
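For reference, a condensed sketch of the same push-and-fetch round trip the
smoke tests above exercise, outside the unittest harness; the server URL is a
placeholder:

# Sketch only; mirrors run_push_and_fetch_test() above.
storage = isolateserver.get_storage('http://localhost:8080', 'default-gzip')
items = [isolateserver.BufferItem('payload %d' % i) for i in xrange(3)]
uploaded = storage.upload_items(items)  # returns only the items actually pushed

cache = isolateserver.MemoryCache()
queue = isolateserver.FetchQueue(storage, cache)
pending = set(item.digest for item in items)
for digest in pending:
  queue.add(digest)
while pending:
  pending.discard(queue.wait(pending))  # blocks until one pending digest lands

payloads = [cache.read(item.digest) for item in items]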
def test_upload_tree(self):
  files = {
    '/a': {
      's': 100,
      'h': 'hash_a',
    },
    '/some/dir/b': {
      's': 200,
      'h': 'hash_b',
    },
    '/another/dir/c': {
      's': 300,
      'h': 'hash_c',
    },
    '/a_copy': {
      's': 100,
      'h': 'hash_a',
    },
  }
  files_data = dict((k, 'x' * files[k]['s']) for k in files)
  all_hashes = set(f['h'] for f in files.itervalues())
  missing_hashes = {'hash_a': 'push a', 'hash_b': 'push b'}

  # Files read by mocked_file_read.
  read_calls = []

  def mocked_file_read(filepath, chunk_size=0, offset=0):
    self.assertIn(filepath, files_data)
    read_calls.append(filepath)
    return files_data[filepath]
  self.mock(isolateserver, 'file_read', mocked_file_read)

  storage_api = MockedStorageApi(missing_hashes)
  storage = isolateserver.Storage(storage_api)
  def mock_get_storage(base_url, namespace):
    self.assertEqual('base_url', base_url)
    self.assertEqual('some-namespace', namespace)
    return storage
  self.mock(isolateserver, 'get_storage', mock_get_storage)

  isolateserver.upload_tree('base_url', files.iteritems(), 'some-namespace')

  # Only missing files were read.
  self.assertEqualIgnoringOrder(
      missing_hashes,
      [files[path]['h'] for path in read_calls])
  # 'contains' checked for the existence of all files.
  self.assertEqualIgnoringOrder(
      all_hashes,
      [i.digest for i in sum(storage_api.contains_calls, [])])
  # Only missing files were pushed.
  self.assertEqualIgnoringOrder(
      missing_hashes,
      [call[0].digest for call in storage_api.push_calls])
  # Pushed with the correct data, size and push state.
  for pushed_item, push_state, pushed_content in storage_api.push_calls:
    filenames = [
      name for name, metadata in files.iteritems()
      if metadata['h'] == pushed_item.digest
    ]
    # If multiple files map to the same hash, upload_tree picks the first one.
    filename = filenames[0]
    self.assertEqual(filename, pushed_item.path)
    self.assertEqual(files_data[filename], pushed_content)
    self.assertEqual(missing_hashes[pushed_item.digest], push_state)
def test_upload_tree(self):
  root = 'root'
  files = {
    'a': {
      's': 100,
      'h': 'hash_a',
    },
    'b': {
      's': 200,
      'h': 'hash_b',
    },
    'c': {
      's': 300,
      'h': 'hash_c',
    },
    'a_copy': {
      's': 100,
      'h': 'hash_a',
    },
  }
  files_data = dict((k, 'x' * files[k]['s']) for k in files)
  all_hashes = set(f['h'] for f in files.itervalues())
  missing_hashes = {'hash_a': 'push a', 'hash_b': 'push b'}

  # Files read by mocked_file_read.
  read_calls = []

  def mocked_file_read(filepath, chunk_size=0, offset=0):
    self.assertEqual(root, os.path.dirname(filepath))
    filename = os.path.basename(filepath)
    self.assertIn(filename, files_data)
    read_calls.append(filename)
    return files_data[filename]
  self.mock(isolateserver, 'file_read', mocked_file_read)

  storage_api = MockedStorageApi(missing_hashes)
  storage = isolateserver.Storage(storage_api)
  def mock_get_storage(base_url, namespace):
    self.assertEqual('base_url', base_url)
    self.assertEqual('some-namespace', namespace)
    return storage
  self.mock(isolateserver, 'get_storage', mock_get_storage)

  isolateserver.upload_tree('base_url', root, files, 'some-namespace')

  # Only missing files were read.
  self.assertEqualIgnoringOrder(
      missing_hashes,
      [files[path]['h'] for path in read_calls])
  # 'contains' checked for the existence of all files.
  self.assertEqualIgnoringOrder(
      all_hashes,
      [i.digest for i in sum(storage_api.contains_calls, [])])
  # Only missing files were pushed.
  self.assertEqualIgnoringOrder(
      missing_hashes,
      [call[0].digest for call in storage_api.push_calls])
  # Pushed with the correct data, size and push state.
  for pushed_item, push_state, pushed_content in storage_api.push_calls:
    filenames = [
      name for name, metadata in files.iteritems()
      if metadata['h'] == pushed_item.digest
    ]
    # If multiple files map to the same hash, upload_tree picks the first one.
    filename = filenames[0]
    self.assertEqual(os.path.join(root, filename), pushed_item.path)
    self.assertEqual(files_data[filename], pushed_content)
    self.assertEqual(missing_hashes[pushed_item.digest], push_state)
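The two test_upload_tree variants above exercise different upload_tree() call
shapes; which one applies depends on the client revision. A side-by-side
sketch with placeholder values:

# As exercised by the first test (and by isolate_and_archive): an iterable of
# (absolute path, metadata) pairs.
isolateserver.upload_tree(
    base_url='https://example-isolate-server.appspot.com',
    infiles=[('/abs/path/a', {'s': 100, 'h': 'hash_a'})],
    namespace='default-gzip')

# As exercised by the second test: a root directory plus a dict of paths
# relative to that root.
isolateserver.upload_tree(
    'https://example-isolate-server.appspot.com',
    '/abs/path',
    {'a': {'s': 100, 'h': 'hash_a'}},
    'default-gzip')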