def test_HitsCacheSecondTime(self):
    """Verify a second identical Run() is served from the cache.

    The external command must execute exactly once, and the cache hit must
    report the same URL as the original computation.
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('HitsCacheSecondTime', work_dir)
        self._tally = 0

        def check_call(cmd, **kwargs):
            # Count subprocess invocations so we can assert the command
            # ran only once across both Run() calls.
            self._tally += 1
            subprocess.check_call(cmd, **kwargs)

        self._url = None

        def stash_url(urls):
            # Capture the URL that Once reports for the stored result.
            self._url = urls

        o = once.Once(storage=fake_storage.FakeStorage(),
                      check_call=check_call,
                      print_url=stash_url)
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Copy('%(input0)s/in0', '%(output)s/out',
                            cwd=work_dir)])
        initial_url = self._url
        self._url = None
        o.Run('test', self._input_dirs, self._output_dirs[1],
              [command.Copy('%(input0)s/in0', '%(output)s/out',
                            cwd=work_dir)])
        # Both outputs must match the input even though the copy command
        # only actually executed for the first Run().
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[0]))
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[1]))
        self.assertEqual(1, self._tally)
        # The cache hit must surface the same URL as the initial run.
        self.assertEqual(initial_url, self._url)
def test_HitsCacheSecondTime(self):
    """Verify a second identical Run() is served from the cache.

    The Runnable must execute exactly once, and the cache hit must report
    the same URL as the original computation.
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('HitsCacheSecondTime', work_dir)
        self._tally = 0

        def Copy(subst, src, dst):
            # Count invocations so we can assert the copy ran only once.
            self._tally += 1
            shutil.copyfile(subst.SubstituteAbsPaths(src),
                            subst.SubstituteAbsPaths(dst))

        self._url = None

        def stash_url(urls):
            # Capture the URL that Once reports for the stored result.
            self._url = urls

        o = once.Once(storage=fake_storage.FakeStorage(),
                      print_url=stash_url,
                      system_summary='test')
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
        initial_url = self._url
        self._url = None
        o.Run('test', self._input_dirs, self._output_dirs[1],
              [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
        # Both outputs must match the input even though the Runnable only
        # actually executed for the first Run().
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[0]))
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[1]))
        self.assertEqual(1, self._tally)
        # The cache hit must surface the same URL as the initial run.
        self.assertEqual(initial_url, self._url)
def test_UseCachedResultsFalse(self):
    """Check that use_cached_results=False forces recomputation.

    Even when a matching result is present in the cache, the computation
    must be redone on every Run().
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('UseCachedResultsFalse', work_dir)
        self._tally = 0

        def check_call(cmd, **kwargs):
            # Count subprocess invocations to prove both Run() calls
            # actually executed the command.
            subprocess.check_call(cmd, **kwargs)
            self._tally += 1

        o = once.Once(storage=fake_storage.FakeStorage(),
                      use_cached_results=False,
                      check_call=check_call)
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Copy('%(input0)s/in0', '%(output)s/out',
                            cwd=work_dir)])
        o.Run('test', self._input_dirs, self._output_dirs[1],
              [command.Copy('%(input0)s/in0', '%(output)s/out',
                            cwd=work_dir)])
        # Two executions: the cache was deliberately bypassed.
        self.assertEqual(2, self._tally)
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[0]))
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[1]))
def test_CacheResultsFalse(self):
    """Check that cache_results=False keeps results out of the cache.

    After a Run() the storage must contain zero items, while the output
    is still produced correctly.
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('CacheResultsFalse', work_dir)
        storage = fake_storage.FakeStorage()
        o = once.Once(storage=storage, cache_results=False)
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Copy('%(input0)s/in0', '%(output)s/out',
                            cwd=work_dir)])
        # Nothing may have been written to storage.
        self.assertEqual(0, storage.ItemCount())
        # The computation itself must still have produced the output.
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[0]))
def CanBeReadBothWays(self, storage, key, out_file, expected):
    """Assert that key reads back as expected via GetData and GetFile.

    Args:
      storage: Storage object to read from.
      key: Key to read.
      out_file: Scratch path used to receive GetFile output.
      expected: Expected file/data contents.
    """
    # Direct in-memory read.
    self.assertEqual(expected, storage.GetData(key))
    # File-based read; a None URL would indicate failure.
    url = storage.GetFile(key, out_file)
    self.assertNotEqual(None, url)
    self.assertEqual(expected, file_tools.ReadFile(out_file))
def PutFile(self, path, key):
    """Write a file to storage.

    Args:
      path: Path of the file to write.
      key: Key to store file under.
    Returns:
      URL written to.
    """
    # Delegate to PutData after loading the file contents.
    contents = file_tools.ReadFile(path)
    return self.PutData(contents, key)
def test_FirstTime(self):
    """Test that the computation is always performed if the cache is empty."""
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('FirstTime', work_dir)
        o = once.Once(storage=fake_storage.FakeStorage(),
                      system_summary='test')
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Copy('%(input0)s/in0', '%(output)s/out')])
        # GenerateTestData is expected to have seeded in0 with this value.
        self.assertEqual('FirstTimedata0',
                         file_tools.ReadFile(self._output_files[0]))
def GetData(self, key):
    """Read data from global storage.

    Args:
      key: Key to store file under.
    Returns:
      Data from storage, or None for failure.
    """
    ValidateKey(key)
    # Prefer the local cache copy; fall through to the backing storage
    # only when the key has not been cached locally.
    local_copy = os.path.join(self._cache_path, key)
    if not os.path.exists(local_copy):
        return self._storage.GetData(key)
    return file_tools.ReadFile(local_copy)
def test_UnpackCommands(self):
    """Test that unpack commands run first and hashed_inputs are used.

    The second Run() should hit the cache for the main commands, so only
    the unpack command executes again (tally of 3 total, not 4).
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('UnpackCommands', work_dir)
        self._tally = 0

        def check_call(cmd, **kwargs):
            # Count every executed command across both Run() calls.
            self._tally += 1
            subprocess.check_call(cmd, **kwargs)

        o = once.Once(storage=fake_storage.FakeStorage(),
                      check_call=check_call)
        alt_inputs = {'input0': os.path.join(work_dir, 'alt_input')}
        unpack_commands = [
            command.Copy('%(input0)s/in0', alt_inputs['input0'])]
        commands = [
            command.Copy('%(input0)s', '%(output)s/out', cwd=work_dir)]
        o.Run('test', self._input_dirs, self._output_dirs[0],
              commands=commands,
              unpack_commands=unpack_commands,
              hashed_inputs=alt_inputs)
        o.Run('test', self._input_dirs, self._output_dirs[1],
              commands=commands,
              unpack_commands=unpack_commands,
              hashed_inputs=alt_inputs)
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[0]))
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[1]))
        # Unpack runs twice + main command once = 3 executions.
        self.assertEqual(3, self._tally)
def test_Command(self):
    """Test that a plain subprocess command runs and writes its output."""
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('Command', work_dir)
        o = once.Once(storage=fake_storage.FakeStorage(),
                      system_summary='test')
        # Run a trivial Python one-liner that writes "hello" to the
        # substituted %(output)s/out path.
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Command([
                  sys.executable, '-c',
                  'import sys; open(sys.argv[1], "wb").write("hello")',
                  '%(output)s/out'])])
        self.assertEqual('hello',
                         file_tools.ReadFile(self._output_files[0]))
def test_UseCachedResultsFalse(self):
    """Check that use_cached_results=False forces recomputation.

    Even when a matching result is present in the cache, the Runnable
    must be executed again on every Run().
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        self.GenerateTestData('UseCachedResultsFalse', work_dir)
        self._tally = 0

        def Copy(subst, src, dst):
            # Count invocations to prove both Run() calls executed.
            self._tally += 1
            shutil.copyfile(subst.SubstituteAbsPaths(src),
                            subst.SubstituteAbsPaths(dst))

        o = once.Once(storage=fake_storage.FakeStorage(),
                      use_cached_results=False,
                      system_summary='test')
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
        o.Run('test', self._input_dirs, self._output_dirs[1],
              [command.Runnable(Copy, '%(input0)s/in0', '%(output)s/out')])
        # Two executions: the cache was deliberately bypassed.
        self.assertEqual(2, self._tally)
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[0]))
        self.assertEqual(file_tools.ReadFile(self._input_files[0]),
                         file_tools.ReadFile(self._output_files[1]))
def GetData(self, key):
    """Read data from global storage.

    Args:
      key: Key to store file under.
    Returns:
      Data from storage, or None for failure.
    """
    # Fetch into a scratch directory, then read the result back into
    # memory; the scratch space is always cleaned up.
    scratch = tempfile.mkdtemp(prefix='gdstore', suffix='.tmp')
    try:
        target = os.path.join(scratch, 'data')
        if self.GetFile(key, target) is None:
            return None
        return file_tools.ReadFile(target)
    finally:
        shutil.rmtree(scratch)
def GetFile(self, key, path):
    """Read a file from storage.

    Args:
      key: Key to store file under.
      path: Destination filename.
    Returns:
      URL used on success or None for failure.
    """
    ValidateKey(key)
    # Serve from the local cache when possible; otherwise delegate to
    # the backing storage.
    local_copy = os.path.join(self._cache_path, key)
    if not os.path.exists(local_copy):
        return self._storage.GetFile(key, path)
    file_tools.WriteFile(file_tools.ReadFile(local_copy), path)
    return 'LOCAL_CACHE_NO_URL_AVAILABLE'
def call(cmd):
    # Fake gsutil invocation handler: validates a three-step sequence
    # (copy-up, server-side copy, delete) using the closure variables
    # `step` (a one-element list used as a mutable counter) and `self`
    # from the enclosing test method (not visible here).
    if step[0] == 0:
        # Step 0: upload a local file with public-read ACL.
        self.assertEqual(['mygsutil', 'cp', '-a', 'public-read'], cmd[0:4])
        # Source is a file:// URL; its contents must be 'foo'.
        self.assertEqual('foo', file_tools.ReadFile(cmd[4][len('file://'):]))
        # Destination is the bucket path (ignoring any trailing suffix
        # after the first '.').
        self.assertEqual('gs://mybucket/bar', cmd[5].split('.')[0])
    elif step[0] == 1:
        # Step 1: server-side copy from the temp name to the final name.
        self.assertEqual(['mygsutil', 'cp', '-a', 'public-read'], cmd[0:4])
        self.assertEqual('gs://mybucket/bar', cmd[4].split('.')[0])
        self.assertEqual('gs://mybucket/bar', cmd[5])
    elif step[0] == 2:
        # Step 2: remove the temporary object.
        self.assertEqual(['mygsutil', 'rm'], cmd[0:2])
        self.assertEqual('gs://mybucket/bar', cmd[2].split('.')[0])
    else:
        # Any further invocation is unexpected.
        self.assertTrue(False)
    step[0] += 1
    # Report success to the caller, mimicking a zero exit status.
    return 0
def test_RecomputeHashMatches(self):
    """Test that identical outputs are not re-stored in the cache.

    Two computations with different inputs but identical results should
    add only a new in->out mapping on the second run, not a second copy
    of the output data.
    """
    with working_directory.TemporaryWorkingDirectory() as work_dir:
        # Setup test data in input0, input1 using memory storage.
        self.GenerateTestData('RecomputeHashMatches', work_dir)
        fs = fake_storage.FakeStorage()
        o = once.Once(storage=fs)
        # Run the computation (compute the length of a file) from input0
        # to output0.
        o.Run('test', self._input_dirs, self._output_dirs[0],
              [self.FileLength('%(input0)s/in0', '%(output)s/out',
                               cwd=work_dir)])
        # Check that 2 writes have occurred. One to write a mapping from
        # in->out, and one for the output data.
        self.assertEqual(2, fs.WriteCount())
        # Run the computation again from input1 to output1.
        # (These should have the same length.)
        o.Run('test', self._input_dirs, self._output_dirs[1],
              [self.FileLength('%(input1)s/in1', '%(output)s/out',
                               cwd=work_dir)])
        # Write count goes up by one as an in->out hash is added,
        # but no new output is stored (as it is the same).
        self.assertEqual(3, fs.WriteCount())
        # Check that the test is still valid:
        #   - in0 and in1 have equal length.
        #   - out0 and out1 have that length in them.
        #   - out0 and out1 agree.
        self.assertEqual(
            str(len(file_tools.ReadFile(self._input_files[0]))),
            file_tools.ReadFile(self._output_files[0]))
        self.assertEqual(
            str(len(file_tools.ReadFile(self._input_files[1]))),
            file_tools.ReadFile(self._output_files[1]))
        self.assertEqual(file_tools.ReadFile(self._output_files[0]),
                         file_tools.ReadFile(self._output_files[1]))
def PutFile(self, path, key):
    """Store a file's contents under key and return a fake URL.

    Uses the form fake://<key> for make-believe URLs.
    """
    contents = file_tools.ReadFile(path)
    self.PutData(contents, key)
    return 'fake://' + key
def call(cmd):
    # Fake gsutil invocation handler for a single upload: validates a
    # public-read copy from a local file to a bucket path. Relies on
    # `self` from the enclosing test method (not visible here).
    self.assertEqual(['mygsutil', 'cp', '-a', 'public-read'], cmd[0:4])
    # Source is a file:// URL; its contents must be 'foo'.
    self.assertEqual('foo', file_tools.ReadFile(cmd[4][len('file://'):]))
    self.assertEqual('gs://mybucket/bar', cmd[5])
    # Report success to the caller, mimicking a zero exit status.
    return 0