def test_dir_hash_independent_of_file_order(self):
    """Directory checksum must be identical regardless of the order os.walk yields files."""
    file_paths = []
    for _ in range(2):
        handle = tempfile.NamedTemporaryFile(delete=False, dir=self.temp_dir)
        handle.write(b"Testfile")
        handle.close()
        file_paths.append(handle.name)

    checksums = []
    # Compute the checksum twice, mocking os.walk to report the files in
    # opposite orders each time.
    for ordering in (tuple(file_paths), tuple(reversed(file_paths))):
        with patch("os.walk") as mockwalk:
            mockwalk.return_value = [(self.temp_dir, (), ordering)]
            checksums.append(dir_checksum(self.temp_dir))

    self.assertEqual(checksums[0], checksums[1])
def test_dir_hash_same_contents_diff_file_per_directory(self):
    """Renaming a file — contents unchanged — must change the directory checksum."""
    handle = tempfile.NamedTemporaryFile(delete=False, dir=self.temp_dir)
    handle.write(b"Testfile")
    handle.close()

    parent_dir = os.path.dirname(handle.name)
    checksum_before = dir_checksum(parent_dir)

    # Same bytes, different file name inside the same directory.
    shutil.move(os.path.abspath(handle.name), os.path.join(parent_dir, "different_name"))
    checksum_after = dir_checksum(parent_dir)

    self.assertNotEqual(checksum_before, checksum_after)
def test_dir_hash_independent_of_location(self):
    """Moving a directory to a new path must not change its checksum."""
    original_dir = os.path.join(self.temp_dir, "temp-dir-1")
    os.mkdir(original_dir)
    with open(os.path.join(original_dir, "test-file"), "w+") as f:
        f.write("Testfile")
    checksum_before_move = dir_checksum(original_dir)

    # shutil.move returns the destination path of the moved directory.
    moved_dir = shutil.move(original_dir, os.path.join(self.temp_dir, "temp-dir-2"))
    checksum_after_move = dir_checksum(moved_dir)

    self.assertEqual(checksum_before_move, checksum_after_move)
def test_dir_cyclic_links(self):
    """dir_checksum must surface OSError (ELOOP) when the directory contains a symlink cycle."""
    _file = tempfile.NamedTemporaryFile(delete=False, dir=self.temp_dir)
    _file.write(b"Testfile")
    _file.close()

    parent_dir = os.path.dirname(_file.name)
    file_path = os.path.abspath(_file.name)

    # Build a three-node symlink cycle: file -> symlink2 -> symlink -> file
    os.symlink(file_path, os.path.join(parent_dir, "symlink"))
    os.symlink(os.path.join(parent_dir, "symlink"), os.path.join(parent_dir, "symlink2"))
    os.unlink(file_path)
    os.symlink(os.path.join(parent_dir, "symlink2"), file_path)

    with self.assertRaises(OSError) as ex:
        dir_checksum(parent_dir)
    # BUG FIX: assertRaises' context manager exposes the raised exception as
    # `ex.exception`; `ex.message` does not exist (Python 3 removed `.message`),
    # so the original assertion raised AttributeError instead of checking the error.
    self.assertIn("Too many levels of symbolic links", str(ex.exception))
def calculate_checksum(path):
    """Return the checksum of *path* — file_checksum for a regular file, dir_checksum otherwise."""
    if Path(path).is_file():
        return file_checksum(path)
    return dir_checksum(path)
def build_single_layer_definition(self, layer_definition):
    """
    Builds single layer definition with caching.

    On a cache hit (cache dir exists and source_md5 matches) the previously
    built artifacts are copied into the build dir; otherwise the delegate
    strategy builds, and one copy of the result is stored in the cache.
    """
    code_dir = str(pathlib.Path(self._base_dir, layer_definition.codeuri).resolve())
    source_md5 = dir_checksum(code_dir)
    cache_function_dir = pathlib.Path(self._cache_dir, layer_definition.uuid)

    cache_is_valid = cache_function_dir.exists() and layer_definition.source_md5 == source_md5
    if cache_is_valid:
        LOG.info("Valid cache found, copying previously built resources from layer build definition of %s",
                 layer_definition.uuid)
        # artifacts directory will be created by the builder
        artifacts_dir = str(pathlib.Path(self._build_dir, layer_definition.layer.name))
        LOG.debug("Copying artifacts from %s to %s", cache_function_dir, artifacts_dir)
        osutils.copytree(cache_function_dir, artifacts_dir)
        return {layer_definition.layer.name: artifacts_dir}

    LOG.info("Cache is invalid, running build and copying resources to layer build definition of %s",
             layer_definition.uuid)
    build_result = self._delegate_build_strategy.build_single_layer_definition(layer_definition)
    if cache_function_dir.exists():
        shutil.rmtree(str(cache_function_dir))
    layer_definition.source_md5 = source_md5
    # Since all the build contents are same for a build definition, just copy any one of them into the cache
    for built_path in build_result.values():
        osutils.copytree(built_path, cache_function_dir)
        break
    return dict(build_result)
def zip_folder(folder_path):
    """
    Zip the entire folder and return a file to the zip. Use this inside
    a "with" statement to cleanup the zipfile after it is used.

    Parameters
    ----------
    folder_path : str
        The path of the folder to zip

    Yields
    ------
    zipfile_name : str
        Name of the zipfile
    md5hash : str
        The md5 hash of the directory
    """
    md5hash = dir_checksum(folder_path, followlinks=True)
    # Name the archive after the content hash so identical content reuses one name.
    zipfile_name = make_zip(os.path.join(tempfile.gettempdir(), "data-" + md5hash), folder_path)
    try:
        yield zipfile_name, md5hash
    finally:
        # Best-effort cleanup of the temporary archive once the caller is done.
        if os.path.exists(zipfile_name):
            os.remove(zipfile_name)
def build_single_function_definition(
        self, build_definition: FunctionBuildDefinition) -> Dict[str, str]:
    """
    Builds single function definition with caching.

    Image-packaged functions bypass the cache entirely. For Zip functions,
    a cache hit (cache dir exists and source_md5 matches) copies cached
    artifacts into each function's build dir; a miss rebuilds via the
    delegate strategy and refreshes the cache with one copy of the result.
    """
    # Image-based functions are never cached; hand straight to the delegate.
    if build_definition.packagetype == IMAGE:
        return self._delegate_build_strategy.build_single_function_definition(build_definition)

    code_dir = str(pathlib.Path(self._base_dir, build_definition.codeuri).resolve())
    source_md5 = dir_checksum(code_dir)
    cache_function_dir = pathlib.Path(self._cache_dir, build_definition.uuid)

    if cache_function_dir.exists() and build_definition.source_md5 == source_md5:
        LOG.info(
            "Valid cache found, copying previously built resources from function build definition of %s",
            build_definition.uuid,
        )
        function_build_results = {}
        for function in build_definition.functions:
            # artifacts directory will be created by the builder
            artifacts_dir = str(pathlib.Path(self._build_dir, function.name))
            LOG.debug("Copying artifacts from %s to %s", cache_function_dir, artifacts_dir)
            osutils.copytree(cache_function_dir, artifacts_dir)
            function_build_results[function.name] = artifacts_dir
        return function_build_results

    LOG.info(
        "Cache is invalid, running build and copying resources to function build definition of %s",
        build_definition.uuid,
    )
    build_result = self._delegate_build_strategy.build_single_function_definition(build_definition)
    if cache_function_dir.exists():
        shutil.rmtree(str(cache_function_dir))
    build_definition.source_md5 = source_md5
    # Since all the build contents are same for a build definition, just copy any one of them into the cache
    for built_path in build_result.values():
        osutils.copytree(built_path, cache_function_dir)
        break
    return dict(build_result)
def test_package_with_deep_nested_template(self):
    """
    this template contains two nested stacks:
    - root
      - FunctionA
      - ChildStackX
        - FunctionB
        - ChildStackY
          - FunctionA
          - MyLayerVersion
    """
    template_file = os.path.join("deep-nested", "template.yaml")
    template_path = self.test_data_path.joinpath(template_file)
    command_list = self.get_command_list(s3_bucket=self.s3_bucket.name,
                                         template=template_path,
                                         force_upload=True)

    process = Popen(command_list, stdout=PIPE, stderr=PIPE)
    try:
        _, stderr = process.communicate(timeout=TIMEOUT)
    except TimeoutExpired:
        process.kill()
        raise
    process_stderr = stderr.strip().decode("utf-8")

    # there are in total 3 function dir, 1 layer dir and 2 child templates to upload
    uploads = re.findall(r"Uploading to.+", process_stderr)
    self.assertEqual(len(uploads), 6)

    # make sure uploads' checksum match the dirs and child templates
    build_dir = pathlib.Path(os.path.dirname(__file__)).parent.joinpath(
        "testdata", "package", "deep-nested")
    code_dirs = [
        build_dir.joinpath("FunctionA"),
        build_dir.joinpath("ChildStackX", "FunctionB"),
        build_dir.joinpath("ChildStackX", "ChildStackY", "FunctionA"),
        build_dir.joinpath("ChildStackX", "ChildStackY", "MyLayerVersion"),
    ]
    # here we only verify function/layer code dirs' hash
    # because templates go through some pre-process before being uploaded and the hash can not be determined
    # FIX: loop variable renamed from `dir`, which shadowed the builtin `dir()`.
    for code_dir in code_dirs:
        checksum = dir_checksum(code_dir.absolute())
        self.assertIn(checksum, process_stderr)

    # verify both child templates are uploaded
    uploads = re.findall(r"\.template", process_stderr)
    self.assertEqual(len(uploads), 2)