def test_atomicity(self):
    """Make sure the status file can be written to atomically.

    Several ``status_fight.py`` sub-processes are started against the same
    (not yet existing) status file path. The file is then created, and
    once every sub-process has exited the complete status history is read
    back and each entry is checked for signs of interleaved writes.
    """

    proc_count = 10

    # NOTE(review): mktemp() only reserves a name. That looks deliberate
    # here, since the file is created below as the start signal for the
    # sub-processes -- confirm against status_fight.py.
    fn = Path(tempfile.mktemp())

    fight_path = Path(__file__).resolve().parent / 'status_fight.py'

    try:
        procs = [
            subprocess.Popen(
                ['python3', fight_path.as_posix(), fn.as_posix()])
            for _ in range(proc_count)
        ]

        time.sleep(0.2)

        # Create the status file, which should start the procs writing.
        with fn.open('w'):
            pass

        # Reap every sub-process before asserting, so that one failure
        # doesn't leave the rest running un-waited.
        return_codes = [proc.wait() for proc in procs]
        for return_code in return_codes:
            self.assertEqual(return_code, 0,
                             msg="status_fight sub-test had an error.")

        # Make sure the entire history can be read sanely.
        status = StatusFile(fn)
        history = status.history()

        for entry in history:
            # All the states should be running
            # (The first is normally 'CREATED', but we bypassed the normal
            # creation of the file)
            self.assertEqual(entry.state, STATES.RUNNING)
            # Make sure the state never bled into the note, and that the
            # note was found.
            self.assertNotIn(STATES.RUNNING, entry.note)
            self.assertNotEqual(entry.note, '')
            # Make sure nothing else bled into the note (namely,
            # the timestamp)
            self.assertLess(len(entry.note), 20)
            # Make sure we have a sane date on all lines. This will be
            # None if we couldn't parse the date.
            self.assertIsNot(entry.when, None)
    finally:
        # Clean up the status file even when an assertion failed.
        if fn.exists():
            fn.unlink()
class TestRun:
    """The central pavilion test object. Handle saving, monitoring and running
    tests.

    **Test LifeCycle**

    1. Test Object is Created -- ``TestRun.__init__``

       1. Test id and directory (``working_dir/test_runs/0000001``) are
          created.
       2. Most test information files (config, status, etc) are created.
       3. Build script is created.
       4. Build hash is generated.
       5. Run script dry run generation is performed.

    2. Test is built. -- ``test.build()``
    3. Test is finalized. -- ``test.finalize()``

       1. Variables and config go through final resolution.
       2. Final run script is generated.

    4. Test is run. -- ``test.run()``
    5. Results are gathered. -- ``test.gather_results()``

    :ivar int id: The test id.
    :ivar dict config: The test's configuration.
    :ivar Path path: The path to the test's test_run directory.
    :ivar str build_hash: The full build hash.
    :ivar str build_name: The shortened build hash (the build directory name
        in working_dir/builds).
    :ivar Path build_path: The path to the test's copied build directory.
    :ivar Path build_origin: The build the test will copy (in the build
        directory).
    :ivar StatusFile status: The status object for this test.
    """

    # We have to worry about hash collisions, but we don't need all the bytes
    # of hash most algorithms give us. The birthday attack math for 64 bits (
    # 8 bytes) of hash and 10 million items yields a collision probability of
    # just 0.00027%. Easily good enough.
    BUILD_HASH_BYTES = 8

    # Files are hashed in chunks of this many bytes.
    _BLOCK_SIZE = 4096 * 1024

    logger = logging.getLogger('pav.TestRun')

    def __init__(self, pav_cfg, config, var_man=None, _id=None):
        """Create an new TestRun object. If loading an existing test
        instance, use the ``TestRun.load()`` method.

        :param pav_cfg: The pavilion configuration.
        :param dict config: The test configuration dictionary.
        :param variables.VariableSetManager var_man: The variable set manager
            for this test. Used (and saved) only when creating a new test;
            existing tests load their saved variables instead.
        :param int _id: The test id of an existing test. (You should be using
            TestRun.load).
        :raises TestRunNotFoundError: When given an ``_id`` for which no test
            directory exists.
        :raises TestRunError: When the saved variables can't be loaded, or a
            timeout value is invalid.
        :raises TestConfigError: When 'source_download_name' is set without
            a 'source_location'.
        """

        # Just about every method needs this
        self._pav_cfg = pav_cfg

        self.load_ok = True

        # Compute the actual name of test, using the subtitle config parameter.
        self.name = '.'.join([
            config.get('suite', '<unknown>'),
            config.get('name', '<unnamed>')
        ])
        if 'subtitle' in config and config['subtitle']:
            self.name = self.name + '.' + config['subtitle']

        self.scheduler = config['scheduler']

        # The directory under which all test run directories live.
        tests_path = pav_cfg.working_dir / 'test_runs'

        self.config = config

        self.id = None  # pylint: disable=invalid-name

        # Get an id for the test, if we weren't given one.
        if _id is None:
            self.id, self.path = self.create_id_dir(tests_path)
            self._save_config()
            self.var_man = var_man
            self.var_man.save(self.path / 'variables')
        else:
            self.id = _id
            self.path = utils.make_id_path(tests_path, self.id)
            if not self.path.is_dir():
                raise TestRunNotFoundError(
                    "No test with id '{}' could be found.".format(self.id))
            try:
                self.var_man = variables.VariableSetManager.load(
                    self.path / 'variables')
            except RuntimeError as err:
                raise TestRunError(*err.args)

        # Set a logger more specific to this test.
        self.logger = logging.getLogger('pav.TestRun.{}'.format(self.id))

        # This will be set by the scheduler
        self._job_id = None

        # Setup the initial status file.
        self.status = StatusFile(self.path / 'status')
        if _id is None:
            self.status.set(STATES.CREATED,
                            "Test directory and status file created.")

        self._started = None
        self._finished = None

        self.build_path = None  # type: Path
        self.build_name = None
        self.build_hash = None  # type: str
        self.build_origin = None  # type: Path

        self.run_log = self.path / 'run.log'
        self.results_path = self.path / 'results.json'

        # Timeouts default to None (wait indefinitely). Without these
        # defaults, a config that has no 'timeout' key would leave the
        # attributes undefined and _build()/run() would fail.
        self._build_timeout = None
        self._run_timeout = None

        build_config = self.config.get('build', {})

        # make sure build source_download_name is not set without
        # source_location
        try:
            if build_config['source_download_name'] is not None:
                if build_config['source_location'] is None:
                    msg = "Test could not be build. Need 'source_location'."
                    fprint(msg)
                    self.status.set(
                        STATES.BUILD_ERROR,
                        "'source_download_name is set without a "
                        "'source_location'")
                    raise TestConfigError(msg)
        except KeyError:
            # this is mostly for unit tests that create test configs without a
            # build section at all
            pass

        self.build_script_path = self.path / 'build.sh'  # type: Path
        self.build_path = self.path / 'build'

        if _id is None:
            # New test: write the build script and compute the build hash
            # from it, then save the hash for later loads.
            self._write_script(path=self.build_script_path,
                               config=build_config)

            self.build_hash = self._create_build_hash(build_config)
            with (self.path / 'build_hash').open('w') as build_hash_file:
                build_hash_file.write(self.build_hash)
        else:
            # Existing test: reuse the saved hash, if there is one.
            build_hash_fn = self.path / 'build_hash'
            if build_hash_fn.exists():
                with build_hash_fn.open() as build_hash_file:
                    self.build_hash = build_hash_file.read()

        if self.build_hash is not None:
            short_hash = self.build_hash[:self.BUILD_HASH_BYTES * 2]
            self.build_name = '{hash}'.format(hash=short_hash)
            self.build_origin = pav_cfg.working_dir / 'builds' / self.build_name

        run_config = self.config.get('run', {})
        self.run_tmpl_path = self.path / 'run.tmpl'
        self.run_script_path = self.path / 'run.sh'

        if _id is None:
            self._write_script(path=self.run_tmpl_path, config=run_config)
            self.status.set(STATES.CREATED, "Test directory setup complete.")

        # Checking validity of timeout values.
        for loc in ['build', 'run']:
            if loc in config and 'timeout' in config[loc]:
                try:
                    if config[loc]['timeout'] is None:
                        test_timeout = None
                    else:
                        test_timeout = int(config[loc]['timeout'])
                        if test_timeout < 0:
                            raise ValueError()
                except ValueError:
                    raise TestRunError(
                        "{} timeout must be a non-negative "
                        "integer or empty. Received {}.".format(
                            loc, config[loc]['timeout']))
                else:
                    if loc == 'build':
                        self._build_timeout = test_timeout
                    else:
                        self._run_timeout = test_timeout

    @classmethod
    def load(cls, pav_cfg, test_id):
        """Load an old TestRun object given a test id.

        :param pav_cfg: The pavilion config
        :param int test_id: The test's id number.
        :raises TestRunError: When the test directory or its config can't
            be found or read.
        """

        path = utils.make_id_path(pav_cfg.working_dir / 'test_runs', test_id)

        if not path.is_dir():
            raise TestRunError("Test directory for test id {} does not exist "
                               "at '{}' as expected.".format(test_id, path))

        config = cls._load_config(path)

        # Use cls rather than naming the class, so subclasses load as
        # themselves.
        return cls(pav_cfg, config, _id=test_id)

    def finalize(self, var_man):
        """Resolve any remaining deferred variables, and generate the final
        run script.

        :param var_man: The variable manager holding the now-available
            values for the deferred variables.
        """

        self.var_man.undefer(new_vars=var_man, parser=string_parser.parse)

        self.config = resolve_deferred(self.config, self.var_man)
        self._save_config()

        # Tolerate configs without a run section, as __init__ does.
        self._write_script(
            self.run_script_path,
            self.config.get('run', {}),
        )

    def run_cmd(self):
        """Construct a shell command that would cause pavilion to run this
        test.

        :rtype: str
        """

        pav_path = self._pav_cfg.pav_root / 'bin' / 'pav'

        return '{} run {}'.format(pav_path, self.id)

    def _save_config(self):
        """Save the configuration for this test to the test config file.

        :raises TestRunError: When the config can't be written or contains
            a value that can't be serialized.
        """

        config_path = self.path / 'config'

        try:
            with config_path.open('w') as json_file:
                pavilion.output.json_dump(self.config, json_file)
        except (OSError, IOError) as err:
            raise TestRunError(
                "Could not save TestRun ({}) config at {}: {}".format(
                    self.name, self.path, err))
        except TypeError as err:
            raise TestRunError("Invalid type in config for ({}): {}".format(
                self.name, err))

    @classmethod
    def _load_config(cls, test_path):
        """Load a saved test configuration.

        :param Path test_path: The test run directory to load from.
        :rtype: dict
        :raises TestRunError: When the config file is missing or unreadable.
        """

        config_path = test_path / 'config'

        if not config_path.is_file():
            raise TestRunError(
                "Could not find config file for test at {}.".format(test_path))

        try:
            with config_path.open('r') as config_file:
                # Because only string keys are allowed in test configs,
                # this is a reasonable way to load them.
                return json.load(config_file)
        except TypeError as err:
            raise TestRunError("Bad config values for config '{}': {}".format(
                config_path, err))
        except (IOError, OSError) as err:
            raise TestRunError("Error reading config file '{}': {}".format(
                config_path, err))

    def _find_file(self, file, sub_dir=None):
        """Look for the given file and return a full path to it.
        Relative paths are searched for in all config directories under
        'test_src'.

        :param Path file: The path to the file.
        :param str sub_dir: The subdirectory in each config directory in
            which to search.
        :returns: The full path to the found file, or None if no such file
            could be found.
        """

        if file.is_absolute():
            if file.exists():
                return file
            return None

        # Assemble a potential location from each config dir.
        for config_dir in self._pav_cfg.config_dirs:
            path = config_dir
            if sub_dir is not None:
                path = path / sub_dir
            path = path / file

            if path.exists():
                return path

        return None

    @staticmethod
    def _isurl(url):
        """Determine if the given path is a url.

        Anything that parses with a non-empty scheme counts as a url.
        """

        parsed = urllib.parse.urlparse(url)
        return parsed.scheme != ''

    def _download_path(self, loc, filename):
        """Get the path to where a source_download would be downloaded.

        :param str loc: The url for the download, from the config's
            source_location field.
        :param str filename: The name of the download, from the config's
            source_download_name field. When None, a name is derived from
            the url.
        :rtype: Path
        """

        if filename is None:
            url_parts = urllib.parse.urlparse(loc)
            path_parts = url_parts.path.split('/')
            if path_parts and path_parts[-1]:
                filename = path_parts[-1]
            else:
                # Use a hash of the url if we can't get a name from it.
                filename = hashlib.sha256(loc.encode()).hexdigest()

        return self._pav_cfg.working_dir / 'downloads' / filename

    def _update_src(self, build_config):
        """Retrieve and/or check the existence of the files needed for the
        build. This can include pulling from URL's.

        :param dict build_config: The build configuration dictionary.
        :returns: The path to the source file or directory, or None when
            the build has no 'source_location'.
        :raises TestRunError: When the source can't be retrieved, found,
            or used.
        """

        src_loc = build_config.get('source_location')
        if src_loc is None:
            return None

        # For URL's, check if the file needs to be updated, and try to do so.
        if self._isurl(src_loc):
            missing_libs = wget.missing_libs()
            if missing_libs:
                raise TestRunError(
                    "The dependencies needed for remote source retrieval "
                    "({}) are not available on this system. Please provide "
                    "your test source locally.".format(
                        ', '.join(missing_libs)))

            dwn_name = build_config.get('source_download_name')
            src_dest = self._download_path(src_loc, dwn_name)

            wget.update(self._pav_cfg, src_loc, src_dest)

            return src_dest

        src_path = self._find_file(Path(src_loc), 'test_src')
        if src_path is None:
            raise TestRunError(
                "Could not find and update src location '{}'".format(src_loc))

        if src_path.is_dir():
            # For directories, update the directories mtime to match the
            # latest mtime in the entire directory.
            self._date_dir(src_path)
            return src_path
        elif src_path.is_file():
            # For static files, we'll end up just hashing the whole thing.
            return src_path
        else:
            raise TestRunError("Source location '{}' points to something "
                               "unusable.".format(src_path))

    def _create_build_hash(self, build_config):
        """Turn the build config, and everything the build needs, into hash.
        This includes the build config itself, the source tarball, and all
        extra files.

        :param dict build_config: The build configuration dictionary.
        :returns: The first ``BUILD_HASH_BYTES * 2`` hex characters of the
            hash.
        :rtype: str
        :raises TestRunError: When the source or an extra file can't be
            found or is of an unusable type.
        """

        # The hash order is:
        #  - The build script
        #  - The build specificity
        #  - The src archive.
        #    - For directories, the mtime (updated to the time of the most
        #      recently updated file) is hashed instead.
        #  - All of the build's 'extra_files'
        #  - The build specificity (again; see below)
        hash_obj = hashlib.sha256()

        # Update the hash with the contents of the build script.
        hash_obj.update(self._hash_file(self.build_script_path))

        specificity = build_config.get('specificity', '')
        hash_obj.update(specificity.encode('utf8'))

        src_path = self._update_src(build_config)

        if src_path is not None:
            if src_path.is_file():
                hash_obj.update(self._hash_file(src_path))
            elif src_path.is_dir():
                hash_obj.update(self._hash_dir(src_path))
            else:
                raise TestRunError("Invalid src location {}.".format(src_path))

        for extra_file in build_config.get('extra_files', []):
            extra_file = Path(extra_file)
            full_path = self._find_file(extra_file, 'test_src')

            if full_path is None:
                raise TestRunError(
                    "Could not find extra file '{}'".format(extra_file))
            elif full_path.is_file():
                hash_obj.update(self._hash_file(full_path))
            elif full_path.is_dir():
                self._date_dir(full_path)
                hash_obj.update(self._hash_dir(full_path))
            else:
                raise TestRunError("Extra file '{}' must be a regular "
                                   "file or directory.".format(extra_file))

        # The specificity is redundantly hashed a second time. Removing
        # this would change every resulting hash, so it is kept as is.
        hash_obj.update(build_config.get('specificity', '').encode('utf-8'))

        return hash_obj.hexdigest()[:self.BUILD_HASH_BYTES * 2]

    def build(self):
        """Perform the build if needed, do a soft-link copy of the build
        directory into our test directory, and note that we've used the
        given build.

        :return: True if these steps completed successfully.
        :rtype: bool
        """

        # Only try to do the build if it doesn't already exist.
        if not self.build_origin.exists():
            fprint("Test {s.name} run {s.id} building {s.build_hash}".format(
                s=self), file=sys.stderr)
            self.status.set(STATES.BUILDING,
                            "Starting build {}.".format(self.build_hash))

            # Make sure another test doesn't try to do the build at
            # the same time.
            # Note cleanup of failed builds HAS to occur under this lock to
            # avoid a race condition, even though it would be way simpler to
            # do it in .build()
            lock_path = self.build_origin.with_suffix('.lock')
            with lockfile.LockFile(lock_path,
                                   group=self._pav_cfg.shared_group):
                # Make sure the build wasn't created while we waited for
                # the lock.
                if not self.build_origin.exists():
                    build_dir = self.build_origin.with_suffix('.tmp')

                    # Attempt to perform the actual build, this shouldn't
                    # raise an exception unless something goes terribly
                    # wrong.
                    # This will also set the test status for
                    # non-catastrophic cases.
                    if not self._build(build_dir):
                        # If the build didn't succeed, copy the attempted
                        # build into the test run, and set the run as
                        # complete.
                        if build_dir.exists():
                            build_dir.rename(self.build_path)
                        self.set_run_complete()
                        return False

                    # Rename the build to it's final location.
                    build_dir.rename(self.build_origin)
                else:
                    self.status.set(
                        STATES.BUILDING,
                        "Build {} created while waiting for build lock.".
                        format(self.build_hash))

                # Make a symlink in the build directory that points to
                # the original test that built it
                try:
                    dst = self.build_origin / '.built_by'
                    src = self.path
                    dst.symlink_to(src, True)
                    dst.resolve()
                except OSError:
                    self.logger.warning("Could not create symlink to test")

        else:
            fprint(
                "Test {s.name} run {s.id} reusing build {s.build_hash}".format(
                    s=self), file=sys.stderr)
            self.status.set(STATES.BUILDING,
                            "Build {} already exists.".format(self.build_hash))

        # Perform a symlink copy of the original build directory into our test
        # directory.
        try:
            shutil.copytree(self.build_origin.as_posix(),
                            self.build_path.as_posix(),
                            symlinks=True,
                            copy_function=utils.symlink_copy)
        except OSError as err:
            msg = "Could not perform the build directory copy: {}".format(err)
            self.status.set(STATES.BUILD_ERROR, msg)
            self.logger.error(msg)
            self.set_run_complete()
            return False

        # Touch the original build directory, so that we know it was used
        # recently.
        try:
            now = time.time()
            os.utime(self.build_origin.as_posix(), (now, now))
        except OSError as err:
            self.logger.warning(
                "Could not update timestamp on build directory '%s': %s",
                self.build_origin, err)

        return True

    def _build(self, build_dir):
        """Perform the build. This assumes there actually is a build to
        perform.

        The build is considered timed out when the build log hasn't been
        written to for more than the build timeout.

        :param Path build_dir: The directory in which to perform the build.
        :returns: True or False, depending on whether the build appears to
            have been successful.
        """

        try:
            self._setup_build_dir(build_dir)
        except TestRunError as err:
            self.status.set(
                STATES.BUILD_ERROR,
                "Error setting up build directory '{}': {}".format(
                    build_dir, err))
            return False

        build_log_path = build_dir / 'pav_build_log'

        try:
            # Do the build, and wait for it to complete.
            with build_log_path.open('w') as build_log:
                # Build scripts take the test id as a first argument.
                cmd = [self.build_script_path.as_posix(), str(self.id)]
                proc = subprocess.Popen(cmd,
                                        cwd=build_dir.as_posix(),
                                        stdout=build_log,
                                        stderr=subprocess.STDOUT)

                timeout = self._build_timeout
                result = None
                while result is None:
                    try:
                        result = proc.wait(timeout=timeout)
                    except subprocess.TimeoutExpired:
                        log_stat = build_log_path.stat()
                        quiet_time = time.time() - log_stat.st_mtime
                        # Has the output file changed recently?
                        if self._build_timeout < quiet_time:
                            # Give up on the build, and call it a failure.
                            proc.kill()
                            self.status.set(
                                STATES.BUILD_TIMEOUT,
                                "Build timed out after {} seconds.".format(
                                    self._build_timeout))
                            return False
                        else:
                            # Only wait a max of self._build_timeout next
                            # 'wait'
                            timeout = self._build_timeout - quiet_time

        except subprocess.CalledProcessError as err:
            self.status.set(STATES.BUILD_ERROR,
                            "Error running build process: {}".format(err))
            return False

        except (IOError, OSError) as err:
            self.status.set(
                STATES.BUILD_ERROR,
                "Error that's probably related to writing the "
                "build output: {}".format(err))
            return False

        try:
            self._fix_build_permissions()
        except OSError as err:
            self.logger.warning("Error fixing build permissions: %s", err)

        if result != 0:
            self.status.set(STATES.BUILD_FAILED,
                            "Build returned a non-zero result.")
            return False
        else:
            self.status.set(STATES.BUILD_DONE, "Build completed successfully.")
            return True

    # The mime subtypes handled as (possibly compressed) tar files or
    # stream-compressed single files.
    TAR_SUBTYPES = (
        'gzip',
        'x-gzip',
        'x-bzip2',
        'x-xz',
        'x-tar',
        'x-lzma',
    )

    def _setup_build_dir(self, build_path):
        """Setup the build directory, by extracting or copying the source
        and any extra files.

        :param Path build_path: Path to the intended build directory.
        :return: None
        :raises TestRunError: When the source or an extra file can't be
            found, extracted or copied.
        """

        build_config = self.config.get('build', {})

        src_loc = build_config.get('source_location')
        if src_loc is None:
            src_path = None
        elif self._isurl(src_loc):
            # The file is expected to already be in the downloads
            # directory; this only works out where it was put.
            download_name = build_config.get('source_download_name')
            src_path = self._download_path(src_loc, download_name)
        else:
            src_path = self._find_file(Path(src_loc), 'test_src')
            if src_path is None:
                # Report the configured location; src_path is None here.
                raise TestRunError(
                    "Could not find source file '{}'".format(src_loc))
            # Resolve any softlinks to get the real file.
            src_path = src_path.resolve()

        if src_path is None:
            # If there is no source archive or data, just make the build
            # directory.
            build_path.mkdir()

        elif src_path.is_dir():
            # Recursively copy the src directory to the build directory.
            self.status.set(
                STATES.BUILDING,
                "Copying source directory {} for build {} "
                "as the build directory.".format(src_path, build_path))
            shutil.copytree(src_path.as_posix(),
                            build_path.as_posix(),
                            symlinks=True)

        elif src_path.is_file():
            # Handle decompression of a stream compressed file. The interfaces
            # for the libs are all the same; we just have to choose the right
            # one to use. Zips are handled as an archive, below.
            category, subtype = utils.get_mime_type(src_path)

            if category == 'application' and subtype in self.TAR_SUBTYPES:
                if tarfile.is_tarfile(src_path.as_posix()):
                    try:
                        with tarfile.open(src_path.as_posix(), 'r') as tar:
                            # Filter out all but the top level items.
                            # getmembers() reads the whole archive index;
                            # the 'members' attribute is only filled in
                            # as the archive is read.
                            top_level = [
                                m for m in tar.getmembers()
                                if '/' not in m.name
                            ]
                            # If the file contains only a single directory,
                            # make that directory the build directory. This
                            # should be the default in most cases.
                            if len(top_level) == 1 and top_level[0].isdir():
                                self.status.set(
                                    STATES.BUILDING,
                                    "Extracting tarfile {} for build {} "
                                    "as the build directory.".format(
                                        src_path, build_path))
                                tmpdir = build_path.with_suffix('.extracted')
                                tmpdir.mkdir()
                                tar.extractall(tmpdir.as_posix())
                                opath = tmpdir / top_level[0].name
                                opath.rename(build_path)
                                tmpdir.rmdir()
                            else:
                                # Otherwise, the build path will contain the
                                # extracted contents of the archive.
                                self.status.set(
                                    STATES.BUILDING,
                                    "Extracting tarfile {} for build {} "
                                    "into the build directory.".format(
                                        src_path, build_path))
                                build_path.mkdir()
                                tar.extractall(build_path.as_posix())
                    except (OSError, IOError,
                            tarfile.CompressionError,
                            tarfile.TarError) as err:
                        raise TestRunError(
                            "Could not extract tarfile '{}' into '{}': {}".
                            format(src_path, build_path, err))

                else:
                    # If it's a compressed file but isn't a tar, extract the
                    # file into the build directory.
                    # All the python compression libraries have the same basic
                    # interface, so we can just dynamically switch between
                    # modules.
                    if subtype in ('gzip', 'x-gzip'):
                        comp_lib = gzip
                    elif subtype == 'x-bzip2':
                        comp_lib = bz2
                    elif subtype in ('x-xz', 'x-lzma'):
                        comp_lib = lzma
                    elif subtype == 'x-tar':
                        raise TestRunError(
                            "Test src file '{}' is a bad tar file.".format(
                                src_path))
                    else:
                        raise RuntimeError(
                            "Unhandled compression type. '{}'".format(subtype))

                    self.status.set(
                        STATES.BUILDING,
                        "Extracting {} file {} for build {} "
                        "into the build directory.".format(
                            subtype, src_path, build_path))
                    # The decompressed file keeps the source's name, minus
                    # its last suffix.
                    decomp_fn = src_path.with_suffix('').name
                    decomp_fn = build_path / decomp_fn
                    build_path.mkdir()

                    try:
                        with comp_lib.open(src_path.as_posix()) as infile, \
                                decomp_fn.open('wb') as outfile:
                            shutil.copyfileobj(infile, outfile)
                    except (OSError, IOError, lzma.LZMAError) as err:
                        raise TestRunError(
                            "Error decompressing compressed file "
                            "'{}' into '{}': {}".format(
                                src_path, decomp_fn, err))

            elif category == 'application' and subtype == 'zip':
                try:
                    # Extract the zipfile, under the same conditions as
                    # above with tarfiles.
                    with ZipFile(src_path.as_posix()) as zipped:

                        tmpdir = build_path.with_suffix('.unzipped')
                        tmpdir.mkdir()
                        zipped.extractall(tmpdir.as_posix())

                        files = os.listdir(tmpdir.as_posix())
                        if len(files) == 1 and (tmpdir / files[0]).is_dir():
                            self.status.set(
                                STATES.BUILDING,
                                "Extracting zip file {} for build {} "
                                "as the build directory.".format(
                                    src_path, build_path))
                            # Make the zip's root directory the build dir.
                            (tmpdir / files[0]).rename(build_path)
                            tmpdir.rmdir()
                        else:
                            self.status.set(
                                STATES.BUILDING,
                                "Extracting zip file {} for build {} "
                                "into the build directory.".format(
                                    src_path, build_path))
                            # The overall contents of the zip are the build
                            # dir.
                            tmpdir.rename(build_path)

                except (OSError, IOError, zipfile.BadZipFile) as err:
                    raise TestRunError(
                        "Could not extract zipfile '{}' into destination "
                        "'{}': {}".format(src_path, build_path, err))

            else:
                # Finally, simply copy any other types of files into the build
                # directory.
                self.status.set(
                    STATES.BUILDING,
                    "Copying file {} for build {} "
                    "into the build directory.".format(src_path, build_path))
                dest = build_path / src_path.name
                try:
                    build_path.mkdir()
                    shutil.copy(src_path.as_posix(), dest.as_posix())
                except OSError as err:
                    raise TestRunError(
                        "Could not copy test src '{}' to '{}': {}".format(
                            src_path, dest, err))

        # Now we just need to copy over all of the extra files.
        for extra in build_config.get('extra_files', []):
            extra = Path(extra)
            path = self._find_file(extra, 'test_src')
            if path is None:
                # Report a missing file properly, rather than failing on
                # the attribute access below.
                raise TestRunError(
                    "Could not find extra file '{}'".format(extra))
            dest = build_path / path.name
            try:
                shutil.copy(path.as_posix(), dest.as_posix())
            except OSError as err:
                raise TestRunError(
                    "Could not copy extra file '{}' to dest '{}': {}".format(
                        path, dest, err))

    def _fix_build_permissions(self):
        """The files in a build directory should never be writable, but
        directories should be. Users are thus allowed to delete build
        directories and their files, but never modify them. Additions,
        deletions within test build directories will effect the soft links,
        not the original files themselves. (This applies both to owner and
        group).

        :raises OSError: If we lack permissions or something else goes wrong.
        """

        # NOTE(review): this walks self.build_origin, but _build() calls it
        # before build() renames the '.tmp' build directory into place. On
        # that path there appears to be nothing to walk yet -- confirm
        # whether the temporary build directory was intended.

        # We rely on the umask to handle most restrictions.
        # This just masks out the write bits.
        file_mask = 0o777555

        # We shouldn't have to do anything to directories, they should have
        # the correct permissions already.
        for path, _, files in os.walk(self.build_origin.as_posix()):
            path = Path(path)
            for file in files:
                file_path = path / file
                file_stat = file_path.stat()
                file_path.lchmod(file_stat.st_mode & file_mask)

    def run(self):
        """Run the test.

        The run is considered timed out when the run log hasn't been
        written to for more than the run timeout.

        :rtype: bool
        :returns: True if the test completed and returned zero, false
            otherwise.
        :raises TimeoutError: When the run times out.
        :raises TestRunError: We don't actually raise this, but might in the
            future.
        """

        self.status.set(STATES.PREPPING_RUN,
                        "Converting run template into run script.")

        with self.run_log.open('wb') as run_log:
            self.status.set(STATES.RUNNING,
                            "Starting the run script.")

            self._started = datetime.datetime.now()

            # Set the working directory to the build path, if there is one.
            run_wd = None
            if self.build_path is not None:
                run_wd = self.build_path.as_posix()

            # Run scripts take the test id as a first argument.
            cmd = [self.run_script_path.as_posix(), str(self.id)]
            proc = subprocess.Popen(cmd,
                                    cwd=run_wd,
                                    stdout=run_log,
                                    stderr=subprocess.STDOUT)

            self.status.set(STATES.RUNNING,
                            "Currently running.")

            # Run the test, but timeout if it doesn't produce any output every
            # self._run_timeout seconds
            timeout = self._run_timeout
            result = None
            while result is None:
                try:
                    result = proc.wait(timeout=timeout)
                except subprocess.TimeoutExpired:
                    out_stat = self.run_log.stat()
                    quiet_time = time.time() - out_stat.st_mtime
                    # Has the output file changed recently?
                    if self._run_timeout < quiet_time:
                        # Give up on the run, and call it a failure.
                        proc.kill()
                        msg = ("Run timed out after {} seconds".format(
                            self._run_timeout))
                        self.status.set(STATES.RUN_TIMEOUT, msg)
                        self._finished = datetime.datetime.now()
                        raise TimeoutError(msg)
                    else:
                        # Only wait a max of self._run_timeout next 'wait'.
                        # This is measured from the full timeout (as in
                        # _build), not from the previous, already
                        # shortened, wait.
                        timeout = self._run_timeout - quiet_time

        self._finished = datetime.datetime.now()

        self.status.set(STATES.RUN_DONE,
                        "Test run has completed.")

        # Only a zero exit status counts as a successful run.
        return result == 0

    def set_run_complete(self):
        """Write a file in the test directory that indicates that the test
        has completed a run, one way or another. This should only be called
        when we're sure their won't be any more status changes."""

        # Write the current time to the file. We don't actually use the contents
        # of the file, but it's nice to have another record of when this was
        # run.
        with (self.path / 'RUN_COMPLETE').open('w') as run_complete:
            json.dump({
                'ended': datetime.datetime.now().isoformat(),
            }, run_complete)

    # How long, in seconds, to sleep between checks in wait().
    WAIT_INTERVAL = 0.5

    def wait(self, timeout=None):
        """Wait for the test run to be complete. This works across hosts, as
        it simply checks for files in the run directory.

        :param Union(None,float) timeout: How long to wait in seconds. If
            this is None, wait forever.
        :raises TimeoutError: if the timeout expires.
        """

        run_complete_file = self.path / 'RUN_COMPLETE'

        # Turn the relative timeout into an absolute deadline.
        if timeout is not None:
            timeout = time.time() + timeout

        while True:
            if run_complete_file.exists():
                return

            time.sleep(self.WAIT_INTERVAL)

            if timeout is not None and time.time() > timeout:
                raise TimeoutError("Timed out waiting for test '{}' to "
                                   "complete".format(self.id))

    def gather_results(self, run_result):
        """Process and log the results of the test, including the default set
        of result keys.

        Default Result Keys:

        name
            The name of the test
        id
            The test id
        created
            When the test was created.
        started
            When the test was started.
        finished
            When the test finished running (or failed).
        duration
            Length of the test run.
        user
            The user who ran the test.
        sys_name
            The system (cluster) on which the test ran.
        job_id
            The job id set by the scheduler.
        result
            Defaults to PASS if the test completed (with a zero
            exit status). Is generally expected to be overridden by other
            result parsers.

        :param bool run_result: The result of the run; a true value gives
            a default result of PASS, anything else FAIL.
        :returns: The results, as returned by the result parsers.
        :raises RuntimeError: When called before the test was run.
        """

        if self._finished is None:
            raise RuntimeError(
                "test.gather_results can't be run unless the test was run "
                "(or an attempt was made to run it). "
                "This occurred for test {s.name}, #{s.id}".format(s=self))

        parser_configs = self.config['results']

        # Create a human readable timestamp from the test directories
        # modified (should be creation) timestamp.
        created = datetime.datetime.fromtimestamp(
            self.path.stat().st_mtime).isoformat(" ")

        if run_result:
            default_result = result_parsers.PASS
        else:
            default_result = result_parsers.FAIL

        results = {
            # These can't be overridden
            'name': self.name,
            'id': self.id,
            'created': created,
            'started': self._started.isoformat(" "),
            'finished': self._finished.isoformat(" "),
            'duration': str(self._finished - self._started),
            'user': self.var_man['pav.user'],
            'job_id': self.job_id,
            'sys_name': self.var_man['sys.sys_name'],
            # This may be overridden by result parsers.
            'result': default_result
        }

        self.status.set(STATES.RESULTS,
                        "Parsing {} result types.".format(len(parser_configs)))

        results = result_parsers.parse_results(self, results)

        return results

    def save_results(self, results):
        """Save the results to the results file.

        :param dict results: The results dictionary.
        """

        with self.results_path.open('w') as results_file:
            json.dump(results, results_file)

    def load_results(self):
        """Load results from the results file.

        :returns: A dict of results, or None if the results file doesn't
            exist.
        :rtype: dict
        """

        if self.results_path.exists():
            with self.results_path.open() as results_file:
                return json.load(results_file)
        else:
            return None

    @property
    def is_built(self):
        """Whether the build for this test exists, judged by whether the
        test's (resolved) build path exists.

        :returns: True if the build exists, False otherwise.
        :rtype: bool
        """

        return self.build_path.resolve().exists()

    @property
    def job_id(self):
        """The job id of this test (saved to a ``jobid`` file). This should
        be set by the scheduler plugin as soon as it's known.

        Returns None when no job id has been saved yet, or when the file
        can't be read.
        """

        path = self.path / 'jobid'

        # Use the cached value once we have one.
        if self._job_id is not None:
            return self._job_id

        try:
            with path.open('r') as job_id_file:
                self._job_id = job_id_file.read()
        except FileNotFoundError:
            return None
        except (OSError, IOError) as err:
            self.logger.error("Could not read jobid file '%s': %s",
                              path, err)
            return None

        return self._job_id

    @job_id.setter
    def job_id(self, job_id):

        path = self.path / 'jobid'

        try:
            with path.open('w') as job_id_file:
                job_id_file.write(job_id)
        except (IOError, OSError) as err:
            self.logger.error("Could not write jobid file '%s': %s",
                              path, err)

        # The in-memory value is set even when the write failed.
        self._job_id = job_id

    @property
    def timestamp(self):
        """Return the unix timestamp for this test, based on the last
        modified date for the test directory."""
        return self.path.stat().st_mtime

    def _hash_dict(self, mapping):
        """Create a hash from the keys and items in 'mapping'. Keys are
        processed in sorted order. Can handle string values, lists of
        strings, and other dictionaries as values; values of any other type
        contribute only their key.

        :param dict mapping: The dictionary to hash.
        :returns: The sha256 digest.
        :rtype: bytes
        """

        hash_obj = hashlib.sha256()

        for key in sorted(mapping.keys()):
            hash_obj.update(str(key).encode('utf-8'))

            val = mapping[key]

            if isinstance(val, str):
                hash_obj.update(val.encode('utf-8'))
            elif isinstance(val, list):
                for item in val:
                    hash_obj.update(item.encode('utf-8'))
            elif isinstance(val, dict):
                hash_obj.update(self._hash_dict(val))

        return hash_obj.digest()

    def _hash_file(self, path):
        """Hash the given file (which is assumed to exist).

        :param Path path: Path to the file to hash.
        :returns: The sha256 digest of the file's contents.
        :rtype: bytes
        """

        hash_obj = hashlib.sha256()

        # Read in chunks, so large files never have to fit in memory.
        with path.open('rb') as file:
            chunk = file.read(self._BLOCK_SIZE)
            while chunk:
                hash_obj.update(chunk)
                chunk = file.read(self._BLOCK_SIZE)

        return hash_obj.digest()

    @staticmethod
    def _hash_dir(path):
        """Instead of hashing the files within a directory, we just create a
        'hash' based on it's name and mtime, assuming we've run _date_dir
        on it before hand. This produces an arbitrary string, not a hash.

        :param Path path: The path to the directory.
        :returns: The 'hash'
        :rtype: bytes
        """

        dir_stat = path.stat()
        return '{} {:0.5f}'.format(path, dir_stat.st_mtime).encode('utf-8')

    @staticmethod
    def _date_dir(base_path):
        """Update the mtime of the given directory or path to the the latest
        mtime contained within.

        :param Path base_path: The root of the path to evaluate.
        """

        src_stat = base_path.stat()
        latest = src_stat.st_mtime

        paths = utils.flat_walk(base_path)
        for path in paths:
            dir_stat = path.stat()
            if dir_stat.st_mtime > latest:
                latest = dir_stat.st_mtime

        # Only touch the directory when something inside it is newer.
        if src_stat.st_mtime != latest:
            os.utime(base_path.as_posix(), (src_stat.st_atime, latest))

    def _write_script(self, path, config):
        """Write a build or run script or template. The formats for each are
        identical.

        :param Path path: Path to the template file to write.
        :param dict config: Configuration dictionary for the script file.
        :return:
        """

        script = scriptcomposer.ScriptComposer(
            details=scriptcomposer.ScriptDetails(
                path=path,
                group=self._pav_cfg.shared_group,
            ))

        verbose = config.get('verbose', 'false').lower() == 'true'

        if verbose:
            script.comment('# Echoing all commands to log.')
            script.command('set -v')
            script.newline()

        pav_lib_bash = self._pav_cfg.pav_root / 'bin' / 'pav-lib.bash'

        # If we include this directly, it breaks build hashing.
        script.comment('The first (and only) argument of the build script is '
                       'the test id.')
        script.env_change({
            'TEST_ID': '${1:-0}',   # Default to test id 0 if one isn't given.
            'PAV_CONFIG_FILE': self._pav_cfg['pav_cfg_file']
        })
        script.command('source {}'.format(pav_lib_bash))

        preamble = config.get('preamble', [])
        if preamble:
            script.newline()
            script.comment('Preamble commands')
            for cmd in preamble:
                script.command(cmd)

        modules = config.get('modules', [])
        if modules:
            script.newline()
            script.comment(
                'Perform module related changes to the environment.')

            for module in modules:
                script.module_change(module, self.var_man)

        env = config.get('env', {})
        if env:
            script.newline()
            script.comment("Making any environment changes needed.")
            script.env_change(env)

        if verbose:
            script.newline()
            script.comment('List all the module modules for posterity')
            script.command("module -t list")
            script.newline()
            script.comment('Output the environment for posterity')
            script.command("declare -p")

        script.newline()
        cmds = config.get('cmds', [])
        if cmds:
            script.comment("Perform the sequence of test commands.")
            for line in cmds:
                # Multi-line entries become one script command per line.
                for split_line in line.split('\n'):
                    script.command(split_line)
        else:
            script.comment('No commands given for this script.')

        script.write()

    @staticmethod
    def create_id_dir(id_dir):
        """In the given directory, create the lowest numbered (positive
        integer) directory that doesn't already exist.

        :param Path id_dir: Path to the directory that contains these 'id'
            directories
        :returns: The id and path to the created directory.
        :rtype: tuple(int, Path)
        :raises OSError: on directory creation failure.
        :raises TimeoutError: If we couldn't get the lock in time.
        """

        lockfile_path = id_dir / '.lockfile'
        with lockfile.LockFile(lockfile_path, timeout=1):
            # Only consider the directories whose names are integers.
            used_ids = {
                int(name) for name in os.listdir(str(id_dir))
                if name.isdigit() and (id_dir / name).is_dir()
            }

            # Find the first unused id.
            id_ = 1
            while id_ in used_ids:
                id_ += 1

            path = utils.make_id_path(id_dir, id_)
            path.mkdir()

        return id_, path

    def __repr__(self):
        return "TestRun({s.name}-{s.id})".format(s=self)
def __init__(self, pav_cfg, config, var_man=None, _id=None):
    """Create an new TestRun object. If loading an existing test
    instance, use the ``TestRun.from_id()`` method.

    For a new test (``_id`` is None) this allocates a test id and
    directory, saves the config and variables, writes the build script
    and the run script template, and records the build hash. For an
    existing test it reloads the saved variables and build hash.

    :param pav_cfg: The pavilion configuration.
    :param dict config: The test configuration dictionary.
    :param variables.VariableSetManager var_man: The variable set manager
        for this test. When omitted for a new test, an empty manager is
        used.
    :param int _id: The test id of an existing test. (You should be using
        TestRun.load).
    :raises TestRunNotFoundError: If ``_id`` is given but no such test
        directory exists.
    :raises TestRunError: If the saved variables can't be loaded, or a
        build/run timeout isn't a non-negative integer (or empty).
    :raises TestConfigError: If 'source_download_name' is set while
        'source_location' is None.
    """

    # Just about every method needs this
    self._pav_cfg = pav_cfg

    self.load_ok = True

    # Compute the actual name of test, using the subtitle config parameter.
    self.name = '.'.join([
        config.get('suite', '<unknown>'),
        config.get('name', '<unnamed>')])
    if 'subtitle' in config and config['subtitle']:
        self.name = self.name + '.' + config['subtitle']

    self.scheduler = config['scheduler']

    # Create the tests directory if it doesn't already exist.
    tests_path = pav_cfg.working_dir / 'test_runs'

    self.config = config

    self.id = None  # pylint: disable=invalid-name

    # Get an id for the test, if we weren't given one.
    if _id is None:
        self.id, self.path = self.create_id_dir(tests_path)
        self._save_config()
        # var_man defaults to None in the signature; calling .save() on
        # None would die with an AttributeError, so fall back to an
        # empty variable manager.
        if var_man is None:
            var_man = variables.VariableSetManager()
        self.var_man = var_man
        self.var_man.save(self.path / 'variables')
    else:
        self.id = _id
        self.path = utils.make_id_path(tests_path, self.id)
        if not self.path.is_dir():
            raise TestRunNotFoundError(
                "No test with id '{}' could be found.".format(self.id))
        try:
            self.var_man = variables.VariableSetManager.load(
                self.path / 'variables')
        except RuntimeError as err:
            # Keep the original error attached as the cause.
            raise TestRunError(*err.args) from err

    # Set a logger more specific to this test.
    self.logger = logging.getLogger('pav.TestRun.{}'.format(self.id))

    # This will be set by the scheduler
    self._job_id = None

    # Setup the initial status file.
    self.status = StatusFile(self.path / 'status')
    if _id is None:
        self.status.set(STATES.CREATED,
                        "Test directory and status file created.")

    self._started = None
    self._finished = None

    self.build_path = None  # type: Path
    self.build_name = None
    self.build_hash = None  # type: str
    self.build_origin = None  # type: Path
    self.run_log = self.path / 'run.log'
    self.results_path = self.path / 'results.json'

    build_config = self.config.get('build', {})

    # make sure build source_download_name is not set without
    # source_location
    try:
        if build_config['source_download_name'] is not None:
            if build_config['source_location'] is None:
                msg = "Test could not be build. Need 'source_location'."
                fprint(msg)
                self.status.set(
                    STATES.BUILD_ERROR,
                    "'source_download_name is set without a "
                    "'source_location'")
                raise TestConfigError(msg)
    except KeyError:
        # this is mostly for unit tests that create test configs without a
        # build section at all
        pass

    self.build_script_path = self.path / 'build.sh'  # type: Path
    self.build_path = self.path / 'build'
    build_hash_fn = self.path / 'build_hash'

    if _id is None:
        # New test: write the build script, then compute and record
        # the build hash.
        self._write_script(
            path=self.build_script_path,
            config=build_config)

        self.build_hash = self._create_build_hash(build_config)
        with build_hash_fn.open('w') as build_hash_file:
            build_hash_file.write(self.build_hash)
    elif build_hash_fn.exists():
        # Existing test: reuse the hash saved at creation time, if any.
        with build_hash_fn.open() as build_hash_file:
            self.build_hash = build_hash_file.read()

    if self.build_hash is not None:
        # Two hex characters per hash byte.
        short_hash = self.build_hash[:self.BUILD_HASH_BYTES * 2]
        self.build_name = '{hash}'.format(hash=short_hash)
        self.build_origin = pav_cfg.working_dir / 'builds' / self.build_name

    run_config = self.config.get('run', {})
    self.run_tmpl_path = self.path / 'run.tmpl'
    self.run_script_path = self.path / 'run.sh'

    if _id is None:
        self._write_script(
            path=self.run_tmpl_path,
            config=run_config)

        self.status.set(STATES.CREATED, "Test directory setup complete.")

    # Checking validity of timeout values.
    # NOTE(review): _build_timeout/_run_timeout are only assigned here when
    # the matching config section has a 'timeout' key - confirm a default
    # exists elsewhere for configs without one.
    for loc in ['build', 'run']:
        if loc in config and 'timeout' in config[loc]:
            try:
                if config[loc]['timeout'] is None:
                    test_timeout = None
                else:
                    test_timeout = int(config[loc]['timeout'])
                    if test_timeout < 0:
                        raise ValueError()
            except (TypeError, ValueError):
                # int() raises TypeError for non-string/non-numeric
                # values (lists, dicts); report those the same way.
                raise TestRunError(
                    "{} timeout must be a non-negative "
                    "integer or empty. Received {}.".format(
                        loc, config[loc]['timeout']))
            else:
                if loc == 'build':
                    self._build_timeout = test_timeout
                else:
                    self._run_timeout = test_timeout
def __init__(self, pav_cfg, config, sys_vars, _id=None):
    """Create an new PavTest object. If loading an existing test instance,
    use the PavTest.from_id method.

    Allocates (or locates) the test directory under
    ``working_dir/tests``, opens the status file, writes the build script
    and run template if they don't already exist, and determines the
    build hash.

    :param pav_cfg: The pavilion configuration.
    :param config: The test configuration dictionary.
    :param Union(dict, None) sys_vars: System variables.
    :param _id: The test id of an existing test. (You should be using
        PavTest.load).
    :raises RuntimeError: If a new test is created without sys_vars.
    :raises PavTestNotFoundError: If ``_id`` is given but its test
        directory doesn't exist.
    """

    if _id is None and sys_vars is None:
        raise RuntimeError("New PavTest objects require a sys_vars dict. ")

    # Just about every method needs this
    self._pav_cfg = pav_cfg

    # Compute the actual name of test, using the subtest config parameter.
    self.name = config['name']
    if 'subtest' in config and config['subtest']:
        self.name = self.name + '.' + config['subtest']

    self.scheduler = config['scheduler']

    # Create the tests directory if it doesn't already exist.
    tests_path = pav_cfg.working_dir / 'tests'

    self.config = config

    self.id = None  # pylint: disable=invalid-name

    # Get an id for the test, if we weren't given one.
    if _id is None:
        self.id, self.path = self.create_id_dir(tests_path)
        self._save_config()
    else:
        self.id = _id
        self.path = utils.make_id_path(tests_path, self.id)
        if not self.path.is_dir():
            raise PavTestNotFoundError(
                "No test with id '{}' could be found.".format(self.id))

    # Set a logger more specific to this test.
    self.logger = logging.getLogger('pav.PavTest.{}'.format(self.id))

    # This will be set by the scheduler
    self._job_id = None

    # Setup the initial status file.
    self.status = StatusFile(self.path / 'status')
    if _id is None:
        self.status.set(STATES.CREATED,
                        "Test directory and status file created.")

    self._started = None
    self._finished = None

    self.build_path = None  # type: Path
    self.build_name = None
    self.build_hash = None  # type: str
    self.build_script_path = None  # type: Path
    self.build_origin = None  # type: Path
    self.run_log = self.path / 'run.log'
    self.results_path = self.path / 'results.json'

    build_config = self.config.get('build', {})
    # Only write the build script if it isn't already in the test
    # directory.
    self.build_script_path = self.path / 'build.sh'
    if not self.build_script_path.exists():
        self._write_script(self.build_script_path, build_config, sys_vars)

    self.build_path = self.path / 'build'
    if self.build_path.is_symlink():
        # The build path is a symlink; take the hash from the name of the
        # directory it resolves to.
        build_rp = self.build_path.resolve()
        self.build_hash = build_rp.name
    else:
        self.build_hash = self._create_build_hash(build_config)

    # Two hex characters per hash byte.
    short_hash = self.build_hash[:self.BUILD_HASH_BYTES * 2]
    self.build_name = '{hash}'.format(hash=short_hash)
    self.build_origin = pav_cfg.working_dir / 'builds' / self.build_name

    run_config = self.config.get('run', {})
    if run_config:
        self.run_tmpl_path = self.path / 'run.tmpl'
        self.run_script_path = self.path / 'run.sh'
        # As with the build script, don't overwrite an existing template.
        if not self.run_tmpl_path.exists():
            self._write_script(self.run_tmpl_path, run_config, sys_vars)
    else:
        # No run section in the config, so there are no run scripts.
        self.run_tmpl_path = None
        self.run_script_path = None

    if _id is None:
        self.status.set(STATES.CREATED, "Test directory setup complete.")
def test_status(self):
    """Checking status object basic functionality.

    Covers status file creation, the initial 'CREATED' entry and its
    timestamp, appending several states, reading back the history, and
    the truncation of over-long state names and notes.
    """

    # Use a private temp directory rather than tempfile.mktemp(): the
    # status file still doesn't exist beforehand (StatusFile must create
    # it), but the name can't be raced for and everything is cleaned up
    # even when an assertion fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        fn = Path(tmp_dir) / 'status'

        status = StatusFile(fn)

        self.assertTrue(fn.exists())
        status_info = status.current()
        self.assertEqual(status_info.state, 'CREATED')

        # Get timestamp.
        now = datetime.datetime.now()

        # Make sure the timestamp is before now.
        self.assertLess(status_info.when, now)
        # Make sure the timestamp is less than a few seconds in the future.
        # If things are wrong with our timestamping code, they'll be much
        # farther off than this.
        self.assertGreater(now + datetime.timedelta(seconds=5),
                           status_info.when)

        self.assertEqual(status_info.note, 'Created status file.')

        # Dump a bunch of states to the status file.
        states = [STATES.UNKNOWN, STATES.INVALID, STATES.CREATED,
                  STATES.RUNNING, STATES.RESULTS]
        for state in states:
            status.set(state, '{}_{}'.format(state, state.lower()))

        self.assertEqual(len(status.history()), 6)
        self.assertEqual(status.current().state, 'RESULTS')
        # list.sort() sorts in place and returns None, so comparing its
        # return values would always pass. Compare sorted copies instead.
        self.assertEqual(sorted(s.state for s in status.history()),
                         sorted(states + ['CREATED']))

        # Make sure too long statuses are handled correctly.
        status.set("AN_EXCESSIVELY_LONG_STATE_NAME",
                   "This is " + "way " * 10000 + "too long.")
        status_info = status.current()

        self.assertLessEqual(len(status_info.state), STATES.max_length)
        self.assertEqual(status_info.state, STATES.INVALID)
        self.assertLessEqual(len(status_info.note), StatusInfo.NOTE_MAX)

        with fn.open() as sf:
            lines = sf.readlines()

            self.assertLessEqual(len(lines[-1]), StatusInfo.LINE_MAX)
class TestRun(TestAttributes):
    """The central pavilion test object. Handle saving, monitoring and running
    tests.

    **Test LifeCycle**
    1. Test Object is Created -- ``TestRun.__init__``

       1. Test id and directory (``working_dir/test_runs/0000001``) are created.
       2. Most test information files (config, status, etc) are created.
       3. Build script is created.
       4. Build hash is generated.
       5. Run script dry run generation is performed.

    2. Test is built. -- ``test.build()``
    3. Test is finalized. -- ``test.finalize()``

       1. Variables and config go through final resolution.
       2. Final run script is generated.
    4. Test is run. -- ``test.run()``
    5. Results are gathered. -- ``test.gather_results()``

    :ivar int ~.id: The test id.
    :ivar dict config: The test's configuration.
    :ivar Path test.path: The path to the test's test_run directory.
    :ivar Path suite_path: The path to the test suite file that this test came
        from. May be None for artificially generated tests.
    :ivar dict results: The test results. Set None if results haven't been
        gathered.
    :ivar TestBuilder builder: The test builder object, with information on the
        test's build.
    :ivar Path build_origin_path: The path to the symlink to the original
        build directory. For bookkeeping.
    :ivar StatusFile status: The status object for this test.
    :ivar TestRunOptions opt: Test run options defined by OPTIONS_DEFAULTS
    """

    logger = logging.getLogger('pav.TestRun')

    # Names of the bookkeeping files kept in the test run directory.
    JOB_ID_FN = 'job_id'
    COMPLETE_FN = 'RUN_COMPLETE'

    def __init__(self, pav_cfg, config,
                 build_tracker=None, var_man=None, _id=None,
                 rebuild=False, build_only=False):
        """Create an new TestRun object. If loading an existing test
        instance, use the ``TestRun.from_id()`` method.

        :param pav_cfg: The pavilion configuration.
        :param dict config: The test configuration dictionary.
        :param builder.MultiBuildTracker build_tracker: Tracker for watching
            and managing the status of multiple builds.
        :param variables.VariableSetManager var_man: The variable set manager
            for this test.
        :param bool build_only: Only build this test run, do not run it.
        :param bool rebuild: After determining the build name, deprecate it
            and select a new, non-deprecated build.
        :param int _id: The test id of an existing test. (You should be using
            TestRun.load).
        :raises TestRunNotFoundError: If the test run directory is missing.
        :raises TestRunError: If saved variables can't be loaded, a timeout
            is invalid, or the builder can't be created.
        :raises TestConfigError: If the configured group is unusable (see
            ``get_permissions``).
        """

        # Just about every method needs this
        self._pav_cfg = pav_cfg
        self.scheduler = config['scheduler']

        # Create the tests directory if it doesn't already exist.
        tests_path = pav_cfg.working_dir / 'test_runs'

        self.config = config

        group, umask = self.get_permissions(pav_cfg, config)

        # Get an id for the test, if we weren't given one.
        if _id is None:
            id_tmp, run_path = dir_db.create_id_dir(tests_path, group, umask)
            super().__init__(
                path=run_path,
                group=group, umask=umask)

            # Set basic attributes
            self.id = id_tmp
            self.build_only = build_only
            self.complete = False
            self.created = dt.datetime.now()
            self.name = self.make_name(config)
            self.rebuild = rebuild
            self.suite_path = Path(config.get('suite_path', '.'))
            self.user = utils.get_login()
            self.uuid = str(uuid.uuid4())
        else:
            # Load the test info from the given id path.
            super().__init__(
                path=dir_db.make_id_path(tests_path, _id),
                group=group, umask=umask)
            self.load_attributes()

        self.test_version = config.get('test_version')

        if not self.path.is_dir():
            raise TestRunNotFoundError(
                "No test with id '{}' could be found.".format(self.id))

        # Mark the run to build locally.
        self.build_local = config.get('build', {}) \
                                 .get('on_nodes', 'false').lower() != 'true'

        self._variables_path = self.path / 'variables'

        if _id is None:
            with PermissionsManager(self.path, self.group, self.umask):
                self._save_config()
                if var_man is None:
                    var_man = variables.VariableSetManager()
                self.var_man = var_man
                self.var_man.save(self._variables_path)

            self.sys_name = self.var_man.get('sys_name', '<unknown>')
        else:
            try:
                self.var_man = variables.VariableSetManager.load(
                    self._variables_path)
            except RuntimeError as err:
                raise TestRunError(*err.args) from err

        # This will be set by the scheduler
        self._job_id = None

        with PermissionsManager(self.path / 'status', self.group, self.umask):
            # Setup the initial status file.
            self.status = StatusFile(self.path / 'status')
            if _id is None:
                self.status.set(STATES.CREATED,
                                "Test directory and status file created.")

        self.run_timeout = self.parse_timeout(
            'run', config.get('run', {}).get('timeout'))
        self.build_timeout = self.parse_timeout(
            'build', config.get('build', {}).get('timeout'))

        self.run_log = self.path / 'run.log'
        self.build_log = self.path / 'build.log'
        self.results_log = self.path / 'results.log'
        self.results_path = self.path / 'results.json'
        self.build_origin_path = self.path / 'build_origin'
        self.build_timeout_file = config.get('build', {}).get('timeout_file')

        # Use run.log as the default run timeout file
        self.timeout_file = self.run_log
        run_timeout_file = config.get('run', {}).get('timeout_file')
        if run_timeout_file is not None:
            self.timeout_file = self.path / run_timeout_file

        build_config = self.config.get('build', {})

        self.build_script_path = self.path / 'build.sh'  # type: Path
        self.build_path = self.path / 'build'
        if _id is None:
            self._write_script(
                'build',
                path=self.build_script_path,
                config=build_config)

        try:
            self.builder = builder.TestBuilder(
                pav_cfg=pav_cfg,
                test=self,
                mb_tracker=build_tracker,
                build_name=self.build_name)
            self.build_name = self.builder.name
        except builder.TestBuilderError as err:
            raise TestRunError(
                "Could not create builder for test {s.name} (run {s.id}): {err}"
                .format(s=self, err=err)) from err

        run_config = self.config.get('run', {})
        self.run_tmpl_path = self.path / 'run.tmpl'
        self.run_script_path = self.path / 'run.sh'

        if _id is None:
            self._write_script(
                'run',
                path=self.run_tmpl_path,
                config=run_config)

        if _id is None:
            self.save_attributes()
            self.status.set(STATES.CREATED, "Test directory setup complete.")

        self._results = None

        self.skipped = self._get_skipped()  # eval skip.

    @classmethod
    def load(cls, pav_cfg, test_id):
        """Load an old TestRun object given a test id.

        :param pav_cfg: The pavilion config
        :param int test_id: The test's id number.
        :rtype: TestRun
        :raises TestRunError: If the test directory or its config is missing.
        """

        path = dir_db.make_id_path(pav_cfg.working_dir / 'test_runs', test_id)

        if not path.is_dir():
            raise TestRunError("Test directory for test id {} does not exist "
                               "at '{}' as expected.".format(test_id, path))

        config = cls._load_config(path)

        return TestRun(pav_cfg, config, _id=test_id)

    def finalize(self, var_man):
        """Resolve any remaining deferred variables, and generate the final
        run script.

        :param var_man: The variables to resolve the deferred variables with.
        :raises TestRunError: If a 'create_files' entry would land outside
            the build directory or clash with an existing directory.
        """

        self.var_man.undefer(new_vars=var_man)

        self.config = resolver.TestConfigResolver.resolve_deferred(
            self.config, self.var_man)
        self._save_config()
        # Save our newly updated variables.
        self.var_man.save(self._variables_path)

        # Create files specified via run config key.
        files_to_create = self.config['run'].get('create_files', [])
        if files_to_create:
            for file, contents in files_to_create.items():
                file_path = Path(self.build_path / file)
                # Prevent files from being written outside build directory.
                if not utils.dir_contains(file_path, self.build_path):
                    raise TestRunError(
                        "'create_file: {}': file path"
                        " outside build context.".format(file_path))
                # Prevent files from overwriting existing directories.
                if file_path.is_dir():
                    raise TestRunError(
                        "'create_file: {}' clashes with"
                        " existing directory in build dir.".format(file_path))
                # Create file parent directory(ies).
                dirname = file_path.parent
                (self.build_path / dirname).mkdir(parents=True, exist_ok=True)

                # Don't try to overwrite a symlink without removing it first.
                if file_path.is_symlink():
                    file_path.unlink()

                # Write file.
                with PermissionsManager(file_path, self.group, self.umask), \
                        file_path.open('w') as file_:
                    for line in contents:
                        file_.write("{}\n".format(line))

        # Skip conditions may only be decidable now that deferred values
        # have been resolved.
        if not self.skipped:
            self.skipped = self._get_skipped()

        self.save_attributes()

        self._write_script(
            'run',
            self.run_script_path,
            self.config['run'],
        )

    @staticmethod
    def get_permissions(pav_cfg, config) -> (str, int):
        """Get the permissions to use on file creation, either from the
        pav_cfg or test config it that overrides.

        :returns: A tuple of the group and umask.
        :raises TestConfigError: If the group doesn't exist or the current
            user isn't a member of it.
        :raises RuntimeError: If the umask isn't a valid octal string.
        """

        # If a test access group was given, make sure it exists and the
        # current user is a member.
        group = config.get('group', pav_cfg['shared_group'])
        if group is not None:
            try:
                group_data = grp.getgrnam(group)
                user = utils.get_login()
                if group != user and user not in group_data.gr_mem:
                    raise TestConfigError(
                        "Test specified group '{}', but the current user '{}' "
                        "is not a member of that group.".format(group, user))
            except KeyError as err:
                # grp.getgrnam raises KeyError for unknown groups.
                raise TestConfigError(
                    "Test specified group '{}', but that group does not "
                    "exist on this system. {}".format(group, err)) from err

        umask = config.get('umask')
        if umask is None:
            umask = pav_cfg['umask']
        if umask is not None:
            try:
                umask = int(umask, 8)
            except ValueError:
                raise RuntimeError(
                    "Invalid umask. This should have been enforced "
                    "by the config format.")
        else:
            umask = 0o077

        return group, umask

    @staticmethod
    def make_name(config):
        """Create the name for the build given the configuration values.

        :param dict config: The test configuration.
        :returns: '<suite>.<name>' plus '.<subtitle>' when one is set.
        :rtype: str
        """

        name_parts = [
            config.get('suite', '<unknown>'),
            config.get('name', '<unnamed>'),
        ]
        subtitle = config.get('subtitle')
        # Don't add undefined or empty subtitles.
        if subtitle:
            name_parts.append(subtitle)

        return '.'.join(name_parts)

    def run_cmd(self):
        """Construct a shell command that would cause pavilion to run this
        test."""

        pav_path = self._pav_cfg.pav_root / 'bin' / 'pav'

        return '{} run {}'.format(pav_path, self.id)

    def _save_config(self):
        """Save the configuration for this test to the test config file.

        The config is written to a temporary file and then renamed into
        place.

        :raises TestRunError: If the file can't be written or the config
            contains a value that can't be serialized.
        """

        config_path = self.path / 'config'

        # make lock
        tmp_path = config_path.with_suffix('.tmp')

        try:
            with PermissionsManager(config_path, self.group, self.umask), \
                    tmp_path.open('w') as json_file:
                output.json_dump(self.config, json_file)
            try:
                config_path.unlink()
            except OSError:
                # Best effort; the old config may simply not exist yet.
                pass
            tmp_path.rename(config_path)
        except (OSError, IOError) as err:
            raise TestRunError(
                "Could not save TestRun ({}) config at {}: {}".format(
                    self.name, self.path, err)) from err
        except TypeError as err:
            raise TestRunError("Invalid type in config for ({}): {}".format(
                self.name, err)) from err

    @classmethod
    def _load_config(cls, test_path):
        """Load a saved test configuration.

        :param Path test_path: The test run directory.
        :returns: The parsed configuration.
        :raises TestRunError: If the config file is missing, unreadable, or
            doesn't contain valid JSON.
        """

        config_path = test_path / 'config'

        if not config_path.is_file():
            raise TestRunError(
                "Could not find config file for test at {}.".format(test_path))

        try:
            with config_path.open('r') as config_file:
                # Because only string keys are allowed in test configs,
                # this is a reasonable way to load them.
                return json.load(config_file)
        except (TypeError, ValueError) as err:
            # json.JSONDecodeError is a ValueError; a corrupt config file
            # should be reported like any other bad config.
            raise TestRunError("Bad config values for config '{}': {}".format(
                config_path, err)) from err
        except (IOError, OSError) as err:
            raise TestRunError("Error reading config file '{}': {}".format(
                config_path, err)) from err

    def build(self, cancel_event=None):
        """Build the test using its builder object and symlink copy it to
        it's final location. The build tracker will have the latest
        information on any encountered errors.

        :param threading.Event cancel_event: Event to tell builds when to die.

        :returns: True if build successful
        :raises RuntimeError: If the test run was already built.
        """

        if self.build_origin_path.exists():
            raise RuntimeError(
                "Whatever called build() is calling it for a second time. "
                "This should never happen for a given test run ({s.id})."
                .format(s=self))

        if cancel_event is None:
            cancel_event = threading.Event()

        if self.builder.build(cancel_event=cancel_event):
            # Create the build origin path, to make tracking a test's build
            # a bit easier.
            with PermissionsManager(self.build_origin_path, self.group,
                                    self.umask):
                self.build_origin_path.symlink_to(self.builder.path)

            with PermissionsManager(self.build_path, self.group, self.umask):
                if not self.builder.copy_build(self.build_path):
                    cancel_event.set()
            build_result = True
        else:
            with PermissionsManager(self.build_path, self.group, self.umask):
                # Keep the failed build in the test directory, and make its
                # files owner-writable.
                self.builder.fail_path.rename(self.build_path)
                for file in utils.flat_walk(self.build_path):
                    file.chmod(file.stat().st_mode | 0o200)
                build_result = False

        self.build_log.symlink_to(self.build_path / 'pav_build_log')
        return build_result

    def run(self):
        """Run the test.

        :rtype: bool
        :returns: The return code of the test command.
        :raises TimeoutError: When the run times out.
        :raises TestRunError: We don't actually raise this, but might in the
            future.
        """

        if self.build_only:
            self.status.set(
                STATES.RUN_ERROR,
                "Tried to run a 'build_only' test object.")
            return False

        self.status.set(STATES.PREPPING_RUN,
                        "Converting run template into run script.")

        with PermissionsManager(self.path, self.group, self.umask), \
                self.run_log.open('wb') as run_log:
            self.status.set(STATES.RUNNING,
                            "Starting the run script.")

            self.started = dt.datetime.now()

            # Set the working directory to the build path, if there is one.
            run_wd = None
            if self.build_path is not None:
                run_wd = self.build_path.as_posix()

            # Run scripts take the test id as a first argument.
            cmd = [self.run_script_path.as_posix(), str(self.id)]
            proc = subprocess.Popen(cmd,
                                    cwd=run_wd,
                                    stdout=run_log,
                                    stderr=subprocess.STDOUT)

            self.status.set(STATES.RUNNING,
                            "Currently running.")

            # Run the test, but timeout if it doesn't produce any output every
            # self._run_timeout seconds
            timeout = self.run_timeout
            # When we last know the test produced output. Seeded with the
            # start time so that a failed stat() below still leaves us with
            # a usable quiet time.
            last_activity = time.time()
            ret = None
            while ret is None:
                try:
                    ret = proc.wait(timeout=timeout)
                except subprocess.TimeoutExpired:
                    if self.timeout_file.exists():
                        timeout_file = self.timeout_file
                    else:
                        timeout_file = self.run_log

                    try:
                        last_activity = timeout_file.stat().st_mtime
                    except OSError:
                        # Couldn't stat the file; fall back on the last
                        # activity time we know of.
                        pass

                    quiet_time = time.time() - last_activity

                    # Has the output file changed recently?
                    if self.run_timeout < quiet_time:
                        # Give up on the build, and call it a failure.
                        proc.kill()
                        msg = ("Run timed out after {} seconds"
                               .format(self.run_timeout))
                        self.status.set(STATES.RUN_TIMEOUT, msg)
                        self.finished = dt.datetime.now()
                        self.save_attributes()
                        raise TimeoutError(msg)
                    else:
                        # Only wait until the output would have been quiet
                        # for the full timeout. This is always relative to
                        # the full run timeout; subtracting from the
                        # previous wait would shrink it on every pass.
                        timeout = max(self.run_timeout - quiet_time, 0)

        self.finished = dt.datetime.now()
        self.save_attributes()

        self.status.set(STATES.RUN_DONE,
                        "Test run has completed.")

        return ret

    def set_run_complete(self):
        """Write a file in the test directory that indicates that the test
        has completed a run, one way or another. This should only be called
        when we're sure their won't be any more status changes."""

        # Write the current time to the file. We don't actually use the contents
        # of the file, but it's nice to have another record of when this was
        # run.
        complete_path = self.path / self.COMPLETE_FN
        complete_tmp_path = complete_path.with_suffix('.tmp')
        with PermissionsManager(complete_tmp_path, self.group, self.umask), \
                complete_tmp_path.open('w') as run_complete:
            json.dump(
                {'complete': dt.datetime.now().isoformat()},
                run_complete)
        # Rename into place so readers never see a partial file.
        complete_tmp_path.rename(complete_path)

        self.complete = True
        self.save_attributes()

    def check_run_complete(self):
        """Return the complete time from the run complete file, or None
        if the test was never marked as complete."""

        run_complete_path = self.path / self.COMPLETE_FN

        if run_complete_path.exists():
            try:
                with run_complete_path.open() as complete_file:
                    data = json.load(complete_file)
                    return data.get('complete')
            except (OSError, ValueError, json.JSONDecodeError) as err:
                self.logger.warning(
                    "Failed to read run complete file for at %s: %s",
                    run_complete_path.as_posix(), err)
                return None
        else:
            return None

    # Seconds between checks for the run complete file in wait().
    WAIT_INTERVAL = 0.5

    def wait(self, timeout=None):
        """Wait for the test run to be complete. This works across hosts, as
        it simply checks for files in the run directory.

        :param Union(None,float) timeout: How long to wait in seconds. If
            this is None, wait forever.
        :raises TimeoutError: if the timeout expires.
        """

        if timeout is not None:
            # Convert the duration into an absolute deadline.
            timeout = time.time() + timeout

        while 1:
            if self.check_run_complete() is not None:
                return

            time.sleep(self.WAIT_INTERVAL)

            if timeout is not None and time.time() > timeout:
                raise TimeoutError("Timed out waiting for test '{}' to "
                                   "complete".format(self.id))

    def gather_results(self, run_result, regather=False, log_file=None):
        """Process and log the results of the test, including the default set
        of result keys.

        :param int run_result: The return code of the test run.
        :param bool regather: Gather results without performing any changes
            to the test itself.
        :param IO[str] log_file: The file to save result logs to.
        :returns: The results dictionary.
        :raises RuntimeError: If the test has no finish time recorded.
        """

        import pprint

        if self.finished is None:
            raise RuntimeError(
                "test.gather_results can't be run unless the test was run "
                "(or an attempt was made to run it). "
                "This occurred for test {s.name}, #{s.id}"
                .format(s=self))

        parser_configs = self.config['result_parse']

        result_log = utils.IndentedLog(log_file)

        result_log("Gathering base results.")
        results = result.base_results(self)

        results['return_value'] = run_result

        result_log("Base results:")
        result_log.indent = 1
        result_log(pprint.pformat(results))

        if not regather:
            self.status.set(STATES.RESULTS,
                            "Parsing {} result types."
                            .format(len(parser_configs)))

        try:
            result.parse_results(self, results, log=result_log)
        except pavilion.result.common.ResultError as err:
            results['result'] = self.ERROR
            results['pav_result_errors'].append(
                "Error parsing results: {}".format(err.args[0]))
            if not regather:
                self.status.set(STATES.RESULTS_ERROR,
                                results['pav_result_errors'][-1])

        if not regather:
            self.status.set(STATES.RESULTS,
                            "Performing {} result evaluations."
                            .format(len(self.config['result_evaluate'])))
        try:
            result.evaluate_results(
                results,
                self.config['result_evaluate'],
                result_log)
        except pavilion.result.common.ResultError as err:
            results['result'] = self.ERROR
            results['pav_result_errors'].append(err.args[0])
            if not regather:
                self.status.set(STATES.RESULTS_ERROR,
                                results['pav_result_errors'][-1])

        # Map the boolean result onto the PASS/FAIL strings; anything else
        # is recorded as an error.
        if results['result'] is True:
            results['result'] = self.PASS
        elif results['result'] is False:
            results['result'] = self.FAIL
        else:
            results['pav_result_errors'].append(
                "The value for the 'result' key in the results must be a "
                "boolean. Got '{}' instead".format(results['result']))
            results['result'] = self.ERROR

        result_log("Set final result key to: '{}'".format(results['result']))
        result_log("See results.json for the final result json.")

        result_log("Removing temporary values.")
        result_log.indent = 1
        result.remove_temp_results(results, result_log)

        self._results = results

        return results

    def save_results(self, results):
        """Save the results to the test specific results file and the general
        pavilion results file.

        :param dict results: The results dictionary.
        """

        results_tmp_path = self.results_path.with_suffix('.tmp')
        with PermissionsManager(results_tmp_path, self.group, self.umask), \
                results_tmp_path.open('w') as results_file:
            json.dump(results, results_file)
        try:
            self.results_path.unlink()
        except OSError:
            # Best effort; there may be no previous results file.
            pass
        results_tmp_path.rename(self.results_path)

        self.result = results.get('result')
        self.save_attributes()

        result_logger = logging.getLogger('common_results')
        if self._pav_cfg.get('flatten_results') and results.get('per_file'):
            # Flatten 'per_file' results into separate result records.
            base = results.copy()
            del base['per_file']

            for per_file, values in results['per_file'].items():
                per_result = base.copy()
                per_result['file'] = per_file
                per_result.update(values)

                result_logger.info(output.json_dumps(per_result))
        else:
            result_logger.info(output.json_dumps(results))

    def load_results(self):
        """Load results from the results file.

        :returns A dict of results, or None if the results file doesn't exist.
        :rtype: dict
        """

        if self.results_path.exists():
            with self.results_path.open() as results_file:
                return json.load(results_file)
        else:
            return None

    # The values the 'result' key may take in final results. PASS follows
    # the same pattern as its siblings (it read '******', which looks like
    # the artifact of a secret scrubber).
    PASS = 'PASS'
    FAIL = 'FAIL'
    ERROR = 'ERROR'

    @property
    def results(self):
        """The test results. Returns a dictionary of basic information
        if the test has no results."""

        if self.results_path.exists() and (
                self._results is None or self._results['result'] is None):
            with self.results_path.open() as results_file:
                self._results = json.load(results_file)

        if self._results is None:
            return {
                'name': self.name,
                'sys_name': self.var_man['sys_name'],
                'created': self.created,
                'id': self.id,
                'result': None,
            }
        else:
            return self._results

    @property
    def is_built(self):
        """Whether the build for this test exists.

        :returns: True if the build exists (or the test doesn't have a build),
                  False otherwise.
        :rtype: bool
        """

        if self.build_path.resolve().exists():
            return True
        else:
            return False

    @property
    def job_id(self):
        """The job id of this test (saved to a ``jobid`` file). This should
        be set by the scheduler plugin as soon as it's known."""

        path = self.path / self.JOB_ID_FN

        if self._job_id is not None:
            return self._job_id

        try:
            with path.open() as job_id_file:
                self._job_id = job_id_file.read()
        except FileNotFoundError:
            return None
        except (OSError, IOError) as err:
            self.logger.error("Could not read jobid file '%s': %s",
                              path, err)
            return None

        return self._job_id

    @job_id.setter
    def job_id(self, job_id):

        path = self.path / self.JOB_ID_FN

        try:
            with PermissionsManager(path, self.group, self.umask), \
                    path.open('w') as job_id_file:
                job_id_file.write(job_id)
        except (IOError, OSError) as err:
            self.logger.error("Could not write jobid file '%s': %s",
                              path, err)

        # Cache the id even if it couldn't be written out.
        self._job_id = job_id

    def _write_script(self, stype, path, config):
        """Write a build or run script or template. The formats for each are
        mostly identical.

        :param str stype: The type of script (run or build).
        :param Path path: Path to the template file to write.
        :param dict config: Configuration dictionary for the script file.
        :return:
        """

        script = scriptcomposer.ScriptComposer()

        verbose = config.get('verbose', 'false').lower() == 'true'

        if verbose:
            script.comment('# Echoing all commands to log.')
            script.command('set -v')
            script.newline()

        pav_lib_bash = self._pav_cfg.pav_root / 'bin' / 'pav-lib.bash'

        # If we include this directly, it breaks build hashing.
        script.comment('The first (and only) argument of the build script is '
                       'the test id.')
        script.env_change({
            'TEST_ID': '${1:-0}',   # Default to test id 0 if one isn't given.
            'PAV_CONFIG_FILE': self._pav_cfg['pav_cfg_file']
        })
        script.command('source {}'.format(pav_lib_bash))

        if config.get('preamble', []):
            script.newline()
            script.comment('Preamble commands')
            for cmd in config['preamble']:
                script.command(cmd)

        if stype == 'build' and not self.build_local:
            script.comment('To be built in an allocation.')

        modules = config.get('modules', [])
        if modules:
            script.newline()
            script.comment(
                'Perform module related changes to the environment.')

            for module in config.get('modules', []):
                script.module_change(module, self.var_man)

        env = config.get('env', {})
        if env:
            script.newline()
            script.comment("Making any environment changes needed.")
            script.env_change(config.get('env', {}))

        if verbose:
            script.newline()
            script.comment('List all the module modules for posterity')
            script.command("module -t list")
            script.newline()
            script.comment('Output the environment for posterity')
            script.command("declare -p")

        script.newline()
        cmds = config.get('cmds', [])
        if cmds:
            script.comment("Perform the sequence of test commands.")
            for line in config.get('cmds', []):
                # Multi-line commands become one script command per line.
                for split_line in line.split('\n'):
                    script.command(split_line)
        else:
            script.comment('No commands given for this script.')

        with PermissionsManager(path, self.group, self.umask):
            script.write(path)

    def __repr__(self):
        return "TestRun({s.name}-{s.id})".format(s=self)

    def _get_skipped(self):
        """Kicks off assessing if current test is skipped.

        When a skip condition matches, the status is set to SKIPPED and the
        run is marked complete.

        :returns: True if the test is (or already was) skipped.
        :rtype: bool
        """

        if self.skipped:
            return True

        skip_reason_list = self._evaluate_skip_conditions()
        matches = " ".join(skip_reason_list)

        if len(skip_reason_list) == 0:
            return False
        else:
            self.status.set(STATES.SKIPPED, matches)
            self.set_run_complete()
            return True

    def _evaluate_skip_conditions(self):
        """Match grabs conditional keys from the config. It checks for
        matches and depending on the results will skip or continue a test.

        :return The match list after being populated
        :rtype list[str]"""

        match_list = []
        only_if = self.config.get('only_if', {})
        not_if = self.config.get('not_if', {})

        for key in not_if:
            # Skip any keys that were deferred.
            if resolver.TestConfigResolver.was_deferred(key):
                continue

            for val in not_if[key]:
                # Also skip deferred values.
                if resolver.TestConfigResolver.was_deferred(val):
                    continue

                # Anchor the pattern so it must match the whole key.
                if not val.endswith('$'):
                    val = val + '$'
                if bool(re.match(val, key)):
                    message = ("Skipping due to not_if match for key '{}' "
                               "with '{}'"
                               .format(key, val))
                    match_list.append(message)

        for key in only_if:
            match = False

            if resolver.TestConfigResolver.was_deferred(key):
                continue

            for val in only_if[key]:

                # We have to assume a match if one of the values is deferred.
                if resolver.TestConfigResolver.was_deferred(val):
                    match = True
                    break

                if not val.endswith('$'):
                    val = val + '$'

                if bool(re.match(val, key)):
                    match = True

            if match is False:
                message = ("Skipping because only_if key '{}' failed to match "
                           "any of '{}'"
                           .format(key, only_if[key]))
                match_list.append(message)

        return match_list  # returns list, can be empty.

    @staticmethod
    def parse_timeout(section, value):
        """Parse the timeout value from either the run or build section
        into an int (or none).

        :param str section: The config section the value came from.
        :param Union[str,None] value: The value to parse.
        :returns: The timeout as an int, or None if no value was given.
        :raises TestRunError: If the value isn't a string of digits.
        """
        if value is None:
            return None
        if value.strip().isdigit():
            return int(value)

        raise TestRunError(
            "Invalid value for {} timeout. Must be a positive int."
            .format(section))
def __init__(self, pav_cfg, config, build_tracker=None, var_man=None,
             _id=None, rebuild=False, build_only=False):
    """Create an new TestRun object. If loading an existing test
    instance, use the ``TestRun.from_id()`` method.

    When ``_id`` is None a brand new test run directory is allocated and
    populated (config, variables, status file, build script, run template,
    saved attributes). When ``_id`` is given, the attributes and variables
    of that existing run are loaded instead and nothing is rewritten.

    :param pav_cfg: The pavilion configuration.
    :param dict config: The test configuration dictionary.
    :param builder.MultiBuildTracker build_tracker: Tracker for watching
        and managing the status of multiple builds.
    :param variables.VariableSetManager var_man: The variable set manager
        for this test.
    :param bool build_only: Only build this test run, do not run it.
    :param bool rebuild: After determining the build name, deprecate it
        and select a new, non-deprecated build.
    :param int _id: The test id of an existing test. (You should be using
        TestRun.load).
    :raises TestRunNotFoundError: If the test run directory does not exist.
    :raises TestRunError: If the saved variables can't be loaded, a timeout
        value is invalid, or the builder can't be created.
    """

    # Just about every method needs this
    self._pav_cfg = pav_cfg
    self.scheduler = config['scheduler']

    # The directory under which every test run directory lives.
    tests_path = pav_cfg.working_dir / 'test_runs'

    self.config = config

    group, umask = self.get_permissions(pav_cfg, config)

    # Get an id for the test, if we weren't given one.
    if _id is None:
        id_tmp, run_path = dir_db.create_id_dir(tests_path, group, umask)
        super().__init__(path=run_path, group=group, umask=umask)

        # Set basic attributes
        self.id = id_tmp
        self.build_only = build_only
        self.complete = False
        self.created = dt.datetime.now()
        self.name = self.make_name(config)
        self.rebuild = rebuild
        self.suite_path = Path(config.get('suite_path', '.'))
        self.user = utils.get_login()
        self.uuid = str(uuid.uuid4())
    else:
        # Load the test info from the given id path.
        super().__init__(path=dir_db.make_id_path(tests_path, _id),
                         group=group, umask=umask)
        self.load_attributes()

    self.test_version = config.get('test_version')

    # NOTE(review): for an existing id this check runs after
    # load_attributes(); confirm that call tolerates a missing directory.
    if not self.path.is_dir():
        raise TestRunNotFoundError(
            "No test with id '{}' could be found.".format(self.id))

    # Mark the run to build locally.
    self.build_local = config.get('build', {}) \
                             .get('on_nodes', 'false').lower() != 'true'

    self._variables_path = self.path / 'variables'

    if _id is None:
        with PermissionsManager(self.path, self.group, self.umask):
            self._save_config()
            if var_man is None:
                var_man = variables.VariableSetManager()
            self.var_man = var_man
            self.var_man.save(self._variables_path)

        self.sys_name = self.var_man.get('sys_name', '<unknown>')
    else:
        try:
            self.var_man = variables.VariableSetManager.load(
                self._variables_path)
        except RuntimeError as err:
            # Keep the original error attached as the explicit cause.
            raise TestRunError(*err.args) from err

    # This will be set by the scheduler
    self._job_id = None

    with PermissionsManager(self.path / 'status', self.group, self.umask):
        # Setup the initial status file.
        self.status = StatusFile(self.path / 'status')
        if _id is None:
            self.status.set(STATES.CREATED,
                            "Test directory and status file created.")

    self.run_timeout = self.parse_timeout(
        'run', config.get('run', {}).get('timeout'))
    self.build_timeout = self.parse_timeout(
        'build', config.get('build', {}).get('timeout'))

    self.run_log = self.path / 'run.log'
    self.build_log = self.path / 'build.log'
    self.results_log = self.path / 'results.log'
    self.results_path = self.path / 'results.json'
    self.build_origin_path = self.path / 'build_origin'
    self.build_timeout_file = config.get('build', {}).get('timeout_file')

    # Use run.log as the default run timeout file
    self.timeout_file = self.run_log
    run_timeout_file = config.get('run', {}).get('timeout_file')
    if run_timeout_file is not None:
        self.timeout_file = self.path / run_timeout_file

    build_config = self.config.get('build', {})

    self.build_script_path = self.path / 'build.sh'  # type: Path
    self.build_path = self.path / 'build'
    if _id is None:
        self._write_script('build',
                           path=self.build_script_path,
                           config=build_config)

    try:
        self.builder = builder.TestBuilder(pav_cfg=pav_cfg,
                                           test=self,
                                           mb_tracker=build_tracker,
                                           build_name=self.build_name)
        self.build_name = self.builder.name
    except builder.TestBuilderError as err:
        raise TestRunError(
            "Could not create builder for test {s.name} (run {s.id}): {err}"
            .format(s=self, err=err)) from err

    run_config = self.config.get('run', {})
    self.run_tmpl_path = self.path / 'run.tmpl'
    self.run_script_path = self.path / 'run.sh'

    # Only a newly created run gets its template written, its attributes
    # saved and the final 'created' status entry.
    if _id is None:
        self._write_script('run',
                           path=self.run_tmpl_path,
                           config=run_config)
        self.save_attributes()
        self.status.set(STATES.CREATED, "Test directory setup complete.")

    self._results = None

    self.skipped = self._get_skipped()  # eval skip.
def __init__(self, pav_cfg, config, test_id=None):
    """Create an new PavTest object. If loading an existing test instance,
    use the PavTest.from_id method.

    A new test (``test_id`` is None) gets a fresh id directory, a saved
    config, 'created' status entries and freshly written build/run
    scripts. Loading an existing test only reads; it does not append
    status entries or rewrite scripts.

    :param pav_cfg: The pavilion configuration.
    :param config: The test configuration dictionary.
    :param test_id: The test id (for an existing test).
    :raises PavTestNotFoundError: If no directory exists for ``test_id``.
    """

    # Just about every method needs this
    self._pav_cfg = pav_cfg

    # Compute the actual name of test, using the subtest config parameter.
    self.name = config['name']
    if 'subtest' in config and config['subtest']:
        self.name = self.name + '.' + config['subtest']

    # The directory under which every test directory lives.
    tests_path = os.path.join(pav_cfg.working_dir, 'tests')

    self.config = config

    is_new = test_id is None

    # Get an id for the test, if we weren't given one.
    if is_new:
        self.id, self.path = utils.create_id_dir(tests_path)
        self._save_config()
    else:
        self.id = test_id
        self.path = utils.make_id_path(tests_path, self.id)
        if not os.path.isdir(self.path):
            raise PavTestNotFoundError(
                "No test with id '{}' could be found.".format(self.id))

    # Set a logger more specific to this test.
    self.LOGGER = logging.getLogger('pav.PavTest.{}'.format(self.id))

    # This will be set by the scheduler
    self._job_id = None

    # Setup the status file. Only a new test records the 'created' state;
    # loading an existing test must not push it back to CREATED.
    self.status = StatusFile(os.path.join(self.path, 'status'))
    if is_new:
        self.status.set(STATES.CREATED,
                        "Test directory and status file created.")

    # All build attributes always exist, so that tests without a build
    # section can be checked against None.
    self.build_path = None
    self.build_name = None
    self.build_hash = None
    self.build_origin = None
    self.build_script_path = None

    build_config = self.config.get('build', {})
    if build_config:
        self.build_path = os.path.join(self.path, 'build')
        if os.path.islink(self.build_path):
            # Recover the hash from the name of the linked build dir.
            build_rp = os.path.realpath(self.build_path)
            build_fn = os.path.basename(build_rp)
            self.build_hash = build_fn.split('-')[-1]
        else:
            self.build_hash = self._create_build_hash(build_config)

        short_hash = self.build_hash[:self.BUILD_HASH_BYTES*2]
        self.build_name = '{hash}'.format(hash=short_hash)
        self.build_origin = os.path.join(pav_cfg.working_dir,
                                         'builds', self.build_name)

        self.build_script_path = os.path.join(self.path, 'build.sh')
        if is_new:
            self._write_script(self.build_script_path, build_config)

    run_config = self.config.get('run', {})
    if run_config:
        self.run_tmpl_path = os.path.join(self.path, 'run.tmpl')
        self.run_script_path = os.path.join(self.path, 'run.sh')
        if is_new:
            self._write_script(self.run_tmpl_path, run_config)
    else:
        self.run_tmpl_path = None
        self.run_script_path = None

    if is_new:
        self.status.set(STATES.CREATED, "Test directory setup complete.")
class PavTest:
    """The central pavilion test object. Handle saving, monitoring and running
    tests.

    :cvar TEST_ID_DIGITS: How many digits should be in the test folder names.
    :cvar _BLOCK_SIZE: Blocksize for hashing files.
    """

    # By default we support up to 10 million tests.
    TEST_ID_DIGITS = 7

    # We have to worry about hash collisions, but we don't need all the bytes
    # of hash most algorithms give us. The birthday attack math for 64 bits (
    # 8 bytes) of hash and 10 million items yields a collision probability of
    # just 0.00027%. Easily good enough.
    BUILD_HASH_BYTES = 8

    _BLOCK_SIZE = 4096*1024

    LOGGER = logging.getLogger('pav.PavTest')

    def __init__(self, pav_cfg, config, test_id=None):
        """Create an new PavTest object. If loading an existing test instance,
        use the PavTest.from_id method.

        A new test (``test_id`` is None) gets a fresh id directory, a saved
        config, 'created' status entries and freshly written build/run
        scripts. Loading an existing test only reads; it does not append
        status entries or rewrite scripts.

        :param pav_cfg: The pavilion configuration.
        :param config: The test configuration dictionary.
        :param test_id: The test id (for an existing test).
        :raises PavTestNotFoundError: If no directory exists for ``test_id``.
        """

        # Just about every method needs this
        self._pav_cfg = pav_cfg

        # Compute the actual name of test, using the subtest config parameter.
        self.name = config['name']
        if 'subtest' in config and config['subtest']:
            self.name = self.name + '.' + config['subtest']

        # The directory under which every test directory lives.
        tests_path = os.path.join(pav_cfg.working_dir, 'tests')

        self.config = config

        is_new = test_id is None

        # Get an id for the test, if we weren't given one.
        if is_new:
            self.id, self.path = utils.create_id_dir(tests_path)
            self._save_config()
        else:
            self.id = test_id
            self.path = utils.make_id_path(tests_path, self.id)
            if not os.path.isdir(self.path):
                raise PavTestNotFoundError(
                    "No test with id '{}' could be found.".format(self.id))

        # Set a logger more specific to this test.
        self.LOGGER = logging.getLogger('pav.PavTest.{}'.format(self.id))

        # This will be set by the scheduler
        self._job_id = None

        # Setup the status file. Only a new test records the 'created' state;
        # loading an existing test must not push it back to CREATED.
        self.status = StatusFile(os.path.join(self.path, 'status'))
        if is_new:
            self.status.set(STATES.CREATED,
                            "Test directory and status file created.")

        # All build attributes always exist, so that tests without a build
        # section can be checked against None.
        self.build_path = None
        self.build_name = None
        self.build_hash = None
        self.build_origin = None
        self.build_script_path = None

        build_config = self.config.get('build', {})
        if build_config:
            self.build_path = os.path.join(self.path, 'build')
            if os.path.islink(self.build_path):
                # Recover the hash from the name of the linked build dir.
                build_rp = os.path.realpath(self.build_path)
                build_fn = os.path.basename(build_rp)
                self.build_hash = build_fn.split('-')[-1]
            else:
                self.build_hash = self._create_build_hash(build_config)

            short_hash = self.build_hash[:self.BUILD_HASH_BYTES*2]
            self.build_name = '{hash}'.format(hash=short_hash)
            self.build_origin = os.path.join(pav_cfg.working_dir,
                                             'builds', self.build_name)

            self.build_script_path = os.path.join(self.path, 'build.sh')
            if is_new:
                self._write_script(self.build_script_path, build_config)

        run_config = self.config.get('run', {})
        if run_config:
            self.run_tmpl_path = os.path.join(self.path, 'run.tmpl')
            self.run_script_path = os.path.join(self.path, 'run.sh')
            if is_new:
                self._write_script(self.run_tmpl_path, run_config)
        else:
            self.run_tmpl_path = None
            self.run_script_path = None

        if is_new:
            self.status.set(STATES.CREATED, "Test directory setup complete.")

    @classmethod
    def from_id(cls, pav_cfg, test_id):
        """Load a new PavTest object based on id.

        :param pav_cfg: The pavilion configuration.
        :param test_id: The id of an existing test.
        :raises PavTestError: If the test directory or its config is missing
            or unreadable.
        """

        path = utils.make_id_path(os.path.join(pav_cfg.working_dir, 'tests'),
                                  test_id)

        if not os.path.isdir(path):
            raise PavTestError("Test directory for test id {} does not exist "
                               "at '{}' as expected."
                               .format(test_id, path))

        config = cls._load_config(path)

        # Use cls so that subclasses load as themselves.
        return cls(pav_cfg, config, test_id)

    def run_cmd(self):
        """Construct a shell command that would cause pavilion to run this
        test."""

        pav_path = os.path.join(self._pav_cfg.pav_root, 'bin', 'pav')

        return '{} run {}'.format(pav_path, self.id)

    def _save_config(self):
        """Save the configuration for this test to the test config file.

        :raises PavTestError: If the file can't be written or the config
            contains a type json can't serialize.
        """

        config_path = os.path.join(self.path, 'config')

        try:
            with open(config_path, 'w') as json_file:
                json.dump(self.config, json_file)
        except (OSError, IOError) as err:
            raise PavTestError("Could not save PavTest ({}) config at {}: {}"
                               .format(self.name, self.path, err))
        except TypeError as err:
            raise PavTestError("Invalid type in config for ({}): {}"
                               .format(self.name, err))

    @classmethod
    def _load_config(cls, test_path):
        """Load and return the saved (json) config of the test at test_path.

        :param str test_path: Path to the test directory.
        :raises PavTestError: If the config file is missing, unreadable, or
            does not contain valid json.
        """

        config_path = os.path.join(test_path, 'config')

        if not os.path.isfile(config_path):
            raise PavTestError("Could not find config file for test at {}."
                               .format(test_path))

        try:
            with open(config_path, 'r') as config_file:
                return json.load(config_file)
        except (TypeError, ValueError) as err:
            # Malformed json is reported as a ValueError (JSONDecodeError).
            raise PavTestError("Bad config values for config '{}': {}"
                               .format(config_path, err))
        except (IOError, OSError) as err:
            raise PavTestError("Error reading config file '{}': {}"
                               .format(config_path, err))

    def _find_file(self, file, sub_dir=None):
        """Look for the given file and return a full path to it. Relative paths
        are searched for in all config directories under 'test_src'.

        :param file: The path to the file.
        :param sub_dir: The subdirectory in each config directory in which to
            search.
        :returns: The full path to the found file, or None if no such file
            could be found.
        """

        if os.path.isabs(file):
            if os.path.exists(file):
                return file
            else:
                return None

        for config_dir in self._pav_cfg.config_dirs:
            path = [config_dir]
            if sub_dir is not None:
                path.append(sub_dir)
            path.append(file)
            path = os.path.realpath(os.path.join(*path))

            if os.path.exists(path):
                return path

        return None

    @staticmethod
    def _isurl(url):
        """Determine if the given path is a url (i.e. it has a scheme)."""
        parsed = urllib.parse.urlparse(url)
        return parsed.scheme != ''

    def _download_path(self, loc, name):
        """Get the path to where a source_download would be downloaded.

        :param str loc: The url for the download, from the config's
            source_location field.
        :param str name: The name of the download, from the config's
            source_download_name field.
        :returns: A path under ``<working_dir>/downloads``.
        """

        fn = name

        if fn is None:
            url_parts = urllib.parse.urlparse(loc)
            path_parts = url_parts.path.split('/')
            if path_parts and path_parts[-1]:
                fn = path_parts[-1]
            else:
                # Use a hash of the url if we can't get a name from it.
                fn = hashlib.sha256(loc.encode()).hexdigest()

        return os.path.join(self._pav_cfg.working_dir, 'downloads', fn)

    def _update_src(self, build_config):
        """Retrieve and/or check the existence of the files needed for the
        build. This can include pulling from URL's.

        :param dict build_config: The build configuration dictionary.
        :returns: The path to the source file or directory, or None when the
            build has no 'source_location'.
        :raises PavTestError: If the source can't be found or is neither a
            file nor a directory.
        """

        src_loc = build_config.get('source_location')
        if src_loc is None:
            return None

        # For URL's, check if the file needs to be updated, and try to do so.
        if self._isurl(src_loc):
            dwn_name = build_config.get('source_download_name')
            src_dest = self._download_path(src_loc, dwn_name)

            wget.update(self._pav_cfg, src_loc, src_dest)

            return src_dest

        src_path = self._find_file(src_loc, 'test_src')
        if src_path is None:
            raise PavTestError("Could not find and update src location '{}'"
                               .format(src_loc))

        if os.path.isdir(src_path):
            # For directories, update the directories mtime to match the
            # latest mtime in the entire directory.
            self._date_dir(src_path)
            return src_path

        elif os.path.isfile(src_path):
            # For static files, we'll end up just hashing the whole thing.
            return src_path

        else:
            raise PavTestError("Source location '{}' points to something "
                               "unusable.".format(src_path))

    def _create_build_hash(self, build_config):
        """Turn the build config, and everything the build needs, into hash.
        This includes the build config itself, the source tarball, all
        extra files, and the build's 'specificity' string.

        :param dict build_config: The build configuration dictionary.
        :returns: The first ``BUILD_HASH_BYTES*2`` hex characters of the hash.
        :raises PavTestError: If the source or an extra file can't be found
            or is unusable.
        """

        # The hash order is:
        #  - The build config (sorted by key)
        #  - The src archive.
        #    - For directories, the mtime (updated to the time of the most
        #      recently updated file) is hashed instead.
        #  - All of the build's 'extra_files'
        #  - The build's 'specificity' value.

        hash_obj = hashlib.sha256()

        # Update the hash with the contents of the build config.
        hash_obj.update(self._hash_dict(build_config))

        src_path = self._update_src(build_config)

        if src_path is not None:
            if os.path.isfile(src_path):
                hash_obj.update(self._hash_file(src_path))
            elif os.path.isdir(src_path):
                hash_obj.update(self._hash_dir(src_path))
            else:
                raise PavTestError("Invalid src location {}.".format(src_path))

        for extra_file in build_config.get('extra_files', []):
            full_path = self._find_file(extra_file, 'test_src')

            if full_path is None:
                raise PavTestError("Could not find extra file '{}'"
                                   .format(extra_file))
            elif os.path.isfile(full_path):
                hash_obj.update(self._hash_file(full_path))
            elif os.path.isdir(full_path):
                self._date_dir(full_path)

                hash_obj.update(self._hash_dir(full_path))
            else:
                raise PavTestError("Extra file '{}' must be a regular "
                                   "file or directory.".format(extra_file))

        hash_obj.update(build_config.get('specificity', '').encode('utf-8'))

        return hash_obj.hexdigest()[:self.BUILD_HASH_BYTES*2]

    def build(self):
        """Perform the build if needed, do a soft-link copy of the build
        directory into our test directory, and note that we've used the given
        build.

        :returns: True if these steps completed successfully (or the test has
            no build at all), False otherwise.
        """

        # Tests without a build section have nothing to build or copy.
        if self.build_origin is None:
            return True

        # Only try to do the build if it doesn't already exist.
        if not os.path.exists(self.build_origin):
            # Make sure another test doesn't try to do the build at
            # the same time.
            # Note cleanup of failed builds HAS to occur under this lock to
            # avoid a race condition, even though it would be way simpler to
            # do it in .build()
            lock_path = '{}.lock'.format(self.build_origin)
            with lockfile.LockFile(lock_path, group=self._pav_cfg.shared_group):
                # Make sure the build wasn't created while we waited for
                # the lock.
                if not os.path.exists(self.build_origin):
                    build_dir = self.build_origin + '.tmp'

                    # Attempt to perform the actual build, this shouldn't
                    # raise an exception unless
                    # something goes terribly wrong.
                    if not self._build(build_dir):
                        # The build failed. The reason should already be set
                        # in the status file.
                        def handle_error(_, path, exc_info):
                            self.LOGGER.error("Error removing temporary build "
                                              "directory '{}': {}"
                                              .format(path, exc_info))

                        # Cleanup the temporary build tree.
                        shutil.rmtree(path=build_dir, onerror=handle_error)
                        return False

                    # Rename the build to it's final location.
                    os.rename(build_dir, self.build_origin)

        # Perform a symlink copy of the original build directory into our test
        # directory.
        try:
            shutil.copytree(self.build_origin,
                            self.build_path,
                            symlinks=True,
                            copy_function=utils.symlink_copy)
        except OSError as err:
            msg = "Could not perform the build directory copy: {}".format(err)
            self.status.set(STATES.BUILD_ERROR, msg)
            self.LOGGER.error(msg)
            return False

        # Touch the original build directory, so that we know it was used
        # recently.
        try:
            now = time.time()
            os.utime(self.build_origin, (now, now))
        except OSError as err:
            self.LOGGER.warning("Could not update timestamp on build directory "
                                "'{}': {}"
                                .format(self.build_origin, err))

        return True

    # A process should produce some output at least once every this many
    # seconds.
    BUILD_SILENT_TIMEOUT = 30

    def _build(self, build_dir):
        """Perform the build. This assumes there actually is a build to perform.

        :param str build_dir: The (temporary) directory to build in.
        :returns: True or False, depending on whether the build appears to have
            been successful.
        """

        try:
            self._setup_build_dir(build_dir)
        except PavTestError as err:
            self.status.set(STATES.BUILD_ERROR,
                            "Error setting up build directory '{}': {}"
                            .format(build_dir, err))
            return False

        build_log_path = os.path.join(build_dir, 'pav_build_log')

        try:
            with open(build_log_path, 'w') as build_log:
                proc = subprocess.Popen([self.build_script_path],
                                        cwd=build_dir,
                                        stdout=build_log,
                                        stderr=build_log)

                timeout = self.BUILD_SILENT_TIMEOUT
                result = None
                while result is None:
                    try:
                        result = proc.wait(timeout=timeout)
                    except subprocess.TimeoutExpired:
                        log_stat = os.stat(build_log_path)
                        quiet_time = time.time() - log_stat.st_mtime
                        # Has the output file changed recently?
                        if self.BUILD_SILENT_TIMEOUT < quiet_time:
                            # Give up on the build, and call it a failure.
                            proc.kill()
                            self.status.set(STATES.BUILD_FAILED,
                                            "Build timed out after {} seconds."
                                            .format(self.BUILD_SILENT_TIMEOUT))
                            return False
                        else:
                            # Only wait a max of BUILD_SILENT_TIMEOUT next
                            # 'wait'
                            timeout = self.BUILD_SILENT_TIMEOUT - quiet_time

        except subprocess.CalledProcessError as err:
            self.status.set(STATES.BUILD_ERROR,
                            "Error running build process: {}".format(err))
            return False

        except (IOError, OSError) as err:
            self.status.set(STATES.BUILD_ERROR,
                            "Error that's probably related to writing the "
                            "build output: {}".format(err))
            return False

        try:
            # The build still lives in its temporary location at this point;
            # self.build_origin doesn't exist until build() renames it.
            self._fix_build_permissions(build_dir)
        except OSError as err:
            self.LOGGER.warning("Error fixing build permissions: {}"
                                .format(err))

        if result != 0:
            self.status.set(STATES.BUILD_FAILED,
                            "Build returned a non-zero result.")
            return False
        else:
            self.status.set(STATES.BUILD_DONE, "Build completed successfully.")
            return True

    TAR_SUBTYPES = (
        'gzip',
        'x-gzip',
        'x-bzip2',
        'x-xz',
        'x-tar',
        'x-lzma',
    )

    def _setup_build_dir(self, build_path):
        """Setup the build directory, by extracting or copying the source
        and any extra files.

        :param build_path: Path to the intended build directory.
        :return: None
        :raises PavTestError: If the source or an extra file can't be found,
            extracted or copied.
        """

        build_config = self.config.get('build', {})

        src_loc = build_config.get('source_location')
        if src_loc is None:
            src_path = None
        elif self._isurl(src_loc):
            # The file was already downloaded to the downloads directory;
            # work out where it landed.
            download_name = build_config.get('source_download_name')
            src_path = self._download_path(src_loc, download_name)
        else:
            src_path = self._find_file(src_loc, 'test_src')
            if src_path is None:
                # Report what we searched for; src_path is None here.
                raise PavTestError("Could not find source file '{}'"
                                   .format(src_loc))

        if src_path is None:
            # If there is no source archive or data, just make the build
            # directory.
            os.mkdir(build_path)

        elif os.path.isdir(src_path):
            # Recursively copy the src directory to the build directory.
            shutil.copytree(src_path, build_path, symlinks=True)

        elif os.path.isfile(src_path):
            # Handle decompression of a stream compressed file. The interfaces
            # for the libs are all the same; we just have to choose the right
            # one to use. Zips are handled as an archive, below.
            category, subtype = utils.get_mime_type(src_path)

            if category == 'application' and subtype in self.TAR_SUBTYPES:
                if tarfile.is_tarfile(src_path):
                    try:
                        with tarfile.open(src_path, 'r') as tar:
                            # Filter out all but the top level items.
                            # getmembers() reads the whole archive; the
                            # 'members' attribute only holds what has been
                            # read so far.
                            top_level = [m for m in tar.getmembers()
                                         if '/' not in m.name]
                            # If the file contains only a single directory,
                            # make that directory the build directory. This
                            # should be the default in most cases.
                            if len(top_level) == 1 and top_level[0].isdir():
                                tmpdir = '{}.untarred'.format(build_path)
                                os.mkdir(tmpdir)
                                tar.extractall(tmpdir)
                                opath = os.path.join(tmpdir,
                                                     top_level[0].name)
                                os.rename(opath, build_path)
                                os.rmdir(tmpdir)
                            else:
                                # Otherwise, the build path will contain the
                                # extracted contents of the archive.
                                os.mkdir(build_path)
                                tar.extractall(build_path)
                    except (OSError, IOError,
                            tarfile.CompressionError, tarfile.TarError) as err:
                        raise PavTestError(
                            "Could not extract tarfile '{}' into '{}': {}"
                            .format(src_path, build_path, err))

                else:
                    # If it's a compressed file but isn't a tar, extract the
                    # file into the build directory.
                    # All the python compression libraries have the same basic
                    # interface, so we can just dynamically switch between
                    # modules.
                    if subtype in ('gzip', 'x-gzip'):
                        comp_lib = gzip
                    elif subtype == 'x-bzip2':
                        comp_lib = bz2
                    elif subtype in ('x-xz', 'x-lzma'):
                        comp_lib = lzma
                    elif subtype == 'x-tar':
                        raise PavTestError(
                            "Test src file '{}' is a bad tar file."
                            .format(src_path))
                    else:
                        raise RuntimeError("Unhandled compression type. '{}'"
                                           .format(subtype))

                    # Name the output after the source, minus all extensions.
                    decomp_fn = src_path.split('/')[-1]
                    decomp_fn = decomp_fn.split('.', 1)[0]
                    decomp_fn = os.path.join(build_path, decomp_fn)
                    os.mkdir(build_path)

                    try:
                        with comp_lib.open(src_path) as infile, \
                                open(decomp_fn, 'wb') as outfile:
                            shutil.copyfileobj(infile, outfile)
                    except (OSError, IOError, lzma.LZMAError) as err:
                        raise PavTestError(
                            "Error decompressing compressed file "
                            "'{}' into '{}': {}"
                            .format(src_path, decomp_fn, err))

            elif category == 'application' and subtype == 'zip':
                try:
                    # Extract the zipfile, under the same conditions as
                    # above with tarfiles.
                    with zipfile.ZipFile(src_path) as zipped:

                        tmpdir = '{}.unzipped'.format(build_path)
                        os.mkdir(tmpdir)
                        zipped.extractall(tmpdir)

                        files = os.listdir(tmpdir)
                        if (len(files) == 1 and
                                os.path.isdir(os.path.join(tmpdir, files[0]))):
                            # Make the zip's root directory the build dir.
                            os.rename(os.path.join(tmpdir, files[0]),
                                      build_path)
                            os.rmdir(tmpdir)
                        else:
                            # The overall contents of the zip are the build dir.
                            os.rename(tmpdir, build_path)

                except (OSError, IOError, zipfile.BadZipFile) as err:
                    raise PavTestError(
                        "Could not extract zipfile '{}' into destination "
                        "'{}': {}".format(src_path, build_path, err))

            else:
                # Finally, simply copy any other types of files into the build
                # directory.
                dest = os.path.join(build_path, os.path.basename(src_path))
                try:
                    os.mkdir(build_path)
                    shutil.copyfile(src_path, dest)
                except OSError as err:
                    raise PavTestError(
                        "Could not copy test src '{}' to '{}': {}"
                        .format(src_path, dest, err))

        # Now we just need to copy over all of the extra files.
        for extra in build_config.get('extra_files', []):
            path = self._find_file(extra, 'test_src')
            if path is None:
                raise PavTestError("Could not find extra file '{}'"
                                   .format(extra))

            dest = os.path.join(build_path, os.path.basename(path))
            try:
                shutil.copyfile(path, dest)
            except OSError as err:
                raise PavTestError(
                    "Could not copy extra file '{}' to dest '{}': {}"
                    .format(path, dest, err))

    RUN_SILENT_TIMEOUT = 5*60

    def _fix_build_permissions(self, build_dir=None):
        """The files in a build directory should never be writable, but
        directories should be. Users are thus allowed to delete build
        directories and their files, but never modify them. Additions,
        deletions within test build directories will effect the soft links,
        not the original files themselves. (This applies both to owner and
        group).

        :param str build_dir: The directory tree to fix. Defaults to
            ``self.build_origin``.
        :raises OSError: If we lack permissions or something else goes wrong.
        """

        root = self.build_origin if build_dir is None else build_dir

        # We rely on the umask to handle most restrictions.
        # This just masks out the write bits.
        file_mask = 0o777555

        # We shouldn't have to do anything to directories, they should have
        # the correct permissions already.
        for path, _, files in os.walk(root):
            for file in files:
                file_path = os.path.join(path, file)
                # chmod follows symlinks; never touch link targets, which
                # may live outside of the build tree (or not exist).
                if os.path.islink(file_path):
                    continue
                st = os.stat(file_path)
                os.chmod(file_path, st.st_mode & file_mask)

    def run(self, sched_vars):
        """Run the test, returning True on success, False otherwise.

        :param dict sched_vars: The scheduler variables for resolving the run
            template.
        """

        # NOTE(review): when the test has no run section, run_script_path is
        # None and the Popen call below will fail; confirm whether callers
        # ever run such tests.
        if self.run_tmpl_path is not None:
            # Convert the run script template into the final run script.
            try:
                var_man = variables.VariableSetManager()
                var_man.add_var_set('sched', sched_vars)
                var_man.add_var_set('sys', self._pav_cfg.sys_vars)

                self.resolve_template(self.run_tmpl_path,
                                      self.run_script_path,
                                      var_man)
            except KeyError as err:
                msg = ("Error converting run template '{}' into the final "
                       "script: {}"
                       .format(self.run_tmpl_path, err))
                self.LOGGER.error(msg)
                self.status.set(STATES.RUN_ERROR, msg)
                # Without a resolved script there is nothing valid to run.
                return False
            except PavTestError as err:
                self.LOGGER.error(err)
                self.status.set(STATES.RUN_ERROR, str(err))
                return False

        run_log_path = os.path.join(self.path, 'run.log')

        with open(run_log_path, 'wb') as run_log:
            proc = subprocess.Popen([self.run_script_path],
                                    cwd=self.build_path,
                                    stdout=run_log,
                                    stderr=run_log)

            # Run the test, but timeout if it doesn't produce any output every
            # RUN_SILENT_TIMEOUT seconds
            timeout = self.RUN_SILENT_TIMEOUT
            result = None
            while result is None:
                try:
                    result = proc.wait(timeout=timeout)
                except subprocess.TimeoutExpired:
                    out_stat = os.stat(run_log_path)
                    quiet_time = time.time() - out_stat.st_mtime
                    # Has the output file changed recently?
                    if self.RUN_SILENT_TIMEOUT < quiet_time:
                        # Give up on the run, and call it a failure.
                        proc.kill()
                        self.status.set(STATES.RUN_FAILED,
                                        "Run timed out after {} seconds."
                                        .format(self.RUN_SILENT_TIMEOUT))
                        return False
                    else:
                        # Only wait a max of RUN_SILENT_TIMEOUT next 'wait'
                        timeout = self.RUN_SILENT_TIMEOUT - quiet_time

        if result != 0:
            self.status.set(STATES.RUN_FAILED, "Test run failed.")
            return False
        else:
            self.status.set(STATES.RUN_DONE,
                            "Test run has completed successfully.")
            return True

    def process_results(self):
        """Process the results of the test."""

    @property
    def is_built(self):
        """Whether the build for this test exists.

        :returns: True if the build exists (or the test doesn't have a build),
            False otherwise.
        """

        # No (or an empty) build section means there is nothing to build.
        if not self.config.get('build'):
            return True

        # os.path.exists follows symlinks, so a dangling link (or any stat
        # failure, like a symlink loop) counts as 'not built'.
        return os.path.exists(self.build_path)

    @property
    def job_id(self):
        """The scheduler job id of this test, read (and cached) from the
        'jobid' file in the test directory.

        :returns: The job id string, or None if it hasn't been set or can't
            be read.
        """

        path = os.path.join(self.path, 'jobid')

        if self._job_id is not None:
            return self._job_id

        try:
            with open(path, 'r') as job_id_file:
                self._job_id = job_id_file.read()
        except FileNotFoundError:
            return None
        except (OSError, IOError) as err:
            self.LOGGER.error("Could not read jobid file '{}': {}"
                              .format(path, err))
            return None

        return self._job_id

    @job_id.setter
    def job_id(self, job_id):
        """Save the job id to the 'jobid' file and cache it. Write errors
        are logged, not raised."""

        path = os.path.join(self.path, 'jobid')

        try:
            with open(path, 'w') as job_id_file:
                job_id_file.write(job_id)
        except (IOError, OSError) as err:
            self.LOGGER.error("Could not write jobid file '{}': {}"
                              .format(path, err))

        self._job_id = job_id

    @property
    def ts(self):
        """Return the unix timestamp for this test, based on the last
        modified date for the test directory."""
        return os.stat(self.path).st_mtime

    def _hash_dict(self, mapping):
        """Create a hash from the keys and items in 'mapping'. Keys are
        processed in order. Can handle lists and other dictionaries as values.
        Values of any other non-string type only contribute their key.

        :param dict mapping: The dictionary to hash.
        :returns: The raw sha256 digest (bytes).
        """

        hash_obj = hashlib.sha256()

        for key in sorted(mapping.keys()):
            hash_obj.update(str(key).encode('utf-8'))

            val = mapping[key]

            if isinstance(val, str):
                hash_obj.update(val.encode('utf-8'))
            elif isinstance(val, list):
                for item in val:
                    hash_obj.update(item.encode('utf-8'))
            elif isinstance(val, dict):
                hash_obj.update(self._hash_dict(val))

        return hash_obj.digest()

    def _hash_file(self, path):
        """Hash the given file (which is assumed to exist).

        :param str path: Path to the file to hash.
        :returns: The raw sha256 digest (bytes) of the file's contents.
        """

        hash_obj = hashlib.sha256()

        # Read in blocks so large files never have to fit in memory.
        with open(path, 'rb') as file:
            chunk = file.read(self._BLOCK_SIZE)
            while chunk:
                hash_obj.update(chunk)
                chunk = file.read(self._BLOCK_SIZE)

        return hash_obj.digest()

    @staticmethod
    def _hash_dir(path):
        """Instead of hashing the files within a directory, we just create a
        'hash' based on it's name and mtime, assuming we've run _date_dir
        on it before hand. This produces an arbitrary string, not a hash.

        :param str path: The path to the directory.
        :returns: The 'hash' (utf-8 encoded bytes).
        """

        dir_stat = os.stat(path)
        return '{} {:0.5f}'.format(path, dir_stat.st_mtime).encode('utf-8')

    @staticmethod
    def _date_dir(base_path):
        """Update the mtime of the given directory or path to the the latest
        mtime contained within.

        :param str base_path: The root of the path to evaluate.
        """

        src_stat = os.stat(base_path)
        latest = src_stat.st_mtime

        paths = utils.flat_walk(base_path)
        for path in paths:
            dir_stat = os.stat(path)
            if dir_stat.st_mtime > latest:
                latest = dir_stat.st_mtime

        # Only touch the directory when something inside of it is newer.
        if src_stat.st_mtime != latest:
            os.utime(base_path, (src_stat.st_atime, latest))

    def _write_script(self, path, config):
        """Write a build or run script or template. The formats for each are
        identical.

        :param str path: Path to the template file to write.
        :param dict config: Configuration dictionary for the script file.
        :return: None
        """

        script = scriptcomposer.ScriptComposer(
            details=scriptcomposer.ScriptDetails(
                path=path,
                group=self._pav_cfg.shared_group,
            ))

        pav_lib_bash = os.path.join(self._pav_cfg.pav_root,
                                    'bin', 'pav-lib.bash')

        script.comment('The following is added to every test build and '
                       'run script.')
        script.env_change({'TEST_ID': '{}'.format(self.id)})
        script.command('source {}'.format(pav_lib_bash))

        modules = config.get('modules', [])
        if modules:
            script.newline()
            script.comment('Perform module related changes to the environment.')

            for module in modules:
                script.module_change(module, self._pav_cfg.sys_vars)

        env = config.get('env', {})
        if env:
            script.newline()
            script.comment("Making any environment changes needed.")
            script.env_change(env)

        script.newline()
        cmds = config.get('cmds', [])
        if cmds:
            script.comment("Perform the sequence of test commands.")
            for line in cmds:
                # Multi-line commands become one script command per line.
                for split_line in line.split('\n'):
                    script.command(split_line)
        else:
            script.comment('No commands given for this script.')

        script.write()

    @classmethod
    def resolve_template(cls, tmpl_path, script_path, var_man):
        """Resolve the test deferred variables using the appropriate escape
        sequence.

        :param str tmpl_path: Path to the template file to read.
        :param str script_path: Path to the script file to write.
        :param variables.VariableSetManager var_man: A variable set manager for
            retrieving found variables. Is expected to contain the sys and
            sched variable sets.
        :raises KeyError: For unknown variables in the template.
        :raises PavTestError: If the template can't be escaped, read or
            written.
        """

        try:
            with open(tmpl_path, 'r') as tmpl, \
                    open(script_path, 'w') as script:

                for line in tmpl:
                    script.write(var_man.resolve_deferred_str(line))

            # Add group and owner execute permissions to the produced script.
            new_mode = (os.stat(script_path).st_mode |
                        stat.S_IXGRP |
                        stat.S_IXUSR)
            os.chmod(script_path, new_mode)

        except ValueError as err:
            raise PavTestError("Problem escaping run template file '{}': {}"
                               .format(tmpl_path, err))

        except (IOError, OSError) as err:
            raise PavTestError("Failed processing run template file '{}' into "
                               "run script'{}': {}"
                               .format(tmpl_path, script_path, err))
def __init__(self, pav_cfg, config, build_tracker=None, var_man=None,
             _id=None, rebuild=False, build_only=False):
    """Create a new TestRun object, or re-attach to an existing test run
    when ``_id`` is given. To load an existing test instance, use
    ``TestRun.load()`` rather than passing ``_id`` directly.

    For a new test this creates the test's id directory under
    ``working_dir/test_runs`` and writes the config, variables, attributes,
    status file, build script, build name and run template into it. For an
    existing test the variables, attributes and build name are read back
    instead.

    :param pav_cfg: The pavilion configuration.
    :param dict config: The test configuration dictionary. Must contain a
        'scheduler' key.
    :param builder.MultiBuildTracker build_tracker: Tracker for watching
        and managing the status of multiple builds.
    :param variables.VariableSetManager var_man: The variable set manager
        for this test. A fresh, empty one is created for new tests if
        this is None.
    :param bool build_only: Only build this test run, do not run it.
    :param bool rebuild: After determining the build name, deprecate it
        and select a new, non-deprecated build.
    :param int _id: The test id of an existing test. (You should be using
        TestRun.load).
    :raises TestConfigError: If the configured group does not exist or the
        current user is not a member of it, or if a build 'source_url' is
        given without a 'source_path'.
    :raises RuntimeError: If the configured umask isn't a valid octal
        string.
    :raises TestRunNotFoundError: If ``_id`` was given but no such test run
        directory exists.
    :raises TestRunError: If saved variables can't be loaded or the test
        builder can't be created.
    """

    # Just about every method needs this
    self._pav_cfg = pav_cfg

    self.load_ok = True

    self.scheduler = config['scheduler']

    # The directory that holds all the test run id directories.
    tests_path = pav_cfg.working_dir/'test_runs'

    self.config = config

    self.id = None  # pylint: disable=invalid-name

    # NOTE(review): both '_attrs' (here) and '_attributes' (below, after
    # the attributes are saved/loaded) are initialized; confirm which one
    # save_attributes()/load_attributes() actually use.
    self._attrs = {}

    # Mark the run to build locally, unless the config explicitly asks
    # for the build to happen on nodes.
    self.build_local = config.get('build', {}) \
        .get('on_nodes', 'false').lower() != 'true'

    # If a test access group was given, make sure it exists and the
    # current user is a member.
    self.group = config.get('group')
    if self.group is not None:
        try:
            group_data = grp.getgrnam(self.group)
            user = utils.get_login()
            # A group named after the user counts as membership.
            if self.group != user and user not in group_data.gr_mem:
                raise TestConfigError(
                    "Test specified group '{}', but the current user '{}' "
                    "is not a member of that group."
                    .format(self.group, user))
        except KeyError as err:
            raise TestConfigError(
                "Test specified group '{}', but that group does not "
                "exist on this system. {}"
                .format(self.group, err)) from err

    # The umask arrives as an octal string; keep it as an int.
    self.umask = config.get('umask')
    if self.umask is not None:
        try:
            self.umask = int(self.umask, 8)
        except ValueError as err:
            raise RuntimeError(
                "Invalid umask. This should have been enforced by the "
                "config format.") from err

    self.build_only = build_only
    self.rebuild = rebuild

    self.suite_path = None
    if self.config.get('suite_path') is not None:
        try:
            self.suite_path = Path(self.config['suite_path'])
        except ValueError:
            # Deliberately best-effort: an unusable suite path simply
            # leaves suite_path as None.
            pass

    # Get an id for the test, if we weren't given one.
    if _id is None:
        self.id, self.path = self.create_id_dir(tests_path)
        # NOTE(review): the extent of this 'with' block was reconstructed
        # from whitespace-collapsed source -- confirm.
        with PermissionsManager(self.path, self.group, self.umask):
            self._save_config()
            if var_man is None:
                var_man = variables.VariableSetManager()
            self.var_man = var_man
            self._variables_path = self.path / 'variables'
            self.var_man.save(self._variables_path)

        self.save_attributes()
    else:
        self.id = _id
        self.path = utils.make_id_path(tests_path, self.id)
        self._variables_path = self.path / 'variables'
        if not self.path.is_dir():
            raise TestRunNotFoundError(
                "No test with id '{}' could be found.".format(self.id))
        try:
            self.var_man = variables.VariableSetManager.load(
                self._variables_path
            )
        except RuntimeError as err:
            raise TestRunError(*err.args) from err

        self.load_attributes()

    # The full test name is '<suite>.<name>[.<subtitle>]'.
    name_parts = [
        self.config.get('suite', '<unknown>'),
        self.config.get('name', '<unnamed>'),
    ]
    subtitle = self.config.get('subtitle')
    # Don't add undefined or empty subtitles.
    if subtitle:
        name_parts.append(subtitle)

    self.name = '.'.join(name_parts)

    # Set a logger more specific to this test.
    self.logger = logging.getLogger('pav.TestRun.{}'.format(self.id))

    # This will be set by the scheduler
    self._job_id = None

    with PermissionsManager(self.path/'status', self.group, self.umask):
        # Setup the initial status file.
        self.status = StatusFile(self.path/'status')
        if _id is None:
            self.status.set(STATES.CREATED,
                            "Test directory and status file created.")

    self.run_timeout = self.parse_timeout(
        'run', config.get('run', {}).get('timeout'))
    self.build_timeout = self.parse_timeout(
        'build', config.get('build', {}).get('timeout'))

    self._attributes = {}

    self.build_name = None
    self.run_log = self.path/'run.log'
    self.results_path = self.path/'results.json'
    self.build_origin_path = self.path/'build_origin'

    build_config = self.config.get('build', {})

    # A source_url is only meaningful alongside a source_path.
    if (build_config.get('source_path') is None and
            build_config.get('source_url') is not None):
        raise TestConfigError(
            "Build source_url specified, but not a source_path.")

    self.build_script_path = self.path/'build.sh'  # type: Path
    self.build_path = self.path/'build'
    if _id is None:
        self._write_script(
            'build',
            path=self.build_script_path,
            config=build_config)

    # Existing tests must reuse their recorded build name; new tests pass
    # None to the builder.
    build_name = None
    self._build_name_fn = self.path / 'build_name'
    if _id is not None:
        build_name = self._load_build_name()

    try:
        self.builder = builder.TestBuilder(
            pav_cfg=pav_cfg,
            test=self,
            mb_tracker=build_tracker,
            build_name=build_name
        )
    except builder.TestBuilderError as err:
        raise TestRunError(
            "Could not create builder for test {s.name} (run {s.id}): {err}"
            .format(s=self, err=err)
        ) from err

    self.save_build_name()

    run_config = self.config.get('run', {})
    self.run_tmpl_path = self.path/'run.tmpl'
    self.run_script_path = self.path/'run.sh'

    if _id is None:
        self._write_script(
            'run',
            path=self.run_tmpl_path,
            config=run_config)

    if _id is None:
        self.status.set(STATES.CREATED, "Test directory setup complete.")

    # Lazily populated caches.
    self._results = None
    self._created = None

    self.skipped = self._get_skipped()
def __init__(self, pav_cfg, config, build_tracker=None, var_man=None,
             _id=None, **options):
    """Create a new TestRun object, or re-attach to an existing test run
    when ``_id`` is given. If loading an existing test instance, use the
    ``TestRun.load()`` class method rather than passing ``_id`` directly.

    For a new test (``_id is None``) this creates the test's id directory
    under ``working_dir/test_runs`` and writes the config, variables,
    options, status file, build script, build name and run template into
    it. For an existing test the variables, options and build name are
    read back instead.

    :param pav_cfg: The pavilion configuration.
    :param dict config: The test configuration dictionary. Must contain a
        'scheduler' key.
    :param builder.MultiBuildTracker build_tracker: Tracker for watching
        and managing the status of multiple builds.
    :param variables.VariableSetManager var_man: The variable set manager
        for this test. A fresh, empty one is created for new tests if
        this is None.
    :param bool build_only: Only build this test run, do not run it.
        (Passed through ``**options``.)
    :param bool rebuild: After determining the build name, deprecate it
        and select a new, non-deprecated build. (Passed through
        ``**options``.)
    :param int _id: The test id of an existing test. (You should be using
        TestRun.load).
    :param options: Keyword options handed to ``TestRunOptions`` for new
        tests. For existing tests the options are loaded from the test
        and these are not used.
    :raises TestRunNotFoundError: If ``_id`` was given but no such test run
        directory exists.
    :raises TestRunError: If saved variables can't be loaded or the test
        builder can't be created.
    :raises TestConfigError: If 'source_download_name' is set in the build
        config while 'source_location' is None.
    """

    # Just about every method needs this
    self._pav_cfg = pav_cfg

    self.load_ok = True

    # Compute the actual name of test, using the subtitle config parameter.
    # The result is '<suite>.<name>', plus '.<subtitle>' when a non-empty
    # subtitle is configured.
    self.name = '.'.join([
        config.get('suite', '<unknown>'),
        config.get('name', '<unnamed>')
    ])
    if 'subtitle' in config and config['subtitle']:
        self.name = self.name + '.' + config['subtitle']

    self.scheduler = config['scheduler']

    # The directory that holds all the test run id directories.
    tests_path = pav_cfg.working_dir / 'test_runs'

    self.config = config

    self.id = None  # pylint: disable=invalid-name

    # Mark the run to build locally, unless the config explicitly asks
    # for the build to happen on nodes.
    self.build_local = config.get('build', {}) \
        .get('on_nodes', 'false').lower() != 'true'

    # Get an id for the test, if we weren't given one.
    if _id is None:
        # New test: allocate the directory, then persist config, variables
        # and options into it.
        self.id, self.path = self.create_id_dir(tests_path)
        self._save_config()
        if var_man is None:
            var_man = variables.VariableSetManager()
        self.var_man = var_man
        self._variables_path = self.path / 'variables'
        self.var_man.save(self._variables_path)
        self.opts = TestRunOptions(**options)
        self.opts.save(self)
    else:
        # Existing test: the directory must already be there, and the
        # variables and options are read back from it.
        self.id = _id
        self.path = utils.make_id_path(tests_path, self.id)
        self._variables_path = self.path / 'variables'
        if not self.path.is_dir():
            raise TestRunNotFoundError(
                "No test with id '{}' could be found.".format(self.id))
        try:
            self.var_man = variables.VariableSetManager.load(
                self._variables_path)
        except RuntimeError as err:
            # Re-raised as this module's error type with the same args.
            raise TestRunError(*err.args)

        self.opts = TestRunOptions.load(self)

    # Set a logger more specific to this test.
    self.logger = logging.getLogger('pav.TestRun.{}'.format(self.id))

    # This will be set by the scheduler
    self._job_id = None

    # Setup the initial status file. Only new tests record the CREATED
    # state; existing tests keep whatever status history they have.
    self.status = StatusFile(self.path / 'status')
    if _id is None:
        self.status.set(STATES.CREATED,
                        "Test directory and status file created.")

    # Timeouts are parsed from the config (None when not configured).
    self.run_timeout = self.parse_timeout(
        'run', config.get('run', {}).get('timeout'))
    self.build_timeout = self.parse_timeout(
        'build', config.get('build', {}).get('timeout'))

    # Start/finish times of the run; set by run().
    self._started = None
    self._finished = None

    self.build_name = None
    self.run_log = self.path / 'run.log'
    self.results_path = self.path / 'results.json'
    self.build_origin_path = self.path / 'build_origin'

    build_config = self.config.get('build', {})

    # make sure build source_download_name is not set without
    # source_location
    try:
        if build_config['source_download_name'] is not None:
            if build_config['source_location'] is None:
                msg = "Test could not be built. Need 'source_location'."
                # Record the problem in the status file before raising.
                self.status.set(
                    STATES.BUILD_ERROR,
                    "'source_download_name is set without a "
                    "'source_location'")
                raise TestConfigError(msg)
    except KeyError:
        # this is mostly for unit tests that create test configs without a
        # build section at all
        # (A missing 'source_location' key also lands here, so the check
        # only fires when that key is present and None.)
        pass

    self.build_script_path = self.path / 'build.sh'  # type: Path
    self.build_path = self.path / 'build'
    if _id is None:
        self._write_script('build',
                           path=self.build_script_path,
                           config=build_config)

    # Existing tests must reuse their recorded build name; new tests pass
    # None to the builder.
    build_name = None
    self._build_name_fn = self.path / 'build_name'
    if _id is not None:
        build_name = self._load_build_name()

    try:
        self.builder = builder.TestBuilder(pav_cfg=pav_cfg,
                                           test=self,
                                           mb_tracker=build_tracker,
                                           build_name=build_name)
    except builder.TestBuilderError as err:
        raise TestRunError(
            "Could not create builder for test {s.name} (run {s.id}): {err}"
            .format(s=self, err=err))

    # Written for both new and existing tests.
    self.save_build_name()

    run_config = self.config.get('run', {})
    self.run_tmpl_path = self.path / 'run.tmpl'
    self.run_script_path = self.path / 'run.sh'

    # Only the run *template* is written here; the final run script is
    # written to run_script_path elsewhere.
    if _id is None:
        self._write_script('run',
                           path=self.run_tmpl_path,
                           config=run_config)

    if _id is None:
        self.status.set(STATES.CREATED, "Test directory setup complete.")

    # Lazily populated caches.
    self._results = None
    self._created = None
class TestRun:
    """The central pavilion test object. Handle saving, monitoring and running
    tests.

    **Test LifeCycle**
    1. Test Object is Created -- ``TestRun.__init__``

       1. Test id and directory (``working_dir/test_runs/0000001``) are
          created.
       2. Most test information files (config, status, etc) are created.
       3. Build script is created.
       4. Build hash is generated.
       5. Run script dry run generation is performed.

    2. Test is built. -- ``test.build()``
    3. Test is finalized. -- ``test.finalize()``

       1. Variables and config go through final resolution.
       2. Final run script is generated.
    4. Test is run. -- ``test.run()``
    5. Results are gathered. -- ``test.gather_results()``

    :ivar int id: The test id.
    :ivar dict config: The test's configuration.
    :ivar Path test.path: The path to the test's test_run directory.
    :ivar dict results: The test results. Set None if results haven't been
        gathered.
    :ivar TestBuilder builder: The test builder object, with information on the
        test's build.
    :ivar Path build_origin_path: The path to the symlink to the original
        build directory. For bookkeeping.
    :ivar StatusFile status: The status object for this test.
    :ivar TestRunOptions opts: Test run options defined by OPTIONS_DEFAULTS
    :cvar OPTIONS_DEFAULTS: A dictionary of defaults for additional options
        for the test run. Values given to Pavilion are expected to be the
        same type as the default value.
    """

    logger = logging.getLogger('pav.TestRun')

    # File names (within the test run directory) for the job id and the
    # run-complete marker.
    JOB_ID_FN = 'job_id'
    COMPLETE_FN = 'RUN_COMPLETE'

    OPTIONS_DEFAULTS = {
        'build_only': False,
        'rebuild': False,
    }

    def __init__(self, pav_cfg, config, build_tracker=None, var_man=None,
                 _id=None, **options):
        """Create a new TestRun object, or re-attach to an existing test run
        when ``_id`` is given. If loading an existing test instance, use the
        ``TestRun.load()`` method.

        :param pav_cfg: The pavilion configuration.
        :param dict config: The test configuration dictionary. Must contain
            a 'scheduler' key.
        :param builder.MultiBuildTracker build_tracker: Tracker for watching
            and managing the status of multiple builds.
        :param variables.VariableSetManager var_man: The variable set manager
            for this test. A fresh one is created for new tests if None.
        :param bool build_only: Only build this test run, do not run it.
        :param bool rebuild: After determining the build name, deprecate it
            and select a new, non-deprecated build.
        :param int _id: The test id of an existing test. (You should be using
            TestRun.load).
        :param options: Keyword options handed to ``TestRunOptions`` for new
            tests (see OPTIONS_DEFAULTS); existing tests load theirs.
        :raises TestRunNotFoundError: If ``_id`` was given but the test run
            directory does not exist.
        :raises TestRunError: If saved variables can't be loaded or the
            builder can't be created.
        :raises TestConfigError: If 'source_download_name' is set while
            'source_location' is None.
        """

        # Just about every method needs this
        self._pav_cfg = pav_cfg

        self.load_ok = True

        # Compute the actual name of test, using the subtitle config parameter.
        self.name = '.'.join([
            config.get('suite', '<unknown>'),
            config.get('name', '<unnamed>')
        ])
        if config.get('subtitle'):
            self.name = self.name + '.' + config['subtitle']

        self.scheduler = config['scheduler']

        # The directory that holds all the test run id directories.
        tests_path = pav_cfg.working_dir / 'test_runs'

        self.config = config

        self.id = None  # pylint: disable=invalid-name

        # Mark the run to build locally, unless the config explicitly asks
        # for the build to happen on nodes.
        self.build_local = config.get('build', {}) \
            .get('on_nodes', 'false').lower() != 'true'

        # Get an id for the test, if we weren't given one.
        if _id is None:
            self.id, self.path = self.create_id_dir(tests_path)
            self._save_config()
            if var_man is None:
                var_man = variables.VariableSetManager()
            self.var_man = var_man
            self._variables_path = self.path / 'variables'
            self.var_man.save(self._variables_path)
            self.opts = TestRunOptions(**options)
            self.opts.save(self)
        else:
            self.id = _id
            self.path = utils.make_id_path(tests_path, self.id)
            self._variables_path = self.path / 'variables'
            if not self.path.is_dir():
                raise TestRunNotFoundError(
                    "No test with id '{}' could be found.".format(self.id))
            try:
                self.var_man = variables.VariableSetManager.load(
                    self._variables_path)
            except RuntimeError as err:
                raise TestRunError(*err.args) from err

            self.opts = TestRunOptions.load(self)

        # Set a logger more specific to this test.
        self.logger = logging.getLogger('pav.TestRun.{}'.format(self.id))

        # This will be set by the scheduler
        self._job_id = None

        # Setup the initial status file. Only new tests record CREATED.
        self.status = StatusFile(self.path / 'status')
        if _id is None:
            self.status.set(STATES.CREATED,
                            "Test directory and status file created.")

        self.run_timeout = self.parse_timeout(
            'run', config.get('run', {}).get('timeout'))
        self.build_timeout = self.parse_timeout(
            'build', config.get('build', {}).get('timeout'))

        # Start/finish times of the run; set by run().
        self._started = None
        self._finished = None

        self.build_name = None
        self.run_log = self.path / 'run.log'
        self.results_path = self.path / 'results.json'
        self.build_origin_path = self.path / 'build_origin'

        build_config = self.config.get('build', {})

        # make sure build source_download_name is not set without
        # source_location
        try:
            if build_config['source_download_name'] is not None:
                if build_config['source_location'] is None:
                    msg = "Test could not be built. Need 'source_location'."
                    self.status.set(
                        STATES.BUILD_ERROR,
                        "'source_download_name is set without a "
                        "'source_location'")
                    raise TestConfigError(msg)
        except KeyError:
            # this is mostly for unit tests that create test configs without a
            # build section at all
            pass

        self.build_script_path = self.path / 'build.sh'  # type: Path
        self.build_path = self.path / 'build'
        if _id is None:
            self._write_script('build',
                               path=self.build_script_path,
                               config=build_config)

        # Existing tests must reuse their recorded build name; new tests
        # pass None to the builder.
        build_name = None
        self._build_name_fn = self.path / 'build_name'
        if _id is not None:
            build_name = self._load_build_name()

        try:
            self.builder = builder.TestBuilder(pav_cfg=pav_cfg,
                                               test=self,
                                               mb_tracker=build_tracker,
                                               build_name=build_name)
        except builder.TestBuilderError as err:
            raise TestRunError(
                "Could not create builder for test {s.name} (run {s.id}): "
                "{err}".format(s=self, err=err)) from err

        self.save_build_name()

        run_config = self.config.get('run', {})
        self.run_tmpl_path = self.path / 'run.tmpl'
        self.run_script_path = self.path / 'run.sh'

        if _id is None:
            # Only the run template is written here; finalize() writes the
            # final run script.
            self._write_script('run',
                               path=self.run_tmpl_path,
                               config=run_config)
            self.status.set(STATES.CREATED, "Test directory setup complete.")

        # Lazily populated caches.
        self._results = None
        self._created = None

    @classmethod
    def load(cls, pav_cfg, test_id):
        """Load an old TestRun object given a test id.

        :param pav_cfg: The pavilion config
        :param int test_id: The test's id number.
        :raises TestRunError: If the test directory or its config can't be
            found or read.
        :rtype: TestRun
        """

        path = utils.make_id_path(pav_cfg.working_dir / 'test_runs', test_id)

        if not path.is_dir():
            raise TestRunError("Test directory for test id {} does not exist "
                               "at '{}' as expected.".format(test_id, path))

        config = cls._load_config(path)

        # Construct via cls so subclasses get instances of themselves.
        return cls(pav_cfg, config, _id=test_id)

    def finalize(self, var_man):
        """Resolve any remaining deferred variables, and generate the final
        run script.

        :param var_man: The variable manager whose values are used to
            un-defer this test's variables.
        """

        self.var_man.undefer(new_vars=var_man, parser=string_parser.parse)

        self.config = resolve_deferred(self.config, self.var_man)
        self._save_config()

        # Save our newly updated variables.
        self.var_man.save(self._variables_path)

        self._write_script(
            'run',
            self.run_script_path,
            self.config['run'],
        )

    def run_cmd(self):
        """Construct a shell command that would cause pavilion to run this
        test.

        :rtype: str
        """

        pav_path = self._pav_cfg.pav_root / 'bin' / 'pav'

        return '{} run {}'.format(pav_path, self.id)

    def _save_config(self):
        """Save the configuration for this test to the test config file.

        :raises TestRunError: If the file can't be written or the config
            contains a value that can't be serialized.
        """

        config_path = self.path / 'config'

        try:
            with config_path.open('w') as json_file:
                pavilion.output.json_dump(self.config, json_file)
        except (OSError, IOError) as err:
            raise TestRunError(
                "Could not save TestRun ({}) config at {}: {}".format(
                    self.name, self.path, err)) from err
        except TypeError as err:
            raise TestRunError("Invalid type in config for ({}): {}".format(
                self.name, err)) from err

    @classmethod
    def _load_config(cls, test_path):
        """Load a saved test configuration.

        :param Path test_path: The test run directory.
        :raises TestRunError: If the config file is missing, unreadable, or
            does not contain valid JSON.
        :rtype: dict
        """

        config_path = test_path / 'config'

        if not config_path.is_file():
            raise TestRunError(
                "Could not find config file for test at {}.".format(test_path))

        try:
            with config_path.open('r') as config_file:
                # Because only string keys are allowed in test configs,
                # this is a reasonable way to load them.
                return json.load(config_file)
        except (TypeError, ValueError) as err:
            # json decode failures are ValueErrors; report them like any
            # other bad config rather than leaking a raw ValueError.
            raise TestRunError("Bad config values for config '{}': {}".format(
                config_path, err)) from err
        except (IOError, OSError) as err:
            raise TestRunError("Error reading config file '{}': {}".format(
                config_path, err)) from err

    def build(self, cancel_event=None):
        """Build the test using its builder object and symlink copy it to
        it's final location. The build tracker will have the latest
        information on any encountered errors.

        :param threading.Event cancel_event: Event to tell builds when to die.

        :returns: True if build successful
        """

        if cancel_event is None:
            cancel_event = threading.Event()

        if not self.builder.build(cancel_event=cancel_event):
            # Keep the failed build around at the test's build path.
            self.builder.fail_path.rename(self.build_path)
            return False

        # Create the build origin path, to make tracking a test's build
        # a bit easier.
        self.build_origin_path.symlink_to(self.builder.path)

        return self.builder.copy_build(self.build_path)

    def save_build_name(self):
        """Save the builder's build name to the build name file for the test.

        :raises TestRunError: If the file can't be written.
        """

        try:
            with self._build_name_fn.open('w') as build_name_file:
                build_name_file.write(self.builder.name)
        except OSError as err:
            raise TestRunError(
                "Could not save build name to build name file at '{}': {}".
                format(self._build_name_fn, err)) from err

    def _load_build_name(self):
        """Load the build name from the build name file.

        :raises TestRunError: If the file can't be read.
        :rtype: str
        """

        try:
            with self._build_name_fn.open() as build_name_file:
                return build_name_file.read()
        except OSError as err:
            raise TestRunError(
                "All existing test runs must have a readable 'build_name' "
                "file, but test run {s.id} did not: {err}".format(s=self,
                                                                  err=err)
            ) from err

    def run(self):
        """Run the test.

        The run script's output goes to the run log. If a run timeout is
        set, the run is killed once the log has gone unmodified for longer
        than that timeout.

        :rtype: bool
        :returns: True if the test completed and returned zero, false
            otherwise.
        :raises TimeoutError: When the run times out.
        :raises TestRunError: We don't actually raise this, but might in the
            future.
        """

        if self.opts.build_only:
            self.status.set(STATES.RUN_ERROR,
                            "Tried to run a 'build_only' test object.")
            return False

        self.status.set(STATES.PREPPING_RUN,
                        "Converting run template into run script.")

        with self.run_log.open('wb') as run_log:
            self.status.set(STATES.RUNNING, "Starting the run script.")

            self._started = datetime.datetime.now()

            # Set the working directory to the build path, if there is one.
            run_wd = None
            if self.build_path is not None:
                run_wd = self.build_path.as_posix()

            # Run scripts take the test id as a first argument.
            cmd = [self.run_script_path.as_posix(), str(self.id)]
            proc = subprocess.Popen(cmd,
                                    cwd=run_wd,
                                    stdout=run_log,
                                    stderr=subprocess.STDOUT)

            self.status.set(STATES.RUNNING, "Currently running.")

            # Run the test, but timeout if it doesn't produce any output
            # every self.run_timeout seconds. (With no timeout set, the
            # first wait simply blocks until the process exits.)
            timeout = self.run_timeout
            result = None
            while result is None:
                try:
                    result = proc.wait(timeout=timeout)
                except subprocess.TimeoutExpired:
                    out_stat = self.run_log.stat()
                    quiet_time = time.time() - out_stat.st_mtime
                    # Has the output file changed recently?
                    if self.run_timeout < quiet_time:
                        # Give up on the run, and call it a failure.
                        proc.kill()
                        msg = ("Run timed out after {} seconds".format(
                            self.run_timeout))
                        self.status.set(STATES.RUN_TIMEOUT, msg)
                        self._finished = datetime.datetime.now()
                        raise TimeoutError(msg)

                    # Wait only for what is left of the quiet period.
                    # This is measured against the full run timeout, not
                    # the previous (already shortened) wait; otherwise the
                    # wait shrinks every pass and can go negative.
                    timeout = self.run_timeout - quiet_time

        self._finished = datetime.datetime.now()

        self.status.set(STATES.RUN_DONE, "Test run has completed.")

        # Anything other than a zero exit status is a failed run.
        return result == 0

    def set_run_complete(self):
        """Write a file in the test directory that indicates that the test
        has completed a run, one way or another. This should only be called
        when we're sure their won't be any more status changes."""

        # Write the current time to the file. We don't actually use the
        # contents of the file, but it's nice to have another record of when
        # this was run.
        with (self.path / self.COMPLETE_FN).open('w') as run_complete:
            json.dump({
                'complete': datetime.datetime.now().isoformat(),
            }, run_complete)

    @property
    def complete(self):
        """Return the complete time from the run complete file, or None
        if the test was never marked as complete (or the file couldn't be
        read)."""

        run_complete_path = self.path / self.COMPLETE_FN

        if not run_complete_path.exists():
            return None

        try:
            with run_complete_path.open() as complete_file:
                data = json.load(complete_file)
                return data.get('complete')
        except (OSError, ValueError) as err:
            # json.JSONDecodeError is a ValueError subclass.
            self.logger.warning(
                "Failed to read run complete file at %s: %s",
                run_complete_path.as_posix(), err)
            return None

    # Seconds between checks for the run complete file in wait().
    WAIT_INTERVAL = 0.5

    def wait(self, timeout=None):
        """Wait for the test run to be complete. This works across hosts, as
        it simply checks for files in the run directory.

        :param Union(None,float) timeout: How long to wait in seconds. If
            this is None, wait forever.
        :raises TimeoutError: if the timeout expires.
        """

        # Convert the relative timeout into an absolute deadline.
        deadline = None
        if timeout is not None:
            deadline = time.time() + timeout

        while True:
            if self.complete is not None:
                return

            time.sleep(self.WAIT_INTERVAL)

            if deadline is not None and time.time() > deadline:
                raise TimeoutError("Timed out waiting for test '{}' to "
                                   "complete".format(self.id))

    def gather_results(self, run_result):
        """Process and log the results of the test, including the default set
        of result keys.

        Default Result Keys:

        name
            The name of the test
        id
            The test id
        created
            When the test was created.
        started
            When the test was started.
        finished
            When the test finished running (or failed).
        duration
            Length of the test run.
        user
            The user who ran the test.
        sys_name
            The system (cluster) on which the test ran.
        job_id
            The job id set by the scheduler.
        result
            Defaults to PASS if the test completed (with a zero
            exit status). Is generally expected to be overridden by other
            result parsers.

        :param bool run_result: The result of the run.
        :raises RuntimeError: If no run was attempted on this object.
        :returns: The results dictionary.
        :rtype: dict
        """

        if self._finished is None:
            raise RuntimeError(
                "test.gather_results can't be run unless the test was run "
                "(or an attempt was made to run it). "
                "This occurred for test {s.name}, #{s.id}".format(s=self))

        parser_configs = self.config['results']

        if run_result:
            default_result = result_parsers.PASS
        else:
            default_result = result_parsers.FAIL

        results = {
            # These can't be overridden
            'name': self.name,
            'id': self.id,
            # A human readable timestamp from the test directory's
            # modified (should be creation) time.
            'created': self.created,
            'started': self._started.isoformat(" "),
            'finished': self._finished.isoformat(" "),
            'duration': str(self._finished - self._started),
            'user': self.var_man['pav.user'],
            'job_id': self.job_id,
            'sys_name': self.var_man['sys.sys_name'],
            # This may be overridden by result parsers.
            'result': default_result
        }

        self.status.set(STATES.RESULTS,
                        "Parsing {} result types.".format(len(parser_configs)))

        results = result_parsers.parse_results(self, results)

        self._results = results

        return results

    def save_results(self, results):
        """Save the results to the results file.

        :param dict results: The results dictionary.
        """

        with self.results_path.open('w') as results_file:
            json.dump(results, results_file)

    def load_results(self):
        """Load results from the results file.

        :returns: A dict of results, or None if the results file doesn't
            exist.
        :rtype: dict
        """

        if not self.results_path.exists():
            return None

        with self.results_path.open() as results_file:
            return json.load(results_file)

    @property
    def results(self):
        """The test results. Returns a dictionary of basic information
        if the test has no results."""

        if self._results is None:
            self._results = self.load_results()

        if self._results is not None:
            return self._results

        return {
            'name': self.name,
            # NOTE(review): gather_results() reads 'sys.sys_name'; confirm
            # the unqualified key resolves to the same variable.
            'sys_name': self.var_man['sys_name'],
            'created': self.created,
            'id': self.id,
            'result': None,
        }

    @property
    def created(self):
        """When this test run was created (the creation time of the test run
        directory). Computed from the directory's modification time on
        first access and cached."""

        if self._created is None:
            timestamp = self.path.stat().st_mtime
            self._created = datetime.datetime.fromtimestamp(timestamp)\
                .isoformat(" ")

        return self._created

    @property
    def is_built(self):
        """Whether the build for this test exists.

        :returns: True if the (resolved) build path exists, False otherwise.
        :rtype: bool
        """

        return self.build_path.resolve().exists()

    @property
    def job_id(self):
        """The job id of this test (saved to a ``job_id`` file). This should
        be set by the scheduler plugin as soon as it's known. Returns None
        if the file doesn't exist or can't be read."""

        if self._job_id is not None:
            return self._job_id

        path = self.path / self.JOB_ID_FN

        try:
            with path.open('r') as job_id_file:
                self._job_id = job_id_file.read()
        except FileNotFoundError:
            return None
        except (OSError, IOError) as err:
            self.logger.error("Could not read jobid file '%s': %s",
                              path, err)
            return None

        return self._job_id

    @job_id.setter
    def job_id(self, job_id):

        path = self.path / self.JOB_ID_FN

        try:
            with path.open('w') as job_id_file:
                job_id_file.write(job_id)
        except (IOError, OSError) as err:
            self.logger.error("Could not write jobid file '%s': %s",
                              path, err)

        # The in-memory value is updated even if the write failed.
        self._job_id = job_id

    @property
    def timestamp(self):
        """Return the unix timestamp for this test, based on the last
        modified date for the test directory."""
        return self.path.stat().st_mtime

    def _write_script(self, stype, path, config):
        """Write a build or run script or template. The formats for each are
        mostly identical.

        :param str stype: The type of script (run or build).
        :param Path path: Path to the template file to write.
        :param dict config: Configuration dictionary for the script file.
        :return:
        """

        script = scriptcomposer.ScriptComposer(
            details=scriptcomposer.ScriptDetails(
                path=path,
                group=self._pav_cfg.shared_group,
            ))

        verbose = config.get('verbose', 'false').lower() == 'true'

        if verbose:
            script.comment('# Echoing all commands to log.')
            script.command('set -v')
            script.newline()

        pav_lib_bash = self._pav_cfg.pav_root / 'bin' / 'pav-lib.bash'

        # If we include this directly, it breaks build hashing.
        script.comment('The first (and only) argument of the build script is '
                       'the test id.')
        script.env_change({
            'TEST_ID': '${1:-0}',  # Default to test id 0 if one isn't given.
            'PAV_CONFIG_FILE': self._pav_cfg['pav_cfg_file']
        })
        script.command('source {}'.format(pav_lib_bash))

        preamble = config.get('preamble', [])
        if preamble:
            script.newline()
            script.comment('Preamble commands')
            for cmd in preamble:
                script.command(cmd)

        if stype == 'build' and not self.build_local:
            script.comment('To be built in an allocation.')

        modules = config.get('modules', [])
        if modules:
            script.newline()
            script.comment(
                'Perform module related changes to the environment.')

            for module in modules:
                script.module_change(module, self.var_man)

        env = config.get('env', {})
        if env:
            script.newline()
            script.comment("Making any environment changes needed.")
            script.env_change(env)

        if verbose:
            script.newline()
            script.comment('List all the module modules for posterity')
            script.command("module -t list")
            script.newline()
            script.comment('Output the environment for posterity')
            script.command("declare -p")

        script.newline()
        cmds = config.get('cmds', [])
        if cmds:
            script.comment("Perform the sequence of test commands.")
            for line in cmds:
                # Multi-line command strings become one command per line.
                for split_line in line.split('\n'):
                    script.command(split_line)
        else:
            script.comment('No commands given for this script.')

        script.write()

    @staticmethod
    def create_id_dir(id_dir):
        """In the given directory, create the lowest numbered (positive
        integer) directory that doesn't already exist.

        :param Path id_dir: Path to the directory that contains these 'id'
            directories
        :returns: The id and path to the created directory.
        :rtype: tuple(int, Path)
        :raises OSError: on directory creation failure.
        :raises TimeoutError: If we couldn't get the lock in time.
        """

        lockfile_path = id_dir / '.lockfile'

        # The lock is held while an id is chosen and its directory created.
        with lockfile.LockFile(lockfile_path, timeout=1):
            # Only consider existing directories whose names are all digits.
            used_ids = {
                int(name) for name in os.listdir(str(id_dir))
                if name.isdigit() and (id_dir / name).is_dir()
            }

            # Find the first unused id.
            id_ = 1
            while id_ in used_ids:
                id_ += 1

            path = utils.make_id_path(id_dir, id_)
            path.mkdir()

        return id_, path

    def __repr__(self):
        return "TestRun({s.name}-{s.id})".format(s=self)

    @staticmethod
    def parse_timeout(section, value):
        """Parse the timeout value from either the run or build section
        into an int (or none).

        :param str section: The config section the value came from.
        :param Union[str,None] value: The value to parse.
        :raises TestRunError: If the value isn't a string of digits.
        :rtype: Union[int,None]
        """
        if value is None:
            return None

        stripped = value.strip()
        if stripped.isdigit():
            return int(stripped)

        raise TestRunError(
            "Invalid value for {} timeout. Must be a positive int.".format(
                section))