def _do_lock_concurrency(self, pav_cfg, test):
    """Acquire the concurrency lock for this scheduler, if necessary.

    :param pav_cfg: The pavilion configuration.
    :param pavilion.pav_config.test.TestRun test: The pavilion test to
        lock concurrency for.
    """

    if test.config[self.name]['concurrent'] in ('false', 'False'):
        return None

    lock_name = '{s.name}_sched.lock'.format(s=self)

    # Most schedulers shouldn't have to do this.
    lock_path = pav_cfg.working_dir / lock_name

    lock = LockFile(
        lock_path,
        group=pav_cfg.shared_group,
        # Expire after 24 hours.
        expires_after=60 * 60 * 24,
    )

    test.status.set(
        STATES.SCHEDULED,
        "Test is non-concurrent, and waiting on the "
        "concurrency lock for scheduler {s.name}.".format(s=self))

    lock.lock()

    return lock
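# A hedged caller-side sketch: whatever schedules the test must eventually
# release the lock returned above. The _run_non_concurrent and _start_test
# names are illustrative, and unlock() is assumed to mirror the lock() call
# used in _do_lock_concurrency.
def _run_non_concurrent(self, pav_cfg, test):
    # Returns None when the test allows concurrency, a held LockFile otherwise.
    lock = self._do_lock_concurrency(pav_cfg, test)
    try:
        self._start_test(pav_cfg, test)  # hypothetical helper
    finally:
        if lock is not None:
            lock.unlock()  # assumed counterpart to lock()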
def __init__(self, file_name, max_bytes=0, backup_count=0,
             lock_timeout=10, encoding=None):
    """Initialize the Locking File Handler. This will attempt to open the
    file and use the lockfile, just to check permissions.

    :param Union[str, Path] file_name: The path to the log file.
    :param int max_bytes: The limit of how much data can go in a single
        log file before rolling over. Zero denotes no limit.
    :param int backup_count: How many backups (logfile.1, etc.) to keep.
    :param int lock_timeout: Wait this long before declaring a lock
        deadlock, and giving up.
    :param str encoding: The file encoding to use for the log file.
    """

    self.file_name = Path(file_name)
    self.max_bytes = max_bytes
    self.backup_count = backup_count
    self.mode = 'a'
    self.encoding = encoding
    self.lock_timeout = lock_timeout

    lockfile_path = self.file_name.parent/(self.file_name.name + '.lock')
    self.lock_file = LockFile(lockfile_path, timeout=self.lock_timeout)

    super().__init__()

    # Test acquire the lock file and test open the file.
    with self.lock_file:
        with self.file_name.open(self.mode, encoding=self.encoding):
            pass
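# Usage sketch for this handler, assuming it is exposed under a name like
# LockingFileHandler (the class name and paths below are illustrative, not
# taken from the source).
import logging

logger = logging.getLogger('pavilion')
handler = LockingFileHandler(
    '/var/log/pavilion/pav.log',  # illustrative log path
    max_bytes=10 * 1024 ** 2,     # roll over after roughly 10 MiB
    backup_count=3,               # keep pav.log.1 through pav.log.3
    lock_timeout=10,
)
handler.setFormatter(
    logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(handler)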
def _save_series_id(self):
    """Save the series id to json file that tracks last series ran by
    user on a per system basis."""

    sys_vars = system_variables.get_vars(True)
    sys_name = sys_vars['sys_name']

    json_file = self.pav_cfg.working_dir/'users'
    json_file /= '{}.json'.format(utils.get_login())

    lockfile_path = json_file.with_suffix('.lock')

    with LockFile(lockfile_path):
        data = {}
        try:
            with json_file.open('r') as json_series_file:
                try:
                    data = json.load(json_series_file)
                except json.decoder.JSONDecodeError:
                    # File was empty, therefore json couldn't be loaded.
                    pass

            with PermissionsManager(json_file, self.pav_cfg['shared_group'],
                                    self.pav_cfg['umask']), \
                    json_file.open('w') as json_series_file:
                data[sys_name] = self.sid
                json_series_file.write(json.dumps(data))

        except FileNotFoundError:
            # File hadn't been created yet.
            with PermissionsManager(json_file, self.pav_cfg['shared_group'],
                                    self.pav_cfg['umask']), \
                    json_file.open('w') as json_series_file:
                data[sys_name] = self.sid
                json_series_file.write(json.dumps(data))
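# The file written above is a small JSON map of system name to the last
# series id run there by this user. A minimal reader sketch (this helper is
# illustrative and not part of the source):
import json
from pathlib import Path


def load_last_series_id(working_dir: Path, login: str, sys_name: str):
    """Return the last series id recorded for this user on the given
    system, or None if nothing has been saved yet."""
    json_file = working_dir / 'users' / '{}.json'.format(login)
    try:
        with json_file.open('r') as series_file:
            data = json.load(series_file)
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        return None
    return data.get(sys_name)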
def _build(self, build_dir, cancel_event, lock: lockfile.LockFile = None):
    """Perform the build. This assumes there actually is a build to perform.

    :param Path build_dir: The directory in which to perform the build.
    :param threading.Event cancel_event: Event to signal that the build
        should stop.
    :param lock: The lockfile object. This will need to be refreshed to
        keep it from expiring.
    :returns: True or False, depending on whether the build appears to
        have been successful.
    """

    try:
        future = ThreadPoolExecutor().submit(self._setup_build_dir, build_dir)
        while future.running():
            time.sleep(lock.SLEEP_PERIOD)
            lock.renew()
        # Raise any errors raised by the thread.
        future.result()
    except TestBuilderError as err:
        self.tracker.error(
            note=("Error setting up build directory '{}': {}"
                  .format(build_dir, err)))
        return False

    try:
        # Do the build, and wait for it to complete.
        with self.tmp_log_path.open('w') as build_log:
            # Build scripts take the test id as a first argument.
            cmd = [self._script_path.as_posix(), str(self.test.id)]
            proc = subprocess.Popen(cmd,
                                    cwd=build_dir.as_posix(),
                                    stdout=build_log,
                                    stderr=build_log)

            result = None
            timeout = self._timeout
            while result is None:
                try:
                    result = proc.wait(timeout=1)
                except subprocess.TimeoutExpired:
                    lock.renew()
                    if self._timeout_file.exists():
                        timeout_file = self._timeout_file
                    else:
                        timeout_file = self.tmp_log_path

                    try:
                        timeout = max(
                            timeout,
                            timeout_file.stat().st_mtime + self._timeout)
                    except OSError:
                        pass

                    # Has the output file changed recently?
                    if time.time() > timeout:
                        # Give up on the build, and call it a failure.
                        proc.kill()
                        cancel_event.set()
                        self.tracker.fail(
                            state=STATES.BUILD_TIMEOUT,
                            note="Build timed out after {} seconds."
                                 .format(self._timeout))
                        return False

                    if cancel_event is not None and cancel_event.is_set():
                        proc.kill()
                        self.tracker.update(
                            state=STATES.ABORTED,
                            note="Build canceled due to other builds "
                                 "failing.")
                        return False

    except subprocess.CalledProcessError as err:
        if cancel_event is not None:
            cancel_event.set()
        self.tracker.error(
            note="Error running build process: {}".format(err))
        return False

    except (IOError, OSError) as err:
        if cancel_event is not None:
            cancel_event.set()
        self.tracker.error(
            note="Error that's probably related to writing the "
                 "build output: {}".format(err))
        return False

    finally:
        try:
            self.tmp_log_path.rename(build_dir / self.LOG_NAME)
        except OSError as err:
            self.tracker.warn(
                "Could not move build log from '{}' to final location "
                "'{}': {}".format(self.tmp_log_path, build_dir, err))

        try:
            self._fix_build_permissions(build_dir)
        except OSError as err:
            self.tracker.warn(
                "Error fixing build permissions: {}".format(err))

    if result != 0:
        if cancel_event is not None:
            cancel_event.set()
        self.tracker.fail(note="Build returned a non-zero result.")
        return False
    else:
        self.tracker.update(state=STATES.BUILD_DONE,
                            note="Build completed successfully.")
        return True
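# The timeout above is driven by how recently the build wrote output, not by
# total wall-clock time since the build started. A stripped-down sketch of
# that idea, without the lockfile and tracker plumbing (the function name is
# illustrative):
import subprocess
import time
from pathlib import Path


def run_with_quiet_timeout(cmd, log_path: Path, quiet_timeout: float) -> int:
    """Kill the process if its log file goes `quiet_timeout` seconds
    without being modified; otherwise return its exit code."""
    with log_path.open('w') as log:
        proc = subprocess.Popen(cmd, stdout=log, stderr=log)

    deadline = time.time() + quiet_timeout
    while True:
        try:
            return proc.wait(timeout=1)
        except subprocess.TimeoutExpired:
            # Push the deadline forward whenever the log shows new activity.
            try:
                deadline = max(deadline,
                               log_path.stat().st_mtime + quiet_timeout)
            except OSError:
                pass
            if time.time() > deadline:
                proc.kill()
                raise TimeoutError(
                    "Process produced no output for {} seconds."
                    .format(quiet_timeout))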