def test_bad_job_no_predecessors(self, walk_class, setup_sbx):
    """Simple case of a leaf job failing."""
    actions = DAG()
    actions.add_vertex("1.bad")
    c = walk_class(actions)

    job = c.saved_jobs["1.bad"]
    assert isinstance(job, ControlledJob)
    assert job.should_skip is False
    assert job.status == ReturnValue(1)
    assert c.job_status == {"1.bad": ReturnValue(1)}
    assert c.requeued == {}

    # In the situation where we are using fingerprints,
    # verify the behavior when re-doing a walk with
    # the same DAG.
    if walk_class == FingerprintWalk:
        r2 = walk_class(actions)
        job = r2.saved_jobs["1.bad"]
        assert isinstance(job, ControlledJob)
        assert job.should_skip is False
        assert job.status == ReturnValue(1)
        assert r2.job_status == {"1.bad": ReturnValue(1)}
        assert r2.requeued == {}
def __init__(
    self,
    spec_repository: AnodSpecRepository,
    default_env: Optional[BaseEnv] = None,
    reject_duplicates: bool = False,
):
    """Initialize a new context.

    :param spec_repository: an Anod repository
    :param default_env: an env that should be considered as the
        default for the current context. Mainly useful to simulate
        another server context. If None then we assume that the
        context is the local server.
    :param reject_duplicates: if True, raise SchedulingError when
        two duplicated actions are generated
    """
    self.repo = spec_repository
    if default_env is None:
        self.default_env = BaseEnv()
    else:
        self.default_env = default_env.copy()
    self.reject_duplicates = reject_duplicates

    self.tree = DAG()
    self.root = Root()
    self.dependencies: Dict[str, Dict[str, Tuple[Dependency, Anod]]] = {}
    self.add(self.root)
    self.cache: Dict[CacheKeyType, Anod] = {}
    self.sources: Dict[str, Tuple[str, SourceBuilder]] = {}
def test_job_never_ready(self, walk_class, setup_sbx):
    """Trying to run a job repeatedly returning notready."""
    actions = DAG()
    actions.add_vertex("1.notready:always")
    c = walk_class(actions)

    job = c.saved_jobs["1.notready:always"]
    assert isinstance(job, ControlledJob)
    assert job.should_skip is False
    assert job.status == ReturnValue.notready
    assert c.job_status == {"1.notready:always": ReturnValue.notready}
    assert c.requeued == {"1.notready:always": 3}

    # In the situation where we are using fingerprints,
    # verify the behavior when re-doing a walk with
    # the same DAG.
    if walk_class == FingerprintWalk:
        r2 = walk_class(actions)
        job = r2.saved_jobs["1.notready:always"]
        assert isinstance(job, ControlledJob)
        assert job.should_skip is False
        assert job.status == ReturnValue.notready
        assert r2.job_status == {"1.notready:always": ReturnValue.notready}
        assert r2.requeued == {"1.notready:always": 3}
def test_requeue(self):
    """Requeue test.

    Same as previous example except that all tests are requeued
    once.
    """
    results = {}

    def collect(job):
        if job.uid not in results:
            results[job.uid] = True
            return True
        else:
            return False

    # Test again with two independent jobs
    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2')
    s = Scheduler(Scheduler.simple_provider(NopJob), tokens=2, collect=collect)
    s.run(dag)
    assert s.max_active_jobs == 2
    assert results['1']
    assert results['2']
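# A minimal standalone sketch of the requeue protocol exercised above, assuming
# e3-core's DAG/Scheduler and passing in the testsuite's NopJob helper (or any
# Job subclass): when the collect callback returns True, the scheduler puts the
# job back in the queue, so each uid should be collected twice before run()
# returns. Names and the expected counts are illustrative, not authoritative.
from e3.collection.dag import DAG
from e3.job.scheduler import Scheduler


def demo_requeue_once(nop_job_class):
    seen = {}

    def collect(job):
        # Requeue every job exactly once: True on first sight, False afterwards.
        seen[job.uid] = seen.get(job.uid, 0) + 1
        return seen[job.uid] == 1

    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2')
    s = Scheduler(Scheduler.simple_provider(nop_job_class),
                  tokens=2, collect=collect)
    s.run(dag)
    return seen  # expected: {'1': 2, '2': 2} if each requeued job is collected again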
def test_job_not_ready_then_ok(self, walk_class, setup_sbx):
    """Rerunning a job that first returned notready."""
    actions = DAG()
    actions.add_vertex("1.notready:once")
    c = walk_class(actions)

    job = c.saved_jobs["1.notready:once"]
    assert isinstance(job, ControlledJob)
    assert job.should_skip is False
    assert job.status == ReturnValue.success
    assert c.job_status == {"1.notready:once": ReturnValue.success}
    assert c.requeued == {"1.notready:once": 1}

    # In the situation where we are using fingerprints,
    # verify the behavior when re-doing a walk with
    # the same DAG.
    if walk_class == FingerprintWalk:
        r2 = walk_class(actions)
        job = r2.saved_jobs["1.notready:once"]
        assert isinstance(job, EmptyJob)
        assert job.should_skip is True
        assert job.status == ReturnValue.skip
        assert r2.job_status == {"1.notready:once": ReturnValue.skip}
        assert r2.requeued == {}
def __init__(self, spec_repository, default_env=None, reject_duplicates=False):
    """Initialize a new context.

    :param spec_repository: an Anod repository
    :type spec_repository: e3.anod.loader.AnodSpecRepository
    :param default_env: an env that should be considered as the
        default for the current context. Mainly useful to simulate
        another server context. If None then we assume that the
        context is the local server.
    :type default_env: BaseEnv | None
    :param reject_duplicates: if True, raise SchedulingError when
        two duplicated actions are generated
    :type reject_duplicates: bool
    """
    self.repo = spec_repository
    if default_env is None:
        self.default_env = BaseEnv()
    else:
        self.default_env = default_env.copy()
    self.reject_duplicates = reject_duplicates

    self.tree = DAG()
    self.root = Root()
    self.dependencies = {}
    self.add(self.root)
    self.cache = {}
    self.sources = {}
def test_keyboard_interrupt(self):
    """Ensure that jobs can be interrupted."""
    results = {}
    pytest.importorskip('psutil')

    def get_job(uid, data, predecessors, notify_end):
        return NopJob(uid, data, notify_end)

    def collect(job):
        results[job.uid] = job

    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2')
    s = Scheduler(get_job, tokens=2, collect=collect, job_timeout=2)

    # fake log_state that will raise a KeyboardInterrupt
    def fake_log_state():
        raise KeyboardInterrupt

    s.log_state = fake_log_state

    with pytest.raises(KeyboardInterrupt):
        s.run(dag)

    for k, v in results.items():
        assert v.interrupted
def test_minimal_run(self):
    """Test with only two independent jobs."""
    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2')
    s = Scheduler(Scheduler.simple_provider(NopJob), tokens=2)
    s.run(dag)
    assert s.max_active_jobs == 2
def test_iter_with_busy_state():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b', predecessors=['a'])

    it = DAGIterator(d, enable_busy_state=True)
    for nid, data in it:
        if nid is None:
            it.leave('a')
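# A minimal sketch of the busy-state protocol used above, assuming e3-core's
# DAG/DAGIterator: with enable_busy_state=True the iterator yields None as the
# vertex id while every remaining vertex is blocked by a busy predecessor, and
# leave() releases a vertex so that its successors become available.
from e3.collection.dag import DAG, DAGIterator


def demo_busy_state():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b', predecessors=['a'])

    visited = []
    it = DAGIterator(d, enable_busy_state=True)
    for nid, _ in it:
        if nid is None:
            # 'b' is not ready yet: release 'a' so that 'b' can be yielded.
            it.leave('a')
        else:
            visited.append(nid)
    return visited  # expected: ['a', 'b']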
def test_simple_dag():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b')
    d.add_vertex('c')
    result = []
    for vertex_id, data in d:
        result.append(vertex_id)
    result.sort()
    assert result == ['a', 'b', 'c']
def test_dag_merge():
    d = DAG()
    d.add_vertex('b')
    d.add_vertex('a', predecessors=['b'])

    d2 = DAG()
    d2.add_vertex('c')
    d2.add_vertex('b', predecessors=['c'])
    d2.add_vertex('a', predecessors=['c'])

    d3 = d | d2
    result = []
    for vertex_id, data in d3:
        result.append(vertex_id)
        assert data is None
    assert result == ['c', 'b', 'a']
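# A small sketch of the union semantics exercised above (assuming e3-core's DAG):
# `d | d2` returns a new DAG whose vertices carry the merged predecessor sets, so
# a topological walk over the result honours edges coming from both operands.
from e3.collection.dag import DAG


def demo_union():
    left = DAG()
    left.add_vertex('b')
    left.add_vertex('a', predecessors=['b'])

    right = DAG()
    right.add_vertex('c')
    right.add_vertex('b', predecessors=['c'])

    merged = left | right
    return [vertex_id for vertex_id, _ in merged]  # expected: ['c', 'b', 'a']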
def test_inexisting():
    d = DAG()
    d.add_vertex('a')
    assert 'a' in d
    d.update_vertex('a', data='NOT B', predecessors=['b'], enable_checks=False)
    assert 'b' not in d
    assert d['a'] == 'NOT B'
    with pytest.raises(DAGError):
        d.check()
def test_inexisting():
    d = DAG()
    d.add_vertex("a")
    assert "a" in d
    d.update_vertex("a", data="NOT B", predecessors=["b"], enable_checks=False)
    assert "b" not in d
    assert d["a"] == "NOT B"
    with pytest.raises(DAGError):
        d.check()
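# Sketch of the deferred-validation pattern exercised above (assuming e3-core's
# DAG): with enable_checks=False, update_vertex() accepts a predecessor that was
# never added, and the inconsistency is only reported later by check().
from e3.collection.dag import DAG, DAGError


def demo_deferred_checks():
    d = DAG()
    d.add_vertex("a")
    d.update_vertex("a", data="payload", predecessors=["ghost"],
                    enable_checks=False)
    try:
        d.check()
    except DAGError:
        return "invalid"   # expected: the dangling predecessor is detected here
    return "valid"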
def test_reverse_dag():
    d = DAG()
    d.add_vertex("a")
    d.add_vertex("b", predecessors=["a"])
    d.add_vertex("c", predecessors=["b"])
    d.add_vertex("d", predecessors=["c"])

    it = DAGIterator(d)
    assert [k for k, _ in it] == ["a", "b", "c", "d"]

    reverse_d = d.reverse_graph()
    reverse_it = DAGIterator(reverse_d)
    assert [k for k, _ in reverse_it] == ["d", "c", "b", "a"]
def test_reverse_dag():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b', predecessors=['a'])
    d.add_vertex('c', predecessors=['b'])
    d.add_vertex('d', predecessors=['c'])

    it = DAGIterator(d)
    assert [k for k, _ in it] == ['a', 'b', 'c', 'd']

    reverse_d = d.reverse_graph()
    reverse_it = DAGIterator(reverse_d)
    assert [k for k, _ in reverse_it] == ['d', 'c', 'b', 'a']
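# Sketch of reverse_graph() on a small chain (assuming e3-core's DAG and
# DAGIterator): the reversed graph flips every edge, so a topological walk over
# it visits the chain in the opposite order.
from e3.collection.dag import DAG, DAGIterator


def demo_reverse():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b', predecessors=['a'])
    d.add_vertex('c', predecessors=['b'])

    forward = [k for k, _ in DAGIterator(d)]                    # ['a', 'b', 'c']
    backward = [k for k, _ in DAGIterator(d.reverse_graph())]   # ['c', 'b', 'a']
    return forward, backward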
def test_predecessor_with_no_fingerprint(setup_sbx):
    actions = DAG()
    actions.add_vertex("1")
    actions.add_vertex("2.no_fingerprint", predecessors=["1"])
    actions.add_vertex("3", predecessors=["2.no_fingerprint"])
    actions.add_vertex("4", predecessors=["3"])

    # Execute our planned actions for the first time...
    r1 = FingerprintWalk(actions)
    for uid in ("1", "2.no_fingerprint", "3", "4"):
        job = r1.saved_jobs[uid]
        assert isinstance(job, ControlledJob)
        assert job.should_skip is False
        assert job.status == ReturnValue.success
    assert r1.job_status == {
        "1": ReturnValue.success,
        "2.no_fingerprint": ReturnValue.success,
        "3": ReturnValue.success,
        "4": ReturnValue.success,
    }
    assert r1.requeued == {}

    # Re-execute the plan a second time. Because '2.no_fingerprint'
    # has no fingerprint, both '2.no_fingerprint' and the node that
    # depends directly on it should be re-executed. Node '4', on the
    # other hand, should only be re-executed if the fingerprint of
    # node '3' changed. The way things are set up in this testsuite,
    # the fingerprint remained the same, so '4' is not expected to be
    # re-run.
    r2 = FingerprintWalk(actions)
    for uid in ("1", "4"):
        job = r2.saved_jobs[uid]
        assert isinstance(job, EmptyJob)
        assert job.should_skip is True
        assert job.status == ReturnValue.skip
    for uid in ("2.no_fingerprint", "3"):
        job = r2.saved_jobs[uid]
        assert isinstance(job, ControlledJob)
        assert job.should_skip is False
        assert job.status == ReturnValue.success
    assert r2.job_status == {
        "1": ReturnValue.skip,
        "2.no_fingerprint": ReturnValue.success,
        "3": ReturnValue.success,
        "4": ReturnValue.skip,
    }
    assert r2.requeued == {}
def test_timeout(self):
    """Ensure that jobs are interrupted correctly on timeout."""
    results = {}
    pytest.importorskip('psutil')

    def get_job(uid, data, predecessors, notify_end):
        return SleepJob(uid, data, notify_end)

    def collect(job):
        results[job.uid] = job

    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2')
    s = Scheduler(get_job, tokens=2, collect=collect, job_timeout=2)
    s.run(dag)

    for k, v in results.items():
        assert v.interrupted
def test_job_depending_on_job_with_no_predicted_fingerprint_failed(setup_sbx):
    """Test case where job depends on failed job with late fingerprint."""
    actions = DAG()
    actions.add_vertex("fingerprint_after_job.bad")
    actions.add_vertex("2", predecessors=["fingerprint_after_job.bad"])

    r1 = FingerprintWalk(actions)
    assert (
        r1.compute_fingerprint("fingerprint_after_job.bad", None, is_prediction=True)
        is None
    )

    # Check the status of the first job ('fingerprint_after_job.bad').
    # It should be a real job that returned a failure.
    job = r1.saved_jobs["fingerprint_after_job.bad"]
    assert isinstance(job, ControlledJob)
    assert job.should_skip is False
    assert job.status == ReturnValue(1)

    # Check the status of the second job ('2'); because that job depends
    # on a job that failed, it should show that the job was skipped.
    job = r1.saved_jobs["2"]
    assert isinstance(job, EmptyJob)
    assert job.should_skip is True
    assert job.status == ReturnValue.force_fail

    # Check that no job was requeued.
    assert r1.requeued == {}
def test_collect_feedback_scheme(self):
    """Collect feedback construction.

    Scheme in which a job is skipped whenever one of its predecessors
    "fails". In order to do that, get_job and collect need access to
    some common data. Note that the scheduler ensures that these
    functions are called sequentially.
    """

    class SchedulerContext(object):
        def __init__(self):
            # Save in results tuples with first element being a bool
            # indicating success or failure and the second the job itself
            self.results = {}

        def get_job(self, uid, data, predecessors, notify_end):
            result = NopJob(uid, data, notify_end)

            # If any of the predecessors failed, skip the job
            for k in predecessors:
                if not self.results[k][0]:
                    result.should_skip = True
            return result

        def collect(self, job):
            if job.should_skip:
                # Skipped jobs are considered failed
                self.results[job.uid] = [False, job]
            else:
                # Job '2' is always failing
                if job.uid == "2":
                    self.results[job.uid] = [False, job]
                else:
                    self.results[job.uid] = [True, job]

    dag = DAG()
    dag.add_vertex("1")
    dag.add_vertex("2")
    dag.add_vertex("3", predecessors=["1", "2"])
    dag.add_vertex("4", predecessors=["3"])
    c = SchedulerContext()
    s = Scheduler(c.get_job, tokens=2, collect=c.collect)
    s.run(dag)

    assert (
        not c.results["2"][1].should_skip and not c.results["2"][0]
    ), 'job "2" is run and should be marked as failed'
    assert c.results["3"][1].should_skip, 'job "3" should be skipped'
    assert c.results["4"][1].should_skip, 'job "4" should be skipped'
def test_simple_dag():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b')
    d.add_vertex('c')
    result = []
    for vertex_id, data in d:
        result.append(vertex_id)
    result.sort()
    assert result == ['a', 'b', 'c']
    assert d.check() is None
def test_simple_dag():
    d = DAG()
    d.add_vertex("a")
    d.add_vertex("b")
    d.add_vertex("c")
    result = []
    for vertex_id, _ in d:
        result.append(vertex_id)
    result.sort()
    assert result == ["a", "b", "c"]
    assert d.check() is None
def test_cycle_detection():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b')
    d.update_vertex('a', predecessors=['b'])
    with pytest.raises(DAGError):
        d.update_vertex('b', data='newb', predecessors=['a'])

    # Ensure that the DAG is still valid and that the previous
    # update_vertex call had no effect
    result = []
    for vertex_id, data in d:
        result.append(vertex_id)
        assert data is None
    assert result == ['b', 'a']
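# Sketch of the cycle-rejection behaviour tested above (assuming e3-core's DAG):
# with checks enabled, update_vertex() refuses an edge that would close a cycle
# and the graph keeps its previous, valid topological order.
from e3.collection.dag import DAG, DAGError


def demo_cycle_rejected():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b')
    d.update_vertex('a', predecessors=['b'])
    try:
        d.update_vertex('b', predecessors=['a'])
        return 'cycle accepted'
    except DAGError:
        # The DAG is still usable and unchanged by the rejected update.
        return [vertex_id for vertex_id, _ in d]  # expected: ['b', 'a']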
def test_minimal_run2(self):
    """Test with two interdependent jobs."""
    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2', predecessors=['1'])
    s = Scheduler(Scheduler.simple_provider(NopJob), tokens=2)
    s.run(dag)
    assert s.max_active_jobs == 1
def test_skip(self):
    """Simple example in which all the tests are skipped."""
    results = {}

    def get_job(uid, data, predecessors, notify_end):
        result = NopJob(uid, data, notify_end)
        result.should_skip = True
        return result

    def collect(job):
        results[job.uid] = job.timing_info

    # Test with two independent jobs
    dag = DAG()
    dag.add_vertex('1')
    dag.add_vertex('2')
    s = Scheduler(get_job, tokens=2, collect=collect)
    s.run(dag)

    # Check start_time and stop_time to be sure the tests have not been run
    for k, v in results.items():
        assert v.start_time is None
        assert v.stop_time is None
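# Sketch of the should_skip behaviour checked above, assuming e3-core's
# DAG/Scheduler and a Job class passed in by the caller (the testsuite uses its
# NopJob helper): a job flagged should_skip by the provider is collected without
# being started, so its timing_info remains empty.
from e3.collection.dag import DAG
from e3.job.scheduler import Scheduler


def demo_skip_all(nop_job_class):
    collected = {}

    def get_job(uid, data, predecessors, notify_end):
        job = nop_job_class(uid, data, notify_end)
        job.should_skip = True
        return job

    def collect(job):
        collected[job.uid] = job.timing_info

    dag = DAG()
    dag.add_vertex('1')
    s = Scheduler(get_job, tokens=1, collect=collect)
    s.run(dag)
    return collected  # start_time/stop_time expected to be None for '1'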
def __init__(self, spec_repository, default_env=None):
    """Initialize a new context.

    :param spec_repository: an Anod repository
    :type spec_repository: e3.anod.AnodSpecRepository
    :param default_env: an env that should be considered as the
        default for the current context. Mainly useful to simulate
        another server context. If None then we assume that the
        context is the local server.
    :type default_env: BaseEnv | None
    """
    self.repo = spec_repository
    if default_env is None:
        self.default_env = BaseEnv()
    else:
        self.default_env = default_env.copy()

    self.tree = DAG()
    self.root = Root()
    self.add(self.root)
    self.cache = {}
    self.sources = {}
def test_iter_with_busy_state():
    d = DAG()
    d.add_vertex("a")
    d.add_vertex("b", predecessors=["a"])

    it = DAGIterator(d, enable_busy_state=True)
    for nid, _ in it:
        if nid is None:
            it.leave("a")
def test_ordering(self):
    """Test that jobs are ordered correctly."""
    results = []

    def collect(job):
        results.append(job.uid)

    dag = DAG()
    dag.add_vertex("3")
    dag.add_vertex("0")
    dag.add_vertex("1")
    s = Scheduler(Scheduler.simple_provider(NopJob), tokens=1, collect=collect)
    s.run(dag)
    assert tuple(results) == ("0", "1", "3")
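# Sketch of the ordering guarantee asserted above, assuming e3-core's
# DAG/Scheduler and a caller-provided Job class (the testsuite uses NopJob):
# with a single token and no edges, the test expects independent vertices to be
# collected one at a time in sorted uid order, regardless of insertion order.
from e3.collection.dag import DAG
from e3.job.scheduler import Scheduler


def demo_ordering(nop_job_class):
    order = []

    def collect(job):
        order.append(job.uid)

    dag = DAG()
    for uid in ("3", "0", "1"):
        dag.add_vertex(uid)

    s = Scheduler(Scheduler.simple_provider(nop_job_class),
                  tokens=1, collect=collect)
    s.run(dag)
    return order  # expected: ['0', '1', '3']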
def test_do_nothing_job(self, walk_class, setup_sbx):
    """Test DAG leading us to create a DoNothingJob object."""
    actions = DAG()
    actions.add_vertex("1.do-nothing")
    actions.add_vertex("2", predecessors=["1.do-nothing"])
    c = walk_class(actions)

    job = c.saved_jobs["1.do-nothing"]
    assert isinstance(job, DoNothingJob)
    assert job.should_skip is False
    assert job.status == ReturnValue.success

    job = c.saved_jobs["2"]
    assert isinstance(job, ControlledJob)
    assert job.should_skip is False
    assert job.status == ReturnValue.success

    assert c.job_status == {
        "1.do-nothing": ReturnValue.success,
        "2": ReturnValue.success,
    }
    assert c.requeued == {}

    # In the situation where we are using fingerprints,
    # verify the behavior when re-doing a walk with
    # the same DAG.
    if walk_class == FingerprintWalk:
        r2 = walk_class(actions)

        job = r2.saved_jobs["1.do-nothing"]
        assert isinstance(job, EmptyJob)
        assert job.should_skip is True
        assert job.status == ReturnValue.skip

        job = r2.saved_jobs["2"]
        assert isinstance(job, EmptyJob)
        assert job.should_skip is True
        assert job.status == ReturnValue.skip

        assert r2.job_status == {
            "1.do-nothing": ReturnValue.skip,
            "2": ReturnValue.skip,
        }
        assert r2.requeued == {}
def test_failed_predecessor(self, walk_class, setup_sbx):
    """Simulate the scenario where a predecessor failed."""
    actions = DAG()
    actions.add_vertex("1.bad")
    actions.add_vertex("2", predecessors=["1.bad"])
    c = walk_class(actions)

    job = c.saved_jobs["1.bad"]
    assert isinstance(job, ControlledJob)
    assert job.should_skip is False
    assert job.status == ReturnValue(1)

    job = c.saved_jobs["2"]
    assert isinstance(job, EmptyJob)
    assert job.should_skip is True
    assert job.status == ReturnValue.force_fail

    assert c.job_status == {
        "1.bad": ReturnValue(1),
        "2": ReturnValue.force_fail,
    }
    assert c.requeued == {}

    # In the situation where we are using fingerprints,
    # verify the behavior when re-doing a walk with
    # the same DAG.
    if walk_class == FingerprintWalk:
        r2 = walk_class(actions)

        job = r2.saved_jobs["1.bad"]
        assert isinstance(job, ControlledJob)
        assert job.should_skip is False
        assert job.status == ReturnValue(1)

        job = r2.saved_jobs["2"]
        assert isinstance(job, EmptyJob)
        assert job.should_skip is True
        assert job.status == ReturnValue.force_fail

        assert r2.job_status == {
            "1.bad": ReturnValue(1),
            "2": ReturnValue.force_fail,
        }
        assert r2.requeued == {}
class AnodContext(object): """Anod context. :ivar repo: an anod spec repository :ivar tree: a DAG containing the list of possible actions :ivar root: root node of the DAG :ivar cache: cache of anod instances :ivar sources: list of available sources in the current context :ivar default_env: default environment (used to override build='default') when simulating a list of action from another machine. """ def __init__(self, spec_repository, default_env=None): """Initialize a new context. :param spec_repository: an Anod repository :type spec_repository: e3.anod.AnodSpecRepository :param default_env: an env that should be considered as the default for the current context. Mainly useful to simulate another server context. If None then we assume that the context if the local server :type default_env: BaseEnv | None """ self.repo = spec_repository if default_env is None: self.default_env = BaseEnv() else: self.default_env = default_env.copy() self.tree = DAG() self.root = Root() self.add(self.root) self.cache = {} self.sources = {} def load(self, name, env, qualifier, kind): """Load a spec instance. :param name: spec name :type name: str :param env: environment to use for the spec instance :type env: BaseEnv :param qualifier: spec qualifier :type qualifier: str | None :param kind: primitive used for the loaded spec :type kind: str :return: a spec instance """ # Key used for the spec instance cache key = (name, env.build, env.host, env.target, qualifier, kind) if key not in self.cache: # Spec is not in cache so create a new instance self.cache[key] = self.repo.load(name)(qualifier=qualifier, env=env, kind=kind) # Update the list of available sources. ??? Should be done # once per spec (and not once per spec instance). Need some # spec cleanup to achieve that ??? if self.cache[key].source_pkg_build is not None: for s in self.cache[key].source_pkg_build: self.sources[s.name] = (name, s) return self.cache[key] def add(self, data, *args): """Add node to context tree. :param data: node data :type data: e3.anod.action.Action :param args: list of predecessors :type args: list[e3.anod.action.Action] """ preds = [k.uid for k in args] self.tree.update_vertex(data.uid, data, predecessors=preds, enable_checks=False) def add_decision(self, decision_class, root, left, right): """Add a decision node. This create the following subtree inside the dag:: root --> decision --> left |-> right :param decision_class: Decision subclass to use :type decision_class: T :param root: parent node of the decision node :type root: e3.anod.action.Action :param left: left decision (child of Decision node) :type left: e3.anod.action.Action :param right: right decision (child of Decision node) :type right: e3.anod.action.Action """ decision_action = decision_class(root, left, right) self.add(decision_action, left, right) self.connect(root, decision_action) def connect(self, action, *args): """Add predecessors to a node. :param action: parent node :type action: e3.anod.action.Action :param args: list of predecessors :type args: list[e3.anod.action.Action] """ preds = [k.uid for k in args] self.tree.update_vertex(action.uid, predecessors=preds, enable_checks=False) def __contains__(self, data): """Check if a given action is already in the internal DAG. :param data: an Action :type data: e3.anod.action.Action """ return data.uid in self.tree def __getitem__(self, key): """Retrieve action from the internal DAG based on its key. 
:param key: action uid :type key: str :return: an Action :rtype: e3.node.action.Action """ return self.tree[key] def predecessors(self, action): """Retrieve predecessors of a given action. :param key: the parent action :type key: Action :return: the predecessor list :rtype: list[Action] """ return [self[el] for el in self.tree.vertex_predecessors[action.uid]] def add_anod_action(self, name, env=None, primitive=None, qualifier=None, upload=True): """Add an Anod action to the context. :param name: spec name :type name: str :param env: spec environment :type env: BaseEnv | None :param primitive: spec primitive :type primitive: str :param qualifier: qualifier :type qualifier: str | None :param upload: if True consider uploading to the store :type upload: bool :return: the root added action :rtype: Action """ # First create the subtree for the spec result = self.add_spec(name, env, primitive, qualifier) # Resulting subtree should be connected to the root node self.connect(self.root, result) # Ensure decision is set in case of explicit build or install if primitive == 'build': build_action = None for el in self.predecessors(result): if isinstance(el, BuildOrInstall): el.set_decision(BuildOrInstall.BUILD) build_action = self[el.left] if build_action is None and isinstance(result, Build): build_action = result if build_action is not None: spec = build_action.data if spec.component is not None and upload: if spec.has_package: upload_bin = UploadBinaryComponent(spec) else: upload_bin = UploadSourceComponent(spec) self.add(upload_bin) self.connect(self.root, upload_bin) self.connect(upload_bin, build_action) elif primitive == 'install': for el in self.predecessors(result): if isinstance(el, BuildOrInstall): el.set_decision(BuildOrInstall.INSTALL) return result def add_spec(self, name, env=None, primitive=None, qualifier=None, expand_build=True, source_name=None): """Internal function. The function expand an anod action into a tree :param name: spec name :type name: str :param env: spec environment :type env: BaseEnv | None :param primitive: spec primitive :type primitive: str :param qualifier: qualifier :type qualifier: str | None :param expand_build: should build primitive be expanded :type expand_build: bool :param source_name: source name associated with the source primitive :type source_name: str | None """ # Initialize a spec instance spec = self.load(name, qualifier=qualifier, env=env, kind=primitive) # Initialize the resulting action based on the primitive name if primitive == 'source': result = CreateSource(spec, source_name) elif primitive == 'build': result = Build(spec) elif primitive == 'test': result = Test(spec) elif primitive == 'install': result = Install(spec) else: raise Exception(primitive) if not spec.has_package and primitive == 'install' and \ has_primitive(spec, 'build'): # Case in which we have an install dependency but no install # primitive. In that case the real dependency is a build tree # dependency. In case there is no build primitive and no # package keep the install primitive (usually this means there # is an overloaded download procedure). return self.add_spec(name, env, 'build', qualifier, expand_build=False) if expand_build and primitive == 'build' and \ spec.has_package: # A build primitive is required and the spec defined a binary # package. 
In that case the implicit post action of the build # will be a call to the install primitive return self.add_spec(name, env, 'install', qualifier) # Add this stage if the action is already in the DAG, then it has # already been added. if result in self: return result # Add the action in the DAG self.add(result) if primitive == 'install': # Expand an install node to # install --> decision --> build # \-> download binary download_action = DownloadBinary(spec) self.add(download_action) if has_primitive(spec, 'build'): build_action = self.add_spec(name, env, 'build', qualifier, expand_build=False) self.add_decision(BuildOrInstall, result, build_action, download_action) else: self.connect(result, download_action) # Look for dependencies if '%s_deps' % primitive in dir(spec): for e in getattr(spec, '%s_deps' % primitive): if isinstance(e, Dependency): if e.kind == 'source': # A source dependency does not create a new node but # ensure that sources associated with it are available self.load(e.name, kind='source', env=BaseEnv(), qualifier=None) continue child_action = self.add_spec(e.name, e.env(spec, self.default_env), e.kind, e.qualifier) if e.kind == 'build' and \ self[child_action.uid].data.kind == 'install': # We have a build tree dependency that produced a # subtree starting with an install node. In that case # we expect the user to choose BUILD as decision. dec = self.predecessors(child_action)[0] if isinstance(dec, BuildOrInstall): dec.add_trigger(result, BuildOrInstall.BUILD) # Connect child dependency self.connect(result, child_action) # Look for source dependencies (i.e sources needed) if '%s_source_list' % primitive in dir(spec): for s in getattr(spec, '%s_source_list' % primitive): # add source install node src_install_uid = result.uid.rsplit('.', 1)[0] + \ '.source_install.' + s.name src_install_action = InstallSource(src_install_uid, s) self.add(src_install_action) self.connect(result, src_install_action) # Then add nodes to create that source (download or creation # using anod source and checkouts) if s.name in self.sources: spec_decl, obj = self.sources[s.name] else: raise AnodError( origin='expand_spec', message='source %s does not exist ' '(referenced by %s)' % (s.name, result.uid)) src_get_action = GetSource(obj) if src_get_action in self: self.connect(src_install_action, src_get_action) continue self.add(src_get_action) self.connect(src_install_action, src_get_action) src_download_action = DownloadSource(obj) self.add(src_download_action) if isinstance(obj, UnmanagedSourceBuilder): # In that case only download is available self.connect(src_get_action, src_download_action) else: source_action = self.add_spec(spec_decl, BaseEnv(), 'source', None, source_name=s.name) for repo in obj.checkout: r = Checkout(repo) self.add(r) self.connect(source_action, r) self.add_decision(CreateSourceOrDownload, src_get_action, source_action, src_download_action) return result @classmethod def always_download_source_resolver(cls, action, decision): """Resolver for the schedule method. 
The resolver takes the following decision: * sources are always downloaded * any build that produces a package should be added explicitely :param action: action to consider :type action: Action :param decision: decisition to resolve :type decison: Decision :return: True if the action should be scheduled, False otherwise :rtype: False :raise SchedulingError: in case no decision can be taken """ if isinstance(action, CreateSource): return False elif isinstance(action, DownloadSource): return True else: if decision.choice is None: msg = 'a decision should be taken between %s and %s' % \ (decision.left, decision.right) if decision.expected_choice == Decision.LEFT: msg += '(first expected)' elif decision.expected_choice == Decision.RIGHT: msg += '(second expected)' elif decision.choice == Decision.BOTH: msg = 'cannot do both %s and %s' % \ (decision.left, decision.right) else: msg = 'cannot do %s as %s is expected after ' \ 'scheduling resolution' % \ (action.uid, decision.get_expected_decision()) raise SchedulingError(msg) def schedule(self, resolver): """Compute a DAG of scheduled actions. :param resolver: a function that helps the scheduler resolve cases for which a decision should be taken :type resolver: (Action, Decision) -> bool """ rev = self.tree.reverse_graph() uploads = [] dag = DAG() for uid, action in rev: if uid == 'root': # Root node is alway in the final DAG dag.add_vertex(uid, action) elif isinstance(action, Decision): # Decision node does not appears in the final DAG but we need # to apply the triggers based on the current list of scheduled # actions. action.apply_triggers(dag) elif isinstance(action, UploadComponent): uploads.append((action, self.tree.vertex_predecessors[uid])) else: # Compute the list of successors for the current node (i.e: # predecessors in the reversed graph). Ignore UploadComponent # nodes as they will be processed only once the scheduling # is done. preds = list([k for k in rev.vertex_predecessors[uid] if not isinstance(rev[k], UploadComponent)]) if len(preds) == 1 and isinstance(rev[preds[0]], Decision): decision = rev[preds[0]] # The current node addition is driven by a decision # First check that the parent of the decision is # scheduled. If not discard the item. if decision.initiator not in dag: continue # Now check decision made. If the decision cannot be made # delegate to the resolve function. choice = decision.get_decision() if choice == uid: dag.add_vertex(uid, action) dag.update_vertex(decision.initiator, predecessors=[uid], enable_checks=False) elif choice is None: # delegate to resolver try: if resolver(action, decision): dag.add_vertex(uid, action) dag.update_vertex(decision.initiator, predecessors=[uid], enable_checks=False) except SchedulingError as e: # In order to help the analysis of a scheduling # error compute the explicit initiators of that # action dag.add_vertex(uid, action) dag.update_vertex(decision.initiator, predecessors=[action.uid], enable_checks=False) rev_graph = dag.reverse_graph() # Initiators are explicit actions (connected to # 'root') that are in the closure of the failing # node. initiators = [ iuid for iuid in rev_graph.get_closure(uid) if 'root' in rev_graph.vertex_predecessors[iuid]] raise SchedulingError(e.message, uid=uid, initiators=initiators) else: # An action is scheduled only if one of its successors is # scheduled. 
successors = [k for k in preds if k in dag] if successors: dag.add_vertex(uid, action) for a in successors: dag.update_vertex(a, predecessors=[uid], enable_checks=False) # Handle UploadComponent nodes. Add the node only if all predecessors # are scheduled. for action, predecessors in uploads: if len([p for p in predecessors if p not in dag]) == 0: dag.update_vertex(action.uid, action, predecessors=predecessors, enable_checks=False) # connect upload to the root node dag.update_vertex('root', predecessors=[action.uid]) return dag
def schedule(self, resolver):
    """Compute a DAG of scheduled actions.

    :param resolver: a function that helps the scheduler resolve
        cases for which a decision should be taken
    :type resolver: (Action, Decision) -> bool
    """
    rev = self.tree.reverse_graph()
    uploads = []
    dag = DAG()

    for uid, action in rev:
        if uid == 'root':
            # Root node is always in the final DAG
            dag.add_vertex(uid, action)
        elif isinstance(action, Decision):
            # Decision nodes do not appear in the final DAG but we need
            # to apply the triggers based on the current list of scheduled
            # actions.
            action.apply_triggers(dag)
        elif isinstance(action, UploadComponent):
            uploads.append((action, self.tree.vertex_predecessors[uid]))
        else:
            # Compute the list of successors for the current node (i.e:
            # predecessors in the reversed graph). Ignore UploadComponent
            # nodes as they will be processed only once the scheduling
            # is done.
            preds = list([k for k in rev.vertex_predecessors[uid]
                          if not isinstance(rev[k], UploadComponent)])

            if len(preds) == 1 and isinstance(rev[preds[0]], Decision):
                decision = rev[preds[0]]
                # The current node addition is driven by a decision

                # First check that the parent of the decision is
                # scheduled. If not, discard the item.
                if decision.initiator not in dag:
                    continue

                # Now check the decision made. If the decision cannot be
                # made, delegate to the resolver function.
                choice = decision.get_decision()

                if choice == uid:
                    dag.add_vertex(uid, action)
                    dag.update_vertex(decision.initiator,
                                      predecessors=[uid],
                                      enable_checks=False)
                elif choice is None:
                    # delegate to resolver
                    try:
                        if resolver(action, decision):
                            dag.add_vertex(uid, action)
                            dag.update_vertex(decision.initiator,
                                              predecessors=[uid],
                                              enable_checks=False)
                    except SchedulingError as e:
                        # In order to help the analysis of a scheduling
                        # error compute the explicit initiators of that
                        # action
                        dag.add_vertex(uid, action)
                        dag.update_vertex(decision.initiator,
                                          predecessors=[action.uid],
                                          enable_checks=False)
                        rev_graph = dag.reverse_graph()
                        # Initiators are explicit actions (connected to
                        # 'root') that are in the closure of the failing
                        # node.
                        initiators = [
                            iuid for iuid in rev_graph.get_closure(uid)
                            if 'root' in rev_graph.vertex_predecessors[iuid]]
                        raise SchedulingError(e.message, uid=uid,
                                              initiators=initiators)
            else:
                # An action is scheduled only if one of its successors is
                # scheduled.
                successors = [k for k in preds if k in dag]
                if successors:
                    dag.add_vertex(uid, action)
                    for a in successors:
                        dag.update_vertex(a,
                                          predecessors=[uid],
                                          enable_checks=False)

    # Handle UploadComponent nodes. Add the node only if all predecessors
    # are scheduled.
    for action, predecessors in uploads:
        if len([p for p in predecessors if p not in dag]) == 0:
            dag.update_vertex(action.uid, action,
                              predecessors=predecessors,
                              enable_checks=False)
            # connect upload to the root node
            dag.update_vertex('root', predecessors=[action.uid])
    return dag
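# Hypothetical usage sketch for schedule() above, assuming e3-core's
# e3.anod.loader.AnodSpecRepository, the AnodContext class defined in this
# module (e3.anod.context), a local "specs" directory containing anod spec
# files and a spec named "mypkg" (all of these names are illustrative). The
# always_download_source_resolver classmethod shown elsewhere in this context
# class is used to settle CreateSource vs DownloadSource decisions.
from e3.anod.context import AnodContext
from e3.anod.loader import AnodSpecRepository


def demo_schedule(spec_dir="specs", spec_name="mypkg"):
    repo = AnodSpecRepository(spec_dir)
    context = AnodContext(repo)
    context.add_anod_action(spec_name, primitive="build")

    scheduled = context.schedule(AnodContext.always_download_source_resolver)
    # The result is itself a DAG: iterate it to see the scheduled action uids.
    return [uid for uid, _ in scheduled]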
def __init__(self, item_list, run_job, collect_result,
             parallelism=None, abort_file=None, dyn_poll_interval=True):
    """Launch loop.

    :param item_list: a list of jobs or a dag
    :param run_job: a function that takes a job for argument and
        returns the spawned process (:class:`e3.os_process.Run` object).
        Its prototype should be ``func(name, job_info)`` with name the
        job identifier and job_info the related information, passed in
        a tuple (slot_number, job_retry). Note that if you want to take
        advantage of the parallelism, the spawned process should be
        launched in the background (i.e. with bg=True when using
        :class:`e3.os_process.Run`). If run_job returns SKIP_EXECUTION
        instead of a Run object, the mainloop will directly call
        collect_result without waiting.
    :param collect_result: a function called when a job is finished.
        Its prototype should be ``func(name, process, job_info)``. If
        collect_result raises NeedRequeue then the test will be
        requeued. job_info is a tuple: (slot_number, job_nb_retry)
    :param parallelism: number of workers
    :type parallelism: int | None
    :param abort_file: if specified, the loop will abort if the file
        is present
    :type abort_file: str | None
    :param dyn_poll_interval: if True the interval between each
        polling iteration is automatically updated. Otherwise it's set
        to 0.1 seconds.
    :type dyn_poll_interval: bool
    """
    e = e3.env.Env()
    self.parallelism = e.get_attr("main_options.mainloop_jobs",
                                  default_value=1,
                                  forced_value=parallelism)
    self.abort_file = e.get_attr("main_options.mainloop_abort_file",
                                 default_value=None,
                                 forced_value=abort_file)

    if self.parallelism == 0:
        if e.build.cpu.cores != UNKNOWN:
            self.parallelism = e.build.cpu.cores
        else:
            self.parallelism = 1

    e3.log.debug("start main loop with %d workers (abort on %s)",
                 self.parallelism, self.abort_file)
    self.workers = [None] * self.parallelism
    self.locked_items = [None] * self.parallelism

    if not isinstance(item_list, DAG):
        self.item_list = DAG(item_list)
    else:
        self.item_list = item_list

    self.iterator = self.item_list.__iter__()
    self.collect_result = collect_result

    active_workers = 0
    max_active_workers = self.parallelism
    poll_sleep = 0.1
    no_free_item = False

    try:
        while True:
            # Check for abortion
            if self.abort_file is not None and \
                    os.path.isfile(self.abort_file):
                logger.info('Aborting: file %s has been found',
                            self.abort_file)
                self.abort()
                return  # Exit the loop

            # Find free workers
            for slot, worker in enumerate(self.workers):
                if worker is None:
                    # A worker slot is free so use it for the next job
                    next_id, next_job = self.iterator.next()
                    if next_job is None:
                        no_free_item = True
                        break
                    else:
                        self.locked_items[slot] = next_id
                        self.workers[slot] = Worker(next_job,
                                                    run_job,
                                                    collect_result,
                                                    slot)
                        active_workers += 1

            poll_counter = 0
            e3.log.debug('Wait for free worker')
            while active_workers >= max_active_workers or no_free_item:
                # All workers are occupied so wait for one to finish
                poll_counter += 1
                for slot, worker in enumerate(self.workers):
                    if worker is None:
                        continue

                    # Test if the worker is still active and has more
                    # jobs pending
                    if not (worker.poll() or worker.execute_next()):
                        # If not the case free the worker slot
                        active_workers -= 1
                        self.workers[slot] = None
                        self.item_list.release(self.locked_items[slot])
                        no_free_item = False
                        self.locked_items[slot] = None

                sleep(poll_sleep)

            if dyn_poll_interval:
                poll_sleep = compute_next_dyn_poll(poll_counter, poll_sleep)

    except (StopIteration, KeyboardInterrupt) as e:
        if e.__class__ == KeyboardInterrupt:
            # Got ^C, abort the mainloop
            logger.error("User interrupt")

        # All the jobs are finished
        while active_workers > 0:
            for slot, worker in enumerate(self.workers):
                if worker is None:
                    continue

                # Test if the worker is still active and ignore any
                # pending job
                try:
                    still_running = worker.poll()
                except TooManyErrors:
                    still_running = False
                    # We're not spawning more jobs so we can safely
                    # ignore all TooManyErrors exceptions.
                if not still_running:
                    active_workers -= 1
                    self.workers[slot] = None
            sleep(0.1)

        if e.__class__ == KeyboardInterrupt:
            self.abort()
            raise

    except TooManyErrors:
        # Too many failures, abort the execution
        logger.error("Too many errors, aborting")
        self.abort()
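# Hypothetical usage sketch for the mainloop above, built only from its
# docstring: run_job spawns each item in the background (bg=True) and
# collect_result is called with (name, process, job_info) once the spawned
# process has finished. mainloop_class and run_class are passed in by the
# caller and stand for the MainLoop class defined above and the Run class it
# expects (documented as e3.os_process.Run); commands is a dict mapping an
# item name to an argv list.
def demo_mainloop(mainloop_class, run_class, commands):
    statuses = {}

    def run_job(name, job_info):
        slot_number, nb_retry = job_info
        # Spawn the command in the background so several jobs can run at once.
        return run_class(commands[name], bg=True)

    def collect_result(name, process, job_info):
        statuses[name] = process.status

    # Constructing the mainloop runs it until all items are processed.
    mainloop_class(list(commands), run_job, collect_result, parallelism=2)
    return statuses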
def schedule(self, resolver):
    """Compute a DAG of scheduled actions.

    :param resolver: a function that helps the scheduler resolve
        cases for which a decision should be taken
    :type resolver: (Action, Decision) -> bool
    """
    rev = self.tree.reverse_graph()
    uploads = []
    dag = DAG()

    # Retrieve existing tags
    dag.tags = self.tree.tags

    # Note that schedule performs a pruning on the DAG, thus no cycle can
    # be introduced. That's why checks are disabled when creating the
    # result graph.
    for uid, action in rev:
        if uid == "root":
            # Root node is always in the final DAG
            dag.update_vertex(uid, action, enable_checks=False)
        elif isinstance(action, Decision):
            # Decision nodes do not appear in the final DAG but we need
            # to apply the triggers based on the current list of scheduled
            # actions.
            action.apply_triggers(dag)
        elif isinstance(action, Upload):
            uploads.append((action, self.tree.get_predecessors(uid)))
        else:
            # Compute the list of successors for the current node (i.e:
            # predecessors in the reversed graph). Ignore Upload
            # nodes as they will be processed only once the scheduling
            # is done.
            preds = list([
                k for k in rev.get_predecessors(uid)
                if not isinstance(rev[k], Upload)
            ])

            if len(preds) == 1 and isinstance(rev[preds[0]], Decision):
                decision = rev[preds[0]]
                # The current node addition is driven by a decision

                # First check that the parent of the decision is
                # scheduled. If not, discard the item.
                if decision.initiator not in dag:
                    continue

                # Now check the decision made. If the decision cannot be
                # made, delegate to the resolver function.
                choice = decision.get_decision()

                if choice == uid:
                    dag.update_vertex(uid, action, enable_checks=False)
                    dag.update_vertex(decision.initiator,
                                      predecessors=[uid],
                                      enable_checks=False)
                elif choice is None:
                    # delegate to resolver
                    try:
                        if resolver(action, decision):
                            dag.update_vertex(uid, action, enable_checks=False)
                            dag.update_vertex(
                                decision.initiator,
                                predecessors=[uid],
                                enable_checks=False,
                            )
                    except SchedulingError as e:
                        # In order to help the analysis of a scheduling
                        # error compute the explicit initiators of that
                        # action
                        dag.update_vertex(uid, action, enable_checks=False)
                        dag.update_vertex(
                            decision.initiator,
                            predecessors=[action.uid],
                            enable_checks=False,
                        )
                        rev_graph = dag.reverse_graph()
                        # Initiators are explicit actions (connected to
                        # 'root') that are in the closure of the failing
                        # node.
                        initiators = [
                            iuid
                            for iuid in rev_graph.get_closure(uid)
                            if "root" in rev_graph.get_predecessors(iuid)
                        ]
                        raise SchedulingError(e.messages, uid=uid,
                                              initiators=initiators)
            else:
                # An action is scheduled only if one of its successors is
                # scheduled.
                successors = [k for k in preds if k in dag]
                if successors:
                    dag.update_vertex(uid, action, enable_checks=False)
                    for a in successors:
                        dag.update_vertex(a,
                                          predecessors=[uid],
                                          enable_checks=False)

    # Handle Upload nodes. Add the node only if all predecessors
    # are scheduled.
    for action, predecessors in uploads:
        if len([p for p in predecessors if p not in dag]) == 0:
            dag.update_vertex(action.uid, action,
                              predecessors=predecessors,
                              enable_checks=False)
            # connect upload to the root node
            dag.update_vertex("root",
                              predecessors=[action.uid],
                              enable_checks=False)
    return dag
class AnodContext(object): """Anod context. :ivar repo: an anod spec repository :vartype repo: e3.anod.loader.AnodSpecRepository :ivar tree: a DAG containing the list of possible actions :ivar root: root node of the DAG :ivar cache: cache of anod instances, indexed by the spec's name. :vartype cache: dict[e3.anod.spec.Anod] :ivar sources: list of available sources in the current context, indexed by the source's name. :vartype sources: list[e3.anod.package.SourceBuilder] :ivar default_env: default environment (used to override build='default') when simulating a list of action from another machine. :ivar plan: maintain a link between a plan line and the generated actions which is useful for setting parameters such as weather or process that are conveyed by the plan and not by the specs """ def __init__(self, spec_repository, default_env=None, reject_duplicates=False): """Initialize a new context. :param spec_repository: an Anod repository :type spec_repository: e3.anod.loader.AnodSpecRepository :param default_env: an env that should be considered as the default for the current context. Mainly useful to simulate another server context. If None then we assume that the context if the local server :type default_env: BaseEnv | None :param reject_duplicates: if True, raise SchedulingError when two duplicated action are generated :type reject_duplicates: bool """ self.repo = spec_repository if default_env is None: self.default_env = BaseEnv() else: self.default_env = default_env.copy() self.reject_duplicates = reject_duplicates self.tree = DAG() self.root = Root() self.dependencies = {} self.add(self.root) self.cache = {} self.sources = {} def load(self, name, env, qualifier, kind, sandbox=None, source_name=None): """Load a spec instance. :param name: spec name :type name: str :param env: environment to use for the spec instance :type env: BaseEnv | None :param qualifier: spec qualifier :type qualifier: str | None :param kind: primitive used for the loaded spec :type kind: str :param sandbox: is not None bind the anod instances to a sandbox :type sandbox: None | Sandbox :param source_name: when the primitive is "source" we create a specific instance for each source package we have to create. :type source_name: str | None :return: a spec instance :rtype: e3.anod.spec.Anod """ if env is None: env = self.default_env # Key used for the spec instance cache key = (name, env.build, env.host, env.target, qualifier, kind, source_name) if key not in self.cache: # Spec is not in cache so create a new instance self.cache[key] = self.repo.load(name)(qualifier=qualifier, env=env, kind=kind) if sandbox is not None: self.cache[key].bind_to_sandbox(sandbox) # Update tracking of dependencies self.dependencies[self.cache[key].uid] = {} # Update the list of available sources. ??? Should be done # once per spec (and not once per spec instance). Need some # spec cleanup to achieve that ??? if self.cache[key].source_pkg_build is not None: for s in self.cache[key].source_pkg_build: self.sources[s.name] = (name, s) return self.cache[key] def add(self, data, *args): """Add node to context tree. :param data: node data :type data: e3.anod.action.Action :param args: list of predecessors :type args: e3.anod.action.Action """ preds = [k.uid for k in args] self.tree.update_vertex(data.uid, data, predecessors=preds, enable_checks=False) def add_decision(self, decision_class, root, left, right): """Add a decision node. 
This create the following subtree inside the dag:: root --> decision --> left |-> right :param decision_class: Decision subclass to use :type decision_class: () -> Decision :param root: parent node of the decision node :type root: e3.anod.action.Action :param left: left decision (child of Decision node) :type left: e3.anod.action.Action :param right: right decision (child of Decision node) :type right: e3.anod.action.Action """ decision_action = decision_class(root, left, right) self.add(decision_action, left, right) self.connect(root, decision_action) def connect(self, action, *args): """Add predecessors to a node. :param action: parent node :type action: e3.anod.action.Action :param args: list of predecessors :type args: list[e3.anod.action.Action] """ preds = [k.uid for k in args] self.tree.update_vertex(action.uid, predecessors=preds, enable_checks=False) def __contains__(self, data): """Check if a given action is already in the internal DAG. :param data: an Action :type data: e3.anod.action.Action """ return data.uid in self.tree def __getitem__(self, key): """Retrieve action from the internal DAG based on its key. :param key: action uid :type key: str :return: an Action :rtype: e3.node.action.Action """ return self.tree[key] def predecessors(self, action): """Retrieve predecessors of a given action. :param action: the parent action :type action: e3.anod.action.Action :return: the predecessor list :rtype: list[e3.anod.action.Action] """ return [self[el] for el in self.tree.get_predecessors(action.uid)] def link_to_plan(self, vertex_id, plan_line, plan_args): """Tag the vertex with plan info. :param vertex_id: ID of the vertex :type vertex_id: str :param plan_line: corresponding line:linenumber in the plan :type plan_line: str :param plan_args: action args after plan execution, taking into account plan context (such as with defaults(XXX):) :type plan_args: dict """ if self.reject_duplicates: previous_tag = self.tree.get_tag(vertex_id=vertex_id) if previous_tag and previous_tag["plan_line"] != plan_line: raise SchedulingError( "entries {} and {} conflict because they result in " "the same build space (id: {}). Check your " "build_space_name property or your qualifiers".format( previous_tag["plan_line"], plan_line, vertex_id)) self.tree.add_tag(vertex_id, { "plan_line": plan_line, "plan_args": plan_args }) def add_anod_action( self, name, env=None, primitive=None, qualifier=None, source_packages=None, upload=True, plan_line=None, plan_args=None, sandbox=None, ): """Add an Anod action to the context. 
:param name: spec name :type name: str :param env: spec environment :type env: BaseEnv | None :param primitive: spec primitive :type primitive: str :param qualifier: qualifier :type qualifier: str | None :param source_packages: if not empty only create the specified list of source packages and not all source packages defined in the anod specification file :type source_packages: list[str] | None :param upload: if True consider uploading to the store :type upload: bool :param plan_line: corresponding line:linenumber in the plan :type plan_line: str :param plan_args: action args after plan execution, taking into account plan context (such as with defaults(XXX):) :type plan_args: dict :return: the root added action :rtype: Action """ # First create the subtree for the spec result = self.add_spec( name, env, primitive, qualifier, source_packages=source_packages, plan_line=plan_line, plan_args=plan_args, sandbox=sandbox, upload=upload, ) # Resulting subtree should be connected to the root node self.connect(self.root, result) # Ensure decision is set in case of explicit build or install if primitive == "build": build_action = None for el in self.predecessors(result): if isinstance(el, BuildOrDownload): el.set_decision(BuildOrDownload.BUILD, plan_line) build_action = self[el.left] if build_action is None and isinstance(result, Build): build_action = result # Create upload nodes if build_action is not None: spec = build_action.data if spec.component is not None and upload: if spec.has_package: upload_bin = UploadBinaryComponent(spec) else: upload_bin = UploadSourceComponent(spec) self.add(upload_bin) # ??? is it needed? if plan_line is not None and plan_args is not None: self.link_to_plan( vertex_id=upload_bin.uid, plan_line=plan_line, plan_args=plan_args, ) self.connect(self.root, upload_bin) self.connect(upload_bin, build_action) elif primitive == "install": for el in self.predecessors(result): if isinstance(el, BuildOrDownload): el.set_decision(BuildOrDownload.INSTALL, plan_line) return result def add_spec( self, name, env=None, primitive=None, qualifier=None, source_packages=None, expand_build=True, source_name=None, plan_line=None, plan_args=None, sandbox=None, upload=False, ): """Expand an anod action into a tree (internal). :param name: spec name :type name: str :param env: spec environment :type env: BaseEnv | None :param primitive: spec primitive :type primitive: str :param qualifier: qualifier :type qualifier: str | None :param source_packages: if not empty only create the specified list of source packages and not all source packages defined in the anod specification file :type source_packages: list[str] | None :param expand_build: should build primitive be expanded :type expand_build: bool :param source_name: source name associated with the source primitive :type source_name: str | None :param plan_line: corresponding line:linenumber in the plan :type plan_line: str :param plan_args: action args after plan execution, taking into account plan context (such as with defaults(XXX):) :type plan_args: dict :param sandbox: if not None, anod instance are automatically bind to the given sandbox :type sandbox: None | Sandbox :param upload: if True consider uploads to the store (sources and binaries) :type upload: bool """ def add_action(data, connect_with=None): self.add(data) if connect_with is not None: self.connect(connect_with, data) def add_dep(spec_instance, dep, dep_instance): """Add a new dependency in an Anod instance dependencies dict. 
:param spec_instance: an Anod instance :type spec_instance: Anod :param dep: the dependency we want to add :type dep: Dependency :param dep_instance: the Anod instance loaded for that dependency :type dep_instance: Anod """ if dep.local_name in spec_instance.deps: raise AnodError( origin="expand_spec", message="The spec {} has two dependencies with the same " "local_name attribute ({})".format(spec_instance.name, dep.local_name), ) spec_instance.deps[dep.local_name] = dep_instance # Initialize a spec instance e3.log.debug("add spec: name:{}, qualifier:{}, primitive:{}".format( name, qualifier, primitive)) spec = self.load( name, qualifier=qualifier, env=env, kind=primitive, sandbox=sandbox, source_name=source_name, ) # Initialize the resulting action based on the primitive name if primitive == "source": if source_name is not None: result = CreateSource(spec, source_name) else: # Create the root node result = CreateSources(spec) # A consequence of calling add_action here # will result in skipping dependencies parsing. add_action(result) # Then one node for each source package for sb in spec.source_pkg_build: if source_packages and sb.name not in source_packages: # This source package is defined in the spec but # explicitly excluded in the plan continue if isinstance(sb, UnmanagedSourceBuilder): # do not create source package for unmanaged source continue sub_result = self.add_spec( name=name, env=env, primitive="source", source_name=sb.name, plan_line=plan_line, plan_args=plan_args, sandbox=sandbox, upload=upload, ) self.connect(result, sub_result) elif primitive == "build": result = Build(spec) elif primitive == "test": result = Test(spec) elif primitive == "install": result = Install(spec) else: # defensive code raise ValueError("add_spec error: %s is not known" % primitive) # If this action is directly linked with a plan line make sure # to register the link between the action and the plan even # if the action has already been added via another dependency if plan_line is not None and plan_args is not None: self.link_to_plan(vertex_id=result.uid, plan_line=plan_line, plan_args=plan_args) if (primitive == "install" and not spec.has_package and has_primitive(spec, "build")): if plan_line is not None and plan_args is not None: # We have an explicit call to install() in the plan but the # spec has no binary package to download. raise SchedulingError( "error in plan at {}: " "install should be replaced by build".format(plan_line)) # Case in which we have an install dependency but no install # primitive. In that case the real dependency is a build tree # dependency. In case there is no build primitive and no # package keep the install primitive (usually this means there # is an overloaded download procedure). return self.add_spec( name, env, "build", qualifier, expand_build=False, plan_args=plan_args, plan_line=plan_line, sandbox=sandbox, upload=upload, ) if expand_build and primitive == "build" and spec.has_package: # A build primitive is required and the spec defined a binary # package. In that case the implicit post action of the build # will be a call to the install primitive return self.add_spec( name, env, "install", qualifier, plan_args=None, plan_line=plan_line, sandbox=sandbox, upload=upload, ) # Add this stage if the action is already in the DAG, then it has # already been added. 
if result in self: return result if not has_primitive(spec, primitive): raise SchedulingError("spec %s does not support primitive %s" % (name, primitive)) # Add the action in the DAG add_action(result) if primitive == "install": # Expand an install node to # install --> decision --> build # \-> download binary download_action = DownloadBinary(spec) add_action(download_action) if has_primitive(spec, "build"): build_action = self.add_spec( name=name, env=env, primitive="build", qualifier=qualifier, expand_build=False, plan_args=None, plan_line=plan_line, sandbox=sandbox, upload=upload, ) self.add_decision(BuildOrDownload, result, build_action, download_action) else: self.connect(result, download_action) elif primitive == "source": if source_name is not None: # Also add an UploadSource action if upload: upload_src = UploadSource(spec, source_name) self.add(upload_src) # Link the upload to the current context if plan_line is not None and plan_args is not None: self.link_to_plan( vertex_id=upload_src.uid, plan_line=plan_line, plan_args=plan_args, ) self.connect(self.root, upload_src) self.connect(upload_src, result) for sb in spec.source_pkg_build: if sb.name == source_name: for checkout in sb.checkout: if checkout not in self.repo.repos: raise SchedulingError( origin="add_spec", message="unknown repository {}".format( checkout), ) co = Checkout(checkout, self.repo.repos.get(checkout)) add_action(co, result) # Look for dependencies spec_dependencies = [] if ("%s_deps" % primitive in dir(spec) and getattr(spec, "%s_deps" % primitive) is not None): spec_dependencies += getattr(spec, "%s_deps" % primitive) for e in spec_dependencies: if isinstance(e, Dependency): if e.kind == "source": # A source dependency does not create a new node but # ensure that sources associated with it are available child_instance = self.load( e.name, kind="source", env=self.default_env, qualifier=None, sandbox=sandbox, ) add_dep(spec_instance=spec, dep=e, dep_instance=child_instance) self.dependencies[spec.uid][e.local_name] = ( e, spec.deps[e.local_name], ) continue child_action = self.add_spec( name=e.name, env=e.env(spec, self.default_env), primitive=e.kind, qualifier=e.qualifier, plan_args=None, plan_line=plan_line, sandbox=sandbox, upload=upload, ) add_dep(spec_instance=spec, dep=e, dep_instance=child_action.anod_instance) self.dependencies[spec.uid][e.local_name] = ( e, spec.deps[e.local_name]) if e.kind == "build" and self[ child_action.uid].data.kind == "install": # We have a build tree dependency that produced a # subtree starting with an install node. In that case # we expect the user to choose BUILD as decision. dec = self.predecessors(child_action)[0] if isinstance(dec, BuildOrDownload): dec.add_trigger( result, BuildOrDownload.BUILD, plan_line if plan_line is not None else "unknown line", ) # Connect child dependency self.connect(result, child_action) # Look for source dependencies (i.e sources needed) if "%s_source_list" % primitive in dir(spec): source_list = getattr(spec, "{}_source_list".format(primitive)) for s in source_list: # set source builder if s.name in self.sources: s.set_builder(self.sources[s.name]) # set other sources to compute source ignore s.set_other_sources(source_list) # add source install node src_install_uid = (result.uid.rsplit(".", 1)[0] + ".source_install." 
+ s.name) src_install_action = InstallSource(src_install_uid, spec, s) add_action(src_install_action, connect_with=result) # Then add nodes to create that source (download or creation # using anod source and checkouts) if s.name in self.sources: spec_decl, obj = self.sources[s.name] else: raise AnodError( origin="expand_spec", message="source %s does not exist " "(referenced by %s)" % (s.name, result.uid), ) src_get_action = GetSource(obj) if src_get_action in self: self.connect(src_install_action, src_get_action) continue add_action(src_get_action, connect_with=src_install_action) src_download_action = DownloadSource(obj) add_action(src_download_action) if isinstance(obj, UnmanagedSourceBuilder): # In that case only download is available self.connect(src_get_action, src_download_action) else: source_action = self.add_spec( name=spec_decl, env=self.default_env, primitive="source", plan_args=None, plan_line=plan_line, source_name=s.name, sandbox=sandbox, upload=upload, ) for repo in obj.checkout: r = Checkout(repo, self.repo.repos.get(repo)) add_action(r, connect_with=source_action) self.add_decision( CreateSourceOrDownload, src_get_action, source_action, src_download_action, ) return result @classmethod def decision_error(cls, action, decision): """Raise SchedulingError. :param action: action to consider :type action: Action :param decision: decision to resolve :type decision: Decision :raise SchedulingError """ if decision.choice is None and decision.expected_choice in ( Decision.LEFT, Decision.RIGHT, ): if decision.expected_choice == BuildOrDownload.BUILD: msg = ("A spec in the plan has a build_tree dependency" " on {spec}. Either explicitly add the line {plan_line}" " or change the dependency to set" ' require="installation" if possible'.format( spec=action.data.name, plan_line=decision.suggest_plan_fix( decision.expected_choice), )) else: msg = "This plan resolver requires an explicit {}".format( decision.suggest_plan_fix(decision.expected_choice)) elif decision.choice is None and decision.expected_choice is None: left_decision = decision.suggest_plan_fix(Decision.LEFT) right_decision = decision.suggest_plan_fix(Decision.RIGHT) msg = ("This plan resolver cannot decide whether what to do for" " resolving {}.".format(decision.initiator)) if left_decision is not None and right_decision is not None: msg += " Please either add {} or {} in the plan".format( left_decision, right_decision) elif decision.choice == Decision.BOTH: msg = "cannot do both %s and %s" % (decision.left, decision.right) else: trigger_decisions = "\n".join( "{} made by {} initiated by {}".format( decision.left if trigger_decision == Decision.LEFT else decision.right, trigger_action, trigger_plan_line, ) for ( trigger_action, trigger_decision, trigger_plan_line, ) in decision.triggers) msg = ("explicit {} decision made by {} conflicts with the " "following decision{}:\n{}".format( decision.description(decision.get_expected_decision()), decision.decision_maker, "s" if len(decision.triggers) > 1 else "", trigger_decisions, )) raise SchedulingError(msg) @classmethod def always_download_source_resolver(cls, action, decision): """Force source download when scheduling a plan. 
        The resolver takes the following decisions:
          * sources are always downloaded
          * any build that produces a package should be added explicitly

        :param action: action to consider
        :type action: Action
        :param decision: decision to resolve
        :type decision: Decision
        :return: True if the action should be scheduled, False otherwise
        :rtype: bool
        :raise SchedulingError: in case no decision can be taken
        """
        if isinstance(action, CreateSource):
            return False
        elif isinstance(action, DownloadSource):
            return True
        else:
            return cls.decision_error(action, decision)

    @classmethod
    def always_create_source_resolver(cls, action, decision):
        """Force source creation when scheduling a plan."""
        if isinstance(action, CreateSource):
            return True
        elif isinstance(action, DownloadSource):
            return False
        else:
            return cls.decision_error(action, decision)

    def schedule(self, resolver):
        """Compute a DAG of scheduled actions.

        :param resolver: a function that helps the scheduler resolve cases
            for which a decision should be taken
        :type resolver: (Action, Decision) -> bool
        """
        rev = self.tree.reverse_graph()
        uploads = []
        dag = DAG()

        # Retrieve existing tags
        dag.tags = self.tree.tags

        # Note that schedule performs a pruning on the DAG, thus no cycle
        # can be introduced. That's why checks are disabled when creating
        # the result graph.
        for uid, action in rev:
            if uid == "root":
                # The root node is always in the final DAG
                dag.update_vertex(uid, action, enable_checks=False)
            elif isinstance(action, Decision):
                # Decision nodes do not appear in the final DAG, but we need
                # to apply their triggers based on the current list of
                # scheduled actions.
                action.apply_triggers(dag)
            elif isinstance(action, Upload):
                uploads.append((action, self.tree.get_predecessors(uid)))
            else:
                # Compute the list of successors for the current node (i.e.
                # predecessors in the reversed graph). Ignore Upload nodes
                # as they will be processed only once the scheduling is done.
                preds = [
                    k
                    for k in rev.get_predecessors(uid)
                    if not isinstance(rev[k], Upload)
                ]

                if len(preds) == 1 and isinstance(rev[preds[0]], Decision):
                    decision = rev[preds[0]]

                    # The addition of the current node is driven by a
                    # decision. First check that the parent of the decision
                    # is scheduled. If not, discard the item.
                    if decision.initiator not in dag:
                        continue

                    # Now check the decision that was made. If no decision
                    # can be made, delegate to the resolver function.
                    choice = decision.get_decision()

                    if choice == uid:
                        dag.update_vertex(uid, action, enable_checks=False)
                        dag.update_vertex(
                            decision.initiator,
                            predecessors=[uid],
                            enable_checks=False,
                        )
                    elif choice is None:
                        # Delegate to the resolver
                        try:
                            if resolver(action, decision):
                                dag.update_vertex(
                                    uid, action, enable_checks=False
                                )
                                dag.update_vertex(
                                    decision.initiator,
                                    predecessors=[uid],
                                    enable_checks=False,
                                )
                        except SchedulingError as e:
                            # In order to help the analysis of a scheduling
                            # error, compute the explicit initiators of that
                            # action
                            dag.update_vertex(uid, action, enable_checks=False)
                            dag.update_vertex(
                                decision.initiator,
                                predecessors=[action.uid],
                                enable_checks=False,
                            )
                            rev_graph = dag.reverse_graph()
                            # Initiators are explicit actions (connected to
                            # 'root') that are in the closure of the failing
                            # node.
                            initiators = [
                                iuid
                                for iuid in rev_graph.get_closure(uid)
                                if "root" in rev_graph.get_predecessors(iuid)
                            ]
                            raise SchedulingError(
                                e.messages, uid=uid, initiators=initiators
                            )
                else:
                    # An action is scheduled only if one of its successors
                    # is scheduled.
                    successors = [k for k in preds if k in dag]
                    if successors:
                        dag.update_vertex(uid, action, enable_checks=False)
                        for a in successors:
                            dag.update_vertex(
                                a, predecessors=[uid], enable_checks=False
                            )

        # Handle Upload nodes: add an upload node only if all its
        # predecessors are scheduled.
        for action, predecessors in uploads:
            if all(p in dag for p in predecessors):
                dag.update_vertex(
                    action.uid,
                    action,
                    predecessors=predecessors,
                    enable_checks=False,
                )
                # Connect the upload to the root node
                dag.update_vertex(
                    "root", predecessors=[action.uid], enable_checks=False
                )
        return dag
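# Editor's note: the sketch below is illustrative and not part of the
# original sources. It shows how the resolvers and `schedule` above are
# typically combined once a context has been populated with actions.
# `AnodContext` is assumed to be the enclosing class (as in
# e3.anod.context) and `ac` an already populated instance; only
# `schedule`, the resolver classmethods and the DAG iteration protocol
# shown above are relied upon.
def example_schedule(ac):
    """Return the uids of scheduled actions, always downloading sources."""
    scheduled = ac.schedule(AnodContext.always_download_source_resolver)
    # The result is a regular DAG: iterating it yields (uid, action)
    # pairs in an order compatible with the dependencies.
    return [uid for uid, action in scheduled]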
def testsuite_main(self, args: Optional[List[str]] = None) -> int: """Main for the main testsuite script. :param args: Command line arguments. If None, use `sys.argv`. :return: The testsuite status code (0 for success, a positive for failure). """ self.main = Main(platform_args=True) # Add common options parser = self.main.argument_parser temp_group = parser.add_argument_group( title="temporaries handling arguments") temp_group.add_argument("-t", "--temp-dir", metavar="DIR", default=Env().tmp_dir) temp_group.add_argument( "--no-random-temp-subdir", dest="random_temp_subdir", action="store_false", help="Disable the creation of a random subdirectory in the" " temporary directory. Use this when you know that you have" " exclusive access to the temporary directory (needed in order to" " avoid name clashes there) to get a deterministic path for" " testsuite temporaries.") temp_group.add_argument( "-d", "--dev-temp", metavar="DIR", nargs="?", default=None, const="tmp", help="Convenience shortcut for dev setups: forces `-t DIR" " --no-random-temp-subdir --cleanup-mode=none` and cleans up `DIR`" ' first. If no directory is provided, use the local "tmp"' " directory.") cleanup_mode_map = enum_to_cmdline_args_map(CleanupMode) temp_group.add_argument( "--cleanup-mode", choices=list(cleanup_mode_map), help="Control the cleanup of working spaces.\n" + "\n".join(f"{name}: {CleanupMode.descriptions()[value]}" for name, value in cleanup_mode_map.items())) temp_group.add_argument( "--disable-cleanup", action="store_true", help="Disable cleanup of working spaces. This option is deprecated" " and will disappear in a future version of e3-testsuite. Please" " use --cleanup-mode instead.") output_group = parser.add_argument_group( title="results output arguments") output_group.add_argument( "-o", "--output-dir", metavar="DIR", default="./out", help="Select the output directory, where test results are to be" " stored (default: './out'). If --old-output-dir=DIR2 is passed," " the new results are stored in DIR while DIR2 contains results" " from a previous run. Otherwise, the new results are stored in" " DIR/new/ while the old ones are stored in DIR/old. In both" " cases, the testsuite cleans the directory for new results" " first.", ) output_group.add_argument( "--old-output-dir", metavar="DIR", help="Select the old output directory, for baseline comparison." " See --output-dir.", ) output_group.add_argument( "--rotate-output-dirs", default=False, action="store_true", help="Rotate testsuite results: move the new results directory to" " the old results one before running testcases (this removes the" " old results directory first). If not passed, we just remove the" " new results directory before running testcases (i.e. just ignore" " the old results directory).", ) output_group.add_argument( "--show-error-output", "-E", action="store_true", help="When testcases fail, display their output. This is for" " convenience for interactive use.", ) output_group.add_argument( "--show-time-info", action="store_true", help="Display time information for test results, if available", ) output_group.add_argument( "--xunit-output", dest="xunit_output", metavar="FILE", help="Output testsuite report to the given file in the standard" " XUnit XML format. 
This is useful to display results in" " continuous build systems such as Jenkins.", ) output_group.add_argument( "--gaia-output", action="store_true", help="Output a GAIA-compatible testsuite report next to the YAML" " report.", ) output_group.add_argument( "--status-update-interval", default=1.0, type=float, help="Minimum number of seconds between status file updates. The" " more often we update this file, the more often one will read" " garbage.") auto_gen_default = ("enabled" if self.auto_generate_text_report else "disabled") output_group.add_argument( "--generate-text-report", action="store_true", dest="generate_text_report", default=self.auto_generate_text_report, help=( f"When the testsuite completes, generate a 'report' text file" f" in the output directory ({auto_gen_default} by default)."), ) output_group.add_argument( "--no-generate-text-report", action="store_false", dest="generate_text_report", help="Disable the generation of a 'report' text file (see" "--generate-text-report).", ) output_group.add_argument( "--truncate-logs", "-T", metavar="N", type=int, default=200, help="When outputs (for instance subprocess outputs) exceed 2*N" " lines, only include the first and last N lines in logs. This is" " necessary when storage for testsuite results have size limits," " and the useful information is generally either at the beginning" " or the end of such outputs. If 0, never truncate logs.", ) output_group.add_argument( "--dump-environ", dest="dump_environ", action="store_true", default=False, help="Dump all environment variables in a file named environ.sh," " located in the output directory (see --output-dir). This" " file can then be sourced from a Bourne shell to recreate" " the environement that existed when this testsuite was run" " to produce a given testsuite report.", ) exec_group = parser.add_argument_group( title="execution control arguments") exec_group.add_argument( "--max-consecutive-failures", "-M", metavar="N", type=int, default=self.default_max_consecutive_failures, help="Number of test failures (FAIL or ERROR) that trigger the" " abortion of the testuite. If zero, this behavior is disabled. In" " some cases, aborting the testsuite when there are just too many" " failures saves time and costs: the software to test/environment" " is too broken, there is no point to continue running the" " testsuite.", ) exec_group.add_argument( "-j", "--jobs", dest="jobs", type=int, metavar="N", default=Env().build.cpu.cores, help="Specify the number of jobs to run simultaneously", ) exec_group.add_argument( "--failure-exit-code", metavar="N", type=int, default=self.default_failure_exit_code, help="Exit code the testsuite must use when at least one test" " result shows a failure/error. By default, this is" f" {self.default_failure_exit_code}. This option is useful when" " running a testsuite in a continuous integration setup, as this" " can make the testing process stop when there is a regression.", ) exec_group.add_argument( "--force-multiprocessing", action="store_true", help="Force the use of subprocesses to execute tests, for" " debugging purposes. This is normally automatically enabled when" " both the level of requested parallelism is high enough (to make" " it profitable regarding the contention of Python's GIL) and no" " test fragment has dependencies on other fragments. 
This flag" " forces the use of multiprocessing even if any of these two" " conditions is false.") parser.add_argument("sublist", metavar="tests", nargs="*", default=[], help="test") # Add user defined options self.add_options(parser) # Parse options self.main.parse_args(args) assert self.main.args is not None # If there is a chance for the logging to end up in a non-tty stream, # disable colors. If not, be user-friendly and automatically show error # outputs. if (self.main.args.log_file or not isatty(sys.stdout) or not isatty(sys.stderr)): enable_colors = False else: # interactive-only enable_colors = True self.main.args.show_error_output = True self.colors = ColorConfig(enable_colors) self.Fore = self.colors.Fore self.Style = self.colors.Style self.env = Env() self.env.enable_colors = enable_colors self.env.root_dir = self.root_dir self.env.test_dir = self.test_dir # Setup output directories and create an index for the results we are # going to produce. self.output_dir: str self.old_output_dir: Optional[str] self.setup_result_dirs() self.report_index = ReportIndex(self.output_dir) # Set the cleanup mode from command-line arguments if self.main.args.cleanup_mode is not None: self.env.cleanup_mode = ( cleanup_mode_map[self.main.args.cleanup_mode]) elif self.main.args.disable_cleanup: logger.warning( "--disable-cleanup is deprecated and will disappear in a" " future version of e3-testsuite. Please use --cleanup-mode" " instead.") self.env.cleanup_mode = CleanupMode.NONE else: self.env.cleanup_mode = CleanupMode.default() # Settings for temporary directory creation temp_dir: str = self.main.args.temp_dir random_temp_subdir: bool = self.main.args.random_temp_subdir # The "--dev-temp" option forces several settings if self.main.args.dev_temp: self.env.cleanup_mode = CleanupMode.NONE temp_dir = self.main.args.dev_temp random_temp_subdir = False # Now actually setup the temporary directory: make sure we start from a # clean directory if we use a deterministic directory. # # Note that we do make sure that working_dir is an absolute path, as we # are likely to be changing directories when running each test. A # relative path would no longer work under those circumstances. temp_dir = os.path.abspath(temp_dir) if not random_temp_subdir: self.working_dir = temp_dir rm(self.working_dir, recursive=True) mkdir(self.working_dir) elif not os.path.isdir(temp_dir): # If the temp dir is supposed to be randomized, we need to create a # subdirectory, so check that the parent directory exists first. logger.critical("temp dir '%s' does not exist", temp_dir) return 1 else: self.working_dir = tempfile.mkdtemp("", "tmp", temp_dir) # Create the exchange directory (to exchange data between the testsuite # main and the subprocesses running test fragments). Compute the name # of the file to pass environment data to subprocesses. 
self.exchange_dir = os.path.join(self.working_dir, "exchange") self.env_filename = os.path.join(self.exchange_dir, "_env.bin") mkdir(self.exchange_dir) # Make them both available to test fragments self.env.exchange_dir = self.exchange_dir self.env.env_filename = self.env_filename self.gaia_result_files: Dict[str, GAIAResultFiles] = {} """Mapping from test names to files for results in the GAIA report.""" # Store in global env: target information and common paths self.env.output_dir = self.output_dir self.env.working_dir = self.working_dir self.env.options = self.main.args # Create an object to report testsuite execution status to users from e3.testsuite.running_status import RunningStatus self.running_status = RunningStatus( os.path.join(self.output_dir, "status"), self.main.args.status_update_interval, ) # User specific startup self.set_up() # Retrieve the list of test self.test_list = self.get_test_list(self.main.args.sublist) # Create a DAG to constraint the test execution order dag = DAG() for parsed_test in self.test_list: self.add_test(dag, parsed_test) self.adjust_dag_dependencies(dag) dag.check() self.running_status.set_dag(dag) # Determine whether to use multiple processes for fragment execution # parallelism. self.use_multiprocessing = self.compute_use_multiprocessing() self.env.use_multiprocessing = self.use_multiprocessing # Record modules lookup path, including for the file corresponding to # the __main__ module. Subprocesses will need it to have access to the # same modules. main_module = sys.modules["__main__"] self.env.modules_search_path = [ os.path.dirname(os.path.abspath(main_module.__file__)) ] + sys.path # Now that the env is supposed to be complete, dump it for the test # fragments to pick it up. self.env.store(self.env_filename) # For debugging purposes, dump the final DAG to a DOT file with open(os.path.join(self.output_dir, "tests.dot"), "w") as fd: fd.write(dag.as_dot()) if self.use_multiprocessing: self.run_multiprocess_mainloop(dag) else: self.run_standard_mainloop(dag) self.report_index.write() self.dump_testsuite_result() if self.main.args.xunit_output: dump_xunit_report(self, self.main.args.xunit_output) if self.main.args.gaia_output: dump_gaia_report(self, self.output_dir, self.gaia_result_files) # Clean everything self.tear_down() # If requested, generate a text report if self.main.args.generate_text_report: # Use the previous testsuite results for comparison, if available old_index = (ReportIndex.read(self.old_output_dir) if self.old_output_dir else None) # Include all information, except logs for successful tests, which # is just too verbose. with open(os.path.join(self.output_dir, "report"), "w", encoding="utf-8") as f: generate_report( output_file=f, new_index=self.report_index, old_index=old_index, colors=ColorConfig(colors_enabled=False), show_all_logs=False, show_xfail_logs=True, show_error_output=True, show_time_info=True, ) # Return the appropriate status code: 1 when there is a framework # issue, the failure status code from the --failure-exit-code=N option # when there is a least one testcase failure, or 0. statuses = { s for s, count in self.report_index.status_counters.items() if count } if TestStatus.FAIL in statuses or TestStatus.ERROR in statuses: return self.main.args.failure_exit_code else: return 0
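# Editor's note: illustrative sketch, not part of the original sources.
# It shows the usual way testsuite_main above ends up being called: a
# small Testsuite subclass used as a script entry point. The class and
# driver names below are hypothetical; the base classes and attributes
# (Testsuite, ClassicTestDriver, test_driver_map, default_driver) follow
# the usual e3.testsuite API and should be checked against the version
# in use. The command-line flags in the example invocation are among the
# options parsed above.
import sys

from e3.testsuite import Testsuite
from e3.testsuite.driver.classic import ClassicTestDriver


class ExampleDriver(ClassicTestDriver):
    def run(self):
        # Run an arbitrary command; its output is captured in the test logs
        self.shell(["echo", "hello"])


class ExampleTestsuite(Testsuite):
    test_driver_map = {"example": ExampleDriver}
    default_driver = "example"


if __name__ == "__main__":
    # Equivalent to passing these options on the command line; passing
    # None (the default) makes testsuite_main read sys.argv instead.
    sys.exit(
        ExampleTestsuite().testsuite_main(["--jobs=4", "-E", "--dev-temp"])
    )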
def test_dag_len():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b')
    d.update_vertex('a', predecessors=['b'])
    assert len(d) == 2
def test_dag_str():
    d = DAG()
    d.add_vertex('a')
    d.add_vertex('b')
    d.update_vertex('a', predecessors=['b'])
    assert str(d)
def test_add_vertex():
    d = DAG()

    # add_vertex should fail in case a dep does not exist
    with pytest.raises(DAGError):
        d.add_vertex('a', predecessors=['b'])

    # check iteration order with a simple dependency
    d.add_vertex('b')
    d.add_vertex('a', predecessors=['b'])
    result = []
    for vertex_id, data in d:
        result.append(vertex_id)
    assert result == ['b', 'a']

    # check that add_vertex fails on an attempt to add an already
    # existing node
    with pytest.raises(DAGError):
        d.add_vertex('a')

    # check update with a new dependency
    d.add_vertex('c')
    d.update_vertex('b', predecessors=['c'])
    result = []
    for vertex_id, data in d:
        result.append(vertex_id)
    assert result == ['c', 'b', 'a']

    d.update_vertex('a', data='datafora_')
    d.update_vertex('c', data='dataforc_')
    result = []
    compound_data = ''
    for vertex_id, data in d:
        if data is not None:
            compound_data += data
        result.append(vertex_id)
    assert result == ['c', 'b', 'a']
    assert compound_data == 'dataforc_datafora_'
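# Editor's addition (illustrative, mirroring the tests above): exercise
# reverse_graph and get_closure, which the scheduling code relies on.
# Only DAG methods already used elsewhere in this document appear here.
def test_dag_reverse_and_closure():
    d = DAG()
    d.add_vertex('c')
    d.add_vertex('b', predecessors=['c'])
    d.add_vertex('a', predecessors=['b'])

    # topological iteration yields dependencies first
    assert [vid for vid, data in d] == ['c', 'b', 'a']

    # in the reversed graph all edges are inverted, so the order flips
    rev = d.reverse_graph()
    assert [vid for vid, data in rev] == ['a', 'b', 'c']

    # the closure of 'a' contains its direct and transitive predecessors
    assert {'b', 'c'} <= d.get_closure('a')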