def get_last_build(self, history: int = 1, stop_at: Build = None, with_results=False, force_fetch=False) -> Build:
    """Return the `history`-th most recent build for this repository.

    :param history: how many matching versions back to go; 1 = most recent
    :param stop_at: stop scanning once this build's version is reached; if it
        is the very first version scanned, None is returned
    :param with_results: only count versions whose build already has results
    :param force_fetch: bypass the cached last-100 version list
    :return: a Build, or None if nothing matched
    """
    if self.version:
        # A pinned version: it is the only candidate.
        versions = [self.version]
    else:
        # Cache the last 100 known versions to avoid repeated remote lookups.
        if force_fetch or not self._last_100:
            versions = self.method.get_last_versions(100, force_fetch=force_fetch)
            self._last_100 = versions
        else:
            versions = self._last_100
    last_build = None
    for i, version in enumerate(versions):
        if stop_at and version == stop_at.version:
            if i == 0:
                # stop_at is already the newest version: nothing newer exists.
                return None
            break
        last_build = Build(self, version, self.options.result_path)
        if not with_results or last_build.hasResults():
            if history <= 1:
                break
            else:
                # Found a matching version; keep going further back.
                history -= 1
        if i > 100:
            # Safety net: give up after scanning ~100 versions.
            last_build = None
            break
    return last_build
def get_old_results(self, last_graph: Build, num_old: int, testie):
    """Collect up to `num_old` past result sets from commits preceding `last_graph`.

    :param last_graph: build whose version anchors the history walk
    :param num_old: maximum number of old result sets to gather
    :param testie: testie whose results are loaded for each old build
    :return: list of (testie, build, results) tuples
    """
    collected = []
    commits = self.method.gitrepo().iter_commits(last_graph.version)
    next(commits)  # The first commit is last_graph itself
    for idx, commit in enumerate(commits):
        old_build = Build(self, commit.hexsha[:7], self.options.result_path)
        # Commits without stored results are skipped entirely (and do not
        # trigger the stop conditions below).
        if not old_build.hasResults(testie):
            continue
        collected.append((testie, old_build, old_build.load_results(testie)))
        if idx > 100 or len(collected) == num_old:
            break
    return collected
def checkout(self, branch=None):
    """Download the tarball for *branch* and extract it into the build path.

    :param branch: version to fetch; defaults to the repo's configured version
    :return: True on success, False if the download failed
    """
    if branch is None:
        branch = self.repo.version
    url = npf.replace_path(self.repo.url, Build(self.repo, branch, self.options.result_path))
    if not Path(self.repo.get_build_path()).exists():
        os.makedirs(self.repo.get_build_path())
    try:
        filename, headers = urllib.request.urlretrieve(url, self.repo.get_build_path() + os.path.basename(url))
    except URLError:
        print("ERROR : Could not download %s : bad URL?" % url)
        return False
    # BUG FIX: use a context manager so the archive handle is closed even if
    # extraction raises (previously `t.close()` was skipped on error).
    # NOTE(review): extractall trusts the archive contents; consider
    # filter='data' (Python 3.12+) if the URL is not fully trusted.
    with tarfile.open(filename) as t:
        t.extractall(self.repo.get_build_path())
    os.unlink(filename)
    return True
def test_paths():
    """Check NPF path constants render identically for local and SSH executors."""
    args = test_args()
    args.do_test = False
    args.do_conntest = False
    args.experiment_folder = "test_root"
    local = Node.makeLocal(args, test_access=False)
    ssh = Node.makeSSH(addr="cluster01.sample", user=None, path=None, options=args)
    ssh2 = Node.makeSSH(addr="cluster01.sample", user=None, path=None, options=args)
    ssh.executor.path = "/different/path/to/root/"
    ssh2.executor.path = npf.experiment_path() + os.sep
    #Test the constants are correct
    testie = Testie("tests/examples/math.npf", options=args, tags=args.tags)
    repo = test_repo()
    build = Build(repo, "version")
    v = {}
    testie.update_constants(v, build, ssh.experiment_path() + "/testie-1/", out_path=None)
    v2 = {}
    testie.update_constants(v2, build, ssh2.experiment_path() + "/testie-1/", out_path=None)
    vl = {}
    testie.update_constants(vl, build, local.experiment_path() + "/testie-1/", out_path=None)
    # BUG FIX: the asserts previously checked `v` on every iteration, so the
    # constants computed for `vl` and `v2` were never actually verified.
    for d in [vl, v, v2]:
        assert d['NPF_REPO'] == 'Click_2021'
        assert d['NPF_ROOT_PATH'] == '../..'
        assert d['NPF_SCRIPT_PATH'] == '../../tests/examples'
        assert d['NPF_RESULT_PATH'] == '../../results/click-2021'
def checkout(self, branch=None):
    """Download the tarball for *branch* (with a custom User-Agent) and extract it.

    :param branch: version to fetch; defaults to the repo's configured version
    :return: True on success, False if the download failed
    """
    if branch is None:
        branch = self.repo.version
    url = npf.replace_path(
        self.repo.url,
        Build(self.repo, branch, self.repo.options.result_path))
    if not Path(self.repo.get_build_path()).exists():
        os.makedirs(self.repo.get_build_path())
    try:
        # NOTE(review): install_opener changes the process-wide opener so the
        # 'NPF' User-Agent applies to urlretrieve below — this side effect
        # persists for later urllib calls.
        proxy = urllib.request.ProxyHandler({})
        opener = urllib.request.build_opener(proxy)
        opener.addheaders = [('User-Agent', 'NPF')]
        urllib.request.install_opener(opener)
        filename, headers = urllib.request.urlretrieve(
            url, self.repo.get_build_path() + os.path.basename(url))
    except URLError:
        print("ERROR : Could not download %s : bad URL?" % url)
        return False
    # BUG FIX: use a context manager so the archive handle is closed even if
    # extraction raises (previously `t.close()` was skipped on error).
    with tarfile.open(filename) as t:
        t.extractall(self.repo.get_build_path())
    os.unlink(filename)
    return True
def run(build: Build, all_results: Dataset, testie: Testie, max_depth=3, filename=None):
    """Fit a decision-tree regressor on the dataset and report feature importances,
    the best run, and per-variable means.

    :param build: build used to derive the output path for the tree graph
    :param all_results: dataset mapping runs to result arrays
    :param testie: testie providing the variable definitions
    :param max_depth: maximum tree depth; no graph is written when None or > 8
    :param filename: explicit output file for the tree visualization
    """
    print("Building dataset...")
    X, y = Statistics.buildDataset(all_results, testie)
    print("Learning dataset built with %d samples and %d features..." % (X.shape[0], X.shape[1]))
    clf = tree.DecisionTreeRegressor(max_depth=max_depth)
    clf = clf.fit(X, y)
    if max_depth is None or max_depth > 8:
        print("No tree graph when maxdepth is > 8")
    else:
        dot_data = tree.export_graphviz(
            clf,
            out_file=None,
            filled=True,
            rounded=True,
            special_characters=True,
            feature_names=testie.variables.dtype()['names'])
        graph = pydotplus.graph_from_dot_data(dot_data)
        if filename:
            f = filename
        else:
            f = build.result_path(testie.filename, 'pdf', suffix='_clf')
        graph.write(f, format=os.path.splitext(f)[1][1:])
        print("Decision tree visualization written to %s" % f)
    print("")
    print("Feature importances :")
    # noinspection PyUnresolvedReferences
    for key, f in zip(testie.variables.dtype()['names'], clf.feature_importances_):
        print(" %s : %0.2f" % (key, f))
    # Gather the set of observed values for each variable.
    vars_values = {}
    for run, results in all_results.items():
        for k, v in run.variables.items():
            vars_values.setdefault(k, set()).add(v)
    print('')
    print("Better :")
    best = X[y['result'].argmax()]
    print(" ", end='')
    f = next(iter(all_results.items()))
    for i, (k, v) in enumerate(f[0].variables.items()):
        print("%s = %s, " % (k, best[i]), end='')
    print(' : %.02f' % y['result'].max())
    print('')
    print("Means and std/mean per variables :")
    for k, vals in vars_values.items():
        # BUG FIX: was `len(vals) is 1` — identity comparison with an int
        # literal is implementation-dependent and a SyntaxWarning on 3.8+.
        if len(vals) == 1:
            continue
        print("%s :" % k)
        for v in sorted(vals):
            tot = 0
            std = 0
            n = 0
            for run, results in all_results.items():
                if run.variables[k] == v:
                    # BUG FIX idiom: was `not results is None`.
                    if results is not None:
                        tot += np.mean(results)
                        std += np.std(results)
                        n += 1
            if n == 0:
                print(" %s : None" % v)
            else:
                print(" %s : (%.02f,%.02f), " % (v, tot / n, std / n / (tot / n)))
    print("")
def current_version(self):
    """Return the current build's version, falling back to the repository-level
    current version when no current build exists."""
    active = self.current_build()
    return active.version if active else Build.get_current_version(self)
def execute_all(
        self,
        build,
        options,
        prev_results: Dataset = None,
        do_test=True,
        allowed_types=SectionScript.ALL_TYPES_SET) -> Tuple[Dataset, bool]:
    """Execute script for all variables combinations. All tools rely on this
    function for execution of the testie.

    :param allowed_types: Types of scripts allowed to run. Set with either init, scripts or both
    :param do_test: Actually run the tests
    :param options: NPF options object
    :param build: A build object
    :param prev_results: Previous set of result for the same build to update or retrieve
    :return: Dataset(Dict of variables as key and arrays of results as value)
    """
    init_done = False
    if SectionScript.TYPE_SCRIPT not in allowed_types:
        # If scripts is not in allowed_types, we have to run the init by force now
        self.do_init_all(build, options, do_test=do_test, allowed_types=allowed_types)
        return {}, True
    all_results = {}
    for variables in self.variables:
        run = Run(variables)
        if hasattr(self, 'late_variables'):
            variables = self.late_variables.execute(variables, self)
        r_status, r_out, r_err = self.test_require(variables, build)
        if not r_status:
            if not self.options.quiet:
                print("Requirement not met for %s" % run.format_variables(self.config["var_hide"]))
                # BUG FIX: this branch referenced undefined names `output` and
                # `err` (only assigned later by self.execute), raising
                # NameError; the require script's streams are r_out/r_err.
                if r_out.strip():
                    print(r_out.strip())
                if r_err.strip():
                    print(r_err.strip())
            continue
        # (was `prev_results and prev_results is not None` — redundant)
        if prev_results and not options.force_test:
            run_results = prev_results.get(run, {})
            if run_results is None:
                run_results = {}
        else:
            run_results = {}
        if not run_results and options.use_last:
            # Look for results of this run in older builds of the same repo.
            for version in build.repo.method.get_history(build.version):
                oldb = Build(build.repo, version)
                r = oldb.load_results(self)
                if r and run in r:
                    run_results = r[run]
                    break
        # Discard cached results missing any expected result type.
        for result_type in self.config['results_expect']:
            if result_type not in run_results:
                run_results = {}
        have_new_results = False
        # Number of runs still needed to reach the configured n_runs.
        n_runs = self.config["n_runs"] - (
            0 if options.force_test or len(run_results) == 0 else min([
                len(results)
                for result_type, results in run_results.items()
            ]))
        if n_runs > 0 and do_test:
            if not init_done:
                self.do_init_all(build, options, do_test, allowed_types=allowed_types)
                init_done = True
            if not self.options.quiet:
                print(run.format_variables(self.config["var_hide"]))
            new_results, output, err = self.execute(
                build,
                run,
                variables,
                n_runs,
                n_retry=self.config["n_retry"],
                allowed_types={SectionScript.TYPE_SCRIPT})
            if new_results:
                if self.options.show_full:
                    print("stdout:")
                    print(output)
                    print("stderr:")
                    print(err)
                for k, v in new_results.items():
                    run_results.setdefault(k, []).extend(v)
                    have_new_results = True
        else:
            if not self.options.quiet:
                print(run.format_variables(self.config["var_hide"]))
        if len(run_results) > 0:
            if not self.options.quiet:
                if len(run_results) == 1:
                    print(list(run_results.values())[0])
                else:
                    print(run_results)
            all_results[run] = run_results
        else:
            all_results[run] = {}
        # Save results
        if all_results and have_new_results:
            if prev_results:
                prev_results[run] = all_results[run]
                build.writeversion(self, prev_results)
            else:
                build.writeversion(self, all_results)
    return all_results, init_done