def benchmark() -> None:
    print('{:^30} | {:^12} | {:^15} | {:^16} | {:^17} | {:^12} | {:^19}'.format(
        'Random MDP properties', '# states (n)', '# actions (a)',
        'time to generate', 'reachability to T', 'SSPE to T',
        'SSPP from s0 to T (l=5)'))
    print(143 * '-')
    T = [1]
    for x in [100, 200, 500, 1000, 1500, 2000]:
        for y in [5, 10, 20, 50]:
            for (name, n, a, strictly_a, complete,
                 force_weakly_connected_to_T) in [
                    ('Any', x, y, False, False, False),
                    ('Complete', x, y, False, True, False),
                    ('Weakly connected to T', x, y, False, False, True),
                    ('|A(s)| = a ∀s', x, y, True, False, False),
                    ('Complete & |A(s)| = a ∀s', x, y, True, True, False)]:
                print('{:^30} | {:^12d} | {:^15d} | '.format(name, n, a),
                      end='')
                # Building a random MDP
                t = Timer(verbose=False)
                with t:
                    mdp = generator.random_MDP(
                        n, a, strictly_A=strictly_a,
                        complete_graph=complete,
                        force_weakly_connected_to=force_weakly_connected_to_T)
                time_taken = t.interval
                print('{:^16f} | '.format(time_taken), end='')
                # reach T
                t = Timer(verbose=False)
                with t:
                    reach(mdp, T)
                time_taken = t.interval
                print('{:^17f} | '.format(time_taken), end='')
                # expected cost to T
                t = Timer(verbose=False)
                with t:
                    min_expected_cost(mdp, T)
                time_taken = t.interval
                print('{:^12f} | '.format(time_taken), end='')
                # SSPP
                t = Timer(verbose=False)
                with t:
                    force_short_paths_from(mdp, 0, T, 5, 0)
                time_taken = t.interval
                print('{:^19f}'.format(time_taken))
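# Every snippet in this file times work with a `Timer` context manager that
# exposes the elapsed time as `t.interval`. The helper itself is not defined
# here; the following is a minimal sketch of the assumed interface (the real
# class may differ):
import time

class Timer:
    def __init__(self, verbose=False):
        self.verbose = verbose
        self.interval = None

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        # Elapsed wall-clock time, in seconds.
        self.interval = time.perf_counter() - self.start
        if self.verbose:
            print('elapsed: %f sec' % self.interval)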
def fit(self, data, args):
    self.model = LogisticRegressionCV()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def fit(self, data, args):
    self.model = StandardScaler()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval
def fit(self, data, args):
    params = self.configure(data, args)
    if data.learning_task == LearningTask.REGRESSION:
        self.model = lgb.LGBMRegressor(
            max_depth=params["max_depth"],
            n_estimators=params["ntrees"],
            num_leaves=params["max_leaves"],
            learning_rate=params["learning_rate"],
            objective=params["objective"],
            n_jobs=params["njobs"],
            reg_lambda=params["reg_lambda"],
        )
    else:
        self.model = lgb.LGBMClassifier(
            max_depth=params["max_depth"],
            n_estimators=params["ntrees"],
            num_leaves=params["max_leaves"],
            learning_rate=params["learning_rate"],
            objective=params["objective"],
            n_jobs=params["njobs"],
            reg_lambda=params["reg_lambda"],
            **(params["args"]))
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval
def fit(self, data, args):
    self.model = LinearSVC()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def fit(self, data, args):
    self.model = BernoulliNB()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def fit(self, data, args):
    self.model = PolynomialFeatures()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def fit(self, data, args):
    self.model = KBinsDiscretizer()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def fit(self, data, args):
    self.model = Normalizer(norm="l2")
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval
def predict(self, data):
    assert self.model is not None
    with Timer() as t:
        self.predictions = self.test(data)
    return t.interval
def fit(self, data, args):
    self.model = MLPClassifier()
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def fit(self, data, args):
    self.model = SVC(probability=True)
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval
def fit(self, data, args):
    # NOTE: scikit-learn >= 1.1 deprecates loss="log" in favor of
    # loss="log_loss"; the old spelling is removed in 1.3.
    self.model = SGDClassifier(loss="log")
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval
def fit(self, data, args):
    self.model = LogisticRegression(solver="liblinear")
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train)
    return t.interval

def predict(self, data):
    assert self.model is not None
    with Timer() as t:
        self.predictions = self.model.predict_proba(data.X_test)
    return t.interval
def predict(self, model, data, args):
    batch_size = args.batch_size
    predict_data = data.X_test
    with Timer() as t:
        total_size = len(predict_data)
        # ceil(total_size / batch_size), with at least one batch.
        iterations = total_size // batch_size
        iterations += 1 if total_size % batch_size > 0 else 0
        iterations = max(1, iterations)
        if data.learning_task == LearningTask.CLASSIFICATION:
            self.predictions = np.empty([total_size, 2], dtype="f4")
            predict_fn = model.predict_proba
        if data.learning_task == LearningTask.MULTICLASS_CLASSIFICATION:
            self.predictions = np.empty([total_size, model.n_classes_],
                                        dtype="f4")
            predict_fn = model.predict_proba
        if data.learning_task == LearningTask.REGRESSION:
            self.predictions = np.empty([total_size], dtype="f4")
            predict_fn = model.predict
        for i in range(0, iterations):
            start = i * batch_size
            end = min(start + batch_size, total_size)
            batch = TrainEnsembleAlgorithm.get_data(predict_data, start, end)
            self.predictions[start:end] = predict_fn(batch)
    return t.interval
def predict(self, data):
    assert self.model is not None
    data = self.get_data(data)
    with Timer() as t:
        self.predictions = self.model.predict(data)
    return t.interval
def predict(self, data):
    assert self.model is not None
    with Timer() as t:
        predict_data = self.get_data(data.X_test)
        if data.learning_task == LearningTask.REGRESSION:
            self.predictions = self.model.predict(predict_data)
        else:
            self.predictions = self.model.predict_proba(predict_data)
    return t.interval
def convert(self, model, data, args):
    self.configure(data, model, args)
    data = self.get_data(data)
    with Timer() as t:
        self.model = hummingbird.ml.convert(model, self._backend_name, data,
                                            device=self.params["device"])
    return t.interval
def predict(self, data):
    import onnxruntime as ort
    assert self.model is not None
    remainder_sess = None
    sess_options = ort.SessionOptions()
    sess_options.intra_op_num_threads = self.params["nthread"]
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    sess = ort.InferenceSession(self.model.SerializeToString(),
                                sess_options=sess_options)
    if self.remainder_model is not None:
        # The ONNX models are exported with a fixed batch dimension, so a
        # second session handles the final, smaller batch.
        remainder_sess = ort.InferenceSession(
            self.remainder_model.SerializeToString(),
            sess_options=sess_options)
    batch_size = (1 if self.params["operator"] == "xgb"
                  else self.params["batch_size"])
    input_name = sess.get_inputs()[0].name
    is_regression = (data.learning_task == LearningTask.REGRESSION
                     or "SVC" in self.params["operator"])
    if is_regression:
        output_name_index = 0
    else:
        output_name_index = 1
    output_name = sess.get_outputs()[output_name_index].name
    with Timer() as t:
        predict_data = ScoreBackend.get_data(data.X_test)
        total_size = len(predict_data)
        iterations = total_size // batch_size
        iterations += 1 if total_size % batch_size > 0 else 0
        iterations = max(1, iterations)
        for i in range(0, iterations):
            start = i * batch_size
            end = min(start + batch_size, total_size)
            if self.params["operator"] == "xgb":
                self.predictions[start:end, :] = sess.run(
                    [output_name], {input_name: predict_data[start:end, :]})
            else:
                if i == iterations - 1 and self.remainder_model is not None:
                    pred = remainder_sess.run(
                        [output_name],
                        {input_name: predict_data[start:end, :]})
                else:
                    pred = sess.run(
                        [output_name],
                        {input_name: predict_data[start:end, :]})
                if is_regression:
                    self.predictions = pred[0]
                else:
                    self.predictions = list(
                        map(lambda x: list(x.values()), pred[0]))
    del sess
    if remainder_sess is not None:
        del remainder_sess
    return t.interval
def predict(self, data):
    assert self.model is not None
    is_regression = (data.learning_task == LearningTask.REGRESSION
                     or "SVC" in self.params["operator"])
    with Timer() as t:
        predict_data = self.get_data(data.X_test)
        if self.params["transform"]:
            self.predictions = self.model.transform(predict_data)
        elif is_regression:
            self.predictions = self.model.predict(predict_data)
        else:
            self.predictions = self.model.predict_proba(predict_data)
    return t.interval
def convert(self, model, data, args, model_name):
    self.configure(data, model, args)
    test_data = self.get_data(data.X_test)
    with Timer() as t:
        self.model = convert(
            model,
            self.backend,
            test_data,
            device=self.params["device"],
            extra_config={constants.N_THREADS: self.params["nthread"],
                          constants.BATCH_SIZE: self.params["batch_size"]},
        )
    return t.interval
def convert(self, model, data, args, model_name):
    from skl2onnx import convert_sklearn
    from onnxmltools.convert.common.data_types import FloatTensorType
    self.configure(data, model, args)
    with Timer() as t:
        batch = min(len(data.X_test), self.params["batch_size"])
        remainder = len(data.X_test) % batch
        initial_type = [("input",
                         FloatTensorType([batch, self.params["input_size"]]))]
        self.model = convert_sklearn(model, initial_types=initial_type)
        if remainder > 0:
            initial_type = [("input",
                             FloatTensorType([remainder,
                                              self.params["input_size"]]))]
            self.remainder_model = convert_sklearn(
                model, initial_types=initial_type, target_opset=11)
    return t.interval
def fit(self, data, args):
    params = self.configure(data, args)
    if data.learning_task == LearningTask.REGRESSION:
        self.model = RandomForestRegressor(max_depth=params["max_depth"],
                                           n_estimators=params["ntrees"],
                                           n_jobs=params["njobs"])
    else:
        self.model = RandomForestClassifier(max_depth=params["max_depth"],
                                            n_estimators=params["ntrees"],
                                            n_jobs=params["njobs"])
    with Timer() as t:
        self.model.fit(data.X_train, data.y_train.astype("|i4"))
    return t.interval
def convert(self, model, data, args, model_name):
    self.configure(data, model, args)
    test_data = self.get_data(data.X_test)
    remainder_size = test_data.shape[0] % self.params["batch_size"]
    with Timer() as t:
        self.model = convert_batch(
            model,
            self.backend,
            test_data,
            remainder_size,
            device=self.params["device"],
            extra_config={constants.N_THREADS: self.params["nthread"]},
        )
    return t.interval
def convert(self, model, data, args, model_name):
    from onnxmltools.convert import convert_xgboost
    from onnxmltools.convert import convert_lightgbm
    from skl2onnx import convert_sklearn
    from onnxmltools.convert.common.data_types import FloatTensorType
    self.configure(data, model, args)
    with Timer() as t:
        if self.params["operator"] == "xgb":
            initial_type = [("input",
                             FloatTensorType([1, self.params["input_size"]]))]
            # Replace the booster's feature names with plain stringified
            # indices before handing the model to the converter.
            fixed_names = list(
                map(lambda x: str(x),
                    range(len(model._Booster.feature_names))))
            model._Booster.feature_names = fixed_names
            self.model = convert_xgboost(model, initial_types=initial_type,
                                         target_opset=11)
        else:
            batch = min(len(data.X_test), self.params["batch_size"])
            remainder = len(data.X_test) % batch
            initial_type = [("input",
                             FloatTensorType([batch,
                                              self.params["input_size"]]))]
            if self.params["operator"] == "lgbm":
                converter = convert_lightgbm
            elif self.params["operator"] == "rf":
                converter = convert_sklearn
            self.model = converter(model, initial_types=initial_type)
            if remainder > 0:
                initial_type = [("input",
                                 FloatTensorType([remainder,
                                                  self.params["input_size"]]))]
                self.remainder_model = converter(model,
                                                 initial_types=initial_type,
                                                 target_opset=11)
    return t.interval
def main():
    args = parse_args()
    print(args.gpu)
    print_sys_info(args)
    results = {}
    set_signal()
    skipped = 0
    total = 0
    if not os.path.exists(args.pipedir):
        raise Exception(args.pipedir + " directory not found")
    tasks = os.listdir(args.pipedir)
    tasks = list(map(lambda x: int(x), tasks))
    tasks.sort()
    for task in list(map(lambda x: str(x), tasks)):
        print("Task-{}".format(task))
        task_dir = os.path.join(args.pipedir, task)
        task_pip_dir = os.path.join(task_dir, "pipelines")
        X = np.load(os.path.join(task_dir, "data", "X.dat.npy"))
        results[task] = {
            "dataset_size": X.shape[0],
            "num_features": X.shape[1]
        }
        pipelines = os.listdir(os.path.join(task_pip_dir))
        pipelines = list(map(lambda x: int(x[:-4]), pipelines))
        pipelines.sort()
        res = []
        for pipeline in list(map(lambda x: str(x), pipelines)):
            total += 1
            with open(os.path.join(task_pip_dir, pipeline + ".pkl"),
                      "rb") as f:
                model = joblib.load(f)
            assert model is not None
            results[task][pipeline] = {}
            times = []
            mean = 0
            print("Pipeline-{}".format(pipeline))
            try:
                for i in range(args.niters):
                    set_alarm(3600)
                    with Timer() as t:
                        res = model.predict(X)
                    times.append(t.interval)
                    set_alarm(0)
                mean = (stats.trim_mean(times, 1 / len(times))
                        if args.niters > 1 else times[0])
                gc.collect()
            except Exception as e:
                print(e)
            results[task][pipeline] = {"prediction_time": mean}
            for backend in args.backend.split(","):
                print("Running '%s' ..." % backend)
                scorer = score.ScoreBackend.create(backend)
                with scorer:
                    try:
                        conversion_time = scorer.convert(model, X, args)
                    except Exception as e:
                        skipped += 1
                        print(e)
                        continue
                    times = []
                    prediction_time = 0
                    try:
                        for i in range(args.niters):
                            set_alarm(3600)
                            times.append(scorer.predict(X))
                            set_alarm(0)
                        prediction_time = (
                            times[0] if args.niters == 1
                            else stats.trim_mean(times, 1 / len(times)))
                        gc.collect()
                    except Exception as e:
                        skipped += 1
                        print(e)
                    results[task][pipeline][backend] = {
                        "conversion_time": str(conversion_time),
                        "prediction_time": str(prediction_time),
                        "speedup": "0" if mean == 0 or prediction_time == 0
                                   else str(mean / prediction_time)
                                   if prediction_time < mean
                                   else str(-prediction_time / mean),
                    }
                    print(results[task][pipeline][backend])
                    if args.validate:
                        np.testing.assert_allclose(scorer.predictions, res,
                                                   rtol=1e-5, atol=1e-6)
    output = json.dumps(results, indent=2)
    with open(args.output, "w") as output_file:
        output_file.write(output + "\n")
    print("All results written to file '%s'" % args.output)
    print("Total num of pipelines: {}; skipped: {}".format(total, skipped))
def worst_case_benchmark(number_of_states=50, number_of_actions=10,
                         dimensions=3) -> None:
    number_of_states += 1
    number_of_actions += 1
    T = [0]
    x = 1 if dimensions == 3 else number_of_actions - 1
    Y1 = numpy.zeros((number_of_actions, number_of_states))
    Y2 = numpy.zeros((number_of_actions, number_of_states))
    print('{:^12} | {:^15} | {:^12} | {:^19}'.format(
        '# states (n)', '# actions (a)', 'SSPE to T',
        'SSPP from s0 to T (l=5)'))
    for alpha in range(x, number_of_actions):
        for mdp in map(lambda s: generator.complete_MDP(s, alpha),
                       range(2, number_of_states)):
            n = mdp.number_of_states
            print('{:^12d} | {:^15d} | '.format(n, alpha), end='')
            # expected cost to T
            t = Timer(verbose=False)
            with t:
                min_expected_cost(mdp, T)
            time_taken = t.interval
            Y1[alpha][n] = time_taken
            print('{:^12f} | '.format(time_taken), end='')
            # SSPP. Note: the loop variable s is unused, so the same query
            # from the last state is solved n times.
            t = Timer(verbose=False)
            with t:
                for s in range(mdp.number_of_states):
                    force_short_paths_from(mdp, mdp.number_of_states - 1,
                                           T, 5, 0)
            time_taken = t.interval
            Y2[alpha][n] = time_taken
            print('{:^19f}'.format(time_taken))
    if dimensions == 3:
        N, A = numpy.meshgrid(range(number_of_states),
                              range(number_of_actions))
        fig = plt.figure(figsize=(8, 6))
        ax = Axes3D(fig)
        ax.plot_surface(N, A, Y1, rstride=1, cstride=1, cmap='Blues_r')
        ax.set_xlabel("Number of states")
        ax.set_ylabel("Number of actions available per state")
        ax.set_zlabel("Time (sec) to solve the SSPE problem")
        ax.set_title("Complete MDP")
        plt.savefig('benchmarks/sspe1.png', dpi=300)
        ax.view_init(elev=30., azim=-114.)
        plt.savefig('benchmarks/sspe2.png', dpi=300)
        # plt.show()
        fig = plt.figure(figsize=(8, 6))
        ax = Axes3D(fig)
        ax.plot_surface(N, A, Y2, rstride=1, cstride=1, cmap='Reds_r')
        ax.set_xlabel("Number of states")
        ax.set_ylabel("Number of actions available per state")
        ax.set_zlabel("Time (sec) to solve the SSPP problem (l=5)")
        ax.set_title("Complete MDP")
        plt.savefig('benchmarks/sspp1.png', dpi=300)
        ax.view_init(elev=30., azim=-114.)
        plt.savefig('benchmarks/sspp2.png', dpi=300)
        # plt.show()
    if dimensions >= 2:
        fig = plt.figure(figsize=(8, 6))
        fig.suptitle('Complete MDP with |A| = %d'
                     % int(number_of_actions - 1))
        fig.subplots_adjust(top=0.81)
        plt.plot(Y1[number_of_actions - 1],
                 label="Stochastic shortest path expectation problem")
        plt.plot(Y2[number_of_actions - 1],
                 label="Bounded-length stochastic shortest paths problem "
                       "(l=5)")
        plt.xlabel("Number of states")
        plt.ylabel("Time (sec)")
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                   borderaxespad=0., prop={'size': 10})
        plt.savefig('benchmarks/solvers.png', dpi=300)
        fig = plt.figure(figsize=(8, 6))
        fig.suptitle('Complete MDP with |A| = %d (log scale)'
                     % int(number_of_actions - 1))
        fig.subplots_adjust(top=0.81)
        plt.plot(Y2[number_of_actions - 1], 'r',
                 label="Bounded-length stochastic shortest paths problem "
                       "(l=5)")
        plt.xlabel("Number of states")
        plt.ylabel("Time (sec)")
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                   borderaxespad=0., prop={'size': 10})
        plt.yscale('log')
        plt.savefig('benchmarks/solvers2.png', dpi=300)
def predict(self, data):
    with Timer() as t:
        self.predictions = self.test(data)
    data.learning_task = LearningTask.REGRESSION
    return t.interval
def worst_case_benchmark_sspp(number_of_states=51, number_of_actions=11,
                              l_max=21, dimensions=3) -> None:
    number_of_states += 1
    l_max += 1
    T = [0]
    Y = numpy.zeros((l_max, number_of_states))
    x = 2 if dimensions == 3 else number_of_states - 1
    print('Number of actions : %d' % number_of_actions)
    print('{:^12} | {:^16} | {:^19}'.format('# states (n)',
                                            'length threshold',
                                            'SSPP from s0 to T'))
    for l in range(1, l_max):
        for mdp in map(lambda s: generator.complete_MDP(s, number_of_actions),
                       range(x, number_of_states)):
            n = mdp.number_of_states
            print('{:^12d} | {:^16d} | '.format(n, l), end='')
            # SSPP from the last state to T with length threshold l
            t = Timer(verbose=False)
            with t:
                force_short_paths_from(mdp, mdp.number_of_states - 1, T, l, 0)
            time_taken = t.interval
            Y[l][n] = time_taken
            print('{:^19f}'.format(time_taken))
    if dimensions == 3:
        N, L = numpy.meshgrid(range(number_of_states), range(l_max))
        fig = plt.figure(figsize=(8, 6))
        ax = Axes3D(fig)
        ax.plot_surface(N, L, Y, rstride=1, cstride=1, cmap='Greens_r')
        ax.set_xlabel("Number of states")
        ax.set_ylabel("l")
        ax.set_zlabel("Time (sec) to solve the SSPP problem")
        ax.set_title("Complete MDP with |A| = %d" % number_of_actions)
        plt.savefig('benchmarks/sspp_pseudopoly1.png', dpi=300)
        ax.view_init(elev=30., azim=-114.)
        plt.savefig('benchmarks/ssp_pseudopoly15.png', dpi=300)
        # plt.show()
    fig = plt.figure(figsize=(8, 6))
    fig.suptitle('Solving the SSPP problem on a complete MDP with '
                 '|S| = %d and |A| = %d'
                 % (number_of_states - 1, number_of_actions))
    plt.plot([Y[i][number_of_states - 1] for i in range(l_max)], 'g')
    plt.xlabel("Numeric value of l (size of the input in unary)")
    plt.ylabel("Time (sec)")
    plt.savefig('benchmarks/sspp_pseudopoly2.png', dpi=300)
    # plt.show()
    fig = plt.figure(figsize=(8, 6))
    fig.suptitle('Solving the SSPP problem on a complete MDP with '
                 '|S| = %d and |A| = %d'
                 % (number_of_states - 1, number_of_actions))
    plt.plot([0.] + list(map(lambda i: log(i) / log(2) + 1, range(1, l_max))),
             [Y[i][number_of_states - 1] for i in range(0, l_max)], 'r')
    plt.xlabel("Size of the input l in binary")
    plt.ylabel("Time (sec)")
    plt.savefig('benchmarks/sspp_pseudopoly3.png', dpi=300)
    # plt.show()
    fig = plt.figure(figsize=(8, 6))
    fig.suptitle('Solving the SSPP problem on a complete MDP with '
                 '|S| = %d and |A| = %d (log scale)'
                 % (number_of_states - 1, number_of_actions),
                 fontsize=11)
    plt.plot([0.] + list(map(lambda i: log(i) / log(2) + 1, range(1, l_max))),
             [Y[i][number_of_states - 1] for i in range(0, l_max)], 'r')
    plt.yscale('log')
    plt.xlabel("Size of the input l in binary")
    plt.ylabel("Time (sec)")
    plt.savefig('benchmarks/sspp_pseudopoly4.png', dpi=300)