def benchmark() -> None:
    """Print a timing table for random MDPs of increasing size.

    For every combination of state count, action count and generation
    profile, one table row is printed holding the ``Timer`` interval of:
    generating the MDP with ``generator.random_MDP``, ``reach``,
    ``min_expected_cost`` and ``force_short_paths_from`` (called with
    ``0, T, 5, 0``), all against the target list ``T = [1]``.
    """

    def timed(task):
        """Run ``task`` inside a silent Timer; return (result, interval)."""
        stopwatch = Timer(verbose=False)
        with stopwatch:
            outcome = task()
        return outcome, stopwatch.interval

    header = '{:^30} | {:^12} | {:^15} | {:^16} | {:^17} | {:^12} | {:^19}'
    print(header.format(
        'Random MDP properties', '# states (n)', '# actions (a)',
        'time to generate', 'reachability to T', 'SSPE to T',
        'SSPP from s0 to T (l=5)'))
    print(143 * '-')

    T = [1]
    # (row label, strictly_A, complete_graph, force_weakly_connected_to)
    profiles = [
        ('Any', False, False, False),
        ('Complete', False, True, False),
        ('Weakly connected to T', False, False, True),
        ('|A(s)| = a ∀s', True, False, False),
        ('Complete & |A(s)| = a ∀s', True, True, False),
    ]

    for n in [100, 200, 500, 1000, 1500, 2000]:
        for a in [5, 10, 20, 50]:
            for name, strictly_a, complete, weakly_connected in profiles:
                print('{:^30} | {:^12d} | {:^15d} | '.format(name, n, a),
                      end='')

                # Building a random MDP
                mdp, elapsed = timed(lambda: generator.random_MDP(
                    n,
                    a,
                    strictly_A=strictly_a,
                    complete_graph=complete,
                    force_weakly_connected_to=weakly_connected))
                print('{:^16f} | '.format(elapsed), end='')

                # Reachability to T
                _, elapsed = timed(lambda: reach(mdp, T))
                print('{:^17f} | '.format(elapsed), end='')

                # Expected cost to T
                _, elapsed = timed(lambda: min_expected_cost(mdp, T))
                print('{:^12f} | '.format(elapsed), end='')

                # SSPP; this last cell also terminates the row
                _, elapsed = timed(
                    lambda: force_short_paths_from(mdp, 0, T, 5, 0))
                print('{:^19f}'.format(elapsed))
Example #2
0
    def fit(self, data, args):
        """Fit a default ``LogisticRegressionCV`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = LogisticRegressionCV()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #3
0
    def fit(self, data, args):
        """Fit a default ``StandardScaler`` and return the timer interval.

        The transformer is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = StandardScaler()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #4
0
    def fit(self, data, args):
        """Build a LightGBM estimator from the configured params and time its fit.

        ``self.configure(data, args)`` supplies the hyper-parameters. A
        regressor is built for ``LearningTask.REGRESSION``, a classifier
        otherwise; only the classifier additionally receives the extra
        keyword arguments in ``params["args"]``.

        Returns the ``Timer`` interval measured around ``self.model.fit``.
        """
        params = self.configure(data, args)
        is_regression = data.learning_task == LearningTask.REGRESSION

        # Keyword arguments shared by both estimator flavours.
        shared_kwargs = dict(
            max_depth=params["max_depth"],
            n_estimators=params["ntrees"],
            num_leaves=params["max_leaves"],
            learning_rate=params["learning_rate"],
            objective=params["objective"],
            n_jobs=params["njobs"],
            reg_lambda=params["reg_lambda"],
        )

        if is_regression:
            self.model = lgb.LGBMRegressor(**shared_kwargs)
        else:
            self.model = lgb.LGBMClassifier(**shared_kwargs,
                                            **(params["args"]))

        with Timer() as fit_timer:
            self.model.fit(data.X_train, data.y_train)

        return fit_timer.interval
Example #5
0
    def fit(self, data, args):
        """Fit a default ``LinearSVC`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = LinearSVC()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #6
0
    def fit(self, data, args):
        """Fit a default ``BernoulliNB`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = BernoulliNB()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #7
0
    def fit(self, data, args):
        """Fit a default ``PolynomialFeatures`` and return the timer interval.

        The transformer is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = PolynomialFeatures()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #8
0
    def fit(self, data, args):
        """Fit a default ``KBinsDiscretizer`` and return the timer interval.

        The transformer is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = KBinsDiscretizer()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #9
0
    def fit(self, data, args):
        """Fit an L2 ``Normalizer`` and return the timer interval.

        The transformer is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = Normalizer(norm="l2")

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #10
0
    def predict(self, data):
        """Run ``self.test(data)`` under a ``Timer`` and return its interval.

        The result of ``self.test`` is stored on ``self.predictions``.
        Requires ``self.model`` to be set (checked with an ``assert``).
        """
        assert self.model is not None

        with Timer() as predict_timer:
            outputs = self.test(data)
            self.predictions = outputs

        return predict_timer.interval
Example #11
0
    def fit(self, data, args):
        """Fit a default ``MLPClassifier`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = MLPClassifier()

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #12
0
    def fit(self, data, args):
        """Fit an ``SVC(probability=True)`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = SVC(probability=True)

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #13
0
    def fit(self, data, args):
        """Fit an ``SGDClassifier(loss="log")`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        # NOTE(review): newer scikit-learn releases renamed loss="log" to
        # "log_loss" -- confirm against the scikit-learn version in use.
        self.model = SGDClassifier(loss="log")

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #14
0
    def fit(self, data, args):
        """Fit a liblinear ``LogisticRegression`` and return the timer interval.

        The estimator is stored on ``self.model``. Only the ``fit`` call on
        ``data.X_train`` / ``data.y_train`` runs inside the ``Timer``.
        ``args`` is not used.
        """
        self.model = LogisticRegression(solver="liblinear")

        with Timer() as fit_timer:
            features, targets = data.X_train, data.y_train
            self.model.fit(features, targets)

        elapsed = fit_timer.interval
        return elapsed
Example #15
0
    def predict(self, data):
        """Time ``self.model.predict_proba`` on ``data.X_test``.

        The output is stored on ``self.predictions``; the ``Timer`` interval
        is returned. Requires ``self.model`` to be set (checked with an
        ``assert``).
        """
        assert self.model is not None

        with Timer() as predict_timer:
            probabilities = self.model.predict_proba(data.X_test)
            self.predictions = probabilities

        return predict_timer.interval
Example #16
0
    def predict(self, model, data, args):
        """Predict ``data.X_test`` in batches and return the timer interval.

        The test set is split into chunks of ``args.batch_size`` rows (at
        least one chunk is always processed). Each chunk is fetched with
        ``TrainEnsembleAlgorithm.get_data`` and its predictions are written
        into the matching slice of a preallocated ``float32`` array stored
        on ``self.predictions``.

        Classification tasks use ``model.predict_proba`` (2 columns for
        ``CLASSIFICATION``, ``model.n_classes_`` columns for
        ``MULTICLASS_CLASSIFICATION``); ``REGRESSION`` uses ``model.predict``
        into a 1-D array.

        Raises:
            ValueError: if ``data.learning_task`` is none of the three
                supported tasks (previously this surfaced later as an
                unbound ``predict_fn``).
        """
        batch_size = args.batch_size
        predict_data = data.X_test

        with Timer() as t:
            total_size = len(predict_data)
            # Ceiling division, with a floor of one iteration.
            full_batches, leftover = divmod(total_size, batch_size)
            iterations = max(1, full_batches + (1 if leftover > 0 else 0))

            task = data.learning_task
            if task == LearningTask.CLASSIFICATION:
                self.predictions = np.empty([total_size, 2], dtype="f4")
                predict_fn = model.predict_proba
            elif task == LearningTask.MULTICLASS_CLASSIFICATION:
                self.predictions = np.empty([total_size, model.n_classes_],
                                            dtype="f4")
                predict_fn = model.predict_proba
            elif task == LearningTask.REGRESSION:
                self.predictions = np.empty([total_size], dtype="f4")
                predict_fn = model.predict
            else:
                # Fail early and explicitly instead of hitting an unbound
                # local inside the batch loop.
                raise ValueError(
                    "Unsupported learning task: {}".format(task))

            for i in range(iterations):
                start = i * batch_size
                end = min(start + batch_size, total_size)
                batch = TrainEnsembleAlgorithm.get_data(
                    predict_data, start, end)
                self.predictions[start:end] = predict_fn(batch)

        return t.interval
Example #17
0
    def predict(self, data):
        """Time ``self.model.predict`` on the prepared input.

        ``self.get_data(data)`` is evaluated before the ``Timer`` starts, so
        only the ``predict`` call is measured. The output is stored on
        ``self.predictions`` and the timer interval is returned.
        """
        assert self.model is not None

        prepared = self.get_data(data)

        with Timer() as predict_timer:
            outputs = self.model.predict(prepared)
            self.predictions = outputs

        return predict_timer.interval
Example #18
0
    def predict(self, data):
        """Time data preparation plus prediction on ``data.X_test``.

        Inside the ``Timer``: ``self.get_data(data.X_test)`` is called, then
        ``self.model.predict`` for ``LearningTask.REGRESSION`` or
        ``self.model.predict_proba`` for any other task. The output is
        stored on ``self.predictions``; the timer interval is returned.
        """
        assert self.model is not None

        with Timer() as predict_timer:
            prepared = self.get_data(data.X_test)
            wants_values = data.learning_task == LearningTask.REGRESSION
            infer = (self.model.predict
                     if wants_values else self.model.predict_proba)
            self.predictions = infer(prepared)

        return predict_timer.interval
Example #19
0
    def convert(self, model, data, args):
        """Convert ``model`` with ``hummingbird.ml.convert`` and time it.

        ``self.configure(data, model, args)`` and ``self.get_data(data)``
        run before the ``Timer`` starts. The converted model is stored on
        ``self.model``; the timer interval is returned.
        """
        self.configure(data, model, args)

        sample_input = self.get_data(data)

        with Timer() as conversion_timer:
            converted = hummingbird.ml.convert(
                model,
                self._backend_name,
                sample_input,
                device=self.params["device"],
            )
            self.model = converted

        return conversion_timer.interval
Example #20
0
    def predict(self, data):
        """Score ``data.X_test`` with ONNX Runtime and return the timer interval.

        Two inference sessions may be created from the serialized models:
        one for ``self.model`` and, when ``self.remainder_model`` is set, one
        for it as well. The remainder session is used for the last batch
        only. Session creation happens before the ``Timer`` starts; data
        preparation (``ScoreBackend.get_data``) and all ``run`` calls are
        timed.

        For the ``"xgb"`` operator the batch size is forced to 1 and each
        result is written into the matching slice of the existing
        ``self.predictions``. For every other operator the per-batch results
        are accumulated and ``self.predictions`` is assigned once, covering
        all batches (previously each batch overwrote the previous one so
        only the last batch was kept).

        The first session output is read for regression tasks and for
        operators whose name contains ``"SVC"``; the second output otherwise.
        """
        # Local imports: numpy is only needed to stitch batches together,
        # onnxruntime was already imported lazily here.
        import numpy as np
        import onnxruntime as ort

        assert self.model is not None

        remainder_sess = None
        sess_options = ort.SessionOptions()
        sess_options.intra_op_num_threads = self.params["nthread"]
        sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        sess = ort.InferenceSession(self.model.SerializeToString(), sess_options=sess_options)
        if self.remainder_model is not None:
            remainder_sess = ort.InferenceSession(self.remainder_model.SerializeToString(), sess_options=sess_options)

        is_xgb = self.params["operator"] == "xgb"
        batch_size = 1 if is_xgb else self.params["batch_size"]
        input_name = sess.get_inputs()[0].name
        is_regression = data.learning_task == LearningTask.REGRESSION or "SVC" in self.params["operator"]
        output_name_index = 0 if is_regression else 1
        output_name = sess.get_outputs()[output_name_index].name

        with Timer() as t:
            predict_data = ScoreBackend.get_data(data.X_test)
            total_size = len(predict_data)
            iterations = total_size // batch_size
            iterations += 1 if total_size % batch_size > 0 else 0
            iterations = max(1, iterations)

            # Per-batch results for the non-xgb path.
            collected = []

            for i in range(0, iterations):
                start = i * batch_size
                end = min(start + batch_size, total_size)
                batch = predict_data[start:end, :]

                if is_xgb:
                    self.predictions[start:end, :] = sess.run([output_name], {input_name: batch})
                    continue

                # The last batch goes through the remainder session when
                # one exists.
                if i == iterations - 1 and self.remainder_model is not None:
                    pred = remainder_sess.run([output_name], {input_name: batch})
                else:
                    pred = sess.run([output_name], {input_name: batch})

                if is_regression:
                    collected.append(pred[0])
                else:
                    # Each element of pred[0] is a mapping; keep its values
                    # as one row.
                    collected.extend(list(x.values()) for x in pred[0])

            if not is_xgb:
                if is_regression:
                    # A single batch is stored as-is (same object as before);
                    # several batches are joined along the first axis.
                    self.predictions = collected[0] if len(collected) == 1 else np.concatenate(collected)
                else:
                    self.predictions = collected

        del sess
        if remainder_sess is not None:
            del remainder_sess

        return t.interval
Example #21
0
    def predict(self, data):
        """Time data preparation plus inference on ``data.X_test``.

        Inside the ``Timer`` the input is prepared with ``self.get_data`` and
        one of three model methods is called:

        * ``transform`` when ``self.params["transform"]`` is truthy,
        * ``predict`` for regression tasks or operators whose name contains
          ``"SVC"``,
        * ``predict_proba`` otherwise.

        The output is stored on ``self.predictions``; the timer interval is
        returned.
        """
        assert self.model is not None

        wants_values = data.learning_task == LearningTask.REGRESSION or "SVC" in self.params["operator"]

        with Timer() as predict_timer:
            prepared = self.get_data(data.X_test)
            if self.params["transform"]:
                infer = self.model.transform
            elif wants_values:
                infer = self.model.predict
            else:
                infer = self.model.predict_proba
            self.predictions = infer(prepared)

        return predict_timer.interval
Example #22
0
    def convert(self, model, data, args, model_name):
        """Convert ``model`` for ``self.backend`` and return the timer interval.

        ``self.configure`` and ``self.get_data(data.X_test)`` run before the
        ``Timer`` starts. The ``convert`` call receives the device from
        ``self.params`` plus an ``extra_config`` carrying the thread count and
        batch size. The result is stored on ``self.model``. ``model_name`` is
        not used.
        """
        self.configure(data, model, args)

        sample_input = self.get_data(data.X_test)

        with Timer() as conversion_timer:
            self.model = convert(
                model,
                self.backend,
                sample_input,
                device=self.params["device"],
                extra_config={
                    constants.N_THREADS: self.params["nthread"],
                    constants.BATCH_SIZE: self.params["batch_size"],
                },
            )

        elapsed = conversion_timer.interval
        return elapsed
Example #23
0
    def convert(self, model, data, args, model_name):
        """Convert ``model`` to ONNX with ``convert_sklearn`` and time it.

        The main model is converted for an input of ``batch`` rows, where
        ``batch`` is the smaller of the test-set length and
        ``self.params["batch_size"]``. When the test-set length is not a
        multiple of ``batch``, a second model sized for the leftover rows is
        stored on ``self.remainder_model``. ``model_name`` is not used.

        Returns the ``Timer`` interval covering both conversions.
        """
        from skl2onnx import convert_sklearn
        from onnxmltools.convert.common.data_types import FloatTensorType

        self.configure(data, model, args)

        with Timer() as conversion_timer:
            n_rows = len(data.X_test)
            batch = min(n_rows, self.params["batch_size"])
            remainder = n_rows % batch

            main_type = [("input", FloatTensorType([batch, self.params["input_size"]]))]
            self.model = convert_sklearn(model, initial_types=main_type)

            if remainder > 0:
                # NOTE(review): only the remainder model pins target_opset=11;
                # the main model uses the converter default -- confirm intended.
                remainder_type = [("input", FloatTensorType([remainder, self.params["input_size"]]))]
                self.remainder_model = convert_sklearn(model, initial_types=remainder_type, target_opset=11)
        return conversion_timer.interval
Example #24
0
    def fit(self, data, args):
        """Build a random forest from the configured params and time its fit.

        ``self.configure(data, args)`` supplies ``max_depth``, ``ntrees`` and
        ``njobs``. A ``RandomForestRegressor`` is used for
        ``LearningTask.REGRESSION`` and a ``RandomForestClassifier``
        otherwise. The estimator is stored on ``self.model``.

        Returns the ``Timer`` interval measured around ``self.model.fit``.
        """
        params = self.configure(data, args)

        if data.learning_task == LearningTask.REGRESSION:
            forest_cls = RandomForestRegressor
        else:
            forest_cls = RandomForestClassifier

        self.model = forest_cls(max_depth=params["max_depth"],
                                n_estimators=params["ntrees"],
                                n_jobs=params["njobs"])

        with Timer() as fit_timer:
            # NOTE(review): targets are cast with astype("|i4") for both
            # tasks, regression included -- confirm this is intended.
            self.model.fit(data.X_train, data.y_train.astype("|i4"))

        return fit_timer.interval
Example #25
0
    def convert(self, model, data, args, model_name):
        """Convert ``model`` with ``convert_batch`` and return the timer interval.

        Before the ``Timer`` starts, the test data is prepared with
        ``self.get_data`` and the remainder size (row count modulo
        ``self.params["batch_size"]``) is computed. The converted model is
        stored on ``self.model``. ``model_name`` is not used.
        """
        self.configure(data, model, args)

        sample_input = self.get_data(data.X_test)
        leftover_rows = sample_input.shape[0] % self.params["batch_size"]

        with Timer() as conversion_timer:
            self.model = convert_batch(
                model,
                self.backend,
                sample_input,
                leftover_rows,
                device=self.params["device"],
                extra_config={constants.N_THREADS: self.params["nthread"]},
            )

        elapsed = conversion_timer.interval
        return elapsed
Example #26
0
    def convert(self, model, data, args, model_name):
        """Convert a tree model to ONNX and return the timer interval.

        The converter is chosen from ``self.params["operator"]``:

        * ``"xgb"``: the booster's feature names are replaced by their
          positional indices as strings, then ``convert_xgboost`` is called
          for a single-row input with ``target_opset=11``.
        * ``"lgbm"`` / ``"rf"``: ``convert_lightgbm`` / ``convert_sklearn``
          is called for an input of ``batch`` rows (the smaller of the
          test-set length and ``self.params["batch_size"]``). When the
          test-set length is not a multiple of ``batch``, a second model
          sized for the leftover rows is stored on ``self.remainder_model``.

        ``model_name`` is not used.

        Raises:
            ValueError: if the operator is none of ``"xgb"``, ``"lgbm"`` or
                ``"rf"`` (previously this surfaced as an unbound
                ``converter``).
        """
        from onnxmltools.convert import convert_xgboost
        from onnxmltools.convert import convert_lightgbm
        from skl2onnx import convert_sklearn
        from onnxmltools.convert.common.data_types import FloatTensorType

        self.configure(data, model, args)

        with Timer() as t:
            operator = self.params["operator"]
            if operator == "xgb":
                initial_type = [
                    ("input", FloatTensorType([1, self.params["input_size"]]))
                ]
                # Rename the features to "0", "1", ... before conversion.
                model._Booster.feature_names = [
                    str(index)
                    for index in range(len(model._Booster.feature_names))
                ]
                self.model = convert_xgboost(model,
                                             initial_types=initial_type,
                                             target_opset=11)
            else:
                if operator == "lgbm":
                    converter = convert_lightgbm
                elif operator == "rf":
                    converter = convert_sklearn
                else:
                    raise ValueError(
                        "Unsupported operator for ONNX conversion: "
                        "{!r}".format(operator))

                batch = min(len(data.X_test), self.params["batch_size"])
                remainder = len(data.X_test) % batch
                initial_type = [
                    ("input",
                     FloatTensorType([batch, self.params["input_size"]]))
                ]

                self.model = converter(model, initial_types=initial_type)
                if remainder > 0:
                    # NOTE(review): only the remainder model pins
                    # target_opset=11 -- confirm intended.
                    initial_type = [("input",
                                     FloatTensorType([
                                         remainder, self.params["input_size"]
                                     ]))]
                    self.remainder_model = converter(
                        model, initial_types=initial_type, target_opset=11)
        return t.interval
Example #27
0
def main():
    """Benchmark pickled pipelines against one or more scoring backends.

    Layout expected under ``args.pipedir``: one directory per task, named
    with an integer, containing ``data/X.dat.npy`` and a ``pipelines``
    directory of ``<integer>.pkl`` files.

    For every pipeline the original model's ``predict`` is timed
    ``args.niters`` times. Then, for each comma-separated backend in
    ``args.backend``, the model is converted and its prediction timed the
    same way. A failed conversion or backend prediction is printed and
    counted in ``skipped``. With ``args.validate`` set, backend predictions
    are compared to the original ones with ``np.testing.assert_allclose``.

    The collected results are written as JSON to ``args.output``.

    Raises:
        Exception: if ``args.pipedir`` does not exist.
    """
    args = parse_args()
    print(args.gpu)
    print_sys_info(args)
    results = {}
    set_signal()
    skipped = 0
    total = 0

    if not os.path.exists(args.pipedir):
        raise Exception(args.pipedir + " directory not found")

    # Task directories are named with integers; visit them in numeric order.
    task_ids = sorted(int(name) for name in os.listdir(args.pipedir))
    for task in map(str, task_ids):
        print("Task-{}".format(task))
        task_dir = os.path.join(args.pipedir, task)
        task_pip_dir = os.path.join(task_dir, "pipelines")

        X = np.load(os.path.join(task_dir, "data", "X.dat.npy"))
        results[task] = {
            "dataset_size": X.shape[0],
            "num_features": X.shape[1]
        }
        # Pipeline files are "<integer>.pkl": drop the 4-character suffix
        # and sort numerically.
        pipeline_ids = sorted(
            int(name[:-4]) for name in os.listdir(task_pip_dir))
        res = []
        for pipeline in map(str, pipeline_ids):
            total += 1
            # NOTE: joblib.load unpickles arbitrary objects; only point this
            # script at trusted pipeline files.
            # The file is closed as soon as the model is loaded instead of
            # staying open for the whole benchmark of this pipeline.
            with open(os.path.join(task_pip_dir, pipeline + ".pkl"),
                      "rb") as f:
                model = joblib.load(f)

            assert model is not None

            results[task][pipeline] = {}
            times = []
            mean = 0
            print("Pipeline-{}".format(pipeline))

            try:
                for _ in range(args.niters):
                    set_alarm(3600)
                    try:
                        with Timer() as t:
                            res = model.predict(X)
                    finally:
                        # Always disarm, so a failed prediction cannot leave
                        # an alarm pending for later, unrelated work.
                        set_alarm(0)
                    times.append(t.interval)
                mean = stats.trim_mean(
                    times, 1 / len(times)) if args.niters > 1 else times[0]
                gc.collect()
            except Exception as e:
                # Best effort: report and carry on with mean == 0.
                print(e)
            results[task][pipeline] = {"prediction_time": mean}

            for backend in args.backend.split(","):
                print("Running '%s' ..." % backend)
                scorer = score.ScoreBackend.create(backend)
                with scorer:
                    try:
                        conversion_time = scorer.convert(model, X, args)
                    except Exception as e:
                        skipped += 1
                        print(e)
                        continue

                    times = []
                    prediction_time = 0
                    try:
                        for _ in range(args.niters):
                            set_alarm(3600)
                            try:
                                times.append(scorer.predict(X))
                            finally:
                                set_alarm(0)
                        prediction_time = times[
                            0] if args.niters == 1 else stats.trim_mean(
                                times, 1 / len(times))
                        gc.collect()
                    except Exception as e:
                        skipped += 1
                        print(e)

                    # Positive speedup: the backend is faster than the
                    # original model. Negative: it is slower (or equal).
                    # "0": one of the two timings is missing.
                    if mean == 0 or prediction_time == 0:
                        speedup = "0"
                    elif prediction_time < mean:
                        speedup = str(mean / prediction_time)
                    else:
                        speedup = str(-prediction_time / mean)

                    results[task][pipeline][backend] = {
                        "conversion_time": str(conversion_time),
                        "prediction_time": str(prediction_time),
                        "speedup": speedup,
                    }

                    print(results[task][pipeline][backend])

                    if args.validate:
                        np.testing.assert_allclose(scorer.predictions,
                                                   res,
                                                   rtol=1e-5,
                                                   atol=1e-6)

    output = json.dumps(results, indent=2)
    with open(args.output, "w") as output_file:
        output_file.write(output + "\n")

    print("All results written to file '%s'" % args.output)
    print("Total num of pipelines: {}; skipped: {}".format(total, skipped))
def worst_case_benchmark(number_of_states=50,
                         number_of_actions=10,
                         dimensions=3) -> None:
    """Time the SSPE and SSPP solvers on complete MDPs and plot the results.

    Complete MDPs built by ``generator.complete_MDP(s, alpha)`` are solved
    for ``s`` from 2 up to ``number_of_states`` and ``alpha`` up to
    ``number_of_actions`` (both inclusive). A table row is printed for each
    instance and the timings are stored in two arrays indexed
    ``[alpha][n]``.

    Args:
        number_of_states: largest number of states to benchmark.
        number_of_actions: largest number of actions to benchmark.
        dimensions: ``3`` sweeps every action count from 1 and also saves
            3-D surface plots; any other value only benchmarks the largest
            action count. The 2-D curves are saved when ``dimensions >= 2``.

    Figures are written to the ``benchmarks/`` directory.
    """
    # Turn the inclusive upper bounds into exclusive range/array bounds.
    number_of_states += 1
    number_of_actions += 1
    T = [0]
    x = 1 if dimensions == 3 else number_of_actions - 1
    Y1 = numpy.zeros((number_of_actions, number_of_states))
    Y2 = numpy.zeros((number_of_actions, number_of_states))
    print('{:^12} | {:^15} | {:^12} | {:^19}'.format(
        '# states (n)', '# actions (a)', 'SSPE to T',
        'SSPP from s0 to T (l=5)'))
    for alpha in range(x, number_of_actions):
        for size in range(2, number_of_states):
            mdp = generator.complete_MDP(size, alpha)
            n = mdp.number_of_states
            print('{:^12d} | {:^15d} | '.format(n, alpha), end='')
            # expected cost to T
            t = Timer(verbose=False)
            with t:
                min_expected_cost(mdp, T)
            time_taken = t.interval
            Y1[alpha][n] = time_taken
            print('{:^12f} | '.format(time_taken), end='')
            # SSPP
            t = Timer(verbose=False)
            with t:
                # NOTE(review): the loop index is not used, so the same
                # query is repeated once per state -- confirm intended.
                for _ in range(mdp.number_of_states):
                    force_short_paths_from(mdp, mdp.number_of_states - 1, T, 5,
                                           0)
            time_taken = t.interval
            Y2[alpha][n] = time_taken
            print('{:^19f}'.format(time_taken))
    if dimensions == 3:
        N, A = numpy.meshgrid(range(number_of_states),
                              range(number_of_actions))
        fig = plt.figure(figsize=(8, 6))
        # Attach the 3-D axes explicitly: recent matplotlib releases no
        # longer add a bare Axes3D(fig) to the figure, which leaves the
        # saved image empty. The rect matches Axes3D's default.
        ax = fig.add_axes([0., 0., 1., 1.], projection='3d')
        ax.plot_surface(N, A, Y1, rstride=1, cstride=1, cmap='Blues_r')

        ax.set_xlabel("Nombre d'états")
        ax.set_ylabel("Nombre d'actions possibles par état")
        ax.set_zlabel("Temps (sec) pour résoudre le problème SSPE")
        ax.set_title("PDMP complet")

        plt.savefig('benchmarks/sspe1.png', dpi=300)
        # Second view of the same surface from another angle.
        ax.view_init(elev=30., azim=-114.)
        plt.savefig('benchmarks/sspe2.png', dpi=300)

        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_axes([0., 0., 1., 1.], projection='3d')
        ax.plot_surface(N, A, Y2, rstride=1, cstride=1, cmap='Reds_r')

        ax.set_xlabel("Nombre d'états")
        ax.set_ylabel("Nombre d'actions possibles par état")
        ax.set_zlabel("Temps (sec) pour résoudre le problème SSPP (l=5)")
        ax.set_title("PDMP complet")

        plt.savefig('benchmarks/sspp1.png', dpi=300)
        ax.view_init(elev=30., azim=-114.)
        plt.savefig('benchmarks/sspp2.png', dpi=300)

    if dimensions >= 2:
        # Both 2-D figures use the timings of the largest action count.
        largest_alpha = number_of_actions - 1

        fig = plt.figure(figsize=(8, 6))
        fig.suptitle('PDMP Complet avec |A| = %d' % int(largest_alpha))
        fig.subplots_adjust(top=0.81)

        plt.plot(
            Y1[largest_alpha],
            label="Problème de l'espérance du plus court chemin stochastique")
        plt.plot(
            Y2[largest_alpha],
            label=
            "Problème des plus courts chemins stochastiques de taille limitée "
            "(l=5)")
        plt.xlabel("Nombre d'états")
        plt.ylabel("Temps (sec)")
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                   loc=3,
                   borderaxespad=0.,
                   prop={'size': 10})
        plt.savefig('benchmarks/solvers.png', dpi=300)

        # Same SSPP curve alone, on a logarithmic time axis.
        fig = plt.figure(figsize=(8, 6))
        fig.suptitle('PDMP Complet avec |A| = %d (échelle logarithmique)' %
                     int(largest_alpha))
        fig.subplots_adjust(top=0.81)

        plt.plot(
            Y2[largest_alpha],
            'r',
            label=
            "Problème des plus courts chemins stochastiques de taille limitée "
            "(l=5)")
        plt.xlabel("Nombre d'états")
        plt.ylabel("Temps (sec)")
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                   loc=3,
                   borderaxespad=0.,
                   prop={'size': 10})
        plt.yscale('log')
        plt.savefig('benchmarks/solvers2.png', dpi=300)
Example #29
0
    def predict(self, data):
        """Run ``self.test(data)`` under a ``Timer`` and return its interval.

        The result is stored on ``self.predictions``. After the timed call,
        ``data.learning_task`` is overwritten with
        ``LearningTask.REGRESSION``.
        """
        with Timer() as predict_timer:
            outputs = self.test(data)
            self.predictions = outputs

        elapsed = predict_timer.interval
        data.learning_task = LearningTask.REGRESSION
        return elapsed
def worst_case_benchmark_sspp(number_of_states=51,
                              number_of_actions=11,
                              l_max=21,
                              dimensions=3) -> None:
    """Time the SSPP solver against the length threshold and plot the results.

    For every length threshold from 1 to ``l_max`` (inclusive),
    ``force_short_paths_from`` is timed on complete MDPs built by
    ``generator.complete_MDP(s, number_of_actions)``. A table row is printed
    for each instance and the timings are stored in an array indexed
    ``[threshold][n]``.

    Args:
        number_of_states: largest number of states to benchmark.
        number_of_actions: number of actions passed to the generator.
        l_max: largest length threshold to benchmark.
        dimensions: ``3`` sweeps state counts from 2 and also saves 3-D
            surface plots; any other value only benchmarks the largest
            state count.

    Three 2-D figures for the largest state count are always saved: time
    against the threshold value, time against ``log2(threshold) + 1``, and
    the latter on a logarithmic time axis. Figures are written to the
    ``benchmarks/`` directory.
    """
    # Turn the inclusive upper bounds into exclusive range/array bounds.
    number_of_states += 1
    l_max += 1
    T = [0]
    Y = numpy.zeros((l_max, number_of_states))
    x = 2 if dimensions == 3 else number_of_states - 1
    print('Number of actions : %d' % number_of_actions)
    print('{:^12} | {:^16} | {:^19}'.format('# states (n)', 'length threshold',
                                            'SSPP from s0 to T'))
    for length in range(1, l_max):
        for size in range(x, number_of_states):
            mdp = generator.complete_MDP(size, number_of_actions)
            n = mdp.number_of_states
            print('{:^12d} | {:^16d} | '.format(n, length), end='')
            # SSPP with the current length threshold
            t = Timer(verbose=False)
            with t:
                force_short_paths_from(mdp, mdp.number_of_states - 1, T,
                                       length, 0)
            time_taken = t.interval
            Y[length][n] = time_taken
            print('{:^19f}'.format(time_taken))

    if dimensions == 3:
        N, L = numpy.meshgrid(range(number_of_states), range(l_max))
        fig = plt.figure(figsize=(8, 6))
        # Attach the 3-D axes explicitly: recent matplotlib releases no
        # longer add a bare Axes3D(fig) to the figure, which leaves the
        # saved image empty. The rect matches Axes3D's default.
        ax = fig.add_axes([0., 0., 1., 1.], projection='3d')
        ax.plot_surface(N, L, Y, rstride=1, cstride=1, cmap='Greens_r')

        ax.set_xlabel("Nombre d'états")
        ax.set_ylabel("l")
        ax.set_zlabel("Temps (sec) pour résoudre le problème SSPP")
        ax.set_title("PDMP complet avec |A| = %d" % number_of_actions)

        plt.savefig('benchmarks/sspp_pseudopoly1.png', dpi=300)
        ax.view_init(elev=30., azim=-114.)
        # NOTE(review): this file name differs from the other
        # "sspp_pseudopolyN" outputs -- confirm whether that is intended.
        plt.savefig('benchmarks/ssp_pseudopoly15.png', dpi=300)

    # Timings for the largest state count, one entry per threshold
    # (index 0 is never measured and stays at 0).
    largest_n_times = [Y[i][number_of_states - 1] for i in range(l_max)]
    # log2(threshold) + 1, with 0 for the unused index 0.
    binary_sizes = [0.] + [log(i) / log(2) + 1 for i in range(1, l_max)]

    fig = plt.figure(figsize=(8, 6))
    fig.suptitle(
        'Résolution du problème SSPP pour un PDMP Complet avec |S| = %d et |A| = %d'
        % (number_of_states - 1, number_of_actions))

    plt.plot(largest_n_times, 'g')
    plt.xlabel("Valeur numérique de l (taille de l'entrée en unaire)")
    plt.ylabel("Temps (sec)")
    plt.savefig('benchmarks/sspp_pseudopoly2.png', dpi=300)

    fig = plt.figure(figsize=(8, 6))
    fig.suptitle(
        'Résolution du problème SSPP pour un PDMP Complet avec |S| = %d et |A| = %d'
        % (number_of_states - 1, number_of_actions))

    plt.plot(binary_sizes, largest_n_times, 'r')

    plt.xlabel("Taille de l'entrée l en binaire")
    plt.ylabel("Temps (sec)")
    plt.savefig('benchmarks/sspp_pseudopoly3.png', dpi=300)

    fig = plt.figure(figsize=(8, 6))
    fig.suptitle(
        'Résolution du problème SSPP pour un PDMP Complet avec |S| = %d et |A| = %d'
        ' (échelle logarithmique)' % (number_of_states - 1, number_of_actions),
        fontsize=11)

    plt.plot(binary_sizes, largest_n_times, 'r')
    plt.yscale('log')

    plt.xlabel("Taille de l'entrée l en binaire")
    plt.ylabel("Temps (sec)")
    plt.savefig('benchmarks/sspp_pseudopoly4.png', dpi=300)