def predict(self, data_inst, predict_param=None):
        """Run this party's prediction pass over the stored trees.

        The aligned input is cached in ``self.data_alignment_map`` under the
        key returned by ``self.predict_data_cache.get_data_key``; a cache hit
        skips both ``data_alignment`` and ``header_alignment``.

        Args:
            data_inst: Data to predict on.
            predict_param: Accepted but not read by this method.

        Returns:
            None. The value returned by each tree's ``predict`` is discarded.
        """
        LOGGER.info("start predict")
        dataset_key = self.predict_data_cache.get_data_key(data_inst)
        if cache_hit := dataset_key in self.data_alignment_map:
            data_inst = self.data_alignment_map[dataset_key]
        if not cache_hit:
            aligned = self.data_alignment(data_inst)
            # Place every mapping value at the list index given by its key.
            fid_mapping = self.feature_name_fid_mapping
            header = [None] * len(fid_mapping)
            for fid, name in fid_mapping.items():
                header[fid] = name
            data_inst = data_overview.header_alignment(aligned, header)
            self.data_alignment_map[dataset_key] = data_inst

        total_rounds = len(self.trees_) // self.tree_dim
        first_round = self.sync_predict_start_round()
        for boost_round in range(first_round, total_rounds):
            for tree_idx in range(self.tree_dim):
                LOGGER.info(
                    "start to predict, boost round: {}, tree index: {}".format(
                        boost_round, tree_idx))
                host_tree = HeteroDecisionTreeHost(self.tree_param)
                # trees_ is indexed flat: round * tree_dim + tree index.
                flat_pos = boost_round * self.tree_dim + tree_idx
                host_tree.load_model(self.tree_meta, self.trees_[flat_pos])
                flow_id = self.generate_flowid(boost_round, tree_idx)
                host_tree.set_flowid(flow_id)
                party_id = self.component_properties.local_partyid
                host_tree.set_runtime_idx(party_id)

                host_tree.predict(data_inst)

        LOGGER.info("end predict")
    def fit(self, data_inst):
        """Fit one tree per (boosting round, tree dimension) pair.

        Each fitted tree's parameters are appended to ``self.trees_``; the
        tree meta is stored in ``self.tree_meta`` only while that attribute
        is still ``None``. After every round, if ``self.n_iter_no_change`` is
        ``True`` and ``self.sync_stop_flag`` returns a truthy value, training
        stops.

        Args:
            data_inst: Training data; its ``schema`` is used to build the
                feature/fid mapping.
        """
        LOGGER.info("begin to train secureboosting guest model")
        self.gen_feature_fid_mapping(data_inst.schema)
        LOGGER.debug("schema is {}".format(data_inst.schema))
        aligned = self.data_alignment(data_inst)
        self.convert_feature_to_bin(aligned)
        self.sync_tree_dim()

        for boost_round in range(self.num_trees):
            for tree_idx in range(self.tree_dim):
                host_tree = HeteroDecisionTreeHost(self.tree_param)

                binned_inputs = {
                    "data_bin": self.data_bin,
                    "bin_split_points": self.bin_split_points,
                    "bin_sparse_points": self.bin_sparse_points,
                }
                host_tree.set_inputinfo(**binned_inputs)

                feature_subset = self.sample_valid_features()
                flow_id = self.generate_flowid(boost_round, tree_idx)
                host_tree.set_flowid(flow_id)
                host_tree.set_runtime_idx(self.runtime_idx)
                host_tree.set_valid_features(feature_subset)

                host_tree.fit()
                fitted_meta, fitted_param = host_tree.get_model()
                self.trees_.append(fitted_param)
                if self.tree_meta is None:
                    self.tree_meta = fitted_meta

            # The stop flag is only synced when the option is exactly True.
            if (self.n_iter_no_change is True
                    and self.sync_stop_flag(boost_round)):
                break

        LOGGER.info("end to train secureboosting guest model")
    def fit(self, data_inst, validate_data=None):
        """Fit one tree per (boosting round, tree dimension) pair, with validation.

        Each fitted tree's parameters are appended to ``self.trees_``; the
        tree meta is stored in ``self.tree_meta`` only while that attribute
        is still ``None``. After each round the validation strategy (when
        truthy) is run and may stop training; then, if
        ``self.n_iter_no_change`` is ``True``, a truthy
        ``self.sync_stop_flag`` result also stops training. When the loop is
        done, a best model saved by the validation strategy is loaded back.

        Args:
            data_inst: Training data; its ``schema`` is used to build the
                feature/fid mapping.
            validate_data: Passed to ``init_validation_strategy`` together
                with the aligned training data. Defaults to ``None``.
        """
        LOGGER.info("begin to train secureboosting guest model")
        self.gen_feature_fid_mapping(data_inst.schema)
        LOGGER.debug("schema is {}".format(data_inst.schema))
        data_inst = self.data_alignment(data_inst)
        self.convert_feature_to_bin(data_inst)
        self.sync_tree_dim()

        self.validation_strategy = self.init_validation_strategy(
            data_inst, validate_data)

        for boost_round in range(self.num_trees):
            for tree_idx in range(self.tree_dim):
                LOGGER.info(
                    "start to fit, boost round: {}, tree index: {}".format(
                        boost_round, tree_idx))
                host_tree = HeteroDecisionTreeHost(self.tree_param)

                binned_inputs = {
                    "data_bin": self.data_bin,
                    "bin_split_points": self.bin_split_points,
                    "bin_sparse_points": self.bin_sparse_points,
                }
                host_tree.set_inputinfo(**binned_inputs)

                feature_subset = self.sample_valid_features()
                flow_id = self.generate_flowid(boost_round, tree_idx)
                host_tree.set_flowid(flow_id)
                party_id = self.component_properties.local_partyid
                host_tree.set_runtime_idx(party_id)
                host_tree.set_valid_features(feature_subset)

                host_tree.fit()
                fitted_meta, fitted_param = host_tree.get_model()
                self.trees_.append(fitted_param)
                if self.tree_meta is None:
                    self.tree_meta = fitted_meta

            if self.validation_strategy:
                LOGGER.debug('host running validation')
                self.validation_strategy.validate(self, boost_round)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            # The stop flag is only synced when the option is exactly True.
            if (self.n_iter_no_change is True
                    and self.sync_stop_flag(boost_round)):
                break

        strategy = self.validation_strategy
        if strategy and strategy.has_saved_best_model():
            self.load_model(strategy.cur_best_model)

        LOGGER.info("end to train secureboosting guest model")
    def predict_f_value(self, data_inst, predict_start_round, rounds):
        """Run ``predict`` on every stored tree in the given round range.

        Args:
            data_inst: Data handed to each tree's ``predict``.
            predict_start_round: First boosting round to evaluate (inclusive).
            rounds: Boosting round at which to stop (exclusive).

        Returns:
            None. The value returned by each tree's ``predict`` is discarded.
        """
        for boost_round in range(predict_start_round, rounds):
            for tree_idx in range(self.tree_dim):
                host_tree = HeteroDecisionTreeHost(self.tree_param)
                # trees_ is indexed flat: round * tree_dim + tree index.
                flat_pos = boost_round * self.tree_dim + tree_idx
                host_tree.load_model(self.tree_meta, self.trees_[flat_pos])
                flow_id = self.generate_flowid(boost_round, tree_idx)
                host_tree.set_flowid(flow_id)
                party_id = self.component_properties.local_partyid
                host_tree.set_runtime_idx(party_id)

                host_tree.predict(data_inst)
    def predict(self, data_inst, predict_param=None):
        """Align the input and run ``predict`` on every stored tree.

        Args:
            data_inst: Data to predict on; it is passed through
                ``self.data_alignment`` first.
            predict_param: Accepted but not read by this method.

        Returns:
            None. The value returned by each tree's ``predict`` is discarded.
        """
        LOGGER.info("start predict")
        aligned = self.data_alignment(data_inst)
        total_rounds = len(self.trees_) // self.tree_dim
        for boost_round in range(total_rounds):
            for tree_idx in range(self.tree_dim):
                host_tree = HeteroDecisionTreeHost(self.tree_param)
                # trees_ is indexed flat: round * tree_dim + tree index.
                flat_pos = boost_round * self.tree_dim + tree_idx
                host_tree.load_model(self.tree_meta, self.trees_[flat_pos])
                flow_id = self.generate_flowid(boost_round, tree_idx)
                host_tree.set_flowid(flow_id)
                host_tree.set_runtime_idx(self.runtime_idx)

                host_tree.predict(aligned)

        LOGGER.info("end predict")
Exemple #6
0
    def predict(self, data_inst, predict_param=None):
        """Align the input and run ``predict`` on the stored trees.

        Evaluation starts at the round returned by
        ``self.sync_predict_start_round`` and goes up to the number of stored
        rounds (``len(self.trees_) // self.tree_dim``).

        Args:
            data_inst: Data to predict on; it is passed through
                ``self.data_alignment`` first.
            predict_param: Accepted but not read by this method.

        Returns:
            None. The value returned by each tree's ``predict`` is discarded.
        """
        LOGGER.info("start predict")
        aligned = self.data_alignment(data_inst)
        total_rounds = len(self.trees_) // self.tree_dim
        first_round = self.sync_predict_start_round()
        for boost_round in range(first_round, total_rounds):
            for tree_idx in range(self.tree_dim):
                host_tree = HeteroDecisionTreeHost(self.tree_param)
                # trees_ is indexed flat: round * tree_dim + tree index.
                flat_pos = boost_round * self.tree_dim + tree_idx
                host_tree.load_model(self.tree_meta, self.trees_[flat_pos])
                flow_id = self.generate_flowid(boost_round, tree_idx)
                host_tree.set_flowid(flow_id)
                party_id = self.component_properties.local_partyid
                host_tree.set_runtime_idx(party_id)

                host_tree.predict(aligned)

        LOGGER.info("end predict")