Esempio n. 1
0
    def fit(self, data_inst):
        """Train the host-side trees of the secureboosting model.

        The data is aligned and binned, the tree dimension is synchronised,
        and then, for each of ``self.num_trees`` boosting rounds,
        ``self.tree_dim`` ``HeteroDecisionTreeHost`` instances are fitted.
        Each fitted tree's parameters are appended to ``self.trees_``; the
        tree meta is stored in ``self.tree_meta`` only while that attribute
        is still ``None``.

        Args:
            data_inst: Training data. It is passed through
                ``self.data_alignment`` before being binned.
        """
        # This method builds HeteroDecisionTreeHost trees, so the log lines
        # name the host role (they previously said "guest").
        LOGGER.info("begin to train secureboosting host model")
        data_inst = self.data_alignment(data_inst)
        self.convert_feature_to_bin(data_inst)
        self.sync_tree_dim()

        for i in range(self.num_trees):
            for tidx in range(self.tree_dim):
                tree_inst = HeteroDecisionTreeHost(self.tree_param)
                tree_inst.set_inputinfo(
                    data_bin=self.data_bin,
                    bin_split_points=self.bin_split_points,
                    bin_sparse_points=self.bin_sparse_points,
                )

                # Features are sampled afresh for every tree.
                valid_features = self.sample_valid_features()
                tree_inst.set_flowid(self.generate_flowid(i, tidx))
                tree_inst.set_valid_features(valid_features)

                tree_inst.fit()
                tree_meta, tree_param = tree_inst.get_model()
                self.trees_.append(tree_param)
                if self.tree_meta is None:
                    self.tree_meta = tree_meta

            # The strict ``is True`` check is kept on purpose: the stop flag
            # is only synchronised when the option is the boolean True.
            if self.n_iter_no_change is True and self.sync_stop_flag(i):
                break

        LOGGER.info("end to train secureboosting host model")
    def fit(self, data_inst, validate_data=None):
        """Train the host-side trees of the secureboosting model.

        Builds the feature/fid mapping from ``data_inst.schema``, aligns and
        bins the data, synchronises the tree dimension and initialises the
        validation strategy. Then, for each of ``self.num_trees`` boosting
        rounds, ``self.tree_dim`` ``HeteroDecisionTreeHost`` instances are
        fitted and their parameters appended to ``self.trees_``. The tree
        meta is stored in ``self.tree_meta`` only while that attribute is
        still ``None``.

        Training stops early when the validation strategy reports
        ``need_stop()`` or when the synchronised stop flag is set. After the
        loop, the validation strategy's saved best model (if any) is loaded.

        Args:
            data_inst: Training data. Its ``schema`` is read before the data
                is passed through ``self.data_alignment``.
            validate_data: Optional validation data handed to
                ``self.init_validation_strategy``. Defaults to ``None``.
        """
        # This method builds HeteroDecisionTreeHost trees, so the log lines
        # name the host role (they previously said "guest").
        LOGGER.info("begin to train secureboosting host model")
        self.gen_feature_fid_mapping(data_inst.schema)
        LOGGER.debug("schema is {}".format(data_inst.schema))
        data_inst = self.data_alignment(data_inst)
        self.convert_feature_to_bin(data_inst)
        self.sync_tree_dim()

        self.validation_strategy = self.init_validation_strategy(
            data_inst, validate_data)

        for i in range(self.num_trees):
            for tidx in range(self.tree_dim):
                LOGGER.info(
                    "start to fit, boost round: {}, tree index: {}".format(
                        i, tidx))
                tree_inst = HeteroDecisionTreeHost(self.tree_param)
                tree_inst.set_inputinfo(
                    data_bin=self.data_bin,
                    bin_split_points=self.bin_split_points,
                    bin_sparse_points=self.bin_sparse_points,
                )

                # Features are sampled afresh for every tree.
                valid_features = self.sample_valid_features()
                tree_inst.set_flowid(self.generate_flowid(i, tidx))
                tree_inst.set_runtime_idx(
                    self.component_properties.local_partyid)
                tree_inst.set_valid_features(valid_features)

                tree_inst.fit()
                tree_meta, tree_param = tree_inst.get_model()
                self.trees_.append(tree_param)
                if self.tree_meta is None:
                    self.tree_meta = tree_meta

            # Validation-driven early stopping is checked first, once per
            # boosting round.
            if self.validation_strategy:
                LOGGER.debug('host running validation')
                self.validation_strategy.validate(self, i)
                if self.validation_strategy.need_stop():
                    LOGGER.debug('early stopping triggered')
                    break

            # The strict ``is True`` check is kept on purpose: the stop flag
            # is only synchronised when the option is the boolean True.
            if self.n_iter_no_change is True and self.sync_stop_flag(i):
                break

        # Restore the best model recorded by the validation strategy, if any.
        if (self.validation_strategy
                and self.validation_strategy.has_saved_best_model()):
            self.load_model(self.validation_strategy.cur_best_model)

        LOGGER.info("end to train secureboosting host model")