コード例 #1
0
 def one_vs_rest_fit(self, train_data=None):
     """Fit this classifier through a ``OneVsRest`` wrapper.

     Flags the model as one-vs-rest, records the header obtained from
     ``train_data`` (skipped for the arbiter role), builds the wrapper
     around this classifier and fits it.

     :param train_data: training instances handed to ``OneVsRest.fit``.
     """
     self.need_one_vs_rest = True
     if self.role != consts.ARBITER:
         self.header = self.get_header(train_data)

     # Collect the constructor arguments first so the wrapper creation
     # reads as a single step.
     wrapper_kwargs = dict(classifier=self,
                           role=self.role,
                           mode=self.mode,
                           one_vs_rest_param=self._get_one_vs_rest_param())
     wrapper = OneVsRest(**wrapper_kwargs)
     self.one_vs_rest_obj = wrapper
     wrapper.fit(data_instances=train_data)
コード例 #2
0
    def one_vs_rest_predict(self, data_instance):
        """Predict with a one-vs-rest model loaded from the configured table.

        In hetero mode the input is intersected first. A ``OneVsRest``
        wrapper is then built around ``self.model``, its saved state is
        loaded, and it predicts on the (possibly intersected) data. When the
        result holds more than 10 entries, the first 10 are logged at debug
        level.

        :param data_instance: instances to predict on.
        :return: the prediction result, or None when the result is falsy.
        """
        if self.mode == consts.HETERO:
            LOGGER.debug("Star intersection before predict")
            data_instance = self.intersect(data_instance, "predict_module_0")
            LOGGER.debug("End intersection before predict")

        # NOTE: feature-selection transform and scaling are not applied on
        # this path (the corresponding calls were disabled in the original).
        self.one_vs_rest_param = self._load_param(OneVsRestParam())
        ovr_model = OneVsRest(self.model, self.role, self.mode,
                              self.one_vs_rest_param)
        ovr_model.load_model(self.workflow_param.model_table,
                             self.workflow_param.model_namespace)
        predictions = ovr_model.predict(data_instance,
                                        self.workflow_param.predict_param)

        if not predictions:
            return None

        if predictions.count() > 10:
            # Only a sample of the output is logged.
            sample_iter = predictions.collect()
            for _ in range(10):
                LOGGER.debug("predict result: {}".format(next(sample_iter)))

        return predictions
コード例 #3
0
    def _load_model(self, model_dict):
        result_obj = list(model_dict.get('model').values())[0].get(self.model_param_name)
        self.header = list(result_obj.header)
        # For hetero-lr arbiter predict function
        if self.header is None:
            return

        feature_shape = len(self.header)
        self.need_one_vs_rest = result_obj.need_one_vs_rest
        if self.need_one_vs_rest:
            self.one_vs_rest_classes = list(map(int, list(result_obj.one_vs_rest_classes)))
            weight_dict = dict(result_obj.weight)
            self.one_vs_rest_obj = OneVsRest(classifier=self, role=self.role, mode=self.mode,
                                             one_vs_rest_param=self._get_one_vs_rest_param())
            self.one_vs_rest_obj.classes = self.one_vs_rest_classes
            for class_type in self.one_vs_rest_obj.classes:
                classifier = copy.deepcopy(self)
                classifier.coef_ = np.zeros(feature_shape)
                for i, feature_name in enumerate(self.header):
                    feature_name = "_".join(["class", str(class_type), feature_name])
                    classifier.coef_[i] = weight_dict.get(feature_name)
                intercept_name =  "_".join(["class", str(class_type), "intercept"])
                classifier.intercept_ = weight_dict.get(intercept_name)
                self.one_vs_rest_obj.models.append(classifier)
        else:
            self.coef_ = np.zeros(feature_shape)
            weight_dict = dict(result_obj.weight)
            self.intercept_ = result_obj.intercept

            for idx, header_name in enumerate(self.header):
                self.coef_[idx] = weight_dict.get(header_name)
コード例 #4
0
    def train(self, train_data, validation_data=None):
        """Run the training workflow: preprocess, fit, save, then evaluate.

        Steps, in order: intersection (hetero mode, non-arbiter roles),
        sampling, feature selection, scaling (hetero mode, non-arbiter
        roles), one-hot encoding, optional wrapping of ``self.model`` in
        ``OneVsRest``, fitting and saving.

        Guest and host roles, and every role in homo mode, then predict on
        the training data and evaluate the result. When ``validation_data``
        is given it is prepared the same way (intersection and scaling in
        hetero mode), predicted and evaluated as well. The evaluation
        results are logged and saved.

        :param train_data: training instances.
        :param validation_data: optional validation instances.
        """
        if self.mode == consts.HETERO and self.role != consts.ARBITER:
            LOGGER.debug("Enter train function")
            LOGGER.debug("Star intersection before train")
            intersect_flowid = "train_0"
            train_data = self.intersect(train_data, intersect_flowid)
            LOGGER.debug("End intersection before train")

        sample_flowid = "train_sample_0"
        train_data = self.sample(train_data, sample_flowid)

        train_data = self.feature_selection_fit(train_data)
        validation_data = self.feature_selection_transform(validation_data)

        if self.mode == consts.HETERO and self.role != consts.ARBITER:
            # The scale values are reused below to scale the validation data.
            train_data, cols_scale_value = self.scale(train_data)

        train_data = self.one_hot_encoder_fit_transform(train_data)
        validation_data = self.one_hot_encoder_transform(validation_data)

        if self.workflow_param.one_vs_rest:
            one_vs_rest_param = OneVsRestParam()
            self.one_vs_rest_param = ParamExtract.parse_param_from_config(
                one_vs_rest_param, self.config_path)
            one_vs_rest = OneVsRest(self.model, self.role, self.mode,
                                    self.one_vs_rest_param)
            # From here on the wrapper stands in for the plain model.
            self.model = one_vs_rest

        self.model.fit(train_data)
        self.save_model()
        LOGGER.debug("finish saving, self role: {}".format(self.role))

        # The mode constant was garbled as ``consts.H**O`` (which parses as
        # ``consts.H ** O`` and fails at runtime); it is the homo mode.
        if self.role == consts.GUEST or self.role == consts.HOST or \
                self.mode == consts.HOMO:
            eval_result = {}
            LOGGER.debug("predicting...")
            predict_result = self.model.predict(
                train_data, self.workflow_param.predict_param)

            LOGGER.debug("evaluating...")
            train_eval = self.evaluate(predict_result)
            eval_result[consts.TRAIN_EVALUATE] = train_eval
            if validation_data is not None:
                self.model.set_flowid("1")
                if self.mode == consts.HETERO:
                    LOGGER.debug("Star intersection before predict")
                    intersect_flowid = "predict_0"
                    validation_data = self.intersect(validation_data,
                                                     intersect_flowid)
                    LOGGER.debug("End intersection before predict")

                    validation_data, cols_scale_value = self.scale(
                        validation_data, cols_scale_value)

                val_pred = self.model.predict(
                    validation_data, self.workflow_param.predict_param)
                val_eval = self.evaluate(val_pred)
                eval_result[consts.VALIDATE_EVALUATE] = val_eval
            LOGGER.info("{} eval_result: {}".format(self.role, eval_result))
            self.save_eval_result(eval_result)
コード例 #5
0
    def one_vs_rest_train(self, train_data, validation_data=None):
        """Fit a ``OneVsRest`` wrapper, predict with it, and save the model.

        The one-vs-rest parameters are parsed from ``self.config_path``. After
        fitting on ``train_data`` and predicting on ``validation_data``, the
        model is saved; every (meta, param) buffer type returned by the save
        call is appended to the pipeline's node lists.

        :param train_data: training instances.
        :param validation_data: instances passed to the predict call.
        """
        self.one_vs_rest_param = ParamExtract.parse_param_from_config(
            OneVsRestParam(), self.config_path)
        ovr_model = OneVsRest(self.model, self.role, self.mode,
                              self.one_vs_rest_param)

        LOGGER.debug("Start OneVsRest train")
        ovr_model.fit(train_data)

        LOGGER.debug("Start OneVsRest predict")
        ovr_model.predict(validation_data, self.workflow_param.predict_param)

        saved_buffers = ovr_model.save_model(self.workflow_param.model_table,
                                             self.workflow_param.model_namespace)
        if saved_buffers is not None:
            # Register each saved buffer pair with the pipeline.
            for meta_type, param_type in saved_buffers:
                self.pipeline.node_meta.append(meta_type)
                self.pipeline.node_param.append(param_type)
コード例 #6
0
    def run(self, config_json, job_id):
        """Dispatch the workflow selected by ``workflow_param.method``.

        Handled methods: "train", "predict", "intersect", "cross_validation"
        and "one_vs_rest_train". For train, cross_validation and
        one_vs_rest_train the arbiter role loads no input data.

        :param config_json: forwarded to ``_init_argument``.
        :param job_id: forwarded to ``_init_argument``.
        :raises TypeError: when the configured method is not one of the above.
        """
        self._init_argument(config_json, job_id)
        wf_param = self.workflow_param
        method = wf_param.method
        table_msg = "Input table:{}, input namesapce: {}"

        def load_train_and_predict_inputs(**predict_kwargs):
            # Returns (train, predict) instances; both are None for the
            # arbiter, and predict is None unless both its table and
            # namespace are configured.
            if self.role == consts.ARBITER:
                return None, None
            LOGGER.debug(table_msg.format(wf_param.train_input_table,
                                          wf_param.train_input_namespace))
            train_inst = self.gen_data_instance(wf_param.train_input_table,
                                                wf_param.train_input_namespace)
            LOGGER.debug("gen_data_finish")
            predict_inst = None
            if wf_param.predict_input_table is not None \
                    and wf_param.predict_input_namespace is not None:
                LOGGER.debug(table_msg.format(wf_param.predict_input_table,
                                              wf_param.predict_input_namespace))
                predict_inst = self.gen_data_instance(
                    wf_param.predict_input_table,
                    wf_param.predict_input_namespace,
                    **predict_kwargs)
            return train_inst, predict_inst

        if method == "train":
            # create a new pipeline
            LOGGER.debug("In running function, enter train method")
            train_data_instance, predict_data_instance = \
                load_train_and_predict_inputs(mode='transform')
            self.train(train_data_instance,
                       validation_data=predict_data_instance)
            self._save_pipeline()

        elif method == "predict":
            data_instance = self.gen_data_instance(
                wf_param.predict_input_table,
                wf_param.predict_input_namespace,
                mode='transform')
            if wf_param.one_vs_rest:
                # Wrap the model before its saved state is loaded.
                self.one_vs_rest_param = self._load_param(OneVsRestParam())
                self.model = OneVsRest(self.model, self.role, self.mode,
                                       self.one_vs_rest_param)
            self.load_model()
            self.predict(data_instance)

        elif method == "intersect":
            LOGGER.debug(
                "[Intersect]Input table:{}, input namesapce: {}".format(
                    wf_param.data_input_table,
                    wf_param.data_input_namespace))
            data_instance = self.gen_data_instance(
                wf_param.data_input_table,
                wf_param.data_input_namespace)
            self.intersect(data_instance)

        elif method == "cross_validation":
            data_instance = None
            if self.role != consts.ARBITER:
                data_instance = self.gen_data_instance(
                    wf_param.data_input_table,
                    wf_param.data_input_namespace)
            self.cross_validation(data_instance)

        elif method == "one_vs_rest_train":
            LOGGER.debug("In running function, enter one_vs_rest method")
            # Unlike "train", the predict input is loaded with the default
            # mode here.
            train_data_instance, predict_data_instance = \
                load_train_and_predict_inputs()
            self.one_vs_rest_train(train_data_instance,
                                   validation_data=predict_data_instance)
            self._save_pipeline()

        else:
            raise TypeError("method %s is not support yet" %
                            (self.workflow_param.method))

        LOGGER.debug("run_DONE")
コード例 #7
0
    def run(self):
        """Dispatch the workflow selected by ``workflow_param.method``.

        Handled methods: "train", "neighbors_sampling", "predict",
        "intersect", "cross_validation" and "one_vs_rest_train". For train,
        cross_validation and one_vs_rest_train the arbiter role loads no
        input data.

        :raises TypeError: when the configured method is not one of the above.
        """
        self._init_argument()
        wf_param = self.workflow_param
        method = wf_param.method
        table_msg = "Input table:{}, input namesapce: {}"

        def load_train_and_predict_inputs(**predict_kwargs):
            # Returns (train, predict) instances; both are None for the
            # arbiter, and predict is None unless both its table and
            # namespace are configured.
            if self.role == consts.ARBITER:
                return None, None
            LOGGER.debug(table_msg.format(wf_param.train_input_table,
                                          wf_param.train_input_namespace))
            train_inst = self.gen_data_instance(wf_param.train_input_table,
                                                wf_param.train_input_namespace)
            LOGGER.debug("gen_data_finish")
            predict_inst = None
            if wf_param.predict_input_table is not None \
                    and wf_param.predict_input_namespace is not None:
                LOGGER.debug(table_msg.format(wf_param.predict_input_table,
                                              wf_param.predict_input_namespace))
                predict_inst = self.gen_data_instance(
                    wf_param.predict_input_table,
                    wf_param.predict_input_namespace,
                    **predict_kwargs)
            return train_inst, predict_inst

        if method == "train":
            # create a new pipeline
            LOGGER.debug("In running function, enter train method")
            train_data_instance, predict_data_instance = \
                load_train_and_predict_inputs(mode='transform')
            self.train(train_data_instance, validation_data=predict_data_instance)
            self._save_pipeline()

        elif method == 'neighbors_sampling':
            LOGGER.debug("In running function, enter neighbors sampling")
            LOGGER.debug("[Neighbors sampling]Input table:{}, input namespace:{}".format(
                wf_param.data_input_table,
                wf_param.data_input_namespace))
            data_instance = self.gen_data_instance(wf_param.data_input_table,
                                                   wf_param.data_input_namespace)

            LOGGER.info("{}".format(wf_param.local_samples_namespace))
            LOGGER.info("{}".format(wf_param.distributed_samples_namespace))

            # The loaded table is used both as the adjacency input of the
            # samplers and as the input of the first intersection.
            adj_instances = data_instance
            common_instance = self.intersect(data_instance, 'neigh_sam_intersect_0')
            LOGGER.info("The number of common nodes: {}".format(common_instance.count()))

            local_instances = self.neighbors_sampler.local_neighbors_sampling(
                adj_instances, self.role)
            # persistent
            local_instances.save_as(name=self.role,
                                    namespace=wf_param.local_samples_namespace,
                                    partition=10)

            bridge_instances = NeighborsSampling.get_bridge_nodes(common_instance)
            bridge_instances = self.intersect(bridge_instances, 'neigh_sam_intersect_1')
            logDtableInstances(LOGGER, bridge_instances, 5)

            sampled = self.neighbors_sampler.distributed_neighbors_sampling(
                bridge_instances, adj_instances)
            distributed_instances_target, distributed_instances_anchor = sampled

            # Both distributed tables are stored under "<namespace>/<role>".
            role_namespace = wf_param.distributed_samples_namespace + "/" + self.role
            distributed_instances_target.save_as(name="target",
                                                 namespace=role_namespace,
                                                 partition=10)
            distributed_instances_anchor.save_as(name='anchor',
                                                 namespace=role_namespace,
                                                 partition=10)

            if self.role == 'host':
                LOGGER.info("Neighbors_sampling_finish")

        elif method == "predict":
            data_instance = self.gen_data_instance(wf_param.predict_input_table,
                                                   wf_param.predict_input_namespace,
                                                   mode='transform')
            if wf_param.one_vs_rest:
                # Wrap the model before its saved state is loaded.
                self.one_vs_rest_param = ParamExtract.parse_param_from_config(
                    OneVsRestParam(), self.config_path)
                self.model = OneVsRest(self.model, self.role, self.mode,
                                       self.one_vs_rest_param)

            self.load_model()
            self.predict(data_instance)

        elif method == "intersect":
            LOGGER.debug("[Intersect]Input table:{}, input namespace: {}".format(
                wf_param.data_input_table,
                wf_param.data_input_namespace))
            data_instance = self.gen_data_instance(wf_param.data_input_table,
                                                   wf_param.data_input_namespace)
            self.intersect(data_instance)

        elif method == "cross_validation":
            data_instance = None
            if self.role != consts.ARBITER:
                data_instance = self.gen_data_instance(wf_param.data_input_table,
                                                       wf_param.data_input_namespace)
            self.cross_validation(data_instance)

        elif method == "one_vs_rest_train":
            LOGGER.debug("In running function, enter one_vs_rest method")
            # Unlike "train", the predict input is loaded with the default
            # mode here.
            train_data_instance, predict_data_instance = \
                load_train_and_predict_inputs()
            self.one_vs_rest_train(train_data_instance, validation_data=predict_data_instance)
            self._save_pipeline()

        else:
            raise TypeError("method %s is not support yet" % (self.workflow_param.method))
コード例 #8
0
class BaseLogisticRegression(ModelBase):
    """Common state and helpers for logistic-regression models.

    Holds the coefficient vector, intercept and header, applies gradient
    updates, converts the model to and from its protobuf meta/param
    objects, and provides the cross-validation and one-vs-rest entry
    points. ``fit`` is a no-op here and is left to subclasses.
    """

    def __init__(self):
        super(BaseLogisticRegression, self).__init__()
        self.model_param = LogisticParam()
        # attribute:
        self.n_iter_ = 0
        self.coef_ = None
        self.intercept_ = 0
        self.classes_ = None
        self.feature_shape = None

        self.gradient_operator = None
        self.initializer = Initializer()
        self.transfer_variable = None
        self.loss_history = []
        self.is_converged = False
        self.header = None
        self.class_name = self.__class__.__name__
        self.model_name = 'LogisticRegression'
        self.model_param_name = 'LogisticRegressionParam'
        self.model_meta_name = 'LogisticRegressionMeta'
        self.role = ''
        self.mode = ''
        self.schema = {}

        # one_vs_rest parameter
        self.need_one_vs_rest = False
        self.one_vs_rest_classes = []
        self.one_vs_rest_obj = None

    def _init_model(self, params):
        """Copy hyper-parameters from ``params`` and build the helper objects.

        Selects the updater from the penalty (L1, L2 or none), the encrypt
        operator from the encrypt method (Paillier or fake) and the
        convergence function ('diff', 'weight_diff', otherwise absolute).

        :param params: parameter object supplying the attributes read below.
        """
        self.model_param = params
        self.alpha = params.alpha
        self.init_param_obj = params.init_param
        self.fit_intercept = self.init_param_obj.fit_intercept
        self.learning_rate = params.learning_rate
        self.encrypted_mode_calculator_param = params.encrypted_mode_calculator_param
        self.encrypted_calculator = None

        if params.penalty == consts.L1_PENALTY:
            self.updater = L1Updater(self.alpha, self.learning_rate)
        elif params.penalty == consts.L2_PENALTY:
            self.updater = L2Updater(self.alpha, self.learning_rate)
        else:
            self.updater = None

        self.eps = params.eps
        self.batch_size = params.batch_size
        self.max_iter = params.max_iter
        self.party_weight = params.party_weight
        self.penalty = params.penalty

        if params.encrypt_param.method == consts.PAILLIER:
            self.encrypt_operator = PaillierEncrypt()
        else:
            self.encrypt_operator = FakeEncrypt()

        if params.converge_func == 'diff':
            self.converge_func = convergence.DiffConverge(eps=self.eps)
        elif params.converge_func == 'weight_diff':
            self.converge_func = convergence.WeightDiffConverge(eps=self.eps)
        else:
            self.converge_func = convergence.AbsConverge(eps=self.eps)

        self.re_encrypt_batches = params.re_encrypt_batches
        self.predict_param = params.predict_param
        self.optimizer = Optimizer(params.learning_rate, params.optimizer)
        self.key_length = params.encrypt_param.key_length

    def set_feature_shape(self, feature_shape):
        """Store a fixed feature shape that ``get_features_shape`` will return."""
        self.feature_shape = feature_shape

    def set_header(self, header):
        """Store a header that ``get_header`` will return."""
        self.header = header

    def get_features_shape(self, data_instances):
        """Return the stored feature shape, or derive it from ``data_instances``."""
        if self.feature_shape is not None:
            return self.feature_shape
        return data_overview.get_features_shape(data_instances)

    def get_header(self, data_instances):
        """Return the stored header, or the 'header' entry of the data schema."""
        if self.header is not None:
            return self.header
        return data_instances.schema.get("header")

    def compute_wx(self, data_instances, coef_, intercept_=0):
        """Map every instance to ``dot(features, coef_) + intercept_``."""
        return data_instances.mapValues(lambda v: np.dot(v.features, coef_) + intercept_)

    def update_model(self, gradient):
        """Apply one gradient step to ``coef_`` (and ``intercept_``).

        When ``fit_intercept`` is set, the last gradient element belongs to
        the intercept and the rest to the coefficients. The coefficients go
        through ``self.updater`` when one is configured, otherwise the
        gradient is subtracted directly.

        :param gradient: gradient vector.
        """
        if self.fit_intercept:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient[:-1])
            else:
                self.coef_ = self.coef_ - gradient[:-1]
            self.intercept_ -= gradient[-1]

        else:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient)
            else:
                self.coef_ = self.coef_ - gradient

    def merge_model(self):
        """Return a copy of ``coef_`` with the intercept appended when fitted."""
        w = self.coef_.copy()
        if self.fit_intercept:
            w = np.append(w, self.intercept_)
        return w

    def set_coef_(self, w):
        """Split ``w`` into ``coef_`` and ``intercept_`` (inverse of ``merge_model``).

        :param w: weight vector; its last element is the intercept when
            ``fit_intercept`` is set, otherwise the intercept becomes 0.
        """
        if self.fit_intercept:
            self.coef_ = w[: -1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0

        LOGGER.debug("In set_coef_, coef: {}, intercept: {}, fit_intercept: {}".format(
            self.coef_, self.intercept_, self.fit_intercept
        ))

    def classified(self, prob_table, threshold):
        """
        convert a probability table into a predicted class table.

        Values strictly greater than ``threshold`` map to 1, all others to 0.
        """
        predict_table = prob_table.mapValues(lambda x: 1 if x > threshold else 0)
        return predict_table

    def fit(self, data_instance):
        """Do nothing; training is left to subclasses."""
        pass

    def _get_meta(self):
        """Build the ``LRModelMeta`` protobuf object from the hyper-parameters."""
        meta_protobuf_obj = lr_model_meta_pb2.LRModelMeta(penalty=self.model_param.penalty,
                                                          eps=self.eps,
                                                          alpha=self.alpha,
                                                          optimizer=self.model_param.optimizer,
                                                          party_weight=self.model_param.party_weight,
                                                          batch_size=self.batch_size,
                                                          learning_rate=self.learning_rate,
                                                          max_iter=self.max_iter,
                                                          converge_func=self.model_param.converge_func,
                                                          re_encrypt_batches=self.re_encrypt_batches)
        return meta_protobuf_obj

    def _get_param(self):
        """Build the ``LRModelParam`` protobuf object holding the weights.

        Without a header an empty ``LRModelParam`` is returned. For a plain
        model the weight mapping is keyed by header name. For a one-vs-rest
        model every sub-model contributes ``class_<cls>_<feature>`` entries
        plus a ``class_<cls>_intercept`` entry, and ``self.intercept_`` is
        reset to 0 before the object is built.

        :return: the populated (or empty) ``LRModelParam``.
        """
        header = self.header
        LOGGER.debug("In get_param, header: {}".format(header))
        if header is None:
            param_protobuf_obj = lr_model_param_pb2.LRModelParam()
            return param_protobuf_obj
        if self.need_one_vs_rest:
            one_vs_rest_class = list(map(str, self.one_vs_rest_obj.classes))
        else:
            one_vs_rest_class = None

        weight_dict = {}
        for idx, header_name in enumerate(header):
            if self.need_one_vs_rest:
                for class_idx, class_obj in enumerate(self.one_vs_rest_obj.models):
                    coef = class_obj.coef_[idx]
                    class_type = one_vs_rest_class[class_idx]
                    class_and_header_name = "_".join(["class", str(class_type), header_name])
                    weight_dict[class_and_header_name] = coef
            else:
                coef_i = self.coef_[idx]
                weight_dict[header_name] = coef_i

        if self.need_one_vs_rest:
            for class_idx, class_obj in enumerate(self.one_vs_rest_obj.models):
                intercept = class_obj.intercept_
                class_type = one_vs_rest_class[class_idx]
                intercept_name = "_".join(["class", str(class_type), "intercept"])
                weight_dict[intercept_name] = intercept

            # Per-class intercepts live in the weight mapping; the top-level
            # intercept field is saved as 0 in this mode.
            self.intercept_ = 0

        param_protobuf_obj = lr_model_param_pb2.LRModelParam(iters=self.n_iter_,
                                                             loss_history=self.loss_history,
                                                             is_converged=self.is_converged,
                                                             weight=weight_dict,
                                                             intercept=self.intercept_,
                                                             header=header,
                                                             need_one_vs_rest=self.need_one_vs_rest,
                                                             one_vs_rest_classes=one_vs_rest_class
                                                             )
        json_result = json_format.MessageToJson(param_protobuf_obj)
        LOGGER.debug("json_result: {}".format(json_result))
        return param_protobuf_obj

    def export_model(self):
        """Return the meta and param protobuf objects keyed by their model names."""
        meta_obj = self._get_meta()
        param_obj = self._get_param()
        result = {
            self.model_meta_name: meta_obj,
            self.model_param_name: param_obj
        }
        return result

    def _load_model(self, model_dict):
        """Restore header, coefficients and intercept(s) from a saved model.

        The parameter object is looked up under ``self.model_param_name`` in
        the first entry of ``model_dict['model']``. A plain model has
        ``coef_`` rebuilt in header order; a one-vs-rest model gets one deep
        copy of this classifier per saved class, appended to a freshly built
        ``OneVsRest`` wrapper.

        :param model_dict: mapping with a 'model' entry holding the saved
            parameter objects.
        """
        result_obj = list(model_dict.get('model').values())[0].get(self.model_param_name)
        self.header = list(result_obj.header)
        # For hetero-lr arbiter predict function: no header was saved, so
        # there is nothing to restore. ``list(...)`` never yields None, hence
        # the emptiness test (an ``is None`` test could never be true here).
        if not self.header:
            return

        feature_shape = len(self.header)
        self.need_one_vs_rest = result_obj.need_one_vs_rest
        if self.need_one_vs_rest:
            self.one_vs_rest_classes = list(map(int, list(result_obj.one_vs_rest_classes)))
            weight_dict = dict(result_obj.weight)
            self.one_vs_rest_obj = OneVsRest(classifier=self, role=self.role, mode=self.mode,
                                             one_vs_rest_param=self._get_one_vs_rest_param())
            self.one_vs_rest_obj.classes = self.one_vs_rest_classes
            for class_type in self.one_vs_rest_obj.classes:
                classifier = copy.deepcopy(self)
                classifier.coef_ = np.zeros(feature_shape)
                # Per-class weights are stored under "class_<class>_<feature>".
                for i, feature_name in enumerate(self.header):
                    feature_name = "_".join(["class", str(class_type), feature_name])
                    classifier.coef_[i] = weight_dict.get(feature_name)
                intercept_name = "_".join(["class", str(class_type), "intercept"])
                classifier.intercept_ = weight_dict.get(intercept_name)
                self.one_vs_rest_obj.models.append(classifier)
        else:
            self.coef_ = np.zeros(feature_shape)
            weight_dict = dict(result_obj.weight)
            self.intercept_ = result_obj.intercept

            for idx, header_name in enumerate(self.header):
                self.coef_[idx] = weight_dict.get(header_name)

    def _abnormal_detection(self, data_instances):
        """
        Make sure input data_instances is valid.
        """
        abnormal_detection.empty_table_detection(data_instances)
        abnormal_detection.empty_feature_detection(data_instances)

    def update_local_model(self, fore_gradient, data_inst, coef, **training_info):
        """
        update local model that transforms features of raw input

        This 'update_local_model' function serves as a handler on updating local model that transforms features of raw
        input into more representative features. We typically adopt neural networks as the local model, which is
        typically updated/trained based on stochastic gradient descent algorithm. For concrete implementation, please
        refer to 'hetero_dnn_logistic_regression' folder.

        For this particular class (i.e., 'BaseLogisticRegression') that serves as a base class for neural-networks-based
        hetero-logistic-regression model, the 'update_local_model' function will do nothing. In other words, no updating
        performed on the local model since there is no one.

        Parameters:
        ___________
        :param fore_gradient: a table holding fore gradient
        :param data_inst: a table holding instances of raw input of guest side
        :param coef: coefficients of logistic regression model
        :param training_info: a dictionary holding training information
        """
        pass

    def transform(self, data_inst):
        """
        transform features of instances held by 'data_inst' table into more representative features

        This 'transform' function serves as a handler on transforming/extracting features from raw input 'data_inst' of
        guest. It returns a table that holds instances with transformed features. In theory, we can use any model to
        transform features. Particularly, we would adopt neural network models such as auto-encoder or CNN to perform
        the feature transformation task. For concrete implementation, please refer to 'hetero_dnn_logistic_regression'
        folder.

        For this particular class (i.e., 'BaseLogisticRegression') that serves as a base class for neural-networks-based
        hetero-logistic-regression model, the 'transform' function will do nothing but return whatever that has been
        passed to it. In other words, no feature transformation performed on the raw input of guest.

        Parameters:
        ___________
        :param data_inst: a table holding instances of raw input of guest side
        :return: a table holding instances with transformed features
        """
        return data_inst

    def cross_validation(self, data_instances):
        """Run k-fold cross validation with this model and return the input.

        Nothing is done when ``self.need_run`` is falsy.

        :param data_instances: instances to validate on.
        :return: ``data_instances`` unchanged.
        """
        if not self.need_run:
            return data_instances
        kfold_obj = KFold()
        self.init_schema(data_instances)
        cv_param = self._get_cv_param()
        kfold_obj.run(cv_param, data_instances, self)
        LOGGER.debug("Finish kflod run")
        return data_instances

    def one_vs_rest_fit(self, train_data=None):
        """Fit this classifier through a ``OneVsRest`` wrapper.

        Flags the model as one-vs-rest, records the header obtained from
        ``train_data`` (skipped for the arbiter role), then builds and fits
        the wrapper.

        :param train_data: training instances handed to ``OneVsRest.fit``.
        """
        self.need_one_vs_rest = True
        if self.role != consts.ARBITER:
            self.header = self.get_header(train_data)
        self.one_vs_rest_obj = OneVsRest(classifier=self, role=self.role, mode=self.mode,
                                         one_vs_rest_param=self._get_one_vs_rest_param())
        self.one_vs_rest_obj.fit(data_instances=train_data)

    def one_vs_rest_predict(self, validate_data):
        """Predict with the fitted one-vs-rest wrapper.

        :param validate_data: instances to predict on.
        :return: the wrapper's prediction, or None (after a warning) when no
            one-vs-rest model has been fitted or loaded.
        """
        if not self.one_vs_rest_obj:
            LOGGER.warning("Not one_vs_rest fit before, return now")
            # Actually return, as the message says; falling through would
            # call ``predict`` on a missing wrapper.
            return None

        return self.one_vs_rest_obj.predict(data_instances=validate_data)

    def _get_one_vs_rest_param(self):
        """Return the one-vs-rest parameters held by ``model_param``."""
        return self.model_param.one_vs_rest_param

    def _get_cv_param(self):
        """Return the cross-validation parameters, stamped with role and mode."""
        self.model_param.cv_param.role = self.role
        self.model_param.cv_param.mode = self.mode
        return self.model_param.cv_param

    def set_schema(self, data_instance, header=None):
        """Attach this model's schema to ``data_instance`` and return it.

        :param data_instance: table whose ``schema`` attribute is replaced.
        :param header: header to store in the schema; defaults to
            ``self.header``.
        :return: ``data_instance``.
        """
        if header is None:
            self.schema["header"] = self.header
        else:
            self.schema["header"] = header
        data_instance.schema = self.schema
        return data_instance

    def init_schema(self, data_instance):
        """Adopt the schema and header of ``data_instance`` (no-op for None)."""
        if data_instance is None:
            return
        self.schema = data_instance.schema
        self.header = self.schema.get('header')