Code example #1
    def __init__(self):
        super(HeteroNNBase, self).__init__()

        self.tol = None
        self.early_stop = None

        self.epochs = None
        self.batch_size = None

        self.predict_param = None
        self.hetero_nn_param = None

        self.model_builder = None

        self.batch_generator = None
        self.model = None

        self.partition = None
        self.validation_freqs = None
        self.early_stopping_rounds = None
        self.metrics = []
        self.use_first_metric_only = False

        self.data_x = []
        self.data_y = []
        self.transfer_variable = HeteroNNTransferVariable()
        self.model_param = HeteroNNParam()
        self.mode = consts.HETERO
Code example #2
File: hetero_nn_host.py  Project: yubo1993/FATE
    def load_model(self, model_dict):
        model_dict = list(model_dict["model"].values())[0]
        param = model_dict.get(MODELPARAM)
        meta = model_dict.get(MODELMETA)
        if self.hetero_nn_param is None:
            self.hetero_nn_param = NNParameter()
            self.hetero_nn_param.check()
            self.predict_param = self.hetero_nn_param.predict_param
        self._build_model()
        self._restore_model_meta(meta)
        self._restore_model_param(param)
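
For context, a minimal sketch (not FATE code) of the nested dict shape that load_model above unpacks: a single component entry under "model" whose value maps the MODELMETA/MODELPARAM keys to the stored model objects. The component name and the key strings below are illustrative assumptions, not the real FATE constants.

# Minimal sketch of the structure load_model unpacks; key names are assumptions.
MODELMETA = "HeteroNNMeta"      # placeholder for the real constant
MODELPARAM = "HeteroNNParam"    # placeholder for the real constant

model_dict = {
    "model": {
        "hetero_nn_0": {                       # single component entry (assumed name)
            MODELMETA: {"batch_size": 128},    # stands in for the protobuf meta
            MODELPARAM: {"iter_epoch": 10},    # stands in for the protobuf param
        }
    }
}

inner = list(model_dict["model"].values())[0]  # take the only component's models
meta = inner.get(MODELMETA)
param = inner.get(MODELPARAM)
print(meta, param)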
Code example #3
File: hetero_nn_host.py  Project: yubo1993/FATE
class HeteroNNHost(HeteroNNBase):
    def __init__(self):
        super(HeteroNNHost, self).__init__()

        self.batch_generator = batch_generator.Host()
        self.model = None
        self.role = consts.HOST

        self.input_shape = None

    def _init_model(self, hetero_nn_param):
        super(HeteroNNHost, self)._init_model(hetero_nn_param)

    def export_model(self):
        if self.model is None:
            return

        return {MODELMETA: self._get_model_meta(),
                MODELPARAM: self._get_model_param()}

    def load_model(self, model_dict):
        model_dict = list(model_dict["model"].values())[0]
        param = model_dict.get(MODELPARAM)
        meta = model_dict.get(MODELMETA)
        if self.hetero_nn_param is None:
            self.hetero_nn_param = NNParameter()
            self.hetero_nn_param.check()
            self.predict_param = self.hetero_nn_param.predict_param
        self._build_model()
        self._restore_model_meta(meta)
        self._restore_model_param(param)

    def _build_model(self):
        self.model = model_builder("host", self.hetero_nn_param)
        self.model.set_transfer_variable(self.transfer_variable)

    def predict(self, data_inst):
        data_inst = self.align_data_header(data_inst, self._header)
        test_x = self._load_data(data_inst)
        self.set_partition(data_inst)

        self.model.predict(test_x)

    def fit(self, data_inst, validate_data=None):
        self.callback_list.on_train_begin(data_inst, validate_data)

        if not self.component_properties.is_warm_start:
            self._build_model()
            cur_epoch = 0
        else:
            self.model.warm_start()
            self.callback_warm_start_init_iter(self.history_iter_epoch)
            cur_epoch = self.history_iter_epoch + 1

        self.prepare_batch_data(self.batch_generator, data_inst)

        while cur_epoch < self.epochs:
            self.iter_epoch = cur_epoch
            for batch_idx in range(len(self.data_x)):
                self.model.train(self.data_x[batch_idx], cur_epoch, batch_idx)

            self.callback_list.on_epoch_end(cur_epoch)
            if self.callback_variables.stop_training:
                LOGGER.debug('early stopping triggered')
                break

            is_converge = self.transfer_variable.is_converge.get(idx=0,
                                                                 suffix=(cur_epoch,))

            if is_converge:
                LOGGER.debug("Training process is converged in epoch {}".format(cur_epoch))
                break

            cur_epoch += 1

        self.callback_list.on_train_end()
        # if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        #     self.load_model(self.validation_strategy.cur_best_model)

    def prepare_batch_data(self, batch_generator, data_inst):
        self._header = data_inst.schema["header"]
        batch_generator.initialize_batch_generator(data_inst)
        batch_data_generator = batch_generator.generate_batch_data()

        for batch_data in batch_data_generator:
            batch_x = self._load_data(batch_data)
            self.data_x.append(batch_x)

        self.set_partition(data_inst)

    def _load_data(self, data_inst):
        data = list(data_inst.collect())
        data_keys = [key for (key, val) in data]
        data_keys_map = dict(zip(sorted(data_keys), range(len(data_keys))))
        batch_x = [None for i in range(len(data_keys))]

        for key, inst in data:
            batch_x[data_keys_map[key]] = inst.features

            if self.input_shape is None:
                self.input_shape = inst.features.shape

        batch_x = np.asarray(batch_x)

        return batch_x

    def _get_model_meta(self):
        model_meta = HeteroNNMeta()
        model_meta.batch_size = self.batch_size
        model_meta.hetero_nn_model_meta.CopyFrom(self.model.get_hetero_nn_model_meta())
        model_meta.module = 'HeteroNN'
        return model_meta

    def _get_model_param(self):
        model_param = HeteroNNParam()
        model_param.iter_epoch = self.iter_epoch
        model_param.header.extend(self._header)
        model_param.hetero_nn_model_param.CopyFrom(self.model.get_hetero_nn_model_param())
        model_param.best_iteration = self.callback_variables.best_iteration

        return model_param
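
A minimal standalone sketch of the key-ordering trick used by _load_data above: the (key, instance) pairs returned by collect() arrive in no particular order, so each row's position is assigned by sorted key, which keeps guest and host batches aligned row by row. The Inst namedtuple and the toy data are assumptions for illustration, not FATE types.

from collections import namedtuple
import numpy as np

Inst = namedtuple("Inst", ["features"])

# toy (key, instance) pairs, deliberately out of order
data = [("id3", Inst(np.array([0.3, 3.0]))),
        ("id1", Inst(np.array([0.1, 1.0]))),
        ("id2", Inst(np.array([0.2, 2.0])))]

data_keys = [key for key, _ in data]
data_keys_map = dict(zip(sorted(data_keys), range(len(data_keys))))  # key -> row position

batch_x = [None] * len(data_keys)
for key, inst in data:
    batch_x[data_keys_map[key]] = inst.features   # place each row at its sorted position

batch_x = np.asarray(batch_x)
print(batch_x)   # rows follow sorted key order: id1, id2, id3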
Code example #4
File: hetero_nn_guest.py  Project: yubo1993/FATE
class HeteroNNGuest(HeteroNNBase):
    def __init__(self):
        super(HeteroNNGuest, self).__init__()
        self.task_type = None
        self.converge_func = None

        self.batch_generator = batch_generator.Guest()
        self.data_keys = []

        self.model_builder = None
        self.label_dict = {}

        self.model = None
        self.role = consts.GUEST
        self.history_loss = []
        self.num_label = 2

        self.input_shape = None
        self._summary_buf = {"history_loss": [],
                             "is_converged": False,
                             "best_iteration": -1}

    def _init_model(self, hetero_nn_param):
        super(HeteroNNGuest, self)._init_model(hetero_nn_param)

        self.task_type = hetero_nn_param.task_type
        self.converge_func = converge_func_factory(self.early_stop, self.tol)

    def _build_model(self):
        # return a hetero NN model with keras backend
        self.model = model_builder("guest", self.hetero_nn_param)
        self.model.set_transfer_variable(self.transfer_variable)

    def _set_loss_callback_info(self):
        self.callback_meta("loss",
                           "train",
                           MetricMeta(name="train",
                                      metric_type="LOSS",
                                      extra_metas={"unit_name": "iters"}))

    def fit(self, data_inst, validate_data=None):
        self.callback_list.on_train_begin(data_inst, validate_data)

        # collect data from table to form data loader
        if not self.component_properties.is_warm_start:
            self._build_model()
            cur_epoch = 0
        else:
            self.model.warm_start()
            self.callback_warm_start_init_iter(self.history_iter_epoch)
            cur_epoch = self.history_iter_epoch + 1

        self.prepare_batch_data(self.batch_generator, data_inst)
        if not self.input_shape:
            self.model.set_empty()

        self._set_loss_callback_info()
        while cur_epoch < self.epochs:
            self.iter_epoch = cur_epoch
            LOGGER.debug("cur epoch is {}".format(cur_epoch))
            self.callback_list.on_epoch_begin(cur_epoch)
            epoch_loss = 0

            for batch_idx in range(len(self.data_x)):
                # hetero NN model
                batch_loss = self.model.train(self.data_x[batch_idx], self.data_y[batch_idx], cur_epoch, batch_idx)

                epoch_loss += batch_loss

            epoch_loss /= len(self.data_x)

            LOGGER.debug("epoch {}' loss is {}".format(cur_epoch, epoch_loss))

            self.callback_metric("loss",
                                 "train",
                                 [Metric(cur_epoch, epoch_loss)])

            self.history_loss.append(epoch_loss)

            self.callback_list.on_epoch_end(cur_epoch)
            if self.callback_variables.stop_training:
                LOGGER.debug('early stopping triggered')
                break

            if self.hetero_nn_param.selector_param.method:
                # when use selective bp, loss converge will be disabled
                is_converge = False
            else:
                is_converge = self.converge_func.is_converge(epoch_loss)
            self._summary_buf["is_converged"] = is_converge
            self.transfer_variable.is_converge.remote(is_converge,
                                                      role=consts.HOST,
                                                      idx=0,
                                                      suffix=(cur_epoch,))

            if is_converge:
                LOGGER.debug("Training process is converged in epoch {}".format(cur_epoch))
                break

            cur_epoch += 1

        if cur_epoch == self.epochs:
            LOGGER.debug("Training process reach max training epochs {} and not converged".format(self.epochs))

        self.callback_list.on_train_end()
        # if self.validation_strategy and self.validation_strategy.has_saved_best_model():
        #     self.load_model(self.validation_strategy.cur_best_model)

        self.set_summary(self._get_model_summary())

    @assert_io_num_rows_equal
    def predict(self, data_inst):
        data_inst = self.align_data_header(data_inst, self._header)
        keys, test_x, test_y = self._load_data(data_inst)
        self.set_partition(data_inst)

        preds = self.model.predict(test_x)

        if self.task_type == "regression":
            preds = [float(pred[0]) for pred in preds]
            predict_tb = session.parallelize(zip(keys, preds), include_key=True, partition=data_inst.partitions)
            result = self.predict_score_to_output(data_inst, predict_tb)
        else:
            if self.num_label > 2:
                preds = [list(map(float, pred)) for pred in preds]
                predict_tb = session.parallelize(zip(keys, preds), include_key=True, partition=data_inst.partitions)
                result = self.predict_score_to_output(data_inst, predict_tb, classes=list(range(self.num_label)))

            else:
                preds = [float(pred[0]) for pred in preds]
                predict_tb = session.parallelize(zip(keys, preds), include_key=True, partition=data_inst.partitions)
                threshold = self.predict_param.threshold
                result = self.predict_score_to_output(data_inst, predict_tb, classes=[0, 1], threshold=threshold)

        return result

    def export_model(self):
        if self.model is None:
            return

        return {MODELMETA: self._get_model_meta(),
                MODELPARAM: self._get_model_param()}

    def load_model(self, model_dict):

        model_dict = list(model_dict["model"].values())[0]
        param = model_dict.get(MODELPARAM)
        meta = model_dict.get(MODELMETA)
        if self.hetero_nn_param is None:
            self.hetero_nn_param = NNParameter()
            self.hetero_nn_param.check()
            self.predict_param = self.hetero_nn_param.predict_param
        self._build_model()
        self._restore_model_meta(meta)
        self._restore_model_param(param)

    def _get_model_summary(self):
        # self._summary_buf["best_iteration"] = -1 if self.validation_strategy is None else self.validation_strategy.best_iteration
        self._summary_buf["history_loss"] = self.history_loss
        if self.callback_variables.validation_summary:
            self._summary_buf["validation_metrics"] = self.callback_variables.validation_summary
        """
        if self.validation_strategy:
            validation_summary = self.validation_strategy.summary()
            if validation_summary:
                self._summary_buf["validation_metrics"] = validation_summary
        """

        return self._summary_buf

    def _get_model_meta(self):
        model_meta = HeteroNNMeta()
        model_meta.task_type = self.task_type
        model_meta.module = 'HeteroNN'
        model_meta.batch_size = self.batch_size
        model_meta.epochs = self.epochs
        model_meta.early_stop = self.early_stop
        model_meta.tol = self.tol
        # model_meta.interactive_layer_lr = self.hetero_nn_param.interacitve_layer_lr

        model_meta.hetero_nn_model_meta.CopyFrom(self.model.get_hetero_nn_model_meta())

        return model_meta

    def _get_model_param(self):
        model_param = HeteroNNParam()
        model_param.iter_epoch = self.iter_epoch
        model_param.hetero_nn_model_param.CopyFrom(self.model.get_hetero_nn_model_param())
        model_param.num_label = self.num_label
        model_param.best_iteration = self.callback_variables.best_iteration
        # model_param.best_iteration = -1 if self.validation_strategy is None else self.validation_strategy.best_iteration
        model_param.header.extend(self._header)

        for loss in self.history_loss:
            model_param.history_loss.append(loss)

        return model_param

    def get_metrics_param(self):
        if self.task_type == consts.CLASSIFICATION:
            if self.num_label == 2:
                return EvaluateParam(eval_type="binary",
                                     pos_label=1, metrics=self.metrics)
            else:
                return EvaluateParam(eval_type="multi", metrics=self.metrics)
        else:
            return EvaluateParam(eval_type="regression", metrics=self.metrics)

    def prepare_batch_data(self, batch_generator, data_inst):
        self._header = data_inst.schema["header"]
        batch_generator.initialize_batch_generator(data_inst, self.batch_size)
        batch_data_generator = batch_generator.generate_batch_data()

        for batch_data in batch_data_generator:
            keys, batch_x, batch_y = self._load_data(batch_data)
            self.data_x.append(batch_x)
            self.data_y.append(batch_y)
            self.data_keys.append(keys)

        self._convert_label()
        self.set_partition(data_inst)

    def _load_data(self, data_inst):
        data = list(data_inst.collect())
        data_keys = [key for (key, val) in data]
        data_keys_map = dict(zip(sorted(data_keys), range(len(data_keys))))

        keys = [None for idx in range(len(data_keys))]
        batch_x = [None for idx in range(len(data_keys))]
        batch_y = [None for idx in range(len(data_keys))]

        for (key, inst) in data:
            idx = data_keys_map[key]
            keys[idx] = key
            batch_x[idx] = inst.features
            batch_y[idx] = inst.label

            if self.input_shape is None:
                try:
                    self.input_shape = inst.features.shape[0]
                except AttributeError:
                    self.input_shape = 0

        batch_x = np.asarray(batch_x)
        batch_y = np.asarray(batch_y)

        return keys, batch_x, batch_y

    def _convert_label(self):
        diff_label = np.unique(np.concatenate(self.data_y))
        self.label_dict = dict(zip(diff_label, range(diff_label.shape[0])))

        transform_y = []
        self.num_label = diff_label.shape[0]

        if self.task_type == "regression" or self.num_label <= 2:
            for batch_y in self.data_y:
                new_batch_y = np.zeros((batch_y.shape[0], 1))
                for idx in range(new_batch_y.shape[0]):
                    new_batch_y[idx] = batch_y[idx]

                transform_y.append(new_batch_y)

            self.data_y = transform_y
            return

        for batch_y in self.data_y:
            new_batch_y = np.zeros((batch_y.shape[0], self.num_label))
            for idx in range(new_batch_y.shape[0]):
                y = batch_y[idx]
                new_batch_y[idx][y] = 1

            transform_y.append(new_batch_y)

        self.data_y = transform_y

    def _restore_model_param(self, param):
        super(HeteroNNGuest, self)._restore_model_param(param)
        self.num_label = param.num_label
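
To close, a minimal sketch (not FATE code) of the label handling in _convert_label above: binary and regression labels are kept as a single column, while multi-class labels are one-hot encoded against the distinct label values (the code indexes by label value, so labels are assumed to already be 0..num_label-1). The toy batches below are assumptions for illustration.

import numpy as np

data_y = [np.array([2, 0]), np.array([1, 2])]        # two toy batches of raw labels

diff_label = np.unique(np.concatenate(data_y))       # sorted distinct labels: [0 1 2]
num_label = diff_label.shape[0]

transform_y = []
for batch_y in data_y:
    new_batch_y = np.zeros((batch_y.shape[0], num_label))
    for idx in range(batch_y.shape[0]):
        new_batch_y[idx][batch_y[idx]] = 1           # one-hot row, indexed by label value
    transform_y.append(new_batch_y)

print(transform_y[0])    # [[0. 0. 1.]
                         #  [1. 0. 0.]]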