# Example 1
class Participant(  # pylint: disable=too-few-public-methods,too-many-instance-attributes
        ParticipantABC):
    """An example of a Keras implementation of a participant for federated
    learning.

    The attributes for the model and the datasets are only for
    convenience, they might as well be loaded elsewhere.

    Attributes:

        regressor: The model to be trained.
        trainset_x: A dataset for training.
        trainset_y: Labels for training.
        testset_x: A dataset for test.
        testset_y: Labels for test.
        number_of_samples: The number of samples in the training dataset.
        performance_metrics: Metrics collected after each round of training.

    """
    def __init__(self, dataset_dir: str) -> None:
        """Initialize a custom participant.

        Args:

            dataset_dir: Directory containing the ``split_data`` training
                splits and the ``test.csv`` test set.
        """
        super().__init__()
        self.load_random_dataset(dataset_dir)
        self.regressor = Regressor(len(self.trainset_x.columns))
        self.performance_metrics: List[Tuple[float, float]] = []

    def load_random_dataset(self, dataset_dir: str) -> None:
        """Load a random training split and the common test set.

        One of the ten files ``split_data/data_part_{0..9}.csv`` is picked
        at random for training; ``test.csv`` is always used for evaluation.
        The target column is named ``"Y"`` in both files.
        """
        i = random.randrange(0, 10, 1)

        LOG.info("Train on sample number %d", i)
        trainset_file_path = os.path.join(dataset_dir, "split_data",
                                          f"data_part_{i}.csv")

        trainset = pd.read_csv(trainset_file_path, index_col=None)
        self.trainset_x = trainset.drop("Y", axis=1)
        self.trainset_y = trainset["Y"]
        self.number_of_samples = len(trainset)

        testset_file_path = os.path.join(dataset_dir, "test.csv")
        testset = pd.read_csv(testset_file_path, index_col=None)
        testset_x = testset.drop("Y", axis=1)
        # NOTE(review): the first column of test.csv is dropped here,
        # presumably a row-index column absent from the training splits
        # — confirm against the data files.
        self.testset_x: pd.DataFrame = testset_x.drop(testset_x.columns[0],
                                                      axis=1)
        self.testset_y = testset["Y"]

    def train_round(self, training_input: Optional[np.ndarray]) -> np.ndarray:
        """Train a model in a federated learning round.

        A model is given in terms of its weights and the model is
        trained on the participant's dataset for a number of
        epochs. The weights of the updated model are returned.

        Args:

            training_input: The weights of the model to be trained, or
                None in the very first round (no global model yet).

        Returns:

            The updated model weights.
        """
        if training_input is None:
            # This is the first round: the coordinator doesn't have a
            # global model yet, so we need to initialize the weights
            self.regressor = Regressor(len(self.trainset_x.columns))
            return self.regressor.get_weights()

        weights = training_input
        epochs = 10
        self.regressor.set_weights(weights)
        self.regressor.train_n_epochs(epochs, self.trainset_x, self.trainset_y)

        loss: float
        r_squared: float
        loss, r_squared = self.regressor.evaluate_on_test(
            self.testset_x, self.testset_y)
        LOG.info("loss = %f, R² = %f", loss, r_squared)
        self.performance_metrics.append((loss, r_squared))

        return self.regressor.get_weights()

    def deserialize_training_input(self, global_model: list) -> np.ndarray:
        """Convert the global model received from the coordinator to an array."""
        return np.array(global_model)

    def serialize_training_result(self, training_result: np.ndarray) -> list:
        """Convert the updated weights to a JSON-serializable nested list."""
        return training_result.tolist()

    def on_stop(self) -> None:
        """Print the per-round metrics as a table when training ends."""
        table = tabulate(self.performance_metrics, headers=["Loss", "R²"])
        print(table)
# Example 2
class Participant(  # pylint: disable=too-few-public-methods,too-many-instance-attributes
        ParticipantABC):
    """An example of a Keras implementation of a participant for federated
    learning.

    The attributes for the model and the datasets are only for
    convenience, they might as well be loaded elsewhere.

    Attributes:

        regressor: The model to be trained.
        trainset_x: A dataset for training.
        trainset_y: Labels for training.
        testset_x: A dataset for test.
        testset_y: Labels for test.
        number_of_samples: The number of samples in the training dataset.
        performance_metrics: Metrics collected after each round of training.

    """
    def __init__(self, dataset_dir: str) -> None:
        """Initialize a custom participant.

        Args:

            dataset_dir: Directory containing the ``split_data`` training
                splits and the ``test.csv`` test set.
        """
        # Zero-argument super() is the Python 3 idiom (file already uses
        # f-strings, so 3.6+ is assumed).
        super().__init__()
        self.load_random_dataset(dataset_dir)
        self.regressor = Regressor(len(self.trainset_x.columns))
        self.performance_metrics: List[Tuple[float, float]] = []

    def load_random_dataset(self, dataset_dir: str) -> None:
        """Load a random training split and the common test set.

        One of the ten files ``split_data/data_part_{0..9}.csv`` is picked
        at random for training; ``test.csv`` is always used for evaluation.
        The target column is named ``"Y"`` in both files.
        """
        i = random.randrange(0, 10, 1)

        LOG.info("Train on sample number %d", i)
        trainset_file_path = os.path.join(dataset_dir, "split_data",
                                          f"data_part_{i}.csv")

        trainset = pd.read_csv(trainset_file_path, index_col=None)
        self.trainset_x = trainset.drop("Y", axis=1)
        self.trainset_y = trainset["Y"]
        self.number_of_samples = len(trainset)

        testset_file_path = os.path.join(dataset_dir, "test.csv")
        testset = pd.read_csv(testset_file_path, index_col=None)
        testset_x = testset.drop("Y", axis=1)
        # NOTE(review): the first column of test.csv is dropped here,
        # presumably a row-index column absent from the training splits
        # — confirm against the data files.
        self.testset_x: pd.DataFrame = testset_x.drop(testset_x.columns[0],
                                                      axis=1)
        self.testset_y = testset["Y"]

    def train_round(
            self,
            training_input: Optional[np.ndarray]) -> Tuple[np.ndarray, int]:
        """Train a model in a federated learning round.

        A model is given in terms of its weights and the model is
        trained on the participant's dataset for a number of
        epochs. The weights of the updated model are returned in
        combination with the number of samples of the train dataset.

        Args:

            training_input: The weights of the model to be trained, or
                None in the very first round (no global model yet).

        Returns:

            The updated model weights and the number of training samples
            (0 in the first round, since no training happened).

        """
        if training_input is None:
            # This is the first round: the coordinator doesn't have a
            # global model yet, so we need to initialize the weights
            self.regressor = Regressor(len(self.trainset_x.columns))
            return (self.regressor.get_weights(), 0)

        weights = training_input
        # FIXME: what should this be?
        epochs = 10
        self.regressor.set_weights(weights)
        self.regressor.train_n_epochs(epochs, self.trainset_x, self.trainset_y)

        loss: float
        r_squared: float
        loss, r_squared = self.regressor.evaluate_on_test(
            self.testset_x, self.testset_y)
        LOG.info("loss = %f, R² = %f", loss, r_squared)
        self.performance_metrics.append((loss, r_squared))

        return (self.regressor.get_weights(), self.number_of_samples)

    def deserialize_training_input(self, data: bytes) -> Optional[np.ndarray]:
        """Deserialize the global model received from the coordinator.

        Args:

            data: The raw payload; read back with ``np.load``, so it is
                expected in ``np.save`` format. Empty in the first round.

        Returns:

            The global weights, or None when no global model exists yet.
        """
        if not data:
            return None

        reader = BytesIO(data)
        return np.load(reader, allow_pickle=False)

    def serialize_training_result(
            self, training_result: Tuple[np.ndarray, int]) -> bytes:
        """Serialize the training result for the coordinator.

        The payload is the sample count as a 4-byte big-endian unsigned
        integer, followed by the weights in ``np.save`` format.
        """
        (weights, number_of_samples) = training_result

        writer = BytesIO()
        writer.write(number_of_samples.to_bytes(4, byteorder="big"))
        np.save(writer, weights, allow_pickle=False)
        # getvalue() returns an immutable bytes copy, matching the declared
        # return type. The previous ``getbuffer()[:]`` returned a
        # memoryview, not bytes, and kept the BytesIO buffer pinned.
        return writer.getvalue()