Example #1
0
class TestHeteroLogisticGradient(unittest.TestCase):
    """Compare encrypted gradients computed from dense and sparse instances."""

    def setUp(self):
        """Create the key pair, the encrypted input tables and expected values."""
        cipher = PaillierEncrypt()
        self.paillier_encrypt = cipher
        cipher.generate_key()
        self.hetero_lr_gradient = hetero_lr_gradient_and_loss.Guest()

        size = 10
        sample_ids = range(size)
        table_options = {"partition": 48, "include_key": False}

        # Encrypted 0..size-1 and their squares.
        self.en_wx = session.parallelize(
            [cipher.encrypt(i) for i in sample_ids], **table_options)
        self.en_sum_wx_square = session.parallelize(
            [cipher.encrypt(np.square(i)) for i in sample_ids], **table_options)

        self.wx = np.array(list(sample_ids))
        self.w = self.wx / np.array([1] * size)

        # All-ones feature vectors; labels alternate 1, -1, 1, ...
        self.data_inst = session.parallelize(
            [
                Instance(features=np.array([1] * size), label=(-1) ** (i % 2))
                for i in sample_ids
            ],
            **table_options)

        # Expected fore gradient values.
        self.fore_gradient_local = [-0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75]
        # Expected gradients without / with the intercept term.
        self.gradient = [1.125] * 10
        self.gradient_fit_intercept = [1.125] * 11

        self.loss = 4.505647

    def test_compute_partition_gradient(self):
        """Dense and sparse inputs must yield the same decrypted gradient."""
        fore_gradient = self.en_wx.join(
            self.data_inst,
            lambda enc_wx, inst: 0.25 * enc_wx - 0.5 * inst.label)
        sparse_data = self._make_sparse_data()
        gradient_computer = hetero_linear_model_gradient.HeteroGradientBase()
        decrypt = self.paillier_encrypt.decrypt

        expectations = (
            (True, self.gradient_fit_intercept),
            (False, self.gradient),
        )
        for fit_intercept, expected in expectations:
            dense_cipher = gradient_computer.compute_gradient(
                self.data_inst, fore_gradient, fit_intercept)
            dense_result = [decrypt(item) for item in dense_cipher]
            self.assertListEqual(dense_result, expected)

            sparse_cipher = gradient_computer.compute_gradient(
                sparse_data, fore_gradient, fit_intercept)
            sparse_result = [decrypt(item) for item in sparse_cipher]
            self.assertListEqual(dense_result, sparse_result)

    def _make_sparse_data(self):
        """Return ``self.data_inst`` with every feature vector as a SparseVector."""

        def to_sparse(instance):
            dense = instance.features
            dimension = len(dense)
            sparse = SparseVector(indices=list(range(dimension)),
                                  data=dense,
                                  shape=dimension)
            return Instance(inst_id=None, features=sparse, label=instance.label)

        return self.data_inst.mapValues(to_sparse)
Example #2
0
    def test_cipher_add_sub_mul(self):
        """Apply +, - and scalar * to PackingCipherTensor and print the decryptions."""
        encrypter = PaillierEncrypt()
        encrypter.generate_key(1024)

        # Encrypt 1..8 in order: the first four feed `a`, the last four feed `b`.
        encrypted = [encrypter.encrypt(value) for value in range(1, 9)]
        a = PackingCipherTensor(encrypted[:4])
        b = PackingCipherTensor(encrypted[4:])
        c = PackingCipherTensor(encrypter.encrypt(1))
        d = PackingCipherTensor([encrypter.encrypt(5)])

        # Every operation is evaluated before anything is printed.
        outcomes = (
            (a + b, encrypter.recursive_decrypt),
            (b - a, encrypter.recursive_decrypt),
            (c + d, encrypter.recursive_decrypt),
            (123 * c, encrypter.decrypt),
            (d * 456, encrypter.decrypt),
            (a * 114, encrypter.recursive_decrypt),
        )
        for tensor, decrypt in outcomes:
            print(decrypt(tensor.ciphers))

        print('cipher test done')
        print('*' * 30)
Example #3
0
class TestHeteroLogisticGradient(unittest.TestCase):
    """Tests for HeteroLogisticGradient fore gradient, gradient and loss."""

    def setUp(self):
        """Create the key pair, the encrypted input tables and expected values."""
        cipher = PaillierEncrypt()
        self.paillier_encrypt = cipher
        cipher.generate_key()
        self.hetero_lr_gradient = HeteroLogisticGradient(cipher)

        size = 10
        sample_ids = range(size)

        # Encrypted 0..size-1 and their squares.
        self.wx = eggroll.parallelize([cipher.encrypt(i) for i in sample_ids])
        self.en_sum_wx_square = eggroll.parallelize(
            [cipher.encrypt(np.square(i)) for i in sample_ids])
        self.w = list(sample_ids)

        # All-ones feature lists; labels alternate 1, -1, 1, ...
        instances = [
            Instance(features=[1] * size, label=(-1) ** (i % 2))
            for i in sample_ids
        ]
        self.data_inst = eggroll.parallelize(instances, partition=1)

        # Expected fore gradient values.
        self.fore_gradient_local = [-0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75]
        # Expected gradients without / with the intercept term.
        self.gradient = [1.125] * 10
        self.gradient_fit_intercept = [1.125] * 11

        self.loss = 4.505647

    def _fore_gradient(self):
        """Compute the fore gradient table from the fixture data."""
        return self.hetero_lr_gradient.compute_fore_gradient(self.data_inst, self.wx)

    def _decrypt_each(self, ciphers):
        """Decrypt every element of an iterable of ciphertexts into a list."""
        decrypt = self.paillier_encrypt.decrypt
        return [decrypt(cipher) for cipher in ciphers]

    def test_compute_fore_gradient(self):
        """Decrypted fore gradient values must equal the expected list."""
        fore_gradient = self._fore_gradient()
        decrypt = self.paillier_encrypt.decrypt
        decrypted = [decrypt(pair[1]) for pair in fore_gradient.collect()]

        self.assertListEqual(decrypted, self.fore_gradient_local)

    def test_compute_gradient(self):
        """Check the gradient without and then with the intercept term."""
        fore_gradient = self._fore_gradient()

        cases = (
            (False, self.gradient),
            (True, self.gradient_fit_intercept),
        )
        for use_intercept, expected in cases:
            gradient = self.hetero_lr_gradient.compute_gradient(
                self.data_inst, fore_gradient, fit_intercept=use_intercept)
            self.assertListEqual(self._decrypt_each(gradient), expected)

    def test_compute_gradient_and_loss(self):
        """Check the gradient and that the loss is within 1e-5 of the expected value."""
        fore_gradient = self._fore_gradient()
        gradient, loss = self.hetero_lr_gradient.compute_gradient_and_loss(
            self.data_inst, fore_gradient, self.wx, self.en_sum_wx_square, False)
        self.assertListEqual(self._decrypt_each(gradient), self.gradient)

        decrypted_loss = self.paillier_encrypt.decrypt(loss)
        self.assertLess(np.abs(self.loss - decrypted_loss), 1e-5)
Example #4
0
    def test_diff_mode(self, round=10, mode="strict", re_encrypted_rate=0.2):
        """Check that EncryptModeCalculator output decrypts back to its input.

        For each of ``round`` iterations the fixture table ``self.data_numpy``
        is shifted by the iteration index, encrypted with the calculator,
        decrypted again and compared with ``self.numpy_data`` within 1e-5.

        Args:
            round: number of encrypt/decrypt iterations to run.
            mode: mode string passed to EncryptModeCalculator.
            re_encrypted_rate: rate passed to EncryptModeCalculator.
        """
        from federatedml.secureprotol.encrypt_mode import EncryptModeCalculator
        from federatedml.secureprotol import PaillierEncrypt
        encrypter = PaillierEncrypt()
        encrypter.generate_key(1024)
        encrypted_calculator = EncryptModeCalculator(encrypter, mode, re_encrypted_rate)

        for i in range(round):
            # Bind the current offset so the mapped function does not depend on
            # the loop variable's later value.
            data_i = self.data_numpy.mapValues(lambda v, offset=i: v + offset)
            data_i = encrypted_calculator.encrypt(data_i)
            decrypt_data_i = dict(
                data_i.mapValues(
                    lambda arr: np.array([encrypter.decrypt(val) for val in arr])
                ).collect())
            for j in range(30):
                diff = np.fabs(self.numpy_data[j] - decrypt_data_i[j] + i)
                # Compare element-wise first, then reduce. The previous form
                # compared the boolean result of `.all()` with the tolerance.
                self.assertTrue(np.all(diff < 1e-5))
Example #5
0
class HeteroFeatureBinningGuest(BaseHeteroFeatureBinning):
    """Guest party of hetero feature binning.

    Owns a Paillier key pair, sends encrypted labels to the host and decrypts
    the per-bin sums the host sends back.
    """

    def __init__(self):
        super().__init__()

        encryptor = PaillierEncrypt()
        self.encryptor = encryptor
        encryptor.generate_key()
        self.local_transform_result = None
        self.party_name = consts.GUEST

    def fit(self, data_instances):
        """
        Fit split points locally, then exchange encrypted labels and bin sums
        with the host to obtain the host's iv/woe attributes.

        When the labels are not binary, only the split points are fitted and
        the input table is returned unchanged. Otherwise the result of
        ``self.transform`` is exposed through ``self.data_output``, which is
        returned.
        """
        LOGGER.info("Start feature binning fit and transform")
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        self.binning_obj.fit_split_points(data_instances)
        LOGGER.debug("After fit, binning_obj split_points: {}".format(
            self.binning_obj.split_points))

        if not data_overview.is_binary_labels(data_instances):
            LOGGER.warning("Iv is not supported for Multiple-label data.")
            return data_instances

        # 1. Send the public key to the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and pull them into their own table.
        data_instances = data_instances.mapValues(self.load_data)
        self.set_schema(data_instances)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Encrypt the labels and send them to the host.
        encrypt_label = functools.partial(self.encrypt, encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        self.transfer_variable.encrypted_label.remote(encrypted_label_table,
                                                      role=consts.HOST,
                                                      idx=0)
        LOGGER.info("Sent encrypted_label_table to host")

        # 4. Do the local binning before waiting for the host's answer.
        data_instances = self.fit_local(data_instances, label_table)

        # 5. Receive the host's encrypted bin sums and compute its iv/woe.
        encrypted_bin_sum = self.transfer_variable.encrypted_bin_sum.get(idx=0)
        LOGGER.info("Get encrypted_bin_sum from host")

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.model_param.adjustment_factor)

        # Only a single host is handled here.
        self.host_results[consts.HOST] = host_iv_attrs
        self.set_schema(data_instances)

        LOGGER.debug("Before transform, binning_obj split_points: {}".format(
            self.binning_obj.split_points))

        self.transform(data_instances)
        LOGGER.info("Finish feature binning fit and transform")
        return self.data_output

    @staticmethod
    def encrypt(x, encryptor):
        """Return the pair (Enc(x), Enc(1 - x))."""
        encrypted_value = encryptor.encrypt(x)
        encrypted_complement = encryptor.encrypt(1 - x)
        return encrypted_value, encrypted_complement

    def transform_local(self, data_instances, label_table=None):
        """Recompute local iv with the split points stored in ``self.binning_result``."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        split_points = {
            col_name: iv_attr.split_points
            for col_name, iv_attr in self.binning_result.items()
        }
        self.local_transform_result = self.binning_obj.cal_local_iv(
            data_instances, split_points=split_points, label_table=label_table)

        for col_name, _ in self.local_transform_result.items():
            col_iv = self.local_transform_result[col_name].iv
            LOGGER.info("The local feature {} 's iv is {}".format(col_name, col_iv))

        self.set_schema(data_instances)
        return data_instances

    def __synchronize_encryption(self):
        """Send the Paillier public key to the host and mark it as synchronized."""
        public_key = self.encryptor.get_public_key()
        self.transfer_variable.paillier_pubkey.remote(public_key,
                                                      role=consts.HOST,
                                                      idx=0)

        LOGGER.info("send pubkey to host")
        self.has_synchronized = True

    def __decrypt_bin_sum(self, encrypted_bin_sum):
        """Decrypt, in place, every (event, non_event) pair of every column."""
        decrypt = self.encryptor.decrypt
        for col_name, count_list in encrypted_bin_sum.items():
            encrypted_bin_sum[col_name] = [
                (decrypt(encrypted_event), decrypt(encrypted_non_event))
                for encrypted_event, encrypted_non_event in count_list
            ]
        return encrypted_bin_sum

    def fit_local(self, data_instances, label_table=None):
        """Compute local iv attributes and store them in ``self.binning_result``."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        self.binning_result = self.binning_obj.cal_local_iv(
            data_instances, label_table=label_table)
        self.set_schema(data_instances)
        return data_instances

    @staticmethod
    def load_data(data_instance):
        """Map every label other than 1 to 0, in place, and return the instance."""
        # Binary problem assumed: 1 is the event label.
        if data_instance.label != 1:
            data_instance.label = 0
        return data_instance
Example #6
0
class HeteroFeatureBinningGuest(BaseHeteroFeatureBinning):
    """Guest party of hetero feature binning.

    Owns a Paillier key pair, sends encrypted labels to the host through
    ``federation`` and decrypts the per-bin sums the host sends back.
    """

    def __init__(self, params: FeatureBinningParam):
        super().__init__(params)

        encryptor = PaillierEncrypt()
        self.encryptor = encryptor
        encryptor.generate_key()
        self.local_transform_result = None
        self.party_name = consts.GUEST
        self._init_binning_obj()

    def fit(self, data_instances):
        """
        Bin the local features and, using encrypted labels, obtain the iv/woe
        attributes of the host's features. Returns the label-normalised table.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Send the public key to the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and pull them into their own table.
        data_instances = data_instances.mapValues(self.load_data)
        self.set_schema(data_instances)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Encrypt the labels and send them to the host.
        self.__remote_encrypted_labels(label_table)
        LOGGER.info("Sent encrypted_label_table to host")

        # 4. Do the local binning before waiting for the host's answer.
        data_instances = self.fit_local(data_instances, label_table)

        # 5. Receive the host's encrypted bin sums and compute its iv/woe.
        encrypted_bin_sum = self.__fetch_encrypted_bin_sum()
        LOGGER.info("Get encrypted_bin_sum from host")

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)

        # Only a single host is handled here.
        self.host_results[consts.HOST] = host_iv_attrs

        for cols_name, iv_attr in host_iv_attrs.items():
            display_result = iv_attr.display_result(
                self.bin_param.display_result)
            message = "[Result][FeatureBinning][Host] feature {} 's result is : {}".format(
                cols_name, display_result)
            LOGGER.info(message)

        self.set_schema(data_instances)
        return data_instances

    def transform(self, data_instances):
        """
        Re-run the label exchange and iv computation with the split points
        stored by a previous fit. Returns the label-normalised table.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Send the public key to the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and pull them into their own table.
        data_instances = data_instances.mapValues(self.load_data)
        label_table = data_instances.mapValues(lambda inst: inst.label)
        self.set_schema(data_instances)

        # 3. Encrypt the labels and send them to the host.
        self.__remote_encrypted_labels(label_table)
        LOGGER.info("Sent encrypted_label_table to host for transform")

        # 4. Local transform; the result is not saved from here.
        self.transform_local(data_instances,
                             label_table=label_table,
                             save_result=False)

        # 5. Receive the host's encrypted bin sums and compute its iv/woe.
        encrypted_bin_sum = self.__fetch_encrypted_bin_sum()
        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)

        for col_name, iv_attr in host_iv_attrs.items():
            message = "The remote feature {} 's iv is {}".format(
                col_name, iv_attr.iv)
            LOGGER.info(message)

        self.set_schema(data_instances)
        return data_instances

    def __remote_encrypted_labels(self, label_table):
        """Encrypt every label as (Enc(y), Enc(1 - y)) and send the table to the host."""
        encrypt_label = functools.partial(self.encrypt, encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(encrypt_label)
        transfer_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=transfer_tag,
                          role=consts.HOST,
                          idx=0)

    def __fetch_encrypted_bin_sum(self):
        """Fetch the encrypted bin sums sent by the host."""
        transfer_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        return federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=transfer_tag,
            idx=0)

    @staticmethod
    def encrypt(x, encryptor):
        """Return the pair (Enc(x), Enc(1 - x))."""
        encrypted_value = encryptor.encrypt(x)
        encrypted_complement = encryptor.encrypt(1 - x)
        return encrypted_value, encrypted_complement

    def transform_local(self,
                        data_instances,
                        label_table=None,
                        save_result=True):
        """
        Recompute local iv with the split points stored in
        ``self.binning_result``; optionally save the result through
        ``self.save_model``.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        split_points = {
            col_name: iv_attr.split_points
            for col_name, iv_attr in self.binning_result.items()
        }
        self.local_transform_result = self.binning_obj.cal_local_iv(
            data_instances, split_points=split_points, label_table=label_table)

        if save_result:
            self.save_model(name=self.bin_param.transform_table,
                            namespace=self.bin_param.result_namespace,
                            binning_result=self.local_transform_result,
                            host_results={})

        for col_name, _ in self.local_transform_result.items():
            col_iv = self.local_transform_result[col_name].iv
            LOGGER.info("The local feature {} 's iv is {}".format(col_name, col_iv))

        self.set_schema(data_instances)
        return data_instances

    def __synchronize_encryption(self):
        """Send the Paillier public key to the host and mark it as synchronized."""
        public_key = self.encryptor.get_public_key()
        transfer_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.paillier_pubkey)

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=transfer_tag,
                          role=consts.HOST,
                          idx=0)

        LOGGER.info("send pubkey to host")
        self.has_synchronized = True

    def __decrypt_bin_sum(self, encrypted_bin_sum):
        """Decrypt, in place, every (event, non_event) pair of every column."""
        decrypt = self.encryptor.decrypt
        for col_name, count_list in encrypted_bin_sum.items():
            encrypted_bin_sum[col_name] = [
                (decrypt(encrypted_event), decrypt(encrypted_non_event))
                for encrypted_event, encrypted_non_event in count_list
            ]
        return encrypted_bin_sum

    def fit_local(self, data_instances, label_table=None):
        """Compute local iv attributes, log them and store them in ``self.binning_result``."""
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        iv_attrs = self.binning_obj.cal_local_iv(data_instances,
                                                 label_table=label_table)
        for col_name, iv_attr in iv_attrs.items():
            display_result = iv_attr.display_result(
                self.bin_param.display_result)
            message = "[Result][FeatureBinning][Guest] feature {} 's result is : {}".format(
                col_name, display_result)
            LOGGER.info(message)

        self.binning_result = iv_attrs
        self.set_schema(data_instances)
        return data_instances

    @staticmethod
    def load_data(data_instance):
        """Map every label other than 1 to 0, in place, and return the instance."""
        # Binary problem assumed: 1 is the event label.
        if data_instance.label != 1:
            data_instance.label = 0
        return data_instance
Example #7
0
class HeteroFeatureBinningGuest(BaseHeteroFeatureBinning):
    """Guest party of hetero feature binning (list-based iv attributes).

    Owns a Paillier key pair, sends encrypted labels to the host through
    ``federation`` and decrypts the per-bin sums the host sends back.
    """

    def __init__(self, params: FeatureBinningParam):
        super().__init__(params)

        encryptor = PaillierEncrypt()
        self.encryptor = encryptor
        encryptor.generate_key()
        self.iv_attrs = None
        self.host_iv_attrs = None

    def fit(self, data_instances):
        """
        Bin the local features and, using encrypted labels, obtain the iv/woe
        attributes of the host's features.

        Returns a dict with the local iv attributes under 'local' and the
        host's under 'remote'.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Send the public key to the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and pull them into their own table.
        data_instances = data_instances.mapValues(self.load_data)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Encrypt the labels and send them to the host.
        label_encryptor = functools.partial(self.encrypt,
                                            encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(label_encryptor)

        label_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)

        LOGGER.info("Sent encrypted_label_table to host")

        # 4. Do the local binning before waiting for the host's answer.
        local_iv = self.fit_local(data_instances, label_table)

        # 5. Receive the host's encrypted bin sums and compute its iv/woe.
        bin_sum_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        encrypted_bin_sum = federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=bin_sum_tag,
            idx=0)

        LOGGER.info("Get encrypted_bin_sum from host")

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)
        self.host_iv_attrs = host_iv_attrs

        for position, remote_attr in enumerate(host_iv_attrs):
            message = "The remote iv of {}th measured feature is {}".format(
                position, remote_attr.iv)
            LOGGER.info(message)

        return {'local': local_iv, 'remote': host_iv_attrs}

    def transform(self, data_instances):
        """
        Re-run the label exchange and iv computation with the split points
        stored by a previous fit. Returns the label-normalised table with its
        schema header restored.
        """
        self._abnormal_detection(data_instances)

        # Remember the header so it can be set on the returned table.
        self.header = data_instances.schema.get('header')

        self._parse_cols(data_instances)

        # 1. Send the public key to the host.
        self.__synchronize_encryption()

        # 2. Normalise labels to {0, 1} and pull them into their own table.
        data_instances = data_instances.mapValues(self.load_data)
        label_table = data_instances.mapValues(lambda inst: inst.label)

        # 3. Encrypt the labels and send them to the host.
        label_encryptor = functools.partial(self.encrypt,
                                            encryptor=self.encryptor)
        encrypted_label_table = label_table.mapValues(label_encryptor)
        label_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_label)
        federation.remote(encrypted_label_table,
                          name=self.transfer_variable.encrypted_label.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host for transform")

        # 4. Local transform; labels were already normalised above.
        self.transform_local(data_instances, reformated=True)

        # 5. Receive the host's encrypted bin sums and compute its iv/woe.
        bin_sum_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.encrypted_bin_sum)
        encrypted_bin_sum = federation.get(
            name=self.transfer_variable.encrypted_bin_sum.name,
            tag=bin_sum_tag,
            idx=0)

        result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        host_iv_attrs = self.binning_obj.cal_iv_woe(
            result_counts, self.bin_param.adjustment_factor)
        self.host_iv_attrs = host_iv_attrs
        for position, remote_attr in enumerate(host_iv_attrs):
            message = "The remote iv of {}th measured feature is {}".format(
                position, remote_attr.iv)
            LOGGER.info(message)

        data_instances.schema['header'] = self.header
        return data_instances

    @staticmethod
    def encrypt(x, encryptor):
        """Return the pair (Enc(x), Enc(1 - x))."""
        encrypted_value = encryptor.encrypt(x)
        encrypted_complement = encryptor.encrypt(1 - x)
        return encrypted_value, encrypted_complement

    def transform_local(self, data_instances, reformated=False):
        """
        Recompute ``self.iv_attrs`` using the split points of the current
        ``self.iv_attrs``. Labels are normalised first unless ``reformated``.
        """
        self._abnormal_detection(data_instances)

        self._parse_cols(data_instances)

        if not reformated:
            # Labels have not been normalised by the caller yet.
            data_instances = data_instances.mapValues(self.load_data)

        split_points = [list(attr.split_points) for attr in self.iv_attrs]

        self.iv_attrs = self.binning_obj.cal_local_iv(data_instances,
                                                      self.cols, split_points)
        for position, col in enumerate(self.cols):
            message = "The local iv of {}th feature is {}".format(
                col, self.iv_attrs[position].iv)
            LOGGER.info(message)

    def __synchronize_encryption(self):
        """Send the Paillier public key to the host and mark it as synchronized."""
        public_key = self.encryptor.get_public_key()
        pubkey_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.paillier_pubkey)

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=pubkey_tag,
                          role=consts.HOST,
                          idx=0)

        LOGGER.info("send pubkey to host")
        self.has_synchronized = True

    def __decrypt_bin_sum(self, encrypted_bin_sum):
        """Decrypt, in place, every (event, non_event) pair of every feature."""
        decrypt = self.encryptor.decrypt
        for feature_sum in encrypted_bin_sum:
            for position, encrypted_pair in enumerate(feature_sum):
                encrypted_event, encrypted_non_event = encrypted_pair
                event_count = decrypt(encrypted_event)
                non_event_count = decrypt(encrypted_non_event)
                feature_sum[position] = (event_count, non_event_count)
        return encrypted_bin_sum

    def fit_local(self, data_instances, label_table=None):
        """Compute local iv attributes, log them, store and return them."""
        self._abnormal_detection(data_instances)

        self._parse_cols(data_instances)

        local_attrs = self.binning_obj.cal_local_iv(data_instances,
                                                    self.cols,
                                                    label_table=label_table)
        for position, col in enumerate(self.cols):
            message = "The local iv of {}th feature is {}".format(
                col, local_attrs[position].iv)
            LOGGER.info(message)
        self.iv_attrs = local_attrs
        return local_attrs

    @staticmethod
    def load_data(data_instance):
        """Map every label other than 1 to 0, in place, and return the instance."""
        # Binary problem assumed: 1 is the event label.
        if data_instance.label != 1:
            data_instance.label = 0
        return data_instance
Example #8
0
class TestHeteroLogisticGradient(unittest.TestCase):
    """Tests for the guest-side hetero LR fore gradient, gradient and loss."""

    def setUp(self):
        """Create the key pair, the encrypted input tables and expected values."""
        cipher = PaillierEncrypt()
        self.paillier_encrypt = cipher
        cipher.generate_key()
        self.hetero_lr_gradient = hetero_lr_gradient_and_loss.Guest()

        size = 10
        sample_ids = range(size)

        # Encrypted 0..size-1 and their squares.
        self.wx = session.parallelize([cipher.encrypt(i) for i in sample_ids])
        self.en_sum_wx_square = session.parallelize(
            [cipher.encrypt(np.square(i)) for i in sample_ids])
        self.w = list(sample_ids)

        # All-ones feature lists; labels alternate 1, -1, 1, ...
        instances = [
            Instance(features=[1] * size, label=(-1) ** (i % 2))
            for i in sample_ids
        ]
        self.data_inst = session.parallelize(instances, partition=1)

        # Expected fore gradient values.
        self.fore_gradient_local = [
            -0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75
        ]
        # Expected gradients without / with the intercept term.
        self.gradient = [1.125] * 10
        self.gradient_fit_intercept = [1.125] * 11

        self.loss = 4.505647

    def _decrypt_each(self, ciphers):
        """Decrypt every element of an iterable of ciphertexts into a list."""
        decrypt = self.paillier_encrypt.decrypt
        return [decrypt(cipher) for cipher in ciphers]

    def test_compute_fore_gradient(self):
        """Decrypted aggregated forwards must equal the expected fore gradient."""
        model_weights = LinearModelWeights(l=self.w, fit_intercept=False)
        paillier = self.paillier_encrypt

        class EncryptedCalculator(object):
            """Minimal stand-in that exposes only ``encrypt`` over a table."""
            encrypter = paillier

            def encrypt_row(self, row):
                return np.array([self.encrypter.encrypt(row)])

            def encrypt(self, input_data):
                return input_data.mapValues(self.encrypt_row)

        encrypted_calculator = [EncryptedCalculator()]
        batch_index = 0
        fore_gradient = self.hetero_lr_gradient.compute_and_aggregate_forwards(
            self.data_inst, model_weights, encrypted_calculator, batch_index)

        decrypted = [
            paillier.decrypt(pair[1]) for pair in fore_gradient.collect()
        ]

        self.assertListEqual(decrypted, self.fore_gradient_local)

    def test_compute_gradient(self):
        """Check the gradient without and then with the intercept term."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)

        cases = (
            (False, self.gradient),
            (True, self.gradient_fit_intercept),
        )
        for use_intercept, expected in cases:
            gradient = self.hetero_lr_gradient.compute_gradient(
                self.data_inst, fore_gradient, fit_intercept=use_intercept)
            self.assertListEqual(self._decrypt_each(gradient), expected)

    def test_compute_gradient_and_loss(self):
        """Check the gradient and that the loss is within 1e-5 of the expected value."""
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(
            self.data_inst, self.wx)
        gradient, loss = self.hetero_lr_gradient.compute_gradient_and_loss(
            self.data_inst, fore_gradient, self.wx, self.en_sum_wx_square,
            False)
        self.assertListEqual(self._decrypt_each(gradient), self.gradient)

        decrypted_loss = self.paillier_encrypt.decrypt(loss)
        self.assertLess(np.abs(self.loss - decrypted_loss), 1e-5)