コード例 #1
0
    def test_distributed_calculate_sum_XY(self):
        """Compare compute_sum_XY with a NumPy reference computed from X * Y."""
        print("--- test_distributed_calculate_sum_XY ---")

        features = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
        signs = np.array([[1], [-1], [1]])

        # Reference: broadcast the (3, 1) column over the (3, 3) matrix, then
        # add up the products along axis 0 (one total per column).
        reference = (features * signs).sum(axis=0)
        assert_array(reference, compute_sum_XY(features, signs))
コード例 #2
0
    def test_distributed_calculate_avg_XY_1(self):
        """Compare compute_avg_XY with a NumPy reference computed from X * Y."""
        print("--- test_distributed_calculate_avg_XY_1 ---")

        features = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
        signs = np.array([[1], [-1], [1]])

        # Reference: broadcast the (3, 1) column over the (3, 3) matrix, then
        # average the products along axis 0 (one mean per column).
        reference = (features * signs).mean(axis=0)
        assert_array(reference, compute_avg_XY(features, signs))
コード例 #3
0
    def test_create_table_with_dict(self):
        """Check that create_table stores every data row under its given key.

        A random 10x10 matrix is loaded together with ten explicit keys; the
        test then verifies the table's item count and the row found under
        each key.
        """
        row_count = 10
        expect_data = np.random.rand(row_count, 10)
        indexes = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]

        dtable = create_table(expect_data, indexes)
        # Each collected item is read as (key, value); gather them so rows can
        # be looked up by key regardless of the order collect() returns them.
        actual_data = {item[0]: item[1] for item in dtable.collect()}

        assert dtable.count() == len(indexes)
        # Walk keys and expected rows in lockstep rather than re-indexing the
        # key list by position.
        for index, expected_row in zip(indexes, expect_data):
            assert_array(actual_data[index], expected_row)
コード例 #4
0
    def test_distributed_calculate_avg_XY_2(self):
        """Compare compute_avg_XY with NumPy references for both argument orders."""
        print("--- test_distributed_calculate_avg_XY_2 ---")

        X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float64)
        # Repeat the single column so that Y has as many columns as X.
        Y = np.tile(np.array([[1], [-1], [1]]), (1, X.shape[-1]))
        n_rows = len(Y)

        expected_yx = (Y * X).sum(axis=0) / n_rows
        expected_xy = (X * Y).sum(axis=0) / n_rows
        result_xy = compute_avg_XY(X, Y)
        result_yx = compute_avg_XY(Y, X)
        assert_array(expected_yx, result_xy)
        assert_array(expected_xy, result_yx)
コード例 #5
0
    def test_feed_into_dtable(self):
        """Check feed_into_dtable against a NumPy slice of the same inputs.

        Fifty random samples are fed in with a sample range of (10, 30) and a
        feature range of (2, 5). The resulting table must hold exactly the
        sliced samples, and each stored instance must expose the sliced
        feature row as ``features`` and the matching label as ``label``.
        """
        ids = list(range(50))
        X = np.random.rand(50, 6)
        y = np.random.rand(50, 1)
        sample_range = (10, 30)
        feature_range = (2, 5)

        row_start, row_stop = sample_range
        col_start, col_stop = feature_range
        expected_sample_number = row_stop - row_start

        expected_ids = ids[row_start:row_stop]
        expected_X = X[row_start:row_stop, col_start:col_stop]
        expected_y = y[row_start:row_stop]

        # Map every expected sample id to its expected feature row and label
        # row (the name ``sample_id`` avoids shadowing the builtin ``id``).
        expected_data = {
            sample_id: {"X": feature_row, "y": label_row}
            for sample_id, feature_row, label_row
            in zip(expected_ids, expected_X, expected_y)
        }

        data_table = feed_into_dtable(ids, X, y, sample_range, feature_range)
        data_dict = dict(data_table.collect())

        assert expected_sample_number == len(data_dict)
        for sample_id, inst in data_dict.items():
            expected_item = expected_data[sample_id]
            features = inst.features
            label = inst.label
            assert_array(expected_item["X"], features)
            # y has shape (50, 1), so each expected label is a 1-element row.
            assert expected_item["y"][0] == label
コード例 #6
0
         [-0.879933, 0.420589, -0.877527, -0.780484, -1.037534, -0.48388],
         [0.426758, 0.723479, 0.316885, 0.287273, 1.000835, 0.962702],
         [0.963102, 1.467675, 0.829202, 0.772457, -0.038076, -0.468613]])

    infile = "../../../../examples/data/unittest_data.csv"
    ids, X, y = load_data(infile, 0, (2, 8), 1)

    ids = np.array(ids, dtype=np.int32)
    X = np.array(X, dtype=np.float64)
    y = np.array(y, dtype=np.int32)

    print("ids shape", ids.shape)
    print("X shape", X.shape)
    print("y shape", y.shape)

    assert_array(expected_ids, ids)
    assert_array(expected_y, y)
    assert_matrix(expected_X, X)

    expected_data = {}
    for i, id in enumerate(expected_ids):
        expected_data[id] = {"X": expected_X[i], "y": expected_y[i]}

    init()
    data_table = feed_into_dtable(ids, X, y.reshape(-1, 1), (0, len(ids)),
                                  (0, X.shape[-1]))
    for item in data_table.collect():
        id = item[0]
        inst = item[1]
        expected_item = expected_data[id]
        X_i = expected_item["X"]
コード例 #7
0
    def test_create_n_guest_generators(self):
        """Check the guest/host split from create_guest_host_data_generator.

        Verifies the returned overlap indexes, the number of instances each
        generator yields, that guest and host feature widths add up to the
        full feature count, and that overlapping samples carry matching
        labels and the expected feature slices on both sides.
        """
        X = np.random.rand(600, 33)
        y = np.random.rand(600, 1)
        overlap_ratio = 0.2
        guest_split_ratio = 0.3
        guest_feature_num = 16

        n_samples = X.shape[0]
        overlap_size = int(n_samples * overlap_ratio)
        expected_overlap_indexes = np.array(range(overlap_size))
        guest_only_size = int((n_samples - overlap_size) * guest_split_ratio)

        expected_guest_size = overlap_size + guest_only_size
        expected_host_size = overlap_size + n_samples - expected_guest_size

        generators = create_guest_host_data_generator(
            X, y,
            overlap_ratio=overlap_ratio,
            guest_split_ratio=guest_split_ratio,
            guest_feature_num=guest_feature_num)
        guest_data_generator, host_data_generator, overlap_indexes = generators

        def drain(generator):
            # Consume one generator, returning its features and labels keyed
            # by sample key, the number of items seen, and the feature width
            # of the last item (0 when the generator yields nothing).
            features_by_key = {}
            labels_by_key = {}
            seen = 0
            width = 0
            for item in generator:
                key, instance = item[0], item[1]
                width = instance.features.shape[-1]
                seen += 1
                features_by_key[key] = instance.features
                labels_by_key[key] = instance.label
            return features_by_key, labels_by_key, seen, width

        guest_features, guest_labels, guest_count, guest_width = \
            drain(guest_data_generator)
        host_features, host_labels, host_count, host_width = \
            drain(host_data_generator)

        # NOTE(review): only the observed widths are checked below; the
        # requested guest_feature_num is never compared with guest_width.
        # Consider asserting that once the helper's contract is confirmed.
        assert_array(expected_overlap_indexes, overlap_indexes)
        assert len(expected_overlap_indexes) == len(overlap_indexes)
        assert X.shape[-1] == guest_width + host_width
        assert expected_guest_size == guest_count
        assert expected_host_size == host_count

        for index in overlap_indexes:
            assert guest_labels[index] == host_labels[index]
            assert guest_labels[index] == y[index]
            assert_matrix(guest_features[index],
                          X[index, :guest_width].reshape(1, -1))
            assert_matrix(host_features[index],
                          X[index, guest_width:].reshape(1, -1))