Python MERFDataGenerator Examples

Programming Language: Python

Namespace/Package Name: utils

Examples at hotexamples.com: 6

Python MERFDataGenerator - 6 examples found. These are the top-rated real-world Python examples of utils.MERFDataGenerator extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

MERFDataGenerator(4)

generate_split_samples(3)

create_cluster_sizes_array(1)

generate_samples(1)

ohe_clusters(1)

Example #1

Show file

File: tests.py Project: zjl0714/merf

    def test_ohe_clusters(self):
        """Check the one-hot encoding of cluster ids against fixed training ids.

        Each scenario feeds a series of cluster ids to
        ``MERFDataGenerator.ohe_clusters`` together with the same four
        training cluster ids, then verifies that the resulting frame always
        has one column per training cluster and that the column sums equal
        the expected number of rows per training cluster.
        """
        known_ids = np.array([0, 1, 2, 3])
        expected_columns = ["cluster_0", "cluster_1", "cluster_2", "cluster_3"]

        scenarios = (
            # Training-like encoding -- every id is a training cluster.
            ([0, 0, 1, 2, 2, 2, 3], [2, 1, 3, 1]),
            # New encoding -- no id is a training cluster, so all sums are 0.
            ([4, 4, 5, 6, 6, 7], [0, 0, 0, 0]),
            # Mixed encoding -- only some ids are training clusters.
            ([1, 1, 3, 0, 0, 4, 5, 6, 6, 7], [2, 2, 0, 1]),
        )

        for cluster_ids, expected_sums in scenarios:
            encoded = MERFDataGenerator.ohe_clusters(
                pd.Series(cluster_ids),
                training_cluster_ids=known_ids)
            # Columns are expected to follow the training ids, not the input.
            self.assertListEqual(encoded.columns.tolist(), expected_columns)
            self.assertListEqual(encoded.sum().tolist(), expected_sums)

Example #2

Show file

File: tests.py Project: zjl0714/merf

    def test_generate_split_samples(self):
        """Check the frames returned by ``generate_split_samples``.

        The generator is called with ``[1, 3]``, ``[3, 2]`` and ``[1, 1]``.
        The test expects three frames (train, known-cluster test,
        new-cluster test) sharing the same columns, with the row counts and
        per-cluster sizes asserted below, and training ids ``[0, 1]``.
        """
        generator = MERFDataGenerator(m=0.7, sigma_b=2.7, sigma_e=1)
        # The last two returned values are not inspected by this test.
        train, test_known, test_new, training_ids, _ptev, _prev = (
            generator.generate_split_samples([1, 3], [3, 2], [1, 1]))

        # All three frames must expose the same columns in the same order.
        expected_columns = ["y", "X_0", "X_1", "X_2", "Z", "cluster"]
        for frame in (train, test_known, test_new):
            self.assertListEqual(frame.columns.tolist(), expected_columns)

        # Total number of rows per frame.
        for frame, expected_rows in ((train, 4), (test_known, 5), (test_new, 2)):
            self.assertEqual(len(frame), expected_rows)

        # Number of rows per (frame, cluster id) pair.
        expected_cluster_sizes = (
            (train, 0, 1),
            (train, 1, 3),
            (test_known, 0, 3),
            (test_known, 1, 2),
            (test_new, 2, 1),
            (test_new, 3, 1),
        )
        for frame, cluster_id, expected_size in expected_cluster_sizes:
            in_cluster = frame[frame["cluster"] == cluster_id]
            self.assertEqual(len(in_cluster), expected_size)

        # Ids of the clusters that appear in the training frame.
        self.assertListEqual(training_ids.tolist(), [0, 1])

Example #3

Show file

File: merf_test.py Project: kiminh/merf

 def test_generate_samples(self):
     """Check the frame returned by ``generate_samples([1, 2, 3])``.

     The test expects the fixed column layout, six rows in total, and
     clusters 0, 1 and 2 holding 1, 2 and 3 rows respectively.
     """
     generator = MERFDataGenerator(m=0.6, sigma_b=4.5, sigma_e=1)
     # The last two returned values are not inspected by this test.
     samples, _ptev, _prev = generator.generate_samples([1, 2, 3])

     # Column layout.
     expected_columns = ["y", "X_0", "X_1", "X_2", "Z", "cluster"]
     self.assertListEqual(samples.columns.tolist(), expected_columns)

     # Total number of rows.
     self.assertEqual(len(samples), 6)

     # Rows per cluster id.
     for cluster_id, expected_size in ((0, 1), (1, 2), (2, 3)):
         in_cluster = samples[samples["cluster"] == cluster_id]
         self.assertEqual(len(in_cluster), expected_size)

Example #4

Show file

    def setUp(self):
        """Build train / known-cluster / new-cluster fixtures.

        Calls ``generate_split_samples`` once and stores, for each of the
        three returned frames, the ``X_*`` columns, the ``Z`` column (kept
        as a one-column frame), the ``cluster`` column and the ``y`` column
        as attributes on the test case.
        """
        def split_columns(frame):
            # Same selections, in the same order, for every frame.
            return (frame[['X_0', 'X_1', 'X_2']],
                    frame[['Z']],
                    frame['cluster'],
                    frame['y'])

        generator = MERFDataGenerator(m=0.6, sigma_b=4.5, sigma_e=1)
        # Only the three frames are used; the remaining values are ignored.
        train, test_known, test_new, _train_cluster_ids, _ptev, _prev = (
            generator.generate_split_samples([1, 3], [3, 2], [1, 1]))

        (self.X_train, self.Z_train,
         self.clusters_train, self.y_train) = split_columns(train)

        (self.X_known, self.Z_known,
         self.clusters_known, self.y_known) = split_columns(test_known)

        (self.X_new, self.Z_new,
         self.clusters_new, self.y_new) = split_columns(test_new)

Example #5

Show file

    def setUp(self):
        """Prepare the data splits shared by the tests of this case.

        One call to ``generate_split_samples`` yields a training frame, a
        test frame and a second test frame; each is broken into its ``X_*``
        columns, its ``Z`` column (as a one-column frame), its ``cluster``
        column and its ``y`` column, all stored on ``self``.
        """
        def pieces(frame):
            # Order matches the attribute unpacking below: X, Z, clusters, y.
            features = frame[["X_0", "X_1", "X_2"]]
            random_effects = frame[["Z"]]
            return features, random_effects, frame["cluster"], frame["y"]

        generator = MERFDataGenerator(m=0.6, sigma_b=4.5, sigma_e=1)
        # The trailing three return values are not needed here.
        train, test_known, test_new, _train_cluster_ids, _ptev, _prev = (
            generator.generate_split_samples([1, 3], [3, 2], [1, 1]))

        self.X_train, self.Z_train, self.clusters_train, self.y_train = pieces(train)
        self.X_known, self.Z_known, self.clusters_known, self.y_known = pieces(test_known)
        self.X_new, self.Z_new, self.clusters_new, self.y_new = pieces(test_new)

Example #6

Show file

File: tests.py Project: zjl0714/merf

    def test_create_cluster_sizes(self):
        """Check ``create_cluster_sizes_array`` against two fixed cases.

        The expected outputs show each size from the first argument
        appearing as many times in a row as the second argument says.
        """
        cases = (
            # (sizes, second argument), expected result
            (([1, 2, 3], 1), [1, 2, 3]),
            (([30, 20, 7], 3), [30, 30, 30, 20, 20, 20, 7, 7, 7]),
        )
        for (sizes, repeats), expected in cases:
            result = MERFDataGenerator.create_cluster_sizes_array(sizes, repeats)
            self.assertListEqual(result, expected)