Example #1
0
    def test_full_hawks_run_multiple(self):
        """Run the validation config twice and check the second run's stats.

        A fresh generator is created for each run. Only the second one is
        compared against the stored ``validation.csv`` reference, so anything
        carried over from the first run would show up as a mismatch.
        """
        gen = hawks.create_generator("validation.json")
        gen.run()
        # Run a second time to ensure there's no carryover
        gen = hawks.create_generator("validation.json")
        gen.run()

        res = gen.get_stats()

        known_result = pd.read_csv("validation.csv", index_col=False)
        # Pandas can be iffy with data types, so compare the underlying
        # values with a tolerance rather than the frames themselves
        equals = np.allclose(res.values, known_result.values)
        # Include both tables in the failure message; a bare assertTrue would
        # only report "False is not true"
        self.assertTrue(
            equals,
            msg="Result:\n{}\nKnown result:\n{}".format(res, known_result),
        )
Example #2
0
    def test_full_hawks_run_multiple(self):
        """Run the validation config twice; the last run must match the reference.

        Both input files are looked up in the ``tests`` folder one level above
        the directory that contains ``hawks.__file__``.
        """
        tests_dir = Path(hawks.__file__).parents[1] / "tests"
        config_path = tests_dir / "validation.json"
        # Two create-and-run passes, each on a brand new generator. Only the
        # generator from the last pass is inspected below, to ensure there's
        # no carryover from the first pass.
        for _ in range(2):
            gen = hawks.create_generator(config_path)
            gen.run()

        stats = gen.get_stats()
        expected = pd.read_csv(tests_dir / "validation.csv", index_col=False)
        # Echo both tables so they can be inspected in the test output
        print(stats)
        print(expected)
        # Pandas can be iffy with data types, so compare the raw values with
        # a tolerance
        self.assertTrue(np.allclose(stats.values, expected.values))
Example #3
0
    def test_nested_config_arg(self):
        """A nested value passed in a config dict must appear in ``full_config``."""
        override = {"constraints": {"overlap": {"limit": "TEST"}}}
        gen = hawks.create_generator(override)

        # Read the value back through the same nested path it was given under
        stored_limit = gen.full_config["constraints"]["overlap"]["limit"]
        self.assertEqual(stored_limit, "TEST")
Example #4
0
    def test_full_hawks_run(self):
        """Run the validation config once and compare its stats to the reference.

        ``validation.json`` and ``validation.csv`` are given as relative
        paths, so they are resolved against the current working directory.
        """
        generator = hawks.create_generator("validation.json")
        generator.run()
        stats = generator.get_stats()

        expected = pd.read_csv("validation.csv", index_col=False)
        # Echo both tables so they can be inspected in the test output
        for table in (stats, expected):
            print(table)
        # Pandas can be iffy with data types, so compare the raw values with
        # a tolerance
        matches = np.allclose(stats.values, expected.values)
        self.assertTrue(matches)
Example #5
0
    def test_full_hawks_run(self):
        """Run the validation config once and compare its stats to the reference.

        Both input files are looked up in the ``tests`` folder one level above
        the directory that contains ``hawks.__file__``.
        """
        tests_dir = Path(hawks.__file__).parents[1] / "tests"
        generator = hawks.create_generator(tests_dir / "validation.json")
        generator.run()
        stats = generator.get_stats()

        expected = pd.read_csv(tests_dir / "validation.csv", index_col=False)
        # Labelled dump of both tables, printed one item per line
        for item in ("Result:", stats, "Known result:", expected, "---"):
            print(item)
        # Pandas can be iffy with data types, so compare the raw values with
        # a tolerance
        self.assertTrue(np.allclose(stats.values, expected.values))
Example #6
0
 def test_incorrect_config_arg(self):
     """Creating a generator from a config with a bad entry must raise ``ValueError``."""
     bad_config = {
         "hawks": {
             "seed_num": 4,
             "num_runs": 1
         },
         "objectives": {
             "silhouette": {
                 "target": 0.9
             }
         },
         "constraints": {
             "eigenval_ratio": {
                 # Deliberate error -- presumably the key should be "limit"
                 # (TODO confirm against the defaults)
                 "lim": "upper"
             }
         }
     }
     # Only the call under test sits inside the context manager, and its
     # return value is not needed, so nothing is bound to it
     with self.assertRaises(ValueError):
         hawks.create_generator(bad_config)
Example #7
0
 def test_multiconfig_deep(self):
     """Check the multi-config count when list-valued options sit at several depths.

     Four options are given as lists holding 3, 2, 4 and 2 values; the
     expected total is 3 * 2 * 4 * 2 = 48.
     """
     dataset_opts = {"num_examples": [10, 100, 1000]}
     constraint_opts = {"overlap": {"limit": ["upper", "lower"]}}
     ga_opts = {
         "num_gens": [50, 100, 10, 200],
         "mut_args_mean": {"dims": ["each", "all"]},
     }
     obj = hawks.create_generator({
         "dataset": dataset_opts,
         "constraints": constraint_opts,
         "ga": ga_opts,
     })
     # Three values come back; only the first (the total) is checked here
     counts = obj._count_multiconfigs()
     total_configs, _, _ = counts
     self.assertEqual(total_configs, 48)
Example #8
0
def main(ndim, nclusters, run_id):
    """Generate a dataset with HAWKS, cluster it with KMeans and report the fit.

    Args:
        ndim: Passed to the generator as the dataset's ``num_dims``.
        nclusters: Passed to the generator as the dataset's ``num_clusters``.
        run_id: Accepted but not used inside this function.
    """
    hawks_opts = {"folder_name": "", "save_best_data": True}
    dataset_opts = {"num_clusters": nclusters, "num_dims": ndim}
    overlap_constraint = {"threshold": 0.3, "limit": "lower"}

    generator = hawks.create_generator({
        "hawks": hawks_opts,
        "dataset": dataset_opts,
        "ga": {"num_gens": 500},
        "constraints": {"overlap": overlap_constraint},
    })
    print(generator.folder_name)
    print(generator.save_best_data)

    generator.run()
    # Get the best dataset found and its labels; these are stored as lists
    # (one entry per run), so take the first entry of each
    best_datasets, best_label_sets = generator.get_best_dataset()
    data = best_datasets[0]
    labels = best_label_sets[0]
    # Run KMeans on the data, asking for as many clusters as there are
    # distinct labels
    num_found = len(np.unique(labels))
    km = KMeans(n_clusters=num_found, random_state=0)
    km.fit(data)
    # Adjusted Rand Index of the KMeans labels against the generated labels
    ari = adjusted_rand_score(labels, km.labels_)
    # Silhouette score of the data under the KMeans labels
    sil = silhouette_score(data, km.labels_)
    print(f"ARI: {ari}, SIL: {sil}")

    generator.plot_best_indivs(show=True)
Example #9
0
"""
import seaborn as sns

import hawks

# Settings for the generator: fixed seed, 5 runs, 5 clusters, a silhouette
# target of 0.9 and an overlap constraint (threshold 0.05, limit "lower")
config = {
    "hawks": {"seed_num": 42, "num_runs": 5},
    "dataset": {"num_clusters": 5},
    "objectives": {"silhouette": {"target": 0.9}},
    "constraints": {"overlap": {"threshold": 0.05, "limit": "lower"}},
}
# Create the generator
generator = hawks.create_generator(config)
# Run HAWKS!
generator.run()
# Options common to both plots, collected in one dictionary
converg_kws = {
    "show": True,
    "xlabel": "Generation",
    "ci": "sd",
    "legend_type": None,
}
# Switch seaborn to its "talk" context to make the font etc. larger
sns.set_context("talk")
from pathlib import Path
import hawks

# Build a generator from the validation config (path is relative to the
# current working directory) and run it
gen = hawks.create_generator("validation.json")
gen.run()

# Hand the run's stats to df_to_csv together with the current working
# directory and the name "validation" -- presumably this writes
# validation.csv there (TODO confirm against hawks.utils.df_to_csv)
hawks.utils.df_to_csv(gen.stats, Path.cwd(), "validation")
Example #11
0
 def setUp(self):
     """Create a generator from ``validation.json`` and collect its individuals.

     ``self.gen`` holds the generator; ``self.init_pop`` holds every
     individual produced by ``self.gen.create_individual()``, in order.
     """
     self.gen = hawks.create_generator("validation.json")
     # list() drains the iterable directly; no manual append loop is needed
     self.init_pop = list(self.gen.create_individual())
Example #12
0
from sklearn.datasets import make_blobs, make_moons
import seaborn as sns

import hawks

SEED_NUM = 10
NUM_RUNS = 10  # May take a few minutes
NUM_CLUSTERS = 5

hawks_config = {
    "hawks": {
        "seed_num": SEED_NUM,
        # Half the runs, for parity -- presumably because the two silhouette
        # targets below each get their own set of runs (TODO confirm)
        "num_runs": int(NUM_RUNS / 2),
    },
    "dataset": {"num_clusters": NUM_CLUSTERS},
    "objectives": {"silhouette": {"target": [0.5, 0.9]}},
}
generator = hawks.create_generator(hawks_config)
generator.run()
# Analyse the hawks datasets; only the first returned value is kept
df, _ = hawks.analysis.analyse_datasets(
    generator=generator, source="HAWKS", seed=SEED_NUM, save=False
)
# Empty containers for the blobs datasets and their label sets
datasets, label_sets = [], []
Example #13
0
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
import hawks

SEED_NUM = 42

# Only the seed number is fixed here. Any missing parameters will take the
# default seen in configs/defaults.json
config = {"hawks": {"seed_num": SEED_NUM}}
generator = hawks.create_generator(config)
# Run the generator
generator.run()
# Get the best dataset found and its labels; these are stored as lists
# (one entry per run), so take the first entry of each
datasets, label_sets = generator.get_best_dataset()
data = datasets[0]
labels = label_sets[0]
# Run KMeans on the data, asking for as many clusters as there are distinct
# labels and reusing the same seed
km = KMeans(n_clusters=len(np.unique(labels)), random_state=SEED_NUM)
km.fit(data)
# Adjusted Rand Index of the KMeans labels against the generated labels
ari = adjusted_rand_score(labels, km.labels_)
print(f"ARI: {ari}")
Example #14
0
 def setUp(self):
     """Create a generator from the validation config and collect its individuals.

     The config is read from ``tests/validation.json``, where ``tests`` is
     one level above the directory that contains ``hawks.__file__``.
     ``self.init_pop`` holds every individual produced by
     ``self.gen.create_individual()``, in order.
     """
     self.gen = hawks.create_generator(
         Path(hawks.__file__).parents[1] / "tests" / "validation.json")
     # list() drains the iterable directly; no manual append loop is needed
     self.init_pop = list(self.gen.create_individual())
Example #15
0
# Use a HAWKS and sklearn example, and show the performance (boxplot) for HAWKS, moons, and blobs
from pathlib import Path
from sklearn.datasets import make_blobs, make_moons
import hawks

SEED_NUM = 42
SAVE_FOLDER = Path.cwd()
NUM_RUNS = 5
NUM_CLUSTERS = 5

# Only the seed, the number of runs and the number of clusters are set here
hawks_config = {
    "hawks": {"seed_num": SEED_NUM, "num_runs": NUM_RUNS},
    "dataset": {"num_clusters": NUM_CLUSTERS},
}
generator = hawks.create_generator(hawks_config)
generator.run()
# Analyse the hawks datasets; only the first returned value is kept
df, _ = hawks.analysis.analyse_datasets(
    generator=generator, source="HAWKS", seed=SEED_NUM, save=False
)
# Empty containers for the blobs datasets and their label sets
datasets, label_sets = [], []
for run in range(NUM_RUNS):
    data, labels = make_blobs(n_samples=1000,
                              n_features=2,