def test_parse_grid(self):
    """Check that the "grid" method parses GOOD_YAML_FILE into plain value lists.

    Every hyperparameter is expected to come back as a two-element list and
    the whole mapping must compare equal to the reference dictionary.
    """
    expected = dict(
        max_depth=[3, None],
        max_features=[1, 11],
        min_samples_split=[2, 11],
        bootstrap=[0, 1],
        criterion=["gini", "entropy"],
        learning_rate=[1e-06, 10.0],
    )
    parsed = parse_params(GOOD_YAML_FILE, "grid")
    assert parsed == expected
def test_parse_bayes(self):
    """Check that the "bayes" method maps each hyperparameter to a search-space type.

    Only the type of each parsed entry is verified (Categorical, Integer,
    Real); the bounds and categories carried by those objects are not compared.
    """
    parsed = parse_params(GOOD_YAML_FILE, "bayes")
    expected = {
        "max_depth": Categorical([3, None]),
        "max_features": Integer(1, 11),
        "learning_rate": Real(1e-06, 10.0),
    }
    # Use isinstance for every key instead of mixing it with an exact
    # type() comparison; the key name is the failure message so a failing
    # entry is identifiable from the assertion output.
    for name, space in expected.items():
        assert isinstance(parsed[name], type(space)), name
def test_parse_randomized(self):
    """Check the types produced by the "randomized" method for GOOD_YAML_FILE.

    Each parsed entry must be an instance of the type of the matching
    reference value; the values themselves are not compared.
    """
    parsed = parse_params(GOOD_YAML_FILE, "randomized")
    reference = {
        "max_depth": [3, None],
        "max_features": randint(1, 11),
        "learning_rate": uniform(1e-06, 10.0),
    }
    # Keys are checked in a fixed, explicit order.
    for key in ("max_depth", "learning_rate", "max_features"):
        expected_type = type(reference[key])
        assert isinstance(parsed[key], expected_type)
def test_badly_formatted_yaml(self):
    """Parsing BAD_YAML_FILE with the "grid" method must raise ParserError."""
    search_method = "grid"
    with pytest.raises(ParserError):
        parse_params(BAD_YAML_FILE, search_method)
def test_parse_with_unkown_mathod(self):
    """Parsing GOOD_YAML_FILE with the method name "foobar" must raise ValueError."""
    # NOTE: the test name is misspelled ("unkown", "mathod"); it is left
    # untouched so the existing test ID keeps working.
    unsupported_method = "foobar"
    with pytest.raises(ValueError):
        parse_params(GOOD_YAML_FILE, unsupported_method)
parser.add_argument("--scale", type=int, help="Number of cluster workers to requesat.") return parser.parse_args() if __name__ == '__main__': args = parse_args() # replace with your data digits = load_digits() X, y = digits.data, digits.target # replace with your model model = RandomForestClassifier(n_estimators=20) # replace with your hyperparameter search config config = os.path.join(CURRENT_DIR, "params.yaml") params = parse_params(config, args.method) # set up context manager for distributing computations LOGGER = logging.getLogger(__name__) CLUSTER = launch_cluster(args.cluster, args.scale) CLIENT = Client(CLUSTER) LOGGER.warn("Web dashboard now running at http://localhost:8787/status") with joblib.parallel_backend('dask.distributed'): result = search(model, X, y, params, method=args.method) # try to get progress bar in terminal; prefer web daskboad progress(result) dump(result, args.filepath)