def test_parse_grid(self):
    config1 = parse_params(GOOD_YAML_FILE, "grid")
    config2 = {
        "max_depth": [3, None],
        "max_features": [1, 11],
        "min_samples_split": [2, 11],
        "bootstrap": [0, 1],
        "criterion": ["gini", "entropy"],
        "learning_rate": [1e-06, 10.0]
    }
    assert config1 == config2

def test_parse_bayes(self):
    config1 = parse_params(GOOD_YAML_FILE, "bayes")
    config2 = {
        "max_depth": Categorical([3, None]),
        "max_features": Integer(1, 11),
        "learning_rate": Real(1e-06, 10.0)
    }
    assert isinstance(config1["max_depth"], type(config2["max_depth"]))
    assert isinstance(config1["learning_rate"],
                      type(config2["learning_rate"]))
    assert isinstance(config1["max_features"],
                      type(config2["max_features"]))

def test_parse_randomized(self):
    config1 = parse_params(GOOD_YAML_FILE, "randomized")
    config2 = {
        "max_depth": [3, None],
        "max_features": randint(1, 11),
        "learning_rate": uniform(1e-06, 10.0)
    }
    assert isinstance(config1["max_depth"], type(config2["max_depth"]))
    assert isinstance(config1["learning_rate"],
                      type(config2["learning_rate"]))
    assert isinstance(config1["max_features"],
                      type(config2["max_features"]))

def test_badly_formatted_yaml(self):
    with pytest.raises(ParserError):
        parse_params(BAD_YAML_FILE, "grid")

def test_parse_with_unknown_method(self):
    with pytest.raises(ValueError):
        parse_params(GOOD_YAML_FILE, "foobar")
Example No. 6
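This example begins partway through its source file, so its imports and helper definitions are not shown. The lines below are an assumption about what the snippet relies on, reconstructed from the names it uses; in particular, dump may come from joblib or pickle, and parse_params, launch_cluster, search and CURRENT_DIR are presumably defined elsewhere in the same module.

# Assumed imports for the truncated example below (not shown in the original):
import argparse
import logging
import os

import joblib
from joblib import dump                        # assumption: could also be pickle.dump
from dask.distributed import Client, progress
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier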
    parser.add_argument("--scale",
                        type=int,
                        help="Number of cluster workers to requesat.")
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    # replace with your data
    digits = load_digits()
    X, y = digits.data, digits.target

    # replace with your model
    model = RandomForestClassifier(n_estimators=20)

    # replace with your hyperparameter search config
    config = os.path.join(CURRENT_DIR, "params.yaml")
    params = parse_params(config, args.method)

    # set up context manager for distributing computations
    LOGGER = logging.getLogger(__name__)
    CLUSTER = launch_cluster(args.cluster, args.scale)
    CLIENT = Client(CLUSTER)
    LOGGER.warn("Web dashboard now running at http://localhost:8787/status")
    with joblib.parallel_backend('dask'):
        result = search(model, X, y, params, method=args.method)
        # try to show a progress bar in the terminal; prefer the web dashboard
        progress(result)
        dump(result, args.filepath)
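Once the search finishes, the fitted result is written to disk with dump. As a hypothetical follow-up, assuming search returns a fitted scikit-learn-style search object (e.g. GridSearchCV, RandomizedSearchCV or BayesSearchCV) and that the script was run with --filepath pointing at a file such as results.joblib (neither detail is shown above), the saved object can be reloaded and inspected like this:

# Hypothetical follow-up: reload the persisted search object and inspect it.
from joblib import load

result = load("results.joblib")   # the path previously passed via --filepath (assumed)
print(result.best_params_)        # best hyperparameter combination found
print(result.best_score_)         # its mean cross-validated score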