def test_colt_constructor():
    """Build a ``plugh`` object and check the attributes it ends up with.

    The config supplies one value through the ``"*"`` entry (presumably
    positional arguments) and one through the ``"y"`` keyword.
    """
    built = colt.build({"@type": "plugh", "*": ["plugh"], "y": "plugh"})

    assert built.x == "plugh_x"
    assert built.y == "plugh_y"
def test_colt_builtintypes():
    """Check that ``tuple`` and ``range`` type names produce builtin instances."""
    tuple_config = {"@type": "tuple", "*": [[1, 2, 3]]}
    range_config = {"@type": "range", "*": [0, 10, 2]}

    built = colt.build([tuple_config, range_config])

    # The result is indexed in the same order as the configs were given.
    for position, expected_type in enumerate((tuple, range)):
        assert isinstance(built[position], expected_type)
def test_build_with_type() -> None:
    """Build a config that has no ``@type`` by passing ``Foo`` as the target."""
    built = colt.build({"x": "abc"}, Foo)

    assert isinstance(built, Foo)
    assert built.x == "abc"
def run(self):
    """Train the configured model on the iris dataset and report its scores.

    Reads ``self.model`` (a colt config) and ``self.test_size`` (forwarded to
    ``train_test_split``).

    Returns:
        A ``logexp.Report`` holding the train/validation split sizes and the
        accuracy of the fitted model on each split.
    """
    logger.info("load iris dataset")

    estimator = colt.build(self.model)

    dataset = load_iris()
    features = dataset.data
    targets = dataset.target
    split = train_test_split(features, targets, test_size=self.test_size)
    X_train, X_valid, y_train, y_valid = split

    logger.info(
        f"dataset size: train={len(X_train)}, valid={len(X_valid)}")

    logger.info("start training")
    estimator.fit(X_train, y_train)
    logger.info("end training")

    train_accuracy = estimator.score(X_train, y_train)
    valid_accuracy = estimator.score(X_valid, y_valid)

    report = logexp.Report()
    entries = (
        ("train_size", len(X_train)),
        ("valid_size", len(X_valid)),
        ("train_accuracy", train_accuracy),
        ("valid_accuracy", valid_accuracy),
    )
    for key, value in entries:
        report[key] = value

    return report
def main(prog: str = None):
    """Command-line entry point: build a ``Worker`` from a config and run it.

    Args:
        prog: Program name handed to ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        usage='%(prog)s',
        description="mincrawler",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s " + __version__,
    )
    parser.add_argument(
        "config_path",
        type=str,
        help="path to parameter file describing the crawler settings",
    )
    parser.add_argument(
        "--module",
        type=str,
        action="append",
        default=[],
        help="additional modules to include",
    )
    cli_args = parser.parse_args()

    settings = load_jsonnet(cli_args.config_path)

    # Extra modules are imported before building the worker from the config.
    colt.import_modules(cli_args.module)

    logger.debug("config: %s", repr(settings))

    crawler = colt.build(settings, Worker)
    crawler()
def test_colt_without_annotation() -> None:
    """A list of strings given to ``corge`` stays a list of strings."""
    built = colt.build({"@type": "corge", "x": ["a", "b"]})

    assert isinstance(built, Corge)

    values = built.x
    assert isinstance(values, list)
    assert isinstance(values[0], str)
def test_type_conversion() -> None:
    """A list with one duplicate builds into a three-element set on ``qux``."""
    built = colt.build({"@type": "qux", "x": [1, 2, 3, 3]})

    assert isinstance(built, Qux)

    members = built.x
    assert isinstance(members, set)
    assert len(members) == 3
def test_colt_with_optional() -> None:
    """``baz.y`` is ``None`` when omitted and keeps its value when given."""
    # Case 1: "y" is left out of the config.
    without_y = colt.build({"@type": "baz", "x": "hello"})
    assert isinstance(without_y, Baz)
    assert without_y.x == "hello"
    assert without_y.y is None

    # Case 2: "y" is supplied explicitly.
    with_y = colt.build({"@type": "baz", "x": "hello", "y": 123})
    assert with_y.y == 123
def test_colt_union() -> None:
    """``waldo.x`` accepts either a plain string or a nested ``Foo`` config."""
    # A string value is kept as a string.
    from_string = colt.build({"@type": "waldo", "x": "hello"})
    assert isinstance(from_string, Waldo)
    assert isinstance(from_string.x, str)
    assert from_string.x == "hello"

    # A dict value is built into a Foo instance.
    from_mapping = colt.build({"@type": "waldo", "x": {"x": "hello"}})
    assert isinstance(from_mapping, Waldo)
    assert isinstance(from_mapping.x, Foo)
    assert from_mapping.x.x == "hello"
def test_colt_with_subclass() -> None:
    """An entry of ``bar.foos`` tagged ``baz`` is built as a ``Baz``."""
    untyped_entry = {"x": "hello"}
    baz_entry = {"@type": "baz", "x": "world", "y": 123}

    built = colt.build({"@type": "bar", "foos": [untyped_entry, baz_entry]})

    assert isinstance(built, Bar)
    assert isinstance(built.foos[0], Foo)
    assert isinstance(built.foos[1], Baz)
def test_colt_with_less_type() -> None:
    """Entries of ``bar.foos`` without ``@type`` are still built as ``Foo``."""
    entries = [{"x": word} for word in ("hello", "world")]

    built = colt.build({"@type": "bar", "foos": entries})

    assert isinstance(built, Bar)
    assert isinstance(built.foos, list)
    assert isinstance(built.foos[0], Foo)
def test_colt_tuple():
    """``grault.x`` becomes a tuple holding a ``Foo`` followed by a ``Qux``."""
    foo_entry = {"x": "hello"}
    qux_entry = {"x": [1, 2, 3]}

    built = colt.build({"@type": "grault", "x": [foo_entry, qux_entry]})

    assert isinstance(built, Grault)
    assert isinstance(built.x, tuple)
    assert isinstance(built.x[0], Foo)
    assert isinstance(built.x[1], Qux)
def test_registrable():
    """The same ``baz`` name resolves to a different class per field.

    Both ``foo`` and ``bar`` use ``"@type": "baz"``; the built attributes are
    expected to be ``FooBaz`` and ``BarBaz`` respectively.
    """
    built = colt.build(
        {
            "@type": "my_class",
            "foo": {"@type": "baz"},
            "bar": {"@type": "baz"},
        }
    )

    assert isinstance(built.foo, FooBaz)
    assert isinstance(built.bar, BarBaz)
def test_colt_dict() -> None:
    """Each value of ``garply.x`` is built into a ``Foo``, keyed as in the config."""
    expected_x = {"a": "hello", "b": "world"}
    nested = {key: {"x": text} for key, text in expected_x.items()}

    built = colt.build({"@type": "garply", "x": nested})

    assert isinstance(built, Garply)
    assert isinstance(built.x, dict)

    # Check the types of every entry first, then their payloads.
    for key in ("a", "b"):
        assert isinstance(built.x[key], Foo)
    for key in ("a", "b"):
        assert built.x[key].x == expected_x[key]
def main():
    """Command-line entry point: run a configured ``Worker``.

    Loads the jsonnet file given by ``--config``, prints it, imports every
    ``--module``, builds the worker and calls it. The result is written as CSV
    when ``--output`` is provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True)
    parser.add_argument("--module", action="append", default=[])
    parser.add_argument("--output")
    cli_args = parser.parse_args()

    settings = load_jsonnet(cli_args.config)
    print(settings)

    colt.import_modules(cli_args.module)

    run_worker = colt.build(settings, Worker)
    predictions = run_worker()

    if not cli_args.output:
        return
    predictions.to_csv(cli_args.output, index=False)
def test_colt_with_type() -> None:
    """A plain dict config is built value by value and stays subscriptable."""

    def foo_config(text):
        """Return an explicitly typed ``foo`` config carrying ``text``."""
        return {"@type": "foo", "x": text}

    nested_bar = {
        "@type": "bar",
        "foos": [foo_config("hello"), foo_config("world")],
    }
    top_level_foos = [foo_config("hoge"), foo_config("fuga")]

    built = colt.build({"bar": nested_bar, "foos": top_level_foos})

    assert isinstance(built["bar"], Bar)
    assert isinstance(built["bar"].foos, list)
    assert isinstance(built["bar"].foos[0], Foo)
    assert isinstance(built["foos"], list)
    assert isinstance(built["foos"][0], Foo)
def build(cls, config: Dict[str, Any]) -> ConfigBuilder:
    """Create an instance of ``cls`` from a configuration dictionary.

    If ``config`` has an ``"automlcli"`` key, the value under that key is used
    as the configuration. Random seeds are set from ``"random_seed"`` and
    ``"numpy_seed"`` (defaults 13370 and 1337), the model is built with colt
    from ``config["model"]``, and the optional ``"colt"`` entry overrides
    ``DEFAULT_COLT_SETTING`` for that build.

    Args:
        config: Configuration dictionary, optionally wrapped under
            ``"automlcli"``.

    Returns:
        ``cls(model, train_file, validation_file, test_file)``, where the file
        entries are ``None`` when absent from the configuration.

    Raises:
        ConfigurationError: If the effective configuration is not a dictionary.
        KeyError: If the configuration has no ``"model"`` entry.
    """
    if "automlcli" in config:
        config = config["automlcli"]

    if not isinstance(config, dict):
        raise ConfigurationError(
            "`automlcli` field is given, but it is not a dictionary.")

    random_seed = config.get("random_seed", 13370)
    numpy_seed = config.get("numpy_seed", 1337)
    set_random_seed(random_seed, numpy_seed)

    train_file = config.get("train_file")
    validation_file = config.get("validation_file")
    test_file = config.get("test_file")

    # Copy before merging: updating DEFAULT_COLT_SETTING in place would leak
    # this call's overrides into every later call that relies on the defaults.
    colt_config = dict(DEFAULT_COLT_SETTING)
    colt_config.update(config.get("colt", {}))

    model_config = config["model"]
    model = colt.build(model_config, cls=Model, **colt_config)  # type: Model

    return cls(model, train_file, validation_file, test_file)
def run(self) -> logexp.Report:
    """Train a model, cross-validate it, and write predictions.

    Steps, in order:

    1. Seed NumPy when ``self.random_seed`` is set.
    2. Read the CSV files at ``self.train_path`` and ``self.test_path``.
    3. Build the preprocessing pipeline from ``self.pdpipeline``, fit it on
       the training frame, and convert both frames to float arrays. The
       ``"Survived"`` column is popped from the training frame as the target.
    4. Build the model from ``self.model``. If it is a ``BaseSearchCV``, run
       the search, save ``best_params.json``, and continue with the best
       estimator.
    5. Cross-validate with the keyword arguments in ``self.cross_validate``.
    6. Fit on the full training data, save ``model.pkl`` and ``submit.csv``
       through ``self.storage``.

    Returns:
        A ``logexp.Report`` whose ``"cv_score"`` entry holds the mean, the
        standard deviation and the raw values of each cross-validation score.
    """
    import io  # local import: only needed to capture DataFrame.info() output

    def _describe(frame) -> str:
        """Return the summary that ``DataFrame.info`` would print to stdout."""
        buffer = io.StringIO()
        frame.info(buf=buffer)
        return buffer.getvalue()

    logger.info("params: %s", repr(self.params.to_json()))

    if self.random_seed is not None:
        np.random.seed(self.random_seed)

    logger.info("load datasets")
    train_df = pd.read_csv(self.train_path)
    test_df = pd.read_csv(self.test_path)

    # DataFrame.info() prints and returns None, so its text has to be
    # captured explicitly for the log to contain anything useful.
    logger.info("datasets:")
    logger.info("%s:\n%s", self.train_path, _describe(train_df))
    logger.info("%s:\n%s", self.test_path, _describe(test_df))

    logger.info("build ndarray")
    pdpipeline = colt.build(self.pdpipeline)
    pdpipeline.fit(train_df)

    # Builtin float replaces the np.float alias removed from recent NumPy.
    y_train = train_df.pop("Survived").to_numpy(dtype=float)
    X_train = pdpipeline.transform(train_df).to_numpy(dtype=float)
    X_test = pdpipeline.transform(test_df).to_numpy(dtype=float)

    logger.info("build model")
    model = colt.build(self.model)

    if isinstance(model, BaseSearchCV):
        grid = model

        logger.info("[ GS ] start grid-search")
        grid.fit(X_train, y_train)

        logger.info("[ GS ] best params: %s", repr(grid.best_params_))
        logger.info("[ GS ] best score: %s", repr(grid.best_score_))

        with self.storage.open("best_params.json", "w") as f:
            json.dump(grid.best_params_, f)

        # Everything below works with the estimator selected by the search.
        model = grid.best_estimator_

    logger.info("model: %s", repr(model))

    logger.info("start cross-validation: %s", repr(self.cross_validate))
    cv_scores = cross_validate(model, X_train, y_train,
                               **self.cross_validate)

    cv_score_mean = {key: val.mean() for key, val in cv_scores.items()}
    cv_score_std = {key: val.std() for key, val in cv_scores.items()}

    for key in cv_scores:
        mean = cv_score_mean[key]
        std = cv_score_std[key]
        logger.info("[ CV ] %s : %f +/- %f", key, mean, std)

    logger.info("start training model")
    model.fit(X_train, y_train)

    logger.info("save model")
    with self.storage.open("model.pkl", "wb") as f:
        pickle.dump(model, f)

    logger.info("make predictions")
    test_pred_df = pd.DataFrame()
    test_pred_df["PassengerId"] = test_df["PassengerId"]
    test_pred_df["Survived"] = model.predict(X_test).astype(int)

    logger.info("save predictions")
    with self.storage.open("submit.csv", "w") as f:
        test_pred_df.to_csv(f, index=False)

    report = logexp.Report()
    report["cv_score"] = {
        "mean": cv_score_mean,
        "std": cv_score_std,
        "all": {key: val.tolist() for key, val in cv_scores.items()},
    }

    return report
def test_colt_import():
    """A dotted ``@type`` (``datetime.date``) is built with keyword fields."""
    built = colt.build(
        {"@type": "datetime.date", "year": 2020, "month": 1, "day": 1}
    )

    assert built.year == 2020
def test_colt_any() -> None:
    """A nested config with an explicit ``foo`` type is built inside ``fred``."""
    inner = {"@type": "foo", "x": "hello"}

    built = colt.build({"@type": "fred", "x": inner})

    assert isinstance(built.x, Foo)
import colt

if __name__ == "__main__":
    # Each estimator is a (name, colt config) pair; the configs name their
    # classes by dotted path.
    random_forest = (
        "rfc",
        {
            "@type": "sklearn.ensemble.RandomForestClassifier",
            "n_estimators": 10,
        },
    )
    support_vector = (
        "svc",
        {
            "@type": "sklearn.svm.SVC",
            "gamma": "scale",
        },
    )
    config = {
        "@type": "sklearn.ensemble.VotingClassifier",
        "estimators": [random_forest, support_vector],
    }

    X, y = load_iris(return_X_y=True)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y)

    # Build the ensemble from the config, then train and score it.
    model = colt.build(config)
    model.fit(X_train, y_train)

    valid_accuracy = model.score(X_valid, y_valid)
    print(f"valid_accuracy: {valid_accuracy}")