Esempio n. 1
0
def test_colt_constructor():
    """Check that building the "plugh" config yields the expected ``x`` and ``y``."""
    spec = {
        "@type": "plugh",
        "*": ["plugh"],
        "y": "plugh",
    }
    built = colt.build(spec)

    # Same checks, same order: ``x`` first, then ``y``.
    for attr, expected in (("x", "plugh_x"), ("y", "plugh_y")):
        assert getattr(built, attr) == expected
Esempio n. 2
0
def test_colt_builtintypes():
    """Check that "tuple" and "range" type names build the matching builtins."""
    tuple_spec = {"@type": "tuple", "*": [[1, 2, 3]]}
    range_spec = {"@type": "range", "*": [0, 10, 2]}

    built = colt.build([tuple_spec, range_spec])

    first = built[0]
    assert isinstance(first, tuple)
    second = built[1]
    assert isinstance(second, range)
Esempio n. 3
0
def test_build_with_type() -> None:
    """Check that an untyped config is built as ``Foo`` when the class is passed in."""
    built = colt.build({"x": "abc"}, Foo)

    assert isinstance(built, Foo)
    assert built.x == "abc"
Esempio n. 4
0
    def run(self):
        """Train the configured model on the iris dataset and report accuracy.

        Returns:
            A ``logexp.Report`` holding the train/validation split sizes and
            the model's score on each split.
        """
        logger.info("load iris dataset")

        # The estimator is built from ``self.model`` before the data is loaded.
        estimator = colt.build(self.model)

        dataset = load_iris()
        split = train_test_split(
            dataset.data, dataset.target, test_size=self.test_size)
        X_train, X_valid, y_train, y_valid = split

        n_train, n_valid = len(X_train), len(X_valid)
        logger.info(
            f"dataset size: train={n_train}, valid={n_valid}")

        logger.info("start training")
        estimator.fit(X_train, y_train)
        logger.info("end training")

        # Score on the training split first, then on the validation split.
        scores = {
            "train_accuracy": estimator.score(X_train, y_train),
            "valid_accuracy": estimator.score(X_valid, y_valid),
        }

        report = logexp.Report()
        report["train_size"] = n_train
        report["valid_size"] = n_valid
        for name, value in scores.items():
            report[name] = value

        return report
Esempio n. 5
0
def main(prog: str = None):
    """Command-line entry point: parse arguments, build a ``Worker`` and call it.

    Args:
        prog: program name handed to ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        usage='%(prog)s',
        description="mincrawler",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s " + __version__,
    )
    parser.add_argument(
        "config_path",
        type=str,
        help="path to parameter file describing the crawler settings",
    )
    parser.add_argument(
        "--module",
        type=str,
        action="append",
        default=[],
        help="additional modules to include",
    )
    options = parser.parse_args()

    # The config file is read before the extra modules are imported.
    config = load_jsonnet(options.config_path)
    colt.import_modules(options.module)

    logger.debug("config: %s", repr(config))

    # Build the worker from the config and run it immediately.
    colt.build(config, Worker)()
Esempio n. 6
0
def test_colt_without_annotation() -> None:
    """Check that a list of strings given to "corge" stays a list of strings."""
    spec = {"@type": "corge", "x": ["a", "b"]}

    built = colt.build(spec)

    assert isinstance(built, Corge)
    values = built.x
    assert isinstance(values, list)
    assert isinstance(values[0], str)
Esempio n. 7
0
def test_type_conversion() -> None:
    """Check that the list given as ``x`` ends up as a three-element set on "qux"."""
    spec = {"@type": "qux", "x": [1, 2, 3, 3]}

    built = colt.build(spec)

    assert isinstance(built, Qux)
    members = built.x
    assert isinstance(members, set)
    # The input list contains a duplicated 3.
    assert len(members) == 3
Esempio n. 8
0
def test_colt_with_optional() -> None:
    """Check "baz" with ``y`` omitted (expected ``None``) and with ``y`` given."""
    # Case 1: ``y`` is left out of the config.
    without_y = {"@type": "baz", "x": "hello"}
    built = colt.build(without_y)

    assert isinstance(built, Baz)
    assert built.x == "hello"
    assert built.y is None

    # Case 2: ``y`` is supplied explicitly.
    with_y = {"@type": "baz", "x": "hello", "y": 123}
    built = colt.build(with_y)

    assert built.y == 123
Esempio n. 9
0
def test_colt_union() -> None:
    """Check that "waldo" accepts ``x`` as a plain string or as a ``Foo`` config."""
    # Case 1: ``x`` is a string and stays a string.
    string_spec = {"@type": "waldo", "x": "hello"}
    built = colt.build(string_spec)

    assert isinstance(built, Waldo)
    assert isinstance(built.x, str)
    assert built.x == "hello"

    # Case 2: ``x`` is a nested dict and is built into a ``Foo``.
    nested_spec = {"@type": "waldo", "x": {"x": "hello"}}
    built = colt.build(nested_spec)

    assert isinstance(built, Waldo)
    assert isinstance(built.x, Foo)
    assert built.x.x == "hello"
Esempio n. 10
0
def test_colt_with_subclass() -> None:
    """Check that ``foos`` entries build as ``Foo`` by default and ``Baz`` when typed."""
    untyped_entry = {"x": "hello"}
    baz_entry = {"@type": "baz", "x": "world", "y": 123}
    spec = {"@type": "bar", "foos": [untyped_entry, baz_entry]}

    built = colt.build(spec)

    assert isinstance(built, Bar)
    assert isinstance(built.foos[0], Foo)
    assert isinstance(built.foos[1], Baz)
Esempio n. 11
0
def test_colt_with_less_type() -> None:
    """Check that ``foos`` entries without "@type" are still built as ``Foo``."""
    entries = [{"x": "hello"}, {"x": "world"}]
    spec = {"@type": "bar", "foos": entries}

    built = colt.build(spec)

    assert isinstance(built, Bar)
    assert isinstance(built.foos, list)
    assert isinstance(built.foos[0], Foo)
Esempio n. 12
0
def test_colt_tuple():
    """Check that "grault" builds ``x`` as a tuple holding a ``Foo`` and a ``Qux``."""
    first_entry = {"x": "hello"}
    second_entry = {"x": [1, 2, 3]}
    spec = {"@type": "grault", "x": [first_entry, second_entry]}

    built = colt.build(spec)

    assert isinstance(built, Grault)
    assert isinstance(built.x, tuple)
    assert isinstance(built.x[0], Foo)
    assert isinstance(built.x[1], Qux)
Esempio n. 13
0
def test_registrable():
    """Check that the name "baz" resolves to ``FooBaz`` for ``foo`` and ``BarBaz`` for ``bar``."""
    spec = {
        "@type": "my_class",
        "foo": {"@type": "baz"},
        "bar": {"@type": "baz"},
    }

    built = colt.build(spec)

    assert isinstance(built.foo, FooBaz)
    assert isinstance(built.bar, BarBaz)
Esempio n. 14
0
def test_colt_dict() -> None:
    """Check that "garply" builds ``x`` as a dict whose values are ``Foo`` objects."""
    entries = {
        "a": {"x": "hello"},
        "b": {"x": "world"},
    }
    built = colt.build({"@type": "garply", "x": entries})

    assert isinstance(built, Garply)
    assert isinstance(built.x, dict)
    # Type checks for both keys come before the value checks.
    for key in ("a", "b"):
        assert isinstance(built.x[key], Foo)
    assert built.x["a"].x == "hello"
    assert built.x["b"].x == "world"
Esempio n. 15
0
File: run.py Progetto: altescy/colt
def main():
    """Parse CLI options, build a ``Worker`` from the config and run it.

    The worker's result is written as CSV (without the index) only when
    ``--output`` is given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--config", required=True)
    cli.add_argument("--module", action="append", default=[])
    cli.add_argument("--output")
    options = cli.parse_args()

    config = load_jsonnet(options.config)
    print(config)

    # Extra modules are imported before the worker is built.
    colt.import_modules(options.module)
    predictions = colt.build(config, Worker)()

    if not options.output:
        return
    predictions.to_csv(options.output, index=False)
Esempio n. 16
0
def test_colt_with_type() -> None:
    """Check that typed configs nested inside a plain dict are built in place."""
    bar_spec = {
        "@type": "bar",
        "foos": [
            {"@type": "foo", "x": "hello"},
            {"@type": "foo", "x": "world"},
        ],
    }
    foo_specs = [
        {"@type": "foo", "x": "hoge"},
        {"@type": "foo", "x": "fuga"},
    ]

    built = colt.build({"bar": bar_spec, "foos": foo_specs})

    # The "bar" entry and its nested foos.
    assert isinstance(built["bar"], Bar)
    assert isinstance(built["bar"].foos, list)
    assert isinstance(built["bar"].foos[0], Foo)
    # The top-level "foos" list.
    assert isinstance(built["foos"], list)
    assert isinstance(built["foos"][0], Foo)
Esempio n. 17
0
    def build(cls, config: Dict[str, Any]) -> ConfigBuilder:
        """Create an instance from a configuration dictionary.

        When ``config`` has an ``"automlcli"`` key, the value under that key
        is used as the configuration instead of the outer dictionary.

        Args:
            config: configuration mapping. Keys read here are
                ``"random_seed"`` (default 13370), ``"numpy_seed"``
                (default 1337), ``"train_file"``, ``"validation_file"``,
                ``"test_file"``, ``"colt"`` (overrides for the default colt
                settings) and the required ``"model"``.

        Returns:
            ``cls(model, train_file, validation_file, test_file)``.

        Raises:
            ConfigurationError: if ``"automlcli"`` is present but is not a
                dictionary.
            KeyError: if ``"model"`` is missing.
        """
        if "automlcli" in config:
            config = config["automlcli"]
            if not isinstance(config, dict):
                raise ConfigurationError(
                    "`automlcli` field is given, but it is not a dictionary.")

        random_seed = config.get("random_seed", 13370)
        numpy_seed = config.get("numpy_seed", 1337)
        set_random_seed(random_seed, numpy_seed)

        train_file = config.get("train_file")
        validation_file = config.get("validation_file")
        test_file = config.get("test_file")

        # Copy the defaults before applying overrides: updating
        # DEFAULT_COLT_SETTING in place would leak this config's "colt"
        # overrides into every later call that relies on the defaults.
        colt_config = dict(DEFAULT_COLT_SETTING)
        colt_config.update(config.get("colt", {}))

        model_config = config["model"]
        model = colt.build(model_config, cls=Model,
                           **colt_config)  # type: Model

        return cls(model, train_file, validation_file, test_file)
Esempio n. 18
0
    def run(self) -> logexp.Report:
        """Train a model on the train CSV, predict on the test CSV, report CV scores.

        Steps, in order: optionally seed NumPy's RNG, load both CSV files,
        fit the pipeline built from ``self.pdpipeline``, build the model from
        ``self.model`` (running the search first when it is a
        ``BaseSearchCV``), cross-validate, fit on the full training data, then
        write ``model.pkl`` and ``submit.csv`` (plus ``best_params.json``
        after a search) through ``self.storage``.

        Returns:
            A ``logexp.Report`` whose ``"cv_score"`` entry holds the mean, the
            standard deviation and the raw values of every cross-validation
            metric.
        """
        import io  # local import: only used to capture ``DataFrame.info`` output

        def _describe(frame) -> str:
            """Return the summary text that ``frame.info()`` would print."""
            # ``DataFrame.info`` writes to a buffer and returns None, so
            # passing its return value to the logger would only log "None".
            buffer = io.StringIO()
            frame.info(buf=buffer)
            return buffer.getvalue()

        logger.info("params: %s", repr(self.params.to_json()))

        if self.random_seed is not None:
            np.random.seed(self.random_seed)

        logger.info("load datasets")
        train_df = pd.read_csv(self.train_path)
        test_df = pd.read_csv(self.test_path)

        logger.info("datasets:")
        logger.info("%s:\n%s", self.train_path, _describe(train_df))
        logger.info("%s:\n%s", self.test_path, _describe(test_df))

        logger.info("build ndarray")
        pdpipeline = colt.build(self.pdpipeline)
        # NOTE(review): the pipeline is fit while ``train_df`` still contains
        # the "Survived" column (it is popped just below) -- confirm intended.
        pdpipeline.fit(train_df)

        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy; the builtin requests the same dtype.
        y_train = train_df.pop("Survived").to_numpy(dtype=float)
        X_train = pdpipeline.transform(train_df).to_numpy(dtype=float)
        X_test = pdpipeline.transform(test_df).to_numpy(dtype=float)

        logger.info("build model")
        model = colt.build(self.model)
        if isinstance(model, BaseSearchCV):
            # The configured model is a hyper-parameter search: run it, keep
            # the best parameters on disk and continue with the best estimator.
            grid = model
            logger.info("[ GS ] start grid-search")
            grid.fit(X_train, y_train)

            logger.info("[ GS ] best params: %s", repr(grid.best_params_))
            logger.info("[ GS ] best score: %s", repr(grid.best_score_))

            with self.storage.open("best_params.json", "w") as f:
                json.dump(grid.best_params_, f)

            model = grid.best_estimator_

        logger.info("model: %s", repr(model))

        logger.info("start cross-validation: %s", repr(self.cross_validate))
        cv_scores = cross_validate(model, X_train, y_train,
                                   **self.cross_validate)
        cv_score_mean = {key: val.mean() for key, val in cv_scores.items()}
        cv_score_std = {key: val.std() for key, val in cv_scores.items()}
        for key in cv_scores:
            mean = cv_score_mean[key]
            std = cv_score_std[key]
            logger.info("[ CV ]  %s : %f +/- %f", key, mean, std)

        logger.info("start training model")
        model.fit(X_train, y_train)

        logger.info("save model")
        with self.storage.open("model.pkl", "wb") as f:
            pickle.dump(model, f)

        logger.info("make predictions")
        test_pred_df = pd.DataFrame()
        test_pred_df["PassengerId"] = test_df["PassengerId"]
        test_pred_df["Survived"] = model.predict(X_test).astype(int)

        logger.info("save predictions")
        with self.storage.open("submit.csv", "w") as f:
            test_pred_df.to_csv(f, index=False)

        report = logexp.Report()
        report["cv_score"] = {
            "mean": cv_score_mean,
            "std": cv_score_std,
            # ``tolist`` turns each score array into a plain Python list.
            "all": {key: val.tolist()
                    for key, val in cv_scores.items()},
        }

        return report
Esempio n. 19
0
def test_colt_import():
    """Check that the dotted type name "datetime.date" can be built from keyword values."""
    spec = {
        "@type": "datetime.date",
        "year": 2020,
        "month": 1,
        "day": 1,
    }

    built = colt.build(spec)

    assert built.year == 2020
Esempio n. 20
0
def test_colt_any() -> None:
    """Check that a typed "foo" config nested under "fred" is built into a ``Foo``."""
    nested = {"@type": "foo", "x": "hello"}
    spec = {"@type": "fred", "x": nested}

    built = colt.build(spec)

    assert isinstance(built.x, Foo)
Esempio n. 21
0
import colt

if __name__ == "__main__":
    # Describe a voting ensemble of a random forest and an SVC as a colt
    # config, build it, fit it on a train split and print validation accuracy.
    # NOTE(review): ``load_iris`` and ``train_test_split`` are not imported in
    # the visible part of this file -- confirm they are brought into scope.
    random_forest = (
        "rfc",
        {"@type": "sklearn.ensemble.RandomForestClassifier", "n_estimators": 10},
    )
    support_vector = (
        "svc",
        {"@type": "sklearn.svm.SVC", "gamma": "scale"},
    )
    config = {
        "@type": "sklearn.ensemble.VotingClassifier",
        "estimators": [random_forest, support_vector],
    }

    X, y = load_iris(return_X_y=True)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y)

    model = colt.build(config)
    model.fit(X_train, y_train)

    valid_accuracy = model.score(X_valid, y_valid)
    print(f"valid_accuracy: {valid_accuracy}")