Beispiel #1
0
    def from_file(self, config_file):
        """Fill the contents of settings by reading a config file.

        Options in the ``[global_settings]`` section are converted with the
        matching callable in ``SETTINGS_TYPE_DICT`` and stored as attributes
        on this object. The ``[model_param]``, ``[query_param]``,
        ``[balance_param]`` and ``[feature_param]`` sections are stored as
        dictionaries under the attribute of the same name. Any other section
        (except ``DEFAULT``) is reported and skipped.

        After reading, each parameter dictionary is handed to
        ``_convert_types`` together with the ``default_param`` of the
        currently selected model, balance, query and feature components.

        Arguments
        ---------
        config_file: str
            Source configuration file. If it is None or does not point to
            an existing file, nothing is read and the settings are left
            untouched (a message is printed for a missing file).

        """
        if config_file is None or not os.path.isfile(config_file):
            if config_file is not None:
                print(f"Didn't find configuration file: {config_file}")
            return

        config = ConfigParser()
        # Keep option names exactly as written; ConfigParser lower-cases
        # them by default.
        config.optionxform = str
        config.read(config_file)

        param_sections = (
            "model_param", "query_param", "balance_param", "feature_param",
        )

        # Read each of the sections.
        for sect in config:
            if sect == "global_settings":
                for key, value in config.items(sect):
                    try:
                        setattr(self, key, SETTINGS_TYPE_DICT[key](value))
                    except (KeyError, TypeError, ValueError):
                        # KeyError: unknown option name.
                        # TypeError/ValueError: the converter rejected the
                        # raw string (e.g. int("abc") raises ValueError).
                        print(f"Warning: value with key '{key}' is ignored "
                              "(spelling mistake, wrong type?).")

            elif sect in param_sections:
                setattr(self, sect, dict(config.items(sect)))
            elif sect != "DEFAULT":
                print(f"Warning: section [{sect}] is ignored in "
                      f"config file {config_file}")

        # Values read from the file are plain strings; pass them through
        # _convert_types alongside each component's default parameters.
        model = get_classifier(self.model)
        _convert_types(model.default_param, self.model_param)
        balance_model = get_balance_model(self.balance_strategy)
        _convert_types(balance_model.default_param, self.balance_param)
        query_model = get_query_model(self.query_strategy)
        _convert_types(query_model.default_param, self.query_param)
        feature_model = get_feature_model(self.feature_extraction)
        _convert_types(feature_model.default_param, self.feature_param)
Beispiel #2
0
def test_balance(balance_strategy,
                 n_partition=100,
                 n_feature=200,
                 n_sample=100):
    """Check a balance strategy on randomly drawn training subsets.

    The balance model is looked up by name, and its ``param`` and ``name``
    attributes are checked. Then, ``n_partition`` times, a random set of
    training indices containing both labels 0 and 1 is drawn, the model's
    ``sample`` method is called on it, and the result is passed to
    ``check_partition``.

    Arguments
    ---------
    balance_strategy: str
        Name passed to ``get_balance_model``.
    n_partition: int
        Number of random training subsets to try.
    n_feature: int
        Forwarded to ``generate_data``.
    n_sample: int
        Forwarded to ``generate_data``; also the exclusive upper bound for
        the size of each training subset.
    """
    balancer = get_balance_model(balance_strategy)
    assert isinstance(balancer.param, dict)
    assert balancer.name == balance_strategy

    X, y = generate_data(n_feature=n_feature, n_sample=n_sample)
    all_indices = np.arange(len(y))

    for _ in range(n_partition):
        n_train = np.random.randint(10, n_sample)

        # Redraw the subset until it holds at least one sample of each label.
        both_labels_present = False
        while not both_labels_present:
            train_idx = np.random.choice(all_indices, n_train, replace=False)
            drawn_labels = y[train_idx]
            both_labels_present = (
                np.count_nonzero(drawn_labels == 0) > 0
                and np.count_nonzero(drawn_labels == 1) > 0
            )

        shared = {"query_src": {}, "current_queries": {}}
        X_train, y_train = balancer.sample(X, y, train_idx, shared)
        check_partition(X, y, X_train, y_train, train_idx)