Beispiel #1
0
    def get_algo(self, train, model=None):
        """Return a Mercs model and the time (in seconds) spent fitting it.

        With no ``model`` given, a new ``Mercs`` is built from
        ``self.algo_config`` and fitted on ``train`` with every column index
        declared nominal; the fit call is timed with ``time.time()``.

        With an existing ``Mercs`` given, it is passed through
        ``self.reconfigure_algo`` when ``algo_config["reconfigure_algo"]`` is
        truthy, and the fit time is read from ``model.model_data["ind_time"]``.

        Args:
            train: Object whose ``.values`` can be cast to a float array.
            model: ``None`` or a ``Mercs`` instance.

        Returns:
            dict with the keys ``model`` and ``fit_time_s``.

        Raises:
            ValueError: If ``model`` is neither ``None`` nor a ``Mercs``.
        """
        config = self.algo_config

        # Training matrix as floats; all of its column indices count as nominal.
        data = train.values.astype(float)
        nominal_columns = set(range(data.shape[1]))

        if model is None:
            # Fresh model: only the fit call itself is timed.
            model = Mercs(**config)
            started = time.time()
            model.fit(data, nominal_attributes=nominal_columns, **config)
            elapsed = time.time() - started
            return dict(model=model, fit_time_s=elapsed)

        if not isinstance(model, Mercs):
            raise ValueError(
                "I expect either no model or a Mercs model. Not {}".format(model)
            )

        # Existing model: reuse it, reconfiguring first when asked to.
        if config["reconfigure_algo"]:
            model = self.reconfigure_algo(model, **config)
        return dict(model=model, fit_time_s=model.model_data["ind_time"])
Beispiel #2
0
def test_heart():
    """Fit a plain and a mixed Mercs model on the heart-failure data.

    Both models share the same settings; the mixed one additionally sets
    ``mixed_algorithm="morfist"``. The data is split 75/25 with a fixed seed,
    columns 2 and 12 are queried as targets, and the scores returned by
    ``compute_scores`` are printed for both models.
    """
    # settings common to both models
    shared_settings = dict(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        max_depth=4,
        nb_targets=2,
        nb_iterations=1,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )
    mercs = Mercs(**shared_settings)
    mercs_mixed = Mercs(mixed_algorithm="morfist", **shared_settings)

    # read the csv (header row skipped) and split it into train/test rows
    records = np.loadtxt(
        "./data/heart_failure_clinical_records.csv",
        delimiter=",",
        skiprows=1,
    )
    train_rows, test_rows = train_test_split(
        records, test_size=0.25, random_state=1337
    )

    # column indices handed to fit() as nominal attributes
    nominal_ids = {1, 3, 5, 9, 10, 12}
    mercs.fit(train_rows, nominal_attributes=nominal_ids)
    mercs_mixed.fit(train_rows, nominal_attributes=nominal_ids)

    # query code: 1 marks a target column, 0 every other column
    targets = [2, 12]
    q_code = np.zeros(train_rows.shape[1], dtype=np.int8)
    q_code[targets] = 1
    print(q_code)

    # ground truth for the queried columns
    expected = test_rows[:, targets]

    predicted = mercs.predict(test_rows, q_code=q_code)
    predicted_mixed = mercs_mixed.predict(test_rows, q_code=q_code)

    scores = compute_scores(expected, predicted, [1])
    scores_mixed = compute_scores(expected, predicted_mixed, [1])

    print(scores)
    print(scores_mixed)
Beispiel #3
0
    def get_algo_fit(self, train=None, model=None):
        """Return a Mercs model and the time (in seconds) spent fitting it.

        Omitted arguments fall back to ``self.train`` and ``self.model``. If
        there is still no model, a new ``Mercs`` is built from
        ``self.algo_config`` and fitted on ``train.values`` with every column
        index declared nominal; the fit call is timed with ``time.time()``.
        For an existing ``Mercs`` the fit time is read from
        ``model.model_data["ind_time"]``.

        Args:
            train: Object exposing ``.values``, or ``None`` to use
                ``self.train`` (which is then asserted to be a DataFrame).
            model: ``None`` or a ``Mercs`` instance.

        Returns:
            dict with the keys ``model`` and ``fit_time_s``.

        Raises:
            AssertionError: If ``self.algo_config`` is not a dict, or if the
                fallback ``self.train`` is not a ``pd.DataFrame``.
            ValueError: If the model is neither ``None`` nor a ``Mercs``.
        """
        config = self.algo_config
        assert isinstance(config, dict)

        # Fall back to what the instance holds for any omitted argument.
        if train is None:
            train = self.train
            assert isinstance(train, pd.DataFrame)
        model = self.model if model is None else model

        # Values are used as stored (no dtype cast); every column is nominal.
        data = train.values
        nominal_columns = set(range(data.shape[1]))

        if model is None:
            # Fresh model: only the fit call itself is timed.
            model = Mercs(**config)
            started = time.time()
            model.fit(data, nominal_attributes=nominal_columns)
            elapsed = time.time() - started
            return dict(model=model, fit_time_s=elapsed)

        if not isinstance(model, Mercs):
            raise ValueError(
                "I expect either no model or a Mercs model. Not {}".format(model)
            )

        return dict(model=model, fit_time_s=model.model_data["ind_time"])
Beispiel #4
0
    def do(self):
        """Fit a Mercs model on the training range and store its predictions.

        Steps: convert the state's CSV file to xlsx, resolve the training
        range, extract the data, fit a Mercs model, predict the columns
        reported by ``get_empty_columns`` and add one ``Prediction`` per
        (empty row, empty column) pair to the state.

        Returns:
            The result of ``self.state.add_objects(predictions)``, or ``None``
            (after printing a message) when no usable training range exists.
        """
        # inputs from synth
        workbook_path = convert_csv_to_xlsx(self.state.filepath)

        train_range = self.get_train_range()
        coord = train_range.coord if train_range else None
        if not coord:
            print("Range outside of table")
            return None

        workbook = load_workbook(workbook_path)
        missing_rows = get_empty_rows(workbook, train_range)

        # NOTE(review): the third argument presumably lists rows to leave out
        # of the extracted data -- confirm against extract_data.
        frame, encoders, nominal_ids = extract_data(workbook, coord, missing_rows)

        learner = Mercs(
            evaluation_algorithm="dummy",
            selection_algorithm="random",
            nb_iterations=3,
            fraction_missing=[0, 0.2],
        )
        learner.fit(frame.values, nominal_ids=nominal_ids)

        # Query code: 1 at every empty column, 0 elsewhere.
        query = np.zeros(frame.shape[1], dtype=int)
        missing_cols = get_empty_columns(workbook, train_range)
        for col in missing_cols:
            query[col] = 1

        # Second extraction with an empty row list; the last column of the
        # resulting matrix is dropped before predicting.
        full_frame, _, _ = extract_data(workbook, coord, [])
        predicted = learner.predict(
            full_frame.values[:, :-1],
            q_code=query,
            prediction_algorithm="mi",
        )

        # One provenance tag is shared by all predictions of this run.
        provenance = ("MERCS", str(uuid4()))
        predictions = []
        for row in missing_rows:
            for position, col in enumerate(missing_cols):
                value = predicted[row][position].item()
                encoder = encoders[col]
                if encoder:
                    # Map the predicted value back through the column's encoder.
                    value = encoder.inverse_transform([value])[0]
                # Shift range-relative indices by the range's min_col/min_row.
                cell = Coordinate(
                    col + train_range.min_col - 1,
                    row + train_range.min_row - 1,
                )
                predictions.append(Prediction(cell, value, 1, provenance))
        return self.state.add_objects(predictions)
Beispiel #5
0
def test_init():
    """Fit a mixed Mercs model on the default dataset and print what it learned.

    Prints every component model, then each component's source attributes,
    target attributes and output kind, and finally the first ten predictions
    for the query code ``[0 0 0 1]``.
    """
    # default dataset with three features
    train, test = default_dataset(n_features=3)

    # nb_targets sets the number of targets used while fitting the model;
    # it is unrelated to the number of targets used while predicting
    clf = Mercs(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        mixed_algorithm="morfist",
        max_depth=4,
        nb_targets=2,
        nb_iterations=2,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )

    # only the last column is declared nominal
    last_column = train.shape[1] - 1
    clf.fit(train, nominal_attributes={last_column})

    # report templates, filled in once per component model
    model_report = """
        Model with index: {}
        {}
        """
    tree_report = """
        Tree with id:          {}
        has source attributes: {}
        has target attributes: {},
        and predicts {} attributes
        """

    for position, component in enumerate(clf.m_list):
        print(model_report.format(position, component.model))

    for position, component in enumerate(clf.m_list):
        print(
            tree_report.format(
                position,
                component.desc_ids,
                component.targ_ids,
                component.out_kind,
            )
        )

    # Query code is [0 0 0 1] where 0 = descriptive and 1 = target
    q_code = np.array([0, 0, 0, 1])
    print("Query code is:", q_code)

    # predict the queried column for the test data, show the first 10 results
    predicted = clf.predict(test, q_code=q_code)
    print("Predictions:", predicted[:10])
Beispiel #6
0
def mercs():
    """Build a mixed Mercs model and fit it on the default dataset.

    The model is fitted on the training part of ``default_dataset`` with three
    features, with only the last column declared nominal.

    Returns:
        The fitted ``Mercs`` instance.
    """
    model = Mercs(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        mixed_algorithm="morfist",
        max_depth=4,
        nb_targets=2,
        nb_iterations=2,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )
    train, _test = default_dataset(n_features=3)

    # only the last column is declared nominal
    last_column = train.shape[1] - 1
    model.fit(train, nominal_attributes={last_column})

    return model
Beispiel #7
0
def test_students():
    """Fit a plain and a mixed Mercs model on the student data and print scores.

    The data is split 75/25 with a fixed seed, columns 1, 21 and 31 are
    queried as targets, and the scores returned by ``compute_scores`` are
    printed for both models.
    """
    # initialize the models
    mercs = Mercs(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        max_depth=4,
        nb_targets=2,
        nb_iterations=1,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )

    # the mixed model uses deeper trees (max_depth=7) than the plain one
    mercs_mixed = Mercs(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        mixed_algorithm="morfist",
        max_depth=7,
        nb_targets=2,
        nb_iterations=1,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )

    # load the data (header row skipped)
    data = np.loadtxt("./data/student-por.csv", delimiter=",", skiprows=1)

    # split the data into training and testing; the seed is fixed so that the
    # split, and therefore the printed scores, are reproducible between runs
    x_train, x_test = train_test_split(data, test_size=0.25, random_state=1337)

    # ids of the nominal variables: every column from 0 to 28 except column 2.
    # The previous literal listed 29 where 20 belongs (.., 19, 29, 21, ..),
    # which dropped column 20 and wrongly marked column 29 as nominal.
    # NOTE(review): assumes the CSV keeps the UCI student-por column order,
    # where column 20 is categorical and column 29 is a count -- confirm
    # against ./data/student-por.csv.
    nominal_ids = set(range(29)) - {2}

    # fit the models
    mercs.fit(x_train, nominal_attributes=nominal_ids)
    mercs_mixed.fit(x_train, nominal_attributes=nominal_ids)

    # create the query code for the prediction: 1 marks a target column
    q_code = np.zeros(x_train.shape[1], dtype=np.int8)
    targets = [1, 21, 31]
    q_code[targets] = 1
    print(q_code)

    # get real values of target variables
    y_test = x_test[:, targets]

    # predict target values
    y_pred = mercs.predict(x_test, q_code=q_code)
    y_pred_mixed = mercs_mixed.predict(x_test, q_code=q_code)

    scores = compute_scores(y_test, y_pred, [0, 1])
    scores_mixed = compute_scores(y_test, y_pred_mixed, [0, 1])

    print(scores)
    print(scores_mixed)