def get_algo(self, train, model=None):
    """Return a Mercs model together with its fit time in seconds.

    Args:
        train: Object exposing ``.values`` (a 2-D array-like); it is cast to
            float before fitting.
        model: ``None`` to fit a fresh ``Mercs`` model, or an existing
            ``Mercs`` instance to reuse.

    Returns:
        A dict with keys ``model`` and ``fit_time_s``. For a freshly fitted
        model the time is measured here; for a given model it is read from
        ``model.model_data["ind_time"]``.

    Raises:
        ValueError: If ``model`` is neither ``None`` nor a ``Mercs`` instance.
    """
    config = self.algo_config

    # Gather the training matrix; every column index is declared nominal.
    data = train.values.astype(float)
    every_column = set(range(data.shape[1]))

    # Reject unsupported model objects up front.
    if model is not None and not isinstance(model, Mercs):
        raise ValueError(
            "I expect either no model or a Mercs model. Not {}".format(model)
        )

    if model is None:
        # NOTE(review): the same config is handed to both the constructor
        # and fit() -- confirm that fit() is meant to receive it as well.
        model = Mercs(**config)
        started = time.time()
        model.fit(data, nominal_attributes=every_column, **config)
        elapsed = time.time() - started
        return {"model": model, "fit_time_s": elapsed}

    # An existing Mercs model: optionally reconfigure it, then reuse its
    # stored timing instead of refitting.
    if config["reconfigure_algo"]:
        model = self.reconfigure_algo(model, **config)
    return {"model": model, "fit_time_s": model.model_data["ind_time"]}
def test_heart():
    """Fit a plain and a mixed MERCS model on the heart-failure data and print scores.

    Loads ``./data/heart_failure_clinical_records.csv``, splits it 75/25 with
    a fixed seed, fits both models, predicts columns 2 and 12 on the test
    split and prints the result of ``compute_scores`` for each model.
    """
    # Hyper-parameters shared by both models.
    shared_params = dict(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        max_depth=4,
        nb_targets=2,
        nb_iterations=1,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )
    mercs = Mercs(**shared_params)
    # The mixed variant only adds the "morfist" mixed algorithm.
    mercs_mixed = Mercs(mixed_algorithm="morfist", **shared_params)

    # Load the data (header row skipped) and split into train / test.
    data = np.loadtxt(
        "./data/heart_failure_clinical_records.csv", delimiter=",", skiprows=1
    )
    x_train, x_test = train_test_split(data, test_size=0.25, random_state=1337)

    # Column indices passed to fit() as nominal attributes.
    nominal_ids = {1, 3, 5, 9, 10, 12}
    for estimator in (mercs, mercs_mixed):
        estimator.fit(x_train, nominal_attributes=nominal_ids)

    # Query code: 1 marks a target column, 0 everything else.
    targets = [2, 12]
    q_code = np.zeros(x_train.shape[1], dtype=np.int8)
    q_code[targets] = 1
    print(q_code)

    # Ground truth for the target columns.
    y_test = x_test[:, targets]

    # Predict with both models before scoring either of them.
    y_pred = mercs.predict(x_test, q_code=q_code)
    y_pred_mixed = mercs_mixed.predict(x_test, q_code=q_code)

    # NOTE(review): the third argument is presumably the positions of the
    # nominal targets inside `targets` -- confirm against compute_scores.
    scores = compute_scores(y_test, y_pred, [1])
    scores_mixed = compute_scores(y_test, y_pred_mixed, [1])

    print(scores)
    print(scores_mixed)
def get_algo_fit(self, train=None, model=None):
    """Return a Mercs model together with its fit time in seconds.

    Args:
        train: Training ``pd.DataFrame``; falls back to ``self.train`` when
            ``None``.
        model: Existing ``Mercs`` model; falls back to ``self.model`` when
            ``None``. If that is also ``None`` a new model is fitted.

    Returns:
        A dict with keys ``model`` and ``fit_time_s``. For a freshly fitted
        model the time is measured here; otherwise it is read from
        ``model.model_data["ind_time"]``.

    Raises:
        AssertionError: If ``self.algo_config`` is not a dict or the training
            data is not a ``pd.DataFrame``.
        ValueError: If the resolved model is neither ``None`` nor ``Mercs``.
    """
    # Resolve and validate inputs, falling back to instance attributes.
    config = self.algo_config
    assert isinstance(config, dict)

    train = self.train if train is None else train
    assert isinstance(train, pd.DataFrame)

    model = self.model if model is None else model

    # NOTE: the float cast (X.astype(float)) is disabled in this variant.
    data = train.values
    every_column = set(range(data.shape[1]))

    if model is not None:
        if not isinstance(model, Mercs):
            raise ValueError(
                "I expect either no model or a Mercs model. Not {}".format(model)
            )
        # Reuse the timing stored on the existing model.
        return {"model": model, "fit_time_s": model.model_data["ind_time"]}

    # No model available anywhere: build one and time the fit.
    model = Mercs(**config)
    started = time.time()
    model.fit(data, nominal_attributes=every_column)
    elapsed = time.time() - started
    return {"model": model, "fit_time_s": elapsed}
def do(self):
    """Fit MERCS on the training range and register predictions for empty cells.

    Converts the state's CSV file to xlsx, extracts the training range,
    fits a ``Mercs`` model, predicts the empty columns and adds one
    ``Prediction`` per (empty row, empty column) pair to the state.

    Returns:
        The result of ``self.state.add_objects(predictions)``, or ``None``
        (after printing a message) when no usable range is available.
    """
    # Inputs from synth; the conversion runs before the range lookup.
    workbook_path = convert_csv_to_xlsx(self.state.filepath)
    train_range = self.get_train_range()
    coord = train_range.coord if train_range else None
    if not coord:
        print("Range outside of table")
        return

    workbook = load_workbook(workbook_path)
    blank_rows = get_empty_rows(workbook, train_range)
    frame, encoders, nominal_ids = extract_data(workbook, coord, blank_rows)

    learner = Mercs(
        evaluation_algorithm="dummy",
        selection_algorithm="random",
        nb_iterations=3,
        fraction_missing=[0, 0.2],
    )
    learner.fit(frame.values, nominal_ids=nominal_ids)

    # Query code: flag every empty column as a target.
    q_code = np.zeros(frame.shape[1], dtype=int)
    blank_cols = get_empty_columns(workbook, train_range)
    for target_col in blank_cols:
        q_code[target_col] = 1

    # Second extraction with an empty row list, used as prediction input;
    # the last column of its values is dropped before predicting.
    frame_for_prediction, _, _ = extract_data(workbook, coord, [])
    y_pred = learner.predict(
        frame_for_prediction.values[:, :-1],
        q_code=q_code,
        prediction_algorithm="mi",
    )

    def decode(col, raw):
        # Map the raw prediction back through the column's encoder, if any.
        encoder = encoders[col]
        if encoder:
            return encoder.inverse_transform([raw])[0]
        return raw

    provenance = ("MERCS", str(uuid4()))
    predictions = []
    for row in blank_rows:
        for position, col in enumerate(blank_cols):
            value = decode(col, y_pred[row][position].item())
            # Shift range-relative indices by the range's min_col / min_row.
            cell = Coordinate(
                col + train_range.min_col - 1,
                row + train_range.min_row - 1,
            )
            predictions.append(Prediction(cell, value, 1, provenance))

    return self.state.add_objects(predictions)
def test_init():
    """Fit a mixed MERCS model on the default dataset and print what it learned.

    Prints every component model, then each component's source/target
    attributes and output kind, then the query code and the first ten
    predictions on the test split.
    """
    # Default dataset with three features.
    train, test = default_dataset(n_features=3)

    # nb_targets sets the number of targets used while fitting the model;
    # it is unrelated to the number of targets used while predicting.
    clf = Mercs(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        mixed_algorithm="morfist",
        max_depth=4,
        nb_targets=2,
        nb_iterations=2,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )

    # Only the last column is declared nominal.
    last_column = train.shape[1] - 1
    clf.fit(train, nominal_attributes={last_column})

    # First pass: dump every component model.
    model_template = " Model with index: {} {} "
    for position, component in enumerate(clf.m_list):
        print(model_template.format(position, component.model))

    # Second pass: describe the attributes each component uses and predicts.
    tree_template = " Tree with id: {} has source attributes: {} has target attributes: {}, and predicts {} attributes "
    for position, component in enumerate(clf.m_list):
        print(
            tree_template.format(
                position,
                component.desc_ids,
                component.targ_ids,
                component.out_kind,
            )
        )

    # Query code is [0 0 0 1] where 0 = descriptive and 1 = target.
    q_code = np.array([0, 0, 0, 1])
    print("Query code is:", q_code)

    # Predict the queried attribute for the test data; show the first ten.
    y_pred = clf.predict(test, q_code=q_code)
    print("Predictions:", y_pred[:10])
def mercs():
    """Build a mixed MERCS model and fit it on the default dataset.

    Returns:
        The fitted ``Mercs`` instance, trained on the train split of
        ``default_dataset(n_features=3)`` with the last column nominal.
    """
    fitted_model = Mercs(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        mixed_algorithm="morfist",
        max_depth=4,
        nb_targets=2,
        nb_iterations=2,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )

    # The test split is not needed here.
    train, _ = default_dataset(n_features=3)

    # Only the last column is declared nominal.
    last_column = train.shape[1] - 1
    fitted_model.fit(train, nominal_attributes={last_column})
    return fitted_model
def test_students():
    """Fit a plain and a mixed MERCS model on the student data and print scores.

    Loads ``./data/student-por.csv``, splits it 75/25 with a fixed seed,
    fits both models, predicts columns 1, 21 and 31 on the test split and
    prints the result of ``compute_scores`` for each model.
    """
    # Hyper-parameters shared by both models (max_depth differs per model).
    shared_params = dict(
        selection_algorithm="base",
        inference_algorithm="base",
        prediction_algorithm="it",
        nb_targets=2,
        nb_iterations=1,
        n_jobs=1,
        verbose=1,
        max_steps=8,
    )
    mercs = Mercs(max_depth=4, **shared_params)
    mercs_mixed = Mercs(mixed_algorithm="morfist", max_depth=7, **shared_params)

    # Load the data (header row skipped).
    data = np.loadtxt("./data/student-por.csv", delimiter=",", skiprows=1)

    # Fixed seed so the split is reproducible, matching test_heart.
    x_train, x_test = train_test_split(data, test_size=0.25, random_state=1337)

    # Nominal columns are 0..28 except column 2. Building the set from a
    # range avoids the slip in a hand-written literal where 29 appeared in
    # place of 20.
    # NOTE(review): assumes the CSV keeps the UCI student-por column order,
    # where 20 is a yes/no attribute and 29 is a count -- confirm.
    nominal_ids = set(range(29)) - {2}

    # Fit both models on the same training split.
    mercs.fit(x_train, nominal_attributes=nominal_ids)
    mercs_mixed.fit(x_train, nominal_attributes=nominal_ids)

    # Query code: 1 marks a target column, 0 everything else.
    targets = [1, 21, 31]
    q_code = np.zeros(x_train.shape[1], dtype=np.int8)
    q_code[targets] = 1
    print(q_code)

    # Ground truth for the target columns.
    y_test = x_test[:, targets]

    # Predict with both models before scoring either of them.
    y_pred = mercs.predict(x_test, q_code=q_code)
    y_pred_mixed = mercs_mixed.predict(x_test, q_code=q_code)

    # NOTE(review): [0, 1] is presumably the positions of the nominal
    # targets inside `targets` (columns 1 and 21) -- confirm.
    scores = compute_scores(y_test, y_pred, [0, 1])
    scores_mixed = compute_scores(y_test, y_pred_mixed, [0, 1])

    print(scores)
    print(scores_mixed)