def test_get_split():
    """Check ToolBox.get_split: it must raise before any split exists, and
    after split_AL it must return index collections in one-to-one
    correspondence.

    NOTE(review): SOURCE arrived with the whole function collapsed onto a
    single line; the block structure below (what falls inside the ``with``)
    is the conventional reconstruction — confirm against the original file.
    Relies on module-level fixtures ``X``, ``y``, ``tb``, ``split_count``
    and ``check_one_to_one_correspondence`` defined elsewhere in this file.
    """
    # A freshly-built toolbox that has never been split cannot hand out a
    # split — get_split() is expected to raise here.
    with pytest.raises(Exception):
        t = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None)
        a, b, c, d = t.get_split()
    # Split the shared module-level toolbox, then fetch the resulting split.
    tb.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
    a, b, c, d = tb.get_split()
    # train/test/labeled/unlabeled index collections must line up fold by fold.
    assert (check_one_to_one_correspondence(a, b, c, d))
# use the default Logistic Regression classifier model = acebox.get_default_model() # query 50 times stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50) # use pre-defined strategy, The data matrix is a reference which will not use additional memory randomStrategy = QueryRandom() uncertainStrategy = QueryInstanceUncertainty(X, y) oracle = acebox.get_clean_oracle() random_result = [] for round in range(split_count): train_idx, test_idx, Lind, Uind = acebox.get_split(round) # saver = acebox.StateIO(round) saver = acebox.get_stateio(round) # calc the initial point model.fit(X=X[Lind.index, :], y=y[Lind.index]) pred = model.predict(X[test_idx, :]) accuracy = sum(pred == y[test_idx]) / len(test_idx) saver.set_initial_point(accuracy) while not stopping_criterion.is_stop(): select_ind = randomStrategy.select(Uind) label, cost = oracle.query_by_index(select_ind) Lind.update(select_ind) Uind.difference_update(select_ind)
"""Example: initialise an acepy ToolBox, split the iris data, and record
active-learning progress with StateIO/State objects.

NOTE(review): SOURCE arrived with every statement collapsed onto one line;
the layout below is a straightforward reconstruction.
"""
import os
import numpy as np
from sklearn.datasets import load_iris
from acepy.experiment import State, StateIO
from acepy.toolbox import ToolBox

X, y = load_iris(return_X_y=True)  # iris feature matrix and class labels
split_count = 5  # number of independent train/test folds
cur_path = os.path.abspath('.')
toolbox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=cur_path)

# split data
toolbox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
train_ind, test_ind, L_ind, U_ind = toolbox.get_split(round=0)

# -------Initialize StateIO----------
saver = StateIO(round=0, train_idx=train_ind, test_idx=test_ind, init_L=L_ind, init_U=U_ind, saving_path='.')
# or by using toolbox
# saver = toolbox.get_stateio(round=0)

# Manually adjust the initial pools: move indices 0-2 from the labeled set
# into the unlabeled set.
saver.init_L.difference_update([0, 1, 2])
saver.init_U.update([0, 1, 2])

# -------Basic operations------------
# Each State records one query batch: the selected indices and the
# performance measured after labeling them.
st1_batch1 = State(select_index=[1], performance=0.89)
my_value = 'my_entry_info'
# Arbitrary user-defined entries can be attached to a State.
st1_batch1.add_element(key='my_entry', value=my_value)
st1_batch2 = State(select_index=[0, 1], performance=0.89)
st2_batch1 = State(select_index=[0], performance=0.89)
st3_batch1 = State(select_index=[2], performance=0.89)
# The cost budget is 50 times querying stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50) oracle1 = Oracle(labels=[1] * len(y)) oracle2 = Oracle(labels=[-1] * len(y)) oracles = Oracles() oracles.add_oracle(oracle_name='Tom', oracle_object=oracle1) oracles.add_oracle(oracle_name='Amy', oracle_object=oracle2) oracles_list = [oracle1, oracle2] all = QueryNoisyOraclesAll(X=X, y=y, oracles=oracles) rand = QueryNoisyOraclesRandom(X=X, y=y, oracles=oracles) for round in range(split_count): # Get the data split of one fold experiment train_idx, test_idx, label_ind, unlab_ind = acebox.get_split(round) # Get intermediate results saver for one fold experiment saver = acebox.get_stateio(round) # calc the initial point model.fit(X=X[label_ind.index, :], y=y[label_ind.index]) pred = model.predict(X[test_idx, :]) accuracy = sum(pred == y[test_idx]) / len(test_idx) saver.set_initial_point(accuracy) ceal = QueryNoisyOraclesCEAL(X, y, oracles=oracles, initial_labeled_indexes=label_ind) iet = QueryNoisyOraclesIEthresh(X=X, y=y, oracles=oracles, initial_labeled_indexes=label_ind)