def test_get_split():
    """Check ``get_split`` on a ToolBox before and after ``split_AL`` is called.

    A freshly built ToolBox on which ``split_AL`` has not been called is
    expected to raise when asked for a split.  After ``split_AL`` has been
    run on the shared ``tb`` fixture, the four values returned by
    ``get_split`` must pass ``check_one_to_one_correspondence``.
    """
    # Build the fresh ToolBox outside the ``raises`` block: only the
    # ``get_split`` call is expected to fail, and a constructor error must
    # surface as a test error instead of silently satisfying the expectation.
    unsplit_box = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None)
    with pytest.raises(Exception):
        a, b, c, d = unsplit_box.get_split()

    tb.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
    train_idx, test_idx, label_idx, unlabel_idx = tb.get_split()
    assert check_one_to_one_correspondence(train_idx, test_idx, label_idx, unlabel_idx)
def test_init():
    """Check that invalid ``ToolBox`` constructions raise the expected errors.

    Each case pairs the exception type the test expects with a zero-argument
    callable that performs the construction; the cases run in order and the
    first one that does not raise fails the test.
    """
    invalid_constructions = (
        # First argument is a 5-element slice of y, second is the full X
        # (presumably a length mismatch -- confirm against ToolBox).
        (ValueError,
         lambda: ToolBox(y[0:5], X, query_type='AllLabels', saving_path=None)),
        # 'AllLabel' differs from the 'AllLabels' spelling used by the
        # other calls in this file.
        (NotImplementedError,
         lambda: ToolBox(y, X, query_type='AllLabel', saving_path=None)),
        # 'Features' query type combined with x=None.
        (Exception,
         lambda: ToolBox(y, x=None, query_type='Features', saving_path=None)),
    )
    for expected_error, construct in invalid_constructions:
        with pytest.raises(expected_error):
            construct()
n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None) split_count = 5 acebox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None) # split data acebox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count) # use the default Logistic Regression classifier model = acebox.get_default_model() # query 50 times stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50) # use pre-defined strategy, The data matrix is a reference which will not use additional memory randomStrategy = QueryRandom() uncertainStrategy = QueryInstanceUncertainty(X, y)
import os

import numpy as np
from sklearn.datasets import load_iris

from acepy.experiment import State, StateIO
from acepy.toolbox import ToolBox

# Walk-through of the State / StateIO objects, using the iris data set.
X, y = load_iris(return_X_y=True)
split_count = 5
cur_path = os.path.abspath('.')
toolbox = ToolBox(
    X=X,
    y=y,
    query_type='AllLabels',
    saving_path=cur_path,
)

# Split the data, then fetch the four index collections of round 0.
toolbox.split_AL(
    test_ratio=0.3,
    initial_label_rate=0.1,
    split_count=split_count,
)
train_ind, test_ind, L_ind, U_ind = toolbox.get_split(round=0)

# -------Initialize StateIO----------
# Build the saver by hand from the round-0 index collections ...
saver = StateIO(
    round=0,
    train_idx=train_ind,
    test_idx=test_ind,
    init_L=L_ind,
    init_U=U_ind,
    saving_path='.',
)
# ... or obtain it from the toolbox instead:
# saver = toolbox.get_stateio(round=0)

# Drop indexes 0, 1 and 2 from saver.init_L and add them to saver.init_U.
saver.init_L.difference_update([0, 1, 2])
saver.init_U.update([0, 1, 2])

# -------Basic operations------------
# Create State objects from a list of selected indexes and a performance
# value; the first one additionally receives a custom key/value entry.
st1_batch1 = State(select_index=[1], performance=0.89)
my_value = 'my_entry_info'
st1_batch1.add_element(key='my_entry', value=my_value)
st1_batch2 = State(select_index=[0, 1], performance=0.89)
st2_batch1 = State(select_index=[0], performance=0.89)
st3_batch1 = State(select_index=[2], performance=0.89)
from __future__ import division import pytest from sklearn.datasets import load_iris from acepy.toolbox import ToolBox from acepy.utils.misc import check_one_to_one_correspondence X, y = load_iris(return_X_y=True) split_count = 5 tb = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None) def test_init(): with pytest.raises(ValueError): ToolBox(y[0:5], X, query_type='AllLabels', saving_path=None) with pytest.raises(NotImplementedError): ToolBox(y, X, query_type='AllLabel', saving_path=None) with pytest.raises(Exception): ToolBox(y, x=None, query_type='Features', saving_path=None) # with pytest.raises(TypeError): # ToolBox(X=X, y=y, query_type='AllLabels', saving_path='asdfasf') def test_al_split(): train_idx, test_idx, Lind, Uind = tb.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count) assert (check_one_to_one_correspondence(train_idx, test_idx, Lind, Uind)) def test_get_split(): with pytest.raises(Exception):
# NOTE(review): absolute path on one specific Windows machine -- the script
# only runs where this .mat file exists at exactly this location.
data_root = 'C:\\Code\\AAAI19_exp\\final_exp\\benchmarks_keel.mat'
datasets = scio.loadmat(data_root)
dataname = 'clean1'
# The entry stored under `dataname` is unwrapped with [0][0] before use.
data = datasets[dataname][0][0]
# print(type(data))
# print(len(data))
# print(data)
# Element 0 is used as the feature matrix, element 1 (flattened) as labels.
X, y = data[0], data[1].flatten()

# Alternative data sources, kept for reference:
# X, y = load_digits(return_X_y=True)
# X, y = make_classification(n_samples=150, n_features=20, n_informative=2, n_redundant=2,
#                            n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None,
#                            flip_y=0.15, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
#                            shuffle=True, random_state=None)

acebox = ToolBox(
    X=X,
    y=y,
    query_type='AllLabels',
    saving_path='.',
)

# Split data
acebox.split_AL(
    test_ratio=0.3,
    initial_label_rate=0.1,
    split_count=10,
)

# Use the default Logistic Regression classifier
model = acebox.get_default_model()

# Stopping criterion: a budget of 50 queries
stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50)

# Two oracles with constant label lists (all 1 and all -1, one entry per
# element of y), registered under the names 'Tom' and 'Amy'.
oracle1 = Oracle(labels=[1] * len(y))
oracle2 = Oracle(labels=[-1] * len(y))
oracles = Oracles()
oracles.add_oracle(oracle_name='Tom', oracle_object=oracle1)
oracles.add_oracle(oracle_name='Amy', oracle_object=oracle2)
from sklearn.datasets import load_iris

from acepy.toolbox import ToolBox

# Example: creating query strategies and asking them which instance to label.
X, y = load_iris(return_X_y=True)
acebox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')
acebox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)
model = acebox.get_default_model()
train_idx, test_idx, Lind, Uind = acebox.get_split(0)

# -------------Initialize---------------
# Initialize a strategy object through the ToolBox ...
QBCStrategy = acebox.get_query_strategy(strategy_name='QueryInstanceQBC')
# ... or import the class from acepy.query_strategy and construct it directly.
from acepy.query_strategy import QueryInstanceQBC, QueryInstanceUncertainty
uncertainStrategy = QueryInstanceUncertainty(X, y, measure='entropy')

# --------------Select----------------
# Fit the model on the labeled instances, then select from the unlabeled
# pool.  The candidate pool must be Uind: passing Lind as the second
# argument would only ever pick an instance that is already labeled.
model.fit(X[Lind.index], y[Lind.index])
select_ind = uncertainStrategy.select(Lind, Uind, batch_size=1, model=model)
print(select_ind)

# With model=None the strategy is left to choose the instances with the
# default logistic regression model.
select_ind = uncertainStrategy.select(Lind, Uind, batch_size=1, model=None)

# Use select_by_prediction_mat() by providing the probabilistic prediction
# matrix computed on the unlabeled instances.
prob_mat = model.predict_proba(X[Uind.index])
select_ind = QBCStrategy.select_by_prediction_mat(unlabel_index=Uind, predict=prob_mat, batch_size=1)
print(select_ind)
# NOTE(review): absolute path on one specific Windows machine -- the script
# only runs where this .mat file exists at exactly this location.
data_root = 'C:\\Code\\AAAI19_exp\\final_exp\\benchmarks_keel.mat'
datasets = scio.loadmat(data_root)
dataname = 'clean1'
# The entry stored under `dataname` is unwrapped with [0][0] before use.
data = datasets[dataname][0][0]
# print(type(data))
# print(len(data))
# print(data)
# Element 0 is used as the feature matrix, element 1 (flattened) as labels.
X, y = data[0], data[1].flatten()

# Alternative data sources, kept for reference:
# X, y = load_digits(return_X_y=True)
# X, y = make_classification(n_samples=150, n_features=20, n_informative=2, n_redundant=2,
#                            n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None,
#                            flip_y=0.15, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
#                            shuffle=True, random_state=None)

acebox = ToolBox(
    X=X,
    y=y,
    query_type='AllLabels',
    saving_path='.',
)

# Split data
acebox.split_AL(
    test_ratio=0.3,
    initial_label_rate=0.1,
    split_count=10,
)

# Disabled experiments with other query strategies, kept for reference:
# train_idx, test_idx, label_ind, unlab_ind = acebox.get_split(round=0)
#
# bmdr = QueryInstanceBMDR(X, y, kernel='linear')
# select = bmdr.select(label_ind, unlab_ind)
# print(select)
#
# spal = QueryInstanceSPAL(X, y, kernel='linear')
# select = spal.select(label_ind, unlab_ind)
# print(select)
#
# lal = QueryInstanceLAL(X, y, mode='LAL_iterative', train_slt=False)