Example #1
import pytest

# ToolBox, X, y, split_count, tb and check_one_to_one_correspondence come from
# the module-level test setup (a sketch of it follows this function).
def test_get_split():
    # get_split() must raise before split_AL() has been called
    with pytest.raises(Exception):
        t = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None)
        a, b, c, d = t.get_split()
    # After splitting, the four returned index lists must correspond one-to-one
    tb.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
    a, b, c, d = tb.get_split()
    assert check_one_to_one_correspondence(a, b, c, d)
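# A minimal sketch of the module-level setup the test above relies on (it would
# sit above the test in a real test file). The import location of
# check_one_to_one_correspondence is an assumption; adjust it to your acepy version.
from sklearn.datasets import load_iris

from acepy.toolbox import ToolBox
from acepy.utils.misc import check_one_to_one_correspondence  # assumed location

X, y = load_iris(return_X_y=True)
split_count = 5
tb = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')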
Example #2
# This snippet assumes `acebox` is a ToolBox built on X, y and already split
# into split_count folds (see Example #3), and that the strategies are imported
# from acepy.query_strategy (the exact path may differ between versions).
from acepy.query_strategy import QueryRandom, QueryInstanceUncertainty

# Use the default Logistic Regression classifier
model = acebox.get_default_model()

# Stop after 50 queries
stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50)

# Use pre-defined strategies. The data matrix is passed by reference,
# so it does not consume additional memory.
randomStrategy = QueryRandom()
uncertainStrategy = QueryInstanceUncertainty(X, y)

# A clean oracle that always returns the ground-truth label
oracle = acebox.get_clean_oracle()

random_result = []
for round in range(split_count):
    train_idx, test_idx, Lind, Uind = acebox.get_split(round)
    # Get an intermediate-result saver for this fold
    saver = acebox.get_stateio(round)

    # calc the initial point
    model.fit(X=X[Lind.index, :], y=y[Lind.index])
    pred = model.predict(X[test_idx, :])
    accuracy = sum(pred == y[test_idx]) / len(test_idx)

    saver.set_initial_point(accuracy)
    while not stopping_criterion.is_stop():
        # Randomly pick an unlabeled instance and query its label from the oracle
        select_ind = randomStrategy.select(Uind)
        label, cost = oracle.query_by_index(select_ind)
        # Move the queried instance from the unlabeled to the labeled index set
        Lind.update(select_ind)
        Uind.difference_update(select_ind)
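        # The original example is truncated here. A minimal sketch of how the
        # loop is usually closed, assuming the State/StateIO/StoppingCriteria
        # API shown in the other acepy examples (State is imported from
        # acepy.experiment as in Example #3): re-train the model, record a
        # State, update the stopping criterion from the saver, then reset it.
        model.fit(X=X[Lind.index, :], y=y[Lind.index])
        pred = model.predict(X[test_idx, :])
        accuracy = sum(pred == y[test_idx]) / len(test_idx)

        st = State(select_index=select_ind, performance=accuracy)
        saver.add_state(st)
        saver.save()

        stopping_criterion.update_information(saver)
    stopping_criterion.reset()
    random_result.append(saver)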
Example #3
import os
import numpy as np
from sklearn.datasets import load_iris

from acepy.experiment import State, StateIO
from acepy.toolbox import ToolBox

X, y = load_iris(return_X_y=True)
split_count = 5
cur_path = os.path.abspath('.')
toolbox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=cur_path)

# split data
toolbox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
train_ind, test_ind, L_ind, U_ind = toolbox.get_split(round=0)
# -------Initialize StateIO----------
saver = StateIO(round=0, train_idx=train_ind, test_idx=test_ind, init_L=L_ind, init_U=U_ind, saving_path='.')
# or obtain it from the toolbox:
# saver = toolbox.get_stateio(round=0)

# Modify the recorded initial split: move instances 0, 1 and 2 from the
# initially labeled set back to the unlabeled set
saver.init_L.difference_update([0, 1, 2])
saver.init_U.update([0, 1, 2])

# -------Basic operations------------
st1_batch1 = State(select_index=[1], performance=0.89)
my_value = 'my_entry_info'
st1_batch1.add_element(key='my_entry', value=my_value)
st1_batch2 = State(select_index=[0, 1], performance=0.89)
st2_batch1 = State(select_index=[0], performance=0.89)
st3_batch1 = State(select_index=[2], performance=0.89)
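# The example stops after constructing the State objects. A minimal sketch of
# the typical follow-up, assuming StateIO exposes add_state/get_state/save and
# State exposes get_value as in acepy's experiment module (names may differ
# between versions):
saver.add_state(st1_batch1)
saver.add_state(st1_batch2)
saver.add_state(st2_batch1)
saver.add_state(st3_batch1)

# Read back a stored query and its custom entry
prev_st = saver.get_state(index=0)
print(prev_st.get_value('my_entry'))

# Persist the intermediate results to saving_path
saver.save()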
Example #4
# This snippet assumes `acebox`, `model`, `X`, `y` and `split_count` are set up
# as in the previous examples, and that Oracle, Oracles and the
# QueryNoisyOracles* strategies are imported from acepy's oracle and
# query_strategy modules.

# The cost budget is 50 queries
stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50)

# Two simulated noisy oracles: 'Tom' always answers 1, 'Amy' always answers -1
oracle1 = Oracle(labels=[1] * len(y))
oracle2 = Oracle(labels=[-1] * len(y))
oracles = Oracles()
oracles.add_oracle(oracle_name='Tom', oracle_object=oracle1)
oracles.add_oracle(oracle_name='Amy', oracle_object=oracle2)
# The same oracles as a plain list (some strategies may accept this form too)
oracles_list = [oracle1, oracle2]

# Baseline strategies: query all oracles at once, or pick one oracle at random
all = QueryNoisyOraclesAll(X=X, y=y, oracles=oracles)
rand = QueryNoisyOraclesRandom(X=X, y=y, oracles=oracles)

for round in range(split_count):
    # Get the data split of one fold experiment
    train_idx, test_idx, label_ind, unlab_ind = acebox.get_split(round)
    # Get intermediate results saver for one fold experiment
    saver = acebox.get_stateio(round)
    # calc the initial point
    model.fit(X=X[label_ind.index, :], y=y[label_ind.index])
    pred = model.predict(X[test_idx, :])
    accuracy = sum(pred == y[test_idx]) / len(test_idx)
    saver.set_initial_point(accuracy)
    # CEAL and IEthresh depend on the initially labeled indexes of the current
    # fold, so they are instantiated inside the loop
    ceal = QueryNoisyOraclesCEAL(X,
                                 y,
                                 oracles=oracles,
                                 initial_labeled_indexes=label_ind)
    iet = QueryNoisyOraclesIEthresh(X=X,
                                    y=y,
                                    oracles=oracles,
                                    initial_labeled_indexes=label_ind)
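    # The example is truncated here. A hedged sketch of one query iteration,
    # assuming the noisy-oracle strategies' select() returns both the chosen
    # instance and the name of the oracle to query (a hypothetical convention),
    # and that State is imported from acepy.experiment as in Example #3:
    while not stopping_criterion.is_stop():
        select_ind, select_ora = ceal.select(label_ind, unlab_ind)
        label_ind.update(select_ind)
        unlab_ind.difference_update(select_ind)

        # Re-train and evaluate, then record the state as in Example #2
        model.fit(X=X[label_ind.index, :], y=y[label_ind.index])
        pred = model.predict(X[test_idx, :])
        accuracy = sum(pred == y[test_idx]) / len(test_idx)
        saver.add_state(State(select_index=select_ind, performance=accuracy))

        stopping_criterion.update_information(saver)
    stopping_criterion.reset()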