Ejemplo n.º 1
0
def test_get_split():
    """Check ``get_split`` both before and after the data has been split.

    A freshly constructed ``ToolBox`` on which ``split_AL`` was never called
    is expected to raise from ``get_split``. After ``split_AL`` has run on the
    shared ``tb`` instance, the four returned index collections must pass
    ``check_one_to_one_correspondence``.
    """
    # Build the un-split toolbox outside the ``raises`` block so that an
    # unexpected constructor failure is reported as an error instead of
    # silently satisfying the expectation.
    unsplit = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None)
    with pytest.raises(Exception):
        # The result is deliberately not unpacked here: an unpacking error
        # must not be mistaken for the exception get_split should raise.
        unsplit.get_split()

    tb.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
    train_idx, test_idx, label_idx, unlabel_idx = tb.get_split()
    assert check_one_to_one_correspondence(train_idx, test_idx, label_idx, unlabel_idx)
Ejemplo n.º 2
0
def test_init():
    """Check that ``ToolBox`` rejects three malformed constructor calls.

    Each entry pairs the exception type the test expects with the positional
    and keyword arguments handed to ``ToolBox``.
    """
    invalid_calls = (
        # Only the first five entries of ``y`` together with the full ``X``.
        (ValueError, (y[0:5], X),
         {'query_type': 'AllLabels', 'saving_path': None}),
        # 'AllLabel' (without the trailing "s") as the query type.
        (NotImplementedError, (y, X),
         {'query_type': 'AllLabel', 'saving_path': None}),
        # 'Features' query type with ``x=None``; any exception is accepted.
        (Exception, (y,),
         {'x': None, 'query_type': 'Features', 'saving_path': None}),
    )
    for expected_error, args, kwargs in invalid_calls:
        with pytest.raises(expected_error):
            ToolBox(*args, **kwargs)
Ejemplo n.º 3
0
                           n_features=20,
                           n_informative=2,
                           n_redundant=2,
                           n_repeated=0,
                           n_classes=2,
                           n_clusters_per_class=2,
                           weights=None,
                           flip_y=0.01,
                           class_sep=1.0,
                           hypercube=True,
                           shift=0.0,
                           scale=1.0,
                           shuffle=True,
                           random_state=None)
# Number of splits requested from split_AL below.
split_count = 5
# Toolbox bound to the data set; saving_path=None means no path is supplied here.
acebox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None)

# Split the data: test_ratio=0.3 and initial_label_rate=0.1 are passed to
# split_AL, which is asked for `split_count` splits.
acebox.split_AL(test_ratio=0.3,
                initial_label_rate=0.1,
                split_count=split_count)

# Use the toolbox's default classifier (Logistic Regression, per the original
# author's note).
model = acebox.get_default_model()

# Stopping criterion 'num_of_queries' with value 50, i.e. query 50 times.
stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50)

# Use pre-defined query strategies. Original author's note: the data matrix is
# passed as a reference, which will not use additional memory.
randomStrategy = QueryRandom()
uncertainStrategy = QueryInstanceUncertainty(X, y)
Ejemplo n.º 4
0
import os
import numpy as np
from sklearn.datasets import load_iris

from acepy.experiment import State, StateIO
from acepy.toolbox import ToolBox

# Load the iris data set as a (features, labels) pair.
X, y = load_iris(return_X_y=True)
split_count = 5
# Absolute path of the current working directory, used as the toolbox saving path.
cur_path = os.path.abspath('.')
toolbox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=cur_path)

# Split the data, then fetch the index collections of round 0.
toolbox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=split_count)
train_ind, test_ind, L_ind, U_ind = toolbox.get_split(round=0)
# -------Initialize StateIO----------
# NOTE(review): saving_path is '.' here while the toolbox was given cur_path;
# both name the current working directory at this point.
saver = StateIO(round=0, train_idx=train_ind, test_idx=test_ind, init_L=L_ind, init_U=U_ind, saving_path='.')
# Alternatively, obtain it from the toolbox:
# saver = toolbox.get_stateio(round=0)

# Remove indexes 0, 1, 2 from init_L and add them to init_U
# (presumably set-like index containers -- confirm against StateIO).
saver.init_L.difference_update([0, 1, 2])
saver.init_U.update([0, 1, 2])

# -------Basic operations------------
# Build State objects, each with the selected index list and a performance value.
st1_batch1 = State(select_index=[1], performance=0.89)
# Attach a custom key/value entry to the first state.
my_value = 'my_entry_info'
st1_batch1.add_element(key='my_entry', value=my_value)
st1_batch2 = State(select_index=[0, 1], performance=0.89)
st2_batch1 = State(select_index=[0], performance=0.89)
st3_batch1 = State(select_index=[2], performance=0.89)
Ejemplo n.º 5
0
from __future__ import division

import pytest
from sklearn.datasets import load_iris

from acepy.toolbox import ToolBox
from acepy.utils.misc import check_one_to_one_correspondence

# Module-level fixtures shared by the tests below: the iris data set ...
X, y = load_iris(return_X_y=True)

# ... the number of splits requested from split_AL, and one shared ToolBox instance.
split_count = 5
tb = ToolBox(X=X, y=y, query_type='AllLabels', saving_path=None)

def test_init():
    """Check that ``ToolBox`` rejects three malformed constructor calls.

    Each entry pairs the exception type the test expects with the positional
    and keyword arguments handed to ``ToolBox``.
    """
    invalid_calls = (
        # Only the first five entries of ``y`` together with the full ``X``.
        (ValueError, (y[0:5], X),
         {'query_type': 'AllLabels', 'saving_path': None}),
        # 'AllLabel' (without the trailing "s") as the query type.
        (NotImplementedError, (y, X),
         {'query_type': 'AllLabel', 'saving_path': None}),
        # 'Features' query type with ``x=None``; any exception is accepted.
        (Exception, (y,),
         {'x': None, 'query_type': 'Features', 'saving_path': None}),
    )
    for expected_error, args, kwargs in invalid_calls:
        with pytest.raises(expected_error):
            ToolBox(*args, **kwargs)
    # Disabled case: a TypeError check for an invalid saving_path.
    # with pytest.raises(TypeError):
    #     ToolBox(X=X, y=y, query_type='AllLabels', saving_path='asdfasf')
    

def test_al_split():
    """Split with the shared toolbox and validate the four returned index sets.

    The values returned by ``split_AL`` are unpacked as train, test, labeled
    and unlabeled indexes and passed to ``check_one_to_one_correspondence``.
    """
    train, test, labeled, unlabeled = tb.split_AL(
        test_ratio=0.3,
        initial_label_rate=0.1,
        split_count=split_count
    )
    is_consistent = check_one_to_one_correspondence(train, test, labeled, unlabeled)
    assert is_consistent


def test_get_split(): 
    with pytest.raises(Exception):
Ejemplo n.º 6
0
# NOTE(review): hard-coded absolute Windows path; this only runs on a machine
# that has the .mat file at exactly this location.
data_root = 'C:\\Code\\AAAI19_exp\\final_exp\\benchmarks_keel.mat'
datasets = scio.loadmat(data_root)
# Pick the 'clean1' entry and unwrap its two levels of nesting.
dataname = 'clean1'
data = datasets[dataname]
data = data[0][0]
# print(type(data))
# print(len(data))
# print(data)
# First element is used as the feature matrix, second (flattened) as the labels.
X = data[0]
y = data[1].flatten()

# Alternative data sources, currently disabled:
# X, y = load_digits(return_X_y=True)
# X, y = make_classification(n_samples=150, n_features=20, n_informative=2, n_redundant=2,
#                            n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.15, class_sep=1.0,
#                            hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
acebox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')

# Split the data, requesting 10 splits.
acebox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)

# Use the toolbox's default classifier (Logistic Regression, per the original
# author's note).
model = acebox.get_default_model()

# The cost budget is 50 queries.
stopping_criterion = acebox.get_stopping_criterion('num_of_queries', 50)

# Two oracles built from constant label lists (all 1 and all -1, one entry per
# element of y), registered under the names 'Tom' and 'Amy'.
oracle1 = Oracle(labels=[1] * len(y))
oracle2 = Oracle(labels=[-1] * len(y))
oracles = Oracles()
oracles.add_oracle(oracle_name='Tom', oracle_object=oracle1)
oracles.add_oracle(oracle_name='Amy', oracle_object=oracle2)
Ejemplo n.º 7
0
from sklearn.datasets import load_iris
from acepy.toolbox import ToolBox

# Load the iris data, bind it to a toolbox and create 10 splits.
X, y = load_iris(return_X_y=True)
acebox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')
acebox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)
model = acebox.get_default_model()
# Index collections of split 0: train, test, labeled and unlabeled.
train_idx, test_idx, Lind, Uind = acebox.get_split(0)
# -------------Initialize---------------
# Initialize a strategy object through the ToolBox ...
QBCStrategy = acebox.get_query_strategy(strategy_name='QueryInstanceQBC')

# ... or import the strategy classes from acepy.query_strategy directly.
from acepy.query_strategy import QueryInstanceQBC, QueryInstanceUncertainty

uncertainStrategy = QueryInstanceUncertainty(X, y, measure='entropy')
# --------------Select----------------
# Fit the model on the labeled instances, then select from the unlabeled pool.
# The second argument must be the unlabeled index set (Uind); passing the
# labeled set twice would pick an instance whose label is already known.
model.fit(X[Lind.index], y[Lind.index])
select_ind = uncertainStrategy.select(Lind, Uind, batch_size=1, model=model)
print(select_ind)

# With model=None the strategy falls back to a default model to choose the
# instances (logistic regression, per the original author's note).
select_ind = uncertainStrategy.select(Lind, Uind, batch_size=1, model=None)

# Use select_by_prediction_mat() by providing the probabilistic prediction
# matrix computed on the unlabeled instances.
prob_mat = model.predict_proba(X[Uind.index])
select_ind = QBCStrategy.select_by_prediction_mat(unlabel_index=Uind,
                                                  predict=prob_mat,
                                                  batch_size=1)
print(select_ind)
Ejemplo n.º 8
0
# NOTE(review): hard-coded absolute Windows path; this only runs on a machine
# that has the .mat file at exactly this location.
data_root = 'C:\\Code\\AAAI19_exp\\final_exp\\benchmarks_keel.mat'
datasets = scio.loadmat(data_root)
# Pick the 'clean1' entry and unwrap its two levels of nesting.
dataname = 'clean1'
data = datasets[dataname]
data = data[0][0]
# print(type(data))
# print(len(data))
# print(data)
# First element is used as the feature matrix, second (flattened) as the labels.
X = data[0]
y = data[1].flatten()

# Alternative data sources, currently disabled:
# X, y = load_digits(return_X_y=True)
# X, y = make_classification(n_samples=150, n_features=20, n_informative=2, n_redundant=2,
#                            n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.15, class_sep=1.0,
#                            hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
acebox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')

# Split the data, requesting 10 splits.
acebox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)

# train_idx, test_idx, label_ind, unlab_ind = acebox.get_split(round=0)
#
# bmdr = QueryInstanceBMDR(X, y, kernel='linear')
# select = bmdr.select(label_ind, unlab_ind)
# print(select)
#
# spal = QueryInstanceSPAL(X, y, kernel='linear')
# select = spal.select(label_ind, unlab_ind)
# print(select)
#
# lal = QueryInstanceLAL(X, y, mode='LAL_iterative', train_slt=False)