Esempio n. 1
0
from sklearn.datasets import load_iris
from alipy.data_manipulate import split
from alipy.utils.multi_thread import aceThreading
# Load the iris data as a feature matrix X and a label vector y.
X, y = load_iris(return_X_y=True)

# Generate the index sets with alipy's split, using split_count=10,
# test_ratio=0.3 and initial_label_rate=0.05.
train, test, lab, unlab = split(
    X=X, y=y,
    split_count=10,
    test_ratio=0.3,
    initial_label_rate=0.05,
)

# Build the aceThreading object from the data and the index sets above.
# saving_path='.' refers to the current working directory.
acethread = aceThreading(
    examples=X, labels=y,
    train_idx=train, test_idx=test,
    label_index=lab, unlabel_index=unlab,
    max_thread=None,
    refresh_interval=1,
    saving_path='.',
)

from sklearn import linear_model
from alipy.experiment import State
from alipy.query_strategy import QueryInstanceQBC


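# Define the custom function.
# Specifically, the parameters of the custom function must be:
# (round, train_id, test_id, Ucollection, Lcollection, saver, examples, labels, global_parameters)
# NOTE(review): the signature below lists Lcollection before Ucollection,
# which differs from the order given above; confirm whether the order matters.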
def target_func(round, train_id, test_id, Lcollection, Ucollection, saver,
Esempio n. 2
0
import numpy as np
# Instance-level split on a small random dataset.
from alipy.data_manipulate import split

X = np.random.rand(10, 10)  # 10 rows (instances) x 10 columns (features)
y = [cls for cls in (0, 1) for _ in range(5)]  # five 0 labels, then five 1 labels

# Request a single split (split_count=1) with test_ratio=0.5 and
# initial_label_rate=0.5; saving_path='.' is the current working directory.
train, test, lab, unlab = split(
    X=X, y=y,
    split_count=1,
    test_ratio=0.5,
    initial_label_rate=0.5,
    all_class=True,
    saving_path='.',
)
print(train, test, lab, unlab)

# Multi-label split: the label matrix is passed directly as y.
from alipy.data_manipulate import split_multi_label

# Label matrix for 3 instances (rows) with 3 labels each (columns).
mult_y = [
    [1, 1, 1],
    [0, 1, 1],
    [0, 1, 0],
]

# Request a single split (split_count=1); saving_path=None is passed through as-is.
train_idx, test_idx, label_idx, unlabel_idx = split_multi_label(
    y=mult_y,
    test_ratio=0.3,
    initial_label_rate=0.5,
    split_count=1,
    all_class=False,
    saving_path=None,
)

# Show each of the four results on its own line.
print(train_idx, test_idx, label_idx, unlabel_idx, sep="\n")

# split features