예제 #1
0
y = df.loc[:, 'True Positive']

# Experiment knobs: adjust init_labels, trn_tst_split or splits to run a
# differently configured experiment.
stop = 300  # stopping value, expressed as a number of queries
init_labels = 0.005  # portion of the dataset that starts out labelled
trn_tst_split = 0.2  # train/test split applied to each fold
splits = 5  # number of k folds


def _new_experiment():
    """Return an AlExperiment over (X, y) with its own L1 logistic-regression model.

    Every call builds a fresh LogisticRegression instance, so experiments
    created through this helper do not share a model object.
    """
    return AlExperiment(
        X,
        y,
        model=LogisticRegression(penalty='l1', solver='liblinear'),
        performance_metric='accuracy_score',
        stopping_criteria='num_of_queries',
        stopping_value=stop,
    )


# First experiment: configured with the 'QueryInstanceUncertainty' strategy.
al_unc = _new_experiment()
al_unc.split_AL(
    test_ratio=trn_tst_split,
    initial_label_rate=init_labels,
    split_count=splits,
    all_class=True,
)
al_unc.set_query_strategy(strategy='QueryInstanceUncertainty')
al_unc.set_performance_metric(performance_metric='accuracy_score')
al_unc.start_query(multi_thread=False)

# Inspection helpers left disabled by the original author:
# print(al.get_experiment_result())
# al.plot_learning_curve()

# Register the finished run with the analyser under the label 'uncertainty'.
analyser = ExperimentAnalyser(x_axis='num_of_queries')
analyser.add_method('uncertainty', al_unc.get_experiment_result())

# Second experiment, constructed with exactly the same arguments.
al_qbc = _new_experiment()
예제 #2
0
from sklearn.datasets import load_iris
from alipy.experiment import AlExperiment

# Load the iris features and labels.
X, y = load_iris(return_X_y=True)

# Create the experiment with default settings.
al = AlExperiment(X, y)

# The same split settings are used by both splitting approaches below.
split_settings = dict(test_ratio=0.3, initial_label_rate=0.05, split_count=10)

# Option 1: let the experiment partition the data via split_AL().
al.split_AL(**split_settings)

# Option 2: compute the split indexes explicitly and hand them over.
# NOTE(review): presumably this supersedes the split_AL() partition above;
# confirm against the ALiPy documentation.
from alipy.data_manipulate import split

split_indexes = split(X=X, y=y, **split_settings)
train, test, lab, unlab = split_indexes
al.set_data_split(
    train_idx=train,
    test_idx=test,
    label_idx=lab,
    unlabel_idx=unlab,
)

# Choose the query strategy; here a pre-defined one is selected by name.
al.set_query_strategy(strategy="QueryInstanceUncertainty")

# or using your own query strategy
# class my_qs_class:
# Imported once up front instead of being re-executed on every loop iteration.
from alipy.data_manipulate import split

# Load the iris features and labels.
X, y = load_iris(return_X_y=True)

# Iterate over query-strategy names, building one experiment per name.
# NOTE(review): 'QureyExpectedErrorReduction' looks like a misspelling of the
# final entry 'QueryExpectedErrorReduction'; confirm which spelling the
# installed alipy version accepts before enabling set_query_strategy below.
for strategy in [
        'QueryInstanceQBC', 'QueryInstanceUncertainty', 'QueryInstanceRandom',
        'QureyExpectedErrorReduction', 'QueryInstanceGraphDensity',
        'QueryInstanceQUIRE', 'QueryInstanceBMDR', 'QueryInstanceSPAL',
        'QueryInstanceLAL', 'QueryExpectedErrorReduction'
]:
    # Fresh experiment configured with stopping_criteria='num_of_queries'
    # and stopping_value=50.
    al = AlExperiment(X,
                      y,
                      stopping_criteria='num_of_queries',
                      stopping_value=50)

    # Let the experiment partition the data itself via split_AL().
    al.split_AL(split_count=5)

    # Strategy selection and the query run are disabled here, so `strategy`
    # is not used by the visible part of the loop body.
    # al.set_query_strategy(strategy=strategy)

    # al.set_performance_metric('accuracy_score')

    # al.start_query(multi_thread=True)

    # Alternatively, compute the split indexes explicitly with split().
    train, test, lab, unlab = split(X=X,
                                    y=y,
                                    test_ratio=0.3,
                                    initial_label_rate=0.05,
                                    split_count=1)
예제 #4
0
from sklearn.datasets import load_iris
from alipy.experiment import AlExperiment

# Load the iris features and labels.
X, y = load_iris(return_X_y=True)

# Create the experiment with default settings.
al = AlExperiment(X, y)

# Option 1: let the experiment partition the data via split_AL(),
# called here without arguments.
al.split_AL()

# Option 2: compute the split indexes explicitly and hand them over.
from alipy.data_manipulate import split

split_indexes = split(
    X=X,
    y=y,
    test_ratio=0.3,
    initial_label_rate=0.05,
    split_count=10,
)
train, test, lab, unlab = split_indexes
al.set_data_split(
    train_idx=train,
    test_idx=test,
    label_idx=lab,
    unlabel_idx=unlab,
)

# Choose the query strategy; here a pre-defined one is selected by name.
al.set_query_strategy(strategy="QueryInstanceUncertainty")

# or use your own query strategy
# class my_qs_class:
#     def __init__(self, X=None, y=None, **kwargs):
#         pass
#
#     def select(self, label_index, unlabel_index, batch_size=1, **kwargs):
#         """Select instances to query."""
#         pass