y = df.loc[:, 'True Positive']

# Parameters for the models are defined here. Change init_labels,
# trn_tst_split, or splits to change the experiment.
stop = 300  # stopping value for number of queries
init_labels = 0.005  # initially labelled portion of the dataset
trn_tst_split = 0.2  # train/test split to use for each fold
splits = 5  # number of k folds

# Experiment using the 'QueryInstanceUncertainty' query strategy.
al_unc = AlExperiment(
    X,
    y,
    model=LogisticRegression(penalty='l1', solver='liblinear'),
    performance_metric='accuracy_score',
    stopping_criteria='num_of_queries',
    stopping_value=stop,
)
al_unc.split_AL(
    test_ratio=trn_tst_split,
    initial_label_rate=init_labels,
    split_count=splits,
    all_class=True,
)
al_unc.set_query_strategy(strategy='QueryInstanceUncertainty')
al_unc.set_performance_metric(performance_metric='accuracy_score')
al_unc.start_query(multi_thread=False)
# print(al.get_experiment_result())
# al.plot_learning_curve()

# Collect each method's results under a shared x-axis for comparison.
analyser = ExperimentAnalyser(x_axis='num_of_queries')
analyser.add_method('uncertainty', al_unc.get_experiment_result())

# Second experiment, built with the same model and stopping settings.
al_qbc = AlExperiment(
    X,
    y,
    model=LogisticRegression(penalty='l1', solver='liblinear'),
    performance_metric='accuracy_score',
    stopping_criteria='num_of_queries',
    stopping_value=stop,
)
from sklearn.datasets import load_iris
from alipy.data_manipulate import split
from alipy.experiment import AlExperiment

# Get the data.
X, y = load_iris(return_X_y=True)

# Initialise the AlExperiment.
al = AlExperiment(X, y)

# Split the data by using split_AL() ...
al.split_AL(test_ratio=0.3, initial_label_rate=0.05, split_count=10)

# ... or set the data split indexes explicitly from a precomputed split.
train, test, lab, unlab = split(
    X=X,
    y=y,
    test_ratio=0.3,
    initial_label_rate=0.05,
    split_count=10,
)
al.set_data_split(
    train_idx=train,
    test_idx=test,
    label_idx=lab,
    unlabel_idx=unlab,
)

# Set the query strategy, using a pre-defined strategy ...
al.set_query_strategy(strategy="QueryInstanceUncertainty")

# ... or using your own query strategy:
# class my_qs_class:
# Imported once up front; the import does not depend on the loop variable.
from alipy.data_manipulate import split

X, y = load_iris(return_X_y=True)

# NOTE(review): 'QureyExpectedErrorReduction' looks like a misspelling of the
# final entry 'QueryExpectedErrorReduction' -- confirm whether it is kept on
# purpose before changing it. The strategy name is only consumed by the
# commented-out set_query_strategy() call below.
for strategy in [
    'QueryInstanceQBC',
    'QueryInstanceUncertainty',
    'QueryInstanceRandom',
    'QureyExpectedErrorReduction',
    'QueryInstanceGraphDensity',
    'QueryInstanceQUIRE',
    'QueryInstanceBMDR',
    'QueryInstanceSPAL',
    'QueryInstanceLAL',
    'QueryExpectedErrorReduction',
]:
    # Initialise the AlExperiment.
    al = AlExperiment(
        X,
        y,
        stopping_criteria='num_of_queries',
        stopping_value=50,
    )

    # Split the data by using split_AL().
    al.split_AL(split_count=5)
    # al.set_query_strategy(strategy=strategy)
    # al.set_performance_metric('accuracy_score')
    # al.start_query(multi_thread=True)

    # Or set the data split indexes explicitly from a precomputed split.
    train, test, lab, unlab = split(
        X=X,
        y=y,
        test_ratio=0.3,
        initial_label_rate=0.05,
        split_count=1,
    )
from sklearn.datasets import load_iris
from alipy.data_manipulate import split
from alipy.experiment import AlExperiment

# Get the data.
X, y = load_iris(return_X_y=True)

# Initialise the AlExperiment.
al = AlExperiment(X, y)

# Split the data by using split_AL() ...
al.split_AL()

# ... or set the data split indexes explicitly from a precomputed split.
train, test, lab, unlab = split(
    X=X,
    y=y,
    test_ratio=0.3,
    initial_label_rate=0.05,
    split_count=10,
)
al.set_data_split(
    train_idx=train,
    test_idx=test,
    label_idx=lab,
    unlabel_idx=unlab,
)

# Set the query strategy, using a pre-defined strategy ...
al.set_query_strategy(strategy="QueryInstanceUncertainty")

# ... or using your own query strategy:
# class my_qs_class:
#     def __init__(self, X=None, y=None, **kwargs):
#         pass
#
#     def select(self, label_index, unlabel_index, batch_size=1, **kwargs):
#         """Select instances to query."""
#         pass