def AC_(self, X, y):
    """Fit the default alipy model on (X, y) and return trust values.

    Each trust value is exp() of one learned model coefficient, so every
    score is positive and larger weights map to larger trust.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Target labels.

    Returns
    -------
    list of float
        exp(w) for every entry of the fitted model's flattened
        coefficient matrix.
    """
    alibox = ToolBox(X=X, y=y, query_type='AllLabels')
    alibox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)
    # Default model is a Logistic Regression classifier.
    model = alibox.get_default_model()
    model.fit(X, y)
    # BUG FIX: the original read `model.class_weight`, which is the
    # constructor *parameter* (None by default for the default model),
    # not the learned weights, so `w.shape` raised AttributeError.
    # The fitted coefficients live in `model.coef_`; flatten so we get
    # one scalar trust value per coefficient.
    w = model.coef_.ravel()
    # exp() maps each raw weight onto a positive trust score.
    # (The original also computed an unused `model.predict(X)`; that
    # dead call has been removed.)
    return [math.exp(wi) for wi in w]
import copy from sklearn.datasets import load_iris from alipy import ToolBox X, y = load_iris(return_X_y=True) alibox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.') # Split data alibox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10) # Use the default Logistic Regression classifier model = alibox.get_default_model() # The cost budget is 50 times querying stopping_criterion = alibox.get_stopping_criterion('num_of_queries', 50) # Use pre-defined strategy QBCStrategy = alibox.get_query_strategy(strategy_name='QueryInstanceQBC') QBC_result = [] for round in range(10): # Get the data split of one fold experiment train_idx, test_idx, label_ind, unlab_ind = alibox.get_split(round) # Get intermediate results saver for one fold experiment saver = alibox.get_stateio(round) while not stopping_criterion.is_stop(): # Select a subset of Uind according to the query strategy # Passing model=None to use the default model for evaluating the committees' disagreement select_ind = QBCStrategy.select(label_ind, unlab_ind,
def create_and_implement_strategy(strategy_name, data, labels, queries):
    """Run one alipy active-learning experiment with the given query strategy.

    Parameters
    ----------
    strategy_name : str
        Name of the alipy query strategy (e.g. 'QueryInstanceUncertainty').
    data : pandas.DataFrame
        Feature dataframe; only its raw values are used, since the
        train/test split is loaded from the global 'dataset_al' pickle.
    labels : array-like
        Target labels; converted to a numpy array.
    queries : int
        Query budget — the loop stops after this many labeled examples.

    Returns
    -------
    list
        A single-element list holding a deep copy of the StateIO saver
        with the accuracy history of the run.
    """
    # Keep only the values of the data/labels dataframe; the global
    # split (loaded below) is expressed as row indexes into these arrays.
    X = data.values
    y = np.asarray(labels)
    toolbox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')
    # Default Logistic Regression model (liblinear solver).
    model = toolbox.get_default_model()
    # Query strategy under evaluation.
    uncertainty_strategy = toolbox.get_query_strategy(strategy_name=strategy_name)
    # Collected results, one saver per run.
    examples = []
    # Stop once `queries` examples have been labeled.
    stopping_criterion = toolbox.get_stopping_criterion('num_of_queries', queries)
    # Load the indexes of the global split.
    with open("dataset_al", "rb") as f:
        train_idx, test_idx, labeled_idx, unlabeled_idx = pickle.load(f)
    # Saver that records the per-query intermediate results.
    saver = StateIO(round=0, train_idx=train_idx, test_idx=test_idx,
                    init_L=labeled_idx, init_U=unlabeled_idx, saving_path='.')
    # Fit on the initially labeled examples and record the starting accuracy.
    model.fit(X=X[labeled_idx.index, :], y=y[labeled_idx.index])
    y_pred = model.predict(X[test_idx, :])
    accuracy = toolbox.calc_performance_metric(y_true=y[test_idx],
                                               y_pred=y_pred,
                                               performance_metric='accuracy_score')
    saver.set_initial_point(accuracy)
    while not stopping_criterion.is_stop():
        # Select one example from the unlabeled pool.
        example = uncertainty_strategy.select(labeled_idx, unlabeled_idx,
                                              model=model, batch_size=1)
        # Move the selected example from the unlabeled to the labeled set.
        labeled_idx.update(example)
        unlabeled_idx.difference_update(example)
        # Retrain with the newly labeled example and re-evaluate.
        model.fit(X=X[labeled_idx.index, :], y=y[labeled_idx.index])
        y_pred = model.predict(X[test_idx, :])
        accuracy = toolbox.calc_performance_metric(y_true=y[test_idx],
                                                   y_pred=y_pred,
                                                   performance_metric='accuracy_score')
        # Save this query's result and advance the stopping criterion.
        state = toolbox.State(select_index=example, performance=accuracy)
        saver.add_state(state)
        saver.save()
        stopping_criterion.update_information(saver)
    stopping_criterion.reset()
    examples.append(copy.deepcopy(saver))
    # BUG FIX: `y` is one-dimensional, so the original `y[labeled_idx, :]`
    # raised IndexError (too many indices for the array).  Index with
    # `labeled_idx.index`, consistent with the fit calls above.
    X_train = X[labeled_idx.index, :]
    y_train = y[labeled_idx.index]
    # Reshape target to a flat vector.
    y_train = np.array(y_train).reshape(-1)
    # Uncomment in order to save the new active-learning dataset:
    # with open('qbc_dataset', 'wb') as f:
    #     pickle.dump((X_train, y_train), f)
    return examples