def uncertainty_values(self, data, target, X_train, y_train, X_full, y_full,
                       train_idx):
    """Run a stream-based active-learning loop and print sample uncertainties.

    A random-forest ``ActiveLearner`` using ``margin_sampling`` is fitted on
    ``(X_train, y_train)``. Rows of ``X_full`` are then drawn at random; a row
    whose ``classifier_uncertainty`` is at least 0.4 has its uncertainty and
    margin printed and is then taught to the learner with its label from
    ``y_full``. The loop ends once the accuracy on ``(X_full, y_full)``
    reaches 0.90, or when the 51st sample has been taught.

    Args:
        data: Unused; kept so existing callers keep working.
        target: Unused; kept so existing callers keep working.
        X_train: Initial training samples handed to the learner.
        y_train: Labels matching ``X_train``.
        X_full: Samples to stream from; indexed by row and reshaped to a
            single-row 2-D array before being scored.
        y_full: Labels matching ``X_full``; each element must support
            ``.reshape``.
        train_idx: Unused; kept so existing callers keep working.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    print("START: ST")

    # initializing the active learner
    learner = ActiveLearner(estimator=RandomForestClassifier(),
                            query_strategy=margin_sampling,
                            X_training=X_train,
                            y_training=y_train)
    print('%f' % learner.score(X_full, y_full))

    # NOTE(review): the nesting of the counter/break inside the uncertainty
    # branch was reconstructed from whitespace-collapsed source -- confirm.
    index = 0
    # learning until the accuracy reaches a given threshold
    while learner.score(X_full, y_full) < 0.90:
        # An int argument samples from np.arange(n) without building a range.
        stream_idx = np.random.choice(len(X_full))
        sample = X_full[stream_idx].reshape(1, -1)

        # Score the sample once; the learner is not modified between the
        # threshold check and the print, so the value can be reused.
        uncertainty = classifier_uncertainty(learner, sample)
        if uncertainty >= 0.4:
            print("[ %1.3f, %1.3f]" % (uncertainty[0],
                                       classifier_margin(learner, sample)[0]))
            learner.teach(sample, y_full[stream_idx].reshape(-1, ))
            if index == 50:
                break
            index = index + 1

    # NOTE(review): this repeats the opening marker verbatim; it may have been
    # meant as an end marker -- confirm before changing the emitted text.
    print("START: ST")
def uncertainty_batch_sampling(classifier: Union[BaseLearner, BaseCommittee],
                               X: Union[np.ndarray, sp.csr_matrix],
                               n_instances: int = 20,
                               metric: Union[str, Callable] = 'euclidean',
                               n_jobs: Optional[int] = None,
                               **uncertainty_measure_kwargs
                               ) -> Tuple[np.ndarray, Union[np.ndarray, sp.csr_matrix]]:
    """
    Ranked batch-mode uncertainty sampling: pick the records the classifier is least sure about.

    :func:`~modAL.uncertainty.uncertainty_sampling` also accepts `n_instances` > 1, but traditional
    active learning query strategies suffer from sub-optimal record selection in that setting.
    This strategy extends interactive uncertainty sampling to batch mode and additionally enforces
    a ranking over the batch, i.e. which of the selected records matter most for labelling.

    Reference: Cardoso et al., "Ranked batch-mode active learning":
    https://www.sciencedirect.com/science/article/pii/S0020025516313949

    Args:
        classifier: One of modAL's supported active learning models.
        X: Set of records to be considered for our active learning model.
        n_instances: Number of records to return for labeling from `X`.
        metric: Forwarded to :func:`~sklearn.metrics.pairwise.pairwise_distances`.
        n_jobs: When left unset, :func:`~sklearn.metrics.pairwise.pairwise_distances_argmin_min`
            computes the distances between samples; otherwise the value is forwarded to
            :func:`~sklearn.metrics.pairwise.pairwise_distances`.
        **uncertainty_measure_kwargs: Keyword arguments forwarded for the
            :meth:`predict_proba` of the classifier.

    Returns:
        A pair ``(indices, records)``: the indices of the instances from `X` chosen to be
        labelled, and the corresponding records of `X`.
    """
    # Per-record uncertainty drives the ranking performed by ranked_batch.
    scores = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs)

    ranking_options = dict(
        uncertainty_scores=scores,
        n_instances=n_instances,
        metric=metric,
        n_jobs=n_jobs,
    )
    chosen = ranked_batch(classifier, unlabeled=X, **ranking_options)

    return chosen, X[chosen]
def al_stream(self, data, target, X_train, y_train, X_full, y_full, train_idx):
    """Stream random samples to a margin-sampling learner and record accuracy.

    A random-forest ``ActiveLearner`` is fitted on ``(X_train, y_train)``.
    Rows of ``X_full`` are drawn at random; a row whose
    ``classifier_uncertainty`` is at least 0.2 is taught to the learner, after
    which the accuracy on ``(X_full, y_full)`` is printed (comma-terminated)
    and appended to the result. Streaming stops when that accuracy reaches
    0.90 or when the taught-sample counter equals ``self.query_number``; the
    accuracy printed on that final query is not appended.

    Args:
        data: Unused.
        target: Unused.
        X_train: Initial training samples.
        y_train: Labels matching ``X_train``.
        X_full: Samples to stream from, indexed by row.
        y_full: Labels matching ``X_full``.
        train_idx: Unused.

    Returns:
        list: Accuracy values recorded after each taught sample.
    """
    stream_learner = ActiveLearner(
        estimator=RandomForestClassifier(),
        query_strategy=margin_sampling,
        X_training=X_train,
        y_training=y_train,
    )
    accuracies = []
    taught = 0

    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # the counter, break and append are assumed to belong to the branch that
    # teaches a sample -- confirm.
    while True:
        # Stop as soon as the accuracy threshold is met.
        if not stream_learner.score(X_full, y_full) < 0.90:
            break

        pick = np.random.choice(range(len(X_full)))
        candidate = X_full[pick].reshape(1, -1)

        # Skip samples the learner is already confident about.
        if not classifier_uncertainty(stream_learner, candidate) >= 0.2:
            continue

        stream_learner.teach(candidate, y_full[pick].reshape(-1, ))
        current_score = stream_learner.score(X_full, y_full)
        print('%0.3f' % (current_score), end=",")

        if taught == self.query_number:
            break
        taught += 1
        accuracies.append(current_score)

    return accuracies
def active_learn(df1, first_item_index_of_each_category):
    """Run stream-based active learning over a labelled data frame.

    Features are taken from every column of ``df1`` except the first, and
    labels from the ``'label'`` column (presumably that first column -- TODO
    confirm). A random-forest ``ActiveLearner`` using ``entropy_sampling`` is
    seeded with the rows at ``first_item_index_of_each_category``. Random rows
    are then streamed: a row whose ``classifier_uncertainty`` is at least 0.4
    is taught to the learner and the new accuracy on the full data is printed.
    Every 100th taught sample (counter values 0, 100, 200, ...) has its
    accuracy recorded. The loop ends once accuracy reaches 0.90 or when the
    counter hits 505; the recorded accuracies are then passed to
    ``percentage_increase``.

    Args:
        df1: Data frame with a ``'label'`` column; columns from index 1 on are
            used as features.
        first_item_index_of_each_category: Row indices forming the initial
            training set.

    Returns:
        None. Results are printed and handed to ``percentage_increase``.
    """
    train_idx = first_item_index_of_each_category

    # Materialise the feature matrix and label vector once; the first column
    # is skipped because it is treated as the label.
    X_full = df1.values[:, 1:]
    y_full = df1['label'].values

    # initializing the active learner
    learner = ActiveLearner(estimator=RandomForestClassifier(),
                            query_strategy=entropy_sampling,
                            X_training=X_full[train_idx],
                            y_training=y_full[train_idx])
    print('%f' % learner.score(X_full, y_full))

    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # the counter logic is assumed to sit inside the teaching branch (the only
    # place `learner_score` is defined) -- confirm.
    index = 0
    performance_array = []
    # learning until the accuracy reaches a given threshold
    while learner.score(X_full, y_full) < 0.90:
        # An int argument samples from np.arange(n) without building a range.
        stream_idx = np.random.choice(len(X_full))
        sample = X_full[stream_idx].reshape(1, -1)
        if classifier_uncertainty(learner, sample) >= 0.4:
            learner.teach(sample, y_full[stream_idx].reshape(-1, ))
            learner_score = learner.score(X_full, y_full)
            print('%f' % (learner_score))
            if index == 505:
                break
            if index % 100 == 0:
                performance_array.append(learner_score)
            index = index + 1

    percentage_increase(performance_array)
)
# NOTE(review): the `)` above closes a call that starts outside this fragment.
# The nesting below was reconstructed from whitespace-collapsed source.

# Report the learner's accuracy on the full data before any streamed query.
print('Initial prediction accuracy: %f' % learner.score(X_full, y_full))

# visualizing initial prediction
# NOTE(review): newer Matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-white') -- confirm against the installed version.
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    # Column 1 of predict_proba, laid out on an (im_width, im_height) grid;
    # presumably the probability of the second class -- TODO confirm.
    prediction = learner.predict_proba(X_full)[:, 1]
    plt.imshow(prediction.reshape(im_width, im_height))
    plt.title('Initial prediction accuracy: %f' % learner.score(X_full, y_full))
    plt.show()

""" The instances are randomly selected one by one, if an instance's uncertainty is above a threshold, the label is requested and shown to the learner. The process is continued until the learner reaches a previously defined accuracy. """

# learning until the accuracy reaches a given threshold
while learner.score(X_full, y_full) < 0.90:
    # Draw one random row index from the full dataset.
    stream_idx = np.random.choice(range(len(X_full)))
    # Only rows with uncertainty of at least 0.4 are labelled and taught.
    if classifier_uncertainty(learner, X_full[stream_idx].reshape(1, -1)) >= 0.4:
        learner.teach(X_full[stream_idx].reshape(1, -1), y_full[stream_idx].reshape(-1, ))
        print('Pixel no. %d queried, new accuracy: %f' % (stream_idx, learner.score(X_full, y_full)))

# visualizing final prediction
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = learner.predict_proba(X_full)[:, 1]
    plt.imshow(prediction.reshape(im_width, im_height))
    plt.title('Final prediction accuracy: %f' % learner.score(X_full, y_full))
    plt.show()
# Create the data to stream from: one coordinate pair per cell of `im`, built
# from indices along its first and second axes.
X_full = np.transpose(
    [np.tile(np.arange(im.shape[0]), im.shape[1]),
     np.repeat(np.arange(im.shape[1]), im.shape[0])]
)
# Map the intensity values against the grid.
y_full = np.asarray([im[row, col] for row, col in X_full])

# Assemble the initial training set from n_initial distinct random points.
n_initial = 5
# An int argument samples from np.arange(n) without building a range.
initial_idx = np.random.choice(len(X_full), size=n_initial, replace=False)
X_train, y_train = X_full[initial_idx], y_full[initial_idx]

# Initialize the learner. The keyword names follow the ActiveLearner signature
# used by the other learners in this file (estimator / X_training /
# y_training) instead of the older predictor / X_initial / y_initial names.
learner = ActiveLearner(
    estimator=RandomForestClassifier(),
    X_training=X_train, y_training=y_train
)

# The instances are randomly selected one by one; if an instance's uncertainty
# is above a threshold, the label is requested and shown to the learner. The
# process is continued until the learner reaches a previously defined accuracy.

# learning until the accuracy reaches a given threshold
while learner.score(X_full, y_full) < 0.7:
    stream_idx = np.random.choice(len(X_full))
    # Reshape once to the single-row 2-D form used for scoring and teaching.
    sample = X_full[stream_idx].reshape(1, -1)
    if classifier_uncertainty(learner, sample) >= 0.4:
        learner.teach(sample, y_full[stream_idx].reshape(-1, ))
# Stream-style querying of a committee over the unlabelled pool.
# NOTE(review): the nesting below was reconstructed from whitespace-collapsed
# source -- confirm against the original notebook before relying on it.

# Query budget: x percent of 150 (presumably the dataset size -- TODO confirm).
x = 40
queries = int((x / 100) * 150)
# Accuracy history, seeded with the committee's score before any query.
accuracy_list = []
accuracy_list.append(committee.score(X, Y))

# In[69]:

# Cursor into X_unlab. NOTE(review): this name shadows the builtin `iter`.
iter = 0
print("Accuracy after", 0, "iterations :", committee.score(X, Y))
for i in range(0, queries):
    # Advance the cursor (wrapping around the pool) for at most 135 steps,
    # stopping early at a sample whose uncertainty is at least 0.8.
    # NOTE(review): this loop variable rebinds `x`, the percentage set above;
    # `queries` has already been computed at this point.
    for x in range(135):
        if (classifier_uncertainty(committee, X_unlab[iter].reshape(1, -1)) >= 0.8):
            break
        iter = (iter + 1) % (X_unlab.shape[0])
    # NOTE(review): the queried index is the one *before* the cursor (and -1,
    # i.e. the last row, when the cursor is 0) -- confirm this is intended
    # rather than the sample the scan stopped on.
    q_id = iter - 1
    X_new = X_unlab[q_id].reshape(1, -1)
    Y_new = Y_unlab[q_id].reshape(1, )
    # Drop the queried row from the pool before teaching it to the committee.
    # NOTE(review): the pool shrinks here while `iter` is left unchanged --
    # check that the cursor cannot end up past the end of X_unlab.
    X_unlab, Y_unlab = np.asarray(np.delete(X_unlab, q_id, axis=0)), np.delete(Y_unlab, q_id, axis=0)
    committee.teach(X_new, Y_new)
# Creating new utility measures by linear combination and product.
# linear_combination will return 1.0*classifier_uncertainty + 1.0*classifier_margin
linear_combination = make_linear_combination(
    classifier_uncertainty, classifier_margin,
    weights=[1.0, 1.0]
)
# product will return (classifier_uncertainty**0.5)*(classifier_margin**0.1)
product = make_product(
    classifier_uncertainty, classifier_margin,
    exponents=[0.5, 0.1]
)

# Visualizing the different utility metrics: one scatter plot per measure,
# coloured by that measure's value for each sample in X.
# NOTE(review): newer Matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-white') -- confirm against the installed version.
with plt.style.context('seaborn-white'):
    # (subplot position, utility values, plot title)
    utilities = [
        (1, classifier_uncertainty(learner, X), 'Classifier uncertainty'),
        (2, classifier_margin(learner, X), 'Classifier margin'),
        (3, linear_combination(learner, X), '1.0*uncertainty + 1.0*margin'),
        # The title states the exponents actually passed to make_product.
        (4, product(learner, X), '(uncertainty**0.5)*(margin**0.1)')
    ]

    plt.figure(figsize=(18, 14))
    for idx, utility, title in utilities:
        plt.subplot(2, 2, idx)
        # The first two feature columns are used as plot coordinates.
        plt.scatter(x=X[:, 0], y=X[:, 1], c=utility, cmap='viridis', s=50)
        plt.title(title)
        plt.colorbar()

    plt.show()