def _majority_vote(predictions):
    """Return, for every sample, the label predicted by most committee members.

    Args:
        predictions: sequence of equally long 1-D label arrays, one per member.

    Returns:
        1-D array holding the most common label of each column; ties go to
        whichever label ``Counter.most_common`` lists first.
    """
    return np.apply_along_axis(
        lambda votes: Counter(votes).most_common()[0][0],
        0,
        np.stack(predictions))


def _committee_accuracy(committee, X_fit, y_fit, X_eval, y_eval):
    """Refit every member on (X_fit, y_fit) and score the majority vote.

    Args:
        committee: iterable of estimators exposing ``fit`` and ``predict``.
        X_fit, y_fit: labeled data every member is trained on.
        X_eval, y_eval: held-out data the committee decision is scored on.

    Returns:
        Fraction of ``X_eval`` samples whose majority vote equals ``y_eval``.
    """
    predictions = []
    for model in committee:
        model.fit(X_fit, y_fit)
        predictions.append(model.predict(X_eval))
    return np.mean(_majority_vote(predictions) == y_eval)


def main():
    """Compare random sampling against entropy-based querying on MNIST.

    Two committees of ten logistic-regression models start from the same
    labeled seed (1% of the training split).  For 30 rounds, 32 pool samples
    are added to each committee's labeled set: drawn uniformly at random for
    the passive committee, and chosen by
    ``ActiveLearner(strategy='entropy').rank`` for the active one.  After
    every round each committee is refit and its majority-vote accuracy on
    the held-out split is recorded.  Both accuracy curves are written to
    ``./misc/random_model_accuracy.txt`` and
    ``./misc/active_model_accuracy.txt``.
    """
    committee_size = 10
    batch_size = 32
    num_rounds = 30

    classifier_random = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000)
        for _ in range(committee_size)
    ]
    classifier_active = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000)
        for _ in range(committee_size)
    ]

    all_results_record = {'entropy': []}
    passive_results = []

    # NOTE(review): fetch_mldata was removed in scikit-learn 0.22; newer
    # releases need fetch_openml('mnist_784') instead -- confirm the pinned
    # scikit-learn version before upgrading.
    mnist = fetch_mldata('MNIST original')
    x_train, x_test, y_train, y_test = train_test_split(
        mnist.data, mnist.target)

    # 1% of the training split is the labeled seed; the remaining 99% is the
    # pool whose labels are only revealed when a sample is queried.
    labeled_data, unlabeled_data, available_label, oracle_label = \
        train_test_split(x_train, y_train, test_size=0.99)

    for model in classifier_random:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    # The active committee must be fitted before the first rank() call.
    for model in classifier_active:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    labeled_data_rand = deepcopy(labeled_data)
    available_label_rand = deepcopy(available_label)
    labeled_data_active = deepcopy(labeled_data)
    available_label_active = deepcopy(available_label)

    # NOTE(review): queried rows are never removed from the pool, so a later
    # round may select the same sample again -- confirm whether the pool
    # should shrink after each query.
    for num_queries in [batch_size] * num_rounds:
        # Passive baseline: label a uniformly random batch from the pool.
        random_queries = np.random.choice(unlabeled_data.shape[0],
                                          num_queries,
                                          replace=False)
        labeled_data_rand = np.concatenate(
            (labeled_data_rand, unlabeled_data[random_queries, :]))
        available_label_rand = np.concatenate(
            (available_label_rand, oracle_label[random_queries]))
        passive_results.append(
            _committee_accuracy(classifier_random,
                                labeled_data_rand, available_label_rand,
                                x_test, y_test))

        # Active learner: label the batch ranked highest by the strategy.
        al_obj = ActiveLearner(strategy='entropy')
        for model in classifier_active:
            model.classes_ = np.arange(10)
        indexes = al_obj.rank(classifier_active, unlabeled_data, num_queries)
        labeled_data_active = np.concatenate(
            (labeled_data_active, unlabeled_data[indexes, :]))
        available_label_active = np.concatenate(
            (available_label_active, oracle_label[indexes]))
        all_results_record['entropy'].append(
            _committee_accuracy(classifier_active,
                                labeled_data_active, available_label_active,
                                x_test, y_test))

    np.savetxt('./misc/random_model_accuracy.txt', passive_results)
    np.savetxt('./misc/active_model_accuracy.txt',
               all_results_record['entropy'])
X_labeled, X_unlabeled, y_labeled, y_oracle = train_test_split( X_train, y_train, test_size=0.8) for num_queries in (0, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500): num_samples.append(num_queries) random_queries = np.random.choice(X_unlabeled.shape[0], num_queries, replace=False) X_augmented = vstack((X_labeled, X_unlabeled[random_queries, :])) y_augmented = np.concatenate((y_labeled, y_oracle[random_queries])) clf.fit(X_augmented, y_augmented) random_sampling_results\ .append(np.sum(clf.predict(X_test) == y_test) / np.shape(X_test)[0]) for strategy in uncertainty_sampling_results: clf.fit(X_labeled, y_labeled) idx = ActiveLearner(strategy=strategy).rank(clf, X_unlabeled, num_queries) X_augmented = vstack((X_labeled, X_unlabeled[idx, :])) y_augmented = np.concatenate((y_labeled, y_oracle[idx])) clf.fit(X_augmented, y_augmented) uncertainty_sampling_results[strategy]\ .append(np.sum(clf.predict(X_test) == y_test) / np.shape(X_test)[0]) all_random_sampling_results.append(random_sampling_results) for strategy in uncertainty_sampling_results: all_uncertainty_sampling_results[strategy]\ .append(uncertainty_sampling_results[strategy]) sns.set_style("darkgrid") plt.plot(num_samples, np.mean(all_random_sampling_results, axis=0), 'red', num_samples, np.mean(all_uncertainty_sampling_results['least_confident'], axis=0), 'blue',
def _majority_vote(predictions):
    """Return, for every sample, the label predicted by most committee members.

    Args:
        predictions: sequence of equally long 1-D label arrays, one per member.

    Returns:
        1-D array holding the most common label of each column; ties go to
        whichever label ``Counter.most_common`` lists first.
    """
    return np.apply_along_axis(
        lambda votes: Counter(votes).most_common()[0][0],
        0,
        np.stack(predictions))


def _committee_accuracy(committee, X_fit, y_fit, X_eval, y_eval):
    """Refit every member on (X_fit, y_fit) and score the majority vote.

    Args:
        committee: iterable of estimators exposing ``fit`` and ``predict``.
        X_fit, y_fit: labeled data every member is trained on.
        X_eval, y_eval: held-out data the committee decision is scored on.

    Returns:
        Fraction of ``X_eval`` samples whose majority vote equals ``y_eval``.
    """
    predictions = []
    for model in committee:
        model.fit(X_fit, y_fit)
        predictions.append(model.predict(X_eval))
    return np.mean(_majority_vote(predictions) == y_eval)


def main():
    """Compare random sampling against vote-entropy querying on Fashion-MNIST.

    Two committees of ten logistic-regression models start from the same
    labeled seed (1% of the training split).  For 10 rounds, 32 pool samples
    are added to each committee's labeled set: drawn uniformly at random for
    the passive committee, and chosen by
    ``ActiveLearner(strategy='vote_entropy').rank`` for the active one.
    After every round each committee is refit and its majority-vote accuracy
    on the held-out split is recorded.  Both accuracy curves are written to
    ``./misc/random_model_accuracy.txt`` and
    ``./misc/active_model_accuracy.txt``.
    """
    committee_size = 10
    batch_size = 32
    num_rounds = 10

    classifier_random = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000)
        for _ in range(committee_size)
    ]
    classifier_active = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000)
        for _ in range(committee_size)
    ]

    decision = {'vote_entropy': []}
    passive_results = []

    # NOTE(review): the test set returned by load_data() is never used; the
    # evaluation split is carved out of the training set below -- confirm
    # this is intended.
    (X_train_set, y_train_set), (X_test_set, y_test_set) = \
        fashion_mnist.load_data()
    x_train, x_test, y_train, y_test = train_test_split(
        X_train_set, y_train_set)

    # Flatten each image into one feature row for logistic regression.
    x_train = x_train.reshape(x_train.shape[0],
                              x_train.shape[1] * x_train.shape[2])
    x_test = x_test.reshape(x_test.shape[0],
                            x_test.shape[1] * x_test.shape[2])

    # 1% of the training split is the labeled seed; the remaining 99% is the
    # pool whose labels are only revealed when a sample is queried.
    labeled_data, unlabeled_data, available_label, oracle_label = \
        train_test_split(x_train, y_train, test_size=0.99)

    for model in classifier_random:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    # The active committee must be fitted before the first rank() call.
    for model in classifier_active:
        model.classes_ = np.arange(10)
        model.fit(labeled_data, available_label)

    labeled_data_rand = deepcopy(labeled_data)
    available_label_rand = deepcopy(available_label)
    labeled_data_active = deepcopy(labeled_data)
    available_label_active = deepcopy(available_label)

    # NOTE(review): queried rows are never removed from the pool, so a later
    # round may select the same sample again -- confirm whether the pool
    # should shrink after each query.
    for new_samples in [batch_size] * num_rounds:
        # Passive baseline: label a uniformly random batch from the pool.
        random_queries = np.random.choice(unlabeled_data.shape[0],
                                          new_samples,
                                          replace=False)
        labeled_data_rand = np.concatenate(
            (labeled_data_rand, unlabeled_data[random_queries, :]))
        available_label_rand = np.concatenate(
            (available_label_rand, oracle_label[random_queries]))
        passive_results.append(
            _committee_accuracy(classifier_random,
                                labeled_data_rand, available_label_rand,
                                x_test, y_test))

        # Active learner: label the batch ranked highest by the strategy.
        al_obj = ActiveLearner(strategy='vote_entropy')
        for model in classifier_active:
            model.classes_ = np.arange(10)
        indexes = al_obj.rank(classifier_active, unlabeled_data, new_samples)
        labeled_data_active = np.concatenate(
            (labeled_data_active, unlabeled_data[indexes, :]))
        available_label_active = np.concatenate(
            (available_label_active, oracle_label[indexes]))
        decision['vote_entropy'].append(
            _committee_accuracy(classifier_active,
                                labeled_data_active, available_label_active,
                                x_test, y_test))

    np.savetxt('./misc/random_model_accuracy.txt', passive_results)
    # savetxt needs the accuracy sequence itself, not the enclosing dict.
    np.savetxt('./misc/active_model_accuracy.txt', decision['vote_entropy'])
def _majority_vote(predictions):
    """Return, for every sample, the label predicted by most committee members.

    Args:
        predictions: sequence of equally long 1-D label arrays, one per member.

    Returns:
        1-D array holding the most common label of each column; ties go to
        whichever label ``Counter.most_common`` lists first.
    """
    return np.apply_along_axis(
        lambda votes: Counter(votes).most_common()[0][0],
        0,
        np.stack(predictions))


def _committee_accuracy(committee, X_fit, y_fit, X_eval, y_eval):
    """Refit every member on (X_fit, y_fit) and score the majority vote.

    Args:
        committee: iterable of estimators exposing ``fit`` and ``predict``.
        X_fit, y_fit: labeled data every member is trained on.
        X_eval, y_eval: held-out data the committee decision is scored on.

    Returns:
        Fraction of ``X_eval`` samples whose majority vote equals ``y_eval``.
    """
    predictions = []
    for model in committee:
        model.fit(X_fit, y_fit)
        predictions.append(model.predict(X_eval))
    return np.mean(_majority_vote(predictions) == y_eval)


def main():
    """Compare random sampling against entropy-based querying on Fashion-MNIST.

    Two committees of ten logistic-regression models start from the same
    labeled seed (1% of the training split).  For 30 rounds, 32 pool samples
    are added to each committee's labeled set: drawn uniformly at random for
    the passive committee, and chosen by
    ``ActiveLearner(strategy='entropy').rank`` for the active one.  After
    every round each committee is refit and its majority-vote accuracy on
    the held-out split is recorded.  Both accuracy curves are written to
    ``./misc/random_model_accuracy.txt`` and
    ``./misc/active_model_accuracy.txt``.
    """
    committee_size = 10
    batch_size = 32
    num_rounds = 30

    classifier_random = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000)
        for _ in range(committee_size)
    ]
    classifier_active = [
        LogisticRegression(solver='lbfgs',
                           multi_class='multinomial',
                           max_iter=1000)
        for _ in range(committee_size)
    ]

    results_record = {'entropy': []}
    passive_results = []

    # NOTE(review): the test set returned by load_data() is never used; the
    # evaluation split is carved out of the training set below -- confirm
    # this is intended.
    (X_train_set, y_train_set), (X_test_set, y_test_set) = \
        fashion_mnist.load_data()
    x_train, x_test, y_train, y_test = train_test_split(
        X_train_set, y_train_set)

    # Flatten each image into one feature row for logistic regression.
    x_train = x_train.reshape(x_train.shape[0],
                              x_train.shape[1] * x_train.shape[2])
    x_test = x_test.reshape(x_test.shape[0],
                            x_test.shape[1] * x_test.shape[2])

    # 1% of the training split is the labeled seed; the remaining 99% is the
    # pool whose labels are only revealed when a sample is queried.
    X_labeled, X_unlabeled, y_labeled, y_oracle = train_test_split(
        x_train, y_train, test_size=0.99)

    for model in classifier_random:
        model.classes_ = np.arange(10)
        model.fit(X_labeled, y_labeled)

    # The active committee must be fitted before the first rank() call.
    for model in classifier_active:
        model.classes_ = np.arange(10)
        model.fit(X_labeled, y_labeled)

    X_labeled_rand = deepcopy(X_labeled)
    y_labeled_rand = deepcopy(y_labeled)
    X_labeled_active = deepcopy(X_labeled)
    y_labeled_active = deepcopy(y_labeled)

    # NOTE(review): queried rows are never removed from the pool, so a later
    # round may select the same sample again -- confirm whether the pool
    # should shrink after each query.
    for new_examples_count in [batch_size] * num_rounds:
        # Passive baseline: label a uniformly random batch from the pool.
        random_datapoint = np.random.choice(X_unlabeled.shape[0],
                                            new_examples_count,
                                            replace=False)
        X_labeled_rand = np.concatenate(
            (X_labeled_rand, X_unlabeled[random_datapoint, :]))
        y_labeled_rand = np.concatenate(
            (y_labeled_rand, y_oracle[random_datapoint]))
        passive_results.append(
            _committee_accuracy(classifier_random,
                                X_labeled_rand, y_labeled_rand,
                                x_test, y_test))

        # Active learner: label the batch ranked highest by the strategy.
        al_obj = ActiveLearner(strategy='entropy')
        for model in classifier_active:
            model.classes_ = np.arange(10)
        indexes = al_obj.rank(classifier_active, X_unlabeled,
                              new_examples_count)
        X_labeled_active = np.concatenate(
            (X_labeled_active, X_unlabeled[indexes, :]))
        y_labeled_active = np.concatenate(
            (y_labeled_active, y_oracle[indexes]))
        results_record['entropy'].append(
            _committee_accuracy(classifier_active,
                                X_labeled_active, y_labeled_active,
                                x_test, y_test))

    np.savetxt('./misc/random_model_accuracy.txt', passive_results)
    np.savetxt('./misc/active_model_accuracy.txt', results_record['entropy'])
# Make the package in the parent directory importable when this script is run
# directly from its own folder.
sys.path.insert(0,
                os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from active_learning.active_learning import ActiveLearner
from collections import Counter

# Query-by-committee demo on iris: two logistic-regression models start from
# three labeled samples (rows 0, 50 and 100) and label one extra sample per
# round, chosen by the vote-entropy strategy.
X, y = load_iris(return_X_y=True)
n_queries = 100

X_labeled, y_labeled = X[[0, 50, 100]], y[[0, 50, 100]]

estimators = [LogisticRegression(solver='lbfgs', multi_class='auto'),
              LogisticRegression(solver='lbfgs', multi_class='auto')]

# The committee must be fitted before the first rank() call.
for estimator in estimators:
    estimator.fit(X_labeled, y_labeled)

learner = ActiveLearner(strategy='vote_entropy')
results = []
correct = 0

for _ in range(n_queries):
    # NOTE(review): the pool is the full dataset X, so already-labeled rows
    # can be queried again -- confirm this is intended.
    query_idx = learner.rank(estimators, X, num_queries=1)
    X_labeled = np.concatenate((X_labeled, X[query_idx]), axis=0)
    y_labeled = np.concatenate((y_labeled, y[query_idx]), axis=0)

    # Start from an empty list every round so the vote only counts the
    # freshly refitted estimators, not predictions from earlier rounds.
    preds = []
    for estimator in estimators:
        estimator.fit(X_labeled, y_labeled)
        preds.append(estimator.predict(X))

    majority_votes = np.apply_along_axis(
        lambda votes: Counter(votes).most_common()[0][0],
        0,
        np.stack(preds))
    n_correct = np.sum(majority_votes == y)
    accuracy = n_correct / np.shape(X)[0]
    correct += n_correct