def draw_learning_comparison(splits, r_score, u_score, d_score, samples_per_split, repeats, scoring, seed):
    """
    Plot the different learning methods on the same graph
    """
    # create ticks for x axis
    ticks = np.linspace(samples_per_split, splits * samples_per_split, splits)

    # set up the figure
    plt.figure()
    plt.grid()
    plt.xlabel('Training Instances')
    plt.ylabel(scoring)
    plt.title('%s Comparison using %s batches and %s repeats' % (scoring, splits, repeats))

    plt.plot(ticks, r_score, label='Random Sampling')
    plt.plot(ticks, u_score, label='Uncertainty Sampling')
    plt.plot(ticks, d_score, label='Density Sampling')

    plt.legend(loc='best')
    # include the scoring metric and seed in the filename so runs with different seeds do not overwrite each other
    plt.savefig('plots/learning_comparison_%s_seed%s_' % (scoring, seed) + time_stamped('.png'), format='png')
    plt.clf()

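
# Hypothetical usage sketch (not part of the original pipeline): it shows the argument shapes
# draw_learning_comparison expects, i.e. one averaged score per batch for each sampling strategy.
# Values are illustrative only; it assumes numpy is imported as np at module level, as elsewhere in this file.
def _demo_draw_learning_comparison():
    splits, repeats, samples_per_split, seed = 5, 10, 40, 1
    # one value per batch of newly labelled samples, per strategy
    r_score = np.linspace(0.60, 0.75, splits)
    u_score = np.linspace(0.62, 0.80, splits)
    d_score = np.linspace(0.61, 0.78, splits)
    draw_learning_comparison(splits, r_score, u_score, d_score, samples_per_split, repeats, 'Accuracy', seed)
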
def plot_roc_curve():
    """
    Plot roc curve, not cross validated for now
    """
    clf = build_pipeline()
    extractor = FeatureExtractor(word_gap=True, word_features=True, count_dict=True, phrase_count=True)
    features, labels = load_features_data(extractor)

    # transform from dict into array for training
    vec = DictVectorizer()
    data = vec.fit_transform(features).toarray()

    # split data into train and test, may want to use cross validation later
    train_data, test_data, train_labels, test_labels = cross_validation.train_test_split(data, labels,
                                                                                          train_size=0.9,
                                                                                          random_state=1)
    clf.fit(train_data, train_labels)
    confidence = clf.decision_function(test_data)

    fpr, tpr, thresholds = metrics.roc_curve(test_labels, confidence)
    auroc = metrics.auc(fpr, tpr)
    print len(fpr), len(tpr)

    # set up the figure
    plt.figure()
    #plt.grid()
    plt.xlabel('FP rate')
    plt.ylabel('TP rate')
    plt.title('Receiver operating characteristic')

    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auroc)
    plt.plot([0, 1], [0, 1], 'k--')

    plt.legend(loc='best')
    filepath = 'results/' + time_stamped('roc.png')
    plt.savefig(filepath, format='png')

def draw_plots(scores, av_accuracy, samples_per_split):
    """
    Create plots for precision, recall and f-score
    """
    #scores = pickle.load(open('av_scores.p', 'rb'))
    # each entry in scores is a (precision, recall, f-score) triple, each holding (false class, true class) values
    false_p = [s[0][0] for s in scores]
    true_p = [s[0][1] for s in scores]
    false_r = [s[1][0] for s in scores]
    true_r = [s[1][1] for s in scores]
    false_f = [s[2][0] for s in scores]
    true_f = [s[2][1] for s in scores]

    # create ticks for x axis
    ticks = np.linspace(samples_per_split, 9 * samples_per_split, 9)

    plot(ticks, true_p, false_p, 'Precision', 'plots/' + time_stamped('balanced_precision.png'))
    plot(ticks, true_r, false_r, 'Recall', 'plots/' + time_stamped('balanced_recall.png'))
    plot(ticks, true_f, false_f, 'F-score', 'plots/' + time_stamped('balanced_fscore.png'))
    plot(ticks, av_accuracy, None, 'Accuracy', 'plots/' + time_stamped('balanced_accuracy.png'))

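
# Hypothetical usage sketch (not from the original code): it illustrates the nested structure that
# draw_plots expects, i.e. one (precision, recall, f-score) triple per batch, each triple holding a
# (false class, true class) pair, plus one averaged accuracy value per batch. The dummy values are
# illustrative only, and the call relies on this module's own plot() helper.
def _demo_draw_plots():
    samples_per_split = 50
    scores = [
        # ((false_p, true_p), (false_r, true_r), (false_f, true_f)) for each of the 9 batches
        ((0.70, 0.75), (0.68, 0.72), (0.69, 0.73)) for _ in xrange(9)
    ]
    av_accuracy = [0.72] * 9
    draw_plots(scores, av_accuracy, samples_per_split)
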
def plot_roc_curve(clf, train_data, train_labels, test_data, test_labels):
    """
    Plot roc curve, not cross validated for now
    """
    clf.fit(train_data, train_labels)
    confidence = clf.decision_function(test_data)

    fpr, tpr, thresholds = metrics.roc_curve(test_labels, confidence)
    auroc = metrics.auc(fpr, tpr)

    # set up the figure
    plt.figure()
    #plt.grid()
    plt.xlabel('FP rate')
    plt.ylabel('TP rate')
    plt.title('Receiver operating characteristic')

    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auroc)
    plt.plot([0, 1], [0, 1], 'k--')

    plt.legend(loc='best')
    filepath = 'results/roc' + time_stamped('.png')
    plt.savefig(filepath, format='png')

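
# Hypothetical usage sketch for the parameterised plot_roc_curve above: it mirrors the data
# preparation done in the standalone version of the function, using the same module-level helpers
# (build_pipeline, FeatureExtractor, load_features_data). The split parameters are illustrative.
def _demo_plot_roc_curve():
    clf = build_pipeline()
    extractor = FeatureExtractor(word_gap=True, word_features=True, count_dict=True, phrase_count=True)
    features, labels = load_features_data(extractor)
    data = DictVectorizer().fit_transform(features).toarray()
    train_data, test_data, train_labels, test_labels = cross_validation.train_test_split(
        data, labels, train_size=0.9, random_state=1)
    plot_roc_curve(clf, train_data, train_labels, test_data, test_labels)
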
def learning_method_comparison(splits, repeats, seed, bag_of_words=0, orig_only=False, word_features=0):
    """
    Plot learning curves to compare accuracy of different learning methods
    """
    clf, extractor, sim = build_pipeline(bag_of_words, orig_only)

    # orig will always be used for training, new will be used for testing and added incrementally
    orig_records, new_records = load_records(orig_only)

    # samples per split = number of records remaining after removing test set divided by number of splits
    samples_per_split = (4 * len(new_records)) / (5 * splits)
    print 'samples per split', samples_per_split

    # can use below if similarities are to be generated each run (so extractor is definitely correct)
    # faster to pickle them in advance though for the sake of generating results
    #all_records = orig_records + new_records
    #sim = get_similarities(all_records, extractor)
    #sim = sim[len(orig_records):]

    r_scores = np.zeros(shape=(repeats, splits, 3))
    u_scores = np.zeros(shape=(repeats, splits, 3))
    d_scores = np.zeros(shape=(repeats, splits, 3))
    r_accuracy = np.zeros(shape=(repeats, splits))
    u_accuracy = np.zeros(shape=(repeats, splits))
    d_accuracy = np.zeros(shape=(repeats, splits))

    # loop number of times to generate average scores
    for i in xrange(repeats):
        print i
        # going to split the data here, then pass identical indices to the different learning methods
        all_indices = np.arange(len(new_records))

        # seed the shuffle here so the experiment can be repeated for different numbers of splits
        np.random.seed(seed * i)
        np.random.shuffle(all_indices)

        # take off 20% for testing
        test_indices = all_indices[:len(new_records) / 5]
        train_indices = all_indices[len(new_records) / 5:]

        # now use same test and train indices to generate scores for each learning method
        u_scores[i], u_accuracy[i] = uncertainty_sampling(
            clf, extractor, orig_records, new_records, train_indices, test_indices, splits, word_features)
        # density sampling reuses the uncertainty sampling routine, weighted by the similarity matrix
        d_scores[i], d_accuracy[i] = uncertainty_sampling(
            clf, extractor, orig_records, new_records, train_indices, test_indices, splits, word_features, sim)
        r_scores[i], r_accuracy[i] = random_sampling(
            clf, extractor, orig_records, new_records, train_indices, test_indices, splits, word_features)

    # create array of scores to pass to plotter
    scores = [['Accuracy'], ['Precision'], ['Recall'], ['F-Score']]

    # accuracy scores
    scores[0].append(r_accuracy.mean(axis=0, dtype=np.float64))
    scores[0].append(u_accuracy.mean(axis=0, dtype=np.float64))
    scores[0].append(d_accuracy.mean(axis=0, dtype=np.float64))

    # average over the repeats
    r_scores = r_scores.mean(axis=0, dtype=np.float64)
    u_scores = u_scores.mean(axis=0, dtype=np.float64)
    d_scores = d_scores.mean(axis=0, dtype=np.float64)

    # using numpy slicing to select correct scores
    for i in xrange(3):
        scores[i + 1].append(r_scores[:, i])
        scores[i + 1].append(u_scores[:, i])
        scores[i + 1].append(d_scores[:, i])

    f_name = 'pickles/newCurves_seed%s_splits%s_' % (seed, splits)
    f_name = f_name + time_stamped('.p')
    pickle.dump(scores, open(f_name, 'wb'))

    for i in xrange(4):
        draw_learning_comparison(splits, scores[i][1], scores[i][2], scores[i][3], samples_per_split,
                                 repeats, scores[i][0], seed)
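
# Hypothetical invocation sketch (parameter values are illustrative, not from the original code):
# compares random, uncertainty and density sampling over 5 batches, averaged over 10 repeats.
if __name__ == '__main__':
    learning_method_comparison(splits=5, repeats=10, seed=1, bag_of_words=0, orig_only=False, word_features=0)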