world = dataset.DiscreteDistribution([(1, dist) for cat, dist in rooms.items()])
test_samples = dataset.LabelledSample(world, TEST_SAMPLES)

#############################################################################

def map_classifier(sample):
    # Pick the room label whose distribution assigns the sample the highest probability.
    best = max([(dataset.SampleProbability(dist, sample), label)
                for label, dist in rooms.items()])
    return best[1]

# Tally (true label, predicted label) pairs over the test set.
confusion = collections.defaultdict(int)
for label, sample in map(dataset.ExtractLabel, test_samples):
    guess = map_classifier(sample)
    confusion[label, guess] += 1

# Plot the confusion matrix.
labels = set()
for ((label, guess), count) in confusion.items():
    labels.add(label)
    labels.add(guess)
labels = list(sorted(labels))

def correct(a, b):
    # Sign convention for the Hinton plot: correct guesses positive, errors negative.
    return 1 if a == b else -1

mat = [[confusion[labels[i], labels[j]] * correct(i, j)
        for j in range(len(labels))]
       for i in range(len(labels))]
graph.hinton(np.array(mat), title="Confusion matrix",
             vlabels=labels, hlabels=labels)
graph.savefig(OUTPUT)
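For readers without the `dataset` and `graph` helpers, here is a minimal, self-contained sketch of the same MAP decision rule and confusion-matrix tally using plain dictionaries. The toy `rooms_demo` distributions, observation format, and function names are illustrative assumptions, not the data or API used above.

import collections

# Toy class-conditional distributions over discrete observations (illustrative only).
rooms_demo = {
    'kitchen': {'fridge_hum': 0.7, 'tv_audio': 0.1, 'silence': 0.2},
    'lounge':  {'fridge_hum': 0.1, 'tv_audio': 0.6, 'silence': 0.3},
}

def map_classify(observation):
    # MAP rule with a uniform prior: pick the label maximising P(observation | label).
    return max(rooms_demo, key=lambda label: rooms_demo[label].get(observation, 0.0))

# Tally a confusion matrix from (true label, observation) pairs.
demo_test = [('kitchen', 'fridge_hum'), ('kitchen', 'silence'),
             ('lounge', 'tv_audio'), ('lounge', 'fridge_hum')]
confusion_demo = collections.defaultdict(int)
for true_label, obs in demo_test:
    confusion_demo[true_label, map_classify(obs)] += 1
print(dict(confusion_demo))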
        # (Tail of roc_performance, whose definition and per-sample loop appear
        # earlier and are not reproduced here; ct maps each detector score to
        # [novel, known] counts.)
        ct[threshold][label in known_labels] += 1

    # Walk scores from highest to lowest, accumulating the ROC counts.
    roc_curve = []
    for threshold, score in sorted(ct.items(), key=lambda x: x[0], reverse=True):
        roc_curve.append(tuple(score))
    print(roc_curve[0:10])
    return roc_curve


for output, nfeatures in [('synthetic-all.pdf', [5, 10, 15, 20, 35, 50]),
                          ('synthetic-3features.pdf', [3]),
                          ('synthetic-5features.pdf', [5]),
                          ('synthetic-10features.pdf', [10]),
                          ('synthetic-50features.pdf', [50])]:
    print("Generating", output)
    test_samples = list(generate_samples(classes=ALL_CLASSES, samples=5000,
                                         labelled=True, nfeatures=nfeatures))
    R1 = roc_performance(exact_novelty_detector, test_samples, KNOWN_CLASSES)
    R2 = roc_performance(uniform_novelty_detector, test_samples, KNOWN_CLASSES)
    R3 = roc_performance(independent_novelty_detector, test_samples, KNOWN_CLASSES)
    graph.newfig()
    graph.roc(data=R1, style='g^-', label='exact')
    graph.roc(data=R2, style='b*-', label='uniform')
    graph.roc(data=R3, style='ro-', label='independent')
    graph.savefig(output)
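Because the excerpt begins partway through `roc_performance`, here is one plausible shape for the whole function, assuming the detector returns a scalar novelty score per sample and that samples arrive as (label, sample) pairs. Treat it as a sketch of the counting scheme, not the author's exact code.

import collections

def roc_performance_sketch(detector, samples, known_labels):
    # For each detector score, count how many samples were novel (index 0,
    # label not in known_labels) versus known (index 1).
    ct = collections.defaultdict(lambda: [0, 0])
    for label, sample in samples:          # assumes (label, sample) pairs
        score = detector(sample)           # assumed scalar novelty score
        ct[score][label in known_labels] += 1

    # Walk scores from highest to lowest, emitting (novel, known) counts;
    # cumulative sums of these counts give the ROC operating points.
    roc_curve = []
    for score, counts in sorted(ct.items(), key=lambda x: x[0], reverse=True):
        roc_curve.append(tuple(counts))
    return roc_curve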
def plot_roc(threshold, samples, title):
    # ThresholdAndLabel and known_labels are assumed defined earlier:
    # ThresholdAndLabel maps a labelled sample to a (score, label) pair, and
    # known_labels is the set of non-novel class labels.
    roc_curve = []
    for score, label in sorted(map(ThresholdAndLabel, samples),
                               key=lambda x: x, reverse=True):
        roc_curve.append(label in known_labels)
    graph.roc(roc_curve, label=title)


plot_roc(threshold=density_threshold, samples=test_data,
         title='ROC for P(x|c) threshold')
plot_roc(threshold=semi_threshold, samples=test_data,
         title='ROC for P(x|c)/P(x) threshold')
graph.savefig('example1.pdf')
graph.show()

# The input space is small, so we can analyse it exhaustively.
def analyse(threshold, samples):
    for a in sorted(zip(map(threshold, samples), samples), reverse=True):
        print(a)

inputs = list(set(unlabelled_data))
print('P(x|c) Threshold')
analyse(density_threshold, inputs)
print('P(x|c)/P(x) Threshold')
analyse(semi_threshold, inputs)
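The two score functions compared here, `density_threshold` (P(x|c)) and `semi_threshold` (P(x|c)/P(x)), are defined earlier in the example and not shown in this excerpt. A minimal sketch of the underlying idea, assuming discrete class-conditional distributions stored as plain dicts and an empirical marginal over the unlabelled data, might look like the following; the names and data layout are assumptions, not the document's definitions.

def make_thresholds(class_densities, marginal):
    # class_densities: {label: {x: P(x|label)}}; marginal: {x: P(x)} (assumed layout).
    def density_threshold(x):
        # Score by the best class-conditional likelihood, max_c P(x|c).
        return max(d.get(x, 0.0) for d in class_densities.values())

    def semi_threshold(x):
        # Score by the ratio max_c P(x|c) / P(x); small values flag inputs that
        # are common overall yet poorly explained by every known class.
        return density_threshold(x) / max(marginal.get(x, 0.0), 1e-12)

    return density_threshold, semi_threshold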