def _fit_top_2_special(self, predictions):
    """Blend the colours of the two most probable outputs into one RGB string.

    Fitting across the top-2 points is handled separately from the general
    top-k case.

    Args:
        predictions: mapping of output -> probability.

    Returns:
        A string of the form "rgb(R, G, B)".  When the two most probable
        outputs share a colour (or only one output is given) that colour is
        returned directly; otherwise the result of
        ``geometry.fit_proportion`` between the two colours, rounded per
        component.
    """
    coloured = {}
    for output, probability in predictions.items():
        self.mapped_output(output)
        coloured[output] = (self.output_colour(output), probability)

    # Rank by probability, highest first, and keep the best two.  Keying by
    # colour collapses both entries into one when their colours coincide.
    ranked = sorted(coloured.items(), key=lambda entry: entry[1][1], reverse=True)
    by_colour = {}
    for output, (colour, probability) in ranked[:2]:
        by_colour[colour] = (output, probability)

    if len(by_colour) == 1:
        return "rgb(%d, %d, %d)" % next(iter(by_colour))

    (colour_a, (output_a, probability_a)), (colour_b, (output_b, probability_b)) = by_colour.items()

    # NOTE(review): this value is not used by the fit below.
    colour_gap = geometry.distance(colour_a, colour_b)

    # Not a typo: the probabilities are deliberately swapped so that the most
    # likely prediction gets the smallest distance, and vice versa.
    swapped = mlbase.regmax({
        output_a: probability_b,
        output_b: probability_a,
    })
    fit = geometry.fit_proportion(
        (colour_a, colour_b),
        (swapped[output_a], swapped[output_b]))
    return "rgb(%d, %d, %d)" % tuple(round(component) for component in fit)
def test_fit_point2_vline(self):
    """Fit against two reference points that share x == 0 (a vertical line).

    With target distances 2 and 4 from (0, -2) and (0, 4) the fitted point is
    expected at the origin, and the second value returned by ``fit_point``
    must stay below SMALL_MAX_T.
    """
    anchors = [[0, -2], [0, 4]]
    radii = [2, 4]
    origin = [0, 0]

    fitted, t = fit_point(anchors, radii)
    error = distance(fitted, origin)

    self.assertTrue(math.isclose(error, 0, abs_tol=0.0001), fitted)
    self.assertLess(t, SMALL_MAX_T)
def test_fit_point1_d0(self):
    """Fit against a single reference point with a target distance of zero.

    The fitted point is expected to coincide with the reference point, and
    the second value returned by ``fit_point`` must stay below SMALL_MAX_T.
    """
    anchor = [5, 3]
    radius = 0

    fitted, t = fit_point([anchor], [radius])
    error = distance(fitted, anchor)

    self.assertTrue(math.isclose(error, 0, abs_tol=.0001), fitted)
    self.assertLess(t, SMALL_MAX_T)
def test_fit_point3(self):
    """Fit against three reference points that all lie 4 units from the origin.

    Every target distance is 4, so the fitted point is expected at the
    origin; the second value returned by ``fit_point`` must stay below
    SMALL_MAX_T.
    """
    root_eight = math.sqrt(8)
    anchors = [[root_eight, root_eight], [0, -4], [-4, 0]]
    radii = [4, 4, 4]
    origin = [0, 0]

    fitted, t = fit_point(anchors, radii, visualize=True)
    error = distance(fitted, origin)

    self.assertTrue(math.isclose(error, 0, abs_tol=0.0001), fitted)
    self.assertLess(t, SMALL_MAX_T)
def test_fit_point2_hline(self):
    """Fit against two reference points that share y == 0 (a horizontal line).

    With target distances 2 and 4 from (-1, 0) and (5, 0) the fitted point is
    expected at (1, 0), and the second value returned by ``fit_point`` must
    stay below SMALL_MAX_T.
    """
    anchors = [[-1, 0], [5, 0]]
    radii = [2, 4]
    wanted = [1, 0]

    fitted, t = fit_point(anchors, radii, visualize=True)
    error = distance(fitted, wanted)

    self.assertTrue(math.isclose(error, 0, abs_tol=0.0001), fitted)
    self.assertLess(t, SMALL_MAX_T)
def test_fit_proportion(self):
    """Check ``fit_proportion`` between two endpoints in 2D and 3D.

    The even split is compared exactly; all other cases are compared to the
    expected point within an absolute tolerance of 0.0001.
    """
    origin = [0, 0]
    diagonal = [1, 1]

    # An even split must yield exactly the midpoint.
    self.assertEqual(fit_proportion([origin, diagonal], [.5, .5]), [.5, .5])

    approximate_cases = [
        ([origin, diagonal], [.2, .8], [.2, .2]),
        ([origin, diagonal], [.9, .1], [.9, .9]),
        # Test 3d as well
        ([(0, 0, 0), (1, -1, 2)], [.9, .1], [.9, -.9, 1.8]),
        ([(1, 2, 3), (3, 0, -3)], [.5, .5], [2, 1, 0]),
    ]

    for endpoints, proportions, wanted in approximate_cases:
        fitted = fit_proportion(endpoints, proportions)
        error = distance(fitted, wanted)
        self.assertTrue(math.isclose(error, 0, abs_tol=.0001), fitted)
def find_closest(query_part, query_layer, query):
    """Rank the stored activations of one part/layer by distance to a query.

    Only the axes named in ``query`` take part in the distance computation.
    Distance statistics and histograms of the closest 10%, 25% and 50% of the
    matches are printed as a side effect.

    Args:
        query_part: part a candidate must belong to (compared with ``==``).
        query_layer: layer a candidate must belong to (compared with ``==``).
        query: non-empty sequence of ``(axis, target)`` pairs; ``axis``
            indexes into ``candidate.point``.

    Returns:
        An empty list when no candidate matches.  Otherwise the tuple
        ``(sorted_result, cut10, cut25, cut50)`` where ``sorted_result`` is a
        list of ``(distance, candidate)`` pairs in ascending distance order
        and the cuts are the slice lengths covering the closest 10%, 25% and
        50% of the matches.

    Raises:
        AssertionError: if ``query`` is empty.
    """
    assert len(query) > 0, "empty query - would simply return everything!"

    # Both halves of the query are the same for every candidate, so split
    # them once instead of on every iteration.
    axes = [axis for axis, _ in query]
    target_point = [target for _, target in query]

    result = []
    for candidate in activation_data:
        if query_part == candidate.part and query_layer == candidate.layer:
            sub_point = [candidate.point[axis] for axis in axes]
            result.append((geometry.distance(sub_point, target_point), candidate))

    if not result:
        return result

    distances = [distance for distance, _ in result]
    minimum_distance = min(distances)
    maximum_distance = max(distances)
    print("distance stats: [%.4f, %.4f] q10: %.4f q25: %.4f q50: %.4f" %
          (minimum_distance, maximum_distance, maximum_distance * .1,
           maximum_distance * .25, maximum_distance * .5))

    # Sort on the distance alone: comparing whole (distance, candidate)
    # tuples would fall back to comparing the candidates whenever two
    # distances tie, which fails for candidates that define no ordering.
    sorted_result = sorted(result, key=lambda item: item[0])

    cut10 = int(len(result) * 0.1)
    cut25 = int(len(result) * 0.25)
    cut50 = int(len(result) * 0.5)
    histogram_q10, q10 = build_histogram(sorted_result[:cut10])
    histogram_q25, q25 = build_histogram(sorted_result[:cut25])
    histogram_q50, q50 = build_histogram(sorted_result[:cut50])
    print("q10: %.4f q25: %.4f q50: %.4f" % (q10, q25, q50))
    print("histograms\n q10: %s\n q25: %s\n q50: %s" %
          (adjutant.dict_as_str(histogram_q10),
           adjutant.dict_as_str(histogram_q25),
           adjutant.dict_as_str(histogram_q50)))
    return sorted_result, cut10, cut25, cut50
def _fit_top_k(self, predictions):
    """Turn a set of predictions into a single RGB colour string.

    Probabilities are summed per colour, the colour with the largest summed
    probability is selected, and it is then fitted against white
    ``[255, 255, 255]`` via ``geometry.fit_proportion`` using the proportions
    ``(total - probability, probability)``.
    NOTE(review): presumably this keeps a dominant colour close to itself and
    washes a less certain one out toward white - confirm against
    ``geometry.fit_proportion``.

    Args:
        predictions: mapping of output -> probability.

    Returns:
        A string of the form "rgb(R, G, B)" with each fitted component
        rounded.

    Raises:
        ValueError: if ``predictions`` is empty (``max`` of an empty
            sequence).
    """
    colour_probabilities = {}
    for output, probability in predictions.items():
        self.mapped_output(output)
        colour = self.output_colour(output)
        colour_probabilities[colour] = colour_probabilities.get(colour, 0) + probability

    colour, probability = max(colour_probabilities.items(), key=lambda item: item[1])
    total = sum(colour_probabilities.values())
    fit = geometry.fit_proportion((colour, [255, 255, 255]),
                                  (total - probability, probability))
    return "rgb(%d, %d, %d)" % tuple(round(i) for i in fit)
def test_fit_point(self):
    """Fit every combination of two reference sets and two distance sets.

    Each fit must be free of NaN components and its second return value must
    stay below BIG_MAX_T.  The four fitted points must additionally be
    pairwise separated by more than the listed minimum gaps.
    """
    reference_sets = {
        1: [[0, -2, 3], [1, 0, -6], [2, 4, 9], [3, 6, -12]],
        #1: [[0, -2, 3], [1, 0, -6], [0, 4, 9], [3, 6, -12]],
        2: [[3, -2, 3], [2, 0, -6], [1, 4, 9], [0, 6, -12]],
    }
    distance_sets = {
        1: [5, 4, 3, 2],
        2: [1, 2, 3, 4],
        #2: [1, 1, 4, 3],
    }

    fitted = {}
    for combo in ((1, 1), (1, 2), (2, 1), (2, 2)):
        reference_id, distance_id = combo
        point, t = fit_point(reference_sets[reference_id],
                             distance_sets[distance_id],
                             visualize=True)
        nan_free = all([not math.isnan(component) for component in point])
        self.assertTrue(nan_free, point)
        self.assertLess(t, BIG_MAX_T)
        fitted[combo] = point

    minimum_gaps = [
        ((1, 1), (1, 2), 1),
        ((1, 1), (2, 1), 1),
        ((1, 1), (2, 2), 1),
        ((1, 2), (2, 1), 1),
        ((1, 2), (2, 2), .75),
        ((2, 1), (2, 2), 1),
    ]
    for first, second, gap in minimum_gaps:
        self.assertGreater(distance(fitted[first], fitted[second]), gap)
def test_distance(self):
    """Check ``distance`` for identical points and for a known offset."""
    base = [0, 1, 2]

    # A point is at distance zero from an equal point.
    self.assertEqual(distance(base, [0, 1, 2]), 0.0)

    # Offsets of 1, 0 and 2 along the three axes.
    expected = math.sqrt(1**2 + 2**2)
    self.assertEqual(distance(base, [-1, 1, 4]), expected)
} #for word, probability in predictions[key].items(): # colour = self.colour_embeddings[word] # # if colour not in interpolation_points: # interpolation_points[colour] = (word, probability) # else: # if interpolation_points[colour][1] < probability: # interpolation_points[colour] = (word, probability) # if len(interpolation_points) == 1: colours[key] = "rgb(%d, %d, %d)" % next(iter(interpolation_points.keys())) else: maximum_distance = None for pair in itertools.combinations([colour for colour in interpolation_points.keys()], 2): distance = geometry.distance(pair[0], pair[1]) if maximum_distance is None or distance > maximum_distance: maximum_distance = distance lowest_probability = min([p for w, p in interpolation_points.values()]) highest_probability = max([p for w, p in interpolation_points.values()]) maximum_domain = highest_probability + lowest_probability prediction_distances = [(w, maximum_distance + (-p * maximum_distance / maximum_domain)) for w, p in interpolation_points.values()] fit, _ = geometry.fit_point([colour_embeddings[item[0]] for item in prediction_distances], [item[1] for item in prediction_distances], epsilon=0.1, visualize=False) print("rgb(%d, %d, %d)" % tuple([round(i) for i in fit]))
def measure(lstm, data_dir, kind, keys):
    """Track the smallest and largest per-step state changes for each key.

    Every instance streamed from ``data.stream_data(data_dir, kind)`` is fed
    through a fresh stepwise run of ``lstm``.  For each requested key the
    change at a step is ``geometry.distance`` to that key's previous state,
    or ``geometry.hypotenuse`` of the state on the first step.

    Args:
        lstm: model providing ``stepwise``, ``part_layers`` and ``encode_key``.
        data_dir: passed through to ``data.stream_data``.
        kind: passed through to ``data.stream_data``.
        keys: encoded part/layer keys to measure.

    Returns:
        ``(global_minimum, global_maximum, sequence_changes)``.  The first two
        map each key to ``(distance, step_index, sequence)``, or to
        ``(None, None, None)`` if nothing was recorded.  ``sequence_changes``
        maps each sequence that set a new global extreme to its per-key list
        of step distances.
    """
    no_local = (None, None)
    no_global = (None, None, None)

    notable_changes = {}
    overall_min = {key: no_global for key in keys}
    overall_max = {key: no_global for key in keys}

    for instance_index, xy in enumerate(data.stream_data(data_dir, kind)):
        if instance_index % 100 == 0:
            logging.debug("At the %d instance." % (instance_index))

        # The sequence is every input token followed by the final target token.
        sequence = tuple([item[0] for item in xy.x]) + (xy.y[-1][0], )
        stepper = lstm.stepwise(handle_unknown=True)
        step_changes = {key: [] for key in keys}
        last_state = {}
        smallest = {key: no_local for key in keys}
        largest = {key: no_local for key in keys}

        for step, word_pos in enumerate(xy.x):
            _, instruments = stepper.step(word_pos[0], rnn.LSTM_INSTRUMENTS)

            for part, layer in lstm.part_layers():
                key = lstm.encode_key(part, layer)
                if key not in keys:
                    continue

                state = instruments[part][layer]
                if key in last_state:
                    change = geometry.distance(last_state[key], state)
                else:
                    # No earlier state for this key yet.
                    change = geometry.hypotenuse(state)

                step_changes[key].append(change)
                last_state[key] = state

                if smallest[key] == no_local or change < smallest[key][0]:
                    smallest[key] = (change, step)
                if largest[key] == no_local or change > largest[key][0]:
                    largest[key] = (change, step)

        for key in keys:
            if overall_min[key] == no_global or smallest[key][0] < overall_min[key][0]:
                overall_min[key] = smallest[key] + (sequence, )
                # Only keeping track of the more notable sequence changes
                notable_changes[sequence] = step_changes
                sequence_str, changes_str = stringify(sequence, step_changes[key])
                logging.debug(
                    "Noting minimum for %s of %.4f @%d:\n %s\n %s" %
                    (key, smallest[key][0], smallest[key][1], sequence_str, changes_str))

            if overall_max[key] == no_global or largest[key][0] > overall_max[key][0]:
                overall_max[key] = largest[key] + (sequence, )
                # Only keeping track of the more notable sequence changes
                notable_changes[sequence] = step_changes
                sequence_str, changes_str = stringify(sequence, step_changes[key])
                logging.debug(
                    "Noting maximum for %s of %.4f @%d:\n %s\n %s" %
                    (key, largest[key][0], largest[key][1], sequence_str, changes_str))

    return overall_min, overall_max, notable_changes