def run_full_attack():
    global query_count, SAVED_QUERIES

    extracted_normals = []
    extracted_biases = []

    known_T = KnownT(extracted_normals, extracted_biases)

    for layer_num in range(0, len(A) - 1):
        # For each layer of the network ...

        # First set up the critical points generator
        critical_points = sweep_for_critical_points(PARAM_SEARCH_AT_LOCATION,
                                                    known_T)

        # Extract weights corresponding to those critical points
        extracted_normal, extracted_bias, mask = layer_recovery.compute_layer_values(
            critical_points, known_T, layer_num)

        # Report how well we're doing
        check_quality(layer_num, extracted_normal, extracted_bias)

        # Now, make them more precise
        extracted_normal, extracted_bias = refine_precision.improve_layer_precision(
            layer_num, known_T, extracted_normal, extracted_bias)
        print("Query count", query_count)

        # And report how well we're doing now
        check_quality(layer_num, extracted_normal, extracted_bias)

        # New generator
        critical_points = sweep_for_critical_points(1e1)

        # Solve for signs
        if layer_num == 0 and sizes[1] <= sizes[0]:
            extracted_sign = sign_recovery.solve_contractive_sign(
                known_T, extracted_normal, extracted_bias, layer_num)
        elif layer_num > 0 and sizes[1] <= sizes[0] and all(
                sizes[x + 1] <= sizes[x] / 2 for x in range(1, len(sizes) - 1)):
            try:
                extracted_sign = sign_recovery.solve_contractive_sign(
                    known_T, extracted_normal, extracted_bias, layer_num)
            except AcceptableFailure:
                print("Contractive solving failed; falling back to the "
                      "noncontractive method")
                if layer_num == len(A) - 2:
                    print("Solve final two")
                    break

                extracted_sign, _ = sign_recovery.solve_layer_sign(
                    known_T,
                    extracted_normal,
                    extracted_bias,
                    critical_points,
                    layer_num,
                    l1_mask=np.int32(np.sign(mask)))
        else:
            if layer_num == len(A) - 2:
                print("Solve final two")
                break

            extracted_sign, _ = sign_recovery.solve_layer_sign(
                known_T,
                extracted_normal,
                extracted_bias,
                critical_points,
                layer_num,
                l1_mask=np.int32(np.sign(mask)))

        print("Extracted", extracted_sign)
        print('real sign', np.int32(np.sign(mask)))

        print("Total query count", query_count)

        # Correct signs
        extracted_normal *= extracted_sign
        extracted_bias *= extracted_sign
        extracted_bias = np.array(extracted_bias, dtype=np.float64)

        # Report how we're doing
        extracted_normal, extracted_bias = check_quality(
            layer_num, extracted_normal, extracted_bias, do_fix=True)

        extracted_normals.append(extracted_normal)
        extracted_biases.append(extracted_bias)

        known_T = KnownT(extracted_normals, extracted_biases)

        for a, b in sorted(query_count_at.items(), key=lambda x: -x[1]):
            print('count', b, '\t', 'line:', a, ':', self_lines[a - 1].strip())

    # And then finish up
    if len(extracted_normals) == len(sizes) - 2:
        print("Just solve final layer")
        N = int(len(SAVED_QUERIES) / 1000) or 1
        ins, outs = zip(*SAVED_QUERIES[::N])
        solve_final_layer(known_T, np.array(ins), np.array(outs))
    else:
        print("Solve final two")
        solve_final_two_layers(known_T, extracted_normal, extracted_bias)
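# A minimal sketch (illustrative only, not called anywhere above) of what the
# recovered parameters mean once the loop finishes: each (normal, bias) pair
# reproduces one hidden layer of the victim network up to per-neuron scaling
# and permutation, with the final affine layer fit separately by
# solve_final_layer / solve_final_two_layers.
def _forward_extracted_sketch(x, extracted_normals, extracted_biases):
    h = x
    for W, b in zip(extracted_normals, extracted_biases):
        h = np.maximum(0, h @ W + b)  # ReLU after each recovered hidden layer
    return h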
def follow_hyperplane(LAYER, start_point, known_T, known_A, known_B,
                      history=None, MAX_POINTS=1e3, only_need_positive=False):
    """
    This is the ugly algorithm that lets us recover signs for expansive
    networks.

    Assumes we have extracted up to layer K-1 correctly, and layer K up
    to sign.

    start_point is a point on the critical hyperplane of a neuron on
    layer K+1.

    known_T is the transformation that computes up to layer K-1, with
    known_A and known_B being the layer K matrix up to sign.

    We're going to come up with a bunch of different inputs, each of which
    keeps the same critical point held constant at zero.
    """
    if history is None:
        # Use None instead of a mutable default list so that state never
        # leaks between separate calls.
        history = []

    def choose_new_direction_from_minimize(previous_axis):
        """
        Given the current point, which is at a critical point of the
        next-layer neuron, compute which direction we should travel in to
        continue finding more points on this hyperplane.

        Our goal is to pick a direction that lets us explore a new part of
        the space we haven't seen before.
        """

        print("Choose a new direction to travel in")
        if len(history) == 0:
            which_to_change = 0
            new_perp_dir = perp_dir  # closure over the main loop's current normal
            new_start_point = start_point
            initial_signs = get_polytope_at(known_T, known_A, known_B,
                                            start_point)

            # If we're in the 1 region of the polytope then we try to make it
            # smaller, otherwise make it bigger
            fn = min if initial_signs[0] == 1 else max
        else:
            neuron_values = np.array([x[1] for x in history])

            neuron_positive_count = np.sum(neuron_values > 1, axis=0)
            neuron_negative_count = np.sum(neuron_values < -1, axis=0)

            mean_plus_neuron_value = neuron_positive_count / (
                neuron_positive_count + neuron_negative_count + 1)
            mean_minus_neuron_value = neuron_negative_count / (
                neuron_positive_count + neuron_negative_count + 1)

            # We want to find values that are consistently 0 or 1,
            # so map 0 -> 0 and 1 -> 0 and the middle to higher values
            if only_need_positive:
                neuron_consistency = mean_plus_neuron_value
            else:
                neuron_consistency = mean_plus_neuron_value * mean_minus_neuron_value

            # Print out how much progress we've made.
            # This estimate is probably worse than Windows 95's estimated
            # time remaining. At least it's monotonic. Be thankful for that.
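            # (Note: in the default two-sided case, neuron_consistency[i] is
            # zero exactly when neuron i has only ever been observed on one
            # side of its hyperplane; e.g. counts (+: 5, -: 0) give
            # (5/6) * 0 = 0. The progress metric below is therefore the
            # fraction of neurons already seen on both sides, and the argmin
            # afterwards picks a least-explored neuron.)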
            print("Progress",
                  "%.1f" % (np.mean(neuron_consistency != 0) * 100) + "%")
            print("Counts on each side of each neuron")
            print(neuron_positive_count)
            print(neuron_negative_count)

            # Choose the smallest value, which is the most consistent
            which_to_change = np.argmin(neuron_consistency)
            print("Try to explore the other side of neuron", which_to_change)

            if which_to_change != previous_axis:
                if previous_axis is not None and neuron_consistency[
                        previous_axis] == neuron_consistency[which_to_change]:
                    # If the previous axis we were working on has the same
                    # consistency value as this one, then don't change our
                    # mind; just keep going at that one (almost
                    # always--sometimes we can get stuck, and this lets us
                    # get unstuck)
                    which_to_change = previous_axis
                    new_start_point = start_point
                    new_perp_dir = perp_dir
                else:
                    valid_axes = np.where(
                        neuron_consistency == neuron_consistency[which_to_change])[0]

                    best = (np.inf, None, None)

                    for _, potential_hidden_vector, potential_point in history[-1:]:
                        for potential_axis in valid_axes:
                            value = potential_hidden_vector[potential_axis]
                            if np.abs(value) < best[0]:
                                best = (np.abs(value), potential_axis,
                                        potential_point)

                    _, which_to_change, new_start_point = best
                    new_perp_dir = perp_dir
            else:
                new_start_point = start_point
                new_perp_dir = perp_dir

            # If we're in the 1 region of the polytope then we try to make it
            # smaller, otherwise make it bigger
            fn = (min if neuron_positive_count[which_to_change] >
                  neuron_negative_count[which_to_change] else max)
            arg_fn = (np.argmin if neuron_positive_count[which_to_change] >
                      neuron_negative_count[which_to_change] else np.argmax)
            print("Changing", which_to_change, 'to flip sides because mean is',
                  mean_plus_neuron_value[which_to_change])

        val = matmul(known_T.forward(new_start_point, with_relu=True), known_A,
                     known_B)[which_to_change]

        initial_signs = get_polytope_at(known_T, known_A, known_B,
                                        new_start_point)

        # Now we're going to figure out which direction makes this value
        # biggest/smallest. There's probably an analytical way to do this,
        # but thinking is hard. Just try 1000 random angles.
        # There are no queries involved in this process.
        choices = []
        for _ in range(1000):
            random_dir = np.random.normal(size=DIM)

            # Project out the component along the hyperplane normal so that
            # the step stays (to first order) on the critical hyperplane.
            perp_component = np.dot(random_dir, new_perp_dir) / (np.dot(
                new_perp_dir, new_perp_dir)) * new_perp_dir
            parallel_dir = random_dir - perp_component

            # This is the direction we're going to travel in.
            go_direction = parallel_dir / np.sum(parallel_dir**2)**.5

            try:
                a_bit_further, high = binary_search_towards(
                    known_T, known_A, known_B, new_start_point, initial_signs,
                    go_direction)
            except AcceptableFailure:
                continue
            if a_bit_further is None:
                continue

            # Choose the direction that changes the Kth value by the most
            val = matmul(
                known_T.forward(a_bit_further[np.newaxis, :], with_relu=True),
                known_A, known_B)[0][which_to_change]

            #print('\t', val, high)

            choices.append([val, new_start_point + high * go_direction])

        best_value, multiple_intersection_point = fn(choices,
                                                     key=lambda x: x[0])

        print('Value', best_value)

        return new_start_point, multiple_intersection_point, which_to_change

    ###################################################
    ### Actual code to do the sign recovery starts. ###
    ###################################################
    start_box_step = 0
    points_on_plane = []

    if CHEATING:
        layer = np.abs(
            cheat_get_inner_layers(np.array(start_point))[LAYER + 1])
        print("Layer", layer)
        which_is_zero = np.argmin(layer)

    current_change_axis = 0

    while True:
        print("\n\n")
        print("-----" * 10)

        if CHEATING:
            layer = np.abs(
                cheat_get_inner_layers(np.array(start_point))[LAYER + 1])
            #print('layer',LAYER+1, layer)
            #print('all inner layers')
            #for e in cheat_get_inner_layers(np.array(start_point)):
            #    print(e)
            which_is_zero_2 = np.argmin(np.abs(layer))

            if which_is_zero_2 != which_is_zero:
                print("STARTED WITH", which_is_zero, "NOW IS", which_is_zero_2)
                print(layer)
                # Abort loudly: the neuron we were tracking has changed.
                raise RuntimeError("Tracked critical neuron changed")

        # Keep track of where we've been, so we can go to new places.
        which_polytope = get_polytope_at(known_T, known_A, known_B,
                                         start_point, False)  # [-1 1 -1]
        hidden_vector = get_hidden_at(known_T, known_A, known_B, LAYER,
                                      start_point, False)
        sign_at_init = sign_to_int(which_polytope)  # 0b010 -> 2

        print("Number of collected points", len(points_on_plane))
        if len(points_on_plane) > MAX_POINTS:
            return points_on_plane, False

        neuron_values = np.array([x[1] for x in history])

        neuron_positive_count = np.sum(neuron_values > 1, axis=0)
        neuron_negative_count = np.sum(neuron_values < -1, axis=0)

        if (np.all(neuron_positive_count > 0) and np.all(neuron_negative_count > 0)) or \
           (only_need_positive and np.all(neuron_positive_count > 0)):
            print("Have all the points we need (1)")
            print(query_count)
            print(neuron_positive_count)
            print(neuron_negative_count)

            neuron_values = np.array([
                get_hidden_at(known_T, known_A, known_B, LAYER, x, False)
                for x in points_on_plane
            ])

            neuron_positive_count = np.sum(neuron_values > 1, axis=0)
            neuron_negative_count = np.sum(neuron_values < -1, axis=0)

            print(neuron_positive_count)
            print(neuron_negative_count)

            return points_on_plane, True

        # 1. Find a way to move along the hyperplane by computing the normal
        # direction using the ratios function. Then find a parallel direction.

        try:
            #perp_dir = get_ratios([start_point], [range(DIM)], eps=1e-4)[0].flatten()
            perp_dir = get_ratios_lstsq(0, [start_point], [range(DIM)],
                                        KnownT([], []), eps=1e-5)[0].flatten()
        except AcceptableFailure:
            print("Failed to compute ratio at start point. "
                  "Something very bad happened.")
            return points_on_plane, False

        # Record these points.
        history.append((which_polytope, hidden_vector, np.copy(start_point)))

        # We can't just pick any parallel direction. If we did, then we would
        # not end up covering much of the input space.
        # Instead, we're going to figure out which layer-1 hyperplanes are
        # "visible" from the current point, and then try to reach all of them.

        # This is the point at which the first and second layers intersect.
        start_point, multiple_intersection_point, new_change_axis = \
            choose_new_direction_from_minimize(current_change_axis)

        if new_change_axis != current_change_axis:
            start_point, multiple_intersection_point, current_change_axis = \
                choose_new_direction_from_minimize(None)

        #if CHEATING:
        #    print("INIT MULTIPLE", cheat_get_inner_layers(multiple_intersection_point))

        # Refine the direction we're going to travel in---stay numerically
        # stable.
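        # (The refinement below first steps a tiny distance toward the
        # multiple-intersection point and re-snaps onto the hyperplane with a
        # sweep, then binary-searches for the polytope boundary; this keeps
        # accumulated floating-point error from drifting us off the plane.)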
        towards_multiple_direction = multiple_intersection_point - start_point
        step_distance = np.sum(towards_multiple_direction**2)**.5

        print("Distance we need to step:", step_distance)

        if step_distance > 1 or True:  # the 'or True' makes this refinement unconditional
            mid_point = 1e-4 * towards_multiple_direction / np.sum(
                towards_multiple_direction**2)**.5 + start_point

            random_dir = np.random.normal(size=DIM)

            mid_points = do_better_sweep(mid_point,
                                         perp_dir / np.sum(perp_dir**2)**.5,
                                         low=-1e-3,
                                         high=1e-3,
                                         known_T=known_T)

            if len(mid_points) > 0:
                mid_point = mid_points[np.argmin(
                    np.sum((mid_point - mid_points)**2, axis=1))]

                towards_multiple_direction = mid_point - start_point
                towards_multiple_direction = towards_multiple_direction / np.sum(
                    towards_multiple_direction**2)**.5

                initial_signs = get_polytope_at(known_T, known_A, known_B,
                                                start_point)
                _, high = binary_search_towards(known_T, known_A, known_B,
                                                start_point, initial_signs,
                                                towards_multiple_direction)

                multiple_intersection_point = towards_multiple_direction * high + start_point

        # Find the angle of the next hyperplane.
        # First, take random steps away from the intersection point,
        # then run the search algorithm to find some intersections.
        # What we find will be either a layer-1 or a layer-2 intersection.

        print("Now try to find the continuation direction")
        success = None
        while success is None:
            if start_box_step < 0:
                start_box_step = 0
                print("VERY BAD FAILURE")
                print("Choose a new random point to start from")
                which_point = np.random.randint(0, len(history))
                start_point = history[which_point][2]
                print("New point is", which_point)
                current_change_axis = np.random.randint(0, sizes[LAYER + 1])
                print("New axis to change", current_change_axis)
                break

            print("\tStart the box step with size", start_box_step)
            try:
                success, camefrom, stepsize = find_plane_angle(
                    known_T, known_A, known_B, multiple_intersection_point,
                    sign_at_init, start_box_step)
            except AcceptableFailure:
                # Go back to the top and try with a new start point
                print("\tOkay, we need to try with a new start point")
                start_box_step = -10

            start_box_step -= 2

        if success is None:
            continue

        val = matmul(
            known_T.forward(multiple_intersection_point, with_relu=True),
            known_A, known_B)[new_change_axis]
        print("Value at multiple:", val)
        val = matmul(known_T.forward(success, with_relu=True), known_A,
                     known_B)[new_change_axis]
        print("Value at success:", val)

        if stepsize < 10:
            new_move_direction = success - multiple_intersection_point

            # We don't want to be right next to the multiple-intersection
            # point, so binary search to find how far away we can go while
            # remaining in this polytope. Then we'll go half as far as we
            # maximally could.
            initial_signs = get_polytope_at(known_T, known_A, known_B, success)

            print("polytope at initial", sign_to_int(initial_signs))
            low = 0
            high = 1
            while high - low > 1e-2:
                mid = (high + low) / 2
                query_point = multiple_intersection_point + mid * new_move_direction
                next_signs = get_polytope_at(known_T, known_A, known_B,
                                             query_point)
                print(
                    "polytope at", mid, sign_to_int(next_signs), "%x" %
                    (sign_to_int(next_signs) ^ sign_to_int(initial_signs)))
                if initial_signs == next_signs:
                    low = mid
                else:
                    high = mid
            print("GO TO", mid)

            success = multiple_intersection_point + (mid / 2) * new_move_direction

            val = matmul(known_T.forward(success, with_relu=True), known_A,
                         known_B)[new_change_axis]
            print("Value at moved success:", val)

        print("Adding the points to the set of known good points")

        points_on_plane.append(start_point)

        if camefrom is not None:
            points_on_plane.append(camefrom)
        #print("Old start point", start_point)
        #print("Set to success", success)
        start_point = success
        start_box_step = max(stepsize - 1, 0)

    return points_on_plane, False
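# A minimal standalone sketch (not used by the attack code) of the projection
# trick choose_new_direction_from_minimize relies on: subtracting a random
# direction's component along the hyperplane normal yields a direction that
# is parallel to the critical hyperplane, so stepping along it keeps the
# target neuron's pre-activation at zero (to first order).
def _project_onto_hyperplane_sketch(random_dir, normal):
    perp_component = np.dot(random_dir, normal) / np.dot(normal, normal) * normal
    parallel_dir = random_dir - perp_component
    return parallel_dir / np.sum(parallel_dir**2)**.5  # unit length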
def compute_layer_values(critical_points, known_T, LAYER):
    if LAYER == 0:
        COUNT = neuron_count[LAYER + 1] * 3
    else:
        COUNT = neuron_count[LAYER + 1] * np.log(sizes[LAYER + 1]) * 3

    # type: [(ratios, critical_point)]
    this_layer_critical_points = []

    partial_weights = None
    partial_biases = None

    def check_fn(point):
        if partial_weights is None:
            return True
        hidden = matmul(known_T.forward(point, with_relu=True),
                        partial_weights.T, partial_biases)
        if np.any(np.abs(hidden) < 1e-4):
            return False
        return True

    print()
    print("Start running critical point search to find neurons on layer",
          LAYER)
    while True:
        print("At this iteration I have", len(this_layer_critical_points),
              "critical points")

        def reuse_critical_points():
            for witness in critical_points:
                yield witness

        this_layer_critical_points.extend(
            gather_ratios(reuse_critical_points(), known_T, check_fn, LAYER,
                          COUNT))

        print("Query count after that search:", query_count)
        print("And now up to ", len(this_layer_critical_points),
              "critical points")

        ## Filter out duplicates.
        filtered_points = []

        # Let's not add points that are identical to ones we've already done.
        for i, (ratio1, point1) in enumerate(this_layer_critical_points):
            for ratio2, point2 in this_layer_critical_points[i + 1:]:
                if np.sum((point1 - point2)**2)**.5 < 1e-10:
                    break
            else:
                filtered_points.append((ratio1, point1))

        this_layer_critical_points = filtered_points

        print("After filtering duplicates we're down to ",
              len(this_layer_critical_points), "critical points")

        print("Start trying to do the graph solving")
        try:
            critical_groups, extracted_normals = graph_solve(
                [x[0] for x in this_layer_critical_points],
                [x[1] for x in this_layer_critical_points],
                neuron_count[LAYER + 1],
                LAYER=LAYER,
                debug=True)
            break
        except GatherMoreData as e:
            print("Graph solving failed because we didn't explore "
                  "all sides of at least one neuron")
            print("Fall back to the hyperplane following algorithm "
                  "in order to get more data")

            def mine(r):
                while len(r) > 0:
                    print("Yielding a point")
                    yield r[0]
                    r = r[1:]
                print("No more to give!")

            prev_T = KnownT(known_T.A[:-1], known_T.B[:-1])

            _, more_critical_points = sign_recovery.solve_layer_sign(
                prev_T,
                known_T.A[-1],
                known_T.B[-1],
                mine(e.data),
                LAYER - 1,
                already_checked_critical_points=True,
                only_need_positive=True)

            print("Add more", len(more_critical_points))
            this_layer_critical_points.extend(
                gather_ratios(more_critical_points, known_T, check_fn, LAYER,
                              1e6))
            print("Done adding")

            COUNT = neuron_count[LAYER + 1]
        except AcceptableFailure as e:
            print("Graph solving failed; get more points")
            COUNT = neuron_count[LAYER + 1]
            if hasattr(e, 'partial_solution'):
                if len(e.partial_solution[0]) > 0:
                    partial_weights, corresponding_examples = e.partial_solution
                    print("Got partial solution with shape",
                          partial_weights.shape)
                    if CHEATING:
                        print(
                            "Corresponding to",
                            np.argmin(np.abs(
                                cheat_get_inner_layers(
                                    [x[0] for x in corresponding_examples]
                                )[LAYER]),
                                      axis=1))

                    partial_biases = []
                    for weight, examples in zip(partial_weights,
                                                corresponding_examples):
                        hidden = known_T.forward(examples, with_relu=True)
                        print("hidden", np.array(hidden).shape)
                        bias = -np.median(np.dot(hidden, weight))
                        partial_biases.append(bias)
                    partial_biases = np.array(partial_biases)

    print("Number of critical points per cluster",
          [len(x) for x in critical_groups])

    point_per_class = [x[0] for x in critical_groups]

    extracted_normals = np.array(extracted_normals).T

    # Compute the bias; we know wx + b = 0 at each critical point.
    extracted_bias = [
        matmul(known_T.forward(point_per_class[i], with_relu=True),
               extracted_normals[:, i], c=None)
        for i in range(neuron_count[LAYER + 1])
    ]
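    # (Why this works: each point_per_class[i] lies exactly on neuron i's
    # critical hyperplane, so w_i . h(x) + b_i = 0 there; the matmul above
    # computes w_i . h(x) with c=None, i.e. no bias term, and negating it
    # below yields b_i.)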
    # Don't forget to negate it.
    # That's important.
    # No, I definitely didn't forget this line the first time around.
    extracted_bias = -np.array(extracted_bias)

    # For the failed-to-identify neurons, set the bias to zero.
    extracted_bias *= np.any(extracted_normals != 0, axis=0)[:, np.newaxis]

    if CHEATING:
        # Compute how far off we are from the true matrix.
        real_scaled = A[LAYER] / A[LAYER][0]
        extracted_scaled = extracted_normals / extracted_normals[0]

        mask = []
        reorder_rows = []
        for i in range(len(extracted_bias)):
            which_idx = np.argmin(
                np.sum(np.abs(real_scaled - extracted_scaled[:, [i]]), axis=0))
            reorder_rows.append(which_idx)
            mask.append((A[LAYER][0, which_idx]))

        print('matrix norm difference',
              np.sum(np.abs(extracted_normals * mask -
                            A[LAYER][:, reorder_rows])))
    else:
        mask = [1] * len(extracted_bias)

    return extracted_normals, extracted_bias, mask
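# An illustrative sketch (standalone, not called by the attack) of the
# alignment performed in the CHEATING branch above: extraction is only
# defined up to per-neuron scaling and permutation, so each extracted column
# is normalized by its first coordinate and matched to the closest true
# column. `true_W` stands in for A[LAYER].
def _align_columns_sketch(true_W, extracted_W):
    true_scaled = true_W / true_W[0]
    extracted_scaled = extracted_W / extracted_W[0]
    order = [
        int(np.argmin(
            np.sum(np.abs(true_scaled - extracted_scaled[:, [i]]), axis=0)))
        for i in range(extracted_W.shape[1])
    ]
    return order  # order[i] = index of the true neuron matching column i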