def _build_train_fn(self):
    """
    Adapted from https://gist.github.com/kkweon/c8d1caabaf7b43317bc8825c226045d2
    """
    action_prob_placeholder = self.model.output
    action_onehot_placeholder = K.placeholder(shape=(None, self.n_a),
                                              name="action_onehot")
    adv_placeholder = K.placeholder(shape=(None,), name="advantages")

    action_prob = K.sum(action_prob_placeholder * action_onehot_placeholder,
                        axis=1)
    log_action_prob = K.log(action_prob)

    loss = -log_action_prob * adv_placeholder
    loss = K.mean(loss)

    adam = Adam(lr=self.actor_learning_rate)
    updates = adam.get_updates(params=self.model.trainable_weights, loss=loss)

    self.train_fn = K.function(
        inputs=[self.model.input, action_onehot_placeholder, adv_placeholder],
        outputs=[loss],
        updates=updates)
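# --- Usage sketch (not from the original source): how the REINFORCE-style
# train_fn above is typically called. `agent`, `states`, `actions`, and
# `discounted_returns` are hypothetical names for one episode of rollout
# data; integer actions are converted to one-hot vectors first.
import numpy as np

action_onehot = np.eye(agent.n_a)[actions]          # shape (T, n_a)
loss_value = agent.train_fn([states, action_onehot, discounted_returns])[0]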
def _build_train_fn(self):
    """
    Adapted from https://gist.github.com/kkweon/c8d1caabaf7b43317bc8825c226045d2
    """
    action_prob_placeholder = self.model.output
    action_onehot_placeholder = K.placeholder(shape=(None, self.n_a),
                                              name="action_onehot")
    adv_placeholder = K.placeholder(shape=(None,), name="advantages")
    old_action_probs_placeholder = K.placeholder(shape=(None,),
                                                 name="pi_old")

    action_prob = K.sum(action_prob_placeholder * action_onehot_placeholder,
                        axis=1)

    # PPO probability ratio r = pi(a|s) / pi_old(a|s)
    r = action_prob / (old_action_probs_placeholder + 1e-10)
    clip_loss = K.minimum(
        r * adv_placeholder,
        K.clip(r, 1 - self.epsilon, 1 + self.epsilon) * adv_placeholder)
    loss = -K.mean(clip_loss + self.entropy_coeff *
                   (-action_prob * K.log(action_prob + 1e-10)))

    adam = Adam(lr=self.actor_learning_rate)
    updates = adam.get_updates(params=self.model.trainable_weights, loss=loss)

    self.train_fn = K.function(inputs=[
        self.model.input, action_onehot_placeholder, adv_placeholder,
        old_action_probs_placeholder
    ],
                               outputs=[loss],
                               updates=updates)
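# --- Usage sketch (not from the original source): calling the PPO-clip
# variant above. All names here are hypothetical. The old action
# probabilities must be recorded under the behaviour policy before any
# update step; the same rollout can then be reused for several epochs.
import numpy as np

action_onehot = np.eye(agent.n_a)[actions]
old_probs = np.sum(agent.model.predict(states) * action_onehot, axis=1)
for _ in range(4):  # a few passes over the same rollout
    loss_value = agent.train_fn(
        [states, action_onehot, advantages, old_probs])[0]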
def make_train_fn(self):
    action_oh_pl = kb.placeholder(shape=(None, self.env.action_space.n))
    discounted_rw_pl = kb.placeholder(shape=(None,))

    action_prob = kb.sum(action_oh_pl * self.train_model.output, axis=-1)
    log_action_prob = kb.log(action_prob)
    loss = kb.mean(-log_action_prob * discounted_rw_pl)

    adam = Adam(lr=self.learning_rate)
    update_op = adam.get_updates(
        loss=loss,
        params=self.train_model.trainable_weights
    )

    train_fn = kb.function(
        inputs=[
            self.train_model.input,
            action_oh_pl,
            discounted_rw_pl
        ],
        outputs=[self.train_model.output, loss],
        updates=update_op
    )
    return train_fn
def make_critic_train_fn(self):
    action_oh_pl = kb.placeholder(shape=(None, self.env.action_space.n))
    newaction_oh_pl = kb.placeholder(shape=(None, self.env.action_space.n))
    rewards_pl = kb.placeholder(shape=(None,))
    dones_pl = kb.placeholder(shape=(None,))

    critic_results = self.critic_model.output
    q_hat, new_q_hat = critic_results[0], critic_results[1]

    # TD error
    val = rewards_pl + (1.0 - dones_pl) * self.gamma * new_q_hat - q_hat
    # Mean squared error of the prediction
    loss = kb.mean(val**2)

    adam = Adam(lr=self.learning_rate)
    update_op = adam.get_updates(
        loss=loss, params=self.critic_model.trainable_weights)

    train_fn = kb.function(inputs=[
        self.critic_model.input, action_oh_pl, newaction_oh_pl, rewards_pl,
        dones_pl
    ],
                           outputs=[self.critic_model.output, val, loss],
                           updates=update_op)
    return train_fn
def _build_actor_train_fn(self):
    actions = self.model.output
    state_actions = Concatenate(axis=1)([self.model.input, actions])
    q_values = self.critic(state_actions)

    # Deterministic policy gradient: maximize the critic's Q estimate
    loss = -K.mean(q_values)

    adam = Adam(lr=self.actor_learning_rate)
    updates = adam.get_updates(params=self.model.trainable_weights, loss=loss)

    self.actor_train_fn = K.function(inputs=[self.model.input],
                                     outputs=[loss],
                                     updates=updates)
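# --- Note and usage sketch (not from the original source): because
# get_updates receives only the actor's trainable_weights, the critic is
# held fixed while gradients flow through it, which is the DDPG-style
# actor update. The only feed is a batch of states; `agent` and
# `replay_buffer` are hypothetical names.
states = replay_buffer.sample_states(batch_size=64)
actor_loss = agent.actor_train_fn([states])[0]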
def make_critic_train_fn(self):
    action_oh_pl = kb.placeholder(shape=(None, self.env.action_space.n))
    discounted_rw_pl = kb.placeholder(shape=(None,))

    critic_results = self.critic_model.output
    # Mean absolute error of the prediction
    loss = kb.mean(mean_absolute_error(discounted_rw_pl, critic_results))

    adam = Adam(lr=self.learning_rate)
    update_op = adam.get_updates(
        loss=loss, params=self.critic_model.trainable_weights)

    train_fn = kb.function(
        inputs=[self.critic_model.input, action_oh_pl, discounted_rw_pl],
        outputs=[self.critic_model.output, loss],
        updates=update_op)
    return train_fn
def make_critic_train_fn(self):
    rewards_pl = kb.placeholder(shape=(None,))
    dones_pl = kb.placeholder(shape=(None,))

    q_hats = self.merged_model.output
    new_q_hats = self.merged_target.output

    # TD error against the target network's Q estimate
    td_error = (rewards_pl + (1.0 - dones_pl) * self.gamma * new_q_hats -
                q_hats)
    # Mean squared error of the prediction
    loss = kb.mean(td_error**2)

    adam = Adam(lr=self.learning_rate)
    update_op = adam.get_updates(
        loss=loss, params=self.critic_model.trainable_weights)

    train_fn = kb.function(inputs=[
        self.merged_model.input, self.merged_target.input, rewards_pl,
        dones_pl
    ],
                           outputs=[self.merged_model.output, loss],
                           updates=update_op)
    return train_fn
weighted_style_losses = []
weighted_content_losses = []
total_loss = K.variable(0.)

for loss in style_losses:
    weighted_loss = args.style_weight * K.mean(loss)
    weighted_style_losses.append(weighted_loss)
    total_loss += weighted_loss
for loss in content_losses:
    weighted_loss = args.content_weight * K.mean(loss)
    weighted_content_losses.append(weighted_loss)
    total_loss += weighted_loss
weighted_tv_loss = args.tv_weight * K.mean(total_var_loss)
total_loss += weighted_tv_loss

opt = Adam(lr=args.lr)
updates = opt.get_updates([pastiche_image], {}, total_loss)

# List of outputs
outputs = [
    total_loss
] + weighted_content_losses + weighted_style_losses + [weighted_tv_loss]

# Function that makes a step after backpropping to the image
make_step = K.function([], outputs, updates)

# Perform optimization steps and save the results
start_time = time.time()
for i in range(args.num_iterations):
    out = make_step([])
    if (i + 1) % args.print_and_save == 0:
        print('Iteration %d/%d' % (i + 1, args.num_iterations))
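# --- Sketch (not from the original source): the style_losses and
# content_losses tensors consumed above are built elsewhere; a common
# construction is the Gram-matrix style loss between feature maps of a
# shared VGG layer. `pastiche_feat` and `style_feat` are hypothetical
# (H, W, C) tensors.
def gram_matrix(feat):
    c = K.int_shape(feat)[-1]
    f = K.reshape(feat, (-1, c))       # flatten spatial dims: (H*W, C)
    return K.dot(K.transpose(f), f)    # channel correlations: (C, C)

def style_loss(pastiche_feat, style_feat):
    return K.sum(K.square(gram_matrix(pastiche_feat) -
                          gram_matrix(style_feat)))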
class Visualizer:

    # upsample size, default is 1
    UPSAMPLE_SIZE = 1
    # pixel intensity range of image and preprocessing method
    # raw: [0, 255]
    # mnist: [0, 1]
    # imagenet: imagenet mean centering
    # inception: [-1, 1]
    INTENSITY_RANGE = 'raw'
    # type of regularization of the mask
    REGULARIZATION = 'l1'
    # threshold of attack success rate for dynamically changing cost
    ATTACK_SUCC_THRESHOLD = 0.99
    # patience
    PATIENCE = 10
    # multiple of changing cost, down multiple is the square root of this
    COST_MULTIPLIER = 1.5
    # if resetting cost to 0 at the beginning
    # default is true for full optimization, set to false for early detection
    RESET_COST_TO_ZERO = True
    # min/max of mask
    MASK_MIN = 0
    MASK_MAX = 1
    # min/max of raw pixel intensity
    COLOR_MIN = 0
    COLOR_MAX = 255
    # number of color channels
    IMG_COLOR = 3
    # whether to shuffle during each epoch
    SHUFFLE = True
    # batch size of optimization
    BATCH_SIZE = 32
    # verbose level, 0, 1 or 2
    VERBOSE = 1
    # whether to return log or not
    RETURN_LOGS = True
    # whether to save last pattern or best pattern
    SAVE_LAST = False
    # epsilon used in tanh
    EPSILON = K.epsilon()
    # early stop flag
    EARLY_STOP = True
    # early stop threshold
    EARLY_STOP_THRESHOLD = 0.99
    # early stop patience
    EARLY_STOP_PATIENCE = 2 * PATIENCE
    # save tmp masks, for debugging purpose
    SAVE_TMP = False
    # dir to save intermediate masks
    TMP_DIR = 'tmp'
    # whether input image has been preprocessed or not
    RAW_INPUT_FLAG = False

    def __init__(self, model, intensity_range, regularization, input_shape,
                 init_cost, steps, mini_batch, lr, num_classes,
                 upsample_size=UPSAMPLE_SIZE,
                 attack_succ_threshold=ATTACK_SUCC_THRESHOLD,
                 patience=PATIENCE,
                 cost_multiplier=COST_MULTIPLIER,
                 reset_cost_to_zero=RESET_COST_TO_ZERO,
                 mask_min=MASK_MIN, mask_max=MASK_MAX,
                 color_min=COLOR_MIN, color_max=COLOR_MAX,
                 img_color=IMG_COLOR, shuffle=SHUFFLE,
                 batch_size=BATCH_SIZE, verbose=VERBOSE,
                 return_logs=RETURN_LOGS, save_last=SAVE_LAST,
                 epsilon=EPSILON,
                 early_stop=EARLY_STOP,
                 early_stop_threshold=EARLY_STOP_THRESHOLD,
                 early_stop_patience=EARLY_STOP_PATIENCE,
                 save_tmp=SAVE_TMP, tmp_dir=TMP_DIR,
                 raw_input_flag=RAW_INPUT_FLAG):

        assert intensity_range in {'imagenet', 'inception', 'mnist', 'raw'}
        assert regularization in {None, 'l1', 'l2'}

        self.model = model
        self.intensity_range = intensity_range
        self.regularization = regularization
        self.input_shape = input_shape
        self.init_cost = init_cost
        self.steps = steps
        self.mini_batch = mini_batch
        self.lr = lr
        self.num_classes = num_classes
        self.upsample_size = upsample_size
        self.attack_succ_threshold = attack_succ_threshold
        self.patience = patience
        self.cost_multiplier_up = cost_multiplier
        self.cost_multiplier_down = cost_multiplier**1.5
        self.reset_cost_to_zero = reset_cost_to_zero
        self.mask_min = mask_min
        self.mask_max = mask_max
        self.color_min = color_min
        self.color_max = color_max
        self.img_color = img_color
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.verbose = verbose
        self.return_logs = return_logs
        self.save_last = save_last
        self.epsilon = epsilon
        self.early_stop = early_stop
        self.early_stop_threshold = early_stop_threshold
        self.early_stop_patience = early_stop_patience
        self.save_tmp = save_tmp
        self.tmp_dir = tmp_dir
        self.raw_input_flag = raw_input_flag

        mask_size = np.ceil(
            np.array(input_shape[0:2], dtype=float) / upsample_size)
        mask_size = mask_size.astype(int)
        self.mask_size = mask_size
        mask = np.zeros(self.mask_size)
        pattern = np.zeros(input_shape)
        mask = np.expand_dims(mask, axis=2)

        mask_tanh = np.zeros_like(mask)
        pattern_tanh = np.zeros_like(pattern)

        # prepare mask related tensors
        self.mask_tanh_tensor = K.variable(mask_tanh)
        mask_tensor_unrepeat = (K.tanh(self.mask_tanh_tensor) /
                                (2 - self.epsilon) + 0.5)
        mask_tensor_unexpand = K.repeat_elements(mask_tensor_unrepeat,
                                                 rep=self.img_color,
                                                 axis=2)
        self.mask_tensor = K.expand_dims(mask_tensor_unexpand, axis=0)
        upsample_layer = UpSampling2D(size=(self.upsample_size,
                                            self.upsample_size))
        mask_upsample_tensor_uncrop = upsample_layer(self.mask_tensor)
        uncrop_shape = K.int_shape(mask_upsample_tensor_uncrop)[1:]
        cropping_layer = Cropping2D(
            cropping=((0, uncrop_shape[0] - self.input_shape[0]),
                      (0, uncrop_shape[1] - self.input_shape[1])))
        self.mask_upsample_tensor = cropping_layer(mask_upsample_tensor_uncrop)
        reverse_mask_tensor = (K.ones_like(self.mask_upsample_tensor) -
                               self.mask_upsample_tensor)

        def keras_preprocess(x_input, intensity_range):
            if intensity_range == 'raw':
                x_preprocess = x_input
            elif intensity_range == 'imagenet':
                # 'RGB'->'BGR'
                x_tmp = x_input[..., ::-1]
                # Zero-center by mean pixel
                mean = K.constant([[[103.939, 116.779, 123.68]]])
                x_preprocess = x_tmp - mean
            elif intensity_range == 'inception':
                x_preprocess = (x_input / 255.0 - 0.5) * 2.0
            elif intensity_range == 'mnist':
                x_preprocess = x_input / 255.0
            else:
                raise Exception('unknown intensity_range %s' %
                                intensity_range)
            return x_preprocess

        def keras_reverse_preprocess(x_input, intensity_range):
            if intensity_range == 'raw':
                x_reverse = x_input
            elif intensity_range == 'imagenet':
                # Zero-center by mean pixel
                mean = K.constant([[[103.939, 116.779, 123.68]]])
                x_reverse = x_input + mean
                # 'BGR'->'RGB'
                x_reverse = x_reverse[..., ::-1]
            elif intensity_range == 'inception':
                x_reverse = (x_input / 2 + 0.5) * 255.0
            elif intensity_range == 'mnist':
                x_reverse = x_input * 255.0
            else:
                raise Exception('unknown intensity_range %s' %
                                intensity_range)
            return x_reverse

        # prepare pattern related tensors
        self.pattern_tanh_tensor = K.variable(pattern_tanh)
        self.pattern_raw_tensor = ((K.tanh(self.pattern_tanh_tensor) /
                                    (2 - self.epsilon) + 0.5) * 255.0)

        # prepare input image related tensors
        # ignore clip operation here
        # assume input image is already clipped into valid color range
        input_tensor = K.placeholder(model.input_shape)
        if self.raw_input_flag:
            input_raw_tensor = input_tensor
        else:
            input_raw_tensor = keras_reverse_preprocess(
                input_tensor, self.intensity_range)

        # IMPORTANT: MASK OPERATION IN RAW DOMAIN
        X_adv_raw_tensor = (
            reverse_mask_tensor * input_raw_tensor +
            self.mask_upsample_tensor * self.pattern_raw_tensor)

        X_adv_tensor = keras_preprocess(X_adv_raw_tensor,
                                        self.intensity_range)

        output_tensor = model(X_adv_tensor)
        y_true_tensor = K.placeholder(model.output_shape)

        self.loss_acc = categorical_accuracy(output_tensor, y_true_tensor)
        self.loss_ce = categorical_crossentropy(output_tensor, y_true_tensor)

        if self.regularization is None:
            self.loss_reg = K.constant(0)
        elif self.regularization == 'l1':
            self.loss_reg = (K.sum(K.abs(self.mask_upsample_tensor)) /
                             self.img_color)
        elif self.regularization == 'l2':
            self.loss_reg = K.sqrt(
                K.sum(K.square(self.mask_upsample_tensor)) / self.img_color)

        cost = self.init_cost
        self.cost_tensor = K.variable(cost)
        self.loss = self.loss_ce + self.loss_reg * self.cost_tensor

        self.opt = Adam(lr=self.lr, beta_1=0.5, beta_2=0.9)
        self.updates = self.opt.get_updates(
            params=[self.pattern_tanh_tensor, self.mask_tanh_tensor],
            loss=self.loss)
        self.train = K.function(
            [input_tensor, y_true_tensor],
            [self.loss_ce, self.loss_reg, self.loss, self.loss_acc],
            updates=self.updates)
    def reset_opt(self):
        K.set_value(self.opt.iterations, 0)
        for w in self.opt.weights:
            K.set_value(w, np.zeros(K.int_shape(w)))

    def reset_state(self, pattern_init, mask_init):
        print('resetting state')

        # setting cost
        if self.reset_cost_to_zero:
            self.cost = 0
        else:
            self.cost = self.init_cost
        K.set_value(self.cost_tensor, self.cost)

        # setting mask and pattern
        mask = np.array(mask_init)
        pattern = np.array(pattern_init)
        mask = np.clip(mask, self.mask_min, self.mask_max)
        pattern = np.clip(pattern, self.color_min, self.color_max)
        mask = np.expand_dims(mask, axis=2)

        # convert to tanh space
        mask_tanh = np.arctanh((mask - 0.5) * (2 - self.epsilon))
        pattern_tanh = np.arctanh((pattern / 255.0 - 0.5) *
                                  (2 - self.epsilon))
        print('mask_tanh', np.min(mask_tanh), np.max(mask_tanh))
        print('pattern_tanh', np.min(pattern_tanh), np.max(pattern_tanh))

        K.set_value(self.mask_tanh_tensor, mask_tanh)
        K.set_value(self.pattern_tanh_tensor, pattern_tanh)

        # resetting optimizer states
        self.reset_opt()

    def save_tmp_func(self, step):
        cur_mask = K.eval(self.mask_upsample_tensor)
        cur_mask = cur_mask[0, ..., 0]
        img_filename = ('%s/%s' %
                        (self.tmp_dir, 'tmp_mask_step_%d.png' % step))
        dump_image(np.expand_dims(cur_mask, axis=2) * 255, img_filename,
                   'png')

        cur_fusion = K.eval(self.mask_upsample_tensor *
                            self.pattern_raw_tensor)
        cur_fusion = cur_fusion[0, ...]
        img_filename = ('%s/%s' %
                        (self.tmp_dir, 'tmp_fusion_step_%d.png' % step))
        dump_image(cur_fusion, img_filename, 'png')

    def visualize(self, gen, y_target, pattern_init, mask_init):

        # since we use a single optimizer repeatedly, we need to reset
        # the optimizer's internal states before running the optimization
        self.reset_state(pattern_init, mask_init)

        # best optimization results
        mask_best = None
        mask_upsample_best = None
        pattern_best = None
        reg_best = float('inf')

        # logs and counters for adjusting balance cost
        logs = []
        cost_set_counter = 0
        cost_up_counter = 0
        cost_down_counter = 0
        cost_up_flag = False
        cost_down_flag = False

        # counter for early stop
        early_stop_counter = 0
        early_stop_reg_best = reg_best

        # vectorized target
        Y_target = to_categorical([y_target] * self.batch_size,
                                  self.num_classes)

        # loop start
        for step in range(self.steps):

            # record loss for all mini-batches
            loss_ce_list = []
            loss_reg_list = []
            loss_list = []
            loss_acc_list = []
            for idx in range(self.mini_batch):
                X_batch, _ = gen.next()
                if X_batch.shape[0] != Y_target.shape[0]:
                    Y_target = to_categorical([y_target] * X_batch.shape[0],
                                              self.num_classes)
                (loss_ce_value, loss_reg_value, loss_value,
                 loss_acc_value) = self.train([X_batch, Y_target])
                loss_ce_list.extend(list(loss_ce_value.flatten()))
                loss_reg_list.extend(list(loss_reg_value.flatten()))
                loss_list.extend(list(loss_value.flatten()))
                loss_acc_list.extend(list(loss_acc_value.flatten()))

            avg_loss_ce = np.mean(loss_ce_list)
            avg_loss_reg = np.mean(loss_reg_list)
            avg_loss = np.mean(loss_list)
            avg_loss_acc = np.mean(loss_acc_list)

            # check to save best mask or not
            if avg_loss_acc >= self.attack_succ_threshold and \
                    avg_loss_reg < reg_best:
                mask_best = K.eval(self.mask_tensor)
                mask_best = mask_best[0, ..., 0]
                mask_upsample_best = K.eval(self.mask_upsample_tensor)
                mask_upsample_best = mask_upsample_best[0, ..., 0]
                pattern_best = K.eval(self.pattern_raw_tensor)
                reg_best = avg_loss_reg

            # verbose
            if self.verbose != 0:
                if self.verbose == 2 or step % (self.steps // 10) == 0:
                    print('step: %3d, cost: %.2E, attack: %.3f, '
                          'loss: %f, ce: %f, reg: %f, reg_best: %f' %
                          (step, Decimal(self.cost), avg_loss_acc, avg_loss,
                           avg_loss_ce, avg_loss_reg, reg_best))
            # save log
            logs.append((step, avg_loss_ce, avg_loss_reg, avg_loss,
                         avg_loss_acc, reg_best, self.cost))

            # check early stop
            if self.early_stop:
                # only terminate if a valid attack has been found
                if reg_best < float('inf'):
                    if reg_best >= self.early_stop_threshold * \
                            early_stop_reg_best:
                        early_stop_counter += 1
                    else:
                        early_stop_counter = 0
                early_stop_reg_best = min(reg_best, early_stop_reg_best)

                if (cost_down_flag and cost_up_flag and
                        early_stop_counter >= self.early_stop_patience):
                    print('early stop')
                    break

            # check cost modification
            if self.cost == 0 and avg_loss_acc >= self.attack_succ_threshold:
                cost_set_counter += 1
                if cost_set_counter >= self.patience:
                    self.cost = self.init_cost
                    K.set_value(self.cost_tensor, self.cost)
                    cost_up_counter = 0
                    cost_down_counter = 0
                    cost_up_flag = False
                    cost_down_flag = False
                    print('initialize cost to %.2E' % Decimal(self.cost))
            else:
                cost_set_counter = 0

            if avg_loss_acc >= self.attack_succ_threshold:
                cost_up_counter += 1
                cost_down_counter = 0
            else:
                cost_up_counter = 0
                cost_down_counter += 1

            if cost_up_counter >= self.patience:
                cost_up_counter = 0
                if self.verbose == 2:
                    print('up cost from %.2E to %.2E' %
                          (Decimal(self.cost),
                           Decimal(self.cost * self.cost_multiplier_up)))
                self.cost *= self.cost_multiplier_up
                K.set_value(self.cost_tensor, self.cost)
                cost_up_flag = True
            elif cost_down_counter >= self.patience:
                cost_down_counter = 0
                if self.verbose == 2:
                    print('down cost from %.2E to %.2E' %
                          (Decimal(self.cost),
                           Decimal(self.cost / self.cost_multiplier_down)))
                self.cost /= self.cost_multiplier_down
                K.set_value(self.cost_tensor, self.cost)
                cost_down_flag = True

            if self.save_tmp:
                self.save_tmp_func(step)

        # save the final version
        if mask_best is None or self.save_last:
            mask_best = K.eval(self.mask_tensor)
            mask_best = mask_best[0, ..., 0]
            mask_upsample_best = K.eval(self.mask_upsample_tensor)
            mask_upsample_best = mask_upsample_best[0, ..., 0]
            pattern_best = K.eval(self.pattern_raw_tensor)

        if self.return_logs:
            return pattern_best, mask_best, mask_upsample_best, logs
        else:
            return pattern_best, mask_best, mask_upsample_best
class Snooper:
    """
    A poison snooper for neural networks implementing the TABOR method.

    Named for: https://dune.fandom.com/wiki/Poison_snooper
    Based off of: https://github.com/bolunwang/backdoor/blob/master/visualizer.py
    """

    # upsample size, default is 1
    UPSAMPLE_SIZE = 1

    def __init__(self, model, upsample_size=UPSAMPLE_SIZE):
        mask_size = np.ceil(np.array((32, 32), dtype=float) / upsample_size)
        mask_size = mask_size.astype(int)
        self.mask_size = mask_size
        mask = np.zeros(self.mask_size)
        pattern = np.zeros((32, 32, 3))
        mask = np.expand_dims(mask, axis=2)

        mask_tanh = np.zeros_like(mask)
        pattern_tanh = np.zeros_like(pattern)

        # prepare mask related tensors
        self.mask_tanh_tensor = K.variable(mask_tanh)
        mask_tensor_unrepeat = (K.tanh(self.mask_tanh_tensor) /
                                (2 - K.epsilon()) + 0.5)
        mask_tensor_unexpand = K.repeat_elements(mask_tensor_unrepeat,
                                                 rep=3, axis=2)
        self.mask_tensor = K.expand_dims(mask_tensor_unexpand, axis=0)
        upsample_layer = UpSampling2D(size=(upsample_size, upsample_size))
        mask_upsample_tensor_uncrop = upsample_layer(self.mask_tensor)
        uncrop_shape = K.int_shape(mask_upsample_tensor_uncrop)[1:]
        cropping_layer = Cropping2D(cropping=((0, uncrop_shape[0] - 32),
                                              (0, uncrop_shape[1] - 32)))
        self.mask_upsample_tensor = cropping_layer(
            mask_upsample_tensor_uncrop)
        # self.mask_upsample_tensor = K.round(self.mask_upsample_tensor)
        reverse_mask_tensor = (K.ones_like(self.mask_upsample_tensor) -
                               self.mask_upsample_tensor)

        # prepare pattern related tensors
        self.pattern_tanh_tensor = K.variable(pattern_tanh)
        self.pattern_raw_tensor = ((K.tanh(self.pattern_tanh_tensor) /
                                    (2 - K.epsilon()) + 0.5) * 255.0)

        # prepare input image related tensors
        # ignore clip operation here
        # assume input image is already clipped into valid color range
        input_tensor = K.placeholder((None, 32, 32, 3))
        input_raw_tensor = input_tensor

        # IMPORTANT: MASK OPERATION IN RAW DOMAIN
        X_adv_raw_tensor = (
            reverse_mask_tensor * input_raw_tensor +
            self.mask_upsample_tensor * self.pattern_raw_tensor)
        X_adv_tensor = X_adv_raw_tensor

        output_tensor = model(X_adv_tensor)
        y_target_tensor = K.placeholder((None, 43))
        y_true_tensor = K.placeholder((None, 43))

        self.loss_ce = categorical_crossentropy(output_tensor,
                                                y_target_tensor)

        self.hyperparameters = K.reshape(
            K.constant(np.array([1e-6, 1e-5, 1e-7, 1e-8, 0, 1e-2])),
            shape=(6, 1))
        self.loss_reg = self.build_tabor_regularization(
            input_raw_tensor, model, y_target_tensor, y_true_tensor)
        self.loss_reg = K.dot(K.reshape(self.loss_reg, shape=(1, 6)),
                              self.hyperparameters)
        self.loss = K.mean(self.loss_ce) + self.loss_reg

        self.opt = Adam(lr=1e-3, beta_1=0.5, beta_2=0.9)
        self.updates = self.opt.get_updates(
            params=[self.pattern_tanh_tensor, self.mask_tanh_tensor],
            loss=self.loss)
        self.train = K.function(
            [input_tensor, y_true_tensor, y_target_tensor],
            [self.loss_ce, self.loss_reg, self.loss],
            updates=self.updates)

    def build_tabor_regularization(self, input_raw_tensor, model,
                                   y_target_tensor, y_true_tensor):
        reg_losses = []

        # R1 - Overly large triggers
        mask_l1_norm = K.sum(K.abs(self.mask_upsample_tensor))
        mask_l2_norm = K.sum(K.square(self.mask_upsample_tensor))
        mask_r1 = (mask_l1_norm + mask_l2_norm)

        pattern_tensor = (K.ones_like(self.mask_upsample_tensor) -
                          self.mask_upsample_tensor) * self.pattern_raw_tensor
        pattern_l1_norm = K.sum(K.abs(pattern_tensor))
        pattern_l2_norm = K.sum(K.square(pattern_tensor))
        pattern_r1 = (pattern_l1_norm + pattern_l2_norm)

        # R2 - Scattered triggers (smoothness over spatial neighbours;
        # the tensors are 4-D with a leading batch axis of size 1)
        pixel_dif_mask_col = K.sum(K.square(
            self.mask_upsample_tensor[:, :-1, :, :] -
            self.mask_upsample_tensor[:, 1:, :, :]))
        pixel_dif_mask_row = K.sum(K.square(
            self.mask_upsample_tensor[:, :, :-1, :] -
            self.mask_upsample_tensor[:, :, 1:, :]))
        mask_r2 = pixel_dif_mask_col + pixel_dif_mask_row

        pixel_dif_pat_col = K.sum(K.square(pattern_tensor[:, :-1, :, :] -
                                           pattern_tensor[:, 1:, :, :]))
        pixel_dif_pat_row = K.sum(K.square(pattern_tensor[:, :, :-1, :] -
                                           pattern_tensor[:, :, 1:, :]))
        pattern_r2 = pixel_dif_pat_col + pixel_dif_pat_row

        # R3 - Blocking triggers
        cropped_input_tensor = (K.ones_like(self.mask_upsample_tensor) -
                                self.mask_upsample_tensor) * input_raw_tensor
        r3 = K.mean(
            categorical_crossentropy(
                model(cropped_input_tensor),
                K.reshape(y_true_tensor[0], shape=(1, -1))))

        # R4 - Overlaying triggers
        mask_crop_tensor = self.mask_upsample_tensor * self.pattern_raw_tensor
        r4 = K.mean(
            categorical_crossentropy(
                model(mask_crop_tensor),
                K.reshape(y_target_tensor[0], shape=(1, -1))))

        reg_losses.append(mask_r1)
        reg_losses.append(pattern_r1)
        reg_losses.append(mask_r2)
        reg_losses.append(pattern_r2)
        reg_losses.append(r3)
        reg_losses.append(r4)

        return K.stack(reg_losses)

    def reset_opt(self):
        K.set_value(self.opt.iterations, 0)
        for w in self.opt.weights:
            K.set_value(w, np.zeros(K.int_shape(w)))

    def reset_state(self, pattern_init, mask_init):
        print('resetting state')

        # setting mask and pattern
        mask = np.array(mask_init)
        pattern = np.array(pattern_init)
        mask = np.clip(mask, 0, 1)
        pattern = np.clip(pattern, 0, 255)
        mask = np.expand_dims(mask, axis=2)

        # convert to tanh space
        mask_tanh = np.arctanh((mask - 0.5) * (2 - K.epsilon()))
        pattern_tanh = np.arctanh((pattern / 255.0 - 0.5) *
                                  (2 - K.epsilon()))
        print('mask_tanh', np.min(mask_tanh), np.max(mask_tanh))
        print('pattern_tanh', np.min(pattern_tanh), np.max(pattern_tanh))

        K.set_value(self.mask_tanh_tensor, mask_tanh)
        K.set_value(self.pattern_tanh_tensor, pattern_tanh)

        # resetting optimizer states
        self.reset_opt()

    def snoop(self, x, y, y_target, pattern_init, mask_init, poison_type,
              poison_loc, poison_size):
        self.reset_state(pattern_init, mask_init)

        # best optimization results
        mask_best = None
        mask_upsample_best = None
        pattern_best = None
        Y_target = None
        loss_best = float('inf')

        # logs and counters for adjusting balance cost
        logs = []
        steps = 50

        # loop start
        for step in range(steps):

            # record loss for all mini-batches
            loss_ce_list = []
            loss_reg_list = []
            loss_list = []
            for idx in trange(ceil(len(x) / 32) - 1):
                X_batch = x[idx * 32:(idx + 1) * 32]
                Y_batch = y[idx * 32:(idx + 1) * 32]
                if Y_target is None:
                    Y_target = to_categorical([y_target] * Y_batch.shape[0],
                                              43)
                (loss_ce_value, loss_reg_value,
                 loss_value) = self.train([X_batch, Y_batch, Y_target])
                loss_ce_list.extend(loss_ce_value.flatten())
                loss_reg_list.extend(loss_reg_value.flatten())
                loss_list.extend(loss_value.flatten())

            avg_loss_ce = np.mean(loss_ce_list)
            avg_loss_reg = np.mean(loss_reg_list)
            avg_loss = np.mean(loss_list)

            # check to save best mask or not
            if avg_loss < loss_best:
                mask_best = K.eval(self.mask_tensor)
                mask_best = mask_best[0, ..., 0]
                mask_upsample_best = K.eval(self.mask_upsample_tensor)
                mask_upsample_best = mask_upsample_best[0, ..., 0]
                pattern_best = K.eval(self.pattern_raw_tensor)
                loss_best = avg_loss

                filepath = 'backward_triggers'
                with open('%s/pattern_%s_%s_%d.npy' %
                          (filepath, poison_type, poison_loc, poison_size),
                          'wb') as f:
                    np.save(f, pattern_best)
                with open('%s/mask_%s_%s_%d.npy' %
                          (filepath, poison_type, poison_loc, poison_size),
                          'wb') as f:
                    np.save(f, mask_best)

            # save log
            logs.append((step, avg_loss_ce, avg_loss_reg, avg_loss))
            print("Step {} | loss_ce {} | loss_reg {} | loss {}".format(
                step, avg_loss_ce, avg_loss_reg, avg_loss))
        # save the final version
        if mask_best is None:
            mask_best = K.eval(self.mask_tensor)
            mask_best = mask_best[0, ..., 0]
            mask_upsample_best = K.eval(self.mask_upsample_tensor)
            mask_upsample_best = mask_upsample_best[0, ..., 0]
            pattern_best = K.eval(self.pattern_raw_tensor)

        # if self.return_logs:
        #     return pattern_best, mask_best, mask_upsample_best, logs
        # else:
        return pattern_best, mask_best, mask_upsample_best
    elif update.op.type == 'AssignAdd':
        updates[var.value()] = var + value
    # any other op type (e.g. "AssignVariableOp") raises an error
    else:
        raise ValueError(
            "Update op type (%s) must be of type Assign or AssignAdd" %
            update.op.type)
    return updates


cgan_d = Adam(lr=1e-4, beta_1=0.5, epsilon=1e-8)
cgan_g = tf.compat.v1.train.AdamOptimizer(0.001, beta1=0.5)

disc_vars = tf.compat.v1.get_collection(
    tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
updates = cgan_d.get_updates(D_loss, disc_vars)
d_train_op = tf.group(*updates, name="d_train_op")

# Unrolled GAN: replay the discriminator updates symbolically so the
# generator loss sees a discriminator several steps ahead
update_dict = extract_update_dict(updates)
cur_update_dict = update_dict
for i in range(unrolling - 1):
    cur_update_dict = tf.contrib.graph_editor.graph_replace(
        update_dict, cur_update_dict)
unrolled_loss = tf.contrib.graph_editor.graph_replace(D_loss,
                                                      cur_update_dict)

G_solver = cgan_g.minimize(loss=-unrolled_loss, var_list=para_G)
unrolled_loss_sum = tf.compat.v1.summary.scalar("unrolled_loss",
                                                unrolled_loss)

sess = tf.compat.v1.Session()
initial = tf.compat.v1.global_variables_initializer()
def train_sated_nmt(loo=0, num_users=200, num_words=5000, num_epochs=20,
                    h=128, emb_h=128, l2_ratio=1e-4, exp_id=0, lr=0.001,
                    batch_size=32, mask=False, drop_p=0.5,
                    cross_domain=False, tied=False, ablation=False,
                    sample_user=False, user_data_ratio=0., rnn_fn='lstm',
                    optim_fn='adam'):
    # if cross_domain:
    #     sample_user = True
    #     user_src_texts, user_trg_texts, dev_src_texts, dev_trg_texts, \
    #         test_src_texts, test_trg_texts, src_vocabs, trg_vocabs = \
    #         load_europarl_by_user(num_users=num_users, num_words=num_words)
    # else:
    user_src_texts, user_trg_texts, dev_src_texts, dev_trg_texts, \
        test_src_texts, test_trg_texts, src_vocabs, trg_vocabs = \
        load_sated_data_by_user(num_users, num_words,
                                sample_user=sample_user,
                                user_data_ratio=user_data_ratio)

    train_src_texts, train_trg_texts = [], []
    users = sorted(user_src_texts.keys())

    print("Creating dataset...")
    for i, user in enumerate(users):
        if loo is not None and i == loo:
            print("Leave user {} out".format(user))
            continue
        train_src_texts += user_src_texts[user]
        train_trg_texts += user_trg_texts[user]

    train_src_texts = words_to_indices(train_src_texts, src_vocabs, mask=mask)
    train_trg_texts = words_to_indices(train_trg_texts, trg_vocabs, mask=mask)
    dev_src_texts = words_to_indices(dev_src_texts, src_vocabs, mask=mask)
    dev_trg_texts = words_to_indices(dev_trg_texts, trg_vocabs, mask=mask)
    print("Num train data {}, num test data {}".format(len(train_src_texts),
                                                       len(dev_src_texts)))

    Vs = len(src_vocabs)
    Vt = len(trg_vocabs)
    print(f"Source vocab len: {Vs}, Target vocab len: {Vt}")

    print("Building NMT model...")
    model = build_nmt_model(Vs=Vs, Vt=Vt, mask=mask, drop_p=drop_p, h=h,
                            demb=emb_h, tied=tied, l2_ratio=l2_ratio,
                            rnn_fn=rnn_fn)
    src_input_var, trg_input_var = model.inputs
    prediction = model.output

    trg_label_var = K.placeholder((None, None), dtype='float32')
    loss = K.sparse_categorical_crossentropy(trg_label_var, prediction,
                                             from_logits=True)
    loss = K.mean(K.sum(loss, axis=-1))

    if optim_fn == 'adam':
        optimizer = Adam(learning_rate=lr, clipnorm=5.)
    elif optim_fn == 'mom_sgd':
        optimizer = SGD(learning_rate=lr, momentum=0.9)
    else:
        raise ValueError(optim_fn)

    updates = optimizer.get_updates(loss, model.trainable_weights)
    train_fn = K.function(
        inputs=[src_input_var, trg_input_var, trg_label_var,
                K.learning_phase()],
        outputs=[loss],
        updates=updates)
    pred_fn = K.function(
        inputs=[src_input_var, trg_input_var, trg_label_var,
                K.learning_phase()],
        outputs=[loss])

    # pad batches to same length
    train_prop = 0.2
    batches = []
    for batch in group_texts_by_len(train_src_texts, train_trg_texts,
                                    bs=batch_size):
        src_input, trg_input = batch
        src_input = pad_texts(src_input, src_vocabs['<eos>'], mask=mask)
        trg_input = pad_texts(trg_input, trg_vocabs['<eos>'], mask=mask)
        batches.append((src_input, trg_input))
    print(f"Number of batches: {len(batches)}\nFirst batch: {batches[0]}")

    print("Training NMT model...")
    for epoch in range(num_epochs):
        print(f"On epoch {epoch} of training...")
        np.random.shuffle(batches)
        for batch in batches:
            src_input, trg_input = batch
            _ = train_fn([src_input, trg_input[:, :-1], trg_input[:, 1:],
                          1])[0]

        train_loss, train_it = get_perp(train_src_texts, train_trg_texts,
                                        pred_fn, shuffle=True,
                                        prop=train_prop)
        test_loss, test_it = get_perp(dev_src_texts, dev_trg_texts, pred_fn)
        print("Epoch {}, train loss={:.3f}, train perp={:.3f}, "
              "test loss={:.3f}, test perp={:.3f}".format(
                  epoch,
                  train_loss / len(train_src_texts) / train_prop,
                  np.exp(train_loss / train_it),
                  test_loss / len(dev_src_texts),
                  np.exp(test_loss / test_it)))

    # if cross_domain:
    #     fname = 'europal_nmt{}'.format('' if loo is None else loo)
    # else:
    fname = 'sated_nmt{}'.format('' if loo is None else loo)

    if ablation:
        fname = 'ablation_' + fname

    if 0. < user_data_ratio < 1.:
        fname += '_dr{}'.format(user_data_ratio)

    if sample_user:
        fname += '_shadow_exp{}_{}'.format(exp_id, rnn_fn)
        shadow_user_path = MODEL_PATH + 'shadow_users{}_{}_{}_{}.npz'.format(
            exp_id, rnn_fn, num_users, 'cd' if cross_domain else '')
        np.savez(shadow_user_path, users)
        print(f"Shadow model {exp_id} saved to {shadow_user_path}.")

    model.save(MODEL_PATH + '{}_{}.h5'.format(fname, num_users))
    print(f"Target model saved to "
          f"{MODEL_PATH + '{}_{}.h5'.format(fname, num_users)}.")
    K.clear_session()
class actor_critic:
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.out_dim = output_dim
        self.gamma = 0.99

        # self.actor_critic = shared_model(self.input_dim, self.out_dim)
        self.actor_critic = shared_model_conv(self.input_dim, self.out_dim)
        # for l in self.actor_critic.layers:
        #     print(l.name, l.trainable)
        # self.actor, self.critic = each_model_conv(self.input_dim,
        #                                           self.out_dim)
        # self.actor_critic.add_loss(optimize_func)
        # self.actor_critic.compile(optimizer='adam',
        #     loss={'actor': optimize_actor_func,
        #           'critic': optimize_critic_func})

        # self.adam_optimizer = RMSprop(lr=0.0001, rho=0.99, epsilon=0.01)
        self.adam_optimizer = Adam(lr=7e-4)
        self.opt = self.optimizer()
        # self.actor_opt = self.actor_optimizer()
        # self.critic_opt = self.critic_optimizer()

    def optimizer(self):
        """
        Actor optimization: advantages + entropy term to encourage
        exploration (cf. https://arxiv.org/abs/1602.01783)
        """
        actor, critic = self.actor_critic(self.actor_critic.input)

        action = K.placeholder(shape=(None, self.out_dim))
        advantages = K.placeholder(shape=(None,))

        weighted_actions = K.sum(action * actor, axis=1)
        eligibility = K.log(weighted_actions + 1e-10) * \
            K.stop_gradient(advantages)
        entropy = K.sum(actor * K.log(actor + 1e-10), axis=1)
        entropy = K.mean(entropy)
        actor_loss = 1.0e-3 * entropy - K.mean(eligibility)
        # actor_loss = 1.0e-4 * entropy - K.cast(K.sum(eligibility),
        #                                        'float32')

        discounted_reward = K.placeholder(shape=(None, 1))
        critic_loss = K.mean(K.square(discounted_reward - critic))

        # loss = actor_loss + 0.5 * critic_loss
        # updates = self.adam_optimizer.get_updates(
        #     loss=loss, params=self.actor_critic.trainable_weights)
        # return K.function(
        #     inputs=[self.actor_critic.input, action, advantages,
        #             discounted_reward],
        #     outputs=loss, updates=updates)
        updates = self.adam_optimizer.get_updates(
            loss=[actor_loss, critic_loss],
            params=self.actor_critic.trainable_weights)
        return K.function(
            inputs=[self.actor_critic.input, action, advantages,
                    discounted_reward],
            outputs=[actor_loss, critic_loss],
            updates=updates)

    def actor_optimizer(self):
        # actor, _ = self.actor_critic(self.actor_critic.input)
        # self.actor_critic.layers[-1].trainable = False
        # self.actor_critic.layers[-2].trainable = True
        actor = self.actor(self.actor.input)

        action = K.placeholder(shape=(None, self.out_dim))
        advantages = K.placeholder(shape=(None,))

        weighted_actions = K.sum(action * actor, axis=1)
        eligibility = K.log(weighted_actions + 1e-10) * \
            K.stop_gradient(advantages)
        entropy = K.sum(actor * K.log(actor + 1e-10), axis=1)
        entropy = K.sum(entropy)
        actor_loss = 1.0e-3 * entropy - K.sum(eligibility)
        # actor_loss = 1.0e-3 * entropy - K.mean(eligibility)

        # updates = self.adam_optimizer.get_updates(
        #     loss=[actor_loss], params=self.actor_critic.trainable_weights)
        # return K.function([self.actor_critic.input, action, advantages],
        #                   [actor_loss], updates=updates)
        updates = self.adam_optimizer.get_updates(
            loss=[actor_loss], params=self.actor.trainable_weights)
        return K.function([self.actor.input, action, advantages],
                          [actor_loss], updates=updates)

    def critic_optimizer(self):
        # _, critic = self.actor_critic(self.actor_critic.input)
        # self.actor_critic.layers[-1].trainable = True
        # self.actor_critic.layers[-2].trainable = False
        critic = self.critic(self.critic.input)

        discounted_reward = K.placeholder(shape=(None,))
        critic_loss = K.sum(
            K.mean(K.square(discounted_reward - critic), axis=1))

        # updates = self.adam_optimizer.get_updates(
        #     loss=[critic_loss],
        #     params=self.actor_critic.trainable_weights)
        # return K.function([self.actor_critic.input, discounted_reward],
        #                   [critic_loss], updates=updates)
        updates = self.adam_optimizer.get_updates(
            loss=[critic_loss], params=self.critic.trainable_weights)
        return K.function([self.critic.input, discounted_reward],
                          [critic_loss], updates=updates)

    def train_models(self, states, actions, rewards):
        """
        Update actor and critic networks from experience
        """
        # Compute discounted rewards and advantage (TD error)
        states = np.array(states)
        # states = states.reshape(states.shape[0], 4, 84*84)
        # states = np.swapaxes(states, 1, 2)
        discounted_rewards = self.discount(rewards)
        policy, values = self.actor_critic.predict_on_batch(np.array(states))
        # policy, values = self.actor.predict_on_batch(np.array(states)), \
        #     self.critic.predict_on_batch(np.array(states))
        advantages = np.array(discounted_rewards) - np.reshape(
            values, len(values))
        # result = self.actor_critic.train_on_batch(
        #     states, {'actor': np.array(actions),
        #              'critic': discounted_rewards})
        # return result[1:]
        # actor_loss = self.actor_opt([states, np.array(actions),
        #                              advantages])
        # critic_loss = self.critic_opt([states, discounted_rewards])
        # return actor_loss, critic_loss
        return self.opt([
            states,
            np.array(actions), advantages,
            discounted_rewards.reshape(len(discounted_rewards), 1)
        ])

    def discount(self, r):
        """
        Compute the gamma-discounted rewards over an episode
        """
        discounted_r, cumul_r = np.zeros_like(r), 0
        for t in reversed(range(0, len(r))):
            cumul_r = r[t] + cumul_r * self.gamma
            discounted_r[t] = cumul_r
        return discounted_r
def __init__(self, state_size, action_size, reservoir_size=1000,
             spectral_radius=0.99, n_drop=0, leak=0.1, reservoir_scale=1.2,
             connection_probability=0.1, noise_level=0.01, print=True):
    self.input_size = state_size
    self.reservoir_size = reservoir_size
    self.action_size = action_size
    self.leak = leak
    self.reservoir_state = np.zeros(reservoir_size)
    self.reservoir_scale = reservoir_scale
    self.third_layer_buffer = np.zeros(10)

    # init input weights sampled from uniform random numbers between -1 and 1
    self.input_weights = np.random.uniform(-1, 1,
                                           size=(reservoir_size, state_size))

    # init reservoir neuron recurrent weights
    variance = 1 / (connection_probability * reservoir_size)
    self.recurrent_weights = np.random.normal(
        loc=0, scale=variance, size=(reservoir_size, reservoir_size))

    # init multi layer neural network (mlnn)
    input = Input(batch_shape=(None, reservoir_size + state_size))
    layer_1 = Dense(100, activation='tanh',
                    input_dim=reservoir_size + state_size,
                    kernel_initializer=RandomNormal(
                        mean=0.0, stddev=np.sqrt(0.01 / 100)))(input)
    layer_2 = Dense(40, activation='tanh',
                    kernel_initializer=RandomNormal(
                        mean=0.0, stddev=np.sqrt(0.01 / 40)))(layer_1)
    layer_3 = Dense(10, activation='tanh', name='third_layer',
                    kernel_initializer=RandomNormal(
                        mean=0.0, stddev=np.sqrt(0.01 / 10)))(layer_2)
    actions = Dense(action_size, activation='tanh', name='actions')(layer_3)
    value = Dense(1, activation='linear', name='value')(layer_3)
    actions_and_value = concatenate([actions, value])

    # we need two outputs: the actor/critic head, and the third layer
    # output which is fed back into the reservoir
    self.mlnn = Model(inputs=input, outputs=actions_and_value)
    outputs = [layer_3, actions_and_value]
    self.predict = K.function([self.mlnn.input, K.learning_phase()], outputs)

    # the train operator; get_updates already returns a list of update ops
    target_placeholder = Input(batch_shape=(None, action_size + 1))
    loss = mean_squared_error(self.mlnn.output, target_placeholder)
    optimizer = Adam(lr=0.001)
    train_op = optimizer.get_updates(params=self.mlnn.trainable_weights,
                                     loss=loss)
    self._train = K.function(inputs=[self.mlnn.input, target_placeholder],
                             outputs=[self.mlnn.output, loss],
                             updates=train_op)

    # init the feedback weights from the third layer of the mlnn to the
    # reservoir, uniform random numbers between -1 and 1
    self.fb_weights = np.random.uniform(-1, 1, size=(reservoir_size, 10))
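# --- Sketch (not from the original source): the snippet above never shows
# the reservoir update itself. A standard leaky echo-state step consistent
# with the weights initialized above might look like this; the method name,
# the placement of reservoir_scale, and the feedback path are assumptions.
def step(self, state_input):
    pre_activation = (self.input_weights.dot(state_input) +
                      self.reservoir_scale *
                      self.recurrent_weights.dot(self.reservoir_state) +
                      self.fb_weights.dot(self.third_layer_buffer))
    # leaky integration of the reservoir state
    self.reservoir_state = ((1.0 - self.leak) * self.reservoir_state +
                            self.leak * np.tanh(pre_activation))
    # readout: reservoir state concatenated with the raw state
    mlnn_in = np.concatenate([self.reservoir_state, state_input])[None, :]
    layer_3_out, actions_and_value = self.predict([mlnn_in, 0])
    self.third_layer_buffer = layer_3_out[0]    # feedback for the next step
    return actions_and_value[0]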