def optimizer_step(optimizer, before, change1, max_part, change2, after,
                   pos_embeddings, sentence_embeddings, attention_mask,
                   temperature, embedding_map, model):
  """Make one optimization step maximizing the prediction of the target word.

  Args:
    optimizer: The optimizer to be used.
    before: The tensor for everything before the modifiable content.
    change1: Modifiable content before the word to be maximized for.
    max_part: The static tensor around the word to be maximized for.
    change2: Modifiable content after the word to be maximized for.
    after: The tensor for everything after the modifiable content.
    pos_embeddings: The positional embeddings used for inference.
    sentence_embeddings: The sentence embeddings for inference.
    attention_mask: The attention mask used for inference.
    temperature: The temperature used for making the softmax spike.
    embedding_map: Holding all the token embeddings for BERT.
    model: Model to run inference on.

  Returns:
    max_values: The maximal values for the current token representations.
    token_ids: The token ids of the current representation.
    prediction: The current prediction score of the word to be maximized.
  """
  # Reset the gradient
  optimizer.zero_grad()
  # Softmax over the one-hots
  one_hots_sm_1 = one_hots_helper.softmax_one_hots(change1, temperature,
                                                   FLAGS.gumbel)
  one_hots_sm_2 = one_hots_helper.softmax_one_hots(change2, temperature,
                                                   FLAGS.gumbel)
  fused_one_hots = torch.cat([before, one_hots_sm_1, max_part, one_hots_sm_2,
                              after], dim=1)
  # Get the prediction
  prediction_score = inference_helper.run_inference_mlm(
      fused_one_hots, pos_embeddings, sentence_embeddings, attention_mask,
      embedding_map, model)
  prediction = get_prediction(prediction_score, FLAGS.maximize_word,
                              FLAGS.maximize_id, FLAGS.normalize)
  # Calculate the loss as the negated prediction score
  # (Adam wants to minimize this value, we want to maximize the prediction)
  loss = -prediction
  # Backpropagate the loss
  loss.backward(retain_graph=True)
  # Optimize the word vector based on that loss
  optimizer.step()
  # Get the actual tokens and distances to the embedding for this modified
  # embedding
  one_hots_sm_1 = one_hots_helper.softmax_one_hots(change1, temperature,
                                                   FLAGS.gumbel)
  one_hots_sm_2 = one_hots_helper.softmax_one_hots(change2, temperature,
                                                   FLAGS.gumbel)
  fused_one_hots = torch.cat([before, one_hots_sm_1, max_part, one_hots_sm_2,
                              after], dim=1)
  max_values, token_ids = one_hots_helper.get_tokens_from_one_hots(
      fused_one_hots)
  return max_values, token_ids, prediction
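
# Illustrative sketch, not called by this module: the step above in plain
# PyTorch, with the project helpers replaced by a toy linear "model". All
# names and sizes below (_toy_mlm_step, vocab_size, score_head, ...) are
# hypothetical and only demonstrate the pattern of maximizing a score through
# a temperature softmax over trainable token logits.
def _toy_mlm_step():
  import torch
  import torch.nn.functional as F
  vocab_size, seq_len, hidden = 16, 4, 8
  # Trainable logits standing in for `change1`/`change2`; one row per token.
  change = torch.randn(1, seq_len, vocab_size, requires_grad=True)
  # Frozen stand-ins for the embedding map and a scoring head.
  embedding_map = torch.randn(vocab_size, hidden)
  score_head = torch.randn(hidden, vocab_size)
  optimizer = torch.optim.Adam([change], lr=0.1)
  temperature = 1.0
  target_position, target_id = 2, 5
  optimizer.zero_grad()
  # Temperature softmax keeps the relaxed one-hots differentiable.
  one_hots_sm = F.softmax(change / temperature, dim=-1)
  # Soft token mix -> embeddings -> score for the token we want to maximize.
  embeddings = one_hots_sm @ embedding_map
  scores = embeddings @ score_head
  prediction = scores[0, target_position, target_id]
  # Negate so that minimizing the loss maximizes the prediction.
  loss = -prediction
  loss.backward()
  optimizer.step()
  return prediction.item()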
def step_towards_activation(optimizer, before, modify, after, pos_embeddings,
                            sentence_embeddings, att_mask, temperature,
                            iteration, gumbel, write_top_k, k_value, data,
                            word_id, neuron_id, layer_id, modify_start,
                            modify_end, tokenizer, embedding_map, model,
                            target_activation):
  """Optimize the sentence towards the target activation.

  Args:
    optimizer: The optimizer to be used.
    before: The tensor for everything before the modifiable content.
    modify: The tensor of the modifiable content.
    after: The tensor for everything after the modifiable content.
    pos_embeddings: The positional embeddings used for inference.
    sentence_embeddings: The sentence embeddings for inference.
    att_mask: The attention mask used for inference.
    temperature: The temperature used for making the softmax spike.
    iteration: Current iteration number of the optimization process.
    gumbel: Whether to use gumbel noise.
    write_top_k: Whether to write the top-rated tokens per iteration.
    k_value: How many tokens to write to top_k.
    data: Placeholder for the top_k data.
    word_id: Word to get the activation for.
    neuron_id: Neuron to get the activation for.
    layer_id: Layer to get the activation for.
    modify_start: The start index of the modifiable content.
    modify_end: The end index of the modifiable content.
    tokenizer: Used for converting between tokens and ids.
    embedding_map: Holding all the token embeddings for BERT.
    model: Model to run inference on.
    target_activation: The activation we are aiming towards.

  Returns:
    max_values: The maximal values for the current token representations.
    token_ids: The token ids of the current representation.
    loss: The current loss towards the target activation.
  """
  # Reset the gradient
  optimizer.zero_grad()
  # Softmax over the one-hots
  one_hots_sm = one_hots_helper.softmax_one_hots(modify, temperature, gumbel)
  fused_one_hots = torch.cat([before, one_hots_sm, after], dim=1)
  # Check if top_k should be written
  if write_top_k:
    output_helper.write_top_ks(fused_one_hots, k_value, iteration, data,
                               modify_start, modify_end, tokenizer)
  # Get the activation
  layer_activations = inference_helper.run_inference(before, one_hots_sm,
                                                     after, pos_embeddings,
                                                     sentence_embeddings,
                                                     att_mask, embedding_map,
                                                     model)
  activation = activation_helper.get_activations(layer_activations, word_id,
                                                 neuron_id, layer_id)
  # Calculate the loss as the distance to the target activation
  # (Adam minimizes this loss, which pulls the activation towards the target)
  loss = F.mse_loss(activation, target_activation)
  # Backpropagate the loss
  loss.backward(retain_graph=True)
  # Optimize the word vector based on that loss
  optimizer.step()
  # Get the actual tokens and distances to the embedding for this modified
  # embedding
  one_hots_sm = one_hots_helper.softmax_one_hots(modify, temperature, gumbel)
  fused_one_hots = torch.cat([before, one_hots_sm, after], dim=1)
  max_values, token_ids = one_hots_helper.get_tokens_from_one_hots(
      fused_one_hots)
  return max_values, token_ids, loss
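
# Illustrative sketch with hypothetical names, not used by the module: the
# same step with the helpers replaced by a toy network, showing how the MSE
# loss pulls the relaxed one-hots towards a fixed target activation instead
# of simply maximizing an activation.
def _toy_target_activation_step(target_value=0.5):
  import torch
  import torch.nn.functional as F
  vocab_size, seq_len, hidden = 16, 3, 8
  modify = torch.randn(1, seq_len, vocab_size, requires_grad=True)
  embedding_map = torch.randn(vocab_size, hidden)
  layer = torch.nn.Linear(hidden, hidden)
  optimizer = torch.optim.Adam([modify], lr=0.1)
  target_activation = torch.tensor(target_value)
  optimizer.zero_grad()
  one_hots_sm = F.softmax(modify / 1.0, dim=-1)
  # A stand-in "activation" of one layer, reduced to a scalar.
  activation = layer(one_hots_sm @ embedding_map).mean()
  # Distance to the target activation; Adam minimizes this distance.
  loss = F.mse_loss(activation, target_activation)
  loss.backward()
  optimizer.step()
  return loss.item()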
def deep_dream(data, results, params, device, tokenizer, embedding_map, model):
  """Iteratively modify the embedding using gradient descent.

  Args:
    data: Holds the top-k values.
    results: Holds the results of the run.
    params: Holds the parameters of the run.
    device: The device to store the variables on.
    tokenizer: The tokenizer to transform the input.
    embedding_map: Holding all token embeddings.
    model: The model that should dream.
  """
  # An embedding for the tokens is obtained
  tokens = tokenization_helper.tokenize_input_sentence(
      tokenizer, FLAGS.sentence, FLAGS.sentence2)
  tokens_tensor, segments_tensor = tokenization_helper.tensors_from_tokens(
      tokenizer, tokens, device)
  _, pos_embeddings, sentence_embeddings = embeddings_helper.get_embeddings(
      tokens_tensor, segments_tensor, model)
  # Correct the end of the dream if necessary
  if FLAGS.dream_end == 0:
    FLAGS.dream_end = len(tokens) - 2
  # Write the parameters to a file
  output_helper.get_params(params, FLAGS, tokens,
                           embedding_ana=FLAGS.embedding_analysis)
  # Get the smooth one-hot vector that is to be optimized, split into static
  # and modifiable parts
  before, modify, after = one_hots_helper.get_one_hots(
      tokens_tensor.data.cpu().numpy(), FLAGS.dream_start, FLAGS.dream_end,
      device)
  # Obtain the default attention mask to be able to run the model
  attention_mask = attention_mask_helper.get_attention_mask(tokens_tensor)
  # The optimizer used to modify the input embedding
  optimizer = torch.optim.Adam([modify], lr=FLAGS.learning_rate)
  # Init temperature for Gumbel
  temperature = torch.tensor(FLAGS.start_temp, device=device,
                             requires_grad=False)
  # Obtain the properties of the initial embedding
  one_hots_sm = one_hots_helper.softmax_one_hots(modify, temperature,
                                                 FLAGS.gumbel)
  max_values, tokens_ids = one_hots_helper.get_tokens_from_one_hots(
      torch.cat([before, one_hots_sm, after], dim=1))
  numpy_max_values = max_values.data.cpu().numpy()
  ids = tokens_ids.data.cpu().numpy()[0]
  tokens = tokenizer.convert_ids_to_tokens(ids)
  ids_activation = activation_helper.get_ids_activation(
      ids, pos_embeddings, sentence_embeddings, attention_mask,
      FLAGS.dream_start, FLAGS.dream_end, FLAGS.word_id, FLAGS.neuron_id,
      FLAGS.layer_id, FLAGS.normalize, embedding_map, model, device,
      average=True)
  output_helper.init_results(results)
  # Optimize the embedding for i iterations and update the properties to
  # evaluate the result in each step
  for i in range(FLAGS.num_iterations):
    max_vals, tokens_ids, activation, emb_tok, emb_act = optimizer_step(
        optimizer, before, modify, after, pos_embeddings, sentence_embeddings,
        attention_mask, temperature, i, data, tokenizer, embedding_map, model,
        device)
    # Write the properties of the last step
    if (i % FLAGS.metrics_frequency) == 0:
      output_helper.get_metrics(tokens, i, temperature, numpy_max_values,
                                results, activation=activation,
                                ids_activation=ids_activation,
                                emb_tokens=emb_tok, emb_activation=emb_act,
                                emb_ana=FLAGS.embedding_analysis,
                                iterations=FLAGS.num_iterations)
    # Set the numpy max values
    numpy_max_values = max_vals.data.cpu().numpy()
    # Obtain the activation property for the id-array that would result from
    # the optimization
    ids = tokens_ids.data.cpu().numpy()[0]
    tokens = tokenizer.convert_ids_to_tokens(ids)
    # Calculate the activation using the highest scoring words
    ids_activation = activation_helper.get_ids_activation(
        ids, pos_embeddings, sentence_embeddings, attention_mask,
        FLAGS.dream_start, FLAGS.dream_end, FLAGS.word_id, FLAGS.neuron_id,
        FLAGS.layer_id, FLAGS.normalize, embedding_map, model, device,
        average=True)
    # Check if the temperature needs to decrease
    if i > FLAGS.warmup:
      temperature = torch.clamp(temperature * FLAGS.anneal, FLAGS.end_temp)
  # Calculate the final activation just as before, but without backprop
  if (FLAGS.num_iterations % FLAGS.metrics_frequency) == 0:
    with torch.no_grad():
      one_hots_sm = one_hots_helper.softmax_one_hots(
          modify, temperature, FLAGS.gumbel)
      fused_one_hots = torch.cat([before, one_hots_sm, after], dim=1)
      if FLAGS.write_top_k:
        output_helper.get_top_ks(fused_one_hots, FLAGS.k, FLAGS.num_iterations,
                                 data, FLAGS.dream_start, FLAGS.dream_end,
                                 tokenizer, activation=activation)
      layer_activations = inference_helper.run_inference(
          before, one_hots_sm, after, pos_embeddings, sentence_embeddings,
          attention_mask, embedding_map, model)
      activation = activation_helper.get_activation(
          layer_activations, FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id,
          FLAGS.normalize)
      emb_tok, emb_act = embeddings_helper.analyze_current_embedding(
          fused_one_hots, embedding_map, FLAGS.dream_start, FLAGS.dream_end,
          device, pos_embeddings, sentence_embeddings, attention_mask, model,
          FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id, FLAGS.normalize,
          tokenizer)
      output_helper.get_metrics(tokens, FLAGS.num_iterations, temperature,
                                numpy_max_values, results,
                                activation=activation,
                                ids_activation=ids_activation,
                                emb_tokens=emb_tok, emb_activation=emb_act,
                                emb_ana=FLAGS.embedding_analysis,
                                iterations=FLAGS.num_iterations)
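
# Illustrative sketch of the annealing schedule used in the loop above, with
# hypothetical constants: after a warm-up phase, the softmax temperature is
# multiplied by an annealing factor each iteration and clamped so it never
# drops below the configured end temperature.
def _toy_anneal_schedule(start_temp=2.0, end_temp=0.1, anneal=0.9, warmup=10,
                         num_iterations=100):
  import torch
  temperature = torch.tensor(start_temp)
  schedule = []
  for i in range(num_iterations):
    schedule.append(float(temperature))
    if i > warmup:
      # torch.clamp with a single bound keeps temperature >= end_temp.
      temperature = torch.clamp(temperature * anneal, end_temp)
  return schedule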
def optimizer_step(optimizer, before, modify, after, pos_embeddings,
                   sentence_embeddings, attention_mask, temperature, iteration,
                   data, tokenizer, embedding_map, model, device):
  """Make a step along the gradient of the optimizer.

  Args:
    optimizer: The optimizer that is used for gradient descent.
    before: Embeddings of everything up to the modifiable content.
    modify: Embeddings of the modifiable content.
    after: Embeddings of everything after the modifiable content.
    pos_embeddings: Positional embeddings of the current sequence.
    sentence_embeddings: Sentence embeddings of the current sequence.
    attention_mask: Attention mask to be used with the current sequence.
    temperature: Current temperature of the softmax function.
    iteration: Current iteration of the optimization.
    data: Top-k data to be written after optimization.
    tokenizer: Converts between tokens and their ids.
    embedding_map: Holding the embeddings for each token.
    model: The model to be used with this optimization.
    device: Where to store the variables.

  Returns:
    max_values: The values of the tokens with the highest softmax value.
    token_ids: The ids of the tokens with the highest softmax value.
    activation: The activation of the current input representation.
    emb_tokens: The tokens of the closest embedding representing real tokens.
    emb_activation: Activation for closest embedding representing real tokens.
  """
  # Reset the gradient
  optimizer.zero_grad()
  # Softmax over the one-hots
  one_hots_sm = one_hots_helper.softmax_one_hots(modify, temperature,
                                                 FLAGS.gumbel)
  fused_one_hots = torch.cat([before, one_hots_sm, after], dim=1)
  # Check if the embedding analysis is to be done
  emb_tokens = None
  emb_activation = None
  if FLAGS.embedding_analysis != 0:
    if iteration % FLAGS.embedding_analysis == 0:
      tok, act = embeddings_helper.analyze_current_embedding(
          fused_one_hots, embedding_map, FLAGS.dream_start, FLAGS.dream_end,
          device, pos_embeddings, sentence_embeddings, attention_mask, model,
          FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id, FLAGS.normalize,
          tokenizer)
      emb_tokens = tok
      emb_activation = act
  # Get the activation
  layer_activations = inference_helper.run_inference(before, one_hots_sm,
                                                     after, pos_embeddings,
                                                     sentence_embeddings,
                                                     attention_mask,
                                                     embedding_map, model)
  activation = activation_helper.get_activation(layer_activations,
                                                FLAGS.word_id, FLAGS.neuron_id,
                                                FLAGS.layer_id,
                                                FLAGS.normalize)
  # Check if top_k should be written
  if FLAGS.write_top_k:
    output_helper.get_top_ks(fused_one_hots, FLAGS.k, iteration, data,
                             FLAGS.dream_start, FLAGS.dream_end, tokenizer,
                             activation=activation)
  # Calculate the loss as the negated activation of the layer to be optimized
  # for (Adam wants to minimize this value, we want to maximize the activation)
  loss = -activation
  # Backpropagate the loss
  loss.backward(retain_graph=True)
  # Optimize the word vector based on that loss
  optimizer.step()
  # Get the actual tokens and distances to the embedding for this modified
  # embedding
  one_hots_sm = one_hots_helper.softmax_one_hots(modify, temperature,
                                                 FLAGS.gumbel)
  fused_one_hots = torch.cat([before, one_hots_sm, after], dim=1)
  max_values, token_ids = one_hots_helper.get_tokens_from_one_hots(
      fused_one_hots)
  return max_values, token_ids, activation, emb_tokens, emb_activation
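
# Illustrative sketch of a softmax relaxation like the one
# one_hots_helper.softmax_one_hots presumably performs (an assumption; the
# real helper may differ): a plain temperature softmax over the logits,
# optionally with Gumbel noise added before the softmax.
def _toy_softmax_one_hots(logits, temperature, gumbel):
  import torch
  import torch.nn.functional as F
  if gumbel:
    # Sample Gumbel(0, 1) noise and add it to the logits.
    uniform = torch.rand_like(logits)
    noise = -torch.log(-torch.log(uniform + 1e-20) + 1e-20)
    logits = logits + noise
  return F.softmax(logits / temperature, dim=-1)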
def deep_dream(data, results, params, device, tokenizer, embedding_map, model):
  """Deep dream to a target activation.

  Args:
    data: Holds the top-k values.
    results: Holds the results of the run.
    params: Holds the parameters of the run.
    device: Where to place new variables.
    tokenizer: Used to convert between ids and tokens.
    embedding_map: Holding all BERT token embeddings.
    model: The model used for this dream.
  """
  # An embedding for the tokens is obtained
  tokens = tokenization_helper.tokenize_input_sentence(
      tokenizer, FLAGS.sentence, FLAGS.sentence2)
  tokens_tensor, segments_tensor = tokenization_helper.tensors_from_tokens(
      tokenizer, tokens, device)
  _, pos_embeddings, sentence_embeddings = embeddings_helper.get_embeddings(
      tokens_tensor, segments_tensor, model)
  # Correct the end of the dream if necessary
  if FLAGS.dream_end == 0:
    FLAGS.dream_end = len(tokens) - 2
  # Write the parameters to a file
  output_helper.get_params(params, FLAGS, tokens)
  # Get the smooth one-hot vector that is to be optimized, split into static
  # and modifiable parts
  before, modify, after = one_hots_helper.get_one_hots(
      tokens_tensor.data.cpu().numpy(), FLAGS.dream_start, FLAGS.dream_end,
      device)
  modify = torch.randn(modify.shape, device=device, requires_grad=True)
  # Obtain the default attention mask to be able to run the model
  att_mask = attention_mask_helper.get_attention_mask(tokens_tensor)
  # The optimizer used to modify the input embedding
  optimizer = torch.optim.Adam([modify], lr=FLAGS.learning_rate)
  # Init temperature for Gumbel
  temperature = torch.tensor(FLAGS.start_temp, device=device,
                             requires_grad=False)
  # Obtain the target activation we try to optimize towards.
  target_ids = tokens_tensor.data.cpu().numpy()[0]
  target_activation = activation_helper.get_ids_activation(
      target_ids, pos_embeddings, sentence_embeddings, att_mask,
      FLAGS.dream_start, FLAGS.dream_end, FLAGS.word_id, FLAGS.neuron_id,
      FLAGS.layer_id, False, embedding_map, model, device)
  target_activation = change_target_activation(target_activation, device)
  target_activation = target_activation.clone().detach().requires_grad_(False)
  # Obtain the properties of the initial embedding
  one_hots_sm = one_hots_helper.softmax_one_hots(modify, temperature,
                                                 FLAGS.gumbel)
  max_values, token_ids = one_hots_helper.get_tokens_from_one_hots(
      torch.cat([before, one_hots_sm, after], dim=1))
  numpy_max_values = max_values.data.cpu().numpy()
  ids = token_ids.data.cpu().numpy()[0]
  tokens = tokenizer.convert_ids_to_tokens(ids)
  ids_activation = activation_helper.get_ids_activation(
      ids, pos_embeddings, sentence_embeddings, att_mask, FLAGS.dream_start,
      FLAGS.dream_end, FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id, False,
      embedding_map, model, device)
  # Write the initial stuff for the results file
  output_helper.init_results(results)
  # Optimize the embedding for i iterations and update the properties to
  # evaluate the result in each step
  for i in range(FLAGS.num_iterations):
    # Do an optimization step
    max_vals, token_ids, loss = optimization_helper.step_towards_activation(
        optimizer, before, modify, after, pos_embeddings, sentence_embeddings,
        att_mask, temperature, i, FLAGS.gumbel, FLAGS.write_top_k, FLAGS.k,
        data, FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id,
        FLAGS.dream_start, FLAGS.dream_end, tokenizer, embedding_map, model,
        target_activation)
    # Write the properties of the last step
    ids_loss = F.mse_loss(ids_activation, target_activation)
    if (i % FLAGS.metrics_frequency) == 0:
      output_helper.get_metrics(tokens, i, temperature, numpy_max_values,
                                results, loss=loss, ids_loss=ids_loss)
    # Set the numpy max values
    numpy_max_values = max_vals.data.cpu().numpy()
    # Obtain the activation property for the id-array that would result from
    # the optimization
    ids = token_ids.data.cpu().numpy()[0]
    tokens = tokenizer.convert_ids_to_tokens(ids)
    # Calculate the activation using the highest scoring words
    ids_activation = activation_helper.get_ids_activation(
        ids, pos_embeddings, sentence_embeddings, att_mask, FLAGS.dream_start,
        FLAGS.dream_end, FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id, False,
        embedding_map, model, device)
    # Check if the temperature needs to decrease
    if i > FLAGS.warmup:
      temperature = torch.clamp(temperature * FLAGS.anneal, FLAGS.end_temp)
  # Calculate the final activation just as before, but without backprop
  if (FLAGS.num_iterations % FLAGS.metrics_frequency) == 0:
    with torch.no_grad():
      one_hots_sm = one_hots_helper.softmax_one_hots(
          modify, temperature, FLAGS.gumbel)
      fused_one_hots = torch.cat([before, one_hots_sm, after], dim=1)
      if FLAGS.write_top_k:
        output_helper.write_top_ks(fused_one_hots, FLAGS.k,
                                   FLAGS.num_iterations, data,
                                   FLAGS.dream_start, FLAGS.dream_end,
                                   tokenizer)
      layers = inference_helper.run_inference(before, one_hots_sm, after,
                                              pos_embeddings,
                                              sentence_embeddings, att_mask,
                                              embedding_map, model)
      activation = activation_helper.get_activations(
          layers, FLAGS.word_id, FLAGS.neuron_id, FLAGS.layer_id)
      loss = F.mse_loss(activation, target_activation)
      ids_loss = F.mse_loss(ids_activation, target_activation)
      output_helper.get_metrics(tokens, FLAGS.num_iterations, temperature,
                                numpy_max_values, results, loss=loss,
                                ids_loss=ids_loss)
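
# Illustrative sketch, based on an assumption about what
# one_hots_helper.get_tokens_from_one_hots does: recover a discrete sentence
# from the relaxed one-hots by taking, per position, the highest-scoring
# vocabulary entry and its value.
def _toy_tokens_from_one_hots(fused_one_hots):
  import torch
  # fused_one_hots: (batch, sequence_length, vocab_size)
  max_values, token_ids = torch.max(fused_one_hots, dim=-1)
  return max_values, token_ids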
def deep_dream(results, params, device, tokenizer, embedding_map, model):
  """Deep dream to maximally activate the class probability for a token.

  Args:
    results: Holds the results of the run.
    params: Holds the parameters of the run.
    device: The device to store the variables on.
    tokenizer: The tokenizer to transform the input.
    embedding_map: Holding all token embeddings.
    model: The model that should dream.
  """
  # An embedding for the tokens is obtained
  tokens = tokenization_helper.tokenize_input_sentence(
      tokenizer, FLAGS.sentence, FLAGS.sentence2,
      mask_word=FLAGS.maximize_word)
  tokens_tensor, segments_tensor = tokenization_helper.tensors_from_tokens(
      tokenizer, tokens, device)
  _, pos_embeddings, sentence_embeddings = embeddings_helper.get_embeddings(
      tokens_tensor, segments_tensor, model.bert)
  # Write the parameters to a file
  output_helper.get_params_mlm(params, FLAGS, tokens)
  # Get the smooth one-hot vector that is to be optimized, split into static
  # and modifiable parts
  before, change1, max_part, change2, after = one_hots_helper.get_one_hots_mlm(
      tokens_tensor.data.cpu().numpy(), FLAGS.dream_before_start,
      FLAGS.dream_before_end, FLAGS.dream_after_start, FLAGS.dream_after_end,
      device)
  # Obtain the default attention mask to be able to run the model
  attention_mask = attention_mask_helper.get_attention_mask(tokens_tensor)
  # The optimizer used to modify the input embedding
  optimizer = torch.optim.Adam([change1, change2], lr=FLAGS.learning_rate)
  # Init temperature for Gumbel
  temperature = torch.tensor(FLAGS.start_temp, device=device,
                             requires_grad=False)
  # Obtain the properties of the initial embedding
  one_hots_sm_1 = one_hots_helper.softmax_one_hots(change1, temperature,
                                                   FLAGS.gumbel)
  one_hots_sm_2 = one_hots_helper.softmax_one_hots(change2, temperature,
                                                   FLAGS.gumbel)
  max_values, tokens_ids = one_hots_helper.get_tokens_from_one_hots(
      torch.cat([before, one_hots_sm_1, max_part, one_hots_sm_2, after],
                dim=1))
  numpy_max_values = max_values.data.cpu().numpy()
  ids = tokens_ids.data.cpu().numpy()[0]
  tokens = tokenizer.convert_ids_to_tokens(ids)
  ids_prediction = get_ids_prediction(
      ids, pos_embeddings, sentence_embeddings, attention_mask,
      FLAGS.maximize_word, FLAGS.maximize_id, FLAGS.normalize, embedding_map,
      model, device, FLAGS.dream_before_start, FLAGS.dream_before_end,
      FLAGS.dream_after_start, FLAGS.dream_after_end)
  output_helper.init_results(results)
  # Optimize the embedding for i iterations and update the properties to
  # evaluate the result in each step
  for i in range(FLAGS.num_iterations):
    max_vals, tokens_ids, prediction = optimizer_step(
        optimizer, before, change1, max_part, change2, after, pos_embeddings,
        sentence_embeddings, attention_mask, temperature, embedding_map, model)
    # Write the properties of the last step
    if (i % FLAGS.metrics_frequency) == 0:
      output_helper.get_metrics_mlm(tokens, prediction, ids_prediction, i,
                                    temperature, numpy_max_values, results)
    # Set the numpy max values
    numpy_max_values = max_vals.data.cpu().numpy()
    # Obtain the prediction property for the id-array that would result from
    # the optimization
    ids = tokens_ids.data.cpu().numpy()[0]
    tokens = tokenizer.convert_ids_to_tokens(ids)
    # Calculate the prediction using the highest scoring words
    ids_prediction = get_ids_prediction(
        ids, pos_embeddings, sentence_embeddings, attention_mask,
        FLAGS.maximize_word, FLAGS.maximize_id, FLAGS.normalize, embedding_map,
        model, device, FLAGS.dream_before_start, FLAGS.dream_before_end,
        FLAGS.dream_after_start, FLAGS.dream_after_end)
    # Check if the temperature needs to decrease
    if i > FLAGS.warmup:
      temperature = torch.clamp(temperature * FLAGS.anneal, FLAGS.end_temp)
  # Calculate the final prediction just as before, but without backprop
  if (FLAGS.num_iterations % FLAGS.metrics_frequency) == 0:
    with torch.no_grad():
      one_hots_sm_1 = one_hots_helper.softmax_one_hots(change1, temperature,
                                                       FLAGS.gumbel)
      one_hots_sm_2 = one_hots_helper.softmax_one_hots(change2, temperature,
                                                       FLAGS.gumbel)
      fused = torch.cat([before, one_hots_sm_1, max_part, one_hots_sm_2,
                         after], dim=1)
      prediction_score = inference_helper.run_inference_mlm(
          fused, pos_embeddings, sentence_embeddings, attention_mask,
          embedding_map, model)
      prediction = get_prediction(prediction_score, FLAGS.maximize_word,
                                  FLAGS.maximize_id, FLAGS.normalize)
      output_helper.get_metrics_mlm(tokens, prediction, ids_prediction,
                                    FLAGS.num_iterations, temperature,
                                    numpy_max_values, results)
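
# Illustrative end-to-end toy loop with hypothetical sizes and a stand-in
# model, showing how the pieces above fit together: relax the one-hots,
# score, backpropagate, anneal the temperature, and finally read out the
# highest-scoring tokens. This is a sketch under the assumption that the real
# helpers behave like their plain-PyTorch counterparts used here.
def _toy_dream_loop(num_iterations=50, start_temp=2.0, end_temp=0.1,
                    anneal=0.9, warmup=10):
  import torch
  import torch.nn.functional as F
  vocab_size, seq_len, hidden = 16, 4, 8
  change = torch.randn(1, seq_len, vocab_size, requires_grad=True)
  embedding_map = torch.randn(vocab_size, hidden)
  score_head = torch.randn(hidden, vocab_size)
  optimizer = torch.optim.Adam([change], lr=0.1)
  temperature = torch.tensor(start_temp)
  target_position, target_id = 2, 5
  for i in range(num_iterations):
    optimizer.zero_grad()
    one_hots_sm = F.softmax(change / temperature, dim=-1)
    scores = (one_hots_sm @ embedding_map) @ score_head
    # Negated score: minimizing the loss maximizes the target prediction.
    loss = -scores[0, target_position, target_id]
    loss.backward()
    optimizer.step()
    if i > warmup:
      temperature = torch.clamp(temperature * anneal, end_temp)
  # Discretize: per position, take the most likely vocabulary entry.
  with torch.no_grad():
    one_hots_sm = F.softmax(change / temperature, dim=-1)
    _, token_ids = torch.max(one_hots_sm, dim=-1)
  return token_ids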