def test_beam(self):
    """Check that a Beam keeps only its best-scored entries, best first.

    Each case pairs a beam of a given budget with a list of
    ``(item, score)`` tuples. The tuples are inserted in random order and
    the beam's ``check_balance()`` output is expected to equal the tuples
    sorted by descending score and cut to ``beam.budget`` entries.
    """
    cases = [
        (Beam(4), [("aaa", 1), ("bbb", 0), ("ccc", -3)]),
        (Beam(3), [("aaa", 1), ("bbb", -3), ("ccc", 0)]),
        (Beam(2), [("aaa", 1), ("bbb", -3), ("ccc", 0), ("ddd", 2)]),
    ]
    for beam, entries in cases:
        # Insertion order must not influence what the beam retains.
        random.shuffle(entries)
        for item, score in entries:
            beam.add(item, score)

        expected = sorted(entries, key=lambda pair: -pair[1])[:beam.budget]
        actual = beam.check_balance()

        self.assertEqual(len(expected), len(actual))
        for want, got in zip(expected, actual):
            self.assertTupleEqual(want, got)
def beam_search(self, seqs, lens, B, L, vocab):
    """Decode one sentence per batch element with beam search.

    :param seqs: source sequences, passed straight to ``self.encode``
    :param lens: source lengths, passed straight to ``self.encode``
    :param B: batch size (one ``Beam`` is created per batch element)
    :param L: maximum number of decoding steps
    :param vocab: vocabulary exposing ``index2word`` for id -> word lookup
    :return: a list of ``B`` strings, the best hypothesis of every beam with
        ``constant.eou_idx`` and ``constant.pad_idx`` tokens removed
    """
    n_best = 1
    width = constant.beam_size
    beams = [
        Beam(width, n_best=n_best, global_scorer=GNMTGlobalScorer())
        for _ in range(B)
    ]

    # (1) Encode the source once.
    enc_out, state = self.encode(seqs, lens)

    # (2) Replicate decoder state (along dim 1) and, when attention is used,
    # the encoder outputs (along dim 0) once per beam slot.
    state = tile(state, width, dim=1)
    if self.use_attn:
        enc_out = tile(enc_out.contiguous(), width, dim=0)

    # (3) Step the decoder until every beam is done or L steps have run.
    for _step in range(L):
        if all(b.done() for b in beams):
            break

        # (a) Flatten the pending word of every beam slot into one vector.
        tokens = torch.stack([b.get_current_state() for b in beams]).view(-1)

        # (b) One decoder step, then per-slot log-probabilities (B, K, V).
        logits, state = self.decoder(tokens, state, enc_out)
        log_probs = F.log_softmax(logits.view(B, width, -1), dim=2)

        # (c) Advance each beam and remember which flattened slot every
        # surviving hypothesis came from.
        origins = []
        for batch_idx, b in enumerate(beams):
            b.advance(log_probs[batch_idx], None)
            origins.append(b.get_current_origin() + batch_idx * width)

        # Reorder the decoder state so it follows the surviving hypotheses.
        state = state.index_select(1, torch.cat(origins))

    # (4) Turn the best finished hypothesis of every beam into text.
    ignored = [constant.eou_idx, constant.pad_idx]
    preds = []
    for b in beams:
        _scores, finished = b.sort_finished(minimum=n_best)
        sentences = []
        for times, k in finished[:n_best]:
            hyp, _attn = b.get_hyp(times, k)
            words = [
                vocab.index2word[tok.item()]
                for tok in hyp
                if tok.item() not in ignored
            ]
            sentences.append(" ".join(words))
        preds.append(sentences[0])
    return preds
def try_all_pos(self, pos: list, tran: Transformation, gradients, get_embed,
                candidates: Beam):
    """
    Apply ``tran`` at every still-available position and push the results.

    :param pos: possible positions in the ORIGINAL input, a list of
        (start_pos, end_pos)
    :param tran: the target transformation
    :param gradients: the gradients with respect to self.x, or None
    :param get_embed: a function for getting the embedding of a list of
        tokens, or None
    :param candidates: a beam of candidates, modified in place by this method
    :return: None
    """
    # Gradient-based scoring needs both pieces; otherwise scores are random.
    score_by_gradient = not (get_embed is None or gradients is None)

    for start_pos_ori, end_pos_ori in pos:
        # Skip spans that contain a position with no counterpart in self.x.
        if any(self.map_ori2x[i] is None
               for i in range(start_pos_ori, end_pos_ori)):
            continue

        start_pos_x = self.map_ori2x[start_pos_ori]
        # self.map_ori2x[end_pos_ori] may be None, so derive the exclusive
        # end from the last position inside the span instead.
        end_pos_x = self.map_ori2x[end_pos_ori - 1] + 1

        for new_x in tran.transformer(self.x, start_pos_x, end_pos_x):
            delta_len = len(new_x) - len(self.x)

            if score_by_gradient:
                old_embedding = get_embed(self.x[start_pos_x:])
                # ret_len fixes the number of returned rows (padded when the
                # token list is shorter) so both embeddings line up.
                new_tail = new_x[start_pos_x:min(len(new_x), len(self.x))]
                new_embedding = get_embed(
                    new_tail, ret_len=len(self.x) - start_pos_x)
                # First-order estimate of the effect of the edit.
                new_score = self.score + np.sum(
                    gradients[start_pos_x:] * (new_embedding - old_embedding))
            else:
                new_score = np.random.random()

            # Positions inside the edited span become unavailable; positions
            # after it shift by the change in length.
            untouched = self.map_ori2x[:start_pos_ori]
            consumed = [None] * (end_pos_ori - start_pos_ori)
            shifted = [
                p if p is None else p + delta_len
                for p in self.map_ori2x[end_pos_ori:]
            ]
            new_map_ori2x = untouched + consumed + shifted

            candidates.add(Candidate(new_x, new_score, new_map_ori2x),
                           new_score)
def gen_adv(self, model, tree, x: list, top_n: int, dataset_vocab,
            return_score=False):
    """
    Beam search for the perturbation space. The order of beam search is the
    same as in the perturbation DSL.
    TODO: the order of beam search can be rearranged for better performance.

    :param model: the victim model, which has to support a method get_grad.
    :param tree: the target tree
    :param x: a list of input tokens.
    :param top_n: maximum number of adversarial candidates given the
        perturbation space.
    :param dataset_vocab: the vocabulary in the dataset, not the vocabulary
        used in training
    :param return_score: whether to return the score as a list
        [(tree, score)]; default False, i.e., return [tree]
    :return: a list of adversarial examples
    :raises AttributeError: if ``model`` has no ``get_grad`` attribute
    """
    # Only a genuinely missing attribute is reported here; any other error
    # raised while looking the attribute up propagates unchanged.
    if not hasattr(model, "get_grad"):
        raise AttributeError(
            "The victim model does not support get_grad method.")

    meta_data = (x, tree, dataset_vocab)
    initial_score = np.random.random() if self.use_random_aug else 0
    candidate = CandidateTree(tree, x, initial_score)
    candidates = Beam(top_n)
    candidates.add(candidate, candidate.score)

    for tran, delta in self.perturbation:
        possible_pos = tran.get_pos(x)  # get a list of possible positions
        # Candidates already expanded with this transformation, kept to
        # eliminate duplicated work.
        perturbed_set = set()
        for _ in range(delta):
            # Snapshot the beam because it is modified in the loop below.
            old_candidates = candidates.check_balance()
            for candidate, _score in old_candidates:
                if candidate in perturbed_set:
                    continue
                if len(x) > 0:
                    if self.use_random_aug:
                        gradients = None
                    else:
                        gradients = model.get_grad(candidate.tree,
                                                   isinstance(tran, Ins))
                    candidate.try_all_pos(meta_data, possible_pos, tran,
                                          gradients, model, candidates)
                perturbed_set.add(candidate)

    ret = candidates.check_balance()
    if return_score:
        return [(cand.tree, cand.score) for cand, _score in ret]
    return [cand.tree for cand, _score in ret]
def gen_adv(self, model, x: list, y, top_n: int, get_embed,
            return_score=False):
    """
    Beam search for the perturbation space. The order of beam search is the
    same as in the perturbation DSL.
    TODO: the order of beam search can be rearranged for better performance.

    :param model: the victim model, which has to support a method get_grad.
    :param x: a list of input tokens.
    :param y: the correct label of input x.
    :param top_n: maximum number of adversarial candidates given the
        perturbation space.
    :param get_embed: get_embed(x, ret_len) takes a list of tokens as inputs
        and outputs the embedding matrix with shape (ret_len, dim).
        If ret_len > len(x), padding is needed; if ret_len < len(x),
        truncating is needed (currently never requested).
    :param return_score: whether to return the score as a list
        [(sen, score)]; default False, i.e., return [sen]
    :return: a list of adversarial examples
    :raises AttributeError: if ``model`` has no ``get_grad`` attribute
    """
    # Only a genuinely missing attribute is reported here; any other error
    # raised while looking the attribute up propagates unchanged.
    if not hasattr(model, "get_grad"):
        raise AttributeError(
            "The victim model does not support get_grad method.")

    initial_score = np.random.random() if self.use_random_aug else 0
    candidate = Candidate(x, initial_score)
    candidates = Beam(top_n)
    candidates.add(candidate, candidate.score)

    for tran, delta in self.perturbation:
        possible_pos = tran.get_pos(x)  # get a list of possible positions
        # Candidates already expanded with this transformation, kept to
        # eliminate duplicated work.
        perturbed_set = set()
        for _ in range(delta):
            # Snapshot the beam because it is modified in the loop below.
            old_candidates = candidates.check_balance()
            for candidate, _score in old_candidates:
                if candidate in perturbed_set:
                    continue
                if len(candidate.x) > 0:
                    if self.use_random_aug:
                        # Random augmentation: no gradients, no embeddings.
                        candidate.try_all_pos(possible_pos, tran, None, None,
                                              candidates)
                    else:
                        candidate.try_all_pos(
                            possible_pos, tran,
                            model.get_grad(candidate.x, y), get_embed,
                            candidates)
                perturbed_set.add(candidate)

    ret = candidates.check_balance()
    if return_score:
        return [(cand.x, cand.score) for cand, _score in ret]
    return [cand.x for cand, _score in ret]
def eval(model, token_tables, num_samples=100, beam_size=10, em=True,
         num_examples=4):
    """Evaluate ``model`` on freshly sampled tasks and print a summary.

    For each of ``num_samples`` sampled (program, examples) pairs a beam
    search of width ``beam_size`` is run over the decoder. Sequences whose
    next token is 0 (treated as EOS) are collected as results, and a sample
    counts as a match when

    * ``em`` is true and some result equals the expected program, or
    * ``em`` is false and some result is consistent with all
      ``num_examples`` examples according to ``num_consistent``.

    Nothing is returned; the histogram of consistent programs per beam and
    the accuracy are printed. With ``num_samples == 0`` the accuracy is
    reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    model.eval()
    num_match = 0
    print("Evaluatng Examples...")
    # Index i counts the samples whose result beam held i consistent programs.
    num_in_beams_consistent = [0] * (beam_size + 1)

    for sample_idx in range(num_samples):
        if sample_idx % 10 == 0:
            print("On example {}".format(sample_idx))

        expected_programs, examples = sample(token_tables)
        # Do not allow generating programs longer than max_len.
        max_len = len(expected_programs) + 5

        # Elements in beam are tuples ([sequence], output_all_hidden, hidden).
        beam = Beam(beam_size)
        res_beam = Beam(beam_size)
        output_all_hidden, hidden = model.encode_io([examples])
        beam.add(([model.program_size], output_all_hidden, hidden), 0)

        iteration = 0
        while len(beam) > 0 and iteration < max_len:
            next_beam = Beam(beam_size)
            for elt, score in beam.get_elts_and_scores():
                # Get the next-token log-probabilities for this hypothesis.
                sequence, output_all_hidden, hidden = elt
                inp_idx = sequence[-1]
                decoder_input = [
                    model.decoder_embedding(
                        torch.tensor([inp_idx], device=model.device,
                                     dtype=torch.long))
                    for _ in range(hidden[0].size()[1])
                ]
                probs, output_all_hidden, hidden = model.next_probs(
                    decoder_input, hidden, output_all_hidden)
                probs = F.log_softmax(probs.squeeze(0), dim=-1)

                # Expand with the top beam_size continuations. The result is
                # deliberately not bound to the outer loop index name.
                top_scores, top_ids = torch.topk(probs, dim=-1, k=beam_size)
                for next_score, next_idx in zip(top_scores, top_ids):
                    next_sequence = copy.deepcopy(sequence)
                    next_sequence.append(next_idx.item())
                    if next_idx == 0:  # EOS: the hypothesis is finished
                        res_beam.add(next_sequence, score + next_score)
                    else:
                        next_beam.add(
                            (next_sequence, output_all_hidden, hidden),
                            score + next_score)
            beam = next_beam
            iteration += 1

        # Evaluate the finished hypotheses of this sample.
        this_beam = 0
        matched = False
        for sequence, _ in res_beam.get_elts_and_scores():
            sequence = sequence[1:]  # drop the start symbol
            if em and sequence == expected_programs:
                num_match += 1
                break
            elif not em:
                expected_in = copy.deepcopy(expected_programs)
                consistent = num_consistent((expected_in, examples),
                                            sequence, token_tables)
                if consistent == num_examples:
                    if not matched:
                        num_match += 1
                        matched = True
                    this_beam += 1
        num_in_beams_consistent[this_beam] += 1

    print("Number of beams with [index] many consistent programs in the beam:")
    print(num_in_beams_consistent)
    accuracy = (num_match / num_samples) * 100 if num_samples else 0.0
    # The backslash is escaped explicitly: same printed text as before,
    # without relying on the invalid escape sequence "\%".
    print('{}\\% Accuracy!'.format(accuracy))
def beam_search(self, teststring, beam_width=5, clip_len=7,
                end_token='<end>', start_token='<start>'):
    """Lazily yield ``(labels, probability)`` pairs found by beam search.

    Beam entries are tuples ``(logprob, complete, prefix, labels)``. In each
    round every incomplete entry is extended with the continuations returned
    by the distribution helper; an entry becomes complete when the
    continuation is ``end_token``. After each round, entries at the top of
    the beam that are complete, or whose prefix holds ``clip_len`` items
    beyond the initial one, are yielded (without the start label, with the
    log-probability exponentiated) and removed. The generator ends once the
    beam has been emptied that way.
    """
    encoded_word = self.__encode_string(teststring)

    # Seed the beam with a single, incomplete hypothesis.
    beam = Beam(beam_width)
    beam.add((0.0, False, [self.__label_edge_index], [start_token]))

    while True:
        curr_beam = Beam(beam_width)
        for logprob, complete, prefix, labels in beam:
            if complete:
                # Finished hypotheses are carried over untouched.
                curr_beam.add((logprob, True, prefix, labels))
                continue
            for next_prob, i, next_word in self.__distribution(
                    encoded_word, prefix):
                total = logprob + next_prob
                if next_word == end_token:
                    curr_beam.add((total, True, prefix, labels))
                else:
                    curr_beam.add(
                        (total, False, prefix + [i], labels + [next_word]))

        # Emit and drop every leading entry that is finished or clipped.
        removed_any = False
        while True:
            best = curr_beam.get_best()[0]
            best_prob, best_complete, best_prefix, best_labels = best
            if not (best_complete or len(best_prefix) - 1 == clip_len):
                break
            yield (best_labels[1:], math.exp(best_prob))
            curr_beam.remove(best)
            removed_any = True
            if curr_beam.is_empty():
                break

        if not removed_any:
            beam = curr_beam
        elif curr_beam.is_empty():
            return
        else:
            # Rebuild the beam from the remaining entries after removals.
            beam = Beam(beam_width, curr_beam)
def beam_decode(self, dec_hidden, enc_out, beam_width=3, topk=3):
    """Decode token-id sequences with beam search.

    :param dec_hidden: initial decoder hidden state
    :param enc_out: encoder outputs; ``enc_out.size(2)`` sizes the initial
        input feed
    :param beam_width: number of nodes expanded per step and number of
        continuations created per expanded node
    :param topk: number of finished sequences to collect before stopping
    :return: a list of token-id lists, ordered by ascending node score
    """
    # Start from token id 2 (the <SOS> id per the original comment) with an
    # all-zero input feed, wrapped in a search node without predecessor.
    dec_input = torch.zeros(1, 1).fill_(2).long().to(self.device)
    input_feed = torch.zeros(1, 1, enc_out.size(2), device=self.device)
    node = BeamSearchNode(dec_hidden, None, dec_input, 0, 1, input_feed)

    # in_nodes holds the nodes to expand at this step, out_nodes collects
    # their children, endnodes the finished hypotheses.
    in_nodes = Beam()
    out_nodes = Beam()
    endnodes = []
    in_nodes.put((node.eval(), node))

    for _step in range(self.max_len_sentence):
        # At each step, expand at most the beam_width best nodes.
        for _slot in range(beam_width):
            score, n = in_nodes.get()
            dec_input = n.wordid
            dec_hidden = n.hidden
            input_feed = n.feed

            # A special token on any node but the root ends this hypothesis.
            if (n.wordid.item() in self.special_tokens_ids
                    and n.prevnode is not None):
                endnodes.append((score, n))
                if len(endnodes) >= topk:
                    break
                continue

            # One decoder step: embed the token, append the input feed.
            dec_input = self.embedding(dec_input)
            dec_input = torch.cat((dec_input, input_feed), 2)
            dec_output, dec_hidden = self.decode_rnn(
                dec_input, dec_hidden, enc_out)
            out = self.gen(dec_output)

            # Create and queue one child per top-scoring continuation.
            log_prob, indexes = torch.topk(out, beam_width)
            for new_k in range(beam_width):
                out_t = indexes[0][new_k].view(1, -1)
                log_p = log_prob[0][new_k].item()
                child = BeamSearchNode(dec_hidden, n, out_t, n.logp + log_p,
                                       n.leng + 1, dec_output)
                out_nodes.put((child.eval(), child))

            # The input queue only runs dry on the first step, when it holds
            # nothing but the start node.
            if len(in_nodes) == 0:
                break

        # The children become the nodes to expand at the next step.
        in_nodes = out_nodes
        out_nodes = Beam()

        if len(endnodes) >= topk:
            break

    # No hypothesis finished: fall back to the best pending nodes.
    if not endnodes:
        endnodes = [in_nodes.get() for _ in range(topk)]

    # Walk each hypothesis back to the root, then reverse it.
    utterances = []
    for _score, n in sorted(endnodes, key=operator.itemgetter(0)):
        utterance = [n.wordid.item()]
        while n.prevnode is not None:
            n = n.prevnode
            utterance.append(n.wordid.item())
        utterances.append(utterance[::-1])
    return utterances
def beam_search(self, x, max_sum_len, beam_width, len_oovs, x_padding_masks):
    """Generate a summary with beam search.

    Args:
        x (Tensor): Input sequence as the source.
        max_sum_len (int): The maximum number of decoding steps.
        beam_width (int): Beam size.
        len_oovs (Tensor): Out-of-vocabulary counts; its maximum is
            forwarded to ``self.best_k``.
        x_padding_masks (Tensor): The padding masks for the input sequences.

    Returns:
        The ``tokens`` of the hypothesis with the highest ``seq_score()``
        among the completed hypotheses and the final live ones.
    """
    # Encode the source with OOV ids replaced.
    source = replace_oovs(x, self.vocab)
    encoder_output, encoder_states = self.model.encoder(
        source, self.model.decoder.embedding)

    # The first hypothesis holds SOS only, with a zero coverage vector and
    # decoder states derived from the encoder states.
    coverage_vector = torch.zeros((1, x.shape[1])).to(self.DEVICE)
    decoder_states = self.model.reduce_state(encoder_states)
    start = Beam([self.vocab.SOS], [0], decoder_states, coverage_vector)

    remaining = beam_width  # live slots; shrinks as hypotheses finish
    live = [start]
    completed = []

    for _ in range(max_sum_len):
        heap = []
        for hypo in live:
            if hypo.tokens[-1] == self.vocab.EOS:
                # A finished hypothesis frees up one slot of the beam.
                completed.append(hypo)
                remaining -= 1
                continue
            expansions = self.best_k(hypo, remaining, encoder_output,
                                     x_padding_masks, x, torch.max(len_oovs))
            for can in expansions:
                # Heap entries are ordered by sequence score; the object id
                # breaks ties.
                add2heap(heap, (can.seq_score(), id(can), can), remaining)

        live = [entry[2] for entry in heap]
        # Stop once enough hypotheses have been completed.
        if len(completed) == beam_width:
            break

    # Whatever is still live competes with the completed hypotheses.
    completed += live
    ranked = sorted(completed, key=lambda hypo: hypo.seq_score(),
                    reverse=True)
    return ranked[0].tokens
def test_map_ori2x(self):
    """Exercise Candidate.try_all_pos against the original->x position map.

    First a candidate whose map is entirely ``None`` must produce no new
    candidates. Then a candidate that still maps the original "floor" token
    to index 1 must accept a substitution, a deletion and an insertion at
    that position.
    """
    model = TestModel(100)
    sub_tran = Sub("data/pddb", True)
    del_tran = Del({"floor"})
    ins_tran = Ins()

    sen = "i see that a cat sits on the floor .".split()
    sub_pos = sub_tran.get_pos(sen)
    del_pos = del_tran.get_pos(sen)
    ins_pos = ins_tran.get_pos(sen)

    # No original position is available any more: nothing can be applied.
    exhausted = Candidate(["dummy"], 0, [None] * len(sen))
    beam = Beam(1)
    exhausted.try_all_pos(ins_pos, ins_tran, model.get_grad(exhausted.x, 0),
                          model.get_embed, beam)
    self.assertEqual(0, len(beam.queue))

    # Only "floor" is still mapped (to index 1 of the candidate tokens).
    map_ori2x = [None] * len(sen)
    map_ori2x[sen.index("floor")] = 1
    candidate = Candidate(["dummy1", "floor", "dummy2"], 0, map_ori2x)

    # Every perturbation can be applied at the mapped position.
    checks = [
        (sub_pos, sub_tran, ["dummy1", "flooring", "dummy2"]),
        (del_pos, del_tran, ["dummy1", "dummy2"]),
        (ins_pos, ins_tran, ["dummy1", "floor", "floor", "dummy2"]),
    ]
    for positions, tran, expected in checks:
        beam = Beam(1)
        candidate.try_all_pos(positions, tran,
                              model.get_grad(candidate.x, 0),
                              model.get_embed, beam)
        self.assertSequenceEqual(expected, beam.check_balance()[0][0].x)
def try_all_pos(self, meta_data, pos: list, tran: Transformation, gradients,
                victim_model, candidates: Beam):
    """
    Apply ``tran`` at every still-available position and push the results.

    :param meta_data: (ori, old_tree, vocab) original input tokens, original
        tree, dataset vocab (not the vocab in training). The dataset vocab
        contains out-of-vocab (OOV) words, while the training vocab maps
        OOV to UNK.
    :param pos: possible positions, a list of (start_pos, end_pos)
    :param tran: the target transformation (Del, Ins or Sub)
    :param gradients: gradients with respect to self.x, or None to score
        randomly. For Ins it is unpacked as a pair (ioux_grads, c_grads).
    :param victim_model: the victim model
    :param candidates: a beam of candidates, modified in place by this method
    :return: None
    :raises NotImplementedError: if ``tran`` is none of Del, Ins, Sub
    """

    def _mark(seq, filler):
        # Replace the edited original span by a single marker entry.
        return seq[:start_pos_ori] + [filler] + seq[end_pos_ori:]

    get_embed = victim_model.get_embed
    ori, old_tree, vocab = meta_data

    for start_pos_ori, end_pos_ori in pos:
        # Skip spans that contain a position with no counterpart in self.x.
        if any(self.map_ori2x[i] is None
               for i in range(start_pos_ori, end_pos_ori)):
            continue

        start_pos_x = self.map_ori2x[start_pos_ori]
        # self.map_ori2x[end_pos_ori] may be None, so derive the exclusive
        # end from the last position inside the span instead.
        end_pos_x = self.map_ori2x[end_pos_ori - 1] + 1

        for new_x in tran.transformer(self.x, start_pos_x, end_pos_x):
            delta_len = len(new_x) - len(self.x)

            # Stage 1: bookkeeping of the edit (1 = Del, 2 = Ins, 3 = Sub).
            if isinstance(tran, Del):
                new_trans_on_pos = _mark(self.trans_on_pos, 1)
                new_syns_on_pos = copy.copy(self.syns_on_pos)
            elif isinstance(tran, Ins):
                new_trans_on_pos = _mark(self.trans_on_pos, 2)
                new_syns_on_pos = copy.copy(self.syns_on_pos)
            elif isinstance(tran, Sub):
                # Substitutes unknown to the dataset vocab are discarded.
                if vocab.get(new_x[start_pos_x], -1) == -1:
                    continue
                new_trans_on_pos = _mark(self.trans_on_pos, 3)
                new_syns_on_pos = _mark(self.syns_on_pos,
                                        new_x[start_pos_x])
            else:
                raise NotImplementedError

            # Stage 2: score the edit, randomly when no gradients are given.
            if gradients is None:
                new_score = np.random.random()
            elif isinstance(tran, Del):
                old_embedding = get_embed([self.x[start_pos_x]])[0]  # (dim)
                new_score = self.score + np.sum(
                    (0 - old_embedding) * gradients[start_pos_x])
            elif isinstance(tran, Ins):
                old_embedding = get_embed([self.x[start_pos_x]])  # (1, dim)
                ioux_grads, c_grads = gradients
                delta_ioux, delta_c = victim_model.model.cal_delta_Ins(
                    old_embedding)
                new_score = self.score + np.sum(
                    ioux_grads[start_pos_x] * delta_ioux) + np.sum(
                        c_grads[start_pos_x] * delta_c)
            else:  # Sub
                old_embedding = get_embed([self.x[start_pos_x]])[0]  # (dim)
                new_embedding = get_embed([new_x[start_pos_x]])[0]  # (dim)
                new_score = self.score + np.sum(
                    (new_embedding - old_embedding) * gradients[start_pos_x])

            new_tree = cons_tree(ori, new_trans_on_pos, new_syns_on_pos,
                                 old_tree, vocab)

            # Positions inside the edited span become unavailable; positions
            # after it shift by the change in length.
            untouched = self.map_ori2x[:start_pos_ori]
            consumed = [None] * (end_pos_ori - start_pos_ori)
            shifted = [
                p if p is None else p + delta_len
                for p in self.map_ori2x[end_pos_ori:]
            ]
            new_map_ori2x = untouched + consumed + shifted

            new_candidate = CandidateTree(new_tree, new_x, new_score,
                                          new_map_ori2x, new_trans_on_pos,
                                          new_syns_on_pos)
            candidates.add(new_candidate, new_score)