import json
import sys
from copy import deepcopy
from typing import Any, Dict, List

import numpy as np
from tqdm import tqdm

from allennlp.data import DatasetReader, Instance
from allennlp.data.tokenizers import Token
from allennlp.predictors import Predictor

# `load_archive`, `batch`, `Evaluator`, and `real_sequence_length` are referenced below
# but not defined here; they are assumed to be project-local helpers.


def batched_predict_instances(
        predictor: Predictor,
        examples: List[Instance],
        batch_size: int = 16,
) -> List[Dict[str, Any]]:
    """Run the predictor over `examples` in fixed-size batches, one output dict per instance."""
    results: List[Dict[str, Any]] = []
    for i in range(0, len(examples), batch_size):
        batch_examples = examples[i:i + batch_size]
        batch_results = predictor.predict_batch_instance(batch_examples)
        results.extend(batch_results)
    return results
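

# A minimal usage sketch for `batched_predict_instances`, assuming `predictor` and
# `instances` have already been built elsewhere (e.g. as in `main` below); the names
# here are illustrative, not part of the original pipeline.
def _example_batched_predict(predictor: Predictor, instances: List[Instance]) -> None:
    # outputs align one-to-one with the input instances, regardless of batch size
    outputs = batched_predict_instances(predictor, instances, batch_size=32)
    assert len(outputs) == len(instances)
    # each output is the predictor's JSON-serializable dict for that instance
    for output in outputs[:3]:
        print(json.dumps(output))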


def main(input_file, archive_file, batch_size, cuda_device):
    # load the archived model and rebuild the dataset reader from its config
    model, config = load_archive(archive_file=archive_file, cuda_device=cuda_device)
    model.eval()
    dataset_reader = DatasetReader.from_params(config["dataset_reader"])
    dataset = dataset_reader.read(input_file)

    predictor = Predictor(model, dataset_reader)
    evaluator = Evaluator()

    # decode the dataset batch by batch, emit one JSON line per instance,
    # and accumulate evaluation metrics along the way
    with tqdm(desc="Decoding...") as p:
        for ins in batch(dataset, batch_size):
            for result in predictor.predict_batch_instance(ins):
                print(json.dumps(result))
                evaluator(result)
                p.update()
    print(evaluator.get_metrics(reset=True), file=sys.stderr)
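

# A hedged sketch of a command-line entry point for `main`; the flag names and the
# argparse wiring are assumptions for illustration, not part of the original script.
def _cli() -> None:
    import argparse

    parser = argparse.ArgumentParser(description="Decode a dataset with an archived model.")
    parser.add_argument("--input-file", required=True, help="path to the evaluation data")
    parser.add_argument("--archive-file", required=True, help="path to the model archive (.tar.gz)")
    parser.add_argument("--batch-size", type=int, default=16)
    parser.add_argument("--cuda-device", type=int, default=-1, help="-1 for CPU")
    args = parser.parse_args()

    main(args.input_file, args.archive_file, args.batch_size, args.cuda_device)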


def replace_one_token(
        predictor: Predictor,
        instances: List[Instance],
        reduction_field_name: str,
        gradient_field_name: str,
        n_beams: List[int],
        indices: List[List[int]],
        replaced_indices: List[List[int]],
        embedding_weight: np.ndarray,
        index_to_token: Dict[int, str],
        max_beam_size: int = 5,
        ignore_tokens: List[str] = ['@@NULL@@'],
):
    """
    Replace one token in each example. Each example branches out to at most
    max_beam_size new beams. We do not do beam verification here.

    batch structure:
    > example 0 beam 1
    > example 0 beam 2  # n_beams[0] = 2
    > example 1 beam 1  # n_beams[1] = 1
    > example 2 beam 1
    > example 2 beam 2  # n_beams[2] = 2
    >                   # n_beams[3] = 0
    """
    n_examples = len(n_beams)  # not batch size!

    # label the instances with the model's own predictions if no gold label is present
    if 'label' not in instances[0].fields:
        outputs = predictor.predict_batch_instance(instances)
        instances = [
            predictor.predictions_to_labeled_instances(i, o)[0]
            for i, o in zip(instances, outputs)
        ]

    # one forward-backward pass to get the score of each token in the batch
    gradients, outputs = predictor.get_gradients(instances)
    grads = gradients[gradient_field_name]
    # first-order (HotFlip-style) score of swapping position l in example b to vocab entry k
    hotflip_grad = np.einsum('bld,kd->blk', grads, embedding_weight)
    sign = -1

    # beams of example_idx: batch[start: start + n_beams[example_idx]]
    start = 0
    new_instances = []
    new_n_beams = [0 for _ in range(n_examples)]
    new_indices = []
    new_replaced_indices = []
    current_lengths = [
        real_sequence_length(x[reduction_field_name], ignore_tokens)
        for x in instances
    ]

    for example_idx in range(n_examples):
        """
        for each example_idx, current beams -> future beams
        1. find beam-level reduction candidates
        2. merge and sort them to get example-level reduction candidates
        """
        # skip if example_idx exited the search
        if n_beams[example_idx] == 0:
            continue

        # find beam-level candidates
        candidates = []  # (batch_index i, token j, replacement k)
        for i in range(start, start + n_beams[example_idx]):
            field = instances[i][reduction_field_name]
            # argsort the flattened scores
            indices_sorted = np.argsort(sign * hotflip_grad[i].ravel())
            # unravel into original shape
            indices_sorted = np.unravel_index(indices_sorted, hotflip_grad[i].shape)
            indices_sorted = np.stack(indices_sorted, 1)
            beam_candidates = [
                (i, j, k) for j, k in indices_sorted
                if (j < field.sequence_length()
                    and field.tokens[j].text not in ignore_tokens)
            ]
            candidates += beam_candidates[:max_beam_size]

        # no beam-level candidate found, skip
        if len(candidates) == 0:
            start += n_beams[example_idx]
            continue

        # gather scores of all example-level candidates
        # and sort them to get example-level candidates
        candidates = np.asarray(candidates)
        scores = hotflip_grad[candidates[:, 0], candidates[:, 1], candidates[:, 2]]
        candidate_scores = sorted(zip(candidates, scores), key=lambda x: sign * x[1])
        candidates = [c for c, s in candidate_scores[:max_beam_size]]

        # each candidate should be a valid token in the beam it belongs to
        assert all(j < current_lengths[i] for i, j, k in candidates)

        for i, j, k in candidates:
            new_instance = deepcopy(instances[i])
            new_instance[reduction_field_name].tokens = (
                new_instance[reduction_field_name].tokens[0:j]
                + [Token(index_to_token[k])]
                + new_instance[reduction_field_name].tokens[j + 1:])
            new_instance.indexed = False
            new_n_beams[example_idx] += 1
            new_instances.append(new_instance)
            new_replaced_indices.append(replaced_indices[i] + [indices[i][j]])
            new_indices.append(indices[i])

        # move starting position to the next example
        start += n_beams[example_idx]

    return new_instances, new_n_beams, new_indices, new_replaced_indices
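

# A small worked example of the `n_beams` / `start` bookkeeping used throughout this
# module (purely illustrative, no model involved): the flat batch is sliced per example
# as batch[start : start + n_beams[example_idx]].
def _example_beam_slicing() -> None:
    flat_batch = ['ex0-beam1', 'ex0-beam2', 'ex1-beam1', 'ex2-beam1', 'ex2-beam2']
    n_beams = [2, 1, 2, 0]  # example 3 has exited the search, so it owns no slice
    start = 0
    for example_idx, width in enumerate(n_beams):
        print(example_idx, flat_batch[start:start + width])
        start += width
    # prints:
    # 0 ['ex0-beam1', 'ex0-beam2']
    # 1 ['ex1-beam1']
    # 2 ['ex2-beam1', 'ex2-beam2']
    # 3 []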


def replace_instances(
        predictor: Predictor,
        instances: List[Instance],
        reduction_field_name: str,
        gradient_field_name: str,
        probs_field_name: str,
        embedding_weight: np.ndarray,
        index_to_token: Dict[int, str],
        max_beam_size: int = 5,
        max_replace_steps: int = 5,
        ignore_tokens: List[str] = ['@@NULL@@'],
):
    """
    original batch
    > example 0
    > example 1
    > example 2
    > example 3

    during replacement, once example 3 has exited the search
    > example 0 beam 1
    > example 0 beam 2  # n_beams[0] = 2
    > example 1 beam 1  # n_beams[1] = 1
    > example 2 beam 1
    > example 2 beam 2  # n_beams[2] = 2
    >                   # n_beams[3] = 0

    then each example i beam j branches out to
    > example i beam j 0
    > example i beam j 1
    > ...
    which forms
    > example i beam j 0
    > example i beam j 1
    > example i beam j 2
    > example i beam k 0
    > example i beam k 1
    we sort all beams of example i, select the top ones,
    filter out the ones that do not retain the prediction, and go to the next step

    :param predictor:
    :param instances:
    :param reduction_field_name:
    :param gradient_field_name:
    :param probs_field_name:
    :param max_beam_size:
    """
    # label the instances with the model's own predictions if no gold label is present
    if 'label' not in instances[0].fields:
        outputs = predictor.predict_batch_instance(instances)
        instances = [
            predictor.predictions_to_labeled_instances(i, o)[0]
            for i, o in zip(instances, outputs)
        ]

    n_examples = len(instances)
    n_beams = [1 for _ in range(n_examples)]  # each example starts with 1 beam
    indices = [[
        i for i, token in enumerate(instance[reduction_field_name])
        if token.text not in ignore_tokens
    ] for instance in instances]
    replaced_indices = [[] for _ in range(n_examples)]

    # perturbed instances that do not necessarily flip the model prediction
    final_instances = {
        i: deepcopy(instance)
        for i, instance in enumerate(instances)
    }
    final_replaced_indices = {i: [] for i in range(n_examples)}

    # kept to check whether the prediction is flipped
    original_instances = deepcopy(instances)

    for _ in range(max_replace_steps):
        # all beams are perturbed at the same pace:
        # replace one token in each example
        instances, n_beams, indices, replaced_indices = replace_one_token(
            predictor,
            instances,
            reduction_field_name=reduction_field_name,
            gradient_field_name=gradient_field_name,
            n_beams=n_beams,
            indices=indices,
            replaced_indices=replaced_indices,
            embedding_weight=embedding_weight,
            index_to_token=index_to_token,
            max_beam_size=max_beam_size,
            ignore_tokens=ignore_tokens,
        )

        # verify prediction for each beam
        outputs = predictor.predict_batch_instance(instances)

        # beams of example_idx: batch[start: start + n_beams[example_idx]]
        start = 0
        new_instances = []
        new_indices = []
        new_replaced_indices = []
        new_n_beams = [0 for _ in range(n_examples)]
        for example_idx in range(n_examples):
            for i in range(start, start + n_beams[example_idx]):
                reduced_prediction = np.argmax(outputs[i][probs_field_name])
                original_prediction = original_instances[example_idx]['label'].label
                final_instances[example_idx] = deepcopy(instances[i])
                final_replaced_indices[example_idx] = replaced_indices[i]
                if reduced_prediction == original_prediction:
                    # prediction not flipped yet, keep replacing
                    new_n_beams[example_idx] += 1
                    new_instances.append(instances[i])
                    new_indices.append(indices[i])
                    new_replaced_indices.append(replaced_indices[i])
            # move the cursor to the next example
            start += n_beams[example_idx]

        if len(new_instances) == 0:
            break

        instances = new_instances
        n_beams = new_n_beams
        indices = new_indices
        replaced_indices = new_replaced_indices

    return ([final_instances[i] for i in range(n_examples)],
            [final_replaced_indices[i] for i in range(n_examples)])
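

# A minimal sketch of driving `replace_instances` end to end. The field names
# ('tokens', 'grad_input_1', 'probs'), the vocabulary namespace, and the way the
# embedding matrix is obtained are assumptions that depend on the model at hand.
def _example_replace_instances(predictor: Predictor,
                               instances: List[Instance],
                               embedding_weight: np.ndarray) -> None:
    # map vocab ids back to token strings; "tokens" is the usual AllenNLP namespace
    index_to_token = predictor._model.vocab.get_index_to_token_vocabulary("tokens")
    replaced, replaced_positions = replace_instances(
        predictor,
        instances,
        reduction_field_name="tokens",       # assumed TextField name
        gradient_field_name="grad_input_1",  # assumed key in predictor.get_gradients output
        probs_field_name="probs",            # assumed key holding class probabilities
        embedding_weight=embedding_weight,   # token embedding matrix, shape (vocab_size, dim)
        index_to_token=index_to_token,
        max_beam_size=5,
        max_replace_steps=5,
    )
    for attacked, positions in zip(replaced, replaced_positions):
        print(positions, [t.text for t in attacked["tokens"].tokens])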


def remove_one_token(
        predictor: Predictor,
        instances: List[Instance],
        reduction_field_name: str,
        gradient_field_name: str,
        n_beams: List[int],
        indices: List[List[int]],
        removed_indices: List[List[int]],
        token_id_field_name: str = None,
        embedding_weight: np.ndarray = None,
        max_beam_size: int = 5,
        min_sequence_length: int = 1,
        ignore_tokens: List[str] = ['@@NULL@@'],
):
    """
    Remove one token from each example. Each example branches out to at most
    max_beam_size new beams. We do not do beam verification here.

    batch structure:
    > example 0 beam 1
    > example 0 beam 2  # n_beams[0] = 2
    > example 1 beam 1  # n_beams[1] = 1
    > example 2 beam 1
    > example 2 beam 2  # n_beams[2] = 2
    >                   # n_beams[3] = 0
    """
    n_examples = len(n_beams)  # not batch size!

    # label the instances with the model's own predictions if no gold label is present
    if 'label' not in instances[0].fields:
        outputs = predictor.predict_batch_instance(instances)
        instances = [predictor.predictions_to_labeled_instances(i, o)[0]
                     for i, o in zip(instances, outputs)]

    # one forward-backward pass to get the score of each token in the batch
    gradients, outputs = predictor.get_gradients(instances)
    grads = gradients[gradient_field_name]
    if embedding_weight is not None:
        # score each position by the gradient dotted with the embedding of the token
        # that actually sits there (first-order importance of keeping that token)
        token_ids = outputs[token_id_field_name].cpu().numpy()
        hotflip_grad = np.einsum('bld,kd->blk', grads, embedding_weight)
        onehot_grad = np.take_along_axis(
            hotflip_grad, token_ids[..., np.newaxis], axis=2).squeeze(2)
    else:
        # fall back to the squared norm of the gradient at each position
        onehot_grad = np.einsum('bld,bld->bl', grads, grads)

    # beams of example_idx: batch[start: start + n_beams[example_idx]]
    start = 0
    new_instances = []
    new_n_beams = [0 for _ in range(n_examples)]
    new_indices = []
    new_removed_indices = []
    current_lengths = [real_sequence_length(x[reduction_field_name], ignore_tokens)
                       for x in instances]

    for example_idx in range(n_examples):
        """
        for each example_idx, current beams -> future beams
        1. find beam-level reduction candidates
        2. merge and sort them to get example-level reduction candidates
        """
        # skip if example_idx exited the search
        if n_beams[example_idx] == 0:
            continue

        # find beam-level candidates
        candidates = []  # (batch_index i, token j)
        for i in range(start, start + n_beams[example_idx]):
            if current_lengths[i] <= min_sequence_length:
                # nothing to reduce
                continue
            field = instances[i][reduction_field_name]
            beam_candidates = [
                (i, j) for j in np.argsort(-onehot_grad[i])
                if (
                    j < field.sequence_length()
                    and field.tokens[j].text not in ignore_tokens
                )
            ]
            candidates += beam_candidates[:max_beam_size]

        # no beam-level candidate found, skip
        if len(candidates) == 0:
            start += n_beams[example_idx]
            continue

        # gather scores of all example-level candidates
        # and sort them to get example-level candidates
        candidates = np.asarray(candidates)
        scores = onehot_grad[candidates[:, 0], candidates[:, 1]]
        candidate_scores = sorted(zip(candidates, scores), key=lambda x: -x[1])
        candidates = [c for c, s in candidate_scores[:max_beam_size]]

        # each candidate should be a valid token in the beam it belongs to
        assert all(j < current_lengths[i] for i, j in candidates)

        for i, j in candidates:
            new_instance = deepcopy(instances[i])
            new_instance[reduction_field_name].tokens = (
                new_instance[reduction_field_name].tokens[0:j]
                + new_instance[reduction_field_name].tokens[j + 1:]
            )
            new_instance.indexed = False
            new_n_beams[example_idx] += 1
            new_instances.append(new_instance)
            new_removed_indices.append(removed_indices[i] + [indices[i][j]])
            new_indices.append(indices[i][:j] + indices[i][j + 1:])

        # move starting position to the next example
        start += n_beams[example_idx]

    return new_instances, new_n_beams, new_indices, new_removed_indices
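

# A toy shape check for the two scoring paths in `remove_one_token` (illustrative
# numbers only): with an embedding matrix, every (batch, position, vocab) swap gets a
# first-order score and the score of the token actually present is read back out;
# without it, the squared gradient norm per position is used instead.
def _example_token_scores() -> None:
    rng = np.random.default_rng(0)
    batch, length, dim, vocab = 2, 4, 8, 10
    grads = rng.normal(size=(batch, length, dim))
    embedding_weight = rng.normal(size=(vocab, dim))
    token_ids = rng.integers(0, vocab, size=(batch, length))

    hotflip_grad = np.einsum('bld,kd->blk', grads, embedding_weight)   # (batch, length, vocab)
    onehot_grad = np.take_along_axis(
        hotflip_grad, token_ids[..., np.newaxis], axis=2).squeeze(2)   # (batch, length)
    fallback = np.einsum('bld,bld->bl', grads, grads)                  # (batch, length)
    assert onehot_grad.shape == fallback.shape == (batch, length)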


def reduce_instances(
        predictor: Predictor,
        instances: List[Instance],
        reduction_field_name: str,
        gradient_field_name: str,
        probs_field_name: str,
        token_id_field_name: str = None,
        embedding_weight: np.ndarray = None,
        max_beam_size: int = 5,
        prob_threshold: float = -1,
        min_sequence_length: int = 1,
        ignore_tokens: List[str] = ['@@NULL@@'],
):
    """
    original batch
    > example 0
    > example 1
    > example 2
    > example 3

    during reduction, once example 3 has exited the search
    > example 0 beam 1
    > example 0 beam 2  # n_beams[0] = 2
    > example 1 beam 1  # n_beams[1] = 1
    > example 2 beam 1
    > example 2 beam 2  # n_beams[2] = 2
    >                   # n_beams[3] = 0

    then each example i beam j branches out to
    > example i beam j 0
    > example i beam j 1
    > ...
    which forms
    > example i beam j 0
    > example i beam j 1
    > example i beam j 2
    > example i beam k 0
    > example i beam k 1
    we sort all beams of example i, select the top ones,
    filter out the ones that do not retain the prediction, and go to the next step

    :param predictor:
    :param instances:
    :param reduction_field_name:
    :param gradient_field_name:
    :param probs_field_name:
    :param max_beam_size:
    :param prob_threshold:
    """
    # label the instances with the model's own predictions if no gold label is present
    if 'label' not in instances[0].fields:
        outputs = predictor.predict_batch_instance(instances)
        instances = [predictor.predictions_to_labeled_instances(i, o)[0]
                     for i, o in zip(instances, outputs)]

    n_examples = len(instances)
    n_beams = [1 for _ in range(n_examples)]  # each example starts with 1 beam
    indices = [[
        i for i, token in enumerate(instance[reduction_field_name])
        if token.text not in ignore_tokens
    ] for instance in instances]
    removed_indices = [[] for _ in range(n_examples)]

    # keep track of a single shortest reduced version per example
    shortest_instances = {i: deepcopy(x) for i, x in enumerate(instances)}
    shortest_lengths = {
        i: real_sequence_length(x[reduction_field_name], ignore_tokens)
        for i, x in enumerate(instances)
    }
    shortest_removed_indices = {}

    # kept to make sure predictions remain the same
    original_instances = deepcopy(instances)

    while True:
        # all beams are reduced at the same pace:
        # remove one token from each example
        instances, n_beams, indices, removed_indices = remove_one_token(
            predictor,
            instances,
            reduction_field_name=reduction_field_name,
            gradient_field_name=gradient_field_name,
            n_beams=n_beams,
            indices=indices,
            removed_indices=removed_indices,
            token_id_field_name=token_id_field_name,
            embedding_weight=embedding_weight,
            max_beam_size=max_beam_size,
            min_sequence_length=min_sequence_length,
            ignore_tokens=ignore_tokens,
        )

        # verify prediction for each beam
        outputs = predictor.predict_batch_instance(instances)

        # beams of example_idx: batch[start: start + n_beams[example_idx]]
        start = 0
        new_instances = []
        new_indices = []
        new_n_beams = [0 for _ in range(n_examples)]
        new_removed_indices = []
        current_lengths = [real_sequence_length(x[reduction_field_name], ignore_tokens)
                           for x in instances]
        for example_idx in range(n_examples):
            original_field = original_instances[example_idx][reduction_field_name]
            original_length = real_sequence_length(original_field, ignore_tokens)
            for i in range(start, start + n_beams[example_idx]):
                assert current_lengths[i] + len(removed_indices[i]) == original_length
                reduced_prediction = np.argmax(outputs[i][probs_field_name])
                reduced_score = outputs[i][probs_field_name][reduced_prediction]
                original_prediction = original_instances[example_idx]['label'].label
                if (
                        reduced_prediction == original_prediction
                        and reduced_score >= prob_threshold
                ):
                    # check if this valid reduced example is shorter than the current shortest
                    if current_lengths[i] < shortest_lengths[example_idx]:
                        shortest_instances[example_idx] = deepcopy(instances[i])
                        shortest_removed_indices[example_idx] = removed_indices[i]
                        shortest_lengths[example_idx] = current_lengths[i]
                    if current_lengths[i] <= min_sequence_length:
                        # all beams of an example have the same length, so every beam of
                        # this example is already at the minimum length;
                        # do not branch out from this example
                        pass
                    else:
                        # beam valid, but not short enough, keep reducing
                        new_n_beams[example_idx] += 1
                        new_instances.append(instances[i])
                        new_indices.append(indices[i])
                        new_removed_indices.append(removed_indices[i])
            # move the cursor to the next example
            start += n_beams[example_idx]

        if len(new_instances) == 0:
            break

        instances = new_instances
        n_beams = new_n_beams
        indices = new_indices
        removed_indices = new_removed_indices

    shortest_instances = [shortest_instances[i] for i in range(n_examples)]
    shortest_removed_indices = [shortest_removed_indices.get(i, [])
                                for i in range(n_examples)]
    return shortest_instances, shortest_removed_indices
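

# A minimal sketch of running input reduction with `reduce_instances`. The field names
# and the probability threshold are assumptions for illustration; they depend on the
# archived model and its dataset reader.
def _example_reduce_instances(predictor: Predictor, instances: List[Instance]) -> None:
    reduced, removed_positions = reduce_instances(
        predictor,
        instances,
        reduction_field_name="tokens",       # assumed TextField name
        gradient_field_name="grad_input_1",  # assumed key in predictor.get_gradients output
        probs_field_name="probs",            # assumed key holding class probabilities
        max_beam_size=5,
        prob_threshold=-1,                   # -1 disables the confidence check
        min_sequence_length=1,
    )
    for original, shortest, removed in zip(instances, reduced, removed_positions):
        print(len(original["tokens"].tokens), "->", len(shortest["tokens"].tokens),
              "removed positions:", removed)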