def process_output(self, output: JsonDict) -> JsonDict:
     pred_sent_orders = output.get('pred_sent_orders', None)
     num_sents = len(output['sent_labels']) # for removing padding
     if pred_sent_orders is not None:
         pred_chains = [order2chain(order) for order in pred_sent_orders]
         pred_chains = [ch for ch in pred_chains if all(c < num_sents for c in ch)]
         assert len(pred_chains) > 0, repr([order2chain(order) for order in pred_sent_orders]) + '\n' + 'num sents: %d' % num_sents + '\n%s' % output['_id']
     else:
         # get predicted evidence from the sentences with the top-k ``gate_probs``
         gate_probs = output['gate_probs'][:num_sents]
         pred_chains = [[i] for i in sorted(range(num_sents), key=lambda x: gate_probs[x], reverse=True)[:10]]
     return {#'answer_texts': output['answer_texts'],
             #'best_span_str': output.get('best_span_str', None),
             #'best_span': output.get('best_span', None),
             'pred_sent_labels': output.get('pred_sent_labels', None),
             'pred_sent_orders': output.get('pred_sent_orders', None),
             'pred_chains': pred_chains,
             'possible_chain': output.get('evd_possible_chains', None),
             'question_tokens': output['question_tokens'],
             'passage_sent_tokens': output['passage_sent_tokens'],
             #'token_spans_sp': output['token_spans_sp'],
             #'token_spans_sent': output['token_spans_sent'],
             'sent_labels': output['sent_labels'],
             'ans_sent_idxs': output.get('ans_sent_idxs', None),
             '_id': output['_id']}
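The ``order2chain`` helper used above is not shown in this snippet. A minimal
sketch of what it might look like, assuming ``order[i]`` encodes sentence i's
position in the predicted evidence chain (an assumption, not the original
implementation):

def order2chain(order):
    # Hypothetical: keep sentences with a positive chain position and return
    # their indices sorted by that position; padded/absent sentences are
    # assumed to carry a non-positive value.
    idxs = [i for i, o in enumerate(order) if o > 0]
    return sorted(idxs, key=lambda i: order[i])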
Example 2
    def predict(self, inputs: JsonDict):
        result = None

        image_url = inputs.get("image_url")
        if image_url is not None:
            result = super().predict({
                "question": inputs["question"],
                "image": image_url
            })
        else:
            image = inputs.get("image")
            if image is not None:
                image_base64 = image["image_base64"]
                if image_base64 is not None:
                    with tempfile.NamedTemporaryFile(
                            prefix=f"{self.__class__.__name__}-") as f:
                        f.write(standard_b64decode(image_base64))
                        f.flush()
                        result = super().predict({
                            "question": inputs["question"],
                            "image": f.name
                        })

        if result is None:
            raise ValueError("No image found in request.")

        results = [{
            "answer": token,
            "confidence": score * 100
        } for token, score in result["tokens"].items()
                   if not token.startswith("@@")]
        results.sort(key=lambda x: -x["confidence"])
        return results[:45]  # Jon only wants the first 45 results.
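This method relies on ``import tempfile`` and ``from base64 import
standard_b64decode`` at module level. A hedged usage sketch (``predictor`` and
the image file are illustrative, not part of the original code):

from base64 import standard_b64encode

with open("photo.jpg", "rb") as img:  # illustrative input file
    inputs = {
        "question": "What color is the car?",
        "image": {"image_base64": standard_b64encode(img.read()).decode("ascii")},
    }
answers = predictor.predict(inputs)  # highest-confidence answers first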
Example 3
 def dump_line(self, outputs: JsonDict) -> str:  # pylint: disable=no-self-use
     """
     If you don't want your outputs in JSON-lines format
     you can override this function to output them differently.
     """
     keys = ['citation_id', 'prediction', 'probabilities', 'citation_text']
     for k in outputs.copy():
         if k not in keys:
             outputs.pop(k)
     return json.dumps(outputs, cls=JsonFloatEncoder) + "\n"
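``JsonFloatEncoder`` is not defined in this snippet. A minimal stand-in,
assuming its job is to make numpy scalars JSON-serializable:

import json
import numpy

class JsonFloatEncoder(json.JSONEncoder):
    # Hypothetical: coerce numpy scalars to plain Python numbers so that
    # json.dumps does not fail on model outputs.
    def default(self, o):
        if isinstance(o, (numpy.floating, numpy.integer)):
            return o.item()
        return super().default(o)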
Example 4
 def dump_line(self, outputs: JsonDict) -> str:  # pylint: disable=no-self-use
     """
     If you don't want your outputs in JSON-lines format
     you can override this function to output them differently.
     """
     keys = ['citedPaperId', 'citingPaperId', 'excerptCitationIntents']
     for k in outputs.copy():
         if k not in keys:
             outputs.pop(k)
     return json.dumps(outputs, cls=JsonFloatEncoder) + "\n"
Example 5
 def _json_to_instance(self, json_dict: JsonDict) -> Instance:
     """
     Expects JSON that looks like ``{"tokens": "[...]", "upos_tags": "[...]"}``.
     """
     tokens = json_dict["tokens"]
     gold_upos_tags = json_dict.get("upos_tags", None)
     gold_lemmas = json_dict.get("lemmas", None)
     return self._dataset_reader.text_to_instance(tokens=tokens,
                                                  upos_tags=gold_upos_tags,
                                                  lemmas=gold_lemmas)
Example 6
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:

        text = json_dict["text"]
        text = " " + text.strip()
        sentiment = json_dict["sentiment"]
        text_id = json_dict.get("TextID")
        if text_id is None:
            text_id = "<No-Text-id>"
        return self._dataset_reader.text_to_instance(
            text, sentiment, text_id, json_dict.get("selected_text")
        )
Example 7
 def _json_to_instance(
         self,  # type: ignore
         json_dict: JsonDict) -> Instance:
     premise_text = json_dict.get("sentence1", None) or json_dict.get(
         "premise", None)
     hypothesis_text = json_dict.get("sentence2", None) or json_dict.get(
         "hypothesis", None)
     if premise_text and hypothesis_text:
         return self._dataset_reader.text_to_instance(
             premise_text, hypothesis_text)
     logger.info("Error parsing input")
     return None
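Illustrative inputs (both key conventions reach the same reader; the values
are made up):

predictor._json_to_instance({"sentence1": "A man is eating.",
                             "sentence2": "Someone is having a meal."})
predictor._json_to_instance({"premise": "A man is eating.",
                             "hypothesis": "Someone is having a meal."})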
Example 8
 def _json_to_instance(self, json_dict: JsonDict) -> Instance:
     """
     Expects JSON that looks like ``{"question": "...", "fact": "..."}``.
     """
     if isinstance(json_dict["question"], dict):
         question_stem = json_dict["question"]["stem"]
         choices = [x["text"] for x in json_dict["question"]["choices"]]
     else:
         question_text = json_dict["question"]
         question_stem, choices = decompose_question(question_text)
     fact = json_dict.get("fact") or json_dict.get("fact1")
     span = json_dict.get("span") or json_dict.get("answer_spans")[0]
     spans = [span]
     if "relation" in json_dict:
         relations = [json_dict["relation"]]
     else:
         relations = None
     if "offset" in json_dict:
         offset = json_dict["offset"]
     elif "answer_starts" in json_dict:
         offset = json_dict["answer_starts"][0]
     else:
         offset = fact.find(span)  # find() returns -1 when missing, matching the check below
     if offset == -1:
         raise ValueError("Span: {} not found in fact: {}".format(
             span, fact))
     offsets = [offset]  #[(offset, offset + len(span))]
     if "id" in json_dict:
         qid = json_dict["id"]
     else:
         qid = random.randint(0, 100)
     prefetched_sentences = json_dict.get("prefetched_sentences", None)
     prefetched_indices = json_dict.get("prefetched_indices", None)
     if prefetched_sentences is not None:
         return self._dataset_reader.text_to_instance(
             qid,
             question_stem,
             choices,
             fact,
             spans,
             relations,
             answer_starts=offsets,
             prefetched_sentences=prefetched_sentences,
             prefetched_indices=prefetched_indices)
     else:
         return self._dataset_reader.text_to_instance(qid,
                                                      question_stem,
                                                      choices,
                                                      fact,
                                                      spans,
                                                      relations,
                                                      answer_starts=offsets)
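An illustrative input for the structured case (all values are made up; a flat
``"question"`` string would instead go through ``decompose_question``):

predictor._json_to_instance({
    "id": "q1",
    "question": {"stem": "Which material conducts electricity?",
                 "choices": [{"text": "copper"}, {"text": "rubber"}]},
    "fact": "copper conducts electricity",
    "span": "copper",
})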
Example 9
 def _json_to_instance(self,  # type: ignore
                       json_dict: JsonDict) -> Instance:
     premises = json_dict["premises"]
     hypotheses = json_dict["hypotheses"]
     entailments = json_dict.get("entailments", None)
     if entailments is None:
         answer_indices = None
     else:
         answer_indices = [index for index, entailment in enumerate(entailments) if entailment]
     relevant_sentence_idxs = json_dict.get("relevant_sentence_idxs", None)
     return self._dataset_reader.text_to_instance(premises,
                                                  hypotheses,
                                                  answer_indices,
                                                  relevant_sentence_idxs)
Example 10
 def _compatible_question(self, question_data: JsonDict) -> bool:
     question_id = question_data.get("id")
     if not question_id:
         return True
     if "_friction" not in self._lf_syntax:
         return True
     return "_Fr_" in question_id or "Friction" in question_id
Example 11
 def _my_json_to_instance(self,
                          json_dict: JsonDict) -> Tuple[Instance, JsonDict]:
     # Make a cast here to satisfy mypy
     dataset_reader = cast(BertMCQAReader, self._dataset_reader)
     qid = json_dict['id']
     question_data = json_dict['question']
     question_text = question_data['stem']
     choice_text_list = [
         choice['text'] for choice in question_data['choices']
     ]
     choice_labels = [
         choice['label'] for choice in question_data['choices']
     ]
     choice_context_list = []
     context = json_dict.get("para", None)
     for choice in question_data['choices']:
         choice_context_list.append(choice.get("para", None))
     instance = dataset_reader.text_to_instance(
         qid,
         question_text,
         choice_text_list,
         context=context,
         choice_context_list=choice_context_list)
     extra_info = {'id': qid, 'choice_labels': choice_labels}
     return instance, extra_info
Example 12
 def _get_entity_literals(self, question_data: JsonDict) -> JsonDict:
     res: JsonDict = {}
     for key, value in question_data.items():
         if '_literals' in key and key.replace('_literals', '') in self._entity_types:
             res.update(value)
     return res
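Illustrative behavior, assuming ``self._entity_types`` contains ``"world"``:

question_data = {"world_literals": {"world1": ["ice"], "world2": ["concrete"]},
                 "question": "..."}
# _get_entity_literals(question_data)
# -> {"world1": ["ice"], "world2": ["concrete"]}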
Example 13
 def _json_to_instance(self, json_dict: JsonDict) -> Instance:
     return self._dataset_reader.text_to_instance(
         words=json_dict['words'],
         ud_head_indices=json_dict['heads'],
         ud_tags=json_dict['tags'],
         ud_labels=json_dict['head_labels'],
         metadata=json_dict.get('metadata', None))
Example 14
 def _compatible_question(self, question_data: JsonDict) -> bool:
     question_id = question_data.get('id')
     if not question_id:
         return True
     if '_friction' not in self._lf_syntax:
         return True
     return '_Fr_' in question_id or 'Friction' in question_id
Example 16
 def _get_entity_tags(self,
                      entities: List[str],
                      table_field: KnowledgeGraphField,
                      entity_literals: JsonDict,
                      tokenized_question: List[Token]) -> List[int]:
     res = []
     # Hackily access last two feature extractors for table field (span overlaps which don't
     # depend on the actual table information)
     features = table_field._feature_extractors[8:]  # pylint: disable=protected-access
     for i, token in enumerate(tokenized_question):
         tag_best = 0
         score_max = 0.0
         for tag_index, tag in enumerate(entities):
             literals = entity_literals.get(tag, [])
             if not isinstance(literals, list):
                 literals = [literals]
             for literal in literals:
                 tag_tokens = self._tokenizer.tokenize(literal.lower())
                 scores = [fe(tag, tag_tokens, token, i, tokenized_question) for fe in features]
                 # Small tie breaker in favor of longer sequences
                 score = max(scores) + len(tag_tokens)/100
                 if score > score_max and score >= 0.5:
                     tag_best = tag_index + 1
                     score_max = score
         res.append(tag_best)
     return res
Example 18
    def labeled_json_to_labeled_instances(
            self, json_dict: JsonDict) -> Dict[int, Instance]:
        seq_offset = 0
        seq_len = -1
        adhoc_vocab = Vocabulary()
        instances = {}
        for i, str_i in sorted(map((lambda x: (int(x), x)), json_dict.keys())):
            inst_obj = json_dict[str_i]
            if seq_len == -1:
                seq_len = len(inst_obj['words'])
                text_field = TextField(
                    [Token(tok['text']) for tok in inst_obj['words']], {})
                instance = Instance({'tokens': text_field})

            new_instance = instance.duplicate()

            tags_field = ConstructiveSupertagField(
                [json_to_cat(tag) for tag in inst_obj['tags']], text_field,
                [i - seq_offset])
            adhoc_vocab.add_tokens_to_namespace(tags_field.labels, 'labels')
            new_instance.add_field('tags', tags_field)
            new_instance.index_fields(adhoc_vocab)

            instances[i] = new_instance

            if i + 1 - seq_offset == seq_len:
                seq_offset += seq_len
                seq_len = -1

        return instances
Example 19
    def _json_to_instance(self,
                          json_dict: JsonDict) -> Tuple[Instance, JsonDict]:
        premise = json_dict.get('premise', None)
        hypothesis = json_dict.get('hypothesis', None)
        premise_entities = json_dict.get('premise_entities', None)
        hypothesis_entities = json_dict.get('hypothesis_entities', None)
        instance = self._dataset_reader.text_to_instance(
            premise=premise,
            hypothesis=hypothesis,
            premise_entities=premise_entities,
            hypothesis_entities=hypothesis_entities)

        label_dict = self._model.vocab.get_index_to_token_vocabulary('labels')
        all_labels = [label_dict[i] for i in range(len(label_dict))]

        return instance, {"all_labels": all_labels}
Example 20
 def dump_line(self, outputs: JsonDict) -> str:  # pylint: disable=no-self-use
     """
     If you don't want your outputs in JSON-lines format
     you can override this function to output them differently.
     """
     if 'beam_sql_query' in outputs.keys():
         return outputs['predicted_sql_query'] + "\n" + outputs['beam_sql_query'] + "\n"
     else:
         return outputs['predicted_sql_query'] + "\n"
Example 21
    def dump_line(self, output: JsonDict) -> str:  # pylint: disable=no-self-use
        output.pop('class_probabilities', None)
        output['hierplane_tree'].pop('linkNameToLabel', None)
        output['hierplane_tree'].pop('nodeTypeToStyle', None)
        tree = output['hierplane_tree']
        # Spans are 4-tuple with (start, end (exclusive), span_text, span_label)
        spans = self.get_parse_spans(tree, [])
        sentence_id = None
        if "sentence_id" in output['metadata']:
            sentence_id = output['metadata']['sentence_id']
        tokens = output['metadata']['tokens']

        output_jsonl_dict = {
            'sentence_id': sentence_id,
            'tokens': tokens,
            'spans': spans
        }
        return json.dumps(output_jsonl_dict) + "\n"
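``get_parse_spans`` is not shown here. A hypothetical recursive sketch that
matches the 4-tuple comment above, assuming each hierplane node carries
``word``, ``nodetype``, and ``children``:

def get_parse_spans(self, tree, spans, start=0):
    # Hypothetical: collect (start, end_exclusive, span_text, span_label)
    # tuples, measuring positions in tokens.
    node = tree.get('root', tree)
    words = node['word'].split()
    spans.append((start, start + len(words), node['word'], node.get('nodetype')))
    child_start = start
    for child in node.get('children', []):
        self.get_parse_spans(child, spans, child_start)
        child_start += len(child['word'].split())
    return spans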
Example 22
 def _json_to_instance(self, json_dict: JsonDict) -> Instance:
     """
     Expects JSON that looks like ``{"sentence": "..."}``.
     """
     if 'text_idx' in json_dict:
         return self._dataset_reader.text_to_instance(
             json_dict['text_idx'], json_dict['comment_idx'])  # type: ignore
     return self._dataset_reader.text_to_instance(*json_dict.values())
Example 23
    def predict_json(self, inputs: JsonDict) -> JsonDict:
        """
        We need to override this because of the interactive beam search aspects.
        """

        instance = self._json_to_instance(inputs)

        # Get the rules out of the instance
        index_to_rule = [
            production_rule_field.rule
            for production_rule_field in instance.fields["actions"].field_list
        ]
        rule_to_index = {rule: i for i, rule in enumerate(index_to_rule)}

        # A sequence of strings to force, then convert them to ints
        initial_tokens = inputs.get("initial_sequence", [])

        # Want to get initial_sequence on the same device as the model.
        initial_sequence = torch.tensor(
            [rule_to_index[token] for token in initial_tokens],
            device=next(self._model.parameters()).device,
        )

        # Replace beam search with one that forces the initial sequence
        original_beam_search = self._model._beam_search
        interactive_beam_search = original_beam_search.constrained_to(
            initial_sequence)
        self._model._beam_search = interactive_beam_search

        # Now get results
        results = self.predict_instance(instance)

        # And add in the choices. Need to convert from idxs to rules.
        results["choices"] = [[
            (probability, action) for probability, action in zip(
                pa["action_probabilities"], pa["considered_actions"])
        ] for pa in results["predicted_actions"]]

        results["beam_snapshots"] = {
            # For each batch_index, we get a list of beam snapshots
            batch_index: [
                # Each beam_snapshots consists of a list of timesteps,
                # each of which is a list of pairs (score, sequence).
                # The sequence is the *indices* of the rules, which we
                # want to convert to the string representations.
                [(score, [index_to_rule[idx] for idx in sequence])
                 for score, sequence in timestep_snapshot]
                for timestep_snapshot in beam_snapshots
            ]
            for batch_index, beam_snapshots in
            interactive_beam_search.beam_snapshots.items()
        }

        # Restore original beam search
        self._model._beam_search = original_beam_search

        return results
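An illustrative call (the question and the forced production rules are made
up):

outputs = predictor.predict_json({
    "question": "how many states border texas?",
    "initial_sequence": ["statement -> [query]"],  # rules to force, if any
})
for score, sequence in outputs["beam_snapshots"][0][-1]:
    print(score, sequence)  # final-timestep beam, rules as strings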
Example 24
 def _sentence_to_srl_instances(self, json_dict: JsonDict) -> List[Instance]:
     sentence = json_dict["sentence"]
     if "verbs" in json_dict.keys():
         text = sentence.split()
         pos = ["VERB" if i == json_dict["verbs"] else "NOUN" for i, _ in enumerate(text)]
         tokens = [Token(t, i, i + len(text), pos_=p) for i, (t, p) in enumerate(zip(text, pos))]
     else:
         tokens = self._tokenizer.tokenize(sentence)
     return self.tokens_to_instances(tokens)
Example 25
def convert_qajson_to_entailment(qa_json: JsonDict):
    question_text = qa_json["question"]["stem"]
    choices = qa_json["question"]["choices"]
    for choice in choices:
        choice_text = choice["text"]

        statement = create_hypothesis(get_fitb_from_question(question_text), choice_text)
        # create_output_dict is assumed to mutate qa_json in place
        create_output_dict(qa_json, statement, choice["label"] == qa_json.get("answerKey", "Z"))

    return qa_json
Example 26
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like ``{"question": "...", "passage": "..."}``.
        """
        question_text = json_dict["question"]
        background = json_dict["background"]
        situation = json_dict.get("situation")

        return self._dataset_reader.text_to_instance(question_text, background,
                                                     situation)
Example 27
def align_entities(extracted: List[str],
                   literals: JsonDict,
                   stemmer: NltkPorterStemmer) -> List[str]:
    """
    Use stemming to attempt alignment between extracted world and given world literals.
    If more words align to one world vs the other, it's considered aligned.
    """
    literal_keys = list(literals.keys())
    literal_values = list(literals.values())
    overlaps = [get_stem_overlaps(extract, literal_values, stemmer) for extract in extracted]
    worlds = []
    for overlap in overlaps:
        if overlap[0] > overlap[1]:
            worlds.append(literal_keys[0])
        elif overlap[0] < overlap[1]:
            worlds.append(literal_keys[1])
        else:
            worlds.append(None)
    return worlds
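``get_stem_overlaps`` is not defined in this snippet. A hedged sketch,
assuming the stemmer exposes a ``stem``-like method and each literal value is
a string or a list of strings:

def get_stem_overlaps(extract, literal_values, stemmer):
    # Hypothetical: per-world count of stemmed-word overlap with ``extract``.
    extract_stems = {stemmer.stem(w) for w in extract.split()}
    counts = []
    for value in literal_values:
        literals = value if isinstance(value, list) else [value]
        stems = {stemmer.stem(w) for lit in literals for w in lit.split()}
        counts.append(len(extract_stems & stems))
    return counts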
Example 28
    def attack_from_json(self, inputs: JsonDict) -> JsonDict:
        _volatile_json_ = inputs.copy()
        raw_instance = self.predictor.json_to_labeled_instances(inputs)[0]
        raw_tokens = list(map(lambda x: x.text, self.spacy.tokenize(inputs[self.f2c])))

        # Select words that can be changed
        sids_to_change = []
        nbr_dct = defaultdict(list)
        for i in range(len(raw_tokens)):
            if raw_tokens[i] not in self.ignore_tokens:
                word = raw_tokens[i]
                nbrs = self.searcher.search(word)
                nbrs = [nbr for nbr in nbrs if nbr not in self.forbidden_tokens]
                if len(nbrs) > 0:
                    sids_to_change.append(i)
                    nbr_dct[i] = nbrs

        # max number of tokens that can be changed
        max_change_num = min(self.max_change_num(len(raw_tokens)), len(sids_to_change))

        # Construct adversarial instances
        adv_jsons = []
        for i in range(self.search_num):
            adv_tokens = [ele for ele in raw_tokens]
            word_sids = random.choices(sids_to_change, k=max_change_num)
            for word_sid in word_sids:
                adv_tokens[word_sid] = random.choice(nbr_dct[word_sid])
            _volatile_json_[self.f2c] = " ".join(adv_tokens)
            adv_jsons.append(_volatile_json_.copy())

        # Checking attacking status, early stop
        successful = False
        results = self.predictor.predict_batch_json(adv_jsons)

        for i, result in enumerate(results):
            adv_instance = self.predictor._json_to_instance(adv_jsons[i])
            adv_instance = self.predictor.predictions_to_labeled_instances(
                adv_instance, result)[0]
            if adv_instance[self.f2a].label != raw_instance[self.f2a].label:
                successful = True
                break
        adv_tokens = adv_jsons[i][self.f2c].split(" ")
        outputs = result

        return sanitize({
            "adv": adv_tokens,
            "raw": raw_tokens,
            "outputs": outputs,
            "success": 1 if successful else 0
        })
Example 29
    def attack(self, attacker_id: str, attack: JsonDict) -> JsonDict:
        """
        Modifies the input (e.g. by adding or removing tokens) to try to change
        the model's prediction in some desired manner.
        """
        if attacker_id not in config.VALID_ATTACKERS:
            raise http.UnknownAttackerError(attacker_id)
        attacker = self.attackers.get(attacker_id)
        if attacker is None:
            raise http.InvalidAttackerError(attacker_id)

        print('attack', attack)

        inputs = attack['inputs']
        input_field_to_attack = attack.get('input_field_to_attack', 'tokens')
        grad_input_field = attack.get('grad_input_field', 'grad_input_1')
        ignore_tokens = attack.get('ignore_tokens', None)
        target = attack.get('target', None)

        if target is not None:
            raise ValueError(
                "Input reduction does not implement targeted attacks")
        ignore_tokens = ["@@NULL@@"
                         ] if ignore_tokens is None else ignore_tokens

        original_instances = self.predictor.labeled_json_to_labeled_instances(
            inputs)

        final_tokens = {}
        original_tokens = {}
        for idx, instance in sorted(original_instances.items()):
            final_tokens[idx] = (attacker._attack_instance(
                inputs, instance, input_field_to_attack, grad_input_field,
                ignore_tokens))
            original_tokens[idx] = deepcopy(
                instance[input_field_to_attack].tokens)
        return sanitize({"final": final_tokens, "original": original_tokens})
Example 30
    def dump_line(self, outputs: JsonDict) -> str:

        if not self.numeric:
            prediction = outputs["label"]
        else:
            prediction = outputs["prediction"]
            if isinstance(prediction, float):
                prediction = min(max(prediction, 0), 5)
                prediction = f"{prediction:.3f}"

        output = {
            "idx": int(outputs["index"]),
            # "label": prediction,
            "pseudolabel": outputs["logits"],
            **outputs.get("raw_input", {})
        }

        return json.dumps(output, ensure_ascii=False) + "\n"
Example 31
 def predict_json(self, inputs: JsonDict) -> JsonDict:
     n = inputs.pop('n', 10)
     if 'track_id' in inputs:
         if self.index is None:
             raise AttributeError("Please build an index before searching by track.")
         idx = self.vocab.get_token_to_index_vocabulary("labels")[inputs['track_id']]
         nns = self.index.get_nns_by_item(idx, n+1)[1:]
         #scores = self.index.get_item_vector(idx) 
         tracks = self.neighbors_to_tracks(nns)
         return tracks
         #return {'tracks': tracks, 'scores': scores}
         
         
     instance = self._json_to_instance(inputs)
     output_dict = self.predict_instance(instance)
     output_dict['inputs'] = inputs
     if self.index:
         logits = output_dict.get('logits')
         nns = self.index.get_nns_by_vector(logits, n)
         return self.neighbors_to_tracks(nns)
         #output_dict['tracks'] = self.neighbors_to_tracks(nns)
     return output_dict
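The ``get_nns_by_item`` / ``get_nns_by_vector`` calls match Annoy's API. A
hypothetical index build consistent with them (``track_vectors`` and the
embedding size are assumptions):

from annoy import AnnoyIndex

dim = 128  # assumed embedding size
index = AnnoyIndex(dim, "angular")
for track_id, idx in vocab.get_token_to_index_vocabulary("labels").items():
    index.add_item(idx, track_vectors[idx])  # one vector per track
index.build(10)  # 10 trees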
Example 32
 def get_most_important_part(self, instance: Instance, output: JsonDict):
     if "alphas" not in output:
         return output
     alphas = output.pop("alphas")
     best_span = argmax(alphas)  # argmax is assumed to come from numpy
     tokens = instance["tokens"].tokens
     nom = 0
     span = (0, 0)
     length = len(tokens)
     flag = False
     # Spans are enumerated in the same order the model scored them; stop at
     # the one whose running index matches ``best_span``.
     for i in range(1, length - 1):
         for j in range(i, length - 1):
             span = (i, j)
             if nom == best_span:
                 flag = True
                 break
             nom += 1
         if flag:
             # Without this outer break, ``span`` keeps being overwritten
             # after the best span has already been found.
             break
     i, j = span
     best_tokens = tokens[i:j + 1]
     output["best_span"] = " ".join([token.text for token in best_tokens])
     output["nom"] = nom
     output["ij"] = [i, j]
     output["break"] = flag
     output["val"] = alphas[best_span]
Example 33
 def sanitize(result: JsonDict) -> JsonDict:
     return {key: value for key, value in result.items()
             if key.startswith("best_span")}