def _json_to_instance(self, json_dict: JsonDict) -> Instance:
    """
    Turn a JSON payload into an ``Instance`` via the dataset reader.

    If ``json_dict`` has a ``"text_idx"`` key, the values stored under
    ``"text_idx"`` and ``"comment_idx"`` are passed, in that order, as
    positional arguments to ``self._dataset_reader.text_to_instance``
    (a missing ``"comment_idx"`` raises ``KeyError``).

    Otherwise every value of ``json_dict`` is forwarded positionally, in the
    dict's own iteration order, so the key order of the incoming JSON
    determines which argument each value becomes.
    """
    if 'text_idx' not in json_dict:
        # No index keys present: hand all values over as positional arguments.
        return self._dataset_reader.text_to_instance(*json_dict.values())

    text_idx = json_dict['text_idx']
    comment_idx = json_dict['comment_idx']
    return self._dataset_reader.text_to_instance(text_idx, comment_idx)  # type: ignore
def align_entities(extracted: List[str],
                   literals: JsonDict,
                   stemmer: NltkPorterStemmer) -> List[str]:
    """
    Align each extracted entity with one of the two given world literals.

    For every string in ``extracted``, ``get_stem_overlaps`` is called with the
    literal values and the stemmer. Its result is read at positions 0 and 1,
    which are compared against each other (presumably these are overlap counts
    for the first and second literal -- confirm against ``get_stem_overlaps``).

    The returned list has one entry per extracted string:

    * the first key of ``literals`` when position 0 is greater,
    * the second key of ``literals`` when position 1 is greater,
    * ``None`` when neither is greater (so, despite the annotation, the result
      may contain ``None``).
    """
    world_names = list(literals.keys())
    world_texts = list(literals.values())

    # Score every extracted string before any world is chosen.
    scores = []
    for entity in extracted:
        scores.append(get_stem_overlaps(entity, world_texts, stemmer))

    def choose_world(score):
        # Keys are indexed lazily, only for the side that actually wins.
        if score[0] > score[1]:
            return world_names[0]
        if score[0] < score[1]:
            return world_names[1]
        return None

    return [choose_world(score) for score in scores]