Example #1
0
 def _json_to_instance(self, json_dict: JsonDict) -> Instance:
     """
     Build an instance from ``json_dict`` by delegating to the dataset
     reader's ``text_to_instance``.

     If ``json_dict`` contains a ``"text_idx"`` key, only its ``"text_idx"``
     and ``"comment_idx"`` values are forwarded, in that order (so
     ``"comment_idx"`` must be present as well).  Otherwise every value in
     ``json_dict`` is forwarded positionally, in the dict's iteration order.
     """
     # Generic payload: hand all values to the reader as positional arguments.
     if 'text_idx' not in json_dict:
         return self._dataset_reader.text_to_instance(*json_dict.values())

     # Index-based payload: pick exactly the two expected fields.
     text_idx = json_dict['text_idx']
     comment_idx = json_dict['comment_idx']
     return self._dataset_reader.text_to_instance(text_idx, comment_idx)  # type: ignore
Example #2
0
def align_entities(extracted: List[str],
                   literals: JsonDict,
                   stemmer: NltkPorterStemmer) -> List[str]:
    """
    Map each extracted world string to the key of the world literal it best matches.

    For every entry of ``extracted``, ``get_stem_overlaps`` is called with the
    list of literal values and the stemmer.  The first two positions of its
    result are compared: if the first is larger, the first key of ``literals``
    is chosen; if the second is larger, the second key is chosen.

    Returns a list parallel to ``extracted``.  An entry is ``None`` when the
    two overlap scores are not strictly ordered (e.g. a tie), so callers must
    be prepared for ``None`` despite the ``List[str]`` annotation.
    """
    world_names = list(literals)
    world_texts = list(literals.values())

    # Score every extracted string first, then classify the scores.
    scores = [get_stem_overlaps(candidate, world_texts, stemmer)
              for candidate in extracted]

    aligned = []
    for score in scores:
        if score[0] > score[1]:
            chosen = world_names[0]
        elif score[0] < score[1]:
            chosen = world_names[1]
        else:
            # Neither world wins: leave this entry unaligned.
            chosen = None
        aligned.append(chosen)
    return aligned