def __str__(self):
    """Render this example as human-readable, multi-line text.

    The first line shows the number of whitespace-separated tokens and the
    raw text. Each entry of ``self._data['targets']`` then contributes a
    blank line, one line per span key that is present (``span1``,
    ``span2``), and one line with the label count and the labels.

    A span key missing from a target is skipped rather than raising
    ``KeyError``, matching ``_split_and_flatten_records``, which also
    treats both span keys as optional.

    Returns:
        The formatted description as a single string.
    """
    buf = io.StringIO()
    text = self._data['text']
    tokens = text.split()
    buf.write("Text ({:d}): {:s}\n".format(len(tokens), text))

    def _fmt_span(s, e):
        # Show the half-open token range [s, e) and the tokens it covers.
        return '[{:d},{:d})\t"{:s}"'.format(s, e, " ".join(tokens[s:e]))

    for t in self._data['targets']:
        buf.write("\n")
        for key in ('span1', 'span2'):
            # Spans are optional per target; only print the ones provided.
            if key in t:
                buf.write(" {}: {}\n".format(key, _fmt_span(*t[key])))
        # wrap_singleton_string is a project helper; presumably it turns a
        # lone string label into a one-element list -- TODO confirm.
        labels = utils.wrap_singleton_string(t['label'])
        buf.write(" label: ({:d})\t {}\n".format(len(labels), ", ".join(labels)))
    return buf.getvalue()
def _split_and_flatten_records(self, records: Iterable[Dict]):
    """Split nested records into flat example rows and flat target rows.

    Every record yields one example row holding its ``text`` and its
    position ``idx`` in ``records``, plus whatever
    ``_get_nested_vals`` returns for the record's ``'info'`` and
    ``'preds'`` entries. Every target inside the record yields one target
    row with its wrapped ``label``, the parent record's ``idx``, any
    ``span1`` / ``span2`` present (stored as tuples), and the target's own
    ``'info'`` / ``'preds'`` values.

    Args:
        records: iterable of dicts, each with ``'text'`` and ``'targets'``.

    Returns:
        A pair ``(example_rows, target_rows)`` of lists of dicts.
    """
    nested_fields = ('info', 'preds')
    span_keys = ('span1', 'span2')

    example_rows = []  # long-form example records, minus targets
    target_rows = []   # long-form target records with 'idx' column

    for idx, record in enumerate(records):
        example_row = {'text': record['text'], 'idx': idx}
        for field in nested_fields:
            example_row.update(_get_nested_vals(record, field))
        example_rows.append(example_row)

        for target in record['targets']:
            target_row = {
                'label': utils.wrap_singleton_string(target['label']),
                'idx': idx,
            }
            # Spans are optional; store them as tuples when present.
            for span_key in span_keys:
                if span_key in target:
                    target_row[span_key] = tuple(target[span_key])
            for field in nested_fields:
                target_row.update(_get_nested_vals(target, field))
            target_rows.append(target_row)

    return example_rows, target_rows