def find_curve_range_intersection(curve_1, curve_2, cut_at_inflection=False):
    """
    Return intersections of x- and y-ranges of two real curves,
    which are parametric curves on the xy-plane given as
    (x_array, y_array), a tuple of NumPy arrays.

    Parameters
    ----------
    curve_1, curve_2 : tuple
        ``(x_array, y_array)`` pairs; each array is indexed at ``[0]`` and
        ``[-1]`` or passed to ``numpy.sort``.
    cut_at_inflection : bool, optional
        When truthy, the range of each coordinate is taken from its first
        and last samples only. Otherwise the smallest and largest samples
        of the whole array are used.

    Returns
    -------
    tuple
        ``(x_range, y_range)``, the results of ``Interval.intersect`` on the
        x-ranges and on the y-ranges of the two curves.
    """
    def coordinate_range(values):
        # Build the Interval spanned by one coordinate array.
        if cut_at_inflection:
            # Plain truthiness, so flags such as numpy.bool_(True) are
            # honoured; an ``is True`` identity check would reject them.
            low, high = sorted([values[0], values[-1]])
        else:
            low, high = numpy.sort(values)[[0, -1]]
        return Interval(low, high)

    x1, y1 = curve_1
    x2, y2 = curve_2

    x_range = coordinate_range(x1).intersect(coordinate_range(x2))
    y_range = coordinate_range(y1).intersect(coordinate_range(y2))

    return (x_range, y_range)
def find_curve_range_intersection(curve_1, curve_2, cut_at_inflection=False):
    """
    Return intersections of x- and y-ranges of two real curves,
    which are parametric curves on the xy-plane given as
    (x_array, y_array), a tuple of NumPy arrays.

    Parameters
    ----------
    curve_1, curve_2 : tuple
        ``(x_array, y_array)`` pairs; each array is indexed at ``[0]`` and
        ``[-1]`` or passed to ``numpy.sort``.
    cut_at_inflection : bool, optional
        When truthy, the range of each coordinate is taken from its first
        and last samples only. Otherwise the smallest and largest samples
        of the whole array are used.

    Returns
    -------
    tuple
        ``(x_range, y_range)``, the results of ``Interval.intersect`` on the
        x-ranges and on the y-ranges of the two curves.
    """
    def coordinate_range(values):
        # Build the Interval spanned by one coordinate array.
        if cut_at_inflection:
            # Plain truthiness, so flags such as numpy.bool_(True) are
            # honoured; an ``is True`` identity check would reject them.
            low, high = sorted([values[0], values[-1]])
        else:
            low, high = numpy.sort(values)[[0, -1]]
        return Interval(low, high)

    x1, y1 = curve_1
    x2, y2 = curve_2

    x_range = coordinate_range(x1).intersect(coordinate_range(x2))
    y_range = coordinate_range(y1).intersect(coordinate_range(y2))

    return (x_range, y_range)
def test_measure():
    """Check ``measure`` on intervals, unions, finite sets and ``*`` products."""
    length = Symbol('a', real=True)

    # Sets on the real line.
    assert Interval(1, 3).measure == 2
    assert Interval(0, length).measure == length
    assert Interval(1, length).measure == length - 1

    assert Union(Interval(1, 2), Interval(3, 4)).measure == 2
    assert (
        Union(Interval(1, 2), Interval(3, 4), FiniteSet(5, 6, 7)).measure
        == 2
    )

    assert FiniteSet(1, 2, oo, length, -oo, -5).measure == 0
    assert S.EmptySet.measure == 0

    # Sets built with ``*`` from two intervals.
    box = Interval(0, 10) * Interval(0, 10)
    shifted_box = Interval(5, 15) * Interval(5, 15)
    strip = Interval(-oo, oo) * Interval(2, 4)

    assert box.measure == shifted_box.measure == 100
    # The two boxes overlap, so the measures do not simply add up.
    assert (box + shifted_box).measure == 175
    assert (box - shifted_box).measure == 75
    assert (box * FiniteSet(1, 2, 3)).measure == 0
    assert (box.intersect(strip)).measure == 20
    assert (box + strip).measure == oo
    assert (strip * FiniteSet(1, 2, 3)).measure == nan
def test_union():
    """Exercise ``Union`` and the ``+``, ``|`` and ``&`` set operators."""
    # Touching or overlapping intervals collapse into one interval.
    assert Union(Interval(1, 2), Interval(2, 3)) == Interval(1, 3)
    assert Union(Interval(1, 2), Interval(2, 3, True)) == Interval(1, 3)
    assert Union(Interval(1, 3), Interval(2, 4)) == Interval(1, 4)
    assert Union(Interval(1, 2), Interval(1, 3)) == Interval(1, 3)
    assert Union(Interval(1, 3), Interval(1, 2)) == Interval(1, 3)
    assert (Union(Interval(1, 3, False, True), Interval(1, 2))
            == Interval(1, 3, False, True))
    assert Union(Interval(1, 3), Interval(1, 2, False, True)) == Interval(1, 3)
    assert Union(Interval(1, 2, True), Interval(1, 3)) == Interval(1, 3)
    assert (Union(Interval(1, 2, True), Interval(1, 3, True))
            == Interval(1, 3, True))
    assert (Union(Interval(1, 2, True), Interval(1, 3, True, True))
            == Interval(1, 3, True, True))
    assert (Union(Interval(1, 2, True, True), Interval(1, 3, True))
            == Interval(1, 3, True))
    assert Union(Interval(1, 3), Interval(2, 3)) == Interval(1, 3)
    assert (Union(Interval(1, 3, False, True), Interval(2, 3))
            == Interval(1, 3))
    # The point 2 is excluded from both pieces, so the result has a gap.
    assert (Union(Interval(1, 2, False, True), Interval(2, 3, True))
            != Interval(1, 3))

    # Empty sets and iterable arguments.
    assert Union(Interval(1, 2), S.EmptySet) == Interval(1, 2)
    assert Union(S.EmptySet) == S.EmptySet
    assert (Union(Interval(0, 1), [FiniteSet(1.0/k) for k in range(1, 10)])
            == Interval(0, 1))

    # Method and operator spellings.
    assert (Interval(1, 2).union(Interval(2, 3))
            == Interval(1, 2) + Interval(2, 3))
    assert Interval(1, 2).union(Interval(2, 3)) == Interval(1, 3)
    assert Union(Set()) == Set()

    # Finite sets.
    assert FiniteSet(1) + FiniteSet(2) + FiniteSet(3) == FiniteSet(1, 2, 3)
    assert FiniteSet(['ham']) + FiniteSet(['eggs']) == FiniteSet('ham', 'eggs')
    assert FiniteSet(1, 2, 3) + S.EmptySet == FiniteSet(1, 2, 3)
    assert FiniteSet(1, 2, 3) & FiniteSet(2, 3, 4) == FiniteSet(2, 3)
    assert FiniteSet(1, 2, 3) | FiniteSet(2, 3, 4) == FiniteSet(1, 2, 3, 4)

    sym_x = Symbol("x")
    sym_y = Symbol("y")
    sym_z = Symbol("z")
    nested = FiniteSet(sym_x, FiniteSet(sym_y, sym_z))
    assert S.EmptySet | FiniteSet(sym_x, FiniteSet(sym_y, sym_z)) == nested

    # Test that Intervals and FiniteSets play nicely
    assert Interval(1, 3) + FiniteSet(2) == Interval(1, 3)
    assert (Interval(1, 3, True, True) + FiniteSet(3)
            == Interval(1, 3, True, False))

    first = Interval(1, 3) + FiniteSet(5)
    second = Interval(1, 2) + FiniteSet(3)
    common = first.intersect(second)
    assert 2 in first and 3 in first and 3 in common
    assert first.subset(common) and second.subset(common)

    raises(TypeError, lambda: Union(1, 2, 3))

    assert first.is_iterable is False
def test_complement():
    """Check ``complement`` for intervals, unions and finite sets."""
    # An endpoint that is closed in the interval is open in the complement,
    # and the other way round.
    assert Interval(0, 1).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, True, True))
    assert Interval(0, 1, True, False).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, True, True))
    assert Interval(0, 1, False, True).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, False, True))
    assert Interval(0, 1, True, True).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, False, True))

    # Unary ``-`` and ``~`` are alternative spellings of ``complement``.
    assert -S.EmptySet == S.EmptySet.complement
    assert ~S.EmptySet == S.EmptySet.complement
    assert S.EmptySet.complement == Interval(-oo, oo)

    gaps = Union(Interval(-oo, 0, True, True),
                 Interval(1, 2, True, True),
                 Interval(3, oo, True, True))
    assert Union(Interval(0, 1), Interval(2, 3)).complement == gaps

    # Finite sets.
    assert FiniteSet(0).complement == Union(
        Interval(-oo, 0, True, True), Interval(0, oo, True, True))
    assert ((FiniteSet(5) + Interval(S.NegativeInfinity, 0)).complement
            == Interval(0, 5, True, True) + Interval(5, S.Infinity, True, True))
    assert FiniteSet(1, 2, 3).complement == (
        Interval(S.NegativeInfinity, 1, True, True)
        + Interval(1, 2, True, True)
        + Interval(2, 3, True, True)
        + Interval(3, S.Infinity, True, True)
    )

    # A set shares no points with its own complement.
    mixed = Interval(1, 3) + FiniteSet(5)
    assert mixed.intersect(mixed.complement) == S.EmptySet
def test_union():
    """Exercise ``Union`` and the ``+``, ``|`` and ``&`` set operators."""
    # Touching or overlapping intervals collapse into one interval.
    assert Union(Interval(1, 2), Interval(2, 3)) == Interval(1, 3)
    assert Union(Interval(1, 2), Interval(2, 3, True)) == Interval(1, 3)
    assert Union(Interval(1, 3), Interval(2, 4)) == Interval(1, 4)
    assert Union(Interval(1, 2), Interval(1, 3)) == Interval(1, 3)
    assert Union(Interval(1, 3), Interval(1, 2)) == Interval(1, 3)
    assert (Union(Interval(1, 3, False, True), Interval(1, 2))
            == Interval(1, 3, False, True))
    assert Union(Interval(1, 3), Interval(1, 2, False, True)) == Interval(1, 3)
    assert Union(Interval(1, 2, True), Interval(1, 3)) == Interval(1, 3)
    assert (Union(Interval(1, 2, True), Interval(1, 3, True))
            == Interval(1, 3, True))
    assert (Union(Interval(1, 2, True), Interval(1, 3, True, True))
            == Interval(1, 3, True, True))
    assert (Union(Interval(1, 2, True, True), Interval(1, 3, True))
            == Interval(1, 3, True))
    assert Union(Interval(1, 3), Interval(2, 3)) == Interval(1, 3)
    assert (Union(Interval(1, 3, False, True), Interval(2, 3))
            == Interval(1, 3))
    # The point 2 is excluded from both pieces, so the result has a gap.
    assert (Union(Interval(1, 2, False, True), Interval(2, 3, True))
            != Interval(1, 3))

    # Empty sets and iterable arguments.
    assert Union(Interval(1, 2), S.EmptySet) == Interval(1, 2)
    assert Union(S.EmptySet) == S.EmptySet
    assert (Union(Interval(0, 1), [FiniteSet(1.0/k) for k in range(1, 10)])
            == Interval(0, 1))

    # Method and operator spellings.
    assert (Interval(1, 2).union(Interval(2, 3))
            == Interval(1, 2) + Interval(2, 3))
    assert Interval(1, 2).union(Interval(2, 3)) == Interval(1, 3)
    assert Union(Set()) == Set()

    # Finite sets.
    assert FiniteSet(1) + FiniteSet(2) + FiniteSet(3) == FiniteSet(1, 2, 3)
    assert FiniteSet('ham') + FiniteSet('eggs') == FiniteSet('ham', 'eggs')
    assert FiniteSet(1, 2, 3) + S.EmptySet == FiniteSet(1, 2, 3)
    assert FiniteSet(1, 2, 3) & FiniteSet(2, 3, 4) == FiniteSet(2, 3)
    assert FiniteSet(1, 2, 3) | FiniteSet(2, 3, 4) == FiniteSet(1, 2, 3, 4)

    sym_x = Symbol("x")
    sym_y = Symbol("y")
    sym_z = Symbol("z")
    nested = FiniteSet(sym_x, FiniteSet(sym_y, sym_z))
    assert S.EmptySet | FiniteSet(sym_x, FiniteSet(sym_y, sym_z)) == nested

    # Test that Intervals and FiniteSets play nicely
    assert Interval(1, 3) + FiniteSet(2) == Interval(1, 3)
    assert (Interval(1, 3, True, True) + FiniteSet(3)
            == Interval(1, 3, True, False))

    first = Interval(1, 3) + FiniteSet(5)
    second = Interval(1, 2) + FiniteSet(3)
    common = first.intersect(second)
    assert 2 in first and 3 in first and 3 in common
    assert common.is_subset(first) and common.is_subset(second)

    raises(TypeError, lambda: Union(1, 2, 3))

    assert first.is_iterable is False
class WordInterval(object):
    """A word together with the interval it spans.

    The span is stored as ``Interval(inf, sup)`` in ``self.interval`` and the
    word is stored unchanged in ``self.word``. A word equal to
    ``SILENCE_WORD`` marks the interval as silent.
    """

    # Word value that marks an interval as silent.
    SILENCE_WORD = '#'

    def __init__(self, inf, sup, word):
        self.word = word
        self.interval = Interval(inf, sup)

    @property
    def is_silent(self):
        """True when the word equals ``SILENCE_WORD``."""
        return self.word == WordInterval.SILENCE_WORD

    @property
    def inf(self):
        """The ``inf`` attribute of the underlying interval."""
        return self.interval.inf

    @property
    def sup(self):
        """The ``sup`` attribute of the underlying interval."""
        return self.interval.sup

    def intersect(self, another_interval):
        """Return ``self.interval.intersect(another_interval)``."""
        return self.interval.intersect(another_interval)

    def __eq__(self, other):
        """Equal when both the interval and the word are equal.

        Objects that lack ``interval`` or ``word`` are reported as not
        comparable (``NotImplemented``) instead of raising AttributeError,
        so ``word_interval == None`` is simply False.
        """
        try:
            other_interval, other_word = other.interval, other.word
        except AttributeError:
            return NotImplemented
        return (self.interval == other_interval) and (self.word == other_word)

    def __ne__(self, other):
        """Negation of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __str__(self):
        return "%s -> %s" % (self.interval, self.word)

    def __repr__(self):
        return self.__str__()
class WordInterval(object):
    """A word together with the interval it spans.

    The span is stored as ``Interval(inf, sup)`` in ``self.interval`` and the
    word is stored unchanged in ``self.word``. A word equal to
    ``SILENCE_WORD`` marks the interval as silent.
    """

    # Word value that marks an interval as silent.
    SILENCE_WORD = '#'

    def __init__(self, inf, sup, word):
        self.word = word
        self.interval = Interval(inf, sup)

    @property
    def is_silent(self):
        """True when the word equals ``SILENCE_WORD``."""
        return self.word == WordInterval.SILENCE_WORD

    @property
    def inf(self):
        """The ``inf`` attribute of the underlying interval."""
        return self.interval.inf

    @property
    def sup(self):
        """The ``sup`` attribute of the underlying interval."""
        return self.interval.sup

    def intersect(self, another_interval):
        """Return ``self.interval.intersect(another_interval)``."""
        return self.interval.intersect(another_interval)

    def __eq__(self, other):
        """Equal when both the interval and the word are equal.

        Objects that lack ``interval`` or ``word`` are reported as not
        comparable (``NotImplemented``) instead of raising AttributeError,
        so ``word_interval == None`` is simply False.
        """
        try:
            other_interval, other_word = other.interval, other.word
        except AttributeError:
            return NotImplemented
        return (self.interval == other_interval) and (self.word == other_word)

    def __ne__(self, other):
        """Negation of ``__eq__``.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __str__(self):
        return "%s -> %s" % (self.interval, self.word)

    def __repr__(self):
        return self.__str__()
def test_complement():
    """Check ``complement`` for intervals, unions and finite sets."""
    # An endpoint that is closed in the interval is open in the complement,
    # and the other way round.
    assert Interval(0, 1).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, True, True))
    assert Interval(0, 1, True, False).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, True, True))
    assert Interval(0, 1, False, True).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, False, True))
    assert Interval(0, 1, True, True).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, False, True))

    # Unary ``-`` and ``~`` are alternative spellings of ``complement``.
    assert -S.EmptySet == S.EmptySet.complement
    assert ~S.EmptySet == S.EmptySet.complement
    assert S.EmptySet.complement == Interval(-oo, oo)

    gaps = Union(Interval(-oo, 0, True, True),
                 Interval(1, 2, True, True),
                 Interval(3, oo, True, True))
    assert Union(Interval(0, 1), Interval(2, 3)).complement == gaps

    # Finite sets.
    assert FiniteSet(0).complement == Union(
        Interval(-oo, 0, True, True), Interval(0, oo, True, True))
    assert ((FiniteSet(5) + Interval(S.NegativeInfinity, 0)).complement
            == Interval(0, 5, True, True) + Interval(5, S.Infinity, True, True))
    assert FiniteSet(1, 2, 3).complement == (
        Interval(S.NegativeInfinity, 1, True, True)
        + Interval(1, 2, True, True)
        + Interval(2, 3, True, True)
        + Interval(3, S.Infinity, True, True)
    )

    # A set shares no points with its own complement.
    mixed = Interval(1, 3) + FiniteSet(5)
    assert mixed.intersect(mixed.complement) == S.EmptySet
def test_union():
    """Exercise ``Union`` and the ``+``, ``|`` and ``&`` set operators."""
    # Touching or overlapping intervals collapse into one interval.
    assert Union(Interval(1, 2), Interval(2, 3)) == Interval(1, 3)
    assert Union(Interval(1, 2), Interval(2, 3, True)) == Interval(1, 3)
    assert Union(Interval(1, 3), Interval(2, 4)) == Interval(1, 4)
    assert Union(Interval(1, 2), Interval(1, 3)) == Interval(1, 3)
    assert Union(Interval(1, 3), Interval(1, 2)) == Interval(1, 3)
    assert (Union(Interval(1, 3, False, True), Interval(1, 2))
            == Interval(1, 3, False, True))
    assert Union(Interval(1, 3), Interval(1, 2, False, True)) == Interval(1, 3)
    assert Union(Interval(1, 2, True), Interval(1, 3)) == Interval(1, 3)
    assert (Union(Interval(1, 2, True), Interval(1, 3, True))
            == Interval(1, 3, True))
    assert (Union(Interval(1, 2, True), Interval(1, 3, True, True))
            == Interval(1, 3, True, True))
    assert (Union(Interval(1, 2, True, True), Interval(1, 3, True))
            == Interval(1, 3, True))
    assert Union(Interval(1, 3), Interval(2, 3)) == Interval(1, 3)
    assert (Union(Interval(1, 3, False, True), Interval(2, 3))
            == Interval(1, 3))
    # The point 2 is excluded from both pieces, so the result has a gap.
    assert (Union(Interval(1, 2, False, True), Interval(2, 3, True))
            != Interval(1, 3))

    # Empty sets and iterable arguments.
    assert Union(Interval(1, 2), S.EmptySet) == Interval(1, 2)
    assert Union(S.EmptySet) == S.EmptySet
    assert (Union(Interval(0, 1), [FiniteSet(1.0/k) for k in range(1, 10)])
            == Interval(0, 1))

    # Method and operator spellings.
    assert (Interval(1, 2).union(Interval(2, 3))
            == Interval(1, 2) + Interval(2, 3))
    assert Interval(1, 2).union(Interval(2, 3)) == Interval(1, 3)
    assert Union(Set()) == Set()

    # Finite sets.
    assert FiniteSet(1) + FiniteSet(2) + FiniteSet(3) == FiniteSet(1, 2, 3)
    assert FiniteSet(['ham']) + FiniteSet(['eggs']) == FiniteSet('ham', 'eggs')
    assert FiniteSet(1, 2, 3) + S.EmptySet == FiniteSet(1, 2, 3)
    assert FiniteSet(1, 2, 3) & FiniteSet(2, 3, 4) == FiniteSet(2, 3)
    assert FiniteSet(1, 2, 3) | FiniteSet(2, 3, 4) == FiniteSet(1, 2, 3, 4)

    # Test that Intervals and FiniteSets play nicely
    assert Interval(1, 3) + FiniteSet(2) == Interval(1, 3)
    assert (Interval(1, 3, True, True) + FiniteSet(3)
            == Interval(1, 3, True, False))

    first = Interval(1, 3) + FiniteSet(5)
    second = Interval(1, 2) + FiniteSet(3)
    common = first.intersect(second)
    assert 2 in first and 3 in first and 3 in common
    assert first.subset(common) and second.subset(common)

    raises(TypeError, "Union(1, 2, 3)")
def test_complement():
    """Check ``complement`` for intervals, unions, finite sets and a square."""
    # An endpoint that is closed in the interval is open in the complement,
    # and the other way round.
    assert Interval(0, 1).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, True, True))
    assert Interval(0, 1, True, False).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, True, True))
    assert Interval(0, 1, False, True).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, False, True))
    assert Interval(0, 1, True, True).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, False, True))

    # Unary ``-`` and ``~`` are alternative spellings of ``complement``.
    assert -S.EmptySet == S.EmptySet.complement
    assert ~S.EmptySet == S.EmptySet.complement
    assert S.EmptySet.complement == S.UniversalSet
    assert S.UniversalSet.complement == S.EmptySet

    gaps = Union(Interval(-oo, 0, True, True),
                 Interval(1, 2, True, True),
                 Interval(3, oo, True, True))
    assert Union(Interval(0, 1), Interval(2, 3)).complement == gaps

    # Finite sets.
    assert FiniteSet(0).complement == Union(
        Interval(-oo, 0, True, True), Interval(0, oo, True, True))
    assert ((FiniteSet(5) + Interval(S.NegativeInfinity, 0)).complement
            == Interval(0, 5, True, True) + Interval(5, S.Infinity, True, True))
    assert FiniteSet(1, 2, 3).complement == (
        Interval(S.NegativeInfinity, 1, True, True)
        + Interval(1, 2, True, True)
        + Interval(2, 3, True, True)
        + Interval(3, S.Infinity, True, True)
    )

    # A set shares no points with its own complement.
    mixed = Interval(1, 3) + FiniteSet(5)
    assert mixed.intersect(mixed.complement) == S.EmptySet

    # Membership of points in a square and in its complement.
    unit_square = Interval(0, 1) * Interval(0, 1)
    outside_square = unit_square.complement
    points_in = [(0, 0), (.5, .5), (1, 0), (1, 1)]
    points_out = [(-1, 0), (1.5, .5), (10, 10)]

    assert all(point in unit_square for point in points_in)
    assert not any(point in outside_square for point in points_in)
    assert not any(point in unit_square for point in points_out)
    assert all(point in outside_square for point in points_out)
def test_complement():
    """Check ``complement`` for intervals, unions, finite sets and a square."""
    # An endpoint that is closed in the interval is open in the complement,
    # and the other way round.
    assert Interval(0, 1).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, True, True))
    assert Interval(0, 1, True, False).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, True, True))
    assert Interval(0, 1, False, True).complement == Union(
        Interval(-oo, 0, True, True), Interval(1, oo, False, True))
    assert Interval(0, 1, True, True).complement == Union(
        Interval(-oo, 0, True, False), Interval(1, oo, False, True))

    # Unary ``-`` and ``~`` are alternative spellings of ``complement``.
    assert -S.EmptySet == S.EmptySet.complement
    assert ~S.EmptySet == S.EmptySet.complement
    assert S.EmptySet.complement == S.UniversalSet
    assert S.UniversalSet.complement == S.EmptySet

    gaps = Union(Interval(-oo, 0, True, True),
                 Interval(1, 2, True, True),
                 Interval(3, oo, True, True))
    assert Union(Interval(0, 1), Interval(2, 3)).complement == gaps

    # Finite sets.
    assert FiniteSet(0).complement == Union(
        Interval(-oo, 0, True, True), Interval(0, oo, True, True))
    assert ((FiniteSet(5) + Interval(S.NegativeInfinity, 0)).complement
            == Interval(0, 5, True, True) + Interval(5, S.Infinity, True, True))
    assert FiniteSet(1, 2, 3).complement == (
        Interval(S.NegativeInfinity, 1, True, True)
        + Interval(1, 2, True, True)
        + Interval(2, 3, True, True)
        + Interval(3, S.Infinity, True, True)
    )

    # A set shares no points with its own complement.
    mixed = Interval(1, 3) + FiniteSet(5)
    assert mixed.intersect(mixed.complement) == S.EmptySet

    # Membership of points in a square and in its complement.
    unit_square = Interval(0, 1) * Interval(0, 1)
    outside_square = unit_square.complement
    points_in = [(0, 0), (.5, .5), (1, 0), (1, 1)]
    points_out = [(-1, 0), (1.5, .5), (10, 10)]

    assert all(point in unit_square for point in points_in)
    assert not any(point in outside_square for point in points_in)
    assert not any(point in unit_square for point in points_out)
    assert all(point in outside_square for point in points_out)
def compare_single_doc(gs, ann):
    """
    Score each gold standard annotation of one document against a set of
    annotations.

    Both dataframes are read through their 'document_id', 'annotation_id',
    'start' and 'stop' columns, and all rows are assumed to belong to the
    same document. Spans are treated as right-open intervals.

    The result has one row per gold standard row, with columns
    'document_id', 'annotation_id', 'exact', 'partial', 'missed', 'span':
      - exact   - 1 when the gold span is fully covered by annotations
      - partial - 1 when only part of it is covered (counts as miss)
      - missed  - 1 when none of it is covered
      - span    - 'start stop' of the gold span, or, for a partial match,
                  the uncovered segment(s) joined by ';'

    Rows of `ann` never produce output rows of their own, so false
    positives are not reported here.
    """
    columns = [
        'document_id', 'annotation_id', 'exact', 'partial', 'missed', 'span'
    ]
    records = []

    # Trivial cases: with nothing to compare, every gold row is a miss
    # (and if the gold standard is empty the result is empty as well).
    if gs.shape[0] == 0 or ann.shape[0] == 0:
        if gs.shape[0] > 0:
            doc_id = gs['document_id'].values[0]
            for _, gold in gs.iterrows():
                span = '{} {}'.format(gold['start'], gold['stop'])
                records.append(
                    [doc_id, gold['annotation_id'], 0, 0, 1, span]
                )
        return pd.DataFrame.from_records(records, columns=columns)

    # Both frames are non-empty from here on.
    doc_id = gs['document_id'].values[0]

    # Merge all annotation spans into a single set of right-open intervals.
    annotated = Union(*[
        Interval(start, stop, False, True)
        for start, stop in ann.loc[:, ['start', 'stop']].values
    ])

    for _, gold in gs.iterrows():
        # indices are right open
        target = Interval(gold['start'], gold['stop'], False, True)
        uncovered = target - target.intersect(annotated)
        whole_span = '{} {}'.format(gold['start'], gold['stop'])

        if uncovered.is_EmptySet:
            # exact match: nothing of the gold span is left over
            flags, span = (1, 0, 0), whole_span
        elif uncovered == target:
            # no match: the whole gold span is left over
            flags, span = (0, 0, 1), whole_span
        else:
            # partial match: report only the segment(s) left uncovered
            if type(uncovered) is Union:
                pieces = uncovered.args
            else:
                pieces = (uncovered,)
            # brat format: non-contiguous segments are delimited by ';'
            span = ';'.join(
                '{} {}'.format(piece.left, piece.right)
                for piece in map(adjust_interval, pieces)
            )
            flags = (0, 1, 0)

        records.append([doc_id, gold['annotation_id'], *flags, span])

    return pd.DataFrame.from_records(records, columns=columns)
def compare(goldstandard, comparison):
    """
    Report on what proportion of the gold standard corpus is covered by
    the given annotations.

    Parameters
    ----------
    goldstandard : pandas.DataFrame
        Must provide the columns 'document_id', 'annotation_id', 'start',
        'stop' and 'entity'.
    comparison : pandas.DataFrame
        Must provide 'document_id', 'annotator', 'start', 'stop' and
        'entity'. The 'annotator' value is used as the annotation id.
        The dataframe passed in is left unmodified.

    Returns
    -------
    pandas.DataFrame
        One row per gold standard annotation, with columns 'document_id',
        'annotation_id', 'exact', 'partial', 'missed', 'span':
          - exact   - 1 when the gold span is fully covered by annotations
          - partial - 1 when only part of it is covered (counts as miss)
          - missed  - 1 when none of it is covered
          - span    - 'start stop' of the gold span, or, for a partial
                      match, the uncovered segment(s) joined by ';'

    Rows of `comparison` never produce output rows of their own, so false
    positives are not reported here.
    """
    cols = ['document_id', 'annotation_id', 'start', 'stop', 'entity']
    columns = [
        'document_id', 'annotation_id', 'exact', 'partial', 'missed', 'span'
    ]

    # Derive the annotation id on a copy so the caller's dataframe does not
    # silently gain (or have overwritten) an 'annotation_id' column.
    comparison = comparison.assign(annotation_id=comparison['annotator'])

    # stack gold standard and annotations into the same dataframe
    df = pd.concat(
        [goldstandard[cols], comparison[cols]], ignore_index=True, axis=0
    )

    # delineate gold standard from annotation; ignore_index above makes the
    # first goldstandard.shape[0] labels the gold standard rows
    df['source'] = 'gs'
    df.loc[goldstandard.shape[0]:, 'source'] = 'ann'

    performance = []
    n_groups = df[['document_id']].drop_duplicates().shape[0]

    # Group by the column name, not a one-element list: with a list, recent
    # pandas versions yield 1-tuples as group keys, which would end up in
    # the 'document_id' column of the result.
    for doc_id, grp in tqdm(df.groupby('document_id'), total=n_groups):
        is_gold = grp['source'] == 'gs'

        # Merge the document's annotation spans into right-open intervals.
        annotated = Union(*[
            Interval(start, stop, False, True)
            for start, stop in grp.loc[~is_gold, ['start', 'stop']].values
        ])

        for _, gold in grp.loc[is_gold, :].iterrows():
            # indices are right open
            target = Interval(gold['start'], gold['stop'], False, True)
            uncovered = target - target.intersect(annotated)
            whole_span = '{} {}'.format(gold['start'], gold['stop'])

            if uncovered.is_EmptySet:
                # exact match: nothing of the gold span is left over
                flags, span = (1, 0, 0), whole_span
            elif uncovered == target:
                # no match: the whole gold span is left over
                flags, span = (0, 0, 1), whole_span
            else:
                # partial match: report only the segment(s) left uncovered
                if type(uncovered) is Union:
                    pieces = uncovered.args
                else:
                    pieces = (uncovered,)
                # brat format: non-contiguous segments are delimited by ';'
                span = ';'.join(
                    '{} {}'.format(piece.left, piece.right)
                    for piece in map(adjust_interval, pieces)
                )
                flags = (0, 1, 0)

            performance.append(
                [doc_id, gold['annotation_id'], *flags, span]
            )

    return pd.DataFrame.from_records(performance, columns=columns)