def __create_span(ann_obj, mods, type, start, end, txt_file_path,
                  projectconf, attributes):
    """Create (or reuse) a text-bound annotation and, if needed, its event.

    An existing text-bound annotation with the same start, end and type is
    reused; otherwise a new one is created from the text found between
    ``start`` and ``end`` in the document file. Spans whose text contains a
    newline are rejected and nothing is created.

    Arguments:
        ann_obj: annotation container providing ``get_textbounds``,
            ``get_new_id`` and ``add_annotation``.
        mods: modification tracker; ``addition`` is called for every
            annotation created here.
        type: annotation type label.
        start, end: span offsets; anything accepted by ``int()``.
        txt_file_path: path of the document text (read via
            ``open_textfile``).
        projectconf: project configuration providing
            ``is_physical_entity_type``.
        attributes: not used by this function.

    Returns:
        A ``(ann, event)`` tuple. ``ann`` is None when the span contains a
        newline. ``event`` is None in that case and whenever ``type`` is a
        physical entity type.
    """
    # TODO: Rip this out!
    start = int(start)
    end = int(end)

    # Before we add a new trigger, does it already exist?
    ann = None
    for tb_ann in ann_obj.get_textbounds():
        try:
            if (tb_ann.start == start and tb_ann.end == end
                    and tb_ann.type == type):
                ann = tb_ann
                break
        except AttributeError:
            # Not a trigger then
            pass

    if ann is None:
        # Get a new ID
        new_id = ann_obj.get_new_id('T')  # XXX: Cons
        # Get the text span
        with open_textfile(txt_file_path, 'r') as txt_file:
            text = txt_file.read()[start:end]

        # TODO: Data tail should be optional
        if '\n' in text:
            # We got a newline in the span, don't take any action
            return None, None

        ann = TextBoundAnnotationWithText(start, end, new_id, type, text)
        ann_obj.add_annotation(ann)
        mods.addition(ann)

    if projectconf.is_physical_entity_type(type):
        # TODO: alert that negation / speculation are ignored if set
        return ann, None

    # Create the event also
    new_event_id = ann_obj.get_new_id('E')  # XXX: Cons
    # str() rather than unicode(): the latter does not exist on Python 3,
    # and str() accepts the same ID value on both major versions.
    event = EventAnnotation(ann.id, [], str(new_event_id), type, '')
    ann_obj.add_annotation(event)
    mods.addition(event)
    return ann, event
def _create_argument(ann_obj, projectconf, mods, origin, target, type,
                     attributes, old_type, old_target):
    """Add an argument ``(type, target.id)`` to ``origin``, or update one.

    When both ``old_type`` and ``old_target`` are None this is an addition:
    the argument is appended unless already present. Otherwise it is an
    update: the previous argument (built from ``old_type`` / ``old_target``,
    falling back to the new type / target id for whichever is None) is
    replaced, provided it exists and the new argument would not collide with
    an existing one.

    If ``origin`` has no ``args`` attribute (most likely an entity), a new
    event anchored on ``origin`` is created carrying the argument.

    Arguments:
        ann_obj: annotation container (``get_new_id``, ``add_annotation``).
        projectconf: not used by this function.
        mods: modification tracker (``change``, ``addition``).
        origin: annotation receiving the argument.
        target: annotation the argument points to; only ``target.id`` is
            used.
        type: argument role label.
        attributes: not used by this function.
        old_type, old_target: previous role / target id of the argument
            being updated, or None.

    Returns:
        None; there is no addressing mechanism for arguments at the moment.
    """
    target_id = str(target.id)
    arg_tup = (type, target_id)

    # Probe only for the attribute whose absence signals "not an event", so
    # that unrelated AttributeErrors raised further down are not mistaken
    # for that case.
    try:
        origin.args
    except AttributeError:
        # The annotation did not have args, it was most likely an entity
        # thus we need to create a new Event...
        new_id = ann_obj.get_new_id('E')
        ann = EventAnnotation(origin.id, [arg_tup], new_id, origin.type, '')
        ann_obj.add_annotation(ann)
        mods.addition(ann)
        return None

    if arg_tup in origin.args:
        # Addition: it already existed as an arg, nothing to do.
        # Update: collision with an existing arg, don't do anything.
        return None

    # Is this an addition or an update?
    if old_type is None and old_target is None:
        before = str(origin)
        origin.add_argument(type, target_id)
        mods.change(before, origin)
        return None

    # Construct how the old arg would have looked like. Arguments are
    # stored as (role, id-string) tuples, so the fallback for a missing
    # old target must be the target's id, not the target object itself.
    old_arg_tup = (type if old_type is None else old_type,
                   target_id if old_target is None else old_target)
    if old_arg_tup in origin.args:
        before = str(origin)
        origin.args.remove(old_arg_tup)
        origin.add_argument(type, target_id)
        mods.change(before, origin)

    # No addressing mechanism for arguments at the moment
    return None
def __create_span(ann_obj, mods, type, offsets, txt_file_path,
                  projectconf, attributes):
    """Create (or reuse) a text-bound annotation and, if needed, its event.

    For event types an existing text-bound annotation with equal offsets
    and type is reused as the trigger. Otherwise a new annotation is
    created: every span in ``offsets`` is split at newlines into separate
    segments, each segment is trimmed of leading and trailing whitespace,
    and segments that end up empty are dropped. If no segment remains (for
    instance for a null span) the offsets are used as given.

    Arguments:
        ann_obj: annotation container providing ``get_textbounds``,
            ``get_new_id`` and ``add_annotation``.
        mods: modification tracker; ``addition`` is called for every
            annotation created here.
        type: annotation type label.
        offsets: sequence of ``(start, end)`` integer pairs indexing into
            the document text.
        txt_file_path: path of the document text (read via
            ``open_textfile``).
        projectconf: project configuration providing ``is_event_type`` and
            ``is_physical_entity_type``.
        attributes: not used by this function.

    Returns:
        A ``(ann, event)`` tuple; ``event`` is None when ``type`` is a
        physical entity type.
    """
    # For event types, reuse trigger if a matching one exists.
    found = None
    if projectconf.is_event_type(type):
        for tb_ann in ann_obj.get_textbounds():
            try:
                if (_offsets_equal(tb_ann.spans, offsets)
                        and tb_ann.type == type):
                    found = tb_ann
                    break
            except AttributeError:
                # Not a trigger then
                pass

    if found is None:
        # Get a new ID
        new_id = ann_obj.get_new_id('T')  # XXX: Cons
        # Get the text span
        with open_textfile(txt_file_path, 'r') as txt_file:
            text = txt_file.read()
            # Result is not needed below; the call is kept in case the
            # helper validates the offsets.
            # NOTE(review): confirm whether _text_for_offsets can raise.
            _text_for_offsets(text, offsets)

        # The below code resolves cases where there are newlines in the
        # offsets by creating discontinuous annotations for each span
        # separated by newlines. For most cases it preserves the offsets.
        seg_offsets = []
        for o_start, o_end in offsets:
            pos = o_start
            # Split this span's own text only: walking the combined text of
            # all spans from every span start yields wrong, duplicated
            # segments as soon as there is more than one span.
            for text_seg in text[o_start:o_end].split('\n'):
                if not text_seg and o_start != o_end:
                    # Double new-line, skip ahead
                    pos += 1
                    continue
                seg_start = pos
                seg_end = seg_start + len(text_seg)

                # For the next iteration the position is after the newline.
                pos = seg_end + 1

                # Adjust the offsets to compensate for any potential leading
                # and trailing whitespace.
                seg_start += len(text_seg) - len(text_seg.lstrip())
                seg_end -= len(text_seg) - len(text_seg.rstrip())

                # If there is any segment left, add it to the offsets. A
                # whitespace-only segment ends up with seg_start > seg_end,
                # so "<" (not "!=") is needed to drop it.
                if seg_start < seg_end:
                    seg_offsets.append((seg_start, seg_end))

        # if we're dealing with a null-span
        if not seg_offsets:
            seg_offsets = offsets

        ann_text = DISCONT_SEP.join(
            text[seg_start:seg_end] for seg_start, seg_end in seg_offsets)
        ann = TextBoundAnnotationWithText(seg_offsets, new_id, type, ann_text)
        ann_obj.add_annotation(ann)
        mods.addition(ann)
    else:
        ann = found

    # ann is always set at this point (either reused or freshly created).
    if projectconf.is_physical_entity_type(type):
        # TODO: alert that negation / speculation are ignored if set
        event = None
    else:
        # Create the event also
        new_event_id = ann_obj.get_new_id('E')  # XXX: Cons
        event = EventAnnotation(ann.id, [], str(new_event_id), type, '')
        ann_obj.add_annotation(event)
        mods.addition(event)

    return ann, event
def __create_span(ann_obj, mods, type, offsets, txt_file_path,
                  projectconf, attributes):
    """Create (or reuse) a text-bound annotation and, if needed, its event.

    For event types an existing text-bound annotation with equal offsets
    and type is reused as the trigger. Otherwise a new annotation is
    created; every span in ``offsets`` is split at newlines so that each
    line of a span becomes its own segment of a discontinuous annotation.

    Arguments:
        ann_obj: annotation container providing ``get_textbounds``,
            ``get_new_id`` and ``add_annotation``.
        mods: modification tracker; ``addition`` is called for every
            annotation created here.
        type: annotation type label.
        offsets: sequence of ``(start, end)`` integer pairs indexing into
            the document text.
        txt_file_path: path of the document text (read via
            ``open_textfile``).
        projectconf: project configuration providing ``is_event_type`` and
            ``is_physical_entity_type``.
        attributes: not used by this function.

    Returns:
        A ``(ann, event)`` tuple; ``event`` is None when ``type`` is a
        physical entity type.
    """
    # For event types, reuse trigger if a matching one exists.
    found = None
    if projectconf.is_event_type(type):
        for tb_ann in ann_obj.get_textbounds():
            try:
                if (_offsets_equal(tb_ann.spans, offsets)
                        and tb_ann.type == type):
                    found = tb_ann
                    break
            except AttributeError:
                # Not a trigger then
                pass

    if found is None:
        # Get a new ID
        new_id = ann_obj.get_new_id('T')  # XXX: Cons
        # Get the text span
        with open_textfile(txt_file_path, 'r') as txt_file:
            # TODO discont: use offsets instead (note need for int conversion)
            doc_text = txt_file.read()
        text = _text_for_offsets(doc_text, offsets)

        # The below code resolves cases where there are newlines in the
        # offsets by creating discontinuous annotations for each span
        # separated by newlines. For most cases it preserves the offsets.
        seg_offsets = []
        for o_start, o_end in offsets:
            pos = o_start
            # Split this span's own text only: walking the combined text of
            # all spans from every span start yields wrong, duplicated
            # segments as soon as there is more than one span.
            for text_seg in doc_text[o_start:o_end].split('\n'):
                if not text_seg and o_start != o_end:
                    # Double new-line, skip ahead
                    pos += 1
                    continue
                seg_end = pos + len(text_seg)
                seg_offsets.append((pos, seg_end))
                # Our current position is after the newline
                pos = seg_end + 1

        # Replace any newlines with the discontinuous separator
        ann_text = MUL_NL_REGEX.sub(DISCONT_SEP, text)
        ann = TextBoundAnnotationWithText(seg_offsets, new_id, type, ann_text)
        ann_obj.add_annotation(ann)
        mods.addition(ann)
    else:
        ann = found

    # ann is always set at this point (either reused or freshly created).
    if projectconf.is_physical_entity_type(type):
        # TODO: alert that negation / speculation are ignored if set
        event = None
    else:
        # Create the event also
        new_event_id = ann_obj.get_new_id('E')  # XXX: Cons
        # str() rather than unicode(): the latter does not exist on
        # Python 3, and str() accepts the same ID value on both versions.
        event = EventAnnotation(ann.id, [], str(new_event_id), type, '')
        ann_obj.add_annotation(event)
        mods.addition(event)

    return ann, event