def _parse_attributes(attributes): if attributes is None: _attributes = {} else: try: _attributes = json_loads(attributes) except ValueError: # Failed to parse, warn the client Messager.warning( ('Unable to parse attributes string "%s" for ' '"createSpan", ignoring attributes for request and ' 'assuming no attributes set') % (attributes, )) _attributes = {} # XXX: Hack since the client is sending back False and True as values... # These are __not__ to be sent, they violate the protocol for _del in [k for k, v in list(_attributes.items()) if v == False]: del _attributes[_del] # These are to be old-style modifiers without values for _revalue in [k for k, v in list(_attributes.items()) if v]: _attributes[_revalue] = True ### return _attributes
def undo(collection, document, token):
    """Revert a previous annotation action described by an undo token.

    Arguments:
    collection -- collection argument forwarded to the span functions
    document   -- document argument forwarded to the span functions
    token      -- JSON-encoded undo token; must contain an 'action' entry

    Returns whatever delete_span (for 'add_tb') or create_span (for
    'mod_tb') returns.

    Raises:
    CorruptUndoTokenError  -- the token is not parseable as JSON
    InvalidTokenError      -- the token has no 'action' entry
    NonUndoableActionError -- the action is neither 'add_tb' nor 'mod_tb'
    """
    try:
        token = json_loads(token)
    except ValueError:
        raise CorruptUndoTokenError

    try:
        action = token['action']
    except KeyError:
        raise InvalidTokenError('action')

    if action == 'add_tb':
        # An addition is undone by deleting what was added
        return delete_span(collection, document, token['id'])

    if action != 'mod_tb':
        raise NonUndoableActionError

    # A modification is undone by re-submitting the values recorded in
    # the token for the same annotation id.
    # TODO: We do not handle attributes and comments
    return create_span(
        collection,
        document,
        token['start'],
        token['end'],
        token['type'],
        id=token['id'],
        attributes=token['attributes'],
        comment=token.get('comment'))
def _json_offsets_to_list(offsets): try: offsets = json_loads(offsets) except Exception: Messager.error('create_span: protocol argument error: expected offsets as JSON, but failed to parse "%s"' % str(offsets)) raise ProtocolArgumentError try: offsets = [(int(s),int(e)) for s,e in offsets] except Exception: Messager.error('create_span: protocol argument error: expected offsets as list of int pairs, received "%s"' % str(offsets)) raise ProtocolArgumentError return offsets
def _parse_span_normalizations(normalizations): if normalizations is None: _normalizations = {} else: try: _normalizations = json_loads(normalizations) except ValueError: # Failed to parse, warn the client Messager.warning((u'Unable to parse normalizations string "%s" for ' u'"createSpan", ignoring normalizations for request and ' u'assuming no normalizations set') % (normalizations, )) _normalizations = {} return _normalizations
def _json_offsets_to_list(offsets): try: offsets = json_loads(offsets) except Exception: Messager.error( 'create_span: protocol argument error: expected offsets as JSON, but failed to parse "%s"' % str(offsets)) raise ProtocolArgumentError try: offsets = [(int(s), int(e)) for s, e in offsets] except Exception: Messager.error( 'create_span: protocol argument error: expected offsets as list of int pairs, received "%s"' % str(offsets)) raise ProtocolArgumentError return offsets
def split_span(collection, document, args, id):
    """Split an event annotation into one event per argument combination.

    Arguments:
    collection -- directory of the collection holding the document
    document   -- document name, joined onto the real collection directory
    args       -- JSON-encoded collection of argument names to split on
                  (names without trailing digits, e.g. "Theme")
    id         -- ID of the event annotation to split

    The existing event keeps the first combination of arguments; a new
    event is created for every further combination. Annotations that
    reference the original event are then extended or duplicated so that
    they reference the new events too.

    Returns the modification tracker's JSON response with the document's
    annotations added under the 'annotations' key.

    Raises:
    AnnotationsIsReadOnlyError -- the annotations are read-only
    AnnotationSplitError       -- the annotation is not an event, fewer
                                  than two arguments exist for a split
                                  name, or a referencing annotation is of
                                  a kind that cannot be adjusted
    """
    import re
    from copy import deepcopy

    directory = collection
    real_dir = real_directory(directory)
    document = path_join(real_dir, document)

    # TODO don't know how to pass an array directly, so doing extra catenate and split
    tosplit_args = json_loads(args)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()
        ann = ann_obj.get_ann_by_id(id)

        # currently only allowing splits for events
        if not isinstance(ann, EventAnnotation):
            raise AnnotationSplitError("Cannot split an annotation of type %s" % ann.type)

        # Partition the event's arguments: those to split on are collected
        # per role name with trailing digits removed ("Theme1" -> "Theme"),
        # everything else goes to a plain list (also under the stripped name).
        split_args = {}
        nonsplit_args = []
        for role, target_id in ann.args:
            stripped = re.match(r'^(.*?)\d*$', role)
            if stripped:
                role = stripped.group(1)
            if role in tosplit_args:
                split_args.setdefault(role, []).append(target_id)
            else:
                nonsplit_args.append((role, target_id))

        # verify that split is possible
        for role in tosplit_args:
            available = len(split_args.get(role, []))
            if available < 2:
                raise AnnotationSplitError("Cannot split %s on %s: only %d %s arguments (need two or more)" % (ann.id, role, available, role))

        # create all combinations of the args on which to split
        argument_combos = [[]]
        for role in tosplit_args:
            argument_combos = [combo + [(role, target_id)]
                               for target_id in split_args[role]
                               for combo in argument_combos]

        # create the new events (first combo will use the existing event)
        new_events = []
        for position, combo in enumerate(argument_combos):
            event_args = nonsplit_args + combo
            if position == 0:
                ann.args = event_args
                continue
            clone = deepcopy(ann)
            clone.id = ann_obj.get_new_id("E")  # TODO: avoid hard-coding ID prefix
            clone.args = event_args
            ann_obj.add_annotation(clone)
            new_events.append(clone)
            mods.addition(clone)

        def _copy_onto_new_events(original, id_prefix):
            # One copy of `original` per new event, retargeted at that event
            for event in new_events:
                duplicate = deepcopy(original)
                duplicate.target = event.id
                duplicate.id = ann_obj.get_new_id(id_prefix)  # TODO: avoid hard-coding ID prefix
                ann_obj.add_annotation(duplicate)
                mods.addition(duplicate)

        # then, go through all the annotations referencing the original
        # event, and create appropriate copies
        for a in ann_obj:
            soft_deps, hard_deps = a.get_deps()
            if ann.id not in (soft_deps | hard_deps):
                continue

            # Referenced; make duplicates appropriately
            if isinstance(a, EventAnnotation):
                # every argument pointing at the original event gets a
                # sibling argument for each new event
                extra_args = [(role, event.id)
                              for role, target_id in a.args
                              if target_id == ann.id
                              for event in new_events]
                a.args.extend(extra_args)
            elif isinstance(a, AttributeAnnotation):
                _copy_onto_new_events(a, "A")
            elif isinstance(a, BinaryRelationAnnotation):
                # TODO
                raise AnnotationSplitError("Cannot adjust annotation referencing split: not implemented for relations! (WARNING: annotations may be in inconsistent state, please reload!) (Please complain to the developers to fix this!)")
            elif isinstance(a, OnelineCommentAnnotation):
                _copy_onto_new_events(a, "#")
            elif isinstance(a, NormalizationAnnotation):
                _copy_onto_new_events(a, "N")
            else:
                raise AnnotationSplitError("Cannot adjust annotation referencing split: not implemented for %s! (Please complain to the lazy developers to fix this!)" % a.__class__)

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
def split_span(collection, document, args, id):
    """Split an event into several events, one per argument combination.

    Arguments:
    collection -- directory of the collection holding the document
    document   -- document name, joined onto the real collection directory
    args       -- JSON-encoded collection of argument names to split on
                  (without trailing digits, e.g. "Theme")
    id         -- ID of the event annotation to split

    The original event is reused for the first combination and a copy is
    added for each remaining one. Annotations referencing the original
    event are updated (events) or copied (attributes, one-line comments,
    normalizations) so the new events are referenced as well.

    Returns the modification tracker's JSON response, extended with the
    document's annotations under 'annotations'.

    Raises:
    AnnotationsIsReadOnlyError -- the annotations are read-only
    AnnotationSplitError       -- the annotation is not an event, a split
                                  name has fewer than two arguments, or a
                                  referencing annotation cannot be adjusted
    """
    import re
    from copy import deepcopy

    directory = collection
    real_dir = real_directory(directory)
    document = path_join(real_dir, document)

    # TODO don't know how to pass an array directly, so doing extra catenate
    # and split
    tosplit_args = json_loads(args)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()
        ann = ann_obj.get_ann_by_id(id)

        # currently only allowing splits for events
        if not isinstance(ann, EventAnnotation):
            raise AnnotationSplitError(
                "Cannot split an annotation of type %s" % ann.type)

        # Sort the event's arguments into two groups. Names are compared
        # with trailing numbers removed (e.g. "Theme1" -> "Theme"); names
        # to split on are gathered in a dict of ID lists, the rest in a
        # straight list.
        split_args = {}
        nonsplit_args = []
        for name, ref_id in ann.args:
            match = re.match(r'^(.*?)\d*$', name)
            base_name = match.group(1) if match else name
            if base_name not in tosplit_args:
                nonsplit_args.append((base_name, ref_id))
                continue
            split_args.setdefault(base_name, []).append(ref_id)

        # verify that split is possible
        for name in tosplit_args:
            count = len(split_args.get(name, []))
            if count < 2:
                raise AnnotationSplitError(
                    "Cannot split %s on %s: only %d %s arguments (need two or more)" %
                    (ann.id, name, count, name))

        # create all combinations of the args on which to split
        argument_combos = [[]]
        for name in tosplit_args:
            expanded = []
            for ref_id in split_args[name]:
                expanded.extend(
                    [partial + [(name, ref_id)] for partial in argument_combos])
            argument_combos = expanded

        # create the new events (first combo will use the existing event)
        new_events = []
        for index, combo in enumerate(argument_combos):
            if index == 0:
                ann.args = nonsplit_args + combo
            else:
                newann = deepcopy(ann)
                # TODO: avoid hard-coding ID prefix
                newann.id = ann_obj.get_new_id("E")
                newann.args = nonsplit_args + combo
                ann_obj.add_annotation(newann)
                new_events.append(newann)
                mods.addition(newann)

        # then, go through all the annotations referencing the original
        # event, and create appropriate copies
        for a in ann_obj:
            soft_deps, hard_deps = a.get_deps()
            refs = soft_deps | hard_deps
            if ann.id not in refs:
                continue

            # Referenced; decide how to adjust it. `copy_prefix` stays None
            # when the annotation is adjusted in place instead of copied.
            copy_prefix = None
            if isinstance(a, EventAnnotation):
                # go through args and make copies for referencing
                additions = []
                for name, ref_id in a.args:
                    if ref_id != ann.id:
                        continue
                    additions.extend([(name, newe.id) for newe in new_events])
                a.args.extend(additions)
            elif isinstance(a, AttributeAnnotation):
                copy_prefix = "A"
            elif isinstance(a, BinaryRelationAnnotation):
                # TODO
                raise AnnotationSplitError(
                    "Cannot adjust annotation referencing split: not implemented for relations! (WARNING: annotations may be in inconsistent state, please reload!) (Please complain to the developers to fix this!)")
            elif isinstance(a, OnelineCommentAnnotation):
                copy_prefix = "#"
            elif isinstance(a, NormalizationAnnotation):
                copy_prefix = "N"
            else:
                raise AnnotationSplitError(
                    "Cannot adjust annotation referencing split: not implemented for %s! (Please complain to the lazy developers to fix this!)" %
                    a.__class__)

            if copy_prefix is not None:
                for newe in new_events:
                    copied = deepcopy(a)
                    copied.target = newe.id
                    # TODO: avoid hard-coding ID prefix
                    copied.id = ann_obj.get_new_id(copy_prefix)
                    ann_obj.add_annotation(copied)
                    mods.addition(copied)

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
def create_span(collection, document, start, end, type, attributes=None,
                id=None, comment=None):
    """Create a span annotation, or edit an existing one, in a document.

    Arguments:
    collection -- directory of the collection holding the document
    document   -- document name, joined onto the real collection directory
    start, end -- span offsets, forwarded to _edit_span / _create_span
    type       -- annotation type
    attributes -- JSON-encoded mapping of attributes, or None
    id         -- ID of the annotation to edit; None creates a new one
    comment    -- annotator note text; a false value removes an existing
                  note from the annotation

    Returns a dict carrying the document's annotations under
    'annotations'; when DEBUG is set and a span was produced it is the
    modification tracker's JSON response.

    If the attributes string cannot be parsed, a warning is sent to the
    client and the request continues without attributes. If no text-bound
    annotation results (the span contained a new-line), an error message is
    sent to the client.
    """
    directory = collection

    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse attributes, warn the client rather than
            # failing the whole request
            Messager.warning(
                ('Unable to parse attributes string "%s" for '
                 '"createSpan", ignoring attributes for request and '
                 'assuming no attributes set') % (attributes, ))
            _attributes = {}

    # XXX: Hack since the client is sending back False and True as values...
    # Values equal to False are __not__ to be sent, they violate the
    # protocol, so drop them; values equal to True are old-style modifiers
    # without values and are stored as True.
    _attributes = dict(
        (k, True if v == True else v)  # noqa: E712
        for k, v in _attributes.items()
        if not v == False)  # noqa: E712

    # log_info('ATTR: %s' %(_attributes, ))

    real_dir = real_directory(directory)
    document = path_join(real_dir, document)
    projectconf = ProjectConfiguration(real_dir)
    txt_file_path = document + "." + TEXT_FILE_SUFFIX

    with TextAnnotations(document) as ann_obj:
        mods = ModificationTracker()

        if id is not None:
            # We are to edit an existing annotation
            tb_ann, e_ann = _edit_span(ann_obj, mods, id, start, end,
                                       projectconf, _attributes, type)
        else:
            # We are to create a new annotation
            tb_ann, e_ann = _create_span(ann_obj, mods, type, start, end,
                                         txt_file_path, projectconf,
                                         _attributes)

        # Attributes are assigned to the event when there is one, not to
        # its trigger.
        attr_target = e_ann if e_ann is not None else tb_ann
        # NOTE(review): with neither an event nor a text-bound annotation
        # (rejected span, handled below) there is nothing to attach
        # attributes to, so the call is skipped instead of passing None.
        if attr_target is not None:
            _set_attributes(ann_obj, attr_target, _attributes, mods)

        # Handle annotation comments
        if tb_ann is not None:
            # If this is an event, we want to attach the comment to it
            comment_on = e_ann if e_ann is not None else tb_ann

            # We are only interested in id;ed comments
            try:
                comment_on.id
                has_id = True
            except AttributeError:
                has_id = False

            if has_id:
                # Check if there is already an annotation comment
                for com_ann in ann_obj.get_oneline_comments():
                    if (com_ann.type == "AnnotatorNotes"
                            and com_ann.target == comment_on.id):
                        found = com_ann
                        break
                else:
                    found = None

                if comment:
                    if found is not None:
                        # Change the comment
                        # XXX: Note the ugly tab, it is for parsing the tail
                        before = unicode(found)
                        found.tail = "\t" + comment
                        mods.change(before, found)
                    else:
                        # Create a new comment
                        new_comment = OnelineCommentAnnotation(
                            comment_on.id, ann_obj.get_new_id("#"),
                            # XXX: Note the ugly tab
                            "AnnotatorNotes", "\t" + comment,
                        )
                        ann_obj.add_annotation(new_comment)
                        mods.addition(new_comment)
                elif found is not None:
                    # We are to erase the annotation
                    ann_obj.del_annotation(found)
                    mods.deletion(found)

        if tb_ann is not None:
            if DEBUG:
                mods_json = mods.json_response()
            else:
                mods_json = {}
        else:
            # Hack, we had a new-line in the span
            mods_json = {}
            Messager.error("Text span contained new-line, rejected",
                           duration=3)

        # save a roundtrip and send the annotations also
        mods_json["annotations"] = _json_from_ann_and_txt(ann_obj,
                                                          txt_file_path)
        return mods_json
def _create_span(collection, document, start, end, _type, attributes=None,
                 _id=None, comment=None):
    """Create a span annotation, or edit an existing one, recording undo data.

    Arguments:
    collection -- directory of the collection holding the document
    document   -- document name, joined onto the real collection directory
    start, end -- span offsets, forwarded to _edit_span / __create_span
    _type      -- annotation type
    attributes -- JSON-encoded mapping of attributes, or None
    _id        -- ID of the annotation to edit; None creates a new one
    comment    -- annotator note text; a false value removes an existing
                  note from the annotation

    Returns a dict with the document's annotations under 'annotations'
    and, when undo information was collected, a JSON-encoded undo token
    under 'undo'. When a text-bound annotation was produced the dict is
    the modification tracker's JSON response.

    Raises AnnotationsIsReadOnlyError if the annotations are read-only.

    If the attributes string cannot be parsed, a warning is sent to the
    client and the request continues without attributes. If no text-bound
    annotation results (the span contained a new-line), an error message is
    sent to the client and no undo information is recorded for the
    rejected creation.
    """
    directory = collection
    undo_resp = {}

    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse attributes, warn the client
            Messager.warning(
                (u'Unable to parse attributes string "%s" for '
                 u'"createSpan", ignoring attributes for request and '
                 u'assuming no attributes set') % (attributes, ))
            _attributes = {}

    # XXX: Hack since the client is sending back False and True as values...
    # Values equal to False are __not__ to be sent, they violate the
    # protocol, so drop them; values equal to True are old-style modifiers
    # without values and are stored as True.
    _attributes = dict(
        (k, True if v == True else v)  # noqa: E712
        for k, v in _attributes.items()
        if not v == False)  # noqa: E712

    # log_info('ATTR: %s' %(_attributes, ))

    real_dir = real_directory(directory)
    document = path_join(real_dir, document)
    projectconf = ProjectConfiguration(real_dir)
    txt_file_path = document + '.' + TEXT_FILE_SUFFIX

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        if _id is not None:
            # We are to edit an existing annotation
            tb_ann, e_ann = _edit_span(ann_obj, mods, _id, start, end,
                                       projectconf, _attributes, _type,
                                       undo_resp=undo_resp)
        else:
            # We are to create a new annotation
            tb_ann, e_ann = __create_span(ann_obj, mods, _type, start, end,
                                          txt_file_path, projectconf,
                                          _attributes)

            # Record the undo only if something was created. tb_ann is None
            # when the span was rejected (see below); dereferencing it here
            # would raise AttributeError before the client is told why.
            created = e_ann if e_ann is not None else tb_ann
            if created is not None:
                undo_resp['action'] = 'add_tb'
                undo_resp['id'] = created.id

        # Attributes are assigned to the event when there is one, not to
        # its trigger.
        attr_target = e_ann if e_ann is not None else tb_ann
        # NOTE(review): with neither an event nor a text-bound annotation
        # there is nothing to attach attributes to, so the call is skipped
        # instead of passing None.
        if attr_target is not None:
            _set_attributes(ann_obj, attr_target, _attributes, mods,
                            undo_resp=undo_resp)

        # Handle annotation comments
        if tb_ann is not None:
            # If this is an event, we want to attach the comment to it
            comment_on = e_ann if e_ann is not None else tb_ann

            # We are only interested in id;ed comments
            try:
                comment_on.id
                has_id = True
            except AttributeError:
                has_id = False

            if has_id:
                # Check if there is already an annotation comment
                for com_ann in ann_obj.get_oneline_comments():
                    if (com_ann.type == 'AnnotatorNotes'
                            and com_ann.target == comment_on.id):
                        found = com_ann
                        # Note the comment in the undo (tail minus the
                        # leading tab)
                        undo_resp['comment'] = found.tail[1:]
                        break
                else:
                    found = None

                if comment:
                    if found is not None:
                        # Change the comment
                        # XXX: Note the ugly tab, it is for parsing the tail
                        before = unicode(found)
                        found.tail = u'\t' + comment
                        mods.change(before, found)
                    else:
                        # Create a new comment
                        new_comment = OnelineCommentAnnotation(
                            comment_on.id, ann_obj.get_new_id('#'),
                            # XXX: Note the ugly tab
                            u'AnnotatorNotes', u'\t' + comment)
                        ann_obj.add_annotation(new_comment)
                        mods.addition(new_comment)
                elif found is not None:
                    # We are to erase the annotation
                    ann_obj.del_annotation(found)
                    mods.deletion(found)

        if tb_ann is not None:
            mods_json = mods.json_response()
        else:
            # Hack, we had a new-line in the span
            mods_json = {}
            Messager.error('Text span contained new-line, rejected',
                           duration=3)

        if undo_resp:
            mods_json['undo'] = json_dumps(undo_resp)
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json