Esempio n. 1
0
def _parse_attributes(attributes):
    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse, warn the client
            Messager.warning(
                ('Unable to parse attributes string "%s" for '
                 '"createSpan", ignoring attributes for request and '
                 'assuming no attributes set') %
                (attributes, ))
            _attributes = {}

        # XXX: Hack since the client is sending back False and True as values...
        # These are __not__ to be sent, they violate the protocol
        for _del in [k for k, v in list(_attributes.items()) if v == False]:
            del _attributes[_del]

        # These are to be old-style modifiers without values
        for _revalue in [k for k, v in list(_attributes.items()) if v]:
            _attributes[_revalue] = True
        ###
    return _attributes
Esempio n. 2
0
File: undo.py Progetto: a-tsioh/brat
def undo(collection, document, token):
    """Revert the annotation change described by an undo token.

    Parameters:
        collection: collection the document belongs to; passed on as is.
        document: document the change was made in; passed on as is.
        token: JSON string holding an object with an 'action' field plus
            the fields that action needs.

    An 'add_tb' token is undone by deleting the span named by 'id'.  A
    'mod_tb' token is undone by calling create_span with the stored
    'start', 'end', 'type', 'id', 'attributes' and optional 'comment'.

    Returns:
        Whatever the delegated delete_span / create_span call returns.

    Raises:
        CorruptUndoTokenError: token is not valid JSON.
        InvalidTokenError: a field required by the action is missing.
        NonUndoableActionError: the action is not one of the two above.
    """
    try:
        token = json_loads(token)
    except ValueError:
        raise CorruptUndoTokenError

    def _required(field):
        # Report any missing field the same way a missing 'action' is
        # reported, instead of leaking a bare KeyError to the caller.
        try:
            return token[field]
        except KeyError:
            raise InvalidTokenError(field)

    action = _required('action')

    if action == 'add_tb':
        # Undo an addition
        return delete_span(collection, document, _required('id'))

    if action == 'mod_tb':
        # Undo a modification by restoring the recorded state
        return create_span(
            collection,
            document,
            _required('start'),
            _required('end'),
            _required('type'),
            id=_required('id'),
            attributes=_required('attributes'),
            # The comment is optional in the token
            comment=token.get('comment'))

    raise NonUndoableActionError
Esempio n. 3
0
def undo(collection, document, token):
    """Undo a previous annotation action using its JSON undo token.

    Parameters:
        collection: collection argument forwarded to the span functions.
        document: document argument forwarded to the span functions.
        token: JSON-encoded object; its 'action' field selects what to do.

    Supported actions:
        'add_tb': delete the span whose ID is stored under 'id'.
        'mod_tb': call create_span with the stored 'start', 'end', 'type',
            'id' and 'attributes', and with 'comment' when present
            (None otherwise).

    Returns:
        The result of the delete_span or create_span call.

    Raises:
        CorruptUndoTokenError: the token could not be parsed as JSON.
        InvalidTokenError: the token lacks a field the action requires.
        NonUndoableActionError: the action is not supported.
    """
    try:
        token = json_loads(token)
    except ValueError:
        raise CorruptUndoTokenError

    def _fetch(*fields):
        # Look the fields up in order; a missing one is reported through
        # InvalidTokenError (as for 'action') rather than a raw KeyError.
        values = []
        for field in fields:
            try:
                values.append(token[field])
            except KeyError:
                raise InvalidTokenError(field)
        return values

    action, = _fetch('action')

    if action == 'add_tb':
        # Undo an addition
        span_id, = _fetch('id')
        return delete_span(collection, document, span_id)

    if action != 'mod_tb':
        raise NonUndoableActionError

    # Undo a modification: re-create the span as it was recorded
    start, end, span_type, span_id, attributes = _fetch(
        'start', 'end', 'type', 'id', 'attributes')
    return create_span(
        collection,
        document,
        start,
        end,
        span_type,
        id=span_id,
        attributes=attributes,
        comment=token.get('comment'))
def _json_offsets_to_list(offsets):
    try:
        offsets = json_loads(offsets)
    except Exception:
        Messager.error('create_span: protocol argument error: expected offsets as JSON, but failed to parse "%s"' % str(offsets))
        raise ProtocolArgumentError
    try:
        offsets = [(int(s),int(e)) for s,e in offsets]
    except Exception:
        Messager.error('create_span: protocol argument error: expected offsets as list of int pairs, received "%s"' % str(offsets))
        raise ProtocolArgumentError
    return offsets
def _parse_span_normalizations(normalizations):
    if normalizations is None:
        _normalizations = {}
    else:
        try:
            _normalizations = json_loads(normalizations)
        except ValueError:
            # Failed to parse, warn the client
            Messager.warning((u'Unable to parse normalizations string "%s" for '
                    u'"createSpan", ignoring normalizations for request and '
                    u'assuming no normalizations set') % (normalizations, ))
            _normalizations = {}

    return _normalizations
Esempio n. 6
0
def _parse_span_normalizations(normalizations):
    if normalizations is None:
        _normalizations = {}
    else:
        try:
            _normalizations = json_loads(normalizations)
        except ValueError:
            # Failed to parse, warn the client
            Messager.warning((u'Unable to parse normalizations string "%s" for '
                    u'"createSpan", ignoring normalizations for request and '
                    u'assuming no normalizations set') % (normalizations, ))
            _normalizations = {}

    return _normalizations
Esempio n. 7
0
def _json_offsets_to_list(offsets):
    try:
        offsets = json_loads(offsets)
    except Exception:
        Messager.error(
            'create_span: protocol argument error: expected offsets as JSON, but failed to parse "%s"' %
            str(offsets))
        raise ProtocolArgumentError
    try:
        offsets = [(int(s), int(e)) for s, e in offsets]
    except Exception:
        Messager.error(
            'create_span: protocol argument error: expected offsets as list of int pairs, received "%s"' %
            str(offsets))
        raise ProtocolArgumentError
    return offsets
def split_span(collection, document, args, id):
    """Split an event into one event per combination of selected arguments.

    Parameters:
        collection: collection directory, resolved with real_directory.
        document: document name, joined onto the resolved directory.
        args: JSON-encoded collection of argument names (without trailing
            digits) to split the event on.
        id: ID of the annotation to split; it must be an EventAnnotation.

    Returns:
        The ModificationTracker JSON response with the annotation JSON of
        the document stored under 'annotations'.

    Raises:
        AnnotationsIsReadOnlyError: the annotations are read-only.
        AnnotationSplitError: the annotation is not an event, an argument
            to split on occurs fewer than two times, or an annotation
            referencing the event is of a kind that cannot be duplicated.
    """
    import re
    from copy import deepcopy

    real_dir = real_directory(collection)
    document = path_join(real_dir, document)
    # TODO don't know how to pass an array directly, so doing extra catenate and split
    tosplit_args = json_loads(args)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()
        ann = ann_obj.get_ann_by_id(id)

        # currently only allowing splits for events
        if not isinstance(ann, EventAnnotation):
            raise AnnotationSplitError("Cannot split an annotation of type %s" % ann.type)

        # Sort the event's arguments into those we split on (grouped by
        # role name with trailing digits removed, e.g. "Theme1" -> "Theme")
        # and those that are carried over to every resulting event.
        trailing_digits = re.compile(r'^(.*?)\d*$')
        ids_by_role = {}
        carried_over = []
        for role, target_id in ann.args:
            matched = trailing_digits.match(role)
            if matched:
                role = matched.group(1)
            if role in tosplit_args:
                ids_by_role.setdefault(role, []).append(target_id)
            else:
                carried_over.append((role, target_id))

        # A split needs at least two arguments for every requested role
        for role in tosplit_args:
            count = len(ids_by_role.get(role, []))
            if count < 2:
                raise AnnotationSplitError("Cannot split %s on %s: only %d %s arguments (need two or more)" % (ann.id, role, count, role))

        # Build every combination picking one argument per split role
        combos = [[]]
        for role in tosplit_args:
            combos = [partial + [(role, target_id)]
                      for target_id in ids_by_role[role]
                      for partial in combos]

        # The first combination re-uses the existing event; each further
        # one becomes a copy of it under a fresh ID.
        clones = []
        for index, combo in enumerate(combos):
            combined_args = carried_over[:] + combo
            if index == 0:
                ann.args = combined_args
                continue
            clone = deepcopy(ann)
            clone.id = ann_obj.get_new_id("E") # TODO: avoid hard-coding ID prefix
            clone.args = combined_args
            ann_obj.add_annotation(clone)
            clones.append(clone)
            mods.addition(clone)

        def _duplicate_for_clones(source, id_prefix):
            # Copy `source` once per new event, retargeted at that event
            for clone in clones:
                duplicate = deepcopy(source)
                duplicate.target = clone.id
                duplicate.id = ann_obj.get_new_id(id_prefix)
                ann_obj.add_annotation(duplicate)
                mods.addition(duplicate)

        # Finally, make everything that referenced the original event
        # reference the new events as well.
        for other in ann_obj:
            soft_deps, hard_deps = other.get_deps()
            if ann.id not in (soft_deps | hard_deps):
                continue

            if isinstance(other, EventAnnotation):
                # add one extra argument per new event for each reference
                extra_args = [(role, clone.id)
                              for role, target_id in other.args
                              if target_id == ann.id
                              for clone in clones]
                other.args.extend(extra_args)
            elif isinstance(other, AttributeAnnotation):
                _duplicate_for_clones(other, "A") # TODO: avoid hard-coding ID prefix
            elif isinstance(other, BinaryRelationAnnotation):
                # TODO
                raise AnnotationSplitError("Cannot adjust annotation referencing split: not implemented for relations! (WARNING: annotations may be in inconsistent state, please reload!) (Please complain to the developers to fix this!)")
            elif isinstance(other, OnelineCommentAnnotation):
                _duplicate_for_clones(other, "#") # TODO: avoid hard-coding ID prefix
            elif isinstance(other, NormalizationAnnotation):
                _duplicate_for_clones(other, "N") # TODO: avoid hard-coding ID prefix
            else:
                raise AnnotationSplitError("Cannot adjust annotation referencing split: not implemented for %s! (Please complain to the lazy developers to fix this!)" % other.__class__)

        response = mods.json_response()
        response['annotations'] = _json_from_ann(ann_obj)
        return response
Esempio n. 9
0
def split_span(collection, document, args, id):
    """Split an event annotation on some of its arguments.

    The event identified by ``id`` is turned into several events, one for
    every combination that picks a single argument for each role named in
    ``args``.  The existing event is re-used for the first combination and
    copies with fresh IDs are added for the rest.  Annotations referencing
    the original event are then extended or duplicated so that they refer
    to the new events too.

    Parameters:
        collection: collection directory, resolved with real_directory.
        document: document name, joined onto the resolved directory.
        args: JSON-encoded collection of role names (without trailing
            digits) to split on.
        id: ID of the annotation to split; must be an EventAnnotation.

    Returns:
        The ModificationTracker JSON response, with the annotation JSON of
        the document stored under 'annotations'.

    Raises:
        AnnotationsIsReadOnlyError: the annotations are read-only.
        AnnotationSplitError: the annotation is not an event, a requested
            role has fewer than two arguments, or a referencing annotation
            is of a kind that cannot be adjusted.
    """
    directory = collection

    real_dir = real_directory(directory)
    document = path_join(real_dir, document)
    # TODO don't know how to pass an array directly, so doing extra catenate
    # and split
    tosplit_args = json_loads(args)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        ann = ann_obj.get_ann_by_id(id)

        # currently only allowing splits for events
        if not isinstance(ann, EventAnnotation):
            raise AnnotationSplitError(
                "Cannot split an annotation of type %s" %
                ann.type)

        # group event arguments into ones that will be split on and
        # ones that will not, placing the former into a dict keyed by
        # the argument without trailing numbers (e.g. "Theme1" ->
        # "Theme") and the latter in a straight list.
        split_args = {}
        nonsplit_args = []
        import re
        for arg, aid in ann.args:
            # Strip trailing digits; note that the stripped name is also
            # what gets stored for arguments that are not split on.
            m = re.match(r'^(.*?)\d*$', arg)
            if m:
                arg = m.group(1)
            if arg in tosplit_args:
                if arg not in split_args:
                    split_args[arg] = []
                split_args[arg].append(aid)
            else:
                nonsplit_args.append((arg, aid))

        # verify that split is possible: every requested role must occur
        # at least twice on the event
        for a in tosplit_args:
            acount = len(split_args.get(a, []))
            if acount < 2:
                raise AnnotationSplitError(
                    "Cannot split %s on %s: only %d %s arguments (need two or more)" %
                    (ann.id, a, acount, a))

        # create all combinations of the args on which to split; each pass
        # extends every combination built so far with each argument of the
        # next role
        argument_combos = [[]]
        for a in tosplit_args:
            new_combos = []
            for aid in split_args[a]:
                for c in argument_combos:
                    new_combos.append(c + [(a, aid)])
            argument_combos = new_combos

        # create the new events (first combo will use the existing event)
        from copy import deepcopy
        new_events = []
        for i, arg_combo in enumerate(argument_combos):
            # tweak args
            if i == 0:
                ann.args = nonsplit_args[:] + arg_combo
            else:
                # Copy the (already updated) original and give the copy its
                # own ID and argument combination.
                newann = deepcopy(ann)
                # TODO: avoid hard-coding ID prefix
                newann.id = ann_obj.get_new_id("E")
                newann.args = nonsplit_args[:] + arg_combo
                ann_obj.add_annotation(newann)
                new_events.append(newann)
                mods.addition(newann)

        # then, go through all the annotations referencing the original
        # event, and create appropriate copies
        # NOTE(review): annotations are added to ann_obj inside this loop
        # while ann_obj is being iterated - confirm that its iterator
        # tolerates additions during iteration.
        for a in ann_obj:
            soft_deps, hard_deps = a.get_deps()
            refs = soft_deps | hard_deps
            if ann.id in refs:
                # Referenced; make duplicates appropriately

                if isinstance(a, EventAnnotation):
                    # go through args and make copies for referencing:
                    # every argument pointing at the original event gets a
                    # sibling argument for each new event
                    new_args = []
                    for arg, aid in a.args:
                        if aid == ann.id:
                            for newe in new_events:
                                new_args.append((arg, newe.id))
                    a.args.extend(new_args)

                elif isinstance(a, AttributeAnnotation):
                    # one copy of the attribute per new event
                    for newe in new_events:
                        newmod = deepcopy(a)
                        newmod.target = newe.id
                        # TODO: avoid hard-coding ID prefix
                        newmod.id = ann_obj.get_new_id("A")
                        ann_obj.add_annotation(newmod)
                        mods.addition(newmod)

                elif isinstance(a, BinaryRelationAnnotation):
                    # TODO: relations are not handled; this raises after
                    # the event itself has already been modified above
                    raise AnnotationSplitError(
                        "Cannot adjust annotation referencing split: not implemented for relations! (WARNING: annotations may be in inconsistent state, please reload!) (Please complain to the developers to fix this!)")

                elif isinstance(a, OnelineCommentAnnotation):
                    # one copy of the comment per new event
                    for newe in new_events:
                        newcomm = deepcopy(a)
                        newcomm.target = newe.id
                        # TODO: avoid hard-coding ID prefix
                        newcomm.id = ann_obj.get_new_id("#")
                        ann_obj.add_annotation(newcomm)
                        mods.addition(newcomm)
                elif isinstance(a, NormalizationAnnotation):
                    # one copy of the normalization per new event
                    for newe in new_events:
                        newnorm = deepcopy(a)
                        newnorm.target = newe.id
                        # TODO: avoid hard-coding ID prefix
                        newnorm.id = ann_obj.get_new_id("N")
                        ann_obj.add_annotation(newnorm)
                        mods.addition(newnorm)
                else:
                    raise AnnotationSplitError(
                        "Cannot adjust annotation referencing split: not implemented for %s! (Please complain to the lazy developers to fix this!)" %
                        a.__class__)

        # Report the modifications together with the full annotation JSON
        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Esempio n. 10
0
def create_span(collection, document, start, end, type, attributes=None, id=None, comment=None):
    """Create a span annotation, or edit an existing one, in a document.

    Parameters:
        collection: collection directory, resolved with real_directory.
        document: document name, joined onto the resolved directory.
        start, end: span offsets, handed on to the span helpers unchanged.
        type: annotation type of the span.
        attributes: JSON-encoded mapping of attribute names to values, or
            None for no attributes.  A string that cannot be parsed is
            reported to the client as a warning and treated as empty.
        id: ID of an existing annotation to edit; None creates a new one.
        comment: text of the "AnnotatorNotes" comment to attach.  A false
            value removes an existing comment.

    Returns:
        A dict with the document JSON under "annotations".  When DEBUG is
        set and a span was produced, it also carries the contents of the
        ModificationTracker JSON response.
    """
    directory = collection

    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse attributes: warn the client and continue
            # without attributes rather than failing the whole request
            Messager.warning(
                ('Unable to parse attributes string "%s" for '
                 '"createSpan", ignoring attributes for request and '
                 'assuming no attributes set') % (attributes, ))
            _attributes = {}

        ### XXX: Hack since the client is sending back False and True as values...
        # These are __not__ to be sent, they violate the protocol
        for _del in [k for k, v in _attributes.items() if v == False]:
            del _attributes[_del]

        # These are to be old-style modifiers without values
        for _revalue in [k for k, v in _attributes.items() if v == True]:
            _attributes[_revalue] = True
        ###

    real_dir = real_directory(directory)
    document = path_join(real_dir, document)

    projectconf = ProjectConfiguration(real_dir)

    txt_file_path = document + "." + TEXT_FILE_SUFFIX

    with TextAnnotations(document) as ann_obj:
        mods = ModificationTracker()

        if id is not None:
            # We are to edit an existing annotation
            tb_ann, e_ann = _edit_span(ann_obj, mods, id, start, end, projectconf, _attributes, type)
        else:
            # We are to create a new annotation
            tb_ann, e_ann = _create_span(ann_obj, mods, type, start, end, txt_file_path, projectconf, _attributes)

        # Set annotation attributes
        if e_ann is not None:
            # Assign attributes to the event, not the trigger
            _set_attributes(ann_obj, e_ann, _attributes, mods)
        else:
            _set_attributes(ann_obj, tb_ann, _attributes, mods)

        # Handle annotation comments
        if tb_ann is not None:
            # If this is an event, we want to attach the comment to it
            if e_ann is not None:
                comment_on = e_ann
            else:
                comment_on = tb_ann

            # We are only interested in id;ed comments
            try:
                comment_on.id
                has_id = True
            except AttributeError:
                has_id = False

            if has_id:
                # Check if there is already an annotation comment
                for com_ann in ann_obj.get_oneline_comments():
                    if com_ann.type == "AnnotatorNotes" and com_ann.target == comment_on.id:
                        found = com_ann
                        break
                else:
                    found = None

                if comment:
                    if found is not None:
                        # Change the comment
                        # XXX: Note the ugly tab, it is for parsing the tail
                        before = unicode(found)
                        found.tail = "\t" + comment
                        mods.change(before, found)
                    else:
                        # Create a new comment
                        new_comment = OnelineCommentAnnotation(
                            comment_on.id,
                            ann_obj.get_new_id("#"),
                            # XXX: Note the ugly tab
                            "AnnotatorNotes",
                            "\t" + comment,
                        )
                        ann_obj.add_annotation(new_comment)
                        mods.addition(new_comment)
                else:
                    # We are to erase the annotation
                    if found is not None:
                        ann_obj.del_annotation(found)
                        mods.deletion(found)

        if tb_ann is not None:
            # The modification details are only sent when debugging
            if DEBUG:
                mods_json = mods.json_response()
            else:
                mods_json = {}
        else:
            # Hack, we had a new-line in the span
            mods_json = {}
            Messager.error("Text span contained new-line, rejected", duration=3)

        # save a roundtrip and send the annotations also
        j_dic = _json_from_ann_and_txt(ann_obj, txt_file_path)
        mods_json["annotations"] = j_dic

        return mods_json
Esempio n. 11
0
def _create_span(collection, document, start, end, _type, attributes=None,
                 _id=None, comment=None):
    """Create or edit a span annotation and report the resulting changes.

    Parameters:
        collection: collection directory, resolved with real_directory.
        document: document name, joined onto the resolved directory.
        start, end: span offsets, handed on to the span helpers unchanged.
        _type: annotation type of the span.
        attributes: JSON-encoded mapping of attribute names to values, or
            None.  An unparsable string is reported as a warning and
            treated as no attributes.
        _id: ID of an existing annotation to edit; None creates a new one.
        comment: text for the 'AnnotatorNotes' comment; a false value
            removes an existing comment.

    Returns:
        A dict with the document's annotation JSON under 'annotations',
        the JSON-encoded undo information under 'undo' when any was
        recorded, and the ModificationTracker response contents when a
        span was produced.

    Raises:
        AnnotationsIsReadOnlyError: the annotations are read-only.
    """
    directory = collection
    undo_resp = {}

    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse attributes, warn the client
            warning_template = (
                u'Unable to parse attributes string "%s" for '
                u'"createSpan", ignoring attributes for request and '
                u'assuming no attributes set')
            Messager.warning(warning_template % (attributes, ))
            _attributes = {}

        ### XXX: Hack since the client is sending back False and True as values...
        # These are __not__ to be sent, they violate the protocol
        false_keys = [key for key, value in _attributes.items()
                      if value == False]
        for key in false_keys:
            del _attributes[key]

        # These are to be old-style modifiers without values
        true_keys = [key for key, value in _attributes.items()
                     if value == True]
        for key in true_keys:
            _attributes[key] = True
        ###

    real_dir = real_directory(directory)
    document = path_join(real_dir, document)

    projectconf = ProjectConfiguration(real_dir)

    txt_file_path = document + '.' + TEXT_FILE_SUFFIX

    working_directory = path_split(document)[0]

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        if _id is None:
            # We are to create a new annotation
            tb_ann, e_ann = __create_span(
                ann_obj, mods, _type, start, end, txt_file_path,
                projectconf, _attributes)

            # Undoing an addition needs the ID of what was added: the
            # event if there is one, the text-bound annotation otherwise
            undo_resp['action'] = 'add_tb'
            undo_resp['id'] = e_ann.id if e_ann is not None else tb_ann.id
        else:
            # We are to edit an existing annotation
            tb_ann, e_ann = _edit_span(
                ann_obj, mods, _id, start, end, projectconf,
                _attributes, _type, undo_resp=undo_resp)

        # Set annotation attributes; they go on the event, not the
        # trigger, whenever an event is present
        attribute_target = e_ann if e_ann is not None else tb_ann
        _set_attributes(ann_obj, attribute_target, _attributes, mods,
                        undo_resp=undo_resp)

        # Handle annotation comments
        if tb_ann is not None:
            # If this is an event, we want to attach the comment to it
            comment_on = e_ann if e_ann is not None else tb_ann

            # We are only interested in id;ed comments
            try:
                comment_on.id
            except AttributeError:
                has_id = False
            else:
                has_id = True

            if has_id:
                # Check if there is already an annotation comment
                existing_note = None
                for candidate in ann_obj.get_oneline_comments():
                    if (candidate.type == 'AnnotatorNotes'
                            and candidate.target == comment_on.id):
                        existing_note = candidate
                        # Note the comment in the undo (without the
                        # leading character of the tail)
                        undo_resp['comment'] = existing_note.tail[1:]
                        break

                if comment and existing_note is not None:
                    # Change the comment
                    # XXX: Note the ugly tab, it is for parsing the tail
                    before = unicode(existing_note)
                    existing_note.tail = u'\t' + comment
                    mods.change(before, existing_note)
                elif comment:
                    # Create a new comment
                    # XXX: Note the ugly tab
                    new_comment = OnelineCommentAnnotation(
                        comment_on.id, ann_obj.get_new_id('#'),
                        u'AnnotatorNotes', u'\t' + comment)
                    ann_obj.add_annotation(new_comment)
                    mods.addition(new_comment)
                elif existing_note is not None:
                    # We are to erase the annotation
                    ann_obj.del_annotation(existing_note)
                    mods.deletion(existing_note)

        if tb_ann is None:
            # Hack, we had a new-line in the span
            mods_json = {}
            Messager.error('Text span contained new-line, rejected', duration=3)
        else:
            mods_json = mods.json_response()

        if undo_resp:
            mods_json['undo'] = json_dumps(undo_resp)
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json