Ejemplo n.º 1
0
def _get_db_path(database, collection):
    """Return ``(path, unicode_flag)`` for a normalization DB of a collection.

    The normalization configuration of ``collection`` is scanned for an
    entry whose first field equals ``database``; fields 3 and 4 of that
    entry are returned as the DB path and unicode flag.

    Falls back to ``(None, Simstring.DEFAULT_UNICODE)`` when ``collection``
    is None, when the DB is not listed in the configuration, or when reading
    the configuration fails for any reason (a warning is issued in the
    latter two cases).
    """
    if collection is None:
        # TODO: default to WORK_DIR config?
        return (None, Simstring.DEFAULT_UNICODE)

    # The whole lookup lives inside the try block: any failure while
    # resolving the directory or reading the configuration must end in the
    # warn-and-fall-back path below rather than propagate to the caller.
    try:
        conf_dir = real_directory(collection)
        projectconf = ProjectConfiguration(conf_dir)
        norm_conf = projectconf.get_normalization_config()
        for entry in norm_conf:
            # TODO THIS IS WRONG
            dbname, dbpath, dbunicode = entry[0], entry[3], entry[4]
            if dbname == database:
                return (dbpath, dbunicode)
        # not found in config.
        # %-formatting so that a non-string database/collection cannot make
        # building the warning itself fail.
        Messager.warning(
            'DB %s not defined in config for %s, falling back on default.'
            % (database, collection))
        return (None, Simstring.DEFAULT_UNICODE)
    except Exception:
        # whatever goes wrong, just warn and fall back on the default.
        Messager.warning(
            'Failed to get DB path from config for %s, falling back on default.'
            % (collection,))
        return (None, Simstring.DEFAULT_UNICODE)
Ejemplo n.º 2
0
Archivo: tag.py Proyecto: ingridan/brat
def tag(collection, document, tagger):
    pconf = ProjectConfiguration(real_directory(collection))
    for tagger_token, _, _, tagger_service_url in pconf.get_annotator_config():
        if tagger == tagger_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    doc_path = path_join(real_directory(collection), document)

    with TextAnnotations(path_join(real_directory(collection),
                                   document)) as ann_obj:

        url_soup = urlparse(tagger_service_url)

        if url_soup.scheme == 'http':
            Connection = HTTPConnection
        elif url_soup.scheme == 'https':
            # Delayed HTTPS import since it relies on SSL which is commonly
            #   missing if you roll your own Python, for once we should not
            #   fail early since tagging is currently an edge case and we
            #   can't allow it to bring down the whole server.
            from httplib import HTTPSConnection
            Connection = HTTPSConnection
        else:
            raise InvalidConnectionSchemeError(tagger_token, url_soup.scheme)

        conn = None
        try:
            conn = Connection(url_soup.netloc)
            req_headers = {
                'Content-type': 'text/plain; charset=utf-8',
                'Accept': 'application/json',
            }
            # Build a new service URL since the request method doesn't accept
            #   a parameters argument
            service_url = url_soup.path + ('?' + url_soup.query
                                           if url_soup.query else '')
            try:
                data = ann_obj.get_document_text().encode('utf-8')
                req_headers['Content-length'] = len(data)
                # Note: Trout slapping for anyone sending Unicode objects here
                conn.request(
                    'POST',
                    # As per: http://bugs.python.org/issue11898
                    # Force the url to be an ascii string
                    str(url_soup.path),
                    data,
                    headers=req_headers)
            except SocketError, e:
                raise TaggerConnectionError(tagger_token, e)
            resp = conn.getresponse()

            # Did the request succeed?
            if resp.status != 200:
                raise TaggerConnectionError(
                    tagger_token, '%s %s' % (resp.status, resp.reason))
            # Finally, we can read the response data
            resp_data = resp.read()
        finally:
Ejemplo n.º 3
0
def tag(collection, document, tagger):
    pconf = ProjectConfiguration(real_directory(collection))
    for tagger_token, _, _, tagger_service_url in pconf.get_annotator_config():
        if tagger == tagger_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    doc_path = path_join(real_directory(collection), document)

    with TextAnnotations(path_join(real_directory(collection),
            document)) as ann_obj:

        url_soup = urlparse(tagger_service_url)

        if url_soup.scheme == 'http':
            Connection = HTTPConnection
        elif url_soup.scheme == 'https':
            # Delayed HTTPS import since it relies on SSL which is commonly
            #   missing if you roll your own Python, for once we should not
            #   fail early since tagging is currently an edge case and we
            #   can't allow it to bring down the whole server.
            from httplib import HTTPSConnection
            Connection = HTTPSConnection
        else:
            raise InvalidConnectionSchemeError(tagger_token, url_soup.scheme)

        conn = None
        try:
            conn = Connection(url_soup.netloc)
            req_headers = {
                    'Content-type': 'text/plain; charset=utf-8',
                    'Accept': 'application/json',
                    }
            # Build a new service URL since the request method doesn't accept
            #   a parameters argument
            service_url = url_soup.path + (
                    '?' + url_soup.query if url_soup.query else '')
            try:
                data = ann_obj.get_document_text().encode('utf-8')
                req_headers['Content-length'] = len(data)
                # Note: Trout slapping for anyone sending Unicode objects here
                conn.request('POST',
                        # As per: http://bugs.python.org/issue11898
                        # Force the url to be an ascii string
                        str(url_soup.path),
                        data,
                        headers=req_headers)
            except SocketError, e:
                raise TaggerConnectionError(tagger_token, e)
            resp = conn.getresponse()

            # Did the request succeed?
            if resp.status != 200:
                raise TaggerConnectionError(tagger_token,
                        '%s %s' % (resp.status, resp.reason))
            # Finally, we can read the response data
            resp_data = resp.read()
        finally:
Ejemplo n.º 4
0
def tag(collection, document, tagger):
    pconf = ProjectConfiguration(real_directory(collection))
    for tagger_token, _, _, tagger_service_url in pconf.get_annotator_config():
        if tagger == tagger_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    doc_path = path_join(real_directory(collection), document)

    with TextAnnotations(path_join(real_directory(collection),
                                   document)) as ann_obj:

        url_soup = urlparse(tagger_service_url)

        if url_soup.scheme == 'http':
            Connection = HTTPConnection
        elif url_soup.scheme == 'https':
            Connection = HTTPSConnection
        else:
            raise InvalidConnectionSchemeError(tagger_token, url_soup.scheme)

        conn = None
        try:
            conn = Connection(url_soup.netloc)
            req_headers = {
                'Content-type': 'text/plain; charset=utf-8',
                'Accept': 'application/json',
            }
            # Build a new service URL since the request method doesn't accept
            #   a parameters argument
            service_url = url_soup.path + ('?' + url_soup.query
                                           if url_soup.query else '')
            try:
                conn.request(
                    'POST',
                    url_soup.path,
                    # The document text as body
                    ann_obj.get_document_text().encode('utf8'),
                    headers=req_headers)
            except SocketError, e:
                raise TaggerConnectionError(tagger_token, e)
            resp = conn.getresponse()

            # Did the request succeed?
            if resp.status != 200:
                raise TaggerConnectionError(
                    tagger_token, '%s %s' % (resp.status, resp.reason))
        finally:
Ejemplo n.º 5
0
def getOcrFileTypeDetails(collection, document):
    """Look up the OCR/file-type record stored for ``document``.

    Reads the ``qafunnelocrfiledetails`` file in the collection directory,
    which holds one JSON object per line, and returns the fields of the
    record whose ``document`` value equals ``document``. If several lines
    match, the last one wins.

    Returns:
        ``{"status": False}`` when no record matches; otherwise a dict with
        ``"status": True``, ``"document"`` and the copied record fields.

    Raises:
        IOError if the details file cannot be opened, ValueError if a
        non-blank line is not valid JSON, KeyError if a matching record
        lacks one of the copied fields.
    """
    path = path_join(real_directory(collection), 'qafunnelocrfiledetails')

    copied_fields = (
        'ocrOutputResult',
        'identificationOutputResult',
        'identificationBoundaryOutputResult',
        'extractionOutputResult',
        'fileType',
        'lossType',
        'comments',
    )

    returnMap = {"status": False}

    # A plain file object in a ``with`` block replaces the module-global
    # ``fileinput`` state: the file is always closed, and an exception raised
    # mid-loop cannot leave ``fileinput`` "already active" for later calls.
    with open(path) as details_file:
        for line in details_file:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            loaded_r = json.loads(line)
            if str(loaded_r['document']) != document:
                continue
            returnMap = {"status": True, "document": document}
            for field in copied_fields:
                returnMap[field] = loaded_r[field]

    return returnMap
Ejemplo n.º 6
0
def create_comment(collection, document, id, comment=None):
    """Set a special comment on annotation ``id`` and return the JSON response.

    Opens the document's annotations, refuses to continue if they are
    read-only, applies the comment via ``_set_special_comments`` and returns
    the modification tracker's JSON response extended with the current
    annotations and, when undo information was collected, an ``'undo'`` entry.

    Raises:
        AnnotationsIsReadOnlyError: if the annotation object is read-only.
    """
    undo_resp = {}

    base_dir = real_directory(collection)
    document = path_join(base_dir, document)

    # Neither of these two values is used below; they are kept so the
    # function performs exactly the same calls as before.
    projectconf = ProjectConfiguration(base_dir)
    txt_file_path = document + '.' + TEXT_FILE_SUFFIX

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        tracker = ModificationTracker()
        _set_special_comments(ann_obj, id, comment, tracker,
                              undo_resp=undo_resp)

        response = tracker.json_response()
        if undo_resp:
            response['undo'] = json_dumps(undo_resp)
        response['annotations'] = _json_from_ann(ann_obj)
        return response
Ejemplo n.º 7
0
def download_collection(collection, exclude_configs=False):
    """Pack a collection directory into a gzipped tar and hand it back.

    The archive is built by running ``tar`` from the parent of the
    collection directory into a temporary file, always leaving out
    ``.stats_cache`` and, when ``exclude_configs`` is true, the four
    configuration files as well. The archive bytes are delivered by raising
    ``NoPrintJSONError`` with the download headers; the temporary file is
    removed afterwards in every case.
    """
    real_dir = real_directory(collection)
    dir_name = basename(dirname(real_dir))
    fname = '%s.%s' % (dir_name, 'tar.gz')

    tmp_file_path = None
    try:
        tmp_file_fh, tmp_file_path = mkstemp()
        # only the path is needed; tar writes the file itself
        os_close(tmp_file_fh)

        tar_args = ['tar', '--exclude=.stats_cache']
        if exclude_configs:
            for conf_name in ('annotation.conf', 'visual.conf',
                              'tools.conf', 'kb_shortcuts.conf'):
                tar_args.append('--exclude=' + conf_name)
        tar_args += ['-c', '-z', '-f', tmp_file_path, dir_name]

        tar_proc = Popen(tar_args, cwd=path_join(real_dir, '..'))
        tar_proc.wait()

        content_type = ('Content-Type', 'application/octet-stream')
        disposition = ('Content-Disposition', 'inline; filename=%s' % fname)
        hdrs = [content_type, disposition]

        with open(tmp_file_path, 'rb') as archive:
            tar_data = archive.read()

        raise NoPrintJSONError(hdrs, tar_data)
    finally:
        if tmp_file_path is not None:
            remove(tmp_file_path)
Ejemplo n.º 8
0
def input_text(path,doc,_id,text,start_list, current_list=None):
    """Store free-text input on a start list and mark that list as stopped.

    Args:
        path: collection path; resolved through ``real_directory`` when the
            import and call succeed, otherwise used unchanged.
        doc: document name handed to ``getAnnObject``.
        _id: not used by the active code (only by commented-out lines).
        text: value passed to ``currentList.set_input``.
        start_list: key into ``txt_lvl.startlists``.
        current_list: appended to the start list's ``followed_path`` when
            it is not already in it.

    Returns:
        A dict with ``'stop'`` set to True and ``'annotation'`` set to
        ``str(annotation)``.
    """
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(path)
    except:
        # any failure (import or lookup) falls back to the raw path
        real_dir=path
    proj = ProjectConfiguration(real_dir)
    # NOTE(review): TextAnnotations is constructed from a project
    # configuration here and exposes ``startlists`` -- confirm which class
    # this name refers to in this module.
    txt_lvl = TextAnnotations(proj)
    answerlist = txt_lvl.startlists[start_list].start
    with getAnnObject(path, doc) as ann:
        ann_txtLvls = ann.get_textLevels()
        annotation = None
        # pick the text-level annotation whose type matches the start list's
        # name; when several match, the last one is kept
        for i in ann_txtLvls:
            if i.type == answerlist.name:
                annotation = i
        if annotation:
            txt_lvl.set_ann(ann_txtLvls)
        else:
            # no matching annotation yet: add an empty one under a new 'F' id
            ann_id = ann.get_new_id('F')
            ann.add_annotation(TextLevelAnnotation(ann_id, answerlist.name,[]))
            annotation = ann.get_ann_by_id(ann_id)
            #~ ann_txtLvls = ann.get_textLevels()
        #~ if annotation.tail:
            #~ annotation.tail += ";"
        # set_input must run before currentList is replaced by the 'stop'
        # marker on the next line
        txt_lvl.startlists[start_list].currentList.set_input(text)
        txt_lvl.startlists[start_list].currentList = 'stop'
        if not current_list in txt_lvl.startlists[start_list].followed_path:
            txt_lvl.startlists[start_list].followed_path.append(current_list)
        update_annotations(ann,annotation, txt_lvl.startlists[start_list])
        #~ annotation.tail += text
        #~ annotation.ids.append(_id)
    return {'stop':True, 'annotation':str(annotation),}
Ejemplo n.º 9
0
def possible_arc_types(collection, origin_type, target_type):
    """Build the arc-type selection response for an origin/target type pair.

    The returned dict is empty when the configuration lookup yields None
    (``Messager.error`` is called instead), holds ``'html'``, ``'keymap'``
    and ``'empty'`` when the lookup yields an empty list, and holds
    ``'html'`` and ``'keymap'`` otherwise. Any exception raised along the
    way is reported through ``Messager.error`` and then re-raised.
    """
    conf = ProjectConfiguration(real_directory(collection))
    result = {}

    try:
        candidates = conf.arc_types_from_to(origin_type, target_type)

        # TODO: proper error handling
        if candidates is None:
            Messager.error('Error selecting arc types!', -1)
        elif candidates == []:
            # nothing to select
            result['html'] = generate_empty_fieldset()
            result['keymap'] = {}
            result['empty'] = True
        else:
            # XXX TODO: intentionally breaking this; KB shortcuts
            # should no longer be sent here. Remove 'keymap' and
            # 'html' args once clientside generation done.
            shortcuts = {}  # select_keyboard_shortcuts(candidates)

            result['keymap'] = dict(
                (key, "arc_" + arc_type)
                for key, arc_type in shortcuts.items())
            result['html'] = generate_arc_type_html(conf, candidates,
                                                    shortcuts)
    except:
        Messager.error('Error selecting arc types!', -1)
        raise

    return result
Ejemplo n.º 10
0
def unselect(path,doc,start_list, current_id):
    """Undo a selection in a start list and return the resulting list data.

    Returns ``list_to_dict`` of whatever the text-level ``unselect`` call
    gives back; when the document has no text levels, or that call returns
    something falsy, the result of ``get_startlist(path, doc)`` is returned
    instead.
    """
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(path)
    except:
        # fall back to the raw path on any import or lookup failure
        real_dir = path
    from message import Messager
    with getAnnObject(path, doc) as ann:
        proj = ProjectConfiguration(real_dir)
        levels = ann.get_textLevels()
        if levels:
            remaining = TextAnnotations(proj, levels).unselect(start_list,
                                                               current_id)
            if remaining:
                return list_to_dict(remaining)
        # nothing to work on, or nothing left after unselecting
        return get_startlist(path, doc)
Ejemplo n.º 11
0
def _save_svg(collection, document, svg):
    """Write ``svg`` with embedded fonts and CSS to the SVG output path.

    The XML/DOCTYPE header is prepended and the contents of ``SVG_FONTS``
    and ``CSS_PATH`` are inserted immediately before the closing ``</defs>``
    tag. When ``SVG_STORE`` is set, a copy is also written to
    ``<collection dir relative to SVG_STORE_DIR>/<document>.svg``, creating
    that directory if necessary.

    Raises:
        CorruptSVGError: if ``svg`` contains no ``</defs>`` tag. The check
            happens before any file is opened for writing, so an existing
            output file is not truncated by a rejected SVG.
    """
    defs = svg.find('</defs>')
    if defs == -1:
        # TODO: @amadanmath: When does this actually happen?
        raise CorruptSVGError

    svg_hdr = ('<?xml version="1.0" encoding="UTF-8" standalone="no"?>'
               '<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
               '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">')

    # Assemble the complete document first; the output file is only opened
    # once everything that can fail while reading inputs has succeeded.
    with open_textfile(CSS_PATH, 'r') as css_file:
        css = ('<style type="text/css"><![CDATA[' + css_file.read()
               + ']]></style>')

    font_data = []
    for font_path in SVG_FONTS:
        with open_textfile(font_path, 'r') as font_file:
            font_data.append(font_file.read().strip())
    fonts = '\n'.join(font_data)

    svg = (svg_hdr + '\n' + svg[:defs] + '\n' + fonts + '\n' + css
           + '\n' + svg[defs:])

    with open_textfile(_svg_path(), 'w') as svg_file:
        svg_file.write(svg)

    # Create a copy in the svg store?
    if SVG_STORE:
        real_dir = real_directory(collection, rel_to=SVG_STORE_DIR)
        if not exists(real_dir):
            makedirs(real_dir)
        svg_store_path = path_join(real_dir, document + '.svg')
        with open_textfile(svg_store_path, 'w') as svg_store_file:
            svg_store_file.write(svg)
def reverse_arc(collection, document, origin, target, type, attributes=None):
    """Swap the two arguments of a binary relation and return the annotations.

    Equiv arcs and types that are not configured relations are rejected
    with a warning. Otherwise the first relation matching ``origin``,
    ``target`` and ``type`` has its ``arg1``/``arg2`` exchanged; if none
    matches, an error message is issued. In every non-raising case the
    response dict carries the document's current annotations.

    Raises:
        AnnotationsIsReadOnlyError: if the annotation object is read-only.
    """
    real_dir = real_directory(collection)
    projectconf = ProjectConfiguration(real_dir)
    document = path_join(real_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        if projectconf.is_equiv_type(type):
            Messager.warning('Cannot reverse Equiv arc')
        elif not projectconf.is_relation_type(type):
            Messager.warning('Can only reverse configured binary relations')
        else:
            # OK to reverse
            # TODO: more sensible lookup
            matching = (rel for rel in ann_obj.get_relations()
                        if (rel.arg1 == origin and rel.arg2 == target and
                            rel.type == type))
            relation = next(matching, None)
            if relation is not None:
                # found it; just adjust this
                # TODO: modification tracker
                relation.arg1, relation.arg2 = relation.arg2, relation.arg1
            else:
                Messager.error(
                    'reverse_arc: failed to identify target relation '
                    '(from %s to %s, type %s) (deleted?)'
                    % (str(origin), str(target), str(type)))

        return {'annotations': _json_from_ann(ann_obj)}
Ejemplo n.º 13
0
def possible_arc_types(collection, origin_type, target_type):
    """Return the arc-type selection data for a pair of annotation types.

    When the project configuration returns None for the pair,
    ``Messager.error`` is called and an empty dict is returned. An empty
    list produces a response with ``'html'``, an empty ``'keymap'`` and
    ``'empty': True``; any other value produces ``'html'`` and ``'keymap'``.
    Exceptions are reported via ``Messager.error`` and re-raised.
    """
    project = ProjectConfiguration(real_directory(collection))
    payload = {}

    try:
        arc_types = project.arc_types_from_to(origin_type, target_type)

        # TODO: proper error handling
        if arc_types is None:
            Messager.error('Error selecting arc types!', -1)
        elif arc_types == []:
            # nothing to select
            payload['html'] = generate_empty_fieldset()
            payload['keymap'] = {}
            payload['empty'] = True
        else:
            # XXX TODO: intentionally breaking this; KB shortcuts
            # should no longer be sent here. Remove 'keymap' and
            # 'html' args once clientside generation done.
            kb_shortcuts = {}  # select_keyboard_shortcuts(arc_types)

            keymap = {}
            for shortcut_key, arc_name in kb_shortcuts.items():
                keymap[shortcut_key] = "arc_" + arc_name
            payload['keymap'] = keymap

            payload['html'] = generate_arc_type_html(project, arc_types,
                                                     kb_shortcuts)
    except:
        Messager.error('Error selecting arc types!', -1)
        raise

    return payload
Ejemplo n.º 14
0
def download_file(document, collection, extension):
    """Deliver ``<document>.<extension>`` from the collection as a download.

    The payload is handed back by raising ``NoPrintJSONError`` with the
    download headers:

    * ``"Access Denied"`` when ``allowed_to_read`` rejects the path;
    * for ``zip`` files, the raw bytes of the file (an empty archive is
      created first if the file does not exist);
    * for other existing files, the text content encoded as UTF-8;
    * an empty string for other files that do not exist.
    """
    real_dir = real_directory(collection)
    fname = '%s.%s' % (document, extension)
    fpath = path_join(real_dir, fname)
    hdrs = [('Content-Type', 'application/octet-stream'),
            ('Content-Disposition', 'inline; filename=%s' % fname)]

    if not allowed_to_read(fpath):
        data = "Access Denied"
    else:
        present = exists(fpath)
        if extension == "zip":
            if not present:
                # create an empty archive so there is something to send
                import zipfile
                zipf = zipfile.ZipFile(fpath, 'w')
                zipf.close()
            with open(fpath, 'rb') as zip_file:
                data = zip_file.read()
        elif present:
            with open_textfile(fpath, 'r') as txt_file:
                data = txt_file.read().encode('utf-8')
        else:
            data = ""

    raise NoPrintJSONError(hdrs, data)
Ejemplo n.º 15
0
def tag(collection, document, tagger):
    pconf = ProjectConfiguration(real_directory(collection))
    for tagger_token, _, _, tagger_service_url in pconf.get_annotator_config():
        if tagger == tagger_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    doc_path = path_join(real_directory(collection), document)

    with TextAnnotations(path_join(real_directory(collection),
            document)) as ann_obj:

        url_soup = urlparse(tagger_service_url)

        if url_soup.scheme == 'http':
            Connection = HTTPConnection
        elif url_soup.scheme == 'https':
            Connection = HTTPSConnection
        else:
            raise InvalidConnectionSchemeError(tagger_token, url_soup.scheme)

        conn = None
        try:
            conn = Connection(url_soup.netloc)
            req_headers = {
                    'Content-type': 'text/plain; charset=utf-8',
                    'Accept': 'application/json',
                    }
            # Build a new service URL since the request method doesn't accept
            #   a parameters argument
            service_url = url_soup.path + (
                    '?' + url_soup.query if url_soup.query else '')
            try:
                conn.request('POST', url_soup.path,
                        # The document text as body
                        ann_obj.get_document_text().encode('utf8'),
                        headers=req_headers)
            except SocketError, e:
                raise TaggerConnectionError(tagger_token, e)
            resp = conn.getresponse()

            # Did the request succeed?
            if resp.status != 200:
                raise TaggerConnectionError(tagger_token,
                        '%s %s' % (resp.status, resp.reason))
        finally:
Ejemplo n.º 16
0
def getAnnObject2(collection,document):
    '''Load the annotation object for a document, validate it and cache it.

    A pickled copy is looked up under ``WORK_DIR + "/application/"`` using
    the collection and document names concatenated with every "/" removed.
    When no such file exists, the object is built from ``TextAnnotations``
    wrapped in ``SimpleAnnotations`` and extended with a ``folia`` attribute.
    Validation issues are then computed (or reused) depending on the session
    configuration, stored on ``ann.issues``, and the object is pickled back
    to the cache file before being returned.
    '''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(collection)
    except:
        # any import or lookup failure falls back to the raw collection path
        real_dir=collection      
    app_path = WORK_DIR + "/application/"
    ann = None
    # cache file name: collection + document with path separators stripped
    full_name = collection + document
    full_name = full_name.replace("/","")
    if( isfile(app_path+full_name)):
        # NOTE(review): unpickling is only safe if the cache directory cannot
        # be written by untrusted parties -- confirm.
        temp=open (app_path+full_name , 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir+document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia=get_extra_info(collection,document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    #Validation:
    try:
        import os
        import simplejson as json
        import session
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:    
                from verify_annotations import verify_annotation
                projectconf = ProjectConfiguration(docdir)
                issues = []
                issues = verify_annotation(ann, projectconf)
            else:
                # already validated: reuse the stored issues
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    # NOTE(review): ``session`` is bound by the import inside the try block;
    # if an exception is raised before that import completes, evaluating
    # ``session.NoSessionError`` here would itself fail -- confirm.
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
    ann.issues = issues
    # write the (possibly updated) object back to the cache
    temp=open (app_path+full_name , 'wb')    
    pickle_dump(ann, temp)
    temp.close()
    return ann
Ejemplo n.º 17
0
def download_collection(collection, include_conf=False):
    """Pack a collection into a gzipped tar, optionally with its configs.

    ``include_conf`` is converted with ``int()`` when possible (a
    ``ValueError`` leaves it unchanged). When it is falsy the four
    configuration files are excluded from the archive. When it is truthy,
    configuration files found in parent directories (depth > 0 as reported
    by ``find_in_directory_tree``) are added as well, and a tar
    ``--transform`` rewrites their paths so that they appear inside the
    collection directory in the archive.

    The archive bytes are delivered by raising ``NoPrintJSONError`` with the
    download headers; the temporary file is always removed afterwards.
    """
    real_dir = real_directory(collection)
    dir_name = basename(dirname(real_dir))
    fname = '%s.%s' % (dir_name, 'tar.gz')

    confs = ['annotation.conf', 'visual.conf', 'tools.conf',
             'kb_shortcuts.conf']

    try:
        include_conf = int(include_conf)
    except ValueError:
        pass

    tmp_file_path = None
    try:
        tmp_file_fh, tmp_file_path = mkstemp()
        os_close(tmp_file_fh)

        tar_args = ['tar', '--exclude=.stats_cache']
        extra_confs = []
        if include_conf:
            # also include configs from parent directories.
            for conf_name in confs:
                cdir, depth = find_in_directory_tree(real_dir, conf_name)
                if depth is None or not depth > 0:
                    continue
                parent_steps = ['..' for _ in range(depth)]
                relpath = path_join(dir_name, *parent_steps)
                extra_confs.append(path_join(relpath, conf_name))
            if extra_confs:
                # replace pathname components ending in ".." with target
                # directory name so that .confs in parent directories appear
                # in the target directory in the tar.
                tar_args.append('--absolute-names')
                tar_args.append('--transform')
                tar_args.append('s|.*\\.\\.|%s|' % dir_name)
        else:
            for conf_name in confs:
                tar_args.append('--exclude=%s' % conf_name)

        tar_args += ['-c', '-z', '-f', tmp_file_path, dir_name]
        tar_args += extra_confs
        tar_proc = Popen(tar_args, cwd=path_join(real_dir, '..'))
        tar_proc.wait()

        hdrs = [('Content-Type', 'application/octet-stream'),
                ('Content-Disposition', 'inline; filename=%s' % fname)]
        with open(tmp_file_path, 'rb') as archive:
            tar_data = archive.read()

        raise NoPrintJSONError(hdrs, tar_data)
    finally:
        if tmp_file_path is not None:
            remove(tmp_file_path)
Ejemplo n.º 18
0
def create_arc(collection, document, origin, target, type, attributes=None,
               old_type=None, old_target=None, comment=None):
    """Create (or update) an arc between two annotations.

    ``origin`` and ``target`` are annotation ids resolved in the document.
    When ``old_type`` is given and belongs to a different category than
    ``type`` (relation vs. non-relation, or equiv vs. non-equiv), the old
    arc is deleted first and the operation proceeds as a plain creation.
    The arc is then created as an equiv, a relation or an argument depending
    on ``type``, and ``comment`` is attached when an annotation results.

    Returns:
        The modification tracker's JSON response with the document's
        current annotations added under ``'annotations'``.

    Raises:
        AnnotationsIsReadOnlyError: if the annotation object is read-only.
    """
    undo_resp = {}
    real_dir = real_directory(collection)
    mods = ModificationTracker()
    projectconf = ProjectConfiguration(real_dir)
    document = path_join(real_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        # TODO: make consistent across the different editing
        # functions, integrate ann_obj initialization and checks
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        origin = ann_obj.get_ann_by_id(origin)
        target = ann_obj.get_ann_by_id(target)

        # if there is a previous annotation and the arcs aren't in
        # the same category (e.g. relation vs. event arg), process
        # as delete + create instead of update.
        if old_type is not None:
            category_changed = (
                projectconf.is_relation_type(old_type) !=
                projectconf.is_relation_type(type) or
                projectconf.is_equiv_type(old_type) !=
                projectconf.is_equiv_type(type))
            if category_changed:
                _delete_arc_with_ann(origin.id, old_target, old_type, mods,
                                     ann_obj, projectconf)
                old_target, old_type = None, None

        # pick the creation routine matching the arc category
        if projectconf.is_equiv_type(type):
            make_arc = _create_equiv
        elif projectconf.is_relation_type(type):
            make_arc = _create_relation
        else:
            make_arc = _create_argument
        ann = make_arc(ann_obj, projectconf, mods, origin, target,
                       type, attributes, old_type, old_target)

        # process comments
        if ann is None:
            if comment is not None:
                Messager.warning(
                    'create_arc: non-empty comment for None annotation (unsupported type for comment?)')
        else:
            _set_comments(ann_obj, ann, comment, mods, undo_resp=undo_resp)

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Ejemplo n.º 19
0
def create_arc(collection, document, origin, target, type, attributes=None,
        old_type=None, old_target=None, comment=None):
    """Add an arc of ``type`` from ``origin`` to ``target`` in a document.

    The origin and target ids are looked up in the document's annotations.
    If a previous arc (``old_type``/``old_target``) is given and its
    category differs from the new type's (relation vs. non-relation, or
    equiv vs. non-equiv), it is deleted and the call continues as a fresh
    creation. Depending on ``type`` the arc is created as an equiv, a
    relation or an argument; ``comment`` is applied when that yields an
    annotation, and a warning is issued when it does not but a comment was
    supplied.

    Returns:
        The tracker's JSON response plus the current annotations under
        ``'annotations'``.

    Raises:
        AnnotationsIsReadOnlyError: if the annotation object is read-only.
    """
    undo_resp = {}
    base_dir = real_directory(collection)
    tracker = ModificationTracker()
    conf = ProjectConfiguration(base_dir)
    document = path_join(base_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        # TODO: make consistent across the different editing
        # functions, integrate ann_obj initialization and checks
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        origin = ann_obj.get_ann_by_id(origin)
        target = ann_obj.get_ann_by_id(target)

        # if there is a previous annotation and the arcs aren't in
        # the same category (e.g. relation vs. event arg), process
        # as delete + create instead of update.
        if old_type is not None and (
                conf.is_relation_type(old_type) !=
                conf.is_relation_type(type) or
                conf.is_equiv_type(old_type) !=
                conf.is_equiv_type(type)):
            _delete_arc_with_ann(origin.id, old_target, old_type, tracker,
                                 ann_obj, conf)
            old_target = None
            old_type = None

        creation_args = (ann_obj, conf, tracker, origin, target,
                         type, attributes, old_type, old_target)
        if conf.is_equiv_type(type):
            ann = _create_equiv(*creation_args)
        elif conf.is_relation_type(type):
            ann = _create_relation(*creation_args)
        else:
            ann = _create_argument(*creation_args)

        # process comments
        if ann is not None:
            _set_comments(ann_obj, ann, comment, tracker,
                          undo_resp=undo_resp)
        elif comment is not None:
            Messager.warning('create_arc: non-empty comment for None annotation (unsupported type for comment?)')

        response = tracker.json_response()
        response['annotations'] = _json_from_ann(ann_obj)
        return response
Ejemplo n.º 20
0
def tag(collection, document, tagger):
    """
    Run an external tagger service over a document and store its output.

    The tagger is looked up by token in the annotator configuration of
    the collection.  The URL-quoted document text is substituted into
    the configured service URL, the JSON reply is parsed, and one
    text-bound annotation is added per entry of the reply.

    Arguments:
    collection -- collection path, resolved with real_directory()
    document -- document name inside the collection
    tagger -- token identifying the tagger in the annotator config

    Returns the modification tracker's JSON response with the document's
    annotation JSON stored under 'annotations'.

    Raises UnknownTaggerError when no configured tagger matches and
    TaggerConnectionError when the service request fails with URLError.
    """
    real_dir = real_directory(collection)
    pconf = ProjectConfiguration(real_dir)
    for tagger_token, _, _, tagger_service_url in pconf.get_annotator_config():
        if tagger == tagger_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    doc_path = path_join(real_dir, document)

    with TextAnnotations(doc_path) as ann_obj:

        try:
            # Note: Can we actually fit a whole document in here?
            quoted_doc_text = quote_plus(ann_obj.get_document_text())
            resp = urlopen(tagger_service_url % quoted_doc_text, None,
                           QUERY_TIMEOUT)
        except URLError:
            raise TaggerConnectionError(tagger_token)

        # TODO: Check for errors
        try:
            json_resp = loads(resp.read())
        finally:
            # Release the connection even if the reply is not valid JSON.
            resp.close()

        mods = ModificationTracker()

        for ann_data in json_resp.values():
            offsets = ann_data['offsets']
            # Note: We do not support discontinuous spans at this point
            assert len(offsets) == 1, 'discontinuous/null spans'
            start, end = offsets[0]
            _id = ann_obj.get_new_id('T')
            tb = TextBoundAnnotationWithText(
                start, end,
                _id,
                ann_data['type'],
                ann_data['text']
            )
            mods.addition(tb)
            ann_obj.add_annotation(tb)

        mod_resp = mods.json_response()
        mod_resp['annotations'] = _json_from_ann(ann_obj)
        return mod_resp
Ejemplo n.º 21
0
def download_file(document, collection, extension):
    """
    Send one file of a collection to the client as UTF-8 plain text.

    The file "<document>.<extension>" in the resolved collection
    directory is read and encoded as UTF-8.  The result is delivered by
    raising NoPrintJSONError with the response headers and the payload,
    so this function never returns normally.
    """
    file_name = "%s.%s" % (document, extension)
    file_path = path_join(real_directory(collection), file_name)

    with open_textfile(file_path, "r") as handle:
        payload = handle.read().encode("utf-8")

    headers = [
        ("Content-Type", "text/plain; charset=utf-8"),
        ("Content-Disposition", "inline; filename=%s" % file_name),
    ]
    raise NoPrintJSONError(headers, payload)
def import_xmi(text, docid, collection=None):
    '''
    Store an uploaded base64 "data:" URL payload as <docid>.zip.

    Arguments:
    text -- payload prefixed with "data:application/zip;base64,"; the
        prefix is cut off by length and is not verified
    docid -- target document name; a trailing ".xxx" three-character
        extension is removed
    collection -- target collection, DATA_DIR is used when None

    Returns an empty dictionary.

    Raises InvalidDirError for a rejected or non-existing directory and
    NoWritePermissionError when the directory is not writable or the
    session user may not write there.
    '''

    if len(docid) > 4 and docid[-4] == '.':
        docid = docid[:-4]

    directory = collection

    if directory is None:
        dir_path = DATA_DIR
    else:
        #XXX: These "security" measures can surely be fooled
        if (directory.count('../') or directory == '..'):
            raise InvalidDirError(directory)

        dir_path = real_directory(directory)

    # Is the directory a directory and are we allowed to write?
    if not isdir(dir_path):
        raise InvalidDirError(dir_path)
    if not access(dir_path, W_OK):
        raise NoWritePermissionError(dir_path)

    ############################
    from session import get_session
    try:
        username = get_session()['user']
    except KeyError:
        username = None
    if username != 'admin':
        # NOTE(review): this is a substring test, so any path that merely
        # contains "<username>/" passes; confirm this is strict enough.
        if (not username) or username + '/' not in dir_path:
            raise NoWritePermissionError(dir_path)
    ############################

    base_path = join_path(dir_path, docid)
    xmi_path = base_path + '.zip'

    def decode_base64(data):
        import base64
        # Pad up to a multiple of four characters.  "-len % 4" is zero
        # for already aligned input, so no spurious padding is added.
        data += b'=' * (-len(data) % 4)
        return base64.b64decode(data)

    text = decode_base64(text[len("data:application/zip;base64,"):])
    with open(xmi_path, 'wb') as thefile:
        thefile.write(text)

    return {}
Ejemplo n.º 23
0
def download_file(document, collection, extension):
    """
    Deliver "<document>.<extension>" from a collection as plain text.

    The UTF-8 encoded file content and the response headers are passed
    to the caller through a raised NoPrintJSONError; there is no
    ordinary return value.
    """
    fname = '.'.join(('%s' % document, '%s' % extension))
    content_disposition = 'inline; filename=%s' % fname
    fpath = path_join(real_directory(collection), fname)

    txt_file = open_textfile(fpath, 'r')
    with txt_file:
        raw_text = txt_file.read()

    raise NoPrintJSONError(
        [('Content-Type', 'text/plain; charset=utf-8'),
         ('Content-Disposition', content_disposition)],
        raw_text.encode('utf-8'))
Ejemplo n.º 24
0
def download_file(document, collection, extension):
    """
    Deliver "<document>.<extension>" from a collection as plain text.

    The content is forwarded exactly as read by open_textfile (no
    re-encoding) together with the response headers by raising
    NoPrintJSONError; the function never returns normally.
    """
    target_name = '%s.%s' % (document, extension)
    target_path = path_join(real_directory(collection), target_name)

    response_headers = [
        ('Content-Type', 'text/plain; charset=utf-8'),
        ('Content-Disposition', 'inline; filename=%s' % target_name),
    ]

    with open_textfile(target_path, 'r') as source:
        body = source.read()

    raise NoPrintJSONError(response_headers, body)
Ejemplo n.º 25
0
def download_collection(collection, include_conf=False):
    """
    Pack a collection into a gzipped tar archive and send it.

    An external `tar` process writes the archive into a temporary file.
    The file is read back into memory and delivered by raising
    NoPrintJSONError with download headers and the archive bytes.  The
    temporary file is removed in every case.

    Arguments:
    collection -- collection path, resolved with real_directory()
    include_conf -- converted with int() when possible; when false the
        configuration files are excluded from the archive, when true
        configuration files found in parent directories are added too
    """
    real_dir = real_directory(collection)
    dir_name = basename(dirname(real_dir))
    archive_name = '%s.%s' % (dir_name, 'tar.gz')

    conf_files = ['annotation.conf', 'visual.conf', 'tools.conf',
                  'kb_shortcuts.conf']

    # int() makes numeric strings such as "0" behave as a false flag;
    # values that do not parse are used as they are.
    try:
        include_conf = int(include_conf)
    except ValueError:
        pass

    tmp_path = None
    try:
        tmp_fd, tmp_path = mkstemp()
        os_close(tmp_fd)

        command = ['tar', '--exclude=.stats_cache']
        extra_members = []
        if include_conf:
            # also include configs from parent directories.
            for conf_name in conf_files:
                _, depth = find_in_directory_tree(real_dir, conf_name)
                if depth is None or depth <= 0:
                    continue
                parent = path_join(dir_name, *(['..'] * depth))
                extra_members.append(path_join(parent, conf_name))
            if extra_members:
                # Rewrite path components ending in ".." to the target
                # directory name so that configs from parent directories
                # appear inside the target directory in the archive.
                command += ['--absolute-names', '--transform',
                            's|.*\\.\\.|%s|' % dir_name]
        else:
            command += ['--exclude=%s' % name for name in conf_files]

        command += ['-c', '-z', '-f', tmp_path, dir_name]
        command += extra_members

        # tar runs from the parent directory so that member names start
        # with the collection directory name.
        Popen(command, cwd=path_join(real_dir, '..')).wait()

        with open(tmp_path, 'rb') as archive:
            archive_bytes = archive.read()

        headers = [('Content-Type', 'application/octet-stream'),
                   ('Content-Disposition',
                    'inline; filename=%s' % archive_name)]
        raise NoPrintJSONError(headers, archive_bytes)
    finally:
        if tmp_path is not None:
            remove(tmp_path)
Ejemplo n.º 26
0
def __document_to_annotations(directory, document):
    """
    Build the Annotations object(s) for one document of a directory.

    The document path is formed from the resolved directory and the
    document name and passed, as a one-element list, to
    __filenames_to_annotations, whose result is returned unchanged.
    """
    # TODO: put this shared functionality in a more reasonable place
    from document import real_directory
    from os.path import join as path_join

    document_path = path_join(real_directory(directory), document)
    return __filenames_to_annotations([document_path])
Ejemplo n.º 27
0
def logOcrFileTypeDetails(collection, document, ocrOutputResult,
                          identificationOutputResult,
                          identificationBoundaryOutputResult,
                          extractionOutputResult, fileType, lossType,
                          comments):
    """
    Insert or replace the OCR detail record of a document.

    Records are stored one JSON object per line in the file
    'qafunnelocrfiledetails' inside the resolved collection directory.
    A line whose "document" value equals `document` is rewritten in
    place; when no such line exists the record is appended.

    Returns {"status": 'true'}.
    """
    import sys

    path = path_join(real_directory(collection), 'qafunnelocrfiledetails')
    if not os.path.exists(path):
        # Plain open() creates the regular file on every platform;
        # os.mknod is Unix-only.
        open(path, 'a').close()

    # Serialize once; the same line is used for replace and for append.
    record_line = json.dumps({
        "document": document,
        "ocrOutputResult": ocrOutputResult,
        "identificationOutputResult": identificationOutputResult,
        "identificationBoundaryOutputResult":
        identificationBoundaryOutputResult,
        "extractionOutputResult": extractionOutputResult,
        "fileType": fileType,
        "lossType": lossType,
        "comments": comments
    }) + '\n'

    searchFlag = False

    try:
        # With inplace=True everything written to sys.stdout replaces
        # the content of the file being read.
        for line in fileinput.input(path, inplace=True):
            docname = str(json.loads(line)['document'])
            if docname == document:
                searchFlag = True
                sys.stdout.write(record_line)
            else:
                sys.stdout.write(line)
    finally:
        # Always restore sys.stdout, also when a line fails to parse.
        fileinput.close()

    if not searchFlag:
        with open(path, 'a') as log_file:
            log_file.write(record_line)

    return {"status": 'true'}
Ejemplo n.º 28
0
def suggest_span_types(collection, document, start, end, text, model):
    """
    Ask a configured disambiguation service for type suggestions.

    The service URL is taken from the disambiguator configuration entry
    whose model name equals `model`.  Predictions are collected in the
    order received until their summed confidence reaches CUT_OFF, the
    selection is logged, and it is returned with the request echoed.

    Raises SimSemConnectionNotConfiguredError when no entry matches the
    model and SimSemConnectionError when the request fails with URLError.
    """
    pconf = ProjectConfiguration(real_directory(collection))

    model_url = None
    configured = False
    for _, _, candidate, candidate_url in pconf.get_disambiguator_config():
        if candidate == model:
            model_url, configured = candidate_url, True
            break
    if not configured:
        # We were unable to find a matching model
        raise SimSemConnectionNotConfiguredError

    try:
        resp = urlopen(model_url % quote_plus(text), None, QUERY_TIMEOUT)
    except URLError:
        # TODO: Could give more details
        raise SimSemConnectionError

    reply = loads(resp.read())
    predictions = reply['result'][text.decode('utf-8')]

    # Keep predictions until the accumulated confidence is high enough.
    selected_preds = []
    accumulated = 0
    for category, confidence in predictions:
        selected_preds.append((category, confidence))
        accumulated += confidence
        if accumulated >= CUT_OFF:
            break

    log_annotation(collection, document, 'DONE', 'suggestion',
                   [None, None, text, selected_preds])

    # array so that server can control presentation order in UI
    # independently from scores if needed
    result = {'types': selected_preds}
    result['collection'] = collection  # echo for reference
    result['document'] = document
    result['start'] = start
    result['end'] = end
    result['text'] = text
    return result
Ejemplo n.º 29
0
def getAnnObject(collection, document):
    """
    Return the annotation object of a document, using a pickle cache.

    The cache file lives in WORK_DIR + "/application/" and is named
    after collection + document with every "/" removed.  When it exists
    the object is unpickled from it; otherwise the annotations are read,
    wrapped in SimpleAnnotations and extended with the extra FoLiA
    information.  Validation issues are then attached as `ann.issues`
    and the object is written back to the cache.

    Arguments:
    collection -- collection path; used unresolved if real_directory()
        fails
    document -- document name inside the collection
    """
    try:
        real_dir = real_directory(collection)
    except Exception:
        # Fall back on the path as given when it cannot be resolved.
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    full_name = collection + document
    full_name = full_name.replace("/", "")
    cache_path = app_path + full_name
    if os.path.isfile(cache_path):
        # NOTE(review): unpickling executes code embedded in the file;
        # the cache directory must not be writable by untrusted users.
        with open(cache_path, 'rb') as cache_file:
            ann = pickle_load(cache_file)
    else:
        ann = TextAnnotations(real_dir + document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    #Validation:
    try:
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: validation failed: %s' % e)
    ann.issues = issues
    with open(cache_path, 'wb') as cache_file:
        pickle_dump(ann, cache_file)
    return ann
Ejemplo n.º 30
0
def get_extra_info(path, fname):
    '''
    Collect the extra annotations stored in the FoLiA file of a document.

    The file "<fname>.xml" is looked up in the resolved directory of
    `path` (or in `path` itself when resolving fails).

    Returns a dictionary with the list entries "entities", "relations",
    "comments" and "attributes" and the dictionary entry "tokens", which
    is keyed by the first offset value of each word.  If the FoLiA
    document cannot be loaded, the entries are returned empty.
    '''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(path)
    except:
        # Any failure (import or lookup) falls back on the raw path.
        real_dir = path
    path = path_join(real_dir, fname)
    result = {}
    result["entities"] = []
    result["relations"] = []
    result["comments"] = []
    result["attributes"] = []
    result["tokens"] = {}
    try:
        doc = folia.Document(file=path + ".xml")
    except:
        # No readable FoLiA file: report empty extra information.
        return result
    text, offsets = parse_text(doc)
    #TOKEN ANNOTATIONS
    for i in doc.select(folia.Word):
        _id = offsets[i.id][0]
        # Only the first word seen for an offset is recorded.
        if not _id in result["tokens"]:
            result["tokens"][_id] = {}
            index = 1
            for j in i.select(folia.Morpheme):
                # One 'MorphemeN' entry per morpheme: its text followed
                # by one "key + value" line per token annotation.
                result["tokens"][_id]['Morpheme' + str(index)] = ": " + str(j)
                token_anns = get_token_info(j, 'mor' + str(index) + '-')
                string = ""
                for key in token_anns:
                    string += "\n" + key + token_anns[key]
                result["tokens"][_id]['Morpheme' + str(index)] += string
                # The morpheme is detached from its parent before the
                # word itself is inspected below.
                # NOTE(review): presumably so that the word-level
                # get_token_info() call does not see it again - confirm.
                j.parent.remove(j)
                index += 1
            if index > 1:
                # Morphemes were recorded: merge the word-level info in.
                result["tokens"][_id].update(get_token_info(i, ''))
            else:
                result["tokens"][_id] = get_token_info(i, '')
    #ANNOTATIONS REPRESENTED BY AN ENTITY
    entities, relations, comments, attributes = get_extra_entities(
        offsets, text, doc)
    result["entities"] += entities
    result["relations"] += relations
    result["comments"] += comments
    result["attributes"] += attributes
    return result
Ejemplo n.º 31
0
def save_import(title, text, docid, collection=None):
    '''
    Create a new document from imported text.

    Writes "<title>\\n<text>" to the text file of the document and
    creates empty annotation and data files next to it.

    Arguments:
    title -- first line of the new text file
    text -- document body; a trailing newline is added when missing
    docid -- name of the new document
    collection -- target collection, DATA_DIR is used when None

    Returns { 'document': docid }.

    Raises InvalidDirError for a rejected or non-existing directory,
    NoWritePermissionError when the directory is not writable and
    FileExistsError when any of the files to create already exists.
    '''

    directory = collection

    if directory is None:
        dir_path = DATA_DIR
    else:
        #XXX: These "security" measures can surely be fooled
        if (directory.count('../') or directory == '..'):
            raise InvalidDirError(directory)

        dir_path = real_directory(directory)

    # Is the directory a directory and are we allowed to write?
    if not isdir(dir_path):
        raise InvalidDirError(dir_path)
    if not access(dir_path, W_OK):
        raise NoWritePermissionError(dir_path)

    base_path = join_path(dir_path, docid)
    txt_path = base_path + '.' + TEXT_FILE_SUFFIX
    ann_path = base_path + '.' + JOINED_ANN_FILE_SUFF
    dat_path = base_path + '.' + DATA_FILE_SUFFIX

    # Before we proceed, verify that we are not overwriting.  The data
    # file is opened for writing below as well, so it is checked too.
    for path in (txt_path, ann_path, dat_path):
        if isfile(path):
            raise FileExistsError(path)

    # Make sure we have a valid POSIX text file, i.e. that the
    # file ends in a newline.
    if text != "" and text[-1] != '\n':
        text = text + '\n'

    with open_textfile(txt_path, 'w') as txt_file:
        txt_file.write(title + '\n' + text)

    # Touch the ann file so that we can edit the file later
    with open(ann_path, 'w') as _:
        pass

    # Touch the dat file so that we can edit the file later
    with open(dat_path, 'w') as _:
        pass

    return { 'document': docid }
Ejemplo n.º 32
0
def __directory_to_annotations(directory):
    """
    Given a directory, returns Annotations objects for contained files.

    Every entry of the resolved directory whose name ends in ".txt" is
    treated as a document; the extension is removed and the resulting
    paths are passed to __filenames_to_annotations.
    """
    # TODO: put this shared functionality in a more reasonable place
    from document import real_directory,_listdir
    from os.path import join as path_join

    suffix = '.txt'

    real_dir = real_directory(directory)
    # Get the document names.  Matching the dot as well keeps names that
    # merely end in "txt" from being cut by the four-character strip.
    base_names = [fn[:-len(suffix)] for fn in _listdir(real_dir)
                  if fn.endswith(suffix)]

    filenames = [path_join(real_dir, bn) for bn in base_names]

    return __filenames_to_annotations(filenames)
Ejemplo n.º 33
0
def set_status(directory, document, status=None):
    """
    Replace the status annotations of a document.

    All existing status annotations are deleted.  When `status` is not
    None, a single new "STATUS" comment annotation is added for it.

    Returns {"status": status}.
    """
    real_dir = real_directory(directory)

    with TextAnnotations(path_join(real_dir, document)) as ann:
        # Erase all old status annotations.  A separate loop name keeps
        # the `status` argument intact, and the snapshot avoids deleting
        # from a collection while it is being iterated.
        for old_status in list(ann.get_statuses()):
            ann.del_annotation(old_status)

        if status is not None:
            # XXX: This could work, not sure if it can induce an id collision
            new_status_id = ann.get_new_id("#")
            ann.add_annotation(OnelineCommentAnnotation(
                status, new_status_id, "STATUS", ""))

    json_dic = {"status": status}
    return json_dic
Ejemplo n.º 34
0
Archivo: search.py Proyecto: dmcc/brat
def __directory_to_annotations(directory):
    """
    Given a directory, returns Annotations objects for contained files.

    Documents are the directory entries with a ".txt" extension; each is
    passed, without the extension, to __filenames_to_annotations.
    """
    # TODO: put this shared functionality in a more reasonable place
    from document import real_directory,_listdir
    from os.path import join as path_join

    real_dir = real_directory(directory)

    filenames = []
    for fn in _listdir(real_dir):
        # Require the dot: the slice below removes exactly ".txt", so a
        # name such as "context" must not be accepted and truncated.
        if fn.endswith('.txt'):
            filenames.append(path_join(real_dir, fn[:-4]))

    return __filenames_to_annotations(filenames)
Ejemplo n.º 35
0
def get_extra_info(path,fname):
    '''
    Gather the extra annotations kept in the FoLiA file of a document.

    Loads "<fname>.xml" from the resolved directory of `path` (or from
    `path` itself when resolving fails) and returns a dictionary with
    the lists "entities", "relations", "comments" and "attributes" plus
    the dictionary "tokens".  All of them are empty when the FoLiA
    document cannot be loaded.
    '''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(path)
    except:
        # Deliberately broad: any failure means "use the path as given".
        real_dir = path
    base = path_join(real_dir, fname)

    tokens = {}
    result = {
        "entities": [],
        "relations": [],
        "comments": [],
        "attributes": [],
        "tokens": tokens,
    }
    try:
        doc = folia.Document(file=base + ".xml")
    except:
        return result
    text, offsets = parse_text(doc)

    # Token annotations
    for word in doc.select(folia.Word):
        token_key = offsets[word.id][0]
        if token_key in tokens:
            continue
        tokens[token_key] = {}
        morph_no = 1
        for morpheme in word.select(folia.Morpheme):
            label = 'Morpheme' + str(morph_no)
            tokens[token_key][label] = ": " + str(morpheme)
            morph_info = get_token_info(morpheme, 'mor' + str(morph_no) + '-')
            details = "".join("\n" + key + morph_info[key]
                              for key in morph_info)
            tokens[token_key][label] += details
            morpheme.parent.remove(morpheme)
            morph_no += 1
        if morph_no > 1:
            tokens[token_key].update(get_token_info(word, ''))
        else:
            tokens[token_key] = get_token_info(word, '')

    # Annotations represented by an entity
    entities, relations, comments, attributes = get_extra_entities(
        offsets, text, doc)
    result["entities"].extend(entities)
    result["relations"].extend(relations)
    result["comments"].extend(comments)
    result["attributes"].extend(attributes)
    return result
Ejemplo n.º 36
0
def save_import(text, docid, collection=None):
    '''
    Import a text as a new document via the lemmatizer pipeline.

    After the target directory and the absence of the text and
    annotation files have been verified, the text is processed by both
    lemmatizers, their outputs are paired, converted with conll() and
    passed to standoff_main() together with the document id.

    Returns {'document': docid}.

    Raises InvalidDirError, NoWritePermissionError or FileExistsError
    when the respective check fails.
    '''
    if collection is None:
        dir_path = DATA_DIR
    elif collection.count('../') or collection == '..':
        #XXX: These "security" measures can surely be fooled
        raise InvalidDirError(collection)
    else:
        dir_path = real_directory(collection)

    # The target must be an existing directory that we may write to.
    if not isdir(dir_path):
        raise InvalidDirError(dir_path)
    if not access(dir_path, W_OK):
        raise NoWritePermissionError(dir_path)

    base_path = join_path(dir_path, docid)
    candidates = (base_path + '.' + TEXT_FILE_SUFFIX,
                  base_path + '.' + JOINED_ANN_FILE_SUFF)

    # Refuse to overwrite an existing document.
    for candidate in candidates:
        if isfile(candidate):
            raise FileExistsError(candidate)

    # A valid POSIX text file ends in a newline.
    if text != "" and text[-1] != '\n':
        text += '\n'

    paired_lemmas = list(izip_longest(lemmatizer(text), lemmatizer2(text)))
    standoff_main(conll(paired_lemmas), docid)

    return {'document': docid}
Ejemplo n.º 37
0
def get_startlist(path,doc):
    """
    Return the start lists of the text-level annotation of a document.

    The result is the dictionary produced by startlist_to_dict() with
    "back_pos" set to False, or {'exception': <message>} when the
    project has no text-level configuration (NoTextLevelConf).
    """
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(path)
    except:
        # Deliberately broad: fall back on the path as given.
        real_dir = path

    annotations = getAnnObject(path, doc)
    project = ProjectConfiguration(real_dir)
    try:
        text_level = TextAnnotations(project, annotations.get_textLevels())
    except NoTextLevelConf as error:
        return {'exception': str(error)}

    response = startlist_to_dict(text_level.startlists)
    # back_pos tells whether at least one answer can still be removed;
    # it is always False here.
    response["back_pos"] = False
    return response
Ejemplo n.º 38
0
def set_status(directory, document, status=None):
    """
    Replace the status annotations of a document.

    Every existing status annotation is removed; if `status` is given, a
    new 'STATUS' comment annotation carrying it is added.

    Returns {'status': status}.
    """
    real_dir = real_directory(directory)

    with TextAnnotations(path_join(real_dir, document)) as ann:
        # Erase all old status annotations.  The loop must not reuse the
        # name `status` (that would overwrite the argument); a snapshot
        # is taken because annotations are deleted during the walk.
        stale_statuses = list(ann.get_statuses())
        for stale in stale_statuses:
            ann.del_annotation(stale)

        if status is not None:
            # XXX: This could work, not sure if it can induce an id collision
            new_status_id = ann.get_new_id('#')
            ann.add_annotation(
                OnelineCommentAnnotation(status, new_status_id, 'STATUS',
                                         ''))

    return {'status': status}
Ejemplo n.º 39
0
def download_file(document, collection, extension):
    """
    Deliver "<document>.<extension>" from a collection as plain text.

    For the 'xml' extension the document is first converted to FoLiA
    with brat2folia.convert().  The UTF-8 encoded content and the
    response headers are passed on by raising NoPrintJSONError.
    """
    #Folia conversion added by Sander Naert
    from brat2folia import convert

    real_dir = real_directory(collection)
    fname = '%s.%s' % (document, extension)

    wants_folia = extension == 'xml'
    if wants_folia:
        # Produce the FoLiA file before it is read below.
        convert(real_dir, document)

    with open_textfile(path_join(real_dir, fname), 'r') as txt_file:
        data = txt_file.read().encode('utf-8')

    hdrs = [('Content-Type', 'text/plain; charset=utf-8')]
    hdrs.append(('Content-Disposition', 'inline; filename=%s' % fname))
    raise NoPrintJSONError(hdrs, data)
Ejemplo n.º 40
0
def save_import(title, text, docid, collection=None):
    '''
    Create a new document from imported text.

    Writes the title line followed by the text to the text file of the
    document and creates an empty annotation file next to it.

    Returns { 'document': docid }.

    Raises InvalidDirError, NoWritePermissionError or FileExistsError
    when the directory or overwrite checks fail.
    '''
    dir_path = DATA_DIR
    if collection is not None:
        #XXX: These "security" measures can surely be fooled
        if collection.count('../') or collection == '..':
            raise InvalidDirError(collection)
        dir_path = real_directory(collection)

    # The target has to be a writable directory.
    if not isdir(dir_path):
        raise InvalidDirError(dir_path)
    if not access(dir_path, W_OK):
        raise NoWritePermissionError(dir_path)

    stem = join_path(dir_path, docid)
    txt_path = stem + '.' + TEXT_FILE_SUFFIX
    ann_path = stem + '.' + JOINED_ANN_FILE_SUFF

    # Never overwrite an existing document.
    if isfile(txt_path):
        raise FileExistsError(txt_path)
    if isfile(ann_path):
        raise FileExistsError(ann_path)

    # A valid POSIX text file ends in a newline.
    needs_newline = text != "" and text[-1] != '\n'
    if needs_newline:
        text = text + '\n'

    with open_textfile(txt_path, 'w') as txt_file:
        txt_file.write(title + '\n' + text)

    # Create the empty annotation file so it can be edited later.
    with open(ann_path, 'w'):
        pass

    return { 'document': docid }
Ejemplo n.º 41
0
def save_web_page_import(url, docid, overwrite, collection=None):
    '''
    Fetch the extraction service result for a web page to import.

    Existing text/annotation files of the document are removed when
    `overwrite` is set (and is not the string 'false'); otherwise their
    presence raises FileExistsError.  The service reply for `url` is
    then fetched and parsed as JSON.

    Raises InvalidDirError, NoWritePermissionError, FileExistsError, or
    FormatError when the reply is not valid JSON.

    NOTE(review): in the code visible here the parsed reply is not used
    and nothing is returned - confirm against the complete function.
    '''
    if collection is None:
        dir_path = DATA_DIR
    else:
        #XXX: These "security" measures can surely be fooled
        if collection.count('../') or collection == '..':
            raise InvalidDirError(collection)
        dir_path = real_directory(collection)

    # The target has to be a writable directory.
    if not isdir(dir_path):
        raise InvalidDirError(dir_path)
    if not access(dir_path, W_OK):
        raise NoWritePermissionError(dir_path)

    stem = join_path(dir_path, docid)
    txt_path = stem + '.' + TEXT_FILE_SUFFIX
    ann_path = stem + '.' + JOINED_ANN_FILE_SUFF

    # Existing files are either an error or are removed first.
    for existing in (txt_path, ann_path):
        if not isfile(existing):
            continue
        if not overwrite or overwrite == 'false':
            raise FileExistsError(existing)
        remove(existing)

    apiUrl = 'http://api-ie.qna.bf2.yahoo.com:4080/ie_ws/v1/ie_ws?url=' + url
    data = getApiData(apiUrl)

    try:
        json_resp = loads(data)
    except ValueError as e:
        raise FormatError(apiUrl, e)
Ejemplo n.º 42
0
def download_file(document, collection, extension):
    """
    Deliver "<document>.<extension>" from a collection as plain text.

    When the 'xml' extension is requested the document is first
    converted to FoLiA with brat2folia.convert().  The UTF-8 encoded
    content and the response headers are passed on by raising
    NoPrintJSONError, so the function never returns normally.
    """
    directory = collection
    real_dir = real_directory(directory)
    fname = '%s.%s' % (document, extension)
    fpath = path_join(real_dir, fname)

    hdrs = [('Content-Type', 'text/plain; charset=utf-8'),
            ('Content-Disposition',
                'inline; filename=%s' % fname)]

    #Folia conversion added by Sander Naert
    from brat2folia import convert
    if extension == 'xml':
        # Convert to FoLiA.  Indented with spaces like the rest of the
        # body; mixing in tabs is rejected by Python 3 (TabError).
        convert(real_dir, document)

    with open_textfile(fpath, 'r') as txt_file:
        data = txt_file.read().encode('utf-8')
    raise NoPrintJSONError(hdrs, data)
def folder_import(docid, collection=None):
    '''
    Create a new folder named `docid` inside a collection.

    Returns { 'document': <path> } where <path> is the absolute path of
    the new folder with its first len('/brat/data/') characters removed.

    Raises InvalidDirError for a rejected or non-existing directory,
    NoWritePermissionError when the directory is not writable or the
    session user may not write there, and FileExistsError when the
    folder already exists.
    '''
    if collection is None:
        dir_path = DATA_DIR
    else:
        #XXX: These "security" measures can surely be fooled
        if collection.count('../') or collection == '..':
            raise InvalidDirError(collection)
        dir_path = real_directory(collection)

    # The parent has to be a writable directory.
    if not isdir(dir_path):
        raise InvalidDirError(dir_path)
    if not access(dir_path, W_OK):
        raise NoWritePermissionError(dir_path)

    # Only 'admin' may write everywhere; other users need a path that
    # contains "<username>/".
    from session import get_session
    try:
        username = get_session()['user']
    except KeyError:
        username = None
    is_admin = username == 'admin'
    if not is_admin:
        if (not username) or username + '/' not in dir_path:
            raise NoWritePermissionError(dir_path)

    new_folder = abspath(join_path(dir_path, docid))

    # Never overwrite something that is already there.
    if exists(new_folder):
        raise FileExistsError(new_folder)

    makedirs(new_folder)

    # NOTE(review): the prefix length is hard-coded for '/brat/data/';
    # confirm this matches the data directory of the deployment.
    prefix_length = len('/brat/data/')
    return { 'document': new_folder[prefix_length:] }
Ejemplo n.º 44
0
def delete_span(collection, document, id):
    """
    Delete the annotation with the given id from a document.

    If the annotation has a trigger, deleting the trigger is attempted
    as well and silently skipped when other annotations depend on it.

    Returns the response dictionary (the tracked modifications only when
    DEBUG is set) with the document JSON under "annotations", or
    {"exception": True} after reporting the error when the annotation
    itself cannot be deleted because others depend on it.
    """
    document = path_join(real_directory(collection), document)
    txt_file_path = document + "." + TEXT_FILE_SUFFIX

    with TextAnnotations(document) as ann_obj:
        mods = ModificationTracker()

        # TODO: Handle a failure to find it
        # XXX: Slow, O(2N)
        doomed = ann_obj.get_ann_by_id(id)
        try:
            # Note: need to pass the tracker to del_annotation to track
            # recursive deletes. TODO: make usage consistent.
            ann_obj.del_annotation(doomed, mods)
            try:
                trigger = ann_obj.get_ann_by_id(doomed.trigger)
                try:
                    ann_obj.del_annotation(trigger, mods)
                except DependingAnnotationDeleteError:
                    # Someone else depended on that trigger
                    pass
            except AttributeError:
                # Reached, for instance, when the annotation has no
                # trigger attribute.
                pass
        except DependingAnnotationDeleteError as e:
            Messager.error(e.html_error_str())
            return {"exception": True}

        mods_json = mods.json_response() if DEBUG else {}
        # save a roundtrip and send the annotations also
        mods_json["annotations"] = _json_from_ann_and_txt(ann_obj,
                                                          txt_file_path)
        return mods_json
Ejemplo n.º 45
0
def suggest_span_types(collection, document, start, end, text, model):
    """
    Query the configured disambiguation model for span type suggestions.

    Returns a dictionary with the selected (category, confidence) pairs
    under 'types' and the request values echoed back.  Pairs are taken
    in the order received until their confidences sum up to CUT_OFF.

    Raises SimSemConnectionNotConfiguredError if `model` is not found in
    the disambiguator configuration and SimSemConnectionError if the
    request fails with URLError.
    """
    project_conf = ProjectConfiguration(real_directory(collection))
    for _, _, model_str, model_url in project_conf.get_disambiguator_config():
        if model_str == model:
            break
    else:
        # We were unable to find a matching model
        raise SimSemConnectionNotConfiguredError

    try:
        request_url = model_url % quote_plus(text)
        resp = urlopen(request_url, None, QUERY_TIMEOUT)
    except URLError:
        # TODO: Could give more details
        raise SimSemConnectionError

    parsed = loads(resp.read())
    ranked = parsed['result'][text.decode('utf-8')]

    selected_preds = []
    conf_sum = 0
    for pair in ranked:
        cat, conf = pair
        selected_preds.append((cat, conf))
        conf_sum += conf
        if conf_sum >= CUT_OFF:
            break

    log_entry = [None, None, text, selected_preds]
    log_annotation(collection, document, 'DONE', 'suggestion', log_entry)

    # array so that server can control presentation order in UI
    # independently from scores if needed
    return {
        'types': selected_preds,
        'collection': collection,  # echo for reference
        'document': document,
        'start': start,
        'end': end,
        'text': text,
    }
Ejemplo n.º 46
0
def get_list(path,doc):
    """
    Return the currently selected list of the text-level annotation.

    The result is the dictionary produced by list_to_dict() with
    "back_pos" telling whether the followed path is non-empty, or
    {'exception': <message>} when the project has no text-level
    configuration (NoTextLevelConf).
    """
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(path)
    except:
        # Deliberately broad: fall back on the path as given.
        real_dir = path

    annotations = getAnnObject(path, doc)
    project = ProjectConfiguration(real_dir)
    try:
        text_level = TextAnnotations(project, annotations.get_textLevels())
    except NoTextLevelConf as error:
        return {'exception': str(error)}

    response = list_to_dict(text_level.selectedList.currentList)
    # back_pos tells whether at least one answer can still be removed.
    response["back_pos"] = len(text_level.followed_path) > 0
    return response
def delete_arc(collection, document, origin, target, type):
    """
    Delete the arc of the given type between two annotations.

    Returns the tracked modifications as a JSON dictionary with the
    document's annotation JSON stored under 'annotations'.

    Raises AnnotationsIsReadOnlyError when the document is read-only.
    """
    real_dir = real_directory(collection)
    mods = ModificationTracker()
    projectconf = ProjectConfiguration(real_dir)

    with TextAnnotations(path_join(real_dir, document)) as ann_obj:
        # Refuse to touch read-only documents before doing any work.
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        _delete_arc_with_ann(origin, target, type, mods, ann_obj, projectconf)

        response = mods.json_response()
        response['annotations'] = _json_from_ann(ann_obj)
        return response
Ejemplo n.º 48
0
def delete_arc(collection, document, origin, target, type):
    """
    Remove one arc (origin, target, type) from a document.

    The deletion is delegated to _delete_arc_with_ann().  The returned
    dictionary holds the tracked modifications and, under
    'annotations', the annotation JSON of the document afterwards.

    Raises AnnotationsIsReadOnlyError for read-only documents.
    """
    collection_dir = real_directory(collection)
    tracker = ModificationTracker()
    conf = ProjectConfiguration(collection_dir)
    document_path = path_join(collection_dir, document)

    with TextAnnotations(document_path) as ann_obj:
        # bail as quick as possible if read-only
        read_only = ann_obj._read_only
        if read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        _delete_arc_with_ann(origin, target, type, tracker, ann_obj, conf)

        mods_json = tracker.json_response()
        mods_json.__setitem__('annotations', _json_from_ann(ann_obj))
        return mods_json
Ejemplo n.º 49
0
def delete_span(collection, document, id):
    """Delete the annotation ``id`` (and its trigger, if any) from a document.

    The document path is ``document`` joined onto ``real_directory(collection)``.
    After the annotation itself is deleted, its ``trigger`` attribute (when
    the annotation has one) is looked up and deleted as well, unless another
    annotation still depends on it.

    Returns the modification tracker's JSON response with the refreshed
    annotations under ``'annotations'``, or ``{'exception': True}`` when the
    annotation cannot be deleted because other annotations depend on it
    (the error is reported through ``Messager.error``).

    Raises AnnotationsIsReadOnlyError when the annotations are read-only.
    """
    real_dir = real_directory(collection)
    document = path_join(real_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        # TODO: Handle a failure to find it
        # XXX: flagged as slow by the original author
        ann = ann_obj.get_ann_by_id(id)
        try:
            # Note: need to pass the tracker to del_annotation to track
            # recursive deletes. TODO: make usage consistent.
            ann_obj.del_annotation(ann, mods)
            try:
                # Annotations without a ``trigger`` attribute raise
                # AttributeError here and are simply left alone.
                trig = ann_obj.get_ann_by_id(ann.trigger)
                try:
                    ann_obj.del_annotation(trig, mods)
                except DependingAnnotationDeleteError:
                    # Someone else depended on that trigger
                    pass
            except AttributeError:
                pass
        # ``except X as e`` is accepted by Python 2.6+ and Python 3; the
        # former ``except X, e`` spelling is a SyntaxError on Python 3.
        except DependingAnnotationDeleteError as e:
            Messager.error(e.html_error_str())
            return {
                'exception': True,
            }

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Ejemplo n.º 50
0
def convert(path, fname):
    """Generate the ``.ann`` and ``.txt`` files for ``fname`` from its ``.xml``.

    ``<fname>.xml`` is loaded with ``folia.Document``; ``<fname>.ann`` and
    ``<fname>.txt`` are created (or truncated) next to it.  Annotations are
    filled in through the ``add_*`` helpers, the text returned by
    ``parse_text`` is written to the ``.txt`` file, and ``make_conf_file`` is
    called last.

    Args:
        path: Collection path.  It is resolved with ``real_directory`` when
            that succeeds, otherwise it is used as given.
        fname: Document base name without extension.

    Returns:
        ``{'result': True}`` on success; ``{'result': False}`` when loading
        the XML or opening the output files raises IOError (the error is
        reported through ``Messager.error``).
    """
    from os.path import join as path_join
    from message import Messager
    try:
        from document import real_directory
        real_dir = real_directory(path)
    except Exception:
        # Best effort: fall back to the path exactly as it was given.
        real_dir = path
    full_path = path_join(real_dir, fname)
    entity_ids = {}
    try:
        doc = folia.Document(file=full_path + ".xml")
        # Create/truncate the .ann file.  Nothing is written through this
        # handle, so close it immediately instead of leaking it.
        open(full_path + ".ann", 'w').close()
        # NOTE(review): the .txt file is opened (and truncated) before
        # TextAnnotations is built, exactly as before -- confirm that
        # ordering is intended.
        txt = open(full_path + ".txt", 'w')
    except IOError as e:
        Messager.error("IOError " + str(e))
        return {
            'result': False,
        }
    try:
        ann_obj = TextAnnotations(full_path)
        text, offsets = parse_text(doc)
        with SimpleAnnotations(ann_obj) as ann:
            add_entities(doc, ann, entity_ids, offsets, text)
            add_relations(doc, ann, entity_ids, offsets)
            add_attributes(doc, ann, entity_ids)
            add_comments(doc, ann, entity_ids)
            try:
                ann.folia = get_extra_info(path, fname)
            except Exception:
                Messager.error("get_extra_info() from folia failed")
                ann.folia = {}
        txt.write(text)
    finally:
        # Always release the text file, even when a helper above raises.
        txt.close()
    make_conf_file(real_dir, ann)
    # The return value tells the client that the (possibly slow) conversion
    # has finished.
    return {
        'result': True,
    }
Ejemplo n.º 51
0
def delete_span(collection, document, id):
    """Remove annotation ``id`` from ``document``, together with its trigger.

    ``document`` is joined onto ``real_directory(collection)`` and opened as
    ``TextAnnotations``.  The annotation is deleted first; if it exposes a
    ``trigger`` attribute, that trigger annotation is deleted too unless
    something else still depends on it.

    Returns:
        The modification tracker's JSON response extended with an
        ``'annotations'`` entry, or ``{'exception': True}`` when deleting the
        annotation raises DependingAnnotationDeleteError (reported through
        ``Messager.error``).

    Raises:
        AnnotationsIsReadOnlyError: the annotations are read-only.
    """
    real_dir = real_directory(collection)
    document = path_join(real_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        # TODO: Handle a failure to find it
        # XXX: flagged as slow by the original author
        ann = ann_obj.get_ann_by_id(id)
        try:
            # Note: need to pass the tracker to del_annotation to track
            # recursive deletes. TODO: make usage consistent.
            ann_obj.del_annotation(ann, mods)
            try:
                # No ``trigger`` attribute -> AttributeError -> nothing more
                # to delete.
                trig = ann_obj.get_ann_by_id(ann.trigger)
                try:
                    ann_obj.del_annotation(trig, mods)
                except DependingAnnotationDeleteError:
                    # Someone else depended on that trigger
                    pass
            except AttributeError:
                pass
        # Use the ``as`` form: valid on Python 2.6+ and Python 3, whereas the
        # comma form only parses on Python 2.
        except DependingAnnotationDeleteError as e:
            Messager.error(e.html_error_str())
            return {
                'exception': True,
            }

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Ejemplo n.º 52
0
def _get_db_path(database, collection):
    if collection is None:
        # TODO: default to WORK_DIR config?
        return None
    else:
        try:
            conf_dir = real_directory(collection)
            projectconf = ProjectConfiguration(conf_dir)
            norm_conf = projectconf.get_normalization_config()
            for entry in norm_conf:
                dbname, dbpath = entry[0], entry[3]
                if dbname == database:
                    return dbpath
            # not found in config.
            Messager.warning('DB ' + database + ' not defined in config for ' +
                             collection + ', falling back on default.')
            return None
        except Exception:
            # whatever goes wrong, just warn and fall back on the default.
            Messager.warning('Failed to get DB path from config for ' +
                             collection + ', falling back on default.')
            return None
Ejemplo n.º 53
0
def reverse_arc(collection, document, origin, target, type, attributes=None):
    """Swap the two arguments of a binary relation in a document.

    ``document`` is URL-unquoted and joined onto ``real_directory(collection)``.
    Equiv arcs and types that are not configured relations are rejected with
    a ``Messager.warning``.  Otherwise the first relation whose ``arg1``,
    ``arg2`` and ``type`` match ``origin``, ``target`` and ``type`` has its
    arguments exchanged; if none matches, a ``Messager.error`` is issued.

    ``attributes`` is accepted but not used.

    Returns a dict with the refreshed annotations under ``'annotations'``.

    Raises AnnotationsIsReadOnlyError when the annotations are read-only.
    """
    # TODO: undo response and modification tracking are not implemented here.
    real_dir = real_directory(collection)
    projectconf = ProjectConfiguration(real_dir)
    doc_path = path_join(real_dir, urllib.parse.unquote(document))

    with TextAnnotations(doc_path) as ann_obj:
        # Read-only annotations must not be modified.
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        if projectconf.is_equiv_type(type):
            Messager.warning('Cannot reverse Equiv arc')
        elif not projectconf.is_relation_type(type):
            Messager.warning('Can only reverse configured binary relations')
        else:
            # TODO: more sensible lookup
            relation = next(
                (rel for rel in ann_obj.get_relations()
                 if rel.arg1 == origin and rel.arg2 == target
                 and rel.type == type),
                None)
            if relation is None:
                Messager.error(
                    'reverse_arc: failed to identify target relation (from %s to %s, type %s) (deleted?)'
                    % (str(origin), str(target), str(type)))
            else:
                # Reversing is just exchanging the two arguments in place.
                relation.arg1, relation.arg2 = relation.arg2, relation.arg1
                # TODO: modification tracker

        return {'annotations': _json_from_ann(ann_obj)}
Ejemplo n.º 54
0
def _create_span(collection, document, offsets, _type, attributes=None,
                 normalizations=None, _id=None, comment=None):
    """Create a new span annotation, or edit the existing one named by ``_id``.

    With ``_id`` set, the work is delegated to ``_edit_span``; otherwise
    ``__create_span`` builds a new annotation and the undo record is marked
    as an ``'add_tb'`` action.  Attributes, normalizations and the comment
    are then attached to the event annotation when one was returned, and to
    the text-bound annotation otherwise.

    Returns a dict with the modification summary, the refreshed annotations
    under ``'annotations'`` and, when an undo record was produced, its JSON
    dump under ``'undo'``.

    Raises SpanOffsetOverlapError when ``_offset_overlaps(offsets)`` is true
    and AnnotationsIsReadOnlyError when the annotations are read-only.
    """
    if _offset_overlaps(offsets):
        raise SpanOffsetOverlapError(offsets)

    undo_resp = {}

    parsed_attributes = _parse_attributes(attributes)
    parsed_normalizations = _parse_span_normalizations(normalizations)

    real_dir = real_directory(collection)
    doc_base = path_join(real_dir, document)
    projectconf = ProjectConfiguration(real_dir)
    txt_file_path = doc_base + '.' + TEXT_FILE_SUFFIX

    # NOTE(review): the result of this call is discarded; presumably a
    # leftover from a removed variable -- confirm before deleting.
    path_split(doc_base)[0]

    with TextAnnotations(doc_base) as ann_obj:
        # Nothing may be changed on read-only annotations.
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        if _id is None:
            # No id given: create a brand new annotation.
            tb_ann, e_ann = __create_span(
                ann_obj, mods, _type, offsets, txt_file_path, projectconf,
                parsed_attributes)

            undo_resp['action'] = 'add_tb'
            undo_resp['id'] = (e_ann if e_ann is not None else tb_ann).id
        else:
            # An id was given: edit that existing annotation.
            tb_ann, e_ann = _edit_span(ann_obj, mods, _id, offsets,
                                       projectconf, parsed_attributes, _type,
                                       undo_resp=undo_resp)

        # Attributes, normalizations and comments go on the event when there
        # is one (not on its trigger); otherwise on the text-bound itself.
        target_ann = tb_ann if e_ann is None else e_ann

        _set_attributes(ann_obj, target_ann, parsed_attributes, mods,
                        undo_resp=undo_resp)
        _set_normalizations(ann_obj, target_ann, parsed_normalizations, mods,
                            undo_resp=undo_resp)

        if tb_ann is None:
            # Hack, probably we had a new-line in the span
            mods_json = {}
            Messager.error(
                'Text span contained new-line, rejected',
                duration=3)
        else:
            _set_comments(ann_obj, target_ann, comment, mods,
                          undo_resp=undo_resp)
            mods_json = mods.json_response()

        if undo_resp:
            mods_json['undo'] = json_dumps(undo_resp)
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Ejemplo n.º 55
0
def split_span(collection, document, args, id):
    """Split event ``id`` into one event per combination of selected arguments.

    ``args`` is a JSON-encoded collection of argument names (without trailing
    digits, e.g. ``"Theme"`` for ``"Theme1"``).  Every argument of the event
    whose stripped name is listed is split on; all others are copied to each
    resulting event.  The original event keeps the first combination and a
    deep copy with a fresh ``E`` id is added for each further combination.
    Annotations that reference the original event are then duplicated or
    extended so that they reference the new events too.

    Returns the modification tracker's JSON response with the refreshed
    annotations under ``'annotations'``.

    Raises:
        AnnotationsIsReadOnlyError: the annotations are read-only.
        AnnotationSplitError: the annotation is not an event, an argument to
            split on occurs fewer than two times, or a referencing annotation
            is of a kind that cannot be adjusted.
    """
    import re
    from copy import deepcopy

    doc_path = path_join(real_directory(collection), document)
    # TODO don't know how to pass an array directly, so the caller sends the
    # argument names JSON-encoded and they are decoded here.
    tosplit_args = json_loads(args)

    with TextAnnotations(doc_path) as ann_obj:
        # Read-only annotations cannot be split.
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        event = ann_obj.get_ann_by_id(id)

        # Only events can currently be split.
        if not isinstance(event, EventAnnotation):
            raise AnnotationSplitError(
                "Cannot split an annotation of type %s" %
                event.type)

        # Partition the event's arguments: those to split on are grouped by
        # role name with trailing digits removed, the rest stay in a list.
        strip_digits = re.compile(r'^(.*?)\d*$')
        split_ids = {}
        kept_args = []
        for role, ref_id in event.args:
            matched = strip_digits.match(role)
            if matched:
                role = matched.group(1)
            if role in tosplit_args:
                split_ids.setdefault(role, []).append(ref_id)
            else:
                kept_args.append((role, ref_id))

        # Splitting on a role only makes sense with two or more fillers.
        for role in tosplit_args:
            count = len(split_ids.get(role, []))
            if count < 2:
                raise AnnotationSplitError(
                    "Cannot split %s on %s: only %d %s arguments (need two or more)" %
                    (event.id, role, count, role))

        # Build every combination of one filler per split role.
        combos = [[]]
        for role in tosplit_args:
            combos = [combo + [(role, ref_id)]
                      for ref_id in split_ids[role]
                      for combo in combos]

        # The first combination reuses the existing event; each further one
        # becomes a deep copy with a new id.
        new_events = []
        for index, combo in enumerate(combos):
            merged_args = kept_args[:] + combo
            if index == 0:
                event.args = merged_args
                continue
            clone = deepcopy(event)
            # TODO: avoid hard-coding ID prefix
            clone.id = ann_obj.get_new_id("E")
            clone.args = merged_args
            ann_obj.add_annotation(clone)
            new_events.append(clone)
            mods.addition(clone)

        def _copy_for_new_events(source, id_prefix):
            # Add one retargeted deep copy of ``source`` per new event.
            # TODO: avoid hard-coding ID prefixes at the call sites.
            for new_event in new_events:
                duplicate = deepcopy(source)
                duplicate.target = new_event.id
                duplicate.id = ann_obj.get_new_id(id_prefix)
                ann_obj.add_annotation(duplicate)
                mods.addition(duplicate)

        # Adjust every annotation that references the original event.
        for other in ann_obj:
            soft_deps, hard_deps = other.get_deps()
            if event.id not in (soft_deps | hard_deps):
                continue

            if isinstance(other, EventAnnotation):
                # Each argument pointing at the original event gets a twin
                # pointing at each new event; build the list before extending.
                other.args.extend([(role, new_event.id)
                                   for role, ref_id in other.args
                                   if ref_id == event.id
                                   for new_event in new_events])

            elif isinstance(other, AttributeAnnotation):
                _copy_for_new_events(other, "A")

            elif isinstance(other, BinaryRelationAnnotation):
                # TODO
                raise AnnotationSplitError(
                    "Cannot adjust annotation referencing split: not implemented for relations! (WARNING: annotations may be in inconsistent state, please reload!) (Please complain to the developers to fix this!)")

            elif isinstance(other, OnelineCommentAnnotation):
                _copy_for_new_events(other, "#")

            elif isinstance(other, NormalizationAnnotation):
                _copy_for_new_events(other, "N")

            else:
                raise AnnotationSplitError(
                    "Cannot adjust annotation referencing split: not implemented for %s! (Please complain to the lazy developers to fix this!)" %
                    other.__class__)

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Ejemplo n.º 56
0
Archivo: tag.py Proyecto: WeSIG/Delta
def tag(collection, document, tagger):
    """Run the configured tagging service ``tagger`` on a document.

    The service URL is taken from the project's annotator config.  The
    request body is the document path and the document text, joined by the
    marker ``#*^$#`` and UTF-8 encoded, sent with HTTP POST.  The JSON
    response is turned into text-bound annotations (new ``T`` ids) and
    normalization annotations (new ``N`` ids), which are added to the
    document.

    Returns the modification tracker's JSON response with the refreshed
    annotations under ``'annotations'``.

    Raises:
        UnknownTaggerError: ``tagger`` is not in the annotator config.
        InvalidConnectionSchemeError: the URL is neither http nor https.
        TaggerConnectionError: the request fails with a socket error or the
            response status is not 200.
        InvalidTaggerResponseError: the response body is not valid JSON.
    """
    project_conf = ProjectConfiguration(real_directory(collection))
    print("tagger", tagger, file=sys.stderr)

    # Find the config row for the requested tagger; the loop variables stay
    # bound to the matching row after the break.
    for tagger_token, _, _, tagger_service_url in (
            project_conf.get_annotator_config()):
        if tagger == tagger_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    # NOTE(review): the result of this call is discarded; presumably a
    # leftover -- confirm before removing.
    path_join(real_directory(collection), document)

    with TextAnnotations(path_join(real_directory(collection),
                                   document)) as ann_obj:

        parsed_url = urlparse(tagger_service_url)
        scheme = parsed_url.scheme

        if scheme == 'https':
            # Delayed HTTPS import since it relies on SSL which is commonly
            #   missing if you roll your own Python, for once we should not
            #   fail early since tagging is currently an edge case and we
            #   can't allow it to bring down the whole server.
            from http.client import HTTPSConnection
            connection_cls = HTTPSConnection
        elif scheme == 'http':
            connection_cls = HTTPConnection
        else:
            raise InvalidConnectionSchemeError(tagger_token, scheme)

        conn = None
        try:
            conn = connection_cls(parsed_url.netloc)
            req_headers = {
                'Content-type': 'text/plain; charset=utf-8',
                'Accept': 'application/json',
            }
            # The request method takes no separate parameters argument, so
            # the query string is appended to the path by hand.
            if parsed_url.query:
                service_url = parsed_url.path + ('?' + parsed_url.query)
            else:
                service_url = parsed_url.path + ''
            try:
                payload = (str(path_join(real_directory(collection),
                                         document))
                           + "#*^$#"
                           + ann_obj.get_document_text()).encode('utf-8')
                req_headers['Content-length'] = len(payload)
                conn.request(
                    'POST',
                    # As per: http://bugs.python.org/issue11898
                    # Force the url to be an ascii string
                    str(service_url),
                    payload,
                    headers=req_headers)
            except SocketError as e:
                raise TaggerConnectionError(tagger_token, e)
            resp = conn.getresponse()

            # Anything but 200 counts as a failed request.
            if resp.status != 200:
                raise TaggerConnectionError(
                    tagger_token, '%s %s' % (resp.status, resp.reason))
            # Finally, we can read the response data
            resp_data = resp.read()
        finally:
            if conn is not None:
                conn.close()

        try:
            json_resp = loads(resp_data)
        except ValueError:
            raise InvalidTaggerResponseError(tagger_token, resp_data)

        mods = ModificationTracker()
        # Maps the ids used in the tagger response to the ids assigned here.
        cidmap = {}

        for cid, ann in json_resp.items():
            if not _is_textbound(ann):
                continue

            assert 'offsets' in ann, 'Tagger response lacks offsets'
            offsets = ann['offsets']
            assert 'type' in ann, 'Tagger response lacks type'
            ann_type = ann['type']
            assert 'texts' in ann, 'Tagger response lacks texts'
            texts = ann['texts']

            # sanity
            assert len(offsets) != 0, 'Tagger response has empty offsets'
            assert len(texts) == len(
                offsets
            ), 'Tagger response has different numbers of offsets and texts'

            # Unpacking is kept although the values are unused: it fails
            # unless the first offset is a pair.
            start, end = offsets[0]
            first_text = texts[0]
            new_id = ann_obj.get_new_id('T')
            print("_id", new_id, file=sys.stderr)
            cidmap[cid] = new_id

            textbound = TextBoundAnnotationWithText(
                offsets, new_id, ann_type, first_text,
                " " + ' '.join(texts[1:]))

            mods.addition(textbound)
            ann_obj.add_annotation(textbound)

        for norm in json_resp.values():
            if not _is_normalization(norm):
                continue

            # TODO: a missing key currently propagates as a plain KeyError.
            norm_type = norm['type']
            norm_target = norm['target']
            refdb = norm['refdb']
            refid = norm['refid']

            new_id = ann_obj.get_new_id('N')
            # Translate the tagger's target id into the id assigned above.
            norm_target = cidmap[norm_target]

            normalization = NormalizationAnnotation(
                new_id, norm_type, norm_target, refdb, refid, '')

            mods.addition(normalization)
            ann_obj.add_annotation(normalization)

        mod_resp = mods.json_response()
        mod_resp['annotations'] = _json_from_ann(ann_obj)
        return mod_resp
Ejemplo n.º 57
0
def delete_arc(collection, document, origin, target, type):
    directory = collection

    real_dir = real_directory(directory)
    document = path_join(real_dir, document)

    txt_file_path = document + '.' + TEXT_FILE_SUFFIX

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only 
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        mods = ModificationTracker()

        # This can be an event or an equiv
        #TODO: Check for None!
        try:
            event_ann = ann_obj.get_ann_by_id(origin)
            # Try if it is an event
            arg_tup = (type, unicode(target))
            if arg_tup in event_ann.args:
                before = unicode(event_ann)
                event_ann.args.remove(arg_tup)
                mods.change(before, event_ann)

                '''
                if not event_ann.args:
                    # It was the last argument tuple, remove it all
                    try:
                        ann_obj.del_annotation(event_ann)
                        mods.deletion(event_ann)
                    except DependingAnnotationDeleteError, e:
                        #XXX: Old message api
                        print 'Content-Type: application/json\n'
                        print dumps(e.json_error_response())
                        return
                '''
            else:
                # What we were to remove did not even exist in the first place
                pass

        except AttributeError:
            projectconf = ProjectConfiguration(real_dir)
            if projectconf.is_equiv_type(type):
                # It is an equiv then?
                #XXX: Slow hack! Should have a better accessor! O(eq_ann)
                for eq_ann in ann_obj.get_equivs():
                    # We don't assume that the ids only occur in one Equiv, we
                    # keep on going since the data "could" be corrupted
                    if (unicode(origin) in eq_ann.entities
                            and unicode(target) in eq_ann.entities):
                        before = unicode(eq_ann)
                        eq_ann.entities.remove(unicode(origin))
                        eq_ann.entities.remove(unicode(target))
                        mods.change(before, eq_ann)

                    if len(eq_ann.entities) < 2:
                        # We need to delete this one
                        try:
                            ann_obj.del_annotation(eq_ann)
                            mods.deletion(eq_ann)
                        except DependingAnnotationDeleteError, e:
                            #TODO: This should never happen, dep on equiv
                            #print 'Content-Type: application/json\n'
                            # TODO: Proper exception here!
                            Messager.error(e.json_error_response())
                            return {}
            elif type in projectconf.get_relation_types():
                for ann in ann_obj.get_relations():
                    if ann.type == type and ann.arg1 == origin and ann.arg2 == target:
                        ann_obj.del_annotation(ann)
                        mods.deletion(ann)
                        break
            else: