Ejemplo n.º 1
0
def _get_match_regex(text, text_match="word", match_case=False,
                     whole_string=False):
    """
    Helper for the various search_anns_for_ functions.

    Compiles `text` into a regular expression according to `text_match`:
    "word" matches the escaped text between word boundaries (or between
    string boundaries when `whole_string` is set), "substring" matches
    the escaped text anywhere, and "regex" compiles `text` as given.
    Matching ignores case unless `match_case` is true, and a `text` of
    None is treated as the empty string.

    Returns the compiled pattern, or None (after reporting through
    Messager) if `text` is not a valid regular expression or
    `text_match` is not recognized.
    """
    regex_flags = 0 if match_case else re.IGNORECASE

    if text is None:
        text = ''

    if text_match == "word":
        # full word match: require word boundaries or, optionally,
        # whole string boundaries
        if whole_string:
            return re.compile(r'^'+re.escape(text)+r'$', regex_flags)
        return re.compile(r'\b'+re.escape(text)+r'\b', regex_flags)

    if text_match == "substring":
        # any substring match, as text (nonoverlapping matches)
        return re.compile(re.escape(text), regex_flags)

    if text_match == "regex":
        try:
            return re.compile(text, regex_flags)
        except Exception:
            # Any compile failure means the pattern is unusable; unlike a
            # bare except this lets KeyboardInterrupt/SystemExit through.
            Messager.warning('Given string "%s" is not a valid regular expression.' % text)
            return None

    Messager.error('Unrecognized search match specification "%s"' % text_match)
    return None
Ejemplo n.º 2
0
def _server_crash(cookie_hdrs, e):
    """
    Report an unhandled server error to the client and to stderr.

    When DEBUG is set the stack trace from _get_stack_trace() is sent to
    the client; otherwise the client gets a generic message carrying the
    administrator contact address and an id derived from the current
    time. Returns the (cookie headers, (response headers, JSON body))
    tuple for the 'serverCrash' response. The `e` argument is not used.
    """
    from config import ADMIN_CONTACT_EMAIL, DEBUG
    from jsonwrap import dumps
    from message import Messager

    trace = _get_stack_trace()

    if not DEBUG:
        # Give the user an error message, using the current time since
        # epoch as an id for later log look-up
        client_msg = ('The server encountered a serious error, '
                      'please contact the administrators at %s '
                      'and give the id #%d'
                      ) % (ADMIN_CONTACT_EMAIL, int(time()))
    else:
        # Send back the stack-trace as json
        client_msg = '\n'.join(('Server Python crash, stack-trace is:\n',
                                trace))
    Messager.error(client_msg, duration=-1)

    # Print to stderr so that the exception is logged by the webserver
    print(trace, file=stderr)

    payload = dumps(Messager.output_json({'exception': 'serverCrash'}))
    return (cookie_hdrs, ((JSON_HDR, ), payload))
Ejemplo n.º 3
0
def _config_check():
    """
    Check that config.py can be imported from the installation root.

    sys.path is replaced by the single directory two levels above this
    file so that only a config module located there can be found. If the
    import fails, the original path entries are appended back, an error
    is reported through Messager (the missing-configuration message when
    it is the config module itself that is missing, the stack trace
    otherwise) and ConfigurationError is raised.
    """
    from message import Messager

    from sys import path
    from os.path import dirname
    # Reset the path to force config.py to be in the root (could be hacked
    #       using __init__.py, but we can be monkey-patched anyway)
    orig_path = list(path)
    # Empty the list in place so the change is visible through sys.path
    del path[:]
    path.append(path_join(abspath(dirname(__file__)), '../..'))
    # NOTE(review): the original entries are put back only on the failure
    # path below; confirm the success path is restored elsewhere.
    # Check if we have a config, otherwise whine
    try:
        import config
        del config
    except ImportError as e:
        path.extend(orig_path)
        # Python 3 exposes the missing module as e.name; Python 2 only
        # offers the message text, so accept either form.
        config_missing = (getattr(e, 'name', None) == 'config'
                          or str(e) == 'No module named config')
        if config_missing:
            Messager.error(_miss_config_msg(), duration=-1)
        else:
            Messager.error(_get_stack_trace(), duration=-1)
        raise ConfigurationError
Ejemplo n.º 4
0
    def attributes_for(self, ann_type):
        """
        Returns a list of the possible attribute types for an
        annotation of the given type.

        An attribute applies when its 'Arg' specification names the
        type directly, or names '<EVENT>' / '<ENTITY>' and the type is
        an event / physical entity type respectively. Attributes
        without an 'Arg' specification are reported and skipped.
        """
        applicable = []
        for attr_def in get_attribute_type_list(self.directory):
            if attr_def == SEPARATOR_STR:
                continue

            if 'Arg' in attr_def.arguments:
                targets = attr_def.arguments['Arg']

                if ann_type in targets:
                    applies = True
                elif self.is_event_type(ann_type) and '<EVENT>' in targets:
                    applies = True
                else:
                    applies = (self.is_physical_entity_type(ann_type)
                               and '<ENTITY>' in targets)

                if applies:
                    applicable.append(attr_def.storage_form())
            else:
                Messager.warning(
                    "Project configuration: config error: attribute '%s' lacks 'Arg:' specification."
                    % attr_def.storage_form())

        return applicable
Ejemplo n.º 5
0
def ssdb_build(strs,
               dbname,
               ngram_length=DEFAULT_NGRAM_LENGTH,
               include_marks=DEFAULT_INCLUDE_MARKS):
    '''
    Given a list of strings, a DB name, and simstring options, builds
    a simstring DB for the strings.

    Returns the path of the DB file. Raises NoSimStringError if the
    simstring module cannot be imported; any error raised while
    building is noted on stderr and re-raised.
    '''
    try:
        import simstring
    except ImportError:
        Messager.error(SIMSTRING_MISSING_ERROR, duration=-1)
        raise NoSimStringError

    dbfn = __ssdb_path(dbname)
    try:
        # only library defaults (n=3, no marks) supported just now (TODO)
        assert ngram_length == 3, "Error: unsupported n-gram length"
        assert include_marks == False, "Error: begin/end marks not supported"
        db = simstring.writer(dbfn)
        try:
            for s in strs:
                db.insert(s)
        finally:
            # close the writer even when an insert fails
            db.close()
    except Exception:
        # sys.stderr.write behaves the same on Python 2 and Python 3
        sys.stderr.write("Error building simstring DB\n")
        raise

    return dbfn
Ejemplo n.º 6
0
def _parse_attributes(attributes):
    """
    Decode the JSON attribute string sent by the client into a dict.

    None yields an empty dict; a string that cannot be parsed is
    reported through Messager and treated as having no attributes.
    Entries whose value equals False are removed and every truthy value
    is replaced by True.
    """
    if attributes is None:
        return {}

    try:
        parsed = json_loads(attributes)
    except ValueError:
        # Failed to parse, warn the client
        Messager.warning(
            ('Unable to parse attributes string "%s" for '
             '"createSpan", ignoring attributes for request and '
             'assuming no attributes set') %
            (attributes, ))
        parsed = {}

    # XXX: Hack since the client is sending back False and True as values...
    for name, value in list(parsed.items()):
        if value == False:
            # These are __not__ to be sent, they violate the protocol
            del parsed[name]
        elif value:
            # These are to be old-style modifiers without values
            parsed[name] = True
    return parsed
 def wrapper(*args, **kwds):
     """Call the wrapped action, first warning (when DEBUG is set) that it is deprecated."""
     if DEBUG:
         notice = ('Client sent "%s" action '
                   'which is marked as deprecated') % (func.__name__, )
         Messager.warning(notice)
     return func(*args, **kwds)
Ejemplo n.º 8
0
def login(user, password):
    """
    Store `user` in the session after checking the credentials.

    Raises InvalidAuthError when _is_authenticated rejects them;
    otherwise greets the user through Messager and returns an empty dict.
    """
    if _is_authenticated(user, password):
        session = get_session()
        session['user'] = user
        Messager.info('Hello!')
        return {}

    raise InvalidAuthError
Ejemplo n.º 9
0
def possible_arc_types(collection, origin_type, target_type):
    """
    Build the response describing which arc types may connect
    `origin_type` to `target_type` in the given collection.

    The response is empty when the configuration yields None, carries
    an empty fieldset plus 'empty': True when there are no arc types,
    and the generated arc type HTML otherwise. Any exception is
    reported through Messager and re-raised.
    """
    conf = ProjectConfiguration(real_directory(collection))
    result = {}

    try:
        arc_types = conf.arc_types_from_to(origin_type, target_type)

        # TODO: proper error handling
        if arc_types is None:
            Messager.error('Error selecting arc types!', -1)
        elif arc_types == []:
            # nothing to select
            result['html'] = generate_empty_fieldset()
            result['keymap'] = {}
            result['empty'] = True
        else:
            # XXX TODO: intentionally breaking this; KB shortcuts
            # should no longer be sent here. Remove 'keymap' and
            # 'html' args once clientside generation done.
            shortcuts = {} #select_keyboard_shortcuts(arc_types)

            result['keymap'] = dict(
                (key, "arc_"+arc) for key, arc in shortcuts.items())
            result['html'] = generate_arc_type_html(conf, arc_types, shortcuts)
    except:
        Messager.error('Error selecting arc types!', -1)
        raise

    return result
Ejemplo n.º 10
0
def login(user, password):
    """
    Record `user` as the session user once the credentials check out.

    Raises InvalidAuthError when _is_authenticated rejects them;
    otherwise greets the user through Messager and returns an empty dict.
    """
    credentials_ok = _is_authenticated(user, password)
    if not credentials_ok:
        raise InvalidAuthError

    current_session = get_session()
    current_session['user'] = user
    Messager.info('Hello!')
    return {}
Ejemplo n.º 11
0
def ssdb_build(strs, dbname, ngram_length=DEFAULT_NGRAM_LENGTH, include_marks=DEFAULT_INCLUDE_MARKS):
    """
    Given a list of strings, a DB name, and simstring options, builds
    a simstring DB for the strings.

    Returns the path of the DB file. Raises NoSimStringError if the
    simstring module cannot be imported; any error raised while
    building is noted on stderr and re-raised.
    """
    try:
        import simstring
    except ImportError:
        Messager.error(SIMSTRING_MISSING_ERROR, duration=-1)
        raise NoSimStringError

    dbfn = __ssdb_path(dbname)
    try:
        # only library defaults (n=3, no marks) supported just now (TODO)
        assert ngram_length == 3, "Error: unsupported n-gram length"
        assert include_marks == False, "Error: begin/end marks not supported"
        db = simstring.writer(dbfn)
        try:
            for s in strs:
                db.insert(s)
        finally:
            # close the writer even when an insert fails
            db.close()
    except Exception:
        # sys.stderr.write behaves the same on Python 2 and Python 3
        sys.stderr.write("Error building simstring DB\n")
        raise

    return dbfn
Ejemplo n.º 12
0
def ann_logger(directory):
    """
    Lazy initializer for the annotation logger. Returns None if
    annotation logging is not configured for the given directory and a
    logger otherwise.

    The result is kept on the function attribute `__logger`; the value
    False there marks the logger as not yet initialized.
    """
    if not (ann_logger.__logger == False):
        # already initialized (a logger, or None when unavailable)
        return ann_logger.__logger

    log_path = options_get_annlogfile(directory)
    if log_path == '<NONE>':
        # not configured
        ann_logger.__logger = None
        return ann_logger.__logger

    try:
        annotation_log = logging.getLogger('annotation')
        annotation_log.setLevel(logging.INFO)
        file_handler = logging.FileHandler(log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(
            logging.Formatter('%(asctime)s\t%(message)s'))
        annotation_log.addHandler(file_handler)
        ann_logger.__logger = annotation_log
    except IOError as e:
        failure = (log_path, e)
        Messager.error(
            """Error: failed to initialize annotation log %s: %s.
Edit action not logged.
Please check the Annotation-log logfile setting in tools.conf""" % failure)
        logging.error("Failed to initialize annotation log %s: %s" % failure)
        ann_logger.__logger = None

    return ann_logger.__logger
Ejemplo n.º 13
0
 def import_files(self, directory):
     """
     Insert one Ann row for every ".txt" document found in `directory`.

     The directory is resolved with real_directory() and checked with
     assert_allowed_to_read() first. All rows are inserted in a single
     transaction; on any error the problem is reported through Messager
     and the transaction is rolled back.
     """
     real_dir = real_directory(directory)
     assert_allowed_to_read(real_dir)
     # Get the document names: the file name without its ".txt" extension.
     # Matching on '.txt' (not just 'txt') keeps the 4-character strip
     # from mangling names that merely end in "txt".
     file_names = [
         fn[0:-4] for fn in _listdir(real_dir) if fn.endswith('.txt')
     ]
     # Bound up front so the cleanup below is safe even if cursor() fails
     cursor = None
     try:
         cursor = self.conn.cursor()
         for filename in file_names:
             # column order: state, fid, fileName, fileDirAbs, uid, userName
             cursor.execute(
                 _INSERT_ANN_SQL,
                 (Ann_NULL, 0, filename, directory, 0, None))
         self.conn.commit()
     except sqlite3.Error as e:
         Messager.error("Database error: %s" % e)
         self.conn.rollback()
     except Exception as e:
         Messager.error("Exception in _query: %s" % e)
         self.conn.rollback()
     finally:
         if cursor is not None:
             cursor.close()
Ejemplo n.º 14
0
 def set_Ann_state(self, directory, file, state):
     """
     Update the stored state of a document inside one transaction.

     The directory is resolved with real_directory() and checked with
     assert_allowed_to_read() first. The transaction is committed only
     when every statement succeeded; on any error the problem is
     reported through Messager and the transaction is rolled back.
     """
     real_dir = real_directory(directory)
     assert_allowed_to_read(real_dir)
     # Bound up front so the cleanup below is safe even if cursor() fails
     cursor = None
     # check and update
     try:
         cursor = self.conn.cursor()
         cursor.execute("""BEGIN TRANSACTION""")
         cursor.execute(
             """SELECT userName FROM Ann WHERE fileDirAbs = ? and  fileName = ?;""",
             (directory, file))
         rows = cursor.fetchall()
         # NOTE(review): the UPDATE runs only when the SELECT with the same
         # WHERE clause matched no rows, so it cannot change anything;
         # confirm the intended condition.
         if len(rows) == 0:
             cursor.execute(
                 """UPDATE Ann SET state = ? WHERE fileDirAbs = ? and  fileName = ?;""",
                 (state, directory, file))
         # Commit on the success path only: after a rollback there is no
         # open transaction left to commit.
         cursor.execute("COMMIT")
     except sqlite3.Error as e:
         Messager.error("Database error: %s" % e)
         self.conn.rollback()
     except Exception as e:
         Messager.error("Exception in _query: %s" % e)
         self.conn.rollback()
     finally:
         if cursor is not None:
             cursor.close()
Ejemplo n.º 15
0
    def __init__(self):
        """
        Open the SQLite database at DB_FNAME, creating it on first use.

        When the database file did not exist yet, the Ann table is
        created and every directory found under DATA_DIR is imported
        with import_files(). If creating the table fails, the error is
        reported through Messager, the connection is closed and nothing
        is imported.
        """
        # Connect to the SQLite database. The database file is DB_FNAME;
        # if it does not exist it is created automatically in the
        # current directory.
        db_existed = os.path.isfile(DB_FNAME)
        self.conn = sqlite3.connect(DB_FNAME)

        if db_existed:
            return

        table_created = False
        # Bound up front so the cleanup below is safe even if cursor() fails
        cursor = None
        try:
            cursor = self.conn.cursor()
            cursor.execute(_CREATE_ANN_SQL)
            self.conn.commit()
            table_created = True
        except sqlite3.Error as e:
            Messager.error("Database error: %s" % e)
            self.conn.rollback()
        except Exception as e:
            Messager.error("Exception in _query: %s" % e)
            self.conn.rollback()
        finally:
            # close the cursor while the connection is still open
            if cursor is not None:
                cursor.close()

        if not table_created:
            # without the table there is nothing to import into
            self.conn.close()
            return

        # Directory names relative to DATA_DIR, each with a trailing slash
        collections = [
            walked[0].replace(DATA_DIR, '') + '/'
            for walked in os.walk(DATA_DIR)
        ]
        for collection in collections:
            if len(collection) > 1:
                self.import_files(collection)
Ejemplo n.º 16
0
def retrieve_stored(document, suffix):
    """
    Send back a previously stored rendering of a document.

    The file is looked up at _stored_path() followed by '.' and
    `suffix`; NoSVGError is raised when it does not exist. Otherwise
    its content is delivered, together with a Content-Type chosen from
    the suffix and an inline Content-Disposition named after
    `document`, by raising NoPrintJSONError.
    """
    source_path = _stored_path()+'.'+suffix

    if not isfile(source_path):
        # @ninjin: not sure what 'version' was supposed to be returned
        # here, but none was defined, so returning that
        raise NoSVGError('None')

    download_name = document+'.'+suffix

    # sorry, quick hack to get the content-type right
    # TODO: send this with initial 'stored' response instead of
    # guessing on suffix
    known_types = (
        (SVG_SUFFIX, 'image/svg+xml'),
        (PNG_SUFFIX, 'image/png'),
        (PDF_SUFFIX, 'application/pdf'),
        (EPS_SUFFIX, 'application/postscript'),
    )
    for known_suffix, mime_type in known_types:
        if suffix == known_suffix:
            content_type = mime_type
            break
    else:
        Messager.error('Unknown suffix "%s"; cannot determine Content-Type' % suffix)
        # TODO: reasonable backoff value
        content_type = None

    # Bail out with a hack since we violated the protocol
    headers = [('Content-Type', content_type),
               ('Content-Disposition', 'inline; filename=' + download_name)]

    with open(source_path, 'rb') as stored_file:
        payload = stored_file.read()

    raise NoPrintJSONError(headers, payload)
Ejemplo n.º 17
0
def _get_db_path(database, collection):
    """
    Look up the path and unicode setting configured for a
    normalization DB in the given collection.

    Returns (dbpath, dbunicode) from the first normalization config
    entry whose name equals `database`. When no collection is given,
    no entry matches, or the configuration cannot be read, returns
    (None, Simstring.DEFAULT_UNICODE); the latter two cases also emit
    a warning through Messager.
    """
    if collection is None:
        # TODO: default to WORK_DIR config?
        return (None, Simstring.DEFAULT_UNICODE)

    # The configuration is read only inside the try block so that a
    # failure there falls back on the default instead of propagating.
    try:
        conf_dir = real_directory(collection)
        projectconf = ProjectConfiguration(conf_dir)
        norm_conf = projectconf.get_normalization_config()
        for entry in norm_conf:
            # TODO THIS IS WRONG
            dbname, dbpath, dbunicode = entry[0], entry[3], entry[4]
            if dbname == database:
                return (dbpath, dbunicode)
        # not found in config.
        Messager.warning('DB ' + database + ' not defined in config for ' +
                         collection + ', falling back on default.')
        return (None, Simstring.DEFAULT_UNICODE)
    except Exception:
        # whatever goes wrong, just warn and fall back on the default.
        Messager.warning('Failed to get DB path from config for ' +
                         collection + ', falling back on default.')
        return (None, Simstring.DEFAULT_UNICODE)
Ejemplo n.º 18
0
Archivo: auth.py Proyecto: WeSIG/Delta
def allowed_to_read(real_path):
    """
    Decide whether the session user may read `real_path`.

    The path is expressed relative to DATA_DIR (directories get a
    trailing slash) and checked against the access control rules of
    the containing directory. Everything is allowed when no rules are
    configured.
    """
    data_path = path_join('/', relpath(real_path, DATA_DIR))
    # add trailing slash to directories, required to comply to robots.txt
    if isdir(real_path):
        data_path += '/'

    containing_dir = dirname(real_path)
    access_rules = ProjectConfiguration(containing_dir).get_access_control()
    if access_rules is None:
        return True  # default allow

    # directory read permission
    try:
        user = get_session().get('user')
        if user is None:
            Messager.error('没有登录!', duration=3)
            user = '******'
    except KeyError:
        Messager.error('没有登录!', duration=3)
        return False

    return access_rules.can_fetch(user, data_path)
Ejemplo n.º 19
0
def _config_check():
    """
    Check that config.py can be imported from the installation root.

    sys.path is replaced by the single directory two levels above this
    file so that only a config module located there can be found. If the
    import fails, the original path entries are appended back, an error
    is reported through Messager (the missing-configuration message when
    it is the config module itself that is missing, the stack trace
    otherwise) and ConfigurationError is raised.
    """
    from message import Messager

    from sys import path
    from os.path import dirname
    # Reset the path to force config.py to be in the root (could be hacked
    #       using __init__.py, but we can be monkey-patched anyway)
    orig_path = list(path)
    # Empty the list in place so the change is visible through sys.path
    del path[:]
    path.append(path_join(abspath(dirname(__file__)), '../..'))
    # NOTE(review): the original entries are put back only on the failure
    # path below; confirm the success path is restored elsewhere.
    # Check if we have a config, otherwise whine
    try:
        import config
        del config
    except ImportError as e:
        path.extend(orig_path)
        # Python 3 exposes the missing module as e.name; Python 2 only
        # offers the message text, so accept either form.
        config_missing = (getattr(e, 'name', None) == 'config'
                          or str(e) == 'No module named config')
        if config_missing:
            Messager.error(_miss_config_msg(), duration=-1)
        else:
            Messager.error(_get_stack_trace(), duration=-1)
        raise ConfigurationError
Ejemplo n.º 20
0
def ssdb_supstring_exists(s, dbname, threshold=DEFAULT_THRESHOLD):
    '''
    Given a string s and a DB name, returns whether at least one
    string in the associated simstring DB likely contains s as an
    (approximate) substring.

    Raises NoSimStringError if the simstring module cannot be imported.
    '''
    try:
        import simstring
    except ImportError:
        Messager.error(SIMSTRING_MISSING_ERROR, duration=-1)
        raise NoSimStringError

    if threshold == 1.0:
        # optimized (not hugely, though) for this common case
        db = ssdb_open(dbname.encode('UTF-8'))
        try:
            __set_db_measure(db, 'overlap')
            db.threshold = threshold

            result = db.retrieve(s)
        finally:
            # release the DB handle even when the lookup fails
            db.close()

        # assume simstring DBs always contain UTF-8 - encoded strings
        result = [r.decode('UTF-8') for r in result]
        s = s.decode('UTF-8')

        return any(s in r for r in result)
    else:
        # naive implementation for everything else
        return len(ssdb_supstring_lookup(s, dbname, threshold)) != 0
Ejemplo n.º 21
0
def ann_logger(directory):
    """
    Lazy initializer for the annotation logger. Returns None if
    annotation logging is not configured for the given directory and a
    logger otherwise.

    The result is kept on the function attribute `__logger`; the value
    False there marks the logger as not yet initialized.
    """
    if ann_logger.__logger == False:
        # not initialized
        annlogfile = options_get_annlogfile(directory)
        if annlogfile == '<NONE>':
            # not configured
            ann_logger.__logger = None
        else:
            # initialize
            try:
                l = logging.getLogger('annotation')
                l.setLevel(logging.INFO)
                handler = logging.FileHandler(annlogfile)
                handler.setLevel(logging.INFO)
                formatter = logging.Formatter('%(asctime)s\t%(message)s')
                handler.setFormatter(formatter)
                l.addHandler(handler)
                ann_logger.__logger = l
            except IOError as e:
                Messager.error("""Error: failed to initialize annotation log %s: %s.
Edit action not logged.
Please check the Annotation-log logfile setting in tools.conf""" % (annlogfile, e))
                logging.error("Failed to initialize annotation log %s: %s" %
                              (annlogfile, e))
                ann_logger.__logger = None

    # hand back the cached logger (or None), as the docstring promises
    return ann_logger.__logger
Ejemplo n.º 22
0
    def _parse_relation_annotation(self, id, data, data_tail, input_file_path):
        """
        Parse the body of a binary relation line into a
        BinaryRelationAnnotation.

        `data` must hold the relation type followed by exactly two
        ROLE:TARGET arguments with different roles. Anything else
        raises IdedAnnotationLineSyntaxError for the current line.
        """
        try:
            type_delim = data.index(' ')
        except ValueError:
            # cannot have a relation with just a type (contra event)
            raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path)
        rel_type, type_tail = (data[:type_delim], data[type_delim:])

        try:
            args = []
            for arg in type_tail.split():
                # unpacking raises ValueError unless the argument has
                # exactly the form ROLE:TARGET
                role, target = arg.split(':')
                args.append((role, target))
        except ValueError:
            raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path)

        if len(args) != 2:
            Messager.error('Error parsing relation: must have exactly two arguments')
            raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path)

        # order the arguments by role name
        args.sort()
        if args[0][0] == args[1][0]:
            Messager.error('Error parsing relation: arguments must not be identical')
            raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path)

        return BinaryRelationAnnotation(id, rel_type,
                                        args[0][0], args[0][1],
                                        args[1][0], args[1][1],
                                        data_tail, source_id=input_file_path)
Ejemplo n.º 23
0
def get_configs(directory, filename, defaultstr, minconf, sections):
    """
    Return the parsed configuration for `filename` as seen from
    `directory`, caching the result per (directory, filename).

    The configuration text comes from the directory tree when found
    there, otherwise from __read_or_default(); if it cannot be parsed,
    `minconf` is used instead and a warning is issued.
    """
    if (directory, filename) not in get_configs.__cache:
        configstr, source = __read_first_in_directory_tree(directory, filename)

        if configstr is None:
            # didn't get one; try default dir and fall back to the default
            configstr = __read_or_default(filename, defaultstr)
            if configstr == defaultstr:
                Messager.info(
                    "Project configuration: no configuration file (%s) found, using default."
                    % filename, 5)
                source = "[default]"
            else:
                source = filename

        # try to parse what was found, fall back to minimal config
        try:
            configs = __parse_configs(configstr, source, sections)
        except Exception:
            # Any parse failure falls back; unlike a bare except this
            # lets KeyboardInterrupt/SystemExit through.
            Messager.warning(
                "Project configuration: Falling back to minimal default. Configuration is likely wrong.",
                5)
            configs = minconf

        get_configs.__cache[(directory, filename)] = configs

    return get_configs.__cache[(directory, filename)]
Ejemplo n.º 24
0
Archivo: norm.py Proyecto: edycop/brat
def _check_DB_version(database):
    """Warn through Messager when fbkvdb.check_version() rejects `database`."""
    import fbkvdb
    if fbkvdb.check_version(database):
        return

    from message import Messager
    details = (fbkvdb.NORM_DB_VERSION, fbkvdb.get_version(database), database)
    Messager.warning(
        "Warning: norm DB version mismatch: expected %s, got %s for %s" %
        details)
Ejemplo n.º 25
0
def get_drawing_config_by_storage_form(directory, term):
    """
    Return the drawing settings configured for `term` in `directory`,
    or None when the term has none.

    The settings of a directory are built once and cached on the
    function attribute `__cache`: one dict per storage form, holding
    the single-valued arguments (with "-" replaced by ","), completed
    with the settings of the span and arc default entries.
    """
    cache = get_drawing_config_by_storage_form.__cache
    if directory in cache:
        return cache[directory].get(term, None)

    by_term = {}
    for node in get_drawing_config(directory):
        form = node.storage_form()
        if form in by_term:
            Messager.warning(
                "Project configuration: term %s appears multiple times, only using last. Configuration may be wrong."
                % form, 5)
        settings = {}
        by_term[form] = settings
        for arg_name in node.arguments:
            values = node.arguments[arg_name]
            if len(values) == 1:
                settings[arg_name] = values[0]
            else:
                Messager.warning(
                    "Project configuration: expected single value for %s argument %s, got '%s'. Configuration may be wrong."
                    % (form, arg_name, "|".join(values)))

    # TODO: hack to get around inability to have commas in values;
    # fix original issue instead
    for settings in by_term.values():
        for key in settings:
            settings[key] = settings[key].replace("-", ",")

    # propagate defaults (TODO: get rid of magic "DEFAULT" values)
    defaults = [by_term.get(name, {})
                for name in (VISUAL_SPAN_DEFAULT, VISUAL_ARC_DEFAULT)]
    for default_settings in defaults:
        for key in default_settings:
            for settings in by_term.values():
                settings.setdefault(key, default_settings[key])

    cache[directory] = by_term
    return cache[directory].get(term, None)
Ejemplo n.º 26
0
def ssdb_supstring_exists(s, dbname, threshold=DEFAULT_THRESHOLD):
    """Given a string s and a DB name, returns whether at least one string in
    the associated simstring DB likely contains s as an (approximate)
    substring.

    Raises NoSimStringError if the simstring module cannot be imported.
    """
    try:
        import simstring
    except ImportError:
        Messager.error(SIMSTRING_MISSING_ERROR, duration=-1)
        raise NoSimStringError

    if threshold == 1.0:
        # optimized (not hugely, though) for this common case
        db = ssdb_open(dbname.encode('UTF-8'))
        try:
            __set_db_measure(db, 'overlap')
            db.threshold = threshold

            result = db.retrieve(s)
        finally:
            # release the DB handle even when the lookup fails
            db.close()

        # assume simstring DBs always contain UTF-8 - encoded strings
        result = [r.decode('UTF-8') for r in result]

        return any(s in r for r in result)
    else:
        # naive implementation for everything else
        return len(ssdb_supstring_lookup(s, dbname, threshold)) != 0
Ejemplo n.º 27
0
def __directory_relations_by_arg_num(directory,
                                     num,
                                     atype,
                                     include_special=False):
    """
    Collect the relation types of `directory` whose argument number
    `num` (0 or 1) accepts the type `atype`.

    "<ANY>" on either side matches everything. A relation is appended
    once for every accepted type that matches. Relations that do not
    have exactly two arguments are reported and skipped, and the
    entity nesting relation is skipped unless `include_special` is set.
    """
    assert 0 <= num < 2, "INTERNAL ERROR"

    matching = []

    for rel in get_relation_type_list(directory):
        # "Special" nesting relation ignored unless specifically
        # requested
        if rel.storage_form() == ENTITY_NESTING_TYPE and not include_special:
            continue

        if len(rel.arg_list) == 2:
            accepted = rel.arguments[rel.arg_list[num]]
            # TODO: "wildcards" other than <ANY>
            matching.extend(
                rel for candidate in accepted
                if candidate == "<ANY>" or atype == "<ANY>" or candidate == atype)
        else:
            Messager.warning(
                "Relation type %s has %d arguments in configuration (%s; expected 2). Please fix configuration."
                % (rel.storage_form(), len(rel.arg_list), ",".join(rel.arg_list)))

    return matching
Ejemplo n.º 28
0
def ann_logger():
    """
    Lazy initializer for the annotation logger. Returns None if
    annotation logging is not configured and a logger otherwise.

    The result is kept on the function attribute `__logger`; the value
    False there marks the logger as not yet initialized.
    """
    if ann_logger.__logger == False:
        # not initialized
        if ANNOTATION_LOG is None:
            # not configured
            ann_logger.__logger = None
        else:
            # initialize
            try:
                l = logging.getLogger('annotation')
                l.setLevel(logging.INFO)
                handler = logging.FileHandler(ANNOTATION_LOG)
                handler.setLevel(logging.INFO)
                formatter = logging.Formatter('%(asctime)s\t%(message)s')
                handler.setFormatter(formatter)
                l.addHandler(handler)
                ann_logger.__logger = l
            except IOError as e:
                Messager.error("""Error: failed to initialize annotation log %s: %s.
Edit action not logged.
Please check ANNOTATION_LOG setting in config.py""" % (ANNOTATION_LOG, e))
                logging.error("Failed to initialize annotation log %s: %s" %
                              (ANNOTATION_LOG, e))
                ann_logger.__logger = None

    # hand back the cached logger (or None), as the docstring promises
    return ann_logger.__logger
Ejemplo n.º 29
0
def possible_arc_types(collection, origin_type, target_type):
    """
    Build the response listing the arc types that may connect
    `origin_type` to `target_type` in the given collection.

    The response stays empty when the configuration yields None, holds
    an empty fieldset plus 'empty': True when there are no arc types,
    and the generated arc type HTML otherwise. Any exception is
    reported through Messager and re-raised.
    """
    project = ProjectConfiguration(real_directory(collection))
    reply = {}

    try:
        candidates = project.arc_types_from_to(origin_type, target_type)

        # TODO: proper error handling
        if candidates is None:
            Messager.error('Error selecting arc types!', -1)
        elif candidates == []:
            # nothing to select
            reply['html'] = generate_empty_fieldset()
            reply['keymap'] = {}
            reply['empty'] = True
        else:
            # XXX TODO: intentionally breaking this; KB shortcuts
            # should no longer be sent here. Remove 'keymap' and
            # 'html' args once clientside generation done.
            kb_shortcuts = {} #select_keyboard_shortcuts(candidates)

            keymap = {}
            for shortcut, arc in kb_shortcuts.items():
                keymap[shortcut] = "arc_"+arc
            reply['keymap'] = keymap

            reply['html'] = generate_arc_type_html(project, candidates, kb_shortcuts)
    except:
        Messager.error('Error selecting arc types!', -1)
        raise

    return reply
Ejemplo n.º 30
0
def _server_crash(cookie_hdrs, e):
    """
    Report an unhandled server error to the client and to stderr.

    When DEBUG is set the stack trace from _get_stack_trace() is sent to
    the client; otherwise the client gets a generic message carrying the
    administrator contact address and an id derived from the current
    time. Returns the (cookie headers, (response headers, JSON body))
    tuple for the 'serverCrash' response. The `e` argument is not used.
    """
    from config import ADMIN_CONTACT_EMAIL, DEBUG
    from jsonwrap import dumps
    from message import Messager

    trace = _get_stack_trace()

    if not DEBUG:
        # Give the user an error message, using the current time since
        # epoch as an id for later log look-up
        client_msg = ('The server encountered a serious error, '
                      'please contact the administrators at %s '
                      'and give the id #%d'
                      ) % (ADMIN_CONTACT_EMAIL, int(time()))
    else:
        # Send back the stack-trace as json
        client_msg = '\n'.join(('Server Python crash, stack-trace is:\n',
                                trace))
    Messager.error(client_msg, duration=-1)

    # Print to stderr so that the exception is logged by the webserver
    print(trace, file=sys.stderr)

    payload = dumps(Messager.output_json({'exception': 'serverCrash'}))
    return (cookie_hdrs, ((JSON_HDR, ), payload))
Ejemplo n.º 31
0
def norm_get_name(database, key, collection=None):
    """
    Look up the name stored for `key` in a normalization DB.

    Returns a dict echoing `database` and `key` together with the
    looked-up 'value', which is None when no data is available or the
    DB cannot be found (the latter is reported as a warning).
    """
    if NORM_LOOKUP_DEBUG:
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        lookup_start = datetime.now()

    dbpath = _get_db_path(database, collection)
    if dbpath is None:
        # full path not configured, fall back on name as default
        dbpath = database

    try:
        records = normdb.data_by_id(dbpath, key)
    except normdb.dbNotFoundError as e:
        Messager.warning(str(e))
        records = None

    # just grab the first one (sorry, this is a bit opaque)
    name = records[0][0][1] if records is not None else None

    if REPORT_LOOKUP_TIMINGS:
        _report_timings(database, lookup_start)

    # echo request for sync
    return {
        'database': database,
        'key': key,
        'value': name
    }
Ejemplo n.º 32
0
def ann_logger():
    """
    Lazy initializer for the annotation logger. Returns None if
    annotation logging is not configured and a logger otherwise.

    The result is kept on the function attribute `__logger`; the value
    False there marks the logger as not yet initialized.
    """
    if ann_logger.__logger == False:
        # not initialized
        if ANNOTATION_LOG is None:
            # not configured
            ann_logger.__logger = None
        else:
            # initialize
            try:
                l = logging.getLogger('annotation')
                l.setLevel(logging.INFO)
                handler = logging.FileHandler(ANNOTATION_LOG)
                handler.setLevel(logging.INFO)
                formatter = logging.Formatter('%(asctime)s\t%(message)s')
                handler.setFormatter(formatter)
                l.addHandler(handler)
                ann_logger.__logger = l
            except IOError as e:
                Messager.error(
                    """Error: failed to initialize annotation log %s: %s.
Edit action not logged.
Please check ANNOTATION_LOG setting in config.py""" % (ANNOTATION_LOG, e))
                logging.error("Failed to initialize annotation log %s: %s" %
                              (ANNOTATION_LOG, e))
                ann_logger.__logger = None

    # hand back the cached logger (or None), as the docstring promises
    return ann_logger.__logger
Ejemplo n.º 33
0
def _report_timings(dbname, start, msg=None):
    """
    Send an info message with the number of normdb queries counted for
    `dbname` and the time elapsed since `start`, then reset that
    database's query counter. `msg`, if given, is appended verbatim.
    """
    elapsed = datetime.now() - start
    # take out zero min & hour
    elapsed_str = str(elapsed).replace('0:00:0', '')
    query_count = normdb.get_query_count(dbname)
    normdb.reset_query_count(dbname)
    suffix = "" if msg is None else msg
    Messager.info("Processed " + str(query_count) + " queries in " +
                  elapsed_str + suffix)
Ejemplo n.º 34
0
def norm_get_data(database, key, collection=None):
    """
    Look up the data stored for `key` in the normalization DB `database`.

    Returns a dict echoing `database` and `key` with the lookup result
    under 'value'; the value is None when the database is not found.
    A warning is sent whenever no data could be retrieved.
    """
    if NORM_LOOKUP_DEBUG:
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        lookup_start = datetime.now()

    # full path not configured, fall back on name as default
    configured_path = _get_db_path(database, collection)
    dbpath = database if configured_path is None else configured_path

    data = None
    try:
        data = normdb.data_by_id(dbpath, key)
    except normdb.dbNotFoundError as e:
        Messager.warning(str(e))

    if data is None:
        Messager.warning("Failed to get data for " + database + ":" + key)

    if REPORT_LOOKUP_TIMINGS:
        _report_timings(database, lookup_start)

    # echo request for sync
    return {'database': database, 'key': key, 'value': data}
Ejemplo n.º 35
0
    def arc_types_from_to(self, from_ann, to_ann="<ANY>", include_special=False):
        """
        Returns the possible arc types that can connect an annotation
        of type from_ann to an annotation of type to_ann.
        If to_ann has the value \"<ANY>\", returns all possible arc types.

        Returns an empty list (after sending a warning) if from_ann is
        not a known type. The result has duplicates removed with the
        original order preserved.
        """

        from_node = get_node_by_storage_form(self.directory, from_ann)

        if from_node is None:
            Messager.warning("Project configuration: unknown textbound/event type %s. Configuration may be wrong." % from_ann)
            return []

        if to_ann == "<ANY>":
            relations_from = get_relations_by_arg1(self.directory, from_ann, include_special)
            # TODO: consider using from_node.arg_list instead of .arguments for order
            return unique_preserve_order(list(from_node.arguments) + [r.storage_form() for r in relations_from])

        # specific hits
        # Work on a copy: the list stored in keys_by_type belongs to the
        # configuration node, and extending it in place would leak the
        # generic and relation types added below into later lookups.
        types = list(from_node.keys_by_type.get(to_ann, []))

        if "<ANY>" in from_node.keys_by_type:
            types.extend(from_node.keys_by_type["<ANY>"])

        # generic arguments
        if self.is_event_type(to_ann) and '<EVENT>' in from_node.keys_by_type:
            types.extend(from_node.keys_by_type['<EVENT>'])
        if self.is_physical_entity_type(to_ann) and '<ENTITY>' in from_node.keys_by_type:
            types.extend(from_node.keys_by_type['<ENTITY>'])

        # relations
        types.extend(self.relation_types_from_to(from_ann, to_ann))

        return unique_preserve_order(types)
Ejemplo n.º 36
0
def get_drawing_config_by_storage_form(directory, term):
    """
    Return the drawing configuration dict for `term` in the project at
    `directory`, or None if there is no entry for the term.

    The per-directory table is built on first use and memoized in the
    function attribute `__cache`.
    """
    memo = get_drawing_config_by_storage_form.__cache
    if directory in memo:
        return memo[directory].get(term, None)

    by_term = {}
    for node in get_drawing_config(directory):
        form = node.storage_form()
        if form in by_term:
            Messager.warning("Project configuration: term %s appears multiple times, only using last. Configuration may be wrong." % form, 5)
        settings = {}
        by_term[form] = settings
        for arg in node.arguments:
            values = node.arguments[arg]
            if len(values) == 1:
                settings[arg] = values[0]
            else:
                Messager.warning("Project configuration: expected single value for %s argument %s, got '%s'. Configuration may be wrong." % (form, arg, "|".join(values)))

    # TODO: hack to get around inability to have commas in values;
    # fix original issue instead
    for settings in by_term.values():
        for key in settings:
            settings[key] = settings[key].replace("-", ",")

    # propagate defaults (TODO: get rid of magic "DEFAULT" values)
    default_dicts = [by_term.get(name, {})
                     for name in (VISUAL_SPAN_DEFAULT, VISUAL_ARC_DEFAULT)]
    for defaults in default_dicts:
        for key in defaults:
            for settings in by_term.values():
                # only fill in keys the term does not define itself
                settings.setdefault(key, defaults[key])

    memo[directory] = by_term
    return by_term.get(term, None)
Ejemplo n.º 37
0
def filter_folia(ann_obj):
    """
    Return the folia data of `ann_obj`, filtered by the "foliaLayers"
    list stored in the session configuration.

    - If no layer list is available (no session, missing key, or any
      other error while reading it), `ann_obj.folia` is returned as is.
    - If the list contains 'all', a response with empty entities,
      comments, relations, attributes and tokens is returned.
    - Otherwise entities and relations whose layer (index 3) is listed,
      or which are flagged at index 4 while 'alter' is listed, are
      dropped, together with the relations, attributes and comments
      that refer to a dropped id.
    """
    # Imported before the try block: the handler below names
    # session.NoSessionError, so `session` must already be bound even
    # when a later step (e.g. the simplejson import) is what fails.
    import session

    response = {"entities": [], "comments": [], "relations": [],
                "attributes": [], "tokens": {}}
    try:
        import simplejson as json
        val = json.loads(session.load_conf()["config"])["foliaLayers"]
    except (session.NoSessionError, KeyError):
        # no session or no layer setting: nothing to filter
        val = []
    except Exception as e:
        val = []
        Messager.error("Error while enabling/disabling folia layers: "+str(e))

    try:
        response["tokens"] = ann_obj.folia["tokens"]
    except KeyError:
        pass

    if not val:
        return ann_obj.folia

    forbidden = set(val)
    alternatives = "alter" in val
    if 'all' in val:
        response["tokens"] = {}
        return response

    # ids of everything dropped so far; dependants of these are dropped too
    removed = set()
    try:
        kept = []
        for ent in ann_obj.folia["entities"]:
            if ent[3] not in forbidden and not (ent[4] and alternatives):
                kept.append(ent)
            else:
                removed.add(ent[0])
        response["entities"] = kept

        kept = []
        for rel in ann_obj.folia["relations"]:
            if (rel[3] not in forbidden
                    and rel[2][0][1] not in removed
                    and rel[2][1][1] not in removed
                    and not (rel[4] and alternatives)):
                kept.append(rel)
            else:
                removed.add(rel[0])
        response["relations"] = kept

        response["attributes"] = [att for att in ann_obj.folia["attributes"]
                                  if att[2] not in removed]
        response["comments"] = [com for com in ann_obj.folia["comments"]
                                if com[0] not in removed]
    except KeyError:
        # best effort: a missing section leaves the remaining
        # response entries at their empty defaults
        pass
    return response
Ejemplo n.º 38
0
def __read_term_hierarchy(input):
    """
    Parse the lines of `input` into a list of root-level entries.

    Blank lines and '#' comment lines are skipped, lines consisting of
    hyphens add SEPARATOR_STR, and lines of the form <NAME>=VALUE define
    macros that are expanded in all following lines. Every other line
    becomes a TypeHierarchyNode whose depth is the length of its leading
    whitespace; a node at depth > 0 is attached as a child of the most
    recent node at depth - 1.
    """
    roots = []
    latest_at_depth = {}
    macro_defs = {}

    for line in input:
        # skip empties and lines starting with '#'
        if line.strip() == '' or re.match(r'^\s*#', line):
            continue

        # interpret lines of only hyphens as separators
        # for display
        if re.match(r'^\s*-+\s*$', line):
            # TODO: proper placeholder and placing
            roots.append(SEPARATOR_STR)
            continue

        # interpret lines of the format <STR1>=STR2 as "macro"
        # definitions, defining <STR1> as a placeholder that should be
        # replaced with STR2 wherever it occurs.
        macro_match = re.match(r'^<([a-zA-Z_-]+)>=\s*(.*?)\s*$', line)
        if macro_match:
            name, value = macro_match.groups()
            if name in reserved_macro_name:
                Messager.error("Cannot redefine <%s> in configuration, it is a reserved name." % name)
                # TODO: proper exception
                assert False
            else:
                macro_defs["<%s>" % name] = value
            continue

        # macro expansion
        for placeholder, replacement in macro_defs.items():
            line = line.replace(placeholder, replacement)

        parsed = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
        assert parsed, "Error parsing line: '%s'" % line
        indent, raw_terms, raw_args = parsed.groups()

        terms = [t for t in (part.strip() for part in raw_terms.split("|"))
                 if t != ""]
        if raw_args is None:
            args = []
        else:
            # an all-whitespace argument field yields an empty list too
            args = [a for a in (part.strip() for part in raw_args.split(","))
                    if a != ""]

        # depth in the ontology corresponds to the number of
        # spaces in the initial indent.
        depth = len(indent)

        node = TypeHierarchyNode(terms, args)
        if depth == 0:
            # root level, no children assignments
            roots.append(node)
        else:
            # assign as child of last node at the depth of the parent
            assert depth-1 in latest_at_depth, "Error: no parent for '%s'" % line
            latest_at_depth[depth-1].children.append(node)
        latest_at_depth[depth] = node

    return roots
def _create_relation(ann_obj, projectconf, mods, origin, target, type,
                     attributes, old_type, old_target, undo_resp=None):
    """
    Create a binary relation from `origin` to `target`, or update an
    existing one when `old_type` and/or `old_target` is given.

    When updating, the relation matching the old type/target is looked
    up among `ann_obj`'s relations and its type and target are changed
    in place; an error message is sent if no such relation is found.
    Attributes are then set on the resulting relation. All changes are
    recorded in `mods`.

    Returns the created or modified relation annotation, or None if the
    relation to update could not be identified.
    """
    # Fresh dict per call: a mutable default would be shared between calls.
    if undo_resp is None:
        undo_resp = {}

    attributes = _parse_attributes(attributes)

    if old_type is not None or old_target is not None:
        assert type in projectconf.get_relation_types(), (
                ('attempting to convert relation to non-relation "%s" ' % (type, )) +
                ('(legit types: %s)' % (unicode(projectconf.get_relation_types()), )))

        sought_target = (old_target
                if old_target is not None else target.id)
        sought_type = (old_type
                if old_type is not None else type)
        sought_origin = origin.id

        # We are to change the type, target, and/or attributes
        found = None
        for candidate in ann_obj.get_relations():
            if (candidate.arg1 == sought_origin and
                candidate.arg2 == sought_target and
                candidate.type == sought_type):
                found = candidate
                break

        if found is None:
            # TODO: better response
            Messager.error('_create_relation: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))
        elif found.arg2 == target.id and found.type == type:
            # no changes to type or target
            pass
        else:
            # type and/or target changed, mark.
            before = unicode(found)
            found.arg2 = target.id
            found.type = type
            mods.change(before, found)

        target_ann = found
    else:
        # Create a new annotation
        new_id = ann_obj.get_new_id('R')
        # TODO: do we need to support different relation arg labels
        # depending on participant types? This doesn't.
        rels = projectconf.get_relations_by_type(type)
        rel = rels[0] if rels else None
        assert rel is not None and len(rel.arg_list) == 2
        a1l, a2l = rel.arg_list
        target_ann = BinaryRelationAnnotation(new_id, type, a1l, origin.id, a2l, target.id, '\t')
        mods.addition(target_ann)
        ann_obj.add_annotation(target_ann)

    # process attributes
    if target_ann is not None:
        _set_attributes(ann_obj, target_ann, attributes, mods, undo_resp)
    elif attributes is not None:
        Messager.error('_create_relation: cannot set arguments: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))

    return target_ann
Ejemplo n.º 40
0
def _safe_serve(params, client_ip, client_hostname, cookie_data):
    """
    Set up logging and the session, then dispatch one request.

    The request arguments in `params` are converted to unicode and
    passed to `dispatch`. A ProtocolError raised along the way is turned
    into a JSON dict via its `json` method, and its message (if any) is
    sent to the client as an error.
    """
    # Note: Only logging imports here
    from config import WORK_DIR
    from logging import basicConfig as log_basic_config

    # Enable logging
    try:
        from config import LOG_LEVEL

        log_level = _convert_log_level(LOG_LEVEL)
    except ImportError:
        # no LOG_LEVEL in the configuration: default to warnings
        from logging import WARNING as LOG_LEVEL_WARNING

        log_level = LOG_LEVEL_WARNING
    log_basic_config(filename=path_join(WORK_DIR, "server.log"), level=log_level)

    # Do the necessary imports after enabling the logging, order critical
    try:
        from common import ProtocolError, ProtocolArgumentError, NoPrintJSONError
        from dispatch import dispatch
        from jsonwrap import dumps
        from message import Messager
        from session import get_session, init_session, close_session, NoSessionError, SessionStoreError
    except ImportError:
        # Note: Heisenbug trap for #612, remove after resolved
        from logging import critical as log_critical
        from sys import path as sys_path

        log_critical("Heisenbug trap reports: " + str(sys_path))
        raise

    init_session(client_ip, cookie_data=cookie_data)
    response_is_JSON = True
    try:
        # Unpack the arguments into something less obscure than the
        #   Python FieldStorage object (part dictonary, part list, part FUBAR)
        http_args = DefaultNoneDict()
        for k in params:
            # Also take the opportunity to convert Strings into Unicode,
            #   according to HTTP they should be UTF-8
            try:
                http_args[k] = unicode(params.getvalue(k), encoding="utf-8")
            except TypeError:
                Messager.error(
                    "protocol argument error: expected string argument %s, got %s" % (k, type(params.getvalue(k)))
                )
                raise ProtocolArgumentError

        # Dispatch the request
        json_dic = dispatch(http_args, client_ip, client_hostname)
    except ProtocolError as e:
        # Internal error, only reported to client not to log
        json_dic = {}
        e.json(json_dic)

        # Add a human-readable version of the error
        err_str = str(e)
        if err_str != "":
            Messager.error(err_str, duration=-1)
    # NOTE(review): the visible code ends here without returning or
    # serializing json_dic; the remainder of this function appears to
    # be missing from this copy - confirm against the full source.
Ejemplo n.º 41
0
Archivo: auth.py Proyecto: bepnye/brat
def login(user, password):
    """
    Store `user` in the session after checking the credentials.

    Raises InvalidAuthError if `_is_authenticated` rejects the pair.
    Sends a greeting message and returns an empty dict on success.
    """
    authenticated = _is_authenticated(user, password)
    if not authenticated:
        raise InvalidAuthError

    session = get_session()
    session['user'] = user
    greeting = 'Hello, your ID is ' + user  ##JESSY
    Messager.info(greeting)
    return {}
Ejemplo n.º 42
0
 def __init__(self, directory):
     """
     Remember the project `directory`; emit a debug message if it does
     not look like an absolute path.
     """
     # debugging (note: latter test for windows paths)
     starts_at_root = directory[:1] == "/"
     if not starts_at_root and not re.search(r'^[a-zA-Z]:\\', directory):
         warning_text = (
             "Project config received relative directory ('%s'), configuration may not be found."
             % directory)
         Messager.debug(warning_text, duration=-1)
     self.directory = directory
Ejemplo n.º 43
0
def whoami():
    """
    Return a dict holding the session's 'user' value under 'user'.

    If the lookup raises KeyError, an error message is sent and an
    empty dict is returned instead.
    """
    try:
        current_user = get_session().get('user')
    except KeyError:
        # TODO: Really send this message?
        Messager.error('Not logged in!', duration=3)
        return {}
    return {'user': current_user}
Ejemplo n.º 44
0
def whoami():
    """
    Return a dict holding the session's 'user' value under 'user'.

    If the lookup raises KeyError, an error message is sent and an
    empty dict is returned instead.
    """
    try:
        current_user = get_session().get('user')
    except KeyError:
        # TODO: Really send this message?
        Messager.error('Not logged in!', duration=3)
        return {}
    return {'user': current_user}
Ejemplo n.º 45
0
def whoami():
    """
    Return a dict holding the session's "user" value under "user".

    If the lookup raises KeyError, an error message is sent and an
    empty dict is returned instead.
    """
    try:
        current_user = get_session().get("user")
    except KeyError:
        # TODO: Really send this message?
        Messager.error("Not logged in!", duration=3)
        return {}
    return {"user": current_user}
Ejemplo n.º 46
0
def _listdir(directory):
    """
    Return the entries of `directory` that are not hidden and that the
    current user is allowed to read.

    Raises AnnotationCollectionNotFoundError (after sending an error
    message) if an OSError occurs while checking or listing the
    directory.
    """
    # return listdir(directory)
    try:
        assert_allowed_to_read(directory)
        return [f for f in listdir(directory)
                if not _is_hidden(f)
                and allowed_to_read(path_join(directory, f))]
    except OSError as e:
        Messager.error("Error listing %s: %s" % (directory, e))
        raise AnnotationCollectionNotFoundError(directory)
Ejemplo n.º 47
0
    def json_response(self, response=None):
        """
        Add the tracked modifications to `response` (a new dict if None)
        and return it.

        Sets response['edited'] to the reference ids of added and
        changed annotations (duplicates removed, order preserved) and
        response['deleted'] to the ids of deleted annotations.
        Annotations lacking the needed attribute are skipped. When DEBUG
        is set, a summary of the changes is also sent as an info message.
        """
        if response is None:
            response = {}

        # debugging
        if DEBUG:
            # one entry per kind of change; joined with newlines so the
            # sections do not run into each other
            sections = []
            if self.__added:
                sections.append('Added the following line(s):\n'
                        + '\n'.join([unicode(a).rstrip() for a in self.__added]))
            if self.__changed:
                changed_strs = []
                for before, after in self.__changed:
                    changed_strs.append('\t%s\n\tInto:\n\t%s' % (unicode(before).rstrip(), unicode(after).rstrip()))
                sections.append('Changed the following line(s):\n'
                        + '\n'.join([unicode(a).rstrip() for a in changed_strs]))
            if self.__deleted:
                sections.append('Deleted the following line(s):\n'
                        + '\n'.join([unicode(a).rstrip() for a in self.__deleted]))
            if sections:
                Messager.info('\n'.join(sections), duration=3*len(self))
            else:
                Messager.info('No changes made')

        # highlighting
        edited = []
        # TODO: implement cleanly, e.g. add a highlightid() method to Annotation classes
        for a in self.__added:
            try:
                edited.append(a.reference_id())
            except AttributeError:
                pass # not all implement reference_id()
        for _before, after in self.__changed:
            # can't mark "before" since it's stopped existing
            try:
                edited.append(after.reference_id())
            except AttributeError:
                pass # not all implement reference_id()

        # unique, preserve order (compared by string form)
        seen = set()
        uniqued = []
        for i in edited:
            s = str(i)
            if s not in seen:
                uniqued.append(i)
                seen.add(s)
        response['edited'] = uniqued

        #added deleted  by sander naert
        response['deleted'] = []
        for a in self.__deleted:
            try:
                response['deleted'].append(a.id)
            except AttributeError:
                pass # annotations without an id are skipped

        return response
Ejemplo n.º 48
0
def getAnnObject2(collection,document):
    '''
    Newest version of the getAnnObject method.

    Return the annotation object for `document` in `collection`. A
    pickled copy under WORK_DIR + "/application/" is loaded if present;
    otherwise the annotations are read from disk, wrapped in
    SimpleAnnotations and extended with the extra folia info. The
    validation issues are then (re)computed according to the session's
    "validationOn" setting, stored on the object, and the object is
    pickled back to the same file before being returned.
    '''
    try:
        from document import real_directory
        real_dir = real_directory(collection)
    except Exception:
        # best effort: fall back on the collection path as given
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    cache_path = app_path + (collection + document).replace("/", "")

    if isfile(cache_path):
        # NOTE(review): unpickling is only safe for files this server
        # wrote itself; confirm the directory is not writable by others.
        with open(cache_path, 'rb') as cache_file:
            ann = pickle_load(cache_file)
    else:
        ann = TextAnnotations(real_dir+document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)

    #Validation:
    try:
        import os
        import simplejson as json
        import session
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:
                from verify_annotations import verify_annotation
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except Exception:
        # Expected cases are a missing session (session.NoSessionError)
        # and a missing config key (KeyError); any other failure is
        # treated the same way. A single handler is used so that it
        # never has to name `session`, which is unbound if an import
        # above failed.
        # TODO add an issue about the failure?
        issues = []
    ann.issues = issues

    with open(cache_path, 'wb') as cache_file:
        pickle_dump(ann, cache_file)
    return ann
Ejemplo n.º 49
0
def _listdir(directory):
    """
    List the entries of `directory` that are not hidden and that may be
    read.

    An OSError during the check or the listing is reported as an error
    message and re-raised as AnnotationCollectionNotFoundError.
    """
    # return listdir(directory)
    try:
        assert_allowed_to_read(directory)
        visible = []
        for entry in listdir(directory):
            if _is_hidden(entry):
                continue
            if allowed_to_read(path_join(directory, entry)):
                visible.append(entry)
        return visible
    except OSError as e:
        Messager.error("Error listing %s: %s" % (directory, e))
        raise AnnotationCollectionNotFoundError(directory)
Ejemplo n.º 50
0
def jp_token_boundary_gen(text):
    """
    Generate the token offsets that mecab's token_offsets_gen produces
    for `text`.

    Sends a warning if TOKENIZATION is set to something other than
    JAPANESE_TOKENIZATION.
    """
    # TODO: consider honoring WHITESPACE_TOKENIZATION for japanese also
    tokenization_ok = (TOKENIZATION is None
                       or TOKENIZATION == JAPANESE_TOKENIZATION)
    if not tokenization_ok:
        from message import Messager
        Messager.warning('Ignoring unexpected TOKENIZATION '
                'specification for Japanese.')
    from mecab import token_offsets_gen
    for offsets in token_offsets_gen(text):
        yield offsets
Ejemplo n.º 51
0
def logout():
    """
    Remove 'user' from the session, send a goodbye message and return
    an empty dict. A KeyError during the removal is ignored.
    """
    try:
        session = get_session()
        del session['user']
    except KeyError:
        # Already deleted, let it slide
        pass
    # TODO: Really send this message?
    Messager.info('Bye!')
    return {}
Ejemplo n.º 52
0
def create_arc(collection, document, origin, target, type, attributes=None,
        old_type=None, old_target=None, comment=None):
    """
    Create an arc of `type` from annotation id `origin` to annotation id
    `target` in `document`, or update the arc identified by
    `old_type`/`old_target`.

    Depending on the project configuration the arc is handled as an
    equiv, a relation or an argument. Raises AnnotationsIsReadOnlyError
    if the annotations are read-only. Returns the modification tracker's
    JSON response with the document's annotations added under
    'annotations'.
    """
    undo_resp = {}
    real_dir = real_directory(collection)
    mods = ModificationTracker()
    projectconf = ProjectConfiguration(real_dir)
    doc_path = path_join(real_dir, document)

    with TextAnnotations(doc_path) as ann_obj:
        # bail as quick as possible if read-only
        # TODO: make consistent across the different editing
        # functions, integrate ann_obj initialization and checks
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        origin = ann_obj.get_ann_by_id(origin)
        target = ann_obj.get_ann_by_id(target)

        # if there is a previous annotation and the arcs aren't in
        # the same category (e.g. relation vs. event arg), process
        # as delete + create instead of update.
        category_changed = False
        if old_type is not None:
            category_changed = (
                projectconf.is_relation_type(old_type) !=
                projectconf.is_relation_type(type) or
                projectconf.is_equiv_type(old_type) !=
                projectconf.is_equiv_type(type))
        if category_changed:
            _delete_arc_with_ann(origin.id, old_target, old_type, mods,
                                 ann_obj, projectconf)
            old_target, old_type = None, None

        # pick the creation routine matching the arc category
        if projectconf.is_equiv_type(type):
            creator = _create_equiv
        elif projectconf.is_relation_type(type):
            creator = _create_relation
        else:
            creator = _create_argument
        ann = creator(ann_obj, projectconf, mods, origin, target,
                      type, attributes, old_type, old_target)

        # process comments
        if ann is not None:
            _set_comments(ann_obj, ann, comment, mods,
                          undo_resp=undo_resp)
        elif comment is not None:
            Messager.warning('create_arc: non-empty comment for None annotation (unsupported type for comment?)')

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
Ejemplo n.º 53
0
def logout():
    """
    Remove 'user' from the session, send a goodbye message and return
    an empty dict. A KeyError during the removal is ignored.
    """
    try:
        session = get_session()
        del session['user']
    except KeyError:
        # Already deleted, let it slide
        pass
    # TODO: Really send this message?
    Messager.info('Bye!')
    return {}
Ejemplo n.º 54
0
    def __init__(self, document, read_only=False):
        """
        Load the annotations for `document`.

        Selects the input files for the document, parses them and runs
        the post-parse sanity check. Raises AnnotationFileNotFoundError
        if no input file is found, or (after sending an error message)
        if a UnicodeDecodeError occurs while reading.
        """
        #TODO: Incorparate file locking! Is the destructor called upon inter crash?
        from collections import defaultdict
        from os.path import basename, getmtime, getctime
        #from fileinput import FileInput, hook_encoded

        # we should remember this
        self._document = document

        self.failed_lines = []

        ### Here be dragons, these objects need constant updating and syncing
        # Annotation for each line of the file
        self._lines = []
        # Mapping between annotation objects and which line they occur on
        # Range: [0, inf.) unlike [1, inf.) which is common for files
        self._line_by_ann = {}
        # Maximum id number used for each id prefix, to speed up id generation
        #XXX: This is effectively broken by the introduction of id suffixes
        self._max_id_num_by_prefix = defaultdict(lambda : 1)
        # Annotation by id, not including non-ided annotations
        self._ann_by_id = {}
        ###

        ## We use some heuristics to find the appropriate annotation files
        self._read_only = read_only
        candidates = self._select_input_files(document)
        if not candidates:
            raise AnnotationFileNotFoundError(document)

        # We then try to open the files we got using the heuristics
        #self._file_input = FileInput(openhook=hook_encoded('utf-8'))
        self._input_files = candidates

        # Finally, parse the given annotation file
        try:
            self._parse_ann_file()

            # Sanity checking that can only be done post-parse
            self._sanity()
        except UnicodeDecodeError:
            Messager.error('Encoding error reading annotation file: '
                    'nonstandard encoding or binary?', -1)
            # TODO: more specific exception
            raise AnnotationFileNotFoundError(document)

        #XXX: Hack to get the timestamps after parsing
        single_joined_file = (
            len(self._input_files) == 1 and
            self._input_files[0].endswith(JOINED_ANN_FILE_SUFF))
        if single_joined_file:
            ann_path = self._input_files[0]
            self.ann_mtime = getmtime(ann_path)
            self.ann_ctime = getctime(ann_path)
        else:
            # We don't have a single file, just set to epoch for now
            self.ann_mtime = 0
            self.ann_ctime = 0
Ejemplo n.º 55
0
def create_arc(collection, document, origin, target, type, attributes=None,
               old_type=None, old_target=None, comment=None):
    """
    Create an arc of `type` from annotation id `origin` to annotation id
    `target` in `document`, or update the arc identified by
    `old_type`/`old_target`.

    Depending on the project configuration the arc is handled as an
    equiv, a relation or an argument. Raises AnnotationsIsReadOnlyError
    if the annotations are read-only. Returns the modification tracker's
    JSON response with the document's annotations added under
    'annotations'.
    """
    undo_resp = {}
    real_dir = real_directory(collection)
    mods = ModificationTracker()
    projectconf = ProjectConfiguration(real_dir)
    doc_path = path_join(real_dir, document)

    with TextAnnotations(doc_path) as ann_obj:
        # bail as quick as possible if read-only
        # TODO: make consistent across the different editing
        # functions, integrate ann_obj initialization and checks
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        origin = ann_obj.get_ann_by_id(origin)
        target = ann_obj.get_ann_by_id(target)

        # if there is a previous annotation and the arcs aren't in
        # the same category (e.g. relation vs. event arg), process
        # as delete + create instead of update.
        category_changed = False
        if old_type is not None:
            category_changed = (
                projectconf.is_relation_type(old_type) !=
                projectconf.is_relation_type(type) or
                projectconf.is_equiv_type(old_type) !=
                projectconf.is_equiv_type(type))
        if category_changed:
            _delete_arc_with_ann(origin.id, old_target, old_type, mods,
                                 ann_obj, projectconf)
            old_target, old_type = None, None

        # pick the creation routine matching the arc category
        if projectconf.is_equiv_type(type):
            creator = _create_equiv
        elif projectconf.is_relation_type(type):
            creator = _create_relation
        else:
            creator = _create_argument
        ann = creator(ann_obj, projectconf, mods, origin, target,
                      type, attributes, old_type, old_target)

        # process comments
        if ann is not None:
            _set_comments(ann_obj, ann, comment, mods,
                          undo_resp=undo_resp)
        elif comment is not None:
            Messager.warning(
                'create_arc: non-empty comment for None annotation (unsupported type for comment?)')

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json