def _get_match_regex(text, text_match="word", match_case=False,
                     whole_string=False):
    """
    Helper for the various search_anns_for_ functions.
    """
    if match_case:
        regex_flags = 0
    else:
        regex_flags = re.IGNORECASE
    if text is None:
        text = ''

    if text_match == "word":
        # full word match: require word boundaries or, optionally,
        # whole string boundaries
        if whole_string:
            return re.compile(r'^' + re.escape(text) + r'$', regex_flags)
        else:
            return re.compile(r'\b' + re.escape(text) + r'\b', regex_flags)
    elif text_match == "substring":
        # any substring match, as text (nonoverlapping matches)
        return re.compile(re.escape(text), regex_flags)
    elif text_match == "regex":
        try:
            return re.compile(text, regex_flags)
        except re.error:
            # re.error also covers the older sre_constants.error
            Messager.warning('Given string "%s" is not a valid regular expression.' % text)
            return None
    else:
        Messager.error('Unrecognized search match specification "%s"' % text_match)
        return None
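# Usage sketch for _get_match_regex (hypothetical calls, assuming re and
# Messager are imported as in the module above):
#
#   r = _get_match_regex('cat', text_match='word')
#   assert r.search('the cat sat') and not r.search('concatenate')
#
#   r = _get_match_regex('cat', text_match='substring', match_case=True)
#   assert r.search('concatenate') and not r.search('CONCATENATE')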
def _server_crash(cookie_hdrs, e):
    from config import ADMIN_CONTACT_EMAIL, DEBUG
    from jsonwrap import dumps
    from message import Messager
    from sys import stderr
    from time import time

    stack_trace = _get_stack_trace()

    if DEBUG:
        # Send back the stack-trace as json
        error_msg = '\n'.join(('Server Python crash, stack-trace is:\n',
                               stack_trace))
        Messager.error(error_msg, duration=-1)
    else:
        # Give the user an error message
        # Use the current time since epoch as an id for later log look-up
        error_msg = ('The server encountered a serious error, '
                     'please contact the administrators at %s '
                     'and give the id #%d'
                     ) % (ADMIN_CONTACT_EMAIL, int(time()))
        Messager.error(error_msg, duration=-1)

    # Print to stderr so that the exception is logged by the webserver
    print(stack_trace, file=stderr)

    json_dic = {
        'exception': 'serverCrash',
    }
    return (cookie_hdrs, ((JSON_HDR, ), dumps(Messager.output_json(json_dic))))
def _config_check():
    from message import Messager
    from sys import path
    from copy import deepcopy
    from os.path import dirname
    # Reset the path to force config.py to be in the root (could be hacked
    # using __init__.py, but we can be monkey-patched anyway)
    orig_path = deepcopy(path)

    # Clear the path in place: sys.path is referenced elsewhere, so we must
    # mutate the existing list object rather than rebind the name
    while path:
        path.pop()
    path.append(path_join(abspath(dirname(__file__)), '../..'))
    # Check if we have a config, otherwise whine
    try:
        import config
        del config
    except ImportError as e:
        path.extend(orig_path)
        # "Prettiest" way to check for this specific failure
        if getattr(e, 'name', None) == 'config':
            Messager.error(_miss_config_msg(), duration=-1)
        else:
            Messager.error(_get_stack_trace(), duration=-1)
        raise ConfigurationError
def attributes_for(self, ann_type):
    """
    Returns a list of the possible attribute types for an annotation
    of the given type.
    """
    attrs = []
    for attr in get_attribute_type_list(self.directory):
        if attr == SEPARATOR_STR:
            continue

        if 'Arg' not in attr.arguments:
            Messager.warning(
                "Project configuration error: attribute '%s' lacks 'Arg:' specification." % attr.storage_form())
            continue

        types = attr.arguments['Arg']

        if ((ann_type in types) or
                (self.is_event_type(ann_type) and '<EVENT>' in types) or
                (self.is_physical_entity_type(ann_type) and '<ENTITY>' in types)):
            attrs.append(attr.storage_form())

    return attrs
def ssdb_build(strs, dbname, ngram_length=DEFAULT_NGRAM_LENGTH,
               include_marks=DEFAULT_INCLUDE_MARKS):
    """
    Given a list of strings, a DB name, and simstring options, builds
    a simstring DB for the strings.
    """
    try:
        import simstring
    except ImportError:
        Messager.error(SIMSTRING_MISSING_ERROR, duration=-1)
        raise NoSimStringError

    dbfn = __ssdb_path(dbname)
    try:
        # only library defaults (n=3, no marks) supported just now (TODO)
        assert ngram_length == 3, "Error: unsupported n-gram length"
        assert include_marks is False, "Error: begin/end marks not supported"
        db = simstring.writer(dbfn)
        for s in strs:
            db.insert(s)
        db.close()
    except Exception:
        print("Error building simstring DB", file=sys.stderr)
        raise

    return dbfn
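# Usage sketch (hypothetical DB name and strings):
#
#   dbfn = ssdb_build(['protein kinase', 'protein phosphatase'], 'mydb')
#   # dbfn is the path of the simstring DB the lookup helpers below can read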
def _parse_attributes(attributes):
    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse, warn the client
            Messager.warning(
                ('Unable to parse attributes string "%s" for '
                 '"createSpan", ignoring attributes for request and '
                 'assuming no attributes set') % (attributes, ))
            _attributes = {}

        # XXX: Hack since the client is sending back False and True as values...
        # False-valued attributes are __not__ to be sent; they violate the protocol
        for _del in [k for k, v in list(_attributes.items()) if v is False]:
            del _attributes[_del]
        # Normalize the remaining truthy values to old-style valueless modifiers
        for _revalue in [k for k, v in list(_attributes.items()) if v]:
            _attributes[_revalue] = True
        ###
    return _attributes
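# Example of the normalization above (hypothetical client payload):
#
#   _parse_attributes('{"Negation": true, "Speculation": false}')
#   # -> {'Negation': True}   (False-valued keys are dropped)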
def wrapper(*args, **kwds): if DEBUG: Messager.warning( ('Client sent "%s" action ' 'which is marked as deprecated') % func.__name__,) return func(*args, **kwds)
def login(user, password): if not _is_authenticated(user, password): raise InvalidAuthError get_session()['user'] = user Messager.info('Hello!') return {}
def possible_arc_types(collection, origin_type, target_type):
    directory = collection

    real_dir = real_directory(directory)
    projectconf = ProjectConfiguration(real_dir)
    response = {}

    try:
        possible = projectconf.arc_types_from_to(origin_type, target_type)

        # TODO: proper error handling
        if possible is None:
            Messager.error('Error selecting arc types!', -1)
        elif possible == []:
            # nothing to select
            response['html'] = generate_empty_fieldset()
            response['keymap'] = {}
            response['empty'] = True
        else:
            # XXX TODO: intentionally breaking this; KB shortcuts
            # should no longer be sent here. Remove 'keymap' and
            # 'html' args once clientside generation done.
            arc_kb_shortcuts = {}  # select_keyboard_shortcuts(possible)

            response['keymap'] = {}
            for k, p in arc_kb_shortcuts.items():
                response['keymap'][k] = "arc_" + p

            response['html'] = generate_arc_type_html(projectconf, possible,
                                                      arc_kb_shortcuts)
    except Exception:
        Messager.error('Error selecting arc types!', -1)
        raise

    return response
def ann_logger(directory):
    """
    Lazy initializer for the annotation logger. Returns None if
    annotation logging is not configured for the given directory and a
    logger otherwise.
    """
    if ann_logger.__logger is False:
        # not initialized
        annlogfile = options_get_annlogfile(directory)
        if annlogfile == '<NONE>':
            # not configured
            ann_logger.__logger = None
        else:
            # initialize
            try:
                l = logging.getLogger('annotation')
                l.setLevel(logging.INFO)
                handler = logging.FileHandler(annlogfile)
                handler.setLevel(logging.INFO)
                formatter = logging.Formatter('%(asctime)s\t%(message)s')
                handler.setFormatter(formatter)
                l.addHandler(handler)
                ann_logger.__logger = l
            except IOError as e:
                Messager.error(
                    """Error: failed to initialize annotation log %s: %s.
Edit action not logged.
Please check the Annotation-log logfile setting in tools.conf""" % (annlogfile, e))
                logging.error("Failed to initialize annotation log %s: %s" %
                              (annlogfile, e))
                ann_logger.__logger = None

    return ann_logger.__logger
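# The lazy-init pattern above keeps its state on the function object, so the
# sentinel must be assigned once at module load (without it the first call
# would raise AttributeError):
ann_logger.__logger = False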
def import_files(self, directory):
    real_dir = real_directory(directory)
    assert_allowed_to_read(real_dir)
    # Get the document names, stripping the '.txt' suffix
    file_names = [fn[0:-4] for fn in _listdir(real_dir)
                  if fn.endswith('.txt')]
    cursor = self.conn.cursor()
    try:
        for filename in file_names:
            state, fid, fileName, fileDirAbs, uid, userName = (
                Ann_NULL, 0, filename, directory, 0, None)
            cursor.execute(
                _INSERT_ANN_SQL,
                (state, fid, fileName, fileDirAbs, uid, userName))
        self.conn.commit()
    except sqlite3.Error as e:
        Messager.error("Database error: %s" % e)
        self.conn.rollback()
    except Exception as e:
        Messager.error("Exception in _query: %s" % e)
        self.conn.rollback()
    finally:
        cursor.close()
def set_Ann_state(self, directory, file, state):
    real_dir = real_directory(directory)
    assert_allowed_to_read(real_dir)
    # check that the record exists, then update its state
    cursor = self.conn.cursor()
    try:
        cursor.execute(
            """SELECT userName FROM Ann WHERE fileDirAbs = ? and fileName = ?;""",
            (directory, file))
        rows = cursor.fetchall()
        if len(rows) != 0:
            # record found: update its state
            cursor.execute(
                """UPDATE Ann SET state = ? WHERE fileDirAbs = ? and fileName = ?;""",
                (state, directory, file))
        self.conn.commit()
    except sqlite3.Error as e:
        Messager.error("Database error: %s" % e)
        self.conn.rollback()
    except Exception as e:
        Messager.error("Exception in _query: %s" % e)
        self.conn.rollback()
    finally:
        cursor.close()
def __init__(self):
    # Connect to the SQLite database. The database file is DB_FNAME; if it
    # does not exist, it is created automatically in the current directory.
    flag_exist = os.path.isfile(DB_FNAME)
    self.conn = sqlite3.connect(DB_FNAME)
    if flag_exist:
        return

    cursor = self.conn.cursor()
    try:
        cursor.execute(_CREATE_ANN_SQL)
        self.conn.commit()
    except sqlite3.Error as e:
        Messager.error("Database error: %s" % e)
        self.conn.rollback()
        self.conn.close()
    except Exception as e:
        Messager.error("Exception in _query: %s" % e)
        self.conn.rollback()
        self.conn.close()
    finally:
        cursor.close()

    en_import_DATA = True
    if en_import_DATA:
        for dir in [x[0].replace(DATA_DIR, '') + '/'
                    for x in os.walk(DATA_DIR)]:
            if len(dir) > 1:
                self.import_files(dir)
def retrieve_stored(document, suffix): stored_path = _stored_path()+'.'+suffix if not isfile(stored_path): # @ninjin: not sure what 'version' was supposed to be returned # here, but none was defined, so returning that # raise NoSVGError(version) raise NoSVGError('None') filename = document+'.'+suffix # sorry, quick hack to get the content-type right # TODO: send this with initial 'stored' response instead of # guessing on suffix if suffix == SVG_SUFFIX: content_type = 'image/svg+xml' elif suffix == PNG_SUFFIX: content_type = 'image/png' elif suffix == PDF_SUFFIX: content_type = 'application/pdf' elif suffix == EPS_SUFFIX: content_type = 'application/postscript' else: Messager.error('Unknown suffix "%s"; cannot determine Content-Type' % suffix) # TODO: reasonable backoff value content_type = None # Bail out with a hack since we violated the protocol hdrs = [('Content-Type', content_type), ('Content-Disposition', 'inline; filename=' + filename)] with open(stored_path, 'rb') as stored_file: data = stored_file.read() raise NoPrintJSONError(hdrs, data)
def _get_db_path(database, collection):
    if collection is None:
        # TODO: default to WORK_DIR config?
        return (None, Simstring.DEFAULT_UNICODE)
    else:
        try:
            conf_dir = real_directory(collection)
            projectconf = ProjectConfiguration(conf_dir)
            norm_conf = projectconf.get_normalization_config()
            for entry in norm_conf:
                # TODO THIS IS WRONG
                dbname, dbpath, dbunicode = entry[0], entry[3], entry[4]
                if dbname == database:
                    return (dbpath, dbunicode)
            # not found in config.
            Messager.warning('DB ' + database + ' not defined in config for ' +
                             collection + ', falling back on default.')
            return (None, Simstring.DEFAULT_UNICODE)
        except Exception:
            # whatever goes wrong, just warn and fall back on the default.
            Messager.warning('Failed to get DB path from config for ' +
                             collection + ', falling back on default.')
            return (None, Simstring.DEFAULT_UNICODE)
def allowed_to_read(real_path):
    data_path = path_join('/', relpath(real_path, DATA_DIR))
    # add trailing slash to directories, required to comply with robots.txt
    if isdir(real_path):
        data_path = '%s/' % (data_path)

    real_dir = dirname(real_path)
    robotparser = ProjectConfiguration(real_dir).get_access_control()
    if robotparser is None:
        return True  # default allow

    # directory read permission
    try:
        user = get_session().get('user')
        if user is None:
            Messager.error('Not logged in!', duration=3)
            user = '******'
    except KeyError:
        Messager.error('Not logged in!', duration=3)
        return False

    return robotparser.can_fetch(user, data_path)
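# The access control file reuses robots.txt semantics: the brat user name
# plays the role of the user agent and the collection path plays the role
# of the URL. A hypothetical ACL illustrating the idea:
#
#   User-agent: *
#   Disallow: /private/
#
#   User-agent: alice
#   Disallow:
#
# Here every user except alice is denied access to /private/.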
def ssdb_supstring_exists(s, dbname, threshold=DEFAULT_THRESHOLD):
    """
    Given a string s and a DB name, returns whether at least one string
    in the associated simstring DB likely contains s as an (approximate)
    substring.
    """
    try:
        import simstring
    except ImportError:
        Messager.error(SIMSTRING_MISSING_ERROR, duration=-1)
        raise NoSimStringError

    if threshold == 1.0:
        # optimized (not hugely, though) for this common case
        db = ssdb_open(dbname.encode('UTF-8'))
        __set_db_measure(db, 'overlap')
        db.threshold = threshold
        result = db.retrieve(s)
        db.close()

        # assume simstring DBs always contain UTF-8 - encoded strings
        result = [r.decode('UTF-8') for r in result]
        for r in result:
            if s in r:
                return True
        return False
    else:
        # naive implementation for everything else
        return len(ssdb_supstring_lookup(s, dbname, threshold)) != 0
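# Usage sketch (hypothetical DB name, assuming a DB built with ssdb_build):
#
#   if ssdb_supstring_exists('kinase', 'mydb', threshold=1.0):
#       ...  # at least one stored string contains 'kinase' exactly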
def _parse_relation_annotation(self, id, data, data_tail, input_file_path): try: type_delim = data.index(' ') type, type_tail = (data[:type_delim], data[type_delim:]) except ValueError: # cannot have a relation with just a type (contra event) raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path) try: args = [tuple(arg.split(':')) for arg in type_tail.split()] except ValueError: raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path) if len(args) != 2: Messager.error('Error parsing relation: must have exactly two arguments') raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path) args.sort() if args[0][0] == args[1][0]: Messager.error('Error parsing relation: arguments must not be identical') raise IdedAnnotationLineSyntaxError(id, self.ann_line, self.ann_line_num+1, input_file_path) return BinaryRelationAnnotation(id, type, args[0][0], args[0][1], args[1][0], args[1][1], data_tail, source_id=input_file_path)
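# Shape of the standoff line handled above (brat .ann relation format):
#
#   R1<TAB>Origin Arg1:T3 Arg2:T4
#
# 'data' is the field after the id and tab ("Origin Arg1:T3 Arg2:T4"); the
# type is the token before the first space, and the two role:target pairs
# are split on ':', sorted, and required to have distinct role names.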
def get_configs(directory, filename, defaultstr, minconf, sections):
    if (directory, filename) not in get_configs.__cache:
        configstr, source = __read_first_in_directory_tree(directory, filename)

        if configstr is None:
            # didn't get one; try the default dir and fall back to the default
            configstr = __read_or_default(filename, defaultstr)
            if configstr == defaultstr:
                Messager.info(
                    "Project configuration: no configuration file (%s) found, using default." % filename, 5)
                source = "[default]"
            else:
                source = filename

        # try to parse what was found, fall back to minimal config
        try:
            configs = __parse_configs(configstr, source, sections)
        except Exception:
            Messager.warning(
                "Project configuration: Falling back to minimal default. Configuration is likely wrong.", 5)
            configs = minconf

        get_configs.__cache[(directory, filename)] = configs

    return get_configs.__cache[(directory, filename)]
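# As with ann_logger above, the per-(directory, filename) memoization cache
# lives on the function object and must be created once at module load:
get_configs.__cache = {}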
def _check_DB_version(database): import fbkvdb if not fbkvdb.check_version(database): from message import Messager Messager.warning( "Warning: norm DB version mismatch: expected %s, got %s for %s" % (fbkvdb.NORM_DB_VERSION, fbkvdb.get_version(database), database))
def get_drawing_config_by_storage_form(directory, term): cache = get_drawing_config_by_storage_form.__cache if directory not in cache: d = {} for n in get_drawing_config(directory): t = n.storage_form() if t in d: Messager.warning( "Project configuration: term %s appears multiple times, only using last. Configuration may be wrong." % t, 5) d[t] = {} for a in n.arguments: if len(n.arguments[a]) != 1: Messager.warning( "Project configuration: expected single value for %s argument %s, got '%s'. Configuration may be wrong." % (t, a, "|".join(n.arguments[a]))) else: d[t][a] = n.arguments[a][0] # TODO: hack to get around inability to have commas in values; # fix original issue instead for t in d: for k in d[t]: d[t][k] = d[t][k].replace("-", ",") # propagate defaults (TODO: get rid of magic "DEFAULT" values) default_keys = [VISUAL_SPAN_DEFAULT, VISUAL_ARC_DEFAULT] for default_dict in [d.get(dk, {}) for dk in default_keys]: for k in default_dict: for t in d: d[t][k] = d[t].get(k, default_dict[k]) cache[directory] = d return cache[directory].get(term, None)
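# The same module-load cache initialization applies to the per-directory
# drawing configuration cache used above:
get_drawing_config_by_storage_form.__cache = {}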
def __directory_relations_by_arg_num(directory, num, atype, include_special=False): assert num >= 0 and num < 2, "INTERNAL ERROR" rels = [] for r in get_relation_type_list(directory): # "Special" nesting relation ignored unless specifically # requested if r.storage_form() == ENTITY_NESTING_TYPE and not include_special: continue if len(r.arg_list) != 2: Messager.warning( "Relation type %s has %d arguments in configuration (%s; expected 2). Please fix configuration." % (r.storage_form(), len(r.arg_list), ",".join(r.arg_list))) else: types = r.arguments[r.arg_list[num]] for type in types: # TODO: "wildcards" other than <ANY> if type == "<ANY>" or atype == "<ANY>" or type == atype: rels.append(r) return rels
def ann_logger():
    """
    Lazy initializer for the annotation logger. Returns None if
    annotation logging is not configured and a logger otherwise.
    """
    if ann_logger.__logger is False:
        # not initialized
        if ANNOTATION_LOG is None:
            # not configured
            ann_logger.__logger = None
        else:
            # initialize
            try:
                l = logging.getLogger('annotation')
                l.setLevel(logging.INFO)
                handler = logging.FileHandler(ANNOTATION_LOG)
                handler.setLevel(logging.INFO)
                formatter = logging.Formatter('%(asctime)s\t%(message)s')
                handler.setFormatter(formatter)
                l.addHandler(handler)
                ann_logger.__logger = l
            except IOError as e:
                Messager.error(
                    """Error: failed to initialize annotation log %s: %s.
Edit action not logged.
Please check ANNOTATION_LOG setting in config.py""" % (ANNOTATION_LOG, e))
                logging.error("Failed to initialize annotation log %s: %s" %
                              (ANNOTATION_LOG, e))
                ann_logger.__logger = None

    return ann_logger.__logger
def norm_get_name(database, key, collection=None): if NORM_LOOKUP_DEBUG: _check_DB_version(database) if REPORT_LOOKUP_TIMINGS: lookup_start = datetime.now() dbpath = _get_db_path(database, collection) if dbpath is None: # full path not configured, fall back on name as default dbpath = database try: data = normdb.data_by_id(dbpath, key) except normdb.dbNotFoundError as e: Messager.warning(str(e)) data = None # just grab the first one (sorry, this is a bit opaque) if data is not None: value = data[0][0][1] else: value = None if REPORT_LOOKUP_TIMINGS: _report_timings(database, lookup_start) # echo request for sync json_dic = { 'database': database, 'key': key, 'value': value } return json_dic
def _report_timings(dbname, start, msg=None): delta = datetime.now() - start strdelta = str(delta).replace('0:00:0', '') # take out zero min & hour queries = normdb.get_query_count(dbname) normdb.reset_query_count(dbname) Messager.info("Processed " + str(queries) + " queries in " + strdelta + (msg if msg is not None else ""))
def norm_get_data(database, key, collection=None): if NORM_LOOKUP_DEBUG: _check_DB_version(database) if REPORT_LOOKUP_TIMINGS: lookup_start = datetime.now() dbpath = _get_db_path(database, collection) if dbpath is None: # full path not configured, fall back on name as default dbpath = database try: data = normdb.data_by_id(dbpath, key) except normdb.dbNotFoundError as e: Messager.warning(str(e)) data = None if data is None: Messager.warning("Failed to get data for " + database + ":" + key) if REPORT_LOOKUP_TIMINGS: _report_timings(database, lookup_start) # echo request for sync json_dic = { 'database': database, 'key': key, 'value': data } return json_dic
def arc_types_from_to(self, from_ann, to_ann="<ANY>", include_special=False):
    """
    Returns the possible arc types that can connect an annotation
    of type from_ann to an annotation of type to_ann.
    If to_ann has the value "<ANY>", returns all possible arc types.
    """
    from_node = get_node_by_storage_form(self.directory, from_ann)

    if from_node is None:
        Messager.warning("Project configuration: unknown textbound/event type %s. Configuration may be wrong." % from_ann)
        return []

    if to_ann == "<ANY>":
        relations_from = get_relations_by_arg1(self.directory, from_ann, include_special)
        # TODO: consider using from_node.arg_list instead of .arguments for order
        return unique_preserve_order([role for role in from_node.arguments] +
                                     [r.storage_form() for r in relations_from])

    # specific hits
    types = from_node.keys_by_type.get(to_ann, [])

    if "<ANY>" in from_node.keys_by_type:
        types += from_node.keys_by_type["<ANY>"]

    # generic arguments
    if self.is_event_type(to_ann) and '<EVENT>' in from_node.keys_by_type:
        types += from_node.keys_by_type['<EVENT>']
    if self.is_physical_entity_type(to_ann) and '<ENTITY>' in from_node.keys_by_type:
        types += from_node.keys_by_type['<ENTITY>']

    # relations
    types.extend(self.relation_types_from_to(from_ann, to_ann))

    return unique_preserve_order(types)
def filter_folia(ann_obj):
    response = {"entities": [], "comments": [], "relations": [],
                "attributes": [], "tokens": {}}
    try:
        import simplejson as json
        import session
        string = session.load_conf()["config"]
        val = json.loads(string)["foliaLayers"]
    except session.NoSessionError:
        val = []
    except KeyError:
        val = []
    except Exception as e:
        val = []
        Messager.error("Error while enabling/disabling folia layers: " + str(e))

    try:
        response["tokens"] = ann_obj.folia["tokens"]
    except KeyError:
        pass

    if val:
        removed = set()
        forbidden = set(val)
        result = []
        alternatives = "alter" in val
        try:
            if 'all' in val:
                response["tokens"] = {}
                return response
            for i in ann_obj.folia["entities"]:
                if i[3] not in forbidden and not (i[4] and alternatives):
                    result.append(i)
                else:
                    removed.add(i[0])
            response["entities"] = result
            result = []
            for i in ann_obj.folia["relations"]:
                if (i[3] not in forbidden and i[2][0][1] not in removed and
                        i[2][1][1] not in removed and
                        not (i[4] and alternatives)):
                    result.append(i)
                else:
                    removed.add(i[0])
            response["relations"] = result
            result = []
            for i in ann_obj.folia["attributes"]:
                if i[2] not in removed:
                    result.append(i)
            response["attributes"] = result
            result = []
            for i in ann_obj.folia["comments"]:
                if i[0] not in removed:
                    result.append(i)
            response["comments"] = result
        except KeyError:
            pass
    else:
        response = ann_obj.folia
    return response
def __read_term_hierarchy(input):
    root_nodes = []
    last_node_at_depth = {}
    macros = {}
    for l in input:
        # skip empties and lines starting with '#'
        if l.strip() == '' or re.match(r'^\s*#', l):
            continue
        # interpret lines of only hyphens as separators
        # for display
        if re.match(r'^\s*-+\s*$', l):
            # TODO: proper placeholder and placing
            root_nodes.append(SEPARATOR_STR)
            continue
        # interpret lines of the format <STR1>=STR2 as "macro"
        # definitions, defining <STR1> as a placeholder that should be
        # replaced with STR2 wherever it occurs.
        m = re.match(r'^<([a-zA-Z_-]+)>=\s*(.*?)\s*$', l)
        if m:
            name, value = m.groups()
            if name in reserved_macro_name:
                Messager.error("Cannot redefine <%s> in configuration, it is a reserved name." % name)
                # TODO: proper exception
                assert False
            else:
                macros["<%s>" % name] = value
            continue
        # macro expansion
        for n in macros:
            l = l.replace(n, macros[n])
        m = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', l)
        assert m, "Error parsing line: '%s'" % l
        indent, terms, args = m.groups()
        terms = [t.strip() for t in terms.split("|") if t.strip() != ""]
        if args is None or args.strip() == "":
            args = []
        else:
            args = [a.strip() for a in args.split(",") if a.strip() != ""]
        # depth in the ontology corresponds to the number of
        # spaces in the initial indent.
        depth = len(indent)
        n = TypeHierarchyNode(terms, args)
        if depth == 0:
            # root level, no children assignments
            root_nodes.append(n)
        else:
            # assign as child of last node at the depth of the parent
            assert depth - 1 in last_node_at_depth, "Error: no parent for '%s'" % l
            last_node_at_depth[depth - 1].children.append(n)
        last_node_at_depth[depth] = n
    return root_nodes
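# Sketch of the configuration fragment format parsed above (hypothetical
# content; <TAB> marks a literal tab separating a term from its
# comma-separated arguments, and leading spaces encode depth):
#
#   <POG>=Protein|Gene
#
#   Physical
#    Protein<TAB>Arg:<POG>
#    Gene
#   -----
#   Event
#
# A hyphens-only line becomes a SEPARATOR_STR entry, <POG> is expanded
# wherever it occurs before parsing, and a term field such as Protein|Gene
# defines one node with alternative terms.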
def _create_relation(ann_obj, projectconf, mods, origin, target, type,
                     attributes, old_type, old_target, undo_resp={}):
    attributes = _parse_attributes(attributes)

    if old_type is not None or old_target is not None:
        assert type in projectconf.get_relation_types(), (
            ('attempting to convert relation to non-relation "%s" ' % (target.type, )) +
            ('(legit types: %s)' % (str(projectconf.get_relation_types()), )))

        sought_target = (old_target if old_target is not None else target.id)
        sought_type = (old_type if old_type is not None else type)
        sought_origin = origin.id

        # We are to change the type, target, and/or attributes
        found = None
        for ann in ann_obj.get_relations():
            if (ann.arg1 == sought_origin and ann.arg2 == sought_target and
                    ann.type == sought_type):
                found = ann
                break

        if found is None:
            # TODO: better response
            Messager.error('_create_relation: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))
        elif found.arg2 == target.id and found.type == type:
            # no changes to type or target
            pass
        else:
            # type and/or target changed, mark.
            before = str(found)
            found.arg2 = target.id
            found.type = type
            mods.change(before, found)

        target_ann = found
    else:
        # Create a new annotation
        new_id = ann_obj.get_new_id('R')
        # TODO: do we need to support different relation arg labels
        # depending on participant types? This doesn't.
        rels = projectconf.get_relations_by_type(type)
        rel = rels[0] if rels else None
        assert rel is not None and len(rel.arg_list) == 2
        a1l, a2l = rel.arg_list
        ann = BinaryRelationAnnotation(new_id, type, a1l, origin.id, a2l,
                                       target.id, '\t')
        mods.addition(ann)
        ann_obj.add_annotation(ann)

        target_ann = ann

    # process attributes
    if target_ann is not None:
        _set_attributes(ann_obj, target_ann, attributes, mods, undo_resp)
    elif attributes is not None:
        Messager.error('_create_relation: cannot set arguments: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))

    return target_ann
def _safe_serve(params, client_ip, client_hostname, cookie_data):
    # Note: Only logging imports here
    from config import WORK_DIR
    from logging import basicConfig as log_basic_config

    # Enable logging
    try:
        from config import LOG_LEVEL
        log_level = _convert_log_level(LOG_LEVEL)
    except ImportError:
        from logging import WARNING as LOG_LEVEL_WARNING
        log_level = LOG_LEVEL_WARNING
    log_basic_config(filename=path_join(WORK_DIR, "server.log"),
                     level=log_level)

    # Do the necessary imports after enabling the logging, order critical
    try:
        from common import ProtocolError, ProtocolArgumentError, NoPrintJSONError
        from dispatch import dispatch
        from jsonwrap import dumps
        from message import Messager
        from session import get_session, init_session, close_session, NoSessionError, SessionStoreError
    except ImportError:
        # Note: Heisenbug trap for #612, remove after resolved
        from logging import critical as log_critical
        from sys import path as sys_path
        log_critical("Heisenbug trap reports: " + str(sys_path))
        raise

    init_session(client_ip, cookie_data=cookie_data)
    response_is_JSON = True
    try:
        # Unpack the arguments into something less obscure than the
        # Python FieldStorage object (part dictionary, part list, part FUBAR)
        http_args = DefaultNoneDict()
        for k in params:
            # Also take the opportunity to decode into Unicode;
            # according to HTTP the data should be UTF-8
            value = params.getvalue(k)
            if isinstance(value, bytes):
                http_args[k] = value.decode("utf-8")
            elif isinstance(value, str):
                http_args[k] = value
            else:
                Messager.error(
                    "protocol argument error: expected string argument %s, got %s" % (k, type(value)))
                raise ProtocolArgumentError

        # Dispatch the request
        json_dic = dispatch(http_args, client_ip, client_hostname)
    except ProtocolError as e:
        # Internal error, only reported to client not to log
        json_dic = {}
        e.json(json_dic)

        # Add a human-readable version of the error
        err_str = str(e)
        if err_str != "":
            Messager.error(err_str, duration=-1)
def login(user, password): if not _is_authenticated(user, password): raise InvalidAuthError get_session()['user'] = user # Messager.info('Hello!') Messager.info('Hello, your ID is ' + user) ##JESSY return {}
def __init__(self, directory): # debugging (note: latter test for windows paths) if directory[:1] != "/" and not re.search(r'^[a-zA-Z]:\\', directory): Messager.debug( "Project config received relative directory ('%s'), configuration may not be found." % directory, duration=-1) self.directory = directory
def whoami(): json_dic = {} try: json_dic['user'] = get_session().get('user') except KeyError: # TODO: Really send this message? Messager.error('Not logged in!', duration=3) return json_dic
def json_response(self, response=None):
    if response is None:
        response = {}

    # debugging
    if DEBUG:
        msg_str = ''
        if self.__added:
            msg_str += ('Added the following line(s):\n'
                        + '\n'.join([str(a).rstrip() for a in self.__added]))
        if self.__changed:
            changed_strs = []
            for before, after in self.__changed:
                changed_strs.append('\t%s\n\tInto:\n\t%s' % (str(before).rstrip(), str(after).rstrip()))
            msg_str += ('Changed the following line(s):\n'
                        + '\n'.join([str(a).rstrip() for a in changed_strs]))
        if self.__deleted:
            msg_str += ('Deleted the following line(s):\n'
                        + '\n'.join([str(a).rstrip() for a in self.__deleted]))
        if msg_str:
            Messager.info(msg_str, duration=3 * len(self))
        else:
            Messager.info('No changes made')

    # highlighting
    response['edited'] = []
    # TODO: implement cleanly, e.g. add a highlightid() method to Annotation classes
    for a in self.__added:
        try:
            response['edited'].append(a.reference_id())
        except AttributeError:
            pass  # not all implement reference_id()
    for b, a in self.__changed:
        # can't mark "before" since it's stopped existing
        try:
            response['edited'].append(a.reference_id())
        except AttributeError:
            pass  # not all implement reference_id()

    # unique, preserve order
    seen = set()
    uniqued = []
    for i in response['edited']:
        s = str(i)
        if s not in seen:
            uniqued.append(i)
            seen.add(s)
    response['edited'] = uniqued

    # also report deleted annotation ids (added by sander naert)
    response['deleted'] = []
    for a in self.__deleted:
        try:
            response['deleted'].append(a.id)
        except AttributeError:
            pass

    return response
def getAnnObject2(collection, document):
    '''Newest version of the getAnnObject method.'''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(collection)
    except Exception:
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    ann = None
    full_name = collection + document
    full_name = full_name.replace("/", "")
    if isfile(app_path + full_name):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir + document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            # TODO: good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    # Validation:
    try:
        import os
        import simplejson as json
        import session
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        # validate if config enables it and if it's not already done
        if val:
            if not ann.validated:
                from verify_annotations import verify_annotation
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
    ann.issues = issues
    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
def _listdir(directory): # return listdir(directory) try: assert_allowed_to_read(directory) return [f for f in listdir(directory) if not _is_hidden(f) and allowed_to_read(path_join(directory, f))] except OSError as e: Messager.error("Error listing %s: %s" % (directory, e)) raise AnnotationCollectionNotFoundError(directory)
def jp_token_boundary_gen(text): # TODO: consider honoring WHITESPACE_TOKENIZATION for japanese also if TOKENIZATION is not None and TOKENIZATION != JAPANESE_TOKENIZATION: from message import Messager Messager.warning('Ignoring unexpected TOKENIZATION ' 'specification for Japanese.') from mecab import token_offsets_gen for o in token_offsets_gen(text): yield o
def logout(): try: del get_session()['user'] except KeyError: # Already deleted, let it slide pass # TODO: Really send this message? Messager.info('Bye!') return {}
def __init__(self, document, read_only=False):
    # TODO: DOC!
    # TODO: Incorporate file locking! Is the destructor called upon interpreter crash?
    from collections import defaultdict
    from os.path import basename, getmtime, getctime
    #from fileinput import FileInput, hook_encoded

    # we should remember this
    self._document = document

    self.failed_lines = []

    ### Here be dragons, these objects need constant updating and syncing
    # Annotation for each line of the file
    self._lines = []
    # Mapping between annotation objects and which line they occur on
    # Range: [0, inf.) unlike [1, inf.) which is common for files
    self._line_by_ann = {}
    # Maximum id number used for each id prefix, to speed up id generation
    #XXX: This is effectively broken by the introduction of id suffixes
    self._max_id_num_by_prefix = defaultdict(lambda: 1)
    # Annotation by id, not including non-ided annotations
    self._ann_by_id = {}
    ###

    ## We use some heuristics to find the appropriate annotation files
    self._read_only = read_only
    input_files = self._select_input_files(document)

    if not input_files:
        raise AnnotationFileNotFoundError(document)

    # We then try to open the files we got using the heuristics
    #self._file_input = FileInput(openhook=hook_encoded('utf-8'))
    self._input_files = input_files

    # Finally, parse the given annotation file
    try:
        self._parse_ann_file()
        # Sanity checking that can only be done post-parse
        self._sanity()
    except UnicodeDecodeError:
        Messager.error('Encoding error reading annotation file: '
                       'nonstandard encoding or binary?', -1)
        # TODO: more specific exception
        raise AnnotationFileNotFoundError(document)

    # XXX: Hack to get the timestamps after parsing
    if (len(self._input_files) == 1 and
            self._input_files[0].endswith(JOINED_ANN_FILE_SUFF)):
        self.ann_mtime = getmtime(self._input_files[0])
        self.ann_ctime = getctime(self._input_files[0])
    else:
        # We don't have a single file, just set to epoch for now
        self.ann_mtime = 0
        self.ann_ctime = 0
def create_arc(collection, document, origin, target, type, attributes=None, old_type=None, old_target=None, comment=None): directory = collection undo_resp = {} real_dir = real_directory(directory) mods = ModificationTracker() projectconf = ProjectConfiguration(real_dir) document = path_join(real_dir, document) with TextAnnotations(document) as ann_obj: # bail as quick as possible if read-only # TODO: make consistent across the different editing # functions, integrate ann_obj initialization and checks if ann_obj._read_only: raise AnnotationsIsReadOnlyError(ann_obj.get_document()) origin = ann_obj.get_ann_by_id(origin) target = ann_obj.get_ann_by_id(target) # if there is a previous annotation and the arcs aren't in # the same category (e.g. relation vs. event arg), process # as delete + create instead of update. if old_type is not None and ( projectconf.is_relation_type(old_type) != projectconf.is_relation_type(type) or projectconf.is_equiv_type(old_type) != projectconf.is_equiv_type(type)): _delete_arc_with_ann(origin.id, old_target, old_type, mods, ann_obj, projectconf) old_target, old_type = None, None if projectconf.is_equiv_type(type): ann = _create_equiv(ann_obj, projectconf, mods, origin, target, type, attributes, old_type, old_target) elif projectconf.is_relation_type(type): ann = _create_relation(ann_obj, projectconf, mods, origin, target, type, attributes, old_type, old_target) else: ann = _create_argument(ann_obj, projectconf, mods, origin, target, type, attributes, old_type, old_target) # process comments if ann is not None: _set_comments(ann_obj, ann, comment, mods, undo_resp=undo_resp) elif comment is not None: Messager.warning( 'create_arc: non-empty comment for None annotation (unsupported type for comment?)') mods_json = mods.json_response() mods_json['annotations'] = _json_from_ann(ann_obj) return mods_json