def attributes_for(self, ann_type):
    """
    Return the storage forms of the attribute types that may be
    applied to an annotation of the given type.
    """
    applicable = []
    for attribute in com.get_attribute_type_list(self.directory):
        if attribute == cst.SEPARATOR_STR:
            continue
        if 'Arg' not in attribute.arguments:
            Messager.warning(
                "Project configuration: config error: attribute '%s' "
                "lacks 'Arg:' specification." % attribute.storage_form())
            continue
        targets = attribute.arguments['Arg']
        # An attribute applies if the type is listed directly, via the
        # "<ANY>" wildcard, or via one of the category placeholders.
        applies = (ann_type in targets or
                   '<ANY>' in targets or
                   (self.is_event_type(ann_type) and
                    '<EVENT>' in targets) or
                   (self.is_physical_entity_type(ann_type) and
                    '<ENTITY>' in targets) or
                   (self.is_relation_type(ann_type) and
                    '<RELATION>' in targets))
        if applies:
            applicable.append(attribute.storage_form())
    return applicable
def test_05_output(self):
    """
    Check that pending messages are written to a stream and that
    clearing removes them.
    """
    Messager.warning(u'Hello warning')
    Messager.info(u'Hello info')
    Messager.debug(u'Hello debug')
    Messager.error(u'Hello error')
    tmp = NamedTemporaryFile("w", delete=False)
    try:
        Messager.output(tmp)
        tmp.close()
        with open(tmp.name, "r") as stream:
            self.assertEqual(
                stream.read(),
                u"warning : Hello warning\n"
                u"comment : Hello info\n"
                u"debug : Hello debug\n"
                u"error : Hello error\n")
        # After clearing, an output call must produce nothing.
        Messager.clear()
        with open(tmp.name, "w") as stream:
            Messager.output(stream)
        with open(tmp.name, "r") as stream:
            self.assertEqual(stream.read(), "")
    finally:
        os.unlink(tmp.name)
def wrapper(*args, **kwds):
    """Delegate to func, first warning (when DEBUG) that it is deprecated."""
    if DEBUG:
        notice = ('Client sent "%s" action '
                  'which is marked as deprecated') % func.__name__
        Messager.warning(notice)
    return func(*args, **kwds)
def test_01_warning(self):
    """Verify a warning is serialized to JSON with its level and duration."""
    Messager.warning(u'Hello 世界!')
    collected = {}
    Messager.output_json(collected)
    expected = {'messages': [(u'Hello \u4e16\u754c\uff01', 'warning', 3)]}
    self.assertEqual(collected, expected)
def get_labels(directory):
    """Map each configured storage form to its display labels."""
    labels = {}
    for node in get_visual_configs(directory)[0][cst.LABEL_SECTION]:
        form = node.storage_form()
        if form in labels:
            Messager.warning(
                "In configuration, labels for '%s' defined more "
                "than once. Only using the last set." % form, -1)
        # The first term is the storage form itself; the rest are labels.
        labels[form] = node.terms[1:]
    return labels
def get_configs(directory, filename, defaultstr, minconf, sections,
                optional_sections):
    """
    Read and parse a configuration file, with fallbacks and caching.

    Looks for `filename` in the directory tree, then in the default
    location, finally falling back to `defaultstr`. Parse failures fall
    back to the minimal configuration `minconf`. Results are cached per
    (directory, filename) pair.

    :returns: (configs, section_labels) tuple as produced by
        __parse_configs (or the minimal fallback)
    """
    if (directory, filename) not in _GET_CONFIGS_CACHE:
        configstr, source = __read_first_in_directory_tree(
            directory, filename)
        if configstr is None:
            # didn't get one; try default dir and fall back to the default
            configstr = __read_or_default(filename, defaultstr)
            if configstr == defaultstr:
                Messager.info(
                    "Project configuration: no configuration file (%s) "
                    "found, using default." % filename, 5)
                source = "[default]"
            else:
                source = filename
        # try to parse what was found, fall back to minimal config
        try:
            configs, section_labels = __parse_configs(configstr, source,
                                                      sections,
                                                      optional_sections)
        except InvalidProjectConfigException:
            Messager.warning(
                "Project configuration: Falling back to minimal default. "
                "Configuration is likely wrong.", 5)
            configs = minconf
            # with the fallback there are no aliases: label == name
            section_labels = dict([(a, a) for a in sections])
        # very, very special case processing: if we have a type
        # "Equiv" defined in a "relations" section that doesn't
        # specify a "<REL-TYPE>", automatically fill "symmetric" and
        # "transitive". This is to support older configurations that
        # rely on the type "Equiv" to identify the relation as an
        # equivalence.
        if 'relations' in configs:
            for r in configs['relations']:
                if r == cst.SEPARATOR_STR:
                    continue
                if (r.storage_form() == "Equiv" and
                        "<REL-TYPE>" not in r.special_arguments):
                    # this was way too much noise; will only add in after
                    # at least most configs are revised.
                    # Messager.warning('Note: "Equiv" defined in config '
                    #                  'without "<REL-TYPE>"; assuming '
                    #                  'symmetric and transitive. Consider '
                    #                  'revising config to add '
                    #                  '"<REL-TYPE>:symmetric-transitive" '
                    #                  'to definition.')
                    r.special_arguments["<REL-TYPE>"] = [
                        "symmetric", "transitive"
                    ]
        _GET_CONFIGS_CACHE[(directory, filename)] = (configs, section_labels)
    return _GET_CONFIGS_CACHE[(directory, filename)]
def get_search_config(self):
    """Collect (name, URL) pairs for the configured search services."""
    pairs = []
    for entry in com.get_search_config_list(self.directory):
        if '<URL>' in entry.special_arguments:
            pairs.append((entry.storage_form(),
                          entry.special_arguments['<URL>'][0]))
        else:
            Messager.warning(
                'Project configuration: config error: missing <URL> '
                'specification for %s search.' % entry.storage_form())
    return pairs
def argument_minimum_count(self, atype, arg):
    """
    Return the minimum number of times the given argument must be
    filled for an annotation of the given type (0 if the type is
    unknown).
    """
    node = com.get_node_by_storage_form(self.directory, atype)
    if node is not None:
        return node.argument_minimum_count(arg)
    Messager.warning(
        "Project configuration: unknown event type %s. "
        "Configuration may be wrong." % atype)
    return 0
def multiple_allowed_arguments(self, atype):
    """
    Return the argument types that may be filled more than once for
    an annotation of the given type (empty if the type is unknown).
    """
    node = com.get_node_by_storage_form(self.directory, atype)
    if node is not None:
        return node.multiple_allowed_arguments()
    Messager.warning(
        "Project configuration: unknown event type %s. "
        "Configuration may be wrong." % atype)
    return []
def mandatory_arguments(self, atype):
    """
    Return the argument types that must be present for an annotation
    of the given type (empty if the type is unknown).
    """
    node = com.get_node_by_storage_form(self.directory, atype)
    if node is not None:
        return node.mandatory_arguments()
    Messager.warning(
        "Project configuration: unknown event type %s. "
        "Configuration may be wrong." % atype)
    return []
def _store_cache_stat(docstats, cache_file_path, directory):
    """Persist document statistics to the cache file; failure is non-fatal."""
    try:
        with open(cache_file_path, 'wb') as cache_file:
            pickle_dump(docstats, cache_file,
                        protocol=constants.PICKLE_PROTOCOL)
    except IOError as error:
        # Caching is only an optimization: report the problem and move on.
        Messager.warning(
            "Could not write statistics cache file to directory %s: %s" %
            (directory, error))
def __parse_kb_shortcuts(shortcutstr, default, source):
    """
    Parse keyboard shortcut configuration text.

    Each non-empty, non-comment line has the form "KEY TYPE" (separated
    by spaces or tabs). Duplicate keys keep the first definition.

    :param shortcutstr: configuration file contents
    :param default: default configuration (unused here; kept for
        interface compatibility with sibling parsers)
    :param source: name of the configuration source, for error messages
    :returns: dict mapping shortcut key to annotation type
    """
    shortcuts = {}
    for line in shortcutstr.split("\n"):
        line = line.strip()
        # skip empty lines and comments
        if line == "" or line[:1] == "#":
            continue
        # BUG FIX: the previous two-target unpacking of re.split()
        # raised an unhandled ValueError for any line that did not have
        # exactly two fields; malformed lines now warn and are skipped,
        # matching the warn-on-config-error style used elsewhere.
        fields = re.split(r'[ \t]+', line, 1)
        if len(fields) != 2:
            Messager.warning("Project configuration: invalid keyboard "
                             "shortcut line '%s' in %s. Ignoring." %
                             (line, source))
            continue
        key, type_ = fields
        if key in shortcuts:
            Messager.warning("Project configuration: keyboard shortcut "
                             "for '%s' defined multiple times. Ignoring "
                             "all but first ('%s')" % (key, shortcuts[key]))
        else:
            shortcuts[key] = type_
    return shortcuts
def get_node_by_storage_form(directory, term):
    """
    Look up the entity/event type node for a storage form, building and
    caching a per-directory index on first use.
    """
    if directory not in _GET_NODE_BY_STORAGE_FORM_CACHE:
        by_form = {}
        all_nodes = (get_entity_type_list(directory) +
                     get_event_type_list(directory))
        for node in all_nodes:
            form = node.storage_form()
            if form in by_form:
                Messager.warning(
                    "Project configuration: term %s appears multiple times, "
                    "only using last. Configuration may be wrong." % form, 5)
            by_form[form] = node
        _GET_NODE_BY_STORAGE_FORM_CACHE[directory] = by_form
    return _GET_NODE_BY_STORAGE_FORM_CACHE[directory].get(term, None)
def __init__(self, terms, args=None):
    """
    Build a type hierarchy node from its term names and argument
    specifications.

    :param terms: list of names for the type; the first becomes the
        primary (storage) form, the rest are alternates/labels
    :param args: list of "Key:Type1|Type2|..." argument specification
        strings (may be None for none)
    :raises InvalidProjectConfigException: on empty terms or malformed
        argument specifications
    """
    if args is None:
        args = []
    self.terms, self.args = terms, args
    # reject empty term lists and empty term strings outright
    if not terms or len([t for t in terms if t == ""]) != 0:
        Messager.debug("Empty term in configuration", duration=-1)
        raise InvalidProjectConfigException
    # unused if any of the terms marked with "!"
    self.unused = False
    for i in range(len(self.terms)):
        if self.terms[i][0] == "!":
            # strip the "!" marker from the stored term
            self.terms[i] = self.terms[i][1:]
            self.unused = True
    self.children = []
    # The first of the listed terms is used as the primary term for
    # storage (excepting for "special" config-only types). Due to
    # format restrictions, this form must not have e.g. space or
    # various special characters.
    if self.terms[0] not in cst.SPECIAL_RELATION_TYPES:
        self.__primary_term = normalize_to_storage_form(self.terms[0])
    else:
        self.__primary_term = self.terms[0]
    # TODO: this might not be the ideal place to put this warning
    if self.__primary_term != self.terms[0]:
        Messager.warning(
            "Note: in configuration, term '%s' is not "
            "appropriate for storage (should match "
            "'^[a-zA-Z0-9_-]*$'), using '%s' instead. "
            "(Revise configuration file to get rid of "
            "this message. Terms other than the first are "
            "not subject to this restriction.)" %
            (self.terms[0], self.__primary_term), -1)
        self.terms[0] = self.__primary_term
    # TODO: cleaner and more localized parsing
    # argument bookkeeping, filled in by _process_arg below:
    self.arguments = {}          # arg key -> list of allowed types
    self.special_arguments = {}  # reserved "<...>" keys -> values
    self.arg_list = []           # arg keys in declaration order
    self.arg_min_count = {}      # arg key -> minimum repetitions
    self.arg_max_count = {}      # arg key -> maximum repetitions
    self.keys_by_type = {}       # allowed type -> list of arg keys
    for a in self.args:
        self._process_arg(a, args)
def sentences(self):
    """
    :returns: list of sentence tuple offsets
    """
    splitter_name = self.configuration["sentence-splitter"]
    if splitter_name == 'regex':
        from arat.server.ssplit import regex_sentence_boundary_gen
        boundary_gen = regex_sentence_boundary_gen
    else:
        # anything other than the two known options falls back to
        # newline splitting (with a warning for unknown values)
        if splitter_name != 'newline':
            Messager.warning('Unrecognized sentence splitting option '
                             ', reverting to newline sentence splitting.')
        from arat.server.ssplit import newline_sentence_boundary_gen
        boundary_gen = newline_sentence_boundary_gen
    return list(boundary_gen(self.text))
def store_svg(collection, document, svg):
    """
    Save the SVG for a document and attempt any configured conversions
    to other formats.

    :returns: dict with a 'stored' list of {'name', 'suffix'} entries,
        one per file stored
    """
    stored = []
    _save_svg(collection, document, svg)
    stored.append({'name': 'svg', 'suffix': SVG_SUFFIX})
    # attempt conversions from SVG to other formats
    try:
        from config import SVG_CONVERSION_COMMANDS
    except ImportError:
        SVG_CONVERSION_COMMANDS = []
    # hoisted out of the loop (were re-imported on every iteration)
    from os import system
    import logging
    for format_, command in SVG_CONVERSION_COMMANDS:
        try:
            svgfn = _svg_path()
            # TODO: assuming format name matches suffix; generalize
            outfn = svgfn.replace('.' + SVG_SUFFIX, '.' + format_)
            cmd = command % (svgfn, outfn)
            logging.error(cmd)
            system(cmd)
            # TODO: this check may not work on all architectures.
            # consider rather checking is the intended output file
            # exists (don't forget to delete a possible old one
            # with the same name, though).
            # I'm getting weird return values from inkscape; will
            # just assume everything's OK ...
            # TODO: check return value, react appropriately
            # BUG FIX: previously appended the *builtin* ``format``
            # instead of the loop variable ``format_``.
            stored.append({'name': format_, 'suffix': format_})
        except Exception:  # pylint: disable=W0703
            # best-effort: a failed conversion must not lose the SVG.
            # BUG FIX: warning also referenced the builtin ``format``.
            Messager.warning("Failed conversion to %s" % format_)
    return {'stored': stored}
def get_normalization_config(self):
    """
    Collect (name, URL, URL base, DB) tuples for the configured
    normalization resources, skipping entries without a <URL>.
    """
    norm_config = []
    for entry in com.get_normalization_config_list(self.directory):
        if 'DB' not in entry.arguments:
            # optional, server looks in default location if None
            entry.arguments['DB'] = [None]
        if '<URL>' not in entry.special_arguments:
            Messager.warning(
                'Project configuration: config error: missing <URL> '
                'specification for %s.' % entry.storage_form())
            continue
        if '<URLBASE>' not in entry.special_arguments:
            # now optional, client skips link generation if None
            entry.special_arguments['<URLBASE>'] = [None]
        norm_config.append((entry.storage_form(),
                            entry.special_arguments['<URL>'][0],
                            entry.special_arguments['<URLBASE>'][0],
                            entry.arguments['DB'][0]))
    return norm_config
def __directory_relations_by_arg_num(directory, num, atype,
                                     include_special=False):
    """
    Return the relation types whose argument number ``num`` (0 or 1)
    accepts the type ``atype``.
    """
    assert num >= 0 and num < 2, "INTERNAL ERROR"
    entity_types = {t.storage_form()
                    for t in get_entity_type_list(directory)}
    event_types = {t.storage_form()
                   for t in get_event_type_list(directory)}

    def _compatible(type_):
        # TODO: there has to be a better way
        if type_ == atype or type_ == "<ANY>" or atype == "<ANY>":
            return True
        if type_ in entity_types and atype == "<ENTITY>":
            return True
        if type_ in event_types and atype == "<EVENT>":
            return True
        if atype in entity_types and type_ == "<ENTITY>":
            return True
        return atype in event_types and type_ == "<EVENT>"

    rels = []
    for r in get_relation_type_list(directory):
        # "Special" nesting relations ignored unless specifically
        # requested
        if (r.storage_form() in cst.SPECIAL_RELATION_TYPES
                and not include_special):
            continue
        if len(r.arg_list) != 2:
            # Don't complain about argument constraints for unused relations
            if not r.unused:
                Messager.warning(
                    "Relation type %s has %d arguments in "
                    "configuration (%s; expected 2). Please fix "
                    "configuration." % (r.storage_form(), len(r.arg_list),
                                        ",".join(r.arg_list)))
            continue
        for candidate in r.arguments[r.arg_list[num]]:
            if _compatible(candidate):
                rels.append(r)
                # TODO: why not break here?
    return rels
def _get_option_by_storage_form(directory, term, config, cache):
    """
    Resolve the per-type option dict for a storage form, building and
    caching a per-directory index from the given config on first use.
    """
    if directory not in cache:
        options = {}
        for node in config:
            form = node.storage_form()
            if form in options:
                Messager.warning(
                    "Project configuration: %s appears multiple times, "
                    "only using last. Configuration may be wrong." % form, 5)
            options[form] = {}
            for arg in node.arguments:
                values = node.arguments[arg]
                if len(values) != 1:
                    Messager.warning(
                        "Project configuration: %s key %s has multiple "
                        "values, only using first. Configuration may "
                        "be wrong." % (form, arg), 5)
                options[form][arg] = values[0]
        cache[directory] = options
    return cache[directory].get(term, None)
def get_statistics(directory, base_names, use_cache=True):
    """
    Check if we have a cache of the costly statistics generation.

    Also, only use it if no file is newer than the cache itself.

    :param directory: collection directory to gather statistics for
    :param base_names: document base names expected in the statistics
    :param use_cache: when False, always regenerate
    :returns: (stat_types, docstats) tuple
    """
    cache_file_path = _get_stat_cache_by_dir(directory)
    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as exception:
        # errno 2 (ENOENT): no cache yet; anything else is unexpected
        if exception.errno == 2:
            cache_mtime = -1
        else:
            raise
    try:
        if _need_regeneration(directory, cache_file_path, cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                # a partial cache (e.g. documents added since) is
                # treated the same as a missing one
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating' %
                        cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError as exception:
        # mtime checks failed somewhere along the way; play it safe
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating'
        )
        generate = True
    if not use_cache:
        generate = True
    # "header" and types
    stat_types = [("Entities", "int"), ("Relations", "int"),
                  ("Events", "int")]
    if options_get_validation(directory) != 'none':
        stat_types.append(("Issues", "int"))
    if generate:
        # regeneration also rewrites the cache file
        stat_types, docstats = _generate_stats(directory, base_names,
                                               stat_types, cache_file_path)
    return stat_types, docstats
def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
    """
    Add the document text plus token and sentence offsets to a JSON
    response dict.

    :param j_dic: response dict to fill in ('text', 'token_offsets',
        'sentence_offsets' keys are set)
    :param txt_file_path: path of the document text file
    :param raw_text: already-read document text, to skip re-reading
    :raises UnableToReadTextFile: when the text file cannot be read or
        decoded
    :returns: True
    """
    if raw_text is not None:
        # looks like somebody read this already; nice
        text = raw_text
    else:
        # need to read raw text
        try:
            with open_textfile(txt_file_path, 'r') as txt_file:
                text = txt_file.read()
        except IOError:
            raise UnableToReadTextFile(txt_file_path)
        except UnicodeDecodeError:
            Messager.error(
                'Error reading text file: nonstandard encoding or binary?',
                -1)
            raise UnableToReadTextFile(txt_file_path)
    j_dic['text'] = text
    # tokenisation/splitting options are configured per directory
    tokeniser = options_get_tokenization(dirname(txt_file_path))
    # First, generate tokenisation
    tok_offset_gen = tokeniser_by_name(tokeniser)
    j_dic['token_offsets'] = [o for o in tok_offset_gen(text)]
    ssplitter = options_get_ssplitter(dirname(txt_file_path))
    if ssplitter == 'newline':
        from arat.server.ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    elif ssplitter == 'regex':
        from arat.server.ssplit import regex_sentence_boundary_gen
        ss_offset_gen = regex_sentence_boundary_gen
    else:
        # unknown option: warn and fall back to newline splitting
        Messager.warning('Unrecognized sentence splitting option '
                         ', reverting to newline sentence splitting.')
        from arat.server.ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    j_dic['sentence_offsets'] = [o for o in ss_offset_gen(text)]
    return True
def tokeniser_by_name(name):
    """
    Load a tokenizer by name.

    Available tokenizers:

    >>> tokeniser_by_name('whitespace') == whitespace_token_boundary_gen
    True
    >>> tokeniser_by_name('ptblike') == gtb_token_boundary_gen
    True

    Any other name will returns default whitespace

    >>> tokeniser_by_name('unknown') == whitespace_token_boundary_gen
    True
    """
    try:
        return REGISTERED_TOKENISER[name]
    except KeyError:
        Messager.warning('Unrecognized tokenisation option '
                         ', reverting to whitespace tokenisation.')
        return whitespace_token_boundary_gen
def arc_types_from_to(self, from_ann, to_ann="<ANY>", include_special=False):
    """
    Returns the possible arc types that can connect an annotation
    of type from_ann to an annotation of type to_ann.
    If to_ann has the value \"<ANY>\", returns all possible arc types.
    """
    from_node = com.get_node_by_storage_form(self.directory, from_ann)
    if from_node is None:
        Messager.warning(
            "Project configuration: unknown textbound/event type %s. "
            "Configuration may be wrong." % from_ann)
        return []
    if to_ann == "<ANY>":
        relations_from = com.get_relations_by_arg1(
            self.directory, from_ann, include_special)
        # TODO: consider using from_node.arg_list instead of .arguments
        # for order
        return com.unique_preserve_order(
            [role for role in from_node.arguments] +
            [r.storage_form() for r in relations_from])
    # specific hits
    # BUG FIX: copy the looked-up list; the previous code aliased
    # from_node.keys_by_type[to_ann], and the in-place "+=" below
    # mutated the cached configuration on every call.
    types = list(from_node.keys_by_type.get(to_ann, []))
    if "<ANY>" in from_node.keys_by_type:
        types += from_node.keys_by_type["<ANY>"]
    # generic arguments
    if self.is_event_type(to_ann) and '<EVENT>' in from_node.keys_by_type:
        types += from_node.keys_by_type['<EVENT>']
    if self.is_physical_entity_type(to_ann) and \
            '<ENTITY>' in from_node.keys_by_type:
        types += from_node.keys_by_type['<ENTITY>']
    # relations
    types.extend(self.relation_types_from_to(from_ann, to_ann))
    return com.unique_preserve_order(types)
def overlap_types(self, inner, outer): """ Returns the set of annotation overlap types that have been configured for the given pair of annotations. """ # TODO: this is O(NM) for relation counts N and M and goes # past much of the implemented caching. Might become a # bottleneck for annotations with large type systems. t1r = com.get_relations_by_arg1(self.directory, inner, True) t2r = com.get_relations_by_arg2(self.directory, outer, True) types = [] for r in (s for s in t1r if s.storage_form() in cst.SPECIAL_RELATION_TYPES): if r in t2r: types.append(r) # new-style overlap configuration ("<OVERLAP>") takes precedence # over old-style configuration ("ENTITY-NESTING"). ovl_types = set() ovl = [r for r in types if r.storage_form() == cst.TEXTBOUND_OVERLAP_TYPE] nst = [r for r in types if r.storage_form() == cst.ENTITY_NESTING_TYPE] if ovl: if nst: Messager.warning('Warning: both '+cst.TEXTBOUND_OVERLAP_TYPE + ' and '+cst.ENTITY_NESTING_TYPE+' defined for ' + '('+inner+','+outer+') in config. ' + 'Ignoring latter.') for r in ovl: if cst.OVERLAP_TYPE_ARG not in r.special_arguments: Messager.warning('Warning: missing '+cst.OVERLAP_TYPE_ARG + ' for '+cst.TEXTBOUND_OVERLAP_TYPE + ', ignoring specification.') continue for val in r.special_arguments[cst.OVERLAP_TYPE_ARG]: ovl_types |= set(val.split('|')) elif nst: # translate into new-style configuration ovl_types = set(['contain']) else: ovl_types = set() undefined_types = [t for t in ovl_types if t not in ('contain', 'equal', 'cross', '<ANY>')] if undefined_types: Messager.warning('Undefined '+cst.OVERLAP_TYPE_ARG+' value(s) ' + str(undefined_types)+' for ' + '('+inner+','+outer+') in config. ') return ovl_types
def __parse_configs(configstr, source, expected_sections, optional_sections):
    """
    Parse configuration file text into per-section term hierarchies.

    :param configstr: full configuration file contents
    :param source: name of the configuration source, for error messages
    :param expected_sections: section names that should be present
    :param optional_sections: subset of sections that may be absent
        without a warning
    :returns: (configs, section_labels) where configs maps section name
        to a term hierarchy and section_labels maps section name to the
        label (name or alias) used in the file
    """
    # top-level config structure is a set of term hierarchies
    # separated by lines consisting of "[SECTION]" where SECTION is
    # e.g. "entities", "relations", etc.

    # start by splitting config file lines by section, also storing
    # the label (default name or alias) used for each section.
    section = "general"
    section_lines = {section: []}
    section_labels = {}
    for l in configstr.split("\n"):
        m = re.match(r'^\s*\[(.*)\]\s*$', l)
        if m:
            section = m.group(1)
            # map and store section name/alias (e.g. "spans" -> "entities")
            section_name = cst.SECTION_ALIAS.get(section, section)
            section_labels[section_name] = section
            section = section_name
            if section not in expected_sections:
                Messager.warning(
                    "Project configuration: unexpected section "
                    "[%s] in %s. Ignoring contents." % (section, source), 5)
            if section not in section_lines:
                section_lines[section] = []
        else:
            section_lines[section].append(l)
    # attempt to parse lines in each section as a term hierarchy
    configs = {}
    for s, sl in section_lines.items():
        try:
            configs[s] = __read_term_hierarchy(sl, s)
        except Exception as e:
            # warn with context, then let the caller handle the failure
            Messager.warning(
                "Project configuration: error parsing section "
                "[%s] in %s: %s" % (s, source, str(e)), 5)
            raise
    # verify that expected sections are present; replace with empty if not.
    for s in expected_sections:
        if s not in configs:
            if s not in optional_sections:
                Messager.warning(
                    "Project configuration: missing section [%s] in %s. "
                    "Configuration may be wrong." % (s, source), 5)
            configs[s] = []
    return (configs, section_labels)
def _get_tool_config(self, tool_list):
    """
    Build (name, tool, model, URL) tuples from the given tool entries,
    skipping (with a warning) any entry missing a required field.
    """
    tool_config = []
    for entry in tool_list:
        form = entry.storage_form()
        if '<URL>' not in entry.special_arguments:
            Messager.warning(
                'Project configuration: config error: missing <URL> '
                'specification for %s.' % form)
            continue
        if 'tool' not in entry.arguments:
            Messager.warning(
                'Project configuration: config error: missing tool '
                'name ("tool") for %s.' % form)
            continue
        if 'model' not in entry.arguments:
            Messager.warning(
                'Project configuration: config error: missing model '
                'name ("model") for %s.' % form)
            continue
        tool_config.append((form,
                            entry.arguments['tool'][0],
                            entry.arguments['model'][0],
                            entry.special_arguments['<URL>'][0]))
    return tool_config
def _process_arg(self, arg, args):
    """
    Parse a single "Key:Type1|Type2|..." argument specification and
    record it in this node's argument bookkeeping structures.

    :param arg: single argument specification string
    :param args: full argument list (used in error messages only)
    :raises InvalidProjectConfigException: on any malformed
        specification
    """
    arg = arg.strip()
    match_obj = re.match(r'^(\S*?):(\S*)$', arg)
    if not match_obj:
        Messager.warning(
            "Project configuration: Failed to parse argument "
            "'%s' (args: %s)" % (arg, args), 5)
        raise InvalidProjectConfigException
    key, atypes = match_obj.groups()

    # special case (sorry): if the key is a reserved config
    # string (e.g. "<REL-TYPE>" or "<URL>"), parse differently
    # and store separately
    if key in cst.RESERVED_CONFIG_STRING:
        # BUG FIX: was "key is self.special_arguments", an identity
        # comparison of a str against a dict that could never be true,
        # so duplicate special arguments were silently accepted; the
        # "% key" formatting with two placeholders would also have
        # raised TypeError had the branch ever been reached.
        if key in self.special_arguments:
            Messager.warning(
                "Project configuration: error parsing: %s argument "
                "appears multiple times." % key, 5)
            raise InvalidProjectConfigException
        # special case in special case: relation type specifications
        # are split by hyphens, nothing else is.
        # (really sorry about this.)
        if key == "<REL-TYPE>":
            self.special_arguments[key] = atypes.split("-")
        else:
            self.special_arguments[key] = [atypes]
        # NOTE: skip the rest of processing -- don't add in normal args
        return

    # Parse "repetition" modifiers. These are regex-like:
    # - Arg      : mandatory argument, exactly one
    # - Arg?     : optional argument, at most one
    # - Arg*     : optional argument, any number
    # - Arg+     : mandatory argument, one or more
    # - Arg{N}   : mandatory, exactly N
    # - Arg{N-M} : mandatory, between N and M
    match_obj = re.match(r'^(\S+?)(\{\S+\}|\?|\*|\+|)$', key)
    if not match_obj:
        Messager.warning(
            "Project configuration: error parsing "
            "argument '%s'." % key, 5)
        raise InvalidProjectConfigException
    key, rep = match_obj.groups()
    if rep == '':
        # exactly one
        minimum_count = 1
        maximum_count = 1
    elif rep == '?':
        # zero or one
        minimum_count = 0
        maximum_count = 1
    elif rep == '*':
        # any number
        minimum_count = 0
        maximum_count = sys.maxsize
    elif rep == '+':
        # one or more
        minimum_count = 1
        maximum_count = sys.maxsize
    else:
        # exact number or range constraint
        assert '{' in rep and '}' in rep, "INTERNAL ERROR"
        m = re.match(r'\{(\d+)(?:-(\d+))?\}$', rep)
        if not m:
            Messager.warning(
                "Project configuration: error parsing range '%s' in "
                "argument '%s' (syntax is "
                "'{MIN-MAX}')." % (rep, key + rep), 5)
            raise InvalidProjectConfigException
        n1, n2 = m.groups()
        n1 = int(n1)
        if n2 is None:
            # exact number
            if n1 == 0:
                Messager.warning(
                    "Project configuration: cannot have exactly "
                    "0 repetitions of argument '%s'." % (key + rep), 5)
                raise InvalidProjectConfigException
            minimum_count = n1
            maximum_count = n1
        else:
            # range
            n2 = int(n2)
            if n1 > n2:
                Messager.warning(
                    "Project configuration: invalid range %d-%d "
                    "for argument '%s'." % (n1, n2, key + rep), 5)
                raise InvalidProjectConfigException
            minimum_count = n1
            maximum_count = n2

    # format / config sanity: an argument whose label ends
    # with a digit label cannot be repeated, as this would
    # introduce ambiguity into parsing. (For example, the
    # second "Theme" is "Theme2", and the second "Arg1" would
    # be "Arg12".)
    if maximum_count > 1 and key[-1].isdigit():
        Messager.warning(
            "Project configuration: error parsing: arguments ending "
            "with a digit cannot be repeated: '%s'" % (key + rep), 5)
        raise InvalidProjectConfigException

    if key in self.arguments:
        # BUG FIX: the original "% key" against a format string with
        # two placeholders raised TypeError instead of reporting the
        # duplicate argument.
        Messager.warning(
            "Project configuration: error parsing: %s argument "
            "appears multiple times." % key, 5)
        raise InvalidProjectConfigException
    assert (key not in self.arg_min_count
            and key not in self.arg_max_count), "INTERNAL ERROR"
    self.arg_min_count[key] = minimum_count
    self.arg_max_count[key] = maximum_count
    self.arg_list.append(key)

    for atype in atypes.split("|"):
        if atype.strip() == "":
            Messager.warning(
                "Project configuration: error parsing: empty type for "
                "argument '%s'." % arg, 5)
            raise InvalidProjectConfigException
        # Check disabled; need to support arbitrary UTF values
        # for visual.conf. TODO: add this check for other configs.
        # TODO: consider checking for similar for appropriate confs.
        # if atype not in RESERVED_CONFIG_STRING and
        # normalize_to_storage_form(atype) != atype:
        #     Messager.warning("Project configuration: '%s' "
        #                      "is not a valid argument (should "
        #                      "match '^[a-zA-Z0-9_-]*$')" % atype, 5)
        #     raise InvalidProjectConfigException
        if key not in self.arguments:
            self.arguments[key] = []
        self.arguments[key].append(atype)
        if atype not in self.keys_by_type:
            self.keys_by_type[atype] = []
        self.keys_by_type[atype].append(key)
def get_drawing_config_by_storage_form(directory, term):
    """
    Return the drawing configuration dict for a storage form, building
    and caching a per-directory index on first use (None if the term
    has no drawing configuration).
    """
    cache = get_drawing_config_by_storage_form.__cache
    if directory not in cache:
        d = {}
        for n in get_drawing_config(directory):
            t = n.storage_form()
            if t in d:
                Messager.warning(
                    "Project configuration: term %s appears multiple times, "
                    "only using last. Configuration may be wrong." % t, 5)
            d[t] = {}
            for a in n.arguments:
                # attribute drawing can be specified with multiple
                # values (multi-valued attributes), other parts of
                # drawing config should have single values only.
                if len(n.arguments[a]) != 1:
                    if a in cst.ATTR_DRAWING_ATTRIBUTES:
                        # use multi-valued directly
                        d[t][a] = n.arguments[a]
                    else:
                        # warn and pass
                        Messager.warning("Project configuration: expected "
                                         "single value for %s argument %s, "
                                         "got '%s'. Configuration may be "
                                         "wrong." %
                                         (t, a, "|".join(n.arguments[a])))
                else:
                    d[t][a] = n.arguments[a][0]
        # TODO: hack to get around inability to have commas in values;
        # fix original issue instead
        for t in d:
            for k in d[t]:
                # sorry about this
                if not isinstance(d[t][k], list):
                    d[t][k] = d[t][k].replace("-", ",")
                else:
                    for i in range(len(d[t][k])):
                        d[t][k][i] = d[t][k][i].replace("-", ",")
        # propagate the span/arc/attribute defaults into every entry
        # that does not override them
        default_keys = [
            cst.VISUAL_SPAN_DEFAULT, cst.VISUAL_ARC_DEFAULT,
            cst.VISUAL_ATTR_DEFAULT
        ]
        for default_dict in [d.get(dk, {}) for dk in default_keys]:
            for k in default_dict:
                for t in d:
                    d[t][k] = d[t].get(k, default_dict[k])
        # Kind of a special case: recognize <NONE> as "deleting" an
        # attribute (prevents default propagation) and <EMPTY> as
        # specifying that a value should be the empty string
        # (can't be written as such directly).
        for t in d:
            todelete = [k for k in d[t] if d[t][k] == '<NONE>']
            for k in todelete:
                del d[t][k]
            for k in d[t]:
                if d[t][k] == '<EMPTY>':
                    d[t][k] = ''
        cache[directory] = d
    return cache[directory].get(term, None)
def _fill_attribute_configuration(nodes, project_conf):
    """
    Build the client-side attribute configuration items for the given
    attribute nodes, merging in drawing configuration.

    :param nodes: attribute type nodes (separator entries are skipped)
    :param project_conf: project configuration used for display forms,
        labels and drawing config
    :returns: list of attribute item dicts, each with 'name', 'type',
        'unused', 'labels', 'values' (and optionally 'default') keys
    """
    items = []
    for node in nodes:
        if node == SEPARATOR_STR:
            continue
        else:
            item = {}
            _type = node.storage_form()
            item['name'] = project_conf.preferred_display_form(_type)
            item['type'] = _type
            item['unused'] = node.unused
            item['labels'] = project_conf.get_labels_by_type(_type)
            # fall back to the attribute-default drawing config, then
            # to an empty one
            attr_drawing_conf = project_conf.get_drawing_config_by_type(
                _type)
            if attr_drawing_conf is None:
                attr_drawing_conf = project_conf.get_drawing_config_by_type(
                    VISUAL_ATTR_DEFAULT)
            if attr_drawing_conf is None:
                attr_drawing_conf = {}
            # Check if the possible values for the argument are specified
            # TODO: avoid magic strings
            if "Value" in node.arguments:
                args = node.arguments["Value"]
            else:
                # no "Value" defined; assume binary.
                args = []
            # Check if a default value is specified for the attribute
            if '<DEFAULT>' in node.special_arguments:
                try:
                    item['default'] = node.special_arguments['<DEFAULT>'][0]
                except IndexError:
                    Messager.warning("Config error: empty <DEFAULT> for %s" %
                                     item['name'])
            # Each item's 'values' entry is a list of dictionaries, one
            # dictionary per value option.
            if len(args) == 0:
                # binary; use drawing config directly
                attr_values = {'name': _type}
                for k in ATTR_DRAWING_ATTRIBUTES:
                    if k in attr_drawing_conf:
                        # protect against error from binary attribute
                        # having multi-valued visual config (#698)
                        if isinstance(attr_drawing_conf[k], list):
                            Messager.warning(
                                "Visual config error: expected single value for %s binary attribute '%s' config, found %d. Visuals may be wrong."
                                % (_type, k, len(attr_drawing_conf[k])))
                            # fall back on the first just to have something.
                            attr_values[k] = attr_drawing_conf[k][0]
                        else:
                            attr_values[k] = attr_drawing_conf[k]
                item['values'] = [attr_values]
            else:
                # has normal arguments, use these as possible values.
                # (this is quite terrible all around, sorry.)
                # we'll populate this incrementally as we process the args
                item['values'] = []
                for i, v in enumerate(args):
                    attr_values = {'name': v}
                    # match up annotation config with drawing config by
                    # position in list of alternative values so that e.g.
                    # "Values:L1|L2|L3" can have the visual config
                    # "glyph:[1]|[2]|[3]". If only a single value is
                    # defined, apply to all.
                    for k in ATTR_DRAWING_ATTRIBUTES:
                        if k in attr_drawing_conf:
                            # (sorry about this)
                            if isinstance(attr_drawing_conf[k], list):
                                # sufficiently many specified?
                                if len(attr_drawing_conf[k]) > i:
                                    attr_values[k] = attr_drawing_conf[k][i]
                                else:
                                    Messager.warning(
                                        "Visual config error: expected %d values for %s attribute '%s' config, found only %d. Visuals may be wrong."
                                        % (len(args), v, k,
                                           len(attr_drawing_conf[k])))
                            else:
                                # single value (presumably), apply to all
                                attr_values[k] = attr_drawing_conf[k]
                    # if no drawing attribute was defined, fall back to
                    # using a glyph derived from the attribute value
                    if len([
                            k for k in ATTR_DRAWING_ATTRIBUTES
                            if k in attr_values
                    ]) == 0:
                        attr_values['glyph'] = '[' + v + ']'
                    item['values'].append(attr_values)
            items.append(item)
    return items
def __read_term_hierarchy(input_, section=None):
    """
    Output a list of TypeHierarchyNode

    Parses configuration lines into a forest of nodes: indentation
    depth determines parent/child relations, "<NAME>=VALUE" lines
    define macros, and hyphen-only lines become separators.

    :param input_: iterable of configuration lines
    :param section: section name, used to select the separator style
    :raises InvalidProjectConfigException: on redefinition of a
        reserved macro name

    >>> _input = ["# This a comment to be ignored"]
    >>> _input.append("[spans]")
    >>> _input.append("# POS tags")
    >>> _input.append("adj")
    >>> _input.append("adv")
    >>> _input.append("art")
    >>> isinstance((__read_term_hierarchy("\\n".join(_input))[0]), TypeHierarchyNode)
    True
    """
    root_nodes = []
    # most recently seen node/args at each indentation depth, used to
    # attach children and expand <INHERIT>
    last_node_at_depth = {}
    last_args_at_depth = {}

    macros = {}
    for line in input_:
        # skip empties and lines starting with '#'
        if line.strip() == '' or re.match(r'^\s*#', line):
            continue

        # interpret lines of only hyphens as separators
        # for display
        if re.match(r'^\s*-+\s*$', line):
            # TODO: proper placeholder and placing
            root_nodes.append(cst.SEPARATOR_STR)
            continue

        # interpret lines of the format <STR1>=STR2 as "macro"
        # definitions, defining <STR1> as a placeholder that should be
        # replaced with STR2 whevever it occurs.
        match_obj = re.match(r'^<([a-zA-Z_-]+)>=\s*(.*?)\s*$', line)
        if match_obj:
            name, value = match_obj.groups()
            if name in cst.RESERVED_CONFIG_NAME:
                Messager.error("Cannot redefine <%s> in configuration, "
                               "it is a reserved name." % name)
                # TODO: proper exception
                raise InvalidProjectConfigException("Reserved name: " + name)
            else:
                macros["<%s>" % name] = value
            continue

        # macro expansion
        for token in macros:
            line = line.replace(token, macros[token])

        # check for undefined macros
        for match_obj in re.finditer(r'(<.*?>)', line):
            token = match_obj.group(1)
            assert token in cst.RESERVED_CONFIG_STRING, (
                "Error: undefined macro %s "
                "in configuration. (Note that macros are section-specific.)"
            ) % token

        # choose strict tab-only separator or looser any-space
        # separator matching depending on section
        if __require_tab_separator(section):
            match_obj = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
        else:
            match_obj = re.match(r'^(\s*)(\S+)(?:\s+(.*))?$', line)
        assert match_obj, "Error parsing line: '%s'" % line
        indent, terms, args = match_obj.groups()
        # terms are "|"-separated alternates, args comma-separated
        terms = [i.strip() for i in terms.split("|") if i.strip() != ""]
        if args is None or args.strip() == "":
            args = []
        else:
            args = [i.strip() for i in args.split(",") if i.strip() != ""]

        # older configs allowed space in term strings, splitting those
        # from arguments by space. Trying to parse one of these in the
        # new way will result in a crash from space in arguments.
        # The following is a workaround for the transition.
        if [i for i in args if re.search(r'\s', i)] and '\t' in line:
            # re-parse in the old way (dups from above)
            match_obj = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
            assert match_obj, "Error parsing line: '%s'" % line
            indent, terms, args = match_obj.groups()
            terms = [i.strip() for i in terms.split("|") if i.strip() != ""]
            if args is None or args.strip() == "":
                args = []
            else:
                args = [i.strip() for i in args.split(",")
                        if i.strip() != ""]
            # issue a warning
            Messager.warning(
                "Space in term name(s) (%s) on line \"%s\" "
                "in config. This feature is deprecated and "
                "support will be removed in future versions. "
                "Please revise your configuration." %
                (",".join(['"%s"' % i for i in terms if " " in i]), line),
                20)

        # depth in the ontology corresponds to the number of
        # spaces in the initial indent.
        depth = len(indent)

        # expand <INHERIT> into parent arguments
        expanded_args = []
        for a in args:
            if a != '<INHERIT>':
                expanded_args.append(a)
            else:
                assert depth-1 in last_args_at_depth, \
                    "Error no parent for '%s'" % line
                expanded_args.extend(last_args_at_depth[depth - 1])

        args = expanded_args

        n = TypeHierarchyNode(terms, args)
        if depth == 0:
            # root level, no children assignments
            root_nodes.append(n)
        else:
            # assign as child of last node at the depth of the parent
            assert depth-1 in last_node_at_depth, \
                "Error: no parent for '%s'" % line
            last_node_at_depth[depth - 1].children.append(n)
        last_node_at_depth[depth] = n
        last_args_at_depth[depth] = args
    return root_nodes