예제 #1
0
def get_drawing_config_by_storage_form(directory, term):
    """Return the drawing-configuration dict for `term` (a storage form)
    in `directory`, or None when no drawing configuration exists for it.

    Parsed results are cached per directory on the function object.
    """
    cache = get_drawing_config_by_storage_form.__cache
    if directory not in cache:
        # map storage form -> {argument name: single value}
        by_term = {}
        for node in get_drawing_config(directory):
            form = node.storage_form()
            if form in by_term:
                Messager.warning("Project configuration: term %s appears multiple times, only using last. Configuration may be wrong." % form, 5)
            entry = {}
            for arg in node.arguments:
                values = node.arguments[arg]
                if len(values) == 1:
                    entry[arg] = values[0]
                else:
                    # each drawing argument is expected to carry exactly one value
                    Messager.warning("Project configuration: expected single value for %s argument %s, got '%s'. Configuration may be wrong." % (form, arg, "|".join(values)))
            by_term[form] = entry

        # TODO: hack to get around inability to have commas in values;
        # fix original issue instead
        for entry in by_term.values():
            for key in entry:
                entry[key] = entry[key].replace("-", ",")

        # propagate defaults (TODO: get rid of magic "DEFAULT" values);
        # values already set on a term are never overridden
        for default_key in [VISUAL_SPAN_DEFAULT, VISUAL_ARC_DEFAULT]:
            defaults = by_term.get(default_key, {})
            for key in defaults:
                for entry in by_term.values():
                    entry.setdefault(key, defaults[key])

        cache[directory] = by_term

    return cache[directory].get(term, None)
예제 #2
0
파일: norm.py 프로젝트: edycop/brat
def _check_DB_version(database):
    """Warn (via Messager) when the norm DB version does not match the
    version this code expects."""
    import fbkvdb
    if fbkvdb.check_version(database):
        return
    from message import Messager
    Messager.warning(
        "Warning: norm DB version mismatch: expected %s, got %s for %s" %
        (fbkvdb.NORM_DB_VERSION, fbkvdb.get_version(database), database))
예제 #3
0
파일: norm.py 프로젝트: victorgau/brat
def _get_db_path(database, collection):
    """Resolve the (path, unicode-flag) pair for normalization DB `database`
    from the project configuration of `collection`.

    Returns (None, Simstring.DEFAULT_UNICODE) when collection is None, when
    the DB is not configured, or on any configuration-read failure.
    """
    if collection is None:
        # TODO: default to WORK_DIR config?
        return (None, Simstring.DEFAULT_UNICODE)
    try:
        # NOTE: previously this configuration lookup was performed twice,
        # with the first copy outside the try (so errors there escaped
        # the intended fallback); load it exactly once, inside the try.
        conf_dir = real_directory(collection)
        projectconf = ProjectConfiguration(conf_dir)
        norm_conf = projectconf.get_normalization_config()
        for entry in norm_conf:
            # TODO THIS IS WRONG
            dbname, dbpath, dbunicode = entry[0], entry[3], entry[4]
            if dbname == database:
                return (dbpath, dbunicode)
        # not found in config.
        Messager.warning('DB ' + database + ' not defined in config for ' +
                         collection + ', falling back on default.')
        return (None, Simstring.DEFAULT_UNICODE)
    except Exception:
        # whatever goes wrong, just warn and fall back on the default.
        Messager.warning('Failed to get DB path from config for ' +
                         collection + ', falling back on default.')
        return (None, Simstring.DEFAULT_UNICODE)
예제 #4
0
def get_drawing_config_by_storage_form(directory, term):
    """Return the drawing-configuration dict for `term` (a storage form)
    in `directory`, or None when the term has no drawing configuration.

    Parsed results are cached per directory on the function object.
    """
    cache = get_drawing_config_by_storage_form.__cache
    if directory not in cache:
        # map storage form -> {argument name: single value}
        d = {}
        for n in get_drawing_config(directory):
            t = n.storage_form()
            if t in d:
                Messager.warning(
                    "Project configuration: term %s appears multiple times, only using last. Configuration may be wrong."
                    % t, 5)
            d[t] = {}
            for a in n.arguments:
                # each drawing argument is expected to carry exactly one value
                if len(n.arguments[a]) != 1:
                    Messager.warning(
                        "Project configuration: expected single value for %s argument %s, got '%s'. Configuration may be wrong."
                        % (t, a, "|".join(n.arguments[a])))
                else:
                    d[t][a] = n.arguments[a][0]

        # TODO: hack to get around inability to have commas in values;
        # fix original issue instead
        for t in d:
            for k in d[t]:
                d[t][k] = d[t][k].replace("-", ",")

        # propagate defaults (TODO: get rid of magic "DEFAULT" values);
        # existing per-term values win over the defaults
        default_keys = [VISUAL_SPAN_DEFAULT, VISUAL_ARC_DEFAULT]
        for default_dict in [d.get(dk, {}) for dk in default_keys]:
            for k in default_dict:
                for t in d:
                    d[t][k] = d[t].get(k, default_dict[k])

        cache[directory] = d

    return cache[directory].get(term, None)
 def wrapper(*args, **kwds):
     # Closure over `func` and DEBUG from the enclosing decorator (the
     # decorator itself is not visible in this fragment): warn when a
     # deprecated action is invoked, then delegate to `func` unchanged.
     if DEBUG:
         Messager.warning(
             ('Client sent "%s" action '
              'which is marked as deprecated') %
             func.__name__,)
     return func(*args, **kwds)
예제 #6
0
파일: search.py 프로젝트: svigi/brat-nlpg
def _get_match_regex(text, text_match="word", match_case=False,
                     whole_string=False):
    """
    Helper for the various search_anns_for_ functions.
    """
    if match_case:
        regex_flags = 0
    else:
        regex_flags = re.IGNORECASE

    if text is None:
        text = ''

    if text_match == "word":
        # full word match: require word boundaries or, optionally,
        # whole string boundaries
        if whole_string:
            return re.compile(r'^'+re.escape(text)+r'$', regex_flags)
        else:
            return re.compile(r'\b'+re.escape(text)+r'\b', regex_flags)
    elif text_match == "substring":
        # any substring match, as text (nonoverlapping matches)
        return re.compile(re.escape(text), regex_flags)
    elif text_match == "regex":
        try:
            return re.compile(text, regex_flags)
        except: # whatever (sre_constants.error, other?)
            Messager.warning('Given string "%s" is not a valid regular expression.' % text)
            return None        
    else:
        Messager.error('Unrecognized search match specification "%s"' % text_match)
        return None    
예제 #7
0
    def attributes_for(self, ann_type):
        """
        Return a list of the possible attribute types for an annotation
        of the given type.
        """
        found = []
        for attr in get_attribute_type_list(self.directory):
            # skip section separators in the configuration list
            if attr == SEPARATOR_STR:
                continue

            if 'Arg' not in attr.arguments:
                Messager.warning(
                    "Project configuration: config error: attribute '%s' lacks 'Arg:' specification."
                    % attr.storage_form())
                continue

            targets = attr.arguments['Arg']

            # exact target match, then the generic <EVENT>/<ENTITY> wildcards
            applies = ann_type in targets
            if not applies and self.is_event_type(ann_type):
                applies = '<EVENT>' in targets
            if not applies and self.is_physical_entity_type(ann_type):
                applies = '<ENTITY>' in targets

            if applies:
                found.append(attr.storage_form())

        return found
예제 #8
0
파일: norm.py 프로젝트: zhaurora/brat
def norm_get_name(database, key, collection=None):
    """Look up the primary name for `key` in normalization DB `database`
    and return a JSON-ready dict echoing the request with the value.

    `value` is None when the DB or the key cannot be found.
    """
    if NORM_LOOKUP_DEBUG:
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        lookup_start = datetime.now()

    # NOTE(review): assumes this project's _get_db_path returns a plain
    # path (or None), not a (path, unicode) tuple — confirm against its
    # definition in this fork.
    dbpath = _get_db_path(database, collection)
    if dbpath is None:
        # full path not configured, fall back on name as default
        dbpath = database

    try:
        data = normdb.data_by_id(dbpath, key)
    except normdb.dbNotFoundError as e:
        Messager.warning(str(e))
        data = None

    # just grab the first one (sorry, this is a bit opaque)
    if data is not None:
        value = data[0][0][1]
    else:
        value = None

    if REPORT_LOOKUP_TIMINGS:
        _report_timings(database, lookup_start)

    # echo request for sync
    json_dic = {
        'database': database,
        'key': key,
        'value': value
    }
    return json_dic
def reverse_arc(collection, document, origin, target, type, attributes=None):
    """Reverse the direction of a binary relation arc (swap arg1/arg2).

    Equiv arcs and non-relation types are rejected with a warning.
    Returns a JSON response dict carrying the (possibly updated)
    annotations. Raises AnnotationsIsReadOnlyError for read-only docs.
    """
    directory = collection
    #undo_resp = {} # TODO
    real_dir = real_directory(directory)
    #mods = ModificationTracker() # TODO
    projectconf = ProjectConfiguration(real_dir)
    document = path_join(real_dir, document)
    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        if projectconf.is_equiv_type(type):
            Messager.warning('Cannot reverse Equiv arc')
        elif not projectconf.is_relation_type(type):
            Messager.warning('Can only reverse configured binary relations')
        else:
            # OK to reverse
            found = None
            # TODO: more sensible lookup
            for ann in ann_obj.get_relations():
                if (ann.arg1 == origin and ann.arg2 == target and
                    ann.type == type):
                    found = ann
                    break
            if found is None:
                Messager.error('reverse_arc: failed to identify target relation (from %s to %s, type %s) (deleted?)' % (str(origin), str(target), str(type)))
            else:
                # found it; just adjust this
                found.arg1, found.arg2 = found.arg2, found.arg1
                # TODO: modification tracker

        json_response = {}
        json_response['annotations'] = _json_from_ann(ann_obj)
        return json_response
예제 #10
0
def get_configs(directory, filename, defaultstr, minconf, sections):
    """Read and parse configuration `filename` for `directory`, caching
    the parsed result on the function object.

    Falls back to the installation default string when no file is found
    and to the minimal configuration `minconf` when parsing fails.
    """
    if (directory, filename) not in get_configs.__cache:
        configstr, source = __read_first_in_directory_tree(directory, filename)

        if configstr is None:
            # didn't get one; try default dir and fall back to the default
            configstr = __read_or_default(filename, defaultstr)
            if configstr == defaultstr:
                Messager.info(
                    "Project configuration: no configuration file (%s) found, using default."
                    % filename, 5)
                source = "[default]"
            else:
                source = filename

        # try to parse what was found, fall back to minimal config
        try:
            configs = __parse_configs(configstr, source, sections)
        except Exception:
            # was a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit; Exception is broad enough here
            Messager.warning(
                "Project configuration: Falling back to minimal default. Configuration is likely wrong.",
                5)
            configs = minconf

        get_configs.__cache[(directory, filename)] = configs

    return get_configs.__cache[(directory, filename)]
예제 #11
0
파일: norm.py 프로젝트: zhaurora/brat
def norm_get_data(database, key, collection=None):
    """Look up the full data record for `key` in normalization DB
    `database` and return a JSON-ready dict echoing the request.

    `value` is None (with a warning) when the lookup fails.
    """
    if NORM_LOOKUP_DEBUG:
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        lookup_start = datetime.now()

    # NOTE(review): assumes this project's _get_db_path returns a plain
    # path (or None), not a (path, unicode) tuple — confirm against its
    # definition in this fork.
    dbpath = _get_db_path(database, collection)
    if dbpath is None:
        # full path not configured, fall back on name as default
        dbpath = database

    try:
        data = normdb.data_by_id(dbpath, key)
    except normdb.dbNotFoundError as e:
        Messager.warning(str(e))
        data = None

    if data is None:
        Messager.warning("Failed to get data for " + database + ":" + key)

    if REPORT_LOOKUP_TIMINGS:
        _report_timings(database, lookup_start)

    # echo request for sync
    json_dic = {
        'database': database,
        'key': key,
        'value': data
    }
    return json_dic
예제 #12
0
파일: annotator.py 프로젝트: zhaurora/brat
def _parse_attributes(attributes):
    if attributes is None:
        _attributes = {}
    else:
        try:
            _attributes = json_loads(attributes)
        except ValueError:
            # Failed to parse, warn the client
            Messager.warning(
                ('Unable to parse attributes string "%s" for '
                 '"createSpan", ignoring attributes for request and '
                 'assuming no attributes set') %
                (attributes, ))
            _attributes = {}

        # XXX: Hack since the client is sending back False and True as values...
        # These are __not__ to be sent, they violate the protocol
        for _del in [k for k, v in list(_attributes.items()) if v == False]:
            del _attributes[_del]

        # These are to be old-style modifiers without values
        for _revalue in [k for k, v in list(_attributes.items()) if v]:
            _attributes[_revalue] = True
        ###
    return _attributes
예제 #13
0
    def arc_types_from_to(self, from_ann, to_ann="<ANY>", include_special=False):
        """
        Returns the possible arc types that can connect an annotation
        of type from_ann to an annotation of type to_ann.
        If to_ann has the value "<ANY>", returns all possible arc types.
        """

        from_node = get_node_by_storage_form(self.directory, from_ann)

        if from_node is None:
            Messager.warning("Project configuration: unknown textbound/event type %s. Configuration may be wrong." % from_ann)
            return []

        if to_ann == "<ANY>":
            # no target restriction: every argument role plus every
            # relation in which from_ann can be the first argument
            relations_from = get_relations_by_arg1(self.directory, from_ann, include_special)
            # TODO: consider using from_node.arg_list instead of .arguments for order
            return unique_preserve_order([role for role in from_node.arguments] + [r.storage_form() for r in relations_from])

        # specific hits
        types = from_node.keys_by_type.get(to_ann, [])

        if "<ANY>" in from_node.keys_by_type:
            types += from_node.keys_by_type["<ANY>"]

        # generic arguments (wildcard targets in the configuration)
        if self.is_event_type(to_ann) and '<EVENT>' in from_node.keys_by_type:
            types += from_node.keys_by_type['<EVENT>']
        if self.is_physical_entity_type(to_ann) and '<ENTITY>' in from_node.keys_by_type:
            types += from_node.keys_by_type['<ENTITY>']

        # relations
        types.extend(self.relation_types_from_to(from_ann, to_ann))

        return unique_preserve_order(types)
예제 #14
0
def __directory_relations_by_arg_num(directory,
                                     num,
                                     atype,
                                     include_special=False):
    """Collect the relation types whose argument number `num` (0 or 1)
    accepts an annotation of type `atype` (or the <ANY> wildcard).

    A relation is appended once per matching argument type, mirroring
    the configuration contents.
    """
    assert 0 <= num < 2, "INTERNAL ERROR"

    matching = []
    for rel in get_relation_type_list(directory):
        # "Special" nesting relation ignored unless specifically
        # requested
        if rel.storage_form() == ENTITY_NESTING_TYPE and not include_special:
            continue

        if len(rel.arg_list) != 2:
            Messager.warning(
                "Relation type %s has %d arguments in configuration (%s; expected 2). Please fix configuration."
                % (rel.storage_form(), len(rel.arg_list), ",".join(rel.arg_list)))
            continue

        for candidate in rel.arguments[rel.arg_list[num]]:
            # TODO: "wildcards" other than <ANY>
            if candidate == "<ANY>" or atype == "<ANY>" or candidate == atype:
                matching.append(rel)

    return matching
예제 #15
0
파일: document.py 프로젝트: szyulj/brat
def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
    """Add the document text, token offsets and sentence offsets to j_dic.

    Uses raw_text when given, otherwise reads txt_file_path. Raises
    UnableToReadTextFile on read failure. Returns True on success.
    """
    if raw_text is not None:
        # looks like somebody read this already; nice
        text = raw_text
    else:
        # need to read raw text
        try:
            with open_textfile(txt_file_path, "r") as txt_file:
                text = txt_file.read()
        except IOError:
            raise UnableToReadTextFile(txt_file_path)
        except UnicodeDecodeError:
            Messager.error("Error reading text file: nonstandard encoding or binary?", -1)
            raise UnableToReadTextFile(txt_file_path)

    j_dic["text"] = text

    tokeniser = options_get_tokenization(dirname(txt_file_path))

    # First, generate tokenisation
    if tokeniser == "mecab":
        from tokenise import jp_token_boundary_gen

        tok_offset_gen = jp_token_boundary_gen
    elif tokeniser == "whitespace":
        from tokenise import whitespace_token_boundary_gen

        tok_offset_gen = whitespace_token_boundary_gen
    elif tokeniser == "ptblike":
        from tokenise import gtb_token_boundary_gen

        tok_offset_gen = gtb_token_boundary_gen
    else:
        # message previously had a gap ("option " ", reverting"); include
        # the unrecognized value so the warning is actionable
        Messager.warning("Unrecognized tokenisation option '%s', reverting to whitespace tokenisation." % str(tokeniser))
        from tokenise import whitespace_token_boundary_gen

        tok_offset_gen = whitespace_token_boundary_gen
    j_dic["token_offsets"] = [o for o in tok_offset_gen(text)]

    ssplitter = options_get_ssplitter(dirname(txt_file_path))
    if ssplitter == "newline":
        from ssplit import newline_sentence_boundary_gen

        ss_offset_gen = newline_sentence_boundary_gen
    elif ssplitter == "regex":
        from ssplit import regex_sentence_boundary_gen

        ss_offset_gen = regex_sentence_boundary_gen
    else:
        Messager.warning("Unrecognized sentence splitting option '%s', reverting to newline sentence splitting." % str(ssplitter))
        from ssplit import newline_sentence_boundary_gen

        ss_offset_gen = newline_sentence_boundary_gen
    j_dic["sentence_offsets"] = [o for o in ss_offset_gen(text)]

    return True
예제 #16
0
def create_arc(collection, document, origin, target, type, attributes=None,
        old_type=None, old_target=None, comment=None):
    """Create or update an arc (Equiv, relation or event argument)
    between two annotations in `document` of `collection`.

    When old_type/old_target name an existing arc whose category differs
    from `type` (relation vs. event arg vs. equiv), the old arc is
    deleted and a new one created instead of updating in place. Returns
    the ModificationTracker JSON response including the updated
    annotations. Raises AnnotationsIsReadOnlyError for read-only docs.
    """
    directory = collection
    undo_resp = {}

    real_dir = real_directory(directory)

    mods = ModificationTracker()

    projectconf = ProjectConfiguration(real_dir)

    document = path_join(real_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        # TODO: make consistent across the different editing
        # functions, integrate ann_obj initialization and checks
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        # resolve IDs to annotation objects
        origin = ann_obj.get_ann_by_id(origin)
        target = ann_obj.get_ann_by_id(target)

        # if there is a previous annotation and the arcs aren't in
        # the same category (e.g. relation vs. event arg), process
        # as delete + create instead of update.
        if old_type is not None and (
            projectconf.is_relation_type(old_type) !=
            projectconf.is_relation_type(type) or
            projectconf.is_equiv_type(old_type) !=
            projectconf.is_equiv_type(type)):
            _delete_arc_with_ann(origin.id, old_target, old_type, mods,
                                 ann_obj, projectconf)
            old_target, old_type = None, None

        # dispatch on arc category
        if projectconf.is_equiv_type(type):
            ann =_create_equiv(ann_obj, projectconf, mods, origin, target,
                               type, attributes, old_type, old_target)

        elif projectconf.is_relation_type(type):
            ann = _create_relation(ann_obj, projectconf, mods, origin, target,
                                   type, attributes, old_type, old_target)
        else:
            ann = _create_argument(ann_obj, projectconf, mods, origin, target,
                                   type, attributes, old_type, old_target)

        # process comments
        if ann is not None:
            _set_comments(ann_obj, ann, comment, mods,
                          undo_resp=undo_resp)
        elif comment is not None:
            Messager.warning('create_arc: non-empty comment for None annotation (unsupported type for comment?)')

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
예제 #17
0
파일: annotator.py 프로젝트: zhaurora/brat
def create_arc(collection, document, origin, target, type, attributes=None,
               old_type=None, old_target=None, comment=None):
    """Create or update an arc (Equiv, relation or event argument)
    between two annotations in `document` of `collection`.

    When old_type/old_target name an existing arc whose category differs
    from `type` (relation vs. event arg vs. equiv), the old arc is
    deleted and a new one created instead of updating in place. Returns
    the ModificationTracker JSON response including the updated
    annotations. Raises AnnotationsIsReadOnlyError for read-only docs.
    """
    directory = collection
    undo_resp = {}

    real_dir = real_directory(directory)

    mods = ModificationTracker()

    projectconf = ProjectConfiguration(real_dir)

    document = path_join(real_dir, document)

    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        # TODO: make consistent across the different editing
        # functions, integrate ann_obj initialization and checks
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        # resolve IDs to annotation objects
        origin = ann_obj.get_ann_by_id(origin)
        target = ann_obj.get_ann_by_id(target)

        # if there is a previous annotation and the arcs aren't in
        # the same category (e.g. relation vs. event arg), process
        # as delete + create instead of update.
        if old_type is not None and (
                projectconf.is_relation_type(old_type) !=
                projectconf.is_relation_type(type) or
                projectconf.is_equiv_type(old_type) !=
                projectconf.is_equiv_type(type)):
            _delete_arc_with_ann(origin.id, old_target, old_type, mods,
                                 ann_obj, projectconf)
            old_target, old_type = None, None

        # dispatch on arc category
        if projectconf.is_equiv_type(type):
            ann = _create_equiv(ann_obj, projectconf, mods, origin, target,
                                type, attributes, old_type, old_target)

        elif projectconf.is_relation_type(type):
            ann = _create_relation(ann_obj, projectconf, mods, origin, target,
                                   type, attributes, old_type, old_target)
        else:
            ann = _create_argument(ann_obj, projectconf, mods, origin, target,
                                   type, attributes, old_type, old_target)

        # process comments
        if ann is not None:
            _set_comments(ann_obj, ann, comment, mods,
                          undo_resp=undo_resp)
        elif comment is not None:
            Messager.warning(
                'create_arc: non-empty comment for None annotation (unsupported type for comment?)')

        mods_json = mods.json_response()
        mods_json['annotations'] = _json_from_ann(ann_obj)
        return mods_json
예제 #18
0
파일: tokenise.py 프로젝트: edycop/brat
def jp_token_boundary_gen(text):
    """Yield token boundary offsets for Japanese `text` via MeCab."""
    # TODO: consider honoring WHITESPACE_TOKENIZATION for japanese also
    unexpected_setting = (TOKENIZATION is not None
                          and TOKENIZATION != JAPANESE_TOKENIZATION)
    if unexpected_setting:
        from message import Messager
        Messager.warning('Ignoring unexpected TOKENIZATION '
                'specification for Japanese.')
    from mecab import token_offsets_gen
    for bounds in token_offsets_gen(text):
        yield bounds
예제 #19
0
파일: tokenise.py 프로젝트: edycop/brat
def en_token_boundary_gen(text):
    """Generate English token boundaries according to the configured
    TOKENIZATION, falling back to simple whitespace tokenization for
    unrecognized settings."""
    if TOKENIZATION is None or TOKENIZATION == WHITESPACE_TOKENIZATION:
        return en_token_boundary_gen_simple(text)
    elif TOKENIZATION == PTBLIKE_TOKENIZATION:
        return en_token_boundary_gen_gtb(text)
    else:
        from message import Messager
        # message previously read "Unrecognized Engligh tokenization
        # options for English" (typo, redundant, and missing the value)
        Messager.warning('Unrecognized English tokenization option %s, '
                'reverting to simple tokenization.' % str(TOKENIZATION))
        return en_token_boundary_gen_simple(text)
예제 #20
0
 def multiple_allowed_arguments(self, type):
     """Return the argument types that are allowed to be filled more
     than once for an annotation of the given type; [] when the type
     is not known to the configuration."""
     node = get_node_by_storage_form(self.directory, type)
     if node is not None:
         return node.multiple_allowed_arguments
     Messager.warning("Project configuration: unknown event type %s. Configuration may be wrong." % type)
     return []
예제 #21
0
 def mandatory_arguments(self, type):
     """Return the mandatory argument types that must be present for an
     annotation of the given type; [] when the type is not known to the
     configuration."""
     node = get_node_by_storage_form(self.directory, type)
     if node is not None:
         return node.mandatory_arguments
     Messager.warning("Project configuration: unknown event type %s. Configuration may be wrong." % type)
     return []
예제 #22
0
파일: search.py 프로젝트: dmcc/brat
def search_anns_for_event(ann_objs, trigger_text, args, restrict_types=None,
                          ignore_types=None):
    """
    Searches the given Annotations objects for Event annotations
    matching the given specification. Returns a SearchMatchSet object.

    Note: defaults were mutable lists ([]); None is equivalent here
    since the body normalizes None to [] anyway.
    """

    # treat None and empty list uniformly
    restrict_types = [] if restrict_types is None else restrict_types
    ignore_types = [] if ignore_types is None else ignore_types

    # TODO: include args in description
    description = "Event triggered by text containing '%s'" % trigger_text
    if restrict_types != []:
        description = description + ' (of type %s)' % (",".join(restrict_types))
    matches = SearchMatchSet(description)

    for ann_obj in ann_objs:
        # collect per-document (ann_obj) for sorting
        ann_matches = []

        for e in ann_obj.get_events():
            if e.type in ignore_types:
                continue
            if restrict_types != [] and e.type not in restrict_types:
                continue

            try:
                t_ann = ann_obj.get_ann_by_id(e.trigger)
            except Exception:
                # TODO: specific exception
                Messager.error('Failed to retrieve trigger annotation %s, skipping event %s in search' % (e.trigger, e.id))
                # must skip: t_ann is unbound here (previously fell
                # through and crashed or reused a stale trigger)
                continue

            # TODO: make options for "text included" vs. "text matches"
            # TODO: remove temporary hack giving special status to "*"
            if (trigger_text is not None and trigger_text != "" and
                    trigger_text != "*" and trigger_text not in t_ann.text):
                continue

            # TODO: argument constraints
            if len(args) != 0:
                Messager.warning('NOTE: ignoring event argument constraints in search (not implemented yet, sorry!)')

            ann_matches.append((t_ann, e))

        # sort by trigger start offset (earliest first, longer span first
        # on ties); key= form replaces the Python-2-only cmp lambda
        ann_matches.sort(key=lambda m: (m[0].start, -m[0].end))

        # add to overall collection
        for t_obj, e in ann_matches:
            matches.add_match(ann_obj, e)

    # sort by document name for output
    matches.sort_matches()

    return matches
예제 #23
0
파일: normnew.py 프로젝트: ninjin/brat
def norm_get_data(database, key):
    """Look up the data record for `key` in normalization DB `database`.

    NOTE(review): this snippet uses Python 2 `except X, e` syntax and
    appears truncated by the scrape (no return statement visible);
    left byte-identical.
    """
    if NORM_LOOKUP_DEBUG:
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        lookup_start = datetime.now()

    try:
        data = normdb.data_by_id(database, key)
    except normdb.dbNotFoundError, e:
        Messager.warning(str(e))
        data = None
예제 #24
0
def get_labels(directory):
    """Return a dict mapping storage form -> list of display labels,
    read from the visual configuration and cached per directory."""
    cache = get_labels.__cache
    if directory in cache:
        return cache[directory]
    labels = {}
    for term in get_visual_configs(directory)[LABEL_SECTION]:
        form = term.storage_form()
        if form in labels:
            Messager.warning("In configuration, labels for '%s' defined more than once. Only using the last set." % form, -1)
        # first is storage form, rest are labels.
        labels[form] = term.terms[1:]
    cache[directory] = labels
    return cache[directory]
예제 #25
0
파일: document.py 프로젝트: CheggEng/brat
def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
    """Add the document text, token offsets and sentence offsets to j_dic.

    Uses raw_text when given, otherwise reads txt_file_path. Raises
    UnableToReadTextFile on read failure. Returns True on success.
    """
    if raw_text is not None:
        # looks like somebody read this already; nice
        text = raw_text
    else:
        # need to read raw text
        try:
            with open_textfile(txt_file_path, 'r') as txt_file:
                text = txt_file.read()
        except IOError:
            raise UnableToReadTextFile(txt_file_path)
        except UnicodeDecodeError:
            Messager.error(
                'Error reading text file: nonstandard encoding or binary?', -1)
            raise UnableToReadTextFile(txt_file_path)

    j_dic['text'] = text

    tokeniser = options_get_tokenization(dirname(txt_file_path))

    # First, generate tokenisation
    if tokeniser == 'mecab':
        from tokenise import jp_token_boundary_gen
        tok_offset_gen = jp_token_boundary_gen
    elif tokeniser == 'whitespace':
        from tokenise import whitespace_token_boundary_gen
        tok_offset_gen = whitespace_token_boundary_gen
    elif tokeniser == 'ptblike':
        from tokenise import gtb_token_boundary_gen
        tok_offset_gen = gtb_token_boundary_gen
    else:
        # message previously had a gap ("option " ", reverting"); include
        # the unrecognized value so the warning is actionable
        Messager.warning("Unrecognized tokenisation option '%s', "
                         'reverting to whitespace tokenisation.' % str(tokeniser))
        from tokenise import whitespace_token_boundary_gen
        tok_offset_gen = whitespace_token_boundary_gen
    j_dic['token_offsets'] = [o for o in tok_offset_gen(text)]

    ssplitter = options_get_ssplitter(dirname(txt_file_path))
    if ssplitter == 'newline':
        from ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    elif ssplitter == 'regex':
        from ssplit import regex_sentence_boundary_gen
        ss_offset_gen = regex_sentence_boundary_gen
    else:
        Messager.warning("Unrecognized sentence splitting option '%s', "
                         'reverting to newline sentence splitting.' % str(ssplitter))
        from ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    j_dic['sentence_offsets'] = [o for o in ss_offset_gen(text)]

    return True
예제 #26
0
파일: norm.py 프로젝트: zhaurora/brat
def norm_search(database, name, collection=None, exactmatch=False):
    """Run a normalization search, returning an empty result set (with a
    warning) when the simstring DB cannot be found."""
    try:
        return _norm_search_impl(database, name, collection, exactmatch)
    except simstringdb.ssdbNotFoundError as e:
        Messager.warning(str(e))
        empty_result = {
            'database': database,
            'query': name,
            'header': [],
            'items': []
        }
        return empty_result
예제 #27
0
 def mandatory_arguments(self, type):
     """
     Returns the mandatory argument types that must be present for
     an annotation of the given type; [] when the type is unknown.
     """
     # resolve the configuration node for this type; None if unknown
     node = get_node_by_storage_form(self.directory, type)
     if node is None:
         Messager.warning(
             "Project configuration: unknown event type %s. Configuration may be wrong."
             % type)
         return []
     return node.mandatory_arguments
예제 #28
0
 def multiple_allowed_arguments(self, type):
     """
     Returns the argument types that are allowed to be filled more
     than once for an annotation of the given type; [] when the type
     is unknown.
     """
     # resolve the configuration node for this type; None if unknown
     node = get_node_by_storage_form(self.directory, type)
     if node is None:
         Messager.warning(
             "Project configuration: unknown event type %s. Configuration may be wrong."
             % type)
         return []
     return node.multiple_allowed_arguments
예제 #29
0
def get_node_by_storage_form(directory, term):
    """Return the entity/event configuration node for `term` (a storage
    form) in `directory`, or None when unknown; cached per directory."""
    cache = get_node_by_storage_form.__cache
    if directory not in cache:
        nodes = {}
        type_list = get_entity_type_list(directory) + get_event_type_list(directory)
        for node in type_list:
            form = node.storage_form()
            if form in nodes:
                Messager.warning("Project configuration: term %s appears multiple times, only using last. Configuration may be wrong." % form, 5)
            nodes[form] = node
        cache[directory] = nodes

    return cache[directory].get(term, None)
예제 #30
0
def get_labels(directory):
    """Return a dict mapping storage form -> list of display labels,
    read from the visual configuration and cached per directory."""
    cache = get_labels.__cache
    if directory not in cache:
        l = {}
        for t in get_visual_configs(directory)[LABEL_SECTION]:
            if t.storage_form() in l:
                Messager.warning(
                    "In configuration, labels for '%s' defined more than once. Only using the last set."
                    % t.storage_form(), -1)
            # first is storage form, rest are labels.
            l[t.storage_form()] = t.terms[1:]
        cache[directory] = l
    return cache[directory]
예제 #31
0
def compcode(compcode, collection, document):
    """Log a task-completion code together with the current user to the
    shared user log and confirm to the client.

    Note: the original mixed tab and space indentation (a TabError on
    Python 3); indentation normalized to 4 spaces, logic unchanged.
    """
    # We want to write the compcode and user somewhere
    try:
        user = get_session()['user']
    except KeyError:
        Messager.warning('Not logged in??')
        user = '******'

    with open('/afs/inf.ed.ac.uk/web/securepages/clai/web/brat/work/userlog.txt', 'a') as f:
        f.write("COMPLETION, %s, %s, %s, %s\n" % (str(datetime.now()), user, compcode, collection))

    Messager.info('Thank you! Task completion has been logged!')
    return {}
def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
    """
    Add the document text plus token and sentence offsets to j_dic.

    The text is taken from raw_text when given, otherwise read from
    txt_file_path. Raises UnableToReadTextFile when the file cannot be
    read; returns True on success.
    """
    if raw_text is not None:
        # caller already read the text; no file access needed
        text = raw_text
    else:
        try:
            with open_textfile(txt_file_path, 'r') as txt_file:
                text = txt_file.read()
        except IOError:
            raise UnableToReadTextFile(txt_file_path)
        except UnicodeDecodeError:
            Messager.error('Error reading text file: nonstandard encoding or binary?', -1)
            raise UnableToReadTextFile(txt_file_path)

    j_dic['text'] = text

    # per-directory options live alongside the document
    doc_dir = dirname(txt_file_path)

    # tokenisation: import only the configured boundary generator,
    # defaulting to whitespace tokenisation for unknown settings
    tokeniser = options_get_tokenization(doc_dir)
    if tokeniser == 'mecab':
        from tokenise import jp_token_boundary_gen as tok_offset_gen
    elif tokeniser == 'whitespace':
        from tokenise import whitespace_token_boundary_gen as tok_offset_gen
    elif tokeniser == 'ptblike':
        from tokenise import gtb_token_boundary_gen as tok_offset_gen
    else:
        Messager.warning('Unrecognized tokenisation option '
                ', reverting to whitespace tokenisation.')
        from tokenise import whitespace_token_boundary_gen as tok_offset_gen
    j_dic['token_offsets'] = list(tok_offset_gen(text))

    # sentence splitting: same pattern, defaulting to newline splitting
    ssplitter = options_get_ssplitter(doc_dir)
    if ssplitter == 'newline':
        from ssplit import newline_sentence_boundary_gen as ss_offset_gen
    elif ssplitter == 'regex':
        from ssplit import regex_sentence_boundary_gen as ss_offset_gen
    else:
        Messager.warning('Unrecognized sentence splitting option '
                ', reverting to newline sentence splitting.')
        from ssplit import newline_sentence_boundary_gen as ss_offset_gen
    j_dic['sentence_offsets'] = list(ss_offset_gen(text))

    return True
예제 #33
0
파일: annotator.py 프로젝트: edycop/brat
def _parse_span_normalizations(normalizations):
    """
    Decode the client-supplied normalizations JSON string into a dict.
    None or an unparseable string yields an empty dict (with a client
    warning in the latter case).
    """
    if normalizations is None:
        return {}
    try:
        return json_loads(normalizations)
    except ValueError:
        # Failed to parse, warn the client
        Messager.warning((u'Unable to parse normalizations string "%s" for '
                u'"createSpan", ignoring normalizations for request and '
                u'assuming no normalizations set') % (normalizations, ))
        return {}
def _parse_span_normalizations(normalizations):
    """
    Parse the "normalizations" request argument (a JSON string) into a
    dict; returns an empty dict for None or malformed input.
    """
    parsed = {}
    if normalizations is not None:
        try:
            parsed = json_loads(normalizations)
        except ValueError:
            # Failed to parse, warn the client and fall back to no
            # normalizations for this request
            Messager.warning((u'Unable to parse normalizations string "%s" for '
                    u'"createSpan", ignoring normalizations for request and '
                    u'assuming no normalizations set') % (normalizations, ))
            parsed = {}

    return parsed
예제 #35
0
def get_node_by_storage_form(directory, term):
    """
    Look up the entity/event configuration node for `term` by its
    storage form; None when unknown. Uses a per-directory cache kept
    on the function object.
    """
    cache = get_node_by_storage_form.__cache
    if directory not in cache:
        mapping = {}
        all_nodes = (get_entity_type_list(directory)
                     + get_event_type_list(directory))
        for node in all_nodes:
            form = node.storage_form()
            if form in mapping:
                Messager.warning(
                    "Project configuration: term %s appears multiple times, only using last. Configuration may be wrong."
                    % form, 5)
            # last definition wins
            mapping[form] = node
        cache[directory] = mapping

    return cache[directory].get(term, None)
예제 #36
0
파일: norm.py 프로젝트: omarghf1/c4v-py
def norm_get_name(database, key, collection=None):
    """
    Look up the data for `key` in normalization DB `database`, resolving
    the DB path via the collection configuration when available.

    NOTE(review): uses Python 2 `except E, e` syntax; this snippet also
    appears truncated -- the original function continues past this point.
    """
    if NORM_LOOKUP_DEBUG:
        # optional sanity check: DB version matches what the code expects
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        # timing start; presumably consumed further down in the
        # (truncated) function body
        lookup_start = datetime.now()

    dbpath = _get_db_path(database, collection)
    if dbpath is None:
        # full path not configured, fall back on name as default
        dbpath = database

    try:
        data = normdb.data_by_id(dbpath, key)
    except normdb.dbNotFoundError, e:
        # missing DB: warn the client but treat as "no data"
        Messager.warning(str(e))
        data = None
예제 #37
0
파일: norm.py 프로젝트: 52nlp/brat
def norm_get_name(database, key, collection=None):
    """
    Fetch normalization data for `key` from `database`, using the
    collection config to locate the DB file when possible.

    NOTE(review): Python 2 `except E, e` syntax; the snippet looks
    truncated (no return statement visible).
    """
    if NORM_LOOKUP_DEBUG:
        # debug-only DB version consistency check
        _check_DB_version(database)
    if REPORT_LOOKUP_TIMINGS:
        # start timestamp for lookup timing reports
        lookup_start = datetime.now()

    dbpath = _get_db_path(database, collection)
    if dbpath is None:
        # full path not configured, fall back on name as default
        dbpath = database

    try:
        data = normdb.data_by_id(dbpath, key)
    except normdb.dbNotFoundError, e:
        # DB file not found: warn and continue with no data
        Messager.warning(str(e))
        data = None
예제 #38
0
def __parse_kb_shortcuts(shortcutstr, default, source):
    """
    Parse a keyboard shortcut configuration string (one "KEY TYPE" pair
    per line; blank lines and '#' comments ignored) into a dict mapping
    key to type. On any parse error, warns and returns `default`.

    Fix: bare `except:` narrowed to `except Exception:` so that e.g.
    KeyboardInterrupt is not swallowed.
    """
    try:
        shortcuts = {}
        for l in shortcutstr.split("\n"):
            l = l.strip()
            if l == "" or l[:1] == "#":
                continue
            # exactly two whitespace-separated fields expected; anything
            # else raises and triggers the fallback below
            key, type = re.split(r'[ \t]+', l)
            if key in shortcuts:
                Messager.warning("Project configuration: keyboard shortcut for '%s' defined multiple times. Ignoring all but first ('%s')" % (key, shortcuts[key]))
            else:
                shortcuts[key] = type
    except Exception:
        # TODO: specific exception handling
        Messager.warning("Project configuration: error parsing keyboard shortcuts from %s. Configuration may be wrong." % source, 5)
        shortcuts = default
    return shortcuts
예제 #39
0
파일: svg.py 프로젝트: ninjin/brat
def store_svg(collection, document, svg):
    """
    Save the SVG for (collection, document) and attempt conversion to
    each format configured in SVG_CONVERSION_COMMANDS. Returns
    {"stored": [...]} listing the stored variants.

    Fixes: `from os import system` / `import logging` hoisted out of the
    conversion loop (they were re-executed per iteration); bare
    `except:` narrowed to `except Exception:`.
    """
    import logging
    from os import system

    stored = []

    _save_svg(collection, document, svg)
    stored.append({"name": "svg", "suffix": SVG_SUFFIX})

    # attempt conversions from SVG to other formats
    try:
        from config import SVG_CONVERSION_COMMANDS
    except ImportError:
        SVG_CONVERSION_COMMANDS = []

    for format, command in SVG_CONVERSION_COMMANDS:
        try:
            svgfn = _svg_path()
            # TODO: assuming format name matches suffix; generalize
            outfn = svgfn.replace("." + SVG_SUFFIX, "." + format)
            cmd = command % (svgfn, outfn)

            # NOTE(review): logged at error level, apparently a debug
            # leftover; kept to preserve behavior
            logging.error(cmd)

            retval = system(cmd)

            # TODO: check return value, react appropriately. The exit
            # status is unreliable here (inkscape returns odd values),
            # so success is assumed; consider checking that the output
            # file exists instead.
            stored.append({"name": format, "suffix": format})

        except Exception:
            # conversion is best-effort; warn and move on
            Messager.warning("Failed conversion to %s" % format)

    return {"stored": stored}
예제 #40
0
파일: svg.py 프로젝트: omarghf1/c4v-py
def store_svg(collection, document, svg):
    """
    Save the SVG for (collection, document) and attempt conversion to
    each format configured in SVG_CONVERSION_COMMANDS. Returns
    {'stored': [...]} listing the stored variants.

    Fixes: per-iteration `from os import system` / `import logging`
    hoisted out of the loop; bare `except:` narrowed to
    `except Exception:`.
    """
    import logging
    from os import system

    stored = []

    _save_svg(collection, document, svg)
    stored.append({'name': 'svg', 'suffix': SVG_SUFFIX})

    # attempt conversions from SVG to other formats
    try:
        from config import SVG_CONVERSION_COMMANDS
    except ImportError:
        SVG_CONVERSION_COMMANDS = []

    for format, command in SVG_CONVERSION_COMMANDS:
        try:
            svgfn = _svg_path()
            # TODO: assuming format name matches suffix; generalize
            outfn = svgfn.replace('.'+SVG_SUFFIX, '.'+format)
            cmd = command % (svgfn, outfn)

            # NOTE(review): error-level log of the command looks like a
            # debug leftover; kept to preserve behavior
            logging.error(cmd)

            retval = system(cmd)

            # TODO: check return value, react appropriately. The exit
            # status is unreliable (weird inkscape return values), so
            # success is assumed; checking for the output file would be
            # more robust.
            stored.append({'name': format, 'suffix': format})

        except Exception:
            # best-effort conversion; warn and continue
            Messager.warning("Failed conversion to %s" % format)

    return { 'stored' : stored }
예제 #41
0
    def arc_types_from_to(self,
                          from_ann,
                          to_ann="<ANY>",
                          include_special=False):
        """
        Returns the possible arc types that can connect an annotation
        of type from_ann to an annotation of type to_ann.
        If to_ann has the value \"<ANY>\", returns all possible arc types.
        Unknown from_ann types produce a warning and an empty list.
        """

        from_node = get_node_by_storage_form(self.directory, from_ann)

        if from_node is None:
            # unknown source type: warn and report no possible arcs
            Messager.warning(
                "Project configuration: unknown textbound/event type %s. Configuration may be wrong."
                % from_ann)
            return []

        if to_ann == "<ANY>":
            # wildcard target: every argument role of from_ann plus
            # every relation type that can leave from_ann
            relations_from = get_relations_by_arg1(self.directory, from_ann,
                                                   include_special)
            # TODO: consider using from_node.arg_list instead of .arguments for order
            return unique_preserve_order(
                [role for role in from_node.arguments] +
                [r.storage_form() for r in relations_from])

        # specific hits
        types = from_node.keys_by_type.get(to_ann, [])

        # arcs configured against the <ANY> wildcard target also apply
        if "<ANY>" in from_node.keys_by_type:
            types += from_node.keys_by_type["<ANY>"]

        # generic arguments: <EVENT>/<ENTITY> match by target category
        if self.is_event_type(to_ann) and '<EVENT>' in from_node.keys_by_type:
            types += from_node.keys_by_type['<EVENT>']
        if self.is_physical_entity_type(
                to_ann) and '<ENTITY>' in from_node.keys_by_type:
            types += from_node.keys_by_type['<ENTITY>']

        # relations
        types.extend(self.relation_types_from_to(from_ann, to_ann))

        # dedupe while keeping first-seen order
        return unique_preserve_order(types)
예제 #42
0
def __parse_kb_shortcuts(shortcutstr, default, source):
    """
    Parse keyboard shortcut configuration: one "KEY TYPE" pair per line,
    blank lines and '#' comments skipped. Returns a key -> type dict, or
    `default` (with a warning) if parsing fails.

    Fix: bare `except:` narrowed to `except Exception:` so control-flow
    exceptions such as KeyboardInterrupt propagate.
    """
    try:
        shortcuts = {}
        for l in shortcutstr.split("\n"):
            l = l.strip()
            if l == "" or l[:1] == "#":
                continue
            # lines must split into exactly two fields; otherwise the
            # unpack raises and the fallback below applies
            key, type = re.split(r'[ \t]+', l)
            if key in shortcuts:
                Messager.warning(
                    "Project configuration: keyboard shortcut for '%s' defined multiple times. Ignoring all but first ('%s')"
                    % (key, shortcuts[key]))
            else:
                shortcuts[key] = type
    except Exception:
        # TODO: specific exception handling
        Messager.warning(
            "Project configuration: error parsing keyboard shortcuts from %s. Configuration may be wrong."
            % source, 5)
        shortcuts = default
    return shortcuts
예제 #43
0
def __directory_relations_by_arg_num(directory, num, atype, include_special=False):
    """
    Return the relation types in `directory` whose argument number
    `num` (0 or 1) can take an annotation of type `atype`. The special
    entity-nesting relation is skipped unless include_special is set.
    """
    assert num >= 0 and num < 2, "INTERNAL ERROR"

    matching = []
    for rel in get_relation_type_list(directory):
        # "Special" nesting relation ignored unless specifically
        # requested
        if rel.storage_form() == ENTITY_NESTING_TYPE and not include_special:
            continue

        if len(rel.arg_list) != 2:
            # malformed relation config: report and skip
            Messager.warning("Relation type %s has %d arguments in configuration (%s; expected 2). Please fix configuration." % (rel.storage_form(), len(rel.arg_list), ",".join(rel.arg_list)))
            continue

        # TODO: "wildcards" other than <ANY>
        for arg_type in rel.arguments[rel.arg_list[num]]:
            if arg_type == "<ANY>" or atype == "<ANY>" or arg_type == atype:
                matching.append(rel)

    return matching
예제 #44
0
def reverse_arc(collection, document, origin, target, type, attributes=None):
    """
    Swap the two arguments of the binary relation of the given type
    between origin and target in the given document, and return the
    updated annotations as a JSON-serializable dict. Equiv arcs and
    non-relation types are rejected with a client warning.
    """
    directory = collection
    # undo_resp = {} # TODO
    real_dir = real_directory(directory)
    # mods = ModificationTracker() # TODO
    projectconf = ProjectConfiguration(real_dir)

    # document arrives URL-encoded from the client
    document = urllib.parse.unquote(document)
    document = path_join(real_dir, document)
    with TextAnnotations(document) as ann_obj:
        # bail as quick as possible if read-only
        if ann_obj._read_only:
            raise AnnotationsIsReadOnlyError(ann_obj.get_document())

        if projectconf.is_equiv_type(type):
            Messager.warning('Cannot reverse Equiv arc')
        elif not projectconf.is_relation_type(type):
            Messager.warning('Can only reverse configured binary relations')
        else:
            # OK to reverse
            found = None
            # TODO: more sensible lookup
            for ann in ann_obj.get_relations():
                if (ann.arg1 == origin and ann.arg2 == target
                        and ann.type == type):
                    found = ann
                    break
            if found is None:
                Messager.error(
                    'reverse_arc: failed to identify target relation (from %s to %s, type %s) (deleted?)'
                    % (str(origin), str(target), str(type)))
            else:
                # found it; just adjust this in place
                found.arg1, found.arg2 = found.arg2, found.arg1
                # TODO: modification tracker

        # return the (possibly updated) annotation state to the client
        json_response = {}
        json_response['annotations'] = _json_from_ann(ann_obj)
        return json_response
예제 #45
0
파일: norm.py 프로젝트: zhaurora/brat
def _get_db_path(database, collection):
    """
    Resolve the filesystem path configured for normalization DB
    `database` in `collection`'s normalization config. Returns None
    (meaning "use the default") when no collection is given, the DB is
    not configured, or anything goes wrong reading the config.
    """
    if collection is None:
        # TODO: default to WORK_DIR config?
        return None
    try:
        projectconf = ProjectConfiguration(real_directory(collection))
        for entry in projectconf.get_normalization_config():
            dbname, dbpath = entry[0], entry[3]
            if dbname == database:
                return dbpath
        # not found in config.
        Messager.warning('DB ' + database + ' not defined in config for ' +
                         collection + ', falling back on default.')
        return None
    except Exception:
        # whatever goes wrong, just warn and fall back on the default.
        Messager.warning('Failed to get DB path from config for ' +
                         collection + ', falling back on default.')
        return None
예제 #46
0
    def attributes_for(self, ann_type):
        """
        Return the attribute types that may be applied to annotations
        of the given type, per the 'Arg:' specifications in the
        attribute configuration.
        """
        applicable = []
        for attr in get_attribute_type_list(self.directory):
            if attr == SEPARATOR_STR:
                continue

            if 'Arg' not in attr.arguments:
                # misconfigured attribute: report and skip
                Messager.warning("Project configuration: config error: attribute '%s' lacks 'Arg:' specification." % attr.storage_form())
                continue

            targets = attr.arguments['Arg']

            # direct match, or match via the <EVENT>/<ENTITY> wildcards
            matches = (ann_type in targets
                       or (self.is_event_type(ann_type) and '<EVENT>' in targets)
                       or (self.is_physical_entity_type(ann_type) and '<ENTITY>' in targets))
            if matches:
                applicable.append(attr.storage_form())

        return applicable
예제 #47
0
def get_configs(directory, filename, defaultstr, minconf, sections):
    """
    Read and parse configuration file `filename` for `directory`,
    searching up the directory tree and falling back first to the
    default string and then, on parse failure, to `minconf`. Results
    are cached per (directory, filename).

    Fix: bare `except:` narrowed to `except Exception:` so that e.g.
    KeyboardInterrupt is not swallowed by the fallback.
    """
    if (directory, filename) not in get_configs.__cache:
        configstr, source = __read_first_in_directory_tree(directory, filename)

        if configstr is None:
            # didn't get one; try default dir and fall back to the default
            configstr = __read_or_default(filename, defaultstr)
            if configstr == defaultstr:
                Messager.info("Project configuration: no configuration file (%s) found, using default." % filename, 5)
                source = "[default]"
            else:
                source = filename

        # try to parse what was found, fall back to minimal config
        try:
            configs = __parse_configs(configstr, source, sections)
        except Exception:
            Messager.warning("Project configuration: Falling back to minimal default. Configuration is likely wrong.", 5)
            configs = minconf

        get_configs.__cache[(directory, filename)] = configs

    return get_configs.__cache[(directory, filename)]
예제 #48
0
파일: search.py 프로젝트: svigi/brat-nlpg
def __doc_or_dir_to_annotations(directory, document, scope):
    """
    Return an Annotations object for either the single identified
    document (scope=="document") or all documents in the directory
    (scope=="collection"). Unrecognized scopes and missing document
    selections yield an empty list with a client message.
    """
    # TODO: lots of magic values here; try to avoid this

    if scope == "collection":
        return __directory_to_annotations(directory)

    if scope != "document":
        Messager.error('Unrecognized search scope specification %s' % scope)
        return []

    # NOTE: "/NO-DOCUMENT/" is a workaround for a brat
    # client-server comm issue (issue #513).
    if document in ("", "/NO-DOCUMENT/"):
        Messager.warning('No document selected for search in document.')
        return []
    return __document_to_annotations(directory, document)
예제 #49
0
def _create_equiv(ann_obj, projectconf, mods, origin, target, type, attributes,
                  old_type, old_target):
    """
    Create a new Equiv annotation between origin and target (when
    old_type is None), or modify an existing one -- where currently
    only the no-op "modification" is supported. Returns the created
    EquivAnnotation, or None on the modification path.

    NOTE(review): uses the Python 2 `unicode` builtin.
    """

    # due to legacy representation choices for Equivs (i.e. no
    # unique ID), support for attributes for Equivs would need
    # some extra work. Getting the easy non-Equiv case first.
    if attributes is not None:
        Messager.warning(
            '_create_equiv: attributes for Equiv annotation not supported yet, please tell the devs if you need this feature (mention "issue #799").'
        )
        attributes = None

    ann = None

    if old_type is None:
        # new annotation

        # sanity
        assert old_target is None, '_create_equiv: incoherent args: old_type is None, old_target is not None (client/protocol error?)'

        ann = EquivAnnotation(
            type, [unicode(origin.id), unicode(target.id)], '')
        ann_obj.add_annotation(ann)
        mods.addition(ann)

        # TODO: attributes
        assert attributes is None, "INTERNAL ERROR"  # see above
    else:
        # change to existing Equiv annotation. Other than the no-op
        # case, this remains TODO.
        assert projectconf.is_equiv_type(
            old_type
        ), 'attempting to change equiv relation to non-equiv relation, operation not supported'

        # sanity
        assert old_target is not None, '_create_equiv: incoherent args: old_type is not None, old_target is None (client/protocol error?)'

        if old_type != type:
            Messager.warning(
                '_create_equiv: equiv type change not supported yet, please tell the devs if you need this feature (mention "issue #798").'
            )

        if old_target != target.id:
            Messager.warning(
                '_create_equiv: equiv reselect not supported yet, please tell the devs if you need this feature (mention "issue #797").'
            )

        # TODO: attributes
        assert attributes is None, "INTERNAL ERROR"  # see above

    return ann
예제 #50
0
파일: annotator.py 프로젝트: a-tsioh/brat
def _create_equiv(ann_obj, projectconf, mods, origin, target, type, attributes,
                  old_type, old_target):
    """
    Create a new Equiv annotation between origin and target (when
    old_type is None), or modify an existing one -- where currently
    only the no-op "modification" is supported. Returns the created
    EquivAnnotation, or None on the modification path.
    """

    # due to legacy representation choices for Equivs (i.e. no
    # unique ID), support for attributes for Equivs would need
    # some extra work. Getting the easy non-Equiv case first.
    if attributes is not None:
        Messager.warning(
            '_create_equiv: attributes for Equiv annotation not supported yet, please tell the devs if you need this feature (mention "issue #799").')
        attributes = None

    ann = None

    if old_type is None:
        # new annotation

        # sanity
        assert old_target is None, '_create_equiv: incoherent args: old_type is None, old_target is not None (client/protocol error?)'

        ann = EquivAnnotation(type, [str(origin.id),
                                     str(target.id)], '')
        ann_obj.add_annotation(ann)
        mods.addition(ann)

        # TODO: attributes
        assert attributes is None, "INTERNAL ERROR"  # see above
    else:
        # change to existing Equiv annotation. Other than the no-op
        # case, this remains TODO.
        assert projectconf.is_equiv_type(
            old_type), 'attempting to change equiv relation to non-equiv relation, operation not supported'

        # sanity
        assert old_target is not None, '_create_equiv: incoherent args: old_type is not None, old_target is None (client/protocol error?)'

        if old_type != type:
            Messager.warning(
                '_create_equiv: equiv type change not supported yet, please tell the devs if you need this feature (mention "issue #798").')

        if old_target != target.id:
            Messager.warning(
                '_create_equiv: equiv reselect not supported yet, please tell the devs if you need this feature (mention "issue #797").')

        # TODO: attributes
        assert attributes is None, "INTERNAL ERROR"  # see above

    return ann
예제 #51
0
def __parse_configs(configstr, source, expected_sections):
    """
    Parse a configuration string into a dict mapping section name to a
    parsed term hierarchy. Lines of the form "[SECTION]" switch the
    current section; other lines accumulate into it. Unexpected or
    missing sections produce warnings; a section that fails to parse
    re-raises after warning.

    Fixes: bare `except:` narrowed to `except Exception:` (the error is
    still re-raised); unused `ln` from `enumerate` removed.
    """
    # top-level config structure is a set of term hierarchies
    # separated by lines consisting of "[SECTION]" where SECTION is
    # e.g.  "entities", "relations", etc.

    # start by splitting config file lines by section
    section = "general"
    section_lines = {section: []}
    for l in configstr.split("\n"):
        m = re.match(r'^\s*\[(.*)\]\s*$', l)
        if m:
            section = m.group(1)
            if section not in expected_sections:
                Messager.warning(
                    "Project configuration: unexpected section [%s] in %s. Ignoring contents."
                    % (section, source), 5)
            if section not in section_lines:
                section_lines[section] = []
        else:
            section_lines[section].append(l)

    # attempt to parse lines in each section as a term hierarchy
    configs = {}
    for s, sl in section_lines.items():
        try:
            configs[s] = __read_term_hierarchy(sl)
        except Exception:
            # warn with context, then let the error propagate
            Messager.warning(
                "Project configuration: error parsing section [%s] in %s." %
                (s, source), 5)
            raise

    # verify that expected sections are present; replace with empty if not.
    for s in expected_sections:
        if s not in configs:
            Messager.warning(
                "Project configuration: missing section [%s] in %s. Configuration may be wrong."
                % (s, source), 5)
            configs[s] = []

    return configs
예제 #52
0
def __parse_configs(configstr, source, expected_sections):
    """
    Parse a configuration string into {section name: term hierarchy}.
    "[SECTION]" lines switch the active section; other lines belong to
    it. Warns on unexpected/missing sections; a section that fails to
    parse re-raises after warning.

    Fixes: bare `except:` narrowed to `except Exception:` (error still
    re-raised); unused `ln` from `enumerate` removed.
    """
    # top-level config structure is a set of term hierarchies
    # separated by lines consisting of "[SECTION]" where SECTION is
    # e.g.  "entities", "relations", etc.

    # start by splitting config file lines by section
    section = "general"
    section_lines = { section: [] }
    for l in configstr.split("\n"):
        m = re.match(r'^\s*\[(.*)\]\s*$', l)
        if m:
            section = m.group(1)
            if section not in expected_sections:
                Messager.warning("Project configuration: unexpected section [%s] in %s. Ignoring contents." % (section, source), 5)
            if section not in section_lines:
                section_lines[section] = []
        else:
            section_lines[section].append(l)

    # attempt to parse lines in each section as a term hierarchy
    configs = {}
    for s, sl in section_lines.items():
        try:
            configs[s] = __read_term_hierarchy(sl)
        except Exception:
            # warn with context, then let the error propagate
            Messager.warning("Project configuration: error parsing section [%s] in %s." % (s, source), 5)
            raise

    # verify that expected sections are present; replace with empty if not.
    for s in expected_sections:
        if s not in configs:
            Messager.warning("Project configuration: missing section [%s] in %s. Configuration may be wrong." % (s, source), 5)
            configs[s] = []

    return configs
예제 #53
0
                           and not f.startswith('.')))
                # The configuration is newer than the cache
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                with open(
                        cache_file_path.decode('utf-8').encode('utf-8'),
                        'rb') as cache_file:
                    docstats = pickle_load(cache_file)
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError, e:
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating'
        )
        generate = True

    # "header" and types
    stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")]

    if options_get_validation(directory) != 'none':