Example #1
def getAnnObject2(collection,document):
    '''Newest version of the getAnnObject method.'''
    try:
        from os.path import join as path_join
        from document import real_directory
        real_dir = real_directory(collection)
    except:
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    ann = None
    full_name = collection + document
    full_name = full_name.replace("/","")
    if isfile(app_path + full_name):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir+document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia=get_extra_info(collection,document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    #Validation:
    try:
        import os
        import simplejson as json
        import session
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:    
                from verify_annotations import verify_annotation
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
    ann.issues = issues
    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
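
getAnnObject2 above caches the expensive annotation object on disk with pickle, keyed on the flattened collection/document name, and reloads it on later calls. A minimal, self-contained sketch of that pattern using only the standard library (load_or_compute, cache_dir, key and compute_annotation are hypothetical names, not part of the code above):

import os
import pickle

def load_or_compute(cache_dir, key, compute_annotation):
    """Return the cached object for key, computing and caching it on a miss."""
    os.makedirs(cache_dir, exist_ok=True)
    cache_path = os.path.join(cache_dir, key.replace('/', ''))
    if os.path.isfile(cache_path):
        # Cache hit: unpickle the previously stored object.
        with open(cache_path, 'rb') as cache_file:
            return pickle.load(cache_file)
    # Cache miss: build the object, then store it for the next call.
    obj = compute_annotation()
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(obj, cache_file)
    return obj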
Example #2
def getAnnObject(collection, document):
    try:
        real_dir = real_directory(collection)
    except:
        real_dir = collection
    app_path = WORK_DIR + "/application/"
    full_name = collection + document
    full_name = full_name.replace("/", "")
    if (os.path.isfile(app_path + full_name)):
        temp = open(app_path + full_name, 'rb')
        ann = pickle_load(temp)
        temp.close()
    else:
        ann = TextAnnotations(real_dir + document)
        ann = SimpleAnnotations(ann)
        ann.folia = {}
        try:
            #TODO:good error message
            ann.folia = get_extra_info(collection, document)
        except Exception as e:
            ann.folia = {}
            Messager.error('Error: get extra folia info() failed: %s' % e)
    #Validation:
    try:
        docdir = os.path.dirname(ann._document)
        string = session.load_conf()["config"]
        val = json.loads(string)["validationOn"]
        #validate if config enables it and if it's not already done.
        if val:
            if not ann.validated:
                projectconf = ProjectConfiguration(docdir)
                issues = verify_annotation(ann, projectconf)
            else:
                issues = ann.issues
        else:
            ann.validated = False
            issues = []
    except session.NoSessionError:
        issues = []
    except KeyError:
        issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: validation failed: %s' % e)
    ann.issues = issues
    temp = open(app_path + full_name, 'wb')
    pickle_dump(ann, temp)
    temp.close()
    return ann
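
Both versions above decide whether to run validation by parsing a JSON configuration string and reading its validationOn flag. A small hedged sketch of that lookup with a safe fallback (validation_enabled is a hypothetical helper; the real code takes the string from session.load_conf()["config"]):

import json

def validation_enabled(conf_string, default=False):
    """Return the 'validationOn' flag from a JSON config string, or default."""
    try:
        return bool(json.loads(conf_string)['validationOn'])
    except (ValueError, KeyError, TypeError):
        # Malformed JSON, missing key, or None input: fall back to the default.
        return default

print(validation_enabled('{"validationOn": true}'))  # True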
Example #3
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])

                    if options_get_validation(directory) == 'none':
                        docstats.append([tb_count, rel_count, event_count])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append(
                            [tb_count, rel_count, event_count, issue_count])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path.decode('utf-8').encode('utf-8'),
                      'wb') as cache_file:
Example #4
def _enrich_json_with_data(j_dic, ann_obj):
    # TODO: figure out if there's a reason for all the unicode()
    # invocations here; remove if not.

    # We collect trigger ids to be able to link the textbound later on
    trigger_ids = set()
    for event_ann in ann_obj.get_events():
        trigger_ids.add(event_ann.trigger)
        j_dic['events'].append([
            unicode(event_ann.id),
            unicode(event_ann.trigger), event_ann.args
        ])

    for rel_ann in ann_obj.get_relations():
        j_dic['relations'].append([
            unicode(rel_ann.id),
            unicode(rel_ann.type),
            [(rel_ann.arg1l, rel_ann.arg1), (rel_ann.arg2l, rel_ann.arg2)]
        ])

    for tb_ann in ann_obj.get_textbounds():
        #j_tb = [unicode(tb_ann.id), tb_ann.type, tb_ann.start, tb_ann.end]
        j_tb = [unicode(tb_ann.id), tb_ann.type, tb_ann.spans]

        # If we spotted it in the previous pass as a trigger for an
        # event or if the type is known to be an event type, we add it
        # as a json trigger.
        # TODO: proper handling of disconnected triggers. Currently
        # these will be erroneously passed as 'entities'
        if unicode(tb_ann.id) in trigger_ids:
            j_dic['triggers'].append(j_tb)
            # special case for BioNLP ST 2013 format: send triggers
            # also as entities for those triggers that are referenced
            # from annotations other than events (#926).
            if BIONLP_ST_2013_COMPATIBILITY:
                if tb_ann.id in ann_obj.externally_referenced_triggers:
                    try:
                        j_dic['entities'].append(j_tb)
                    except KeyError:
                        j_dic['entities'] = [
                            j_tb,
                        ]
        else:
            try:
                j_dic['entities'].append(j_tb)
            except KeyError:
                j_dic['entities'] = [
                    j_tb,
                ]

    for eq_ann in ann_obj.get_equivs():
        j_dic['equivs'].append(
            (['*', eq_ann.type] + [e for e in eq_ann.entities]))

    for att_ann in ann_obj.get_attributes():
        j_dic['attributes'].append([
            unicode(att_ann.id),
            unicode(att_ann.type),
            unicode(att_ann.target), att_ann.value
        ])

    for norm_ann in ann_obj.get_normalizations():
        j_dic['normalizations'].append([
            unicode(norm_ann.id),
            unicode(norm_ann.type),
            unicode(norm_ann.target),
            unicode(norm_ann.refdb),
            unicode(norm_ann.refid),
            unicode(norm_ann.reftext)
        ])

    for com_ann in ann_obj.get_oneline_comments():
        comment = [
            unicode(com_ann.target),
            unicode(com_ann.type),
            com_ann.tail.strip()
        ]
        try:
            j_dic['comments'].append(comment)
        except KeyError:
            j_dic['comments'] = [
                comment,
            ]

    if ann_obj.failed_lines:
        error_msg = 'Unable to parse the following line(s):\n%s' % (
            '\n'.join([
                (
                    '%s: %s' % (
                        # The line number is off by one
                        unicode(line_num + 1),
                        unicode(ann_obj[line_num]))).strip()
                for line_num in ann_obj.failed_lines
            ]))
        Messager.error(error_msg, duration=len(ann_obj.failed_lines) * 3)

    j_dic['mtime'] = ann_obj.ann_mtime
    j_dic['ctime'] = ann_obj.ann_ctime

    try:
        # XXX avoid digging the directory from the ann_obj
        import os
        docdir = os.path.dirname(ann_obj._document)
        if options_get_validation(docdir) in (
                'all',
                'full',
        ):
            from verify_annotations import verify_annotation
            projectconf = ProjectConfiguration(docdir)
            issues = verify_annotation(ann_obj, projectconf)
        else:
            issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: verify_annotation() failed: %s' % e, -1)
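
The try/except KeyError blocks in the function above append to a list that may not exist yet, creating it on the first KeyError. dict.setdefault expresses the same "append, creating the list if needed" step in one line; a small sketch with hypothetical sample values:

j_dic = {}
j_tb = ['T1', 'Protein', [(0, 7)]]  # hypothetical sample entity

# Equivalent to: try to append, create the list on KeyError.
j_dic.setdefault('entities', []).append(j_tb)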
Example #5
def _enrich_json_with_data(j_dic, ann_obj):
    # TODO: figure out if there's a reason for all the unicode()
    # invocations here; remove if not.

    # We collect trigger ids to be able to link the textbound later on
    trigger_ids = set()
    for event_ann in ann_obj.get_events():
        trigger_ids.add(event_ann.trigger)
        j_dic['events'].append(
                [unicode(event_ann.id), unicode(event_ann.trigger), event_ann.args]
                )

    for rel_ann in ann_obj.get_relations():
        j_dic['relations'].append(
            [unicode(rel_ann.id), unicode(rel_ann.type), 
             [(rel_ann.arg1l, rel_ann.arg1),
              (rel_ann.arg2l, rel_ann.arg2)]]
            )

    for tb_ann in ann_obj.get_textbounds():
        #j_tb = [unicode(tb_ann.id), tb_ann.type, tb_ann.start, tb_ann.end]
        j_tb = [unicode(tb_ann.id), tb_ann.type, tb_ann.spans]

        # If we spotted it in the previous pass as a trigger for an
        # event or if the type is known to be an event type, we add it
        # as a json trigger.
        # TODO: proper handling of disconnected triggers. Currently
        # these will be erroneously passed as 'entities'
        if unicode(tb_ann.id) in trigger_ids:
            j_dic['triggers'].append(j_tb)
            # special case for BioNLP ST 2013 format: send triggers
            # also as entities for those triggers that are referenced
            # from annotations other than events (#926).
            if BIONLP_ST_2013_COMPATIBILITY:
                if tb_ann.id in ann_obj.externally_referenced_triggers:
                    try:
                        j_dic['entities'].append(j_tb)
                    except KeyError:
                        j_dic['entities'] = [j_tb, ]
        else: 
            try:
                j_dic['entities'].append(j_tb)
            except KeyError:
                j_dic['entities'] = [j_tb, ]


    for eq_ann in ann_obj.get_equivs():
        j_dic['equivs'].append(
                (['*', eq_ann.type]
                    + [e for e in eq_ann.entities])
                )

    for att_ann in ann_obj.get_attributes():
        j_dic['attributes'].append(
                [unicode(att_ann.id), unicode(att_ann.type), unicode(att_ann.target), att_ann.value]
                )

    for norm_ann in ann_obj.get_normalizations():
        j_dic['normalizations'].append(
                [unicode(norm_ann.id), unicode(norm_ann.type), 
                 unicode(norm_ann.target), unicode(norm_ann.refdb), 
                 unicode(norm_ann.refid), unicode(norm_ann.reftext)]
                )

    for com_ann in ann_obj.get_oneline_comments():
        comment = [unicode(com_ann.target), unicode(com_ann.type),
                com_ann.tail.strip()]
        try:
            j_dic['comments'].append(comment)
        except KeyError:
            j_dic['comments'] = [comment, ]

    if ann_obj.failed_lines:
        error_msg = 'Unable to parse the following line(s):\n%s' % (
                '\n'.join(
                [('%s: %s' % (
                            # The line number is off by one
                            unicode(line_num + 1),
                            unicode(ann_obj[line_num])
                            )).strip()
                 for line_num in ann_obj.failed_lines])
                )
        Messager.error(error_msg, duration=len(ann_obj.failed_lines) * 3)

    j_dic['mtime'] = ann_obj.ann_mtime
    j_dic['ctime'] = ann_obj.ann_ctime

    try:
        # XXX avoid digging the directory from the ann_obj
        import os
        docdir = os.path.dirname(ann_obj._document)
        if options_get_validation(docdir) in ('all', 'full', ):
            from verify_annotations import verify_annotation
            projectconf = ProjectConfiguration(docdir)
            issues = verify_annotation(ann_obj, projectconf)
        else:
            issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: verify_annotation() failed: %s' % e, -1)
Example #6
def get_statistics(directory, base_names, use_cache=True):
    # Check if we have a cache of the costly statistics generation
    # Also, only use it if no file is newer than the cache itself
    cache_file_path = get_stat_cache_by_dir(directory)

    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as e:
        if e.errno == 2:
            cache_mtime = -1
        else:
            raise

    try:
        if (not isfile(cache_file_path)
                # Has config.py been changed?
                or getmtime(get_config_py_path()) > cache_mtime
                # Any file has changed in the dir since the cache was generated
                or any(True for f in listdir(directory)
                       if (getmtime(path_join(directory, f)) > cache_mtime
                           # Ignore hidden files
                           and not f.startswith('.')))
                # The configuration is newer than the cache
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating' %
                        cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError as e:
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating'
        )
        generate = True

    if not use_cache:
        generate = True

    # "header" and types
    stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")]

    if options_get_validation(directory) != 'none':
        stat_types.append(("Issues", "int"))

    if generate:
        # Generate the document statistics from scratch
        from .annotation import JOINED_ANN_FILE_SUFF
        log_info('generating statistics for "%s"' % directory)
        docstats = []
        for docname in base_names:
            try:
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])

                    if options_get_validation(directory) == 'none':
                        docstats.append([tb_count, rel_count, event_count])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except BaseException:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append(
                            [tb_count, rel_count, event_count, issue_count])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s" %
                (directory, e))

    return stat_types, docstats
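
get_statistics regenerates its cache whenever the cache file is missing, or any non-hidden file in the directory (or the configuration) is newer than it. A minimal sketch of that mtime-based staleness check using only the standard library (is_cache_stale is a hypothetical helper, not part of the code above):

import os

def is_cache_stale(cache_path, watched_dir):
    """True if the cache is missing or any non-hidden file in watched_dir is newer."""
    if not os.path.isfile(cache_path):
        return True
    cache_mtime = os.path.getmtime(cache_path)
    return any(os.path.getmtime(os.path.join(watched_dir, f)) > cache_mtime
               for f in os.listdir(watched_dir)
               if not f.startswith('.'))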
Example #7
                        read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])


                    if options_get_validation(directory) == 'none':
                        docstats.append([tb_count, rel_count, event_count])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append([tb_count, rel_count, event_count, issue_count])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
Example #8
def _enrich_json_with_data(j_dic, ann_obj):
    # TODO: figure out if there's a reason for all the unicode()
    # invocations here; remove if not.

    # We collect trigger ids to be able to link the textbound later on
    trigger_ids = set()
    for event_ann in ann_obj.get_events():
        trigger_ids.add(event_ann.trigger)
        j_dic['events'].append(
                [unicode(event_ann.id), unicode(event_ann.trigger), event_ann.args]
                )

    for rel_ann in ann_obj.get_relations():
        j_dic['relations'].append(
            [unicode(rel_ann.id), unicode(rel_ann.type), rel_ann.arg1, rel_ann.arg2]
            )

    for tb_ann in ann_obj.get_textbounds():
        j_tb = [unicode(tb_ann.id), unicode(tb_ann.type), tb_ann.start, tb_ann.end]

        # If we spotted it in the previous pass as a trigger for an
        # event or if the type is known to be an event type, we add it
        # as a json trigger.
        # TODO: proper handling of disconnected triggers. Currently
        # these will be erroneously passed as 'entities'
        if unicode(tb_ann.id) in trigger_ids:
            j_dic['triggers'].append(j_tb)
        else: 
            j_dic['entities'].append(j_tb)

    for eq_ann in ann_obj.get_equivs():
        j_dic['equivs'].append(
                (['*', eq_ann.type]
                    + [e for e in eq_ann.entities])
                )

    for att_ann in ann_obj.get_attributes():
        j_dic['attributes'].append(
                [unicode(att_ann.id), unicode(att_ann.type), unicode(att_ann.target), att_ann.value]
                )

    for norm_ann in ann_obj.get_normalizations():
        j_dic['normalizations'].append(
                [unicode(norm_ann.id), unicode(norm_ann.type), 
                 unicode(norm_ann.target), unicode(norm_ann.refdb), 
                 unicode(norm_ann.refid), unicode(norm_ann.reftext)]
                )

    for com_ann in ann_obj.get_oneline_comments():
        j_dic['comments'].append(
                [unicode(com_ann.target), unicode(com_ann.type), com_ann.tail.strip()]
                )

    if ann_obj.failed_lines:
        error_msg = 'Unable to parse the following line(s):\n%s' % (
                '\n'.join(
                [('%s: %s' % (
                            # The line number is off by one
                            unicode(line_num + 1),
                            unicode(ann_obj[line_num])
                            )).strip()
                 for line_num in ann_obj.failed_lines])
                )
        Messager.error(error_msg, duration=len(ann_obj.failed_lines) * 3)

    j_dic['mtime'] = ann_obj.ann_mtime
    j_dic['ctime'] = ann_obj.ann_ctime

    try:
        if PERFORM_VERIFICATION:
            # XXX avoid digging the directory from the ann_obj
            import os
            docdir = os.path.dirname(ann_obj._document)
            projectconf = ProjectConfiguration(docdir)
            from verify_annotations import verify_annotation
            issues = verify_annotation(ann_obj, projectconf)
        else:
            issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: verify_annotation() failed: %s' % e, -1)
Example #9
def _enrich_json_with_data(j_dic, ann_obj):
    # TODO: figure out if there's a reason for all the unicode()
    # invocations here; remove if not.

    # We collect trigger ids to be able to link the textbound later on
    trigger_ids = set()
    for event_ann in ann_obj.get_events():
        trigger_ids.add(event_ann.trigger)
        j_dic['events'].append([
            unicode(event_ann.id),
            unicode(event_ann.trigger), event_ann.args
        ])

    for rel_ann in ann_obj.get_relations():
        j_dic['relations'].append([
            unicode(rel_ann.id),
            unicode(rel_ann.type), rel_ann.arg1, rel_ann.arg2
        ])

    for tb_ann in ann_obj.get_textbounds():
        j_tb = [
            unicode(tb_ann.id),
            unicode(tb_ann.type), tb_ann.start, tb_ann.end
        ]

        # If we spotted it in the previous pass as a trigger for an
        # event or if the type is known to be an event type, we add it
        # as a json trigger.
        # TODO: proper handling of disconnected triggers. Currently
        # these will be erroneously passed as 'entities'
        if unicode(tb_ann.id) in trigger_ids:
            j_dic['triggers'].append(j_tb)
        else:
            j_dic['entities'].append(j_tb)

    for eq_ann in ann_obj.get_equivs():
        j_dic['equivs'].append(
            (['*', eq_ann.type] + [e for e in eq_ann.entities]))

    for att_ann in ann_obj.get_attributes():
        j_dic['attributes'].append([
            unicode(att_ann.id),
            unicode(att_ann.type),
            unicode(att_ann.target), att_ann.value
        ])

    for norm_ann in ann_obj.get_normalizations():
        j_dic['normalizations'].append([
            unicode(norm_ann.id),
            unicode(norm_ann.type),
            unicode(norm_ann.target),
            unicode(norm_ann.refdb),
            unicode(norm_ann.refid),
            unicode(norm_ann.reftext)
        ])

    for com_ann in ann_obj.get_oneline_comments():
        j_dic['comments'].append([
            unicode(com_ann.target),
            unicode(com_ann.type),
            com_ann.tail.strip()
        ])

    if ann_obj.failed_lines:
        error_msg = 'Unable to parse the following line(s):\n%s' % (
            '\n'.join([
                (
                    '%s: %s' % (
                        # The line number is off by one
                        unicode(line_num + 1),
                        unicode(ann_obj[line_num]))).strip()
                for line_num in ann_obj.failed_lines
            ]))
        Messager.error(error_msg, duration=len(ann_obj.failed_lines) * 3)

    j_dic['mtime'] = ann_obj.ann_mtime
    j_dic['ctime'] = ann_obj.ann_ctime

    try:
        if PERFORM_VERIFICATION:
            # XXX avoid digging the directory from the ann_obj
            import os
            docdir = os.path.dirname(ann_obj._document)
            projectconf = ProjectConfiguration(docdir)
            from verify_annotations import verify_annotation
            issues = verify_annotation(ann_obj, projectconf)
        else:
            issues = []
    except Exception as e:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: verify_annotation() failed: %s' % e, -1)
Example #10
def get_statistics(directory, base_names, use_cache=True):
    # Check if we have a cache of the costly statistics generation
    # Also, only use it if no file is newer than the cache itself
    cache_file_path = get_stat_cache_by_dir(directory)

    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as e:
        if e.errno == 2:
            cache_mtime = -1
        else:
            raise

    try:
        if (not isfile(cache_file_path)
                # Has config.py been changed?
                or getmtime(get_config_py_path()) > cache_mtime
                # Any file has changed in the dir since the cache was generated
                or any(True for f in listdir(directory)
                       if (getmtime(path_join(directory, f)) > cache_mtime
                           # Ignore hidden files
                           and not f.startswith('.')))
                # The configuration is newer than the cache
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating' %
                        cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError as e:
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating')
        generate = True

    if not use_cache:
        generate = True

    # "header" and types
    stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")]

    if options_get_validation(directory) != 'none':
        stat_types.append(("Issues", "int"))

    if generate:
        # Generate the document statistics from scratch
        from annotation import JOINED_ANN_FILE_SUFF
        log_info('generating statistics for "%s"' % directory)
        docstats = []
        for docname in base_names:
            try:
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])

                    if options_get_validation(directory) == 'none':
                        docstats.append([tb_count, rel_count, event_count])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except BaseException:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append(
                            [tb_count, rel_count, event_count, issue_count])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s" %
                (directory, e))

    return stat_types, docstats