def get_statistics(directory, base_names, use_cache=True):
    # Check if we have a cache of the costly statistics generation
    # Also, only use it if no file is newer than the cache itself
    cache_file_path = get_stat_cache_by_dir(directory)

    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as e:
        if e.errno == 2:
            # ENOENT: no cache file yet
            cache_mtime = -1
        else:
            raise

    try:
        if (not isfile(cache_file_path)
                # Has config.py been changed?
                or getmtime(get_config_py_path()) > cache_mtime
                # Has any file in the dir changed since the cache was generated?
                or any(True for f in listdir(directory)
                       if (getmtime(path_join(directory, f)) > cache_mtime
                           # Ignore hidden files
                           and not f.startswith('.')))
                # Is the configuration newer than the cache?
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating' %
                        cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError:
        Messager.warning(
            'Failed checking file modification times for stats cache check; '
            'regenerating')
        generate = True

    if not use_cache:
        generate = True

    # "header" and types
    stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")]

    if options_get_validation(directory) != 'none':
        stat_types.append(("Issues", "int"))
    stat_types.append(("Editor", "string"))

    if generate:
        # Generate the document statistics from scratch
        log_info('generating statistics for "%s"' % directory)
        docstats = []
        for docname in base_names:
            try:
                # Collect the entity, relation and event counts and the
                # editing user for this document
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])

                    try:
                        user = get_session().get('user')
                    except KeyError:
                        user = None
                    if user is None:
                        user = '******'

                    if options_get_validation(directory) == 'none':
                        docstats.append(
                            [tb_count, rel_count, event_count, user])
                    else:
                        # Verify and include the verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except BaseException:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append([tb_count, rel_count, event_count,
                                         issue_count, user])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s" %
                (directory, e))

    return stat_types, docstats
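# A minimal usage sketch (not part of the module itself; the directory and
# document names below are hypothetical placeholders, and the call assumes a
# brat data collection with annotation files on disk):
#
#     stat_types, docstats = get_statistics('data/example',
#                                           ['doc-01', 'doc-02'])
#     # stat_types describes the columns, e.g.
#     #     [("Entities", "int"), ("Relations", "int"),
#     #      ("Events", "int"), ("Editor", "string")]
#     # docstats holds one row per document in base_names, e.g.
#     #     [[12, 3, 1, 'annotator1'], [7, 0, 0, '******']]
#
# Passing use_cache=False forces regeneration even when the pickle cache is
# fresh; a row of -1 values marks a document whose stats could not be read.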