def get_directory_information(collection):
    """Build the directory listing and configuration for a collection.

    Arguments:

    collection - identifier of the collection (directory); it is resolved
                 to a file system path with real_directory()

    Returns the value of _inject_annotation_type_conf() called with the
    resolved directory and a dictionary holding the keys 'items',
    'header', 'parent', 'messages', 'description', 'search_config',
    'disambiguator_config', 'normalization_config', 'annotation_logging'
    and 'ner_taggers'.

    Raises CollectionNotAccessibleError when get_statistics() fails with
    an OSError.
    """
    real_dir = real_directory(collection)
    assert_allowed_to_read(real_dir)

    # Get the document names. Match the complete ".txt" extension: the
    # characters stripped below include the dot, so testing only for a
    # trailing "txt" would turn a name such as "footxt" into "fo".
    txt_suffix = '.txt'
    base_names = [fn[:-len(txt_suffix)] for fn in _listdir(real_dir)
                  if fn.endswith(txt_suffix)]

    # One row per document: name followed by the modification time of
    # its joined annotation file
    doclist_header = [("Document", "string"), ("Modified", "time")]
    doclist = [
        [name, _getmtime(path_join(DATA_DIR, real_dir,
                                   name + "." + JOINED_ANN_FILE_SUFF))]
        for name in base_names
    ]

    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Append the statistics columns to each document row; doc_stats is
    # indexed in step with the document rows, one entry per document
    doclist = [row + doc_stats[i] for i, row in enumerate(doclist)]
    doclist_header += stats_types

    # Subdirectories, each wrapped in a list (just in case, and for
    # generality) so they have the same row shape as documents
    dirlist = [[entry] for entry in _listdir(real_dir)
               if isdir(path_join(real_dir, entry))]

    # check whether at root, ignoring e.g. possible trailing slashes
    if normpath(real_dir) != normpath(DATA_DIR):
        # path of the parent directory with the DATA_DIR prefix (and the
        # separator following it) cut off
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        dirlist.append([".."])
    else:
        parent = None

    # combine document and directory lists, adding a column
    # differentiating files from directories and an unused column (can
    # point to a specific annotation) required by the protocol. The
    # values filled here for the first are "c" for "collection"
    # (i.e. directory) and "d" for "document".
    combolist = [["c", None] + row for row in dirlist]
    combolist.extend(["d", None] + row for row in doclist)

    # plug in the search config too
    search_config = get_search_config(real_dir)

    # ... and the disambiguator config ... this is getting a bit much
    disambiguator_config = get_disambiguator_config(real_dir)

    # ... and the normalization config (TODO: rethink)
    normalization_config = get_normalization_config(real_dir)

    # read in README (if any) to send as a description of the
    # collection
    try:
        with open_textfile(path_join(real_dir, "README")) as txt_file:
            readme_text = txt_file.read()
    except IOError:
        readme_text = None

    # fill in a flag for whether annotator logging is active so that
    # the client knows whether to invoke timing actions
    ann_logging = annotation_logging_active(real_dir)

    # fill in NER services, if any
    ner_taggers = get_annotator_config(real_dir)

    return _inject_annotation_type_conf(real_dir, json_dic={
        'items': combolist,
        'header': doclist_header,
        'parent': parent,
        'messages': [],
        'description': readme_text,
        'search_config': search_config,
        'disambiguator_config': disambiguator_config,
        'normalization_config': normalization_config,
        'annotation_logging': ann_logging,
        'ner_taggers': ner_taggers,
    })
def get_directory_information(collection):
    """Build the directory listing and configuration for a collection.

    Arguments:

    collection - identifier of the collection (directory); it is resolved
                 to a file system path with real_directory()

    Returns the value of _inject_annotation_type_conf() called with the
    resolved directory and a dictionary holding the keys 'items',
    'header', 'parent', 'messages', 'description', 'search_config',
    'disambiguator_config', 'normalization_config', 'annotation_logging',
    'ner_taggers' and 'logging'.

    Raises CollectionNotAccessibleError when get_statistics() fails with
    an OSError.
    """
    real_dir = real_directory(collection)
    assert_allowed_to_read(real_dir)

    # Get the document names. Match the complete ".txt" extension: the
    # characters stripped below include the dot, so testing only for a
    # trailing "txt" would turn a name such as "footxt" into "fo".
    txt_suffix = '.txt'
    base_names = [fn[:-len(txt_suffix)] for fn in _listdir(real_dir)
                  if fn.endswith(txt_suffix)]

    # One row per document: name followed by the modification time of
    # its joined annotation file
    doclist_header = [("Document", "string"), ("Modified", "time")]
    doclist = [
        [name, _getmtime(path_join(DATA_DIR, real_dir,
                                   name + "." + JOINED_ANN_FILE_SUFF))]
        for name in base_names
    ]

    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # Append the statistics columns to each document row; doc_stats is
    # indexed in step with the document rows, one entry per document
    doclist = [row + doc_stats[i] for i, row in enumerate(doclist)]
    doclist_header += stats_types

    # Subdirectories, each wrapped in a list (just in case, and for
    # generality) so they have the same row shape as documents
    dirlist = [[entry] for entry in _listdir(real_dir)
               if isdir(path_join(real_dir, entry))]

    # check whether at root, ignoring e.g. possible trailing slashes
    if normpath(real_dir) != normpath(DATA_DIR):
        # path of the parent directory with the DATA_DIR prefix (and the
        # separator following it) cut off
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        dirlist.append([".."])
    else:
        parent = None

    # combine document and directory lists, adding a column
    # differentiating files from directories and an unused column (can
    # point to a specific annotation) required by the protocol. The
    # values filled here for the first are "c" for "collection"
    # (i.e. directory) and "d" for "document".
    combolist = [["c", None] + row for row in dirlist]
    combolist.extend(["d", None] + row for row in doclist)

    # plug in the search config too
    search_config = get_search_config(real_dir)

    # ... and the disambiguator config ... this is getting a bit much
    disambiguator_config = get_disambiguator_config(real_dir)

    # ... and the normalization config (TODO: rethink)
    normalization_config = get_normalization_config(real_dir)

    # read in README (if any) to send as a description of the
    # collection
    try:
        with open_textfile(path_join(real_dir, "README")) as txt_file:
            readme_text = txt_file.read()
    except IOError:
        readme_text = None

    # fill in a flag for whether annotator logging is active so that
    # the client knows whether to invoke timing actions
    ann_logging = annotation_logging_active(real_dir)

    # fill in NER services, if any
    ner_taggers = get_annotator_config(real_dir)

    # send the logging setting as well; the local is deliberately not
    # called "logging" so it cannot be confused with the standard
    # library module of that name (the response key stays 'logging')
    ann_log_setting = options_get_annlogfile(real_dir)

    return _inject_annotation_type_conf(real_dir, json_dic={
        'items': combolist,
        'header': doclist_header,
        'parent': parent,
        'messages': [],
        'description': readme_text,
        'search_config': search_config,
        'disambiguator_config': disambiguator_config,
        'normalization_config': normalization_config,
        'annotation_logging': ann_logging,
        'ner_taggers': ner_taggers,
        'logging': ann_log_setting,
    })
def get_directory_information(collection):
    """Build the per-user directory listing and configuration for a collection.

    What is listed depends on the 'user' entry of the current session:

    * no user, or 'guest': no documents and no subdirectories;
    * a user found in USER_PASSWORD: every ".txt" document in the
      directory and every subdirectory;
    * any other user: the documents returned by DBlite.get_AnnNull_files()
      plus those returned by DBlite.get_AnnING_files() for that user, and
      every subdirectory.

    Arguments:

    collection - identifier of the collection (directory); it is resolved
                 to a file system path with real_directory()

    Returns the value of _inject_annotation_type_conf() called with the
    resolved directory and a dictionary holding the keys 'items',
    'header', 'parent', 'messages', 'description', 'search_config',
    'disambiguator_config', 'normalization_config', 'annotation_logging'
    and 'ner_taggers'.

    Raises CollectionNotAccessibleError when get_statistics() fails with
    an OSError.
    """
    directory = collection
    real_dir = real_directory(directory)
    assert_allowed_to_read(real_dir)

    user = get_session().get('user')
    is_guest = user is None or user == 'guest'

    # Get the document names
    if is_guest:
        base_names = []
    elif user in USER_PASSWORD:
        # (the users could be read from a configuration file instead)
        # Match the complete ".txt" extension: the characters stripped
        # below include the dot, so testing only for a trailing "txt"
        # would turn a name such as "footxt" into "fo".
        txt_suffix = '.txt'
        base_names = [fn[:-len(txt_suffix)] for fn in _listdir(real_dir)
                      if fn.endswith(txt_suffix)]
    else:
        # NOTE(review): presumably "AnnNull" are documents nobody has
        # annotated yet and "AnnING" those this user is working on --
        # confirm against DBlite
        db = DBlite()
        base_names = db.get_AnnNull_files(directory)
        names_in_progress = db.get_AnnING_files(directory, user)
        print("names_ING", names_in_progress, file=sys.stderr)
        base_names.extend(names_in_progress)

    # One row per document: name followed by the modification time of
    # its joined annotation file
    doclist_header = [("文档", "string"), ("修改时间", "time")]
    doclist = [
        [name, _getmtime(path_join(DATA_DIR, real_dir,
                                   name + "." + JOINED_ANN_FILE_SUFF))]
        for name in base_names
    ]

    # Example values seen while debugging:
    #   stats_types: [('Entities', 'int'), ('Relations', 'int'),
    #                 ('Events', 'int')]
    #   doc_stats:   [[29, 0, 0], [97, 0, 0], [22, 0, 0], ...]
    #   doclist:     [['ned.train-doc-184', 1555259780.624325, 29, 0, 0],
    #                 ['ned.train-doc-181', 1555259780.623239, 97, 0, 0],
    #                 ...]
    try:
        stats_types, doc_stats = get_statistics(real_dir, base_names)
    except OSError:
        # something like missing access permissions?
        raise CollectionNotAccessibleError

    # NOTE(review): the print() calls in this function are debugging
    # output on stderr; consider replacing them with a logger
    print("stats_types:", stats_types, file=sys.stderr)
    print("doc_stats:", doc_stats, file=sys.stderr)

    # Append the statistics columns to each document row; doc_stats is
    # indexed in step with the document rows, one entry per document
    doclist = [row + doc_stats[i] for i, row in enumerate(doclist)]
    print("doclist:", doclist, file=sys.stderr)
    doclist_header += stats_types
    # (a "modified by" string column was considered here but is not sent)
    print("doclist_header:", doclist_header, file=sys.stderr)

    # Subdirectories, each wrapped in a list (just in case, and for
    # generality) so they have the same row shape as documents. Guests
    # see none; all other users currently see every subdirectory (there
    # is no per-user ACL for directories yet).
    if is_guest:
        dirlist = []
    else:
        dirlist = [[entry] for entry in _listdir(real_dir)
                   if isdir(path_join(real_dir, entry))]

    # check whether at root, ignoring e.g. possible trailing slashes
    if normpath(real_dir) != normpath(DATA_DIR):
        # path of the parent directory with the DATA_DIR prefix (and the
        # separator following it) cut off
        parent = abspath(path_join(real_dir, '..'))[len(DATA_DIR) + 1:]
        # to get consistent processing client-side, add explicitly to list
        dirlist.append([".."])
    else:
        parent = None

    # combine document and directory lists, adding a column
    # differentiating files from directories and an unused column (can
    # point to a specific annotation) required by the protocol. The
    # values filled here for the first are "c" for "collection"
    # (i.e. directory) and "d" for "document".
    combolist = [["c", None] + row for row in dirlist]
    combolist.extend(["d", None] + row for row in doclist)

    # plug in the search config too
    search_config = get_search_config(real_dir)

    # ... and the disambiguator config ... this is getting a bit much
    disambiguator_config = get_disambiguator_config(real_dir)

    # ... and the normalization config (TODO: rethink)
    normalization_config = get_normalization_config(real_dir)

    # read in README (if any) to send as a description of the
    # collection
    try:
        with open_textfile(path_join(real_dir, "README")) as txt_file:
            readme_text = txt_file.read()
    except IOError:
        readme_text = None

    # fill in a flag for whether annotator logging is active so that
    # the client knows whether to invoke timing actions
    ann_logging = annotation_logging_active(real_dir)

    # fill in NER services, if any
    ner_taggers = get_annotator_config(real_dir)

    return _inject_annotation_type_conf(real_dir, json_dic={
        'items': combolist,
        'header': doclist_header,
        'parent': parent,
        'messages': [],
        'description': readme_text,
        'search_config': search_config,
        'disambiguator_config': disambiguator_config,
        'normalization_config': normalization_config,
        'annotation_logging': ann_logging,
        'ner_taggers': ner_taggers,
    })