def compare(path, doc):
    """Convert the brat annotations for *doc* to FoLiA and verify the result.

    Runs convert() first, then checks that every entity, relation, event and
    attribute from the .ann file is present in the generated FoLiA XML.

    path -- directory prefix (including trailing separator, as concatenated)
    doc  -- document base name without extension

    Returns True when everything was found, False on the first mismatch.
    """
    convert(path, doc)
    ann = Annotations(path + doc)
    fdoc = folia.Document(file=path + doc + ".xml")
    # test entities: the FoLiA element must cover exactly the same token text
    for ent in ann.get_textbounds():
        try:
            found = fdoc[ent.id]
            text = [str(a) for a in found.wrefs()]
            if ent.tail.strip() != " ".join(text):
                print("error: not found entity")
                print(ent)
                return False
        except KeyError:
            print("error: not found entity")
            print(ent)
            return False
    # test relations: both arguments must be referenced from the FoLiA element
    for rel in ann.get_relations():
        try:
            found = fdoc[rel.id]
            arefs = found.select(folia.AlignReference)
            if not (any(a.id == rel.arg1 for a in arefs)
                    and any(a.id == rel.arg2 for a in arefs)):
                print("error: not found relation")
                print(rel)
                return False
        except KeyError:
            print("error: not found relation")
            print(rel)
            return False
    # test events: every role argument must be referenced
    for event in ann.get_events():
        try:
            found = fdoc[event.id]
            arefs = found.select(folia.AlignReference)
            for role, rid in event.args:
                if not any(a.id == rid for a in arefs):
                    # BUGFIX: used to print the stale 'rel' variable from the
                    # previous loop and a "relation" message for event failures.
                    print("error: not found event")
                    print(event)
                    return False
        except KeyError:
            print("error: not found event")
            print(event)
            return False
    # test attributes: a matching Feature (class + subset) must exist
    for attr in ann.get_attributes():
        try:
            found = fdoc[attr.target]
            if not any(fattr.cls == str(attr.value) and fattr.subset == attr.type
                       for fattr in found.select(folia.Feature)):
                print("error: not found attr")
                print(attr)
                print()
                return False
        except KeyError:
            print("error: not found attr")
            # BUGFIX: used to print the stale 'rel' variable here.
            print(attr)
            return False
    print("file " + path + doc + " is OK")
    return True
def convert(path, doc):
    """Convert the brat annotation files <path>/<doc>.ann and <doc>.txt into a
    FoLiA XML document saved as <doc>.xml in the same directory.

    path -- directory containing the document files (no extension, no file name)
    doc  -- document base name without extension
    """
    # NOTE(review): projectconf is never used below — presumably kept for a
    # configuration-loading side effect; confirm before removing entirely.
    projectconf = ProjectConfiguration(path)
    # Use fresh local names instead of rebinding the 'path'/'doc' parameters
    # (the original shadowed both, which made the code hard to follow).
    doc_base = path_join(path, doc)
    ann = Annotations(doc_base + ".ann")
    # Build the FoLiA document from the raw text plus the textbound
    # annotations, then layer relations and comments on top of it.
    fdoc = build_text_structure(ann, doc_base + ".txt")
    add_relations(fdoc, ann)
    add_comments(fdoc, ann)
    fdoc.save(doc_base + ".xml")
def convert(data, src):
    """Convert *data* in source format *src* into a brat document JSON dict.

    Raises InvalidSrcFormat when no converter pair is registered for *src*.
    """
    # Fail early when we have no converter for this source format.
    if src not in CONV_BY_SRC:
        raise InvalidSrcFormat
    conv_text, conv_ann = CONV_BY_SRC[src]

    # Note: due to a lack of refactoring we must write to disk in order to
    # read annotations back; everything goes into a throw-away directory
    # that is removed again on the way out.
    work_dir = None
    try:
        work_dir = mkdtemp()
        base = path_join(work_dir, 'tmp')
        with open_textfile(base + '.txt', 'w') as out:
            out.write(conv_text(data))
        # Touch an empty .ann so Annotations() has a file to open.
        with open(base + '.ann', 'w'):
            pass
        with Annotations(base) as ann_obj:
            for ann in conv_ann(data):
                ann_obj.add_annotation(ann)
        result = _document_json_dict(base)
        # Blank the comments: they rarely do anything good but whine about
        # configuration when the tool is used solely for visualisation.
        result['comments'] = []
        # Ugly hack: ride along with the Stanford tokenisation and sentence
        # splits rather than relying on the ones generated by brat.
        if src.startswith('stanford-'):
            result['token_offsets'] = stanford_token_offsets(data)
            result['sentence_offsets'] = stanford_sentence_offsets(data)
        return result
    finally:
        if work_dir is not None:
            rmtree(work_dir)
def main(args):
    """Entry point: open the video, locate/attach its annotation file, run GUI.

    args.video_file_path      -- path to the video to annotate
    args.annotation_file_path -- optional explicit annotation file; when None
                                 it is derived from the dataset layout below.
    """
    # Imports are here so we don't need to wait for them to load unnecessarily.
    import os
    import tkinter

    from video import Video
    from annotation import Annotations
    from state import State
    import gui

    # Parameters
    video_file_path = args.video_file_path
    annotation_file_path = args.annotation_file_path
    if annotation_file_path is None:
        # Expect the following dir structure:
        # dataset/
        # - videos/
        # - annotations/
        video_dir, video_name = os.path.split(video_file_path)
        # BUGFIX: use splitext instead of split('.')[0] so video names that
        # contain extra dots (e.g. "clip.v2.mp4") still map to the right
        # annotation file name.
        annotation_file_name = os.path.splitext(video_name)[0] + '.pkl'
        annotation_file_dir = os.path.join(video_dir, '..', 'annotations')
        if not os.path.isdir(annotation_file_dir):
            print('Invalid directory structure.')
            return
        annotation_file_path = os.path.join(annotation_file_dir,
                                            annotation_file_name)

    # Load video and annotations and wire them into the shared GUI state.
    video = Video(video_file_path)
    annotations = Annotations(annotation_file_path, video)
    state = State(video, annotations)

    # Create GUI (blocks until the window is closed).
    gui.App(tkinter.Tk(), state)

    # When everything done, release the video capture object
    video.close()
generate = True # "header" and types stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")] if options_get_validation(directory) != 'none': stat_types.append(("Issues", "int")) if generate: # Generate the document statistics from scratch from annotation import JOINED_ANN_FILE_SUFF log_info('generating statistics for "%s"' % directory) docstats = [] for docname in base_names: try: with Annotations(path_join(directory, docname), read_only=True) as ann_obj: tb_count = len([a for a in ann_obj.get_entities()]) rel_count = (len([a for a in ann_obj.get_relations()]) + len([a for a in ann_obj.get_equivs()])) event_count = len([a for a in ann_obj.get_events()]) if options_get_validation(directory) == 'none': docstats.append([tb_count, rel_count, event_count]) else: # verify and include verification issue count try: from projectconfig import ProjectConfiguration projectconf = ProjectConfiguration(directory) from verify_annotations import verify_annotation issues = verify_annotation(ann_obj, projectconf) issue_count = len(issues)
def get_statistics(directory, base_names, use_cache=True):
    """Return (stat_types, docstats) for every document in *base_names*.

    stat_types is a list of (label, type) column headers; docstats holds one
    row per document with entity/relation/event counts, optionally an issue
    count, and the last-modifying user. Results are cached on disk and only
    regenerated when the cache is stale, incomplete, corrupt, or use_cache
    is False.
    """
    # Check if we have a cache of the costly satistics generation
    # Also, only use it if no file is newer than the cache itself
    cache_file_path = get_stat_cache_by_dir(directory)
    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as e:
        if e.errno == 2:
            # errno 2 == ENOENT: no cache yet, force every mtime comparison
            # below to look newer than the (non-existent) cache.
            cache_mtime = -1
        else:
            raise
    try:
        if (not isfile(cache_file_path)
                # Has config.py been changed?
                or getmtime(get_config_py_path()) > cache_mtime
                # Any file has changed in the dir since the cache was generated
                or any(True for f in listdir(directory)
                       if (getmtime(path_join(directory, f)) > cache_mtime
                           # Ignore hidden files
                           and not f.startswith('.')))
                # The configuration is newer than the cache
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating'
                        % cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating'
                    % cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError as e:
        # Any stat() failure above: play it safe and regenerate everything.
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating'
        )
        generate = True

    if not use_cache:
        generate = True

    # "header" and types. NOTE: the labels are user-facing Chinese strings
    # (entity, relation, event / issues / modifier) and must stay as-is.
    stat_types = [("实体", "int"), ("关系", "int"), ("事件", "int")]
    if options_get_validation(directory) != 'none':
        stat_types.append(("观点", "int"))
    stat_types.append(("修改者", "string"))

    if generate:
        # Generate the document statistics from scratch
        from annotation import JOINED_ANN_FILE_SUFF
        log_info('generating statistics for "%s"' % directory)
        docstats = []
        for docname in base_names:
            try:
                # Collect entities, relations, events and modifier here.
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])
                    # Last modifying user; '******' when no session user.
                    try:
                        user = get_session().get('user')
                    except KeyError:
                        user = None
                    if user is None:
                        user = '******'
                    if options_get_validation(directory) == 'none':
                        docstats.append(
                            [tb_count, rel_count, event_count, user])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except BaseException:
                            # TODO: error reporting
                            issue_count = -1
                        docstats.append([
                            tb_count, rel_count, event_count, issue_count, user
                        ])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))
        # NOTE(review): this recomputed 'user' appears unused below — looks
        # like dead code duplicated from the loop body; confirm before removal.
        try:
            user = get_session().get('user')
        except KeyError:
            user = None
        if user is None:
            user = '******'
        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s"
                % (directory, e))
    return stat_types, docstats
# NOTE(review): the first lines are the tail of a dump helper whose def line
# falls outside this chunk; it mirrors update_dump below but pickles a
# SimpleAnnotations object ('sann') instead of a JSON dict.
        full_name = temp_paths[1].replace("/", "")
        temp = open(app_path + full_name, 'wb')
        pickle_dump(sann, temp)
        temp.close()
    except Exception as e:
        Messager.error("Error while caching changes in the annotation file: " + str(e))


def update_dump(j_dic, file_path):
    # Cache j_dic under WORK_DIR/application/, using a flattened version of
    # the part of file_path that follows "/data/" as the cache file name.
    app_path = WORK_DIR + "/application/"
    temp_paths = file_path.split("/data/")
    try:
        full_name = temp_paths[1].replace("/", "")
        temp = open(app_path + full_name, 'wb')
        pickle_dump(j_dic, temp)
        temp.close()
    except Exception as e:
        # Best-effort cache: report the failure but do not propagate it.
        Messager.error("Error while caching changes in the annotation file: " + str(e))


if __name__ == '__main__':
    # Ad-hoc timing harness (Python 2 print statements, hard-coded test path).
    millis = int(round(time.time() * 1000))
    print millis
    ann = Annotations("/home/sander/Documents/Masterproef/brat/data/test")
    sann = SimpleAnnotations(ann)
    print filter_folia(sann)
    millis = int(round(time.time() * 1000)) - millis
    print millis
# NOTE(review): the first lines are the tail of recursive_ann(temp_ann, con,
# ids, ann); the def line and the preceding branch(es) fall outside this
# chunk, so the leading indentation is inferred from the parallel
# BinaryRelationAnnotation branch below — confirm against the full file.
                temp.append(i)
                ids.add(i)
    if isinstance(temp_ann, BinaryRelationAnnotation):
        # Collect the relation's dependency ids we have not visited yet.
        for i in temp_ann.get_deps()[1]:
            if not i in ids:
                temp.append(i)
                ids.add(i)
    # Recurse into every newly collected id that is a textbound annotation.
    for i in temp:
        temp_ann = ann.get_ann_by_id(i)
        if isinstance(temp_ann, TextBoundAnnotation):
            recursive_ann(temp_ann, con, ids, ann)


if __name__ == "__main__":
    from annotation import TextBoundAnnotation, TextAnnotations, EventAnnotation, BinaryRelationAnnotation
    proj = ProjectConfiguration(
        "/home/sander/Documents/Masterproef/brat/data/brat_vb/sentiment")
    ann = Annotations(
        "/home/sander/Documents/Masterproef/brat/data/brat_vb/sentiment/sporza"
    )
    # SPEED TEST: ad-hoc timing harness (Python 2 print statements).
    import time
    millis = int(round(time.time() * 1000))
    print millis
    vrules = ValidationRules(proj)
    for i in vrules.validate(ann)[0]:
        print str(i)
    millis = int(round(time.time() * 1000)) - millis
    print millis