def _report_mismatch(message, item, trailing_blank=False):
    """Print a mismatch message and the offending annotation; return False."""
    # Single-argument print(...) behaves identically under the Python 2
    # print statement and the Python 3 print function.
    print(message)
    print(item)
    if trailing_blank:
        print("")
    return False


def compare(path, doc):
    """Convert a brat document to FoLiA and verify the result.

    Runs ``convert(path, doc)``, then loads the annotations from
    ``path + doc`` and the FoLiA document from ``path + doc + ".xml"`` and
    checks that every text-bound annotation, relation, event and attribute
    can be found in the FoLiA document.

    Note that ``path`` and ``doc`` are concatenated directly here, so
    ``path`` has to end with a path separator.

    Returns True when everything was found; otherwise prints the first
    annotation that is missing or different and returns False.
    """
    convert(path, doc)
    ann = Annotations(path + doc)
    fdoc = folia.Document(file=path + doc + ".xml")

    # test entities: the FoLiA element must cover the same text
    for ent in ann.get_textbounds():
        try:
            found = fdoc[ent.id]
            text = [str(a) for a in found.wrefs()]
            if ent.tail.strip() != " ".join(text):
                return _report_mismatch("error: not found entity", ent)
        except KeyError:
            return _report_mismatch("error: not found entity", ent)

    # test relations: both arguments must be referenced
    for rel in ann.get_relations():
        try:
            found = fdoc[rel.id]
            arefs = found.select(folia.AlignReference)
            has_arg1 = any(a.id == rel.arg1 for a in arefs)
            if not (has_arg1 and any(a.id == rel.arg2 for a in arefs)):
                return _report_mismatch("error: not found relation", rel)
        except KeyError:
            return _report_mismatch("error: not found relation", rel)

    # test events: every argument must be referenced
    for event in ann.get_events():
        try:
            found = fdoc[event.id]
            arefs = found.select(folia.AlignReference)
            for _role, rid in event.args:
                if not any(a.id == rid for a in arefs):
                    # Report the event itself; the previous code printed the
                    # leftover ``rel`` variable from the relations loop.
                    return _report_mismatch("error: not found event", event)
        except KeyError:
            return _report_mismatch("error: not found event", event)

    # test attributes: the target must carry a matching feature
    for attr in ann.get_attributes():
        try:
            found = fdoc[attr.target]
            if not any(fattr.cls == str(attr.value)
                       and fattr.subset == attr.type
                       for fattr in found.select(folia.Feature)):
                return _report_mismatch("error: not found attr", attr,
                                        trailing_blank=True)
        except KeyError:
            # Report the attribute itself rather than the stale ``rel``.
            return _report_mismatch("error: not found attr", attr)

    print("file " + path + doc + " is OK")
    return True
def __init__(self):
    """Create the annotator's model objects and lay out its widgets."""
    # --- state -----------------------------------------------------------
    self.policy = ScalePolicy()
    self.displays = []
    self.xmax = None
    self.xmin = None

    # --- annotation model; context changes are mirrored as buttons --------
    self.annotations = Annotations()
    self.annotations.connect('context-added', self.add_context_button)
    self.annotations.connect('context-removed', self.remove_context_button)
    self.buttons = dict()

    # --- containers for the displays and the context buttons --------------
    self.display_box = gtk.VBox()
    self.context_box = gtk.HBox()

    # --- input handling: selections and annotation clicks open menus ------
    self.input_state = InputState(self.annotations)
    self.input_state.connect('select-selection', self.show_selection_menu)
    self.input_state.connect('select-annotation', self.show_annotation_menu)

    # --- "add context" button, first entry of the context row -------------
    new_context_button = gtk.Button(stock='gtk-add')
    new_context_button.connect('clicked',
                               lambda _button: self.create_context())
    self.context_box.pack_start(new_context_button, expand=False, fill=True)

    # --- display area: vertical scrolling only -----------------------------
    display_window = gtk.ScrolledWindow()
    display_window.add_with_viewport(self.display_box)
    display_window.set_policy(gtk.POLICY_NEVER, gtk.POLICY_AUTOMATIC)

    # --- horizontal scrollbar driven by our own adjustment -----------------
    self.adjustment = gtk.Adjustment()
    self.adjustment.connect('value-changed', self.update_pos)
    self.scroller = gtk.HScrollbar(self.adjustment)
    self.scroller.hide()

    # The base class is initialised here, right before the first packing
    # call on ``self``, exactly as in the original statement order.
    gtk.VBox.__init__(self)
    self.pack_start(display_window, expand=True, fill=True)
    self.pack_start(self.scroller, expand=False, fill=True)
    self.pack_end(self.context_box, expand=False, fill=True)
def read_annotation_file(filename):
    """Load a brat file annotated for co-references.

    Only entities of type 'Actor' or 'Location' are kept.  Relations are
    collected as ``arg1 -> [arg2, ...]`` and handed to ``resolve`` together
    with the kept entities to obtain the chains.

    Returns ``(actors, locations, entities)``: two lists of chains, where a
    chain is a list of ``(entity_id, stripped_text, start, end)`` tuples
    and is filed by the type of its first member, plus the id -> entity
    mapping.
    """
    annotations = Annotations(filename, read_only=True)

    wanted_types = ('Actor', 'Location')
    entities = {}
    for candidate in annotations.get_entities():
        if candidate.type in wanted_types:
            entities[candidate.id] = candidate

    relations = defaultdict(list)
    for link in annotations.get_relations():
        relations[link.arg1].append(link.arg2)

    chains = resolve(relations, entities)

    actors = []
    locations = []
    for id_chain in chains:
        mentions = [
            (entity_id,
             entities[entity_id].tail.strip(),
             entities[entity_id].get_start(),
             entities[entity_id].get_end())
            for entity_id in id_chain
        ]
        # The first mention decides which list the whole chain goes to.
        first_id = mentions[0][0]
        chain_type = entities[first_id].type
        if chain_type == 'Actor':
            actors.append(mentions)
        elif chain_type == 'Location':
            locations.append(mentions)
    return actors, locations, entities
def convert(path, doc):
    """Convert one brat document to a FoLiA XML file.

    ``path`` is joined with ``doc`` to form the document's base path
    (without extension).  ``<base>.ann`` and ``<base>.txt`` are read and
    the resulting document is saved as ``<base>.xml``.
    """
    # Constructed as before; the object itself is not used further here.
    projectconf = ProjectConfiguration(path)

    base_path = path_join(path, doc)
    annotations = Annotations(base_path + ".ann")

    folia_doc = build_text_structure(annotations, base_path + ".txt")
    add_relations(folia_doc, annotations)
    add_comments(folia_doc, annotations)

    # Writing the entity/relation set definitions ("entiteit_set.xml",
    # "relation_set.xml") is currently disabled.
    folia_doc.save(base_path + ".xml")
def convert(data, src):
    """Convert ``data`` in source format ``src`` to a document JSON dict.

    Raises InvalidSrcFormat when ``CONV_BY_SRC`` has no entry for ``src``.
    """
    # Refuse unknown formats before touching the disk
    try:
        converters = CONV_BY_SRC[src]
    except KeyError:
        raise InvalidSrcFormat
    conv_text, conv_ann = converters

    # Note: the annotation reader only works on files, so the converted
    # document is written to a scratch directory first; once that is
    # refactored this can be simplified.
    tmp_dir = mkdtemp()
    try:
        doc_base = path_join(tmp_dir, 'tmp')

        with open_textfile(doc_base + '.txt', 'w') as txt_file:
            txt_file.write(conv_text(data))

        # Create an empty annotation file to add the annotations to
        open(doc_base + '.ann', 'w').close()

        with Annotations(doc_base) as ann_obj:
            for converted in conv_ann(data):
                ann_obj.add_annotation(converted)

        json_dic = _document_json_dict(doc_base)

        # Note: comments are blanked; they mostly complain about the
        # configuration, which is irrelevant for pure visualisation.
        json_dic['comments'] = []

        # Note: ugly hack -- for Stanford output we return Stanford's own
        # tokenisation and sentence splits rather than the ones brat
        # generated.
        if src.startswith('stanford-'):
            json_dic['token_offsets'] = stanford_token_offsets(data)
            json_dic['sentence_offsets'] = stanford_sentence_offsets(data)

        return json_dic
    finally:
        # Always remove the scratch directory
        rmtree(tmp_dir)
def main(args):
    """Open a video with its annotations and start the annotation GUI.

    ``args`` must provide ``video_file_path`` and ``annotation_file_path``.
    When ``annotation_file_path`` is None it is derived from the video
    path, expecting this layout::

        dataset/
          - videos/
          - annotations/

    i.e. ``<video dir>/../annotations/<name>.pkl`` where ``<name>`` is the
    video file name up to its first dot.  If that annotations directory
    does not exist a message is printed and the function returns.
    """
    # Imports are here so we don't need to wait for them to load
    # unnecessarily.
    import os
    import tkinter

    from video import Video
    from annotation import Annotations
    from state import State
    import gui

    # Parameters
    video_file_path = args.video_file_path
    annotation_file_path = args.annotation_file_path
    if annotation_file_path is None:
        video_dir, video_name = os.path.split(video_file_path)
        annotation_file_name = video_name.split('.')[0] + '.pkl'
        annotation_file_dir = os.path.join(video_dir, '..', 'annotations')
        if not os.path.isdir(annotation_file_dir):
            print('Invalid directory structure.')
            return
        annotation_file_path = os.path.join(annotation_file_dir,
                                            annotation_file_name)

    # Load video
    video = Video(video_file_path)
    try:
        annotations = Annotations(annotation_file_path, video)
        state = State(video, annotations)

        # Create GUI
        gui.App(tkinter.Tk(), state)
    finally:
        # Release the video capture object even if loading the annotations
        # or running the GUI raised.
        video.close()
generate = True # "header" and types stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")] if options_get_validation(directory) != 'none': stat_types.append(("Issues", "int")) if generate: # Generate the document statistics from scratch from annotation import JOINED_ANN_FILE_SUFF log_info('generating statistics for "%s"' % directory) docstats = [] for docname in base_names: try: with Annotations(path_join(directory, docname), read_only=True) as ann_obj: tb_count = len([a for a in ann_obj.get_entities()]) rel_count = (len([a for a in ann_obj.get_relations()]) + len([a for a in ann_obj.get_equivs()])) event_count = len([a for a in ann_obj.get_events()]) if options_get_validation(directory) == 'none': docstats.append([tb_count, rel_count, event_count]) else: # verify and include verification issue count try: from projectconfig import ProjectConfiguration projectconf = ProjectConfiguration(directory) from verify_annotations import verify_annotation issues = verify_annotation(ann_obj, projectconf) issue_count = len(issues)
def get_statistics(directory, base_names, use_cache=True):
    """Return annotation statistics for the documents in ``directory``.

    Arguments:
    directory  -- collection directory holding the documents
    base_names -- document base names to produce one row each for
    use_cache  -- when False, always regenerate instead of using the cache

    Returns ``(stat_types, docstats)``.  ``stat_types`` is a list of
    ``(label, type)`` column descriptions; ``docstats`` holds one row per
    entry in ``base_names``: entity, relation (+ equiv) and event counts,
    an issue count when validation is enabled for the directory, and the
    session user.  A row of ``-1`` values marks a document whose
    statistics could not be generated.

    The cache is reused only if it exists, nothing relevant is newer than
    it, and it holds as many rows as there are ``base_names``.
    """
    # Check if we have a cache of the costly statistics generation
    # Also, only use it if no file is newer than the cache itself
    cache_file_path = get_stat_cache_by_dir(directory)
    docstats = []

    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as e:
        if e.errno == 2:  # ENOENT: there is no cache file yet
            cache_mtime = -1
        else:
            raise

    try:
        if (not isfile(cache_file_path)
                # Has config.py been changed?
                or getmtime(get_config_py_path()) > cache_mtime
                # Any file has changed in the dir since the cache was
                # generated
                or any(True for f in listdir(directory)
                       if (getmtime(path_join(directory, f)) > cache_mtime
                           # Ignore hidden files
                           and not f.startswith('.')))
                # The configuration is newer than the cache
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
            docstats = []
        else:
            generate = False
            try:
                # NOTE: the cache is a pickle; loading it is only safe as
                # long as untrusted parties cannot write the cache file.
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating' %
                        cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError as e:
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating'
        )
        generate = True

    if not use_cache:
        generate = True

    # Evaluated once so that the header and every generated row agree on
    # whether the issue column is present.
    validate = options_get_validation(directory) != 'none'

    # "header" and types
    stat_types = [("实体", "int"), ("关系", "int"), ("事件", "int")]

    if validate:
        stat_types.append(("观点", "int"))
    stat_types.append(("修改者", "string"))

    if generate:
        # Generate the document statistics from scratch
        log_info('generating statistics for "%s"' % directory)
        docstats = []
        for docname in base_names:
            try:
                # Gather the entities, relations, events and the modifier
                # (session user) here.
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = sum(1 for _ann in ann_obj.get_entities())
                    rel_count = (
                        sum(1 for _ann in ann_obj.get_relations())
                        + sum(1 for _ann in ann_obj.get_equivs()))
                    event_count = sum(1 for _ann in ann_obj.get_events())

                    try:
                        user = get_session().get('user')
                    except KeyError:
                        user = None
                    if user is None:
                        user = '******'

                    if not validate:
                        docstats.append(
                            [tb_count, rel_count, event_count, user])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except Exception:
                            # TODO: error reporting
                            # (Exception rather than BaseException so that
                            # KeyboardInterrupt/SystemExit still propagate)
                            issue_count = -1
                        docstats.append([
                            tb_count, rel_count, event_count, issue_count,
                            user
                        ])
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing.
                # No session lookup here: its result was never used and a
                # failure in it would have defeated the silent handling.
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s"
                % (directory, e))

    return stat_types, docstats
full_name = temp_paths[1].replace("/", "") temp = open(app_path + full_name, 'wb') pickle_dump(sann, temp) temp.close() except Exception as e: Messager.error("Error while caching changes in the annotation file: " + str(e)) def update_dump(j_dic, file_path): app_path = WORK_DIR + "/application/" temp_paths = file_path.split("/data/") try: full_name = temp_paths[1].replace("/", "") temp = open(app_path + full_name, 'wb') pickle_dump(j_dic, temp) temp.close() except Exception as e: Messager.error("Error while caching changes in the annotation file: " + str(e)) if __name__ == '__main__': millis = int(round(time.time() * 1000)) print millis ann = Annotations("/home/sander/Documents/Masterproef/brat/data/test") sann = SimpleAnnotations(ann) print filter_folia(sann) millis = int(round(time.time() * 1000)) - millis print millis
class CtxAnnotator(gtk.VBox):
    """Widget combining source displays, a context button row and a
    horizontal scrollbar around one shared ``Annotations`` model."""

    def __init__(self):
        """Create the model objects and lay out the child widgets."""
        self.policy = ScalePolicy()
        self.displays = []
        # Overall horizontal bounds; None until recalculate() finds some.
        self.xmax = None
        self.xmin = None

        # Context changes in the model are mirrored as buttons.
        self.annotations = Annotations()
        self.annotations.connect('context-added', self.add_context_button)
        self.annotations.connect('context-removed',
                                 self.remove_context_button)
        self.buttons = dict()  # context name -> ContextButton

        self.display_box = gtk.VBox()
        self.context_box = gtk.HBox()

        self.input_state = InputState(self.annotations)
        self.input_state.connect('select-selection',
                                 self.show_selection_menu)
        self.input_state.connect('select-annotation',
                                 self.show_annotation_menu)

        add_button = gtk.Button(stock='gtk-add')
        add_button.connect('clicked', lambda but: self.create_context())
        self.context_box.pack_start(add_button, expand=False, fill=True)

        # Displays scroll vertically only; horizontal movement goes
        # through self.adjustment / self.scroller.
        scr_win = gtk.ScrolledWindow()
        scr_win.add_with_viewport(self.display_box)
        scr_win.set_policy(gtk.POLICY_NEVER, gtk.POLICY_AUTOMATIC)

        self.adjustment = gtk.Adjustment()
        self.adjustment.connect('value-changed', self.update_pos)
        self.scroller = gtk.HScrollbar(self.adjustment)
        self.scroller.hide()

        gtk.VBox.__init__(self)
        self.pack_start(scr_win, expand=True, fill=True)
        self.pack_start(self.scroller, expand=False, fill=True)
        self.pack_end(self.context_box, expand=False, fill=True)

    def update_pos(self, adj):
        """Adjustment handler: pass the new position to the policy and
        refresh every display."""
        self.policy.update_pos(adj.value)
        for d in self.displays:
            d.update_zoom(self.policy)

    def find_annotation(self, x):
        """Return the first annotation the model reports at ``x``, or
        None when there is none."""
        hits = self.annotations.find_annotation(x)
        # Compare by value; ``is 0`` only worked through int caching.
        if len(hits) == 0:
            return None
        return hits[0]

    def bigger(self):
        """Ask the policy for a bigger x scale and refresh."""
        self.policy.biggerx()
        self.update_zoom()

    def smaller(self):
        """Ask the policy for a smaller x scale and refresh."""
        self.policy.smallerx()
        self.update_zoom()

    def update_zoom(self):
        """Bring the adjustment, the scrollbar visibility and all displays
        in line with the current bounds and policy."""
        if self.xmax is None:
            self.scroller.hide()
            return
        upper = self.xmax - self.policy.get_window()
        self.adjustment.lower = self.xmin
        self.adjustment.upper = upper
        # Keep the current value inside [xmin, upper].
        if upper < self.adjustment.value:
            self.adjustment.value = upper
        if self.xmin > self.adjustment.value:
            self.adjustment.value = self.xmin
        # The scrollbar is only shown when there is a range to scroll.
        if self.xmin < upper:
            self.scroller.show()
        else:
            self.scroller.hide()
        self.adjustment.step_increment = self.policy.get_steps()
        self.adjustment.page_increment = self.policy.get_pages()
        self.adjustment.changed()
        for d in self.displays:
            d.update_zoom(self.policy)

    def recalculate(self):
        """Recompute the overall bounds from all sources and from the
        annotations, then refresh the zoom."""
        xmin = None
        xmax = None
        for d in self.displays:
            lo, hi = d.src.get_time_bounds()
            if xmin is None or lo < xmin:
                xmin = lo
            if xmax is None or hi > xmax:
                xmax = hi
        ann_l, ann_r = self.annotations.bounds()
        # With no displays xmin/xmax are still None here; the annotation
        # bounds must then be taken as they are instead of being compared
        # against None (which left xmin unset while xmax was set).
        if ann_l is not None and (xmin is None or ann_l < xmin):
            xmin = ann_l
        if ann_r is not None and (xmax is None or ann_r > xmax):
            xmax = ann_r
        self.xmin = xmin
        self.xmax = xmax
        if xmin is not None:
            self.policy.update_min(xmin)
        self.update_zoom()

    def add_source(self, src):
        """Add a display for ``src``, framed with its name as a title and
        a remove button, and recompute the bounds."""
        disp = Display(src, self.annotations, self.input_state)
        self.displays.append(disp)
        frame = gtk.Table(3, 2)
        cont = gtk.Frame()
        cont.set_shadow_type(gtk.SHADOW_ETCHED_OUT)
        cont.add(disp)
        frame.attach(cont, 0, 1, 1, 3,
                     gtk.EXPAND | gtk.FILL, gtk.EXPAND | gtk.FILL)
        lbl = gtk.Label()
        # NOTE(review): the name goes into the markup unescaped -- confirm
        # that source names cannot contain markup characters.
        lbl.set_markup("<b>" + src.get_name() + "</b>")
        lbl.set_alignment(0.0, 0.5)
        frame.attach(lbl, 0, 1, 0, 1,
                     gtk.EXPAND | gtk.FILL, gtk.SHRINK | gtk.FILL)
        rem_but = gtk.Button()
        rem_but.set_image(gtk.image_new_from_stock(gtk.STOCK_DELETE,
                                                   gtk.ICON_SIZE_MENU))
        rem_but.connect('clicked', self.remove_source_handler, frame, disp)
        frame.attach(rem_but, 1, 2, 1, 2,
                     gtk.SHRINK | gtk.FILL, gtk.SHRINK | gtk.FILL)
        # Empty box filling the cell below the remove button.
        frame.attach(gtk.VBox(), 1, 2, 2, 3, gtk.SHRINK, gtk.EXPAND)
        frame.show_all()
        self.display_box.pack_start(frame, expand=True, fill=True)
        self.recalculate()

    def remove_source_handler(self, but, frame, display):
        """Remove-button handler: drop the display and its frame, then
        recompute the bounds."""
        self.display_box.remove(frame)
        self.displays.remove(display)
        frame.destroy()
        self.recalculate()

    def add_context(self, name):
        """Add the context ``name`` to the annotation model."""
        self.annotations.add_context(name)

    def add_context_button(self, model, name, color):
        """'context-added' handler: create and show the context's button."""
        but = ContextButton(name, color, self)
        but.show_all()
        self.context_box.pack_start(but, expand=False, fill=True)
        self.buttons[name] = but

    def remove_context_button(self, model, name):
        """'context-removed' handler: remove the context's button."""
        but = self.buttons[name]
        self.context_box.remove(but)
        del self.buttons[name]

    def remove_context(self, name):
        """Remove the context ``name`` from the annotation model."""
        self.annotations.remove_context(name)

    def create_context(self):
        """Ask for a context name in a modal dialog and add that context.

        Nothing is added when the dialog is dismissed without confirming.
        """
        dialog = gtk.MessageDialog(
            None,
            gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
            gtk.MESSAGE_QUESTION,
            gtk.BUTTONS_OK,
            None)
        dialog.set_markup(_("Please enter the <b>name</b> of the context"))
        entry = gtk.Entry()
        # Pressing Enter in the entry confirms the dialog.
        entry.connect("activate",
                      lambda wid: dialog.response(gtk.RESPONSE_OK))
        dialog.vbox.pack_end(entry, expand=True, fill=True)
        dialog.show_all()
        try:
            # Only a confirmed dialog creates a context; closing the
            # window yields a different response.
            if dialog.run() == gtk.RESPONSE_OK:
                self.add_context(entry.get_text())
        finally:
            dialog.destroy()

    def add_annotation(self, name, start, end):
        """Add an annotation ``name`` spanning ``start``..``end``."""
        self.annotations.add_annotation(name, start, end)

    def create_annotation(self, name):
        """Annotate the current selection with ``name``; does nothing when
        there is no selection."""
        if self.input_state.selection is not None:
            (start, end) = self.input_state.selection
            self.add_annotation(name, start, end)

    def show_selection_menu(self, state, display, boundl, boundr, time):
        """'select-selection' handler: pop up the selection menu."""
        menu = SelectionMenu(self, display)
        menu.show_all()
        menu.popup(None, None, None, 3, time)

    def show_annotation_menu(self, state, display, id, time):
        """'select-annotation' handler: pop up the annotation's menu."""
        menu = AnnotationMenu(self, id)
        menu.show_all()
        menu.popup(None, None, None, 3, time)

    def remove_annotation(self, id):
        """Remove the annotation ``id`` from the model."""
        self.annotations.remove_annotation(id)

    def _build_package(self):
        """Bundle the current sources and annotations into an AnnPkg."""
        return AnnPkg([(disp.src, None) for disp in self.displays],
                      list(self.annotations))

    def _load_package(self, pkg):
        """Add the annotations and the non-None sources of ``pkg``."""
        for (name, start, end) in pkg.annotations:
            self.annotations.add_annotation(name, start, end)
        for (src, anns) in pkg.sources:
            if src is not None:
                self.add_source(src)

    def write_out(self, fn):
        """Write the current sources and annotations to ``fn``."""
        self._build_package().write(fn)

    def read_in(self, fn):
        """Load a package from ``fn`` and add its content."""
        self._load_package(AnnPkg.load(fn))

    def export(self, fn, cb=None, end_cb=None):
        """Export the current sources and annotations to ``fn``; ``cb``
        and ``end_cb`` are handed on to ``AnnPkg.export``."""
        self._build_package().export(fn, cb, end_cb)

    def importer(self, fn):
        """Import a package from ``fn`` and add its content."""
        self._load_package(import_file(fn))

    def read_annotations(self, fn):
        """Read annotations from ``fn`` into the model; any error is shown
        in an error dialog instead of being raised."""
        try:
            self.annotations.read(fn)
        except Exception as e:
            warning = gtk.MessageDialog(type=gtk.MESSAGE_ERROR,
                                        buttons=gtk.BUTTONS_OK,
                                        message_format=str(e))
            warning.run()
            warning.destroy()
temp.append(i) ids.add(i) if isinstance(temp_ann, BinaryRelationAnnotation): for i in temp_ann.get_deps()[1]: if not i in ids: temp.append(i) ids.add(i) for i in temp: temp_ann = ann.get_ann_by_id(i) if isinstance(temp_ann, TextBoundAnnotation): recursive_ann(temp_ann, con, ids, ann) if __name__ == "__main__": from annotation import TextBoundAnnotation, TextAnnotations, EventAnnotation, BinaryRelationAnnotation proj = ProjectConfiguration( "/home/sander/Documents/Masterproef/brat/data/brat_vb/sentiment") ann = Annotations( "/home/sander/Documents/Masterproef/brat/data/brat_vb/sentiment/sporza" ) #SPEED TEST import time millis = int(round(time.time() * 1000)) print millis vrules = ValidationRules(proj) for i in vrules.validate(ann)[0]: print str(i) millis = int(round(time.time() * 1000)) - millis print millis