Example #1
0
def compare(path,doc):
    """Convert a brat document to FoLiA and verify the result against the source.

    Runs ``convert(path, doc)``, then loads the brat annotations from
    ``path+doc`` and the FoLiA document from ``path+doc+".xml"`` and checks:

    * every text-bound annotation exists in the FoLiA document under the same
      id, and its word references joined by single spaces equal the stripped
      annotation tail;
    * every relation exists and has alignment references to both arguments;
    * every event exists and has an alignment reference for each argument;
    * every attribute's target exists and carries a feature whose class is
      ``str(attr.value)`` and whose subset is ``attr.type``.

    The first failure is printed together with the offending annotation and
    ends the comparison.

    Returns True when all checks pass, False otherwise.
    """
    def fail(message, item):
        # Report the problem and the annotation that caused it.
        print(message)
        print(item)
        return False

    convert(path,doc)
    ann = Annotations(path+doc)
    fdoc = folia.Document(file=path+doc+".xml")

    #test entities
    for ent in ann.get_textbounds():
        try:
            found = fdoc[ent.id]
            text = [str(a) for a in found.wrefs()]
            if ent.tail.strip() != " ".join(text):
                return fail("error: not found entity", ent)
        except KeyError:
            return fail("error: not found entity", ent)

    #test relations
    for rel in ann.get_relations():
        try:
            found = fdoc[rel.id]
            # Collect the ids once: the result of select() may be a one-shot
            # iterator, and it has to be searched for both arguments.
            aref_ids = set(a.id for a in found.select(folia.AlignReference))
            if not (rel.arg1 in aref_ids and rel.arg2 in aref_ids):
                return fail("error: not found relation", rel)
        except KeyError:
            return fail("error: not found relation", rel)

    #test events
    for event in ann.get_events():
        try:
            found = fdoc[event.id]
            # Same as above: the ids are searched once per event argument.
            aref_ids = set(a.id for a in found.select(folia.AlignReference))
            for role,rid in event.args:
                if rid not in aref_ids:
                    return fail("error: not found event", event)
        except KeyError:
            return fail("error: not found event", event)

    #test attributes
    for attr in ann.get_attributes():
        try:
            found = fdoc[attr.target]
            if not any(fattr.cls == str(attr.value) and fattr.subset == attr.type
                       for fattr in found.select(folia.Feature)):
                fail("error: not found attr", attr)
                # Blank separator line after a mismatching attribute.
                print("")
                return False
        except KeyError:
            return fail("error: not found attr", attr)

    print("file "+path+doc+" is OK")
    return True
Example #2
0
    def __init__(self):
        """Create the model objects, then assemble the widget tree."""
        # --- state shared by all displays ---------------------------------
        self.policy = ScalePolicy()
        self.displays = []
        self.xmax = None
        self.xmin = None
        self.annotations = Annotations()
        # Keep the row of context buttons in sync with the annotation model.
        self.annotations.connect('context-added',self.add_context_button)
        self.annotations.connect('context-removed',self.remove_context_button)
        self.buttons = {}

        # --- containers and input handling --------------------------------
        self.display_box = gtk.VBox()
        self.context_box = gtk.HBox()
        self.input_state = InputState(self.annotations)
        self.input_state.connect('select-selection',self.show_selection_menu)
        self.input_state.connect('select-annotation',self.show_annotation_menu)
        # The "add" button opens the dialog that asks for a new context name.
        new_context_button = gtk.Button(stock='gtk-add')
        new_context_button.connect('clicked',
                                   lambda _button: self.create_context())
        self.context_box.pack_start(new_context_button,expand=False,fill=True)

        # The displays scroll vertically only; horizontal movement goes
        # through the separate scrollbar created below.
        scrolled = gtk.ScrolledWindow()
        scrolled.add_with_viewport(self.display_box)
        scrolled.set_policy(gtk.POLICY_NEVER,gtk.POLICY_AUTOMATIC)

        self.adjustment = gtk.Adjustment()
        self.adjustment.connect('value-changed',self.update_pos)
        self.scroller = gtk.HScrollbar(self.adjustment)
        self.scroller.hide()

        # The VBox base class is initialised only here, right before the
        # children are packed into this widget.
        gtk.VBox.__init__(self)
        self.pack_start(scrolled,expand=True,fill=True)
        self.pack_start(self.scroller,expand=False,fill=True)
        self.pack_end(self.context_box,expand=False,fill=True)
Example #3
0
def read_annotation_file(filename):
    """Read a brat file annotated for co-references.

    Only entities of type 'Actor' or 'Location' are considered. The relations
    of the file are turned into a mapping from first argument to the list of
    second arguments and handed to ``resolve`` together with the entities to
    obtain the co-reference chains.

    Returns ``(actors, locations, entities)``: two lists of chains and the
    id -> entity mapping. Each chain is a list of
    ``(entity id, stripped text, start, end)`` tuples and is filed under the
    type of its first entity.
    """
    annotations = Annotations(filename, read_only=True)

    wanted_types = ('Actor', 'Location')
    entities = {}
    for candidate in annotations.get_entities():
        if candidate.type in wanted_types:
            entities[candidate.id] = candidate

    relations = defaultdict(list)
    for link in annotations.get_relations():
        relations[link.arg1].append(link.arg2)

    actors = []
    locations = []
    for members in resolve(relations, entities):
        mentions = []
        for ident in members:
            mentions.append((ident,
                             entities[ident].tail.strip(),
                             entities[ident].get_start(),
                             entities[ident].get_end()))
        # The first mention decides which list the whole chain belongs to.
        head_type = entities[mentions[0][0]].type
        if head_type == 'Actor':
            actors.append(mentions)
        elif head_type == 'Location':
            locations.append(mentions)
    return actors, locations, entities
Example #4
0
def compare(path,doc):
    """Convert a brat document to FoLiA and verify the result against the source.

    Runs ``convert(path, doc)``, then loads the brat annotations from
    ``path+doc`` and the FoLiA document from ``path+doc+".xml"`` and checks:

    * every text-bound annotation exists in the FoLiA document under the same
      id, and its word references joined by single spaces equal the stripped
      annotation tail;
    * every relation exists and has alignment references to both arguments;
    * every event exists and has an alignment reference for each argument;
    * every attribute's target exists and carries a feature whose class is
      ``str(attr.value)`` and whose subset is ``attr.type``.

    The first failure is printed together with the offending annotation and
    ends the comparison.

    Returns True when all checks pass, False otherwise.
    """
    def fail(message, item):
        # Report the problem and the annotation that caused it.
        print(message)
        print(item)
        return False

    convert(path,doc)
    ann = Annotations(path+doc)
    fdoc = folia.Document(file=path+doc+".xml")

    #test entities
    for ent in ann.get_textbounds():
        try:
            found = fdoc[ent.id]
            text = [str(a) for a in found.wrefs()]
            if ent.tail.strip() != " ".join(text):
                return fail("error: not found entity", ent)
        except KeyError:
            return fail("error: not found entity", ent)

    #test relations
    for rel in ann.get_relations():
        try:
            found = fdoc[rel.id]
            # Collect the ids once: the result of select() may be a one-shot
            # iterator, and it has to be searched for both arguments.
            aref_ids = set(a.id for a in found.select(folia.AlignReference))
            if not (rel.arg1 in aref_ids and rel.arg2 in aref_ids):
                return fail("error: not found relation", rel)
        except KeyError:
            return fail("error: not found relation", rel)

    #test events
    for event in ann.get_events():
        try:
            found = fdoc[event.id]
            # Same as above: the ids are searched once per event argument.
            aref_ids = set(a.id for a in found.select(folia.AlignReference))
            for role,rid in event.args:
                if rid not in aref_ids:
                    return fail("error: not found event", event)
        except KeyError:
            return fail("error: not found event", event)

    #test attributes
    for attr in ann.get_attributes():
        try:
            found = fdoc[attr.target]
            if not any(fattr.cls == str(attr.value) and fattr.subset == attr.type
                       for fattr in found.select(folia.Feature)):
                fail("error: not found attr", attr)
                # Blank separator line after a mismatching attribute.
                print("")
                return False
        except KeyError:
            return fail("error: not found attr", attr)

    print("file "+path+doc+" is OK")
    return True
Example #5
0
def convert(path,doc):
    """Convert one brat document into a FoLiA XML file.

    ``path`` is joined with ``doc`` to form the document's base file name;
    the annotations are read from ``<base>.ann``, the text from
    ``<base>.txt`` and the result is saved as ``<base>.xml``.
    ``path`` on its own is also handed to ProjectConfiguration.
    """
    # Constructed exactly as before; the object itself is not used below.
    project_config = ProjectConfiguration(path)

    base_name = path_join(path,doc)
    annotations = Annotations(base_name+".ann")

    # Build the text structure first, then attach relations and comments.
    folia_doc = build_text_structure(annotations,base_name+".txt")
    add_relations(folia_doc,annotations)
    add_comments(folia_doc,annotations)

    # Writing the entity / relation set definitions (build_entity_set,
    # build_relations_set) to separate XML files is currently disabled.
    folia_doc.save(base_name+".xml")
Example #6
0
def convert(data, src):
    """Convert *data* in source format *src* into a document JSON dictionary.

    Looks up the (text converter, annotation converter) pair for *src*,
    writes the converted text and annotations to a temporary document and
    returns the dictionary produced for that document with its comments
    blanked. For sources whose name starts with ``'stanford-'`` the token and
    sentence offsets are replaced by the Stanford ones.

    Raises InvalidSrcFormat when no converter is registered for *src*.
    The temporary directory is always removed again.
    """
    # Refuse unknown formats before anything touches the disk.
    try:
        converters = CONV_BY_SRC[src]
    except KeyError:
        raise InvalidSrcFormat
    conv_text, conv_ann = converters

    # Note: annotations can currently only be read from disk, so the document
    #   takes a detour through a temporary directory; once that is refactored
    #   the code below can be simplified.
    tmp_dir = mkdtemp()
    try:
        doc_base = path_join(tmp_dir, 'tmp')
        text_path = doc_base + '.txt'
        ann_path = doc_base + '.ann'

        with open_textfile(text_path, 'w') as txt_file:
            txt_file.write(conv_text(data))
        # Start from an empty annotation file.
        with open(ann_path, 'w'):
            pass

        with Annotations(doc_base) as ann_obj:
            for converted in conv_ann(data):
                ann_obj.add_annotation(converted)

        json_dic = _document_json_dict(doc_base)
        # Note: comments are dropped; when the tool is used purely for
        #   visualisation they mostly complain about configuration.
        json_dic['comments'] = []

        # Note: ugly hack. For Stanford output, return Stanford's own
        #   tokenisation and sentence splits instead of the ones generated
        #   by brat.
        if src.startswith('stanford-'):
            json_dic['token_offsets'] = stanford_token_offsets(data)
            json_dic['sentence_offsets'] = stanford_sentence_offsets(data)

        return json_dic
    finally:
        rmtree(tmp_dir)
Example #7
0
def main(args):
    """Open the annotation GUI for one video.

    ``args`` must provide ``video_file_path`` and ``annotation_file_path``.
    When ``annotation_file_path`` is None the annotation file is looked up
    next to the video, assuming this layout::

        dataset/
        - videos/
        - annotations/

    i.e. ``<video dir>/../annotations/<video name up to its first dot>.pkl``.
    If that annotations directory does not exist a message is printed and the
    function returns without opening anything.

    The video is closed again when the GUI is done, and also when loading
    the annotations or running the GUI raises.
    """
    import os

    # Parameters
    video_file_path = args.video_file_path
    annotation_file_path = args.annotation_file_path

    if annotation_file_path is None:
        video_dir, video_name = os.path.split(video_file_path)
        # Everything after the first dot of the file name is dropped.
        annotation_file_name = video_name.split('.')[0] + '.pkl'
        annotation_file_dir = os.path.join(video_dir, '..', 'annotations')
        if not os.path.isdir(annotation_file_dir):
            print('Invalid directory structure.')
            return
        annotation_file_path = os.path.join(annotation_file_dir,
                                            annotation_file_name)

    # The remaining imports are slow to load, so they are only done once it
    # is clear that there is something to open.
    import tkinter

    from video import Video
    from annotation import Annotations
    from state import State
    import gui

    # Load Video
    video = Video(video_file_path)
    try:
        annotations = Annotations(annotation_file_path, video)
        state = State(video, annotations)

        # Create GUI
        gui.App(tkinter.Tk(), state)
    finally:
        # Release the video capture object whatever happened above.
        video.close()
Example #8
0
        generate = True

    # "header" and types
    stat_types = [("Entities", "int"), ("Relations", "int"), ("Events", "int")]

    if options_get_validation(directory) != 'none':
        stat_types.append(("Issues", "int"))

    if generate:
        # Generate the document statistics from scratch
        from annotation import JOINED_ANN_FILE_SUFF
        log_info('generating statistics for "%s"' % directory)
        docstats = []
        for docname in base_names:
            try:
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = len([a for a in ann_obj.get_entities()])
                    rel_count = (len([a for a in ann_obj.get_relations()]) +
                                 len([a for a in ann_obj.get_equivs()]))
                    event_count = len([a for a in ann_obj.get_events()])

                    if options_get_validation(directory) == 'none':
                        docstats.append([tb_count, rel_count, event_count])
                    else:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
Example #9
0
def get_statistics(directory, base_names, use_cache=True):
    """Return ``(stat_types, docstats)`` for the documents in *directory*.

    ``stat_types`` is the table header, a list of ``(label, type)`` pairs.
    ``docstats`` holds one row per entry of *base_names*: entity count,
    relation count (relations plus equivs), event count, the verification
    issue count (only when validation is enabled for the directory; ``-1``
    if verification failed) and the user name from the current session
    (``'******'`` when there is none). A document whose statistics could not
    be generated at all gets a row of ``-1`` values.

    Rows come from the pickled statistics cache unless *use_cache* is false
    or the cache is missing, unreadable, has a different number of rows than
    *base_names*, or is older than config.py, the directory configuration or
    any non-hidden file in *directory*. Regenerated rows are written back to
    the cache; a failure to write is only reported as a warning.
    """
    import errno

    # Check if we have a cache of the costly statistics generation
    # Also, only use it if no file is newer than the cache itself
    cache_file_path = get_stat_cache_by_dir(directory)

    try:
        cache_mtime = getmtime(cache_file_path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # No cache file yet.
        cache_mtime = -1

    docstats = []
    try:
        if (not isfile(cache_file_path)
                # Has config.py been changed?
                or getmtime(get_config_py_path()) > cache_mtime
                # Any file has changed in the dir since the cache was
                # generated (hidden files are ignored, so they are not
                # even stat'ed)
                or any(True for f in listdir(directory)
                       if (not f.startswith('.')
                           and getmtime(path_join(directory, f)) > cache_mtime))
                # The configuration is newer than the cache
                or getmtime(get_config_path(directory)) > cache_mtime):
            generate = True
        else:
            generate = False
            try:
                # NOTE(review): the cache is unpickled, which is only safe as
                # long as nobody untrusted can write to the cache location.
                with open(cache_file_path, 'rb') as cache_file:
                    docstats = pickle_load(cache_file)
                if len(docstats) != len(base_names):
                    Messager.warning(
                        'Stats cache %s was incomplete; regenerating' %
                        cache_file_path)
                    generate = True
                    docstats = []
            except UnpicklingError:
                # Corrupt data, re-generate
                Messager.warning(
                    'Stats cache %s was corrupted; regenerating' %
                    cache_file_path, -1)
                generate = True
            except EOFError:
                # Corrupt data, re-generate
                generate = True
    except OSError:
        Messager.warning(
            'Failed checking file modification times for stats cache check; regenerating'
        )
        generate = True

    if not use_cache:
        generate = True

    validation_enabled = options_get_validation(directory) != 'none'

    # "header" and types
    # The labels are user-facing: entities, relations, events ...
    stat_types = [("实体", "int"), ("关系", "int"), ("事件", "int")]

    # ... the issue-count column, only present with validation ...
    if validation_enabled:
        stat_types.append(("观点", "int"))

    # ... and the user ("modifier") column.
    stat_types.append(("修改者", "string"))

    if generate:
        # Generate the document statistics from scratch
        log_info('generating statistics for "%s"' % directory)
        docstats = []

        # The session user is the same for every row, so look it up once.
        try:
            user = get_session().get('user')
        except KeyError:
            user = None
        if user is None:
            user = '******'

        for docname in base_names:
            try:
                # Collect entities, relations, events and the user here.
                with Annotations(path_join(directory, docname),
                                 read_only=True) as ann_obj:
                    tb_count = sum(1 for _ in ann_obj.get_entities())
                    rel_count = (sum(1 for _ in ann_obj.get_relations()) +
                                 sum(1 for _ in ann_obj.get_equivs()))
                    event_count = sum(1 for _ in ann_obj.get_events())

                    row = [tb_count, rel_count, event_count]
                    if validation_enabled:
                        # verify and include verification issue count
                        try:
                            from projectconfig import ProjectConfiguration
                            projectconf = ProjectConfiguration(directory)
                            from verify_annotations import verify_annotation
                            issues = verify_annotation(ann_obj, projectconf)
                            issue_count = len(issues)
                        except Exception:
                            # TODO: error reporting
                            # Only ordinary errors are swallowed here, so
                            # KeyboardInterrupt / SystemExit still propagate.
                            issue_count = -1
                        row.append(issue_count)
                    row.append(user)
                    docstats.append(row)
            except Exception as e:
                log_info('Received "%s" when trying to generate stats' % e)
                # Pass exceptions silently, just marking stats missing
                docstats.append([-1] * len(stat_types))

        # Cache the statistics
        try:
            with open(cache_file_path, 'wb') as cache_file:
                pickle_dump(docstats, cache_file)
        except IOError as e:
            Messager.warning(
                "Could not write statistics cache file to directory %s: %s" %
                (directory, e))

    return stat_types, docstats
Example #10
0
        full_name = temp_paths[1].replace("/", "")
        temp = open(app_path + full_name, 'wb')
        pickle_dump(sann, temp)
        temp.close()
    except Exception as e:
        Messager.error("Error while caching changes in the annotation file: " +
                       str(e))


def update_dump(j_dic, file_path):
    """Pickle *j_dic* into the application cache directory.

    The cache file lives in ``WORK_DIR + "/application/"`` and is named
    after the part of *file_path* that follows ``"/data/"``, with every
    ``"/"`` removed.

    Nothing is raised on failure: any error, including a *file_path* that
    does not contain ``"/data/"``, is reported through ``Messager.error``.
    """
    app_path = WORK_DIR + "/application/"
    temp_paths = file_path.split("/data/")
    try:
        full_name = temp_paths[1].replace("/", "")
        # "with" closes the file even when pickling fails half-way.
        with open(app_path + full_name, 'wb') as temp:
            pickle_dump(j_dic, temp)
    except Exception as e:
        Messager.error("Error while caching changes in the annotation file: " +
                       str(e))


if __name__ == '__main__':
    # Crude timing run: print the start time in milliseconds, the filtered
    # annotations of a hard-coded test document, and the elapsed milliseconds.
    millis = int(round(time.time() * 1000))
    print(millis)

    ann = Annotations("/home/sander/Documents/Masterproef/brat/data/test")
    sann = SimpleAnnotations(ann)
    print(filter_folia(sann))

    millis = int(round(time.time() * 1000)) - millis
    print(millis)
Example #11
0
class CtxAnnotator(gtk.VBox):
    """Annotation editor widget.

    Stacks one Display per data source in a vertically scrolling area, with
    a horizontal scrollbar below it and a row of context buttons at the
    bottom. All displays share one Annotations model, one InputState and one
    ScalePolicy.
    """

    def __init__(self):
        """Create the model objects, then assemble the widget tree."""
        self.policy = ScalePolicy()
        self.displays = []
        # Overall time bounds over all sources and annotations; None until
        # recalculate() has something to work with.
        self.xmax = None
        self.xmin = None
        self.annotations = Annotations()
        # Keep the row of context buttons in sync with the annotation model.
        self.annotations.connect('context-added',self.add_context_button)
        self.annotations.connect('context-removed',self.remove_context_button)
        self.buttons = dict()

        self.display_box = gtk.VBox()
        self.context_box = gtk.HBox()
        self.input_state = InputState(self.annotations)
        self.input_state.connect('select-selection',self.show_selection_menu)
        self.input_state.connect('select-annotation',self.show_annotation_menu)
        add_button = gtk.Button(stock='gtk-add')
        add_button.connect('clicked',lambda but: self.create_context())
        self.context_box.pack_start(add_button,expand=False,fill=True)

        # The displays scroll vertically only; horizontal movement goes
        # through the separate scrollbar below.
        scr_win = gtk.ScrolledWindow()
        scr_win.add_with_viewport(self.display_box)
        scr_win.set_policy(gtk.POLICY_NEVER,gtk.POLICY_AUTOMATIC)

        self.adjustment = gtk.Adjustment()
        self.adjustment.connect('value-changed',self.update_pos)
        self.scroller = gtk.HScrollbar(self.adjustment)
        self.scroller.hide()
        gtk.VBox.__init__(self)
        self.pack_start(scr_win,expand=True,fill=True)
        self.pack_start(self.scroller,expand=False,fill=True)
        self.pack_end(self.context_box,expand=False,fill=True)

    def _redraw_displays(self):
        """Let every display pick up the current scale policy."""
        for d in self.displays:
            d.update_zoom(self.policy)

    def _package(self):
        """Bundle the current sources and annotations into an AnnPkg."""
        return AnnPkg([(disp.src,None) for disp in self.displays],
                      [ann for ann in self.annotations])

    def _merge_package(self,pkg):
        """Add the annotations and the non-None sources of *pkg*."""
        for (name,start,end) in pkg.annotations:
            self.annotations.add_annotation(name,start,end)
        for (src,anns) in pkg.sources:
            if src is not None:
                self.add_source(src)

    def update_pos(self,adj):
        """Scrollbar handler: move the policy to ``adj.value`` and redraw."""
        self.policy.update_pos(adj.value)
        self._redraw_displays()

    def find_annotation(self,x):
        """Return the first annotation the model finds at *x*, or None."""
        hits = self.annotations.find_annotation(x)
        # Compare by value; "is 0" would test object identity.
        if len(hits) == 0:
            return None
        return hits[0]

    def bigger(self):
        """Zoom in horizontally."""
        self.policy.biggerx()
        self.update_zoom()

    def smaller(self):
        """Zoom out horizontally."""
        self.policy.smallerx()
        self.update_zoom()

    def update_zoom(self):
        """Fit the scrollbar to the current bounds and zoom, then redraw.

        Without known bounds the scrollbar is just hidden. Otherwise the
        adjustment ranges from ``xmin`` to ``xmax`` minus the policy's
        window, its value is clamped into that range, and the scrollbar is
        shown only when the range is not empty.
        """
        if self.xmax is None or self.xmin is None:
            self.scroller.hide()
            return
        upper = self.xmax - self.policy.get_window()
        self.adjustment.lower = self.xmin
        self.adjustment.upper = upper
        if upper < self.adjustment.value:
            self.adjustment.value = upper
        if self.xmin > self.adjustment.value:
            self.adjustment.value = self.xmin
        if self.xmin < upper:
            self.scroller.show()
        else:
            self.scroller.hide()
        self.adjustment.step_increment = self.policy.get_steps()
        self.adjustment.page_increment = self.policy.get_pages()
        self.adjustment.changed()
        self._redraw_displays()

    def recalculate(self):
        """Recompute ``xmin``/``xmax`` from all sources and annotations."""
        xmin = None
        xmax = None
        for d in self.displays:
            lo,hi = d.src.get_time_bounds()
            if xmin is None or lo < xmin:
                xmin = lo
            if xmax is None or hi > xmax:
                xmax = hi
        ann_l,ann_r = self.annotations.bounds()
        # With no displays xmin/xmax are still None here, so the annotation
        # bounds must be able to initialise them, not only extend them.
        if ann_l is not None and (xmin is None or ann_l < xmin):
            xmin = ann_l
        if ann_r is not None and (xmax is None or ann_r > xmax):
            xmax = ann_r
        self.xmin = xmin
        self.xmax = xmax
        if xmin is not None:
            self.policy.update_min(xmin)
        self.update_zoom()

    def add_source(self,src):
        """Add a display for *src*, with a title label and a remove button."""
        disp = Display(src,self.annotations,self.input_state)
        self.displays.append(disp)
        frame = gtk.Table(3,2)
        cont = gtk.Frame()
        cont.set_shadow_type(gtk.SHADOW_ETCHED_OUT)
        cont.add(disp)
        frame.attach(cont,0,1,1,3,gtk.EXPAND|gtk.FILL,gtk.EXPAND|gtk.FILL)
        lbl = gtk.Label()
        lbl.set_markup("<b>"+src.get_name()+"</b>")
        lbl.set_alignment(0.0,0.5)
        frame.attach(lbl,0,1,0,1,gtk.EXPAND|gtk.FILL,gtk.SHRINK|gtk.FILL)
        rem_but = gtk.Button()
        rem_but.set_image(gtk.image_new_from_stock(gtk.STOCK_DELETE,gtk.ICON_SIZE_MENU))
        rem_but.connect('clicked',self.remove_source_handler,frame,disp)
        frame.attach(rem_but,1,2,1,2,gtk.SHRINK|gtk.FILL,gtk.SHRINK|gtk.FILL)
        # An empty box fills the cell below the remove button.
        frame.attach(gtk.VBox(),1,2,2,3,gtk.SHRINK,gtk.EXPAND)
        frame.show_all()
        self.display_box.pack_start(frame,expand=True,fill=True)
        self.recalculate()

    def remove_source_handler(self,but,frame,display):
        """Remove-button handler: drop *display* and its *frame*."""
        self.display_box.remove(frame)
        self.displays.remove(display)
        frame.destroy()
        self.recalculate()

    def add_context(self,name):
        """Add a context called *name* to the annotation model."""
        self.annotations.add_context(name)

    def add_context_button(self,model,name,color):
        """'context-added' handler: show a button for the new context."""
        but = ContextButton(name,color,self)
        but.show_all()
        self.context_box.pack_start(but,expand=False,fill=True)
        self.buttons[name] = but

    def remove_context_button(self,model,name):
        """'context-removed' handler: take the context's button away."""
        but = self.buttons[name]
        self.context_box.remove(but)
        del self.buttons[name]

    def remove_context(self,name):
        """Remove the context called *name* from the annotation model."""
        self.annotations.remove_context(name)

    def create_context(self):
        """Ask for a context name in a modal dialog and add that context.

        The dialog's response is not inspected: whatever text the entry
        holds when the dialog finishes is used as the name.
        """
        dialog = gtk.MessageDialog(None,
                                   gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
                                   gtk.MESSAGE_QUESTION,
                                   gtk.BUTTONS_OK, None)
        dialog.set_markup(_("Please enter the <b>name</b> of the context"))
        entry = gtk.Entry()
        # Pressing Enter in the entry acts like clicking OK.
        entry.connect("activate", lambda wid: dialog.response(gtk.RESPONSE_OK))
        dialog.vbox.pack_end(entry,expand=True,fill=True)
        dialog.show_all()
        dialog.run()
        self.add_context(entry.get_text())
        dialog.destroy()

    def add_annotation(self,name,start,end):
        """Add an annotation *name* spanning *start* to *end*."""
        self.annotations.add_annotation(name,start,end)

    def create_annotation(self,name):
        """Annotate the current selection with *name*, if there is one."""
        if self.input_state.selection is not None:
            (start,end) = self.input_state.selection
            self.add_annotation(name,start,end)

    def show_selection_menu(self,state,display,boundl,boundr,time):
        """'select-selection' handler: pop up the selection menu."""
        menu = SelectionMenu(self,display)
        menu.show_all()
        menu.popup(None,None,None,3,time)

    def show_annotation_menu(self,state,display,id,time):
        """'select-annotation' handler: pop up the menu for annotation *id*."""
        menu = AnnotationMenu(self,id)
        menu.show_all()
        menu.popup(None,None,None,3,time)

    def remove_annotation(self,id):
        """Remove the annotation with the given *id* from the model."""
        self.annotations.remove_annotation(id)

    def write_out(self,fn):
        """Write the current sources and annotations to *fn* as an AnnPkg."""
        self._package().write(fn)

    def read_in(self,fn):
        """Load the AnnPkg stored in *fn* and merge it into this widget."""
        self._merge_package(AnnPkg.load(fn))

    def export(self,fn,cb=None,end_cb=None):
        """Export the current sources and annotations to *fn*.

        *cb* and *end_cb* are passed through to ``AnnPkg.export``.
        """
        self._package().export(fn,cb,end_cb)

    def importer(self,fn):
        """Import *fn* with ``import_file`` and merge the result."""
        self._merge_package(import_file(fn))

    def read_annotations(self,fn):
        """Read annotations from *fn*; show any error in a message dialog."""
        try:
            self.annotations.read(fn)
        except Exception as e:
            warning = gtk.MessageDialog(type=gtk.MESSAGE_ERROR,
                                        buttons=gtk.BUTTONS_OK,
                                        message_format=str(e))
            warning.run()
            warning.destroy()
Example #12
0
                    temp.append(i)
                    ids.add(i)
        if isinstance(temp_ann, BinaryRelationAnnotation):
            for i in temp_ann.get_deps()[1]:
                if not i in ids:
                    temp.append(i)
                    ids.add(i)
    for i in temp:
        temp_ann = ann.get_ann_by_id(i)
        if isinstance(temp_ann, TextBoundAnnotation):
            recursive_ann(temp_ann, con, ids, ann)


if __name__ == "__main__":
    from annotation import (TextBoundAnnotation, TextAnnotations,
                            EventAnnotation, BinaryRelationAnnotation)

    # Hard-coded project and document used for the manual run below.
    proj = ProjectConfiguration(
        "/home/sander/Documents/Masterproef/brat/data/brat_vb/sentiment")
    ann = Annotations(
        "/home/sander/Documents/Masterproef/brat/data/brat_vb/sentiment/sporza"
    )

    # Speed test: print the start time in milliseconds, every entry of the
    # first element of the validation result, and the elapsed milliseconds.
    import time
    millis = int(round(time.time() * 1000))
    print(millis)

    vrules = ValidationRules(proj)
    for i in vrules.validate(ann)[0]:
        print(str(i))

    millis = int(round(time.time() * 1000)) - millis
    print(millis)