def build_gold_annotations(self):
        """ Merge annotations, save as gold annotations.
            Take union of all extractions, discard mismatched attributions
        """

        # Merge annotations from annotators
        self.gold_fic_annotations = {}
        for fandom_fname in sorted(self.fandom_fnames):
            self.build_fic_gold_annotations(
                fandom_fname
            )  # saves to self.gold_fic_annotations[fandom_fname]

        # Save out
        for fandom_fname, annotations in sorted(
                self.gold_fic_annotations.items()):
            if self.span_type == 'coref':
                gold_annotations = Annotation(self.annotations_dirpath,
                                              fandom_fname,
                                              file_ext='_entity_clusters.csv')
            elif self.span_type == 'quotes':
                gold_annotations = Annotation(
                    self.annotations_dirpath,
                    fandom_fname,
                    file_ext='_quote_attribution.csv')
            gold_annotations.save_annotated_spans(annotations)
Example #2
    def evaluate_quotes(self, fandom_fname, fic_representation, save=True, exact_match=True):
        """ Evaluate quotes for a fic.
            Args:
                save: save AnnotatedSpan quote objects in a pickled file in a tmp directory
        """
        # Quote extraction evaluation
        # Load gold quote spans
        gold = Annotation(self.quote_settings.gold_dirpath, fandom_fname, file_ext=self.quote_settings.gold_ext, fic_csv_dirpath=self.fic_csv_dirpath)
        gold.extract_annotated_spans()

        # Load predicted quote spans
        fic_representation.extract_quotes(
            save_dirpath=self.quote_settings.preds_outpath, 
            coref_from=self.coref_from)

        # Get scores
        quote_scores, quote_groups = scorer.quote_scores(fic_representation.quotes, gold.annotations, exact_match=exact_match)
        print('\tQuote extraction results:')
        for key in ['extraction_f1', 'extraction_precision', 'extraction_recall']:
            print(f'\t\t{key}: {quote_scores[key]: .2%}')
        print('\tQuote attribution results:')
        for key in ['attribution_f1', 'attribution_precision', 'attribution_recall']:
            print(f'\t\t{key}: {quote_scores[key]: .2%}')
        print()
        return quote_scores, quote_groups
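A minimal driver sketch for the method above; `evaluator` and `build_fic_representation` are hypothetical names standing in for however the surrounding pipeline constructs these objects:

# Hypothetical driver: evaluate every fic and collect attribution F1 scores.
all_scores = {}
for fandom_fname in sorted(evaluator.fandom_fnames):
    # build_fic_representation is a stand-in for the pipeline's own loader
    fic_representation = build_fic_representation(fandom_fname)
    quote_scores, _ = evaluator.evaluate_quotes(
        fandom_fname, fic_representation, save=True, exact_match=True)
    all_scores[fandom_fname] = quote_scores['attribution_f1']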
Example #3
    def modify_coref_files(self,
                           coref_annotations_dirpath,
                           coref_annotations_ext,
                           annotation_type='gold'):
        """ Changes coref tokens to specified external annotations in 
            self.token_data.
            Saves out to {token_output_dirpath}_gold_coref/token_fpath.
            Returns the suffix added to dirpaths.
        """
        # Load externally annotated mentions, place in self.character_mentions
        annotation = Annotation(coref_annotations_dirpath,
                                self.fandom_fname,
                                file_ext=coref_annotations_ext,
                                fic_csv_dirpath=self.fic_csv_dirpath,
                                annotation_type=annotation_type)
        annotation.extract_annotated_spans()
        self.character_mentions = annotation.annotations

        # Modify coref <tags> in CSV
        self.modify_coref_tags(
            annotation.annotations)  # Modifies self.coref_fic

        # Save out
        modify_text = f'_{annotation_type}_coref'
        self.coref_output_dirpath = self.coref_output_dirpath.rstrip(
            '/') + modify_text
        self.save_coref_csv()

        # Modify coref characters file
        self.coref_chars_output_dirpath = self.coref_chars_output_dirpath.rstrip(
            '/') + modify_text
        self.save_characters_file()

        return modify_text
Example #4
    def modify_quote_spans(self, quote_annotations_dirpath,
                           quote_annotations_ext):
        """ Modifies quote marks so that the pipeline will recognized
            gold quotes as quote spans """
        # Load gold quote extractions
        gold = Annotation(quote_annotations_dirpath,
                          self.fandom_fname,
                          file_ext=quote_annotations_ext,
                          fic_csv_dirpath=self.fic_csv_dirpath)
        gold.extract_annotated_spans()

        # Modify CSV text_tokenized
        self.modify_quote_marks(gold.annotations)  # Modifies self.coref_fic

        # Save out
        modify_text = '_gold_quotes'
        self.coref_output_dirpath = self.coref_output_dirpath.rstrip(
            '/') + modify_text
        self.save_coref_csv()

        # Change characters file path, too
        self.coref_chars_output_dirpath = self.coref_chars_output_dirpath.rstrip(
            '/') + modify_text

        return modify_text
Example #5
    def __init__(self, name, pnts, channels, scatters=None, notes=None):
        """
        fcmdata(name, pnts, channels, scatters=None)
        name: name of corresponding FCS file minus extension
        pnts: array of data points
        channels: a list of which markers/scatters are on which column of
                    the array.
        scatters: a list of which indexes in channels are scatters

        """
        self.name = name
        self.tree = Tree(pnts, channels)

        #TODO add some default intelligence for determining scatters if None
        self.scatters = scatters
        self.markers = []
        if self.scatters is not None:
            for chan in range(len(channels)):
                if chan in self.scatters:
                    pass
                elif self.tree.root.channels[chan] in self.scatters:
                    pass
                else:
                    self.markers.append(chan)
        if notes is None:
            notes = Annotation()
        self.notes = notes
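The scatter-handling loop above collects into `markers` every channel index that appears in `scatters` neither by position nor by channel name. A self-contained sketch of the same selection on plain lists (the channel names here are made up):

channels = ['FSC', 'SSC', 'CD3', 'CD19']
scatters = [0, 'SSC']  # scatters may be given by index or by channel name
markers = [chan for chan in range(len(channels))
           if chan not in scatters and channels[chan] not in scatters]
print(markers)  # [2, 3] -> CD3 and CD19 are kept as markers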
Example #6
    def loadLabels(self):
        filename = self.getLabelFilename()
        if not filename:
            self.clearAnnotation()
            return

        # If we have everything and the filename did not change, then we are good
        if self.annotation and filename == self.currentLabelFile:
            return

        # Clear the current labels first
        self.clearAnnotation()

        try:
            self.annotation = Annotation()
            self.annotation.fromJsonFile(filename)
        except IOError as e:
            # This is the error if the file does not exist
            message = "Error parsing labels in {0}. Message: {1}".format( filename, e.strerror )
            self.statusBar().showMessage(message)

        # Remember the filename loaded
        self.currentLabelFile = filename

        # Remember the status bar message to restore it later
        restoreMessage = self.statusBar().currentMessage()

        # Restore the message
        self.statusBar().showMessage( restoreMessage )
Example #7
 def __init__(self, name, pnts, channels, scatters=None, notes=None):
     """
     fcmdata(name, pnts, channels, scatters=None)
     name: name of corresponding FCS file minus extension
     pnts: array of data points
     channels: a list of which markers/scatters are on which column of
                 the array.
     scatters: a list of which indexes in channels are scatters
     
     """
     self.name = name
     #        if type(pnts) != type(array([])):
     #            raise BadFCMPointDataTypeError(pnts, "pnts isn't a numpy.array")
     self.tree = Tree(pnts, channels)
     #self.pnts = pnts
     #self.channels = channels
     #TODO add some default intelligence for determining scatters if None
     self.scatters = scatters
     self.markers = []
     if self.scatters is not None:
         for chan in range(len(channels)):
             if chan in self.scatters:
                 pass
             elif self.tree.root.channels[chan] in self.scatters:
                 pass
             else:
                 self.markers.append(chan)
     if notes is None:
         notes = Annotation()
     self.notes = notes
Example #8
def _createannotationobjects(annotations):
    """
    Create instances of the Annotation class for each of the "T" annotations.

    Input is assumed to only be "T" annotations.

    :param annotations: (dict) dictionary of "T" annotations.
    :return: (OrderedDict) an ordered dictionary of Annotation objects.
    Length of this dictionary should be equal to that of the input dictionary.
    """
    targets = OrderedDict()

    for key, t in annotations.items():
        splitted = t.split("\t")
        t = splitted[0]
        repr = u" ".join(splitted[1:])

        split = t.split()
        label = split[0]

        spans = [[int(span.split()[0]),
                  int(span.split()[1])]
                 for span in u" ".join(split[1:]).split(";")]

        targets[key] = Annotation(key, repr, spans, [label])

    return targets
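The values this function expects follow the brat standoff format: "<label> <start> <end>[;<start> <end>...]" then a tab and the surface text. A runnable trace of the split logic on one made-up line (the label and offsets are illustrative):

t = "Disease 12 19;25 31\tsevere anemia"  # discontinuous span, two fragments
split_tab = t.split("\t")
label_and_spans, repr_text = split_tab[0], u" ".join(split_tab[1:])
split = label_and_spans.split()
label = split[0]
spans = [[int(span.split()[0]), int(span.split()[1])]
         for span in u" ".join(split[1:]).split(";")]
print(label, spans, repr_text)  # Disease [[12, 19], [25, 31]] severe anemia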
Example #9
    def parse_json(self, filename: str, classes: list) -> List[Annotation]:
        with open(filename, 'r') as json_file:
            json_data = json.load(json_file)
            images = json_data["images"]
            categories = json_data["categories"]
            annotations = []
            for anno in json_data["annotations"]:
                image_id = anno["image_id"]
                cls_id = anno["category_id"]

                for info in images:
                    if info["id"] == image_id:
                        annotation = Annotation(
                            info["file_name"].split(".")[0])
                        annotation.image_size()
                for category in categories:
                    if category["id"] == cls_id:
                        annotation.class_id = category["name"]
                bndbox = {
                    "xmin": anno["bbox"][0],
                    "ymin": anno["bbox"][1],
                    "xmax": anno["bbox"][2] + anno["bbox"][0],
                    "ymax": anno["bbox"][3] + anno["bbox"][1]
                }
                annotation.bbox = (bndbox["xmin"], bndbox["ymin"],
                                   bndbox["xmax"], bndbox["ymax"])
                annotations.append(annotation)
            return annotations
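The bbox handling above is the usual COCO-to-corner conversion: COCO stores boxes as [x_min, y_min, width, height], so the bottom-right corner is recovered by adding width and height to the top-left corner. As a standalone check:

coco_bbox = [100.0, 40.0, 50.0, 30.0]  # [x_min, y_min, width, height]
xmin, ymin, w, h = coco_bbox
voc_bbox = (xmin, ymin, xmin + w, ymin + h)
print(voc_bbox)  # (100.0, 40.0, 150.0, 70.0)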
Example #10
 def set_annotation(self):
     dialog = Annotation(self.singletons, self.base_sets_clustering, self.representatives, self.points_type)
     if dialog.exec_() == QDialog.Accepted:
         self.base_sets_clustering = dialog.base_sets
         self.colorClusters(self.base_sets_clustering)
     else:
         print('Cancelled')
     dialog.deleteLater()
Example #11
 def __init__(self):
     self.name = 'Building_dataset_{}_{}_{}'.format(datetime.now().year,
                                                    datetime.now().month,
                                                    datetime.now().day)
     self.size = SIZE
     self.json = Annotation()
     self.factory = BuildingFactory()
     self.material_factory = MaterialFactory()
Example #12
 def load_anno_all(self, wjd):
     if "anno_all" not in wjd: return
     for aID, aset in wjd["anno_all"].items():
         a = Annotation(aID, name=aset['name'], address=aset['address'], tag=aset['tag'],
                        desc=aset["desc"], comment=aset["comment"], numfile=aset["numfile"],
                        created_at=datetime_parse(aset["created_at"]))
         a.add_dataset_ID(aset["dataset_ID"])
         self.add_annotation(a)
Example #13
    def create_annotation(self, src, name=None):
        ID = gen_ID(pre="a")
        addr = self.data_root+"/annosets/"+ID

        an = Annotation(ID,name=name,address=addr)

        if an.populate(src):
            self.add_annotation(an)
        return ID
Example #14
    def modify_quote_tokens(self,
                            original_tokenization_dirpath=None,
                            quote_annotations_dirpath=None,
                            quote_annotations_ext=None,
                            change_to='gold'):
        """ Changes quote tokens so BookNLP will recognize them in certain ways.
            Args:
                change_to:
                    'gold': Change to gold quote extractions
                    'match': Replace quotes with smart quotes to match a tokens file done without whitespace tokenization
                    'strict': Change existing BookNLP quotes using a dictionary. Single quotes to ` and ', double quotes to `` and ''
        """
        if change_to == 'gold':
            # Load gold quote extractions
            gold = Annotation(quote_annotations_dirpath,
                              self.fandom_fname,
                              file_ext=quote_annotations_ext,
                              fic_csv_dirpath=self.fic_csv_dirpath)
            gold.extract_annotated_spans()

            # Clear existing quotes, since they might have been modified after whitespace tokenization
            self.clear_quotes()

            # Add gold quote spans in
            for span in gold.annotations:
                self.add_quote_span(span)

            # Change output dirpath for later saving (after replacing gold coref)
            self.modified_token_output_dirpath = self.modified_token_output_dirpath.rstrip(
                '/') + '_gold_quotes'

        elif change_to == 'match':
            original_tokens = load_tokens_file(
                os.path.join(self.original_tokenization_dirpath,
                             self.fandom_fname + self.token_file_ext))
            self.token_data = match_quotes(original_tokens, self.token_data)

            # Save out
            save_tokens_file(self.token_data, self.modified_token_fpath)

        elif change_to == 'strict':
            quote_changes = {
                "“": "``",
                "”": "''",
            }
            self.token_data['normalizedWord'] = self.token_data[
                'normalizedWord'].map(lambda x: quote_changes.get(x, x))
            self.token_data['lemma'] = self.token_data['lemma'].map(
                lambda x: quote_changes.get(x, x))

            # Save out
            self.token_data.to_csv(self.modified_token_fpath,
                                   sep='\t',
                                   quoting=csv.QUOTE_NONE,
                                   index=False)
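The 'strict' branch is a plain per-token substitution. A self-contained sketch of the same mapping on a toy DataFrame with BookNLP-style column names:

import pandas as pd

quote_changes = {"“": "``", "”": "''"}  # curly double quotes to LaTeX-style pairs
token_data = pd.DataFrame({'normalizedWord': ['“', 'Hi', '”'],
                           'lemma': ['“', 'hi', '”']})
for col in ('normalizedWord', 'lemma'):
    token_data[col] = token_data[col].map(lambda x: quote_changes.get(x, x))
print(token_data['normalizedWord'].tolist())  # ['``', 'Hi', "''"]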
Example #15
def get_annotations():
    user_map = get_user_map()
    assignments = get_assignments(user_map)

    annotations = {
        video: Annotation(assignment, video)
        for video, assignment in assignments.items()
    }

    return annotations
Example #16
    def targets(self, field, keypoint_sets):
        assert self.keypoints is not None
        assert self.skeleton is not None

        annotations = [
            Annotation(keypoints=self.keypoints, skeleton=self.skeleton).set(kps, fixed_score=None)
            for kps in keypoint_sets
        ]

        self._confidences(field[0])
        self._regressions(field[1], field[2], field[3], field[4], annotations=annotations)
Example #17
	def __init__(self, directory):
		subject_mapping = {}
		with open(configs.MAPPING_FILE) as mapping_file:
			mapping_file = mapping_file.read().strip().split('\n')
			for line in mapping_file:
				id = re.findall(r'^(\d+)\s', line)[0]
				name = line[len(id) + 1:]
				subject_mapping[name] = id

		def getSubject(name):
			return int(subject_mapping[name])

		#
		self.dataset = SingleDataset()

		subject_dirs = fileutils.listdir(directory)
		ann = Annotation()


		prev_subject_id = None
		sjList = None
		for dir in subject_dirs:
			subject_name = fileutils.dirname(dir)
			subject_id = getSubject(subject_name)

			duplicate = None
			for sj in self.dataset.subjects:
				if sj.id == subject_id and sj.name == subject_name:
					duplicate = sj

			##
			if duplicate is None:
				subject = Subject(subject_id, subject_name)
			else:
				subject = duplicate

			if prev_subject_id != subject_id:
				sjList = ann.getSubjectList(subject_id)
			prev_subject_id = subject_id

			for file in fileutils.recursive_walk(dir):
				if fileutils.fileextension(file) == configs.LAYER:
					###
					filename = fileutils.filename(file)
					rc = Record()
					rc.data = blob.load_np_array(file)
					rc.label = int(Annotation.getClass(sjList, filename))
					rc.frame = filename

					subject.records.append(rc)
					print('Loading', subject_id, filename, rc.label)

			if duplicate is None:
				self.dataset.subjects.append(subject)
Example #18
    def targets(self, field, *, annotation_dicts):
        assert self.keypoints is not None
        assert self.skeleton is not None

        annotations = [
            Annotation(keypoints=self.keypoints,
                       skeleton=self.skeleton).set(ann['keypoints'],
                                                   fixed_score=None,
                                                   fixed_bbox=ann['bbox'])
            for ann in annotation_dicts
        ]

        self._confidences(field[0])
        self._regressions(field[1], field[2], annotations=annotations)
Example #19
 def __init__(self, name, fcms=None, notes=None):
     """
     Initialize with fcm collection and notes.
     """
     #  - how is this done in fcmdata?
     self.fcmdict = {}
     self.name = name
     if fcms is not None:
         for fcm in fcms:
             self.fcmdict[fcm.name] = fcm
     if notes is None:
         self.notes = Annotation()
     else:
         self.notes = notes
Example #20
 def load_fic_annotations(self, fandom_fname, span_type):
     # Load annotations
     if span_type == 'coref':
         file_ext = 'entity_clusters'
     elif span_type == 'quotes':
         file_ext = 'quote_attribution'
     self.fic_annotations[fandom_fname] = {}
     for annotator in self.annotators:
         self.fic_annotations[fandom_fname][annotator] = Annotation(
             self.annotations_dirpath,
             fandom_fname,
             file_ext=f'_{file_ext}_{annotator}.csv',
             fic_csv_dirpath=self.fic_csv_dirpath)
         self.fic_annotations[fandom_fname][
             annotator].extract_annotated_spans()
Example #21
def loclabel_gen(ano_path, loc_path, out_path):
    # splitext drops the '.json' suffix (str.strip('.json') would strip any of those characters)
    pids = [os.path.splitext(x)[0] for x in os.listdir(ano_path)]

    annotations = {}
    for pid in pids:
        pid_json_path = os.path.join(ano_path, pid + '.json')
        anno = Annotation()
        anno.from_json(pid_json_path)
        annotations[pid] = anno

    coords = []
    infile = open(loc_path)
    for i, line in enumerate(infile):
        pid, x_center, y_center = line.strip('\n').split(',')
        coords.append((pid, x_center, y_center))
    infile.close()

    num_sample = len(coords)
    print(f"Total sample: {num_sample}")

    outfile = open(out_path, 'w')
    for index in range(num_sample):
        pid, x_center, y_center = coords[index]
        x_center = int(x_center)
        y_center = int(y_center)

        x_top_left = int(x_center - IMG_SIZE / 2)
        y_top_left = int(y_center - IMG_SIZE / 2)

        label = []
        for x_idx in range(3):
            for y_idx in range(3):
                # (x, y) is the center of each patch
                x = x_top_left + int((x_idx + 0.5) * SUB_SIZE)
                y = y_top_left + int((y_idx + 0.5) * SUB_SIZE)
                # get label information according to annotation
                if annotations[pid].inside_polygons((x, y), True):
                    label.append(1)
                else:
                    label.append(0)
        # write output
        outfile.write(
            f"{pid.lower()}, {x_center}, {y_center}, {str(label)[1:-1]}\n")

        if index % 100 == 0:
            print(index)

    outfile.close()
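Each input line gives a patch center; the nested loop above then labels a 3x3 grid of sub-patch centers around it. The arithmetic in isolation, with illustrative sizes (the real IMG_SIZE and SUB_SIZE are module constants not shown in this snippet):

IMG_SIZE, SUB_SIZE = 768, 256  # illustrative values only
x_center, y_center = 1000, 2000
x_top_left = int(x_center - IMG_SIZE / 2)
y_top_left = int(y_center - IMG_SIZE / 2)
centers = [(x_top_left + int((x_idx + 0.5) * SUB_SIZE),
            y_top_left + int((y_idx + 0.5) * SUB_SIZE))
           for x_idx in range(3) for y_idx in range(3)]
print(centers[0], centers[-1])  # (744, 1744) (1256, 2256)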
Example #22
def wrap_annotations(sentences):
    annotations = []
    tid = 0
    for sid, labels in enumerate(sentences):
        for idx, label in enumerate(labels):
            for ann in label.split('#'):
                type = ann[2:]
                if 'B-' in ann:
                    annotations.append(Annotation(type, sid, tid))
                elif 'I-' in ann:
                    for _ann in reversed(annotations):
                        if type == _ann.annotation:
                            _ann.add_id(tid)
                            break
            tid += 1
    return annotations
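A runnable trace of the BIO unwrapping above, using a minimal stand-in for the Annotation class it expects (a constructor taking type, sentence id, and first token id, plus an add_id method; the real class may differ):

class Annotation:  # stand-in, just enough to exercise wrap_annotations
    def __init__(self, ann_type, sid, tid):
        self.annotation, self.sid, self.token_ids = ann_type, sid, [tid]
    def add_id(self, tid):
        self.token_ids.append(tid)

sentences = [['B-PER', 'I-PER', 'O'], ['B-LOC']]  # one label string per token
anns = wrap_annotations(sentences)
print([(a.annotation, a.token_ids) for a in anns])  # [('PER', [0, 1]), ('LOC', [3])]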
Example #23
 def move_mouse(self, event):
     """
     Handles the drawing of the arrow when deciding where to annotate
     """
     if WorldState.Instance().draw_plot:
         if WorldState.Instance().session_dict['click_one']:
             WorldState.Instance().session_dict['temp_annotation'] = Annotation(
                 WorldState.Instance()._ARROW,
                 (WorldState.Instance().session_dict['click_one_x'],
                  WorldState.Instance().session_dict['click_one_y']),
                 (event.xdata, event.ydata))
         if WorldState.Instance().session_dict['annotate']:
             WorldState.Instance().session_dict['redraw_legend'] = False
             WorldState.Instance().draw_plot.plot()
             WorldState.Instance().session_dict['redraw_legend'] = True
Example #24
    def load_fic_spans(self, fandom_fname, gold_dirpath, baseline_dirpath,
                       experimental_dirpath, gold_annotations_ext):
        """ Load quote or coref predictions and gold spans for a fic.
            Returns gold_spans, baseline_spans, experimental_spans
        """
        gold_annotation = Annotation(gold_dirpath,
                                     fandom_fname,
                                     file_ext=gold_annotations_ext,
                                     fic_csv_dirpath=self.fic_csv_dirpath)
        gold_annotation.extract_annotated_spans()
        gold_spans = gold_annotation.annotations
        baseline_spans = utils.load_pickle(baseline_dirpath, fandom_fname)
        experimental_spans = utils.load_pickle(experimental_dirpath,
                                               fandom_fname)

        return gold_spans, baseline_spans, experimental_spans
Example #25
    def to_annotation(self, threshold=-np.inf, posterior=False):
        """

        Parameters
        ----------
        threshold : float, optional
            Each track is annotated with the label with the highest score.
            Yet, if the latter is smaller than `threshold`, label is replaced
            with an `Unknown` instance.
        posterior : bool, optional
            If True, scores are posterior probabilities in open-set
            identification. If top model posterior is higher than unknown
            posterior, it is selected. Otherwise, label is replaced with an
            `Unknown` instance.
        """

        annotation = Annotation(uri=self.uri, modality=self.modality)
        if not self:
            return annotation

        best = self.nbest(1, invert=False)

        if posterior:

            # compute unknown posterior
            func = lambda p: 1. - np.nansum(p, axis=1)
            Pu = self.apply(func, new_columns=['_'])

            # threshold best target posterior
            # with unknown posterior and threshold
            for segment, track, label, value in best.itervalues():

                if value < Pu[segment, track, '_'] or value < threshold:
                    label = Unknown()

                annotation[segment, track] = label

        else:

            # threshold best target score with threshold
            for segment, track, label, value in best.itervalues():
                if value < threshold:
                    label = Unknown()
                annotation[segment, track] = label

        return annotation
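A hedged usage sketch; `scores` stands for whatever scores object this method is defined on, and the iteration uses the standard pyannote.core Annotation API:

annotation = scores.to_annotation(threshold=0.5)
for segment, track, label in annotation.itertracks(yield_label=True):
    # labels whose best score fell below the threshold come back as Unknown
    print(segment, track, label)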
Example #26
    def _load_annotations(self):
        f = open(self.file_name, 'r')
        for line in f:
            line = line.strip()
            if line != "":
                annotations_list = [
                    Annotation(
                        int(begin_index) + 1, int(end_index), time_expression,
                        timex3)
                    # zip over a repeated iterator groups the fields in fours,
                    # replacing the Python 2-only map(None, ...) idiom
                    for begin_index, end_index, time_expression, timex3 in
                    zip(*([iter(line.split("\t"))] * 4))
                ]
            else:
                annotations_list = []

            self.annotations.append(annotations_list)
        f.close()
Example #27
    def __init__(self, dandelion_raw_list):
        self.dandelion_raw_list = dandelion_raw_list
        self.good_annotations = []

        if self.dandelion_raw_list:
            for annotation_dict in self.dandelion_raw_list.get(
                    "annotations", []):
                my_annotation = Annotation(annotation_dict)

                if not my_annotation.suppress:
                    for top_entity in self.dandelion_raw_list.get(
                            "topEntities", []):
                        if my_annotation.uri == top_entity["uri"]:
                            my_annotation.is_top_entity = True
                            my_annotation.top_entity_score = top_entity[
                                "score"]
                    self.good_annotations.append(my_annotation)
Example #28
    def test_get_annotated(self):

        text = 'Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor incididunt.'
        tagged_text = '<p><span>Lorem</span> ipsum dolor sit <span>amet</span>.</p><p>Consectetur adipiscing <span>elit</span>.</p><p>Sed do eiusmod tempor <span>incididunt</span>.</p>'
        lst_annotations = [
            Annotation('sentence', 0, 27),
            Annotation('sentence', 28, 56),
            Annotation('sentence', 57, 90),
            Annotation('word', 22, 26),
            Annotation('word', 51, 55),
            Annotation('word', 79, 89),
            Annotation('word', 0, 5)
        ]

        tagged_text_test = get_annotated(
            Document(text, 'test', lst_annotations))
        self.assertEqual(tagged_text_test, tagged_text)
Example #29
def dict_to_tf_examples(data):
    global DATA_DIR

    img_name = os.path.join(data['name'] + 'leftImg8bit.png')
    img_path = os.path.join(DATA_DIR, 'leftImg8bit', data['relpath'], img_name)

    annotation = Annotation()
    annotation.fromJsonFile(data['json_path'])
    instanceImg = createInstanceImage(annotation, "trainIds")
    instanceImg.Format = 'PNG'

    with tf.gfile.GFile(img_path, 'rb') as fid:
        image_file = fid.read()
        image_io = io.BytesIO(image_file)
        image = Image.open(image_io)

    splits_divider = FLAGS.splits_divider

    split_width = int(np.ceil(image.width / splits_divider))
    split_width_half = int(np.ceil(split_width / 2))

    split_positions = [i * split_width for i in range(splits_divider - 1)]
    split_positions.append(image.width - split_width)
    split_positions += [
        split_width_half + i * split_width for i in range(splits_divider - 1)
    ]
    # split_positions += [random.randint(10,(image.width-split_width-10)) for i in range(FLAGS.splits_add)]

    examples = []

    for i, pos in enumerate(split_positions):
        box = (pos, 0, pos + split_width, image.height)
        sub_image = image.crop(box)
        sub_instanceImg = instanceImg.crop(box)

        examples.append(
            sub_img_to_tf_example(img_name + '[#' + str(i) + ']', sub_image,
                                  sub_instanceImg))

    return examples
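The split bookkeeping above tiles the image into overlapping vertical strips: splits_divider - 1 left-aligned strips, one right-aligned strip, then half-offset strips in between. With a toy width:

import numpy as np

image_width, splits_divider = 2048, 4
split_width = int(np.ceil(image_width / splits_divider))  # 512
split_width_half = int(np.ceil(split_width / 2))          # 256
split_positions = [i * split_width for i in range(splits_divider - 1)]
split_positions.append(image_width - split_width)
split_positions += [split_width_half + i * split_width
                    for i in range(splits_divider - 1)]
print(split_positions)  # [0, 512, 1024, 1536, 256, 768, 1280]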
Example #30
 def __init__(self,
              parent=None,
              with_filename=True,
              with_slider=True,
              cache_capacity=500,
              max_fps=0):
     super(VideoWidget, self).__init__(parent)
     self.with_filename = with_filename
     self.with_slider = with_slider
     self.video = Video(cache_capacity=cache_capacity, max_fps=max_fps)
     self.annotation = Annotation()
     self.tube_id = 0
     self.tracker = None
     self.sim_thr = 0.9
     self.init_ui()
     self.installEventFilter(self)
     if self.with_slider:
         self.slider.sliderReleased.connect(self.on_slider_released)
     self.label_frame.bbox_added.connect(self.set_tracker)
     self.label_frame.bbox_deleted.connect(self.del_tracker)
     self.video.frame_updated.connect(self.update_frame)
     self.video.export_progress_updated.connect(self.update_export_progress)