def build_gold_annotations(self):
    """Merge the annotators' annotations and save them as gold annotations.

    Takes the union of all extractions and discards mismatched
    attributions; the per-fic merge is delegated to
    ``self.build_fic_gold_annotations``.

    Raises:
        ValueError: if ``self.span_type`` is neither ``'coref'`` nor
            ``'quotes'``.
    """
    gold_file_exts = {
        'coref': '_entity_clusters.csv',
        'quotes': '_quote_attribution.csv',
    }
    # Fail early with a clear message: an unknown span type used to surface
    # only as an unbound local variable while saving.
    if self.span_type not in gold_file_exts:
        raise ValueError(f'Unknown span_type: {self.span_type!r}')
    file_ext = gold_file_exts[self.span_type]

    # Merge annotations from annotators
    self.gold_fic_annotations = {}
    for fandom_fname in sorted(self.fandom_fnames):
        # saves to self.gold_fic_annotations[fandom_fname]
        self.build_fic_gold_annotations(fandom_fname)

    # Save out
    for fandom_fname, annotations in sorted(self.gold_fic_annotations.items()):
        gold_annotations = Annotation(self.annotations_dirpath,
                                      fandom_fname,
                                      file_ext=file_ext)
        gold_annotations.save_annotated_spans(annotations)
def evaluate_quotes(self, fandom_fname, fic_representation, save=True, exact_match=True):
    """Score quote extraction and attribution for a single fic.

    Args:
        fandom_fname: name of the fic whose gold annotations are loaded.
        fic_representation: object whose ``extract_quotes`` populates
            ``fic_representation.quotes`` with the predictions.
        save: save AnnotatedSpan quote objects in a pickled file in a tmp
            directory (not referenced inside this method).
        exact_match: forwarded to ``scorer.quote_scores``.

    Returns:
        The ``(quote_scores, quote_groups)`` pair from ``scorer.quote_scores``.
    """
    settings = self.quote_settings

    # Gold quote spans
    gold_quotes = Annotation(settings.gold_dirpath,
                             fandom_fname,
                             file_ext=settings.gold_ext,
                             fic_csv_dirpath=self.fic_csv_dirpath)
    gold_quotes.extract_annotated_spans()

    # Predicted quote spans
    fic_representation.extract_quotes(save_dirpath=settings.preds_outpath,
                                      coref_from=self.coref_from)

    quote_scores, quote_groups = scorer.quote_scores(fic_representation.quotes,
                                                     gold_quotes.annotations,
                                                     exact_match=exact_match)

    # Report each group of metrics under its own heading
    report = (
        ('\tQuote extraction results:',
         ('extraction_f1', 'extraction_precision', 'extraction_recall')),
        ('\tQuote attribution results:',
         ('attribution_f1', 'attribution_precision', 'attribution_recall')),
    )
    for heading, keys in report:
        print(heading)
        for key in keys:
            print(f'\t\t{key}: {quote_scores[key]: .2%}')
    print()

    return quote_scores, quote_groups
def modify_coref_files(self, coref_annotations_dirpath, coref_annotations_ext, annotation_type='gold'):
    """Replace the coref annotations with externally provided ones.

    Loads the external mentions, stores them in ``self.character_mentions``,
    rewrites the coref tags, and saves the coref CSV and characters file to
    output directories suffixed with ``_{annotation_type}_coref``.

    Returns:
        The suffix that was appended to the output dirpaths.
    """
    # Load externally annotated mentions
    external = Annotation(coref_annotations_dirpath,
                          self.fandom_fname,
                          file_ext=coref_annotations_ext,
                          fic_csv_dirpath=self.fic_csv_dirpath,
                          annotation_type=annotation_type)
    external.extract_annotated_spans()
    self.character_mentions = external.annotations

    # Modify coref <tags> in CSV (modifies self.coref_fic)
    self.modify_coref_tags(external.annotations)

    suffix = f'_{annotation_type}_coref'

    # Save the coref CSV under the suffixed directory
    self.coref_output_dirpath = self.coref_output_dirpath.rstrip('/') + suffix
    self.save_coref_csv()

    # Save the characters file under its suffixed directory
    self.coref_chars_output_dirpath = (
        self.coref_chars_output_dirpath.rstrip('/') + suffix)
    self.save_characters_file()

    return suffix
def modify_quote_spans(self, quote_annotations_dirpath, quote_annotations_ext):
    """Rewrite quote marks so the pipeline recognizes gold quotes as quote spans.

    Saves the modified coref CSV to an output directory suffixed with
    ``_gold_quotes`` and points the characters-file directory at the same
    suffix (the characters file itself is not saved here).

    Returns:
        The suffix that was appended to the output dirpaths.
    """
    suffix = '_gold_quotes'

    # Load gold quote extractions
    gold_quotes = Annotation(quote_annotations_dirpath,
                             self.fandom_fname,
                             file_ext=quote_annotations_ext,
                             fic_csv_dirpath=self.fic_csv_dirpath)
    gold_quotes.extract_annotated_spans()

    # Modify CSV text_tokenized (modifies self.coref_fic)
    self.modify_quote_marks(gold_quotes.annotations)

    # Save out
    self.coref_output_dirpath = self.coref_output_dirpath.rstrip('/') + suffix
    self.save_coref_csv()

    # Change characters file path, too
    self.coref_chars_output_dirpath = (
        self.coref_chars_output_dirpath.rstrip('/') + suffix)

    return suffix
def __init__(self, name, pnts, channels, scatters=None, notes=None):
    """
    fcmdata(name, pnts, channels, scatters=None)
    name: name of corresponding FCS file minus extension
    pnts: array of data points
    channels: a list of which markers/scatters are on which column of
        the array.
    scatters: a list of which indexes in channels are scatters
    notes: annotation object to attach; a fresh Annotation() is created
        when None
    """
    self.name = name
    self.tree = Tree(pnts, channels)
    # TODO add some default intelligence for determining scatters if None
    self.scatters = scatters
    self.markers = []
    if self.scatters is not None:
        # A channel is a marker unless it is listed in scatters either by
        # its index or by its entry in the tree root's channel list.
        self.markers = [
            chan for chan in range(len(channels))
            if chan not in self.scatters
            and self.tree.root.channels[chan] not in self.scatters
        ]
    # Identity check: `== None` would invoke a custom __eq__ on notes.
    if notes is None:
        notes = Annotation()
    self.notes = notes
def loadLabels(self):
    """Load the label file for the current image into ``self.annotation``.

    Clears the annotation when there is no label filename, does nothing when
    the same file is already loaded, and otherwise reloads from the JSON
    file, showing a status bar message if reading raises IOError.
    """
    filename = self.getLabelFilename()

    # Nothing to load: drop whatever is currently shown
    if not filename:
        self.clearAnnotation()
        return

    # Already holding the labels of this very file
    already_loaded = self.annotation and filename == self.currentLabelFile
    if already_loaded:
        return

    # Clear the current labels first
    self.clearAnnotation()

    try:
        self.annotation = Annotation()
        self.annotation.fromJsonFile(filename)
    except IOError as err:
        # This is the error if the file does not exist
        self.statusBar().showMessage(
            "Error parsing labels in {0}. Message: {1}".format(filename, err.strerror))

    # Remember the filename loaded
    self.currentLabelFile = filename

    # Read back the current status bar message and show it again
    status_bar_text = self.statusBar().currentMessage()
    self.statusBar().showMessage(status_bar_text)
def __init__(self, name, pnts, channels, scatters=None, notes=None):
    """
    fcmdata(name, pnts, channels, scatters=None)
    name: name of corresponding FCS file minus extension
    pnts: array of data points
    channels: a list of which markers/scatters are on which column of
        the array.
    scatters: a list of which indexes in channels are scatters
    notes: annotation object to attach; a fresh Annotation() is created
        when None
    """
    self.name = name
    self.tree = Tree(pnts, channels)
    # TODO add some default intelligence for determining scatters if None
    self.scatters = scatters
    self.markers = []
    if self.scatters is not None:
        for chan in range(len(channels)):
            # Scatters may be given by column index or by channel entry
            is_scatter = (chan in self.scatters
                          or self.tree.root.channels[chan] in self.scatters)
            if not is_scatter:
                self.markers.append(chan)
    # Identity check: `== None` would invoke a custom __eq__ on notes.
    if notes is None:
        notes = Annotation()
    self.notes = notes
def _createannotationobjects(annotations):
    """
    Build one Annotation instance per "T" annotation.
    Input is assumed to only be "T" annotations.

    Each value is split on tabs: the first field holds the label followed by
    ";"-separated "start end" spans, the remaining fields are joined with
    spaces to form the text passed to Annotation.

    :param annotations: (dict) dictionary of "T" annotations.
    :return: (OrderedDict) Annotation objects keyed like the input, one per
        input entry.
    """
    targets = OrderedDict()
    for key, raw in annotations.items():
        fields = raw.split("\t")
        text = u" ".join(fields[1:])

        header = fields[0].split()
        label = header[0]

        spans = []
        for chunk in u" ".join(header[1:]).split(";"):
            bounds = chunk.split()
            spans.append([int(bounds[0]), int(bounds[1])])

        targets[key] = Annotation(key, text, spans, [label])
    return targets
def parse_json(self, filename: str, classes: list) -> List[Annotation]:
    """Parse a JSON annotation file into a list of Annotation objects.

    The file must contain "images", "categories" and "annotations" lists.
    Each annotation's "bbox" is read as [x, y, width, height] and stored as
    ``(xmin, ymin, xmax, ymax)``.

    Args:
        filename: path of the JSON file.
        classes: not used by this method.

    Returns:
        One Annotation per entry whose "image_id" matches an image. Entries
        without a matching image are skipped, so an Annotation left over
        from a previous entry is never reused. ``class_id`` is set only when
        the category id is found.
    """
    with open(filename, 'r') as json_file:
        json_data = json.load(json_file)

    # Index once by id instead of rescanning both lists for every annotation
    # (on duplicate ids the last entry wins, as with the original scan).
    images_by_id = {info["id"]: info for info in json_data["images"]}
    categories_by_id = {
        category["id"]: category for category in json_data["categories"]
    }

    annotations = []
    for anno in json_data["annotations"]:
        cls_id = anno["category_id"]
        info = images_by_id.get(anno["image_id"])
        if info is None:
            continue

        annotation = Annotation(info["file_name"].split(".")[0])
        annotation.image_size()

        category = categories_by_id.get(cls_id)
        if category is not None:
            annotation.class_id = category["name"]

        bbox = anno["bbox"]
        annotation.bbox = (bbox[0], bbox[1],
                           bbox[2] + bbox[0], bbox[3] + bbox[1])
        annotations.append(annotation)
    return annotations
def set_annotation(self):
    """Run the annotation dialog and apply its base sets when accepted.

    On acceptance the dialog's ``base_sets`` replace
    ``self.base_sets_clustering`` and the clusters are recolored; otherwise
    'Cancelled' is printed. The dialog is scheduled for deletion either way.
    """
    dialog = Annotation(self.singletons,
                        self.base_sets_clustering,
                        self.representatives,
                        self.points_type)
    accepted = dialog.exec_() == QDialog.Accepted
    if not accepted:
        print('Cancelled')
    else:
        self.base_sets_clustering = dialog.base_sets
        self.colorClusters(self.base_sets_clustering)
    dialog.deleteLater()
def __init__(self):
    """Set up an empty dataset named after today's date.

    The name has the form ``Building_dataset_<year>_<month>_<day>`` (numbers
    are not zero-padded).
    """
    # Read the clock once so year, month and day come from the same instant;
    # three separate now() calls could straddle midnight.
    today = datetime.now()
    self.name = 'Building_dataset_{}_{}_{}'.format(today.year, today.month,
                                                   today.day)
    self.size = SIZE
    self.json = Annotation()
    self.factory = BuildingFactory()
    self.material_factory = MaterialFactory()
def load_anno_all(self,wjd):
    """Recreate and register every annotation set stored under wjd["anno_all"].

    Does nothing when the "anno_all" key is absent. Each stored entry is
    turned into an Annotation, linked to its "dataset_ID" and passed to
    ``self.add_annotation``.
    """
    if "anno_all" in wjd:
        plain_fields = ("name", "address", "tag", "desc", "comment", "numfile")
        for anno_id, stored in wjd["anno_all"].items():
            kwargs = {field: stored[field] for field in plain_fields}
            kwargs["created_at"] = datetime_parse(stored["created_at"])
            anno = Annotation(anno_id, **kwargs)
            anno.add_dataset_ID(stored["dataset_ID"])
            self.add_annotation(anno)
def create_annotation(self,src,name=None):
    """Create an annotation set from *src* and register it.

    A new ID is generated with prefix "a" and the set's address is
    ``<data_root>/annosets/<ID>``.

    Returns the new ID when ``populate(src)`` reports success; otherwise
    nothing is registered and None is returned.
    NOTE(review): the flattened source does not show whether the ID was also
    returned on failure - confirm against the original file.
    """
    new_id = gen_ID(pre="a")
    location = self.data_root+"/annosets/"+new_id
    anno = Annotation(new_id,name=name,address=location)
    if not anno.populate(src):
        return None
    self.add_annotation(anno)
    return new_id
def modify_quote_tokens(self, original_tokenization_dirpath=None, quote_annotations_dirpath=None, quote_annotations_ext=None, change_to='gold'):
    """Change quote tokens so BookNLP will recognize them in certain ways.

    Args:
        original_tokenization_dirpath: currently unused.
            NOTE(review): the 'match' branch reads
            ``self.original_tokenization_dirpath`` instead - confirm which
            one is intended.
        quote_annotations_dirpath: directory of gold quote annotations
            (used by 'gold').
        quote_annotations_ext: file extension of gold quote annotations
            (used by 'gold').
        change_to:
            'gold': Clear existing quotes and add the gold quote
                extractions; the modified token output dirpath gets a
                '_gold_quotes' suffix for later saving.
            'match': Replace quotes with smart quotes to match a tokens
                file done without whitespace tokenization, then save.
            'strict': Map the curly double quotes “ and ” to `` and '' in
                the 'normalizedWord' and 'lemma' columns, then save as a
                tab-separated file.
            Any other value leaves everything untouched.
    """
    if change_to == 'gold':
        # Load gold quote extractions
        gold = Annotation(quote_annotations_dirpath,
                          self.fandom_fname,
                          file_ext=quote_annotations_ext,
                          fic_csv_dirpath=self.fic_csv_dirpath)
        gold.extract_annotated_spans()

        # Clear existing quotes, since might have been modified after
        # whitespace tokenization
        self.clear_quotes()

        # Add gold quote spans in
        for span in gold.annotations:
            self.add_quote_span(span)

        # Change output dirpath for later saving (after replace gold coref)
        self.modified_token_output_dirpath = (
            self.modified_token_output_dirpath.rstrip('/') + '_gold_quotes')

    elif change_to == 'match':
        original_tokens = load_tokens_file(
            os.path.join(self.original_tokenization_dirpath,
                         self.fandom_fname + self.token_file_ext))
        self.token_data = match_quotes(original_tokens, self.token_data)

        # Save out
        save_tokens_file(self.token_data, self.modified_token_fpath)

    elif change_to == 'strict':
        quote_changes = {
            "“": "``",
            "”": "''",
        }
        for column in ('normalizedWord', 'lemma'):
            self.token_data[column] = self.token_data[column].map(
                lambda x: quote_changes.get(x, x))

        # Save out (the leftover pdb.set_trace() breakpoint that used to
        # halt execution here has been removed)
        self.token_data.to_csv(self.modified_token_fpath,
                               sep='\t',
                               quoting=csv.QUOTE_NONE,
                               index=False)
def get_annotations():
    """Return a dict mapping each assigned video to its Annotation.

    Assignments come from ``get_assignments(get_user_map())``; every
    (video, assignment) pair becomes ``Annotation(assignment, video)``.
    """
    assignments = get_assignments(get_user_map())
    annotations = {}
    for video, assignment in assignments.items():
        annotations[video] = Annotation(assignment, video)
    return annotations
def targets(self, field, keypoint_sets):
    """Draw the target annotations onto the given field.

    Builds one Annotation per keypoint set (with ``fixed_score=None``),
    passes ``field[0]`` to ``self._confidences`` and ``field[1]`` through
    ``field[4]`` plus the annotations to ``self._regressions``.
    Requires ``self.keypoints`` and ``self.skeleton`` to be set.
    """
    assert self.keypoints is not None
    assert self.skeleton is not None

    annotations = []
    for kps in keypoint_sets:
        pose = Annotation(keypoints=self.keypoints, skeleton=self.skeleton)
        annotations.append(pose.set(kps, fixed_score=None))

    self._confidences(field[0])
    self._regressions(field[1], field[2], field[3], field[4],
                      annotations=annotations)
def __init__(self, directory):
    """Load the records of every subject directory under *directory*.

    Subject names are translated to numeric ids through the mapping file
    ``configs.MAPPING_FILE`` (each line: ``<digits><whitespace><name>``).
    For every file with extension ``configs.LAYER`` a Record is created
    holding the loaded array, its integer class label and the file name,
    and is appended to the subject's records.

    NOTE(review): ``self.dataset`` is not created here (the assignment is
    commented out in the original), so it must already exist, e.g. as a
    class attribute - confirm.
    """
    subject_mapping = {}
    with open(configs.MAPPING_FILE) as mapping_file:
        mapping_lines = mapping_file.read().strip().split('\n')
    for line in mapping_lines:
        subject_number = re.findall(r'^(\d+)\s', line)[0]
        # The name is everything after the id and its single separator
        name = line[len(subject_number) + 1:]
        subject_mapping[name] = subject_number

    def getSubject(name):
        return int(subject_mapping[name])

    subject_dirs = fileutils.listdir(directory)
    ann = Annotation()
    prev_subject_id = None
    sjList = None
    for subject_dir in subject_dirs:
        subject_name = fileutils.dirname(subject_dir)
        subject_id = getSubject(subject_name)

        # Reuse a subject that is already part of the dataset
        duplicate = None
        for sj in self.dataset.subjects:
            if sj.id == subject_id and sj.name == subject_name:
                duplicate = sj

        if duplicate is None:
            subject = Subject(subject_id, subject_name)
        else:
            subject = duplicate

        # Only refetch the subject list when the subject changes
        if prev_subject_id != subject_id:
            sjList = ann.getSubjectList(subject_id)
            prev_subject_id = subject_id

        for path in fileutils.recursive_walk(subject_dir):
            if fileutils.fileextension(path) == configs.LAYER:
                filename = fileutils.filename(path)
                rc = Record()
                rc.data = blob.load_np_array(path)
                rc.label = int(Annotation.getClass(sjList, filename))
                rc.frame = filename
                subject.records.append(rc)
                print('Loading', subject_id, filename, rc.label)

        if duplicate is None:
            self.dataset.subjects.append(subject)
def targets(self, field, *, annotation_dicts):
    """Draw the target annotations onto the given field.

    Builds one Annotation per dict from its 'keypoints' and 'bbox' entries
    (with ``fixed_score=None``), passes ``field[0]`` to
    ``self._confidences`` and ``field[1]``, ``field[2]`` plus the
    annotations to ``self._regressions``.
    Requires ``self.keypoints`` and ``self.skeleton`` to be set.
    """
    assert self.keypoints is not None
    assert self.skeleton is not None

    annotations = []
    for ann in annotation_dicts:
        pose = Annotation(keypoints=self.keypoints, skeleton=self.skeleton)
        annotations.append(
            pose.set(ann['keypoints'], fixed_score=None, fixed_bbox=ann['bbox']))

    self._confidences(field[0])
    self._regressions(field[1], field[2], annotations=annotations)
def __init__(self, name, fcms=None, notes=None):
    """
    Initialize with fcm collection and notes.

    name: name of the collection
    fcms: optional iterable of fcm objects, stored in self.fcmdict keyed
        by each object's ``name``
    notes: annotation object to attach; a fresh Annotation() is created
        when None
    """
    self.fcmdict = {}
    self.name = name
    if fcms is not None:
        for fcm in fcms:
            self.fcmdict[fcm.name] = fcm
    # The original condition was inverted: it discarded caller-supplied
    # notes and left self.notes as None when none were given.
    if notes is None:
        self.notes = Annotation()
    else:
        self.notes = notes
def load_fic_annotations(self, fandom_fname, span_type):
    """Load every annotator's annotations for one fic.

    Results are stored in ``self.fic_annotations[fandom_fname][annotator]``
    after ``extract_annotated_spans`` has been called on each.

    Args:
        fandom_fname: name of the fic.
        span_type: 'coref' or 'quotes'; selects the annotation file suffix.

    Raises:
        ValueError: if ``span_type`` is not one of the supported values.
    """
    file_exts = {
        'coref': 'entity_clusters',
        'quotes': 'quote_attribution',
    }
    # An unknown span type used to surface as an unbound local variable
    if span_type not in file_exts:
        raise ValueError(f'Unknown span_type: {span_type!r}')
    file_ext = file_exts[span_type]

    self.fic_annotations[fandom_fname] = {}
    for annotator in self.annotators:
        annotation = Annotation(self.annotations_dirpath,
                                fandom_fname,
                                file_ext=f'_{file_ext}_{annotator}.csv',
                                fic_csv_dirpath=self.fic_csv_dirpath)
        self.fic_annotations[fandom_fname][annotator] = annotation
        annotation.extract_annotated_spans()
def loclabel_gen(ano_path, loc_path, out_path):
    """Write a 3x3 grid of patch labels for every location in *loc_path*.

    Args:
        ano_path: directory of ``<pid>.json`` annotation files.
        loc_path: text file with one ``pid,x_center,y_center`` per line.
        out_path: output file; each line holds the lower-cased pid, the
            centre coordinates and nine 0/1 labels, one per patch, where 1
            means the patch centre is reported inside the annotation's
            polygons by ``inside_polygons((x, y), True)``.
    """
    suffix = '.json'
    # Remove only a trailing ".json". str.strip('.json') strips any of the
    # characters '.', 'j', 's', 'o', 'n' from both ends and so corrupted
    # ids such as "jason" or "s01".
    pids = [
        fname[:-len(suffix)] if fname.endswith(suffix) else fname
        for fname in os.listdir(ano_path)
    ]

    annotations = {}
    for pid in pids:
        anno = Annotation()
        anno.from_json(os.path.join(ano_path, pid + suffix))
        annotations[pid] = anno

    coords = []
    with open(loc_path) as infile:
        for line in infile:
            pid, x_center, y_center = line.strip('\n').split(',')
            coords.append((pid, x_center, y_center))

    num_sample = len(coords)
    print(f"Total sample: {num_sample}")

    with open(out_path, 'w') as outfile:
        for index, (pid, x_center, y_center) in enumerate(coords):
            x_center = int(x_center)
            y_center = int(y_center)

            x_top_left = int(x_center - IMG_SIZE / 2)
            y_top_left = int(y_center - IMG_SIZE / 2)

            label = []
            for x_idx in range(3):
                for y_idx in range(3):
                    # (x, y) is the center of each patch
                    x = x_top_left + int((x_idx + 0.5) * SUB_SIZE)
                    y = y_top_left + int((y_idx + 0.5) * SUB_SIZE)
                    # get label information according to annotation
                    if annotations[pid].inside_polygons((x, y), True):
                        label.append(1)
                    else:
                        label.append(0)

            # write output
            outfile.write(
                f"{pid.lower()}, {x_center}, {y_center}, {str(label)[1:-1]}\n")

            # Progress indicator
            if index % 100 == 0:
                print(index)
def wrap_annotations(sentences):
    """Convert per-token B-/I- labels into a list of Annotation objects.

    *sentences* is a sequence of label sequences; a label may combine
    several tags separated by '#'. A tag containing 'B-' starts a new
    Annotation for the current sentence and token; a tag containing 'I-'
    adds the current token id to the most recent Annotation whose
    ``annotation`` equals the tag's type (the tag without its first two
    characters). Token ids run continuously across sentences.
    NOTE(review): the flattened source does not show whether the token
    counter advances per label or per '#'-separated tag; per label is
    assumed here - confirm.
    """
    annotations = []
    token_id = 0
    for sent_id, labels in enumerate(sentences):
        for label in labels:
            for tag in label.split('#'):
                ann_type = tag[2:]
                if 'B-' in tag:
                    annotations.append(Annotation(ann_type, sent_id, token_id))
                    continue
                if 'I-' not in tag:
                    continue
                # Extend the latest annotation of the same type, if any
                for candidate in reversed(annotations):
                    if ann_type == candidate.annotation:
                        candidate.add_id(token_id)
                        break
            token_id += 1
    return annotations
def move_mouse(self, event):
    """
    Handles the drawing of the arrow when deciding where to annotate.

    While a plot exists and the first click has been made, a temporary
    arrow Annotation from the first click to the current mouse position is
    stored in the session; in annotate mode the plot is redrawn with
    'redraw_legend' switched off for the duration of the redraw.
    NOTE(review): the flattened source does not show whether the 'annotate'
    check was nested under the 'click_one' check; it is treated as a
    sibling here - confirm.
    """
    # Looked up once; assumes Instance() hands back the same shared object
    # on every call.
    world = WorldState.Instance()
    if not world.draw_plot:
        return

    session = world.session_dict
    if session['click_one']:
        session['temp_annotation'] = Annotation(
            world._ARROW,
            (session['click_one_x'], session['click_one_y']),
            (event.xdata, event.ydata))

    if session['annotate']:
        session['redraw_legend'] = False
        world.draw_plot.plot()
        session['redraw_legend'] = True
def load_fic_spans(self, fandom_fname, gold_dirpath, baseline_dirpath, experimental_dirpath, gold_annotations_ext):
    """Load quote or coref predictions and gold spans for a fic.

    Gold spans are extracted from the gold annotation file; baseline and
    experimental spans are unpickled via ``utils.load_pickle`` from their
    respective directories.

    Returns:
        gold_spans, baseline_spans, experimental_spans
    """
    gold = Annotation(gold_dirpath,
                      fandom_fname,
                      file_ext=gold_annotations_ext,
                      fic_csv_dirpath=self.fic_csv_dirpath)
    gold.extract_annotated_spans()

    predictions = [
        utils.load_pickle(dirpath, fandom_fname)
        for dirpath in (baseline_dirpath, experimental_dirpath)
    ]
    return (gold.annotations, *predictions)
def to_annotation(self, threshold=-np.inf, posterior=False):
    """Turn scores into an annotation keeping the best label per track.

    Parameters
    ----------
    threshold : float, optional
        Each track is annotated with the label with the highest score.
        Yet, if the latter is smaller than `threshold`, label is replaced
        with an `Unknown` instance.
    posterior : bool, optional
        If True, scores are posterior probabilities in open-set
        identification. If top model posterior is higher than unknown
        posterior, it is selected. Otherwise, label is replaced with an
        `Unknown` instance.

    Returns
    -------
    annotation : Annotation
        Empty when this object is falsy.
    """
    annotation = Annotation(uri=self.uri, modality=self.modality)
    if not self:
        return annotation

    best = self.nbest(1, invert=False)

    if posterior:
        # Unknown posterior is what is left after summing target posteriors
        def unknown_posterior(p):
            return 1. - np.nansum(p, axis=1)

        Pu = self.apply(unknown_posterior, new_columns=['_'])

        # Reject when beaten by the unknown posterior or below threshold
        def rejected(segment, track, value):
            return value < Pu[segment, track, '_'] or value < threshold
    else:
        # Reject when below threshold
        def rejected(segment, track, value):
            return value < threshold

    for segment, track, label, value in best.itervalues():
        if rejected(segment, track, value):
            label = Unknown()
        annotation[segment, track] = label

    return annotation
def _load_annotations(self):
    """Read ``self.file_name`` and append one annotation list per line.

    Every non-empty line holds tab-separated fields in groups of four:
    begin index, end index, time expression and timex3. Each group becomes
    ``Annotation(begin + 1, end, time_expression, timex3)``. Empty lines
    contribute an empty list, so ``self.annotations`` gains one entry per
    line of the file.
    """
    # Local import keeps this block self-contained.
    from itertools import zip_longest

    with open(self.file_name, 'r') as f:
        for line in f:
            line = line.strip()
            if line != "":
                # Passing the same iterator four times groups the fields in
                # fours. zip_longest pads an incomplete group with None,
                # like the Python 2 map(None, ...) idiom it replaces, which
                # raises TypeError on Python 3.
                fields = iter(line.split("\t"))
                annotations_list = [
                    Annotation(int(begin_index) + 1, int(end_index),
                               time_expression, timex3)
                    for begin_index, end_index, time_expression, timex3
                    in zip_longest(*[fields] * 4)
                ]
            else:
                annotations_list = []
            self.annotations.append(annotations_list)
def __init__(self, dandelion_raw_list):
    """Collect the non-suppressed annotations of a raw Dandelion response.

    Each entry of the response's "annotations" becomes an Annotation; those
    not flagged ``suppress`` are kept in ``self.good_annotations``. A kept
    annotation whose ``uri`` matches an entry of "topEntities" is marked as
    a top entity and receives that entry's "score".
    """
    self.dandelion_raw_list = dandelion_raw_list
    self.good_annotations = []
    if not self.dandelion_raw_list:
        return

    for annotation_dict in self.dandelion_raw_list.get("annotations", []):
        my_annotation = Annotation(annotation_dict)
        if my_annotation.suppress:
            continue
        for top_entity in self.dandelion_raw_list.get("topEntities", []):
            if my_annotation.uri != top_entity["uri"]:
                continue
            my_annotation.is_top_entity = True
            my_annotation.top_entity_score = top_entity["score"]
        self.good_annotations.append(my_annotation)
def test_get_annotated(self):
    """get_annotated wraps sentence spans in <p> and word spans in <span>."""
    text = 'Lorem ipsum dolor sit amet. Consectetur adipiscing elit. Sed do eiusmod tempor incididunt.'
    tagged_text = '<p><span>Lorem</span> ipsum dolor sit <span>amet</span>.</p><p>Consectetur adipiscing <span>elit</span>.</p><p>Sed do eiusmod tempor <span>incididunt</span>.</p>'

    # (annotation type, start offset, end offset)
    spans = [
        ('sentence', 0, 27),
        ('sentence', 28, 56),
        ('sentence', 57, 90),
        ('word', 22, 26),
        ('word', 51, 55),
        ('word', 79, 89),
        ('word', 0, 5),
    ]
    lst_annotations = [Annotation(kind, start, end) for kind, start, end in spans]

    document = Document(text, 'test', lst_annotations)
    tagged_text_test = get_annotated(document)
    self.assertEqual(tagged_text_test, tagged_text)
def dict_to_tf_examples(data):
    """Split one image and its instance image into vertical strips.

    The image ``<name>leftImg8bit.png`` under ``DATA_DIR/leftImg8bit/<relpath>``
    and the instance image built from ``data['json_path']`` are cropped at
    the same positions: ``FLAGS.splits_divider`` strips covering the image
    width, plus ``splits_divider - 1`` strips shifted by half a strip width.

    Returns:
        A list with one ``sub_img_to_tf_example`` result per strip.
    """
    img_name = data['name'] + 'leftImg8bit.png'
    img_path = os.path.join(DATA_DIR, 'leftImg8bit', data['relpath'], img_name)

    annotation = Annotation()
    annotation.fromJsonFile(data['json_path'])
    instanceImg = createInstanceImage(annotation, "trainIds")
    instanceImg.Format = 'PNG'

    with tf.gfile.GFile(img_path, 'rb') as fid:
        image_bytes = fid.read()
    image = Image.open(io.BytesIO(image_bytes))

    n_splits = FLAGS.splits_divider
    split_width = int(np.ceil(image.width / n_splits))
    half_width = int(np.ceil(split_width / 2))

    # Regular strips; the last one is aligned with the right image border
    aligned = [i * split_width for i in range(n_splits - 1)]
    aligned.append(image.width - split_width)
    # Strips shifted by half a strip width
    shifted = [half_width + i * split_width for i in range(n_splits - 1)]
    split_positions = aligned + shifted

    examples = []
    for i, left in enumerate(split_positions):
        box = (left, 0, left + split_width, image.height)
        example = sub_img_to_tf_example(img_name + '[#' + str(i) + ']',
                                        image.crop(box),
                                        instanceImg.crop(box))
        examples.append(example)
    return examples
def __init__(self, parent=None, with_filename=True, with_slider=True, cache_capacity=500, max_fps=0):
    """Create the video widget, its state and its signal connections.

    Args:
        parent: forwarded to the base class constructor.
        with_filename: stored as ``self.with_filename``.
        with_slider: stored as ``self.with_slider``; when true the slider's
            release signal is connected.
        cache_capacity: forwarded to ``Video``.
        max_fps: forwarded to ``Video``.
    """
    super(VideoWidget, self).__init__(parent)

    # Options
    self.with_filename = with_filename
    self.with_slider = with_slider

    # State
    self.video = Video(cache_capacity=cache_capacity, max_fps=max_fps)
    self.annotation = Annotation()
    self.tube_id = 0
    self.tracker = None
    self.sim_thr = 0.9

    self.init_ui()
    self.installEventFilter(self)

    # Signal wiring
    if self.with_slider:
        self.slider.sliderReleased.connect(self.on_slider_released)
    connections = (
        (self.label_frame.bbox_added, self.set_tracker),
        (self.label_frame.bbox_deleted, self.del_tracker),
        (self.video.frame_updated, self.update_frame),
        (self.video.export_progress_updated, self.update_export_progress),
    )
    for signal, slot in connections:
        signal.connect(slot)