def test__kaldi_to_segmented_textdocument(self):
    """Kaldi output split by speech segments yields one TextDocument per segment."""
    pua_testcase = self.generate_pua_testcase()
    mmif = Mmif(validate=False)
    self.generate_audiodocument_view(mmif)
    seg_view = mmif.new_view()
    seg_view.new_contain(AnnotationTypes.TimeFrame, **{'timeUnit': self.app.timeunit})
    # pua_testcase is 20 seconds long; the two speech segments below sum to
    # 19 seconds, leaving a 1-second silence gap.
    unit = self.app.timeunit_conv[self.app.timeunit]
    for start_sec, end_sec in ((0, 10), (20, 29)):
        segment = seg_view.new_annotation(AnnotationTypes.TimeFrame)
        segment.add_property('frameType', 'speech')
        segment.add_property('start', start_sec * unit)
        segment.add_property('end', end_sec * unit)
    kaldi_view = mmif.new_view()
    kaldi_view.new_contain(AnnotationTypes.TimeFrame, **{'timeUnit': self.app.timeunit})
    self.app._kaldi_to_segmented_textdocument(pua_testcase, kaldi_view, seg_view)
    # audiodoc view + speech-segment view + kaldi view
    self.assertEqual(3, len(mmif.views))
    # one TextDocument per speech segment
    self.assertEqual(2, len(list(kaldi_view.get_annotations(DocumentTypes.TextDocument))))
    # only the 5th token ('6', 10-11 sec) is dropped (falls in the mid gap)
    self.assertEqual(9, len(list(kaldi_view.get_annotations(Uri.TOKEN))))
    self.assertEqual(9, len(list(kaldi_view.get_annotations(AnnotationTypes.TimeFrame))))
    # 9 token-tf alignments + 1 td-ad alignment
    self.assertEqual(10, len(list(kaldi_view.get_annotations(AnnotationTypes.Alignment))))
def test_use_in_mmif(self):
    """New annotations added to an existing view update `contains` metadata."""
    mmif_obj = Mmif(MMIF_EXAMPLES['everything'], frozen=False)
    view_obj: View = mmif_obj.get_view_by_id('v1')
    view_obj.new_annotation(AnnotationTypes.Polygon, 'p1')
    view_obj.new_annotation(AnnotationTypes.TimeFrame, 'bb2')
    # pre-existing TimeFrame type stays first; newly added Polygon follows
    expected_types = [
        f'http://mmif.clams.ai/{__specver__}/vocabulary/TimeFrame',
        f'http://mmif.clams.ai/{__specver__}/vocabulary/Polygon',
    ]
    self.assertEqual(expected_types, list(view_obj.metadata.contains.keys()))
def test__kaldi_to_single_textdocument(self):
    """Kaldi output without segmentation yields a single TextDocument."""
    pua_testcase = self.generate_pua_testcase()
    mmif = Mmif(validate=False)
    audiodoc = self.generate_audiodocument_view(mmif)
    kaldi_view = mmif.new_view()
    kaldi_view.new_contain(AnnotationTypes.TimeFrame, **{'timeUnit': self.app.timeunit})
    self.app._kaldi_to_single_textdocument(pua_testcase, kaldi_view, audiodoc)
    # audiodoc view + kaldi view
    self.assertEqual(2, len(mmif.views))
    expectations = (
        (1, DocumentTypes.TextDocument),
        (10, Uri.TOKEN),
        (10, AnnotationTypes.TimeFrame),
        (11, AnnotationTypes.Alignment),  # 10 token-tf + 1 td-ad
    )
    for expected_count, at_type in expectations:
        self.assertEqual(expected_count, len(list(kaldi_view.get_annotations(at_type))))
def _annotate(self, mmif: Mmif, **kwargs) -> Mmif:
    """Run slate text detection on a video document.

    Samples the middle frame of every "slate" TimeFrame found in the MMIF,
    detects text boxes in it, and records each box as a BoundingBox
    annotation (boxType "slate_text") in a new view.

    Fixes vs. previous revision: return annotation corrected (`str` -> `Mmif`,
    the function returns the mmif object), the VideoCapture is now released,
    and the bare `except:` is narrowed to `except Exception:`.
    """
    new_view = mmif.new_view()
    new_view.metadata['app'] = self.metadata["iri"]
    new_view.new_contain(AnnotationTypes.BoundingBox.value)
    # strip the leading 'file://' scheme to get a local path
    filename = mmif.get_document_location(DocumentTypes.VideoDocument)[7:]
    cap = cv2.VideoCapture(filename)
    file_basename = os.path.basename(filename)
    FRAME_TYPE = "slate"
    views_with_tframe = [
        tf_view for tf_view in mmif.get_all_views_contain(AnnotationTypes.TimeFrame)
        if tf_view.get_annotations(AnnotationTypes.TimeFrame, frameType=FRAME_TYPE)
    ]
    frame_number_ranges = [
        (tf_annotation.properties["start"], tf_annotation.properties["end"])
        for tf_view in views_with_tframe
        for tf_annotation in tf_view.get_annotations(AnnotationTypes.TimeFrame,
                                                     frameType=FRAME_TYPE)
    ]
    # sample the midpoint frame of each slate time frame
    target_frames = [(int(start) + int(end)) // 2
                     for start, end in frame_number_ranges]
    if not target_frames:
        print(f"No Slates for video: {file_basename}")
    try:
        for frame_number in target_frames:
            try:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
                _, slate_image = cap.read()
                cv2.imwrite(f"tmp/{frame_number}.jpg", slate_image)
                orig = slate_image.copy()
                # detector expects RGB; OpenCV reads BGR
                slate_image = cv2.cvtColor(slate_image, cv2.COLOR_BGR2RGB)
                slate_boxes = self.process_slate(slate_image)
                for _id, box in enumerate(slate_boxes):
                    box = [int(x) for x in box.tolist()]
                    # debug image with the detected rectangle drawn on it
                    rect = cv2.rectangle(orig, (box[0], box[1]), (box[2], box[3]),
                                         (0, 0, 255), 5)
                    cv2.imwrite(f"tmp/{file_basename}_{_id}_rect.jpg", rect)
                    annotation = new_view.new_annotation(
                        f"{str(frame_number)}_{str(_id)}",
                        AnnotationTypes.BoundingBox)
                    annotation.add_property(
                        "coordinates",
                        [[box[0], box[1]], [box[0], box[3]],
                         [box[2], box[1]], [box[2], box[3]]])
                    annotation.add_property('boxType', "slate_text")
                    annotation.add_property("frame", frame_number)
            except Exception:
                # best-effort per frame: log the failure, keep processing
                traceback.print_exc()
    finally:
        cap.release()  # don't leak the capture handle
    return mmif
def __init__(self, mmif):
    """Build a graph over all documents and annotations in *mmif*.

    *mmif* may be a Mmif object or anything the Mmif constructor accepts.
    """
    self.mmif = mmif if type(mmif) is Mmif else Mmif(mmif)
    self.documents = []
    self.nodes = {}
    self.alignments = []
    # Top-level documents become nodes and are also tracked in `documents`.
    for document in self.mmif.documents:
        self.add_node(None, document)
        self.documents.append(document)
    # Pass 1: register every annotation and document from every view.
    for view in self.mmif.views:
        for ann in view.annotations:
            self.add_node(view, ann)
    # Pass 2: alignments collected during pass 1 become graph edges.
    for view, alignment in self.alignments:
        self.add_edge(view, alignment)
    # Pass 3: link text elements — attach token lists to named entities.
    token_nodes = self.get_nodes(names.TOKEN)
    entity_nodes = self.get_nodes(names.NAMED_ENTITY)
    self.token_idx = TokenIndex(token_nodes)
    for entity in entity_nodes:
        entity.tokens = self.token_idx.get_tokens_for_node(entity)
def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
    """Run Kaldi ASR on the MMIF's audio documents and add transcripts.

    With `use_speech_segmentation` (default True), speech TimeFrames from
    earlier views are stitched into temporary patchwork audio files before
    recognition; otherwise Kaldi runs on the original files.
    """
    if not isinstance(mmif, Mmif):
        mmif: Mmif = Mmif(mmif)
    # AudioDocuments that actually point at a file
    docs = [doc for doc in mmif.documents
            if doc.at_type == DocumentTypes.AudioDocument and len(doc.location) > 0]
    conf = self.get_configuration(**parameters)
    if conf.get('use_speech_segmentation', True):
        # `files` holds newly generated patchwork audio files under `tmpdir`
        files, tf_src_view, tmpdir = self._patchwork_audiofiles(mmif, docs)
    else:
        # `files` holds the original locations
        files = {doc.id: doc.location_path() for doc in docs}
        tf_src_view = {}
        tmpdir = None
    transcript_tmpdir = self._run_kaldi(files)
    transcripts = transcript_tmpdir.name
    # reshape Kaldi's output into MMIF annotations
    self._kaldi_to_mmif(mmif, conf, transcripts, tf_src_view)
    if transcript_tmpdir:
        transcript_tmpdir.cleanup()
    if tmpdir:
        tmpdir.cleanup()
    return mmif
def test_serialize_within_mmif(self):
    """Recreating an annotation in place round-trips through serialization."""
    mmif_obj = Mmif(MMIF_EXAMPLES['everything'], frozen=False)
    view_obj = mmif_obj.get_view_by_id('v5')
    # drop an existing annotation and rebuild an identical one
    view_obj.annotations._items.pop('bb25')
    anno_obj = view_obj.new_annotation(AnnotationTypes.BoundingBox, 'bb25')
    anno_obj.add_property('coordinates',
                          [[150, 810], [1120, 810], [150, 870], [1120, 870]])
    anno_obj.add_property('timePoint', 21000)
    anno_obj.add_property('boxType', 'text')
    expected = json.loads(Mmif(MMIF_EXAMPLES['everything']).serialize())
    actual = json.loads(mmif_obj.serialize())
    # gen_time differs between serializations; neutralize it on both sides
    bb_type = f'http://mmif.clams.ai/{__specver__}/vocabulary/BoundingBox'
    for serialized in (expected, actual):
        serialized['views'][4]['metadata']['contains'][bb_type]['gen_time'] = 'dummy'
    self.assertEqual(expected, actual)
def _annotate(self, mmif: Union[str, dict, Mmif], **kwargs) -> Mmif:
    """Segment each audio document into speech / non-speech TimeFrames.

    For every accepted AudioDocument, runs the segmenter and writes one new
    view with alternating 'speech' / 'non-speech' TimeFrame annotations
    (timeUnit milliseconds) covering the whole file.

    Fix vs. previous revision: a file with NO detected speech used to raise
    IndexError on `speech_starts[0]`; it now gets a single 'non-speech'
    frame spanning the entire file.
    """
    if not isinstance(mmif, Mmif):
        mmif = Mmif(mmif)
    config = self.get_configuration(**kwargs)
    # AudioDocuments with a location whose extension the segmenter accepts
    docs = [
        document for document in mmif.documents
        if document.at_type == DocumentTypes.AudioDocument
        and len(document.location) > 0
        and os.path.splitext(document.location)[-1] in self.SEGMENTER_ACCEPTED_EXTENSIONS
    ]
    files = [document.location_path() for document in docs]
    # key documents by escaped location so results can be matched back
    docs_dict: Dict[str, Document] = {
        self.escape_filepath(doc.location_path()): doc for doc in docs
    }
    segmented, lengths = self.run_bacs(files)
    for filename, segmented_audio, total_frames in zip(files, segmented, lengths):
        v: View = mmif.new_view()
        self.sign_view(v, config)
        v.new_contain(AnnotationTypes.TimeFrame,
                      timeUnit='milliseconds',
                      document=docs_dict[self.escape_filepath(filename)].id)
        # segmented_audio maps speech-start -> speech-end (in ms)
        speech_starts = sorted(segmented_audio.keys())
        if not speech_starts:
            # no speech detected: the whole file is one non-speech frame
            self.create_segment_tf(v, 0, total_frames, 'non-speech')
            continue
        # leading silence before the first speech segment
        if speech_starts[0] > 0:
            self.create_segment_tf(v, 0, speech_starts[0] - 1, 'non-speech')
        nonspeech_start = None
        for speech_start in speech_starts:
            if nonspeech_start is not None:
                # gap between the previous speech segment and this one
                nonspeech_end = speech_start - 1
                self.create_segment_tf(v, nonspeech_start, nonspeech_end, 'non-speech')
            speech_end = segmented_audio[speech_start]
            nonspeech_start = speech_end + 1
            self.create_segment_tf(v, speech_start, speech_end, 'speech')
        # trailing silence after the last speech segment
        if nonspeech_start < total_frames:
            self.create_segment_tf(v, nonspeech_start, total_frames, 'non-speech')
    return mmif
def test_type_checking(self):
    """`is_type` matches the annotation's own type but not unrelated types."""
    ann_obj = Mmif(MMIF_EXAMPLES['everything']).get_view_by_id('v1').annotations['s1']
    # matches whether given the type object or its string form
    self.assertTrue(ann_obj.is_type(ann_obj.at_type))
    self.assertTrue(ann_obj.is_type(str(ann_obj.at_type)))
    # rejects a different type
    self.assertFalse(ann_obj.is_type(DocumentTypes.VideoDocument))
def setUp(self) -> None:
    """Create MMIF objects at the current spec version and one step away
    in each direction for the patch, minor, and major components.

    DRYed: the seven near-identical Mmif constructions now go through one
    local builder; the resulting attributes are unchanged.
    """
    self.major = 0
    self.minor = 4
    self.patch = 3
    self.specver = self.version(self.major, self.minor, self.patch)

    def build(major, minor, patch):
        # one MMIF instance whose metadata carries the given spec version
        return Mmif(Template(EVERYTHING_JSON).substitute(
            VERSION=self.version(major, minor, patch)))

    self.mmif_cur = build(self.major, self.minor, self.patch)
    self.mmif_pat_past = build(self.major, self.minor, self.patch - 1)
    self.mmif_pat_futr = build(self.major, self.minor, self.patch + 1)
    self.mmif_min_past = build(self.major, self.minor - 1, self.patch)
    self.mmif_min_futr = build(self.major, self.minor + 1, self.patch)
    self.mmif_maj_past = build(self.major - 1, self.minor, self.patch)
    self.mmif_maj_futr = build(self.major + 1, self.minor, self.patch)
class TestMMIFVersionCompatibility(unittest.TestCase):
    """Checks @type matching across MMIF spec versions.

    Type queries against a MMIF file whose spec version differs from the
    query type's version should still match at the patch level, but not
    when the minor (or major) component differs.
    """

    def setUp(self) -> None:
        # DRYed: the seven near-identical Mmif constructions go through one
        # local builder; resulting attributes are unchanged.
        self.major = 0
        self.minor = 4
        self.patch = 3
        self.specver = self.version(self.major, self.minor, self.patch)

        def build(major, minor, patch):
            # one MMIF instance whose metadata carries the given spec version
            return Mmif(Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(major, minor, patch)))

        self.mmif_cur = build(self.major, self.minor, self.patch)
        self.mmif_pat_past = build(self.major, self.minor, self.patch - 1)
        self.mmif_pat_futr = build(self.major, self.minor, self.patch + 1)
        self.mmif_min_past = build(self.major, self.minor - 1, self.patch)
        self.mmif_min_futr = build(self.major, self.minor + 1, self.patch)
        self.mmif_maj_past = build(self.major - 1, self.minor, self.patch)
        self.mmif_maj_futr = build(self.major + 1, self.minor, self.patch)

    @staticmethod
    def version(*major_minor_patch):
        """Join version components into a dotted string, e.g. '0.4.3'."""
        return '.'.join(map(str, major_minor_patch))

    def test_compatibility(self):
        """
        Simply tests searching by @type queries that do not match MMIF
        file version works at only patch level
        """
        DocumentTypes.TextDocument.version = self.specver
        td_url_prefix = f'{DocumentTypes.TextDocument.base_uri}/{DocumentTypes.TextDocument.version}'
        text_documents = self.mmif_cur.get_documents_by_type(
            DocumentTypes.TextDocument)
        views_with_text_documents = self.mmif_cur.get_views_contain(
            DocumentTypes.TextDocument)
        # exact version: file metadata matches the type's URL prefix
        self.assertEqual(td_url_prefix, self.mmif_cur.metadata['mmif'])
        # patch-level mismatch: URLs differ, but type queries still match
        self.assertNotEqual(td_url_prefix, self.mmif_pat_past.metadata['mmif'])
        self.assertEqual(
            len(self.mmif_pat_past.get_documents_by_type(
                DocumentTypes.TextDocument)),
            len(text_documents))
        self.assertNotEqual(td_url_prefix, self.mmif_pat_futr.metadata['mmif'])
        self.assertEqual(
            len(self.mmif_pat_futr.get_views_contain(
                DocumentTypes.TextDocument)),
            len(views_with_text_documents))
        # minor-level mismatch: type queries must not match
        self.assertNotEqual(td_url_prefix, self.mmif_min_past.metadata['mmif'])
        self.assertEqual(
            len(self.mmif_min_past.get_documents_by_type(
                DocumentTypes.TextDocument)),
            0)
        self.assertNotEqual(td_url_prefix, self.mmif_min_futr.metadata['mmif'])
        self.assertEqual(
            len(self.mmif_min_futr.get_documents_by_type(
                DocumentTypes.TextDocument)),
            0)
def generate_audiodocument_view(mmif: Mmif):
    """Add a fresh view holding a single AudioDocument; return that document."""
    view = mmif.new_view()
    return view.new_annotation(DocumentTypes.AudioDocument)
def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict):
    """Append one IIIF range per TimeFrame-bearing view to iiif_json['structures'].

    Range ids are assigned sequentially starting at 1.
    """
    tf_views = in_mmif.get_views_contain(AnnotationTypes.TimeFrame)
    for range_id, tf_view in enumerate(tf_views, start=1):
        iiif_json["structures"].append(tf_view_to_iiif_range(range_id, tf_view))
def post(self):
    """Handle POST: run the consumer on the request-body MMIF and respond."""
    params = cast(request.args)
    consumed = self.cla.consume(Mmif(request.get_data()), **params)
    return self.response(consumed, mimetype=self.mimetype)