def test__kaldi_to_segmented_textdocument(self):
        pua_testcase = self.generate_pua_testcase()
        
        m = Mmif(validate=False)
        
        self.generate_audiodocument_view(m)

        seg_view = m.new_view()
        seg_view.new_contain(AnnotationTypes.TimeFrame, **{'timeUnit': self.app.timeunit})

        # pua_testcase is 20 seconds long.
        # With two speech segments summing to 19 seconds, 1 second of the testcase falls in the silence gap.
        seg1 = seg_view.new_annotation(AnnotationTypes.TimeFrame)
        seg1.add_property('frameType', 'speech')
        seg1.add_property('start', 0)
        seg1.add_property('end', 10 * self.app.timeunit_conv[self.app.timeunit])
        seg2 = seg_view.new_annotation(AnnotationTypes.TimeFrame)
        seg2.add_property('frameType', 'speech')
        seg2.add_property('start', 20 * self.app.timeunit_conv[self.app.timeunit])
        seg2.add_property('end', 29 * self.app.timeunit_conv[self.app.timeunit])

        kaldi_view = m.new_view()
        kaldi_view.new_contain(AnnotationTypes.TimeFrame, **{'timeUnit': self.app.timeunit})
        self.app._kaldi_to_segmented_textdocument(pua_testcase, kaldi_view, seg_view)
        self.assertEqual(3, len(m.views))  # audiodoc, speechsegment, kaldi
        self.assertEqual(2, len(list(kaldi_view.get_annotations(DocumentTypes.TextDocument))))  # 2 speech segments
        # only the 5th token ('6', 10-11 sec) is dropped, since it falls in the mid-gap
        self.assertEqual(9, len(list(kaldi_view.get_annotations(Uri.TOKEN))))  
        self.assertEqual(9, len(list(kaldi_view.get_annotations(AnnotationTypes.TimeFrame))))
        self.assertEqual(10, len(list(kaldi_view.get_annotations(AnnotationTypes.Alignment))))  # 9 token-tf + 1 td-ad
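
For context, the `timeunit_conv` lookup used above maps the app's time unit to a ticks-per-second factor, so `10 * timeunit_conv[timeunit]` is 10 seconds expressed in that unit. A minimal sketch of what such a table might look like (the actual names and factors live in the Kaldi app and may differ):

# Hypothetical ticks-per-second table; the real app defines its own.
timeunit_conv = {
    'milliseconds': 1000,  # 10 s -> 10 * 1000 = 10000 ms
    'seconds': 1,          # 10 s -> 10
}
timeunit = 'milliseconds'
assert 10 * timeunit_conv[timeunit] == 10000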
Example 2
 def test_use_in_mmif(self):
     mmif_obj = Mmif(MMIF_EXAMPLES['everything'], frozen=False)
     view_obj: View = mmif_obj.get_view_by_id('v1')
     view_obj.new_annotation(AnnotationTypes.Polygon, 'p1')
     view_obj.new_annotation(AnnotationTypes.TimeFrame, 'bb2')
     self.assertEqual(list(view_obj.metadata.contains.keys()), [
         f'http://mmif.clams.ai/{__specver__}/vocabulary/TimeFrame',
         f'http://mmif.clams.ai/{__specver__}/vocabulary/Polygon'
     ])
Example 3

 def test__kaldi_to_single_textdocument(self):
     pua_testcase = self.generate_pua_testcase()
     m = Mmif(validate=False)
     d = self.generate_audiodocument_view(m)
     kaldi_view = m.new_view()
     kaldi_view.new_contain(AnnotationTypes.TimeFrame, **{'timeUnit': self.app.timeunit})
     self.app._kaldi_to_single_textdocument(pua_testcase, kaldi_view, d)
     self.assertEqual(2, len(m.views))
     self.assertEqual(1, len(list(kaldi_view.get_annotations(DocumentTypes.TextDocument))))
     self.assertEqual(10, len(list(kaldi_view.get_annotations(Uri.TOKEN))))
     self.assertEqual(10, len(list(kaldi_view.get_annotations(AnnotationTypes.TimeFrame))))
     self.assertEqual(11, len(list(kaldi_view.get_annotations(AnnotationTypes.Alignment))))  # 10 token-tf + 1 td-ad
Example 4
 def _annotate(self, mmif: Mmif, **kwargs) -> Mmif:
     new_view = mmif.new_view()
     new_view.metadata['app'] = self.metadata["iri"]
     new_view.new_contain(AnnotationTypes.BoundingBox.value)
     filename = mmif.get_document_location(DocumentTypes.VideoDocument)[7:]  # strip the 'file://' scheme prefix
     cap = cv2.VideoCapture(filename)
     file_basename = os.path.basename(filename)
     FRAME_TYPE = "slate"
     views_with_tframe = [
         tf_view for tf_view in mmif.get_all_views_contain(
             AnnotationTypes.TimeFrame)
         if tf_view.get_annotations(AnnotationTypes.TimeFrame,
                                    frameType=FRAME_TYPE)
     ]
     frame_number_ranges = [
         (tf_annotation.properties["start"],
          tf_annotation.properties["end"]) for tf_view in views_with_tframe
         for tf_annotation in tf_view.get_annotations(
             AnnotationTypes.TimeFrame, frameType=FRAME_TYPE)
     ]
     target_frames = [(int(start) + int(end)) // 2
                      for start, end in frame_number_ranges]
     if not target_frames:
         print(f"No Slates for video: {file_basename}")
     for frame_number in target_frames:
         try:
             cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
             _, slate_image = cap.read()
             cv2.imwrite(f"tmp/{frame_number}.jpg", slate_image)
             orig = slate_image.copy()
             slate_image = cv2.cvtColor(slate_image, cv2.COLOR_BGR2RGB)
             slate_boxes = self.process_slate(slate_image)
             for _id, box in enumerate(slate_boxes):
                 box = box.tolist()
                 box = [int(x) for x in box]
                 rect = cv2.rectangle(orig, (box[0], box[1]),
                                      (box[2], box[3]), (0, 0, 255), 5)
                 cv2.imwrite(f"tmp/{file_basename}_{_id}_rect.jpg", rect)
                 annotation = new_view.new_annotation(
                     f"{str(frame_number)}_{str(_id)}",
                     AnnotationTypes.BoundingBox)
                 annotation.add_property(
                     "coordinates", [[box[0], box[1]], [box[0], box[3]],
                                     [box[2], box[1]], [box[2], box[3]]])
                 annotation.add_property('boxType', "slate_text")
                 annotation.add_property("frame", frame_number)
         except Exception:
             traceback.print_exc()
     return mmif
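
The `[7:]` slice in `_annotate` strips a `file://` scheme prefix by character count. A more explicit alternative, shown only as a sketch (not what the app ships):

from urllib.parse import urlparse

def strip_file_scheme(location: str) -> str:
    # "file:///data/video.mp4" -> "/data/video.mp4"; other URIs pass through
    parsed = urlparse(location)
    return parsed.path if parsed.scheme == 'file' else location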
Example 5

 def __init__(self, mmif):
     self.mmif = mmif if isinstance(mmif, Mmif) else Mmif(mmif)
     self.documents = []
     self.nodes = {}
     self.alignments = []
     # The top-level documents are added as nodes, but they are also put in
     # the documents list.
     for doc in self.mmif.documents:
         self.add_node(None, doc)
         self.documents.append(doc)
     # First pass over all annotations and documents in all views and save
     # them in the graph.
     for view in self.mmif.views:
         for annotation in view.annotations:
             self.add_node(view, annotation)
     # Second pass over the alignments so we create edges.
     for view, alignment in self.alignments:
         self.add_edge(view, alignment)
     # Third pass to add links between text elements, in particular from
     # entities to tokens, adding lists of tokens to entities.
     tokens = self.get_nodes(names.TOKEN)
     entities = self.get_nodes(names.NAMED_ENTITY)
     self.token_idx = TokenIndex(tokens)
     for e in entities:
         e.tokens = self.token_idx.get_tokens_for_node(e)
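
The constructor above relies on `add_node`, `add_edge`, `get_nodes`, and `TokenIndex`, none of which are shown. A purely illustrative shape for `add_node`, assuming alignments are collected for the second pass (the real Graph class defines its own node wrapper and identifiers):

from mmif.vocabulary import AnnotationTypes

def add_node(self, view, annotation):
    # Hypothetical: key nodes by view-scoped id; queue alignments for pass two
    node_id = annotation.id if view is None else f'{view.id}:{annotation.id}'
    self.nodes[node_id] = annotation
    if annotation.at_type == AnnotationTypes.Alignment:
        self.alignments.append((view, annotation))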
Example 6
    def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
        if not isinstance(mmif, Mmif):
            mmif: Mmif = Mmif(mmif)

        # get AudioDocuments with locations
        docs = [
            document for document in mmif.documents
            if document.at_type == DocumentTypes.AudioDocument
            and len(document.location) > 0
        ]
        conf = self.get_configuration(**parameters)
        use_speech_segmentation = conf.get('use_speech_segmentation', True)

        if use_speech_segmentation:
            # using "speech" TimeFrames, `files` holds newly generated patchwork audio files in `tmpdir`
            files, tf_src_view, tmpdir = self._patchwork_audiofiles(mmif, docs)
        else:
            # `files` holds original locations
            files = {doc.id: doc.location_path() for doc in docs}
            tf_src_view = {}
            tmpdir = None

        transcript_tmpdir = self._run_kaldi(files)
        transcripts = transcript_tmpdir.name

        # now re-format Kaldi output
        self._kaldi_to_mmif(mmif, conf, transcripts, tf_src_view)

        if transcript_tmpdir:
            transcript_tmpdir.cleanup()
        if tmpdir:
            tmpdir.cleanup()
        return mmif
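
For context, an `_annotate` like this is normally reached through the public `annotate` entry point of a `ClamsApp`. A hypothetical driver ('KaldiApp' is a stand-in name, not confirmed against the app):

app = KaldiApp()  # hypothetical ClamsApp subclass that defines the _annotate above
with open('input.mmif') as f:
    annotated = app.annotate(f.read(), use_speech_segmentation=True)
# annotate() wraps _annotate(), handling input deserialization and parameter parsing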
Example 7
 def test_serialize_within_mmif(self):
     mmif_obj = Mmif(MMIF_EXAMPLES['everything'], frozen=False)
     view_obj = mmif_obj.get_view_by_id('v5')
     view_obj.annotations._items.pop('bb25')
     anno_obj = view_obj.new_annotation(AnnotationTypes.BoundingBox, 'bb25')
     anno_obj.add_property(
         'coordinates', [[150, 810], [1120, 810], [150, 870], [1120, 870]])
     anno_obj.add_property('timePoint', 21000)
     anno_obj.add_property('boxType', 'text')
     expected = json.loads(Mmif(MMIF_EXAMPLES['everything']).serialize())
     actual = json.loads(mmif_obj.serialize())
     bb_type = f'http://mmif.clams.ai/{__specver__}/vocabulary/BoundingBox'
     expected['views'][4]['metadata']['contains'][bb_type][
         'gen_time'] = 'dummy'
     actual['views'][4]['metadata']['contains'][bb_type][
         'gen_time'] = 'dummy'
     self.assertEqual(expected, actual)
Example 8
    def _annotate(self, mmif: Union[str, dict, Mmif], **kwargs) -> Mmif:
        if not isinstance(mmif, Mmif):
            mmif = Mmif(mmif)
        config = self.get_configuration(**kwargs)

        # get AudioDocuments with locations
        docs = [
            document for document in mmif.documents
            if document.at_type == DocumentTypes.AudioDocument
            and len(document.location) > 0 and os.path.splitext(
                document.location)[-1] in self.SEGMENTER_ACCEPTED_EXTENSIONS
        ]

        files = [document.location_path() for document in docs]

        # key them by location
        docs_dict: Dict[str, Document] = {
            self.escape_filepath(doc.location_path()): doc
            for doc in docs
        }

        segmented, lengths = self.run_bacs(files)

        for filename, segmented_audio, total_frames in zip(
                files, segmented, lengths):

            v: View = mmif.new_view()
            self.sign_view(v, config)
            v.new_contain(
                AnnotationTypes.TimeFrame,
                timeUnit='milliseconds',
                document=docs_dict[self.escape_filepath(filename)].id)

            speech_starts = sorted(segmented_audio.keys())
            if speech_starts and speech_starts[0] > 0:  # leading non-speech; guard against empty segmentation
                self.create_segment_tf(v, 0, speech_starts[0] - 1,
                                       'non-speech')
            nonspeech_start = None
            for speech_start in speech_starts:
                if nonspeech_start is not None:
                    nonspeech_end = speech_start - 1
                    self.create_segment_tf(v, nonspeech_start, nonspeech_end,
                                           'non-speech')
                speech_end = segmented_audio[speech_start]
                nonspeech_start = speech_end + 1
                self.create_segment_tf(v, speech_start, speech_end, 'speech')

            # nonspeech_start is None when no speech was detected at all
            if nonspeech_start is not None and nonspeech_start < total_frames:
                self.create_segment_tf(v, nonspeech_start, total_frames,
                                       'non-speech')
        return mmif
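
`create_segment_tf` is not shown; given the TimeFrame-building pattern in the tests above, a plausible sketch (property names assumed from this page, not confirmed against the segmenter app):

from mmif.vocabulary import AnnotationTypes

def create_segment_tf(self, view, start, end, frame_type):
    # One TimeFrame per segment, mirroring the add_property pattern above
    tf = view.new_annotation(AnnotationTypes.TimeFrame)
    tf.add_property('frameType', frame_type)
    tf.add_property('start', start)
    tf.add_property('end', end)
    return tf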
Example 9
 def test_type_checking(self):
     mmif_obj = Mmif(MMIF_EXAMPLES['everything'])
     ann_obj = mmif_obj.get_view_by_id('v1').annotations['s1']
     self.assertTrue(ann_obj.is_type(ann_obj.at_type))
     self.assertTrue(ann_obj.is_type(str(ann_obj.at_type)))
     self.assertFalse(ann_obj.is_type(DocumentTypes.VideoDocument))
Example 11
class TestMMIFVersionCompatibility(unittest.TestCase):
    def setUp(self) -> None:
        self.major = 0
        self.minor = 4
        self.patch = 3
        self.specver = self.version(self.major, self.minor, self.patch)
        self.mmif_cur = Mmif(
            Template(EVERYTHING_JSON).substitute(VERSION=self.specver))
        self.mmif_pat_past = Mmif(
            Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(self.major, self.minor, self.patch - 1)))
        self.mmif_pat_futr = Mmif(
            Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(self.major, self.minor, self.patch + 1)))
        self.mmif_min_past = Mmif(
            Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(self.major, self.minor - 1, self.patch)))
        self.mmif_min_futr = Mmif(
            Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(self.major, self.minor + 1, self.patch)))
        self.mmif_maj_past = Mmif(
            Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(self.major - 1, self.minor, self.patch)))
        self.mmif_maj_futr = Mmif(
            Template(EVERYTHING_JSON).substitute(
                VERSION=self.version(self.major + 1, self.minor, self.patch)))

    @staticmethod
    def version(*major_minor_patch):
        return '.'.join(map(str, major_minor_patch))

    def test_compatibility(self):
        """
        Simply tests searching by @type queries that do not match MMIF file version works at only patch level
        """
        DocumentTypes.TextDocument.version = self.specver
        td_url_prefix = f'{DocumentTypes.TextDocument.base_uri}/{DocumentTypes.TextDocument.version}'
        text_documents = self.mmif_cur.get_documents_by_type(
            DocumentTypes.TextDocument)
        views_with_text_documents = self.mmif_cur.get_views_contain(
            DocumentTypes.TextDocument)
        self.assertEqual(td_url_prefix, self.mmif_cur.metadata['mmif'])
        self.assertNotEqual(td_url_prefix, self.mmif_pat_past.metadata['mmif'])
        self.assertEqual(
            len(
                self.mmif_pat_past.get_documents_by_type(
                    DocumentTypes.TextDocument)), len(text_documents))
        self.assertNotEqual(td_url_prefix, self.mmif_pat_futr.metadata['mmif'])
        self.assertEqual(
            len(
                self.mmif_pat_futr.get_views_contain(
                    DocumentTypes.TextDocument)),
            len(views_with_text_documents))
        self.assertNotEqual(td_url_prefix, self.mmif_min_past.metadata['mmif'])
        self.assertEqual(
            len(
                self.mmif_min_past.get_documents_by_type(
                    DocumentTypes.TextDocument)), 0)
        self.assertNotEqual(td_url_prefix, self.mmif_min_futr.metadata['mmif'])
        self.assertEqual(
            len(
                self.mmif_min_futr.get_documents_by_type(
                    DocumentTypes.TextDocument)), 0)
Example 12

 def generate_audiodocument_view(mmif: Mmif):
     ad_v = mmif.new_view()
     d = ad_v.new_annotation(DocumentTypes.AudioDocument)
     return d
Example 13

def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict):
    # get all views with TimeFrame annotations from the MMIF object
    tf_views = in_mmif.get_views_contain(AnnotationTypes.TimeFrame)
    for range_id, view in enumerate(tf_views, start=1):
        view_range = tf_view_to_iiif_range(range_id, view)
        iiif_json["structures"].append(view_range)
Example 14
 def post(self):
     params = cast(request.args)
     return self.response(self.cla.consume(Mmif(request.get_data()),
                                           **params),
                          mimetype=self.mimetype)
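
`cast` here cannot be `typing.cast` (which takes two arguments); it is presumably a local helper that turns Flask's query-string multidict into plain keyword arguments. A hypothetical sketch:

def cast(args):
    # Hypothetical helper: flatten Flask's ImmutableMultiDict into plain
    # kwargs, coercing 'true'/'false' strings to booleans
    return {k: {'true': True, 'false': False}.get(v.lower(), v)
            for k, v in args.items()}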