コード例 #1
0
ファイル: test_tracks.py プロジェクト: brigittebigi/sppas
class TestAnchorTier(unittest.TestCase):
    """
    Unit tests of AnchorTier: search window, holes between anchors,
    nearest-anchor lookup and export.

    Explanations inside the test methods are written as comments (not
    docstrings) so that the test runner keeps reporting the method names.

    """

    def setUp(self):
        self.t = AnchorTier()

    # ------------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------------

    def _new_tier(self, duration, margins=True):
        """
        Replace self.t by a fresh AnchorTier with a window delay of 4.

        @param duration (float) value given to set_duration()
        @param margins (bool) also set extdelay=1. and outdelay=0.2

        """
        tier = AnchorTier()
        tier.set_duration(duration)
        tier.set_windelay(4.0)
        if margins:
            tier.set_extdelay(1.0)
            tier.set_outdelay(0.2)
        self.t = tier

    def _fill(self, rows):
        """
        Append one annotation per (begin, end, value) row to self.t.

        An int value is stored as a typed "int" Text label; any other
        value (here the string "#") is given to Label() unchanged.

        """
        for begin, end, value in rows:
            span = TimeInterval(TimePoint(begin), TimePoint(end))
            if isinstance(value, int):
                label = Label(Text(value, data_type="int"))
            else:
                label = Label(value)
            self.t.Append(Annotation(span, label))

    def _check_window(self, start, from_time, to_time):
        """ Assert that fix_window(start) returns (from_time, to_time). """
        got_from, got_to = self.t.fix_window(start)
        self.assertEqual(got_from, from_time)
        self.assertEqual(got_to, to_time)

    def _check_indexed(self, time, direction, expected):
        """
        Assert the typed label value of near_indexed_anchor(time, direction),
        or that no anchor is returned when expected is None.

        """
        anchor = self.t.near_indexed_anchor(time, direction)
        if expected is None:
            self.assertIsNone(anchor)
        else:
            self.assertEqual(anchor.GetLabel().GetTypedValue(), expected)

    # ------------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------------

    def test_window(self):
        # Tier without any annotation.
        self._new_tier(12.0, margins=False)
        self._check_window(0.0, 0.0, 4.0)

        # Longer tier with three indexed anchors between 13. and 15.6.
        self.t.set_duration(18.0)
        self._fill([
            (13.0, 13.5, 18),
            (14.0, 14.6, 20),
            (14.6, 15.6, 21),
        ])
        self._check_window(14.0, 15.6, 18.0)

    # ------------------------------------------------------------------------

    def test_window_sil(self):
        self._new_tier(17.8)
        self._fill([
            (0.0, 1.5, "#"),
            (4.5, 6.3, "#"),
            (9.7, 11.3, "#"),
            (14.6, 17.8, "#"),
        ])

        # (start, expected from, expected to) with the window delay of 4.
        expected_windows = (
            (0.0, 1.5, 4.5),
            (0.5, 1.5, 4.5),
            (1.5, 1.5, 4.5),
            (3.5, 3.5, 4.5),
            (4.5, 6.3, 9.7),
            (9.0, 9.0, 9.7),
            (9.6, 11.3, 14.6),
            (14.6, 17.8, 17.8),
        )
        for start, from_time, to_time in expected_windows:
            self._check_window(start, from_time, to_time)

        # Same start (the int 0), other window delays: only the end moves.
        for delay, to_time in ((10.0, 4.5), (2.0, 3.5), (1.0, 2.5)):
            self.t.set_windelay(delay)
            self._check_window(0, 1.5, to_time)

    # ------------------------------------------------------------------------

    def test_holes(self):
        self._new_tier(17.8)
        self._fill([
            (0.0, 1.5, "#"),
            (1.5, 2.0, 0),
            (3.0, 3.5, -1),
            (4.5, 6.3, "#"),
            (7.0, 8.0, 8),
            (11.3, 12.0, -1),
            (13.0, 13.5, 18),
            (14.0, 14.6, 20),
            (14.6, 15.6, 21),
        ])

        self.assertTrue(self.t.check_holes_ntokens(10))
        self.assertFalse(self.t.check_holes_ntokens(9))

        self.assertEqual(self.t.fill_evident_holes(), 1)

    # ------------------------------------------------------------------------

    def test_near(self):
        self._new_tier(17.8)

        # Nothing can be found while the tier has no annotation.
        self._check_indexed(1.0, -1, None)

        self._fill([
            (0.0, 1.5, "#"),
            (1.5, 2.0, 1),
            (3.0, 3.5, -1),
            (4.5, 6.3, "#"),
            (7.0, 8.0, 2),
            (9.7, 11.3, "#"),
            (11.3, 12.0, -1),
            (14.0, 14.6, 3),
            (14.6, 17.8, "#"),
        ])

        # Near() with direction 1: (time, expected returned value).
        for time, expected in ((13.0, 7), (17.0, -1), (17.8, -1)):
            self.assertEqual(self.t.Near(time, 1), expected)

        # near_indexed_anchor() with direction 1.
        forward = (
            (1.0, 1),
            (1.5, 1),
            (2.0, 2),
            (11.0, 3),
            (15.0, None),
        )
        for time, expected in forward:
            self._check_indexed(time, 1, expected)

        # near_indexed_anchor() with direction -1.
        backward = (
            (1.0, None),
            (3.0, 1),
            (3.5, 1),
            (5.0, 1),
            (7.0, 1),
            (8.0, 2),
            (9.7, 2),
            (11.0, 2),
            (12.0, 2),
            (14.0, 2),
            (18.0, 3),
        )
        for time, expected in backward:
            self._check_indexed(time, -1, expected)

    # ------------------------------------------------------------------------

    def test_export(self):
        self._new_tier(17.8)
        # No leading silence here: the first annotation starts at 1.5.
        self._fill([
            (1.5, 2.0, 3),
            (4.5, 6.3, "#"),
            (7.0, 8.0, 8),
            (13.0, 13.5, 12),
            (14.0, 14.6, 13),
            (14.6, 15.2, 14),
            (15.2, 15.6, "#"),
            (16.0, 16.6, 16),
            (16.63, 17.0, 17),
        ])

        # Tokens u"w0" ... u"w17".
        toklist = [u"w%d" % i for i in range(18)]

        # Nothing is asserted on the result: export() only has to complete.
        self.t.export(toklist)
コード例 #2
0
ファイル: chunks.py プロジェクト: brigittebigi/sppas
    def create_chunks(self, inputaudio, phontier, toktier, diralign):
        """
        Create the "Chunks" transcription from raw input tiers.

        Anchors are first collected in an AnchorTier by successive ASR
        passes, then exported twice: once with the tokens and once with
        the pronunciations.

        @param inputaudio (IN) the audio; its channel of index 0 is used
        @param phontier (Tier - IN) the tier with phonetization
        @param toktier  (Tier - IN) the tier with tokenization to split
        @param diralign (str - IN) the directory to work.
        @return the Transcription named "Chunks", to which the tiers
        "Chunks-Tokenized" and "Chunks-Phonetized" were appended
        @raise IOError if phonetization and tokenization do not have the
        same number of items

        """
        chunks_trs = Transcription("Chunks")

        # Channel 0 of the audio, reformatted with the (16000, 2) settings
        # (presumably framerate and sample width -- confirm against autils).
        audio = autils.format_channel(
            autils.extract_audio_channel(inputaudio, 0), 16000, 2)

        # Whitespace-separated pronunciations and tokens: one-to-one lists.
        prons = self._tier2raw(phontier, map=True).split()
        tokens = self._tier2raw(toktier, map=False).split()
        nb_prons = len(prons)
        nb_tokens = len(tokens)
        if nb_prons != nb_tokens:
            raise IOError("Inconsistency between the number of items in phonetization %d and tokenization %d."%(nb_prons, nb_tokens))

        # The tier that collects the anchors, pass after pass.
        anchors = AnchorTier()
        anchors.set_duration(audio.get_duration())
        anchors.set_extdelay(1.0)
        anchors.set_outdelay(0.5)

        # Silences, when enabled, are the very first anchors.
        if self.SILENCES is True:
            anchors.append_silences(audio)

        # Speaking rate, evaluated from the duration and the number of tokens.
        self._spkrate.eval_from_duration(audio.get_duration(), nb_tokens)

        # Multi-pass ASR: each pass uses a smaller N-gram and a shorter window.
        previous_size = -1     # number of anchors before the current pass
        ngram = self.N
        window = self.W

        while True:
            # Stop when the last pass did not change the number of anchors...
            if previous_size == anchors.GetSize():
                break
            # ... or when check_holes_ntokens() returns anything but False.
            if anchors.check_holes_ntokens(self.NBT) is not False:
                break

            anchors.set_windelay(window)
            previous_size = anchors.GetSize()

            logging.debug(" =========================================================== ")
            pass_state = (
                (" Number of anchors: %d", previous_size),
                (" N: %d", ngram),
                (" W: %d", window),
            )
            for template, value in pass_state:
                logging.debug(template % value)

            # ASR pass: new anchors (if any) are added to the anchor tier.
            self._asr(tokens, prons, anchors, audio, diralign, ngram)

            # Save the intermediate result, only if this pass changed the size.
            if self.ANCHORS is True:
                if previous_size != anchors.GetSize():
                    self._append_tier(anchors, chunks_trs)
                    dump_name = "ANCHORS-%d.xra" % anchors.GetSize()
                    annotationdata.io.write(os.path.join(diralign, dump_name), chunks_trs)

            # Settings of the next pass, never below their minimum.
            window = max(window - 1.0, self.WMIN)
            ngram = max(ngram - 1, self.NMIN)

        # Finally, the anchors are exported as tracks.
        token_chunks = anchors.export(tokens)
        token_chunks.SetName("Chunks-Tokenized")
        pron_chunks = anchors.export(prons)
        pron_chunks.SetName("Chunks-Phonetized")
        for tier in (token_chunks, pron_chunks):
            chunks_trs.Append(tier)

        return chunks_trs