Ejemplo n.º 1
0
    def test_to_file(self):
        """Write a TextGrid to a temporary file in each mode and read it back.

        For every codec a grid with two interval tiers (both named 'tier')
        and one point tier is written in normal, short and binary mode; after
        each write the file is parsed again with ``TextGrid``. The test only
        checks that writing and re-reading do not raise.
        """
        for codec in ['utf-8', 'latin_1', 'mac_roman']:
            self.tg = TextGrid(xmax=20)
            tier1 = self.tg.add_tier('tier')
            tier1.add_interval(1, 2, 'i1')
            tier1.add_interval(2, 3, 'i2')
            tier1.add_interval(4, 5, 'i3')

            tier4 = self.tg.add_tier('tier')
            tier4.add_interval(1, 2, u'i1ü')
            tier4.add_interval(2.0, 3, 'i2')
            tier4.add_interval(4, 5.0, 'i3')

            tier2 = self.tg.add_tier('tier2', tier_type='TextTier')
            tier2.add_point(1, u'p1ü')
            tier2.add_point(2, 'p1')
            tier2.add_point(3, 'p1')

            # mkstemp() hands back an already-open OS-level descriptor; close
            # it right away so it is not leaked on every loop iteration (an
            # open handle also blocks os.remove on Windows).
            fd, tempf = tempfile.mkstemp()
            os.close(fd)
            try:
                # Normal mode
                self.tg.to_file(tempf, codec=codec)
                TextGrid(tempf, codec=codec)
                # Short mode
                self.tg.to_file(tempf, codec=codec, mode='s')
                TextGrid(tempf, codec=codec)
                # Binary mode
                self.tg.to_file(tempf, mode='b')
                TextGrid(tempf)
            finally:
                # Clean up even when a write or read above fails.
                os.remove(tempf)
Ejemplo n.º 2
0
def test_to_file(codec, tmp_path):
    """Round-trip a TextGrid through a file in normal, short and binary mode.

    A grid with two interval tiers (both named 'tier') and one point tier is
    written under ``tmp_path`` and parsed again after every write. The normal
    mode write passes a ``pathlib.Path``; the other calls pass the plain
    string path. Nothing is asserted beyond the calls not raising.
    """
    tg = TextGrid(xmax=20)

    plain_tier = tg.add_tier('tier')
    for begin, end, label in ((1, 2, 'i1'), (2, 3, 'i2'), (4, 5, 'i3')):
        plain_tier.add_interval(begin, end, label)

    # Second tier reuses the name and mixes int/float bounds and non-ASCII text.
    mixed_tier = tg.add_tier('tier')
    for begin, end, label in ((1, 2, u'i1ü'), (2.0, 3, 'i2'), (4, 5.0, 'i3')):
        mixed_tier.add_interval(begin, end, label)

    point_tier = tg.add_tier('tier2', tier_type='TextTier')
    for time, label in ((1, u'p1ü'), (2, 'p1'), (3, 'p1')):
        point_tier.add_point(time, label)

    target = tmp_path / 'test'
    tempf = str(target)

    # Normal mode (written through a pathlib.Path built from the string path)
    tg.to_file(pathlib.Path(tempf), codec=codec)
    TextGrid(tempf, codec=codec)

    # Short mode
    tg.to_file(tempf, codec=codec, mode='s')
    TextGrid(tempf, codec=codec)

    # Binary mode (no codec passed on either side)
    tg.to_file(tempf, mode='b')
    TextGrid(tempf)
Ejemplo n.º 3
0
def write_to_text_grid(words: List[dict], sentences: List[List[dict]],
                       duration: float):
    """Write results to Praat TextGrid. Because we are using pympi, we can also export to Elan EAF.

    Args:
        words (List[dict]): List of word entries; each is read through its
            "start", "end" and "text" keys
        sentences (List[List[dict]]): List of sentences, each a sequence of
            word entries with the same "start", "end" and "text" keys. A
            sentence spans from its first word's "start" to its last word's
            "end"; its label is the words' "text" joined by single spaces.
            Empty sentences are skipped.
        duration (float): duration of entire audio, passed to TextGrid as xmax

    Returns:
        TextGrid: Praat TextGrid with a "Sentence" tier and a "Word" tier
    """
    text_grid = TextGrid(xmax=duration)
    sentence_tier = text_grid.add_tier(name="Sentence")
    word_tier = text_grid.add_tier(name="Word")
    for s in sentences:
        if not s:
            # No words means no start/end to anchor an interval on.
            continue
        sentence_tier.add_interval(
            begin=s[0]["start"],
            end=s[-1]["end"],
            value=" ".join(w["text"] for w in s),
        )

    for w in words:
        word_tier.add_interval(begin=w["start"], end=w["end"], value=w["text"])

    return text_grid
Ejemplo n.º 4
0
    def toTextGrid(self, filePath, excludedTiers=None, includedTiers=None):
        """
        Convert the elan file to praat's TextGrid, returns 0 if successful
        and 1 if the pympi.Praat module cannot be imported

        filePath      -- The output file path - for stdout
        excludedTiers -- Tiers to exclude (None or empty excludes nothing)
        includedTiers -- Tiers to include if empty all tiers are included"""
        try:
            from pympi.Praat import TextGrid
        except ImportError:
            warnings.warn(
                'Please install the pympi.Praat module from the py' +
                'mpi module found at https://github.com/dopefishh/pympi')
            return 1
        # None stands in for "no filter" so no mutable list is shared as a
        # default between calls.
        if excludedTiers is None:
            excludedTiers = ()
        if includedTiers is None:
            includedTiers = ()
        tgout = TextGrid()
        for tier in self.tiers:
            if tier in excludedTiers:
                continue
            if includedTiers and tier not in includedTiers:
                continue
            currentTier = tgout.addTier(tier)
            for interval in self.getAnnotationDataForTier(tier):
                # Zero-length annotations are not written out.
                if interval[0] == interval[1]:
                    continue
                # Begin/end are divided by 1000 before being handed over
                # (presumably milliseconds to seconds -- confirm).
                currentTier.addInterval(interval[0] / 1000.0,
                                        interval[1] / 1000.0, interval[2])
        tgout.tofile(filePath)
        return 0
Ejemplo n.º 5
0
 def setUp(self):
     """Give every test a fresh ``TextGrid(xmax=20)`` and untruncated diffs."""
     self.tg = TextGrid(xmax=20)
     # unittest looks for ``maxDiff`` (capital D); the previous lowercase
     # ``maxdiff`` was just an unused attribute. None lifts the length limit
     # on assertion-failure diffs.
     # NOTE(review): assumes the enclosing class is a unittest.TestCase.
     self.maxDiff = None
Ejemplo n.º 6
0
    def test_to_file(self):
        """Check that stream output is stable across a write/read/write cycle.

        For every codec a grid with two interval tiers (both named 'tier')
        and one point tier is built. In normal, short and binary mode, in
        that order, the grid is written to an in-memory stream, parsed back
        from that stream into ``self.tg``, written again, and both outputs
        are compared. Each mode therefore starts from the grid re-read in the
        previous mode.
        """
        # (stream factory, extra to_stream keyword arguments) for each mode
        variants = (
            (io.StringIO, {}),             # Normal mode
            (io.StringIO, {'mode': 's'}),  # Short mode
            (io.BytesIO, {'mode': 'b'}),   # Binary mode
        )

        for codec in ['utf-8', 'utf-16', 'latin_1', 'mac_roman']:
            self.tg = TextGrid(xmax=20)

            plain_tier = self.tg.add_tier('tier')
            for begin, end, label in ((1, 2, 'i1'), (2, 3, 'i2'),
                                      (4, 5, 'i3')):
                plain_tier.add_interval(begin, end, label)

            mixed_tier = self.tg.add_tier('tier')
            for begin, end, label in ((1, 2, u'i1ü'), (2.0, 3, 'i2'),
                                      (4, 5.0, 'i3')):
                mixed_tier.add_interval(begin, end, label)

            point_tier = self.tg.add_tier('tier2', tier_type='TextTier')
            for time, label in ((1, u'p1ü'), (2, 'p1'), (3, 'p1')):
                point_tier.add_point(time, label)

            for make_stream, options in variants:
                # First pass: serialise, keep the output, then parse it back.
                first_pass = make_stream()
                self.tg.to_stream(first_pass, codec=codec, **options)
                first_pass.seek(0)
                expected = first_pass.read()
                first_pass.seek(0)
                self.tg = TextGrid(first_pass, codec=codec, stream=True)

                # Second pass: serialise the re-read grid the same way.
                second_pass = make_stream()
                self.tg.to_stream(second_pass, codec=codec, **options)
                second_pass.seek(0)
                actual = second_pass.read()
                second_pass.seek(0)

                self.assertEqual(actual, expected)