Beispiel #1
0
    def __call__(self, chunk):
        """Expand a Japanese text chunk into print-only per-token chunks.

        The incoming chunk is duplicated first and the duplicate is always
        the first element of the returned list.  When the duplicate is a
        ``TextChunk`` whose language is ``'japanese'`` it is marked as
        non-printable and, for every token produced by ``self.tokenize``,
        a silent printable chunk with ``token[0]`` is appended; if the token
        has more than one element, a second chunk with ``token[1]`` wrapped
        in parentheses follows (presumably the reading of the token --
        confirm against ``tokenize``).

        :param chunk: chunk to process.
        :return: list of chunks, starting with the duplicate of ``chunk``.
        """
        from src.Sequencer import TextChunk

        def print_only(text):
            # Chunks built here are displayed but never voiced.
            return TextChunk(text=text,
                             language='japanese',
                             audible=False,
                             printable=True,
                             final=True)

        source = self._duplicate_chunk(chunk)
        output = [source]

        if not isinstance(source, TextChunk) or source.language != 'japanese':
            return output

        # The per-token chunks replace the printed form of the original.
        source.printable = False
        for token in self.tokenize(source.text):
            output.append(print_only(token[0]))
            if len(token) > 1:
                output.append(print_only(f' ({token[1]}) '))

        return output
Beispiel #2
0
 def test_promote(self):
     """Promote a Japanese ``TextChunk`` to a ``SpeechChunk``.

     Only exercises the ``promote`` call with an extra ``volume`` field;
     no assertion is made on the promoted chunk.
     """
     from src.Sequencer import TextChunk, SpeechChunk

     text_fields = {
         'text': '財布の中に何もありません',
         'language': 'japanese',
         'audible': True,
         'printable': True,
         'final': False,
     }
     text_chunk = TextChunk(**text_fields)
     promoted = text_chunk.promote(SpeechChunk, volume=75)
Beispiel #3
0
    def __call__(self, chunk):
        """Split a text chunk into single-language chunks.

        Characters are classified with ``self._get_language``.  A character
        whose language is ``None`` never starts a new run; it is attached to
        the run that is currently open.  Consecutive runs are separated by a
        ``JingleChunk(jingle='silence')``.

        Compared to flushing only from inside the loop, the trailing run is
        emitted *after* the loop, which fixes three cases that used to lose
        or mislabel text:

        * a single-character text (nothing was emitted at all);
        * a text whose language is first detected on its last character, or
          never detected (nothing was emitted at all);
        * a last character in a new language (it was glued onto the previous
          run and labelled with the previous language).

        :param chunk: chunk to process.
        :return: for a ``TextChunk``: the per-language chunks interleaved
            with silence jingles (empty list for empty text); for any other
            chunk: a one-element list holding a duplicate of it.
        """
        from src.Sequencer import TextChunk, SpeechChunk, JingleChunk

        if not isinstance(chunk, TextChunk):
            return [self._duplicate_chunk(chunk)]

        # Pass 1: group the characters into (language, text) runs.
        runs = []
        run_language = None
        run_chars = []
        for char in chunk.text:
            language = self._get_language(char)
            if run_language is None:
                # No language seen so far: adopt whatever this char has
                # (possibly still None) without cutting.
                run_language = language
            elif language is not None and language != run_language:
                runs.append((run_language, ''.join(run_chars)))
                run_language = language
                run_chars = []
            run_chars.append(char)
        if run_chars:
            # Flush whatever is still buffered once the text is exhausted.
            runs.append((run_language, ''.join(run_chars)))

        # Pass 2: turn the runs into chunks.
        result = []
        is_speech = isinstance(chunk, SpeechChunk)
        for index, (language, text) in enumerate(runs):
            if language is None:
                # Only possible when no character had a detectable language;
                # keep the language the chunk already carries.
                language = chunk.language
            if index:
                result.append(JingleChunk(jingle='silence'))
            changes = {"text": text, "language": language}
            if is_speech:
                # The voice is only kept for runs in the chunk's own language.
                voice = chunk.voice if chunk.language == language else None
                result.append(evolve(chunk, **changes, voice=voice))
            else:
                # NOTE(review): a plain TextChunk is rebuilt from text and
                # language only, so its other fields fall back to defaults.
                result.append(TextChunk(**changes))

        return result
Beispiel #4
0
    def __call__(self, chunk):
        """Finalize a chunk and append a print-only tokenized companion.

        The incoming chunk is duplicated and the duplicate is marked final.
        The result of ``self.tokenize`` on its text becomes the text of a
        second, silent but printable Japanese ``TextChunk``.

        :param chunk: chunk to process; its ``text`` is read.
        :return: two-element list: the finalized duplicate and the
            tokenized companion chunk.
        """
        finalized = self._duplicate_chunk(chunk)
        finalized.final = True

        companion = TextChunk(text=self.tokenize(finalized.text),
                              language='japanese',
                              audible=False,
                              printable=True,
                              final=True)
        return [finalized, companion]
Beispiel #5
0
    def test_abbr(self):
        """Check that 'smb.' and 'smth.' are expanded in English text.

        Runs ``ExpandContractions`` followed by ``StubFinalizer`` and expects
        the first resulting chunk to contain both 'somebody' and 'something'.
        """
        from src.Sequencer import TextChunk, ChunkProcessor
        from src.filter.ExpandContractions import ExpandContractions
        from src.filter.StubFinalizer import StubFinalizer

        source = TextChunk(text='to listen up to smb. and smth.',
                           language='english',
                           audible=True,
                           printable=True,
                           final=False)
        processor = ChunkProcessor(
            filters=[ExpandContractions(), StubFinalizer()])
        processed = processor.apply_filters(source)

        expanded_words = ('somebody', 'something')
        assert all(word in processed[0].text for word in expanded_words)
Beispiel #6
0
 def search_excerpt(self, app_config, foreign_name, word):
     """Build the chunk sequence that plays an excerpt for ``word``.

     :param app_config: mapping whose ``'phraseExamples'`` entry is checked
         for ``foreign_name``.
     :param foreign_name: language name used for the lookup and for the
         resulting text chunk.
     :param word: word to look up; anything containing a space is skipped.
     :return: ``None`` when ``word`` contains a space, when
         ``foreign_name`` is not in ``app_config['phraseExamples']`` or
         when no excerpt is found; otherwise a list of an 'excerpt' jingle,
         a 'silence' jingle, the excerpt text chunk and a 'silence_long'
         jingle.
     """
     if ' ' in word:
         return None
     if foreign_name not in app_config['phraseExamples']:
         return None

     excerpt = self.get_excerpt(word, foreign_name)
     if excerpt is None:
         # Nothing for the literal word: retry with each lemmatized
         # (stemmed) form and keep the first truthy hit.
         for base_form in self.lemmatize(word):
             excerpt = self.get_excerpt(base_form, foreign_name)
             if excerpt:
                 break

     if not excerpt:
         return None

     return [
         JingleChunk(jingle='excerpt'),
         JingleChunk(jingle='silence'),
         TextChunk(text=excerpt, language=foreign_name),
         JingleChunk(jingle='silence_long')
     ]
Beispiel #7
0
    def test_split_mixed_languages(self):
        """Check that mixed English/Russian/Japanese text is split in order.

        The processed sequence is expected to alternate language chunks
        with jingle chunks: english, jingle, russian, jingle, japanese.
        """
        from src.Sequencer import TextChunk, ChunkProcessor, JingleChunk
        from src.filter.AddVoice import AddVoice
        from src.filter.SplitMixedLanguages import SplitMixedLanguages
        from src.filter.StubFinalizer import StubFinalizer

        mixed = TextChunk(text='hi there привет こんにちは',
                          language='english',
                          audible=True,
                          printable=True,
                          final=False)
        pipeline = [SplitMixedLanguages(), AddVoice(), StubFinalizer()]
        processor = ChunkProcessor(filters=pipeline)
        pieces = processor.apply_filters(mixed)

        assert pieces[0].language == 'english'
        self.assertIsInstance(pieces[1], JingleChunk)
        assert pieces[2].language == 'russian'
        self.assertIsInstance(pieces[3], JingleChunk)
        assert pieces[4].language == 'japanese'
Beispiel #8
0
    def __call__(self, chunk):
        """Append a letter-by-letter spelling sequence after the chunk.

        The chunk is duplicated and always returned first.  When
        ``self._needs_process`` accepts its text and language, an intro of
        three non-printable jingles ('silence_long', 'by_letter', 'silence')
        is appended, followed for every character of the text by a
        'silence_short' jingle and then either a 'space' jingle (whitespace)
        or an audible, non-printable English chunk with that character.

        :param chunk: chunk to process; ``text`` and ``language`` are read.
        :return: list of chunks, starting with the duplicate of ``chunk``.
        """
        from src.Sequencer import JingleChunk, TextChunk

        def hidden_jingle(name):
            # Every jingle of the spelling sequence is kept out of print.
            return JingleChunk(jingle=name, printable=False)

        source = self._duplicate_chunk(chunk)
        output = [source]

        if not self._needs_process(source.text, source.language):
            return output

        for intro in ('silence_long', 'by_letter', 'silence'):
            output.append(hidden_jingle(intro))

        for letter in source.text:
            output.append(hidden_jingle('silence_short'))
            if letter.isspace():
                spoken = hidden_jingle('space')
            else:
                spoken = TextChunk(text=letter,
                                   language='english',
                                   audible=True,
                                   printable=False,
                                   final=True)
            output.append(spoken)

        return output
Beispiel #9
0
    def __call__(self, chunk):
        """Append a bracketed kanji explanation after a Japanese text chunk.

        The chunk is duplicated and always returned first; anything that is
        not a Japanese ``TextChunk`` is returned without additions.
        Otherwise ``self._get_explanations`` is expected to yield 4-tuples
        ``(kanji, ons, kuns, explanation)``.  Between a print-only '[' and
        ']' each tuple contributes, in order:

        * the kanji itself (print-only);
        * the spoken label 'on', a silence, then every on-reading followed
          by a silence and a print-only '、';
        * the spoken label 'koon', a silence, then every kun-reading in the
          same pattern;
        * a 'definition' jingle and the spoken, printed English explanation.

        A final 'silence_long' jingle closes the sequence.

        :param chunk: chunk to process.
        :return: list of chunks, starting with the duplicate of ``chunk``.
        """
        from src.Sequencer import TextChunk, JingleChunk

        def print_only(text, **extra):
            # Displayed but never voiced.
            return TextChunk(text=text,
                             audible=False,
                             printable=True,
                             final=True,
                             **extra)

        def pause():
            return JingleChunk(jingle='silence')

        def readings_section(label, readings):
            # Spoken English label, a pause, then each reading followed by a
            # pause and a printed separator.
            section = [
                TextChunk(text=label,
                          language='english',
                          audible=True,
                          printable=False,
                          final=True),
                pause(),
            ]
            for reading in readings:
                section.append(
                    TextChunk(text=reading,
                              language='japanese',
                              audible=True,
                              printable=True,
                              final=True))
                section.append(pause())
                section.append(print_only('、'))
            return section

        source = self._duplicate_chunk(chunk)
        output = [source]

        is_japanese_text = (isinstance(source, TextChunk)
                            and source.language == 'japanese')
        if not is_japanese_text:
            return output

        explanations = self._get_explanations(source.text)

        output.append(print_only('['))

        for kanji, ons, kuns, explanation in explanations:
            output.append(print_only(kanji, language='japanese'))
            output.extend(readings_section('on', ons))
            # 'koon' is the label text handed to the English voice.
            output.extend(readings_section('koon', kuns))
            output.append(JingleChunk(jingle='definition'))
            output.append(
                TextChunk(text=explanation,
                          language='english',
                          audible=True,
                          printable=True,
                          final=True))

        output.append(print_only(']'))
        output.append(
            JingleChunk(jingle='silence_long',
                        audible=False,
                        printable=True,
                        final=True))

        return output
Beispiel #10
0
 def example_sequence(foreign, native, pair):
     """Turn a (foreign, native) text pair into a three-chunk sequence.

     :param foreign: language assigned to the first text of ``pair``.
     :param native: language assigned to the second text of ``pair``.
     :param pair: indexable holding the two texts; both are stripped.
     :return: foreign text chunk, a 'silence' jingle, native text chunk.
     """
     foreign_chunk = TextChunk(text=pair[0].strip(), language=foreign)
     pause = JingleChunk(jingle='silence')
     native_chunk = TextChunk(text=pair[1].strip(), language=native)
     return [foreign_chunk, pause, native_chunk]
Beispiel #11
0
 def chunk_factory(*, language, text):
     """Build a ``TextChunk`` from keyword-only ``language`` and ``text``."""
     fields = {'text': text, 'language': language}
     return TextChunk(**fields)
Beispiel #12
0
    def test_chunk_preprocessing(self):
        """Smoke-test several filters, each paired with ``StubFinalizer``.

        Every case builds a printable, non-final ``TextChunk`` and pushes it
        through a two-filter ``ChunkProcessor``.  ``jp_reverse`` is called
        once as well.  Nothing is asserted; the test only checks that the
        calls complete without raising.
        """
        from src.Sequencer import ChunkProcessor, TextChunk
        from src.filter.AddFurigana import AddFurigana
        from src.filter.ExplainJapaneseSentences import ExplainJapaneseSentences
        from src.filter.ExplainKanji import ExplainKanji
        from src.filter.PronounceByLetter import PronounceByLetter
        from src.filter.SimilarKanji import jp_reverse
        from src.filter.TidyUpEnglish import TidyUpEnglish
        from src.filter.StubFinalizer import StubFinalizer
        from src.filter.TidyUpText import TidyUpText

        def run_case(filter_cls, text, language, audible):
            # The chunk is built before the filters are instantiated.
            source = TextChunk(text=text,
                               language=language,
                               audible=audible,
                               printable=True,
                               final=False)
            processor = ChunkProcessor(
                filters=[filter_cls(), StubFinalizer()])
            return processor.apply_filters(source)

        tidied_english = run_case(
            TidyUpEnglish, '"some guy\'s bad text {', 'english', True)
        kanji_explained = run_case(
            ExplainKanji, '知りません', 'japanese', False)
        spelled_out = run_case(
            PronounceByLetter, 'faux pas', 'english', True)
        sentence_explained = run_case(
            ExplainJapaneseSentences, '財布の中に何もありません', 'japanese', True)
        with_furigana = run_case(
            AddFurigana, '財布の中に何もありません', 'japanese', True)

        reversed_kanji = jp_reverse('知')

        tidied_text = run_case(
            TidyUpText,
            '/in brackets/some_bad_formatting{going}here()',
            'japanese',
            True)