Example 1
    def make_feature(self, txt):
        syllables = preprocessing.syllable_tokenize(txt)

        sy2ix, ch2ix = self.sy_dict, self.ch_dict

        ch_ix, ch_type_ix, syllable_ix = [], [], []

        for syllable in syllables:
            six = preprocessing.syllable2ix(sy2ix, syllable)

            characters = list(syllable)
            # map each character to its index, its character-type index, and
            # the index of the syllable it belongs to
            chs = [preprocessing.character2ix(ch2ix, ch) for ch in characters]
            ch_ix.extend(chs)
            ch_type_ix.extend(char_type.get_char_type_ix(characters))
            syllable_ix.extend([six] * len(chs))

        # dims: (1, 3, len) - character ids, character-type ids, syllable ids
        features = np.stack((ch_ix, ch_type_ix, syllable_ix), axis=0) \
            .reshape((1, 3, -1)) \
            .astype(np.int64)

        seq_lengths = np.array([features.shape[-1]], dtype=np.int64)

        return list(txt), (torch.from_numpy(features), torch.from_numpy(seq_lengths))
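
The method above returns a (1, 3, sequence_length) tensor: character indices, character-type indices, and the owning syllable's index for each character. The following is a standalone sketch of that layout using dummy index values; the values, and the absence of the real dictionary lookups, are assumptions for illustration only.

import numpy as np
import torch

# Dummy indices standing in for the dictionary lookups above (illustrative only).
ch_ix = [4, 9, 2, 7]        # one entry per character
ch_type_ix = [1, 1, 2, 1]   # character-type index per character
syllable_ix = [0, 0, 1, 1]  # the owning syllable's index, repeated per character

features = np.stack((ch_ix, ch_type_ix, syllable_ix), axis=0) \
    .reshape((1, 3, -1)) \
    .astype(np.int64)
seq_lengths = np.array([features.shape[-1]], dtype=np.int64)

print(features.shape)                     # (1, 3, 4)
print(torch.from_numpy(features).dtype)   # torch.int64
print(seq_lengths)                        # [4]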
Example 2
def test_long_txt_sequences(txt, expected, max_length):
    sequences = preprocessing.long_txt_to_sequences(txt, max_length=max_length)

    assert len(sequences) == len(expected)

    # each returned chunk, with the "|" markers stripped, must stay within
    # the max_length syllable budget
    for _, s in zip(expected, sequences):
        syllables = preprocessing.syllable_tokenize(s.replace("|", ""))
        assert len(syllables) <= max_length
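
The test signature (txt, expected, max_length) suggests a pytest parametrized test. Below is a wiring sketch only; the real parameter tuples live in the project's test data and are not reproduced here, so the list is left as a placeholder.

import pytest

@pytest.mark.parametrize(
    "txt, expected, max_length",
    [
        # ("<long input text>", <expected chunks>, <max syllables per chunk>),
    ],
)
def test_long_txt_sequences(txt, expected, max_length):
    ...  # body as in the example above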
Example 3
def test_syllable_tokenize(txt, expected):
    act = preprocessing.syllable_tokenize(txt)
    # the expected syllables are packed into a single "~"-joined string
    exp = expected.split("~")

    # printed output is shown by pytest when the assertion below fails
    print(f"actual: {act}")
    print(f"expected: {exp}")

    assert act == exp
Example 4
    def make_feature(self, txt):
        syllables = preprocessing.syllable_tokenize(txt)

        sy2ix = self.sy_dict

        syllable_ix = []

        for syllable in syllables:
            six = preprocessing.syllable2ix(sy2ix, syllable)
            syllable_ix.append(six)

        # dims: (len,)
        features = np.array(syllable_ix)\
            .astype(np.int64)\
            .reshape(-1)

        seq_lengths = np.array([features.shape[-1]], dtype=np.int64)

        features = torch.from_numpy(features)

        return syllables, (features, torch.from_numpy(seq_lengths))
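
This variant produces a flat (sequence_length,) tensor of syllable indices. Below is a standalone sketch of how such an index tensor is typically consumed, for instance by an embedding layer; the vocabulary size, embedding dimension, and index values are assumptions, not taken from the source.

import numpy as np
import torch
import torch.nn as nn

# Illustrative values only: real indices would come from preprocessing.syllable2ix.
syllable_ix = [12, 40, 7, 12]
features = torch.from_numpy(np.array(syllable_ix, dtype=np.int64))  # dims: (4,)

embedding = nn.Embedding(num_embeddings=100, embedding_dim=16)
embedded = embedding(features)  # dims: (4, 16)
print(embedded.shape)           # torch.Size([4, 16])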
Example 5
def test_syllable_tokenize(txt, expected):
    act = preprocessing.syllable_tokenize(txt)
    exp = expected.split("~")

    assert act == exp