예제 #1
0
def test_word_transcription(graph_db, textgrid_test_dir):
    """Load acoustic_corpus.TextGrid and check the word hierarchy.

    After resetting and loading the ``discourse_textgrid`` corpus, the
    hierarchy must report ``transcription`` as a type property of ``word``.
    """
    with CorpusContext("discourse_textgrid", **graph_db) as corpus:
        corpus.reset()
        textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
        textgrid_parser = inspect_textgrid(textgrid_path)
        corpus.load(textgrid_parser, textgrid_path)
        has_transcription = corpus.hierarchy.has_type_property('word', 'transcription')
        assert has_transcription
예제 #2
0
def test_load_pronunciation(textgrid_test_dir, graph_db):
    """Reset the ``test_pronunc`` corpus and load pronunc_variants_corpus.TextGrid.

    The test passes as long as inspecting and loading the file raises nothing.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'pronunc_variants_corpus.TextGrid')

    with CorpusContext('test_pronunc', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        corpus.load(textgrid_parser, textgrid_path)
    def run_query(self):
        """Import the corpus described by ``self.kwargs`` and return load's result.

        Reads ``name``, ``directory``, ``call_back`` and ``stop_check`` from
        ``self.kwargs``.  The parser is chosen from the corpus name
        (``buckeye``, ``timit``, ``partitur``); any other name falls back to
        the format reported by ``guess_textgrid_format``.  The corpus is
        reset, loaded, and ``actionCompleted`` is emitted before returning
        whatever ``load`` returned.
        """
        time.sleep(0.1)
        corpus_name = self.kwargs['name']
        corpus_dir = self.kwargs['directory']
        # Always true in this version; kept as a local switch.
        reset = True
        config = CorpusConfig(corpus_name, graph_host='localhost', graph_port=7474)
        with CorpusContext(config) as corpus:
            if corpus_name == 'buckeye':
                parser = inspect_buckeye(corpus_dir)
            elif corpus_name == 'timit':
                parser = inspect_timit(corpus_dir)
            elif corpus_name == 'partitur':
                parser = inspect_partitur(corpus_dir)
            else:
                # Not a named corpus format: pick the parser from the
                # guessed TextGrid format, defaulting to the generic one.
                textgrid_format = guess_textgrid_format(corpus_dir)
                if textgrid_format == 'labbcat':
                    parser = inspect_labbcat(corpus_dir)
                elif textgrid_format == 'mfa':
                    parser = inspect_mfa(corpus_dir)
                elif textgrid_format == 'fave':
                    parser = inspect_fave(corpus_dir)
                else:
                    parser = inspect_textgrid(corpus_dir)

            progress_hook = self.kwargs['call_back']
            parser.call_back = progress_hook
            cancel_hook = self.kwargs['stop_check']
            parser.stop_check = cancel_hook
            parser.call_back('Resetting corpus...')
            if reset:
                corpus.reset(call_back=progress_hook, stop_check=cancel_hook)
            could_not_parse = corpus.load(parser, corpus_dir)
            self.actionCompleted.emit('importing corpus')
        return could_not_parse
예제 #4
0
def test_directory(textgrid_test_dir, graph_db):
    """Load the whole TextGrid directory with a parser built from one file.

    The parser is inspected from phone_word.TextGrid only; loading the full
    directory with it must return a non-empty collection.
    """
    single_file = os.path.join(textgrid_test_dir, 'phone_word.TextGrid')
    with CorpusContext('test_textgrid_directory', **graph_db) as corpus:
        corpus.reset()
        single_file_parser = inspect_textgrid(single_file)
        could_not_parse = corpus.load(single_file_parser, textgrid_test_dir)
        assert len(could_not_parse) > 0
예제 #5
0
    def run_query(self):
        """Import the corpus described by ``self.kwargs`` and return load's result.

        Reads ``name``, ``directory``, ``call_back`` and ``stop_check`` from
        ``self.kwargs``.  ``buckeye`` and ``timit`` get dedicated parsers;
        any other name is dispatched on the value returned by
        ``guess_textgrid_format``.  The corpus is reset and then loaded.
        """
        time.sleep(0.1)
        corpus_name = self.kwargs['name']
        source_dir = self.kwargs['directory']
        # Always true in this version; kept as a local switch.
        reset = True
        config = CorpusConfig(corpus_name, graph_host='localhost', graph_port=7474)
        with CorpusContext(config) as corpus:
            if corpus_name == 'buckeye':
                parser = inspect_buckeye(source_dir)
            elif corpus_name == 'timit':
                parser = inspect_timit(source_dir)
            else:
                # Fall back to TextGrid handling, choosing the parser from
                # the guessed format.
                guessed = guess_textgrid_format(source_dir)
                if guessed == 'labbcat':
                    parser = inspect_labbcat(source_dir)
                elif guessed == 'mfa':
                    parser = inspect_mfa(source_dir)
                elif guessed == 'fave':
                    parser = inspect_fave(source_dir)
                else:
                    parser = inspect_textgrid(source_dir)

            on_progress = self.kwargs['call_back']
            parser.call_back = on_progress
            should_stop = self.kwargs['stop_check']
            parser.stop_check = should_stop
            parser.call_back('Resetting corpus...')
            if reset:
                corpus.reset(call_back=on_progress, stop_check=should_stop)
            could_not_parse = corpus.load(parser, source_dir)
        return could_not_parse
예제 #6
0
def test_directory(textgrid_test_dir, graph_db):
    """Check that loading the full directory returns a non-empty result.

    The parser comes from phone_word.TextGrid alone, while ``load`` is
    pointed at the whole test directory.
    """
    reference_file = os.path.join(textgrid_test_dir, 'phone_word.TextGrid')
    with CorpusContext('test_textgrid_directory', **graph_db) as ctx:
        ctx.reset()
        leftovers = ctx.load(inspect_textgrid(reference_file), textgrid_test_dir)
        assert len(leftovers) > 0
예제 #7
0
def acoustic_config(graph_db, textgrid_test_dir):
    """Return a ``CorpusConfig`` named "acoustic" with its corpus freshly loaded.

    The corpus is reset and acoustic_corpus.TextGrid is loaded into it
    before the config is handed back.
    """
    corpus_config = CorpusConfig("acoustic", **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, "acoustic_corpus.TextGrid")
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        corpus.load(textgrid_parser, textgrid_path)
    return corpus_config
def acoustic_config(graph_db, textgrid_test_dir):
    """Build the 'acoustic' corpus config and load its TextGrid.

    Resets the corpus, loads acoustic_corpus.TextGrid, and returns the
    ``CorpusConfig`` that was used.
    """
    corpus_config = CorpusConfig('acoustic', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)
        # NOTE(review): the original keeps this call commented out; confirm
        # whether acoustic analysis should run here.
        # corpus.analyze_acoustics()
    return corpus_config
예제 #9
0
def summarized_config(graph_db, textgrid_test_dir):
    """Return a ``CorpusConfig`` named 'summarized' with its corpus loaded.

    The corpus is reset and then populated from acoustic_corpus.TextGrid.
    """
    corpus_config = CorpusConfig('summarized', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        corpus.load(textgrid_parser, textgrid_path)

    return corpus_config
예제 #10
0
def test_load(textgrid_test_dir, graph_db):
    """Load phone_word.TextGrid with a hand-adjusted parser.

    The parser returned by ``inspect_textgrid`` is modified before loading:
    annotation type 1 is marked as the ``word`` level, annotation type 2 is
    ignored, and the hierarchy is set so that ``phone`` sits under ``word``.
    The test passes when the load completes without raising.
    """
    path = os.path.join(textgrid_test_dir, 'phone_word.TextGrid')
    with CorpusContext('test_textgrid', **graph_db) as c:
        # Start from an empty corpus, as the other loading tests do, so the
        # result does not depend on data left behind by an earlier run.
        c.reset()
        parser = inspect_textgrid(path)
        parser.annotation_types[1].linguistic_type = 'word'
        parser.annotation_types[2].ignored = True
        parser.hierarchy['word'] = None
        parser.hierarchy['phone'] = 'word'
        # Show the (linguistic type, tier name) pairs in the captured output.
        print([(x.linguistic_type, x.name) for x in parser.annotation_types])
        c.load(parser, path)
예제 #11
0
def acoustic_config(graph_db, textgrid_test_dir):
    """Return the 'acoustic' corpus config, loaded and set to use acousticsim.

    After the corpus is reset and acoustic_corpus.TextGrid is loaded,
    ``pitch_algorithm`` and ``formant_source`` are both set to
    ``'acousticsim'`` on the returned config.
    """
    corpus_config = CorpusConfig('acoustic', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        corpus.load(textgrid_parser, textgrid_path)

    corpus_config.pitch_algorithm = 'acousticsim'
    corpus_config.formant_source = 'acousticsim'
    return corpus_config
예제 #12
0
def acoustic_config(graph_db, textgrid_test_dir):
    """Return the 'acoustic' corpus config, loaded and set to use acousticsim.

    After the corpus is reset and acoustic_corpus.TextGrid is loaded,
    ``pitch_algorithm`` and ``formant_algorithm`` are both set to
    ``'acousticsim'`` on the returned config.
    """
    corpus_config = CorpusConfig('acoustic', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

    corpus_config.pitch_algorithm = 'acousticsim'
    corpus_config.formant_algorithm = 'acousticsim'
    return corpus_config
예제 #13
0
def test_load(textgrid_test_dir, graph_db):
    """Load phone_word.TextGrid with a hand-adjusted parser.

    Tier 1 is marked as the ``word`` level, tier 2 is ignored, and the
    hierarchy places ``phone`` under ``word`` before the reset corpus is
    loaded.  The test passes when the load completes without raising.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'phone_word.TextGrid')
    with CorpusContext('test_textgrid', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        textgrid_parser.annotation_tiers[1].linguistic_type = 'word'
        textgrid_parser.annotation_tiers[2].ignored = True
        textgrid_parser.hierarchy['word'] = None
        textgrid_parser.hierarchy['phone'] = 'word'
        # Show the (linguistic type, tier name) pairs in the captured output.
        tier_summary = [(tier.linguistic_type, tier.name)
                        for tier in textgrid_parser.annotation_tiers]
        print(tier_summary)
        corpus.load(textgrid_parser, textgrid_path)
예제 #14
0
def test_load_pronunciation_ignore(textgrid_test_dir, graph_db):
    """Ignored tiers must not be queryable after loading.

    Tiers 1 and 2 of pronunc_variants_corpus.TextGrid are flagged as ignored
    before loading; building and running a query on ``actualPron`` is then
    expected to raise ``GraphQueryError``.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'pronunc_variants_corpus.TextGrid')
    with CorpusContext('test_pronunc', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        for tier_index in (1, 2):
            textgrid_parser.annotation_tiers[tier_index].ignored = True
        corpus.load(textgrid_parser, textgrid_path)

        with pytest.raises(GraphQueryError):
            # Either the attribute lookup or the query itself may raise, so
            # both stay inside the raises block.
            corpus.query_graph(corpus.actualPron).all()
예제 #15
0
def test_load_pronunciation_ignore(textgrid_test_dir, graph_db):
    """Ignored annotation types must not be queryable after loading.

    Annotation types 1 and 2 of pronunc_variants_corpus.TextGrid are flagged
    as ignored before loading; querying ``actualPron`` afterwards is expected
    to raise ``GraphQueryError``.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'pronunc_variants_corpus.TextGrid')
    with CorpusContext('test_pronunc', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        textgrid_parser.annotation_types[1].ignored = True
        textgrid_parser.annotation_types[2].ignored = True
        corpus.load(textgrid_parser, textgrid_path)

        with pytest.raises(GraphQueryError):
            # Keep both the query construction and its execution inside the
            # raises block, since either step may be the one that fails.
            pron_query = corpus.query_graph(corpus.actualPron)
            pron_query.all()
예제 #16
0
def french_config(graph_db, textgrid_test_dir):
    """Return a ``CorpusConfig`` named 'french' with pauses and utterances encoded.

    The corpus is reset, FR001_5.TextGrid is loaded, the labels ``sil`` and
    ``<SIL>`` are encoded as pauses, and utterances are encoded with
    ``min_pause_length=.15``.
    """
    corpus_config = CorpusConfig('french', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'FR001_5.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

        pause_labels = ['sil', '<SIL>']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=.15)

    return corpus_config
예제 #17
0
def test_utterance_nosilence(graph_db, textgrid_test_dir):
    """Check utterance encoding on phone_word_no_silence.TextGrid.

    After loading with annotation type 0 as ``phone`` and type 1 as ``word``
    and encoding utterances, the test expects:

    * the word labelled ``b`` has no following word,
    * exactly one word begins where its utterance begins, labelled ``a``,
    * exactly one phone begins where its utterance begins, labelled ``a``.

    The final section (after the early ``return``) is intentionally not run.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'phone_word_no_silence.TextGrid')
    with CorpusContext('word_phone_nosilence', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        textgrid_parser.annotation_types[0].linguistic_type = 'phone'
        textgrid_parser.annotation_types[1].linguistic_type = 'word'
        textgrid_parser.hierarchy['word'] = None
        textgrid_parser.hierarchy['phone'] = 'word'
        corpus.load(textgrid_parser, textgrid_path)

        corpus.encode_utterances()

        # The word 'b' should have nothing after it.
        word_query = corpus.query_graph(corpus.word).filter(corpus.word.label == 'b')
        word_query = word_query.columns(
            corpus.word.following.label.column_name('following_word'))
        print(word_query.cypher())
        rows = word_query.all()
        assert len(rows) == 1
        assert rows[0]['following_word'] is None

        # Exactly one word starts together with its utterance.
        utterance_initial_words = corpus.query_graph(corpus.word).filter(
            corpus.word.begin == corpus.word.utterance.begin)
        rows = utterance_initial_words.all()
        assert len(rows) == 1
        assert rows[0]['label'] == 'a'

        # Exactly one phone starts together with its utterance.
        utterance_initial_phones = corpus.query_graph(corpus.phone).filter(
            corpus.phone.begin == corpus.phone.utterance.begin)
        rows = utterance_initial_phones.all()
        assert len(rows) == 1
        assert rows[0]['label'] == 'a'

        # Things like g.phone.word.following are currently broken in PolyglotDB
        return

        phone_query = corpus.query_graph(corpus.phone).filter(corpus.phone.label == 'b')
        phone_query = phone_query.filter(corpus.phone.following.label == 'b')
        phone_query = phone_query.columns(
            corpus.phone.label,
            corpus.phone.id,
            corpus.phone.word.following.label.column_name('following_word'))
        print(phone_query.cypher())
        rows = phone_query.all()
        assert len(rows) == 1
        assert rows[0]['following_word'] is None
예제 #18
0
def acoustic_utt_config(graph_db, textgrid_test_dir):
    """Return the 'acoustic utt' config with pauses and utterances encoded.

    The corpus is reset and loaded from acoustic_corpus.TextGrid, ``sil`` is
    encoded as a pause, and utterances are encoded with
    ``min_pause_length=0``.  ``pitch_algorithm`` and ``formant_algorithm``
    are then set to ``'acousticsim'`` on the returned config.
    """
    corpus_config = CorpusConfig('acoustic utt', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        corpus.load(textgrid_parser, textgrid_path)

        corpus.encode_pauses(['sil'])
        corpus.encode_utterances(min_pause_length=0)

    corpus_config.pitch_algorithm = 'acousticsim'
    corpus_config.formant_algorithm = 'acousticsim'
    return corpus_config
예제 #19
0
def acoustic_utt_config(graph_db, textgrid_test_dir):
    """Build the 'acoustic utt' corpus and return its config.

    Loads acoustic_corpus.TextGrid into a reset corpus, encodes ``sil`` as a
    pause and encodes utterances with ``min_pause_length=0``, then sets both
    ``pitch_algorithm`` and ``formant_algorithm`` to ``'acousticsim'``.
    """
    corpus_config = CorpusConfig('acoustic utt', **graph_db)

    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

        pause_labels = ['sil']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=0)

    corpus_config.pitch_algorithm = 'acousticsim'
    corpus_config.formant_algorithm = 'acousticsim'
    return corpus_config
예제 #20
0
def test_utterance_nosilence(graph_db, textgrid_test_dir):
    """Check utterance encoding on phone_word_no_silence.TextGrid.

    With annotation type 0 as ``phone`` and type 1 as ``word``, and after
    encoding utterances, the word ``b`` must have no following word and
    exactly one word and one phone (both labelled ``a``) must begin at their
    utterance's start.  The code after the early ``return`` is intentionally
    not executed.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'phone_word_no_silence.TextGrid')
    with CorpusContext('word_phone_nosilence', **graph_db) as ctx:
        ctx.reset()
        tg_parser = inspect_textgrid(textgrid_path)
        tg_parser.annotation_types[0].linguistic_type = 'phone'
        tg_parser.annotation_types[1].linguistic_type = 'word'
        tg_parser.hierarchy['word'] = None
        tg_parser.hierarchy['phone'] = 'word'
        ctx.load(tg_parser, textgrid_path)

        ctx.encode_utterances()

        # Word 'b': expect a single row with no following word.
        query = ctx.query_graph(ctx.word).filter(ctx.word.label == 'b')
        query = query.columns(ctx.word.following.label.column_name('following_word'))
        print(query.cypher())
        found = query.all()
        assert len(found) == 1
        assert found[0]['following_word'] is None

        # Words that begin at their utterance's start.
        query = ctx.query_graph(ctx.word).filter(ctx.word.begin == ctx.word.utterance.begin)
        found = query.all()
        assert len(found) == 1
        assert found[0]['label'] == 'a'

        # Phones that begin at their utterance's start.
        query = ctx.query_graph(ctx.phone).filter(ctx.phone.begin == ctx.phone.utterance.begin)
        found = query.all()
        assert len(found) == 1
        assert found[0]['label'] == 'a'

        # Things like g.phone.word.following are currently broken in PolyglotDB
        return

        query = ctx.query_graph(ctx.phone).filter(ctx.phone.label == 'b')
        query = query.filter(ctx.phone.following.label == 'b')
        query = query.columns(
            ctx.phone.label,
            ctx.phone.id,
            ctx.phone.word.following.label.column_name('following_word'),
        )
        print(query.cypher())
        found = query.all()
        assert len(found) == 1
        assert found[0]['following_word'] is None
예제 #21
0
def test_load_pronunciation(textgrid_test_dir, graph_db):
    """Check dictionary and actual pronunciations of the word 'probably'.

    Annotation type 2 is loaded with ``type_property = False``.  The first
    'probably' token (ordered by begin time) must have both ``dict_pron``
    and ``act_pron`` equal to ``p.r.aa.b.ah.b.l.iy``.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'pronunc_variants_corpus.TextGrid')

    with CorpusContext('test_pronunc', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        textgrid_parser.annotation_types[2].type_property = False
        corpus.load(textgrid_parser, textgrid_path)

        query = corpus.query_graph(corpus.words).filter(corpus.words.label == 'probably')
        query = query.order_by(corpus.words.begin)
        query = query.columns(
            corpus.words.label,
            corpus.words.dictionaryPron.column_name('dict_pron'),
            corpus.words.actualPron.column_name('act_pron'),
        )
        rows = query.all()
        expected_pron = 'p.r.aa.b.ah.b.l.iy'
        assert rows[0]['dict_pron'] == expected_pron
        assert rows[0]['act_pron'] == expected_pron
예제 #22
0
def test_load_pronunciation(textgrid_test_dir, graph_db):
    """Check dictionary and actual pronunciations of the word 'probably'.

    Tier 2 is loaded with ``type_property = False``.  The first 'probably'
    token (ordered by begin time) must report ``p.r.aa.b.ah.b.l.iy`` for
    both the ``dict_pron`` and ``act_pron`` columns.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'pronunc_variants_corpus.TextGrid')

    with CorpusContext('test_pronunc', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        textgrid_parser.annotation_tiers[2].type_property = False
        corpus.load(textgrid_parser, textgrid_path)

        probably_tokens = corpus.query_graph(corpus.word).filter(
            corpus.word.label == 'probably')
        probably_tokens = probably_tokens.order_by(corpus.word.begin)
        probably_tokens = probably_tokens.columns(
            corpus.word.label,
            corpus.word.dictionaryPron.column_name('dict_pron'),
            corpus.word.actualPron.column_name('act_pron'),
        )
        rows = probably_tokens.all()
        assert rows[0]['dict_pron'] == 'p.r.aa.b.ah.b.l.iy'
        assert rows[0]['act_pron'] == 'p.r.aa.b.ah.b.l.iy'
예제 #23
0
def test_utterance_oneword(graph_db, textgrid_test_dir):
    """Check that the first encoded utterance begins at time 0.

    Loads one_word_no_silence.TextGrid with annotation type 0 as ``phone``
    and type 1 as ``word``, encodes utterances, and inspects the first
    utterance returned by the query.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'one_word_no_silence.TextGrid')
    with CorpusContext('one_word_no_silence', **graph_db) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_path)
        textgrid_parser.annotation_types[0].linguistic_type = 'phone'
        textgrid_parser.annotation_types[1].linguistic_type = 'word'
        textgrid_parser.hierarchy['word'] = None
        textgrid_parser.hierarchy['phone'] = 'word'
        corpus.load(textgrid_parser, textgrid_path)

        corpus.encode_utterances()

        utterances = corpus.query_graph(corpus.utterance).all()

        assert utterances[0].begin == 0
예제 #24
0
def test_utterance_oneword(graph_db, textgrid_test_dir):
    """Encode utterances for a one-word TextGrid and check the first one.

    After loading one_word_no_silence.TextGrid (type 0 as ``phone``, type 1
    as ``word``) and encoding utterances, the first utterance returned must
    have ``begin == 0``.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'one_word_no_silence.TextGrid')
    with CorpusContext('one_word_no_silence', **graph_db) as ctx:
        ctx.reset()
        tg_parser = inspect_textgrid(textgrid_path)
        tg_parser.annotation_types[0].linguistic_type = 'phone'
        tg_parser.annotation_types[1].linguistic_type = 'word'
        tg_parser.hierarchy['word'] = None
        tg_parser.hierarchy['phone'] = 'word'
        ctx.load(tg_parser, textgrid_path)

        ctx.encode_utterances()

        utterance_query = ctx.query_graph(ctx.utterance)
        first_utterance = utterance_query.all()[0]

        assert first_utterance.begin == 0
예제 #25
0
def test_load_discourse(graph_db, mfa_test_dir, textgrid_test_dir):
    """Load two discourses into one corpus and check both are queryable.

    acoustic_corpus.TextGrid is loaded with the generic TextGrid parser and
    mfa_test.TextGrid with the MFA parser.  After encoding syllabic segments
    and syllables, the test expects at least one match for each of the
    graph, lexicon, discourse and speaker queries below, and expects the
    ``consonant_file_path`` of the 'acoustic_corpus' sound file to exist.
    """
    mfa_path = os.path.join(mfa_test_dir, "mfa_test.TextGrid")
    acoustic_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    mfa_parser = inspect_mfa(mfa_path)
    textgrid_parser = inspect_textgrid(acoustic_path)
    with CorpusContext('load_remove_test', **graph_db) as corpus:
        corpus.reset()
        corpus.load_discourse(textgrid_parser, acoustic_path)
        corpus.load_discourse(mfa_parser, mfa_path)

        corpus.encode_syllabic_segments(
            ['ER', 'AE', 'IH', 'EH', 'ae', 'ih', 'er', 'eh'])
        corpus.encode_syllables()

        # Annotation-level queries.
        words = corpus.query_graph(corpus.word).filter(corpus.word.label == 'JURASSIC')
        assert words.count() > 0
        phones = corpus.query_graph(corpus.phone).filter(corpus.phone.label == 'AE')
        assert phones.count() > 0
        syllables = corpus.query_lexicon(corpus.syllable).filter(
            corpus.syllable.label == 'JH.ER')
        assert syllables.count() > 0

        # Lexicon-level queries.
        lexicon_words = corpus.query_lexicon(corpus.lexicon_word).filter(
            corpus.lexicon_word.label == 'JURASSIC')
        assert lexicon_words.count() > 0
        upper_phones = corpus.query_lexicon(corpus.lexicon_phone).filter(
            corpus.lexicon_phone.label == 'AE')
        assert upper_phones.count() > 0
        lower_phones = corpus.query_lexicon(corpus.lexicon_phone).filter(
            corpus.lexicon_phone.label == 'ae')
        assert lower_phones.count() > 0
        lexicon_syllables = corpus.query_lexicon(corpus.lexicon_syllable).filter(
            corpus.lexicon_syllable.label == 'JH.ER')
        assert lexicon_syllables.count() > 0

        # Discourse and speaker metadata.
        discourses = corpus.query_discourses().filter(corpus.discourse.name == 'mfa_test')
        assert discourses.count() > 0
        speakers = corpus.query_speakers().filter(corpus.speaker.name == 'mfa')
        assert speakers.count() > 0

        sound_file = corpus.discourse_sound_file('acoustic_corpus')
        assert os.path.exists(sound_file['consonant_file_path'])
예제 #26
0
def test_inspect_textgrid_directory(textgrid_test_dir):
    """Inspecting the TextGrid test directory must yield four annotation tiers."""
    directory_parser = inspect_textgrid(textgrid_test_dir)
    tier_count = len(directory_parser.annotation_tiers)
    assert tier_count == 4
    'user': '******',
    'password': '******'
}


def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    argument remains.
    """
    messages = [item for item in args if isinstance(item, str)]
    if not messages:
        return
    print(' '.join(messages))


# Module-level switch: while this is True the import below runs whenever the
# module is executed or imported.
reset = True

if reset:
    print("Getting annotation types..")
    parser = pgio.inspect_textgrid(path_to_gp)
    # NOTE(review): the meaning of the argument 5 is not visible here —
    # presumably how much of the filename identifies the speaker; confirm.
    parser.speaker_parser = FilenameSpeakerParser(5)
    # Progress messages go straight to print; the call_back helper defined in
    # this file is not used here.
    parser.call_back = print
    print('Loading corpus...')
    with CorpusContext('gp_thai', **graph_db) as c:
        c.reset()
        # Time only the load itself, not the reset.
        beg = time.time()
        c.load(parser, path_to_gp)
        end = time.time()
        print('Time taken: {}'.format(end - beg))

if __name__ == '__main__':
    # When run as a script, print the generated Cypher and the match count
    # for phones labelled 'd'.
    with CorpusContext('gp_thai', **graph_db) as g:
        q = g.query_graph(g.phones).filter(g.phones.label == 'd')
        print(q.cypher())
        print(q.count())
예제 #28
0
def test_tobi(textgrid_test_dir):
    """Inspecting tobi.TextGrid must give a TobiTier, then an OrthographyTier."""
    textgrid_path = os.path.join(textgrid_test_dir, 'tobi.TextGrid')
    tiers = inspect_textgrid(textgrid_path).annotation_tiers
    assert isinstance(tiers[0], TobiTier)
    assert isinstance(tiers[1], OrthographyTier)
예제 #29
0
def test_tobi(textgrid_test_dir):
    """Check the tier classes detected for tobi.TextGrid.

    Annotation type 0 must be a ``TobiTier`` and annotation type 1 an
    ``OrthographyTier``.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'tobi.TextGrid')
    textgrid_parser = inspect_textgrid(textgrid_path)
    first_type = textgrid_parser.annotation_types[0]
    assert isinstance(first_type, TobiTier)
    second_type = textgrid_parser.annotation_types[1]
    assert isinstance(second_type, OrthographyTier)
예제 #30
0
def test_directory(textgrid_test_dir, graph_db):
    """Loading the whole directory with a single-file parser must fail.

    The parser is inspected from phone_word.TextGrid; inspecting it and
    loading the full test directory is expected to raise ``TextGridError``.
    """
    single_file = os.path.join(textgrid_test_dir, 'phone_word.TextGrid')
    with CorpusContext('test_textgrid_directory', **graph_db) as corpus:
        with pytest.raises(TextGridError):
            # Both steps stay inside the raises block, as in the original.
            single_file_parser = inspect_textgrid(single_file)
            corpus.load(single_file_parser, textgrid_test_dir)
예제 #31
0
def test_inspect_textgrid_directory(textgrid_test_dir):
    """Inspecting the TextGrid test directory must yield four annotation types."""
    directory_parser = inspect_textgrid(textgrid_test_dir)
    type_count = len(directory_parser.annotation_types)
    assert type_count == 4
# Connection settings that are unpacked into CorpusContext(...) in this file.
# The user and password values appear masked as '******' in this copy.
graph_db = {'host':'localhost', 'port': 7474,
            'user': '******', 'password': '******'}



def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))

# Module-level switch: while this is True the import below runs whenever the
# module is executed or imported.
reset = True

if reset:
    print("Getting annotation types..")
    parser = pgio.inspect_textgrid(path_to_gp)
    # NOTE(review): the meaning of the argument 5 is not visible here —
    # presumably how much of the filename identifies the speaker; confirm.
    parser.speaker_parser = FilenameSpeakerParser(5)
    # Progress messages go straight to print rather than to call_back above.
    parser.call_back = print
    print('Loading corpus...')
    with CorpusContext('gp_thai', **graph_db) as c:
        c.reset()
        # Time only the load itself, not the reset.
        beg = time.time()
        c.load(parser, path_to_gp)
        end = time.time()
        print('Time taken: {}'.format(end - beg))


if __name__ == '__main__':
    # When run as a script, print the generated Cypher for phones labelled 'd'.
    with CorpusContext('gp_thai', **graph_db) as g:
        q = g.query_graph(g.phones).filter(g.phones.label == 'd')
        print(q.cypher())