Example #1
def test_complex_query(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        vowels = ['aa']
        obstruents = ['k']
        syllabics = ['aa', 'ih']
        q = g.query_graph(g.phone).filter(g.phone.label.in_(syllabics))
        q.set_type('syllabic')

        q = g.query_graph(g.phone).filter(g.phone.label.in_(vowels))
        q = q.filter(g.phone.following.label.in_(obstruents))
        #q = q.filter(g.phone.following.end == g.word.end)
        #q = q.filter(g.word.end == g.utterance.end)

        q = q.clear_columns().columns(
            g.phone.label.column_name('vowel'),
            g.phone.duration.column_name('vowel_duration'),
            g.phone.begin.column_name('vowel_begin'),
            g.phone.end.column_name('vowel_end'),
            g.utterance.phone.rate.column_name('phone_rate'),
            g.word.phone.count.column_name('num_segments_in_word'),
            g.word.phone.subset_type('syllabic').count.column_name(
                'num_syllables_in_word'),
            g.word.discourse.column_name('discourse'),
            g.word.label.column_name('word'),
            g.word.transcription.column_name('word_transcription'),
            g.word.following.label.column_name('following_word'),
            g.word.following.duration.column_name('following_word_duration'),
            g.pause.following.duration.column_name('following_pause_duration'),
            g.phone.following.label.column_name('following_phone'))
        q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (len(results) == 2)
        assert (results[0].num_segments_in_word == 5)
        assert (results[0].num_syllables_in_word == 2)
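
The tests in these examples share one query skeleton: open a CorpusContext, filter an annotation type, select and rename columns, then iterate the results. A minimal sketch of that skeleton, with a hypothetical corpus name and placeholder connection settings rather than the test fixtures used above:

# Minimal sketch of the query pattern used throughout these examples.
# The corpus name and connection settings are placeholders.
from speechtools.corpus import CorpusContext

graph_db = {'host': 'localhost', 'port': 7474,
            'user': 'neo4j_user', 'password': 'neo4j_password'}

with CorpusContext('my_corpus', **graph_db) as g:
    # filter phones by label, rename the selected columns, order the results
    q = g.query_graph(g.phone).filter(g.phone.label.in_(['aa', 'ih']))
    q = q.columns(g.phone.label.column_name('vowel'),
                  g.phone.duration.column_name('vowel_duration'))
    q = q.order_by(g.phone.begin)
    for r in q.all():
        print(r.vowel, r.vowel_duration)
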
Example #2
def test_add_default_voicing_annotations(acoustic_config):
    with CorpusContext(acoustic_config) as c:
        stops = ('p', 't', 'k', 'b', 'd', 'g')

        q = c.query_graph(c.phone).filter(c.phone.label.in_(stops))
        q = q.columns(c.phone.id)
        num = q.count()
        assert (num == 28)

        defaults = [('closure', 0, 0.5, {
            'checked': False
        }), ('release', 0.5, 1, {
            'checked': False
        })]
        add_default_annotations(c, 'phone', defaults, subset=stops)

        q = c.query_graph(c.phone).filter(c.phone.label.in_(stops))

        for a in q.all():
            assert (len(a.closure) == 1)
            assert (len(a.release) == 1)
            assert (all(not x.checked for x in a.closure))
            assert (all(not x.checked for x in a.release))

        assert (q.count() == 28)

        q = c.query_graph(c.phone).filter(c.phone.label.in_(stops))
        q = q.preload(c.phone.closure, c.phone.release)
        assert (q.count() == 28)
        for a in q.all():
            print([(x.begin, x.end, x._type) for x in a.closure])
            print([(x.begin, x.end, x._type) for x in a.release])
            assert (len(a.closure) == 1)
            assert (len(a.release) == 1)
Example #3
 def run(self):
     config = self.kwargs['config']
     directory = self.kwargs['directory']
     with CorpusContext(config) as c:
         update_sound_files(c, directory)
         all_found = c.has_all_sound_files()
     self.dataReady.emit(all_found)
Example #4
 def run_query(self):
     config = self.kwargs['config']
     discourse = self.kwargs['discourse']
     with CorpusContext(config) as c:
         audio_file = c.discourse_sound_file(discourse)
         if audio_file is not None:
             c.sql_session.expunge(audio_file)
     return audio_file
Example #5
 def updateConfig(self, config):
     self.config = config
     self.discourseList.clear()
     if self.config is None or self.config.corpus_name == '':
         return
     with CorpusContext(self.config) as c:
         for d in sorted(c.discourses):
             self.discourseList.addItem(d)
Example #6
 def updateConfig(self, config):
     self.config = config
     self.changingDiscourse.emit()
     self.discourseWidget.config = config
     if self.config is None:
         return
     with CorpusContext(self.config) as c:
         self.discourseWidget.hierarchy = c.hierarchy
Example #7
def test_query_speaking_rate(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.word).filter(g.word.label == 'talking')
        q = q.columns(
            g.word.utterance.word.rate.column_name('words_per_second'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (abs(results[0].words_per_second - (26 / 6.482261)) < 0.001)
Example #8
def test_encode_utterances(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'um'])
        g.encode_utterances(min_pause_length=0)
        q = g.query_graph(g.utterance).times().duration().order_by(
            g.utterance.begin)
        print(q.cypher())
        results = q.all()
        print(results)
        expected_utterances = [(1.059223, 7.541484), (8.576511, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (24.174348, 24.706663),
                               (24.980290, 25.251656)]
        assert (len(results) == len(expected_utterances))
        for i, r in enumerate(results):
            assert (round(r.begin, 3) == round(expected_utterances[i][0], 3))
            assert (round(r.end, 3) == round(expected_utterances[i][1], 3))
        assert (abs(results[0].duration - 6.482261) < 0.001)

        g.encode_pauses(['sil'])
        g.encode_utterances(min_pause_length=0)

        expected_utterances = [(1.059223, 7.541484), (8.016164, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (22.865036, 23.554014),
                               (24.174348, 24.706663), (24.980290, 25.251656)]
        q = g.query_graph(g.utterance).times().duration().order_by(
            g.utterance.begin)
        print(q.cypher())
        results = q.all()
        assert (len(g.query_graph(g.pause).all()) == 11)
        assert (len(results) == len(expected_utterances))
        for i, r in enumerate(results):
            assert (round(r.begin, 3) == round(expected_utterances[i][0], 3))
            assert (round(r.end, 3) == round(expected_utterances[i][1], 3))

        q = g.query_graph(g.utterance).order_by(g.utterance.begin)
        results = q.all()
        for i, r in enumerate(results):
            assert (round(r.begin, 3) == round(expected_utterances[i][0], 3))
            assert (round(r.end, 3) == round(expected_utterances[i][1], 3))
            assert (r.label is None)

        q = g.query_graph(
            g.phone).filter(g.phone.begin == g.phone.utterance.begin)
        q = q.order_by(g.phone.begin)
        results = q.all()

        assert (len(results) == len(expected_utterances))

        expected = ['dh', 'ah', 'l', 'ah', 'ae', 'hh', 'w', 'ah', 'ae', 'th']

        for i, r in enumerate(results):
            assert (r.label == expected[i])
Example #9
 def run(self):
     print('beginning pitch work')
     config = self.kwargs['config']
     algorithm = self.kwargs['algorithm']
     sound_file = self.kwargs['sound_file']
     with CorpusContext(config) as c:
         pitch_list = get_pitch(c, sound_file, algorithm)
         pitch_list = np.array([[x.time, x.F0] for x in pitch_list])
     self.dataReady.emit(pitch_list)
     print('finished pitch work')
Example #10
    def run_query(self):
        a_type = self.kwargs['annotation_type']
        config = self.kwargs['config']

        with CorpusContext(config) as c:
            a_type = getattr(c, a_type)
            query = c.query_graph(a_type)
            query = query.times().columns(
                a_type.discourse.column_name('discourse'))
            results = query.all()
        return query, results
Example #11
def test_utterance_nosilence(graph_db, textgrid_test_dir):
    tg_path = os.path.join(textgrid_test_dir, 'phone_word_no_silence.TextGrid')
    with CorpusContext('word_phone_nosilence', **graph_db) as g:
        g.reset()
        parser = inspect_textgrid(tg_path)
        parser.annotation_types[0].linguistic_type = 'phone'
        parser.annotation_types[1].linguistic_type = 'word'
        parser.hierarchy['word'] = None
        parser.hierarchy['phone'] = 'word'
        g.load(parser, tg_path)

        g.encode_utterances()

        q = g.query_graph(g.word).filter(g.word.label == 'b')

        q = q.columns(g.word.following.label.column_name('following_word'))
        print(q.cypher())
        results = q.all()
        assert (len(results) == 1)
        assert (results[0].following_word is None)

        q = g.query_graph(
            g.word).filter(g.word.begin == g.word.utterance.begin)

        results = q.all()

        assert (len(results) == 1)
        assert (results[0].label == 'a')

        q = g.query_graph(
            g.phone).filter(g.phone.begin == g.phone.utterance.begin)

        results = q.all()

        assert (len(results) == 1)
        assert (results[0].label == 'a')

        #Things like g.phone.word.following are currently broken in PolyglotDB
        return

        q = g.query_graph(g.phone).filter(g.phone.label == 'b')

        q = q.filter(g.phone.following.label == 'b')

        q = q.columns(
            g.phone.label, g.phone.id,
            g.phone.word.following.label.column_name('following_word'))
        print(q.cypher())
        results = q.all()
        assert (len(results) == 1)
        assert (results[0].following_word is None)
Example #12
    def run_query(self):
        config = self.kwargs['config']
        try:
            stops = gp_language_stops[config.corpus_name]
        except KeyError:
            print(
                'Couldn\'t find corpus name in stops, defaulting to p, t, k, b, d, g'
            )
            stops = ['p', 't', 'k', 'b', 'd', 'g']
        with CorpusContext(config) as c:
            a_type = c.hierarchy.lowest
            w_type = c.hierarchy[a_type]
            utt_type = c.hierarchy.highest
            a_type = getattr(c, a_type)
            w_type = getattr(a_type, w_type)
            utt_type = getattr(a_type, utt_type)
            q = c.query_graph(a_type)
            q = q.order_by(a_type.discourse.name)
            q = q.order_by(a_type.begin)
            q = q.filter(a_type.phon4lab1 == True)
            #print('Number found: {}'.format(q.count()))
            q = q.columns(a_type.label.column_name('Stop'),
                          a_type.begin.column_name('Begin'),
                          a_type.end.column_name('End'),
                          w_type.label.column_name('Word'),
                          a_type.checked.column_name('Annotated'),
                          a_type.speaker.name.column_name('Speaker'),
                          a_type.discourse.name.column_name('Discourse'),
                          a_type.id.column_name('Unique_id'),
                          a_type.notes.column_name('Notes'))

            if 'burst' in c.hierarchy.subannotations[c.hierarchy.lowest]:
                q = q.columns(
                    a_type.burst.begin.column_name('Burst_begin'),
                    a_type.burst.end.column_name('Burst_end'),
                    a_type.burst.duration.column_name('Burst_duration'))
            if 'voicing' in c.hierarchy.subannotations[c.hierarchy.lowest]:
                q = q.columns(
                    a_type.voicing.begin.column_name('Voicing_begin'),
                    a_type.voicing.end.column_name('Voicing_end'),
                    a_type.voicing.duration.column_name('Voicing_duration'))
            #q = q.limit(100)
            results = q.all()
        return q, results
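
The worker above returns the query object together with its materialized results; Example #26 below builds a similar stop-consonant query and writes it straight to disk with q.to_csv(). A rough, hypothetical sketch of that export variant, where the config object, stop list, and output path are placeholders:

# Hypothetical sketch of a CSV export, separate from the worker above.
# config is assumed to be a corpus configuration as in the other examples;
# the stop list and output path are placeholders.
from speechtools.corpus import CorpusContext

stops = ['p', 't', 'k', 'b', 'd', 'g']

with CorpusContext(config) as c:
    a_type = getattr(c, c.hierarchy.lowest)
    q = c.query_graph(a_type).filter(a_type.label.in_(stops))
    q = q.columns(a_type.label.column_name('Stop'),
                  a_type.begin.column_name('Begin'),
                  a_type.end.column_name('End'),
                  a_type.duration.column_name('Duration'))
    q.to_csv('stops_export.csv')
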
Example #13
def test_query_with_pause(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'uh', 'um'])
        q = g.query_graph(g.word).filter(g.word.label == 'cares')
        q = q.columns(
            g.word.following.label.column_name('following'),
            g.pause.following.label.column_name('following_pause'),
            g.pause.following.duration.column_name('following_pause_duration'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        print(results)
        assert (len(results) == 1)
        assert (results[0].following == 'this')
        assert (results[0].following_pause == ['sil', 'um'])
        assert (abs(results[0].following_pause_duration - 1.035027) < 0.001)

        q = g.query_graph(g.word).filter(g.word.label == 'this')
        q = q.columns(
            g.word.previous.label.column_name('previous'),
            g.pause.previous.label.column_name('previous_pause'),
            g.pause.previous.begin, g.pause.previous.end,
            g.pause.previous.duration.column_name('previous_pause_duration'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (len(results) == 2)
        assert (results[1].previous == 'cares')
        assert (results[1].previous_pause == ['sil', 'um'])
        assert (abs(results[1].previous_pause_duration - 1.035027) < 0.001)

        g.encode_pauses(['sil'])
        q = g.query_graph(g.word).filter(g.word.label == 'words')
        q = q.columns(
            g.word.following.label.column_name('following'),
            g.pause.following.label.column_name('following_pause'),
            g.pause.following.duration.column_name('following_pause_duration'))
        q = q.order_by(g.word.begin)
        print(q.cypher())
        results = q.all()
        assert (len(results) == 5)
        assert (results[0].following == 'and')
        assert (results[0].following_pause == ['sil'])
        assert (abs(results[0].following_pause_duration - 1.152438) < 0.001)
Example #14
 def run_query(self):
     a_type = self.kwargs['word_type']
     s_type = self.kwargs['seg_type']
     config = self.kwargs['config']
     discourse = self.kwargs['discourse']
     with CorpusContext(config) as c:
         word = getattr(c, a_type)
         q = c.query_graph(word).filter(word.discourse.name == discourse)
         preloads = []
         if a_type in c.hierarchy.subannotations:
             for s in c.hierarchy.subannotations[a_type]:
                 preloads.append(getattr(word, s))
         for t in c.hierarchy.get_lower_types(a_type):
             preloads.append(getattr(word, t))
         q = q.preload(*preloads)
         q = q.order_by(word.begin)
         #annotations = c.query_acoustics(q).pitch('reaper').all()
         annotations = q.all()
     return annotations
Example #15
def test_query_pitch(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.phone).filter(g.phone.label == 'ow').order_by(
            g.phone.begin.column_name('begin'))
        aq = g.query_acoustics(q).pitch('acousticsim')
        results = aq.all()
        expected_pitch = {
            4.23: 98.2,
            4.24: 390.2,
            4.25: 0.0,
            4.26: 95.8,
            4.27: 95.8
        }
        assert (set(results[0].pitch.keys()) == set(expected_pitch.keys()))
        for k, v in results[0].pitch.items():
            assert (round(v, 1) == expected_pitch[k])

        assert (round(aq.max()[0].max_pitch,
                      1) == round(max(expected_pitch.values()), 1))
Example #16
    def changeDiscourse(self, discourse):
        if discourse:
            self.changingDiscourse.emit()
            kwargs = {}

            kwargs['config'] = self.config
            kwargs['discourse'] = discourse

            self.audioWorker.setParams(kwargs)
            self.audioWorker.start()
            kwargs = {}
            with CorpusContext(self.config) as c:
                self.discourseWidget.updateHierachy(c.hierarchy)
                kwargs['seg_type'] = c.hierarchy.lowest
                kwargs['word_type'] = c.hierarchy.highest

            kwargs['config'] = self.config
            kwargs['discourse'] = discourse

            self.worker.setParams(kwargs)
            self.worker.start()
Example #17
def test_utterance_position(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'um'])
        q = g.query_graph(g.pause)
        print(q.all())
        g.encode_utterances(min_pause_length=0)
        q = g.query_graph(g.word)
        q = q.filter(g.word.label == 'this')
        q = q.order_by(g.word.begin)
        q = q.columns(g.word.utterance.word.position.column_name('position'))
        print(q.cypher())
        results = q.all()
        assert (results[0].position == 1)

        q = g.query_graph(g.word)
        q = q.filter(g.word.label == 'talking')
        q = q.order_by(g.word.begin)
        q = q.columns(g.word.utterance.word.position.column_name('position'))
        print(q.cypher())
        results = q.all()
        assert (results[0].position == 7)
        assert (results[1].position == 4)
Example #18
def test_encode_pause(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        discourse = g.discourse('acoustic_corpus')
        g.encode_pauses(['sil'])
        q = g.query_graph(g.pause)
        print(q.cypher())
        assert (len(q.all()) == 11)

        paused = g.discourse('acoustic_corpus')
        expected = [x for x in discourse if x.label != 'sil']
        for i, d in enumerate(expected):
            print(d.label, paused[i].label)
            assert (d.label == paused[i].label)

        g.reset_pauses()
        new_discourse = g.discourse('acoustic_corpus')
        for i, d in enumerate(discourse):
            assert (d.label == new_discourse[i].label)

        g.encode_pauses(['sil', 'um', 'uh'])
        q = g.query_graph(g.pause)
        print(q.cypher())
        assert (len(q.all()) == 14)

        paused = g.discourse('acoustic_corpus')
        expected = [x for x in discourse if x.label not in ['sil', 'um', 'uh']]
        for i, d in enumerate(expected):
            print(d.label, paused[i].label)
            assert (d.label == paused[i].label)

        g.reset_pauses()
        new_discourse = g.discourse('acoustic_corpus')
        print(discourse)
        print(new_discourse)
        for i, d in enumerate(discourse):
            assert (d.label == new_discourse[i].label)
Example #19
    'password': '******'
}


def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))


reset = True

if reset:
    print("Getting annotation types..")
    parser = pgio.inspect_textgrid(path_to_gp)
    parser.speaker_parser = FilenameSpeakerParser(5)
    parser.call_back = print
    print('Loading corpus...')
    with CorpusContext('gp_thai', **graph_db) as c:
        c.reset()
        beg = time.time()
        c.load(parser, path_to_gp)
        end = time.time()
        print('Time taken: {}'.format(end - beg))

if __name__ == '__main__':
    with CorpusContext('gp_thai', **graph_db) as g:
        q = g.query_graph(g.phones).filter(g.phones.label == 'd')
        print(q.cypher())
        print(q.count())
Example #20
sys.path.insert(0, base)
import polyglotdb.io as pgio

from speechtools.corpus import CorpusContext

path_to_timit = r'D:\Data\TIMIT_fixed'

graph_db = {
    'host': 'localhost',
    'port': 7474,
    'user': '******',
    'password': '******'
}


def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))


parser = pgio.inspect_timit(path_to_timit)
parser.call_back = call_back

with CorpusContext('timit', **graph_db) as c:
    c.reset()
    beg = time.time()
    c.load(parser, path_to_timit)
    end = time.time()
    print('Time taken: {}'.format(end - beg))
Example #21
def test_query_formants_aggregate_group_by(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.phone).filter(g.phone.label.in_(['aa', 'ae']))
        aq = g.query_acoustics(q).group_by(
            g.phone.label).formants('acousticsim')
Example #22
def test_update_sound_files(acoustic_config, textgrid_test_dir):
    with CorpusContext(acoustic_config) as c:
        update_sound_files(c, textgrid_test_dir)
        expected_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.wav')
        assert (c.discourse_sound_file('acoustic_corpus').filepath ==
                expected_path)
Example #23
def test_analyze_acoustics(graph_db):
    with CorpusContext('acoustic', pause_words=['sil'], **graph_db) as g:
        g.analyze_acoustics()
Example #24
def test_get_utterances(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil'])
        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0,
                                      min_utterance_length=0)

        expected_utterances = [(1.059223, 7.541484), (8.016164, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (22.865036, 23.554014),
                               (24.174348, 24.706663), (24.980290, 25.251656)]
        print(utterances)
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))
        utterances = g.get_utterances('acoustic_corpus', min_pause_length=0.5)

        expected_utterances = [(1.059223, 13.898228), (14.509726, 17.207370),
                               (18.359807, 22.331874), (22.865036, 23.554014),
                               (24.174348, 25.251656)]
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0.5,
                                      min_utterance_length=1.0)

        expected_utterances = [(1.059223, 13.898228), (14.509726, 17.207370),
                               (18.359807, 23.554014), (24.174348, 25.251656)]
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0.5,
                                      min_utterance_length=1.1)

        expected_utterances = [(1.059223, 13.898228), (14.509726, 17.207370),
                               (18.359807, 25.251656)]
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))

        g.encode_pauses(['sil', 'um'])
        utterances = g.get_utterances('acoustic_corpus',
                                      min_pause_length=0,
                                      min_utterance_length=0)

        expected_utterances = [(1.059223, 7.541484), (8.576511, 11.807666),
                               (12.167356, 13.898228), (14.509726, 17.207370),
                               (18.359807, 19.434003), (19.599747, 21.017242),
                               (21.208318, 22.331874), (24.174348, 24.706663),
                               (24.980290, 25.251656)]
        print(utterances)
        assert (len(utterances) == len(expected_utterances))
        for i, u in enumerate(utterances):
            assert (round(u[0], 5) == round(expected_utterances[i][0], 5))
            assert (round(u[1], 5) == round(expected_utterances[i][1], 5))
Example #25
def test_wav_info(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        sf = g.discourse_sound_file('acoustic_corpus')
        assert (sf.sampling_rate == 16000)
        assert (sf.n_channels == 1)
Example #26
    def run(self):
        time.sleep(0.1)
        print('beginning export')
        try:
            config = self.kwargs['config']
            export_path = self.kwargs['path']
            try:
                stops = gp_language_stops[config.corpus_name]
            except KeyError:
                print(
                    'Couldn\'t find corpus name in stops, defaulting to p, t, k, b, d, g'
                )
                stops = ['p', 't', 'k', 'b', 'd', 'g']
            with CorpusContext(config) as c:
                a_type = c.hierarchy.lowest
                w_type = c.hierarchy[a_type]
                utt_type = c.hierarchy.highest
                a_type = getattr(c, a_type)
                w_type = getattr(a_type, w_type)
                utt_type = getattr(a_type, utt_type)
                q = c.query_graph(a_type)
                q = q.order_by(a_type.discourse.name)
                q = q.order_by(a_type.begin)
                q = q.filter(a_type.phon4lab1 == True)
                #print('Number found: {}'.format(q.count()))
                q = q.columns(a_type.label.column_name('Stop'),
                              a_type.begin.column_name('Begin'),
                              a_type.end.column_name('End'),
                              a_type.duration.column_name('Duration'))
                if 'burst' in c.hierarchy.subannotations[c.hierarchy.lowest]:
                    q = q.columns(
                        a_type.burst.begin.column_name('Burst_begin'),
                        a_type.burst.end.column_name('Burst_end'),
                        a_type.burst.duration.column_name('Burst_duration'))
                if 'voicing' in c.hierarchy.subannotations[c.hierarchy.lowest]:
                    q = q.columns(
                        a_type.voicing.begin.column_name('Voicing_begin'),
                        a_type.voicing.end.column_name('Voicing_end'),
                        a_type.voicing.duration.column_name(
                            'Voicing_duration'))

                q = q.columns(
                    w_type.label.column_name('Word'),
                    w_type.begin.column_name('Word_begin'),
                    w_type.end.column_name('Word_end'),
                    w_type.duration.column_name('Word_duration'),
                    w_type.transcription.column_name('Word_transcription'),
                    a_type.following.label.column_name('Following_segment'),
                    a_type.following.begin.column_name(
                        'Following_segment_begin'),
                    a_type.following.end.column_name('Following_segment_end'),
                    a_type.following.duration.column_name(
                        'Following_segment_duration'),
                    a_type.following.following.label.column_name(
                        'Following_following_segment'),
                    a_type.following.following.begin.column_name(
                        'Following_following_segment_begin'),
                    a_type.following.following.end.column_name(
                        'Following_following_segment_end'),
                    a_type.following.following.duration.column_name(
                        'Following_following_segment_duration'),
                    a_type.checked.column_name('Annotated'),
                    a_type.speaker.name.column_name('Speaker'),
                    a_type.discourse.name.column_name('Discourse'),
                    w_type.utterance.phones.rate.column_name('Speaking_rate'),
                    a_type.notes.column_name('Notes'))
                #q = q.limit(100)
                results = q.to_csv(export_path)
        except Exception as e:
            # report the error back to the GUI thread and stop
            self.errorEncountered.emit(e)
            return
        print('finished')
        if self.stopped:
            time.sleep(0.1)
            self.finishedCancelling.emit()
            return

        self.dataReady.emit((q, results))
Example #27
import sys
import os
import time
base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0, base)
import polyglotdb.io as aio

from speechtools.corpus import CorpusContext

from polyglotdb.config import CorpusConfig

graph_db = {'graph_host':'localhost', 'graph_port': 7474,
            'graph_user': '******', 'graph_password': '******'}

praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'

config = CorpusConfig('acoustic', **graph_db)

config.reaper_path = r'D:\Dev\Tools\REAPER-master\reaper.exe'

def call_back(*args):
    args = [x for x in args if isinstance(x, str)]
    if args:
        print(' '.join(args))

if __name__ == '__main__':
    with CorpusContext(config) as g:
        g.encode_pauses(['sil'])
        g.encode_utterances()
        g.analyze_acoustics()
Example #28
def test_query_formants(acoustic_config):
    with CorpusContext(acoustic_config) as g:
        q = g.query_graph(g.phone).filter(g.phone.label == 'aa')
        aq = g.query_acoustics(q).formants('acousticsim')
Example #29
graph_db = {'host':'localhost', 'port': 7474,
            'user': '******', 'password': '******'}

first_run = True

from py2neo.packages.httpstream import http
http.socket_timeout = 999

syllabics = {'aa', 'aan', 'ae', 'aen', 'ah', 'ahn', 'ay', 'ayn', 'aw', 'awn',
             'ao', 'aon', 'iy', 'iyn', 'ih', 'ihn', 'uw', 'uwn', 'uh', 'uhn',
             'eh', 'ehn', 'ey', 'eyn', 'er', 'el', 'em', 'eng',
             'ow', 'own', 'oy', 'oyn'}
import time
with CorpusContext('buckeye', **graph_db) as g:
    if first_run:
        begin = time.time()
        g.encode_pauses('^[<{].*')
        print('Finished encoding pauses in {} seconds'.format(time.time() - begin))
        #g.encode_pauses(['uh','um','okay','yes','yeah','oh','heh','yknow','um-huh',
        #        'uh-uh','uh-huh','uh-hum','mm-hmm'])
        begin = time.time()
        g.reset_utterances()
        print('Finished resetting utterances in {} seconds'.format(time.time() - begin))
        begin = time.time()
        g.encode_utterances(min_pause_length=0.15)
        print('Finished encoding utterances in {} seconds'.format(time.time() - begin))
        #g.encode_syllables(syllabics)

        begin = time.time()
        q = g.query_graph(g.surface_transcription).filter(g.surface_transcription.label.in_(syllabics))
Example #30
 def run(self):
     config = self.kwargs['config']
     with CorpusContext(config) as c:
         all_found = c.has_all_sound_files()
     self.dataReady.emit(all_found)
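
Several of these examples (#3, #9, #26, #30) are run() methods of background workers that read their parameters from self.kwargs, do the corpus work inside a CorpusContext, and emit the result on a signal; Example #16 shows such a worker being driven with setParams() and start(). A minimal, hypothetical sketch of that shape, assuming a PyQt-style QThread worker; the class name and signal wiring are illustrative, not taken from any particular codebase:

# Illustrative sketch only: a QThread-based worker in the shape used by the
# run() examples above. The class name is hypothetical; setParams() and the
# dataReady signal mirror the calls seen in the examples.
from PyQt5.QtCore import QThread, pyqtSignal

from speechtools.corpus import CorpusContext


class SoundFileCheckWorker(QThread):
    dataReady = pyqtSignal(object)

    def setParams(self, kwargs):
        self.kwargs = kwargs

    def run(self):
        config = self.kwargs['config']
        with CorpusContext(config) as c:
            all_found = c.has_all_sound_files()
        self.dataReady.emit(all_found)


# Driving the worker, mirroring Example #16:
# worker = SoundFileCheckWorker()
# worker.setParams({'config': config})
# worker.start()
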