def run_query(self):
    """Enrich the corpus lexicon from a CSV file, undoing the work on cancel.

    Reads config, path, case_sensitive, stop_check and call_back from
    self.kwargs.  Emits actionCompleted on success; returns False if the
    user cancelled via stop_check, True otherwise.
    """
    print("in the lexical worker")
    config = self.kwargs['config']
    case_sensitive = self.kwargs['case_sensitive']
    path = self.kwargs['path']
    stop_check = self.kwargs['stop_check']
    call_back = self.kwargs['call_back']
    call_back('Enriching lexicon...')
    call_back(0, 0)
    with CorpusContext(config) as c:
        # BUG FIX: case_sensitive was read from kwargs but never forwarded,
        # so the user's case-sensitivity choice was silently ignored.
        enrich_lexicon_from_csv(c, path, case_sensitive=case_sensitive)
        self.actionCompleted.emit('enriching lexicon')
        if stop_check():
            call_back('Resetting lexicon...')
            call_back(0, 0)
            c.reset_lexicon()
            return False
    return True
def run_query(self):
    """Encode a segment class under the given label, rolling back on cancel.

    Pulls its arguments out of self.kwargs; returns False when the user
    cancelled via stop_check, True on normal completion.
    """
    params = self.kwargs
    config = params['config']
    segments = params['segments']
    label = params['label']
    stop_check = params['stop_check']
    call_back = params['call_back']

    call_back('Resetting {}s...'.format(label))
    call_back(0, 0)
    with CorpusContext(config) as c:
        c.reset_class(label)
        c.encode_class(segments, label)
        self.actionCompleted.emit('encoding ' + label.replace('_', ' '))
        if stop_check():
            # User cancelled: undo the encoding that was just performed.
            call_back('Resetting {}s...'.format(label))
            call_back(0, 0)
            c.reset_class(label)
            return False
    return True
def test_delete(subannotation_config):
    """Deleting a subannotation removes it from subsequent model loads."""
    with CorpusContext(subannotation_config) as c:
        q = c.query_graph(c.phone).order_by(c.phone.id)
        res = q.all()
        # FIX: renamed `id` -> `annotation_id` to stop shadowing builtin id().
        annotation_id = res[0].id
        model = LinguisticAnnotation(c)
        model.load(annotation_id)
        assert model.voicing_during_closure[0].begin == 99
        model.delete_subannotation(model.voicing_during_closure[0])

        # Reload the same phone; the subannotation must now be gone.
        q = c.query_graph(c.phone).order_by(c.phone.id)
        res = q.all()
        annotation_id = res[0].id
        model = LinguisticAnnotation(c)
        model.load(annotation_id)
        assert model.voicing_during_closure == []
def test_analyze_formants_gendered_praat(acoustic_utt_config, praat_path, results_test_dir):
    """Formant tracks are computed when speakers carry a gender property."""
    with CorpusContext(acoustic_utt_config) as g:
        g.reset_acoustics()
        # FIX: removed the unused local `gender_dict` ({'gender': 'male'});
        # it was never passed to any call.
        g.hierarchy.add_speaker_properties(g, [('gender', str)])
        assert g.hierarchy.has_speaker_property('gender')
        g.config.praat_path = praat_path
        g.analyze_formant_tracks()
        assert g.discourse_has_acoustics('formants', g.discourses[0])
        assert 'formants' in g.hierarchy.acoustics
        q = g.query_graph(g.phone).filter(g.phone.label == 'ow')
        q = q.columns(g.phone.begin, g.phone.end, g.phone.formants.track)
        results = q.all()
        output_path = os.path.join(results_test_dir, 'formant_data.csv')
        q.to_csv(output_path)
        assert len(results) > 0
        for r in results:
            assert len(r.track)
def run_query(self):
    """Encode syllables in the corpus, resetting them if the user cancels."""
    params = self.kwargs
    config = params['config']
    algorithm = params['algorithm']
    stop_check = params['stop_check']
    call_back = params['call_back']

    call_back('Encoding syllables...')
    call_back(0, 0)
    with CorpusContext(config) as c:
        c.encode_syllables(algorithm=algorithm, call_back=call_back,
                           stop_check=stop_check)
        self.actionCompleted.emit('encoding syllables')
        if stop_check():
            # Cancelled mid-run: remove whatever was encoded.
            call_back('Resetting syllables...')
            call_back(0, 0)
            c.reset_syllables()
            return False
    return True
def formant_acoustic_analysis(config, vowels, vowel_prototypes_path,
                              ignored_speakers=None, drop_formant=False,
                              output_tracks=False, subset="vowel",
                              reset_formants=False, duration_threshold=0.05,
                              nIterations=1):
    """Encode a vowel subset and run refined formant analysis over it.

    Parameters
    ----------
    config : corpus configuration passed to CorpusContext
    vowels : list of vowel labels to put into `subset`, or None to reuse an
        existing subset
    vowel_prototypes_path : path to prototype measurements for refinement
    ignored_speakers : speakers whose tokens are excluded from the subset
    drop_formant, output_tracks, subset, reset_formants : forwarded /
        skip-control flags, unchanged behavior
    duration_threshold, nIterations : forwarded to
        analyze_formant_points_refinement.  BUG FIX: both names were used in
        the body but never defined (NameError at runtime); they are now
        keyword parameters.  Defaults are a best guess (0.05 matches the
        duration filter below) — confirm against the calling script.
    """
    with CorpusContext(config) as c:
        if vowels is not None:
            if ignored_speakers:
                q = c.query_graph(c.phone).filter(c.phone.label.in_(vowels))
                q = q.filter(c.phone.speaker.name.not_in_(ignored_speakers))
                q = q.filter(c.phone.duration >= 0.05)
                q.create_subset(subset)
            else:
                c.encode_class(vowels, subset)
        # Skip when the requested kind of analysis was already performed.
        if not reset_formants and not output_tracks and c.hierarchy.has_token_property('phone', 'F1'):
            print('Formant point analysis already done, skipping.')
            return
        elif not reset_formants and output_tracks and 'formants' in c.hierarchy.acoustics:
            print('Formant track analysis already done, skipping.')
            return
        print('Beginning formant analysis')
        beg = time.time()
        # NOTE(review): this benchmark is taken immediately after `beg`, so it
        # records ~0s; kept as-is to preserve the existing benchmark output.
        time_taken = time.time() - beg
        save_performance_benchmark(config, 'vowel_encoding', time_taken)
        print('vowels encoded')
        beg = time.time()
        metadata = analyze_formant_points_refinement(
            c, subset,
            duration_threshold=duration_threshold,
            num_iterations=nIterations,
            vowel_prototypes_path=vowel_prototypes_path,
            drop_formant=drop_formant,
            output_tracks=output_tracks)
        end = time.time()
        time_taken = time.time() - beg
        print('Analyzing formants took: {}'.format(end - beg))
        save_performance_benchmark(config, 'formant_acoustic_analysis', time_taken)
def test_analyze_script(acoustic_utt_config, praat_path, praatscript_test_dir):
    """Running a Praat script over the sibilant subset yields per-phone measures."""
    with CorpusContext(acoustic_utt_config) as g:
        g.config.praat_path = praat_path
        g.encode_class(['s', 'z', 'sh', 'zh'], 'sibilant')
        script = os.path.join(praatscript_test_dir, 'sibilant_jane.praat')
        measures = g.analyze_script(subset='sibilant', annotation_type="phone",
                                    script_path=script, stop_check=None,
                                    call_back=None, multiprocessing=False)
        assert measures == sorted(['cog', 'peak', 'slope', 'spread'])

        # Each sibilant token should now carry a 'peak' value...
        peak_q = g.query_graph(g.phone).filter(g.phone.subset == 'sibilant')
        peak_q = peak_q.columns(g.phone.begin, g.phone.end, g.phone.peak)
        peak_rows = peak_q.all()
        assert len(peak_rows) > 0
        for row in peak_rows:
            assert row.values

        # ...and a 'spread' value as well.
        spread_q = g.query_graph(g.phone).filter(g.phone.subset == 'sibilant')
        spread_q = spread_q.columns(g.phone.begin, g.phone.end, g.phone.spread)
        spread_rows = spread_q.all()
        assert len(spread_rows) > 0
        for row in spread_rows:
            assert row.values
def test_query_previous_previous(timed_config):
    """Two levels of .previous work in both filters and column projections."""
    with CorpusContext(timed_config) as g:
        query = g.query_graph(g.word).filter(g.word.label == 'cute')
        query = query.filter(g.word.previous.label == 'are')
        query = query.filter(g.word.previous.previous.label == 'cats')
        print(query.cypher())
        assert len(query.all()) == 1

        query = g.query_graph(g.word).filter(g.word.label == 'cute')
        query = query.columns(
            g.word.previous.label.column_name('previous_label'),
            g.word.previous.previous.label.column_name(
                'previous_previous_label'))
        print(query.cypher())
        rows = query.all()
        assert len(rows) == 1
        assert rows[0]['previous_label'] == 'are'
        assert rows[0]['previous_previous_label'] == 'cats'
def check_hierarchy(self):
    """Ensure the corpus hierarchy has a subannotation type for this object.

    If the type is missing, it is added with one property per field (plus a
    'user' property when save_user is set).
    """
    a_type = self.get_item_type_display().lower()
    with CorpusContext(self.corpus.config) as c:
        if c.hierarchy.has_subannotation_type(self.label):
            return
        props = [('user', str)] if self.save_user else []
        # Map the stored annotation_choice code to a Python type;
        # anything other than numeric ('N') or boolean ('B') is a string.
        type_for_choice = {'N': float, 'B': bool}
        for field in self.fields.all():
            props.append((field.label,
                          type_for_choice.get(field.annotation_choice, str)))
        c.hierarchy.add_subannotation_type(c, a_type, self.label,
                                           properties=props)
def test_syllable_mean_duration_with_speaker_buckeye(graph_db, buckeye_test_dir):
    """Mean syllable duration per speaker is computed for a Buckeye corpus."""
    syllabics = ['ae', 'aa', 'uw', 'ay', 'eh', 'ih', 'aw', 'ey', 'iy', 'uh',
                 'ah', 'ao', 'er', 'ow']
    with CorpusContext('directory_buckeye', **graph_db) as g:
        g.reset()
        g.load(inspect_buckeye(buckeye_test_dir), buckeye_test_dir)
        g.encode_syllabic_segments(syllabics)
        g.encode_syllables()
        rows = g.get_measure('duration', 'mean', 'syllable', True)
        print(rows)
        assert len(rows) == 11
        # Locate the 'dh.ae.s' syllable and verify its mean duration.
        for idx, row in enumerate(rows):
            if row[1] == 'dh.ae.s':
                break
        assert abs(rows[idx][2] - 0.17030199999999995) < .0000000000001
def test_encode_class(timed_config):
    """encode_class creates a type subset that reset_class fully removes."""
    class_name = 'encoded_class'
    with CorpusContext(timed_config) as g:
        g.encode_class(['ae'], class_name)

        in_class = g.query_graph(g.phone).filter(g.phone.type_subset == class_name)
        assert all(p.label == 'ae' for p in in_class.all())

        out_of_class = g.query_graph(g.phone).filter(g.phone.type_subset != class_name)
        rows = out_of_class.all()
        assert len(rows) > 0
        assert all(p.label != 'ae' for p in rows)

        # After resetting, the subset should no longer be resolvable at all.
        g.reset_class(class_name)
        with pytest.raises(SubsetError):
            g.phone.subset_type(class_name)
def get_algorithm_data(corpus_name):
    """Extract full formant data for a corpus and time the extraction.

    Returns (prototype, data, duration_in_seconds).
    """
    beg = time.time()
    with CorpusContext(corpus_name, **graph_db) as g:
        # THIS IS HACKY, fix later! Find out why these aren't getting encoded on Chevre
        try:
            print(g.graph_host)
        except AttributeError:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only a missing attribute is the
            # expected failure here.
            g.acoustic_host = 'localhost'
            g.acoustic_port = 8086
            g.graph_host = 'localhost'
            g.graph_port = 7474
            g.bolt_port = 7687
        g.config.praat_path = "/Applications/Praat.app/Contents/MacOS/Praat"
        prototype, data = extract_formants_full(g, VOWELS)
    end = time.time()
    duration = end - beg
    return prototype, data, duration
def test_preload(acoustic_config):
    """preload() populates the super/sub annotation caches on query results."""
    with CorpusContext(acoustic_config) as c:
        # Preloading the containing word fills each phone's _supers cache.
        query = c.query_graph(c.phone)
        query = query.order_by(c.phone.begin).preload(c.phone.word)
        print(query.cypher())
        for annotation in query.all():
            assert 'word' in annotation._supers
            assert annotation._supers['word'] is not None

        # Preloading contained phones fills each word's _subs cache.
        query = c.query_graph(c.word)
        query = query.order_by(c.word.begin).preload(c.word.phone)
        print(query.cypher())
        for annotation in query.all():
            assert 'phone' in annotation._subs
            assert annotation._subs['phone'] is not None
def test_query_duration(acoustic_config):
    """Begin/end/duration columns for 'aa' tokens match expected values."""
    with CorpusContext(acoustic_config) as g:
        query = g.query_graph(g.phone).filter(g.phone.label == 'aa')
        query = query.order_by(g.phone.begin)
        query = query.columns(g.phone.begin.column_name('begin'),
                              g.phone.end.column_name('end'),
                              g.phone.duration.column_name('duration'))
        print(query.cypher())
        rows = query.all()
        assert len(rows) == 3
        expected = [(2.704, 0.078), (9.320, 0.122), (24.560, 0.039)]
        for row, (begin, duration) in zip(rows, expected):
            assert abs(row['begin'] - begin) < 0.001
            assert abs(row['duration'] - duration) < 0.001
        assert 'acoustic' in get_corpora_list(acoustic_config)
def test_query_with_pause(acoustic_config):
    """Pause-aware following/previous columns skip over encoded pauses."""
    with CorpusContext(acoustic_config) as g:
        g.encode_pauses(['sil', 'uh', 'um'])

        query = g.query_graph(g.word).filter(g.word.label == 'cares')
        query = query.columns(
            g.word.following.label.column_name('following'),
            g.word.following_pause.label.column_name('following_pause'),
            g.word.following_pause.duration.column_name('following_pause_duration'))
        query = query.order_by(g.word.begin)
        print(query.cypher())
        rows = query.all()
        print(rows)
        assert len(rows) == 1
        assert rows[0]['following'] == 'this'
        assert rows[0]['following_pause'] == ['sil', 'um']
        assert abs(rows[0]['following_pause_duration'] - 1.035027) < 0.001

        query = g.query_graph(g.word).filter(g.word.label == 'this')
        query = query.columns(
            g.word.previous.label.column_name('previous'),
            g.word.previous_pause.label.column_name('previous_pause'),
            g.word.previous_pause.begin,
            g.word.previous_pause.end,
            g.word.previous_pause.duration.column_name('previous_pause_duration'))
        query = query.order_by(g.word.begin)
        print(query.cypher())
        rows = query.all()
        assert len(rows) == 2
        assert rows[1]['previous'] == 'cares'
        assert rows[1]['previous_pause'] == ['sil', 'um']
        assert abs(rows[1]['previous_pause_duration'] - 1.035027) < 0.001

        # Re-encoding pauses with a different inventory replaces the old one.
        g.encode_pauses(['sil'])
        query = g.query_graph(g.word).filter(g.word.label == 'words')
        query = query.columns(
            g.word.following.label.column_name('following'),
            g.word.following_pause.label.column_name('following_pause'),
            g.word.following_pause.duration.column_name('following_pause_duration'))
        query = query.order_by(g.word.begin)
        print(query.cypher())
        rows = query.all()
        assert len(rows) == 5
        assert rows[0]['following'] == 'and'
        assert rows[0]['following_pause'] == ['sil']
        assert abs(rows[0]['following_pause_duration'] - 1.152438) < 0.001
def test_load_mfa(mfa_test_dir, graph_db):
    """An MFA-aligned TextGrid loads with speaker and transcription info."""
    with CorpusContext('test_mfa', **graph_db) as c:
        c.reset()
        grid_path = os.path.join(mfa_test_dir, "mfa_test.TextGrid")
        parser = inspect_mfa(grid_path)
        print(parser.speaker_parser)
        c.load(parser, grid_path)
        assert c.hierarchy.has_type_property('word', 'transcription')

        q = c.query_graph(c.word).filter(c.word.label == 'JURASSIC')
        print(q)
        print(q.all())
        q = q.filter(c.word.speaker.name == 'mfa')
        print(q.all())
        q = q.order_by(c.word.begin)
        print(q.all())
        q = q.columns(c.word.label)
        print(q.all())
        rows = q.all()
        assert len(rows) == 1

        # With pauses and utterances encoded, PLANET's follower is JURASSIC.
        c.encode_pauses('<SIL>')
        c.encode_utterances(min_pause_length=0)
        q = c.query_graph(c.word).filter(c.word.label == 'PLANET')
        q = q.filter(c.word.speaker.name == 'mfa')
        q = q.order_by(c.word.begin)
        q = q.columns(c.word.label,
                      c.word.following.label.column_name('following'))
        rows = q.all()
        assert len(rows) == 1
        assert rows[0]['following'] == 'JURASSIC'

        q = c.query_speakers().filter(c.speaker.name == 'mfa')
        q = q.columns(c.speaker.discourses.name.column_name('discourses'))
        speaker = q.get()
        assert len(speaker['discourses']) == 1
        assert speaker['discourses'] == ['mfa_test']
def sibilant_acoustic_analysis(config, sibilant_segments):
    """Encode the sibilant class and analyze it with the sibilant Praat script."""
    with CorpusContext(config) as c:
        # Skip entirely if a previous run already wrote sibilant measures.
        if c.hierarchy.has_token_property('phone', 'cog'):
            print('Sibilant acoustics already analyzed, skipping.')
            return
        print('Beginning sibilant analysis')
        start = time.time()
        c.encode_class(sibilant_segments, 'sibilant')
        save_performance_benchmark(config, 'sibilant_encoding',
                                   time.time() - start)
        print('sibilants encoded')
        # Analyze all sibilants using the script found at sibilant_script_path.
        start = time.time()
        c.analyze_script('sibilant', sibilant_script_path,
                         duration_threshold=0.01)
        end = time.time()
        time_taken = time.time() - start
        print('Sibilant analysis took: {}'.format(end - start))
        save_performance_benchmark(config, 'sibilant_acoustic_analysis',
                                   time_taken)
def get_algorithm_data(corpus_name, nIterations, remove_short):
    """Extract formant prototypes/metadata/data for a corpus and time it.

    Returns (prototype, metadata, data, duration_in_seconds).
    """
    beg = time.time()
    with CorpusContext(corpus_name, **graph_db) as g:
        # This is a hack - Chevre isn't encoding these, but other machines are
        try:
            print("Graph host:", g.graph_host)
        except AttributeError:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only a missing attribute is the
            # expected failure here.
            g.acoustic_host = 'localhost'
            g.acoustic_port = 8086
            g.graph_host = 'localhost'
            g.graph_port = 7474
            g.bolt_port = 7687
        g.config.praat_path = "/Applications/Praat.app/Contents/MacOS/Praat"
        prototype, metadata, data = extract_formants_full(
            g, VOWELS, remove_short=remove_short, nIterations=nIterations)
    end = time.time()
    duration = end - beg
    return prototype, metadata, data, duration
def test_feature_enrichment(timed_config, csv_test_dir):
    """Phone features loaded from CSV become queryable properties."""
    feature_path = os.path.join(csv_test_dir, 'timed_features.txt')
    with CorpusContext(timed_config) as c:
        enrich_features_from_csv(c, feature_path)

        query = c.query_graph(c.phone).filter(c.phone.vowel_height == 'lowhigh')
        query = query.columns(c.phone.label.column_name('label'))
        assert all(row['label'] == 'ay' for row in query.all())

        query = c.query_graph(c.phone).filter(
            c.phone.place_of_articulation == 'velar')
        query = query.columns(c.phone.label.column_name('label'))
        assert all(row['label'] in ['k', 'g'] for row in query.all())
def test_basic(subannotation_config):
    """Subannotation durations can be listed per phone and aggregated with Sum."""
    with CorpusContext(subannotation_config) as c:
        query = c.query_graph(c.phone).filter(c.phone.label == 'g')
        query = query.columns(c.phone.label.column_name('label'))
        query = query.columns(
            c.phone.voicing_during_closure.duration.column_name(
                'voicing_during_closure'))
        query = query.order_by(c.phone.begin)
        rows = query.all()
        assert rows[0]['label'] == 'g'
        assert [round(v, 2)
                for v in rows[0]['voicing_during_closure']] == [0.03, 0.01]

        # The same durations aggregated with Sum should add up.
        query = c.query_graph(c.phone).filter(c.phone.label == 'g')
        query = query.order_by(c.phone.begin)
        query = query.columns(
            c.phone.label.column_name('label'),
            Sum(c.phone.voicing_during_closure.duration).column_name(
                'voicing_during_closure'))
        print(query.cypher())
        rows = query.all()
        print(rows)
        print(rows[0])
        assert rows[0]['label'] == 'g'
        assert round(rows[0]['voicing_during_closure'], 2) == 0.04
def test_no_speech_utterance(graph_db, textgrid_test_dir):
    """A discourse whose only content is encoded as pause yields no utterances."""
    tg_path = os.path.join(textgrid_test_dir, 'one_word_no_silence.TextGrid')
    with CorpusContext('one_word_no_silence', **graph_db) as g:
        g.reset()
        parser = inspect_textgrid(tg_path)
        # Manually assign linguistic types and hierarchy for the parsed tiers.
        parser.annotation_types[0].linguistic_type = 'phone'
        parser.annotation_types[1].linguistic_type = 'word'
        parser.hierarchy['word'] = None
        parser.hierarchy['phone'] = 'word'
        g.load(parser, tg_path)
        g.encode_pauses(['ab'])
        g.encode_utterances()
        assert len(g.query_graph(g.utterance).all()) == 0
def test_analyze_formants_vowel_segments(acoustic_utt_config, praat_path, results_test_dir):
    """Praat-based vowel formant tracks are stored and exportable to CSV."""
    with CorpusContext(acoustic_utt_config) as g:
        g.config.formant_source = 'praat'
        g.config.praat_path = praat_path
        vowels = ['ih', 'iy', 'ah', 'uw', 'er', 'ay', 'aa', 'ae', 'eh', 'ow']
        g.analyze_vowel_formant_tracks(vowel_inventory=vowels)
        assert g.has_formants(g.discourses[0], 'praat')

        query = g.query_graph(g.phone).filter(g.phone.label == 'ow')
        query = query.columns(g.phone.begin, g.phone.end,
                              g.phone.formants.track)
        rows = query.all()
        query.to_csv(os.path.join(results_test_dir, 'formant_vowel_data.csv'))
        assert len(rows) > 0
        print(len(rows))
        for row in rows:
            assert row.track
def test_analyze_formants_vowel_segments(acoustic_utt_config, praat_path, results_test_dir):
    """Vowel formant tracks can be computed for a class and then reset."""
    with CorpusContext(acoustic_utt_config) as g:
        g.reset_acoustics()
        g.config.praat_path = praat_path
        vowels = ['ih', 'iy', 'ah', 'uw', 'er', 'ay', 'aa', 'ae', 'eh', 'ow']
        g.encode_class(vowels, 'vowel')
        g.analyze_vowel_formant_tracks(vowel_label='vowel',
                                       multiprocessing=False)
        assert g.has_formants(g.discourses[0])

        query = g.query_graph(g.phone).filter(g.phone.label == 'ow')
        query = query.columns(g.phone.begin, g.phone.end,
                              g.phone.formants.track)
        rows = query.all()
        query.to_csv(os.path.join(results_test_dir, 'formant_vowel_data.csv'))
        assert len(rows) > 0
        print(len(rows))
        for row in rows:
            assert len(row.track)

        # Resetting should remove the stored formant tracks again.
        g.reset_formants()
        assert not g.has_formants(g.discourses[0])
def test_extract_formants_full(acoustic_utt_config, praat_path, export_test_dir):
    """Refined formant point analysis writes F1 values for target vowels."""
    output_path = os.path.join(export_test_dir, 'full_formant_vowel_data.csv')
    with CorpusContext(acoustic_utt_config) as g:
        target_label = 'ow'
        g.config.praat_path = praat_path
        vowels = ['ih', 'iy', 'ah', 'uw', 'er', 'ay', 'aa', 'ae', 'eh', 'ow']
        print("starting test")
        analyze_formant_points_refinement(g, vowels)
        assert g.hierarchy.has_token_property('phone', 'F1')

        query = g.query_graph(g.phone).filter(g.phone.label == target_label)
        query = query.columns(g.phone.begin, g.phone.end,
                              g.phone.F1.column_name('F1'))
        query.to_csv(output_path)
        rows = query.all()
        assert len(rows) > 0
        for row in rows:
            assert row['F1']
def connectToServer(self, ignore = False):
    """Validate the connection form, list corpora, and select the current one.

    Shows a critical message box (unless ``ignore`` is True) and returns
    early on invalid input or connection failure.  Emits configChanged(None)
    when the connection attempt fails.
    """
    host = self.hostEdit.text()
    if host == '':
        if not ignore:
            # FIX: dropped the unused `reply =` binding; critical() is called
            # only for its dialog side effect (same below).
            QtWidgets.QMessageBox.critical(self, "Invalid information",
                    "IP address must be specified or named 'localhost'.")
        return
    port = self.portEdit.text()
    try:
        port = int(port)
    except ValueError:
        if not ignore:
            QtWidgets.QMessageBox.critical(self, "Invalid information",
                    "Port must be an integer.")
        return
    user = self.userEdit.text()
    if not user:
        user = None
    password = self.passwordEdit.text()
    if not password:
        password = None
    current_corpus = self.corporaList.text()
    if current_corpus is None:
        current_corpus = ''
    config = CorpusConfig(current_corpus, graph_host = host, graph_port = port,
                    graph_user = user, graph_password = password)
    self.corporaList.clear()
    try:
        corpora = get_corpora_list(config)
        self.corporaList.add(corpora)
        if config.corpus_name and config.corpus_name in corpora:
            with CorpusContext(config) as c:
                c.hierarchy = c.generate_hierarchy()
                c.save_variables()
        self.corporaList.select(current_corpus)
    except (ConnectionError, AuthorizationError, NetworkAddressError) as e:
        self.configChanged.emit(None)
        if not ignore:
            QtWidgets.QMessageBox.critical(self,
                    "Could not connect to server", str(e))
        return
def test_load_discourse(graph_db, mfa_test_dir, textgrid_test_dir):
    """Discourses from two different parsers coexist in one corpus."""
    mfa_path = os.path.join(mfa_test_dir, "mfa_test.TextGrid")
    acoustic_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    mfa_parser = inspect_mfa(mfa_path)
    tg_parser = inspect_textgrid(acoustic_path)
    with CorpusContext('load_remove_test', **graph_db) as c:
        c.reset()
        c.load_discourse(tg_parser, acoustic_path)
        c.load_discourse(mfa_parser, mfa_path)
        c.encode_syllabic_segments(
            ['ER', 'AE', 'IH', 'EH', 'ae', 'ih', 'er', 'eh'])
        c.encode_syllables()

        # Token-level queries see annotations from both discourses.
        assert c.query_graph(c.word).filter(
            c.word.label == 'JURASSIC').count() > 0
        assert c.query_graph(c.phone).filter(
            c.phone.label == 'AE').count() > 0
        assert c.query_lexicon(c.syllable).filter(
            c.syllable.label == 'JH.ER').count() > 0

        # Lexicon-level queries cover both corpora's types as well.
        assert c.query_lexicon(c.lexicon_word).filter(
            c.lexicon_word.label == 'JURASSIC').count() > 0
        assert c.query_lexicon(c.lexicon_phone).filter(
            c.lexicon_phone.label == 'AE').count() > 0
        assert c.query_lexicon(c.lexicon_phone).filter(
            c.lexicon_phone.label == 'ae').count() > 0
        assert c.query_lexicon(c.lexicon_syllable).filter(
            c.lexicon_syllable.label == 'JH.ER').count() > 0

        assert c.query_discourses().filter(
            c.discourse.name == 'mfa_test').count() > 0
        assert c.query_speakers().filter(
            c.speaker.name == 'mfa').count() > 0

        sound_file = c.discourse_sound_file('acoustic_corpus')
        assert os.path.exists(sound_file['consonant_file_path'])
def test_query_intensity(acoustic_utt_config):
    """Saved intensity tracks round-trip through a track-column query."""
    with CorpusContext(acoustic_utt_config) as g:
        # Find the utterance containing the first 'ow' phone.
        query = g.query_graph(g.phone)
        query = query.filter(g.phone.label == 'ow')
        query = query.order_by(g.phone.begin.column_name('begin'))
        query = query.columns(g.phone.utterance.id.column_name('id'))
        utt_id = query.all()[0]['id']

        expected_intensity = {
            Decimal('4.23'): {'Intensity': 98},
            Decimal('4.24'): {'Intensity': 100},
            Decimal('4.25'): {'Intensity': 99},
            Decimal('4.26'): {'Intensity': 95.8},
            Decimal('4.27'): {'Intensity': 95.8},
        }
        g.save_intensity('acoustic_corpus', expected_intensity,
                         utterance_id=utt_id)

        # Query the track back and compare point by point.
        query = g.query_graph(g.phone)
        query = query.filter(g.phone.label == 'ow')
        query = query.order_by(g.phone.begin.column_name('begin'))
        query = query.columns(g.phone.label, g.phone.intensity.track)
        print(query.cypher())
        rows = query.all()
        print(sorted(expected_intensity.items()))
        print(rows[0].track)
        for point in rows[0].track:
            assert round(point['Intensity'], 1) == \
                expected_intensity[point.time]['Intensity']
def test_query_aggregate_formants(acoustic_utt_config):
    """Min/max/mean formant aggregates are positive for the first 'ow' token."""
    with CorpusContext(acoustic_utt_config) as g:
        query = g.query_graph(g.phone)
        query = query.filter(g.phone.label == 'ow')
        query = query.order_by(g.phone.begin.column_name('begin'))
        query = query.columns(g.phone.label, g.phone.formants.min,
                              g.phone.formants.max, g.phone.formants.mean)
        print(query.cypher())
        rows = query.all()
        print(rows[0])
        # Every aggregate of every formant should come back non-zero.
        for formant in ('F1', 'F2', 'F3'):
            for stat in ('Min', 'Max', 'Mean'):
                assert round(rows[0]['{}_{}'.format(stat, formant)], 0) > 0
def test_query_onset_phone(syllable_morpheme_config):
    """Alignment filters and equivalent begin-comparisons agree for 'k'."""
    with CorpusContext(syllable_morpheme_config) as g:
        # No 'k' token is left-aligned with its syllable...
        query = g.query_graph(g.phone).filter(g.phone.label == 'k')
        query = query.filter_left_aligned(g.syllable)
        print(query.cypher())
        assert len(list(query.all())) == 0

        # ...and the explicit begin == begin formulation agrees.
        query = g.query_graph(g.phone).filter(g.phone.label == 'k')
        query = query.filter(g.phone.begin == g.syllable.begin)
        print(query.cypher())
        assert len(list(query.all())) == 0

        # Both not-aligned formulations find the same two tokens.
        query = g.query_graph(g.phone).filter(g.phone.label == 'k')
        query = query.filter_not_left_aligned(g.syllable)
        print(query.cypher())
        assert len(list(query.all())) == 2

        query = g.query_graph(g.phone).filter(g.phone.label == 'k')
        query = query.filter(g.phone.begin != g.syllable.begin)
        print(query.cypher())
        assert len(list(query.all())) == 2
def test_filter_on_type_subset(acoustic_config):
    """A type subset created on the lexicon can be used to filter tokens."""
    with CorpusContext(acoustic_config) as g:
        lex_query = g.query_lexicon(g.lexicon_phone).filter(
            g.lexicon_phone.label == 'aa')
        lex_query.create_subset('+syllabic')

        phone = g.phone.filter_by_subset('+syllabic')
        query = g.query_graph(phone)
        query = query.order_by(phone.begin.column_name('begin'))
        query = query.columns(phone.begin.column_name('begin'),
                              phone.end.column_name('end'),
                              phone.duration.column_name('duration'))
        print(query.cypher())
        rows = query.all()
        assert len(rows) == 3
        expected = [(2.704, 0.078), (9.320, 0.122), (24.560, 0.039)]
        for row, (begin, duration) in zip(rows, expected):
            assert abs(row['begin'] - begin) < 0.001
            assert abs(row['duration'] - duration) < 0.001