def test_utterance_nosilence(graph_db, textgrid_test_dir):
    """Check following-word lookups after encoding utterances.

    Loads ``phone_word_no_silence.TextGrid`` into the
    ``word_phone_nosilence`` corpus, encodes utterances, and runs two
    queries. Each query is expected to return exactly one row whose
    ``following_word`` column is ``None``.
    """
    from polyglotdb.io.textgrid import (
        inspect_discourse_textgrid,
        load_discourse_textgrid,
    )

    textgrid_path = os.path.join(
        textgrid_test_dir, 'phone_word_no_silence.TextGrid'
    )

    with CorpusContext('word_phone_nosilence', **graph_db) as corpus:
        # Start from an empty corpus, then import the TextGrid.
        corpus.reset()
        annotation_types = inspect_discourse_textgrid(textgrid_path)
        load_discourse_textgrid(corpus, textgrid_path, annotation_types)

        corpus.encode_utterances()

        # Query 1: the word labelled 'b' and the label of the word after it.
        word_query = (
            corpus.query_graph(corpus.word)
            .filter(corpus.word.label == 'b')
            .columns(
                corpus.word.following.label.column_name('following_word')
            )
        )
        print(word_query.cypher())
        word_rows = word_query.all()
        assert len(word_rows) == 1
        assert word_rows[0].following_word is None

        # Query 2: phones labelled 'b' that are followed by another 'b'.
        # NOTE(review): the column is built from ``corpus.word`` although
        # the query is over phones -- confirm this is intended.
        phone_query = (
            corpus.query_graph(corpus.phone)
            .filter(corpus.phone.label == 'b')
            .filter(corpus.phone.following.label == 'b')
            .columns(
                corpus.word.following.label.column_name('following_word')
            )
        )
        print(phone_query.cypher())
        phone_rows = phone_query.all()
        assert len(phone_rows) == 1
        assert phone_rows[0].following_word is None
def acoustic_config(graph_db, textgrid_test_dir):
    """Build the ``'acoustic'`` corpus and return its configuration.

    Creates a ``CorpusConfig`` named ``'acoustic'`` from the ``graph_db``
    keyword settings, resets the corpus, loads
    ``acoustic_corpus.TextGrid`` from ``textgrid_test_dir``, and runs
    ``analyze_acoustics`` on it.

    Returns the ``CorpusConfig`` that was used to open the corpus.
    """
    textgrid_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    config = CorpusConfig('acoustic', **graph_db)

    with CorpusContext(config) as corpus:
        # Wipe any earlier contents before importing.
        corpus.reset()
        load_discourse_textgrid(
            corpus,
            textgrid_path,
            inspect_discourse_textgrid(textgrid_path),
        )
        corpus.analyze_acoustics()

    return config
# Connection settings passed as keyword arguments to CorpusContext.
graph_db = {
    'host': 'localhost',
    'port': 7474,
    'user': '******',
    'password': '******',
}


def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    arguments remain.
    """
    text_parts = [item for item in args if isinstance(item, str)]
    if not text_parts:
        return
    print(' '.join(text_parts))


# Set to False to skip re-importing the corpus.
reset = True

if reset:
    print("Getting annotation types..")
    annotation_types = tio.inspect_discourse_textgrid(path_to_gp)

    print('Loading corpus...')
    with CorpusContext('gp_bulgarian', **graph_db) as g:
        g.reset()
        beg = time.time()
        # The builtin ``print`` is passed as the progress callback here,
        # not the ``call_back`` helper defined above.
        tio.load_directory_textgrid(
            g,
            path_to_gp,
            annotation_types,
            call_back=print,
        )
        end = time.time()
        print('Time taken: {}'.format(end - beg))


if __name__ == '__main__':
    # Show the generated Cypher and the number of phones labelled 'd'.
    with CorpusContext('gp_bulgarian', **graph_db) as g:
        q = g.query_graph(g.phones).filter(g.phones.label == 'd')
        print(q.cypher())
        print(q.count())