def acoustic_config(graph_db, textgrid_test_dir):
    """Reset the 'acoustic' corpus, load its TextGrid, and return the config.

    ``graph_db`` is expanded into keyword arguments for ``CorpusConfig``.
    ``textgrid_test_dir`` is the directory containing
    ``acoustic_corpus.TextGrid``.  Once the file has been loaded, the
    ``pitch_algorithm`` and ``formant_source`` attributes of the config are
    both set to ``'acousticsim'``.
    """
    config = CorpusConfig('acoustic', **graph_db)
    textgrid_file = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')

    with CorpusContext(config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_file)
        corpus.load(textgrid_parser, textgrid_file)

    # Both acoustic settings share the same value and are applied after the load.
    for setting in ('pitch_algorithm', 'formant_source'):
        setattr(config, setting, 'acousticsim')
    return config
def acoustic_config(graph_db, textgrid_test_dir):
    """Return a config for the freshly re-imported 'acoustic' corpus.

    The corpus is reset and ``acoustic_corpus.TextGrid`` (looked up inside
    ``textgrid_test_dir``) is loaded into it.  Afterwards the config's
    ``pitch_algorithm`` and ``formant_algorithm`` are set to
    ``'acousticsim'``.  ``graph_db`` supplies the keyword arguments for
    ``CorpusConfig``.
    """
    config = CorpusConfig('acoustic', **graph_db)
    source_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')

    with CorpusContext(config) as context:
        context.reset()
        loader = inspect_textgrid(source_path)
        context.load(loader, source_path)

    algorithm = 'acousticsim'
    config.pitch_algorithm = algorithm
    config.formant_algorithm = algorithm
    return config
def acoustic_utt_config(graph_db, textgrid_test_dir):
    """Build the 'acoustic utt' corpus with pauses and utterances encoded.

    Resets the corpus, loads ``acoustic_corpus.TextGrid`` from
    ``textgrid_test_dir``, encodes ``'sil'`` as a pause and encodes
    utterances with ``min_pause_length=0``.  The returned config has
    ``pitch_algorithm`` and ``formant_algorithm`` set to ``'acousticsim'``.
    """
    config = CorpusConfig('acoustic utt', **graph_db)
    textgrid_file = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')

    with CorpusContext(config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_file)
        corpus.load(textgrid_parser, textgrid_file)

        pause_labels = ['sil']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=0)

    for setting in ('pitch_algorithm', 'formant_algorithm'):
        setattr(config, setting, 'acousticsim')
    return config
def acoustic_utt_config(graph_db, textgrid_test_dir):
    """Return a config for the 'acoustic utt' corpus after re-importing it.

    Steps, in order: reset the corpus, load ``acoustic_corpus.TextGrid``
    (found under ``textgrid_test_dir``), encode pauses for the label
    ``'sil'``, and encode utterances with a minimum pause length of 0.
    The config's pitch and formant algorithms are then set to
    ``'acousticsim'``.
    """
    config = CorpusConfig('acoustic utt', **graph_db)
    source_path = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')

    with CorpusContext(config) as context:
        context.reset()
        loader = inspect_textgrid(source_path)
        context.load(loader, source_path)
        context.encode_pauses(['sil'])
        utterance_options = {'min_pause_length': 0}
        context.encode_utterances(**utterance_options)

    algorithm = 'acousticsim'
    config.pitch_algorithm = algorithm
    config.formant_algorithm = algorithm
    return config
def importCorpus(self, name):
    """Ask the user for a corpus directory and emit ``corpusToImport``.

    If an import is already running (``self.importFree`` is false) the user
    may abort it, which emits ``cancelImporter``, or cancel this request.
    If *name* is already listed on the local server, the user is asked
    whether to overwrite it.  A ``ConnectionError`` while listing corpora
    shows an error dialog and ends the request.  Choosing no directory
    (empty string) also ends the request without emitting anything.
    """
    message_box = QtWidgets.QMessageBox

    if not self.importFree:
        answer = message_box.warning(
            self,
            "Stop current import?",
            'There is already an import ongoing, would you like to cancel it and start a new import?',
            buttons=message_box.Abort | message_box.Cancel)
        if answer == message_box.Cancel:
            return
        self.cancelImporter.emit()

    try:
        local_config = CorpusConfig('', graph_host='localhost', graph_port=7474)
        if name in get_corpora_list(local_config):
            overwrite_prompt = 'The {} corpus appears to be imported already. Would you like to overwrite it?'.format(name)
            answer = message_box.warning(
                self,
                "Overwrite corpus?",
                overwrite_prompt,
                buttons=message_box.Ok | message_box.Cancel)
            if answer == message_box.Cancel:
                return
    except ConnectionError:
        message_box.critical(
            self,
            "Could not connect to local server",
            'Please make sure there is a local Neo4j server running.')
        return

    dialog_title = 'Select a directory containing the {} corpus'.format(name)
    start_dir = os.path.expanduser('~')
    directory = QtWidgets.QFileDialog.getExistingDirectory(self, dialog_title, start_dir)
    if directory != '':
        self.corpusToImport.emit(name, directory)
def run_query(self):
    """Reset the named corpus on the local server and load it from disk.

    Reads ``name``, ``directory``, ``call_back`` and ``stop_check`` from
    ``self.kwargs``.  The parser is chosen from the corpus name
    (``'buckeye'``, ``'timit'``) or, otherwise, from the format reported by
    ``guess_textgrid_format`` (``'labbcat'``, ``'mfa'``, ``'fave'``, with a
    plain TextGrid parser as the fallback).

    Returns whatever ``load`` returns (named ``could_not_parse`` by the
    original code).
    """
    time.sleep(0.1)
    options = self.kwargs
    name = options['name']
    directory = options['directory']
    should_reset = True

    config = CorpusConfig(name, graph_host='localhost', graph_port=7474)
    with CorpusContext(config) as corpus:
        # Pick the inspector function first, then build the parser once.
        if name == 'buckeye':
            inspector = inspect_buckeye
        elif name == 'timit':
            inspector = inspect_timit
        else:
            detected_format = guess_textgrid_format(directory)
            if detected_format == 'labbcat':
                inspector = inspect_labbcat
            elif detected_format == 'mfa':
                inspector = inspect_mfa
            elif detected_format == 'fave':
                inspector = inspect_fave
            else:
                inspector = inspect_textgrid
        parser = inspector(directory)

        parser.call_back = options['call_back']
        parser.stop_check = options['stop_check']
        parser.call_back('Resetting corpus...')
        if should_reset:
            corpus.reset(call_back=options['call_back'],
                         stop_check=options['stop_check'])
        unparsed = corpus.load(parser, directory)
    return unparsed
def changeConfig(self, name):
    """Emit ``configChanged`` with a config for *name* built from the form.

    Host, port, user and password are read from the corresponding edit
    widgets.  The values are normalised the same way ``connectToServer``
    does before building its config: the port is converted to ``int`` and
    blank user/password become ``None``.  A port that is not an integer is
    passed through as the raw text instead of raising, so this method
    raises no exception it did not raise before.
    """
    host = self.hostEdit.text()

    port = self.portEdit.text()
    try:
        port = int(port)
    except ValueError:
        # Validation and user feedback belong to connectToServer; keep the
        # raw text here rather than failing while switching corpora.
        pass

    # Blank credentials are handed over as None, matching connectToServer.
    user = self.userEdit.text() or None
    password = self.passwordEdit.text() or None

    config = CorpusConfig(name,
                          graph_host=host,
                          graph_port=port,
                          graph_user=user,
                          graph_password=password)
    self.configChanged.emit(config)
def fave_corpus_config(graph_db, fave_test_dir):
    """Reset 'fave_test_corpus', load ``fave_test_dir`` into it, return the config.

    ``graph_db`` is expanded into keyword arguments for ``CorpusConfig``;
    the parser comes from ``inspect_fave`` applied to ``fave_test_dir``.
    """
    config = CorpusConfig('fave_test_corpus', **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        fave_parser = inspect_fave(fave_test_dir)
        corpus.load(fave_parser, fave_test_dir)
    return config
def overlapped_config(graph_db, textgrid_test_dir, acoustic_syllabics):
    """Build the 'overlapped' corpus with pauses, utterances and syllables.

    Resets the corpus and loads the ``overlapped_speech`` entry under
    ``textgrid_test_dir`` with the parser from ``inspect_mfa``.  It then
    encodes ``'sil'`` pauses, utterances (``min_pause_length=0``), syllabic
    segments from ``acoustic_syllabics``, and syllables.  The returned
    config has ``pitch_algorithm`` and ``formant_source`` set to
    ``'acousticsim'``.
    """
    config = CorpusConfig('overlapped', **graph_db)
    corpus_path = os.path.join(textgrid_test_dir, 'overlapped_speech')

    with CorpusContext(config) as corpus:
        corpus.reset()
        mfa_parser = inspect_mfa(corpus_path)
        corpus.load(mfa_parser, corpus_path)

        # Enrichment steps run in this fixed order.
        corpus.encode_pauses(['sil'])
        corpus.encode_utterances(min_pause_length=0)
        corpus.encode_syllabic_segments(acoustic_syllabics)
        corpus.encode_syllables()

    for setting in ('pitch_algorithm', 'formant_source'):
        setattr(config, setting, 'acousticsim')
    return config
def timed_config(graph_db, corpus_data_timed):
    """Import ``corpus_data_timed`` into a reset 'timed' corpus; return the config.

    The types reported by ``corpus_data_timed.types('timed')`` are added
    first, then the discourse is added between ``initialize_import()`` and
    ``finalize_import()``.
    """
    config = CorpusConfig('timed', **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        type_args = corpus_data_timed.types('timed')
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(corpus_data_timed)
        corpus.finalize_import()
    return config
def ursr_config(graph_db, corpus_data_ur_sr):
    """Import ``corpus_data_ur_sr`` into a reset 'ur_sr' corpus; return the config.

    Adds the types from ``corpus_data_ur_sr.types('ur_sr')``, then adds the
    discourse between ``initialize_import()`` and ``finalize_import()``.
    """
    corpus_name = 'ur_sr'
    config = CorpusConfig(corpus_name, **graph_db)
    with CorpusContext(config) as context:
        context.reset()
        context.add_types(*corpus_data_ur_sr.types(corpus_name))
        context.initialize_import()
        context.add_discourse(corpus_data_ur_sr)
        context.finalize_import()
    return config
def subannotation_config(graph_db, subannotation_data):
    """Import ``subannotation_data`` into a reset 'subannotations' corpus.

    Adds the types from ``subannotation_data.types('subannotations')`` and
    then the discourse itself, bracketed by ``initialize_import()`` and
    ``finalize_import()``.  Returns the ``CorpusConfig`` that was used.
    """
    corpus_name = 'subannotations'
    config = CorpusConfig(corpus_name, **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        type_args = subannotation_data.types(corpus_name)
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(subannotation_data)
        corpus.finalize_import()
    return config
def stressed_config(graph_db, textgrid_test_dir):
    """Reset the 'stressed' corpus, load ``stressed_corpus.TextGrid``, return the config.

    The TextGrid is looked up under ``textgrid_test_dir`` and parsed with
    the parser returned by ``inspect_mfa``.
    """
    config = CorpusConfig('stressed', **graph_db)
    textgrid_file = os.path.join(textgrid_test_dir, 'stressed_corpus.TextGrid')
    with CorpusContext(config) as corpus:
        corpus.reset()
        mfa_parser = inspect_mfa(textgrid_file)
        corpus.load(mfa_parser, textgrid_file)
    return config
def partitur_corpus_config(graph_db, partitur_test_dir):
    """Reset the 'partitur' corpus, load ``partitur_test.par,2``, return the config.

    The file is looked up under ``partitur_test_dir`` and parsed with the
    parser returned by ``inspect_partitur``.
    """
    config = CorpusConfig('partitur', **graph_db)
    # The ",2" suffix is part of the file name and is kept exactly as given.
    source_file = os.path.join(partitur_test_dir, 'partitur_test.par,2')
    with CorpusContext(config) as corpus:
        corpus.reset()
        partitur_parser = inspect_partitur(source_file)
        corpus.load(partitur_parser, source_file)
    return config
def syllable_morpheme_config(graph_db, corpus_data_syllable_morpheme_srur):
    """Import the syllable/morpheme data into a reset 'syllable_morpheme' corpus.

    Adds the types from ``.types('syllable_morpheme')`` on the data object,
    then adds the discourse between ``initialize_import()`` and
    ``finalize_import()``.  Returns the ``CorpusConfig`` that was used.
    """
    corpus_name = 'syllable_morpheme'
    discourse = corpus_data_syllable_morpheme_srur
    config = CorpusConfig(corpus_name, **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        type_args = discourse.types(corpus_name)
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(discourse)
        corpus.finalize_import()
    return config
def createConfig(self):
    """Return a ``CorpusConfig`` for the selected corpus, or ``None``.

    ``None`` is returned when ``self.corporaList.text()`` is ``None``.
    Otherwise host, port, user and password are read from the edit widgets
    and normalised the same way ``connectToServer`` does: the port is
    converted to ``int`` and blank user/password become ``None``.  A port
    that is not an integer is passed through as the raw text instead of
    raising, so this method raises no exception it did not raise before.
    """
    name = self.corporaList.text()
    if name is None:
        return None

    host = self.hostEdit.text()

    port = self.portEdit.text()
    try:
        port = int(port)
    except ValueError:
        # Validation and user feedback belong to connectToServer; keep the
        # raw text here rather than failing.
        pass

    # Blank credentials are handed over as None, matching connectToServer.
    user = self.userEdit.text() or None
    password = self.passwordEdit.text() or None

    return CorpusConfig(name,
                        graph_host=host,
                        graph_port=port,
                        graph_user=user,
                        graph_password=password)
def summarized_config(graph_db, textgrid_test_dir):
    """Reset the 'summarized' corpus, load ``acoustic_corpus.TextGrid``, return the config.

    The TextGrid is looked up under ``textgrid_test_dir`` and parsed with
    the parser returned by ``inspect_textgrid``.
    """
    config = CorpusConfig('summarized', **graph_db)
    textgrid_file = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')
    with CorpusContext(config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_file)
        corpus.load(textgrid_parser, textgrid_file)
    return config
def syllable_morpheme_config(graph_db, corpus_data_syllable_morpheme_srur):
    """Import the syllable/morpheme data into a reset 'syllable_morpheme' corpus.

    ``initialize_import`` receives the data object's ``speakers``,
    ``token_headers`` and ``hierarchy.subannotations``; ``finalize_import``
    receives the data object itself.  Returns the ``CorpusConfig`` used.
    """
    discourse = corpus_data_syllable_morpheme_srur
    config = CorpusConfig('syllable_morpheme', **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        type_args = discourse.types('syllable_morpheme')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        corpus.add_discourse(discourse)
        corpus.finalize_import(discourse)
    return config
def ursr_config(graph_db, corpus_data_ur_sr):
    """Import ``corpus_data_ur_sr`` into a reset 'ur_sr' corpus; return the config.

    ``initialize_import`` is given the data's ``speakers``,
    ``token_headers`` and ``hierarchy.subannotations``; ``finalize_import``
    is given the data object itself.
    """
    discourse = corpus_data_ur_sr
    config = CorpusConfig('ur_sr', **graph_db)
    with CorpusContext(config) as context:
        context.reset()
        context.add_types(*discourse.types('ur_sr'))
        context.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        context.add_discourse(discourse)
        context.finalize_import(discourse)
    return config
def timed_config(graph_db, corpus_data_timed):
    """Import ``corpus_data_timed`` into a reset 'timed' corpus; return the config.

    ``initialize_import`` is given the data's ``speakers``,
    ``token_headers`` and ``hierarchy.subannotations``; ``finalize_import``
    is given the data object itself.
    """
    discourse = corpus_data_timed
    config = CorpusConfig('timed', **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        type_args = discourse.types('timed')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        corpus.add_discourse(discourse)
        corpus.finalize_import(discourse)
    return config
def reset(corpus_name):
    """Reset the corpus called *corpus_name* on the local database.

    Opens ``ensure_local_database_running`` for the corpus (port 8080, the
    module-level ``server_ip``, and the token from ``load_token()``), builds
    a ``CorpusConfig`` from the parameters it yields, prints a notice and
    calls ``reset()`` on the corpus context.
    """
    database = ensure_local_database_running(
        corpus_name, port=8080, ip=server_ip, token=load_token())
    with database as connection_params:
        config = CorpusConfig(corpus_name, **connection_params)
        with CorpusContext(config) as corpus:
            print('Resetting the corpus.')
            corpus.reset()
def french_config(graph_db, textgrid_test_dir):
    """Build the 'french' corpus from ``FR001_5.TextGrid`` and return the config.

    Resets the corpus, loads the TextGrid found under
    ``textgrid_test_dir``, encodes ``'sil'`` and ``'<SIL>'`` as pauses and
    encodes utterances with ``min_pause_length=.15``.
    """
    config = CorpusConfig('french', **graph_db)
    textgrid_file = os.path.join(textgrid_test_dir, 'FR001_5.TextGrid')
    with CorpusContext(config) as corpus:
        corpus.reset()
        textgrid_parser = inspect_textgrid(textgrid_file)
        corpus.load(textgrid_parser, textgrid_file)

        pause_labels = ['sil', '<SIL>']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=.15)
    return config
def subannotation_config(graph_db, subannotation_data):
    """Import ``subannotation_data`` into a reset 'subannotations' corpus.

    ``initialize_import`` is given the data's ``speakers``,
    ``token_headers`` and ``hierarchy.subannotations``.  ``finalize_import``
    is given ``speakers``, ``token_headers`` and the whole ``hierarchy``.
    Returns the ``CorpusConfig`` that was used.
    """
    data = subannotation_data
    config = CorpusConfig('subannotations', **graph_db)
    with CorpusContext(config) as corpus:
        corpus.reset()
        type_args = data.types('subannotations')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            data.speakers,
            data.token_headers,
            data.hierarchy.subannotations,
        )
        corpus.add_discourse(data)
        # Unlike initialize_import, this call receives the full hierarchy.
        corpus.finalize_import(
            data.speakers,
            data.token_headers,
            data.hierarchy,
        )
    return config
def connectToServer(self, ignore=False):
    """Validate the connection form, list the server's corpora, refresh the list.

    When ``ignore`` is true, validation and connection failures return
    silently instead of showing an error dialog.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks and indentation were lost; confirm in particular where
    ``self.corporaList.select(...)`` sits relative to the ``if``/``with``.
    """
    host = self.hostEdit.text()
    # An empty host is rejected before anything else is read.
    if host == '':
        if not ignore:
            reply = QtWidgets.QMessageBox.critical(
                self, "Invalid information", "IP address must be specified or named 'localhost'.")
        return
    port = self.portEdit.text()
    # The port must parse as an integer; otherwise report and stop.
    try:
        port = int(port)
    except ValueError:
        if not ignore:
            reply = QtWidgets.QMessageBox.critical(
                self, "Invalid information", "Port must be an integer.")
        return
    # Blank user/password are replaced by None before building the config.
    user = self.userEdit.text()
    if not user:
        user = None
    password = self.passwordEdit.text()
    if not password:
        password = None
    # Read the current corpus before the list is cleared; None becomes ''.
    current_corpus = self.corporaList.text()
    if current_corpus is None:
        current_corpus = ''
    config = CorpusConfig(current_corpus, graph_host=host, graph_port=port, graph_user=user, graph_password=password)
    self.corporaList.clear()
    try:
        corpora = get_corpora_list(config)
        self.corporaList.add(corpora)
        # If the previously selected corpus is on the server, regenerate its
        # hierarchy and save the corpus variables.
        if config.corpus_name and config.corpus_name in corpora:
            with CorpusContext(config) as c:
                c.hierarchy = c.generate_hierarchy()
                c.save_variables()
        self.corporaList.select(current_corpus)
    except (ConnectionError, AuthorizationError, NetworkAddressError) as e:
        # On a connection failure, emit configChanged with None.
        self.configChanged.emit(None)
        if not ignore:
            reply = QtWidgets.QMessageBox.critical(
                self, "Could not connect to server", str(e))
        return
def connectToServer(self, ignore=False):
    """Validate the connection form, list the server's corpora, emit the config.

    An empty host or a non-integer port stops the method early.  Blank
    user/password are passed to ``CorpusConfig`` as ``None``.  On success
    the corpora list is refilled, ``configChanged`` is emitted with the new
    config and ``self.checkAudio()`` is called.  On a connection,
    authorization or network-address error ``configChanged`` is emitted
    with ``None`` and the method returns.  Error dialogs are shown only
    when ``ignore`` is false.
    """
    def report(title, text):
        # Show an error dialog unless the caller asked for silence.
        if not ignore:
            QtWidgets.QMessageBox.critical(self, title, text)

    host = self.hostEdit.text()
    if host == '':
        report("Invalid information",
               "IP address must be specified or named 'localhost'.")
        return

    port_text = self.portEdit.text()
    try:
        port = int(port_text)
    except ValueError:
        report("Invalid information", "Port must be an integer.")
        return

    user = self.userEdit.text() or None
    password = self.passwordEdit.text() or None

    config = CorpusConfig('',
                          graph_host=host,
                          graph_port=port,
                          graph_user=user,
                          graph_password=password)
    self.corporaList.clear()
    try:
        available = get_corpora_list(config)
        self.corporaList.add(available)
        self.configChanged.emit(config)
    except (ConnectionError, AuthorizationError, NetworkAddressError) as error:
        self.configChanged.emit(None)
        report("Could not connect to server", str(error))
        return
    self.checkAudio()
sys.path.insert(0, base)

import polyglotdb.io as aio
from polyglotdb.config import CorpusConfig
from polyglotdb import CorpusContext

# Connection settings, expanded into CorpusConfig keyword arguments below.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

# Machine-specific paths to the external executables.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'
reaper = r'D:\Dev\Tools\REAPER-master\reaper.exe'

config = CorpusConfig('buckeye', **graph_db)
config.reaper_path = reaper
#config.praat_path = praat


def call_back(*args):
    """Print the string arguments joined by spaces; print nothing if there are none."""
    text_parts = [piece for piece in args if isinstance(piece, str)]
    if not text_parts:
        return
    print(' '.join(text_parts))


if __name__ == '__main__':
    with CorpusContext(config) as corpus:
        # Pause labels are given as a pattern string here.
        corpus.encode_pauses('^[{<].*')
        corpus.encode_utterances(min_pause_length=0.25)
        #utterances = g.get_utterances('s1901b', config.pause_words)
        #print(len(utterances))
import sys
import os
import time

# Put the directory two levels above this file at the front of the import path.
base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0, base)

import polyglotdb.io as aio
from speechtools.corpus import CorpusContext
from polyglotdb.config import CorpusConfig

# Connection settings, expanded into CorpusConfig keyword arguments below.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'

config = CorpusConfig('acoustic', **graph_db)
config.reaper_path = r'D:\Dev\Tools\REAPER-master\reaper.exe'


def call_back(*args):
    """Print the string arguments joined by spaces; print nothing if there are none."""
    text_parts = [piece for piece in args if isinstance(piece, str)]
    if not text_parts:
        return
    print(' '.join(text_parts))


if __name__ == '__main__':
    with CorpusContext(config) as corpus:
        corpus.encode_pauses(['sil'])
        corpus.encode_utterances()
        corpus.analyze_acoustics()
from polyglotdb.config import CorpusConfig
from polyglotdb import CorpusContext

# Connection settings, expanded into CorpusConfig keyword arguments below.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

# Machine-specific paths to the external executables.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'
reaper = r'D:\Dev\Tools\REAPER-master\reaper.exe'

config = CorpusConfig('buckeye', **graph_db)
config.reaper_path = reaper
#config.praat_path = praat


def call_back(*args):
    """Echo the string-typed arguments on one line, separated by spaces."""
    words = tuple(item for item in args if isinstance(item, str))
    if words:
        print(' '.join(words))


if __name__ == '__main__':
    with CorpusContext(config) as corpus:
        # Pause labels are given as a pattern string here.
        corpus.encode_pauses('^[{<].*')
        corpus.encode_utterances(min_pause_length=0.25)
if args.corpus_name not in directories: print( 'The corpus {0} does not have a directory (available: {1}). Please make it with a {0}.yaml file inside.' .format(args.corpus_name, ', '.join(directories))) sys.exit(1) corpus_conf = load_config(corpus_name) print('Processing...') #Connect to local database at 8080 with ensure_local_database_running(corpus_name, port=8080, token=common.load_token()) as params: #Load corpus context and config info config = CorpusConfig(corpus_name, **params) config.formant_source = 'praat' # Common set up if reset: with CorpusContext(config) as c: print("Resetting the corpus.") c.reset() common.loading(config, corpus_conf['corpus_directory'], corpus_conf['input_format']) common.lexicon_enrichment(config, corpus_conf['unisyn_spade_directory'], corpus_conf['dialect_code']) common.speaker_enrichment(config, corpus_conf['speaker_enrichment_file']) common.basic_enrichment( config, corpus_conf['vowel_inventory'] +
from polyglotdb import CorpusContext
from polyglotdb.config import CorpusConfig
import polyglotdb.io as pgio
import sys
import os

# Connection settings, expanded into CorpusConfig keyword arguments below.
graph_db = dict(host='localhost', port=7474)

path_to_switchboard = os.path.join(
    "/Volumes", "data", "corpora", "Switchboard_for_MFA")

if __name__ == '__main__':
    config = CorpusConfig("switchboard", **graph_db)
    print("loading corpus...")
    with CorpusContext(config) as corpus:
        corpus.reset()
        fave_parser = pgio.inspect_fave(path_to_switchboard)
        corpus.load(fave_parser, path_to_switchboard)

        # Sanity check: at least one word labelled "think" was imported.
        word_query = corpus.query_graph(corpus.word).filter(
            corpus.word.label == "think")
        results = word_query.all()
        assert len(results) > 0

        # Sanity check: at least one phone labelled "ow" was imported.
        phone_query = corpus.query_graph(corpus.phone).filter(
            corpus.phone.label == "ow")
        results_phone = phone_query.all()
        assert len(results_phone) > 0
import polyglotdb.io as aio
from polyglotdb import CorpusContext
from polyglotdb.config import CorpusConfig

# Connection settings, expanded into CorpusConfig keyword arguments below.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'

config = CorpusConfig('acoustic', **graph_db)
config.reaper_path = r'D:\Dev\Tools\REAPER-master\reaper.exe'


def call_back(*args):
    """Echo the string-typed arguments on one line, separated by spaces."""
    words = tuple(item for item in args if isinstance(item, str))
    if words:
        print(' '.join(words))


if __name__ == '__main__':
    with CorpusContext(config) as corpus:
        corpus.encode_pauses(['sil'])
        corpus.encode_utterances()
        corpus.analyze_acoustics()
# exports all sibilants graph_db = { 'graph_host': 'localhost', 'graph_port': 7474, 'graph_user': '******', 'graph_password': '******' } praat_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\praatcon.exe' script_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\sibilant_jane.praat' #script_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\COG.praat' #script_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\COG_middle50percent.praat' output_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\sib_data.csv' config = CorpusConfig('librispeech', **graph_db) # config = CorpusConfig('acoustic utt', **graph_db) config.praat_path = praat_path if __name__ == '__main__': with CorpusContext(config) as g: g.encode_class(['S', 'Z', 'SH', 'ZH'], 'sibilant') # encode_class method is in featured.py begin = time.time() g.analyze_script('sibilant', script_path,
from polyglotdb.io import enrich_speakers_from_csv

# Connection settings, expanded into CorpusConfig keyword arguments below.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

# Machine-specific paths to external executables and data.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'
reaper = r'D:\Dev\Tools\REAPER-master\reaper.exe'
speaker_info_path = r'D:\Data\VIC\SpeakerInfo.txt'

config = CorpusConfig('buckeye', **graph_db)
config.reaper_path = reaper
config.praat_path = praat
config.pitch_algorithm = 'praat'


def call_back(*args):
    """Print the string arguments joined by spaces; print nothing if there are none."""
    text_parts = [piece for piece in args if isinstance(piece, str)]
    if not text_parts:
        return
    print(' '.join(text_parts))


if __name__ == '__main__':
    with CorpusContext(config) as corpus:
        corpus.reset_acoustics()
# NOTE(review): this print is the tail of a definition that begins above the
# visible chunk; it is reproduced unchanged and belongs inside that body.
print("wrote to ", dest)


if __name__ == '__main__':

    def _flag(text):
        """Convert a command-line flag value to bool.

        Only explicit "false"-like text (or an empty value) is False, so
        every value that enabled the option before still enables it, while
        ``--reset false`` no longer triggers a reset.
        """
        return str(text).strip().lower() not in (
            '', '0', 'false', 'f', 'no', 'n', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument("timit_path", help="path to timit corpus")
    parser.add_argument("dest_path", help="path to destination")
    # Without a converter argparse stores the raw string, and any non-empty
    # string (including "false") is truthy.
    parser.add_argument("--reset", help="set to true to reset corpus",
                        default=False, type=_flag)
    parser.add_argument("--convert", help="set to true if converting mfccs",
                        default=False, type=_flag)
    args = parser.parse_args()

    corpus_name = "TIMIT"
    with ensure_local_database_running('database') as config:
        conf = CorpusConfig(corpus_name, **config)
        if args.reset:
            loading(conf, args.timit_path)
        if args.convert:
            # Raw strings keep "\." a regex escape rather than an invalid
            # string escape; the patterns themselves are unchanged.
            wav_name = re.compile(r".*\.[Ww][Aa][Vv]")
            wav_suffix = re.compile(r"\.[Ww][Aa][Vv]")

            # Map each wav file's name (with the ".wav" text removed) to its path.
            filename_to_path = {}
            for root, dirs, files in os.walk(args.timit_path):
                for file in files:
                    if wav_name.match(file) is not None:
                        src_filename = wav_suffix.sub("", file)
                        filename_to_path[src_filename] = os.path.join(root, file)
            export_textgrid(conf, args.dest_path, filename_to_path)