コード例 #1
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def acoustic_config(graph_db, textgrid_test_dir):
    """Reset and reload the 'acoustic' corpus, then return its config.

    The corpus is rebuilt from ``acoustic_corpus.TextGrid`` found under
    ``textgrid_test_dir``.  After loading, ``pitch_algorithm`` and
    ``formant_source`` on the config are both set to ``'acousticsim'``.
    """
    corpus_config = CorpusConfig('acoustic', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir,
                                 'acoustic_corpus.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

    for option in ('pitch_algorithm', 'formant_source'):
        setattr(corpus_config, option, 'acousticsim')
    return corpus_config
コード例 #2
0
ファイル: conftest.py プロジェクト: mmcauliffe/PolyglotDB
def acoustic_config(graph_db, textgrid_test_dir):
    """Reset and reload the 'acoustic' corpus, then return its config.

    The corpus is rebuilt from ``acoustic_corpus.TextGrid`` found under
    ``textgrid_test_dir``.  After loading, ``pitch_algorithm`` and
    ``formant_algorithm`` on the config are both set to ``'acousticsim'``.
    """
    corpus_config = CorpusConfig('acoustic', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir,
                                 'acoustic_corpus.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

    for option in ('pitch_algorithm', 'formant_algorithm'):
        setattr(corpus_config, option, 'acousticsim')
    return corpus_config
コード例 #3
0
ファイル: conftest.py プロジェクト: mmcauliffe/PolyglotDB
def acoustic_utt_config(graph_db, textgrid_test_dir):
    """Rebuild the 'acoustic utt' corpus with pauses and utterances encoded.

    Loads ``acoustic_corpus.TextGrid`` from ``textgrid_test_dir`` into a
    freshly reset corpus, encodes ``'sil'`` as a pause and encodes
    utterances with ``min_pause_length=0``.  The returned config has
    ``pitch_algorithm`` and ``formant_algorithm`` set to ``'acousticsim'``.
    """
    corpus_config = CorpusConfig('acoustic utt', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir,
                                 'acoustic_corpus.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

        pause_labels = ['sil']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=0)

    for option in ('pitch_algorithm', 'formant_algorithm'):
        setattr(corpus_config, option, 'acousticsim')
    return corpus_config
コード例 #4
0
def acoustic_utt_config(graph_db, textgrid_test_dir):
    """Return the config of a freshly rebuilt 'acoustic utt' corpus.

    The corpus is reset, loaded from ``acoustic_corpus.TextGrid`` in
    ``textgrid_test_dir``, and then has pauses (``'sil'``) and utterances
    (``min_pause_length=0``) encoded.  ``pitch_algorithm`` and
    ``formant_algorithm`` on the config are set to ``'acousticsim'``.
    """
    result = CorpusConfig('acoustic utt', **graph_db)
    source = os.path.join(textgrid_test_dir, 'acoustic_corpus.TextGrid')

    with CorpusContext(result) as ctx:
        ctx.reset()
        textgrid_parser = inspect_textgrid(source)
        ctx.load(textgrid_parser, source)
        ctx.encode_pauses(['sil'])
        ctx.encode_utterances(min_pause_length=0)

    result.pitch_algorithm = result.formant_algorithm = 'acousticsim'
    return result
コード例 #5
0
    def importCorpus(self, name):
        """Ask the user for a corpus directory and request its import.

        If ``self.importFree`` is false, the user is first asked whether to
        stop the running import; answering Cancel returns without doing
        anything, otherwise ``cancelImporter`` is emitted.  If ``name`` is
        already in the corpus list of the server at localhost:7474, the user
        must confirm the overwrite.  A ``ConnectionError`` during that check
        shows an error dialog and returns.  Finally a directory chooser is
        shown; unless it yields an empty string, ``corpusToImport`` is
        emitted with ``name`` and the chosen directory.
        """
        message_box = QtWidgets.QMessageBox

        if not self.importFree:
            answer = message_box.warning(
                self,
                "Stop current import?",
                'There is already an import ongoing, would you like to cancel it and start a new import?',
                buttons=message_box.Abort | message_box.Cancel)
            if answer == message_box.Cancel:
                return
            self.cancelImporter.emit()

        try:
            local_server = CorpusConfig('', graph_host='localhost',
                                        graph_port=7474)
            already_imported = name in get_corpora_list(local_server)
            if already_imported:
                answer = message_box.warning(
                    self,
                    "Overwrite corpus?",
                    'The {} corpus appears to be imported already.  Would you like to overwrite it?'
                    .format(name),
                    buttons=message_box.Ok | message_box.Cancel)
                if answer == message_box.Cancel:
                    return
        except ConnectionError:
            message_box.critical(
                self, "Could not connect to local server",
                'Please make sure there is a local Neo4j server running.')
            return

        chosen = QtWidgets.QFileDialog.getExistingDirectory(
            self, 'Select a directory containing the {} corpus'.format(name),
            os.path.expanduser('~'))
        if chosen != '':
            self.corpusToImport.emit(name, chosen)
コード例 #6
0
    def run_query(self):
        """Reset and import the corpus described by ``self.kwargs``.

        Reads ``name``, ``directory``, ``call_back`` and ``stop_check`` from
        ``self.kwargs``.  The parser is chosen by corpus name ('buckeye',
        'timit') or, for any other name, by the format reported by
        ``guess_textgrid_format`` ('labbcat', 'mfa', 'fave', otherwise the
        generic TextGrid inspector).

        Returns the value returned by the corpus context's ``load`` call.
        """
        time.sleep(0.1)
        corpus_name = self.kwargs['name']
        source_dir = self.kwargs['directory']
        should_reset = True
        corpus_config = CorpusConfig(corpus_name, graph_host='localhost',
                                     graph_port=7474)
        with CorpusContext(corpus_config) as corpus:
            if corpus_name == 'buckeye':
                inspector = inspect_buckeye
            elif corpus_name == 'timit':
                inspector = inspect_timit
            else:
                # Any other corpus is treated as a TextGrid collection whose
                # flavour is guessed from the directory contents.
                inspector = {
                    'labbcat': inspect_labbcat,
                    'mfa': inspect_mfa,
                    'fave': inspect_fave,
                }.get(guess_textgrid_format(source_dir), inspect_textgrid)
            parser = inspector(source_dir)

            parser.call_back = self.kwargs['call_back']
            parser.stop_check = self.kwargs['stop_check']
            parser.call_back('Resetting corpus...')
            if should_reset:
                corpus.reset(call_back=self.kwargs['call_back'],
                             stop_check=self.kwargs['stop_check'])
            unparsed = corpus.load(parser, source_dir)
        return unparsed
コード例 #7
0
 def changeConfig(self, name):
     """Emit ``configChanged`` with a config for corpus ``name``.

     Host, port, user and password are taken verbatim from the text of the
     corresponding edit widgets.
     """
     connection = dict(
         graph_host=self.hostEdit.text(),
         graph_port=self.portEdit.text(),
         graph_user=self.userEdit.text(),
         graph_password=self.passwordEdit.text(),
     )
     new_config = CorpusConfig(name, **connection)
     self.configChanged.emit(new_config)
コード例 #8
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def fave_corpus_config(graph_db, fave_test_dir):
    """Reset the 'fave_test_corpus' corpus, load ``fave_test_dir`` into it
    with the FAVE inspector, and return the corpus config."""
    corpus_config = CorpusConfig('fave_test_corpus', **graph_db)

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_fave(fave_test_dir), fave_test_dir)
    return corpus_config
コード例 #9
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def overlapped_config(graph_db, textgrid_test_dir, acoustic_syllabics):
    """Rebuild the 'overlapped' corpus and return its config.

    The ``overlapped_speech`` directory under ``textgrid_test_dir`` is
    loaded with the MFA inspector into a freshly reset corpus.  Pauses
    (``'sil'``), utterances (``min_pause_length=0``), syllabic segments
    (``acoustic_syllabics``) and syllables are then encoded.  The returned
    config has ``pitch_algorithm`` and ``formant_source`` set to
    ``'acousticsim'``.
    """
    corpus_config = CorpusConfig('overlapped', **graph_db)
    speech_dir = os.path.join(textgrid_test_dir, 'overlapped_speech')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_mfa(speech_dir), speech_dir)

        pause_labels = ['sil']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=0)
        corpus.encode_syllabic_segments(acoustic_syllabics)
        corpus.encode_syllables()

    for option in ('pitch_algorithm', 'formant_source'):
        setattr(corpus_config, option, 'acousticsim')
    return corpus_config
コード例 #10
0
ファイル: conftest.py プロジェクト: esteng/PolyglotDB
def timed_config(graph_db, corpus_data_timed):
    """Import ``corpus_data_timed`` into a reset 'timed' corpus.

    Returns the corpus config once the import has been finalized.
    """
    corpus_config = CorpusConfig('timed', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = corpus_data_timed.types('timed')
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(corpus_data_timed)
        corpus.finalize_import()
    return corpus_config
コード例 #11
0
ファイル: conftest.py プロジェクト: esteng/PolyglotDB
def ursr_config(graph_db, corpus_data_ur_sr):
    """Import ``corpus_data_ur_sr`` into a reset 'ur_sr' corpus.

    Returns the corpus config once the import has been finalized.
    """
    corpus_config = CorpusConfig('ur_sr', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = corpus_data_ur_sr.types('ur_sr')
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(corpus_data_ur_sr)
        corpus.finalize_import()
    return corpus_config
コード例 #12
0
ファイル: conftest.py プロジェクト: esteng/PolyglotDB
def subannotation_config(graph_db, subannotation_data):
    """Import ``subannotation_data`` into a reset 'subannotations' corpus.

    Returns the corpus config once the import has been finalized.
    """
    corpus_config = CorpusConfig('subannotations', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = subannotation_data.types('subannotations')
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(subannotation_data)
        corpus.finalize_import()
    return corpus_config
コード例 #13
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def stressed_config(graph_db, textgrid_test_dir):
    """Reset the 'stressed' corpus and load ``stressed_corpus.TextGrid``
    (from ``textgrid_test_dir``) with the MFA inspector; return the config."""
    corpus_config = CorpusConfig('stressed', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir,
                                 'stressed_corpus.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_mfa(textgrid_path), textgrid_path)
    return corpus_config
コード例 #14
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def partitur_corpus_config(graph_db, partitur_test_dir):
    """Reset the 'partitur' corpus and load ``partitur_test.par,2`` (from
    ``partitur_test_dir``) with the Partitur inspector; return the config."""
    corpus_config = CorpusConfig('partitur', **graph_db)
    par_file = os.path.join(partitur_test_dir, 'partitur_test.par,2')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_partitur(par_file), par_file)
    return corpus_config
コード例 #15
0
ファイル: conftest.py プロジェクト: esteng/PolyglotDB
def syllable_morpheme_config(graph_db, corpus_data_syllable_morpheme_srur):
    """Import the syllable/morpheme test data into a reset
    'syllable_morpheme' corpus and return the corpus config."""
    discourse = corpus_data_syllable_morpheme_srur
    corpus_config = CorpusConfig('syllable_morpheme', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = discourse.types('syllable_morpheme')
        corpus.add_types(*type_args)
        corpus.initialize_import()
        corpus.add_discourse(discourse)
        corpus.finalize_import()
    return corpus_config
コード例 #16
0
 def createConfig(self):
     """Build a ``CorpusConfig`` from the current form contents.

     Returns ``None`` when the corpus list reports no text (``None``);
     otherwise the config is created from the corpus name plus the host,
     port, user and password edit texts, passed through unchanged.
     """
     selected = self.corporaList.text()
     if selected is None:
         return None
     connection = dict(
         graph_host=self.hostEdit.text(),
         graph_port=self.portEdit.text(),
         graph_user=self.userEdit.text(),
         graph_password=self.passwordEdit.text(),
     )
     return CorpusConfig(selected, **connection)
コード例 #17
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def summarized_config(graph_db, textgrid_test_dir):
    """Reset the 'summarized' corpus and load ``acoustic_corpus.TextGrid``
    (from ``textgrid_test_dir``) into it; return the corpus config."""
    corpus_config = CorpusConfig('summarized', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir,
                                 'acoustic_corpus.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)
    return corpus_config
コード例 #18
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def syllable_morpheme_config(graph_db, corpus_data_syllable_morpheme_srur):
    """Import the syllable/morpheme test data into a reset
    'syllable_morpheme' corpus and return the corpus config.

    The import is initialized with the data's speakers, token headers and
    hierarchy subannotations, and finalized with the data object itself.
    """
    discourse = corpus_data_syllable_morpheme_srur
    corpus_config = CorpusConfig('syllable_morpheme', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = discourse.types('syllable_morpheme')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        corpus.add_discourse(discourse)
        corpus.finalize_import(discourse)
    return corpus_config
コード例 #19
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def ursr_config(graph_db, corpus_data_ur_sr):
    """Import ``corpus_data_ur_sr`` into a reset 'ur_sr' corpus.

    The import is initialized with the data's speakers, token headers and
    hierarchy subannotations, and finalized with the data object itself.
    Returns the corpus config.
    """
    discourse = corpus_data_ur_sr
    corpus_config = CorpusConfig('ur_sr', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = discourse.types('ur_sr')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        corpus.add_discourse(discourse)
        corpus.finalize_import(discourse)
    return corpus_config
コード例 #20
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def timed_config(graph_db, corpus_data_timed):
    """Import ``corpus_data_timed`` into a reset 'timed' corpus.

    The import is initialized with the data's speakers, token headers and
    hierarchy subannotations, and finalized with the data object itself.
    Returns the corpus config.
    """
    discourse = corpus_data_timed
    corpus_config = CorpusConfig('timed', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = discourse.types('timed')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        corpus.add_discourse(discourse)
        corpus.finalize_import(discourse)
    return corpus_config
コード例 #21
0
ファイル: common.py プロジェクト: james-tanner/SPADE
def reset(corpus_name):
    """Reset the named corpus on the local database.

    Opens the database through ``ensure_local_database_running`` (port 8080,
    the module's ``server_ip`` and the token from ``load_token()``), prints a
    notice and calls ``reset()`` on the corpus context.
    """
    db_options = dict(port=8080, ip=server_ip, token=load_token())
    with ensure_local_database_running(corpus_name, **db_options) as params:
        corpus_config = CorpusConfig(corpus_name, **params)
        with CorpusContext(corpus_config) as corpus:
            print('Resetting the corpus.')
            corpus.reset()
コード例 #22
0
ファイル: conftest.py プロジェクト: jeffmielke/PolyglotDB
def french_config(graph_db, textgrid_test_dir):
    """Rebuild the 'french' corpus from ``FR001_5.TextGrid``.

    After loading, ``'sil'`` and ``'<SIL>'`` are encoded as pauses and
    utterances are encoded with ``min_pause_length=0.15``.  Returns the
    corpus config.
    """
    corpus_config = CorpusConfig('french', **graph_db)
    textgrid_path = os.path.join(textgrid_test_dir, 'FR001_5.TextGrid')

    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        corpus.load(inspect_textgrid(textgrid_path), textgrid_path)

        pause_labels = ['sil', '<SIL>']
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances(min_pause_length=0.15)

    return corpus_config
コード例 #23
0
def subannotation_config(graph_db, subannotation_data):
    """Import ``subannotation_data`` into a reset 'subannotations' corpus.

    The import is initialized with the data's speakers, token headers and
    hierarchy subannotations, and finalized with its speakers, token headers
    and hierarchy.  Returns the corpus config.
    """
    discourse = subannotation_data
    corpus_config = CorpusConfig('subannotations', **graph_db)
    with CorpusContext(corpus_config) as corpus:
        corpus.reset()
        type_args = discourse.types('subannotations')
        corpus.add_types(*type_args)
        corpus.initialize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy.subannotations,
        )
        corpus.add_discourse(discourse)
        corpus.finalize_import(
            discourse.speakers,
            discourse.token_headers,
            discourse.hierarchy,
        )
    return corpus_config
コード例 #24
0
 def connectToServer(self, ignore=False):
     """Validate the connection form and refresh the corpus list.

     An empty host or a non-integer port aborts early.  Empty user and
     password texts are passed on as ``None``.  The corpus list is cleared
     and refilled from ``get_corpora_list``; if the previously selected
     corpus is still present, its hierarchy is regenerated and saved before
     it is re-selected.  On a connection, authorization or network address
     error ``configChanged`` is emitted with ``None``.

     When ``ignore`` is true, no error dialogs are shown.
     """
     show_errors = not ignore
     error_box = QtWidgets.QMessageBox.critical

     host = self.hostEdit.text()
     if host == '':
         if show_errors:
             error_box(self, "Invalid information",
                       "IP address must be specified or named 'localhost'.")
         return

     port_text = self.portEdit.text()
     try:
         port = int(port_text)
     except ValueError:
         if show_errors:
             error_box(self, "Invalid information",
                       "Port must be an integer.")
         return

     # Blank credentials are handed over as None rather than ''.
     user = self.userEdit.text() or None
     password = self.passwordEdit.text() or None

     selected = self.corporaList.text()
     current_corpus = '' if selected is None else selected

     config = CorpusConfig(current_corpus,
                           graph_host=host,
                           graph_port=port,
                           graph_user=user,
                           graph_password=password)
     self.corporaList.clear()
     try:
         corpora = get_corpora_list(config)
         self.corporaList.add(corpora)
         corpus_name = config.corpus_name
         if corpus_name and corpus_name in corpora:
             with CorpusContext(config) as corpus:
                 corpus.hierarchy = corpus.generate_hierarchy()
                 corpus.save_variables()
         self.corporaList.select(current_corpus)
     except (ConnectionError, AuthorizationError, NetworkAddressError) as err:
         self.configChanged.emit(None)
         if show_errors:
             error_box(self, "Could not connect to server", str(err))
コード例 #25
0
 def connectToServer(self, ignore=False):
     """Validate the connection form, refresh the corpus list and announce
     the resulting configuration.

     An empty host or a non-integer port aborts early.  Empty user and
     password texts are passed on as ``None``.  On success the corpus list
     is refilled, ``configChanged`` is emitted with the new config and
     ``checkAudio`` is called.  On a connection, authorization or network
     address error ``configChanged`` is emitted with ``None`` instead.

     When ``ignore`` is true, no error dialogs are shown.
     """
     show_errors = not ignore
     error_box = QtWidgets.QMessageBox.critical

     host = self.hostEdit.text()
     if host == '':
         if show_errors:
             error_box(self, "Invalid information",
                       "IP address must be specified or named 'localhost'.")
         return

     port_text = self.portEdit.text()
     try:
         port = int(port_text)
     except ValueError:
         if show_errors:
             error_box(self, "Invalid information",
                       "Port must be an integer.")
         return

     # Blank credentials are handed over as None rather than ''.
     user = self.userEdit.text() or None
     password = self.passwordEdit.text() or None

     config = CorpusConfig('',
                           graph_host=host,
                           graph_port=port,
                           graph_user=user,
                           graph_password=password)
     self.corporaList.clear()
     try:
         corpora = get_corpora_list(config)
         self.corporaList.add(corpora)
         self.configChanged.emit(config)
     except (ConnectionError, AuthorizationError, NetworkAddressError) as err:
         self.configChanged.emit(None)
         if show_errors:
             error_box(self, "Could not connect to server", str(err))
     else:
         self.checkAudio()
コード例 #26
0
sys.path.insert(0,base)

import polyglotdb.io as aio
from polyglotdb.config import CorpusConfig

from polyglotdb import CorpusContext


# Paths to the external executables on the author's machine.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'
reaper = r'D:\Dev\Tools\REAPER-master\reaper.exe'

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

config = CorpusConfig('buckeye', **graph_db)

# Only the REAPER path is set here; the Praat path assignment is disabled.
config.reaper_path = reaper
#config.praat_path = praat

def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    arguments remain.
    """
    text_parts = tuple(item for item in args if isinstance(item, str))
    if not text_parts:
        return
    print(' '.join(text_parts))

if __name__ == '__main__':
    # Regex handed to encode_pauses; it matches text beginning with '{' or '<'.
    pause_pattern = '^[{<].*'
    with CorpusContext(config) as corpus:
        corpus.encode_pauses(pause_pattern)
        corpus.encode_utterances(min_pause_length=0.25)
        #utterances = g.get_utterances('s1901b', config.pause_words)
        #print(len(utterances))
コード例 #27
0
import sys
import os
import time
base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0,base)
import polyglotdb.io as aio

from speechtools.corpus import CorpusContext

from polyglotdb.config import CorpusConfig

# Path to the Praat console executable on the author's machine.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

config = CorpusConfig('acoustic', **graph_db)
config.reaper_path = r'D:\Dev\Tools\REAPER-master\reaper.exe'

def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    arguments remain.
    """
    text_parts = tuple(item for item in args if isinstance(item, str))
    if not text_parts:
        return
    print(' '.join(text_parts))

if __name__ == '__main__':
    # Encode 'sil' pauses and utterances, then run the acoustic analysis.
    pause_labels = ['sil']
    with CorpusContext(config) as corpus:
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances()
        corpus.analyze_acoustics()
コード例 #28
0
from polyglotdb.config import CorpusConfig

from polyglotdb import CorpusContext

# Paths to the external executables on the author's machine.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'
reaper = r'D:\Dev\Tools\REAPER-master\reaper.exe'

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

config = CorpusConfig('buckeye', **graph_db)

# Only the REAPER path is set here; the Praat path assignment is disabled.
config.reaper_path = reaper
#config.praat_path = praat


def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    arguments remain.
    """
    text_parts = tuple(item for item in args if isinstance(item, str))
    if not text_parts:
        return
    print(' '.join(text_parts))


if __name__ == '__main__':
    # Regex handed to encode_pauses; it matches text beginning with '{' or '<'.
    pause_pattern = '^[{<].*'
    with CorpusContext(config) as corpus:
        corpus.encode_pauses(pause_pattern)
        corpus.encode_utterances(min_pause_length=0.25)
コード例 #29
0
ファイル: autovot.py プロジェクト: james-tanner/SPADE
    if args.corpus_name not in directories:
        print(
            'The corpus {0} does not have a directory (available: {1}).  Please make it with a {0}.yaml file inside.'
            .format(args.corpus_name, ', '.join(directories)))
        sys.exit(1)

    corpus_conf = load_config(corpus_name)

    print('Processing...')
    #Connect to local database at 8080
    with ensure_local_database_running(corpus_name,
                                       port=8080,
                                       token=common.load_token()) as params:
        #Load corpus context and config info
        config = CorpusConfig(corpus_name, **params)
        config.formant_source = 'praat'
        # Common set up
        if reset:
            with CorpusContext(config) as c:
                print("Resetting the corpus.")
                c.reset()
        common.loading(config, corpus_conf['corpus_directory'],
                       corpus_conf['input_format'])
        common.lexicon_enrichment(config,
                                  corpus_conf['unisyn_spade_directory'],
                                  corpus_conf['dialect_code'])
        common.speaker_enrichment(config,
                                  corpus_conf['speaker_enrichment_file'])
        common.basic_enrichment(
            config, corpus_conf['vowel_inventory'] +
from polyglotdb import CorpusContext
from polyglotdb.config import CorpusConfig
import polyglotdb.io as pgio
import sys
import os

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(host='localhost', port=7474)

# Location of the Switchboard data that the script loads.
path_to_switchboard = os.path.join(
    "/Volumes", "data", "corpora", "Switchboard_for_MFA")

if __name__ == '__main__':
    # Reload the Switchboard corpus, then check that a known word and a
    # known phone label can both be found in the graph.
    config = CorpusConfig("switchboard", **graph_db)
    print("loading corpus...")
    with CorpusContext(config) as corpus:
        corpus.reset()
        fave_parser = pgio.inspect_fave(path_to_switchboard)
        corpus.load(fave_parser, path_to_switchboard)

        word_query = corpus.query_graph(corpus.word).filter(
            corpus.word.label == "think")
        results = word_query.all()
        assert len(results) > 0

        phone_query = corpus.query_graph(corpus.phone).filter(
            corpus.phone.label == "ow")
        results_phone = phone_query.all()
        assert len(results_phone) > 0
コード例 #31
0
import polyglotdb.io as aio

from polyglotdb import CorpusContext

from polyglotdb.config import CorpusConfig

# Path to the Praat console executable on the author's machine.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

config = CorpusConfig('acoustic', **graph_db)
config.reaper_path = r'D:\Dev\Tools\REAPER-master\reaper.exe'


def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    arguments remain.
    """
    text_parts = tuple(item for item in args if isinstance(item, str))
    if not text_parts:
        return
    print(' '.join(text_parts))


if __name__ == '__main__':
    # Encode 'sil' pauses and utterances, then run the acoustic analysis.
    pause_labels = ['sil']
    with CorpusContext(config) as corpus:
        corpus.encode_pauses(pause_labels)
        corpus.encode_utterances()
        corpus.analyze_acoustics()
コード例 #32
0
# exports all sibilants

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

# Machine-specific locations of the Praat executable, the Praat script to
# run, and the CSV output file.  Alternative scripts are left commented out.
praat_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\praatcon.exe'
script_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\sibilant_jane.praat'
#script_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\COG.praat'
#script_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\COG_middle50percent.praat'
output_path = 'C:\\Users\\samih\\Documents\\0_SPADE_labwork\\PolyglotDB\\examples\\sib_data.csv'

config = CorpusConfig('librispeech', **graph_db)
# config = CorpusConfig('acoustic utt', **graph_db)

config.praat_path = praat_path

if __name__ == '__main__':
    with CorpusContext(config) as g:

        g.encode_class(['S', 'Z', 'SH', 'ZH'],
                       'sibilant')  # encode_class method is in featured.py

        begin = time.time()

        g.analyze_script('sibilant',
                         script_path,
コード例 #33
0
from polyglotdb.io import enrich_speakers_from_csv

# Machine-specific paths: external executables and the speaker info file.
praat = r'C:\Users\michael\Documents\Praat\praatcon.exe'
reaper = r'D:\Dev\Tools\REAPER-master\reaper.exe'
speaker_info_path = r'D:\Data\VIC\SpeakerInfo.txt'

# Keyword arguments forwarded to CorpusConfig.
graph_db = dict(
    graph_host='localhost',
    graph_port=7474,
    graph_user='******',
    graph_password='******',
)

config = CorpusConfig('buckeye', **graph_db)

# Both tool paths are registered; 'praat' is selected as pitch algorithm.
config.reaper_path = reaper
config.praat_path = praat
config.pitch_algorithm = 'praat'


def call_back(*args):
    """Print the string arguments joined by single spaces.

    Non-string arguments are dropped; nothing is printed when no string
    arguments remain.
    """
    text_parts = tuple(item for item in args if isinstance(item, str))
    if not text_parts:
        return
    print(' '.join(text_parts))


if __name__ == '__main__':
    # Calls reset_acoustics() on the configured corpus.
    with CorpusContext(config) as corpus:
        corpus.reset_acoustics()
コード例 #34
0
ファイル: timit_loading_old.py プロジェクト: esteng/ULD
    print("wrote to ", dest)


if __name__ == '__main__':
    # Command-line entry point: optionally (re)load the TIMIT corpus,
    # optionally collect the .wav files under the TIMIT directory, then
    # export TextGrids to the destination path.
    parser = argparse.ArgumentParser()
    parser.add_argument("timit_path", help="path to timit corpus")
    parser.add_argument("dest_path", help="path to destination")
    # NOTE(review): --reset and --convert take a value, and any non-empty
    # string (even "false") is truthy.  Left unchanged so existing command
    # lines keep working.
    parser.add_argument("--reset",
                        help="set to true to reset corpus",
                        default=False)
    parser.add_argument("--convert",
                        help="set to true if converting mfccs",
                        default=False)
    args = parser.parse_args()

    corpus_name = "TIMIT"
    with ensure_local_database_running('database') as config:
        conf = CorpusConfig(corpus_name, **config)
        if args.reset:
            loading(conf, args.timit_path)

        # Bound unconditionally: the export below runs whether or not
        # --convert was given, and previously raised NameError without it.
        filename_to_path = {}
        if args.convert:
            # Raw strings avoid the invalid "\." escape in a normal literal;
            # the patterns themselves are unchanged.
            wav_name = re.compile(r".*\.[Ww][Aa][Vv]")
            wav_suffix = re.compile(r"\.[Ww][Aa][Vv]")
            for root, dirs, files in os.walk(args.timit_path):
                for file in files:
                    if wav_name.match(file) is not None:
                        src_filename = wav_suffix.sub("", file)
                        path = os.path.join(root, file)
                        filename_to_path[src_filename] = path

        export_textgrid(conf, args.dest_path, filename_to_path)