Code example #1
File: test_misc.py  Project: ynop/py-nltools
    def test_load_config(self):

        cfg = misc.load_config('.speechrc')

        host = cfg.get('tts', 'host')

        self.assertEqual (host, 'local')
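
Note: the tests in these examples read a `.speechrc` configuration file from the user's home directory via `misc.load_config`. As a rough, hypothetical illustration (not taken from the project itself; `load_config` appears to return a ConfigParser-style object, as the `cfg.get('tts', 'host')` calls suggest, and the port value below is an assumption), such a file could be created and read with Python's standard configparser:

    import configparser
    import os

    # Hypothetical sketch: write a minimal ~/.speechrc with the [tts] options
    # queried by the tests in these examples (host is asserted to be 'local';
    # the port value is an assumption for illustration only).
    cfg = configparser.ConfigParser()
    cfg['tts'] = {'host': 'local', 'port': '8014'}

    rc_path = os.path.expanduser('~/.speechrc')
    with open(rc_path, 'w') as rcf:
        cfg.write(rcf)

    # Reading it back mirrors what misc.load_config('.speechrc') is used for here.
    cfg2 = configparser.ConfigParser()
    cfg2.read(rc_path)
    assert cfg2.get('tts', 'host') == 'local'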
Code example #2
    def test_tts_mary(self):

        config = misc.load_config('.speechrc')

        tts = TTS(config.get('tts', 'host'), int(config.get('tts', 'port')))

        # test mary

        tts.engine = 'mary'

        for l, voice, word, ph in MARY_TESTS:

            tts.locale = l
            tts.voice = voice

            mary_ph = tts.gen_ipa(word)

            self.assertEqual(mary_ph, ph)

            wav = tts.synthesize(word)
            logging.debug('wav len: %d bytes.' % len(wav))
            self.assertGreater(len(wav), 100)

            wav = tts.synthesize(ph, mode='ipa')
            logging.debug('wav len: %d bytes.' % len(wav))
            self.assertGreater(len(wav), 100)
Code example #3
    def test_tts_espeak(self):

        config = misc.load_config('.speechrc')

        tts = TTS(config.get('tts', 'host'), int(config.get('tts', 'port')))

        tts.engine = 'espeak'

        first = True
        for v, word, ph in ESPEAK_TESTS:

            tts.locale = v
            tts.voice = v

            espeak_ph = tts.gen_ipa(word)

            self.assertEqual(espeak_ph, ph)

            wav = tts.synthesize(word)
            logging.debug('wav len: %d bytes.' % len(wav))
            self.assertGreater(len(wav), 100)

            wav = tts.synthesize(ph, mode='ipa')
            logging.debug('wav len: %d bytes.' % len(wav))
            self.assertGreater(len(wav), 100)

            if first:
                tts.say(word)
                first = False
Code example #4
    def setUp(self):

        config = misc.load_config('.airc')

        #
        # db, store
        #

        db_url = config.get('db', 'url')
        # db_url = 'sqlite:///tmp/foo.db'

        self.sas = SPARQLAlchemyStore(db_url, 'unittests', echo=True)
        self.context = u'http://example.com'
        
        #
        # import triples to test on
        #

        self.sas.clear_all_graphs()

        samplefn = 'tests/dt.n3'

        with codecs.open(samplefn, 'r', 'utf8') as samplef:

            data = samplef.read()

            self.sas.parse(data=data, context=self.context, format='n3')
Code example #5
File: test_aiprolog.py  Project: Frostie2/zamia-ai
    def setUp(self):

        config = misc.load_config('.airc')

        #
        # logic DB
        #

        self.db = LogicDB(model.url)

        #
        # knowledge base
        #

        self.kb = AIKB(UNITTEST_MODULE)

        for prefix in COMMON_PREFIXES:
            self.kb.register_prefix(prefix, COMMON_PREFIXES[prefix])

        self.kb.clear_all_graphs()

        self.kb.parse_file (UNITTEST_CONTEXT, 'n3', 'tests/chancellors.n3')
        self.kb.parse_file (UNITTEST_CONTEXT, 'n3', 'tests/wev.n3')

        #
        # aiprolog environment setup
        #

        self.prolog_rt = AIPrologRuntime(self.db, self.kb)
        self.parser    = AIPrologParser()

        self.prolog_rt.set_trace(True)

        self.db.clear_module(UNITTEST_MODULE)
Code example #6
    def __init__(self):

        #
        # prepare our lightweight sparql wrapper
        #

        self.query_prefixes = ''.join(
            map(lambda k: "PREFIX %s: <%s>\n" % (k, COMMON_PREFIXES[k]),
                COMMON_PREFIXES))

        #
        # set up graph store
        #

        config = misc.load_config('.nlprc')

        # self.graph = rdflib.ConjunctiveGraph('Sleepycat')
        # self.graph.open(RDF_LIB_STORE_PATH, create = True)

        # SQLAlchemy

        url = config.get('db', 'url')

        self.uri = rdflib.Literal(url)

        rdflib_sqlalchemy2.registerplugins()
        store = rdflib.plugin.get("SQLAlchemy2",
                                  rdflib.store.Store)(identifier=self.ident)
        self.graph = rdflib.ConjunctiveGraph(store, identifier=self.ident)
        self.graph.open(self.uri, create=True)
Code example #7
File: ai_cli.py  Project: mpuels/zamia-ai
    def __init__(self):

        cmdln.Cmdln.__init__(self)

        self.config = misc.load_config('.airc')
        toplevel = self.config.get('semantics', 'toplevel')
        xsb_root = self.config.get('semantics', 'xsb_root')
        db_url = self.config.get('db', 'url')

        self.kernal = AIKernal(db_url, xsb_root, toplevel)
Code example #8
File: ai_kernal.py  Project: thetimeofblack/zamia-ai
    def __init__(self, load_all_modules=False):

        self.config = misc.load_config('.airc')

        #
        # database
        #

        Session = sessionmaker(bind=model.engine)
        self.session = Session()

        #
        # TensorFlow (deferred, as tf can take quite a bit of time to set up)
        #

        self.tf_session = None
        self.nlp_model = None

        #
        # module management, setup
        #

        self.modules = {}
        self.initialized_modules = set()
        s = self.config.get('semantics', 'modules')
        self.all_modules = list(map(lambda s: s.strip(), s.split(',')))
        sys.path.append('modules')

        #
        # AIProlog parser, runtime
        #

        db_url = self.config.get('db', 'url')
        self.db = LogicDB(db_url)
        self.aip_parser = AIPrologParser(self)
        self.rt = AIPrologRuntime(self.db)
        self.dummyloc = SourceLocation('<rt>')

        #
        # alignment / word2vec (on-demand model loading)
        #
        self.w2v_model = None
        self.w2v_lang = None
        self.w2v_all_utterances = []

        #
        # load modules, if requested
        #
        if load_all_modules:
            for mn2 in self.all_modules:
                self.load_module(mn2)
                self.init_module(mn2)
Code example #9
    def __init__(self):

        cmdln.Cmdln.__init__(self)

        self.config = misc.load_config('.airc')

        all_modules = list(
            map(lambda m: m.strip(),
                self.config.get('semantics', 'modules').split(',')))

        db_url = self.config.get('db', 'url')
        db = LogicDB(db_url)

        self.kernal = AIKernal(db=db, all_modules=all_modules)
Code example #10
File: nlp_kernal.py  Project: ilibx/nlp
    def __init__(self):

        self.config = misc.load_config('.nlprc')

        #
        # database
        #

        Session = sessionmaker(bind=model.engine)
        self.session = Session()

        #
        # logic DB
        #

        self.db = LogicDB(self.session)

        #
        # knowledge base
        #

        self.kb = HALKB()

        #
        # TensorFlow (deferred, as tf can take quite a bit of time to set up)
        #

        self.tf_session = None
        self.nlp_model = None

        #
        # module management, setup
        #

        self.modules = {}
        s = self.config.get('semantics', 'modules')
        self.all_modules = list(map(lambda s: s.strip(), s.split(',')))

        for mn2 in self.all_modules:
            self.load_module(mn2)

        #
        # prolog environment setup
        #

        self.prolog_engine = PrologAIEngine(self.db)

        self.parser = PrologParser()
Code example #11
def main(model_name, dictionary, language_model, sequitur_model=None, debug=0,
         verbose=False, prompt_words=False, *audio_corpora):

    misc.init_app('speech_kaldi_export')

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    language_model_dir = LANGUAGE_MODELS_DIR.resolve() / language_model
    exit_if_language_model_dir_doesnt_exist(language_model_dir)

    config = misc.load_config ('.speechrc')

    work_dir = ASR_MODELS_DIR / 'kaldi' / model_name
    kaldi_root = config.get("speech", "kaldi_root")

    data_dir = work_dir / "data"
    mfcc_dir = work_dir / "mfcc"

    wav16_dir = config.get("speech", "wav16")

    create_basic_work_dir_structure(
        str(data_dir),
        wav16_dir,
        str(mfcc_dir),
        str(work_dir),
        str(language_model_dir),
        kaldi_root)

    if sequitur_model:
        sequitur_model_path = str(SEQUITUR_MODEL_DIR / sequitur_model)
    else:
        sequitur_model_path = None

    generate_speech_and_text_corpora(data_dir,
                                     wav16_dir,
                                     debug,
                                     sequitur_model_path,
                                     dictionary,
                                     audio_corpora,
                                     prompt_words)

    copy_scripts_and_config_files(work_dir, kaldi_root)
Code example #12
    def test_tts_pico(self):

        config = misc.load_config('.speechrc')

        tts = TTS(config.get('tts', 'host'), int(config.get('tts', 'port')))

        tts.engine = 'pico'

        for v, word in PICO_TESTS:

            tts.locale = v
            tts.voice = v

            wav = tts.synthesize(word)
            logging.debug('wav len: %d bytes.' % len(wav))
            self.assertGreater(len(wav), 100)

            tts.say(word)
Code example #13
File: test_aiprolog.py  Project: gooofy/voxforge
    def setUp(self):

        config = misc.load_config('.airc')

        #
        # logic DB
        #

        self.db = LogicDB(model.url)

        #
        # aiprolog environment setup
        #

        self.prolog_rt = AIPrologRuntime(self.db)
        self.parser    = AIPrologParser(self.db)

        self.prolog_rt.set_trace(True)

        self.db.clear_module(UNITTEST_MODULE)
Code example #14
    def setUp(self):

        config = misc.load_config('.airc')

        #
        # logic DB
        #

        self.db = LogicDB(model.url)

        #
        # aiprolog environment setup
        #

        self.prolog_rt = AIPrologRuntime(self.db)
        self.parser = AIPrologParser(self.db)

        self.prolog_rt.set_trace(True)

        self.db.clear_module(UNITTEST_MODULE)
Code example #15
    def setUp(self):

        config = misc.load_config('.airc')

        #
        # db, store
        #

        db_url = config.get('db', 'url')
        # db_url = 'sqlite:///tmp/foo.db'

        self.sas = SPARQLAlchemyStore(db_url, 'unittests', echo=True, prefixes=COMMON_PREFIXES, aliases=RESOURCE_ALIASES)
        self.context = u'http://example.com'
        self.sas.clear_all_graphs()
       
        #
        # LDF Mirror
        #

        self.ldfmirror = LDFMirror (self.sas, ENDPOINTS)
Code example #16
def main(verbose=False, *speech_corpora):
    """Scan directory for audio files and convert them to wav files

    For each speech corpus `speech_corpus`

    1. the resulting wav files are written to the directory
       `.speechrc.wav16`/<speech_corpus>/

    2. the transcripts in data/src/speech/<speech_corpus>/transcripts_*.csv are
       updated.
    """
    misc.init_app('speech_audio_scan')

    config = misc.load_config('.speechrc')

    speech_corpora_dir = Path(config.get("speech", "speech_corpora"))
    wav16 = Path(config.get("speech", "wav16"))

    if len(speech_corpora) < 1:
        logging.error("At least one speech corpus must be provided.")
        sys.exit(1)

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    exit_if_corpus_is_missing(speech_corpora_dir, speech_corpora)

    for speech_corpus in speech_corpora:
        transcripts = Transcripts(corpus_name=speech_corpus, create_db=True)
        out_wav16_subdir = wav16 / speech_corpus
        out_wav16_subdir.mkdir(parents=True, exist_ok=True)
        in_root_corpus_dir = speech_corpora_dir / speech_corpus

        scan_audiodir(str(in_root_corpus_dir), transcripts,
                      str(out_wav16_subdir))

        transcripts.save()
        print(speech_corpus, "new transcripts saved.")
        print()
Code example #17
File: kb.py  Project: Frostie2/zamia-ai
    def __init__(self, kbname='kb'):

        #
        # prepare our lightweight sparql wrapper
        #

        self.query_prefixes = ''

        #
        # set up graph store
        #

        config = misc.load_config('.airc')

        # DB, SPARQLAlchemyStore

        db_url = config.get('db', 'url')

        self.sas = SPARQLAlchemyStore(db_url, kbname, echo=False)

        self.endpoints = {} # host name -> LDF endpoint
Code example #18
def main(corpus, verbose=False):
    """Generate training sentences for language models

    Let `corpus` be the argument given on the command line. The corpus is
    tokenized and each sentence is written on a separate line into
    `data/dst/text-corpora/<corpus>.txt`. All punctuation marks are stripped.
    """
    """
    init_app('speech_sentences')

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    config = load_config('.speechrc')

    TEXT_CORPORA_DIR.mkdir(parents=True, exist_ok=True)

    out_file = TEXT_CORPORA_DIR / (corpus + ".txt")

    with codecs.open(str(out_file), "w", "utf-8") as outf:
        # I haven't figured out how to refactor the processing algorithms of the
        # parole corpus to implement a generator.
        if corpus == "parole_de":
            corpus_path = config.get("speech", corpus)
            proc_parole_de(corpus_path, load_punkt_tokenizer, outf)
        elif corpus in TEXT_CORPORA:
            corpus_path = config.get("speech", corpus)
            for sentence in TEXT_CORPORA[corpus](corpus_path):
                outf.write(sentence + "\n")
        elif corpus in SPEECH_CORPORA:
            for sentence in SPEECH_CORPORA[corpus]():
                outf.write(sentence + "\n")
        else:
            raise Exception("This shouldn't happen.")

    logging.info('%s written.' % out_file)
Code example #19
def main(verbose=False, debug_sgm_limit=0):
    """Train the Punkt tokenizer on the German Parole corpus"""
    init_app('speech_sentences')

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    config = load_config('.speechrc')

    parole_path = config.get("speech", "parole_de")

    logging.info("training punkt...")

    punkt_trainer = nltk.tokenize.punkt.PunktTrainer()

    train_punkt_wrapper = parole.TrainPunktWrapper(punkt_trainer)

    parole.parole_crawl(parole_path, train_punkt_wrapper.train_punkt,
                        debug_sgm_limit)

    logging.info("finalizing punkt training...")
    punkt_trainer.finalize_training(verbose=True)
    logging.info("punkt training done. %d text segments." %
                 train_punkt_wrapper.punkt_count)

    params = punkt_trainer.get_params()
    # print "Params: %s" % repr(params)

    parole.PUNKT_PICKLEFN.parent.mkdir(parents=True, exist_ok=True)
    tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer(params)
    with open(str(parole.PUNKT_PICKLEFN), mode='wb') as f:
        pickle.dump(tokenizer, f, protocol=pickle.HIGHEST_PROTOCOL)

    logging.info('%s written.' % parole.PUNKT_PICKLEFN)
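
The training script above pickles a PunktSentenceTokenizer to `parole.PUNKT_PICKLEFN`. A minimal, hypothetical counterpart for loading and using that tokenizer later (assuming the same `parole` module is importable) might look like this:

    import pickle

    import parole  # assumed: the same module that defines PUNKT_PICKLEFN above

    # Load the tokenizer pickled by the training script and split raw text
    # into sentences with NLTK's PunktSentenceTokenizer.tokenize().
    with open(str(parole.PUNKT_PICKLEFN), 'rb') as f:
        tokenizer = pickle.load(f)

    sentences = tokenizer.tokenize(u'Das ist ein Satz. Hier kommt noch einer.')
    for s in sentences:
        print(s)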
Code example #20
    finally:
        msg_cond.release()


#
# init
#

misc.init_app(PROC_TITLE)

#
# config, cmdline
#

config = misc.load_config('.airc', defaults=DEFAULTS)

broker_host = config.get('mqtt', 'broker_host')
broker_port = config.getint('mqtt', 'broker_port')
broker_user = config.get('mqtt', 'broker_user')
broker_pw = config.get('mqtt', 'broker_pw')

ai_model = config.get('server', 'model')
lang = config.get('server', 'lang')
vf_login = config.get('server', 'vf_login')
rec_dir = config.get('server', 'rec_dir')
kaldi_model_dir = config.get('server', 'kaldi_model_dir')
kaldi_model = config.get('server', 'kaldi_model')
kaldi_acoustic_scale = config.getfloat('server', 'kaldi_acoustic_scale')
kaldi_beam = config.getfloat('server', 'kaldi_beam')
kaldi_frame_subsampling_factor = config.getint(
    'server', 'kaldi_frame_subsampling_factor')
Code example #21
from nltools.misc      import compress_ws, load_config, init_app
from nltools.tokenizer import tokenize

SENTENCEFN      = 'data/dst/speech/en/sentences.txt'
SENTENCES_STATS = 1000

DEBUG_LIMIT     = 0
# DEBUG_LIMIT     = 1000

#
# init 
#

init_app ('speech_sentences')

config = load_config ('.speechrc')

europarl       = config.get("speech", "europarl_en")
movie_dialogs  = config.get("speech", "cornell_movie_dialogs")
web_questions  = config.get("speech", "web_questions")
yahoo_answers  = config.get("speech", "yahoo_answers")

#
# commandline parsing
#

parser = OptionParser("usage: %prog [options]")

parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                  help="enable verbose logging")
Code example #22
File: model.py  Project: upsource/zamia-ai
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import sys

from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, Text, Unicode, UnicodeText, Enum, DateTime, ForeignKey, Index
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base

from nltools import misc

config = misc.load_config('.airc')

# db_server = config.get("semantics", "dbserver")
# db_name   = config.get("semantics", "dbname")
# db_user   = config.get("semantics", "dbuser")
# db_pass   = config.get("semantics", "dbpass")
#
# # We connect with the help of the PostgreSQL URL
# # postgresql://federer:grandestslam@localhost:5432/tennis
# url = 'postgresql://{}:{}@{}:{}/{}'
# url = url.format(db_user, db_pass, db_server, 5432, db_name)

url = config.get("db", "url")

#engine = create_engine(url, echo=True)
engine = create_engine(url)
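
Examples #8 and #10 show how this module-level `engine` is consumed elsewhere: a session factory is bound to it. A short sketch of that pattern, assuming this file is importable as `model`:

    from sqlalchemy.orm import sessionmaker

    import model  # assumed import name for the module defined above

    # Bind a session factory to the shared engine and open a session,
    # as done in the AIKernal / nlp_kernal constructors in examples #8 and #10.
    Session = sessionmaker(bind=model.engine)
    session = Session()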
Code example #23
File: kb_weather.py  Project: vishalbelsare/zamia-ai
def fetch_weather_forecast(kernal):

    config = misc.load_config('.airc')
    api_key = config.get("weather", "api_key")

    logging.debug('fetch_weather_forecast cron job, api key: %s' % api_key)

    sl = SourceLocation(fn='__internet__', col=0, line=0)

    #
    # resolve city ids, timezones
    #

    locations = {}

    # owmCityId(wdeLosAngeles, 5368361).
    solutions = kernal.rt.search_predicate('owmCityId', ['_1', '_2'])

    for s in solutions:

        location = s['_1'].name
        city_id = int(s['_2'].f)

        # aiTimezone(wdeNewYorkCity, "America/New_York").
        solutions2 = kernal.rt.search_predicate('aiTimezone', [location, '_1'])
        if len(solutions2) < 1:
            continue
        timezone = solutions2[0]['_1'].s

        solutions2 = kernal.rt.search_predicate('rdfsLabel',
                                                [location, 'en', '_1'])
        if len(solutions2) < 1:
            continue
        label = solutions2[0]['_1'].s

        # wdpdCoordinateLocation(wdeBerlin, "Point(13.383333333 52.516666666)").
        solutions2 = kernal.rt.search_predicate('wdpdCoordinateLocation',
                                                [location, '_1'])
        if len(solutions2) < 1:
            continue
        m = coord_matcher.match(solutions2[0]['_1'].s)
        if not m:
            continue
        geo_lat = float(m.group(2))
        geo_long = float(m.group(1))

        if not location in locations:
            locations[location] = {}
            locations[location]['city_id'] = city_id
            locations[location]['timezone'] = timezone
            locations[location]['label'] = label
            locations[location]['long'] = geo_long
            locations[location]['lat'] = geo_lat

    def mangle_label(label):
        return ''.join(map(lambda c: c if c.isalnum() else '', label))

    #
    # generate triples of weather and astronomical data
    #

    env = {}

    for location in locations:

        city_id = locations[location]['city_id']
        timezone = locations[location]['timezone']
        loc_label = mangle_label(locations[location]['label'])
        geo_lat = locations[location]['lat']
        geo_long = locations[location]['long']

        tz = pytz.timezone(timezone)

        ref_dt = datetime.now(tz).replace(hour=0,
                                          minute=0,
                                          second=0,
                                          microsecond=0)

        logging.debug("%s %s" % (location, ref_dt))

        #
        # sunrise / sunset
        #

        l = astral.Location()
        l.name = 'name'
        l.region = 'region'
        l.latitude = geo_lat
        l.longitude = geo_long
        l.timezone = timezone
        l.elevation = 0

        for day_offset in range(7):
            cur_date = (ref_dt + timedelta(days=day_offset)).date()

            sun = l.sun(date=cur_date, local=True)

            sun_const = u'aiUnlabeledSun%s%s' % (loc_label,
                                                 cur_date.strftime('%Y%m%d'))

            env = do_retract(env,
                             build_predicate('aiLocation', [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDate', [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDawn', [sun_const, '_']))
            env = do_retract(env, build_predicate('aiSunrise',
                                                  [sun_const, '_']))
            env = do_retract(env, build_predicate('aiNoon', [sun_const, '_']))
            env = do_retract(env, build_predicate('aiSunset',
                                                  [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDusk', [sun_const, '_']))

            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate('aiLocation',
                                            [sun_const, location])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiDate',
                           [sun_const,
                            StringLiteral(cur_date.isoformat())])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiDawn',
                           [sun_const,
                            StringLiteral(sun['dawn'].isoformat())])))
            env = do_assertz(
                env,
                Clause(
                    location=sl,
                    head=build_predicate(
                        'aiSunrise',
                        [sun_const,
                         StringLiteral(sun['sunrise'].isoformat())])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiNoon',
                           [sun_const,
                            StringLiteral(sun['noon'].isoformat())])))
            env = do_assertz(
                env,
                Clause(
                    location=sl,
                    head=build_predicate(
                        'aiSunset',
                        [sun_const,
                         StringLiteral(sun['sunset'].isoformat())])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiDusk',
                           [sun_const,
                            StringLiteral(sun['dusk'].isoformat())])))

            logging.debug("%s %s %s -> %s" %
                          (sun_const, cur_date, sun['sunrise'], sun['sunset']))

        #
        # fetch json forecast data from OpenWeatherMap
        #

        url = 'http://api.openweathermap.org/data/2.5/forecast?id=%s&APPID=%s' % (
            city_id, api_key)

        data = json.load(urllib2.urlopen(url))

        if not 'list' in data:
            logging.error('failed to fetch weather data for %s, got: %s' %
                          (location, repr(data)))
            continue

        # print repr(data['list'])

        for fc in data['list']:

            dt_to = datetime.strptime(fc['dt_txt'], '%Y-%m-%d %H:%M:%S')
            dt_to = dt_to.replace(tzinfo=pytz.utc)

            dt_from = dt_to - timedelta(hours=3)

            city_id = city_id
            temp_min = fc['main']['temp_min'] - KELVIN
            temp_max = fc['main']['temp_max'] - KELVIN
            code = fc['weather'][0]['id']
            precipitation = float(
                fc['rain']
                ['3h']) if 'rain' in fc and '3h' in fc['rain'] else 0.0
            icon = fc['weather'][0]['icon']
            description = fc['weather'][0]['description']
            clouds = float(fc['clouds']['all'])

            fc_const = 'aiUnlabeledFc%s%s' % (loc_label,
                                              dt_from.strftime('%Y%m%d%H%M%S'))

            logging.debug("%s on %s-%s city_id=%s" %
                          (fc_const, dt_from, dt_to, city_id))

            # aiDescription(aiUnlabeledFcFreudental20161205180000, "clear sky").
            # aiDtEnd(aiUnlabeledFcFreudental20161205180000, "2016-12-05T21:00:00+00:00").
            # aiTempMin(aiUnlabeledFcFreudental20161205180000, -6.666).
            # aiIcon(aiUnlabeledFcFreudental20161205180000, "01n").
            # aiLocation(aiUnlabeledFcFreudental20161205180000, wdeFreudental).
            # aiDtStart(aiUnlabeledFcFreudental20161205180000, "2016-12-05T18:00:00+00:00").
            # aiClouds(aiUnlabeledFcFreudental20161205180000, 0.0).
            # aiPrecipitation(aiUnlabeledFcFreudental20161205180000, 0.0).
            # aiTempMax(aiUnlabeledFcFreudental20161205180000, -6.45).

            env = do_retract(env,
                             build_predicate('aiDescription', [fc_const, '_']))
            env = do_retract(env, build_predicate('aiDtEnd', [fc_const, '_']))
            env = do_retract(env, build_predicate('aiTempMin',
                                                  [fc_const, '_']))
            env = do_retract(env, build_predicate('aiIcon', [fc_const, '_']))
            env = do_retract(env, build_predicate('aiLocation',
                                                  [fc_const, '_']))
            env = do_retract(env, build_predicate('aiDtStart',
                                                  [fc_const, '_']))
            env = do_retract(env, build_predicate('aiClouds', [fc_const, '_']))
            env = do_retract(
                env, build_predicate('aiPrecipitation', [fc_const, '_']))
            env = do_retract(env, build_predicate('aiTempMax',
                                                  [fc_const, '_']))

            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate('aiLocation',
                                            [fc_const, location])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate('aiTempMin',
                                            [fc_const, temp_min])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate('aiTempMax',
                                            [fc_const, temp_max])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate('aiPrecipitation',
                                            [fc_const, precipitation])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate('aiClouds', [fc_const, clouds])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiIcon', [fc_const, StringLiteral(icon)])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiDescription',
                           [fc_const, StringLiteral(description)])))
            env = do_assertz(
                env,
                Clause(location=sl,
                       head=build_predicate(
                           'aiDtStart',
                           [fc_const,
                            StringLiteral(dt_from.isoformat())])))
            env = do_assertz(
                env,
                Clause(
                    location=sl,
                    head=build_predicate(
                        'aiDtEnd',
                        [fc_const, StringLiteral(dt_to.isoformat())])))

    kernal.rt.apply_overlay(WEATHER_DATA_MODULE, env)
Code example #24
def main(verbose=False):
    """Convert gspv2 corpus to the VoxForge corpus format

    The variable `speech_arc` in ~/.speechrc must point to a folder that
    contains the original gspv2 corpus in a subfolder named gspv2,
    i.e. with the subfolders dev, test, and train.

    The variable `speech_corpora` in ~/.speechrc must point to a folder
    where the resulting corpus should be written. The script will create
    a subfolder gspv2 here for the resulting voxforge-formatted data.
    """
    misc.init_app('speech_audio_scan')
    config = misc.load_config('.speechrc')

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    speech_arc_dir = Path(config.get("speech", "speech_arc"))
    speech_corpora_dir = Path(config.get("speech", "speech_corpora"))
    src_root_dir = speech_arc_dir / "gspv2"
    dst_root_dir = speech_corpora_dir / "gspv2"

    exit_if_dst_root_dir_exists(dst_root_dir)

    speakers = set()
    speaker_gender = {}

    for folder in ['train', 'test', 'dev']:
        destdir = dst_root_dir

        src_dir = src_root_dir / folder

        num_files = len([f for f in src_dir.glob("*.xml")])

        cnt_files = 0

        for xml_path in src_dir.glob("*.xml"):
            f = str(xml_path)

            cnt_files += 1

            fbase = f[0:len(f) - 4]

            with codecs.open(f, 'r', 'utf-8') as xmlfile:
                # remove urls
                text = xmlfile.read()
                soup = BeautifulSoup(text)
                sentence = (soup.recording.sentence.string).strip()
                cleaned_sentence = (
                    soup.recording.cleaned_sentence.string).strip()
                sentence_id = int((soup.recording.sentence_id.string).strip())

                speaker_id = (soup.recording.speaker_id.string).strip()
                gender = (soup.recording.gender.string).strip()
                name = 'gsp%s' % speaker_id.replace('-', '')
                speakerdir = destdir / (name + "-1")

                if not speaker_id in speakers:
                    speakers.add(speaker_id)
                    speaker_gender[name] = \
                        'm' if gender == 'male' else 'f'
                    (speakerdir / "wav").mkdir(parents=True, exist_ok=True)
                    (speakerdir / "etc").mkdir(parents=True, exist_ok=True)

                for mic in [
                        'Yamaha', 'Kinect-Beam', 'Kinect-RAW', 'Realtek',
                        'Samson'
                ]:
                    srcaudiofn = src_dir / ('%s_%s.wav' % (fbase, mic))

                    if not srcaudiofn.is_file():
                        continue

                    audiofn = Path('%s-%s' % (fbase, mic)).name
                    dstaudiofn = speakerdir / "wav" / (audiofn + ".wav")

                    logging.info(
                        '%5d/%5d %s %s %s' %
                        (cnt_files, num_files, name, audiofn, str(srcaudiofn)))

                    prompts_orig = speakerdir / "etc" / "prompts-original"
                    with codecs.open(str(prompts_orig), 'a', 'utf8') as promptsf:
                        promptsf.write(u'%s %s\n' %
                                       (audiofn, cleaned_sentence))

                    copy_file(str(srcaudiofn), str(dstaudiofn))
Code example #25
File: prolog_shell.py  Project: gooofy/zamia-prolog
from optparse import OptionParser
from StringIO import StringIO

from nltools import misc
from zamiaprolog.logicdb import LogicDB
from zamiaprolog.parser import PrologParser, PrologError
from zamiaprolog.runtime import PrologRuntime, PrologRuntimeError

# logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)

#
# init
#
misc.init_app('prolog_shell')
config = misc.load_config('.nlprc')

#
# readline, history
#

histfile = os.path.join(os.path.expanduser("~"), ".hal_prolog_history")
try:
    readline.read_history_file(histfile)
    # default history len is -1 (infinite), which may grow unruly
    readline.set_history_length(1000)
except IOError:
    pass
atexit.register(readline.write_history_file, histfile)

#
Code example #26
def get_api_key():

    config = misc.load_config('.airc')
    return config.get('weather', 'api_key')
Code example #27
File: kb_weather.py  Project: gooofy/voxforge
def fetch_weather_forecast(kernal):

    config  = misc.load_config('.airc')
    api_key = config.get("weather", "api_key")

    logging.debug ('fetch_weather_forecast cron job, api key: %s' % api_key)

    sl = SourceLocation(fn='__internet__', col=0, line=0)

    #
    # resolve city ids, timezones
    #

    locations = {}

    # owmCityId(wdeLosAngeles, 5368361). 
    solutions = kernal.rt.search_predicate ('owmCityId', ['_1', '_2'])

    for s in solutions:

        location = s['_1'].name
        city_id  = int(s['_2'].f)

        # aiTimezone(wdeNewYorkCity, "America/New_York").
        solutions2 = kernal.rt.search_predicate ('aiTimezone', [location, '_1'])
        if len(solutions2)<1:
            continue
        timezone = solutions2[0]['_1'].s

        solutions2 = kernal.rt.search_predicate ('rdfsLabel', [location, 'en', '_1'])
        if len(solutions2)<1:
            continue
        label = solutions2[0]['_1'].s

        # wdpdCoordinateLocation(wdeBerlin, "Point(13.383333333 52.516666666)").
        solutions2 = kernal.rt.search_predicate ('wdpdCoordinateLocation', [location, '_1'])
        if len(solutions2)<1:
            continue
        m = coord_matcher.match(solutions2[0]['_1'].s)
        if not m:
            continue
        geo_lat  = float(m.group(2))
        geo_long = float(m.group(1))

        if not location in locations:
            locations[location] = {}
            locations[location]['city_id']  = city_id
            locations[location]['timezone'] = timezone
            locations[location]['label']    = label
            locations[location]['long']     = geo_long
            locations[location]['lat']      = geo_lat

    def mangle_label(label):
        return ''.join(map(lambda c: c if c.isalnum() else '', label))

    #
    # generate triples of weather and astronomical data
    #

    env = {}

    for location in locations:

        city_id   = locations[location]['city_id']
        timezone  = locations[location]['timezone']
        loc_label = mangle_label(locations[location]['label'])
        geo_lat   = locations[location]['lat']
        geo_long  = locations[location]['long']

        tz = pytz.timezone(timezone)

        ref_dt = datetime.now(tz).replace( hour        = 0,
                                           minute      = 0,
                                           second      = 0,
                                           microsecond = 0)

        logging.debug("%s %s" % ( location, ref_dt ) )

        #
        # sunrise / sunset
        #

        l = astral.Location()
        l.name      = 'name'
        l.region    = 'region'
        l.latitude  = geo_lat
        l.longitude = geo_long
        l.timezone  = timezone
        l.elevation = 0

        for day_offset in range(7):
            cur_date = (ref_dt + timedelta(days=day_offset)).date()

            sun = l.sun(date=cur_date, local=True)

            sun_const = u'aiUnlabeledSun%s%s' % (loc_label, cur_date.strftime('%Y%m%d'))

            env = do_retract(env, build_predicate('aiLocation', [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDate',     [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDawn',     [sun_const, '_']))
            env = do_retract(env, build_predicate('aiSunrise',  [sun_const, '_']))
            env = do_retract(env, build_predicate('aiNoon',     [sun_const, '_']))
            env = do_retract(env, build_predicate('aiSunset',   [sun_const, '_']))
            env = do_retract(env, build_predicate('aiDusk',     [sun_const, '_']))

            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiLocation', [sun_const, location])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDate',     [sun_const, StringLiteral(cur_date.isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDawn',     [sun_const, StringLiteral(sun['dawn'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiSunrise',  [sun_const, StringLiteral(sun['sunrise'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiNoon',     [sun_const, StringLiteral(sun['noon'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiSunset',   [sun_const, StringLiteral(sun['sunset'].isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDusk',     [sun_const, StringLiteral(sun['dusk'].isoformat())])))

            logging.debug ("%s %s %s -> %s" % (sun_const, cur_date, sun['sunrise'], sun['sunset']) )



        #
        # fetch json forecast data from OpenWeatherMap
        #

        url = 'http://api.openweathermap.org/data/2.5/forecast?id=%s&APPID=%s' % (city_id, api_key)

        data = json.load(urllib2.urlopen(url))

        if not 'list' in data:
            logging.error ('failed to fetch weather data for %s, got: %s' % (location, repr(data)))
            continue


        # print repr(data['list'])

        for fc in data['list']:

            dt_to   = datetime.strptime (fc['dt_txt'], '%Y-%m-%d %H:%M:%S')
            dt_to   = dt_to.replace(tzinfo=pytz.utc)

            dt_from = dt_to - timedelta(hours=3)

            city_id       = city_id
            temp_min      = fc['main']['temp_min']-KELVIN
            temp_max      = fc['main']['temp_max']-KELVIN
            code          = fc['weather'][0]['id']
            precipitation = float(fc['rain']['3h']) if 'rain' in fc and '3h' in fc['rain'] else 0.0
            icon          = fc['weather'][0]['icon']
            description   = fc['weather'][0]['description']
            clouds        = float(fc['clouds']['all'])

            fc_const = 'aiUnlabeledFc%s%s' % (loc_label, dt_from.strftime('%Y%m%d%H%M%S'))

            logging.debug ("%s on %s-%s city_id=%s" % (fc_const, dt_from, dt_to, city_id))

            # aiDescription(aiUnlabeledFcFreudental20161205180000, "clear sky").
            # aiDtEnd(aiUnlabeledFcFreudental20161205180000, "2016-12-05T21:00:00+00:00").
            # aiTempMin(aiUnlabeledFcFreudental20161205180000, -6.666).
            # aiIcon(aiUnlabeledFcFreudental20161205180000, "01n").
            # aiLocation(aiUnlabeledFcFreudental20161205180000, wdeFreudental).
            # aiDtStart(aiUnlabeledFcFreudental20161205180000, "2016-12-05T18:00:00+00:00").
            # aiClouds(aiUnlabeledFcFreudental20161205180000, 0.0).
            # aiPrecipitation(aiUnlabeledFcFreudental20161205180000, 0.0).
            # aiTempMax(aiUnlabeledFcFreudental20161205180000, -6.45).

            env = do_retract(env, build_predicate('aiDescription',   [fc_const, '_']))
            env = do_retract(env, build_predicate('aiDtEnd',         [fc_const, '_']))
            env = do_retract(env, build_predicate('aiTempMin',       [fc_const, '_']))
            env = do_retract(env, build_predicate('aiIcon',          [fc_const, '_']))
            env = do_retract(env, build_predicate('aiLocation',      [fc_const, '_']))
            env = do_retract(env, build_predicate('aiDtStart',       [fc_const, '_']))
            env = do_retract(env, build_predicate('aiClouds',        [fc_const, '_']))
            env = do_retract(env, build_predicate('aiPrecipitation', [fc_const, '_']))
            env = do_retract(env, build_predicate('aiTempMax',       [fc_const, '_']))

            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiLocation',      [fc_const, location])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiTempMin',       [fc_const, temp_min])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiTempMax',       [fc_const, temp_max])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiPrecipitation', [fc_const, precipitation])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiClouds',        [fc_const, clouds])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiIcon',          [fc_const, StringLiteral(icon)])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDescription',   [fc_const, StringLiteral(description)])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDtStart',       [fc_const, StringLiteral(dt_from.isoformat())])))
            env = do_assertz(env, Clause(location=sl, head=build_predicate('aiDtEnd',         [fc_const, StringLiteral(dt_to.isoformat())])))

    kernal.rt.apply_overlay (WEATHER_DATA_MODULE, env)
Code example #28
File: abook-analyze.py  Project: Atsutane/speech
PROC_TITLE = 'abook-analyze'

MODELDIR = '../data/models/kaldi-chain-generic-%s-latest' % LANG
MODEL = 'tdnn_sp'

#
# init terminal
#

misc.init_app(PROC_TITLE)

#
# config
#

config = misc.load_config('.speechrc')

#
# command line
#

parser = OptionParser("usage: %prog [options] directory")

parser.add_option("-v",
                  "--verbose",
                  action="store_true",
                  dest="verbose",
                  help="enable debug output")

(options, args) = parser.parse_args()
Code example #29
def main(language_model, debug=0, verbose=False, *text_corpus):
    """Train n-gram language model on tokenized text corpora

    The resulting language model will be written to the directory
    data/dst/lm/<language_model>/. The search path for the tokenized text
    corpora is data/dst/text-corpora.

    Example:

        ./speech_build_lm.py my-language-model parole_de europarl_de

    A language model will be trained on the text corpora found in
    data/dst/text-corpora/parole_de.txt and
    data/dst/text-corpora/europarl_de.txt. The resulting language model
    will be written to the directory data/dst/lm/my-language-model/.
    """
    init_app('speech_build_lm')

    if len(text_corpus) < 1:
        logging.error("Argument text_corpus missing, at least one is "
                      "required.")
        sys.exit(1)

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    config = load_config('.speechrc')
    srilm_root = config.get("speech", "srilm_root")
    ngram_path = Path('%s/bin/i686-m64/ngram' % srilm_root)
    ngram_count_path = Path('%s/bin/i686-m64/ngram-count' % srilm_root)

    if not ngram_path.exists():
        logging.error("Could not find required executable %s" % ngram_path)
        sys.exit(1)

    if not ngram_count_path.exists():
        logging.error("Could not find required executable %s" %
                      ngram_count_path)
        sys.exit(1)

    outdir = LANGUAGE_MODELS_DIR / language_model
    outdir.mkdir(parents=True, exist_ok=True)

    train_fn = outdir / "train_all.txt"

    num_sentences = 0

    with codecs.open(str(train_fn), 'w', 'utf8') as dstf:
        for text_corpus_name in text_corpus:
            src = TEXT_CORPORA_DIR / (text_corpus_name + ".txt")
            logging.info('reading from sources %s' % src)
            with codecs.open(str(src), 'r', 'utf8') as srcf:
                while True:

                    line = srcf.readline()
                    if not line:
                        break

                    dstf.write(line)

                    num_sentences += 1
                    if num_sentences % SENTENCES_STATS == 0:
                        logging.info('%8d sentences.' % num_sentences)

                    if debug > 0 and num_sentences >= debug:
                        logging.warning(
                            'stopping because sentence debug limit is reached.'
                        )
                        break

    logging.info('done. %s written, %d sentences.' % (train_fn, num_sentences))

    lm_fn = outdir / 'lm_full.arpa'
    train_ngram_model(ngram_count_path, train_fn, lm_fn)

    lm_pruned_fn = outdir / 'lm.arpa'
    prune_ngram_model(ngram_path, lm_fn, lm_pruned_fn)