def versionInfo():
    """Collect human-readable version/build information lines.

    If ``/wikispeech/wikispeech_server/build_info.txt`` exists, its lines
    (stripped of surrounding whitespace) are used.  Otherwise a fallback
    description is built and the release is looked up with ``git``; when
    that lookup fails a warning is logged and ``"Release: unknown"`` is
    reported instead.

    Returns:
        list of str: the info lines, always ending with a ``"Started: ..."``
        entry built from the module-level ``startedAt`` value.
    """
    res = []
    buildInfoFile = "/wikispeech/wikispeech_server/build_info.txt"
    if os.path.isfile(buildInfoFile):
        # The context manager closes the file; no explicit close() is needed.
        with open(buildInfoFile) as fp:
            res.extend(line.strip() for line in fp)
    else:
        res.append("Application name: wikispeech")
        res.append("Build timestamp: n/a")
        res.append("Built by: user")

        try:
            tag = subprocess.check_output(
                ["git", "describe", "--tags"]).decode("utf-8").strip()
            branch = subprocess.check_output(
                ["git", "rev-parse", "--abbrev-ref",
                 "HEAD"]).decode("utf-8").strip()
            log.info(tag)
            log.info(branch)
            res.append(("Release: %s on branch %s") % (tag, branch))
        except Exception as err:
            # Best effort: git may be missing or this may not be a checkout.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            log.warning("couldn't retrieve git release info: %s" % err)
            res.append("Release: unknown")

    res.append("Started: " + startedAt)
    return res
Esempio n. 2
0
def static_proxy_audio(path):
    """Serve the audio file *path* from the configured audio tmp directory.

    The directory name is read from the "audio_tmpdir" option of the
    "Audio settings" config section and is looked up below the current
    working directory.
    """
    tmp_dir = config.config.get("Audio settings", "audio_tmpdir")
    log.info("Looking for audio file %s" % "/".join((tmp_dir, path)))
    # Delegate the actual file lookup to send_from_directory, rooted at
    # <cwd>/<audio_tmpdir>.
    serve_root = "/".join((os.getcwd(), tmp_dir))
    return send_from_directory(serve_root, path)
def mapperMapFromMary(trans, lang, voice):
    """Map the transcription *trans* back from the voice's marytts symbol set.

    When *voice* has no "mapper" entry, *trans* is returned unchanged.
    Otherwise the mapper service at ``mapper_url`` is queried and the
    "Result" field of its JSON reply is returned.  If the reply cannot be
    parsed or has no "Result", the failure is logged and the exception is
    re-raised.
    """
    log.info("mapperMapFromMary( %s , %s , %s )" % (trans, lang, voice))

    if "mapper" not in voice:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    # The config keys are named for the opposite direction, so when mapping
    # *from* mary the source set is the config's "to" and the target its
    # "from".  ("external"/"internal" might be better key names.)
    symbol_sets = voice["mapper"]
    to_symbol_set = symbol_sets["from"]
    from_symbol_set = symbol_sets["to"]

    # The transcription is escaped with quote (quote_plus was tried and
    # dropped again in June 2017, per the original notes).
    path = "/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set, quote(trans))
    url = mapper_url + path
    log.debug("MAPPER URL before requests: %s" % url)

    reply = requests.get(url)
    log.debug("MAPPER URL: "+reply.url)
    body = reply.text
    try:
        new_trans = json.loads(body)["Result"]
    except:
        # Log with full context, then let the caller see the original error.
        log.error("unable to map %s, from %s to %s. response was %s" % (trans, from_symbol_set, to_symbol_set, body))
        raise
    return new_trans
 def __init__(self, cconfig):
     """Set up the component from its config dict *cconfig*.

     Reads the required "module" and "call" entries, creates a Mapper when
     a "mapper" entry (with "from" and "to") is present, and, for the
     marytts adapter module, sends a small test request to the configured
     marytts service.

     Raises:
         TextprocComponentException: if the Mapper cannot be created or
             the marytts test request fails.
     """
     self.type = "TextprocComponent"
     self.module = cconfig["module"]
     self.call = cconfig["call"]

     if "mapper" in cconfig:
         try:
             symbol_sets = cconfig["mapper"]
             self.mapper = Mapper(symbol_sets["from"], symbol_sets["to"])
         except MapperException as mapper_error:
             raise TextprocComponentException(mapper_error)

     is_marytts = "module" in cconfig and cconfig["module"] == "adapters.marytts_adapter"
     if is_marytts:
         log.info("Trying to create marytts component: %s" % cconfig)
         # For testing marytts_adapter.
         # TODO? move to a test function in marytts_adapter
         try:
             marytts_url = config.config.get("Services", "marytts")
             probe = requests.get(marytts_url, params={
                 "INPUT_TYPE": "TEXT",
                 "OUTPUT_TYPE": "INTONATION",
                 "LOCALE": "en_US",
                 "INPUT_TEXT": "test"
             })
             log.debug("CALLING MARYTTS: %s" % probe.url)    
             # The reply body is read but not inspected further.
             xml = probe.text
         except Exception as probe_error:
             raise TextprocComponentException(probe_error)
Esempio n. 5
0
def static_proxy_audio(path):
    """Return the audio file *path* from the configured audio tmp directory.

    The directory comes from the "audio_tmpdir" option in the
    "Audio settings" config section; the file is served from that
    directory below the current working directory.
    """
    audio_dir = config.config.get("Audio settings", "audio_tmpdir")
    wanted_file = "/".join([audio_dir, path])
    log.info("Looking for audio file %s" % wanted_file)
    # send_from_directory performs the lookup inside <cwd>/<audio_tmpdir>.
    base_dir = "/".join([os.getcwd(), audio_dir])
    return send_from_directory(base_dir, path)
Esempio n. 6
0
def lexserver_proxy(url):
    """Forward the request for *url* to the lexicon server on localhost:8787.

    The incoming query string, if any, is appended to the forwarded URL.
    The upstream reply is streamed back with the upstream content type.
    """
    if request.query_string:
        query_part = "?" + request.query_string.decode("utf-8")
    else:
        query_part = ""
    redirect_url = "http://localhost:8787/%s%s" % (url, query_part)
    log.info("Lexserver proxy to: %s" % redirect_url)

    upstream = requests.get(redirect_url, stream=True)
    body = stream_with_context(upstream.iter_content())
    return Response(body, content_type=upstream.headers['content-type'])
Esempio n. 7
0
def _addUniqueConfigs(target, candidates, label, path):
    """Append each config in *candidates* whose "name" is not yet in *target*.

    Accepted configs are tagged with the file they came from
    ("config_file"); duplicates are skipped with a warning that starts
    with *label* ("Textprocessor" or "Voice").
    """
    for conf in candidates:
        if any(known["name"] == conf["name"] for known in target):
            log.warning("%s %s defined more than once: file %s" % (label, conf["name"], path))
            continue
        conf["config_file"] = path
        target.append(conf)


def loadJsonConfigurationFiles():
    """(Re)load textprocessor and voice configs from the JSON config files.

    The file names come from the "config_files" option of the
    "Voice config" section (one name per line).  Each name is tried as
    given and then relative to the "config_files_location" option, which
    defaults to "wikispeech_server".  Comments are removed from each file
    with ``remove_comments`` before it is parsed as JSON.

    The module-level lists ``textprocessor_configs`` and ``voice_configs``
    are reset and refilled; the first definition of a name wins.

    Raises:
        SystemExit: with exit status 1 if a listed config file is not found.
    """
    global textprocessor_configs, voice_configs
    textprocessor_configs = []
    voice_configs = []

    cf_dir = "wikispeech_server"
    if config.config.has_option("Voice config", "config_files_location"):
        cf_dir = config.config.get("Voice config", "config_files_location")

    # No config file is loaded implicitly (the testing config used to be).
    config_files = []
    if config.config.has_option("Voice config", "config_files"):
        for name in config.config.get("Voice config", "config_files").split("\n"):
            if name != "" and name not in config_files:
                config_files.append(name)

    for config_file in config_files:
        fallback = "%s/%s" % (cf_dir, config_file)
        if os.path.isfile(config_file):
            path = config_file
        elif os.path.isfile(fallback):
            path = fallback
        else:
            print("Config file %s or %s not found" % (config_file, fallback))
            # A missing config file is an error, so report a failing exit
            # status (a bare sys.exit() would exit with status 0).
            sys.exit(1)

        with open(path) as json_file:
            log.info("Reading config file: %s" % path)
            json_like = json_file.read()
        parsed = json.loads(remove_comments(json_like))

        if "textprocessor_configs" in parsed:
            _addUniqueConfigs(textprocessor_configs,
                              parsed["textprocessor_configs"],
                              "Textprocessor", path)

        if "voice_configs" in parsed:
            _addUniqueConfigs(voice_configs, parsed["voice_configs"],
                              "Voice", path)
Esempio n. 8
0
def loadVoice(voice_config):
    """Create a Voice from *voice_config* and add it to ``voices``.

    A VoiceException raised while loading is logged as a warning and then
    re-raised to the caller.
    """
    try:
        log.info("Loading voice %s" % (voice_config["name"]))
        loaded = Voice(voice_config)
        log.info("Done loading voice %s" % (voice_config["name"]))
    except VoiceException as load_error:
        log.warning("Failed to load voice from %s. Reason:\n%s" % (voice_config, load_error))
        raise
    else:
        voices.append(loaded)
Esempio n. 9
0
def loadTextprocessor(tp_config):
    """Create a Textprocessor from *tp_config* and add it to ``textprocessors``.

    A TextprocessorException raised while loading is logged as a warning
    and then re-raised to the caller.
    """
    try:
        log.info("Loading textprocessor %s" % (tp_config["name"]))
        loaded = Textprocessor(tp_config)
        log.info("Done loading textprocessor %s" % (tp_config["name"]))
    except TextprocessorException as load_error:
        log.warning("Failed to load textprocessor from %s. Reason:\n%s" % (tp_config, load_error))
        raise
    else:
        textprocessors.append(loaded)
def lexserver_proxy(url):
    """Forward the request for *url* to the configured lexicon server.

    The server address is the "lexicon" option of the "Services" config
    section.  The incoming query string, if any, is appended, and the
    upstream reply is streamed back with the upstream content type.
    """
    lexicon_host = config.config.get("Services", "lexicon")
    if request.query_string:
        query_part = "?" + request.query_string.decode("utf-8")
    else:
        query_part = ""
    redirect_url = "%s/%s%s" % (lexicon_host, url, query_part)
    log.info("Lexserver proxy to: %s" % redirect_url)

    upstream = requests.get(redirect_url, stream=True)
    body = stream_with_context(upstream.iter_content())
    return Response(body, content_type=upstream.headers['content-type'])
Esempio n. 11
0
def mapperMapToMary(trans, lang, voice):
    """Map the transcription *trans* into the voice's marytts symbol set.

    When *voice* has no "mapper" entry, *trans* is returned unchanged.
    Otherwise the mapper service at ``mapper_url`` is queried and the
    "Result" field of its JSON reply is used.  For Swedish ("sv") a few
    extra substitutions are applied to that result.

    Raises:
        json.JSONDecodeError: if the service reply is not valid JSON (the
            reply is logged first).
    """
    log.debug("mapperMapToMary( %s, %s, %s)" % (trans, lang, voice))

    if "mapper" not in voice:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    symbol_sets = voice["mapper"]
    to_symbol_set = symbol_sets["to"]
    from_symbol_set = symbol_sets["from"]
    log.info("marytts mapper defined for language %s\nFrom: %s\nTo: %s" % (lang, from_symbol_set, to_symbol_set))

    # The transcription is escaped with quote; quote_plus was dropped in
    # June 2017 (per the original notes).
    url = mapper_url + "/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set, quote(trans))
    log.debug("MAPPER URL before requests: %s" % url)

    reply = requests.get(url)
    log.debug("MAPPER URL: %s" % reply.url)
    body = reply.text
    log.debug("MAPPER RESPONSE: %s" % body)
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        log.error("JSONDecodeError:")
        log.error("RESPONSE: %s" % body)
        raise

    new_trans = parsed["Result"]

    # Special cases for Swedish pre-r allophones that the mapper does not
    # handle: mary uses an old version of the phoneme set that doesn't
    # distinguish between normal and r-coloured E/{ (always E) and 2/9
    # (always 9).  This should change in mary later on.
    # NOTE(review): the last pattern drops the trailing space and always
    # emits " r", even when no "r" was matched -- confirm this is intended.
    if lang == "sv":
        swedish_fixes = (
            ("{ - ", r"E - "),
            ("{ ", r"E "),
            ("2(:? -) r? ", r"9\1 r"),
        )
        for pattern, replacement in swedish_fixes:
            new_trans = re.sub(pattern, replacement, new_trans)

    log.debug("NEW TRANS: %s" % new_trans)
    return new_trans
def mapperMapToMary(trans, lang, voice):
    """Convert the transcription *trans* to the voice's marytts symbol set.

    Without a "mapper" entry in *voice* the transcription is returned
    as-is.  Otherwise the mapper service at ``mapper_url`` is called and
    the "Result" field of its JSON reply is returned, after a few extra
    substitutions when *lang* is "sv".

    Raises:
        json.JSONDecodeError: if the service reply is not valid JSON (the
            reply is logged first).
    """
    log.debug("mapperMapToMary( %s, %s, %s)" % (trans, lang, voice))

    has_mapper = "mapper" in voice
    if not has_mapper:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    mapper_conf = voice["mapper"]
    to_symbol_set = mapper_conf["to"]
    from_symbol_set = mapper_conf["from"]
    log.info("marytts mapper defined for language %s\nFrom: %s\nTo: %s" %
             (lang, from_symbol_set, to_symbol_set))

    # quote is used for escaping; quote_plus was dropped in June 2017
    # (per the original notes).
    request_path = "/mapper/map/%s/%s/%s" % (from_symbol_set,
                                             to_symbol_set, quote(trans))
    url = mapper_url + request_path
    log.debug("MAPPER URL before requests: %s" % url)

    r = requests.get(url)
    log.debug("MAPPER URL: %s" % r.url)
    raw_reply = r.text
    log.debug("MAPPER RESPONSE: %s" % raw_reply)
    try:
        reply_json = json.loads(raw_reply)
    except json.JSONDecodeError:
        log.error("JSONDecodeError:")
        log.error("RESPONSE: %s" % raw_reply)
        raise

    new_trans = reply_json["Result"]

    # Special cases for Swedish pre-r allophones that are not handled by
    # the mapper: mary uses an old version of the phoneme set that doesn't
    # distinguish between normal and r-coloured E/{ (always E) and 2/9
    # (always 9).  This should change in mary later on.
    # NOTE(review): the last pattern drops the trailing space and always
    # emits " r", even when no "r" was matched -- confirm this is intended.
    if lang == "sv":
        for pattern, replacement in [("{ - ", r"E - "),
                                     ("{ ", r"E "),
                                     ("2(:? -) r? ", r"9\1 r")]:
            new_trans = re.sub(pattern, replacement, new_trans)

    log.debug("NEW TRANS: %s" % new_trans)

    return new_trans
Esempio n. 13
0
def synthesise(lang, voice, utterance, presynth=False, hostname=None):
    """Synthesise *utterance* through the AhoTTS client.

    The text in ``utterance['original_text']`` is written to a temporary
    input file, ``tts_client`` is run on it through the shell, and the
    word timing file it produces is read to build the tokens.  All
    temporary files are named after the md5 of text, language and voice
    name.

    Args:
        lang: language code; used only in the hash.
        voice: voice config; its "name" is used in the hash.
        utterance: markup with an "original_text" entry, also passed to
            ``get_orth`` to obtain the word list.
        presynth: unused here.
        hostname: prefix for the returned audio URL.

    Returns:
        tuple: ``(audio_url, tokens)`` where each token is a dict with
        "orth", "starttime" and "endtime".

    Raises:
        UnicodeEncodeError: if the text cannot be encoded as latin-1 for
            the input file.
    """
    log.info("Utterance: %s" % utterance)
    text = utterance['original_text']
    log.info("Text: %s" % text)
    words = get_orth(utterance)
    log.info("Words: %s" % words)

    hashstring = text+'&Lang='+lang+'&Voice='+voice['name']
    try:
        hash_bytes = hashstring.encode('latin-1')
    except UnicodeEncodeError:
        # Text outside latin-1 is still hashable as utf-8.
        hash_bytes = hashstring.encode('utf-8')
    hashnumber = hashlib.md5(hash_bytes).hexdigest()

    # Encode before opening so a failure does not leave an empty input file,
    # and use a context manager so the handle is closed even on error.
    input_bytes = text.encode('latin-1')+'\n'.encode('latin-1')
    with open("%s/bin/tts_%s.txt" % (ahotts_dir, hashnumber), "wb") as inputfile:
        inputfile.write(input_bytes)

    ahotts_command = "cd %s/bin ; ./tts_client -SetDur=y -Speed=%s -IP=%s -Port=%s -InputFile=tts_%s.txt -OutputFile=tts_%s.wav -WordFile=tts_%s.wrd -PhoFile=tts_%s.pho ; mv tts_%s.wav %s/%s/tts_%s.wav ; rm tts_%s.txt" % (ahotts_dir, ahotts_speed, ahotts_server_ip, ahotts_server_port, hashnumber, hashnumber, hashnumber, hashnumber, hashnumber, cwdir, tmpdir, hashnumber, hashnumber)
    log.info("Ahotts command: %s" % ahotts_command)
    os.system(ahotts_command)

    audio_url = "%s%s/%s" % (hostname, "audio",'tts_%s.wav' % hashnumber)

    wrd_path = ahotts_dir+'/tts_'+hashnumber+'.wrd'
    with open(wrd_path, 'r') as words_times_file:
        words_times = words_times_file.readlines()
    os.remove(wrd_path)
    os.remove(ahotts_dir+'/tts_'+hashnumber+'.pho')
    log.info(str(words))

    # Each line: drop the last character (the newline) and keep the second
    # space-separated field, the word's end time.
    words_times = [line[:-1].split(' ')[1] for line in words_times]

    tokens = []
    starttime = 0.0
    for word_ind, word in enumerate(words):
        if word_ind < len(words_times):
            # presumably milliseconds -> seconds; confirm against AhoTTS
            endtime = float(words_times[word_ind])/1000
        else:
            # More words than timings: reuse the previous end time.
            endtime = starttime
        tokens.append({"orth":word, "starttime":starttime, "endtime":endtime})
        starttime = endtime
    return (audio_url, tokens)
Esempio n. 14
0
    def testVoice(self):
        """Check that this voice is available on its engine.

        Only the "marytts" engine is checked: the voices URL is derived
        from the "marytts" option of the "Services" config section by
        replacing "process" with "voices", and ``self.name`` must be among
        the voice names parsed from the reply.

        Raises:
            VoiceException: if the request fails or the voice is not listed.
        """
        log.info("Testing voice %s" % self.name)
        if self.engine == "marytts":
            voice_host = config.config.get("Services", "marytts")
            url = re.sub("process","voices",voice_host)
            log.debug("Calling url: %s" % url)
            try:
                r = requests.get(url)
            except Exception as err:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are not reported as a missing server.
                msg = "Marytts server not found at url %s" % (url)
                log.error(msg)
                raise VoiceException(msg) from err

            response = r.text
            log.debug("Response:\n%s" % response)
            marytts_voicenames = self.getMaryttsVoicenames(response)
            if self.name not in marytts_voicenames:
                msg = "Voice %s not found at url %s" % (self.name, url)
                log.error(msg)
                raise VoiceException(msg)
            else:
                log.info("Voice found at url %s" % url)
Esempio n. 15
0
    def testVoice(self):
        """Check that this voice is available on its engine.

        * "marytts": the voices URL is derived from the "marytts" option
          of the "Services" config section by replacing "process" with
          "voices"; ``self.name`` must be among the voice names parsed
          from the reply.
        * "ahotts": a short test sentence is synthesised with
          ``tts_client`` through the shell; the check passes if the
          resulting wav file can be opened, after which it is deleted.

        Raises:
            VoiceException: if the marytts request fails, the voice is not
                listed, or the AhoTTS test wav file was not produced.
        """
        log.info("Testing voice %s" % self.name)
        if self.engine == "marytts":
            voice_host = config.config.get("Services", "marytts")
            url = re.sub("process","voices",voice_host)
            log.debug("Calling url: %s" % url)
            try:
                r = requests.get(url)
            except Exception as err:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are not reported as a missing server.
                msg = "Marytts server not found at url %s" % (url)
                log.error(msg)
                raise VoiceException(msg) from err

            response = r.text
            log.debug("Response:\n%s" % response)
            marytts_voicenames = self.getMaryttsVoicenames(response)
            if self.name not in marytts_voicenames:
                msg = "Voice %s not found at url %s" % (self.name, url)
                log.error(msg)
                raise VoiceException(msg)
            else:
                log.info("Voice found at url %s" % url)
        elif self.engine == "ahotts":
            cwdir = os.getcwd()
            tmpdir = config.config.get("Audio settings","audio_tmpdir")
            ahotts_dir = config.config.get("Services", "ahotts_dir")
            ahotts_server_ip = config.config.get("Services", "ahotts_server_ip")
            ahotts_server_port = config.config.get("Services", "ahotts_server_port")
            ahotts_command = "cd %s/bin; echo \"Hasierako proba\" > ahotts_test.txt; ./tts_client -IP=%s -Port=%s -InputFile=ahotts_test.txt -OutputFile=ahotts_test.wav ; mv ahotts_test.wav %s/%s/ahotts_test.wav ; rm ahotts_test.txt" % (ahotts_dir, ahotts_server_ip, ahotts_server_port, cwdir, tmpdir)
            os.system(ahotts_command)

            test_wav = '%s/%s/ahotts_test.wav'%(cwdir, tmpdir)
            try:
                # Opening the file is only an existence/readability probe.
                with open(test_wav, 'r'):
                    pass
                os.remove(test_wav)
            except OSError as err:
                msg = "AhoTTS server not found at IP %s and Port %s" % (ahotts_server_ip,ahotts_server_port)
                log.error(msg)
                raise VoiceException(msg) from err
    def testLookup(self):
        """Look up "apa" in the Swedish test lexicon and check its first transcription."""
        lexicon_name = "wikispeech_testdb:sv"
        lexicon = Lexicon(lexicon_name)

        orth = "apa"

        # Only the first transcription string of the first entry is compared,
        # i.e. result[0]['transcriptions'][0]['strn'].  The rest of the entry
        # (ids, timestamps, lexicon reference) is deliberately not pinned
        # here, so the former full-entry fixtures were dropped.
        expected_first_trans = '"" A: . p a'

        result = lexicon.lookup(orth)
        log.info("RESULT: %s" % result)

        # Fail with a clear message instead of an IndexError on no hits.
        self.assertTrue(result, "lookup of %r returned no entries" % orth)
        result_first_trans = result[0]['transcriptions'][0]['strn']

        self.assertEqual(expected_first_trans,result_first_trans)
Esempio n. 17
0
def testVoice(voice_config):
    """Check that the voice named in *voice_config* exists on the marytts server.

    The voices URL is derived from the "marytts" option of the "Services"
    config section by replacing "process" with "voices"; the voice's
    "name" must be among the names parsed from the reply by
    ``getMaryttsVoicenames``.

    Raises:
        VoiceException: if the request fails or the voice is not listed.
    """
    voice_host = config.config.get("Services", "marytts")
    url = re.sub("process", "voices", voice_host)
    name = voice_config["name"]

    log.debug("Calling url: %s" % url)
    try:
        r = requests.get(url)
    except Exception as err:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not reported as a missing server.
        msg = "Marytts server not found at url %s" % (url)
        log.error(msg)
        raise VoiceException(msg) from err

    response = r.text
    log.debug("Response:\n%s" % response)
    marytts_voicenames = getMaryttsVoicenames(response)
    log.debug("marytts_voicenames: %s" % marytts_voicenames)
    if name not in marytts_voicenames:
        msg = "Voice %s not found at url %s" % (name, url)
        log.error(msg)
        raise VoiceException(msg)
    else:
        log.info("Voice found at url %s" % url)
def mapperMapFromMary(trans, lang, voice):
    """Convert the transcription *trans* back from the voice's marytts symbol set.

    Without a "mapper" entry in *voice* the transcription is returned
    as-is.  Otherwise the mapper service at ``mapper_url`` is called and
    the "Result" field of its JSON reply is returned.  A reply that cannot
    be parsed, or that has no "Result", is logged and the exception is
    re-raised.
    """
    log.info("mapperMapFromMary( %s , %s , %s )" % (trans, lang, voice))

    has_mapper = "mapper" in voice
    if not has_mapper:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    # The config keys are named for the opposite direction: mapping *from*
    # mary goes from the config's "to" set to its "from" set.  ("external"
    # and "internal" might be better key names.)
    mapper_conf = voice["mapper"]
    to_symbol_set = mapper_conf["from"]
    from_symbol_set = mapper_conf["to"]

    # quote is used for escaping (quote_plus was tried and dropped again in
    # June 2017, per the original notes).
    request_path = "/mapper/map/%s/%s/%s" % (from_symbol_set,
                                             to_symbol_set, quote(trans))
    url = mapper_url + request_path
    log.debug("MAPPER URL before requests: %s" % url)

    r = requests.get(url)
    log.debug("MAPPER URL: " + r.url)
    raw_reply = r.text
    try:
        reply_json = json.loads(raw_reply)
        new_trans = reply_json["Result"]
    except:
        # Log with full context, then let the caller see the original error.
        log.error("unable to map %s, from %s to %s. response was %s" %
                  (trans, from_symbol_set, to_symbol_set, raw_reply))
        raise
    return new_trans
Esempio n. 19
0
def synthesise(lang, voice, utterance, presynth=False):
    """Synthesise *utterance* with a local ahotts installation.

    The words from ``get_orth(utterance)`` are written to ``input.txt`` in
    ``~/ahotts-code/bin``, ``./tts`` is run there, and the resulting
    ``Output.wav`` is moved to ``cwdir/tmpdir/wavfile_name`` (module-level
    values).

    Args:
        lang, voice, presynth: unused here.
        utterance: markup passed to ``get_orth`` to obtain the word list.

    Returns:
        tuple: ``(audio_url, tokens)``; each token is a dict with "orth"
        and an "endtime" of 0 (no timing information is available).
    """
    import shlex  # local import: only needed to quote the shell argument

    log.info("Utterance: %s" % utterance)
    words = get_orth(utterance)
    log.info("Words: %s" % words)
    # The words originate from the request text, so they are shell-quoted
    # before being placed on the command line; inside plain double quotes
    # "$(...)", backticks or an embedded '"' would be interpreted by the shell.
    ahotts_command = "cd ~/ahotts-code/bin; echo %s > input.txt; ./tts; mv Output.wav %s/%s/%s" % (shlex.quote(" ".join(words)), cwdir, tmpdir, wavfile_name)
    log.info("Ahotts command: %s" % ahotts_command)
    os.system(ahotts_command)

    audio_url = "%s/%s" % (prefix,wavfile_name)
    tokens = [{"orth":word, "endtime":0} for word in words]
    return (audio_url, tokens)
Esempio n. 20
0
def wikispeech():
    """Serve one wikispeech request: text processing followed by synthesis.

    Request parameters are read with ``getParam``: "lang", "input",
    "input_type" (default "text"), "output_type" (default "json"),
    "presynth", "textprocessor" and "voice".  The module-level
    ``hostname`` is set from the request URL as a side effect.

    Returns:
        * the rendered usage page when "lang" or "input" is missing;
        * a plain message string for an unsupported language, input type
          or output type, or when text processing or synthesis returns a
          string;
        * the JSON dump of ``getTestExample(lang)`` for the input
          "TEST_EXAMPLE";
        * otherwise a JSON ``Response`` holding the synthesis result.
    """
    global hostname

    from urllib.parse import urlparse
    parsed_uri = urlparse(request.url)
    hostname = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)

    log.info("request: %s" % request)
    log.info("request.url: %s" % request.url)
    log.info("hostname: %s" % hostname)
    if not hostname.endswith("/"):
        hostname = hostname+"/"
    # HL 20171121: force https for wikispeech.morf.se
    if "wikispeech.morf.se" in hostname:
        hostname = hostname.replace("http://","https://")
    log.debug("hostname: %s" % hostname)

    lang = getParam("lang")
    input_text = getParam("input")
    input_type = getParam("input_type", "text")
    output_type = getParam("output_type", "json")

    # For use with synthesis only: enabled only by the exact string "True".
    presynth = bool(getParam("presynth", False) == "True")

    textprocessor_name = getParam("textprocessor", "default_textprocessor")
    voice_name = getParam("voice", "default_voice")

    log.debug("WIKISPEECH CALL - LANG: %s, INPUT_TYPE: %s, OUTPUT_TYPE: %s, INPUT: %s" % (lang, input_type, output_type, input_text))

    supported_languages = getSupportedLanguages()

    if not (lang and input_text):
        return render_template("usage.html", server=hostname, languages=supported_languages)

    if lang not in supported_languages:
        return "Language %s not supported. Supported languages are: %s" % (lang, supported_languages)

    if input_text == "TEST_EXAMPLE":
        return json.dumps(getTestExample(lang))

    if input_type not in ["text","ssml"]:
        return "input_type %s not supported" % input_type

    markup = textproc(lang, textprocessor_name, input_text, input_type=input_type)
    # A plain string from textproc is a message for the client, not markup.
    if type(markup) == str:
        log.debug("RETURNING MESSAGE: %s" % markup)
        return markup

    if output_type != "json":
        return "output_type %s not supported" % output_type

    result = synthesise(lang, voice_name, markup,"markup",output_type, hostname=hostname, presynth=presynth)
    # Likewise, a plain string from synthesise is a message for the client.
    if type(result) == str:
        log.debug("RETURNING MESSAGE: %s" % result)
        return result

    # TODO
    # The player being developed at wikimedia depends on the output matching
    # the input exactly (phabricator T147547).  Some special characters, like
    # the em-dash, aren't returned properly by the TTS server.  This breaks
    # the token-to-HTML mapping, since it relies on finding the exact same
    # strings in the HTML as the tokens' orth values.  The check below adds a
    # message to the result when a mismatch is reported; whether adapter
    # components should be required to conform is still open.
    msg = checkInputAndOutputTokens(input_text,result["tokens"])
    if msg:
        result["message"] = msg

    json_data = json.dumps(result)
    return Response(json_data, mimetype='application/json')
Esempio n. 21
0
from wikispeech_server.voice import Voice, VoiceException

import os.path
import datetime
import pytz
from pytz import timezone

import subprocess

#################
#
# Test opusenc before anything else
#
################

# Startup check: the server refuses to start without the opusenc binary.
log.info("\nOPUSENC\n\nChecking that opusenc is installed on your system..")
retval = os.system("opusenc -V")
if retval == 0:
    log.info("opusenc found.\n\nEND OPUSENC\n")
else:
    # The command is run once more right before the error message is logged.
    os.system("opusenc -V")
    log.error("ERROR: opusenc was not found. You should probably run something like\nsudo apt install opus-tools\n")
    sys.exit(1)


###############
#
# Load textprocessors and voices
#
###############
Esempio n. 22
0
def wikispeech():
    """Handle a request to the main wikispeech endpoint.

    Reads ``lang``, ``input``, ``input_type``, ``output_type``,
    ``textprocessor`` and ``voice`` through ``getParam``, runs ``textproc``
    followed by ``synthesise`` and returns the result either as a JSON
    response or as the rendered ``output.html`` template.

    When ``lang`` or ``input`` is missing, ``usage.html`` is rendered
    instead.  Unsupported languages, input types and output types are
    answered with a plain message string, and whenever ``textproc`` or
    ``synthesise`` hands back a string, that string is returned unchanged.

    As a side effect the module-level ``hostname`` is set from the
    request URL.
    """
    global hostname

    from urllib.parse import urlparse
    url_parts = urlparse(request.url)
    hostname = '{uri.scheme}://{uri.netloc}/'.format(uri=url_parts)

    # log.debug("request.url: %s" % hostname)
    log.debug("request: %s" % request)
    log.info("request.url: %s" % request.url)
    log.debug("hostname: %s" % hostname)
    if not hostname.endswith("/"):
        hostname += "/"
    ## HL 20171121: force https for wikispeech.morf.se
    if "wikispeech.morf.se" in hostname:
        hostname = hostname.replace("http://","https://")
    log.debug("hostname: %s" % hostname)

    # Request parameters (the last four have defaults).
    lang = getParam("lang")
    input_text = getParam("input")
    input_type = getParam("input_type", "text")
    output_type = getParam("output_type", "json")
    textprocessor_name = getParam("textprocessor", "default_textprocessor")
    voice_name = getParam("voice", "default_voice")

    log.debug("WIKISPEECH CALL - LANG: %s, INPUT_TYPE: %s, OUTPUT_TYPE: %s, INPUT: %s" % (lang, input_type, output_type, input_text))

    supported_languages = getSupportedLanguages()

    # Without both a language and some input there is nothing to do:
    # show the usage page.
    if not (lang and input_text):
        return render_template("usage.html", server=hostname, languages=supported_languages)

    if lang not in supported_languages:
        return "Language %s not supported. Supported languages are: %s" % (lang, supported_languages)

    if input_text == "TEST_EXAMPLE":
        return json.dumps(getTestExample(lang))

    if input_type not in ["text","ssml"]:
        return "input_type %s not supported" % input_type

    markup = textproc(lang, textprocessor_name, input_text, input_type=input_type)
    # A plain string from textproc is a message, not markup: pass it on.
    if type(markup) == str:
        log.debug("RETURNING MESSAGE: %s" % markup)
        return markup

    if output_type not in ["json", "html"]:
        return "output_type %s not supported" % output_type

    result = synthesise(lang, voice_name, markup,"markup",output_type, hostname=hostname)
    # Same convention as above: a plain string is a message for the caller.
    if type(result) == str:
        log.debug("RETURNING MESSAGE: %s" % result)
        return result

    #TODO
    #The player being developed at wikimedia depends on the output matching input exactly
    #phabricator T147547
    #Some special characters, like "—" (em-dash) aren't returned properly by the TTS-server. This breaks the token-to-HTML mapping, since it relies on finding the exact same strings in the HTML as the tokens orth values.
    #Add a test for that here,
    #And then require adapter components to conform to this?
    #how, exactly ...
    msg = checkInputAndOutputTokens(input_text,result["tokens"])
    if msg:
        result["message"] = msg

    json_data = json.dumps(result)

    if output_type == "json":
        return Response(json_data, mimetype='application/json')

    # Remaining case is "html": give every token a start time (the end time
    # of the token before it, 0 for the first) and a duration.
    timed_tokens = []
    previous_end = 0
    for token in result["tokens"]:
        token["starttime"] = previous_end
        token["dur"] = token["endtime"]-previous_end
        timed_tokens.append(token)
        previous_end = token["endtime"]

    return render_template("output.html", audio_data=result["audio_data"], tokens=timed_tokens)
Esempio n. 23
0
        return "{name:%s, lang:%s}" % (self.name, self.lang)

    def __str__(self):
        """Return a short textual description built from ``name`` and ``lang``.

        ``__str__`` has to return a ``str``.  The previous implementation
        returned a dict, which makes ``str(obj)`` and ``"%s" % obj`` raise
        ``TypeError``.
        """
        return "{name:%s, lang:%s}" % (self.name, self.lang)




    
if __name__ == "__main__":
    # Manual check when run as a script: try to build one Voice from a
    # hard-coded configuration and log whether that worked.

    log.log_level = "debug" #debug, info, warning, error

    symbol_mapper = {
        "from":"sv-se_ws-sampa",
        "to":"sv-se_sampa_mary"
    }
    voice_config = {
        "lang":"sv",
        "name":"stts_sv_nst-hsmm",
        "engine":"marytts",
        "adapter":"adapters.marytts_adapter",
        "mapper": symbol_mapper
    }

    try:
        v = Voice(voice_config)
        created_msg = "Created voice %s from %s" % (v, voice_config)
        log.info(created_msg)
    except VoiceException as e:
        failed_msg = "Failed to create voice for %s\nException message was:\n%s" % (voice_config, e)
        log.error(failed_msg)
Esempio n. 24
0
def lexserver_proxy(url):
    """Forward a request to the configured lexicon service and stream its reply.

    The target address is built from the ``lexicon`` option of the
    ``Services`` config section, the given ``url`` and, when present, the
    query string of the incoming request.  The reply body is streamed back
    with the content type reported by the lexicon service.
    """
    lexicon_host = config.config.get("Services","lexicon")

    # Only append "?<query>" when the incoming request actually had one.
    if request.query_string:
        query_suffix = "?" + request.query_string.decode("utf-8")
    else:
        query_suffix = ""

    redirect_url = "%s/%s%s" % (lexicon_host, url, query_suffix)
    log.info("Lexserver proxy to: %s" % redirect_url)

    upstream = requests.get(redirect_url, stream = True)
    streamed_body = stream_with_context(upstream.iter_content())
    return Response(streamed_body, content_type = upstream.headers['content-type'])
Esempio n. 25
0
def static_proxy_js():
    """Serve ``wikispeech_simple_player.js`` from the current working directory."""
    script_name = "wikispeech_simple_player.js"
    serve_from = os.getcwd()
    log.info("Looking for static file %s/%s" % (serve_from, script_name))
    return send_from_directory(serve_from, script_name)
import sys
import json
if __name__ == "__main__":
    import os
    sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/..")
    #print(sys.path)
    
import wikispeech_server.wikispeech as ws
import wikispeech_server.log as log


# Module-level setup shared by the test functions below.
log.info("RUNNING VOICE CONFIG TESTS")

#HB 171127 host = "http://localhost:10000/"
# Requests go through the app's own test client, addressed relative to "/".
host = "/"
test_client = ws.app.test_client()


# Ask the root endpoint (OPTIONS) for its API description and take the list
# of allowed values for the "lang" GET parameter from it.
r = test_client.options(host)
api_info = json.loads(r.data.decode('utf-8'))
supported_languages = api_info["GET"]["parameters"]["lang"]["allowed"]


def test_default_settings():
    """Send a GET with ``input=test.`` for every supported language and log the reply."""
    #1) Test default settings for supported languages
    for lang in supported_languages:
        log.debug("START: %s" % lang)

        # GET:  curl "http://localhost:10000/?lang=en&input=test."
        request_url = "%s?lang=%s&input=test." % (host,lang)
        response = test_client.get(request_url)
        reply_text = response.data.decode('utf-8')
        log.debug(reply_text)
Esempio n. 27
0
            l.append("config_file: %s" % (self.config["config_file"]))
        
        return "{%s}" % ", ".join(l)

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()



    
if __name__ == "__main__":
    # Manual check when run as a script: try to build one Voice from a
    # hard-coded configuration and log whether that worked.

    log.log_level = "debug" #debug, info, warning, error

    symbol_mapper = {
        "from":"sv-se_ws-sampa",
        "to":"sv-se_sampa_mary"
    }
    voice_config = {
        "lang":"sv",
        "name":"stts_sv_nst-hsmm",
        "engine":"marytts",
        "adapter":"adapters.marytts_adapter",
        "mapper": symbol_mapper
    }

    try:
        v = Voice(voice_config)
        created_msg = "Created voice %s from %s" % (v, voice_config)
        log.info(created_msg)
    except VoiceException as e:
        failed_msg = "Failed to create voice for %s\nException message was:\n%s" % (voice_config, e)
        log.error(failed_msg)
Esempio n. 28
0
def static_test():
    """Render ``test.html`` with a fixed server address passed as ``server``."""
    log.info("Looking for static file %s" % "test.html")
    # NOTE(review): the server address is hard-coded to localhost; confirm
    # this is intended for deployments other than a local one.
    return render_template("test.html", server="http://localhost:10000/wikispeech")
Esempio n. 29
0
def static_proxy_js2():
    """Serve ``wikispeech_simple_player.js`` from the current working directory."""
    script_name = "wikispeech_simple_player.js"
    serve_from = os.getcwd()
    log.info("Looking for static file %s/%s" % (serve_from, script_name))
    return send_from_directory(serve_from, script_name)
Esempio n. 30
0
# True when the "Voice config" section has a "config_files_location" option.
# If use_json_conf, the json files defined in *.conf will be loaded,
# replacing voice_config.py.
use_json_conf = bool(config.config.has_option("Voice config", "config_files_location"))
    
###
    


#################
#
# Test opusenc before anything else
#
################

# Probe for the opusenc command at import time; a non-zero exit status from
# the probe aborts the process with exit code 1.
log.info("\nOPUSENC\n\nChecking that opusenc is installed on your system..")
retval = os.system("opusenc -V")
if retval == 0:
    log.info("opusenc found.\n\nEND OPUSENC\n")
else:
    # The probe is run a second time before the error is logged.
    os.system("opusenc -V")
    log.error("ERROR: opusenc was not found. You should probably run something like\nsudo apt install opus-tools\n")
    sys.exit(1)



################
#
# Flask app
#
###############
Esempio n. 31
0
def static_test():
    """Render ``test.html``, passing the root URL of the current request as ``server``."""
    log.info("Looking for static file %s" % "test.html")
    # HB: a hard-coded "http://localhost:10000" was wrong here (won't work
    # on morf), so the address is taken from the request instead.
    return render_template("test.html", server=request.url_root)
def test_all_settings():
    """Run a synthesis request for every textprocessor/voice pair of each language.

    For each supported language the list of textprocessors is fetched.
    Every textprocessor that is not flagged ``skip_test`` processes the
    input ``test.``; its output is then posted to the synthesis endpoint
    once for every voice of that language that is not flagged ``skip_test``.
    Responses are logged; a non-200 status of the synthesis call is reported
    through ``log.fatal``.
    """
    #2) Test all settings for supported languages.
    for lang in supported_languages:
        log.debug("START: %s" % lang)

        tp_list_url = "%stextprocessing/textprocessors/%s" % (host, lang)
        r = test_client.get(tp_list_url)
        #log.debug(r.url)
        textproc_configs = json.loads(r.data.decode('utf-8'))

        for textproc_config in textproc_configs:
            log.debug(textproc_config)
            tp_name = textproc_config["name"]

            # Only a value equal to True skips the textprocessor.
            skip_textproc = ("skip_test" in textproc_config
                             and textproc_config["skip_test"] == True)
            if skip_textproc:
                log.info("SKIPPING synthesis test with %s" % (tp_name))
                continue

            log.debug("START %s" % tp_name)
            url = "%stextprocessing/?input=test.&lang=%s&textprocessor=%s" % (
                host, lang, tp_name)
            log.debug("url: %s" % url)
            r = test_client.get(url)
            processed = json.loads(r.data.decode('utf-8'))

            log.debug("TP OUTPUT: %s" % processed)

            # The voice list is requested again for every textprocessor.
            url = "%ssynthesis/voices/%s" % (host, lang)
            log.debug("trying url: %s" % url)
            r = test_client.get(url)
            voices = json.loads(r.data.decode('utf-8'))

            for voice in voices:
                log.debug(voice)
                voice_name = voice["name"]
                log.debug("START %s" % voice_name)

                # Only a value equal to True skips the voice.
                skip_voice = "skip_test" in voice and voice["skip_test"] == True
                if skip_voice:
                    log.info("SKIPPING synthesis test with %s (%s)" %
                             (voice_name, voice["engine"]))
                    continue

                # The textprocessor output is sent on as a JSON string.
                payload = {
                    "input": json.dumps(processed),
                    "lang": lang,
                    "voice": voice_name,
                    "output_type": "test"
                }

                url = "%ssynthesis/" % (host)
                r = test_client.post(url, data=payload)
                if r.status_code != 200:
                    log.fatal("test call to URL %s failed, server error: %s" %
                              (url, r))
                synthesis_result = json.loads(r.data.decode('utf-8'))
                log.debug(synthesis_result)
                log.debug("DONE %s" % voice_name)

            log.debug("DONE %s" % tp_name)
        log.debug("DONE: %s" % lang)
Esempio n. 33
0
def static_proxy_workflow(path):
    """Serve ``workflow_demo/<path>`` relative to the current working directory."""
    relative_name = "workflow_demo/"+path
    serve_from = os.getcwd()
    log.info("Looking for static file %s/%s" % (serve_from, relative_name))
    return send_from_directory(serve_from, relative_name)
Esempio n. 34
0
def static_proxy_audio(path):
    """Serve the audio file ``path`` from the ``tmp`` directory."""
    log.info("Looking for audio file %s" % ("tmp/" + path))
    # The MIME type is left to the file-sending helper to work out.
    return send_from_directory("tmp", path)
import sys
import json
import wikispeech_server.wikispeech as ws
import wikispeech_server.log as log

# Module-level setup shared by the test functions below.
log.info("RUNNING VOICE CONFIG TESTS")

# Requests go through the app's own test client, addressed with this base URL.
host = "http://localhost:10000/wikispeech/"
test_client = ws.app.test_client()

# Ask the endpoint (OPTIONS) for its API description and take the list of
# allowed values for the "lang" GET parameter from it.
r = test_client.options(host)
api_info = json.loads(r.data.decode('utf-8'))
supported_languages = api_info["GET"]["parameters"]["lang"]["allowed"]


def test_default_settings():
    """Send a GET with ``input=test.`` for every supported language and log the reply."""
    #1) Test default settings for supported languages
    for lang in supported_languages:
        log.debug("START: %s" % lang)

        # GET:  curl "http://localhost:10000/wikispeech/?lang=en&input=test."
        request_url = "%s?lang=%s&input=test." % (host, lang)
        response = test_client.get(request_url)
        reply_text = response.data.decode('utf-8')
        log.debug(reply_text)
        log.debug("DONE: %s" % lang)


def test_all_settings():

    #2) Test all settings for supported languages.
    #foreach language
    #foreach textproc_config
Esempio n. 36
0
def static_test():
    """Render ``test.html``, passing the root URL of the current request as ``server``."""
    log.info("Looking for static file %s" % "test.html")
    # HB: a hard-coded "http://localhost:10000" was wrong here (won't work
    # on morf), so the address is taken from the request instead.
    return render_template("test.html", server=request.url_root)
                                              

if __name__ == "__main__":
    # Manual check when run as a script: try to build one Textprocessor from
    # a hard-coded two-component configuration and log whether that worked.

    log.log_level = "debug" #debug, info, warning, error

    marytts_preproc_component = {
        "module":"adapters.marytts_adapter",
        "call":"marytts_preproc",
        "mapper": {
            "from":"sv-se_ws-sampa",
            "to":"sv-se_sampa_mary"
        },
    }
    lexicon_lookup_component = {
        "module":"adapters.lexicon_client",
        "call":"lexLookup",
        "lexicon":"wikispeech_testdb:sv"
    }
    tp_config = {
        "name":"wikitextproc_sv",
        "lang":"sv",
        "components":[
            marytts_preproc_component,
            lexicon_lookup_component
        ]
    }

    try:
        tp = Textprocessor(tp_config)
        created_msg = "Created textprocessor %s from %s" % (tp, tp_config)
        log.info(created_msg)
    except TextprocessorException as e:
        failed_msg = "Failed to create textprocessor for %s\nException message was:\n%s" % (tp_config, e)
        log.error(failed_msg)
Esempio n. 38
0
def static_proxy_workflow(path):
    """Serve ``workflow_demo/<path>`` relative to the current working directory."""
    relative_name = "workflow_demo/" + path
    serve_from = os.getcwd()
    log.info("Looking for static file %s/%s" % (serve_from, relative_name))
    return send_from_directory(serve_from, relative_name)