def textproc(lang, textprocessor_name, text, input_type="text"):
    """Run text through the components of a textprocessor and return the utterance.

    The textprocessor config is picked from list_tp_configs_by_language(lang):
    "default_textprocessor" selects the first config whose "lang" equals lang,
    any other name selects the first config with that "name".

    Returns the utterance returned by the last component, or a string starting
    with "ERROR:" if no matching textprocessor config is found. If the request
    parameter "process" is "tokenise", the utterance is returned right after
    the "tokenise" component.
    """
    tp_configs = list_tp_configs_by_language(lang)
    if textprocessor_name == "default_textprocessor":
        textprocessor = next((tp for tp in tp_configs if tp["lang"] == lang), None)
        if textprocessor is None:
            return "ERROR: No textprocessor available for language %s" % lang
    else:
        textprocessor = next((tp for tp in tp_configs if tp["name"] == textprocessor_name), None)
        if textprocessor is None:
            #example http://localhost/?lang=sv&input=test&textprocessor=undefined
            return "ERROR: Textprocessor %s not defined for language %s" % (textprocessor_name, lang)
    log.debug("TEXTPROCESSOR: %s" % textprocessor)

    for component in textprocessor["components"]:
        module_name = component["module"]
        component_name = component["call"]
        log.debug("MODULE: %s" % module_name)
        log.debug("COMPONENT: %s" % component_name)

        #Import the defined module and look up the function to call
        mod = import_module("wikispeech_server."+module_name)
        process = getattr(mod, component_name)
        log.debug("PROCESS: %s" % process)

        #TODO clean this up to always use process(utt)
        if component_name == "tokenise":
            utt = process(text,lang=lang)
            utt["lang"] = lang
            utt["original_text"] = text
            #Simple mechanism to do only tokenisation
            #Build on this to do partial processing in other ways
            if getParam("process", "none") == "tokenise":
                return utt
        elif component_name == "marytts_preproc":
            utt = process(text, lang, component, input_type=input_type)
        else:
            try:
                utt = process(utt)
            except Exception:
                #Retry with lang and component config if the one-argument call fails.
                #"except Exception" instead of a bare except, so that
                #KeyboardInterrupt and SystemExit are not swallowed by the retry.
                utt = process(utt, lang, component)
        log.debug(str(utt))
    return utt
def test(self):
    """Check that the lexicon service at self.base_url lists self.lexicon_name.

    Raises LexiconException if the lexicon is not listed, if the response is
    not valid json, or if anything else fails while contacting the service.
    """
    url = "%s/list" % self.base_url
    log.debug("LEXICON URL: %s" % url)
    print("LEXICON URL: %s" % url)
    try:
        r = requests.get(url)
        response = r.text
        response_json = json.loads(response)
        print(response_json)
        exists = any(lex['name'] == self.lexicon_name for lex in response_json)
        if not exists:
            msg = "Lexicon does not exist: %s" % (self.lexicon_name)
            log.error(msg)
            raise LexiconException(msg)
    except LexiconException:
        #Already logged above with the right message. Without this clause the
        #generic handler below would catch it and wrap it in a second,
        #misleading "Unable to create lexicon client" error.
        raise
    except json.JSONDecodeError:
        msg = "Unable to create lexicon client for %s. Response was: %s" % (self.lexicon_name, response)
        log.error(msg)
        raise LexiconException(msg)
    except Exception as e:
        msg = "Unable to create lexicon client for %s at url %s. Reason: %s" % (self.lexicon_name, url, e)
        log.warning(msg)
        raise LexiconException(msg)
def textproc(lang, textprocessor_name, text, input_type="text"):
    """Run text through every component of the named textprocessor.

    Returns the utterance returned by the last component, or an "ERROR: ..."
    string if getTextprocessorByName finds no textprocessor for the name and
    language. Exits the process (sys.exit) if a component names a "directory"
    that does not exist.
    """
    tp_config = getTextprocessorByName(textprocessor_name, lang)
    if tp_config is None:
        #example http://localhost/?lang=sv&input=test&textprocessor=undefined
        return "ERROR: Textprocessor %s not defined for language %s" % (textprocessor_name, lang)
    log.debug("TEXTPROCESSOR: %s" % tp_config)

    #The first component creates the utt structure, the following ones modify it
    for component in tp_config["components"]:
        module_name = component["module"]
        call = component["call"]
        log.debug("MODULE: %s" % module_name)
        log.debug("CALL: %s" % call)

        if "directory" not in component:
            directory = "wikispeech_server"
        else:
            directory = component["directory"]
            if not os.path.isdir(directory):
                print("ERROR: directory %s not found" % component["directory"])
                sys.exit()

        #NOTE(review): import_module is called with (directory, module_name) -
        #presumably a project helper rather than importlib.import_module; confirm
        mod = import_module(directory, module_name)

        #The method to call is defined in the config (it could also always be the same method name..)
        process = getattr(mod, call)
        log.debug("PROCESS: %s" % process)

        #TODO clean this up to always use process(utt,lang,component)
        #The first component is assumed (not required) to accept text and
        #return a tokenised utterance: "tokenise" or "marytts_preproc"
        if call == "marytts_preproc":
            utt = process(text, lang, component, input_type=input_type)
        elif call == "tokenise":
            utt = process(text,lang=lang)
            utt["lang"] = lang
            utt["original_text"] = text
            #Simple mechanism to do only tokenisation
            #HB 200217 not used atm but leaving it for now as a reminder
            if getParam("process", "none") == "tokenise":
                return utt
        else:
            #All other components take and return an utterance
            utt = process(utt, lang=lang, componentConfig=component)
        log.debug(str(utt))
    return utt
def textprocessing():
    """Handle a textprocessing request and return the response to the client.

    Without "lang" or "input" the available options are returned as json.
    Otherwise the input is run through textproc; a string result (an error
    message) is returned as is, other results are returned as json.
    """
    lang = getParam("lang")
    textprocessor_name = getParam("textprocessor", "default_textprocessor")
    input_type = getParam("input_type", "text")
    output_type = getParam("output_type", "json")
    input_text = getParam("input")

    if lang == None or input_text == None:
        resp = make_response(json.dumps(getTextprocessingOptions()))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    if input_type not in ["text","ssml"]:
        return "input_type %s not supported" % input_type

    markup = textproc(lang,textprocessor_name, input_text, input_type=input_type)
    #A string is an error message for the client, just pass it on
    #TODO nicer way to handle error messages
    if type(markup) is str:
        log.debug("RETURNING MESSAGE: %s" % markup)
        return markup

    if output_type != "json":
        return "output_type %s not supported" % output_type
    return Response(json.dumps(markup), mimetype='application/json')
def synthesis():
    """Handle a synthesis request and return the response to the client.

    Without "lang" or "input" the available options are returned as json.
    The "input" parameter is a json string which is decoded and passed to
    synthesise. Error messages are returned to the client as plain strings,
    a successful result is returned as json.
    """
    hostname = request.url_root
    lang = getParam("lang")
    input_json = getParam("input")
    voice_name = getParam("voice", "default_voice")
    input_type = getParam("input_type", "markup")
    output_type = getParam("output_type", "json")

    if lang is None or input_json is None:
        options = getSynthesisOptions()
        resp = make_response(json.dumps(options))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    if lang not in synthesisSupportedLanguages():
        return "synthesis does not support language %s" % lang

    #The input comes from the client: report invalid json as a message
    #instead of letting the exception escape from the request handler
    try:
        markup = json.loads(input_json)
    except ValueError as e:
        msg = "ERROR: input is not valid json: %s" % e
        log.debug("RETURNING MESSAGE: %s" % msg)
        return msg

    result = synthesise(lang,voice_name,markup,input_type,output_type,hostname=hostname)
    #If result is a string, it is an error message to the client.
    #TODO nicer way of dealing with messages
    if isinstance(result, str):
        log.debug("RETURNING MESSAGE: %s" % result)
        return result
    json_data = json.dumps(result)
    return Response(json_data, mimetype='application/json')
def wikispeech_options2():
    """Return the result of getWikispeechOptions() as a json response."""
    wikispeech_options = getWikispeechOptions()
    log.debug(wikispeech_options)
    response = make_response(json.dumps(wikispeech_options))
    header_values = (
        ("Content-type", "application/json"),
        ("Allow", "OPTIONS, GET, POST, HEAD"),
    )
    for header_name, header_value in header_values:
        response.headers[header_name] = header_value
    return response
def cleanupOrth(orth):
    """Normalise an orthography before lexicon lookup.

    None becomes "". Otherwise soft hyphens and Arabic diacritics are removed
    and the result is lowercased.
    """
    if orth is None:
        return ""
    orig = orth

    #The soft hyphen is a hidden character that causes problems in lookup
    cleaned = orth.replace("\xad","")

    #Arabic diacritics
    #Bad place for this but where else? In mapper?
    TASHKEEL = (
        '\u064b',  #FATHATAN
        '\u064c',  #DAMMATAN
        '\u064d',  #KASRATAN
        '\u064e',  #FATHA
        '\u064f',  #DAMMA
        '\u0650',  #KASRA
        '\u0652',  #SUKUN
        '\u0651',  #SHADDA
    )
    diacritics_pattern = "("+"|".join(TASHKEEL)+")"
    cleaned = re.sub(diacritics_pattern, "", cleaned).lower()

    log.debug("lexicon_client.cleanupOrth: %s -> %s" % (orig, cleaned))
    return cleaned
def __init__(self, cconfig):
    """Create a textprocessing component from its config dictionary.

    cconfig must contain "module" and "call". If it contains "mapper", a
    Mapper is created from its "from" and "to" values. If the module is
    "adapters.marytts_adapter", a test request is sent to the marytts url
    found in the config.

    Raises TextprocComponentException if the mapper cannot be created or the
    marytts test request fails.
    """
    self.type = "TextprocComponent"
    self.module = cconfig["module"]
    self.call = cconfig["call"]

    if "mapper" in cconfig:
        try:
            self.mapper = Mapper(cconfig["mapper"]["from"], cconfig["mapper"]["to"])
        except MapperException as e:
            raise TextprocComponentException(e) from e

    #"module" is known to exist here, it was read above
    if self.module == "adapters.marytts_adapter":
        log.info("Trying to create marytts component: %s" % cconfig)
        #For testing marytts_adapter
        #TODO? move to test function in marytts_adapter
        try:
            marytts_url = config.config.get("Services", "marytts")
            payload = {
                "INPUT_TYPE": "TEXT",
                "OUTPUT_TYPE": "INTONATION",
                "LOCALE": "en_US",
                "INPUT_TEXT": "test"
            }
            r = requests.get(marytts_url, params=payload)
            log.debug("CALLING MARYTTS: %s" % r.url)
        except Exception as e:
            raise TextprocComponentException(e) from e
def textprocessing():
    """Handle a textprocessing request and return the response to the client.

    Returns the textprocessing options as json if "lang" or "input" is
    missing, a plain message string on errors, and the json-encoded result of
    textproc otherwise.
    """
    lang = getParam("lang")
    textprocessor_name = getParam("textprocessor", "default_textprocessor")
    input_type = getParam("input_type", "text")
    output_type = getParam("output_type", "json")
    text = getParam("input")

    missing_params = lang == None or text == None
    if missing_params:
        options_json = json.dumps(getTextprocessingOptions())
        resp = make_response(options_json)
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    supported_input_types = ["text","ssml"]
    if input_type not in supported_input_types:
        return "input_type %s not supported" % input_type

    markup = textproc(lang,textprocessor_name, text, input_type=input_type)
    if type(markup) is str:
        #textproc reports errors as strings, return them unchanged
        log.debug("RETURNING MESSAGE: %s" % markup)
        return markup

    if output_type == "json":
        return Response(json.dumps(markup), mimetype='application/json')
    return "output_type %s not supported" % output_type
def cleanupOrth(orth):
    """Return orth prepared for lexicon lookup.

    None is returned as "". For other values the soft hyphen and the Arabic
    diacritics listed below are stripped, and the result is lowercased.
    """
    if orth is None:
        return ""

    #Soft hyphen: a hidden character that causes problems in lookup
    without_soft_hyphen = orth.replace("\xad", "")

    #Arabic diacritics
    #Bad place for this but where else? In mapper?
    diacritics = {
        "FATHATAN": '\u064b',
        "DAMMATAN": '\u064c',
        "KASRATAN": '\u064d',
        "FATHA": '\u064e',
        "DAMMA": '\u064f',
        "KASRA": '\u0650',
        "SHADDA": '\u0651',
        "SUKUN": '\u0652',
    }
    tashkeel_order = ("FATHATAN", "DAMMATAN", "KASRATAN", "FATHA", "DAMMA", "KASRA", "SUKUN", "SHADDA")
    TASHKEEL = tuple(diacritics[name] for name in tashkeel_order)
    without_diacritics = re.sub("(" + "|".join(TASHKEEL) + ")", "", without_soft_hyphen)

    result = without_diacritics.lower()
    log.debug("lexicon_client.cleanupOrth: %s -> %s" % (orth, result))
    return result
def test(self):
    """Check that the lexicon service at self.base_url lists self.lexicon_name.

    Raises LexiconException if the lexicon is not listed, if the response is
    not valid json, or if anything else fails while contacting the service.
    """
    url = "%s/list" % self.base_url
    log.debug("LEXICON URL: %s" % url)
    try:
        r = requests.get(url)
        response = r.text
        response_json = json.loads(response)
        exists = any(lex['name'] == self.lexicon_name for lex in response_json)
        if not exists:
            msg = "Lexicon does not exist: %s" % (self.lexicon_name)
            log.error(msg)
            raise LexiconException(msg)
    except LexiconException:
        #Raised and logged just above. Re-raise unchanged, otherwise the
        #generic handler below wraps it in a second, misleading
        #"Unable to create lexicon client" error.
        raise
    except json.JSONDecodeError:
        msg = "Unable to create lexicon client for %s. Response was: %s" % (
            self.lexicon_name, response)
        log.error(msg)
        raise LexiconException(msg)
    except Exception as e:
        msg = "Unable to create lexicon client for %s at url %s. Reason: %s" % (
            self.lexicon_name, url, e)
        log.warning(msg)
        raise LexiconException(msg)
def wikispeech_options2():
    """Build a json response from getWikispeechOptions() and return it."""
    opts = getWikispeechOptions()
    log.debug(opts)
    body = json.dumps(opts)
    reply = make_response(body)
    headers = reply.headers
    headers["Content-type"] = "application/json"
    headers["Allow"] = "OPTIONS, GET, POST, HEAD"
    return reply
def mapperMapFromMary(trans, lang, voice):
    """Map a transcription from the marytts symbol set using the mapper service.

    If voice has no "mapper" entry, trans is returned unchanged. Otherwise the
    mapper service at mapper_url is called and the "Result" of its json
    response is returned. Errors while decoding the response are logged and
    re-raised.
    """
    log.info("mapperMapFromMary( %s , %s , %s )" % (trans, lang, voice))
    if "mapper" not in voice:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    #Mapping back from mary, so "from" and "to" swap roles here.
    #Bad names.. It should be perhaps "external" and "internal" instead of "from" and "to"
    mapper_config = voice["mapper"]
    to_symbol_set = mapper_config["from"]
    from_symbol_set = mapper_config["to"]

    #quote_plus was removed 20170613, but some transcriptions did not work
    #unquoted, so the transcription is quoted with quote since 20170626
    url = mapper_url+"/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set, quote(trans))
    log.debug("MAPPER URL before requests: %s" % url)
    r = requests.get(url)
    log.debug("MAPPER URL: "+r.url)
    response = r.text

    try:
        new_trans = json.loads(response)["Result"]
    except BaseException:
        log.error("unable to map %s, from %s to %s. response was %s" % (trans, from_symbol_set, to_symbol_set, response))
        raise
    return new_trans
def mapIpaInput(ssml, textprocessor, sampa=None):
    """Convert ipa transcriptions in ssml phoneme elements using the mapper service.

    The target symbol set is the "from" value of the last component in
    textprocessor["components"] that has a "mapper"; the sampa argument is
    only used if no component has one.

    For every phoneme element with alphabet="ipa", the ph value is mapped via
    the mapper service at mapper_url, and the element is rewritten with the
    mapped transcription and alphabet="x-sampa". Phoneme elements without an
    alphabet or ph attribute are left unchanged.

    Returns the modified ssml. Raises ValueError if no symbol set is known or
    if the mapper response is not valid json.
    """
    for comp in textprocessor["components"]:
        if "mapper" in comp:
            sampa = comp["mapper"]["from"]
    if not sampa:
        raise ValueError(
            "No mapper defined in voice %s, don't know how to map ipa!" %
            textprocessor["name"])

    phoneme_elements = re.findall("(<phoneme .+?\">)", ssml)
    for element in phoneme_elements:
        log.debug("Phoneme element: %s" % element)
        alphabet_match = re.search("alphabet=\"([^\"]+)\"", element)
        if alphabet_match is None or alphabet_match.group(1) != "ipa":
            continue
        #Match the attribute value only, so the order of attributes does not matter
        ph_match = re.search("ph=\"([^\"]+)\"", element)
        if ph_match is None:
            continue
        ipa_trans = ph_match.group(1)

        url = mapper_url + "/mapper/map/ipa/%s/%s" % (sampa, quote(ipa_trans))
        r = requests.get(url)
        response = r.text
        try:
            response_json = json.loads(response)
        except ValueError as e:
            raise ValueError(response) from e
        sampa_trans = response_json["Result"]

        #Rewrite this element only, with plain string replacement. Using the
        #transcription as a regex over the whole ssml would interpret its
        #special characters and replace every match in the document.
        mapped_element = element.replace('alphabet="ipa"', 'alphabet="x-sampa"')
        mapped_element = mapped_element.replace('ph="%s"' % ipa_trans, 'ph="%s"' % sampa_trans)
        ssml = ssml.replace(element, mapped_element)
    log.debug("mapIpaInput returns %s" % ssml)
    return ssml
def synthesis():
    """Handle a synthesis request and return the response to the client.

    The "input" parameter is a json string which is decoded and passed to
    synthesise. "presynth" is only true if the parameter is the string "True".
    Error messages are returned to the client as plain strings, a successful
    result is returned as json.
    """
    hostname = request.url_root
    lang = getParam("lang")
    input_json = getParam("input")
    voice_name = getParam("voice", "default_voice")
    input_type = getParam("input_type", "markup")
    output_type = getParam("output_type", "json")
    presynth = getParam("presynth", False) == "True"

    if lang not in synthesisSupportedLanguages():
        return "synthesis does not support language %s" % lang

    #The input is a json string, needs to be a python dictionary.
    #It comes from the client: a missing input (None gives TypeError) or
    #invalid json (ValueError) is reported as a message instead of letting
    #the exception escape from the request handler.
    try:
        markup = json.loads(input_json)
    except (TypeError, ValueError) as e:
        msg = "ERROR: input is not valid json: %s" % e
        log.debug("RETURNING MESSAGE: %s" % msg)
        return msg

    result = synthesise(lang,
                        voice_name,
                        markup,
                        input_type,
                        output_type,
                        hostname=hostname,
                        presynth=presynth)
    #A string result is an error message to the client
    if isinstance(result, str):
        log.debug("RETURNING MESSAGE: %s" % result)
        return result
    json_data = json.dumps(result)
    return Response(json_data, mimetype='application/json')
def textproc(lang, textprocessor_name, text, input_type="text"):
    """Run text through the components of a textprocessor and return the utterance.

    The textprocessor config is picked from list_tp_configs_by_language(lang):
    "default_textprocessor" selects the first config whose "lang" equals lang,
    any other name selects the first config with that "name".

    Returns the utterance returned by the last component, or a string starting
    with "ERROR:" if no matching textprocessor config is found. If the request
    parameter "process" is "tokenise", the utterance is returned right after
    the "tokenise" component.
    """
    tp_configs = list_tp_configs_by_language(lang)
    if textprocessor_name == "default_textprocessor":
        textprocessor = next((tp for tp in tp_configs if tp["lang"] == lang), None)
        if textprocessor is None:
            return "ERROR: No textprocessor available for language %s" % lang
    else:
        textprocessor = next((tp for tp in tp_configs if tp["name"] == textprocessor_name), None)
        if textprocessor is None:
            #example http://localhost/wikispeech/?lang=sv&input=test&textprocessor=undefined
            return "ERROR: Textprocessor %s not defined for language %s" % (
                textprocessor_name, lang)
    log.debug("TEXTPROCESSOR: %s" % textprocessor)

    for component in textprocessor["components"]:
        module_name = component["module"]
        component_name = component["call"]
        log.debug("MODULE: %s" % module_name)
        log.debug("COMPONENT: %s" % component_name)

        #Import the defined module and look up the function to call
        mod = import_module("wikispeech_server." + module_name)
        process = getattr(mod, component_name)
        log.debug("PROCESS: %s" % process)

        #TODO clean this up to always use process(utt)
        if component_name == "tokenise":
            utt = process(text)
            utt["lang"] = lang
            #Simple mechanism to do only tokenisation
            #Build on this to do partial processing in other ways
            if getParam("process", "none") == "tokenise":
                return utt
        elif component_name == "marytts_preproc":
            utt = process(text, lang, component, input_type=input_type)
        else:
            try:
                utt = process(utt)
            except Exception:
                #Retry with lang and component config if the one-argument call fails.
                #"except Exception" instead of a bare except, so that
                #KeyboardInterrupt and SystemExit are not swallowed by the retry.
                utt = process(utt, lang, component)
        log.debug(str(utt))
    return utt
def addTransFromResponse(tokenlist, responseDict):
    """Add transcriptions from responseDict to the tokens in tokenlist.

    Each token is looked up by its cleaned "orth". Tokens that are found get
    "trans" set to the value from responseDict and "g2p_method" set to
    "lexicon"; the tokens are modified in place.
    """
    for token in tokenlist:
        lookup_orth = cleanupOrth(token["orth"])
        if lookup_orth not in responseDict:
            log.debug("No trans for %s" % lookup_orth)
            continue
        token["trans"] = responseDict[lookup_orth]
        token["g2p_method"] = "lexicon"
def saveAndConvertAudio(audio_url):
    """Download audio from audio_url, save it and convert it to opus.

    The audio is written to a temporary file in the configured
    "audio_tmpdir", and converted with opusenc to <tmpfile>.opus.

    Returns a tuple (opus_url_suffix, audio_data), where audio_data is
    encode_audio(<the downloaded file>).decode().
    """
    global config
    tmpdir = config.config.get("Audio settings","audio_tmpdir")
    log.debug("TMPDIR: %s" % tmpdir)
    #os.makedirs instead of a "mkdir -p" shell command string, which breaks
    #on paths containing spaces or shell characters
    os.makedirs(tmpdir, exist_ok=True)

    log.debug("audio_url:\n%s" % audio_url)
    r = requests.get(audio_url)
    log.debug(r.headers['content-type'])
    audio_data = r.content

    #One temporary file only, created after the download. A second, empty
    #temporary file used to be created before it and was never removed.
    with NamedTemporaryFile(mode='w+b', dir=tmpdir, delete=False) as fh:
        tmpwav = fh.name
        fh.write(audio_data)

    #tmpwav is now the synthesised wav file
    tmpopus = "%s.opus" % tmpwav
    convertcmd = "opusenc %s %s" % (tmpwav, tmpopus)
    log.debug("convertcmd: %s" % convertcmd)
    if log.log_level != "debug":
        convertcmd = "opusenc --quiet %s %s" % (tmpwav, tmpopus)
    retval = os.system(convertcmd)
    if retval != 0:
        log.error("ERROR: opusenc was not found. You should probably run something like\nsudo apt install opus-tools\n")

    #remove everything before the tmpdir, to build the external url
    #HB problem with wikimedia usage?
    opus_url_suffix = re.sub("^.*/%s/" % tmpdir, "", tmpopus)
    log.debug("opus_url_suffix: %s" % opus_url_suffix)

    return_audio_data = True
    if return_audio_data:
        audio_data = "%s" % encode_audio(tmpwav).decode()
    else:
        audio_data = ""
    return (opus_url_suffix, audio_data)
def mapSsmlTranscriptionsToMary(ssml, lang, tp_config):
    """Map the transcriptions in ssml phoneme elements with mapperMapToMary.

    For every phoneme element, the ph value is unescaped (&quot; to "),
    mapped with mapperMapToMary, escaped again and written back into that
    element. Phoneme elements without a ph attribute are left unchanged.

    Returns the modified ssml.
    """
    phoneme_elements = re.findall("(<phoneme [^>]+>)", ssml)
    for element in phoneme_elements:
        ph_match = re.search("ph=\"([^\"]+)\"", element)
        if ph_match is None:
            log.debug("No ph attribute in phoneme element: %s" % element)
            continue
        trans = ph_match.group(1)
        log.debug("ws_trans: %s" % trans)

        #Double quotes are escaped as &quot; inside the attribute value,
        #the mapper needs the real character
        mary_trans = mapperMapToMary(trans.replace("&quot;", "\""), lang, tp_config)
        log.debug("mary_trans: %s" % mary_trans)
        escaped_mary_trans = mary_trans.replace("\"", "&quot;")

        #Plain string replacement within this element. The transcription is
        #not a regular expression, and the replacement may contain backslashes
        #that re.sub would try to interpret.
        mapped_element = element.replace('ph="%s"' % trans, 'ph="%s"' % escaped_mary_trans)
        ssml = ssml.replace(element, mapped_element)
    return ssml
def addTransFromResponse(tokenlist, responseDict):
    """Add transcriptions (and optionally postags) from responseDict to tokens.

    Each token is looked up by its cleaned "orth". Tokens that are found get
    "trans" and "g2p_method" ("lexicon"), and if includePostag is set also
    "pos" from responseDict["postags"]. The tokens are modified in place.
    """
    for token in tokenlist:
        lookup_orth = cleanupOrth(token["orth"])
        if lookup_orth not in responseDict:
            log.debug("No trans for %s" % lookup_orth)
            continue
        token["trans"] = responseDict[lookup_orth]
        token["g2p_method"] = "lexicon"
        if includePostag:
            token["pos"] = responseDict["postags"][lookup_orth]
def lexLookup(utt, lang, componentConfig):
    """Look up the tokens of utt in the configured lexicon and return utt.

    The lexicon name is componentConfig["lexicon"]. Transcriptions found are
    added to the tokens by addTransFromResponse. lang is not used.
    """
    lexicon = componentConfig["lexicon"]
    #TODO Load lexicon here, before we have an external call to loadLexicon
    loadLexicon(lexicon)

    utt_tokens = getTokens(utt)
    orth_to_lookup = getOrth(utt_tokens)
    log.debug("ORTH TO LOOKUP: %s" % orth_to_lookup)

    lookup_result = getLookupBySentence(orth_to_lookup, lexicon)
    addTransFromResponse(utt_tokens, lookup_result)
    return utt
def lexLookup(utt, lang, componentConfig):
    """Add lexicon transcriptions to the tokens of utt, and return utt.

    Uses the lexicon named in componentConfig["lexicon"]. The lang argument
    is not used here.
    """
    name = componentConfig["lexicon"]
    #TODO Load lexicon here, before we have an external call to loadLexicon
    loadLexicon(name)

    token_list = getTokens(utt)
    orths = getOrth(token_list)
    log.debug("ORTH TO LOOKUP: %s" % orths)

    addTransFromResponse(token_list, getLookupBySentence(orths, name))
    return utt
def map(self, string):
    """Map string from self.from_symbol_set to self.to_symbol_set.

    Calls the mapper service at self.base_url and returns the "Result" value
    of its json response.

    Raises MapperException if the response cannot be decoded or has no
    "Result".
    """
    url = "%s/%s/%s/%s/%s" % (self.base_url, "map", self.from_symbol_set, self.to_symbol_set, string)
    r = requests.get(url)
    log.debug(r.url)
    response = r.text
    try:
        response_json = json.loads(response)
        return response_json["Result"]
    except Exception as e:
        #"except Exception" instead of a bare except, so KeyboardInterrupt and
        #SystemExit pass through. The exception carries the message and cause.
        msg = "unable to map string '%s'from %s to %s. response was %s" % (string,self.from_symbol_set, self.to_symbol_set, response)
        log.error(msg)
        raise MapperException(msg) from e
def testVoice(config):
    """Test an espeak voice by running espeak-ng quietly with it.

    Runs "espeak-ng -v <config["espeak_voice"]> -q test". Returns True if the
    command exits with status 0, raises VoiceException otherwise.
    """
    voice = config["espeak_voice"]
    espeak = "espeak-ng"
    #espeak = "espeak"
    failure = None
    try:
        retval = os.system("%s -v %s -q test" % (espeak, voice))
        #Explicit check instead of assert: asserts are removed when python
        #runs with -O, which would make this test always succeed
        if retval == 0:
            log.debug("Test successful for voice %s" % config["name"])
            return True
    except Exception as e:
        failure = e
    msg = "Failed command: '%s -v %s'" % (espeak, voice)
    log.error(msg)
    raise VoiceException(msg) from failure
def saveAndConvertAudio(audio_url):
    """Download audio from audio_url, convert it to opus and return it.

    The audio is written to a temporary file in the configured
    "audio_tmpdir" and converted with opusenc to <tmpfile>.opus. After
    encoding, all files in the tmpdir are removed.

    Returns a tuple (opus_url_suffix, audio_data), where audio_data is
    encode_audio(<the opus file>).decode().
    """
    global config
    tmpdir = config.config.get("Audio settings", "audio_tmpdir")
    log.debug("TMPDIR: %s" % tmpdir)
    #os.makedirs instead of a "mkdir -p" shell command string, which breaks
    #on paths containing spaces or shell characters
    os.makedirs(tmpdir, exist_ok=True)

    log.debug("audio_url:\n%s" % audio_url)
    r = requests.get(audio_url)
    log.debug(r.headers['content-type'])
    audio_data = r.content

    #The temporary file is created after the download, so a failing request
    #does not leave an empty file behind
    with NamedTemporaryFile(mode='w+b', dir=tmpdir, delete=False) as fh:
        tmpwav = fh.name
        fh.write(audio_data)

    tmpopus = "%s.opus" % tmpwav
    convertcmd = "opusenc %s %s" % (tmpwav, tmpopus)
    log.debug("convertcmd: %s" % convertcmd)
    if log.log_level != "debug":
        convertcmd = "opusenc --quiet %s %s" % (tmpwav, tmpopus)
    retval = os.system(convertcmd)
    if retval != 0:
        log.error(
            "ERROR: opusenc was not found. You should probably run something like\nsudo apt install opus-tools\n"
        )

    opus_url_suffix = re.sub("^.*/%s/" % tmpdir, "", tmpopus)
    log.debug("opus_url_suffix: %s" % opus_url_suffix)

    return_audio_data = True
    if return_audio_data:
        audio_data = "%s" % encode_audio(tmpopus).decode()
    else:
        audio_data = ""

    #Removing any remaining files in the tmpdir
    tmpfiles = glob.glob("%s/*" % tmpdir)
    for f in tmpfiles:
        os.unlink(f)
    return (opus_url_suffix, audio_data)
def getParam(param, default=None):
    """Return the value of a request parameter, or default if it is missing.

    GET requests are read from request.args, POST requests from request.form.
    For other request methods default is returned.
    """
    log.debug("getParam %s, request.method: %s" % (param, request.method))
    found = None
    if request.method == "GET":
        found = request.args.get(param)
    elif request.method == "POST" and param in request.form:
        found = request.form[param]
    log.debug("VALUE: %s" % found)
    return default if found == None else found
def test_textproc():
    """Test textprocessing by running textproc on a short Swedish text.

    Exceptions from textproc are logged and re-raised. If textproc returns an
    "ERROR:" string, it is logged and raised as a RuntimeError.
    """
    sent = "apa"
    try:
        res = textproc("sv", "default_textprocessor", sent)
    except:
        log.error(
            "Failed to do textprocessing.\nError type: %s\nError info:%s" %
            (sys.exc_info()[0], sys.exc_info()[1]))
        import traceback
        log.debug("Stacktrace:")
        traceback.print_tb(sys.exc_info()[2])
        log.debug("END stacktrace")
        log.error("textprocessing test failure")
        log.error("No running marytts server found at %s" %
                  config.config.get("Services", "marytts"))
        raise

    #TODO Better with exception than return value
    if isinstance(res, str) and res.startswith("ERROR:"):
        log.error("Failed to do textprocessing")
        log.error(res)
        log.error("textprocessing test failure")
        #There is no active exception here, so a bare raise would fail with
        #"No active exception to reraise" (a RuntimeError) and hide the real
        #message. Raise the same exception type with the message instead.
        raise RuntimeError(res)

    log.debug("%s --> %s" % (sent, res))
    log.debug("SUCCESS: textprocessing test")
def test_textproc():
    """Test textprocessing by running textproc on a short Swedish text.

    Exceptions from textproc are logged and re-raised. If textproc returns an
    "ERROR:" string, it is logged and raised as a RuntimeError.
    """
    sent = "apa"
    try:
        res = textproc("sv","default_textprocessor", sent)
    except:
        log.error("Failed to do textprocessing.\nError type: %s\nError info:%s" % (sys.exc_info()[0], sys.exc_info()[1]))
        import traceback
        log.debug("Stacktrace:")
        traceback.print_tb(sys.exc_info()[2])
        log.debug("END stacktrace")
        log.error("textprocessing test failure")
        log.error("No running marytts server found at %s" % config.config.get("Services","marytts"))
        raise

    #TODO Better with exception than return value
    if isinstance(res, str) and res.startswith("ERROR:"):
        log.error("Failed to do textprocessing")
        log.error(res)
        log.error("textprocessing test failure")
        #There is no active exception here, so a bare raise would fail with
        #"No active exception to reraise" (a RuntimeError) and hide the real
        #message. Raise the same exception type with the message instead.
        raise RuntimeError(res)

    log.debug("%s --> %s" % (sent,res))
    log.debug("SUCCESS: textprocessing test")
def getParam(param,default=None):
    """Look up param in the current request and return its value.

    request.args is used for GET and request.form for POST. If the parameter
    is not found (or the method is neither GET nor POST), default is returned.
    """
    log.debug("getParam %s, request.method: %s" % (param, request.method))

    def lookup():
        #Returns None if the parameter is not available
        if request.method == "GET":
            return request.args.get(param)
        if request.method == "POST":
            if param in request.form:
                return request.form[param]
        return None

    value = lookup()
    log.debug("VALUE: %s" % value)
    if value == None:
        return default
    return value
def test(self):
    """Check that the mapper service returns json for this mapper's maptable.

    Raises MapperException if the response is not valid json, or if the
    request fails for any other reason.
    """
    url_parts = (self.base_url, "maptable", self.from_symbol_set, self.to_symbol_set)
    url = "%s/%s/%s/%s" % url_parts
    log.debug(url)
    try:
        r = requests.get(url)
        response = r.text
        #Only decoded to verify that the response is json, the result is not used
        json.loads(response)
    except json.JSONDecodeError:
        msg = "Unable to create mapper from %s to %s. Response was: %s" % (self.from_symbol_set, self.to_symbol_set, response)
    except Exception as e:
        msg = "Unable to create mapper at url %s. Reason: %s" % (url, e)
    else:
        return
    log.error(msg)
    raise MapperException(msg)
def map(self, string):
    """Map string from self.from_symbol_set to self.to_symbol_set.

    Calls the mapper service at self.base_url and returns the "Result" value
    of its json response.

    Raises MapperException if the response cannot be decoded or has no
    "Result".
    """
    url = "%s/%s/%s/%s/%s" % (self.base_url, "map", self.from_symbol_set,
                              self.to_symbol_set, string)
    r = requests.get(url)
    log.debug(r.url)
    response = r.text
    try:
        response_json = json.loads(response)
        return response_json["Result"]
    except Exception as e:
        #"except Exception" instead of a bare except, so KeyboardInterrupt and
        #SystemExit pass through. The exception carries the message and cause.
        msg = "unable to map string '%s'from %s to %s. response was %s" % (
            string, self.from_symbol_set, self.to_symbol_set, response)
        log.error(msg)
        raise MapperException(msg) from e
def mapperMapToMary(trans, lang, voice):
    """Map a transcription to the marytts symbol set using the mapper service.

    If voice has no "mapper" entry, trans is returned unchanged. Otherwise the
    mapper service at mapper_url is called and the "Result" of its json
    response is returned, with extra substitutions applied for lang "sv".
    A json decode error is logged and re-raised.
    """
    log.debug("mapperMapToMary( %s, %s, %s)" % (trans, lang, voice))
    if "mapper" not in voice:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    to_symbol_set = voice["mapper"]["to"]
    from_symbol_set = voice["mapper"]["from"]
    log.info("marytts mapper defined for language %s\nFrom: %s\nTo: %s" % (lang, from_symbol_set, to_symbol_set))

    #quote_plus was removed 20170613, but some transcriptions did not work
    #unquoted, so quote is used since 20170626
    url = mapper_url+"/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set, quote(trans))
    log.debug("MAPPER URL before requests: %s" % url)
    r = requests.get(url)
    log.debug("MAPPER URL: %s" % r.url)
    response = r.text
    log.debug("MAPPER RESPONSE: %s" % response)

    try:
        response_json = json.loads(response)
    except json.JSONDecodeError:
        log.error("JSONDecodeError:")
        log.error("RESPONSE: %s" % response)
        raise
    new_trans = response_json["Result"]

    #Special cases for Swedish pre-r allophones that are not handled by the
    #mapper (because mary uses an old version of the phoneme set that doesn't
    #distinguish between normal and r-coloured E/{ (always E) and 2/9 (always 9)).
    #This should change in mary later on.
    if lang == "sv":
        swedish_substitutions = (
            ("{ - ", r"E - "),
            ("{ ", r"E "),
            ("2(:? -) r? ", r"9\1 r"),
        )
        for pattern, replacement in swedish_substitutions:
            new_trans = re.sub(pattern, replacement, new_trans)
    log.debug("NEW TRANS: %s" % new_trans)
    return new_trans
def mapperMapToMary(trans, lang, voice):
    """Return trans mapped to the marytts symbol set by the mapper service.

    Without a "mapper" entry in voice, trans is returned as is. The mapped
    transcription is the "Result" of the mapper's json response; for lang "sv"
    some pre-r allophones are rewritten afterwards. A json decode error is
    logged and re-raised.
    """
    log.debug("mapperMapToMary( %s, %s, %s)" % (trans, lang, voice))
    has_mapper = "mapper" in voice
    if not has_mapper:
        log.info("No marytts mapper defined for language %s" % lang)
        return trans

    to_symbol_set = voice["mapper"]["to"]
    from_symbol_set = voice["mapper"]["from"]
    log.info("marytts mapper defined for language %s\nFrom: %s\nTo: %s" %
             (lang, from_symbol_set, to_symbol_set))

    #The transcription is quoted with quote (quote_plus did not work for all
    #transcriptions, and neither did leaving it unquoted)
    map_path = "/mapper/map/%s/%s/%s" % (from_symbol_set, to_symbol_set,
                                         quote(trans))
    url = mapper_url + map_path
    log.debug("MAPPER URL before requests: %s" % url)
    r = requests.get(url)
    log.debug("MAPPER URL: %s" % r.url)
    response = r.text
    log.debug("MAPPER RESPONSE: %s" % response)

    try:
        mapped = json.loads(response)
    except json.JSONDecodeError:
        log.error("JSONDecodeError:")
        log.error("RESPONSE: %s" % response)
        raise
    new_trans = mapped["Result"]

    if lang == "sv":
        #Swedish pre-r allophones are not handled by the mapper: mary uses an
        #old version of the phoneme set that doesn't distinguish between
        #normal and r-coloured E/{ (always E) and 2/9 (always 9).
        #This should change in mary later on.
        new_trans = re.sub("{ - ", r"E - ", new_trans)
        new_trans = re.sub("{ ", r"E ", new_trans)
        new_trans = re.sub("2(:? -) r? ", r"9\1 r", new_trans)
    log.debug("NEW TRANS: %s" % new_trans)
    return new_trans
def test(self):
    """Request this mapper's maptable and verify that the response is json.

    Raises MapperException if the response cannot be decoded as json, or if
    anything else goes wrong while making the request.
    """
    url = "%s/%s/%s/%s" % (self.base_url, "maptable", self.from_symbol_set,
                           self.to_symbol_set)
    log.debug(url)

    def fail(msg):
        #Log the message and raise it as a MapperException
        log.error(msg)
        raise MapperException(msg)

    try:
        response = requests.get(url).text
        #Decoded only to check that it is valid json
        json.loads(response)
    except json.JSONDecodeError:
        fail("Unable to create mapper from %s to %s. Response was: %s" % (
            self.from_symbol_set, self.to_symbol_set, response))
    except Exception as e:
        fail("Unable to create mapper at url %s. Reason: %s" % (url, e))
def utt2ssml(item):
    """Convert an utterance item (recursively) to an ssml string.

    Items with tag "t" become their text, wrapped in a phoneme element if the
    item has a "ph" transcription. Items with tag "boundary" become a break.
    All other items are the space-joined ssml of their children.
    """
    log.debug(item)
    tag = item["tag"]
    if tag == "t":
        word = item["text"]
        if "ph" not in item:
            return word
        phns = map2espeak(item["ph"])
        return '<phoneme ph="%s">%s</phoneme>' % (phns, word)
    if tag == "boundary":
        return "<break/>"
    return " ".join([utt2ssml(child) for child in item["children"]])
def lookup(self, string):
    """Look up the words in string in the lexicon, and return the json response.

    Returns an empty dictionary without calling the lexicon service if string
    is empty or whitespace only.

    Raises LexiconException if the response is not valid json.
    """
    if string.strip() == "":
        log.warning("LEXICON LOOKUP STRING IS EMPTY!")
        return {}

    encString = urllib.parse.quote(string)
    url = "%s/%s?lexicons=%s&words=%s" % (self.base_url, "lookup", self.lexicon_name, encString)
    r = requests.get(url)
    log.debug("LEXICON LOOKUP URL: %s" % r.url)
    response = r.text
    try:
        response_json = json.loads(response)
    except ValueError as e:
        #json decode errors are ValueErrors. A bare except here would also
        #catch KeyboardInterrupt and SystemExit.
        log.error("unable to lookup '%s' in %s. response was %s" % (string, self.lexicon_name, response))
        raise LexiconException(response) from e
    log.debug(response_json)
    return response_json
def synthesise(lang, voice, input, hostname=None):
    """Get timing tokens from marytts and build the url for the audio request.

    The input utterance is converted to maryxml and posted to marytts with
    output type REALISED_ACOUSTPARAMS; the reply is parsed into tokens. The
    audio request is only prepared here, not sent.

    Returns a tuple (audio_url, output_tokens). Raises an HTTP error from
    requests if the first marytts call does not return an OK status.
    hostname is not used.
    """
    xmllang = "no" if lang == "nb" else lang
    locale = voice["marytts_locale"] if "marytts_locale" in voice else lang

    #xmllang, not lang, here. Marytts needs the xml:lang to match first part of LOCALE..
    maryxml = utt2maryxml(xmllang, input, voice)
    log.debug("MARYXML: %s" % maryxml)

    #1) Call marytts to get output_tokens with timing
    timing_params = {
        "INPUT_TYPE": "INTONATION",
        "OUTPUT_TYPE": "REALISED_ACOUSTPARAMS",
        "LOCALE": locale,
        "VOICE": voice["name"],
        "INPUT_TEXT": maryxml
    }
    r = requests.post(marytts_url, params=timing_params)
    log.debug("runMarytts PARAMS URL (length %d): %s" % (len(r.url), r.url))
    xml = r.text.encode("utf-8")
    log.debug("REPLY: %s" % xml)
    #Should raise an error if status is not OK (In particular if the url-too-long issue appears)
    r.raise_for_status()
    output_tokens = maryxml2tokensET(xml)

    #2) Call marytts (again..) to get the audio. An earlier attempt to get
    #tokens, timing and audio in one call, with a marytts output_type
    #"WIKISPEECH_JSON", was not successful. Try again at some point, if we
    #want to keep using marytts.
    #NOTE(review): LOCALE is lang here but locale in the first call - confirm that this is intended
    audio_params = {
        "INPUT_TYPE": "INTONATION",
        "OUTPUT_TYPE": "AUDIO",
        "AUDIO": "WAVE_FILE",
        "LOCALE": lang,
        "VOICE": voice["name"],
        "INPUT_TEXT": maryxml
    }
    #Only create the url. The call to marytts will be done in wikispeech.saveAndConvertAudio
    audio_url = requests.Request('GET', marytts_url, params=audio_params).prepare().url
    log.debug("runMarytts AUDIO_URL: %s" % audio_url)
    return (audio_url, output_tokens)
def lookup(self, string):
    """Look up the words in string in the lexicon, and return the json response.

    Returns an empty dictionary without calling the lexicon service if string
    is empty or whitespace only.

    Raises LexiconException if the response is not valid json.
    """
    if string.strip() == "":
        log.warning("LEXICON LOOKUP STRING IS EMPTY!")
        return {}

    encString = urllib.parse.quote(string)
    url = "%s/%s?lexicons=%s&words=%s" % (self.base_url, "lookup",
                                          self.lexicon_name, encString)
    r = requests.get(url)
    log.debug("LEXICON LOOKUP URL: %s" % r.url)
    response = r.text
    try:
        response_json = json.loads(response)
    except ValueError as e:
        #json decode errors are ValueErrors. A bare except here would also
        #catch KeyboardInterrupt and SystemExit.
        log.error("unable to lookup '%s' in %s. response was %s" %
                  (string, self.lexicon_name, response))
        raise LexiconException(response) from e
    log.debug(response_json)
    return response_json
def getTokens(utt):
    """Return the list of words in utt that should be looked up.

    For tokens with "mtu" set to True, only words that have a "g2p_method"
    are included. For other tokens, all words without an
    "input_ssml_transcription" are included.
    """
    selected = []
    phrases = (
        phrase
        for paragraph in utt["paragraphs"]
        for sentence in paragraph["sentences"]
        for phrase in sentence["phrases"]
    )
    for phrase in phrases:
        for token in phrase["tokens"]:
            is_mtu = "mtu" in token and token["mtu"] == True
            for word in token["words"]:
                if is_mtu:
                    if "g2p_method" in word:
                        selected.append(word)
                elif "input_ssml_transcription" not in word:
                    #Words with a transcription from the input ssml are skipped
                    selected.append(word)
                    log.debug("Appending to tokenlist: %s" % word)
    return selected
def getTokens(utt):
    """Collect the words of utt that need a transcription.

    Words of "mtu" tokens are collected only if they have a "g2p_method".
    Words of other tokens are collected unless they have an
    "input_ssml_transcription".
    """
    def wanted(token, word):
        #Decide if a word of the given token should be collected
        if "mtu" in token and token["mtu"] == True:
            return "g2p_method" in word
        return "input_ssml_transcription" not in word

    tokenlist = []
    for paragraph in utt["paragraphs"]:
        for sentence in paragraph["sentences"]:
            for phrase in sentence["phrases"]:
                for token in phrase["tokens"]:
                    mtu = "mtu" in token and token["mtu"] == True
                    for word in token["words"]:
                        if not wanted(token, word):
                            continue
                        tokenlist.append(word)
                        if not mtu:
                            log.debug("Appending to tokenlist: %s" % word)
    return tokenlist
def utt2phonemicsOLD(item):
    """Convert an utterance item to a string with [[...]] phoneme notation.

    Items with tag "t" become their text, or "[[<phones>]]" (with spaces
    removed) if they have a "ph" transcription. Items with tag "boundary"
    become ",". For all other items the children are converted with
    utt2phonemics and joined with spaces.
    """
    log.debug("utt2phonemics: %s" % item)
    tag = item["tag"]
    if tag == "t":
        word = item["text"]
        if "ph" not in item:
            return word
        phns = map2espeak(item["ph"])
        return "[[%s]]" % phns.replace(" ", "")
    if tag == "boundary":
        return ","
    return " ".join([utt2phonemics(child) for child in item["children"]])
def map2espeak(phonestring):
    """Map a space-separated phone string to its espeak form.

    " - " separators are collapsed to a single space, each phone is replaced
    by its entry in espeakmap (phones without an entry are kept as they are),
    and the two accent regexes are then applied to the joined result.

    Returns:
        The mapped, space-separated string.
    """
    #example input:  h @ - ' l @U
    #                hh ax l ow1
    phonestring = re.sub(" - ", " ", phonestring)

    mapped = [
        espeakmap[phone] if phone in espeakmap else phone
        for phone in phonestring.split(" ")
    ]
    espeak = " ".join(mapped)

    #move accents to following vowel
    for accent_pattern in (r"' (.+)(@|oU|e|E)", r"\" (.+)(@|oU|e|E)"):
        espeak = re.sub(accent_pattern, r"\1 ' \2", espeak)

    log.debug("MAPPED %s TO %s" % (phonestring, espeak))
    return espeak
def utt2ssml(utterance):
    """Flatten an utterance into a space-separated SSML string.

    Each word contributes its "orth"; a word with a "trans" entry is wrapped
    in a <phoneme> element whose ph attribute is that transcription mapped
    through map2flite. After the words of a phrase that has a "boundary" key,
    a "<break/>" is added.
    """
    log.debug(utterance)
    parts = []
    for paragraph in utterance["paragraphs"]:
        for sentence in paragraph["sentences"]:
            for phrase in sentence["phrases"]:
                for token in phrase["tokens"]:
                    for word in token["words"]:
                        orth = word["orth"]
                        if "trans" not in word:
                            parts.append(orth)
                            continue
                        ws_trans = word["trans"]
                        log.debug("WS_TRANS: %s" % ws_trans)
                        flite_trans = map2flite(ws_trans)
                        log.debug("FLITE_TRANS: %s" % flite_trans)
                        parts.append("""<phoneme ph="%s">%s</phoneme>""" % (flite_trans, orth))
                if "boundary" in phrase:
                    parts.append("<break/>")
    return " ".join(parts)
def convertResponse(response_json):
    """Turn a lexicon lookup reply into an orthography -> transcription dict.

    Only list replies are processed; anything else yields the initial dict.
    Entries whose status name is "delete" are skipped. For each remaining
    entry the first transcription is used: an entry with "preferred" == True
    always (over)writes the value for its orthography, other entries only
    fill it in when the orthography is not in the dict yet.

    When the module-level includePostag flag is true, the result also holds
    a "postags" dict mapping each stored orthography to the entry's
    "partOfSpeech" ("" when absent).
    """
    trans_dict = {}
    if includePostag:
        trans_dict["postags"] = {}

    #anything but a list response is ignored
    if type(response_json) is not list:
        return trans_dict

    for entry in response_json:
        status_name = entry["status"]["name"]
        log.debug("STATUS: %s" % status_name)
        if status_name == "delete":
            continue

        orth = entry["strn"]
        first_trans = entry["transcriptions"][0]["strn"]
        pos = ""
        if includePostag and "partOfSpeech" in entry:
            pos = entry["partOfSpeech"]

        if "preferred" in entry and entry["preferred"] == True:
            log.debug("ORTH: %s, PREFERRED TRANS: %s" % (orth, first_trans))
        elif orth not in trans_dict:
            #only add the first reading if none is preferred
            log.debug("ORTH: %s, FIRST TRANS: %s" % (orth, first_trans))
        else:
            continue

        trans_dict[orth] = first_trans
        if includePostag:
            trans_dict["postags"][orth] = pos
    return trans_dict
def marytts_postproc(lang, utt):
    """Send an utterance to marytts (INTONATION -> ALLOPHONES) and parse the reply.

    The utterance is serialised with utt2maryxml, posted to marytts_url, and
    the returned XML is converted back with maryxml2utt.

    Raises:
        requests.HTTPError: via raise_for_status() when the reply status is
            not OK.
    """
    # lang -> (marytts LOCALE, xml:lang); unlisted languages map to themselves
    locale, xmllang = {
        "en": ("en_US", "en"),
        "nb": ("no", "no"),
    }.get(lang, (lang, lang))

    #xmllang, not lang, here. Marytts needs the xml:lang to match first part of LOCALE..
    request_xml = utt2maryxml(xmllang, utt)
    payload = {
        "INPUT_TYPE": "INTONATION",
        "OUTPUT_TYPE": "ALLOPHONES",
        "LOCALE": locale,
        "INPUT_TEXT": request_xml,
    }
    r = requests.post(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % r.url)

    #Should raise an error if status is not OK (In particular if the url-too-long issue appears)
    r.raise_for_status()

    reply_xml = r.text
    log.debug("REPLY: %s" % reply_xml)
    _marylang, result = maryxml2utt(reply_xml)
    log.debug("marytts_postproc returning: %s" % result)
    return result
def synthesis():
    """Request handler for the synthesis step.

    Reads lang, input, voice, input_type, output_type and presynth via
    getParam. Without lang or input it answers with the synthesis options as
    JSON. Otherwise the input (a JSON string) is decoded and handed to
    synthesise(); a string result is returned as-is, any other result is
    returned as a JSON response.
    """
    hostname = request.url_root
    lang = getParam("lang")
    raw_input = getParam("input")
    voice_name = getParam("voice", "default_voice")
    input_type = getParam("input_type", "markup")
    output_type = getParam("output_type", "json")
    presynth_param = getParam("presynth", False)

    if lang is None or raw_input is None:
        resp = make_response(json.dumps(getSynthesisOptions()))
        resp.headers["Content-type"] = "application/json"
        resp.headers["Allow"] = "OPTIONS, GET, POST, HEAD"
        return resp

    # only the literal string "True" switches presynth on
    presynth = presynth_param == "True"

    if lang not in synthesisSupportedLanguages():
        return "synthesis does not support language %s" % lang

    #The input is a json string, needs to be a python dictionary
    utterance = json.loads(raw_input)
    result = synthesise(lang,voice_name,utterance,input_type,output_type,hostname=hostname,presynth=presynth)

    if type(result) is str:
        log.debug("RETURNING MESSAGE: %s" % result)
        return result
    return Response(json.dumps(result), mimetype='application/json')
def marytts_postproc(lang, utt):
    """Run marytts post-processing on an utterance.

    Converts the utterance to maryxml, posts it to marytts_url with input
    type INTONATION and output type ALLOPHONES, and returns the utterance
    parsed from the reply by maryxml2utt. A non-OK reply status raises
    through raise_for_status().
    """
    if lang == "nb":
        locale = xmllang = "no"
    elif lang == "en":
        locale, xmllang = "en_US", "en"
    else:
        locale = xmllang = lang

    #xmllang, not lang, here. Marytts needs the xml:lang to match first part of LOCALE..
    maryxml = utt2maryxml(xmllang, utt)

    payload = dict(
        INPUT_TYPE="INTONATION",
        OUTPUT_TYPE="ALLOPHONES",
        LOCALE=locale,
        INPUT_TEXT=maryxml,
    )
    r = requests.post(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % r.url)
    #Should raise an error if status is not OK (In particular if the url-too-long issue appears)
    r.raise_for_status()

    reply = r.text
    log.debug("REPLY: %s" % reply)
    parsed = maryxml2utt(reply)
    processed_utt = parsed[1]
    log.debug("marytts_postproc returning: %s" % processed_utt)
    return processed_utt
def map2flite(phonestring):
    """Map a space-separated phone string to its flite form.

    "' " is first rewritten to "1 ", each phone is replaced by its entry in
    flitemap (phones without an entry are kept), and the "1" marker is then
    moved behind the following vowel and attached to it. " . " separators
    are finally collapsed to a single space.

    Returns:
        The mapped, space-separated string.
    """
    #example input:  h @ - ' l @U
    #                hh ax l ow1
    phonestring = re.sub("' ", "1 ", phonestring)

    flitephones = [
        flitemap[phone] if phone in flitemap else phone
        for phone in phonestring.split(" ")
    ]
    flite = " ".join(flitephones)

    #move accents to following vowel
    flite = re.sub(r"1 ([^.]*)(aa|ae|ah|ao|aw|ax|axr|ay|eh|ey|ih|iy|ow|oy|uh|uw)", r"\1\2 1", flite)
    flite = re.sub(" 1", "1", flite)
    # Raw string: "\." in a plain literal is an invalid escape sequence.
    flite = re.sub(r" \. ", " ", flite)

    log.debug("MAPPED %s TO %s" % (phonestring, flite))
    return flite
def get_tp_config_by_nameOLD(name):
    """Return the first entry of textprocessor_configs whose "name" equals name.

    Returns None when no entry matches.
    """
    for candidate in textprocessor_configs:
        log.debug("get_tp_config_by_name: %s" % candidate)
        candidate_name = candidate["name"]
        log.debug("name: %s, wanted: %s" % (candidate_name, name))
        if candidate_name != name:
            continue
        log.debug("RETURNING: %s" % candidate)
        return candidate
    return None
def get_tp_config_by_name(name):
    """Return the .config of the first textprocessor whose .name equals name.

    Iterates the module-level textprocessors collection; returns None when
    no textprocessor matches.
    """
    for processor in textprocessors:
        log.debug("get_tp_config_by_name: %s" % processor)
        log.debug("name: %s, wanted: %s" % (processor.name, name))
        if processor.name != name:
            continue
        log.debug("RETURNING: %s" % processor.config)
        return processor.config
    return None
def testVoice(self):
    """Check that the backend for this voice is reachable.

    - engine "marytts": fetches the configured marytts URL with "process"
      replaced by "voices" and requires self.name to be among the names
      that getMaryttsVoicenames extracts from the reply.
    - engine "ahotts": runs the tts_client binary on a short test file and
      requires the resulting wav file to exist in the audio tmpdir; the
      file is removed again.
    - any other engine: nothing is checked.

    Raises:
        VoiceException: if the marytts request fails, the voice is not
            listed, or the ahotts test file was not produced.
    """
    log.info("Testing voice %s" % self.name)

    if self.engine == "marytts":
        voice_host = config.config.get("Services", "marytts")
        url = re.sub("process","voices",voice_host)
        log.debug("Calling url: %s" % url)
        # "except Exception" instead of a bare except, so that
        # KeyboardInterrupt/SystemExit are not turned into VoiceException.
        try:
            r = requests.get(url)
        except Exception as e:
            msg = "Marytts server not found at url %s" % (url)
            log.error(msg)
            raise VoiceException(msg) from e

        response = r.text
        log.debug("Response:\n%s" % response)
        marytts_voicenames = self.getMaryttsVoicenames(response)
        if self.name not in marytts_voicenames:
            msg = "Voice %s not found at url %s" % (self.name, url)
            log.error(msg)
            raise VoiceException(msg)
        log.info("Voice found at url %s" % url)

    elif self.engine == "ahotts":
        cwdir = os.getcwd()
        tmpdir = config.config.get("Audio settings","audio_tmpdir")
        ahotts_dir = config.config.get("Services", "ahotts_dir")
        ahotts_server_ip = config.config.get("Services", "ahotts_server_ip")
        ahotts_server_port = config.config.get("Services", "ahotts_server_port")
        # NOTE(review): shell command assembled from configuration values;
        # safe only as long as the configuration file is trusted.
        ahotts_command = "cd %s/bin; echo \"Hasierako proba\" > ahotts_test.txt; ./tts_client -IP=%s -Port=%s -InputFile=ahotts_test.txt -OutputFile=ahotts_test.wav ; mv ahotts_test.wav %s/%s/ahotts_test.wav ; rm ahotts_test.txt" % (ahotts_dir, ahotts_server_ip, ahotts_server_port, cwdir, tmpdir)
        os.system(ahotts_command)

        wav_path = '%s/%s/ahotts_test.wav'%(cwdir, tmpdir)
        try:
            # Opening the file is the existence check; "with" guarantees
            # the handle is closed before the file is removed.
            with open(wav_path,'r'):
                pass
            os.remove(wav_path)
        except Exception as e:
            msg = "AhoTTS server not found at IP %s and Port %s" % (ahotts_server_ip,ahotts_server_port)
            log.error(msg)
            raise VoiceException(msg) from e
def test_default_settings():
    """Call the server once per supported language with default settings.

    For every entry of supported_languages a GET request with input "test."
    is sent through test_client and the decoded reply is logged. No
    assertion is made on the reply.
    """
    #1) Test default settings for supported languages
    # GET:  curl "http://localhost:10000/?lang=en&input=test."
    for language in supported_languages:
        log.debug("START: %s" % language)
        request_url = "%s?lang=%s&input=test." % (host,language)
        reply = test_client.get(request_url)
        log.debug(reply.data.decode('utf-8'))
        log.debug("DONE: %s" % language)
def mapSsmlTranscriptionsToMary(ssml, lang, tp_config):
    """Rewrite the ph="..." transcriptions of an SSML string for marytts.

    Every <phoneme ...> start tag is located, its ph attribute value is
    unescaped (&quot; -> "), mapped with mapperMapToMary, re-escaped
    (" -> &quot;, < -> &lt;) and substituted back into the SSML.

    Args:
        ssml: the SSML input string.
        lang: language code passed on to mapperMapToMary.
        tp_config: textprocessor configuration passed on to mapperMapToMary.

    Returns:
        The SSML string with mapped transcriptions.
    """
    #cause of T180337 (synthesis fails on transcriptions containing ">")
    #phoneme_elements = re.findall("(<phoneme [^>]+>)", ssml)
    #.+? means shortest match
    phoneme_elements = re.findall("(<phoneme .+?\">)", ssml)
    for element in phoneme_elements:
        log.debug(element)
        found = re.findall("ph=\"(.+)\">", element)
        if not found:
            # phoneme element without a matching ph attribute: nothing to map
            continue
        trans = found[0]
        log.debug("ws_trans: %s" % trans)

        mary_trans = mapperMapToMary(trans.replace("&quot;", "\""), lang, tp_config)
        log.debug("mary_trans: %s" % mary_trans)
        # Re-escape the characters that may not appear raw in an attribute value.
        mary_trans = mary_trans.replace("\"", "&quot;")
        mary_trans = mary_trans.replace("<", "&lt;")
        log.debug("mary_trans(2): %s" % mary_trans)

        # Literal replacement: transcriptions contain regex metacharacters
        # (".", "?", "{", "\\" ...), so they must not be used as a pattern
        # or as a re.sub replacement template.
        ssml = ssml.replace(trans, mary_trans)
    #log.debug("MAPPED SSML: %s" % ssml)
    return ssml
def test_lexicon_client():
    """Self-test of the lexicon client against the "wikispeech_testdb:sv" lexicon.

    Loads the lexicon, looks up a three-word sentence and compares each
    returned transcription with the expected one.

    Raises:
        Exception: whatever loadLexicon/getLookupBySentence raised, after
            logging it, when the lookup itself fails.
        RuntimeError: when a word's transcription differs from the expected one.
        KeyError: when a word is missing from the lookup result.
    """
    lexicon = "wikispeech_testdb:sv"
    sent = "apa hund färöarna"
    trans = {
        "apa": '"" A: . p a',
        "hund": '" h u0 n d',
        "färöarna": '"" f {: . % r 2: . a . rn a',
    }

    try:
        lexicon_client.loadLexicon(lexicon)
        lex = lexicon_client.getLookupBySentence(sent, lexicon)
        log.debug("LEX: %s" % lex)
    except Exception:
        log.error("Failed to do lexicon lookup.\nError type: %s\nError info:%s" % (sys.exc_info()[0], sys.exc_info()[1]))
        import traceback
        log.debug("Stacktrace:")
        if log.log_level == "debug":
            traceback.print_tb(sys.exc_info()[2])
        log.debug("END stacktrace")
        log.error("lexicon lookup test failure")
        log.error("No running lexserver found at %s" % config.config.get("Services","lexicon"))
        raise

    for word in sent.split(" "):
        try:
            if lex[word] != trans[word]:
                mismatch = "word %s, found %s, expected %s" % (word, lex[word], trans[word])
                log.error("lexicon lookup test failure")
                log.error(mismatch)
                # A bare "raise" here has no active exception and would only
                # produce "RuntimeError: No active exception to reraise";
                # raise the same exception type with a useful message.
                raise RuntimeError(mismatch)
        except KeyError:
            log.error("Lexicon lookup test failure: Word %s not found in lexicon %s" % (word, lexicon))
            raise

    log.debug("SUCCESS: lexicon lookup test")
def synthesise_json(lang,voice,input):
    """Synthesise an utterance with marytts and return audio url and tokens.

    The utterance is serialised with utt2maryxml and posted to marytts_url
    with input type ALLOPHONES and output type WIKISPEECH_JSON.

    Args:
        lang: language code; "nb" is sent to marytts as xml:lang "no".
        voice: voice configuration dict; "name" is required,
            "marytts_locale" overrides the locale (default: lang).
        input: the utterance to synthesise.

    Returns:
        A tuple (audio_url, output_tokens) taken from the "audio" and
        "tokens" entries of the JSON reply.

    Raises:
        requests.HTTPError: when the reply status is not OK.
    """
    xmllang = "no" if lang == "nb" else lang
    locale = voice["marytts_locale"] if "marytts_locale" in voice else lang

    #xmllang, not lang, here. Marytts needs the xml:lang to match first part of LOCALE..
    maryxml = utt2maryxml(xmllang, input)
    log.debug("MARYXML: %s" % maryxml)

    params = {
        "INPUT_TYPE":"ALLOPHONES",
        "OUTPUT_TYPE":"WIKISPEECH_JSON",
        "LOCALE":locale,
        "VOICE":voice["name"],
        "INPUT_TEXT":maryxml,
    }
    r = requests.post(marytts_url,params=params)
    log.debug("runMarytts PARAMS URL (length %d): %s" % (len(r.url), r.url))

    #Should raise an error if status is not OK (In particular if the url-too-long issue appears)
    # This has to happen before the body is parsed: an error reply is not
    # necessarily JSON, and parsing it first would hide the HTTP error.
    if not r.ok:
        log.debug("REPLY: %s" % r.text)
    r.raise_for_status()

    reply = r.json()
    log.debug("REPLY: %s" % reply)
    audio_url = reply["audio"]
    output_tokens = reply["tokens"]
    return (audio_url, output_tokens)
def marytts_preproc(text, lang, tp_config, input_type="text"):
    """Run marytts pre-processing (TEXT or SSML -> INTONATION) on the input.

    With input_type "ssml" the transcriptions in the text are first mapped by
    mapSsmlTranscriptionsToMary and the input is sent as SSML; any other
    input_type is sent as TEXT. The reply XML is converted with maryxml2utt.

    Raises:
        ValueError: when marytts answers with a status other than 200; the
            args are (message, status_code, reply text).
    """
    if lang == "en":
        locale = "en_US"
    else:
        locale = "no" if lang == "nb" else lang

    is_ssml = input_type == "ssml"
    mary_input_type = "SSML" if is_ssml else "TEXT"
    if is_ssml:
        text = mapSsmlTranscriptionsToMary(text, lang, tp_config)

    #FIX FOR ISSUE T164917: 600-talet loses number
    #Marytts uses ICU to expand numerals, but only numerals that are a full token.
    #In cases like this the number is just dropped.
    #The very simple fix is to insert space before the hyphen
    text = re.sub(r"([0-9]+)-tal",r"\1 -tal", text)

    #Using output_type PHONEMES/INTONATION/ALLOPHONES means that marytts will phonetise the words first, and lexLookup will change the transcription if a word is found
    payload = {
        "INPUT_TYPE": mary_input_type,
        "OUTPUT_TYPE": "INTONATION",
        "LOCALE": locale,
        "INPUT_TEXT": text,
    }
    r = requests.get(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % r.url)

    status = r.status_code
    if status != 200:
        log.debug("marytts call failed with error %d" % status)
        log.debug("marytts error text %s" % r.text)
        raise ValueError("marytts call failed with error", status, r.text)

    _marylang, utt = maryxml2utt(r.text, tp_config)
    return utt
def convertResponse(response_json):
    """Turn a lexicon lookup reply into an orthography -> transcription dict.

    Only list replies are processed; anything else yields an empty dict.
    Entries whose status name is "delete" are skipped. For each remaining
    entry the first transcription is used: an entry whose "preferred" value
    equals True always (over)writes the value for its orthography, other
    entries only fill it in when the orthography is not in the dict yet.

    Args:
        response_json: the decoded lexicon server reply.

    Returns:
        A dict mapping each entry's "strn" to the "strn" of its first
        transcription.
    """
    trans_dict = {}
    #with list response:
    if not isinstance(response_json, list):
        return trans_dict

    for response_item in response_json:
        status_name = response_item["status"]["name"]
        log.debug("STATUS: %s" % status_name)
        if status_name == "delete":
            continue

        response_orth = response_item["strn"]
        first_trans = response_item["transcriptions"][0]["strn"]

        # .get(): an entry without a "preferred" key counts as not preferred
        # instead of raising KeyError.
        if response_item.get("preferred") == True:
            log.debug("ORTH: %s, PREFERRED TRANS: %s" % (response_orth,first_trans))
            trans_dict[response_orth] = first_trans
        elif response_orth not in trans_dict:
            #only add the first reading if none is preferred
            log.debug("ORTH: %s, FIRST TRANS: %s" % (response_orth,first_trans))
            trans_dict[response_orth] = first_trans
    return trans_dict
def checkInputAndOutputTokens(input_string,output_token_list):
    """Check output token orthographies against the input string, repairing them if possible.

    A message is recorded for every output token whose "orth" is not a
    substring of input_string. If there is any such message, the input is
    split into tokens (the characters , . ? ! " ( ) are set off by spaces
    first) and compared position by position with the non-empty output
    tokens: when both lists have the same length, differing "orth" values in
    output_token_list are overwritten in place with the input token;
    otherwise a warning and both lists are recorded.

    Returns:
        The list of messages (empty when every token was found).
    """
    msgs = []
    for token in output_token_list:
        log.debug(token)
        orth = token["orth"]
        if orth not in input_string:
            msgs.append("output token \"%s\" not found in input string \"%s\"" % (orth, input_string))

    if not msgs:
        return msgs

    #attempt to correct ...
    spaced = re.sub(r"\s*([,.?!\"()])\s*",r" \1 ", input_string)
    spaced = re.sub(r"\s+", r" ", spaced)
    input_list = spaced.strip().split(" ")
    output_list = [tok["orth"] for tok in output_token_list if tok["orth"] != ""]

    if len(input_list) != len(output_list):
        msgs.append("WARNING: Unable to correct output token list. Input contains %d tokens, output contains %d non-empty tokens." % (len(input_list), len(output_list)))
        msgs.append("input token list : %s" % input_list)
        msgs.append("output token list: %s" % output_list)
        return msgs

    # in_pos walks the input tokens, out_pos walks all output tokens
    # (including empty ones, which have no input counterpart).
    in_pos = 0
    out_pos = 0
    while in_pos < len(input_list) and out_pos < len(output_token_list):
        out_token = output_token_list[out_pos]
        input_orth = input_list[in_pos]
        output_orth = out_token["orth"]
        if output_orth == "":
            out_pos += 1
            log.debug("skipping empty output token")
            continue
        log.debug("%s\t%s" % (input_orth, output_orth))
        if input_orth != output_orth:
            out_token["orth"] = input_orth
            msgs.append("REPLACED: %s -> %s" % (output_orth, input_orth))
        in_pos += 1
        out_pos += 1
    return msgs
def marytts_preproc_tokenised_TOREMOVE(lang, utt):
    """Send an already tokenised utterance to marytts (TOKENS -> PHONEMES).

    The utterance is serialised with tokeniser.utt2maryxml_TOKENS, sent to
    marytts_url, and the reply XML is converted back with maryxml2utt. The
    reply status is not checked.
    """
    locale = "en_US" if lang == "en" else lang
    token_xml = tokeniser.utt2maryxml_TOKENS(lang,utt)

    #Using output_type PHONEMES means that marytts will phonetise the words first, and lexLookup will change the transcription if a word is found
    payload = dict(
        INPUT_TYPE="TOKENS",
        OUTPUT_TYPE="PHONEMES",
        LOCALE=locale,
        INPUT_TEXT=token_xml,
    )
    r = requests.get(marytts_url, params=payload)
    log.debug("CALLING MARYTTS: %s" % r.url)

    reply_xml = r.text
    log.debug("REPLY: %s" % reply_xml)
    _marylang, processed = maryxml2utt(reply_xml)
    log.debug("marytts_preproc_tokenised returns %s" % processed)
    return processed