def handle_translation(self, request_id):
    """
    Handler connecting to the Microsoft Translator service.

    Requires a Bing AppID as documented at MSDN:
    - http://msdn.microsoft.com/en-us/library/ff512421.aspx

    Loads the serialized TranslationRequestMessage stored in
    /tmp/<request_id>.message, translates its source text in batches of
    self.__batch__ lines via self._batch_translate() and writes the
    message, now carrying the target text, back to the same file.
    """
    message_path = '/tmp/{0}.message'.format(request_id)

    # The context manager closes the file even if the translation fails.
    with open(message_path, 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        lines = message.source_text.split('\n')
        batch_size = self.__batch__

        # Stepping through the lines yields every full batch followed by
        # the (possibly shorter) final batch, without depending on
        # Python 2 integer division.
        translated = []
        for start in range(0, len(lines), batch_size):
            text = u'\n'.join(lines[start:start + batch_size])
            translated.append(self._batch_translate(source, target, text))
            translated.append(u'\n')

        message.target_text = u''.join(translated)

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text using the Accurat Moses SMT system.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    runs the Moses decoder for the requested language pair on its source
    text and writes the message back with the target text set.  Two
    entries, 'STDOUT' and 'STDERR', are appended to packet_data.
    """
    message_path = '/tmp/{0}.message'.format(request_id)
    source_path = '/tmp/{0}.source'.format(request_id)
    target_path = '/tmp/{0}.target'.format(request_id)

    # Context managers make sure no file is left open if a step fails.
    with open(message_path, 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        # First, we write out the source text to file.
        with open(source_path, 'w') as source:
            source.write(message.source_text.encode('utf-8'))

            # Check if the last line ends with a line break, otherwise
            # Moses I/O implementation does not accept the input!
            if not message.source_text.endswith('\n'):
                source.write('\n')

        source_language = self.language_code(message.source_language)
        target_language = self.language_code(message.target_language)

        # This is a special instance of the Moses worker, with pre-defined
        # knowledge about the ACCURAT Moses configurations.  We use this
        # approach to ensure that only one Moses process at a time can be
        # started; by doing so, we can avoid memory issues.
        MOSES_CMD = '/share/accurat/run/wmt10/bin/moses-irstlm/mosesdecoder' \
          '/mosesdecoder/moses-cmd/src/moses'
        MOSES_CONFIG = '/share/accurat/mtserver/accurat/{0}-{1}/' \
          'moses.ini.bin'.format(source_language, target_language)

        # Then, we invoke the Moses command reading from the source file
        # and writing to the target file, also inside /tmp.  The files are
        # handed over as stdin/stdout instead of being spliced into the
        # shell command, so request_id never reaches the shell.  This
        # blocks until the Moses process finishes.
        shell_cmd = "{0} -f {1}".format(MOSES_CMD, MOSES_CONFIG)
        with open(source_path, 'rb') as moses_input:
            with open(target_path, 'wb') as moses_output:
                process = Popen(shell_cmd, shell=True, stdin=moses_input,
                  stdout=moses_output, stderr=PIPE)
                proc_stderr = process.communicate()[1]

        # Moses' stdout goes into the target file, so there is nothing to
        # capture; the (empty) STDOUT entry is kept for compatibility.
        proc_stdout = ''

        # Wait for some time to ensure file I/O is completed.
        sleep(2)

        # We can now load the translation from the target file.
        with open(target_path, 'r') as target:
            message.target_text = unicode(target.read(), 'utf-8')

        keyvalue = message.packet_data.add()
        keyvalue.key = 'STDOUT'
        keyvalue.value = proc_stdout

        keyvalue = message.packet_data.add()
        keyvalue.key = 'STDERR'
        keyvalue.value = proc_stderr

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text using the Moses SMT system.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    runs self.MOSES_CMD with the configuration self.MOSES_CONFIG on its
    source text and writes the message back with the target text set.
    """
    message_path = '/tmp/{0}.message'.format(request_id)
    source_path = '/tmp/{0}.source'.format(request_id)
    target_path = '/tmp/{0}.target'.format(request_id)

    # Context managers make sure no file is left open if a step fails.
    with open(message_path, 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        # First, we write out the source text to file.
        with open(source_path, 'w') as source:
            source.write(message.source_text.encode('utf-8'))

            # Make sure the last line ends with a line break, otherwise
            # the Moses I/O implementation does not accept the input.
            if not message.source_text.endswith('\n'):
                source.write('\n')

        # Then, we invoke the Moses command reading from the source file
        # and writing to the target file, also inside /tmp.  The files are
        # handed over as stdin/stdout instead of being spliced into the
        # shell command, so request_id never reaches the shell.  This
        # blocks until the Moses process finishes.
        shell_cmd = "{0} -f {1}".format(self.MOSES_CMD, self.MOSES_CONFIG)
        with open(source_path, 'rb') as moses_input:
            with open(target_path, 'wb') as moses_output:
                process = Popen(shell_cmd, shell=True, stdin=moses_input,
                  stdout=moses_output)
                process.wait()

        # Wait for some time to ensure file I/O is completed.
        sleep(2)

        # We can now load the translation from the target file.
        with open(target_path, 'r') as target:
            message.target_text = unicode(target.read(), 'utf-8')

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text using Yahoo! Babel Fish.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    requests a translation of its source text from the Babel Fish web
    front end and writes the message back to the same file.  The target
    text is only set if a translation is found in the returned page.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        the_data = urllib.urlencode({
          'lp': '{0}_{1}'.format(source, target),
          'text': message.source_text.encode('utf-8'),
          'ei': 'utf8',
        })
        the_url = 'http://babelfish.yahoo.com/translate_txt?{0}'.format(
          the_data)
        the_header = {'User-agent': 'Mozilla/5.0'}

        opener = urllib2.build_opener(urllib2.HTTPHandler)
        http_request = urllib2.Request(the_url, None, the_header)
        http_handle = opener.open(http_request)
        try:
            content = http_handle.read()
        finally:
            # Release the connection even if reading the response fails.
            http_handle.close()

        # The translation is embedded in a hidden form field named "p".
        result = re.search('type="hidden" name="p" value="([^"]+)', content,
          re.I|re.U)

        if result:
            message.target_text = unicode(result.group(1), 'latin-1')

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Dummy translation handler that blocks for a random amount of time.

    Returns all-uppercase version of Text as translation.  The request
    is read from and written back to
    <self.message_path>/<request_id>.message.
    """
    # Block between 50 and 149 seconds...
    interval = 50 + int(random() * 100)
    self.LOGGER.info("Sleeping for {0} seconds...".format(interval))
    sleep(interval)

    # The dummy implementation takes the source text from the serialized
    # request message and stores an upper-cased version of that text as
    # target text inside the same message file.
    self.LOGGER.debug("Finalizing result for request {0}".format(
      request_id))

    message_path = '{0}/{1}.message'.format(self.message_path, request_id)

    # The context manager closes the file even if (de)serialization fails.
    with open(message_path, 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        message.target_text = message.source_text.upper()

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text from German->English using the Moses SMT system.

    You have to adapt MOSES_CMD and MOSES_CONFIG to the correct values :)

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    runs Moses on its source text and writes the message back with the
    target text set.
    """
    message_path = "/tmp/{0}.message".format(request_id)
    source_path = "/tmp/{0}.source".format(request_id)
    target_path = "/tmp/{0}.target".format(request_id)

    # Context managers make sure no file is left open if a step fails.
    with open(message_path, "r+b") as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        # First, we write out the source text to file.
        with open(source_path, "w") as source:
            source.write(message.source_text.encode("utf-8"))

            # Make sure the last line ends with a line break, otherwise
            # the Moses I/O implementation does not accept the input.
            if not message.source_text.endswith("\n"):
                source.write("\n")

        # Then, we invoke the Moses command reading from the source file
        # and writing to the target file, also inside /tmp.  The files are
        # handed over as stdin/stdout instead of being spliced into the
        # shell command, so request_id never reaches the shell.  This
        # blocks until the Moses process finishes.
        shell_cmd = "{0} -f {1}".format(MOSES_CMD, MOSES_CONFIG)
        with open(source_path, "rb") as moses_input:
            with open(target_path, "wb") as moses_output:
                process = Popen(
                    shell_cmd, shell=True, stdin=moses_input, stdout=moses_output
                )
                process.wait()

        # We can now load the translation from the target file.
        with open(target_path, "r") as target:
            message.target_text = unicode(target.read(), "utf-8")

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text using the Accurat Moses SMT system.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    runs the Moses decoder for the requested language pair on its source
    text and writes the message back with the target text set.
    """
    message_path = '/tmp/{0}.message'.format(request_id)
    source_path = '/tmp/{0}.source'.format(request_id)
    target_path = '/tmp/{0}.target'.format(request_id)

    # Context managers make sure no file is left open if a step fails.
    with open(message_path, 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        # First, we write out the source text to file.
        with open(source_path, 'w') as source:
            source.write(message.source_text.encode('utf-8'))

            # Check if the last line ends with a line break, otherwise
            # Moses I/O implementation does not accept the input!
            if not message.source_text.endswith('\n'):
                source.write('\n')

        source_language = self.language_code(message.source_language)
        target_language = self.language_code(message.target_language)

        # This is a special instance of the Moses worker, with pre-defined
        # knowledge about the ACCURAT Moses configurations.  We use this
        # approach to ensure that only one Moses process at a time can be
        # started; by doing so, we can avoid memory issues.
        MOSES_CMD = '/share/accurat/run/wmt10/bin/moses-irstlm/mosesdecoder' \
          '/mosesdecoder/moses-cmd/src/moses'
        MOSES_CONFIG = '/share/accurat/mtserver/accurat/{0}-{1}/' \
          'moses.ini.bin'.format(source_language, target_language)

        # Then, we invoke the Moses command reading from the source file
        # and writing to the target file, also inside /tmp.  The files are
        # handed over as stdin/stdout instead of being spliced into the
        # shell command, so request_id never reaches the shell.  This
        # blocks until the Moses process finishes.
        shell_cmd = "{0} -f {1}".format(MOSES_CMD, MOSES_CONFIG)
        with open(source_path, 'rb') as moses_input:
            with open(target_path, 'wb') as moses_output:
                process = Popen(shell_cmd, shell=True, stdin=moses_input,
                  stdout=moses_output)
                process.wait()

        # Wait for some time to ensure file I/O is completed.
        sleep(2)

        # We can now load the translation from the target file.
        with open(target_path, 'r') as target:
            message.target_text = unicode(target.read(), 'utf-8')

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text from German->English using the Lucy RBMT system.

    Uses the XML-RPC server wrapper running at msv-3207.sb.dfki.de.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message
    and writes it back to the same file.  If Lucy returns a translation,
    the filtered text becomes the target text and the unfiltered text is
    attached to packet_data as 'RAW_RESULT'; returned trees are attached
    as 'TREES'.

    Raises AssertionError if the Lucy server does not report being alive.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        proxy = xmlrpclib.ServerProxy('http://msv-3207.sb.dfki.de:9999/')

        # Raised explicitly (instead of using an assert statement) so the
        # check is not stripped when Python runs with -O.
        if not proxy.isAlive():
            raise AssertionError('Lucy XML-RPC server is not alive')

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        content = proxy.lucyTranslate(message.source_text, source, target)

        # Results are stored in a field with key: '{EN,ES,DE,FR}.txt'.
        target_key = target[:2]
        if target_key == 'SP':
            target_key = 'ES'
        elif target_key == 'GE':
            target_key = 'DE'

        result = content.get('{0}.txt'.format(target_key))
        trees = content.get('tre')

        # We have to parse the result text and filter out Lucy's
        # alternative translations, e.g.:
        #
        #   The apple does not fall far from the <A[tribe|stem|trunk]>.
        #
        # For this example, we will return "...from the tribe." as target
        # text while the "raw" translation as well as the trees are
        # returned inside the TranslationRequestMessage's packet_data list.
        if result:
            filter_exp = re.compile(r'<.\[(.+?)(\|.+?)?\]>', re.I|re.U)
            filtered_result = filter_exp.sub(r'\g<1>', result)

            # xmlrpclib hands back non-ASCII strings as unicode objects;
            # only byte strings still have to be decoded.
            if isinstance(filtered_result, bytes):
                filtered_result = filtered_result.decode('utf-8')
            message.target_text = filtered_result

            keyvalue = message.packet_data.add()
            keyvalue.key = 'RAW_RESULT'
            keyvalue.value = result

        if trees:
            keyvalue = message.packet_data.add()
            keyvalue.key = 'TREES'
            keyvalue.value = trees

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translation handler that obtains a translation via the Google
    translation web front end.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    posts its source text to the web front end and writes the message
    back to the same file.  The target text is only set if the result
    box is found in the returned page.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        the_url = 'http://translate.google.com/translate_t'
        the_data = urllib.urlencode({
          'js': 'n',
          'sl': source,
          'tl': target,
          'text': message.source_text.encode('utf-8'),
        })
        the_header = {'User-agent': 'Mozilla/5.0'}

        opener = urllib2.build_opener(urllib2.HTTPHandler)
        http_request = urllib2.Request(the_url, the_data, the_header)
        http_handle = opener.open(http_request)
        try:
            content = http_handle.read()
        finally:
            # Release the connection even if reading the response fails.
            http_handle.close()

        result_exp = re.compile(
          '<span id=result_box class="long_text">(.*)</span></div>',
          re.I|re.U|re.S)
        result = result_exp.search(content)

        if result:
            # Normalize HTML line breaks to \n.
            result_html = result.group(1).replace('<br>', '\n')

            # Extract all <span>...</span> tags containing the translation.
            span_exp = re.compile('<span.*?>([^<]+?)</span>', re.I|re.U|re.S)
            spans = [unicode(match.group(1), 'utf-8').strip()
              for match in span_exp.finditer(result_html)]

            # Construct target text from list of spans, normalizing \n+
            # to \n.
            multibreaks = re.compile('\n+', re.I|re.U|re.S)
            message.target_text = multibreaks.sub(u'\n', u'\n'.join(spans))

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text from German->English using Microsoft Translator.

    Requires a Bing AppID as documented at MSDN:
    - http://msdn.microsoft.com/en-us/library/ff512421.aspx

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    requests a translation of its source text and writes the message back
    to the same file.  The target text is only set if the response
    contains a serialized <string> element.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        # NOTE(review): the AppID is hard-coded here; consider moving it
        # into configuration.
        app_id = '9259D297CB9F67680C259FD62734B07C0D528312'
        the_data = urllib.urlencode({
          'appId': app_id,
          'from': source,
          'to': target,
          'text': message.source_text.encode('utf-8'),
        })
        the_url = 'http://api.microsofttranslator.com/v2/Http.svc/' \
          'Translate?{0}'.format(the_data)
        the_header = {'User-agent': 'Mozilla/5.0'}

        opener = urllib2.build_opener(urllib2.HTTPHandler)
        http_request = urllib2.Request(the_url, None, the_header)
        http_handle = opener.open(http_request)
        try:
            content = http_handle.read()
        finally:
            # Release the connection even if reading the response fails.
            http_handle.close()

        # re.S lets "." span line breaks, so translations consisting of
        # several lines are matched as well.
        # NOTE(review): XML entities in the payload are not unescaped --
        # confirm whether callers expect raw or unescaped text.
        result_exp = re.compile('<string xmlns="http://schemas.microsoft.' \
          'com/2003/10/Serialization/">(.*?)</string>', re.I|re.U|re.S)

        result = result_exp.search(content)

        if result:
            message.target_text = unicode(result.group(1), 'utf-8')

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translates text using the connected Moses SMT server system.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    sends every line of its source text to the XML-RPC server at
    self.MOSES_HOST:self.MOSES_PORT and writes the message back with the
    translated lines joined by line breaks as target text.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        proxy = xmlrpclib.ServerProxy('{0}:{1}'.format(self.MOSES_HOST,
          self.MOSES_PORT))

        # split() always yields at least one line, so there is always a
        # result to store.  A response without 'text' contributes '\n'.
        result = [
          proxy.translate({'text': text}).get('text', '\n')
          for text in message.source_text.split(u'\n')
        ]
        message.target_text = u'\n'.join(result)

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Handler connecting to the Yahoo! Babel Fish service.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    translates its source text in batches of self.__batch__ entries via
    self._batch_translate() and writes the message, now carrying the
    target text, back to the same file.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        # Insert splitter tokens to allow re-construction of original lines.
        entries = []
        for source_line in message.source_text.split('\n'):
            entries.append(source_line.strip())
            entries.append(self.__splitter__)

        batch_size = self.__batch__

        # Stepping through the entries yields every full batch followed by
        # the (possibly shorter) final batch, without depending on
        # Python 2 integer division.
        translated = []
        for start in range(0, len(entries), batch_size):
            chunk = entries[start:start + batch_size]
            text = u'\n'.join(chunk)
            translated.append(self._batch_translate(source, target, text))
            translated.append(u'\n')

            # Pause after every full batch; a shorter final batch is not
            # followed by a pause.
            # NOTE(review): presumably this throttles requests to the
            # service -- confirm.
            if len(chunk) == batch_size:
                sleep(30)

        message.target_text = u''.join(translated)

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()
def handle_translation(self, request_id):
    """
    Translation handler that obtains a translation via the Google
    translation web front end.

    Reads the TranslationRequestMessage from /tmp/<request_id>.message,
    posts its source text to the web front end and writes the message
    back to the same file.  The target text is only set if the
    suggestion text area is found in the returned page.
    """
    with open('/tmp/{0}.message'.format(request_id), 'r+b') as handle:
        message = TranslationRequestMessage()
        message.ParseFromString(handle.read())

        source = self.language_code(message.source_language)
        target = self.language_code(message.target_language)

        the_url = 'http://translate.google.com/translate_t'
        the_data = urllib.urlencode({
          'js': 'n',
          'sl': source,
          'tl': target,
          'text': message.source_text.encode('utf-8'),
        })
        the_header = {'User-agent': 'Mozilla/5.0'}

        opener = urllib2.build_opener(urllib2.HTTPHandler)
        http_request = urllib2.Request(the_url, the_data, the_header)
        http_handle = opener.open(http_request)
        try:
            content = http_handle.read()
        finally:
            # Release the connection even if reading the response fails.
            http_handle.close()

        result_exp = re.compile('<textarea name=utrans wrap=SOFT ' \
          'dir="ltr" id=suggestion.*>(.*?)</textarea>', re.I|re.U)

        result = result_exp.search(content)

        if result:
            # Normalize HTML line breaks to \n.
            target_text = result.group(1).replace('<br>', '\n')
            message.target_text = unicode(target_text, 'utf-8')

        handle.seek(0)
        handle.write(message.SerializeToString())
        # Drop stale bytes in case the new serialization is shorter.
        handle.truncate()