class MicrosoftTranslator(AbstractTranslator):
    """Translator that delegates to the Microsoft Translator API.

    The API client is only created when ``self.options`` is not None; in
    that case both ``client_id`` and ``client_secret`` must be present.
    Without options the instance stays usable but translates nothing.
    """

    name = "microsoft"

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options exist but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()
        if self.options is not None:
            client_id = self.options.get("client_id")
            client_secret = self.options.get("client_secret")
            if client_id is None or client_secret is None:
                raise ValueError(
                    "Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret"
                )
            self.client = MSTranslator(client_id=client_id, client_secret=client_secret)
        else:
            self.client = None

        # Filled in lazily by the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Return the languages reported by the API client.

        A successful lookup is cached. Without a client the cached answer is
        an empty list. If the API call fails, an empty list is returned but
        not cached, so the next access tries again.
        """
        if self._languages is not None:
            return self._languages

        if self.client is None:
            # Return right away: falling through would call get_languages()
            # on None and raise an AttributeError that is not handled below.
            self._languages = []
            return self._languages

        try:
            self._languages = self.client.get_languages()
        except MSTranslatorApiException:
            return []
        return self._languages

    def _translate(self, texts, language, origin_language="en"):
        """Translate ``texts`` into ``language``.

        Example: ``[ 'Hello' ], 'es' => { 'Hello' : 'Hola' }``

        Args:
            texts: sequence of source strings.
            language: target language code; must be in ``self.languages``.
            origin_language: language code of ``texts``.

        Returns:
            dict mapping each source text to its translation. Texts whose
            ``TranslatedText`` is empty are left out. An empty dict is
            returned when there is no client, the language is not
            supported, or the API call fails.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        app.logger.debug("Translating %r to %r using Microsoft Translator API", texts, language)
        try:
            ms_translations = self.client.translate_array(
                texts=texts, to_lang=language, from_lang=origin_language
            )
        except MSTranslatorApiException as e:
            traceback.print_exc()
            # Logger.warn is a deprecated alias of Logger.warning.
            app.logger.warning(
                "Error translating using Microsoft Translator API: %s", e, exc_info=True
            )
            return {}

        app.logger.debug(
            "Translated %s sentences using Microsoft Translator API", len(ms_translations)
        )

        translations = {}
        # Results are paired with the inputs by position.
        for text, translation in zip(texts, ms_translations):
            translated_text = translation.get("TranslatedText")
            if translated_text:
                translations[text] = translated_text
        return translations
class BingTranslator:
    """Thin wrapper around ``Translator`` that always translates from English."""

    def __init__(self):
        """Create the underlying translator from environment credentials.

        Reads ``BING_TRANSLATION_CLIENT_ID`` and ``BING_TRANSLATION_SECRET``
        from the environment, falling back to built-in defaults.

        Raises:
            Exception: if either resolved value is empty.
        """
        env = os.environ
        # NOTE(review): the fallback values below look like real credentials
        # committed to source control - consider rotating them and requiring
        # the environment variables instead.
        app_id = env.get("BING_TRANSLATION_CLIENT_ID", "gigaware123")
        app_secret = env.get("BING_TRANSLATION_SECRET", "Dp3afp41sR/sDsKRK3uWPt2i4WbAKqKB5q6RhCI9a1Q=")
        if not (app_id and app_secret):
            raise Exception("bing translation client id or client secret not found")
        self.translator = Translator(app_id, app_secret)

    def translate(self, untranslated, target_language):
        """Translate one English text into ``target_language``."""
        backend = self.translator
        return backend.translate(untranslated, target_language, from_lang='en')

    def translate_all(self, untranslated, target_language):
        """Translate a collection of English texts into ``target_language``."""
        backend = self.translator
        return backend.translate_array(untranslated, target_language, from_lang='en')
def translate():
    """Transliterate and translate the words of a posted text, alternately.

    Expects a JSON object with a ``text`` entry. The text is sanitized with
    ``bleach.clean`` and split into word and non-word tokens. Starting with
    the first word, every other word is transliterated in place; the
    remaining words are sent in a single ``translate_array`` call and the
    results are written back to their original positions.

    Responses:
        400 - body is not a JSON object, has no ``text`` entry, or the
              sanitized text is longer than 400 characters.
        500 - the translation service call failed.
        otherwise JSON with ``translation`` and ``status=200``.
    """
    payload = request.json
    # A missing 'text' entry or a non-object body is a client error; indexing
    # blindly would surface it as an unhandled exception (HTTP 500) instead.
    if not payload or not isinstance(payload, dict) or 'text' not in payload:
        abort(make_response(jsonify(message="Bad zapros"), 400))

    # sanitize html
    data = bleach.clean(payload['text'])

    if len(data) > 400:
        abort(make_response(jsonify(message="Slishkom long stroka"), 400))

    # The capturing group keeps the separators in the token list so the
    # result can be re-joined without losing punctuation or spacing.
    splitted_original = re.split(r'(\W+)', data, flags=re.UNICODE)
    word_regex = re.compile(r'\w+', re.U)

    # When True the next word is transliterated, otherwise it is queued for
    # translation; the flag flips after every word.
    transliterate_flag = True

    # Token positions of the queued words, used to put the translations back.
    to_translate_indexes = []
    # The queued words themselves.
    to_translate = []

    for position, token in enumerate(splitted_original):
        # Only words are processed; commas, spaces and the like stay as is.
        if not word_regex.match(token):
            continue
        if transliterate_flag:
            splitted_original[position] = translit(token, 'ru', reversed=True)
        else:
            to_translate_indexes.append(position)
            to_translate.append(token)
        transliterate_flag = not transliterate_flag

    translated = []
    # Skip the remote call entirely when no word was queued for translation.
    if to_translate:
        try:
            translator = Translator(app.config['TRANSLATOR_ID'], app.config['TRANSLATOR_SECRET'])
            translated = translator.translate_array(to_translate, 'en', 'ru')
        except Exception:
            # Record the cause before answering with the generic error.
            app.logger.exception("Translation request failed")
            abort(make_response(jsonify(message="Try again popozhe"), 500))

    # Put the translated words back at their original token positions.
    for position, item in zip(to_translate_indexes, translated):
        splitted_original[position] = item['TranslatedText']

    res = ''.join(splitted_original)

    return jsonify(translation=res, status=200)
class MicrosoftTranslator(AbstractTranslator):
    """Translator that delegates to the Microsoft Translator API.

    The API client is only created when ``self.options`` is not None; in
    that case both ``client_id`` and ``client_secret`` must be present.
    Without options the instance stays usable but translates nothing.
    """

    name = 'microsoft'

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options exist but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()
        if self.options is not None:
            client_id = self.options.get('client_id')
            client_secret = self.options.get('client_secret')
            if client_id is None or client_secret is None:
                raise ValueError("Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret")
            self.client = MSTranslator(client_id=client_id, client_secret=client_secret)
        else:
            self.client = None

        # Filled in lazily by the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Return the languages reported by the API client.

        A successful lookup is cached. Without a client the cached answer is
        an empty list. If the API call fails for any reason, an empty list
        is returned but not cached, so the next access tries again.
        """
        if self._languages is not None:
            return self._languages

        if self.client is None:
            # Return right away instead of falling through and calling
            # get_languages() on None.
            self._languages = []
            return self._languages

        try:
            self._languages = self.client.get_languages()
        except Exception:
            # Covers MSTranslatorApiException as well as anything else the
            # client may raise.
            return []
        return self._languages

    def _translate(self, texts, language, origin_language='en'):
        """Translate ``texts`` into ``language``, sending them in slices.

        Example: ``[ 'Hello' ], 'es' => { 'Hello' : 'Hola' }``

        Args:
            texts: sequence of source strings.
            language: target language code; must be in ``self.languages``.
            origin_language: language code of ``texts``.

        Returns:
            dict mapping each source text to its translation. Texts whose
            ``TranslatedText`` is empty, and texts belonging to a slice
            whose request failed, are left out. An empty dict is returned
            when there is no client or the language is not supported.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        # the size of a slice can't be over 10k characters in theory
        # (we try to keep them under 5k in practice)
        # [ element1, element2, element3 ...]
        slices = [[]]
        current_slice = slices[0]

        for text in texts:
            current_slice.append(text)
            # Start a new slice once the current one exceeds 2000 encoded bytes.
            if len(u''.join(current_slice).encode('utf8')) > 2000:
                current_slice = []
                slices.append(current_slice)

        app.logger.debug("Texts splitted in %s slices", len(slices))
        for pos, text_slice in enumerate(slices):
            app.logger.debug(" slice: %s: %s characters", pos, len(''.join(text_slice).encode('utf8')))

        translations = {}
        for current_slice in slices:
            if not current_slice:
                continue

            app.logger.debug("Translating %r to %r using Microsoft Translator API", current_slice, language)
            try:
                current_ms_translations = self.client.translate_array(texts=current_slice, to_lang=language, from_lang=origin_language)
            except Exception as e:
                traceback.print_exc()
                # Logger.warn is a deprecated alias of Logger.warning.
                app.logger.warning("Error translating using Microsoft Translator API: %s", e, exc_info=True)
                # Best effort: skip this slice and keep going with the rest.
                continue

            current_ms_translations = list(current_ms_translations)
            app.logger.debug("Translated %s sentences using Microsoft Translator API", len(current_ms_translations))

            # Pair results with the texts of *this* slice. Pairing all results
            # against ``texts`` from the start would shift every translation
            # onto the wrong source text as soon as an earlier slice failed.
            for text, translation in zip(current_slice, current_ms_translations):
                translated_text = translation.get('TranslatedText')
                if translated_text:
                    translations[text] = translated_text

        return translations
def crawlCourseEssence(course_list): url = "https://course.ncu.edu.tw/Course/main/query/byKeywords?" #ex: https://course.ncu.edu.tw/Course/main/query/byKeywords?serialNo=11001&outline=11001&semester=1031 course_essence_list = [] de = DepEssence() objective_buffer = [] ob = 0 content_buffer = [] content_ch_buffer = [] cb = 0 translator = Translator('21KRtranslator', '1VYijs8FLyy7wmD/x1KsSWficJPiH61jywgGBM5m+iA=') for i, c in enumerate(course_list): if (i==0): de.id = str(c.category.id) de.category = c.category de.course_tree = c.leaf_of course_essence_list = [] params = urllib.urlencode({'serialNo': c.serial_no, 'outline': c.serial_no, 'semester': c.semester}) #dom = requests.get(url=url, params=params, headers={'Cookie': 'JSESSIONID=7257F09EDF368A37341694B4A4D7B72E', 'Accept-Language': 'zh-tw,zh;q=0.8,en-us;q=0.5,en;q=0.3', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0'}) dom = pq(url=url+params, headers={'Cookie': 'JSESSIONID=7257F09EDF368A37341694B4A4D7B72E', 'Accept-Language': 'zh-tw,zh;q=0.8,en-us;q=0.5,en;q=0.3', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0'}) raw = dom('script').text().encode('utf_8').decode('unicode_escape').split('\';')[0].replace('var JData = \'', '') data = json.loads(raw) try: if(data['msg']==u'notfound'):continue except KeyError: ce = CourseEssence() ce.id = c.id ce.course = c ce.course_tree = c.leaf_of ce.category = c.category ce.objective = unicode(data['courseObject']).replace(' ', ' ') ce.content = unicode(data['courseContent']).replace(' ', ' ') objective_buffer.append(ce.objective) content_buffer.append(ce.content) #return translator.translate_array(content_buffer, 'zh-CHT') ce.ability_list = [] for m in data['courseMap']: if(m['strength']==u'N/A'):continue ability = CoreAbility(ability=unicode(m['core']), rating=m['strength'][1:2], evaluation=unicode(m['testType'])[0:-1].split(',')) ce.ability_list.append(ability) 
course_essence_list.append(ce) i=0 p = re.compile(ur'(\(\d*/\d*\))|(\d+\.)|(\d)|(<br/>)|(\([a-z]+\-[a-z]+\))|(\([a-z]+\-[a-z]+\s[a-z]+\))|()|(•)|(gt)|(^lt)|(\d*/\d*)|(败)|(^I{1,3})') while(i<=len(content_buffer)): min = 4 if (len(content_buffer)-i >= 4) else len(content_buffer)-i tmp_cont = translator.translate_array(content_buffer[i:i+min], 'en') #tmp_obj = translator.translate_array(objective_buffer[i:i+min], 'en') j=0 for ce in course_essence_list[i:i+min]: ce.content_en = p.sub('', tmp_cont[j]['TranslatedText']) #ce.objective_en = tmp_obj[j]['TranslatedText'] #ce.save() j+=1 i+=5 i=0 while(i<=len(objective_buffer)): min = 4 if (len(objective_buffer)-i >= 4) else len(objective_buffer)-i #tmp_cont = translator.translate_array(content_buffer[i:i+min], 'en') tmp_obj = translator.translate_array(objective_buffer[i:i+min], 'en') j=0 for ce in course_essence_list[i:i+min]: #ce.content_en = tmp_cont[j]['TranslatedText'] ce.objective_en = p.sub('', tmp_obj[j]['TranslatedText']) ce.save() j+=1 i+=5 de.course_essence_list = course_essence_list de.save() return course_essence_list
class MicrosoftTranslator(AbstractTranslator):
    """Translator that delegates to the Microsoft Translator API.

    The API client is only created when ``self.options`` is not None; in
    that case both ``client_id`` and ``client_secret`` must be present.
    Without options the instance stays usable but translates nothing.
    """

    name = 'microsoft'

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options exist but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()
        if self.options is not None:
            client_id = self.options.get('client_id')
            client_secret = self.options.get('client_secret')
            if client_id is None or client_secret is None:
                raise ValueError(
                    "Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret"
                )
            self.client = MSTranslator(client_id=client_id,
                                       client_secret=client_secret)
        else:
            self.client = None

        # Filled in lazily by the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Return the languages reported by the API client.

        A successful lookup is cached. Without a client the cached answer is
        an empty list. If the API call fails for any reason, an empty list
        is returned but not cached, so the next access tries again.
        """
        if self._languages is not None:
            return self._languages

        if self.client is None:
            # Return right away instead of falling through and calling
            # get_languages() on None.
            self._languages = []
            return self._languages

        try:
            self._languages = self.client.get_languages()
        except Exception:
            # Covers MSTranslatorApiException as well as anything else the
            # client may raise.
            return []
        return self._languages

    def _translate(self, texts, language, origin_language='en'):
        """Translate ``texts`` into ``language``, sending them in slices.

        Example: ``[ 'Hello' ], 'es' => { 'Hello' : 'Hola' }``

        Args:
            texts: sequence of source strings.
            language: target language code; must be in ``self.languages``.
            origin_language: language code of ``texts``.

        Returns:
            dict mapping each source text to its translation. Texts whose
            ``TranslatedText`` is empty, and texts belonging to a slice
            whose request failed, are left out. An empty dict is returned
            when there is no client or the language is not supported.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        # the size of a slice can't be over 10k characters in theory
        # (we try to keep them under 5k in practice)
        # [ element1, element2, element3 ...]
        slices = [[]]
        current_slice = slices[0]

        for text in texts:
            current_slice.append(text)
            # Start a new slice once the current one exceeds 2000 encoded bytes.
            if len(u''.join(current_slice).encode('utf8')) > 2000:
                current_slice = []
                slices.append(current_slice)

        app.logger.debug("Texts splitted in %s slices", len(slices))
        for pos, text_slice in enumerate(slices):
            app.logger.debug(" slice: %s: %s characters",
                             pos, len(''.join(text_slice).encode('utf8')))

        translations = {}
        for current_slice in slices:
            if not current_slice:
                continue

            app.logger.debug(
                "Translating %r to %r using Microsoft Translator API",
                current_slice, language)
            try:
                current_ms_translations = self.client.translate_array(
                    texts=current_slice,
                    to_lang=language,
                    from_lang=origin_language)
            except Exception as e:
                traceback.print_exc()
                # Logger.warn is a deprecated alias of Logger.warning.
                app.logger.warning(
                    "Error translating using Microsoft Translator API: %s",
                    e, exc_info=True)
                # Best effort: skip this slice and keep going with the rest.
                continue

            current_ms_translations = list(current_ms_translations)
            app.logger.debug(
                "Translated %s sentences using Microsoft Translator API",
                len(current_ms_translations))

            # Pair results with the texts of *this* slice. Pairing all results
            # against ``texts`` from the start would shift every translation
            # onto the wrong source text as soon as an earlier slice failed.
            for text, translation in zip(current_slice,
                                         current_ms_translations):
                translated_text = translation.get('TranslatedText')
                if translated_text:
                    translations[text] = translated_text

        return translations
common_lang1_words = [ line.strip() for line in codecs.open(common_lang1_words_filename, encoding="utf_8").readlines() ] lang2_translations = [] translator = Translator(app_name, app_secret) for startIdx in (np.array(range(int(len(common_lang1_words) / 100))) * 100): print >> sys.stderr, startIdx endIdx = startIdx + 100 curr_words = common_lang1_words[startIdx:endIdx] try: curr_translations = translator.translate_array(curr_words, lang2_code) lang2_translations.append(curr_translations) except Exception, e: print >> sys.stderr, "Failed:", startIdx print >> sys.stderr, e sys.exit(1) translated_words = [ trans["TranslatedText"] for trans in reduce(lambda list1, list2: list1 + list2, lang2_translations) ] output_file = codecs.open(output_filename, 'w', encoding="utf_8") # Generate translation pairings, but only for one-to-one word mappings: for word_idx in range(len(translated_words)):
def get_translated_text(en_word_list, target_language, client_id, api_key):
    """Translate ``en_word_list`` and return the results as a labelled list.

    Args:
        en_word_list: words passed to ``Translator.translate_array``.
        target_language: language code to translate into.
        client_id: first argument for the ``Translator`` constructor.
        api_key: second argument for the ``Translator`` constructor.

    Returns:
        A list whose first element is ``target_language`` followed by the
        ``TranslatedText`` of every result, in the order they were returned.
    """
    response = Translator(client_id, api_key).translate_array(en_word_list, target_language)
    # The language code goes first, ahead of the translated words.
    return [target_language] + [entry['TranslatedText'] for entry in response]