Esempio n. 1
0
class MicrosoftTranslator(AbstractTranslator):
    """Translator backend backed by the Microsoft Translator API client.

    The client is only created when ``self.options`` is available after the
    base-class initializer has run; otherwise the translator is inert and
    every lookup yields an empty result.
    """

    name = "microsoft"

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options are present but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()
        if self.options is not None:
            client_id = self.options.get("client_id")
            client_secret = self.options.get("client_secret")
            if client_id is None or client_secret is None:
                raise ValueError(
                    "Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret"
                )
            self.client = MSTranslator(client_id=client_id, client_secret=client_secret)
        else:
            self.client = None

        # Lazily filled cache for the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Return the languages reported by the API client (cached).

        Returns an empty list when no client is configured or when the API
        call fails; a failure is not cached, so the next access retries.
        """
        if self._languages is not None:
            return self._languages
        if self.client is None:
            # Without a client there is nothing to query; returning here
            # avoids calling get_languages() on None.
            self._languages = []
            return self._languages
        try:
            self._languages = self.client.get_languages()
        except MSTranslatorApiException:
            return []
        return self._languages

    def _translate(self, texts, language, origin_language="en"):
        """Translate ``texts`` into ``language``.

        Example: ``[ 'Hello' ], 'es' => { 'Hello' : 'Hola' }``

        Args:
            texts: sequence of source strings.
            language: target language code; must be in ``self.languages``.
            origin_language: source language code passed to the API.

        Returns:
            dict mapping each source text to its translation. Texts whose
            result has no non-empty ``TranslatedText`` are omitted. An empty
            dict is returned when there is no client, the language is not
            supported, or the API call raises ``MSTranslatorApiException``.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        app.logger.debug("Translating %r to %r using Microsoft Translator API", texts, language)
        try:
            ms_translations = self.client.translate_array(texts=texts, to_lang=language, from_lang=origin_language)
        except MSTranslatorApiException as e:
            traceback.print_exc()
            app.logger.warning("Error translating using Microsoft Translator API: %s", e, exc_info=True)
            return {}

        app.logger.debug("Translated %s sentences using Microsoft Translator API", len(ms_translations))

        translations = {}
        for text, translation in zip(texts, ms_translations):
            translated_text = translation.get("TranslatedText")
            if translated_text:
                translations[text] = translated_text

        return translations
Esempio n. 2
0
class BingTranslator:
    """Thin wrapper around ``Translator`` that always translates from English.

    Credentials are read from the ``BING_TRANSLATION_CLIENT_ID`` and
    ``BING_TRANSLATION_SECRET`` environment variables.
    """

    def __init__(self):
        """Create the underlying translator from environment credentials.

        Raises:
            Exception: if either environment variable is unset or empty.
        """
        # No fallback credentials: secrets must not live in source control,
        # and a baked-in default would make the check below unreachable.
        client_id = os.environ.get("BING_TRANSLATION_CLIENT_ID")
        client_secret = os.environ.get("BING_TRANSLATION_SECRET")

        if not client_id or not client_secret:
            raise Exception("bing translation client id or client secret not found")

        self.translator = Translator(client_id, client_secret)

    def translate(self, untranslated, target_language):
        """Translate a single English text into ``target_language``."""
        return self.translator.translate(untranslated, target_language, from_lang='en')

    def translate_all(self, untranslated, target_language):
        """Translate a sequence of English texts into ``target_language``."""
        return self.translator.translate_array(untranslated, target_language, from_lang='en')
Esempio n. 3
0
def translate():
    """Return the posted text with words alternately transliterated and translated.

    Reads ``text`` from the JSON request body, sanitizes it with
    ``bleach.clean`` and splits it into word / non-word tokens. Starting with
    the first word, every other word is transliterated with ``translit``;
    the remaining words are sent to the translator in a single request and
    written back into their original positions.

    Responds with:
        400 if the body is not a JSON object containing ``text`` or the
            sanitized text is longer than 400 characters;
        500 if the translation request fails;
        otherwise a JSON object with ``translation`` and ``status``.
    """
    payload = request.json
    # A missing 'text' key is a client error, not a server-side KeyError.
    if not payload or not isinstance(payload, dict) or 'text' not in payload:
        abort(make_response(jsonify(message="Bad zapros"), 400))
    # sanitize html
    data = bleach.clean(payload['text'])
    if len(data) > 400:
        abort(make_response(jsonify(message="Slishkom long stroka"), 400))
    # The capturing group keeps the separators so the text can be re-joined.
    splitted_original = re.split(r'(\W+)', data, flags=re.UNICODE)
    word_regex = re.compile(r'\w+', re.U)
    # When true the next word is transliterated, otherwise it is translated.
    transliterate_flag = True
    # Positions of the words sent for translation, used to put the
    # translated words back in their places.
    to_translate_indexes = []
    # The words sent for translation, in the same order as the indexes.
    to_translate = []
    for i, token in enumerate(splitted_original):
        # Only words take part; commas, spaces, etc. are left untouched.
        if word_regex.match(token):
            if transliterate_flag:
                splitted_original[i] = translit(token, 'ru', reversed=True)
            else:
                to_translate_indexes.append(i)
                to_translate.append(token)
            transliterate_flag = not transliterate_flag
    # Skip the remote call entirely when there is nothing to translate.
    if to_translate:
        try:
            translator = Translator(app.config['TRANSLATOR_ID'],
                                    app.config['TRANSLATOR_SECRET'])
            translated = translator.translate_array(to_translate, 'en', 'ru')
        except Exception:
            # Boundary handler: record the cause, then answer with a 500.
            app.logger.exception("Translation request failed")
            abort(make_response(jsonify(message="Try again popozhe"), 500))
        # Put the translated words back using the recorded positions.
        for index, item in zip(to_translate_indexes, translated):
            splitted_original[index] = item['TranslatedText']
    res = ''.join(splitted_original)
    return jsonify(translation=res, status=200)
Esempio n. 4
0
class MicrosoftTranslator(AbstractTranslator):
    """Translator backend backed by the Microsoft Translator API client.

    The client is only created when ``self.options`` is available after the
    base-class initializer has run; otherwise the translator is inert and
    every lookup yields an empty result.
    """

    name = 'microsoft'

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options are present but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()
        if self.options is not None:
            client_id = self.options.get('client_id')
            client_secret = self.options.get('client_secret')
            if client_id is None or client_secret is None:
                raise ValueError("Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret")
            self.client = MSTranslator(client_id=client_id, client_secret=client_secret)
        else:
            self.client = None

        # Lazily filled cache for the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Return the languages reported by the API client (cached).

        Returns an empty list when no client is configured or when the
        lookup fails for any reason; a failure is not cached, so the next
        access retries.
        """
        if self._languages is not None:
            return self._languages
        if self.client is None:
            # Nothing to query; return instead of calling into None.
            self._languages = []
            return self._languages
        try:
            self._languages = self.client.get_languages()
        except Exception:
            # Best effort: any failure (API or transport) means "unknown".
            return []
        return self._languages

    def _translate(self, texts, language, origin_language='en'):
        """Translate ``texts`` into ``language``, slice by slice.

        Example: ``[ 'Hello' ], 'es' => { 'Hello' : 'Hola' }``

        Args:
            texts: sequence of source strings.
            language: target language code; must be in ``self.languages``.
            origin_language: source language code passed to the API.

        Returns:
            dict mapping each source text to its translation. Texts from a
            slice whose request failed, and texts whose result has no
            non-empty ``TranslatedText``, are omitted. An empty dict is
            returned when there is no client or the language is unsupported.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        slices = [
            # the size of a slice can't be over 10k characters in theory (we try to keep them under 5k in practice)
            # [ element1, element2, element3 ...]
            [],
        ]
        current_slice = slices[0]

        for text in texts:
            current_slice.append(text)
            # Start a new slice once the current one exceeds 2000 UTF-8 bytes.
            if len(u''.join(current_slice).encode('utf8')) > 2000:
                current_slice = []
                slices.append(current_slice)

        app.logger.debug("Texts splitted in {} slices".format(len(slices)))
        for pos, slice_texts in enumerate(slices):
            app.logger.debug("  slice: {}: {} characters".format(pos, len(u''.join(slice_texts).encode('utf8'))))

        translations = {}
        for current_slice in slices:
            if not current_slice:
                continue
            app.logger.debug("Translating %r to %r using Microsoft Translator API", current_slice, language)
            try:
                current_ms_translations = self.client.translate_array(texts=current_slice, to_lang=language, from_lang=origin_language)
            except Exception as e:
                # Best effort: a failed slice is logged and skipped so the
                # other slices can still be translated.
                traceback.print_exc()
                app.logger.warning("Error translating using Microsoft Translator API: %s", e, exc_info=True)
                continue

            current_ms_translations = list(current_ms_translations)
            app.logger.debug("Translated %s sentences using Microsoft Translator API", len(current_ms_translations))

            # Pair results with the texts of *this* slice. Zipping against
            # the full input would shift every pairing after a failed slice.
            for text, translation in zip(current_slice, current_ms_translations):
                translated_text = translation.get('TranslatedText')
                if translated_text:
                    translations[text] = translated_text

        return translations
Esempio n. 5
0
def crawlCourseEssence(course_list):
	url = "https://course.ncu.edu.tw/Course/main/query/byKeywords?"
	#ex: https://course.ncu.edu.tw/Course/main/query/byKeywords?serialNo=11001&outline=11001&semester=1031
	course_essence_list = []
	de = DepEssence()
	objective_buffer = []
	ob = 0
	content_buffer = []
	content_ch_buffer = []
	cb = 0
	translator = Translator('21KRtranslator', '1VYijs8FLyy7wmD/x1KsSWficJPiH61jywgGBM5m+iA=')
	for i, c in enumerate(course_list):
		if (i==0):
			de.id = str(c.category.id)
			de.category = c.category
			de.course_tree = c.leaf_of
			course_essence_list = []
		params = urllib.urlencode({'serialNo': c.serial_no, 'outline': c.serial_no, 'semester': c.semester})
		#dom = requests.get(url=url, params=params, headers={'Cookie': 'JSESSIONID=7257F09EDF368A37341694B4A4D7B72E', 'Accept-Language': 'zh-tw,zh;q=0.8,en-us;q=0.5,en;q=0.3', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0'})
		dom = pq(url=url+params, headers={'Cookie': 'JSESSIONID=7257F09EDF368A37341694B4A4D7B72E', 'Accept-Language': 'zh-tw,zh;q=0.8,en-us;q=0.5,en;q=0.3', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0'})
		raw = dom('script').text().encode('utf_8').decode('unicode_escape').split('\';')[0].replace('var JData = \'', '')
		data = json.loads(raw)
		try:
			if(data['msg']==u'notfound'):continue
		except KeyError:
			ce = CourseEssence()
			ce.id = c.id
			ce.course = c
			ce.course_tree = c.leaf_of
			ce.category = c.category
			ce.objective = unicode(data['courseObject']).replace(' ', ' ')
			ce.content = unicode(data['courseContent']).replace(' ', ' ')
			objective_buffer.append(ce.objective)
			content_buffer.append(ce.content)
			#return translator.translate_array(content_buffer, 'zh-CHT')
			ce.ability_list = []
			
			for m in data['courseMap']:
				if(m['strength']==u'N/A'):continue
				ability = CoreAbility(ability=unicode(m['core']), rating=m['strength'][1:2], evaluation=unicode(m['testType'])[0:-1].split(','))
				ce.ability_list.append(ability)

			course_essence_list.append(ce)
		
	
	i=0
	p = re.compile(ur'(\(\d*/\d*\))|(\d+\.)|(\d)|(<br/>)|(\([a-z]+\-[a-z]+\))|(\([a-z]+\-[a-z]+\s[a-z]+\))|()|(•)|(gt)|(^lt)|(\d*/\d*)|(败)|(^I{1,3})')
	while(i<=len(content_buffer)):
		min = 4 if (len(content_buffer)-i >= 4) else len(content_buffer)-i
		tmp_cont = translator.translate_array(content_buffer[i:i+min], 'en')
		#tmp_obj = translator.translate_array(objective_buffer[i:i+min], 'en')
		j=0
		for ce in course_essence_list[i:i+min]:
			ce.content_en = p.sub('', tmp_cont[j]['TranslatedText'])
			#ce.objective_en = tmp_obj[j]['TranslatedText']
			#ce.save()
			j+=1
		i+=5

	i=0
	while(i<=len(objective_buffer)):
		min = 4 if (len(objective_buffer)-i >= 4) else len(objective_buffer)-i
		#tmp_cont = translator.translate_array(content_buffer[i:i+min], 'en')
		tmp_obj = translator.translate_array(objective_buffer[i:i+min], 'en')
		j=0
		for ce in course_essence_list[i:i+min]:
			#ce.content_en = tmp_cont[j]['TranslatedText']
			ce.objective_en = p.sub('', tmp_obj[j]['TranslatedText'])
			ce.save()
			j+=1
		i+=5
	de.course_essence_list = course_essence_list
	de.save()
	return course_essence_list
Esempio n. 6
0
class MicrosoftTranslator(AbstractTranslator):
    """Translator backend backed by the Microsoft Translator API client.

    The client is only created when ``self.options`` is available after the
    base-class initializer has run; otherwise the translator is inert and
    every lookup yields an empty result.
    """

    name = 'microsoft'

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options are present but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()
        if self.options is not None:
            client_id = self.options.get('client_id')
            client_secret = self.options.get('client_secret')
            if client_id is None or client_secret is None:
                raise ValueError(
                    "Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret"
                )
            self.client = MSTranslator(client_id=client_id,
                                       client_secret=client_secret)
        else:
            self.client = None

        # Lazily filled cache for the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Return the languages reported by the API client (cached).

        Returns an empty list when no client is configured or when the
        lookup fails for any reason; a failure is not cached, so the next
        access retries.
        """
        if self._languages is not None:
            return self._languages
        if self.client is None:
            # Nothing to query; return instead of calling into None.
            self._languages = []
            return self._languages
        try:
            self._languages = self.client.get_languages()
        except Exception:
            # Best effort: any failure (API or transport) means "unknown".
            return []
        return self._languages

    def _translate(self, texts, language, origin_language='en'):
        """Translate ``texts`` into ``language``, slice by slice.

        Example: ``[ 'Hello' ], 'es' => { 'Hello' : 'Hola' }``

        Args:
            texts: sequence of source strings.
            language: target language code; must be in ``self.languages``.
            origin_language: source language code passed to the API.

        Returns:
            dict mapping each source text to its translation. Texts from a
            slice whose request failed, and texts whose result has no
            non-empty ``TranslatedText``, are omitted. An empty dict is
            returned when there is no client or the language is unsupported.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        slices = [
            # the size of a slice can't be over 10k characters in theory (we try to keep them under 5k in practice)
            # [ element1, element2, element3 ...]
            [],
        ]
        current_slice = slices[0]

        for text in texts:
            current_slice.append(text)
            # Start a new slice once the current one exceeds 2000 UTF-8 bytes.
            if len(u''.join(current_slice).encode('utf8')) > 2000:
                current_slice = []
                slices.append(current_slice)

        app.logger.debug("Texts splitted in {} slices".format(len(slices)))
        for pos, slice_texts in enumerate(slices):
            app.logger.debug("  slice: {}: {} characters".format(
                pos, len(u''.join(slice_texts).encode('utf8'))))

        translations = {}
        for current_slice in slices:
            if not current_slice:
                continue
            app.logger.debug(
                "Translating %r to %r using Microsoft Translator API",
                current_slice, language)
            try:
                current_ms_translations = self.client.translate_array(
                    texts=current_slice,
                    to_lang=language,
                    from_lang=origin_language)
            except Exception as e:
                # Best effort: a failed slice is logged and skipped so the
                # other slices can still be translated.
                traceback.print_exc()
                app.logger.warning(
                    "Error translating using Microsoft Translator API: %s",
                    e,
                    exc_info=True)
                continue

            current_ms_translations = list(current_ms_translations)
            app.logger.debug(
                "Translated %s sentences using Microsoft Translator API",
                len(current_ms_translations))

            # Pair results with the texts of *this* slice. Zipping against
            # the full input would shift every pairing after a failed slice.
            for text, translation in zip(current_slice,
                                         current_ms_translations):
                translated_text = translation.get('TranslatedText')
                if translated_text:
                    translations[text] = translated_text

        return translations
# Load the source-language word list, one stripped word per line; the file
# is closed as soon as it has been read.
with codecs.open(common_lang1_words_filename, encoding="utf_8") as words_file:
    common_lang1_words = [line.strip() for line in words_file]

# One entry per batch, each holding the translator's results for that batch.
lang2_translations = []

translator = Translator(app_name, app_secret)

# Translate in batches of 100 words. Stepping over the full length also
# covers the final partial batch, which int(len / 100) batches would drop.
for startIdx in range(0, len(common_lang1_words), 100):
    sys.stderr.write("%s\n" % startIdx)
    endIdx = startIdx + 100
    curr_words = common_lang1_words[startIdx:endIdx]
    try:
        curr_translations = translator.translate_array(curr_words, lang2_code)
    except Exception as e:
        # Any failure aborts the whole run with a non-zero exit status.
        sys.stderr.write("Failed: %s\n" % startIdx)
        sys.stderr.write("%s\n" % e)
        sys.exit(1)
    lang2_translations.append(curr_translations)

# Flatten the batches; unlike reduce() without an initializer this also
# works when no batch was translated.
translated_words = [
    trans["TranslatedText"]
    for batch in lang2_translations
    for trans in batch
]

# NOTE(review): opened without a context manager because the code that
# writes to it is outside this section - confirm it is closed there.
output_file = codecs.open(output_filename, 'w', encoding="utf_8")

# Generate translation pairings, but only for one-to-one word mappings:
for word_idx in range(len(translated_words)):
Esempio n. 8
0
def get_translated_text(en_word_list, target_language, client_id, api_key):
    """Translate a list of words and return them headed by the language code.

    Args:
        en_word_list: words to translate.
        target_language: language code to translate into.
        client_id: translator API client id.
        api_key: translator API secret.

    Returns:
        A list whose first element is ``target_language`` followed by the
        ``TranslatedText`` of every result, in the order returned.
    """
    client = Translator(client_id, api_key)
    response = client.translate_array(en_word_list, target_language)
    translated = [target_language]
    for entry in response:
        translated.append(entry['TranslatedText'])
    return translated