def test_minimize_context(self):
    """Check that ``minimize_context`` yields a usable result for a Dutch headline.

    The result is unpacked into the ``(minimal_context, query)`` pair and
    each part is checked on its own. Asserting the returned tuple directly
    would always pass, because any non-empty tuple is truthy regardless of
    what it contains.
    """
    from zeeguu_api.api.translate_and_bookmark import minimize_context

    ctx = "Onderhandelaars ChristenUnie praten over positie h**o-ouders"
    from_lang_code = "nl"
    word_str = "Onderhandelaars"

    minimal_context, query = minimize_context(ctx, from_lang_code, word_str)

    # NOTE(review): assumes the minimal context is a non-empty value for a
    # non-empty input context -- confirm against minimize_context.
    assert minimal_context
    assert query is not None
def test_get_translation_where_gslobe_fails_but_translate_succeeds(
        self, mock_get_all_translations):
    """Ask for translations of a word that does not occur in the context.

    Both ``get_all_translations`` and ``minimize_context`` are mocked. The
    test checks the arguments each mock receives and that the endpoint
    answers with at least two alternatives.
    """
    context = 'Die klein Jäger'
    word = "krassen"
    self.data["context"] = context
    self.data["word"] = word
    source_lang = self.data["from_lang_code"]

    minimal_context, query = minimize_context(context, source_lang, word)
    self.data["query"] = query

    translator = MockTranslator({
        "klein": ["small"],
        "krassen": ["big", "extreme"],
    })
    mock_get_all_translations.return_value = translator.translate(query)

    form_data = {"url": self.data["url"], "context": context, "word": word}

    target = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(target) as mock_minimize_context:
        mock_minimize_context.return_value = (minimal_context, query)
        alternatives = self.json_from_api_post(self.api_endpoint, form_data)
        mock_minimize_context.assert_called_once_with(
            context, source_lang, word)

    mock_get_all_translations.assert_called_once_with(self.data)

    found = alternatives['translations']
    first_alternative = found[0]
    assert first_alternative is not None
    second_alternative = found[1]
    assert second_alternative is not None
def test_get_possible_translations2(self, mock_get_all_translations):
    """Post a longer German context and expect the mocked translation back.

    ``minimize_context`` is patched to return the pair computed up front,
    and ``get_all_translations`` is expected to receive the posted form
    fields together with the language codes and the query.
    """
    from_lang, to_lang = "de", "en"
    word = "glatzköpfig"
    title = "lala"
    context = ("Da sich nicht eindeutig erkennen lässt, "
               "ob Emojis Männer oder eben doch womöglich "
               "glatzköpfig Frauen darstellen,")
    endpoint = "/get_possible_translations/%s/%s" % (from_lang, to_lang)

    minimal_context, query = minimize_context(context, from_lang, word)
    translator = MockTranslator({"glatzköpfig": ["gifpökztalg"]})
    mock_get_all_translations.return_value = translator.translate(query)

    posted = {
        "context": context,
        "url": VERY_EASY_STORY_URL,
        "word": word,
        "title": title,
    }

    target = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(target) as mock_minimize_context:
        mock_minimize_context.return_value = (minimal_context, query)
        translations = self.json_from_api_post(endpoint, posted)
        mock_minimize_context.assert_called_once_with(
            context, from_lang, word)

    expected_request = dict(
        from_lang_code=from_lang,
        to_lang_code=to_lang,
        context=context,
        url=VERY_EASY_STORY_URL,
        word=word,
        title=title,
        query=query,
    )
    mock_get_all_translations.assert_called_once_with(expected_request)

    assert "gifpökztalg" in str(translations)
def test_get_possible_translations(self, mock_get_all_translations):
    """Expect two alternatives for "klein", ordered by likelihood.

    ``minimize_context`` and ``get_all_translations`` are mocked; besides
    the returned alternatives, the arguments passed to both mocks are
    verified.
    """
    context = 'Die klein Jäger'
    word = "klein"
    self.data["context"] = context
    self.data["word"] = word
    source_lang = self.data["from_lang_code"]

    minimal_context, query = minimize_context(context, source_lang, word)
    self.data["query"] = query

    translator = MockTranslator({
        "klein": ["small", "little"],
        "krassen": ["big"],
    })
    mock_get_all_translations.return_value = translator.translate(query)

    form_data = {"url": self.data["url"], "context": context, "word": word}

    target = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(target) as mock_minimize_context:
        mock_minimize_context.return_value = (minimal_context, query)
        alternatives = self.json_from_api_post(self.api_endpoint, form_data)
        mock_minimize_context.assert_called_once_with(
            context, source_lang, word)

    found = alternatives['translations']
    first_alternative = found[0]
    second_alternative = found[1]

    mock_get_all_translations.assert_called_once_with(self.data)

    assert first_alternative is not None
    assert second_alternative is not None
    # The alternatives must come back best-first.
    assert (first_alternative["likelihood"]
            >= second_alternative["likelihood"])
def test_get_possible_translations(self, mock_get_all_translations):
    """Post a short German sentence and expect the mocked "nice" back.

    ``minimize_context`` is patched to return the pair computed up front,
    and ``get_all_translations`` is expected to receive the posted form
    fields together with the language codes and the query.
    """
    from_lang, to_lang = "de", "en"
    context = "das ist sehr schon"
    word = "schon"
    title = "lala"
    endpoint = "/get_possible_translations/%s/%s" % (from_lang, to_lang)

    minimal_context, query = minimize_context(context, from_lang, word)
    translator = MockTranslator({"schon": ["nice"]})
    mock_get_all_translations.return_value = translator.translate(query)

    posted = {
        "context": context,
        "url": VERY_EASY_STORY_URL,
        "word": word,
        "title": title,
    }

    target = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(target) as mock_minimize_context:
        mock_minimize_context.return_value = (minimal_context, query)
        translations = self.json_from_api_post(endpoint, posted)
        mock_minimize_context.assert_called_once_with(
            context, from_lang, word)

    expected_request = dict(
        from_lang_code=from_lang,
        to_lang_code=to_lang,
        context=context,
        url=VERY_EASY_STORY_URL,
        word=word,
        title=title,
        query=query,
    )
    mock_get_all_translations.assert_called_once_with(expected_request)

    assert "nice" in str(translations)
def test_get_possible_translations(self, mock_get_all_translations):
    """Check that "klein" yields two alternatives, the likelier one first.

    The translator backend and ``minimize_context`` are replaced by mocks,
    and the calls made to each of them are asserted as well.
    """
    sentence = "Die klein Jäger"
    target_word = "klein"
    self.data["context"] = sentence
    self.data["word"] = target_word
    lang = self.data["from_lang_code"]

    min_ctx, translation_query = minimize_context(sentence, lang, target_word)
    self.data["query"] = translation_query

    dictionary = {"klein": ["small", "little"], "krassen": ["big"]}
    mock_get_all_translations.return_value = (
        MockTranslator(dictionary).translate(translation_query))

    form_data = {
        "url": self.data["url"],
        "context": sentence,
        "word": target_word,
    }

    patched_name = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(patched_name) as mock_minimize_context:
        mock_minimize_context.return_value = (min_ctx, translation_query)
        alternatives = self.json_from_api_post(self.api_endpoint, form_data)
        mock_minimize_context.assert_called_once_with(
            sentence, lang, target_word)

    candidates = alternatives["translations"]
    first_alternative = candidates[0]
    second_alternative = candidates[1]

    mock_get_all_translations.assert_called_once_with(self.data)

    assert first_alternative is not None
    assert second_alternative is not None
    best = first_alternative["likelihood"]
    runner_up = second_alternative["likelihood"]
    assert best >= runner_up
def translate_and_bookmark(from_lang_code, to_lang_code):
    """
    @deprecated
    This should be deprecated and /get_possible_translations used instead.
    However, it is still used by the zeeguu chrome extension.

    This expects in the post parameter the following:
        - word (to translate)
        - context (surrounding paragraph of the original word)
        - url (of the origin)
        - title (of the origin page)

    /get_possible_translations has very similar behavior, only that it
    focuses on returning the possible alternative translations.

    :param from_lang_code: language code passed on as the source language
    :param to_lang_code: language code passed on as the target language
    :return: json with the bookmark_id and the best-guess translation
    :raises ValueError: re-raised, after being logged, when
        minimize_context fails for the posted word and context
    """
    word_str = unquote_plus(request.form['word'])
    url_str = request.form.get('url', '')
    title_str = request.form.get('title', '')
    context_str = request.form.get('context', '')

    data = {
        "from_lang_code": from_lang_code,
        "to_lang_code": to_lang_code,
        "word": word_str,
        "url": url_str,
        "title": title_str,
        "context": context_str,
    }

    # the url comes from elsewhere not from the reader, so we find or create
    # the article
    article = Article.find_or_create(db_session, url_str)
    article_id = article.id

    # Only minimize_context is guarded: the log message below is specific to
    # it. The previous handler returned `context_str, query`, where `query`
    # is unbound whenever minimize_context itself raised, and which is not
    # the json result this endpoint otherwise returns. Log and propagate
    # instead.
    try:
        minimal_context, query = minimize_context(
            context_str, from_lang_code, word_str)
    except ValueError as e:
        zeeguu_core.log(f"minimize context failed {e}on: {context_str} x {from_lang_code} x {word_str} ")
        raise

    data["query"] = query
    translations = get_all_translations(data).translations

    # NOTE(review): an empty translations list raises IndexError here; the
    # expected response for that case is not visible from this function.
    best_guess = translations[0]["translation"]

    bookmark = Bookmark.find_or_create(db_session, flask.g.user,
                                       word_str, from_lang_code,
                                       best_guess, to_lang_code,
                                       minimal_context, url_str, title_str,
                                       article_id)

    return json_result(dict(
        bookmark_id=bookmark.id,
        translation=best_guess))
def get_possible_translations(from_lang_code, to_lang_code):
    """
    Returns a list of possible translations in :param to_lang_code
    for :param word in :param from_lang_code.

    You must also specify the :param context, :param url, and :param title
    of the page where the word was found. The context is the sentence.

    The first translation returned is also stored as a bookmark for the
    current user. When no translation is returned, no bookmark is created
    and the (empty) list is sent back as is.

    :return: json array with translations
    """
    data = {"from_lang_code": from_lang_code, "to_lang_code": to_lang_code}
    data["context"] = request.form.get('context', '')

    url = request.form.get('url', '')
    # The translators receive the url exactly as posted; the local `url`
    # may be rewritten below before it is stored with the bookmark.
    data["url"] = url

    article_id = None
    if 'articleID' in url:
        article_id = url.split('articleID=')[-1]
        url = Article.query.filter_by(id=article_id).one().url.as_canonical_string()
    elif 'articleURL' in url:
        url = url.split('articleURL=')[-1]
    else:
        # the url comes from elsewhere not from the reader, so we find or
        # create the article
        article = Article.find_or_create(db_session, url)
        article_id = article.id
    zeeguu_core.log(f"url before being saved: {url}")

    word_str = request.form['word']
    data["word"] = word_str
    title_str = request.form.get('title', '')
    data["title"] = title_str

    zeeguu_core.log(f'translating to... {data["to_lang_code"]}')
    minimal_context, query = minimize_context(
        data["context"], data["from_lang_code"], data["word"])
    zeeguu_core.log(f"Query to translate is: {query}")
    data["query"] = query

    translations = get_all_translations(data).translations
    zeeguu_core.log(f"Got translations: {translations}")

    # translators talk about quality, but our users expect likelihood.
    # rename the keys in each dictionary
    for t in translations:
        t['likelihood'] = t.pop("quality")
        t['source'] = t.pop('service_name')

    # Guard against an empty result (as get_one_translation and
    # get_next_translations do): there is no best guess to bookmark then,
    # and indexing [0] would raise IndexError.
    if translations:
        best_guess = translations[0]["translation"]
        Bookmark.find_or_create(db_session, flask.g.user,
                                word_str, from_lang_code,
                                best_guess, to_lang_code,
                                minimal_context, url, title_str, article_id)

    return json_result(dict(translations=translations))
def contribute_translation(from_lang_code, to_lang_code):
    """
    Store a translation that the user considers appropriate for a given
    :param word (in :param from_lang_code) within a given :param context.
    The :param translation is in :param to_lang_code.

    Besides the two words and the textual context, the request may carry
    the :param url and :param title of the page where the original word
    and context occurred.

    :return: in case of success, the bookmark_id
    """
    form = request.form

    # `word` and `translation` are read with [] and must be present; the
    # remaining fields fall back to an empty string.
    word_str = unquote_plus(form['word'])
    translation_str = form['translation']
    url = form.get('url', '')
    context_str = form.get('context', '')
    title_str = form.get('title', '')

    # a translation added by hand comes without servicename_translation,
    # in which case it is labelled MANUAL
    service_name = form.get('servicename_translation', 'MANUAL')

    article_id = None
    if 'articleID' in url:
        article_id = url.split('articleID=')[-1]
        stored_article = Article.query.filter_by(id=article_id).one()
        url = stored_article.url.as_canonical_string()
    elif 'articleURL' in url:
        url = url.split('articleURL=')[-1]
    else:
        # the url does not come from the reader, so find or create the
        # article it points to
        article_id = Article.find_or_create(db_session, url).id

    # Optional POST param (read here, not used further in this function)
    selected_from_predefined_choices = form.get(
        'selected_from_predefined_choices', '')

    minimal_context, _ = minimize_context(
        context_str, from_lang_code, word_str)

    bookmark = Bookmark.find_or_create(
        db_session, flask.g.user,
        word_str, from_lang_code,
        translation_str, to_lang_code,
        minimal_context, url, title_str, article_id)

    # Inform apimux about translation selection
    selection_info = {
        "word_str": word_str,
        "translation_str": translation_str,
        "url": url,
        "context_size": len(context_str),
        "service_name": service_name,
    }
    contribute_trans(selection_info)

    return json_result(dict(bookmark_id=bookmark.id))
def test_get_possible_translations2(self, mock_get_all_translations):
    """Translate "glatzköpfig" inside a multi-clause German context.

    With ``minimize_context`` patched, the endpoint must hand the posted
    fields, both language codes and the query to ``get_all_translations``,
    and the mocked translation must show up in the response.
    """
    source_lang = "de"
    target_lang = "en"
    looked_up = "glatzköpfig"
    page_title = "lala"
    sentence = ("Da sich nicht eindeutig erkennen lässt, "
                "ob Emojis Männer oder eben doch womöglich "
                "glatzköpfig Frauen darstellen,")
    route = "/get_possible_translations/%s/%s" % (source_lang, target_lang)

    min_ctx, translation_query = minimize_context(
        sentence, source_lang, looked_up)
    mock_get_all_translations.return_value = (
        MockTranslator({"glatzköpfig": ["gifpökztalg"]})
        .translate(translation_query))

    payload = {
        "context": sentence,
        "url": VERY_EASY_STORY_URL,
        "word": looked_up,
        "title": page_title,
    }

    patched_name = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(patched_name) as mock_minimize_context:
        mock_minimize_context.return_value = (min_ctx, translation_query)
        translations = self.json_from_api_post(route, payload)
        mock_minimize_context.assert_called_once_with(
            sentence, source_lang, looked_up)

    expected = dict(
        from_lang_code=source_lang,
        to_lang_code=target_lang,
        context=sentence,
        url=VERY_EASY_STORY_URL,
        word=looked_up,
        title=page_title,
        query=translation_query,
    )
    mock_get_all_translations.assert_called_once_with(expected)

    assert "gifpökztalg" in str(translations)
def test_get_possible_translations(self, mock_get_all_translations):
    """Translate "schon" inside a short German sentence.

    With ``minimize_context`` patched, the endpoint must hand the posted
    fields, both language codes and the query to ``get_all_translations``,
    and the mocked translation "nice" must show up in the response.
    """
    source_lang = "de"
    target_lang = "en"
    sentence = "das ist sehr schon"
    looked_up = "schon"
    page_title = "lala"
    route = "/get_possible_translations/%s/%s" % (source_lang, target_lang)

    min_ctx, translation_query = minimize_context(
        sentence, source_lang, looked_up)
    mock_get_all_translations.return_value = (
        MockTranslator({"schon": ["nice"]}).translate(translation_query))

    payload = {
        "context": sentence,
        "url": VERY_EASY_STORY_URL,
        "word": looked_up,
        "title": page_title,
    }

    patched_name = "zeeguu_api.api.translate_and_bookmark.minimize_context"
    with patch(patched_name) as mock_minimize_context:
        mock_minimize_context.return_value = (min_ctx, translation_query)
        translations = self.json_from_api_post(route, payload)
        mock_minimize_context.assert_called_once_with(
            sentence, source_lang, looked_up)

    expected = dict(
        from_lang_code=source_lang,
        to_lang_code=target_lang,
        context=sentence,
        url=VERY_EASY_STORY_URL,
        word=looked_up,
        title=page_title,
        query=translation_query,
    )
    mock_get_all_translations.assert_called_once_with(expected)

    assert "nice" in str(translations)
def get_one_translation(from_lang_code, to_lang_code):
    """
    Addressing some of the problems with the get_next_translations...
        - it should be separated in get_first and get_alternatives
        - alternatively it can be get one and get all

    To think about:
        - it would also make sense to separate translation from logging;
          or at least, allow for situations where a translation is not
          associated with an url... or?

    The user's own translation, when one exists, is returned directly.
    Otherwise a single result is requested from the translators and, if
    one comes back, it is saved as a bookmark for the current user.

    :return: json array with translations
    """
    word_str = request.form["word"]
    # Default to "" (as the other endpoints in this file do): without a
    # default a request lacking `url` yields None, and the substring test
    # on `url` further down raises TypeError.
    url = request.form.get("url", "")
    title_str = request.form.get("title", "")
    context = request.form.get("context", "")

    minimal_context, query = minimize_context(context, from_lang_code, word_str)

    translation = own_translation(
        flask.g.user, word_str, from_lang_code, to_lang_code, minimal_context
    )
    if translation:
        return json_result(dict(translations=translation))

    translations = get_next_results(
        {
            "from_lang_code": from_lang_code,
            "to_lang_code": to_lang_code,
            # passed on exactly as posted (None when the field is absent)
            "url": request.form.get("url"),
            "word": word_str,
            "title": title_str,
            "query": query,
            "context": minimal_context,
        },
        number_of_results=1,
    ).translations

    # do we really need this?
    # translators talk about quality, but our users expect likelihood.
    # rename the key in the dictionary
    for t in translations:
        t["likelihood"] = t.pop("quality")
        t["source"] = t["service_name"]

    article_id = None
    if "article?id=" in url:
        article_id = url.split("article?id=")[-1]
        url = Article.query.filter_by(id=article_id).one().url.as_canonical_string()
    else:
        # the url comes from elsewhere not from the reader, so we find or
        # create the article
        # NOTE(review): with a missing url this is now called with "" --
        # confirm that Article.find_or_create copes with that.
        article = Article.find_or_create(db_session, url)
        article_id = article.id

    if translations:
        best_guess = translations[0]["translation"]

        Bookmark.find_or_create(
            db_session,
            flask.g.user,
            word_str,
            from_lang_code,
            best_guess,
            to_lang_code,
            minimal_context,
            url,
            title_str,
            article_id,
        )

    return json_result(dict(translations=translations))
def get_next_translations(from_lang_code, to_lang_code):
    """
    Returns a list of possible translations in :param to_lang_code
    for :param word in :param from_lang_code.

    You must also specify the :param context, :param url, and :param title
    of the page where the word was found. The context is the sentence.

    Optional form fields read here: numberOfResults, service (a service to
    exclude), currentTranslation (a result to exclude) and articleID.

    :return: json array with translations
    """
    form = request.form

    data = {"from_lang_code": from_lang_code, "to_lang_code": to_lang_code}
    data["context"] = form.get("context", "")
    url = form.get("url", "")
    number_of_results = int(form.get("numberOfResults", -1))

    # A named service / current translation is excluded from the results.
    service_name = form.get("service", "")
    exclude_services = [service_name] if service_name != "" else []
    current_translation = form.get("currentTranslation", "")
    exclude_results = []
    if current_translation != "":
        exclude_results.append(current_translation.lower())

    data["url"] = url

    article_id = form.get("articleID", None)
    if article_id is None:
        # No explicit article id was posted: derive it from the url.
        if "articleID" in url:
            article_id = url.split("articleID=")[-1]
            stored_article = Article.query.filter_by(id=article_id).one()
            url = stored_article.url.as_canonical_string()
        elif "articleURL" in url:
            url = url.split("articleURL=")[-1]
        else:
            # the url does not come from the reader, so find or create the
            # article it points to
            article_id = Article.find_or_create(db_session, url).id
    zeeguu_core.log(f"url before being saved: {url}")

    word_str = form["word"]
    title_str = form.get("title", "")
    data["word"] = word_str
    data["title"] = title_str

    zeeguu_core.log(f'translating to... {data["to_lang_code"]}')
    minimal_context, query = minimize_context(
        data["context"], data["from_lang_code"], data["word"]
    )
    zeeguu_core.log(f"Query to translate is: {query}")
    data["query"] = query

    # With nothing excluded yet, this is the first request for the word.
    first_call_for_this_word = not exclude_services

    if first_call_for_this_word:
        translations = own_or_crowdsourced_translation(
            flask.g.user, word_str, from_lang_code, to_lang_code, minimal_context
        )
        if translations:
            return json_result(dict(translations=translations))

    results = get_next_results(
        data,
        exclude_services=exclude_services,
        exclude_results=exclude_results,
        number_of_results=number_of_results,
    )
    translations = results.translations

    # translators talk about quality, but our users expect likelihood.
    # rename the key in each dictionary
    for entry in translations:
        entry["likelihood"] = entry.pop("quality")
        entry["source"] = entry["service_name"]

    if first_call_for_this_word and translations:
        best_guess = translations[0]["translation"]
        Bookmark.find_or_create(
            db_session,
            flask.g.user,
            word_str,
            from_lang_code,
            best_guess,
            to_lang_code,
            minimal_context,
            url,
            title_str,
            article_id,
        )

    return json_result(dict(translations=translations))
def test_translate_and_bookmark(self, mock_get_all_translations):
    """Bookmark the same word twice, then a second word.

    The first two posts must yield the same bookmark id; the third post,
    for a different word, must come back with the translation 'small'.
    The calls made to the mocked ``minimize_context`` and
    ``get_all_translations`` are verified as well.
    """
    context = 'Die klein Jäger'
    first_word = "Die"
    self.data["context"] = context
    self.data["word"] = first_word
    source_lang = self.data["from_lang_code"]

    min_context, query = minimize_context(context, source_lang, first_word)
    self.data["query"] = query

    new_data = self.data.copy()
    new_data["word"] = "kleine"
    new_min_context, new_query = minimize_context(
        new_data["context"], new_data["from_lang_code"], new_data["word"])
    new_data["query"] = new_query

    mock_translator = MockTranslator({"Die": ["The"], "kleine": ["small"]})
    new_mock_translator = MockTranslator({
        "Die": ["The"],
        "kleine": ["small"],
    })

    # bookmark1 call will modify the object, we need to return a copy of
    # the same object for bookmark2 call
    responses = [mock_translator.translate(query) for _ in range(2)]
    responses.append(new_mock_translator.translate(new_query))
    mock_get_all_translations.side_effect = responses

    form_data = {"url": self.data["url"], "context": context, "word": first_word}
    endpoint = '/translate_and_bookmark/%s/%s' % (
        source_lang, self.data["to_lang_code"])
    target = "zeeguu_api.api.translate_and_bookmark.minimize_context"

    with patch(target) as mock_minimize_context:
        mock_minimize_context.return_value = (min_context, query)
        bookmark1 = self.json_from_api_post(endpoint, form_data)
        bookmark2 = self.json_from_api_post(endpoint, form_data)

        expected_call = call(context, source_lang, first_word)
        mock_minimize_context.assert_has_calls([expected_call, expected_call])

    assert (bookmark1["bookmark_id"] == bookmark2["bookmark_id"])

    form_data["word"] = new_data["word"]

    with patch(target) as mock_minimize_context:
        mock_minimize_context.return_value = (new_min_context, new_query)
        bookmark3 = self.json_from_api_post(endpoint, form_data)

        mock_minimize_context.assert_called_once_with(
            new_data["context"], new_data["from_lang_code"], new_data["word"])

    expected_requests = [call(self.data), call(self.data), call(new_data)]
    mock_get_all_translations.assert_has_calls(expected_requests)

    self.assertTrue(bookmark3['translation'] == 'small')
def test_translate_and_bookmark(self, mock_get_all_translations):
    """Post one word twice and another word once to /translate_and_bookmark.

    Posting the same word twice must return one and the same bookmark id,
    and the post for the other word must return the translation 'small'.
    The arguments seen by the mocked ``minimize_context`` and
    ``get_all_translations`` are checked too.
    """
    sentence = 'Die klein Jäger'
    word_one = "Die"
    self.data["context"] = sentence
    self.data["word"] = word_one
    lang_from = self.data["from_lang_code"]
    lang_to = self.data["to_lang_code"]

    ctx_one, query_one = minimize_context(sentence, lang_from, word_one)
    self.data["query"] = query_one

    new_data = self.data.copy()
    new_data["word"] = "kleine"
    ctx_two, query_two = minimize_context(
        new_data["context"], new_data["from_lang_code"], new_data["word"])
    new_data["query"] = query_two

    mock_translator = MockTranslator({"Die": ["The"], "kleine": ["small"]})
    new_mock_translator = MockTranslator(
        {"Die": ["The"], "kleine": ["small"]})

    # bookmark1 call will modify the object, we need to return a copy of
    # the same object for bookmark2 call
    first_answer = mock_translator.translate(query_one)
    second_answer = mock_translator.translate(query_one)
    third_answer = new_mock_translator.translate(query_two)
    mock_get_all_translations.side_effect = [
        first_answer, second_answer, third_answer]

    form_data = {
        "url": self.data["url"],
        "context": sentence,
        "word": word_one,
    }
    route = '/translate_and_bookmark/%s/%s' % (lang_from, lang_to)
    patched_name = "zeeguu_api.api.translate_and_bookmark.minimize_context"

    with patch(patched_name) as mock_minimize_context:
        mock_minimize_context.return_value = (ctx_one, query_one)
        bookmark1 = self.json_from_api_post(route, form_data)
        bookmark2 = self.json_from_api_post(route, form_data)

        repeated = [call(sentence, lang_from, word_one) for _ in range(2)]
        mock_minimize_context.assert_has_calls(repeated)

    assert (bookmark1["bookmark_id"] == bookmark2["bookmark_id"])

    form_data["word"] = new_data["word"]

    with patch(patched_name) as mock_minimize_context:
        mock_minimize_context.return_value = (ctx_two, query_two)
        bookmark3 = self.json_from_api_post(route, form_data)

        mock_minimize_context.assert_called_once_with(
            new_data["context"], new_data["from_lang_code"], new_data["word"])

    translator_calls = [call(self.data), call(self.data), call(new_data)]
    mock_get_all_translations.assert_has_calls(translator_calls)

    self.assertTrue(bookmark3['translation'] == 'small')