def detect_language(source):
    """Detect the language of ``source`` and return the matching ``Language``.

    The text is passed to ``Translator.detect_lang`` (credentials come from
    ``settings``); the code it returns is used to look up the ``Language``
    object whose ``bingcode`` equals that code.
    """
    client = Translator(
        settings.MS_TRANSLATOR_CLIENT_ID,
        settings.MS_TRANSLATOR_CLIENT_SECRET,
    )
    detected_code = client.detect_lang(source)
    return Language.objects.get(bingcode=detected_code)
def translate(source, lang_from, lang_to):
    """Translate ``source`` from ``lang_from`` into ``lang_to``.

    ``lang_from`` and ``lang_to`` are objects exposing a ``bingcode``
    attribute. ``lang_from`` may be falsy (e.g. ``None``); in that case
    ``None`` is passed to the translator as the source language code.
    """
    source_code = None
    if lang_from:
        source_code = lang_from.bingcode

    client = Translator(
        settings.MS_TRANSLATOR_CLIENT_ID,
        settings.MS_TRANSLATOR_CLIENT_SECRET,
    )
    return client.translate(source, source_code, lang_to.bingcode)
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` and return the result encoded as UTF-8.

    Uses the module-level ``client_id`` / ``client_secret`` credentials.
    """
    client = Translator(client_id, client_secret)
    return client.translate(
        text_message,
        lang_from=langfrom,
        lang_to=langto,
    ).encode('utf8')
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` and return the result encoded as UTF-8.

    The credentials are read from the ``microsoft`` section of ``config``
    (``client_id`` first, then ``client_secret``).
    """
    credentials = [
        config.get('microsoft', option)
        for option in ('client_id', 'client_secret')
    ]
    client = Translator(*credentials)
    translated = client.translate(
        text_message, lang_from=langfrom, lang_to=langto)
    return translated.encode('utf8')
def makeFrenchQuery(myQuery):
    """Return the terms of ``myQuery`` translated from English to French.

    ``myQuery`` is split into terms by ``parseQuery``; each term is
    translated on its own and the results are returned as a list in the
    same order.
    """
    english_terms = parseQuery(myQuery)
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    client = Translator('myPythonTranslate64',
                        'YqNxIMdOAoaF5Am+/BT84sdQ1q7ZCNA1stU0viWcGi4=')
    return [
        client.translate(word, lang_from='en', lang_to='fr')
        for word in english_terms
    ]
def btranslate(text_message, langfrom, langto):
    """Translate ``text_message`` from ``langfrom`` to ``langto``.

    Credentials come from the ``microsoft`` section of ``config``. The
    translated phrase is returned encoded as UTF-8.
    """
    account_id = config.get('microsoft', 'client_id')
    account_secret = config.get('microsoft', 'client_secret')
    languages = {'lang_from': langfrom, 'lang_to': langto}
    phrase = Translator(account_id, account_secret).translate(
        text_message, **languages)
    return phrase.encode('utf8')
def analyse_entry(self, entry, params):
    """Translate the text of ``entry`` and yield the updated entry.

    :param entry: mapping whose ``'nif:isString'`` value is the text to
        translate; its ``'output'`` key is overwritten with the result.
    :param params: mapping read with ``.get`` for ``"lang"`` (source
        language), ``"lang_to"`` (target language) and ``"key"`` (the
        translator API key).
    :return: generator yielding ``entry`` once, after ``entry['output']``
        has been set to ``{"nif:isString": <translated text>}``.
    """
    text = entry['nif:isString']
    source_lang = params.get("lang")
    target_lang = params.get("lang_to")
    api_key = params.get("key")
    # The previous debug ``print(params)`` was removed on purpose: ``params``
    # contains the API key and must not be written to stdout.
    translator = Translator(api_key)
    translated = translator.translate(
        text, lang_from=source_lang, lang_to=target_lang)
    entry['output'] = {"nif:isString": translated}
    yield entry
def get_translation(to_translate):
    """Translate ``to_translate`` and return it prefixed with a direction tag.

    The source language is detected first. English text is translated to
    French, anything else to English. Returns a string of the form
    ``'<from> -> <to>: <translation>'``, or ``None`` if any step raises
    (the error is printed).
    """
    try:
        # NOTE(review): credentials are hard-coded here; consider moving
        # them to configuration.
        client = Translator('dorsalfunbot',
                            'lWJjt3W86DqQX5J+VGCDsvD3LU9/eZFvG0VQj4k6J/Y=')
        source_lang = client.detect_lang(to_translate)
        if source_lang == 'en':
            target_lang, prefix = 'fr', 'en -> fr:'
        else:
            target_lang, prefix = 'en', '{} -> en:'.format(source_lang)
        translated = client.translate(
            to_translate, lang_from=source_lang, lang_to=target_lang)
        return '{} {}'.format(prefix, translated)
    except Exception as e:
        print("Translation error: {}".format(e))
        return None
def _checkTitle(title):
    """Return the result of language detection for ``title``.

    A translator is created with a key picked at random from
    ``key_choices`` and asked to detect the language of ``[title]``.
    If an ``IndexError`` or ``ValueError`` is raised along the way, the
    empty string is returned instead.
    """
    try:
        detector = Translator(random.choice(key_choices))
        return detector.detect_lang([title])
    except (IndexError, ValueError):
        return ""
def translate(texts: Union[str, List[str]], target_language: Lang = Lang.en) -> List[str]:
    """Microsoft translation implementation, see `brain.feature.translate`.

    Emits a ``DeprecationWarning`` on every call. A single (non-list)
    value is treated as a one-element list. Each text is translated
    individually into ``target_language.name``.

    :raises Exception: if the resulting list of translations is empty
        (which includes the case of an empty input list).
    """
    warnings.warn("Azure is not used as translation service anymore",
                  DeprecationWarning, stacklevel=2)

    batch = texts if isinstance(texts, list) else [texts]

    # todo: reuse enum from language_detect
    client = Translator(_KEY)
    translations = []
    for text in batch:
        translations.append(
            client.translate(text, lang_to=target_language.name))

    if translations:
        return translations
    raise Exception('no translations')
def engine():
    """Answer the question submitted in the request form.

    The ``question`` form field is translated from Arabic to English,
    lower-cased and sent to the Quepy demo endpoint, which returns SPARQL
    queries as JSON. The first query is run against DBpedia and the
    outcome is rendered with the ``answer.html`` template.

    When the translated question is empty no remote call is made and the
    template receives ``None`` for ``query``, ``results`` and ``url``
    (previously this path crashed with ``UnboundLocalError``).
    """
    # Get question from user as text
    question = request.form['question']

    # Translate the question.
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    translator = Translator('emad_punk123456',
                            'R0go6LNQEj3CVh7nhyHw/DLenWLuQNjyjdhnZ0okRGE=')
    translated_question = translator.translate(
        question, lang_from='ar', lang_to='en')
    lower_q = translated_question.lower()

    # Defaults so every name handed to the template is bound even when the
    # branch below is skipped.
    url = None
    query = None
    results = None

    # Send question to the API for question analysis and query generation.
    # This online app is an example of applying the Quepy model on DBpedia;
    # it is used only as a quick demo.
    if translated_question != "":
        # NOTE(review): the question is appended to the URL without
        # explicit URL-encoding.
        url = "http://quepy.machinalis.com/engine/get_query?question=" + lower_q

        # Get sparql query as json
        r = requests.get(url)
        j = r.json()

        # Select only the sparql code
        query = (j['queries'][0]['query'])

        # Send the sparql query to SPARQLWrapper to run it on dbpedia
        sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)

        # Get results as json
        results = sparql.query().convert()

    # Convert the original question string to a list of words
    q2 = question.split(' ')

    # Return everything to the answer page for display.
    return render_template('answer.html', query=query, question=lower_q,
                           results=results, q2=q2, url=url)
def translate(self, textToTranslate, languageList):
    """Round-trip ``textToTranslate`` through a chain of languages.

    Starting from English, the text is translated into each language of
    ``languageList`` in turn (each step uses the previous language as the
    source) and finally back into English.

    All arguments are validated before the translator is created, so an
    invalid entry in ``languageList`` no longer costs any API calls.

    :param textToTranslate: the English text to start from; must be ``str``.
    :param languageList: iterable of ``Language`` members.
    :return: the text after the final translation back to English.
    :raises ValueError: if ``textToTranslate`` is not a string or if
        ``languageList`` contains anything that is not a ``Language``.
    """
    if not isinstance(textToTranslate, str):
        raise ValueError("The argument textToTranslate must be string")

    # Materialize once so the argument can be checked and then iterated,
    # even if the caller passed a one-shot iterator.
    languages = list(languageList)
    for language in languages:
        if not isinstance(language, Language):
            raise ValueError(
                "You must pass only valid values from Languages enum")

    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    translator = Translator(
        'GeorgiKaradjov', 'Y2c414NBMQlVgVPZK7vmFT7WZ/DJ4sKRYsTxG9NAXlQ=')

    tempTranslation = textToTranslate
    languageFrom = Language.English
    for language in languages:
        tempTranslation = translator.translate(
            tempTranslation,
            lang_from=languageFrom.value,
            lang_to=language.value)
        languageFrom = language

    # Final hop: back to English from whatever language came last.
    return translator.translate(
        tempTranslation,
        lang_from=languageFrom.value,
        lang_to=Language.English.value)
def run(self):
    """Fetch a translation from the backend named by ``self.source``.

    Supported sources are ``"gtranslateweb"`` (scrapes the mobile Google
    Translate page), ``"mstranslator"`` and ``"yandex"``. Any other source
    leaves the response as the empty string. If anything raises, the
    response becomes ``'[Error] '`` followed by the exception text.
    Unless ``self.aborted`` is set, ``self.finished`` is emitted with the
    source name and the response.
    """
    result = ''
    try:
        backend = self.source
        if backend == "gtranslateweb":
            request_headers = {
                'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7',
            }
            query = {
                'sl': self.options['source_language'],
                'tl': self.options['target_language'],
                'q': self.options['source_text'],
            }
            page = requests.get('https://translate.google.com/m',
                                params=query, headers=request_headers)
            soup = BeautifulSoup(page.text, "lxml")
            # Keep the text of the last <div> whose first class is 't0'.
            for div in soup.find_all('div'):
                if div.has_attr('class') and div['class'][0] == 't0':
                    result = div.string
        elif backend == "mstranslator":
            self.translator = Translator(self.mstranslate_key)
            result = self.translator.translate(
                self.options['source_text'],
                lang_from=self.options['source_language'],
                lang_to=self.options['target_language'])
        elif backend == "yandex":
            self.translator = YandexTranslate(self.yandex_key)
            reply = self.translator.translate(
                self.options['source_text'],
                self.options['source_language'] + "-" + self.options['target_language'])
            result = reply['text'][0]
    except Exception as e:
        result = '[Error] ' + str(e)

    if self.aborted:
        return
    self.finished.emit(self.source, result)
import json

from mstranslator import Translator

# Load the tweet data; the file is closed automatically by the ``with`` block.
with open("all_translated_tweets.json") as f:
    d = json.load(f)


def translate_tweets(translator, tweets):
    """Return a list with every tweet in ``tweets`` translated to English."""
    return [translator.translate(tweet, lang_to='en') for tweet in tweets]


# NOTE(review): the API key is hard-coded; consider moving it to configuration.
translator = Translator('8a050fdaf6b74b22af7589f7261a3f0a')
country = "Colombia"

# For each group of tweets, count English and non-English tweets and stop
# scanning the group as soon as more than 5 non-English tweets were seen.
# NOTE(review): the nesting was reconstructed from a whitespace-collapsed
# source - confirm that the ``else`` belonged to the language test.
for tweets in d[country][1]:
    all_eng = True
    count = 0
    eng_count = 0
    for each in tweets:
        print(country, count, eng_count)
        if translator.detect_lang(each) == 'en':
            eng_count += 1
            continue
        count += 1
        if count > 5:
            all_eng = False
            break
import wikipedia
from mstranslator import Translator

# Smoke test of the translation service: Russian -> English.
# NOTE(review): the API key is hard-coded; consider moving it to configuration.
translator = Translator("8dc97e03193b4674b160cfe2b07c8223")
greeting = translator.translate('Привет, мир!', lang_from='ru', lang_to='en')
print(greeting)

# Fetch the "machine learning" article with the wikipedia language set to
# English ...
wikipedia.set_lang("en")
mlen = wikipedia.page("machine learning")
en_content = mlen.content

# ... and again with the language set to French.
wikipedia.set_lang("fr")
mlfr = wikipedia.page("machine learning")
fr_content = mlfr.content
def translate_2(text, source_language_code, target_language_code):
    """Translate ``text`` between the two given language codes.

    Returns whatever ``Translator.translate`` returns for the request.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    credentials = (
        "irlab-pan-author-obfuscation-16",
        "TWGKKcKQ/VoASe1EksnZBNNVA8mlThBsPQ/5z7Wqkkk=",
    )
    return Translator(*credentials).translate(
        text,
        lang_from=source_language_code,
        lang_to=target_language_code,
    )
from nltk import FreqDist
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.corpus import stopwords
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO
from shutil import move
from collections import Counter
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument

# Absolute path of the data directory, resolved from '../data'.
dataPath = os.path.abspath(os.path.relpath('../data'))
translator = Translator('KEY')


class LanguageDetection():
    """Helpers operating on the PDF files stored under ``dataPath``."""

    def __init__(self):
        pass

    def _getNumPages(self, id):
        """Return the page count of ``<dataPath>/pdf/tocheck/<id>.pdf``.

        :param id: identifier used to build the file name (``str(id)``).
        :return: the number of pages, or the empty string ``""`` if the
            file cannot be opened or read.
        """
        try:
            pdf_path = os.path.join(dataPath, 'pdf/tocheck', str(id) + '.pdf')
            # The context manager guarantees the file handle is released,
            # including when the reader raises.
            with open(pdf_path, 'rb') as pdf_stream:
                return PyPDF2.PdfFileReader(pdf_stream).getNumPages()
        except Exception:
            # Best effort: any failure is reported as "". Unlike a bare
            # ``except:``, KeyboardInterrupt/SystemExit still propagate.
            return ""
def setUp(self):
    """Create a ``Translator`` and a ``TranslatorMock`` for each test.

    Both are built from the same module-level ``client_id`` /
    ``client_secret`` pair.
    """
    credentials = (client_id, client_secret)
    self.translator = Translator(*credentials)
    self.translator_mock = TranslatorMock(*credentials)
import dicttoxml from flask import abort, jsonify, Response import json import time from tweet_tracker_api.job_management import job from mstranslator import Translator from tweet_tracker_api.MongoDBFacade import MongoDBFacade from flask import session import logging tweets = None ram_tweets = None translator = Translator('4+9RqJt9le3aEYoc6sfoYDgTkMy+xXVUL7g4U9Nrz6w=') def setup(collection, ram_collection): """ This function allows server.py to set up the collection. :param collection: The MongoDB collection object to get tweets from. """ global tweets global ram_tweets tweets = collection ram_tweets = ram_collection from bson import ObjectId from bson import json_util class JSONEncoder(json.JSONEncoder):
def request_image(window, keyword, num_of_try=0, translate=True):
    """
    Queries Bing for images and retries up to 5 times if the randomly selected image could not be accessed

    :param window: not used by this function itself; it is only passed along unchanged on retries
    :param keyword: string which specifies the image content
    :param num_of_try: internal parameter that increases if the selected image could not be retrieved
                       (e.g. Forbidden Error)
    :param translate: Should the keyword be translated to english before the search? (may increase result size)
    :return: The image data in bytes, or None if keyword is None or no image could be retrieved
    """
    if keyword is None:
        return None

    if translate:
        # Read the key inside a context manager so the handle is always closed.
        with open('../ms.key') as key_file:
            ms_key = key_file.read()
        trans = Translator('__RealTimeStoryIllustrator__', ms_key)
        translatedkw = trans.translate(keyword, lang_from='de', lang_to='en')
        print("IMAGE SERVICE: Getting image for " + str(keyword) +
              ". Searched for the english translation '" + str(translatedkw) + "'.")
    else:
        translatedkw = keyword
        print("IMAGE SERVICE: Getting image for " + str(keyword) + ".")

    if num_of_try > 5:
        # no images were found
        logger.error("IMAGE SERVICE: Could not find an image after 5 tries for " + str(translatedkw) + ".")
        return None

    # (The commented-out Google AJAX image search that used to live here was
    # dead code and has been removed.)
    try:
        params = {'$format': 'json',
                  '$top': 10,
                  'ImageFilters': '\'Size:Small\''}
        with open('../bing.key') as key_file:
            bing_key = key_file.read()
        api = BingSearchAPI(bing_key)
        result = api.search_image(str(translatedkw+'+AND+(illustration+OR+clipart)'), params)

        # Decode the response body once and reuse it.
        results = result.json()['d']['results']
        # With zero results randint raises ValueError, which triggers a retry below.
        img_num = random.randint(0, len(results)-1)

        # Close the HTTP response as soon as the image bytes have been read.
        with urllib.request.urlopen(results[img_num]['MediaUrl'], timeout=2) as response:
            return response.read()
    except Exception as e:
        # have to catch everything since socket exceptions seem to be broken
        print("ERROR in IMAGE SERVICE: Trying again, request was denied "+str(e))
        return request_image(window, keyword, num_of_try + 1, translate=translate)
import os
import csv
from mstranslator import Translator

# NOTE(review): credentials are hard-coded; consider moving them to configuration.
CLIENT_ID = 'NLP'
CLIENT_SECRET = '7I2PWW9SJDRNj72bIjTqh7xOn7eke+rPNDx94JUKJEA='
translator = Translator(CLIENT_ID, CLIENT_SECRET)

# Translate the first column of every CSV file in Data/Raw/ from Hindi to
# English and write the result, one translation per row, to a file of the
# same name in Data/Translated/.
files_in_dir = os.listdir("Data/Raw/")
for datum in files_in_dir:
    print("Translating " + datum)
    # Both handles are closed by the ``with`` block even if a translation
    # raises half-way through a file. The output file is opened first, as
    # before.
    with open("Data/Translated/" + datum, 'w') as o, \
            open("Data/Raw/" + datum, 'r') as g:
        f = csv.reader(g, delimiter=',')
        writer = csv.writer(o)
        # The ``.decode`` call implies Python 2 byte strings read from the file.
        allposts = [
            [translator.translate(line[0].decode("utf-8"),
                                  lang_from='hi', lang_to='en')]
            for line in f
        ]
        writer.writerows(allposts)
print("Translation complete")
from langdetect import detect from mongodb.connection import getMongoConnection import config import sys from mstranslator import Translator translator = Translator('c64707b4ecb74bd7b4a78ead5fa7b708') def translate(company_name): conn = getMongoConnection() db = conn[config.Mongo_DB_NAME] print db print "something yar" while db[company_name].find({"translated": True}).count() > 0: job = db[company_name].find_one({"translated": True}) lan = detect(job['snippet']) if str(lan) != 'en': for key in job: if key == "city" or key == "snippet": job[key] = translator.translate(job[key], lang_to="en") db[company_name].update({"url": job["url"]}, {"$set": { key: job[key] }}, upsert=False, multi=False) if key == "job_summary": flag = 0 try: job[key] = translator.translate(unicode( job[key], "utf-8"),
def run(self, options):
    """Build the suggestions table for a segment and emit it.

    The HTML table is emitted through ``self.finished`` up to two times:
    first with the context occurrences, the previous text and the
    translation-memory matches, then - if the machine translation call
    returns something - again with an extra "Microsoft Translate" row.
    The method returns early, without emitting, whenever ``self.running``
    has been cleared. Errors raised in the machine translation step are
    printed and otherwise ignored.

    :param options: mapping read for ``'target_language'``,
        ``'source_language'``, ``'source_text'``, ``'context'`` and
        ``'previous_text'``.
    """
    self.running = True
    fragments = ['<table border="0.5" cellspacing="0" cellpadding="2" width="100%" style="border-color:gray;">']

    if self.tm_source_segments_cache is None:
        self.refresh_tm()
    if not self.running:
        return

    matching_segments = []
    if self.tm_source_segments_cache:
        matching_segments = db_op.get_translation_memory(
            self.tm_path,
            self.tm_source_segments_cache,
            options['target_language'],
            options['source_text'],
            60)
    if not self.running:
        return

    # Row 1: where the string occurs (at most the first four occurrences).
    if options['context']:
        fragments.append('<tr>')
        fragments.append('<td valign="middle"><img src="images/code_white_24dp.svg"></td>')
        fragments.append('<td><font color="gray">Occurrences (first 4):</font>')
        for position, occurrence in enumerate(options['context']):
            if position >= 4:
                break
            fragments.append('<br>' + occurrence[0] + ':' + occurrence[1])
        fragments.append('</td></tr>')

    # Row 2: the previous text, when there is one.
    previous_text = options['previous_text']
    if previous_text and previous_text != '':
        fragments.append('<tr>')
        fragments.append('<td valign="middle"><img src="images/undo_white_24dp.svg"></td>')
        fragments.append('<td><font color="gray">Previous text:</font><br>')
        fragments.append(html.escape(options['previous_text']))
        fragments.append('</td></tr>')

    # One row per translation-memory match; stop once ``self.limit`` rows
    # have been added (the first match is always added).
    for position, match in enumerate(matching_segments):
        fragments.append('<tr>')
        fragments.append('<td valign="middle"><img src="images/storage_white_24dp.svg"></td>')
        fragments.append('<td><font color="gray">TM match (' + str(match[0]) + '%):</font><br>')
        fragments.append(html.escape(match[1]))
        fragments.append('<br><font color="gray">Translated text:</font><br>')
        fragments.append(html.escape(match[2]))
        fragments.append('</td></tr>')
        if position + 1 >= self.limit:
            break

    if not self.running:
        return
    self.finished.emit(''.join(fragments) + '</table>')

    # Machine translation
    settings = QtCore.QSettings("Babelruins.org", "BlackCAT")
    translator = Translator(settings.value('plugins_mstranslate_api_key', ""))
    try:
        mst_response = translator.translate(
            options['source_text'],
            options['source_language'],
            options['target_language'])
        if not mst_response:
            return
        # Same table as before with one more row before the closing tag.
        fragments.append('<tr>')
        fragments.append('<td valign="middle"><img src="images/computer_white_24dp.svg"></td>')
        fragments.append('<td><font color="gray">Microsoft Translate:</font><br>')
        fragments.append(html.escape(mst_response))
        fragments.append('</td></tr>')
        suggestions_html = ''.join(fragments) + '</table>'
        if not self.running:
            return
        self.finished.emit(suggestions_html)
    except Exception as e:
        print(str(e))
def dict_mapping(word, key=key_user):
    """Translate ``word`` from English ('en') to Thai ('th').

    ``key`` is passed to ``Translator``; it defaults to the module-level
    ``key_user``.
    """
    client = Translator(key)
    thai_word = client.translate(word, lang_from='en', lang_to='th')
    return thai_word
import goslate
import sqlite3

conn = sqlite3.connect('../database/twitter.sqlite')
cur = conn.cursor()

from mstranslator import Translator
translator = Translator('XXXXXXXXXXXX')

#cur.executescript('ALTER TABLE followers ADD loc_ind INTEGER;')
#cur.execute('UPDATE followers SET loc_ind = 0')

# Translate the ``location`` column of table ``t2`` to English, one row at a
# time. A row is flagged with ``loc_ind = 1`` once it has been handled, so
# the loop ends when no unflagged row is left.
while True:
    cur.execute('SELECT id FROM t2 WHERE loc_ind <> 1')
    pending = cur.fetchone()
    if pending is None:
        print("All text has been translated")
        break
    row_id = pending[0]

    cur.execute('SELECT location FROM t2 WHERE id = ?', (row_id, ))
    loc = cur.fetchone()[0]

    try:
        val = translator.translate(loc, lang_to='en')
    except Exception:
        # Translation failed: keep the original location. The old code
        # reused ``val`` here, which was either unbound (first failure) or
        # still held the previous row's translation.
        val = loc

    # Store the value and flag the row as handled, so a row that cannot be
    # translated does not keep the loop running forever.
    cur.execute('UPDATE t2 SET location = ? WHERE id = ?', (val, row_id))
    cur.execute('UPDATE t2 SET loc_ind = 1 WHERE id = ?', (row_id, ))
    conn.commit()

cur.close()
conn.close()
def languageDetection(): # sql = 'select id from resolved_papers where downloaded = 1 and npages >= 5 and pdf2text = 1 and english = 0 and id in (12, 70, 74, 77, 92, 108, 110, 111, 113, 127, 128, 129, 133, 136, 145, 149, 151, 189, 210, 223, 238, 247, 253, 276, 287, 289, 291, 292, 303, 308, 345, 346, 347, 349, 350, 351, 354, 355, 359, 360, 361, 362, 363, 364, 365, 368, 377, 381, 389, 393, 395, 406, 414, 424, 439, 446, 448, 549, 554, 558, 574, 577, 578, 579, 581, 582, 583, 585, 588, 589, 591, 592, 595, 597, 601, 604, 605, 609, 613, 621, 625, 682, 684, 712, 713, 714, 715, 716, 717, 719, 722, 723, 724, 726, 730, 731, 732, 734, 735, 738, 739, 740, 743, 749, 751, 752, 753, 754, 755, 758, 765, 782, 787, 816, 822, 830, 836, 851, 857, 860, 861, 869, 882, 970, 1044, 1045, 1047, 1050, 1052, 1055, 1056, 1057, 1058, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1068, 1069, 1072, 1073, 1074, 1075, 1076, 1079, 1080, 1083, 1084, 1086, 1087, 1089, 1094, 1100, 1104, 1105, 1106, 1115, 1116, 1117, 1122, 1124, 1125, 1126, 1131, 1133, 1142, 1143, 1146, 1150, 1151, 1172, 1174, 1176, 1184, 1194, 1248, 1283, 1301, 1307, 1309, 1367, 1381, 1417, 1419, 1452, 1456, 1482, 1491, 1507, 1511, 1513, 1522, 1542, 1562, 1585, 1587, 1591, 1624, 1626, 1628, 1652, 1687, 1688, 1689, 1692, 1693, 1694, 1696, 1698, 1699, 1701, 1704, 1710, 1711, 1714, 1716, 1719, 1720, 1727, 1728, 1730, 1745, 1750, 1751, 1755, 1757, 1770, 1809, 1815, 1820, 1831, 1835, 1872, 1884, 1887, 1898, 1935, 1955, 1993, 2009, 2025, 2026, 2029, 2030, 2031, 2199, 2241, 2244, 2246, 2275, 2276, 2277, 2278, 2279, 2305, 2323, 2324, 2325, 2327, 2328, 2347, 2360, 2402, 2404, 2410, 2415, 2442, 2448, 2450, 2451, 2452, 2461, 2462, 2467, 2477, 2509, 2510, 2512, 2513, 2518, 2522, 2524, 2531, 2543, 2547, 2554, 2555, 2576, 2577, 2578, 2579, 2580, 2583, 2586, 2605, 2609, 2624, 2629, 2646, 2651, 2652, 2653, 2655, 2656, 2659, 2661, 2662, 2671, 2676, 2677, 2756, 2757, 2758, 2760, 2761, 2762, 2768, 2771, 2772, 2773, 2774, 2776, 2777, 2781, 2782, 2783, 2786, 
2789, 2790, 2791, 2792, 2793, 2794, 2795, 2798, 2811, 2815, 2822, 2869, 2884, 2907, 2913, 2920, 2924, 3029, 3127, 3141, 3146, 3172, 3173, 3174, 3175, 3176, 3177, 3178, 3180, 3182, 3183, 3184, 3185, 3189, 3192, 3194, 3198, 3199, 3202, 3203, 3207, 3208, 3211, 3223, 3224, 3230, 3236, 3252, 3253, 3262, 3275, 3302, 3305, 3316, 3365, 3388, 3389, 3391, 3392, 3396, 3397, 3398, 3399, 3400, 3401, 3402, 3405, 3406, 3408, 3409, 3412, 3415, 3416, 3418, 3419, 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3431, 3432, 3433, 3436, 3438, 3439, 3443, 3444, 3445, 3446, 3450, 3452, 3455, 3456, 3458, 3461, 3466, 3467, 3470, 3503, 3526, 3532, 3536, 3538, 3541, 3542, 3543, 3549, 3563, 3573, 3597, 3598, 3620, 3626, 3662, 3819, 3921, 3922, 3923, 3925, 3927, 3931, 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3942, 3943, 3944, 3945, 3948, 3950, 3952, 3953, 3954, 3955, 3957, 3958, 3959, 3960, 3961, 3963, 3965, 3966, 3967, 3968, 3971, 3972, 3980, 3988, 3995, 4000, 4005, 4011, 4039, 4043, 4046, 4048, 4050, 4059, 4077, 4086, 4089, 4098, 4101, 4104, 4109, 4111, 4123, 4127, 4170, 4184, 4203, 4215, 4221, 4235, 4287, 4295, 4345, 4362, 4367, 4448, 4449, 4451, 4452, 4453, 4454, 4455, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4470, 4472, 4478, 4480, 4481, 4482, 4496, 4500, 4504, 4508, 4513, 4518, 4523, 4524, 4548, 4551, 4567, 4572, 4598, 4607, 4608, 4611, 4657, 4786, 4788, 4789, 4791, 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, 4804, 4805, 4811, 4815, 4817, 4819, 4829, 4839, 4840, 5037, 5038, 5040, 5047, 5179, 5191, 5192, 5210, 5248, 5249, 5266, 5275, 5276, 5322, 5323, 5327, 5330, 5362, 5410, 5411, 5416, 5451, 5462, 5493, 5494, 5496, 5519, 5536, 5548, 5555, 5587, 5588, 5589, 5590, 5591, 5594, 5599, 5617, 5633, 5636, 5660, 5667, 5695, 5697, 5701, 5702, 5706, 5767, 5768, 5769, 5773, 5778, 5786, 5831, 5832, 5833, 5835, 5836, 5837, 5839, 5844, 5849, 5850, 5858, 5860, 5889, 5901, 5915, 5916, 5918, 5920, 5991, 5992, 5993, 5994, 5995, 6009, 6045, 6079, 
6080, 6081, 6083, 6084, 6085, 6086, 6087, 6100, 6101, 6107, 6185, 6249, 6278, 6279, 6280, 6281, 6282, 6283, 6285, 6305, 6306, 6387, 6393, 6396, 6397, 6398, 6411, 6439, 6498, 6505, 6511, 6513, 6518, 6520, 6524, 6525, 6526, 6527, 6532, 6543, 6553, 6555, 6565, 6566, 6569, 6573, 6574, 6581, 6585, 6601, 6605, 6606, 6612, 6615, 6617, 6621, 6645, 6646, 6648, 6651, 6652, 6658, 6660, 6667, 6672, 6676, 6682, 6684, 6688, 6690, 6692, 6693, 6700, 6704, 6743, 6769, 6771, 6772, 6775, 6778, 6783, 6785, 6789, 6793, 6818, 6824, 6829, 6830, 6834, 6839, 6845, 6846, 6849, 6850, 6855, 6859, 6866, 6873, 6878, 6887, 6888, 6889, 6890, 6907, 6926, 6945, 6948, 6954, 6963, 7006, 7066, 7082, 7102, 7121, 7162, 7163, 7271, 7272, 7273, 7285, 7314, 7315, 7350, 7362, 7364, 7398, 7441, 7442, 7443, 7444, 7446, 7451, 7454, 7456, 7462, 7464, 7504, 7515, 7516, 7547, 7548, 7634, 7659, 7660, 7661, 7662, 7663, 7664, 7665, 7672, 7776, 7777, 7783, 7784, 7788, 7789, 7792, 7795, 7797, 7798, 7799, 7809, 7831, 7889, 7917, 7918, 7920, 7926, 7930, 7932, 7933, 7935, 7936, 7941, 7944, 7960, 7962, 7971, 8008, 8017, 8070, 8075, 8076, 8110, 8111, 8112, 8117, 8120, 8128, 8129, 8130, 8133, 8136, 8140, 8143, 8144, 8145, 8148, 8149, 8150, 8153, 8154, 8159, 8163, 8203, 8225, 8268, 8270, 8302, 8310, 8312, 8419, 8421, 8496, 8497, 8498, 8500, 8505, 8506, 8507, 8508, 8510, 8513, 8517, 8533, 8543, 8584, 8710, 8717, 8718, 8719, 8720, 8721, 8722, 8724, 8726, 8730, 8732, 8733, 8734, 8737, 8739, 8740, 8741, 8742, 8743, 8744, 8745, 8747, 8748, 8750, 8751, 8752, 8753, 8754, 8755, 8756, 8757, 8759, 8761, 8764, 8766, 8768, 8769, 8773, 8774, 8775, 8784, 8811, 8817, 9042, 9056, 9207, 9219, 9240, 9249, 9273, 9318, 9322, 9422, 9457, 9485, 9562, 9623, 9647, 9836, 9837, 9922, 10067, 10068, 10069, 10168, 10185, 10288, 10400, 10401, 10513, 10515, 10606, 10700, 10702, 10703, 10771, 10772, 10819, 10821, 10927, 11019, 11056, 11113, 11142, 11143, 11225, 11226, 11227, 11343, 11361, 11362, 11364, 11377, 11448, 11460, 11461, 11462, 11463, 11465, 
11466, 11468, 11493, 11609, 11610, 11611, 11617, 11638, 11659, 11718, 11748, 11749, 11750, 11751, 11762, 11821, 11850, 11891, 11898, 11911, 11913, 11914, 11915, 11916, 11917, 11918, 11919, 11920, 11921, 11922, 11923, 11926, 11928, 11934, 11955, 11980, 12026, 12030, 12044, 12092, 12093, 12094, 12095, 12096, 12098, 12100, 12101, 12102, 12103, 12104, 12105, 12106, 12107, 12108, 12109, 12110, 12111, 12112, 12113, 12114, 12122, 12123, 12125, 12144, 12147, 12234, 12235, 12237, 12256, 12305, 12339, 12346, 12407, 12448, 12511, 12665, 12705, 12706, 12708, 12709, 12710, 12711, 12712, 12713, 12714, 12716, 12717, 12718, 12719, 12720, 12721, 12722, 12725, 12729, 12742, 12753, 12762, 12802, 12813, 12816, 12821, 12823, 12843, 12856, 12905, 12907, 13006, 13061, 13062, 13063, 13137, 13138, 13198, 13329, 13330, 13331, 13332, 13494, 13495, 13582, 13583, 13584, 13585, 13586, 13697, 13833, 13834, 13835, 13836, 13837, 13840, 14160, 14161, 14200, 14341, 14342, 14343, 14590, 14591, 14597, 14610, 14614, 14631, 14632, 14633, 14634, 14635, 14650, 14655, 14656, 14689, 14726, 14777, 14870, 14871, 14872, 14921, 14922, 14923, 14991, 14992, 14993, 14994, 14995, 15136, 15137, 15138, 15139, 15140, 15141, 15142, 15143, 15152, 15216, 15265, 15277, 15387, 15388, 15483, 15546, 15550, 15587, 15590, 15623, 15641, 15653, 15711, 15712, 15730, 15743, 15763, 15794, 15805, 15821, 15831, 15884, 15932, 16039, 16122, 16124, 16153, 16175, 16181, 16220, 16233, 16264, 16277, 16306, 16361, 16377, 16391, 16392, 16393, 16402, 16404, 16431, 16439, 16440, 16444, 16447, 16448, 16455, 16457, 16463, 16468, 16513, 16524, 16528, 16551, 16569, 16594, 16596, 16600, 16610, 16647, 16648, 16718, 16731, 16763, 16765, 16794, 16795, 16899, 16948, 16962, 16993, 16998, 17011, 17013, 17034, 17061, 17062, 17141, 17142, 17143, 17144, 17155, 17158, 17248, 17262, 17263, 17264, 17265, 17266, 17333, 17334, 17335, 17395, 17396, 17398, 17400, 17401, 17405, 17410, 17412, 17417, 17420, 17431, 17547, 17584, 17585, 17587, 17599, 17674, 17676, 
17677, 17679, 17711, 17719, 17749, 17750, 17751, 17752, 17753, 17754, 17756, 17757, 17811, 17812, 17814, 17948, 17963, 17964, 17965, 17989, 17998, 18083, 18139, 18145, 18165, 18229, 18230, 18257, 18264, 18273, 18321, 18322, 18323, 18351, 18515, 18548, 18599, 18600, 18623, 18637, 18675, 18676, 18687, 18698, 18736, 18753, 18768, 18792, 18794, 18797, 18823, 18828, 18830, 18850, 18851, 18853, 18854, 18857, 18882, 18885, 18886, 18887, 18888, 18891, 18892, 18893, 18894, 18898, 18901, 18904, 18930, 18947, 18967, 18968, 18970, 18972, 18973, 18974, 18976, 18977, 18980, 18982, 18983, 18984, 18985, 18986, 18991, 19006, 19059, 19060, 19061, 19062, 19064, 19066, 19067, 19069, 19071, 19103, 19104, 19110, 19116, 19153, 19180, 19181, 19186, 19263, 19272, 19273, 19280, 19318, 19409, 19425, 19428, 19456, 19528, 19531, 19538, 19606, 19607, 19609, 19610, 19612, 19613, 19616, 19623, 19636, 19647, 19648, 19685, 19798, 19799, 19800, 19801, 19802, 19805, 19806, 19807, 19808, 19811, 19812, 19813, 19816, 19820, 19821, 19836, 19874, 19875, 19878, 19960, 19985, 20051, 20052, 20053, 20054, 20055, 20056, 20057, 20058, 20059, 20061, 20062, 20063, 20064, 20065, 20066, 20069, 20070, 20071, 20072, 20074, 20078, 20079, 20081, 20084, 20088, 20090, 20110, 20156, 20157, 20168, 20189, 20193, 20245, 20344, 20345, 20346, 20347, 20348, 20349, 20350, 20353, 20354, 20355, 20356, 20357, 20358, 20359, 20360, 20361, 20362, 20363, 20365, 20368, 20370, 20371, 20373, 20374, 20377, 20391, 20392, 20396, 20398, 20400, 20444, 20476, 20520, 20682, 20685, 20687, 20688, 20689, 20690, 20691, 20692, 20693, 20694, 20695, 20698, 20699, 20700, 20701, 20702, 20703, 20707, 20709, 20714, 20728, 20760, 20774, 20864, 20865, 20866, 20867, 20868, 20869, 20870, 20872, 20874, 20899, 20909, 20962, 21041, 21042, 21117, 21118, 21121, 21139, 21146, 21227, 21271, 21272, 21273, 21274, 21275, 21425, 21430, 21493, 21505, 21507, 21510, 21513, 21612, 21616, 21621, 21622, 21623, 21624, 21667, 21675, 21751, 21765, 21766, 21767, 21846, 21847, 
21856, 21857, 21858, 21871, 21872, 21873, 21875, 21876, 21877, 21881, 21883, 21885, 21924, 21925, 21957, 21977, 21978, 21979, 21980, 21984, 21985, 21993, 21997, 21999, 22001, 22031, 22033, 22082, 22113, 22175, 22228, 22247, 22271, 22272, 22371, 22374, 22462, 22463, 22613, 22694, 22695, 22696, 22697, 22700, 22880, 22881, 22882, 22883, 22884, 22901, 22977, 22978, 22979, 22981, 23030, 23032, 23191, 23230, 23236, 23238, 23291, 23340, 23453, 23552, 23553, 23744, 23761, 23774, 24016, 24025, 24037, 24085, 24090, 24096, 24125, 24126, 24128, 24129, 24130, 24132, 24133, 24140, 24141, 24142, 24145, 24150, 24151, 24152, 24153, 24155, 24168, 24169, 24170, 24171, 24172, 24173, 24174, 24181, 24186, 24187, 24189, 24190, 24192, 24193, 24206, 24207, 24208, 24209, 24210, 24211, 24212, 24213, 24214, 24239, 24243, 24244, 24246, 24247, 24249, 24250, 24251, 24252, 24253, 24254, 24255, 24256, 24257, 24258, 24261, 24290, 24297, 24298, 24299, 24300, 24301, 24302, 24303, 24304, 24305, 24307, 24308, 24315, 24326, 24330, 24334, 24335, 24336, 24350, 24364, 24365, 24366, 24367, 24368, 24371, 24372, 24390, 24391, 24393, 24405, 24406, 24408, 24411, 24412, 24413, 24415, 24438, 24439, 24440, 24473, 24474, 24476, 24477, 24478, 24479, 24480, 24481, 24483, 24484, 24485, 24486, 24487, 24520, 24522, 24523, 24524, 24525, 24526, 24527, 24528, 24529, 24530, 24531, 24532, 24533, 24535, 24536, 24537, 24540, 24541, 24542, 24543, 24544, 24545, 24546, 24547, 24549, 24550, 24576, 24586, 24621, 24622, 24623, 24624, 24625, 24626, 24627, 24628, 24629, 24630, 24631, 24632, 24633, 24634, 24635, 24636, 24637, 24638, 24639, 24640, 24641, 24642, 24644, 24645, 24646, 24647, 24648, 24651, 24652, 24653, 24654, 24655, 24656, 24657, 24712, 24713, 24714, 24715, 24716, 24717, 24719, 24720, 24721, 24722, 24723, 24724, 24731, 24775, 24795, 24812, 24831, 24833, 24835, 24836, 24845, 24846, 24851, 24869, 24877, 24888, 24889, 24907, 24926, 24952, 25091, 25169, 25177, 25178, 25195, 25206, 25247, 25248, 25251, 25267, 25340, 25345, 
25455, 25456, 25460, 25464, 25754, 25822, 25845, 25865, 25890, 25891, 25893, 25914, 25975, 25976, 25978, 25980, 25982, 25986, 25996, 26003, 26074, 26112, 26143, 26172, 26182, 26183, 26186, 26194, 26202, 26283, 26284, 26287, 26289, 26293, 26303, 26316, 26320, 26322, 26463, 26465, 26467, 26469, 26476, 26481, 26486, 26489, 26497, 26596, 26663, 26678, 26717, 27136, 27183, 27307, 27340, 27341, 27342, 27344, 27348, 27355, 27607, 27608, 27609, 27610, 27623, 27635, 27641, 27922, 27937, 28165, 28263, 28277, 28422, 28433, 28437, 28508, 28738, 28739, 28740, 28743, 28748, 28820, 28990, 28993, 28997, 29008, 29009, 29010, 29011, 29079, 29084, 29090, 29093, 29101, 29102, 29104, 29105, 29106, 29112, 29113, 29114, 29119, 29120, 29122, 29123, 29124, 29125, 29129, 29130, 29133, 29134, 29135, 29137, 29139, 29146, 29147, 29172, 29174, 29176, 29184, 29191, 29192, 29194, 29200, 29201, 29203, 29221, 29224, 29225, 29226, 29232, 29234, 29258, 29265, 29268, 29273, 29274, 29275, 29276, 29277, 29278, 29280, 29281, 29282, 29300, 29301, 29302, 29310, 29313, 29314, 29315, 29316, 29320, 29382, 29435, 29436, 29454, 29457, 29458, 29468, 29469, 29470, 29473, 29475, 29476, 29477, 29481, 29482, 29483, 29485, 29500, 29501, 29503, 29504, 29505, 29508, 29513, 29515, 29524, 29532, 29533, 29534, 29535, 29537, 29549, 29553, 29556, 29561, 29574, 29618, 29634, 29635, 29637, 29639, 29665, 29666, 29668, 29669, 29672, 29682, 29693, 29709, 29710, 29711, 29717, 29741, 29742, 29746, 29747, 29752, 29753, 29755, 29756, 29759, 29804, 29805, 29832, 29998, 30003, 30005, 30006, 30007, 30009, 30019, 30025, 30040, 30074, 30075, 30077, 30078, 30080, 30082, 30083, 30084, 30290, 30291, 30293, 30349, 30350, 30351, 30352, 30353, 30354, 30358, 30376, 30392, 30424, 30426, 30589, 30590, 30591, 30613, 30614, 30615, 30616, 30617, 30619, 30627, 30628, 30647, 30954, 30958, 30985, 30986, 31316, 31317, 31331, 31334, 31336, 31357, 31358, 31359, 31360, 31497, 31501, 31502, 31503, 31504, 31526, 31527, 31528, 31882, 31883, 31884, 31890, 
31891, 31892, 31893, 31894, 31929, 31966, 31970, 32153, 32498, 32520, 32583, 32618, 32683, 32769, 32780, 32788, 32847, 32848, 32857, 32872, 33058, 33148, 33153, 33255, 33275, 33279, 33300, 33513, 33519, 33520, 33521, 33522, 33524, 33525, 33527, 33528, 33534, 33578, 33579, 33580, 33581, 33582, 33584, 33585, 33586, 33587, 33589, 33591, 33593, 33594, 33599, 33600, 33602, 33619, 33634, 33655, 33753, 33845, 33846, 33866, 33868, 33869, 33871, 33873, 33883, 33888, 33890, 33891, 33907, 33926, 33931, 33933, 33934, 33936, 33972, 33973, 33978, 33987, 33988, 33989, 33990, 33991, 33992, 33993, 33997, 33998, 34000, 34001, 34007, 34015, 34050, 34058, 34081, 34082, 34085, 34086, 34089, 34091, 34092, 34095, 34260, 34265, 34293, 34294, 34295, 34296, 34297, 34309, 34315, 34316, 34320, 34346, 34399, 34419, 34461, 34462, 34463, 34464, 34465, 34469, 34503, 34527, 34590, 34816, 34827, 34845, 34846, 34849, 34852, 34853, 34863, 34941, 34971, 35015, 35020, 35134, 35136, 35144, 35156, 35206, 35221, 35264, 35285, 35292, 35294, 35295, 35296, 35299, 35300, 35301, 35309, 35311, 35315, 35321, 35323, 35324, 35328, 35329, 35330, 35331, 35332, 35342, 35343, 35347, 35351, 35356, 35357, 35386, 35415, 35428, 35440, 35459, 35467, 35471, 35474, 35529, 35562, 35575, 35634, 35637, 35646, 35655, 35663, 35691, 35704, 35732, 35733, 35744, 35835, 35853, 35881, 35884, 35887, 35889, 35893, 35894, 35896, 35897, 35898, 35899, 35900, 35901, 35902, 35907, 35909, 35910, 35917, 35918, 35920, 35921, 35923, 35926, 35928, 35929, 35930, 35939, 35941, 35943, 35944, 35948, 35949, 35950, 35951, 35953, 35954, 35957, 35979, 35997, 35998, 36000, 36018, 36021, 36023, 36089, 36093, 36098, 36099, 36102, 36105, 36111, 36136, 36154, 36172, 36173, 36175, 36193, 36200, 36210, 36223, 36225, 36226, 36229, 36230, 36233, 36239, 36240, 36241, 36242, 36244, 36246, 36247, 36248, 36249, 36258, 36264, 36267, 36269, 36370, 36433, 36437, 36469, 36479, 36480, 36481, 36504, 36515, 36520, 36521, 36529, 36530, 36550, 36584, 36599, 36600, 36608, 
36614, 36666, 36674, 36685, 36707, 36717, 36736, 36743, 36756, 36760, 36775, 36784, 36785, 36787, 36804, 36830, 36843, 36844, 36850, 36854, 36860, 36870, 36874, 36875, 36876, 36877, 36879, 36952, 36958, 36979, 36980, 36991, 36996, 37050, 37051, 37058, 37092, 37093, 37111, 37117, 37120, 37123, 37137, 37142, 37147, 37148, 37149, 37150, 37151, 37152, 37170, 37176, 37187, 37190, 37192, 37193, 37198, 37201, 37205, 37209, 37217, 37221, 37226, 37227, 37231, 37242, 37244, 37255, 37266, 37319, 37324, 37352, 37365, 37375, 37415, 37429, 37448, 37450, 37452, 37495, 37518, 37519, 37569, 37570, 37572, 37573, 37576, 37597, 37608, 37627, 37676, 37677, 37735, 37743, 37748, 37749, 37750, 37751, 37756, 37758, 37766, 37767, 37792, 37801, 37805, 37807, 37808, 37812, 37828, 37834, 37835, 37838, 37840, 37841, 37842, 37843, 37844, 37845, 37846, 37849, 37850, 37852, 37854, 37863, 37866, 37873, 37877, 37880, 37881, 37883, 37897, 37900, 37908, 37927, 37996, 38008, 38081, 38085, 38091, 38092, 38161, 38183, 38187, 38195, 38200, 38282, 38292, 38300, 38302, 38303, 38309, 38314, 38316, 38317, 38321, 38360, 38368, 38374, 38382, 38398, 38399, 38402, 38403, 38410, 38411, 38420, 38429, 38431, 38439, 38452, 38464, 38467, 38483, 38499, 38500, 38514, 38515, 38530, 38533, 38547, 38548, 38556, 38558, 38559, 38560, 38561, 38563, 38564, 38565, 38566, 38567, 38568, 38569, 38571, 38574, 38575, 38578, 38619, 38635);' sql = 'select id from resolved_papers where downloaded = 1 and npages >= 5 and pdf2text = 1 and english = 0 and id in (12, 70, 74, 77, 92, 108, 110, 111, 113, 127, 128, 129, 133, 136, 145, 149, 151, 189, 210, 223, 238, 247, 253, 276, 287, 289, 291, 292, 303, 308, 345, 346, 347, 349, 350, 351, 354, 355, 359, 360, 361, 362, 363, 364, 365, 368, 377, 381, 389, 393, 395, 406, 414, 424, 439, 446, 448, 549, 554, 558, 574, 577, 578, 579, 581, 582, 583, 585, 588, 589, 591, 592, 595, 597, 601, 604, 605, 609, 613, 621, 625, 682, 684, 712, 713, 714, 715, 716, 717, 719, 722, 723, 724, 726, 730, 731, 732, 734, 
735, 738, 739, 740, 743, 749, 751, 752, 753, 754, 755, 758, 765, 782, 787, 816, 822, 830, 836, 851, 857, 860, 861, 869, 882, 970, 1044, 1045, 1047, 1050, 1052, 1055, 1056, 1057, 1058, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1068, 1069, 1072, 1073, 1074, 1075, 1076, 1079, 1080, 1083, 1084, 1086, 1087, 1089, 1094, 1100, 1104, 1105, 1106, 1115, 1116, 1117, 1122, 1124, 1125, 1126, 1131, 1133, 1142, 1143, 1146, 1150, 1151, 1172, 1174, 1176, 1184, 1194, 1248, 1283, 1301, 1307, 1309, 1367, 1381, 1417, 1419, 1452, 1456, 1482, 1491, 1507, 1511, 1513, 1522, 1542, 1562, 1585, 1587, 1591, 1624, 1626, 1628, 1652, 1687, 1688, 1689, 1692, 1693, 1694, 1696, 1698, 1699, 1701, 1704, 1710, 1711, 1714, 1716, 1719, 1720, 1727, 1728, 1730, 1745, 1750, 1751, 1755, 1757, 1770, 1809, 1815, 1820, 1831, 1835, 1872, 1884, 1887, 1898, 1935, 1955, 1993, 2009, 2025, 2026, 2029, 2030, 2031, 2199, 2241, 2244, 2246, 2275, 2276, 2277, 2278, 2279, 2305, 2323, 2324, 2325, 2327, 2328, 2347, 2360, 2402, 2404, 2410, 2415, 2442, 2448, 2450, 2451, 2452, 2461, 2462, 2467, 2477, 2509, 2510, 2512, 2513, 2518, 2522, 2524, 2531, 2543, 2547, 2554, 2555, 2576, 2577, 2578, 2579, 2580, 2583, 2586, 2605, 2609, 2624, 2629, 2646, 2651, 2652, 2653, 2655, 2656, 2659, 2661, 2662, 2671, 2676, 2677, 2756, 2757, 2758, 2760, 2761, 2762, 2768, 2771, 2772, 2773, 2774, 2776, 2777, 2781, 2782, 2783, 2786, 2789, 2790, 2791, 2792, 2793, 2794, 2795, 2798, 2811, 2815, 2822, 2869, 2884, 2907, 2913, 2920, 2924, 3029, 3127, 3141, 3146, 3172, 3173, 3174, 3175, 3176, 3177, 3178, 3180, 3182, 3183, 3184, 3185, 3189, 3192, 3194, 3198, 3199, 3202, 3203, 3207, 3208, 3211, 3223, 3224, 3230, 3236, 3252, 3253, 3262, 3275, 3302, 3305, 3316, 3365, 3388, 3389, 3391, 3392, 3396, 3397, 3398, 3399, 3400, 3401, 3402, 3405, 3406, 3408, 3409, 3412, 3415, 3416, 3418, 3419, 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3431, 3432, 3433, 3436, 3438, 3439, 3443, 3444, 3445, 3446, 3450, 3452, 3455, 3456, 3458, 3461, 3466, 3467, 3470, 3503, 3526, 
3532, 3536, 3538, 3541, 3542, 3543, 3549, 3563, 3573, 3597, 3598, 3620, 3626, 3662, 3819, 3921, 3922, 3923, 3925, 3927, 3931, 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3942, 3943, 3944, 3945, 3948, 3950, 3952, 3953, 3954, 3955, 3957, 3958, 3959, 3960, 3961, 3963, 3965, 3966, 3967, 3968, 3971, 3972, 3980, 3988, 3995, 4000, 4005, 4011, 4039, 4043, 4046, 4048, 4050, 4059, 4077, 4086, 4089, 4098, 4101, 4104, 4109, 4111, 4123, 4127, 4170, 4184, 4203, 4215, 4221, 4235, 4287, 4295, 4345, 4362, 4367, 4448, 4449, 4451, 4452, 4453, 4454, 4455, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4470, 4472, 4478, 4480, 4481, 4482, 4496, 4500, 4504, 4508, 4513, 4518, 4523, 4524, 4548, 4551, 4567, 4572, 4598, 4607, 4608, 4611, 4657, 4786, 4788, 4789, 4791, 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, 4804, 4805, 4811, 4815, 4817, 4819, 4829, 4839, 4840, 5037, 5038, 5040, 5047, 5179, 5191, 5192, 5210, 5248, 5249, 5266, 5275, 5276, 5322, 5323, 5327, 5330, 5362, 5410, 5411, 5416, 5451, 5462, 5493, 5494, 5496, 5519, 5536, 5548, 5555, 5587, 5588, 5589, 5590, 5591, 5594, 5599, 5617, 5633, 5636, 5660, 5667, 5695, 5697, 5701, 5702, 5706, 5767, 5768, 5769, 5773, 5778, 5786, 5831, 5832, 5833, 5835, 5836, 5837, 5839, 5844, 5849, 5850, 5858, 5860, 5889, 5901, 5915, 5916, 5918, 5920, 5991, 5992, 5993, 5994, 5995, 6009, 6045, 6079, 6080, 6081, 6083, 6084, 6085, 6086, 6087, 6100, 6101, 6107, 6185, 6249, 6278, 6279, 6280, 6281, 6282, 6283, 6285, 6305, 6306, 6387, 6393, 6396, 6397, 6398, 6411, 6439, 6498, 6505, 6511, 6513, 6518, 6520, 6524, 6525, 6526, 6527, 6532, 6543, 6553, 6555, 6565, 6566, 6569, 6573, 6574, 6581, 6585, 6601, 6605, 6606, 6612, 6615, 6617, 6621, 6645, 6646, 6648, 6651, 6652, 6658, 6660, 6667, 6672, 6676, 6682, 6684, 6688, 6690, 6692, 6693, 6700, 6704, 6743, 6769, 6771, 6772, 6775, 6778, 6783, 6785, 6789, 6793, 6818, 6824, 6829, 6830, 6834, 6839, 6845, 6846, 6849, 6850, 6855, 6859, 6866, 6873, 6878, 6887, 6888, 6889, 6890, 6907, 6926, 
6945, 6948, 6954, 6963, 7006, 7066, 7082, 7102, 7121, 7162, 7163, 7271, 7272, 7273, 7285, 7314, 7315, 7350, 7362, 7364, 7398, 7441, 7442, 7443, 7444, 7446, 7451, 7454, 7456, 7462, 7464, 7504, 7515, 7516, 7547, 7548, 7634, 7659, 7660, 7661, 7662, 7663, 7664, 7665, 7672, 7776, 7777, 7783, 7784, 7788, 7789, 7792, 7795, 7797, 7798, 7799, 7809, 7831, 7889, 7917, 7918, 7920, 7926, 7930, 7932, 7933, 7935, 7936, 7941, 7944, 7960, 7962, 7971, 8008, 8017, 8070, 8075, 8076, 8110, 8111, 8112, 8117, 8120, 8128, 8129, 8130, 8133, 8136, 8140, 8143, 8144, 8145, 8148, 8149, 8150, 8153, 8154, 8159, 8163, 8203, 8225, 8268, 8270, 8302, 8310, 8312, 8419, 8421, 8496, 8497, 8498, 8500, 8505, 8506, 8507, 8508, 8510, 8513, 8517, 8533, 8543, 8584, 8710, 8717, 8718, 8719, 8720, 8721, 8722, 8724, 8726, 8730, 8732, 8733, 8734, 8737, 8739, 8740, 8741, 8742, 8743, 8744, 8745, 8747, 8748, 8750, 8751, 8752, 8753, 8754, 8755, 8756, 8757, 8759, 8761, 8764, 8766, 8768, 8769, 8773, 8774, 8775, 8784, 8811, 8817, 9042, 9056, 9207, 9219, 9240, 9249, 9273, 9318, 9322, 9422, 9457, 9485, 9562, 9623, 9647, 9836, 9837, 9922, 10067, 10068, 10069, 10168, 10185, 10288, 10400, 10401, 10513, 10515, 10606, 10700, 10702, 10703, 10771, 10772, 10819, 10821, 10927, 11019, 11056, 11113, 11142, 11143, 11225, 11226, 11227, 11343, 11361, 11362, 11364, 11377, 11448, 11460, 11461, 11462, 11463, 11465, 11466, 11468, 11493, 11609, 11610, 11611, 11617, 11638, 11659, 11718, 11748, 11749, 11750, 11751, 11762, 11821, 11850, 11891, 11898, 11911, 11913, 11914, 11915, 11916, 11917, 11918, 11919, 11920, 11921, 11922, 11923, 11926, 11928, 11934, 11955, 11980, 12026, 12030, 12044, 12092, 12093, 12094, 12095, 12096, 12098, 12100, 12101, 12102, 12103, 12104, 12105, 12106, 12107, 12108, 12109, 12110, 12111, 12112, 12113, 12114, 12122, 12123, 12125, 12144, 12147, 12234, 12235, 12237, 12256, 12305, 12339, 12346, 12407, 12448, 12511, 12665, 12705, 12706, 12708, 12709, 12710, 12711, 12712, 12713, 12714, 12716, 12717, 12718, 12719, 12720, 
12721, 12722, 12725, 12729, 12742, 12753, 12762, 12802, 12813, 12816, 12821, 12823, 12843, 12856, 12905, 12907, 13006, 13061, 13062, 13063, 13137, 13138, 13198, 13329, 13330, 13331, 13332, 13494, 13495, 13582, 13583, 13584, 13585, 13586, 13697, 13833, 13834, 13835, 13836, 13837, 13840, 14160, 14161, 14200, 14341, 14342, 14343, 14590, 14591, 14597, 14610, 14614, 14631, 14632, 14633, 14634, 14635, 14650, 14655, 14656, 14689, 14726, 14777, 14870, 14871, 14872, 14921, 14922, 14923, 14991, 14992, 14993, 14994, 14995, 15136, 15137, 15138, 15139, 15140, 15141, 15142, 15143, 15152, 15216, 15265, 15277, 15387, 15388, 15483, 15546, 15550, 15587, 15590, 15623, 15641, 15653, 15711, 15712, 15730, 15743, 15763, 15794, 15805, 15821, 15831, 15884, 15932, 16039, 16122, 16124, 16153, 16175, 16181, 16220, 16233, 16264, 16277, 16306, 16361, 16377, 16391, 16392, 16393, 16402, 16404, 16431, 16439, 16440, 16444, 16447, 16448, 16455, 16457, 16463, 16468, 16513, 16524, 16528, 16551, 16569, 16594, 16596, 16600, 16610, 16647, 16648, 16718, 16731, 16763, 16765, 16794, 16795, 16899, 16948, 16962, 16993, 16998, 17011, 17013, 17034, 17061, 17062, 17141, 17142, 17143, 17144, 17155, 17158, 17248, 17262, 17263, 17264, 17265, 17266, 17333, 17334, 17335, 17395, 17396, 17398, 17400, 17401, 17405, 17410, 17412, 17417, 17420, 17431, 17547, 17584, 17585, 17587, 17599, 17674, 17676, 17677, 17679, 17711, 17719, 17749, 17750, 17751, 17752, 17753, 17754, 17756, 17757, 17811, 17812, 17814, 17948, 17963, 17964, 17965, 17989, 17998, 18083, 18139, 18145, 18165, 18229, 18230, 18257, 18264, 18273, 18321, 18322, 18323, 18351, 18515, 18548, 18599, 18600, 18623, 18637, 18675, 18676, 18687, 18698, 18736, 18753, 18768, 18792, 18794, 18797, 18823, 18828, 18830, 18850, 18851, 18853, 18854, 18857, 18882, 18885, 18886, 18887, 18888, 18891, 18892, 18893, 18894, 18898, 18901, 18904, 18930, 18947, 18967, 18968, 18970, 18972, 18973, 18974, 18976, 18977, 18980, 18982, 18983, 18984, 18985, 18986, 18991, 19006, 19059, 19060, 
19061, 19062, 19064, 19066, 19067, 19069, 19071, 19103, 19104, 19110, 19116, 19153, 19180, 19181, 19186, 19263, 19272, 19273, 19280, 19318, 19409, 19425, 19428, 19456, 19528, 19531, 19538, 19606, 19607, 19609, 19610, 19612, 19613, 19616, 19623, 19636, 19647, 19648, 19685, 19798, 19799, 19800, 19801, 19802, 19805, 19806, 19807, 19808, 19811, 19812, 19813, 19816, 19820, 19821, 19836, 19874, 19875, 19878, 19960, 19985, 20051, 20052, 20053, 20054, 20055, 20056, 20057, 20058, 20059, 20061, 20062, 20063, 20064, 20065, 20066, 20069, 20070, 20071, 20072, 20074, 20078, 20079, 20081, 20084, 20088, 20090, 20110, 20156, 20157, 20168, 20189, 20193, 20245, 20344, 20345, 20346, 20347, 20348, 20349, 20350, 20353, 20354, 20355, 20356, 20357, 20358, 20359, 20360, 20361, 20362, 20363, 20365, 20368, 20370, 20371, 20373, 20374, 20377, 20391, 20392, 20396, 20398, 20400, 20444, 20476, 20520, 20682, 20685, 20687, 20688, 20689, 20690, 20691, 20692, 20693, 20694, 20695, 20698, 20699, 20700, 20701, 20702, 20703, 20707, 20709, 20714, 20728, 20760, 20774, 20864, 20865, 20866, 20867, 20868, 20869, 20870, 20872, 20874, 20899, 20909, 20962, 21041, 21042, 21117, 21118, 21121, 21139, 21146, 21227, 21271, 21272, 21273, 21274, 21275, 21425, 21430, 21493, 21505, 21507, 21510, 21513, 21612, 21616, 21621, 21622, 21623, 21624, 21667, 21675, 21751, 21765, 21766, 21767, 21846, 21847, 21856, 21857, 21858, 21871, 21872, 21873, 21875, 21876, 21877, 21881, 21883, 21885, 21924, 21925, 21957, 21977, 21978, 21979, 21980, 21984, 21985, 21993, 21997, 21999, 22001, 22031, 22033, 22082, 22113, 22175, 22228, 22247, 22271, 22272, 22371, 22374, 22462, 22463, 22613, 22694, 22695, 22696, 22697, 22700, 22880, 22881, 22882, 22883, 22884, 22901, 22977, 22978, 22979, 22981, 23030, 23032, 23191, 23230, 23236, 23238, 23291, 23340, 23453, 23552, 23553, 23744, 23761, 23774, 24016, 24025, 24037, 24085, 24090, 24096, 24125, 24126, 24128, 24129, 24130, 24132, 24133, 24140, 24141, 24142, 24145, 24150, 24151, 24152, 24153, 24155, 
24168, 24169, 24170, 24171, 24172, 24173, 24174, 24181, 24186, 24187, 24189, 24190, 24192, 24193, 24206, 24207, 24208, 24209, 24210, 24211, 24212, 24213, 24214, 24239, 24243, 24244, 24246, 24247, 24249, 24250, 24251, 24252, 24253, 24254, 24255, 24256, 24257, 24258, 24261, 24290, 24297, 24298, 24299, 24300, 24301, 24302, 24303, 24304, 24305, 24307, 24308, 24315, 24326, 24330, 24334, 24335, 24336, 24350, 24364, 24365, 24366, 24367, 24368, 24371, 24372, 24390, 24391, 24393, 24405, 24406, 24408, 24411, 24412, 24413, 24415, 24438, 24439, 24440, 24473, 24474, 24476, 24477, 24478, 24479, 24480, 24481, 24483, 24484, 24485, 24486, 24487, 24520, 24522, 24523, 24524, 24525, 24526, 24527, 24528, 24529, 24530, 24531, 24532, 24533, 24535, 24536, 24537, 24540, 24541, 24542, 24543, 24544, 24545, 24546, 24547, 24549, 24550, 24576, 24586, 24621, 24622, 24623, 24624, 24625, 24626, 24627, 24628, 24629, 24630, 24631, 24632, 24633, 24634, 24635, 24636, 24637, 24638, 24639, 24640, 24641, 24642, 24644, 24645, 24646, 24647, 24648, 24651, 24652, 24653, 24654, 24655, 24656, 24657, 24712, 24713, 24714, 24715, 24716, 24717, 24719, 24720, 24721, 24722, 24723, 24724, 24731, 24775, 24795, 24812, 24831, 24833, 24835, 24836, 24845, 24846, 24851, 24869, 24877, 24888, 24889, 24907, 24926, 24952, 25091, 25169, 25177, 25178, 25195, 25206, 25247, 25248, 25251, 25267, 25340, 25345, 25455, 25456, 25460, 25464, 25754, 25822, 25845, 25865, 25890, 25891, 25893, 25914, 25975, 25976, 25978, 25980, 25982, 25986, 25996, 26003, 26074, 26112, 26143, 26172, 26182, 26183, 26186, 26194, 26202, 26283, 26284, 26287, 26289, 26293, 26303, 26316, 26320, 26322, 26463, 26465, 26467, 26469, 26476, 26481, 26486, 26489, 26497, 26596, 26663, 26678, 26717, 27136, 27183, 27307, 27340, 27341, 27342, 27344, 27348, 27355, 27607, 27608, 27609, 27610, 27623, 27635, 27641, 27922, 27937, 28165, 28263, 28277, 28422, 28433, 28437, 28508, 28738, 28739, 28740, 28743, 28748, 28820, 28990, 28993, 28997, 29008, 29009, 29010, 29011, 29079, 
29084, 29090, 29093, 29101, 29102, 29104, 29105, 29106, 29112, 29113, 29114, 29119, 29120, 29122, 29123, 29124, 29125, 29129, 29130, 29133, 29134, 29135, 29137, 29139, 29146, 29147, 29172, 29174, 29176, 29184, 29191, 29192, 29194, 29200, 29201, 29203, 29221, 29224, 29225, 29226, 29232, 29234, 29258, 29265, 29268, 29273, 29274, 29275, 29276, 29277, 29278, 29280, 29281, 29282, 29300, 29301, 29302, 29310, 29313, 29314, 29315, 29316, 29320, 29382, 29435, 29436, 29454, 29457, 29458, 29468, 29469, 29470, 29473, 29475, 29476, 29477, 29481, 29482, 29483, 29485, 29500, 29501, 29503, 29504, 29505, 29508, 29513, 29515, 29524, 29532, 29533, 29534, 29535, 29537, 29549, 29553, 29556, 29561, 29574, 29618, 29634, 29635, 29637, 29639, 29665, 29666, 29668, 29669, 29672, 29682, 29693, 29709, 29710, 29711, 29717, 29741, 29742, 29746, 29747, 29752, 29753, 29755, 29756, 29759, 29804, 29805, 29832, 29998, 30003, 30005, 30006, 30007, 30009, 30019, 30025, 30040, 30074, 30075, 30077, 30078, 30080, 30082, 30083, 30084, 30290, 30291, 30293, 30349, 30350, 30351, 30352, 30353, 30354, 30358, 30376, 30392, 30424, 30426, 30589, 30590, 30591, 30613, 30614, 30615, 30616, 30617, 30619, 30627, 30628, 30647, 30954, 30958, 30985, 30986, 31316, 31317, 31331, 31334, 31336, 31357, 31358, 31359, 31360, 31497, 31501, 31502, 31503, 31504, 31526, 31527, 31528, 31882, 31883, 31884, 31890, 31891, 31892, 31893, 31894, 31929, 31966, 31970, 32153, 32498, 32520, 32583, 32618, 32683, 32769, 32780, 32788, 32847, 32848, 32857, 32872, 33058, 33148, 33153, 33255, 33275, 33279, 33300, 33513, 33519, 33520, 33521, 33522, 33524, 33525, 33527, 33528, 33534, 33578, 33579, 33580, 33581, 33582, 33584, 33585, 33586, 33587, 33589, 33591, 33593, 33594, 33599, 33600, 33602, 33619, 33634, 33655, 33753, 33845, 33846, 33866, 33868, 33869, 33871, 33873, 33883, 33888, 33890, 33891, 33907, 33926, 33931, 33933, 33934, 33936, 33972, 33973, 33978, 33987, 33988, 33989, 33990, 33991, 33992, 33993, 33997, 33998, 34000, 34001, 34007, 34015, 
34050, 34058, 34081, 34082, 34085, 34086, 34089, 34091, 34092, 34095, 34260, 34265, 34293, 34294, 34295, 34296, 34297, 34309, 34315, 34316, 34320, 34346, 34399, 34419, 34461, 34462, 34463, 34464, 34465, 34469, 34503, 34527, 34590, 34816, 34827, 34845, 34846, 34849, 34852, 34853, 34863, 34941, 34971, 35015, 35020, 35134, 35136, 35144, 35156, 35206, 35221, 35264, 35285, 35292, 35294, 35295, 35296, 35299, 35300, 35301, 35309, 35311, 35315, 35321, 35323, 35324, 35328, 35329, 35330, 35331, 35332, 35342, 35343, 35347, 35351, 35356, 35357, 35386, 35415, 35428, 35440, 35459, 35467, 35471, 35474, 35529, 35562, 35575, 35634, 35637, 35646, 35655, 35663, 35691, 35704, 35732, 35733, 35744, 35835, 35853, 35881, 35884, 35887, 35889, 35893, 35894, 35896, 35897, 35898, 35899, 35900, 35901, 35902, 35907, 35909, 35910, 35917, 35918, 35920, 35921, 35923, 35926, 35928, 35929, 35930, 35939, 35941, 35943, 35944, 35948, 35949, 35950, 35951, 35953, 35954, 35957, 35979, 35997, 35998, 36000, 36018, 36021, 36023, 36089, 36093, 36098, 36099, 36102, 36105, 36111, 36136, 36154, 36172, 36173, 36175, 36193, 36200, 36210, 36223, 36225, 36226, 36229, 36230, 36233, 36239, 36240, 36241, 36242, 36244, 36246, 36247, 36248, 36249, 36258, 36264, 36267, 36269, 36370, 36433, 36437, 36469, 36479, 36480, 36481, 36504, 36515, 36520, 36521, 36529, 36530, 36550, 36584, 36599, 36600, 36608, 36614, 36666, 36674, 36685, 36707, 36717, 36736, 36743, 36756, 36760, 36775, 36784, 36785, 36787, 36804, 36830, 36843, 36844, 36850, 36854, 36860, 36870, 36874, 36875, 36876, 36877, 36879, 36952, 36958, 36979, 36980, 36991, 36996, 37050, 37051, 37058, 37092, 37093, 37111, 37117, 37120, 37123, 37137, 37142, 37147, 37148, 37149, 37150, 37151, 37152, 37170, 37176, 37187, 37190, 37192, 37193, 37198, 37201, 37205, 37209, 37217, 37221, 37226, 37227, 37231, 37242, 37244, 37255, 37266, 37319, 37324, 37352, 37365, 37375, 37415, 37429, 37448, 37450, 37452, 37495, 37518, 37519, 37569, 37570, 37572, 37573, 37576, 37597, 37608, 37627, 
37676, 37677, 37735, 37743, 37748, 37749, 37750, 37751, 37756, 37758, 37766, 37767, 37792, 37801, 37805, 37807, 37808, 37812, 37828, 37834, 37835, 37838, 37840, 37841, 37842, 37843, 37844, 37845, 37846, 37849, 37850, 37852, 37854, 37863, 37866, 37873, 37877, 37880, 37881, 37883, 37897, 37900, 37908, 37927, 37996, 38008, 38081, 38085, 38091, 38092, 38161, 38183, 38187, 38195, 38200, 38282, 38292, 38300, 38302, 38303, 38309, 38314, 38316, 38317, 38321, 38360, 38368, 38374, 38382, 38398, 38399, 38402, 38403, 38410, 38411, 38420, 38429, 38431, 38439, 38452, 38464, 38467, 38483, 38499, 38500, 38514, 38515, 38530, 38533, 38547, 38548, 38556, 38558, 38559, 38560, 38561, 38563, 38564, 38565, 38566, 38567, 38568, 38569, 38571, 38574, 38575, 38578, 38619, 38635);' print(sql) papers = pd.read_sql(sql, con=db) for index, row in papers.iterrows(): lang = None id = row[0] english = 0 other = 0 text = "" res = "" print(id) if id: # with open(os.path.join('data/txt', str(id) + '.txt')) as infile: with open( os.path.join( '/Volumes/SeagateBackupPlusDrive/CLPD2019_FULL/txt', str(id) + '.txt')) as infile: for line in infile: if not re.match(r'^\s*$', line): line = re.sub(r"-\n", "", line) line = re.sub(r"\n", " ", line) text += line infile.close() lenText = len(text) nrequest = round(float(lenText) / 5000) count = 1 while count <= nrequest: res = '' content = "" posIni = (count * 5000) - 5000 posFin = (count * 5000) - 1 content += text[posIni:posFin] try: translator = Translator(random.choice(key_choices)) res = translator.detect_lang([content]) except: pass if res: if res == 'en': english += 1 else: other += 1 count += 1 if english > other: lang = "English" sql = "update resolved_papers set english = 1 where id = %s" % ( id) else: lang = "Other" try: cur.execute(sql) db.commit() except: db.rollback() print("Id: %s. Language: %s" % (id, lang)) print("Done!")
class TranslatorTestCase(unittest.TestCase):
    """Method-by-method checks of Translator, plus one check of TranslatorMock."""

    def setUp(self):
        """Build a fresh Translator and TranslatorMock before every test."""
        self.translator = Translator(client_id, client_secret)
        self.translator_mock = TranslatorMock(client_id, client_secret)

    def test_translate(self):
        """translate() turns the English 'world' into its Russian form."""
        translated = self.translator.translate('world', 'en', 'ru')
        self.assertEqual('мир', translated)

    def test_translate_array(self):
        """translate_array() returns one entry per input, in order."""
        results = self.translator.translate_array(['hello', 'world'], 'en', 'ru')
        texts = [item['TranslatedText'] for item in results]
        self.assertEqual(['Привет', 'мир'], texts)

    def test_translate_array2(self):
        """translate_array2() returns both translated text and alignment info."""
        phrases = ['hello', 'world', 'Hello. How are you?']
        results = self.translator.translate_array2(phrases, 'en', 'ru')

        texts = [item['TranslatedText'] for item in results]
        self.assertEqual(['Привет', 'мир', 'Привет. Как ваши дела?'], texts)

        aligned = [item['Alignment'] for item in results]
        self.assertEqual(['0:4-0:5', '0:4-0:2', '0:5-0:6 7:18-8:21'], aligned)

    def test_get_translations(self):
        """get_translations() returns a dict carrying a 'Translations' key."""
        found = self.translator.get_translations('world', 'en', 'ru')
        self.assertIsInstance(found, dict)
        self.assertIn('Translations', found)

    def test_break_sentences(self):
        """break_sentences() splits the text into its two sentences."""
        sentences = self.translator.break_sentences('Hello. How are you?', 'en')
        self.assertEqual(['Hello. ', 'How are you?'], sentences)

    def test_add_translation(self):
        """The mock's add_translation() result contains both text parameters."""
        url = self.translator_mock.add_translation(
            'orig', 'trans', 'en', 'ru', user='******')
        for fragment in ('originalText=orig', 'translatedText=trans'):
            self.assertIn(fragment, url)

    def test_get_langs(self):
        """get_langs() returns a list that includes 'en'."""
        codes = self.translator.get_langs()
        self.assertIsInstance(codes, list)
        self.assertIn('en', codes)

    def test_get_lang_names(self):
        """get_lang_names() maps language codes to names, keeping the order."""
        names = self.translator.get_lang_names(['ru', 'en'], 'en')
        self.assertEqual(['Russian', 'English'], names)

    def test_get_speackable_langs(self):
        """get_langs(speakable=True) returns a list that includes 'en-us'."""
        codes = self.translator.get_langs(speakable=True)
        self.assertIsInstance(codes, list)
        self.assertIn('en-us', codes)

    def test_detect_lang(self):
        """detect_lang() reports 'en' for an English word."""
        detected = self.translator.detect_lang('Hello')
        self.assertEqual('en', detected)

    def test_detect_langs(self):
        """detect_langs() reports one language code per input text."""
        detected = self.translator.detect_langs(['Hello', 'Привет'])
        self.assertEqual(['en', 'ru'], detected)

    def test_speak(self):
        """speak() returns something other than None."""
        spoken = self.translator.speak('Hello', 'en')
        self.assertIsNotNone(spoken)

    def test_speak_to_file(self):
        """speak_to_file() writes a non-empty payload into the given buffer."""
        buffer = StringIO()
        self.translator.speak_to_file(buffer, 'Hello', 'en')
        # Rewind so the read below starts at the beginning of what was written.
        buffer.seek(0)
        payload = buffer.read()
        self.assertTrue(len(payload) > 0)
# -*- coding: utf-8 -*- from langdetect import detect from mongodb.connection import getMongoConnection import config import sys from mstranslator import Translator translator = Translator('a4462ffdf09e4ce08bb2df759478229a') def translate(company_name): conn = getMongoConnection() db = conn[config.Mongo_DB_NAME] while db[company_name].find({"translated": "false"}).count() > 0: job = db[company_name].find_one({"translated": "false"}) try: lan = detect(unicode(job['job_summary'], "utf-8")) except: lan = detect(job['job_summary']) print lan print job if str(lan) != 'en': for key in job: if key == "city" or key == "snippet": job[key] = translator.translate(job[key], lang_to="en") db[company_name].update({"url": job["url"]}, {"$set": { key: job[key] }}, upsert=False, multi=False) if key == "job_summary":
def setUp(self):
    """Create the Translator and TranslatorMock fixtures from SUBSCRIPTION_KEY."""
    key = SUBSCRIPTION_KEY
    self.translator = Translator(key)
    self.translator_mock = TranslatorMock(key)
import os from string import join import time import random from slackclient import SlackClient from mstranslator import Translator import config import alify #initialize translator API translator = Translator(config.MSTRANS_ID) # instantiate Slack & Twilio clients AAAAAND # bot's ID as an environment variable or constant ######### STARTERBOT / BRUNDRETTFAMILY SLACK #slack_client = SlackClient(os.environ.get('SLACK_BOT_TOKEN')) #brundrettfamily starterbot #BOT_ID = os.environ.get("BOT_ID") #starterbot brundrettfamily #BOT_NAME = "StarterBot" ######### TRANSBOT / PIZZABALLS SLACK slack_client = SlackClient(config.TRANS_BOT_TOKEN) #pizzaballs transbot BOT_ID = config.TRANS_BOT_ID #transbot / pizzaballs BOT_NAME = "TransBot" # constants AT_BOT = "<@" + BOT_ID + ">" EXAMPLE_COMMAND = "do" TRANS_COMMAND = "nazify" def handle_trans_cmd(command, channel):
# -*- coding: utf-8 -*-
from mstranslator import Translator

# NOTE(review): the client id and secret are hard-coded in source; consider
# loading them from configuration or the environment instead.
translator = Translator(
    'hswn_client_id',
    'mLmqE4kYtcVddhVG2Vq2MLsSTDbhKCmuxqfsrU7Lj7M=',
)


def translate(word):
    """Return *word* translated from language code 'hi' to 'en'.

    The call is delegated to the module-level ``translator`` client.
    """
    source_lang, target_lang = 'hi', 'en'
    return translator.translate(word, lang_from=source_lang, lang_to=target_lang)
def btranslate(text_message, langfrom, langto):
    """Translate *text_message* from *langfrom* to *langto*.

    A new Translator is built from ``client_id`` / ``client_secret`` on every
    call. The translated phrase is returned encoded as UTF-8.
    """
    client = Translator(client_id, client_secret)
    translated = client.translate(
        text_message,
        lang_from=langfrom,
        lang_to=langto,
    )
    encoded = translated.encode("utf8")
    return encoded
from mstranslator import Translator

# Placeholder key: replace with a real access key before use.
translator = Translator('your_own_access_key')


def trans(word):
    """Return *word* translated from language code 'en' to 'pl'."""
    options = {'lang_from': 'en', 'lang_to': 'pl'}
    return translator.translate(word, **options)
https://textblob.readthedocs.io/en/dev/ https://pypi.python.org/pypi/mstranslator """ # Twitter info consumer_key = 'GW8wstK6iPAn4UY4FgnxfTd2h' consumer_secret = '6pIpAy32sOAATULfZqQWrXbe5sbXaQdUofPys1mSVlEMkIzKAa' access_token = '104984260-whCiE24vIwPgnx8Ym41LxEy8dldRRNFu8fLbiGzs' access_token_secret = 'oanaiTCnCPCnh1rHkMT7FxHAUeuT5NEnQZTGGnrITYmTd' # Microsoft DataMarket for translation - limited plan for 2 million characters per month client_id = 'bruno_sentiment_analysis' client_secret = 'Kd/gHVZgXTkXoAEGaK/or71IrEQr1DmG98q8eLQmaGU=' # MS Translator translator = Translator(client_id, client_secret) # Language properties for translation LANG_FROM = 'pt' LANG_TO = 'en' # Authenticating on twitter auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) # API fully authenticated api = tweepy.API(auth) # Tweets SEARCH_TERM = 'unisul' public_tweets = api.search(SEARCH_TERM)
#-*- coding: utf-8 -*-
from mstranslator import Translator
from dateTime import getCurrentDate
from dateTime import getCurrentTime
import sys
import goslate
import json
import MySQLdb

# Python 2 idiom: re-expose setdefaultencoding and switch the default to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration.
CLIENT_ID = "translatelate"
CLIENT_SECRET = "GK7MyfCoz1NUrNuehRZBlLWLNpHMgDaAZoT8MkQOMlI="
translator_ms = Translator(CLIENT_ID, CLIENT_SECRET)

# Language codes accepted by validateLanguage().
languages = [
    "af", "sq", "ar", "be", "bg", "ca", "zh-CN", "zh-TW", "hr", "cs", "da",
    "nl", "en", "et", "tl", "fi", "fr", "gl", "de", "el", "iw", "hi", "hu",
    "is", "id", "ga", "it", "ja", "ko", "lv", "lt", "mk", "ms", "mt", "no",
    "fa", "pl", "pt", "ro", "ru", "sr", "sk", "sl", "es", "sw", "sv", "th",
    "tr", "uk", "vi", "cy", "yi",
]


def validateLanguage(lang):
    """Return True when *lang* is one of the codes in ``languages``, else False."""
    return lang in languages


def saveData(host, user, passwd, db, result, original, from_lang, to_lang, tableName):
    # NOTE(review): only the connection setup of this function is visible here;
    # confirm how the remaining parameters are used before changing it.
    dBase = MySQLdb.connect(host, user, passwd, db, charset='utf8', use_unicode=True)
# Name: pleaseTranslate (/u/pleaseTranslate) # Author: Saroekin (/u/Saroekin) # Version: Python 2.7.6 #Files or importations that are used elsewhere in program. import os import praw import time import traceback import externals_pleaseTranslate from mstranslator import Translator #Setting up account. translator = Translator(externals_pleaseTranslate.client_ID, externals_pleaseTranslate.client_secret) #User's username and password. Username = externals_pleaseTranslate.username Password = externals_pleaseTranslate.password #What reddit sees from the bot's requests. user_agent = externals_pleaseTranslate.user_agent r = praw.Reddit(user_agent=user_agent) print("\n\nLogging in. . .\n\n") r.login(Username, Password) #Set of list variables for program. fullLangNames = externals_pleaseTranslate.fullLangNames abbrevLangNames = externals_pleaseTranslate.abbrevLangNames supportedLangs = externals_pleaseTranslate.supportedLangs
site_id=MySQLdb.escape_string(str(doc['site_id'])), link_text=link_text, ftype=MySQLdb.escape_string(str(doc['ftype'])), doc_type=MySQLdb.escape_string(str(doc['doc_type'])), full_url=MySQLdb.escape_string(str(doc['full_url']))) cursor.execute(sql) #cursor.execute('INSERT INTO docs_sites (site_id, link_text, ftype, doc_type, full_url) VAALUES ({site_id}, "{link_text}", "{ftype}", "{doc_type}", "{full_url}")' # .format(site_id=MySQLdb.escape_string(doc['site_id']), link_text=MySQLdb.escape_string(doc['link_text']), ftype=MySQLdb.escape_string(doc['ftype']), # doc_type=MySQLdb.escape_string(doc['doc_type']), full_url=MySQLdb.escape_string(doc['full_url']))) #------------------------------------------------------------------------------------------------------ # Запускаем автоперевод блоков, тоже в потоках # Если была такая настройка у проекта #------------------------------------------------------------------------------------------------------ translator = Translator(trans_client, trans_secret) langs = getLangsProject(siteID) sql = 'SELECT * FROM blocks WHERE site_id = {projectID}'.format( projectID=siteID) cursor.execute(sql) blocks = cursor.fetchall() # for lang in langs: # langTo = lang[3] # langID = lang[0] # pool = ThreadPool(2) # results = pool.map(translateBlock, blocks) # results = pool.map(createEmptyTranslate, blocks) # pool.close() # pool.join()
#Bing translator Spanish->English #English stopwords #English SnowballStemmer import sys import re from nltk.corpus import stopwords from mstranslator import Translator #English and Spanish stemmer available from nltk.stem import snowball import string import regex #English because we translate first stemmer = snowball.EnglishStemmer(ignore_stopwords=False) translator = Translator('60864ac93121426d8fbbb1e2581a8c3e') stop_words_list = [] flat_stop_words_list = [] exclusion_list_en_es = [] has_hashtag_or_mention = [False] * 800 punctuation = [] punctuation.append(list(string.punctuation[2:6])) punctuation.append(string.punctuation[9]) punctuation.append(list(string.punctuation[20:22])) def make_stop_words_list(): #exclude words which are in both dictionaries #bc they will be counted toward both languages and