def translate_data(self):
    """Enrich every record in ``self.json_data`` and return the result as JSON.

    For each record this method:

    * copies ``latitude`` / ``longitude`` into a nested ``location``
      mapping under the keys ``lat`` / ``lon``;
    * detects the language of ``status`` and, when the detector returns a
      value, stores it under ``lang``;
    * for text not detected as ``"english"``, stores the English
      translation under ``status_en``;
    * passes the resulting text to ``self.get_sentiment`` unless the
      translator returned ``None``.

    Returns:
        str: JSON document of the form
        ``{"action": KEY, "data": self.json_data}``.
    """
    translator = google_translate.GoogleTranslator()

    for k, v in self.json_data.items():
        # ``v`` is the very object stored at ``self.json_data[k]``, so it
        # can be updated in place without repeating the lookup.
        v["location"] = {"lat": v["latitude"], "lon": v["longitude"]}

        lang = translator.detect(v["status"])
        if lang is None:
            # No detected language: nothing to translate or analyse.
            continue
        v["lang"] = lang

        # Remove hashtag markers for translation convenience.
        status = v["status"].replace("#", "")
        if lang != "english":
            # Translate the cleaned text; the raw text (hashtags included)
            # used to be sent here, which discarded the clean-up above.
            status = translator.translate(status, "english")
            v["status_en"] = status

        if status is not None:
            self.get_sentiment(k, status)

    res = {"action": KEY, "data": self.json_data}
    return json.dumps(res)
def translate(query):
    """Return the German translation of *query*.

    The text is UTF-8 encoded before it is handed to a freshly created
    ``GoogleTranslator``; whatever the translator returns is passed back
    unchanged.
    """
    return google_translate.GoogleTranslator().translate(
        query.encode('utf-8'), "german")
def main(args):
    """Check a PO translation file against the en_US template and print a report.

    Steps:

    1. Load the ``en_US`` template and the PO file for ``args.language``
       (paths built from ``LOCALE_PATH_TMPL``).
    2. Compare selected headers and warn on mismatches.
    3. Unless ``args.only_headers`` is set, classify every PO entry as
       missing, not translated, identical to its msgid, typo, needing
       verification, or fuzzy.
    4. Print the detailed report followed by per-category totals.

    Args:
        args: Parsed command line options. The attributes read here are
            ``language``, ``werror``, ``only_headers`` and ``no_translate``.
    """
    os.chdir("..")

    # setup
    pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
    po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)

    # NOTE(review): perror presumably terminates the script - confirm.
    if not os.path.exists(pot_file_path):
        perror("Failed to locate POT file, exiting...", pot_file_path)

    if not os.path.exists(po_file_path):
        perror("Failed to locate PO file, exiting...", po_file_path)

    pot_file = polib.pofile(pot_file_path)
    po_file = polib.pofile(po_file_path)

    # check headers
    pinfo("Checking PO headers")

    pot_headers = pot_file.metadata
    po_headers = po_file.metadata

    if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
        pwarn("'Project-Id-Version' headers do not match", exit=args.werror)

    if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
        pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)

    po_creation_date = parse_date(po_headers["POT-Creation-Date"])
    po_revision_date = parse_date(po_headers["PO-Revision-Date"])

    # Aware datetimes convert to UTC automatically when comparing
    if po_revision_date <= po_creation_date:
        pwarn("PO file seems outdated", exit=args.werror)

    if "Language" in po_headers and po_headers["Language"] != args.language:
        pwarn("'Language' header does not match with the given language",
              po_headers["Language"], args.werror)

    pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))

    # check translations
    if args.only_headers:
        sys.exit(0)

    pinfo("Checking translations, this might take a while...")

    # A set makes each membership test O(1) instead of scanning a list of
    # every PO msgid for every POT msgid.
    po_msgids = {entry.msgid for entry in po_file}
    missing_msgid = [entry.msgid for entry in pot_file
                     if entry.msgid not in po_msgids]

    # lists to hold reports
    not_translated = []
    same_msgstr = []
    with_typo = []
    verify_trans = []
    fuzzy_trans = po_file.fuzzy_entries()

    # Entries that are translated and differ from their msgid; only these
    # are worth sending to the online translator.
    to_check = []

    for entry in po_file:
        if not entry.translated():
            not_translated.append(entry)
        elif entry.msgid == entry.msgstr:
            same_msgstr.append(entry)
        else:
            to_check.append(entry)

    # Build the translator and announce an ETA only when online checks
    # will actually run, and base the ETA on the entries that are checked.
    if not args.no_translate and to_check:
        eta = timedelta(seconds=len(to_check) * WTIME)
        pinfo("Approximate time to check translations online: {}".format(eta))

        translator = google_translate.GoogleTranslator(timeout=5.0,
                                                       retries=2,
                                                       wait_time=WTIME)

        # Get a valid source language for Google
        # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
        src_lang = args.language

        if src_lang not in translator._lang_dict:
            src_lang = src_lang.replace("_", "-")

            if src_lang not in translator._lang_dict:
                src_lang = src_lang.split("-")[0]

        for entry in to_check:
            word_dict = translator.get_info_dict(entry.msgstr, "en", src_lang)

            if word_dict is not None:
                if word_dict["has_typo"]:
                    with_typo.append(entry)

                msgid_lower = entry.msgid.lower()

                if word_dict["translation"].lower() != msgid_lower:
                    # Check verbs, nouns, adverbs, etc..
                    found = any(
                        msgid_lower in word_dict["extra"][key].keys()
                        for key in word_dict["extra"])

                    if not found:
                        verify_trans.append((entry, word_dict["translation"]))

            # Pause between requests, whether or not a result came back.
            sleep(WTIME)

    # time to report
    print("=" * 25 + "Report" + "=" * 25)

    if missing_msgid:
        print("Missing msgids")

        for msgid in missing_msgid:
            print(" \"{}\"".format(msgid))

    if not_translated:
        print("Not translated")

        for entry in not_translated:
            print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))

    if same_msgstr:
        print("Same msgstr")

        for entry in same_msgstr:
            print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))

    if with_typo:
        print("With typo")

        for entry in with_typo:
            print(" line: {} msgid: \"{}\" msgstr: \"{}\"".format(
                entry.linenum, entry.msgid, entry.msgstr))

    if verify_trans:
        print("Verify translation")

        for entry, translation in verify_trans:
            print(" line: {} msgid: \"{}\" trans: \"{}\"".format(
                entry.linenum, entry.msgid, translation))

    if fuzzy_trans:
        print("Fuzzy translations")

        for entry in fuzzy_trans:
            print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))

    total = len(missing_msgid) + len(not_translated) + len(same_msgstr) + len(
        with_typo) + len(verify_trans) + len(fuzzy_trans)

    print("")
    print("Missing msgids\t\t: {}".format(len(missing_msgid)))
    print("Not translated\t\t: {}".format(len(not_translated)))
    print("Same msgstr\t\t: {}".format(len(same_msgstr)))
    print("With typo\t\t: {}".format(len(with_typo)))
    print("Verify translation\t: {}".format(len(verify_trans)))
    print("Fuzzy translations\t: {}".format(len(fuzzy_trans)))
    print("Total\t\t\t: {}".format(total))
def main(args):
    """Check a PO translation file against the en_US template and print a report.

    Steps:

    1. Load the ``en_US`` template and the PO file for ``args.language``
       (paths built from ``LOCALE_PATH_TMPL``).
    2. Compare selected headers and warn on mismatches.
    3. Unless ``args.only_headers`` is set, classify every PO entry as
       missing, not translated, identical to its msgid, typo, needing
       verification, or fuzzy. The online checks are skipped when
       ``args.no_translate`` is set.
    4. Print the detailed report followed by per-category totals and the
       total number of PO entries.

    Args:
        args: Parsed command line options. The attributes read here are
            ``language``, ``werror``, ``only_headers``, ``no_translate``
            and ``tlang``.
    """
    os.chdir("..")

    # setup
    pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
    po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)

    # NOTE(review): perror presumably terminates the script - confirm.
    if not os.path.exists(pot_file_path):
        perror("Failed to locate POT file, exiting...", pot_file_path)

    if not os.path.exists(po_file_path):
        perror("Failed to locate PO file, exiting...", po_file_path)

    pot_file = polib.pofile(pot_file_path)
    po_file = polib.pofile(po_file_path)

    # check headers
    pinfo("Checking PO headers")

    pot_headers = pot_file.metadata
    po_headers = po_file.metadata

    if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
        pwarn("'Project-Id-Version' headers do not match", exit=args.werror)

    if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
        pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)

    po_creation_date = parse_date(po_headers["POT-Creation-Date"])
    po_revision_date = parse_date(po_headers["PO-Revision-Date"])

    # Aware datetimes convert to UTC automatically when comparing
    if po_revision_date <= po_creation_date:
        pwarn("PO file seems outdated", exit=args.werror)

    if "Language" in po_headers and po_headers["Language"] != args.language:
        pwarn("'Language' header does not match with the given language",
              po_headers["Language"], args.werror)

    pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))

    # check translations
    if args.only_headers:
        sys.exit(0)

    pinfo("Checking translations, this might take a while...")

    # A set makes each membership test O(1) instead of scanning a list of
    # every PO msgid for every POT msgid.
    po_msgids = {entry.msgid for entry in po_file}
    missing_msgid = [entry.msgid for entry in pot_file
                     if entry.msgid not in po_msgids]

    # lists to hold reports
    not_translated = []
    same_msgstr = []
    with_typo = []
    verify_trans = []
    fuzzy_trans = po_file.fuzzy_entries()

    # Init translator only if the '--no-translate' flag is NOT set
    translator = None
    if not args.no_translate:
        translator = google_translate.GoogleTranslator(timeout=5.0,
                                                       retries=2,
                                                       wait_time=WTIME)

        # Set source language for GoogleTranslator
        if args.tlang is not None:
            src_lang = args.tlang
            pinfo("Forcing '{}' as the translator's source language".format(
                src_lang))
        else:
            # Get a valid source language for Google
            # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
            src_lang = args.language

            if src_lang not in translator._lang_dict:
                src_lang = src_lang.replace("_", "-")

                if src_lang not in translator._lang_dict:
                    src_lang = src_lang.split("-")[0]

    # Keep entries that need further analysis using the translator
    further_analysis = []

    for entry in po_file:
        if not entry.translated():
            not_translated.append(entry)
        elif entry.msgid == entry.msgstr:
            same_msgstr.append(entry)
        else:
            further_analysis.append(entry)

    if translator is not None and further_analysis:
        # eta = (items_to_analyze * (WTIME + avg_ms)) - WTIME
        # We subtract WTIME at the end because there is no wait for the
        # last item on the list
        # avg_ms = 200ms
        eta_seconds = (len(further_analysis) * (WTIME + 0.2)) - WTIME
        eta_seconds = int(round(eta_seconds))

        eta = timedelta(seconds=eta_seconds)
        pinfo("Approximate time to check translations online: {}".format(eta))

        # Pass translations as a list since GoogleTranslator can handle them
        words_dict = translator.get_info_dict(
            [entry.msgstr for entry in further_analysis], "en", src_lang)

        # The results do not carry the POEntry objects, so pair each result
        # with the entry at the same position in the request list.
        for entry, word_dict in zip(further_analysis, words_dict):
            if word_dict is None:
                continue

            if word_dict["has_typo"]:
                with_typo.append(entry)

            msgid_lower = entry.msgid.lower()

            if word_dict["translation"].lower() != msgid_lower:
                # Check verbs, nouns, adverbs, etc..
                found = any(
                    msgid_lower in word_dict["extra"][key].keys()
                    for key in word_dict["extra"])

                if not found:
                    verify_trans.append((entry, word_dict["translation"]))

    # time to report
    print("=" * 25 + "Report" + "=" * 25)

    if missing_msgid:
        print("Missing msgids")

        for msgid in missing_msgid:
            print(" \"{}\"".format(msgid))

    if not_translated:
        print("Not translated")

        for entry in not_translated:
            print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))

    if same_msgstr:
        print("Same msgstr")

        for entry in same_msgstr:
            print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))

    if with_typo:
        print("With typo")

        for entry in with_typo:
            print(" line: {} msgid: \"{}\" msgstr: \"{}\"".format(
                entry.linenum, entry.msgid, entry.msgstr))

    if verify_trans:
        print("Verify translation")

        for entry, translation in verify_trans:
            print(" line: {} msgid: \"{}\" trans: \"{}\"".format(
                entry.linenum, entry.msgid, translation))

    if fuzzy_trans:
        print("Fuzzy translations")

        for entry in fuzzy_trans:
            print(" line: {} msgid: \"{}\"".format(entry.linenum, entry.msgid))

    total = len(missing_msgid) + len(not_translated) + len(same_msgstr) + len(
        with_typo) + len(verify_trans) + len(fuzzy_trans)

    print("")
    print("Missing msgids\t\t: {}".format(len(missing_msgid)))
    print("Not translated\t\t: {}".format(len(not_translated)))
    print("Same msgstr\t\t: {}".format(len(same_msgstr)))
    print("With typo\t\t: {}".format(len(with_typo)))
    print("Verify translation\t: {}".format(len(verify_trans)))
    print("Fuzzy translations\t: {}".format(len(fuzzy_trans)))
    print("Total\t\t\t: {}".format(total))
    print("")
    print("Total entries\t\t: {}".format(len(po_file)))
# Walk backwards through one day of tweets, starting at ``s_time`` (epoch
# seconds), scanning the table in windows of 201 seconds.
s_time = 1474369886
one_day = 60 * 60 * 24
terminate_time = s_time - one_day
endtime = s_time

# One translator serves every window; it does not depend on the loop state,
# so it is created once instead of once per scan.
translator = google_translate.GoogleTranslator()

while endtime > terminate_time:
    pe = "tweetid,epoch,#s"
    # "#s" is a placeholder for the "status" attribute in the projection.
    ean = {"#s": "status"}
    # NOTE(review): only the first page of each scan is read; if a window
    # can exceed the scan page limit, LastEvaluatedKey has to be followed.
    response = table.scan(
        FilterExpression=Attr('epoch').between(endtime - 200, endtime),
        ProjectionExpression=pe,
        ExpressionAttributeNames=ean)
    # The filter covers [endtime - 200, endtime]; stepping by 201 keeps
    # consecutive windows from overlapping.
    endtime -= 201
    items = response['Items']

    # NOTE(review): keyw, tid and trans are not used in the visible part of
    # the script; presumably they are consumed further down - confirm.
    keyw = {}
    for item in items:
        tid = item["tweetid"]
        input_s = item["status"]
        lang = translator.detect(input_s)
        trans = False
        if lang is not None:
            # remove hashtag for translation convenience
            status = input_s.replace("#", "")
            if lang != "english":
                trans = True
                # Translate the cleaned text rather than the raw status so
                # the hashtag removal above actually takes effect.
                status = translator.translate(status, "english")