Esempio n. 1
0
    def translate_data(self):
        """Annotate every record in ``self.json_data`` and return it as JSON.

        For each ``key -> record`` pair in ``self.json_data`` this method:

        * copies the record's ``latitude`` / ``longitude`` values into a new
          nested ``location`` dict under the keys ``lat`` / ``lon``;
        * asks the translator to detect the language of ``status`` and, when
          a language is returned, stores it under ``lang``;
        * for any language other than ``"english"``, translates the
          hashtag-free status to English and stores it under ``status_en``;
        * passes the (possibly translated) text to ``self.get_sentiment``
          unless the text is ``None``.

        Returns:
            str: JSON document of the form
            ``{"action": KEY, "data": self.json_data}``.
        """
        translator = google_translate.GoogleTranslator()
        for k, record in self.json_data.items():
            input_s = record["status"]
            lang = translator.detect(input_s)
            record["location"] = {
                "lat": record["latitude"],
                "lon": record["longitude"],
            }

            if lang is None:
                # No language was reported for this status, so there is
                # nothing to translate or to run sentiment analysis on.
                continue

            # remove hashtag for translation convenience
            status = input_s.replace("#", "")
            record["lang"] = lang
            if lang != "english":
                # Translate the hashtag-free text; the previous code stripped
                # the hashtags and then translated the raw input anyway.
                status = translator.translate(status, "english")
                record["status_en"] = status

            # NOTE(review): translate() apparently may return None (the
            # original code guarded against it) -- confirm with the library.
            if status is not None:
                self.get_sentiment(k, status)

        res = {"action": KEY, "data": self.json_data}
        return json.dumps(res)
Esempio n. 2
0
def translate(query):
    """Return the German translation of *query*.

    The query is UTF-8 encoded before it is handed to a freshly created
    ``GoogleTranslator``; whatever ``translate`` returns is passed through
    unchanged.
    """
    return google_translate.GoogleTranslator().translate(
        query.encode('utf-8'), "german")
def main(args):
    """Check a PO translation file against the en_US POT template.

    The working directory is changed to the parent directory, both catalogs
    are loaded with polib, their headers are compared and every PO entry is
    sorted into one of the report categories (not translated, msgstr equal
    to msgid, typo reported by the translator, translation to verify).
    Msgids present in the POT file but absent from the PO file and the PO
    file's fuzzy entries are reported as well.  The report goes to stdout.

    Args:
        args: Parsed command line options.  The attributes read here are
            ``language``, ``werror``, ``only_headers`` and ``no_translate``.

    Note:
        ``sys.exit(0)`` is called right after the header checks when
        ``args.only_headers`` is set.  ``perror``, ``pwarn`` and ``pinfo``
        are defined outside this block; presumably ``perror`` terminates the
        script and ``pwarn`` does so when asked to -- confirm against their
        definitions.
    """
    os.chdir("..")

    # setup
    pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
    po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)

    if not os.path.exists(pot_file_path):
        perror("Failed to locate POT file, exiting...", pot_file_path)

    if not os.path.exists(po_file_path):
        perror("Failed to locate PO file, exiting...", po_file_path)

    pot_file = polib.pofile(pot_file_path)
    po_file = polib.pofile(po_file_path)

    # check headers
    pinfo("Checking PO headers")

    pot_headers = pot_file.metadata
    po_headers = po_file.metadata

    if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
        pwarn("'Project-Id-Version' headers do not match", exit=args.werror)

    if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
        pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)

    po_creation_date = parse_date(po_headers["POT-Creation-Date"])
    po_revision_date = parse_date(po_headers["PO-Revision-Date"])

    # Aware datetimes convert to UTC automatically when comparing
    if po_revision_date <= po_creation_date:
        pwarn("PO file seems outdated", exit=args.werror)

    if "Language" in po_headers and po_headers["Language"] != args.language:
        pwarn("'Language' header does not match with the given language",
              po_headers["Language"], args.werror)

    pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))

    # check translations
    if args.only_headers:
        sys.exit(0)

    pinfo("Checking translations, this might take a while...")

    eta = timedelta(seconds=len(pot_file) * WTIME)

    pinfo("Approximate time to check translations online: {}".format(eta))

    # lists to hold reports
    not_translated = []
    same_msgstr = []
    with_typo = []
    verify_trans = []
    fuzzy_trans = po_file.fuzzy_entries()

    # A set makes every membership test O(1); the POT order is kept because
    # the POT msgids themselves are still walked in file order.
    po_msgid = {entry.msgid for entry in po_file}
    missing_msgid = [
        entry.msgid for entry in pot_file if entry.msgid not in po_msgid
    ]

    translator = google_translate.GoogleTranslator(timeout=5.0,
                                                   retries=2,
                                                   wait_time=WTIME)

    # Get a valid source language for Google
    # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
    src_lang = args.language

    if src_lang not in translator._lang_dict:
        src_lang = src_lang.replace("_", "-")

        if src_lang not in translator._lang_dict:
            src_lang = src_lang.split("-")[0]

    for entry in po_file:
        if not entry.translated():
            not_translated.append(entry)
            continue

        if entry.msgid == entry.msgstr:
            same_msgstr.append(entry)
            continue

        if args.no_translate:
            continue

        # Translate the msgstr back to English and compare with the msgid
        word_dict = translator.get_info_dict(entry.msgstr, "en", src_lang)

        if word_dict is not None:
            if word_dict["has_typo"]:
                with_typo.append(entry)

            msgid_lower = entry.msgid.lower()

            if word_dict["translation"].lower() != msgid_lower:
                # Check verbs, nouns, adverbs, etc..
                found = any(msgid_lower in words
                            for words in word_dict["extra"].values())

                if not found:
                    verify_trans.append((entry, word_dict["translation"]))

        # Pause between consecutive online lookups
        sleep(WTIME)

    # time to report
    print("=" * 25 + "Report" + "=" * 25)

    if missing_msgid:
        print("Missing msgids")

        for msgid in missing_msgid:
            print("  \"{}\"".format(msgid))

    if not_translated:
        print("Not translated")

        for entry in not_translated:
            print("  line: {} msgid: \"{}\"".format(entry.linenum,
                                                    entry.msgid))

    if same_msgstr:
        print("Same msgstr")

        for entry in same_msgstr:
            print("  line: {} msgid: \"{}\"".format(entry.linenum,
                                                    entry.msgid))

    if with_typo:
        print("With typo")

        for entry in with_typo:
            print("  line: {} msgid: \"{}\" msgstr: \"{}\"".format(
                entry.linenum, entry.msgid, entry.msgstr))

    if verify_trans:
        print("Verify translation")

        for entry, translation in verify_trans:
            print("  line: {} msgid: \"{}\" trans: \"{}\"".format(
                entry.linenum, entry.msgid, translation))

    if fuzzy_trans:
        print("Fuzzy translations")

        for entry in fuzzy_trans:
            print("  line: {} msgid: \"{}\"".format(entry.linenum,
                                                    entry.msgid))

    # Summary: one counter line per category, followed by the grand total
    summary = [
        ("Missing msgids\t\t: {}", missing_msgid),
        ("Not translated\t\t: {}", not_translated),
        ("Same msgstr\t\t: {}", same_msgstr),
        ("With typo\t\t: {}", with_typo),
        ("Verify translation\t: {}", verify_trans),
        ("Fuzzy translations\t: {}", fuzzy_trans),
    ]
    total = sum(len(items) for _, items in summary)

    print("")
    for label, items in summary:
        print(label.format(len(items)))
    print("Total\t\t\t: {}".format(total))
Esempio n. 4
0
def main(args):
    """Check a PO translation file against the en_US POT template.

    The working directory is changed to the parent directory, both catalogs
    are loaded with polib, their headers are compared and every PO entry is
    sorted into one of the report categories (not translated, msgstr equal
    to msgid, typo reported by the translator, translation to verify).
    Msgids present in the POT file but absent from the PO file and the PO
    file's fuzzy entries are reported as well.  The report goes to stdout.

    The online translator is created only when ``args.no_translate`` is
    not set, and all remaining msgstr values are sent to it in one call.

    Args:
        args: Parsed command line options.  The attributes read here are
            ``language``, ``werror``, ``only_headers``, ``no_translate``
            and ``tlang``.

    Note:
        ``sys.exit(0)`` is called right after the header checks when
        ``args.only_headers`` is set.  ``perror``, ``pwarn`` and ``pinfo``
        are defined outside this block; presumably ``perror`` terminates the
        script and ``pwarn`` does so when asked to -- confirm against their
        definitions.
    """
    os.chdir("..")

    # setup
    pot_file_path = LOCALE_PATH_TMPL.format(lang="en_US")
    po_file_path = LOCALE_PATH_TMPL.format(lang=args.language)

    if not os.path.exists(pot_file_path):
        perror("Failed to locate POT file, exiting...", pot_file_path)

    if not os.path.exists(po_file_path):
        perror("Failed to locate PO file, exiting...", po_file_path)

    pot_file = polib.pofile(pot_file_path)
    po_file = polib.pofile(po_file_path)

    # check headers
    pinfo("Checking PO headers")

    pot_headers = pot_file.metadata
    po_headers = po_file.metadata

    if pot_headers["Project-Id-Version"] != po_headers["Project-Id-Version"]:
        pwarn("'Project-Id-Version' headers do not match", exit=args.werror)

    if pot_headers["POT-Creation-Date"] != po_headers["POT-Creation-Date"]:
        pwarn("'POT-Creation-Date' headers do not match", exit=args.werror)

    po_creation_date = parse_date(po_headers["POT-Creation-Date"])
    po_revision_date = parse_date(po_headers["PO-Revision-Date"])

    # Aware datetimes convert to UTC automatically when comparing
    if po_revision_date <= po_creation_date:
        pwarn("PO file seems outdated", exit=args.werror)

    if "Language" in po_headers and po_headers["Language"] != args.language:
        pwarn("'Language' header does not match with the given language",
              po_headers["Language"], args.werror)

    pinfo("Last-Translator: {}".format(po_headers["Last-Translator"]))

    # check translations
    if args.only_headers:
        sys.exit(0)

    pinfo("Checking translations, this might take a while...")

    # lists to hold reports
    not_translated = []
    same_msgstr = []
    with_typo = []
    verify_trans = []
    fuzzy_trans = po_file.fuzzy_entries()

    # A set makes every membership test O(1); the POT order is kept because
    # the POT msgids themselves are still walked in file order.
    po_msgid = {entry.msgid for entry in po_file}
    missing_msgid = [
        entry.msgid for entry in pot_file if entry.msgid not in po_msgid
    ]

    # Init translator only if the '--no-translate' flag is NOT set
    translator = None
    if not args.no_translate:
        translator = google_translate.GoogleTranslator(timeout=5.0,
                                                       retries=2,
                                                       wait_time=WTIME)

        # Set source language for GoogleTranslator
        if args.tlang is not None:
            src_lang = args.tlang
            pinfo("Forcing '{}' as the translator's source language".format(
                src_lang))
        else:
            # Get a valid source language for Google
            # for example convert 'ar_SA' to 'ar' or 'zh_CN' to 'zh-CN'
            src_lang = args.language

            if src_lang not in translator._lang_dict:
                src_lang = src_lang.replace("_", "-")

                if src_lang not in translator._lang_dict:
                    src_lang = src_lang.split("-")[0]

    # Keep entries that need further analysis using the translator
    further_analysis = []

    for entry in po_file:
        if not entry.translated():
            not_translated.append(entry)

        elif entry.msgid == entry.msgstr:
            same_msgstr.append(entry)

        else:
            further_analysis.append(entry)

    if translator is not None and further_analysis:
        # eta = (items_to_analyze * (WTIME + avg_ms)) - WTIME
        # We subtract WTIME at the end because there is no wait for the last
        # item on the list
        # avg_ms = 200ms
        eta_seconds = (len(further_analysis) * (WTIME + 0.2)) - WTIME
        eta_seconds = int(round(eta_seconds))

        eta = timedelta(seconds=eta_seconds)
        pinfo("Approximate time to check translations online: {}".format(eta))

        # Pass translations as a list since GoogleTranslator can handle them
        words_dict = translator.get_info_dict(
            [entry.msgstr for entry in further_analysis], "en", src_lang)

        # The results do not carry the POEntry objects, so pair every result
        # with the entry at the same position of the request list.
        for entry, word_dict in zip(further_analysis, words_dict):
            if word_dict is None:
                continue

            if word_dict["has_typo"]:
                with_typo.append(entry)

            msgid_lower = entry.msgid.lower()

            if word_dict["translation"].lower() != msgid_lower:
                # Check verbs, nouns, adverbs, etc..
                found = any(msgid_lower in words
                            for words in word_dict["extra"].values())

                if not found:
                    verify_trans.append((entry, word_dict["translation"]))

    # time to report
    print("=" * 25 + "Report" + "=" * 25)

    if missing_msgid:
        print("Missing msgids")

        for msgid in missing_msgid:
            print("  \"{}\"".format(msgid))

    if not_translated:
        print("Not translated")

        for entry in not_translated:
            print("  line: {} msgid: \"{}\"".format(entry.linenum,
                                                    entry.msgid))

    if same_msgstr:
        print("Same msgstr")

        for entry in same_msgstr:
            print("  line: {} msgid: \"{}\"".format(entry.linenum,
                                                    entry.msgid))

    if with_typo:
        print("With typo")

        for entry in with_typo:
            print("  line: {} msgid: \"{}\" msgstr: \"{}\"".format(
                entry.linenum, entry.msgid, entry.msgstr))

    if verify_trans:
        print("Verify translation")

        for entry, translation in verify_trans:
            print("  line: {} msgid: \"{}\" trans: \"{}\"".format(
                entry.linenum, entry.msgid, translation))

    if fuzzy_trans:
        print("Fuzzy translations")

        for entry in fuzzy_trans:
            print("  line: {} msgid: \"{}\"".format(entry.linenum,
                                                    entry.msgid))

    # Summary: one counter line per category, followed by the grand total
    summary = [
        ("Missing msgids\t\t: {}", missing_msgid),
        ("Not translated\t\t: {}", not_translated),
        ("Same msgstr\t\t: {}", same_msgstr),
        ("With typo\t\t: {}", with_typo),
        ("Verify translation\t: {}", verify_trans),
        ("Fuzzy translations\t: {}", fuzzy_trans),
    ]
    total = sum(len(items) for _, items in summary)

    print("")
    for label, items in summary:
        print(label.format(len(items)))
    print("Total\t\t\t: {}".format(total))
    print("")
    print("Total entries\t\t: {}".format(len(po_file)))
# Time window for the scan loop below: it starts at ``s_time`` and walks
# backwards until one day before it (values are presumably Unix epoch
# seconds -- confirm against the table's ``epoch`` attribute).
one_day = 24 * 60 * 60
s_time = 1474369886
endtime = s_time
terminate_time = endtime - one_day
while endtime > terminate_time:

    pe = "tweetid,epoch,#s"
    ean = {"#s": "status"}
    response = table.scan(FilterExpression=Attr('epoch').between(
        endtime - 200, endtime),
                          ProjectionExpression=pe,
                          ExpressionAttributeNames=ean)
    endtime -= 201
    items = response['Items']
    #print(len(items))
    translator = google_translate.GoogleTranslator()
    keyw = {}
    for item in items:
        tid = item["tweetid"]
        input_s = item["status"]
        lang = translator.detect(input_s)
        trans = False

        if (lang != None):

            # remove hashtag for translation convenience
            status = input_s.replace("#", "")
            if (lang != "english"):
                # print(status)
                trans = True
                status = translator.translate(input_s, "english")