Example #1
def main():
    """Checks whether we are in the correct directory and everything's there,
    then collects glossary entries from all PO files and writes a new glossary
    file.

    Output is restricted to source terms that are already in the
    glossary.
    """
    if len(sys.argv) == 3 or len(sys.argv) == 4:
        print('Generating glossary:')
    else:
        print(
            'Usage: generate_glossary.py <input-glossary> <output-glossary> [locale]'
        )
        return 1

    output_path = None  # referenced by the cleanup in the except handler
    try:
        print('Current time: %s' % time.ctime())
        # Prepare the paths
        input_glossary = os.path.abspath(
            os.path.join(os.path.dirname(__file__), sys.argv[1]))
        output_glossary = os.path.abspath(
            os.path.join(os.path.dirname(__file__), sys.argv[2]))
        locale = 'all'
        if len(sys.argv) == 4:
            locale = sys.argv[3]

        if (not (os.path.exists(input_glossary)
                 and os.path.isfile(input_glossary))):
            print('There is no glossary file at ' + input_glossary)
            return 1

        po_dir = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '../po'))
        output_path = make_path(os.path.dirname(__file__), '../po_validation')
        result = generate_glossary(po_dir, output_path, input_glossary,
                                   output_glossary, locale)
        print('Current time: %s' % time.ctime())
        return result

    except Exception:
        print('Something went wrong:')
        traceback.print_exc()
        # Only clean up if we got far enough to create the output path.
        if output_path:
            delete_path(make_path(output_path, 'temp_glossary'))
        return 1
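The main() above is only the body of the script; a minimal sketch of the scaffolding it presumably relies on is shown below (imports plus entry point; make_path, delete_path and generate_glossary are assumed to be defined elsewhere in the same script and are not shown here):

import os
import sys
import time
import traceback

# Assumed: make_path, delete_path and generate_glossary are defined further up
# in the same script; only main() is shown in this example.

if __name__ == '__main__':
    sys.exit(main())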
Example #3
def main():
    """Checks whether we are in the correct directory and everything's there,
    then runs a glossary check over all PO files."""
    if len(sys.argv) == 2 or len(sys.argv) == 3:
        print('Running glossary checks:')
    else:
        print(
            'Usage: glossary_checks.py <relative-path-to-glossary> [locale]')
        return 1

    output_path = None  # referenced by the cleanup in the except handler
    try:
        print('Current time: %s' % time.ctime())
        # Prepare the paths
        glossary_file = os.path.abspath(os.path.join(
            os.path.dirname(__file__), sys.argv[1]))
        locale = 'all'
        if len(sys.argv) == 3:
            locale = sys.argv[2]

        if (not (os.path.exists(glossary_file) and os.path.isfile(glossary_file))):
            print('There is no glossary file at ' + glossary_file)
            return 1

        input_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '../po'))
        output_path = make_path(os.path.dirname(
            __file__), '../po_validation/translators')
        result = check_translations_with_glossary(
            input_path, output_path, glossary_file, locale)
        print('Current time: %s' % time.ctime())
        return result

    except Exception:
        print('Something went wrong:')
        traceback.print_exc()
        # Only clean up if we got far enough to create the output path.
        if output_path:
            delete_path(make_path(output_path, 'temp_glossary'))
        return 1
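As a quick way to exercise main() outside a terminal (for example in a smoke test), the expected command-line arguments can be simulated through sys.argv; a small sketch with purely illustrative paths:

import sys

# Illustrative arguments: a relative path to the glossary CSV and an
# optional locale code.
sys.argv = ['glossary_checks.py', '../data/i18n/glossary.csv', 'de']
print('glossary check finished with exit code', main())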
Example #5
def generate_glossary(po_dir, output_path, input_glossary, output_glossary,
                      only_locale):
    """Main loop.

    Uses poterminology from the Translate Toolkit to collect glossary entries for all files in 'po_dir' for the given 'only_locale'. If 'only_locale' = "all", processes all locales. Then reads the <input_glossary>, adds new entries that were obtained by the glossary generation if there are any gaps, and then writes the results to <output_glossary>.
    """

    # Find the locale files to process
    print('Locale: ' + only_locale)
    locales = []
    glossaries = defaultdict(list)

    if only_locale != 'all':
        locales.append(only_locale)
    else:
        # Get locales from the Transifex glossary file
        header_row = read_csv_file(input_glossary)[0]
        regex = re.compile('^(translation_)(.+)$')
        for header in header_row:
            match = regex.match(header)
            if match:
                locales.append(match.group(2))

    temp_path = make_path(output_path, 'temp_glossary')

    for locale in locales:
        print('Processing locale: ' + locale)
        # Generate the pot glossary
        input_path = po_dir + '/*/' + locale + '.po'
        pot_path = os.path.join(temp_path, 'glossary_' + locale + '.po')

        try:
            # We need shell=True for the wildcards.
            poterminology_result = check_output(
                'poterminology ' + input_path + ' -o ' + pot_path,
                stderr=subprocess.STDOUT,
                shell=True,
                universal_newlines=True)
            if 'Error' in poterminology_result:
                print('Error running poterminology:\n  FILE: ' + input_path +
                      '\n  OUTPUT PATH: ' + output_path + '\n  ' +
                      poterminology_result.split('\n', 1)[1])
                return False

        except CalledProcessError as error:
            # check_output raised before assigning poterminology_result, so
            # report the output captured in the exception instead.
            print('Failed to run poterminology:\n  FILE: ' + input_path +
                  '\n  OUTPUT PATH: ' + output_path + '\n  ' +
                  error.output.split('\n', 1)[1])
            return False

        # Convert to csv for easy parsing
        csv_file = os.path.join(temp_path, 'glossary_' + locale + '.csv')
        call(['po2csv', '--progress=none', pot_path, csv_file])
        # The po file is no longer needed, delete it.
        os.remove(pot_path)

        transifex_glossary = load_transifex_glossary(input_glossary, locale)
        extracted_glossary = load_extracted_glossary(csv_file, locale)

        # Add generated translation if necessary
        for key in transifex_glossary.keys():
            if (transifex_glossary[key].translation == ''
                    and key in extracted_glossary):
                extracted_entry = extracted_glossary[key]
                if extracted_entry.translation != '':
                    transifex_entry = transifex_glossary[key]
                    transifex_entry.translation = extracted_entry.translation
                    transifex_entry.translation_comment = 'AUTOGENERATED - PLEASE PROOFREAD!'
                    transifex_glossary[key] = transifex_entry
        glossaries[locale] = transifex_glossary

    # Now collect the data for the global csv file
    # Write header
    print('Writing results to ' + output_glossary)
    result = 'term,pos,comment,'
    for locale in locales:
        result = result + 'translation_' + locale + ','
        result = result + 'comment_' + locale + ','
    result = result[0:-1] + '\n'

    source_terms = load_transifex_source_terms(input_glossary)
    # Collect all translations for each source term
    for key in source_terms:
        source_term = source_terms[key]
        result = result + '"%s","%s","%s",' % (
            source_term.term.replace('"', '""'),
            source_term.wordclass.replace('"', '""'),
            source_term.term_comment.replace('"', '""'))
        for locale in locales:
            glossary = glossaries[locale]
            translation = ''
            translation_comment = ''
            if key in glossary:
                translation = glossary[key].translation.replace('"', '""')
                translation_comment = glossary[key].translation_comment.replace(
                    '"', '""')
            result = result + '"%s","%s",' % (translation, translation_comment)
        result = result[0:-1] + '\n'

    # Now write the file.
    with open(output_glossary, 'wt') as dest_file:
        dest_file.write(result)

    # Cleanup.
    delete_path(temp_path)
    if not os.listdir(output_path):
        os.rmdir(output_path)
    print('Done.')
    return 0
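The locale detection at the top of generate_glossary() relies on the Transifex export naming its per-locale columns translation_<locale>; here is the same regex run on an invented header row for illustration:

import re

# Invented header row mimicking a Transifex glossary export.
header_row = ['term', 'pos', 'comment',
              'translation_de', 'comment_de',
              'translation_fr', 'comment_fr']

locales = []
regex = re.compile('^(translation_)(.+)$')
for header in header_row:
    match = regex.match(header)
    if match:
        locales.append(match.group(2))
print(locales)  # prints ['de', 'fr']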
Example #6
def check_translations_with_glossary(input_path, output_path, glossary_file, only_locale):
    """Main loop.

    Loads the Transifex and Hunspell glossaries, converts all po files
    for languages that have glossary entries to temporary csv files,
    runs the check and then reports any hits to csv files.
    """
    print('Locale: ' + only_locale)
    temp_path = make_path(output_path, 'temp_glossary')
    hits = []
    locale_list = defaultdict(list)

    glossaries = defaultdict(list)
    load_hunspell_locales(only_locale)

    source_directories = sorted(os.listdir(input_path), key=str.lower)
    for dirname in source_directories:
        dirpath = os.path.join(input_path, dirname)
        if os.path.isdir(dirpath):
            source_files = sorted(os.listdir(dirpath), key=str.lower)
            sys.stdout.write("\nChecking text domain '" + dirname + "': ")
            sys.stdout.flush()
            for source_filename in source_files:
                po_file = dirpath + '/' + source_filename
                if source_filename.endswith('.po'):
                    locale = source_filename[0:-3]
                    if only_locale == 'all' or locale == only_locale:
                        # Load the glossary if we haven't seen this locale
                        # before
                        if len(glossaries[locale]) < 1:
                            sys.stdout.write(
                                '\nLoading glossary for ' + locale)
                            glossaries[locale].append(
                                load_glossary(glossary_file, locale))
                            sys.stdout.write(' - %d entries ' %
                                             len(glossaries[locale][0]))
                            sys.stdout.flush()
                        # Only bother with locales that have glossary entries
                        if len(glossaries[locale][0]) > 0:
                            sys.stdout.write(locale + ' ')
                            sys.stdout.flush()
                            if len(locale_list[locale]) < 1:
                                locale_list[locale].append(locale)
                            csv_file = os.path.abspath(os.path.join(
                                temp_path, dirname + '_' + locale + '.csv'))
                            # Convert to csv for easy parsing
                            call(['po2csv', '--progress=none', po_file, csv_file])

                            # Now run the actual check
                            current_hits = check_file(
                                csv_file, glossaries, locale, dirname)
                            for hit in current_hits:
                                hits.append(hit)

                            # The csv file is no longer needed, delete it.
                            os.remove(csv_file)

    hits = sorted(hits, key=lambda FailedTranslation: [
                  FailedTranslation.locale, FailedTranslation.translation])
    for locale in locale_list:
        locale_result = '"glossary_term","glossary_translation","source","target","file","location"\n'
        counter = 0
        for hit in hits:
            if hit.locale == locale:
                row = '"%s","%s","%s","%s","%s","%s"\n' % (
                    hit.term, hit.translation, hit.source, hit.target, hit.po_file, hit.location)
                locale_result = locale_result + row
                counter = counter + 1
        dest_filepath = make_path(output_path, locale)
        with open(dest_filepath + '/glossary_check.csv', 'wt') as dest_file:
            dest_file.write(locale_result)
        # Uncomment this line to print a statistic of the number of hits for each locale
        # print("%s\t%d"%(locale, counter))

    delete_path(temp_path)
    return 0
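The report loop above sorts all hits by locale and translation before writing one CSV per locale; a standalone sketch of that step with stand-in records (the FailedTranslation fields are inferred from how the hits are used above, and the sample values are invented):

from collections import namedtuple

# Stand-in for the hit records returned by check_file(); the field names
# follow their usage in the report loop, the values are made up.
FailedTranslation = namedtuple('FailedTranslation', [
    'term', 'translation', 'source', 'target', 'po_file', 'location', 'locale'])

hits = [
    FailedTranslation('ship', 'Schiff', 'Build a ship', 'Baue ein Boot',
                      'texts/de.po', '17', 'de'),
    FailedTranslation('building', 'Gebäude', 'A building', 'Ein Haus',
                      'texts/de.po', '42', 'de'),
]
hits = sorted(hits, key=lambda hit: [hit.locale, hit.translation])
for hit in hits:
    print('"%s","%s","%s","%s","%s","%s"' % (
        hit.term, hit.translation, hit.source, hit.target,
        hit.po_file, hit.location))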
def check_translations_with_glossary(input_path, output_path, glossary_file,
                                     only_locale):
    """Main loop.

    Loads the Transifex and Hunspell glossaries, converts all po files
    for languages that have glossary entries to temporary csv files,
    runs the check and then reports any hits to csv files.
    """
    print('Locale: ' + only_locale)
    temp_path = make_path(output_path, 'temp_glossary')
    hits = []
    locale_list = []

    glossaries = {}
    load_hunspell_locales()

    source_directories = sorted(os.listdir(input_path), key=str.lower)
    for dirname in source_directories:
        dirpath = os.path.join(input_path, dirname)
        if os.path.isdir(dirpath):
            source_files = sorted(os.listdir(dirpath), key=str.lower)
            sys.stdout.write("\nChecking text domain '" + dirname + "': ")
            sys.stdout.flush()
            for source_filename in source_files:
                po_file = dirpath + '/' + source_filename
                if source_filename.endswith('.po'):
                    locale = source_filename[0:-3]
                    if only_locale in ('all', locale):
                        # Load the glossary if we haven't seen this locale
                        # before
                        if locale not in glossaries:
                            sys.stdout.write('\nLoading glossary for ' +
                                             locale)
                            glossaries[locale] = load_glossary(
                                glossary_file, locale)
                            sys.stdout.write(' - %d entries ' %
                                             len(glossaries[locale]))
                            sys.stdout.flush()
                        # Only bother with locales that have glossary entries
                        if len(glossaries[locale]) > 0:
                            sys.stdout.write(locale + ' ')
                            sys.stdout.flush()
                            if locale not in locale_list:
                                locale_list.append(locale)
                            csv_file = os.path.abspath(
                                os.path.join(temp_path,
                                             dirname + '_' + locale + '.csv'))
                            # Convert to csv for easy parsing
                            try:
                                subprocess.run(
                                    ['po2csv', '--progress=none', po_file, csv_file],
                                    check=True)

                                # Now run the actual check
                                current_hits = check_file(
                                    csv_file, glossaries, locale, dirname)
                                for hit in current_hits:
                                    hits.append(hit)
                            except subprocess.CalledProcessError:
                                print('Error with po2csv for:', po_file,
                                      csv_file)

                            # The csv file is no longer needed, delete it
                            # (po2csv may not have created it if it failed).
                            if os.path.exists(csv_file):
                                os.remove(csv_file)

    hits = sorted(hits,
                  key=lambda FailedTranslation:
                  [FailedTranslation.locale, FailedTranslation.translation])
    for locale in locale_list:
        locale_result = '"glossary_term","glossary_translation","source","target","file","location"\n'
        counter = 0
        for hit in hits:
            if hit.locale == locale:
                row = '"%s","%s","%s","%s","%s","%s"\n' % (
                    hit.term, hit.translation, hit.source, hit.target,
                    hit.po_file, hit.location)
                locale_result = locale_result + row
                counter = counter + 1
        dest_filepath = make_path(output_path, locale)
        with open(dest_filepath + '/glossary_check.csv', 'wt') as dest_file:
            dest_file.write(locale_result)
        # Uncomment this line to print a statistic of the number of hits for each locale
        # print("%s\t%d"%(locale, counter))

    delete_path(temp_path)
    return 0
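For downstream tooling, the per-locale glossary_check.csv written above can be read back with Python's csv module; a minimal sketch (the path is illustrative, the real report lands under <output_path>/<locale>/glossary_check.csv):

import csv

# Illustrative path; adjust to wherever the report was written.
with open('po_validation/translators/de/glossary_check.csv', newline='') as report:
    for row in csv.DictReader(report):
        print(row['glossary_term'], '->', row['glossary_translation'],
              'in', row['file'], 'at', row['location'])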