Beispiel #1
0
def get_orphaned_files(repository, update_result):
    """Pair locally deleted files with their single Crowdin rename candidate.

    ``update_result`` is unpacked as a 3-tuple; only its last element, the
    Crowdin ``file_info`` mapping, is used. Entries are considered when their
    key starts with ``repository.crowdin.dest_folder`` and their metadata
    contains an ``'id'``.

    Returns a list of ``(crowdin_path, candidate)`` tuples, one for each
    locally missing file for which exactly one *other* Crowdin file shares
    the same base name.
    """
    _, _, file_info = update_result

    def base_name(path):
        # Everything after the last '/', or the whole path when there is none.
        return path[path.rfind('/') + 1:]

    tracked = []

    for crowdin_path, metadata in file_info.items():
        if crowdin_path.startswith(repository.crowdin.dest_folder) \
                and 'id' in metadata:
            tracked.append(crowdin_path)

    # Local paths of tracked Crowdin files that no longer exist on disk.
    missing_locally = []

    for crowdin_path in tracked:
        local_path = get_local_file(repository, crowdin_path)
        full_path = '%s/%s' % (repository.github.git_root, local_path)

        if not os.path.isfile(full_path):
            missing_locally.append(local_path)

    # Group tracked Crowdin paths by base name to find possible renames.
    same_name = defaultdict(list)

    for crowdin_path in tracked:
        same_name[base_name(crowdin_path)].append(crowdin_path)

    rename_candidates = {}

    for local_path in missing_locally:
        crowdin_path = get_crowdin_file(repository, local_path)
        rename_candidates[crowdin_path] = [
            other for other in same_name[base_name(local_path)]
            if other != crowdin_path
        ]

    # Only an unambiguous (single) candidate counts as a rename.
    return [
        (crowdin_path, others[0])
        for crowdin_path, others in rename_candidates.items()
        if len(others) == 1
    ]
Beispiel #2
0
def delete_code_translations(repository, file_name, file_info):
    """Hide suggestions on non-translatable strings and drop the 'ja' copy.

    Returns False when the file's Crowdin path is not in ``file_info`` or
    when ``process_suggestions`` returns a falsy value. Otherwise the 'ja'
    counterpart of ``file_name`` is removed if it exists and True is
    returned.
    """
    crowdin_file_name = get_crowdin_file(repository, file_name)

    if crowdin_file_name not in file_info:
        return False

    logging.info('Checking auto code translations for file %s' % file_name)

    def is_within_code_tag(x):
        # NOTE(review): x looks like a BeautifulSoup element (text, name,
        # find_parent) -- confirm against process_suggestions.
        text = x.text

        if text:
            if text.startswith('[TOC'):
                return True
            if 'CVSS' in text and 'CVE' in text:
                return True

        if x.name in ('code', 'pre'):
            return True

        if x.find_parent('code') is not None:
            return True

        if x.find_parent('pre') is not None:
            return True

        return x.find_parent(attrs={'id': 'front-matter'}) is not None

    def is_rst_directive(x):
        text = x.text

        # Empty or missing text is handed back unchanged (a falsy value).
        if not text:
            return text

        return (
            '====' in text or '----' in text
            or text == '..' or '::' in text or text.startswith(':')
            or ':doc:' in text or ':ref:' in text
            # Text without any uppercase characters also matches.
            or text.lower() == text
            or text.endswith('.md') or text.endswith('.rst')
        )

    def hide_translation(translation_id, response_data):
        # Already hidden translations need no request.
        if response_data['translation']['hidden']:
            return

        crowdin_http_request(
            repository,
            '/backend/translation/change_visibility',
            'GET',
            translation_id=translation_id,
            hidden=1)

    def always_true(x):
        return True

    def is_auto_translation(x):
        return x['user']['login'] == 'is-user'

    # reStructuredText sources use the directive filter on every suggestion;
    # everything else uses the code-tag filter on 'is-user' suggestions only.
    if file_name.endswith('.rst'):
        element_filter, suggestion_filter = is_rst_directive, always_true
    else:
        element_filter, suggestion_filter = \
            is_within_code_tag, is_auto_translation

    has_suggestions = process_suggestions(
        repository, crowdin_file_name, file_info, element_filter,
        hide_translation, suggestion_filter)

    if not has_suggestions:
        return False

    if file_name.startswith('en/'):
        target_file = 'ja/' + file_name[3:]
    else:
        target_file = file_name.replace('/en/', '/ja/')

    if os.path.exists(target_file):
        os.remove(target_file)

    return True
Beispiel #3
0
def get_file_ids(repository, files, file_info):
    """Map each file known to Crowdin to its Crowdin id.

    Each entry of ``files`` is translated with ``get_crowdin_file``; entries
    whose Crowdin path is missing from ``file_info`` are left out. The
    returned dict is keyed by the original file name and holds the ``'id'``
    from that file's metadata.
    """
    file_ids = {}

    for file in files:
        crowdin_file = get_crowdin_file(repository, file)

        if crowdin_file not in file_info:
            continue

        file_ids[file] = file_info[crowdin_file]['id']

    return file_ids
Beispiel #4
0
def get_crowdin_config_entry(repository, file):
    """Build the source/dest/translation entry for one source file.

    ``file`` must not be a directory. The translation pattern replaces the
    'en' path segment (a leading ``en/`` or the first-to-last ``/en/``
    occurrences) with the ``%two_letters_code%`` placeholder; the
    destination is the file's Crowdin path prefixed with ``/``.
    """
    assert not os.path.isdir(file)

    if file.startswith('en/'):
        translation = '%two_letters_code%/' + file[3:]
    else:
        translation = file.replace('/en/', '/%two_letters_code%/')

    return {
        'source': file,
        'dest': '/' + get_crowdin_file(repository, file),
        'translation': translation,
    }
Beispiel #5
0
def pre_translate(repository, code_check_needed, translation_needed,
                  file_info):
    """Clean up code translations, machine-translate gaps, refresh file info.

    For every file in ``code_check_needed`` that is present in ``file_info``
    and whose 'phrases' and 'translated' counts differ,
    ``delete_code_translations`` is run. Files in ``translation_needed``
    with the same mismatch are then sent through the 'deepl-translator' and
    'google-translate' engines, and a line per file is printed showing the
    two counts.

    Returns the result of a fresh ``get_crowdin_file_info(repository)``
    call. The file info is fetched exactly once; the previous code fetched
    it twice back to back and discarded the first result.
    """
    code_check_file_ids = get_file_ids(repository, code_check_needed,
                                       file_info)

    # Only the file names are needed here; the ids are not used.
    for file in sorted(code_check_file_ids):
        file_metadata = file_info[get_crowdin_file(repository, file)]

        if file_metadata['phrases'] != file_metadata['translated']:
            delete_code_translations(repository, file, file_info)

    translation_file_ids = get_file_ids(repository, translation_needed,
                                        file_info)

    # Keyed and valued by Crowdin file id, as before.
    missing_phrases_files = {}

    for file, file_id in sorted(translation_file_ids.items()):
        file_metadata = file_info[get_crowdin_file(repository, file)]
        phrases = file_metadata['phrases']
        translated = file_metadata['translated']

        if phrases != translated:
            missing_phrases_files[file_id] = file_id
            comparison = '!='
        else:
            comparison = '=='

        print('%s (%s %s %s)' % (file, phrases, comparison, translated))

    if missing_phrases_files:
        #translate_with_machine(repository, 'tm', missing_phrases_files)
        translate_with_machine(repository, 'deepl-translator',
                               missing_phrases_files)
        translate_with_machine(repository, 'google-translate',
                               missing_phrases_files)

    return get_crowdin_file_info(repository)
Beispiel #6
0
def get_issue_body(repository, folder, file_info):
    """Return the issue text listing every Crowdin file under ``folder``.

    Files are selected by checking that their ``file_info`` key starts with
    the folder's Crowdin path, and are processed in sorted key order. One
    line per file is produced by ``get_issue_line``; the lines are joined
    with newlines and stripped. An empty string is returned when nothing
    matches.
    """
    prefix = get_crowdin_file(repository, folder)

    matching_files = sorted(
        (key, metadata)
        for key, metadata in file_info.items()
        if key.startswith(prefix)
    )

    if not matching_files:
        return ''

    lines = []

    for key, metadata in matching_files:
        lines.append(get_issue_line(repository, prefix, key, metadata))

    return '\n'.join(lines).strip()
def add_mt_disclaimers(repository, file_info):
    """Append the machine-translation disclaimer to unapproved 'ja' files.

    Runs inside ``repository.github.git_root``. For each eligible 'en' file
    whose 'ja' counterpart exists (also trying the alternate ``.md`` /
    ``.markdown`` extension), and whose Crowdin 'translated' and 'approved'
    counts differ, any line containing the existing disclaimer markup is
    removed and ``disclaimer_text`` is appended. Each changed file is staged
    and a single commit is made at the end.

    The working directory is switched back to ``initial_dir`` even when a
    step fails, so an error here does not leave the process in the git root.
    """
    now = datetime.now()

    os.chdir(repository.github.git_root)

    try:
        root_folder = repository.github.single_folder \
            if repository.github.single_folder is not None else repository.github.project_folder

        for file in get_eligible_files(repository, git.ls_files(root_folder), 'en'):
            crowdin_file = get_crowdin_file(repository, file)

            target_file = 'ja/' + file[3:] if file[0:3] == 'en/' else file.replace('/en/', '/ja/')

            if not os.path.isfile('%s/%s' % (repository.github.git_root, target_file)):
                # The translation may use the other markdown extension.
                if target_file[-9:] == '.markdown':
                    target_file = target_file[:-9] + '.md'
                elif target_file[-3:] == '.md':
                    target_file = target_file[:-3] + '.markdown'

                if not os.path.isfile('%s/%s' % (repository.github.git_root, target_file)):
                    continue

            # Skip files unknown to Crowdin or with every translation approved.
            if crowdin_file not in file_info or file_info[crowdin_file]['translated'] == file_info[crowdin_file]['approved']:
                continue

            # Drop any previously added disclaimer line before appending.
            with open(target_file, 'r') as f:
                new_lines = [
                    line for line in f
                    if '<p class="alert alert-info"><span class="wysiwyg-color-blue120">' not in line
                ]

            content = '%s\n%s' % (''.join(new_lines), disclaimer_text)

            with open(target_file, 'w') as f:
                f.write(content)

            git.add(target_file)

        git.commit('-m', 'Added machine translation disclaimer %s' % now.strftime("%Y-%m-%d %H:%M:%S"))
    finally:
        os.chdir(initial_dir)
Beispiel #8
0
def update_translation_issues(repository, file_info):
    """Work out a target column for each translation issue and apply it.

    Issues of the 'human' milestone are fetched and matched to folders via
    ``get_issue_map``. The column is chosen from the folder's Crowdin
    counts: 4 when approved equals phrases, 3 when anything is approved,
    2 when translated equals phrases, 1 when anything is translated, else 0.
    Folders missing from ``file_info`` map to None. The result is passed to
    ``update_issue_columns``.
    """
    api_path = '/repos/%s/issues?milestone=%d' % (
        repository.github.origin, get_milestone_map(repository)['human'])

    _, _, issues = filter_github_request_all(api_path)

    def column_for(metadata):
        approved = metadata['approved']
        translated = metadata['translated']
        phrases = metadata['phrases']

        if approved == phrases:
            return 4
        if approved > 0:
            return 3
        if translated == phrases:
            return 2
        if translated > 0:
            return 1
        return 0

    target_column_map = {}

    # Invert folder -> issue number into issue number -> folder.
    folder_map = {}

    for folder, number in get_issue_map(repository).items():
        folder_map[number] = folder

    for issue in issues:
        issue_number = issue['number']

        if issue_number not in folder_map:
            continue

        folder = folder_map[issue_number]
        key = get_crowdin_file(repository, folder)

        if key in file_info:
            target_column_map[folder] = column_for(file_info[key])
        else:
            target_column_map[folder] = None

    update_issue_columns(repository, file_info, target_column_map)
Beispiel #9
0
def crowdin_download_translations(repository, refresh_files, file_info):
    """Configure Crowdin for ``refresh_files`` and download 'ja' translations.

    Nothing happens when ``refresh_files`` is empty. Otherwise
    ``configure_crowdin`` is called with every requested file and the
    Crowdin CLI 'download' command is run for language 'ja'.

    Each file is passed exactly once, in first-seen order. The previous
    code started from a full copy of ``refresh_files`` and then re-appended
    each file one or two more times, so the configured set was always the
    full input plus duplicates. ``file_info`` therefore never affected
    which files were configured; the parameter is kept for callers.
    """
    # dict.fromkeys drops repeated entries while keeping their order.
    updated_files = list(dict.fromkeys(refresh_files))

    if not updated_files:
        return

    configure_crowdin(repository, updated_files)

    _crowdin('download', '-l', 'ja')
def generate_html_files(repository, file_info):
    """Convert every existing 'ja' translation to HTML with pandoc.

    Runs inside ``repository.github.git_root``. For each eligible 'en' file
    whose 'ja' counterpart exists (also trying the alternate ``.md`` /
    ``.markdown`` extension), an ``.html`` file is generated next to it and
    staged. A single commit is made at the end.

    Returns a dict mapping each generated HTML path to ``get_title(file)``
    of the corresponding 'en' source. ``file_info`` is not used.

    The working directory is switched back to ``initial_dir`` even when a
    step fails, so an error here does not leave the process in the git root.
    """
    now = datetime.now()

    os.chdir(repository.github.git_root)

    titles = {}

    try:
        root_folder = repository.github.single_folder \
            if repository.github.single_folder is not None else repository.github.project_folder

        for file in get_eligible_files(repository, git.ls_files(root_folder), 'en'):
            target_file = 'ja/' + file[3:] if file[0:3] == 'en/' else file.replace('/en/', '/ja/')

            if not os.path.isfile('%s/%s' % (repository.github.git_root, target_file)):
                # The translation may use the other markdown extension.
                if target_file[-9:] == '.markdown':
                    target_file = target_file[:-9] + '.md'
                elif target_file[-3:] == '.md':
                    target_file = target_file[:-3] + '.markdown'

                if not os.path.isfile('%s/%s' % (repository.github.git_root, target_file)):
                    continue

            # Same path with the extension replaced by '.html'.
            html_file = target_file[:target_file.rfind('.')] + '.html'
            titles[html_file] = get_title(file)

            _pandoc(target_file, html_file, '--from=gfm', '--to=html')

            git.add(html_file)

        git.commit('-m', 'Updated pandoc conversion %s' % now.strftime("%Y-%m-%d %H:%M:%S"))
    finally:
        os.chdir(initial_dir)

    return titles
def sync_articles(repository,
                  domain,
                  language,
                  articles,
                  article_paths,
                  refresh_articles=None,
                  refresh_paths=None):
    """Update the repository, then post-process translated article files.

    When ``refresh_articles`` is given, the repository is updated for the
    values of ``refresh_paths`` with ``sync_sources=True``; otherwise for
    the values of ``article_paths`` with ``sync_sources=False``.

    Every entry of ``article_paths`` is then visited in sorted order inside
    ``repository.github.git_root``:

    * articles labelled with ``language`` but not 'mt', and files whose
      Crowdin 'phrases' and 'translated' counts differ, have their
      translation removed and checked out again;
    * files unknown to Crowdin or absent on disk are skipped;
    * the rest get the disclaimer applied via ``add_disclaimer_zendesk`` and
      are staged.

    A single commit is made at the end. Returns the ``file_info`` obtained
    from ``update_repository``.

    The previous working directory is restored even when a step fails.
    """
    if refresh_articles is not None:
        # NOTE(review): this branch assumes refresh_paths is supplied
        # whenever refresh_articles is -- confirm against callers.
        logging.info('Updating translations for %d articles' %
                     len(refresh_paths))

        _, _, file_info = update_repository(
            repository, list(refresh_paths.values()), sync_sources=True)
    else:
        logging.info('Downloading latest translations for %d articles' %
                     len(article_paths))

        _, _, file_info = update_repository(
            repository, list(article_paths.values()), sync_sources=False)

    old_dir = os.getcwd()

    os.chdir(repository.github.git_root)

    try:
        for article_id, file in sorted(article_paths.items()):
            article = articles[article_id]
            target_file = language + '/' + file[3:] if file[
                0:3] == 'en/' else file.replace('/en/', '/%s/' % language)

            if not os.path.isfile(target_file):
                continue

            if language in article['label_names'] and 'mt' not in article[
                    'label_names']:
                # Discard the downloaded file and check out the path again.
                print(target_file, 'not machine translated')
                os.remove(target_file)
                git.checkout(target_file)
                continue

            crowdin_file = get_crowdin_file(repository, file)

            if crowdin_file not in file_info:
                continue

            file_metadata = file_info[crowdin_file]

            if file_metadata['phrases'] != file_metadata['translated']:
                print(target_file, 'not fully translated')
                os.remove(target_file)
                git.checkout(target_file)
                continue

            new_title, old_content, new_content = add_disclaimer_zendesk(
                article, target_file, language)

            if old_content != new_content:
                with open(target_file, 'w') as f:
                    f.write('<h1>%s</h1>\n' % new_title)
                    f.write(new_content)

            git.add(target_file)

        if refresh_articles is not None:
            git.commit('-m', 'Translated new articles: %s' % datetime.now())
        else:
            git.commit('-m',
                       'Translated existing articles: %s' % datetime.now())
    finally:
        os.chdir(old_dir)

    return file_info