def get_orphaned_files(repository, update_result):
    """Pair Crowdin files whose local copy is missing with a rename candidate.

    Args:
        repository: Object exposing ``crowdin.dest_folder`` and
            ``github.git_root``.
        update_result: 3-tuple whose last element is the ``file_info``
            mapping (Crowdin path -> metadata dict); the first two elements
            are not used here.

    Returns:
        A list of ``(crowdin_path, candidate)`` tuples. An entry is produced
        only when exactly one *other* Crowdin file shares the base name of
        the file that is missing locally.
    """
    _, _, file_info = update_result

    # Crowdin entries under the destination folder that carry an 'id'.
    tracked_paths = []

    for path, metadata in file_info.items():
        if path.startswith(repository.crowdin.dest_folder) and 'id' in metadata:
            tracked_paths.append(path)

    # Local counterparts that no longer exist below the git root.
    missing_locally = []

    for path in tracked_paths:
        local_path = get_local_file(repository, path)

        if not os.path.isfile('%s/%s' % (repository.github.git_root, local_path)):
            missing_locally.append(local_path)

    # Group every tracked Crowdin path by its base name.
    paths_by_basename = defaultdict(list)

    for path in tracked_paths:
        paths_by_basename[path[path.rfind('/') + 1:]].append(path)

    # For each missing file, collect the other Crowdin paths with that name.
    candidates_by_path = {}

    for local_path in missing_locally:
        crowdin_path = get_crowdin_file(repository, local_path)
        basename = local_path[local_path.rfind('/') + 1:]

        candidates_by_path[crowdin_path] = [
            other for other in paths_by_basename[basename]
            if other != crowdin_path
        ]

    # Only an unambiguous (single) candidate is reported.
    return [
        (crowdin_path, candidates[0])
        for crowdin_path, candidates in candidates_by_path.items()
        if len(candidates) == 1
    ]
def delete_code_translations(repository, file_name, file_info):
    """Run the suggestion clean-up for one file and drop its local 'ja' copy.

    The function hands a pair of predicates plus a "hide" callback to
    ``process_suggestions``. How those callbacks are applied is decided by
    ``process_suggestions`` (not visible here); presumably the first
    predicate receives a parsed markup node and the second a suggestion
    record -- TODO confirm against that helper.

    Args:
        repository: Repository descriptor passed through to the helpers.
        file_name: Source file path (``en/...`` or containing ``/en/``).
        file_info: Mapping of Crowdin path -> metadata dict.

    Returns:
        ``False`` when the file is unknown to ``file_info`` or when
        ``process_suggestions`` returns a falsy value; ``True`` otherwise
        (after removing the local ``ja`` counterpart if it exists).
    """
    crowdin_path = get_crowdin_file(repository, file_name)

    if crowdin_path not in file_info:
        return False

    logging.info('Checking auto code translations for file %s' % file_name)

    def is_within_code_tag(x):
        # True for TOC markers, CVSS/CVE text, code/pre nodes (or anything
        # nested in them) and anything inside the 'front-matter' element.
        text = x.text

        if text:
            if text.startswith('[TOC'):
                return True

            if 'CVSS' in text and 'CVE' in text:
                return True

        if x.name in ('code', 'pre'):
            return True

        if x.find_parent('code') is not None:
            return True

        if x.find_parent('pre') is not None:
            return True

        return x.find_parent(attrs={'id': 'front-matter'}) is not None

    def is_rst_directive(x):
        text = x.text

        # A falsy text value is returned unchanged, as the caller only
        # receives whatever the short-circuit evaluation produced.
        if not text:
            return text

        return (
            '====' in text or '----' in text
            or text == '..' or '::' in text or text.startswith(':')
            or ':doc:' in text or ':ref:' in text
            or text.lower() == text
            or text.endswith('.md') or text.endswith('.rst')
        )

    def hide_translation(translation_id, response_data):
        # Skip the request when the translation is already flagged hidden.
        if response_data['translation']['hidden']:
            return

        crowdin_http_request(
            repository, '/backend/translation/change_visibility', 'GET',
            translation_id=translation_id, hidden=1)

    def always_true(x):
        return True

    def is_auto_translation(x):
        return x['user']['login'] == 'is-user'

    # .rst files use the directive predicate and accept every record;
    # everything else uses the code-tag predicate limited to 'is-user'.
    if file_name.endswith('.rst'):
        node_predicate, record_predicate = is_rst_directive, always_true
    else:
        node_predicate, record_predicate = is_within_code_tag, is_auto_translation

    has_suggestions = process_suggestions(
        repository, crowdin_path, file_info,
        node_predicate, hide_translation, record_predicate)

    if not has_suggestions:
        return False

    if file_name.startswith('en/'):
        target_file = 'ja/' + file_name[3:]
    else:
        target_file = file_name.replace('/en/', '/ja/')

    if os.path.exists(target_file):
        os.remove(target_file)

    return True
def get_file_ids(repository, files, file_info):
    """Map each local file to the 'id' of its Crowdin entry.

    Args:
        repository: Repository descriptor passed to ``get_crowdin_file``.
        files: Iterable of local file paths.
        file_info: Mapping of Crowdin path -> metadata dict with an 'id' key.

    Returns:
        A dict ``{file: id}`` containing only the files whose Crowdin path
        is present in ``file_info``.
    """
    # Resolve every Crowdin path first, then filter against file_info.
    crowdin_paths = {}

    for file in files:
        crowdin_paths[file] = get_crowdin_file(repository, file)

    file_ids = {}

    for file, crowdin_path in crowdin_paths.items():
        if crowdin_path in file_info:
            file_ids[file] = file_info[crowdin_path]['id']

    return file_ids
def get_crowdin_config_entry(repository, file):
    """Build the source/dest/translation entry for a single file.

    Args:
        repository: Repository descriptor passed to ``get_crowdin_file``.
        file: Path of a source file; must not be a directory.

    Returns:
        A dict with the keys ``'source'`` (the file as given), ``'dest'``
        (the Crowdin path with a leading slash) and ``'translation'`` (the
        file path with its ``en`` folder replaced by the
        ``%two_letters_code%`` placeholder).
    """
    assert not os.path.isdir(file)

    # A leading 'en/' is swapped directly; otherwise any '/en/' is replaced.
    translation = (
        '%two_letters_code%/' + file[3:]
        if file.startswith('en/')
        else file.replace('/en/', '/%two_letters_code%/')
    )

    return {
        'source': file,
        'dest': '/' + get_crowdin_file(repository, file),
        'translation': translation,
    }
def pre_translate(repository, code_check_needed, translation_needed, file_info):
    """Clean up code translations, machine-translate gaps, return fresh info.

    Args:
        repository: Repository descriptor passed through to the helpers.
        code_check_needed: Local files to run ``delete_code_translations``
            on when they are not fully translated.
        translation_needed: Local files to machine-translate when they are
            not fully translated.
        file_info: Mapping of Crowdin path -> metadata dict with the keys
            'id', 'phrases' and 'translated'.

    Returns:
        The result of a fresh ``get_crowdin_file_info(repository)`` call,
        made after all other work is done.
    """
    code_check_ids = get_file_ids(repository, code_check_needed, file_info)

    for file in sorted(code_check_ids):
        metadata = file_info[get_crowdin_file(repository, file)]

        if metadata['phrases'] != metadata['translated']:
            delete_code_translations(repository, file, file_info)

    translation_ids = get_file_ids(repository, translation_needed, file_info)

    missing_phrases_files = {}

    for file, file_id in sorted(translation_ids.items()):
        metadata = file_info[get_crowdin_file(repository, file)]
        phrases = metadata['phrases']
        translated = metadata['translated']

        if phrases != translated:
            # NOTE(review): the mapping is keyed by the Crowdin id (id -> id),
            # not by `file`. Kept as-is because translate_with_machine is not
            # visible here -- confirm which side of the mapping it consumes.
            missing_phrases_files[file_id] = file_id
            print('%s (%s != %s)' % (file, phrases, translated))
        else:
            print('%s (%s == %s)' % (file, phrases, translated))

    if missing_phrases_files:
        # A 'tm' engine pass used to precede these two and is disabled.
        translate_with_machine(repository, 'deepl-translator', missing_phrases_files)
        translate_with_machine(repository, 'google-translate', missing_phrases_files)

    # Fetch once, after any machine translation. The previous code also
    # fetched inside the branch above and then discarded that result.
    return get_crowdin_file_info(repository)
def get_issue_body(repository, folder, file_info):
    """Assemble the issue text for every Crowdin file under a folder.

    Args:
        repository: Repository descriptor passed through to the helpers.
        folder: Local folder whose Crowdin path is used as a prefix.
        file_info: Mapping of Crowdin path -> metadata dict.

    Returns:
        The lines produced by ``get_issue_line`` for each matching entry
        (sorted by Crowdin path), joined with newlines and stripped; an
        empty string when nothing matches.
    """
    prefix = get_crowdin_file(repository, folder)

    matching_entries = sorted(
        entry for entry in file_info.items() if entry[0].startswith(prefix))

    if not matching_entries:
        return ''

    lines = []

    for crowdin_path, metadata in matching_entries:
        lines.append(get_issue_line(repository, prefix, crowdin_path, metadata))

    return '\n'.join(lines).strip()
def add_mt_disclaimers(repository, file_info):
    """Append the disclaimer text to 'ja' files that are not fully approved.

    For every eligible 'en' file tracked by git, the matching 'ja' file is
    located (trying the alternate ``.md`` / ``.markdown`` extension when the
    first guess does not exist). Files unknown to ``file_info`` or whose
    'translated' count equals their 'approved' count are skipped. For the
    rest, lines containing the disclaimer marker are dropped, the module
    level ``disclaimer_text`` is appended, and the file is staged. A single
    commit is made at the end.

    Args:
        repository: Object exposing ``github.git_root``,
            ``github.single_folder`` and ``github.project_folder``.
        file_info: Mapping of Crowdin path -> metadata dict with the keys
            'translated' and 'approved'.
    """
    marker = '<p class="alert alert-info"><span class="wysiwyg-color-blue120">'

    now = datetime.now()
    git_root = repository.github.git_root

    os.chdir(git_root)

    # Always return to initial_dir, even when a git or file operation fails;
    # previously an exception left the process inside the git root.
    try:
        root_folder = repository.github.single_folder

        if root_folder is None:
            root_folder = repository.github.project_folder

        for file in get_eligible_files(repository, git.ls_files(root_folder), 'en'):
            crowdin_file = get_crowdin_file(repository, file)

            if file.startswith('en/'):
                target_file = 'ja/' + file[3:]
            else:
                target_file = file.replace('/en/', '/ja/')

            if not os.path.isfile('%s/%s' % (git_root, target_file)):
                # The translated file may use the other markdown extension.
                if target_file.endswith('.markdown'):
                    target_file = target_file[:-9] + '.md'
                elif target_file.endswith('.md'):
                    target_file = target_file[:-3] + '.markdown'

                if not os.path.isfile('%s/%s' % (git_root, target_file)):
                    continue

            if crowdin_file not in file_info:
                continue

            metadata = file_info[crowdin_file]

            if metadata['translated'] == metadata['approved']:
                continue

            # Drop any line carrying the marker before appending the text
            # again, so the disclaimer does not accumulate across runs.
            with open(target_file, 'r') as f:
                kept_lines = [line for line in f if marker not in line]

            content = '%s\n%s' % (''.join(kept_lines), disclaimer_text)

            with open(target_file, 'w') as f:
                f.write(content)

            git.add(target_file)

        git.commit('-m', 'Added machine translation disclaimer %s' % now.strftime("%Y-%m-%d %H:%M:%S"))
    finally:
        os.chdir(initial_dir)
def update_translation_issues(repository, file_info):
    """Compute a target column per tracked folder and push the update.

    Issues are fetched for the 'human' milestone. For each issue whose
    number appears in the issue map, the folder's Crowdin entry decides the
    column:

        4 - approved equals phrases
        3 - some phrases approved
        2 - translated equals phrases
        1 - some phrases translated
        0 - nothing translated
        None - folder has no entry in ``file_info``

    Args:
        repository: Object exposing ``github.origin``; also passed through
            to the helpers.
        file_info: Mapping of Crowdin path -> metadata dict with the keys
            'approved', 'translated' and 'phrases'.
    """
    api_path = '/repos/%s/issues?milestone=%d' % (
        repository.github.origin, get_milestone_map(repository)['human'])

    _, _, issues = filter_github_request_all(api_path)

    # Invert folder -> issue number into issue number -> folder.
    folder_by_issue = {
        number: folder
        for folder, number in get_issue_map(repository).items()
    }

    target_column_map = {}

    for issue in issues:
        number = issue['number']

        if number not in folder_by_issue:
            continue

        folder = folder_by_issue[number]
        crowdin_path = get_crowdin_file(repository, folder)

        if crowdin_path not in file_info:
            target_column_map[folder] = None
            continue

        stats = file_info[crowdin_path]
        approved = stats['approved']
        translated = stats['translated']
        phrases = stats['phrases']

        if approved == phrases:
            column = 4
        elif approved > 0:
            column = 3
        elif translated == phrases:
            column = 2
        elif translated > 0:
            column = 1
        else:
            column = 0

        target_column_map[folder] = column

    update_issue_columns(repository, file_info, target_column_map)
def crowdin_download_translations(repository, refresh_files, file_info):
    """Configure Crowdin for the given files and download 'ja' translations.

    The previous implementation started from ``list(refresh_files)`` and then
    looped over the same files, appending each one again (once if it was in
    ``file_info``, twice if its 'ja' file was also missing). That loop could
    never add a file that was not already listed, so its only effect was to
    hand duplicates to ``configure_crowdin``. Each file is now passed once,
    in first-seen order.

    Args:
        repository: Repository descriptor passed to ``configure_crowdin``.
        refresh_files: Iterable of local source file paths.
        file_info: Unused; retained so existing callers keep working.
    """
    updated_files = []
    seen = set()

    for file in refresh_files:
        if file not in seen:
            seen.add(file)
            updated_files.append(file)

    if updated_files:
        configure_crowdin(repository, updated_files)

        _crowdin('download', '-l', 'ja')
def generate_html_files(repository, file_info):
    """Convert each existing 'ja' markdown file to HTML and commit the result.

    For every eligible 'en' file tracked by git, the matching 'ja' file is
    located (trying the alternate ``.md`` / ``.markdown`` extension when the
    first guess does not exist), converted with ``_pandoc`` from ``gfm`` to
    ``html`` next to the source, and the HTML file is staged. A single
    commit is made at the end.

    Args:
        repository: Object exposing ``github.git_root``,
            ``github.single_folder`` and ``github.project_folder``.
        file_info: Unused; retained so existing callers keep working.

    Returns:
        A dict mapping each generated HTML path to ``get_title(file)`` of
        the corresponding 'en' source file.
    """
    now = datetime.now()
    git_root = repository.github.git_root

    os.chdir(git_root)

    titles = {}

    # Always return to initial_dir, even when pandoc or git fails;
    # previously an exception left the process inside the git root.
    try:
        root_folder = repository.github.single_folder

        if root_folder is None:
            root_folder = repository.github.project_folder

        for file in get_eligible_files(repository, git.ls_files(root_folder), 'en'):
            if file.startswith('en/'):
                target_file = 'ja/' + file[3:]
            else:
                target_file = file.replace('/en/', '/ja/')

            if not os.path.isfile('%s/%s' % (git_root, target_file)):
                # The translated file may use the other markdown extension.
                if target_file.endswith('.markdown'):
                    target_file = target_file[:-9] + '.md'
                elif target_file.endswith('.md'):
                    target_file = target_file[:-3] + '.markdown'

                if not os.path.isfile('%s/%s' % (git_root, target_file)):
                    continue

            html_file = target_file[:target_file.rfind('.')] + '.html'

            titles[html_file] = get_title(file)

            _pandoc(target_file, html_file, '--from=gfm', '--to=html')

            git.add(html_file)

        git.commit('-m', 'Updated pandoc conversion %s' % now.strftime("%Y-%m-%d %H:%M:%S"))
    finally:
        os.chdir(initial_dir)

    return titles
def sync_articles(repository, domain, language, articles, article_paths,
                  refresh_articles=None, refresh_paths=None):
    """Update the repository, then stage translated articles and commit.

    When ``refresh_articles`` is given, the repository is updated for
    ``refresh_paths`` with ``sync_sources=True``; otherwise it is updated
    for ``article_paths`` with ``sync_sources=False``. In both cases the
    loop below walks ``article_paths``.

    For each article whose translated file exists:
      * if the article is labelled with ``language`` but not with ``'mt'``,
        the file is removed and checked out again from git;
      * if the file is unknown to Crowdin it is skipped;
      * if it is not fully translated, it is removed and checked out again;
      * otherwise ``add_disclaimer_zendesk`` is applied, the file is
        rewritten when its content changed, and it is staged.

    Args:
        repository: Object exposing ``github.git_root``; also passed through
            to the helpers.
        domain: Not used inside this function.
        language: Target language folder name (e.g. the replacement for
            ``en`` in the file path).
        articles: Mapping of article id -> article dict with 'label_names'.
        article_paths: Mapping of article id -> source file path.
        refresh_articles: When not ``None``, selects the "refresh" mode.
        refresh_paths: Mapping whose values are the paths to update in
            "refresh" mode; must be provided together with
            ``refresh_articles``.

    Returns:
        The ``file_info`` mapping returned by ``update_repository``.
    """
    refreshing = refresh_articles is not None

    if refreshing:
        logging.info('Updating translations for %d articles' % len(refresh_paths))
        update_paths = refresh_paths
    else:
        logging.info('Downloading latest translations for %d articles' % len(article_paths))
        update_paths = article_paths

    _, _, file_info = update_repository(
        repository, list(update_paths.values()), sync_sources=refreshing)

    def restore_committed(path, reason):
        # Discard the working copy and bring back the version git has.
        print(path, reason)
        os.remove(path)
        git.checkout(path)

    old_dir = os.getcwd()

    os.chdir(repository.github.git_root)

    # Always return to the caller's directory, even when a git or file
    # operation fails; previously an exception left the cwd changed.
    try:
        for article_id, file in sorted(article_paths.items()):
            article = articles[article_id]

            if file.startswith('en/'):
                target_file = language + '/' + file[3:]
            else:
                target_file = file.replace('/en/', '/%s/' % language)

            if not os.path.isfile(target_file):
                continue

            labels = article['label_names']

            if language in labels and 'mt' not in labels:
                restore_committed(target_file, 'not machine translated')
                continue

            crowdin_file = get_crowdin_file(repository, file)

            if crowdin_file not in file_info:
                continue

            file_metadata = file_info[crowdin_file]

            if file_metadata['phrases'] != file_metadata['translated']:
                restore_committed(target_file, 'not fully translated')
                continue

            new_title, old_content, new_content = add_disclaimer_zendesk(
                article, target_file, language)

            if old_content != new_content:
                with open(target_file, 'w') as f:
                    f.write('<h1>%s</h1>\n' % new_title)
                    f.write(new_content)

            git.add(target_file)

        if refreshing:
            git.commit('-m', 'Translated new articles: %s' % datetime.now())
        else:
            git.commit('-m', 'Translated existing articles: %s' % datetime.now())
    finally:
        os.chdir(old_dir)

    return file_info