def convert_ts_files_to_po(self):
    """Run ``ts2po`` on every ``*.ts`` path found for ``self.temp_dir``.

    The output path is the input path with its extension replaced by
    ``.po``.
    """
    finder = FindFiles()
    for ts_path in finder.find(self.temp_dir, '*.ts'):
        # Path without the extension; it doubles as the log label.
        base_path = os.path.splitext(ts_path)[0]
        logging.info('converting: {0}'.format(base_path))
        command = 'ts2po {0} -o {1}.po'.format(ts_path, base_path)
        os.system(command)
def _remove_non_translation_files(self):
    """Delete the found paths that do not match ``self.pattern``.

    Candidates come from ``FindFiles.find(self.temp_dir, "*")``. A path is
    kept when ``re.match`` (anchored at the start of the path) succeeds.
    """
    finder = FindFiles()
    for path in finder.find(self.temp_dir, "*"):
        if re.match(self.pattern, path) is not None:
            continue
        if not os.path.exists(path):
            continue
        os.remove(path)
def convert(self):
    """Create the shared ``FindFiles`` helper and run each conversion step.

    The steps run in a fixed order; each one is a method of this object.
    """
    self.findFiles = FindFiles()

    steps = (
        '_convert_ts_files_to_po',
        '_convert_string_files_to_po',
        '_convert_ini_files_to_po',
        '_convert_php_resources_files_to_po',
        '_convert_android_resources_files_to_po',
        '_convert_properties_files_to_po',
    )
    for step in steps:
        # Resolve each method right before calling it, one at a time.
        getattr(self, step)()
def process_files(self):
    """Process every ``*.json`` file found for ``self.json_dir``.

    Adds up the values returned by ``self._process_file`` and prints a
    one-line summary with the file count and that total.
    """
    json_files = FindFiles().find_recursive(self.json_dir, '*.json')
    indexed = sum(self._process_file(path) for path in json_files)

    summary = "Processed {0} files, indexed {1} variants"
    print(summary.format(len(json_files), indexed))
def _remove_non_translation_files(self):
    """Remove found paths whose name does not match ``self.pattern``.

    Only paths that still exist on disk are removed.
    """
    candidates = FindFiles().find(self.temp_dir, '*')
    for candidate in candidates:
        is_translation = re.match(self.pattern, candidate) is not None
        if not is_translation and os.path.exists(candidate):
            os.remove(candidate)
def convert_ini_files_to_po(self):
    """Convert the ``en.ini``/``ca.ini`` pair of each directory to a PO file.

    For every directory that holds at least one ``*.ini`` file, ``prop2po``
    is run with ``en.ini`` as the template and ``ca.ini`` as the input,
    writing ``strings-ca.po`` into the same directory.
    """
    findFiles = FindFiles()
    cmd = ("prop2po -t {0}/en.ini {0}/ca.ini --encoding=utf-8 "
           "--personality=strings -o {1}")

    converted_dirs = set()
    for inifile in findFiles.find(self.temp_dir, "*.ini"):
        dirName = os.path.dirname(inifile)

        # The command works on the directory, not on the matched file, so
        # a second match in the same directory (en.ini after ca.ini, or the
        # other way round) would only repeat the same conversion.
        if dirName in converted_dirs:
            continue
        converted_dirs.add(dirName)

        logging.info("convert: {0}".format(dirName))
        filename = "{0}/strings-ca.po".format(dirName)
        os.system(cmd.format(dirName, filename))
def convert_ini_files_to_po(self):
    """Run ``prop2po`` once per directory that contains ``*.ini`` files.

    The command reads ``en.ini`` (template) and ``ca.ini`` (input) from the
    directory and writes ``strings-ca.po`` next to them.
    """
    findFiles = FindFiles()

    # Collect each directory once, in first-seen order. The command below
    # depends only on the directory, so running it once per matched file
    # (en.ini and ca.ini both match) would repeat the same conversion.
    ini_dirs = []
    for inifile in findFiles.find(self.temp_dir, '*.ini'):
        dirName = os.path.dirname(inifile)
        if dirName not in ini_dirs:
            ini_dirs.append(dirName)

    cmd = 'prop2po -t {0}/en.ini {0}/ca.ini --encoding=utf-8 ' \
          '--personality=strings -o {1}'
    for dirName in ini_dirs:
        logging.info('convert: {0}'.format(dirName))
        filename = '{0}/strings-ca.po'.format(dirName)
        os.system(cmd.format(dirName, filename))
def _remove_non_translation_files(self):
    """
        Clean up other PO files (fr.po, es.po, ...) so that they are not
        added to the translation memory
    """
    for path in FindFiles().find(self.temp_dir, '*'):
        matches_pattern = re.match(self.pattern, path) is not None
        if matches_pattern:
            continue

        if os.path.exists(path):
            os.remove(path)
def add_comments(self):
    """Add a "Translation source" comment to each ``*.po`` file found.

    Does nothing unless ``self.add_source`` is truthy. The comment names the
    file path with ``self.temp_dir`` stripped out, plus ``self.project``.
    """
    if self.add_source:
        template = "Translation source: {0} from project '{1}'"
        for po_path in FindFiles().find(self.temp_dir, "*.po"):
            relative = po_path.replace(self.temp_dir, "")
            po_writer = POFile()
            comment = template.format(relative, self.project)
            po_writer.add_comment_to_all_entries(po_path, comment)
def build(self):
    """Convert the fileset and merge its PO files into ``self.tm_file``.

    Steps:
      1. Run the file conversions, ``clean_up_after_convert`` and
         ``add_comments``.
      2. Merge every non-excluded ``*.po`` file into a local scratch file
         (``tm-local.po``) with ``msgcat --use-first``.
      3. Merge the scratch file into ``self.tm_file``, or copy it there if
         the project file does not exist yet.
      4. Remove the scratch file and call ``self._clean_up()``.

    Returns early, without calling ``_clean_up``, when no PO files are
    found.
    """
    convert = ConvertFiles()
    convert.convert()
    self.clean_up_after_convert()
    self.add_comments()

    findFiles = FindFiles()
    files = findFiles.find(self.temp_dir, '*.po')

    if len(files) == 0:
        logging.info('No files to add in fileset: {0}'.
                     format(self.name))
        return

    localtm = 'tm-local.po'
    backup = 'tm-project-previous.po'
    cmd = 'msgcat -tutf-8 --use-first -o {0} {1} {2} 2> /dev/null'

    def merge_into(target, source):
        # Merge `source` into `target`; the first merge is a plain copy.
        if os.path.isfile(target):
            # Snapshot the target so it can be an input and the output.
            shutil.copy(target, backup)
            os.system(cmd.format(target, backup, source))
            os.remove(backup)
        else:
            shutil.copy(source, target)

    if os.path.isfile(localtm):
        os.remove(localtm)

    # Build using a local memory translation file
    for filename in files:
        if self._should_exclude_file(filename):
            continue

        msg = 'Adding file: {0} to translation memory'
        logging.info(msg.format(filename))
        merge_into(localtm, filename)

    # Add to the project TM. When every file was excluded the local file
    # was never created; merging or copying it would fail, so skip it.
    if os.path.isfile(localtm):
        merge_into(self.tm_file, localtm)
        os.remove(localtm)

    self._clean_up()
def _copy_to_output(self):
    """Copy every found ``*.po`` file from the temp dir to the output dir.

    The part of each path after the ``self.temp_dir`` prefix is appended to
    ``self.output_dir``. Missing destination directories are created first.
    """
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)

    for po_path in FindFiles().find(self.temp_dir, '*.po'):
        prefix_len = len(self.temp_dir)
        parent = os.path.dirname(po_path)

        # Files directly inside temp_dir go straight into output_dir,
        # which already exists at this point.
        if parent != self.temp_dir:
            mirrored_dir = self.output_dir + parent[prefix_len:]
            if not os.path.exists(mirrored_dir):
                os.makedirs(mirrored_dir)

        destination = self.output_dir + po_path[prefix_len:]
        shutil.copy(po_path, destination)
def add_comments(self):
    """Add a "Translation source" comment to every found ``*.po`` file.

    No-op when ``self.add_source`` is falsy. Each file is wrapped in a
    ``POFile`` and the comment is applied to all of its entries.
    """
    if self.add_source:
        template = "Translation source: {0} from project '{1}'"
        for po_path in FindFiles().find(self.temp_dir, '*.po'):
            relative = po_path.replace(self.temp_dir, '')
            po_file = POFile(po_path)
            comment = template.format(relative, self.project)
            po_file.add_comment_to_all_entries(comment)
def process(self):
    """Read every ``*.po`` file found for ``self.directory`` and index it.

    For each translated entry, the cleaned source and target strings are
    checked with ``_should_select_string``. Selected pairs are:

      * logged, one per line, to ``corpus.txt`` (UTF-8);
      * recorded in ``self.source_words``;
      * grouped per file as ``{msgid: [msgstr, ...]}`` and stored in
        ``self.documents[filename]``.

    The counters ``self.strings``, ``self.strings_selected`` and
    ``self.files`` are updated along the way.
    """
    import io

    # NOTE(review): assumes _read_stop_words consumes the file before
    # returning, so it can be closed right after the call.
    with open("stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # io.open encodes on write, so the same code runs on Python 2 and 3,
    # and the `with` block closes the file even if parsing fails.
    with io.open('corpus.txt', 'w', encoding='utf-8') as f:
        for filename in findFiles.find(self.directory, '*.po'):
            print("Reading: " + filename)
            pofile = polib.pofile(filename)

            terms = {}
            for entry in pofile.translated_entries():
                self.strings += 1

                msgid = self._clean_string(entry.msgid)
                msgstr = self._clean_string(entry.msgstr)

                if self._should_select_string(msgid, msgstr) is False:
                    continue

                self.strings_selected += 1

                log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'. \
                    format(msgid, entry.msgid, msgstr, entry.msgstr,
                           filename)
                f.write(log)

                self.source_words.add(msgid)
                terms.setdefault(msgid, []).append(msgstr)

            self.documents[filename] = terms
            self.files += 1
def build(self):
    """Merge the fileset's ``*.po`` files into ``self.tm_file``.

    Every found PO file whose path does not contain one of the
    ``self.excluded`` substrings is merged, with ``msgcat --use-first``,
    into a local scratch file (``tm-local.po``). That file is then merged
    into ``self.tm_file``, or copied there if the project file does not
    exist yet. Finally the scratch file is removed and ``self._clean_up()``
    is called.
    """
    findFiles = FindFiles()
    localtm = 'tm-local.po'
    backup = 'tm-project-previous.po'
    cmd = 'msgcat -tutf-8 --use-first -o {0} {1} {2}'

    def merge_into(target, source):
        # Merge `source` into `target`; the first merge is a plain copy.
        if os.path.isfile(target):
            # Snapshot the target so it can be an input and the output.
            shutil.copy(target, backup)
            os.system(cmd.format(target, backup, source))
            os.remove(backup)
        else:
            shutil.copy(source, target)

    if os.path.isfile(localtm):
        os.remove(localtm)

    # Build using a local memory translation file
    for filename in findFiles.find(self.temp_dir, '*.po'):
        # Parenthesised single-argument print works on Python 2 and 3.
        print('Do: {0}'.format(filename))

        if any(exfilename in filename for exfilename in self.excluded):
            logging.info('Excluding file: {0}'.format(filename))
            continue

        msg = 'Adding file: {0} to translation memory'
        logging.info(msg.format(filename))
        merge_into(localtm, filename)

    # Add to the project TM. With no files, or all of them excluded, the
    # local file was never created; merging or copying it would fail.
    if os.path.isfile(localtm):
        merge_into(self.tm_file, localtm)
        os.remove(localtm)

    self._clean_up()
def build(self):
    """Convert the fileset and feed its PO files to the TM helper steps.

    After conversion, clean-up and commenting, the found ``*.po`` files are
    passed to ``_build_tm_for_fileset``. Then
    ``_add_tm_for_fileset_to_project_tm`` and ``_delete_tm_fileset`` run on
    the same ``fileset-tm.po`` name. If no PO file is found, this logs a
    message and returns.
    """
    ConvertFiles().convert()
    self.clean_up_after_convert()
    self.add_comments()

    po_files = FindFiles().find(self.temp_dir, '*.po')
    if len(po_files) == 0:
        logging.info('No files to add in fileset: {0}'.format(self.name))
        return

    fileset_tm = 'fileset-tm.po'
    self.po_catalog = POCatalog(fileset_tm)

    self._build_tm_for_fileset(fileset_tm, po_files)
    self._add_tm_for_fileset_to_project_tm(fileset_tm)
    self._delete_tm_fileset(fileset_tm)
def add_comments(self):
    """Add a "Source" comment and a checksum to every found ``*.po`` file.

    No-op when ``self.add_source`` is falsy. The comment shows only the
    project name when it equals the fileset name (ignoring case and
    surrounding whitespace); otherwise it shows "project - fileset".
    """
    if not self.add_source:
        return

    for po_path in FindFiles().find(self.temp_dir, '*.po'):
        relative = po_path.replace(self.temp_dir, '')
        po_file = POFile(po_path)

        project_key = self.project_name.lower().strip()
        fileset_key = self.name.lower().strip()
        if project_key == fileset_key:
            comment = "Source: {0} from project '{1}'".format(
                relative, self.project_name)
        else:
            comment = "Source: {0} from project '{1} - {2}'".format(
                relative, self.project_name, self.name)

        po_file.add_comment_to_all_entries(comment)
        po_file.calculate_localized_string_checksum(self.checksum)
def build(self):
    """Run the conversions, then process the resulting ``*.po`` files.

    With at least one PO file found, a ``POCatalog`` is created for
    ``fileset-tm.po`` and the build / add-to-project / delete helper steps
    run in that order. With none, only an informational message is logged.
    """
    converter = ConvertFiles()
    converter.convert()
    self.clean_up_after_convert()
    self.add_comments()

    finder = FindFiles()
    po_files = finder.find(self.temp_dir, '*.po')

    if len(po_files) == 0:
        message = 'No files to add in fileset: {0}'.format(self.name)
        logging.info(message)
    else:
        fileset_tm = 'fileset-tm.po'
        self.po_catalog = POCatalog(fileset_tm)
        self._build_tm_for_fileset(fileset_tm, po_files)
        self._add_tm_for_fileset_to_project_tm(fileset_tm)
        self._delete_tm_fileset(fileset_tm)
def _check_number_of_files(self, tm_filename, extensions, expected_files,
                           minimum_size):
    """Check the number and minimum size of the files found.

    Searches ``self.temp_dir`` with the ``extensions`` pattern. Every file
    smaller than ``minimum_size`` bytes, and a count different from
    ``expected_files``, each add one to ``self.errors`` and print a
    message. ``tm_filename`` is used only in the count message.
    """
    found = 0
    for path in FindFiles().find(self.temp_dir, extensions):
        found += 1

        actual_size = os.path.getsize(path)
        if actual_size < minimum_size:
            self.errors += 1
            size_msg = 'File {0} has size {1} but expected was at least {2}'
            print(size_msg.format(path, actual_size, minimum_size))

    if found != expected_files:
        self.errors += 1
        count_msg = '{0} expected {1} files but contains {2}'
        print(count_msg.format(tm_filename, expected_files, found))
def _check_pofiles_content(self):
    """
        Check whether strings in a language other than Catalan were
        included by mistake in the translation memories.

        For every found ``*.po`` file with more than 100 entries, counts
        the characters of the translated strings that belong to
        ``invalid_chars``. When that count exceeds THRESHOLD_PERCENTAGE
        percent of the number of entries, ``self.errors`` is incremented
        and a message is printed.
    """
    # The list of invalid chars is specific to Catalan language
    invalid_chars = {u'á', u'ñ', u'ë', u'ù', u'â', u'ê', u'î', u'ô',
                     u'û', u'ÿ', u'ä', u'ö'}
    THRESHOLD_PERCENTAGE = 1

    try:
        findFiles = FindFiles()
        for filename in findFiles.find(self.temp_dir, "*.po"):
            poFile = pofile(filename)

            invalid = 0
            for entry in poFile:
                # Only localized segments. Skips developers names,
                # untranslated country names, etc
                if entry.msgid == entry.msgstr:
                    continue

                invalid += sum(1 for char in entry.msgstr.lower()
                               if char in invalid_chars)

            if len(poFile) > 100 and invalid > 0:
                percentage = 100.0 * invalid / len(poFile)
                if percentage > THRESHOLD_PERCENTAGE:
                    self.errors = self.errors + 1
                    # Parenthesised single-argument print works on
                    # Python 2 and 3.
                    print("Unsual number of invalid chars at {0} ({1}%)".
                          format(filename, str(percentage)))

    except Exception as detail:
        # Best effort: a failure stops the check but is only reported.
        print(detail)
def _remove_non_translation_only_files(self):
    """Delete every found path that ends with ``en.po`` or ``en.ts``."""
    english_suffixes = ('en.po', 'en.ts')
    for path in FindFiles().find(self.temp_dir, '*'):
        if path.endswith(english_suffixes):
            os.remove(path)
class ConvertFiles:
    """Convert the translation files found for a directory into PO files.

    Each ``_convert_*`` method looks for one kind of file with the shared
    ``FindFiles`` helper and runs an external converter via ``os.system``.
    """

    def __init__(self, convert_dir):
        self.convert_dir = convert_dir
        # Set by convert(); every _convert_* method relies on it.
        self.findFiles = None

    def convert(self):
        """Create the file finder and run each conversion in a fixed order."""
        self.findFiles = FindFiles()

        steps = (
            "_convert_ts_files_to_po",
            "_convert_string_files_to_po",
            "_convert_ini_files_to_po",
            "_convert_php_resources_files_to_po",
            "_convert_android_resources_files_to_po",
            "_convert_properties_files_to_po",
            "_convert_json_files_to_po",
            "_convert_yml_files_to_po",
        )
        for step in steps:
            getattr(self, step)()

    def _convert_ts_files_to_po(self):
        """Run ``ts2po`` on each ``*.ts`` file, writing ``<name>.po``."""
        for ts_path in self.findFiles.find(self.convert_dir, "*.ts"):
            stem = os.path.splitext(ts_path)[0]
            logging.info("convert ts file: {0}".format(ts_path))
            os.system("ts2po {0} -o {1}.po".format(ts_path, stem))

    def _convert_string_files_to_po(self):
        """Run ``prop2po`` in each directory holding a ``ca.strings`` file."""
        # "--duplicates merge" allows files with duplicated entries.
        template = ("prop2po -t {0}/en.strings {0}/ca.strings "
                    "--personality strings --duplicates merge -o {1}")

        for ca_path in self.findFiles.find(self.convert_dir, "ca.strings"):
            folder = os.path.dirname(ca_path)
            logging.info("convert strings file: {0}".format(folder))
            po_path = "{0}/strings-ca.po".format(folder)
            os.system(template.format(folder, po_path))

    def _convert_properties_files_to_po(self):
        """Run ``prop2po`` in each directory holding a ``ca.properties``."""
        # "--duplicates merge" allows files with duplicated entries.
        template = ("prop2po -t {0}/en.properties {0}/ca.properties "
                    "--personality java --duplicates merge -o {1}")

        for ca_path in self.findFiles.find(self.convert_dir,
                                           "ca.properties"):
            folder = os.path.dirname(ca_path)
            logging.info("convert properties file: {0}".format(folder))
            po_path = "{0}/properties-ca.po".format(folder)
            os.system(template.format(folder, po_path))

    def _convert_ini_files_to_po(self):
        """Rename the ini pair to ``.strings`` files and run ``prop2po``."""
        # http://bugs.locamotion.org/show_bug.cgi?id=3148
        # The renames can be dropped once that issue is fixed.
        renames = (
            ("{0}/en.ini", "{0}/en.strings"),
            ("{0}/ca.ini", "{0}/ca.strings"),
        )
        template = ("prop2po -t {0}/en.strings {0}/ca.strings "
                    "--encoding=utf-8 --personality=strings -o {1}")

        for ini_path in self.findFiles.find(self.convert_dir, "ca.ini"):
            folder = os.path.dirname(ini_path)
            logging.info("convert ini file: {0}".format(ini_path))

            for old_name, new_name in renames:
                os.rename(old_name.format(folder), new_name.format(folder))

            po_path = "{0}/strings-ca.po".format(folder)
            os.system(template.format(folder, po_path))

    def _convert_php_resources_files_to_po(self):
        """Run ``php2po`` from ``convert_dir`` if any ``*.php`` file exists."""
        php_files = self.findFiles.find(self.convert_dir, "*.php")
        if len(php_files) == 0:
            return

        logging.info("convert php directory: {0}".format(self.convert_dir))

        # Arbitrary name, not tied to any expected directory structure.
        OUT_DIRNAME = "po-files"
        template = "cd {0} && php2po -t en -i ca -o {1}"
        os.system(template.format(self.convert_dir, OUT_DIRNAME))

    def _convert_android_resources_files_to_po(self):
        """Run ``a2po init ca`` from ``convert_dir`` if any ``*.xml`` exists."""
        xml_files = self.findFiles.find(self.convert_dir, "*.xml")
        if len(xml_files) == 0:
            return

        logging.info(
            "convert Android directory: {0}".format(self.convert_dir))

        # See: https://pypi.python.org/pypi/android2po/1.2.0
        # Without "--gettext ." the file is written to ../locale, which in
        # our case is outside the tmp directory.
        template = "cd {0} && a2po init ca --gettext ."
        os.system(template.format(self.convert_dir))

    def _convert_json_files_to_po(self):
        """Run ``json2po`` in each directory holding a ``ca.json`` file."""
        template = "json2po -t {0}/en.json -i {0}/ca.json -o {1}"

        for ca_path in self.findFiles.find(self.convert_dir, "ca.json"):
            folder = os.path.dirname(ca_path)
            logging.info("convert json file: {0}".format(folder))
            po_path = "{0}/json-ca.po".format(folder)
            os.system(template.format(folder, po_path))

    def _convert_yml_files_to_po(self):
        """Run ``i18n-translate convert`` where a ``ca.yml`` file exists."""
        template = ("i18n-translate convert --locale_dir {0} -f yml -l ca "
                    "-t po -d en")

        for ca_path in self.findFiles.find(self.convert_dir, "ca.yml"):
            folder = os.path.dirname(ca_path)
            logging.info("convert yml file: {0}".format(folder))
            os.system(template.format(folder))
class ConvertFiles():
    """Convert the translation files found for ``./tmp`` into PO files.

    Each ``_convert_*`` method looks for one kind of file with the shared
    ``FindFiles`` helper and runs an external converter via ``os.system``.
    """

    def __init__(self):
        self.temp_dir = './tmp'
        # Set by convert(); every _convert_* method relies on it.
        self.findFiles = None

    def convert(self):
        """Create the file finder and run each conversion in a fixed order."""
        self.findFiles = FindFiles()

        steps = (
            '_convert_ts_files_to_po',
            '_convert_string_files_to_po',
            '_convert_ini_files_to_po',
            '_convert_php_resources_files_to_po',
            '_convert_android_resources_files_to_po',
            '_convert_properties_files_to_po',
        )
        for step in steps:
            getattr(self, step)()

    def _convert_ts_files_to_po(self):
        """Run ``ts2po`` on each ``*.ts`` file, writing ``<name>.po``."""
        for ts_path in self.findFiles.find(self.temp_dir, '*.ts'):
            stem = os.path.splitext(ts_path)[0]
            logging.info('convert ts file: {0}'.format(stem))
            os.system('ts2po {0} -o {1}.po'.format(ts_path, stem))

    def _convert_string_files_to_po(self):
        """Run ``prop2po`` for the directory of each ``*.strings`` file."""
        # '--duplicates merge' allows files with duplicated entries.
        template = ('prop2po -t {0}/en.strings {0}/ca.strings '
                    '--personality strings --duplicates merge -o {1}')

        for strings_path in self.findFiles.find(self.temp_dir, '*.strings'):
            folder = os.path.dirname(strings_path)
            logging.info('convert strings file: {0}'.format(folder))
            po_path = '{0}/strings-ca.po'.format(folder)
            os.system(template.format(folder, po_path))

    def _convert_properties_files_to_po(self):
        """Run ``prop2po`` for the directory of each ``*.properties`` file."""
        # '--duplicates merge' allows files with duplicated entries.
        template = ('prop2po -t {0}/en.properties {0}/ca.properties '
                    '--personality java --duplicates merge -o {1}')

        for props_path in self.findFiles.find(self.temp_dir,
                                              '*.properties'):
            folder = os.path.dirname(props_path)
            logging.info('convert properties file: {0}'.format(folder))
            po_path = '{0}/properties-ca.po'.format(folder)
            os.system(template.format(folder, po_path))

    def _convert_ini_files_to_po(self):
        """Rename the ini pair to ``.strings`` files and run ``prop2po``."""
        # http://bugs.locamotion.org/show_bug.cgi?id=3148
        # The renames can be dropped once that issue is fixed.
        renames = (
            ('{0}/en.ini', '{0}/en.strings'),
            ('{0}/ca.ini', '{0}/ca.strings'),
        )
        template = ('prop2po -t {0}/en.strings {0}/ca.strings '
                    '--encoding=utf-8 --personality=strings -o {1}')

        for ini_path in self.findFiles.find(self.temp_dir, 'ca.ini'):
            folder = os.path.dirname(ini_path)
            logging.info('convert ini file: {0}'.format(ini_path))

            for old_name, new_name in renames:
                os.rename(old_name.format(folder), new_name.format(folder))

            po_path = '{0}/strings-ca.po'.format(folder)
            os.system(template.format(folder, po_path))

    def _convert_php_resources_files_to_po(self):
        """Run ``php2po`` from ``temp_dir`` if any ``*.php`` file exists."""
        php_files = self.findFiles.find(self.temp_dir, '*.php')
        if len(php_files) == 0:
            return

        # Arbitrary name, not tied to any expected directory structure.
        OUT_DIRNAME = 'po-files'
        template = 'cd {0} && php2po -t en -i ca -o {1}'
        os.system(template.format(self.temp_dir, OUT_DIRNAME))

    def _convert_android_resources_files_to_po(self):
        """Run ``a2po init ca`` from ``temp_dir`` if any ``*.xml`` exists."""
        xml_files = self.findFiles.find(self.temp_dir, '*.xml')
        if len(xml_files) == 0:
            return

        # See: https://pypi.python.org/pypi/android2po/1.2.0
        # Without '--gettext .' the file is written to ../locale, which in
        # our case is outside the tmp directory.
        template = 'cd {0} && a2po init ca --gettext .'
        os.system(template.format(self.temp_dir))