def _check_pofiles_content(self):
    """
    Check if by mistake we have included non Catalan language strings
    in the translation memories.

    Every file matching ``*.po`` that FindFiles.find reports for
    ``self.temp_dir`` is scanned. For files with more than 100 entries,
    a warning is printed and ``self.errors`` is incremented when the
    number of invalid characters, expressed as a percentage of the number
    of entries, exceeds THRESHOLD_PERCENTAGE.

    Any exception raised while scanning is printed and not propagated.
    """
    # The list of invalid chars is specific to Catalan language
    invalid_chars = {
        u"á", u"ñ", u"ë", u"ù", u"â", u"ê", u"î", u"ô", u"û",
        u"ÿ", u"ä", u"ö",
    }

    try:
        THRESHOLD_PERCENTAGE = 1
        findFiles = FindFiles()

        for filename in findFiles.find(self.temp_dir, "*.po"):
            poFile = pofile(filename)
            invalid = 0

            for entry in poFile:
                # Only localized segments. Skips developers names,
                # untranslated country names, etc
                if entry.msgid == entry.msgstr:
                    continue

                for char in entry.msgstr.lower():
                    if char in invalid_chars:
                        invalid += 1

            if len(poFile) > 100 and invalid > 0:
                percentage = 100.0 * invalid / len(poFile)

                if percentage > THRESHOLD_PERCENTAGE:
                    self.errors += 1
                    # print() with a single argument behaves the same on
                    # Python 2 and Python 3, unlike the print statement.
                    print("Unsual number of invalid chars at {0} ({1}%)".format(
                        filename, str(percentage)))
    except Exception as detail:
        # Best-effort check: report the problem and keep going.
        print(detail)
def _get_po_entries(self, directory):
    """
    Return the number of translated entries, summed over every file
    matching ``*.po`` that FindFiles.find reports for *directory*.
    """
    po_paths = FindFiles().find(directory, '*.po')
    return sum(
        len(pofile(po_path).translated_entries()) for po_path in po_paths
    )
def _check_number_of_files(self, tm_filename, extensions, expected_files, minimum_size):
    """
    Verify the files found in ``self.temp_dir`` for the given pattern.

    Each file smaller than *minimum_size* bytes, and a file count different
    from *expected_files*, is reported on stdout and adds one to
    ``self.errors``. *tm_filename* is only used in the count message.
    """
    found = 0

    for found, path_name in enumerate(
            FindFiles().find(self.temp_dir, extensions), start=1):
        actual_size = os.path.getsize(path_name)
        if actual_size >= minimum_size:
            continue

        self.errors += 1
        print("File {0} has size {1} but expected was at least {2}".format(
            path_name, actual_size, minimum_size))

    if found == expected_files:
        return

    self.errors += 1
    print("{0} expected {1} files but contains {2}".format(
        tm_filename, expected_files, found))
def process(self):
    """
    Read the stop words and collect the selected terms of every PO file.

    For each file matching ``*.po`` that FindFiles.find reports for
    ``self.directory``, the translated entries are cleaned and filtered
    with ``_should_select_string``. Each selected pair is logged to
    ``corpus.txt``, its source is added to ``self.source_words`` and its
    translation is appended to the per-file mapping stored in
    ``self.documents[filename]`` (source -> list of translations).
    ``self.strings``, ``self.strings_selected`` and ``self.files`` are
    updated as counters.

    A file that raises while being processed is logged with
    ``logging.error`` and skipped.
    """
    # NOTE(review): assumes _read_stop_words consumes the file before
    # returning, so the handle can be closed right after -- confirm.
    with open("terminology/stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # The log lines carry translated text, so the encoding is pinned to
    # UTF-8 instead of depending on the locale. The context manager
    # guarantees the file is closed even on an unexpected error.
    with open('corpus.txt', 'w', encoding='utf-8') as f:
        for filename in findFiles.find(self.directory, '*.po'):
            try:
                print("Reading: " + filename)
                pofile = polib.pofile(filename)

                terms = {}
                for entry in pofile.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    self.strings_selected += 1

                    log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                    log = log.format(msgid, entry.msgid, msgstr,
                                     entry.msgstr, filename)
                    f.write(log)

                    self.source_words.add(msgid)
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
            except Exception as detail:
                logging.error("Cannot read {0}:{1}".format(filename, str(detail)))
def _check_pofiles_content(self):
    """
    Warn about PO files that may contain non Catalan language strings
    or an unusual share of untranslated strings.

    Every file matching ``*.po`` that FindFiles.find reports for
    ``self.temp_dir`` is scanned; files with fewer than 100 entries are
    not reported. Both ratios are computed against the number of entries
    in the file. Any exception is printed and not propagated.
    """
    max_invalid_pct = 1
    max_not_localized_pct = 30

    # Characters treated as invalid; the list is specific to Catalan
    invalid_chars = {
        'á', 'ñ', 'ë', 'ù', 'â', 'ê', 'î', 'ô', 'û',
        'ÿ', 'ä', 'ö',
    }

    try:
        for po_path in FindFiles().find(self.temp_dir, "*.po"):
            catalog = pofile(po_path)
            invalid_count = 0
            identical_count = 0

            for entry in catalog:
                # Entries whose translation equals the source (developer
                # names, untranslated country names, ...) are only
                # counted as not localized.
                if entry.msgid == entry.msgstr:
                    identical_count += 1
                    continue

                invalid_count += sum(
                    1 for ch in entry.msgstr.lower() if ch in invalid_chars
                )

            total = len(catalog)
            if total < 100:
                continue

            invalid_pct = 100.0 * invalid_count / total
            if invalid_pct > max_invalid_pct:
                print("Unsual number of invalid chars at {0} ({1:.2f}%)".format(
                    po_path, invalid_pct))

            identical_pct = 100.0 * identical_count / total
            if identical_pct > max_not_localized_pct:
                print("Unsual number of untranslated strings at {0} ({1:.2f}%)".format(
                    po_path, identical_pct))
    except Exception as detail:
        print(detail)
def process(self):
    """
    Read the stop words and collect the selected terms of every PO file.

    For each file matching ``*.po`` that FindFiles.find reports for
    ``self.directory``, the translated entries are cleaned and filtered
    with ``_should_select_string``. Each selected pair is logged, UTF-8
    encoded, to ``corpus.txt``; its source is added to
    ``self.source_words`` and its translation is appended to the per-file
    mapping stored in ``self.documents[filename]`` (source -> list of
    translations). ``self.strings``, ``self.strings_selected`` and
    ``self.files`` are updated as counters.

    Exceptions raised while reading a PO file propagate to the caller.
    """
    # NOTE(review): assumes _read_stop_words consumes the file before
    # returning, so the handle can be closed right after -- confirm.
    with open("stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # The context manager closes corpus.txt even when a PO file fails to
    # load and the exception propagates.
    with open('corpus.txt', 'w') as f:
        for filename in findFiles.find(self.directory, '*.po'):
            print("Reading: " + filename)
            pofile = polib.pofile(filename)

            terms = {}
            for entry in pofile.translated_entries():
                self.strings += 1

                msgid = self._clean_string(entry.msgid)
                msgstr = self._clean_string(entry.msgstr)

                if not self._should_select_string(msgid, msgstr):
                    continue

                self.strings_selected += 1

                log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                log = log.format(msgid, entry.msgid, msgstr,
                                 entry.msgstr, filename)
                f.write(log.encode('utf-8'))

                self.source_words.add(msgid)
                terms.setdefault(msgid, []).append(msgstr)

            self.documents[filename] = terms
            self.files += 1
def process(self):
    """
    Read the stop words and collect the selected terms of every PO file.

    For each file matching ``*.po`` that FindFiles.find reports for
    ``self.directory``, the translated entries are cleaned and filtered
    with ``_should_select_string``. Each selected pair is logged to
    ``corpus.txt``, its source is added to ``self.source_words`` and its
    translation is appended to the per-file mapping stored in
    ``self.documents[filename]`` (source -> list of translations).
    ``self.strings``, ``self.strings_selected`` and ``self.files`` are
    updated as counters.

    Exceptions raised while reading a PO file propagate to the caller.
    """
    # NOTE(review): assumes _read_stop_words consumes the file before
    # returning, so the handle can be closed right after -- confirm.
    with open("terminology/stop-words/stop-words.txt") as stopwords_file:
        self._read_stop_words(stopwords_file)

    findFiles = FindFiles()

    # The log lines carry translated text, so the encoding is pinned to
    # UTF-8 instead of depending on the locale. The context manager closes
    # the file even when a PO file fails to load.
    with open("corpus.txt", "w", encoding="utf-8") as f:
        for filename in findFiles.find(self.directory, "*.po"):
            print("Reading: " + filename)
            pofile = polib.pofile(filename)

            terms = {}
            for entry in pofile.translated_entries():
                self.strings += 1

                msgid = self._clean_string(entry.msgid)
                msgstr = self._clean_string(entry.msgstr)

                if not self._should_select_string(msgid, msgstr):
                    continue

                self.strings_selected += 1

                log = u"source:{0} ({1}) - target:{2} ({3}) - {4}\n"
                log = log.format(msgid, entry.msgid, msgstr,
                                 entry.msgstr, filename)
                f.write(log)

                self.source_words.add(msgid)
                terms.setdefault(msgid, []).append(msgstr)

            self.documents[filename] = terms
            self.files += 1
def _check_number_of_files(self, tm_filename, extensions, expected_files, minimum_size):
    """
    Check the size and the number of files found in ``self.temp_dir``.

    Files are located with FindFiles.find using *extensions* as pattern.
    A file below *minimum_size* bytes, or a total different from
    *expected_files*, prints a message and increments ``self.errors``.
    """
    too_small = 'File {0} has size {1} but expected was at least {2}'
    wrong_count = '{0} expected {1} files but contains {2}'

    count = 0
    for candidate in FindFiles().find(self.temp_dir, extensions):
        count += 1
        candidate_size = os.path.getsize(candidate)

        if candidate_size < minimum_size:
            self.errors += 1
            print(too_small.format(candidate, candidate_size, minimum_size))

    if count != expected_files:
        self.errors += 1
        print(wrong_count.format(tm_filename, expected_files, count))
def _check_pofiles_content(self):
    """
    Check if by mistake we have included non Catalan language strings
    in the translation memories.

    Every file matching ``*.po`` that FindFiles.find reports for
    ``self.temp_dir`` is scanned. For files with more than 100 entries,
    a warning is printed and ``self.errors`` is incremented when the
    number of invalid characters, expressed as a percentage of the number
    of entries, exceeds THRESHOLD_PERCENTAGE.

    Any exception raised while scanning is printed and not propagated.
    """
    # The list of invalid chars is specific to Catalan language
    invalid_chars = {
        u'á', u'ñ', u'ë', u'ù', u'â', u'ê', u'î', u'ô', u'û',
        u'ÿ', u'ä', u'ö'
    }

    try:
        THRESHOLD_PERCENTAGE = 1
        findFiles = FindFiles()

        for filename in findFiles.find(self.temp_dir, "*.po"):
            poFile = pofile(filename)
            invalid = 0

            for entry in poFile:
                # Only localized segments. Skips developers names,
                # untranslated country names, etc
                if entry.msgid == entry.msgstr:
                    continue

                for char in entry.msgstr.lower():
                    if char in invalid_chars:
                        invalid += 1

            if len(poFile) > 100 and invalid > 0:
                percentage = 100.0 * invalid / len(poFile)

                if percentage > THRESHOLD_PERCENTAGE:
                    self.errors += 1
                    # The print statement is Python 2 only; a single
                    # argument print() call prints the same text on
                    # Python 2 and Python 3.
                    print("Unsual number of invalid chars at {0} ({1}%)".
                          format(filename, str(percentage)))
    except Exception as detail:
        # Best-effort check: report the problem and keep going.
        print(detail)
def test_find_recursive(self):
    """
    find_recursive over the ``data/findfiles`` fixture directory must
    return the six expected files, in this exact order.
    """
    directory = path.dirname(path.realpath(__file__)) + '/data/findfiles/'
    prefix_length = len(directory)

    # Keep only the part of each path that is relative to the fixture dir
    results = [
        found[prefix_length:]
        for found in FindFiles().find_recursive(directory, "*")
    ]

    expected = (
        "dir1/dir1-dir2/dir1-dir2-file1.txt",
        "dir1/dir1-file1.txt",
        "dir1/dir1-file2.txt",
        "dir2/dir2-file1.txt",
        "root-file1.txt",
        "root-file2.txt",
    )

    self.assertEqual(6, len(results))
    for position, relative_name in enumerate(expected):
        self.assertEqual(relative_name, results[position])
def _clean_pos(self, directory):
    """Remove every file matching ``*.po`` that FindFiles.find reports for *directory*."""
    for po_path in FindFiles().find(directory, '*.po'):
        remove(po_path)
def _clean_pos(self, directory):
    """
    Delete the PO files of *directory*.

    The files are the ones FindFiles.find returns for the ``*.po`` pattern.
    """
    stale_files = FindFiles().find(directory, '*.po')
    for stale_file in stale_files:
        remove(stale_file)
def generate_report(self, source_dir):
    """
    Build the HTML project report for the PO files below *source_dir*.

    The report file is named after the last component of *source_dir*
    plus ``.html``. For every file matching ``*.po`` returned by
    FindFiles.find_recursive:

    1. ``transonly_po_and_extract_text`` produces a translated-only PO
       and a text file; the PO file is skipped when that call returns a
       falsy value or the text file is empty.
    2. LanguageTool is run on the text file and its per-file HTML report
       is appended to the project report; the PO file is skipped when
       that report was not produced.
    3. Pology is run on the translated-only PO; its HTML report is
       appended if it exists, otherwise a fixed message is appended.
    4. The intermediate files are removed.

    Finally ``footer.html`` is appended and the report is closed.
    """
    lt, pology = self.read_config()
    print("Source directory: " + source_dir)

    report_filename = os.path.basename(
        os.path.normpath(source_dir)) + ".html"

    report = Report()
    languagetool = LanguageTool(lt)
    report.create_project_report(lt['lt-html-dir'],
                                 lt['lt_output'],
                                 report_filename,
                                 languagetool._get_lt_version())

    for po_file in FindFiles().find_recursive(source_dir, "*.po"):
        txt_file = po_file + ".txt"
        json_file = po_file + ".json"
        po_transonly = po_file + "-translated-only.po"
        pology_report = po_file + "-pology.html"
        file_report = po_file + "-report.html"

        # NOTE(review): the early `continue` paths below leave the
        # intermediate files already created on disk; only the normal
        # path reaches the os.remove calls -- confirm this is intended.
        rslt = self.transonly_po_and_extract_text(po_file, po_transonly,
                                                  txt_file)
        if not rslt:
            continue

        if os.stat(txt_file).st_size == 0:
            print("No translations in file:" + txt_file)
            continue

        # relpath gives the same name whether or not source_dir ends with
        # a path separator; slicing by len(source_dir) + 1 dropped the
        # first character of the name when it did.
        po_file_logname = os.path.relpath(po_file, source_dir)

        start_time = time.time()
        languagetool.run_lt(lt, txt_file, json_file)
        print("LT runned PO {0} - {1:.2f}s".format(
            po_file_logname, time.time() - start_time))

        languagetool.generate_lt_report(lt['lt-html-dir'], json_file,
                                        file_report)

        if os.path.isfile(file_report):
            report.add_file_to_project_report(file_report)
        else:
            print("Unable to add:" + file_report)
            continue

        start_time = time.time()
        self.run_pology(pology, po_transonly, pology_report)
        print("Pology runned PO {0} - {1:.2f}s".format(
            po_file_logname, time.time() - start_time))

        if os.path.isfile(pology_report):
            report.add_file_to_project_report(pology_report)
            os.remove(pology_report)
        else:
            report.add_string_to_project_report(
                'El Pology no ha detectat cap error.')

        os.remove(txt_file)
        os.remove(json_file)
        os.remove(po_transonly)
        os.remove(file_report)

    footer_filename = os.path.join(lt['lt-html-dir'], "footer.html")
    report.add_file_to_project_report(footer_filename)
    report.close()
def _process_project(self, project_id, project_name, filename, softcatala):
    """
    Run ``_process_file`` on every PO file of a project.

    The files are the ``*.po`` matches that FindFiles.find_recursive
    returns for ``<po_directory>/individual_pos/<project_id>``. A single
    ``entries`` set is shared by all the ``_process_file`` calls.

    The *filename* argument is not read: only the paths found on disk
    are passed on.
    """
    shared_entries = set()
    project_dir = os.path.join(self.po_directory, "individual_pos/", project_id)

    for po_filename in FindFiles().find_recursive(project_dir, '*.po'):
        self._process_file(project_id, project_name, po_filename,
                           softcatala, shared_entries)
def _process_project(self, name, filename, softcatala):
    """
    Run ``_process_file`` on every PO file of a project.

    The files are the ``*.po`` matches that FindFiles.find_recursive
    returns for ``<po_directory>/individual_pos/<name in lower case>``.
    A single ``entries`` set is shared by all the ``_process_file`` calls.

    The *filename* argument is not read: only the paths found on disk
    are passed on.
    """
    shared_entries = set()
    project_dir = os.path.join(self.po_directory, "individual_pos/", name.lower())

    for po_filename in FindFiles().find_recursive(project_dir, '*.po'):
        self._process_file(name, po_filename, softcatala, shared_entries)