def _check_pofiles_content(self):
        """Report PO files holding an unusual amount of non-Catalan characters.

        Every ``*.po`` file that ``FindFiles.find`` returns for
        ``self.temp_dir`` is loaded with ``pofile``. For each entry whose
        ``msgstr`` differs from its ``msgid``, the characters of the
        lower-cased ``msgstr`` that belong to ``invalid_chars`` are counted.
        When a file has more than 100 entries and that count, expressed as a
        percentage of the number of entries, exceeds ``THRESHOLD_PERCENTAGE``,
        ``self.errors`` is incremented and a message is printed.

        Any exception stops the scan and is printed instead of propagating.
        """
        THRESHOLD_PERCENTAGE = 1

        # The list of invalid chars is specific to Catalan language
        invalid_chars = {u"á", u"ñ", u"ë", u"ù", u"â", u"ê", u"î", u"ô", u"û",
                         u"ÿ", u"ä", u"ö"}

        try:
            findFiles = FindFiles()
            for filename in findFiles.find(self.temp_dir, "*.po"):
                poFile = pofile(filename)

                invalid = 0
                for entry in poFile:
                    # Only localized segments. Skips developers names,
                    # untranslated country names, etc
                    if entry.msgid == entry.msgstr:
                        continue

                    invalid += sum(1 for char in entry.msgstr.lower()
                                   if char in invalid_chars)

                if len(poFile) > 100 and invalid > 0:
                    # Invalid characters per entry (not per character)
                    percentage = 100.0 * invalid / len(poFile)
                    if percentage > THRESHOLD_PERCENTAGE:
                        self.errors = self.errors + 1
                        # Single-argument print(...) gives the same output
                        # on Python 2 and Python 3
                        print("Unsual number of invalid chars at {0} ({1}%)".format(filename, str(percentage)))

        except Exception as detail:
            print(detail)
    def _get_po_entries(self, directory):
        """Return the number of translated entries in a directory's PO files.

        Adds up ``len(translated_entries())`` for every ``*.po`` file that
        ``FindFiles.find`` returns for ``directory``.
        """
        return sum(
            len(pofile(po_path).translated_entries())
            for po_path in FindFiles().find(directory, '*.po')
        )
    def _check_number_of_files(self, tm_filename, extensions, expected_files, minimum_size):
        """Check how many files match ``extensions`` and how big they are.

        Walks the files that ``FindFiles.find`` returns for ``self.temp_dir``.
        Each file smaller than ``minimum_size`` bytes adds one to
        ``self.errors`` and prints a message. A further error is recorded
        when the number of files differs from ``expected_files``;
        ``tm_filename`` is only used in that message.
        """
        found = 0
        for found, path_name in enumerate(
                FindFiles().find(self.temp_dir, extensions), 1):
            actual_size = os.path.getsize(path_name)
            if actual_size < minimum_size:
                self.errors += 1
                message = "File {0} has size {1} but expected was at least {2}"
                print(message.format(path_name, actual_size, minimum_size))

        if found != expected_files:
            self.errors += 1
            message = "{0} expected {1} files but contains {2}"
            print(message.format(tm_filename, expected_files, found))
    def process(self):
        """Collect the selected source/target pairs of every PO file.

        Loads the stop words through ``self._read_stop_words`` and then walks
        the ``*.po`` files that ``FindFiles.find`` returns for
        ``self.directory``. For each translated entry the cleaned ``msgid``
        and ``msgstr`` are passed to ``self._should_select_string``; selected
        pairs are logged to ``corpus.txt``, the source is added to
        ``self.source_words`` and the translation is appended to the list
        kept for that source. The resulting dictionary is stored in
        ``self.documents`` under the file name.

        Counters updated: ``self.strings``, ``self.strings_selected`` and
        ``self.files``. A failure while handling one file is logged with
        ``logging.error`` and the remaining files are still processed.
        """
        # NOTE(review): assumes _read_stop_words consumes the file before
        # returning, so it can be closed right after the call - confirm.
        with open("terminology/stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        # The context manager closes corpus.txt even if iterating the
        # directory raises outside the per-file try block.
        with open('corpus.txt', 'w') as f:
            for filename in findFiles.find(self.directory, '*.po'):

                try:
                    print("Reading: " + filename)

                    po_file = polib.pofile(filename)

                    terms = {}
                    for entry in po_file.translated_entries():
                        self.strings += 1

                        msgid = self._clean_string(entry.msgid)
                        msgstr = self._clean_string(entry.msgstr)

                        if not self._should_select_string(msgid, msgstr):
                            continue

                        self.strings_selected += 1

                        log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                        log = log.format(msgid, entry.msgid, msgstr,
                                         entry.msgstr, filename)

                        f.write(log)

                        self.source_words.add(msgid)
                        # One list of translations per cleaned source string
                        terms.setdefault(msgid, []).append(msgstr)

                    self.documents[filename] = terms
                    self.files += 1
                except Exception as detail:
                    logging.error("Cannot read {0}:{1}".format(filename, str(detail)))
    def _check_pofiles_content(self):
        """
            Sanity check of the PO files found in the temporary directory:
            warns when a file holds many characters that are not used in
            Catalan or when many of its strings are identical to the source
        """

        # Characters that are specific to languages other than Catalan
        non_catalan = {'á', 'ñ', 'ë', 'ù', 'â', 'ê', 'î', 'ô', 'û',
                       'ÿ', 'ä', 'ö'}

        try:
            max_invalid_pct = 1
            max_not_localized_pct = 30

            for po_path in FindFiles().find(self.temp_dir, "*.po"):
                catalog = pofile(po_path)

                bad_chars = 0
                same_as_source = 0
                for item in catalog:
                    if item.msgid == item.msgstr:
                        # Developers names, untranslated country names, etc.
                        # are counted but not scanned for invalid chars
                        same_as_source += 1
                    else:
                        bad_chars += sum(1 for letter in item.msgstr.lower()
                                         if letter in non_catalan)

                total = len(catalog)
                # Files with fewer than 100 entries are not reported
                if total < 100:
                    continue

                if bad_chars > 0:
                    ratio = 100.0 * bad_chars / total
                    if ratio > max_invalid_pct:
                        text = "Unsual number of invalid chars at {0} ({1:.2f}%)"
                        print(text.format(po_path, ratio))

                if same_as_source > 0:
                    ratio = 100.0 * same_as_source / total
                    if ratio > max_not_localized_pct:
                        text = "Unsual number of untranslated strings at {0} ({1:.2f}%)"
                        print(text.format(po_path, ratio))

        except Exception as problem:
            print(problem)
Esempio n. 6
0
    def process(self):
        """Collect the selected source/target pairs of every PO file.

        Loads the stop words through ``self._read_stop_words`` and then walks
        the ``*.po`` files that ``FindFiles.find`` returns for
        ``self.directory``. For each translated entry the cleaned ``msgid``
        and ``msgstr`` are passed to ``self._should_select_string``; selected
        pairs are logged to ``corpus.txt`` (UTF-8), the source is added to
        ``self.source_words`` and the translation is appended to the list
        kept for that source. The resulting dictionary is stored in
        ``self.documents`` under the file name.

        Counters updated: ``self.strings``, ``self.strings_selected`` and
        ``self.files``. Exceptions propagate to the caller.
        """
        # Function-scope import keeps this block self-contained.
        import io

        # NOTE(review): assumes _read_stop_words consumes the file before
        # returning, so it can be closed right after the call - confirm.
        with open("stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        # io.open with an explicit encoding accepts unicode text on both
        # Python 2 and Python 3 and writes the same UTF-8 bytes that the
        # former manual log.encode('utf-8') produced; writing encoded bytes
        # to a text-mode file raises TypeError on Python 3.
        with io.open('corpus.txt', 'w', encoding='utf-8') as f:
            for filename in findFiles.find(self.directory, '*.po'):
                print("Reading: " + filename)

                po_file = polib.pofile(filename)

                terms = {}
                for entry in po_file.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    self.strings_selected += 1

                    log = u'source:{0} ({1}) - target:{2} ({3}) - {4}\n'
                    log = log.format(msgid, entry.msgid, msgstr, entry.msgstr,
                                     filename)

                    f.write(log)

                    self.source_words.add(msgid)
                    # One list of translations per cleaned source string
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
Esempio n. 7
0
    def process(self):
        """Collect the selected source/target pairs of every PO file.

        Loads the stop words through ``self._read_stop_words`` and then walks
        the ``*.po`` files that ``FindFiles.find`` returns for
        ``self.directory``. For each translated entry the cleaned ``msgid``
        and ``msgstr`` are passed to ``self._should_select_string``; selected
        pairs are logged to ``corpus.txt``, the source is added to
        ``self.source_words`` and the translation is appended to the list
        kept for that source. The resulting dictionary is stored in
        ``self.documents`` under the file name.

        Counters updated: ``self.strings``, ``self.strings_selected`` and
        ``self.files``. Exceptions propagate to the caller; both files are
        closed in that case too.
        """
        # NOTE(review): assumes _read_stop_words consumes the file before
        # returning, so it can be closed right after the call - confirm.
        with open("terminology/stop-words/stop-words.txt") as stopwords_file:
            self._read_stop_words(stopwords_file)

        findFiles = FindFiles()

        # The context manager closes corpus.txt even when a PO file cannot
        # be parsed and the exception leaves this method.
        with open("corpus.txt", "w") as f:
            for filename in findFiles.find(self.directory, "*.po"):
                print("Reading: " + filename)

                po_file = polib.pofile(filename)

                terms = {}
                for entry in po_file.translated_entries():
                    self.strings += 1

                    msgid = self._clean_string(entry.msgid)
                    msgstr = self._clean_string(entry.msgstr)

                    if not self._should_select_string(msgid, msgstr):
                        continue

                    self.strings_selected += 1

                    log = u"source:{0} ({1}) - target:{2} ({3}) - {4}\n"
                    log = log.format(msgid, entry.msgid, msgstr, entry.msgstr, filename)

                    f.write(log)

                    self.source_words.add(msgid)
                    # One list of translations per cleaned source string
                    terms.setdefault(msgid, []).append(msgstr)

                self.documents[filename] = terms
                self.files += 1
Esempio n. 8
0
    def _check_number_of_files(self, tm_filename, extensions,
                               expected_files, minimum_size):
        """Validate the count and the size of the files matching a pattern.

        Looks at the files that ``FindFiles.find`` returns for
        ``self.temp_dir`` and ``extensions``. ``self.errors`` grows by one,
        and a message is printed, for each file whose size is below
        ``minimum_size`` and once more when the number of files is not
        ``expected_files``. ``tm_filename`` only appears in the latter
        message.
        """
        too_small = 'File {0} has size {1} but expected was at least {2}'
        wrong_count = '{0} expected {1} files but contains {2}'

        total = 0
        for candidate in FindFiles().find(self.temp_dir, extensions):
            total += 1

            byte_count = os.path.getsize(candidate)
            if byte_count < minimum_size:
                self.errors += 1
                print(too_small.format(candidate, byte_count, minimum_size))

        if total != expected_files:
            self.errors += 1
            print(wrong_count.format(tm_filename, expected_files, total))
Esempio n. 9
0
    def _check_pofiles_content(self):
        """Report PO files holding an unusual amount of non-Catalan characters.

        Every ``*.po`` file that ``FindFiles.find`` returns for
        ``self.temp_dir`` is loaded with ``pofile``. For each entry whose
        ``msgstr`` differs from its ``msgid``, the characters of the
        lower-cased ``msgstr`` that belong to ``invalid_chars`` are counted.
        When a file has more than 100 entries and that count, expressed as a
        percentage of the number of entries, exceeds ``THRESHOLD_PERCENTAGE``,
        ``self.errors`` is incremented and a message is printed.

        Any exception stops the scan and is printed instead of propagating.
        """
        THRESHOLD_PERCENTAGE = 1

        # The list of invalid chars is specific to Catalan language
        invalid_chars = {
            u'á', u'ñ', u'ë', u'ù', u'â', u'ê', u'î', u'ô', u'û', u'ÿ',
            u'ä', u'ö'
        }

        try:
            findFiles = FindFiles()
            for filename in findFiles.find(self.temp_dir, "*.po"):
                poFile = pofile(filename)

                invalid = 0
                for entry in poFile:
                    # Only localized segments. Skips developers names,
                    # untranslated country names, etc
                    if entry.msgid == entry.msgstr:
                        continue

                    invalid += sum(1 for char in entry.msgstr.lower()
                                   if char in invalid_chars)

                if len(poFile) > 100 and invalid > 0:
                    # Invalid characters per entry (not per character)
                    percentage = 100.0 * invalid / len(poFile)
                    if percentage > THRESHOLD_PERCENTAGE:
                        self.errors = self.errors + 1
                        # Single-argument print(...) gives the same output
                        # on Python 2 and Python 3
                        print("Unsual number of invalid chars at {0} ({1}%)".
                              format(filename, str(percentage)))

        except Exception as detail:
            print(detail)
    def test_find_recursive(self):
        """find_recursive lists the six fixture files in the expected order.

        The paths returned for ``data/findfiles/`` (next to this test file)
        are compared, relative to that directory, with the expected names.
        """
        base = path.dirname(path.realpath(__file__)) + '/data/findfiles/'
        prefix_length = len(base)

        results = [found[prefix_length:]
                   for found in FindFiles().find_recursive(base, "*")]

        expected_names = [
            "dir1/dir1-dir2/dir1-dir2-file1.txt",
            "dir1/dir1-file1.txt",
            "dir1/dir1-file2.txt",
            "dir2/dir2-file1.txt",
            "root-file1.txt",
            "root-file2.txt",
        ]

        self.assertEqual(6, len(results))
        for position, expected in enumerate(expected_names):
            self.assertEqual(expected, results[position])
 def _clean_pos(self, directory):
     """Remove every ``*.po`` file that ``FindFiles.find`` returns for ``directory``."""
     for po_path in FindFiles().find(directory, '*.po'):
         remove(po_path)
 def _clean_pos(self, directory):
     """Delete the ``*.po`` files found in ``directory`` by ``FindFiles.find``."""
     finder = FindFiles()
     po_paths = finder.find(directory, '*.po')
     for po_path in po_paths:
         remove(po_path)
    def generate_report(self, source_dir):
        """Build the HTML project report for the PO files under ``source_dir``.

        The report is named after the last component of ``source_dir`` plus
        ``.html`` and is created through ``Report.create_project_report``.
        For every ``*.po`` file that ``FindFiles.find_recursive`` returns:

        1. ``self.transonly_po_and_extract_text`` produces the
           translated-only PO file and the text file; the PO file is skipped
           when it returns a false value or the text file is empty.
        2. ``LanguageTool.run_lt`` and ``generate_lt_report`` produce the
           per-file report, which is added to the project report; the PO
           file is skipped when that report does not exist.
        3. ``self.run_pology`` is run and its report is added, or a fixed
           message is added when no Pology report was written.
        4. The intermediate files are removed.

        Finally ``footer.html`` from ``lt['lt-html-dir']`` is appended and
        the report is closed.
        """
        lt, pology = self.read_config()
        print("Source directory: " + source_dir)

        report_filename = os.path.basename(
            os.path.normpath(source_dir)) + ".html"

        report = Report()
        languagetool = LanguageTool(lt)
        report.create_project_report(lt['lt-html-dir'], lt['lt_output'],
                                     report_filename,
                                     languagetool._get_lt_version())

        for po_file in FindFiles().find_recursive(source_dir, "*.po"):
            txt_file = po_file + ".txt"
            json_file = po_file + ".json"
            po_transonly = po_file + "-translated-only.po"
            pology_report = po_file + "-pology.html"
            file_report = po_file + "-report.html"

            rslt = self.transonly_po_and_extract_text(po_file, po_transonly,
                                                      txt_file)
            if not rslt:
                continue

            # NOTE(review): this path and the "Unable to add" path below skip
            # the os.remove() calls at the end of the loop, so the
            # intermediate files already written stay on disk - confirm
            # whether that is intended.
            if os.stat(txt_file).st_size == 0:
                print("No translations in file:" + txt_file)
                continue

            # Only the LanguageTool and Pology runs are timed
            start_time = time.time()
            languagetool.run_lt(lt, txt_file, json_file)
            # Path shown in the log: po_file without the source_dir prefix
            # and the character that follows it
            po_file_logname = po_file[len(source_dir) + 1:]
            print("LT runned PO {0} - {1:.2f}s".format(
                po_file_logname,
                time.time() - start_time))

            languagetool.generate_lt_report(lt['lt-html-dir'], json_file,
                                            file_report)

            if os.path.isfile(file_report):
                report.add_file_to_project_report(file_report)
            else:
                print("Unable to add:" + file_report)
                continue

            start_time = time.time()
            self.run_pology(pology, po_transonly, pology_report)
            print("Pology runned PO {0} - {1:.2f}s".format(
                po_file_logname,
                time.time() - start_time))

            if os.path.isfile(pology_report):
                report.add_file_to_project_report(pology_report)
                os.remove(pology_report)
            else:
                report.add_string_to_project_report(
                    'El Pology no ha detectat cap error.')

            os.remove(txt_file)
            os.remove(json_file)
            os.remove(po_transonly)
            os.remove(file_report)

        footer_filename = os.path.join(lt['lt-html-dir'], "footer.html")
        report.add_file_to_project_report(footer_filename)
        report.close()
Esempio n. 14
0
 def _process_project(self, project_id, project_name, filename, softcatala):
     """Run ``self._process_file`` on every PO file of one project.

     The files are the ``*.po`` files that ``FindFiles.find_recursive``
     returns for ``individual_pos/<project_id>`` under ``self.po_directory``.
     All calls share one initially empty ``set``. The ``filename`` argument
     is not read.
     """
     shared_entries = set()
     project_dir = os.path.join(self.po_directory, "individual_pos/",
                                project_id)
     for po_path in FindFiles().find_recursive(project_dir, '*.po'):
         self._process_file(project_id, project_name, po_path, softcatala,
                            shared_entries)
 def _process_project(self, name, filename, softcatala):
     """Run ``self._process_file`` on every PO file of one project.

     The files are the ``*.po`` files that ``FindFiles.find_recursive``
     returns for ``individual_pos/<name.lower()>`` under
     ``self.po_directory``. All calls share one initially empty ``set``.
     The ``filename`` argument is not read.
     """
     shared_entries = set()
     project_dir = os.path.join(self.po_directory, "individual_pos/",
                                name.lower())
     for po_path in FindFiles().find_recursive(project_dir, '*.po'):
         self._process_file(name, po_path, softcatala, shared_entries)