Example #1
0
    def __init__(self, params):
        """
        Set up the initial sieve state from the given parameters.

        Only the C{filter} attribute of C{params} is read here; when it is
        empty or C{None}, no filters are registered.
        """

        # Pair every requested filter with its resolved hook, keeping the
        # original request string as the second element.
        self.tfilters = []
        for hreq in params.filter or []:
            self.tfilters.append([get_hook_ireq(hreq, abort=True), hreq])

        # Number of modified headers.
        self.nmod = 0
Example #2
0
    def __init__(self, params):
        """
        Initialize the sieve from the given parameters.

        Provider, language and environment fall back to the "enchant"
        configuration section when not supplied through C{params}.
        """

        enchant_cfg = cfg.section("enchant")

        # Providers: comma-joined parameter values first, configuration next.
        providers = ",".join(params.provider or "")
        if not providers:
            providers = enchant_cfg.string("provider") or None
        self.providers = providers

        # Language: parameter first, configuration next.
        lang = params.lang
        if not lang:
            lang = enchant_cfg.string("language") or None
        self.lang = lang

        # Environments: parameter first, then the comma-separated
        # configuration value, else none at all; always whitespace-stripped.
        envs = params.env
        if envs is None:
            cfg_envs = enchant_cfg.string("environment")
            if cfg_envs is not None:
                envs = cfg_envs.split(",")
        if envs is None:
            envs = []
        self.envs = [env.strip() for env in envs]

        self.accel = params.accel
        self.markup = params.markup

        # Regular expression selecting what to skip;
        # case-insensitive unless case matching was requested.
        if params.skip is None:
            self.skip_rx = None
        else:
            rx_flags = re.U if params.case else (re.U | re.I)
            self.skip_rx = re.compile(params.skip, rx_flags)

        # Resolved filter hooks, each paired with its request string.
        self.pfilters = []
        for hreq in params.filter or []:
            self.pfilters.append([get_hook_ireq(hreq, abort=True), hreq])

        self.suponly = params.suponly
        self.words_only = params.list
        self.lokalize = params.lokalize

        # Langenv-dependent elements built along the way.
        self.checkers = {}
        self.word_lists = {}

        # Tracking of unknown words.
        self.unknown_words = set()

        # Indicators to the caller:
        self.caller_sync = False  # no need to sync catalogs
        self.caller_monitored = False  # no need for monitored messages
Example #3
0
    def __init__(self, params):
        """
        Initialize the sieve and create the connection object for
        the LanguageTool server given by C{params.host} and C{params.port}.
        """

        self.nmatch = 0  # Number of match for finalize
        self.connection = None  # Connection to LanguageTool server

        self.setLang = params.lang
        self.setAccel = params.accel
        self.setMarkup = params.markup
        self.lokalize = params.lokalize

        # LanguageTool server parameters.
        # TODO: autodetect tcp port by reading LanguageTool config file
        # if host is localhost
        server_host = params.host
        server_port = params.port

        # As LT server does not seem to read disabled rules from its
        # config file, exceptions are managed here.
        # TODO: investigate this problem deeper and make a proper
        # bug report to LT devs.
        self.disabledRules = [
            "UPPERCASE_SENTENCE_START",
            "COMMA_PARENTHESIS_WHITESPACE",
        ]

        # Create connection to the LanguageTool server.
        self.connection = HTTPConnection(server_host, server_port)

        # Resolved filter hooks, each paired with its request string.
        self.pfilters = []
        for hreq in params.filter or []:
            self.pfilters.append([get_hook_ireq(hreq, abort=True), hreq])
Example #4
0
    def __init__(self, params):
        """
        Initialize the Aspell-based spell-checking sieve.

        Language, encoding and variety are taken from C{params} first and
        from the "aspell" configuration section second. The encoding
        additionally falls back to the locale encoding and finally to UTF-8.
        Environments, supplements-only and simple-split likewise fall back
        to the configuration section.

        If C{params.xml} is given and its directory is writable, the XML
        output file is opened and its header written; otherwise a warning
        is issued and XML output stays disabled.
        """

        self.nmatch = 0  # Number of match for finalize
        self.filename = ""  # File name we are processing
        self.xmlFile = None  # File handle to write XML output

        # Build Aspell options.
        self.aspellOptions = {}

        # - assume markup in messages (provide option to disable?)
        self.aspellOptions["mode"] = "sgml"
        # FIXME: In fact not needed? The words are sent parsed to checker.

        cfgs = cfg.section("aspell")

        # Parameters take precedence over configuration.
        self.lang = params.lang or cfgs.string("language")
        self.encoding = params.enc or cfgs.string("encoding")
        self.variety = params.var or cfgs.string("variety")

        # The locale may report no encoding at all (None).
        self.loc_encoding = locale.getlocale()[1]
        if not self.encoding:
            self.encoding = self.loc_encoding or "UTF-8"

        # Normalize the encoding resolved above. Passing the locale encoding
        # here instead would silently discard the encoding requested through
        # parameters or configuration.
        self.encoding = self._encoding_for_aspell(self.encoding)

        # Option values are encoded with the locale encoding; fall back to
        # UTF-8 when the locale does not provide one, since encode(None)
        # would raise.
        opt_encoding = self.loc_encoding or "UTF-8"
        self.aspellOptions["lang"] = (
            self.lang.encode(opt_encoding) if self.lang else None)
        self.aspellOptions["encoding"] = self.encoding.encode(opt_encoding)
        if self.variety:
            self.aspellOptions["variety"] = self.variety.encode(opt_encoding)

        # If not None, only the list of faulty words is displayed
        # (to ease copy/paste into personal dictionary).
        self.unknownWords = set() if params.list else None

        if params.xml:
            xmlPath = params.xml
            if os.access(dirname(abspath(xmlPath)), os.W_OK):
                #TODO: create nice api to manage xml file and move it to rules.py
                # NOTE(review): the three-argument form matches codecs.open,
                # not the builtin open -- confirm which one this module uses.
                self.xmlFile = open(xmlPath, "w", "utf-8")
                self.xmlFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
                self.xmlFile.write(
                    '<pos date="%s">\n' %
                    strftime('%c').decode(locale.getpreferredencoding()))
            else:
                warning(
                    _("@info",
                      "Cannot open file '%(file)s'. XML output disabled.",
                      file=xmlPath))

        self.accel = params.accel
        self.markup = params.markup

        # Regular expression selecting what to skip;
        # case-insensitive unless case matching was requested.
        self.skipRx = None
        if params.skip:
            flags = re.U
            if not params.case:
                flags |= re.I
            self.skipRx = re.compile(params.skip, flags)

        # Resolved filter hooks, each paired with its request string.
        self.pfilters = [[get_hook_ireq(x, abort=True), x]
                         for x in (params.filter or [])]

        # Environments: parameter first, then the comma-separated
        # configuration value, else none; always whitespace-stripped.
        self.envs = params.env
        if self.envs is None and cfgs.string("environment") is not None:
            self.envs = cfgs.string("environment").split(",")
        if self.envs is None:
            self.envs = []
        self.envs = [x.strip() for x in self.envs]

        self.suponly = (params.suponly
                        or cfgs.boolean("supplements-only", False))

        # NOTE: Temporary hack, remove when word splitting becomes smarter.
        self.simsp = params.simsp or cfgs.boolean("simple-split", False)

        self.lokalize = params.lokalize

        # Language-dependent elements built along the way.
        self.aspells = {}
        self.ignoredContexts = {}
        self.personalDicts = {}
        self.tmpDictFiles = {}

        # Indicators to the caller:
        self.caller_sync = False  # no need to sync catalogs
        self.caller_monitored = False  # no need for monitored messages
Example #5
0
class Sieve (object):
    """
    Sieve which matches messages against parameter-defined criteria,
    optionally preparing regular expressions for replacement.
    """


    def __init__ (self, params):
        """
        Build the message matcher, replacement expressions and
        filtering hooks from the given parameters.

        Raises C{SieveError} if a matcher cannot be constructed, or if
        replacement is requested without a match on C{msgstr}.
        """

        self.nmatch = 0

        self.p = params

        # Build matching function.
        # It takes as arguments: filtered message, message, catalog,
        # and highlight specification (which is filled on matches).

        def make_match_group (names, negatable=False, orlinked=False):
            # Collect matchers for all given parameters among C{names}
            # (and their "n"-prefixed negations if C{negatable}),
            # and link them into one callable by OR or AND.

            names_negs = [(x, False) for x in names]
            if negatable:
                names_negs.extend([(x, True) for x in names])

            matchers = []
            for name, neg in names_negs:
                nname = "n" + name if neg else name
                values = getattr(params, nname)
                if values is None: # parameter not given
                    continue
                if not isinstance(values, list):
                    values = [values]
                for value in values:
                    try:
                        if name == "fexpr":
                            m = make_msg_matcher(value, params)
                        else:
                            m = make_matcher(name, value, [], params, neg)
                    # "as" form is valid on both Python 2.6+ and Python 3,
                    # unlike the comma form.
                    except ExprError as e:
                        raise SieveError(str_to_unicode(str(e)))
                    matchers.append(m)

            # Explicit loops instead of reduce(), which is not a builtin
            # on Python 3. Results and the set of matchers called are
            # the same as with the short-circuiting fold.
            if orlinked:
                def expr (*a):
                    # First truthy result, else the last falsy one;
                    # False when there are no matchers.
                    res = False
                    for matcher in matchers:
                        res = matcher(*a)
                        if res:
                            break
                    return res
            else:
                def expr (*a):
                    # First falsy result, else the last truthy one;
                    # True when there are no matchers.
                    res = True
                    for matcher in matchers:
                        res = matcher(*a)
                        if not res:
                            break
                    return res
            return expr

        # - first matchers which are always AND
        expr_and = make_match_group([
            "transl", "obsol", "active", "plural", "maxchar", "lspan", "espan",
            "flag", "branch",
        ], negatable=True, orlinked=False)

        # - then matchers which can be AND or OR
        expr_andor = make_match_group([
            "msgctxt", "msgid", "msgstr", "comment",
            "fexpr",
        ], negatable=True, orlinked=self.p.or_match)

        # - all together
        self.matcher = lambda *a: expr_and(*a) and expr_andor(*a)

        # Prepare replacement.
        self.replrxs = []
        if self.p.replace is not None:
            if not self.p.msgstr:
                raise SieveError(
                    _("@info",
                      "Cannot perform replacement if match "
                      "on '%(field)s' is not given.",
                      field="msgstr"))
            rxflags = re.U
            if not self.p.case:
                rxflags |= re.I
            for rxstr in self.p.msgstr:
                self.replrxs.append(re.compile(rxstr, rxflags))

        # Resolve filtering hooks.
        self.pfilters = []
        for hreq in self.p.filter or []:
            self.pfilters.append(get_hook_ireq(hreq, abort=True))

        # Unless replacement or marking requested, no need to monitor/sync.
        if self.p.replace is None and not self.p.mark:
            self.caller_sync = False
            self.caller_monitored = False
Example #6
0
def _resolve_ui_w (headrefs, tagrefs, uipathseps, uicpaths, uicpathenv,
                   xmlescape, pfhook, mkeyw, invmkeyw, quiet, fdiralt,
                   modtext, spanrep):
    """
    Worker for resolver factories.

    Builds and returns a function C{resolver(msgstr, msg, cat)} which
    finds UI references in C{msgstr} and resolves them.

    As used in this function:
      - C{headrefs}, C{tagrefs}: sequences of reference heads and tag names;
        inserted as-is into regular expressions for finding references
      - C{uipathseps}: passed on to C{_split_uirefpath}
      - C{uicpaths}, C{uicpathenv}: passed on to C{_collect_ui_catpaths}
        on first resolution
      - C{xmlescape}: passed on to C{_load_norm_ui_cats}
      - C{pfhook}: post-filtering hook applied to each resolved reference;
        C{None} for no filtering, a string to be resolved through
        C{get_hook_ireq}, or a callable
      - C{mkeyw}: C{None}, a string, or a sequence of markup keywords
        checked against C{cat.markup()}
      - C{invmkeyw}: whether the markup keyword check is inverted
      - C{quiet}: whether to suppress warnings
      - C{fdiralt}: passed on to C{_resolve_single_uiref}
      - C{modtext}, C{spanrep}: select the resolver's return value:
        resolved text if C{modtext}, else list of error spans if C{spanrep},
        else the number of error spans
    """

    # Convert sequences into sets, for fast membership checks.
    if not isinstance(tagrefs, set):
        tagrefs = set(tagrefs)
    if not isinstance(headrefs, set):
        headrefs = set(headrefs)
    if not isinstance(uipathseps, set):
        uipathseps = set(uipathseps)

    # Markup keywords should remain None if not a sequence or string.
    if mkeyw is not None:
        if isinstance(mkeyw, basestring):
            mkeyw = [mkeyw]
        mkeyw = set(mkeyw)

    # Construct post-filtering hook.
    if pfhook is None:
        pfhook = lambda x: x
    elif isinstance(pfhook, basestring):
        pfhook = get_hook_ireq(pfhook)
    # ...else assume it is already a hook function.

    # Regular expressions for finding and extracting UI references.
    # Add a never-match expression to start regexes for all reference types,
    # so that it can be applied even if the category has no entries.
    rxflags = re.U|re.I
    # - by tags
    rxstr = r"<\s*(%s)\b.*?>" % "|".join(list(tagrefs) + ["\x04"])
    uiref_start_tag_rx = re.compile(rxstr, rxflags)
    uiref_extract_tag_rx = {}
    for tag in tagrefs:
        # Groups: 1 tag name, 2 reference path, 3 closing tag.
        rxstr = r"<\s*(%s)\b.*?>(.*?)(<\s*/\s*\1\s*>)" % tag
        uiref_extract_tag_rx[tag] = re.compile(rxstr, rxflags)
    # - by heads
    rxstr = r"(%s)" % "|".join(list(headrefs) + ["\x04"])
    uiref_start_head_rx = re.compile(rxstr, rxflags)
    uiref_extract_head_rx = {}
    for head in headrefs:
        # Groups: 1 delimiter character following the head,
        # 2 reference path up to the next same delimiter.
        rxstr = r"%s(.)(.*?)\1" % head
        uiref_extract_head_rx[head] = re.compile(rxstr, rxflags)

    # Lazy-evaluated data.
    ldata = {}

    # Function to split text by UI references, into list of tuples with
    # the text segment preceding the reference as first element,
    # the reference as second element, and span indices of the reference
    # against complete text as the third and fourth elements;
    # trailing text segment has None as reference, and invalid span.
    # For a tagged reference the span covers only the reference path,
    # while for a headed reference it covers the whole match
    # (head and delimiters included).
    # "Blah <ui>foo</ui> blah ~%/bar/ blah." ->
    # [("Blah <ui>", "foo", 9, 12), ("</ui> blah ", "bar", 23, 30),
    #  (" blah.", None, -1, -1)]
    # On a non-terminated reference, an error span is added to errspans
    # and splitting stops, the remaining text becoming the trailing segment.
    def split_by_uiref (text, msg, cat, errspans):

        rsplit = []

        ltext = len(text)
        p = 0
        while True:
            # Positions of the next tagged and next headed reference;
            # end of text if there is none.
            mt = uiref_start_tag_rx.search(text, p)
            if mt: pt = mt.start()
            else: pt = ltext
            mh = uiref_start_head_rx.search(text, p)
            if mh: ph = mh.start()
            else: ph = ltext

            if pt < ph:
                # Tagged UI reference.
                tag = mt.group(1)
                m = uiref_extract_tag_rx[tag].search(text, pt)
                if not m:
                    errmsg = _("@info \"tag\" is a tag in HTML/XML context",
                               "Non-terminated UI reference by tag '%(tag)s'.",
                               tag=tag)
                    errspans.append(mt.span() + (errmsg,))
                    if not spanrep and not quiet:
                        warning_on_msg(errmsg, msg, cat)
                    break

                uirefpath = m.group(2)
                # Span of the reference path alone, between the tags.
                pe = m.end() - len(m.group(3))
                ps = pe - len(uirefpath)

            elif ph < pt:
                # Headed UI reference.
                head = mh.group(1)
                m = uiref_extract_head_rx[head].search(text, ph)
                if not m:
                    errmsg = _("@info \"head\" is the leading part of "
                               "UI reference, e.g. '~%' in '~%/Save All/'",
                               "Non-terminated UI reference by "
                               "head '%(head)s'.",
                               head=head)
                    errspans.append(mh.span() + (errmsg,))
                    if not spanrep and not quiet:
                        warning_on_msg(errmsg, msg, cat)
                    break

                uirefpath = m.group(2)
                # Span of the whole headed reference.
                ps, pe = m.span()

            else:
                # Both positions equal, meaning end of text.
                break

            ptext_uiref = _split_uirefpath(text[p:ps], uirefpath, uipathseps)
            for ptext, uiref in ptext_uiref:
                rsplit.append((ptext, uiref, ps, pe))
            p = pe

        # Trailing segment (or everything after an error).
        rsplit.append((text[p:], None, -1, -1))

        return rsplit


    # Function to resolve given UI reference
    # (part that needs to be under closure).
    # Returns the post-filtered resolved reference and
    # the error messages produced by resolving.
    def resolve_single_uiref (uiref, msg, cat, resolver_helper):

        # Collect UI catalog paths only once, and reload normalized
        # UI catalogs only when the processed catalog file changes.
        if ldata.get("uicpaths") is None:
            ldata["uicpaths"] = _collect_ui_catpaths(uicpaths, uicpathenv)
        if ldata.get("actcatfile") != cat.filename:
            ldata["actcatfile"] = cat.filename
            ldata["normcats"] = _load_norm_ui_cats(cat, ldata["uicpaths"],
                                                   xmlescape)
        normcats = ldata["normcats"]

        # Resolver variants handed to _resolve_single_uiref:
        # one returning resolved text, the other returning error spans.
        hookcl_f3c = lambda uiref: resolver_helper(uiref, msg, cat, True, False)
        hookcl_v3c = lambda uiref: resolver_helper(uiref, msg, cat, False, True)
        uiref_res, errmsgs = _resolve_single_uiref(uiref, normcats,
                                                   hookcl_f3c, hookcl_v3c,
                                                   fdiralt)
        uiref_res = pfhook(uiref_res)

        return uiref_res, errmsgs


    # The resolver itself, in two parts.
    # The helper takes modtext and spanrep explicitly (shadowing those of
    # the enclosing function), so that it can also be called with
    # other settings through the hooks in resolve_single_uiref.
    def resolver_helper (msgstr, msg, cat, modtext, spanrep):

        errspans = []
        tsegs = []

        # Resolve only if no markup keywords were given, or the catalog
        # markup matches them (does not match, if the check is inverted).
        if (   mkeyw is None
            or (not invmkeyw and mkeyw.intersection(cat.markup() or set()))
            or (invmkeyw and not mkeyw.intersection(cat.markup() or set()))
        ):
            rsplit = split_by_uiref(msgstr, msg, cat, errspans)

            for ptext, uiref, start, end in rsplit:
                tsegs.append(ptext)
                if uiref is not None:
                    uiref_res, errmsgs = resolve_single_uiref(uiref, msg, cat,
                                                              resolver_helper)
                    tsegs.append(uiref_res)
                    errspans.extend([(start, end, x) for x in errmsgs])
                    if not spanrep and not quiet:
                        for errmsg in errmsgs:
                            warning_on_msg(errmsg, msg, cat)

        else:
            # Text is passed through unchanged.
            tsegs.append(msgstr)

        if modtext: # F3C hook
            return "".join(tsegs)
        elif spanrep: # V3C hook
            return errspans
        else: # S3C hook
            return len(errspans)

    def resolver (msgstr, msg, cat):

        return resolver_helper(msgstr, msg, cat, modtext, spanrep)

    return resolver
Example #7
0
File: stats.py Project: KDE/pology
    def __init__(self, params):
        """
        Initialize statistics collection from the given parameters.

        Parses the template correspondence and the line/entry span
        specifications, decides between table and bar display,
        and resolves the filtering hooks.

        Raises C{SieveError} if a span specification does not consist of
        exactly two colon-separated elements, or if a non-empty element
        is not an integer.
        """

        self.p = params

        # Templates correspondence.
        # Mapping of catalogs to templates, in form of <search>:<replace>.
        # For each catalog file path, the first <search> substring is replaced
        # by <replace>, and .po replaced with .pot, to construct its template
        # file path. All templates not found under such paths are reported.
        # Furthermore, all subdirs of these paths are searched for templates
        # without corresponding catalogs, and every such template is counted
        # as fully untranslated PO.
        if self.p.templates:
            if ":" not in self.p.templates:
                self.tspec_srch = self.p.templates
                self.tspec_repl = ""
            else:
                self.tspec_srch, self.tspec_repl = self.p.templates.split(
                    ":", 1)

        # Turn off table display if a bar view has been selected.
        self.p.table = True
        if self.p.msgbar or self.p.wbar or self.p.msgfmt:
            self.p.table = False

        # Filenames of catalogs which are not fully translated.
        self.incomplete_catalogs = {}

        # Counted categories.
        # NOTE(review): the contexts of the "tot" and "obs" rows say "fuzzy",
        # which looks like a copy-paste slip; left untouched here since the
        # context string is presumably part of the translation lookup.
        self.count_spec = (
            ("trn",
             _("@title:row translated messages/words/characters",
               "translated")),
            ("fuz", _("@title:row fuzzy messages/words/characters", "fuzzy")),
            ("unt",
             _("@title:row untranslated messages/words/characters",
               "untranslated")),
            ("tot", _("@title:row fuzzy messages/words/characters", "total")),
            ("obs", _("@title:row fuzzy messages/words/characters",
                      "obsolete")),
        )

        # FIXME: After parameter parser can deliver requested sequence type.
        if self.p.branch is not None:
            self.p.branch = set(self.p.branch)

        # Parse line/entry spans.
        # A specification has the form "<start>:<end>", where either element
        # may be empty; the result is a two-tuple with None for each empty
        # element. A missing specification gives (None, None).
        def parse_span(spanspec):
            if spanspec is not None:
                elements = spanspec.split(":")
            else:
                elements = ["", ""]
            if len(elements) != 2:
                # Report the specification being parsed, which is
                # not necessarily the line span.
                raise SieveError(
                    _("@info", "Wrong number of elements in span "
                      "specification '%(spec)s'.",
                      spec=spanspec))
            bounds = []
            for element in elements:
                if not element:
                    bounds.append(None)
                    continue
                try:
                    bounds.append(int(element))
                except ValueError:
                    raise SieveError(
                        _("@info", "Not an integer number in span "
                          "specification '%(spec)s'.",
                          spec=spanspec))
            return tuple(bounds)

        self.lspan = parse_span(self.p.lspan)
        self.espan = parse_span(self.p.espan)

        # Number of counts per category:
        # messages, words in original, words in translation,
        # characters in original, characters in translation.
        self.counts_per_cat = 5

        # Category counts per catalog filename.
        self.counts = {}

        # Collections of all confirmed templates and tentative template subdirs.
        self.matched_templates = {}
        self.template_subdirs = []
        if self.p.templates:
            for rpath in params.root_paths:
                if os.path.isfile(rpath):
                    rpath = os.path.dirname(rpath)
                rpath = rpath.replace(self.tspec_srch, self.tspec_repl, 1)
                self.template_subdirs.append(rpath)
        # Map of template to translation subdirs.
        self.mapped_template_subdirs = {}

        # Some indicators of metamessages.
        self.xml2po_meta_msgid = dict([(x, True)
                                       for x in ("translator-credits", )])
        self.xml2pot_meta_msgid = dict([(x, True)
                                        for x in ("ROLES_OF_TRANSLATORS",
                                                  "CREDIT_FOR_TRANSLATORS")])
        self.kde_meta_msgctxt = dict([
            (x, True) for x in ("NAME OF TRANSLATORS", "EMAIL OF TRANSLATORS")
        ])

        # Resolve filtering hooks.
        self.pfilters = []
        for hreq in self.p.filter or []:
            self.pfilters.append(get_hook_ireq(hreq, abort=True))

        # Indicators to the caller:
        self.caller_sync = False  # no need to sync catalogs
        self.caller_monitored = False  # no need for monitored messages