Exemple #1
0
    def __init__(self, slang, tlang, options):

        cmdpath = options.transerv_bin or "apertium"
        try:
            subprocess.call(cmdpath,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
        except OSError:
            error(
                _("@info Apertium is machine translation software",
                  "Apertium executable not found at '%(path)s'.",
                  path=cmdpath))

        if options.tmode is not None:
            mode = options.tmode
        else:
            mode = "%s-%s" % (slang, tlang)

        optional_parameters = u""
        if options.data_directory:
            optional_parameters = u"-d %s" % options.data_directory

        self.cmdline = u"%s -u -f html-noent %s %s" % (
            cmdpath, optional_parameters, mode)

        entpath = os.path.join(datadir(), "spec", "html.entities")
        self.htmlents = read_entities(entpath)
Exemple #2
0
def rootdir():
    """
    Get root directory to trapnakron derivation files.

    @returns: root directory path
    @rtype: string
    """

    return os.path.join(pology.datadir(), "lang", "sr", "trapnakron")
Exemple #3
0
def _load_phrases(dictnames):

    phrases = set()

    for dictname in dictnames:
        exfile = os.path.join(datadir(), "lang", "nn", "exclusion",
                              dictname + ".dat")

        phrases1 = codecs.open(exfile, "r", "UTF-8").read().split("\n")[:-1]
        phrases1 = map(_normph, phrases1)
        phrases.update(phrases1)

    return phrases
Exemple #4
0
    def _get_word_list_files(self, lang, env):
        # Collect all applicable dictionaries.

        dictFiles = set()
        spellRoot = join(datadir(), "lang", lang, "spell")
        spellSub = join(".", (env or ""))
        while spellSub:
            spellDir = join(spellRoot, spellSub)
            if isdir(spellDir):
                for item in os.listdir(spellDir):
                    if item.endswith(".aspell"):
                        dictFiles.add(join(spellDir, item))
            spellSub = dirname(spellSub)
        return dictFiles
Exemple #5
0
def _get_word_list_files(lang, env):

    # Collect word list paths.
    wlist_files = set()
    spell_root = os.path.join(datadir(), "lang", lang, "spell")
    spell_subdir = os.path.join(".", (env or ""))
    while spell_subdir:
        spell_dir = os.path.join(spell_root, spell_subdir)
        if os.path.isdir(spell_dir):
            for item in os.listdir(spell_dir):
                if item.endswith(".aspell"):
                    wlist_files.add(os.path.join(spell_dir, item))
        spell_subdir = os.path.dirname(spell_subdir)
    return wlist_files
Exemple #6
0
def _compose_personal_dict(lang, envs):

    # Collect all applicable dictionary files
    # (for a given environment, in its subdirectiory and all above).
    dictpaths = set()
    spell_root = os.path.join(datadir(), "lang", lang, "spell")
    for env in (envs or [""]):
        spell_sub = os.path.join(".", env)
        while spell_sub:
            spell_dir = os.path.join(spell_root, spell_sub)
            if os.path.isdir(spell_dir):
                for item in os.listdir(spell_dir):
                    if item.endswith(".aspell"):
                        dictpaths.add(os.path.join(spell_dir, item))
            spell_sub = os.path.dirname(spell_sub)
    dictpaths = list(dictpaths)
    dictpaths.sort()

    if not dictpaths:
        return None, False

    # If only one dictionary found, Aspell can use it as-is.
    if len(dictpaths) == 1:
        return dictpaths[0], False

    # Composit all dictionary files into one temporary.
    words = []
    for dictpath in dictpaths:
        words.extend(_read_dict_file(dictpath))
    tmpf = tempfile.NamedTemporaryFile()
    tmpf.close()
    try:
        tmpf = codecs.open(tmpf.name, "w", "UTF-8")
        tmpf.write("personal_ws-1.1 %s %d UTF-8\n" % (lang, len(words)))
        tmpf.writelines([x + "\n" for x in words])
        tmpf.close()
    except Exception, e:
        raise PologyError(
            _("@info", "Cannot create composited spelling dictionary "
              "in current working directory:\n%(msg)s",
              msg=e))
Exemple #7
0
    "space": _check_space,
    "docbook": _check_dbmarkup,
    "man": _check_man,
}

# --------------------------------------
# Utilities.

# Try to heuristically detect which type of markup is used in the message.
# Detection is conservative: better report no markup, than wrong markup.

from pology.markup import collect_xml_spec_l1
from pology import datadir

_tags_wml = _known_tags
_specpath = os.path.join(datadir(), "spec", "pango.l1")
_tags_pango = collect_xml_spec_l1(_specpath).keys()

_first_tag_rx = re.compile(r"<\s*(\w+)[^>]*>", re.U)


# Return keyword of markup detected in the text.
def _detect_markup_in_text (text):

    m = _first_tag_rx.search(text)
    if m:
        tag = m.group(1)
        if tag in _tags_wml:
            return "wml"
        elif tag in _tags_pango:
            return "pango"
Exemple #8
0
def main():

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    cfgsec = pology_config.section("posieve")
    def_do_skip = cfgsec.boolean("skip-on-error", True)
    def_msgfmt_check = cfgsec.boolean("msgfmt-check", False)
    def_skip_obsolete = cfgsec.boolean("skip-obsolete", False)

    # Setup options and parse the command line.
    usage = _("@info command usage",
              "%(cmd)s [OPTIONS] SIEVE [POPATHS...]",
              cmd="%prog")
    desc = _(
        "@info command description",
        "Apply sieves to PO paths, which may be either single PO files or "
        "directories to search recursively for PO files. "
        "Some of the sieves only examine PO files, while others "
        "modify them as well. "
        "The first non-option argument is the sieve name; "
        "a list of several comma-separated sieves can be given too.")
    ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n"
            u"Copyright © 2007, 2008, 2009, 2010 "
            u"Chusslove Illich (Часлав Илић) &lt;%(email)s&gt;",
            cmd="%prog",
            version=version(),
            email="*****@*****.**")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-a",
        "--announce-entry",
        action="store_true",
        dest="announce_entry",
        default=False,
        help=_("@info command line option description",
               "Announce that header or message is just about to be sieved."))
    opars.add_option("-b",
                     "--skip-obsolete",
                     action="store_true",
                     dest="skip_obsolete",
                     default=def_skip_obsolete,
                     help=_("@info command line option description",
                            "Do not sieve obsolete messages."))
    opars.add_option(
        "-c",
        "--msgfmt-check",
        action="store_true",
        dest="msgfmt_check",
        default=def_msgfmt_check,
        help=_("@info command line option description",
               "Check catalogs by %(cmd)s and skip those which do not pass.",
               cmd="msgfmt -c"))
    opars.add_option("-u",
                     "--single-entry",
                     metavar=_("@info command line value placeholder",
                               "ENTRY_NUMBER"),
                     action="store",
                     dest="single_entry",
                     default=0,
                     help=_("@info command line option description",
                            "Only perform the check on this ENTRY_NUMBER."))
    opars.add_option(
        "--force-sync",
        action="store_true",
        dest="force_sync",
        default=False,
        help=_("@info command line option description",
               "Force rewriting of all messages, whether modified or not."))
    opars.add_option("-H",
                     "--help-sieves",
                     action="store_true",
                     dest="help_sieves",
                     default=False,
                     help=_("@info command line option description",
                            "Show help for applied sieves."))
    opars.add_option("--issued-params",
                     action="store_true",
                     dest="issued_params",
                     default=False,
                     help=_(
                         "@info command line option description",
                         "Show all issued sieve parameters "
                         "(from command line and user configuration)."))
    opars.add_option("-l",
                     "--list-sieves",
                     action="store_true",
                     dest="list_sieves",
                     default=False,
                     help=_("@info command line option description",
                            "List available internal sieves."))
    opars.add_option("--list-options",
                     action="store_true",
                     dest="list_options",
                     default=False,
                     help=_("@info command line option description",
                            "List the names of available options."))
    opars.add_option("--list-sieve-names",
                     action="store_true",
                     dest="list_sieve_names",
                     default=False,
                     help=_("@info command line option description",
                            "List the names of available internal sieves."))
    opars.add_option("--list-sieve-params",
                     action="store_true",
                     dest="list_sieve_params",
                     default=False,
                     help=_("@info command line option description",
                            "List the parameters known to issued sieves."))
    opars.add_option("-m",
                     "--output-modified",
                     metavar=_("@info command line value placeholder", "FILE"),
                     action="store",
                     dest="output_modified",
                     default=None,
                     help=_("@info command line option description",
                            "Output names of modified files into FILE."))
    opars.add_option("--no-skip",
                     action="store_false",
                     dest="do_skip",
                     default=def_do_skip,
                     help=_(
                         "@info command line option description",
                         "Do not try to skip catalogs which signal errors."))
    opars.add_option("--no-sync",
                     action="store_false",
                     dest="do_sync",
                     default=True,
                     help=_("@info command line option description",
                            "Do not write any modifications to catalogs."))
    opars.add_option("-q",
                     "--quiet",
                     action="store_true",
                     dest="quiet",
                     default=False,
                     help=_(
                         "@info command line option description",
                         "Do not display any progress info "
                         "(does not influence sieves themselves)."))
    opars.add_option("-s",
                     metavar=_("@info command line value placeholder",
                               "NAME[:VALUE]"),
                     action="append",
                     dest="sieve_params",
                     default=[],
                     help=_("@info command line option description",
                            "Pass a parameter to sieves."))
    opars.add_option(
        "-S",
        metavar=_("@info command line value placeholder", "NAME[:VALUE]"),
        action="append",
        dest="sieve_no_params",
        default=[],
        help=_(
            "@info command line option description",
            "Remove a parameter to sieves "
            "(e.g. if it was issued through user configuration)."))
    opars.add_option("-v",
                     "--verbose",
                     action="store_true",
                     dest="verbose",
                     default=False,
                     help=_("@info command line option description",
                            "Output more detailed progress information."))
    add_cmdopt_filesfrom(opars)
    add_cmdopt_incexc(opars)
    add_cmdopt_colors(opars)

    (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    if op.list_options:
        report(list_options(opars))
        sys.exit(0)

    if len(free_args) < 1 and not (op.list_sieves or op.list_sieve_names):
        error(_("@info", "No sieve to apply given."))

    op.raw_sieves = []
    op.raw_paths = []
    if len(free_args) > 2 and op.single_entry != 0:
        error(
            _("@info",
              "With single entry mode, you can only give one input file."))

    if len(free_args) >= 1:
        op.raw_sieves = free_args[0]
        op.raw_paths = free_args[1:]

    # Could use some speedup.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    set_coloring_globals(ctype=op.coloring_type, outdep=(not op.raw_colors))

    # Dummy-set all internal sieves as requested if sieve listing required.
    sieves_requested = []
    if op.list_sieves or op.list_sieve_names:
        # Global sieves.
        modpaths = glob.glob(os.path.join(datadir(), "sieve", "[a-z]*.py"))
        modpaths.sort()
        for modpath in modpaths:
            sname = os.path.basename(modpath)[:-3]  # minus .py
            sname = sname.replace("_", "-")
            sieves_requested.append(sname)
        # Language-specific sieves.
        modpaths = glob.glob(
            os.path.join(datadir(), "lang", "*", "sieve", "[a-z]*.py"))
        modpaths.sort()
        for modpath in modpaths:
            sname = os.path.basename(modpath)[:-3]  # minus .py
            sname = sname.replace("_", "-")
            lang = os.path.basename(os.path.dirname(os.path.dirname(modpath)))
            sieves_requested.append(lang + ":" + sname)

    # No need to load and setup sieves if only listing sieve names requested.
    if op.list_sieve_names:
        report("\n".join(sieves_requested))
        sys.exit(0)

    # Load sieve modules from supplied names in the command line.
    if not sieves_requested:
        sieves_requested = op.raw_sieves.split(",")
    sieve_modules = []
    for sieve_name in sieves_requested:
        # Resolve sieve file.
        if not sieve_name.endswith(".py"):
            # One of internal sieves.
            if ":" in sieve_name:
                # Language-specific internal sieve.
                lang, name = sieve_name.split(":")
                sieve_path_base = os.path.join("lang", lang, "sieve", name)
            else:
                sieve_path_base = os.path.join("sieve", sieve_name)
            sieve_path_base = sieve_path_base.replace("-", "_") + ".py"
            sieve_path = os.path.join(datadir(), sieve_path_base)
        else:
            # Sieve name is its path.
            sieve_path = sieve_name
        try:
            sieve_file = open(unicode_to_str(sieve_path))
            # ...unicode_to_str because of exec below.
        except IOError:
            error(_("@info", "Cannot load sieve '%(file)s'.", file=sieve_path))
        # Load file into new module.
        sieve_mod_name = "sieve_" + str(len(sieve_modules))
        sieve_mod = imp.new_module(sieve_mod_name)
        exec sieve_file in sieve_mod.__dict__
        sieve_file.close()
        sys.modules[sieve_mod_name] = sieve_mod  # to avoid garbage collection
        sieve_modules.append((sieve_name, sieve_mod))
        if not hasattr(sieve_mod, "Sieve"):
            error(
                _("@info",
                  "Module '%(file)s' does not define %(classname)s class.",
                  file=sieve_path,
                  classname="Sieve"))

    # Setup sieves (description, known parameters...)
    pp = ParamParser()
    snames = []
    for name, mod in sieve_modules:
        scview = pp.add_subcmd(name)
        if hasattr(mod, "setup_sieve"):
            mod.setup_sieve(scview)
        snames.append(name)

    # If info on sieves requested, report and exit.
    if op.list_sieves:
        report(_("@info", "Available internal sieves:"))
        report(pp.listcmd(snames))
        sys.exit(0)
    elif op.list_sieve_params:
        params = set()
        for scview in pp.cmdviews():
            params.update(scview.params(addcol=True))
        report("\n".join(sorted(params)))
        sys.exit(0)
    elif op.help_sieves:
        report(_("@info", "Help for sieves:"))
        report("")
        report(pp.help(snames))
        sys.exit(0)

    # Prepare sieve parameters for parsing.
    sieve_params = list(op.sieve_params)
    # - append paramaters according to configuration
    sieve_params.extend(read_config_params(pp.cmdviews(), sieve_params))
    # - remove paramaters according to command line
    if op.sieve_no_params:
        sieve_params_mod = []
        for parspec in sieve_params:
            if parspec.split(":", 1)[0] not in op.sieve_no_params:
                sieve_params_mod.append(parspec)
        sieve_params = sieve_params_mod

    # If assembly of issued parameters requested, report and exit.
    if op.issued_params:
        escparams = []
        for parspec in sieve_params:
            if ":" in parspec:
                param, value = parspec.split(":", 1)
                escparam = "%s:%s" % (param, escape_sh(value))
            else:
                escparam = parspec
            escparams.append(escparam)
        fmtparams = " ".join(["-s%s" % x for x in sorted(escparams)])
        if fmtparams:
            report(fmtparams)
        sys.exit(0)

    # Parse sieve parameters.
    sparams, nacc_params = pp.parse(sieve_params, snames)
    if nacc_params:
        error(
            _("@info", "Parameters not accepted by any of issued subcommands: "
              "%(paramlist)s.",
              paramlist=format_item_list(nacc_params)))

    # ========================================
    # FIXME: Think of something less ugly.
    # Add as special parameter to each sieve:
    # - root paths from which the catalogs are collected
    # - whether destination independent coloring is in effect
    # - test function for catalog selection
    root_paths = []
    if op.raw_paths:
        root_paths.extend(op.raw_paths)
    if op.files_from:
        for ffpath in op.files_from:
            root_paths.extend(collect_paths_from_file(ffpath))
    if not op.raw_paths and not op.files_from:
        root_paths = ["."]
    is_cat_included = build_path_selector(incnames=op.include_names,
                                          incpaths=op.include_paths,
                                          excnames=op.exclude_names,
                                          excpaths=op.exclude_paths)
    for p in sparams.values():
        p.root_paths = root_paths
        p.raw_colors = op.raw_colors
        p.is_cat_included = is_cat_included
    # ========================================

    # Create sieves.
    sieves = []
    for name, mod in sieve_modules:
        sieves.append(mod.Sieve(sparams[name]))

    # Get the message monitoring indicator from the sieves.
    # Monitor unless all sieves have requested otherwise.
    use_monitored = False
    for sieve in sieves:
        if getattr(sieve, "caller_monitored", True):
            use_monitored = True
            break
    if op.verbose and not use_monitored:
        report(_("@info:progress", "--> Not monitoring messages."))

    # Get the sync indicator from the sieves.
    # Sync unless all sieves have requested otherwise,
    # and unless syncing is disabled globally in command line.
    do_sync = False
    for sieve in sieves:
        if getattr(sieve, "caller_sync", True):
            do_sync = True
            break
    if not op.do_sync:
        do_sync = False
    if op.verbose and not do_sync:
        report(_("@info:progress", "--> Not syncing after sieving."))

    # Open in header-only mode if no sieve has message processor.
    # Categorize sieves by the presence of message/header processors.
    use_headonly = True
    header_sieves = []
    header_sieves_last = []
    message_sieves = []
    for sieve in sieves:
        if hasattr(sieve, "process"):
            use_headonly = False
            message_sieves.append(sieve)
        if hasattr(sieve, "process_header"):
            header_sieves.append(sieve)
        if hasattr(sieve, "process_header_last"):
            header_sieves_last.append(sieve)
    if op.verbose and use_headonly:
        report(_("@info:progress",
                 "--> Opening catalogs in header-only mode."))

    # Collect catalog paths.
    fnames = collect_paths_cmdline(rawpaths=op.raw_paths,
                                   incnames=op.include_names,
                                   incpaths=op.include_paths,
                                   excnames=op.exclude_names,
                                   excpaths=op.exclude_paths,
                                   filesfrom=op.files_from,
                                   elsecwd=True,
                                   respathf=collect_catalogs,
                                   abort=True)

    if op.do_skip:
        errwarn = warning
        errwarn_on_msg = warning_on_msg
    else:
        errwarn = error
        errwarn_on_msg = error_on_msg

    # Prepare inline progress indicator.
    if not op.quiet:
        update_progress = init_file_progress(fnames,
                                             addfmt=t_("@info:progress",
                                                       "Sieving: %(file)s"))

    # Sieve catalogs.
    modified_files = []
    for fname in fnames:
        if op.verbose:
            report(_("@info:progress", "Sieving %(file)s...", file=fname))
        elif not op.quiet:
            update_progress(fname)

        if op.msgfmt_check:
            d1, oerr, ret = collect_system(
                ["msgfmt", "-o", "/dev/null", "-c", fname])
            if ret != 0:
                oerr = oerr.strip()
                errwarn(
                    _("@info:progress", "%(file)s: %(cmd)s check failed:\n"
                      "%(msg)s",
                      file=fname,
                      cmd="msgfmt -c",
                      msg=oerr))
                warning(
                    _("@info:progress",
                      "Skipping catalog due to syntax check failure."))
                continue

        try:
            cat = Catalog(fname,
                          monitored=use_monitored,
                          headonly=use_headonly,
                          single_entry=int(op.single_entry))
        except CatalogSyntaxError, e:
            errwarn(
                _("@info:progress",
                  "%(file)s: Parsing failed: %(msg)s",
                  file=fname,
                  msg=e))
            warning(
                _("@info:progress",
                  "Skipping catalog due to parsing failure."))
            continue

        skip = False
        # First run all header sieves.
        if header_sieves and op.announce_entry:
            report(
                _("@info:progress",
                  "Sieving header of %(file)s...",
                  file=fname))
        for sieve in header_sieves:
            try:
                ret = sieve.process_header(cat.header, cat)
            except SieveCatalogError, e:
                errwarn(
                    _("@info:progress",
                      "%(file)s:header: Sieving failed: %(msg)s",
                      file=fname,
                      msg=e))
                skip = True
                break
            if ret not in (None, 0):
                break
Exemple #9
0
                except A.AspellError, e:
                    raise SieveError(
                        _("@info", "Cannot initialize Aspell:\n%(msg)s",
                          msg=e))
            else:
                # Create simple internal checker that only checks against
                # internal supplemental dictionaries.
                personalDict = self.personalDicts[ckey]
                if not personalDict:
                    raise SieveError(
                        _("@info", "No supplemental dictionaries found."))
                self.aspells[ckey] = _QuasiSpell(personalDict, self.encoding)

            # Load list of contexts by which to ignore messages.
            self.ignoredContexts[ckey] = []
            ignoredContextFile = join(datadir(), "lang", clang, "spell",
                                      "ignoredContext")
            if isfile(ignoredContextFile):
                for line in open(ignoredContextFile, "r", "utf-8"):
                    line = line.strip()
                    if line.startswith("#") or line == "":
                        continue
                    else:
                        self.ignoredContexts[ckey].append(line.lower())

        # Get language-dependent stuff.
        self.aspell = self.aspells[ckey]
        self.ignoredContext = self.ignoredContexts[ckey]

        # Force explicitly given accelerators and markup.
        if self.accel is not None: