def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)
Exemple #2
0
    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        from translate.storage.base import TranslationStore
        self.store = TranslationStore()
        self.store.makeindex()
        from translate.search.match import terminologymatcher
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)
Exemple #3
0
    def __check_escape(self, string, xml, target_language=None):
        """Helper that checks that a string is output with the right escape."""
        unit = self.UnitClass("teststring")

        if (target_language is not None):
            store = TranslationStore()
            store.settargetlanguage(target_language)
            unit._store = store

        unit.target = string

        print("unit.target:", repr(unit.target))
        print("xml:", repr(xml))

        assert str(unit) == xml
    def __check_escape(self, string, xml, target_language=None):
        """Helper that checks that a string is output with the right escape."""
        unit = self.UnitClass("Test String")

        if (target_language is not None):
            store = TranslationStore()
            store.settargetlanguage(target_language)
            unit._store = store

        unit.target = string

        print("unit.target:", repr(unit.target))
        print("xml:", repr(xml))

        assert str(unit) == xml
    def init_matcher(self, filename=''):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        if os.path.isfile(filename):
            logging.debug('Loading terminology from %s' % (filename))
            self.store = factory.getobject(filename)
        else:
            logging.debug('Creating empty terminology store')
            self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)
Exemple #6
0
    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)
Exemple #7
0
    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        from translate.storage.base import TranslationStore

        self.store = TranslationStore()
        self.store.makeindex()
        from translate.search.match import terminologymatcher

        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)
Exemple #8
0
class TerminologyModel(BaseTerminologyModel):
    """
    Terminology model that queries Open-tran.eu for terminology results.
    """

    __gtype_name__ = "OpenTranTerminology"
    display_name = _("Open-Tran.eu")
    description = _("Terms from Open-Tran.eu")

    # INITIALIZERS #
    def __init__(self, internal_name, controller):
        super(TerminologyModel, self).__init__(controller)

        self.internal_name = internal_name
        self.load_config()

        self.main_controller = controller.main_controller
        self.term_controller = controller
        self.matcher = None
        self._init_matcher()

        self.opentrantm = self._find_opentran_tm()
        if self.opentrantm is None:
            self._init_opentran_client()
        else:
            self.opentrantm.connect("match-found", self._on_match_found)
            self.__setup_opentrantm_lang_watchers()

    def _find_opentran_tm(self):
        """
        Try and find an existing OpenTranClient instance, used by the OpenTran
        TM model.
        """
        plugin_ctrl = self.main_controller.plugin_controller
        if "tm" not in plugin_ctrl.plugins:
            return None

        tm_ctrl = plugin_ctrl.plugins["tm"].tmcontroller
        if "opentran" not in tm_ctrl.plugin_controller.plugins:
            return None

        return tm_ctrl.plugin_controller.plugins["opentran"]

    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        from translate.storage.base import TranslationStore

        self.store = TranslationStore()
        self.store.makeindex()
        from translate.search.match import terminologymatcher

        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)

    def _init_opentran_client(self):
        """
        Create and initialize a new Open-Tran client. This should only happen
        when the Open-Tran TM model plug-in is not loaded.
        """
        plugin_ctrlr = self.main_controller.plugin_controller
        lang_ctrlr = self.main_controller.lang_controller
        # The following two values were copied from plugins/tm/__init__.py
        max_candidates = 5
        min_similarity = 70

        # Try to get max_candidates and min_quality from the TM plug-in
        if "tm" in plugin_ctrlr.plugins:
            max_candidates = plugin_ctrlr.plugins["tm"].config["max_matches"]
            min_similarity = plugin_ctrlr.plugins["tm"].config["min_quality"]

        from virtaal.support import opentranclient

        self.opentranclient = opentranclient.OpenTranClient(
            max_candidates=max_candidates, min_similarity=min_similarity
        )
        self.opentranclient.source_lang = lang_ctrlr.source_lang.code
        self.opentranclient.target_lang = lang_ctrlr.target_lang.code

        self.__setup_lang_watchers()
        self.__setup_cursor_watcher()

    def __setup_cursor_watcher(self):
        unitview = self.main_controller.unit_controller.view

        def cursor_changed(cursor):
            self.__start_query()

        store_ctrlr = self.main_controller.store_controller

        def store_loaded(store_ctrlr):
            if hasattr(self, "_cursor_connect_id"):
                self.cursor.disconnect(self._cursor_connect_id)
            self.cursor = store_ctrlr.cursor
            self._cursor_connect_id = self.cursor.connect("cursor-changed", cursor_changed)
            cursor_changed(self.cursor)

        store_ctrlr.connect("store-loaded", store_loaded)
        if store_ctrlr.store:
            store_loaded(store_ctrlr)

    def __setup_lang_watchers(self):
        def client_lang_changed(client, lang):
            self.cache = {}
            self._init_matcher()
            self.__start_query()

        self._connect_ids.append(
            (self.opentranclient.connect("source-lang-changed", client_lang_changed), self.opentranclient)
        )
        self._connect_ids.append(
            (self.opentranclient.connect("target-lang-changed", client_lang_changed), self.opentranclient)
        )

        lang_controller = self.main_controller.lang_controller
        self._connect_ids.append(
            (
                lang_controller.connect(
                    "source-lang-changed", lambda _c, lang: self.opentranclient.set_source_lang(lang)
                ),
                lang_controller,
            )
        )
        self._connect_ids.append(
            (
                lang_controller.connect(
                    "target-lang-changed", lambda _c, lang: self.opentranclient.set_target_lang(lang)
                ),
                lang_controller,
            )
        )

    def __setup_opentrantm_lang_watchers(self):
        def set_lang(ctrlr, lang):
            self.cache = {}
            self._init_matcher()

        self._connect_ids.append(
            (self.opentrantm.tmclient.connect("source-lang-changed", set_lang), self.opentrantm.tmclient)
        )
        self._connect_ids.append(
            (self.opentrantm.tmclient.connect("target-lang-changed", set_lang), self.opentrantm.tmclient)
        )

    def __start_query(self):
        unit = self.main_controller.unit_controller.current_unit
        if not unit:
            return
        query_str = unit.source
        if not self.cache.has_key(query_str):
            self.cache[query_str] = None
            logging.debug("Query string: %s (target lang: %s)" % (query_str, self.opentranclient.target_lang))
            self.opentranclient.translate_unit(query_str, lambda *args: self.add_last_suggestions(self.opentranclient))

    # METHODS #
    def add_last_suggestions(self, opentranclient):
        """Grab the last suggestions from the TM client."""
        added = False
        if opentranclient.last_suggestions:
            for sugg in opentranclient.last_suggestions:
                units = self.create_suggestions(sugg)
                if units:
                    for u in units:
                        self.store.addunit(u)
                        self.store.add_unit_to_index(u)
                    added = True
            opentranclient.last_suggestions = []
        if added:
            self.matcher.inittm(self.store)
            unitview = self.main_controller.unit_controller.view
            self.main_controller.placeables_controller.apply_parsers(
                elems=[src.elem for src in unitview.sources], parsers=[TerminologyPlaceable.parse]
            )
            for src in unitview.sources:
                src.refresh()

    def create_suggestions(self, suggestion):
        # Skip any suggestions where the suggested translation contains parenthesis
        if re.match(r"\(.*\)", suggestion["text"]):
            return []

        units = []

        for proj in suggestion["projects"]:
            # Skip fuzzy matches:
            if proj["flags"] != 0:
                continue

            source = proj["orig_phrase"].strip()
            # Skip strings that are too short
            if len(source) < MIN_TERM_LENGTH:
                continue
            # Skip any units containing parenthesis
            if re.match(r"\(.*\)", source):
                continue
            unit = TranslationUnit(source)

            target = suggestion["text"].strip()

            # Skip phrases already found:
            old_unit = self.store.findunit(proj["orig_phrase"])
            if old_unit and old_unit.target == target:
                continue
            # We mostly want to work with lowercase strings, but in German (and
            # some languages with a related writing style), this will probably
            # irritate more often than help, since nouns are always written to
            # start with capital letters.
            target_lang_code = self.main_controller.lang_controller.target_lang.code
            if not data.normalize_code(target_lang_code) in ("de", "de-de", "lb", "als", "ksh", "stq", "vmf"):
                # unless the string contains multiple consecutive uppercase
                # characters or using some type of camel case, we take it to
                # lower case
                if not is_case_sensitive(target):
                    target = target.lower()
            unit.target = target
            units.append(unit)
        return units

    def destroy(self):
        super(TerminologyModel, self).destroy()
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

    # EVENT HANDLERS #
    def _on_match_found(self, *args):
        self.add_last_suggestions(self.opentrantm.tmclient)
class TerminologyModel(BaseTerminologyModel):
    """
    Terminology model that queries Open-tran.eu for terminology results.
    """

    __gtype_name__ = 'OpenTranTerminology'
    display_name = _('Open-Tran.eu')
    description = _('Terms from Open-Tran.eu')

    # INITIALIZERS #
    def __init__(self, internal_name, controller):
        super(TerminologyModel, self).__init__(controller)

        self.internal_name = internal_name
        self.load_config()

        self.main_controller = controller.main_controller
        self.term_controller = controller
        self.matcher = None
        self._init_matcher()

        self.opentrantm = self._find_opentran_tm()
        if self.opentrantm is None:
            self._init_opentran_client()
        else:
            self.opentrantm.connect('match-found', self._on_match_found)
            self.__setup_opentrantm_lang_watchers()

    def _find_opentran_tm(self):
        """
        Try and find an existing OpenTranClient instance, used by the OpenTran
        TM model.
        """
        plugin_ctrl = self.main_controller.plugin_controller
        if 'tm' not in plugin_ctrl.plugins:
            return None

        tm_ctrl = plugin_ctrl.plugins['tm'].tmcontroller
        if 'opentran' not in tm_ctrl.plugin_controller.plugins:
            return None

        return tm_ctrl.plugin_controller.plugins['opentran']

    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)

    def _init_opentran_client(self):
        """
        Create and initialize a new Open-Tran client. This should only happen
        when the Open-Tran TM model plug-in is not loaded.
        """
        plugin_ctrlr = self.main_controller.plugin_controller
        lang_ctrlr = self.main_controller.lang_controller
        # The following two values were copied from plugins/tm/__init__.py
        max_candidates = 5
        min_similarity = 70

        # Try to get max_candidates and min_quality from the TM plug-in
        if 'tm' in plugin_ctrlr.plugins:
            max_candidates = plugin_ctrlr.plugins['tm'].config['max_matches']
            min_similarity = plugin_ctrlr.plugins['tm'].config['min_quality']

        self.opentranclient = opentranclient.OpenTranClient(
            max_candidates=max_candidates, min_similarity=min_similarity)
        self.opentranclient.source_lang = lang_ctrlr.source_lang.code
        self.opentranclient.target_lang = lang_ctrlr.target_lang.code

        self.__setup_lang_watchers()
        self.__setup_cursor_watcher()

    def __setup_cursor_watcher(self):
        unitview = self.main_controller.unit_controller.view

        def cursor_changed(cursor):
            self.__start_query()

        store_ctrlr = self.main_controller.store_controller

        def store_loaded(store_ctrlr):
            if hasattr(self, '_cursor_connect_id'):
                self.cursor.disconnect(self._cursor_connect_id)
            self.cursor = store_ctrlr.cursor
            self._cursor_connect_id = self.cursor.connect(
                'cursor-changed', cursor_changed)
            cursor_changed(self.cursor)

        store_ctrlr.connect('store-loaded', store_loaded)
        if store_ctrlr.store:
            store_loaded(store_ctrlr)

    def __setup_lang_watchers(self):
        def client_lang_changed(client, lang):
            self.cache = {}
            self._init_matcher()
            self.__start_query()

        self._connect_ids.append(
            (self.opentranclient.connect('source-lang-changed',
                                         client_lang_changed),
             self.opentranclient))
        self._connect_ids.append(
            (self.opentranclient.connect('target-lang-changed',
                                         client_lang_changed),
             self.opentranclient))

        lang_controller = self.main_controller.lang_controller
        self._connect_ids.append((lang_controller.connect(
            'source-lang-changed',
            lambda _c, lang: self.opentranclient.set_source_lang(lang)),
                                  lang_controller))
        self._connect_ids.append((lang_controller.connect(
            'target-lang-changed',
            lambda _c, lang: self.opentranclient.set_target_lang(lang)),
                                  lang_controller))

    def __setup_opentrantm_lang_watchers(self):
        def set_lang(ctrlr, lang):
            self.cache = {}
            self._init_matcher()

        self._connect_ids.append(
            (self.opentrantm.tmclient.connect('source-lang-changed', set_lang),
             self.opentrantm.tmclient))
        self._connect_ids.append(
            (self.opentrantm.tmclient.connect('target-lang-changed', set_lang),
             self.opentrantm.tmclient))

    def __start_query(self):
        unit = self.main_controller.unit_controller.current_unit
        if not unit:
            return
        query_str = unit.source
        if not self.cache.has_key(query_str):
            self.cache[query_str] = None
            logging.debug('Query string: %s (target lang: %s)' %
                          (query_str, self.opentranclient.target_lang))
            self.opentranclient.translate_unit(
                query_str,
                lambda *args: self.add_last_suggestions(self.opentranclient))

    # METHODS #
    def add_last_suggestions(self, opentranclient):
        """Grab the last suggestions from the TM client."""
        if opentranclient.last_suggestions is not None:
            added = False
            for sugg in opentranclient.last_suggestions:
                units = self.create_suggestions(sugg)
                if units:
                    for u in units:
                        self.store.addunit(u)
                        self.store.add_unit_to_index(u)
                    added = True
            if added:
                self.matcher.inittm(self.store)
        unitview = self.main_controller.unit_controller.view
        self.main_controller.placeables_controller.apply_parsers(
            elems=[src.elem for src in unitview.sources],
            parsers=[TerminologyPlaceable.parse])
        for src in unitview.sources:
            src.refresh()

    def create_suggestions(self, suggestion):
        # Skip any suggestions where the suggested translation contains parenthesis
        if re.match(r'\(.*\)', suggestion['text']):
            return []

        units = []

        for proj in suggestion['projects']:
            # Skip fuzzy matches:
            if proj['flags'] != 0:
                continue

            source = proj['orig_phrase'].strip()
            # Skip strings that are too short
            if len(source) < MIN_TERM_LENGTH:
                continue
            # Skip any units containing parenthesis
            if re.match(r'\(.*\)', source):
                continue
            unit = TranslationUnit(source)

            target = suggestion['text'].strip()

            # Skip phrases already found:
            old_unit = self.store.findunit(proj['orig_phrase'])
            if old_unit and old_unit.target == target:
                continue
            # We mostly want to work with lowercase strings, but in German (and
            # some languages with a related writing style), this will probably
            # irritate more often than help, since nouns are always written to
            # start with capital letters.
            target_lang_code = self.main_controller.lang_controller.target_lang.code
            if not data.normalize_code(target_lang_code) in (
                    'de', 'de-de', 'lb', 'als', 'ksh', 'stq', 'vmf'):
                # unless the string contains multiple consecutive uppercase
                # characters or using some type of camel case, we take it to
                # lower case
                if not is_case_sensitive(target):
                    target = target.lower()
            unit.target = target
            units.append(unit)
        return units

    def destroy(self):
        super(TerminologyModel, self).destroy()
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

    # EVENT HANDLERS #
    def _on_match_found(self, *args):
        self.add_last_suggestions(self.opentrantm.tmclient)
class TerminologyModel(BaseTerminologyModel):
    """A terminology back-end to access the Translate.org.za-managed terminology."""

    __gtype_name__ = 'AutoTermTerminology'
    display_name = _('Localization Terminology')
    description = _('Selected localization terminology')

    _l10n_URL = 'http://terminology.locamotion.org/l10n/%(srclang)s/%(tgtlang)s'

    TERMDIR = os.path.join(pan_app.get_config_dir(), 'autoterm')

    # INITIALIZERS #
    def __init__(self, internal_name, controller):
        super(TerminologyModel, self).__init__(controller)
        self.internal_name = internal_name
        self.client = HTTPClient()
        self.client.set_virtaal_useragent()

        self.load_config()

        if not os.path.isdir(self.TERMDIR):
            os.mkdir(self.TERMDIR)

        self.main_controller = controller.main_controller
        self.term_controller = controller
        self.matcher = None
        self.init_matcher()

        lang_controller = self.main_controller.lang_controller
        self.source_lang = lang_controller.source_lang.code
        self.target_lang = lang_controller.target_lang.code
        self._connect_ids.append((
            lang_controller.connect('source-lang-changed', self._on_lang_changed, 'source'),
            lang_controller
        ))
        self._connect_ids.append((
            lang_controller.connect('target-lang-changed', self._on_lang_changed, 'target'),
            lang_controller
        ))

        self.update_terms()

    def init_matcher(self, filename=''):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        if os.path.isfile(filename):
            logging.debug('Loading terminology from %s' % (filename))
            self.store = factory.getobject(filename)
        else:
            logging.debug('Creating empty terminology store')
            self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)


    # ACCESSORS #
    def _get_curr_term_filename(self, srclang=None, tgtlang=None, ext=None):
        if srclang is None:
            srclang = self.source_lang
        if tgtlang is None:
            tgtlang = self.target_lang
        if not ext:
            ext = 'po'

        base = '%s__%s' % (srclang, tgtlang)
        for filename in os.listdir(self.TERMDIR):
            if filename.startswith(base):
                return filename
        return base + os.extsep + ext
    curr_term_filename = property(_get_curr_term_filename)


    # METHODS #
    def update_terms(self, srclang=None, tgtlang=None):
        """Update the terminology file for the given language or all if none specified."""
        if srclang is None:
            srclang = self.source_lang
        if tgtlang is None:
            tgtlang = self.target_lang

        if srclang is None and tgtlang is None:
            # Update all files
            return

        if srclang is None or tgtlang is None:
            raise ValueError('Both srclang and tgtlang must be specified')

        if not self.is_update_needed(srclang, tgtlang):
            logging.debug('Skipping update for (%s, %s) language pair' % (srclang, tgtlang))
            localfile = self._get_curr_term_filename(srclang, tgtlang)
            localfile = os.path.join(self.TERMDIR, localfile)
            self.init_matcher(localfile)
            return

        self._update_term_file(srclang, tgtlang)

    def is_update_needed(self, srclang, tgtlang):
        localfile = self._get_curr_term_filename(srclang, tgtlang)
        localfile = os.path.join(self.TERMDIR, localfile)
        if not os.path.isfile(localfile):
            return True
        stats = os.stat(localfile)
        from datetime import datetime
        return (time.mktime(datetime.now().timetuple()) - stats.st_mtime) > THREE_DAYS

    def _check_for_update(self, srclang, tgtlang):
        localfile = self._get_curr_term_filename(srclang, tgtlang)
        localfile = os.path.join(self.TERMDIR, localfile)
        etag = None
        if os.path.isfile(localfile) and localfile in self.config:
            etag = self.config[os.path.abspath(localfile)]

        url = self._l10n_URL % {'srclang': srclang, 'tgtlang': tgtlang}

        if not os.path.isfile(localfile):
            localfile = None
        callback = lambda *args: self._process_header(localfile=localfile, *args)

        if logging.root.level != logging.DEBUG:
            self.client.get(url, callback, etag)
        else:
            def error_log(request, result):
                logging.debug('Could not get %s: status %d' % (url, request.status))
            self.client.get(url, callback, etag, error_callback=error_log)

    def _get_ext_from_url(self, url):
        from urlparse import urlparse
        parsed = urlparse(url)
        #dir, filename = os.path.split(parsed.path)
        #rewritten for compatibility with Python 2.4:
        dir, filename = os.path.split(parsed[2])
        if not filename or '.' not in filename:
            return None
        ext = filename.split('.')[-1]
        if not ext:
            ext = None
        return ext

    def _get_ext_from_store_guess(self, content):
        from StringIO import StringIO
        from translate.storage.factory import _guessextention
        s = StringIO(content)
        try:
            return _guessextention(s)
        except ValueError:
            pass
        return None

    def _process_header(self, request, result, localfile=None):
        if request.status == 304:
            logging.debug('ETag matches for file %s :)' % (localfile))
        elif request.status == 200:
            if not localfile:
                ext = self._get_ext_from_url(request.get_effective_url())
                if ext is None:
                    ext = self._get_ext_from_store_guess(result)
                if ext is None:
                    logging.debug('Unable to determine extension for store. Defaulting to "po".')
                    ext = 'po'
                localfile = self._get_curr_term_filename(ext=ext)
                localfile = os.path.join(self.TERMDIR, localfile)
            logging.debug('Saving to %s' % (localfile))
            open(localfile, 'w').write(result)

            # Find ETag header and save the value
            headers = request.result_headers.getvalue().splitlines()
            etag = ''
            etagline = [l for l in headers if l.lower().startswith('etag:')]
            if etagline:
                etag = etagline[0][7:-1]
            self.config[os.path.abspath(localfile)] = etag
        else:
            logging.debug('Unhandled status code: %d' % (request.status))
            localfile = ''

        if os.path.isfile(localfile):
            # Update mtime
            os.utime(localfile, None)
        self.init_matcher(localfile)

    def _update_term_file(self, srclang, tgtlang):
        """Update the terminology file for the given languages."""
        self.init_matcher() # Make sure that the matcher is empty until we have an update
        filename = self._get_curr_term_filename(srclang, tgtlang)
        localfile = os.path.join(self.TERMDIR, filename)

        self._check_for_update(srclang, tgtlang)


    # SIGNAL HANDLERS #
    def _on_lang_changed(self, lang_controller, lang, which):
        setattr(self, '%s_lang' % (which), lang)
        self.update_terms(self.source_lang, self.target_lang)