def _init_matcher(self):
    """
    Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
    """
    if self.matcher in TerminologyPlaceable.matchers:
        TerminologyPlaceable.matchers.remove(self.matcher)
    self.store = TranslationStore()
    self.store.makeindex()
    self.matcher = terminologymatcher(self.store)
    TerminologyPlaceable.matchers.append(self.matcher)

def _init_matcher(self):
    """
    Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
    """
    if self.matcher in TerminologyPlaceable.matchers:
        TerminologyPlaceable.matchers.remove(self.matcher)
    from translate.storage.base import TranslationStore
    self.store = TranslationStore()
    self.store.makeindex()
    from translate.search.match import terminologymatcher
    self.matcher = terminologymatcher(self.store)
    TerminologyPlaceable.matchers.append(self.matcher)

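Both _init_matcher variants above only build an empty, indexed store. The following is a minimal sketch of how such a store/matcher pair is typically fed and queried, assuming the translate toolkit's TranslationStore and terminologymatcher APIs; the term and the sample sentence are illustrative.

from translate.storage.base import TranslationStore
from translate.search.match import terminologymatcher

# Build a tiny in-memory terminology store and index it.
store = TranslationStore()
store.makeindex()
unit = store.addsourceunit("file")
unit.target = "dossier"
store.add_unit_to_index(unit)

# The matcher scans free text and returns the terminology units it finds.
matcher = terminologymatcher(store)
for term in matcher.matches("Please choose a file to open"):
    print("%s -> %s" % (term.source, term.target))
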
def __check_escape(self, string, xml, target_language=None):
    """Helper that checks that a string is output with the right escape."""
    unit = self.UnitClass("teststring")
    if target_language is not None:
        store = TranslationStore()
        store.settargetlanguage(target_language)
        unit._store = store
    unit.target = string
    print("unit.target:", repr(unit.target))
    print("xml:", repr(xml))
    assert str(unit) == xml

def __check_escape(self, string, xml, target_language=None):
    """Helper that checks that a string is output with the right escape."""
    unit = self.UnitClass("Test String")
    if target_language is not None:
        store = TranslationStore()
        store.settargetlanguage(target_language)
        unit._store = store
    unit.target = string
    print("unit.target:", repr(unit.target))
    print("xml:", repr(xml))
    assert str(unit) == xml

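The helper above depends on the concrete UnitClass under test, so the expected escaped output differs per storage format. Below is a self-contained illustration of the same assert-on-serialization pattern, using a hypothetical XML-escaping unit class rather than any real translate toolkit storage class.

from xml.sax.saxutils import escape

class DummyXmlUnit(object):
    """Hypothetical unit whose string form XML-escapes the target."""

    def __init__(self, source):
        self.source = source
        self.target = u""

    def __str__(self):
        return u"<target>%s</target>" % escape(self.target)

def check_escape(string, xml):
    """Assert that the serialized unit matches the expected escaped output."""
    unit = DummyXmlUnit(u"Test String")
    unit.target = string
    assert str(unit) == xml

check_escape(u"A & B < C", u"<target>A &amp; B &lt; C</target>")
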
def init_matcher(self, filename=''):
    """
    Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
    """
    if self.matcher in TerminologyPlaceable.matchers:
        TerminologyPlaceable.matchers.remove(self.matcher)

    if os.path.isfile(filename):
        logging.debug('Loading terminology from %s' % (filename))
        self.store = factory.getobject(filename)
    else:
        logging.debug('Creating empty terminology store')
        self.store = TranslationStore()
    self.store.makeindex()
    self.matcher = terminologymatcher(self.store)
    TerminologyPlaceable.matchers.append(self.matcher)

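When a filename is given, this variant delegates loading to translate.storage.factory, which picks a parser from the file extension. A short sketch of preparing and loading such a glossary follows; the file name and the entries are illustrative.

from translate.storage import factory

# Hypothetical glossary; any factory-supported format (PO, TBX, CSV, ...) would
# do, since getobject() dispatches on the file extension.
glossary = '''msgid "file"
msgstr "fichier"

msgid "folder"
msgstr "dossier"
'''
open("terms.po", "w").write(glossary)

store = factory.getobject("terms.po")
print("%d terminology units loaded" % len(store.units))
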
class TerminologyModel(BaseTerminologyModel):
    """Terminology model that queries Open-Tran.eu for terminology results."""

    __gtype_name__ = "OpenTranTerminology"
    display_name = _("Open-Tran.eu")
    description = _("Terms from Open-Tran.eu")

    # INITIALIZERS #
    def __init__(self, internal_name, controller):
        super(TerminologyModel, self).__init__(controller)
        self.internal_name = internal_name
        self.load_config()

        self.main_controller = controller.main_controller
        self.term_controller = controller
        self.matcher = None
        self._init_matcher()

        self.opentrantm = self._find_opentran_tm()
        if self.opentrantm is None:
            self._init_opentran_client()
        else:
            self.opentrantm.connect("match-found", self._on_match_found)
            self.__setup_opentrantm_lang_watchers()

    def _find_opentran_tm(self):
        """
        Try to find an existing OpenTranClient instance, used by the Open-Tran
        TM model.
        """
        plugin_ctrl = self.main_controller.plugin_controller
        if "tm" not in plugin_ctrl.plugins:
            return None
        tm_ctrl = plugin_ctrl.plugins["tm"].tmcontroller
        if "opentran" not in tm_ctrl.plugin_controller.plugins:
            return None
        return tm_ctrl.plugin_controller.plugins["opentran"]

    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)
        from translate.storage.base import TranslationStore
        self.store = TranslationStore()
        self.store.makeindex()
        from translate.search.match import terminologymatcher
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)

    def _init_opentran_client(self):
        """
        Create and initialize a new Open-Tran client. This should only happen
        when the Open-Tran TM model plug-in is not loaded.
        """
        plugin_ctrlr = self.main_controller.plugin_controller
        lang_ctrlr = self.main_controller.lang_controller
        # The following two values were copied from plugins/tm/__init__.py
        max_candidates = 5
        min_similarity = 70

        # Try to get max_candidates and min_quality from the TM plug-in
        if "tm" in plugin_ctrlr.plugins:
            max_candidates = plugin_ctrlr.plugins["tm"].config["max_matches"]
            min_similarity = plugin_ctrlr.plugins["tm"].config["min_quality"]

        from virtaal.support import opentranclient
        self.opentranclient = opentranclient.OpenTranClient(
            max_candidates=max_candidates,
            min_similarity=min_similarity
        )
        self.opentranclient.source_lang = lang_ctrlr.source_lang.code
        self.opentranclient.target_lang = lang_ctrlr.target_lang.code

        self.__setup_lang_watchers()
        self.__setup_cursor_watcher()

    def __setup_cursor_watcher(self):
        unitview = self.main_controller.unit_controller.view

        def cursor_changed(cursor):
            self.__start_query()

        store_ctrlr = self.main_controller.store_controller

        def store_loaded(store_ctrlr):
            if hasattr(self, "_cursor_connect_id"):
                self.cursor.disconnect(self._cursor_connect_id)
            self.cursor = store_ctrlr.cursor
            self._cursor_connect_id = self.cursor.connect("cursor-changed", cursor_changed)
            cursor_changed(self.cursor)

        store_ctrlr.connect("store-loaded", store_loaded)
        if store_ctrlr.store:
            store_loaded(store_ctrlr)

    def __setup_lang_watchers(self):
        def client_lang_changed(client, lang):
            self.cache = {}
            self._init_matcher()
            self.__start_query()

        self._connect_ids.append(
            (self.opentranclient.connect("source-lang-changed", client_lang_changed), self.opentranclient)
        )
        self._connect_ids.append(
            (self.opentranclient.connect("target-lang-changed", client_lang_changed), self.opentranclient)
        )

        lang_controller = self.main_controller.lang_controller
        self._connect_ids.append(
            (
                lang_controller.connect(
                    "source-lang-changed",
                    lambda _c, lang: self.opentranclient.set_source_lang(lang)
                ),
                lang_controller,
            )
        )
        self._connect_ids.append(
            (
                lang_controller.connect(
                    "target-lang-changed",
                    lambda _c, lang: self.opentranclient.set_target_lang(lang)
                ),
                lang_controller,
            )
        )

    def __setup_opentrantm_lang_watchers(self):
        def set_lang(ctrlr, lang):
            self.cache = {}
            self._init_matcher()

        self._connect_ids.append(
            (self.opentrantm.tmclient.connect("source-lang-changed", set_lang), self.opentrantm.tmclient)
        )
        self._connect_ids.append(
            (self.opentrantm.tmclient.connect("target-lang-changed", set_lang), self.opentrantm.tmclient)
        )

    def __start_query(self):
        unit = self.main_controller.unit_controller.current_unit
        if not unit:
            return
        query_str = unit.source
        if query_str not in self.cache:
            self.cache[query_str] = None
            logging.debug("Query string: %s (target lang: %s)" % (query_str, self.opentranclient.target_lang))
            self.opentranclient.translate_unit(
                query_str, lambda *args: self.add_last_suggestions(self.opentranclient)
            )

    # METHODS #
    def add_last_suggestions(self, opentranclient):
        """Grab the last suggestions from the TM client."""
        added = False
        if opentranclient.last_suggestions:
            for sugg in opentranclient.last_suggestions:
                units = self.create_suggestions(sugg)
                if units:
                    for u in units:
                        self.store.addunit(u)
                        self.store.add_unit_to_index(u)
                    added = True
            opentranclient.last_suggestions = []
        if added:
            self.matcher.inittm(self.store)
            unitview = self.main_controller.unit_controller.view
            self.main_controller.placeables_controller.apply_parsers(
                elems=[src.elem for src in unitview.sources],
                parsers=[TerminologyPlaceable.parse]
            )
            for src in unitview.sources:
                src.refresh()

    def create_suggestions(self, suggestion):
        # Skip any suggestion whose suggested translation contains parentheses
        if re.match(r"\(.*\)", suggestion["text"]):
            return []

        units = []
        for proj in suggestion["projects"]:
            # Skip fuzzy matches:
            if proj["flags"] != 0:
                continue
            source = proj["orig_phrase"].strip()
            # Skip strings that are too short
            if len(source) < MIN_TERM_LENGTH:
                continue
            # Skip any units containing parentheses
            if re.match(r"\(.*\)", source):
                continue
            unit = TranslationUnit(source)
            target = suggestion["text"].strip()
            # Skip phrases already found:
            old_unit = self.store.findunit(proj["orig_phrase"])
            if old_unit and old_unit.target == target:
                continue
            # We mostly want to work with lowercase strings, but in German (and
            # some languages with a related writing style), this will probably
            # irritate more often than help, since nouns always start with a
            # capital letter.
            target_lang_code = self.main_controller.lang_controller.target_lang.code
            if data.normalize_code(target_lang_code) not in ("de", "de-de", "lb", "als", "ksh", "stq", "vmf"):
                # Unless the string contains multiple consecutive uppercase
                # characters or uses some kind of camel case, convert it to
                # lower case.
                if not is_case_sensitive(target):
                    target = target.lower()
            unit.target = target
            units.append(unit)
        return units

    def destroy(self):
        super(TerminologyModel, self).destroy()
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

    # EVENT HANDLERS #
    def _on_match_found(self, *args):
        self.add_last_suggestions(self.opentrantm.tmclient)

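create_suggestions() relies on an is_case_sensitive() helper that is imported elsewhere in Virtaal and not shown in this snippet. Below is a rough, assumption-based stand-in that matches the intent described in the inline comment (consecutive capitals or camel case mean the term should keep its casing); it is not Virtaal's actual implementation.

import re

def is_case_sensitive_sketch(text):
    """Illustrative stand-in for is_case_sensitive(): treat a term as case
    sensitive if it contains consecutive capitals (e.g. "HTML") or camel
    case (e.g. "OpenOffice"), so lowercasing it would lose meaning."""
    return bool(re.search(r"[A-Z]{2,}", text) or re.search(r"[a-z][A-Z]", text))

# is_case_sensitive_sketch(u"OpenOffice") -> True
# is_case_sensitive_sketch(u"window")     -> False
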
class TerminologyModel(BaseTerminologyModel):
    """Terminology model that queries Open-Tran.eu for terminology results."""

    __gtype_name__ = 'OpenTranTerminology'
    display_name = _('Open-Tran.eu')
    description = _('Terms from Open-Tran.eu')

    # INITIALIZERS #
    def __init__(self, internal_name, controller):
        super(TerminologyModel, self).__init__(controller)
        self.internal_name = internal_name
        self.load_config()

        self.main_controller = controller.main_controller
        self.term_controller = controller
        self.matcher = None
        self._init_matcher()

        self.opentrantm = self._find_opentran_tm()
        if self.opentrantm is None:
            self._init_opentran_client()
        else:
            self.opentrantm.connect('match-found', self._on_match_found)
            self.__setup_opentrantm_lang_watchers()

    def _find_opentran_tm(self):
        """
        Try to find an existing OpenTranClient instance, used by the Open-Tran
        TM model.
        """
        plugin_ctrl = self.main_controller.plugin_controller
        if 'tm' not in plugin_ctrl.plugins:
            return None
        tm_ctrl = plugin_ctrl.plugins['tm'].tmcontroller
        if 'opentran' not in tm_ctrl.plugin_controller.plugins:
            return None
        return tm_ctrl.plugin_controller.plugins['opentran']

    def _init_matcher(self):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)
        self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)

    def _init_opentran_client(self):
        """
        Create and initialize a new Open-Tran client. This should only happen
        when the Open-Tran TM model plug-in is not loaded.
        """
        plugin_ctrlr = self.main_controller.plugin_controller
        lang_ctrlr = self.main_controller.lang_controller
        # The following two values were copied from plugins/tm/__init__.py
        max_candidates = 5
        min_similarity = 70

        # Try to get max_candidates and min_quality from the TM plug-in
        if 'tm' in plugin_ctrlr.plugins:
            max_candidates = plugin_ctrlr.plugins['tm'].config['max_matches']
            min_similarity = plugin_ctrlr.plugins['tm'].config['min_quality']

        self.opentranclient = opentranclient.OpenTranClient(
            max_candidates=max_candidates,
            min_similarity=min_similarity)
        self.opentranclient.source_lang = lang_ctrlr.source_lang.code
        self.opentranclient.target_lang = lang_ctrlr.target_lang.code

        self.__setup_lang_watchers()
        self.__setup_cursor_watcher()

    def __setup_cursor_watcher(self):
        unitview = self.main_controller.unit_controller.view

        def cursor_changed(cursor):
            self.__start_query()

        store_ctrlr = self.main_controller.store_controller

        def store_loaded(store_ctrlr):
            if hasattr(self, '_cursor_connect_id'):
                self.cursor.disconnect(self._cursor_connect_id)
            self.cursor = store_ctrlr.cursor
            self._cursor_connect_id = self.cursor.connect(
                'cursor-changed', cursor_changed)
            cursor_changed(self.cursor)

        store_ctrlr.connect('store-loaded', store_loaded)
        if store_ctrlr.store:
            store_loaded(store_ctrlr)

    def __setup_lang_watchers(self):
        def client_lang_changed(client, lang):
            self.cache = {}
            self._init_matcher()
            self.__start_query()

        self._connect_ids.append(
            (self.opentranclient.connect('source-lang-changed', client_lang_changed),
             self.opentranclient))
        self._connect_ids.append(
            (self.opentranclient.connect('target-lang-changed', client_lang_changed),
             self.opentranclient))

        lang_controller = self.main_controller.lang_controller
        self._connect_ids.append((lang_controller.connect(
            'source-lang-changed',
            lambda _c, lang: self.opentranclient.set_source_lang(lang)), lang_controller))
        self._connect_ids.append((lang_controller.connect(
            'target-lang-changed',
            lambda _c, lang: self.opentranclient.set_target_lang(lang)), lang_controller))

    def __setup_opentrantm_lang_watchers(self):
        def set_lang(ctrlr, lang):
            self.cache = {}
            self._init_matcher()

        self._connect_ids.append(
            (self.opentrantm.tmclient.connect('source-lang-changed', set_lang),
             self.opentrantm.tmclient))
        self._connect_ids.append(
            (self.opentrantm.tmclient.connect('target-lang-changed', set_lang),
             self.opentrantm.tmclient))

    def __start_query(self):
        unit = self.main_controller.unit_controller.current_unit
        if not unit:
            return
        query_str = unit.source
        if query_str not in self.cache:
            self.cache[query_str] = None
            logging.debug('Query string: %s (target lang: %s)' %
                          (query_str, self.opentranclient.target_lang))
            self.opentranclient.translate_unit(
                query_str,
                lambda *args: self.add_last_suggestions(self.opentranclient))

    # METHODS #
    def add_last_suggestions(self, opentranclient):
        """Grab the last suggestions from the TM client."""
        if opentranclient.last_suggestions is not None:
            added = False
            for sugg in opentranclient.last_suggestions:
                units = self.create_suggestions(sugg)
                if units:
                    for u in units:
                        self.store.addunit(u)
                        self.store.add_unit_to_index(u)
                    added = True
            if added:
                self.matcher.inittm(self.store)
                unitview = self.main_controller.unit_controller.view
                self.main_controller.placeables_controller.apply_parsers(
                    elems=[src.elem for src in unitview.sources],
                    parsers=[TerminologyPlaceable.parse])
                for src in unitview.sources:
                    src.refresh()

    def create_suggestions(self, suggestion):
        # Skip any suggestion whose suggested translation contains parentheses
        if re.match(r'\(.*\)', suggestion['text']):
            return []

        units = []
        for proj in suggestion['projects']:
            # Skip fuzzy matches:
            if proj['flags'] != 0:
                continue
            source = proj['orig_phrase'].strip()
            # Skip strings that are too short
            if len(source) < MIN_TERM_LENGTH:
                continue
            # Skip any units containing parentheses
            if re.match(r'\(.*\)', source):
                continue
            unit = TranslationUnit(source)
            target = suggestion['text'].strip()
            # Skip phrases already found:
            old_unit = self.store.findunit(proj['orig_phrase'])
            if old_unit and old_unit.target == target:
                continue
            # We mostly want to work with lowercase strings, but in German (and
            # some languages with a related writing style), this will probably
            # irritate more often than help, since nouns always start with a
            # capital letter.
            target_lang_code = self.main_controller.lang_controller.target_lang.code
            if data.normalize_code(target_lang_code) not in (
                    'de', 'de-de', 'lb', 'als', 'ksh', 'stq', 'vmf'):
                # Unless the string contains multiple consecutive uppercase
                # characters or uses some kind of camel case, convert it to
                # lower case.
                if not is_case_sensitive(target):
                    target = target.lower()
            unit.target = target
            units.append(unit)
        return units

    def destroy(self):
        super(TerminologyModel, self).destroy()
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

    # EVENT HANDLERS #
    def _on_match_found(self, *args):
        self.add_last_suggestions(self.opentrantm.tmclient)

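For reference, this is the shape of an Open-Tran suggestion as create_suggestions() consumes it. The field names are inferred from the code above; the values are purely illustrative.

suggestion = {
    'text': u'gestionnaire de fichiers',  # proposed target text
    'projects': [
        {'flags': 0, 'orig_phrase': u'file manager'},   # exact match: kept
        {'flags': 1, 'orig_phrase': u'file managers'},  # fuzzy match: skipped
    ],
}
# model.create_suggestions(suggestion) would return a single TranslationUnit
# with source u'file manager' and target u'gestionnaire de fichiers'
# (lowercased for most target languages), provided the source is at least
# MIN_TERM_LENGTH characters long and not already in the store.
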
class TerminologyModel(BaseTerminologyModel):
    """A terminology back-end to access the Translate.org.za-managed terminology."""

    __gtype_name__ = 'AutoTermTerminology'
    display_name = _('Localization Terminology')
    description = _('Selected localization terminology')

    _l10n_URL = 'http://terminology.locamotion.org/l10n/%(srclang)s/%(tgtlang)s'

    TERMDIR = os.path.join(pan_app.get_config_dir(), 'autoterm')

    # INITIALIZERS #
    def __init__(self, internal_name, controller):
        super(TerminologyModel, self).__init__(controller)
        self.internal_name = internal_name
        self.client = HTTPClient()
        self.client.set_virtaal_useragent()
        self.load_config()

        if not os.path.isdir(self.TERMDIR):
            os.mkdir(self.TERMDIR)

        self.main_controller = controller.main_controller
        self.term_controller = controller
        self.matcher = None
        self.init_matcher()

        lang_controller = self.main_controller.lang_controller
        self.source_lang = lang_controller.source_lang.code
        self.target_lang = lang_controller.target_lang.code
        self._connect_ids.append((
            lang_controller.connect('source-lang-changed', self._on_lang_changed, 'source'),
            lang_controller
        ))
        self._connect_ids.append((
            lang_controller.connect('target-lang-changed', self._on_lang_changed, 'target'),
            lang_controller
        ))

        self.update_terms()

    def init_matcher(self, filename=''):
        """
        Initialize the matcher to be used by the C{TerminologyPlaceable} parser.
        """
        if self.matcher in TerminologyPlaceable.matchers:
            TerminologyPlaceable.matchers.remove(self.matcher)

        if os.path.isfile(filename):
            logging.debug('Loading terminology from %s' % (filename))
            self.store = factory.getobject(filename)
        else:
            logging.debug('Creating empty terminology store')
            self.store = TranslationStore()
        self.store.makeindex()
        self.matcher = terminologymatcher(self.store)
        TerminologyPlaceable.matchers.append(self.matcher)

    # ACCESSORS #
    def _get_curr_term_filename(self, srclang=None, tgtlang=None, ext=None):
        if srclang is None:
            srclang = self.source_lang
        if tgtlang is None:
            tgtlang = self.target_lang
        if not ext:
            ext = 'po'

        base = '%s__%s' % (srclang, tgtlang)
        for filename in os.listdir(self.TERMDIR):
            if filename.startswith(base):
                return filename
        return base + os.extsep + ext
    curr_term_filename = property(_get_curr_term_filename)

    # METHODS #
    def update_terms(self, srclang=None, tgtlang=None):
        """Update the terminology file for the given language or all if none specified."""
        if srclang is None:
            srclang = self.source_lang
        if tgtlang is None:
            tgtlang = self.target_lang

        if srclang is None and tgtlang is None:
            # Update all files
            return

        if srclang is None or tgtlang is None:
            raise ValueError('Both srclang and tgtlang must be specified')

        if not self.is_update_needed(srclang, tgtlang):
            logging.debug('Skipping update for (%s, %s) language pair' % (srclang, tgtlang))
            localfile = self._get_curr_term_filename(srclang, tgtlang)
            localfile = os.path.join(self.TERMDIR, localfile)
            self.init_matcher(localfile)
            return

        self._update_term_file(srclang, tgtlang)

    def is_update_needed(self, srclang, tgtlang):
        localfile = self._get_curr_term_filename(srclang, tgtlang)
        localfile = os.path.join(self.TERMDIR, localfile)
        if not os.path.isfile(localfile):
            return True
        stats = os.stat(localfile)
        from datetime import datetime
        return (time.mktime(datetime.now().timetuple()) - stats.st_mtime) > THREE_DAYS

    def _check_for_update(self, srclang, tgtlang):
        localfile = self._get_curr_term_filename(srclang, tgtlang)
        localfile = os.path.join(self.TERMDIR, localfile)
        etag = None
        if os.path.isfile(localfile) and localfile in self.config:
            etag = self.config[os.path.abspath(localfile)]

        url = self._l10n_URL % {'srclang': srclang, 'tgtlang': tgtlang}

        if not os.path.isfile(localfile):
            localfile = None
        callback = lambda *args: self._process_header(localfile=localfile, *args)
        if logging.root.level != logging.DEBUG:
            self.client.get(url, callback, etag)
        else:
            def error_log(request, result):
                logging.debug('Could not get %s: status %d' % (url, request.status))
            self.client.get(url, callback, etag, error_callback=error_log)

    def _get_ext_from_url(self, url):
        from urlparse import urlparse
        parsed = urlparse(url)
        #dir, filename = os.path.split(parsed.path)
        #rewritten for compatibility with Python 2.4:
        dir, filename = os.path.split(parsed[2])
        if not filename or '.' not in filename:
            return None
        ext = filename.split('.')[-1]
        if not ext:
            ext = None
        return ext

    def _get_ext_from_store_guess(self, content):
        from StringIO import StringIO
        from translate.storage.factory import _guessextention
        s = StringIO(content)
        try:
            return _guessextention(s)
        except ValueError:
            pass
        return None

    def _process_header(self, request, result, localfile=None):
        if request.status == 304:
            logging.debug('ETag matches for file %s :)' % (localfile))
        elif request.status == 200:
            if not localfile:
                ext = self._get_ext_from_url(request.get_effective_url())
                if ext is None:
                    ext = self._get_ext_from_store_guess(result)
                if ext is None:
                    logging.debug('Unable to determine extension for store. Defaulting to "po".')
                    ext = 'po'
                localfile = self._get_curr_term_filename(ext=ext)
                localfile = os.path.join(self.TERMDIR, localfile)
            logging.debug('Saving to %s' % (localfile))
            open(localfile, 'w').write(result)

            # Find the ETag header and save its value
            headers = request.result_headers.getvalue().splitlines()
            etag = ''
            etagline = [l for l in headers if l.lower().startswith('etag:')]
            if etagline:
                etag = etagline[0][7:-1]
            self.config[os.path.abspath(localfile)] = etag
        else:
            logging.debug('Unhandled status code: %d' % (request.status))
            localfile = ''

        if os.path.isfile(localfile):
            # Update mtime
            os.utime(localfile, None)
        self.init_matcher(localfile)

    def _update_term_file(self, srclang, tgtlang):
        """Update the terminology file for the given languages."""
        self.init_matcher()  # Make sure that the matcher is empty until we have an update
        filename = self._get_curr_term_filename(srclang, tgtlang)
        localfile = os.path.join(self.TERMDIR, filename)

        self._check_for_update(srclang, tgtlang)

    # SIGNAL HANDLERS #
    def _on_lang_changed(self, lang_controller, lang, which):
        setattr(self, '%s_lang' % (which), lang)
        self.update_terms(self.source_lang, self.target_lang)

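_check_for_update() and _process_header() together implement a conditional download keyed on ETags stored in self.config. Below is a standalone sketch of the same idea using plain urllib2; the URL is a placeholder built from the _l10n_URL pattern, and Virtaal's own HTTPClient performs this asynchronously rather than blocking.

import urllib2

def fetch_if_changed(url, etag=None):
    """Return (content, etag); content is None when the server replies 304."""
    request = urllib2.Request(url)
    if etag:
        # Ask the server to skip the body if our cached copy is still current.
        request.add_header('If-None-Match', etag)
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        if e.code == 304:
            return None, etag  # Not modified: keep using the cached file.
        raise
    return response.read(), response.headers.get('ETag')

# content, new_etag = fetch_if_changed(
#     'http://terminology.locamotion.org/l10n/en/af', etag='"abc123"')
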