Beispiel #1
0
    def __init__(self,
                 db_file,
                 max_candidates=3,
                 min_similarity=75,
                 max_length=1000):
        """Store the matching settings and prepare the database for ``db_file``.

        :param db_file: database file identifier; converted with
            ``unicode()`` when it is not a ``unicode`` object already.
        :param max_candidates: stored as ``self.max_candidates``.
        :param min_similarity: stored as ``self.min_similarity``.
        :param max_length: stored as ``self.max_length`` and handed to the
            ``LevenshteinComparer`` created here.
        """
        self.max_candidates = max_candidates
        self.min_similarity = min_similarity
        self.max_length = max_length

        # The encoding of a non-unicode name is unknown, so the plain
        # default conversion is applied.
        db_file = db_file if isinstance(db_file, unicode) else unicode(db_file)
        self.db_file = db_file

        # Instances that point at the same database file share one entry of
        # ``_tm_dbs`` so that connections are shared between them.
        self._tm_db = self._tm_dbs.setdefault(db_file, {})

        # FIXME: do we want to do any checks before we initialize the DB?
        self.init_database()

        # Start with the flag off, then let init_fulltext() run
        # (presumably it switches the flag on when supported -- confirm).
        self.fulltext = False
        self.init_fulltext()

        self.comparer = LevenshteinComparer(self.max_length)

        self.preload_db()
Beispiel #2
0
    def _check_alttrans(self, unit):
        """Collect TM suggestions from the alternative translations of ``unit``.

        Every alternative whose source scores no lower than
        ``self.controller.min_quality - 10`` against ``unit.source`` becomes
        a dict with the keys ``source``, ``target``, ``quality`` and
        ``tmsource``.

        :param unit: the unit to inspect.  Units without a ``getalttrans``
            method, or without any alternatives, produce no results.
        :return: list of result dicts; empty when nothing qualifies.
        """
        if not hasattr(unit, 'getalttrans'):
            return []
        alttrans = unit.getalttrans()
        if not alttrans:
            return []

        from translate.search.lshtein import LevenshteinComparer
        lcomparer = LevenshteinComparer(max_len=1000)
        min_quality = self.controller.min_quality

        results = []
        for alt in alttrans:
            quality = lcomparer.similarity(unit.source, alt.source,
                                           min_quality - 15)
            # let's check if it is useful, but be more lenient
            if quality < min_quality - 10:
                continue
            tmsource = _('This file')

            xmlelement = getattr(alt, 'xmlelement', None)
            if xmlelement is not None:
                origin = xmlelement.get('origin', '')
                if origin:
                    if origin == "lmc":
                        # Experimental code to test lmc research. Everything
                        # in a try block, just in case.
                        try:
                            import os.path
                            extras = xmlelement.xpath(
                                'processing-instruction()')
                            meta = dict((pi.target, pi.text) for pi in extras)
                            # Build the pieces in a separate name so that
                            # ``tmsource`` is only ever rebound to a string;
                            # a failure half-way leaves the default intact.
                            parts = [
                                meta.get("contact-name", ""),
                                meta.get("category", ""),
                                # ``or ""`` guards against a processing
                                # instruction that has no text (None).
                                os.path.splitext(
                                    meta.get("original") or "")[0],
                            ]
                            tmsource = u"\n".join(filter(None, parts))
                        except Exception as e:
                            # Deliberately best-effort: keep the default
                            # ``tmsource`` and just record what went wrong.
                            import logging
                            logging.info(e)

                    tmsource += "\n" + origin

            results.append({
                'source': alt.source,
                'target': alt.target,
                'quality': quality,
                'tmsource': tmsource,
            })

        # Hand the collected matches back; the early exits above return a
        # list too, so callers always receive the same type.
        return results
Beispiel #3
0
    def __init__(self, max_candidates=3, min_similarity=75, max_length=1000):
        """Initialise the client and request the list of supported languages.

        :param max_candidates: stored as ``self.max_candidates``.
        :param min_similarity: stored as ``self.min_similarity``.
        :param max_length: passed to the ``LevenshteinComparer`` kept in
            ``self.comparer``.
        """
        gobject.GObject.__init__(self)
        HTTPClient.__init__(self)

        self.max_candidates = max_candidates
        self.min_similarity = min_similarity
        self.comparer = LevenshteinComparer(max_length)
        self.last_suggestions = []  # used by the open-tran terminology backend

        self._languages = set()
        self.source_lang = self.target_lang = None

        # Service endpoints; the translate URL is a template with two
        # ``%s`` placeholders that is filled in elsewhere.
        self.url_getlanguages = 'http://open-tran.eu/json/supported'
        self.url_translate = 'http://%s.%s.open-tran.eu/json/suggest'

        def on_languages(request, response):
            # Only the response is of interest to got_languages().
            return self.got_languages(response)

        # Detect the supported languages.
        language_request = RESTRequest(self.url_getlanguages, id='')
        self.add(language_request)
        language_request.connect('http-success', on_languages)
Beispiel #4
0
 def comparer(self):
     """Return the cached ``LevenshteinComparer``, creating it on first use.

     The maximum length is read from the ``MAX_LENGTH`` entry of
     ``current_app.config`` and falls back to 1000 when that is absent.
     """
     if hasattr(self, '_comparer'):
         return self._comparer

     limit = current_app.config.get('MAX_LENGTH', 1000)
     self._comparer = LevenshteinComparer(limit)
     return self._comparer
Beispiel #5
0
def get_tm_results(request, unit):
    """Gets a list of TM results for the current object.

    Candidate TM units are pre-filtered by language pair and source length,
    then compared against ``unit.source`` with a Levenshtein comparer; those
    scoring at least ``settings.LV_MIN_SIMILARITY`` are reported.

    :param request: the incoming request (not used in the body).
    :param unit: the unit to find TM matches for.
    :return: ``HttpResponse`` whose body is the JSON-encoded list of TM
        results, served as ``application/json``.
    """

    max_len = settings.LV_MAX_LENGTH
    min_similarity = settings.LV_MIN_SIMILARITY

    results = []

    # Shortcut Levenshtein comparer, since the distance, by definition, can't
    # be less than the difference in string length
    diff_len = unit.source_length * (100 - min_similarity)/100
    max_unit_len = unit.source_length + diff_len
    min_unit_len = unit.source_length - diff_len

    criteria = {
        'target_lang': unit.store.translation_project.language,
        'source_lang': unit.store.translation_project.project.source_language,
        'source_length__range': (min_unit_len, max_unit_len),
    }
    tmunits = TMUnit.objects.filter(**criteria).exclude(unit=unit)

    comparer = LevenshteinComparer(max_len)
    for tmunit in tmunits:
        quality = comparer.similarity(tmunit.source, unit.source,
                                      min_similarity)
        if quality < min_similarity:
            continue

        project = tmunit.project
        profile = tmunit.submitted_by
        result = {
            'source': tmunit.source,
            'target': tmunit.target,
            'quality': quality,
            'project': {
                'project': project.code,
                'projectname': project.fullname,
                'absolute_url': project.get_absolute_url(),
                'icon': _get_project_icon(project),
            }
        }

        if profile is not None:
            submissions = Submission.objects.filter(
                            submitter=profile,
                            type=SubmissionTypes.NORMAL,
                            ).distinct().count()
            suggestions = SuggestionStat.objects.filter(
                            suggester=profile,
                            ).distinct().count()
            translations = submissions - suggestions  # XXX: is this correct?

            # Translate the template first and interpolate afterwards:
            # formatting inside _() would look up an already-filled string
            # that can never match a catalog entry.
            title_template = _("By %s on %s<br/><br/>%s translations"
                               "<br/>%s suggestions")
            # NOTE(review): the full name is placed into HTML markup
            # unescaped -- confirm the consumer escapes it.
            title = title_template % (
                        profile.user.get_full_name(),
                        tmunit.submitted_on,
                        translations, suggestions)

            result['translator'] = {
                'username': unicode(profile.user),
                'title': title,
                'absolute_url': profile.get_absolute_url(),
                'gravatar': profile.gravatar_url(24),
            }

        results.append(result)

    # ``content_type`` is the supported spelling; the ``mimetype`` keyword
    # was deprecated and later removed from HttpResponse.
    return HttpResponse(jsonify(results), content_type="application/json")