Beispiel #1
0
    def handle(self, *args, **options):
        '''
        Flushes queued IndexUpdate entries into the fulltext indices.

        The unit of every queued entry is written to the source index,
        units with a non-empty target are written to the target index of
        their language, and the processed queue entries are deleted.
        '''
        # Remember the newest queued entry so that only entries which existed
        # when this run started are indexed and deleted. Deleting the whole
        # table at the end would also drop entries queued in the meantime,
        # without them ever being indexed.
        # NOTE(review): assumes IndexUpdate has an auto-incrementing primary
        # key - confirm against the model.
        newest = list(
            IndexUpdate.objects.order_by('-pk').values_list('pk', flat=True)[:1]
        )
        if not newest:
            # Nothing queued, nothing to do
            return

        pending = IndexUpdate.objects.filter(pk__lte=newest[0])
        # Load the unit together with the queue entry instead of issuing
        # one extra query per entry when accessing update.unit
        base = pending.select_related('unit')

        languages = Language.objects.all()

        # Update source index
        with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
            for update in base.iterator():
                Unit.objects.add_to_source_index(
                    update.unit.checksum,
                    update.unit.source,
                    update.unit.context,
                    writer)

        # Update per language target indices, skipping untranslated units
        for lang in languages:
            translated = base.filter(
                unit__translation__language=lang
            ).exclude(
                unit__target=''
            )
            with FULLTEXT_INDEX.target_writer(lang=lang.code, buffered=False) as writer:
                for update in translated.iterator():
                    Unit.objects.add_to_target_index(
                        update.unit.checksum,
                        update.unit.target,
                        writer)

        # Remove only the entries covered by this run
        pending.delete()
Beispiel #2
0
    def handle(self, *args, **options):
        '''
        Writes the units returned by get_units into the fulltext indices.

        When the 'clean' option is set, create_source_index() and
        create_target_index() for every language are called first.
        '''
        languages = Language.objects.all()
        if options['clean']:
            create_source_index()
            for lang in languages:
                create_target_index(lang=lang.code)

        base = self.get_units(*args, **options)

        # exists() only asks the database whether there is at least one row,
        # there is no need to count all of them just to detect an empty set
        if not base.exists():
            return

        # Update source index
        source_rows = base.values('checksum', 'source', 'context')
        with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
            for unit in source_rows.iterator():
                Unit.objects.add_to_source_index(
                    unit['checksum'],
                    unit['source'],
                    unit['context'],
                    writer)

        # Update per language target indices, skipping untranslated units
        for lang in languages:
            target_rows = base.filter(
                translation__language=lang
            ).exclude(
                target=''
            ).values('checksum', 'target')
            with FULLTEXT_INDEX.target_writer(lang=lang.code, buffered=False) as writer:
                for unit in target_rows.iterator():
                    Unit.objects.add_to_target_index(
                        unit['checksum'],
                        unit['target'],
                        writer)
Beispiel #3
0
    def handle(self, *args, **options):
        '''
        Processes the IndexUpdate queue.

        Every queued unit goes to the source index, every queued unit with
        a non-empty target goes to the target index of its language, and
        the queue entries handled by this run are deleted afterwards.
        '''
        # Take the newest queue entry as an upper bound for this run. The
        # querysets are lazy, so an unbounded delete() at the end would also
        # remove entries queued while indexing was running, and those would
        # never reach the index.
        # NOTE(review): relies on IndexUpdate primary keys growing with
        # insertion order (default auto field) - confirm against the model.
        try:
            newest = IndexUpdate.objects.order_by('-pk')[0]
        except IndexError:
            # Empty queue
            return

        queued = IndexUpdate.objects.filter(pk__lte=newest.pk)
        # select_related avoids a separate unit query for each queue entry
        base = queued.select_related('unit')

        with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
            for update in base.iterator():
                unit = update.unit
                Unit.objects.add_to_source_index(unit.checksum, unit.source,
                                                 unit.context, writer)

        for lang in Language.objects.all():
            with FULLTEXT_INDEX.target_writer(lang=lang.code,
                                              buffered=False) as writer:
                # Only translated units of this language
                for update in base.filter(
                        unit__translation__language=lang).exclude(
                            unit__target='').iterator():
                    unit = update.unit
                    Unit.objects.add_to_target_index(unit.checksum,
                                                     unit.target, writer)

        # Delete only what this run has processed
        queued.delete()
Beispiel #4
0
    def search(self, query, source=True, context=True, translation=True, checksums=False):
        """
        Performs full text search on defined set of fields.

        The source, context and translation flags select which fields are
        searched. The translation search uses the target index of the
        language of the first unit in this queryset; when the queryset is
        empty there is no such unit and the translation search is skipped.

        Returns queryset unless checksums is set, in which case the set of
        matching checksums is returned.
        """
        ret = set()
        if source or context:
            with FULLTEXT_INDEX.source_searcher(not settings.OFFLOAD_INDEXING) as searcher:
                if source:
                    ret.update(self.__search(searcher, "source", SOURCE_SCHEMA, query))
                if context:
                    ret.update(self.__search(searcher, "context", SOURCE_SCHEMA, query))

        if translation:
            # Indexing an empty queryset raises IndexError; without any unit
            # there is no language whose target index could be searched
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None

            if sample is not None:
                with FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code, not settings.OFFLOAD_INDEXING
                ) as searcher:
                    ret.update(self.__search(searcher, "target", TARGET_SCHEMA, query))

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
Beispiel #5
0
    def handle(self, *args, **options):
        '''
        Indexes the units selected by get_units.

        The 'clean' option triggers create_source_index() and
        create_target_index() for each language before indexing. Nothing
        is written when get_units selects no units.
        '''
        languages = Language.objects.all()
        if options['clean']:
            create_source_index()
            for lang in languages:
                create_target_index(lang=lang.code)

        base = self.get_units(*args, **options)

        # Emptiness check: exists() is cheaper than counting every row
        if not base.exists():
            return

        with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
            source_fields = base.values('checksum', 'source', 'context')
            for unit in source_fields.iterator():
                Unit.objects.add_to_source_index(unit['checksum'],
                                                 unit['source'],
                                                 unit['context'], writer)

        for lang in languages:
            with FULLTEXT_INDEX.target_writer(lang=lang.code,
                                              buffered=False) as writer:
                # Units of this language which have a translation
                translated = base.filter(translation__language=lang).exclude(
                    target='').values('checksum', 'target')
                for unit in translated.iterator():
                    Unit.objects.add_to_target_index(unit['checksum'],
                                                     unit['target'], writer)
Beispiel #6
0
    def add_to_index(self, unit, writer_target=None, writer_source=None):
        """
        Writes the given unit to both the source and the target index.

        Writers which are not passed in are obtained from FULLTEXT_INDEX;
        the target writer is requested for the language code of the
        unit's translation.
        """
        target = writer_target
        if target is None:
            language_code = unit.translation.language.code
            target = FULLTEXT_INDEX.target_writer(language_code)

        source = writer_source
        if source is None:
            source = FULLTEXT_INDEX.source_writer()

        self.add_to_source_index(
            unit.checksum,
            unit.source,
            unit.context,
            source
        )
        self.add_to_target_index(
            unit.checksum,
            unit.target,
            target
        )
Beispiel #7
0
    def add_to_index(self, unit):
        """
        Writes the given unit to the source and target indices.

        With settings.OFFLOAD_INDEXING enabled nothing is written here;
        an IndexUpdate entry for the unit is ensured instead.
        """
        if not settings.OFFLOAD_INDEXING:
            language_code = unit.translation.language.code
            target = FULLTEXT_INDEX.target_writer(language_code)
            source = FULLTEXT_INDEX.source_writer()

            self.add_to_source_index(
                unit.checksum, unit.source, unit.context, source
            )
            self.add_to_target_index(unit.checksum, unit.target, target)
            return

        # Imported here rather than at module level, as in the original code
        from weblate.trans.models import IndexUpdate

        IndexUpdate.objects.get_or_create(unit=unit)
Beispiel #8
0
    def similar(self, unit):
        '''
        Returns units similar to the given one.

        Key terms of the unit source are searched in the source index,
        first all together and then with up to three of them left out.
        The result is limited to units of the same project and language
        whose target is neither empty nor equal to the target of the
        given unit.
        '''
        found = set([unit.checksum])
        with FULLTEXT_INDEX.source_searcher(not appsettings.OFFLOAD_INDEXING) as searcher:
            # At most 10 key terms of the source, without the ignored words
            key_terms = searcher.key_terms_from_text(
                'source', unit.source, numterms=10
            )
            terms = [pair[0] for pair in key_terms if pair[0] not in IGNORE_SIMILAR]

            # Number of terms left out of the search so far
            dropped = 0
            # Search until enough strings are found, no term is left, or
            # four different term counts have been tried
            while (len(found) < appsettings.SIMILAR_MESSAGES
                   and dropped < len(terms)
                   and dropped < 4):
                for words in itertools.combinations(terms, len(terms) - dropped):
                    # Positional flags as in the original call: True, False,
                    # False, True
                    found.update(
                        self.search(' '.join(words), True, False, False, True)
                    )
                dropped += 1

        same_scope = self.filter(
            translation__subproject__project=unit.translation.subproject.project,
            translation__language=unit.translation.language,
            checksum__in=found
        )
        return same_scope.exclude(target__in=['', unit.target])
Beispiel #9
0
    def similar(self, unit):
        '''
        Looks up units with a source similar to the given unit.

        The search starts with all key terms extracted from the unit
        source and leaves out one more term per round, for at most four
        rounds, until settings.SIMILAR_MESSAGES checksums are collected.
        Only units of the same project and language with a target that is
        non-empty and different from the given unit's target are returned.
        '''
        checksums = set([unit.checksum])
        offload = settings.OFFLOAD_INDEXING
        with FULLTEXT_INDEX.source_searcher(not offload) as searcher:
            # Extract up to 10 terms from the source
            terms = []
            for kw, score in searcher.key_terms_from_text(
                    'source', unit.source, numterms=10):
                if kw not in IGNORE_SIMILAR:
                    terms.append(kw)

            size = len(terms)
            while True:
                # Stop on enough results, no terms left or 4 rounds done
                if len(checksums) >= settings.SIMILAR_MESSAGES:
                    break
                if size <= 0 or len(terms) - size >= 4:
                    break
                for combination in itertools.combinations(terms, size):
                    text = ' '.join(combination)
                    matches = self.search(text, True, False, False, True)
                    checksums = checksums.union(matches)
                size -= 1

        translation = unit.translation
        result = self.filter(
            translation__subproject__project=translation.subproject.project,
            translation__language=translation.language,
            checksum__in=checksums)
        return result.exclude(target__in=['', unit.target])
Beispiel #10
0
    def add_to_index(self, unit):
        '''
        Adds the given unit to the source index and to the target index
        of its language, or ensures an IndexUpdate entry for it when
        settings.OFFLOAD_INDEXING is enabled.
        '''
        if settings.OFFLOAD_INDEXING:
            # Only make sure an IndexUpdate entry exists for the unit
            from weblate.trans.models import IndexUpdate
            IndexUpdate.objects.get_or_create(unit=unit)
        else:
            code = unit.translation.language.code
            target_writer = FULLTEXT_INDEX.target_writer(code)
            source_writer = FULLTEXT_INDEX.source_writer()

            checksum = unit.checksum
            self.add_to_source_index(checksum, unit.source, unit.context,
                                     source_writer)
            self.add_to_target_index(checksum, unit.target, target_writer)
Beispiel #11
0
    def handle(self, *args, **options):
        '''
        Writes the units selected by get_units into the fulltext indices.

        With the 'clean' option, create_source_index() and
        create_target_index() for every language are called first.
        '''
        all_languages = Language.objects.all()

        # Optional index creation before anything is written
        if options['clean']:
            create_source_index()
            for language in all_languages:
                create_target_index(lang=language.code)

        selected = self.get_units(*args, **options)

        # Source index: checksum, source and context of every unit
        source_rows = selected.values('checksum', 'source', 'context')
        with FULLTEXT_INDEX.source_writer(buffered=False) as source_writer:
            for row in source_rows.iterator():
                Unit.objects.add_to_source_index(
                    row['checksum'], row['source'], row['context'],
                    source_writer
                )

        # Target indices: one per language, untranslated units are skipped
        for language in all_languages:
            target_rows = selected.filter(translation__language=language)
            target_rows = target_rows.exclude(target='')
            target_rows = target_rows.values('checksum', 'target')

            with FULLTEXT_INDEX.target_writer(lang=language.code, buffered=False) as target_writer:
                for row in target_rows.iterator():
                    Unit.objects.add_to_target_index(
                        row['checksum'], row['target'], target_writer
                    )
Beispiel #12
0
    def search(self,
               query,
               source=True,
               context=True,
               translation=True,
               checksums=False):
        '''
        Performs full text search on defined set of fields.

        source, context and translation choose the searched fields. The
        target index is picked by the language of the first unit of this
        queryset, so the translation search is skipped for an empty
        queryset.

        Returns queryset unless checksums is set; then the set of matching
        checksums is returned.
        '''
        ret = set()
        use_fresh_index = not settings.OFFLOAD_INDEXING

        if source or context:
            with FULLTEXT_INDEX.source_searcher(use_fresh_index) as searcher:
                # Both fields live in the source index
                for enabled, field in ((source, 'source'),
                                       (context, 'context')):
                    if enabled:
                        ret.update(
                            self.__search(searcher, field, SOURCE_SCHEMA,
                                          query))

        if translation:
            # self.all()[0] raises IndexError on an empty queryset; there
            # is no language to search in that case
            try:
                language_code = self.all()[0].translation.language.code
            except IndexError:
                language_code = None

            if language_code is not None:
                with FULLTEXT_INDEX.target_searcher(
                        language_code, use_fresh_index) as searcher:
                    ret.update(
                        self.__search(searcher, 'target', TARGET_SCHEMA,
                                      query))

        if checksums:
            return ret

        return self.filter(checksum__in=ret)