Esempio n. 1
0
    def similar(self, unit):
        '''
        Finds units similar to the given unit.

        Key terms are extracted from ``unit.source`` through the source
        fulltext index and searched for in progressively smaller
        combinations until enough candidate checksums are collected.

        Returns the units from the same project and language as ``unit``
        whose checksum is among the candidates, excluding units whose
        target is empty or equal to ``unit.target``.
        '''
        # Seeded with the unit's own checksum, so it counts towards the
        # SIMILAR_MESSAGES threshold checked below.
        checksums = {unit.checksum}
        with FULLTEXT_INDEX.source_searcher(not appsettings.OFFLOAD_INDEXING) as searcher:
            # Extract up to 10 terms from the source
            key_terms = searcher.key_terms_from_text('source', unit.source, numterms=10)
            terms = [kw for kw, _score in key_terms if kw not in IGNORE_SIMILAR]
            cnt = len(terms)
            # Try to find at least configured number of similar strings, remove up to 4 words
            while len(checksums) < appsettings.SIMILAR_MESSAGES and cnt > 0 and len(terms) - cnt < 4:
                # Every combination of the current size is searched before
                # the threshold is checked again.
                for search in itertools.combinations(terms, cnt):
                    # NOTE(review): positional flags; assuming the signature
                    # search(query, source, context, translation, checksums)
                    # this searches the source field only and returns
                    # checksums rather than a queryset - confirm.
                    checksums.update(
                        self.search(
                            ' '.join(search),
                            True,
                            False,
                            False,
                            True
                        )
                    )
                cnt -= 1

        return self.filter(
            translation__subproject__project=unit.translation.subproject.project,
            translation__language=unit.translation.language,
            checksum__in=checksums
        ).exclude(
            target__in=['', unit.target]
        )
Esempio n. 2
0
    def similar(self, unit):
        '''
        Finds units similar to the given unit.

        Key terms are extracted from ``unit.source`` through the source
        fulltext index; combinations of those terms (dropping one more
        term on each pass) are searched until enough candidate checksums
        have been gathered.

        Returns the units sharing project and language with ``unit``
        whose checksum is a candidate, leaving out units with an empty
        target or a target equal to ``unit.target``.
        '''
        # The unit's own checksum is part of the set from the start and
        # therefore counts towards the SIMILAR_MESSAGES threshold.
        found = {unit.checksum}
        with FULLTEXT_INDEX.source_searcher(
                not settings.OFFLOAD_INDEXING) as searcher:
            # Extract up to 10 terms from the source
            terms = [
                kw for kw, _score in searcher.key_terms_from_text(
                    'source', unit.source, numterms=10)
                if kw not in IGNORE_SIMILAR
            ]
            cnt = len(terms)
            # Try to find at least configured number of similar strings,
            # remove up to 4 words
            while (len(found) < settings.SIMILAR_MESSAGES and cnt > 0
                   and len(terms) - cnt < 4):
                # All combinations of the current size are searched before
                # the threshold is evaluated again.
                for search in itertools.combinations(terms, cnt):
                    # NOTE(review): positional flags; assuming the signature
                    # search(query, source, context, translation, checksums)
                    # only the source field is searched and checksums are
                    # returned - confirm.
                    found.update(
                        self.search(' '.join(search), True, False, False,
                                    True))
                cnt -= 1

        project = unit.translation.subproject.project
        return self.filter(
            translation__subproject__project=project,
            translation__language=unit.translation.language,
            checksum__in=found).exclude(target__in=['', unit.target])
Esempio n. 3
0
    def search(self, query, source=True, context=True, translation=True, checksums=False):
        """
        Performs full text search on defined set of fields.

        The ``source``, ``context`` and ``translation`` flags select which
        fields are searched; the matches from all selected fields are
        combined into one set of checksums.

        Returns queryset unless checksums is set, in which case the raw
        set of matching checksums is returned instead.
        """
        ret = set()
        if source or context:
            # Source and context share one searcher, so open it only once.
            with FULLTEXT_INDEX.source_searcher(not settings.OFFLOAD_INDEXING) as searcher:
                if source:
                    ret.update(self.__search(searcher, "source", SOURCE_SCHEMA, query))
                if context:
                    ret.update(self.__search(searcher, "context", SOURCE_SCHEMA, query))

        if translation:
            # The target index is selected by language code, taken from the
            # first unit of this set.
            # NOTE(review): presumably all units here share one language - confirm.
            try:
                sample = self.all()[0]
            except IndexError:
                # Empty set: there is no unit to take the language from, so
                # skip the target search instead of crashing.
                sample = None

            if sample is not None:
                with FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code, not settings.OFFLOAD_INDEXING
                ) as searcher:
                    ret.update(self.__search(searcher, "target", TARGET_SCHEMA, query))

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
Esempio n. 4
0
    def search(self,
               query,
               source=True,
               context=True,
               translation=True,
               checksums=False):
        '''
        Performs full text search on defined set of fields.

        Each of ``source``, ``context`` and ``translation`` enables the
        search of the matching field; all matches are merged into a
        single set of checksums.

        Returns queryset unless checksums is set, in which case the set
        of matching checksums itself is returned.
        '''
        matches = set()
        if source or context:
            # One searcher serves both the source and the context lookup.
            with FULLTEXT_INDEX.source_searcher(
                    not settings.OFFLOAD_INDEXING) as searcher:
                if source:
                    matches.update(
                        self.__search(searcher, 'source', SOURCE_SCHEMA,
                                      query))
                if context:
                    matches.update(
                        self.__search(searcher, 'context', SOURCE_SCHEMA,
                                      query))

        if translation:
            # The language code of the first unit picks the target index.
            # NOTE(review): presumably every unit in this set has the same
            # language - confirm.
            try:
                sample = self.all()[0]
            except IndexError:
                # No units at all: the language is unknown, so the target
                # search is skipped rather than raising.
                sample = None

            if sample is not None:
                with FULLTEXT_INDEX.target_searcher(
                        sample.translation.language.code,
                        not settings.OFFLOAD_INDEXING) as searcher:
                    matches.update(
                        self.__search(searcher, 'target', TARGET_SCHEMA,
                                      query))

        if checksums:
            return matches

        return self.filter(checksum__in=matches)