Ejemplo n.º 1
0
    def search(self, query, source=True, context=True, translation=True, checksums=False):
        """
        Performs full text search on defined set of fields.

        The query is handed unchanged to the private __search helper for
        every enabled field:

        - source / context are looked up in the source index,
        - translation is looked up in the target index of the language of
          the first unit in this queryset.

        Returns queryset filtered to the matching checksums, unless
        checksums is set, in which case the raw set of matches is returned.
        """
        ret = set()

        # Search in source or context (both live in the same index)
        if source or context:
            index = FULLTEXT_INDEX.source_searcher(not appsettings.OFFLOAD_INDEXING)
            with index as searcher:
                if source:
                    ret.update(self.__search(searcher, "source", SOURCE_SCHEMA, query))
                if context:
                    ret.update(self.__search(searcher, "context", SOURCE_SCHEMA, query))

        # Search in target
        if translation:
            # The target index is chosen by language, which is taken from
            # the first unit. An empty queryset has no such unit, so the
            # target search is skipped instead of failing with IndexError.
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None

            if sample is not None:
                index = FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code, not appsettings.OFFLOAD_INDEXING
                )
                with index as searcher:
                    ret.update(self.__search(searcher, "target", TARGET_SCHEMA, query))

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
Ejemplo n.º 2
0
    def fulltext(self, query, source=True, context=True, translation=True,
                 checksums=False):
        '''
        Performs full text search on defined set of fields.

        The query is handed unchanged to the private __search helper for
        every enabled field. The source and context fields are searched in
        the source index, the translation in the target index of the
        language of the first unit in this queryset.

        Returns queryset filtered to the matching checksums, unless
        checksums is set, in which case the raw set of matches is returned.
        '''
        ret = set()

        # Search in source or context (both live in the same index)
        if source or context:
            index = FULLTEXT_INDEX.source_searcher(
                not appsettings.OFFLOAD_INDEXING
            )
            with index as searcher:
                for field, enabled in (('source', source),
                                       ('context', context)):
                    if not enabled:
                        continue
                    ret.update(
                        self.__search(searcher, field, SOURCE_SCHEMA, query)
                    )

        # Search in target
        if translation:
            # The target index is chosen by language, which is taken from
            # the first unit. An empty queryset has no such unit, so the
            # target search is skipped instead of failing with IndexError.
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None

            if sample is not None:
                index = FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                    not appsettings.OFFLOAD_INDEXING
                )
                with index as searcher:
                    ret.update(
                        self.__search(
                            searcher,
                            'target',
                            TARGET_SCHEMA,
                            query
                        )
                    )

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
Ejemplo n.º 3
0
    def fulltext(self, query, source=True, context=True, translation=True,
                 checksums=False):
        '''
        Performs full text search on defined set of fields.

        The query is handed unchanged to the private __search helper for
        every enabled field. The source and context fields are searched in
        the source index, the translation in the target index of the
        language of the first unit in this queryset.

        Returns queryset filtered to the matching checksums, unless
        checksums is set, in which case the raw set of matches is returned.
        '''
        ret = set()

        # Search in source or context (both live in the same index)
        if source or context:
            index = FULLTEXT_INDEX.source_searcher()
            with index as searcher:
                if source:
                    ret.update(
                        self.__search(
                            searcher,
                            'source',
                            SOURCE_SCHEMA,
                            query
                        )
                    )
                if context:
                    ret.update(
                        self.__search(
                            searcher,
                            'context',
                            SOURCE_SCHEMA,
                            query
                        )
                    )

        # Search in target
        if translation:
            # The target index is chosen by language, which is taken from
            # the first unit. An empty queryset has no such unit, so the
            # target search is skipped instead of failing with IndexError.
            try:
                sample = self.all()[0]
            except IndexError:
                sample = None

            if sample is not None:
                index = FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                )
                with index as searcher:
                    ret.update(
                        self.__search(
                            searcher,
                            'target',
                            TARGET_SCHEMA,
                            query
                        )
                    )

        if checksums:
            return ret

        return self.filter(checksum__in=ret)
Ejemplo n.º 4
0
    def more_like_this(self, unit, top=5):
        '''
        Looks up units whose source closely resembles the given unit.

        The first plural form of the unit source is searched in the source
        index; the first hit is then asked for its "more like this"
        results (top is forwarded to that call). Checksums of the direct
        matches are dropped from the outcome, and the final queryset is
        limited to translated units in the same language, without the
        unit itself.
        '''
        searcher_context = FULLTEXT_INDEX.source_searcher()
        text = unit.get_source_plurals()[0]
        query = qparser.QueryParser('source', SOURCE_SCHEMA).parse(text)

        with searcher_context as searcher:
            # Exact lookup of the source string
            matches = searcher.search(query)
            if len(matches) < 1:
                return self.none()

            # Neighbours of the best match
            related = matches[0].more_like_this('source', text, top)
            found = set(hit['checksum'] for hit in related)

            # Direct matches are not wanted in the result
            found.difference_update(hit['checksum'] for hit in matches)

        candidates = self.filter(
            checksum__in=found,
            translation__language=unit.translation.language,
            translated=True
        )
        return candidates.exclude(pk=unit.id)
Ejemplo n.º 5
0
    def add_to_index(self, unit, source=True):
        """
        Adds the given unit to the fulltext indices, or refreshes it there.

        With appsettings.OFFLOAD_INDEXING enabled, only an IndexUpdate
        record is fetched or created for the unit and no index writer is
        obtained. Otherwise the unit is handed to add_to_target_index, and
        also to add_to_source_index when source is true.
        """
        if not appsettings.OFFLOAD_INDEXING:
            # Both writers are obtained up front, target first.
            target_writer = FULLTEXT_INDEX.target_writer(unit.translation.language.code)
            source_writer = FULLTEXT_INDEX.source_writer()

            if source:
                self.add_to_source_index(unit.checksum, unit.source, unit.context, source_writer)
            self.add_to_target_index(unit.checksum, unit.target, target_writer)
            return

        # Imported here rather than at module level, as in the original code
        from trans.models import IndexUpdate

        IndexUpdate.objects.get_or_create(unit=unit, source=source)
Ejemplo n.º 6
0
    def add_to_index(self, unit, source=True):
        '''
        Adds the given unit to the fulltext indices, or refreshes it there.

        With appsettings.OFFLOAD_INDEXING enabled, only an IndexUpdate
        record is fetched or created for the unit and no index writer is
        obtained. Otherwise the unit is handed to add_to_target_index, and
        also to add_to_source_index when source is true.
        '''
        if appsettings.OFFLOAD_INDEXING:
            # Imported here rather than at module level, as in the
            # original code
            from trans.models.unitdata import IndexUpdate
            IndexUpdate.objects.get_or_create(unit=unit, source=source)
            return

        # Both writers are obtained up front, target first.
        language_code = unit.translation.language.code
        target_writer = FULLTEXT_INDEX.target_writer(language_code)
        source_writer = FULLTEXT_INDEX.source_writer()

        if source:
            self.add_to_source_index(
                unit.checksum, unit.source, unit.context, source_writer
            )
        self.add_to_target_index(unit.checksum, unit.target, target_writer)
Ejemplo n.º 7
0
    def same_source(self, unit):
        '''
        Looks up other units matching the source of the given unit.

        The first plural form of the unit source is parsed as a query on
        the 'source' field and searched in the source index. The result is
        a queryset of translated units in the same language whose checksum
        was among the hits, without the unit itself.
        '''
        searcher_context = FULLTEXT_INDEX.source_searcher()
        text = unit.get_source_plurals()[0]
        query = qparser.QueryParser('source', SOURCE_SCHEMA).parse(text)

        with searcher_context as searcher:
            # Collect checksums while the searcher is still open
            found = set(hit['checksum'] for hit in searcher.search(query))

        matching = self.filter(
            checksum__in=found,
            translation__language=unit.translation.language,
            translated=True
        )
        return matching.exclude(pk=unit.id)
Ejemplo n.º 8
0
    def similar(self, unit):
        """
        Finds similar units to current unit.

        Up to 10 key terms are extracted from the unit source; terms listed
        in IGNORE_SIMILAR are dropped. The remaining terms are searched in
        the source field, first all together and then in ever smaller
        combinations (at most 3 terms removed), until at least
        appsettings.SIMILAR_MESSAGES checksums are collected or the
        combinations run out.

        Returns queryset of units from the same project and language whose
        checksum matched, excluding units with empty target or with the
        same target as the given unit.
        """
        ret = set([unit.checksum])
        index = FULLTEXT_INDEX.source_searcher(not appsettings.OFFLOAD_INDEXING)
        with index as searcher:
            # Extract up to 10 terms from the source
            key_terms = searcher.key_terms_from_text("source", unit.source, numterms=10)
            # Each key term entry is a sequence whose first item is the
            # term itself, so that item (not the whole entry) has to be
            # checked against the ignore list.
            terms = [kw[0] for kw in key_terms if kw[0] not in IGNORE_SIMILAR]
            cnt = len(terms)
            # Try to find at least configured number of similar strings,
            # remove up to 4 words
            while len(ret) < appsettings.SIMILAR_MESSAGES and cnt > 0 and len(terms) - cnt < 4:
                for search in itertools.combinations(terms, cnt):
                    # Source field only, raw checksums instead of queryset
                    results = self.search(
                        " ".join(search),
                        source=True,
                        context=False,
                        translation=False,
                        checksums=True,
                    )
                    ret = ret.union(results)
                cnt -= 1

        project = unit.translation.subproject.project
        return self.filter(
            translation__subproject__project=project, translation__language=unit.translation.language, checksum__in=ret
        ).exclude(target__in=["", unit.target])
Ejemplo n.º 9
0
    def more_like_this(self, unit, top=500):
        '''
        Finds closely similar units.

        The first plural form of the unit source is searched in the source
        index; the first hit is then asked for its "more like this"
        results. Checksums of the direct matches are removed from the
        outcome.

        top is forwarded to the hit's more_like_this() call as the number
        of results to request; it defaults to 500, the value that used to
        be hard-coded here.

        Returns queryset of translated units in the same language whose
        checksum was found, without the unit itself; an empty queryset
        when the source string has no match in the index.
        '''
        index = FULLTEXT_INDEX.source_searcher(
            not appsettings.OFFLOAD_INDEXING
        )
        source_string = unit.get_source_plurals()[0]
        parser = qparser.QueryParser('source', SOURCE_SCHEMA)
        parsed = parser.parse(source_string)
        checksums = set()
        with index as searcher:
            # Search for same string
            results = searcher.search(parsed)
            if len(results) == 0:
                return self.none()
            first_hit = results[0]
            # Find similar results to first one
            more_results = first_hit.more_like_this(
                'source',
                source_string,
                top
            )
            # Include all more like this results
            checksums.update(result['checksum'] for result in more_results)
            # Remove all original matches
            checksums.difference_update(
                result['checksum'] for result in results
            )

        return self.filter(
            checksum__in=checksums,
            translation__language=unit.translation.language,
            translated=True
        ).exclude(
            pk=unit.id
        )
Ejemplo n.º 10
0
    def same_source(self, unit):
        '''
        Looks up other units matching the source of the given unit.

        The first plural form of the unit source is parsed as a query on
        the 'source' field and searched in the source index. The result is
        a queryset of translated units in the same language whose checksum
        was among the hits, without the unit itself.
        '''
        searcher_context = FULLTEXT_INDEX.source_searcher(
            not appsettings.OFFLOAD_INDEXING
        )
        text = unit.get_source_plurals()[0]
        query_parser = qparser.QueryParser('source', SOURCE_SCHEMA)
        query = query_parser.parse(text)

        with searcher_context as searcher:
            # Collect checksums while the searcher is still open
            hits = searcher.search(query)
            found = set(hit['checksum'] for hit in hits)

        language = unit.translation.language
        matching = self.filter(
            checksum__in=found,
            translation__language=language,
            translated=True
        )
        return matching.exclude(pk=unit.id)