def similar(self, unit):
    '''
    Finds similar units to current unit.

    Key terms are extracted from the unit source and searched for in the
    source index, first all together and then in smaller combinations
    (leaving out at most three terms), until the configured number of
    checksums has been collected. The checksum of the unit itself is part
    of the collected set and counts towards that number.

    Returns queryset of units from the same project and language as the
    given unit, excluding units whose target is empty or equal to the
    target of the given unit.
    '''
    found = {unit.checksum}
    with FULLTEXT_INDEX.source_searcher(
            not appsettings.OFFLOAD_INDEXING) as searcher:
        # Extract up to 10 terms from the source
        key_terms = searcher.key_terms_from_text(
            'source', unit.source, numterms=10
        )
        terms = [
            kw for kw, _score in key_terms
            if kw not in IGNORE_SIMILAR
        ]
        wanted = appsettings.SIMILAR_MESSAGES
        # Smallest combination size (exclusive): never search with zero
        # terms and never leave out more than three of them.
        lower_bound = max(len(terms) - 4, 0)
        for size in range(len(terms), lower_bound, -1):
            # The limit is only checked between sizes, so every
            # combination of the current size is always searched.
            if len(found) >= wanted:
                break
            for words in itertools.combinations(terms, size):
                found.update(
                    self.search(
                        ' '.join(words),
                        source=True,
                        context=False,
                        translation=False,
                        checksums=True
                    )
                )

    candidates = self.filter(
        translation__subproject__project=unit.translation.subproject.project,
        translation__language=unit.translation.language,
        checksum__in=found
    )
    return candidates.exclude(target__in=['', unit.target])
def similar(self, unit):
    '''
    Finds similar units to current unit.

    Key terms are extracted from the unit source and searched for in the
    source index, first all together and then in smaller combinations
    (leaving out at most three terms), until the configured number of
    checksums has been collected. The checksum of the unit itself is part
    of the collected set and counts towards that number.

    Returns queryset of units from the same project and language as the
    given unit, excluding units whose target is empty or equal to the
    target of the given unit.
    '''
    found = {unit.checksum}
    with FULLTEXT_INDEX.source_searcher(
            not settings.OFFLOAD_INDEXING) as searcher:
        # Extract up to 10 terms from the source
        key_terms = searcher.key_terms_from_text(
            'source', unit.source, numterms=10
        )
        terms = [
            kw for kw, _score in key_terms
            if kw not in IGNORE_SIMILAR
        ]
        wanted = settings.SIMILAR_MESSAGES
        # Smallest combination size (exclusive): never search with zero
        # terms and never leave out more than three of them.
        lower_bound = max(len(terms) - 4, 0)
        for size in range(len(terms), lower_bound, -1):
            # The limit is only checked between sizes, so every
            # combination of the current size is always searched.
            if len(found) >= wanted:
                break
            for words in itertools.combinations(terms, size):
                found.update(
                    self.search(
                        ' '.join(words),
                        source=True,
                        context=False,
                        translation=False,
                        checksums=True
                    )
                )

    candidates = self.filter(
        translation__subproject__project=unit.translation.subproject.project,
        translation__language=unit.translation.language,
        checksum__in=found
    )
    return candidates.exclude(target__in=['', unit.target])
def search(self, query, source=True, context=True, translation=True,
           checksums=False):
    """
    Performs full text search on defined set of fields.

    The query is matched against the source and context fields through
    the source searcher and, when translation is set, against the target
    field through the target searcher for the language of the first unit
    in this queryset. An empty queryset has no unit to take the language
    from, so the target search is skipped in that case.

    Returns queryset unless checksums is set; with checksums set the set
    of matching checksums is returned instead.
    """
    ret = set()

    if source or context:
        with FULLTEXT_INDEX.source_searcher(
                not settings.OFFLOAD_INDEXING) as searcher:
            for wanted, field in ((source, "source"), (context, "context")):
                if wanted:
                    ret.update(
                        self.__search(searcher, field, SOURCE_SCHEMA, query)
                    )

    if translation:
        # The target searcher is selected by language code, taken from
        # the first unit. Indexing an empty queryset raises IndexError,
        # which used to escape from here; treat it as nothing to search.
        try:
            sample = self.all()[0]
        except IndexError:
            sample = None
        if sample is not None:
            with FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                    not settings.OFFLOAD_INDEXING) as searcher:
                ret.update(
                    self.__search(searcher, "target", TARGET_SCHEMA, query)
                )

    if checksums:
        return ret

    return self.filter(checksum__in=ret)
def search(self, query, source=True, context=True, translation=True,
           checksums=False):
    '''
    Performs full text search on defined set of fields.

    The query is matched against the source and context fields through
    the source searcher and, when translation is set, against the target
    field through the target searcher for the language of the first unit
    in this queryset. An empty queryset has no unit to take the language
    from, so the target search is skipped in that case.

    Returns queryset unless checksums is set; with checksums set the set
    of matching checksums is returned instead.
    '''
    ret = set()

    if source or context:
        with FULLTEXT_INDEX.source_searcher(
                not settings.OFFLOAD_INDEXING) as searcher:
            for wanted, field in ((source, 'source'), (context, 'context')):
                if wanted:
                    ret.update(
                        self.__search(searcher, field, SOURCE_SCHEMA, query)
                    )

    if translation:
        # The target searcher is selected by language code, taken from
        # the first unit. Indexing an empty queryset raises IndexError,
        # which used to escape from here; treat it as nothing to search.
        try:
            sample = self.all()[0]
        except IndexError:
            sample = None
        if sample is not None:
            with FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                    not settings.OFFLOAD_INDEXING) as searcher:
                ret.update(
                    self.__search(searcher, 'target', TARGET_SCHEMA, query)
                )

    if checksums:
        return ret

    return self.filter(checksum__in=ret)