def handle(self, *args, **options):
    """
    Indexes every unit referenced by an IndexUpdate row, then deletes the rows.

    Each referenced unit is written to the source index. After that, for
    every language, the referenced units of that language whose target is
    not empty are written to that language's target index.

    Returns None; returns early when there are no IndexUpdate rows.
    """
    languages = Language.objects.all()

    # Fetch the related unit together with each row: all loops below read
    # update.unit, which would otherwise issue one extra query per row.
    base = IndexUpdate.objects.select_related('unit')

    # Only emptiness matters here, so ask the database for existence
    # instead of counting every row.
    if not base.exists():
        return

    # Update source index
    with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
        for update in base.iterator():
            unit = update.unit
            Unit.objects.add_to_source_index(
                unit.checksum,
                unit.source,
                unit.context,
                writer
            )

    # Update per language indices
    for lang in languages:
        with FULLTEXT_INDEX.target_writer(lang=lang.code, buffered=False) as writer:
            language_updates = base.filter(
                unit__translation__language=lang
            ).exclude(
                unit__target=''
            )
            for update in language_updates.iterator():
                unit = update.unit
                Unit.objects.add_to_target_index(
                    unit.checksum,
                    unit.target,
                    writer
                )

    # NOTE(review): this removes every IndexUpdate row present at this
    # point, including rows created while the loops above were running -
    # confirm that losing those is acceptable.
    base.delete()
def handle(self, *args, **options):
    """
    Writes the units returned by get_units() into the fulltext indices.

    When options['clean'] is true, create_source_index() is called once and
    create_target_index() once per language before anything is written.

    Every selected unit goes to the source index; for each language, the
    selected units of that language with a non-empty target go to that
    language's target index.

    Returns None; returns early when get_units() yields no units.
    """
    languages = Language.objects.all()

    if options['clean']:
        create_source_index()
        for lang in languages:
            create_target_index(lang=lang.code)

    base = self.get_units(*args, **options)

    # The number of units is never used, only whether there are any, so
    # exists() replaces the full count().
    if not base.exists():
        return

    with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
        # values() limits the query to the columns the index needs
        source_rows = base.values('checksum', 'source', 'context')
        for row in source_rows.iterator():
            Unit.objects.add_to_source_index(
                row['checksum'],
                row['source'],
                row['context'],
                writer
            )

    for lang in languages:
        with FULLTEXT_INDEX.target_writer(lang=lang.code, buffered=False) as writer:
            # Units with an empty target are left out of the target index
            target_rows = base.filter(
                translation__language=lang
            ).exclude(
                target=''
            ).values(
                'checksum', 'target'
            )
            for row in target_rows.iterator():
                Unit.objects.add_to_target_index(
                    row['checksum'],
                    row['target'],
                    writer
                )
def handle(self, *args, **options):
    """
    Processes all IndexUpdate rows and removes them afterwards.

    The unit of every row is added to the source index. Then, language by
    language, the units belonging to that language are added to the
    language's target index unless their target is empty.

    Returns None. Nothing is opened or written when no rows exist.
    """
    languages = Language.objects.all()

    # select_related() joins the unit into the same query, so reading
    # entry.unit in the loops below does not hit the database per row.
    pending = IndexUpdate.objects.select_related('unit')

    # exists() is enough: the row count itself is never needed.
    if not pending.exists():
        return

    # Source index: one document per referenced unit
    with FULLTEXT_INDEX.source_writer(buffered=False) as source_writer:
        for entry in pending.iterator():
            Unit.objects.add_to_source_index(
                entry.unit.checksum,
                entry.unit.source,
                entry.unit.context,
                source_writer
            )

    # Target indices: one writer per language
    for lang in languages:
        in_language = pending.filter(unit__translation__language=lang)
        translated = in_language.exclude(unit__target='')
        with FULLTEXT_INDEX.target_writer(lang=lang.code, buffered=False) as target_writer:
            for entry in translated.iterator():
                Unit.objects.add_to_target_index(
                    entry.unit.checksum,
                    entry.unit.target,
                    target_writer
                )

    # NOTE(review): the delete is not limited to the rows read above; rows
    # inserted in the meantime are removed as well - verify this is intended.
    pending.delete()
def search(self, query, source=True, context=True, translation=True, checksums=False):
    """
    Performs full text search on defined set of fields.

    The source and context flags select the fields searched in the source
    index, and translation enables the search of the "target" field in a
    target index. The target index is picked by the language of the first
    unit in this queryset.

    Returns the set of matching checksums when checksums is set, otherwise
    this queryset filtered to the matching checksums.
    """
    ret = set()

    if source or context:
        with FULLTEXT_INDEX.source_searcher(not settings.OFFLOAD_INDEXING) as searcher:
            if source:
                ret.update(self.__search(searcher, "source", SOURCE_SCHEMA, query))
            if context:
                ret.update(self.__search(searcher, "context", SOURCE_SCHEMA, query))

    if translation:
        # Take at most one unit to learn the language. An empty queryset
        # has no language to look up, so the target search is skipped
        # instead of failing with IndexError on [0].
        samples = list(self.all()[:1])
        if samples:
            language_code = samples[0].translation.language.code
            with FULLTEXT_INDEX.target_searcher(
                language_code,
                not settings.OFFLOAD_INDEXING
            ) as searcher:
                ret.update(self.__search(searcher, "target", TARGET_SCHEMA, query))

    if checksums:
        return ret

    return self.filter(checksum__in=ret)
def handle(self, *args, **options):
    """
    Updates the fulltext indices for the units selected by get_units().

    With options['clean'] set, create_source_index() and, for each language,
    create_target_index() are invoked before indexing starts.

    All selected units are added to the source index. Selected units with a
    non-empty target are added to the target index of their language.

    Returns None; nothing is written when no units are selected.
    """
    languages = Language.objects.all()

    if options['clean']:
        create_source_index()
        for lang in languages:
            create_target_index(lang=lang.code)

    units = self.get_units(*args, **options)

    # Checking for existence avoids counting all rows just to compare
    # the total against zero.
    if not units.exists():
        return

    # Source index
    source_fields = ('checksum', 'source', 'context')
    with FULLTEXT_INDEX.source_writer(buffered=False) as writer:
        for item in units.values(*source_fields).iterator():
            Unit.objects.add_to_source_index(
                item['checksum'], item['source'], item['context'], writer
            )

    # Target index of every language
    for lang in languages:
        with FULLTEXT_INDEX.target_writer(lang=lang.code, buffered=False) as writer:
            translated = units.filter(translation__language=lang).exclude(target='')
            for item in translated.values('checksum', 'target').iterator():
                Unit.objects.add_to_target_index(
                    item['checksum'], item['target'], writer
                )
def add_to_index(self, unit, writer_target=None, writer_source=None):
    """
    Updates/Adds to all indices given unit.

    A writer that is not passed in is requested from FULLTEXT_INDEX: the
    target writer for the language code of the unit's translation, and the
    source writer. The unit is then added to the source index and to the
    target index through these writers.
    """
    # Fall back to index-provided writers for whichever one is missing
    if writer_target is None:
        language_code = unit.translation.language.code
        writer_target = FULLTEXT_INDEX.target_writer(language_code)

    if writer_source is None:
        writer_source = FULLTEXT_INDEX.source_writer()

    self.add_to_source_index(
        unit.checksum,
        unit.source,
        unit.context,
        writer_source
    )
    self.add_to_target_index(
        unit.checksum,
        unit.target,
        writer_target
    )
def add_to_index(self, unit):
    """
    Updates/Adds to all indices given unit.

    When settings.OFFLOAD_INDEXING is enabled, the indices are not touched
    here; an IndexUpdate row for the unit is fetched or created instead.
    Otherwise the unit is written to the source index and to the target
    index of its translation language right away.
    """
    if not settings.OFFLOAD_INDEXING:
        # Immediate indexing with writers obtained from FULLTEXT_INDEX
        language_code = unit.translation.language.code
        writer_target = FULLTEXT_INDEX.target_writer(language_code)
        writer_source = FULLTEXT_INDEX.source_writer()

        self.add_to_source_index(
            unit.checksum,
            unit.source,
            unit.context,
            writer_source
        )
        self.add_to_target_index(
            unit.checksum,
            unit.target,
            writer_target
        )
        return

    # Imported locally, not at module level
    # (presumably to avoid a circular import - confirm).
    from weblate.trans.models import IndexUpdate
    IndexUpdate.objects.get_or_create(unit=unit)
def similar(self, unit):
    '''
    Finds similar units to current unit.

    Key terms are extracted from the unit source and searched for in the
    source field, first all together and then in smaller combinations.

    Returns units of the same project and language whose checksum matched,
    leaving out units with an empty target or the same target as the
    given unit.
    '''
    checksums = set([unit.checksum])

    with FULLTEXT_INDEX.source_searcher(not appsettings.OFFLOAD_INDEXING) as searcher:
        # Extract up to 10 terms from the source
        key_terms = searcher.key_terms_from_text(
            'source',
            unit.source,
            numterms=10
        )
        terms = [kw for kw, _score in key_terms if kw not in IGNORE_SIMILAR]

        # Search with all terms first, then with one term fewer each round,
        # until enough checksums are collected. At most three terms are
        # dropped, and the search never runs with zero terms.
        size = len(terms)
        while (len(checksums) < appsettings.SIMILAR_MESSAGES
                and size > 0
                and len(terms) - size < 4):
            for combination in itertools.combinations(terms, size):
                # Flags: source only, return checksums instead of a queryset
                found = self.search(
                    ' '.join(combination),
                    True,
                    False,
                    False,
                    True
                )
                checksums.update(found)
            size -= 1

    project = unit.translation.subproject.project
    language = unit.translation.language
    candidates = self.filter(
        translation__subproject__project=project,
        translation__language=language,
        checksum__in=checksums
    )
    return candidates.exclude(target__in=['', unit.target])
def similar(self, unit):
    '''
    Finds similar units to current unit.

    Collects checksums by searching the source field for combinations of
    key terms taken from the unit source, then returns the units of the
    same project and language with those checksums, excluding units whose
    target is empty or equal to the target of the given unit.
    '''
    matches = set([unit.checksum])
    searcher_context = FULLTEXT_INDEX.source_searcher(
        not settings.OFFLOAD_INDEXING
    )

    with searcher_context as searcher:
        # Extract up to 10 terms from the source
        terms = []
        for kw, _score in searcher.key_terms_from_text(
                'source', unit.source, numterms=10):
            if kw in IGNORE_SIMILAR:
                continue
            terms.append(kw)

        total = len(terms)
        cnt = total

        # Shrink the combination size by one per round. Stop once the
        # configured number of checksums is reached, when no terms are
        # left, or after three terms have been dropped.
        while len(matches) < settings.SIMILAR_MESSAGES and cnt > 0 and total - cnt < 4:
            for words in itertools.combinations(terms, cnt):
                query = ' '.join(words)
                # Positional flags follow the query in this order:
                # source, context, translation, checksums
                matches |= self.search(query, True, False, False, True)
            cnt -= 1

    translation = unit.translation
    same_scope = self.filter(
        translation__subproject__project=translation.subproject.project,
        translation__language=translation.language,
        checksum__in=matches
    )
    return same_scope.exclude(target__in=['', unit.target])
def add_to_index(self, unit):
    '''
    Updates/Adds to all indices given unit.

    If settings.OFFLOAD_INDEXING is true, the unit is only recorded through
    IndexUpdate.objects.get_or_create() and no index is written. If it is
    false, the unit is added to the source index and to the target index
    for the language code of its translation.
    '''
    if settings.OFFLOAD_INDEXING:
        # Local import kept inside the function
        # (presumably to avoid a circular import - confirm).
        from weblate.trans.models import IndexUpdate
        IndexUpdate.objects.get_or_create(unit=unit)
    else:
        target_language = unit.translation.language.code
        writer_target = FULLTEXT_INDEX.target_writer(target_language)
        writer_source = FULLTEXT_INDEX.source_writer()

        self.add_to_source_index(
            unit.checksum, unit.source, unit.context, writer_source
        )
        self.add_to_target_index(
            unit.checksum, unit.target, writer_target
        )
def handle(self, *args, **options):
    """
    Updates the fulltext indices for the units returned by get_units().

    With options['clean'] set, create_source_index() and, per language,
    create_target_index() are called first. All selected units are then
    added to the source index, and selected units with a non-empty target
    are added to the target index of their language.
    """
    all_languages = Language.objects.all()

    # Optionally rebuild indices from scratch
    if options['clean']:
        create_source_index()
        for language in all_languages:
            create_target_index(lang=language.code)

    selected = self.get_units(*args, **options)

    # Update source index
    with FULLTEXT_INDEX.source_writer(buffered=False) as source_writer:
        source_rows = selected.values('checksum', 'source', 'context')
        for row in source_rows.iterator():
            Unit.objects.add_to_source_index(
                row['checksum'], row['source'], row['context'], source_writer
            )

    # Update per language indices
    for language in all_languages:
        with FULLTEXT_INDEX.target_writer(lang=language.code, buffered=False) as target_writer:
            # Only units of this language that have a target
            translated = selected.filter(translation__language=language)
            translated = translated.exclude(target='')
            for row in translated.values('checksum', 'target').iterator():
                Unit.objects.add_to_target_index(
                    row['checksum'], row['target'], target_writer
                )
def search(self, query, source=True, context=True, translation=True, checksums=False):
    '''
    Performs full text search on defined set of fields.

    source and context choose which fields of the source index are
    searched. translation additionally searches the 'target' field of the
    target index for the language of the first unit in this queryset.

    Returns queryset unless checksums is set, in which case the set of
    matching checksums is returned.
    '''
    ret = set()

    if source or context:
        with FULLTEXT_INDEX.source_searcher(
                not settings.OFFLOAD_INDEXING) as searcher:
            if source:
                ret = ret.union(
                    self.__search(searcher, 'source', SOURCE_SCHEMA, query))
            if context:
                ret = ret.union(
                    self.__search(searcher, 'context', SOURCE_SCHEMA, query))

    if translation:
        # The language comes from the first unit. With no units at all
        # there is no target index to pick, so the target search is
        # skipped rather than letting IndexError escape.
        try:
            sample = self.all()[0]
        except IndexError:
            sample = None

        if sample is not None:
            with FULLTEXT_INDEX.target_searcher(
                    sample.translation.language.code,
                    not settings.OFFLOAD_INDEXING) as searcher:
                ret = ret.union(
                    self.__search(searcher, 'target', TARGET_SCHEMA, query))

    if checksums:
        return ret

    return self.filter(checksum__in=ret)