def do_test(self):
    """Write one document to the source and ``cs`` target index, then search.

    A fresh ``Fulltext`` instance is pointed at ``self.storage``. A document
    with ``pk=1`` is stored in both indexes and a search for each of the
    ``source``, ``context``, ``location`` and ``target`` terms is expected
    to return exactly ``{1}``.
    """
    engine = Fulltext()
    engine.storage = self.storage

    # Both indexes have to be obtainable from the configured storage
    source_index = engine.get_source_index()
    self.assertIsNotNone(source_index)
    target_index = engine.get_target_index('cs')
    self.assertIsNotNone(target_index)

    source_writer = source_index.writer()
    source_writer.update_document(
        pk=1, source="source", context="context", location="location",
    )
    source_writer.commit()

    target_writer = target_index.writer()
    target_writer.update_document(pk=1, target="target", comment="comment")
    target_writer.commit()

    # Every field stores its own name, so the field name doubles as the query
    expected = {1}
    for field in ('source', 'context', 'location', 'target'):
        found = engine.search(field, ['cs'], {field: True})
        self.assertEqual(found, expected)
def save(self, same_content=False, same_state=False, force_insert=False,
         backend=False, **kwargs):
    """Save the unit and refresh the data derived from it.

    Logs an error including the current call stack when the save does not
    come from the backend (eg. commit or by parsing file).

    :param same_content: when false, the word count is recomputed, checks
        are re-run and the fulltext index is updated
    :param same_state: when false, checks are re-run
    :param force_insert: forwarded to the parent ``save()`` and to
        ``run_checks()``; also forces a fulltext index update
    :param backend: set to true by backend code to suppress the error log
    :param kwargs: passed through to the parent ``save()``
    """
    # Warn if request is not coming from backend
    if not backend:
        self.log_error(
            'Unit.save called without backend sync: %s',
            ''.join(traceback.format_stack())
        )

    # Store number of words (also when it has not been computed yet)
    if not same_content or not self.num_words:
        self.num_words = len(self.get_source_plurals()[0].split())

    # Actually save the unit. force_insert is named in the signature, so it
    # never ends up in kwargs; hand it over explicitly, otherwise the
    # caller's request for an INSERT is silently dropped.
    # NOTE(review): assumes the parent is Django's Model.save() - confirm.
    super(Unit, self).save(force_insert=force_insert, **kwargs)

    # Update checks if content or fuzzy flag has changed
    if not same_content or not same_state:
        self.run_checks(same_state, same_content, force_insert)

    # Update fulltext index if content has changed or this is a new unit
    if force_insert or not same_content:
        Fulltext.update_index_unit(self)
def optimize_fulltext():
    """Optimize the source index and the per-language target indexes.

    Target indexes are optimized for every language returned by
    ``Language.objects.have_translation()``.
    """
    engine = Fulltext()
    engine.get_source_index().optimize()
    for language in Language.objects.have_translation():
        engine.get_target_index(language.code).optimize()
def test_add(self):
    """Run the fulltext index update twice for one edited unit.

    The test carries no assertions; it passes when neither call raises.
    """
    source = 'Hello, world!\n'
    self.edit_unit(source, 'Nazdar svete!\n')
    translation = self.get_translation()
    unit = translation.unit_set.get(source=source)
    # Repeated on purpose: the second call handles a unit the first one
    # has just processed
    for _ in range(2):
        Fulltext.update_index_unit(unit)
def update_source_units(self, previous_source, user): """Update source for units withing same component. This is needed when editing template translation for monolingual formats. """ # Find relevant units same_source = Unit.objects.filter( translation__component=self.translation.component, id_hash=self.id_hash, ).exclude( id=self.id ) # Update source, number of words and content_hash same_source.update( source=self.source, num_words=self.num_words, content_hash=self.content_hash ) # Find reverted units reverted = same_source.filter( state=STATE_FUZZY, previous_source=self.source ) reverted_ids = set(reverted.values_list('id', flat=True)) reverted.update( state=STATE_TRANSLATED, previous_source='' ) # Set fuzzy on changed same_source.filter( state=STATE_TRANSLATED ).exclude( id__in=reverted_ids ).update( state=STATE_FUZZY, previous_source=previous_source, ) # Update source index and stats for unit in same_source.iterator(): unit.update_has_comment() unit.update_has_suggestion() unit.run_checks(False, False) Fulltext.update_index_unit(unit) Change.objects.create( unit=unit, action=Change.ACTION_SOURCE_CHANGE, user=user, author=user, old=previous_source, target=self.source, ) unit.translation.invalidate_cache()
def handle(self, *args, **options):
    """Optimize the fulltext index or update it, depending on ``options``.

    ``optimize`` only optimizes the indexes and returns. Otherwise the
    indexes are optionally cleaned up (``clean`` or ``all``) and then
    either all units (``all``) or a filtered subset are processed.
    """
    # Optimization is exclusive, nothing gets updated in that mode
    if options['optimize']:
        optimize_fulltext()
        return

    engine = Fulltext()

    # Optionally rebuild indices from scratch
    if options['clean'] or options['all']:
        engine.cleanup()

    if not options['all']:
        self.process_filtered(engine, **options)
        return

    self.process_all(engine)
def cleanup_fulltext():
    """Remove stale units from fulltext.

    Walks the target index of every language code and finally the source
    index, and calls ``clean_search_unit`` for each stored document whose
    ``pk`` no longer matches an existing ``Unit``.
    """
    fulltext = Fulltext()
    codes = list(Language.objects.values_list('code', flat=True))

    # None stands for the source index, which is processed last
    for code in codes + [None]:
        index = (
            fulltext.get_source_index() if code is None
            else fulltext.get_target_index(code)
        )

        try:
            stored = index.reader().all_stored_fields()
        except EmptyIndexError:
            # Nothing stored in this index
            continue

        for document in stored:
            pk = document['pk']
            if not Unit.objects.filter(pk=pk).exists():
                fulltext.clean_search_unit(pk, code)
def update_source_units(self, previous_source, user):
    """Propagate a source change to the other units in the same component.

    This is needed when editing template translation for monolingual
    formats. Every other unit sharing this unit's ``id_hash`` receives the
    new source, word count and content hash. A fuzzy unit whose
    ``previous_source`` equals the new source becomes translated again;
    a unit in the translated state or above is marked fuzzy and remembers
    ``previous_source``.

    :param previous_source: source text before the edit
    :param user: recorded as both ``user`` and ``author`` of the changes
    """
    siblings = Unit.objects.filter(
        translation__component=self.translation.component,
        id_hash=self.id_hash,
    ).exclude(id=self.id)

    for sibling in siblings.iterator():
        # Copy over source, number of words and content_hash
        sibling.source = self.source
        sibling.num_words = self.num_words
        sibling.content_hash = self.content_hash

        is_revert = (
            sibling.state == STATE_FUZZY
            and sibling.previous_source == self.source
        )
        if is_revert:
            # The source is back at what was translated: unset fuzzy
            sibling.state = STATE_TRANSLATED
            sibling.previous_source = ''
        elif sibling.state >= STATE_TRANSLATED:
            # The translation was made for a different source: set fuzzy
            sibling.state = STATE_FUZZY
            sibling.previous_source = previous_source

        # Refresh flags, persist and update the source index
        sibling.update_has_comment()
        sibling.update_has_suggestion()
        sibling.save()
        Fulltext.update_index_unit(sibling)

        # Record the source change in the history of the unit
        Change.objects.create(
            unit=sibling,
            action=Change.ACTION_SOURCE_CHANGE,
            user=user,
            author=user,
            old=previous_source,
            target=self.source,
        )
        sibling.translation.invalidate_cache()
def save(self, same_content=False, same_state=False, force_insert=False,
         **kwargs):
    """Save the unit and refresh the data derived from it.

    :param same_content: when false, the word count is recomputed, checks
        are re-run and the fulltext index is updated
    :param same_state: when false, checks are re-run
    :param force_insert: forwarded to the parent ``save()``; also forces
        a fulltext index update
    :param kwargs: passed through to the parent ``save()``
    """
    # Store number of words (also when it has not been computed yet)
    if not same_content or not self.num_words:
        self.num_words = len(self.get_source_plurals()[0].split())

    # Actually save the unit. force_insert is named in the signature, so it
    # never ends up in kwargs; hand it over explicitly, otherwise the
    # caller's request for an INSERT is silently dropped.
    # NOTE(review): assumes the parent is Django's Model.save() - confirm.
    super(Unit, self).save(force_insert=force_insert, **kwargs)

    # Update checks if content or fuzzy flag has changed
    if not same_content or not same_state:
        self.run_checks(same_state, same_content)

    # Update fulltext index if content has changed or this is a new unit
    if force_insert or not same_content:
        Fulltext.update_index_unit(self)
def clone_test_repos(self):
    """Remove leftovers of earlier test runs.

    Deletes the test repository directories, drops the cached repository
    paths from the instance, deletes the test project directory and
    cleans up the fulltext indexes.
    """
    def wipe(path):
        # Remove the directory tree only when it is present
        if os.path.exists(path):
            shutil.rmtree(path, onerror=remove_readonly)

    # Remove possibly existing directories
    for name in ('test-repo.git', 'test-repo.hg', 'test-repo.svn'):
        wipe(self.get_repo_path(name))

    # Remove cached paths
    cached_names = (
        'git_repo_path', 'mercurial_repo_path', 'subversion_repo_path',
    )
    for cached in cached_names:
        self.__dict__.pop(cached, None)

    # Remove possibly existing project directory
    wipe(os.path.join(settings.DATA_DIR, 'vcs', 'test'))

    # Remove indexes
    Fulltext.cleanup()
def test_cleanup(self):
    """Check that ``cleanuptrans`` removes dangling data.

    ``Fulltext.FAKE`` is switched off for the duration of the test and
    restored afterwards. After all translations are deleted, the command
    is expected to leave no ``Suggestion``, no ``Source`` and no stored
    document in the source index.
    """
    saved_fake = Fulltext.FAKE
    Fulltext.FAKE = False
    fulltext = Fulltext()

    def stored_documents(index):
        # Number of documents currently stored in the given index
        return len(list(index.reader().all_stored_fields()))

    try:
        component = self.create_component()
        index = fulltext.get_source_index()
        self.assertEqual(stored_documents(index), 12)

        # Create dangling suggestion
        project = component.project
        language = component.translation_set.all()[0].language
        Suggestion.objects.create(
            project=project,
            content_hash=1,
            language=language,
        )

        # Remove all translations
        Translation.objects.all().delete()

        call_command('cleanuptrans')

        self.assertEqual(Suggestion.objects.count(), 0)
        self.assertEqual(Source.objects.count(), 0)
        self.assertEqual(stored_documents(index), 0)
    finally:
        Fulltext.FAKE = saved_fake
def handle(self, *args, **options):
    """Optimize the fulltext index or feed units into it.

    ``optimize`` only optimizes the index and returns. Otherwise the
    indexes are optionally cleaned up (``clean``) and every unit yielded
    by ``iterate_units`` is written to the source index and to the target
    index of its language. All opened writers are committed at the end,
    even when processing fails.
    """
    fulltext = Fulltext()

    # Optimization is exclusive, nothing gets updated in that mode
    if options['optimize']:
        self.optimize_index(fulltext)
        return

    # Optionally rebuild indices from scratch
    if options['clean']:
        fulltext.cleanup()

    source_writer = fulltext.get_source_index().writer()
    writers_by_code = {}

    try:
        for unit in self.iterate_units(**options):
            code = unit.translation.language.code

            # Target writers are opened on first use of a language
            if code not in writers_by_code:
                target_index = fulltext.get_target_index(code)
                writers_by_code[code] = target_index.writer()

            if unit.translation:
                fulltext.update_target_unit_index(
                    writers_by_code[code], unit
                )

            fulltext.update_source_unit_index(source_writer, unit)
    finally:
        # Close all writers
        source_writer.commit()
        for writer in writers_by_code.values():
            writer.commit()
def do_test(self):
    """Store a document in the source and ``cs`` target index and find it.

    The ``Fulltext`` instance under test uses ``self.storage``. Searching
    for each of ``source``, ``context``, ``location`` and ``target`` has to
    return exactly the document with ``pk=1``.
    """
    fulltext = Fulltext()
    fulltext.storage = self.storage

    source_index = fulltext.get_source_index()
    self.assertIsNotNone(source_index)
    target_index = fulltext.get_target_index('cs')
    self.assertIsNotNone(target_index)

    # Every field stores its own name as content
    source_fields = {
        'source': "source",
        'context': "context",
        'location': "location",
    }
    target_fields = {'target': "target", 'comment': "comment"}

    writer = source_index.writer()
    writer.update_document(pk=1, **source_fields)
    writer.commit()

    writer = target_index.writer()
    writer.update_document(pk=1, **target_fields)
    writer.commit()

    for term in ('source', 'context', 'location', 'target'):
        matches = fulltext.search(term, ['cs'], {term: True})
        self.assertEqual(matches, {1})
def clone_test_repos(self):
    """Prepare fresh Git, Mercurial and Subversion repositories for a test.

    Sets the ``*_base_repo_path`` and ``*_repo_path`` attributes for all
    three systems. Each base repository is passed to ``optional_extract``
    with its tarball name and copied over a removed working copy. Finally
    the test project directory and the fulltext indexes are removed.
    """
    data_dir = settings.DATA_DIR

    # Base paths hold the extracted originals, the others are the copies
    # on which tests will be performed
    self.git_base_repo_path = os.path.join(data_dir, 'test-base-repo.git')
    self.git_repo_path = os.path.join(data_dir, 'test-repo.git')
    self.mercurial_base_repo_path = os.path.join(
        data_dir, 'test-base-repo.hg'
    )
    self.mercurial_repo_path = os.path.join(data_dir, 'test-repo.hg')
    self.subversion_base_repo_path = os.path.join(
        data_dir, 'test-base-repo.svn'
    )
    self.subversion_repo_path = os.path.join(data_dir, 'test-repo.svn')

    repositories = (
        (
            self.git_base_repo_path,
            self.git_repo_path,
            'test-base-repo.git.tar',
        ),
        (
            self.mercurial_base_repo_path,
            self.mercurial_repo_path,
            'test-base-repo.hg.tar',
        ),
        (
            self.subversion_base_repo_path,
            self.subversion_repo_path,
            'test-base-repo.svn.tar',
        ),
    )
    for base_path, work_path, tarball in repositories:
        # Extract repo for testing
        self.optional_extract(base_path, tarball)
        # Remove possibly existing directory
        if os.path.exists(work_path):
            shutil.rmtree(work_path, onerror=remove_readonly)
        # Create repository copy for the test
        shutil.copytree(base_path, work_path)

    # Remove possibly existing project directory
    project_path = os.path.join(data_dir, 'vcs', 'test')
    if os.path.exists(project_path):
        shutil.rmtree(project_path, onerror=remove_readonly)

    # Remove indexes
    Fulltext.cleanup()
def search(self, params, project=None, component=None, language=None,
           translation=None):
    """High level wrapper for searching.

    Narrows the units by type, review criteria and language as requested
    in ``params``. The query ``params['q']``, when present, is then applied
    either as a database lookup (``exact``, ``substring`` and ``regex``
    search) or through the fulltext index (any other search kind).

    :param params: search parameters; ``type`` and, when ``q`` is given,
        ``search`` are required keys
    :param project: project passed on to the type and review filters
    :param component: component scope; overrides ``project`` with its own
    :param language: language passed on to the type and review filters
    :param translation: translation scope; overrides ``component`` and
        ``language`` with its own
    :return: the filtered result
    """
    # A narrower scope determines the wider ones
    if translation is not None:
        component = translation.component
        language = translation.language
    if component is not None:
        project = component.project

    base = self.prefetch()

    if params['type'] != 'all':
        base = self.filter_type(
            params['type'], project, language, params.get('ignored', False)
        )

    date = params.get('date')
    exclude_user = params.get('exclude_user')
    only_user = params.get('only_user')
    if date or exclude_user or only_user:
        base = base.review(
            date, exclude_user, only_user,
            project, component, language, translation
        )

    codes = params.get('lang')
    if codes:
        base = base.filter(translation__language__code__in=codes)

    # Without a query there is nothing more to narrow down
    needle = params.get('q')
    if not needle:
        return base

    mode = params['search']
    if mode not in ('exact', 'substring', 'regex'):
        # Everything else is answered by the fulltext index
        langs = set(self.values_list(
            'translation__language__code', flat=True
        ))
        return base.filter(
            pk__in=Fulltext().search(needle, langs, params)
        )

    if mode == 'exact':
        modifier = '__iexact'
    elif mode == 'regex':
        modifier = '__regex'
    else:
        modifier = '__icontains'

    # OR together a lookup for every search field enabled in params
    query = Q()
    for field in SEARCH_FILTERS:
        if params.get(field):
            lookup = '{0}{1}'.format(field, modifier)
            query = query | Q(**{lookup: needle})

    return base.filter(query)
def more_like_queue(pk, source, top, queue):
    """Multiprocess wrapper around more_like.

    Runs ``Fulltext().more_like(pk, source, top)`` and puts its result
    into ``queue``.
    """
    queue.put(Fulltext().more_like(pk, source, top))
def test_add(self):
    """Update the fulltext index two times in a row for an edited unit.

    There are no assertions, the test only checks that nothing raises.
    """
    self.edit_unit('Hello, world!\n', 'Nazdar svete!\n')
    units = self.get_translation().unit_set
    edited = units.get(source='Hello, world!\n')
    update = Fulltext.update_index_unit
    # Called twice on purpose with the very same unit
    update(edited)
    update(edited)
def update_fulltext_index(self):
    """Run the index update command on a new ``Fulltext`` instance.

    ``100000`` is passed as the second argument of ``do_update``.
    """
    UpdateIndexCommand().do_update(Fulltext(), 100000)
def clone_test_repos(self):
    """Set up pristine Git, Mercurial and Subversion test repositories.

    Stores the base and working repository paths on the instance. For
    each system the base repository is passed to ``optional_extract``
    with its tarball name, any existing working copy is removed and the
    base repository is copied in its place. The test project directory
    and the fulltext indexes are removed at the end.
    """
    def in_data_dir(*parts):
        # Path below the configured data directory
        return os.path.join(settings.DATA_DIR, *parts)

    def discard(path):
        # Remove possibly existing directory
        if os.path.exists(path):
            shutil.rmtree(path, onerror=remove_readonly)

    def refresh(base_path, work_path, tarball):
        # Extract repo for testing and create its copy for the test
        self.optional_extract(base_path, tarball)
        discard(work_path)
        shutil.copytree(base_path, work_path)

    # Base paths are where the remote repos are cloned for tests, the
    # other paths are the repositories on which tests will be performed
    self.git_base_repo_path = in_data_dir('test-base-repo.git')
    self.git_repo_path = in_data_dir('test-repo.git')
    self.mercurial_base_repo_path = in_data_dir('test-base-repo.hg')
    self.mercurial_repo_path = in_data_dir('test-repo.hg')
    self.subversion_base_repo_path = in_data_dir('test-base-repo.svn')
    self.subversion_repo_path = in_data_dir('test-repo.svn')

    refresh(
        self.git_base_repo_path,
        self.git_repo_path,
        'test-base-repo.git.tar',
    )
    refresh(
        self.mercurial_base_repo_path,
        self.mercurial_repo_path,
        'test-base-repo.hg.tar',
    )
    refresh(
        self.subversion_base_repo_path,
        self.subversion_repo_path,
        'test-base-repo.svn.tar',
    )

    # Remove possibly existing project directory
    discard(in_data_dir('vcs', 'test'))

    # Remove indexes
    Fulltext.cleanup()