def calculate_checks(self, check_names, unit_fk_filter, store_fk_filter):
    """Recalculate quality checks for the units selected by the filters.

    :param check_names: names of the checks to recalculate; when empty the
        stored checks are not narrowed by name.
    :param unit_fk_filter: lookup kwargs applied to ``QualityCheck`` rows.
    :param store_fk_filter: lookup kwargs applied to ``Unit`` rows.
    """
    logger.info('Calculating quality checks for all units...')
    QualityCheck.delete_unknown_checks()

    check_rows = QualityCheck.objects.filter(**unit_fk_filter)
    if check_names:
        check_rows = check_rows.filter(name__in=check_names)
    check_rows = check_rows.values(
        'id', 'name', 'unit_id', 'category', 'false_positive'
    )

    # Index the stored checks as {unit_id: {check_name: check_row}} so that
    # each unit can be handed its existing checks without another query.
    checks_by_unit = {}
    for row in check_rows:
        per_unit = checks_by_unit.setdefault(row['unit_id'], {})
        per_unit[row['name']] = row

    unit_qs = Unit.simple_objects.select_related('store')
    # The ordering is cleared on the base queryset before filtering.
    unit_qs.query.clear_ordering(True)
    selected_units = unit_qs.filter(**store_fk_filter).iterator()

    for processed, unit in enumerate(selected_units, start=1):
        known_checks = checks_by_unit.get(unit.id, {})
        checks_changed = unit.update_qualitychecks(
            keep_false_positives=True,
            check_names=check_names,
            existing=known_checks,
        )
        if checks_changed:
            # Bump unit.mtime with a direct queryset update.
            # TODO: add new action type `quality checks were updated`?
            Unit.simple_objects.filter(id=unit.id).update(
                mtime=timezone.now()
            )

        if processed % 10000 == 0:
            logger.info("%d units processed" % processed)
def clear_checks(self):
    """Delegate to ``QualityCheck.delete_unknown_checks()``."""
    QualityCheck.delete_unknown_checks()
def calculate_checks(check_names=None, translation_project=None):
    """Recalculate quality checks for all non-obsolete units.

    :param check_names: iterable of check names to recalculate; ``None``
        leaves the stored checks unfiltered by name.
    :param translation_project: when given, only units and checks whose
        store belongs to this translation project are processed.
    """
    if translation_project is None:
        store_scope = {}
        check_scope = {}
    else:
        store_scope = {'store__translation_project': translation_project}
        check_scope = {
            'unit__store__translation_project': translation_project,
        }

    logging.info('Calculating quality checks for all units...')
    QualityCheck.delete_unknown_checks()

    stored_checks = QualityCheck.objects.filter(**check_scope)
    if check_names is not None:
        stored_checks = stored_checks.filter(name__in=check_names)
    stored_checks = stored_checks.values(
        'id', 'name', 'unit_id', 'category', 'false_positive'
    )

    # {unit_id: {check_name: check_row}}
    checks_by_unit = {}
    for row in stored_checks:
        checks_by_unit.setdefault(row['unit_id'], {})[row['name']] = row

    unit_lookup = dict(store_scope, state__gt=OBSOLETE)
    # The unit query is faster without `select_related('store')`.
    units = Unit.simple_objects.filter(**unit_lookup).order_by('store__id')

    # Units arrive grouped by store, so the dirty cache of a store is
    # updated once, right after its last unit has been processed.
    current_store = None
    for processed, unit in enumerate(units.iterator(), start=1):
        store_changed = (current_store is None
                         or unit.store_id != current_store.id)
        if store_changed:
            if current_store is not None:
                current_store.update_dirty_cache()
            # The store is fetched only when it differs from the previous.
            current_store = Store.simple_objects.get(id=unit.store_id)

        # HACKISH: assign unit.store up front to avoid extra querying in
        # `unit.update_qualitychecks()`.
        unit.store = current_store

        known_checks = checks_by_unit.get(unit.id, {})
        if unit.update_qualitychecks(keep_false_positives=True,
                                     check_names=check_names,
                                     existing=known_checks):
            # Update unit.mtime without calling unit.save(), which can
            # trigger unnecessary work: logging, stats cache updating.
            # TODO: add new action type `quality checks were updated`?
            Unit.simple_objects.filter(id=unit.id).update(
                mtime=timezone.now()
            )

        if processed % 10000 == 0:
            logging.info("%d units processed" % processed)

    if current_store is not None:
        current_store.update_dirty_cache()
def process(self, **options):
    """Initialise stores and refresh their cached statistics.

    Recognised ``options`` keys (defaults in parentheses):

    * ``calculate_checks`` (``False``) -- recalculate quality checks for
      every unit of the selected stores.
    * ``calculate_wordcount`` (``False``) -- recalculate and save the
      wordcount of every unit of the selected stores.
    * ``check_names`` (``[]``) -- when non-empty, only quality check stats
      are set; the other per-store stats are left untouched.
    * ``store_filter`` (``{}``) -- lookup kwargs narrowing the stores.
    * ``unit_fk_filter`` / ``store_fk_filter`` (``{}``) -- lookup kwargs
      forwarded to the ``_set_*`` helpers.
    """
    calculate_checks = options.get('calculate_checks', False)
    calculate_wordcount = options.get('calculate_wordcount', False)
    check_names = options.get('check_names', [])
    store_filter = options.get('store_filter', {})
    unit_fk_filter = options.get('unit_fk_filter', {})
    store_fk_filter = options.get('store_fk_filter', {})

    # With a non-empty check_names only the stats of those checks are
    # updated, so the general stats are neither initialised nor set.
    refresh_all_stats = not check_names

    logging.info('Initializing stores...')
    stores = Store.objects.all()
    if store_filter:
        stores = stores.filter(**store_filter)

    self._init_stores(stores)
    if refresh_all_stats:
        self._init_stats()
    self._init_checks()

    if calculate_checks:
        logging.info('Calculating quality checks for all units...')
        QualityCheck.delete_unknown_checks()

        units_done = 0
        for store_no, store in enumerate(stores.iterator(), start=1):
            logging.info("update_qualitychecks for %s" % store.pootle_path)
            for unit in store.units.iterator():
                units_done += 1
                unit.update_qualitychecks(
                    keep_false_positives=True,
                    check_names=check_names,
                )
            # Progress is reported once every 20 stores.
            if store_no % 20 == 0:
                logging.info("%d units processed" % units_done)

    if calculate_wordcount:
        logging.info('Calculating wordcount for all units...')

        units_done = 0
        for store_no, store in enumerate(stores.iterator(), start=1):
            logging.info("calculate wordcount for %s" % store.pootle_path)
            for unit in store.unit_set.iterator():
                units_done += 1
                unit.update_wordcount()
                unit.save()
            # Progress is reported once every 20 stores.
            if store_no % 20 == 0:
                logging.info("%d units processed" % units_done)

    logging.info('Setting quality check stats values for all stores...')
    self._set_qualitycheck_stats(unit_fk_filter)

    if refresh_all_stats:
        logging.info('Setting last action values for all stores...')
        self._set_last_action_stats(store_fk_filter)

        logging.info('Setting last updated values for all stores...')
        self._set_last_updated_stats(store_fk_filter)

        logging.info('Setting mtime values for all stores...')
        self._set_mtime_stats(store_fk_filter)

        logging.info('Setting wordcount stats values for all stores...')
        self._set_wordcount_stats(store_fk_filter)

        logging.info('Setting suggestion count values for all stores...')
        self._set_suggestion_stats(unit_fk_filter)

    logging.info('Setting empty values for other cache entries...')
    self._set_empty_values()
def calculate_checks(check_names=None, translation_project=None):
    """Recalculate quality checks for all non-obsolete units.

    :param check_names: iterable of check names to recalculate; ``None``
        leaves the stored checks unfiltered by name.
    :param translation_project: when given, only units and checks whose
        store belongs to this translation project are processed.
    """
    store_fk_filter = {}
    unit_fk_filter = {}
    if translation_project is not None:
        store_fk_filter['store__translation_project'] = translation_project
        unit_fk_filter['unit__store__translation_project'] = \
            translation_project

    logging.info('Calculating quality checks for all units...')
    QualityCheck.delete_unknown_checks()

    existing_rows = QualityCheck.objects.filter(**unit_fk_filter)
    if check_names is not None:
        existing_rows = existing_rows.filter(name__in=check_names)
    existing_rows = existing_rows.values(
        'id', 'name', 'unit_id', 'category', 'false_positive'
    )

    # Group the stored check rows per unit, keyed by check name.
    existing_by_unit = {}
    for check_row in existing_rows:
        unit_bucket = existing_by_unit.setdefault(check_row['unit_id'], {})
        unit_bucket[check_row['name']] = check_row

    unit_filter = {'state__gt': OBSOLETE}
    unit_filter.update(store_fk_filter)
    # The unit query is faster without `select_related('store')`.
    unit_qs = Unit.simple_objects.filter(**unit_filter)
    unit_qs = unit_qs.order_by('store__id')

    # Units come ordered by store; the previous store's dirty cache is
    # updated at the moment the iteration moves on to another store.
    active_store = None
    for unit_number, unit in enumerate(unit_qs.iterator(), start=1):
        if active_store is None:
            # First unit: there is no previous store to update.
            active_store = Store.simple_objects.get(id=unit.store_id)
        elif unit.store_id != active_store.id:
            active_store.update_dirty_cache()
            active_store = Store.simple_objects.get(id=unit.store_id)

        # HACKISH: set unit.store so that `unit.update_qualitychecks()`
        # does not need an extra query for it.
        unit.store = active_store

        unit_existing = existing_by_unit.get(unit.id, {})
        was_updated = unit.update_qualitychecks(
            keep_false_positives=True,
            check_names=check_names,
            existing=unit_existing,
        )
        if was_updated:
            # unit.save() is avoided on purpose because it can trigger
            # unnecessary things (logging, stats cache updating); only
            # mtime is written, through a queryset update.
            # TODO: add new action type `quality checks were updated`?
            Unit.simple_objects.filter(id=unit.id).update(
                mtime=timezone.now()
            )

        if unit_number % 10000 == 0:
            logging.info("%d units processed" % unit_number)

    # Update the dirty cache of the last processed store, if any.
    if active_store is not None:
        active_store.update_dirty_cache()