def get_objects_to_dump(domain, excludes):
    """
    Yield the model objects belonging to ``domain`` that should be dumped.

    Models are visited in the order produced by
    ``serializers.sort_dependencies``; for each model, every database that
    holds it is queried in primary-key order.

    :param domain: domain name to filter with
    :param excludes: exclusion spec handed to ``get_excluded_apps_and_models``
        to obtain the apps and the model classes to skip
    :return: generator yielding model objects
    """
    excluded_apps, excluded_models = get_excluded_apps_and_models(excludes)
    app_config_models = _get_app_list(excluded_apps)

    # Collate the objects to be serialized.
    for model in serializers.sort_dependencies(app_config_models.items()):
        if model in excluded_models:
            continue

        # A model routed to the proxy DB is read from each of the
        # form-processing databases instead.
        read_db = router.db_for_read(model)
        if settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db():
            db_aliases = partition_config.get_form_processing_dbs()
        else:
            db_aliases = [read_db]

        for db_alias in db_aliases:
            # Skip proxy models and databases the router does not allow the
            # model to be migrated to.
            if model._meta.proxy or not router.allow_migrate_model(db_alias, model):
                continue
            queryset = model._default_manager.using(db_alias).order_by(model._meta.pk.name)
            for domain_filter in get_model_domain_filters(model, domain):
                for obj in queryset.filter(domain_filter).iterator():
                    yield obj
def test_settings(self):
    """
    Guard test: the other tests in this class assume a specific partitioned
    setup, so verify those assumptions first -- exactly two shard databases,
    ``db1`` owning shards [0, 1] and ``db2`` owning shards [2, 3], and the
    sample doc ids routed to the expected database.
    """
    shards = settings.PARTITION_DATABASE_CONFIG['shards']
    self.assertEqual(len(shards), 2)
    self.assertIn(self.db1, shards)
    self.assertIn(self.db2, shards)
    self.assertEqual(shards[self.db1], [0, 1])
    self.assertEqual(shards[self.db2], [2, 3])

    self.assertEqual(set(partition_config.get_form_processing_dbs()), {self.db1, self.db2})

    # Each sample doc id must be routed to the database named by its prefix.
    expected_routing = (
        (self.p1_uuid1, self.db1),
        (self.p1_uuid2, self.db1),
        (self.p1_uuid3, self.db1),
        (self.p2_uuid1, self.db2),
        (self.p2_uuid2, self.db2),
        (self.p2_uuid3, self.db2),
    )
    for doc_id, expected_db in expected_routing:
        self.assertEqual(ShardAccessor.get_database_for_doc(doc_id), expected_db)
def run_query_across_partitioned_databases(model_class, q_expression, values=None):
    """
    Run one filter against every partitioned database and stream the results.

    When ``settings.USE_PARTITIONED_DATABASE`` is off, only the ``'default'``
    database is queried.

    :param model_class: A Django model class
    :param q_expression: An instance of django.db.models.Q representing the
        filter to apply
    :param values: (optional) A list or tuple of field names to retrieve
        instead of whole objects. A single name yields bare values; several
        names yield tuples.
    :return: A generator with the results
    :raises ValueError: if ``values`` is truthy but not a list or tuple. As
        this function is a generator, the error surfaces on first iteration.
    """
    db_names = (
        partition_config.get_form_processing_dbs()
        if settings.USE_PARTITIONED_DATABASE
        else ['default']
    )

    if values and not isinstance(values, (list, tuple)):
        raise ValueError("Expected a list or tuple")

    for db_name in db_names:
        queryset = model_class.objects.using(db_name).filter(q_expression)
        if values:
            # flat=True only for a single field, so results are bare values
            # rather than 1-tuples.
            queryset = queryset.values_list(*values, flat=(len(values) == 1))
        for result in queryset:
            yield result
def _get_docs_from_accessor(self, accessor, start, last_doc_pk=None, limit=500):
    """
    Gather ``accessor.get_docs(db, start)`` from every form-processing
    database into a single list, in database order.

    ``last_doc_pk`` and ``limit`` are accepted but not used by this
    implementation.
    """
    return [
        doc
        for from_db in partition_config.get_form_processing_dbs()
        for doc in accessor.get_docs(from_db, start)
    ]
def test_settings(self):
    """
    Guard test: the other tests in this class assume a specific partitioned
    setup, so verify those assumptions first -- exactly two shard databases,
    ``db1`` owning shards [0, 1] and ``db2`` owning shards [2, 3].
    """
    shards = settings.PARTITION_DATABASE_CONFIG['shards']
    self.assertEqual(len(shards), 2)
    self.assertIn(self.db1, shards)
    self.assertIn(self.db2, shards)
    self.assertEqual(shards[self.db1], [0, 1])
    self.assertEqual(shards[self.db2], [2, 3])

    form_processing_dbs = set(partition_config.get_form_processing_dbs())
    self.assertEqual(form_processing_dbs, {self.db1, self.db2})
def get_all_model_querysets_for_domain(model_class, domain):
    """
    Yield ``(model_class, queryset)`` pairs covering ``domain``.

    One pair is produced per database holding the model and per filter
    returned by ``get_model_domain_filters``; each queryset is ordered by the
    model's primary key.
    """
    read_db = router.db_for_read(model_class)
    # A model routed to the proxy DB is read from each of the form-processing
    # databases instead.
    if settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db():
        db_aliases = partition_config.get_form_processing_dbs()
    else:
        db_aliases = [read_db]

    for db_alias in db_aliases:
        # Skip proxy models and databases the router does not allow the model
        # to be migrated to.
        if model_class._meta.proxy or not router.allow_migrate_model(db_alias, model_class):
            continue
        manager = model_class._default_manager
        ordered = manager.using(db_alias).order_by(model_class._meta.pk.name)
        for domain_filter in get_model_domain_filters(model_class, domain):
            yield model_class, ordered.filter(domain_filter)
def get_all_model_iterators_builders_for_domain(model_class, domain, limit_to_db=None):
    """
    Yield ``(model_class, built_iterator)`` pairs for ``domain``, one per
    database that holds the model.

    The builder is looked up in ``APP_LABELS_WITH_FILTER_KWARGS_TO_DUMP`` by
    the model's label.

    :param limit_to_db: optional database alias; when given, only that
        database is used.
    :raises DomainDumpError: if ``limit_to_db`` is not one of the databases
        for the model. As this function is a generator, the error surfaces on
        first iteration.
    """
    read_db = router.db_for_read(model_class)
    # A model routed to the proxy DB is read from each of the form-processing
    # databases instead.
    if settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db():
        db_aliases = partition_config.get_form_processing_dbs()
    else:
        db_aliases = [read_db]

    if limit_to_db:
        if limit_to_db not in db_aliases:
            raise DomainDumpError(
                'DB specified is not valide for '
                'model class: {} not in {}'.format(limit_to_db, db_aliases)
            )
        db_aliases = [limit_to_db]

    for db_alias in db_aliases:
        # Skip proxy models and databases the router does not allow the model
        # to be migrated to.
        if model_class._meta.proxy or not router.allow_migrate_model(db_alias, model_class):
            continue
        builder = APP_LABELS_WITH_FILTER_KWARGS_TO_DUMP[get_model_label(model_class)]
        yield model_class, builder.build(domain, model_class, db_alias)
def test_get_database_for_docs(self):
    # Shard 1000 doc ids and check that each database's share is within a
    # tolerance of an even split (admittedly a loose test).
    doc_total = 1000
    tolerance = doc_total * 0.05  # 5% tolerance

    doc_db_map = ShardAccessor.get_database_for_docs([str(i) for i in range(doc_total)])

    # Tally how many doc ids were mapped to each database alias.
    doc_count_per_db = defaultdict(int)
    for db_alias in doc_db_map.values():
        doc_count_per_db[db_alias] += 1

    even_split = doc_total // len(partition_config.get_form_processing_dbs())
    diffs = [abs(even_split - count) for count in doc_count_per_db.values()]
    message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs)

    outliers = [diff for diff in diffs if diff > tolerance]
    self.assertEqual(len(outliers), 0, message)
def test_objects_only_in_one_db(self):
    """A newly created form and its case must each exist in exactly one database."""
    case_id = uuid4().hex
    form = create_form_for_test(DOMAIN, case_id=case_id)

    dbs_with_form, dbs_with_case = [], []
    for db in partition_config.get_form_processing_dbs():
        if XFormInstanceSQL.objects.using(db).filter(form_id=form.form_id).exists():
            dbs_with_form.append(db)
        if CommCareCaseSQL.objects.using(db).filter(case_id=case_id).exists():
            dbs_with_case.append(db)

    self.assertEqual(1, len(dbs_with_form))
    self.assertEqual(1, len(dbs_with_case))
def test_get_database_for_docs(self):
    # Shard 1000 doc ids and check that each database's share is within a
    # tolerance of an even split (admittedly a loose test).
    doc_total = 1000
    doc_ids = [str(i) for i in range(doc_total)]

    # Tally how many doc ids were mapped to each database alias.
    doc_count_per_db = defaultdict(int)
    for db_alias in ShardAccessor.get_database_for_docs(doc_ids).values():
        doc_count_per_db[db_alias] += 1

    num_dbs = len(partition_config.get_form_processing_dbs())
    even_split = doc_total // num_dbs
    tolerance = doc_total * 0.05  # 5% tolerance

    diffs = [abs(even_split - count) for count in doc_count_per_db.values()]
    outlier_count = len([diff for diff in diffs if diff > tolerance])
    message = 'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs)
    self.assertEqual(outlier_count, 0, message)
def test_models_are_located_in_correct_dbs(self):
    """
    Scheduling models must exist only in the main database; scheduling
    partitioned models must exist only in the proxy and partitioned databases.
    """
    main_db = partition_config.get_main_db()
    proxy_db = partition_config.get_proxy_db()
    partitioned_dbs = partition_config.get_form_processing_dbs()
    proxy_and_partitioned_dbs = [proxy_db] + partitioned_dbs

    for model_class in self.get_scheduling_models():
        # present in the main db, absent everywhere else
        self.assertModelExists(model_class, main_db)
        for db in proxy_and_partitioned_dbs:
            self.assertModelDoesNotExist(model_class, db)

    for model_class in self.get_scheduling_partitioned_models():
        # absent from the main db, present everywhere else
        self.assertModelDoesNotExist(model_class, main_db)
        for db in proxy_and_partitioned_dbs:
            self.assertModelExists(model_class, db)
def test_models_are_located_in_correct_dbs(self, app_label, is_partitioned):
    """
    Check where the models of ``app_label`` live.

    Partitioned apps must have their models in the proxy and partitioned
    databases and not in the main database; for non-partitioned apps it is
    the other way round.
    """
    main_db = partition_config.get_main_db()
    proxy_db = partition_config.get_proxy_db()
    partitioned_dbs = partition_config.get_form_processing_dbs()

    # Choose the expectation for each side once, up front.
    if is_partitioned:
        check_main_db = self.assertModelDoesNotExist
        check_partitioned_db = self.assertModelExists
    else:
        check_main_db = self.assertModelExists
        check_partitioned_db = self.assertModelDoesNotExist

    for model_class in self.get_models(app_label):
        check_main_db(model_class, main_db)
        for db in ([proxy_db] + partitioned_dbs):
            check_partitioned_db(model_class, db)
def test_objects_distributed_to_all_dbs(self):
    """
    Rudimentary check that forms and cases are spread over the databases
    rather than all being saved to the same one.
    """
    num_forms = 20
    for _ in range(num_forms):
        create_form_for_test(DOMAIN, case_id=uuid4().hex)

    forms_per_db = {}
    cases_per_db = {}
    for db in partition_config.get_form_processing_dbs():
        forms_per_db[db] = XFormInstanceSQL.objects.using(db).filter(domain=DOMAIN).count()
        cases_per_db[db] = CommCareCaseSQL.objects.using(db).filter(domain=DOMAIN).count()

    # Nothing was lost: the per-db counts add up to what was created.
    for counts in (forms_per_db, cases_per_db):
        self.assertEqual(num_forms, sum(counts.values()), counts)

    # No single database holds everything.
    for counts in (forms_per_db, cases_per_db):
        self.assertTrue(
            all(count_in_db < num_forms for count_in_db in counts.values()),
            counts
        )
def _get_db_list_to_query():
    """
    Return the list of databases to query.

    With ``settings.USE_PARTITIONED_DATABASE`` enabled this is the result of
    ``partition_config.get_form_processing_dbs()``; otherwise it is a
    one-element list containing ``None``.
    """
    if not settings.USE_PARTITIONED_DATABASE:
        return [None]
    return partition_config.get_form_processing_dbs()
def _analyse(cls):
    """Execute ``ANALYSE`` on every form-processing database."""
    for db_alias in partition_config.get_form_processing_dbs():
        with connections[db_alias].cursor() as cursor:
            # the doc count query relies on this
            cursor.execute('ANALYSE')
def setUpClass(cls):
    """
    Skip the whole class unless ``settings.USE_PARTITIONED_DATABASE`` is on,
    then require more than one form-processing database.
    """
    if not settings.USE_PARTITIONED_DATABASE:
        # https://github.com/nose-devs/nose/issues/946
        raise SkipTest('Only applicable if sharding is setup')

    super(ShardingTests, cls).setUpClass()

    form_processing_dbs = partition_config.get_form_processing_dbs()
    assert len(form_processing_dbs) > 1
def _get_docs(self, start, last_doc_pk=None, limit=500):
    """
    Instantiate ``self.accessor_class`` and gather its ``get_docs(db, start)``
    results from every form-processing database into one list.

    ``last_doc_pk`` and ``limit`` are accepted but not used by this
    implementation.
    """
    accessor = self.accessor_class()
    collected = []
    for from_db in partition_config.get_form_processing_dbs():
        collected += accessor.get_docs(from_db, start)
    return collected
def test_get_doc_count(self):
    """The per-database doc counts must add up to 8."""
    doc_count = 0
    for from_db in partition_config.get_form_processing_dbs():
        # A fresh accessor is created for every database.
        doc_count += self.accessor_class().get_doc_count(from_db)
    self.assertEqual(8, doc_count)
def get_db_aliases_for_partitioned_query():
    """
    Return the database aliases a partitioned query should run against.

    With ``settings.USE_PARTITIONED_DATABASE`` enabled this is the result of
    ``partition_config.get_form_processing_dbs()``; otherwise it is
    ``['default']``.
    """
    return (
        partition_config.get_form_processing_dbs()
        if settings.USE_PARTITIONED_DATABASE
        else ['default']
    )
def test_get_doc_count(self):
    """The per-database doc counts must add up to ``len(self.all_doc_ids)``."""
    counts_per_db = [
        # A fresh accessor is created for every database.
        self.accessor_class().get_doc_count(from_db)
        for from_db in partition_config.get_form_processing_dbs()
    ]
    doc_count = sum(counts_per_db)
    self.assertEqual(len(self.all_doc_ids), doc_count)
def tearDown(self):
    """
    Delete this test domain's ``AlertScheduleInstance`` and
    ``XFormInstanceSQL`` rows from every form-processing database.
    """
    for db in partition_config.get_form_processing_dbs():
        for model in (AlertScheduleInstance, XFormInstanceSQL):
            model.objects.using(db).filter(domain=self.domain).delete()
def tearDown(self):
    """
    Delete this test domain's ``AlertScheduleInstance`` and
    ``TimedScheduleInstance`` rows from every form-processing database.
    """
    for db in partition_config.get_form_processing_dbs():
        for model in (AlertScheduleInstance, TimedScheduleInstance):
            model.objects.using(db).filter(domain=self.domain).delete()