예제 #1
0
def get_objects_to_dump(domain, excludes):
    """
    Yield every model object belonging to ``domain`` that should be dumped.

    Models are visited in the order returned by
    ``serializers.sort_dependencies``. Excluded models, proxy models, and
    models that ``router.allow_migrate_model`` rejects for a database are
    skipped.

    :param domain: domain name to filter with
    :param excludes: exclusion spec handed to ``get_excluded_apps_and_models``
        to work out which apps and models are left out of the dump
    :return: generator yielding model objects
    """
    excluded_apps, excluded_models = get_excluded_apps_and_models(excludes)
    app_config_models = _get_app_list(excluded_apps)

    # Collate the objects to be serialized.
    for model in serializers.sort_dependencies(app_config_models.items()):
        if model in excluded_models:
            continue

        db_for_read = router.db_for_read(model)
        if settings.USE_PARTITIONED_DATABASE and db_for_read == partition_config.get_proxy_db():
            # reads routed to the proxy db are replaced by the list of
            # form processing dbs
            db_aliases = partition_config.get_form_processing_dbs()
        else:
            db_aliases = [db_for_read]

        for db_alias in db_aliases:
            if model._meta.proxy or not router.allow_migrate_model(db_alias, model):
                continue

            # order by primary key so each database is read in a stable order
            queryset = model._default_manager.using(db_alias).order_by(model._meta.pk.name)

            # named ``domain_filter`` so the builtin ``filter`` is not shadowed
            for domain_filter in get_model_domain_filters(model, domain):
                for obj in queryset.filter(domain_filter).iterator():
                    yield obj
 def test_settings(self):
     """
     Sanity-check the partitioned setup that the other tests in this class
     take for granted: two shard databases, their shard ranges, and which
     database each of the fixture doc ids is assigned to.
     """
     shards = settings.PARTITION_DATABASE_CONFIG['shards']

     self.assertEqual(len(shards), 2)
     for db_alias in (self.db1, self.db2):
         self.assertIn(db_alias, shards)
     self.assertEqual(shards[self.db1], [0, 1])
     self.assertEqual(shards[self.db2], [2, 3])
     self.assertEqual(set(partition_config.get_form_processing_dbs()),
                      {self.db1, self.db2})

     # each fixture doc id paired with the database it is expected to be in
     expected_db_by_doc_id = (
         (self.p1_uuid1, self.db1),
         (self.p1_uuid2, self.db1),
         (self.p1_uuid3, self.db1),
         (self.p2_uuid1, self.db2),
         (self.p2_uuid2, self.db2),
         (self.p2_uuid3, self.db2),
     )
     for doc_id, expected_db in expected_db_by_doc_id:
         self.assertEqual(ShardAccessor.get_database_for_doc(doc_id),
                          expected_db)
예제 #3
0
def run_query_across_partitioned_databases(model_class, q_expression, values=None):
    """
    Query each partitioned database in turn and lazily yield the results.

    When ``settings.USE_PARTITIONED_DATABASE`` is off, only the ``'default'``
    database is queried.

    :param model_class: A Django model class

    :param q_expression: An instance of django.db.models.Q representing the
    filter to apply

    :param values: (optional) list or tuple of field names to fetch instead of
    whole objects. With exactly one name the generator yields single values;
    with several names it yields tuples.

    :return: A generator with the results
    :raises ValueError: if ``values`` is truthy but not a list or tuple
    """
    db_names = (
        partition_config.get_form_processing_dbs()
        if settings.USE_PARTITIONED_DATABASE
        else ['default']
    )

    if values and not isinstance(values, (list, tuple)):
        raise ValueError("Expected a list or tuple")

    # a single requested field is flattened to bare values rather than 1-tuples
    values_list_kwargs = {'flat': True} if values and len(values) == 1 else {}

    for db_name in db_names:
        queryset = model_class.objects.using(db_name).filter(q_expression)
        if values:
            queryset = queryset.values_list(*values, **values_list_kwargs)

        for row in queryset:
            yield row
 def _get_docs_from_accessor(self, accessor, start, last_doc_pk=None, limit=500):
     """
     Gather ``accessor.get_docs(db, start)`` from every form processing
     database into a single list.

     ``last_doc_pk`` and ``limit`` are accepted but not used by this method.
     """
     return [
         doc
         for from_db in partition_config.get_form_processing_dbs()
         for doc in accessor.get_docs(from_db, start)
     ]
예제 #5
0
    def test_settings(self):
        """
        Sanity-check the partitioned setup that the other tests in this class
        take for granted: exactly two shard databases with the expected shard
        ranges, matching the configured form processing databases.
        """
        shards = settings.PARTITION_DATABASE_CONFIG['shards']
        expected_shards = ((self.db1, [0, 1]), (self.db2, [2, 3]))

        self.assertEqual(len(shards), 2)
        for db_alias, _ in expected_shards:
            self.assertIn(db_alias, shards)
        for db_alias, shard_ids in expected_shards:
            self.assertEqual(shards[db_alias], shard_ids)
        self.assertEqual(set(partition_config.get_form_processing_dbs()), {self.db1, self.db2})
예제 #6
0
def get_all_model_querysets_for_domain(model_class, domain):
    """
    Yield ``(model_class, queryset)`` pairs for ``domain``.

    One pair is produced per database and per filter returned by
    ``get_model_domain_filters``. Proxy models, and databases that
    ``router.allow_migrate_model`` rejects for the model, produce nothing.
    """
    read_db = router.db_for_read(model_class)
    reads_via_proxy = settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db()
    # a read routed to the proxy db is replaced by the form processing dbs
    db_aliases = partition_config.get_form_processing_dbs() if reads_via_proxy else [read_db]

    for db_alias in db_aliases:
        if model_class._meta.proxy or not router.allow_migrate_model(db_alias, model_class):
            continue

        manager = model_class._default_manager
        base_queryset = manager.using(db_alias).order_by(model_class._meta.pk.name)

        for domain_filter in get_model_domain_filters(model_class, domain):
            yield model_class, base_queryset.filter(domain_filter)
예제 #7
0
def get_all_model_querysets_for_domain(model_class, domain):
    """
    Generate ``(model_class, queryset)`` tuples selecting the rows of
    ``model_class`` that match each domain filter, database by database.

    Nothing is generated for proxy models or for databases where
    ``router.allow_migrate_model`` returns a falsy value for the model.
    """
    read_db = router.db_for_read(model_class)
    if settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db():
        # the proxy db stands in for the whole set of form processing dbs
        target_dbs = partition_config.get_form_processing_dbs()
    else:
        target_dbs = [read_db]

    for db_alias in target_dbs:
        if model_class._meta.proxy or not router.allow_migrate_model(db_alias, model_class):
            continue

        pk_ordered = model_class._default_manager.using(db_alias).order_by(model_class._meta.pk.name)
        for domain_filter in get_model_domain_filters(model_class, domain):
            yield model_class, pk_ordered.filter(domain_filter)
예제 #8
0
파일: dump.py 프로젝트: xbryanc/commcare-hq
def get_all_model_iterators_builders_for_domain(model_class, domain, limit_to_db=None):
    """
    Yield ``(model_class, built_iterator)`` pairs for dumping ``domain``.

    :param model_class: model whose data is being dumped
    :param domain: domain name passed on to the iterator builder
    :param limit_to_db: optional db alias; when given, only that database is
        used and it must be one of the databases the model would be read from
    :raises DomainDumpError: if ``limit_to_db`` is not one of those databases
    """
    read_db = router.db_for_read(model_class)
    if settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db():
        candidate_dbs = partition_config.get_form_processing_dbs()
    else:
        candidate_dbs = [read_db]

    if limit_to_db:
        if limit_to_db not in candidate_dbs:
            raise DomainDumpError(
                'DB specified is not valide for '
                'model class: {} not in {}'.format(limit_to_db, candidate_dbs)
            )
        candidate_dbs = [limit_to_db]

    for db_alias in candidate_dbs:
        if model_class._meta.proxy or not router.allow_migrate_model(db_alias, model_class):
            continue

        # builders are registered per model label
        builder = APP_LABELS_WITH_FILTER_KWARGS_TO_DUMP[get_model_label(model_class)]
        yield model_class, builder.build(domain, model_class, db_alias)
예제 #9
0
    def test_get_database_for_docs(self):
        """
        Shard 1000 doc ids and check that every database's share is within
        5% (of the total) of an even split. Admittedly a loose check.
        """
        total_docs = 1000
        doc_db_map = ShardAccessor.get_database_for_docs([str(i) for i in range(total_docs)])

        docs_per_db = defaultdict(int)
        for alias in doc_db_map.values():
            docs_per_db[alias] += 1

        even_split = int(total_docs // len(partition_config.get_form_processing_dbs()))
        tolerance = total_docs * 0.05  # 5% tolerance
        diffs = [abs(even_split - count) for count in docs_per_db.values()]
        outliers = [diff for diff in diffs if diff > tolerance]
        self.assertEqual(
            len(outliers),
            0,
            'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs),
        )
예제 #10
0
    def test_objects_only_in_one_db(self):
        """A test form and its case must each be found in exactly one form processing database."""
        case_id = uuid4().hex
        form = create_form_for_test(DOMAIN, case_id=case_id)

        form_dbs, case_dbs = [], []
        for db_alias in partition_config.get_form_processing_dbs():
            if XFormInstanceSQL.objects.using(db_alias).filter(form_id=form.form_id).exists():
                form_dbs.append(db_alias)

            if CommCareCaseSQL.objects.using(db_alias).filter(case_id=case_id).exists():
                case_dbs.append(db_alias)

        self.assertEqual(1, len(form_dbs))
        self.assertEqual(1, len(case_dbs))
예제 #11
0
    def test_get_database_for_docs(self):
        """
        Check that sharding 1000 doc ids gives each database a share within
        5% (of the total) of an even split. A deliberately rough test.
        """
        total_docs = 1000
        doc_ids = [str(i) for i in range(total_docs)]
        doc_db_map = ShardAccessor.get_database_for_docs(doc_ids)

        docs_per_db = defaultdict(int)
        for alias in doc_db_map.values():
            docs_per_db[alias] += 1

        db_count = len(partition_config.get_form_processing_dbs())
        even_split = int(total_docs / db_count)
        tolerance = total_docs * 0.05  # 5% tolerance
        diffs = [abs(even_split - count) for count in docs_per_db.values()]
        outliers = [diff for diff in diffs if diff > tolerance]
        self.assertEqual(
            len(outliers),
            0,
            'partitioning not within tollerance: tolerance={}, diffs={}'.format(tolerance, diffs),
        )
예제 #12
0
    def test_objects_only_in_one_db(self):
        """Create one form with one case and check that each exists in exactly one database."""
        case_id = uuid4().hex
        form = create_form_for_test(DOMAIN, case_id=case_id)

        dbs_holding_form = []
        dbs_holding_case = []
        for db_alias in partition_config.get_form_processing_dbs():
            forms = XFormInstanceSQL.objects.using(db_alias).filter(form_id=form.form_id)
            if forms.exists():
                dbs_holding_form.append(db_alias)

            cases = CommCareCaseSQL.objects.using(db_alias).filter(case_id=case_id)
            if cases.exists():
                dbs_holding_case.append(db_alias)

        self.assertEqual(1, len(dbs_holding_form))
        self.assertEqual(1, len(dbs_holding_case))
예제 #13
0
파일: dump.py 프로젝트: dimagi/commcare-hq
def get_all_model_iterators_builders_for_domain(model_class, domain, limit_to_db=None):
    """
    Generate ``(model_class, built_iterator)`` tuples, one per usable database.

    :param model_class: model whose data is being dumped
    :param domain: domain name handed to the iterator builder
    :param limit_to_db: optional db alias restricting the dump to one
        database; it must be among the databases the model is read from
    :raises DomainDumpError: if ``limit_to_db`` is not among those databases
    """
    read_db = router.db_for_read(model_class)
    reads_via_proxy = settings.USE_PARTITIONED_DATABASE and read_db == partition_config.get_proxy_db()
    db_aliases = partition_config.get_form_processing_dbs() if reads_via_proxy else [read_db]

    if limit_to_db:
        if limit_to_db not in db_aliases:
            raise DomainDumpError(
                'DB specified is not valide for '
                'model class: {} not in {}'.format(limit_to_db, db_aliases)
            )
        db_aliases = [limit_to_db]

    for db_alias in db_aliases:
        if model_class._meta.proxy or not router.allow_migrate_model(db_alias, model_class):
            continue

        # the builder is looked up by model label
        model_label = get_model_label(model_class)
        builder = APP_LABELS_WITH_FILTER_KWARGS_TO_DUMP[model_label]
        yield model_class, builder.build(domain, model_class, db_alias)
    def test_models_are_located_in_correct_dbs(self):
        """
        Scheduling models must exist only in the main db; partitioned
        scheduling models must exist only in the proxy and partitioned dbs.
        """
        main_db = partition_config.get_main_db()
        proxy_db = partition_config.get_proxy_db()
        partitioned_dbs = partition_config.get_form_processing_dbs()
        non_main_dbs = [proxy_db] + partitioned_dbs

        # scheduling models: present in the main db, absent everywhere else
        for model_class in self.get_scheduling_models():
            self.assertModelExists(model_class, main_db)
            for db_alias in non_main_dbs:
                self.assertModelDoesNotExist(model_class, db_alias)

        # partitioned scheduling models: absent from the main db, present everywhere else
        for model_class in self.get_scheduling_partitioned_models():
            self.assertModelDoesNotExist(model_class, main_db)
            for db_alias in non_main_dbs:
                self.assertModelExists(model_class, db_alias)
def test_models_are_located_in_correct_dbs(self, app_label, is_partitioned):
    """
    Check where the models of ``app_label`` live.

    Partitioned apps must exist in the proxy and partitioned dbs but not in
    the main db; non-partitioned apps must be the other way round.
    """
    main_db = partition_config.get_main_db()
    proxy_db = partition_config.get_proxy_db()
    partitioned_dbs = partition_config.get_form_processing_dbs()
    non_main_dbs = [proxy_db] + partitioned_dbs

    # pick the assertion for the main db and the one for all other dbs up front
    if is_partitioned:
        check_main_db = self.assertModelDoesNotExist
        check_other_dbs = self.assertModelExists
    else:
        check_main_db = self.assertModelExists
        check_other_dbs = self.assertModelDoesNotExist

    for model_class in self.get_models(app_label):
        check_main_db(model_class, main_db)
        for db_alias in non_main_dbs:
            check_other_dbs(model_class, db_alias)
예제 #16
0
def test_models_are_located_in_correct_dbs(self, app_label, is_partitioned):
    """
    Assert that every model of ``app_label`` is found in the right databases:
    only outside the main db when ``is_partitioned`` is truthy, only in the
    main db otherwise.
    """
    main_db = partition_config.get_main_db()
    proxy_db = partition_config.get_proxy_db()
    partitioned_dbs = partition_config.get_form_processing_dbs()
    non_main_dbs = [proxy_db] + partitioned_dbs

    for model_class in self.get_models(app_label):
        if not is_partitioned:
            # non-partitioned: in the main db and nowhere else
            self.assertModelExists(model_class, main_db)
            for db_alias in non_main_dbs:
                self.assertModelDoesNotExist(model_class, db_alias)
            continue

        # partitioned: absent from the main db, present in proxy + partitioned dbs
        self.assertModelDoesNotExist(model_class, main_db)
        for db_alias in non_main_dbs:
            self.assertModelExists(model_class, db_alias)
예제 #17
0
    def test_objects_distributed_to_all_dbs(self):
        """
        Basic check that forms and cases are spread over the databases rather
        than all being saved to the same one.
        """
        num_forms = 20
        for _ in range(num_forms):
            create_form_for_test(DOMAIN, case_id=uuid4().hex)

        forms_per_db, cases_per_db = {}, {}
        for db_alias in partition_config.get_form_processing_dbs():
            forms_per_db[db_alias] = XFormInstanceSQL.objects.using(db_alias).filter(domain=DOMAIN).count()
            cases_per_db[db_alias] = CommCareCaseSQL.objects.using(db_alias).filter(domain=DOMAIN).count()

        # nothing lost: the per-db counts add up to the number created
        for counts_per_db in (forms_per_db, cases_per_db):
            self.assertEqual(num_forms, sum(counts_per_db.values()), counts_per_db)

        # nothing concentrated: no single db holds everything
        for counts_per_db in (forms_per_db, cases_per_db):
            self.assertTrue(
                all(count < num_forms for count in counts_per_db.values()),
                counts_per_db
            )
예제 #18
0
    def test_objects_distributed_to_all_dbs(self):
        """
        Rough check that 20 created forms (one case each) do not all end up
        in a single database.
        """
        num_forms = 20
        for _ in range(num_forms):
            create_form_for_test(DOMAIN, case_id=uuid4().hex)

        form_counts = {}
        case_counts = {}
        for db_alias in partition_config.get_form_processing_dbs():
            domain_forms = XFormInstanceSQL.objects.using(db_alias).filter(domain=DOMAIN)
            form_counts[db_alias] = domain_forms.count()
            domain_cases = CommCareCaseSQL.objects.using(db_alias).filter(domain=DOMAIN)
            case_counts[db_alias] = domain_cases.count()

        # totals across all dbs match what was created
        self.assertEqual(num_forms, sum(form_counts.values()), form_counts)
        self.assertEqual(num_forms, sum(case_counts.values()), case_counts)

        # no single db holds every form or every case
        self.assertTrue(all(count < num_forms for count in form_counts.values()), form_counts)
        self.assertTrue(all(count < num_forms for count in case_counts.values()), case_counts)
예제 #19
0
def _get_db_list_to_query():
    """
    Return the form processing db aliases when partitioning is enabled,
    otherwise ``[None]``.
    """
    if not settings.USE_PARTITIONED_DATABASE:
        return [None]
    return partition_config.get_form_processing_dbs()
예제 #20
0
 def _analyse(cls):
     """Run ``ANALYSE`` on every form processing database."""
     for db_alias in partition_config.get_form_processing_dbs():
         with connections[db_alias].cursor() as cursor:
             # the doc count query relies on this
             cursor.execute('ANALYSE')
예제 #21
0
 def setUpClass(cls):
     """
     Skip the whole class unless partitioning is enabled, then require more
     than one form processing database.
     """
     if not settings.USE_PARTITIONED_DATABASE:
         # raised before the parent setUpClass runs, see
         # https://github.com/nose-devs/nose/issues/946
         raise SkipTest('Only applicable if sharding is setup')

     super(ShardingTests, cls).setUpClass()
     form_processing_dbs = partition_config.get_form_processing_dbs()
     assert len(form_processing_dbs) > 1
예제 #22
0
 def _get_docs(self, start, last_doc_pk=None, limit=500):
     """
     Return the docs from a fresh ``self.accessor_class()`` across every form
     processing database, as one list.

     ``last_doc_pk`` and ``limit`` are accepted but not used by this method.
     """
     accessor = self.accessor_class()
     return [
         doc
         for from_db in partition_config.get_form_processing_dbs()
         for doc in accessor.get_docs(from_db, start)
     ]
예제 #23
0
 def test_get_doc_count(self):
     """The doc counts reported for each form processing database must add up to 8."""
     doc_count = 0
     for from_db in partition_config.get_form_processing_dbs():
         # a new accessor is created for each database
         doc_count += self.accessor_class().get_doc_count(from_db)
     self.assertEqual(8, doc_count)
예제 #24
0
파일: util.py 프로젝트: mekete/commcare-hq
def get_db_aliases_for_partitioned_query():
    """
    Return the db aliases a partitioned query should run against: the form
    processing dbs when partitioning is enabled, otherwise ``['default']``.
    """
    if settings.USE_PARTITIONED_DATABASE:
        return partition_config.get_form_processing_dbs()
    return ['default']
 def _analyse(cls):
     """Issue ``ANALYSE`` against each form processing database."""
     for db_alias in partition_config.get_form_processing_dbs():
         with connections[db_alias].cursor() as cursor:
             # the doc count query relies on this
             cursor.execute('ANALYSE')
예제 #26
0
 def setUpClass(cls):
     """
     Skip these tests when partitioning is disabled; otherwise run the parent
     setup and require at least two form processing databases.
     """
     if not settings.USE_PARTITIONED_DATABASE:
         # raised before the parent setUpClass runs, see
         # https://github.com/nose-devs/nose/issues/946
         raise SkipTest('Only applicable if sharding is setup')

     super(ShardingTests, cls).setUpClass()
     db_count = len(partition_config.get_form_processing_dbs())
     assert db_count > 1
 def test_get_doc_count(self):
     """The per-database doc counts must add up to the number of ids in ``self.all_doc_ids``."""
     doc_count = 0
     for from_db in partition_config.get_form_processing_dbs():
         # a new accessor is created for each database
         doc_count += self.accessor_class().get_doc_count(from_db)
     self.assertEqual(len(self.all_doc_ids), doc_count)
예제 #28
0
 def tearDown(self):
     """Delete this domain's alert schedule instances and forms from every form processing database."""
     for db_alias in partition_config.get_form_processing_dbs():
         for model_class in (AlertScheduleInstance, XFormInstanceSQL):
             model_class.objects.using(db_alias).filter(domain=self.domain).delete()
 def tearDown(self):
     """Delete this domain's alert and timed schedule instances from every form processing database."""
     for db_alias in partition_config.get_form_processing_dbs():
         for model_class in (AlertScheduleInstance, TimedScheduleInstance):
             model_class.objects.using(db_alias).filter(domain=self.domain).delete()
예제 #30
0
파일: util.py 프로젝트: kkrampa/commcare-hq
def get_db_aliases_for_partitioned_query():
    """
    Return the form processing db aliases when partitioning is enabled,
    otherwise a list containing only ``'default'``.
    """
    if not settings.USE_PARTITIONED_DATABASE:
        return ['default']
    return partition_config.get_form_processing_dbs()