Ejemplo n.º 1
0
class TestDiskCostGuessIndex(CostModelTestCase):
    def setUp(self):
        CostModelTestCase.setUp(self)
        self.cm = DiskCostComponent(self.state)
        self.cm.no_index_size_estimation = False
        self.ops = []
        for sess in self.workload:
            map(self.ops.append, sess["operations"])

    def testGuessIndex_consistentAnswer(self):
        """Check that guessIndex always returns the same answer for the same input"""

        # initialize design
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field00", "field01"])
        d.addIndex("apple", ["field01", "field00"])
        d.addIndex("apple", ["field00"])
        d.addIndex("apple", ["field01"])

        for i in xrange(len(self.ops) - 2):
            op = self.ops[i]
            last_index, last_covering = (None, None)
            for i in xrange(100):
                best_index, covering, index_size, slot_size = self.cm.guess_op_info(
                    d, op)
                self.assertIsNotNone(best_index)
                self.assertIsNotNone(covering)
                if not last_index is None:
                    self.assertEqual(last_index, best_index)
                    self.assertEqual(last_covering, covering)
                last_index, last_covering = (best_index, covering)
            ## FOR

    ## DEF

    def testGuessIndex_indexInIncorrectOrder(self):
        """
            Design with index (field01, field00)
            1. query uses index (field00)
            result: not using index because that query uses indexes in order
            2. query uses index (field01)
            result: using index (field01, field00) because this is the best match
            3. query uses index (field01, field00)
            result: using index (field01, field00) because they match the best

            Design with index (field00, field01)
            4. query uses index (field01, field00)
            result: using no index because the index order is not correct

            Design with index (field01, field02, field00)
            5. query uses index (field01, field00)
            result: using index (field01, field02, field00) because they match the best
            result: not cover index because the index order in design is not correct
        """
        # initialize design
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field01", "field00"])

        # query 1: get query, queries on field00
        op = self.ops[0]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(best_index, None)
        self.assertFalse(covering)

        # query 2: get query, queries on field01
        op = self.ops[1]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 2)
        self.assertEqual(best_index[0], "field01")
        self.assertEqual(best_index[1], "field00")
        self.assertFalse(covering)

        # query 3: get query, queries on field01 and field00
        op = self.ops[2]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 2)
        self.assertEqual(best_index[0], "field01")
        self.assertEqual(best_index[1], "field00")
        self.assertFalse(covering)

        # query 4:
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field00", "field01"])

        op = self.ops[2]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 2)
        self.assertFalse(covering)

        # query 5:
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field01", "field02", "field00"])

        op = self.ops[2]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 3)
        self.assertEqual(best_index[0], "field01")
        self.assertEqual(best_index[1], "field02")
        self.assertEqual(best_index[2], "field00")
        self.assertFalse(covering)

    def testGuessIndex_indexChooseTheMostMatch(self):
        """
            Design with index (field01, field00), (field01),
            1. query uses index (field01) without projection field
            result: using index (field01) because they match the most
            2. query used index (field01, field00) without projection field
            result: using index (field01, field00) because they match the most

            If we have a design building indexes on (field01) only
            3. query uses index (field01, field00) without projection field
            result: using index (field01) because they match the most

            If we have a design building indexes on (field01, field03, field00), (field01)
            4. query uses index (field01, field00)
            result: using index (field01) because field01 is shorter
        """
        # initialize design
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field01", "field00"])
        d.addIndex("apple", ["field01"])

        # query 1: get query
        op = self.ops[1]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 1)
        self.assertEqual(best_index[0], 'field01')
        self.assertFalse(covering)

        # query 2:  get query
        op = self.ops[2]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 2)
        self.assertEqual(best_index[0], 'field01')
        self.assertEqual(best_index[1], 'field00')
        self.assertFalse(covering)

        ## query 3:
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field01"])

        op = self.ops[2]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(best_index[0], 'field01')
        self.assertFalse(covering)

        # query 4:
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field01", "field03", "field00"])
        d.addIndex("apple", ["field01"])
        op = self.ops[2]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 1)
        self.assertEqual(best_index[0], 'field01')
        self.assertFalse(covering)

    def testGuessIndex_indexChooseWithProjectionField(self):
        """
            If a query uses one of the indexes the design has but its projection uses
            one of the indexes the design has, we should choose the index with both
            query index and projection index
        """
        # If we have a design with index (field00), (field00, field02)
        # 1. query uses field00 but its projection field is {field02: xx}
        # result: we should choose (field00, field02) as the best index

        # initialize design
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field00", "field02"])
        d.addIndex("apple", ["field00"])

        op = self.ops[0]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(len(best_index), 2)
        self.assertEqual(best_index[0], "field00")
        self.assertEqual(best_index[1], "field02")
        self.assertTrue(covering)

    ## DEF

    def testGuessIndex_indexChooseWithoutProjectionField(self):
        """
            If a query uses all the indexes but doesn't have a projection field,
            we still think it is not a covering index
        """
        # If we have a design with indexes(field00, field01)
        # 1. query uses (field00, field01) but there is no projection field
        # result: we should choose (field00, field02) but the index is not a covering index

        # initialize design
        d = Design()
        d.addCollection("apple")
        d.addIndex("apple", ["field00", "field01"])

        op = self.ops[3]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op)

        self.assertEqual(best_index[0], "field00")
        self.assertEqual(best_index[1], "field01")
        self.assertFalse(covering)

    ## DEF

    def testGuessIndex_IndexSizeEstimation(self):
        """
            Check if the size of the indexes vary
        """
        d = Design()
        d.addCollection("apple")

        d.addIndex("apple", ["field00"])
        d.addIndex("apple", ["field01"])
        d.addIndex("apple", ["field00", "field01"])

        # op0 use index (field00)
        op0 = self.ops[0]

        # op1 use index (field01)
        op1 = self.ops[1]

        # op2 use index (field01, field00)
        op2 = self.ops[2]

        # op3 use index (field00, field01)
        op3 = self.ops[3]

        # Guess index
        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op0)
        self.assertEqual(24 + 8, index_size)

        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op1)
        self.assertEqual(24 + 8, index_size)

        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op2)
        self.assertEqual(24 + 24 + 8, index_size)

        best_index, covering, index_size, slot_size = self.cm.guess_op_info(
            d, op3)
        self.assertEqual(24 + 24 + 8, index_size)

    ## DEF

    def testGuessIndex_IndexSizeEstimation_Denormalization(self):
        """
            If collection A is denormalized into B, then the index for collection B should have larger size now
            (If and only if the index is built on a field that is included by both collection A and collection B)
        """
        d = Design()
        d.addCollection("apple")
        d.addCollection("microsoft")
        d.addCollection("google")

        d.addIndex("apple", ["field00"])
        d.addIndex("microsoft", ["field00"])
        d.addIndex("google", ["field00"])

        # op4 use index (field00) but it only goes to collection microsoft
        op4 = self.ops[4]

        # Guess index

        # Without denormalization
        best_index, covering, index_size_0, slot_size = self.cm.guess_op_info(
            d, op4)

        # With one denormalization
        d.setDenormalizationParent("apple", "microsoft")
        self.cm.buildEmbeddingCostDictionary(d)
        best_index, covering, index_size_1, slot_size = self.cm.guess_op_info(
            d, op4)

        self.assertGreater(index_size_1, index_size_0)

        # With chained denormalization
        self.cm.reset()
        d.setDenormalizationParent("google", "apple")
        self.cm.buildEmbeddingCostDictionary(d)
        best_index, covering, index_size_2, slot_size = self.cm.guess_op_info(
            d, op4)

        self.assertGreater(index_size_2, index_size_1)