Beispiel #1
0
 def __init__(self, p1, p2):
     super(ProductPlan, self).__init__()
     self.p1 = p1
     self.p2 = p2
     self._schema = Schema()
     self._schema.addAll(p1.schema())
     self._schema.addAll(p2.schema())
    def __init__(self, tx, lhs, rhs):
        super(MultibufferProductPlan, self).__init__()
        self.tx = tx
        self.lhs = MaterializePlan(tx, lhs)
        self.rhs = rhs

        self.schema = Schema()
        self.schema.addAll(lhs.schema())
        self.schema.addAll(rhs.schema())
Beispiel #3
0
 def __init__(self, p1, p2, ii, joinfield):
     super(IndexJoinPlan, self).__init__()
     self.p1 = p1
     self.p2 = p2
     self.ii = ii
     self.joinfield = joinfield
     self.sch = Schema()
     self.sch.addAll(p1.schema())
     self.sch.addAll(p2.schema())
Beispiel #4
0
 def __init__(self, tx, p, groupfields, aggfns):
     super(GroupByPlan, self).__init__()
     self.p = SortPlan(tx, p, groupfields)
     self.groupfields = groupfields
     self.aggfns = aggfns
     self.sch = Schema()
     for fldname in groupfields:
         self.sch.add(fldname, p.schema())
     for fn in aggfns:
         self.sch.addIntField(fn.fieldName())
Beispiel #5
0
 def createIdxLayout(self):
     sch = Schema()
     sch.addIntField("block")
     sch.addIntField("id")
     if self.tblSchema.type(self.fldname) == INTEGER:
         sch.addIntField("dataval")
     else:
         fldlen = self.tblSchema.length(self.fldname)
         sch.addStringField("dataval", fldlen)
     return Layout(sch)
Beispiel #6
0
class ProjectPlan(Plan):

    #
    #     * Creates a new project node in the query tree,
    #     * having the specified subquery and field list.
    #     * @param p the subquery
    #     * @param fieldlist the list of fields
    #
    def __init__(self, p, fieldlist):
        super(ProjectPlan, self).__init__()
        self.p = p
        self._schema = Schema()
        for fldname in fieldlist:
            self._schema.add(fldname, p.schema())

    #
    #     * Creates a project scan for this query.
    #     * @see Plan#open()
    #
    def open(self):
        s = self.p.open()
        return ProjectScan(s, self._schema.fields())

    #
    #     * Estimates the number of block accesses in the projection,
    #     * which is the same as in the underlying query.
    #     * @see Plan#blocksAccessed()
    #
    def blocksAccessed(self):
        return self.p.blocksAccessed()

    #
    #     * Estimates the number of output records in the projection,
    #     * which is the same as in the underlying query.
    #     * @see Plan#recordsOutput()
    #
    def recordsOutput(self):
        return self.p.recordsOutput()

    #
    #     * Estimates the number of distinct field values
    #     * in the projection,
    #     * which is the same as in the underlying query.
    #     * @see Plan#distinctValues(String)
    #
    def distinctValues(self, fldname):
        return self.p.distinctValues(fldname)

    #
    #     * Returns the schema of the projection,
    #     * which is taken from the field list.
    #     * @see Plan#schema()
    #
    def schema(self):
        return self._schema
Beispiel #7
0
    def __init__(self, tx, p1, p2, fldname1, fldname2):
        super(MergeJoinPlan, self).__init__()
        self.fldname1 = fldname1
        sortlist1 = [fldname1]
        self.p1 = SortPlan(tx, p1, sortlist1)

        self.fldname2 = fldname2
        sortlist2 = [fldname2]
        self.p2 = SortPlan(tx, p2, sortlist2)

        self.sch = Schema()
        self.sch.addAll(p1.schema())
        self.sch.addAll(p2.schema())
Beispiel #8
0
 def joinSubPred(self, sch1, sch2):
     result = Predicate()
     newsch = Schema()
     newsch.addAll(sch1)
     newsch.addAll(sch2)
     for t in self.terms:
         if not t.appliesTo(sch1) and not t.appliesTo(sch2) and t.appliesTo(
                 newsch):
             result.terms.append(t)
     if len(result.terms) == 0:
         return None
     else:
         return result
Beispiel #9
0
    def __init__(self, tx, idxname, leafLayout):
        super(BTreeIndex, self).__init__()
        self.tx = tx
        #  deal with the leaves
        self.leaftbl = idxname + "leaf"
        self.leafLayout = leafLayout
        if tx.size(self.leaftbl) == 0:
            blk = tx.append(self.leaftbl)
            node = BTPage(tx, blk, leafLayout)
            node.format(blk, -1)

        #  deal with the directory
        dirsch = Schema()
        dirsch.add("block", leafLayout.schema())
        dirsch.add("dataval", leafLayout.schema())
        dirtbl = idxname + "dir"
        self.dirLayout = Layout(dirsch)
        self.rootblk = BlockId(dirtbl, 0)
        if tx.size(dirtbl) == 0:
            #  create new root block
            tx.append(dirtbl)
            node = BTPage(tx, self.rootblk, self.dirLayout)
            node.format(self.rootblk, 0)
            #  insert initial directory entry
            fldtype = dirsch.type("dataval")
            minval = Constant(
                Integer.MIN_VALUE) if fldtype == INTEGER else Constant("")
            node.insertDir(0, minval, 0)
            node.close()
Beispiel #10
0
 def getLayout(self, tblname, tx):
     size = -1
     tcat = TableScan(tx, "tblcat", self.tcatLayout)
     while tcat.next():
         if tcat.getString("tblname") == tblname:
             size = tcat.getInt("slotsize")
             break
     tcat.close()
     sch = Schema()
     offsets = {}
     fcat = TableScan(tx, "fldcat", self.fcatLayout)
     while fcat.next():
         if fcat.getString("tblname") == tblname:
             fldname = fcat.getString("fldname")
             fldtype = fcat.getInt("type")
             fldlen = fcat.getInt("length")
             offset = fcat.getInt("offset")
             offsets[fldname] = offset
             sch.addField(fldname, fldtype, fldlen)
     fcat.close()
     return Layout(sch, offsets, size)
Beispiel #11
0
    def main(cls, args):
        # db = SimpleDB("tblmgrtest", 400, 8)
        fm = FileMgr(File("tblmgrtest"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        tm = TableMgr(True, tx)
        sch = Schema()
        sch.addIntField("A")
        sch.addStringField("B", 9)
        tm.createTable("MyTable", sch, tx)

        layout = tm.getLayout("MyTable", tx)
        size = layout.slotSize()
        sch2 = layout.schema()
        print("MyTable has slot size " + str(size))
        print("Its fields are:")
        for fldname in sch2.fields():
            if sch2.type(fldname) == INTEGER:
                _type = "int"
            else:
                strlen = sch2.length(fldname)
                _type = "varchar(" + str(strlen) + ")"
            print(fldname + ": " + _type)
        tx.commit()
Beispiel #12
0
 def __init__(self, isNew, tblMgr, tx):
     self.tblMgr = tblMgr
     if isNew:
         sch = Schema()
         sch.addStringField("viewname", TableMgr.MAX_NAME)
         sch.addStringField("viewdef", ViewMgr.MAX_VIEWDEF)
         tblMgr.createTable("viewcat", sch, tx)
Beispiel #13
0
 def main(cls, args):
     sch = Schema()
     sch.addIntField("A")
     sch.addStringField("B", 9)
     layout = Layout(sch)
     for fldname in layout.schema().fields():
         offset = layout.offset(fldname)
         print(fldname + " has offset " + str(offset))
Beispiel #14
0
 def __init__(self, isnew, tblmgr, statmgr, tx):
     if isnew:
         sch = Schema()
         sch.addStringField("indexname", TableMgr.MAX_NAME)
         sch.addStringField("tablename", TableMgr.MAX_NAME)
         sch.addStringField("fieldname", TableMgr.MAX_NAME)
         tblmgr.createTable("idxcat", sch, tx)
     self.tblmgr = tblmgr
     self.statmgr = statmgr
     self.layout = tblmgr.getLayout("idxcat", tx)
Beispiel #15
0
 def fieldType(self, fldname):
     schema = Schema()
     if self.lex.matchKeyword("int"):
         self.lex.eatKeyword("int")
         schema.addIntField(fldname)
     else:
         self.lex.eatKeyword("varchar")
         self.lex.eatDelim('(')
         strLen = self.lex.eatIntConstant()
         self.lex.eatDelim(')')
         schema.addStringField(fldname, strLen)
     return schema
Beispiel #16
0
    def main(cls, args):
        # db = SimpleDB("recordtest", 400, 8)
        fm = FileMgr(File("recordtest"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        sch = Schema()
        sch.addIntField("A")
        sch.addStringField("B", 9)
        layout = Layout(sch)
        for fldname in layout.schema().fields():
            offset = layout.offset(fldname)
            print(fldname + " has offset " + str(offset))
        blk = tx.append("testfile")
        tx.pin(blk)
        rp = RecordPage(tx, blk, layout)
        rp.format()

        print("Filling the page with random records.")
        slot = rp.insertAfter(-1)
        while slot >= 0:
            n = random.randint(0, 50)
            rp.setInt(slot, "A", n)
            rp.setString(slot, "B", "rec" + str(n))
            print("inserting into slot " + str(slot) + ": {" + str(n) + ", " + "rec" + str(n) + "}")
            slot = rp.insertAfter(slot)

        print("Deleting these records, whose A-values are less than 25.")

        count = 0
        slot = rp.nextAfter(-1)
        while slot >= 0:
            a = rp.getInt(slot, "A")
            b = rp.getString(slot, "B")
            if a < 25:
                count += 1
                print("slot " + str(slot) + ": {" + str(a) + ", " + str(b) + "}")
                rp.delete(slot)
            slot = rp.nextAfter(slot)

        print(str(count) + " values under 25 were deleted.\n")
        print("Here are the remaining records.")
        slot = rp.nextAfter(-1)

        while slot >= 0:
            a = rp.getInt(slot, "A")
            b = rp.getString(slot, "B")
            print("slot " + str(slot) + ": {" + str(a) + ", " + str(b) + "}")
            slot = rp.nextAfter(slot)
        tx.unpin(blk)
        tx.commit()
Beispiel #17
0
    def main(cls, args):
        # db = SimpleDB("tabletest", 400, 8)
        fm = FileMgr(File("tabletest"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        sch = Schema()
        sch.addIntField("A")
        sch.addStringField("B", 9)
        layout = Layout(sch)
        for fldname in layout.schema().fields():
            offset = layout.offset(fldname)
            print(fldname + " has offset " + str(offset))

        print("Filling the table with 50 random records.")
        ts = TableScan(tx, "T", layout)
        for i in range(50):
            ts.insert()
            n = random.randint(0, 50)
            ts.setInt("A", n)
            ts.setString("B", "rec" + str(n))
            print("inserting into slot " + ts.getRid().__str__() + ": {" +
                  str(n) + ", " + "rec" + str(n) + "}")

        print("Deleting these records, whose A-values are less than 25.")
        count = 0
        ts.beforeFirst()
        while ts.next():
            a = ts.getInt("A")
            b = ts.getString("B")
            if a < 25:
                count += 1
                print("slot " + ts.getRid().__str__() + ": {" + str(a) + ", " +
                      b + "}")
                ts.delete()
        print(str(count) + " values under 10 were deleted.\n")

        print("Here are the remaining records.")
        ts.beforeFirst()
        while ts.next():
            a = ts.getInt("A")
            b = ts.getString("B")
            print("slot " + ts.getRid().__str__() + ": {" + str(a) + ", " + b +
                  "}")
        ts.close()
        tx.commit()
Beispiel #18
0
    def main(cls, args):
        # db = SimpleDB("scantest1")
        fm = FileMgr(File("scantest1"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        sch1 = Schema()
        sch1.addIntField("A")
        sch1.addStringField("B", 9)
        layout = Layout(sch1)
        s1 = TableScan(tx, "T", layout)

        s1.beforeFirst()
        n = 200
        print("Inserting " + str(n) + " random records.")
        for i in range(n):
            s1.insert()
            k = random.randint(0, 50)
            s1.setInt("A", k)
            s1.setString("B", "rec" + str(k))
        s1.close()

        s2 = TableScan(tx, "T", layout)

        #  selecting all records where A=10
        c = Constant(10)
        t = Term(Expression("A"), Expression(c))
        pred = Predicate(t)
        print("The predicate is " + pred.__str__())
        s3 = SelectScan(s2, pred)

        fields = ["B"]
        s4 = ProjectScan(s3, fields)
        while s4.next():
            print(s4.getString("B"))
        s4.close()
        tx.commit()
Beispiel #19
0
class ProductPlan(Plan):

    #
    #     * Creates a new product node in the query tree,
    #     * having the two specified subqueries.
    #     * @param p1 the left-hand subquery
    #     * @param p2 the right-hand subquery
    #
    def __init__(self, p1, p2):
        super(ProductPlan, self).__init__()
        self.p1 = p1
        self.p2 = p2
        self._schema = Schema()
        self._schema.addAll(p1.schema())
        self._schema.addAll(p2.schema())

    #
    #     * Creates a product scan for this query.
    #     * @see Plan#open()
    #
    def open(self):
        s1 = self.p1.open()
        s2 = self.p2.open()
        return ProductScan(s1, s2)

    #
    #     * Estimates the number of block accesses in the product.
    #     * The formula is:
    #     * <pre> B(product(p1,p2)) = B(p1) + R(p1)*B(p2) </pre>
    #     * @see Plan#blocksAccessed()
    #
    def blocksAccessed(self):
        return self.p1.blocksAccessed() + (self.p1.recordsOutput() *
                                           self.p2.blocksAccessed())

    #
    #     * Estimates the number of output records in the product.
    #     * The formula is:
    #     * <pre> R(product(p1,p2)) = R(p1)*R(p2) </pre>
    #     * @see Plan#recordsOutput()
    #
    def recordsOutput(self):
        return self.p1.recordsOutput() * self.p2.recordsOutput()

    #
    #     * Estimates the distinct number of field values in the product.
    #     * Since the product does not increase or decrease field values,
    #     * the estimate is the same as in the appropriate underlying query.
    #     * @see Plan#distinctValues(String)
    #
    def distinctValues(self, fldname):
        if self.p1.schema().hasField(fldname):
            return self.p1.distinctValues(fldname)
        else:
            return self.p2.distinctValues(fldname)

    #
    #     * Returns the schema of the product,
    #     * which is the union of the schemas of the underlying queries.
    #     * @see Plan#schema()
    #
    def schema(self):
        return self._schema
Beispiel #20
0
class GroupByPlan(Plan):

    #
    #     * Create a groupby plan for the underlying query.
    #     * The grouping is determined by the specified
    #     * collection of group fields,
    #     * and the aggregation is computed by the
    #     * specified collection of aggregation functions.
    #     * @param p a plan for the underlying query
    #     * @param groupfields the group fields
    #     * @param aggfns the aggregation functions
    #     * @param tx the calling transaction
    #
    def __init__(self, tx, p, groupfields, aggfns):
        super(GroupByPlan, self).__init__()
        self.p = SortPlan(tx, p, groupfields)
        self.groupfields = groupfields
        self.aggfns = aggfns
        self.sch = Schema()
        for fldname in groupfields:
            self.sch.add(fldname, p.schema())
        for fn in aggfns:
            self.sch.addIntField(fn.fieldName())

    #
    #     * This method opens a sort plan for the specified plan.
    #     * The sort plan ensures that the underlying records
    #     * will be appropriately grouped.
    #     * @see Plan#open()
    #
    def open(self):
        s = self.p.open()
        return GroupByScan(s, self.groupfields, self.aggfns)

    #
    #     * Return the number of blocks required to
    #     * compute the aggregation,
    #     * which is one pass through the sorted table.
    #     * It does <i>not</i> include the one-time cost
    #     * of materializing and sorting the records.
    #     * @see Plan#blocksAccessed()
    #
    def blocksAccessed(self):
        return self.p.blocksAccessed()

    #
    #     * Return the number of groups.  Assuming equal distribution,
    #     * this is the product of the distinct values
    #     * for each grouping field.
    #     * @see Plan#recordsOutput()
    #
    def recordsOutput(self):
        numgroups = 1
        for fldname in self.groupfields:
            numgroups *= self.p.distinctValues(fldname)
        return numgroups

    #
    #     * Return the number of distinct values for the
    #     * specified field.  If the field is a grouping field,
    #     * then the number of distinct values is the same
    #     * as in the underlying query.
    #     * If the field is an aggregate field, then we
    #     * assume that all values are distinct.
    #     * @see Plan#distinctValues(String)
    #
    def distinctValues(self, fldname):
        if self.p.schema().hasField(fldname):
            return self.p.distinctValues(fldname)
        else:
            return self.recordsOutput()

    #
    #     * Returns the schema of the output table.
    #     * The schema consists of the group fields,
    #     * plus one field for each aggregation function.
    #     * @see Plan#schema()
    #
    def schema(self):
        return self.sch
Beispiel #21
0
    def main(cls, args):
        # db = SimpleDB("producttest")
        fm = FileMgr(File("producttest"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        sch1 = Schema()
        sch1.addIntField("A")
        sch1.addStringField("B", 9)
        layout1 = Layout(sch1)
        ts1 = TableScan(tx, "T1", layout1)

        sch2 = Schema()
        sch2.addIntField("C")
        sch2.addStringField("D", 9)
        layout2 = Layout(sch2)
        ts2 = TableScan(tx, "T2", layout2)

        ts1.beforeFirst()
        n = 200
        print("Inserting " + str(n) + " records into T1.")
        for i in range(n):
            ts1.insert()
            ts1.setInt("A", i)
            ts1.setString("B", "aaa" + str(i))
        ts1.close()

        ts2.beforeFirst()
        print("Inserting " + str(n) + " records into T2.")
        for i in range(n):
            ts2.insert()
            ts2.setInt("C", n - i - 1)
            ts2.setString("D", "bbb" + str((n - i - 1)))
        ts2.close()

        s1 = TableScan(tx, "T1", layout1)
        s2 = TableScan(tx, "T2", layout2)
        s3 = ProductScan(s1, s2)
        while s3.next():
            print(s3.getString("B"))
        s3.close()
        tx.commit()
Beispiel #22
0
 def __init__(self, p, fieldlist):
     super(ProjectPlan, self).__init__()
     self.p = p
     self._schema = Schema()
     for fldname in fieldlist:
         self._schema.add(fldname, p.schema())
class MultibufferProductPlan(Plan):

    #
    #     * Creates a product plan for the specified queries.
    #     * @param lhs the plan for the LHS query
    #     * @param rhs the plan for the RHS query
    #     * @param tx the calling transaction
    #
    def __init__(self, tx, lhs, rhs):
        super(MultibufferProductPlan, self).__init__()
        self.tx = tx
        self.lhs = MaterializePlan(tx, lhs)
        self.rhs = rhs

        self.schema = Schema()
        self.schema.addAll(lhs.schema())
        self.schema.addAll(rhs.schema())

    #
    #     * A scan for this query is created and returned, as follows.
    #     * First, the method materializes its LHS and RHS queries.
    #     * It then determines the optimal chunk size,
    #     * based on the size of the materialized RHS file and the
    #     * number of available buffers.
    #     * It creates a chunk plan for each chunk, saving them in a list.
    #     * Finally, it creates a multiscan for this list of plans,
    #     * and returns that scan.
    #     * @see Plan#open()
    #
    def open(self):
        leftscan = self.lhs.open()
        tt = self.copyRecordsFrom(self.rhs)
        return MultibufferProductScan(self.tx, leftscan, tt.tableName(),
                                      tt.getLayout())

    #
    #     * Returns an estimate of the number of block accesses
    #     * required to execute the query. The formula is:
    #     * <pre> B(product(p1,p2)) = B(p2) + B(p1)*C(p2) </pre>
    #     * where C(p2) is the number of chunks of p2.
    #     * The method uses the current number of available buffers
    #     * to calculate C(p2), and so this value may differ
    #     * when the query scan is opened.
    #     * @see Plan#blocksAccessed()
    #
    def blocksAccessed(self):
        #  this guesses at the # of chunks
        avail = self.tx.availableBuffs()
        size = MaterializePlan(self.tx, self.rhs).blocksAccessed()
        numchunks = int(size / avail)
        return self.rhs.blocksAccessed() + (self.lhs.blocksAccessed() *
                                            numchunks)

    #
    #     * Estimates the number of output records in the product.
    #     * The formula is:
    #     * <pre> R(product(p1,p2)) = R(p1)*R(p2) </pre>
    #     * @see Plan#recordsOutput()
    #
    def recordsOutput(self):
        return self.lhs.recordsOutput() * self.rhs.recordsOutput()

    #
    #     * Estimates the distinct number of field values in the product.
    #     * Since the product does not increase or decrease field values,
    #     * the estimate is the same as in the appropriate underlying query.
    #     * @see Plan#distinctValues(String)
    #
    def distinctValues(self, fldname):
        if self.lhs.schema().hasField(fldname):
            return self.lhs.distinctValues(fldname)
        else:
            return self.rhs.distinctValues(fldname)

    #
    #     * Returns the schema of the product,
    #     * which is the union of the schemas of the underlying queries.
    #     * @see Plan#schema()
    #
    def schema(self):
        return self.schema

    def copyRecordsFrom(self, p):
        src = p.open()
        sch = p.schema()
        t = TempTable(self.tx, sch)
        dest = t.open()
        while src.next():
            dest.insert()
            for fldname in sch.fields():
                dest.setVal(fldname, src.getVal(fldname))
        src.close()
        dest.close()
        return t
Beispiel #24
0
 def __init__(self, isNew, tx):
     tcatSchema = Schema()
     tcatSchema.addStringField("tblname", TableMgr.MAX_NAME)
     tcatSchema.addIntField("slotsize")
     self.tcatLayout = Layout(tcatSchema)
     fcatSchema = Schema()
     fcatSchema.addStringField("tblname", TableMgr.MAX_NAME)
     fcatSchema.addStringField("fldname", TableMgr.MAX_NAME)
     fcatSchema.addIntField("type")
     fcatSchema.addIntField("length")
     fcatSchema.addIntField("offset")
     self.fcatLayout = Layout(fcatSchema)
     if isNew:
         self.createTable("tblcat", tcatSchema, tx)
         self.createTable("fldcat", fcatSchema, tx)
Beispiel #25
0
    def main(cls, args):
        # db = SimpleDB("scantest2")
        fm = FileMgr(File("scantest2"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        sch1 = Schema()
        sch1.addIntField("A")
        sch1.addStringField("B", 9)
        layout1 = Layout(sch1)
        us1 = TableScan(tx, "T1", layout1)
        us1.beforeFirst()
        n = 200
        print("Inserting " + str(n) + " records into T1.")
        for i in range(n):
            us1.insert()
            us1.setInt("A", i)
            us1.setString("B", "bbb" + str(i))
        us1.close()

        sch2 = Schema()
        sch2.addIntField("C")
        sch2.addStringField("D", 9)
        layout2 = Layout(sch2)
        us2 = TableScan(tx, "T2", layout2)
        us2.beforeFirst()
        print("Inserting " + str(n) + " records into T2.")
        for i in range(n):
            us2.insert()
            us2.setInt("C", n - i - 1)
            us2.setString("D", "ddd" + str((n - i - 1)))
        us2.close()

        s1 = TableScan(tx, "T1", layout1)
        s2 = TableScan(tx, "T2", layout2)
        s3 = ProductScan(s1, s2)
        #  selecting all records where A=C
        t = Term(Expression("A"), Expression("C"))
        pred = Predicate(t)
        print("The predicate is " + pred.__str__())
        s4 = SelectScan(s3, pred)

        #  projecting on [B,D]
        c = ["B", "D"]
        s5 = ProjectScan(s4, c)
        while s5.next():
            print(s5.getString("B") + " " + s5.getString("D"))
        s5.close()
        tx.commit()
Beispiel #26
0
class MergeJoinPlan(Plan):

    #
    #     * Creates a mergejoin plan for the two specified queries.
    #     * The RHS must be materialized after it is sorted,
    #     * in order to deal with possible duplicates.
    #     * @param p1 the LHS query plan
    #     * @param p2 the RHS query plan
    #     * @param fldname1 the LHS join field
    #     * @param fldname2 the RHS join field
    #     * @param tx the calling transaction
    #
    def __init__(self, tx, p1, p2, fldname1, fldname2):
        super(MergeJoinPlan, self).__init__()
        self.fldname1 = fldname1
        sortlist1 = [fldname1]
        self.p1 = SortPlan(tx, p1, sortlist1)

        self.fldname2 = fldname2
        sortlist2 = [fldname2]
        self.p2 = SortPlan(tx, p2, sortlist2)

        self.sch = Schema()
        self.sch.addAll(p1.schema())
        self.sch.addAll(p2.schema())

    #  The method first sorts its two underlying scans
    #      * on their join field. It then returns a mergejoin scan
    #      * of the two sorted table scans.
    #      * @see Plan#open()
    #
    def open(self):
        s1 = self.p1.open()
        s2 = self.p2.open()
        return MergeJoinScan(s1, s2, self.fldname1, self.fldname2)

    #
    #     * Return the number of block acceses required to
    #     * mergejoin the sorted tables.
    #     * Since a mergejoin can be preformed with a single
    #     * pass through each table, the method returns
    #     * the sum of the block accesses of the
    #     * materialized sorted tables.
    #     * It does <i>not</i> include the one-time cost
    #     * of materializing and sorting the records.
    #     * @see Plan#blocksAccessed()
    #
    def blocksAccessed(self):
        return self.p1.blocksAccessed() + self.p2.blocksAccessed()

    #
    #     * Return the number of records in the join.
    #     * Assuming uniform distribution, the formula is:
    #     * <pre> R(join(p1,p2)) = R(p1)*R(p2)/max{V(p1,F1),V(p2,F2)}</pre>
    #     * @see Plan#recordsOutput()
    #
    def recordsOutput(self):
        maxvals = max(self.p1.distinctValues(self.fldname1),
                      self.p2.distinctValues(self.fldname2))
        return int(
            (self.p1.recordsOutput() * self.p2.recordsOutput()) / maxvals)

    #
    #     * Estimate the distinct number of field values in the join.
    #     * Since the join does not increase or decrease field values,
    #     * the estimate is the same as in the appropriate underlying query.
    #     * @see Plan#distinctValues(String)
    #
    def distinctValues(self, fldname):
        if self.p1.schema().hasField(fldname):
            return self.p1.distinctValues(fldname)
        else:
            return self.p2.distinctValues(fldname)

    #
    #     * Return the schema of the join,
    #     * which is the union of the schemas of the underlying queries.
    #     * @see Plan#schema()
    #
    def schema(self):
        return self.sch
Beispiel #27
0
class IndexJoinPlan(Plan):

    #
    #     * Implements the join operator,
    #     * using the specified LHS and RHS plans.
    #     * @param p1 the left-hand plan
    #     * @param p2 the right-hand plan
    #     * @param ii information about the right-hand index
    #     * @param joinfield the left-hand field used for joining
    #
    def __init__(self, p1, p2, ii, joinfield):
        super(IndexJoinPlan, self).__init__()
        self.p1 = p1
        self.p2 = p2
        self.ii = ii
        self.joinfield = joinfield
        self.sch = Schema()
        self.sch.addAll(p1.schema())
        self.sch.addAll(p2.schema())

    #
    #     * Opens an indexjoin scan for this query
    #     * @see Plan#open()
    #
    def open(self):
        s = self.p1.open()
        #  throws an exception if p2 is not a tableplan
        ts = self.p2.open()
        idx = self.ii.open()
        return IndexJoinScan(s, idx, self.joinfield, ts)

    #
    #     * Estimates the number of block accesses to compute the join.
    #     * The formula is:
    #     * <pre> B(indexjoin(p1,p2,idx)) = B(p1) + R(p1)*B(idx)
    #     *       + R(indexjoin(p1,p2,idx) </pre>
    #     * @see Plan#blocksAccessed()
    #
    def blocksAccessed(self):
        return self.p1.blocksAccessed() + (self.p1.recordsOutput() * self.ii.blocksAccessed()) + self.recordsOutput()

    #
    #     * Estimates the number of output records in the join.
    #     * The formula is:
    #     * <pre> R(indexjoin(p1,p2,idx)) = R(p1)*R(idx) </pre>
    #     * @see Plan#recordsOutput()
    #
    def recordsOutput(self):
        return self.p1.recordsOutput() * self.ii.recordsOutput()

    #
    #     * Estimates the number of distinct values for the
    #     * specified field.
    #     * @see Plan#distinctValues(String)
    #
    def distinctValues(self, fldname):
        if self.p1.schema().hasField(fldname):
            return self.p1.distinctValues(fldname)
        else:
            return self.p2.distinctValues(fldname)

    #
    #     * Returns the schema of the index join.
    #     * @see Plan#schema()
    #
    def schema(self):
        return self.sch
Beispiel #28
0
    def main(cls, args):
        # db = SimpleDB("metadatamgrtest", 400, 8)
        fm = FileMgr(File("metadatamgrtest"), 400)
        lm = LogMgr(fm, "simpledb.log")
        bm = BufferMgr(fm, lm, 8)
        tx = Transaction(fm, lm, bm)

        mdm = MetadataMgr(True, tx)
        sch = Schema()
        sch.addIntField("A")
        sch.addStringField("B", 9)

        #  Part 1: Table Metadata
        mdm.createTable("MyTable", sch, tx)
        layout = mdm.getLayout("MyTable", tx)
        size = layout.slotSize()
        sch2 = layout.schema()
        print("MyTable has slot size " + str(size))
        print("Its fields are:")
        for fldname in sch2.fields():
            if sch2.type(fldname) == INTEGER:
                _type = "int"
            else:
                strlen = sch2.length(fldname)
                _type = "varchar(" + str(strlen) + ")"
            print(fldname + ": " + _type)

        #  Part 2: Statistics Metadata
        ts = TableScan(tx, "MyTable", layout)
        for i in range(50):
            ts.insert()
            n = random.randint(0, 50)
            ts.setInt("A", n)
            ts.setString("B", "rec" + str(n))

        si = mdm.getStatInfo("MyTable", layout, tx)
        print("B(MyTable) = " + str(si.blocksAccessed()))
        print("R(MyTable) = " + str(si.recordsOutput()))
        print("V(MyTable,A) = " + str(si.distinctValues("A")))
        print("V(MyTable,B) = " + str(si.distinctValues("B")))

        #  Part 3: View Metadata
        viewdef = "select B from MyTable where A = 1"
        mdm.createView("viewA", viewdef, tx)
        v = mdm.getViewDef("viewA", tx)
        print("View def = " + v)

        #  Part 4: Index Metadata
        mdm.createIndex("indexA", "MyTable", "A", tx)
        mdm.createIndex("indexB", "MyTable", "B", tx)
        idxmap = mdm.getIndexInfo("MyTable", tx)
        ii = idxmap.get('A')
        print(ii)
        print("B(indexA) = " + str(ii.blocksAccessed()))
        print("R(indexA) = " + str(ii.recordsOutput()))
        print("V(indexA,A) = " + str(ii.distinctValues("A")))
        print("V(indexA,B) = " + str(ii.distinctValues("B")))

        ii = idxmap.get("B")
        print("B(indexB) = " + str(ii.blocksAccessed()))
        print("R(indexB) = " + str(ii.recordsOutput()))
        print("V(indexB,A) = " + str(ii.distinctValues("A")))
        print("V(indexB,B) = " + str(ii.distinctValues("B")))
        tx.commit()