def __init__(self, tx, idxname, leafLayout):
    """Open the B-tree index ``idxname``, initialising its files if empty.

    The index uses two files: ``idxname + "leaf"`` for leaf records and
    ``idxname + "dir"`` for directory records.  For each file whose size
    (as reported by ``tx.size``) is 0, a first block is appended and
    formatted here.  A freshly created directory root also receives one
    initial entry whose key is the minimum value for the ``dataval``
    field type.

    Args:
        tx: The calling transaction; used for size checks, block appends
            and handed to every ``BTPage`` created here.
        idxname: Base name from which the leaf and directory file names
            are derived.
        leafLayout: Layout of the leaf records.  Its schema supplies the
            definitions of the ``block`` and ``dataval`` fields used to
            build the directory schema.
    """
    super(BTreeIndex, self).__init__()
    self.tx = tx

    # deal with the leaves
    self.leaftbl = idxname + "leaf"
    self.leafLayout = leafLayout
    if tx.size(self.leaftbl) == 0:
        blk = tx.append(self.leaftbl)
        node = BTPage(tx, blk, leafLayout)
        try:
            node.format(blk, -1)
        finally:
            # The page used to be left open here, unlike the directory
            # page below, which is explicitly closed.
            # NOTE(review): assumes BTPage.close() releases the page's
            # hold on its block -- confirm against BTPage.
            node.close()

    # deal with the directory
    dirsch = Schema()
    dirsch.add("block", leafLayout.schema())
    dirsch.add("dataval", leafLayout.schema())
    dirtbl = idxname + "dir"
    self.dirLayout = Layout(dirsch)
    self.rootblk = BlockId(dirtbl, 0)
    if tx.size(dirtbl) == 0:
        # create new root block
        tx.append(dirtbl)
        node = BTPage(tx, self.rootblk, self.dirLayout)
        try:
            node.format(self.rootblk, 0)
            # insert initial directory entry: the smallest possible key
            # for the field type (integer minimum, or the empty string)
            fldtype = dirsch.type("dataval")
            minval = (Constant(Integer.MIN_VALUE) if fldtype == INTEGER
                      else Constant(""))
            node.insertDir(0, minval, 0)
        finally:
            # Close the page even if formatting or the insert fails.
            node.close()
class ProjectPlan(Plan):
    """Plan node that projects a subquery onto a list of fields."""

    def __init__(self, p, fieldlist):
        """Build the projection over subquery ``p``.

        Args:
            p: The plan of the underlying subquery.
            fieldlist: Names of the fields to keep.  Each one is added to
                this plan's schema using its definition in ``p.schema()``.
        """
        super(ProjectPlan, self).__init__()
        self.p = p
        self._schema = Schema()
        for name in fieldlist:
            self._schema.add(name, p.schema())

    def open(self):
        """Open the subquery and wrap its scan in a ``ProjectScan``.

        The project scan is given the field names of this plan's schema.
        """
        underlying = self.p.open()
        return ProjectScan(underlying, self._schema.fields())

    def blocksAccessed(self):
        """Return the block-access estimate of the underlying subquery."""
        return self.p.blocksAccessed()

    def recordsOutput(self):
        """Return the output-record estimate of the underlying subquery."""
        return self.p.recordsOutput()

    def distinctValues(self, fldname):
        """Return the subquery's distinct-value estimate for ``fldname``."""
        return self.p.distinctValues(fldname)

    def schema(self):
        """Return the projection's schema, built from the field list."""
        return self._schema
class GroupByPlan(Plan):
    """Plan node that groups a subquery's records and aggregates them."""

    def __init__(self, tx, p, groupfields, aggfns):
        """Build a group-by plan over subquery ``p``.

        The subquery is wrapped in a ``SortPlan`` on the grouping fields.
        The output schema holds each grouping field (defined as in
        ``p.schema()``) followed by one integer field per aggregation
        function, named by that function's ``fieldName()``.

        Args:
            tx: The calling transaction, passed on to the sort plan.
            p: The plan of the underlying subquery.
            groupfields: Names of the grouping fields.
            aggfns: The aggregation functions to compute.
        """
        super(GroupByPlan, self).__init__()
        self.p = SortPlan(tx, p, groupfields)
        self.groupfields = groupfields
        self.aggfns = aggfns
        self.sch = Schema()
        for name in groupfields:
            self.sch.add(name, p.schema())
        for agg in aggfns:
            self.sch.addIntField(agg.fieldName())

    def open(self):
        """Open the sort plan and wrap its scan in a ``GroupByScan``.

        The group-by scan receives the grouping fields and the
        aggregation functions.
        """
        sorted_scan = self.p.open()
        return GroupByScan(sorted_scan, self.groupfields, self.aggfns)

    def blocksAccessed(self):
        """Return the block-access estimate reported by the sort plan.

        Per the original notes this is one pass through the sorted table
        and excludes the one-time cost of materializing and sorting;
        that depends on ``SortPlan``, which is not visible here.
        """
        return self.p.blocksAccessed()

    def recordsOutput(self):
        """Return the estimated number of groups.

        This is the product, over all grouping fields, of the sort
        plan's distinct-value estimates; with no grouping fields it is 1.
        """
        groups = 1
        for name in self.groupfields:
            groups *= self.p.distinctValues(name)
        return groups

    def distinctValues(self, fldname):
        """Return the estimated number of distinct values of ``fldname``.

        A field present in the sort plan's schema uses the sort plan's
        own estimate.  Any other field (i.e. an aggregate field) is
        assumed to be distinct in every group, so the number of groups
        is returned.
        """
        if not self.p.schema().hasField(fldname):
            return self.recordsOutput()
        return self.p.distinctValues(fldname)

    def schema(self):
        """Return the output schema: grouping fields plus aggregate fields."""
        return self.sch