Beispiel #1
0
def query2Hash_test(db, printOutput):
    query2 = db.query().fromTable('part') \
        .join(db.query().fromTable('lineitem').where('L_RETURNFLAG == \'R\''),
              rhsSchema=db.relationSchema('lineitem'),
              method='hash',
              lhsHashFn='hash(P_PARTKEY) % 111', lhsKeySchema=DBSchema('P_PARTKEY', [('P_PARTKEY', 'int')]),
              rhsHashFn='hash(L_PARTKEY) % 111', rhsKeySchema=DBSchema('L_PARTKEY', [('L_PARTKEY', 'int')])) \
            .groupBy(groupSchema=DBSchema('P_NAME', [('P_NAME', 'char(55)')]),
                    aggSchema=DBSchema('COUNT', [('COUNT', 'int')]),
                    groupExpr=(lambda e: e.P_NAME),
                    aggExprs=[(0, lambda acc, e: acc + 1, lambda x: x)],
                    groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
        .finalize()

    start = time.time()
    processedQuery = [
        query2.schema().unpack(tup) for page in db.processQuery(query2)
        for tup in page[1]
    ]
    end = time.time()
    duration = end - start
    print('Query: Query 2, Hash')
    if printOutput:
        print('Results: ', processedQuery)
    print('Time', duration)
    return
Beispiel #2
0
def query1(db):
    query = db.query().fromTable('lineitem')\
        .where(
            "(L_SHIPDATE >= 19940101) and (L_SHIPDATE < 19950101) and (0.06-0.01 <= L_DISCOUNT <= 0.06 + 0.01) and (L_QUANTITY < 24")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('ONE', 'int')]),
            groupExpr=(lambda e: 1),
            aggSchema=DBSchema('groupBy', [('revenue', 'float')]),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * e.L_DISCOUNT), lambda x: x)], \
            groupHashFn=(lambda gbVal: hash(gbVal) % 1))\
        .select(
            {'revenue': ('revenue', 'float')})\
        .finalize()
    return query
Beispiel #3
0
 def testHashJoin(self):
     schema = self.db.relationSchema('employee')
     e2schema = schema.rename('employee2', {'id': 'id2', 'age': 'age2', 'dept_id': 'dept_id2'})
     keySchema = DBSchema('employeeKey', [('id', 'int')])
     keySchema2 = DBSchema('employeeKey2', [('id2', 'int')])
     hashJoin = self.db.query().fromTable('employee').join( \
         self.db.query().fromTable('employee'), \
         rhsSchema=e2schema, \
         method='hash', \
         lhsHashFn='hash(id) % 4', lhsKeySchema=keySchema, \
         rhsHashFn='hash(id2) % 4', rhsKeySchema=keySchema2, \
         ).finalize()
     results = self.getResults(hashJoin)
     self.assertEqual(len(results), self.numEmployees)
Beispiel #4
0
 def validateSchema(self):
     if self.lhsPlan.schema().match(self.rhsPlan.schema()):
         schemaName = self.operatorType() + str(self.id())
         schemaFields = self.lhsPlan.schema().schema()
         self.unionSchema = DBSchema(schemaName, schemaFields)
     else:
         raise ValueError("Union operator type error, mismatched input schemas")
Beispiel #5
0
 def createRelation(self, relationName, relationFields):
     if relationName not in self.relationMap:
         schema = DBSchema(relationName, relationFields)
         self.relationMap[relationName] = schema
         self.storage.createRelation(relationName, schema)
         self.checkpoint()
     else:
         raise ValueError("Relation '" + relationName + "' already exists")
Beispiel #6
0
def query2(db):
    query = db.query().fromTable('part')\
        .join(
            db.query().fromTable('lineitem').where("(L_SHIPDATE >= 19950901) and (L_SHIPDATE < 19951001)"),
            method='hash',
            lhsHashFn='hash(P_PARTKEY) % 7', lhsKeySchema=DBSchema('partkey2',[('P_PARTKEY', 'int')]),
            rhsHashFn='hash(L_PARTKEY) % 7', rhsKeySchema=DBSchema('partkey1', [('L_PARTKEY', 'int')])) \
        .groupBy(
            groupSchema=DBSchema('groupKey', [('ONE', 'int')]),
            aggSchema=DBSchema('groupBy', [('promo_revenue', 'float')]),
            groupExpr=(lambda e: 1),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 1))\
        .select(
            {'promo_revenue' : ('promo_revenue', 'float')}).finalize()

    return query
Beispiel #7
0
def query3bBNL_test(db, printOutput):
    query3 = db.query().fromTable('part') \
        .join(db.query().fromTable('lineitem'),
              rhsSchema=db.relationSchema('lineitem'),
              method='block-nested-loops',
              expr='L_PARTKEY == P_PARTKEY') \
        .join(db.query().fromTable('orders'),
              rhsSchema=db.relationSchema('orders'),
              method='block-nested-loops',
              expr='L_ORDERKEY == O_ORDERKEY') \
        .join(db.query().fromTable('customer'),
              rhsSchema=db.relationSchema('customer'),
              method='block-nested-loops',
              expr='O_CUSTKEY == C_CUSTKEY') \
        .join(db.query().fromTable('nation'),
              rhsSchema=db.relationSchema('nation'),
              method='block-nested-loops',
              expr='C_NATIONKEY == N_NATIONKEY') \
        .groupBy(groupSchema=DBSchema('FIRST', [('N_NAME', 'char(25)'), ('P_NAME', 'char(55)')]),
                 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
                 groupExpr=(lambda e: (e.N_NAME, e.P_NAME)),
                 aggExprs=[(0, lambda acc, e: acc + e.L_QUANTITY, lambda x: x)],
                 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
        .groupBy(groupSchema=DBSchema('SECOND', [('N_NAME', 'char(25)')]),
                 aggSchema=DBSchema('MAX', [('MAX', 'double')]),
                 groupExpr=(lambda e: e.N_NAME),
                 aggExprs=[(0, lambda acc, e: max(acc, e.SUM), lambda x: x)],
                 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
        .select({'NATION': ('N_NAME', 'char(25)'), 'MAX': ('MAX', 'double')}) \
        .finalize()

    start = time.time()
    processedQuery = [
        query3.schema().unpack(tup) for page in db.processQuery(query3)
        for tup in page[1]
    ]
    end = time.time()
    duration = end - start
    print('Query: Query 3b (Order Reversed), BNL')
    if printOutput:
        print('Results: ', processedQuery)
    print('Time', duration)
    return
Beispiel #8
0
 def testGroupBy(self):
     # SELECT id, min(age), max(age) FROM Employee GROUP BY (id % 2)
     aggMinMaxSchema = DBSchema('minmax', [('minAge', 'int'), ('maxAge', 'int')])
     keySchema = DBSchema('employeeKey', [('id', 'int')])
     groupBy = self.db.query().fromTable('employee').groupBy( \
         groupSchema=keySchema, \
         aggSchema=aggMinMaxSchema, \
         groupExpr=(lambda e: e.id % 2), \
         aggExprs=[(sys.maxsize, lambda acc, e: min(acc, e.age), lambda x: x), \
                   (0, lambda acc, e: max(acc, e.age), lambda x: x)], \
         groupHashFn=(lambda gbVal: hash(gbVal[0]) % 2) \
         ).finalize()
     results = self.getResults(groupBy)
     self.assertEqual(len(results), 2)
     for result in results:
         if result.id == 0:
             self.assertEqual(result.minAge, 20)
             self.assertEqual(result.maxAge, 56)
         else:
             self.assertEqual(result.minAge, 22)
             self.assertEqual(result.maxAge, 58)
Beispiel #9
0
def query3(db):
    query = db.query().fromTable('customer')\
        .join(
            db.query().fromTable('orders'),
            method='hash',
            lhsHashFn='hash(C_CUSTKEY) % 5', lhsKeySchema=DBSchema('customerKey1', [('C_CUSTKEY', 'int')]),
            rhsHashFn='hash(O_CUSTKEY) % 5', rhsKeySchema=DBSchema('customerKey2', [('O_CUSTKEY', 'int')]))\
        .join(
            db.query().fromTable('lineitem'),
            method='hash',
            lhsHashFn='hash(O_ORDERKEY) % 5', lhsKeySchema=DBSchema('orderKey1', [('O_ORDERKEY', 'int')]),
            rhsHashFn='hash(L_ORDERKEY) % 5', rhsKeySchema=DBSchema('orderkey2', [('L_ORDERKEY', 'int')]))\
        .where(
            "C_MKTSEGMENT == 'BUILDING' and O_ORDERDATE < 19950315 and L_SHIPDATE > 19950315")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('L_ORDERKEY', 'int'), ('O_ORDERDATE', 'int'), ('O_SHIPPRIORITY', 'int')]),
            aggSchema=DBSchema('groupAgg', [('revenue', 'float')]),
            groupExpr=(lambda e: (e.L_ORDERKEY, e.O_ORDERDATE, e.O_SHIPPRIORITY)),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 10))\
        .select(
            {'l_orderkey': ('L_ORDERKEY', 'int'),
            'revenue': ('revenue', 'float'),
            'o_orderdate': ('O_ORDERDATE', 'int'),
            'o_shippriority': ('O_SHIPPRIORITY', 'int')})\
        .finalize()
    return query
Beispiel #10
0
 def testIndexJoin(self):
     schema = self.db.relationSchema('employee')
     e2schema = schema.rename('employee2', {'id': 'id2', 'age': 'age2', 'dept_id': 'dept_id2'})
     lhsKeySchema = DBSchema('employeeAge', [('age2', 'int')])
     join = self.db.query().fromTable('employee').join( \
         self.db.query().fromTable('employee'), \
         lhsSchema=e2schema, \
         lhsKeySchema=lhsKeySchema, \
         method='indexed', \
         indexId=self.idxId, \
         expr='True'
     ).finalize()
     results = self.getResults(join)
     self.assertEqual(len(results), self.numEmployees)
Beispiel #11
0
    def setUp(self):
        warnings.simplefilter("ignore", ResourceWarning)

        # Start with a fresh 'employee' table
        self.db = Database.Database()
        self.db.createRelation('employee', [('id', 'int'), ('age', 'int'), ('dept_id', 'int')])

        # Populate employees with department 0 or 1
        self.numEmployees = 20
        empSchema = self.db.relationSchema('employee')

        # Create an index over employee age
        keySchema = DBSchema('employeeAge', [('age', 'int')])
        self.idxId = self.db.storageEngine().createIndex('employee', empSchema, keySchema, False)

        for tup in [empSchema.pack(empSchema.instantiate(i, 2 * i + 20, i % 2)) for i in range(self.numEmployees)]:
            self.db.insertTuple(empSchema.name, tup)
Beispiel #12
0
def query1Hash_test(db, printOutput):
    query1 = db.query().fromTable('part') \
        .join(db.query().fromTable('partsupp').where('PS_AVAILQTY == 1'),
              rhsSchema=db.relationSchema('partsupp'),
              method='hash',
              lhsHashFn='hash(P_PARTKEY) % 111', lhsKeySchema=DBSchema('P_PARTKEY', [('P_PARTKEY', 'int')]),
              rhsHashFn='hash(PS_PARTKEY) % 111', rhsKeySchema=DBSchema('PS_PARTKEY', [('PS_PARTKEY', 'int')])) \
        .join(db.query().fromTable('supplier'),
              rhsSchema=db.relationSchema('supplier'),
              method='hash',
              lhsHashFn='hash(PS_SUPPKEY) % 111', lhsKeySchema=DBSchema('PS_SUPPKEY', [('PS_SUPPKEY', 'int')]),
              rhsHashFn='hash(S_SUPPKEY) % 111', rhsKeySchema=DBSchema('S_SUPPKEY', [('S_SUPPKEY', 'int')])) \
        .union(
        db.query().fromTable('part')
            .join(db.query().fromTable('partsupp').where('PS_SUPPLYCOST < 5'),
                  rhsSchema=db.relationSchema('partsupp'),
                  method='hash',
                  lhsHashFn='hash(P_PARTKEY) % 111', lhsKeySchema=DBSchema('P_PARTKEY', [('P_PARTKEY', 'int')]),
                  rhsHashFn='hash(PS_PARTKEY) % 111', rhsKeySchema=DBSchema('PS_PARTKEY', [('PS_PARTKEY', 'int')])) \
            .join(db.query().fromTable('supplier'),
                  rhsSchema=db.relationSchema('supplier'),
                  method='hash',
                  lhsHashFn='hash(PS_SUPPKEY) % 111', lhsKeySchema=DBSchema('PS_SUPPKEY', [('PS_SUPPKEY', 'int')]),
                  rhsHashFn='hash(S_SUPPKEY) % 111', rhsKeySchema=DBSchema('S_SUPPKEY', [('S_SUPPKEY', 'int')]))) \
        .select({'P_NAME': ('P_NAME', 'char(55)'), 'S_NAME': ('S_NAME', 'char(25)')}) \
        .finalize()

    start = time.time()
    processedQuery = [
        query1.schema().unpack(tup) for page in db.processQuery(query1)
        for tup in page[1]
    ]
    end = time.time()
    duration = end - start
    print('Query: Query 1, Hash')
    if printOutput:
        print('Results: ', processedQuery)
    print('Time', duration)
    return
Beispiel #13
0
    def processAllPages(self):
        schema = self.operatorType() + str(self.id())
        fields = self.groupSchema.schema() + self.aggSchema.schema()
        outputSchema = DBSchema(schema, fields)

        relIds = []
        for (pageId, page) in iter(self.subPlan):
            for tpl in page:
                group = self.groupExpr(self.subSchema.unpack(tpl))
                key = self.groupHashFn((group, None))
                relId = str(self.id) + "u" + str(key)
                self.storage.createRelation(relId, self.subSchema)
                self.storage.insertTuple(relId, tpl)
                if relId not in relIds:
                    relIds.append(relId)

        for rid in relIds:
            groupDict = {}
            for (pageId, page) in self.storage.pages(rid):
                for tpl in page:
                    groupKey = self.groupExpr(self.subSchema.unpack(tpl))
                    if groupKey not in groupDict:
                        groupDict[groupKey] = []
                        for trio in self.aggExprs:
                            groupDict[groupKey].append(trio[0])

                    for i in range(len(self.aggExprs)):
                        groupDict[groupKey][i] = self.aggExprs[i][1](
                            groupDict[groupKey][i], self.subSchema.unpack(tpl))

            for key in groupDict:
                for i in range(len(self.aggExprs)):
                    groupDict[key][i] = self.aggExprs[i][2](groupDict[key][i])

            for key in groupDict:
                outTuple = outputSchema.instantiate(
                    key, *[f for f in groupDict[key]])
                self.emitOutputTuple(self.outputSchema.pack(outTuple))
        return self.storage.pages(self.relationId())
Beispiel #14
0
def query4(db):
    query = db.query().fromTable('nation')\
        .join(
            db.query().fromTable('customer'),
            method='hash',
            lhsHashFn='hash(N_NATIONKEY) % 11', lhsKeySchema=DBSchema('nationKey1', [('N_NATIONKEY', 'int')]),
            rhsHashFn='hash(C_NATIONKEY) % 11', rhsKeySchema=DBSchema('nationKey2', [('C_NATIONKEY', 'int')]))\
        .join(
            db.query().fromTable('orders'),
            method='hash',
            lhsHashFn='hash(C_CUSTKEY) % 11', lhsKeySchema=DBSchema('custKey1', [('C_CUSTKEY', 'int')]),
            rhsHashFn='hash(O_CUSTKEY) % 11', rhsKeySchema=DBSchema('custKey2', [('O_CUSTKEY', 'int')])) \
        .join(
            db.query().fromTable('lineitem'),
            method='hash',
            lhsHashFn='hash(O_ORDERKEY) % 11', lhsKeySchema=DBSchema('orderKey1', [('O_ORDERKEY', 'int')]),
            rhsHashFn='hash(L_ORDERKEY) % 11', rhsKeySchema=DBSchema('orderKey2', [('L_ORDERKEY', 'int')])) \
        .where(
            "O_ORDERDATE >= 19931001 and O_ORDERDATE < 19940101 and L_RETURNFLAG == 'R'")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('C_CUSTKEY','int'),('C_NAME','char(25)'),('C_ACCTBAL','float'),('C_PHONE','char(15)'),('N_NAME','char(25)'),('C_ADDRESS','char(40)'),('C_COMMENT','char(117)')]),
            aggSchema=DBSchema('groupAgg', [('revenue', 'float')]),
            groupExpr=(lambda e: (e.C_CUSTKEY, e.C_NAME, e.C_ACCTBAL, e.C_PHONE, e.N_NAME, e.N_NAME, e.C_ADDRESS, e.C_COMMENT)),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 11))\
        .select(
            {'c_custkey': ('C_CUSTKEY', 'int'),
             'c_name': ('C_NAME', 'char(25)'),
             'revenue': ('revenue', 'float'),
             'c_acctbal': ('C_ACCTBAL', 'float'),
             'n_name': ('N_NAME', 'char(25)'),
             'c_address': ('C_ADDRESS', 'char(40)'),
             'c_phone': ('C_PHONE', 'char(15)'),
             'c_comment': ('C_COMMENT', 'char(117)')})\
        .finalize()
    return query
Beispiel #15
0
 def validate(self, operator, storage):
     if operator.operatorType() == "TableScan":
         return operator.schema()
     elif operator.operatorType() == "Select":
         operator.storage = storage
         return self.validate(operator.subPlan, storage)
     elif operator.operatorType() == "Project":
         operator.storage = storage
         self.validate(operator.subPlan, storage)
         return DBSchema( operator.relationId(), \
                             [(k, v[1]) for (k,v) in operator.projectExprs.items()])
     elif operator.operatorType() == "GroupBy":
         self.validate(operator.subPlan, storage)
         operator.storage = storage
         return operator.schema()
     elif operator.operatorType() == "UnionAll":
         operator.storage = storage
         return self.validate(operator.subPlan, storage)
     else:
         operator.lhsSchema = self.validate(operator.lhsPlan, storage)
         operator.rhsSchema = self.validate(operator.rhsPlan, storage)
         operator.initializeSchema()
         operator.storage = storage
         return operator.schema()
Beispiel #16
0
def tests(printOutput):
	db = setup()
	'''select
			sum(l_extendedprice * l_discount) as revenue
		from
			lineitem
		where
			l_shipdate >= 19940101
			and l_shipdate < 19950101
			and l_discount between 0.06 - 0.01 and 0.06 + 0.01
			and l_quantity < 24'''
	query1 = db.query().fromTable('lineitem') \
				.where('L_SHIPDATE >= 19940101') \
		.where('L_SHIPDATE < 19950101') \
		.where('L_DISCOUNT > 0.06 - 0.01') \
		.where('L_DISCOUNT < 0.06 + 0.01') \
		.where('L_QUANTITY < 24') \
		.groupBy(groupSchema=DBSchema('REV', [('dum', 'int')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (1)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * e.L_DISCOUNT, lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'REVENUE': ('SUM', 'double')}) \
		.finalize()
		
	'''select
				sum(l_extendedprice * (1 - l_discount)) as promo_revenue
	from
				lineitem,
				part
	where
				l_partkey = p_partkey
				and l_shipdate >= 19950901
				and l_shipdate < 19951001'''
	query2 = db.query().fromTable('lineitem') \
		.join(db.query().fromTable('part'),
							rhsSchema=db.relationSchema('part'),
							method='block-nested-loops',
							expr='L_PARTKEY == P_PARTKEY') \
		.where('L_SHIPDATE >= 19950901') \
		.where('L_SHIPDATE < 19951001') \
		.groupBy(groupSchema=DBSchema('REV', [('dum', 'int')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (1)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'PROMO_REVENUE': ('SUM', 'double')}) \
		.finalize()
		
	'''select
				l_orderkey,
				sum(l_extendedprice * (1 - l_discount)) as revenue,
				o_orderdate,
				o_shippriority
	from
				customer,
				orders,
				lineitem
	where
				c_mktsegment = 'BUILDING'
				and c_custkey = o_custkey
				and l_orderkey = o_orderkey
				and o_orderdate < 19950315
				and l_shipdate > 19950315
	group by
		l_orderkey,
				o_orderdate,
				o_shippriority'''
	query3 = db.query().fromTable('customer') \
		.join(db.query().fromTable('orders'),
							rhsSchema=db.relationSchema('orders'),
							method='block-nested-loops',
							expr='C_CUSTKEY == O_CUSTKEY') \
		.join(db.query().fromTable('lineitem'),
							rhsSchema=db.relationSchema('lineitem'),
							method='block-nested-loops',
							expr='L_ORDERKEY == O_ORDERKEY') \
		.where('L_SHIPDATE == \'BUILDING\'') \
		.where('O_ORDERDATE < 19950315') \
		.where('L_SHIPDATE > 19950315') \
		.groupBy(groupSchema=DBSchema('FIRST', [('L_ORDERKEY', 'int'), ('O_ORDERDATE', 'char(10)'), ('O_SHIPPRIORITY', 'int')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (e.L_ORDERKEY, e.O_ORDERDATE, e.O_SHIPPRIORITY)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'L_ORDERKEY': ('L_ORDERKEY', 'int'), 'REVENUE': ('SUM', 'double'),'O_ORDERDATE': ('O_ORDERDATE', 'char(10)'),'O_SHIPPRIORITY': ('O_SHIPPRIORITY', 'int')}) \
		.finalize()
		
	'''select
				c_custkey,
				c_name,
				sum(l_extendedprice * (1 - l_discount)) as revenue,
				c_acctbal,
				n_name,
				c_address,
				c_phone,
				c_comment
	from
				customer,
				orders,
				lineitem,
				nation
	where
				c_custkey = o_custkey
				and l_orderkey = o_orderkey
				and o_orderdate >= 19931001
				and o_orderdate < 19940101
				and l_returnflag = 'R'
				and c_nationkey = n_nationkey
	group by
				c_custkey,
				c_name,
				c_acctbal,
				c_phone,
				n_name,
				c_address,
				c_comment'''
	query4 = db.query().fromTable('customer') \
		.join(db.query().fromTable('orders'),
							rhsSchema=db.relationSchema('orders'),
							method='block-nested-loops',
							expr='C_CUSTKEY == O_CUSTKEY') \
		.join(db.query().fromTable('lineitem'),
							rhsSchema=db.relationSchema('lineitem'),
							method='block-nested-loops',
							expr='L_ORDERKEY == O_ORDERKEY') \
		.where('L_SHIPDATE == \'BUILDING\'') \
		.where('O_ORDERDATE >= 19931001') \
		.where('O_ORDERDATE < 19940101') \
		.where('L_RETURNFLAG == \'R\'') \
		.join(db.query().fromTable('nation'),
							rhsSchema=db.relationSchema('nation'),
							method='block-nested-loops',
							expr='C_NATIONKEY == N_NATIONKEY') \
		.groupBy(groupSchema=DBSchema('FIRST', [('C_CUSTKEY', 'int'), ('C_NAME', 'char(25)'), ('C_ACCTBAL', 'double'), ('C_PHONE', 'char(15)'), ('N_NAME', 'char(25)'),('C_ADDRESS', 'char(40)'),('C_COMMENT', 'char(117)')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (e.C_CUSTKEY, e.C_NAME, e.C_ACCTBAL, e.C_PHONE, e.N_NAME, e.C_ADDRESS, e.C_PHONE, e.C_COMMENT)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'C_CUSTKEY' : ('C_CUSTKEY', 'int'), 'C_NAME' : ('C_NAME', 'char(25)'),'SUM' : ('SUM', 'double'), 'C_ACCTBAL' : ('C_ACCTBAL', 'double'), 'C_PHONE' : ('C_PHONE', 'char(15)'), 'N_NAME' : ('N_NAME', 'char(25)'),'C_ADDRESS' : ('C_ADDRESS', 'char(40)'),'C_COMMENT' : ('C_COMMENT', 'char(117)')}) \
		.finalize()
		
	'''select
				n_name,
				sum(l_extendedprice * (1 - l_discount)) as revenue
	from
				customer,
				orders,
				lineitem,
				supplier,
				nation,
				region
	where
				c_custkey = o_custkey
				and l_orderkey = o_orderkey
				and l_suppkey = s_suppkey
				and c_nationkey = s_nationkey
				and s_nationkey = n_nationkey
				and n_regionkey = r_regionkey
				and r_name = 'ASIA'
				and o_orderdate >= 19940101
				and o_orderdate < 19950101
	group by
				n_name'''
	query5 = db.query().fromTable('customer') \
		.join(db.query().fromTable('orders'),
							rhsSchema=db.relationSchema('orders'),
							method='block-nested-loops',
							expr='C_CUSTKEY == O_CUSTKEY') \
		.join(db.query().fromTable('lineitem'),
							rhsSchema=db.relationSchema('lineitem'),
							method='block-nested-loops',
							expr='L_ORDERKEY == O_ORDERKEY') \
		.join(db.query().fromTable('supplier'),
							rhsSchema=db.relationSchema('supplier'),
							method='block-nested-loops',
							expr='L_SUPPKEY == S_SUPPKEY and C_NATIONKEY == S_NATIONKEY') \
		.join(db.query().fromTable('nation'),
							rhsSchema=db.relationSchema('nation'),
							method='block-nested-loops',
							expr='S_NATIONKEY == N_NATIONKEY') \
		.join(db.query().fromTable('region'),
							rhsSchema=db.relationSchema('region'),
							method='block-nested-loops',
							expr='N_REGIONKEY == R_REGIONKEY') \
		.where('L_SHIPDATE == \'BUILDING\'') \
		.where('R_NAME == \'ASIA\'') \
		.where('O_ORDERDATE >= 19940101') \
		.where('O_ORDERDATE < 19950101') \
		.groupBy(groupSchema=DBSchema('FIRST', [('N_NAME', 'char(25)')]),
								 aggSchema=DBSchema('SUM', [('SUM', 'double')]),
								 groupExpr=(lambda e: (e.L_ORDERKEY, e.O_ORDERDATE, e.O_SHIPPRIORITY)),
								 aggExprs=[(0, lambda acc, e: acc + e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT), lambda x: x)],
								 groupHashFn=(lambda gbVal: hash(gbVal[0]) % 111)) \
		.select({'N_NAME': ('N_NAME', 'char(25)'), 'SUM' : ('SUM', 'double')}) \
		.finalize()
	

	queries = [query1, query2, query3, query4, query5]
	names = ['query1', 'query2', 'query3', 'query4', 'query5']
	opt = Optimizer.Optimizer(db)
	for i,q in enumerate(queries):
		querytest(db, printOutput, q, names[i], opt)
Beispiel #17
0
    def initializeSchemas(self):
        tpchNamesAndFields = [
            ('part', [('P_PARTKEY', 'int'), ('P_NAME', 'char(55)'),
                      ('P_MFGR', 'char(25)'), ('P_BRAND', 'char(10)'),
                      ('P_TYPE', 'char(25)'), ('P_SIZE', 'int'),
                      ('P_CONTAINER', 'char(10)'), ('P_RETAILPRICE', 'double'),
                      ('P_COMMENT', 'char(23)')], "issssisds"),
            ('supplier', [('S_SUPPKEY', 'int'), ('S_NAME', 'char(25)'),
                          ('S_ADDRESS', 'char(40)'), ('S_NATIONKEY', 'int'),
                          ('S_PHONE', 'char(15)'), ('S_ACCTBAL', 'double'),
                          ('S_COMMENT', 'char(101)')], "issisds"),
            ('partsupp', [('PS_PARTKEY', 'int'), ('PS_SUPPKEY', 'int'),
                          ('PS_AVAILQTY', 'int'), ('PS_SUPPLYCOST', 'double'),
                          ('PS_COMMENT', 'char(199)')], "iiids"),
            ('customer', [('C_CUSTKEY', 'int'), ('C_NAME', 'char(25)'),
                          ('C_ADDRESS', 'char(40)'), ('C_NATIONKEY', 'int'),
                          ('C_PHONE', 'char(15)'), ('C_ACCTBAL', 'double'),
                          ('C_MKTSEGMENT', 'char(10)'),
                          ('C_COMMENT', 'char(117)')], "issisdss"),
            (
                'orders',
                [
                    ('O_ORDERKEY', 'int'),
                    ('O_CUSTKEY', 'int'),
                    ('O_ORDERSTATUS', 'char(1)'),
                    ('O_TOTALPRICE', 'double'),
                    ('O_ORDERDATE', 'int'),  # date
                    ('O_ORDERPRIORITY', 'char(15)'),
                    ('O_CLERK', 'char(15)'),
                    ('O_SHIPPRIORITY', 'int'),
                    ('O_COMMENT', 'char(79)')
                ],
                "iisdtssis"),
            (
                'lineitem',
                [
                    ('L_ORDERKEY', 'int'),
                    ('L_PARTKEY', 'int'),
                    ('L_SUPPKEY', 'int'),
                    ('L_LINENUMBER', 'int'),
                    ('L_QUANTITY', 'double'),
                    ('L_EXTENDEDPRICE', 'double'),
                    ('L_DISCOUNT', 'double'),
                    ('L_TAX', 'double'),
                    ('L_RETURNFLAG', 'char(1)'),
                    ('L_LINESTATUS', 'char(1)'),
                    ('L_SHIPDATE', 'int'),  # date
                    ('L_COMMITDATE', 'int'),  # date
                    ('L_RECEIPTDATE', 'int'),  # date
                    ('L_SHIPINSTRUCT', 'char(25)'),
                    ('L_SHIPMODE', 'char(10)'),
                    ('L_COMMENT', 'char(44)')
                ],
                "iiiiddddsstttsss"),
            ('nation', [('N_NATIONKEY', 'int'), ('N_NAME', 'char(25)'),
                        ('N_REGIONKEY', 'int'),
                        ('N_COMMENT', 'char(152)')], "isis"),
            ('region', [('R_REGIONKEY', 'int'), ('R_NAME', 'char(25)'),
                        ('R_COMMENT', 'char(152)')], "iss")
        ]

        self.schemas = dict(
            map(lambda x: (x[0], DBSchema(x[0], x[1])), tpchNamesAndFields))
        self.parsers = dict(
            map(lambda x: (x[0], self.buildParser(x[2])), tpchNamesAndFields))
Beispiel #18
0
 def initializeSchema(self):
     schema = self.operatorType() + str(self.id())
     fields = self.groupSchema.schema() + self.aggSchema.schema()
     self.outputSchema = DBSchema(schema, fields)
Beispiel #19
0
    def checkJoins(self, joins, allAttributes, cachedLHS, cachedRHS):
        for join in joins:
            if join.joinMethod == 'hash':
                lhsCheck = join.lhsKeySchema.fields  # This is a list
                rhsCheck = join.rhsKeySchema.fields  # This is a list
                joinAttr = lhsCheck + rhsCheck

                contains = all(x in allAttributes for x in joinAttr) and any(
                    y in cachedRHS.schema().fields for y in joinAttr)

                # print(lhsCheck, rhsCheck, contains)
                if contains:
                    keySchema1 = DBSchema('keyschema1', [(lhsCheck[0], 'int')])
                    keySchema2 = DBSchema('keyschema2', [(rhsCheck[0], 'int')])
                    hashFn1 = 'hash(' + lhsCheck[0] + ') % 7'
                    hashFn2 = 'hash(' + rhsCheck[0] + ') % 7'
                    joinExpr = lhsCheck[0] + ' == ' + rhsCheck[0]

                    if lhsCheck[0] in cachedLHS.schema().fields:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn1,
                            'lhsKeySchema': keySchema1,
                            'rhsHashFn': hashFn2,
                            'rhsKeySchema': keySchema2,
                            'joinExpr': joinExpr
                        }
                    else:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn2,
                            'lhsKeySchema': keySchema2,
                            'rhsHashFn': hashFn1,
                            'rhsKeySchema': keySchema1,
                            'joinExpr': joinExpr
                        }

            elif join.joinExpr:
                joinAttr = ExpressionInfo(join.joinExpr).getAttributes()
                contains = all(x in allAttributes for x in joinAttr)

                if contains:
                    joinAttr1 = joinAttr.pop()
                    joinAttr2 = joinAttr.pop()

                    try:
                        joinAttr.pop()
                    except KeyError:
                        pass
                        # print('There are only two attributes in this, as expected')

                    keySchema1 = DBSchema('keySchema1', [([joinAttr1, 'int'])])
                    keySchema2 = DBSchema('keySchema1', [([joinAttr2, 'int'])])
                    hashFn1 = 'hash(' + joinAttr1 + ') % 7'
                    hashFn2 = 'hash(' + joinAttr2 + ') % 7'
                    joinExpr = join.joinExpr

                    if joinAttr1 in cachedLHS.schema().fields:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn1,
                            'lhsKeySchema': keySchema1,
                            'rhsHashFn': hashFn2,
                            'rhsKeySchema': keySchema2,
                            'joinExpr': joinExpr
                        }
                    else:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn2,
                            'lhsKeySchema': keySchema2,
                            'rhsHashFn': hashFn1,
                            'rhsKeySchema': keySchema1,
                            'joinExpr': joinExpr
                        }
        else:
            return False, None
Beispiel #20
0
 def initializeSchema(self):
     schema = self.operatorType() + str(self.id())
     fields = self.lhsSchema.schema() + self.rhsSchema.schema()
     self.joinSchema = DBSchema(schema, fields)
Beispiel #21
0
from
        customer,
        orders,
        lineitem
where
        c_mktsegment = 'BUILDING'
        and c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate < 19950315
        and l_shipdate > 19950315
group by
        l_orderkey,
        o_orderdate,
        o_shippriority
'''
ls1 = DBSchema('customerKey1', [('C_CUSTKEY', 'int')])
rs1 = DBSchema('customerKey2', [('O_CUSTKEY', 'int')])

ls2 = DBSchema('orderKey1', [('O_ORDERKEY', 'int')])
rs2 = DBSchema('orderkey2', [('L_ORDERKEY', 'int')])

groupKeySchema = DBSchema('groupKey', [('L_ORDERKEY', 'int'),
                                       ('O_ORDERDATE', 'int'),
                                       ('O_SHIPPRIORITY', 'int')])
groupAggSchema = DBSchema('groupAgg', [('revenue', 'float')])

query3 = db.query().fromTable('customer').join( \
            db.query().fromTable('orders'),
            method = 'hash', \
            lhsHashFn = 'hash(C_CUSTKEY) % 5', lhsKeySchema = ls1, \
            rhsHashFn = 'hash(O_CUSTKEY) % 5', rhsKeySchema = rs1).join( \
Beispiel #22
0
from Storage.BufferPool import BufferPool
from Catalog.Schema import DBSchema
from Storage.StorageEngine import StorageEngine

schemas = [
        DBSchema('part', 
           [('p_partkey', 'int'),
           ('p_name',    'char(55)'),
           ('p_mfgr',    'char(25)'),
           ('p_brand',   'char(10)'),
           ('p_type',    'char(25)'),
           ('p_size',    'int'),
           ('p_container', 'char(10)'),
           ('p_retailprice', 'float'),
           ('p_comment', 'char(23)')])
        ,
        DBSchema('supplier',
            [('s_suppkey', 'int'),
            ('s_name', 'char(25)'),
            ('s_address', 'char(40)'),
            ('s_nationkey', 'int'),
            ('s_phone', 'char(15)'),
            ('s_acctbal', 'float'),
            ('s_comment', 'char(101)')])
        ,
        DBSchema('partsupp',
            [('ps_partkey', 'int'),
            ('ps_suppkey', 'int'),
            ('ps_availqty', 'int'),
            ('ps_supplycost', 'float'),
            ('ps_comment', 'char(199)')])
Beispiel #23
0
part = db.query().fromTable('part').select({
    'P_NAME': ('P_NAME', 'char(55)'),
    'P_PARTKEY': ('P_PARTKEY', 'int')
})
partsupp = db.query().fromTable('partsupp').where('PS_AVAILQTY == 1').select({
    'PS_PARTKEY': ('PS_PARTKEY', 'int'),
    'PS_SUPPKEY': ('PS_SUPPKEY', 'int')
})
supplier = db.query().fromTable('supplier').select({
    'S_NAME': ('S_NAME', 'char(25)'),
    'S_SUPPKEY': ('S_SUPPKEY', 'int')
})

join_ps_p = partsupp.join(\
              part, \
              rhsSchema = DBSchema('part', [('P_NAME','char(55)'), ('P_PARTKEY','int')]), \
              method = 'block-nested-loops', \
              expr = 'PS_PARTKEY == P_PARTKEY')

join_three = join_ps_p.join(\
               supplier, \
               rhsSchema = DBSchema('supplier', [('S_NAME','char(25)'), ('S_SUPPKEY', 'int')]), \
               method = 'block-nested-loops',
               expr = 'PS_SUPPKEY == S_SUPPKEY'
               ).select({'P_NAME':('P_NAME','char(55)'), 'S_NAME':('S_NAME','char(25)')})

partsupp2 = db.query().fromTable('partsupp').where('PS_SUPPLYCOST < 5').select(
    {
        'PS_PARTKEY': ('PS_PARTKEY', 'int'),
        'PS_SUPPKEY': ('PS_SUPPKEY', 'int')
    })
Beispiel #24
0
exp 1

TPC-H Query 6: a 4-chain filter and aggregate query

select
        sum(l_extendedprice * l_discount) as revenue
from
        lineitem
where
        l_shipdate >= 19940101
        and l_shipdate < 19950101
        and l_discount between 0.06 - 0.01 and 0.06 + 0.01
        and l_quantity < 24
'''

groupKeySchema = DBSchema('groupKey', [('ONE', 'int')])
groupAggSchema = DBSchema('groupBy', [('revenue', 'float')])

query1 = db.query().fromTable('lineitem').where( \
            "(L_SHIPDATE >= 19940101) and (L_SHIPDATE < 19950101) and \
            (0.06 - 0.01 <= L_DISCOUNT <= 0.06 + 0.01) and (L_QUANTITY < 24)"                                                                             ).groupBy( \
            groupSchema=groupKeySchema, \
            aggSchema=groupAggSchema, \
            groupExpr=(lambda e: 1), \
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * e.L_DISCOUNT), lambda x: x)], \
            groupHashFn=(lambda gbVal: hash(gbVal) % 1)).select( \
            {'revenue' : ('revenue', 'float')}).finalize()
'''
exp 2

TPC-H Query 14: a 2-way join and aggregate query
Beispiel #25
0
        schema1.pack(
            schema1.instantiate(i, random.randint(0, 50),
                                'This is a testing tuple.'))
        for i in range(0, 10000)
]:
    _ = db.insertTuple(schema1.name, tup)

for tup in [
        schema2.pack(
            schema2.instantiate(i, random.randint(0, 50),
                                'This is a testing tuple.'))
        for i in range(0, 10000)
]:
    _ = db.insertTuple(schema2.name, tup)

keySchema = DBSchema('employeeKey', [('id', 'int')])
keySchema2 = DBSchema('employeeKey2', [('id2', 'int')])

query5 = db.query().fromTable('employeeeee').join( \
          db.query().fromTable('employeeeee2'), \
          rhsSchema=schema2, \
          method='hash', \
          lhsHashFn=(lambda e : e.id % 4),  lhsKeySchema=keySchema, \
          rhsHashFn=(lambda e : e.id2 % 4), rhsKeySchema=keySchema2, \
        ).finalize()

print(query5.explain())


def readResult():
    for page in db.processQuery(query5):
Beispiel #26
0
    def joinsOptimizer(self, operator, aPaths):
        defaultScaleFactor = 10
        defaultPartiNumber = 5

        n = len(aPaths)
        planList = []
        costList = []
        # i = 1
        for aPath in aPaths:
            # Here we define cost by number of pages.
            cards = Plan(root=aPath).sample(defaultScaleFactor)
            pageSize, _, _ = self.db.storage.relationStats(aPath.relationId())
            numPages = cards / (pageSize / aPath.schema().size)
            # Here we only consider reorganize joins
            # so that we simple put accessPaths' total cost as 0.
            planList.append(aPath)
            costList.append((numPages, 0))
        # i = 2...n
        for i in range(1, n):
            # find all possible two way join in current planList
            # put the potential joins in potentialP
            # put the potential joins cost in potentialC
            m = len(planList)
            potentialP = []
            potentialC = []
            for j in range(0, m - 1):
                for k in range(j + 1, m):
                    self.pcntr += 1
                    potentialP.append((planList[j], planList[k]))
                    potentialC.append(3 * (costList[j][0] + costList[k][0]) +
                                      costList[j][1] + costList[k][1])
            # find the cheapest joinable join (total cost)
            # build the join, remove the used two base plan and add the new join to planList
            # modify the costList as well
            while (potentialC):
                currC = min(potentialC)
                currP = potentialP[potentialC.index(currC)]
                potentialC.remove(currC)
                potentialP.remove(currP)
                if (self.joinable(operator, currP)):
                    (lField, rField) = self.joinable(operator, currP)
                    lhsSchema = currP[0].schema()
                    rhsSchema = currP[1].schema()
                    lKeySchema = DBSchema(
                        'left',
                        [(f, t)
                         for (f, t) in lhsSchema.schema() if f == lField])
                    rKeySchema = DBSchema(
                        'right',
                        [(f, t)
                         for (f, t) in rhsSchema.schema() if f == rField])
                    lHashFn = 'hash(' + lField + ') % ' + str(
                        defaultPartiNumber)
                    rHashFn = 'hash(' + rField + ') % ' + str(
                        defaultPartiNumber)
                    newJoin = Join(currP[0], currP[1], method='hash', \
                                   lhsHashFn=lHashFn, lhsKeySchema=lKeySchema, \
                                   rhsHashFn=rHashFn, rhsKeySchema=rKeySchema)

                    newJoin.prepare(self.db)
                    totalCost = currC
                    cards = Plan(root=newJoin).sample(defaultScaleFactor)
                    pageSize, _, _ = self.db.storage.relationStats(
                        newJoin.relationId())
                    pages = cards / (pageSize / newJoin.schema().size)

                    id1 = planList.index(currP[0])
                    _ = planList.pop(id1)
                    id2 = planList.index(currP[1])
                    _ = planList.pop(id2)
                    planList.append(newJoin)
                    _ = costList.pop(id1)
                    _ = costList.pop(id2)
                    costList.append((pages, totalCost))
                    break
        print("GreedyOptimizer plan considered: ", self.pcntr)
        return planList[0]
Beispiel #27
0
import io, math, os, os.path, pickle, struct
from struct import Struct

from Catalog.Identifiers import PageId, FileId, TupleId
from Catalog.Schema      import DBSchema
from Storage.Page        import PageHeader, Page
from Storage.SlottedPage import SlottedPageHeader, SlottedPage

from Storage.File import *

import shutil, Storage.BufferPool, Storage.FileManager

schema = DBSchema('employee', [('id', 'int'), ('age', 'int')])

bp = Storage.BufferPool.BufferPool()

fm = Storage.FileManager.FileManager(bufferPool=bp)

bp.setFileManager(fm)

fm.createRelation(schema.name, schema)

(fId, f) = fm.relationFile(schema.name)

f.numPages() == 0

pId  = PageId(fId, 0)
pId1 = PageId(fId, 1)

p    = Page(pageId=pId,  buffer=bytes(f.pageSize()), schema=schema)
p1   = Page(pageId=pId1, buffer=bytes(f.pageSize()), schema=schema)
Beispiel #28
0
import Database
from Query.Optimizer import Optimizer
from Query.BushyOptimizer import BushyOptimizer
from Catalog.Schema import DBSchema
from time import time

db = Database.Database()
deptSchema = DBSchema('department', [('d_id', 'int'), ('d_name', 'char(30)')])
emplSchema = DBSchema('employee', [('e_id', 'int'), ('e_name', 'char(30)'),
                                   ('e_projectid', 'int')])
projSchema = DBSchema('project', [('p_id', 'int'), ('p_name', 'char(30)')])
gratSchema = DBSchema('grant', [('g_id', 'int'), ('g_projectid', 'int'),
                                ('g_source', 'char(30)')])
synSchema1 = DBSchema('syn1', [('a', 'int'), ('b', 'char(30)')])
synSchema2 = DBSchema('syn2', [('c', 'int'), ('d', 'char(30)'), ('e', 'int')])

db.createRelation('department', [('d_id', 'int'), ('d_name', 'char(30)')])
db.createRelation('employee', [('e_id', 'int'), ('e_name', 'char(30)'),
                               ('e_projectid', 'int')])
db.createRelation('project', [('p_id', 'int'), ('p_name', 'char(30)')])
db.createRelation('grant', [('g_id', 'int'), ('g_projectid', 'int'),
                            ('g_source', 'char(30)')])
db.createRelation('syn1', [('a', 'int'), ('b', 'char(30)')])
db.createRelation('syn2', [('c', 'int'), ('d', 'char(30)'), ('e', 'int')])

for tup in [
        deptSchema.pack(deptSchema.instantiate(i, "Nature" + str(i)))
        for i in range(4000)
]:
    _ = db.insertTuple('department', tup)
for tup in [
Beispiel #29
0
 def __init__(self, subPlan, projectExprs, **kwargs):
     super().__init__(**kwargs)
     self.subPlan = subPlan
     self.projectExprs = projectExprs
     self.outputSchema = DBSchema(self.relationId(), \
                           [(k, v[1]) for (k,v) in self.projectExprs.items()])
Beispiel #30
0
def query5(db):
    query = db.query().fromTable('region')\
        .join(
            db.query().fromTable('nation'),
            method='hash',
            lhsHashFn='hash(R_REGIONKEY) % 11', lhsKeySchema=DBSchema('regionKey1', [('R_REGIONKEY', 'int')]),
            rhsHashFn='hash(N_REGIONKEY) % 11', rhsKeySchema=DBSchema('regionKey2', [('N_REGIONKEY', 'int')]))\
        .join(
            db.query().fromTable('supplier'),
            method='hash',
            lhsHashFn='hash(N_NATIONKEY) % 11', lhsKeySchema=DBSchema('nationKey1', [('N_NATIONKEY', 'int')]),
            rhsHashFn='hash(S_NATIONKEY) % 11', rhsKeySchema=DBSchema('nationKey2', [('S_NATIONKEY', 'int')])) \
        .join(
            db.query().fromTable('lineitem'),
            method='hash',
            lhsHashFn='hash(S_SUPPKEY) % 11', lhsKeySchema=DBSchema('suppKey1', [('S_SUPPKEY', 'int')]),
            rhsHashFn='hash(L_SUPPKEY) % 11', rhsKeySchema=DBSchema('suppKey2', [('L_SUPPKEY', 'int')])) \
        .join(
            db.query().fromTable('orders'),
            method='hash',
            lhsHashFn='hash(L_ORDERKEY) % 11', lhsKeySchema=DBSchema('orderKey1', [('L_ORDERKEY', 'int')]),
            rhsHashFn='hash(O_ORDERKEY) % 11', rhsKeySchema=DBSchema('orderKey2', [('O_ORDERKEY', 'int')])) \
        .join(
            db.query().fromTable('customer'),
            method='hash',
            lhsHashFn='hash(O_CUSTKEY) % 11', lhsKeySchema=DBSchema('custKey1', [('O_CUSTKEY', 'int')]),
            rhsHashFn='hash(C_CUSTKEY) % 11', rhsKeySchema=DBSchema('custKey2', [('C_CUSTKEY', 'int')])) \
        .where(
            "R_NAME == 'ASIA' and O_ORDERDATE >= 19940101 and O_ORDERDATE < 19950101")\
        .groupBy(
            groupSchema=DBSchema('groupKey', [('N_NAME','char(25)')]),
            aggSchema=DBSchema('groupAgg', [('revenue', 'float')]),
            groupExpr=(lambda e: e.N_NAME),
            aggExprs=[(0, lambda acc, e: acc + (e.L_EXTENDEDPRICE * (1 - e.L_DISCOUNT)), lambda x: x)],
            groupHashFn=(lambda gbVal: hash(gbVal) % 11))\
        .select(
            {'n_name': ('n_name', 'char(25)'),
             'revenue': ('revenue', 'float')})\
        .finalize()
    return query