def query19():
    """Build the revenue query over three brand/container/quantity groups.

    Three quantities and three brands are drawn from ``helper``; each pair
    parameterises one OR-ed predicate group (small, medium and large
    containers). Table and column names match the TPC-H schema, so this is
    presumably TPC-H Q19.

    Returns:
        str: the complete SQL statement.
    """
    # Draw order is kept as quantities first, then brands, so that a seeded
    # helper produces the same parameters as before.
    qty_sm, qty_med, qty_lg = (
        helper.rand(1, 10),
        helper.rand(10, 20),
        helper.rand(20, 30),
    )
    brand_sm, brand_med, brand_lg = [helper.getBrand() for _ in range(3)]

    # Tail shared verbatim by all three predicate groups.
    delivery = (
        "l_shipmode in ('AIR', 'AIR REG') "
        "AND l_shipinstruct = 'DELIVER IN PERSON'"
    )

    small = (
        f"p_partkey = l_partkey AND p_brand = '{brand_sm}' "
        "AND p_container in ( 'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') "
        f"AND l_quantity >= {qty_sm} AND l_quantity <= {qty_sm} + 10 "
        f"AND p_size between 1 and 5 AND {delivery}"
    )
    # The lowercase "and" and the missing space after "in (" are part of the
    # original text and are kept so the generated SQL is unchanged.
    medium = (
        f"p_partkey = l_partkey AND p_brand = '{brand_med}' "
        "AND p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') "
        f"AND l_quantity >= {qty_med} and l_quantity <= {qty_med} + 10 "
        f"AND p_size between 1 and 10 AND {delivery}"
    )
    large = (
        f"p_partkey = l_partkey AND p_brand = '{brand_lg}' "
        "AND p_container in ( 'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') "
        f"AND l_quantity >= {qty_lg} AND l_quantity <= {qty_lg} + 10 "
        f"AND p_size between 1 and 15 AND {delivery}"
    )

    predicate = " OR ".join(f"({group})" for group in (small, medium, large))
    return (
        "SELECT SUM(l_extendedprice * (1 - l_discount) ) AS revenue "
        "FROM lineitem, part "
        f"WHERE {predicate}"
    )
def query16():
    """Build the supplier-count query grouped by brand, type and size.

    Draws one brand, one type prefix and eight sizes from ``helper`` and
    counts distinct suppliers per (brand, type, size), excluding the drawn
    brand, types starting with the drawn prefix, and suppliers whose comment
    matches ``%Customer%Complaints%``. Table and column names match the
    TPC-H schema, so this is presumably TPC-H Q16.

    Returns:
        str: the complete SQL statement.
    """
    brand = helper.getBrand()
    # Named type_prefix rather than `type` to avoid shadowing the builtin.
    type_prefix = helper.getTypeString2()
    # Eight draws in total, same count and order as the previous
    # one-plus-range(2, 9) loop.
    sizes = ", ".join(str(helper.rand(1, 50)) for _ in range(8))

    select = "p_brand, p_type, p_size, COUNT(distinct ps_suppkey) AS supplier_cnt"
    from_tbl = "partsupp, part"
    complaints = (
        "SELECT s_suppkey FROM supplier "
        "WHERE s_comment LIKE '%Customer%Complaints%'"
    )
    # Fix: the drawn brand is now substituted here. Previously the literal
    # template placeholder '[BRAND]' was emitted and `brand` was never used,
    # so the brand exclusion had no effect.
    where = (
        f"p_partkey = ps_partkey AND p_brand <> '{brand}' "
        f"AND p_type not like '{type_prefix}%' "
        f"AND p_size in ({sizes}) "
        f"AND ps_suppkey not in ({complaints})"
    )
    group = "p_brand, p_type, p_size"
    order = "supplier_cnt DESC, p_brand, p_type, p_size"

    return (
        f"SELECT {select} FROM {from_tbl} WHERE {where} "
        f"GROUP BY {group} ORDER BY {order}"
    )
def query14():
    """Build the promotional-revenue percentage query for a one-month window.

    The window starts on the first day of a month drawn via ``helper.rand``
    (year 1993-1997, month 1-12). How the end of the window is spelled
    depends on the free variable ``db``:

    * ``"sqlite"`` / ``"pandas"``: ``date('<start>', '+1 month')`` with no
      ``DATE`` keyword in front of literals.
    * ``"duck"``: ``DATE '<start>' + <helper.monthsToDays(start, 1)>``.
    * ``"monet"``: ``DATE '<start>' + interval '1' month``.

    Returns:
        str: the complete SQL statement.

    Raises:
        ValueError: if ``db`` is none of the four values above. Previously
            this case failed with an UnboundLocalError on ``addDays``.
    """
    start = date(helper.rand(1993, 1997), helper.rand(1, 12), 1)

    if db in ("sqlite", "pandas"):
        date_prefix = ""
        end_expr = f"date('{start}', '+1 month')"
    else:
        if db == "duck":
            # Whatever helper.monthsToDays returns is added to the DATE
            # literal as-is (presumably a day count - confirm in helper).
            offset = str(helper.monthsToDays(start, 1))
        elif db == "monet":
            offset = "interval '1' month"
        else:
            raise ValueError(
                f"query14: unsupported db {db!r}; "
                "expected 'duck', 'monet', 'sqlite' or 'pandas'"
            )
        date_prefix = "DATE "
        end_expr = f"DATE '{start}' + {offset}"

    select = (
        "100.00 * SUM(CASE WHEN p_type like 'PROMO%' "
        "THEN l_extendedprice*(1-l_discount) ELSE 0 END) "
        "/ SUM(l_extendedprice * (1 - l_discount)) AS promo_revenue"
    )
    from_tbl = "lineitem, part"
    where = (
        f"l_partkey = p_partkey AND l_shipdate >= {date_prefix}'{start}' "
        f"AND l_shipdate < {end_expr}"
    )
    return f"SELECT {select} FROM {from_tbl} WHERE {where}"
def query18():
    """Build the large-order query: orders whose total quantity exceeds a threshold.

    The threshold is drawn with ``helper.rand(312, 315)``. Orders are
    selected through a sub-query that groups ``lineitem`` by order key, and
    are reported with their customer and summed quantity, ordered by total
    price (descending) and order date.

    Returns:
        str: the complete SQL statement.
    """
    # Original author's note: no such order exists at scale factor 0.1.
    threshold = helper.rand(312, 315)

    # The same column list serves as the GROUP BY key and, with the
    # aggregate appended, as the SELECT list.
    key_columns = "c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice"
    big_orders = (
        "SELECT l_orderkey FROM lineitem GROUP BY l_orderkey "
        f"HAVING SUM(l_quantity) > {threshold}"
    )

    return (
        f"SELECT {key_columns}, SUM(l_quantity) "
        "FROM customer, orders, lineitem "
        f"WHERE o_orderkey in ({big_orders}) "
        "AND c_custkey = o_custkey AND o_orderkey = l_orderkey "
        f"GROUP BY {key_columns} "
        "ORDER BY o_totalprice desc, o_orderdate"
    )
def query6():
    """Build the revenue query for a one-year ship-date window.

    Parameters are drawn in this order: start year (``helper.rand(1993,
    1997)``, always 1 January), a discount from ``random.uniform(0.02,
    0.09)`` and a quantity bound from ``helper.rand(24, 25)``. How the end
    of the window is spelled depends on the free variable ``db``:

    * ``"sqlite"`` / ``"pandas"``: ``date('<start>', '+1 year')`` with no
      ``DATE`` keyword in front of literals.
    * ``"duck"``: ``DATE '<start>' + <helper.yearsToDays(start, 1)>``.
    * ``"monet"``: ``DATE '<start>' + interval '1' year``.

    Returns:
        str: the complete SQL statement.

    Raises:
        ValueError: if ``db`` is none of the four values above. Previously
            this case failed with an UnboundLocalError on ``addDays``.
    """
    start = date(helper.rand(1993, 1997), 1, 1)
    discount = str(random.uniform(0.02, 0.09))
    quantity = str(helper.rand(24, 25))

    if db in ("sqlite", "pandas"):
        date_prefix = ""
        end_expr = f"date('{start}', '+1 year')"
    else:
        if db == "duck":
            # Whatever helper.yearsToDays returns is added to the DATE
            # literal as-is (presumably a day count - confirm in helper).
            offset = str(helper.yearsToDays(start, 1))
        elif db == "monet":
            offset = "interval '1' year"
        else:
            raise ValueError(
                f"query6: unsupported db {db!r}; "
                "expected 'duck', 'monet', 'sqlite' or 'pandas'"
            )
        date_prefix = "DATE "
        end_expr = f"DATE '{start}' + {offset}"

    select = "sum(l_extendedprice * l_discount) as revenue"
    from_tbl = "lineitem"
    where = (
        f"l_shipdate >= {date_prefix}'{start}' AND l_shipdate < {end_expr} "
        f"AND l_discount between {discount} - 0.01 AND {discount} + 0.01 "
        f"AND l_quantity < {quantity}"
    )
    return f"SELECT {select} FROM {from_tbl} WHERE {where}"
def query2():
    """Build the minimum-supply-cost supplier query for one region.

    A region name, a type suffix and a part size are drawn from ``helper``
    (in that order). The outer query keeps the part/supplier rows whose
    ``ps_supplycost`` equals the minimum found by a sub-query over the same
    region; the sub-query refers to the outer ``p_partkey``.

    Returns:
        str: the complete SQL statement, terminated with ``;``.
    """
    region_name = helper.getRName()
    type_suffix = helper.getType3()
    part_size = str(helper.rand(1, 50))

    # Join fragments used by both the outer query and the sub-query.
    key_join = "p_partkey = ps_partkey AND s_suppkey = ps_suppkey"
    region_join = (
        "s_nationkey = n_nationkey AND n_regionkey = r_regionkey "
        f"AND r_name = '{region_name}'"
    )

    cheapest = (
        "SELECT min(ps_supplycost) FROM partsupp, supplier, nation, region "
        f"WHERE {key_join} AND {region_join}"
    )
    columns = (
        "s_acctbal, s_name, n_name, p_partkey, "
        "p_mfgr, s_address, s_phone, s_comment"
    )
    tables = "part, supplier, partsupp, nation, region"
    predicate = (
        f"{key_join} AND p_size = {part_size} "
        f"AND p_type like '%{type_suffix}' "
        f"AND {region_join} AND ps_supplycost = ({cheapest})"
    )
    ordering = "s_acctbal desc, n_name, s_name, p_partkey"
    # The leading space is deliberate: together with the separator below it
    # reproduces the two spaces the statement has always had before LIMIT.
    limit_clause = " LIMIT 100"

    return (
        f"SELECT {columns} FROM {tables} WHERE {predicate} "
        f"ORDER BY {ordering} {limit_clause};"
    )
def query1():
    """Build the pricing-summary query over ``lineitem`` up to a cutoff date.

    The cutoff is 1998-12-01 minus a number of days drawn with
    ``helper.rand(60, 120)``, and is written as a SQL expression so the
    database does the subtraction. For ``db`` equal to ``"sqlite"`` or
    ``"pandas"`` the ``date(...)`` function form is used; otherwise a
    ``DATE`` literal minus the day count.

    NOTE: another ``def query1`` appears later in this file. If both are
    defined at the same level, the later definition replaces this one.

    Returns:
        str: the complete SQL statement.
    """
    days_back = helper.rand(60, 120)

    # Named cutoff_expr, not `date`, so the local does not shadow the name
    # that other functions in this file call as a class.
    if db in ("sqlite", "pandas"):
        cutoff_expr = f"date('1998-12-01', '-{days_back} day')"
    else:
        # The trailing space is part of the original output and is kept.
        cutoff_expr = f"DATE '1998-12-01' - {str(days_back)} "

    columns = ", ".join(
        [
            "l_returnflag",
            "l_linestatus",
            "sum(l_quantity) as sum_qty",
            "sum(l_extendedprice) as sum_base_price",
            "sum(l_extendedprice * (1 - l_discount)) as sum_disc_price",
            "sum(l_extendedprice * ( 1 - l_discount) * ( 1 + l_tax)) as sum_charge",
            "avg(l_quantity) as avg_qty",
            "avg(l_extendedprice) as avg_price",
            "avg(l_discount) as avg_disc",
            "count(*) as count_order",
        ]
    )
    # Grouping and ordering use the same two flag columns.
    flags = "l_returnflag, l_linestatus"

    return (
        f"SELECT {columns} FROM lineitem "
        f"WHERE l_shipdate <= {cutoff_expr} "
        f"GROUP BY {flags} ORDER BY {flags}"
    )
def query1():
    """Build the pricing-summary query over ``lineitem`` up to a cutoff date.

    The cutoff is computed in Python as 1998-12-01 minus a number of days
    drawn with ``helper.rand(60, 120)`` and embedded as a quoted literal.
    The literal is prefixed with ``DATE `` unless ``db`` is ``"sqlite"`` or
    ``"pandas"``.

    NOTE: an earlier ``def query1`` appears in this file. If both are
    defined at the same level, this later definition is the one in effect.

    NOTE(review): the cutoff is formatted from a ``datetime`` value, so
    (assuming the standard-library class) the literal includes a
    ``00:00:00`` time part. Confirm every backend accepts that after
    ``DATE``.

    Returns:
        str: the complete SQL statement.
    """
    days_back = helper.rand(60, 120)
    cutoff = datetime(1998, 12, 1) - timedelta(days=days_back)
    literal_prefix = "" if db in ("sqlite", "pandas") else "DATE "

    columns = ", ".join(
        [
            "l_returnflag",
            "l_linestatus",
            "sum(l_quantity) as sum_qty",
            "sum(l_extendedprice) as sum_base_price",
            "sum(l_extendedprice * (1 - l_discount)) as sum_disc_price",
            "sum(l_extendedprice * ( 1 - l_discount) * ( 1 + l_tax)) as sum_charge",
            "avg(l_quantity) as avg_qty",
            "avg(l_extendedprice) as avg_price",
            "avg(l_discount) as avg_disc",
            "count(*) as count_order",
        ]
    )
    # Grouping and ordering use the same two flag columns.
    flags = "l_returnflag, l_linestatus"

    return (
        f"SELECT {columns} FROM lineitem "
        f"WHERE l_shipdate <= {literal_prefix}'{cutoff}' "
        f"GROUP BY {flags} ORDER BY {flags}"
    )
def query12():
    """Build the ship-mode priority-count query for a one-year receipt window.

    Two ship modes come from ``helper.getModes()`` and the window starts on
    1 January of a year drawn with ``helper.rand(1993, 1997)``. How the end
    of the window is spelled depends on the free variable ``db``:

    * ``"sqlite"`` / ``"pandas"``: ``date('<start>', '+1 year')`` with no
      ``DATE`` keyword in front of literals.
    * ``"duck"``: ``DATE '<start>' + <helper.yearsToDays(start, 1)>``.
    * ``"monet"``: ``DATE '<start>' + interval '1' year``.

    Returns:
        str: the complete SQL statement.

    Raises:
        ValueError: if ``db`` is none of the four values above. Previously
            this case failed with an UnboundLocalError on ``addDays``.
    """
    mode_a, mode_b = helper.getModes()
    start = date(helper.rand(1993, 1997), 1, 1)

    if db in ("sqlite", "pandas"):
        date_prefix = ""
        end_expr = f"date('{start}', '+1 year')"
    else:
        if db == "duck":
            # Whatever helper.yearsToDays returns is added to the DATE
            # literal as-is (presumably a day count - confirm in helper).
            offset = str(helper.yearsToDays(start, 1))
        elif db == "monet":
            offset = "interval '1' year"
        else:
            raise ValueError(
                f"query12: unsupported db {db!r}; "
                "expected 'duck', 'monet', 'sqlite' or 'pandas'"
            )
        date_prefix = "DATE "
        end_expr = f"DATE '{start}' + {offset}"

    select = (
        "l_shipmode, "
        "SUM(CASE WHEN o_orderpriority ='1-URGENT' OR o_orderpriority ='2-HIGH' "
        "THEN 1 ELSE 0 END) AS high_line_count, "
        "SUM(CASE WHEN o_orderpriority <> '1-URGENT' AND o_orderpriority <> '2-HIGH' "
        "THEN 1 ELSE 0 END) AS low_line_count"
    )
    from_tbl = "orders, lineitem"
    where = (
        f"o_orderkey = l_orderkey AND l_shipmode in ('{mode_a}', '{mode_b}') "
        "AND l_commitdate < l_receiptdate AND l_shipdate < l_commitdate "
        f"AND l_receiptdate >= {date_prefix}'{start}' "
        f"AND l_receiptdate < {end_expr}"
    )
    group = "l_shipmode"
    order = "l_shipmode"

    return (
        f"SELECT {select} FROM {from_tbl} WHERE {where} "
        f"GROUP BY {group} ORDER BY {order}"
    )
def query5():
    """Build the per-nation revenue query for one region and a one-year window.

    The region comes from ``helper.getRName()`` and the window starts on
    1 January of a year drawn with ``helper.rand(1993, 1997)``. How the end
    of the window is spelled depends on the free variable ``db``:

    * ``"sqlite"`` / ``"pandas"``: ``date('<start>', '+1 year')`` with no
      ``DATE`` keyword in front of literals.
    * ``"duck"``: ``DATE '<start>' + <helper.yearsToDays(start, 1)>``.
    * ``"monet"``: ``DATE '<start>' + interval '1' year``.

    Returns:
        str: the complete SQL statement.

    Raises:
        ValueError: if ``db`` is none of the four values above. Previously
            this case failed with an UnboundLocalError on ``addDays``.
    """
    region = helper.getRName()
    start = date(helper.rand(1993, 1997), 1, 1)

    if db in ("sqlite", "pandas"):
        date_prefix = ""
        end_expr = f"date('{start}', '+1 year')"
    else:
        if db == "duck":
            # Whatever helper.yearsToDays returns is added to the DATE
            # literal as-is (presumably a day count - confirm in helper).
            offset = str(helper.yearsToDays(start, 1))
        elif db == "monet":
            offset = "interval '1' year"
        else:
            raise ValueError(
                f"query5: unsupported db {db!r}; "
                "expected 'duck', 'monet', 'sqlite' or 'pandas'"
            )
        date_prefix = "DATE "
        end_expr = f"DATE '{start}' + {offset}"

    select = "n_name, sum(l_extendedprice * (1 - l_discount)) as revenue"
    from_tbl = "customer, orders, lineitem, supplier, nation, region"
    where = (
        "c_custkey = o_custkey AND l_orderkey = o_orderkey "
        "AND l_suppkey = s_suppkey AND c_nationkey = s_nationkey "
        "AND s_nationkey = n_nationkey AND n_regionkey = r_regionkey "
        f"AND r_name = '{region}' "
        f"AND o_orderdate >= {date_prefix}'{start}' "
        f"AND o_orderdate < {end_expr}"
    )
    group = "n_name"
    order = "revenue desc"

    return (
        f"SELECT {select} FROM {from_tbl} WHERE {where} "
        f"GROUP BY {group} ORDER BY {order}"
    )
def query20():
    """Build the supplier query for parts with excess stock in one nation.

    Parameters are drawn in this order: a colour (``helper.getColor()``), a
    start year (``helper.rand(1993, 1997)``, always 1 January) and a nation
    (first element of ``helper.getNNames()``). Suppliers are kept when their
    available quantity for a matching part exceeds half of the quantity
    shipped within the one-year window. How the end of the window is spelled
    depends on the free variable ``db``:

    * ``"sqlite"`` / ``"pandas"``: ``date('<start>', '+1 year')`` with no
      ``DATE`` keyword in front of literals.
    * ``"duck"``: ``DATE '<start>' + <helper.yearsToDays(start, 1)>``.
    * ``"monet"``: ``DATE '<start>' + interval '1' year``.

    Returns:
        str: the complete SQL statement.

    Raises:
        ValueError: if ``db`` is none of the four values above. Previously
            this case failed with an UnboundLocalError on ``addDays``.
    """
    color = helper.getColor()
    start = date(helper.rand(1993, 1997), 1, 1)
    # Only the first of the two returned names is used.
    nation, _ = helper.getNNames()

    if db in ("sqlite", "pandas"):
        date_prefix = ""
        end_expr = f"date('{start}', '+1 year')"
    else:
        if db == "duck":
            # Whatever helper.yearsToDays returns is added to the DATE
            # literal as-is (presumably a day count - confirm in helper).
            offset = str(helper.yearsToDays(start, 1))
        elif db == "monet":
            offset = "interval '1' year"
        else:
            raise ValueError(
                f"query20: unsupported db {db!r}; "
                "expected 'duck', 'monet', 'sqlite' or 'pandas'"
            )
        date_prefix = "DATE "
        end_expr = f"DATE '{start}' + {offset}"

    select = "s_name, s_address"
    from_tbl = "supplier, nation"
    sub_query = (
        "SELECT ps_suppkey FROM partsupp "
        f"WHERE ps_partkey in (SELECT p_partkey FROM part WHERE p_name LIKE '{color}%') "
        "AND ps_availqty > (SELECT 0.5 * SUM(l_quantity) FROM lineitem "
        "WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey "
        f"AND l_shipdate >= {date_prefix}'{start}' AND l_shipdate < {end_expr})"
    )
    where = (
        f"s_suppkey IN ({sub_query}) "
        f"AND s_nationkey = n_nationkey AND n_name = '{nation}'"
    )
    order = "s_name"

    return f"SELECT {select} FROM {from_tbl} WHERE {where} ORDER BY {order}"