def test_simple_model(self): sqlExecutor = SqlExecutor() sqlExecutor.execute("set n_epoch=10") sqlExecutor.execute("set reg_type='mdn'") sqlExecutor.execute("set density_type='mdn'") sqlExecutor.execute("set b_grid_search='False'") sqlExecutor.execute("set csv_split_char='|'") sqlExecutor.execute( "set table_header=" + "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|" + "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|" + "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|" + "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|" + "ss_net_paid_inc_tax|ss_net_profit|none'") sqlExecutor.execute( "create table test_ss40g_sm(ss_sales_price real, ss_sold_date_sk real) from '/data/tpcds/40G/ss_600k.csv' method uniform size 600 scale data num_of_points2.csv" ) predictions = sqlExecutor.execute( "select avg(ss_sales_price) from test_ss40g_sm where 2451119 <=ss_sold_date_sk<= 2451483 " ) sqlExecutor.execute("drop table test_ss40g_sm") self.assertTrue(predictions)
def test_categorical_one_model(self): sqlExecutor = SqlExecutor() sqlExecutor.execute("set b_grid_search='False'") sqlExecutor.execute("set csv_split_char='|'") sqlExecutor.execute("set one_model='true'") sqlExecutor.execute( "set table_header=" + "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|" + "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|" + "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|" + "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|" + "ss_net_paid_inc_tax|ss_net_profit|none'") sqlExecutor.execute( "create table test_ss40g_categorical_one_model(ss_sales_price real, ss_sold_date_sk real, ss_coupon_amt categorical, ) from '/data/tpcds/40G/ss_600k.csv' GROUP BY ss_store_sk method uniform size 600" ) # ss_ext_discount_amt categorical predictions = sqlExecutor.execute( "select count(ss_sales_price) from test_ss40g_categorical_one_model where 2451119 <=ss_sold_date_sk<= 2451483 and ss_coupon_amt='0.00' group by ss_store_sk" ) # and ss_ext_discount_amt='0.00' # sqlExecutor.execute("drop table test_ss40g_categorical_one_model") self.assertTrue(predictions)
def test_drop_clause(self): sqlExecutor = SqlExecutor() status = sqlExecutor.execute("drop table model2drop") self.assertFalse(status)
def test_embedding(self): sqlExecutor = SqlExecutor() sqlExecutor.execute("set b_grid_search='False'") sqlExecutor.execute("set csv_split_char='|'") sqlExecutor.execute("set encoder='embedding'") sqlExecutor.execute( "set table_header=" + "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|" + "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|" + "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|" + "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|" + "ss_net_paid_inc_tax|ss_net_profit|none'") sqlExecutor.execute( "create table test_ss40g_embedding(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 600" ) predictions = sqlExecutor.execute( "select avg(ss_sales_price) from test_ss40g_embedding where 2451119 <=ss_sold_date_sk<= 2451483 group by ss_store_sk" ) sqlExecutor.execute("drop table test_ss40g_embedding") self.assertTrue(predictions)
class Query1: def __init__(self): self.mdl_name = None self.sql_executor = None def build_model(self, mdl_name: str = "ss_10g", encoder='binary'): self.mdl_name = mdl_name self.sql_executor = SqlExecutor() self.sql_executor.execute("set v='True'") # self.sql_executor.execute("set device='cpu'") self.sql_executor.execute("set b_grid_search='false'") self.sql_executor.execute("set b_print_to_screen='false'") self.sql_executor.execute("set csv_split_char='|'") self.sql_executor.execute("set batch_size=1000") self.sql_executor.execute( "set table_header=" + "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|" + "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|" + "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|" + "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|" + "ss_net_paid_inc_tax|ss_net_profit|none'") # sql_executor.execute("set table_header=" + # "'ss_sold_date_sk|ss_store_sk|ss_sales_price'") self.sql_executor.execute("set encoder='" + encoder + "'") self.sql_executor.execute("set n_mdn_layer_node_reg=50") # 5 self.sql_executor.execute("set n_mdn_layer_node_density=60") # 30 self.sql_executor.execute("set n_jobs=1") # 2 self.sql_executor.execute("set n_hidden_layer=2") # 1 self.sql_executor.execute("set n_epoch=20") # 20 self.sql_executor.execute("set n_gaussians_reg=4") # 3 self.sql_executor.execute("set n_gaussians_density=20") # 10 self.sql_executor.execute( "create table " + mdl_name + "(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size num_points/ss_10g.csv' " ) # num_of_points57.csv # self.sql_executor.execute( # "create table "+"ss10g_binary_30"+"(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 'num_points/ss_10g.csv' ") # num_of_points57.csv def workload( self, mdl_name, result2file: str = '/home/u1796377/Documents/workspace/DBEstClient/experiments/results/mdn/10g/', n_jobs=1): self.sql_executor.mdl_name = mdl_name self.sql_executor.execute("set n_jobs=" + str(n_jobs) + '"') self.sql_executor.execute("set result2file='" + result2file + "sum1.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451119 <=ss_sold_date_sk<= 2451483 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum2.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451300 <=ss_sold_date_sk<= 2451665 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum3.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451392 <=ss_sold_date_sk<= 2451757 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum4.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451484 <=ss_sold_date_sk<= 2451849 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum5.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451545 <=ss_sold_date_sk<= 2451910 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum6.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451636 <=ss_sold_date_sk<= 2452000 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum7.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451727 <=ss_sold_date_sk<= 2452091 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum8.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451850 <=ss_sold_date_sk<= 2452214 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum9.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2451911 <=ss_sold_date_sk<= 2452275 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "sum10.txt'") self.sql_executor.execute( "select sum(ss_sales_price) from " + self.mdl_name + " where 2452031 <=ss_sold_date_sk<= 2452395 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count1.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451119 <=ss_sold_date_sk<= 2451483 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count2.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451300 <=ss_sold_date_sk<= 2451665 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count3.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451392 <=ss_sold_date_sk<= 2451757 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count4.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451484 <=ss_sold_date_sk<= 2451849 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count5.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451545 <=ss_sold_date_sk<= 2451910 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count6.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451636 <=ss_sold_date_sk<= 2452000 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count7.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451727 <=ss_sold_date_sk<= 2452091 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count8.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451850 <=ss_sold_date_sk<= 2452214 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count9.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2451911 <=ss_sold_date_sk<= 2452275 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "count10.txt'") self.sql_executor.execute( "select count(ss_sales_price) from " + self.mdl_name + " where 2452031 <=ss_sold_date_sk<= 2452395 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg1.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451119 <=ss_sold_date_sk<= 2451483 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg2.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451300 <=ss_sold_date_sk<= 2451665 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg3.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451392 <=ss_sold_date_sk<= 2451757 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg4.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451484 <=ss_sold_date_sk<= 2451849 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg5.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451545 <=ss_sold_date_sk<= 2451910 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg6.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451636 <=ss_sold_date_sk<= 2452000 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg7.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451727 <=ss_sold_date_sk<= 2452091 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg8.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451850 <=ss_sold_date_sk<= 2452214 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg9.txt'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2451911 <=ss_sold_date_sk<= 2452275 group by ss_store_sk", ) self.sql_executor.execute("set result2file='" + result2file + "avg10.txt'") # self.sql_executor.execute( # "set result2file='None'") self.sql_executor.execute( "select avg(ss_sales_price) from " + self.mdl_name + " where 2452031 <=ss_sold_date_sk<= 2452395 group by ss_store_sk", )
class DBEstPrompt(Cmd): def __init__(self): super(DBEstPrompt, self).__init__() self.prompt = 'dbestclient> ' self.intro = "Welcome to DBEst: a model-based AQP engine! Type exit to exit!" self.query = "" # # deal with configuration file # if os.path.exists('config.json'): # print("Configuration file loaded.") # self.config = json.load(open('config.json')) # else: # print("Configuration file config.json does not exist! use default values") # self.config = config # json.dump(self.config, open('config.json', 'w')) # self.verbose = self.config['verbose'] # self.b_show_latency = self.config['b_show_latency'] # deal with warehouse if os.path.exists("dbestwarehouse"): print("warehouse is initialized.") else: print("warehouse does not exists, so initialize one.") os.mkdir("dbestwarehouse") self.sqlExecutor = SqlExecutor() # print the exit message. def do_exit(self, inp): '''exit the application.''' print("DBEst closed successfully.") return True # process the query def default(self, inp): if ";" not in inp: self.query = self.query + inp + " " else: self.query += inp.split(";")[0] # if self.config['verbose']: # print("Executing query >>> " + self.query + "...") # query execution goes here # -------------------------------------------->> # check if query begins with 'bypass', if so use backend server, otherwise use dbest to give a prediction if self.query.lstrip()[0:6].lower() == 'bypass': print("Bypass DBEst, use the backend server instead.") # go to the backend server else: # sqlExecutor = SqlExecutor(config) # print(self.query) # self.query.replace(";",'') self.sqlExecutor.execute(self.query) # self.sqlExecutor.execute("create table mdl1(pm25 real, PRES real) from pm25.csv method uniform size 100") # self.sqlExecutor.execute("select count(pm25 real) from mdl1 where PRES between 1000 and 1020") # sqlExecutor.execute("select sum(pm25 real) from mdl where PRES between 1000 and 1020") # sqlExecutor.execute("select avg(pm25 real) from mdl where PRES between 1000 and 1020") # self.sqlExecutor.execute("create table ss(ss_list_price real, ss_wholesale_cost real) from store_sales.dat method uniform size 10000 group by ss_store_sk") # self.sqlExecutor.execute("select count(ss_list_price) from ss where ss_wholesale_cost between 1 and 100 group by ss_store_sk") # <<-------------------------------------------- # restore the query for the next coming query self.query = "" # deal with KeyboardInterrupt caused by ctrl+c def cmdloop(self, intro=None): print(self.intro) while True: try: super(DBEstPrompt, self).cmdloop(intro="") break except KeyboardInterrupt: # self.do_exit("") print("DBEst closed successfully.") return True do_EOF = do_exit
class Query1: def __init__(self): self.mdl_name = None self.sql_executor = None self.model_size = "1m" def build_model(self, mdl_name: str = "flight_1m", encoder='binary'): self.mdl_name = mdl_name self.sql_executor = SqlExecutor() self.sql_executor.execute("set v='True'") # self.sql_executor.execute("set device='cpu'") self.sql_executor.execute("set b_grid_search='false'") self.sql_executor.execute("set b_print_to_screen='false'") self.sql_executor.execute("set csv_split_char=','") self.sql_executor.execute("set batch_size=1000") self.sql_executor.execute("set table_header=" + "'year_date,unique_carrier,origin,origin_state_abr,dest,dest_state_abr,dep_delay,taxi_out,taxi_in,arr_delay,air_time,distance'") self.sql_executor.execute("set encoder='"+ encoder +"'") if self.model_size =="1m": self.sql_executor.execute("set n_mdn_layer_node_reg=10") # 20 self.sql_executor.execute("set n_mdn_layer_node_density=15") # 30 self.sql_executor.execute("set n_jobs=1") # self.sql_executor.execute("set n_hidden_layer=1") # 2 self.sql_executor.execute("set n_epoch=20") # 50 self.sql_executor.execute("set n_gaussians_reg=8") # self.sql_executor.execute("set n_gaussians_density=10") # 8 self.sql_executor.execute( "create table "+mdl_name+"(dep_delay real, distance real) from '../data/flights/flight_1m.csv' GROUP BY unique_carrier method uniform size 'num_points/flights_group.csv' ") elif self.model_size == "5m": self.sql_executor.execute("set n_mdn_layer_node_reg=20") # 20 self.sql_executor.execute("set n_mdn_layer_node_density=30") # 30 self.sql_executor.execute("set n_jobs=1") # self.sql_executor.execute("set n_hidden_layer=1") # 1 self.sql_executor.execute("set n_epoch=20") # 20 self.sql_executor.execute("set n_gaussians_reg=8") # self.sql_executor.execute("set n_gaussians_density=8") # 10 self.sql_executor.execute( "create table "+mdl_name+"(dep_delay real, distance real) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 'num_points/flights_group.csv' ") # self.sql_executor.execute("set n_mdn_layer_node_reg=30") # 5 # self.sql_executor.execute("set n_mdn_layer_node_density=30") # 30 # self.sql_executor.execute("set n_jobs=1") # 2 # self.sql_executor.execute("set n_hidden_layer=1") # 1 # self.sql_executor.execute("set n_epoch=20") # 20 # self.sql_executor.execute("set n_gaussians_reg=8") # 3 # self.sql_executor.execute("set n_gaussians_density=8") # 10 # self.sql_executor.execute( # "create table "+mdl_name+"(dep_delay real, distance real) from '../data/flights/flights_10k.csv' GROUP BY unique_carrier method uniform size 'num_points/flights_group.csv' ") # num_of_points57.csv # SELECT unique_carrier, AVG(dep_delay) FROM flights WHERE 300<=distance<=1000 GROUP BY unique_carrier; # self.sql_executor.execute( # "create table "+"ss10g_binary_30"+"(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 'num_points/ss_10g.csv' ") # num_of_points57.csv def build_one_model(self, mdl_name: str = "flight_1m", encoder='binary'): self.mdl_name = mdl_name self.sql_executor = SqlExecutor() self.sql_executor.execute("set v='True'") # self.sql_executor.execute("set device='cpu'") self.sql_executor.execute("set one_model='true'") self.sql_executor.execute("set b_grid_search='false'") self.sql_executor.execute("set b_print_to_screen='false'") self.sql_executor.execute("set csv_split_char=','") self.sql_executor.execute("set batch_size=1000") self.sql_executor.execute("set table_header=" + "'year_date,unique_carrier,origin,origin_state_abr,dest,dest_state_abr,dep_delay,taxi_out,taxi_in,arr_delay,air_time,distance'") self.sql_executor.execute("set encoder='"+ encoder +"'") model_size = "5m" if model_size =="1m": self.sql_executor.execute("set n_mdn_layer_node_reg=20") # 5 self.sql_executor.execute("set n_mdn_layer_node_density=30") # 30 self.sql_executor.execute("set n_jobs=1") # 2 self.sql_executor.execute("set n_hidden_layer=2") # 1 self.sql_executor.execute("set n_epoch=20") # 20 self.sql_executor.execute("set n_gaussians_reg=8") # 3 self.sql_executor.execute("set n_gaussians_density=8") # 10 self.sql_executor.execute( "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_1m.csv' GROUP BY unique_carrier method uniform size 0.001") elif model_size == "5m": self.sql_executor.execute("set n_mdn_layer_node_reg=50") # 20 self.sql_executor.execute("set n_mdn_layer_node_density=50") # 30 self.sql_executor.execute("set n_jobs=1") # self.sql_executor.execute("set n_hidden_layer=2") # 1 self.sql_executor.execute("set n_epoch=20") # 50 self.sql_executor.execute("set n_gaussians_reg=8") # 3 self.sql_executor.execute("set n_gaussians_density=8") # 10 self.sql_executor.execute( "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 0.005") # self.sql_executor.execute( # "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 0.005")#'num_points/flights_group.csv' ") # num_of_points57.csv #SELECT unique_carrier, COUNT(*) FROM flights WHERE origin_state_abr='LA' AND dest_state_abr='CA' GROUP BY unique_carrier; def workload(self, mdl_name, result2file: str = 'experiments/flights/results/mdn1m/', n_jobs=1): self.sql_executor.mdl_name = mdl_name self.sql_executor.execute("set n_jobs=" + str(n_jobs)+'"') # self.sql_executor.execute( # "set result2file='" + result2file + "7.txt'") # self.sql_executor.execute("select unique_carrier, COUNT(dep_delay) from " + self.mdl_name + # " where 300<=distance<=1000 GROUP BY unique_carrier") # self.sql_executor.execute( # "set result2file='" + result2file + "8.txt'") # self.sql_executor.execute("SELECT unique_carrier, COUNT(dep_delay) from " + self.mdl_name + # " where 1000<=distance<=1500 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "9.txt'") # self.sql_executor.execute("SELECT unique_carrier, COUNT(dep_delay) from " + self.mdl_name + # " where 1500<=distance<=2000 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "10.txt'") # self.sql_executor.execute("SELECT unique_carrier, SUM(dep_delay) from " + self.mdl_name + # " where 300<=distance<=1000 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "11.txt'") # self.sql_executor.execute("SELECT unique_carrier, SUM(dep_delay) from " + self.mdl_name + # " where 1000<=distance<=1500 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "12.txt'") # self.sql_executor.execute("SELECT unique_carrier, SUM(dep_delay) from " + self.mdl_name + # " where 1500<=distance<=2000 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "13.txt'") # self.sql_executor.execute("SELECT unique_carrier, AVG(dep_delay) from " + self.mdl_name + # " where 300<=distance<=1000 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "14.txt'") # self.sql_executor.execute("SELECT unique_carrier, AVG(dep_delay) from " + self.mdl_name + # " where 1000<=distance<=1500 GROUP BY unique_carrier",) # self.sql_executor.execute( # "set result2file='" + result2file + "15.txt'") # self.sql_executor.execute("SELECT unique_carrier, AVG(dep_delay) from " + self.mdl_name + # " where 1500<=distance<=2000 GROUP BY unique_carrier",) self.sql_executor.execute( "set result2file='" + result2file + "one_model_1x.txt'") self.sql_executor.execute("SELECT unique_carrier, COUNT(distance) FROM "+mdl_name+" where 1000<=dep_delay<=1200 AND origin_state_abr='LA' GROUP BY unique_carrier")
class Query1: def __init__(self): self.mdl_name = None self.sql_executor = None def build_model(self, mdl_name: str = "ss_10g", encoder='binary'): self.mdl_name = mdl_name self.sql_executor = SqlExecutor() self.sql_executor.execute("set v='True'") # self.sql_executor.execute("set device='cpu'") self.sql_executor.execute("set b_grid_search='false'") self.sql_executor.execute("set b_print_to_screen='false'") self.sql_executor.execute("set csv_split_char='|'") self.sql_executor.execute("set batch_size=1000") self.sql_executor.execute( "set table_header=" + "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|" + "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|" + "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|" + "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|" + "ss_net_paid_inc_tax|ss_net_profit|none'") # sql_executor.execute("set table_header=" + # "'ss_sold_date_sk|ss_store_sk|ss_sales_price'") self.sql_executor.execute("set encoder='" + encoder + "'") self.sql_executor.execute("set n_mdn_layer_node_reg=50") # 5 self.sql_executor.execute("set n_mdn_layer_node_density=60") # 30 self.sql_executor.execute("set n_jobs=1") # 2 self.sql_executor.execute("set n_hidden_layer=2") # 1 self.sql_executor.execute("set n_epoch=20") # 20 self.sql_executor.execute("set n_gaussians_reg=4") # 3 self.sql_executor.execute("set n_gaussians_density=20") # 10 self.sql_executor.execute( "create table " + mdl_name + "(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 1000" ) # num_points/ss_10g.csv' ") # num_of_points57.csv
def run(): sqlExecutor = SqlExecutor() sqlExecutor.execute("set v='True'") sqlExecutor.execute("set n_jobs=8") sqlExecutor.execute("set device='cpu'") sqlExecutor.execute("set encoder='binary'") sqlExecutor.execute("set b_grid_search='False'") sqlExecutor.execute("set b_print_to_screen='true'") sqlExecutor.execute("set csv_split_char='|'") sqlExecutor.execute("set batch_size=1000") sqlExecutor.execute( "set table_header=" + "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|" + "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|" + "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|" + "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|" + "ss_net_paid_inc_tax|ss_net_profit|none'") # run_2_groupby(sqlExecutor) build_models(sqlExecutor) query(sqlExecutor)