Esempio n. 1
0
    def __init__(self):
        super(DBEstPrompt, self).__init__()
        self.prompt = 'dbestclient> '
        self.intro = "Welcome to DBEst: a model-based AQP engine! Type exit to exit!"
        self.query = ""

        # # deal with configuration file
        # if os.path.exists('config.json'):
        #     print("Configuration file loaded.")
        #     self.config = json.load(open('config.json'))
        # else:
        #     print("Configuration file config.json does not exist! use default values")
        #     self.config = config
        #     json.dump(self.config, open('config.json', 'w'))
        # self.verbose = self.config['verbose']
        # self.b_show_latency = self.config['b_show_latency']

        # deal with warehouse
        if os.path.exists("dbestwarehouse"):
            print("warehouse is initialized.")
        else:
            print("warehouse does not exists, so initialize one.")
            os.mkdir("dbestwarehouse")

        self.sqlExecutor = SqlExecutor()
Esempio n. 2
0
def run():
    #
    sqlExecutor = SqlExecutor(config)
    sqlExecutor.set_table_headers(
        "ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_customer_sk,ss_cdemo_sk,ss_hdemo_sk,"
        +
        "ss_addr_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_quantity,ss_wholesale_cost,"
        +
        "ss_list_price,ss_sales_price,ss_ext_discount_amt,ss_ext_sales_price,"
        +
        "ss_ext_wholesale_cost,ss_ext_list_price,ss_ext_tax,ss_coupon_amt,ss_net_paid,"
        + "ss_net_paid_inc_tax,ss_net_profit,none")
    # build_models(sqlExecutor)
    run_10k(sqlExecutor)
    run_100k(sqlExecutor)
Esempio n. 3
0
    def build_model(self, mdl_name: str = "ss_10g", encoder='binary'):
        self.mdl_name = mdl_name
        self.sql_executor = SqlExecutor()

        self.sql_executor.execute("set v='True'")
        # self.sql_executor.execute("set device='cpu'")

        self.sql_executor.execute("set b_grid_search='false'")
        self.sql_executor.execute("set b_print_to_screen='false'")
        self.sql_executor.execute("set csv_split_char='|'")
        self.sql_executor.execute("set batch_size=1000")
        self.sql_executor.execute(
            "set table_header=" +
            "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|"
            +
            "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|"
            +
            "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|"
            +
            "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|"
            + "ss_net_paid_inc_tax|ss_net_profit|none'")
        # sql_executor.execute("set table_header=" +
        #                     "'ss_sold_date_sk|ss_store_sk|ss_sales_price'")

        self.sql_executor.execute("set encoder='" + encoder + "'")
        self.sql_executor.execute("set n_mdn_layer_node_reg=50")  # 5
        self.sql_executor.execute("set n_mdn_layer_node_density=60")  # 30
        self.sql_executor.execute("set n_jobs=1")  # 2
        self.sql_executor.execute("set n_hidden_layer=2")  # 1
        self.sql_executor.execute("set n_epoch=20")  # 20
        self.sql_executor.execute("set n_gaussians_reg=4")  # 3
        self.sql_executor.execute("set n_gaussians_density=20")  # 10

        self.sql_executor.execute(
            "create table " + mdl_name +
            "(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size num_points/ss_10g.csv' "
        )  # num_of_points57.csv
Esempio n. 4
0
    def build_one_model(self, mdl_name: str = "flight_1m", encoder='binary'):
        self.mdl_name = mdl_name
        self.sql_executor = SqlExecutor()

        self.sql_executor.execute("set v='True'")
        # self.sql_executor.execute("set device='cpu'")
        self.sql_executor.execute("set one_model='true'")
        self.sql_executor.execute("set b_grid_search='false'")
        self.sql_executor.execute("set b_print_to_screen='false'")
        self.sql_executor.execute("set csv_split_char=','")
        self.sql_executor.execute("set batch_size=1000")
        self.sql_executor.execute("set table_header=" +
                                  "'year_date,unique_carrier,origin,origin_state_abr,dest,dest_state_abr,dep_delay,taxi_out,taxi_in,arr_delay,air_time,distance'")
        
        self.sql_executor.execute("set encoder='"+ encoder +"'")

        model_size = "5m"
        if model_size =="1m":
            self.sql_executor.execute("set n_mdn_layer_node_reg=20")          # 5
            self.sql_executor.execute("set n_mdn_layer_node_density=30")      # 30
            self.sql_executor.execute("set n_jobs=1")                         # 2
            self.sql_executor.execute("set n_hidden_layer=2")                 # 1
            self.sql_executor.execute("set n_epoch=20")                       # 20
            self.sql_executor.execute("set n_gaussians_reg=8")                # 3
            self.sql_executor.execute("set n_gaussians_density=8")            # 10
            self.sql_executor.execute(
            "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_1m.csv' GROUP BY unique_carrier method uniform size 0.001")
        elif model_size == "5m":
            self.sql_executor.execute("set n_mdn_layer_node_reg=50")          # 20
            self.sql_executor.execute("set n_mdn_layer_node_density=50")      # 30
            self.sql_executor.execute("set n_jobs=1")                         # 
            self.sql_executor.execute("set n_hidden_layer=2")                 # 1
            self.sql_executor.execute("set n_epoch=20")                       # 50
            self.sql_executor.execute("set n_gaussians_reg=8")                # 3
            self.sql_executor.execute("set n_gaussians_density=8")            # 10
            self.sql_executor.execute(
            "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 0.005")
Esempio n. 5
0
 def test_categorical_one_model(self):
     sqlExecutor = SqlExecutor()
     sqlExecutor.execute("set b_grid_search='False'")
     sqlExecutor.execute("set csv_split_char='|'")
     sqlExecutor.execute("set one_model='true'")
     sqlExecutor.execute(
         "set table_header=" +
         "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|"
         +
         "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|"
         +
         "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|"
         +
         "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|"
         + "ss_net_paid_inc_tax|ss_net_profit|none'")
     sqlExecutor.execute(
         "create table test_ss40g_categorical_one_model(ss_sales_price real, ss_sold_date_sk real,  ss_coupon_amt categorical, ) from '/data/tpcds/40G/ss_600k.csv' GROUP BY ss_store_sk method uniform size 600"
     )  # ss_ext_discount_amt categorical
     predictions = sqlExecutor.execute(
         "select count(ss_sales_price)  from test_ss40g_categorical_one_model where   2451119  <=ss_sold_date_sk<= 2451483 and ss_coupon_amt='0.00'   group by ss_store_sk"
     )  # and ss_ext_discount_amt='0.00'
     # sqlExecutor.execute("drop table test_ss40g_categorical_one_model")
     self.assertTrue(predictions)
Esempio n. 6
0
 def test_simple_model(self):
     sqlExecutor = SqlExecutor()
     sqlExecutor.execute("set n_epoch=10")
     sqlExecutor.execute("set reg_type='mdn'")
     sqlExecutor.execute("set density_type='mdn'")
     sqlExecutor.execute("set b_grid_search='False'")
     sqlExecutor.execute("set csv_split_char='|'")
     sqlExecutor.execute(
         "set table_header=" +
         "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|"
         +
         "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|"
         +
         "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|"
         +
         "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|"
         + "ss_net_paid_inc_tax|ss_net_profit|none'")
     sqlExecutor.execute(
         "create table test_ss40g_sm(ss_sales_price real, ss_sold_date_sk real) from '/data/tpcds/40G/ss_600k.csv'  method uniform size 600 scale data num_of_points2.csv"
     )
     predictions = sqlExecutor.execute(
         "select avg(ss_sales_price)  from test_ss40g_sm where   2451119  <=ss_sold_date_sk<= 2451483 "
     )
     sqlExecutor.execute("drop table test_ss40g_sm")
     self.assertTrue(predictions)
Esempio n. 7
0
 def test_embedding(self):
     sqlExecutor = SqlExecutor()
     sqlExecutor.execute("set b_grid_search='False'")
     sqlExecutor.execute("set csv_split_char='|'")
     sqlExecutor.execute("set encoder='embedding'")
     sqlExecutor.execute(
         "set table_header=" +
         "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|"
         +
         "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|"
         +
         "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|"
         +
         "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|"
         + "ss_net_paid_inc_tax|ss_net_profit|none'")
     sqlExecutor.execute(
         "create table test_ss40g_embedding(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 600"
     )
     predictions = sqlExecutor.execute(
         "select avg(ss_sales_price)  from test_ss40g_embedding where   2451119  <=ss_sold_date_sk<= 2451483  group by ss_store_sk"
     )
     sqlExecutor.execute("drop table test_ss40g_embedding")
     self.assertTrue(predictions)
Esempio n. 8
0
 def test_drop_clause(self):
     sqlExecutor = SqlExecutor()
     status = sqlExecutor.execute("drop table model2drop")
     self.assertFalse(status)
Esempio n. 9
0
class Query1:
    def __init__(self):
        self.mdl_name = None
        self.sql_executor = None

    def build_model(self, mdl_name: str = "ss_10g", encoder='binary'):
        self.mdl_name = mdl_name
        self.sql_executor = SqlExecutor()

        self.sql_executor.execute("set v='True'")
        # self.sql_executor.execute("set device='cpu'")

        self.sql_executor.execute("set b_grid_search='false'")
        self.sql_executor.execute("set b_print_to_screen='false'")
        self.sql_executor.execute("set csv_split_char='|'")
        self.sql_executor.execute("set batch_size=1000")
        self.sql_executor.execute(
            "set table_header=" +
            "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|"
            +
            "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|"
            +
            "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|"
            +
            "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|"
            + "ss_net_paid_inc_tax|ss_net_profit|none'")
        # sql_executor.execute("set table_header=" +
        #                     "'ss_sold_date_sk|ss_store_sk|ss_sales_price'")

        self.sql_executor.execute("set encoder='" + encoder + "'")
        self.sql_executor.execute("set n_mdn_layer_node_reg=50")  # 5
        self.sql_executor.execute("set n_mdn_layer_node_density=60")  # 30
        self.sql_executor.execute("set n_jobs=1")  # 2
        self.sql_executor.execute("set n_hidden_layer=2")  # 1
        self.sql_executor.execute("set n_epoch=20")  # 20
        self.sql_executor.execute("set n_gaussians_reg=4")  # 3
        self.sql_executor.execute("set n_gaussians_density=20")  # 10

        self.sql_executor.execute(
            "create table " + mdl_name +
            "(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size num_points/ss_10g.csv' "
        )  # num_of_points57.csv

        # self.sql_executor.execute(
        #     "create table "+"ss10g_binary_30"+"(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 'num_points/ss_10g.csv' ")  # num_of_points57.csv

    def workload(
            self,
            mdl_name,
            result2file:
        str = '/home/u1796377/Documents/workspace/DBEstClient/experiments/results/mdn/10g/',
            n_jobs=1):
        self.sql_executor.mdl_name = mdl_name
        self.sql_executor.execute("set n_jobs=" + str(n_jobs) + '"')
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum1.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where   2451119 <=ss_sold_date_sk<= 2451483 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum2.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451300 <=ss_sold_date_sk<= 2451665 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum3.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451392 <=ss_sold_date_sk<= 2451757 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum4.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451484 <=ss_sold_date_sk<= 2451849 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum5.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451545 <=ss_sold_date_sk<= 2451910 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum6.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451636 <=ss_sold_date_sk<= 2452000 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum7.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451727 <=ss_sold_date_sk<= 2452091 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum8.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451850 <=ss_sold_date_sk<= 2452214 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum9.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2451911 <=ss_sold_date_sk<= 2452275 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "sum10.txt'")
        self.sql_executor.execute(
            "select sum(ss_sales_price)   from " + self.mdl_name +
            "  where  2452031 <=ss_sold_date_sk<= 2452395 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count1.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451119 <=ss_sold_date_sk<= 2451483 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count2.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451300 <=ss_sold_date_sk<= 2451665 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count3.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451392 <=ss_sold_date_sk<= 2451757 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count4.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451484 <=ss_sold_date_sk<= 2451849 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count5.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451545 <=ss_sold_date_sk<= 2451910 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count6.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451636 <=ss_sold_date_sk<= 2452000 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count7.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451727 <=ss_sold_date_sk<= 2452091 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count8.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451850 <=ss_sold_date_sk<= 2452214 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count9.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2451911 <=ss_sold_date_sk<= 2452275 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "count10.txt'")
        self.sql_executor.execute(
            "select count(ss_sales_price) from " + self.mdl_name +
            "  where  2452031 <=ss_sold_date_sk<= 2452395 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg1.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451119 <=ss_sold_date_sk<= 2451483 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg2.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451300 <=ss_sold_date_sk<= 2451665 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg3.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451392 <=ss_sold_date_sk<= 2451757 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg4.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451484 <=ss_sold_date_sk<= 2451849 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg5.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451545 <=ss_sold_date_sk<= 2451910 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg6.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451636 <=ss_sold_date_sk<= 2452000 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg7.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451727 <=ss_sold_date_sk<= 2452091 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg8.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451850 <=ss_sold_date_sk<= 2452214 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg9.txt'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2451911 <=ss_sold_date_sk<= 2452275 group by   ss_store_sk",
        )
        self.sql_executor.execute("set result2file='" + result2file +
                                  "avg10.txt'")
        # self.sql_executor.execute(
        # "set result2file='None'")
        self.sql_executor.execute(
            "select avg(ss_sales_price)   from " + self.mdl_name +
            "  where  2452031 <=ss_sold_date_sk<= 2452395 group by   ss_store_sk",
        )
Esempio n. 10
0
class DBEstPrompt(Cmd):
    def __init__(self):
        super(DBEstPrompt, self).__init__()
        self.prompt = 'dbestclient> '
        self.intro = "Welcome to DBEst: a model-based AQP engine! Type exit to exit!"
        self.query = ""

        # # deal with configuration file
        # if os.path.exists('config.json'):
        #     print("Configuration file loaded.")
        #     self.config = json.load(open('config.json'))
        # else:
        #     print("Configuration file config.json does not exist! use default values")
        #     self.config = config
        #     json.dump(self.config, open('config.json', 'w'))
        # self.verbose = self.config['verbose']
        # self.b_show_latency = self.config['b_show_latency']

        # deal with warehouse
        if os.path.exists("dbestwarehouse"):
            print("warehouse is initialized.")
        else:
            print("warehouse does not exists, so initialize one.")
            os.mkdir("dbestwarehouse")

        self.sqlExecutor = SqlExecutor()

    # print the exit message.
    def do_exit(self, inp):
        '''exit the application.'''
        print("DBEst closed successfully.")
        return True

    # process the query
    def default(self, inp):
        if ";" not in inp:
            self.query = self.query + inp + " "
        else:
            self.query += inp.split(";")[0]
            # if self.config['verbose']:
            #     print("Executing query >>> " + self.query + "...")

            # query execution goes here
            # -------------------------------------------->>
            # check if query begins with 'bypass', if so use backend server, otherwise use dbest to give a prediction
            if self.query.lstrip()[0:6].lower() == 'bypass':
                print("Bypass DBEst, use the backend server instead.")
                # go to the backend server
            else:
                # sqlExecutor = SqlExecutor(config)
                # print(self.query)
                # self.query.replace(";",'')
                self.sqlExecutor.execute(self.query)

                # self.sqlExecutor.execute("create table mdl1(pm25 real, PRES real) from pm25.csv  method uniform size 100")
                # self.sqlExecutor.execute("select count(pm25 real) from mdl1 where PRES between 1000 and 1020")
                # sqlExecutor.execute("select sum(pm25 real) from mdl where PRES between 1000 and 1020")
                # sqlExecutor.execute("select avg(pm25 real) from mdl where PRES between 1000 and 1020")
                # self.sqlExecutor.execute("create table ss(ss_list_price real, ss_wholesale_cost real) from store_sales.dat  method uniform size 10000 group by ss_store_sk")
                # self.sqlExecutor.execute("select count(ss_list_price) from ss where ss_wholesale_cost between 1 and 100 group by ss_store_sk")

            # <<--------------------------------------------

            # restore the query for the next coming query
            self.query = ""

    # deal with KeyboardInterrupt caused by ctrl+c
    def cmdloop(self, intro=None):
        print(self.intro)
        while True:
            try:
                super(DBEstPrompt, self).cmdloop(intro="")
                break
            except KeyboardInterrupt:
                # self.do_exit("")
                print("DBEst closed successfully.")
                return True

    do_EOF = do_exit
Esempio n. 11
0
class Query1:
    def __init__(self):
        self.mdl_name = None
        self.sql_executor = None
        self.model_size = "1m"

    def build_model(self, mdl_name: str = "flight_1m", encoder='binary'):
        self.mdl_name = mdl_name
        self.sql_executor = SqlExecutor()

        self.sql_executor.execute("set v='True'")
        # self.sql_executor.execute("set device='cpu'")
        
        self.sql_executor.execute("set b_grid_search='false'")
        self.sql_executor.execute("set b_print_to_screen='false'")
        self.sql_executor.execute("set csv_split_char=','")
        self.sql_executor.execute("set batch_size=1000")
        self.sql_executor.execute("set table_header=" +
                                  "'year_date,unique_carrier,origin,origin_state_abr,dest,dest_state_abr,dep_delay,taxi_out,taxi_in,arr_delay,air_time,distance'")
        
        self.sql_executor.execute("set encoder='"+ encoder +"'")
        
        if self.model_size =="1m":
            self.sql_executor.execute("set n_mdn_layer_node_reg=10")          # 20
            self.sql_executor.execute("set n_mdn_layer_node_density=15")      # 30
            self.sql_executor.execute("set n_jobs=1")                         # 
            self.sql_executor.execute("set n_hidden_layer=1")                 # 2
            self.sql_executor.execute("set n_epoch=20")                       # 50
            self.sql_executor.execute("set n_gaussians_reg=8")                # 
            self.sql_executor.execute("set n_gaussians_density=10")           # 8
            self.sql_executor.execute(
                "create table "+mdl_name+"(dep_delay real, distance real) from '../data/flights/flight_1m.csv' GROUP BY unique_carrier method uniform size 'num_points/flights_group.csv' ") 
        elif self.model_size == "5m":
            self.sql_executor.execute("set n_mdn_layer_node_reg=20")          # 20
            self.sql_executor.execute("set n_mdn_layer_node_density=30")      # 30
            self.sql_executor.execute("set n_jobs=1")                         # 
            self.sql_executor.execute("set n_hidden_layer=1")                 # 1
            self.sql_executor.execute("set n_epoch=20")                       # 20
            self.sql_executor.execute("set n_gaussians_reg=8")                # 
            self.sql_executor.execute("set n_gaussians_density=8")            # 10
            self.sql_executor.execute(
                "create table "+mdl_name+"(dep_delay real, distance real) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 'num_points/flights_group.csv' ") 
        

        # self.sql_executor.execute("set n_mdn_layer_node_reg=30")          # 5
        # self.sql_executor.execute("set n_mdn_layer_node_density=30")      # 30
        # self.sql_executor.execute("set n_jobs=1")                         # 2
        # self.sql_executor.execute("set n_hidden_layer=1")                 # 1
        # self.sql_executor.execute("set n_epoch=20")                       # 20
        # self.sql_executor.execute("set n_gaussians_reg=8")                # 3
        # self.sql_executor.execute("set n_gaussians_density=8")           # 10

        # self.sql_executor.execute(
        #     "create table "+mdl_name+"(dep_delay real, distance real) from '../data/flights/flights_10k.csv' GROUP BY unique_carrier method uniform size 'num_points/flights_group.csv' ")  # num_of_points57.csv
        
        # SELECT unique_carrier, AVG(dep_delay) FROM flights WHERE 300<=distance<=1000 GROUP BY unique_carrier;
        # self.sql_executor.execute(
        #     "create table "+"ss10g_binary_30"+"(ss_sales_price real, ss_sold_date_sk real) from '../data/tpcds/10g/ss_10g_520k.csv' GROUP BY ss_store_sk method uniform size 'num_points/ss_10g.csv' ")  # num_of_points57.csv

    def build_one_model(self, mdl_name: str = "flight_1m", encoder='binary'):
        self.mdl_name = mdl_name
        self.sql_executor = SqlExecutor()

        self.sql_executor.execute("set v='True'")
        # self.sql_executor.execute("set device='cpu'")
        self.sql_executor.execute("set one_model='true'")
        self.sql_executor.execute("set b_grid_search='false'")
        self.sql_executor.execute("set b_print_to_screen='false'")
        self.sql_executor.execute("set csv_split_char=','")
        self.sql_executor.execute("set batch_size=1000")
        self.sql_executor.execute("set table_header=" +
                                  "'year_date,unique_carrier,origin,origin_state_abr,dest,dest_state_abr,dep_delay,taxi_out,taxi_in,arr_delay,air_time,distance'")
        
        self.sql_executor.execute("set encoder='"+ encoder +"'")

        model_size = "5m"
        if model_size =="1m":
            self.sql_executor.execute("set n_mdn_layer_node_reg=20")          # 5
            self.sql_executor.execute("set n_mdn_layer_node_density=30")      # 30
            self.sql_executor.execute("set n_jobs=1")                         # 2
            self.sql_executor.execute("set n_hidden_layer=2")                 # 1
            self.sql_executor.execute("set n_epoch=20")                       # 20
            self.sql_executor.execute("set n_gaussians_reg=8")                # 3
            self.sql_executor.execute("set n_gaussians_density=8")            # 10
            self.sql_executor.execute(
            "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_1m.csv' GROUP BY unique_carrier method uniform size 0.001")
        elif model_size == "5m":
            self.sql_executor.execute("set n_mdn_layer_node_reg=50")          # 20
            self.sql_executor.execute("set n_mdn_layer_node_density=50")      # 30
            self.sql_executor.execute("set n_jobs=1")                         # 
            self.sql_executor.execute("set n_hidden_layer=2")                 # 1
            self.sql_executor.execute("set n_epoch=20")                       # 50
            self.sql_executor.execute("set n_gaussians_reg=8")                # 3
            self.sql_executor.execute("set n_gaussians_density=8")            # 10
            self.sql_executor.execute(
            "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 0.005")

        # self.sql_executor.execute(
        #     "create table "+mdl_name+"(distance real, dep_delay real, origin_state_abr categorical) from '../data/flights/flight_5m.csv' GROUP BY unique_carrier method uniform size 0.005")#'num_points/flights_group.csv' ")  # num_of_points57.csv
        #SELECT unique_carrier, COUNT(*) FROM flights WHERE origin_state_abr='LA' AND  dest_state_abr='CA' GROUP BY unique_carrier;

    def workload(self, mdl_name, result2file: str = 'experiments/flights/results/mdn1m/', n_jobs=1):
        self.sql_executor.mdl_name = mdl_name
        self.sql_executor.execute("set n_jobs=" + str(n_jobs)+'"')
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "7.txt'")
        # self.sql_executor.execute("select unique_carrier, COUNT(dep_delay) from  " + self.mdl_name +
        #                           "  where   300<=distance<=1000 GROUP BY unique_carrier")
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "8.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, COUNT(dep_delay) from " + self.mdl_name +
        #                           "  where   1000<=distance<=1500 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "9.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, COUNT(dep_delay) from " + self.mdl_name +
        #                           "  where   1500<=distance<=2000 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "10.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, SUM(dep_delay) from " + self.mdl_name +
        #                           "  where   300<=distance<=1000 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "11.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, SUM(dep_delay) from " + self.mdl_name +
        #                           "  where   1000<=distance<=1500 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "12.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, SUM(dep_delay) from " + self.mdl_name +
        #                           "  where   1500<=distance<=2000 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "13.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, AVG(dep_delay) from " + self.mdl_name +
        #                           "  where   300<=distance<=1000 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "14.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, AVG(dep_delay) from " + self.mdl_name +
        #                           "  where   1000<=distance<=1500 GROUP BY unique_carrier",)
        # self.sql_executor.execute(
        #     "set result2file='" + result2file + "15.txt'")
        # self.sql_executor.execute("SELECT unique_carrier, AVG(dep_delay) from " + self.mdl_name +
        #                           "  where   1500<=distance<=2000 GROUP BY unique_carrier",)
        
        self.sql_executor.execute(
            "set result2file='" + result2file + "one_model_1x.txt'")
        
        self.sql_executor.execute("SELECT unique_carrier, COUNT(distance) FROM "+mdl_name+" where 1000<=dep_delay<=1200 AND origin_state_abr='LA'  GROUP BY unique_carrier")
Esempio n. 12
0
def run():
    sqlExecutor = SqlExecutor()
    sqlExecutor.execute("set v='True'")
    sqlExecutor.execute("set n_jobs=8")
    sqlExecutor.execute("set device='cpu'")
    sqlExecutor.execute("set encoder='binary'")
    sqlExecutor.execute("set b_grid_search='False'")
    sqlExecutor.execute("set b_print_to_screen='true'")
    sqlExecutor.execute("set csv_split_char='|'")
    sqlExecutor.execute("set batch_size=1000")
    sqlExecutor.execute(
        "set table_header=" +
        "'ss_sold_date_sk|ss_sold_time_sk|ss_item_sk|ss_customer_sk|ss_cdemo_sk|ss_hdemo_sk|"
        +
        "ss_addr_sk|ss_store_sk|ss_promo_sk|ss_ticket_number|ss_quantity|ss_wholesale_cost|"
        +
        "ss_list_price|ss_sales_price|ss_ext_discount_amt|ss_ext_sales_price|"
        +
        "ss_ext_wholesale_cost|ss_ext_list_price|ss_ext_tax|ss_coupon_amt|ss_net_paid|"
        + "ss_net_paid_inc_tax|ss_net_profit|none'")
    # run_2_groupby(sqlExecutor)
    build_models(sqlExecutor)
    query(sqlExecutor)