Ejemplo n.º 1
0
    def load_orders(self):
        print(f'Loading orders ({self.warehouse_id})')
        self.order_lines = []
        self.c_ids = list(range(1, NUM_ORDERS + 1))
        self.random.shuffle(self.c_ids)

        with DBConn(self.dsn) as conn:
            it = StringIteratorIO((
                self.generate_order(d_id, o_id)
                # generate in the order that a higher order number means a later transaction
                for o_id in range(1, NUM_ORDERS + 1)
                for d_id in range(1, DIST_PER_WARE + 1)))
            conn.cursor.copy_from(it, 'orders', null='None', size=COPY_SIZE)

        with DBConn(self.dsn) as conn:
            conn.cursor.execute(f'''
                INSERT INTO new_orders(no_o_id, no_d_id, no_w_id)
                SELECT o_id, o_d_id, o_w_id
                FROM orders
                WHERE o_id >= {FIRST_UNPROCESSED_O_ID} AND o_w_id = { self.warehouse_id }'''
                                )

        print(f'Loading order_line ({self.warehouse_id})')
        with DBConn(self.dsn) as conn:
            it = StringIteratorIO((self.generate_order_lines(order_line)
                                   for order_line in self.order_lines))
            conn.cursor.copy_from(it,
                                  'order_line',
                                  null='None',
                                  size=COPY_SIZE)
Ejemplo n.º 2
0
 def load_supplier(self):
     print(f'Loading suppliers')
     assert self.warehouse_id == 0
     with DBConn(self.dsn) as conn:
         it = StringIteratorIO((self.generate_supplier(su_id)
                                for su_id in range(NUM_SUPPLIERS)))
         conn.cursor.copy_from(it, 'supplier', null='None', size=COPY_SIZE)
Ejemplo n.º 3
0
 def load_item(self):
     print(f'Loading items')
     assert self.warehouse_id == 0
     with DBConn(self.dsn) as conn:
         it = StringIteratorIO(
             (self.generate_item(i_id) for i_id in range(1, MAX_ITEMS + 1)))
         conn.cursor.copy_from(it, 'item', null='None', size=COPY_SIZE)
Ejemplo n.º 4
0
 def load_customer(self):
     print(f'Loading customer ({self.warehouse_id})')
     with DBConn(self.dsn) as conn:
         it = StringIteratorIO((self.generate_customer(d_id, c_id)
                                for d_id in range(1, DIST_PER_WARE + 1)
                                for c_id in range(1, CUST_PER_DIST + 1)))
         conn.cursor.copy_from(it, 'customer', null='None', size=COPY_SIZE)
Ejemplo n.º 5
0
 def _query_num_warehouses(self):
     with DBConn(self.args.dsn) as conn:
         try:
             conn.cursor.execute(
                 'SELECT count(distinct(w_id)) from warehouse')
             return conn.cursor.fetchone()[0]
         except ProgrammingError:
             self._sql_error('Could not query number of warehouses.')
Ejemplo n.º 6
0
 def _query_range_delivery_date(self):
     with DBConn(self.args.dsn) as conn:
         try:
             conn.cursor.execute(
                 'SELECT min(ol_delivery_d), max(ol_delivery_d) FROM order_line'
             )
             return conn.cursor.fetchone()
         except ProgrammingError:
             self._sql_error('Could not query the latest delivery date.')
Ejemplo n.º 7
0
 def load_nation(self):
     print('Loading nation')
     assert self.warehouse_id == 0
     with DBConn(self.dsn) as conn:
         for i in range(NUM_NATIONS):
             nation_key, name, region_key = NATIONS[i]
             self.insert_data('nation', [[
                 nation_key, name, region_key,
                 self.olap_text.random_length_text(31, 114)
             ]])
Ejemplo n.º 8
0
 def load_region(self):
     print('Loading regions')
     assert self.warehouse_id == 0
     with DBConn(self.dsn) as conn:
         for i in range(NUM_REGIONS):
             region_key, name = REGIONS[i]
             self.insert_data('region', [[
                 region_key, name,
                 self.olap_text.random_length_text(31, 115)
             ]])
Ejemplo n.º 9
0
    def _prepare_stats_db(self):
        dsn_url = urlparse(self.args.stats_dsn)
        dbname = dsn_url.path[1:]

        with DBConn(f'{dsn_url.scheme}://{dsn_url.netloc}/postgres') as conn:
            try:
                conn.cursor.execute(
                    f"CREATE DATABASE {dbname} TEMPLATE template0 ENCODING 'UTF-8'"
                )
            except DuplicateDatabase:
                pass

        with DBConn(self.args.stats_dsn) as conn:
            stats_schema_path = os.path.join('benchmarks', 'htap',
                                             'stats_schema.sql')
            with open(stats_schema_path, 'r') as schema:
                schema_sql = schema.read()
                try:
                    conn.cursor.execute(schema_sql)
                except DuplicateTable:
                    pass
Ejemplo n.º 10
0
    def load_history(self):
        print(f'Loading history ({self.warehouse_id})')
        copy_columns = ('h_c_id', 'h_c_d_id', 'h_c_w_id', 'h_d_id', 'h_w_id',
                        'h_date', 'h_amount', 'h_data')

        with DBConn(self.dsn) as conn:
            it = StringIteratorIO((self.generate_history(d_id, c_id)
                                   for d_id in range(1, DIST_PER_WARE + 1)
                                   for c_id in range(1, CUST_PER_DIST + 1)))
            conn.cursor.copy_from(it,
                                  'history',
                                  null='None',
                                  columns=copy_columns,
                                  size=COPY_SIZE)
Ejemplo n.º 11
0
 def oltp_worker(self, worker_id):
     # do NOT introduce timeouts for the oltp queries! this will make that
     # the workload gets inbalanaced and eventually the whole benchmark stalls
     with DBConn(self.args.dsn) as conn:
         oltp_worker = TransactionalWorker(worker_id, self.num_warehouses,
                                           self.latest_timestamp, conn,
                                           self.args.dry_run)
         next_reporting_time = time.time() + 0.1
         while True:
             self.oltp_sleep()
             oltp_worker.next_transaction()
             if next_reporting_time <= time.time():
                 # its beneficial to send in chunks so try to batch the stats by accumulating 0.1s of samples
                 self.stats_queue.put(('oltp', oltp_worker.stats()))
                 next_reporting_time += 0.1
Ejemplo n.º 12
0
    def analyze_worker(self):
        tables = [
            'customer', 'district', 'history', 'item', 'nation', 'new_orders',
            'order_line', 'orders', 'region', 'stock', 'supplier', 'warehouse'
        ]

        os.makedirs('results', exist_ok=True)
        with open('results/analyze.csv', 'w+') as csv:
            with DBConn(self.args.dsn) as conn:
                while True:
                    for table in tables:
                        start = time.time()
                        conn.cursor.execute(f'ANALYZE {table}')
                        runtime = time.time() - start
                        csv.write(
                            f'{datetime.now()}, {table}, {runtime:.2f}\n')
                        csv.flush()
                    time.sleep(600)
Ejemplo n.º 13
0
    def update(self):
        # open the files and connections lazily to avoid serialization problems when forking
        os.makedirs('results', exist_ok=True)
        if not self.csv_oltp:
            self.csv_oltp = open('results/oltp.csv', 'w')
        if not self.csv_olap:
            self.csv_olap = open('results/olap.csv', 'w')
        if not self.csv_olap_stream:
            self.csv_olap_stream = open('results/olap_stream.csv', 'w')
        if not self.csv_dbstats:
            self.csv_dbstats = open('results/dbstats.csv', 'w')
        if not self.conn:
            self.conn = DBConn(self.dsn, use_dict_cursor=True)
            self._update_cached_stats()

        self.updates += 1
        if self.updates % 10 == 0:
            self._update_cached_stats()
        if self.csv_interval and self.updates % self.csv_interval == 0:
            self._write_oltp_stats()
            self._write_dbstats()
Ejemplo n.º 14
0
 def load_stock(self):
     print(f'Loading stock ({self.warehouse_id})')
     with DBConn(self.dsn) as conn:
         it = StringIteratorIO(
             (self.generate_stock(s_id) for s_id in range(1, STOCKS + 1)))
         conn.cursor.copy_from(it, 'stock', null='None', size=COPY_SIZE)
Ejemplo n.º 15
0
    def run(self):
        begin = datetime.now()
        elapsed = timedelta()
        burnin_duration = None if not self.args.dont_wait_until_enough_data else timedelta(
        )

        if self.args.stats_dsn is not None:
            print(f"Statistics will be collected in '{self.args.stats_dsn}'.")
            self._prepare_stats_db()
            stats_conn_holder = DBConn(self.args.stats_dsn)
        else:
            print(f'Database statistics collection is disabled.')
            stats_conn_holder = nullcontext()

        def worker_init():
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        num_total_workers = self.args.oltp_workers + self.args.olap_workers + 1
        with stats_conn_holder as stats_conn:
            with Pool(num_total_workers, worker_init) as pool:
                oltp_workers = pool.map_async(self.oltp_worker,
                                              range(self.args.oltp_workers))
                olap_workers = pool.map_async(self.olap_worker,
                                              range(self.args.olap_workers))
                analyze_worker = pool.apply_async(self.analyze_worker)

                try:
                    update_interval = timedelta(seconds=min(
                        self.args.monitoring_interval, self.args.csv_interval))
                    display_interval = timedelta(
                        seconds=self.args.monitoring_interval)
                    next_display = datetime.now() + display_interval
                    next_update = datetime.now() + update_interval
                    while True:
                        # the workers are not supposed to ever stop.
                        # so test for errors by testing for ready() and if so propagate them
                        # by calling .get()
                        if self.args.oltp_workers > 0 and oltp_workers.ready():
                            oltp_workers.get()
                        if self.args.olap_workers > 0 and olap_workers.ready():
                            olap_workers.get()
                        if analyze_worker.ready():
                            analyze_worker.get()

                        while datetime.now() < next_update:
                            self.stats.process_queue(self.stats_queue)
                            time.sleep(0.1)

                        time_now = datetime.now()
                        available_data = datetime.fromtimestamp(
                            self.latest_timestamp.value
                        ) - self.range_delivery_date[0]
                        if burnin_duration == None and available_data >= WANTED_RANGE:
                            burnin_duration = time_now - begin

                        elapsed = time_now - begin
                        if elapsed.total_seconds() >= self.args.duration:
                            break

                        self.stats.update()
                        next_update = time_now + update_interval
                        if 'print' in self.args.output and next_display <= time_now:
                            next_display += display_interval
                            self.monitor.update_display(
                                elapsed, burnin_duration, time_now, stats_conn,
                                datetime.fromtimestamp(
                                    self.latest_timestamp.value))
                except KeyboardInterrupt:
                    pass
                finally:
                    if burnin_duration == None:
                        burnin_duration = elapsed
                    self.monitor.display_summary(elapsed, burnin_duration)
                    self.stats.write_summary(self.args.csv_file, elapsed)
Ejemplo n.º 16
0
 def load_district(self):
     print(f'Loading district ({self.warehouse_id})')
     with DBConn(self.dsn) as conn:
         it = StringIteratorIO((self.generate_district(d_id)
                                for d_id in range(1, DIST_PER_WARE + 1)))
         conn.cursor.copy_from(it, 'district', null='None', size=COPY_SIZE)
Ejemplo n.º 17
0
 def insert_data(self, table, data):
     with DBConn(self.dsn) as conn:
         sql = f'INSERT INTO {table} VALUES %s'
         execute_values(conn.cursor, sql, data)