def run_throughput_inner(query_root, data_dir, generated_query_dir, host, port, database, user,
                         password, stream, num_streams, queue, verbose):
    """Worker body for a single throughput query stream, run in a child process.

    Opens its own DB connection, runs the stream's queries, and hands the
    collected metrics back to the parent via the process queue.

    :param query_root: directory where generated SQL statements are stored
    :param data_dir: subdirectory with data to be loaded
    :param generated_query_dir: subdirectory with generated queries
    :param host: hostname where the Postgres database is running
    :param port: port number where the Postgres database is listening
    :param database: database name, where the benchmark will be run
    :param user: username of the Postgres user with full access to the benchmark DB
    :param password: password for the Postgres user
    :param stream: stream number
    :param num_streams: number of streams
    :param queue: process queue used to return the Result to the parent
    :param verbose: True if more verbose output is required
    :return: none, uses exit(1) to abort on errors
    """
    try:
        db = pgdb.PGDB(host, port, database, user, password)
        stream_result = r.Result("ThroughputQueryStream%s" % stream)
        failed = run_query_stream(db, query_root, generated_query_dir,
                                  stream, num_streams, stream_result, verbose)
        if failed:
            print("unable to finish query in stream #%s" % stream)
            exit(1)
        queue.put(stream_result)
    except Exception as e:
        print("unable to connect to DB for query in stream #%s: %s" % (stream, e))
        exit(1)
def load_tables(data_dir, host, port, db_name, user, password, tables, load_dir):
    """Loads data into tables via COPY. Expects that tables are already empty.

    Args:
        data_dir (str): Directory in which load data exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        tables (str): list of tables
        load_dir (str): directory with data files to be loaded

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        connection = pgdb.PGDB(host, port, db_name, user, password)
        try:
            # One CSV file per table, named after the lowercased table name.
            for tbl in tables:
                csv_file = tbl.lower() + ".tbl.csv"
                connection.copyFrom(os.path.join(data_dir, load_dir, csv_file),
                                    separator="|", table=tbl)
            # Single commit after all tables so a partial load is rolled back.
            connection.commit()
        except Exception as e:
            print("unable to run load tables. %s" % e)
            return 1
        connection.close()
        return 0
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
def clean_database(query_root, host, port, db_name, user, password, tables):
    """Drops the tables if they exist

    Args:
        query_root (str): Directory in which generated queries directory exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        tables (str): list of tables

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        db = pgdb.PGDB(host, port, db_name, user, password)
        try:
            # NOTE(review): table names are interpolated into the statement;
            # acceptable here because they come from benchmark config, not
            # untrusted input — do not reuse this pattern for external data.
            for tbl in tables:
                db.executeQuery("DROP TABLE IF EXISTS %s " % tbl)
        except Exception as e:
            print("unable to remove existing tables. %s" % e)
            return 1
        print("dropped existing tables")
        db.commit()
        db.close()
        return 0
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
def create_schema(query_root, host, port, db_name, user, password, prep_query_dir):
    """Creates the schema for the tests. Drops the tables if they exist

    Args:
        query_root (str): Directory in which generated queries directory exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        prep_query_dir (str): directory with queries for schema creation

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        conn = pgdb.PGDB(host, port, db_name, user, password)
        try:
            conn.executeQueryFromFile(os.path.join(query_root, prep_query_dir, "create_tbl.sql"))
        except Exception as e:
            print("unable to run create tables. %s" % e)
            return 1
        conn.commit()
        conn.close()
        # BUG FIX: the success path previously fell through and returned None,
        # breaking the documented "0 if successful" contract that the sibling
        # functions (load_tables, clean_database, index_tables) honor.
        return 0
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
def index_tables(query_root, host, port, db_name, user, password, prep_query_dir):
    """Creates indexes and foreign keys for loaded tables.

    Args:
        query_root (str): Directory in which preparation queries directory exists
        host (str): IP/hostname of the PG instance
        port (int): port for the PG instance
        db_name (str): name of the tpch database
        user (str): user for the PG instance
        password (str): password for the PG instance
        prep_query_dir (str): directory with create index script

    Return:
        0 if successful
        non zero otherwise
    """
    try:
        db = pgdb.PGDB(host, port, db_name, user, password)
        try:
            index_script = os.path.join(query_root, prep_query_dir, "create_idx.sql")
            db.executeQueryFromFile(index_script)
            db.commit()
        except Exception as e:
            print("unable to run index tables. %s" % e)
            return 1
        db.close()
        return 0
    except Exception as e:
        print("unable to connect to the database. %s" % e)
        return 1
def run_power_test(query_root, data_dir, update_dir, delete_dir, generated_query_dir, results_dir,
                   host, port, database, user, password, run_timestamp, num_streams, verbose,
                   read_only):
    """Runs the TPC-H power test: refresh 1, the single query stream, refresh 2.

    :param query_root: directory where generated SQL statements are stored
    :param data_dir: subdirectory with data to be loaded
    :param update_dir: subdirectory with data to be updated
    :param delete_dir: subdirectory with data to be deleted
    :param generated_query_dir: subdirectory with generated queries
    :param results_dir: path to the results folder
    :param host: hostname where the Postgres database is running
    :param port: port number where the Postgres database is listening
    :param database: database name, where the benchmark will be run
    :param user: username of the Postgres user with full access to the benchmark DB
    :param password: password for the Postgres user
    :param run_timestamp: name of the run folder, format run_YYYYMMDD_HHMMSS
    :param num_streams: number of streams
    :param verbose: True if more verbose output is required
    :param read_only: True if no inserts/updates/deletes are to be run; can be used to run the
        same test multiple times without (re)loading the data, e.g. while developing
    :return: 0 if successful, 1 otherwise
    """
    conn = None
    try:
        print("Power tests started ...")
        conn = pgdb.PGDB(host, port, database, user, password)
        result = r.Result("Power")
        result.startTimer()
        stream = 0  # constant for power tests
        #
        if not read_only:
            if refresh_func1(conn, data_dir, update_dir, stream, num_streams, verbose):
                return 1
        # In read_only mode this records a (near-zero) elapsed time anyway,
        # keeping the metrics file structurally identical between modes.
        result.setMetric(REFRESH_METRIC % (stream, 1), result.stopTimer())
        #
        if run_query_stream(conn, query_root, generated_query_dir, stream, num_streams, result,
                            verbose):
            return 1
        #
        result.startTimer()
        if not read_only:
            if refresh_func2(conn, data_dir, delete_dir, stream, num_streams, verbose):
                return 1
        result.setMetric(REFRESH_METRIC % (stream, 2), result.stopTimer())
        #
        print("Power tests finished.")
        if verbose:
            result.printMetrics()
        result.saveMetrics(results_dir, run_timestamp, "power")
    except Exception as e:
        print("unable to run power tests. DB connection failed: %s" % e)
        return 1
    finally:
        # BUG FIX: the connection was previously never closed on any exit path
        # (success, early `return 1`, or exception). Close it best-effort.
        if conn is not None:
            try:
                conn.close()
            except Exception:
                pass
    return 0
def run_throughput_test(query_root, data_dir, update_dir, delete_dir, generated_query_dir,
                        results_dir, host, port, database, user, password, run_timestamp,
                        num_streams, verbose, read_only):
    """Runs the TPC-H throughput test: one query-stream child process per stream,
    with the refresh functions executed serially in the parent.

    :param query_root: directory where generated SQL statements are stored
    :param data_dir: subdirectory with data to be loaded
    :param update_dir: subdirectory with data to be updated
    :param delete_dir: subdirectory with data to be deleted
    :param generated_query_dir: subdirectory with generated queries
    :param results_dir: path to the results folder
    :param host: hostname where the Postgres database is running
    :param port: port number where the Postgres database is listening
    :param database: database name, where the benchmark will be run
    :param user: username of the Postgres user with full access to the benchmark DB
    :param password: password for the Postgres user
    :param run_timestamp: name of the run folder, format run_YYYYMMDD_HHMMSS
    :param num_streams: number of streams
    :param verbose: True if more verbose output is required
    :param read_only: True if no inserts/updates/deletes are to be run; can be used to run the
        same test multiple times without (re)loading the data, e.g. while developing
    :return: 0 if successful, 1 otherwise
    """
    conn = None
    try:
        print("Throughput tests started ...")
        conn = pgdb.PGDB(host, port, database, user, password)
        total = r.Result("ThroughputTotal")
        total.startTimer()
        processes = []
        queue = Queue()
        for i in range(num_streams):
            stream = i + 1
            # queries: each stream runs in its own process with its own connection
            print("Throughput tests in stream #%s started ..." % stream)
            p = Process(target=run_throughput_inner,
                        args=(query_root, data_dir, generated_query_dir, host, port, database,
                              user, password, stream, num_streams, queue, verbose))
            processes.append(p)
            p.start()
        result = r.Result("ThroughputRefreshStream")
        for i in range(num_streams):
            stream = i + 1
            # refresh functions, run serially in the parent against `conn`
            result.startTimer()
            if not read_only:
                if refresh_func1(conn, data_dir, update_dir, stream, num_streams, verbose):
                    return 1
            result.setMetric(REFRESH_METRIC % (stream, 1), result.stopTimer())
            #
            result.startTimer()
            if not read_only:
                if refresh_func2(conn, data_dir, delete_dir, stream, num_streams, verbose):
                    return 1
            result.setMetric(REFRESH_METRIC % (stream, 2), result.stopTimer())
        #
        queue.put(result)
        for p in processes:
            p.join()
        print("Throughput tests finished.")
        # NOTE(review): Queue.qsize()/get(False) can be unreliable on some
        # platforms (qsize raises NotImplementedError on macOS); kept as-is
        # since every worker has been joined above — confirm if porting.
        for i in range(queue.qsize()):
            res = queue.get(False)
            if verbose:
                res.printMetrics()
            res.saveMetrics(results_dir, run_timestamp, THROUGHPUT)
        #
        total.setMetric(THROUGHPUT_TOTAL_METRIC, total.stopTimer())
        if verbose:
            total.printMetrics()
        total.saveMetrics(results_dir, run_timestamp, THROUGHPUT)
        #
    except Exception as e:
        print("unable to execute throughput tests: %s" % e)
        return 1
    finally:
        # BUG FIX: the refresh-stream connection was previously never closed on
        # any exit path (success, early `return 1`, or exception).
        if conn is not None:
            try:
                conn.close()
            except Exception:
                pass
    return 0