def grand_revoke_privileges(self, filename=''):
    self.output('-- Start exec %s for database %s' % (filename, self.database_name))
    if self.run_workload_flag and self.user != 'gpadmin':
        with open(self.workload_directory + '/data/' + filename, 'r') as f:
            query = f.read()
        if gl.suffix:
            query = query.replace('TABLESUFFIX', self.tbl_suffix)
        else:
            query = query.replace('_TABLESUFFIX', '')
        query = query.replace('ROLENAME', self.user)
        file_path = self.tmp_folder + os.sep + '%s_%s_' % (self.database_name, self.user) + filename
        with open(file_path, 'w') as f:
            f.write(query)
        for retry in range(0, 20):
            time.sleep(random.randint(1, 10))
            (ok, output) = psql.runfile(ifile=file_path, dbname=self.database_name,
                                        username='******', flag='-t -A')
            if not ok or str(output).find('ERROR:') != -1 or str(output).find('FATAL:') != -1 \
                    or str(output).find('psql:') != -1:
                self.output('Retry attempt %d:' % retry)
                self.output(query)
                self.output('\n'.join(output))
            else:
                break
        self.output(query)
        self.output('\n'.join(output))
    self.output('-- Complete exec %s for database %s' % (filename, self.database_name))
def grand_revoke_privileges(self, filename=''):
    self.output('-- Start exec %s for database %s' % (filename, self.database_name))
    if self.run_workload_flag and self.user != 'gpadmin':
        with open(self.workload_directory + '/data-hawq/' + filename, 'r') as f:
            query = f.read()
        if gl.suffix:
            query = query.replace('TABLESUFFIX', self.tbl_suffix)
        else:
            query = query.replace('_TABLESUFFIX', '')
        query = query.replace('ROLENAME', self.user)
        file_path = self.tmp_folder + os.sep + '%s_%s_' % (self.database_name, self.user) + filename
        with open(file_path, 'w') as f:
            f.write(query)
        for retry in range(0, 20):
            time.sleep(random.randint(1, 10))
            (ok, output) = psql.runfile(ifile=file_path, dbname=self.database_name, flag='-t -A')
            if not ok or str(output).find('ERROR:') != -1 or str(output).find('FATAL:') != -1 \
                    or str(output).find('psql:') != -1:
                self.output('Retry attempt %d:' % retry)
                self.output(query)
                self.output('\n'.join(output))
            else:
                break
        self.output(query)
        self.output('\n'.join(output))
    self.output('-- Complete exec %s for database %s' % (filename, self.database_name))
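# The retry loop in grand_revoke_privileges above can be factored into a
# reusable helper. A minimal sketch, assuming the same psql.runfile contract
# used throughout this module; the helper name and the max_retries /
# error_markers parameters are illustrative, not part of the original code.
import random
import time

def run_sql_file_with_retry(ifile, dbname, max_retries=20,
                            error_markers=('ERROR:', 'FATAL:', 'psql:')):
    """Run a SQL file via psql, retrying while transient errors show up."""
    ok, output = False, []
    for retry in range(max_retries):
        # a random sleep spreads out concurrent streams contending on the catalog
        time.sleep(random.randint(1, 10))
        (ok, output) = psql.runfile(ifile=ifile, dbname=dbname, flag='-t -A')
        if ok and not any(marker in str(output) for marker in error_markers):
            break
    return (ok, output)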
def run_queries_dir(self, tables, data_directory_suffix, success_criteria_str, test_type):
    hawq_data_directory = self.workload_directory + os.sep + data_directory_suffix
    # run all sql in each loading data file
    for table_name in tables:
        con_id = -1
        if self.continue_flag:
            with open(os.path.join(hawq_data_directory, table_name + '.sql'), 'r') as f:
                cmd = f.read()
            cmd = self.replace_sql(sql=cmd, table_name=table_name, query_type=test_type)
            with open(self.tmp_folder + os.sep + data_directory_suffix + '-' + table_name + '.sql', 'w') as f:
                f.write(cmd)
            self.output(cmd)
            beg_time = datetime.now()
            (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + data_directory_suffix + '-' + table_name + '.sql',
                                        dbname=self.database_name, username='******', flag='-t -A')
            end_time = datetime.now()
            self.output(result[0].split('***')[0])
            if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 \
                    and str(result).find(success_criteria_str) != -1:
                status = 'SUCCESS'
                con_id = -1
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time
                self.continue_flag = False
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        beg_time = str(beg_time).split('.')[0]
        end_time = str(end_time).split('.')[0]
        self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d'
                    % (table_name, 1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                        % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
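# The millisecond bookkeeping repeated in these methods
# (days * 24 * 3600 * 1000 + seconds * 1000 + microseconds / 1000) is
# equivalent to timedelta.total_seconds() * 1000. A small sketch; the helper
# name is illustrative and the original expressions above are left untouched.
from datetime import timedelta

def duration_ms(delta):
    """Convert a timedelta to whole milliseconds."""
    return int(delta.total_seconds() * 1000)

assert duration_ms(timedelta(seconds=1, microseconds=500000)) == 1500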
def prep_e_tables(self):
    (ok, result) = psql.runfile(ifile=self.scripts_dir + os.sep + 'prep_database.sql',
                                dbname=self.database_name)
    self.output('------ create schema ------\n' + '\n'.join(result))
    if not ok or str(result).find('ERROR') != -1:
        return False
    self.port = self.getOpenPort()
    hostname = socket.gethostname()
    with open(self.scripts_dir + os.sep + 'prep_external_tables.sql', 'r') as f:
        sql = f.read()
    sql = sql.replace('//HOST:PORT', '//%s:%s' % (hostname, self.port))
    with open(self.tmp_folder + os.sep + 'prep_external_tables.sql', 'w') as f:
        f.write(sql)
    self.output('------ prep_external_tables ------')
    (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + 'prep_external_tables.sql',
                                dbname=self.database_name, flag='-e')
    self.output('\n'.join(result))
    if ok and str(result).find('ERROR') == -1:
        cmd = 'gpfdist -d %s -p %s -l %s/fdist.%s.log &' % (self.tmp_folder, self.port,
                                                            self.tmp_folder, self.port)
        self.output(cmd)
        result = os.system(cmd)
        self.output(str(result))
        return True
    else:
        self.output('prep_external_tables error. ')
        return False
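# prep_e_tables relies on self.getOpenPort(), defined elsewhere in this
# framework. A minimal sketch of what such a helper can look like, assuming
# "ask the kernel for any free TCP port" semantics; the real implementation
# may differ.
import socket

def get_open_port():
    """Bind to port 0 so the OS picks a currently unused TCP port."""
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(('', 0))
    port = s.getsockname()[1]
    s.close()  # the port is only reserved while the socket is open
    return port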
def vacuum_analyze(self):
    self.output('-- Start vacuum analyze')
    con_id = -1
    if self.continue_flag:
        if self.load_data_flag:
            # sql = 'VACUUM ANALYZE;'
            sql = 'ANALYZE;'
            self.output(sql)
            sql_filename = 'vacuum.sql'
            # get con_id: tag the session so it can be found in pg_stat_activity
            sql_file = '%' + sql_filename + '%'
            get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (sql_filename, sql_file)
            with open(self.tmp_folder + os.sep + sql_filename, 'w') as f:
                f.write(sql)
                f.write(get_con_id_sql)
            beg_time = datetime.now()
            (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + sql_filename,
                                        dbname=self.database_name, flag='-t -A')  # , username = self.user
            end_time = datetime.now()
            self.output(result[0].split('***')[0])
            if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 \
                    and str(result).find('psql:') == -1:
                status = 'SUCCESS'
                con_id = int(result[0].split('***')[1].split('|')[2].strip())
            else:
                status = 'ERROR'
                self.continue_flag = False
        else:
            status = 'SKIP'
            beg_time = datetime.now()
            end_time = beg_time
    else:
        status = 'ERROR'
        beg_time = datetime.now()
        end_time = beg_time
    duration = end_time - beg_time
    duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
    beg_time = str(beg_time)
    end_time = str(end_time)
    self.output('    VACUUM ANALYZE Iteration=%d Stream=%d Status=%s Time=%d' % (1, 1, status, duration))
    self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Vacuum_analyze', 'Vacuum_analyze', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, con_id, status, beg_time, end_time, duration, self.adj_s_id))
    self.output('-- Complete vacuum analyze')
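# How the con_id bookkeeping above works: a SELECT tagged with a unique
# string is appended to the SQL file, pg_stat_activity matches that tag in
# its own current_query, and the '***'-delimited row is parsed out of the
# psql -t -A output. A sketch of just the parsing step, assuming the same
# '***|tag|sess_id' layout produced by the queries above.
def parse_con_id(result):
    """Extract sess_id from psql -t -A output containing a '***' marker row."""
    # result[0] looks like '...query output...***|<unique tag>|<sess_id>'
    return int(result[0].split('***')[1].split('|')[2].strip())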
def prep_udfs(self):
    # build the box_muller UDF shared library
    box_muller = self.workload_directory + os.sep + 'box_muller'
    gphome = os.environ['GPHOME']
    cmd = 'cd %s;make clean;make install' % (box_muller)
    self.output(cmd)
    (status, output) = commands.getstatusoutput(cmd)
    if status != 0:
        self.output('error: ' + output)
        return False
    else:
        self.output('make success. ')
    host_list = os.popen('psql -d %s -c "SELECT hostname FROM pg_catalog.gp_segment_configuration GROUP BY hostname ORDER by hostname;"' % (self.database_name)).readlines()
    lists = [i.strip() for i in host_list
             if 'hostname' not in i and '---' not in i and 'row' not in i and i != '\n']
    hostfile = ''
    for host in lists:
        hostfile = hostfile + '-h %s ' % (host)
    cmd = 'gpscp %s %s/bm.so =:%s/lib/postgresql/' % (hostfile, box_muller, gphome)
    self.output(cmd)
    (status, output) = commands.getstatusoutput(cmd)
    if status != 0:
        self.output('error: ' + output)
        return False
    else:
        self.output('gpscp success. ')
    (ok, result) = psql.runfile(ifile=self.scripts_dir + os.sep + 'prep_UDFs.sql',
                                dbname=self.database_name)
    self.output('------ prep udfs ------\n' + '\n'.join(result))
    if not ok or str(result).find('ERROR') != -1:
        self.output('prep_UDFs error. ')
        return False
    else:
        self.output('prep_UDFs success. ')
        return True
def load_data(self):
    self.output('-- generate data file: %s' % (self.fname))
    cmd = "dbgen -b %s -s 1 -T L > %s " % (self.dss, self.fname)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    if status != 0:
        print("generate data file %s error. " % (self.fname))
        sys.exit(2)
    self.output('generate data file succeeded. ')
    if not self.load_data_flag:
        return
    self.output('-- Start loading data')
    super(Copy, self).load_data()
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exist' % (data_directory))
        return
    table_name = 'lineitem_copy'
    with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
        cmd = f.read()
    cmd = self.replace_sql(sql=cmd, table_name=table_name)
    with open(self.tmp_folder + os.sep + 'copy_create.sql', 'w') as f:
        f.write(cmd)
        if self.user != 'gpadmin':
            f.write('GRANT ALL ON TABLE %s TO %s;' % (table_name, self.user))
    self.output(cmd)
    (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + 'copy_create.sql',
                                dbname=self.database_name, flag='-t -A')
    self.output('\n'.join(result))
    self.output('-- Complete loading data')
def load_data(self):
    if not self.load_data_flag:
        return
    self.output('-- Start loading data')
    super(Sri, self).load_data()
    # create table and sequence
    if self.distributed_randomly:
        cmd = ('drop sequence if exists sri_seq;create sequence sri_seq;drop table if exists %s;\n' % (self.table_name)
               + 'create table %s (tid bigint, bdate date, aid int, delta int, mtime timestamp) with (%s) distributed randomly'
               % (self.table_name, self.sql_suffix))
    else:
        cmd = ('drop sequence if exists sri_seq;create sequence sri_seq;drop table if exists %s;\n' % (self.table_name)
               + 'create table %s (tid bigint, bdate date, aid int, delta int, mtime timestamp) with (%s) distributed by (tid)'
               % (self.table_name, self.sql_suffix))
    if self.partitions == 0 or self.partitions is None:
        partition_query = ''
    else:
        with open(self.workload_directory + os.sep + 'partition.tpl', 'r') as p:
            partition_query = p.read()
        partition_query = partition_query.replace('table_name', self.table_name)
        partition_query = partition_query.replace('table_orientation', self.orientation)
        if self.compression_type is None:
            partition_query = partition_query.replace(', compresstype=table_compresstype', '')
        else:
            partition_query = partition_query.replace('table_compresstype', str(self.compression_type))
        if self.compression_level is None or self.compression_level < 0:
            partition_query = partition_query.replace(', compresslevel=table_compresslevel', '')
        else:
            partition_query = partition_query.replace('table_compresslevel', str(self.compression_level))
    cmd = cmd + partition_query + ';'
    with open(self.tmp_folder + os.sep + 'sri_create.sql', 'w') as f:
        f.write(cmd)
    self.output(cmd)
    (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + 'sri_create.sql',
                                dbname=self.database_name, flag='-t -A')
    self.output('\n'.join(result))
    self.output('-- Complete loading data')
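# The partition.tpl handling above is plain placeholder substitution. A
# sketch with a hypothetical single-clause template, showing how the
# compresstype/compresslevel fragments are dropped when unset; the real
# template shipped in the workload directory is larger.
TPL = ("partition by range (bdate) (start (date '2010-01-01') inclusive "
       "end (date '2011-01-01') exclusive every (interval '1 month') "
       "with (orientation=table_orientation, compresstype=table_compresstype"
       ", compresslevel=table_compresslevel))")

def render_partition(tpl, orientation, compression_type=None, compression_level=None):
    tpl = tpl.replace('table_orientation', orientation)
    if compression_type is None:
        # remove the whole clause, comma included, when no compression is set
        tpl = tpl.replace(', compresstype=table_compresstype', '')
    else:
        tpl = tpl.replace('table_compresstype', str(compression_type))
    if compression_level is None or compression_level < 0:
        tpl = tpl.replace(', compresslevel=table_compresslevel', '')
    else:
        tpl = tpl.replace('table_compresslevel', str(compression_level))
    return tpl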
def load_data(self): self.output("-- Start loading data") # get the data dir data_directory = self.workload_directory + os.sep + "data" if not os.path.exists(data_directory): self.output("ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists" % (data_directory)) sys.exit(2) if self.load_data_flag: cmd = "drop database if exists %s;" % (self.database_name) (ok, output) = psql.runcmd(cmd=cmd) if not ok: print cmd print "\n".join(output) sys.exit(2) self.output(cmd) self.output("\n".join(output)) cmd = "create database %s;" % (self.database_name) (ok, output) = psql.runcmd(cmd=cmd, username=self.user) if not ok: print cmd print "\n".join(output) sys.exit(2) self.output(cmd) self.output("\n".join(output)) tables = ["nation", "region", "part", "supplier", "partsupp", "customer", "orders", "lineitem", "revenue"] for table_name in tables: if self.continue_flag: if self.load_data_flag: with open(data_directory + os.sep + table_name + ".sql", "r") as f: cmd = f.read() cmd = self.replace_sql(sql=cmd, table_name=table_name) with open(self.tmp_folder + os.sep + table_name + ".sql", "w") as f: f.write(cmd) self.output(cmd) beg_time = datetime.now() (ok, result) = psql.runfile( ifile=self.tmp_folder + os.sep + table_name + ".sql", dbname=self.database_name, username=self.user, ) end_time = datetime.now() self.output("\n".join(result)) if ok and str(result).find("ERROR") == -1 and str(result).find("FATAL") == -1: status = "SUCCESS" else: status = "ERROR" self.continue_flag = False else: status = "SKIP" beg_time = datetime.now() end_time = beg_time else: status = "ERROR" beg_time = datetime.now() end_time = beg_time duration = end_time - beg_time duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000 beg_time = str(beg_time).split(".")[0] end_time = str(end_time).split(".")[0] self.output( " Loading=%s Iteration=%d Stream=%d Status=%s Time=%d" % (table_name, 1, 1, status, duration) ) self.report_sql( "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);" % (self.tr_id, self.s_id, table_name, status, beg_time, end_time, duration) ) self.output("-- Complete loading data")
def load_data(self):
    self.output('-- Start loading data')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data' + '-' + self.test_type
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exist' % (data_directory))
        sys.exit(2)
    if self.load_data_flag:
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        cmd = 'create database %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
    tables = ['nation', 'region', 'part', 'supplier', 'partsupp',
              'customer', 'orders', 'lineitem', 'revenue']
    if self.test_type == 'pxf':
        self.output('Deleting writable external table directory if it already exists')
        os.system('sudo -u pxf hdfs dfs -rm -r %s' % (self.pxf_writable_path))
    for table_name in tables:
        con_id = -1
        if self.continue_flag:
            if self.load_data_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name)
                # get con_id use this query
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + table_name + '.sql',
                                            dbname=self.database_name, flag='-t -A')  # , username = self.user
                # (ok, result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                # self.output('\n'.join(result))
                if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 \
                        and (str(result).find('INSERT 0') != -1 or str(result).find('CREATE VIEW') != -1):
                    status = 'SUCCESS'
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        beg_time = str(beg_time)
        end_time = str(end_time)
        self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                        % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
    self.output('-- Complete loading data')
def run_one_query(self, iteration, stream, qf_name, query, query_type='hawq'):
    con_id = -1
    if self.continue_flag:
        if not self.run_workload_flag:
            status = 'SKIP'
            beg_time = datetime.now()
            end_time = beg_time
            if query_type == 'hive':
                self.run_hive_query(iteration, stream, qf_name, query)
                return
        else:
            if gl.suffix and query_type == 'hawq':
                query = query.replace('TABLESUFFIX', self.tbl_suffix)
            elif query_type == 'pxf':
                query = query.replace('TABLESUFFIX', 'r_' + self.pxf_profile)
            else:
                query = query.replace('_TABLESUFFIX', '')
            query = query.replace('SQLSUFFIX', self.sql_suffix)
            # get con_id use this query
            unique_string1 = '%s_%s_%d_%d_' % (self.workload_name, self.user, iteration, stream) + qf_name
            unique_string2 = '%' + unique_string1 + '%'
            get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
            query_file_path = self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name
            with open(query_file_path, 'w') as f:
                f.write(query)
                f.write(get_con_id_sql)
            self.output(query)
            beg_time = datetime.now()
            (ok, result) = psql.runfile(ifile=query_file_path, dbname=self.database_name,
                                        username=self.user, flag='-t -A')
            end_time = datetime.now()
            if ok and str(result).find('psql: FATAL:') == -1 and str(result).find('NOTICE:') == -1 \
                    and str(result).find('ERROR:') == -1 and str(result).find('server closed') == -1:
                status = 'SUCCESS'
                # generate output and md5 file
                result_prefix = self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0] + '_' + query_type
                with open(result_prefix + '.output', 'w') as f:
                    f.write(str(result[0].split('***')[0]))
                with open(result_prefix + '.output', 'r') as f:
                    query_result = f.read()
                md5code = hashlib.md5(query_result.encode('utf-8')).hexdigest()
                with open(result_prefix + '.md5', 'w') as f:
                    f.write(md5code)
                # check query result
                if gl.check_result:
                    ans_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '_' + query_type + '.ans'
                    md5_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '_' + query_type + '.md5'
                    if os.path.exists(ans_file):
                        self.output('Check query result using ans file')
                        if not self.check_query_result(ans_file=ans_file, result_file=result_prefix + '.output'):
                            status = 'ERROR'
                    elif os.path.exists(md5_file):
                        self.output('Check query result using md5 file')
                        if not self.check_query_result(ans_file=md5_file, result_file=result_prefix + '.md5'):
                            status = 'ERROR'
                    else:
                        self.output('No answer file')
                        status = 'ERROR'
                con_id = int(result[0].split('***')[1].split('|')[2].strip())
            else:
                status = 'ERROR'
                self.output('\n'.join(result))
    else:
        status = 'ERROR'
        beg_time = datetime.now()
        end_time = beg_time
    duration = end_time - beg_time
    duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
    beg_time = str(beg_time)
    end_time = str(end_time)
    self.output('    Execution=%s Iteration=%d Stream=%d Status=%s Time=%d'
                % (qf_name.replace('.sql', ''), iteration, stream, status, duration))
    self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Execution', '%s', %d, %d, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, con_id, qf_name.replace('.sql', ''), iteration, stream, status, beg_time, end_time, duration, self.adj_s_id))
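# check_query_result is defined elsewhere in the framework; a minimal sketch
# of the md5 comparison path it has to implement, assuming answer files store
# the same hex digest format that the .md5 files above are written in.
def md5_matches(ans_md5_file, result_md5_file):
    """Compare the stored answer digest with the digest written for this run."""
    with open(ans_md5_file, 'r') as f:
        expected = f.read().strip()
    with open(result_md5_file, 'r') as f:
        actual = f.read().strip()
    return expected == actual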
def load_generate_series_data(self, hawq_tables):
    hawq_data_directory = self.workload_directory + os.sep + 'data-gen-series-hawq'
    # run all sql in each loading data file
    for table_name in hawq_tables:
        con_id = -1
        with open(os.path.join(hawq_data_directory, table_name + '.sql'), 'r') as f:
            cmd = f.read()
        cmd = self.replace_sql(sql=cmd, table_name=table_name, query_type='hawq')
        with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
            f.write(cmd)
        self.output(cmd)
        for batch_num in xrange(self.generate_data_batches_num):
            if self.continue_flag:
                batch_start_date = (
                    datetime.strptime(str(self.generate_data_start_date), '%Y%m%d').date()
                    + timedelta(days=batch_num * ceil(self.generate_data_batch_size / float(60 * 60 * 24)))
                ).strftime("%Y%m%d")
                flags = '-t -A -v v_date_YYYYMMDD="\'%s\'" -v v_nb_rows="%s" ' % (batch_start_date, self.generate_data_batch_size)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + table_name + '.sql',
                                            dbname=self.database_name, username='******', flag=flags)
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 \
                        and str(result).find('INSERT 0') != -1:
                    status = 'SUCCESS'
                    con_id = -1
                else:
                    status = 'ERROR'
                    beg_time = datetime.now()
                    end_time = beg_time
                    self.continue_flag = False
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time
            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d'
                        % (table_name, batch_num, 1, status, duration))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                            % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
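# Worked example of the batch-window arithmetic above, assuming one generated
# row per second: a batch of generate_data_batch_size rows spans
# ceil(batch_size / 86400) days, so batch N starts N * that many days after
# generate_data_start_date. The concrete values here are illustrative.
from datetime import datetime, timedelta
from math import ceil

start = datetime.strptime('20230101', '%Y%m%d').date()
batch_size = 100000                                      # rows per batch
days_per_batch = ceil(batch_size / float(60 * 60 * 24))  # -> 2.0 days
batch_3_start = (start + timedelta(days=3 * days_per_batch)).strftime('%Y%m%d')
assert batch_3_start == '20230107'                       # 3 batches * 2 days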
def load_loading(self, tables):
    self.output('\n--Start gpfdist')
    self._start_gpfdist()
    cmd = "gpssh -f %s -e 'ps -ef | grep gpfdist | grep -v grep'" % (self.hostfile_seg)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(output)
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for TPCDS: %s does not exist' % (data_directory))
        sys.exit(2)
    gpfdist_map = {}
    for item in tables:
        gpfdist_map[item] = []
    for cur_host in self.seg_hostname_list:
        cmd = "gpssh -h %s -e 'cat %s'" % (cur_host, os.path.join(self.tmp_tpcds_folder, 'dat_files.txt'))
        dat_file_suffix = '.dat'
        (status, output) = commands.getstatusoutput(cmd)
        if status != 0:
            print('Error while listing the data dir on %s' % (cur_host))
            print(output)
            sys.exit(2)
        else:
            lines = output.split('\n')
            for line in lines:
                if line.find(dat_file_suffix) != -1:
                    file_name = line.split(' ')[-1].strip()
                    tmp_name = file_name[:file_name.rindex('_')]
                    table_name = tmp_name[:tmp_name.rindex('_')]
                    if table_name not in gpfdist_map.keys():
                        if table_name.find('dbgen_version') == -1:
                            print('Error: %s not found in gpfdist_map' % (table_name))
                            sys.exit(2)
                    else:
                        gpfdist_map[table_name].append("'gpfdist://%s:%s/%s'" % (cur_host, self.gpfdist_port, file_name))
    for table_name in tables:
        if len(gpfdist_map[table_name]) == 0:
            print(table_name + ' has no data files; data generation went wrong in workload ' + self.workload_name)
        self.output(table_name + ':' + str(len(gpfdist_map[table_name])) + ' data files')
    self.output('\n--Start loading data into tables')
    # run all sql in each loading data file
    for table_name in tables:
        con_id = -1
        if self.continue_flag:
            with open(os.path.join(data_directory, table_name + '.sql'), 'r') as f:
                cmd = f.read()
            cmd = self.replace_sql(sql=cmd, table_name=table_name)
            location = "LOCATION(" + ','.join(gpfdist_map[table_name]) + ")"
            cmd = cmd.replace('LOCATION', location)
            # get con_id use this query
            unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
            unique_string2 = '%' + unique_string1 + '%'
            get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
            with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                f.write(cmd)
                f.write(get_con_id_sql)
            self.output(cmd)
            beg_time = datetime.now()
            (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + table_name + '.sql',
                                        dbname=self.database_name, username='******', flag='-t -A')
            end_time = datetime.now()
            self.output(result[0].split('***')[0])
            if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 \
                    and str(result).find('INSERT 0') != -1:
                status = 'SUCCESS'
                con_id = int(result[0].split('***')[1].split('|')[2].strip())
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time
                self.continue_flag = False
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        beg_time = str(beg_time).split('.')[0]
        end_time = str(end_time).split('.')[0]
        self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                        % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
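# The dat-file bookkeeping above derives the table name by stripping the two
# trailing '_<n>' tokens from each file name (parallel chunk and segment
# ids). A quick sketch; the exact dsdgen naming convention is inferred from
# the rindex logic above.
def table_from_dat(file_name):
    """'store_sales_3_17.dat' -> 'store_sales'"""
    tmp_name = file_name[:file_name.rindex('_')]   # drop the last _<id>
    return tmp_name[:tmp_name.rindex('_')]         # drop the next _<id>

assert table_from_dat('store_sales_3_17.dat') == 'store_sales'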
def load_data(self):
    self.output('-- Start loading data')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exist' % (data_directory))
        sys.exit(2)
    if self.load_data_flag:
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        cmd = 'create database %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
    tables = ['nation', 'region', 'part', 'supplier', 'partsupp',
              'customer', 'orders', 'lineitem', 'revenue']
    for table_name in tables:
        if self.continue_flag:
            if self.load_data_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name)
                with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                    f.write(cmd)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + table_name + '.sql',
                                            dbname=self.database_name, username=self.user)
                end_time = datetime.now()
                self.output('\n'.join(result))
                if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1:
                    status = 'SUCCESS'
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        beg_time = str(beg_time).split('.')[0]
        end_time = str(end_time).split('.')[0]
        self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
                        % (self.tr_id, self.s_id, table_name, status, beg_time, end_time, duration))
    self.output('-- Complete loading data')
def run_one_query(self, iteration, stream, qf_name, query):
    con_id = -1
    if self.continue_flag:
        if self.run_workload_flag:
            if gl.suffix:
                query = query.replace('TABLESUFFIX', self.tbl_suffix)
            else:
                query = query.replace('_TABLESUFFIX', '')
            query = query.replace('SQLSUFFIX', self.sql_suffix)
            # get con_id use this query
            unique_string1 = '%s_%s_%d_%d_' % (self.workload_name, self.user, iteration, stream) + qf_name
            unique_string2 = '%' + unique_string1 + '%'
            get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
            with open(self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name, 'w') as f:
                f.write(query)
                f.write(get_con_id_sql)
            self.output(query)
            beg_time = datetime.now()
            (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + '%d_%d_' % (iteration, stream) + qf_name,
                                        dbname=self.database_name, username=self.user, flag='-t -A')
            end_time = datetime.now()
            if ok and str(result).find('psql: FATAL:') == -1 and str(result).find('ERROR:') == -1 \
                    and str(result).find('server closed') == -1:
                status = 'SUCCESS'
                # generate output and md5 file
                result_prefix = self.result_directory + os.sep + '%d_%d_' % (iteration, stream) + qf_name.split('.')[0]
                with open(result_prefix + '.output', 'w') as f:
                    f.write(str(result[0].split('***')[0]))
                with open(result_prefix + '.output', 'r') as f:
                    query_result = f.read()
                md5code = hashlib.md5(query_result.encode('utf-8')).hexdigest()
                with open(result_prefix + '.md5', 'w') as f:
                    f.write(md5code)
                # check query result
                if gl.check_result:
                    ans_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '.ans'
                    md5_file = self.ans_directory + os.sep + qf_name.split('.')[0] + '.md5'
                    if os.path.exists(ans_file):
                        self.output('Check query result using ans file')
                        if not self.check_query_result(ans_file=ans_file, result_file=result_prefix + '.output'):
                            status = 'ERROR'
                    elif os.path.exists(md5_file):
                        self.output('Check query result using md5 file')
                        if not self.check_query_result(ans_file=md5_file, result_file=result_prefix + '.md5'):
                            status = 'ERROR'
                    else:
                        self.output('No answer file')
                        status = 'ERROR'
                con_id = int(result[0].split('***')[1].split('|')[2].strip())
            else:
                status = 'ERROR'
                self.output('\n'.join(result))
        else:
            status = 'SKIP'
            beg_time = datetime.now()
            end_time = beg_time
    else:
        status = 'ERROR'
        beg_time = datetime.now()
        end_time = beg_time
    duration = end_time - beg_time
    duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
    beg_time = str(beg_time)
    end_time = str(end_time)
    self.output('    Execution=%s Iteration=%d Stream=%d Status=%s Time=%d'
                % (qf_name.replace('.sql', ''), iteration, stream, status, duration))
    self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Execution', '%s', %d, %d, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, con_id, qf_name.replace('.sql', ''), iteration, stream, status, beg_time, end_time, duration, self.adj_s_id))
def load_data(self):
    self.output('-- Start loading data')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exist' % (data_directory))
        sys.exit(2)
    if self.load_data_flag:
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd, username='******')
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        cmd = 'create database %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd, username='******')
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
    tables = ['nation', 'region', 'part', 'supplier', 'partsupp',
              'customer', 'orders', 'lineitem', 'revenue']
    for table_name in tables:
        con_id = -1
        if self.continue_flag:
            if self.load_data_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name)
                # get con_id use this query
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + table_name + '.sql',
                                            dbname=self.database_name, flag='-t -A')  # , username = self.user
                # (ok, result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                # self.output('\n'.join(result))
                if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 \
                        and (str(result).find('INSERT 0') != -1 or str(result).find('CREATE VIEW') != -1):
                    status = 'SUCCESS'
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        beg_time = str(beg_time)
        end_time = str(end_time)
        self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                        % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
    self.output('-- Complete loading data')
def load_data(self):
    if self.load_data_flag:
        beg_time = datetime.now()
        end_time = beg_time
        status = 'ERROR'
        if self.check_seeds() and self.prep_e_tables() and self.prep_udfs():
            status = 'SUCCESS'
            os.system(self.scripts_dir + os.sep + 'prep_GUCs.sh')
            # dca_demo_conf sets the data scale
            with open(self.scripts_dir + os.sep + 'dca_demo_conf.sql', 'r') as f:
                sql = f.read()
            scale = int(8500000000 / 10.4 / 1024 * self.scale_factor)
            sql = sql.replace('8500000000', str(scale))
            with open(self.tmp_folder + os.sep + 'dca_demo_conf.sql', 'w') as f:
                f.write(sql)
            scripts = ['prep_dimensions.sql', 'prep_facts.sql', 'gen_order_base.sql', 'gen_facts.sql']
            for script in scripts:
                self.output('------ start %s ------' % (script))
                with open(self.scripts_dir + os.sep + script, 'r') as f:
                    sql = f.read()
                sql = sql.replace('PATH_OF_DCA_DEMO_CONF_SQL', '\i %s/dca_demo_conf.sql' % (self.tmp_folder))
                sql = sql.replace('SQLSUFFIX', self.sql_suffix)
                with open(self.tmp_folder + os.sep + script, 'w') as f:
                    f.write(sql)
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + script,
                                            dbname=self.database_name, flag='-e')
                self.output('\n'.join(result))
                if ok and str(result).find('ERROR') == -1:
                    end_time = datetime.now()
                else:
                    status = 'ERROR'
                    self.output('%s failed with an error!' % (script))
                    end_time = datetime.now()
                    break
    else:
        status = 'SKIP'
        beg_time = datetime.now()
        end_time = beg_time
    duration = end_time - beg_time
    duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
    beg_time = str(beg_time).split('.')[0]
    end_time = str(end_time).split('.')[0]
    self.output('    Loading Iteration=%d Stream=%d Status=%s Time=%d' % (1, 1, status, duration))
    self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
                    % (self.tr_id, self.s_id, 'retail_dw_' + self.tbl_suffix, 1, status, beg_time, end_time, duration))
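# Worked example of the scale substitution above for scale_factor = 1: the
# 8500000000 baseline literal from dca_demo_conf.sql is rescaled before the
# temp copy is written. The meaning of the 10.4 and 1024 divisors is not
# documented here, so this only illustrates the arithmetic.
scale_factor = 1
scale = int(8500000000 / 10.4 / 1024 * scale_factor)
assert scale == 798152  # replaces the 8500000000 literal in the temp copy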
def load_data(self):
    self.output('-- Start loading data ')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for Copy: %s does not exist' % (data_directory))
        return
    self.output('-- start gpfdist service')
    self.gpfdist_port = self._getOpenPort()
    cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (self.host_name, self.tmp_folder, self.gpfdist_port)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    cmd = 'ps -ef | grep gpfdist'
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    self.output('-- generate data file: %s' % (self.fname))
    cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor, self.fname)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    if status != 0:
        print("generate data file %s error. " % (self.fname))
        sys.exit(2)
    self.output('generate data file succeeded. ')
    if self.load_data_flag:
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        count = 0
        while True:
            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                count = count + 1
                time.sleep(1)
            else:
                self.output(cmd)
                self.output('\n'.join(output))
                if self.user != 'gpadmin':
                    cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                    (ok1, output1) = psql.runcmd(cmd=cmd1)
                    self.output(cmd1)
                    self.output('\n'.join(output1))
                break
            if count == 10:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
    tables = ['lineitem_gpfdist']
    niteration = 1
    while niteration <= self.num_iteration:
        self.output('-- Start iteration %d' % (niteration))
        for table_name in tables:
            con_id = -1
            if self.load_data_flag or self.run_workload_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name, num=niteration)
                # get con_id use this query
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                with open(self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql',
                                            dbname=self.database_name, flag='-t -A')  # , username = self.user
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 \
                        and str(result).find('INSERT 0') != -1:
                    status = 'SUCCESS'
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            self.output('    Loading=%s Iteration=%d Stream=%d Status=%s Time=%d'
                        % (table_name, niteration, 1, status, duration))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                            % (self.tr_id, self.s_id, con_id, table_name, niteration, status, beg_time, end_time, duration, self.adj_s_id))
        self.output('-- Complete iteration %d' % (niteration))
        niteration += 1
    if self.user != 'gpadmin':
        cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name, self.user)
        (ok1, output1) = psql.runcmd(cmd=cmd1)
        self.output(cmd1)
        self.output('\n'.join(output1))
    self.output('-- Complete loading data')