def load_data(self):
    '''Load data for workload'''
    if self.load_data_flag:
        # Recreate the target database from scratch before loading.
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd = cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        count = 0
        # Retry the create (1s apart) — presumably the drop may not have fully
        # settled on the server yet. TODO confirm why create can fail here.
        while(True):
            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd)
            if not ok:
                count = count + 1
                time.sleep(1)
            else:
                self.output(cmd)
                self.output('\n'.join(output))
                # Non-gpadmin users need explicit privileges on the new database.
                if self.user != 'gpadmin':
                    cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                    (ok1, output1) = psql.runcmd(cmd = cmd1)
                    self.output(cmd1)
                    self.output('\n'.join(output1))
                break
            # Give up after 10 consecutive failed create attempts.
            if count == 10:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
def __fetch_hawq_configuration(self): '''Fetch master hostname, segments hostname, data directory of HAWQ.''' self.hawq_master = config.getMasterHostName() self.hawq_segments = config.getSegHostNames() self.hawq_paths = [] self.hawq_config = {} sql = "SELECT gsc.hostname, pfe.fselocation FROM gp_segment_configuration gsc, pg_filespace_entry pfe WHERE gsc.dbid = pfe.fsedbid AND pfe.fselocation NOT LIKE 'hdfs%' ORDER BY pfe.fselocation;" (ok, out) = psql.runcmd( dbname = 'postgres', cmd = sql , ofile = '-', flag = '-q -t' ) if not ok: print out raise Exception("Failed to get HAWQ configuration paths.") else: for line in out: line = line.strip() if line: (host, path) = line.split( '|' ) if not self.hawq_config.has_key(host.strip()): self.hawq_config[host.strip()] = [] self.hawq_config[host.strip()].append(path.strip()) # self.hawq_config.append( (host.strip(), path.strip()) ) self.hawq_paths.append( path.strip() ) print self.hawq_master print self.hawq_segments print self.hawq_config
def __fetch_hawq_configuration(self): '''Fetch master hostname, segments hostname, data directory of HAWQ.''' self.hawq_master = config.getMasterHostName() self.hawq_segments = config.getSegHostNames() self.hawq_paths = [] self.hawq_config = {} sql = "SELECT gsc.hostname, pfe.fselocation FROM gp_segment_configuration gsc, pg_filespace_entry pfe WHERE gsc.dbid = pfe.fsedbid AND pfe.fselocation NOT LIKE 'hdfs%' ORDER BY pfe.fselocation;" (ok, out) = psql.runcmd(dbname='postgres', cmd=sql, ofile='-', flag='-q -t') if not ok: print out raise Exception("Failed to get HAWQ configuration paths.") else: for line in out: line = line.strip() if line: (host, path) = line.split('|') if not self.hawq_config.has_key(host.strip()): self.hawq_config[host.strip()] = [] self.hawq_config[host.strip()].append(path.strip()) # self.hawq_config.append( (host.strip(), path.strip()) ) self.hawq_paths.append(path.strip()) print self.hawq_master print self.hawq_segments print self.hawq_config
def load_data(self):
    '''Recreate the workload database and load (or skip) every table.'''
    self.output('\n-- Start loading data')
    if self.load_data_flag:
        # Recreate the target database from scratch.
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        cmd = 'create database %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd, username='******')
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
    # Table set for this workload (presumably the TPC-DS schema — TODO confirm).
    tables = [
        'call_center', 'catalog_page', 'catalog_returns', 'catalog_sales',
        'customer', 'customer_address', 'customer_demographics', 'date_dim',
        'household_demographics', 'income_band', 'inventory', 'item',
        'promotion', 'reason', 'ship_mode', 'store', 'store_returns',
        'store_sales', 'time_dim', 'warehouse', 'web_page', 'web_returns',
        'web_sales', 'web_site'
    ]
    if not self.load_data_flag:
        # Loading disabled: still record a SKIP row per table so reports stay complete.
        beg_time = str(datetime.now()).split('.')[0]
        for table_name in tables:
            self.output(' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, 'SKIP', 0))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, 'SKIP', '%s', '%s', 0, NULL, NULL, NULL, %d);" % (self.tr_id, self.s_id, table_name, beg_time, beg_time, self.adj_s_id))
    else:
        # Delegate the actual setup / data generation / loading steps.
        self.load_setup()
        self.load_generate()
        self.load_loading(tables=tables)
    self.output('-- Complete loading data')
def test_02_check_hawq_health(self): '''Test case 04: Check health including: segment down''' # Potential improvement: further investigation on root cause using gpcheckperf sql = "SELECT count(*) FROM pg_catalog.gp_segment_configuration WHERE mode<>'s'" (ok, out) = psql.runcmd( dbname = 'postgres', cmd = sql , ofile = '-', flag = '-q -t' ) if not ok: print str(out) print('test_02_check_hawq_health: error ') if int(out[0]) == 0: print('test_02_check_hawq_health: success ') else: print('test_02_check_hawq_health: %d segments is failed. ' % (int(out[0])))
def load_data(self): self.output('\n-- Start loading data') if self.load_data_flag: cmd = 'drop database if exists %s;' % (self.database_name) (ok, output) = psql.runcmd(cmd = cmd) if not ok: print cmd print '\n'.join(output) sys.exit(2) self.output(cmd) self.output('\n'.join(output)) cmd = 'create database %s;' % (self.database_name) (ok, output) = psql.runcmd(cmd = cmd, username = '******') if not ok: print cmd print '\n'.join(output) sys.exit(2) self.output(cmd) self.output('\n'.join(output)) tables = ['call_center', 'catalog_page', 'catalog_returns', 'catalog_sales', 'customer', 'customer_address', 'customer_demographics', 'date_dim', 'household_demographics', 'income_band', 'inventory', 'item', 'promotion', 'reason', 'ship_mode', 'store', 'store_returns', 'store_sales', 'time_dim', 'warehouse','web_page', 'web_returns', 'web_sales', 'web_site'] if not self.load_data_flag: beg_time = str(datetime.now()).split('.')[0] for table_name in tables: self.output(' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, 'SKIP', 0)) self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, 'SKIP', '%s', '%s', 0, NULL, NULL, NULL, %d);" % (self.tr_id, self.s_id, table_name, beg_time, beg_time, self.adj_s_id)) else: self.load_setup() self.load_generate() self.load_loading(tables = tables) self.output('-- Complete loading data')
def test_02_check_hawq_health(self): '''Test case 04: Check health including: segment down''' # Potential improvement: further investigation on root cause using gpcheckperf sql = "SELECT count(*) FROM pg_catalog.gp_segment_configuration WHERE mode<>'s'" (ok, out) = psql.runcmd(dbname='postgres', cmd=sql, ofile='-', flag='-q -t') if not ok: print str(out) print('test_02_check_hawq_health: error ') if int(out[0]) == 0: print('test_02_check_hawq_health: success ') else: print('test_02_check_hawq_health: %d segments is failed. ' % (int(out[0])))
def load_data(self):
    '''Recreate the database and load every TPC-H table, recording one
    timed result row per table via report_sql.'''
    self.output("-- Start loading data")
    # get the data dir
    data_directory = self.workload_directory + os.sep + "data"
    if not os.path.exists(data_directory):
        self.output("ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists" % (data_directory))
        sys.exit(2)
    if self.load_data_flag:
        # Recreate the target database from scratch.
        cmd = "drop database if exists %s;" % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print "\n".join(output)
            sys.exit(2)
        self.output(cmd)
        self.output("\n".join(output))
        cmd = "create database %s;" % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
        if not ok:
            print cmd
            print "\n".join(output)
            sys.exit(2)
        self.output(cmd)
        self.output("\n".join(output))
    tables = ["nation", "region", "part", "supplier", "partsupp", "customer", "orders", "lineitem", "revenue"]
    for table_name in tables:
        # continue_flag goes False on the first failure; later tables are marked ERROR.
        if self.continue_flag:
            if self.load_data_flag:
                # Read the per-table DDL template, substitute placeholders, and
                # write the concrete script into the tmp folder.
                with open(data_directory + os.sep + table_name + ".sql", "r") as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name)
                with open(self.tmp_folder + os.sep + table_name + ".sql", "w") as f:
                    f.write(cmd)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(
                    ifile=self.tmp_folder + os.sep + table_name + ".sql",
                    dbname=self.database_name,
                    username=self.user,
                )
                end_time = datetime.now()
                self.output("\n".join(result))
                # Success requires a clean run with no ERROR/FATAL in the output.
                if ok and str(result).find("ERROR") == -1 and str(result).find("FATAL") == -1:
                    status = "SUCCESS"
                else:
                    status = "ERROR"
                    self.continue_flag = False
            else:
                status = "SKIP"
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = "ERROR"
            beg_time = datetime.now()
            end_time = beg_time
        # Duration in milliseconds (integer division under Python 2).
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        # Drop sub-second precision from the timestamps for reporting.
        beg_time = str(beg_time).split(".")[0]
        end_time = str(end_time).split(".")[0]
        self.output(
            " Loading=%s Iteration=%d Stream=%d Status=%s Time=%d"
            % (table_name, 1, 1, status, duration)
        )
        self.report_sql(
            "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
            % (self.tr_id, self.s_id, table_name, status, beg_time, end_time, duration)
        )
    self.output("-- Complete loading data")
def load_data(self):
    '''Start gpfdist, generate a lineitem data file with dbgen, recreate the
    database, then load via external table for each configured iteration.'''
    self.output('-- Start loading data ')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for Copy: %s does not exists' % (data_directory))
        return
    # Launch a gpfdist server on a free port to serve the generated data file.
    self.output('-- start gpfdist service')
    self.gpfdist_port = self._getOpenPort()
    cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (self.host_name, self.tmp_folder, self.gpfdist_port)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    # Log running gpfdist processes for diagnostics (exit status is not checked).
    cmd = 'ps -ef | grep gpfdist'
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    # Generate the lineitem data file at the configured scale factor.
    self.output('-- generate data file: %s' % (self.fname))
    cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor, self.fname)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    if status != 0:
        print("generate data file %s error. " % (self.fname))
        sys.exit(2)
    self.output('generate data file successed. ')
    if self.load_data_flag:
        # Recreate the target database from scratch.
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd = cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        count = 0
        # Retry the create (1s apart, up to 10 attempts) before giving up.
        while(True):
            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd)
            if not ok:
                count = count + 1
                time.sleep(1)
            else:
                self.output(cmd)
                self.output('\n'.join(output))
                # Non-gpadmin users need explicit privileges on the new database.
                if self.user != 'gpadmin':
                    cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                    (ok1, output1) = psql.runcmd(cmd = cmd1)
                    self.output(cmd1)
                    self.output('\n'.join(output1))
                break
            if count == 10:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
    tables = ['lineitem_gpfdist']
    niteration = 1
    while niteration <= self.num_iteration:
        self.output('-- Start iteration %d' % (niteration))
        for table_name in tables:
            con_id = -1
            if self.load_data_flag or self.run_workload_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql = cmd, table_name = table_name, num = niteration)
                # get con_id use this query: append a pg_stat_activity probe whose
                # '***'-marked output row carries the session id of this load.
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                with open(self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', dbname = self.database_name, flag = '-t -A') #, username = self.user)
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                # Success requires a clean run, no ERROR/FATAL, and an INSERT acknowledgement.
                if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 and str(result).find('INSERT 0') != -1:
                    status = 'SUCCESS'
                    # Parse the session id out of the '***' probe row.
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
            # Duration in milliseconds (integer division under Python 2).
            duration = end_time - beg_time
            duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            self.output(' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, niteration, 1, status, duration))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" % (self.tr_id, self.s_id, con_id, table_name, niteration, status, beg_time, end_time, duration, self.adj_s_id))
        self.output('-- Complete iteration %d' % (niteration))
        niteration += 1
    # Revoke the temporary grant issued during database creation.
    if self.user != 'gpadmin':
        cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name, self.user)
        (ok1, output1) = psql.runcmd(cmd = cmd1)
        self.output(cmd1)
        self.output('\n'.join(output1))
    self.output('-- Complete loading data')
def load_data(self):
    '''Recreate the database and load each TPC-H table in order, logging a
    timed SUCCESS/ERROR/SKIP result row per table.'''
    self.output('-- Start loading data')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output(
            'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists'
            % (data_directory))
        sys.exit(2)
    if self.load_data_flag:
        # Recreate the target database from scratch.
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        cmd = 'create database %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
    tables = [
        'nation', 'region', 'part', 'supplier', 'partsupp', 'customer',
        'orders', 'lineitem', 'revenue'
    ]
    for table_name in tables:
        # continue_flag goes False on the first failure; later tables are marked ERROR.
        if self.continue_flag:
            if self.load_data_flag:
                # Read the per-table DDL template, substitute placeholders, and
                # run the concrete script from the tmp folder.
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name)
                with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                    f.write(cmd)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile=self.tmp_folder + os.sep + table_name + '.sql',
                                            dbname=self.database_name,
                                            username=self.user)
                end_time = datetime.now()
                self.output('\n'.join(result))
                # Success requires a clean run with no ERROR/FATAL in the output.
                if ok and str(result).find('ERROR') == -1 and str(
                        result).find('FATAL') == -1:
                    status = 'SUCCESS'
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        # Duration in milliseconds (integer division under Python 2).
        duration = end_time - beg_time
        duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
        # Drop sub-second precision from the timestamps for reporting.
        beg_time = str(beg_time).split('.')[0]
        end_time = str(end_time).split('.')[0]
        self.output(
            ' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d'
            % (table_name, 1, 1, status, duration))
        self.report_sql(
            "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
            % (self.tr_id, self.s_id, table_name, status, beg_time, end_time, duration))
    self.output('-- Complete loading data')
def load_data(self):
    '''Start gpfdist, generate lineitem data with dbgen, recreate the database,
    then run the gpfdist external-table load for each configured iteration.'''
    self.output('-- Start loading data ')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output(
            'ERROR: Cannot find DDL to create tables for Copy: %s does not exists'
            % (data_directory))
        return
    # Launch a gpfdist server on a free port to serve the generated data file.
    self.output('-- start gpfdist service')
    self.gpfdist_port = self._getOpenPort()
    cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (
        self.host_name, self.tmp_folder, self.gpfdist_port)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    # Log running gpfdist processes for diagnostics (exit status is not checked).
    cmd = 'ps -ef | grep gpfdist'
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    # Generate the lineitem data file at the configured scale factor.
    self.output('-- generate data file: %s' % (self.fname))
    cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor, self.fname)
    (status, output) = commands.getstatusoutput(cmd)
    self.output(cmd)
    self.output(output)
    if status != 0:
        print("generate data file %s error. " % (self.fname))
        sys.exit(2)
    self.output('generate data file successed. ')
    if self.load_data_flag:
        # Recreate the target database from scratch.
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd=cmd)
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        count = 0
        # Retry the create (1s apart, up to 10 attempts) before giving up.
        while (True):
            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                count = count + 1
                time.sleep(1)
            else:
                self.output(cmd)
                self.output('\n'.join(output))
                # Non-gpadmin users need explicit privileges on the new database.
                if self.user != 'gpadmin':
                    cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (
                        self.database_name, self.user)
                    (ok1, output1) = psql.runcmd(cmd=cmd1)
                    self.output(cmd1)
                    self.output('\n'.join(output1))
                break
            if count == 10:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
    tables = ['lineitem_gpfdist']
    niteration = 1
    while niteration <= self.num_iteration:
        self.output('-- Start iteration %d' % (niteration))
        for table_name in tables:
            con_id = -1
            if self.load_data_flag or self.run_workload_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql=cmd, table_name=table_name, num=niteration)
                # get con_id use this query: append a pg_stat_activity probe whose
                # '***'-marked output row carries the session id of this load.
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (
                    unique_string1, unique_string2)
                with open(self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(
                    ifile=self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql',
                    dbname=self.database_name,
                    flag='-t -A')  #, username = self.user)
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                # Success requires a clean run, no ERROR/FATAL, and an INSERT acknowledgement.
                if ok and str(result).find('ERROR') == -1 and str(
                        result).find('FATAL') == -1 and str(result).find(
                        'INSERT 0') != -1:
                    status = 'SUCCESS'
                    # Parse the session id out of the '***' probe row.
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
            # Duration in milliseconds (integer division under Python 2).
            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            self.output(
                ' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d'
                % (table_name, niteration, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                % (self.tr_id, self.s_id, con_id, table_name, niteration, status, beg_time, end_time, duration, self.adj_s_id))
        self.output('-- Complete iteration %d' % (niteration))
        niteration += 1
    # Revoke the temporary grant issued during database creation.
    if self.user != 'gpadmin':
        cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name, self.user)
        (ok1, output1) = psql.runcmd(cmd=cmd1)
        self.output(cmd1)
        self.output('\n'.join(output1))
    self.output('-- Complete loading data')
def load_data(self):
    '''Recreate the database and load each TPC-H table, capturing the backend
    session id (con_id) of every load via a pg_stat_activity probe.'''
    self.output('-- Start loading data')
    # get the data dir
    data_directory = self.workload_directory + os.sep + 'data'
    if not os.path.exists(data_directory):
        self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists' % (data_directory))
        sys.exit(2)
    if self.load_data_flag:
        # Recreate the target database from scratch.
        cmd = 'drop database if exists %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd = cmd, username = '******')
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
        cmd = 'create database %s;' % (self.database_name)
        (ok, output) = psql.runcmd(cmd = cmd, username = '******')
        if not ok:
            print cmd
            print '\n'.join(output)
            sys.exit(2)
        self.output(cmd)
        self.output('\n'.join(output))
    tables = ['nation', 'region', 'part', 'supplier', 'partsupp', 'customer', 'orders', 'lineitem', 'revenue']
    for table_name in tables:
        con_id = -1
        # continue_flag goes False on the first failure; later tables are marked ERROR.
        if self.continue_flag:
            if self.load_data_flag:
                with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                    cmd = f.read()
                cmd = self.replace_sql(sql = cmd, table_name = table_name)
                # get con_id use this query: append a pg_stat_activity probe whose
                # '***'-marked output row carries the session id of this load.
                unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                unique_string2 = '%' + unique_string1 + '%'
                get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                    f.write(cmd)
                    f.write(get_con_id_sql)
                self.output(cmd)
                beg_time = datetime.now()
                (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + table_name + '.sql', dbname = self.database_name, flag = '-t -A') #, username = self.user)
                #(ok,result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                end_time = datetime.now()
                self.output(result[0].split('***')[0])
                #self.output('\n'.join(result))
                # Success requires a clean run, no ERROR:/FATAL:, and either an
                # INSERT acknowledgement or a CREATE VIEW (the 'revenue' table is a view).
                if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 and (str(result).find('INSERT 0') != -1 or str(result).find('CREATE VIEW') != -1):
                    status = 'SUCCESS'
                    # Parse the session id out of the '***' probe row.
                    con_id = int(result[0].split('***')[1].split('|')[2].strip())
                else:
                    status = 'ERROR'
                    self.continue_flag = False
            else:
                status = 'SKIP'
                beg_time = datetime.now()
                end_time = beg_time
        else:
            status = 'ERROR'
            beg_time = datetime.now()
            end_time = beg_time
        # Duration in milliseconds (integer division under Python 2).
        duration = end_time - beg_time
        duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
        # NOTE: unlike sibling variants, full timestamp precision is kept here.
        beg_time = str(beg_time)
        end_time = str(end_time)
        self.output(' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, status, duration))
        self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
    self.output('-- Complete loading data')
def load_data(self): self.output('-- Start loading data') # get the data dir data_directory = self.workload_directory + os.sep + 'data' + '-' + self.test_type if not os.path.exists(data_directory): self.output( 'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists' % (data_directory)) sys.exit(2) if self.load_data_flag: cmd = 'drop database if exists %s;' % (self.database_name) (ok, output) = psql.runcmd(cmd=cmd) if not ok: print cmd print '\n'.join(output) sys.exit(2) self.output(cmd) self.output('\n'.join(output)) cmd = 'create database %s;' % (self.database_name) (ok, output) = psql.runcmd(cmd=cmd) if not ok: print cmd print '\n'.join(output) sys.exit(2) self.output(cmd) self.output('\n'.join(output)) tables = [ 'nation', 'region', 'part', 'supplier', 'partsupp', 'customer', 'orders', 'lineitem', 'revenue' ] if self.test_type == 'pxf': self.output( 'Deleting writable external table director if it already exists' ) os.system('sudo -u pxf hdfs dfs -rm -r %s' % (self.pxf_writable_path)) for table_name in tables: con_id = -1 if self.continue_flag: if self.load_data_flag: with open(data_directory + os.sep + table_name + '.sql', 'r') as f: cmd = f.read() cmd = self.replace_sql(sql=cmd, table_name=table_name) # get con_id use this query unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name unique_string2 = '%' + unique_string1 + '%' get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % ( unique_string1, unique_string2) with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f: f.write(cmd) f.write(get_con_id_sql) self.output(cmd) beg_time = datetime.now() (ok, result) = psql.runfile( ifile=self.tmp_folder + os.sep + table_name + '.sql', dbname=self.database_name, flag='-t -A') #, username = self.user) #(ok,result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql')) end_time = datetime.now() 
self.output(result[0].split('***')[0]) #self.output('\n'.join(result)) if ok and str(result).find('ERROR:') == -1 and str( result).find('FATAL:') == -1 and ( str(result).find('INSERT 0') != -1 or str(result).find('CREATE VIEW') != -1): status = 'SUCCESS' con_id = int( result[0].split('***')[1].split('|')[2].strip()) else: status = 'ERROR' self.continue_flag = False else: status = 'SKIP' beg_time = datetime.now() end_time = beg_time else: status = 'ERROR' beg_time = datetime.now() end_time = beg_time duration = end_time - beg_time duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000 beg_time = str(beg_time) end_time = str(end_time) self.output( ' Loading=%s Iteration=%d Stream=%d Status=%s Time=%d' % (table_name, 1, 1, status, duration)) self.report_sql( "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id)) self.output('-- Complete loading data')