def __init__(self, workload_specification, workload_directory, report_directory, report_sql_file, cs_id, tr_id, user):
    # initialize common variables
    self.cs_id = cs_id
    self.tr_id = tr_id
    self.us_id = 0
    self.s_id = 0
    self.adj_s_id = 0
    self.user = user

    # check that the user id exists in the backend database
    if self.cs_id != 0:
        self.us_id = check.check_id(result_id = 'us_id', table_name = 'hst.users',
                                    search_condition = "us_name = '%s'" % (self.user))
        if self.us_id is None:
            sys.stderr.write('The db user name %s is wrong!\n' % (self.user))
            sys.exit(2)

    self.continue_flag = True
    # the workload should always run by default
    self.should_stop = False

    # set the workload source directory
    self.workload_directory = workload_directory

    # required fields in the specification: workload_name, database_name
    try:
        self.workload_name = workload_specification['workload_name'].strip()
        self.database_name = workload_specification['database_name'].strip()
    except Exception, e:
        print('Please add the %s option in the schedule file.' % (str(e)))
        sys.exit(2)
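# Illustration only: a minimal workload specification and constructor call.
# `TpchWorkload` and every path below are placeholders, not names taken from
# this excerpt; cs_id = 0 skips all backend database lookups:
def _example_construct_workload():
    spec = {
        'workload_name': 'tpch_ao_row',  # hypothetical workload name
        'database_name': 'gpadmin',      # hypothetical target database
    }
    return TpchWorkload(workload_specification = spec,
                        workload_directory = '/data/lsp/workloads/tpch',  # placeholder
                        report_directory = '/data/lsp/report',            # placeholder
                        report_sql_file = '/data/lsp/report/report.sql',  # placeholder
                        cs_id = 0, tr_id = -1, user = 'gpadmin')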
def __set_info(self):
    tbl_suffix = ''
    sql_suffix = ''

    # init tpch-specific configuration, such as the tpch table settings
    if self.append_only in [None, True]:
        tbl_suffix = tbl_suffix + 'ao'
        sql_suffix = sql_suffix + 'appendonly = true'

        # the adjusted (comparison) scenario flips the distribution policy
        if self.distributed_randomly:
            adj_distributed_randomly = 'FALSE'
        else:
            adj_distributed_randomly = 'TRUE'

        tbl_suffix = tbl_suffix + '_' + self.orientation
        sql_suffix = sql_suffix + ', ' + 'orientation = ' + self.orientation

        if self.orientation in ['ROW', 'COLUMN']:
            # row group size and page size only apply to PARQUET tables
            self.page_size = -1
            self.row_group_size = -1

            if self.compression_type is None:
                tbl_suffix = tbl_suffix + '_nocomp'
                self.compression_type = 'None'
                self.compression_level = -1
            elif self.compression_type == 'QUICKLZ':
                # QUICKLZ only supports compression level 1
                self.compression_level = 1
                tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type + ', ' + 'compresslevel = ' + str(self.compression_level)
            elif self.compression_type == 'ZLIB':
                # fall back to level 1 when the level is missing or outside 1-9
                if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                    self.compression_level = 1
                tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type + ', ' + 'compresslevel = ' + str(self.compression_level)
            elif self.compression_type == 'SNAPPY':
                # SNAPPY has no tunable compression level
                self.compression_level = -1
                tbl_suffix = tbl_suffix + '_' + self.compression_type
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type
            else:
                tbl_suffix = tbl_suffix + '_nocomp'
        else:
            # PARQUET
            if self.row_group_size is None or self.page_size is None:
                self.row_group_size = 8388608
                self.page_size = 1048576
            sql_suffix = sql_suffix + ', ' + 'pagesize = %s, rowgroupsize = %s' % (self.page_size, self.row_group_size)

            if self.compression_type == 'SNAPPY':
                self.compression_level = -1
                tbl_suffix = tbl_suffix + '_' + self.compression_type
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type
            elif self.compression_type == 'GZIP':
                if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                    self.compression_level = 1
                tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type + ', ' + 'compresslevel = ' + str(self.compression_level)
            else:
                tbl_suffix = tbl_suffix + '_nocomp'

        if self.partitions > 0:
            tbl_suffix += '_part'
        else:
            tbl_suffix += '_nopart'
    else:
        print 'heap tables are not supported'
        sys.exit(2)

    # note: the wl_catetory column is spelled this way in the hst schema
    if self.num_concurrency > 1:
        self.check_condition = ("wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d "
                                "and wl_appendonly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d "
                                "and wl_page_size = %d and wl_compression_type = '%s' and wl_compression_level = %d "
                                "and wl_partitions = %d and wl_iteration = %d and wl_concurrency = %d "
                                "and wl_query_order = '%s'") \
                               % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size,
                                  self.append_only, self.orientation, self.row_group_size, self.page_size,
                                  self.compression_type, self.compression_level, self.partitions,
                                  self.num_iteration, self.num_concurrency, self.run_workload_mode)
    else:
        # single-stream runs do not pin the iteration count
        self.check_condition = ("wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d "
                                "and wl_appendonly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d "
                                "and wl_page_size = %d and wl_compression_type = '%s' and wl_compression_level = %d "
                                "and wl_partitions = %d and wl_concurrency = %d and wl_query_order = '%s'") \
                               % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size,
                                  self.append_only, self.orientation, self.row_group_size, self.page_size,
                                  self.compression_type, self.compression_level, self.partitions,
                                  self.num_concurrency, self.run_workload_mode)

    adj_check_condition = ("wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d "
                           "and wl_appendonly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d "
                           "and wl_page_size = %d and wl_compression_type = '%s' and wl_compression_level = %d "
                           "and wl_partitions = %d and wl_iteration = %d and wl_concurrency = %d "
                           "and wl_query_order = '%s'") \
                          % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size,
                             self.append_only, self.orientation, self.row_group_size, self.page_size,
                             self.compression_type, self.compression_level, self.partitions,
                             self.num_iteration, self.num_concurrency, self.run_workload_mode)

    self.wl_values = "'%s', '%s', '%s', %d, '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
                     % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type,
                        self.data_volume_size, self.append_only, self.orientation, self.row_group_size,
                        self.page_size, self.compression_type, self.compression_level, self.partitions,
                        self.num_iteration, self.num_concurrency, self.run_workload_mode)

    # currently identical to wl_values and unused: the adjusted workload
    # record is never inserted from here
    adj_wl_values = "'%s', '%s', '%s', %d, '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
                    % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type,
                       self.data_volume_size, self.append_only, self.orientation, self.row_group_size,
                       self.page_size, self.compression_type, self.compression_level, self.partitions,
                       self.num_iteration, self.num_concurrency, self.run_workload_mode)

    if self.cs_id != 0:
        # check whether the workload record exists; create it if not
        self.wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload',
                                    search_condition = self.check_condition)
        if self.wl_id is None:
            check.insert_new_record(table_name = 'hst.workload',
                                    col_list = 'wl_name, wl_catetory, wl_data_volume_type, wl_data_volume_size, wl_appendonly, wl_orientation, wl_row_group_size, wl_page_size, wl_compression_type, wl_compression_level, wl_partitions, wl_iteration, wl_concurrency, wl_query_order',
                                    values = self.wl_values)
            self.wl_id = check.get_max_id(result_id = 'wl_id', table_name = 'hst.workload')

        # check whether the scenario record exists; create it if not
        self.s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario',
                                   search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, self.wl_id, self.us_id))
        if self.s_id is None:
            check.insert_new_record(table_name = 'hst.scenario', col_list = 'cs_id, wl_id, us_id',
                                    values = '%d, %d, %d' % (self.cs_id, self.wl_id, self.us_id))
            self.s_id = check.get_max_id(result_id = 's_id', table_name = 'hst.scenario')

        # the adjusted (comparison) workload and scenario are looked up only,
        # never created here; a missing record maps to adj_s_id = -1
        adj_wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload',
                                   search_condition = adj_check_condition)
        if adj_wl_id is None:
            self.adj_s_id = -1
        else:
            self.adj_s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario',
                                           search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, adj_wl_id, self.us_id))
            if self.adj_s_id is None:
                self.adj_s_id = -1

    self.tbl_suffix = tbl_suffix.lower()
    self.sql_suffix = sql_suffix
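# Worked example, traced from __set_info above: with append_only = True,
# orientation = 'PARQUET', compression_type = 'SNAPPY' and partitions > 0,
# the method produces
#   tbl_suffix = 'ao_parquet_snappy_part'
#   sql_suffix = 'appendonly = true, orientation = PARQUET, pagesize = 1048576, rowgroupsize = 8388608, compresstype = SNAPPY'
# A minimal sketch of how the two suffixes would plug into DDL; the real DDL
# templates are not shown in this excerpt, and the table and column names
# here are placeholders:
def _example_ddl(tbl_suffix, sql_suffix):
    return 'CREATE TABLE lineitem_%s (l_orderkey bigint) WITH (%s);' % (tbl_suffix, sql_suffix)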
# parse each schedule file
for schedule_name in schedule_list:
    schedule_file = LSP_HOME + os.sep + 'schedules' + os.sep + schedule_name + '.yml'
    with open(schedule_file, 'r') as fschedule:
        schedule_parser = yaml.load(fschedule)

    # parse the list of workloads to execute
    if 'workloads_list' not in schedule_parser.keys() or schedule_parser['workloads_list'] is None:
        print 'No workload is specified in schedule file: %s' % (schedule_name + '.yml')
        continue

    # HAWQ 1.X baseline versions are hard-coded here.
    hawq1_version = 'HAWQ 1.3.0.0 build 13048GVA HK'
    phd1_version = 'PHD 3.0'

    if cluster_name is None and 'cluster_name' in schedule_parser.keys():
        cluster_name = schedule_parser['cluster_name']
    if cluster_name is None:
        sys.stderr.write('Invalid cluster name!\n')
        add_database = False
    else:
        if cluster_name == 'HAWQ main performance on BCN cluster':
            hawq2_version = 'HAWQ 2.0.0.0_beta build 21481 BCN HK'

    # check cluster information when lsp is not run in standalone mode
    if add_database:
        # check that the specified cluster exists
        cs_id = check.check_id(result_id = 'cs_id', table_name = 'hst.cluster_settings',
                               search_condition = "cs_name = '%s'" % (cluster_name))
        if cs_id is None:
            sys.stderr.write('Invalid cluster name %s!\n' % (cluster_name))
            continue

    if not start_flag:
        start_flag = True

    # when lsp is not run in standalone mode, record test run information
    # (build_id, build_url, hawq_version, hdfs_version) in the backend database
    tr_id = -1
    if add_database:
        output = commands.getoutput('cat ~/qa.sh')
        try:
            wd = output[output.index('wd='):].split('"')[1]
            output = commands.getoutput('%s; cat build_info_file.txt' % (wd))
            build_id = output[output.index('PULSE_ID_INFO'):].split('\n')[0].split('=')[1]
            build_url = output[output.index('PULSE_PROJECT_INFO'):].split('\n')[0].split('=')[1]
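# Illustration only: the schedule file shape the loop above expects, based on
# the keys it reads ('cluster_name', 'workloads_list'); the workload names
# below are placeholders:
#
#   cluster_name: HAWQ main performance on BCN cluster
#   workloads_list:
#     - tpch_ao_row
#     - tpch_parquet_snappy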
def __set_info(self):
    tbl_suffix = ''
    sql_suffix = ''

    # init tpch-specific configuration, such as the tpch table settings
    if self.append_only in [None, True]:
        tbl_suffix = tbl_suffix + 'ao'
        sql_suffix = sql_suffix + 'appendonly = true'

        # the adjusted (comparison) scenario flips the distribution policy
        if self.distributed_randomly:
            adj_distributed_randomly = 'FALSE'
        else:
            adj_distributed_randomly = 'TRUE'

        tbl_suffix = tbl_suffix + '_' + self.orientation
        sql_suffix = sql_suffix + ', ' + 'orientation = ' + self.orientation

        if self.orientation in ['ROW', 'COLUMN']:
            # row group size and page size only apply to PARQUET tables
            self.page_size = -1
            self.row_group_size = -1

            if self.compression_type is None:
                tbl_suffix = tbl_suffix + '_nocomp'
                self.compression_type = 'None'
                self.compression_level = -1
            elif self.compression_type == 'QUICKLZ':
                # QUICKLZ only supports compression level 1
                self.compression_level = 1
                tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type + ', ' + 'compresslevel = ' + str(self.compression_level)
            elif self.compression_type == 'ZLIB':
                # fall back to level 1 when the level is missing or outside 1-9
                if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                    self.compression_level = 1
                tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type + ', ' + 'compresslevel = ' + str(self.compression_level)
            else:
                tbl_suffix = tbl_suffix + '_nocomp'
        else:
            # PARQUET
            if self.row_group_size is None or self.page_size is None:
                self.row_group_size = 8388608
                self.page_size = 1048576
            sql_suffix = sql_suffix + ', ' + 'pagesize = %s, rowgroupsize = %s' % (self.page_size, self.row_group_size)

            if self.compression_type == 'SNAPPY':
                self.compression_level = -1
                tbl_suffix = tbl_suffix + '_' + self.compression_type
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type
            elif self.compression_type == 'GZIP':
                if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                    self.compression_level = 1
                tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type + ', ' + 'compresslevel = ' + str(self.compression_level)
            else:
                tbl_suffix = tbl_suffix + '_nocomp'

        if self.partitions > 0:
            tbl_suffix += '_part'
        else:
            tbl_suffix += '_nopart'
    else:
        print 'heap tables are not supported'
        sys.exit(2)

    # note: the wl_catetory column is spelled this way in the hst schema
    self.check_condition = ("wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d "
                            "and wl_appendonly = '%s' and wl_disrandomly = '%s' and wl_orientation = '%s' "
                            "and wl_row_group_size = %d and wl_page_size = %d and wl_compression_type = '%s' "
                            "and wl_compression_level = %d and wl_partitions = %d and wl_iteration = %d "
                            "and wl_concurrency = %d and wl_query_order = '%s'") \
                           % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size,
                              self.append_only, self.distributed_randomly, self.orientation,
                              self.row_group_size, self.page_size, self.compression_type,
                              self.compression_level, self.partitions, self.num_iteration,
                              self.num_concurrency, self.run_workload_mode)

    # the adjusted scenario uses the inverted distribution policy
    adj_check_condition = ("wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d "
                           "and wl_appendonly = '%s' and wl_disrandomly = '%s' and wl_orientation = '%s' "
                           "and wl_row_group_size = %d and wl_page_size = %d and wl_compression_type = '%s' "
                           "and wl_compression_level = %d and wl_partitions = %d and wl_iteration = %d "
                           "and wl_concurrency = %d and wl_query_order = '%s'") \
                          % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size,
                             self.append_only, adj_distributed_randomly, self.orientation,
                             self.row_group_size, self.page_size, self.compression_type,
                             self.compression_level, self.partitions, self.num_iteration,
                             self.num_concurrency, self.run_workload_mode)

    self.wl_values = "'%s', '%s', '%s', %d, '%s', '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
                     % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type,
                        self.data_volume_size, self.append_only, self.distributed_randomly, self.orientation,
                        self.row_group_size, self.page_size, self.compression_type, self.compression_level,
                        self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

    # currently unused: the adjusted workload record is never inserted from here
    adj_wl_values = "'%s', '%s', '%s', %d, '%s', '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
                    % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type,
                       self.data_volume_size, self.append_only, adj_distributed_randomly, self.orientation,
                       self.row_group_size, self.page_size, self.compression_type, self.compression_level,
                       self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

    if self.cs_id != 0:
        # check whether the workload record exists; create it if not
        self.wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload',
                                    search_condition = self.check_condition)
        if self.wl_id is None:
            check.insert_new_record(table_name = 'hst.workload',
                                    col_list = 'wl_name, wl_catetory, wl_data_volume_type, wl_data_volume_size, wl_appendonly, wl_disrandomly, wl_orientation, wl_row_group_size, wl_page_size, wl_compression_type, wl_compression_level, wl_partitions, wl_iteration, wl_concurrency, wl_query_order',
                                    values = self.wl_values)
            self.wl_id = check.get_max_id(result_id = 'wl_id', table_name = 'hst.workload')

        # check whether the scenario record exists; create it if not
        self.s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario',
                                   search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, self.wl_id, self.us_id))
        if self.s_id is None:
            check.insert_new_record(table_name = 'hst.scenario', col_list = 'cs_id, wl_id, us_id',
                                    values = '%d, %d, %d' % (self.cs_id, self.wl_id, self.us_id))
            self.s_id = check.get_max_id(result_id = 's_id', table_name = 'hst.scenario')

        # the adjusted (comparison) workload and scenario are looked up only,
        # never created here; a missing record maps to adj_s_id = -1
        adj_wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload',
                                   search_condition = adj_check_condition)
        if adj_wl_id is None:
            self.adj_s_id = -1
        else:
            self.adj_s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario',
                                           search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, adj_wl_id, self.us_id))
            if self.adj_s_id is None:
                self.adj_s_id = -1

    self.tbl_suffix = tbl_suffix.lower()
    self.sql_suffix = sql_suffix
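# The check module used throughout (check_id, insert_new_record, get_max_id)
# is not part of this excerpt. Purely as an illustration of the lookup pattern
# its call sites imply, a minimal sketch built on psycopg2; the connection
# parameters and single-row assumption are mine, not the original module's:
import psycopg2

def _example_check_id(result_id, table_name, search_condition):
    # return the requested id column when a matching row exists, else None
    conn = psycopg2.connect(host = 'localhost', dbname = 'hst', user = 'gpadmin')  # placeholder DSN
    try:
        cur = conn.cursor()
        cur.execute('SELECT %s FROM %s WHERE %s' % (result_id, table_name, search_condition))
        row = cur.fetchone()
        return row[0] if row is not None else None
    finally:
        conn.close()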