Example #1
    def __init__(self, workload_specification, workload_directory, report_directory, report_sql_file, cs_id, tr_id, user):
        # initialize common variables
        self.cs_id = cs_id
        self.tr_id = tr_id
        self.us_id = 0
        self.s_id = 0
        self.adj_s_id = 0

        self.user = user
        
        # look up the user id in the backend database (skipped in standalone mode)
        if self.cs_id != 0:
            self.us_id = check.check_id(result_id = 'us_id', table_name = 'hst.users', search_condition = "us_name = '%s'" % (self.user))
            if self.us_id is None:
                sys.stderr.write('Invalid database user name: %s\n' % (self.user))
                sys.exit(2)

        self.continue_flag = True
        # should always run the workload by default
        self.should_stop = False
        # set workload source directory
        self.workload_directory = workload_directory
         
        # required fields: workload_name and database_name
        try:
            self.workload_name = workload_specification['workload_name'].strip()
            self.database_name = workload_specification['database_name'].strip()
        except Exception as e:
            print('Please add the %s option to the schedule file.' % (str(e)))
            sys.exit(2)
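
For reference, here is a minimal sketch of how this constructor might be invoked from a driver script. The class name Workload, the schedule path, and the directory layout are illustrative assumptions, not taken from the source; cs_id = 0 selects the standalone path that skips all backend-database lookups.

    import yaml

    # load one workload specification from a schedule file (hypothetical path and layout)
    with open('schedules/demo.yml', 'r') as fschedule:
        spec = yaml.load(fschedule)['workloads_list'][0]

    wl = Workload(workload_specification = spec,
                  workload_directory = 'workloads/tpch',    # assumed layout
                  report_directory = 'reports',
                  report_sql_file = 'reports/report.sql',
                  cs_id = 0,    # 0 = standalone mode, no backend database
                  tr_id = -1,
                  user = 'gpadmin')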
Example #2
    def __set_info(self):
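        # Derive two artifacts from the workload's table settings:
        #   tbl_suffix -- appended to table names, e.g. 'ao_column_zlib1_part'
        #   sql_suffix -- a storage-options fragment for the CREATE TABLE WITH (...) clause
        # ('wl_catetory' below is spelled as in the backend schema)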
        tbl_suffix = ''
        sql_suffix = ''
        # init TPC-H-specific configuration such as the table settings

        if self.append_only in [None, True]:
            tbl_suffix = tbl_suffix + 'ao'
            sql_suffix = sql_suffix + 'appendonly = true'
            # the adjusted ("adj") scenario mirrors this one with the opposite distribution policy
            if self.distributed_randomly:
                adj_distributed_randomly = 'FALSE'
            else:
                adj_distributed_randomly = 'TRUE'

            tbl_suffix = tbl_suffix + '_' + self.orientation
            sql_suffix = sql_suffix + ', '+ 'orientation = ' + self.orientation

            if self.orientation in ['ROW', 'COLUMN']:
                # group size, page_size
                self.page_size = -1
                self.row_group_size = -1

                if self.compression_type is None:
                    tbl_suffix = tbl_suffix + '_nocomp'
                    self.compression_type = 'None'
                    self.compression_level = -1
                elif self.compression_type == 'QUICKLZ':
                    self.compression_level = 1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type  + ', ' + 'compresslevel = ' + str(self.compression_level)
                elif self.compression_type == 'ZLIB':
                    if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                        self.compression_level = 1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type  + ', ' + 'compresslevel = ' + str(self.compression_level)
                elif self.compression_type == 'SNAPPY':
                    self.compression_level = -1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type
                else:
                    tbl_suffix = tbl_suffix + '_nocomp'
            else:
                # PARQUET
                if self.row_group_size is None or self.page_size is None:
                    self.row_group_size = 8388608
                    self.page_size = 1048576

                sql_suffix = sql_suffix + ', ' + 'pagesize = %s, rowgroupsize = %s' % (self.page_size, self.row_group_size)

                if self.compression_type == 'SNAPPY':
                    self.compression_level = -1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type
                elif self.compression_type == 'GZIP':
                    if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                        self.compression_level = 1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type  + ', ' + 'compresslevel = ' + str(self.compression_level)
                else:
                    tbl_suffix = tbl_suffix + '_nocomp'

            if self.partitions > 0:
                tbl_suffix += '_part'
            else:
                tbl_suffix += '_nopart'
        
        else:
            # only append-only tables are supported here
            print 'heap tables are not supported'
            sys.exit(2)

        if self.num_concurrency > 1:
            self.check_condition = "wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d and wl_appendonly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d and wl_page_size = %d and \
                     wl_compression_type = '%s' and wl_compression_level = %d and wl_partitions = %d \
                     and wl_iteration = %d and wl_concurrency = %d and wl_query_order = '%s'" \
                     % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level, self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)
        else:
            self.check_condition = "wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d and wl_appendonly = '%s' \
                   and wl_orientation = '%s' and wl_row_group_size = %d and wl_page_size = %d and \
                   wl_compression_type = '%s' and wl_compression_level = %d and wl_partitions = %d and wl_concurrency = %d and wl_query_order = '%s'" \
                   % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level, self.partitions, self.num_concurrency, self.run_workload_mode)
 
        adj_check_condition = "wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d and wl_appendonly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d and wl_page_size = %d and \
        wl_compression_type = '%s' and wl_compression_level = %d and wl_partitions = %d and wl_iteration = %d and wl_concurrency = %d and wl_query_order = '%s'" \
        % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        self.wl_values = "'%s', '%s', '%s', %d, '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
        % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        adj_wl_values = "'%s', '%s', '%s', %d, '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
        % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        if self.cs_id != 0:
            # check whether this workload already exists
            self.wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload', search_condition = self.check_condition)
            if self.wl_id is None:
                check.insert_new_record(table_name = 'hst.workload',
                                        col_list = 'wl_name, wl_catetory, wl_data_volume_type, wl_data_volume_size, wl_appendonly, wl_orientation, wl_row_group_size, wl_page_size, wl_compression_type, wl_compression_level, wl_partitions, wl_iteration, wl_concurrency, wl_query_order',
                                        values = self.wl_values)
                self.wl_id = check.get_max_id(result_id = 'wl_id', table_name = 'hst.workload')
                
            # check whether this scenario already exists
            self.s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario', 
                                       search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, self.wl_id, self.us_id))
            if self.s_id is None:
                check.insert_new_record(table_name = 'hst.scenario', col_list = 'cs_id, wl_id, us_id', 
                                        values = '%d, %d, %d' % (self.cs_id, self.wl_id, self.us_id))
                self.s_id = check.get_max_id(result_id = 's_id', table_name = 'hst.scenario')

            # check the adjusted ("adj") scenario
            adj_wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload', search_condition = adj_check_condition)
            # (an earlier revision inserted a missing adjusted workload and scenario
            # here; a missing adjusted scenario is now simply flagged with -1 below)

            if adj_wl_id is None:
                self.adj_s_id = -1
            else:
                self.adj_s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario', 
                                       search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, adj_wl_id, self.us_id))
                if self.adj_s_id is None:
                    self.adj_s_id = -1
        
        self.tbl_suffix = tbl_suffix.lower()
        self.sql_suffix = sql_suffix
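
As a worked example (illustrative values, not from the source): for a spec with append_only = True, orientation = 'COLUMN', compression_type = 'ZLIB', compression_level = None and partitions = 128, the method above produces

    tbl_suffix = 'ao_column_zlib1_part'
    sql_suffix = "appendonly = true, orientation = COLUMN, compresstype = ZLIB, compresslevel = 1"

so a loader can emit statements such as CREATE TABLE lineitem_ao_column_zlib1_part (...) WITH (appendonly = true, orientation = COLUMN, compresstype = ZLIB, compresslevel = 1).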
Example #3
        # HAWQ 1.x baseline versions are hard-coded here
        hawq1_version = 'HAWQ 1.3.0.0 build 13048GVA HK'
        phd1_version = 'PHD 3.0'

        if cluster_name is None and 'cluster_name' in schedule_parser:
            cluster_name = schedule_parser['cluster_name']
        if cluster_name is None:
            sys.stderr.write('Invalid cluster name!\n')
            add_database = False
        else:
            if cluster_name == 'HAWQ main performance on BCN cluster':
                hawq2_version = 'HAWQ 2.0.0.0_beta build 21481 BCN HK'
        # check cluster information if lsp not run in standalone mode
        if add_database:
            # check if specified cluster exists 
            cs_id = check.check_id(result_id = 'cs_id', table_name = 'hst.cluster_settings', search_condition = "cs_name = '%s'" % (cluster_name))
            if cs_id is None:
                sys.stderr.write('Invalid cluster name %s!\n' % (cluster_name))
                continue

        if not start_flag:
            start_flag = True
            # add test-run information (build_id, build_url, hawq_version, hdfs_version) to the backend database when lsp is not running in standalone mode
            tr_id = -1
            if add_database:
                output = commands.getoutput('cat ~/qa.sh')
                try:
                    wd = output[output.index('wd='):].split('"')[1]
                    output = commands.getoutput('cd %s; cat build_info_file.txt' % (wd))
                    build_id = output[output.index('PULSE_ID_INFO'):].split('\n')[0].split('=')[1]
                    build_url = output[output.index('PULSE_PROJECT_INFO'):].split('\n')[0].split('=')[1]
                except (ValueError, IndexError):
                    # the excerpt is truncated here; an assumed fallback closes the try block
                    build_id = build_url = 'unknown'
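
The parsing above assumes ~/qa.sh contains a line of the form wd="/path/to/workdir" and that build_info_file.txt holds key=value pairs; an illustrative (not source-verified) file would be:

    PULSE_ID_INFO=12345
    PULSE_PROJECT_INFO=http://pulse.example.com/projects/hawq/builds/12345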
Example #4
    # parse schedule file
    for schedule_name in schedule_list:
        schedule_file = LSP_HOME + os.sep + 'schedules' + os.sep + schedule_name + '.yml'
        with open(schedule_file, 'r') as fschedule:
            schedule_parser = yaml.load(fschedule)

        # parse list of the workloads for execution
        if 'workloads_list' not in schedule_parser or schedule_parser['workloads_list'] is None:
            print 'No workload is specified in schedule file: %s' % (schedule_name + '.yml')
            continue

        # check cluster information if lsp not run in standalone mode
        if add_database:
            cluster_name = schedule_parser['cluster_name']
            # check if specified cluster exists 
            cs_id = check.check_id(result_id = 'cs_id', table_name = 'hst.cluster_settings', search_condition = "cs_name = '%s'" % (cluster_name))
            if cs_id is None:
                sys.stderr.write('Invalid cluster name %s!\n' % (cluster_name))
                continue

        if not start_flag:
            start_flag = True
            # add test-run information (build_id, build_url, hawq_version, hdfs_version) to the backend database when lsp is not running in standalone mode
            tr_id = -1
            if add_database:
                output = commands.getoutput('cat ~/qa.sh')
                try:
                    wd = output[output.index('wd='):].split('"')[1]
                    output = commands.getoutput('cd %s; cat build_info_file.txt' % (wd))
                    build_id = output[output.index('PULSE_ID_INFO'):].split('\n')[0].split('=')[1]
                    build_url = output[output.index('PULSE_PROJECT_INFO'):].split('\n')[0].split('=')[1]
                except (ValueError, IndexError):
                    # the excerpt is truncated here; an assumed fallback closes the try block
                    build_id = build_url = 'unknown'
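
For context, a minimal schedule file that satisfies the checks above might look like the following. The keys cluster_name and workloads_list appear in the code; the per-workload fields are assumed from the constructor in Example #1, and all values are illustrative:

    # schedules/demo.yml
    cluster_name: 'HAWQ main performance on BCN cluster'
    workloads_list:
        - workload_name: 'tpch_demo'
          database_name: 'demo_db'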
Example #5
    def __set_info(self):
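        # Same derivation as in Example #2, except that this variant also records
        # the distribution policy (wl_disrandomly) in the workload row and in the
        # adjusted-scenario lookups below.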
        tbl_suffix = ''
        sql_suffix = ''
        # init TPC-H-specific configuration such as the table settings

        if self.append_only in [None, True]:
            tbl_suffix = tbl_suffix + 'ao'
            sql_suffix = sql_suffix + 'appendonly = true'
            # the adjusted ("adj") scenario mirrors this one with the opposite distribution policy
            if self.distributed_randomly:
                adj_distributed_randomly = 'FALSE'
            else:
                adj_distributed_randomly = 'TRUE'

            tbl_suffix = tbl_suffix + '_' + self.orientation
            sql_suffix = sql_suffix + ', '+ 'orientation = ' + self.orientation

            if self.orientation in ['ROW', 'COLUMN']:
                # group size, page_size
                self.page_size = -1
                self.row_group_size = -1

                if self.compression_type is None:
                    tbl_suffix = tbl_suffix + '_nocomp'
                    self.compression_type = 'None'
                    self.compression_level = -1
                elif self.compression_type == 'QUICKLZ':
                    self.compression_level = 1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type  + ', ' + 'compresslevel = ' + str(self.compression_level)
                elif self.compression_type == 'ZLIB':
                    if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                        self.compression_level = 1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type  + ', ' + 'compresslevel = ' + str(self.compression_level)
                else:
                    tbl_suffix = tbl_suffix + '_nocomp'
            else:
                # PARQUET
                if self.row_group_size is None or self.page_size is None:
                    self.row_group_size = 8388608
                    self.page_size = 1048576

                sql_suffix = sql_suffix + ', ' + 'pagesize = %s, rowgroupsize = %s' % (self.page_size, self.row_group_size)

                if self.compression_type == 'SNAPPY':
                    self.compression_level = -1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type
                elif self.compression_type == 'GZIP':
                    if (self.compression_level is None) or (self.compression_level < 1) or (self.compression_level > 9):
                        self.compression_level = 1
                    tbl_suffix = tbl_suffix + '_' + self.compression_type + str(self.compression_level)
                    sql_suffix = sql_suffix + ', ' + 'compresstype = ' + self.compression_type  + ', ' + 'compresslevel = ' + str(self.compression_level)
                else:
                    tbl_suffix = tbl_suffix + '_nocomp'

            if self.partitions > 0:
                tbl_suffix += '_part'
            else:
                tbl_suffix += '_nopart'
        
        else:
            # only append-only tables are supported here
            print 'heap tables are not supported'
            sys.exit(2)

        self.check_condition = "wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d and wl_appendonly = '%s' and wl_disrandomly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d and wl_page_size = %d and \
        wl_compression_type = '%s' and wl_compression_level = %d and wl_partitions = %d and wl_iteration = %d and wl_concurrency = %d and wl_query_order = '%s'" \
        % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.distributed_randomly, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        adj_check_condition = "wl_catetory = '%s' and wl_data_volume_type = '%s' and wl_data_volume_size = %d and wl_appendonly = '%s' and wl_disrandomly = '%s' and wl_orientation = '%s' and wl_row_group_size = %d and wl_page_size = %d and \
        wl_compression_type = '%s' and wl_compression_level = %d and wl_partitions = %d and wl_iteration = %d and wl_concurrency = %d and wl_query_order = '%s'" \
        % (self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, adj_distributed_randomly, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        self.wl_values = "'%s', '%s', '%s', %d, '%s', '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
        % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, self.distributed_randomly, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        adj_wl_values = "'%s', '%s', '%s', %d, '%s', '%s', '%s', %d, %d, '%s', %d, %d, %d, %d, '%s'" \
        % (self.workload_name, self.workload_name.split('_')[0].upper(), self.data_volume_type, self.data_volume_size, self.append_only, adj_distributed_randomly, self.orientation, self.row_group_size, self.page_size, self.compression_type, self.compression_level,
            self.partitions, self.num_iteration, self.num_concurrency, self.run_workload_mode)

        if self.cs_id != 0:
            # check whether this workload already exists
            self.wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload', search_condition = self.check_condition)
            if self.wl_id is None:
                check.insert_new_record(table_name = 'hst.workload',
                                        col_list = 'wl_name, wl_catetory, wl_data_volume_type, wl_data_volume_size, wl_appendonly, wl_disrandomly, wl_orientation, wl_row_group_size, wl_page_size, wl_compression_type, wl_compression_level, wl_partitions, wl_iteration, wl_concurrency, wl_query_order',
                                        values = self.wl_values)
                self.wl_id = check.get_max_id(result_id = 'wl_id', table_name = 'hst.workload')
                
            # check whether this scenario already exists
            self.s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario', 
                                       search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, self.wl_id, self.us_id))
            if self.s_id is None:
                check.insert_new_record(table_name = 'hst.scenario', col_list = 'cs_id, wl_id, us_id', 
                                        values = '%d, %d, %d' % (self.cs_id, self.wl_id, self.us_id))
                self.s_id = check.get_max_id(result_id = 's_id', table_name = 'hst.scenario')

            # check the adjusted ("adj") scenario
            adj_wl_id = check.check_id(result_id = 'wl_id', table_name = 'hst.workload', search_condition = adj_check_condition)
            # (an earlier revision inserted a missing adjusted workload and scenario
            # here; a missing adjusted scenario is now simply flagged with -1 below)

            if adj_wl_id is None:
                self.adj_s_id = -1
            else:
                self.adj_s_id = check.check_id(result_id = 's_id', table_name = 'hst.scenario', 
                                       search_condition = 'cs_id = %d and wl_id = %d and us_id = %d' % (self.cs_id, adj_wl_id, self.us_id))
                if self.adj_s_id is None:
                    self.adj_s_id = -1
        
        self.tbl_suffix = tbl_suffix.lower()
        self.sql_suffix = sql_suffix
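
These examples all rely on a check helper module for backend-database access. A sketch of its assumed interface, reconstructed from the call sites above (the signatures and behaviors are inferred, not taken from the source):

    # check.py -- assumed interface of the backend-database helper
    def check_id(result_id, table_name, search_condition):
        """Return column result_id from the first row of table_name matching
        search_condition, or None when no row matches."""

    def insert_new_record(table_name, col_list, values):
        """Run INSERT INTO table_name (col_list) VALUES (values)."""

    def get_max_id(result_id, table_name):
        """Return max(result_id) from table_name, i.e. the id just inserted."""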