Esempio n. 1
0
 def load_data(self):
     '''Load data for workload'''
     if self.load_data_flag:
         cmd = 'drop database if exists %s;' % (self.database_name)
         (ok, output) = psql.runcmd(cmd = cmd)
         if not ok:
             print cmd
             print '\n'.join(output)
             sys.exit(2)
         self.output(cmd)
         self.output('\n'.join(output))
         count = 0
         while(True):
             cmd = 'create database %s;' % (self.database_name)
             (ok, output) = psql.runcmd(cmd = cmd)
             if not ok:
                 count = count + 1
                 time.sleep(1)
             else:
                 self.output(cmd)
                 self.output('\n'.join(output))
                 if self.user != 'gpadmin':
                     cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                     (ok1, output1) = psql.runcmd(cmd = cmd1)
                     self.output(cmd1)
                     self.output('\n'.join(output1))
                 break
             if count == 10:
                 print cmd
                 print '\n'.join(output)
                 sys.exit(2)
Esempio n. 2
0
 def load_data(self):
     '''Load data for workload'''
     if self.load_data_flag:
         cmd = 'drop database if exists %s;' % (self.database_name)
         (ok, output) = psql.runcmd(cmd = cmd)
         if not ok:
             print cmd
             print '\n'.join(output)
             sys.exit(2)
         self.output(cmd)
         self.output('\n'.join(output))
         count = 0
         while(True):
             cmd = 'create database %s;' % (self.database_name)
             (ok, output) = psql.runcmd(cmd = cmd)
             if not ok:
                 count = count + 1
                 time.sleep(1)
             else:
                 self.output(cmd)
                 self.output('\n'.join(output))
                 if self.user != 'gpadmin':
                     cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                     (ok1, output1) = psql.runcmd(cmd = cmd1)
                     self.output(cmd1)
                     self.output('\n'.join(output1))
                 break
             if count == 10:
                 print cmd
                 print '\n'.join(output)
                 sys.exit(2)
Esempio n. 3
0
    def __fetch_hawq_configuration(self):
        '''Fetch master hostname, segments hostname, data directory of HAWQ.'''
        self.hawq_master = config.getMasterHostName()
        self.hawq_segments = config.getSegHostNames()
        self.hawq_paths = []
        self.hawq_config = {}

        sql = "SELECT gsc.hostname, pfe.fselocation FROM gp_segment_configuration gsc, pg_filespace_entry pfe WHERE gsc.dbid = pfe.fsedbid AND pfe.fselocation NOT LIKE 'hdfs%' ORDER BY pfe.fselocation;"
        (ok, out) = psql.runcmd( dbname = 'postgres', cmd = sql , ofile = '-', flag = '-q -t' )

        if not ok:
            print out
            raise Exception("Failed to get HAWQ configuration paths.")
        else:
            for line in out:
                line = line.strip()
                if line:
                    (host, path) = line.split( '|' )
                    if not self.hawq_config.has_key(host.strip()):
                        self.hawq_config[host.strip()] = []
                    self.hawq_config[host.strip()].append(path.strip())
                        
                   # self.hawq_config.append( (host.strip(), path.strip()) )
                    self.hawq_paths.append( path.strip() ) 

        print self.hawq_master
        print self.hawq_segments
        print self.hawq_config
Esempio n. 4
0
    def __fetch_hawq_configuration(self):
        '''Fetch master hostname, segments hostname, data directory of HAWQ.'''
        self.hawq_master = config.getMasterHostName()
        self.hawq_segments = config.getSegHostNames()
        self.hawq_paths = []
        self.hawq_config = {}

        sql = "SELECT gsc.hostname, pfe.fselocation FROM gp_segment_configuration gsc, pg_filespace_entry pfe WHERE gsc.dbid = pfe.fsedbid AND pfe.fselocation NOT LIKE 'hdfs%' ORDER BY pfe.fselocation;"
        (ok, out) = psql.runcmd(dbname='postgres',
                                cmd=sql,
                                ofile='-',
                                flag='-q -t')

        if not ok:
            print out
            raise Exception("Failed to get HAWQ configuration paths.")
        else:
            for line in out:
                line = line.strip()
                if line:
                    (host, path) = line.split('|')
                    if not self.hawq_config.has_key(host.strip()):
                        self.hawq_config[host.strip()] = []
                    self.hawq_config[host.strip()].append(path.strip())

                    # self.hawq_config.append( (host.strip(), path.strip()) )
                    self.hawq_paths.append(path.strip())

        print self.hawq_master
        print self.hawq_segments
        print self.hawq_config
Esempio n. 5
0
    def load_data(self):
        self.output('\n-- Start loading data')

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd, username='******')
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = [
            'call_center', 'catalog_page', 'catalog_returns', 'catalog_sales',
            'customer', 'customer_address', 'customer_demographics',
            'date_dim', 'household_demographics', 'income_band', 'inventory',
            'item', 'promotion', 'reason', 'ship_mode', 'store',
            'store_returns', 'store_sales', 'time_dim', 'warehouse',
            'web_page', 'web_returns', 'web_sales', 'web_site'
        ]

        if not self.load_data_flag:
            beg_time = str(datetime.now()).split('.')[0]
            for table_name in tables:
                self.output(
                    '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                    % (table_name, 1, 1, 'SKIP', 0))
                self.report_sql(
                    "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, 'SKIP', '%s', '%s', 0, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, table_name, beg_time, beg_time,
                       self.adj_s_id))
        else:
            self.load_setup()
            self.load_generate()
            self.load_loading(tables=tables)
        self.output('-- Complete loading data')
Esempio n. 6
0
 def test_02_check_hawq_health(self):
     '''Test case 04: Check health including: segment down'''
     # Potential improvement: further investigation on root cause using gpcheckperf
     sql = "SELECT count(*) FROM pg_catalog.gp_segment_configuration WHERE mode<>'s'"
     (ok, out) = psql.runcmd( dbname = 'postgres', cmd = sql , ofile = '-', flag = '-q -t' )
     if not ok:
         print str(out) 
         print('test_02_check_hawq_health: error ')
     if int(out[0]) == 0:
         print('test_02_check_hawq_health: success ')
     else:
         print('test_02_check_hawq_health: %d segments is failed. ' % (int(out[0])))
Esempio n. 7
0
    def load_data(self):
        self.output('\n-- Start loading data')

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd, username = '******')
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = ['call_center', 'catalog_page', 'catalog_returns', 'catalog_sales', 'customer', 'customer_address',
        'customer_demographics', 'date_dim', 'household_demographics', 'income_band', 'inventory', 'item',
        'promotion', 'reason', 'ship_mode', 'store', 'store_returns', 'store_sales',
        'time_dim', 'warehouse','web_page', 'web_returns', 'web_sales', 'web_site']
        
        if not self.load_data_flag:
            beg_time = str(datetime.now()).split('.')[0]
            for table_name in tables:
                self.output('   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (table_name, 1, 1, 'SKIP', 0))
                self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, 'SKIP', '%s', '%s', 0, NULL, NULL, NULL, %d);" 
                    % (self.tr_id, self.s_id, table_name, beg_time, beg_time, self.adj_s_id))
        else:
            self.load_setup()
            self.load_generate()
            self.load_loading(tables = tables)
        self.output('-- Complete loading data')      
Esempio n. 8
0
 def test_02_check_hawq_health(self):
     '''Test case 04: Check health including: segment down'''
     # Potential improvement: further investigation on root cause using gpcheckperf
     sql = "SELECT count(*) FROM pg_catalog.gp_segment_configuration WHERE mode<>'s'"
     (ok, out) = psql.runcmd(dbname='postgres',
                             cmd=sql,
                             ofile='-',
                             flag='-q -t')
     if not ok:
         print str(out)
         print('test_02_check_hawq_health: error ')
     if int(out[0]) == 0:
         print('test_02_check_hawq_health: success ')
     else:
         print('test_02_check_hawq_health: %d segments is failed. ' %
               (int(out[0])))
Esempio n. 9
0
    def load_data(self):
        self.output("-- Start loading data")

        # get the data dir
        data_directory = self.workload_directory + os.sep + "data"
        if not os.path.exists(data_directory):
            self.output("ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists" % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = "drop database if exists %s;" % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print "\n".join(output)
                sys.exit(2)
            self.output(cmd)
            self.output("\n".join(output))

            cmd = "create database %s;" % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
            if not ok:
                print cmd
                print "\n".join(output)
                sys.exit(2)
            self.output(cmd)
            self.output("\n".join(output))

        tables = ["nation", "region", "part", "supplier", "partsupp", "customer", "orders", "lineitem", "revenue"]
        for table_name in tables:
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + ".sql", "r") as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd, table_name=table_name)
                    with open(self.tmp_folder + os.sep + table_name + ".sql", "w") as f:
                        f.write(cmd)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(
                        ifile=self.tmp_folder + os.sep + table_name + ".sql",
                        dbname=self.database_name,
                        username=self.user,
                    )
                    end_time = datetime.now()
                    self.output("\n".join(result))

                    if ok and str(result).find("ERROR") == -1 and str(result).find("FATAL") == -1:
                        status = "SUCCESS"
                    else:
                        status = "ERROR"
                        self.continue_flag = False
                else:
                    status = "SKIP"
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = "ERROR"
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split(".")[0]
            end_time = str(end_time).split(".")[0]
            self.output(
                "   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d" % (table_name, 1, 1, status, duration)
            )
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
                % (self.tr_id, self.s_id, table_name, status, beg_time, end_time, duration)
            )

        self.output("-- Complete loading data")
Esempio n. 10
0
    def load_data(self):
        self.output('-- Start loading data ')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output('ERROR: Cannot find DDL to create tables for Copy: %s does not exists' % (data_directory))
            return
        
        self.output('-- start gpfdist service')
        self.gpfdist_port = self._getOpenPort()

        cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (self.host_name, self.tmp_folder, self.gpfdist_port)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        
        cmd = 'ps -ef | grep gpfdist'
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        
        self.output('-- generate data file: %s' % (self.fname))
        cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor, self.fname)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        if status != 0:
            print("generate data file %s error. " % (self.fname))
            sys.exit(2)
        self.output('generate data file successed. ')

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            count = 0
            while(True):
                cmd = 'create database %s;' % (self.database_name)
                (ok, output) = psql.runcmd(cmd = cmd)
                if not ok:
                    count = count + 1
                    time.sleep(1)
                else:
                    self.output(cmd)
                    self.output('\n'.join(output))
                    if self.user != 'gpadmin':
                        cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (self.database_name, self.user)
                        (ok1, output1) = psql.runcmd(cmd = cmd1)
                        self.output(cmd1)
                        self.output('\n'.join(output1))
                    break
                if count == 10:
                    print cmd
                    print '\n'.join(output)
                    sys.exit(2)

        tables = ['lineitem_gpfdist']
        
        niteration = 1
        while niteration <= self.num_iteration:
            self.output('-- Start iteration %d' % (niteration))
            for table_name in tables:
                con_id = -1
                if self.load_data_flag or self.run_workload_flag:
                    with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql = cmd, table_name = table_name, num = niteration)

                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)
                    
                    with open(self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', 'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)    
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + 'gpfdist_loading_temp.sql', dbname = self.database_name, flag = '-t -A') #, username = self.user)
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])

                    if ok and str(result).find('ERROR') == -1 and str(result).find('FATAL') == -1 and str(result).find('INSERT 0') != -1: 
                        status = 'SUCCESS'
                        con_id = int(result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'
                
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
                    
                duration = end_time - beg_time
                duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
                beg_time = str(beg_time).split('.')[0]
                end_time = str(end_time).split('.')[0]
                
                self.output('   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (table_name, niteration, 1, status, duration))
                self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
                    % (self.tr_id, self.s_id, con_id, table_name, niteration, status, beg_time, end_time, duration, self.adj_s_id))
                
            self.output('-- Complete iteration %d' % (niteration))
            niteration += 1
        
        if self.user != 'gpadmin':
            cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name, self.user)
            (ok1, output1) = psql.runcmd(cmd = cmd1)
            self.output(cmd1)
            self.output('\n'.join(output1))

        self.output('-- Complete loading data')      
Esempio n. 11
0
    def load_data(self):
        self.output('-- Start loading data')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists'
                % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd, username=self.user)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = [
            'nation', 'region', 'part', 'supplier', 'partsupp', 'customer',
            'orders', 'lineitem', 'revenue'
        ]
        for table_name in tables:
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + '.sql',
                              'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd, table_name=table_name)
                    with open(self.tmp_folder + os.sep + table_name + '.sql',
                              'w') as f:
                        f.write(cmd)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile=self.tmp_folder +
                                                os.sep + table_name + '.sql',
                                                dbname=self.database_name,
                                                username=self.user)
                    end_time = datetime.now()
                    self.output('\n'.join(result))

                    if ok and str(result).find('ERROR') == -1 and str(
                            result).find('FATAL') == -1:
                        status = 'SUCCESS'
                    else:
                        status = 'ERROR'
                        self.continue_flag = False
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time).split('.')[0]
            end_time = str(end_time).split('.')[0]
            self.output(
                '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                % (table_name, 1, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL);"
                % (self.tr_id, self.s_id, table_name, status, beg_time,
                   end_time, duration))

        self.output('-- Complete loading data')
Esempio n. 12
0
    def load_data(self):
        self.output('-- Start loading data ')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for Copy: %s does not exists'
                % (data_directory))
            return

        self.output('-- start gpfdist service')
        self.gpfdist_port = self._getOpenPort()

        cmd = "gpssh -h %s -e 'gpfdist -d %s -p %d -l ./gpfdist.log &'" % (
            self.host_name, self.tmp_folder, self.gpfdist_port)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)

        cmd = 'ps -ef | grep gpfdist'
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)

        self.output('-- generate data file: %s' % (self.fname))
        cmd = "dbgen -b %s -s %d -T L > %s " % (self.dss, self.scale_factor,
                                                self.fname)
        (status, output) = commands.getstatusoutput(cmd)
        self.output(cmd)
        self.output(output)
        if status != 0:
            print("generate data file %s error. " % (self.fname))
            sys.exit(2)
        self.output('generate data file successed. ')

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            count = 0
            while (True):
                cmd = 'create database %s;' % (self.database_name)
                (ok, output) = psql.runcmd(cmd=cmd)
                if not ok:
                    count = count + 1
                    time.sleep(1)
                else:
                    self.output(cmd)
                    self.output('\n'.join(output))
                    if self.user != 'gpadmin':
                        cmd1 = 'GRANT ALL ON DATABASE %s TO %s;' % (
                            self.database_name, self.user)
                        (ok1, output1) = psql.runcmd(cmd=cmd1)
                        self.output(cmd1)
                        self.output('\n'.join(output1))
                    break
                if count == 10:
                    print cmd
                    print '\n'.join(output)
                    sys.exit(2)

        tables = ['lineitem_gpfdist']

        niteration = 1
        while niteration <= self.num_iteration:
            self.output('-- Start iteration %d' % (niteration))
            for table_name in tables:
                con_id = -1
                if self.load_data_flag or self.run_workload_flag:
                    with open(data_directory + os.sep + table_name + '.sql',
                              'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd,
                                           table_name=table_name,
                                           num=niteration)

                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name,
                                                 self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (
                        unique_string1, unique_string2)

                    with open(
                            self.tmp_folder + os.sep +
                            'gpfdist_loading_temp.sql', 'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(
                        ifile=self.tmp_folder + os.sep +
                        'gpfdist_loading_temp.sql',
                        dbname=self.database_name,
                        flag='-t -A')  #, username = self.user)
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])

                    if ok and str(result).find('ERROR') == -1 and str(
                            result).find('FATAL') == -1 and str(result).find(
                                'INSERT 0') != -1:
                        status = 'SUCCESS'
                        con_id = int(
                            result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'

                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time

                duration = end_time - beg_time
                duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
                beg_time = str(beg_time).split('.')[0]
                end_time = str(end_time).split('.')[0]

                self.output(
                    '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                    % (table_name, niteration, 1, status, duration))
                self.report_sql(
                    "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', %d, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                    % (self.tr_id, self.s_id, con_id, table_name, niteration,
                       status, beg_time, end_time, duration, self.adj_s_id))

            self.output('-- Complete iteration %d' % (niteration))
            niteration += 1

        if self.user != 'gpadmin':
            cmd1 = 'REVOKE ALL ON DATABASE %s FROM %s;' % (self.database_name,
                                                           self.user)
            (ok1, output1) = psql.runcmd(cmd=cmd1)
            self.output(cmd1)
            self.output('\n'.join(output1))

        self.output('-- Complete loading data')
Esempio n. 13
0
    def load_data(self):
        self.output('-- Start loading data')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data'
        if not os.path.exists(data_directory):
            self.output('ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists' % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd, username = '******')
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd = cmd, username = '******')
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = ['nation', 'region', 'part', 'supplier', 'partsupp', 'customer', 'orders','lineitem' ,'revenue']
        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + '.sql', 'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql = cmd, table_name = table_name)
                    
                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name, self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (unique_string1, unique_string2)

                    with open(self.tmp_folder + os.sep + table_name + '.sql', 'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(ifile = self.tmp_folder + os.sep + table_name + '.sql', dbname = self.database_name, flag = '-t -A') #, username = self.user)
                    #(ok,result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])
                    #self.output('\n'.join(result))
                    
                    if ok and str(result).find('ERROR:') == -1 and str(result).find('FATAL:') == -1 and (str(result).find('INSERT 0') != -1 or str(result).find('CREATE VIEW') != -1):
                        status = 'SUCCESS'
                        con_id = int(result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'
                        self.continue_flag = False
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time
                
            duration = end_time - beg_time
            duration = duration.days*24*3600*1000 + duration.seconds*1000 + duration.microseconds /1000
            beg_time = str(beg_time)
            end_time = str(end_time)        
            self.output('   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d' % (table_name, 1, 1, status, duration))
            self.report_sql("INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);" 
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time, end_time, duration, self.adj_s_id))
               
        self.output('-- Complete loading data')
Esempio n. 14
0
    def load_data(self):
        self.output('-- Start loading data')

        # get the data dir
        data_directory = self.workload_directory + os.sep + 'data' + '-' + self.test_type
        if not os.path.exists(data_directory):
            self.output(
                'ERROR: Cannot find DDL to create tables for TPC-H: %s does not exists'
                % (data_directory))
            sys.exit(2)

        if self.load_data_flag:
            cmd = 'drop database if exists %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

            cmd = 'create database %s;' % (self.database_name)
            (ok, output) = psql.runcmd(cmd=cmd)
            if not ok:
                print cmd
                print '\n'.join(output)
                sys.exit(2)
            self.output(cmd)
            self.output('\n'.join(output))

        tables = [
            'nation', 'region', 'part', 'supplier', 'partsupp', 'customer',
            'orders', 'lineitem', 'revenue'
        ]

        if self.test_type == 'pxf':
            self.output(
                'Deleting writable external table director if it already exists'
            )
            os.system('sudo -u pxf hdfs dfs -rm -r %s' %
                      (self.pxf_writable_path))

        for table_name in tables:
            con_id = -1
            if self.continue_flag:
                if self.load_data_flag:
                    with open(data_directory + os.sep + table_name + '.sql',
                              'r') as f:
                        cmd = f.read()
                    cmd = self.replace_sql(sql=cmd, table_name=table_name)

                    # get con_id use this query
                    unique_string1 = '%s_%s_' % (self.workload_name,
                                                 self.user) + table_name
                    unique_string2 = '%' + unique_string1 + '%'
                    get_con_id_sql = "select '***', '%s', sess_id from pg_stat_activity where current_query like '%s';" % (
                        unique_string1, unique_string2)

                    with open(self.tmp_folder + os.sep + table_name + '.sql',
                              'w') as f:
                        f.write(cmd)
                        f.write(get_con_id_sql)

                    self.output(cmd)
                    beg_time = datetime.now()
                    (ok, result) = psql.runfile(
                        ifile=self.tmp_folder + os.sep + table_name + '.sql',
                        dbname=self.database_name,
                        flag='-t -A')  #, username = self.user)
                    #(ok,result) = commands.getstatusoutput('psql -d %s -f %s -t -A' % (self.database_name, self.tmp_folder + os.sep + table_name + '.sql'))
                    end_time = datetime.now()
                    self.output(result[0].split('***')[0])
                    #self.output('\n'.join(result))

                    if ok and str(result).find('ERROR:') == -1 and str(
                            result).find('FATAL:') == -1 and (
                                str(result).find('INSERT 0') != -1
                                or str(result).find('CREATE VIEW') != -1):
                        status = 'SUCCESS'
                        con_id = int(
                            result[0].split('***')[1].split('|')[2].strip())
                    else:
                        status = 'ERROR'
                        self.continue_flag = False
                else:
                    status = 'SKIP'
                    beg_time = datetime.now()
                    end_time = beg_time
            else:
                status = 'ERROR'
                beg_time = datetime.now()
                end_time = beg_time

            duration = end_time - beg_time
            duration = duration.days * 24 * 3600 * 1000 + duration.seconds * 1000 + duration.microseconds / 1000
            beg_time = str(beg_time)
            end_time = str(end_time)
            self.output(
                '   Loading=%s   Iteration=%d   Stream=%d   Status=%s   Time=%d'
                % (table_name, 1, 1, status, duration))
            self.report_sql(
                "INSERT INTO hst.test_result VALUES (%d, %d, %d, 'Loading', '%s', 1, 1, '%s', '%s', '%s', %d, NULL, NULL, NULL, %d);"
                % (self.tr_id, self.s_id, con_id, table_name, status, beg_time,
                   end_time, duration, self.adj_s_id))

        self.output('-- Complete loading data')