Code example #1
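Constructor of DbStateClass: it keeps the GPDBConfig passed in (or builds a default one), creates a GpdbVerify bound to that config, and then delegates to the parent test-case constructor.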
 def __init__(self, methodName, config=None):
     if config is not None:
         self.config = config
     else:
         self.config = GPDBConfig()
     self.gpverify = GpdbVerify(config=self.config)
     super(DbStateClass, self).__init__(methodName)
Code example #2
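A unittest.TestCase regression suite that exercises the individual GpdbVerify entry points (gpcheckcat, gpcheckmirrorseg, check_db_is_running, run_repair_script, ignore_extra_m), creating a gptest database in setUp and dropping it in tearDown.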
class GpdbVerifyRegressionTests(unittest.TestCase):
    def __init__(self, methodName):
        self.gpv = GpdbVerify()
        super(GpdbVerifyRegressionTests, self).__init__(methodName)

    def setUp(self):
        PSQL.run_sql_command('create database gptest;', dbname='postgres')

    def tearDown(self):
        PSQL.run_sql_command('drop database gptest', dbname='postgres')

    def test_gpcheckcat(self):
        (a, b, c, d) = self.gpv.gpcheckcat()
        self.assertIn(a, (0, 1, 2))

    def test_gpcheckmirrorseg(self):
        (res, fix_file) = self.gpv.gpcheckmirrorseg()
        self.assertIn(res, (True, False))

    def test_check_db_is_running(self):
        self.assertTrue(self.gpv.check_db_is_running())

    def test_run_repairscript(self):
        repair_script = local_path('gpcheckcat_repair')
        res = self.gpv.run_repair_script(repair_script)
        self.assertIn(res, (True, False))

    def test_ignore_extra_m(self):
        fix_file = local_path('fix_file')
        res = self.gpv.ignore_extra_m(fix_file)
        self.assertIn(res, (True, False))
Code example #3
File: __init__.py Project: 50wu/gpdb
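A do_gpcheckcat helper that reruns gpcheckcat up to five times, applying the generated repair script after each failing pass, and raises an exception if errors remain after the final attempt.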
 def do_gpcheckcat(self):
     dbstate = GpdbVerify()
     i = 0
     errorCode = 1
     while errorCode > 0 and i < 5:
         (errorCode, hasError, gpcheckcat_output, repairScriptDir) = dbstate.gpcheckcat(alldb=False)
         tinctest.logger.info(" %s Gpcheckcat iteration . ErrorCode: %s " % (i, errorCode))
         if errorCode > 0:
             dbstate.run_repair_script(repairScriptDir)
             i = i + 1
         if errorCode != 0 and i >= 5:
             raise Exception('gpcheckcat finished with error(s)')
Code example #4
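The same gpcheckcat retry loop as example #3, with the long call and log statement wrapped onto shorter lines.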
 def do_gpcheckcat(self):
     dbstate = GpdbVerify()
     i = 0
     errorCode = 1
     while (errorCode > 0 and i < 5):
         (errorCode, hasError, gpcheckcat_output,
          repairScriptDir) = dbstate.gpcheckcat(alldb=False)
         tinctest.logger.info(" %s Gpcheckcat iteration . ErrorCode: %s " %
                              (i, errorCode))
         if (errorCode > 0):
             dbstate.run_repair_script(repairScriptDir)
             i = i + 1
         if errorCode != 0 and i >= 5:
             raise Exception('gpcheckcat finished with error(s)')
Code example #5
File: dbstate.py Project: PengJi/gpdb-comments
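The DbStateClass constructor as found in PengJi/gpdb-comments; behaviorally identical to example #1.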
 def __init__(self, methodName, config=None):
     if config is not None:
         self.config = config
     else:
         self.config = GPDBConfig()
     self.gpverify = GpdbVerify(config=self.config)
     super(DbStateClass, self).__init__(methodName)
Code example #6
File: dbstate.py Project: PengJi/gpdb-comments
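The full DbStateClass: check_system confirms that every segment is up and in sync, check_catalog wraps gpverify.gpcheckcat, check_mirrorintegrity wraps gpcheckmirrorseg, and run_validation chains the catalog check, the mirror integrity check, and (when the configuration has a master mirror) the master mirror check.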
class DbStateClass(MPPTestCase):


    def __init__(self, methodName, config=None):
        if config is not None:
            self.config = config
        else:
            self.config = GPDBConfig()
        self.gpverify = GpdbVerify(config=self.config)
        super(DbStateClass, self).__init__(methodName)

    def check_system(self):
        ''' 
        @summary: Check whether the system is up and sync. Exit out if not 
        '''
        cmd ="select count(*) from gp_segment_configuration where content<> -1 ;"
        count_all = PSQL.run_sql_command(cmd, flags ='-q -t', dbname='postgres')
        cmd ="select count(*) from gp_segment_configuration where content<> -1 and mode = 's' and status = 'u';"
        count_up_and_sync = PSQL.run_sql_command(cmd, flags ='-q -t', dbname='postgres')
        if count_all.strip() != count_up_and_sync.strip() :
            raise Exception('The cluster is not in up/sync ............')
        else:
            tinctest.logger.info("\n Starting New Test: System is up and in sync .........")

    def check_catalog(self, dbname=None, alldb=True, online=False, testname=None, outputFile=None, host=None, port=None):
        '''1. Run gpcheckcat'''
        (errorCode, hasError, gpcheckcat_output, repairScriptDir) = self.gpverify.gpcheckcat(dbname=dbname, alldb=alldb, online=online, testname=testname, outputFile=outputFile, host=host, port=port)
        if errorCode != 0:
            raise Exception('GpCheckcat failed with errcode %s ' % (errorCode))

    def check_mirrorintegrity(self, master=False):
        '''Runs checkmirrorintegrity(default), check_mastermirrorintegrity(when master=True) '''
        (checkmirror, fix_outfile) = self.gpverify.gpcheckmirrorseg(master=master)
        if not checkmirror:
            self.fail('Checkmirrorseg failed. Fix file location : %s' % fix_outfile)
        tinctest.logger.info('Successfully completed integrity check')

    def run_validation(self):
        '''
        1. gpcheckcat
        2. checkmirrorintegrity
        3. check_mastermirrorintegrity 
        ''' 
        self.check_catalog()
        self.check_mirrorintegrity()
        if self.config.has_master_mirror():
            self.check_mirrorintegrity(master=True)
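
The class above is usually driven from other test cases (see examples #7 and #10 below, which pass a GPDBConfig and the method name 'run_validation'). A minimal usage sketch, assuming DbStateClass and GPDBConfig can be imported from the surrounding TINC test library:

    # Hypothetical driver code; imports depend on the local test framework layout.
    config = GPDBConfig()
    dbstate = DbStateClass('run_validation', config)
    dbstate.check_system()       # raises unless every segment is up and in sync
    dbstate.run_validation()     # gpcheckcat plus mirror integrity checks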
Code example #7
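Constructor of PgtwoPhaseClass: it assembles the helpers used by the two-phase-commit fault tests (Filerepe2e_Util, GPDBConfig, GpRecover, GpStop, GpStart, GpdbVerify, and a DbStateClass prepared for run_validation) and records PGPORT.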
 def __init__(self, methodName):
     self.filereputil = Filerepe2e_Util()
     self.config = GPDBConfig()
     self.gprecover = GpRecover(self.config)
     self.gpstop = GpStop()
     self.gpstart = GpStart()
     self.gpverify = GpdbVerify(config=self.config)
     self.dbstate = DbStateClass('run_validation', self.config)
     self.port = os.getenv('PGPORT')
     super(PgtwoPhaseClass, self).__init__(methodName)
Code example #8
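The regression suite from example #2 plus two cases for cleanup_day_old_out_files: a file whose name carries a timestamp pushed back by 1005000 (roughly a day in YYYYMMDDHHMMSS terms) must be deleted, while a file stamped with the current time must survive.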
class GpdbVerifyRegressionTests(unittest.TestCase):
    def __init__(self, methodName):
        self.gpv = GpdbVerify()
        super(GpdbVerifyRegressionTests, self).__init__(methodName)

    def setUp(self):
        PSQL.run_sql_command('create database gptest;', dbname='postgres')

    def tearDown(self):
        PSQL.run_sql_command('drop database gptest', dbname='postgres')

    def test_gpcheckcat(self):
        (a, b, c, d) = self.gpv.gpcheckcat()
        self.assertIn(a, (0, 1, 2))

    def test_gpcheckmirrorseg(self):
        (res, fix_file) = self.gpv.gpcheckmirrorseg()
        self.assertIn(res, (True, False))

    def test_check_db_is_running(self):
        self.assertTrue(self.gpv.check_db_is_running())

    def test_run_repairscript(self):
        repair_script = local_path('gpcheckcat_repair')
        res = self.gpv.run_repair_script(repair_script)
        self.assertIn(res, (True, False))

    def test_ignore_extra_m(self):
        fix_file = local_path('fix_file')
        res = self.gpv.ignore_extra_m(fix_file)
        self.assertIn(res, (True, False))

    def test_cleanup_old_file(self):
        old_time = int(time.strftime("%Y%m%d%H%M%S")) - 1005000
        old_file = local_path('checkmirrorsegoutput_%s' % old_time)
        open(old_file, 'w')
        self.gpv.cleanup_day_old_out_files(local_path(''))
        self.assertFalse(os.path.isfile(old_file))

    def test_not_cleanup_todays_file(self):
        new_file = local_path('checkmirrorsegoutput_%s' %
                              time.strftime("%Y%m%d%H%M%S"))
        open(new_file, 'w')
        self.gpv.cleanup_day_old_out_files(local_path(''))
        self.assertTrue(os.path.isfile(new_file))
Code example #9
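Another copy of the regression suite from example #8.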
class GpdbVerifyRegressionTests(unittest.TestCase):

    def __init__(self, methodName):
        self.gpv = GpdbVerify()
        super(GpdbVerifyRegressionTests, self).__init__(methodName)
    
    def setUp(self):
        PSQL.run_sql_command('create database gptest;', dbname='postgres')

    def tearDown(self):
        PSQL.run_sql_command('drop database gptest', dbname='postgres')
     
    def test_gpcheckcat(self):
        (a, b, c, d) = self.gpv.gpcheckcat()
        self.assertIn(a, (0, 1, 2))

    def test_gpcheckmirrorseg(self):
        (res, fix_file) = self.gpv.gpcheckmirrorseg()
        self.assertIn(res, (True, False))

    def test_check_db_is_running(self):
        self.assertTrue(self.gpv.check_db_is_running())

    def test_run_repairscript(self):
        repair_script = local_path('gpcheckcat_repair')
        res = self.gpv.run_repair_script(repair_script)
        self.assertIn(res, (True, False))

    def test_ignore_extra_m(self):
        fix_file = local_path('fix_file')
        res = self.gpv.ignore_extra_m(fix_file)
        self.assertIn(res, (True, False))
     
    def test_cleanup_old_file(self):
        old_time = int(time.strftime("%Y%m%d%H%M%S")) - 1005000
        old_file = local_path('checkmirrorsegoutput_%s' % old_time)
        open(old_file, 'w')
        self.gpv.cleanup_day_old_out_files(local_path(''))
        self.assertFalse(os.path.isfile(old_file))
        
    def test_not_cleanup_todays_file(self):
        new_file = local_path('checkmirrorsegoutput_%s' % time.strftime("%Y%m%d%H%M%S"))
        open(new_file, 'w')
        self.gpv.cleanup_day_old_out_files(local_path(''))
        self.assertTrue(os.path.isfile(new_file))
Code example #10
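A constructor variant that takes an optional GPDBConfig instead of a test method name and builds the same helper objects as example #7.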
    def __init__(self, config=None):
        if config is not None:
            self.config = config
        else:
            self.config = GPDBConfig()

        self.filereputil = Filerepe2e_Util()
        self.gprecover = GpRecover(self.config)
        self.gpstop = GpStop()
        self.gpstart = GpStart()
        self.gpverify = GpdbVerify(config=self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.port = os.getenv('PGPORT')
Code example #11
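test_pg_inherits: creates a partitioned table, reorders its children in pg_inherits on one primary segment (but not on the master), rewrites the table with an ALTER TABLE ... TYPE change, and then runs gpcheckcat's 'inconsistent' test to confirm that the OIDs stayed consistent.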
    def test_pg_inherits(self):
        """
        Change order of children in pg_inherits on segments.  Alter should not
        cause inconsistent OIDs.

        """
        # Create partitioned table.
        sql = local_path("create_part_table.sql")
        out = local_path("create_part_table.out")
        ans = local_path("create_part_table.ans")
        PSQL.run_sql_file(sql, out)
        assert Gpdiff.are_files_equal(out, ans)

        # Change order of children in pg_inherits on segments but not
        # on master.
        sql = local_path("reorder_pg_inherits.sql")
        out = local_path("reorder_pg_inherits.out")
        ans = local_path("reorder_pg_inherits.ans")
        segments = [
            seg for seg in self.gparray.getSegDbList() if seg.role == "p"
        ]
        assert len(segments) > 0, "No primary segments found."
        primary = segments[0]
        PSQL.run_sql_file(sql,
                          out,
                          host=primary.hostname,
                          port=primary.port,
                          PGOPTIONS=("-c allow_system_table_mods=dml "
                                     "-c gp_session_role=utility"))
        assert Gpdiff.are_files_equal(out, ans)

        # Alter the partitioned table so that it's rewritten.
        with dbconn.connect(dbconn.DbURL()) as conn:
            dbconn.execSQL(conn, "ALTER TABLE co1 ALTER COLUMN c2 TYPE int8")
            conn.commit()

        # Run gpcheckcat
        result = GpdbVerify().gpcheckcat(testname="inconsistent")
        # Test return code
        if result[0] != 0:
            logger.error(result[2])  # log output
            self.fail("gpcheckcat 'inconsistent' test failed")
Code example #12
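test_master_panic_after_phase1: suspends the master right after the distributed commit record reaches the xlog, crashes one primary (and its mirror) while the commit is blocked, resumes the master so it PANICs and goes through crash recovery, verifies the transaction committed, and finishes with GpdbVerify().gpcheckcat().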
    def test_master_panic_after_phase1(self):
        """PANIC master after recording distributed commit.

        Trigger PANIC in master after completing phase 1 of 2PC,
        right after recording distributed commit in xlog but before
        broadcasting COMMIT PREPARED to segments.  Master's recovery
        cycle should correctly broadcast COMMIT PREPARED because
        master should find distributed commit record in its xlog
        during recovery.  Verify that the transaction is committed
        after recovery.

        JIRA: MPP-19044

        """
        tinctest.logger.info("running test: test_crash_master_after_phase1")
        gparray = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
        assert len(gparray.getHostList()) == 1, "cannot run on multi-node"
        host = gparray.getHostList()[0]

        # Must have at least one in-sync and up segment.
        primaries = [
            p for p in gparray.get_list_of_primary_segments_on_host(host)
            if p.getSegmentMode() == "s" and p.getSegmentStatus() == "u"
        ]
        assert len(primaries) > 0, "in-sync and up primary not found"
        primary = primaries[0]
        tinctest.logger.info("chose primary: %s" % primary.datadir)

        # Inject suspend fault after recording distributed commit on master.
        cmd = Command("Suspend master post distributed commit",
                      self.faultcmd % "suspend")
        cmd.run(validateAfter=True)
        tinctest.logger.info(cmd.get_results().printResult())

        # Trigger the fault.
        cmd = Command("run DDL",
                      "psql -f %s" % local_path('sql/ao_create.sql'))
        self.proc = cmd.runNoWait()
        tinctest.logger.info("runNoWait: %s, pid: %d" %
                             (cmd.cmdStr, self.proc.pid))

        commitBlocked = self.filereputil.check_fault_status(
            fault_name='dtm_xlog_distributed_commit',
            status="triggered",
            seg_id='1',
            num_times_hit=1)

        # Shutdown of primary (and mirror) should happen only after
        # the commit is blocked due to suspend fault.
        assert commitBlocked, "timeout waiting for commit to be blocked"
        tinctest.logger.info("commit is blocked due to suspend fault")
        # At this point, segments have already recorded the
        # transaction as prepared by writing PREPARE record in xlog.
        # Crash one primary (and its mirror).
        mirror = None
        mirrors = [
            m for m in gparray.get_list_of_mirror_segments_on_host(host)
            if m.getSegmentMode() == "s" and m.getSegmentStatus() == "u"
            and primary.getSegmentContentId() == m.getSegmentContentId()
        ]
        if len(mirrors) > 0:
            mirror = mirrors[0]
            tinctest.logger.info("chose mirror: %s" % mirror.datadir)
            # Pause FTS probes to avoid a failover while we bring down
            # segments.  Note that we bring down both primary and its
            # mirror, thereby causing double failure.  This prevents
            # FTS from making changes to segment configuration, even
            # if FTS probes are unpaused.  It is necessary to unpause
            # FTS probes to prevent gang creation from being blocked.
            PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = on")
            tinctest.logger.info("FTS probes paused")
            cmdstr = 'pg_ctl -D %s stop -m immediate' % mirror.datadir
            tinctest.logger.info("bringing down primary: %s" % cmdstr)
            cmd = Command("Shutdown a primary segment", cmdstr)
            cmd.run(validateAfter=True)

        cmdstr = 'pg_ctl -D %s stop -m immediate' % primary.datadir
        tinctest.logger.info("bringing down primary: %s" % cmdstr)
        cmd = Command("Shutdown a primary segment", cmdstr)
        cmd.run(validateAfter=True)

        if mirror is not None:
            PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = off")
            tinctest.logger.info("FTS probes unpaused")

        # Resume master.  Master should PANIC and go through crash recovery.
        cmd = Command("resume master", self.faultcmd % "resume")
        cmd.run(validateAfter=True)
        tinctest.logger.info(cmd.get_results().printResult())

        (rc, out, err) = self.proc.communicate2()
        self.proc = None
        tinctest.logger.info("runNoWait rc: %d, output: %s, err: %s" %
                             (rc, out, err))
        # Fail if QD did not PANIC.
        assert (out.find("commit succeeded") == -1
                and err.find("commit succeeded") == -1
                and err.find("PANIC") != -1)
        # Wait for recovery to complete, timeout after ~ 5 mins.
        attempts = 1
        recoveryComplete = False
        while attempts < 600 and not recoveryComplete:
            recoveryComplete = "aaa150" in PSQL.run_sql_command_utility_mode(
                "select 'aaa' || (100+50)")
            time.sleep(0.5)
            attempts = attempts + 1
        assert recoveryComplete, "timeout waiting for master to recover"
        cmdstr = "gpstop -ar"
        cmd = Command("restart", cmdstr)
        tinctest.logger.info("restarting the cluster with '%s'" % cmdstr)
        cmd.run(validateAfter=True)
        tinctest.logger.info("restart complete")
        # Verify table got created (commit was successful).
        assert PSQL.run_sql_file(local_path('sql/ao_select.sql'))

        gpverify = GpdbVerify()
        (errorCode, hasError, gpcheckcat_output,
         repairScript) = gpverify.gpcheckcat()
        assert errorCode == 0, ("gpcheckcat failed: %s" % gpcheckcat_output[0])

        # No need to restart GPDB again in tearDown()
        self.skipRestart = True
Code example #13
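DbStateClass once more, in the line-wrapped layout; functionally the same as example #6.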
class DbStateClass(MPPTestCase):
    def __init__(self, methodName, config=None):
        if config is not None:
            self.config = config
        else:
            self.config = GPDBConfig()
        self.gpverify = GpdbVerify(config=self.config)
        super(DbStateClass, self).__init__(methodName)

    def check_system(self):
        ''' 
        @summary: Check whether the system is up and sync. Exit out if not 
        '''
        cmd = "select count(*) from gp_segment_configuration where content<> -1 ;"
        count_all = PSQL.run_sql_command(cmd, flags='-q -t', dbname='postgres')
        cmd = "select count(*) from gp_segment_configuration where content<> -1 and mode = 's' and status = 'u';"
        count_up_and_sync = PSQL.run_sql_command(cmd,
                                                 flags='-q -t',
                                                 dbname='postgres')
        if count_all.strip() != count_up_and_sync.strip():
            raise Exception('The cluster is not in up/sync ............')
        else:
            tinctest.logger.info(
                "\n Starting New Test: System is up and in sync .........")

    def check_catalog(self,
                      dbname=None,
                      alldb=True,
                      online=False,
                      testname=None,
                      outputFile=None,
                      host=None,
                      port=None):
        '''1. Run gpcheckcat'''
        (errorCode, hasError, gpcheckcat_output,
         repairScriptDir) = self.gpverify.gpcheckcat(dbname=dbname,
                                                     alldb=alldb,
                                                     online=online,
                                                     testname=testname,
                                                     outputFile=outputFile,
                                                     host=host,
                                                     port=port)
        if errorCode != 0:
            raise Exception('GpCheckcat failed with errcode %s ' % (errorCode))

    def check_mirrorintegrity(self, master=False):
        '''Runs checkmirrorintegrity(default), check_mastermirrorintegrity(when master=True) '''
        (checkmirror,
         fix_outfile) = self.gpverify.gpcheckmirrorseg(master=master)
        if not checkmirror:
            self.fail('Checkmirrorseg failed. Fix file location : %s' %
                      fix_outfile)
        tinctest.logger.info('Successfully completed integrity check')

    def run_validation(self):
        '''
        1. gpcheckcat
        2. checkmirrorintegrity
        3. check_mastermirrorintegrity 
        '''
        self.check_catalog()
        self.check_mirrorintegrity()
        if self.config.has_master_mirror():
            self.check_mirrorintegrity(master=True)
Code example #14
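The same master PANIC scenario as example #12, with an extra five-second sleep before polling for recovery to complete.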
    def test_master_panic_after_phase1(self):
        """PANIC master after recording distributed commit.

        Trigger PANIC in master after completing phase 1 of 2PC,
        right after recording distributed commit in xlog but before
        broadcasting COMMIT PREPARED to segments.  Master's recovery
        cycle should correctly broadcast COMMIT PREPARED because
        master should find distributed commit record in its xlog
        during recovery.  Verify that the transaction is committed
        after recovery.

        JIRA: MPP-19044

        """
        tinctest.logger.info("running test: test_crash_master_after_phase1")
        gparray = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
        assert len(gparray.getHostList()) == 1, "cannot run on multi-node"
        host = gparray.getHostList()[0]

        # Must have at least one in-sync and up segment.
        primaries = [
            p for p in gparray.get_list_of_primary_segments_on_host(host)
            if p.getSegmentMode() == "s" and p.getSegmentStatus() == "u"]
        assert len(primaries) > 0, "in-sync and up primary not found"
        primary = primaries[0]
        tinctest.logger.info("chose primary: %s" % primary.datadir)

        # Inject suspend fault after recording distributed commit on master.
        cmd = Command("Suspend master post distributed commit",
                      self.faultcmd % "suspend")
        cmd.run(validateAfter=True)
        tinctest.logger.info(cmd.get_results().printResult())

        # Trigger the fault.
        cmd = Command("run DDL", "psql -f %s" %
                      local_path('sql/ao_create.sql'))
        self.proc = cmd.runNoWait()
        tinctest.logger.info("runNoWait: %s, pid: %d" % (cmd.cmdStr, self.proc.pid))

        commitBlocked = self.filereputil.check_fault_status(
            fault_name='dtm_xlog_distributed_commit',
            status="triggered",
            seg_id='1',
            num_times_hit=1)

        # Shutdown of primary (and mirror) should happen only after
        # the commit is blocked due to suspend fault.
        assert commitBlocked, "timeout waiting for commit to be blocked"
        tinctest.logger.info("commit is blocked due to suspend fault")
        # At this point, segments have already recorded the
        # transaction as prepared by writing PREPARE record in xlog.
        # Crash one primary (and its mirror).
        mirror = None
        mirrors = [m for m in gparray.get_list_of_mirror_segments_on_host(host)
                   if m.getSegmentMode() == "s" and m.getSegmentStatus() == "u"
                   and primary.getSegmentContentId() == m.getSegmentContentId()]
        if len(mirrors) > 0:
            mirror = mirrors[0]
            tinctest.logger.info("chose mirror: %s" % mirror.datadir)
            # Pause FTS probes to avoid a failover while we bring down
            # segments.  Note that we bring down both primary and its
            # mirror, thereby causing double failure.  This prevents
            # FTS from making changes to segment configuration, even
            # if FTS probes are unpaused.  It is necessary to unpause
            # FTS probes to prevent gang creation from being blocked.
            PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = on")
            tinctest.logger.info("FTS probes paused")
            cmdstr = 'pg_ctl -D %s stop -m immediate' % mirror.datadir
            tinctest.logger.info("bringing down primary: %s" % cmdstr)
            cmd = Command("Shutdown a primary segment", cmdstr)
            cmd.run(validateAfter=True)

        cmdstr = 'pg_ctl -D %s stop -m immediate' % primary.datadir
        tinctest.logger.info("bringing down primary: %s" % cmdstr)
        cmd = Command("Shutdown a primary segment", cmdstr)
        cmd.run(validateAfter=True)

        if mirror is not None:
            PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = off")
            tinctest.logger.info("FTS probes unpaused")

        # Resume master.  Master should PANIC and go through crash recovery.
        cmd = Command("resume master", self.faultcmd % "resume")
        cmd.run(validateAfter=True)
        tinctest.logger.info(cmd.get_results().printResult())

        (rc, out, err) = self.proc.communicate2()
        self.proc = None
        tinctest.logger.info("runNoWait rc: %d, output: %s, err: %s" %
                              (rc, out, err))
        # Fail if QD did not PANIC.
        assert (out.find("commit succeeded") == -1 and
                err.find("commit succeeded") == -1 and
                err.find("PANIC") != -1)

        # Wait for a few seconds to ensure that postmaster reset has started
        time.sleep(5)

        # Wait for recovery to complete, timeout after ~ 5 mins.
        attempts = 1
        recoveryComplete = False
        while attempts < 600 and not recoveryComplete:
            recoveryComplete = "aaa150" in PSQL.run_sql_command_utility_mode(
                "select 'aaa' || (100+50)")
            time.sleep(0.5)
            attempts = attempts + 1
        assert recoveryComplete, "timeout waiting for master to recover"
        cmdstr = "gpstop -ar"
        cmd = Command("restart", cmdstr)
        tinctest.logger.info("restarting the cluster with '%s'" % cmdstr)
        cmd.run(validateAfter=True)
        tinctest.logger.info("restart complete")
        # Verify table got created (commit was successful).
        assert PSQL.run_sql_file(local_path('sql/ao_select.sql'))

        gpverify = GpdbVerify()
        (errorCode, hasError, gpcheckcat_output, repairScript) = gpverify.gpcheckcat()
        assert errorCode == 0, ("gpcheckcat failed: %s" % gpcheckcat_output[0])

        # No need to restart GPDB again in tearDown()
        self.skipRestart = True
Code example #15
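Minimal GpdbVerifyRegressionTests constructor: create a GpdbVerify and delegate to unittest.TestCase.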
 def __init__(self, methodName):
     self.gpv = GpdbVerify()
     super(GpdbVerifyRegressionTests, self).__init__(methodName)