Exemple #1
0
 def test_pg_aocsseg_corruption(self):
     """Run the pg_aocsseg corruption script on a segment and diff its output.

     Creates the append-only test tables with ``row=False``, hands the
     ``corrupt_pg_aocsseg.sql.t`` template to ``transform_sql_file`` for
     ``co1``, runs ``corrupt_pg_aocsseg.sql`` in utility mode against the
     segment reported by ``GPDBConfig`` and compares the captured output
     with the expected answer file.

     Raises:
         Exception: if the output and the answer file are not equal.
     """
     self.create_appendonly_tables(row=False)
     seg_host, seg_port = GPDBConfig().get_hostandport_of_segment()

     template_path = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql.t')
     self.transform_sql_file(template_path, 'co1')

     actual_path = os.path.join(self.output_dir, 'corrupt_pg_aocsseg.out')
     expected_path = os.path.join(self.ans_dir, 'corrupt_pg_aocsseg.ans')
     script_path = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql')

     # Utility mode connects directly to the given segment host/port.
     PSQL.run_sql_file_utility_mode(
         script_path,
         out_file=actual_path,
         host=seg_host,
         port=seg_port,
         dbname=os.environ['PGDATABASE'])

     outputs_match = Gpdiff.are_files_equal(
         actual_path, expected_path,
         match_sub=[local_path('sql/init_file')])
     if outputs_match:
         return
     raise Exception('Corruption test of pg_aocsseg failed for appendonly tables !')
    def run_test(self, data_file, sql_file, out_file, ans_file, table):
        """Load data, then run a SQL file in utility mode and diff both outputs.

        All file names are resolved relative to the directory of the module
        that defines this test class: inputs under ``sql/``, captured output
        under ``output/`` and expected answers under ``expected/``.

        Args:
            data_file: Name of the data-loading SQL file (inside ``sql/``).
            sql_file: Name of the SQL file to run in utility mode.
            out_file: Name of the output file for ``sql_file``.
            ans_file: Name of the expected answer file for ``sql_file``.
            table: Table name passed to ``_get_host_and_port_for_table`` to
                choose the host/port used for the utility-mode run.
        """
        base_dir = os.path.dirname(sys.modules[self.__module__].__file__)
        out_file = os.path.join(base_dir, 'output', out_file)
        ans_file = os.path.join(base_dir, 'expected', ans_file)
        sql_file = os.path.join(base_dir, 'sql', sql_file)

        # Derive the data output/answer names by dropping only a trailing
        # '.sql' suffix.  str.strip('.sql') would remove any of the characters
        # '.', 's', 'q', 'l' from BOTH ends (e.g. 'sql_load.sql' -> '_load').
        sql_suffix = '.sql'
        data_name = data_file
        if data_name.endswith(sql_suffix):
            data_name = data_name[:-len(sql_suffix)]

        data_out_file = os.path.join(base_dir, 'output', data_name + '.out')
        data_ans_file = os.path.join(base_dir, 'expected', data_name + '.ans')
        data_file = os.path.join(base_dir, 'sql', data_file)

        # Step 1: load the data through a regular session and verify it.
        PSQL.run_sql_file(data_file, out_file=data_out_file)
        self.assertTrue(Gpdiff.are_files_equal(data_out_file, data_ans_file))

        # Step 2: run the actual test SQL in utility mode against the
        # host/port looked up for the table, then verify its output.
        host, port = self._get_host_and_port_for_table(table)
        PSQL.run_sql_file_utility_mode(sql_file, host=host, port=port, out_file=out_file)
        self.assertTrue(Gpdiff.are_files_equal(out_file, ans_file))
Exemple #3
0
 def test_pg_aocsseg_corruption(self):
     """Verify the pg_aocsseg corruption scenario for append-only tables.

     The test tables are created with ``row=False``, the SQL template is
     passed through ``transform_sql_file`` for ``co1``, and the SQL file is
     executed in utility mode on the segment returned by
     ``GPDBConfig.get_hostandport_of_segment``.  The test fails with an
     ``Exception`` when the output does not match the answer file.
     """
     self.create_appendonly_tables(row=False)
     gpdb_config = GPDBConfig()
     host, port = gpdb_config.get_hostandport_of_segment()

     sql_template = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql.t')
     self.transform_sql_file(sql_template, 'co1')

     out_file = os.path.join(self.output_dir, 'corrupt_pg_aocsseg.out')
     ans_file = os.path.join(self.ans_dir, 'corrupt_pg_aocsseg.ans')
     sql_file = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql')

     # Connection settings for the direct (utility mode) segment session.
     run_options = {
         'out_file': out_file,
         'host': host,
         'port': port,
         'dbname': os.environ['PGDATABASE'],
     }
     PSQL.run_sql_file_utility_mode(sql_file, **run_options)

     init_files = [local_path('sql/init_file')]
     if not Gpdiff.are_files_equal(out_file, ans_file, match_sub=init_files):
         failure = 'Corruption test of pg_aocsseg failed for appendonly tables !'
         raise Exception(failure)
 def run_sql_utility_mode(self, filename, host=None, port=None):
     """Run a SQL file in utility mode and validate its output.

     Args:
         filename: Base name used to look up the SQL file via
             ``get_sql_files`` and to build the ``.out`` / ``.ans`` paths.
         host: Host passed through to the utility-mode run.
         port: Port passed through to the utility-mode run.
     """
     sql_file = self.get_sql_files(filename)
     out_file = self.base_dir + "/sql/" + filename + '.out'
     ans_file = self.base_dir + "/expected/" + filename + '.ans'

     # Log the three paths between two separator lines.
     log_lines = (
         '\n==============================',
         sql_file,
         out_file,
         ans_file,
         '==============================',
     )
     for line in log_lines:
         tinctest.logger.info(line)

     PSQL.run_sql_file_utility_mode(sql_file, out_file=out_file,
                                    host=host, port=port)
     self.validate_sql(ans_file, out_file)
Exemple #5
0
 def run_sql_utility_mode(self, filename, host=None, port=None):
     """Execute ``filename`` in utility mode, then check it with validate_sql.

     The SQL file path comes from ``get_sql_files``; the output file is
     written under ``<base_dir>/sql/`` and the expected answer is read from
     ``<base_dir>/expected/``.  ``host`` and ``port`` are forwarded
     unchanged to ``PSQL.run_sql_file_utility_mode``.
     """
     sql_file = self.get_sql_files(filename)
     out_file = self.base_dir + "/sql/" + filename + '.out'
     ans_file = self.base_dir + "/expected/" + filename + '.ans'

     # Same five log records as before: separator, paths, separator.
     tinctest.logger.info('\n==============================')
     for path in (sql_file, out_file, ans_file):
         tinctest.logger.info(path)
     tinctest.logger.info('==============================')

     PSQL.run_sql_file_utility_mode(
         sql_file,
         out_file=out_file,
         host=host,
         port=port)
     self.validate_sql(ans_file, out_file)
Exemple #6
0
 def test_run_sql_file_utility_mode(self):
     """Check that run_sql_file_utility_mode writes output mentioning 'utility'.

     Runs ``test_utility_mode.sql`` (located next to the test class) and
     asserts that the call succeeds, that the output file is created and
     that its contents contain the string ``utility``.  The output file is
     removed afterwards.
     """
     test_dir = os.path.dirname(inspect.getfile(self.__class__))
     sql_file = os.path.join(test_dir, 'test_utility_mode.sql')
     out_file = os.path.join(test_dir, 'test_utility_mode.out')
     self.assertFalse(os.path.exists(out_file))
     try:
         self.assertTrue(PSQL.run_sql_file_utility_mode(sql_file=sql_file, out_file=out_file))
         self.assertTrue(os.path.exists(out_file))
         with open(out_file, 'r') as f:
             output = f.read()
         self.assertIsNotNone(re.search('utility', output))
     finally:
         # Only remove the file if it was actually created; otherwise
         # os.remove() would raise OSError here and hide the assertion
         # failure that explains why the test really failed.
         if os.path.exists(out_file):
             os.remove(out_file)
         self.assertFalse(os.path.exists(out_file))
    def test_singledb_corruption(self):
        """
        Test that gpcheckcat reports errors and that it generates
        the verify file, for a single corrupted database.

        The newest verify file for the database is then executed to
        make sure it is runnable SQL.
        """
        dbname = 'test_singledb_corruption'
        PSQL.run_sql_command('DROP DATABASE IF EXISTS %s' % dbname)
        create_output = PSQL.run_sql_command('CREATE DATABASE %s' % dbname)
        if not create_output.endswith('CREATE DATABASE\n'):
            self.fail('failed to create database: %s' % create_output)

        tables_sql = local_path('sql/create_tables.sql')
        tables_ok = PSQL.run_sql_file(tables_sql, dbname=dbname,
                                      output_to_file=False)
        if not tables_ok:
            self.fail('failed to create tables')

        # Corrupt the catalog directly on one segment (utility mode).
        host, port = self.config.get_hostandport_of_segment()
        corruption_sql = local_path('sql/catalog_corruption.sql')
        corrupted = PSQL.run_sql_file_utility_mode(
            corruption_sql, dbname=dbname, host=host, port=port,
            output_to_file=False)
        if not corrupted:
            self.fail('failed to introduce catalog corruption')

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        gpcheckcat_cmd = "cd %s && $GPHOME/bin/lib/gpcheckcat -p %s %s" % (
            self.gpcheckcat_test_dir, self.master_port, dbname)
        run_shell_command(gpcheckcat_cmd, results=res)

        self.assertEqual(3, res['rc'])
        # At least one verify file for this database must exist.
        found = any(
            fnmatch.fnmatch(entry, 'gpcheckcat.verify.%s.*' % dbname)
            for entry in os.listdir(self.gpcheckcat_test_dir))
        self.assertTrue(found)

        verify_file_pat = 'gpcheckcat.verify.%s.*' % dbname

        def modification_time(name):
            full_path = os.path.join(self.gpcheckcat_test_dir, name)
            return os.stat(full_path).st_mtime

        # Pick the most recently modified verify file.
        candidates = fnmatch.filter(
            os.listdir(self.gpcheckcat_test_dir), verify_file_pat)
        fname = sorted(candidates, key=modification_time)[-1]
        verify_path = os.path.join(self.gpcheckcat_test_dir, fname)
        if not PSQL.run_sql_file(verify_path, output_to_file=False):
            self.fail('failed to run verify file for database %s' % dbname)
Exemple #8
0
 def test_run_sql_file_utility_mode(self):
     """Run a SQL file in utility mode and check the produced output file.

     ``test_utility_mode.sql`` is expected next to the file defining the
     test class.  The test asserts that the run reports success, that the
     output file exists afterwards and that it contains ``utility``.  The
     output file is cleaned up regardless of the outcome.
     """
     here = os.path.dirname(inspect.getfile(self.__class__))
     sql_file = os.path.join(here, 'test_utility_mode.sql')
     out_file = os.path.join(here, 'test_utility_mode.out')
     self.assertFalse(os.path.exists(out_file))
     try:
         self.assertTrue(
             PSQL.run_sql_file_utility_mode(sql_file=sql_file,
                                            out_file=out_file))
         self.assertTrue(os.path.exists(out_file))
         with open(out_file, 'r') as f:
             output = f.read()
         self.assertIsNotNone(re.search('utility', output))
     finally:
         # Guard the cleanup: if the run failed before creating the file,
         # an unconditional os.remove() raises OSError and masks the
         # original assertion error.
         if os.path.exists(out_file):
             os.remove(out_file)
         self.assertFalse(os.path.exists(out_file))
    def test_error(self):
        """
        Test for errors during the generation of verify file

        The gpcheckcat working directory is made read-only before
        gpcheckcat runs, and the test asserts that gpcheckcat exits
        with rc 3 and that no verify file for the database shows up
        in that directory.
        """
        dbname = 'test_error'
        PSQL.run_sql_command('DROP DATABASE IF EXISTS %s' % dbname)
        stdout = PSQL.run_sql_command('CREATE DATABASE %s' % dbname)
        if not stdout.endswith('CREATE DATABASE\n'):
            self.fail('failed to create database: %s' % stdout)

        # Remove old verify files before running the test.
        if not run_shell_command('rm -f %s/gpcheckcat.verify.%s.*' %
                                 (self.gpcheckcat_test_dir, dbname)):
            self.fail('failed to remove old verify files')

        sql_file = local_path('sql/create_tables.sql')
        if not PSQL.run_sql_file(sql_file, dbname=dbname,
                                 output_to_file=False):
            self.fail('failed to create tables')

        host, port = self.config.get_hostandport_of_segment()
        sql_file = local_path('sql/catalog_corruption.sql')
        if not PSQL.run_sql_file_utility_mode(
                sql_file, dbname=dbname, host=host, port=port,
                output_to_file=False):
            self.fail('failed to introduce catalog corruption')

        # Remember the current permission bits so the directory can be
        # restored; leaving it read-only would break later tests that
        # need to write into it.
        original_mode = os.stat(self.gpcheckcat_test_dir).st_mode & 0o7777
        # 0o555 (r-xr-xr-x): the '0o' prefix is valid on Python 2.6+ and 3,
        # unlike the legacy '0555' spelling.
        os.chmod(self.gpcheckcat_test_dir, 0o555)
        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        try:
            run_shell_command(
                "cd %s && $GPHOME/bin/lib/gpcheckcat -p %s %s" %
                (self.gpcheckcat_test_dir, self.master_port, dbname),
                results=res)
        finally:
            os.chmod(self.gpcheckcat_test_dir, original_mode)
        self.assertEqual(3, res['rc'])
        for f in os.listdir(self.gpcheckcat_test_dir):
            if fnmatch.fnmatch(f, 'gpcheckcat.verify.%s.*' % dbname):
                self.fail('found verify file when not expecting it')
    def test_resync_ct_blocks_per_query(self):
        '''Catch a bug in resync that manifests only after rebalance.
        The logic used by a resync worker to obtain changed blocks
        from CT log had a bug.  The SQL query used to obtain a batch
        of changed blocks from CT log was incorrectly using LSN to
        filter out changed blocks.  All of the following must be true
        for the bug to occur:

         * More than gp_filerep_ct_batch_size blocks of a relation
           are changed on a segment in changetracking.

         * A block with a higher number is changed earlier (lower
           LSN) than lower numbered blocks.

         * The first batch of changed blocks obtained by resync worker
           from CT log for this relation contains only lower
           (according to block number) blocks.  The higher block with
           lower LSN is not included in this batch.  Another query
           must be run against CT log to obtain this block.

         * The SQL query used to obtain next batch of changed blocks
           for this relation contains incorrect WHERE clause involving
           a filter based on LSN of previously obtained blocks.  The
           higher numbered block is missed out - not returned by the
           query as changed block for the relation.  The block is
           never shipped from primary to mirror, resulting in data
           loss.  The test aims to verify that this doesn't happen as
           the bug is now fixed.
        '''
        config = GPDBConfig()
        assert (config.is_not_insync_segments()
                & config.is_balanced_segments()
                ), 'cluster not in-sync and balanced'

        # Create table and insert data so that adequate number of
        # blocks are occupied.
        self.run_sql('resync_bug_setup')
        # Bring down primaries and transition mirrors to
        # changetracking.
        filerep = Filerepe2e_Util()
        filerep.inject_fault(y='fault',
                             f='segment_probe_response',
                             r='primary')
        # Trigger the fault by running a sql file.
        PSQL.run_sql_file(local_path('test_ddl.sql'))
        filerep.wait_till_change_tracking_transition()

        # Set gp_filerep_ct_batch_size = 3.
        cmd = Command('reduce resync batch size',
                      'gpconfig -c gp_filerep_ct_batch_size -v 3')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('change_blocks_in_ct')

        # Capture change tracking log contents from the segment of
        # interest for debugging, in case the test fails.
        (host, port) = GPDBConfig().get_hostandport_of_segment(0, 'p')
        # Keep the path in a local so the assertion message can name it;
        # the message used to reference an undefined variable, which
        # turned a failed assertion into a NameError.
        sql_file = local_path('sql/ct_log_contents.sql')
        assert PSQL.run_sql_file_utility_mode(
            sql_file=sql_file,
            out_file=local_path('output/ct_log_contents.out'),
            host=host,
            port=port), sql_file

        gprecover = GpRecover(GPDBConfig())
        gprecover.incremental(False)
        gprecover.wait_till_insync_transition()

        # Rebalance, so that original primary is back in the role
        gprecover = GpRecover(GPDBConfig())
        gprecover.rebalance()
        gprecover.wait_till_insync_transition()

        # Reset gp_filerep_ct_batch_size
        cmd = Command('reset resync batch size',
                      'gpconfig -r gp_filerep_ct_batch_size')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('select_after_rebalance')
    def test_resync_ct_blocks_per_query(self):
        '''Catch a bug in resync that manifests only after rebalance.
        The logic used by a resync worker to obtain changed blocks
        from CT log had a bug.  The SQL query used to obtain a batch
        of changed blocks from CT log was incorrectly using LSN to
        filter out changed blocks.  All of the following must be true
        for the bug to occur:

         * More than gp_filerep_ct_batch_size blocks of a relation
           are changed on a segment in changetracking.

         * A block with a higher number is changed earlier (lower
           LSN) than lower numbered blocks.

         * The first batch of changed blocks obtained by resync worker
           from CT log for this relation contains only lower
           (according to block number) blocks.  The higher block with
           lower LSN is not included in this batch.  Another query
           must be run against CT log to obtain this block.

         * The SQL query used to obtain next batch of changed blocks
           for this relation contains incorrect WHERE clause involving
           a filter based on LSN of previously obtained blocks.  The
           higher numbered block is missed out - not returned by the
           query as changed block for the relation.  The block is
           never shipped from primary to mirror, resulting in data
           loss.  The test aims to verify that this doesn't happen as
           the bug is now fixed.
        '''
        def run_checked(description, cmdline, failure_message):
            # Run a shell command and fail the test on a non-zero rc.
            cmd = Command(description, cmdline)
            cmd.run()
            assert cmd.get_results().rc == 0, failure_message

        config = GPDBConfig()
        assert (config.is_not_insync_segments() &
                config.is_balanced_segments()), 'cluster not in-sync and balanced'

        # Create table and insert data so that adequate number of
        # blocks are occupied.
        self.run_sql('resync_bug_setup')
        # Bring down primaries and transition mirrors to
        # changetracking.
        filerep = Filerepe2e_Util()
        filerep.inject_fault(y='fault', f='segment_probe_response',
                             r='primary')
        # Trigger the fault by running a sql file.
        PSQL.run_sql_file(local_path('test_ddl.sql'))
        filerep.wait_till_change_tracking_transition()

        # Set gp_filerep_ct_batch_size = 3.
        run_checked('reduce resync batch size',
                    'gpconfig -c gp_filerep_ct_batch_size -v 3',
                    'gpconfig failed')
        run_checked('load updated config', 'gpstop -au',
                    '"gpstop -au" failed')

        self.run_sql('change_blocks_in_ct')

        # Capture change tracking log contents from the segment of
        # interest for debugging, in case the test fails.
        (host, port) = GPDBConfig().get_hostandport_of_segment(0, 'p')
        # The assertion message previously used an undefined name
        # (sql_file), so a failure surfaced as NameError.  Bind the path
        # first and report it in the message.
        ct_log_sql = local_path('sql/ct_log_contents.sql')
        assert PSQL.run_sql_file_utility_mode(
            sql_file=ct_log_sql,
            out_file=local_path('output/ct_log_contents.out'),
            host=host, port=port), ct_log_sql

        gprecover = GpRecover(GPDBConfig())
        gprecover.incremental(False)
        gprecover.wait_till_insync_transition()

        # Rebalance, so that original primary is back in the role
        gprecover = GpRecover(GPDBConfig())
        gprecover.rebalance()
        gprecover.wait_till_insync_transition()

        # Reset gp_filerep_ct_batch_size
        run_checked('reset resync batch size',
                    'gpconfig -r gp_filerep_ct_batch_size',
                    'gpconfig failed')
        run_checked('load updated config', 'gpstop -au',
                    '"gpstop -au" failed')

        self.run_sql('select_after_rebalance')
    def test_multidb_corruption(self):
        """
        Test that gpcheckcat reports errors and that it generates
        the verify file, for two corrupted databases.

        Each stage (create database, create tables, corrupt catalog,
        run gpcheckcat, look for verify files, run verify files) is
        performed for the first database and then for the second.
        """
        dbname1 = 'test_multidb_corruption1'
        dbname2 = 'test_multidb_corruption2'
        databases = (dbname1, dbname2)

        for name in databases:
            PSQL.run_sql_command('DROP DATABASE IF EXISTS %s' % name)
            create_output = PSQL.run_sql_command('CREATE DATABASE %s' % name)
            if not create_output.endswith('CREATE DATABASE\n'):
                self.fail('failed to create database: %s' % create_output)

        tables_sql = local_path('sql/create_tables.sql')
        for name in databases:
            created = PSQL.run_sql_file(tables_sql, dbname=name,
                                        output_to_file=False)
            if not created:
                self.fail('failed to create tables in database %s' % name)

        # Corrupt the catalog of both databases directly on one segment.
        host, port = self.config.get_hostandport_of_segment()
        corruption_sql = local_path('sql/catalog_corruption.sql')
        for name in databases:
            corrupted = PSQL.run_sql_file_utility_mode(
                corruption_sql, dbname=name, host=host, port=port,
                output_to_file=False)
            if not corrupted:
                self.fail('failed to introduce corruption in database %s' % name)

        # The same results dict is handed to both gpcheckcat runs.
        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        for name in databases:
            command = "cd %s && $GPHOME/bin/lib/gpcheckcat -p %s %s" % (
                self.gpcheckcat_test_dir, self.master_port, name)
            run_shell_command(command, results=res)
            self.assertTrue(res['rc'] > 0)

        # A verify file must exist for each database.
        for name in databases:
            found = any(
                fnmatch.fnmatch(entry, 'gpcheckcat.verify.%s.*' % name)
                for entry in os.listdir(self.gpcheckcat_test_dir))
            self.assertTrue(found)

        def modification_time(entry):
            return os.stat(
                os.path.join(self.gpcheckcat_test_dir, entry)).st_mtime

        # Ensure that the verify file can be run.  It is difficult to
        # assert the SQL output against an expected answer file
        # because the output mostly has OIDs.  We are therefore
        # skipping this level of assertion for now.
        for name in databases:
            # Choose the most recent verify file with the database
            # name in its file name.
            verify_file_pat = 'gpcheckcat.verify.%s.*' % name
            matching = fnmatch.filter(
                os.listdir(self.gpcheckcat_test_dir), verify_file_pat)
            fname = sorted(matching, key=modification_time)[-1]
            verify_path = os.path.join(self.gpcheckcat_test_dir, fname)
            if not PSQL.run_sql_file(verify_path, output_to_file=False):
                self.fail('failed to run verify file for database %s' % name)