Example #1
    def __runWaitAndCheckWorkerPoolForErrorsAndClear(self,
                                                     cmds,
                                                     actionVerb,
                                                     suppressErrorCheck=False,
                                                     progressCmds=[]):
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        elif progressCmds:
            self._join_and_show_segment_progress(
                progressCmds,
                inplace=(self.__progressMode == GpMirrorListToBuild.Progress.INPLACE))
        else:
            base.join_and_indicate_progress(self.__pool)

        if not suppressErrorCheck:
            self.__pool.check_results()

        completedRecoveryCmds = list(
            set(self.__pool.getCompletedItems()) & set(cmds))

        self.__pool.empty_completed_items()

        return completedRecoveryCmds
Example #2
    def __runStartCommand(self, segments, startMethod, numContentsInCluster, resultOut, gpArray, era):
        """
        Put results into the resultOut object
        """

        if len(segments) == 0:
            return

        if startMethod == START_AS_PRIMARY_OR_MIRROR:
            logger.info("Commencing parallel primary and mirror segment instance startup, please wait...")
        else:
            logger.info("Commencing parallel segment instance startup, please wait...")

        dbIdToPeerMap = gpArray.getDbIdToPeerMap()

        mirroringModePreTransition = MIRROR_MODE_MIRRORLESS if startMethod == START_AS_MIRRORLESS else MIRROR_MODE_QUIESCENT

        # launch the start
        for hostName, segments in GpArray.getSegmentsByHostName(segments).iteritems():
            logger.debug("Dispatching command to start segments on host: %s, " \
                            "with %s contents in cluster" % (hostName, numContentsInCluster))

            pickledTransitionData = None
            if startMethod == START_AS_PRIMARY_OR_MIRROR:
                mirroringModePerSegment = []
                for seg in segments:
                    modeThisSegment = MIRROR_MODE_PRIMARY if seg.isSegmentPrimary(True) else MIRROR_MODE_MIRROR
                    mirroringModePerSegment.append(modeThisSegment)
                pickledTransitionData = self.__createPickledTransitionParameters(segments, mirroringModePerSegment, None, dbIdToPeerMap)

            #
            # This will call sbin/gpsegstart.py
            #
            cmd = gp.GpSegStartCmd("remote segment starts on host '%s'" % hostName,
                                   self.__gpHome, segments,
                                   self.__gpVersion,
                                   mirroringModePreTransition,
                                   numContentsInCluster,
                                   era,
                                   self.master_checksum_value,
                                   self.__timeout,
                                   verbose=logging_is_verbose(),
                                   ctxt=base.REMOTE,
                                   remoteHost=segments[0].getSegmentAddress(),
                                   pickledTransitionData=pickledTransitionData,
                                   specialMode=self.__specialMode,
                                   wrapper=self.__wrapper,
                                   wrapper_args=self.__wrapper_args,
                                   parallel=self.__parallel,
                                   logfileDirectory=self.logfileDirectory)
            self.__workerPool.addCommand(cmd)

        if self.__quiet:
            self.__workerPool.join()
        else:
            base.join_and_indicate_progress(self.__workerPool)

        # process results
        self.__processStartOrConvertCommands(resultOut)
        self.__workerPool.empty_completed_items()
Example #3
    def run_pg_rewind(self, rewindInfo):
        """
        Run pg_rewind for incremental recovery.
        """

        rewindFailedSegments = []
        # Run pg_rewind on all the targets
        for rewindSeg in list(rewindInfo.values()):
            # Do a CHECKPOINT on the source to force the TimeLineID to be updated
            # in pg_control; pg_rewind needs this for incremental recovery to succeed.
            self.__logger.debug(
                'Do CHECKPOINT on %s (port: %d) before running pg_rewind.' %
                (rewindSeg.sourceHostname, rewindSeg.sourcePort))
            dburl = dbconn.DbURL(hostname=rewindSeg.sourceHostname,
                                 port=rewindSeg.sourcePort,
                                 dbname='template1')
            conn = dbconn.connect(dburl, utility=True)
            dbconn.execSQL(conn, "CHECKPOINT")
            conn.close()

            # If the postmaster.pid still exists and another process
            # is actively using that pid, pg_rewind will fail when it
            # tries to start the failed segment in single-user
            # mode. It should be safe to remove the postmaster.pid
            # file since we do not expect the failed segment to be up.
            self.remove_postmaster_pid_from_remotehost(
                rewindSeg.targetSegment.getSegmentHostName(),
                rewindSeg.targetSegment.getSegmentDataDirectory())

            # Note the command name, we use the dbid later to
            # correlate the command results with GpMirrorToBuild
            # object.
            cmd = gp.SegmentRewind(
                'rewind dbid: %s' % rewindSeg.targetSegment.getSegmentDbId(),
                rewindSeg.targetSegment.getSegmentHostName(),
                rewindSeg.targetSegment.getSegmentDataDirectory(),
                rewindSeg.sourceHostname,
                rewindSeg.sourcePort,
                verbose=gplog.logging_is_verbose())
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        for cmd in self.__pool.getCompletedItems():
            self.__logger.debug('pg_rewind results: %s' % cmd.results)
            if not cmd.was_successful():
                dbid = int(cmd.name.split(':')[1].strip())
                self.__logger.debug("%s failed" % cmd.name)
                self.__logger.warning(cmd.get_stdout())
                self.__logger.warning(
                    "Incremental recovery failed for dbid %d. You must use gprecoverseg -F to recover the segment."
                    % dbid)
                rewindFailedSegments.append(rewindInfo[dbid].targetSegment)

        self.__pool.empty_completed_items()

        return rewindFailedSegments
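
The remove_postmaster_pid_from_remotehost helper called above is not part of this listing. Below is a minimal sketch of what it might look like, reusing the remote 'rm -f' Command pattern that appears later in Example #16; the method body and the validateAfter handling are assumptions, not the actual gppylib implementation.

    def remove_postmaster_pid_from_remotehost(self, host, datadir):
        # Hypothetical sketch: remove a stale postmaster.pid on the remote host so
        # that pg_rewind can later start the failed segment in single-user mode.
        cmd = base.Command("remove the postmaster.pid file",
                           "rm -f %s" % pipes.quote(os.path.join(datadir, 'postmaster.pid')),
                           ctxt=base.REMOTE,
                           remoteHost=host)
        cmd.run(validateAfter=True)
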
Example #4
    def __runStartCommand(self, segments, startMethod, numContentsInCluster, resultOut, gpArray, era):
        """
        Put results into the resultOut object
        """

        if len(segments) == 0:
            return

        if startMethod == START_AS_PRIMARY_OR_MIRROR:
            logger.info("Commencing parallel primary and mirror segment instance startup, please wait...")
        else:
            logger.info("Commencing parallel segment instance startup, please wait...")

        dbIdToPeerMap = gpArray.getDbIdToPeerMap()

        mirroringModePreTransition = MIRROR_MODE_MIRRORLESS if startMethod == START_AS_MIRRORLESS else MIRROR_MODE_QUIESCENT

        # launch the start
        for hostName, segments in GpArray.getSegmentsByHostName(segments).items():
            logger.debug("Dispatching command to start segments on host: %s, " \
                            "with %s contents in cluster" % (hostName, numContentsInCluster))

            pickledTransitionData = None
            if startMethod == START_AS_PRIMARY_OR_MIRROR:
                mirroringModePerSegment = []
                for seg in segments:
                    modeThisSegment = MIRROR_MODE_PRIMARY if seg.isSegmentPrimary(True) else MIRROR_MODE_MIRROR
                    mirroringModePerSegment.append(modeThisSegment)
                pickledTransitionData = self.__createPickledTransitionParameters(segments, mirroringModePerSegment, None, dbIdToPeerMap)

            #
            # This will call sbin/gpsegstart.py
            #
            cmd = gp.GpSegStartCmd("remote segment starts on host '%s'" % hostName,
                                   self.__gpHome, segments,
                                   self.__gpVersion,
                                   mirroringModePreTransition,
                                   numContentsInCluster,
                                   era,
                                   self.master_checksum_value,
                                   self.__timeout,
                                   verbose=logging_is_verbose(),
                                   ctxt=base.REMOTE,
                                   remoteHost=segments[0].getSegmentAddress(),
                                   pickledTransitionData=pickledTransitionData,
                                   specialMode=self.__specialMode,
                                   wrapper=self.__wrapper,
                                   wrapper_args=self.__wrapper_args,
                                   parallel=self.__parallel,
                                   logfileDirectory=self.logfileDirectory)
            self.__workerPool.addCommand(cmd)

        if self.__quiet:
            self.__workerPool.join()
        else:
            base.join_and_indicate_progress(self.__workerPool)

        # process results
        self.__processStartOrConvertCommands(resultOut)
        self.__workerPool.empty_completed_items()
Example #5
    def __runWaitAndCheckWorkerPoolForErrorsAndClear(self, cmds, actionVerb, suppressErrorCheck=False):
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        if not suppressErrorCheck:
            self.__pool.check_results()
        self.__pool.empty_completed_items()
Example #6
    def test_join_and_indicate_progress_flushes_every_dot(self):
        duration = 0.005

        cmd = mock.Mock(spec=Command)
        def wait_for_duration():
            time.sleep(duration)
        cmd.run.side_effect = wait_for_duration
        self.pool.addCommand(cmd)

        stdout = mock.Mock(spec=file)
        join_and_indicate_progress(self.pool, stdout, interval=(duration / 5))

        for i, call in enumerate(stdout.mock_calls):
            # Every written dot should be followed by a flush().
            if call == mock.call.write('.'):
                self.assertEqual(stdout.mock_calls[i + 1], mock.call.flush())
Example #7
    def __runWaitAndCheckWorkerPoolForErrorsAndClear(self, cmds, actionVerb, suppressErrorCheck=False,
                                                     progressCmds=[]):
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        elif progressCmds:
            self._join_and_show_segment_progress(progressCmds,
                                                 inplace=self.__progressMode == GpMirrorListToBuild.Progress.INPLACE)
        else:
            base.join_and_indicate_progress(self.__pool)

        if not suppressErrorCheck:
            self.__pool.check_results()
        self.__pool.empty_completed_items()
Example #8
    def test_join_and_indicate_progress_prints_dots_until_pool_is_done(self):
        # To avoid false negatives from the race conditions here, let's set up a
        # situation where we'll print ten dots on average, and verify that there
        # were at least five dots printed.
        duration = 0.01

        cmd = mock.Mock(spec=Command)
        def wait_for_duration():
            time.sleep(duration)
        cmd.run.side_effect = wait_for_duration
        self.pool.addCommand(cmd)

        stdout = StringIO.StringIO()
        join_and_indicate_progress(self.pool, stdout, interval=(duration / 10))

        results = stdout.getvalue()
        self.assertIn('.....', results)
        self.assertTrue(results.endswith('\n'))
Example #9
    def test_join_and_indicate_progress_flushes_every_dot(self):
        duration = 0.005

        cmd = mock.Mock(spec=Command)

        def wait_for_duration():
            time.sleep(duration)

        cmd.run.side_effect = wait_for_duration
        self.pool.addCommand(cmd)

        stdout = mock.Mock(io.StringIO())
        join_and_indicate_progress(self.pool, stdout, interval=(duration / 5))

        for i, call in enumerate(stdout.mock_calls):
            # Every written dot should be followed by a flush().
            if call == mock.call.write('.'):
                self.assertEqual(stdout.mock_calls[i + 1], mock.call.flush())
Example #10
    def test_join_and_indicate_progress_prints_dots_until_pool_is_done(self):
        # To avoid false negatives from the race conditions here, let's set up a
        # situation where we'll print ten dots on average, and verify that there
        # were at least five dots printed.
        duration = 0.01

        cmd = mock.Mock(spec=Command)

        def wait_for_duration():
            time.sleep(duration)

        cmd.run.side_effect = wait_for_duration
        self.pool.addCommand(cmd)

        stdout = StringIO.StringIO()
        join_and_indicate_progress(self.pool, stdout, interval=(duration / 10))

        results = stdout.getvalue()
        self.assertIn('.....', results)
        self.assertTrue(results.endswith('\n'))
Example #11
 def _do_setup_for_recovery(self, recovery_info_by_host):
     self.__logger.info('Setting up the required segments for recovery')
     cmds = []
     for host_name, recovery_info_list in recovery_info_by_host.items():
         cmds.append(
             gp.GpSegSetupRecovery(
                 'Run validation checks and setup data directories for recovery',
                 recoveryinfo.serialize_list(recovery_info_list),
                 gplog.get_logger_dir(),
                 verbose=gplog.logging_is_verbose(),
                 batchSize=self.__parallelPerHost,
                 remoteHost=host_name,
                 forceoverwrite=self.__forceoverwrite))
     for cmd in cmds:
         self.__pool.addCommand(cmd)
     if self.__quiet:
         self.__pool.join()
     else:
         base.join_and_indicate_progress(self.__pool)
     completed_results = self.__pool.getCompletedItems()
     self.__pool.empty_completed_items()
     return completed_results
Example #12
    def test_join_and_indicate_progress_prints_nothing_if_pool_is_done(self):
        stdout = StringIO.StringIO()
        join_and_indicate_progress(self.pool, stdout)

        self.assertEqual(stdout.getvalue(), '')
Example #13
 def tmain():
     join_and_indicate_progress(self.pool, write_end, interval=0.001)
     write_end.close()
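
By itself, tmain is only the body of a helper thread. A plausible harness around it is sketched below; the os.pipe/threading setup, the test name, and the final assertion are assumptions reconstructed from the fragment (and assume import os and import threading), not code from the original test module.

    def test_progress_can_be_streamed_through_a_pipe(self):
        # Hypothetical harness: a worker thread streams the progress dots into an
        # OS pipe while the main thread reads them back until EOF.
        read_fd, write_fd = os.pipe()
        write_end = os.fdopen(write_fd, 'w')

        def tmain():
            join_and_indicate_progress(self.pool, write_end, interval=0.001)
            write_end.close()

        thread = threading.Thread(target=tmain)
        thread.start()

        with os.fdopen(read_fd) as read_end:
            output = read_end.read()  # blocks until write_end is closed

        thread.join()
        self.assertTrue(output == '' or output.endswith('\n'))
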
Example #14
    def test_join_and_indicate_progress_prints_nothing_if_pool_is_done(self):
        stdout = io.StringIO()
        join_and_indicate_progress(self.pool, stdout)

        self.assertEqual(stdout.getvalue(), '')
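
Taken together, the progress tests in this listing pin down the observable contract of join_and_indicate_progress: print nothing if the pool is already drained, otherwise emit one flushed dot per polling interval and terminate the dot line with a newline. The sketch below is consistent with that contract but is not the actual gppylib.commands.base implementation; in particular, WorkerPool.join(timeout) returning True once all commands have completed is an assumption.

def join_and_indicate_progress(pool, outfile=sys.stdout, interval=1):
    printed_dot = False
    # Poll the pool; each interval that elapses with work still outstanding
    # produces one dot, flushed immediately so progress is visible live.
    while not pool.join(interval):
        outfile.write('.')
        outfile.flush()
        printed_dot = True
    # Only terminate the dot line if something was printed, so an already
    # drained pool produces no output at all.
    if printed_dot:
        outfile.write('\n')
        outfile.flush()
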
Example #15
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        timeStamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()
            destSegment.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (
                gplog.get_logger_dir(), timeStamp,
                destSegment.getSegmentDbId())
            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
            destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel,
                                             validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)

            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          gplog.get_logger_dir(),
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)

        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in list(destSegmentByHost.keys()):
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'validate blank segments',
                                                 True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append(
                                "Validation failure on host %s %s" %
                                (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" +
                                              ("\n".join(validationErrors)))

        # Configure a new segment
        #
        # Recover segments using gpconfigurenewsegment, which
        # uses pg_basebackup. gprecoverseg generates a log filename which is
        # passed to gpconfigurenewsegment as a confinfo parameter. gprecoverseg
        # tails this file to show recovery progress to the user, and removes the
        # file when done. A new file is generated for each run of
        # gprecoverseg based on a timestamp.
        self.__logger.info('Configuring new segments')
        cmds = []
        progressCmds = []
        removeCmds = []
        for hostName in list(destSegmentByHost.keys()):
            for segment in destSegmentByHost[hostName]:
                progressCmd, removeCmd = self.__getProgressAndRemoveCmds(
                    segment.progressFile, segment.getSegmentDbId(), hostName)
                removeCmds.append(removeCmd)
                if progressCmd:
                    progressCmds.append(progressCmd)

            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'configure blank segments',
                                                 False))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds,
            "unpacking basic segment directory",
            suppressErrorCheck=False,
            progressCmds=progressCmds)

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            removeCmds,
            "removing pg_basebackup progress logfiles",
            suppressErrorCheck=False)

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(
                        srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory',
                                       'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE,
                                       remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp(
                            'copy db_dumps from old segment to new segment',
                            os.path.join(srcSeg.getSegmentDataDirectory(),
                                         'db_dumps*', '*'),
                            os.path.join(destSeg.getSegmentDataDirectory(),
                                         'db_dumps'),
                            srcSeg.getSegmentAddress(),
                            destSeg.getSegmentAddress(),
                            recursive=True)
                        cmd.run(validateAfter=True)
                        break
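
Example #15 factors the per-segment progress and cleanup commands into a __getProgressAndRemoveCmds helper that is not shown in this listing. Based on the inline construction in Example #16 below, a plausible sketch of that helper follows (its exact signature is an assumption).

    def __getProgressAndRemoveCmds(self, progressFile, targetSegmentDbId, hostName):
        # Build the optional 'tail the progress file' command and the cleanup
        # command that removes the pg_basebackup progress logfile afterwards.
        progressCmd = None
        if self.__progressMode != GpMirrorListToBuild.Progress.NONE:
            progressCmd = GpMirrorListToBuild.ProgressCommand(
                "tail the last line of the file",
                "set -o pipefail; touch -a {0}; tail -1 {0} | tr '\\r' '\\n' | tail -1".format(
                    pipes.quote(progressFile)),
                targetSegmentDbId,
                progressFile,
                ctxt=base.REMOTE,
                remoteHost=hostName)

        removeCmd = base.Command("remove file",
                                 "rm -f %s" % pipes.quote(progressFile),
                                 ctxt=base.REMOTE,
                                 remoteHost=hostName)

        return progressCmd, removeCmd
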
Example #16
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        timeStamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()
            destSegment.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (gplog.get_logger_dir(),
                                                                           timeStamp,
                                                                           destSegment.getSegmentDbId())
            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)

            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          gplog.get_logger_dir(),
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)
        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

        # Configure a new segment
        #
        # Recover segments using gpconfigurenewsegment, which
        # uses pg_basebackup. gprecoverseg generates a log filename which is
        # passed to gpconfigurenewsegment as a confinfo parameter. gprecoverseg
        # tails this file to show recovery progress to the user, and removes the
        # file when done. A new file is generated for each run of
        # gprecoverseg based on a timestamp.
        #
        # There is race between when the pg_basebackup log file is created and
        # when the progress command is run. Thus, the progress command touches
        # the file to ensure it's present before tailing.
        self.__logger.info('Configuring new segments')
        cmds = []
        progressCmds = []
        removeCmds = []
        for hostName in destSegmentByHost.keys():
            for segment in destSegmentByHost[hostName]:
                if self.__progressMode != GpMirrorListToBuild.Progress.NONE:
                    progressCmds.append(
                        GpMirrorListToBuild.ProgressCommand("tail the last line of the file",
                                       "set -o pipefail; touch -a {0}; tail -1 {0} | tr '\\r' '\\n' | tail -1".format(
                                           pipes.quote(segment.progressFile)),
                                       segment.getSegmentDbId(),
                                       segment.progressFile,
                                       ctxt=base.REMOTE,
                                       remoteHost=hostName))
                removeCmds.append(
                    base.Command("remove file",
                                 "rm -f %s" % pipes.quote(segment.progressFile),
                                 ctxt=base.REMOTE,
                                 remoteHost=hostName))

            cmds.append(
                createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory",
                                                          suppressErrorCheck=False,
                                                          progressCmds=progressCmds)

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(removeCmds, "removing pg_basebackup progress logfiles",
                                                          suppressErrorCheck=False)

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory', 'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE, remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp('copy db_dumps from old segment to new segment',
                                  os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps*', '*'),
                                  os.path.join(destSeg.getSegmentDataDirectory(), 'db_dumps'),
                                  srcSeg.getSegmentAddress(),
                                  destSeg.getSegmentAddress(),
                                  recursive=True)
                        cmd.run(validateAfter=True)
                        break
Example #17
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(
            self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)

            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn(
                    "%d segments failed to stop.  A full rebalance of the"
                    % failed_count)
                self.logger.warn(
                    "system is not possible at this time.  Please check the")
                self.logger.warn(
                    "log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info(
                    "gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(
                logger=self.logger,
                worker_pool=pool,
                timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            try:
                self.logger.info(
                    "=============================START ANOTHER RECOVER========================================="
                )
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(
                    local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error(
                        "Failed to start the synchronization step of the segment rebalance."
                    )
                    self.logger.error(
                        "Check the gprecoverseg log file, correct any problems, and re-run"
                    )
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info(
                    "==============================END ANOTHER RECOVER=========================================="
                )

        except Exception as ex:
            raise ex
Example #18
 def tmain():
     join_and_indicate_progress(self.pool, write_end, interval=0.001)
     write_end.close()
Example #19
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()

            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)

            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          gplog.get_logger_dir(),
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)

        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

        #
        # unpack and configure new segments
        #
        self.__logger.info('Configuring new segments')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory")

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory', 'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE, remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp('copy db_dumps from old segment to new segment',
                                  os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps*', '*'),
                                  os.path.join(destSeg.getSegmentDataDirectory(), 'db_dumps'),
                                  srcSeg.getSegmentAddress(),
                                  destSeg.getSegmentAddress(),
                                  recursive=True)
                        cmd.run(validateAfter=True)
                        break
Example #20
    def rebalance(self):
        self.logger.info("Determining primary and mirror segment pairs to rebalance")

        # The current implementation of rebalance calls "gprecoverseg -a" below.
        # Thus, if another balanced pair is not synchronized, or has a down mirror
        # that pair will be recovered as a side-effect of rebalancing.
        unbalanced_primary_segs = []
        for segmentPair in self.gpArray.segmentPairs:
            if segmentPair.balanced():
                continue

            if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized():
                unbalanced_primary_segs.append(segmentPair.primaryDB)
            else:
                self.logger.warning(
                    "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized" \
                    % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid))

        if not len(unbalanced_primary_segs):
            self.logger.info("No segments to rebalance")
            return True

        unbalanced_primary_segs = GpArray.getSegmentsByHostName(unbalanced_primary_segs)

        pool = base.WorkerPool(min(len(unbalanced_primary_segs), self.batch_size))
        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in list(unbalanced_primary_segs.keys()):
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600,
                                   segment_batch_size=self.segment_batch_size)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                    worker_pool=pool, timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            self.logger.info("=============================START ANOTHER RECOVER=========================================")
            # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            cmd_args = ['gprecoverseg', '-a', '-B', str(self.batch_size), '-b', str(self.segment_batch_size)]
            sys.argv = cmd_args[:]
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            try:
                recover_cmd.run()
            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error(' '.join(cmd_args))
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if recover_cmd:
                    recover_cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        except Exception as ex:
            raise ex
        finally:
            pool.join()
            pool.haltWork()
            pool.joinWorkers()
            signal.signal(signal.SIGINT, signal.default_int_handler)

        return allSegmentsStopped # if all segments stopped, then a full rebalance was done
Example #21
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                    worker_pool=pool, timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            try:
                self.logger.info("=============================START ANOTHER RECOVER=========================================")
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        except Exception as ex:
            raise ex