Example 1
    def _stop_failed_segments(self, gpEnv):
        failed_reachable_segments = self._get_failed_reachable_segments()
        if len(failed_reachable_segments) == 0:
            return

        self.__logger.info("Ensuring %d failed segment(s) are stopped" %
                           (len(failed_reachable_segments)))
        segments = self._get_running_postgres_segments(
            failed_reachable_segments)
        segmentByHost = GpArray.getSegmentsByHostName(segments)

        cmds = []
        for hostName, segments in segmentByHost.items():
            cmd = gp.GpSegStopCmd("remote segment stop on host '%s'" %
                                  hostName,
                                  gpEnv.getGpHome(),
                                  gpEnv.getGpVersion(),
                                  mode='fast',
                                  dbs=segments,
                                  verbose=gplog.logging_is_verbose(),
                                  ctxt=base.REMOTE,
                                  remoteHost=hostName,
                                  segment_batch_size=self.__parallelPerHost)

            cmds.append(cmd)

        # we suppress checking for the error.  This is because gpsegstop will actually error
        #  in many cases where the stop is actually done (that is, for example, the segment is
        #  running but slow to shutdown so gpsegstop errors after whacking it with a kill)
        #
        # Perhaps we should make it check whether the seg is running and only attempt the stop
        #  if it's running?  In that case, we could propagate the error
        #
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, suppressErrorCheck=True)
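
All of these stop/clean/start helpers share one fan-out shape: group the segments by host, build one remote command per host, then run the whole batch through a worker pool. Below is a minimal, self-contained sketch of that shape; group_by_host and the plain-dict segments are hypothetical stand-ins for GpArray.getSegmentsByHostName and the real segment objects:

    from collections import defaultdict

    def group_by_host(segments):
        # Stand-in for GpArray.getSegmentsByHostName: host name -> list of segments.
        by_host = defaultdict(list)
        for seg in segments:
            by_host[seg["host"]].append(seg)
        return dict(by_host)

    def build_stop_commands(segments):
        # One command per host, covering all of that host's segments at once.
        cmds = []
        for host, segs in group_by_host(segments).items():
            cmds.append("gpsegstop on %s for dbids %s" % (host, [s["dbid"] for s in segs]))
        return cmds

    segments = [{"host": "sdw1", "dbid": 2}, {"host": "sdw1", "dbid": 3},
                {"host": "sdw2", "dbid": 4}]
    for cmd in build_stop_commands(segments):
        print(cmd)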
Example 2
    def __ensureStopped(self, gpEnv, directives):
        """

        @param directives a list of the GpStopSegmentDirectoryDirective values indicating which segments to stop

        """
        if len(directives) == 0:
            return

        self.__logger.info("Ensuring %d failed segment(s) are stopped" % (len(directives)))
        segments = [d.getSegment() for d in directives]
        segments = self._get_running_postgres_segments(segments)
        segmentByHost = GpArray.getSegmentsByHostName(segments)

        cmds = []
        for hostName, segments in segmentByHost.iteritems():
            cmd = gp.GpSegStopCmd("remote segment stop on host '%s'" % hostName,
                                  gpEnv.getGpHome(), gpEnv.getGpVersion(),
                                  mode='fast', dbs=segments, verbose=gplog.logging_is_verbose(),
                                  ctxt=base.REMOTE, remoteHost=hostName)

            cmds.append(cmd)

        # we suppress checking for the error.  This is because gpsegstop will actually error
        #  in many cases where the stop is actually done (that is, for example, the segment is
        #  running but slow to shutdown so gpsegstop errors after whacking it with a kill)
        #
        # Perhaps we should make it check whether the seg is running and only attempt the stop
        #  if it's running?  In that case, we could propagate the error
        #
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "stopping segments", suppressErrorCheck=True)
Example 3
    def _clean_up_failed_segments(self):
        segments_to_clean_up = []
        for toRecover in self.__mirrorsToBuild:
            is_in_place = (toRecover.getFailedSegment() is not None
                           and toRecover.getFailoverSegment() is None)
            if is_in_place and toRecover.isFullSynchronization():
                segments_to_clean_up.append(toRecover.getFailedSegment())

        if len(segments_to_clean_up) == 0:
            return

        self.__logger.info("Cleaning files from %d segment(s)" %
                           (len(segments_to_clean_up)))
        segments_to_clean_up_by_host = GpArray.getSegmentsByHostName(
            segments_to_clean_up)

        cmds = []
        for hostName, segments_to_clean_up in segments_to_clean_up_by_host.items():
            cmds.append(
                gp.GpCleanSegmentDirectories(
                    "clean segment directories on %s" % hostName,
                    segments_to_clean_up, gp.REMOTE, hostName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds)
Example 4
    def checkForPortAndDirectoryConflicts(self, gpArray):
        """
        Check gpArray for internal consistency -- no duplicate ports or directories on the same host, for example

        A detected problem causes an Exception to be raised
        """

        for hostName, segmentArr in GpArray.getSegmentsByHostName(
                gpArray.getDbList()).items():
            usedPorts = {}
            usedDataDirectories = {}
            for segment in segmentArr:

                # check for port conflict
                port = segment.getSegmentPort()
                dbid = segment.getSegmentDbId()
                if port in usedPorts:
                    raise Exception(
                        "Segment dbid's %s and %s on host %s cannot have the same port %s."
                        % (dbid, usedPorts.get(port), hostName, port))

                usedPorts[port] = dbid

                # check for directory conflict; could improve this by reporting nicer the conflicts
                path = segment.getSegmentDataDirectory()

                if path in usedDataDirectories:
                    raise Exception(
                        "Segment dbid's %s and %s on host %s cannot have the same data directory '%s'."
                        %
                        (dbid, usedDataDirectories.get(path), hostName, path))
                usedDataDirectories[path] = dbid
Example 5
    def checkForPortAndDirectoryConflicts(self, gpArray):
        """
        Check gpArray for internal consistency -- no duplicate ports or directories on the same host, for example

        A detected problem causes an Exception to be raised
        """

        for hostName, segmentArr in GpArray.getSegmentsByHostName(gpArray.getDbList()).iteritems():
            usedPorts = {}
            usedDataDirectories = {}
            for segment in segmentArr:

                # check for port conflict
                port = segment.getSegmentPort()
                dbid = segment.getSegmentDbId()
                if port in usedPorts:
                    raise Exception(
                        "Segment dbid's %s and %s on host %s cannot have the same port %s." %
                        (dbid, usedPorts.get(port), hostName, port))

                usedPorts[port] = dbid

                # check for directory conflict; could improve this by reporting nicer the conflicts
                path = segment.getSegmentDataDirectory()

                if path in usedDataDirectories:
                    raise Exception(
                        "Segment dbid's %s and %s on host %s cannot have the same data directory '%s'." %
                        (dbid, usedDataDirectories.get(path), hostName, path))
                usedDataDirectories[path] = dbid
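
A quick way to see this check fire is to feed it two stub segments that share a port on one host. StubSegment is a hypothetical stand-in implementing only the three accessors the check reads; check_host repeats the same duplicate-detection logic as the method above, outside the class:

    class StubSegment:
        def __init__(self, dbid, port, datadir):
            self._dbid, self._port, self._datadir = dbid, port, datadir
        def getSegmentDbId(self):
            return self._dbid
        def getSegmentPort(self):
            return self._port
        def getSegmentDataDirectory(self):
            return self._datadir

    def check_host(hostName, segments):
        # Map each port/path to the first dbid that claimed it; raise on reuse.
        usedPorts, usedDataDirectories = {}, {}
        for segment in segments:
            port, dbid = segment.getSegmentPort(), segment.getSegmentDbId()
            if port in usedPorts:
                raise Exception("Segment dbid's %s and %s on host %s cannot have the same port %s."
                                % (dbid, usedPorts[port], hostName, port))
            usedPorts[port] = dbid
            path = segment.getSegmentDataDirectory()
            if path in usedDataDirectories:
                raise Exception("Segment dbid's %s and %s on host %s cannot have the same data directory '%s'."
                                % (dbid, usedDataDirectories[path], hostName, path))
            usedDataDirectories[path] = dbid

    try:
        check_host("sdw1", [StubSegment(2, 40000, "/data/seg0"), StubSegment(3, 40000, "/data/seg1")])
    except Exception as e:
        print(e)  # Segment dbid's 3 and 2 on host sdw1 cannot have the same port 40000.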
Example 6
    def __ensureSharedMemCleaned(self, gpEnv, directives):
        """

        @param directives a list of the GpStopSegmentDirectoryDirective values indicating which segments to clean up

        """

        if len(directives) == 0:
            return

        logger.info(
            'Ensuring that shared memory is cleaned up for stopped segments')
        segments = [d.getSegment() for d in directives]
        segmentsByHost = GpArray.getSegmentsByHostName(segments)
        operation_list = [
            RemoteOperation(CleanSharedMem(segments), host=hostName)
            for hostName, segments in segmentsByHost.items()
        ]
        ParallelOperation(operation_list).run()

        for operation in operation_list:
            try:
                operation.get_ret()
            except Exception as e:
                logger.warning(
                    'Unable to clean up shared memory for stopped segments on host (%s)'
                    % operation.host)
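
The RemoteOperation/ParallelOperation pair runs one cleanup per host, then polls each operation and downgrades failures to warnings. A rough standard-library analogue of that run-all-then-check-each shape (this is not the gppylib API, just the same pattern):

    import logging
    from concurrent.futures import ThreadPoolExecutor

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    def clean_shared_mem(host, segments):
        # Placeholder for the per-host cleanup; sdw2 fails to show the warning path.
        if host == "sdw2":
            raise RuntimeError("ipcs cleanup failed")
        return "cleaned %d segment(s)" % len(segments)

    segmentsByHost = {"sdw1": ["seg0", "seg1"], "sdw2": ["seg2"]}
    with ThreadPoolExecutor() as pool:
        futures = {host: pool.submit(clean_shared_mem, host, segs)
                   for host, segs in segmentsByHost.items()}

    for host, future in futures.items():
        try:
            future.result()
        except Exception:
            # Mirror the original behavior: warn and continue rather than abort.
            logger.warning('Unable to clean up shared memory for stopped segments on host (%s)' % host)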
Example 7
    def __runStartCommand(self, segments, startMethod, numContentsInCluster, resultOut, gpArray, era):
        """
        Put results into the resultOut object
        """

        if len(segments) == 0:
            return

        if startMethod == START_AS_PRIMARY_OR_MIRROR:
            logger.info("Commencing parallel primary and mirror segment instance startup, please wait...")
        else:
            logger.info("Commencing parallel segment instance startup, please wait...")

        dbIdToPeerMap = gpArray.getDbIdToPeerMap()

        mirroringModePreTransition = MIRROR_MODE_MIRRORLESS if startMethod == START_AS_MIRRORLESS else MIRROR_MODE_QUIESCENT

        # launch the start
        for hostName, segments in GpArray.getSegmentsByHostName(segments).iteritems():
            logger.debug("Dispatching command to start segments on host: %s, " \
                            "with %s contents in cluster" % (hostName, numContentsInCluster))

            pickledTransitionData = None
            if startMethod == START_AS_PRIMARY_OR_MIRROR:
                mirroringModePerSegment = []
                for seg in segments:
                    modeThisSegment = MIRROR_MODE_PRIMARY if seg.isSegmentPrimary(True) else MIRROR_MODE_MIRROR
                    mirroringModePerSegment.append(modeThisSegment)
                pickledTransitionData = self.__createPickledTransitionParameters(segments, mirroringModePerSegment, None, dbIdToPeerMap)

            #
            # This will call sbin/gpsegstart.py
            #
            cmd = gp.GpSegStartCmd("remote segment starts on host '%s'" % hostName,
                                   self.__gpHome, segments,
                                   self.__gpVersion,
                                   mirroringModePreTransition,
                                   numContentsInCluster,
                                   era,
                                   self.master_checksum_value,
                                   self.__timeout,
                                   verbose=logging_is_verbose(),
                                   ctxt=base.REMOTE,
                                   remoteHost=segments[0].getSegmentAddress(),
                                   pickledTransitionData=pickledTransitionData,
                                   specialMode=self.__specialMode,
                                   wrapper=self.__wrapper,
                                   wrapper_args=self.__wrapper_args,
                                   parallel=self.__parallel,
                                   logfileDirectory=self.logfileDirectory)
            self.__workerPool.addCommand(cmd)

        if self.__quiet:
            self.__workerPool.join()
        else:
            base.join_and_indicate_progress(self.__workerPool)

        # process results
        self.__processStartOrConvertCommands(resultOut)
        self.__workerPool.empty_completed_items()
Example 8
    def __ensureStopped(self, gpEnv, directives):
        """

        @param directives a list of the GpStopSegmentDirectoryDirective values indicating which segments to stop

        """
        if len(directives) == 0:
            return

        logger.info("Ensuring %d failed segment(s) are stopped" % (len(directives)))
        segments = [d.getSegment() for d in directives]
        segments = self._get_running_postgres_segments(segments)
        segmentByHost = GpArray.getSegmentsByHostName(segments)

        cmds = []
        for hostName, segments in segmentByHost.iteritems():
            cmd = gp.GpSegStopCmd("remote segment stop on host '%s'" % hostName,
                                  gpEnv.getGpHome(), gpEnv.getGpVersion(),
                                  mode='fast', dbs=segments, verbose=logging_is_verbose(),
                                  ctxt=base.REMOTE, remoteHost=hostName)

            cmds.append(cmd)

        # we suppress checking for the error.  This is because gpsegstop will actually error
        #  in many cases where the stop is actually done (that is, for example, the segment is
        #  running but slow to shutdown so gpsegstop errors after whacking it with a kill)
        #
        # Perhaps we should make it check whether the seg is running and only attempt the stop
        #  if it's running?  In that case, we could propagate the error
        #
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "stopping segments", suppressErrorCheck=True)
Example 9
    def __runStartCommand(self, segments, startMethod, numContentsInCluster, resultOut, gpArray, era):
        """
        Put results into the resultOut object
        """

        if len(segments) == 0:
            return

        if startMethod == START_AS_PRIMARY_OR_MIRROR:
            logger.info("Commencing parallel primary and mirror segment instance startup, please wait...")
        else:
            logger.info("Commencing parallel segment instance startup, please wait...")

        dbIdToPeerMap = gpArray.getDbIdToPeerMap()

        mirroringModePreTransition = MIRROR_MODE_MIRRORLESS if startMethod == START_AS_MIRRORLESS else MIRROR_MODE_QUIESCENT

        # launch the start
        for hostName, segments in GpArray.getSegmentsByHostName(segments).items():
            logger.debug("Dispatching command to start segments on host: %s, " \
                            "with %s contents in cluster" % (hostName, numContentsInCluster))

            pickledTransitionData = None
            if startMethod == START_AS_PRIMARY_OR_MIRROR:
                mirroringModePerSegment = []
                for seg in segments:
                    modeThisSegment = MIRROR_MODE_PRIMARY if seg.isSegmentPrimary(True) else MIRROR_MODE_MIRROR
                    mirroringModePerSegment.append(modeThisSegment)
                pickledTransitionData = self.__createPickledTransitionParameters(segments, mirroringModePerSegment, None, dbIdToPeerMap)

            #
            # This will call sbin/gpsegstart.py
            #
            cmd = gp.GpSegStartCmd("remote segment starts on host '%s'" % hostName,
                                   self.__gpHome, segments,
                                   self.__gpVersion,
                                   mirroringModePreTransition,
                                   numContentsInCluster,
                                   era,
                                   self.master_checksum_value,
                                   self.__timeout,
                                   verbose=logging_is_verbose(),
                                   ctxt=base.REMOTE,
                                   remoteHost=segments[0].getSegmentAddress(),
                                   pickledTransitionData=pickledTransitionData,
                                   specialMode=self.__specialMode,
                                   wrapper=self.__wrapper,
                                   wrapper_args=self.__wrapper_args,
                                   parallel=self.__parallel,
                                   logfileDirectory=self.logfileDirectory)
            self.__workerPool.addCommand(cmd)

        if self.__quiet:
            self.__workerPool.join()
        else:
            base.join_and_indicate_progress(self.__workerPool)

        # process results
        self.__processStartOrConvertCommands(resultOut)
        self.__workerPool.empty_completed_items()
Example 10
    def checkForPortAndDirectoryConflicts(self, gpArray):
        """
        Check gpArray for internal consistency -- no duplicate ports or directories on the same host, for example

        A detected problem causes an Exception to be raised
        """

        for hostName, segmentArr in GpArray.getSegmentsByHostName(
                gpArray.getDbList()).iteritems():
            usedPorts = {}
            usedDataDirectories = {}
            for segment in segmentArr:

                # check for port conflict
                replicationPort = segment.getSegmentReplicationPort()
                port = segment.getSegmentPort()
                dbid = segment.getSegmentDbId()
                if port in usedPorts:
                    raise Exception(
                        "On host %s, port %s for segment with dbid %s conflicts with port for segment dbid %s"
                        % (hostName, port, dbid, usedPorts.get(port)))

                if segment.isSegmentQE():
                    if replicationPort is None:
                        raise Exception(
                            "On host %s, the replication port is not set for segment with dbid %s"
                            % (hostName, dbid))

                    if replicationPort in usedPorts:
                        raise Exception(
                            "On host %s, replication port %s for segment with dbid %s conflicts "
                            "with a port for segment dbid %s" %
                            (hostName, replicationPort, dbid,
                             usedPorts.get(replicationPort)))

                    if port == replicationPort:
                        raise Exception(
                            "On host %s, segment with dbid %s has equal port and replication port"
                            % (hostName, dbid))

                usedPorts[port] = dbid
                usedPorts[replicationPort] = dbid

                # check for directory conflict; could improve this by reporting nicer the conflicts
                paths = [
                    path
                    for oid, path in segment.getSegmentFilespaces().items()
                    if oid != gparray.SYSTEM_FILESPACE
                ]
                paths.append(segment.getSegmentDataDirectory())

                for path in paths:
                    if path in usedDataDirectories:
                        raise Exception(
                            "On host %s, directory (base or filespace) for segment with dbid %s conflicts with a "
                            "directory (base or filespace) for segment dbid %s; directory: %s"
                            % (hostName, dbid, usedDataDirectories.get(path),
                               path))
                    usedDataDirectories[path] = dbid
Example 11
    def checkForPortAndDirectoryConflicts(self, gpArray):
        """
        Check gpArray for internal consistency -- no duplicate ports or directories on the same host, for example

        A detected problem causes an Exception to be raised
        """

        for hostName, segmentArr in GpArray.getSegmentsByHostName(gpArray.getDbList()).iteritems():
            usedPorts = {}
            usedDataDirectories = {}
            for segment in segmentArr:

                # check for port conflict
                replicationPort = segment.getSegmentReplicationPort()
                port = segment.getSegmentPort()
                dbid = segment.getSegmentDbId()
                if port in usedPorts:
                    raise Exception(
                        "On host %s, a port for segment with dbid %s conflicts with a port for segment dbid %s"
                        % (hostName, dbid, usedPorts.get(port))
                    )

                if segment.isSegmentQE():
                    if replicationPort is None:
                        raise Exception(
                            "On host %s, the replication port is not set for segment with dbid %s" % (hostName, dbid)
                        )

                    if replicationPort in usedPorts:
                        raise Exception(
                            "On host %s, a port for segment with dbid %s conflicts with a port for segment dbid %s"
                            % (hostName, dbid, usedPorts.get(replicationPort))
                        )

                    if port == replicationPort:
                        raise Exception(
                            "On host %s, segment with dbid %s has equal port and replication port" % (hostName, dbid)
                        )

                usedPorts[port] = dbid
                if replicationPort is not None:
                    usedPorts[replicationPort] = dbid

                # check for directory conflict; could improve this by reporting nicer the conflicts
                paths = [
                    path for oid, path in segment.getSegmentFilespaces().items() if oid != gparray.SYSTEM_FILESPACE
                ]
                paths.append(segment.getSegmentDataDirectory())

                for path in paths:
                    if path in usedDataDirectories:
                        raise Exception(
                            "On host %s, directory (base or filespace) for segment with dbid %s conflicts with a "
                            "directory (base or filespace) for segment dbid %s; directory: %s"
                            % (hostName, dbid, usedDataDirectories.get(path), path)
                        )
                    usedDataDirectories[path] = dbid
Example 12
    def __sendPrimaryMirrorTransition(self, targetMode, segments,
                                      convertUsingFullResync, gpArray,
                                      resultOut):
        """
            @param segments the segments to convert
            @param convertUsingFullResync in parallel with segments, may be None, gives true/false for whether fullResync
                                          flag should be passed to the transition
        """

        if len(segments) == 0:
            logger.debug("%s conversion of zero segments...skipping" %
                         targetMode)
            return

        logger.info(
            "Commencing parallel %s conversion of %s segments, please wait..."
            % (targetMode, len(segments)))

        ###############################################
        # for each host, create + transfer the transition arguments file
        dispatchCount = 0

        dbIdToPeerMap = gpArray.getDbIdToPeerMap()
        segmentsByHostName = GpArray.getSegmentsByHostName(segments)
        for hostName, segments in segmentsByHostName.iteritems():
            assert len(segments) > 0

            logger.debug(
                "Dispatching command to convert segments on host: %s " %
                (hostName))

            targetModePerSegment = [targetMode for seg in segments]
            pickledParams = self.__createPickledTransitionParameters(
                segments, targetModePerSegment, convertUsingFullResync,
                dbIdToPeerMap)

            address = segments[0].getSegmentAddress()
            cmd = gp.GpSegChangeMirrorModeCmd(
                "remote segment mirror mode conversion on host '%s' using address '%s'"
                % (hostName, address),
                self.__gpHome,
                self.__localeData,
                self.__gpVersion,
                segments,
                targetMode,
                pickledParams,
                verbose=logging_is_verbose(),
                ctxt=base.REMOTE,
                remoteHost=address)
            self.__workerPool.addCommand(cmd)
            dispatchCount += 1
        self.__workerPool.wait_and_printdots(dispatchCount, self.__quiet)

        # process results
        self.__processStartOrConvertCommands(resultOut)
        self.__workerPool.empty_completed_items()
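
__createPickledTransitionParameters is not shown in these excerpts. A plausible sketch of how per-segment parameters can be passed to a remote command is to pickle and base64-encode them so they survive shell transport; the payload fields below are assumptions for illustration, not the real format:

    import base64
    import pickle

    def create_pickled_transition_parameters(segments, modes_per_segment):
        # Pair each segment with its target mode and encode the list so it can
        # travel as a single command-line argument.
        params = [{"dbid": seg["dbid"], "targetMode": mode}
                  for seg, mode in zip(segments, modes_per_segment)]
        return base64.urlsafe_b64encode(pickle.dumps(params)).decode("ascii")

    def decode_transition_parameters(blob):
        return pickle.loads(base64.urlsafe_b64decode(blob.encode("ascii")))

    blob = create_pickled_transition_parameters([{"dbid": 2}, {"dbid": 3}],
                                                ["primary", "mirror"])
    print(decode_transition_parameters(blob))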
Example 13
    def __cleanUpSegmentDirectories(self, directives):
        if len(directives) == 0:
            return

        self.__logger.info("Cleaning files from %d segment(s)" % (len(directives)))
        segments = [d.getSegment() for d in directives]
        segmentByHost = GpArray.getSegmentsByHostName(segments)

        cmds = []
        for hostName, segments in segmentByHost.iteritems():
            cmds.append(gp.GpCleanSegmentDirectories("clean segment directories on %s" % hostName,
                                                     segments, gp.REMOTE, hostName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning existing directories")
Example 14
    def __cleanUpSegmentDirectories(self, directives):
        if len(directives) == 0:
            return

        logger.info("Cleaning files from %d segment(s)" % (len(directives)))
        segments = [d.getSegment() for d in directives]
        segmentByHost = GpArray.getSegmentsByHostName(segments)

        cmds = []
        for hostName, segments in segmentByHost.iteritems():
            cmds.append(gp.GpCleanSegmentDirectories("clean segment directories on %s" % hostName,
                                                     segments, gp.REMOTE, hostName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning existing directories")
Example 15
    def __init__(self, gpArray):
        #
        # determine port information for recovering to a new host --
        #   we need to know the ports that are in use and the valid range of ports
        #
        segments = gpArray.getDbList()
        ports = [seg.getSegmentPort() for seg in segments if seg.isSegmentQE()]
        if len(ports) > 0:
            self.__minPort = min(ports)
        else:
            raise Exception("No segment ports found in array.")
        self.__usedPortsByHostName = {}

        byHost = GpArray.getSegmentsByHostName(segments)
        for hostName, segments in byHost.items():
            usedPorts = self.__usedPortsByHostName[hostName] = {}
            for seg in segments:
                usedPorts[seg.getSegmentPort()] = True
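
Example 20 below calls self.portAssigner.findAndReservePort against this structure. A minimal sketch of how such a reservation could work on top of __usedPortsByHostName; the scan-upward-from-minPort policy is an assumption, not the verified implementation:

    def find_and_reserve_port(used_ports_by_host, min_port, host_name, max_port=65535):
        # Take the first port at or above the cluster minimum that is free on
        # this host, and mark it used so repeated calls do not hand it out twice.
        used = used_ports_by_host.setdefault(host_name, {})
        for port in range(min_port, max_port + 1):
            if port not in used:
                used[port] = True
                return port
        raise Exception("no free port found on host %s" % host_name)

    used = {"sdw1": {40000: True, 40001: True}}
    print(find_and_reserve_port(used, 40000, "sdw1"))  # 40002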
Example 16
    def __sendPrimaryMirrorTransition(self, targetMode, segments, convertUsingFullResync, gpArray, resultOut):
        """
            @param segments the segments to convert
            @param convertUsingFullResync in parallel with segments, may be None, gives true/false for whether fullResync
                                          flag should be passed to the transition
        """

        if len(segments) == 0:
            logger.debug("%s conversion of zero segments...skipping" % targetMode)
            return

        logger.info("Commencing parallel %s conversion of %s segments, please wait..." % (targetMode, len(segments)))

        ###############################################
        # for each host, create + transfer the transition arguments file
        dispatchCount = 0

        dbIdToPeerMap = gpArray.getDbIdToPeerMap()
        segmentsByHostName = GpArray.getSegmentsByHostName(segments)
        for hostName, segments in segmentsByHostName.iteritems():
            assert len(segments) > 0

            logger.debug("Dispatching command to convert segments on host: %s " % (hostName))

            targetModePerSegment = [targetMode for seg in segments]
            pickledParams = self.__createPickledTransitionParameters(segments, targetModePerSegment,
                                        convertUsingFullResync, dbIdToPeerMap)

            address = segments[0].getSegmentAddress()
            cmd = gp.GpSegChangeMirrorModeCmd(
                    "remote segment mirror mode conversion on host '%s' using address '%s'" % (hostName, address),
                    self.__gpHome, self.__localeData, self.__gpVersion,
                    segments, targetMode, pickledParams, verbose=logging_is_verbose(),
                    ctxt=base.REMOTE,
                    remoteHost=address)
            self.__workerPool.addCommand(cmd)
            dispatchCount += 1
        self.__workerPool.wait_and_printdots(dispatchCount, self.__quiet)

        # process results
        self.__processStartOrConvertCommands(resultOut)
        self.__workerPool.empty_completed_items()
Example 17
    def __updateGpIdFile(self, gpEnv, gpArray, segments):
        segmentByHost = GpArray.getSegmentsByHostName(segments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(segments)

        cmds = []
        for hostName in segmentByHost.keys():
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            cmd = gp.ConfigureNewSegment("update gpid file",
                                            segmentInfo,
                                            newSegments=False,
                                            verbose=gplog.logging_is_verbose(),
                                            batchSize=self.__parallelDegree,
                                            ctxt=gp.REMOTE,
                                            remoteHost=hostName,
                                            validationOnly=False,
                                            writeGpIdFileOnly=True)

            cmds.append(cmd)
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "writing updated gpid files")
Example 18
    def __updateGpIdFile(self, gpEnv, gpArray, segments):
        segmentByHost = GpArray.getSegmentsByHostName(segments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(segments)

        cmds = []
        for hostName in segmentByHost.keys():
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            cmd = gp.ConfigureNewSegment("update gpid file",
                                         segmentInfo,
                                         newSegments=False,
                                         verbose=gplog.logging_is_verbose(),
                                         batchSize=self.__parallelDegree,
                                         ctxt=gp.REMOTE,
                                         remoteHost=hostName,
                                         validationOnly=False,
                                         writeGpIdFileOnly=True)

            cmds.append(cmd)
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "writing updated gpid files")
Example 19
    def __ensureSharedMemCleaned(self, gpEnv, directives):
        """

        @param directives a list of the GpStopSegmentDirectoryDirective values indicating which segments to clean up

        """

        if len(directives) == 0:
            return

        logger.info('Ensuring that shared memory is cleaned up for stopped segments')
        segments = [d.getSegment() for d in directives]
        segmentsByHost = GpArray.getSegmentsByHostName(segments)
        operation_list = [RemoteOperation(CleanSharedMem(segments), host=hostName) for hostName, segments in segmentsByHost.items()]
        ParallelOperation(operation_list).run()

        for operation in operation_list:
            try:
                operation.get_ret()
            except Exception as e:
                logger.warning('Unable to clean up shared memory for stopped segments on host (%s)' % operation.host)
Example 20
    def getTriplets(self):
        def _check_new_hosts():
            if len(self.newHosts) > len(failedSegments):
                self.interfaceHostnameWarnings.append(
                    "The following recovery hosts were not needed:")
                for h in self.newHosts[len(failedSegments):]:
                    self.interfaceHostnameWarnings.append("\t%s" % h)

            if len(self.newHosts) < len(failedSegments):
                raise Exception(
                    'Not enough new recovery hosts given for recovery.')

            unreachable_hosts = get_unreachable_segment_hosts(
                self.newHosts[:len(failedSegments)], len(failedSegments))
            if unreachable_hosts:
                raise ExceptionNoStackTraceNeeded(
                    "Cannot recover. The following recovery target hosts are "
                    "unreachable: %s" % unreachable_hosts)

        failedSegments = GpArray.getSegmentsByHostName([
            seg for seg in self.gpArray.getSegDbList() if seg.isSegmentDown()
        ])
        _check_new_hosts()

        requests = []
        for failedHost, failoverHost in zip(sorted(failedSegments.keys()),
                                            self.newHosts):
            for failed in failedSegments[failedHost]:
                failoverPort = self.portAssigner.findAndReservePort(
                    failoverHost, failoverHost)
                req = RecoveryTripletRequest(failed, failoverHost,
                                             failoverPort,
                                             failed.getSegmentDataDirectory(),
                                             True)
                requests.append(req)

        return self._convert_requests_to_triplets(requests)
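
The pairing rule here is positional: failed hosts are sorted by name and matched one-to-one against the recovery hosts in the order given, with extras reported as unneeded. A small sketch of just that pairing, with plain strings instead of segment objects:

    failed_by_host = {"sdw2": ["seg3"], "sdw1": ["seg1", "seg2"]}
    new_hosts = ["rdw1", "rdw2", "rdw3"]

    if len(new_hosts) < len(failed_by_host):
        raise Exception('Not enough new recovery hosts given for recovery.')
    for spare in new_hosts[len(failed_by_host):]:
        print("recovery host not needed: %s" % spare)

    # sorted() makes the assignment deterministic: sdw1 -> rdw1, sdw2 -> rdw2.
    for failed_host, failover_host in zip(sorted(failed_by_host), new_hosts):
        for seg in failed_by_host[failed_host]:
            print("%s on %s recovers to %s" % (seg, failed_host, failover_host))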
Example 21
 def _sortedSegs(gparray):
     segs_by_host = GpArray.getSegmentsByHostName(gparray.getSegDbList())
     for host in segs_by_host:
         segs_by_host[host] = sorted(segs_by_host[host],
                                     key=lambda seg: seg.getSegmentDbId())
     return segs_by_host
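
A tiny usage sketch of the same per-host sort, with plain dicts standing in for segment objects:

    segs_by_host = {"sdw1": [{"dbid": 5}, {"dbid": 2}],
                    "sdw2": [{"dbid": 4}, {"dbid": 3}]}
    for host in segs_by_host:
        segs_by_host[host] = sorted(segs_by_host[host], key=lambda seg: seg["dbid"])
    print(segs_by_host)
    # {'sdw1': [{'dbid': 2}, {'dbid': 5}], 'sdw2': [{'dbid': 3}, {'dbid': 4}]}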
Example 22
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        timeStamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()
            destSegment.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (gplog.get_logger_dir(),
                                                                           timeStamp,
                                                                           destSegment.getSegmentDbId())
            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)

            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          gplog.get_logger_dir(),
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)
        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

        # Configure a new segment
        #
        # Recover segments using gpconfigurenewsegment, which
        # uses pg_basebackup. gprecoverseg generates a log filename which is
        # passed to gpconfigurenewsegment as a confinfo parameter. gprecoverseg
        # tails this file to show recovery progress to the user, and removes the
        # file when done. A new file is generated for each run of
        # gprecoverseg based on a timestamp.
        #
        # There is a race between when the pg_basebackup log file is created and
        # when the progress command is run. Thus, the progress command touches
        # the file to ensure it's present before tailing.
        self.__logger.info('Configuring new segments')
        cmds = []
        progressCmds = []
        removeCmds = []
        for hostName in destSegmentByHost.keys():
            for segment in destSegmentByHost[hostName]:
                if self.__progressMode != GpMirrorListToBuild.Progress.NONE:
                    progressCmds.append(
                        GpMirrorListToBuild.ProgressCommand("tail the last line of the file",
                                       "set -o pipefail; touch -a {0}; tail -1 {0} | tr '\\r' '\\n' | tail -1".format(
                                           pipes.quote(segment.progressFile)),
                                       segment.getSegmentDbId(),
                                       segment.progressFile,
                                       ctxt=base.REMOTE,
                                       remoteHost=hostName))
                removeCmds.append(
                    base.Command("remove file",
                                 "rm -f %s" % pipes.quote(segment.progressFile),
                                 ctxt=base.REMOTE,
                                 remoteHost=hostName))

            cmds.append(
                createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory",
                                                          suppressErrorCheck=False,
                                                          progressCmds=progressCmds)

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(removeCmds, "removing pg_basebackup progress logfiles",
                                                          suppressErrorCheck=False)

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory', 'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE, remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp('copy db_dumps from old segment to new segment',
                                  os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps*', '*'),
                                  os.path.join(destSeg.getSegmentDataDirectory(), 'db_dumps'),
                                  srcSeg.getSegmentAddress(),
                                  destSeg.getSegmentAddress(),
                                  recursive=True)
                        cmd.run(validateAfter=True)
                        break
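
The progress command above works because pg_basebackup redraws its progress line in place with carriage returns, so the file's last physical line can hold many updates separated by '\r'; translating '\r' to newlines and taking the last one yields the newest update. A small Python sketch of the same extraction the shell pipeline performs (the sample content is invented):

    import os
    import tempfile

    def latest_progress_line(path):
        # Equivalent of: tail -1 file | tr '\r' '\n' | tail -1
        with open(path) as f:
            content = f.read()
        if not content:
            return ""
        last_line = content.rstrip("\n").split("\n")[-1]
        return last_line.split("\r")[-1]

    path = os.path.join(tempfile.gettempdir(), "pg_basebackup.demo.out")
    with open(path, "w") as f:
        f.write("1024/204800 kB (0%)\r102400/204800 kB (50%)\r204800/204800 kB (100%)")
    print(latest_progress_line(path))  # 204800/204800 kB (100%)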
Example 23
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = [d.getSrcSegment() for d in directives]
        destSegments = [d.getDestSegment() for d in directives]
        isTargetReusedLocation = [
            d.isTargetReusedLocation() for d in directives
        ]
        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
            destSegments, isTargetReusedLocation)

        logger.info('Building template directory')
        (tempDir, blankTarFile,
         tarFileName) = self.__buildTarFileForTransfer(gpEnv, gpArray.master,
                                                       srcSegments[0],
                                                       destSegments)

        def createConfigureNewSegmentCommand(hostName, cmdLabel,
                                             validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          tarFile=tarFileName,
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly)

        #
        # validate directories for target segments
        #
        logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'validate blank segments',
                                                 True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)
        self.__pool.wait_and_printdots(len(cmds), self.__quiet)
        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append(
                                "Validation failure on host %s %s" %
                                (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" +
                                              ("\n".join(validationErrors)))

        #
        # copy tar from master to target hosts
        #
        logger.info('Copying template directory file')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                gp.RemoteCopy("copy segment tar", blankTarFile, hostName,
                              tarFileName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "building and transferring basic segment directory")

        #
        # unpack and configure new segments
        #
        logger.info('Configuring new segments')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'configure blank segments',
                                                 False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "unpacking basic segment directory")

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    cmd = Scp('copy db_dumps from old segment to new segment',
                              os.path.join(srcSeg.getSegmentDataDirectory(),
                                           'db_dumps*', '*'),
                              os.path.join(destSeg.getSegmentDataDirectory(),
                                           'db_dumps'),
                              srcSeg.getSegmentAddress(),
                              destSeg.getSegmentAddress(),
                              recursive=True)
                    cmd.run(validateAfter=True)
                    break

        #
        # Clean up copied tar from each remote host
        #
        logger.info('Cleaning files')
        cmds = []
        for hostName, segments in destSegmentByHost.iteritems():
            cmds.append(
                unix.RemoveFiles('remove tar file',
                                 tarFileName,
                                 ctxt=gp.REMOTE,
                                 remoteHost=hostName))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "cleaning up tar file on segment hosts")

        #
        # clean up the local temp directory
        #
        unix.RemoveFiles.local('remove temp directory', tempDir)
Example 24
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        timeStamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()
            destSegment.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (
                gplog.get_logger_dir(), timeStamp,
                destSegment.getSegmentDbId())
            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
            destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel,
                                             validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)

            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          gplog.get_logger_dir(),
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)

        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in list(destSegmentByHost.keys()):
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'validate blank segments',
                                                 True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append(
                                "Validation failure on host %s %s" %
                                (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" +
                                              ("\n".join(validationErrors)))

        # Configure a new segment
        #
        # Recover segments using gpconfigurenewsegment, which
        # uses pg_basebackup. gprecoverseg generates a log filename which is
        # passed to gpconfigurenewsegment as a confinfo parameter. gprecoverseg
        # tails this file to show recovery progress to the user, and removes the
        # file when done. A new file is generated for each run of
        # gprecoverseg based on a timestamp.
        self.__logger.info('Configuring new segments')
        cmds = []
        progressCmds = []
        removeCmds = []
        for hostName in list(destSegmentByHost.keys()):
            for segment in destSegmentByHost[hostName]:
                progressCmd, removeCmd = self.__getProgressAndRemoveCmds(
                    segment.progressFile, segment.getSegmentDbId(), hostName)
                removeCmds.append(removeCmd)
                if progressCmd:
                    progressCmds.append(progressCmd)

            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'configure blank segments',
                                                 False))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds,
            "unpacking basic segment directory",
            suppressErrorCheck=False,
            progressCmds=progressCmds)

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            removeCmds,
            "removing pg_basebackup progress logfiles",
            suppressErrorCheck=False)

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(
                        srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory',
                                       'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE,
                                       remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp(
                            'copy db_dumps from old segment to new segment',
                            os.path.join(srcSeg.getSegmentDataDirectory(),
                                         'db_dumps*', '*'),
                            os.path.join(destSeg.getSegmentDataDirectory(),
                                         'db_dumps'),
                            srcSeg.getSegmentAddress(),
                            destSeg.getSegmentAddress(),
                            recursive=True)
                        cmd.run(validateAfter=True)
                        break
Example 25
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()

            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)

            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          gplog.get_logger_dir(),
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)

        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)

        if self.__quiet:
            self.__pool.join()
        else:
            base.join_and_indicate_progress(self.__pool)

        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

        #
        # unpack and configure new segments
        #
        self.__logger.info('Configuring new segments')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory")

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory', 'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE, remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp('copy db_dumps from old segment to new segment',
                                  os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps*', '*'),
                                  os.path.join(destSeg.getSegmentDataDirectory(), 'db_dumps'),
                                  srcSeg.getSegmentAddress(),
                                  destSeg.getSegmentAddress(),
                                  recursive=True)
                        cmd.run(validateAfter=True)
                        break
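The rc == 1 branch above turns every non-empty stderr line from a failed validation into a per-host message before raising. Below is a standalone sketch of just that aggregation step; the Fake* classes are hypothetical stand-ins for the worker-pool result objects, not gppylib types.

from dataclasses import dataclass

@dataclass
class FakeResult:  # hypothetical stand-in for a command result
    rc: int
    stderr: str

    def wasSuccessful(self):
        return self.rc == 0

@dataclass
class FakeItem:  # hypothetical stand-in for a completed pool item
    remoteHost: str
    results: FakeResult

    def get_results(self):
        return self.results

def collect_validation_errors(completed_items):
    # Mirrors the rc == 1 handling after 'validate blank segments'.
    errors = []
    for item in completed_items:
        results = item.get_results()
        if results.wasSuccessful():
            continue
        if results.rc == 1:
            for line in results.stderr.split("\n"):
                if line.strip():
                    errors.append("Validation failure on host %s %s" % (item.remoteHost, line))
        else:
            errors.append(str(item))
    return errors

if __name__ == "__main__":
    bad = FakeItem("sdw1", FakeResult(1, "data directory not empty\n"))
    print(collect_validation_errors([bad]))  # one message per stderr line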
Example no. 26
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(
            self.gpArray.get_unbalanced_primary_segdbs())
        pool = WorkerPool()

        count = 0

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)
                count += 1

            pool.wait_and_printdots(count, False)

            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            if failed_count > 0:
                logger.warn(
                    "%d segments failed to stop.  A full rebalance of the" % failed_count)
                logger.warn(
                    "system is not possible at this time.  Please check the")
                logger.warn(
                    "log files, correct the problem, and run gprecoverseg -r")
                logger.warn("again.")
                logger.info(
                    "gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            # issue a distributed query to make sure we pick up the fault
            # that we just caused by shutting down segments
            conn = None
            try:
                logger.info("Triggering segment reconfiguration")
                dburl = dbconn.DbURL()
                conn = dbconn.connect(dburl)
                cmd = ReconfigDetectionSQLQueryCommand(conn)
                pool.addCommand(cmd)
                pool.wait_and_printdots(1, False)
            except Exception:
                # This exception is expected
                pass
            finally:
                if conn:
                    conn.close()

            # Final step is to issue a recoverseg operation to resync segments
            logger.info("Starting segment synchronization")
            cmd = GpRecoverseg("rebalance recoverseg")
            pool.addCommand(cmd)
            pool.wait_and_printdots(1, False)
        except Exception as ex:
            raise ex
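Every rebalance variant in these examples shares the same fan-out shape: group segments by host, queue one stop command per host, wait on the pool, then count failures to decide between a full and a partial rebalance. Here is a standard-library sketch of that shape, with a stubbed per-host stop in place of GpSegStopCmd; the hostnames and the stop_host body are illustrative, not gppylib APIs.

import subprocess
from concurrent.futures import ThreadPoolExecutor

def stop_host(hostname):
    # Stub: a real implementation would dispatch GpSegStopCmd over SSH.
    return subprocess.run(["true"]).returncode

def stop_unbalanced_primaries(segs_by_host, parallelism=16):
    with ThreadPoolExecutor(max_workers=parallelism) as pool:
        return_codes = list(pool.map(stop_host, segs_by_host))
    failed_count = sum(1 for rc in return_codes if rc != 0)
    # True means every host stopped cleanly and a full rebalance is possible.
    return failed_count == 0

if __name__ == "__main__":
    print(stop_unbalanced_primaries({"sdw1": [], "sdw2": []}))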
Example no. 27
    def rebalance(self):
        self.logger.info("Determining primary and mirror segment pairs to rebalance")

        # The current implementation of rebalance calls "gprecoverseg -a" below.
        # Thus, if another balanced pair is not synchronized, or has a down mirror
        # that pair will be recovered as a side-effect of rebalancing.
        unbalanced_primary_segs = []
        for segmentPair in self.gpArray.segmentPairs:
            if segmentPair.balanced():
                continue

            if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized():
                unbalanced_primary_segs.append(segmentPair.primaryDB)
            else:
                self.logger.warning(
                    "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized" \
                    % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid))

        if not unbalanced_primary_segs:
            self.logger.info("No segments to rebalance")
            return True

        unbalanced_primary_segs = GpArray.getSegmentsByHostName(unbalanced_primary_segs)

        pool = base.WorkerPool(min(len(unbalanced_primary_segs), self.batch_size))
        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in list(unbalanced_primary_segs.keys()):
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600,
                                   segment_batch_size=self.segment_batch_size)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                    worker_pool=pool, timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            self.logger.info("=============================START ANOTHER RECOVER=========================================")
            # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            cmd_args = ['gprecoverseg', '-a', '-B', str(self.batch_size), '-b', str(self.segment_batch_size)]
            sys.argv = cmd_args[:]
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            try:
                recover_cmd.run()
            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error(' '.join(cmd_args))
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if recover_cmd:
                    recover_cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        except Exception as ex:
            raise ex
        finally:
            pool.join()
            pool.haltWork()
            pool.joinWorkers()
            signal.signal(signal.SIGINT, signal.default_int_handler)

        return allSegmentsStopped # if all segments stopped, then a full rebalance was done
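The in-process gprecoverseg invocation above hinges on a save/substitute/restore dance around sys.argv, with the restore in a finally block so it survives SystemExit. A minimal sketch of that pattern follows; placeholder_main stands in for GpRecoverSegmentProgram and is not a real API.

import sys

def placeholder_main():
    # Placeholder for GpRecoverSegmentProgram: just report the argv it saw.
    print("invoked as:", sys.argv)

def run_in_process(cmd_args):
    original_sys_args = sys.argv[:]
    try:
        sys.argv = cmd_args[:]
        placeholder_main()
    finally:
        sys.argv = original_sys_args  # restore even on SystemExit

if __name__ == "__main__":
    run_in_process(["gprecoverseg", "-a", "-B", "16", "-b", "64"])
    print("restored:", sys.argv[0])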
Example no. 28
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = [d.getSrcSegment() for d in directives]
        destSegments = [d.getDestSegment() for d in directives]
        isTargetReusedLocation = [d.isTargetReusedLocation() for d in directives]
        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

        logger.info("Building template directory")
        # In GPSQL, we need to create a template and copy it to all of the failed segments.
        if gpArray.getFaultStrategy() == gparray.FAULT_STRATEGY_NONE:
            tempDir = "/tmp/GPSQL"
            templateDir = tempDir + "/gpsql_template" + time.strftime("%Y%m%d_%H%M%S")
            unix.MakeDirectory("create blank directory for segment", templateDir).run(validateAfter=True)
            unix.Chmod.local("set permissions on template dir", templateDir, "0700")  # set perms so postgres can start

            logger.info("Creating template")
            srcSegments[0].createTemplate(templateDir)

            # Don't need log files and gpperfmon files in template.
            rmCmd = unix.RemoveFiles(
                "gprecoverseg remove gppermfon data from template", templateDir + "/gpperfmon/data"
            )
            rmCmd.run(validateAfter=True)
            rmCmd = unix.RemoveFiles("gprecoverseg remove logs from template", templateDir + "/pg_log/*")
            rmCmd.run(validateAfter=True)

            # other files not needed
            rmCmd = unix.RemoveFiles(
                "gprecoverseg remove postmaster.opt from template", templateDir + "/postmaster.opts"
            )
            rmCmd.run(validateAfter=True)
            rmCmd = unix.RemoveFiles(
                "gprecoverseg remove postmaster.pid from template", templateDir + "/postmaster.pid"
            )
            rmCmd.run(validateAfter=True)

            # template the temporary directories file
            template_temporary_directories(templateDir, srcSegments[0].content)

            tarFileName = "gpsqlSegmentTemplate.tar"
            blankTarFile = tempDir + "/" + tarFileName
            cmd = gp.CreateTar("gpbuildingmirrorsegment tar segment template", templateDir, blankTarFile)
            cmd.run(validateAfter=True)
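
        # NOTE: tempDir, blankTarFile, and tarFileName are only bound when the
        # FAULT_STRATEGY_NONE branch above ran; the steps below assume it did.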

        def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            return gp.ConfigureNewSegment(
                cmdLabel,
                segmentInfo,
                tarFile=tarFileName,
                newSegments=True,
                verbose=gplog.logging_is_verbose(),
                batchSize=self.__parallelDegree,
                ctxt=gp.REMOTE,
                remoteHost=hostName,
                validationOnly=validationOnly,
            )

        #
        # validate directories for target segments
        #
        logger.info("Validating remote directories")
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, "validate blank segments", True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)
        self.__pool.wait_and_printdots(len(cmds), self.__quiet)
        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

        #
        # copy tar from master to target hosts
        #
        logger.info("Copying template directory file")
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(gp.RemoteCopy("copy segment tar", blankTarFile, hostName, tarFileName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "building and transferring basic segment directory")

        #
        # unpack and configure new segments
        #
        logger.info("Configuring new segments")
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, "configure blank segments", False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory")

        #
        # Clean up copied tar from each remote host
        #
        logger.info("Cleaning files")
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(unix.RemoveFiles("remove tar file", tarFileName, ctxt=gp.REMOTE, remoteHost=hostName))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning up tar file on segment hosts")

        #
        # clean up the local temp directory
        #
        unix.RemoveFiles.local("remove temp directory", tempDir)
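Taken together, this variant's template lifecycle is: build once on the master, tar it, push the tar to each target host, unpack it there, then delete both the remote tars and the local temp directory. Below is a local-only sketch of the build step using just the standard library; the populate callback and file contents are illustrative assumptions.

import os
import shutil
import tarfile
import tempfile
import time

def build_template_tar(populate):
    temp_dir = tempfile.mkdtemp(prefix="gpsql_")
    template_dir = os.path.join(temp_dir, "gpsql_template" + time.strftime("%Y%m%d_%H%M%S"))
    os.makedirs(template_dir)
    os.chmod(template_dir, 0o700)  # postgres requires restrictive data-dir perms
    populate(template_dir)         # caller lays down the segment skeleton
    tar_path = os.path.join(temp_dir, "gpsqlSegmentTemplate.tar")
    with tarfile.open(tar_path, "w") as tar:
        tar.add(template_dir, arcname=".")
    return temp_dir, tar_path

if __name__ == "__main__":
    def seed(d):
        with open(os.path.join(d, "PG_VERSION"), "w") as f:
            f.write("9.4\n")

    temp_dir, tar_path = build_template_tar(seed)
    print("built", tar_path)
    shutil.rmtree(temp_dir)  # mirrors RemoveFiles.local('remove temp directory', ...)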
Example no. 29
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = [d.getSrcSegment() for d in directives]
        destSegments = [d.getDestSegment() for d in directives]
        isTargetReusedLocation = [d.isTargetReusedLocation() for d in directives]
        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

        logger.info('Building template directory')
        (tempDir, blankTarFile, tarFileName) = self.__buildTarFileForTransfer(gpEnv, gpArray.master, srcSegments[0], destSegments)

        def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          tarFile=tarFileName,
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly)

        #
        # validate directories for target segments
        #
        logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)
        self.__pool.wait_and_printdots(len(cmds), self.__quiet)
        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

        #
        # copy tar from master to target hosts
        #
        logger.info('Copying template directory file')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(gp.RemoteCopy("copy segment tar", blankTarFile, hostName, tarFileName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "building and transferring basic segment directory")

        #
        # unpack and configure new segments
        #
        logger.info('Configuring new segments')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory")

        #
        # Clean up copied tar from each remote host
        #
        logger.info('Cleaning files')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(unix.RemoveFiles('remove tar file', tarFileName, ctxt=gp.REMOTE, remoteHost=hostName))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning up tar file on segment hosts")

        #
        # clean up the local temp directory
        #
        unix.RemoveFiles.local('remove temp directory', tempDir)
Example no. 30
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = []
        destSegments = []
        isTargetReusedLocation = []
        for directive in directives:
            srcSegment = directive.getSrcSegment()
            destSegment = directive.getDestSegment()
            destSegment.primaryHostname = srcSegment.getSegmentHostName()
            destSegment.primarySegmentPort = srcSegment.getSegmentPort()

            srcSegments.append(srcSegment)
            destSegments.append(destSegment)
            isTargetReusedLocation.append(directive.isTargetReusedLocation())

        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
            destSegments, isTargetReusedLocation)

        def createConfigureNewSegmentCommand(hostName, cmdLabel,
                                             validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly,
                                          forceoverwrite=self.__forceoverwrite)

        #
        # validate directories for target segments
        #
        self.__logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'validate blank segments',
                                                 True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)
        self.__pool.wait_and_printdots(len(cmds), self.__quiet)
        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append(
                                "Validation failure on host %s %s" %
                                (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" +
                                              ("\n".join(validationErrors)))

        #
        # unpack and configure new segments
        #
        self.__logger.info('Configuring new segments')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'configure blank segments',
                                                 False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "unpacking basic segment directory")

        #
        # copy dump files from old segment to new segment
        #
        for srcSeg in srcSegments:
            for destSeg in destSegments:
                if srcSeg.content == destSeg.content:
                    src_dump_dir = os.path.join(
                        srcSeg.getSegmentDataDirectory(), 'db_dumps')
                    cmd = base.Command('check existence of db_dumps directory',
                                       'ls %s' % (src_dump_dir),
                                       ctxt=base.REMOTE,
                                       remoteHost=destSeg.getSegmentAddress())
                    cmd.run()
                    if cmd.results.rc == 0:  # Only try to copy directory if it exists
                        cmd = Scp(
                            'copy db_dumps from old segment to new segment',
                            os.path.join(srcSeg.getSegmentDataDirectory(),
                                         'db_dumps*', '*'),
                            os.path.join(destSeg.getSegmentDataDirectory(),
                                         'db_dumps'),
                            srcSeg.getSegmentAddress(),
                            destSeg.getSegmentAddress(),
                            recursive=True)
                        cmd.run(validateAfter=True)
                        break
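The dump-copy loop pairs source and destination segments by content id, probes for a db_dumps directory with a remote ls, and copies only when the probe succeeds. Here is a sketch of the same check-then-copy idea with plain ssh/scp; it assumes passwordless SSH between hosts, as gprecoverseg itself does, and the host/path arguments are illustrative.

import os
import subprocess

def copy_db_dumps(src_host, src_datadir, dest_host, dest_datadir):
    src_dump_dir = os.path.join(src_datadir, "db_dumps")
    # Probe first: ls exits non-zero when the directory is missing.
    probe = subprocess.run(["ssh", src_host, "ls %s" % src_dump_dir],
                           capture_output=True)
    if probe.returncode != 0:
        return False  # nothing to copy
    subprocess.run(["scp", "-r",
                    "%s:%s" % (src_host, src_dump_dir),
                    "%s:%s" % (dest_host, dest_datadir)],
                   check=True)
    return True

# Usage: copy_db_dumps("sdw1", "/data/primary/gpseg0", "sdw2", "/data/mirror/gpseg0")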
Example no. 31
    def __copySegmentDirectories(self, gpEnv, gpArray, directives):
        """
        directives should be composed of GpCopySegmentDirectoryDirective values
        """
        if len(directives) == 0:
            return

        srcSegments = [d.getSrcSegment() for d in directives]
        destSegments = [d.getDestSegment() for d in directives]
        isTargetReusedLocation = [
            d.isTargetReusedLocation() for d in directives
        ]
        destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
        newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
            destSegments, isTargetReusedLocation)

        logger.info('Building template directory')
        # In GPSQL, we need to create a template and copy it to all of the failed segments.
        if gpArray.getFaultStrategy() == gparray.FAULT_STRATEGY_NONE:
            tempDir = '/tmp/GPSQL'
            templateDir = tempDir + '/gpsql_template' + time.strftime(
                "%Y%m%d_%H%M%S")
            unix.MakeDirectory("create blank directory for segment",
                               templateDir).run(validateAfter=True)
            unix.Chmod.local('set permissions on template dir', templateDir,
                             '0700')  # set perms so postgres can start

            logger.info('Creating template')
            srcSegments[0].createTemplate(templateDir)

            # Don't need log files and gpperfmon files in template.
            rmCmd = unix.RemoveFiles(
                'gprecoverseg remove gpperfmon data from template',
                templateDir + '/gpperfmon/data')
            rmCmd.run(validateAfter=True)
            rmCmd = unix.RemoveFiles('gprecoverseg remove logs from template',
                                     templateDir + '/pg_log/*')
            rmCmd.run(validateAfter=True)

            # other files not needed
            rmCmd = unix.RemoveFiles(
                'gprecoverseg remove postmaster.opts from template',
                templateDir + '/postmaster.opts')
            rmCmd.run(validateAfter=True)
            rmCmd = unix.RemoveFiles(
                'gprecoverseg remove postmaster.pid from template',
                templateDir + '/postmaster.pid')
            rmCmd.run(validateAfter=True)

            # template the temporary directories file
            template_temporary_directories(templateDir, srcSegments[0].content)

            tarFileName = "gpsqlSegmentTemplate.tar"
            blankTarFile = tempDir + "/" + tarFileName
            cmd = gp.CreateTar('gpbuildingmirrorsegment tar segment template',
                               templateDir, blankTarFile)
            cmd.run(validateAfter=True)
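
        # NOTE: tempDir, blankTarFile, and tarFileName are only bound when the
        # FAULT_STRATEGY_NONE branch above ran; the steps below assume it did.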

        def createConfigureNewSegmentCommand(hostName, cmdLabel,
                                             validationOnly):
            segmentInfo = newSegmentInfo[hostName]
            checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
            return gp.ConfigureNewSegment(cmdLabel,
                                          segmentInfo,
                                          tarFile=tarFileName,
                                          newSegments=True,
                                          verbose=gplog.logging_is_verbose(),
                                          batchSize=self.__parallelDegree,
                                          ctxt=gp.REMOTE,
                                          remoteHost=hostName,
                                          validationOnly=validationOnly)

        #
        # validate directories for target segments
        #
        logger.info('Validating remote directories')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'validate blank segments',
                                                 True))
        for cmd in cmds:
            self.__pool.addCommand(cmd)
        self.__pool.wait_and_printdots(len(cmds), self.__quiet)
        validationErrors = []
        for item in self.__pool.getCompletedItems():
            results = item.get_results()
            if not results.wasSuccessful():
                if results.rc == 1:
                    # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                    lines = results.stderr.split("\n")
                    for line in lines:
                        if len(line.strip()) > 0:
                            validationErrors.append(
                                "Validation failure on host %s %s" %
                                (item.remoteHost, line))
                else:
                    validationErrors.append(str(item))
        self.__pool.empty_completed_items()
        if validationErrors:
            raise ExceptionNoStackTraceNeeded("\n" +
                                              ("\n".join(validationErrors)))

        #
        # copy tar from master to target hosts
        #
        logger.info('Copying template directory file')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                gp.RemoteCopy("copy segment tar", blankTarFile, hostName,
                              tarFileName))

        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "building and transferring basic segment directory")

        #
        # unpack and configure new segments
        #
        logger.info('Configuring new segments')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                createConfigureNewSegmentCommand(hostName,
                                                 'configure blank segments',
                                                 False))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "unpacking basic segment directory")

        #
        # Clean up copied tar from each remote host
        #
        logger.info('Cleaning files')
        cmds = []
        for hostName in destSegmentByHost.keys():
            cmds.append(
                unix.RemoveFiles('remove tar file',
                                 tarFileName,
                                 ctxt=gp.REMOTE,
                                 remoteHost=hostName))
        self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
            cmds, "cleaning up tar file on segment hosts")

        #
        # clean up the local temp directory
        #
        unix.RemoveFiles.local('remove temp directory', tempDir)
Example no. 32
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(
            self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)

            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn(
                    "%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn(
                    "system is not possible at this time.  Please check the")
                self.logger.warn(
                    "log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info(
                    "gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(
                logger=self.logger,
                worker_pool=pool,
                timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            cmd = None  # reset so the finally block cannot clean up a stale stop command
            try:
                self.logger.info(
                    "=============================START ANOTHER RECOVER========================================="
                )
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(
                    local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error(
                        "Failed to start the synchronization step of the segment rebalance."
                    )
                    self.logger.error(
                        "Check the gprecoverseg log file, correct any problems, and re-run"
                    )
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info(
                    "==============================END ANOTHER RECOVER=========================================="
                )

        except Exception as ex:
            raise ex
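Because this variant reuses the name cmd for both the per-host stop commands and the recovery program, its finally block could call cleanup() on a stale object when program construction fails (the reset to None above guards against that; Example no. 27 avoids the issue with a dedicated recover_cmd name). The sentinel pattern in isolation, with a hypothetical DemoProgram in place of GpRecoverSegmentProgram:

class DemoProgram:  # hypothetical stand-in for GpRecoverSegmentProgram
    def run(self):
        print("running")

    def cleanup(self):
        print("cleaned up")

def run_with_cleanup(make_program):
    program = None  # sentinel: nothing to clean up yet
    try:
        program = make_program()
        program.run()
    finally:
        if program is not None:  # skip cleanup when construction failed
            program.cleanup()

if __name__ == "__main__":
    run_with_cleanup(DemoProgram)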
Example no. 33
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(self.gpArray.get_unbalanced_primary_segdbs())
        pool = WorkerPool()
        
        count = 0

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)
            
            logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)
                count += 1
                
            pool.wait_and_printdots(count, False)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1
                    
            if failed_count > 0:
                logger.warn("%d segments failed to stop.  A full rebalance of the")
                logger.warn("system is not possible at this time.  Please check the")
                logger.warn("log files, correct the problem, and run gprecoverseg -r")
                logger.warn("again.")
                logger.info("gprecoverseg will continue with a partial rebalance.")
            
            pool.empty_completed_items()
            # issue a distributed query to make sure we pick up the fault
            # that we just caused by shutting down segments
            conn = None
            try:
                logger.info("Triggering segment reconfiguration")
                dburl = dbconn.DbURL()
                conn = dbconn.connect(dburl)
                cmd = ReconfigDetectionSQLQueryCommand(conn)
                pool.addCommand(cmd)
                pool.wait_and_printdots(1, False)
            except Exception:
                # This exception is expected
                pass
            finally:
                if conn:
                    conn.close()

            # Final step is to issue a recoverseg operation to resync segments
            logger.info("Starting segment synchronization")
            cmd = GpRecoverseg("rebalance recoverseg")
            pool.addCommand(cmd)
            pool.wait_and_printdots(1, False)
        except Exception as ex:
            raise ex
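The "Triggering segment reconfiguration" step issues a throwaway distributed query whose failure is the point: touching the cluster after primaries were stopped makes the fault prober notice and promote mirrors, which is why the exception is swallowed. A sketch of that intent; FakeConn is a hypothetical stand-in for a dbconn connection, not a real API.

class FakeConn:  # hypothetical stand-in for a dbconn connection
    def execute(self, sql):
        raise RuntimeError("segment down")  # simulate the expected failure

    def close(self):
        print("connection closed")

def trigger_reconfiguration(connect):
    conn = None
    try:
        conn = connect()
        conn.execute("SELECT 1")  # any distributed query will do
    except Exception:
        pass  # expected: the fault we just caused surfaces here
    finally:
        if conn:
            conn.close()

if __name__ == "__main__":
    trigger_reconfiguration(FakeConn)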
Example no. 34
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()
        count = 0

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)
                count += 1

            pool.wait_and_printdots(count, False)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the")
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            # issue a distributed query to make sure we pick up the fault
            # that we just caused by shutting down segments
            conn = None
            try:
                self.logger.info("Triggering segment reconfiguration")
                dburl = dbconn.DbURL()
                conn = dbconn.connect(dburl)
                cmd = ReconfigDetectionSQLQueryCommand(conn)
                pool.addCommand(cmd)
                pool.wait_and_printdots(1, False)
            except Exception:
                # This exception is expected
                pass
            finally:
                if conn:
                    conn.close()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            cmd = None  # reset so the finally block cannot clean up a stale stop command
            try:
                self.logger.info("=============================START ANOTHER RECOVER=========================================")
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        except Exception as ex:
            raise ex
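Finally, every rebalance variant brackets its critical section with the same signal handling: ignore SIGINT before stopping segments and, in the later versions, restore a handler in a finally block. A self-contained context-manager sketch of that dance, assuming only the standard library (it restores the previous handler rather than hard-coding default_int_handler):

import signal
from contextlib import contextmanager

@contextmanager
def sigint_ignored():
    previous = signal.signal(signal.SIGINT, signal.SIG_IGN)  # disable ctrl-c
    try:
        yield
    finally:
        signal.signal(signal.SIGINT, previous)  # always restore

if __name__ == "__main__":
    with sigint_ignored():
        print("ctrl-c is ignored inside this block")
    print("ctrl-c works again")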