Example 1
    def __init__(self, dblist, gpversion, collation, targetMirroringMode,
                 pickledTransitionData):
        self.dblist = dblist
        self.expected_gpversion = gpversion
        self.pool = base.WorkerPool(numWorkers=len(dblist))
        self.expected_lc_collate = None
        self.expected_lc_monetary = None
        self.expected_lc_numeric = None
        self.targetMirroringMode = targetMirroringMode
        self.pickledTransitionData = pickledTransitionData

        self.gphome = os.path.abspath(os.pardir)

        self.actual_gpversion = gp.GpVersion.local(
            'local GP software version check', self.gphome)
        if self.actual_gpversion != self.expected_gpversion:
            raise Exception(
                "Local Software Version does not match what is expected.\n"
                "The local software version is: '%s'\n"
                "But we were expecting it to be: '%s'\n"
                "Please review and correct" %
                (self.actual_gpversion, self.expected_gpversion))

        collation_strings = collation.split(':')
        if len(collation_strings) != 3:
            raise Exception("Invalid collation string specified!")
        (self.expected_lc_collate, self.expected_lc_monetary,
         self.expected_lc_numeric) = collation_strings

Example 2
    def run(self):
        results = []
        failures = []

        self.logger.info("Issuing shutdown commands to local segments...")
        self.pool = base.WorkerPool()
        for db in self.dblist:
            cmd = SegStop('segment shutdown',
                          db=db,
                          mode=self.mode,
                          timeout=self.timeout)
            self.pool.addCommand(cmd)
        self.pool.join()

        failed = False
        for cmd in self.pool.getCompletedItems():
            result = cmd.get_results()
            if not result.stopped:
                failed = True
            results.append(result)

        # Log the results!
        status = '\nCOMMAND RESULTS\n'
        for result in results:
            status += str(result) + "\n"

        self.logger.info(status)
        return 1 if failed else 0
Example 3
def get_unreachable_segment_hosts(hosts, num_workers):
    pool = base.WorkerPool(numWorkers=num_workers)
    try:
        for host in hosts:
            cmd = Command(name='check %s is up' % host,
                          cmdStr="ssh %s 'echo %s'" % (host, host))
            pool.addCommand(cmd)
        pool.join()
    finally:
        pool.haltWork()
        pool.joinWorkers()

    # There's no good way to map a CommandResult back to its originating Command.
    # To determine the reachable hosts, parse the stdout of the successful commands.
    reachable_hosts = set()
    for item in pool.getCompletedItems():
        result = item.get_results()
        if result.rc == 0:
            host = result.stdout.strip()
            reachable_hosts.add(host)

    unreachable_hosts = list(set(hosts).difference(reachable_hosts))
    unreachable_hosts.sort()
    if unreachable_hosts:
        logger.warning("One or more hosts are not reachable via SSH.")
        for host in unreachable_hosts:
            logger.warning("Host %s is unreachable" % host)

    return unreachable_hosts
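
All of these examples share the same WorkerPool life cycle: create the pool, add Command objects, join, inspect the completed items, then halt and join the workers. The skeleton below is a minimal sketch of that pattern distilled from Example 3; run_on_hosts is a hypothetical helper name for illustration, not part of gppylib.

from gppylib.commands import base
from gppylib.commands.base import Command

def run_on_hosts(hosts):
    # Minimal WorkerPool skeleton (hypothetical helper, not library API).
    pool = base.WorkerPool(numWorkers=min(len(hosts), 16))
    try:
        for host in hosts:
            # each added Command is executed by a worker thread
            pool.addCommand(Command(name='noop on %s' % host,
                                    cmdStr="ssh %s 'true'" % host))
        pool.join()  # block until all queued commands have finished
        return [cmd.get_results() for cmd in pool.getCompletedItems()]
    finally:
        pool.haltWork()     # always stop the worker threads,
        pool.joinWorkers()  # even if an exception was raised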
Example 4
    def run(self):
        if self.__options.parallelDegree < 1 or self.__options.parallelDegree > 64:
            raise ProgramArgumentValidationException(
                "Invalid parallelDegree provided with -B argument: %d" %
                self.__options.parallelDegree)

        self.__pool = base.WorkerPool(self.__options.parallelDegree)
        gpEnv = GpMasterEnvironment(self.__options.masterDataDirectory, True)

        faultProberInterface.getFaultProber().initializeProber(
            gpEnv.getMasterPort())
        confProvider = configInterface.getConfigurationProvider(
        ).initializeProvider(gpEnv.getMasterPort())
        gpArray = confProvider.loadSystemConfig(useUtilityMode=False)

        # check that heap_checksums is consistent across cluster, fail immediately if not
        self.validate_heap_checksums(gpArray)

        # fail if the cluster already has a mirroring fault strategy configured
        if gpArray.getFaultStrategy() != gparray.FAULT_STRATEGY_NONE:
            raise ExceptionNoStackTraceNeeded(
                "GPDB physical mirroring cannot be added.  The cluster is already configured with %s." %
                gparray.getFaultStrategyLabel(gpArray.getFaultStrategy()))

        # figure out what needs to be done
        mirrorBuilder = self.__getMirrorsToBuildBasedOnOptions(gpEnv, gpArray)
        mirrorBuilder.checkForPortAndDirectoryConflicts(gpArray)

        if self.__options.outputSampleConfigFile is not None:
            # just output config file and done
            self.__outputToFile(mirrorBuilder,
                                self.__options.outputSampleConfigFile, gpArray)
            logger.info('Configuration file output to %s successfully.' %
                        self.__options.outputSampleConfigFile)
        else:
            self.__displayAddMirrors(gpEnv, mirrorBuilder, gpArray)
            if self.__options.interactive:
                if not userinput.ask_yesno(
                        None, "\nContinue with add mirrors procedure", 'N'):
                    raise UserAbortedException()

            gpArray.setFaultStrategy(gparray.FAULT_STRATEGY_FILE_REPLICATION)
            mirrorBuilder.buildMirrors("add", gpEnv, gpArray)

            logger.info(
                "******************************************************************"
            )
            logger.info(
                "Mirror segments have been added; data synchronization is in progress."
            )
            logger.info(
                "Data synchronization will continue in the background.")
            logger.info("")
            logger.info(
                "Use  gpstate -s  to check the resynchronization progress.")
            logger.info(
                "******************************************************************"
            )

        return 0  # success -- exit code 0!
Example 5
    def __init__(self,
                 dblist,
                 gpversion,
                 collation,
                 mirroringMode,
                 num_cids,
                 era,
                 timeout,
                 pickledTransitionData,
                 specialMode,
                 wrapper,
                 wrapper_args,
                 logfileDirectory=False):

        # validate/store arguments
        #
        self.dblist = map(gparray.GpDB.initFromString, dblist)

        expected_gpversion = gpversion
        actual_gpversion = gp.GpVersion.local(
            'local GP software version check', os.path.abspath(os.pardir))
        if actual_gpversion != expected_gpversion:
            raise Exception(
                "Local Software Version does not match what is expected.\n"
                "The local software version is: '%s'\n"
                "But we were expecting it to be: '%s'\n"
                "Please review and correct" %
                (actual_gpversion, expected_gpversion))

        collation_strings = collation.split(':')
        if len(collation_strings) != 3:
            raise Exception("Invalid collation string specified!")
        (self.expected_lc_collate, self.expected_lc_monetary,
         self.expected_lc_numeric) = collation_strings

        self.mirroringMode = mirroringMode
        self.num_cids = num_cids
        self.era = era
        self.timeout = timeout
        self.pickledTransitionData = pickledTransitionData

        assert (specialMode in [None, 'upgrade', 'maintenance'])
        self.specialMode = specialMode

        self.wrapper = wrapper
        self.wrapper_args = wrapper_args

        # initialize state
        #
        self.pool = base.WorkerPool(numWorkers=len(dblist))
        self.logger = logger
        self.overall_status = None

        self.logfileDirectory = logfileDirectory
Example 6
    def __init__(self,
                 dblist,
                 gpversion,
                 mirroringMode,
                 num_cids,
                 era,
                 timeout,
                 pickledTransitionData,
                 specialMode,
                 wrapper,
                 wrapper_args,
                 coordinator_checksum_version,
                 segment_batch_size,
                 logfileDirectory=False):

        # validate/store arguments
        #
        self.dblist = list(map(gparray.Segment.initFromString, dblist))

        expected_gpversion = gpversion
        actual_gpversion = gp.GpVersion.local(
            'local GP software version check', os.path.abspath(os.pardir))
        if actual_gpversion != expected_gpversion:
            raise Exception(
                "Local Software Version does not match what is expected.\n"
                "The local software version is: '%s'\n"
                "But we were expecting it to be: '%s'\n"
                "Please review and correct" %
                (actual_gpversion, expected_gpversion))

        self.mirroringMode = mirroringMode
        self.num_cids = num_cids
        self.era = era
        self.timeout = timeout
        self.pickledTransitionData = pickledTransitionData

        assert (specialMode in [None, 'upgrade', 'maintenance'])
        self.specialMode = specialMode

        self.wrapper = wrapper
        self.wrapper_args = wrapper_args

        # initialize state
        #
        self.pool = base.WorkerPool(
            numWorkers=min(len(dblist), segment_batch_size))
        self.logger = logger
        self.overall_status = None

        self.logfileDirectory = logfileDirectory
        self.coordinator_checksum_version = coordinator_checksum_version
Example 7
    def validate(self):
        pool = base.WorkerPool()
        gp_array = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
        host_list = list(set(gp_array.get_hostlist(True)))
        msg = None

        for h in host_list:
            cmd = Command(h, "gpcheckresgroupimpl", REMOTE, h)
            pool.addCommand(cmd)
        pool.join()

        items = pool.getCompletedItems()
        failed = []
        for i in items:
            if not i.was_successful():
                failed.append("[%s:%s]" % (i.remoteHost, i.get_stderr().rstrip()))
        pool.haltWork()
        pool.joinWorkers()
        if failed:
            msg = ",".join(failed)
        return msg
Example 8
    def __init__(self, dblist, gpversion, targetMirroringMode,
                 pickledTransitionData):
        self.dblist = dblist
        self.expected_gpversion = gpversion
        self.pool = base.WorkerPool(numWorkers=len(dblist))
        self.targetMirroringMode = targetMirroringMode
        self.pickledTransitionData = pickledTransitionData

        self.gphome = os.path.abspath(os.pardir)

        self.actual_gpversion = gp.GpVersion.local(
            'local GP software version check', self.gphome)
        if self.actual_gpversion != self.expected_gpversion:
            raise Exception(
                "Local Software Version does not match what is expected.\n"
                "The local software version is: '%s'\n"
                "But we were expecting it to be: '%s'\n"
                "Please review and correct" %
                (self.actual_gpversion, self.expected_gpversion))

Example 9
    def rebalance(self):
        self.logger.info("Determining primary and mirror segment pairs to rebalance")

        # The current implementation of rebalance calls "gprecoverseg -a" below.
        # Thus, if another balanced pair is not synchronized or has a down mirror,
        # that pair will be recovered as a side effect of rebalancing.
        unbalanced_primary_segs = []
        for segmentPair in self.gpArray.segmentPairs:
            if segmentPair.balanced():
                continue

            if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized():
                unbalanced_primary_segs.append(segmentPair.primaryDB)
            else:
                self.logger.warning(
                    "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized" \
                    % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid))

        if not unbalanced_primary_segs:
            self.logger.info("No segments to rebalance")
            return True

        unbalanced_primary_segs = GpArray.getSegmentsByHostName(unbalanced_primary_segs)

        pool = base.WorkerPool(min(len(unbalanced_primary_segs), self.batch_size))
        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in list(unbalanced_primary_segs.keys()):
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600,
                                   segment_batch_size=self.segment_batch_size)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                    worker_pool=pool, timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            self.logger.info("=============================START ANOTHER RECOVER=========================================")
            # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            cmd_args = ['gprecoverseg', '-a', '-B', str(self.batch_size), '-b', str(self.segment_batch_size)]
            sys.argv = cmd_args[:]
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            try:
                recover_cmd.run()
            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error(' '.join(cmd_args))
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if recover_cmd:
                    recover_cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        finally:
            pool.join()
            pool.haltWork()
            pool.joinWorkers()
            signal.signal(signal.SIGINT, signal.default_int_handler)

        return allSegmentsStopped # if all segments stopped, then a full rebalance was done
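
Examples 9 and 11 re-enter gprecoverseg in-process by temporarily swapping sys.argv before calling the program's run() method. The sketch below isolates that pattern; run_cli_in_process is a hypothetical name for illustration, not part of gppylib.

import sys

def run_cli_in_process(main, argv):
    # Temporarily replace sys.argv so a CLI entry point that parses it can be
    # re-run inside the current process (as the rebalance code above does with
    # GpRecoverSegmentProgram). `main` is any zero-argument callable.
    saved = sys.argv[:]
    sys.argv = argv[:]
    try:
        return main()
    except SystemExit as e:
        return e.code      # CLI programs typically exit via SystemExit
    finally:
        sys.argv = saved   # always restore the caller's argv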
Example 10
    def run(self):
        if self.__options.batch_size < 1 or self.__options.batch_size > gp.MAX_COORDINATOR_NUM_WORKERS:
            raise ProgramArgumentValidationException(
                "Invalid batch_size provided with -B argument: %d" %
                self.__options.batch_size)
        if self.__options.segment_batch_size < 1 or self.__options.segment_batch_size > gp.MAX_SEGHOST_NUM_WORKERS:
            raise ProgramArgumentValidationException(
                "Invalid segment_batch_size provided with -b argument: %d" %
                self.__options.segment_batch_size)

        self.__pool = base.WorkerPool(self.__options.batch_size)
        gpEnv = GpCoordinatorEnvironment(
            self.__options.coordinatorDataDirectory, True)

        faultProberInterface.getFaultProber().initializeProber(
            gpEnv.getCoordinatorPort())
        confProvider = configInterface.getConfigurationProvider(
        ).initializeProvider(gpEnv.getCoordinatorPort())
        gpArray = confProvider.loadSystemConfig(useUtilityMode=False)

        # check that heap_checksums is consistent across cluster, fail immediately if not
        self.validate_heap_checksums(gpArray)

        if self.__options.mirrorConfigFile is None:
            self.checkMirrorOffset(gpArray)

        # fail if the cluster already has mirrors configured
        if gpArray.hasMirrors:
            raise ExceptionNoStackTraceNeeded(
                "GPDB physical mirroring cannot be added.  The cluster is already configured with Mirrors.")

        # figure out what needs to be done (AND update the gpArray!)
        mirrorBuilder = self.__getMirrorsToBuildBasedOnOptions(gpEnv, gpArray)
        mirrorBuilder.checkForPortAndDirectoryConflicts(gpArray)

        if self.__options.outputSampleConfigFile is not None:
            # just output config file and done
            self.__outputToFile(mirrorBuilder,
                                self.__options.outputSampleConfigFile, gpArray)
            logger.info('Configuration file output to %s successfully.' %
                        self.__options.outputSampleConfigFile)
        else:
            self.__displayAddMirrors(gpEnv, mirrorBuilder, gpArray)
            if self.__options.interactive:
                if not userinput.ask_yesno(
                        None, "\nContinue with add mirrors procedure", 'N'):
                    raise UserAbortedException()

            update_pg_hba_on_segments(gpArray, self.__options.hba_hostnames,
                                      self.__options.batch_size)
            if not mirrorBuilder.buildMirrors("add", gpEnv, gpArray):
                return 1

            logger.info(
                "******************************************************************"
            )
            logger.info(
                "Mirror segments have been added; data synchronization is in progress."
            )
            logger.info(
                "Data synchronization will continue in the background.")
            logger.info(
                "Use  gpstate -s  to check the resynchronization progress.")
            logger.info(
                "******************************************************************"
            )

        return 0  # success -- exit code 0!
Example 11
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(
            self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)

            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn(
                    "%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn(
                    "system is not possible at this time.  Please check the")
                self.logger.warn(
                    "log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info(
                    "gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(
                logger=self.logger,
                worker_pool=pool,
                timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            try:
                self.logger.info(
                    "=============================START ANOTHER RECOVER========================================="
                )
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(
                    local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error(
                        "Failed to start the synchronization step of the segment rebalance."
                    )
                    self.logger.error(
                        "Check the gprecoverseg log file, correct any problems, and re-run"
                    )
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info(
                    "==============================END ANOTHER RECOVER=========================================="
                )

        finally:
            # assumed tail: restore ctrl-c handling and tear down the pool,
            # mirroring the fuller variant of this method in Example 9
            pool.haltWork()
            pool.joinWorkers()
            signal.signal(signal.SIGINT, signal.default_int_handler)
Example 12
'''
gphostcachelookup.py -- look up the hostname for a list of interfaces

Usage: gphostcachelookup.py interface-name
Input is taken from stdin. Each line is treated as an interface name.
Output is the corresponding hostname, printed to stdout.

'''

import sys
from gppylib.gphostcache import GpInterfaceToHostNameCache
from gppylib.commands import base

#-------------------------------------------------------------------------
if __name__ == '__main__':

    pool = base.WorkerPool(1)
    retCode = 0

    try:
        interfaces = []
        hostNames = []
        for line in sys.stdin:
            interfaces.append(line.strip())
            hostNames.append(None)

        lookup = GpInterfaceToHostNameCache(pool, interfaces, hostNames)

        for interface in interfaces:
            hostname = lookup.getHostName(interface)
            if hostname is None:
                sys.stdout.write("__lookup_of_hostname_failed__\n")
                retCode = 1  # assumption: report lookup failures via the exit code
            else:
                sys.stdout.write("%s\n" % hostname)
        sys.stdout.flush()
    finally:
        # assumed completion of the truncated snippet: always tear down the pool
        pool.haltWork()
        pool.joinWorkers()

    sys.exit(retCode)
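
A hedged usage sketch: the script reads interface names from stdin and writes one hostname (or the failure marker) per line to stdout, so it can be driven from Python as below. The script path is assumed for illustration; adjust it to the actual install location.

import subprocess

proc = subprocess.run(
    ["python", "gphostcachelookup.py"],  # path assumed for illustration
    input="eth0\nlo\n",                  # one interface name per line
    capture_output=True,
    text=True,
)
print(proc.stdout)  # one hostname or "__lookup_of_hostname_failed__" per line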