class DeleteCurrentDump(Operation):
    def __init__(self, timestamp, master_datadir, master_port, ddboost):
        self.timestamp = timestamp
        self.master_datadir = master_datadir
        self.master_port = master_port
        self.ddboost = ddboost

    def execute(self):
        try:
            DeleteCurrentSegDump(self.timestamp, self.master_datadir).run()
        except OSError, e:
            logger.warn("Error encountered during deletion of %s on master" % self.timestamp)

        gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
        segs = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True)]
        for seg in segs:
            try:
                RemoteOperation(DeleteCurrentSegDump(self.timestamp, seg.getSegmentDataDirectory()),
                                seg.getSegmentHostName()).run()
            except OSError, e:
                logger.warn("Error encountered during deletion of %s on %s" %
                            (self.timestamp, seg.getSegmentHostName()))
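# A minimal usage sketch for the Operation above (the timestamp, data directory, and port
# values are illustrative assumptions, not taken from any real configuration). Like the
# nested DeleteCurrentSegDump it dispatches, it is driven through the .run() entry point:
#
#   DeleteCurrentDump(timestamp='20120101120000',
#                     master_datadir='/data/master/gpseg-1',
#                     master_port=5432,
#                     ddboost=False).run()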
def execute(self):
    dburl = dbconn.DbURL()
    gparray = GpArray.initFromCatalog(dburl)
    my_fault_strategy = gparray.getFaultStrategy()
    if my_fault_strategy != FAULT_STRATEGY_FILE_REPLICATION:
        raise NoMirroringError('Fault strategy %s does not support mirror verification.' %
                               FAULT_STRATEGY_LABELS[my_fault_strategy])

    if self.content is not None:
        contents = set([seg.getSegmentContentId() for seg in gparray.getDbList()])
        if self.content not in contents:
            raise InvalidContentIDError(self.content)

    logger.info('Validating target contents...')
    to_verify = [x for x in gparray.getDbList() if x.isSegmentQE()]
    if self.content is not None:
        to_verify = [x for x in to_verify if x.getSegmentContentId() == self.content]
    if self.primaries_only:
        to_verify = [x for x in to_verify if x.isSegmentPrimary(current_role=True)]
    return to_verify
def execute(self):
    dburl = dbconn.DbURL()
    query = self.UPDATE_VERIFICATION_ENTRY % (self.state, self.done, self.mismatch, self.token)
    with dbconn.connect(dburl, allowSystemTableMods='dml') as conn:
        dbconn.execSQL(conn, query)
        conn.commit()
def execute(self):
    dburl = dbconn.DbURL()
    query = self.INSERT_VERIFICATION_ENTRY % (self.token, self.type, self.content,
                                              VerificationState.RUNNING)
    with dbconn.connect(dburl, allowSystemTableMods='dml') as conn:
        dbconn.execSQL(conn, query)
        conn.commit()
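# The INSERT_VERIFICATION_ENTRY and UPDATE_VERIFICATION_ENTRY templates are not shown in
# these snippets. As a rough, hypothetical illustration of the interpolation above (the
# table name is a placeholder; the column names vertoken, vertype, vercontent, verstate
# are taken from the result-dict keys used later in this section):
#
#   INSERT_VERIFICATION_ENTRY = ("insert into <verification_table> "
#                                "(vertoken, vertype, vercontent, verstate) "
#                                "values ('%s', '%s', '%s', '%s')")
#   query = INSERT_VERIFICATION_ENTRY % (token, type, content, VerificationState.RUNNING)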
def execute(self): """ TODO: Improve with grouping by host and ParallelOperation dispatch. """ gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True) primaries = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True) ] dump_count = 0 for seg in primaries: if seg.isSegmentDown(): """ Why must every Segment function have the word Segment in it ?! """ raise ExceptionNoStackTraceNeeded( "Host %s dir %s dbid %d marked as invalid" % (seg.getSegmentHostName(), seg.getSegmentDataDirectory(), seg.getSegmentDbId())) path = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, self.restore_timestamp[0:8]) host = seg.getSegmentHostName() path = os.path.join( path, "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.restore_timestamp)) if self.compress: path += ".gz" exists = CheckRemoteFile(path, host).run() if not exists: raise ExceptionNoStackTraceNeeded( "No dump file on %s at %s" % (seg.getSegmentHostName(), path))
def execute(self):
    needed_space = 0
    dburl = dbconn.DbURL(dbname=self.dump_database, port=self.segport)
    conn = None
    try:
        conn = dbconn.connect(dburl, utility=True)
        if self.include_dump_tables:
            for dump_table in self.include_dump_tables:
                schema, table = dump_table.split('.')
                needed_space += execSQLForSingleton(
                    conn,
                    "select sotdsize/1024 from gp_toolkit.gp_size_of_table_disk where sotdschemaname='%s' and sotdtablename='%s';" % (schema, table))
        else:
            needed_space = execSQLForSingleton(
                conn,
                "select sodddatsize/1024 from gp_toolkit.gp_size_of_database where sodddatname='%s';" % self.dump_database)
    except UnexpectedRowsError, e:
        logger.exception("Disk space queries have failed. Cannot estimate disk space needed for dump.")
        raise ExceptionNoStackTraceNeeded(
            "Cannot estimate disk space needed for dump. Use -b to override this check.")
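# Note (interpretation, not stated in the source): gp_toolkit reports these sizes in bytes,
# so the "/1024" in the queries above yields kilobytes, which matches the "[Need: %dK,
# Free %dK]" units logged by the disk-space validation dispatch later in this section.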
def execute(self):
    schema, table = UpdateHistoryTable.HISTORY_TABLE.split('.')
    exists = CheckTableExists(database=self.dump_database, schema=schema, table=table,
                              master_port=self.master_port).run()
    if not exists:
        conn = None
        CREATE_HISTORY_TABLE = """create table %s (rec_date timestamp, start_time char(8),
                                      end_time char(8), options text, dump_key varchar(20),
                                      dump_exit_status smallint, script_exit_status smallint,
                                      exit_text varchar(10)) distributed by (rec_date);""" % UpdateHistoryTable.HISTORY_TABLE
        try:
            dburl = dbconn.DbURL(port=self.master_port, dbname=self.dump_database)
            conn = dbconn.connect(dburl)
            execSQL(conn, CREATE_HISTORY_TABLE)
            conn.commit()
        except Exception, e:
            logger.exception("Unable to create %s in %s database" %
                             (UpdateHistoryTable.HISTORY_TABLE, self.dump_database))
            return
        else:
            logger.info("Created %s in %s database" %
                        (UpdateHistoryTable.HISTORY_TABLE, self.dump_database))
        finally:
            # The finally body was cut off in the original snippet; closing the connection
            # here follows the conn-handling pattern used by the other operations above.
            if conn is not None:
                conn.close()
def __ensureMarkedDown(self, gpEnv, toEnsureMarkedDown):
    """Waits for FTS prober to mark segments as down"""

    wait_time = 60 * 30  # Wait up to 30 minutes to handle very large, busy
                         # clusters that may have faults. In most cases the
                         # actual time to wait will be small and this operation
                         # is only needed when moving mirrors that are up and
                         # needed to be stopped, an uncommon operation.

    dburl = dbconn.DbURL(port=gpEnv.getMasterPort(), dbname='template1')

    time_elapsed = 0
    seg_up_count = 0
    initial_seg_up_count = len(toEnsureMarkedDown)
    last_seg_up_count = initial_seg_up_count

    if initial_seg_up_count == 0:
        # Nothing to wait on
        return

    logger.info("Waiting for segments to be marked down.")
    logger.info("This may take up to %d seconds on large clusters." % wait_time)

    # wait for all needed segments to be marked down by the prober. We'll wait
    # a max time of double the interval
    while wait_time > time_elapsed:
        seg_up_count = 0
        current_gparray = GpArray.initFromCatalog(dburl, True)
        seg_db_map = current_gparray.getSegDbMap()

        # go through and get the status of each segment we need to be marked down
        for segdb in toEnsureMarkedDown:
            if segdb.getSegmentDbId() in seg_db_map and seg_db_map[segdb.getSegmentDbId()].isSegmentUp() == True:
                seg_up_count += 1

        if seg_up_count == 0:
            break
        else:
            if last_seg_up_count != seg_up_count:
                print "\n",
                logger.info("%d of %d segments have been marked down." %
                            (initial_seg_up_count - seg_up_count, initial_seg_up_count))
                last_seg_up_count = seg_up_count

            for _i in range(1, 5):
                time.sleep(1)
                sys.stdout.write(".")
                sys.stdout.flush()

            time_elapsed += 5

    if seg_up_count == 0:
        print "\n",
        logger.info("%d of %d segments have been marked down." %
                    (initial_seg_up_count, initial_seg_up_count))
    else:
        raise Exception("%d segments were not marked down by FTS" % seg_up_count)
def execute(self):
    dburl = dbconn.DbURL()
    query = self.SELECT_VERIFICATION_ENTRY % self.token
    with dbconn.connect(dburl) as conn:
        try:
            tuple = dbconn.execSQLForSingletonRow(conn, query)
        except UnexpectedRowsError, e:
            if e.actual == 0:
                raise TokenNotFound(self.token)
            raise
def execute(self):
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    config_backup_file = "gp_master_config_files_%s.tar" % timestamp
    if self.backup_dir is not None:
        path = os.path.join(self.backup_dir, DUMP_DIR, DUMP_DATE, config_backup_file)
    else:
        path = os.path.join(self.master_datadir, DUMP_DIR, DUMP_DATE, config_backup_file)

    logger.info("Dumping master config files")
    Command("Dumping master configuration files",
            "tar cf %s %s/*.conf" % (path, self.master_datadir)).run(validateAfter=True)

    if self.ddboost:
        abspath = path
        relpath = os.path.join(DUMP_DIR, DUMP_DATE, config_backup_file)
        logger.debug('Copying %s to DDBoost' % abspath)
        cmd = Command('DDBoost copy of %s' % abspath,
                      'gpddboost --copyToDDBoost --from-file=%s --to-file=%s' % (abspath, relpath))
        cmd.run(validateAfter=True)

    logger.info("Dumping segment config files")
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    primaries = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True)]
    for seg in primaries:
        config_backup_file = "gp_segment_config_files_0_%d_%s.tar" % (seg.getSegmentDbId(), timestamp)
        if self.backup_dir is not None:
            path = os.path.join(self.backup_dir, DUMP_DIR, DUMP_DATE, config_backup_file)
        else:
            path = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, DUMP_DATE, config_backup_file)
        host = seg.getSegmentHostName()
        Command("Dumping segment config files",
                "tar cf %s %s/*.conf" % (path, seg.getSegmentDataDirectory()),
                ctxt=REMOTE, remoteHost=host).run(validateAfter=True)

        if self.ddboost:
            abspath = path
            relpath = os.path.join(DUMP_DIR, DUMP_DATE, config_backup_file)
            logger.debug('Copying %s to DDBoost' % abspath)
            cmd = Command('DDBoost copy of %s' % abspath,
                          'gpddboost --copyToDDBoost --from-file=%s --to-file=%s' % (abspath, relpath),
                          ctxt=REMOTE, remoteHost=host)
            cmd.run(validateAfter=True)
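# Illustrative example only (all values below are assumptions, not from the source): with
# backup_dir unset, master_datadir '/data/master/gpseg-1', DUMP_DIR 'db_dumps', DUMP_DATE
# '20120101', and a timestamp of '20120101120000', the master archive produced above would
# land at:
#
#   /data/master/gpseg-1/db_dumps/20120101/gp_master_config_files_20120101120000.tar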
def __init__(self, masterDataDir, readFromMasterCatalog, timeout=None, retries=None):
    """
    masterDataDir: if None then we try to find it from the system environment
    readFromMasterCatalog: if True then we will connect to the master in utility mode and
                           fetch some more data from there (like collation settings)
    """
    if masterDataDir is None:
        self.__masterDataDir = gp.get_masterdatadir()
    else:
        self.__masterDataDir = masterDataDir

    logger.debug("Obtaining master's port from master data directory")
    pgconf_dict = pgconf.readfile(self.__masterDataDir + "/postgresql.conf")
    self.__masterPort = pgconf_dict.int('port')
    logger.debug("Read from postgresql.conf port=%s" % self.__masterPort)
    self.__masterMaxConnections = pgconf_dict.int('max_connections')
    logger.debug("Read from postgresql.conf max_connections=%s" % self.__masterMaxConnections)

    self.__gpHome = gp.get_gphome()
    self.__gpVersion = gp.GpVersion.local('local GP software version check', self.__gpHome)
    logger.info("local Greenplum Version: '%s'" % self.__gpVersion)

    # read collation settings from master
    if readFromMasterCatalog:
        dbUrl = dbconn.DbURL(port=self.__masterPort, dbname='template1',
                             timeout=timeout, retries=retries)
        conn = dbconn.connect(dbUrl, utility=True)
        (self.__lcCollate, self.__lcMonetary, self.__lcNumeric) = catalog.getCollationSettings(conn)

        # MPP-13807, read/show the master's database version too
        self.__pgVersion = dbconn.execSQLForSingletonRow(conn, "select version();")[0]
        logger.info("master Greenplum Version: '%s'" % self.__pgVersion)
        conn.close()

        checkNotNone("lc_collate", self.__lcCollate)
        checkNotNone("lc_monetary", self.__lcMonetary)
        checkNotNone("lc_numeric", self.__lcNumeric)
    else:
        self.__lcCollate = None
        self.__lcMonetary = None
        self.__lcNumeric = None
        self.__pgVersion = None
def _analyze(self, restore_db, master_port):
    conn = None
    logger.info('Commencing analyze of %s database, please wait' % restore_db)
    try:
        dburl = dbconn.DbURL(port=master_port, dbname=restore_db)
        conn = dbconn.connect(dburl)
        execSQL(conn, 'analyze')
        conn.commit()
    except Exception, e:
        logger.warn('Issue with analyze of %s database' % restore_db)
def execute(self):
    conn = None  # initialized up front so the finally clause is safe even if connect() fails
    try:
        dburl = dbconn.DbURL(port=self.master_port, dbname=self.database)
        conn = dbconn.connect(dburl)
        count = execSQLForSingleton(
            conn,
            "select count(*) from pg_class, pg_namespace where pg_class.relname = '%s' and pg_class.relnamespace = pg_namespace.oid and pg_namespace.nspname = '%s'" % (self.table, self.schema))
        return count > 0
    finally:
        if conn is not None:
            conn.close()
def execute(self):
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    failed_segs = [seg for seg in gparray.getDbList()
                   if seg.isSegmentPrimary(current_role=True) and seg.isSegmentDown()]
    if len(failed_segs) != 0:
        logger.warn("Failed primary segment instances detected")
        # join() requires strings, so convert the integer dbids here
        failed_dbids = [str(seg.getSegmentDbId()) for seg in failed_segs]
        raise ExceptionNoStackTraceNeeded("Detected failed segment(s) with dbid=%s" %
                                          ",".join(failed_dbids))
def execute(self):
    conn = None
    try:
        dburl = dbconn.DbURL(port=self.master_port)
        conn = dbconn.connect(dburl)
        count = execSQLForSingleton(
            conn,
            "select count(*) from pg_database where datname='%s';" % self.database)
        if count == 0:
            raise ExceptionNoStackTraceNeeded("Database %s does not exist." % self.database)
    finally:
        if conn is not None:
            conn.close()
def execute(self):
    conn = None
    logger.info('Commencing vacuum of %s database, please wait' % self.database)
    try:
        dburl = dbconn.DbURL(port=self.master_port, dbname=self.database)
        conn = dbconn.connect(dburl)
        cursor = conn.cursor()
        cursor.execute("commit")  # hack to move the vacuum out of the implied transaction
        cursor.execute("vacuum")
        cursor.close()
    except Exception, e:
        logger.exception('Error encountered with vacuum of %s database' % self.database)
def _get_gpdb_host_list(self):
    """
    TODO: AK: Get rid of this. Program logic should not be driving host list building.

    This method gets the host names of all hosts in the gpdb array.
    It sets the following variables:
        GpPkgProgram.master_host to the master host
        GpPkgProgram.standby_host to the standby host
        GpPkgProgram.segment_host_list to the segment hosts
    """
    logger.debug('_get_gpdb_host_list')

    # Get host list
    gparr = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    master_host = None
    standby_host = None
    segment_host_list = []

    segs = gparr.getDbList()
    for seg in segs:
        if seg.isSegmentMaster(current_role=True):
            master_host = seg.getSegmentHostName()
        elif seg.isSegmentStandby(current_role=True):
            standby_host = seg.getSegmentHostName()
        else:
            segment_host_list.append(seg.getSegmentHostName())

    # Deduplicate the hosts so that we don't install multiple times on the same host
    segment_host_list = list(set(segment_host_list))

    # Segments might exist on the master host. Since we store the master host separately
    # in self.master_host, storing the master_host in the segment_host_list is redundant.
    # Iterate over a copy so that removing entries does not skip elements.
    for host in list(segment_host_list):
        if host == master_host or host == standby_host:
            segment_host_list.remove(host)

    self.master_host = master_host
    self.standby_host = standby_host
    self.segment_host_list = segment_host_list
def execute(self):
    dburl = dbconn.DbURL(port=self.master_port)
    if self.ddboost:
        cmd = Command('List directories in DDBoost db_dumps dir',
                      'gpddboost --listDir --dir=db_dumps/ | grep ^[0-9]')
        cmd.run(validateAfter=False)
        rc = cmd.get_results().rc
        if rc != 0:
            logger.info("Cannot find old backup sets to remove on DDboost")
            return
        old_dates = cmd.get_results().stdout.splitlines()
    else:
        old_dates = ListFiles(os.path.join(self.master_datadir, DUMP_DIR)).run()
    try:
        old_dates.remove(DUMP_DATE)
    except ValueError, e:
        # DUMP_DATE was not found in old_dates
        pass
def execute(self):
    existing_tables = []
    table_counts = []
    conn = None
    try:
        dburl = dbconn.DbURL(port=self.master_port, dbname=self.restore_db)
        conn = dbconn.connect(dburl)
        for restore_table in self.restore_tables:
            if '.' not in restore_table:
                logger.warn("No schema name supplied for %s, removing from list of tables to restore" %
                            restore_table)
                continue
            schema, table = restore_table.split('.')
            count = execSQLForSingleton(
                conn,
                "select count(*) from pg_class, pg_namespace where pg_class.relname = '%s' and pg_class.relnamespace = pg_namespace.oid and pg_namespace.nspname = '%s'" % (table, schema))
            if count == 0:
                logger.warn("Table %s does not exist in database %s, removing from list of tables to restore" %
                            (table, self.restore_db))
                continue
            count = execSQLForSingleton(conn, "select count(*) from %s.%s" % (schema, table))
            if count > 0:
                logger.warn('Table %s has %d records %s' % (restore_table, count, WARN_MARK))
            existing_tables.append(restore_table)
            table_counts.append((restore_table, count))
    finally:
        if conn is not None:
            conn.close()

    if len(existing_tables) == 0:
        raise ExceptionNoStackTraceNeeded("Have no tables to restore")
    logger.info("Have %d tables to restore, will continue" % len(existing_tables))
    return (existing_tables, table_counts)
def execute(self):
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    from_host, from_path = self.host, self.path
    logger.info("Commencing remote database dump file recovery process, please wait...")
    segs = [seg for seg in gparray.getDbList()
            if seg.isSegmentPrimary(current_role=True) or seg.isSegmentMaster()]
    pool = WorkerPool(numWorkers=min(len(segs), self.batch_default))
    for seg in segs:
        if seg.isSegmentMaster():
            file = '%s%s' % (MASTER_DBDUMP_PREFIX, self.restore_timestamp)
        else:
            file = '%s0_%d_%s' % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.restore_timestamp)
        if self.compress:
            file += '.gz'

        to_host = seg.getSegmentHostName()
        to_path = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, self.restore_timestamp[0:8])
        if not CheckRemoteDir(to_path, to_host).run():
            logger.info('Creating directory %s on %s' % (to_path, to_host))
            try:
                MakeRemoteDir(to_path, to_host).run()
            except OSError, e:
                raise ExceptionNoStackTraceNeeded("Failed to create directory %s on %s" %
                                                  (to_path, to_host))

        logger.info("Commencing remote copy from %s to %s:%s" % (from_host, to_host, to_path))
        pool.addCommand(Scp('Copying dump for seg %d' % seg.getSegmentDbId(),
                            srcFile=os.path.join(from_path, file),
                            dstFile=os.path.join(to_path, file),
                            srcHost=from_host,
                            dstHost=to_host))
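# Note: the snippet above ends after queueing the Scp commands and does not show the pool
# being drained. The WorkerPool idiom used elsewhere in these operations (see the DDBoost
# dump-building execute() below) finishes with something like:
#
#   pool.join()
#   pool.check_results()
#
# This is a sketch of that idiom, not the missing original lines.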
def execute(self):
    dburl = dbconn.DbURL(dbname=self.database, port=self.master_port)
    conn = None
    try:
        conn = dbconn.connect(dburl)
        count = execSQLForSingleton(
            conn,
            "select count(*) from pg_class, pg_namespace where pg_namespace.nspname = 'gp_toolkit' and pg_class.relnamespace = pg_namespace.oid")
    finally:
        if conn is not None:
            conn.close()

    if count > 0:
        logger.debug("gp_toolkit exists within database %s." % self.database)
        return

    logger.info("gp_toolkit not found. Installing...")
    Psql('Installing gp_toolkit',
         filename='$GPHOME/share/postgresql/gp_toolkit.sql',
         database=self.database,
         port=self.master_port).run(validateAfter=True)
def testReadPostmasterTempFile(self):
    logger.info("testReadPostmasterTempFile")
    url = dbconn.DbURL()
    gpdb = GpArray.initFromCatalog(url)
    logger.info("Search for valid master port: %s" % gpdb.master.port)
    cmd = pg.ReadPostmasterTempFile.local('test pg tempfile read', gpdb.master.port)
    (exists, PID, datadir) = cmd.getResults()
    logger.info("exists:=%s PID=%d datadir='%s'" % (exists, PID, datadir))
    self.assertTrue(exists)
    self.assertTrue(PID > 0)
    self.assertEquals(datadir, gpdb.master.datadir)

    gpdb.master.port = 4000
    logger.info("Search for bogus master port: %s" % gpdb.master.port)
    cmd = pg.ReadPostmasterTempFile.local('test pg tempfile read', gpdb.master.port)
    (exists, PID, datadir) = cmd.getResults()
    logger.info("exists:=%s PID=%d datadir='%s'" % (exists, PID, datadir))
    self.assertFalse(exists)
def _analyze(self, restore_db, restore_tables, master_port):
    conn = None
    try:
        dburl = dbconn.DbURL(port=master_port, dbname=restore_db)
        conn = dbconn.connect(dburl)
        for table in restore_tables:
            logger.info('Commencing analyze of %s in %s database, please wait...' %
                        (table, restore_db))
            try:
                execSQL(conn, 'analyze %s' % table)
                conn.commit()
            except Exception, e:
                logger.warn('Issue with analyze of %s table, check log file for details' % table)
            else:
                logger.info('Analyze of %s table completed without error' % table)
    finally:
        if conn is not None:
            conn.close()
def execute(self):
    ret = []
    dburl = dbconn.DbURL()
    with dbconn.connect(dburl) as conn:
        # TODO: improve execSQL APIs to avoid need to use cursor here for such a simple task
        cursor = conn.cursor()
        cursor.execute(self.SELECT_ALL_VERIFICATIONS)
        res = cursor.fetchall()
        cursor.close()

    for tuple in res:
        # TODO: execSQL or pygresql should be able to do this for us
        ret.append({'vertoken': tuple[0],
                    'vertype': tuple[1],
                    'vercontent': tuple[2],
                    'verstarttime': tuple[3],
                    'verstate': tuple[4],
                    'verdone': tuple[5],
                    'verendtime': tuple[6],
                    'vermismatch': tuple[7]})
    return ret
def get_host_list():
    '''
    Returns a tuple which consists of the standby host and the list of segment hosts.
    '''
    gparr = GpArray.initFromCatalog(dbconn.DbURL(port=MASTER_PORT), utility=True)
    segs = gparr.getDbList()

    standby_host = None
    segment_host_list = []
    for seg in segs:
        if seg.isSegmentStandby(current_role=True):
            standby_host = seg.getSegmentHostName()
        elif not seg.isSegmentMaster(current_role=True):
            segment_host_list.append(seg.getSegmentHostName())

    # Deduplicate the hosts so that we don't install multiple times on the same host
    segment_host_list = list(set(segment_host_list))

    return (standby_host, segment_host_list)
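# Minimal usage sketch (assumes the module-level MASTER_PORT and logger from the snippet's
# surrounding context; the log message is illustrative):
#
#   standby_host, segment_hosts = get_host_list()
#   if standby_host is None:
#       logger.info("No standby master configured")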
def _process_createdb(self, restore_timestamp, restore_db, master_datadir, master_port):
    conn = None
    try:
        dburl = dbconn.DbURL(port=master_port)
        conn = dbconn.connect(dburl)
        count = execSQLForSingleton(
            conn,
            "select count(*) from pg_database where datname='%s';" % restore_db)
        if count == 1:
            logger.info("Dropping database %s" % restore_db)
            try:
                cursor = conn.cursor()
                cursor.execute("commit")  # hack to move drop stmt out of implied transaction
                cursor.execute("drop database %s" % restore_db)
                cursor.close()
            except Exception, e:
                logger.exception("Could not drop database %s" % restore_db)
                raise ExceptionNoStackTraceNeeded('Failed to drop database %s' % restore_db)
            else:
                logger.info('Dropped database %s' % restore_db)
    finally:
        if conn is not None:
            conn.close()

    createdb_file = os.path.join(master_datadir, DUMP_DIR, restore_timestamp[0:8],
                                 "%s%s" % (CREATEDB_PREFIX, restore_timestamp))
    logger.info('Invoking %s' % createdb_file)
    Psql('Invoking schema dump', filename=createdb_file).run(validateAfter=True)
def execute(self):
    fake_timestamp = PickDumpTimestamp(restore_timestamp=self.restore_timestamp,
                                       compress=self.compress,
                                       master_datadir=self.master_datadir).run()

    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    primaries = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True)]
    operations = []
    for seg in primaries:
        real_filename = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR,
                                     self.restore_timestamp[0:8],
                                     "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(),
                                                    self.restore_timestamp))
        fake_filename = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR,
                                     fake_timestamp[0:8],
                                     "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(),
                                                    fake_timestamp))
        operations.append(BuildRemoteTableDump(self.restore_tables, real_filename,
                                               fake_filename, self.compress,
                                               seg.getSegmentHostName()))

    ParallelOperation(operations, self.batch_default).run()
    for operation in operations:
        try:
            operation.get_ret()
        except Exception, e:
            logger.exception('Parallel table dump file build failed.')
            raise ExceptionNoStackTraceNeeded(
                'Parallel table dump file build failed, review log file for details')
def execute(self):
    ValidateGpToolkit(database=self.dump_database,
                      master_port=self.master_port).run()

    operations = []
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    segs = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True)]
    for seg in segs:
        operations.append(
            RemoteOperation(ValidateSegDiskSpace(free_space_percent=self.free_space_percent,
                                                 compress=self.compress,
                                                 dump_database=self.dump_database,
                                                 include_dump_tables=self.include_dump_tables,
                                                 datadir=seg.getSegmentDataDirectory(),
                                                 segport=seg.getSegmentPort()),
                            seg.getSegmentHostName()))

    ParallelOperation(operations, self.batch_default).run()

    success = 0
    for remote in operations:
        host = remote.host
        try:
            remote.get_ret()
        except NotEnoughDiskSpace, e:
            logger.error("%s has insufficient disk space. [Need: %dK, Free %dK]" %
                         (host, e.needed_space, e.free_space))
        else:
            success += 1
def execute(self):
    fake_timestamp = PickDumpTimestamp(restore_timestamp=self.restore_timestamp,
                                       compress=self.compress,
                                       master_datadir=self.master_datadir).run()

    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    primaries = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True)]
    # cap workers by the number of primaries, not the list object itself
    pool = WorkerPool(numWorkers=min(len(primaries), self.batch_default))
    for seg in primaries:
        real_filename = os.path.join(DUMP_DIR, self.restore_timestamp[0:8],
                                     "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(),
                                                    self.restore_timestamp))
        fake_filename = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR,
                                     fake_timestamp[0:8],
                                     "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(),
                                                    fake_timestamp))
        if self.compress:
            real_filename += '.gz'
            fake_filename += '.gz'
        table_str = ' '.join(['--table=%s' % table for table in self.restore_tables])
        # TODO: Escaping. Low priority due to MPP-12880, et al
        cmd = Command('DDBoost building dump file for dbid %s' % seg.getSegmentDbId(),
                      'gpddboost --from-file=%s --to-file=%s %s' % (real_filename, fake_filename, table_str),
                      ctxt=REMOTE,
                      remoteHost=seg.getSegmentHostName())
        pool.addCommand(cmd)
    pool.join()
    pool.check_results()

    BuildDDBoostMasterTableDump(restore_timestamp=self.restore_timestamp,
                                fake_timestamp=fake_timestamp,
                                compress=self.compress,
                                master_datadir=self.master_datadir).run()

    # Build master cdatabase file
    real_createdb = os.path.join(DUMP_DIR, self.restore_timestamp[0:8],
                                 "%s%s" % (CREATEDB_PREFIX, self.restore_timestamp))
    fake_createdb = os.path.join(self.master_datadir, DUMP_DIR, fake_timestamp[0:8],
                                 "%s%s" % (CREATEDB_PREFIX, fake_timestamp))
    Command('Copying cdatabase file from DDBoost',
            'gpddboost --copyFromDDBoost --from-file=%s --to-file=%s' %
            (real_createdb, fake_createdb)).run(validateAfter=True)

    # Build master _post_data file
    CopyPostData(self.restore_timestamp, fake_timestamp, self.compress, self.master_datadir).run()

    return fake_timestamp
            logger.warn('Failed to remove %s on master' % path)

        # Remove master _post_data file
        path = os.path.join(self.master_datadir, DUMP_DIR, self.fake_timestamp[0:8],
                            "%s%s_post_data" % (MASTER_DBDUMP_PREFIX, self.fake_timestamp))
        if self.compress:
            path += '.gz'
        try:
            RemoveFile(path).run()
        except OSError, e:
            logger.warn('Failed to remove %s on master' % path)

        # Remove segment dump files
        operations = []
        gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
        primaries = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True)]
        for seg in primaries:
            path = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, self.fake_timestamp[0:8],
                                "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.fake_timestamp))
            if self.compress:
                path += '.gz'
            host = seg.getSegmentHostName()
            operations.append(RemoveRemoteFile(path, host))