def fetchYumRepo(self, basepath="./", callback=None, verify_options=None):
    startTime = time.time()
    self.yumFetch = RepoFetch(self.repo_label, repourl=self.repo_url,
                              cacert=self.sslcacert, clicert=self.sslclientcert,
                              clikey=self.sslclientkey, mirrorlist=self.mirrors,
                              download_dir=basepath, proxy_url=self.proxy_url,
                              proxy_port=self.proxy_port, proxy_user=self.proxy_user,
                              proxy_pass=self.proxy_pass, sslverify=self.sslverify,
                              max_speed=self.max_speed, verify_options=verify_options)
    self.fetchPkgs = ParallelFetch(self.yumFetch, self.numThreads, callback=callback)
    try:
        if not verify_options:
            verify_options = {"size": False, "checksum": False}
        self.yumFetch.setupRepo()
        # first fetch the metadata
        self.fetchPkgs.processCallback(ProgressReport.DownloadMetadata)
        self.yumFetch.getRepoData()
        if self.stopped:
            return None
        if not self.skip.has_key('packages') or self.skip['packages'] != 1:
            # get rpms to fetch
            self.prepareRPMS()
            # get drpms to fetch
            self.prepareDRPMS()
        else:
            log("Skipping packages preparation from sync process")
        if not self.skip.has_key('distribution') or self.skip['distribution'] != 1:
            # get Trees to fetch
            self.prepareTrees()
        else:
            log("Skipping distribution preparation from sync process")
        # prepare for download
        self.fetchPkgs.addItemList(self.downloadinfo)
        self.fetchPkgs.start()
        report = self.fetchPkgs.waitForFinish()
        self.yumFetch.finalizeMetadata()
        endTime = time.time()
        #log("Processed <%s> items in [%d] seconds" % (len(self.downloadinfo), \
        #    (endTime - startTime)))
        if not self.skip.has_key('packages') or self.skip['packages'] != 1:
            if self.purge_orphaned:
                #log("Cleaning any orphaned packages..")
                self.fetchPkgs.processCallback(ProgressReport.PurgeOrphanedPackages)
                self.purgeOrphanPackages(self.yumFetch.getPackageList(),
                                         self.yumFetch.repo_dir)
            if self.remove_old:
                log("Removing old packages to limit to %s" % self.numOldPackages)
                self.fetchPkgs.processCallback(ProgressReport.RemoveOldPackages)
                gutils = GrinderUtils()
                gutils.runRemoveOldPackages(self.pkgsavepath, self.numOldPackages)
        self.yumFetch.deleteBaseCacheDir()
        #log("Processed <%s> in %s seconds" % (report, endTime - startTime))
        return report, (endTime - startTime)
    finally:
        self.fetchPkgs.stop()
        self.yumFetch.closeRepo()
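# Illustrative usage sketch for fetchYumRepo() above. It assumes an object
# exposing this method, a callback that accepts a
# grinder.GrinderCallback.ProgressReport, and verify_options of the form
# {"size": bool, "checksum": bool}; the function and argument names here are
# hypothetical.
def example_fetch_yum_repo(grinder_obj, basepath="/var/lib/grinder"):
    def progress_cb(progress_report):
        # ProgressReport describes the current sync step; just echo it here.
        LOG.info("sync progress: %s" % progress_report)
    verify_options = {"size": True, "checksum": True}
    # In this older variant fetchYumRepo() returns (report, elapsed_seconds).
    return grinder_obj.fetchYumRepo(basepath=basepath,
                                    callback=progress_cb,
                                    verify_options=verify_options)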
class YumRepoGrinder(object):
    """
    Driver class to fetch content from a Yum Repository
    """
    def __init__(self, repo_label, repo_url, parallel=10, mirrors=None,
                 newest=False, cacert=None, clicert=None, clikey=None,
                 proxy_url=None, proxy_port=None, proxy_user=None,
                 proxy_pass=None, sslverify=1, packages_location=None,
                 remove_old=False, numOldPackages=2, skip=None, max_speed=None,
                 purge_orphaned=True, distro_location=None, tmp_path=None,
                 filter=None):
        self.repo_label = repo_label
        self.repo_url = repo_url
        self.repo_dir = None
        self.mirrors = mirrors
        self.numThreads = int(parallel)
        self.fetchPkgs = None
        self.downloadinfo = []
        self.repoFetch = None
        self.sslcacert = cacert
        self.sslclientcert = clicert
        self.sslclientkey = clikey
        self.temp_ssl_client_cert = None
        self.temp_ssl_client_key = None
        self.proxy_url = proxy_url
        self.proxy_port = proxy_port
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        self.newest = newest
        # set this if you want all packages to be stored in a central location
        self.pkgpath = packages_location
        self.numOldPackages = numOldPackages
        self.pkgsavepath = ''
        self.remove_old = remove_old
        self.skip = skip
        if not self.skip:
            self.skip = []
        self.sslverify = sslverify
        self.max_speed = max_speed
        self.purge_orphaned = purge_orphaned
        self.stopped = False
        self.distropath = distro_location
        self.rpmlist = []
        self.drpmlist = []
        self.tmp_path = tmp_path
        self.filter = filter

    def getRPMItems(self):
        return self.rpmlist

    def getDeltaRPMItems(self):
        return self.drpmlist

    def getDistroItems(self):
        return self.distro_items

    def setup(self, basepath="./", callback=None, verify_options=None,
              num_retries=None, retry_delay=None, incr_progress=False):
        """
        Fetches yum metadata and determines what objects should be downloaded.

        @param basepath: path to store repo data
        @type basepath: str

        @param callback: progress callback function
        @type callback: function which accepts a grinder.GrinderCallback.ProgressReport

        @param verify_options: controls verification checks on "size" and "checksum".
        @type verify_options: dict{"size":bool,"checksum":bool}

        @param num_retries: number of retries to perform if an error occurs
        @type num_retries: int

        @param retry_delay: delay in seconds between retries, delay = 'retry_attempt' * 'retry_delay'
        @type retry_delay: int

        @param incr_progress: if true, incremental progress on each item as it's downloaded will be reported
        @type incr_progress: bool
        """
        self.repo_dir = os.path.join(basepath, self.repo_label)
        LOG.info("%s, %s, Calling RepoFetch with: cacert=<%s>, clicert=<%s>, clikey=<%s>, proxy_url=<%s>, proxy_port=<%s>, proxy_user=<%s>, proxy_pass=<NOT_LOGGED>, sslverify=<%s>, max_speed=<%s>, verify_options=<%s>, filter=<%s>" %
                 (self.repo_label, self.repo_url, self.sslcacert, self.sslclientcert,
                  self.sslclientkey, self.proxy_url, self.proxy_port, self.proxy_user,
                  self.sslverify, self.max_speed, verify_options, self.filter))
        self.repoFetch = RepoFetch(cacert=self.sslcacert, clicert=self.sslclientcert,
                                   clikey=self.sslclientkey, proxy_url=self.proxy_url,
                                   proxy_port=self.proxy_port, proxy_user=self.proxy_user,
                                   proxy_pass=self.proxy_pass, sslverify=self.sslverify,
                                   max_speed=self.max_speed, verify_options=verify_options,
                                   num_retries=num_retries)
        self.fetchPkgs = ParallelFetch(self.repoFetch, self.numThreads,
                                       callback=callback, incr_progress=incr_progress)
        self.fetchPkgs.processCallback(ProgressReport.DownloadMetadata)
        info = YumInfo(
            repo_label=self.repo_label, repo_url=self.repo_url,
            mirrors=self.mirrors, repo_dir=self.repo_dir,
            packages_location=self.pkgpath,
            newest=self.newest,
            remove_old=self.remove_old, numOldPackages=self.numOldPackages,
            cacert=self.sslcacert, clicert=self.sslclientcert,
            clikey=self.sslclientkey,
            proxy_url=self.proxy_url, proxy_port=self.proxy_port,
            proxy_user=self.proxy_user, proxy_pass=self.proxy_pass,
            sslverify=self.sslverify,
            skip=self.skip, tmp_path=self.tmp_path,
            filter=self.filter,
            num_retries=num_retries, retry_delay=retry_delay)
        info.setUp()
        self.rpmlist = info.rpms
        self.drpmlist = info.drpms

    def setupDistroInfo(self):
        info = DistroInfo(repo_url=self.repo_url, repo_dir=self.repo_dir,
                          distropath=self.distropath)
        distro_items = info.prepareTrees(self.repoFetch)
        self.distro_items = {}
        if distro_items:
            self.distro_items = distro_items

    def addItems(self, items):
        self.fetchPkgs.addItemList(items)

    def download(self):
        """
        Synchronous call, initiates download and waits for all items to finish
        before returning

        @return: A SyncReport
        @rtype: grinder.ParallelFetch.SyncReport
        """
        try:
            startTime = time.time()
            self.fetchPkgs.start()
            self.fetchPkgs.processCallback(ProgressReport.DownloadItems)
            report = self.fetchPkgs.waitForFinish()
            self.finalizeMetadata()
            if 'rpm' not in self.skip:
                if self.purge_orphaned:
                    # Includes logic of:
                    #  1) removing previously existing packages that have been
                    #     removed from repository metadata
                    #  2) removing old packages that are part of repository metadata,
                    #     but we want removed because of remove_old/numOldPackages option
                    LOG.info("Cleaning any orphaned packages..")
                    self.fetchPkgs.processCallback(ProgressReport.PurgeOrphanedPackages)
                    self.purgeOrphanPackages()
                if self.remove_old:
                    # Need to re-test remove_old is functioning
                    # We added the ability to limit the old packages from being downloaded
                    # I think we need to address the case of existing packages from a prior sync
                    self.fetchPkgs.processCallback(ProgressReport.RemoveOldPackages)
            endTime = time.time()
            LOG.info("Processed <%s>,<%s> with <%s> items in [%d] seconds. Report: %s" %
                     (self.repo_label, self.repo_url, len(self.downloadinfo),
                      (endTime - startTime), report))
            return report
        finally:
            if self.fetchPkgs:
                self.fetchPkgs.stop()
                self.fetchPkgs = None

    def fetchYumRepo(self, basepath="./", callback=None, verify_options=None):
        LOG.info("fetchYumRepo() repo_label = %s, repo_url = %s, basepath = %s, verify_options = %s" %
                 (self.repo_label, self.repo_url, basepath, verify_options))
        self.setup(basepath, callback, verify_options)
        if 'distribution' not in self.skip:
            self.setupDistroInfo()
            if self.distro_items:
                self.addItems(self.distro_items['files'])
        else:
            LOG.debug("skipping distributions from sync")
        self.addItems(self.rpmlist)
        self.addItems(self.drpmlist)
        return self.download()

    def stop(self, block=True):
        LOG.info("Stopping")
        self.stopped = True
        if self.fetchPkgs:
            self.fetchPkgs.stop()
            if block:
                LOG.info("Block is <%s> so waiting" % (block))
                self.fetchPkgs._waitForThreads()

    def purgeOrphanPackages(self):
        """
        While re-syncing, purge any orphaned packages in the repo that we did
        not intend to sync.  Includes removal of packages no longer in
        primary.xml as well as older packages filtered out by the
        remove_old/numOldPackages logic
        """
        dpkgs = []
        if self.rpmlist:
            for pkg in self.rpmlist:
                dpkgs.append(os.path.join(self.repo_dir,
                                          os.path.dirname(pkg['relativepath']),
                                          pkg['fileName']))
        if os.path.exists(self.repo_dir):
            for root, dirs, files in os.walk(self.repo_dir):
                for f in files:
                    tmp_path = os.path.join(root, f)
                    if tmp_path.endswith('.rpm') and tmp_path not in dpkgs:
                        LOG.info("Removing orphan package: %s" % (tmp_path))
                        os.remove(tmp_path)

    def finalizeMetadata(self):
        local_repo_path = "%s/%s" % (self.repo_dir, "repodata")
        local_new_path = "%s/%s" % (self.repo_dir, "repodata.new")
        if not os.path.exists(local_new_path):
            LOG.info("No new metadata to finalize.")
            return
        try:
            LOG.info("Finalizing metadata, moving %s to %s" % (local_new_path, local_repo_path))
            if os.path.exists(local_repo_path):
                # remove existing metadata before copying
                shutil.rmtree(local_repo_path)
            shutil.copytree(local_new_path, local_repo_path)
            shutil.rmtree(local_new_path)
        except Exception, e:
            LOG.error("An error occurred while finalizing metadata:\n%s" % str(e))
class FileGrinder(object):
    """
    Driver module to initiate the file fetching
    """
    def __init__(self, repo_label, url, parallel=50,
                 cacert=None, clicert=None, clikey=None,
                 proxy_url=None, proxy_port=None, proxy_user=None,
                 proxy_pass=None, sslverify=1, files_location=None,
                 max_speed=None):
        self.repo_label = repo_label
        self.repo_url = url
        self.numThreads = int(parallel)
        self.downloadinfo = []
        self.sslcacert = cacert
        self.sslclientcert = clicert
        self.sslclientkey = clikey
        self.proxy_url = proxy_url
        self.proxy_port = proxy_port
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass
        # set this if you want all packages to be stored in a central location
        self.filepath = files_location
        self.sslverify = sslverify
        self.max_speed = max_speed
        self.fileFetch = None

    def prepareFiles(self):
        file_manifest = "PULP_MANIFEST"
        file_url = self.fileFetch.url + '/' + file_manifest
        file_name = file_manifest
        file_path = self.fileFetch.repo_dir
        info = {
            'downloadurl': file_url,
            'fileName': file_name,
            'savepath': file_path,
            'checksumtype': None,
            'checksum': None,
            'size': None,
            'pkgpath': None,
        }
        self.fileFetch.fetchItem(info)
        file_info = {}
        file_manifest_path = os.path.join(file_path, file_manifest)
        if os.path.exists(file_manifest_path):
            file_info = parseManifest(file_manifest_path)
        else:
            LOG.info("File Metadata Not Found at url %s" % self.repo_url)
        for fileinfo in file_info:
            info = {}
            info['downloadurl'] = self.repo_url + '/' + fileinfo['filename']
            info['fileName'] = os.path.basename(fileinfo['filename'])
            info['savepath'] = file_path  #+ '/' + os.path.dirname(info['filename'])
            info['checksumtype'] = 'sha256'
            info['checksum'] = fileinfo['checksum']
            info['size'] = int(fileinfo['size'])
            if self.filepath:
                info['pkgpath'] = "%s/%s/%s/%s/" % (self.filepath,
                                                    os.path.basename(fileinfo['filename'])[:3],
                                                    os.path.basename(fileinfo['filename']),
                                                    fileinfo['checksum'])
            else:
                info['pkgpath'] = None
            info['item_type'] = BaseFetch.FILE
            self.downloadinfo.append(info)
        LOG.info("%s files have been marked to be fetched" % len(file_info))

    def fetch(self, basepath="./", callback=None):
        LOG.info("fetch basepath = %s" % (basepath))
        startTime = time.time()
        self.fileFetch = FileFetch(self.repo_label, self.repo_url,
                                   cacert=self.sslcacert,
                                   clicert=self.sslclientcert,
                                   clikey=self.sslclientkey,
                                   download_dir=basepath,
                                   proxy_url=self.proxy_url,
                                   proxy_port=self.proxy_port,
                                   proxy_user=self.proxy_user,
                                   proxy_pass=self.proxy_pass,
                                   sslverify=self.sslverify,
                                   max_speed=self.max_speed)
        self.parallel_fetch_files = ParallelFetch(self.fileFetch, self.numThreads,
                                                  callback=callback)
        LOG.info("Determining downloadable Content bits...")
        self.parallel_fetch_files.processCallback(ProgressReport.DownloadMetadata)
        self.prepareFiles()
        # prepare for download
        self.parallel_fetch_files.addItemList(self.downloadinfo)
        self.parallel_fetch_files.start()
        report = self.parallel_fetch_files.waitForFinish()
        endTime = time.time()
        LOG.info("Processed <%s> items in [%d] seconds" %
                 (len(self.downloadinfo), (endTime - startTime)))
        return report

    def stop(self, block=True):
        if self.parallel_fetch_files:
            self.parallel_fetch_files.stop()
            if block:
                self.parallel_fetch_files._waitForThreads()
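# Illustrative usage sketch for FileGrinder: fetch() downloads PULP_MANIFEST,
# queues every file it lists, and returns the SyncReport from the parallel
# fetch. The repo label, URL and basepath below are hypothetical placeholders.
def example_sync_file_repo():
    grinder = FileGrinder("iso-repo", "http://example.com/content/isos/",
                          parallel=10)
    report = grinder.fetch(basepath="/var/lib/grinder/files")
    return report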
class Grinder:
    def __init__(self, url, username, password, cert, systemid, parallel, verbose):
        self.baseURL = url
        self.cert = open(cert, 'r').read()
        self.systemid = open(systemid, 'r').read()
        self.username = username
        self.password = password
        self.parallel = parallel
        self.fetchAll = False     # default is only fetch latest packages
        self.parallelFetchPkgs = None
        self.parallelFetchKickstarts = None
        self.skipProductList = []
        self.skipPackageList = []
        self.verbose = verbose
        self.killcount = 0
        self.removeOldPackages = False
        self.numOldPkgsKeep = 1
        self.rhnComm = RHNComm(url, self.systemid)

    def setRemoveOldPackages(self, value):
        self.removeOldPackages = value

    def getRemoveOldPackages(self):
        return self.removeOldPackages

    def getFetchAllPackages(self):
        return self.fetchAll

    def setFetchAllPackages(self, val):
        self.fetchAll = val

    def getSkipProductList(self):
        return self.skipProductList

    def setSkipProductList(self, skipProductList):
        self.skipProductList = skipProductList

    def getSkipPackageList(self):
        return self.skipPackageList

    def setSkipPackageList(self, skipPackageList):
        self.skipPackageList = skipPackageList

    def getNumOldPackagesToKeep(self):
        return self.numOldPkgsKeep

    def setNumOldPackagesToKeep(self, num):
        self.numOldPkgsKeep = num

    def deactivate(self):
        SATELLITE_URL = "%s/rpc/api" % (self.baseURL)
        client = getRhnApi(SATELLITE_URL, verbose=0)
        key = client.auth.login(self.username, self.password)
        retval = client.satellite.deactivateSatellite(self.systemid)
        print "retval from deactivation: %s" % retval
        client.auth.logout(key)
        print "Deactivated!"

    def activate(self):
        rhn = RHNTransport()
        satClient = getRhnApi(self.baseURL + "/SAT",
                              verbose=self.verbose, transport=rhn)
        # First check if we are active
        active = False
        retval = satClient.authentication.check(self.systemid)
        LOG.debug("AUTH CHECK: %s " % str(retval))
        if (retval == 1):
            LOG.debug("We are activated ... continue!")
            active = True
        else:
            LOG.debug("Not active")
        if (not active):
            if (not self.username or not self.password):
                raise SystemNotActivatedException()
            SATELLITE_URL = "%s/rpc/api" % (self.baseURL)
            client = RhnApi(SATELLITE_URL, verbose=0)
            key = client.auth.login(self.username, self.password)
            retval = client.satellite.activateSatellite(self.systemid, self.cert)
            LOG.debug("retval from activation: %s" % retval)
            if (retval != 1):
                raise CantActivateException()
            client.auth.logout(key)
            LOG.debug("Activated!")

    def stop(self):
        if (self.parallelFetchPkgs):
            self.parallelFetchPkgs.stop()
        if (self.parallelFetchKickstarts):
            self.parallelFetchKickstarts.stop()

    def checkChannels(self, channelsToSync):
        """
        Input:
            channelsToSync - list of channels to sync
        Output:
            list containing bad channel names
        """
        satDump = SatDumpClient(self.baseURL)
        channelFamilies = satDump.getChannelFamilies(self.systemid)
        badChannel = []
        for channelLabel in channelsToSync:
            found = False
            for d in channelFamilies.values():
                if channelLabel in d["channel_labels"]:
                    LOG.debug("Found %s under %s" % (channelLabel, d["label"]))
                    found = True
                    break
            if not found:
                LOG.debug("Unable to find %s, adding it to badChannel list" % (channelLabel))
                badChannel.append(channelLabel)
        return badChannel

    def getChannelLabels(self):
        labels = {}
        satDump = SatDumpClient(self.baseURL)
        channelFamilies = satDump.getChannelFamilies(self.systemid)
        for d in channelFamilies.values():
            if (d["label"] in self.skipProductList):
                continue
            labels[d["label"]] = d["channel_labels"]
        return labels

    def displayListOfChannels(self):
        labels = self.getChannelLabels()
        print("List of channels:")
        for lbl in labels:
            print("\nProduct : %s\n" % (lbl))
            for l in labels[lbl]:
                print("   %s" % (l))

    def syncKickstarts(self, channelLabel, savePath, verbose=0):
        """
        channelLabel - channel to sync kickstarts from
        savePath - path to save kickstarts
        verbose - if true display more output
        """
        startTime = time.time()
        satDump = SatDumpClient(self.baseURL, verbose=verbose)
        ksLabels = satDump.getKickstartLabels(self.systemid, [channelLabel])
        LOG.info("Found %s kickstart labels for channel %s" %
                 (len(ksLabels[channelLabel]), channelLabel))
        ksFiles = []
        for ksLbl in ksLabels[channelLabel]:
            LOG.info("Syncing kickstart label: %s" % (ksLbl))
            metadata = satDump.getKickstartTreeMetadata(self.systemid, [ksLbl])
            LOG.info("Retrieved metadata on %s files for kickstart label: %s" %
                     (len(metadata[ksLbl]["files"]), ksLbl))
            ksSavePath = os.path.join(savePath, ksLbl)
            for ksFile in metadata[ksLbl]["files"]:
                info = {}
                info["relative-path"] = ksFile["relative-path"]
                info["size"] = ksFile["file-size"]
                info["md5sum"] = ksFile["md5sum"]
                info["ksLabel"] = ksLbl
                info["channelLabel"] = channelLabel
                info["savePath"] = ksSavePath
                ksFiles.append(info)
        ksFetch = KickstartFetch(self.systemid, self.baseURL)
        numThreads = int(self.parallel)
        self.parallelFetchKickstarts = ParallelFetch(ksFetch, numThreads)
        self.parallelFetchKickstarts.addItemList(ksFiles)
        self.parallelFetchKickstarts.start()
        report = self.parallelFetchKickstarts.waitForFinish()
        endTime = time.time()
        LOG.info("Processed %s %s %s kickstart files, %s errors, completed in %s seconds" %
                 (channelLabel, ksLabels[channelLabel], report.successes,
                  report.errors, (endTime - startTime)))
        return report

    def syncPackages(self, channelLabel, savePath, verbose=0):
        """
        channelLabel - channel to sync packages from
        savePath - path to save packages
        verbose - if true display more output
        """
        startTime = time.time()
        if channelLabel == "":
            LOG.critical("No channel label specified to sync, abort sync.")
            raise NoChannelLabelException()
        LOG.info("sync(%s, %s) invoked" % (channelLabel, verbose))
        satDump = SatDumpClient(self.baseURL, verbose=verbose)
        LOG.debug("*** calling product_names ***")
        packages = satDump.getChannelPackages(self.systemid, channelLabel)
        LOG.info("%s packages are available, getting list of short metadata now." % (len(packages)))
        pkgInfo = satDump.getShortPackageInfo(self.systemid, packages,
                                              filterLatest=not self.fetchAll)
        LOG.info("%s packages have been marked to be fetched" % (len(pkgInfo.values())))
        numThreads = int(self.parallel)
        LOG.info("Running in parallel fetch mode with %s threads" % (numThreads))
        pkgFetch = PackageFetch(self.systemid, self.baseURL, channelLabel, savePath)
        self.parallelFetchPkgs = ParallelFetch(pkgFetch, numThreads)
        self.parallelFetchPkgs.addItemList(pkgInfo.values())
        self.parallelFetchPkgs.start()
        report = self.parallelFetchPkgs.waitForFinish()
        LOG.debug("Attempting to fetch comps.xml info from RHN")
        self.fetchCompsXML(savePath, channelLabel)
        self.fetchUpdateinfo(savePath, channelLabel)
        endTime = time.time()
        LOG.info("Processed <%s> %s packages, %s errors, completed in %s seconds" %
                 (channelLabel, report.successes, report.errors, (endTime - startTime)))
        if self.removeOldPackages:
            LOG.info("Remove old packages from %s" % (savePath))
            self.runRemoveOldPackages(savePath)
        return report

    def fetchCompsXML(self, savePath, channelLabel):
        ###
        # Fetch comps.xml, used by createrepo for "groups" info
        ###
        compsxml = ""
        try:
            compsxml = self.rhnComm.getRepodata(channelLabel, "comps.xml")
        except GetRequestException, ge:
            if (ge.code == 404):
                LOG.info("Channel has no compsXml")
            else:
                raise ge
        if not savePath:
            savePath = channelLabel
        f = open(os.path.join(savePath, "comps.xml"), "w")
        f.write(compsxml)
        f.close()