def sync(self, update_repodata=True):
    """Synchronise every repository listed in ``self.urls``.

    For each ``(repo_id, url, repo_label)`` entry a repo plugin is created,
    optional SSL options are loaded from the database, and packages, groups,
    errata and (for labelled repos) kickstart trees are imported according
    to the ``no_packages`` / ``no_errata`` / ``sync_kickstart`` flags.

    :param update_repodata: when true, the plugin cache is cleared before
        the repository is read.
    :returns: ``(elapsed_time, sync_error)``; ``sync_error`` is ``-1`` when
        there were no URLs or any repository raised, otherwise the number of
        packages that failed to import (``0`` on full success).
    """
    failed_total = 0
    status = 0
    if not self.urls:
        status = -1
    started = datetime.now()

    for repo_id, url, repo_label in self.urls:
        log(0, "Repo URL: %s" % url)
        plugin = None

        # uln:// repositories always use the ULN plugin, overriding whatever
        # plugin was selected on the command line.
        if url.startswith("uln://"):
            self.repo_plugin = self.load_plugin("uln")

        # pylint: disable=W0703
        try:
            repo_name = repo_label
            if not repo_name:
                # The plugin name doubles as the cache directory name, so
                # derive it from the URL path with query characters replaced.
                repo_name = '_'.join(url.split('://')[1].split('/')[1:])
                for char in ("?", "&", "="):
                    repo_name = repo_name.replace(char, "_")

            plugin = self.repo_plugin(url, repo_name,
                                      org=str(self.org_id or ''),
                                      channel_label=self.channel_label)
            if update_repodata:
                plugin.clear_cache()

            if repo_id is not None:
                ssl_query = (
                    " select k1.key as ca_cert, k2.key as client_cert, k3.key as client_key"
                    " from rhncontentsource cs"
                    " inner join rhncontentsourcessl csssl on cs.id = csssl.content_source_id"
                    " inner join rhncryptokey k1 on csssl.ssl_ca_cert_id = k1.id"
                    " left outer join rhncryptokey k2 on csssl.ssl_client_cert_id = k2.id"
                    " left outer join rhncryptokey k3 on csssl.ssl_client_key_id = k3.id"
                    " where cs.id = :repo_id "
                )
                keys = rhnSQL.fetchall_dict(ssl_query, repo_id=int(repo_id))
                if keys:
                    ssl_set = get_single_ssl_set(
                        keys, check_dates=self.check_ssl_dates)
                    if not ssl_set:
                        raise ValueError(
                            "No valid SSL certificates were found for repository.")
                    plugin.set_ssl_options(ssl_set['ca_cert'],
                                           ssl_set['client_cert'],
                                           ssl_set['client_key'])

            if not self.no_packages:
                failed_total += self.import_packages(plugin, repo_id, url)
                self.import_groups(plugin, url)

            if not self.no_errata:
                self.import_updates(plugin, url)

            # Kickstart trees are only synced for repos that came from the DB
            # (those are the ones carrying a label).
            if self.sync_kickstart and repo_label:
                try:
                    self.import_kickstart(plugin, repo_label)
                except:
                    # Undo partial kickstart DB changes, then let the outer
                    # handler deal with the error.
                    rhnSQL.rollback()
                    raise
        except Exception:
            error = sys.exc_info()[1]
            log2(0, 0, "ERROR: %s" % error, stream=sys.stderr)
            log2disk(0, "ERROR: %s" % error)
            status = -1

        if plugin is not None:
            plugin.clear_ssl_cache()

    # Persist the package checksum cache.
    rhnCache.set(checksum_cache_filename, self.checksum_cache)
    if self.regen:
        taskomatic.add_to_repodata_queue_for_channel_package_subscription(
            [self.channel_label], [], "server.app.yumreposync")
        taskomatic.add_to_erratacache_queue(self.channel_label)
    self.update_date()
    rhnSQL.commit()

    # Update permissions; if the directory already exists only ownership of
    # its contents is adjusted.
    rhn_root = os.path.join(CFG.MOUNT_POINT, 'rhn')
    fileutils.createPath(rhn_root)
    for root, dirs, files in os.walk(rhn_root):
        for entry in dirs + files:
            fileutils.setPermsPath(os.path.join(root, entry), group='apache')

    elapsed_time = datetime.now() - started
    log(0, "Sync of channel completed in %s." % str(elapsed_time).split('.')[0])

    # No global problem, but some packages could not be synced.
    if status == 0 and failed_total > 0:
        status = failed_total
    return elapsed_time, status
def import_packages(self, plug, source_id, url):
    """Download, import and link the packages offered by one repository.

    :param plug: repo plugin; provides ``list_packages``,
        ``set_download_parameters``, ``num_packages``, ``num_excluded`` and
        ``repo.pkgdir``.
    :param source_id: content source id used to load filters from the
        database when no filters were configured on the instance.
    :param url: repository URL; downloaded files are removed afterwards
        unless the URL contains ``file:/``.
    :returns: number of packages that failed to import.
    """
    failed_count = 0

    filters = self.filters
    if (not self.filters) and source_id:
        handle = rhnSQL.prepare(
            " select flag, filter from rhnContentSourceFilter"
            " where source_id = :source_id"
            " order by sort_order "
        )
        handle.execute(source_id=source_id)
        filters = [(rec['flag'], re.split(r'[,\s]+', rec['filter']))
                   for rec in (handle.fetchall_dict() or [])]

    packages = plug.list_packages(filters, self.latest)
    self.all_packages.extend(packages)
    passed_count = len(packages)
    log(0, "Packages in repo: %5d" % plug.num_packages)
    if plug.num_excluded:
        log(0, "Packages passed filter rules: %5d" % passed_count)

    channel_id = int(self.channel['id'])
    work_items = []
    for pack in packages:
        nvrea = [pack.name, pack.version, pack.release, pack.epoch, pack.arch]
        db_pack = rhnPackage.get_info_for_package(nvrea, channel_id, self.org_id)

        need_download = True
        need_link = True
        if db_pack:
            # Resolve the on-disk path when the DB knows one.
            stored_path = db_pack['path']
            pack.path = os.path.join(CFG.MOUNT_POINT, stored_path) if stored_path else ""

            if self.metadata_only or self.match_package_checksum(
                    db_pack['path'], pack.path, pack.checksum_type, pack.checksum):
                # Already on disk, or the payload is not required.
                need_download = False
                if db_pack['channel_id'] == channel_id:
                    # Already linked to this channel.
                    need_link = False
                # Carry the DB data over; it is used for strict channel
                # linking when no new RPM is downloaded.
                pack.checksum = db_pack['checksum']
                pack.checksum_type = db_pack['checksum_type']
                pack.epoch = db_pack['epoch']
            elif db_pack['channel_id'] == channel_id:
                # A different package with the same NVREA is in the channel.
                self.disassociate_package(db_pack)

        if need_download or need_link:
            work_items.append((pack, need_download, need_link))

    work_count = len(work_items)
    if work_count != 0:
        log(0, "Packages already synced: %5d" % (passed_count - work_count))
        log(0, "Packages to sync: %5d" % work_count)
    else:
        log(0, "No new packages to sync.")
        # In append-only mode there is nothing left to do.
        if not self.strict:
            return failed_count

    remove_after_import = "file:/" not in url

    # Queue every download first, then run them in one batch.
    downloader = ThreadedDownloader()
    download_count = 0
    for pack, need_download, _ in work_items:
        if not need_download:
            continue
        relative = pack.unique_id.relativepath
        target_file = os.path.join(plug.repo.pkgdir, os.path.basename(relative))
        pack.path = target_file
        if self.metadata_only:
            # Only the header range is fetched; no checksum to verify.
            bytes_range = (0, pack.unique_id.hdrend)
            checksum_type = checksum = None
        else:
            bytes_range = None
            checksum_type = pack.checksum_type
            checksum = pack.checksum
        params = {}
        plug.set_download_parameters(params, pack.unique_id.relativepath, target_file,
                                     checksum_type=checksum_type,
                                     checksum_value=checksum,
                                     bytes_range=bytes_range)
        downloader.add(params)
        download_count += 1

    if work_count != 0:
        log(0, "New packages to download: %5d" % download_count)
    downloader.set_log_obj(TextLogger(None, download_count))
    downloader.run()

    log2disk(0, "Importing packages started.")
    progress_bar = ProgressBarLogger("Importing packages: ", download_count)
    for index, (pack, need_download, _) in enumerate(work_items):
        if not need_download:
            continue
        localpath = pack.path
        # pylint: disable=W0703
        try:
            if not os.path.exists(localpath):
                # Nothing was downloaded; counted as a failure below without
                # a message.
                raise Exception
            pack.load_checksum_from_header()
            rel_package_path = pack.upload_package(
                self.org_id, metadata_only=self.metadata_only)
            # Remember the uploaded package under the repository checksum type.
            if rel_package_path:
                self.checksum_cache[rel_package_path] = {
                    pack.checksum_type: pack.checksum
                }
            # Keep only what is needed from 'a_pkg' instead of holding the
            # whole object in memory for every package. see BZ 1397417
            pack.checksum = pack.a_pkg.checksum
            pack.checksum_type = pack.a_pkg.checksum_type
            pack.epoch = pack.a_pkg.header['epoch']
            pack.a_pkg = None
            progress_bar.log(True, None)
        except KeyboardInterrupt:
            raise
        except Exception:
            failed_count += 1
            message = str(sys.exc_info()[1])
            if message:
                log2(0, 1, message, stream=sys.stderr)
            if self.fail:
                raise
            # Neither download nor link this package any more.
            work_items[index] = (pack, False, False)
            self.all_packages.remove(pack)
            progress_bar.log(False, None)
        finally:
            if remove_after_import and localpath and os.path.exists(localpath):
                os.remove(localpath)
    log2disk(0, "Importing packages finished.")

    if self.strict:
        # Strict mode: packages from all repositories must be associated.
        to_associate = self.all_packages
    else:
        # Append mode: only the packages of this repository that need a link.
        to_associate = [pack for (pack, _, link) in work_items if link]
    import_batch = [self.associate_package(pack) for pack in to_associate]

    # Do not re-link if nothing was marked to link.
    if any(link for (_, _, link) in work_items):
        log(0, "Linking packages to channel.")
        backend = SQLBackend()
        importer = ChannelPackageSubscription(import_batch,
                                              backend,
                                              caller="server.app.yumreposync",
                                              repogen=False,
                                              strict=self.strict)
        importer.run()
        backend.commit()
        self.regen = True
    return failed_count
def __init__(self, channel_label, repo_type, url=None, fail=False,
             filters=None, no_errata=False, sync_kickstart=False, latest=False,
             metadata_only=False, strict=0, excluded_urls=None, no_packages=False,
             log_dir="reposync", log_level=None, force_kickstart=False,
             force_all_errata=False, check_ssl_dates=False,
             force_null_org_content=False):
    """Set up configuration, logging, channel data and the repository list.

    :param channel_label: label of the channel to sync; also used as the
        log file name.
    :param repo_type: plugin name passed to ``self.load_plugin``.
    :param url: optional iterable of repository URLs; when empty, the
        sources linked to the channel are read from the database.
    :param excluded_urls: source URLs from the database to skip.
    :param log_dir: directory (under the default log location) for the log.
    :param log_level: debug level; ``None`` means ``0``.
    :param force_null_org_content: when true, ``self.org_id`` is ``None``
        regardless of the channel's organisation.

    The remaining parameters are stored unchanged on the instance.
    """
    # Function-scope import so this method does not depend on the file's
    # import block.
    import subprocess

    self.regen = False
    self.fail = fail
    self.filters = filters or []
    self.no_packages = no_packages
    self.no_errata = no_errata
    self.sync_kickstart = sync_kickstart
    self.force_all_errata = force_all_errata
    self.force_kickstart = force_kickstart
    self.latest = latest
    self.metadata_only = metadata_only
    self.ks_tree_type = 'externally-managed'
    self.ks_install_type = None

    initCFG('server.satellite')
    rhnSQL.initDB()

    # setup logging
    log_filename = channel_label + '.log'
    log_path = default_log_location + log_dir + '/' + log_filename
    if log_level is None:
        log_level = 0
    CFG.set('DEBUG', log_level)
    rhnLog.initLOG(log_path, log_level)

    # Hand the log file to the web server group. The command is passed as an
    # argument list (no shell), so a channel label or log dir containing
    # spaces or shell metacharacters cannot alter the command.
    if isSUSE():
        log_group = "www"
    else:
        log_group = "apache"
    try:
        subprocess.call(["chgrp", log_group, log_path])
    except OSError:
        # Still best effort: a chgrp that cannot be started must not abort
        # the sync.
        log2disk(0, "Unable to run chgrp on %s: %s" % (log_path, sys.exc_info()[1]))

    log2disk(0, "Command: %s" % str(sys.argv))
    log2disk(0, "Sync of channel started.")

    self.channel_label = channel_label
    self.channel = self.load_channel()
    if not self.channel:
        # NOTE(review): only a message is logged here; the lookups on
        # self.channel below will fail for a missing channel. Confirm whether
        # this should abort explicitly.
        log(0, "Channel %s does not exist." % channel_label)

    if not self.channel['org_id'] or force_null_org_content:
        self.org_id = None
    else:
        self.org_id = int(self.channel['org_id'])

    if not url:
        # TODO:need to look at user security across orgs
        h = rhnSQL.prepare(
            "select s.id, s.source_url, s.label"
            " from rhnContentSource s, rhnChannelContentSource cs"
            " where s.id = cs.source_id"
            " and cs.channel_id = :channel_id"
        )
        h.execute(channel_id=int(self.channel['id']))
        source_data = h.fetchall_dict()
        self.urls = []
        if excluded_urls is None:
            excluded_urls = []
        if source_data:
            for row in source_data:
                if row['source_url'] not in excluded_urls:
                    self.urls.append(
                        (row['id'], row['source_url'], row['label']))
    else:
        # URLs given explicitly carry no database id and no label.
        self.urls = [(None, u, None) for u in url]

    if not self.urls:
        log2(0, 0, "Channel %s has no URL associated" % channel_label,
             stream=sys.stderr)

    self.repo_plugin = self.load_plugin(repo_type)
    self.strict = strict
    self.all_packages = []
    self.check_ssl_dates = check_ssl_dates

    # Cache of computed checksums, so they are not recomputed on every run.
    self.checksum_cache = rhnCache.get(checksum_cache_filename)
    if self.checksum_cache is None:
        self.checksum_cache = {}
def import_kickstart(self, plug, repo_label):
    """Sync the kickstartable tree of a repository into the channel.

    A tree label is derived from ``repo_label``, a treeinfo file is fetched
    through the plugin, the tree row is created in the database when it does
    not exist yet, and every file missing locally (or every file when
    ``self.force_kickstart`` is set) is downloaded and recorded in
    ``rhnKSTreeFile``. Returns early, without committing, when no valid
    treeinfo file is found.

    :param plug: repo plugin used to fetch files and build download params.
    :param repo_label: label of the repository the tree belongs to.
    """
    tree_label = re.sub(r'[^-_0-9A-Za-z@.]', '', repo_label.replace(' ', '_'))
    if len(tree_label) < 4:
        tree_label += "_repo"

    # Build ks_path and look up whether this tree was synced before.
    id_request = (
        " select id from rhnKickstartableTree"
        " where channel_id = :channel_id and label = :label "
    )
    ks_path = 'rhn/kickstart/'
    if self.org_id:
        ks_path += str(self.org_id) + '/' + tree_label
        # Trees synced from external repositories are expected to have the
        # full path in the database.
        db_path = os.path.join(CFG.MOUNT_POINT, ks_path)
        row = rhnSQL.fetchone_dict(id_request + " and org_id = :org_id",
                                   channel_id=self.channel['id'],
                                   label=tree_label,
                                   org_id=self.org_id)
    else:
        ks_path += tree_label
        db_path = ks_path
        row = rhnSQL.fetchone_dict(id_request + " and org_id is NULL",
                                   channel_id=self.channel['id'],
                                   label=tree_label)

    # Try the known treeinfo file names until one parses.
    treeinfo_parser = None
    for candidate in ('treeinfo', '.treeinfo'):
        log(1, "Trying " + candidate)
        treeinfo = plug.get_file(
            candidate, os.path.join(plug.repo.basecachedir, plug.name))
        if not treeinfo:
            continue
        try:
            treeinfo_parser = TreeInfoParser(treeinfo)
        except TreeInfoError:
            continue
        break

    if not treeinfo_parser:
        log(0, "Kickstartable tree not detected (no valid treeinfo file)")
        return

    if self.ks_install_type is None:
        family = treeinfo_parser.get_family()
        if family == 'Fedora':
            self.ks_install_type = 'fedora18'
        elif family == 'CentOS':
            self.ks_install_type = 'rhel_' + treeinfo_parser.get_major_version()
        else:
            self.ks_install_type = 'generic_rpm'

    # Local destination root of the tree.
    tree_root = os.path.join(CFG.MOUNT_POINT, ks_path)
    fileutils.createPath(tree_root)

    # Make sure images are included.
    to_download = set()
    for repo_path in treeinfo_parser.get_images():
        # TODO: better check
        if self.force_kickstart or not os.path.exists(
                os.path.join(tree_root, repo_path)):
            to_download.add(repo_path)

    if row:
        log(0, "Kickstartable tree %s already synced. Updating content..."
            % tree_label)
        ks_id = row['id']
    else:
        row = rhnSQL.fetchone_dict(
            " select sequence_nextval('rhn_kstree_id_seq') as id from dual ")
        ks_id = row['id']
        rhnSQL.execute(
            " insert into rhnKickstartableTree (id, org_id, label, base_path,"
            " channel_id, kstree_type, install_type, last_modified, created,"
            " modified) values (:id, :org_id, :label, :base_path, :channel_id,"
            " ( select id from rhnKSTreeType where label = :ks_tree_type),"
            " ( select id from rhnKSInstallType where label = :ks_install_type),"
            " current_timestamp, current_timestamp, current_timestamp)",
            id=ks_id,
            org_id=self.org_id,
            label=tree_label,
            base_path=db_path,
            channel_id=self.channel['id'],
            ks_tree_type=self.ks_tree_type,
            ks_install_type=self.ks_install_type)
        log(0, "Added new kickstartable tree %s. Downloading content..."
            % tree_label)

    insert_h = rhnSQL.prepare(
        " insert into rhnKSTreeFile (kstree_id, relative_filename, checksum_id,"
        " file_size, last_modified, created, modified) values (:id, :path,"
        " lookup_checksum('sha256', :checksum), :st_size,"
        " epoch_seconds_to_timestamp_tz(:st_time), current_timestamp,"
        " current_timestamp) "
    )
    delete_h = rhnSQL.prepare(
        " delete from rhnKSTreeFile"
        " where kstree_id = :id and relative_filename = :path "
    )

    # Walk the repository breadth-first, starting from its root directory.
    at_root = True
    pending_dirs = ['']
    log(0, "Gathering all files in kickstart repository...")
    while pending_dirs:
        current_dir = pending_dirs.pop(0)
        listing = plug.get_file(current_dir)
        if listing is None:
            continue

        # The package directory is excluded only for the first listing that
        # is successfully fetched.
        blacklist = None
        if at_root:
            blacklist = [treeinfo_parser.get_package_dir() + '/']
            at_root = False

        for entry in KSDirParser(listing, blacklist).get_content():
            repo_path = current_dir + entry['name']
            if entry['type'] == 'DIR':
                # Directories are queued and visited later.
                pending_dirs.append(repo_path)
            elif self.force_kickstart or not os.path.exists(
                    os.path.join(tree_root, repo_path)):
                to_download.add(repo_path)

    if not to_download:
        log(0, "No new kickstart files to download.")
    else:
        log(0, "Downloading %d kickstart files." % len(to_download))
        progress_bar = ProgressBarLogger("Downloading kickstarts:",
                                         len(to_download))
        downloader = ThreadedDownloader(force=self.force_kickstart)
        for item in to_download:
            params = {}
            plug.set_download_parameters(params, item,
                                         os.path.join(tree_root, item))
            downloader.add(params)
        downloader.set_log_obj(progress_bar)
        downloader.run()
        log2disk(0, "Download finished.")

        for item in to_download:
            local_file = os.path.join(tree_root, item)
            st = os.stat(local_file)
            # Replace the database record describing this file.
            delete_h.execute(id=ks_id, path=item)
            insert_h.execute(id=ks_id,
                             path=item,
                             checksum=getFileChecksum('sha256', local_file),
                             st_size=st.st_size,
                             st_time=st.st_mtime)

    rhnSQL.commit()
def __init__(self, channel_label, repo_type, url=None, fail=False,
             filters=None, no_errata=False, sync_kickstart=False, latest=False,
             metadata_only=False, strict=0, excluded_urls=None, no_packages=False,
             log_dir="reposync", log_level=None):
    """Set up configuration, logging, channel data and the repository list.

    :param channel_label: label of the channel to sync; also used as the
        log file name.
    :param repo_type: plugin name passed to ``self.load_plugin``.
    :param url: optional iterable of repository URLs; when empty, the
        sources linked to the channel are read from the database.
    :param excluded_urls: source URLs from the database to skip.
    :param log_dir: directory (under the default log location) for the log.
    :param log_level: debug level; ``None`` means ``0``.

    The remaining parameters are stored unchanged on the instance.
    ``self.urls`` holds ``(id, url, label, channel_family_id)`` tuples.
    """
    # Function-scope import so this method does not depend on the file's
    # import block.
    import subprocess

    self.regen = False
    self.fail = fail
    self.filters = filters or []
    self.no_packages = no_packages
    self.no_errata = no_errata
    self.sync_kickstart = sync_kickstart
    self.latest = latest
    self.metadata_only = metadata_only
    self.ks_tree_type = 'externally-managed'
    self.ks_install_type = 'generic_rpm'

    initCFG('server.satellite')
    rhnSQL.initDB()

    # setup logging
    log_filename = channel_label + '.log'
    log_path = default_log_location + log_dir + '/' + log_filename
    if log_level is None:
        log_level = 0
    CFG.set('DEBUG', log_level)
    rhnLog.initLOG(log_path, log_level)

    # Hand the log file to the web server group. The command is passed as an
    # argument list (no shell), so a channel label or log dir containing
    # spaces or shell metacharacters cannot alter the command.
    if isSUSE():
        log_group = "www"
    else:
        log_group = "apache"
    try:
        subprocess.call(["chgrp", log_group, log_path])
    except OSError:
        # Still best effort: a chgrp that cannot be started must not abort
        # the sync.
        log2disk(0, "Unable to run chgrp on %s: %s" % (log_path, sys.exc_info()[1]))

    log2disk(0, "Command: %s" % str(sys.argv))
    log2disk(0, "Sync of channel started.")

    self.channel_label = channel_label
    self.channel = self.load_channel()
    if not self.channel:
        # NOTE(review): only a message is logged here; when no url is given
        # the self.channel['id'] lookup below will fail for a missing
        # channel. Confirm whether this should abort explicitly.
        log(0, "Channel %s does not exist." % channel_label)

    if not url:
        # TODO:need to look at user security across orgs
        h = rhnSQL.prepare(
            "select s.id, s.source_url, s.label, fm.channel_family_id"
            " from rhnContentSource s, rhnChannelContentSource cs,"
            " rhnChannelFamilyMembers fm"
            " where s.id = cs.source_id"
            " and cs.channel_id = fm.channel_id"
            " and cs.channel_id = :channel_id"
        )
        h.execute(channel_id=int(self.channel['id']))
        source_data = h.fetchall_dict()
        self.urls = []
        if excluded_urls is None:
            excluded_urls = []
        if source_data:
            for row in source_data:
                if row['source_url'] not in excluded_urls:
                    self.urls.append((row['id'], row['source_url'],
                                      row['label'], row['channel_family_id']))
    else:
        # URLs given explicitly carry no database id, label or family.
        self.urls = [(None, u, None, None) for u in url]

    if not self.urls:
        log2stderr(0, "Channel %s has no URL associated" % channel_label)

    self.repo_plugin = self.load_plugin(repo_type)
    self.strict = strict
    self.all_packages = []
def import_packages(self, plug, source_id, url):
    """Download, import and link the packages offered by one repository.

    :param plug: repo plugin; provides ``list_packages``, ``get_package``,
        ``num_packages`` and ``num_excluded``.
    :param source_id: content source id used to load filters from the
        database when no filters were configured on the instance.
    :param url: repository URL; downloaded files are removed afterwards
        unless the URL contains ``file:/``.
    :returns: ``0`` on success, ``1`` when at least one package failed.
        Every path, including the early exit, returns this integer.
    """
    ret_code = 0
    if (not self.filters) and source_id:
        h = rhnSQL.prepare(
            " select flag, filter from rhnContentSourceFilter"
            " where source_id = :source_id"
            " order by sort_order "
        )
        h.execute(source_id=source_id)
        filter_data = h.fetchall_dict() or []
        filters = [(row['flag'], re.split(r'[,\s]+', row['filter']))
                   for row in filter_data]
    else:
        filters = self.filters

    packages = plug.list_packages(filters, self.latest)
    self.all_packages.extend(packages)
    to_process = []
    num_passed = len(packages)
    log(0, "Packages in repo: %5d" % plug.num_packages)
    if plug.num_excluded:
        log(0, "Packages passed filter rules: %5d" % num_passed)
    channel_id = int(self.channel['id'])

    for pack in packages:
        db_pack = rhnPackage.get_info_for_package(
            [pack.name, pack.version, pack.release, pack.epoch, pack.arch],
            channel_id, self.channel['org_id'])

        to_download = True
        to_link = True
        # Package exists in DB
        if db_pack:
            # Path in filesystem is defined
            if db_pack['path']:
                pack.path = os.path.join(CFG.MOUNT_POINT, db_pack['path'])
            else:
                pack.path = ""

            if self.metadata_only or self.match_package_checksum(
                    pack.path, pack.checksum_type, pack.checksum):
                # package is already on disk or not required
                to_download = False
                if db_pack['channel_id'] == channel_id:
                    # package is already in the channel
                    to_link = False
            elif db_pack['channel_id'] == channel_id:
                # different package with SAME NVREA
                self.disassociate_package(db_pack)

            # just pass data from DB, they will be used if there is no RPM
            # available
            pack.checksum = db_pack['checksum']
            pack.checksum_type = db_pack['checksum_type']
            pack.epoch = db_pack['epoch']

        if to_download or to_link:
            to_process.append((pack, to_download, to_link))

    num_to_process = len(to_process)
    if num_to_process == 0:
        log(0, "No new packages to sync.")
        # If we are just appending, we can exit. Return the status code
        # rather than None so callers always receive an integer.
        if not self.strict:
            return ret_code
    else:
        log(0, "Packages already synced: %5d" % (num_passed - num_to_process))
        log(0, "Packages to sync: %5d" % num_to_process)

    self.regen = True
    is_non_local_repo = (url.find("file:/") < 0)

    for (index, what) in enumerate(to_process):
        pack, to_download, to_link = what
        localpath = None
        # pylint: disable=W0703
        try:
            log(0, "%d/%d : %s" % (index + 1, num_to_process, pack.getNVREA()))
            if to_download:
                pack.path = localpath = plug.get_package(
                    pack, metadata_only=self.metadata_only)
                pack.load_checksum_from_header()
                pack.upload_package(self.channel,
                                    metadata_only=self.metadata_only)

                # we do not want to keep a whole 'a_pkg' object for every
                # package in memory, because we need only checksum.
                # see BZ 1397417
                pack.checksum = pack.a_pkg.checksum
                pack.checksum_type = pack.a_pkg.checksum_type
                pack.epoch = pack.a_pkg.header['epoch']
                pack.a_pkg = None
        except KeyboardInterrupt:
            raise
        except Exception:
            ret_code = 1
            e = sys.exc_info()[1]
            log2stderr(0, e)
            log2disk(0, e)
            if self.fail:
                raise
            # Neither download nor link this package any more.
            to_process[index] = (pack, False, False)
            continue
        finally:
            # Downloaded copies of remote packages are temporary.
            if is_non_local_repo and localpath and os.path.exists(localpath):
                os.remove(localpath)

    log(0, "Linking packages to channel.")
    if self.strict:
        # Strict mode links the packages collected from all repositories.
        import_batch = [self.associate_package(pack)
                        for pack in self.all_packages]
    else:
        import_batch = [self.associate_package(pack)
                        for (pack, to_download, to_link) in to_process
                        if to_link]
    backend = SQLBackend()
    caller = "server.app.yumreposync"
    importer = ChannelPackageSubscription(import_batch,
                                          backend,
                                          caller=caller,
                                          repogen=False,
                                          strict=self.strict)
    importer.run()
    backend.commit()
    return ret_code
def sync(self, update_repodata=False):
    """Synchronise every repository listed in ``self.urls``.

    For each entry a repo plugin is created, optional SSL options are loaded
    from the database, and packages, groups, errata and (for labelled repos)
    kickstart trees are imported according to the ``no_packages`` /
    ``no_errata`` / ``sync_kickstart`` flags.

    :param update_repodata: when true, the plugin cache is cleared before
        the repository is read.
    :returns: ``(elapsed_time, ret_code)``; ``ret_code`` is ``0`` on
        success, ``1`` when there were no URLs or a repository raised,
        otherwise the first non-zero status reported by ``import_packages``.
    """
    if self.urls:
        ret_code = 0
    else:
        ret_code = 1
    start_time = datetime.now()

    for source in self.urls:
        # Only the first three fields are used here; an entry may carry
        # extra trailing fields (one __init__ variant in this file appends a
        # channel family id), which a strict 3-way unpack would reject.
        repo_id, url, repo_label = source[:3]
        log(0, "Repo URL: %s" % url)
        plugin = None

        # If the repository uses a uln:// URL, switch to the ULN plugin,
        # overriding the command-line
        if url.startswith("uln://"):
            self.repo_plugin = self.load_plugin("uln")

        # pylint: disable=W0703
        try:
            if repo_label:
                repo_name = repo_label
            else:
                # use modified relative_url as name of repo plugin, because
                # it used as name of cache directory as well
                relative_url = '_'.join(url.split('://')[1].split('/')[1:])
                repo_name = relative_url.replace("?", "_").replace(
                    "&", "_").replace("=", "_")

            plugin = self.repo_plugin(url, repo_name,
                                      org=str(self.channel['org_id'] or ''),
                                      channel_label=self.channel_label)

            if update_repodata:
                plugin.clear_cache()

            if repo_id is not None:
                keys = rhnSQL.fetchone_dict(
                    " select k1.key as ca_cert, k2.key as client_cert, k3.key as client_key"
                    " from rhncontentsource cs"
                    " join rhncryptokey k1 on cs.ssl_ca_cert_id = k1.id"
                    " left outer join rhncryptokey k2 on cs.ssl_client_cert_id = k2.id"
                    " left outer join rhncryptokey k3 on cs.ssl_client_key_id = k3.id"
                    " where cs.id = :repo_id ",
                    repo_id=int(repo_id))
                if keys and ('ca_cert' in keys):
                    plugin.set_ssl_options(keys['ca_cert'],
                                           keys['client_cert'],
                                           keys['client_key'])

            if not self.no_packages:
                ret = self.import_packages(plugin, repo_id, url)
                # Keep the first failure: never overwrite a non-zero code,
                # and ignore a falsy result (0 or None) so that a missing
                # return value cannot turn the status into None and hide
                # later errors.
                if ret_code == 0 and ret:
                    ret_code = ret
                self.import_groups(plugin, url)

            if not self.no_errata:
                self.import_updates(plugin, url)

            # only for repos obtained from the DB
            if self.sync_kickstart and repo_label:
                try:
                    self.import_kickstart(plugin, repo_label)
                except:
                    # Undo partial kickstart DB changes (also on interrupt),
                    # then let the error propagate.
                    rhnSQL.rollback()
                    raise
        except Exception:
            e = sys.exc_info()[1]
            log2stderr(0, "ERROR: %s" % e)
            log2disk(0, "ERROR: %s" % e)
            if ret_code == 0:
                ret_code = 1

        if plugin is not None:
            plugin.clear_ssl_cache()

    if self.regen:
        taskomatic.add_to_repodata_queue_for_channel_package_subscription(
            [self.channel_label], [], "server.app.yumreposync")
        taskomatic.add_to_erratacache_queue(self.channel_label)
    self.update_date()
    rhnSQL.commit()

    elapsed_time = datetime.now() - start_time
    log(0, "Sync of channel completed in %s." % str(elapsed_time).split('.')[0])
    return elapsed_time, ret_code