Example #1
 def CollectStatsFromFiles(self,
                           file_list,
                           catalog_file,
                           force_unpack=False):
     """Returns: A list of md5 sums of collected statistics."""
     args_display = file_list
     if len(args_display) > 5:
         args_display = args_display[:5] + ["...more..."]
     self.logger.debug("Processing: %s, please be patient", args_display)
     stats_list = StatsListFromCatalog(file_list, catalog_file, self.debug)
     data_list = []
     # Reverse the list so that pop() can be used to fetch packages while the
     # processing order still matches the order in the catalog file.
     stats_list.reverse()
     total_packages = len(stats_list)
     if not total_packages:
         raise PackageError("The length of package list is zero.")
     counter = itertools.count(1)
     self.logger.info("Juicing the svr4 package stream files...")
     if not self.debug:
         pbar = progressbar.ProgressBar()
         pbar.maxval = total_packages
         pbar.start()
     else:
         pbar = mute_progressbar.MuteProgressBar()
     while stats_list:
         # This way objects get garbage collected as soon as they are removed
         # from the list by pop().  The destructor (__del__()) of the srv4 class
         # removes the temporary directory from disk.  This makes it possible to
         # process the whole catalog.
         stats = stats_list.pop()
         stats.CollectStats(force=force_unpack)
         data_list.append(stats.GetAllStats())
         pbar.update(counter.next())
     pbar.finish()
     return data_list
Example #2
 def _ImportFiles(self, data, include_prefixes=None, show_progress=False):
   logging.debug("_ImportFiles()")
   osrel = data["osrel"]
   arch = data["arch"]
   contents = data["contents"]
   catalog = checkpkg_lib.Catalog()
   srv4_files_to_catalog = set()
   # The progressbar library doesn't like handling larger numbers: it displays
   # at most 99% if we feed it a maxval in the range of hundreds of thousands,
   # so the value is scaled down by a divisor.
   progressbar_divisor = int(len(contents) / 1000)
   if progressbar_divisor < 1:
     progressbar_divisor = 1
   update_period = 1L
   count = itertools.count()
   if show_progress:
     pbar = progressbar.ProgressBar()
   else:
     pbar = mute_progressbar.MuteProgressBar()
   pbar.maxval = len(contents) / progressbar_divisor
   pbar.start()
   cleaned_pkgs = set()
   for d in contents:
     i = count.next()
     if not i % update_period and (i / progressbar_divisor) <= pbar.maxval:
       pbar.update(i / progressbar_divisor)
     for pkgname in d["pkgnames"]:
       pkgname = self.SanitizeInstallContentsPkgname(pkgname)
       # If a package is one of our own, it should not be imported this way;
       # our own packages should only be managed by adding them to specific
       # catalogs.
       skip_pkgname = False
       for prefix in common_constants.OWN_PKGNAME_PREFIXES:
         if pkgname.startswith(prefix):
           skip_pkgname = True
           break
       # Prefix whitelist - whitelisted prefixes win.
       if include_prefixes:
         for prefix_to_include in include_prefixes:
           if pkgname.startswith(prefix_to_include):
             skip_pkgname = False
             break
       if skip_pkgname:
         continue
       # We'll create one file instance for each package
       try:
         sqo_srv4 = self._GetFakeSrv4(pkgname, osrel, arch)
       except sqlobject.main.SQLObjectNotFound, e:
         print d
         raise
       if sqo_srv4 not in cleaned_pkgs:
         sqo_srv4.RemoveAllCswFiles()
         cleaned_pkgs.add(sqo_srv4)
       sqo_pkginst = self._GetPkginst(pkgname)
       f_path, f_basename = os.path.split(d["path"])
       # This is really slow (one run takes ~1h), but it works.
       # To speed it up, raw SQL + cursor.executemany() could be used, but
       # there's an incompatibility between the MySQL and sqlite drivers:
       # MySQL:  INSERT ... VALUES (%s, %s, %s);
       # sqlite: INSERT ... VALUES (?, ?, ?);
       # For now we use the sqlobject ORM, which is slow but at least handles
       # the compatibility issues.
       csw_file = m.CswFile(pkginst=sqo_pkginst,
           line=d["line"], path=f_path, basename=f_basename,
           srv4_file=sqo_srv4)
       srv4_files_to_catalog.add(sqo_srv4)
Example #3
 def CollectStatsFromCatalogEntries(self, catalog_entries, force_unpack=False):
   """Returns: A list of md5 sums of collected statistics."""
   args_display = [x['file_basename'] for x in catalog_entries]
   if len(args_display) > 5:
     args_display = args_display[:5] + ["...more..."]
   self.logger.debug("Processing: %s, please be patient", args_display)
   md5_sum_list = []
   # Entries are processed in the order in which they appear in the catalog
   # file.
   total_packages = len(catalog_entries)
   if not total_packages:
     raise PackageError("The length of package list is zero.")
   counter = itertools.count(1)
   self.logger.info("Juicing the svr4 package stream files...")
   if self.debug:
     pbar = mute_progressbar.MuteProgressBar()
   else:
     pbar = progressbar.ProgressBar(widgets=[
       progressbar.widgets.Percentage(),
       ' ',
       progressbar.widgets.ETA(),
       ' ',
       progressbar.widgets.Bar()
     ])
     pbar.maxval = total_packages
     pbar.start()
   base_dir, _ = os.path.split(__file__)
   collect_pkg_metadata = os.path.join(base_dir, "collect_pkg_metadata.py")
   for catalog_entry in catalog_entries:
     pkg_file_name = catalog_entry['pkg_path']
     args = [collect_pkg_metadata]
     stderr_file = subprocess.PIPE
     if self.debug:
       args.append('--debug')
       stderr_file = None
     if force_unpack:
       args += ['--force-unpack']
     args += ['--input', pkg_file_name]
     ret_code, stdout, stderr = shell.ShellCommand(args, allow_error=False,
                                                   stderr=stderr_file)
     try:
       data_back = cjson.decode(stdout)
       if data_back['md5_sum'] != catalog_entry['md5sum']:
         msg = ('Unexpected file content: on disk (or in catalog) the file '
                '%r (%r) has MD5 sum %r but it turned out to be %r as '
                'seen by collect_pkg_metadata.py. '
                'We cannot continue, because we have no '
                'access to the data we are asked to examine. '
                'This can happen when you run mgar on intel and sparc in '
                'parallel, and you have some arch=all packages in the '
                'package set. This error will not happen if you run '
                'mgar platforms.'
                % (catalog_entry['file_basename'],
                   catalog_entry['pkg_path'],
                   catalog_entry['md5sum'],
                   data_back['md5_sum']))
         raise PackageError(msg)
       md5_sum_list.append(data_back['md5_sum'])
     except cjson.DecodeError:
       logging.fatal('Could not deserialize %r', stdout)
       raise
     pbar.update(counter.next())
   pbar.finish()
   return md5_sum_list
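Example #3 delegates the actual metadata collection to the collect_pkg_metadata.py helper script and decodes its JSON output. The fragment below is only a rough, standalone sketch of that call-out pattern using the standard library; subprocess and json stand in for the project's shell.ShellCommand and cjson helpers, and the collect_metadata function name is made up for illustration. It assumes, as the example implies, that the helper prints a single JSON object containing an 'md5_sum' key.

import json
import subprocess


def collect_metadata(script_path, pkg_path, debug=False, force_unpack=False):
  """Runs the metadata helper and returns its decoded JSON output (sketch)."""
  args = [script_path]
  if debug:
    args.append('--debug')
  if force_unpack:
    args.append('--force-unpack')
  args += ['--input', pkg_path]
  # shell.ShellCommand in the example wraps a subprocess call roughly like this.
  proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = proc.communicate()
  if proc.returncode:
    raise RuntimeError('Metadata collection failed: %r' % stderr)
  return json.loads(stdout)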
Example #4
 def GetProgressBar(self):
     if self.show_progress and not self.debug:
         return progressbar.ProgressBar()
     else:
         return mute_progressbar.MuteProgressBar()
Example #5
 def _GetPbar(self, show_progress):
     if show_progress:
         pbar = progressbar.ProgressBar()
     else:
         pbar = mute_progressbar.MuteProgressBar()
     return pbar
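All five examples follow the same pattern: the caller always receives an object with the progressbar interface, and when output should be suppressed (debug mode, or show_progress being False) a silent stand-in from mute_progressbar is returned instead, so the surrounding loop never has to branch on whether a progress bar is active. The actual mute_progressbar module is not shown on this page; the class below is only a minimal sketch of what such a stand-in could look like, assuming it needs nothing beyond the maxval attribute and the start()/update()/finish() methods exercised above.

class MuteProgressBar(object):
  """Silent stand-in for the subset of the ProgressBar API used above (sketch)."""

  def __init__(self):
    self.maxval = 0

  def start(self):
    # Nothing to draw; return self for parity with progressbar.ProgressBar.start().
    return self

  def update(self, value):
    # Accept and ignore progress updates.
    pass

  def finish(self):
    # Nothing to clean up.
    pass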