def setUp(self):
    self.db_setup()
    self.tmpdir = tempfile.mkdtemp(suffix='.debsources-test')
    self.conf = mk_conf(self.tmpdir)
    dummy_status = updater.UpdateStatus()
    updater.update_statistics(dummy_status, self.conf, self.session)
    stats_data = os.path.join(self.conf['cache_dir'], 'stats.data')
    self.stats = statistics.load_metadata_cache(stats_data)

def setUp(self):
    self.db_setup()
    self.tmpdir = Path(tempfile.mkdtemp(suffix=".debsources-test"))
    self.conf = mk_conf(self.tmpdir)
    dummy_status = updater.UpdateStatus()
    updater.update_statistics(dummy_status, self.conf, self.session)
    stats_data = self.conf["cache_dir"] / "stats.data"
    self.stats = statistics.load_metadata_cache(stats_data)

def removesStickySuite(self):
    SARGE_PACKAGES = [('asm', '1.5.2-1'), ('zziplib', '0.12.83-4')]
    stats_file = os.path.join(self.conf['cache_dir'], 'stats.data')
    # to test stats.data cleanup
    self.conf['stages'] = self.TEST_STAGES.union(set([updater.STAGE_STATS]))

    archiver.add_suite(self.conf, self.session, 'sarge', self.archive)
    self.assertHasStickySuite('sarge')
    for pkg in SARGE_PACKAGES:
        self.assertHasStickyPackage(*pkg)
    stats = statistics.load_metadata_cache(stats_file)
    self.assertTrue('debian_sarge.sloccount' in stats)

    archiver.remove_suite(self.conf, self.session, 'sarge')
    self.assertLacksStickySuite('sarge')
    for pkg in SARGE_PACKAGES:
        self.assertLacksStickyPackage(*pkg)
    stats = statistics.load_metadata_cache(stats_file)
    self.assertFalse('debian_sarge.sloccount' in stats)

def removesStickySuite(self): SARGE_PACKAGES = [("asm", "1.5.2-1"), ("zziplib", "0.12.83-4")] stats_file = self.conf["cache_dir"] / "stats.data" # to test stats.data cleanup self.conf["stages"] = self.TEST_STAGES.union(set([updater.STAGE_STATS ])) archiver.add_suite(self.conf, self.session, "sarge", self.archive) self.assertHasStickySuite("sarge") for pkg in SARGE_PACKAGES: self.assertHasStickyPackage(*pkg) stats = statistics.load_metadata_cache(stats_file) self.assertTrue("debian_sarge.sloccount" in stats) archiver.remove_suite(self.conf, self.session, "sarge") self.assertLacksStickySuite("sarge") for pkg in SARGE_PACKAGES: self.assertLacksStickyPackage(*pkg) stats = statistics.load_metadata_cache(stats_file) self.assertFalse("debian_sarge.sloccount" in stats)
def _remove_stats_for(conf, session, suite):
    status = updater.UpdateStatus()
    if updater.STAGE_STATS in conf['stages']:
        updater.update_statistics(status, conf, session, [suite])
        # remove newly orphan keys from stats.data; iterate over a copy of
        # the keys, since we delete entries while walking the dict
        stats_file = os.path.join(conf['cache_dir'], 'stats.data')
        stats = statistics.load_metadata_cache(stats_file)
        for k in list(stats.keys()):
            if k.startswith('debian_' + suite + '.'):
                del stats[k]
        statistics.save_metadata_cache(stats, stats_file)
    if updater.STAGE_CACHE in conf['stages']:
        updater.update_metadata(status, conf, session)
    if updater.STAGE_CHARTS in conf['stages']:
        updater.update_charts(status, conf, session)

def licenseStatsMatchReferenceDb(self):
    license_stats_data = self.conf["cache_dir"] / "license_stats.data"
    license_stats = statistics.load_metadata_cache(license_stats_data)
    expected_stats = {  # just a few samples
        "experimental.LGPL-2.1+": 749,
        "experimental.unknown": 4,
        "jessie.GPL-2": 31,
        "overall.LGPL-2.1+": 749,
        "sid.GPL-3+": 533,
        "sid.GPL-2": 31,
        "wheezy.GPL-2": 30,
        "wheezy.GPL-2+": 58,
    }
    self.assertDictContainsSubset(expected_stats, license_stats)

def licenseStatsMatchReferenceDb(self):
    license_stats_data = os.path.join(self.conf['cache_dir'],
                                      'license_stats.data')
    license_stats = statistics.load_metadata_cache(license_stats_data)
    expected_stats = {  # just a few samples
        'experimental.LGPL-2.1+': 749,
        'experimental.unknown': 4,
        'jessie.GPL-2': 31,
        'overall.LGPL-2.1+': 749,
        'sid.GPL-3+': 533,
        'sid.GPL-2': 31,
        'wheezy.GPL-2': 30,
        'wheezy.GPL-2+': 58,
    }
    self.assertDictContainsSubset(expected_stats, license_stats)

def _remove_stats_for(conf, session, suite):
    status = updater.UpdateStatus()
    if updater.STAGE_STATS in conf["stages"]:
        updater.update_statistics(status, conf, session, [suite])
        # remove newly orphan keys from stats.data
        stats_file = conf["cache_dir"] / "stats.data"
        stats = statistics.load_metadata_cache(stats_file)
        stats = {
            k: v
            for k, v in stats.items()
            if not k.startswith("debian_" + suite + ".")
        }
        statistics.save_metadata_cache(stats, stats_file)
    if updater.STAGE_CACHE in conf["stages"]:
        updater.update_metadata(status, conf, session)
    if updater.STAGE_CHARTS in conf["stages"]:
        updater.update_charts(status, conf, session)

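# Hypothetical, self-contained sketch (not part of debsources): the orphan-key
# cleanup above in isolation. Entries belonging to the removed suite are
# identified by the "debian_<suite>." key prefix; everything else is kept.
# The sample stats dict below is invented for illustration.
def drop_suite_keys(stats, suite):
    """Return a copy of `stats` without the entries of `suite`."""
    prefix = "debian_" + suite + "."
    return {k: v for k, v in stats.items() if not k.startswith(prefix)}

sample = {
    "debian_sarge.sloccount": 1000,
    "debian_sarge.disk_usage": 42,
    "debian_etch.sloccount": 2000,
    "total.disk_usage": 99,
}
assert drop_suite_keys(sample, "sarge") == {
    "debian_etch.sloccount": 2000,
    "total.disk_usage": 99,
}
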
def extract_stats(filter_suites=None, filename="cache/stats.data"):
    """Extract information from the collected stats.

    If filter_suites is None, all the information is extracted. Otherwise
    filter_suites must be a list of suite names (it can contain "total"),
    e.g. extract_stats(filter_suites=["total", "debian_wheezy"])
    """
    res = dict()
    stats = statistics.load_metadata_cache(filename)
    for (key, value) in six.iteritems(stats):
        splits = key.split(".")
        # keep this key/value only if it belongs to one of the wanted suites
        if filter_suites is None or splits[0] in filter_suites:
            res[key] = value
    return res

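# Hedged usage sketch: assuming a stats.data cache whose keys look like
# "debian_wheezy.ctags" or "total.disk_usage", extract_stats keeps only the
# entries whose first dot-separated component names a requested suite. The
# cache contents below are invented for illustration.
fake_stats = {
    "total.disk_usage": 100,
    "debian_wheezy.ctags": 42,
    "debian_sid.ctags": 57,
}
wanted = ["total", "debian_wheezy"]
filtered = {k: v for k, v in fake_stats.items() if k.split(".")[0] in wanted}
assert set(filtered) == {"total.disk_usage", "debian_wheezy.ctags"}
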
def update_statistics(status, conf, session, suites=None): """update stage: update statistics by default act on all non-sticky, major suites present in the DB. Pass `suites` to override """ logging.info("update statistics...") ensure_cache_dir(conf) suites = __target_suites(session, suites) now = datetime.utcnow() stats_file = conf["cache_dir"] / "stats.data" if stats_file.exists(): # If stats.data exists, load and update it, otherwise start from # scratch. Note: this means that we need to be careful about changing # stats keys, to avoid orphans. # TODO: add check about orphan stats.data entries to debsources-fsck stats = statistics.load_metadata_cache(stats_file) else: stats = {} def store_sloccount_stats(summary, d, prefix, db_obj): """Update stats dictionary `d`, and DB object `db_obj`, with per language sloccount statistics available in `summary`, generating dictionary keys that start with `prefix`. Missing languages in summary will be stored as 0-value entries. """ total_slocs = 0 for lang in SLOCCOUNT_LANGUAGES: k = prefix + "." + lang v = 0 if lang in summary: v = summary[lang] d[k] = v setattr(db_obj, "lang_" + lang, v) total_slocs += v d[prefix] = total_slocs # compute overall stats suite = "ALL" siz = HistorySize(suite, timestamp=now) loc = HistorySlocCount(suite, timestamp=now) for stat in ["disk_usage", "source_packages", "source_files", "ctags"]: v = getattr(statistics, stat)(session) stats["total." + stat] = v setattr(siz, stat, v) store_sloccount_stats(statistics.sloccount_summary(session), stats, "total.sloccount", loc) if not conf["dry_run"] and "db" in conf["backends"]: session.add(siz) session.add(loc) # Update HistorySize suite_key = "debian_" hist_siz = {suite: HistorySize(suite, timestamp=now) for suite in suites} for stat in ["disk_usage", "source_packages", "source_files", "ctags"]: stats_result = statistics.stats_grouped_by(session, stat) for res in stats_result: if res[0] in suites: stats[suite_key + res[0] + "." + stat] = res[1] setattr(hist_siz[res[0]], stat, res[1]) if not conf["dry_run"] and "db" in conf["backends"]: for siz in hist_siz.values(): session.add(siz) # update historySlocCount sloccount_res = statistics.stats_grouped_by(session, "sloccount") hist_loc = { suite: HistorySlocCount(suite, timestamp=now) for suite in suites } for suite in suites: temp = {item[1]: item[2] for item in sloccount_res if item[0] == suite} store_sloccount_stats(dict(temp), stats, suite_key + suite + ".sloccount", hist_loc[suite]) if not conf["dry_run"] and "db" in conf["backends"]: for loc in hist_loc.values(): session.add(loc) session.flush() # cache computed stats to on-disk stats file if not conf["dry_run"] and "fs" in conf["backends"]: statistics.save_metadata_cache(stats, stats_file) def update_license_statistics(suites): # compute License stats license_stats_file = conf["cache_dir"] / "license_stats.data" dual_license_file = conf["cache_dir"] / "dual_license.data" license_stats = dict() license_d_stats = dict() hist_lic = { suite: HistoryCopyright(suite, timestamp=now) for suite in suites } results = statistics.get_licenses(session) for suite in suites: temp = {item[0]: item[2] for item in results if item[1] == suite} summary = statistics.licenses_summary(temp) for res in summary: license_stats[suite + "." 
+ res.rstrip()] = summary[res] setattr(hist_lic[suite], "license", res.replace("_", " ")) setattr(hist_lic[suite], "files", summary[res]) if not conf["dry_run"] and "db" in conf["backends"]: session.add(hist_lic[suite]) # no historical here, only save to file dual_query = statistics.licenses_summary_w_dual(temp) for res in dual_query: license_d_stats[suite + "." + res.rstrip()] = dual_query[res] # overall dual licenses overall_d_licenses = statistics.licenses_summary_w_dual( statistics.get_licenses(session, "ALL")) for stat in overall_d_licenses: license_d_stats["overall." + stat] = overall_d_licenses[stat] # save dual licenses to file if not conf["dry_run"] and "fs" in conf["backends"]: statistics.save_metadata_cache(license_d_stats, dual_license_file) session.flush() overall_licenses = statistics.licenses_summary( statistics.get_licenses(session, "ALL")) for stat in overall_licenses: lic = HistoryCopyright("ALL", timestamp=now) setattr(lic, "license", stat.replace("_", " ")) setattr(lic, "files", overall_licenses[stat]) license_stats["overall." + stat] = overall_licenses[stat] if not conf["dry_run"] and "db" in conf["backends"]: session.add(lic) session.flush() if not conf["dry_run"] and "fs" in conf["backends"]: statistics.save_metadata_cache(license_stats, license_stats_file) if "copyright" in conf["hooks"]: update_license_statistics(suites)
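# Self-contained sketch (hypothetical stand-ins, not the real debsources
# objects) of what store_sloccount_stats above does: each known language gets
# a "<prefix>.<lang>" entry in the stats dict and a mirrored "lang_<lang>"
# attribute on the history object; the grand total lands under the bare
# prefix, and languages missing from the summary are recorded as 0.
LANGUAGES = ["ansic", "python", "sh"]  # stand-in for SLOCCOUNT_LANGUAGES

class FakeHistory:  # stand-in for HistorySlocCount
    pass

stats, db_obj, total = {}, FakeHistory(), 0
summary = {"ansic": 120, "python": 30}  # "sh" is missing: stored as 0
for lang in LANGUAGES:
    v = summary.get(lang, 0)
    stats["total.sloccount." + lang] = v
    setattr(db_obj, "lang_" + lang, v)
    total += v
stats["total.sloccount"] = total
assert stats["total.sloccount"] == 150 and db_obj.lang_sh == 0
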
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB.
    Pass `suites` to override
    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in
        summary will be stored as 0-value entries.
        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + '.' + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    suite = 'ALL'
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          'total.sloccount', loc)
    if not conf['dry_run'] and 'db' in conf['backends']:
        session.add(siz)
        session.add(loc)

    # compute per-suite stats
    for suite in suites:
        siz = HistorySize(suite, timestamp=now)
        loc = HistorySlocCount(suite, timestamp=now)

        suite_key = 'debian_' + suite + '.'
        for stat in ['disk_usage', 'source_packages', 'source_files',
                     'ctags']:
            v = getattr(statistics, stat)(session, suite)
            stats[suite_key + stat] = v
            setattr(siz, stat, v)
        store_sloccount_stats(statistics.sloccount_summary(session, suite),
                              stats, suite_key + 'sloccount', loc)
        if not conf['dry_run'] and 'db' in conf['backends']:
            session.add(siz)
            session.add(loc)
    session.flush()

    # cache computed stats to on-disk stats file
    if not conf['dry_run'] and 'fs' in conf['backends']:
        statistics.save_metadata_cache(stats, stats_file)

def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB.
    Pass `suites` to override
    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in
        summary will be stored as 0-value entries.
        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + '.' + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    suite = 'ALL'
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          'total.sloccount', loc)
    if not conf['dry_run'] and 'db' in conf['backends']:
        session.add(siz)
        session.add(loc)

    # update HistorySize
    suite_key = 'debian_'
    hist_siz = dict((suite, HistorySize(suite, timestamp=now))
                    for suite in suites)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])
    if not conf['dry_run'] and 'db' in conf['backends']:
        for siz in hist_siz.values():
            session.add(siz)

    # update HistorySlocCount
    sloccount_res = statistics.stats_grouped_by(session, 'sloccount')
    hist_loc = dict((suite, HistorySlocCount(suite, timestamp=now))
                    for suite in suites)
    for suite in suites:
        temp = dict((item[1], item[2])
                    for item in sloccount_res if item[0] == suite)
        store_sloccount_stats(temp, stats,
                              suite_key + suite + '.sloccount',
                              hist_loc[suite])
    if not conf['dry_run'] and 'db' in conf['backends']:
        for loc in hist_loc.values():
            session.add(loc)
    session.flush()

    # cache computed stats to on-disk stats file
    if not conf['dry_run'] and 'fs' in conf['backends']:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        # compute License stats
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()
        hist_lic = dict((suite, HistoryCopyright(suite, timestamp=now))
                        for suite in suites)
        results = statistics.get_licenses(session)
        for suite in suites:
            temp = dict((item[0], item[2])
                        for item in results if item[1] == suite)
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + '.' + res.rstrip()] = summary[res]
                setattr(hist_lic[suite], 'license', res.replace('_', ' '))
                setattr(hist_lic[suite], 'files', summary[res])
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(hist_lic[suite])
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + '.' + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]
        # save dual licenses to file
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)
        session.flush()

        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            setattr(lic, 'license', stat.replace('_', ' '))
            setattr(lic, 'files', overall_licenses[stat])
            license_stats['overall.' + stat] = overall_licenses[stat]
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(lic)
        session.flush()
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)