def test_group_by_stats(self):
    """Spot-check per-suite grouped statistics against known fixture values."""
    by_suite = dict(statistics.stats_grouped_by(self.session, 'disk_usage'))
    self.assertEqual(by_suite['etch'], 32736)
    by_suite = dict(statistics.stats_grouped_by(self.session, 'ctags'))
    self.assertEqual(by_suite['wheezy'], 20150)
    by_suite = dict(statistics.stats_grouped_by(self.session,
                                                'source_packages'))
    self.assertEqual(by_suite['jessie'], 13)
    by_suite = dict(statistics.stats_grouped_by(self.session, 'source_files'))
    self.assertEqual(by_suite['wheezy'], 1632)
    # sloccount rows are (suite, language, count); keep only wheezy's slice
    sloc_rows = statistics.stats_grouped_by(self.session, 'sloccount')
    wheezy_by_lang = {item[1]: item[2] for item in sloc_rows
                      if item[0] == "wheezy"}
    self.assertEqual(wheezy_by_lang['sh'], 13560)
def test_group_by_stats(self):
    """Verify suite-level aggregates for each grouped metric."""
    expectations = [
        ('disk_usage', 'etch', 32736),
        ('ctags', 'wheezy', 20150),
        ('source_packages', 'jessie', 13),
        ('source_files', 'wheezy', 1632),
    ]
    for metric, suite, expected in expectations:
        grouped = dict(statistics.stats_grouped_by(self.session, metric))
        self.assertEqual(grouped[suite], expected)
    # sloccount rows carry (suite, language, count); check wheezy's sh count
    sloc_rows = statistics.stats_grouped_by(self.session, 'sloccount')
    wheezy_sloc = dict((item[1], item[2]) for item in sloc_rows
                       if item[0] == "wheezy")
    self.assertEqual(wheezy_sloc['sh'], 13560)
def test_group_by_stats(self):
    """Spot-check grouped statistics for selected suites."""
    def grouped(metric):
        # each grouped query yields (suite, value) pairs
        return dict(statistics.stats_grouped_by(self.session, metric))

    self.assertEqual(grouped("disk_usage")["etch"], 32736)
    self.assertEqual(grouped("ctags")["wheezy"], 20521)
    self.assertEqual(grouped("source_packages")["jessie"], 14)
    self.assertEqual(grouped("source_files")["wheezy"], 1632)
    # sloccount rows are (suite, language, count) triples
    rows = statistics.stats_grouped_by(self.session, "sloccount")
    wheezy_sloc = {row[1]: row[2] for row in rows if row[0] == "wheezy"}
    self.assertEqual(wheezy_sloc["sh"], 13560)
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Arguments (NOTE(review): inferred from usage within this body — confirm
    against callers):
      status: updater status object; not referenced in this function
      conf: configuration mapping; reads "cache_dir" (joined with "/" here,
          so presumably a pathlib.Path — confirm), "dry_run", "backends"
          and "hooks"
      session: DB session; History* rows are added/flushed only when the
          "db" backend is enabled and this is not a dry run
      suites: optional explicit suite list; when None the non-sticky major
          suites found in the DB are used
    """
    logging.info("update statistics...")
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)
    # single timestamp shared by every History* row created in this run
    now = datetime.utcnow()
    stats_file = conf["cache_dir"] / "stats.data"
    if stats_file.exists():
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in
        summary will be stored as 0-value entries.
        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + "." + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, "lang_" + lang, v)
            total_slocs += v
        # all-language aggregate is stored under the bare prefix
        d[prefix] = total_slocs

    # compute overall stats
    suite = "ALL"
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ["disk_usage", "source_packages", "source_files", "ctags"]:
        # statistics.<stat>(session) computes the archive-wide value
        v = getattr(statistics, stat)(session)
        stats["total." + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          "total.sloccount", loc)
    if not conf["dry_run"] and "db" in conf["backends"]:
        session.add(siz)
        session.add(loc)

    # Update HistorySize
    suite_key = "debian_"
    hist_siz = {suite: HistorySize(suite, timestamp=now) for suite in suites}
    for stat in ["disk_usage", "source_packages", "source_files", "ctags"]:
        # one grouped query per metric; rows are (suite, value) pairs
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            if res[0] in suites:
                stats[suite_key + res[0] + "." + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])
    if not conf["dry_run"] and "db" in conf["backends"]:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    sloccount_res = statistics.stats_grouped_by(session, "sloccount")
    hist_loc = {
        suite: HistorySlocCount(suite, timestamp=now) for suite in suites
    }
    for suite in suites:
        # sloccount rows are (suite, language, count); keep this suite's slice
        temp = {item[1]: item[2] for item in sloccount_res if item[0] == suite}
        store_sloccount_stats(dict(temp), stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])
    if not conf["dry_run"] and "db" in conf["backends"]:
        for loc in hist_loc.values():
            session.add(loc)
    session.flush()

    # cache computed stats to on-disk stats file
    if not conf["dry_run"] and "fs" in conf["backends"]:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics, persisting
        HistoryCopyright rows (db backend) and on-disk caches (fs backend).
        """
        # compute License stats
        license_stats_file = conf["cache_dir"] / "license_stats.data"
        dual_license_file = conf["cache_dir"] / "dual_license.data"
        license_stats = dict()
        license_d_stats = dict()
        hist_lic = {
            suite: HistoryCopyright(suite, timestamp=now) for suite in suites
        }
        results = statistics.get_licenses(session)
        for suite in suites:
            # license rows are (license, suite, count); keep this suite's slice
            temp = {item[0]: item[2] for item in results if item[1] == suite}
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # NOTE(review): these setattr calls overwrite the same
                # HistoryCopyright row each iteration, so only the last
                # license/files pair survives — confirm this is intended
                setattr(hist_lic[suite], "license", res.replace("_", " "))
                setattr(hist_lic[suite], "files", summary[res])
            if not conf["dry_run"] and "db" in conf["backends"]:
                session.add(hist_lic[suite])
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]
        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, "ALL"))
        for stat in overall_d_licenses:
            license_d_stats["overall." + stat] = overall_d_licenses[stat]
        # save dual licenses to file
        if not conf["dry_run"] and "fs" in conf["backends"]:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)
        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, "ALL"))
        for stat in overall_licenses:
            lic = HistoryCopyright("ALL", timestamp=now)
            setattr(lic, "license", stat.replace("_", " "))
            setattr(lic, "files", overall_licenses[stat])
            license_stats["overall." + stat] = overall_licenses[stat]
            if not conf["dry_run"] and "db" in conf["backends"]:
                session.add(lic)
        session.flush()
        if not conf["dry_run"] and "fs" in conf["backends"]:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if "copyright" in conf["hooks"]:
        update_license_statistics(suites)
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Arguments (NOTE(review): inferred from usage within this body — confirm
    against callers):
      status: updater status object; not referenced in this function
      conf: configuration mapping; reads 'cache_dir', 'dry_run',
          'backends' and 'hooks'
      session: DB session; History* rows are added/flushed only when the
          'db' backend is enabled and this is not a dry run
      suites: optional explicit suite list; when None the non-sticky major
          suites found in the DB are used
    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)
    # single timestamp shared by every History* row created in this run
    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in
        summary will be stored as 0-value entries.
        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + '.' + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        # all-language aggregate is stored under the bare prefix
        d[prefix] = total_slocs

    # compute overall stats
    suite = 'ALL'
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        # statistics.<stat>(session) computes the archive-wide value
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          'total.sloccount', loc)
    if not conf['dry_run'] and 'db' in conf['backends']:
        session.add(siz)
        session.add(loc)

    # Update HistorySize
    suite_key = 'debian_'
    hist_siz = dict((suite, HistorySize(suite, timestamp=now))
                    for suite in suites)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        # one grouped query per metric; rows are (suite, value) pairs
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])
    if not conf['dry_run'] and 'db' in conf['backends']:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    sloccount_res = statistics.stats_grouped_by(session, 'sloccount')
    hist_loc = dict((suite, HistorySlocCount(suite, timestamp=now))
                    for suite in suites)
    for suite in suites:
        # sloccount rows are (suite, language, count); keep this suite's slice
        temp = dict((item[1], item[2]) for item in sloccount_res
                    if item[0] == suite)
        store_sloccount_stats(dict(temp), stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])
    if not conf['dry_run'] and 'db' in conf['backends']:
        for loc in hist_loc.values():
            session.add(loc)
    session.flush()

    # cache computed stats to on-disk stats file
    if not conf['dry_run'] and 'fs' in conf['backends']:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics, persisting
        HistoryCopyright rows (db backend) and on-disk caches (fs backend).
        """
        # compute License stats
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()
        hist_lic = dict((suite, HistoryCopyright(suite, timestamp=now))
                        for suite in suites)
        results = statistics.get_licenses(session)
        for suite in suites:
            # license rows are (license, suite, count); keep this suite's slice
            temp = dict((item[0], item[2]) for item in results
                        if item[1] == suite)
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # NOTE(review): these setattr calls overwrite the same
                # HistoryCopyright row each iteration, so only the last
                # license/files pair survives — confirm this is intended
                setattr(hist_lic[suite], 'license', res.replace('_', ' '))
                setattr(hist_lic[suite], 'files', summary[res])
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(hist_lic[suite])
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]
        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]
        # save dual licenses to file
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)
        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            setattr(lic, 'license', stat.replace('_', ' '))
            setattr(lic, 'files', overall_licenses[stat])
            license_stats['overall.' + stat] = overall_licenses[stat]
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(lic)
        session.flush()
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Arguments (NOTE(review): inferred from usage within this body — confirm
    against callers):
      status: updater status object; not referenced in this function
      conf: configuration mapping; reads 'cache_dir', 'dry_run',
          'backends' and 'hooks'
      session: DB session; History* rows are added/flushed only when the
          'db' backend is enabled and this is not a dry run
      suites: optional explicit suite list; when None the non-sticky major
          suites found in the DB are used
    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)
    # single timestamp shared by every History* row created in this run
    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in
        summary will be stored as 0-value entries.
        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + '.' + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        # all-language aggregate is stored under the bare prefix
        d[prefix] = total_slocs

    # compute overall stats
    suite = 'ALL'
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        # statistics.<stat>(session) computes the archive-wide value
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          'total.sloccount', loc)
    if not conf['dry_run'] and 'db' in conf['backends']:
        session.add(siz)
        session.add(loc)

    # Update HistorySize
    suite_key = 'debian_'
    hist_siz = dict(
        (suite, HistorySize(suite, timestamp=now)) for suite in suites)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        # one grouped query per metric; rows are (suite, value) pairs
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])
    if not conf['dry_run'] and 'db' in conf['backends']:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    sloccount_res = statistics.stats_grouped_by(session, 'sloccount')
    hist_loc = dict(
        (suite, HistorySlocCount(suite, timestamp=now)) for suite in suites)
    for suite in suites:
        # sloccount rows are (suite, language, count); keep this suite's slice
        temp = dict(
            (item[1], item[2]) for item in sloccount_res if item[0] == suite)
        store_sloccount_stats(dict(temp), stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])
    if not conf['dry_run'] and 'db' in conf['backends']:
        for loc in hist_loc.values():
            session.add(loc)
    session.flush()

    # cache computed stats to on-disk stats file
    if not conf['dry_run'] and 'fs' in conf['backends']:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics, persisting
        HistoryCopyright rows (db backend) and on-disk caches (fs backend).
        """
        # compute License stats
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()
        hist_lic = dict((suite, HistoryCopyright(suite, timestamp=now))
                        for suite in suites)
        results = statistics.get_licenses(session)
        for suite in suites:
            # license rows are (license, suite, count); keep this suite's slice
            temp = dict(
                (item[0], item[2]) for item in results if item[1] == suite)
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # NOTE(review): these setattr calls overwrite the same
                # HistoryCopyright row each iteration, so only the last
                # license/files pair survives — confirm this is intended
                setattr(hist_lic[suite], 'license', res.replace('_', ' '))
                setattr(hist_lic[suite], 'files', summary[res])
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(hist_lic[suite])
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]
        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]
        # save dual licenses to file
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)
        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            setattr(lic, 'license', stat.replace('_', ' '))
            setattr(lic, 'files', overall_licenses[stat])
            license_stats['overall.' + stat] = overall_licenses[stat]
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(lic)
        session.flush()
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)