Esempio n. 1
0
    def test_group_by_stats(self):
        # Exercise statistics.stats_grouped_by for each size metric and for
        # sloccount, comparing one known per-suite value for each.
        # Each entry: (metric, suite, expected value); checked in this order.
        expectations = (
            ('disk_usage', 'etch', 32736),
            ('ctags', 'wheezy', 20150),
            ('source_packages', 'jessie', 13),
            ('source_files', 'wheezy', 1632),
        )
        for metric, suite, expected in expectations:
            per_suite = dict(
                statistics.stats_grouped_by(self.session, metric))
            self.assertEqual(per_suite[suite], expected)

        # sloccount rows have three fields: field 0 is matched against the
        # suite name, field 1 becomes the key and field 2 the value
        # (presumably language and line count -- confirm against
        # stats_grouped_by).
        rows = statistics.stats_grouped_by(self.session, 'sloccount')
        wheezy_by_key = {row[1]: row[2] for row in rows if row[0] == "wheezy"}
        self.assertEqual(wheezy_by_key['sh'], 13560)
Esempio n. 2
0
    def test_group_by_stats(self):
        # Exercise statistics.stats_grouped_by for each size metric and for
        # sloccount, comparing one known per-suite value for each.
        # Each entry: (metric, suite, expected value); checked in this order.
        checks = [
            ('disk_usage', 'etch', 32736),
            ('ctags', 'wheezy', 20150),
            ('source_packages', 'jessie', 13),
            ('source_files', 'wheezy', 1632),
        ]
        for metric, suite_name, wanted in checks:
            grouped = statistics.stats_grouped_by(self.session, metric)
            self.assertEqual(dict(grouped)[suite_name], wanted)

        # sloccount rows have three fields: field 0 is matched against the
        # suite name, field 1 becomes the key and field 2 the value
        # (presumably language and line count -- confirm against
        # stats_grouped_by).
        wheezy_sloc = {}
        for row in statistics.stats_grouped_by(self.session, 'sloccount'):
            if row[0] == "wheezy":
                wheezy_sloc[row[1]] = row[2]
        self.assertEqual(wheezy_sloc['sh'], 13560)
Esempio n. 3
0
    def test_group_by_stats(self):
        # Exercise statistics.stats_grouped_by for each size metric and for
        # sloccount, comparing one known per-suite value for each.
        # Each entry: (metric, suite, expected value); checked in this order.
        expected_by_metric = (
            ("disk_usage", "etch", 32736),
            ("ctags", "wheezy", 20521),
            ("source_packages", "jessie", 14),
            ("source_files", "wheezy", 1632),
        )
        for metric, suite, expected in expected_by_metric:
            per_suite = dict(
                statistics.stats_grouped_by(self.session, metric))
            self.assertEqual(per_suite[suite], expected)

        # sloccount rows have three fields: field 0 is matched against the
        # suite name, field 1 becomes the key and field 2 the value
        # (presumably language and line count -- confirm against
        # stats_grouped_by).
        sloc_rows = statistics.stats_grouped_by(self.session, "sloccount")
        wheezy_sloc = {
            row[1]: row[2] for row in sloc_rows if row[0] == "wheezy"
        }
        self.assertEqual(wheezy_sloc["sh"], 13560)
Esempio n. 4
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Overall ("ALL") and per-suite size and sloccount figures are obtained
    from the `statistics` module, stored on HistorySize / HistorySlocCount
    objects and merged into the `stats.data` cache under conf["cache_dir"].
    When "copyright" is listed in conf["hooks"], license statistics are
    computed as well (see the nested `update_license_statistics`).

    History objects are added to `session` only when "db" is in
    conf["backends"], and cache files are written only when "fs" is;
    conf["dry_run"] disables both.

    `status` is accepted but not used by this stage.

    """
    logging.info("update statistics...")
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    size_stats = ["disk_usage", "source_packages", "source_files", "ctags"]
    write_db = not conf["dry_run"] and "db" in conf["backends"]
    write_fs = not conf["dry_run"] and "fs" in conf["backends"]

    stats_file = conf["cache_dir"] / "stats.data"
    if stats_file.exists():
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in summary
        will be stored as 0-value entries. The sum over all languages is
        stored under `prefix` itself.

        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            v = summary[lang] if lang in summary else 0
            d[prefix + "." + lang] = v
            setattr(db_obj, "lang_" + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    overall_siz = HistorySize("ALL", timestamp=now)
    overall_loc = HistorySlocCount("ALL", timestamp=now)
    for stat in size_stats:
        # each size stat is a function of the same name in `statistics`
        v = getattr(statistics, stat)(session)
        stats["total." + stat] = v
        setattr(overall_siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          "total.sloccount", overall_loc)
    if write_db:
        session.add(overall_siz)
        session.add(overall_loc)

    # Update HistorySize
    suite_key = "debian_"
    hist_siz = {suite: HistorySize(suite, timestamp=now) for suite in suites}
    for stat in size_stats:
        for res in statistics.stats_grouped_by(session, stat):
            if res[0] in suites:
                stats[suite_key + res[0] + "." + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])

    if write_db:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    # Group the rows by suite in a single pass instead of re-scanning the
    # whole result once per suite (this also works if the result can only be
    # iterated once).
    sloc_by_suite = {}
    for item in statistics.stats_grouped_by(session, "sloccount"):
        sloc_by_suite.setdefault(item[0], {})[item[1]] = item[2]
    hist_loc = {
        suite: HistorySlocCount(suite, timestamp=now)
        for suite in suites
    }
    for suite in suites:
        store_sloccount_stats(sloc_by_suite.get(suite, {}), stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])

    if write_db:
        for loc in hist_loc.values():
            session.add(loc)

    session.flush()

    # cache computed stats to on-disk stats file
    if write_fs:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics.

        Plain license counts are recorded as HistoryCopyright objects and in
        `license_stats.data`; dual-license counts are only written to
        `dual_license.data`.

        """
        license_stats_file = conf["cache_dir"] / "license_stats.data"
        dual_license_file = conf["cache_dir"] / "dual_license.data"
        license_stats = {}
        license_d_stats = {}

        # group the license rows by suite (field 1) in a single pass
        licenses_by_suite = {}
        for item in statistics.get_licenses(session):
            licenses_by_suite.setdefault(item[1], {})[item[0]] = item[2]

        for suite in suites:
            temp = licenses_by_suite.get(suite, {})
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # One history object per (suite, license), as done for "ALL"
                # below: reusing a single object per suite would overwrite
                # its fields on every iteration and keep only the last
                # license.
                lic = HistoryCopyright(suite, timestamp=now)
                lic.license = res.replace("_", " ")
                lic.files = summary[res]
                if write_db:
                    session.add(lic)
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, "ALL"))
        for stat in overall_d_licenses:
            license_d_stats["overall." + stat] = overall_d_licenses[stat]

        # save dual licenses to file
        if write_fs:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)

        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, "ALL"))
        for stat in overall_licenses:
            lic = HistoryCopyright("ALL", timestamp=now)
            lic.license = stat.replace("_", " ")
            lic.files = overall_licenses[stat]
            license_stats["overall." + stat] = overall_licenses[stat]
            if write_db:
                session.add(lic)
        session.flush()
        if write_fs:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if "copyright" in conf["hooks"]:
        update_license_statistics(suites)
Esempio n. 5
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Overall ('ALL') and per-suite size and sloccount figures are obtained
    from the `statistics` module, stored on HistorySize / HistorySlocCount
    objects and merged into the `stats.data` cache under conf['cache_dir'].
    When 'copyright' is listed in conf['hooks'], license statistics are
    computed as well (see the nested `update_license_statistics`).

    History objects are added to `session` only when 'db' is in
    conf['backends'], and cache files are written only when 'fs' is;
    conf['dry_run'] disables both.

    `status` is accepted but not used by this stage.

    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    size_stats = ['disk_usage', 'source_packages', 'source_files', 'ctags']
    write_db = not conf['dry_run'] and 'db' in conf['backends']
    write_fs = not conf['dry_run'] and 'fs' in conf['backends']

    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in summary
        will be stored as 0-value entries. The sum over all languages is
        stored under `prefix` itself.

        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            v = summary[lang] if lang in summary else 0
            d[prefix + '.' + lang] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    overall_siz = HistorySize('ALL', timestamp=now)
    overall_loc = HistorySlocCount('ALL', timestamp=now)
    for stat in size_stats:
        # each size stat is a function of the same name in `statistics`
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(overall_siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session),
                          stats, 'total.sloccount', overall_loc)
    if write_db:
        session.add(overall_siz)
        session.add(overall_loc)

    # Update HistorySize
    suite_key = 'debian_'
    hist_siz = dict((suite, HistorySize(suite, timestamp=now))
                    for suite in suites)
    for stat in size_stats:
        for res in statistics.stats_grouped_by(session, stat):
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])

    if write_db:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    # Group the rows by suite in a single pass instead of re-scanning the
    # whole result once per suite (this also works if the result can only be
    # iterated once).
    sloc_by_suite = {}
    for item in statistics.stats_grouped_by(session, 'sloccount'):
        sloc_by_suite.setdefault(item[0], {})[item[1]] = item[2]
    hist_loc = dict((suite, HistorySlocCount(suite, timestamp=now))
                    for suite in suites)
    for suite in suites:
        store_sloccount_stats(sloc_by_suite.get(suite, {}), stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])

    if write_db:
        for loc in hist_loc.values():
            session.add(loc)

    session.flush()

    # cache computed stats to on-disk stats file
    if write_fs:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics.

        Plain license counts are recorded as HistoryCopyright objects and in
        `license_stats.data`; dual-license counts are only written to
        `dual_license.data`.

        """
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()

        # group the license rows by suite (field 1) in a single pass
        licenses_by_suite = {}
        for item in statistics.get_licenses(session):
            licenses_by_suite.setdefault(item[1], {})[item[0]] = item[2]

        for suite in suites:
            temp = licenses_by_suite.get(suite, {})
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # One history object per (suite, license), as done for 'ALL'
                # below: reusing a single object per suite would overwrite
                # its fields on every iteration and keep only the last
                # license.
                lic = HistoryCopyright(suite, timestamp=now)
                lic.license = res.replace('_', ' ')
                lic.files = summary[res]
                if write_db:
                    session.add(lic)
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]

        # save dual licenses to file
        if write_fs:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)

        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            lic.license = stat.replace('_', ' ')
            lic.files = overall_licenses[stat]
            license_stats['overall.' + stat] = overall_licenses[stat]
            if write_db:
                session.add(lic)
        session.flush()
        if write_fs:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)
Esempio n. 6
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Overall ('ALL') and per-suite size and sloccount figures are obtained
    from the `statistics` module, stored on HistorySize / HistorySlocCount
    objects and merged into the `stats.data` cache under conf['cache_dir'].
    When 'copyright' is listed in conf['hooks'], license statistics are
    computed as well (see the nested `update_license_statistics`).

    History objects are added to `session` only when 'db' is in
    conf['backends'], and cache files are written only when 'fs' is;
    conf['dry_run'] disables both.

    `status` is accepted but not used by this stage.

    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    size_stats = ['disk_usage', 'source_packages', 'source_files', 'ctags']
    write_db = not conf['dry_run'] and 'db' in conf['backends']
    write_fs = not conf['dry_run'] and 'fs' in conf['backends']

    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in summary
        will be stored as 0-value entries. The sum over all languages is
        stored under `prefix` itself.

        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            v = summary[lang] if lang in summary else 0
            d[prefix + '.' + lang] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    overall_siz = HistorySize('ALL', timestamp=now)
    overall_loc = HistorySlocCount('ALL', timestamp=now)
    for stat in size_stats:
        # each size stat is a function of the same name in `statistics`
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(overall_siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          'total.sloccount', overall_loc)
    if write_db:
        session.add(overall_siz)
        session.add(overall_loc)

    # Update HistorySize
    suite_key = 'debian_'
    hist_siz = dict(
        (suite, HistorySize(suite, timestamp=now)) for suite in suites)
    for stat in size_stats:
        for res in statistics.stats_grouped_by(session, stat):
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])

    if write_db:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    # Group the rows by suite in a single pass instead of re-scanning the
    # whole result once per suite (this also works if the result can only be
    # iterated once).
    sloc_by_suite = {}
    for item in statistics.stats_grouped_by(session, 'sloccount'):
        sloc_by_suite.setdefault(item[0], {})[item[1]] = item[2]
    hist_loc = dict(
        (suite, HistorySlocCount(suite, timestamp=now)) for suite in suites)
    for suite in suites:
        store_sloccount_stats(sloc_by_suite.get(suite, {}), stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])

    if write_db:
        for loc in hist_loc.values():
            session.add(loc)

    session.flush()

    # cache computed stats to on-disk stats file
    if write_fs:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics.

        Plain license counts are recorded as HistoryCopyright objects and in
        `license_stats.data`; dual-license counts are only written to
        `dual_license.data`.

        """
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()

        # group the license rows by suite (field 1) in a single pass
        licenses_by_suite = {}
        for item in statistics.get_licenses(session):
            licenses_by_suite.setdefault(item[1], {})[item[0]] = item[2]

        for suite in suites:
            temp = licenses_by_suite.get(suite, {})
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # One history object per (suite, license), as done for 'ALL'
                # below: reusing a single object per suite would overwrite
                # its fields on every iteration and keep only the last
                # license.
                lic = HistoryCopyright(suite, timestamp=now)
                lic.license = res.replace('_', ' ')
                lic.files = summary[res]
                if write_db:
                    session.add(lic)
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]

        # save dual licenses to file
        if write_fs:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)

        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            lic.license = stat.replace('_', ' ')
            lic.files = overall_licenses[stat]
            license_stats['overall.' + stat] = overall_licenses[stat]
            if write_db:
                session.add(lic)
        session.flush()
        if write_fs:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)