Ejemplo n.º 1
0
 def slocCountsMatchReferenceDb(self):
     # Expected per-language SLOC summary for suite 'jessie'.
     expected_jessie = dict(
         ansic=166724,
         asm=65,
         awk=25,
         cpp=41458,
         cs=1213,
         java=916,
         lex=223,
         lisp=2193,
         makefile=2092,
         ml=5044,
         objc=836,
         perl=1199,
         python=2916,
         ruby=193,
         sed=16,
         sh=29984,
         sql=237,
         xml=14932,
         yacc=312,
     )
     # Expected single-language totals: 'python' with no suite given, and
     # 'cpp' restricted to suite 'experimental'.
     expected_python = 8740
     expected_cpp_experimental = 87521

     jessie_summary = statistics.sloccount_summary(self.session,
                                                   suite='jessie')
     self.assertEqual(expected_jessie, jessie_summary)

     python_total = statistics.sloccount_lang(self.session, 'python')
     self.assertEqual(expected_python, python_total)

     cpp_experimental = statistics.sloccount_lang(self.session, 'cpp',
                                                  suite='experimental')
     self.assertEqual(expected_cpp_experimental, cpp_experimental)
Ejemplo n.º 2
0
 def slocCountsMatchReferenceDb(self):
     # Expected per-language SLOC summary for suite 'jessie'.
     expected_jessie = dict(
         ansic=166724,
         asm=65,
         awk=25,
         cpp=41458,
         cs=1213,
         java=916,
         lex=223,
         lisp=2193,
         makefile=2092,
         ml=5044,
         objc=836,
         perl=1199,
         python=2916,
         ruby=193,
         sed=16,
         sh=29984,
         sql=237,
         xml=14932,
         yacc=312,
     )
     # Expected single-language totals: 'python' with no suite given, and
     # 'cpp' restricted to suite 'experimental'.
     expected_python = 9193
     expected_cpp_experimental = 87521

     jessie_summary = statistics.sloccount_summary(self.session,
                                                   suite='jessie')
     self.assertEqual(expected_jessie, jessie_summary)

     python_total = statistics.sloccount_lang(self.session, 'python')
     self.assertEqual(expected_python, python_total)

     cpp_experimental = statistics.sloccount_lang(self.session, 'cpp',
                                                  suite='experimental')
     self.assertEqual(expected_cpp_experimental, cpp_experimental)
Ejemplo n.º 3
0
 def slocCountsMatchReferenceDb(self):
     # Expected per-language SLOC summary for suite "jessie".
     expected_jessie = dict(
         ansic=166724,
         asm=65,
         awk=25,
         cpp=41458,
         cs=1213,
         java=916,
         lex=223,
         lisp=2193,
         makefile=2104,
         ml=5044,
         objc=836,
         perl=1199,
         python=2916,
         ruby=193,
         sed=16,
         sh=30045,
         sql=237,
         xml=14932,
         yacc=312,
     )
     # Expected single-language totals: "python" with no suite given, and
     # "cpp" restricted to suite "experimental".
     expected_python = 9193
     expected_cpp_experimental = 87521

     jessie_summary = statistics.sloccount_summary(self.session,
                                                   suite="jessie")
     self.assertEqual(expected_jessie, jessie_summary)

     python_total = statistics.sloccount_lang(self.session, "python")
     self.assertEqual(expected_python, python_total)

     cpp_experimental = statistics.sloccount_lang(self.session, "cpp",
                                                  suite="experimental")
     self.assertEqual(expected_cpp_experimental, cpp_experimental)
Ejemplo n.º 4
0
def update_charts(status, conf, session, suites=None):
    """update stage: rebuild charts

    For every target suite plus the 'ALL' pseudo-suite, build the size
    charts (one per metric and period), the historical sloccount charts
    (one per period) and the pie chart of the current sloccount summary.
    Chart files are named under <cache_dir>/stats; nothing is plotted when
    conf['dry_run'] is set.

    `status` is not used by this stage.

    """

    def stats_path(filename):
        # location of a chart image inside the stats directory
        return os.path.join(conf['cache_dir'], 'stats', filename)

    logging.info('update charts...')
    ensure_stats_dir(conf)
    suites = __target_suites(session, suites)
    charted_suites = suites + ['ALL']

    chart_periods = [  # <period, granularity> pairs
        ('1 month', 'hourly'),
        ('1 year', 'daily'),
        ('5 years', 'weekly'),
        ('20 years', 'monthly'),
    ]

    # size charts, various metrics
    for metric in ('source_packages', 'disk_usage', 'source_files', 'ctags'):
        for period, granularity in chart_periods:
            history_size = getattr(statistics, 'history_size_' + granularity)
            period_tag = period.replace(' ', '-')
            for suite in charted_suites:
                series = history_size(session, metric,
                                      interval=period, suite=suite)
                chart_file = stats_path('%s-%s-%s.png'
                                        % (suite, metric, period_tag))
                if not conf['dry_run']:
                    charts.size_plot(series, chart_file)

    # sloccount: historical histograms
    for period, granularity in chart_periods:
        history_sloc = getattr(statistics, 'history_sloc_' + granularity)
        period_tag = period.replace(' ', '-')
        for suite in charted_suites:
            mseries = history_sloc(session, interval=period, suite=suite)
            chart_file = stats_path('%s-sloc-%s.png' % (suite, period_tag))
            if not conf['dry_run']:
                charts.sloc_plot(mseries, chart_file)

    # sloccount: current pie charts
    for suite in charted_suites:
        # 'ALL' is queried by passing no suite at all
        slocs = statistics.sloccount_summary(
            session, suite=None if suite == 'ALL' else suite)
        chart_file = stats_path('%s-sloc_pie-current.png' % suite)
        if not conf['dry_run']:
            charts.sloc_pie(slocs, chart_file)
Ejemplo n.º 5
0
def update_charts(status, conf, session, suites=None):
    """update stage: rebuild charts

    Build the chart images under <cache_dir>/stats: size charts per metric
    and period, historical sloccount charts, current sloccount pie charts
    and a per-suite sloccount bar chart. When "copyright" is listed in
    conf["hooks"], the corresponding license charts are built as well.

    Arguments:
    - status: not used by this stage
    - conf: configuration mapping; reads "cache_dir" (joined with `/`, so
      presumably a pathlib.Path -- TODO confirm), "dry_run", "hooks" and the
      optional "charts_top_langs" (6 when absent)
    - session: DB session handed to the `statistics` and `qry` helpers
    - suites: suites to chart, resolved through __target_suites()

    No chart is plotted when conf["dry_run"] is true.

    """

    from debsources import charts

    logging.info("update charts...")
    ensure_stats_dir(conf)
    suites = __target_suites(session, suites)

    CHARTS = [  # <period, granularity> pairs
        ("1 month", "hourly"),
        ("1 year", "daily"),
        ("5 years", "weekly"),
        ("20 years", "monthly"),
    ]

    # size charts, various metrics
    for metric in ["source_packages", "disk_usage", "source_files", "ctags"]:
        for (period, granularity) in CHARTS:
            for suite in suites + ["ALL"]:
                series = getattr(statistics, "history_size_" + granularity)(
                    session, metric, interval=period, suite=suite)
                filename = "%s-%s-%s.png" % (suite, metric,
                                             period.replace(" ", "-"))
                chart_file = conf["cache_dir"] / "stats" / filename
                if not conf["dry_run"]:
                    charts.size_plot(series, chart_file)

    # sloccount: historical histograms
    for (period, granularity) in CHARTS:
        for suite in suites + ["ALL"]:
            # historical histogram
            mseries = getattr(statistics,
                              "history_sloc_" + granularity)(session,
                                                             interval=period,
                                                             suite=suite)
            filename = "%s-sloc-%s.png" % (suite, period.replace(" ", "-"))
            chart_file = conf["cache_dir"] / "stats" / filename
            if not conf["dry_run"]:
                charts.multiseries_plot(mseries, chart_file)

    # sloccount: current pie charts
    sloc_per_suite = []  # one summary per real suite, in `suites` order
    for suite in suites + ["ALL"]:
        # "ALL" is queried by passing no suite at all
        sloc_suite = None if suite == "ALL" else suite
        slocs = statistics.sloccount_summary(session, suite=sloc_suite)
        if suite != "ALL":
            sloc_per_suite.append(slocs)
        filename = "%s-sloc_pie-current.png" % suite
        chart_file = conf["cache_dir"] / "stats" / filename
        if not conf["dry_run"]:
            charts.pie_chart(slocs, chart_file)

    # sloccount: bar chart plot
    if "charts_top_langs" in conf:
        top_langs = int(conf["charts_top_langs"])
    else:
        top_langs = 6
    chart_file = conf["cache_dir"] / "stats" / "sloc_bar_plot.png"
    # honour dry-run here too, like every other chart of this stage
    if not conf["dry_run"]:
        charts.bar_chart(sloc_per_suite, suites, chart_file, top_langs,
                         "SLOC")

    def update_license_charts():
        # License: historical histograms
        for (period, granularity) in CHARTS:
            for suite in suites + ["ALL"]:
                mseries = getattr(statistics, "history_copyright_" +
                                  granularity)(session,
                                               interval=period,
                                               suite=suite)
                filename = "copyright_%s-license-%s.png" % (
                    suite,
                    period.replace(" ", "-"),
                )
                chart_file = conf["cache_dir"] / "stats" / filename
                if not conf["dry_run"]:
                    charts.multiseries_plot(mseries, chart_file, cols=3)

        # License: overall pie chart
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, "ALL"))
        ratio = qry.get_ratio(session)
        chart_file = conf[
            "cache_dir"] / "stats" / "copyright_overall-license_pie.png"
        if not conf["dry_run"]:
            charts.pie_chart(overall_licenses, chart_file, ratio)

        # License: bar chart and per suite pie chart.
        # Note: sticky suites are charted here too, and the default target
        # suites are used regardless of the `suites` argument.
        all_suites = statistics.sticky_suites(session) + __target_suites(
            session, None)
        licenses_per_suite = []
        for suite in all_suites:
            licenses = statistics.licenses_summary(
                statistics.get_licenses(session, suite))
            ratio = qry.get_ratio(session, suite=suite)
            # draw license pie chart
            if not conf["dry_run"]:
                filename = "copyright_%s-license_pie-current.png" % suite
                chart_file = conf["cache_dir"] / "stats" / filename
                charts.pie_chart(licenses, chart_file, ratio)

            licenses_per_suite.append(licenses)

        chart_file = conf[
            "cache_dir"] / "stats" / "copyright_license_bar_plot.png"
        if not conf["dry_run"]:
            charts.bar_chart(licenses_per_suite, all_suites, chart_file,
                             top_langs, "Number of files")

    # LICENSE CHARTS
    if "copyright" in conf["hooks"]:
        update_license_charts()
Ejemplo n.º 6
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Computed values are kept in the stats dictionary cached in
    <cache_dir>/stats.data (written when the "fs" backend is enabled) and
    in History* objects added to `session` (when the "db" backend is
    enabled). Neither is written when conf["dry_run"] is set. License
    statistics are computed as well when "copyright" is listed in
    conf["hooks"].

    `status` is not used by this stage.

    """
    logging.info("update statistics...")
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    stats_file = conf["cache_dir"] / "stats.data"
    if stats_file.exists():
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in summary
        will be stored as 0-value entries.

        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + "." + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, "lang_" + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    suite = "ALL"
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ["disk_usage", "source_packages", "source_files", "ctags"]:
        v = getattr(statistics, stat)(session)
        stats["total." + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          "total.sloccount", loc)
    if not conf["dry_run"] and "db" in conf["backends"]:
        session.add(siz)
        session.add(loc)

    # Update HistorySize
    suite_key = "debian_"
    hist_siz = {suite: HistorySize(suite, timestamp=now) for suite in suites}
    for stat in ["disk_usage", "source_packages", "source_files", "ctags"]:
        # one grouped query per stat; rows are indexed as (suite, value)
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            if res[0] in suites:
                stats[suite_key + res[0] + "." + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])

    if not conf["dry_run"] and "db" in conf["backends"]:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    # rows are indexed as (suite, language, count)
    sloccount_res = statistics.stats_grouped_by(session, "sloccount")
    hist_loc = {
        suite: HistorySlocCount(suite, timestamp=now)
        for suite in suites
    }
    for suite in suites:
        temp = {item[1]: item[2] for item in sloccount_res if item[0] == suite}
        store_sloccount_stats(temp, stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])

    if not conf["dry_run"] and "db" in conf["backends"]:
        for loc in hist_loc.values():
            session.add(loc)

    session.flush()

    # cache computed stats to on-disk stats file
    if not conf["dry_run"] and "fs" in conf["backends"]:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        # compute License stats
        license_stats_file = conf["cache_dir"] / "license_stats.data"
        dual_license_file = conf["cache_dir"] / "dual_license.data"
        license_stats = dict()
        license_d_stats = dict()

        results = statistics.get_licenses(session)
        for suite in suites:
            temp = {item[0]: item[2] for item in results if item[1] == suite}
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # One HistoryCopyright per (suite, license), as done for
                # "ALL" below: re-using a single object per suite would
                # overwrite it at each iteration and keep only the last
                # license.
                lic = HistoryCopyright(suite, timestamp=now)
                setattr(lic, "license", res.replace("_", " "))
                setattr(lic, "files", summary[res])
                if not conf["dry_run"] and "db" in conf["backends"]:
                    session.add(lic)
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, "ALL"))
        for stat in overall_d_licenses:
            license_d_stats["overall." + stat] = overall_d_licenses[stat]

        # save dual licenses to file
        if not conf["dry_run"] and "fs" in conf["backends"]:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)

        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, "ALL"))
        for stat in overall_licenses:
            lic = HistoryCopyright("ALL", timestamp=now)
            setattr(lic, "license", stat.replace("_", " "))
            setattr(lic, "files", overall_licenses[stat])
            license_stats["overall." + stat] = overall_licenses[stat]
            if not conf["dry_run"] and "db" in conf["backends"]:
                session.add(lic)
        session.flush()
        if not conf["dry_run"] and "fs" in conf["backends"]:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if "copyright" in conf["hooks"]:
        update_license_statistics(suites)
Ejemplo n.º 7
0
def update_charts(status, conf, session, suites=None):
    """update stage: rebuild charts

    Build the chart images under <cache_dir>/stats: size charts per metric
    and period, historical sloccount charts, current sloccount pie charts
    and a per-suite sloccount bar chart.

    Arguments:
    - status: not used by this stage
    - conf: configuration mapping; reads 'cache_dir', 'dry_run' and the
      optional 'charts_top_langs' (6 when absent)
    - session: DB session handed to the `statistics` helpers
    - suites: suites to chart, resolved through __target_suites()

    No chart is plotted when conf['dry_run'] is true.

    """

    from debsources import charts
    logging.info('update charts...')
    ensure_stats_dir(conf)
    suites = __target_suites(session, suites)

    CHARTS = [  # <period, granularity> pairs
        ('1 month', 'hourly'),
        ('1 year', 'daily'),
        ('5 years', 'weekly'),
        ('20 years', 'monthly'),
    ]

    # size charts, various metrics
    for metric in ['source_packages', 'disk_usage', 'source_files', 'ctags']:
        for (period, granularity) in CHARTS:
            for suite in suites + ['ALL']:
                series = getattr(statistics, 'history_size_' + granularity)(
                    session, metric, interval=period, suite=suite)
                chart_file = os.path.join(conf['cache_dir'], 'stats',
                                          '%s-%s-%s.png' %
                                          (suite, metric,
                                           period.replace(' ', '-')))
                if not conf['dry_run']:
                    charts.size_plot(series, chart_file)

    # sloccount: historical histograms
    for (period, granularity) in CHARTS:
        for suite in suites + ['ALL']:
            # historical histogram
            mseries = getattr(statistics, 'history_sloc_' + granularity)(
                session, interval=period, suite=suite)
            chart_file = os.path.join(conf['cache_dir'], 'stats',
                                      '%s-sloc-%s.png' %
                                      (suite, period.replace(' ', '-')))
            if not conf['dry_run']:
                charts.sloc_plot(mseries, chart_file)

    # sloccount: current pie charts
    sloc_per_suite = []  # one summary per real suite, in `suites` order
    for suite in suites + ['ALL']:
        # 'ALL' is queried by passing no suite at all
        sloc_suite = None if suite == 'ALL' else suite
        slocs = statistics.sloccount_summary(session, suite=sloc_suite)
        if suite != 'ALL':
            sloc_per_suite.append(slocs)
        chart_file = os.path.join(conf['cache_dir'], 'stats',
                                  '%s-sloc_pie-current.png' % suite)
        if not conf['dry_run']:
            charts.sloc_pie(slocs, chart_file)

    # sloccount: bar chart plot
    if 'charts_top_langs' in conf:
        top_langs = int(conf['charts_top_langs'])
    else:
        top_langs = 6
    chart_file = os.path.join(conf['cache_dir'], 'stats', 'sloc_bar_plot.png')
    # honour dry-run here too, like every other chart of this stage
    if not conf['dry_run']:
        charts.bar_chart(sloc_per_suite, suites, chart_file, top_langs)
Ejemplo n.º 8
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Size and sloccount figures are computed first overall (suite 'ALL',
    keys prefixed with 'total.') and then for each target suite (keys
    prefixed with 'debian_<suite>.'). They are added to `session` as
    HistorySize / HistorySlocCount objects when the 'db' backend is
    enabled, and saved to <cache_dir>/stats.data when the 'fs' backend is
    enabled; neither happens when conf['dry_run'] is set.

    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    # If stats.data exists, load and update it, otherwise start from
    # scratch. Note: this means that we need to be careful about changing
    # stats keys, to avoid orphans.
    # TODO: add check about orphan stats.data entries to debsources-fsck
    stats = {}
    if os.path.exists(stats_file):
        stats = statistics.load_metadata_cache(stats_file)

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Record into dictionary `d` (keys `prefix`.<lang>) and onto DB
        object `db_obj` (attributes lang_<lang>) the sloccount of every
        known language found in `summary`, using 0 for languages that
        `summary` lacks. The sum is stored in `d` under `prefix` itself.

        """
        grand_total = 0
        for lang in SLOCCOUNT_LANGUAGES:
            count = summary[lang] if lang in summary else 0
            d[prefix + '.' + lang] = count
            setattr(db_obj, 'lang_' + lang, count)
            grand_total += count
        d[prefix] = grand_total

    def snapshot(suite_name, key_prefix, *query_args):
        # Compute size and sloccount stats for one suite; `query_args` are
        # the extra positional arguments passed to the statistics queries
        # (none for the overall figures, the suite name otherwise).
        size_row = HistorySize(suite_name, timestamp=now)
        sloc_row = HistorySlocCount(suite_name, timestamp=now)
        for stat in ('disk_usage', 'source_packages', 'source_files',
                     'ctags'):
            value = getattr(statistics, stat)(session, *query_args)
            stats[key_prefix + stat] = value
            setattr(size_row, stat, value)
        store_sloccount_stats(
            statistics.sloccount_summary(session, *query_args),
            stats, key_prefix + 'sloccount', sloc_row)
        if not conf['dry_run'] and 'db' in conf['backends']:
            session.add(size_row)
            session.add(sloc_row)

    # compute overall stats
    snapshot('ALL', 'total.')

    # compute per-suite stats
    for suite in suites:
        snapshot(suite, 'debian_' + suite + '.', suite)

    session.flush()

    # cache computed stats to on-disk stats file
    if not conf['dry_run'] and 'fs' in conf['backends']:
        statistics.save_metadata_cache(stats, stats_file)
Ejemplo n.º 9
0
def update_charts(status, conf, session, suites=None):
    """update stage: rebuild charts

    Build the chart images under <cache_dir>/stats: size charts per metric
    and period, historical sloccount charts, current sloccount pie charts
    and a per-suite sloccount bar chart. When 'copyright' is listed in
    conf['hooks'], the corresponding license charts are built as well.

    Arguments:
    - status: not used by this stage
    - conf: configuration mapping; reads 'cache_dir', 'dry_run', 'hooks'
      and the optional 'charts_top_langs' (6 when absent)
    - session: DB session handed to the `statistics` and `qry` helpers
    - suites: suites to chart, resolved through __target_suites()

    No chart is plotted when conf['dry_run'] is true.

    """

    from debsources import charts
    logging.info('update charts...')
    ensure_stats_dir(conf)
    suites = __target_suites(session, suites)

    CHARTS = [  # <period, granularity> pairs
        ('1 month', 'hourly'),
        ('1 year', 'daily'),
        ('5 years', 'weekly'),
        ('20 years', 'monthly'),
    ]

    # size charts, various metrics
    for metric in ['source_packages', 'disk_usage', 'source_files', 'ctags']:
        for (period, granularity) in CHARTS:
            for suite in suites + ['ALL']:
                series = getattr(statistics, 'history_size_' + granularity)(
                    session, metric, interval=period, suite=suite)
                chart_file = os.path.join(conf['cache_dir'], 'stats',
                                          '%s-%s-%s.png' %
                                          (suite, metric,
                                           period.replace(' ', '-')))
                if not conf['dry_run']:
                    charts.size_plot(series, chart_file)

    # sloccount: historical histograms
    for (period, granularity) in CHARTS:
        for suite in suites + ['ALL']:
            # historical histogram
            mseries = getattr(statistics, 'history_sloc_' + granularity)(
                session, interval=period, suite=suite)
            chart_file = os.path.join(conf['cache_dir'], 'stats',
                                      '%s-sloc-%s.png' %
                                      (suite, period.replace(' ', '-')))
            if not conf['dry_run']:
                charts.multiseries_plot(mseries, chart_file)

    # sloccount: current pie charts
    sloc_per_suite = []  # one summary per real suite, in `suites` order
    for suite in suites + ['ALL']:
        # 'ALL' is queried by passing no suite at all
        sloc_suite = None if suite == 'ALL' else suite
        slocs = statistics.sloccount_summary(session, suite=sloc_suite)
        if suite != 'ALL':
            sloc_per_suite.append(slocs)
        chart_file = os.path.join(conf['cache_dir'], 'stats',
                                  '%s-sloc_pie-current.png' % suite)
        if not conf['dry_run']:
            charts.pie_chart(slocs, chart_file)

    # sloccount: bar chart plot
    if 'charts_top_langs' in conf:
        top_langs = int(conf['charts_top_langs'])
    else:
        top_langs = 6
    chart_file = os.path.join(conf['cache_dir'], 'stats', 'sloc_bar_plot.png')
    # honour dry-run here too, like every other chart of this stage
    if not conf['dry_run']:
        charts.bar_chart(sloc_per_suite, suites, chart_file, top_langs,
                         'SLOC')

    def update_license_charts():
        # License: historical histograms
        for (period, granularity) in CHARTS:
            for suite in suites + ['ALL']:
                mseries = getattr(statistics,
                                  'history_copyright_' + granularity)(
                    session, interval=period, suite=suite)
                chart_file = os.path.join(conf['cache_dir'], 'stats',
                                          'copyright_%s-license-%s.png' %
                                          (suite, period.replace(' ', '-')))
                if not conf['dry_run']:
                    charts.multiseries_plot(mseries, chart_file, cols=3)

        # License: overall pie chart
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        ratio = qry.get_ratio(session)
        chart_file = os.path.join(conf['cache_dir'], 'stats',
                                  'copyright_overall-license_pie.png')
        if not conf['dry_run']:
            charts.pie_chart(overall_licenses, chart_file, ratio)

        # License: bar chart and per suite pie chart.
        # Note: sticky suites are charted here too, and the default target
        # suites are used regardless of the `suites` argument.
        all_suites = statistics.sticky_suites(session) \
            + __target_suites(session, None)
        licenses_per_suite = []
        for suite in all_suites:
            licenses = statistics.licenses_summary(
                statistics.get_licenses(session, suite))
            ratio = qry.get_ratio(session, suite=suite)
            # draw license pie chart
            if not conf['dry_run']:
                chart_file = os.path.join(conf['cache_dir'], 'stats',
                                          'copyright_%s'
                                          '-license_pie-current.png'
                                          % suite)
                charts.pie_chart(licenses, chart_file, ratio)

            licenses_per_suite.append(licenses)

        chart_file = os.path.join(conf['cache_dir'], 'stats',
                                  'copyright_license_bar_plot.png')
        if not conf['dry_run']:
            charts.bar_chart(licenses_per_suite, all_suites, chart_file,
                             top_langs, 'Number of files')

    # LICENSE CHARTS
    if 'copyright' in conf['hooks']:
        update_license_charts()
Ejemplo n.º 10
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Computed values are kept in the stats dictionary cached in
    <cache_dir>/stats.data (written when the 'fs' backend is enabled) and
    in History* objects added to `session` (when the 'db' backend is
    enabled). Neither is written when conf['dry_run'] is set. License
    statistics are computed as well when 'copyright' is listed in
    conf['hooks'].

    `status` is not used by this stage.

    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in summary
        will be stored as 0-value entries.

        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            k = prefix + '.' + lang
            v = 0
            if lang in summary:
                v = summary[lang]
            d[k] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        d[prefix] = total_slocs

    # compute overall stats
    suite = 'ALL'
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session),
                          stats, 'total.sloccount', loc)
    if not conf['dry_run'] and 'db' in conf['backends']:
        session.add(siz)
        session.add(loc)

    # Update HistorySize
    suite_key = 'debian_'
    hist_siz = dict((suite, HistorySize(suite, timestamp=now))
                    for suite in suites)
    for stat in ['disk_usage', 'source_packages', 'source_files', 'ctags']:
        # one grouped query per stat; rows are indexed as (suite, value)
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])

    if not conf['dry_run'] and 'db' in conf['backends']:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    # rows are indexed as (suite, language, count)
    sloccount_res = statistics.stats_grouped_by(session, 'sloccount')
    hist_loc = dict((suite, HistorySlocCount(suite, timestamp=now))
                    for suite in suites)
    for suite in suites:
        temp = dict((item[1], item[2]) for item in sloccount_res
                    if item[0] == suite)
        store_sloccount_stats(temp, stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])

    if not conf['dry_run'] and 'db' in conf['backends']:
        for loc in hist_loc.values():
            session.add(loc)

    session.flush()

    # cache computed stats to on-disk stats file
    if not conf['dry_run'] and 'fs' in conf['backends']:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        # compute License stats
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()

        results = statistics.get_licenses(session)
        for suite in suites:
            temp = dict((item[0], item[2]) for item in results
                        if item[1] == suite)
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # One HistoryCopyright per (suite, license), as done for
                # 'ALL' below: re-using a single object per suite would
                # overwrite it at each iteration and keep only the last
                # license.
                lic = HistoryCopyright(suite, timestamp=now)
                setattr(lic, 'license', res.replace('_', ' '))
                setattr(lic, 'files', summary[res])
                if not conf['dry_run'] and 'db' in conf['backends']:
                    session.add(lic)
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]

        # save dual licenses to file
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)

        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            setattr(lic, 'license', stat.replace('_', ' '))
            setattr(lic, 'files', overall_licenses[stat])
            license_stats['overall.' + stat] = overall_licenses[stat]
            if not conf['dry_run'] and 'db' in conf['backends']:
                session.add(lic)
        session.flush()
        if not conf['dry_run'] and 'fs' in conf['backends']:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)
Ejemplo n.º 11
0
def update_charts(status, conf, session, suites=None):
    """update stage: rebuild charts

    Render size, sloccount and (when the 'copyright' hook is enabled) license
    charts as PNG files under ``<cache_dir>/stats``. The suites acted upon are
    those returned by ``__target_suites(session, suites)``, plus the 'ALL'
    pseudo-suite for the charts that have an overall variant.

    When ``conf['dry_run']`` is set no chart file is written; the statistics
    queries feeding the charts are still executed.

    `status` is not used by this stage.

    """

    from debsources import charts
    logging.info('update charts...')
    ensure_stats_dir(conf)
    suites = __target_suites(session, suites)

    dry_run = conf['dry_run']
    stats_dir = os.path.join(conf['cache_dir'], 'stats')
    chart_suites = suites + ['ALL']

    CHARTS = [  # <period, granularity> pairs
        ('1 month', 'hourly'),
        ('1 year', 'daily'),
        ('5 years', 'weekly'),
        ('20 years', 'monthly'),
    ]

    # size charts, various metrics
    for metric in ['source_packages', 'disk_usage', 'source_files', 'ctags']:
        for (period, granularity) in CHARTS:
            # the history function is looked up by granularity name
            history_size = getattr(statistics, 'history_size_' + granularity)
            for suite in chart_suites:
                series = history_size(
                    session, metric, interval=period, suite=suite)
                chart_file = os.path.join(
                    stats_dir,
                    '%s-%s-%s.png' % (suite, metric, period.replace(' ', '-')))
                if not dry_run:
                    charts.size_plot(series, chart_file)

    # sloccount: historical histograms
    for (period, granularity) in CHARTS:
        history_sloc = getattr(statistics, 'history_sloc_' + granularity)
        for suite in chart_suites:
            mseries = history_sloc(session, interval=period, suite=suite)
            chart_file = os.path.join(
                stats_dir,
                '%s-sloc-%s.png' % (suite, period.replace(' ', '-')))
            if not dry_run:
                charts.multiseries_plot(mseries, chart_file)

    # sloccount: current pie charts
    sloc_per_suite = []
    for suite in chart_suites:
        # the overall summary is requested by passing no suite at all
        sloc_suite = None if suite == 'ALL' else suite
        slocs = statistics.sloccount_summary(session, suite=sloc_suite)
        if suite != 'ALL':
            # only real suites take part in the per-suite bar chart below
            sloc_per_suite.append(slocs)
        chart_file = os.path.join(stats_dir,
                                  '%s-sloc_pie-current.png' % suite)
        if not dry_run:
            charts.pie_chart(slocs, chart_file)

    # sloccount: bar chart plot
    # number of top entries passed to the bar charts; defaults to 6 when the
    # 'charts_top_langs' configuration key is absent
    top_langs = int(conf.get('charts_top_langs', 6))
    chart_file = os.path.join(stats_dir, 'sloc_bar_plot.png')
    # honour dry_run here too, like every other chart written by this stage
    if not dry_run:
        charts.bar_chart(sloc_per_suite, suites, chart_file, top_langs,
                         'SLOC')

    def update_license_charts():
        """Rebuild the license (copyright) charts."""
        # License: historical histograms
        for (period, granularity) in CHARTS:
            history_copyright = getattr(statistics,
                                        'history_copyright_' + granularity)
            for suite in chart_suites:
                mseries = history_copyright(session,
                                            interval=period,
                                            suite=suite)
                chart_file = os.path.join(
                    stats_dir, 'copyright_%s-license-%s.png' %
                    (suite, period.replace(' ', '-')))
                if not dry_run:
                    charts.multiseries_plot(mseries, chart_file, cols=3)

        # License: overall pie chart
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        ratio = qry.get_ratio(session)
        chart_file = os.path.join(stats_dir,
                                  'copyright_overall-license_pie.png')
        if not dry_run:
            charts.pie_chart(overall_licenses, chart_file, ratio)

        # License: bar chart and per suite pie chart.
        # Unlike the charts above, these cover sticky suites plus the default
        # target suites, regardless of the `suites` argument.
        all_suites = statistics.sticky_suites(session) \
            + __target_suites(session, None)
        licenses_per_suite = []
        results = statistics.get_licenses(session)
        for suite in all_suites:
            # keep item[0] -> item[2] for the rows whose item[1] is this suite
            temp = dict(
                (item[0], item[2]) for item in results if item[1] == suite)
            licenses = statistics.licenses_summary(temp)
            ratio = qry.get_ratio(session, suite=suite)
            # draw license pie chart
            if not dry_run:
                chart_file = os.path.join(
                    stats_dir, 'copyright_%s'
                    '-license_pie-current.png' % suite)
                charts.pie_chart(licenses, chart_file, ratio)

            licenses_per_suite.append(licenses)

        chart_file = os.path.join(stats_dir,
                                  'copyright_license_bar_plot.png')
        if not dry_run:
            charts.bar_chart(licenses_per_suite, all_suites, chart_file,
                             top_langs, 'Number of files')

    # LICENSE CHARTS
    if 'copyright' in conf['hooks']:
        update_license_charts()
Ejemplo n.º 12
0
def update_statistics(status, conf, session, suites=None):
    """update stage: update statistics

    by default act on all non-sticky, major suites present in the DB. Pass
    `suites` to override

    Computed values are merged into the on-disk ``stats.data`` cache (when the
    'fs' backend is enabled) and recorded as History* rows in `session` (when
    the 'db' backend is enabled). Nothing is stored when ``conf['dry_run']``
    is set. License statistics are additionally computed when 'copyright' is
    listed in ``conf['hooks']``.

    `status` is not used by this stage.

    """
    logging.info('update statistics...')
    ensure_cache_dir(conf)
    suites = __target_suites(session, suites)

    now = datetime.utcnow()
    stats_file = os.path.join(conf['cache_dir'], 'stats.data')
    if os.path.exists(stats_file):
        # If stats.data exists, load and update it, otherwise start from
        # scratch. Note: this means that we need to be careful about changing
        # stats keys, to avoid orphans.
        # TODO: add check about orphan stats.data entries to debsources-fsck
        stats = statistics.load_metadata_cache(stats_file)
    else:
        stats = {}

    # which backends we are allowed to write to
    store_db = not conf['dry_run'] and 'db' in conf['backends']
    store_fs = not conf['dry_run'] and 'fs' in conf['backends']

    size_stats = ['disk_usage', 'source_packages', 'source_files', 'ctags']

    def store_sloccount_stats(summary, d, prefix, db_obj):
        """Update stats dictionary `d`, and DB object `db_obj`, with per
        language sloccount statistics available in `summary`, generating
        dictionary keys that start with `prefix`. Missing languages in summary
        will be stored as 0-value entries.

        """
        total_slocs = 0
        for lang in SLOCCOUNT_LANGUAGES:
            v = summary.get(lang, 0)
            d[prefix + '.' + lang] = v
            setattr(db_obj, 'lang_' + lang, v)
            total_slocs += v
        # the bare prefix key holds the sum over all known languages
        d[prefix] = total_slocs

    # compute overall stats
    suite = 'ALL'
    siz = HistorySize(suite, timestamp=now)
    loc = HistorySlocCount(suite, timestamp=now)
    for stat in size_stats:
        # each size metric is a same-named function of the statistics module
        v = getattr(statistics, stat)(session)
        stats['total.' + stat] = v
        setattr(siz, stat, v)
    store_sloccount_stats(statistics.sloccount_summary(session), stats,
                          'total.sloccount', loc)
    if store_db:
        session.add(siz)
        session.add(loc)

    # Update HistorySize
    suite_key = 'debian_'
    hist_siz = dict(
        (suite, HistorySize(suite, timestamp=now)) for suite in suites)
    for stat in size_stats:
        stats_result = statistics.stats_grouped_by(session, stat)
        for res in stats_result:
            # res[0] is matched against suite names, res[1] is the value
            if res[0] in suites:
                stats[suite_key + res[0] + '.' + stat] = res[1]
                setattr(hist_siz[res[0]], stat, res[1])

    if store_db:
        for siz in hist_siz.values():
            session.add(siz)

    # update historySlocCount
    sloccount_res = statistics.stats_grouped_by(session, 'sloccount')
    hist_loc = dict(
        (suite, HistorySlocCount(suite, timestamp=now)) for suite in suites)
    for suite in suites:
        # item[1] -> item[2] for the rows whose item[0] is this suite
        temp = dict(
            (item[1], item[2]) for item in sloccount_res if item[0] == suite)
        store_sloccount_stats(temp, stats,
                              suite_key + suite + ".sloccount",
                              hist_loc[suite])

    if store_db:
        for loc in hist_loc.values():
            session.add(loc)

    session.flush()

    # cache computed stats to on-disk stats file
    if store_fs:
        statistics.save_metadata_cache(stats, stats_file)

    def update_license_statistics(suites):
        """Compute per-suite and overall license statistics.

        Plain license counts are recorded both as HistoryCopyright rows and in
        ``license_stats.data``; dual-license counts are only saved to
        ``dual_license.data``.

        """
        # compute License stats
        license_stats_file = os.path.join(conf['cache_dir'],
                                          'license_stats.data')
        dual_license_file = os.path.join(conf['cache_dir'],
                                         'dual_license.data')
        license_stats = dict()
        license_d_stats = dict()

        results = statistics.get_licenses(session)
        for suite in suites:
            # item[0] -> item[2] for the rows whose item[1] is this suite
            temp = dict(
                (item[0], item[2]) for item in results if item[1] == suite)
            summary = statistics.licenses_summary(temp)
            for res in summary:
                license_stats[suite + "." + res.rstrip()] = summary[res]
                # One history object per (suite, license), as done for 'ALL'
                # below. Reusing a single object per suite would leave only
                # the last license of each suite in the session.
                lic = HistoryCopyright(suite, timestamp=now)
                lic.license = res.replace('_', ' ')
                lic.files = summary[res]
                if store_db:
                    session.add(lic)
            # no historical here, only save to file
            dual_query = statistics.licenses_summary_w_dual(temp)
            for res in dual_query:
                license_d_stats[suite + "." + res.rstrip()] = dual_query[res]

        # overall dual licenses
        overall_d_licenses = statistics.licenses_summary_w_dual(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_d_licenses:
            license_d_stats['overall.' + stat] = overall_d_licenses[stat]

        # save dual licenses to file
        if store_fs:
            statistics.save_metadata_cache(license_d_stats, dual_license_file)

        session.flush()
        overall_licenses = statistics.licenses_summary(
            statistics.get_licenses(session, 'ALL'))
        for stat in overall_licenses:
            lic = HistoryCopyright('ALL', timestamp=now)
            lic.license = stat.replace('_', ' ')
            lic.files = overall_licenses[stat]
            license_stats['overall.' + stat] = overall_licenses[stat]
            if store_db:
                session.add(lic)
        session.flush()
        if store_fs:
            statistics.save_metadata_cache(license_stats, license_stats_file)

    if 'copyright' in conf['hooks']:
        update_license_statistics(suites)