Example #1
0
import datetime

from sqlalchemy import create_engine

# user_mapping() and update_vars() are project helpers defined elsewhere


def main():
    userhash = user_mapping()
    engine = create_engine(
        "sqlite:////redacted_path_for_test/sumcoresg/queue_data.db", echo=True)
    jobs = engine.execute("select * from jobs where cluster ='Orca'").fetchall()
    # jobs = engine.execute("select * from jobs").fetchmany(6)
    jobs.sort(key=lambda x: x[1])    # sort by the created-timestamp column

    fmt = '%Y-%m-%d %H:%M:%S.%f'
    # if two commits are less than delta_t apart, assume they were parsed
    # from the same xml data
    delta_t = datetime.timedelta(microseconds=100000)           # 0.1 seconds

    # ref_created marks a batch of rows; it is not necessarily the exact
    # real time stamp of each row
    ref_created = None
    # rcu, qcu: running & queuing core usages
    rcu, qcu = {}, {}
    for k, j in enumerate(jobs):
        # cn: clustername; un: username
        cn = j[0].lower()
        created = datetime.datetime.strptime(j[1], fmt)
        un = userhash[j[2]]  # map to real name (real name == username here)
        cores = int(j[3]) * int(j[4])
        status = j[5]

        if not ref_created:
            # first time in the loop
            ref_created = created
        elif (created - ref_created) > delta_t:
            # gap larger than delta_t: start a new batch
            ref_created = created
        rcu, qcu = update_vars(cn, un, cores, status, ref_created, rcu, qcu)

    for cu in [rcu, qcu]:
        new_cu = {}
        for key in cu:
            # each key is a (clustername, username, ref_created) tuple
            clustername, username, ref_created = key
            # aggregate all users on a cluster under the 'testlab' pseudo-user
            pomeslab_key = (clustername, 'testlab', ref_created)
            new_cu[pomeslab_key] = new_cu.get(pomeslab_key, 0) + cu[key]
        cu.update(new_cu)    # merge the lab-wide totals back in
    return rcu, qcu
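
update_vars() is referenced above but not shown. Here is a minimal sketch of
what it plausibly does, inferred from how main() consumes rcu and qcu: keys
are (clustername, username, ref_created) tuples, values are summed core
counts, and the job status decides whether the cores count as running or
queued. The status strings 'R' and 'Q' below are assumptions, not taken from
the source.

def update_vars(cn, un, cores, status, ref_created, rcu, qcu):
    """Accumulate cores into the running (rcu) or queued (qcu) dict."""
    key = (cn, un, ref_created)
    # 'R'/'Q' are assumed status codes; the real values depend on the
    # scheduler's xml and may differ
    if status == 'R':
        rcu[key] = rcu.get(key, 0) + cores
    elif status == 'Q':
        qcu[key] = qcu.get(key, 0) + cores
    return rcu, qcu
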
Example #2
0
def collect_data(db):
    """should be ideally run in background"""
    # the names in interested_clusters are not arbitrary: they have to match
    # the clusternames in static/xml/clusters.xml, e.g. scinet is not the
    # same as Scinet, SciNet, or Sci Net

    # be sure to set the config var INTERESTED_CLUSTERS on heroku; the
    # default below is just for local testing
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca"
    ).split()

    # interested_clusters = ["orcinus"]

    # variable initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS
    while True:
        for ic in interested_clusters:  # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            created = datetime.datetime.now()
            if raw_xml:
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # scinet and nestor were raising
                # MemcachedError: error 37 from memcached_set: SUCCESS
                # those xml payloads may be too big for memcached, so keep
                # them in process memory for now (2012-06-12)
                # MEMC.set("RAW_XML", raw_xml_cache)

            # rcu, qcu: running & queuing core usages
            rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

            # 1. generate reports and cache it
            reports = MEMC.get("REPORTS")
            if not reports:  # meaning: first time
                reports = {}
            report = ic_obj.gen_report(rcu, qcu, usermap, created)
            reports[ic_obj.clustername] = report
            MEMC.set("REPORTS", reports)

            # 2. insert into the database
            insert2db(rcu, qcu, ic_obj, created, db)

            # 3. cache usage data for later plotting
            # dur_queries = [last_day_data, last_week_data, last_month_data,
            # last_year_data]
            dur_queries = prepare_data_for_plotting(ic, created, db)

            # this is for /.json kind of url
            figs_data_dict[ic] = {i: j for i, j in zip(durations, dur_queries)}
            MEMC.set("FIGS_DATA", figs_data_dict)

            # ldd:last_day_data;    lwd:last_week_data
            # lmd:last_month_data;  lyd:last_year_data
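            # each duration query is assumed to be a (timestamps, core_counts)
            # pair, which is what the inte_coresec(*...) calls below unpack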
            ldd, lwd, lmd, lyd = dur_queries
            total_sec_to_now = (lyd[0][-1] -
                                THE_VERY_BEGINNING).total_seconds()

            # inte_coresec: integrated core-seconds; the usage fraction is
            # core-seconds used / (quota * window length in seconds)
            usage_frac_dict[ic] = {
                'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
            }
            MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. Now let's do the real plotting, first: usage vs. time, then: histogram
        # 1). usage vs. time
        keys = sorted(figs_data_dict.keys())
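        # util.split_list is assumed to chunk the sorted cluster names into
        # groups of 4, so each figure plots at most 4 clusters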
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur, figs_data_dict,
                                      usage_frac_dict)

                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)  # clear up the memory

                # the figure naming pattern should be systematically
                # redesigned when the number of figures gets large
                ident = '_'.join([dur, str(index)])
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # 2). histogram plotting
        # pivot usage_frac_dict from {cluster: {dur: frac}} into
        # {dur: {cluster: frac}} for per-duration histograms
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]

        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.  # the width of the bars
            ind = np.arange(0, N, width)  # the x locations for the groups

            keys = sorted(usage_frac_dict_by_dur[dur],
                          key=lambda k: usage_frac_dict_by_dur[dur][k],
                          reverse=True)
            # durMeans must stay in the same sorted order as keys
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]

            fig = plt.figure(figsize=(16, 10))
            fig.subplots_adjust(
                bottom=0.2)  # otherwise, xticklabels cannot be shown fully
            ax = fig.add_subplot(111)

            for i, d in zip(ind, durMeans):
                # green if the usage fraction exceeds 1 (over quota), else red
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)

            ylim = list(ax.get_ylim())
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05

            ax.plot([0, 100], [1, 1], 'k--')    # quota line at usage == 1
            ax.set_xlim([0, N * width])
            ax.set_ylim(ylim)

            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur,
                         size=40,
                         family="monospace",
                         bbox={
                             'facecolor': 'red',
                             'alpha': 0.5
                         })
            ax.title.set_y(1.02)  # offset title position
            ax.set_xticks(ind + width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")

            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)

            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()

            db = update_the_figure(db, Figure, ident, fig_content, created)
        db.session.commit()

        # sleep before the next round; maybe 10 min is too frequent,
        # think about 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)  # sleep 10 min
        else:
            time.sleep(float(dt))
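
inte_coresec() is used above but not defined in this example. Here is a
minimal sketch under the assumption that each duration query is a pair of
parallel lists (timestamps, core_counts), as the dur_queries unpacking and
the inte_coresec(*ldd) calls suggest, using trapezoidal integration:

def inte_coresec(timestamps, core_counts):
    """Integrate core usage over time into core-seconds (trapezoidal rule)."""
    total = 0.0
    for t0, t1, c0, c1 in zip(timestamps, timestamps[1:],
                              core_counts, core_counts[1:]):
        dt = (t1 - t0).total_seconds()          # gap between samples, seconds
        total += 0.5 * (c0 + c1) * dt           # trapezoid area
    return total

Dividing the result by quota * window-seconds, as in usage_frac_dict above,
yields the dimensionless usage fraction that the histograms plot.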