def do_fig_plotting(fig, ax, key_group, dur, figs_data_dict, usage_frac_dict):
    """Plot running-core usage (as percentage of quota) vs. time for every
    cluster in key_group on the given axes, draw a dashed reference line at
    100%, and return the decorated figure.

    :param fig: matplotlib Figure being drawn on (returned after decoration)
    :param ax: matplotlib Axes belonging to fig
    :param key_group: iterable of clusternames to plot on this one figure
    :param dur: duration key ('day', 'week', 'month' or 'year') selecting
        which cached time series to plot
    :param figs_data_dict: {clustername: {dur: (x, y)}} cached time series;
        x is a sequence of datetimes, y a numeric array of running cores
    :param usage_frac_dict: {clustername: {dur: usage_fraction}} shown in
        the legend labels
    :return: fig, after plotting and axis decoration
    """
    for key in key_group:
        # note: key is a ic (interested-cluster name)
        x, y = figs_data_dict[key][dur]
        # note: key_obj is a ic_obj
        key_obj = util.gen_cluster_obj_from_clustername(key)
        # y is converted to a percentage of the cluster's core quota
        ax.plot(x, y / float(key_obj.quota) * 100, 'o-', linewidth=2,
                label="{0} | {1} | {2:.1%}".format(
                    key_obj.clustername, key_obj.quota,
                    usage_frac_dict[key_obj.clustername][dur]))
    ax.xaxis.set_major_locator(MaxNLocator(15))
    # short ranges get hour-resolution tick labels, long ranges dates only
    if dur in ['day', 'week']:
        dtfmt = "%Y-%m-%d %H:%M"
    else:
        dtfmt = "%Y-%m-%d"
    ax.xaxis.set_major_formatter(DateFormatter(dtfmt))
    ax.minorticks_on()
    ax.grid(b=True, which="both")
    leg = ax.legend(loc="best")
    leg.get_frame().set_alpha(0.2)
    fig.autofmt_xdate()

    # It's probably better to plot percentage
    ax.set_ylabel("Running Cores (%)", labelpad=50)

    # remember current limits so they can be restored after the reference
    # line (drawn over a huge date span) clobbers them
    xlim = list(ax.get_xlim())  # get_xlim() returns a tuple
    ylim = list(ax.get_ylim())  # get_ylim() returns a tuple
    ylim[0] = -5  # to see 0 more clearly
    # make sure we see this line at y=100%
    ylim[1] = ylim[1] * 1.1 if ylim[1] > 100 else 105
    # drawing reference line, this will mess up the xlim, need to
    # find a way to avoid it 2012-06-28
    beg_datetime = datetime.datetime(2000, 1, 1)
    end_datetime = datetime.datetime(2050, 1, 1)
    ax.plot([beg_datetime, end_datetime], [100, 100], 'k--')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    return fig
def collect_data(db):
    """Main data-collection loop; should ideally be run in background.

    Repeatedly (every DELTAT seconds, default 600):
      1. fetches and parses each interested cluster's raw usage XML,
      2. caches per-cluster reports in memcache and inserts usage rows
         into the database,
      3. caches per-duration time series and usage fractions, and
      4. renders usage-vs-time and histogram PNGs into the database.

    :param db: database handle with a SQLAlchemy-style ``db.session``

    NOTE(review): the source's whitespace was mangled; the nesting below
    (in particular whether step 4 runs after the per-cluster loop or
    inside it) is reconstructed — confirm against version control.
    """
    # the names in interested_clusters are not arbitrary, it has to match the
    # clusternames in static/xml/clusters.xml, e.g. scinet is not the same as
    # Scinet, SciNet, or Sci Net
    # be sure to use config var INTERESTED_CLUSTERS on heroku
    # here, just use scinet is for local testing,
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca"
    ).split()
    # interested_clusters = ["orcinus"]

    # sort of variables initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    # NOTE(review): delta_ts and resolutions appear unused in this function
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS
    while True:  # loop forever; sleep interval is chosen at the bottom
        for ic in interested_clusters:
            # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            # timestamp for this collection pass (naive local time)
            created = datetime.datetime.now()
            if raw_xml:
                # keep the raw XML in process memory, keyed by clustername
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # having such error for scinet and nestor,
                # MemcachedError: error 37 from memcached_set: SUCCESS
                # guess those xml data may be too big for memcached,
                # using system memory instead for now 2012-06-12
                # MEMC.set("RAW_XML", raw_xml_cache)

                # rcu, qcu: running & queuing core usages
                rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

                # 1. generate reports and cache it
                reports = MEMC.get("REPORTS")
                if not reports:
                    # meaning: first time
                    reports = {}
                report = ic_obj.gen_report(rcu, qcu, usermap, created)
                reports[ic_obj.clustername] = report
                MEMC.set("REPORTS", reports)

                # 2. insert to database
                insert2db(rcu, qcu, ic_obj, created, db)

                # 3. cache usage data for later plotting
                # dur_queries = [last_day_data, last_week_data, last_month_data,
                #                last_year_data]
                dur_queries = prepare_data_for_plotting(ic, created, db)
                # this is for /.json kind of url
                figs_data_dict[ic] = {i: j for i, j in zip(durations, dur_queries)}
                MEMC.set("FIGS_DATA", figs_data_dict)

                # ldd:last_day_data; lwd:last_week_data
                # lmd:last_month_data; lyd:last_year_data
                ldd, lwd, lmd, lyd = dur_queries
                # seconds from the earliest recorded point to the newest
                # year-series timestamp; used to normalize the 'year' fraction
                total_sec_to_now = (lyd[0][-1] - THE_VERY_BEGINNING).total_seconds()
                # inte_coresec: integrate core seconds
                usage_frac_dict[ic] = {
                    'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                    'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                    'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                    'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
                }
                MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. Now let's do the real plotting, first: usage vs. time, then: histogram
        # NOTE(review): 'created' below is the timestamp of the last cluster
        # iterated above; it is unbound if interested_clusters is empty.
        # 1). usage vs. time -- one figure per duration per group of 4 clusters
        keys = sorted(figs_data_dict.keys())
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur,
                                      figs_data_dict, usage_frac_dict)
                # render to PNG in memory (Python 2 StringIO), then persist
                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)  # clear up the memory
                # figure naming pattern should be systematically redesigned
                # when # gets large
                ident = str('_'.join([dur, str(index)]))
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
                db.session.commit()

        # 2). histogram plotting
        # invert usage_frac_dict: {ic: {dur: frac}} -> {dur: {ic: frac}}
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]
        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.  # the width of the bars
            ind = np.arange(0, N, width)  # the x locations for the groups
            # NOTE(review): Python 2 only -- dict.keys() returns a list here;
            # under Python 3 .sort() would fail on the view object
            keys = usage_frac_dict_by_dur[dur].keys()
            # make sure the order is right (descending by usage fraction)
            keys.sort(key=lambda k: usage_frac_dict_by_dur[dur][k],
                      reverse=True)
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]
            fig = plt.figure(figsize=(16, 10))
            fig.subplots_adjust(
                bottom=0.2)  # otherwise, xticklabels cannot be shown fully
            ax = fig.add_subplot(111)
            for i, d in zip(ind, durMeans):
                # 'g': green; 'r': red -- green when usage exceeds quota
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)
            ylim = list(ax.get_ylim())
            # headroom so the 100%-of-quota reference line stays visible
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05
            ax.plot([0, 100], [1, 1], 'k--')
            ax.set_xlim([0, N * width])
            ax.set_ylim(ylim)
            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur, size=40, family="monospace",
                         bbox={'facecolor': 'red', 'alpha': 0.5})
            ax.title.set_y(1.02)  # offset title position
            ax.set_xticks(ind + width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")
            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)
            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()
            db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # when at last, maybe 10min is too frequent, think about 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)  # sleep 10 min
        else:
            time.sleep(float(dt))
def collect_data(db):
    """Main data-collection loop; should ideally be run in background.

    NOTE(review): this is a token-for-token DUPLICATE of the earlier
    collect_data definition in this file; being defined later, it shadows
    the first at import time. One of the two copies should be deleted --
    confirm which against version control before removing.

    Repeatedly (every DELTAT seconds, default 600): fetches each cluster's
    usage XML, caches reports, inserts usage rows into the database, caches
    per-duration series/fractions, and renders PNG figures into the DB.

    :param db: database handle with a SQLAlchemy-style ``db.session``

    NOTE(review): the source's whitespace was mangled; the nesting below
    (in particular whether the plotting section runs after the per-cluster
    loop or inside it) is reconstructed -- confirm against version control.
    """
    # the names in interested_clusters are not arbitrary, it has to match the
    # clusternames in static/xml/clusters.xml, e.g. scinet is not the same as
    # Scinet, SciNet, or Sci Net
    # be sure to use config var INTERESTED_CLUSTERS on heroku
    # here, just use scinet is for local testing,
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca").split()
    # interested_clusters = ["orcinus"]

    # sort of variables initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    # NOTE(review): delta_ts and resolutions appear unused in this function
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS
    while True:  # loop forever; sleep interval is chosen at the bottom
        for ic in interested_clusters:
            # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            # timestamp for this collection pass (naive local time)
            created = datetime.datetime.now()
            if raw_xml:
                # keep the raw XML in process memory, keyed by clustername
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # having such error for scinet and nestor,
                # MemcachedError: error 37 from memcached_set: SUCCESS
                # guess those xml data may be too big for memcached,
                # using system memory instead for now 2012-06-12
                # MEMC.set("RAW_XML", raw_xml_cache)

                # rcu, qcu: running & queuing core usages
                rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

                # 1. generate reports and cache it
                reports = MEMC.get("REPORTS")
                if not reports:
                    # meaning: first time
                    reports = {}
                report = ic_obj.gen_report(rcu, qcu, usermap, created)
                reports[ic_obj.clustername] = report
                MEMC.set("REPORTS", reports)

                # 2. insert to database
                insert2db(rcu, qcu, ic_obj, created, db)

                # 3. cache usage data for later plotting
                # dur_queries = [last_day_data, last_week_data, last_month_data,
                #                last_year_data]
                dur_queries = prepare_data_for_plotting(ic, created, db)
                # this is for /.json kind of url
                figs_data_dict[ic] = {i:j for i, j in zip(durations, dur_queries)}
                MEMC.set("FIGS_DATA", figs_data_dict)

                # ldd:last_day_data; lwd:last_week_data
                # lmd:last_month_data; lyd:last_year_data
                ldd, lwd, lmd, lyd = dur_queries
                # seconds from the earliest recorded point to the newest
                # year-series timestamp; used to normalize the 'year' fraction
                total_sec_to_now = (
                    lyd[0][-1] - THE_VERY_BEGINNING).total_seconds()
                # inte_coresec: integrate core seconds
                usage_frac_dict[ic] = {
                    'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                    'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                    'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                    'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
                }
                MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. Now let's do the real plotting, first: usage vs. time, then: histogram
        # NOTE(review): 'created' below is the timestamp of the last cluster
        # iterated above; it is unbound if interested_clusters is empty.
        # 1). usage vs. time -- one figure per duration per group of 4 clusters
        keys = sorted(figs_data_dict.keys())
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur,
                                      figs_data_dict, usage_frac_dict)
                # render to PNG in memory (Python 2 StringIO), then persist
                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)  # clear up the memory
                # figure naming pattern should be systematically redesigned
                # when # gets large
                ident = str('_'.join([dur, str(index)]))
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
                db.session.commit()

        # 2). histogram plotting
        # invert usage_frac_dict: {ic: {dur: frac}} -> {dur: {ic: frac}}
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]
        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.  # the width of the bars
            ind = np.arange(0, N, width)  # the x locations for the groups
            # NOTE(review): Python 2 only -- dict.keys() returns a list here;
            # under Python 3 .sort() would fail on the view object
            keys = usage_frac_dict_by_dur[dur].keys()
            # make sure the order is right (descending by usage fraction)
            keys.sort(key=lambda k:usage_frac_dict_by_dur[dur][k],
                      reverse=True)
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]
            fig = plt.figure(figsize=(16, 10))
            fig.subplots_adjust(bottom=0.2)  # otherwise, xticklabels cannot be shown fully
            ax = fig.add_subplot(111)
            for i, d in zip(ind, durMeans):
                # 'g': green; 'r': red -- green when usage exceeds quota
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)
            ylim = list(ax.get_ylim())
            # headroom so the 100%-of-quota reference line stays visible
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05
            ax.plot([0, 100], [1, 1], 'k--')
            ax.set_xlim([0, N*width])
            ax.set_ylim(ylim)
            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur, size=40, family="monospace",
                         bbox={'facecolor':'red', 'alpha':0.5})
            ax.title.set_y(1.02)  # offset title position
            ax.set_xticks(ind+width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")
            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)
            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()
            db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # when at last, maybe 10min is too frequent, think about 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)  # sleep 10 min
        else:
            time.sleep(float(dt))