# Shared imports for the standalone snippets below. Each function originally
# lived in its own module; project-specific names (Engine, FLAGS, config,
# get_optimizer, getFileList, logger, testcase, step1_job, K80_epoch_time,
# numpy_ewma_vectorized_v2, ...) are assumed to be defined by the
# surrounding code.
import glob
import json
import logging
import os
import re
import time
import traceback
from pathlib import Path

import humanize
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


def load_one_dir(path: Path):
    x = EventAccumulator(path=str(path))
    x.Reload()
    x.FirstEventTimestamp()  # forces the first event to be read; value unused
    keys = {
        'train/running_return': "Return",
        'debug/expected_q_values': "Q-values",
    }
    df = None
    for k, v in keys.items():
        try:
            time_steps = x.Scalars(k)
        except KeyError:
            logging.warning("Did not find key %s in %s", k, path)
            continue
        wall_time, steps, values = zip(*time_steps)
        df_new = pd.DataFrame(data={"Epoch": steps, v: values})
        df = df_new if df is None else df.merge(df_new, on="Epoch")
    if df is None:  # none of the expected tags were present
        return None
    # Directory layout is assumed to be .../experiment/dataset/method/seed
    experiment_name, data, method, seed = str(path).split("/")[-4:]
    n = len(df)
    df["Method"] = [method] * n
    df["Experiment"] = [experiment_name] * n
    df["Seed"] = [seed] * n
    df["Dataset"] = [data] * n
    return df
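# A minimal usage sketch for load_one_dir, assuming a hypothetical layout
# runs/<experiment>/<dataset>/<method>/<seed>/ containing event files. The
# root directory and glob pattern are illustrative, not from the source.
def load_all_dirs(root="runs"):
    frames = [load_one_dir(p) for p in Path(root).glob("*/*/*/*")]
    return pd.concat([f for f in frames if f is not None], ignore_index=True)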
def plot_q4(logdir, head='./cs285/data/', metric='Eval_AverageReturn'):
    titles = [
        'Effect of Ensemble Size',
        'Effect of Number of Candidate Action Sequences',
        'Effect of Planning Horizon',
    ]
    filenames = ['ensemble', 'numseq', 'horizon']
    for i in range(3):
        data = []
        for j in range(len(logdir[i])):
            event_acc = EventAccumulator(head + logdir[i][j])
            event_acc.Reload()
            _, _, eval_vals = zip(*event_acc.Scalars(metric))
            data.append(eval_vals)
        for j, dat in enumerate(data):
            x = np.arange(1, len(dat) + 1)
            # plt.scatter(x, dat)
            # Build a label such as "horizon=15" from the run name's
            # fourth '_'-separated token (e.g. "horizon15").
            text = re.findall(r'[A-Za-z]+|\d+', logdir[i][j].split('_')[3])
            label = text[0] + '=' + text[1]
            plt.plot(x, dat, label=label)
        plt.xlabel('Number of Iterations')
        plt.ylabel(metric)
        plt.title(titles[i])
        plt.legend()
        plt.savefig('./cs285/data/figures/' + filenames[i] + '.png')
        plt.show()
        plt.clf()
def start(self):
    """
    Poll the TensorBoard log directory until the last epoch is reached,
    writing mAP values to metrics.json whenever a new epoch appears.

    Returns:
        None
    """
    accumulator = None
    log = []
    step = -1
    last_saved_epoch = -1
    while step != self.last_epoch:
        if len(log) != 0 and accumulator is None:
            accumulator = EventAccumulator(log[0], self.tf_size_guidance)
        if accumulator is not None:
            accumulator.Reload()
            mAP = accumulator.Scalars("DetectionBoxes_Precision/mAP")
            for entry in mAP:
                self.precision[int(entry.step)] = float(entry.value)
            # Track the latest logged epoch so the loop can terminate;
            # the original never updated `step`, so it polled forever.
            step = int(mAP[-1].step)
            if last_saved_epoch != int(mAP[-1].step):
                last_saved_epoch = int(mAP[-1].step)
                with open(os.path.join(self.directory, "metrics.json"), 'w') as file:
                    json.dump({"precision": self.precision}, file)
        else:
            log = glob.glob(self.log_path)
        time.sleep(10)
def results(env_name):
    d = './runs/' + env_name + "/"
    folder_runs = [
        os.path.join(d, o) for o in os.listdir(d)
        if os.path.isdir(os.path.join(d, o))
    ]
    y_axis = []
    for file in folder_runs:
        event_acc = EventAccumulator(file)
        event_acc.Reload()
        w_times, step_nums, vals = zip(*event_acc.Scalars('data/avg_reward'))
        y_axis.append(list(vals))
    # zip(*y_axis) truncates every run to the length of the shortest one,
    # so runs of different lengths can still be averaged together.
    new_y_axis = list(map(list, zip(*y_axis)))
    mean_plot = np.mean(new_y_axis, axis=1)
    std_plot = np.std(new_y_axis, axis=1)
    x_axis = range(0, len(mean_plot))
    plt.errorbar(x_axis, mean_plot, yerr=2 * std_plot, fmt='b', ecolor='orange')
    plt.ylabel('average reward')
    plt.xlabel('episodes')
    plt.show()
def main(_):
    sns.color_palette()
    fig = plt.figure(figsize=(8, 4))
    ax = fig.gca()
    print(FLAGS.logdirs)
    for logdir in FLAGS.logdirs:
        print(logdir)
        samples = []
        rewards = []
        for seed in range(FLAGS.seeds):
            logdir_ = Path(logdir) / f'seed{seed}' / 'val'
            event_acc = EventAccumulator(str(logdir_))
            event_acc.Reload()
            _, step_nums, vals = zip(*event_acc.Scalars('val-mean_reward'))
            samples.append(step_nums)
            rewards.append(vals)
        samples = np.array(samples)
        # All seeds must have logged at the same steps for mean/std to line up.
        assert np.all(samples == samples[:1, :])
        rewards = np.array(rewards)
        mean_rewards = np.mean(rewards, 0)
        std_rewards = np.std(rewards, 0)
        ax.plot(samples[0, :], mean_rewards, label=logdir)
        ax.fill_between(samples[0, :], mean_rewards - std_rewards,
                        mean_rewards + std_rewards, alpha=0.2)
    ax.legend(loc=4)
    ax.set_ylim([0, 210])
    ax.grid(which='major')
    fig.savefig(FLAGS.output_file_name, bbox_inches='tight')
def load_data(args):
    run = {"SGD": 0, "Adam": 0, "K-FAC (one-level)": 0, "K-FAC (two-level)": 0}
    ind = {"SGD": 0, "Adam": 1, "K-FAC (one-level)": 2, "K-FAC (two-level)": 3}
    df_list = [[] for _ in range(len(run))]
    for root, _, files in os.walk(args.logdir):
        # Process each directory containing event files exactly once;
        # EventAccumulator reads a directory, and the original inner loop
        # re-read the same directory once per file, duplicating rows.
        if not files:
            continue
        event_acc = EventAccumulator(root)
        event_acc.Reload()
        optimizer = get_optimizer(root)
        print(optimizer)
        print(root)
        data = {}
        data['optimizer'] = optimizer
        data['run'] = run[optimizer]
        run[optimizer] += 1
        for i, scalar in enumerate(event_acc.Tags()['scalars']):
            if scalar == 'train/lr':
                continue
            times, epochs, values = zip(*event_acc.Scalars(scalar))
            if i == 1:
                # Wall times and epochs are shared across tags, so take
                # them from a single (arbitrary) scalar.
                data['times'] = np.asarray(times)[:args.stop]
                data['epochs'] = np.asarray(epochs)[:args.stop]
            data[scalar] = np.asarray(values)[:args.stop]
        df = pd.DataFrame(data)
        if run[optimizer] == 1:
            df_list[ind[optimizer]] = df
        else:
            df_list[ind[optimizer]] = pd.concat([df_list[ind[optimizer]], df])
    df = pd.concat(df_list)
    return df
class TensorUtils(object):
    def __init__(self, engine_id):
        self.eacc = None
        engine = Engine.query.filter_by(id=engine_id).first()
        if engine.model_path:
            tensor_path = os.path.join(engine.model_path, "tensorboard")
        else:
            tensor_path = os.path.join(engine.path, "model/tensorboard")
        files = glob.glob(os.path.join(tensor_path, "*"))
        if len(files) > 0:
            log = files[0]
            self.eacc = EventAccumulator(log)
            self.eacc.Reload()

    def is_loaded(self):
        return self.eacc is not None

    def get_tag(self, tag):
        if self.eacc:
            tags = self.eacc.Tags()
            if tag in tags.get('scalars'):
                return self.eacc.Scalars(tag)
        return []
def save_logs_as_png(job_dir):
    if not os.path.isfile(os.path.join(job_dir, 'job.json')):
        return
    with open(os.path.join(job_dir, 'job.json')) as f:
        job = json.load(f)
    files = glob.glob(os.path.join(job_dir, 'logdir/*'))
    path = files[0]
    event_acc = EventAccumulator(path)
    event_acc.Reload()
    training_accuracies = event_acc.Scalars('acc')
    validation_accuracies = event_acc.Scalars('val_acc')
    training_loss = event_acc.Scalars('loss')
    validation_loss = event_acc.Scalars('val_loss')
    steps = len(training_accuracies)
    x = np.arange(steps)
    y = np.zeros([steps, 4])
    for i in range(steps):
        # Each ScalarEvent is (wall_time, step, value); index 2 is the value.
        y[i, 0] = training_accuracies[i][2]
        y[i, 1] = validation_accuracies[i][2]
        y[i, 2] = training_loss[i][2]
        y[i, 3] = validation_loss[i][2]
    plt.plot(x, y[:, 0], label='acc')
    plt.plot(x, y[:, 1], label='val_acc')
    plt.plot(x, y[:, 2], label='loss')
    plt.plot(x, y[:, 3], label='val_loss')
    plt.ylim((0, 1))
    plt.xlabel("Steps")
    plt.ylabel("Values")
    plt.title(job['name'])
    plt.legend(loc='lower left', frameon=True)
    plt.grid()
    min_val_loss = np.min(y[:, 3])
    out_path = os.path.join(job_dir, 'plots_min_val_loss_%.4f.png' % min_val_loss)
    print('Saving ' + out_path)
    plt.savefig(out_path)
    plt.close()
def plot_q2(logdir, head='./cs285/data/'):
    event_acc = EventAccumulator(head + logdir[0])
    event_acc.Reload()
    _, step_nums, eval_vals = zip(*event_acc.Scalars('Eval_AverageReturn'))
    _, step_nums, train_vals = zip(*event_acc.Scalars('Train_AverageReturn'))
    x = np.arange(1, len(step_nums) + 1)
    eval_returns = np.array(eval_vals)
    train_returns = np.array(train_vals)
    plt.scatter(x, eval_returns, label='Eval_AverageReturn')
    plt.scatter(x, train_returns, label='Train_AverageReturn')
    plt.xlabel('Number of Iterations')
    plt.ylabel('Reward')
    plt.title('Obstacles Single Iteration')
    plt.legend()
    plt.savefig('./cs285/data/figures/q2.png')
    plt.show()
    plt.clf()
def plot_tensorflow_log():
    # Loading too much data is slow...
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 100,
        'histograms': 1,
    }
    has_range = "xrange" in config
    if has_range:
        cfg_range = config["xrange"]
        if len(cfg_range) > 1:
            data_range = range(cfg_range[0], cfg_range[1])
        else:
            data_range = range(cfg_range[0])
    for cfg in config["scalar_data"]:
        event_acc = EventAccumulator(cfg["path"], tf_size_guidance)
        event_acc.Reload()
        if debug_events:
            print(event_acc.Tags())  # show all tags in the log file
        events = event_acc.Scalars(cfg["tag"])
        steps = len(events)
        x = data_range if has_range else np.arange(steps)
        y = np.zeros(len(x))
        for i in range(len(x)):
            y[i] = events[i][2]  # ScalarEvent value
        plt.plot(x, y, label=cfg["plot_label"])
    for cfg in config["manual_data"]:
        events = cfg["data"]
        steps = len(events)
        x = data_range if has_range else np.arange(steps)
        y = np.zeros(len(x))
        for i in range(len(x)):
            y[i] = events[i]
        plt.plot(x, y, label=cfg["plot_label"])
    plt.xlabel(config["xlabel"])
    plt.ylabel(config["ylabel"])
    plt.ylim(bottom=98.9, top=99.4)
    plt.legend(loc=config["legend_loc"], frameon=True)
    plt.show()
def plot_from_summaries(summaries_path, title=None, samples_per_update=512,
                        updates_per_log=100):
    acc = EventAccumulator(summaries_path)
    acc.Reload()
    rews_mean = np.array([s[2] for s in acc.Scalars('Rewards/Mean')])
    rews_std = np.array([s[2] for s in acc.Scalars('Rewards/Std')])
    # Convert log indices to environment-sample counts for the x axis.
    x = samples_per_update * updates_per_log * np.arange(0, len(rews_mean))
    if not title:
        title = summaries_path.split('/')[-1].split('_')[0]
    plt.plot(x, rews_mean)
    plt.fill_between(x, rews_mean - rews_std, rews_mean + rews_std, alpha=0.2)
    plt.xlabel('Samples')
    plt.ylabel('Episode Rewards')
    plt.title(title)
    plt.xlim([0, x[-1] + 1])
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
def parse_progress(path, tf_size_guidance):
    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()
    tags = event_acc.Tags()['scalars']
    print(tags)  # show all tags in the log file
    runtimes_scalar = event_acc.Scalars('runtime_ms')
    runtimes = [runtimes_scalar[i].value for i in range(len(runtimes_scalar))]
    loss_scalar = event_acc.Scalars('loss')
    loss = [loss_scalar[i].value for i in range(len(loss_scalar))]
    assert len(runtimes) == len(loss)
    return runtimes, loss
def extract_scalar_from_event(path):
    event_acc = EventAccumulator(path, size_guidance={'scalars': 0})
    event_acc.Reload()  # can be very slow, depending on the size of the log file
    target_key = ['step', 'value', 'wall_time']
    scalars = {}
    for tag in event_acc.Tags()['scalars']:
        events = event_acc.Scalars(tag)
        scalars[tag] = [{k: getattr(event, k) for k in target_key}
                        for event in events]
    return scalars
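# A minimal usage sketch for extract_scalar_from_event; the log directory
# "logs/run1" is hypothetical. Each tag maps to a list of
# {'step', 'value', 'wall_time'} dicts, which drops straight into a DataFrame.
def preview_scalars(path="logs/run1"):
    scalars = extract_scalar_from_event(path)
    for tag, events in scalars.items():
        print(tag)
        print(pd.DataFrame(events).head())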
def load_nll(scalar_dir=None):
    file_dir = os.path.join(scalar_dir, "events.out.tfevents.*.gpu1")
    file_dir = glob.glob(file_dir)[-1]  # last matching event file (glob order)
    event_acc = EventAccumulator(file_dir)
    event_acc.Reload()
    # Get wall clock, step number and value for the 'nll_value/step' scalar.
    w_times, step_nums, vals = zip(*event_acc.Scalars('nll_value/step'))
    vals = np.array(vals)
    return vals.min()
def read_tf_event_file(tfevent_file):
    # Read a TensorFlow summary file and collect every scalar series by tag.
    event_acc = EventAccumulator(path=tfevent_file)
    event_acc.Reload()
    run_config = {}
    for scalar_summary_key in event_acc.Tags()['scalars']:
        _, step_nums, values = zip(*event_acc.Scalars(scalar_summary_key))
        run_config[scalar_summary_key] = values
    return run_config
def get_latency(tb_path):
    # Loading too much data is slow, so cap what the accumulator keeps.
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 100,
        'histograms': 1,
    }
    event_acc = EventAccumulator(tb_path, tf_size_guidance)
    event_acc.Reload()
    return event_acc.Scalars('latency')[-1].value  # most recent latency sample
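# Size-guidance sketch (hypothetical path): a value of 0 keeps every event of
# that type, while a positive value keeps a downsampled reservoir of roughly
# that many items, so the full-series length can exceed the capped one above.
def count_latency_events(path="logs/run1"):
    acc_all = EventAccumulator(path, size_guidance={'scalars': 0})
    acc_all.Reload()
    return len(acc_all.Scalars('latency'))  # full series, not capped at 100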
def tflog2pandas(path: str) -> dict:
    """Convert a single TensorFlow log file to pandas DataFrames.

    Parameters
    ----------
    path : str
        Path to the TensorFlow log file.

    Returns
    -------
    dict
        {'train': pd.DataFrame, 'test': pd.DataFrame}, split on whether
        'test' appears in the tag name.
    """
    DEFAULT_SIZE_GUIDANCE = {
        "compressedHistograms": 1,
        "images": 1,
        "scalars": 0,  # 0 means load all
        "histograms": 1,
    }
    # Initialize defaults so the exception path still returns valid frames.
    d_train, d_test = pd.DataFrame(), pd.DataFrame()
    try:
        event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        tags = event_acc.Tags()["scalars"]
        tag_train = [t for t in tags if 'test' not in t]
        tag_test = [t for t in tags if 'test' in t]
        d_train = pd.DataFrame({t: [e.value for e in event_acc.Scalars(t)]
                                for t in tag_train})
        d_test = pd.DataFrame({t: [e.value for e in event_acc.Scalars(t)]
                               for t in tag_test})
    # Dirty catch of DataLossError
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    return {'train': d_train, 'test': d_test}
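# Usage sketch for tflog2pandas, with hypothetical paths: dump the train and
# test scalars of one run to CSV for offline analysis.
def export_run_to_csv(path="logs/run1"):
    frames = tflog2pandas(path)
    frames['train'].to_csv("run1_train.csv", index=False)
    frames['test'].to_csv("run1_test.csv", index=False)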
def show_tensorflow_log(path, title_text='', output_image_name='validation_results.png'):
    # Loading too much data is slow...
    tf_size_guidance = {
        'scalars': 100
    }
    print('Start data accumulation')
    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()
    print('All tags:')
    print(event_acc.Tags())
    val_f1 = event_acc.Scalars('val_f1')
    val_precision = event_acc.Scalars('val_precision')
    val_mAP = event_acc.Scalars('val_mAP')
    val_recall = event_acc.Scalars('val_recall')
    print(val_f1)
    # The size guidance keeps at most 100 events per tag; use the actual
    # series length rather than assuming exactly 100 were retained.
    steps = len(val_f1)
    x = np.arange(steps)
    y = np.zeros([steps, 4])
    for i in tqdm(range(steps)):
        y[i, 0] = val_f1[i][2]
        y[i, 1] = val_precision[i][2]
        y[i, 2] = val_mAP[i][2]
        y[i, 3] = val_recall[i][2]
    plt.plot(x, y[:, 0], label='val_f1')
    plt.plot(x, y[:, 1], label='val_precision')
    plt.plot(x, y[:, 2], label='val_mAP')
    plt.plot(x, y[:, 3], label='val_recall')
    plt.xlabel("Epochs")
    plt.ylabel("Value")
    plt.title(title_text)
    plt.legend(loc='upper right', frameon=True)
    plt.savefig(output_image_name)
def tabulate_events(event):
    data = {}
    event_acc = EventAccumulator(event)
    event_acc.Reload()
    for tag in sorted(event_acc.Tags()["scalars"]):
        x = []
        for scalar_event in event_acc.Scalars(tag):
            x.append(scalar_event.value)
        data[tag] = np.array(x)
    return data
def get_log(path, key):
    event_acc = EventAccumulator(path)
    event_acc.Reload()
    w_times, step_nums, vals = zip(*event_acc.Scalars(key))
    return {
        'w_times': w_times,
        'step_nums': step_nums,
        'vals': vals,
    }
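# Usage sketch for get_log, assuming a hypothetical run directory and tag:
# plot a single scalar series against its step numbers.
def plot_one_scalar(path="logs/run1", key="loss"):
    log = get_log(path, key)
    plt.plot(log['step_nums'], log['vals'])
    plt.xlabel("step")
    plt.ylabel(key)
    plt.show()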
def load_tf_log(file_path):
    event_acc = EventAccumulator(file_path)
    event_acc.Reload()
    # Show all tags in the log file
    # print(event_acc.Tags())
    # Step numbers and values for the scalars 'corr' and 'loss'
    _, corr_steps, corr_vals = zip(*event_acc.Scalars('corr'))
    _, loss_steps, loss_vals = zip(*event_acc.Scalars('loss'))
    return corr_vals, loss_vals
def parse_runtime(path, tf_size_guidance):
    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()
    al = event_acc.Scalars('alpha_1_2')
    runtime_row = [al[i].value for i in range(len(al))]
    print(runtime_row)
    return runtime_row
def plot_tensorboard(train_file, valid_file, scalar_names, set_grid=False):
    # Read TensorBoard files
    train_event_acc = EventAccumulator(train_file)
    valid_event_acc = EventAccumulator(valid_file)
    train_event_acc.Reload()
    valid_event_acc.Reload()
    # Get scalar values
    train_scalars, valid_scalars = {}, {}
    for scalar_name in scalar_names:
        train_scalars[scalar_name] = train_event_acc.Scalars(scalar_name)
        valid_scalars[scalar_name] = valid_event_acc.Scalars(scalar_name)
    # Convert to lists; ScalarEvent fields are (wall_time, step, value).
    n_epochs = len(train_scalars["loss"])
    epochs = [train_scalars["loss"][i][1] for i in range(n_epochs)]
    train_lists, valid_lists = {}, {}
    for scalar_name in scalar_names:
        train_lists[scalar_name] = [
            train_scalars[scalar_name][i][2] for i in range(n_epochs)
        ]
        valid_lists[scalar_name] = [
            valid_scalars[scalar_name][i][2] for i in range(n_epochs)
        ]
    # Plot each scalar on its own figure
    for scalar_name in scalar_names:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        if set_grid:
            ax.set_xticks(epochs)
        ax.plot(epochs, train_lists[scalar_name], label='train')
        ax.plot(epochs, valid_lists[scalar_name], label='valid')
        plt.xlabel("epochs")
        plt.ylabel(scalar_name)
        plt.legend(frameon=True)
        plt.grid(True)
        plt.show()
def logCSV(log_dir, seed, csv_path, tag_name_lst=None):
    data = EventAccumulator(log_dir)
    data.Reload()
    for tag in data.Tags().get('scalars'):
        print(tag)
        # If a tag whitelist was given, skip tags that are not on it.
        if tag_name_lst and tag not in tag_name_lst:
            continue
        w_times, step_nums, vals = zip(*data.Scalars(tag))
        in_data = pd.DataFrame({'wall_time': w_times,
                                'step': step_nums,
                                'value': vals})
        in_data.to_csv("{}/{}_seed_{}.csv".format(csv_path, str(tag), str(seed)),
                       index=False)
def tbHistoryPlot(lgd):
    fn = getFileList(lgd)
    fn = fn[-1]  # last file in the listing
    eacc = EventAccumulator(lgd + '/' + fn)
    eacc.Reload()
    tj = eacc.Scalars('loss')
    vj = eacc.Scalars('val_loss')
    steps = len(tj)
    x = np.arange(steps)
    y = np.zeros([steps, 2])
    for i in range(steps):
        y[i, 0] = tj[i][2]  # value
        y[i, 1] = vj[i][2]
    plt.plot(x, y[:, 0], label='training loss')
    plt.plot(x, y[:, 1], label='validation loss')
    plt.xlabel("Steps")
    plt.ylabel("Loss")
    plt.title("Re-Training Progress")
    plt.legend(loc='upper right', frameon=True)
    plt.show()
def ckpt_fetch(latency, model, zone, run, setting=None):
    # event_acc = EventAccumulator('gs://shijian-18-ml/30-cluster/k80-demand-' + zone + '-run' + str(run) + '/eval')
    event_acc = EventAccumulator('gs://shijian-18-ml/30-cluster/a-' + setting +
                                 '-run' + str(run) + '/eval')
    event_acc.Reload()
    # Show all tags in the log file
    # print(event_acc.Tags())
    # Get wall clock, step number and value for the accuracy scalar.
    # w_times, step_nums, vals = zip(*event_acc.Scalars('global_step/sec'))
    w_times, step_nums, vals = zip(
        *event_acc.Scalars('metrics-image_cifar10/targets/accuracy'))
    return w_times, step_nums, vals
def gatherData(path, alpha):
    event_acc = EventAccumulator(path)
    event_acc.Reload()
    # Show all scalar tags in the log file
    print(event_acc.Tags()['scalars'])
    if len(event_acc.Tags()['scalars']) == 0:
        return
    # Get step numbers and values for the reward and loss scalars.
    _, stp, bestMeanRwd = zip(*event_acc.Scalars('Best_mean_reward'))
    _, stp, mean100ep = zip(*event_acc.Scalars('Mean_reward__100_episodes_'))
    _, stp, loss = zip(*event_acc.Scalars('Train_loss'))
    bestMeanRwd = np.array(bestMeanRwd)
    mean100ep = np.array(mean100ep)
    loss = np.array(loss)
    stp = np.array(stp)
    print(path)
    # Optional exponential smoothing, currently disabled:
    # bestMeanRwd = numpy_ewma_vectorized_v2(bestMeanRwd, alpha)
    # loss = numpy_ewma_vectorized_v2(loss, alpha)
    # mean100ep = numpy_ewma_vectorized_v2(mean100ep, alpha)
    return (stp, bestMeanRwd, mean100ep, loss)
def get_summary_as_array(model_dir, run="train", scalar="loss"):
    # Return a cached copy if one exists.
    name = "/{}_{}.npy".format(run, scalar)
    if os.path.exists(model_dir + name):
        return np.load(model_dir + name)
    event_file_path = glob.glob(model_dir + "/{}/event*".format(run))[0]
    event_acc = EventAccumulator(event_file_path)
    event_acc.Reload()
    # Use an ndarray in both paths so the return type is consistent.
    ret = np.array([[s.step, s.value] for s in event_acc.Scalars(scalar)])
    # np.save(model_dir + name, ret)
    return ret
def check_step1_complete(job_list):
    log_path = '/scratch/li.baol/tsrbrd_log/job_runs/' + testcase + '/'
    global step1_job
    global K80_epoch_time
    for job in job_list:
        if job not in step1_job and job != 'idle':
            log_dir = log_path + 'job' + job + '/*'
            dirs = glob.glob(log_dir)
            dirs.sort()
            if len(dirs) > 0:
                tc = dirs[0]
                iterator = EventAccumulator(tc).Reload()  # Reload() returns the accumulator
                tag = 'loss'
                try:
                    if len(iterator.Scalars(tag)) > 2:
                        # This way we can collect one epoch time.
                        wall_time = [t.wall_time for t in iterator.Scalars(tag)]
                        K80_epoch_time[job] = wall_time[1] - wall_time[0]
                        step1_job.append(job)
                        print('job' + job + ' has reached step1 complete')
                except Exception:
                    # The 'loss' tag may not exist yet; try again on the next call.
                    pass
def save_tag(event_acc: EventAccumulator, tag, outdir, ext='.csv'):
    # Each row is (wall_time, step, value) for one event of the given tag.
    raw_data = np.array(event_acc.Scalars(tag))
    filename = os.path.join(outdir, tag.replace('/', '_') + ext)
    if ext == '.npy':
        np.save(filename, raw_data)
    else:
        np.savetxt(filename, raw_data, header='walltime step value')
    logger.info(
        'Tag %s saved in %s (%s)' %
        (tag, filename, humanize.naturalsize(os.path.getsize(filename))))
    return raw_data
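# Usage sketch for save_tag with hypothetical log and output directories:
# reload once, then dump every scalar tag to its own CSV file. Assumes the
# module-level `logger` used by save_tag has been configured.
def export_all_tags(log_dir="logs/run1", outdir="exported"):
    os.makedirs(outdir, exist_ok=True)
    event_acc = EventAccumulator(log_dir)
    event_acc.Reload()
    for tag in event_acc.Tags()['scalars']:
        save_tag(event_acc, tag, outdir, ext='.csv')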