def check_step1_complete(job_list):
    """Mark jobs whose TensorBoard logs show at least one completed epoch.

    For each job in *job_list* (skipping ``'idle'`` and jobs already in
    ``step1_job``), locate its newest-sorted run directory under the
    testcase log path. If the ``'loss'`` scalar has more than two points,
    record the first-epoch duration in ``V100_epoch_time[job]`` and append
    the job to ``step1_job``.

    Side effects: mutates the module-level ``step1_job`` list and
    ``V100_epoch_time`` dict; prints a progress line per completed job.
    """
    log_path = '/scratch/li.baol/tsrbrd_log/job_runs/' + testcase + '/'
    global step1_job
    global V100_epoch_time
    for job in job_list:
        if job not in step1_job and job != 'idle':
            log_dir = log_path + 'job' + job + '/*'
            dirs = glob.glob(log_dir)
            dirs.sort()
            if len(dirs) > 0:
                tc = dirs[0]
                iterator = EventAccumulator(tc).Reload()
                tag = 'loss'
                try:
                    # Need >2 scalar points so that wall_time[1] - wall_time[0]
                    # covers one full epoch (this way we can collect one epoch time).
                    if len(iterator.Scalars(tag)) > 2:
                        wall_time = [t.wall_time for t in iterator.Scalars(tag)]
                        V100_epoch_time[job] = wall_time[1] - wall_time[0]
                        step1_job.append(job)
                        print('job' + job + ' has reached step1 complete')
                except KeyError:
                    # Scalars() raises KeyError when the tag has not been
                    # written yet -- the job simply has not reached step1.
                    # (Was a blanket `except Exception`, which hid real bugs.)
                    pass
def tflog2pandas(path: str) -> pd.DataFrame:
    """Convert a single TensorFlow event log file to a pandas DataFrame.

    Parameters
    ----------
    path : str
        Path to the TensorFlow log file (or its containing directory).

    Returns
    -------
    pd.DataFrame
        Long-format frame with columns ``metric``, ``value`` and ``step``.
        If the event file cannot be read, the (possibly empty) frame built
        so far is returned and the error is printed.
    """
    DEFAULT_SIZE_GUIDANCE = {
        "compressedHistograms": 1,
        "images": 1,
        "scalars": 0,  # 0 means load all
        "histograms": 1,
    }
    runlog_data = pd.DataFrame({"metric": [], "value": [], "step": []})
    try:
        event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        tags = event_acc.Tags()["scalars"]
        # Collect one frame per tag and concatenate once at the end;
        # concatenating inside the loop is quadratic in the total row count.
        frames = [runlog_data]
        for tag in tags:
            event_list = event_acc.Scalars(tag)
            values = [e.value for e in event_list]
            step = [e.step for e in event_list]
            frames.append(pd.DataFrame(
                {"metric": [tag] * len(step), "value": values, "step": step}))
        runlog_data = pd.concat(frames)
    # Catch read failures (e.g. DataLossError) with `except Exception`
    # rather than a bare `except:`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    return runlog_data
def tabulate_events(inpath, outpath, tags):
    """Export the given scalar *tags* of every run under *inpath* to CSV.

    One CSV per (run, tag) pair is written into *outpath*, named
    ``<run>_<tag with '/' replaced by '_'>.csv``. Runs that lack a tag are
    reported and skipped rather than aborting the export.
    """
    summary_iterators = [
        EventAccumulator(os.path.join(inpath, dname)).Reload()
        for dname in os.listdir(inpath)
    ]
    for tag in tags:
        for it in summary_iterators:
            try:
                run_name = ntpath.basename(it.path)
                csv_path = os.path.join(
                    outpath, run_name + '_' + tag.replace('/', '_') + '.csv')
                pd.DataFrame(it.Scalars(tag)).to_csv(csv_path)
                print('Success: {} - {}'.format(tag, it.path))
            except KeyError:
                # Scalars() raises KeyError when this run has no such tag.
                # (Removed a redundant trailing `pass` after the print.)
                print('Error: {} - {}'.format(tag, it.path))
def test_eval_callback_logs_are_written_with_the_correct_timestep(tmp_path):
    """Check that EvalCallback's tensorboard events land on eval_freq multiples."""
    # Skip if no tensorboard installed
    pytest.importorskip("tensorboard")
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
    env_name = select_env(DQN)
    model = DQN(
        "MlpPolicy",
        env_name,
        policy_kwargs=dict(net_arch=[32]),  # tiny network keeps the test fast
        tensorboard_log=tmp_path,
        verbose=1,
        seed=1,
    )
    eval_env = gym.make(env_name)
    # 101 is deliberately not a divisor of 500, so passing steps cannot be
    # coincidental multiples of the training length.
    eval_freq = 101
    eval_callback = EvalCallback(eval_env, eval_freq=eval_freq, warn=False)
    model.learn(500, callback=eval_callback)
    # "DQN_1" -- presumably the default run sub-directory created by the
    # first learn() call with this tensorboard_log; verify against the logger.
    acc = EventAccumulator(str(tmp_path / "DQN_1"))
    acc.Reload()
    # Every recorded eval/mean_reward event must sit on an eval_freq boundary.
    for event in acc.scalars.Items("eval/mean_reward"):
        assert event.step % eval_freq == 0
def test_tensorboard_hparams_reload(tmpdir):
    """End-to-end check that saved hyperparameters survive a TensorBoard reload."""

    class CustomModel(BoringModel):
        def __init__(self, b1=0.5, b2=0.999):
            super().__init__()
            # Persists b1/b2 to self.hparams and to the hparams.yaml artifact.
            self.save_hyperparameters()

    trainer = Trainer(max_steps=1, default_root_dir=tmpdir)
    model = CustomModel()
    assert trainer.log_dir == trainer.logger.log_dir
    trainer.fit(model)
    assert trainer.log_dir == trainer.logger.log_dir
    folder_path = trainer.log_dir

    # make sure yaml is there
    with open(os.path.join(folder_path, "hparams.yaml")) as file:
        # The FullLoader parameter handles the conversion from YAML
        # scalar values to Python the dictionary format
        yaml_params = yaml.safe_load(file)
        assert yaml_params["b1"] == 0.5
        assert yaml_params["b2"] == 0.999
        # Exactly the two saved hyperparameters -- nothing extra leaked in.
        assert len(yaml_params.keys()) == 2

    # verify artifacts
    assert len(os.listdir(os.path.join(folder_path, "checkpoints"))) == 1

    # verify tb logs
    event_acc = EventAccumulator(folder_path)
    event_acc.Reload()

    # Serialized hparams plugin payload for b1, b2 and the hp_metric entry;
    # compared byte-for-byte against what the logger wrote. Do not edit.
    hparams_data = b'\x12\x1f"\x06\n\x02b1 \x03"\x06\n\x02b2 \x03*\r\n\x0b\x12\thp_metric'

    assert event_acc.summary_metadata['_hparams_/experiment'].plugin_data.plugin_name == 'hparams'
    assert event_acc.summary_metadata['_hparams_/experiment'].plugin_data.content == hparams_data
def merge_runs(dir_path, result_name, new_dir_name='tf_merged', tensorboard=False):
    """Plot mean +/- std of 'episode_reward' across repeated runs of each run type.

    Runs in *dir_path* are grouped by their name with trailing digits stripped
    (via ``get_rid_of_num``); each group's curves are linearly interpolated
    onto a common step range, then the per-step mean and std band are drawn
    and saved to *result_name*. With ``tensorboard=True`` the merged values
    are additionally written as TF1 histogram summaries under
    ``./log/<new_dir_name>``.
    """
    diff_run_types = list(
        set([
            get_rid_of_num(list(name)) for name in os.listdir(dir_path)
            if name != 'merge_runs.py'
        ]))
    summary_iterators = []
    for name in diff_run_types:
        # One accumulator per run whose directory name contains this run type.
        summary_iterators.append([
            EventAccumulator(os.path.join(dir_path, dname)).Reload()
            for dname in os.listdir(dir_path) if name in dname
        ])
    tags = [iterator[0].Tags()['scalars'] for iterator in summary_iterators]
    for idx, sum_it in enumerate(summary_iterators):
        for it in sum_it:
            # All runs of a type must log the same scalar tags.
            assert it.Tags()['scalars'] == tags[idx]
    to_merge = ['episode_reward']
    for tag in to_merge:
        fig, ax = plt.subplots(1)
        ax.set_title(tag)
        ax.set_xlabel('steps')
        ax.set_ylabel('episode reward')
        ax.grid()
        colors = ['red', 'green', 'blue', 'yellow']
        fig.tight_layout()
        for idx, sum_it in enumerate(summary_iterators):
            summaries_events = [summary.Scalars(tag) for summary in sum_it]
            # Clip to the step range covered by ALL runs of this type so the
            # interpolators are never queried outside their data.
            end_point = min([events[-1].step for events in summaries_events])
            start_point = max([events[0].step for events in summaries_events])
            steps = [step for step in range(start_point, end_point + 1)]
            interpolated_data = []
            for events in summaries_events:
                event_steps = [event.step for event in events]
                event_data = [event.value for event in events]
                interpolated_data.append(interp1d(event_steps, event_data))
            # matrix_form: rows = steps, columns = runs.
            matrix_form = []
            for step in steps:
                matrix_form.append(
                    [data(step).item(0) for data in interpolated_data])
            matrix_form = np.asarray(matrix_form)
            max_values = np.amax(matrix_form, axis=1)
            min_values = np.amin(matrix_form, axis=1)
            mean = matrix_form.mean(axis=1)
            sigma = matrix_form.std(axis=1)
            #fig, ax = plt.subplots(1)
            ax.plot(steps,
                    mean,
                    lw=1,
                    label=diff_run_types[idx],
                    color=colors[idx % len(colors)])
            # Shaded +/- one-standard-deviation band around the mean curve.
            ax.fill_between(steps,
                            mean + sigma,
                            mean - sigma,
                            facecolor=colors[idx % len(colors)],
                            alpha=0.5)
            if tensorboard:
                # TF1-style graph-mode summary writing (tf.placeholder/Session).
                merged_data_ = tf.placeholder(tf.float32)
                summary_op = tf.summary.histogram(tag + '_merged', merged_data_)
                with tf.Session() as sess:
                    writer = tf.summary.FileWriter('./log/' + new_dir_name)
                    for step in steps:
                        merged_summary = sess.run(
                            summary_op,
                            feed_dict={
                                merged_data_: [
                                    data(step).item(0)
                                    for data in interpolated_data
                                ]
                            })
                        writer.add_summary(merged_summary, step)
        lgd = ax.legend(loc='upper left')
        plt.savefig(result_name, bbox_extra_artists=(lgd, ), bbox_inches='tight')
        plt.close()
'wspace': 0 }, figsize=(12, 5)) fig.suptitle("accuracy curve during training") for key, value in testcases.items(): log_dir = base_dir + value + '_*/' dirs = glob.glob(log_dir) dirs.sort() time_all = [] # time for all 10 reps accuracy_all = [] for tc in dirs: model = tc.split('/')[5 + 1] iterator = EventAccumulator(tc).Reload() tag = iterator.Tags()['scalars'][3] # this is tag for accuracy accuracy = [item.value for item in iterator.Scalars(tag)] wall_time = [t.wall_time for t in iterator.Scalars(tag)] relative_time = [(time - wall_time[0]) / 3600 for time in wall_time] time_all.append(relative_time) accuracy_all.append(accuracy) time_avg = [0] * len(time_all[0]) accuracy_avg = [0] * len(time_all[0]) for j in range(len(time_all)): # 10 time_avg = np.add(time_all[j], time_avg) accuracy_avg = np.add(accuracy_all[j], accuracy_avg)
# Evaluate logs fix, ax = plt.subplots(figsize=(15, 10)) for i, config in enumerate(log_configs): print('Parsing %d/%d (%s)' % (i + 1, len(log_configs), config['label'])) folder = os.path.join(rootdir, config['folder']) eventfiles = glob.glob(path.join(folder, 'events.out.tfevents.*')) if len(eventfiles) == 0: logging.warning('No event file found in %s. Skipping config.' % config['folder']) continue elif len(eventfiles) > 1: logging.warning('Multiple event files found in %s.' % config['folder']) event_acc = EventAccumulator(folder) event_acc.Reload() time_out = pd.DataFrame( event_acc.Scalars('inception_score/Wall_clock_time')) inception_out = pd.DataFrame(event_acc.Scalars('inception_score/mean')) df = pd.merge(time_out[['step', 'value']], inception_out[['step', 'value']], on='step') df.columns = ['step', 'time', 'inception score'] df['inception score'] = df['inception score'].rolling(window=mean_window, center=False).mean() ax.plot(df[xaxis], df['inception score'], label=config['label'])
def extract(dpath, dname):
    """Collect scalar series from the event data under ``dpath/dname``.

    Returns a dict mapping each scalar key to a tuple of
    ``(steps, mean_wall_times, values_per_run)``. Asserts that all runs
    share the same keys and step numbering.
    """
    import tensorflow.compat.v1 as tf
    from tensorflow.core.util.event_pb2 import Event

    # Single-element list: kept as a list so the multi-run validation
    # below works unchanged if more accumulators are added.
    scalar_accumulators = [
        EventAccumulator(os.path.join(dpath, dname)).Reload().scalars
    ]

    # Filter non event files
    scalar_accumulators = [
        scalar_accumulator for scalar_accumulator in scalar_accumulators
        if scalar_accumulator.Keys()
    ]

    # Get and validate all scalar keys
    all_keys = [
        tuple(scalar_accumulator.Keys())
        for scalar_accumulator in scalar_accumulators
    ]
    assert (
        len(set(all_keys)) == 1
    ), "All runs need to have the same scalar keys. There are mismatches in {}".format(
        all_keys)
    keys = all_keys[0]

    # For each key: the list of ScalarEvent sequences, one per accumulator.
    all_scalar_events_per_key = [[
        scalar_accumulator.Items(key)
        for scalar_accumulator in scalar_accumulators
    ] for key in keys]

    # Get and validate all steps per key
    all_steps_per_key = [[
        tuple(scalar_event.step for scalar_event in scalar_events)
        for scalar_events in all_scalar_events
    ] for all_scalar_events in all_scalar_events_per_key]
    for i, all_steps in enumerate(all_steps_per_key):
        assert (
            len(set(all_steps)) == 1
        ), "For scalar {} the step numbering or count doesn't match. Step count for all runs: {}".format(
            keys[i], [len(steps) for steps in all_steps])
    steps_per_key = [all_steps[0] for all_steps in all_steps_per_key]

    # Get and average wall times per step per key
    wall_times_per_key = [
        np.mean(
            [
                tuple(scalar_event.wall_time for scalar_event in scalar_events)
                for scalar_events in all_scalar_events
            ],
            axis=0,
        ) for all_scalar_events in all_scalar_events_per_key
    ]

    # Get values per step per key
    values_per_key = [[[scalar_event.value for scalar_event in scalar_events]
                      for scalar_events in all_scalar_events]
                      for all_scalar_events in all_scalar_events_per_key]

    all_per_key = dict(
        zip(keys, zip(steps_per_key, wall_times_per_key, values_per_key)))
    return all_per_key
def plot_func(ax,
              method_dirs,
              color,
              label,
              marker,
              smooth_index=10,
              alpha=0.4,
              linewidth=1.0,
              scatter_space=500):
    """Plot the mean reward curve over seeds with scatter markers and a std band.

    input:
        method_dirs : ['algo1/seed1', ... 'algo1/seed4'] -- one tensorboard
        run directory per seed.

    Reads 'episode_threshold' for SAC and 'episode_reward' otherwise, smooths
    each seed's curve with a trailing window of *smooth_index*, truncates all
    seeds to the shortest curve, and draws mean/std onto *ax*.
    """
    ##### extract data from all seeds #####
    y_seeds = []
    y_length = []
    seed_count = len(method_dirs)
    # Renamed loop variable: `dir` shadowed the builtin dir().
    for run_dir in method_dirs:
        event = EventAccumulator(run_dir)
        event.Reload()
        if (label == 'SAC'):
            y = event.scalars.Items('episode_threshold')  # threshold_value
        else:
            y = event.scalars.Items('episode_reward')
        y_len = len(y)  # threshold_len
        y_length.append(y_len)
        ###### smoothing: trailing moving average over smooth_index points ###
        smooth_array = np.zeros(y_len)
        for i in range(y_len):
            smooth_array[i] = np.array(
                [j.value for j in y[i:i + smooth_index]]).mean()
        y_seeds.append(smooth_array)
    ###### reshape y_data into [num_seeds, min_y_length] #####
    min_y_length = min(y_length)
    y_seeds_new = np.zeros((seed_count, min_y_length))
    for i in range(seed_count):
        y_seeds_new[i] = y_seeds[i][0:min_y_length]
    y_seeds = y_seeds_new  # [4, min_y_len]
    print(y_seeds.shape)
    ###### plot y_seeds with color_mean, scatter_marker, std_shadow ##########
    y_mean = y_seeds.mean(axis=0)
    x_mean = [i for i in range(min_y_length)]
    ax.plot(x_mean, y_mean, color=color, linewidth=linewidth)
    # dtype=int: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    x_scatter = np.linspace(0,
                            min_y_length - 1,
                            int(min_y_length / scatter_space),
                            dtype=int)
    y_scatter = y_mean[x_scatter]
    ax.scatter(x_scatter,
               y_scatter,
               color=color,
               label=label,
               marker=marker,
               s=100)
    y_std = y_seeds.std(axis=0)
    upper_bound = y_mean + y_std
    lower_bound = y_mean - y_std
    ax.fill_between(x_mean,
                    upper_bound,
                    lower_bound,
                    where=upper_bound > lower_bound,
                    facecolor=color,
                    interpolate=True,
                    alpha=alpha)
def extract(experiments):
    """Extract and interpolate the scalar curves listed in PLOTS from *experiments*.

    Each experiment directory is read with an EventAccumulator; all runs must
    share the same scalar keys. For every key in PLOTS the run values are
    resampled onto a fixed grid of steps (0 .. TOTAL_STEP in PLOT_STEP
    increments) and returned as ``{key: (steps, per_run_interpolated_values)}``.
    """
    scalar_accumulators = [
        EventAccumulator(experiment_dir).Reload().scalars
        for experiment_dir in experiments
    ]

    # Filter non event files
    scalar_accumulators = [
        scalar_accumulator for scalar_accumulator in scalar_accumulators
        if scalar_accumulator.Keys()
    ]

    # Get and validate all scalar keys
    all_keys = [
        tuple(sorted(scalar_accumulator.Keys()))
        for scalar_accumulator in scalar_accumulators
    ]
    assert len(set(all_keys)) == 1, \
        "All runs need to have the same scalar keys. There are mismatches in {}".format(all_keys)
    keys = all_keys[0]

    # For each key: one ScalarEvent sequence per run.
    all_scalar_events_per_key = [[
        scalar_accumulator.Items(key)
        for scalar_accumulator in scalar_accumulators
    ] for key in keys]

    # Get and validate all steps per key
    # Actual recorded step numbers per run, per key.
    x_per_key = [[
        tuple(scalar_event.step for scalar_event in sorted(scalar_events))
        for scalar_events in sorted(all_scalar_events)
    ] for all_scalar_events in all_scalar_events_per_key]

    plot_step = PLOT_STEP
    # Fixed resampling grid (identical for every run) used for interpolation.
    all_steps_per_key = [[
        tuple(int(step_id) for step_id in range(0, TOTAL_STEP, plot_step))
        for _ in sorted(all_scalar_events)
    ] for all_scalar_events in all_scalar_events_per_key]

    for i, all_steps in enumerate(all_steps_per_key):
        assert len(
            set(all_steps)
        ) == 1, "For scalar {} the step numbering or count doesn't match. Step count for all runs: {}".format(
            keys[i], [len(steps) for steps in all_steps])

    steps_per_key = [all_steps[0] for all_steps in all_steps_per_key]

    # Get values per step per key
    values_per_key = [[[scalar_event.value for scalar_event in scalar_events]
                      for scalar_events in all_scalar_events]
                      for all_scalar_events in all_scalar_events_per_key]

    interpolated_keys = dict()
    for tmp_id in range(len(PLOTS)):
        key_idx = keys.index(PLOTS[tmp_id]['key'])
        values = values_per_key[key_idx]
        x = steps_per_key[key_idx]
        x_steps = x_per_key[key_idx]
        interpolated_y = [[] for _ in values]
        for i in range(len(values)):
            idx = 0
            # Align values and recorded steps, dropping the first two points
            # (presumably warm-up noise -- confirm against the training code).
            tmp_min_step = min(len(x_steps[i]), len(values[i]))
            values[i] = values[i][2:tmp_min_step]
            x_steps[i] = x_steps[i][2:tmp_min_step]
            assert len(x_steps[i]) == len(values[i])
            for x_idx in x:
                # Advance to the first recorded step at or beyond the grid
                # point; bound check first so indexing never overruns.
                while idx < len(x_steps[i]) - 1 and x_steps[i][idx] < x_idx:
                    idx += 1
                if x_idx == 0:
                    interpolated_value = values[i][idx]
                elif idx < len(values[i]) - 1:
                    # Midpoint of the two neighbouring samples.
                    interpolated_value = (values[i][idx] +
                                          values[i][idx + 1]) / 2
                else:
                    interpolated_value = values[i][idx]
                interpolated_y[i].append(interpolated_value)
            assert len(interpolated_y[i]) == len(x)
        print(interpolated_y[0][:30])
        interpolated_keys[PLOTS[tmp_id]['key']] = (x, interpolated_y)

    return interpolated_keys
# Plot training vs. validation accuracy read from a tensorboard event file
# whose path is given on the command line.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import matplotlib.pyplot as plt
import numpy as np
import sys

# Many thanks to https://gist.github.com/tomrunia/1e1d383fb21841e8f144
event_acc = EventAccumulator(sys.argv[1], {
    "compressedHistograms": 10,
    "images": 0,
    "scalars": 100,  # keep at most 100 scalar points per tag
    "histograms": 1
})
event_acc.Reload()

# NOTE(review): assumes the run logged exactly 50 epochs; indexing below
# breaks for shorter runs -- confirm against the training script.
epochs = 50

print(event_acc.Tags())
training = event_acc.Scalars("epoch_acc")
val = event_acc.Scalars("epoch_val_acc")
x = np.arange(epochs)
# A ScalarEvent is (wall_time, step, value); index 2 is the value.
y = np.array([[value[2] for value in values] for values in zip(training, val)])
print(np.shape(y))
plt.plot(x, y[:, 0], label="Training accuracy")
plt.plot(x, y[:, 1], label="Validation accuracy")
print("Final training accuracy: " + str(training[epochs - 1][2]))
print("Final validation accuracy: " + str(val[epochs - 1][2]))
def extract(experiments):
    """Extract '0_aux/avg_true_reward' curves from *experiments*, resampled
    onto a fixed grid of steps (0 .. 5e8 in 2.5e6 increments).

    All runs must log the same scalar keys. Returns
    ``{'0_aux/avg_true_reward': (grid_steps, per_run_interpolated_values)}``.
    """
    scalar_accumulators = [
        EventAccumulator(experiment_dir).Reload().scalars
        for experiment_dir in experiments
    ]

    # Filter non event files
    scalar_accumulators = [
        scalar_accumulator for scalar_accumulator in scalar_accumulators
        if scalar_accumulator.Keys()
    ]

    # Get and validate all scalar keys
    all_keys = [
        tuple(sorted(scalar_accumulator.Keys()))
        for scalar_accumulator in scalar_accumulators
    ]
    assert len(
        set(all_keys)
    ) == 1, "All runs need to have the same scalar keys. There are mismatches in {}".format(
        all_keys)
    keys = all_keys[0]

    # For each key: one ScalarEvent sequence per run.
    all_scalar_events_per_key = [[
        scalar_accumulator.Items(key)
        for scalar_accumulator in scalar_accumulators
    ] for key in keys]

    # Actual recorded step numbers per run, per key.
    x_per_key = [[
        tuple(scalar_event.step for scalar_event in sorted(scalar_events))
        for scalar_events in sorted(all_scalar_events)
    ] for all_scalar_events in all_scalar_events_per_key]

    # Fixed resampling grid, identical for every run.
    plot_step = int(2.5e6)
    all_steps_per_key = [[
        tuple(int(step_id) for step_id in range(0, int(5e8), plot_step))
        for scalar_events in sorted(all_scalar_events)
    ] for all_scalar_events in all_scalar_events_per_key]

    for i, all_steps in enumerate(all_steps_per_key):
        assert len(
            set(all_steps)
        ) == 1, "For scalar {} the step numbering or count doesn't match. Step count for all runs: {}".format(
            keys[i], [len(steps) for steps in all_steps])

    steps_per_key = [all_steps[0] for all_steps in all_steps_per_key]

    # Get values per step per key
    values_per_key = [[[scalar_event.value for scalar_event in scalar_events]
                      for scalar_events in all_scalar_events]
                      for all_scalar_events in all_scalar_events_per_key]

    true_reward_key = '0_aux/avg_true_reward'
    key_idx = keys.index(true_reward_key)
    values = values_per_key[key_idx]
    x = steps_per_key[key_idx]
    x_steps = x_per_key[key_idx]

    interpolated_y = [[] for _ in values]
    for i in range(len(values)):
        idx = 0
        # Drop the first two points (warm-up) of each run.
        values[i] = values[i][2:]
        x_steps[i] = x_steps[i][2:]
        assert len(x_steps[i]) == len(values[i])
        for x_idx in x:
            # BUGFIX: the original tested `x_steps[i][idx] < x_idx` BEFORE
            # the bounds check, so idx could run past the end of x_steps[i]
            # and raise IndexError. Check the bound first (matching the
            # sibling extract() implementation) so idx stays <= len - 1.
            while idx < len(x_steps[i]) - 1 and x_steps[i][idx] < x_idx:
                idx += 1
            if x_idx == 0:
                interpolated_value = values[i][idx]
            elif idx < len(values[i]) - 1:
                # Midpoint of the two neighbouring samples.
                interpolated_value = (values[i][idx] + values[i][idx + 1]) / 2
            else:
                interpolated_value = values[i][idx]
            interpolated_y[i].append(interpolated_value)
        assert len(interpolated_y[i]) == len(x)

    print(interpolated_y[0][:30])
    interpolated_keys = dict()
    interpolated_keys[true_reward_key] = (x, interpolated_y)
    return interpolated_keys
def extract(env, experiments):
    """Extract the EXPERIMENTS[env]['key'] scalar from each run in *experiments*.

    Unlike the stricter variants, runs may have differing scalar keys: only
    keys present in ALL runs are kept. The selected curve of every run is
    interpolated onto a fixed grid of steps (0 .. EXPERIMENTS[env]['max_x']
    in 5e7 increments) via ``interpolate_with_fixed_x_ticks``. Returns
    ``{key: (x_ticks, per_run_interpolated_values)}``.
    """
    scalar_accumulators = [
        EventAccumulator(experiment_dir).Reload().scalars
        for experiment_dir in experiments
    ]

    # Filter non event files
    scalar_accumulators = [
        scalar_accumulator for scalar_accumulator in scalar_accumulators
        if scalar_accumulator.Keys()
    ]

    # Get and validate all scalar keys
    all_keys = [
        tuple(sorted(scalar_accumulator.Keys()))
        for scalar_accumulator in scalar_accumulators
    ]
    # Deliberately NOT asserting identical key sets across runs (see below).
    keys = all_keys[0]

    def all_accumulators_have_this_key(key):
        # Helper: keep only keys that every run logged.
        for scalar_accumulator in scalar_accumulators:
            if key not in scalar_accumulator.Keys():
                log.debug('Not all of the accumulators have key %s', key)
                return False
        return True

    keys = [key for key in keys if all_accumulators_have_this_key(key)]

    # For each key: one ScalarEvent sequence per run.
    all_scalar_events_per_key = [[
        scalar_accumulator.Items(key)
        for scalar_accumulator in scalar_accumulators
    ] for key in keys]

    # Fixed resampling grid, identical for every run.
    plot_step = int(5e7)
    max_x = EXPERIMENTS[env]['max_x']
    all_steps_per_key = [[
        tuple(int(step_id) for step_id in range(0, max_x, plot_step))
        for scalar_events in sorted(all_scalar_events)
    ] for all_scalar_events in all_scalar_events_per_key]

    for i, all_steps in enumerate(all_steps_per_key):
        assert len(
            set(all_steps)
        ) == 1, "For scalar {} the step numbering or count doesn't match. Step count for all runs: {}".format(
            keys[i], [len(steps) for steps in all_steps])

    steps_per_key = [all_steps[0] for all_steps in all_steps_per_key]

    # Get values per step per key
    values_per_key = [[[scalar_event.value for scalar_event in scalar_events]
                      for scalar_events in all_scalar_events]
                      for all_scalar_events in all_scalar_events_per_key]

    # Actual recorded step numbers per run, per key.
    x_per_key = [[[scalar_event.step for scalar_event in scalar_events]
                 for scalar_events in all_scalar_events]
                 for all_scalar_events in all_scalar_events_per_key]

    true_reward_key = EXPERIMENTS[env]['key']
    key_idx = keys.index(true_reward_key)
    values = values_per_key[key_idx]
    x_ticks = steps_per_key[key_idx]
    x_steps = x_per_key[key_idx]

    interpolated_y = []
    for i in range(len(values)):  # outer loop over experiments
        log.debug('Experiment %d, len x %d, len y %d', i, len(x_steps[i]),
                  len(values[i]))
        interpolated_y.append(
            interpolate_with_fixed_x_ticks(x_steps[i], values[i], x_ticks))
        assert len(interpolated_y[i]) == len(x_ticks)

    log.debug('%r', interpolated_y[0][:30])
    log.debug('Key values: %r', interpolated_y[0][:30])

    # All series share the grid length, so this truncation is a no-op kept
    # for safety.
    min_length = len(x_ticks)
    for i in range(len(values)):
        log.debug('Values for seed %d truncated from %d to %d', i,
                  len(interpolated_y[i]), min_length)
        interpolated_y[i] = interpolated_y[i][:min_length]

    interpolated_keys = dict()
    interpolated_keys[true_reward_key] = (x_ticks, interpolated_y)
    return interpolated_keys
print(f'Time: {t1:.1f} sec') #%% #bcn.loadw(fn) xp = bcn.aen.predict(tstOH) bcn.evaluate(tstOH) smt.getScore(tstOH,xp,True) fn = getFileList(lgd) fn = fn[0] eacc = EventAccumulator(lgd+'/'+fn) eacc.Reload() #print(eacc.Tags()) #%% tj = eacc.Scalars('loss') vj = eacc.Scalars('val_loss') steps = len(tj) x = np.arange(steps) y = np.zeros([steps, 2]) for i in range(steps):
# Export selected scalar series from a tensorboard run directory to text
# files under a fixed analytics directory.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import numpy as np
import os

path = 'C:\\tmp\\train_stats\\W_NORM_SW_test_c9_lr_0.0010000,kp_1.00,bs_300,nh_128,rl_20'
path_out = 'D:\\ardetector\\data\\analytics'

event_acc = EventAccumulator(path)
event_acc.Reload()

# Save train loss
path_out2 = str.join('\\', [path_out, 'train_loss.txt'])
with open(path_out2, 'w') as out_file:
    np.savetxt(out_file, event_acc.Scalars('train_loss'), delimiter=',', fmt='%.5f')

# Save test loss
path_out2 = str.join('\\', [path_out, 'test_loss.txt'])
with open(path_out2, 'w') as out_file:
    np.savetxt(out_file, event_acc.Scalars('Test_cost'), delimiter=',', fmt='%.5f')

# Save test f1 ar
# BUGFIX: the original never closed this last handle (no close() call);
# context managers guarantee every file is flushed and closed.
path_out2 = str.join('\\', [path_out, 'test_f1ar.txt'])
with open(path_out2, 'w') as out_file:
    np.savetxt(out_file, event_acc.Scalars('Test_sample_F1_score'), delimiter=',', fmt='%.5f')
'scalars': 1000, } # check - if losses in folder, use that plot_values = {} plot_steps = {} for variant, variant_runs in variant_paths.items(): min_steps = 0 for i, run in enumerate(variant_runs): accum_path = os.path.join(run, plot_key_folder) loss_data = {} if osp.exists(osp.join(accum_path, 'losses')): for task in tasks: loss_path = osp.join(accum_path, 'losses', task) event_acc = EventAccumulator(loss_path, tf_size_guidance) event_acc.Reload() print(event_acc.Tags()) scalars = event_acc.Scalars('losses') steps_and_values = np.stack([ np.asarray([scalar.step, scalar.value]) for scalar in scalars ]) # # unload steps to make sure the number of frames is equal across runs steps = steps_and_values[:, 0] values = steps_and_values[:, 1] plot_steps[task] = [steps] plot_values[task] = [values] else: print("Nope")
def main(): """Run the training and prediction process.""" start = timeit.default_timer() parser = configargparse.ArgumentParser( description=__doc__, default_config_files=[inkid.ops.default_arguments_file()], ) # Needed files parser.add('data', metavar='infile', help='input data file (JSON or PPM)', nargs='?') parser.add('output', metavar='outfile', help='output directory', nargs='?') # Config file parser.add('-c', '--config-file', metavar='path', is_config_file=True, help='file with pre-specified arguments (in addition to pre-loaded defaults)') # Region set modifications parser.add('-k', metavar='num', default=None, type=int, help='index of region to use for prediction and evaluation') parser.add('--override-volume-slices-dir', metavar='path', default=None, help='override directory for all volume slices (only works if there is ' 'only one volume in the region set file)') # Pretrained model parser.add('--model', metavar='path', default=None, help='existing model directory to load checkpoints from') # Method parser.add('--feature-type', metavar='name', default='subvolume_3dcnn', help='type of feature model is built on', choices=[ 'subvolume_3dcnn', 'voxel_vector_1dcnn', 'descriptive_statistics', ]) parser.add('--label-type', metavar='name', default='ink_classes', help='type of label to train', choices=[ 'ink_classes', 'rgb_values', ]) # Volume parser.add('--normalize-volumes', action='store_true', help='normalize volumes to zero mean and unit variance before training') # Subvolumes parser.add('--subvolume-method', metavar='name', default='nearest_neighbor', help='method for getting subvolumes', choices=[ 'nearest_neighbor', 'interpolated', 'snap_to_axis_aligned', ]) parser.add('--subvolume-shape', metavar='n', nargs=3, type=int, help='subvolume shape in z y x') parser.add('--pad-to-shape', metavar='n', nargs=3, type=int, default=None, help='pad subvolume with zeros to be of given shape (default no padding)') parser.add('--move-along-normal', metavar='n', 
type=float, help='number of voxels to move along normal before getting a subvolume') parser.add('--normalize-subvolumes', action='store_true', help='normalize each subvolume to zero mean and unit variance on the fly') # Voxel vectors parser.add('--length-in-each-direction', metavar='n', type=int, help='length of voxel vector in each direction along normal') # Data organization/augmentation parser.add('--jitter-max', metavar='n', type=int) parser.add('--augmentation', action='store_true', dest='augmentation') parser.add('--no-augmentation', action='store_false', dest='augmentation') # Network architecture parser.add('--learning-rate', metavar='n', type=float) parser.add('--drop-rate', metavar='n', type=float) parser.add('--batch-norm-momentum', metavar='n', type=float) parser.add('--no-batch-norm', action='store_true') parser.add('--fbeta-weight', metavar='n', type=float) parser.add('--filter-size', metavar='n', nargs=3, type=int, help='3D convolution filter size') parser.add('--filters', metavar='n', nargs='*', type=int, help='number of filters for each convolution layer') parser.add('--adagrad-optimizer', action='store_true') # Run configuration parser.add('--training-batch-size', metavar='n', type=int) parser.add('--training-max-batches', metavar='n', type=int, default=None) parser.add('--training-epochs', metavar='n', type=int, default=None) parser.add('--prediction-batch-size', metavar='n', type=int) parser.add('--prediction-grid-spacing', metavar='n', type=int, help='prediction points will be taken from an NxN grid') parser.add('--evaluation-batch-size', metavar='n', type=int) parser.add('--evaluation-max-samples', metavar='n', type=int) parser.add('--summary-every-n-steps', metavar='n', type=int) parser.add('--save-checkpoint-every-n-steps', metavar='n', type=int) parser.add('--evaluate-every-n-checkpoints', metavar='n', type=int) parser.add('--predict-every-n-checkpoints', metavar='n', type=int) parser.add('--final-prediction-on-all', action='store_true') 
parser.add('--skip-training', action='store_true') parser.add('--continue-training-from-checkpoint', metavar='path', default=None) parser.add('--skip-batches', metavar='n', type=int, default=0) parser.add('--training-shuffle-seed', metavar='n', type=int, default=random.randint(0,10000)) # Profiling parser.add('--profile-dir-name', metavar='path', default=None, help='dirname to dump TensorFlow profile ' '(no profile produced if not defined)') parser.add('--profile-start-and-end-steps', metavar='n', nargs=2, default=[10, 90], help='start and end steps (and dump step) for profiling') # Logging/metadata parser.add('--eval-metrics-to-write', metavar='metric', nargs='*', default=[ 'area_under_roc_curve', 'loss' ], help='will try the final value for each of these eval metrics and ' 'add it to metadata file.') # Rclone parser.add('--rclone-transfer-remote', metavar='remote', default=None, help='if specified, and if matches the name of one of the directories in ' 'the output path, transfer the results to that rclone remote into the ' 'subpath following the remote name') args = parser.parse_args() if not args.data and not args.output and not args.continue_training_from_checkpoint: parser.print_help() return if args.continue_training_from_checkpoint is not None: # Get previous metadata file prev_dir = args.continue_training_from_checkpoint prev_metadata_file = os.path.join(args.continue_training_from_checkpoint, 'metadata.json') with open(prev_metadata_file) as f: prev_metadata = json.load(f) # Set all the args to be what they were last time prev_args = prev_metadata['Arguments'] d_args = vars(args) for prev_arg in prev_args: d_args[prev_arg] = prev_args[prev_arg] # Calculate number of batches to drop files = os.listdir(prev_dir) checkpoint_files = list(filter(lambda name: re.search('model\.ckpt-(\d+)\.index', name) is not None, files)) iterations = [int(re.findall('model\.ckpt-(\d+)\.index', name)[0]) for name in checkpoint_files] max_iteration = max(iterations) 
d_args['skip_batches'] = max_iteration print('Skipping {} batches.'.format(max_iteration)) # Set this again since it got wiped above d_args['continue_training_from_checkpoint'] = prev_dir if args.k is None: output_path = os.path.join( args.output, datetime.datetime.today().strftime('%Y-%m-%d_%H.%M.%S') ) else: output_path = os.path.join( args.output, datetime.datetime.today().strftime('%Y-%m-%d_%H.%M.%S') + '_' + str(args.k) ) os.makedirs(output_path) if args.continue_training_from_checkpoint is not None: model_path = prev_dir elif args.model is not None: model_path = args.model else: model_path = output_path # If input file is a PPM, treat this as a texturing module # Skip training, run a prediction on all regions, require trained model, require slices dir _, file_extension = os.path.splitext(args.data) file_extension = file_extension.lower() if file_extension == '.ppm': if args.model is None: print("Pretrained model (--model) required when texturing a .ppm file.") return if args.override_volume_slices_dir is None: print("Volume (--override-volume-slices-dir) required when texturing a .ppm file.") return print("PPM input file provided. 
Automatically skipping training and running a final prediction on all.") args.skip_training = True args.final_prediction_on_all = True # Transform the input file into region set, can handle JSON or PPM region_data = inkid.data.RegionSet.get_data_from_file(args.data) if args.override_volume_slices_dir is not None: volume_dirs_seen = set() for ppm in region_data['ppms']: volume_dirs_seen.add(region_data['ppms'][ppm]['volume']) if len(volume_dirs_seen) > 1: raise ValueError('--override-volume-slices-dir only ' 'permitted if there is one volume in the region set') for ppm in region_data['ppms']: region_data['ppms'][ppm]['volume'] = args.override_volume_slices_dir if args.k is not None: k_region = region_data['regions']['training'].pop(int(args.k)) region_data['regions']['prediction'].append(k_region) region_data['regions']['evaluation'].append(k_region) regions = inkid.data.RegionSet(region_data) if args.normalize_volumes: print('Normalizing volumes...') regions.normalize_volumes() print('done') print('Arguments:\n{}\n'.format(args)) print('Region Set:\n{}\n'.format(json.dumps(region_data, indent=4, sort_keys=False))) # Write metadata to file metadata = {} metadata['Arguments'] = vars(args) metadata['Region set'] = region_data # Add the git hash if there is a repository try: repo = git.Repo(os.path.join(os.path.dirname(inspect.getfile(inkid)), '..')) sha = repo.head.object.hexsha metadata['Git hash'] = repo.git.rev_parse(sha, short=6) except git.exc.InvalidGitRepositoryError: metadata['Git hash'] = 'No git hash available (unable to find valid repository).' with open(os.path.join(output_path, 'metadata.json'), 'w') as f: f.write(json.dumps(metadata, indent=4, sort_keys=False)) # Save checkpoints every n steps. EvalCheckpointSaverListener # (below) runs an evaluation each time this happens. 
run_config = tf.estimator.RunConfig( save_checkpoints_steps=args.save_checkpoint_every_n_steps, keep_checkpoint_max=None, # save all checkpoints ) # Create an Estimator with the run configuration, hyperparameters, # and model directory specified. estimator = tf.estimator.Estimator( model_fn={ 'ink_classes': inkid.model.ink_classes_model_fn, 'rgb_values': inkid.model.rgb_values_model_fn, }[args.label_type], model_dir=model_path, config=run_config, params={ 'drop_rate': args.drop_rate, 'subvolume_shape': args.subvolume_shape, 'pad_to_shape': args.pad_to_shape, 'length_in_each_direction': args.length_in_each_direction, 'batch_norm_momentum': args.batch_norm_momentum, 'no_batch_norm': args.no_batch_norm, 'filters': args.filters, 'learning_rate': args.learning_rate, 'fbeta_weight': args.fbeta_weight, 'feature_type': args.feature_type, 'label_type': args.label_type, 'adagrad_optimizer': args.adagrad_optimizer, }, ) # Define tensors to be shown in a "summary" step. if args.label_type == 'ink_classes': tensors_to_log = { 'train_accuracy': 'train_accuracy', 'train_precision': 'train_precision', 'train_recall': 'train_recall', 'train_fbeta_score': 'train_fbeta_score', 'train_positives': 'train_positives', 'train_negatives': 'train_negatives', } else: tensors_to_log = {} logging_hook = tf.train.LoggingTensorHook( tensors=tensors_to_log, every_n_iter=args.summary_every_n_steps, ) tf.logging.set_verbosity(tf.logging.INFO) # Define the feature inputs to the network if args.feature_type == 'subvolume_3dcnn': point_to_subvolume_input = functools.partial( regions.point_to_subvolume_input, subvolume_shape=args.subvolume_shape, out_of_bounds='all_zeros', move_along_normal=args.move_along_normal, method=args.subvolume_method, normalize=args.normalize_subvolumes, pad_to_shape=args.pad_to_shape, ) training_features_fn = functools.partial( point_to_subvolume_input, augment_subvolume=args.augmentation, jitter_max=args.jitter_max, ) evaluation_features_fn = functools.partial( 
point_to_subvolume_input, augment_subvolume=False, jitter_max=0, ) prediction_features_fn = evaluation_features_fn elif args.feature_type == 'voxel_vector_1dcnn': training_features_fn = functools.partial( regions.point_to_voxel_vector_input, length_in_each_direction=args.length_in_each_direction, out_of_bounds='all_zeros', ) evaluation_features_fn = training_features_fn prediction_features_fn = training_features_fn elif args.feature_type == 'descriptive_statistics': training_features_fn = functools.partial( regions.point_to_descriptive_statistics, subvolume_shape=args.subvolume_shape, ) evaluation_features_fn = training_features_fn prediction_features_fn = training_features_fn # Define the labels if args.label_type == 'ink_classes': label_fn = regions.point_to_ink_classes_label elif args.label_type == 'rgb_values': label_fn = regions.point_to_rgb_values_label # Define the datasets training_input_fn = regions.create_tf_input_fn( region_groups=['training'], batch_size=args.training_batch_size, features_fn=training_features_fn, label_fn=label_fn, perform_shuffle=True, shuffle_seed=args.training_shuffle_seed, restrict_to_surface=True, epochs=args.training_epochs, skip_batches=args.skip_batches, ) evaluation_input_fn = regions.create_tf_input_fn( region_groups=['evaluation'], batch_size=args.evaluation_batch_size, features_fn=evaluation_features_fn, label_fn=label_fn, max_samples=args.evaluation_max_samples, perform_shuffle=True, shuffle_seed=0, # We want the eval set to be the same each time restrict_to_surface=True, ) prediction_input_fn = regions.create_tf_input_fn( region_groups=['prediction'], batch_size=args.prediction_batch_size, features_fn=prediction_features_fn, label_fn=None, perform_shuffle=False, restrict_to_surface=True, grid_spacing=args.prediction_grid_spacing, ) # Run the training process. Predictions are run during training # and also after training. 
try: with ExitStack() as stack: # Only do profiling if user provided a profile file path # https://stackoverflow.com/questions/27803059/conditional-with-statement-in-python?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa if args.profile_dir_name is not None: print('Enabling TensorFlow profiling...') pctx = stack.enter_context( tf.contrib.tfprof.ProfileContext( 'tmp', trace_steps=range( args.profile_start_and_end_steps[0], args.profile_start_and_end_steps[1] ), dump_steps=[args.profile_start_and_end_steps[1]] ) ) opts = tf.profiler.ProfileOptionBuilder.time_and_memory() opts2 = tf.profiler.ProfileOptionBuilder.trainable_variables_parameter() builder = tf.profiler.ProfileOptionBuilder opts3 = builder(builder.time_and_memory()).order_by('micros').build() pctx.add_auto_profiling( 'op', opts, [args.profile_start_and_end_steps[0], args.profile_start_and_end_steps[1]] ) pctx.add_auto_profiling( 'scope', opts2, [args.profile_start_and_end_steps[0], args.profile_start_and_end_steps[1]] ) pctx.add_auto_profiling( 'op', opts3, [args.profile_start_and_end_steps[0], args.profile_start_and_end_steps[1]] ) # Only train if the training region set group is not empty if len(regions._region_groups['training']) > 0 and not args.skip_training: estimator.train( input_fn=training_input_fn, steps=args.training_max_batches, hooks=[logging_hook], saving_listeners=[ inkid.model.EvalCheckpointSaverListener( estimator=estimator, eval_input_fn=evaluation_input_fn, predict_input_fn=prediction_input_fn, evaluate_every_n_checkpoints=args.evaluate_every_n_checkpoints, predict_every_n_checkpoints=args.predict_every_n_checkpoints, region_set=regions, predictions_dir=os.path.join(output_path, 'predictions'), label_type=args.label_type, ), ], ) # Still attempt final prediction except KeyboardInterrupt: pass try: if args.final_prediction_on_all: print('Running a final prediction on all regions...') final_prediction_input_fn = regions.create_tf_input_fn( 
region_groups=['prediction', 'training', 'evaluation'], batch_size=args.prediction_batch_size, features_fn=prediction_features_fn, label_fn=None, perform_shuffle=False, restrict_to_surface=True, grid_spacing=args.prediction_grid_spacing, ) if args.label_type == 'ink_classes': predictions = estimator.predict( final_prediction_input_fn, predict_keys=[ 'region_id', 'ppm_xy', 'probabilities', ], ) for prediction in predictions: regions.reconstruct_predicted_ink_classes( np.array([prediction['region_id']]), np.array([prediction['probabilities']]), np.array([prediction['ppm_xy']]), ) regions.save_predictions(os.path.join(output_path, 'predictions'), 'final') regions.reset_predictions() elif args.label_type == 'rgb_values': predictions = estimator.predict( final_prediction_input_fn, predict_keys=[ 'region_id', 'ppm_xy', 'rgb', ], ) for prediction in predictions: regions.reconstruct_predicted_rgb( np.array([prediction['region_id']]), np.array([prediction['rgb']]), np.array([prediction['ppm_xy']]), ) regions.save_predictions(os.path.join(output_path, 'predictions'), 'final') regions.reset_predictions() # Perform finishing touches even if cut short except KeyboardInterrupt: pass # Add some post-run info to metadata file stop = timeit.default_timer() metadata['Runtime'] = stop - start metadata['Finished at'] = time.strftime('%Y-%m-%d %H:%M:%S') # Add some final metrics metrics = {} try: eval_event_acc = EventAccumulator(os.path.join(output_path, 'eval')) eval_event_acc.Reload() for metric in args.eval_metrics_to_write: if metric in eval_event_acc.Tags()['scalars']: metrics[metric] = eval_event_acc.Scalars(metric)[-1].value except: # NOQA pass metadata['Final evaluation metrics'] = metrics with open(os.path.join(output_path, 'metadata.json'), 'w') as f: f.write(json.dumps(metadata, indent=4, sort_keys=False)) # Transfer via rclone if requested if args.rclone_transfer_remote is not None: folders = [] path = os.path.abspath(output_path) while True: path, folder = 
os.path.split(path) if folder != "": folders.append(folder) else: if path != "": folders.append(path) break folders.reverse() if args.rclone_transfer_remote not in folders: print('Provided rclone transfer remote was not a directory ' 'name in the output path, so it is not clear where in the ' 'remote to put the files. Transfer canceled.') else: while folders.pop(0) != args.rclone_transfer_remote: continue command = [ 'rclone', 'move', '-v', '--delete-empty-src-dirs', output_path, args.rclone_transfer_remote + ':' + os.path.join(*folders) ] print(' '.join(command)) subprocess.call(command)
def output(tensorboard_dir, output_dir, metrics_keys, steps, output_file_base="metrics"):
    """Accumulate TensorBoard scalar events into one table and write CSV + Markdown.

    Args:
        tensorboard_dir: root directory whose subdirectories each hold event files.
        output_dir: directory the ``<output_file_base>.csv`` / ``.md`` files go to.
        metrics_keys: iterable of scalar tags to export; falsy means "all tags
            found in any run".
        steps: optional list of step indices to restrict the table to.
        output_file_base (str): basename (without extension) for both outputs.
    """
    subdirs = GetLogdirSubdirectories(tensorboard_dir)
    event_accumulators = []
    for subdir in subdirs:
        event_accumulator = EventAccumulator(subdir)  # init event accumulator
        event_accumulator.Reload()
        event_accumulators.append(event_accumulator)

    if not metrics_keys:
        # Default: union of every scalar tag seen across all runs.
        metrics_keys = {
            metrics_key
            for event_accumulator in event_accumulators
            for metrics_key in _get_metrics_keys(event_accumulator)
        }

    columns = [
        _column_name(event_accumulator, metrics_key)
        for event_accumulator, metrics_key in itertools.product(
            event_accumulators, metrics_keys)
    ]
    columns.sort()

    df = pd.DataFrame([], columns=columns)
    for event_accumulator in event_accumulators:
        for metrics_key in metrics_keys:
            value_step_list = _value_step_list(event_accumulator, metrics_key)
            # Column name is invariant for this (run, key) pair — compute once.
            column_name = _column_name(event_accumulator, metrics_key)
            for value, step in value_step_list:
                df.loc[step, column_name] = value

    if steps:
        # BUGFIX: ``df[steps, :]`` passes a tuple to DataFrame.__getitem__ and
        # raises; label-based row selection needs ``.loc``.
        df = df.loc[steps, :]
    df = df.sort_index(ascending=False)

    # index to column. and re-order column.
    df["step"] = df.index
    df = df[["step"] + columns]

    output_csv = os.path.join(output_dir, "{}.csv".format(output_file_base))
    df.to_csv(output_csv, index=False)

    output_md = os.path.join(output_dir, "{}.md".format(output_file_base))
    writer = pytablewriter.MarkdownTableWriter()
    writer.char_left_side_row = "|"  # fix for github
    writer.from_dataframe(df)
    with open(output_md, "w") as file_stream:
        writer.stream = file_stream
        writer.write_table()

    message = """
output success
output csv: {}
output md: {}
""".format(output_csv, output_md)
    print(message)
def get_event_acc(log_dir):
    """Build a fully-loaded EventAccumulator for ``log_dir`` (``~`` expanded)."""
    expanded_dir = os.path.expanduser(log_dir)
    accumulator = EventAccumulator(expanded_dir)
    accumulator.Reload()
    return accumulator
def main(_):
    """Plot mAP-vs-GPU-time curves for every evaluated model under ./models.

    Scans ``models/**/eval`` for TensorBoard event files, builds a combined
    DataFrame of precision values per model, prints summary statistics, and
    saves the comparison figure to ``mAP.png``.
    """
    search_path = os.path.join(os.getcwd(), 'models') + '/**/eval'
    all_dirs = glob.glob(search_path, recursive=True)

    eval_frames = []  # one DataFrame per model; concatenated once at the end
    all_models = []
    for d in all_dirs:
        # Grab all of the accuracy results for each model.
        event_acc = EventAccumulator(d)
        event_acc.Reload()
        # Show all tags in the log file
        print(event_acc.Tags())
        try:
            # NOTE(review): this tag looks mangled by e-mail redaction
            # ("[email protected]"); the original was probably something like
            # 'PASCAL/Precision/mAP@0.5IOU' — confirm against the event files.
            s = event_acc.Scalars('PASCAL/Precision/[email protected]')
            df = pd.DataFrame(s)
            if not df.empty:
                dir_name = d.split('eval')[0]
                model_name = dir_name.split('/')[-2]
                a = meta.ModelMetadata(model_name)
                all_models.append(a)
                time_start = df.wall_time[0]
                # Convert wall time and value to rounded values.
                df['wall_time'] = df['wall_time'].apply(wallToGPUTime,
                                                        args=(time_start, ))
                df['value'] = df['value'].apply(valueTomAP)
                # Rename columns.
                df.columns = ['GPU Time', 'step', 'Overall mAP']
                df['model'] = np.full(len(df), a.name)
                eval_frames.append(df)
        except Exception as ex:
            print(ex)

    # BUGFIX: DataFrame.append was removed in pandas 2.0 (and was quadratic);
    # concatenate the collected frames once instead.
    df_eval = pd.concat(eval_frames) if eval_frames else pd.DataFrame()

    # Drop the step column as it's no longer needed.
    df_eval = df_eval.drop(['step'], axis=1)
    df_final = df_eval[df_eval['GPU Time'] < 200]
    df_mean = df_eval[(df_eval['GPU Time'] < 200) & (df_eval['GPU Time'] > 50)]
    print(df_mean.groupby(['model']).mean().sort_values('Overall mAP'))
    all_model_index = df_final.set_index(['model', 'GPU Time']).sort_index()

    with plt.style.context('ggplot'):
        # Start a new figure - size is in inches.
        fig = plt.figure(figsize=(6, 4), dpi=400)
        ax1 = plt.subplot(aspect='equal')
        ax1.set_xlim(0, 300)
        ax1.set_ylim(0, 100)
        for model in all_models:
            model_plot(all_model_index, model, ax1)
        ax1.set_ylim([0, 100])
        ax1.set_ylabel('mAP')
        ax1.set_xlabel('GPU Time (minutes)')
        ax1.set_title('Mean Average Precision')

        # Legend mapping architecture -> marker shape.
        markers = []
        names = []
        for name, marker in arch_markers.items():
            s = plt.Line2D((0, 1), (0, 0), color='grey', marker=marker,
                           linestyle='')
            names.append(name)
            markers.append(s)
        ax1.legend(markers, names, loc=0)

        # Hand-drawn legend for input resolution colors.
        inc = 40
        ax1.text(180, 45, r'Resolution', fontsize=8)
        for size, color in sz_colors.items():
            ax1.text(190, inc - 2, r'{0}'.format(size), fontsize=8)
            c = mpatches.Circle((180, inc), 2, edgecolor='black',
                                facecolor=color)
            ax1.add_patch(c)
            inc -= 10

        # Hand-drawn legend for box-proposal-count colors.
        inc = 40
        ax1.text(240, 45, r'Box Proposals', fontsize=8)
        for size, color in sorted(sz_proposals.items()):
            ax1.text(250, inc - 2, r'{0}'.format(size), fontsize=8)
            c = mpatches.Circle((240, inc), 2, edgecolor='black',
                                facecolor=color)
            ax1.add_patch(c)
            inc -= 10

        plt.savefig('mAP.png', format='png', bbox_inches='tight')
        plt.show()
    print('Done creating mAP.png')
def tabulate_events(dpath, dout, save):
    """Convert the scalar events of the first run under ``dpath`` to a DataFrame.

    Args:
        dpath: directory containing run subdirectories with event files.
        dout: output directory for the CSV (used when ``save`` is true).
        save (bool): if True, write ``<dout>/<run>.csv``; otherwise plot the
            loss/accuracy curves and save the figure to ``tmp_images/tmp.png``.

    Returns:
        dict: run name -> DataFrame of its scalar values (empty DataFrame when
        the run logged no scalars).
    """
    final_out = {}
    # NOTE: only the first run directory is converted — the loop over all runs
    # was deliberately commented out in the original version.
    dname = os.listdir(dpath)[0]

    print(f"Converting run {dname}", end="")
    ea = EventAccumulator(os.path.join(dpath, dname)).Reload()
    tags = ea.Tags()['scalars']

    out = {}
    for tag in tags:  # e.g. training loss / validation loss / training Acc / validation Acc
        tag_values = []
        steps = []
        for event in ea.Scalars(tag):
            tag_values.append(event.value)
            steps.append(event.step)
        # One single-row frame per tag, columns keyed by step.
        out[tag] = pd.DataFrame(data=dict(
            zip(steps, np.array([tag_values]).transpose())),
            columns=steps, index=None)

    # BUGFIX: previously ``df`` was only bound inside the ``if`` branch, so a
    # run without scalar tags raised NameError at ``final_out[dname] = df``.
    df = pd.DataFrame()
    if len(tags) > 0:
        df = pd.concat(out.values())
        if save:
            df.to_csv(f'{os.path.join(dout,dname)}.csv')
        else:
            df_T = df.transpose()
            df_T.columns = out.keys()
            x = df_T.index.values.tolist()
            train_loss = df_T['training loss'].to_numpy()
            val_loss = df_T['validation loss'].to_numpy()
            train_acc = df_T['training Acc'].to_numpy()
            val_acc = df_T['validation Acc'].to_numpy()

            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 4), sharex=True)
            plt.xticks(np.arange(min(x), max(x), 2))
            ax1.plot(x, train_loss, 'r', label='train')
            ax1.plot(x, val_loss, 'b', label='val')
            ax1.set_title('Loss curves')
            legend = ax1.legend(loc='upper right', shadow=True, fontsize='large')
            ax2.plot(x, train_acc, 'r', label='train')
            ax2.plot(x, val_acc, 'b', label='val')
            ax2.set_title('Accuracy curves')
            legend = ax2.legend(loc='lower right', shadow=True, fontsize='large')
            if not os.path.exists('tmp_images'):
                os.mkdir('tmp_images')
            fig.savefig('tmp_images/tmp.png')
            plt.show()
        print("- Done")
    else:
        # Typo fixed: was "- Not scalers to write".
        print('- No scalars to write')
    final_out[dname] = df
    return final_out
def _scalar_summaries(directory):
    """Read accuracy/r_squared/loss scalar curves from the oldest event file in ``directory``.

    Returns a dict with a 'steps' list plus one value-list per matched metric,
    or an empty dict when no event file / no matching tag exists.
    """
    events = [
        os.path.join(directory, f) for f in os.listdir(directory)
        if f.startswith('events.out.tfevents')
    ]
    if len(events) == 0:
        return {}
    # Oldest event file first (sorted by modification time, take the first).
    events.sort(key=lambda x: os.path.getmtime(x))
    summary_iterator = EventAccumulator(events[0]).Reload()
    # Tags may carry a '_1' suffix (duplicated summary names); strip it for
    # matching and for the returned keys.
    tags = [
        m for m in summary_iterator.Tags()['scalars']
        if m.split('_1')[0] in ['accuracy', 'r_squared', 'loss']
    ]
    if len(tags) == 0:
        return {}
    metrics = {'steps': [e.step for e in summary_iterator.Scalars(tags[0])]}
    for tag in tags:
        metrics[tag.split('_1')[0]] = [
            e.value for e in summary_iterator.Scalars(tag)
        ]
    return metrics


def train_eval_graphs(path):
    """Collect train (and, when present, eval) metric curves for a model dir.

    Args:
        path: model directory; training event files live directly in it and
            evaluation event files in its ``eval/`` subdirectory.

    Returns:
        dict: ``{}`` when nothing usable is found, ``{'train': ...}`` when only
        training curves exist, else ``{'train': ..., 'eval': ...}``.
    """
    if not os.path.isdir(path):
        return {}

    train = _scalar_summaries(path)
    if not train:
        return {}

    # Renamed from ``eval`` to avoid shadowing the builtin.
    eval_metrics = {}
    if os.path.isdir(os.path.join(path, 'eval')):
        eval_metrics = _scalar_summaries(os.path.join(path, 'eval'))
    if not eval_metrics:
        return {'train': train}
    return {'train': train, 'eval': eval_metrics}
bcn.loadw(fn) xp = bcn.aen.predict(tstOH) bcn.evaluate(tstOH) smt.getScore(tstOH, xp, True) logdir = f'logs/{pfix}{bt}/' fn = getFileList(logdir) fn = fn[0] eacc = EventAccumulator(logdir + fn) eacc.Reload() #print(eacc.Tags()) tj = eacc.Scalars('loss') vj = eacc.Scalars('val_loss') steps = len(tj) x = np.arange(steps) y = np.zeros([steps, 2]) for i in range(steps):
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
from matplotlib import pyplot as plt

# Load the TensorBoard logs of the three model variants being compared.
pixelcnn = EventAccumulator('logs/pixelcnn')
noncausal = EventAccumulator('logs/noncausal')
denoising = EventAccumulator('logs/denoising')
pixelcnn.Reload()
noncausal.Reload()
denoising.Reload()

# Each ScalarEvent is a (wall_time, step, value) tuple; the leading
# wall_time column is discarded, keeping (steps, values) pairs for plotting.
_, step_nums, vals = zip(*pixelcnn.Scalars('train_loss'))
pixelcnn_train = (step_nums, vals)
_, step_nums, vals = zip(*pixelcnn.Scalars('test_loss'))
pixelcnn_test = (step_nums, vals)
_, step_nums, vals = zip(*noncausal.Scalars('train_loss'))
noncausal_train = (step_nums, vals)
_, step_nums, vals = zip(*noncausal.Scalars('test_loss'))
noncausal_test = (step_nums, vals)
_, step_nums, vals = zip(*denoising.Scalars('train_loss'))
denoising_train = (step_nums, vals)
_, step_nums, vals = zip(*denoising.Scalars('test_loss'))
denoising_test = (step_nums, vals)

# Overlay the curves; further plot calls presumably follow this chunk.
plt.plot(*pixelcnn_train, label='PixelCNN Train')
plt.plot(*pixelcnn_test, label='PixelCNN Test')
plt.xlabel('Iteration')
def __init__(self, event_file, name="MeanCurve", smoothing=None, variance_mode="std", max_n_scalars=None):
    """
    Args:
        event_file (str|list[str]): a string or a list of strings where
            each should point to a valid TB dir, e.g., ending with "eval/"
            or "train/". The curves of these files will be averaged. It's
            the user's responsibility to ensure that it's meaningful to
            group these event files and show their mean and variance.
        name (str): name of the mean curve.
        smoothing (int): if None, no smoothing is applied; otherwise this
            is the window width of a Savitzky-Golay filter
        variance_mode (str): how to compute the shaded region around the
            mean curve, either "std" or "minmax".
        max_n_scalars (int): the maximal number of points each curve will
            have. If None, a default value is used.

    Returns:
        MeanCurve: a mean curve structure.
    """
    # Normalize to a list so single files and groups share one code path.
    if not isinstance(event_file, list):
        event_file = [event_file]
    else:
        assert len(event_file) > 0, "Empty event file list!"
    if max_n_scalars is not None:
        # NOTE(review): mutates the (presumably class-level) _SIZE_GUIDANCE
        # dict, which affects every instance — confirm this is intended.
        self._SIZE_GUIDANCE['scalars'] = max_n_scalars

    # x holds the common step axis; ys accumulates one smoothed curve per file.
    x, ys = None, []
    for ef in event_file:
        event_acc = EventAccumulator(ef, self._SIZE_GUIDANCE)
        event_acc.Reload()
        # 'scalar_events' is a list of ScalarEvent(wall_time, step, value),
        # with a maximal length specified by _SIZE_GUIDANCE
        scalar_events = event_acc.Scalars(self._get_metric_name())
        steps, values = zip(*[(se.step, se.value) for se in scalar_events])
        if x is None:
            # First curve fixes the expected number of points.
            x = np.array(steps)
        else:
            assert len(steps) == len(x), (
                "All curves should have the same number of values!")
        new_x, y = self._interpolate_and_smooth_if_necessary(
            steps, values, x[0], x[-1], smoothing)
        ys.append(y)
        # All curves are resampled onto the same new_x grid.
        x = new_x

    # Pointwise mean across the per-file curves.
    y = np.array(list(map(np.mean, zip(*ys))))
    if len(ys) == 1:
        # Single curve: no spread, min/max collapse onto the mean.
        self._mean_curve = MeanCurve(x=x, y=y, min_y=y, max_y=y, name=name)
    else:
        # compute mean and variance
        if variance_mode == "std":
            std = np.array(list(map(np.std, zip(*ys))))
            min_y, max_y = y - std, y + std
        elif variance_mode == "minmax":
            min_y = np.array(list(map(np.min, zip(*ys))))
            max_y = np.array(list(map(np.max, zip(*ys))))
        else:
            raise ValueError("Invalid variance mode: %s" % variance_mode)
        self._mean_curve = MeanCurve(x=x, y=y, min_y=min_y, max_y=max_y, name=name)
def main(dirs_dataset_filter="mnist",
         more_discard_dirs=None,
         filtering_criteria_models=None,
         filtering_criteria_frames=None,
         filtering_criteria_others=None,
         filtering_criteria_annotation_path=None,
         filtering_criteria_sampling=None):
    """Scan ``results/`` run directories, print accuracy summaries, prune short runs.

    Args:
        dirs_dataset_filter (str): substring a results dir must contain.
        more_discard_dirs (list[str]): extra substrings that disqualify a dir.
        filtering_criteria_models (list[str]): model-name substrings to keep
            (default: the resnet/bagnet set below).
        filtering_criteria_frames (list[str]): frame-count substrings to keep.
        filtering_criteria_others (list[str]): additional substrings to keep.
        filtering_criteria_annotation_path (list[str]): substrings required in
            the run's annotation_path option.
        filtering_criteria_sampling (list[str]): allowed train_t_crop values
            (default: ``["center"]``).
    """
    # Mutable-default-argument fix: defaults are materialized per call.
    if more_discard_dirs is None:
        more_discard_dirs = []
    if filtering_criteria_models is None:
        filtering_criteria_models = [
            "resnet_50", "resnet_34", "resnet_18", "bagnet_tem_9",
            "bagnet_tem_17"
        ]
    # NOTE(review): with these empty defaults every ``any([...])`` test below
    # is False, so no run is ever selected unless the caller supplies
    # non-empty criteria — confirm this gating is intended.
    if filtering_criteria_frames is None:
        filtering_criteria_frames = []
    if filtering_criteria_others is None:
        filtering_criteria_others = []
    if filtering_criteria_annotation_path is None:
        filtering_criteria_annotation_path = []
    if filtering_criteria_sampling is None:
        filtering_criteria_sampling = ["center"]

    res_dirs = [f for f in os.listdir("results/") if dirs_dataset_filter in f]
    discard_dirs = ["motion", "blackframes", "val_1tstride"] + more_discard_dirs
    res_dirs = [f for f in res_dirs if all([d not in f for d in discard_dirs])]

    for r in res_dirs:
        # Keep only dirs that have event files, match the model/frame/other
        # criteria, and carry an opts.json.
        if [f for f in os.listdir("results/"+r) if "events.out" in f] and \
                any([c in r for c in filtering_criteria_models]) and \
                any([c in r for c in filtering_criteria_frames]) and \
                any([c in r for c in filtering_criteria_others]) and \
                os.path.exists("results/"+r+"/opts.json"):
            print(r)
            event_acc = EventAccumulator("results/" + r)
            event_acc.Reload()
            with open("results/" + r + "/opts.json", "r") as f:
                opts = json.load(f)
            print(opts['annotation_path'])
            if any([c in opts['annotation_path'] for c in filtering_criteria_annotation_path]) \
                    and any([c in opts['train_t_crop'] for c in filtering_criteria_sampling]):
                if event_acc.scalars.Keys() != []:
                    # Each ScalarEvent is (wall_time, step, value); the first
                    # two unpacked tuples are wall times and steps, not losses
                    # (the original names were misleading).
                    train_times, train_steps, train_accs = zip(
                        *event_acc.Scalars('train/acc'))
                    val_times, val_steps, val_accs = zip(
                        *event_acc.Scalars('val/acc'))
                    if len(val_times) < 10:
                        # WARNING: destructive — deletes short runs from disk.
                        os.system("rm -r results/" + r)
                    if len(val_times) >= 10 or train_accs[-1] > 0.95:
                        print(r)
                        print("train", round(np.max(train_accs)*100, 2), np.argmax(train_accs), \
                            "val", round(np.max(val_accs)*100, 2), np.argmax(val_accs))
                        if os.path.exists("results/" + r +
                                          "/checkpoints_test_results.json"):
                            with open(
                                    "results/" + r +
                                    "/checkpoints_test_results.json", "r") as f:
                                test = json.load(f)
                            print(test)
def extract(dpath, subpath, args):
    """Extract per-key scalar series from every run directory under ``dpath``.

    Args:
        dpath (pathlib.Path): directory with one subdirectory per run
            (``FOLDER_NAME`` is skipped).
        subpath: path inside each run directory holding the event files.
        args: namespace whose ``allowed_keys`` lists key prefixes to keep;
            when falsy a built-in prefix list is used.

    Returns:
        dict: scalar key -> (steps, mean wall times across runs, list of
        per-run value lists).

    Raises:
        AssertionError: if the runs disagree on the steps logged for a key.
    """
    scalar_accumulators = [
        EventAccumulator(str(dpath / dname / subpath)).Reload().scalars
        for dname in os.listdir(dpath) if dname != FOLDER_NAME
    ]

    # Filter non event files (accumulators with no scalar data).
    scalar_accumulators = [
        acc for acc in scalar_accumulators if acc.Keys()
    ]

    all_keys_set = [set(acc.Keys()) for acc in scalar_accumulators]

    if args.allowed_keys:
        allowed_keys = args.allowed_keys
    else:
        allowed_keys = [
            'evaluate', 'standard_evaluate', 'forgetting_metric', 'weight_stat'
        ]

    # Keep keys that start with an allowed prefix AND are present in every run.
    keys = [
        key for key in all_keys_set[0]
        if any(key.startswith(prefix) for prefix in allowed_keys)
        and all(key in key_set for key_set in all_keys_set)
    ]

    # One list of ScalarEvents per run, per key. Computed exactly once — the
    # original recomputed this (and the step tuples) a second time with no
    # effect, and dumped the whole structure to stdout in between.
    all_scalar_events_per_key = [
        [acc.Items(key) for acc in scalar_accumulators] for key in keys
    ]

    # Validate that all runs logged the same steps for each key.
    all_steps_per_key = [
        [tuple(event.step for event in events) for events in events_per_run]
        for events_per_run in all_scalar_events_per_key
    ]
    for i, all_steps in enumerate(all_steps_per_key):
        print(i, all_steps)
        assert len(set(all_steps)) == 1, (
            "For scalar {} the step numbering or count doesn't match. "
            "Step count for all runs: {}".format(
                keys[i], [len(steps) for steps in all_steps]))

    steps_per_key = [all_steps[0] for all_steps in all_steps_per_key]

    # Get and average wall times per step per key across runs.
    wall_times_per_key = [
        np.mean([
            tuple(event.wall_time for event in events)
            for events in events_per_run
        ], axis=0) for events_per_run in all_scalar_events_per_key
    ]

    # Get values per step per key (kept separate per run).
    values_per_key = [
        [[event.value for event in events] for events in events_per_run]
        for events_per_run in all_scalar_events_per_key
    ]

    return dict(
        zip(keys, zip(steps_per_key, wall_times_per_key, values_per_key)))
def get_miou_list_class_all(log_dir, class_num):
    """Read per-class and overall mIoU curves from a DeepLab eval log dir.

    Returns a dict with per-class value lists ('class_<i>'), plus 'overall',
    'step' and 'wall_time' lists when the overall tag exists. The dict is also
    saved to ``<log_dir>/miou.txt``.
    """
    # Add the TensorBoard shipped with the tf1.x environment to sys.path so
    # EventAccumulator can be imported from it.
    # NOTE(review): the 'python3.7' segment hard-codes the interpreter version
    # of the tf1.x env — breaks if that env uses another Python version.
    tf1x_dir = os.path.join(os.path.dirname(os.path.dirname(tf1x_python)),
                            'lib', 'python3.7', 'site-packages')
    sys.path.insert(0, tf1x_dir)
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    # Loading too much data is slow... tf_size_guidance caps how much data the
    # EventAccumulator keeps in memory per tag type; a value of 0 stores all
    # events of that type.
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 0,  # set a 0, to load all scalars
        'histograms': 1
    }
    miou_dic = {'step': [0]}  # step 0, need some where
    events_files = io_function.get_file_list_by_pattern(log_dir, 'events*')
    if len(events_files) < 1:
        print('warning, No events file in %s' % log_dir)
        return miou_dic

    event_acc = EventAccumulator(log_dir, tf_size_guidance)
    event_acc.Reload()

    # Show all tags in the log file
    tag_dict = event_acc.Tags()
    # io_function.save_dict_to_txt_json('event_acc.txt',tag_dict)
    scalar_tags = tag_dict['scalars']
    # print(scalar_tags)

    # Collect one value list per class that actually has an mIoU tag.
    for class_id in range(class_num):
        name = 'class_%d' % class_id
        tag = 'eval/miou_1.0_' + name
        if tag in scalar_tags:
            miou_class_event = event_acc.Scalars(tag)
            miou_class_list = [
                item[2] for item in miou_class_event
            ]  # item[0] is wall_time, item[1] is step, item [2] is the value
            # step_list = [item[1] for item in miou_class_event]
            # print(step_list)
            miou_dic[name] = miou_class_list

    # The overall tag additionally supplies the shared step/wall-time axes.
    tag = 'eval/miou_1.0_overall'
    if tag in scalar_tags:
        miou_class_overall = event_acc.Scalars('eval/miou_1.0_overall')
        miou_class_list = [item[2] for item in miou_class_overall]
        step_list = [item[1] for item in miou_class_overall]
        wall_time_list = [item[0] for item in miou_class_overall]
        # print(step_list)
        miou_dic['overall'] = miou_class_list
        miou_dic['step'] = step_list
        miou_dic[
            'wall_time'] = wall_time_list  # we can use datetime.fromtimestamp() to convert datetime

    io_function.save_dict_to_txt_json(os.path.join(log_dir, 'miou.txt'),
                                      miou_dic)
    return miou_dic
summary_home = r'D:\tmp\fujian\temp_move2' metric = namedtuple('metric', ['wall_time', 'step', 'acc', 'miou']) res = [] for each_dir in os.listdir(summary_home): if not os.path.isdir(join(summary_home, each_dir)): continue print(each_dir) model, dataset, _, epochs, batch_size, init_lr, end_lr, iterations, crop_size, bn_scale, ignore_label, structure_model, extra_message = each_dir.split( '#') # front_end, back_end = model.split('_') # front_end, stride = front_end.split('@') cur_metrics = set() for each_summary_file in os.listdir(join(summary_home, each_dir, 'eval')): event_acc = EventAccumulator( join(summary_home, each_dir, 'eval', each_summary_file)) event_acc.Reload() acc = event_acc.Scalars('accuracy') miou = event_acc.Scalars('mean_iou') for each_acc, each_miou in zip(acc, miou): cur_metrics.add( metric(each_acc.wall_time, each_acc.step, each_acc.value, each_miou.value)) # print(cur_metrics) # exit(1) cur_metrics = sorted(cur_metrics, key=attrgetter('step')) run_time = cur_metrics[-1].wall_time - cur_metrics[0].wall_time best_acc = max(cur_metrics, key=attrgetter('acc')) best_miou = max(cur_metrics, key=attrgetter('miou'))