Example #1
def plot_tensorflow_log(path, epochs):

    valdir = os.path.join(path, 'validation')
    traindir = os.path.join(path, 'train')
    metricdir = os.path.join(path, 'metrics')

    val_events = os.path.join(valdir, os.listdir(valdir)[0])
    train_events = os.path.join(traindir, os.listdir(traindir)[0])
    metric_events = os.path.join(metricdir, os.listdir(metricdir)[0])

    #for summary in summary_iterator(metric_events):
    #    print(summary)

    val_acc = EventAccumulator(val_events)
    val_acc.Reload()
    train_acc = EventAccumulator(train_events)
    train_acc.Reload()
    metric_acc = EventAccumulator(metric_events)
    metric_acc.Reload()

    # Show all tags in the log file
    #print(val_acc.Tags())
    #print(train_acc.Tags())
    #print(metric_acc.Tags())

    val_epoch_loss = val_acc.Scalars('epoch_loss')
    val_epoch_accuracy = val_acc.Scalars('epoch_accuracy')
    #print(val_epoch_accuracy)

    train_epoch_loss = train_acc.Scalars('epoch_loss')
    train_epoch_accuracy = train_acc.Scalars('epoch_accuracy')

    #training_accuracies =   event_acc.Scalars('training-accuracy')
    #validation_accuracies = event_acc.Scalars('validation_accuracy')

    steps = len(train_epoch_accuracy)
    x = np.arange(steps)
    y = np.zeros([steps, 2])

    for i in range(steps):
        y[i, 0] = train_epoch_accuracy[i][2]  # value
        y[i, 1] = val_epoch_accuracy[i][2]

    print('Final train accuracy:', train_epoch_accuracy[-1])
    print('Final val accuracy:', val_epoch_accuracy[-1])

    plt.plot(x, y[:, 0], label='training accuracy')
    plt.plot(x, y[:, 1], label='validation accuracy')
    plt.xlim(0, epochs)
    plt.ylim(0, 1)

    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.title("Training Progress")
    plt.grid(True, axis='both')
    plt.legend(loc='lower right', frameon=True)
    plt.show()
Example #2
def load_subdirectory_data(dir_exp, explo_type, run_index=None):
    """
    Loads the data for the subfolders of dir_exp.
    Inputs:
        dir_exp - directory of the experiment
        explo_type - type of exploration for which to load the data
    """

    # list the subdirectories
    search_base = "run*" if run_index is None else "run" + "{:03}".format(run_index)
    sub_list = sorted(glob.glob("/".join((dir_exp, explo_type, search_base))))
    print("{} runs found for the {} type of exploration in {}".format(len(sub_list), explo_type, dir_exp))

    # initialize variables
    var = {"all_epochs": [],
           "all_losses": [],
           "all_metric_errors": [],
           "all_topo_errors_in_P": [],
           "all_topo_errors_in_H": []}

    for sub_dir in sub_list:

        # recover the Tensorboard logs
        log_file = glob.glob(sub_dir + "/tb_logs/*")[0]
        event_acc = EventAccumulator(log_file)
        event_acc.Reload()

        # extract and store the variables
        _, epochs, losses = zip(*event_acc.Scalars("loss"))
        _,      _, topo_errors_in_P = zip(*event_acc.Scalars("topology_error_in_P_1"))
        _,      _, topo_errors_in_H = zip(*event_acc.Scalars("topology_error_in_H_1"))
        _,      _, metric_errors = zip(*event_acc.Scalars("metric_error_1"))
        var["all_epochs"] += [epochs]
        var["all_losses"] += [losses]
        var["all_topo_errors_in_P"] += [topo_errors_in_P]
        var["all_topo_errors_in_H"] += [topo_errors_in_H]
        var["all_metric_errors"] += [metric_errors]

    # check that all runs are valid (they have compatible numbers of epochs)
    to_delete = []
    length_all_epochs = [len(x) for x in var["all_epochs"]]
    max_length = max(length_all_epochs)
    for ind, length in enumerate(length_all_epochs):
        if length < max_length:
            to_delete.append(ind)
            print("!! Warning: the run {} has {} epochs values instead of {} - it is discarded".
                  format(ind, length, max_length))
    # remove the entries that don't have the correct number of epochs
    for key in var.keys():
        var[key] = [val for ind, val in enumerate(var[key]) if ind not in to_delete]

    # convert the lists to arrays
    for key in var.keys():
        var[key] = np.array(var[key])

    # get the number of valid runs
    number_runs = var["all_epochs"].shape[0]
    print("{} runs loaded successfully for the {} exploration".format(number_runs, explo_type))

    return var, number_runs
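A minimal usage sketch for load_subdirectory_data; the experiment directory "results/exp01" and exploration type "random" below are placeholders, not names from the original project:

# Hypothetical usage: average the loss curves over the valid runs.
import matplotlib.pyplot as plt

var, number_runs = load_subdirectory_data("results/exp01", "random")

mean_loss = var["all_losses"].mean(axis=0)   # average over runs
std_loss = var["all_losses"].std(axis=0)
epochs = var["all_epochs"][0]                # step axis shared by all runs

plt.plot(epochs, mean_loss, label="mean loss ({} runs)".format(number_runs))
plt.fill_between(epochs, mean_loss - std_loss, mean_loss + std_loss, alpha=0.2)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()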
Example #3
def plot_series(path, ax, label):
    ea = EventAccumulator(path)
    ea.Reload()

    avg_ss = ea.Scalars("Eval_AverageReturn")
    std_ss = ea.Scalars("Eval_StdReturn")
    assert len(avg_ss) == len(std_ss), (avg_ss, std_ss)

    df = pd.DataFrame(
        {
            "steps": [t[1] for t in avg_ss],
            "avg": [t[2] for t in avg_ss],
            "std": [t[2] for t in std_ss],
        }
    )
    # Add a duplicate row to preserve the last data marker
    df.loc[len(df.index)] = df.loc[len(df.index) - 1]
    ax.errorbar(
        x=df["steps"],
        y=df["avg"],
        yerr=df["std"],
        elinewidth=1,
        capsize=4,
        linewidth=2,
        marker="o",
        markersize=6,
        markevery=args.markevery,
        errorevery=args.markevery,
        label=label,
    )
Example #4
def plot_tb(filename, export=None, act_quant_line=None):
    from eegnet_run import _prepare_scalar_array_from_tensorboard as prepare_tb_array
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
    ea = EventAccumulator(filename)
    ea.Reload()
    data = {key: prepare_tb_array(ea, key) for key in ea.Tags()['scalars']}
    plot_data(data, export, act_quant_line)
Example #5
    def read_fn(_format):
        if _format == "csv":
            try:
                df = read_csv(tmp_path / "progress.csv")
            except EmptyDataError:
                return LogContent(_format, [])
            return LogContent(_format, [r for _, r in df.iterrows() if not r.empty])
        elif _format == "json":
            try:
                df = read_json(tmp_path / "progress.json")
            except EmptyDataError:
                return LogContent(_format, [])
            return LogContent(_format, [r for _, r in df.iterrows() if not r.empty])
        elif _format == "stdout":
            captured = capsys.readouterr()
            return LogContent(_format, captured.out.splitlines())
        elif _format == "log":
            return LogContent(_format, (tmp_path / "log.txt").read_text().splitlines())
        elif _format == "tensorboard":
            from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

            acc = EventAccumulator(str(tmp_path))
            acc.Reload()

            tb_values_logged = []
            for reservoir in [acc.scalars, acc.tensors, acc.images, acc.histograms, acc.compressed_histograms]:
                for k in reservoir.Keys():
                    tb_values_logged.append(f"{k}: {str(reservoir.Items(k))}")

            content = LogContent(_format, tb_values_logged)
            return content
Example #6
def main(_):
    sns.color_palette()
    fig = plt.figure(figsize=(8, 4))
    ax = fig.gca()
    print(FLAGS.logdirs)
    for logdir in FLAGS.logdirs:
        print(logdir)
        samples = []
        rewards = []
        for seed in range(FLAGS.seeds):
            logdir_ = Path(logdir) / f'seed{seed}'
            logdir_ = logdir_ / 'val'
            event_acc = EventAccumulator(str(logdir_))
            event_acc.Reload()
            _, step_nums, vals = zip(*event_acc.Scalars('val-mean_reward'))
            samples.append(step_nums)
            rewards.append(vals)
        print(samples)
        # all seeds must have logged the same step numbers
        samples = np.array(samples)
        assert np.all(samples == samples[:1, :])
        rewards = np.array(rewards)
        mean_rewards = np.mean(rewards, 0)
        std_rewards = np.std(rewards, 0)
        ax.plot(samples[0, :], mean_rewards, label=logdir)
        ax.fill_between(samples[0, :],
                        mean_rewards - std_rewards,
                        mean_rewards + std_rewards,
                        alpha=0.2)

    ax.legend(loc=4)
    ax.set_ylim([0, 210])
    ax.grid('major')
    fig.savefig(FLAGS.output_file_name, bbox_inches='tight')
Example #7
def plot_tensorflow_log(path):

    # Loading too much data is slow...
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 100,
        'histograms': 1
    }

    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()

    # Show all tags in the log file
    #print(event_acc.Tags())

    training_loss = event_acc.Scalars("Loss/train")
    validation_loss = event_acc.Scalars("Loss/validation")

    training_loss = [elem[2] for elem in training_loss]
    validation_loss = [elem[2] for elem in validation_loss]

    plt.plot(training_loss, label='training loss')
    plt.plot(validation_loss, label='validation loss')

    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training Progress")
    plt.legend(loc='upper right', frameon=True)
    plt.show()
Example #8
def sum_log(path: str):
    """Extract values from path to log file"""
    default_size_guidance = {
        'compressedHistograms': 1,
        'images': 1,
        'scalars': 0,  # 0 means load all
        'histograms': 1
    }
    runlog = pd.DataFrame({"metric": [], "value": [], "step": []})

    event_acc = EventAccumulator(path, default_size_guidance)
    event_acc.Reload()
    tags = event_acc.Tags()["scalars"]
    for tag in tags:
        event_list = event_acc.Scalars(tag)
        values = list(map(lambda x: x.value, event_list))
        step = list(map(lambda x: x.step, event_list))
        r = {"metric": [tag] * len(step), "value": values, "step": step}
        r = pd.DataFrame(r)
        runlog = pd.concat([runlog, r])

    # Extract additional tags from path
    description, run_type = path.split('/')[-3:-1]
    embedding = description.split('_')
    embedding, architecture, fold = embedding[0], embedding[1], embedding[-1]

    runlog['embedding'] = embedding
    runlog['architecture'] = architecture
    runlog['fold'] = fold
    runlog['run_type'] = run_type

    return runlog
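A hedged usage sketch for sum_log; the directory layout <embedding>_<architecture>_<fold>/<run_type>/<event file> is only inferred from the path parsing above, and every name below is made up:

# Hypothetical call against a placeholder log path.
runlog = sum_log("logs/glove_cnn_fold0/train/events.out.tfevents.1234")

# Last logged value of every scalar tag in this run.
print(runlog.groupby("metric")["value"].last())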
Example #9
    def run(self):
        df_list = []
        for directory in self.configuration.log_folders:
            list_log_folders = os.listdir(directory)
            dir_name = os.path.basename(os.path.dirname(directory))
            list_log_folders = [
                d for d in list_log_folders
                if os.path.isdir(os.path.join(directory, d))
            ]

            for tb_output_folder in list_log_folders:
                print("working on:", tb_output_folder)
                x = EventAccumulator(
                    path=os.path.join(directory, tb_output_folder))
                x.Reload()
                x.FirstEventTimestamp()
                keys = self.configuration.properties
                print_out_dict = {}

                for i in range(len(keys)):
                    print_out_dict.update({
                        f"{dir_name}_{keys[i]}_{tb_output_folder}":
                        [e.value for e in x.Scalars(keys[i])]
                    })

                df = pd.DataFrame(data=print_out_dict)
                df_list.append(df)

        complete_df = pd.concat(df_list, axis=1)
        complete_df.to_csv(
            os.path.join(self.configuration.output_path,
                         self.configuration.output_name))
Example #10
def plot_tensorflow_log(path):

    # Loading too much data is slow...
    tf_size_guidance = {
        "compressedHistograms": 10,
        "images": 0,
        "scalars": 100,
        "histograms": 1
    }

    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()

    # Show all tags in the log file
    # print(event_acc.Tags())

    epoch_loss = event_acc.Scalars("epoch_loss")
    epoch_accuracy = event_acc.Scalars("epoch_accuracy")

    steps = 10
    x = np.arange(steps)
    y = np.zeros([steps, 2])

    for i in range(steps):
        y[i, 0] = epoch_loss[i][2]
        y[i, 1] = epoch_accuracy[i][2]

    plt.plot(x, y[:, 0], label="Loss")
    plt.plot(x, y[:, 1], label="Accuracy")

    plt.xlabel("Steps")
    plt.ylabel("Accuracy and Loss")
    plt.title("Training Progress")
    plt.legend(loc="upper right", frameon=True)
    plt.show()
Example #11
File: print_tf_log.py Project: lhfowl/gan
def get_tensorflow_log(path, scores={}, score_names=[]):
    """Print every event that has 'eval' in  csv format to the screen.
    """

    # Loading too much data is slow...
    tf_size_guidance = {
        'compressedHistograms': 0,
        'images': 0,
        'scalars': 1000,
        'histograms': 0
    }

    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()

    tags = event_acc.Tags()

    event_tags = tags['scalars'] if 'scalars' in tags else []
    event_tags = [et for et in event_tags if 'eval' in et]

    for event_tag in event_tags:
        score_name = event_tag.split('/')[-1]
        if score_name not in score_names:
            score_names.append(score_name)

        events = event_acc.Scalars(event_tag)
        for event in events:
            if event.step not in scores:
                scores[event.step] = {}
            scores[event.step][score_name] = event.value

    return scores, score_names
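A small sketch of how the collected scores could then be printed as CSV, matching the docstring above (the log path is a placeholder):

# Hypothetical usage: one CSV row per step, one column per 'eval' score.
scores, score_names = get_tensorflow_log("logs/eval")
print(",".join(["step"] + score_names))
for step in sorted(scores):
    row = [str(step)] + [str(scores[step].get(name, "")) for name in score_names]
    print(",".join(row))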
Example #12
def _get_event_accumulator(
    log_dir: Union[str, Path], size_guidance: Dict = DEFAULT_SIZE_GUIDANCE
) -> EventAccumulator:
    """Returns the tensorboard EventAccumulator instance for a log dir."""
    event_acc = EventAccumulator(str(log_dir), size_guidance=size_guidance)
    event_acc.Reload()
    return event_acc
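A minimal sketch of how this helper might be queried; the log directory and the tag "train/loss" are assumed examples, not part of the original code:

# Hypothetical usage of _get_event_accumulator.
from pathlib import Path

acc = _get_event_accumulator(Path("runs/experiment_1"))
print(acc.Tags()["scalars"])              # list all scalar tags in the run
for event in acc.Scalars("train/loss"):   # assumed tag name
    print(event.step, event.value)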
Example #13
def get_event(event_path):
    event_acc = EventAccumulator(event_path)
    event_acc.Reload()
    # print(event_acc.Tags()['scalars'])
    # print(event_acc.Scalars('metrics'))

    w_times, step_nums, vals = zip(*event_acc.Scalars('metrics'))
    print(step_nums, vals)
    # print(len(step_nums))

    x = list()
    y = list()
    # for i in range(0, 50, 1):
    for i in range(0, len(step_nums), 1):
        # x.append(i)
        x.append(step_nums[i])
        y.append(vals[i])
    # json_dict = {
    #     'x': x,
    #     'y': y
    # }
    # with open('figs/fig_loss.json', 'w') as f:
    #     json.dump(json_dict, f, indent=4)
    # plt.figure()
    # plt.plot(x, y)
    # plt.savefig('figs/fig.jpg')
    return x, y
Example #14
def dataframe(path,
              block=[],
              reservoirs=[SCALAR_RESERVOIR, TENSOR_RESERVOIR],
              everything=False,
              metric=False):
    if everything:
        size_guidance = STORE_EVERYTHING_SIZE_GUIDANCE
    else:
        size_guidance = DEFAULT_SIZE_GUIDANCE

    event_acc = EventAccumulator(str(path), size_guidance=size_guidance)
    event_acc.Reload()

    frames = [
        extract(event_acc, **res, block_list=block) for res in reservoirs
    ]
    frames = list(filter(lambda x: not x.empty, frames))

    if frames:
        try:
            if metric:
                index_filter = lambda x: x.filter(items=[-1], axis=0)
            else:
                index_filter = lambda x: x.filter(
                    items=range(0, int(x.index.max())), axis=0)

            frames = list(map(index_filter, frames))
        except ValueError as err:
            print(err)
            print("Requires manual fix at {}".format(path))

        return functools.reduce(lambda a, b: a.join(b, how="outer", sort=True),
                                frames)
    else:
        return pd.DataFrame()
Example #15
def extract_data(d, category, name):

    # Grab all of the accuracy results for each model and put into Pandas dataframe
    event_acc = EventAccumulator(d)
    event_acc.Reload()
    # Show all tags in the log file
    print(event_acc.Tags())

    s = event_acc.Scalars(
        'PASCAL/PerformanceByCategory/[email protected]/{0}'.format(category))
    df = pd.DataFrame(s)
    if df.empty:
        raise ValueError('No {0} data available in {1}'.format(category, d))

    time_start = df.wall_time[0]

    # convert wall time and value to rounded values
    df['wall_time'] = df['wall_time'].apply(wallToGPUTime, args=(time_start, ))
    df['value'] = df['value'].apply(valueTomAP)

    # rename columns
    df.columns = ['GPU Time', 'step', 'Overall mAP']
    df['model'] = np.full(len(df), name)
    #print(df)
    return df
Example #16
def _accumulate_logs(subject, exp_id):
    # extract name of logfile
    stats_folder = os.path.join(PROBLEM, EXP_FOLDER.format(exp_id), "stats")
    log_files = os.listdir(stats_folder)
    assert(len(log_files) == 1)
    log_file = os.path.join(stats_folder, log_files[0])

    # get eventaccumulator
    ea = EventAccumulator(log_file)
    ea.Reload()

    # load data file
    name_addon = f"data_S{subject:02}"
    data_file = os.path.join(PROBLEM, EXPORT_FILE.format(name_addon))
    if os.path.exists(data_file):
        with np.load(data_file) as data_loader:
            data = dict(data_loader)
    else:
        data = {'num_trials': 0}

    # update the data dictionary to keep the mean value
    num_trials = data['num_trials']
    for key in ea.Tags()['scalars']:
        new_arr = _prepare_scalar_array_from_tensorboard(ea, key)
        if num_trials == 0:
            # just add the data
            data[key] = new_arr
        else:
            assert(key in data)
            data[key] = (data[key] * num_trials + new_arr) / (num_trials + 1)
    data['num_trials'] += 1

    # store data back into the same file
    np.savez(data_file, **data)
Example #17
File: utils.py Project: wtaylor17/VPRNN
def load_tensorboard_scalar(tb_path, scalar_name, extract_values=True):
    event_acc = EventAccumulator(tb_path)
    event_acc.Reload()
    tag_array = event_acc.Scalars(scalar_name)
    if extract_values:
        return [obj.value for obj in tag_array]
    return tag_array
Example #18
def gatherData(path, trainlax, trainAax, vallax, valAax, name, alpha):
    event_acc = EventAccumulator(path)
    event_acc.Reload()
    # Show all tags in the log file
    print(event_acc.Tags()['scalars'])
    if len(event_acc.Tags()['scalars']) == 0:
        return

    # E.g. get wall clock, step number, and value for each scalar tag
    _, trainlstp, trainLoss = zip(*event_acc.Scalars('train_loss'))
    _, trainvstp, trainAcc = zip(*event_acc.Scalars('train_acc'))
    _, vallstp, valLoss = zip(*event_acc.Scalars('val_loss'))
    _, valvstp, valAcc = zip(*event_acc.Scalars('val_acc'))
    trainLoss = np.array(trainLoss)
    trainAcc = np.array(trainAcc)
    valAcc = np.array(valAcc)
    valLoss = np.array(valLoss)
    # exponential moving average smoothing
    trainLoss = numpy_ewma_vectorized_v2(trainLoss, alpha)
    trainAcc = numpy_ewma_vectorized_v2(trainAcc, alpha)
    valAcc = numpy_ewma_vectorized_v2(valAcc, alpha)
    valLoss = numpy_ewma_vectorized_v2(valLoss, alpha)
    trainlax.plot(trainlstp, trainLoss, label=name)
    trainAax.plot(trainvstp, trainAcc, label=name)
    vallax.plot(vallstp, valLoss, label=name)
    valAax.plot(valvstp, valAcc, label=name)
Example #19
def load_event_scalars(log_path):
    feature = log_path.split(os.sep)[-1]
    print(f"Processing logfile: {os.path.abspath(log_path)}")
    if feature.find("_") != -1:
        feature = feature.split("_")[-1]
    df = pd.DataFrame()
    try:
        event_acc = EventAccumulator(log_path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        tags = event_acc.Tags()["scalars"]
        env_list = event_acc.Scalars
        use_tensorflow = False
        if not tags:
            tags = event_acc.Tags()["tensors"]
            env_list = event_acc.tensors.Items
            use_tensorflow = True
        for tag in tags:
            event_list = env_list(tag)
            if use_tensorflow:
                values = list(
                    map(lambda x: float(tf.make_ndarray(x.tensor_proto)),
                        event_list))
                step = list(map(lambda x: x.step, event_list))
                df[tag] = values
            else:
                values = list(map(lambda x: x.value, event_list))
                step = list(map(lambda x: x.step, event_list))
                df = pd.DataFrame({feature: values}, index=step)
    # Dirty catch of DataLossError
    except Exception:
        print("Event file possibly corrupt: {}".format(
            os.path.abspath(log_path)))
        traceback.print_exc()
    return df
Example #20
    def read_distros(self, prefix, *args):
        if prefix[-1] != '/': prefix += '/'
        for arg in args:
            assert isinstance(arg, str),\
                "Scalar names must be strings, found: {}".format(arg)
        subdirs = list(self.ret_checkpoints().keys())
        data = collections.OrderedDict()
        for subdir in subdirs:
            evts_path = os.path.join(self.directory, subdir,
                                     self.evts_stem + '*')
            evts_file = glob.glob(evts_path)
            if not evts_file: continue
            assert len(
                evts_file) == 1, "Multiple events file found: {}".format(
                    evts_file)
            evts_file = evts_file[0]
            print("Reading: {}".format(evts_file))
            EvtAcc = EventAccumulator(evts_file,
                                      size_guidance={'histograms': 0})
            EvtAcc.Reload()
            data.update({subdir: collections.OrderedDict()})
            for arg in args:
                data[subdir].update({arg: collections.OrderedDict()})
                wall_time, global_step, histogram = zip(
                    *EvtAcc.Histograms(prefix + arg))
                distros = [hist[5:] for hist in histogram]
                scalar_dict = collections.OrderedDict()
                data[subdir][arg].update({'wall_time': wall_time})
                data[subdir][arg].update({'global_step': global_step})
                data[subdir][arg].update({'histogram': histogram})
                data[subdir][arg].update({'distros': distros})
        return data
Example #21
    def get_tensorboard_scalar(self, scalar_name):
        path_template = os.path.join(self.tensorboard_path, 'PPO_%i')
        run_idx = 0
        wall_times, timesteps, scalar_values = [], [], []
        while os.path.exists(path_template % run_idx):
            event_accumulator = EventAccumulator(path_template % run_idx)
            event_accumulator.Reload()

            new_wall_times, new_timesteps, new_scalar_values = zip(
                *event_accumulator.Scalars(scalar_name))

            # To chain multiple runs together, the time in between has to be left out
            prev_run_wall_time = 0 if len(wall_times) == 0 else wall_times[-1]
            # Iterations have to be continuous even when having multiple runs
            prev_run_timesteps = 0 if len(timesteps) == 0 else timesteps[-1]

            wall_times += [
                prev_run_wall_time + wt - new_wall_times[0]
                for wt in new_wall_times
            ]
            timesteps += [
                prev_run_timesteps + it - new_timesteps[0]
                for it in new_timesteps
            ]
            scalar_values += new_scalar_values

            run_idx += 1

        return wall_times, timesteps, scalar_values
Example #22
def plot_series(path, ax, label):
    ea = EventAccumulator(path)
    ea.Reload()

    avg_ss = ea.Scalars(args.avg_series)
    try:
        std_ss = ea.Scalars(args.std_series)
        assert len(avg_ss) == len(std_ss), (avg_ss, std_ss)
    except KeyError:
        std_ss = None

    data = {
        "steps": [t[1] for t in avg_ss],
        "avg": [t[2] for t in avg_ss],
    }
    if std_ss is not None:
        data["std"] = [t[2] for t in std_ss]

    df = pd.DataFrame(data)
    # Add a duplicate row to preserve the last data marker
    df.loc[len(df.index)] = df.loc[len(df.index) - 1]
    ax.errorbar(
        x=df["steps"],
        y=df["avg"],
        yerr=df["std"] if "std" in df.columns else None,
        elinewidth=1,
        capsize=4,
        linewidth=2,
        marker="o",
        markersize=6,
        markevery=args.markevery,
        errorevery=args.markevery,
        label=label,
    )
Example #23
File: data.py Project: trurebel/DeepRole
def load_tensorboard_data(filename):
    logging.disable(logging.CRITICAL)
    event_acc = EventAccumulator(filename)
    event_acc.Reload()
    _, _, loss = zip(*event_acc.Scalars('epoch_val_loss'))
    logging.disable(logging.NOTSET)
    return loss
Example #24
def get_param_num(results_dir):
    tensorboard_dir = results_dir + "/../"
    event_acc = EventAccumulator(tensorboard_dir)
    event_acc.Reload()
    tensor_event = event_acc.Tensors("model_info")
    tensor_np = tf.make_ndarray(tensor_event[0].tensor_proto)
    return int(tensor_np[0, 1])
Example #25
File: loggers.py Project: kaylode/uda
def tflog2pandas(path: str) -> pd.DataFrame:
    """convert single tensorflow log file to pandas DataFrame
    Parameters
    ----------
    path : str
        path to tensorflow log file
    Returns
    -------
    pd.DataFrame
        converted dataframe
    """
    DEFAULT_SIZE_GUIDANCE = {
        "compressedHistograms": 1,
        "images": 1,
        "scalars": 0,  # 0 means load all
        "histograms": 1,
    }
    runlog_data = pd.DataFrame({"metric": [], "value": [], "step": []})
    try:
        event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        tags = event_acc.Tags()["scalars"]
        # tags = event_acc.Tags()["images"]
        for tag in tags:
            event_list = event_acc.Scalars(tag)
            values = list(map(lambda x: x.value, event_list))
            step = list(map(lambda x: x.step, event_list))
            r = {"metric": [tag] * len(step), "value": values, "step": step}
            r = pd.DataFrame(r)
            runlog_data = pd.concat([runlog_data, r])
    # Dirty catch of DataLossError
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    return runlog_data
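A hedged usage sketch for tflog2pandas; the log directory is a placeholder. The long-format DataFrame can be pivoted into one column per metric:

# Hypothetical usage: reshape the long metric/value/step frame to wide form.
df = tflog2pandas("lightning_logs/version_0")
wide = df.pivot_table(index="step", columns="metric", values="value")
print(wide.head())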
Example #26
class TensorBoardAnalytics(Analytics):
    def __init__(self, log_path):
        self.event_acc = EventAccumulator(str(log_path),
                                          size_guidance={'tensors': 1000})

    def load(self):
        self.event_acc.Reload()

    def tensor(self, name) -> List[Event]:
        name = name.replace('.', '/')
        events = self.event_acc.Tensors(name)
        return [Event(e.step, tf.make_ndarray(e.tensor_proto)) for e in events]

    def summarize(self, events: List[Event]):
        step = np.mean([e.step for e in events])
        values = np.sort([e.tensor for e in events])
        basis_points = np.percentile(values, BASIS_POINTS)

        return np.concatenate(([step], basis_points))

    def summarize_compressed_histogram(self, events: List[Event]):
        basis_points = [int(b) for b in np.multiply(BASIS_POINTS, 100)]
        results = []
        for e in events:
            buckets = compressor.compress_histogram(e.tensor)
            assert (len(buckets) == len(basis_points))
            for i, c in enumerate(buckets):
                assert (c.basis_point == basis_points[i])
            results.append([e.step] + [c.value for c in buckets])

        return np.asarray(results)
Example #27
def output(tensorboard_dir, output_dir, metrics_keys, steps, output_file_base="metrics"):
    """Output csv and markdown file which accumulated tensorflow event by step and metrics_keys."""
    subdirs = GetLogdirSubdirectories(tensorboard_dir)

    event_accumulators = []
    for subdir in subdirs:
        event_accumulator = EventAccumulator(subdir)
        # init event accumulator
        event_accumulator.Reload()

        event_accumulators.append(event_accumulator)

    if not metrics_keys:
        metrics_keys = {
            metrics_key
            for event_accumulator in event_accumulators
            for metrics_key in _get_metrics_keys(event_accumulator)
        }

    columns = [_column_name(event_accumulator, metrics_key)
               for event_accumulator, metrics_key in itertools.product(event_accumulators, metrics_keys)]
    columns.sort()
    df = pd.DataFrame([], columns=columns)

    for event_accumulator in event_accumulators:
        for metrics_key in metrics_keys:
            value_step_list = _value_step_list(event_accumulator, metrics_key)
            for value, step in value_step_list:
                column_name = _column_name(event_accumulator, metrics_key)
                df.loc[step, column_name] = value

    if steps:
        df = df.loc[steps, :]

    df = df.sort_index(ascending=False)

    # index to column. and re-order column.
    df["step"] = df.index
    df = df[["step"] + columns]

    output_csv = os.path.join(output_dir, "{}.csv".format(output_file_base))
    df.to_csv(output_csv, index=False)

    output_md = os.path.join(output_dir, "{}.md".format(output_file_base))
    writer = pytablewriter.MarkdownTableWriter()
    writer.char_left_side_row = "|"  # fix for github
    writer.from_dataframe(df)

    with open(output_md, "w") as file_stream:
        writer.stream = file_stream
        writer.write_table()

    message = """
output success

output csv: {}
output md: {}
""".format(output_csv, output_md)

    print(message)
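A hypothetical invocation of output(); the directory names and metric keys below are placeholders, not taken from the original project:

# Assumes the TensorBoard runs live under experiments/tb and that
# experiments/summary already exists.
output(
    tensorboard_dir="experiments/tb",
    output_dir="experiments/summary",
    metrics_keys=["accuracy", "loss"],
    steps=None,                # keep every logged step
    output_file_base="metrics",
)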
Example #28
def save_plot(event_file, tags, output_dir):
    """从event_file读取Tensorboard数据,并将其保存成图标
    """
    if not os.path.isfile(event_file):
        print(f'{event_file} does not exist')
        return
    os.makedirs(output_dir, exist_ok=True)
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 1,
        'scalars': 100,
        'histograms': 1
    }
    event_acc = EventAccumulator(event_file, tf_size_guidance)
    event_acc.Reload()
    for tag in tags:
        try:
            events = event_acc.Scalars(tag)
        except KeyError:
            print(f'Tag {tag} does not exist!')
            continue
        steps = []
        data = []
        for event in events:
            steps.append(event[1])  # step
            data.append(event[2])   # value
        seaborn.set()
        plt.clf()
        plt.rcParams['figure.figsize'] = (16, 8)
        plt.plot(steps, data)
        plt.xlabel('steps')
        plt.ylabel(f'{tag}')
        plt.savefig(os.path.join(output_dir, f"{tag.replace('/', '_')}.png"))
Example #29
def return_tensorborad_data(path, scalar_name):
    # Loading too much data is slow...
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 100,
        'histograms': 1
    }

    event_acc = EventAccumulator(path, tf_size_guidance)
    event_acc.Reload()

    # Show all tags in the log file
    #print(event_acc.Tags())

    training_loss = event_acc.Scalars(scalar_name)
    validation_loss = event_acc.Scalars('val_' + scalar_name)

    steps = len(training_loss)
    x = np.arange(steps)
    y = np.zeros([steps, 2])

    for i in list(range(steps)):
        y[i, 0] = training_loss[i][2]  # value
        y[i, 1] = validation_loss[i][2]

    return x, y
Example #30
def plot_func(dirs, color, label, marker):
    threshold_list = list()
    for dir in dirs:
        print("current dir: ", dir)
        event = EventAccumulator(dir)
        event.Reload()
        print("event.scalars.Keys(): ", event.scalars.Keys())
        # threshold
        if (label == "ddpg"):
            threshold = event.scalars.Items(ddpg_key_name)
        elif (label == "td3"):
            threshold = event.scalars.Items(td3_key_name)
        elif (label == "sac"):
            threshold = event.scalars.Items(sac_key_name)

        threshold_len = len(threshold)
        tmp_threshold = np.zeros(threshold_len)
        if (not smooth):
            smooth_index = 1
        tmp_threshold = smooth_func(threshold, tmp_threshold)
        threshold_list.append(tmp_threshold)

    # minimum threshold length of all thresholds from one T and different seeds:
    min_threshold_len = min(
        [len(threshold_list[i]) for i in range(len(threshold_list))])
    thresholds = np.zeros(0)
    for i in range(0, len(threshold_list)):
        thresholds = np.concatenate(
            (thresholds, threshold_list[i][0:min_threshold_len]), axis=0)
    thresholds = thresholds.reshape((len(threshold_list), min_threshold_len))
    mean_std_fillcolor_plot(thresholds, color, label, marker)