Example no. 1
0
def test_interp_curves():
    """interp_curves puts curves sampled on mismatched x-grids onto one shared grid."""
    # Two curves sampled at different, only partially overlapping x positions.
    xa = [4, 5, 7, 13, 20]
    ya = [0.25, 0.22, 0.53, 0.37, 0.55]
    xb = [2, 4, 6, 7, 9, 11, 15]
    yb = [0.03, 0.12, 0.4, 0.2, 0.18, 0.32, 0.39]

    new_x, ys = interp_curves([xa, xb], [ya, yb])

    # Both outputs are array-like containers.
    assert isinstance(new_x, (list, np.ndarray))
    assert isinstance(ys, (list, np.ndarray))

    # One shared 10-point grid spanning the union of both input ranges.
    assert len(new_x) == 10
    assert len(ys) == 2
    assert len(ys[0]) == 10
    assert len(ys[1]) == 10
    assert min(new_x) == 2
    assert max(new_x) == 20

    # Interpolated values stay inside the band of the input data.
    assert min(ys[0]) > 0 and min(ys[1]) > 0
    assert max(ys[0]) < 0.6 and max(ys[1]) < 0.6
Example no. 2
0
def read_xy(log_folder,
            file_name,
            get_x,
            get_y,
            smooth_out=False,
            smooth_kws=None,
            point_step=1):
    """Collect (x, y) curves from a log-folder tree into one long DataFrame.

    Expects ``log_folder`` to contain numerically-named experiment-ID
    sub-folders, each of which contains numerically-named seed sub-folders
    holding a pickled log file (``file_name``) plus a ``config.yml``.

    Args:
        log_folder: root folder containing the numeric experiment-ID folders.
        file_name: name of the pickled log file inside every seed folder.
        get_x: callable extracting the x value from one log entry.
        get_y: callable extracting the y value from one log entry.
        smooth_out: if True, run ``smooth_filter`` over every curve.
        smooth_kws: keyword args for ``smooth_filter``; defaults to
            ``{'window_length': 51, 'polyorder': 3}``.
        point_step: keep only every ``point_step``-th interpolated point.

    Returns:
        A ``pandas.DataFrame`` with columns ``x`` and ``y`` plus one column
        per config key, one row per (seed, interpolated point).
    """
    def numeric_subdirs(folder):
        # Only sub-folders whose name is all digits (experiment IDs / seeds).
        return [p for p in folder.glob('*/')
                if p.is_dir() and str(p.name).isdigit()]

    # Resolve the smoothing defaults once, before the loop, instead of
    # rebinding the parameter on the first iteration that needs it.
    if smooth_kws is None:
        smooth_kws = {'window_length': 51, 'polyorder': 3}

    dfs = []
    for id_folder in numeric_subdirs(Path(log_folder)):
        x = []
        y = []
        for seed_folder in numeric_subdirs(id_folder):
            logs = pickle_load(seed_folder / file_name)
            x.append([get_x(log) for log in logs])
            y.append([get_y(log) for log in logs])
        new_x, ys = interp_curves(x, y)  # all seeds share same x values

        if smooth_out:
            ys = [smooth_filter(curve, **smooth_kws) for curve in ys]

        if point_step > 1:
            # Thin the curves, keeping every point_step-th sample.
            idx = np.arange(0, new_x.size, step=point_step)
            new_x = new_x[idx, ...]
            ys = [curve[idx, ...] for curve in ys]

        # Stack all seed curves end-to-end against the tiled shared x-grid.
        df = pd.DataFrame({'x': np.tile(new_x, len(ys)), 'y': np.hstack(ys)})
        config = yaml_load(id_folder / 'config.yml')
        config = pd.DataFrame([config.values()], columns=config.keys())
        # Lists are unhashable; tuples keep config columns groupable.
        config = config.applymap(lambda v: tuple(v)
                                 if isinstance(v, list) else v)
        df = pd.concat([df, config], axis=1, ignore_index=False)
        df = df.fillna(method='pad')  # padding all NaN configs
        dfs.append(df)
    return pd.concat(dfs, axis=0, ignore_index=True)