# Example #1
# 0
def baseline(ax, exp_path, values, bounds):
    """Draw a dotted horizontal baseline for one experiment's best performance.

    The line sits at the experiment's best score (selected by the module-level
    `bestBy` setting) and spans the x-range covered by `values`.  Appends the
    pair (m, m) to `bounds` so callers can rescale the axis afterwards.
    """
    # fix: original called loadExperiment(path) with undefined name `path`
    exp = loadExperiment(exp_path)
    results = loadResults(exp, errorfile)

    # pin regularized / gradient-TD agents to their canonical hyperparameters
    if 'Regh' in exp.agent:
        results = whereParameterEquals(results, 'reg_h', 0.8)
        results = whereParameterEquals(results, 'ratio', 1)
    elif 'TDC' in exp.agent or 'GTD2' in exp.agent:
        results = whereParameterEquals(results, 'ratio', 1)

    if bestBy == 'end':
        # score by the mean over the final 10% of the learning curve
        metric = lambda m: np.mean(m[-int(m.shape[0] * .1):])
        best = getBestEnd(results)
    elif bestBy == 'auc':
        metric = np.mean
        best = getBest(results)
    else:
        # fix: previously fell through silently and raised NameError below
        raise ValueError(f'unknown bestBy setting: {bestBy}')

    color = colors[exp.agent]
    label = exp.agent

    m = metric(best.mean())
    low = min(values)
    high = max(values)
    ax.hlines(m, low, high, color=color, label=label, linewidth=4, linestyle=':')

    bounds.append((m, m))
# Example #2
# 0
def generatePlotTTA(ax, exp_path, bounds):
    """Plot parameter-sensitivity curves for one experiment.

    Plots the constrained settings always, and the unconstrained settings
    additionally when the module-level `show_unconst` flag is set.  Each
    plot's y-bounds are appended to `bounds`.
    """
    exp = loadExperiment(exp_path)
    const, unconst = tee(loadResults(exp, errorfile))

    agent = exp.agent
    color = colors[agent]
    label = rename(agent)

    # order matters: 'ReghTDC' and 'TDRCC' both contain 'TDC'
    if 'ReghTDC' in agent:
        const = whereParameterEquals(const, 'ratio', 1)
        const = whereParameterEquals(const, 'reg_h', 1)
    elif 'TDRCC' in agent:
        const = whereParameterEquals(const, 'ratio', 1)
        const = whereParameterGreaterEq(const, 'reg_h', 0.01)
    elif 'TDC' in agent:
        const = whereParameterGreaterEq(const, 'ratio', 1)

    if show_unconst:
        bounds.append(
            plotSensitivity(unconst, param, ax, stderr=stderr, color=color,
                            label=label + '_unc', bestBy=bestBy, dashed=True))

    bounds.append(
        plotSensitivity(const, param, ax, stderr=stderr, color=color,
                        label=label, bestBy=bestBy))
# Example #3
# 0
def generatePlotTTA(ax, exp_path, bounds):
    """Plot the best learning curve for one experiment.

    Always plots the constrained best; also plots the unconstrained best
    (faded label suffix '_unc') when `show_unconst` is set and it differs
    from the constrained one.  Appends each plot's bounds to `bounds`.
    """
    exp = loadExperiment(exp_path)
    const, unconst = tee(loadResults(exp, errorfile))

    label = exp.agent
    color = colors[label]

    const = whereParameterGreaterEq(const, 'ratio', 1)
    if 'ReghTDC' in label:
        const = whereParameterEquals(const, 'reg_h', 0.8)

    best_const = getBest(const, bestBy=bestBy)
    best_unconst = getBest(unconst, bestBy=bestBy)

    def _draw(best, suffix, dashed):
        # shared plotting call; records bounds for axis rescaling
        b = plotBest(best,
                     ax,
                     window=window,
                     smoothing=smoothing,
                     label=label + suffix,
                     color=color,
                     alpha=0.2,
                     dashed=dashed)
        bounds.append(b)

    if show_unconst and best_const != best_unconst:
        _draw(best_unconst, '_unc', True)

    _draw(best_const, '', False)
def generatePlot(ax, exp_paths, bounds):
    """Plot every surviving parameter setting per experiment.

    All settings are drawn faded; the single best setting (by mean of the
    mean curve) is drawn fully opaque.  The first point of each best curve
    is appended to `bounds`.
    """
    for exp_path in exp_paths:
        exp = loadExperiment(exp_path)
        results = loadResults(exp, errorfile)

        label = exp.agent
        color = colors[label]

        # filters shared by every agent
        results = whereParameterEquals(results, 'batch_size', 4)
        results = whereParameterLesserEq(results, 'ratio', 8)
        results = whereParameterLesserEq(results, 'alpha', 0.5)
        results = where(results, lambda r: r.params.get('ratio', 1) * r.params['alpha'] <= 1)

        # order matters: 'ReghTDC' and 'TDRCC' both contain 'TDC'
        if 'ReghTDC' in label:
            results = whereParameterEquals(results, 'reg_h', 1)
            results = whereParameterEquals(results, 'ratio', 1)
        elif 'TDRCC' in label:
            results = whereParameterEquals(results, 'reg_h', 0.8)
            results = whereParameterEquals(results, 'ratio', 1)
        elif 'TDC' in label:
            results = whereParameterGreaterEq(results, 'ratio', 1)

        to_plot, for_best = tee(results)

        best_line = getBest(for_best).mean()
        best = np.mean(best_line)

        for result in to_plot:
            # highlight only the setting whose mean matches the best
            shade = 1 if np.mean(result.mean()) == best else 0.12
            plotBest(result, ax, label=label, color=color, alphaMain=shade, dashed=False)

        bounds.append(best_line[0])
# Example #5
# 0
def getResultsAndBest(exps):
    """Collect the best result per experiment plus the overall winner.

    Returns (all_results, best_result) where best_result is the entry with
    the lowest mean (lower is better).
    """
    all_results = []
    best_result = None
    for exp in exps:
        filtered = whereParameterEquals(loadResults(exp), 'lambda', LAMBDA)
        best = getBest(filtered)
        all_results.append(best)

        # first experiment seeds the winner; later ones must beat it
        if best_result is None or np.mean(best.mean()) < np.mean(best_result.mean()):
            best_result = best

    return all_results, best_result
def generatePlotTTA(ax, exp_paths, bounds):
    """Plot stepsize curves matching the best error result per experiment.

    AMSGrad experiments are skipped.  Each experiment contributes one pair
    of curves (primary weights '_w' solid, secondary weights '_h' dashed);
    the plot bounds are appended to `bounds`.
    """
    for exp_path in exp_paths:
        if 'amsgrad' in exp_path:
            continue

        exp = loadExperiment(exp_path)
        results = loadResults(exp, errorfile)
        stepsizes = loadResults(exp, 'stepsize_summary.npy')
        results = whereParameterEquals(results, 'ratio', 1)

        label = exp.agent
        color = colors[label]

        # locate the stepsize summary that corresponds to the best error run
        best_stepsize = find(stepsizes, getBest(results))

        bounds.append(
            plotBest(best_stepsize,
                     ax,
                     label=[label + '_w', label + '_h'],
                     color=color,
                     dashed=[False, True]))
# Example #7
# 0
def generatePlotTTA(ax, exp_paths, bestBy, bounds):
    """Plot ratio-sensitivity curves (log2 x-axis) for each experiment.

    Results are fixed at reg_h == 6.4; SmoothTDC agents are suffixed with
    their averaging type so variants get distinct colors/labels.
    """
    ax.set_xscale("log", basex=2)
    for exp_path in exp_paths:
        exp = loadExperiment(exp_path)
        results = loadResults(exp, errorfile)
        results = whereParameterEquals(results, 'reg_h', 6.4)

        agent = exp.agent
        if 'SmoothTDC' in agent:
            # distinguish SmoothTDC variants by their averaging scheme
            agent = agent + '_' + exp._d['metaParameters']['averageType']

        bounds.append(
            plotSensitivity(results,
                            'ratio',
                            ax,
                            color=colors[agent],
                            label=agent,
                            bestBy=bestBy))
# Example #8
# 0
                # HTD reduces to TD on-policy; reuse TD's curve instead of recomputing.
                # NOTE(review): i/j/k, td_idx, ss, problem, on_policy_problems and
                # total_runs come from the enclosing loops, which are not visible here.
                if alg == 'htd' and problem in on_policy_problems:
                    curves[i, j, k] = curves[i, td_idx, k]
                    continue

                # constant stepsize uses the base config; otherwise pick the
                # config file suffixed with the stepsize-schedule name
                if ss == 'constant':
                    exp_paths = glob.glob(
                        f'experiments/stepsizes/{problem}/{alg}/{alg}.json')
                else:
                    exp_paths = glob.glob(
                        f'experiments/stepsizes/{problem}/{alg}/{alg}{ss}.json'
                    )

                exp = loadExperiment(exp_paths[0])

                results = loadResults(exp, errorfile)
                results = whereParameterEquals(results, 'reg_h', 0.8)
                # tee: one copy for the learning curve, one for sensitivity data
                lc, results = tee(results)
                best = getBest(lc)

                x, y, e = getSensitivityData(results,
                                             'ratio',
                                             reducer='slice',
                                             bestBy='auc')

                curve = best.mean()

                # normalize sensitivity values by the best curve's first point;
                # scale stderr back up to a std-dev via sqrt(runs)
                curves[i, j, k, :, 0] = np.array(y) / curve[0]
                curves[i, j, k, :, 1] = np.array(e) * np.sqrt(best.runs())

                # count runs once per (i, k) — only on the first algorithm index
                if j == 0:
                    total_runs += best.runs()
# Example #9
# 0
    # table[i, j] holds (mean, stderr) of the best run for algorithm i on problem j.
    # NOTE(review): algorithms, problems, stepsize, errorfile come from the
    # enclosing scope, which is not visible here.
    table = np.zeros((len(algorithms), len(problems), 2))

    for i, alg in enumerate(algorithms):
        for j, problem in enumerate(problems):
            exp_path = f'experiments/stepsizes/{problem}/{alg}/{alg}{stepsize}.json'
            try:
                exp = loadExperiment(exp_path)
            # NOTE(review): bare `except:` swallows everything including
            # KeyboardInterrupt — presumably this skips missing config files;
            # consider narrowing to `except Exception:` or FileNotFoundError.
            except:
                continue

            results = loadResults(exp, errorfile)
            # td/vtrace have no secondary weights, so no ratio/reg_h constraints
            if alg == 'td' or alg == 'vtrace':
                const = results
            else:
                const = whereParameterGreaterEq(results, 'ratio', 1)
                const = whereParameterEquals(const, 'reg_h', 0.8)

            best = getBest(const)
            metric = np.mean

            # best = getBestEnd(const)
            # metric = lambda m: np.mean(m[-(int(len(m))):])

            mean = metric(best.mean())
            stderr = metric(best.stderr())

            table[i, j] = [mean, stderr]

    htd_idx = indexOf(algorithms, 'htd')
    vtrace_idx = indexOf(algorithms, 'vtrace')
    td_idx = indexOf(algorithms, 'td')
# Example #10
# 0
    # table[i, j] holds (mean, stderr) of the best run for algorithm i on problem j.
    # NOTE(review): near-duplicate of the sibling table-builder; this variant
    # pins ratio == 1 exactly rather than ratio >= 1.
    table = np.zeros((len(algorithms), len(problems), 2))

    for i, alg in enumerate(algorithms):
        for j, problem in enumerate(problems):
            print(alg, problem)
            exp_path = f'experiments/stepsizes/{problem}/{alg}/{alg}{stepsize}.json'
            try:
                exp = loadExperiment(exp_path)
            # NOTE(review): bare `except:` swallows everything including
            # KeyboardInterrupt — consider `except Exception:` at minimum.
            except:
                continue

            results = loadResults(exp, errorfile)
            # td/vtrace have no secondary weights, so no ratio/reg_h constraints
            if alg == 'td' or alg == 'vtrace':
                const = results
            else:
                const = whereParameterEquals(results, 'ratio', 1)
                const = whereParameterEquals(const, 'reg_h', 0.8)

            best = getBest(const)
            metric = np.mean

            # best = getBestEnd(const)
            # metric = lambda m: np.mean(m[-(int(len(m))):])

            mean = metric(best.mean())
            stderr = metric(best.stderr())

            table[i, j] = [mean, stderr]

    htd_idx = indexOf(algorithms, 'htd')
    vtrace_idx = indexOf(algorithms, 'vtrace')
# Example #11
# 0
    # Compare TD against TDRC (regularized TDC) across reward scales and
    # regularization strengths (reg_h, called beta here).
    # NOTE(review): `problem` and `errorfile` come from the enclosing scope,
    # which is not visible here.
    td_exp = loadExperiment(f'experiments/reward_scale/{problem}/td/td.json')
    scales = td_exp._d['metaParameters']['reward_scale']

    tdrc_exp = loadExperiment(
        f'experiments/reward_scale/{problem}/regh_tdc/regh_tdc.json')
    betas = tdrc_exp._d['metaParameters']['reg_h']

    # mat[i, j] will describe scale i vs beta j (filled later, outside this view)
    mat = np.zeros((len(scales), len(betas)))

    xs = []
    ys = []
    alphas = []
    for i, scale in enumerate(scales):
        # TD baseline at this reward scale
        td_results = loadResults(td_exp, errorfile)
        td_results = whereParameterEquals(td_results, 'reward_scale', scale)
        best_td = getBest(td_results)

        best_td_mean = np.mean(best_td.mean())
        # NOTE(review): stderr * sqrt(runs) recovers a std-dev, but the outer
        # sqrt + mean combination looks unusual — confirm intended statistic.
        best_td_std = np.mean(
            np.sqrt(best_td.stderr() * np.sqrt(best_td.runs())))

        # TDRC at this reward scale, split per regularization strength
        tdrc_results = loadResults(tdrc_exp, errorfile)
        tdrc_results = whereParameterEquals(tdrc_results, 'reward_scale',
                                            scale)
        tdrc_split = splitOverParameter(tdrc_results, 'reg_h')

        for j, beta in enumerate(betas):
            results = tdrc_split[beta]
            best_tdrc = getBest(results)
            mean = np.mean(best_tdrc.mean())
# Example #12
# 0
def generatePlot(ax, exp_paths, ss, problem):
    """Scatter per-parameter-setting performance for each algorithm.

    Each algorithm gets its own x-lane of jittered points, one per
    parameter setting.  Diverged settings (missing results, or curves that
    end higher than they start) are pinned at global_max *
    DIVERGENCE_MULTIPLIER, and the divergence rate is logged to the
    module-level `diverged_file`.
    """
    # load results
    all_performance = {}
    for exp_path in exp_paths:
        try:
            exp = loadExperiment(exp_path)
        except Exception:
            # fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
            # best-effort skip of unloadable experiments is preserved
            continue

        results = loadResults(exp, errorfile)
        results = whereParameterEquals(results, 'reg_h', 0.8)

        color = colors[exp.agent]
        label = exp.agent

        performance = []
        for r in results:
            curve = r.mean()
            m = np.mean(curve)

            # diverged if result doesn't exist (mean collapsed to a scalar)
            if np.isscalar(curve):
                m = np.nan

            # diverged if the end of the curve is higher than the start
            elif curve[0] < curve[-1]:
                m = np.nan

            performance.append(m)

        all_performance[label] = {'res': np.array(performance), 'color': color}

    # find the max finite performance among all algorithms, used to place
    # diverged points above everything else
    global_max = -np.inf
    for data in all_performance.values():
        local_max = np.nanmax(data['res'])
        if local_max > global_max:
            global_max = local_max

    # plot results
    x_offset = 0.5
    x_ticks = []
    x_labels = []

    for label, data in all_performance.items():
        performance = data['res']
        color = data['color']

        num_diverged = sum(np.isnan(performance))
        num_total = performance.shape[0]

        diverged_file.write(
            f'ss: {ss}; problem: {problem}; alg: {label}; perc: {num_diverged / num_total}; div: {num_diverged}; total: {num_total}\n'
        )

        # pin diverged settings at a fixed multiple of the best observed value
        performance[np.isnan(performance)] = global_max * DIVERGENCE_MULTIPLIER
        # jitter x within the algorithm's lane so overlapping points stay visible
        ax.scatter(
            [x_offset] * performance.shape[0] +
            np.random.uniform(-LANE_WIDTH, LANE_WIDTH, performance.shape[0]),
            performance,
            marker='o',
            facecolors='none',
            color=color)

        x_ticks.append(x_offset)
        x_labels.append(label)

        x_offset += ALG_WIDTH + 2 * LANE_WIDTH

    ax.xaxis.set_ticks(x_ticks)
    ax.set_xticklabels(x_labels)