Example #1
 def test_confinterval_array_mean(self):
     self._test((1.528595479208968, 2.4714045207910322),
                labmath.confinterval([1,2,3], conf=0.5, array_mean=2))
     expected_ci = (0.528595479209, 1.47140452079)
     actual_ci = labmath.confinterval([1,2,3], conf=0.5, array_mean=1)
     self._test(expected_ci[0], actual_ci[0], approximate=True)
     self._test(expected_ci[1], actual_ci[1], approximate=True)
Example #2
    def profile(self,
                queue,
                size: int = 16,
                must_validate: bool = False,
                out=sys.stdout,
                metaout=sys.stderr,
                min_num_iterations: int = 10):
        """
        Run kernel and profile runtime.

        Output format (CSV):

            out:      <kernel> <wgsize> <transfer> <runtime> <ci>
            metaout:  <error> <kernel>
        """
        assert isinstance(queue, cl.CommandQueue)

        if must_validate:
            try:
                self.validate(queue, size)
            except CLDriveException as e:
                print(type(e).__name__, self.name, sep=',', file=metaout)

        P = KernelPayload.create_random(self, size)
        k = partial(self, queue)

        while len(self.runtimes) < min_num_iterations:
            k(P)

        wgsize = int(round(labmath.mean(self.wgsizes)))
        transfer = int(round(labmath.mean(self.transfers)))
        mean = labmath.mean(self.runtimes)
        ci = labmath.confinterval(self.runtimes, array_mean=mean)[1] - mean
        print(self.name,
              wgsize,
              transfer,
              round(mean, 6),
              round(ci, 6),
              sep=',',
              file=out)
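The profile() method above expects an already-constructed kernel object and a pyopencl command queue. A minimal usage sketch follows; the kernel factory (make_kernel) and the .cl file name are hypothetical placeholders, not part of the original code:

    import sys

    import pyopencl as cl

    # Enable profiling on the queue, which OpenCL event timing requires.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    kernel = make_kernel("vector_add.cl")  # hypothetical kernel factory
    kernel.profile(queue, size=1024, must_validate=True,
                   out=sys.stdout, metaout=sys.stderr, min_num_iterations=30)

With must_validate=True, validation errors are reported on metaout before profiling; the CSV result row described in the docstring is written to out.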
Example #3
 def test_confinterval_error_only(self):
     self._test(0.4714045207910322, labmath.confinterval([1,2,3], conf=.5,
                                                         error_only=True),
                approximate=True)
Example #4
 def test_confinterval_normal_dist(self):
     self._test((0.86841426592382809, 3.1315857340761717),
                labmath.confinterval([1,2,3], normal_threshold=1))
Example #5
 def test_confinterval_c50(self):
     self._test((1.528595479208968, 2.4714045207910322),
                labmath.confinterval([1,2,3], conf=0.5))
Example #6
 def test_confinterval_all_same(self):
     self._test((1, 1),
                labmath.confinterval([1,1,1,1,1]))
Example #7
 def test_confinterval_123_array(self):
     self._test((-0.48413771184375287, 4.4841377118437524),
                labmath.confinterval([1,2,3]))
Example #8
 def test_confinterval_single_item_array(self):
     self._test((1, 1), labmath.confinterval([1]))
Example #9
 def test_confinterval_empty_array(self):
     self._test((0, 0), labmath.confinterval([]))
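Taken together, the confinterval tests above pin down the expected behaviour: empty and single-element inputs give a degenerate interval, identical values collapse to a zero-width interval, conf selects the confidence level, error_only returns just the half-width, array_mean recentres the interval on a caller-supplied mean, and normal_threshold switches between Student's t and the normal approximation. The following is a rough, SciPy-based sketch that is consistent with those tests; it is not the actual labmath implementation:

    import scipy.stats

    def confinterval(data, conf=0.95, normal_threshold=30,
                     array_mean=None, error_only=False):
        # Rough sketch only; argument names follow the tests above.
        data = list(data)
        n = len(data)
        if n < 1:
            return 0 if error_only else (0, 0)

        mean = array_mean if array_mean is not None else sum(data) / n
        if n == 1:
            return 0 if error_only else (mean, mean)

        sem = scipy.stats.sem(data)  # standard error of the mean (ddof=1)
        if sem == 0:                 # all values identical
            return 0 if error_only else (mean, mean)

        if n < normal_threshold:
            # Small samples: Student's t interval with n-1 degrees of freedom.
            lower, upper = scipy.stats.t.interval(conf, n - 1, loc=mean, scale=sem)
        else:
            # Large samples: normal approximation.
            lower, upper = scipy.stats.norm.interval(conf, loc=mean, scale=sem)

        return upper - mean if error_only else (lower, upper)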
Example #10
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier",
        (job, base_err_fn)
    )
    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute("SELECT Count(*) * 1.0 / 3 FROM classification_results "
                         "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in
        db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?",
            (job, baseline)
        )
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup,
                    perf, perf, perf])

    # Get results
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)
        ).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)
            ).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] + speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(*[
        (text.truncate(result[0], 40), result[1], result[2],
         result[3], result[4], result[5])
        for result in results
    ])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, time,
                             fmt="none", yerr=terr, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                                 fmt="none", yerr=yerrs, capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                                 fmt="none", yerr=yerrs, capsize=3, ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")


    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art, bbox_inches="tight", **kwargs)
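The SQL in classification() calls a CONFERROR aggregate, which is not a built-in SQLite function; the surrounding project presumably registers it on the database connection. A hypothetical registration sketch using Python's sqlite3 module and the confinterval helper exercised above (the class name, database path, and labmath import path are assumptions):

    import sqlite3

    import labmath  # import path assumed; use whatever the examples above use

    class ConfError(object):
        # Hypothetical aggregate: confidence-interval half-width of a column.
        def __init__(self):
            self.values = []
            self.conf = 0.95

        def step(self, value, conf):
            if value is not None:
                self.values.append(value)
            self.conf = conf

        def finalize(self):
            return labmath.confinterval(self.values, conf=self.conf,
                                        error_only=True)

    connection = sqlite3.connect("results.db")  # hypothetical path
    connection.create_aggregate("CONFERROR", 2, ConfError)

With an aggregate like this registered, expressions such as CONFERROR(speedup, .95) in the queries above would return the 95% confidence half-width for each group.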