Example #1
def benchmark_slice(ctx, timer):
  TEST_SIZE = 1000 * ctx.num_workers
  
  # Force zeros to evaluate first, so the timed slices exclude array creation.
  x = expr.eager(expr.zeros((TEST_SIZE, 10000)))

  for i in range(5): 
    timer.time_op('slice-rows', lambda: expr.evaluate(x[200:300, :].sum()))
    timer.time_op('slice-cols', lambda: expr.evaluate(x[:, 200:300].sum()))
    timer.time_op('slice-box', lambda: expr.evaluate(x[200:300, 200:300].sum()))
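This benchmark expects a `ctx` exposing `num_workers` and a `timer` exposing `time_op(name, fn)`. A minimal stand-in for the timer, hypothetical and inferred only from the call shape above, is enough to run it locally:

import time

class FakeTimer(object):
  # Hypothetical stand-in for the benchmark harness timer; the real
  # harness is assumed to provide an equivalent time_op(name, fn).
  def time_op(self, name, fn):
    start = time.time()
    fn()
    print name, 'took', time.time() - start, 'seconds'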
Example #2
    def test_index(self):
        a = expr.arange((TEST_SIZE, TEST_SIZE))
        b = expr.ones((10, ), dtype=np.int)
        z = a[b]
        val = expr.evaluate(z)

        nx = np.arange(TEST_SIZE * TEST_SIZE).reshape(TEST_SIZE, TEST_SIZE)
        ny = np.ones((10, ), dtype=np.int)

        Assert.all_eq(val.glom(), nx[ny])
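For reference, the NumPy behavior the assertion relies on: indexing with an integer array of ones gathers row 1 repeatedly, so `nx[ny]` stacks ten copies of that row. A self-contained check, with a small illustrative size:

import numpy as np

nx = np.arange(16).reshape(4, 4)
ny = np.ones((10,), dtype=np.int)
assert nx[ny].shape == (10, 4)      # ten copies of row 1
assert (nx[ny] == nx[1]).all()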
Example #3
def _step():
  # `x` is captured from the enclosing benchmark scope.
  y = expr.evaluate(x * x)
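This closure only makes sense with an `x` already bound in the enclosing scope; a minimal sketch of that context, assuming the same `expr.eager`/`expr.ones` API used in the other examples (the shape is illustrative):

x = expr.eager(expr.ones((1000, 1000)))

def _step():
  y = expr.evaluate(x * x)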
Example #4
    def train_smo_2005(self, data, labels):
        """
        Train an SVM model using the SMO (2005) algorithm.

        Args:
          data(Expr): points to be trained
          labels(Expr): the correct labels of the training data
        """

        N = data.shape[0]  # Number of instances
        D = data.shape[1]  # Number of features

        self.b = 0.0
        alpha = expr.zeros((N, 1), dtype=np.float64, tile_hint=[N / self.ctx.num_workers, 1]).evaluate()

        # linear kernel
        kernel_results = expr.dot(data, expr.transpose(data), tile_hint=[N / self.ctx.num_workers, N])
        gradient = expr.ones((N, 1), dtype=np.float64, tile_hint=[N / self.ctx.num_workers, 1]) * -1.0

        expr_labels = expr.lazify(labels)

        util.log_info("Starting SMO")
        pv1 = pv2 = -1
        it = 0
        while it < self.maxiter:
            util.log_info("Iteration:%d", it)

            minObj = 1e100

            expr_alpha = expr.lazify(alpha)
            G = expr.multiply(labels, gradient) * -1.0

            v1_mask = (expr_labels > self.tol) * (expr_alpha < self.C) + (expr_labels < -self.tol) * (
                expr_alpha > self.tol
            )
            v1 = expr.argmax(G[v1_mask - True]).glom().item()
            maxG = G[v1, 0].glom()
            print "maxv1:", v1, "maxG:", maxG

            v2_mask = (expr_labels > self.tol) * (expr_alpha > self.tol) + (expr_labels < -self.tol) * (
                expr_alpha < self.C
            )
            min_v2 = expr.argmin(G[v2_mask - True]).glom().item()
            minG = G[min_v2, 0].glom()
            # print 'minv2:', min_v2, 'minG:', minG

            set_v2 = v2_mask.glom().nonzero()[0]
            # print 'actives:', set_v2.shape[0]
            v2 = -1
            for v in set_v2:
                b = maxG - G[v, 0].glom()
                if b > self.tol:
                    na = (kernel_results[v1, v1] + kernel_results[v, v] - 2 * kernel_results[v1, v]).glom()[0][0]
                    if na < self.tol:
                        na = 1e12

                    obj = -(b * b) / na
                    # Parenthesized: `and` binds tighter than `or`, so without
                    # them the previous-pair check bypasses the objective test.
                    if obj <= minObj and (v1 != pv1 or v != pv2):
                        v2 = v
                        a = na
                        minObj = obj

            if v2 == -1:
                break
            if maxG - minG < self.tol:
                break

            print "opt v1:", v1, "v2:", v2

            pv1 = v1
            pv2 = v2

            y1 = labels[v1, 0]
            y2 = labels[v2, 0]

            oldA1 = alpha[v1, 0]
            oldA2 = alpha[v2, 0]

            # Calculate new alpha values, to reduce the objective function...
            b = y2 * expr.glom(gradient[v2, 0]) - y1 * expr.glom(gradient[v1, 0])
            if y1 != y2:
                a += 4 * kernel_results[v1, v2].glom()

            newA1 = oldA1 + y1 * b / a
            newA2 = oldA2 - y2 * b / a

            # Correct for alpha being out of range...
            a_sum = y1 * oldA1 + y2 * oldA2  # renamed from `sum` to avoid shadowing the builtin

            if newA1 < self.tol:
                newA1 = 0.0
            elif newA1 > self.C:
                newA1 = self.C

            newA2 = y2 * (a_sum - y1 * newA1)

            if newA2 < self.tol:
                newA2 = 0.0
            elif newA2 > self.C:
                newA2 = self.C

            newA1 = y1 * (a_sum - y2 * newA2)

            # Update the gradient...
            dA1 = newA1 - oldA1
            dA2 = newA2 - oldA2

            gradient += (
                expr.multiply(labels, kernel_results[:, v1]) * y1 * dA1
                + expr.multiply(labels, kernel_results[:, v2]) * y2 * dA2
            )

            alpha[v1, 0] = newA1
            alpha[v2, 0] = newA2

            # print 'alpha:', alpha.glom().T

            it += 1
            # print 'gradient:', gradient.glom().T

        self.w = expr.zeros((D, 1), dtype=np.float64).evaluate()
        for i in xrange(D):
            self.w[i, 0] = expr.reduce(
                alpha,
                axis=None,
                dtype_fn=lambda input: input.dtype,
                local_reduce_fn=margin_mapper,
                accumulate_fn=np.add,
                fn_kw=dict(label=labels, data=expr.evaluate(data[:, i])),
            ).glom()

        self.b = 0.0
        E = (labels - self.margins(data)).evaluate()

        minB = -1e100
        maxB = 1e100
        actualB = 0.0
        numActualB = 0

        for i in xrange(N):
            ai = alpha[i, 0]
            yi = labels[i, 0]
            Ei = E[i, 0]

            if ai < 1e-3:
                if yi < self.tol:
                    maxB = min((maxB, Ei))
                else:
                    minB = max((minB, Ei))
            elif ai > self.C - 1e-3:
                if yi < self.tol:
                    minB = max((minB, Ei))
                else:
                    maxB = min((maxB, Ei))
            else:
                numActualB += 1
                actualB += (Ei - actualB) / float(numActualB)
        if numActualB > 0:
            self.b = actualB
        else:
            self.b = 0.5 * (minB + maxB)

        self.usew_ = True
        print "iteration finish:", it
        print "b:", self.b
        print "w:", self.w.glom()
Example #5
    def train_smo_1998(self, data, labels):
        """
        Train an SVM model using the SMO (1998) algorithm.

        Args:
          data(Expr): points to be trained
          labels(Expr): the correct labels of the training data
        """

        N = data.shape[0]  # Number of instances
        D = data.shape[1]  # Number of features

        self.b = 0.0
        self.alpha = expr.zeros((N, 1), dtype=np.float64, tile_hint=[N / self.ctx.num_workers, 1]).evaluate()

        # linear kernel
        kernel_results = expr.dot(data, expr.transpose(data), tile_hint=[N / self.ctx.num_workers, N])

        labels = labels.evaluate()
        self.E = expr.zeros((N, 1), dtype=np.float64, tile_hint=[N / self.ctx.num_workers, 1]).evaluate()
        for i in xrange(N):
            self.E[i, 0] = (
                self.b
                + expr.reduce(
                    self.alpha,
                    axis=None,
                    dtype_fn=lambda input: input.dtype,
                    local_reduce_fn=margin_mapper,
                    accumulate_fn=np.add,
                    fn_kw=dict(label=labels, data=kernel_results[:, i].evaluate()),
                ).glom()
                - labels[i, 0]
            )

        util.log_info("Starting SMO")
        it = 0
        num_changed = 0
        examine_all = True
        while (num_changed > 0 or examine_all) and (it < self.maxiter):
            util.log_info("Iteration:%d", it)

            num_changed = 0

            if examine_all:
                for i in xrange(N):
                    num_changed += self.examine_example(i, N, labels, kernel_results)
            else:
                for i in xrange(N):
                    if 0 < self.alpha[i, 0] < self.C:
                        num_changed += self.examine_example(i, N, labels, kernel_results)

            it += 1

            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

        self.w = expr.zeros((D, 1), dtype=np.float64).evaluate()
        for i in xrange(D):
            self.w[i, 0] = expr.reduce(
                self.alpha,
                axis=None,
                dtype_fn=lambda input: input.dtype,
                local_reduce_fn=margin_mapper,
                accumulate_fn=np.add,
                fn_kw=dict(label=labels, data=expr.evaluate(data[:, i])),
            ).glom()
        self.usew_ = True
        print "iteration finish:", it
        print "b:", self.b
        print "w:", self.w.glom()
Example #6
def _step():
  # `x` and `y` are captured from the enclosing benchmark scope.
  expr.evaluate(expr.dot(x, y))