Example #1
def benchmark_convnet(ctx, timer):
  image_size = BASE_IMG_SIZE
  minibatch = 64
  #minibatch = ctx.num_workers
  hint = util.divup(image_size, sqrt(ctx.num_workers))
  tile_hint = (util.divup(minibatch, ctx.num_workers), N_COLORS, image_size, image_size)
  util.log_info('Hint: %s', tile_hint)
    
  images = expr.eager(expr.ones((minibatch, N_COLORS, image_size, image_size),
                                tile_hint=tile_hint))
  
  w1 = expr.eager(expr.ones((N_FILTERS, N_COLORS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  w2 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  w3 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  
  def _():
    conv1 = stencil.stencil(images, w1, 2)
    pool1 = stencil.maxpool(conv1)
   
    conv2 = stencil.stencil(pool1, w2, 2)
    pool2 = stencil.maxpool(conv2)
    
    conv3 = stencil.stencil(pool2, w3, 2)
    pool3 = stencil.maxpool(conv3)
    
    expr.force(pool3)
 
  # force parakeet functions to compile before timing. 
  _()  
  for i in range(2):
    timer.time_op('convnet', _)
Example #2
def shortestPath_np(dim, linkMatrix, dist):
	#linkMatrix, dist = init(dim, startVertex)
	for i in range(1000):
		util.log_info("%s", "enter")
		dist = (dist + linkMatrix).min(axis = 0).reshape(dim, 1)
		util.log_info("numItersation %s", i)
	return dist	
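The update above is a min-plus (tropical) matrix relaxation, the same step Bellman-Ford repeats: dist[j] = min_i(dist[i] + link[i, j]). A tiny self-contained NumPy check of one step (the data here is hypothetical):

import numpy as np

# One relaxation step over a 3-vertex graph.
link = np.array([[0., 1., 4.],
                 [1., 0., 2.],
                 [4., 2., 0.]])
dist = np.array([[0.], [np.inf], [np.inf]])
dist = (dist + link).min(axis=0).reshape(3, 1)  # -> [[0.], [1.], [4.]]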
Example #3
 def mark_failed_worker(self, worker_id):
   util.log_info('Marking worker %s as failed.', worker_id)
   self._available_workers.remove(worker_id)
   for array in self._arrays:
     for ex, tile_id in array.tiles.iteritems():
       if tile_id.worker == worker_id:
         array.bad_tiles.append(ex)
Example #4
def bfs(ctx, dim):
	util.log_info("start to computing......")

	sGenerate = time.time()
	current = eager(
			expr.shuffle(
				expr.ndarray(
					(dim, 1),
					dtype = np.int64,
					tile_hint = (dim / ctx.num_workers, 1)),
				make_current,
			))
	
	linkMatrix = eager(
				expr.shuffle(
					expr.ndarray(
					 (dim, dim),
					 dtype = np.int64,
					 tile_hint = (dim, dim / ctx.num_workers)),
				make_matrix,
				))
	eGenerate = time.time()

	startCompute = time.time()
	while True:
		next = expr.dot(linkMatrix, current)
		formerNum = expr.count_nonzero(current)
		laterNum = expr.count_nonzero(next)
		# the counts match once the frontier has stopped growing
		converged = expr.equal(formerNum, laterNum).glom()
		current = next
		if converged:
			break
	current.evaluate()
	endCompute = time.time()
	return (eGenerate - sGenerate, endCompute - startCompute) 
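A minimal local sketch of the same frontier-expansion idea in plain NumPy (the helper name is hypothetical, not part of spartan): the set of reached vertices grows through repeated adjacency-matrix/vector products, and the search has converged once the nonzero count stops changing.

import numpy as np

def bfs_local(adj, start):
  # adj: (n, n) 0/1 adjacency matrix; start: index of the source vertex.
  reached = np.zeros(adj.shape[0], dtype=np.int64)
  reached[start] = 1
  while True:
    frontier = (adj.dot(reached) + reached > 0).astype(np.int64)
    if np.count_nonzero(frontier) == np.count_nonzero(reached):
      return reached
    reached = frontier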
Example #5
  def test_newaxis(self):
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    Assert.all_eq(na[np.newaxis, 2:7, 4:8].shape,
                  a[expr.newaxis,2:7, 4:8].shape)

    Assert.all_eq(na[np.newaxis, 2:7, np.newaxis, 4:8].shape,
                  a[expr.newaxis,2:7, expr.newaxis, 4:8].shape)

    Assert.all_eq(na[np.newaxis, 2:7, np.newaxis, 4:8, np.newaxis].shape,
                  a[expr.newaxis,2:7, expr.newaxis, 4:8, expr.newaxis].shape)

    #Extreme case
    Assert.all_eq(na[np.newaxis, np.newaxis, np.newaxis, np.newaxis, 2:7,
                     np.newaxis, np.newaxis, np.newaxis, 4:8, np.newaxis,
                     np.newaxis, np.newaxis].shape,
                  a[expr.newaxis, expr.newaxis, expr.newaxis, expr.newaxis,
                    2:7, expr.newaxis, expr.newaxis, expr.newaxis, 4:8,
                    expr.newaxis, expr.newaxis, expr.newaxis].shape)

    util.log_info('\na.shape:  %s  \nna.shape: %s',
                  a[expr.newaxis, 2:7, expr.newaxis, 4:8, expr.newaxis,
                    expr.newaxis, expr.newaxis].shape,
                  na[np.newaxis, 2:7, np.newaxis, 4:8, np.newaxis,
                     np.newaxis, np.newaxis].shape)
Example #6
 def _(axis):
     util.log_info('Testing sum over axis %s', axis)
     a = expr.ones((TEST_SIZE, TEST_SIZE)) + expr.ones(
         (TEST_SIZE, TEST_SIZE))
     b = a.sum(axis=axis)
     Assert.all_eq(b.glom(), 2 * np.ones(
         (TEST_SIZE, TEST_SIZE)).sum(axis))
Example #7
def benchmark_convnet(ctx, timer):
  image_size = BASE_IMG_SIZE
  minibatch = 64
  #minibatch = ctx.num_workers
  hint = util.divup(image_size, sqrt(ctx.num_workers))
  tile_hint = (util.divup(minibatch, ctx.num_workers), N_COLORS, image_size, image_size)
  util.log_info('Hint: %s', tile_hint)

  images = expr.eager(expr.ones((minibatch, N_COLORS, image_size, image_size),
                                tile_hint=tile_hint))

  w1 = expr.eager(expr.ones((N_FILTERS, N_COLORS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  w2 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  w3 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))

  def _():
    conv1 = stencil.stencil(images, w1, 2)
    pool1 = stencil.maxpool(conv1)

    conv2 = stencil.stencil(pool1, w2, 2)
    pool2 = stencil.maxpool(conv2)

    conv3 = stencil.stencil(pool2, w3, 2)
    pool3 = stencil.maxpool(conv3)

    pool3.evaluate()

  # force parakeet functions to compile before timing.
  _()
  for i in range(2):
    timer.time_op('convnet', _)
Example #8
    def test_newaxis(self):
        na = np.arange(100).reshape(10, 10)
        a = expr.from_numpy(na)

        Assert.all_eq(na[np.newaxis, 2:7, 4:8].shape, a[expr.newaxis, 2:7,
                                                        4:8].shape)

        Assert.all_eq(na[np.newaxis, 2:7, np.newaxis, 4:8].shape,
                      a[expr.newaxis, 2:7, expr.newaxis, 4:8].shape)

        Assert.all_eq(
            na[np.newaxis, 2:7, np.newaxis, 4:8, np.newaxis].shape,
            a[expr.newaxis, 2:7, expr.newaxis, 4:8, expr.newaxis].shape)

        #Extreme case
        Assert.all_eq(
            na[np.newaxis, np.newaxis, np.newaxis, np.newaxis, 2:7, np.newaxis,
               np.newaxis, np.newaxis, 4:8, np.newaxis, np.newaxis,
               np.newaxis].shape,
            a[expr.newaxis, expr.newaxis, expr.newaxis, expr.newaxis, 2:7,
              expr.newaxis, expr.newaxis, expr.newaxis, 4:8, expr.newaxis,
              expr.newaxis, expr.newaxis].shape)

        util.log_info(
            '\na.shape:  %s  \nna.shape: %s',
            a[expr.newaxis, 2:7, expr.newaxis, 4:8, expr.newaxis, expr.newaxis,
              expr.newaxis].shape, na[np.newaxis, 2:7, np.newaxis, 4:8,
                                      np.newaxis, np.newaxis,
                                      np.newaxis].shape)
Example #9
 def mark_failed_worker(self, worker_id):
     util.log_info('Marking worker %s as failed.', worker_id)
     self._available_workers.remove(worker_id)
     for array in self._arrays:
         for ex, tile_id in array.tiles.iteritems():
             if tile_id.worker == worker_id:
                 array.bad_tiles.append(ex)
Example #10
def test_convnet(ctx):
    hint = util.divup(64, sqrt(ctx.num_workers))

    images = expr.eager(
        expr.ones((N_IMGS, ) + IMG_SIZE,
                  tile_hint=(N_IMGS, N_COLORS, hint, hint)))

    w1 = expr.eager(
        expr.ones((N_FILTERS, N_COLORS) + FILTER_SIZE, tile_hint=ONE_TILE))

    conv1 = stencil.stencil(images, w1, 2)
    pool1 = stencil.maxpool(conv1)

    w2 = expr.eager(
        expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE, tile_hint=ONE_TILE))

    conv2 = stencil.stencil(pool1, w2, 2)
    pool2 = stencil.maxpool(conv2)

    w3 = expr.eager(
        expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE, tile_hint=ONE_TILE))
    conv3 = stencil.stencil(pool2, w3, 2)
    pool3 = stencil.maxpool(conv3)

    util.log_info(pool3.shape)
Example #11
def start_remote_worker(worker, st, ed):
    """
  Start processes on a worker machine.

  The machine will launch worker processes ``st`` through ``ed``.

  :param worker: hostname to connect to.
  :param st: First process index to start.
  :param ed: Last process to start.
  """
    if FLAGS.use_threads and worker == "localhost":
        util.log_info("Using threads.")
        for i in range(st, ed):
            p = threading.Thread(target=spartan.worker._start_worker, args=((socket.gethostname(), FLAGS.port_base), i))
            p.daemon = True
            p.start()
        time.sleep(0.1)
        return

    util.log_info("Starting worker %d:%d on host %s", st, ed, worker)
    if FLAGS.oprofile:
        os.system("mkdir operf.%s" % worker)

    ssh_args = ["ssh", "-oForwardX11=no", worker]

    args = ["cd %s && " % os.path.abspath(os.path.curdir)]

    if FLAGS.xterm:
        args += ["xterm", "-e"]

    if FLAGS.oprofile:
        args += ["operf -e CPU_CLK_UNHALTED:100000000", "-g", "-d", "operf.%s" % worker]

    args += [
        #'gdb', '-ex', 'run', '--args',
        "python",
        "-m spartan.worker",
        "--master=%s:%d" % (socket.gethostname(), FLAGS.port_base),
        "--count=%d" % (ed - st),
        "--heartbeat_interval=%d" % FLAGS.heartbeat_interval,
    ]

    # add flags from config/user
    for (name, value) in FLAGS:
        if name in ["worker_list", "print_options"]:
            continue
        args += [repr(value)]

    # print >>sys.stderr, args
    util.log_debug("Running worker %s", " ".join(args))
    time.sleep(0.1)
    # TODO: improve this to make log break at newline
    if worker != "localhost":
        p = subprocess.Popen(ssh_args + args, executable="ssh")
    else:
        p = subprocess.Popen(" ".join(args), shell=True, stdin=subprocess.PIPE)

    return p
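A hedged usage sketch (the hostnames are hypothetical): launch worker processes 0 through 7 on each of two machines, keeping the subprocess handles around for later cleanup.

procs = [start_remote_worker(host, 0, 8) for host in ('node1', 'node2')]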
Example #12
  def train_smo_1998(self, data, labels):
    '''
    Train an SVM model using the SMO (1998) algorithm.
   
    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''
    
    N = data.shape[0] # Number of instances
    D = data.shape[1]  # Number of features

    self.b = 0.0
    self.alpha = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    
    # linear kernel
    kernel_results = expr.dot(data, expr.transpose(data), tile_hint=[N/self.ctx.num_workers, N])   
    
    labels = expr.force(labels)
    self.E = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    for i in xrange(N):
      self.E[i, 0] = self.b + expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                                          local_reduce_fn=margin_mapper,
                                          accumulate_fn=np.add, 
                                          fn_kw=dict(label=labels, data=kernel_results[:,i].force())).glom() - labels[i, 0]
    
    util.log_info("Starting SMO")
    it = 0
    num_changed = 0
    examine_all = True
    while (num_changed > 0 or examine_all) and (it < self.maxiter):
      util.log_info("Iteration:%d", it)

      num_changed = 0
      
      if examine_all:
        for i in xrange(N): 
          num_changed += self.examine_example(i, N, labels, kernel_results)
      else:
        for i in xrange(N):
          if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
            num_changed += self.examine_example(i, N, labels, kernel_results)

      it += 1

      if examine_all: examine_all = False
      elif num_changed == 0: examine_all = True
    
    self.w = expr.zeros((D, 1), dtype=np.float64).force()
    for i in xrange(D): 
      self.w[i,0] = expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                              local_reduce_fn=margin_mapper,
                              accumulate_fn=np.add, 
                              fn_kw=dict(label=labels, data=expr.force(data[:,i]))).glom()
    self.usew_ = True
    print 'iterations finished:', it
    print 'b:', self.b
    print 'w:', self.w.glom()
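The E cache above holds E_i = f(x_i) - y_i with f(x_i) = sum_j alpha_j * y_j * K(x_j, x_i) + b. A minimal NumPy sketch of that same computation with 1-D vectors instead of (N, 1) arrays (the helper name is hypothetical):

import numpy as np

def error_cache(alpha, labels, K, b):
  # E[i] = sum_j alpha[j] * labels[j] * K[j, i] + b - labels[i]
  return K.T.dot(alpha * labels) + b - labels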
Example #13
def fuzzy_kmeans(points, k=10, num_iter=10, m=2.0, centers=None):
  '''
  Cluster data points using the fuzzy k-means method.

  Args:
    points(Expr or DistArray): the input data points matrix.
    k(int): the number of clusters.
    num_iter(int): the max iterations to run.
    m(float): the parameter of fuzzy kmeans.
    centers(Expr or DistArray): the initialized centers of each cluster.
  '''
  points = expr.force(points)
  num_dim = points.shape[1]
  if centers is None:
      centers = expr.rand(k, num_dim)

  labels = expr.zeros((points.shape[0],), dtype=np.int)

  for iter in range(num_iter):
    centers = expr.as_array(centers)
    points_broadcast = expr.reshape(points, (points.shape[0], 1, points.shape[1]))
    centers_broadcast = expr.reshape(centers, (1, centers.shape[0], centers.shape[1]))
    distances = expr.sum(expr.square(points_broadcast - centers_broadcast), axis=2)
    # Add a tiny epsilon to avoid division by zero
    distances = distances + 0.00000000001
    util.log_info('distances shape %s' % str(distances.shape))
    distances_broadcast = expr.reshape(distances, (distances.shape[0], 1,
                                                   distances.shape[1]))
    distances_broadcast2 = expr.reshape(distances, (distances.shape[0],
                                                    distances.shape[1], 1))
    prob = 1.0 / expr.sum(expr.power(distances_broadcast / distances_broadcast2,
                                     2.0 / (m - 1)), axis=2)
    prob.force()
    counts = expr.sum(prob, axis=0)
    counts = expr.reshape(counts, (counts.shape[0], 1))
    labels = expr.argmax(prob, axis=1)
    centers = expr.sum(expr.reshape(points, (points.shape[0], 1, points.shape[1])) *
                       expr.reshape(prob, (prob.shape[0], prob.shape[1], 1)),
                       axis=0)

    # We assume the centers matrix is small enough to be handled on the
    # master.
    counts = counts.glom()
    centers = centers.glom()
    # If any centroids don't have any points assigned to them.
    zcount_indices = (counts == 0).reshape(k)

    if np.any(zcount_indices):
      # One or more centroids may not have any points assigned to them, which
      # results in their position being the zero-vector.  We reseed these
      # centroids with new random values and set their counts to 1 to avoid
      # division by zero.
      counts[zcount_indices, :] = 1
      centers[zcount_indices, :] = np.random.rand(np.count_nonzero(zcount_indices),
                                                  num_dim)

    centers = centers / counts
  return labels
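The membership step is the standard fuzzy c-means formula u_ij = 1 / sum_l (d_ij / d_il)^(2/(m-1)), where d is the (squared) point-to-center distance. A local NumPy sketch of that step (hypothetical helper; note the orientation of the ratio when comparing with the broadcast division above):

import numpy as np

def fuzzy_memberships(points, centers, m=2.0):
  # d[i, j]: squared distance from point i to center j (epsilon avoids /0)
  d = ((points[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2) + 1e-11
  # u[i, j] = 1 / sum_l (d[i, j] / d[i, l]) ** (2 / (m - 1)); rows sum to 1
  return 1.0 / ((d[:, :, None] / d[:, None, :]) ** (2.0 / (m - 1))).sum(axis=2)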
Example #14
def start_remote_worker(worker, st, ed):
  '''
  Start processes on a worker machine.

  The machine will launch worker processes ``st`` through ``ed``.

  :param worker: hostname to connect to.
  :param st: First process index to start.
  :param ed: Last process to start.
  '''
  if FLAGS.use_threads and worker == 'localhost':
    util.log_info('Using threads.')
    for i in range(st, ed):
      p = threading.Thread(target=spartan.worker._start_worker,
                           args=((socket.gethostname(), FLAGS.port_base), i))
      p.daemon = True
      p.start()
    time.sleep(0.1)
    return

  util.log_info('Starting worker %d:%d on host %s', st, ed, worker)
  if FLAGS.oprofile:
    os.system('mkdir operf.%s' % worker)

  ssh_args = ['ssh', '-oForwardX11=no', worker ]

  args = ['cd %s && ' % os.path.abspath(os.path.curdir)]

  if FLAGS.xterm:
    args += ['xterm', '-e',]

  if FLAGS.oprofile:
    args += ['operf -e CPU_CLK_UNHALTED:100000000', '-g', '-d', 'operf.%s' % worker]

  args += [
          #'gdb', '-ex', 'run', '--args',
          'python', '-m spartan.worker',
          '--master=%s:%d' % (socket.gethostname(), FLAGS.port_base),
          '--count=%d' % (ed - st),
          '--heartbeat_interval=%d' % FLAGS.heartbeat_interval
          ]

  # add flags from config/user
  for (name, value) in FLAGS:
    if name in ['worker_list', 'print_options']: continue
    args += [repr(value)]

  #print >>sys.stderr, args
  util.log_debug('Running worker %s', ' '.join(args))
  time.sleep(0.1)
  if worker != 'localhost':
    p = subprocess.Popen(ssh_args + args, executable='ssh')
  else:
    p = subprocess.Popen(' '.join(args), shell=True, stdin=subprocess.PIPE)

  return p
Example #15
def make_current(tile, ex):
	util.log_info("start to creatting")
	ul = ex.ul
	lr = ex.lr
	dim = ex.shape[0]

	current = np.zeros((dim, 1), dtype = np.int64)
	if(ul[0] <= startVertex <= lr[0]):
		current[startVertex, 0] = 1
	return [(ex, current)]
Example #16
  def test_del_dim(self):
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    Assert.all_eq(na[2:7, 8], a[2:7, 8].glom())
    Assert.all_eq(na[3:9, 4].shape, a[3:9, 4].shape)

    Assert.all_eq(na[2:7, -1], a[2:7, -1].glom())
    Assert.all_eq(na[-1, 3:9].shape, a[-1, 3:9].shape)

    util.log_info('\na.shape: %s \nna.shape %s', a[3:9, 4].shape, na[3:9, 4].shape)
Example #17
 def bind(self):
   host, port = self.addr
   host = socket.gethostbyname(host)
   util.log_debug('Binding... %s', (host, port))
   if port == -1:
     self.addr = (host, self._zmq.bind_to_random_port('tcp://%s' % host))
   else:
     try:
       self._zmq.bind('tcp://%s:%d' % (host, port))
     except zmq.ZMQError:
       util.log_info('Failed to bind (%s, %d)' % (host, port))
       raise
Example #18
    def test_del_dim(self):
        na = np.arange(100).reshape(10, 10)
        a = expr.from_numpy(na)

        Assert.all_eq(na[2:7, 8], a[2:7, 8].glom())
        Assert.all_eq(na[3:9, 4].shape, a[3:9, 4].shape)

        Assert.all_eq(na[2:7, -1], a[2:7, -1].glom())
        Assert.all_eq(na[-1, 3:9].shape, a[-1, 3:9].shape)

        util.log_info('\na.shape: %s \nna.shape %s', a[3:9, 4].shape,
                      na[3:9, 4].shape)
Example #19
  def test_ndimension(self):
    for case in xrange(5):
      dim = np.random.randint(low=2, high=6)
      shape = np.random.randint(low=5, high=11, size=dim)
      util.log_info('Test Case #%s: DIM(%s) shape%s', case + 1, dim, shape)

      na = new_ndarray(shape)
      a = expr.from_numpy(na)

      for axis in xrange(dim):
        Assert.all_eq(expr.sort(a, axis).glom(),
                      np.sort(na, axis))
        Assert.all_eq(expr.argsort(a, axis).glom(),
                      np.argsort(na, axis))
Example #20
def fake_netflix_mapper(inputs, ex, p_rating=None):
  '''
  Create "Netflix-like" data for the given extent.
  
  :param p_rating: Sparsity factor (probability a given cell will have a rating)
  '''
  n_ratings = int(max(1, ex.size * p_rating))
  
  uids = np.random.randint(0, ex.shape[0], n_ratings)
  mids = np.random.randint(0, ex.shape[1], n_ratings)
  ratings = np.random.randint(0, 5, n_ratings).astype(np.float32)

  util.log_info('%s %s %s %s', ex, p_rating, ex.size, len(ratings))

  data = scipy.sparse.coo_matrix((ratings, (uids, mids)), shape=ex.shape)
  yield ex, data
Example #21
def benchmark_jacobi(ctx, timer):
  global base, ITERATION
  util.log_warn('num_workers: %s', ctx.num_workers)

  A, b = jacobi.jacobi_init(base * ctx.num_workers)
  A, b = A.evaluate(), b.evaluate()

  start = time.time()

  result = jacobi.jacobi_method(A, b, ITERATION).glom()

  cost = time.time() - start

  util.log_info('\nresult =\n%s', result)
  util.log_warn('time cost: %s s', cost)
  util.log_warn('cost per iteration: %s s\n', cost / ITERATION)
Example #22
    def _initialize(self):
        """Sends an initialization request to all workers and waits
    for their response.
    """
        util.log_info("Initializing...")
        req = core.InitializeReq(peers=dict([(id, w.addr()) for id, w in self._workers.iteritems()]))

        futures = rpc.FutureGroup()
        for id, w in self._workers.iteritems():
            req.id = id
            futures.append(w.initialize(req))
        futures.wait()

        self._ctx = blob_ctx.BlobCtx(blob_ctx.MASTER_ID, self._workers, self)
        self._initialized = True
        util.log_info("done...")
Example #23
    def shutdown(self):
        '''Shutdown all workers and halt.'''
        if self._ctx.active is False:
            return

        self._ctx.active = False

        futures = rpc.FutureGroup()
        for id, w in self._workers.iteritems():
            util.log_info('Shutting down worker %d', id)
            futures.append(w.shutdown())

        # Wait a second to let our shutdown request go out.
        time.sleep(1)

        self._server.shutdown()
Example #24
def fake_netflix_mapper(inputs, ex, p_rating=None):
    '''
  Create "Netflix-like" data for the given extent.

  :param p_rating: Sparsity factor (probability a given cell will have a rating)
  '''
    n_ratings = int(max(1, ex.size * p_rating))

    uids = np.random.randint(0, ex.shape[0], n_ratings)
    mids = np.random.randint(0, ex.shape[1], n_ratings)
    ratings = np.random.randint(0, 5, n_ratings).astype(np.float32)

    util.log_info('%s %s %s %s', ex, p_rating, ex.size, len(ratings))

    data = scipy.sparse.coo_matrix((ratings, (uids, mids)), shape=ex.shape)
    yield ex, data
Example #25
  def test_combo(self):
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    Assert.all_eq(na[np.newaxis, 2:7, 4],
                  a[expr.newaxis, 2:7, 4].glom())
    Assert.all_eq(na[2:7, np.newaxis, -1],
                  a[2:7, expr.newaxis, -1].glom())
    Assert.all_eq(na[-1, np.newaxis, 2:7],
                  a[-1, expr.newaxis, 2:7].glom())
    Assert.all_eq(na[np.newaxis, 2:7, np.newaxis, np.newaxis, 4, np.newaxis, np.newaxis],
                  a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, 4, expr.newaxis, expr.newaxis].glom())

    util.log_info('\na.shape:  %s \nna.shape: %s',
                  a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, -1,
                    expr.newaxis, expr.newaxis].shape,
                  na[np.newaxis, 2:7, np.newaxis, np.newaxis, -1,
                     np.newaxis, np.newaxis].shape)
Example #26
  def shutdown(self):
    '''Shutdown all workers and halt.'''
    if self._ctx.active is False:
      return

    self._ctx.active = False

    futures = rpc.FutureGroup()
    for id, w in self._workers.iteritems():
      util.log_info('Shutting down worker %d', id)
      futures.append(w.shutdown())

    # Wait a second to let our shutdown request go out.
    time.sleep(1)

    self._server.shutdown()
Example #27
    def test_combo(self):
        na = np.arange(100).reshape(10, 10)
        a = expr.from_numpy(na)

        Assert.all_eq(na[np.newaxis, 2:7, 4], a[expr.newaxis, 2:7, 4].glom())
        Assert.all_eq(na[2:7, np.newaxis, -1], a[2:7, expr.newaxis, -1].glom())
        Assert.all_eq(na[-1, np.newaxis, 2:7], a[-1, expr.newaxis, 2:7].glom())
        Assert.all_eq(
            na[np.newaxis, 2:7, np.newaxis, np.newaxis, 4, np.newaxis,
               np.newaxis], a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, 4,
                              expr.newaxis, expr.newaxis].glom())

        util.log_info(
            '\na.shape:  %s \nna.shape: %s',
            a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, -1, expr.newaxis,
              expr.newaxis].shape, na[np.newaxis, 2:7, np.newaxis, np.newaxis,
                                      -1, np.newaxis, np.newaxis].shape)
Example #28
def _build_mapper(ex,
                  task_array,
                  target_array,
                  X,
                  y,
                  criterion,
                  max_depth,
                  min_samples_split,
                  min_samples_leaf,
                  max_features,
                  bootstrap):
  """
  Mapper kernel for building a random forest classifier.

  Each kernel instance fetches the entirety of the feature and prediction
  (X and y) arrays and invokes sklearn to build a local random forest
  classifier, which may contain more than one tree.

  The criterion, max_depth, min_samples_split, min_samples_leaf,
  max_features and bootstrap options are passed through to sklearn's
  RandomForestClassifier.
  """
  # The number of rows decides how many trees this kernel will build.
  st = time.time()
  idx = ex.ul[0]
  # Get the number of trees this worker needs to train.
  n_estimators = task_array[idx]
  X = X.glom()
  y = y.glom()

  rf = SKRF(n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            n_jobs=1,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            max_features=max_features,
            bootstrap=bootstrap)

  rf.fit(X, y)
  # Update the target array.
  target_array[idx, :] = (rf,)

  result = core.LocalKernelResult()
  result.result = None
  util.log_info("Finish construction : %s", time.time() - st)
  return result
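Each kernel trains its own forest, so the per-worker results still have to be combined. A sketch of one common way to merge fitted sklearn forests, under the assumption that target_array ends up holding fitted RandomForestClassifier objects as produced above (the helper name is hypothetical):

def merge_forests(forests):
  # Fold every tree into the first forest; sklearn predicts by
  # averaging over whatever is in estimators_.
  merged = forests[0]
  for rf in forests[1:]:
    merged.estimators_ += rf.estimators_
  merged.n_estimators = len(merged.estimators_)
  return merged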
Example #29
def compile_parakeet_source(src):
  '''Compile source code defining a parakeet function.'''
  util.log_debug('Compiling parakeet source.')
  tmpfile = tempfile.NamedTemporaryFile(delete=True, prefix='spartan-local-', suffix='.py')
  tmpfile.write(src)
  tmpfile.flush()

  #util.log_info('File: %s, Source: \n %s \n', tmpfile.name, src)

  #os.rename(tmpfile.name, srcfile)
  #atexit.register(lambda: os.remove(srcfile))

  try:
    module = imp.load_source('parakeet_temp', tmpfile.name)
  except Exception, ex:
    util.log_info('Failed to build parakeet wrapper')
    util.log_debug('Source was: %s', src)
    raise CodegenException(ex.message, ex.args)
Example #30
    def _initialize(self):
        '''Sends an initialization request to all workers and waits
    for their response.
    '''
        util.log_info('Initializing...')
        req = core.InitializeReq(
            peers=dict([(id, w.addr())
                        for id, w in self._workers.iteritems()]))

        futures = rpc.FutureGroup()
        for id, w in self._workers.iteritems():
            req.id = id
            futures.append(w.initialize(req))
        futures.wait()

        self._ctx = blob_ctx.BlobCtx(blob_ctx.MASTER_ID, self._workers, self)
        self._initialized = True
        util.log_info('done...')
Example #31
def benchmark_pagerank(ctx, timer):
  num_pages = PAGES_PER_WORKER * ctx.num_workers
  util.log_info('Total pages: %s', num_pages)

  wts = eager(
    expr.shuffle(
      expr.ndarray(
        (num_pages, num_pages), 
        dtype=np.float32,
        tile_hint=(num_pages, PAGES_PER_WORKER / 8)),
      make_weights,
    ))

  p = eager(expr.ones((num_pages, 1), 
                      tile_hint=(PAGES_PER_WORKER / 8, 1), 
                      dtype=np.float32))

  for i in range(3):
    timer.time_op('pagerank', lambda: expr.dot(wts, p).force())
Example #32
def benchmark_pagerank(ctx, timer):
    num_pages = PAGES_PER_WORKER * ctx.num_workers
    util.log_info('Total pages: %s', num_pages)

    wts = eager(
        expr.shuffle(
            expr.ndarray((num_pages, num_pages),
                         dtype=np.float32,
                         tile_hint=(num_pages, PAGES_PER_WORKER / 8)),
            make_weights,
        ))

    p = eager(
        expr.ones((num_pages, 1),
                  tile_hint=(PAGES_PER_WORKER / 8, 1),
                  dtype=np.float32))

    for i in range(3):
        timer.time_op('pagerank', lambda: expr.dot(wts, p).force())
Example #33
def load_netflix_mapper(inputs, ex, load_file=None):
    # first column will load all of the data
    row_start, row_end = ex.ul[0], ex.lr[0]
    col_start, col_end = ex.ul[1], ex.lr[1]

    data = scipy.sparse.dok_matrix(ex.shape, dtype=np.float)
    zf = zipfile.ZipFile(load_file, 'r', allowZip64=True)

    for i in range(row_start, row_end):
        offset = i - row_start
        row_data = cPickle.loads(zf.read('%d' % (i + FILE_START)))
        filtered = row_data[row_data['userid'] > col_start]
        filtered = filtered[filtered['userid'] < col_end]

        for uid, rating in filtered:
            uid -= col_start
            data[(offset, uid)] = rating

    util.log_info('Loaded: %s', ex)
    yield ex, data.tocoo()
Example #34
def load_netflix_mapper(inputs, ex, load_file=None):
  # first column will load all of the data
  row_start, row_end = ex.ul[0], ex.lr[0]
  col_start, col_end = ex.ul[1], ex.lr[1]
  
  data = scipy.sparse.dok_matrix(ex.shape, dtype=np.float)
  zf = zipfile.ZipFile(load_file, 'r', allowZip64=True)
  
  for i in range(row_start, row_end):
    offset = i - row_start
    row_data = cPickle.loads(zf.read('%d' % (i + FILE_START)))
    filtered = row_data[row_data['userid'] > col_start]
    filtered = filtered[filtered['userid'] < col_end]
    
    for uid, rating in filtered:
      uid -= col_start
      data[(offset, uid)] = rating
  
  util.log_info('Loaded: %s', ex)
  yield ex, data.tocoo()
Example #35
def compile_parakeet_source(src):
    '''Compile source code defining a parakeet function.'''
    util.log_debug('Compiling parakeet source.')
    tmpfile = tempfile.NamedTemporaryFile(delete=True,
                                          prefix='spartan-local-',
                                          suffix='.py')
    tmpfile.write(src)
    tmpfile.flush()

    #util.log_info('File: %s, Source: \n %s \n', tmpfile.name, src)

    #os.rename(tmpfile.name, srcfile)
    #atexit.register(lambda: os.remove(srcfile))

    try:
        module = imp.load_source('parakeet_temp', tmpfile.name)
    except Exception, ex:
        util.log_info('Failed to build parakeet wrapper')
        util.log_debug('Source was: %s', src)
        raise CodegenException(ex.message, ex.args)
Example #36
  def register(self, req, handle):
    '''
    RPC method.

    Register a new worker with the master.

    Args:
      req (RegisterReq):
      handle (PendingRequest):
    '''
    id = len(self._workers)
    self._workers[id] = rpc.connect(req.host, req.port)
    self._available_workers.append(id)
    util.log_info('Registered %s:%s (%d/%d)', req.host, req.port, id, self.num_workers)

    handle.done(core.EmptyMessage())

    self.init_worker_score(id, req.worker_status)

    if len(self._workers) == self.num_workers:
      threading.Thread(target=self._initialize).start()
Example #37
def benchmark_matmul(ctx, timer):
    N = int(1000 * math.pow(ctx.num_workers, 1.0 / 3.0))
    # N = 4000
    M = util.divup(N, ctx.num_workers)
    T = util.divup(N, math.sqrt(ctx.num_workers))

    util.log_info("Testing with %d workers, N = %d, tile_size=%s", ctx.num_workers, N, T)

    # x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))
    # y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))

    x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))
    y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))

    # print expr.glom(expr.dot(x, y))
    # print expr.dag(expr.dot(x, y))

    def _step():
        expr.evaluate(expr.dot(x, y))

    timer.time_op("matmul", _step)
Example #38
def pagerankDistributed(ctx, numPage, numIters, alpha):
  sGenerate = time.time()
  rank = eager(expr.ones((numPage, 1), tile_hint = (numPage / ctx.num_workers, 1), dtype = np.float32))
  linkMatrix = eager(
              expr.shuffle(
                expr.ndarray(
                  (numPage, numPage),
                  dtype = np.float32,
                  tile_hint = (numPage, numPage / ctx.num_workers)),
              make_weights,
              ))
  eGenerate = time.time()
  util.log_info("**pagerank** rank init finished")
  startCompute = time.time()
  for i in range(numIters):
    #rank = ((1 - alpha) * expr.dot(linkMatrix, rank,tile_hint = (numPage, numPage/10))) + belta
    rank = expr.dot(linkMatrix, rank, tile_hint = (numPage, numPage/10))
  rank.evaluate()
  endCompute = time.time()
  util.log_info("**pagerank** compute finished")
  return (eGenerate - sGenerate, endCompute - startCompute)
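The commented-out line hints at the damped update this benchmark simplifies. A local NumPy sketch of the standard damped power iteration (hypothetical helper; M is assumed column-stochastic):

import numpy as np

def pagerank_local(M, alpha=0.85, iters=50):
  n = M.shape[0]
  rank = np.ones((n, 1), dtype=np.float32) / n
  for _ in range(iters):
    # damped update: follow links with probability alpha, teleport otherwise
    rank = alpha * M.dot(rank) + (1.0 - alpha) / n
  return rank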
Example #39
def predict_price(ask, bid, t):
  # element-wise difference 
  spread = ask - bid
  
  # element-wise average of ask and bid  
  midprice = (ask + bid) / 2
  
  # slices allow for cheaply extracting parts of an array
  d_spread = spread[t:] - spread[:-t]

  # find prices `t` steps in the future of d_spread
  d_spread = d_spread[:-t]
  future_price = midprice[2*t:]
 
  util.log_info('D: %s, M: %s', d_spread.shape, future_price.shape)

  # compute a univariate linear predictor
  regression = mean(future_price / d_spread)
  prediction = regression * d_spread
  
  error = mean(abs(prediction - future_price))
  return error 
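The slicing works because the shapes line up: spread has n elements, d_spread[:-t] has n - 2*t, and midprice[2*t:] also has n - 2*t. A quick NumPy check of that alignment:

import numpy as np

n, t = 12, 3
x = np.arange(n)
assert len((x[t:] - x[:-t])[:-t]) == len(x[2 * t:]) == n - 2 * t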
Example #40
    def register(self, req, handle):
        '''
    RPC method.

    Register a new worker with the master.

    Args:
      req (RegisterReq):
      handle (PendingRequest):
    '''
        id = len(self._workers)
        self._workers[id] = rpc.connect(req.host, req.port)
        self._available_workers.append(id)
        util.log_info('Registered %s:%s (%d/%d)', req.host, req.port, id,
                      self.num_workers)

        handle.done(core.EmptyMessage())

        self.init_worker_score(id, req.worker_status)

        if len(self._workers) == self.num_workers:
            threading.Thread(target=self._initialize).start()
Example #41
def benchmark_matmul(ctx, timer):
    N = int(1000 * math.pow(ctx.num_workers, 1.0 / 3.0))
    #N = 4000
    M = util.divup(N, ctx.num_workers)
    T = util.divup(N, math.sqrt(ctx.num_workers))

    util.log_info('Testing with %d workers, N = %d, tile_size=%s',
                  ctx.num_workers, N, T)

    #x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))
    #y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))

    x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))
    y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))

    #print expr.glom(expr.dot(x, y))
    #print expr.dag(expr.dot(x, y))

    def _step():
        expr.evaluate(expr.dot(x, y))

    timer.time_op('matmul', _step)
Example #42
def test_convnet(ctx):
  hint = util.divup(64, sqrt(ctx.num_workers))

  images = expr.eager(expr.ones((N_IMGS,) + IMG_SIZE,
                                tile_hint=(N_IMGS, N_COLORS, hint, hint)))

  w1 = expr.eager(expr.ones((N_FILTERS, N_COLORS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))

  conv1 = stencil.stencil(images, w1, 2)
  pool1 = stencil.maxpool(conv1)

  w2 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))

  conv2 = stencil.stencil(pool1, w2, 2)
  pool2 = stencil.maxpool(conv2)

  w3 = expr.eager(expr.ones((N_FILTERS, N_FILTERS) + FILTER_SIZE,
                            tile_hint=ONE_TILE))
  conv3 = stencil.stencil(pool2, w3, 2)
  pool3 = stencil.maxpool(conv3)

  util.log_info(pool3.shape)
Example #43
  def _evaluate(self, ctx, deps):
    V, M, U = deps['V'], deps['M'], deps['U']

    strata = _compute_strata(V)
    util.log_info('Start eval')
    
    for i, stratum in enumerate(strata):
      util.log_info('Processing stratum: %d of %d (size = %d)', i, len(strata), len(stratum))
      #for ex in stratum: print ex

      worklist = set(stratum)
      expr.shuffle(V, sgd_netflix_mapper,
                   kw={'V' : lazify(V), 'M' : lazify(M), 'U' : lazify(U),
                       'worklist' : worklist }).force()
                       
    util.log_info('Eval done.')
Example #44
def run(filename):
    signal.signal(signal.SIGQUIT, sig_handler)
    os.system('rm ./_worker_profiles/*')

    mod_name, _ = splitext(basename(filename))
    module = imp.load_source(mod_name, filename)
    util.log_info('Running benchmarks for module: %s (%s)', module, filename)
    benchmarks = [
        k for k in dir(module)
        if (k.startswith('benchmark_')
            and isinstance(getattr(module, k), types.FunctionType))
    ]

    spartan.config.parse(sys.argv)
    if benchmarks:
        # csv header
        print 'num_workers,bench,time'
        workers = [int(w) for w in FLAGS.worker_list.split(',')]

        for i in workers:
            # restart the cluster
            FLAGS.num_workers = i
            ctx = spartan.initialize()

            timer = BenchTimer(i)
            util.log_info('Running benchmarks on %d workers', i)
            if FLAGS.test_optimizations:
                timer.prefix = 'opt_enabled'
                FLAGS.optimization = 1
                run_benchmarks(module, benchmarks, ctx, timer)

            timer.prefix = 'opt_disabled'
            FLAGS.optimization = 0
            run_benchmarks(module, benchmarks, ctx, timer)

            spartan.shutdown()
            time.sleep(1)

    if FLAGS.profile_worker:
        util.log_info('Writing worker profiles...')
        join_profiles('./_worker_profiles')
Example #45
def run(filename):
  signal.signal(signal.SIGQUIT, sig_handler)
  os.system('rm ./_worker_profiles/*')

  mod_name, _ = splitext(basename(filename))
  module = imp.load_source(mod_name, filename)
  util.log_info('Running benchmarks for module: %s (%s)', module, filename)
  benchmarks = [k for k in dir(module)
                if k.startswith('benchmark_') and
                isinstance(getattr(module, k), types.FunctionType)]

  spartan.config.parse(sys.argv)
  if benchmarks:
    # csv header
    print 'num_workers,bench,time'
    workers = [int(w) for w in FLAGS.worker_list.split(',')]
    
    for i in workers:
      # restart the cluster
      FLAGS.num_workers = i
      ctx = spartan.initialize()
      
      timer = BenchTimer(i)
      util.log_info('Running benchmarks on %d workers', i)
      if FLAGS.test_optimizations:
        timer.prefix = 'opt_enabled'
        FLAGS.optimization = 1
        run_benchmarks(module, benchmarks, ctx, timer)
          
      timer.prefix = 'opt_disabled'
      FLAGS.optimization = 0
      run_benchmarks(module, benchmarks, ctx, timer)

      spartan.shutdown()
      time.sleep(1)

  if FLAGS.profile_worker:
    util.log_info('Writing worker profiles...')
    join_profiles('./_worker_profiles')
Example #46
    def _evaluate(self, ctx, deps):
        V, M, U = deps['V'], deps['M'], deps['U']

        strata = _compute_strata(V)
        util.log_info('Start eval')

        for i, stratum in enumerate(strata):
            util.log_info('Processing stratum: %d of %d (size = %d)', i,
                          len(strata), len(stratum))
            #for ex in stratum: print ex

            worklist = set(stratum)
            expr.shuffle(V,
                         sgd_netflix_mapper,
                         kw={
                             'V': lazify(V),
                             'M': lazify(M),
                             'U': lazify(U),
                             'worklist': worklist
                         }).evaluate()

        util.log_info('Eval done.')
Example #47
def start_remote_worker(worker, st, ed):
    '''
  Start processes on a worker machine.

  The machine will launch worker processes ``st`` through ``ed``.

  :param worker: hostname to connect to.
  :param st: First process index to start.
  :param ed: Last process to start.
  '''
    if FLAGS.use_threads and worker == 'localhost':
        util.log_info('Using threads.')
        for i in range(st, ed):
            p = threading.Thread(target=spartan.worker._start_worker,
                                 args=((socket.gethostname(), FLAGS.port_base),
                                       i))
            p.daemon = True
            p.start()
        time.sleep(0.1)
        return

    util.log_info('Starting worker %d:%d on host %s', st, ed, worker)
    if FLAGS.oprofile:
        os.system('mkdir operf.%s' % worker)

    ssh_args = ['ssh', '-oForwardX11=no', worker]

    args = ['cd %s && ' % os.path.abspath(os.path.curdir)]

    if FLAGS.xterm:
        args += [
            'xterm',
            '-e',
        ]

    if FLAGS.oprofile:
        args += [
            'operf -e CPU_CLK_UNHALTED:100000000', '-g', '-d',
            'operf.%s' % worker
        ]

    args += [
        #'gdb', '-ex', 'run', '--args',
        'python',
        '-m spartan.worker',
        '--master=%s:%d' % (socket.gethostname(), FLAGS.port_base),
        '--count=%d' % (ed - st),
        '--heartbeat_interval=%d' % FLAGS.heartbeat_interval
    ]

    # add flags from config/user
    for (name, value) in FLAGS:
        if name in ['worker_list', 'print_options']: continue
        args += [repr(value)]

    #print >>sys.stderr, args
    util.log_debug('Running worker %s', ' '.join(args))
    time.sleep(0.1)
    if worker != 'localhost':
        p = subprocess.Popen(ssh_args + args, executable='ssh')
    else:
        p = subprocess.Popen(' '.join(args), shell=True, stdin=subprocess.PIPE)

    return p
Example #48
def add_one_extent(v, ex):
  result = v.fetch(ex) + 1
  util.log_info('AddOne: %s, %s', ex, result)
  yield (ex, result)
Example #49
def fuzzy_kmeans(points, k=10, num_iter=10, m=2.0, centers=None):
    '''
  Cluster data points using the fuzzy k-means method.

  Args:
    points(Expr or DistArray): the input data points matrix.
    k(int): the number of clusters.
    num_iter(int): the max iterations to run.
    m(float): the parameter of fuzzy kmeans.
    centers(Expr or DistArray): the initialized centers of each cluster.
  '''
    points = expr.force(points)
    num_dim = points.shape[1]
    if centers is None:
        centers = expr.rand(k, num_dim)

    labels = expr.zeros((points.shape[0], ), dtype=np.int)

    for iter in range(num_iter):
        centers = expr.as_array(centers)
        points_broadcast = expr.reshape(points,
                                        (points.shape[0], 1, points.shape[1]))
        centers_broadcast = expr.reshape(
            centers, (1, centers.shape[0], centers.shape[1]))
        distances = expr.sum(expr.square(points_broadcast - centers_broadcast),
                             axis=2)
        # Add a tiny epsilon to avoid division by zero
        distances = distances + 0.00000000001
        util.log_info('distances shape %s' % str(distances.shape))
        distances_broadcast = expr.reshape(
            distances, (distances.shape[0], 1, distances.shape[1]))
        distances_broadcast2 = expr.reshape(
            distances, (distances.shape[0], distances.shape[1], 1))
        prob = 1.0 / expr.sum(expr.power(
            distances_broadcast / distances_broadcast2, 2.0 / (m - 1)),
                              axis=2)
        prob.force()
        counts = expr.sum(prob, axis=0)
        counts = expr.reshape(counts, (counts.shape[0], 1))
        labels = expr.argmax(prob, axis=1)
        centers = expr.sum(
            expr.reshape(points, (points.shape[0], 1, points.shape[1])) *
            expr.reshape(prob, (prob.shape[0], prob.shape[1], 1)),
            axis=0)

        # We assume the centers matrix is small enough to be handled on the
        # master.
        counts = counts.glom()
        centers = centers.glom()
        # If any centroids don't have any points assigned to them.
        zcount_indices = (counts == 0).reshape(k)

        if np.any(zcount_indices):
            # One or more centroids may not have any points assigned to them,
            # which results in their position being the zero-vector.  We
            # reseed these centroids with new random values and set their
            # counts to 1 to avoid division by zero.
            counts[zcount_indices, :] = 1
            centers[zcount_indices, :] = np.random.rand(
                np.count_nonzero(zcount_indices), num_dim)

        centers = centers / counts
    return labels
Example #50
 def profile2(self):
   self.create_path()
   t1 = expr.sparse_rand((10000, 10000)).evaluate()
   time_a, a = util.timeit(lambda: expr.save(t1, "fiotest3", self.test_dir, False))
   util.log_info('Save a %s sparse array in %s without zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.load("fiotest3", self.test_dir, False).evaluate())
   util.log_info('Load a %s sparse array in %s without zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.save(t1, "fiotest3", self.test_dir, True))
   util.log_info('Save a %s sparse array in %s with zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.load("fiotest3", self.test_dir, True).evaluate())
   util.log_info('Load a %s sparse array in %s with zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.pickle(t1, "fiotest4", self.test_dir, False))
   util.log_info('Pickle a %s sparse array in %s without zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.unpickle("fiotest4", self.test_dir, False).evaluate())
   util.log_info('Unpickle a %s sparse array in %s without zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.pickle(t1, "fiotest4", self.test_dir, True))
   util.log_info('Pickle a %s sparse array in %s with zip', t1.shape, time_a)
   time_a, a = util.timeit(lambda: expr.unpickle("fiotest4", self.test_dir, True).evaluate())
   util.log_info('Unpickle a %s sparse array in %s with zip', t1.shape, time_a)
Example #51
 def test_find_change(self):
     arr = expr.randn(100)
     movers = finance.find_change(arr)
     #util.log_info(optimize(movers))
     util.log_info(movers.glom())
Example #52
 def test_put(self):
     put, call = finance.black_scholes(self.current, self.strike, maturity,
                                       rate, volatility)
     #util.log_info(put)
     #util.log_info(optimize(put))
     util.log_info(put.glom())
Example #53
 def test_call(self):
     put, call = finance.black_scholes(self.current, self.strike, maturity,
                                       rate, volatility)
     #util.log_info(call)
     util.log_info(call.glom())
Example #54
def solve(A, AT, desired_rank, is_symmetric=False):
  '''
  A simple implementation of the Lanczos algorithm
  (http://en.wikipedia.org/wiki/Lanczos_algorithm) for eigenvalue computation.

  Like the Mahout implementation, only the matrix*vector step is parallelized.
  
  First we use the Lanczos method to turn the matrix into tridiagonal form.
  Then we use numpy.linalg.eig to extract the eigenvalues and eigenvectors
  from the (desired_rank x desired_rank) tridiagonal matrix. Since
  desired_rank is much smaller than the size of the matrix, this can be done
  efficiently on the local machine.
  '''
  # Calculate two extra eigenvalues, but keep only the largest desired_rank
  # of them; this keeps the result consistent with scipy.sparse.linalg.svds.
  desired_rank += 2

  n = A.shape[1]
  v_next = np.ones(n) / np.sqrt(n)
  v_prev = np.zeros(n)
  beta = np.zeros(desired_rank+1)
  beta[0] = 0
  alpha = np.zeros(desired_rank)

  # Since desired_rank << the size of the matrix, we keep V in local memory
  # for efficiency (it is updated on every iteration).
  # If V cannot fit in local memory, it could be turned into a spartan
  # distributed array.
  V = np.zeros((n, desired_rank))


  for i in range(0, desired_rank):
    util.log_info("Iter : %s", i)
    v_next_expr = expr.from_numpy(v_next.reshape(n, 1))

    if is_symmetric:
      w = expr.dot(A, v_next_expr).optimized().glom().reshape(n)
    else:
      w = expr.dot(A, v_next_expr)
      w = expr.dot(AT, w).optimized().glom().reshape(n)

    alpha[i] = np.dot(w, v_next)
    w = w - alpha[i] * v_next - beta[i] * v_prev
    
    # Orthogonalize:
    for t in range(i):
      tmpa = np.dot(w, V[:, t])
      if tmpa == 0.0:
        continue
      w -= tmpa * V[:, t] 

    beta[i+1] = np.linalg.norm(w, 2) 
    v_prev = v_next
    v_next = w / beta[i+1]
    V[:, i] = v_prev
  
  # Create the tridiagonal matrix of size (desired_rank x desired_rank)
  tridiag = np.diag(alpha)
  for i in range(0, desired_rank-1):
    tridiag[i, i+1] = beta[i+1] 
    tridiag[i+1, i] = beta[i+1]
  
  # Get eigenvectors and eigenvalues of this tridiagonal matrix.
  # The eigenvalues of this tridiagonal matrix equal the eigenvalues of
  # dot(A, A.T).  We can get the eigenvectors of dot(A, A.T) by multiplying
  # V with the eigenvectors of this tridiagonal matrix.
  d, v = np.linalg.eig(tridiag) 
  
  # Sort eigenvalues and their corresponding eigenvectors 
  sorted_idx = np.argsort(np.absolute(d))[::-1]
  d = d[sorted_idx]
  v = v[:, sorted_idx]
  
  # Get the eigenvectors of dot(A, A.T)
  s = np.dot(V, v)
  return d[0:desired_rank-2], s[:, 0:desired_rank-2] 
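A hedged usage sketch (the array names and import path are assumptions, not from the source): wrap a NumPy matrix as spartan expressions and ask for the top five eigenpairs.

import numpy as np
from spartan import expr

X = np.random.rand(200, 100)
A = expr.from_numpy(X)
AT = expr.from_numpy(X.T)
d, s = solve(A, AT, desired_rank=5)  # largest-magnitude eigenvalue estimates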
Example #55
def test_local_offset():
    a = extent.create((0, 0), (5, 5), None)
    b = extent.create((2, 2), (3, 3), None)
    util.log_info('%s', extent.offset_from(a, b))