Python log_info Examples, spartan.util.log_info Python Examples

Example #1

0

Show file

File: benchmark_convnet.py Project: EasonLiao/spartan

def benchmark_convnet(ctx, timer):
  '''
  Time a three-layer stencil/max-pool pipeline on a batch of all-ones images.

  The pipeline is executed once untimed and then twice through
  ``timer.time_op`` under the name ``'convnet'``.

  :param ctx: context object; only ``ctx.num_workers`` is read.
  :param timer: object providing ``time_op(name, fn)``.
  '''
  image_size = BASE_IMG_SIZE
  minibatch = 64
  # NOTE(review): ``hint`` is never read again in this function -- confirm
  # whether it can be dropped.
  hint = util.divup(image_size, sqrt(ctx.num_workers))
  batch_shape = (minibatch, N_COLORS, image_size, image_size)
  # Only the batch dimension of the tile hint depends on the worker count.
  tile_hint = (util.divup(minibatch, ctx.num_workers),) + batch_shape[1:]
  util.log_info('Hint: %s', tile_hint)

  images = expr.eager(expr.ones(batch_shape, tile_hint=tile_hint))

  def _filter_bank(n_inputs):
    # All-ones filters created with the ONE_TILE tile hint.
    return expr.eager(expr.ones((N_FILTERS, n_inputs) + FILTER_SIZE,
                                tile_hint=ONE_TILE))

  w1 = _filter_bank(N_COLORS)
  w2 = _filter_bank(N_FILTERS)
  w3 = _filter_bank(N_FILTERS)

  def _():
    layer = images
    for weights in (w1, w2, w3):
      layer = stencil.maxpool(stencil.stencil(layer, weights, 2))
    expr.force(layer)

  # Warm-up run: force parakeet functions to compile before timing.
  _()
  for trial in range(2):
    timer.time_op('convnet', _)

Example #2

0

Show file

File: backup-sssp.py Project: GatsbyNewton/graph-computation-benchmark

def shortestPath_np(dim, linkMatrix, dist, numIters = 1000):
	'''
	Repeatedly relax ``dist`` against ``linkMatrix``.

	Each pass replaces ``dist`` with the minimum over axis 0 of
	``dist + linkMatrix``, reshaped to ``(dim, 1)``.

	:param dim: number of rows of the reshaped distance vector.
	:param linkMatrix: array added to ``dist`` on every pass
		(presumably ``(dim, dim)`` -- TODO confirm against the caller).
	:param dist: current distances (presumably ``(dim, 1)`` -- TODO confirm).
	:param numIters: number of relaxation passes; defaults to the previously
		hard-coded 1000. With 0 the input ``dist`` is returned untouched.
	:return: the distance array after the last pass.
	'''
	for i in range(numIters):
		util.log_info("%s", "enter")
		dist = (dist + linkMatrix).min(axis = 0).reshape(dim, 1)
		util.log_info("numItersation %s", i)
	return dist

Example #3

0

Show file

File: master.py Project: MaggieQi/spartan

 def mark_failed_worker(self, worker_id):
   '''
   Record that a worker has failed.

   The worker is removed from ``self._available_workers`` and, for every
   array in ``self._arrays``, each tile located on that worker has its
   extent appended to the array's ``bad_tiles``.

   :param worker_id: id of the failed worker.
   '''
   util.log_info('Marking worker %s as failed.', worker_id)
   self._available_workers.remove(worker_id)
   for array in self._arrays:
     for ex, tile_id in array.tiles.iteritems():
       # Tiles held by other workers are unaffected.
       if tile_id.worker != worker_id:
         continue
       array.bad_tiles.append(ex)

Example #4

0

Show file

File: backup-bfs.py Project: GatsbyNewton/graph-computation-benchmark

def bfs(ctx, dim):
	'''
	Build the start vector and the link matrix, then repeatedly compute
	``dot(linkMatrix, current)`` until the number of non-zero entries stops
	changing between two consecutive steps.

	:param ctx: context object; only ``ctx.num_workers`` is read.
	:param dim: side length of the link matrix / length of the vector.
	:return: ``(generation_seconds, compute_seconds)``.
	'''
	util.log_info("start to computing......")

	sGenerate = time.time()
	vector_spec = expr.ndarray(
			(dim, 1),
			dtype = np.int64,
			tile_hint = (dim / ctx.num_workers, 1))
	current = eager(expr.shuffle(vector_spec, make_current))

	matrix_spec = expr.ndarray(
			(dim, dim),
			dtype = np.int64,
			tile_hint = (dim, dim / ctx.num_workers))
	linkMatrix = eager(expr.shuffle(matrix_spec, make_matrix))
	eGenerate = time.time()

	startCompute = time.time()
	while True:
		frontier = expr.dot(linkMatrix, current)
		count_before = expr.count_nonzero(current)
		count_after = expr.count_nonzero(frontier)
		# Equal counts mean this step added nothing new: stop after it.
		converged = expr.equal(count_before, count_after).glom()
		current = frontier
		if converged:
			break
	current.evaluate()
	endCompute = time.time()

	generate_seconds = eGenerate - sGenerate
	compute_seconds = endCompute - startCompute
	return (generate_seconds, compute_seconds)

Example #5

0

Show file

File: test_newaxis.py Project: GabrielWen/spartan

  def test_newaxis(self):
    '''
    Check that indexing with ``expr.newaxis`` gives the same shapes as
    indexing a numpy array with ``np.newaxis``.
    '''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    # One leading new axis.
    expected = na[np.newaxis, 2:7, 4:8].shape
    actual = a[expr.newaxis, 2:7, 4:8].shape
    Assert.all_eq(expected, actual)

    # New axes before and between the two slices.
    expected = na[np.newaxis, 2:7, np.newaxis, 4:8].shape
    actual = a[expr.newaxis, 2:7, expr.newaxis, 4:8].shape
    Assert.all_eq(expected, actual)

    # ... plus a trailing new axis.
    expected = na[np.newaxis, 2:7, np.newaxis, 4:8, np.newaxis].shape
    actual = a[expr.newaxis, 2:7, expr.newaxis, 4:8, expr.newaxis].shape
    Assert.all_eq(expected, actual)

    # Extreme case: runs of new axes around both slices.
    expected = na[np.newaxis, np.newaxis, np.newaxis, np.newaxis, 2:7,
                  np.newaxis, np.newaxis, np.newaxis, 4:8,
                  np.newaxis, np.newaxis, np.newaxis].shape
    actual = a[expr.newaxis, expr.newaxis, expr.newaxis, expr.newaxis, 2:7,
               expr.newaxis, expr.newaxis, expr.newaxis, 4:8,
               expr.newaxis, expr.newaxis, expr.newaxis].shape
    Assert.all_eq(expected, actual)

    dist_shape = a[expr.newaxis, 2:7, expr.newaxis, 4:8, expr.newaxis,
                   expr.newaxis, expr.newaxis].shape
    np_shape = na[np.newaxis, 2:7, np.newaxis, 4:8, np.newaxis,
                  np.newaxis, np.newaxis].shape
    util.log_info('\na.shape:  %s  \nna.shape: %s', dist_shape, np_shape)

Example #6

0

Show file

 def _(axis):
     '''
     Sum the element-wise total of two all-ones arrays over ``axis`` and
     compare it with the equivalent numpy computation.
     '''
     util.log_info('Testing sum over axis %s', axis)
     shape = (TEST_SIZE, TEST_SIZE)
     a = expr.ones(shape) + expr.ones(shape)
     b = a.sum(axis=axis)
     actual = b.glom()
     expected = 2 * np.ones(shape).sum(axis)
     Assert.all_eq(actual, expected)

Example #7

0

Show file

def benchmark_convnet(ctx, timer):
  '''
  Benchmark three stacked stencil + max-pool layers on all-ones images.

  One untimed pass is made first, then two passes are measured with
  ``timer.time_op('convnet', ...)``.

  :param ctx: context object; only ``ctx.num_workers`` is read.
  :param timer: object providing ``time_op(name, fn)``.
  '''
  side = BASE_IMG_SIZE
  batch = 64
  # NOTE(review): ``hint`` is computed but not used afterwards -- confirm.
  hint = util.divup(side, sqrt(ctx.num_workers))
  per_worker = util.divup(batch, ctx.num_workers)
  tile_hint = (per_worker, N_COLORS, side, side)
  util.log_info('Hint: %s', tile_hint)

  images = expr.eager(
      expr.ones((batch, N_COLORS, side, side), tile_hint=tile_hint))

  # One all-ones filter bank per layer; the first consumes the colour
  # channels, the other two consume the previous layer's filters.
  weights = [
      expr.eager(expr.ones((N_FILTERS, n_in) + FILTER_SIZE,
                           tile_hint=ONE_TILE))
      for n_in in (N_COLORS, N_FILTERS, N_FILTERS)
  ]

  def _():
    out = images
    for w in weights:
      out = stencil.maxpool(stencil.stencil(out, w, 2))
    out.evaluate()

  # force parakeet functions to compile before timing.
  _()
  for attempt in range(2):
    timer.time_op('convnet', _)

Example #8

0

Show file

    def test_newaxis(self):
        '''
        Compare the shapes produced by ``expr.newaxis`` indexing with the
        shapes numpy produces for the same index using ``np.newaxis``.
        '''
        na = np.arange(100).reshape(10, 10)
        a = expr.from_numpy(na)
        same = Assert.all_eq

        same(na[np.newaxis, 2:7, 4:8].shape,
             a[expr.newaxis, 2:7, 4:8].shape)

        same(na[np.newaxis, 2:7, np.newaxis, 4:8].shape,
             a[expr.newaxis, 2:7, expr.newaxis, 4:8].shape)

        same(na[np.newaxis, 2:7, np.newaxis, 4:8, np.newaxis].shape,
             a[expr.newaxis, 2:7, expr.newaxis, 4:8, expr.newaxis].shape)

        # Extreme case: several consecutive new axes in every position.
        same(na[np.newaxis, np.newaxis, np.newaxis, np.newaxis,
                2:7,
                np.newaxis, np.newaxis, np.newaxis,
                4:8,
                np.newaxis, np.newaxis, np.newaxis].shape,
             a[expr.newaxis, expr.newaxis, expr.newaxis, expr.newaxis,
               2:7,
               expr.newaxis, expr.newaxis, expr.newaxis,
               4:8,
               expr.newaxis, expr.newaxis, expr.newaxis].shape)

        dist_shape = a[expr.newaxis, 2:7, expr.newaxis, 4:8,
                       expr.newaxis, expr.newaxis, expr.newaxis].shape
        np_shape = na[np.newaxis, 2:7, np.newaxis, 4:8,
                      np.newaxis, np.newaxis, np.newaxis].shape
        util.log_info('\na.shape:  %s  \nna.shape: %s', dist_shape, np_shape)

Example #9

0

Show file

 def mark_failed_worker(self, worker_id):
     '''
     Take a failed worker out of service.

     Removes ``worker_id`` from ``self._available_workers`` and appends the
     extent of every tile located on that worker to the owning array's
     ``bad_tiles``.

     :param worker_id: id of the worker that failed.
     '''
     util.log_info('Marking worker %s as failed.', worker_id)
     self._available_workers.remove(worker_id)
     for array in self._arrays:
         # Lazily select the extents whose tile lives on the failed worker.
         lost_extents = (ex for ex, tile_id in array.tiles.iteritems()
                         if tile_id.worker == worker_id)
         for ex in lost_extents:
             array.bad_tiles.append(ex)

Example #10

0

Show file

File: test_convnet.py Project: rossparks/spartan

def test_convnet(ctx):
    '''
    Build three stencil + max-pool layers over all-ones images and log the
    shape of the final pooled result.

    :param ctx: context object; only ``ctx.num_workers`` is read.
    '''
    hint = util.divup(64, sqrt(ctx.num_workers))

    image_tiles = (N_IMGS, N_COLORS, hint, hint)
    images = expr.eager(expr.ones((N_IMGS, ) + IMG_SIZE, tile_hint=image_tiles))

    layer = images
    # The first layer consumes the colour channels, the next two consume the
    # previous layer's filters.
    for n_inputs in (N_COLORS, N_FILTERS, N_FILTERS):
        weights = expr.eager(
            expr.ones((N_FILTERS, n_inputs) + FILTER_SIZE,
                      tile_hint=ONE_TILE))
        convolved = stencil.stencil(layer, weights, 2)
        layer = stencil.maxpool(convolved)

    util.log_info(layer.shape)

Example #11

0

Show file

File: cluster.py Project: GabrielWen/spartan

def start_remote_worker(worker, st, ed):
    """
    Start processes on a worker machine.

    The machine will launch worker processes ``st`` through ``ed``.

    When ``FLAGS.use_threads`` is set and ``worker`` is ``"localhost"`` the
    workers are started as daemon threads in this process and nothing is
    returned. Otherwise a ``python -m spartan.worker`` command line is built
    and launched -- via ``ssh`` for any other host, via the local shell for
    ``"localhost"`` -- and the ``subprocess.Popen`` object is returned.

    :param worker: hostname to connect to.
    :param st: First process index to start.
    :param ed: Last process to start.
    """
    if FLAGS.use_threads and worker == "localhost":
        util.log_info("Using threads.")
        for index in range(st, ed):
            master_addr = (socket.gethostname(), FLAGS.port_base)
            thread = threading.Thread(target=spartan.worker._start_worker,
                                      args=(master_addr, index))
            thread.daemon = True
            thread.start()
        time.sleep(0.1)
        return

    util.log_info("Starting worker %d:%d on host %s", st, ed, worker)
    if FLAGS.oprofile:
        os.system("mkdir operf.%s" % worker)

    # The command starts by changing into the current working directory.
    command = ["cd %s && " % os.path.abspath(os.path.curdir)]

    if FLAGS.xterm:
        command.extend(["xterm", "-e"])

    if FLAGS.oprofile:
        command.extend(["operf -e CPU_CLK_UNHALTED:100000000", "-g", "-d",
                        "operf.%s" % worker])

    # (for debugging, 'gdb', '-ex', 'run', '--args' can be put in front)
    command.extend([
        "python",
        "-m spartan.worker",
        "--master=%s:%d" % (socket.gethostname(), FLAGS.port_base),
        "--count=%d" % (ed - st),
        "--heartbeat_interval=%d" % FLAGS.heartbeat_interval,
    ])

    # add flags from config/user
    # NOTE(review): only repr(value) is appended, never the flag name --
    # presumably the flag objects render themselves as full options; confirm.
    for (name, value) in FLAGS:
        if name not in ["worker_list", "print_options"]:
            command.append(repr(value))

    util.log_debug("Running worker %s", " ".join(command))
    time.sleep(0.1)
    # TODO: improve this to make log break at newline
    if worker == "localhost":
        return subprocess.Popen(" ".join(command), shell=True,
                                stdin=subprocess.PIPE)

    ssh_command = ["ssh", "-oForwardX11=no", worker] + command
    return subprocess.Popen(ssh_command, executable="ssh")

Example #12

0

Show file

File: simple_svm.py Project: EasonLiao/spartan

  def train_smo_1998(self, data, labels):
    '''
    Train an SVM model using the SMO (1998) algorithm.

    The method stores its results on the instance: ``self.b``, ``self.alpha``,
    ``self.E``, ``self.w`` and ``self.usew_`` are all assigned here. It stops
    when a pass changes nothing after a full sweep, or after
    ``self.maxiter`` iterations, and finally prints the iteration count,
    ``b`` and ``w``.

    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''
    
    N = data.shape[0] # Number of instances
    D = data.shape[1]  # Number of features

    self.b = 0.0
    self.alpha = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    
    # linear kernel
    kernel_results = expr.dot(data, expr.transpose(data), tile_hint=[N/self.ctx.num_workers, N])   
    
    labels = expr.force(labels)
    # Per-instance cache: E[i] = b + reduce(alpha, margin_mapper, kernel column i) - labels[i].
    # NOTE(review): margin_mapper is defined elsewhere; presumably it computes
    # the weighted kernel sum for instance i -- confirm.
    self.E = expr.zeros((N,1), dtype=np.float64, tile_hint=[N/self.ctx.num_workers, 1]).force()
    for i in xrange(N):
      self.E[i, 0] = self.b + expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                                          local_reduce_fn=margin_mapper,
                                          accumulate_fn=np.add, 
                                          fn_kw=dict(label=labels, data=kernel_results[:,i].force())).glom() - labels[i, 0]
    
    util.log_info("Starting SMO")
    it = 0
    num_changed = 0
    examine_all = True
    # Alternate between a sweep over every instance and sweeps restricted to
    # instances whose alpha lies strictly between 0 and self.C.
    while (num_changed > 0 or examine_all) and (it < self.maxiter):
      util.log_info("Iteration:%d", it)

      num_changed = 0
      
      if examine_all:
        for i in xrange(N): 
          num_changed += self.examine_example(i, N, labels, kernel_results)
      else:
        for i in xrange(N):
          if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
            num_changed += self.examine_example(i, N, labels, kernel_results)

      it += 1

      # After a full sweep switch to restricted sweeps; once a restricted
      # sweep changes nothing, schedule another full sweep.
      if examine_all: examine_all = False
      elif num_changed == 0: examine_all = True
    
    # Build w one feature at a time with the same reduction, this time over
    # the i-th column of the raw data instead of the kernel matrix.
    self.w = expr.zeros((D, 1), dtype=np.float64).force()
    for i in xrange(D): 
      self.w[i,0] = expr.reduce(self.alpha, axis=None, dtype_fn=lambda input: input.dtype,
                              local_reduce_fn=margin_mapper,
                              accumulate_fn=np.add, 
                              fn_kw=dict(label=labels, data=expr.force(data[:,i]))).glom()
    self.usew_ = True
    print 'iteration finish:', it
    print 'b:', self.b
    print 'w:', self.w.glom()

Example #13

0

Show file

File: fuzzy_kmeans.py Project: rgardner/spartan

def fuzzy_kmeans(points, k=10, num_iter=10, m=2.0, centers=None):
  '''
  Cluster data points using the fuzzy k-means clustering method.

  Args:
    points(Expr or DistArray): the input data points matrix.
    k(int): the number of clusters.
    num_iter(int): the max iterations to run.
    m(float): the parameter of fuzzy kmeans.
    centers(Expr or DistArray): the initialized centers of each cluster;
      random centers are generated when this is ``None``.

  Returns:
    The ``expr.argmax(prob, axis=1)`` expression built in the last
    iteration, or an all-zero integer array when ``num_iter`` is 0.
  '''
  points = expr.force(points)
  num_dim = points.shape[1]
  if centers is None:
      centers = expr.rand(k, num_dim)

  # ``np.int`` was only an alias of the builtin ``int`` and no longer exists
  # in current NumPy releases; the builtin selects the same dtype.
  labels = expr.zeros((points.shape[0],), dtype=int)

  # The loop index is unused; ``_`` avoids shadowing the builtin ``iter``.
  for _ in range(num_iter):
    centers = expr.as_array(centers)
    points_broadcast = expr.reshape(points, (points.shape[0], 1, points.shape[1]))
    centers_broadcast = expr.reshape(centers, (1, centers.shape[0], centers.shape[1]))
    # Squared distance between every point and every center.
    distances = expr.sum(expr.square(points_broadcast - centers_broadcast), axis=2)
    # This is used to avoid dividing by zero.
    distances = distances + 0.00000000001
    util.log_info('distances shape %s' % str(distances.shape))
    distances_broadcast = expr.reshape(distances, (distances.shape[0], 1,
                                                   distances.shape[1]))
    distances_broadcast2 = expr.reshape(distances, (distances.shape[0],
                                                    distances.shape[1], 1))
    # Membership of each point in each cluster, from the pairwise ratios of
    # its distances to the centers.
    prob = 1.0 / expr.sum(expr.power(distances_broadcast / distances_broadcast2,
                                     2.0 / (m - 1)), axis=2)
    prob.force()
    counts = expr.sum(prob, axis=0)
    counts = expr.reshape(counts, (counts.shape[0], 1))
    labels = expr.argmax(prob, axis=1)
    # Membership-weighted sum of the points for every cluster.
    centers = expr.sum(expr.reshape(points, (points.shape[0], 1, points.shape[1])) *
                       expr.reshape(prob, (prob.shape[0], prob.shape[1], 1)),
                       axis=0)

    # We assume that the size of centers are relative small that can be handled
    # on the master.
    counts = counts.glom()
    centers = centers.glom()
    # If any centroids don't have any points assigned to them.
    zcount_indices = (counts == 0).reshape(k)

    if np.any(zcount_indices):
      # One or more centroids may not have any points assigned to them, which results in their
      # position being the zero-vector.  We reseed these centroids with new random values
      # and set their counts to 1 in order to get rid of dividing by zero.
      counts[zcount_indices, :] = 1
      centers[zcount_indices, :] = np.random.rand(np.count_nonzero(zcount_indices),
                                                  num_dim)

    centers = centers / counts
  return labels

Example #14

0

Show file

File: cluster.py Project: MaggieQi/spartan

def start_remote_worker(worker, st, ed):
  '''
  Start processes on a worker machine.

  The machine will launch worker processes ``st`` through ``ed``.

  With ``FLAGS.use_threads`` set and ``worker == 'localhost'`` the workers
  run as daemon threads inside this process and the function returns
  ``None``. In every other case it builds a ``python -m spartan.worker``
  command, runs it (over ``ssh`` unless the host is ``'localhost'``) and
  returns the resulting ``subprocess.Popen`` object.

  :param worker: hostname to connect to.
  :param st: First process index to start.
  :param ed: Last process to start.
  '''
  run_locally = worker == 'localhost'

  if FLAGS.use_threads and run_locally:
    util.log_info('Using threads.')
    for worker_index in range(st, ed):
      thread_args = ((socket.gethostname(), FLAGS.port_base), worker_index)
      worker_thread = threading.Thread(target=spartan.worker._start_worker,
                                       args=thread_args)
      worker_thread.daemon = True
      worker_thread.start()
    time.sleep(0.1)
    return

  util.log_info('Starting worker %d:%d on host %s', st, ed, worker)
  if FLAGS.oprofile:
    os.system('mkdir operf.%s' % worker)

  # Start in the same working directory as the current process.
  args = ['cd %s && ' % os.path.abspath(os.path.curdir)]

  if FLAGS.xterm:
    args = args + ['xterm', '-e']

  if FLAGS.oprofile:
    args = args + ['operf -e CPU_CLK_UNHALTED:100000000', '-g', '-d',
                   'operf.%s' % worker]

  # (for debugging, 'gdb', '-ex', 'run', '--args' can be put in front)
  args = args + [
      'python', '-m spartan.worker',
      '--master=%s:%d' % (socket.gethostname(), FLAGS.port_base),
      '--count=%d' % (ed - st),
      '--heartbeat_interval=%d' % FLAGS.heartbeat_interval,
  ]

  # add flags from config/user
  skipped_flags = ['worker_list', 'print_options']
  for (name, value) in FLAGS:
    if name in skipped_flags:
      continue
    args.append(repr(value))

  util.log_debug('Running worker %s', ' '.join(args))
  time.sleep(0.1)
  if run_locally:
    p = subprocess.Popen(' '.join(args), shell=True, stdin=subprocess.PIPE)
  else:
    ssh_args = ['ssh', '-oForwardX11=no', worker]
    p = subprocess.Popen(ssh_args + args, executable='ssh')

  return p

Example #15

0

Show file

File: backup-bfs.py Project: GatsbyNewton/graph-computation-benchmark

def make_current(tile, ex):
	'''
	Create the zero-filled ``int64`` column for extent ``ex`` and set the
	entry for ``startVertex`` to 1 when it lies within the extent's rows.

	:param tile: not used by this function.
	:param ex: extent being filled; ``ul``, ``lr`` and ``shape`` are read.
	:return: a one-element list holding the ``(ex, column)`` pair.
	'''
	util.log_info("start to creatting")
	first_row = ex.ul[0]
	last_row = ex.lr[0]
	n_rows = ex.shape[0]

	column = np.zeros((n_rows, 1), dtype = np.int64)
	# NOTE(review): startVertex is compared against the extent bounds but is
	# then used directly as an index into this extent-sized array -- that
	# looks right only for an extent starting at row 0; confirm.
	if first_row <= startVertex <= last_row:
		column[startVertex, 0] = 1
	return [(ex, column)]

Example #16

0

Show file

File: test_newaxis.py Project: GabrielWen/spartan

  def test_del_dim(self):
    '''
    Check that an integer index removes a dimension the same way for a
    distributed array as for the numpy array it was built from.
    '''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    # Positive integer index: compare values, then shapes.
    expected = na[2:7, 8]
    Assert.all_eq(expected, a[2:7, 8].glom())
    expected = na[3:9, 4].shape
    Assert.all_eq(expected, a[3:9, 4].shape)

    # Negative integer index, in either position.
    expected = na[2:7, -1]
    Assert.all_eq(expected, a[2:7, -1].glom())
    expected = na[-1, 3:9].shape
    Assert.all_eq(expected, a[-1, 3:9].shape)

    dist_shape = a[3:9, 4].shape
    np_shape = na[3:9, 4].shape
    util.log_info('\na.shape: %s \nna.shape %s', dist_shape, np_shape)

Example #17

0

Show file

File: zeromq.py Project: muddimedia/spartan-1

 def bind(self):
   '''
   Bind the socket to ``self.addr``.

   The host part is resolved with ``socket.gethostbyname``. A port of -1
   asks for a random port; ``self.addr`` is then replaced by the resolved
   host and the port that was chosen. For any other port a bind failure
   is logged and the ``zmq.ZMQError`` is re-raised.
   '''
   hostname, port = self.addr
   host = socket.gethostbyname(hostname)
   util.log_debug('Binding... %s', (host, port))

   if port == -1:
     chosen_port = self._zmq.bind_to_random_port('tcp://%s' % host)
     self.addr = (host, chosen_port)
     return

   try:
     self._zmq.bind('tcp://%s:%d' % (host, port))
   except zmq.ZMQError:
     util.log_info('Failed to bind (%s, %d)' % (host, port))
     raise

Example #18

0

Show file

    def test_del_dim(self):
        '''
        Integer indices must drop a dimension exactly as numpy does, for
        both positive and negative indices.
        '''
        na = np.arange(100).reshape(10, 10)
        a = expr.from_numpy(na)
        same = Assert.all_eq

        same(na[2:7, 8], a[2:7, 8].glom())
        same(na[3:9, 4].shape, a[3:9, 4].shape)

        same(na[2:7, -1], a[2:7, -1].glom())
        same(na[-1, 3:9].shape, a[-1, 3:9].shape)

        shape_message = '\na.shape: %s \nna.shape %s'
        util.log_info(shape_message, a[3:9, 4].shape, na[3:9, 4].shape)

Example #19

0

Show file

File: test_sort.py Project: MaggieQi/spartan

  def test_ndimension(self):
    '''
    Sort and argsort randomly shaped arrays along every axis and compare
    the distributed results with numpy's.

    Five cases are run; each draws a dimensionality from [2, 6) and a size
    from [5, 11) for every dimension.
    '''
    for case_number in xrange(1, 6):
      dim = np.random.randint(low=2, high=6)
      shape = np.random.randint(low=5, high=11, size=dim)
      util.log_info('Test Case #%s: DIM(%s) shape%s', case_number, dim, shape)

      na = new_ndarray(shape)
      a = expr.from_numpy(na)

      for axis in xrange(dim):
        sorted_values = expr.sort(a, axis).glom()
        Assert.all_eq(sorted_values, np.sort(na, axis))

        sorted_indices = expr.argsort(a, axis).glom()
        Assert.all_eq(sorted_indices, np.argsort(na, axis))

Example #20

0

Show file

File: netflix.py Project: EasonLiao/spartan

def fake_netflix_mapper(inputs, ex, p_rating=None):
  '''
  Create "Netflix-like" data for the given extent.

  Random (row, column) positions inside the extent receive random integer
  ratings in [0, 5), stored as float32, and the result is yielded as a
  sparse COO matrix of the extent's shape.

  :param inputs: not used by this function.
  :param ex: extent to generate data for; ``size`` and ``shape`` are read.
  :param p_rating: Sparsity factor (probability a given cell will have a rating)
  '''
  # Always generate at least one rating.
  count = int(max(1, ex.size * p_rating))

  rows = np.random.randint(0, ex.shape[0], count)
  cols = np.random.randint(0, ex.shape[1], count)
  values = np.random.randint(0, 5, count).astype(np.float32)

  util.log_info('%s %s %s %s', ex, p_rating, ex.size, len(values))

  yield ex, scipy.sparse.coo_matrix((values, (rows, cols)), shape=ex.shape)

Example #21

0

Show file

File: test_jacobi.py Project: muddimedia/spartan-1

def benchmark_jacobi(ctx, timer):
  '''
  Run ``jacobi.jacobi_method`` for ``ITERATION`` iterations on a system of
  size ``base * ctx.num_workers`` and log the result and the elapsed time.

  Only the call to ``jacobi_method`` (including the ``glom`` of its result)
  is timed; building and evaluating ``A`` and ``b`` is not.

  :param ctx: context object; only ``ctx.num_workers`` is read.
  :param timer: not used by this function.
  '''
  global base, ITERATION
  util.log_warn('util.log_warn: %s', ctx.num_workers)

  lazy_A, lazy_b = jacobi.jacobi_init(base * ctx.num_workers)
  A = lazy_A.evaluate()
  b = lazy_b.evaluate()

  started = time.time()
  result = jacobi.jacobi_method(A, b, ITERATION).glom()
  elapsed = time.time() - started

  util.log_info('\nresult =\n%s', result)
  util.log_warn('time cost: %s s', elapsed)
  util.log_warn('cost per iteration: %s s\n', elapsed / ITERATION)

Example #22

0

Show file

File: master.py Project: GabrielWen/spartan

    def _initialize(self):
        """Send an initialization request to all workers and wait for
        their responses.

        Every worker receives the same peer table (worker id -> address)
        with ``req.id`` set to its own id. Afterwards the master's
        ``BlobCtx`` is created and ``self._initialized`` is set.
        """
        util.log_info("Initializing...")
        peers = {}
        for worker_id, worker in self._workers.iteritems():
            peers[worker_id] = worker.addr()
        req = core.InitializeReq(peers=peers)

        pending = rpc.FutureGroup()
        for worker_id, worker in self._workers.iteritems():
            # The one request object is reused; only its id changes.
            req.id = worker_id
            pending.append(worker.initialize(req))
        pending.wait()

        self._ctx = blob_ctx.BlobCtx(blob_ctx.MASTER_ID, self._workers, self)
        self._initialized = True
        util.log_info("done...")

Example #23

0

Show file

    def shutdown(self):
        '''Shutdown all workers and halt.

        Does nothing when ``self._ctx.active`` is already ``False``.
        Otherwise the context is marked inactive, a shutdown request is
        issued to every worker and, after a one-second pause, the master's
        own server is shut down.
        '''
        if self._ctx.active is False:
            return

        self._ctx.active = False

        # NOTE(review): the collected futures are never waited on; the
        # fixed sleep below is what gives the requests time to leave.
        pending = rpc.FutureGroup()
        for worker_id, worker in self._workers.iteritems():
            util.log_info('Shutting down worker %d', worker_id)
            request = worker.shutdown()
            pending.append(request)

        # Wait a second to let our shutdown request go out.
        time.sleep(1)

        self._server.shutdown()

Example #24

0

Show file

def fake_netflix_mapper(inputs, ex, p_rating=None):
    '''
    Create "Netflix-like" data for the given extent.

    Yields ``(ex, matrix)`` where ``matrix`` is a sparse COO matrix of the
    extent's shape holding random float32 ratings drawn from the integers
    0..4 at random positions.

    :param inputs: not used by this function.
    :param ex: extent to fill; its ``size`` and ``shape`` are read.
    :param p_rating: Sparsity factor (probability a given cell will have a rating)
    '''
    # At least one rating is always produced.
    n_ratings = int(max(1, ex.size * p_rating))

    def _draw(upper_bound):
        # n_ratings random integers in [0, upper_bound).
        return np.random.randint(0, upper_bound, n_ratings)

    uids = _draw(ex.shape[0])
    mids = _draw(ex.shape[1])
    ratings = _draw(5).astype(np.float32)

    util.log_info('%s %s %s %s', ex, p_rating, ex.size, len(ratings))

    positions = (uids, mids)
    data = scipy.sparse.coo_matrix((ratings, positions), shape=ex.shape)
    yield ex, data

Example #25

0

Show file

File: test_newaxis.py Project: GabrielWen/spartan

  def test_combo(self):
    '''
    Mix ``newaxis`` with slices and integer indices and compare the
    distributed result against numpy.
    '''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    expected = na[np.newaxis, 2:7, 4]
    Assert.all_eq(expected, a[expr.newaxis, 2:7, 4].glom())

    expected = na[2:7, np.newaxis, -1]
    Assert.all_eq(expected, a[2:7, expr.newaxis, -1].glom())

    expected = na[-1, np.newaxis, 2:7]
    Assert.all_eq(expected, a[-1, expr.newaxis, 2:7].glom())

    # Several new axes on both sides of the integer index.
    expected = na[np.newaxis, 2:7, np.newaxis, np.newaxis, 4,
                  np.newaxis, np.newaxis]
    actual = a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, 4,
               expr.newaxis, expr.newaxis].glom()
    Assert.all_eq(expected, actual)

    dist_shape = a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, -1,
                   expr.newaxis, expr.newaxis].shape
    np_shape = na[np.newaxis, 2:7, np.newaxis, np.newaxis, -1,
                  np.newaxis, np.newaxis].shape
    util.log_info('\na.shape:  %s \nna.shape: %s', dist_shape, np_shape)

Example #26

0

Show file

File: master.py Project: MaggieQi/spartan

  def shutdown(self):
    '''Shutdown all workers and halt.

    A no-op when ``self._ctx.active`` is already ``False``; otherwise marks
    the context inactive, asks each worker to shut down, pauses for one
    second and then shuts down the master's server.
    '''
    if self._ctx.active is False:
      return

    self._ctx.active = False

    # NOTE(review): nothing waits on this group; see the sleep below.
    shutdown_requests = rpc.FutureGroup()
    for worker_id, worker in self._workers.iteritems():
      util.log_info('Shutting down worker %d', worker_id)
      shutdown_requests.append(worker.shutdown())

    # Wait a second to let our shutdown request go out.
    time.sleep(1)

    self._server.shutdown()

Example #27

0

Show file

    def test_combo(self):
        '''
        Combine ``newaxis`` with slice and integer indexing and check the
        values against the same numpy indexing.
        '''
        na = np.arange(100).reshape(10, 10)
        a = expr.from_numpy(na)
        same = Assert.all_eq

        same(na[np.newaxis, 2:7, 4],
             a[expr.newaxis, 2:7, 4].glom())
        same(na[2:7, np.newaxis, -1],
             a[2:7, expr.newaxis, -1].glom())
        same(na[-1, np.newaxis, 2:7],
             a[-1, expr.newaxis, 2:7].glom())
        same(na[np.newaxis, 2:7, np.newaxis, np.newaxis, 4,
                np.newaxis, np.newaxis],
             a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, 4,
               expr.newaxis, expr.newaxis].glom())

        dist_shape = a[expr.newaxis, 2:7, expr.newaxis, expr.newaxis, -1,
                       expr.newaxis, expr.newaxis].shape
        np_shape = na[np.newaxis, 2:7, np.newaxis, np.newaxis, -1,
                      np.newaxis, np.newaxis].shape
        util.log_info('\na.shape:  %s \nna.shape: %s', dist_shape, np_shape)

Example #28

0

Show file

def _build_mapper(ex, task_array, target_array, X, y, criterion, max_depth,
                  min_samples_split, min_samples_leaf, max_features,
                  bootstrap):
  """
  Mapper kernel for building a random forest classifier.

  Each kernel instance fetches the entire feature and prediction arrays
  (X and y) and fits one local ``SKRF`` forest, which may contain more than
  one tree. The fitted forest is stored in ``target_array`` at the row
  given by the extent's first coordinate.

  The criterion, max_depth, min_samples_split, min_samples_leaf,
  max_features and bootstrap options are passed straight through to
  ``SKRF``; ``n_jobs`` is fixed to 1.

  Returns a ``core.LocalKernelResult`` whose ``result`` is ``None``.
  """
  started = time.time()
  # The extent's first row selects this kernel's entry in both arrays.
  row = ex.ul[0]
  forest_options = dict(
      # Number of trees this worker needs to train.
      n_estimators=task_array[row],
      criterion=criterion,
      max_depth=max_depth,
      n_jobs=1,
      min_samples_split=min_samples_split,
      min_samples_leaf=min_samples_leaf,
      max_features=max_features,
      bootstrap=bootstrap,
  )
  features = X.glom()
  targets = y.glom()

  forest = SKRF(**forest_options)
  forest.fit(features, targets)
  # Update the target array.
  target_array[row, :] = (forest,)

  result = core.LocalKernelResult()
  result.result = None
  util.log_info("Finish construction : %s", time.time() - started)
  return result

Example #29

0

Show file

File: local.py Project: MaggieQi/spartan

def compile_parakeet_source(src):
  '''Compile source code defining a parakeet function.

  ``src`` is written to a named temporary ``.py`` file, which is then
  loaded as the module ``parakeet_temp`` with ``imp.load_source``.

  :param src: text of the Python source to load.
  :raises CodegenException: when ``imp.load_source`` raises an ``Exception``;
    the original message and args are forwarded.
  '''
  util.log_debug('Compiling parakeet source.')
  # delete=True: the temporary file is removed once ``tmpfile`` is closed.
  tmpfile = tempfile.NamedTemporaryFile(delete=True, prefix='spartan-local-', suffix='.py')
  tmpfile.write(src)
  # Flush so the source is on disk before it is loaded by file name below.
  tmpfile.flush()

  #util.log_info('File: %s, Source: \n %s \n', tmpfile.name, src)

  #os.rename(tmpfile.name, srcfile)
  #atexit.register(lambda: os.remove(srcfile))

  # NOTE(review): ``module`` is neither used nor returned in the code shown
  # here -- the listing may stop before the end of the function; confirm
  # against the full file.
  try:
    module = imp.load_source('parakeet_temp', tmpfile.name)
  except Exception, ex:
    # Keep the headline at info level; the full source only at debug level.
    util.log_info('Failed to build parakeet wrapper')
    util.log_debug('Source was: %s', src)
    raise CodegenException(ex.message, ex.args)

Example #30

0

Show file

    def _initialize(self):
        '''Send an initialization request to all workers and wait for
        their responses.

        The request carries a table mapping every worker id to that
        worker's address; ``req.id`` is set to the receiving worker's id
        before each send. Once all replies are in, the master's ``BlobCtx``
        is created and ``self._initialized`` is set.
        '''
        util.log_info('Initializing...')
        peer_table = {worker_id: worker.addr()
                      for worker_id, worker in self._workers.iteritems()}
        req = core.InitializeReq(peers=peer_table)

        replies = rpc.FutureGroup()
        for worker_id, worker in self._workers.iteritems():
            # Same request object each time; only the id differs.
            req.id = worker_id
            reply = worker.initialize(req)
            replies.append(reply)
        replies.wait()

        self._ctx = blob_ctx.BlobCtx(blob_ctx.MASTER_ID, self._workers, self)
        self._initialized = True
        util.log_info('done...')

Example #31

0

Show file

File: benchmark_pagerank.py Project: EasonLiao/spartan

def benchmark_pagerank(ctx, timer):
  '''
  Time three multiplications of a weight matrix with an all-ones vector.

  The matrix has ``PAGES_PER_WORKER * ctx.num_workers`` rows and columns
  and is filled by ``make_weights``.

  :param ctx: context object; only ``ctx.num_workers`` is read.
  :param timer: object providing ``time_op(name, fn)``.
  '''
  num_pages = PAGES_PER_WORKER * ctx.num_workers
  util.log_info('Total pages: %s', num_pages)

  empty_weights = expr.ndarray(
    (num_pages, num_pages),
    dtype=np.float32,
    tile_hint=(num_pages, PAGES_PER_WORKER / 8))
  wts = eager(expr.shuffle(empty_weights, make_weights))

  ones_column = expr.ones((num_pages, 1),
                          tile_hint=(PAGES_PER_WORKER / 8, 1),
                          dtype=np.float32)
  p = eager(ones_column)

  def _multiply():
    return expr.dot(wts, p).force()

  for i in range(3):
    timer.time_op('pagerank', _multiply)

Example #32

0

Show file

File: benchmark_pagerank.py Project: rossparks/spartan

def benchmark_pagerank(ctx, timer):
    '''
    Benchmark ``dot(weights, p)`` three times under the name ``'pagerank'``.

    ``weights`` is a square float32 array with
    ``PAGES_PER_WORKER * ctx.num_workers`` rows, populated by
    ``make_weights``; ``p`` is an all-ones float32 column of the same height.

    :param ctx: context object; only ``ctx.num_workers`` is read.
    :param timer: object providing ``time_op(name, fn)``.
    '''
    num_pages = PAGES_PER_WORKER * ctx.num_workers
    util.log_info('Total pages: %s', num_pages)

    weight_tiles = (num_pages, PAGES_PER_WORKER / 8)
    wts = eager(
        expr.shuffle(
            expr.ndarray((num_pages, num_pages),
                         dtype=np.float32,
                         tile_hint=weight_tiles),
            make_weights))

    vector_tiles = (PAGES_PER_WORKER / 8, 1)
    p = eager(
        expr.ones((num_pages, 1), tile_hint=vector_tiles, dtype=np.float32))

    def _one_step():
        product = expr.dot(wts, p)
        return product.force()

    for run in range(3):
        timer.time_op('pagerank', _one_step)

Example #33

0

Show file

def load_netflix_mapper(inputs, ex, load_file=None):
    '''
    Load the ratings that fall inside extent ``ex`` from a zip archive.

    For every row of the extent, the archive member named
    ``str(row + FILE_START)`` is unpickled, filtered by its ``'userid'``
    field to the extent's column range, and copied into a sparse matrix.

    :param inputs: not used by this function.
    :param ex: extent to load; ``ul``, ``lr`` and ``shape`` are read.
    :param load_file: path of (or file object for) the zip archive.
    :return: generator yielding one ``(ex, coo_matrix)`` pair.
    '''
    # first column will load all of the data
    row_start, row_end = ex.ul[0], ex.lr[0]
    col_start, col_end = ex.ul[1], ex.lr[1]

    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy releases.
    data = scipy.sparse.dok_matrix(ex.shape, dtype=float)

    # The context manager closes the archive even if a row fails to load;
    # previously the handle was never closed.
    with zipfile.ZipFile(load_file, 'r', allowZip64=True) as zf:
        for i in range(row_start, row_end):
            offset = i - row_start
            # NOTE: unpickling executes code from the archive -- only load
            # archives from a trusted source.
            row_data = cPickle.loads(zf.read('%d' % (i + FILE_START)))
            # NOTE(review): both bounds are strict, so an entry whose userid
            # equals col_start is dropped -- confirm this is intended.
            filtered = row_data[row_data['userid'] > col_start]
            filtered = filtered[filtered['userid'] < col_end]

            for uid, rating in filtered:
                # Shift to a column index relative to the extent.
                uid -= col_start
                data[(offset, uid)] = rating

    util.log_info('Loaded: %s', ex)
    yield ex, data.tocoo()

Example #34

0

Show file

File: netflix.py Project: EasonLiao/spartan

def load_netflix_mapper(inputs, ex, load_file=None):
  '''
  Load the ratings that fall inside extent ``ex`` from a zip archive.

  For every row of the extent, the archive member named
  ``str(row + FILE_START)`` is unpickled, filtered by its ``'userid'``
  field to the extent's column range, and copied into a sparse matrix.

  :param inputs: not used by this function.
  :param ex: extent to load; ``ul``, ``lr`` and ``shape`` are read.
  :param load_file: path of (or file object for) the zip archive.
  :return: generator yielding one ``(ex, coo_matrix)`` pair.
  '''
  # first column will load all of the data
  row_start, row_end = ex.ul[0], ex.lr[0]
  col_start, col_end = ex.ul[1], ex.lr[1]

  # ``np.float`` was only an alias of the builtin ``float`` and no longer
  # exists in current NumPy releases.
  data = scipy.sparse.dok_matrix(ex.shape, dtype=float)

  # The context manager closes the archive even if a row fails to load;
  # previously the handle was never closed.
  with zipfile.ZipFile(load_file, 'r', allowZip64=True) as zf:
    for i in range(row_start, row_end):
      offset = i - row_start
      # NOTE: unpickling executes code from the archive -- only load
      # archives from a trusted source.
      row_data = cPickle.loads(zf.read('%d' % (i + FILE_START)))
      # NOTE(review): both bounds are strict, so an entry whose userid
      # equals col_start is dropped -- confirm this is intended.
      filtered = row_data[row_data['userid'] > col_start]
      filtered = filtered[filtered['userid'] < col_end]

      for uid, rating in filtered:
        # Shift to a column index relative to the extent.
        uid -= col_start
        data[(offset, uid)] = rating

  util.log_info('Loaded: %s', ex)
  yield ex, data.tocoo()

Example #35

0

Show file

File: local.py Project: muddimedia/spartan-1

def compile_parakeet_source(src):
    '''Compile source code defining a parakeet function.

    ``src`` is written to a named temporary ``.py`` file, which is then
    loaded as the module ``parakeet_temp`` with ``imp.load_source``.

    :param src: text of the Python source to load.
    :raises CodegenException: when ``imp.load_source`` raises an
        ``Exception``; the original message and args are forwarded.
    '''
    util.log_debug('Compiling parakeet source.')
    # delete=True: the temporary file is removed once ``tmpfile`` is closed.
    tmpfile = tempfile.NamedTemporaryFile(delete=True,
                                          prefix='spartan-local-',
                                          suffix='.py')
    tmpfile.write(src)
    # Flush so the source is on disk before it is loaded by file name below.
    tmpfile.flush()

    #util.log_info('File: %s, Source: \n %s \n', tmpfile.name, src)

    #os.rename(tmpfile.name, srcfile)
    #atexit.register(lambda: os.remove(srcfile))

    # NOTE(review): ``module`` is neither used nor returned in the code shown
    # here -- the listing may stop before the end of the function; confirm
    # against the full file.
    try:
        module = imp.load_source('parakeet_temp', tmpfile.name)
    except Exception, ex:
        # Keep the headline at info level; the full source only at debug level.
        util.log_info('Failed to build parakeet wrapper')
        util.log_debug('Source was: %s', src)
        raise CodegenException(ex.message, ex.args)

Example #36

0

Show file

File: master.py Project: MaggieQi/spartan

  def register(self, req, handle):
    '''
    RPC method.

    Register a new worker with the master. The worker gets the next free
    id (the current number of registered workers), a connection to it is
    opened, and the request is acknowledged with an empty message. When the
    last expected worker has registered, ``self._initialize`` is started on
    a separate thread.

    Args:
      req (RegisterReq): ``host``, ``port`` and ``worker_status`` are read.
      handle (PendingRequest): completed with ``core.EmptyMessage()``.
    '''
    worker_id = len(self._workers)
    connection = rpc.connect(req.host, req.port)
    self._workers[worker_id] = connection
    self._available_workers.append(worker_id)
    util.log_info('Registered %s:%s (%d/%d)',
                  req.host, req.port, worker_id, self.num_workers)

    handle.done(core.EmptyMessage())

    self.init_worker_score(worker_id, req.worker_status)

    # Still waiting for more workers: nothing else to do yet.
    if len(self._workers) != self.num_workers:
      return

    initializer = threading.Thread(target=self._initialize)
    initializer.start()
Example #37

0

Show file

File: benchmark_dot.py Project: GabrielWen/spartan

def benchmark_matmul(ctx, timer):
    """Time one distributed product of two N x N matrices of ones.

    N is 1000 times the cube root of the worker count, and both operands
    are tiled in T x T blocks with T = divup(N, sqrt(num_workers)).
    """
    num_workers = ctx.num_workers
    N = int(1000 * math.pow(num_workers, 1.0 / 3.0))
    # M is not used below; the computation is kept from the original.
    M = util.divup(N, num_workers)
    T = util.divup(N, math.sqrt(num_workers))

    util.log_info("Testing with %d workers, N = %d, tile_size=%s", num_workers, N, T)

    shape = (N, N)
    tiling = (T, T)
    x = expr.eager(expr.ones(shape, dtype=np.double, tile_hint=tiling))
    y = expr.eager(expr.ones(shape, dtype=np.double, tile_hint=tiling))

    def _step():
        product = expr.dot(x, y)
        expr.evaluate(product)

    timer.time_op("matmul", _step)

Example #38

0

Show file

File: pagerank.py Project: GatsbyNewton/graph-computation-benchmark

def pagerankDistributed(ctx, numPage, numIters, alpha):
  '''
  Build a rank vector and link matrix, then run `numIters` matrix-vector
  products and report how long each phase took.

  Args:
    ctx: context object; only `ctx.num_workers` is read.
    numPage: number of pages (the link matrix is numPage x numPage).
    numIters: number of `expr.dot` iterations chained before evaluation.
    alpha: unused by the current implementation.

  Returns:
    A tuple `(generate_seconds, compute_seconds)`.
  '''
  # Tile sizes must be integers: floor division keeps them integral even
  # under true division (Python 3 / `from __future__ import division`),
  # and is identical to `/` for ints on Python 2.
  pagesPerWorker = numPage // ctx.num_workers

  sGenerate = time.time()
  rank = eager(expr.ones((numPage, 1),
                         tile_hint = (pagesPerWorker, 1),
                         dtype = np.float32))
  linkMatrix = eager(
              expr.shuffle(
                expr.ndarray(
                  (numPage, numPage),
                  dtype = np.float32,
                  tile_hint = (numPage, pagesPerWorker)),
              make_weights,
              ))
  eGenerate = time.time()
  util.log_info("**pagerank** rank init finished")

  startCompute = time.time()
  for _ in range(numIters):
    rank = expr.dot(linkMatrix, rank, tile_hint = (numPage, numPage // 10))
  # The dot products above are only chained; evaluation happens here.
  rank.evaluate()
  endCompute = time.time()
  util.log_info("**pagerank** compute finished")
  return (eGenerate - sGenerate, endCompute - startCompute)

Example #39

0

Show file

def predict_price(ask, bid, t):
  '''
  Fit a one-coefficient linear predictor of the future mid price from the
  change in bid/ask spread over `t` steps and return its mean absolute error.
  '''
  spread = ask - bid
  midprice = (ask + bid) / 2

  # Change in spread over `t` steps, trimmed by another `t` entries at the
  # end so it lines up with the mid price taken 2*t steps ahead.
  spread_delta = (spread[t:] - spread[:-t])[:-t]
  price_ahead = midprice[2*t:]

  util.log_info('D: %s, M: %s', spread_delta.shape, price_ahead.shape)

  # Single regression coefficient: the mean ratio of price to spread change.
  coefficient = mean(price_ahead / spread_delta)
  predicted = coefficient * spread_delta

  return mean(abs(predicted - price_ahead))

Example #40

0

Show file

    def register(self, req, handle):
        '''
        RPC method.

        Register a new worker with the master.  The new worker receives
        the next free index as its id; after the last expected worker has
        registered, `_initialize` runs on its own thread.

        Args:
          req (RegisterReq): provides `host`, `port` and `worker_status`.
          handle (PendingRequest): answered with an empty message.
        '''
        new_id = len(self._workers)
        connection = rpc.connect(req.host, req.port)
        self._workers[new_id] = connection
        self._available_workers.append(new_id)
        util.log_info('Registered %s:%s (%d/%d)',
                      req.host, req.port, new_id, self.num_workers)

        handle.done(core.EmptyMessage())
        self.init_worker_score(new_id, req.worker_status)

        # Still waiting for more workers: nothing else to do yet.
        if len(self._workers) != self.num_workers:
            return

        initializer = threading.Thread(target=self._initialize)
        initializer.start()

Example #41

0

Show file

File: benchmark_dot.py Project: muddimedia/spartan-1

def benchmark_matmul(ctx, timer):
    '''Benchmark a single distributed matrix multiply of two N x N arrays.

    The array size grows with the cube root of the worker count; both
    inputs are arrays of ones split into square T x T tiles.
    '''
    worker_count = ctx.num_workers
    N = int(1000 * math.pow(worker_count, 1.0 / 3.0))
    # M is computed but not consumed below; kept from the original.
    M = util.divup(N, worker_count)
    T = util.divup(N, math.sqrt(worker_count))

    util.log_info('Testing with %d workers, N = %d, tile_size=%s',
                  worker_count, N, T)

    def _ones_matrix():
        return expr.eager(
            expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))

    x = _ones_matrix()
    y = _ones_matrix()

    def _step():
        expr.evaluate(expr.dot(x, y))

    timer.time_op('matmul', _step)

Example #42

0

Show file

File: test_convnet.py Project: GabrielWen/spartan

def test_convnet(ctx):
  '''
  Chain three stencil + maxpool layers over an array of ones and log the
  shape of the final pooled expression.
  '''
  hint = util.divup(64, sqrt(ctx.num_workers))

  images = expr.eager(expr.ones((N_IMGS,) + IMG_SIZE,
                                tile_hint=(N_IMGS, N_COLORS, hint, hint)))

  # Each layer creates its filter bank right before it is applied; only the
  # first layer reads N_COLORS input channels, the others read N_FILTERS.
  layer = images
  for in_channels in (N_COLORS, N_FILTERS, N_FILTERS):
    weights = expr.eager(expr.ones((N_FILTERS, in_channels) + FILTER_SIZE,
                                   tile_hint=ONE_TILE))
    convolved = stencil.stencil(layer, weights, 2)
    layer = stencil.maxpool(convolved)

  util.log_info(layer.shape)

Example #43

0

Show file

File: netflix.py Project: MaggieQi/spartan

  def _evaluate(self, ctx, deps):
    '''
    Split V into strata and, for each stratum in turn, shuffle V through
    `sgd_netflix_mapper` with that stratum as the worklist, forcing each
    shuffle before moving on to the next.
    '''
    V = deps['V']
    M = deps['M']
    U = deps['U']

    strata = _compute_strata(V)
    util.log_info('Start eval')

    for index, stratum in enumerate(strata):
      util.log_info('Processing stratum: %d of %d (size = %d)', index, len(strata), len(stratum))

      worklist = set(stratum)
      mapper_kw = {
        'V' : lazify(V),
        'M' : lazify(M),
        'U' : lazify(U),
        'worklist' : worklist,
      }
      shuffled = expr.shuffle(V, sgd_netflix_mapper, kw=mapper_kw)
      shuffled.force()

    util.log_info('Eval done.')

Example #44

0

Show file

def run(filename):
    '''Run every ``benchmark_*`` function defined in the module at ``filename``.

    For each worker count listed in ``FLAGS.worker_list`` the cluster is
    started, the benchmarks are run (once with optimizations enabled when
    ``FLAGS.test_optimizations`` is set, then once with them disabled) and
    the cluster is shut down again.  A CSV header is printed before the
    first run.  If ``FLAGS.profile_worker`` is set the worker profiles are
    joined at the end.

    Args:
      filename: path of the Python source file containing the benchmarks.
    '''
    signal.signal(signal.SIGQUIT, sig_handler)
    # Remove worker profiles left over from an earlier run.
    os.system('rm ./_worker_profiles/*')

    mod_name, _ = splitext(basename(filename))
    module = imp.load_source(mod_name, filename)
    util.log_info('Running benchmarks for module: %s (%s)', module, filename)
    benchmarks = [
        k for k in dir(module)
        if (k.startswith('benchmark_')
            and isinstance(getattr(module, k), types.FunctionType))
    ]

    spartan.config.parse(sys.argv)
    if benchmarks:
        # csv header
        print('num_workers,bench,time')
        workers = [int(w) for w in FLAGS.worker_list.split(',')]

        for i in workers:
            # restart the cluster
            FLAGS.num_workers = i
            ctx = spartan.initialize()

            timer = BenchTimer(i)
            util.log_info('Running benchmarks on %d workers', i)
            if FLAGS.test_optimizations:
                timer.prefix = 'opt_enabled'
                FLAGS.optimization = 1
                run_benchmarks(module, benchmarks, ctx, timer)

            # The baseline pass must really turn optimizations off;
            # otherwise both passes measure the same configuration.
            timer.prefix = 'opt_disabled'
            FLAGS.optimization = 0
            run_benchmarks(module, benchmarks, ctx, timer)

            spartan.shutdown()
            time.sleep(1)

    if FLAGS.profile_worker:
        util.log_info('Writing worker profiles...')
        join_profiles('./_worker_profiles')

Example #45

0

Show file

File: test_common.py Project: EasonLiao/spartan

def run(filename):
  '''
  Load the module at `filename` and run each of its `benchmark_*`
  functions once per worker count in `FLAGS.worker_list`, restarting the
  cluster for every count.  Worker profiles are joined afterwards when
  `FLAGS.profile_worker` is set.
  '''
  signal.signal(signal.SIGQUIT, sig_handler)
  os.system('rm ./_worker_profiles/*')

  mod_name, _ = splitext(basename(filename))
  module = imp.load_source(mod_name, filename)
  util.log_info('Running benchmarks for module: %s (%s)', module, filename)

  def _is_benchmark(name):
    # A benchmark is a plain function whose name starts with 'benchmark_'.
    return (name.startswith('benchmark_') and
            isinstance(getattr(module, name), types.FunctionType))

  benchmarks = [name for name in dir(module) if _is_benchmark(name)]

  spartan.config.parse(sys.argv)
  if benchmarks:
    # csv header
    print('num_workers,bench,time')
    worker_counts = [int(field) for field in FLAGS.worker_list.split(',')]

    for count in worker_counts:
      # restart the cluster
      FLAGS.num_workers = count
      ctx = spartan.initialize()

      timer = BenchTimer(count)
      util.log_info('Running benchmarks on %d workers', count)

      # Optional pass with optimizations on, then the baseline pass.
      if FLAGS.test_optimizations:
        timer.prefix = 'opt_enabled'
        FLAGS.optimization = 1
        run_benchmarks(module, benchmarks, ctx, timer)

      timer.prefix = 'opt_disabled'
      FLAGS.optimization = 0
      run_benchmarks(module, benchmarks, ctx, timer)

      spartan.shutdown()
      time.sleep(1)

  if FLAGS.profile_worker:
    util.log_info('Writing worker profiles...')
    join_profiles('./_worker_profiles')

Example #46

0

Show file

    def _evaluate(self, ctx, deps):
        '''
        Process V one stratum at a time: each stratum becomes the worklist
        of a shuffle of V through `sgd_netflix_mapper`, which is evaluated
        before the next stratum starts.
        '''
        V = deps['V']
        M = deps['M']
        U = deps['U']

        strata = _compute_strata(V)
        util.log_info('Start eval')

        for position, stratum in enumerate(strata):
            util.log_info('Processing stratum: %d of %d (size = %d)',
                          position, len(strata), len(stratum))

            worklist = set(stratum)
            mapper_args = dict(V=lazify(V),
                               M=lazify(M),
                               U=lazify(U),
                               worklist=worklist)
            pending = expr.shuffle(V, sgd_netflix_mapper, kw=mapper_args)
            pending.evaluate()

        util.log_info('Eval done.')

Example #47

0

Show file

File: cluster.py Project: rossparks/spartan

def start_remote_worker(worker, st, ed):
    '''
    Start worker processes for the index range ``[st, ed)`` on one host.

    When ``FLAGS.use_threads`` is set and ``worker`` is ``'localhost'`` the
    workers run as daemon threads inside this process.  Otherwise a
    ``python -m spartan.worker`` command line is assembled and launched:
    over ssh for any host other than ``'localhost'``, through a local
    shell for ``'localhost'``.

    :param worker: hostname to connect to.
    :param st: First process index to start.
    :param ed: End of the index range (exclusive); ``ed - st`` is passed to
      the worker command as ``--count``.
    :return: the ``subprocess.Popen`` object of the launched command, or
      ``None`` when threads were used.
    '''
    if FLAGS.use_threads and worker == 'localhost':
        util.log_info('Using threads.')
        for i in range(st, ed):
            # Each thread gets the master address (this host, port_base)
            # and its own worker index.
            p = threading.Thread(target=spartan.worker._start_worker,
                                 args=((socket.gethostname(), FLAGS.port_base),
                                       i))
            p.daemon = True
            p.start()
        time.sleep(0.1)
        return

    util.log_info('Starting worker %d:%d on host %s', st, ed, worker)
    if FLAGS.oprofile:
        # Output directory for operf, created on the local machine.
        os.system('mkdir operf.%s' % worker)

    ssh_args = ['ssh', '-oForwardX11=no', worker]

    # The command starts by changing into the launcher's current directory.
    args = ['cd %s && ' % os.path.abspath(os.path.curdir)]

    if FLAGS.xterm:
        args += [
            'xterm',
            '-e',
        ]

    if FLAGS.oprofile:
        args += [
            'operf -e CPU_CLK_UNHALTED:100000000', '-g', '-d',
            'operf.%s' % worker
        ]

    args += [
        #'gdb', '-ex', 'run', '--args',
        'python',
        '-m spartan.worker',
        '--master=%s:%d' % (socket.gethostname(), FLAGS.port_base),
        '--count=%d' % (ed - st),
        '--heartbeat_interval=%d' % FLAGS.heartbeat_interval
    ]

    # add flags from config/user
    # NOTE(review): presumably repr() of each flag value renders it as a
    # command-line argument -- verify against the flags implementation.
    for (name, value) in FLAGS:
        if name in ['worker_list', 'print_options']: continue
        args += [repr(value)]

    #print >>sys.stderr, args
    util.log_debug('Running worker %s', ' '.join(args))
    time.sleep(0.1)
    if worker != 'localhost':
        # Remote host: the command pieces are passed as arguments to ssh.
        p = subprocess.Popen(ssh_args + args, executable='ssh')
    else:
        # Local host: the pieces are joined into one shell command line.
        p = subprocess.Popen(' '.join(args), shell=True, stdin=subprocess.PIPE)

    return p

Example #48

0

Show file

def add_one_extent(v, ex):
  '''Fetch extent `ex` of `v`, add one to it, log it and yield `(ex, result)`.'''
  incremented = v.fetch(ex) + 1
  util.log_info('AddOne: %s, %s', ex, incremented)
  yield ex, incremented

Example #49

0

Show file

File: fuzzy_kmeans.py Project: rossparks/spartan

def fuzzy_kmeans(points, k=10, num_iter=10, m=2.0, centers=None):
    '''
    Cluster data points using the fuzzy k-means method.

    Args:
      points(Expr or DistArray): the input data points matrix.
      k(int): the number of clusters.
      num_iter(int): the number of iterations to run.
      m(float): the parameter of fuzzy kmeans; memberships are computed
        with the exponent ``2.0 / (m - 1)``, so ``m`` must not be 1.
      centers(Expr or DistArray): the initialized centers of each cluster;
        random centers are generated when it is None.

    Returns:
      The label expression of the last iteration (argmax of the membership
      probabilities per point), or an all-zero label array when
      ``num_iter`` is 0.
    '''
    points = expr.force(points)
    num_dim = points.shape[1]
    if centers is None:
        centers = expr.rand(k, num_dim)

    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin selects the same dtype.
    labels = expr.zeros((points.shape[0], ), dtype=int)

    for _ in range(num_iter):
        centers = expr.as_array(centers)
        points_broadcast = expr.reshape(points,
                                        (points.shape[0], 1, points.shape[1]))
        centers_broadcast = expr.reshape(
            centers, (1, centers.shape[0], centers.shape[1]))
        distances = expr.sum(expr.square(points_broadcast - centers_broadcast),
                             axis=2)
        # This is used to avoid dividing by zero.
        distances = distances + 0.00000000001
        util.log_info('distances shape %s', distances.shape)
        distances_broadcast = expr.reshape(
            distances, (distances.shape[0], 1, distances.shape[1]))
        distances_broadcast2 = expr.reshape(
            distances, (distances.shape[0], distances.shape[1], 1))
        # Membership of each point in each cluster.
        prob = 1.0 / expr.sum(expr.power(
            distances_broadcast / distances_broadcast2, 2.0 / (m - 1)),
                              axis=2)
        prob.force()
        counts = expr.sum(prob, axis=0)
        counts = expr.reshape(counts, (counts.shape[0], 1))
        labels = expr.argmax(prob, axis=1)
        # Membership-weighted sum of the points for every cluster.
        centers = expr.sum(
            expr.reshape(points, (points.shape[0], 1, points.shape[1])) *
            expr.reshape(prob, (prob.shape[0], prob.shape[1], 1)),
            axis=0)

        # We assume that the centers are small enough to be handled on the
        # master.
        counts = counts.glom()
        centers = centers.glom()
        # Find centroids that don't have any points assigned to them.
        zcount_indices = (counts == 0).reshape(k)

        if np.any(zcount_indices):
            # One or more centroids may not have any points assigned to them, which results in their
            # position being the zero-vector.  We reseed these centroids with new random values
            # and set their counts to 1 in order to avoid dividing by zero.
            counts[zcount_indices, :] = 1
            centers[zcount_indices, :] = np.random.rand(
                np.count_nonzero(zcount_indices), num_dim)

        centers = centers / counts
    return labels

Example #50

0

Show file

 def profile2(self):
   '''
   Time save/load and pickle/unpickle of a 10000 x 10000 sparse random
   array, each with the last flag off ("without zip") and on ("with zip"),
   and log every timing.
   '''
   self.create_path()
   sparse = expr.sparse_rand((10000, 10000)).evaluate()

   # save / load round trip
   elapsed, _ = util.timeit(
       lambda: expr.save(sparse, "fiotest3", self.test_dir, False))
   util.log_info('Save a %s sparse array in %s without zip', sparse.shape, elapsed)
   elapsed, _ = util.timeit(
       lambda: expr.load("fiotest3", self.test_dir, False).evaluate())
   util.log_info('Load a %s sparse array in %s without zip', sparse.shape, elapsed)
   elapsed, _ = util.timeit(
       lambda: expr.save(sparse, "fiotest3", self.test_dir, True))
   util.log_info('Save a %s sparse array in %s with zip', sparse.shape, elapsed)
   elapsed, _ = util.timeit(
       lambda: expr.load("fiotest3", self.test_dir, True).evaluate())
   util.log_info('Load a %s sparse array in %s with zip', sparse.shape, elapsed)

   # pickle / unpickle round trip
   elapsed, _ = util.timeit(
       lambda: expr.pickle(sparse, "fiotest4", self.test_dir, False))
   util.log_info('Pickle a %s sparse array in %s without zip', sparse.shape, elapsed)
   elapsed, _ = util.timeit(
       lambda: expr.unpickle("fiotest4", self.test_dir, False).evaluate())
   util.log_info('Unpickle a %s sparse array in %s without zip', sparse.shape, elapsed)
   elapsed, _ = util.timeit(
       lambda: expr.pickle(sparse, "fiotest4", self.test_dir, True))
   util.log_info('Pickle a %s sparse array in %s with zip', sparse.shape, elapsed)
   elapsed, _ = util.timeit(
       lambda: expr.unpickle("fiotest4", self.test_dir, True).evaluate())
   util.log_info('Unpickle a %s sparse array in %s with zip', sparse.shape, elapsed)

Example #51

0

Show file

 def test_find_change(self):
     '''Run `finance.find_change` on 100 random normal values and log the result.'''
     samples = expr.randn(100)
     changed = finance.find_change(samples)
     util.log_info(changed.glom())

Example #52

0

Show file

 def test_put(self):
     '''Compute `finance.black_scholes` and log the materialized put result.'''
     put_expr, _call_expr = finance.black_scholes(
         self.current, self.strike, maturity, rate, volatility)
     util.log_info(put_expr.glom())

Example #53

0

Show file

 def test_call(self):
     '''Compute `finance.black_scholes` and log the materialized call result.'''
     _put_expr, call_expr = finance.black_scholes(
         self.current, self.strike, maturity, rate, volatility)
     util.log_info(call_expr.glom())

Example #54

0

Show file

File: lanczos.py Project: muddimedia/spartan-1

def solve(A, AT, desired_rank, is_symmetric=False):
  '''
  A simple implementation of the Lanczos algorithm
  (http://en.wikipedia.org/wiki/Lanczos_algorithm) for eigenvalue computation.

  Like the Mahout implementation, only the matrix*vector step is parallelized.

  First the Lanczos method is used to turn the matrix into tridiagonal form.
  Then numpy.linalg.eig extracts the eigenvalues and eigenvectors of that
  tridiagonal matrix, which is only (desired_rank + 2) x (desired_rank + 2).
  Since desired_rank should be much smaller than the size of the matrix,
  this step can be done efficiently on the local machine.

  Args:
    A: the input matrix; `A.shape[1]` gives the vector length `n`.
    AT: applied after `A` in the non-symmetric case (presumably the
      transpose of `A` -- verify against callers).
    desired_rank: number of eigenvalue/eigenvector pairs to return.
    is_symmetric: when True only `A` is applied and `AT` is ignored.

  Returns:
    A tuple `(d, s)`: the `desired_rank` eigenvalues of largest magnitude
    and the matching columns of `dot(V, v)`.
  '''
  # Calculate two more eigenvalues, but only keep the largest desired_rank
  # ones. This keeps the result consistent with scipy.sparse.linalg.svds.
  desired_rank += 2

  n = A.shape[1]
  # Start from a uniform unit vector.
  v_next = np.ones(n) / np.sqrt(n)
  v_prev = np.zeros(n)
  beta = np.zeros(desired_rank+1)
  beta[0] = 0
  alpha = np.zeros(desired_rank)

  # Since desired_rank << size of the matrix, V is kept in local memory
  # for efficiency (it needs to be updated on every iteration).
  # If V cannot fit in local memory, it could be turned into a spartan
  # distributed array.
  V = np.zeros((n, desired_rank))


  for i in range(0, desired_rank):
    util.log_info("Iter : %s", i)
    v_next_expr = expr.from_numpy(v_next.reshape(n, 1))

    # The only distributed step: apply the operator to the current vector
    # and pull the result back as a local vector of length n.
    if is_symmetric:
      w = expr.dot(A, v_next_expr).optimized().glom().reshape(n)
    else:
      w = expr.dot(A, v_next_expr)
      w = expr.dot(AT, w).optimized().glom().reshape(n)

    alpha[i] = np.dot(w, v_next)
    w = w - alpha[i] * v_next - beta[i] * v_prev
    
    # Orthogonalize against all previously stored vectors:
    for t in range(i):
      tmpa = np.dot(w, V[:, t])
      if tmpa == 0.0:
        continue
      w -= tmpa * V[:, t] 

    beta[i+1] = np.linalg.norm(w, 2) 
    v_prev = v_next
    # NOTE(review): divides by zero if the norm above is exactly 0 --
    # confirm whether that case can occur for the expected inputs.
    v_next = w / beta[i+1]
    V[:, i] = v_prev
  
  # Create the tridiagonal matrix of size (desired_rank X desired_rank):
  # alpha on the diagonal, beta on both off-diagonals.
  tridiag = np.diag(alpha)
  for i in range(0, desired_rank-1):
    tridiag[i, i+1] = beta[i+1] 
    tridiag[i+1, i] = beta[i+1]
  
  # Get eigenvectors and eigenvalues of this tridiagonal matrix.
  # The eigenvalues of this tridiagonal matrix equal the eigenvalues
  # of matrix dot(A, A.T). We can get the eigenvectors of dot(A, A.T)
  # by multiplying V with the eigenvectors of this tridiagonal matrix.
  # NOTE(review): the non-symmetric branch above applies AT after A, i.e.
  # dot(AT, A) rather than dot(A, A.T) -- confirm which one is intended.
  d, v = np.linalg.eig(tridiag) 
  
  # Sort eigenvalues (by decreasing magnitude) and their corresponding
  # eigenvectors.
  sorted_idx = np.argsort(np.absolute(d))[::-1]
  d = d[sorted_idx]
  v = v[:, sorted_idx]
  
  # Get the eigenvectors of dot(A, A.T)
  s = np.dot(V, v)
  # Drop the two extra pairs that were added at the top.
  return d[0:desired_rank-2], s[:, 0:desired_rank-2]

Example #55

0

Show file

File: test_extent.py Project: muddimedia/spartan-1

def test_local_offset():
    a = extent.create((0, 0), (5, 5), None)
    b = extent.create((2, 2), (3, 3), None)
    util.log_info('%s', extent.offset_from(a, b))