コード例 #1
0
ファイル: test_netflix.py プロジェクト: rossparks/spartan
def test_netflix_sgd(ctx):
    """Run two forced ``netflix.sgd`` passes over synthetic ratings.

    A sparse ``(users, movies)`` array is populated through
    ``netflix.fake_netflix_mapper``, converted with ``spartan.tocoo`` and
    then fed to ``netflix.sgd`` together with two random float32 factor
    arrays of rank 20.

    :param ctx: supplied by the test harness; not referenced in the body.
    """
    n_users = 100
    n_movies = 100 * 100
    rank = 20
    splits = 8
    p_rating = 1000.0 / (n_users * n_movies)

    # Random factor arrays, evaluated eagerly (movie factors first).
    movie_rand = spartan.rand(n_movies, rank)
    Mfactor = spartan.eager(movie_rand.astype(np.float32))
    user_rand = spartan.rand(n_users, rank)
    Ufactor = spartan.eager(user_rand.astype(np.float32))

    # Each dimension is cut into (at most) `splits` tiles.
    rating_tiles = (divup(n_users, splits), divup(n_movies, splits))
    empty = spartan.sparse_empty((n_users, n_movies),
                                 tile_hint=rating_tiles,
                                 dtype=np.float32)

    # Disabled alternative that loads ratings from a file:
    #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
    #                           kw={ 'load_file' : '/big1/netflix.zip' })

    # The mapper writes into the same array it reads from (target=empty).
    filled = spartan.shuffle(empty,
                             netflix.fake_netflix_mapper,
                             target=empty,
                             kw={'p_rating': p_rating})
    V = spartan.eager(spartan.tocoo(filled))

    for _ in range(2):
        netflix.sgd(V, Mfactor, Ufactor).force()
コード例 #2
0
ファイル: test_netflix.py プロジェクト: GabrielWen/spartan
def test_netflix_sgd(ctx):
  """Evaluate ``netflix.sgd`` twice on a synthetically filled rating array.

  The rating array is created empty and sparse, filled by
  ``netflix.fake_netflix_mapper`` and converted with ``spartan.tocoo``.
  The factor arrays are random float32 arrays of rank 20.

  :param ctx: supplied by the test harness; not referenced in the body.
  """
  num_users = 100
  num_movies = 100*100
  num_factors = 20
  tiles_per_dim = 8
  rating_prob = 1000.0 / (num_users * num_movies)

  # Factor arrays are materialised before the ratings are generated.
  Mfactor = spartan.eager(
      spartan.rand(num_movies, num_factors).astype(np.float32))
  Ufactor = spartan.eager(
      spartan.rand(num_users, num_factors).astype(np.float32))

  tile_shape = (divup(num_users, tiles_per_dim),
                divup(num_movies, tiles_per_dim))
  ratings = spartan.sparse_empty((num_users, num_movies),
                                 tile_hint=tile_shape,
                                 dtype=np.float32)

  # Disabled alternative that loads ratings from a file:
  #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
  #                           kw={ 'load_file' : '/big1/netflix.zip' })

  # Source and target of the shuffle are the same array.
  shuffled = spartan.shuffle(ratings, netflix.fake_netflix_mapper,
                             target=ratings, kw={'p_rating': rating_prob})
  V = spartan.eager(spartan.tocoo(shuffled))

  for _ in range(2):
    netflix.sgd(V, Mfactor, Ufactor).evaluate()
コード例 #3
0
def benchmark_convnet(ctx, timer):
  """Time three stacked stencil + maxpool stages on an all-ones image batch.

  The pipeline is executed once untimed and then twice through
  ``timer.time_op`` under the key ``'convnet'``.

  :param ctx: provides ``num_workers``, used to size the batch tiles.
  :param timer: object whose ``time_op(name, fn)`` runs each timed pass.
  """
  side = BASE_IMG_SIZE
  batch = 64
  #minibatch = ctx.num_workers
  # NOTE: `hint` is computed as before but is not referenced afterwards.
  hint = util.divup(side, sqrt(ctx.num_workers))
  image_shape = (batch, N_COLORS, side, side)
  tile_hint = (util.divup(batch, ctx.num_workers), N_COLORS, side, side)
  util.log_info('Hint: %s', tile_hint)

  images = expr.eager(expr.ones(image_shape, tile_hint=tile_hint))

  def ones_filter(in_channels):
    # All-ones filter bank taking `in_channels` inputs, kept in one tile.
    return expr.eager(expr.ones((N_FILTERS, in_channels) + FILTER_SIZE,
                                tile_hint=ONE_TILE))

  filter_banks = [ones_filter(N_COLORS),
                  ones_filter(N_FILTERS),
                  ones_filter(N_FILTERS)]

  def run_pipeline():
    layer = images
    for weights in filter_banks:
      layer = stencil.maxpool(stencil.stencil(layer, weights, 2))
    expr.force(layer)

  # force parakeet functions to compile before timing.
  run_pipeline()
  for _ in range(2):
    timer.time_op('convnet', run_pipeline)
コード例 #4
0
def benchmark_netflix_sgd(ctx, timer):
    """Time synthetic rating generation and ``netflix.sgd`` at ranks 25 and 50.

    The module-level ``U``, ``M`` and ``P_RATING`` give the rating array
    shape and the ``p_rating`` argument passed to the fake-data mapper.

    :param ctx: provides ``num_workers``, used for every tile hint.
    :param timer: object whose ``time_op(name, fn)`` runs each timed step.
    """
    workers = ctx.num_workers

    rating_tiles = (divup(U, workers), divup(M, workers))
    empty = spartan.sparse_empty((U, M),
                                 tile_hint=rating_tiles,
                                 dtype=np.float32)

    def prepare():
        # Fill the array in place (target=empty) and convert it with tocoo.
        filled = spartan.shuffle(empty,
                                 netflix.fake_netflix_mapper,
                                 target=empty,
                                 kw={'p_rating': P_RATING})
        return spartan.eager(spartan.tocoo(filled))

    V = timer.time_op('prep', prepare)

    # Disabled alternative that loads ratings from a file:
    #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
    #                           kw={ 'load_file' : '/big1/netflix.zip' })

    for rank in (25, 50):
        movie_tiles = (divup(M, workers), rank)
        Mfactor = spartan.eager(
            spartan.rand(M, rank, tile_hint=movie_tiles).astype(np.float32))
        user_tiles = (divup(U, workers), rank)
        Ufactor = spartan.eager(
            spartan.rand(U, rank, tile_hint=user_tiles).astype(np.float32))

        # The expression is built here; only its `.force` call is timed.
        sgd_expr = netflix.sgd(V, Mfactor, Ufactor)
        timer.time_op('rank %d' % rank, sgd_expr.force)
コード例 #5
0
def benchmark_convnet(ctx, timer):
  """Benchmark a three-stage stencil/maxpool pipeline on all-ones inputs.

  One untimed warm-up pass is followed by two passes timed through
  ``timer.time_op`` under the key ``'convnet'``.

  :param ctx: provides ``num_workers``, used to size the batch tiles.
  :param timer: object whose ``time_op(name, fn)`` runs each timed pass.
  """
  img_side = BASE_IMG_SIZE
  batch_size = 64
  #minibatch = ctx.num_workers
  # NOTE: `hint` is computed as before but is not referenced afterwards.
  hint = util.divup(img_side, sqrt(ctx.num_workers))
  batch_tile = util.divup(batch_size, ctx.num_workers)
  tile_hint = (batch_tile, N_COLORS, img_side, img_side)
  util.log_info('Hint: %s', tile_hint)

  batch_shape = (batch_size, N_COLORS, img_side, img_side)
  images = expr.eager(expr.ones(batch_shape, tile_hint=tile_hint))

  # One filter bank per stage; the first maps from the colour channels.
  first_shape = (N_FILTERS, N_COLORS) + FILTER_SIZE
  w1 = expr.eager(expr.ones(first_shape, tile_hint=ONE_TILE))
  inner_shape = (N_FILTERS, N_FILTERS) + FILTER_SIZE
  w2 = expr.eager(expr.ones(inner_shape, tile_hint=ONE_TILE))
  w3 = expr.eager(expr.ones(inner_shape, tile_hint=ONE_TILE))

  def one_pass():
    stage1 = stencil.maxpool(stencil.stencil(images, w1, 2))
    stage2 = stencil.maxpool(stencil.stencil(stage1, w2, 2))
    stage3 = stencil.maxpool(stencil.stencil(stage2, w3, 2))
    stage3.evaluate()

  # force parakeet functions to compile before timing.
  one_pass()
  for _ in range(2):
    timer.time_op('convnet', one_pass)
コード例 #6
0
ファイル: test_convnet.py プロジェクト: rossparks/spartan
def test_convnet(ctx):
    """Chain three stencil + maxpool stages and log the final shape.

    Images and filters are all-ones arrays. Nothing is forced here; only
    ``shape`` of the last pooling expression is logged.

    :param ctx: provides ``num_workers``, used for the spatial tile size.
    """
    spatial_tile = util.divup(64, sqrt(ctx.num_workers))

    image_tiles = (N_IMGS, N_COLORS, spatial_tile, spatial_tile)
    layer = expr.eager(
        expr.ones((N_IMGS, ) + IMG_SIZE, tile_hint=image_tiles))

    # Input channel count of each successive filter bank.
    for in_channels in (N_COLORS, N_FILTERS, N_FILTERS):
        weights = expr.eager(
            expr.ones((N_FILTERS, in_channels) + FILTER_SIZE,
                      tile_hint=ONE_TILE))
        layer = stencil.maxpool(stencil.stencil(layer, weights, 2))

    util.log_info(layer.shape)
コード例 #7
0
ファイル: test_kmeans.py プロジェクト: EasonLiao/spartan
  def test_kmeans_expr(self):
    """Fit ``KMeans`` on a forced random ``(N_PTS, N_DIM)`` array.

    The row tile hint is ``divup(N_PTS, num_workers)``; columns stay in
    a single tile of width ``N_DIM``.
    """
    workers = spartan.blob_ctx.get().num_workers
    rows_per_tile = divup(N_PTS, workers)
    random_points = expr.rand(N_PTS, N_DIM,
                              tile_hint=(rows_per_tile, N_DIM))
    pts = random_points.force()

    model = KMeans(N_CENTERS, ITER)
    model.fit(pts)
コード例 #8
0
ファイル: test_svm.py プロジェクト: rossparks/spartan
def benchmark_svm(ctx, timer):
  """Train with ``fit`` on random data, then print a prediction hit rate.

  Training runs for ``max_iter`` (2) iterations on a ``(N, D)`` float64
  array from ``expr.randn``; the per-iteration time and the final weights
  are logged. Ten single random points are then classified with
  ``predict`` and counted as correct when the label is 1.0 for
  ``x[0] >= x[1]`` or -1.0 for ``x[0] < x[1]``.

  The prints use the single-argument parenthesised form, which produces
  the same output on Python 2 and is also valid on Python 3.

  :param ctx: provides ``num_workers`` (printed, and used for the column
      tile hint of the training data).
  :param timer: not used; timing is done with ``datetime.now()``.
  """
  print("#worker: %s" % (ctx.num_workers,))
  max_iter = 2
  #N = 200000 * ctx.num_workers
  N = 1000 * 64
  D = 64

  # create data: rows stay whole, columns are split across workers
  data = expr.randn(N, D, dtype=np.float64, tile_hint=(N, util.divup(D, ctx.num_workers)))
  labels = expr.shuffle(data, _init_label_mapper, shape_hint=(data.shape[0], 1))

  t1 = datetime.now()
  w = fit(data, labels, T=max_iter).force()
  t2 = datetime.now()
  util.log_warn('train time per iteration:%s ms, final w:%s', millis(t1,t2)/max_iter, w.glom().T)

  num_trials = 10
  correct = 0
  for _ in range(num_trials):
    new_data = expr.randn(1, D, dtype=np.float64, tile_hint=[1, D])
    new_label = predict(w, new_data)
    #print 'point %s, predict %s' % (new_data.glom(), new_label)

    new_data = new_data.glom()
    first, second = new_data[0,0], new_data[0,1]
    # Expected label follows the comparison of the first two coordinates.
    if (first >= second and new_label == 1.0) or (first < second and new_label == -1.0):
      correct += 1
  print('predict precision: %s' % (correct * 1.0 / num_trials,))
コード例 #9
0
  def precompute(self):
    '''Precompute the k most similar items for each item.

    After this function returns, two attributes will have been created
    (``self.similarity_table`` is also assigned along the way).

    Attributes
    ----------
    top_k_similar_table : Numpy array of shape (N, k).
                          Records the k highest similarity scores for each item.
    top_k_similar_indices : Numpy array of shape (N, k).
                            Records the indices of the k most similar items for each item.
    '''
    # N is the size of the second axis of the rating table.
    N = self.rating_table.shape[1]

    self.similarity_table = expr.shuffle(self.rating_table, _similarity_mapper,
                                         kw={'item_norm': self._get_norm_of_each_item(self.rating_table),
                                             'step': util.divup(self.rating_table.shape[1], blob_ctx.get().num_workers)},
                                         shape_hint=(N, N))

    # Array handed to the mapper below to receive the top-k indices.
    # Builtin `int` replaces the `np.int` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24; the alias was the same object.
    top_k_similar_indices = expr.zeros((N, self.k), dtype=int)

    # Find top-k similar items for each item.
    # Store the similarity scores into table top_k_similar table.
    # Store the indices of top k items into table top_k_similar_indices.
    cost = np.prod(top_k_similar_indices.shape)
    # NOTE(review): the meaning of the '00'..'11' cost_hint keys is defined
    # by expr.shuffle / the optimizer and is not visible here.
    top_k_similar_table = expr.shuffle(self.similarity_table, _select_most_k_similar_mapper,
                                       kw = {'top_k_similar_indices': top_k_similar_indices, 'k': self.k},
                                       shape_hint=(N, self.k),
                                       cost_hint={hash(top_k_similar_indices):{'00': 0, '01': cost, '10': cost, '11': cost}})
    # The score table is fetched before the index table, as in the
    # original order (presumably the indices are filled while the scores
    # are evaluated; confirm against the mapper).
    self.top_k_similar_table = top_k_similar_table.optimized().glom()
    self.top_k_similar_indices = top_k_similar_indices.optimized().glom()
コード例 #10
0
def benchmark_netflix_sgd(ctx, timer):
    """Benchmark fake-rating generation, then ``netflix.sgd`` for two ranks.

    Array shape and rating probability come from the module-level ``U``,
    ``M`` and ``P_RATING``.

    :param ctx: provides ``num_workers``, the divisor for all tile hints.
    :param timer: object whose ``time_op(name, fn)`` runs each timed step.
    """
    num_workers = ctx.num_workers

    rows_per_tile = divup(U, num_workers)
    cols_per_tile = divup(M, num_workers)
    blank = spartan.sparse_empty(
        (U, M),
        tile_hint=(rows_per_tile, cols_per_tile),
        dtype=np.float32,
    )

    # Timed step: fill the blank array in place and convert it with tocoo.
    V = timer.time_op(
        "prep",
        lambda: spartan.eager(
            spartan.tocoo(
                spartan.shuffle(
                    blank,
                    netflix.fake_netflix_mapper,
                    target=blank,
                    kw={"p_rating": P_RATING},
                )
            )
        ),
    )

    # Disabled alternative that loads ratings from a file:
    #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
    #                           kw={ 'load_file' : '/big1/netflix.zip' })

    for rank in (25, 50):
        movie_factors = spartan.rand(M, rank, tile_hint=(divup(M, num_workers), rank))
        Mfactor = spartan.eager(movie_factors.astype(np.float32))
        user_factors = spartan.rand(U, rank, tile_hint=(divup(U, num_workers), rank))
        Ufactor = spartan.eager(user_factors.astype(np.float32))

        # Building the expression is untimed; only `.force` runs in time_op.
        pending = netflix.sgd(V, Mfactor, Ufactor)
        timer.time_op("rank %d" % rank, pending.force)
コード例 #11
0
ファイル: benchmark_dot.py プロジェクト: GabrielWen/spartan
def benchmark_matmul(ctx, timer):
    """Time one ``expr.dot`` of two all-ones ``(N, N)`` double arrays.

    ``N`` is ``int(1000 * num_workers ** (1/3))`` and both operands use
    square tiles of side ``divup(N, sqrt(num_workers))``.

    :param ctx: provides ``num_workers``.
    :param timer: object whose ``time_op(name, fn)`` runs the timed step.
    """
    workers = ctx.num_workers
    N = int(1000 * math.pow(workers, 1.0 / 3.0))
    # N = 4000
    # M is only needed by the disabled column-strip tiling below.
    M = util.divup(N, workers)
    T = util.divup(N, math.sqrt(workers))

    util.log_info("Testing with %d workers, N = %d, tile_size=%s", workers, N, T)

    # Disabled alternative: full-height column strips.
    # x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))
    # y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))

    shape = (N, N)
    square_tiles = (T, T)
    x = expr.eager(expr.ones(shape, dtype=np.double, tile_hint=square_tiles))
    y = expr.eager(expr.ones(shape, dtype=np.double, tile_hint=square_tiles))

    # print expr.glom(expr.dot(x, y))
    # print expr.dag(expr.dot(x, y))

    def multiply_once():
        product = expr.dot(x, y)
        expr.evaluate(product)

    timer.time_op("matmul", multiply_once)
コード例 #12
0
    def precompute(self):
        '''Precompute the k most similar items for each item.

        After this function returns, two attributes will have been created
        (``self.similarity_table`` is also assigned along the way).

        Attributes
        ----------
        top_k_similar_table : Numpy array of shape (N, k).
            Records the k highest similarity scores for each item.
        top_k_similar_indices : Numpy array of shape (N, k).
            Records the indices of the k most similar items for each item.
        '''
        # N is the size of the second axis of the rating table.
        N = self.rating_table.shape[1]

        self.similarity_table = expr.shuffle(
            self.rating_table,
            _similarity_mapper,
            kw={
                'item_norm':
                self._get_norm_of_each_item(self.rating_table),
                'step':
                util.divup(self.rating_table.shape[1],
                           blob_ctx.get().num_workers)
            },
            shape_hint=(N, N))

        # Array handed to the mapper below to receive the top-k indices.
        # Builtin `int` replaces the `np.int` alias, which NumPy deprecated
        # in 1.20 and removed in 1.24; the alias was the same object.
        top_k_similar_indices = expr.zeros((N, self.k), dtype=int)

        # Find top-k similar items for each item.
        # Store the similarity scores into table top_k_similar table.
        # Store the indices of top k items into table top_k_similar_indices.
        cost = np.prod(top_k_similar_indices.shape)
        # NOTE(review): the meaning of the '00'..'11' cost_hint keys is
        # defined by expr.shuffle / the optimizer and is not visible here.
        top_k_similar_table = expr.shuffle(self.similarity_table,
                                           _select_most_k_similar_mapper,
                                           kw={
                                               'top_k_similar_indices':
                                               top_k_similar_indices,
                                               'k': self.k
                                           },
                                           shape_hint=(N, self.k),
                                           cost_hint={
                                               hash(top_k_similar_indices): {
                                                   '00': 0,
                                                   '01': cost,
                                                   '10': cost,
                                                   '11': cost
                                               }
                                           })
        # The score table is fetched before the index table, as in the
        # original order (presumably the indices are filled while the
        # scores are evaluated; confirm against the mapper).
        self.top_k_similar_table = top_k_similar_table.optimized().glom()
        self.top_k_similar_indices = top_k_similar_indices.optimized().glom()
コード例 #13
0
ファイル: benchmark_dot.py プロジェクト: muddimedia/spartan-1
def benchmark_matmul(ctx, timer):
    """Benchmark a single dense matrix product under ``timer.time_op``.

    Two all-ones ``(N, N)`` double arrays are created eagerly with square
    ``(T, T)`` tiles, then ``expr.dot`` of them is evaluated once.

    :param ctx: provides ``num_workers``, from which N and T are derived.
    :param timer: object whose ``time_op(name, fn)`` runs the timed step.
    """
    num_workers = ctx.num_workers
    N = int(1000 * math.pow(num_workers, 1.0 / 3.0))
    #N = 4000
    # M is only needed by the disabled column-strip tiling below.
    M = util.divup(N, num_workers)
    T = util.divup(N, math.sqrt(num_workers))

    util.log_info('Testing with %d workers, N = %d, tile_size=%s',
                  num_workers, N, T)

    #x = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))
    #y = expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(N, M)))

    def ones_matrix():
        # Eagerly evaluated (N, N) array of ones with (T, T) tiles.
        return expr.eager(
            expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))

    x = ones_matrix()
    y = ones_matrix()

    #print expr.glom(expr.dot(x, y))
    #print expr.dag(expr.dot(x, y))

    def run_dot():
        expr.evaluate(expr.dot(x, y))

    timer.time_op('matmul', run_dot)
コード例 #14
0
ファイル: cluster.py プロジェクト: rossparks/spartan
def start_cluster(num_workers, use_cluster_workers):
    '''
    Create a master, launch ``num_workers`` workers and return the master.

    With ``use_cluster_workers`` set, workers are distributed over the
    ``(host, slots)`` pairs in ``FLAGS.hosts``; otherwise they are all
    started on ``localhost``. Any existing ``FLAGS.checkpoint_path``
    directory is removed first.

    :param num_workers: total number of workers to start.
    :param use_cluster_workers: whether to use the hosts in ``FLAGS.hosts``.
    :return: the master, after ``wait_for_initialization()`` has returned.
    '''
    rpc.set_default_timeout(FLAGS.default_rpc_timeout)
    # Start from an empty checkpoint directory.
    if os.path.exists(FLAGS.checkpoint_path):
        shutil.rmtree(FLAGS.checkpoint_path)

    master = spartan.master.Master(FLAGS.port_base, num_workers)

    ssh_processes = []
    if use_cluster_workers:
        capacity = sum(slots for _, slots in FLAGS.hosts)
        assert capacity >= num_workers, 'Insufficient slots to run all workers.'
        assigned = 0
        num_hosts = len(FLAGS.hosts)
        for host, host_slots in FLAGS.hosts:
            # BY_CORE gives a host all of its slots; any other mode
            # spreads the workers evenly over the hosts.
            if FLAGS.assign_mode == AssignMode.BY_CORE:
                share = host_slots
            else:
                share = util.divup(num_workers, num_hosts)

            # Never hand out more than what is still unassigned.
            share = min(share, num_workers - assigned)
            process = start_remote_worker(host, assigned, assigned + share)
            ssh_processes.append(process)
            assigned += share
            if assigned == num_workers:
                break
    else:
        start_remote_worker('localhost', 0, num_workers)

    master.wait_for_initialization()

    # The ssh processes are deliberately left running.
    # Fegin : if we kill these processes, we can't get log from workers.
    #for process in ssh_processes:
    #process.kill()
    return master
コード例 #15
0
ファイル: cluster.py プロジェクト: MaggieQi/spartan
def start_cluster(num_workers, use_cluster_workers):
  '''
  Bring up a master plus ``num_workers`` workers and return the master.

  When ``use_cluster_workers`` is true the workers are spread over the
  ``(host, slots)`` entries of ``FLAGS.hosts``; otherwise every worker is
  started on ``localhost``. An existing ``FLAGS.checkpoint_path``
  directory is deleted before the master is created.

  :param num_workers: total number of workers to start.
  :param use_cluster_workers: whether to use the hosts in ``FLAGS.hosts``.
  :return: the master, after ``wait_for_initialization()`` has returned.
  '''
  rpc.set_default_timeout(FLAGS.default_rpc_timeout)
  checkpoint_dir = FLAGS.checkpoint_path
  # Remove checkpoints left over from a previous run.
  if os.path.exists(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)

  master = spartan.master.Master(FLAGS.port_base, num_workers)

  ssh_processes = []
  if use_cluster_workers:
    total_slots = sum(slots for _, slots in FLAGS.hosts)
    assert total_slots >= num_workers, 'Insufficient slots to run all workers.'
    started = 0
    num_hosts = len(FLAGS.hosts)
    for host, slots in FLAGS.hosts:
      # BY_CORE fills a host up to its slot count; other modes split the
      # workers evenly across hosts.
      by_core = FLAGS.assign_mode == AssignMode.BY_CORE
      wanted = slots if by_core else util.divup(num_workers, num_hosts)
      batch = min(wanted, num_workers - started)

      ssh_processes.append(start_remote_worker(host, started, started + batch))
      started += batch
      if started == num_workers:
        break
  else:
    start_remote_worker('localhost', 0, num_workers)

  master.wait_for_initialization()

  # The ssh processes are deliberately left running.
  # Fegin : if we kill these processes, we can't get log from workers.
  #for process in ssh_processes:
    #process.kill()
  return master
コード例 #16
0
ファイル: test_als.py プロジェクト: rossparks/spartan
def benchmark_als(ctx, timer):
  """Time ``als`` on a random integer rating array and print the result.

  A ``(USER_SIZE, MOVIE_SIZE)`` array from ``expr.randint`` (low=0,
  high=5) is factorised with ``implicit_feedback=True``; both returned
  expressions are forced, and the elapsed time reported by ``millis`` is
  printed in total and per iteration.

  The prints use the single-argument parenthesised form, which produces
  the same output on Python 2 and is also valid on Python 3.

  :param ctx: provides ``num_workers`` (printed, and used for the column
      tile hint).
  :param timer: not used; timing is done with ``datetime.now()``.
  """
  print("#worker: %s" % (ctx.num_workers,))
  #USER_SIZE = 100 * ctx.num_workers
  USER_SIZE = 320
  #USER_SIZE = 200 * 64
  MOVIE_SIZE = 12800
  num_features = 20
  num_iter = 2

  # Rows stay whole; columns are split across the workers.
  A = expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5, tile_hint=(USER_SIZE, util.divup(MOVIE_SIZE, ctx.num_workers)))
  #A = expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5)

  util.log_warn('begin als!')
  t1 = datetime.now()
  U, M = als(A, implicit_feedback=True, num_features=num_features, num_iter=num_iter)
  # Both factor expressions are forced inside the timed region.
  U.force()
  M.force()
  t2 = datetime.now()
  cost_time = millis(t1,t2)
  print("total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time/num_iter))
コード例 #17
0
def benchmark_pr(ctx, timer):
    """Time ``sparse_multiply`` on a ``pagerank_sparse`` weight matrix.

    The page count scales with the worker count
    (``300 * 1000 * 3 * num_workers``). The rank vector is a random
    float32 column from ``expr.rand``. The elapsed time from ``millis``
    is divided by ``num_iter`` and by 1000 before being printed.

    The prints use the single-argument parenthesised form, which produces
    the same output on Python 2 and is also valid on Python 3.

    :param ctx: provides ``num_workers``.
    :param timer: not used; timing is done with ``datetime.now()``.
    """
    num_pages = 300 * 1000 * 3 * ctx.num_workers
    num_outlinks = 10
    # `density` and `wts_tile_hint` are only referenced by the disabled
    # alternatives below.
    density = num_outlinks * 1.0 / num_pages
    same_site_prob = 0.9
    print("#worker: %s" % (ctx.num_workers,))
    col_step = util.divup(num_pages, ctx.num_workers)

    wts_tile_hint = [num_pages, col_step]
    p_tile_hint = [col_step, 1]
    #wts = expr.sparse_diagonal((num_pages, num_pages), dtype=np.float32, tile_hint=wts_tile_hint)
    #wts = expr.eager(
    #         expr.sparse_rand((num_pages, num_pages),
    #                          density=density,
    #                          format='csr',
    #                          dtype=np.float32,
    #                          tile_hint=wts_tile_hint))

    wts = pagerank_sparse(num_pages, num_outlinks, same_site_prob)
    #res = wts.glom().todense()
    #for i in range(res.shape[0]):
    #  l = []
    #  for j in range(res.shape[1]):
    #    l.append(round(res[i,j],1))
    #  print l
    #p = expr.sparse_empty((num_pages,1), dtype=np.float32, tile_hint=p_tile_hint).evaluate()
    #for i in range(num_pages):
    #  p[i,0] = 1
    #p = expr.sparse_rand((num_pages, 1), density=1.0, format='csc', dtype=np.float32, tile_hint=p_tile_hint)
    p = expr.rand(num_pages, 1).astype(np.float32)
    #q = expr.zeros((num_pages, 1), dtype=np.float32, tile_hint=p_tile_hint).evaluate()
    #q[:] = p.glom().todense()
    #q = expr.lazify(q)

    #r = expr.dot(wts, p)
    #print r.glom()
    t1 = datetime.now()
    sparse_multiply(wts, p, p_tile_hint)
    t2 = datetime.now()
    cost_time = millis(t1, t2)
    # NOTE(review): `num_iter` is not defined in this function; it must
    # come from module scope, otherwise this line raises NameError.
    print('current benchmark: %s' % (cost_time / num_iter / 1000,))
コード例 #18
0
ファイル: test_pagerank.py プロジェクト: GabrielWen/spartan
def benchmark_pr(ctx, timer):
  """Time ``sparse_multiply`` on a ``pagerank_sparse`` weight matrix.

  The page count scales with the worker count
  (``300 * 1000 * 3 * num_workers``). The rank vector is a random
  float32 column from ``expr.rand``. The elapsed time from ``millis`` is
  divided by ``num_iter`` and by 1000 before being printed.

  The prints use the single-argument parenthesised form, which produces
  the same output on Python 2 and is also valid on Python 3.

  :param ctx: provides ``num_workers``.
  :param timer: not used; timing is done with ``datetime.now()``.
  """
  num_pages = 300 * 1000 * 3 * ctx.num_workers
  num_outlinks = 10
  # `density` and `wts_tile_hint` are only referenced by the disabled
  # alternatives below.
  density = num_outlinks * 1.0 / num_pages
  same_site_prob = 0.9
  print("#worker: %s" % (ctx.num_workers,))
  col_step = util.divup(num_pages, ctx.num_workers)

  wts_tile_hint = [num_pages, col_step]
  p_tile_hint = [col_step, 1]
  #wts = expr.sparse_diagonal((num_pages, num_pages), dtype=np.float32, tile_hint=wts_tile_hint)
  #wts = expr.eager(
  #         expr.sparse_rand((num_pages, num_pages),
  #                          density=density,
  #                          format='csr',
  #                          dtype=np.float32,
  #                          tile_hint=wts_tile_hint))

  wts = pagerank_sparse(num_pages, num_outlinks, same_site_prob)
  #res = wts.glom().todense()
  #for i in range(res.shape[0]):
  #  l = []
  #  for j in range(res.shape[1]):
  #    l.append(round(res[i,j],1))
  #  print l
  #p = expr.sparse_empty((num_pages,1), dtype=np.float32, tile_hint=p_tile_hint).evaluate()
  #for i in range(num_pages):
  #  p[i,0] = 1
  #p = expr.sparse_rand((num_pages, 1), density=1.0, format='csc', dtype=np.float32, tile_hint=p_tile_hint)
  p = expr.rand(num_pages, 1).astype(np.float32)
  #q = expr.zeros((num_pages, 1), dtype=np.float32, tile_hint=p_tile_hint).evaluate()
  #q[:] = p.glom().todense()
  #q = expr.lazify(q)

  #r = expr.dot(wts, p)
  #print r.glom()
  t1 = datetime.now()
  sparse_multiply(wts, p, p_tile_hint)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  # NOTE(review): `num_iter` is not defined in this function; it must
  # come from module scope, otherwise this line raises NameError.
  print('current benchmark: %s' % (cost_time / num_iter / 1000,))
コード例 #19
0
ファイル: test_stencil.py プロジェクト: rossparks/spartan
def test_stencil(ctx):
  """Build a stencil expression over all-ones inputs and print the elapsed time.

  The image side is ``int(8 * sqrt(num_workers))`` and the spatial tile
  side is ``divup(IMG_SIZE, sqrt(num_workers))``. The filters use
  ``ONE_TILE`` as their tile hint. The printed value is the
  ``time.time()`` difference across the whole function body.

  :param ctx: provides ``num_workers``.
  """
  st = time.time()

  IMG_SIZE = int(8 * math.sqrt(ctx.num_workers))
  FILT_SIZE = 8
  N = 8
  F = 32

  tile_size = util.divup(IMG_SIZE, math.sqrt(ctx.num_workers))

  # Builtin `float` replaces the `np.float` alias, which NumPy deprecated
  # in 1.20 and removed in 1.24; the alias was the same object.
  images = expr.ones((N, 3, IMG_SIZE, IMG_SIZE),
                     dtype=float,
                     tile_hint=(N, 3, tile_size, tile_size))

  filters = expr.ones((F, 3, FILT_SIZE, FILT_SIZE),
                      dtype=float,
                      tile_hint=ONE_TILE)

  # The returned expression is not forced or inspected here.
  # NOTE(review): confirm whether stencil.stencil evaluates eagerly;
  # otherwise only expression construction is being timed.
  stencil.stencil(images, filters, 1)
  ed = time.time()
  # Single-argument parenthesised print: same output on Python 2 and 3.
  print(ed - st)
コード例 #20
0
ファイル: test_convnet.py プロジェクト: GabrielWen/spartan
def test_convnet(ctx):
  """Assemble a three-stage stencil/maxpool network and log its output shape.

  All inputs are all-ones arrays. The final expression is not forced;
  only its ``shape`` is passed to ``util.log_info``.

  :param ctx: provides ``num_workers``, used for the spatial tile size.
  """
  tile_side = util.divup(64, sqrt(ctx.num_workers))

  def conv_pool(source, in_channels):
    # One stage: an all-ones filter bank, a stencil with argument 2, then maxpool.
    weights = expr.eager(expr.ones((N_FILTERS, in_channels) + FILTER_SIZE,
                                   tile_hint=ONE_TILE))
    return stencil.maxpool(stencil.stencil(source, weights, 2))

  images = expr.eager(
      expr.ones((N_IMGS,) + IMG_SIZE,
                tile_hint=(N_IMGS, N_COLORS, tile_side, tile_side)))

  stage1 = conv_pool(images, N_COLORS)
  stage2 = conv_pool(stage1, N_FILTERS)
  stage3 = conv_pool(stage2, N_FILTERS)

  util.log_info(stage3.shape)