Exemple #1
0
def benchmark_cg(ctx, timer):
  print "#worker:", ctx.num_workers
  l = int(math.sqrt(ctx.num_workers))
  n = 2000 * 16
  #n = 4000 * l
  la = 20
  niter = 5
  tile_hint = (n, n/ctx.num_workers)
  
  #nonzer = 7
  #nz = n * (nonzer + 1) * (nonzer + 1) + n * (nonzer + 2)
  #density = 0.5 * nz/(n*n)
  A = expr.rand(n, n, tile_hint=tile_hint)
  A = (A + expr.transpose(A))*0.5
  
  I = expr.sparse_diagonal((n,n), tile_hint=tile_hint) * la
  I.force()
  A = expr.eager(A - I)

  #x1 = numpy_cg(A.glom(), niter)
  util.log_warn('begin cg!')
  t1 = datetime.now()
  x2 = conj_gradient(A, niter).force()
  t2 = datetime.now()
  cost_time = millis(t1,t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time/niter)
def benchmark_naive_bayes(ctx, timer):
  
  print "#worker:", ctx.num_workers
  #N = 100000 * ctx.num_workers
  N = 10000 * 64
  D = 128
  
  # create data
  data = expr.randint(N, D, low=0, high=D, tile_hint=(N, D/ctx.num_workers))
  labels = expr.shuffle(expr.ndarray((data.shape[0], 1), dtype=np.int), _init_label_mapper,
                        kw={'data': data}, shape_hint=(data.shape[0], 1), 
                        cost_hint={hash(data):{'00': 0, '10': np.prod(data.shape)}}
                       )
    
  #util.log_warn('data:%s, label:%s', data.glom(), labels.glom())   
  
  util.log_warn('begin train')
  t1 = datetime.now()
  model = fit(data, labels, D)
  t2 = datetime.now()
  util.log_warn('train time:%s ms', millis(t1,t2))

  correct = 0
  for i in range(10):
    new_data = expr.randint(1, D, low=0, high=D, tile_hint=(1, D))
    new_label = predict(model, new_data)
    #print 'point %s, predict %s' % (new_data.glom(), new_label)
   
    new_data = new_data.glom()
    if np.isclose(new_data[0, new_label], np.max(new_data)):
      correct += 1
  print 'predict precision:', correct * 1.0 / 10
def benchmark_naive_bayes(ctx, timer):
  
  print "#worker:", ctx.num_workers
  N = 100000 * ctx.num_workers
  D = 128
  
  # create data
  data = expr.randint(N, D, low=0, high=D, tile_hint=(N/ctx.num_workers, D))
  labels = expr.eager(expr.shuffle(data, _init_label_mapper))
    
  #util.log_warn('data:%s, label:%s', data.glom(), labels.glom())   
  
  util.log_warn('begin train')
  t1 = datetime.now()
  model = fit(data, labels, D)
  t2 = datetime.now()
  util.log_warn('train time:%s ms', millis(t1,t2))

  correct = 0
  for i in range(10):
    new_data = expr.randint(1, D, low=0, high=D, tile_hint=(1, D))
    new_label = predict(model, new_data)
    #print 'point %s, predict %s' % (new_data.glom(), new_label)
   
    new_data = new_data.glom()
    if np.isclose(new_data[0, new_label], np.max(new_data)):
      correct += 1
  print 'predict precision:', correct * 1.0 / 10
Exemple #4
0
def benchmark_lda(ctx, timer):

    print "#worker:", ctx.num_workers
    NUM_TERMS = 160
    NUM_DOCS = 200 * ctx.num_workers
    #NUM_DOCS = 10 * 64

    # create data
    # NUM_TERMS = 41807
    # NUM_DOCS = 21578
    # terms_docs_matrix = from_file("/scratch/cq/numpy_dense_matrix", sparse = False, tile_hint = (NUM_TERMS, int((NUM_DOCS + ctx.num_workers - 1) / ctx.num_workers))).evaluate()

    terms_docs_matrix = expr.randint(NUM_TERMS, NUM_DOCS, low=0, high=100)

    max_iter = 3
    k_topics = 16

    t1 = datetime.now()
    doc_topics, topic_term_count = learn_topics(terms_docs_matrix,
                                                k_topics,
                                                max_iter=max_iter)
    doc_topics.optimized().evaluate()
    topic_term_count.optimized().evaluate()
    t2 = datetime.now()
    time_cost = millis(t1, t2)
    util.log_warn('total_time:%s ms, train time per iteration:%s ms',
                  time_cost, time_cost / max_iter)
Exemple #5
0
def benchmark_cholesky(ctx, timer):
    print "#worker:", ctx.num_workers

    #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
    n = int(math.sqrt(ctx.num_workers))
    #ARRAY_SIZE = 1600 * 4
    ARRAY_SIZE = 1600 * n

    util.log_warn('prepare data!')
    #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
    #A = np.dot(A, A.T)
    #A = expr.force(from_numpy(A, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n)))

    #A = expr.randn(ARRAY_SIZE, ARRAY_SIZE, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n))
    A = expr.randn(ARRAY_SIZE, ARRAY_SIZE)
    # FIXME: Ideally we should be able to get rid of tile_hint.
    #        However, current extent.change_partition_axis relies on the
    #        information of one-dimentional size to change tiling to grid tiling.
    #        It assumes that every extent should be partitioned in the same size.
    #        Trace extent.pyx to think about how to fix it!
    A = expr.dot(A,
                 expr.transpose(A),
                 tile_hint=(ARRAY_SIZE, ARRAY_SIZE / ctx.num_workers)).force()

    util.log_warn('begin cholesky!')
    t1 = datetime.now()
    L = cholesky(A).glom()
    t2 = datetime.now()
    assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
    cost_time = millis(t1, t2)
    print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time,
                                                               cost_time / n)
Exemple #6
0
def benchmark_svm(ctx, timer):
  
  print "#worker:", ctx.num_workers
  max_iter = 2
  #N = 200000 * ctx.num_workers
  N = 1000 * 64
  D = 64
  
  # create data
  data = expr.randn(N, D, dtype=np.float64, tile_hint=(N, util.divup(D, ctx.num_workers)))
  labels = expr.shuffle(data, _init_label_mapper, shape_hint=(data.shape[0], 1))
  
  t1 = datetime.now()
  w = fit(data, labels, T=max_iter).force()
  t2 = datetime.now()
  util.log_warn('train time per iteration:%s ms, final w:%s', millis(t1,t2)/max_iter, w.glom().T)
  
  correct = 0
  for i in range(10):
    new_data = expr.randn(1, D, dtype=np.float64, tile_hint=[1, D])
    new_label = predict(w, new_data)
    #print 'point %s, predict %s' % (new_data.glom(), new_label)
     
    new_data = new_data.glom()
    if new_data[0,0] >= new_data[0,1] and new_label == 1.0 or new_data[0,0] < new_data[0,1] and new_label == -1.0:
      correct += 1
  print 'predict precision:', correct * 1.0 / 10
Exemple #7
0
def benchmark_cg(ctx, timer):
    print "#worker:", ctx.num_workers
    l = int(math.sqrt(ctx.num_workers))
    #n = 2000 * 16
    n = 500 * ctx.num_workers
    la = 20
    niter = 5

    #nonzer = 7
    #nz = n * (nonzer + 1) * (nonzer + 1) + n * (nonzer + 2)
    #density = 0.5 * nz/(n*n)
    A = expr.rand(n, n)
    A = (A + expr.transpose(A)) * 0.5

    I = expr.sparse_diagonal((n, n)) * la
    A = A - I

    #x1 = numpy_cg(A.glom(), niter)
    util.log_warn('begin cg!')
    t1 = datetime.now()
    x2 = conj_gradient(A, niter).force()
    t2 = datetime.now()
    cost_time = millis(t1, t2)
    print "total cost time:%s ms, per iter cost time:%s ms" % (
        cost_time, cost_time / niter)
Exemple #8
0
def benchmark_ssvd(ctx, timer):
    DIM = (1280, 1280)
    #A = expr.randn(*DIM, dtype=np.float64)
    A = np.random.randn(*DIM)
    A = expr.from_numpy(A)
    t1 = datetime.now()
    U, S, VT = svd(A)
    t2 = datetime.now()
    cost_time = millis(t1, t2)

    print "total cost time:%s ms" % (cost_time)
def benchmark_canopy_clustering(ctx, timer):
    # N_PTS = 60000 * ctx.num_workers
    N_PTS = 30000 * 64
    N_DIM = 2

    pts = expr.rand(N_PTS, N_DIM, tile_hint=(N_PTS / ctx.num_workers, N_DIM)).evaluate()

    t1 = datetime.now()
    cluster_result = canopy_cluster(pts).evaluate()
    t2 = datetime.now()
    print "canopy_cluster time:%s ms" % millis(t1, t2)
Exemple #10
0
def benchmark_ssvd(ctx, timer):
  DIM = (1280, 1280)
  #A = expr.randn(*DIM, dtype=np.float64)
  A = np.random.randn(*DIM)
  A = expr.from_numpy(A)
  t1 = datetime.now()
  U,S,VT = svd(A)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
    
  print "total cost time:%s ms" % (cost_time)
Exemple #11
0
def benchmark_pca(ctx, timer):
    DIM = (1280, 512)
    data = np.random.randn(*DIM)
    A = expr.from_numpy(data)
    #A = expr.randn(*DIM, dtype=np.float64)
    t1 = datetime.now()
    m = PCA(N_COMPONENTS)
    m.fit(A)
    t2 = datetime.now()
    cost_time = millis(t1, t2)

    print "total cost time:%s ms" % (cost_time)
Exemple #12
0
def benchmark_canopy_clustering(ctx, timer):
  #N_PTS = 60000 * ctx.num_workers
  N_PTS = 30000 * 64
  N_DIM = 2

  pts = expr.rand(N_PTS, N_DIM,
                  tile_hint=(N_PTS / ctx.num_workers, N_DIM)).force()

  t1 = datetime.now()
  cluster_result = canopy_cluster(pts).force()
  t2 = datetime.now()
  print 'canopy_cluster time:%s ms' % millis(t1, t2)
Exemple #13
0
def benchmark_pca(ctx, timer):
  DIM = (1280, 512)
  data = np.random.randn(*DIM)
  A = expr.from_numpy(data)
  #A = expr.randn(*DIM, dtype=np.float64)
  t1 = datetime.now()
  m = PCA(N_COMPONENTS)
  m.fit(A)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
    
  print "total cost time:%s ms" % (cost_time)
Exemple #14
0
def benchmark_qr(ctx, timer):
  M = 1280
  N = 1280
  Y = np.random.randn(M, N)
  Y = expr.from_numpy(Y)
  #Y = expr.randn(M, N)

  t1 = datetime.now()
  Q, R = qr(Y)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
    
  print "total cost time:%s ms" % (cost_time)
Exemple #15
0
def benchmark_knn(ctx, timer):
  print "#worker:", ctx.num_workers
  N_SAMPLES = ctx.num_workers * 300
  N_QUERY = ctx.num_workers * 2
  N_DIM = ctx.num_workers * 2
  X = expr.rand(N_SAMPLES, N_DIM)
  Y = expr.rand(N_QUERY, N_DIM)
  
  t1 = datetime.now()
  dist2, ind2 = NearestNeighbors().fit(X).kneighbors(Y)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print "total cost time:%s ms" % (cost_time)
Exemple #16
0
def benchmark_kmeans(ctx, timer):
  print "#worker:", ctx.num_workers
  N_PTS = 1000 * 256
  N_CENTERS = 10
  N_DIM = 512
  ITER = 1
  pts = expr.rand(N_PTS, N_DIM)
  k = KMeans(N_CENTERS, ITER)
  t1 = datetime.now()
  k.fit(pts)
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time/ITER)
Exemple #17
0
def benchmark_qr(ctx, timer):
    M = 1280
    N = 1280
    Y = np.random.randn(M, N)
    Y = expr.from_numpy(Y)
    #Y = expr.randn(M, N)

    t1 = datetime.now()
    Q, R = qr(Y)
    t2 = datetime.now()
    cost_time = millis(t1, t2)

    print "total cost time:%s ms" % (cost_time)
Exemple #18
0
def benchmark_fuzzy_kmeans(ctx, timer):
  #N_PTS = 40000 * ctx.num_workers
  N_PTS = 1000 * 256
  N_DIM = 512
  ITER = 5
  N_CENTERS = 10

  pts = expr.rand(N_PTS, N_DIM)

  t1 = datetime.now()
  cluster_result = fuzzy_kmeans(pts, k=N_CENTERS, num_iter=ITER).evaluate()
  t2 = datetime.now()
  time_cost = millis(t1, t2)
  print 'fuzzy_cluster time:%s ms, per_iter:%s ms' % (time_cost, time_cost/ITER)
def benchmark_spectral_clustering(ctx, timer):
  #N_PTS = 500 * ctx.num_workers
  N_PTS = 50 * 64
  N_DIM = 2
  ITER = 5
  N_CENTERS = 5

  pts = expr.rand(N_PTS, N_DIM,
                  tile_hint=(N_PTS / ctx.num_workers, N_DIM)).evaluate()

  t1 = datetime.now()
  cluster_result = spectral_cluster(pts, N_CENTERS, ITER).glom()
  t2 = datetime.now()
  print 'spectral_cluster time:%s ms' % millis(t1, t2)
Exemple #20
0
def benchmark_ib_recommander(ctx, timer):
    print "#worker:", ctx.num_workers
    N_ITEMS = 800
    N_USERS = 8000
    rating_table = expr.sparse_rand((N_USERS, N_ITEMS),
                                    dtype=np.float64,
                                    density=0.1,
                                    format="csr")
    t1 = datetime.now()
    model = ItemBasedRecommender(rating_table)
    model.precompute()
    t2 = datetime.now()
    cost_time = millis(t1, t2)
    print "total cost time:%s ms" % cost_time
Exemple #21
0
def benchmark_kmeans(ctx, timer):
    print "#worker:", ctx.num_workers
    N_PTS = 1000 * 256
    N_CENTERS = 10
    N_DIM = 512
    ITER = 1
    pts = expr.rand(N_PTS, N_DIM)
    k = KMeans(N_CENTERS, ITER)
    t1 = datetime.now()
    k.fit(pts)
    t2 = datetime.now()
    cost_time = millis(t1, t2)
    print "total cost time:%s ms, per iter cost time:%s ms" % (
        cost_time, cost_time / ITER)
Exemple #22
0
def benchmark_spectral_clustering(ctx, timer):
    #N_PTS = 500 * ctx.num_workers
    N_PTS = 50 * 64
    N_DIM = 2
    ITER = 5
    N_CENTERS = 5

    pts = expr.rand(N_PTS, N_DIM,
                    tile_hint=(N_PTS / ctx.num_workers, N_DIM)).force()

    t1 = datetime.now()
    cluster_result = spectral_cluster(pts, N_CENTERS, ITER).glom()
    t2 = datetime.now()
    print 'spectral_cluster time:%s ms' % millis(t1, t2)
def benchmark_fuzzy_kmeans(ctx, timer):
    # N_PTS = 40000 * ctx.num_workers
    N_PTS = 1000 * 256
    N_DIM = 512
    ITER = 5
    N_CENTERS = 10

    pts = expr.rand(N_PTS, N_DIM)

    t1 = datetime.now()
    cluster_result = fuzzy_kmeans(pts, k=N_CENTERS, num_iter=ITER).evaluate()
    t2 = datetime.now()
    time_cost = millis(t1, t2)
    print "fuzzy_cluster time:%s ms, per_iter:%s ms" % (time_cost, time_cost / ITER)
def benchmark_ib_recommander(ctx, timer):
  print "#worker:", ctx.num_workers
  N_ITEMS = 800
  N_USERS = 8000
  rating_table = expr.sparse_rand((N_USERS, N_ITEMS), 
                                    dtype=np.float64, 
                                    density=0.1, 
                                    format = "csr")
  t1 = datetime.now()
  model = ItemBasedRecommender(rating_table)
  model.precompute()
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print "total cost time:%s ms" % cost_time
def benchmark_fuzzy_kmeans(ctx, timer):
  #N_PTS = 40000 * ctx.num_workers
  N_PTS = 20000 * 64
  N_DIM = 2
  ITER = 5
  N_CENTERS = 10
  
  pts = expr.rand(N_PTS, N_DIM,
                  tile_hint=(N_PTS / ctx.num_workers, N_DIM)).force()

  t1 = datetime.now()
  cluster_result = fuzzy_kmeans(pts, k=N_CENTERS, num_iter=ITER).force()
  t2 = datetime.now()
  time_cost = millis(t1, t2)
  print 'fuzzy_cluster time:%s ms, per_iter:%s ms' % (time_cost, time_cost/ITER)
def benchmark_streaming_kmeans(ctx, timer):
  #N_PTS = 100 * ctx.num_workers
  N_PTS = 100 * 64
  N_DIM = 2
  N_CENTERS = 5
  
  pts = expr.rand(N_PTS, N_DIM,
                  tile_hint=(N_PTS / ctx.num_workers, N_DIM)).force()

  print pts.glom()
  t1 = datetime.now()
  cluster_result = streaming_kmeans(pts, k=N_CENTERS).glom()
  t2 = datetime.now()
  #print cluster_result.glom()
  time_cost = millis(t1, t2)
  print 'streaming_kmeans_cluster time:%s ms' % time_cost
def benchmark_streaming_kmeans(ctx, timer):
    #N_PTS = 100 * ctx.num_workers
    N_PTS = 100 * 64
    N_DIM = 2
    N_CENTERS = 5

    pts = expr.rand(N_PTS, N_DIM,
                    tile_hint=(N_PTS / ctx.num_workers, N_DIM)).evaluate()

    print pts.glom()
    t1 = datetime.now()
    cluster_result = streaming_kmeans(pts, k=N_CENTERS).glom()
    t2 = datetime.now()
    #print cluster_result.glom()
    time_cost = millis(t1, t2)
    print 'streaming_kmeans_cluster time:%s ms' % time_cost
Exemple #28
0
def benchmark_als(ctx, timer):
  print "#worker:", ctx.num_workers
  #USER_SIZE = 400 * ctx.num_workers
  USER_SIZE = 200 * 64
  MOVIE_SIZE = 12800
  num_features = 20
  num_iter = 5
  
  A = expr.eager(expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5, tile_hint=(USER_SIZE/ctx.num_workers, MOVIE_SIZE)))
  
  util.log_warn('begin als!')
  t1 = datetime.now()
  U, M = als(A, implicit_feedback=True, num_features=num_features, num_iter=num_iter)
  U.force()
  M.force()
  t2 = datetime.now()
  cost_time = millis(t1,t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time/num_iter)
Exemple #29
0
def benchmark_als(ctx, timer):
  print "#worker:", ctx.num_workers
  #USER_SIZE = 100 * ctx.num_workers
  USER_SIZE = 320
  #USER_SIZE = 200 * 64
  MOVIE_SIZE = 12800
  num_features = 20
  num_iter = 2
  
  A = expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5, tile_hint=(USER_SIZE, util.divup(MOVIE_SIZE, ctx.num_workers)))
  #A = expr.randint(USER_SIZE, MOVIE_SIZE, low=0, high=5)
  
  util.log_warn('begin als!')
  t1 = datetime.now()
  U, M = als(A, implicit_feedback=True, num_features=num_features, num_iter=num_iter)
  U.force()
  M.force()
  t2 = datetime.now()
  cost_time = millis(t1,t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time/num_iter)
Exemple #30
0
def benchmark_cholesky(ctx, timer):
  print "#worker:", ctx.num_workers

  #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
  n = int(math.sqrt(ctx.num_workers))
  #ARRAY_SIZE = 1600 * 4
  ARRAY_SIZE = 900 * n

  util.log_warn('prepare data!')
  #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
  #A = np.dot(A, A.T)

  A = expr.randn(ARRAY_SIZE, ARRAY_SIZE)
  A = expr.dot(A, expr.transpose(A))

  util.log_warn('begin cholesky!')
  t1 = datetime.now()
  L = cholesky(A).optimized().glom()
  t2 = datetime.now()
  #assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time/n)
Exemple #31
0
def benchmark_cholesky(ctx, timer):
    print "#worker:", ctx.num_workers

    # n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
    n = int(math.sqrt(ctx.num_workers))
    ARRAY_SIZE = 1600 * 4
    # ARRAY_SIZE = 1600 * n

    util.log_warn("prepare data!")
    # A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
    # A = np.dot(A, A.T)
    # A = expr.force(from_numpy(A, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n)))

    A = expr.randn(ARRAY_SIZE, ARRAY_SIZE, tile_hint=(ARRAY_SIZE / n, ARRAY_SIZE / n))
    A = expr.dot(A, expr.transpose(A)).force()

    util.log_warn("begin cholesky!")
    t1 = datetime.now()
    L = cholesky(A).glom()
    t2 = datetime.now()
    assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
    cost_time = millis(t1, t2)
    print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / n)
Exemple #32
0
def benchmark_cholesky(ctx, timer):
    print "#worker:", ctx.num_workers

    #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
    n = int(math.sqrt(ctx.num_workers))
    #ARRAY_SIZE = 1600 * 4
    ARRAY_SIZE = 900 * n

    util.log_warn('prepare data!')
    #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
    #A = np.dot(A, A.T)

    A = expr.randn(ARRAY_SIZE, ARRAY_SIZE)
    A = expr.dot(A, expr.transpose(A))

    util.log_warn('begin cholesky!')
    t1 = datetime.now()
    L = cholesky(A).optimized().glom()
    t2 = datetime.now()
    #assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
    cost_time = millis(t1, t2)
    print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time,
                                                               cost_time / n)
Exemple #33
0
def benchmark_lda(ctx, timer):
  
  print "#worker:", ctx.num_workers
  NUM_TERMS = 160
  NUM_DOCS = 200 * ctx.num_workers
  #NUM_DOCS = 10 * 64

  # create data
  # NUM_TERMS = 41807
  # NUM_DOCS = 21578
  # terms_docs_matrix = from_file("/scratch/cq/numpy_dense_matrix", sparse = False, tile_hint = (NUM_TERMS, int((NUM_DOCS + ctx.num_workers - 1) / ctx.num_workers))).force()
  
  terms_docs_matrix = expr.randint(NUM_TERMS, NUM_DOCS, low=0, high=100)
  
  max_iter = 3
  k_topics = 16
  
  t1 = datetime.now()
  doc_topics, topic_term_count = learn_topics(terms_docs_matrix, k_topics, max_iter=max_iter)
  doc_topics.optimized().force()
  topic_term_count.optimized().force()
  t2 = datetime.now()
  time_cost = millis(t1,t2)
  util.log_warn('total_time:%s ms, train time per iteration:%s ms', time_cost, time_cost/max_iter)