class TestAccumulate(unittest.TestCase):
    """Checks the ovl cumsum and cumprod operators against numpy."""

    ovl.clear_op_cache()

    def test(self):
        """
        Compare cumsum and cumprod with their numpy counterparts along each
        axis of a random 5x5x5 array, for every available target language
        """
        data = np.random.random((5, 5, 5))
        # (numpy reference, ovl operator) pairs; every cumsum axis is checked
        # before any cumprod axis
        op_pairs = ((np.cumsum, cumsum), (np.cumprod, cumprod))
        axes = (0, 1, 2)

        ovl.logger.debug(u'Testing C')
        for reference_fn, ovl_fn in op_pairs:
            for axis in axes:
                expected = reference_fn(data, axis=axis)
                actual = ovl.evaluate(ovl_fn(data, axis=axis),
                                      target_language='cpp')
                assert np.allclose(expected, actual)

        # The CUDA pass only runs when ovl reports that CUDA is available
        if ovl.cuda_enabled:
            ovl.logger.debug(u'Testing CUDA')
            for reference_fn, ovl_fn in op_pairs:
                for axis in axes:
                    expected = reference_fn(data, axis=axis)
                    actual = ovl.evaluate(ovl_fn(data, axis=axis),
                                          target_language='cuda')
                    assert np.allclose(expected, actual)
class TestExpm1(unittest.TestCase):
    """
    Correctness checks for the ovl log1p operator.

    NOTE(review): despite its name, this class exercises log1p rather than
    expm1. The name is left unchanged so existing test selectors keep working.
    """

    ovl.clear_op_cache()

    def test(self):
        """
        Test the correctness of the ovl log1p operator vs the numpy
        implementation, evaluated directly and through tensorflow
        """
        a = np.array([1e-99, -1e-99, 0.0], dtype=np.float64)
        log1p_op = log1p(a)
        ref = np.log1p(a)
        ovl_res = ovl.evaluate(log1p_op)
        ovl.logger.debug(u'numpy: ' + str(ref) + u' ovl: ' + str(ovl_res))
        # NOTE(review): the inputs have magnitude 1e-99 while atol is 1e-20,
        # so an all-zero result would also satisfy these checks; consider
        # larger inputs or a relative tolerance.
        assert np.allclose(ref, ovl_res, rtol=0, atol=1e-20)
        if ovl.cuda_enabled:
            assert np.allclose(ref,
                               ovl.evaluate(log1p_op, target_language='cuda'),
                               rtol=0, atol=1e-20)

        # test vs tensorflow
        # ensure TF runs on GPU when asked
        test_config = tf.ConfigProto(allow_soft_placement=False)
        test_config.graph_options.optimizer_options.opt_level = -1
        ones = np.ones_like(a)
        devices = ['/cpu:0', '/gpu:0'] if ovl.cuda_enabled else ['/cpu:0']

        # The with-statement closes the session on exit, so no explicit
        # close() call is needed afterwards.
        with tf.Session(config=test_config) as sess:
            for dev_string in devices:
                with tf.device(dev_string):
                    log1p_tf = ovl.as_tensorflow(log1p_op)
                    sess.run(tf.initialize_all_variables())
                    log1p_tf_result = sess.run(log1p_tf)
                    assert np.allclose(ref, log1p_tf_result,
                                       rtol=0, atol=1e-20)

                    # Plain TF expression that is expected NOT to match the
                    # reference.
                    # NOTE(review): this evaluates log(a - 1), not the naive
                    # log(1 + a); for these inputs it presumably yields NaN,
                    # which is why the mismatch holds - confirm the intent.
                    tf_out = tf.log(a - ones)
                    tf_result = tf_out.eval()
                    # this should fail
                    assert not np.allclose(ref, tf_result, rtol=0, atol=1e-20)
class TestAccumulatePerf(unittest.TestCase):
    """Timing comparison of the ovl cumsum operator against numpy and tensorflow."""

    ovl.clear_op_cache()

    def test_performance(self):
        """
        test the performance vs. numpy running standalone and from tensorflow
        based on tensorflow issue 813
        https://github.com/tensorflow/tensorflow/issues/813

        Timings are only logged; the assertions check that every variant
        agrees numerically with np.cumsum.
        """
        import tensorflow as tf
        import timeit
        import time

        logger = ovl.logger
        iters = 10
        X = np.random.uniform(0, 1, size=(10000, 1000))
        # note, np.cumsum fails with memory error at input size 10 ^^ 6
        ref = np.cumsum(X, axis=0)

        def _best_time_ms(session, op):
            """Run op `iters` times in session; return the fastest run in ms."""
            samples = np.zeros(iters)
            for i in range(iters):
                t0 = time.time()
                session.run(op)
                t1 = time.time()
                samples[i] = t1 - t0
            return np.min(samples) * 1000.00

        # timeit returns the total seconds for 'number' iterations; scaling by
        # 1000 / iters gives the mean time per iteration in ms (a factor of
        # 100 for 10 iterations), so the factor stays right if iters changes
        np_time = (1000.0 / iters) * timeit.timeit(
            'np.cumsum(X, axis=0)',
            setup='import numpy as np; '
                  'X = np.random.uniform(0, 1, size=(10000, 1000))',
            number=iters)
        logger.debug(u'Best numpy time (ms): ' + str(np_time))

        cumsumOp = cumsum(X, axis=0)
        ovl_cpp, prof_cpp = ovl.profile(cumsumOp, target_language='cpp',
                                        profiling_iterations=iters,
                                        opt_level=0)
        assert np.allclose(ref, ovl_cpp)
        ovl_cpp_time = np.min(list(prof_cpp.values())[0])
        logger.debug(u'Best ovl cpp time (ms): ' + str(ovl_cpp_time))

        if ovl.cuda_enabled:
            ovl_cuda, prof_cuda = ovl.profile(cumsumOp, target_language='cuda',
                                              profiling_iterations=iters,
                                              opt_level=0)
            assert np.allclose(ref, ovl_cuda)
            ovl_cuda_time = np.min(list(prof_cuda.values())[0])
            logger.debug(u'Best ovl cuda time (ms): ' + str(ovl_cuda_time))

        # OVL-TF integration
        # ensure TF runs on GPU
        test_config = tf.ConfigProto(allow_soft_placement=False)
        test_config.graph_options.optimizer_options.opt_level = -1
        devices = ['/cpu:0', '/gpu:0'] if ovl.cuda_enabled else ['/cpu:0']

        # The with-statement closes the session on exit, so no explicit
        # close() call is needed afterwards.
        with tf.Session(config=test_config) as sess:
            for dev_string in devices:
                with tf.device(dev_string):
                    cumsum_tf = ovl.as_tensorflow(cumsumOp)
                    sess.run(tf.initialize_all_variables())
                    cumsum_tf_result = sess.run(cumsum_tf)
                    tf_ovl_time = _best_time_ms(sess, cumsum_tf.op)
                    logger.debug(u'Best tf + ovl time (ms) on ' + dev_string +
                                 ' :' + str(tf_ovl_time))
                    assert np.allclose(ref, cumsum_tf_result)

                    # TF cumsum
                    tf_out = tf.cumsum(X, axis=0, exclusive=False,
                                       reverse=False)
                    tf_result = tf_out.eval()
                    assert np.allclose(ref, tf_result)
                    tf_time = _best_time_ms(sess, tf_out.op)
                    logger.debug(u'Best tf cumsum time (ms) on ' + dev_string +
                                 ' :' + str(tf_time))
class TestExpm1(unittest.TestCase):
    """Correctness and gradient checks for the ovl expm1 operator."""

    ovl.clear_op_cache()

    def test(self):
        """
        Test the correctness of ovl operator vs numpy implementation
        """
        # np.inf is used rather than the np.Infinity alias, which NumPy 2.0
        # removed
        a = np.array([1e-10, -1e-10, 0.0, np.inf], dtype=np.float64)
        expm1_op = expm1(a)
        ref = np.expm1(a)
        ovl_res = ovl.evaluate(expm1_op)
        ovl.logger.info(u'numpy: ' + str(ref) + u' ovl: ' + str(ovl_res))
        assert np.allclose(ref, ovl_res, rtol=0, atol=1e-20)
        if ovl.cuda_enabled:
            assert np.allclose(ref,
                               ovl.evaluate(expm1_op, target_language='cuda'),
                               rtol=0, atol=1e-20)

        # test vs tensorflow
        # ensure TF runs on GPU when asked
        test_config = tf.ConfigProto(allow_soft_placement=False)
        test_config.graph_options.optimizer_options.opt_level = -1
        ones = np.ones_like(a)
        devices = ['/cpu:0', '/gpu:0'] if ovl.cuda_enabled else ['/cpu:0']

        # The with-statement closes the session on exit, so no explicit
        # close() call is needed afterwards.
        with tf.Session(config=test_config) as sess:
            for dev_string in devices:
                with tf.device(dev_string):
                    expm1_tf = ovl.as_tensorflow(expm1_op)
                    sess.run(tf.initialize_all_variables())
                    expm1_tf_result = sess.run(expm1_tf)
                    assert np.allclose(ref, expm1_tf_result,
                                       rtol=0, atol=1e-20)

                    # TF exp - 1: the naive formulation is expected NOT to
                    # match the reference at this tolerance
                    tf_out = tf.exp(a) - ones
                    tf_result = tf_out.eval()
                    # this should fail
                    assert not np.allclose(ref, tf_result, rtol=0, atol=1e-20)

    def test_gradient(self):
        """
        Test the correctness of the gradient against tensorflow
        """
        devices = ['/cpu:0', '/gpu:0'] if ovl.cuda_enabled else ['/cpu:0']
        # ensure TF runs on GPU when asked
        test_config = tf.ConfigProto(allow_soft_placement=False)
        test_config.graph_options.optimizer_options.opt_level = -1

        # The with-statement closes the session on exit, so no explicit
        # close() call is needed afterwards.
        with tf.Session(config=test_config) as sess:
            for dev_string in devices:
                with tf.device(dev_string):
                    a = np.random.random(100)
                    grad_input = tf.constant(np.random.random(100))
                    arg = tf.constant(a)
                    ovl_op = expm1(arg)
                    ones = tf.constant(np.ones_like(a))
                    ovl_out = ovl.as_tensorflow(ovl_op)
                    tf_out = tf.exp(arg) - ones
                    ovl_grad = tf.gradients(ovl_out, arg, grad_input)[0]
                    tf_grad = tf.gradients(tf_out, arg, grad_input)[0]
                    # Fetch all four tensors in a single run; the results get
                    # their own names instead of rebinding the tensors
                    ovl_out_val, tf_out_val, ovl_grad_val, tf_grad_val = \
                        sess.run([ovl_out, tf_out, ovl_grad, tf_grad])
                    assert np.allclose(ovl_out_val, tf_out_val)
                    assert np.allclose(ovl_grad_val, tf_grad_val)
def test(self):
    """
    This test case compares the numpy reference implementation and
    the opveclib implementations with the ground-truth count.
    """
    import os
    import shutil
    import tempfile

    # Ground-truth number of triangles asserted for the example graph.
    nTriangle = 3

    # Write the graph into a private scratch directory instead of a fixed
    # /tmp path: this is portable, avoids clashes between concurrent runs,
    # and lets the file be removed once the test is done.
    tmpDir = tempfile.mkdtemp()
    try:
        tmpName = os.path.join(tmpDir, "v7e20.txt")
        writeExampleGraphToTextFile(tmpName)
        ovl.logger.debug('Testing graph %s.' % tmpName)
        startEdge, fromVertex, toVertex = loadGraphFromTextFile(tmpName)

        nTriangleNPY = countTrianglesNp(startEdge, fromVertex, toVertex)
        nTriangleCPU = countTrianglesCPU(startEdge, fromVertex, toVertex)
        assert nTriangleNPY == nTriangle
        assert nTriangleCPU == nTriangle

        # The GPU variant is only checked when CUDA is reported as enabled.
        if ovl.local.cuda_enabled:
            nTriangleGPU = countTrianglesGPU(startEdge, fromVertex, toVertex)
            assert nTriangleGPU == nTriangle
    finally:
        shutil.rmtree(tmpDir, ignore_errors=True)


if __name__ == '__main__':
    ovl.clear_op_cache()
    unittest.main()
class TestGraphTriangleCountOp(unittest.TestCase):
    """
    Test cases for the triangle counting operator.
    """

    def test(self):
        """
        This test case compares the numpy reference implementation and
        the opveclib implementation with the ground-truth count.
        """
        import os
        import shutil
        import tempfile

        # Ground-truth number of triangles asserted for the example graph.
        nTriangle = 3

        # Write the graph into a private scratch directory instead of a fixed
        # /tmp path: this is portable, avoids clashes between concurrent
        # runs, and lets the file be removed once the test is done.
        tmpDir = tempfile.mkdtemp()
        try:
            tmpName = os.path.join(tmpDir, "v7e20.txt")
            write_example_graph_to_text_file(tmpName)
            ovl.logger.info('Testing graph %s.' % tmpName)
            startEdge, fromVertex, toVertex = load_graph_from_text_file(tmpName)

            assert nTriangle == triangles(startEdge, fromVertex, toVertex)
            assert nTriangle == reference(startEdge, fromVertex, toVertex)

            # The CUDA variant is only checked when CUDA is reported as enabled.
            if ovl.local.cuda_enabled:
                assert nTriangle == triangles(startEdge, fromVertex, toVertex,
                                              target_language='cuda')
        finally:
            shutil.rmtree(tmpDir, ignore_errors=True)


if __name__ == '__main__':
    ovl.clear_op_cache()
    unittest.main()