Example #1
0
def test_dot_real(data_dict):
    """Dot operator testing with real datasets"""
    data_dir = os.path.join(os.getcwd(), 'data')

    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_data(
            data_dir,
            data_dict['data_name'],
            data_dict['url'],
            data_dict['data_origin_name']
        )
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    batch_size_list = data_dict['batch_size']

    default_output_index = data_dict['default_index']['output_dim']
    default_batch_size_index = data_dict['default_index']['batch_size']
    density = estimate_density(path, data_dict['feature_dim'])
    num_batches = data_dict['num_batches']

    assert default_batch_size_index < len(batch_size_list)
    assert default_output_index < len(m)
    if ARGS.verbose:
        print("Running Benchmarking on %r data") % data_dict['data_mini']
    print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.format('density(%)',
                                                                                 'n',
                                                                                 'm',
                                                                                 'k',
                                                                                 't_dense/t_sparse',
                                                                                 't_dense(ms)',
                                                                                 't_sparse(ms)',
                                                                                 'is_transpose',
                                                                                 'rhs_rsp'))


    for output_dim in m:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches, rsp=True)

    for batch_size in batch_size_list:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches, rsp=True)
Example #2
0
def test_dot_real(data_dict):
    """Dot operator testing with real datasets"""
    data_dir = os.path.join(os.getcwd(), 'data')

    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(
            data_dir,
            data_dict['data_name'],
            data_dict['url'],
            data_dict['data_origin_name']
        )
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    batch_size_list = data_dict['batch_size']

    default_output_index = data_dict['default_index']['output_dim']
    default_batch_size_index = data_dict['default_index']['batch_size']
    density = estimate_density(path, data_dict['feature_dim'])
    num_batches = data_dict['num_batches']

    assert default_batch_size_index < len(batch_size_list)
    assert default_output_index < len(m)
    if ARGS.verbose:
        print("Running Benchmarking on %r data") % data_dict['data_mini']
    print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.format('density(%)',
                                                                                 'n',
                                                                                 'm',
                                                                                 'k',
                                                                                 't_dense/t_sparse',
                                                                                 't_dense(ms)',
                                                                                 't_sparse(ms)',
                                                                                 'is_transpose',
                                                                                 'rhs_rsp'))


    for output_dim in m:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches, rsp=True)

    for batch_size in batch_size_list:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches, rsp=True)
Example #3
0
def test_dot_real(data_dict):
    def get_iter(path, data_shape, batch_size):
        data_train = mx.io.LibSVMIter(data_libsvm=path,
                                      data_shape=data_shape,
                                      batch_size=batch_size)
        data_iter = iter(data_train)
        return data_iter

    data_dir = os.path.join(os.getcwd(), 'data')

    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(data_dir, data_dict['data_name'], data_dict['url'],
                     data_dict['data_origin_name'])
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    density = estimate_density(path, data_dict['feature_dim'])

    mini_path = os.path.join(data_dir, data_dict['data_mini'])
    if not os.path.exists(mini_path):
        os.system("head -n 2000 %r > %r" % (path, mini_path))
        assert os.path.exists(mini_path)

    print("Running Benchmarking on %r data" % data_dict['data_mini'])
    for batch_size in data_dict[
            'batch_size']:  # iterator through different batch size of choice
        print("batch_size is %d" % batch_size)
        # model
        data_shape = (k, )
        train_iter = get_iter(mini_path, data_shape, batch_size)
        weight = mx.nd.random.uniform(low=0, high=1, shape=(k, m))

        csr_data = []
        dns_data = []
        num_batch = 0
        for batch in train_iter:
            data = train_iter.getdata()
            csr_data.append(data)
            dns_data.append(data.tostype('default'))
            num_batch += 1
        bag_of_data = [csr_data, dns_data]
        num_repeat = 5
        costs = []
        for d in bag_of_data:
            weight.wait_to_read()
            cost = 0.
            count = 0
            for d_batch in d:
                d_batch.wait_to_read()
                cost += measure_cost(num_repeat, mx.nd.dot, d_batch, weight)
                count += 1
            costs.append(cost / count)
        t_sparse = costs[0]
        t_dense = costs[1]
        ratio = t_dense / t_sparse
        print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
        print(fmt %
              (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
Example #4
0
def test_dot_real(data_dict):
    def get_iter(path, data_shape, batch_size):
        data_train = mx.io.LibSVMIter(data_libsvm=path,
                                      data_shape=data_shape,
                                      batch_size=batch_size)
        data_iter = iter(data_train)
        return data_iter

    data_dir = os.path.join(os.getcwd(), 'data')

    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_bz2_data(
            data_dir,
            data_dict['data_name'],
            data_dict['url'],
            data_dict['data_origin_name']
        )
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    density = estimate_density(path, data_dict['feature_dim'])

    mini_path = os.path.join(data_dir, data_dict['data_mini'])
    if not os.path.exists(mini_path):
        os.system("head -n 2000 %r > %r" % (path, mini_path))
        assert os.path.exists(mini_path)

    print "Running Benchmarking on %r data" % data_dict['data_mini']
    for batch_size in data_dict['batch_size']:  # iterator through different batch size of choice
        print "batch_size is %d" % batch_size
        # model
        data_shape = (k, )
        train_iter = get_iter(mini_path, data_shape, batch_size)
        weight = mx.nd.random.uniform(low=0, high=1, shape=(k, m))

        csr_data = []
        dns_data = []
        num_batch = 0
        for batch in train_iter:
            data = train_iter.getdata()
            csr_data.append(data)
            dns_data.append(data.tostype('default'))
            num_batch += 1
        bag_of_data = [csr_data, dns_data]
        num_repeat = 5
        costs = []
        for d in bag_of_data:
            weight.wait_to_read()
            cost = 0.
            count = 0
            for d_batch in d:
                d_batch.wait_to_read()
                cost += measure_cost(num_repeat, mx.nd.dot, d_batch, weight)
                count += 1
            costs.append(cost/count)
        t_sparse = costs[0]
        t_dense = costs[1]
        ratio = t_dense / t_sparse
        print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
        print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))