Example #1
 def predict(self, X: BlockArray):
     _check_array(X, True)
     r_oid = instance().cm.call_actor_method(self.actor, "predict",
                                             X.flattened_oids()[0])
     return BlockArray.from_oid(r_oid,
                                shape=(X.shape[0], ),
                                dtype=predict_dtype,
                                cm=instance().cm)
Example #2
 def __init__(self, *args, **kwargs):
     device_id = None
     if self.__class__ in _place_on_node_0:
         device_id = instance().cm.devices()[0]
     self.actor = instance().cm.make_actor(name,
                                           *args,
                                           device_id=device_id,
                                           **kwargs)
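A minimal end-to-end sketch of how these actor-backed estimator wrappers are used. The concrete class name and import path below (LogisticRegression from nums.sklearn) are assumptions; substitute whichever wrapper your version exposes:

import numpy as np
from nums.sklearn import LogisticRegression  # assumed import path
from nums.core import application_manager

app = application_manager.instance()
# Single-block inputs, as required by the strict _check_array calls above.
X = app.array(np.random.random((8, 3)), block_shape=(8, 3))
y = app.array(np.random.randint(0, 2, size=8), block_shape=(8,))

model = LogisticRegression()   # __init__ above creates the remote actor
model.fit(X, y)                # forwards the flattened block OIDs to the actor
preds = model.predict(X)       # returns a BlockArray (see Example #1)
print(preds.get())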
Example #3
def train_test_split(*arrays,
                     test_size: Union[int, float] = None,
                     train_size: Union[int, float] = None,
                     random_state: Optional[Union[NumsRandomState,
                                                  int]] = None,
                     shuffle: bool = True,
                     stratify=None):
    # pylint: disable = protected-access
    updated_arrays = []
    for array in arrays:
        updated_arrays.append(_check_array(array))
    syskwargs = {
        "options": {
            "num_returns": 2 * len(updated_arrays)
        },
        "grid_entry": (0, ),
        "grid_shape": (1, ),
    }

    if random_state is None:
        rng_params = None
    else:
        if isinstance(random_state, int):
            # It's a seed.
            random_state: NumsRandomState = instance().random_state(
                random_state)
        rng_params = random_state._rng.new_block_rng_params()

    array_oids = [array.flattened_oids()[0] for array in updated_arrays]
    result_oids = instance().cm.call("train_test_split",
                                     *array_oids,
                                     rng_params=rng_params,
                                     test_size=test_size,
                                     train_size=train_size,
                                     shuffle=shuffle,
                                     stratify=stratify,
                                     syskwargs=syskwargs)
    # Optimize by computing this directly.
    shape_dtype_oids = [
        instance().cm.shape_dtype(r_oid,
                                  syskwargs={
                                      "grid_entry": (0, ),
                                      "grid_shape": (1, )
                                  }) for r_oid in result_oids
    ]
    shape_dtypes = instance().cm.get(shape_dtype_oids)
    results = []
    for i, r_oid in enumerate(result_oids):
        shape, dtype = shape_dtypes[i]
        results.append(
            BlockArray.from_oid(r_oid,
                                shape=shape,
                                dtype=dtype,
                                cm=instance().cm))
    return results
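A usage sketch for the train_test_split wrapper defined above, assuming it is imported from the module that defines it; the data and split parameters are illustrative:

import numpy as np
from nums.core import application_manager

app = application_manager.instance()
X = app.array(np.arange(20).reshape(10, 2), block_shape=(10, 2))
y = app.array(np.arange(10), block_shape=(10,))

# Each returned element is a BlockArray whose shape and dtype were fetched
# via the shape_dtype calls above.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1337, shuffle=True)
print(X_train.shape, X_test.shape)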
Example #4
 def fit_transform(self, X: BlockArray, y: BlockArray = None):
     _check_array(X, True)
     if y is not None:
         _check_array(y, True)
         y = y.flattened_oids()[0]
     r_oid = instance().cm.call_actor_method(self.actor,
                                             "fit_transform",
                                             X.flattened_oids()[0], y)
     return BlockArray.from_oid(r_oid,
                                shape=X.shape,
                                dtype=float,
                                cm=instance().cm)
Example #5
def test_rwd_s3():
    import nums
    from nums.core import application_manager
    from nums.core import settings
    settings.system_name = "serial"
    nps_app_inst = application_manager.instance()

    conn = boto3.resource('s3', region_name='us-east-1')
    assert conn.Bucket('darrays') not in conn.buckets.all()
    conn.create_bucket(Bucket='darrays')

    array: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = nps_app_inst.array(array, block_shape=(3, 4))
    filename = "s3://darrays/read_write_delete_array_test"
    write_result_ba: BlockArray = nums.write(filename, ba)
    write_result_np = write_result_ba.get()
    for grid_entry in write_result_ba.grid.get_entry_iterator():
        assert write_result_ba[grid_entry].get() == write_result_np[grid_entry]
    ba_read: BlockArray = nums.read(filename)
    assert nps_app_inst.get(nps_app_inst.allclose(ba, ba_read))
    delete_result_ba: BlockArray = nums.delete(filename)
    delete_result_np = delete_result_ba.get()
    for grid_entry in delete_result_ba.grid.get_entry_iterator():
        assert delete_result_ba[grid_entry].get(
        ) == delete_result_np[grid_entry]
Example #6
 def _get_and_register_block_shape(self, shape):
     # pylint: disable=import-outside-toplevel
     # Only allow this to be used if app manager is maintaining an app instance.
     import nums.core.application_manager as am
     assert am.is_initialized(), "Unexpected application state: " \
                                 "application instance doesn't exist."
     app = am.instance()
     return app.get_block_shape(shape, self.dtype)
Example #7
def nps_app_inst(request):
    # This triggers initialization; it's not to be mixed with the app_inst fixture.
    # Observed (core dumped) after updating this fixture to run functions with "serial" backend.
    # Last time this happened, it was due to poor control over the
    # scope and duration of ray resources.
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager
    settings.system_name = request.param
    yield application_manager.instance()
    application_manager.destroy()
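For context, a hypothetical conftest sketch showing how such a fixture is parametrized over backends with pytest; the parameter values are illustrative, not the project's actual configuration:

import pytest

@pytest.fixture(params=["serial", "ray"])
def nps_app_inst(request):
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager
    settings.system_name = request.param
    yield application_manager.instance()
    application_manager.destroy()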
Example #8
 def score(self,
           X: BlockArray,
           y: BlockArray,
           sample_weight: BlockArray = None):
     _check_array(X, True)
     _check_array(y, True)
     if sample_weight is not None:
         _check_array(sample_weight, True)
         sample_weight = sample_weight.flattened_oids()[0]
     r_oid = instance().cm.call_actor_method(
         self.actor,
         "score",
         X.flattened_oids()[0],
         y.flattened_oids()[0],
         sample_weight,
     )
     return BlockArray.from_oid(r_oid,
                                shape=(),
                                dtype=float,
                                cm=instance().cm)
Example #9
def random_stub():
    # pylint: disable = unused-variable
    import numpy.random as numpy_module
    from nums.core.array.random import NumsRandomState
    from nums.core.application_manager import instance
    app = instance()
    sys = app.system
    rs_inst = NumsRandomState(system=sys, seed=1337)
    numpy_items = sorted(
        systems_utils.get_module_functions(numpy_module).items())
    nums_items = sorted(systems_utils.get_instance_functions(rs_inst).items())
    raise NotImplementedError()
Example #10
def test_app_manager():
    for compute_name in ["numpy"]:
        for system_name in ["serial", "ray-cyclic", "ray-task"]:
            settings.compute_name = compute_name
            settings.system_name = system_name
            app: ArrayApplication = application_manager.instance()
            assert np.allclose(
                np.arange(10),
                app.arange(0, shape=(10, ), block_shape=(10, )).get())
            application_manager.destroy()
            assert not application_manager.is_initialized()
            time.sleep(1)
Example #11
    def check_swapaxes(_np_a, axis1, axis2):
        ns_ins = application_manager.instance()
        np_swapaxes = np.__getattribute__("swapaxes")
        ns_swapaxes = nps.__getattribute__("swapaxes")

        _ns_a = nps.array(_np_a)
        _ns_ins_a = ns_ins.array(_np_a, block_shape=block_shape)

        _np_result = np_swapaxes(_np_a, axis1, axis2)
        _ns_result = ns_swapaxes(_ns_a, axis1, axis2)
        _ns_ins_result = ns_swapaxes(_ns_ins_a, axis1, axis2)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())
Example #12
def example(max_iters, batch_size):

    app = am.instance()
    model = LogisticRegression(app=app,
                               cluster_shape=(1, 1),
                               fit_intercept=False)
    X, y = sample(app, sample_size=8)
    model.init(X)

    for i in range(max_iters):
        # Take a step.
        X, y = sample(app, batch_size)
        model.partial_fit(X, y)
        print("train accuracy",
              (nps.sum(y == model.predict(X)) / X.shape[0]).get())
Example #13
def nps_app_inst(request):
    # This triggers initialization; it's not to be mixed with the app_inst fixture.
    # Observed (core dumped) after updating this fixture to run functions with "serial" backend.
    # Last time this happened, it was due to poor control over the
    # scope and duration of ray resources.
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager
    import nums.numpy as nps
    settings.system_name, settings.device_grid_name = request.param

    # Need to reset numpy random state.
    # It's the only stateful numpy API object.
    nps.random.reset()
    yield application_manager.instance()
    application_manager.destroy()
Example #14
def test_app_manager(compute_name, system_name, device_grid_name):
    settings.use_head = True
    settings.compute_name = compute_name
    settings.system_name = system_name
    settings.device_grid_name = device_grid_name

    app: ArrayApplication = application_manager.instance()
    app_arange = app.arange(0, shape=(10, ), block_shape=(10, ))
    assert np.allclose(np.arange(10), app_arange.get())
    application_manager.destroy()
    assert not application_manager.is_initialized()
    time.sleep(1)

    # Revert for other tests.
    settings.compute_name = "numpy"
    settings.system_name = "ray"
    settings.device_grid_name = "cyclic"
Example #15
def test_rwd():
    import nums
    from nums.core import application_manager
    from nums.core import settings

    settings.system_name = "serial"
    nps_app_inst = application_manager.instance()

    array: np.ndarray = np.random.random(35).reshape(7, 5)
    ba: BlockArray = nps_app_inst.array(array, block_shape=(3, 4))
    filename = "/tmp/darrays/read_write_delete_array_test"
    write_result_ba: BlockArray = nums.write(filename, ba)
    write_result_np = write_result_ba.get()
    for grid_entry in write_result_ba.grid.get_entry_iterator():
        assert write_result_ba[grid_entry].get() == write_result_np[grid_entry]
    ba_read: BlockArray = nums.read(filename)
    assert nps_app_inst.get(nps_app_inst.allclose(ba, ba_read))
    delete_result_ba: bool = nums.delete(filename)
    assert delete_result_ba
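The same read/write/delete API outside a test, as a minimal sketch; the path is illustrative:

import numpy as np
import nums
from nums.core import settings
from nums.core import application_manager

settings.system_name = "serial"
app = application_manager.instance()

ba = app.array(np.random.random((7, 5)), block_shape=(3, 4))
nums.write("/tmp/darrays/example_array", ba)        # returns per-block write results
restored = nums.read("/tmp/darrays/example_array")
assert app.get(app.allclose(ba, restored))
assert nums.delete("/tmp/darrays/example_array")    # returns a bool for local files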
Example #16
def _check_array(array, strict=False):
    if not isinstance(array, BlockArray):
        if strict:
            raise TypeError("Input array is not a BlockArray.")
        # These arrays should be a single block.
        array = instance().array(array, block_shape=array.shape)
    if not array.is_single_block():
        if strict:
            raise ValueError("Input array is not a single block.")
        array_size_gb = array.nbytes / 10**9
        if array_size_gb > 100.0:
            raise MemoryError("Operating on an "
                              "array of size %sGB is not supported." %
                              array_size_gb)
        elif array_size_gb > 10.0:
            # This is a large array (more than 10GB); warn before converting.
            warnings.warn("Attempting to convert an array "
                          "of size %sGB to a single block." % array_size_gb)
        array = array.to_single_block()
    return array
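An illustrative sketch of the two modes of _check_array; the call sites are assumed, and the strict form is what the estimator wrappers above use:

import numpy as np

X = np.ones((4, 3))
ba = _check_array(X)               # non-strict: wraps the ndarray as a single-block BlockArray
try:
    _check_array(X, strict=True)   # strict: a plain ndarray raises TypeError
except TypeError as exc:
    print(exc)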
Example #17
def test_app_manager(compute_name, system_name, device_grid_name, num_cpus):
    settings.use_head = True
    settings.compute_name = compute_name
    settings.system_name = system_name
    settings.device_grid_name = device_grid_name
    settings.num_cpus = num_cpus

    app: ArrayApplication = application_manager.instance()
    print(settings.num_cpus, num_cpus, app.cm.num_cores_total())
    app_arange = app.arange(0, shape=(10, ), block_shape=(10, ))
    assert np.allclose(np.arange(10), app_arange.get())
    if num_cpus is None:
        assert app.cm.num_cores_total() == get_num_cores()
    else:
        assert app.cm.num_cores_total() == num_cpus
    application_manager.destroy()
    assert not application_manager.is_initialized()
    time.sleep(1)

    # Revert for other tests.
    settings.compute_name = "numpy"
    settings.system_name = "ray"
    settings.device_grid_name = "cyclic"
    settings.num_cpus = None
Example #18
def benchmark_mlp(num_gpus, N_list, system_class_list, d=1000, optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    global app

    for N in N_list:
        N = int(N)
        N_block = N // num_gpus
        d_block = d // 1

        for system_class in system_class_list:
            # try:
            if True:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp

                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib

                    X, y = np_sample(np, sample_size=N, feature=1000)
                    W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = np_init_weights(np, X, y)

                    # X = arr_lib.zeros((N, d), dtype=dtype)
                    # y = arr_lib.ones((N,), dtype=dtype)

                    # Prevent the Singular matrix Error in np.linalg.inv
                    # arange = arr_lib.arange(N)
                    # X[arange, arange % d] = 1
                    X = cp.asarray(X)
                    y = cp.asarray(y)
                    # print("initialize weights")
                    W_in_1 = cp.asarray(W_in_1)
                    W_1_2 = cp.asarray(W_1_2)
                    W_2_out = cp.asarray(W_2_out)
                    # print("initialize bias")
                    B_1 = cp.asarray(B_1)
                    B_2 = cp.asarray(B_2)
                    B_out = cp.asarray(B_out)
                    cp.cuda.Device(0).synchronize()
                    # W_in_1 = app.random.normal(shape=(X.shape[1], dim_1), dtype=X.dtype)
                    # W_1_2 = app.random.normal(shape=(dim_1, dim_2), dtype=X.dtype)
                    # W_2_out = app.random.normal(shape=(dim_2, y.shape[1]), dtype=X.dtype)

                    # Initialize bias

                    # B_1 = app.zeros((X.shape[0], dim_1), dtype=X.dtype)
                    # B_2 = app.zeros((X.shape[0], dim_2), dtype=X.dtype)
                    # B_out = app.zeros((X.shape[0], y.shape[1]), dtype=X.dtype)
                    # print("done initialize bias")
                    # cp.cuda.Device(0).synchronize()

                    # Benchmark one step mlp
                    def func():
                        tic = time.time()
                        # toc_end = np_feedforward(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                        toc_end = one_step_fit_np(arr_lib, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, toc_end - tic, 0, None

                    # func()
                    # exit()

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                else:
                    # Init system
                    name = system_class.__name__
                    # system = system_class(num_gpus)
                    app = am.instance()
                    app.system.num_gpus = num_gpus
                    app.system.cluster_shape = (num_gpus, 1)

                    app.system.optimizer = optimizer
                    # system.init()
                    # app = ArrayApplication(system=system, filesystem=FileSystem(system))

                    # Make dataset
                    # print("hi there")
                    nps.random.seed(0)
                    # print("a", flush=True)
                    X, y = sample(app, sample_size=N, feature=1000, num_gpus=num_gpus)
                    # print(f"X.shape {X.shape} X.block_shape {X.block_shape}")
                    # print(f"y.shape {y.shape} y.block_shape {y.block_shape}")
                    W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = init_weights(app, num_gpus, X, y)

                    # X = sample(app, sample_size=N, feature=1000, num_gpus=num_gpus)
                    # print("b", flush=True)

                    # X = app.ones((N, d), block_shape=(N_block, d_block), dtype=dtype)
                    # y = app.ones((N,), block_shape=(N_block,), dtype=dtype)

                    # Benchmark one step MLP
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2,
                                                                       B_out, num_gpus)
                            # toc_init, toc_opt = feedforward_opt(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2,
                            # B_out, num_gpus)
                        else:
                            toc_opt = feedforward(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                            toc_init = tic
                            # toc_opt = one_step_fit(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)

                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)

                    del (X, y, app, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                    # del (X, app)
            # except Exception:
            else:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            log_str = format_string % (
                # system_class.__name__,
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_lr.csv", "a") as f:
                f.write(log_str + "\n")
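benchmark_func itself is not shown; below is a minimal sketch consistent with how it is called in these benchmarks (the repeat count and any warm-up behavior are assumptions):

def benchmark_func(func, repeat=3):
    # func() returns (total, opt, init, _); collect each timing component.
    costs, costs_opt, costs_init = [], [], []
    for _ in range(repeat):
        total, opt, init, _ = func()
        costs.append(total)
        costs_opt.append(opt)
        costs_init.append(init)
    return costs, costs_opt, costs_init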
Example #19
def benchmark_lr(num_gpus, N_list, system_class_list, d=1000, optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    global app

    for N in N_list:
        N = int(N)

        for system_class in system_class_list:
            # try:
            if True:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp

                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib

                    X = arr_lib.zeros((N, d), dtype=dtype)
                    y = arr_lib.ones((N,), dtype=dtype)

                    # Prevent the Singular matrix Error in np.linalg.inv
                    arange = arr_lib.arange(N)
                    X[arange, arange % d] = 1
                    cp.cuda.Device(0).synchronize()

                    # Benchmark one step LR
                    def func():
                        tic = time.time()
                        one_step_fit_np(arr_lib, X, y)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, app)
                else:
                    # Init system
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)

                    # Make dataset
                    nps.random.seed(0)
                    X = app.ones((N, d), block_shape=(N // num_gpus, d), dtype=dtype)
                    y = app.ones((N,), block_shape=(N // num_gpus,), dtype=dtype)
                    theta = app.zeros((X.shape[1],), (X.block_shape[1],), dtype=X.dtype)

                    # Benchmark one step LR
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(app, X, y, theta, num_gpus=num_gpus)
                        else:
                            toc_init = tic
                            toc_opt = one_step_fit(app, X, y, theta)
                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    
                    del (X, y, app)
            #except Exception:
            else:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_lr.csv", "a") as f:
                f.write(log_str + "\n")
Example #20
def benchmark_bop(num_gpus, N_list, system_class_list, d=400000, optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    # global app

    for N in N_list:
        N = int(N)
        d1 = N
        d2 = d
        for system_class in system_class_list:
            # try:
            if True:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp

                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib

                    # X = arr_lib.ones((N, d), dtype=dtype)

                    W = arr_lib.ones(shape=(d1, d2), dtype=dtype)
                    D = arr_lib.ones(shape=(d2, N), dtype=dtype)
                    # Prevent the Singular matrix Error in np.linalg.inv
                    # arange = arr_lib.arange(N)
                    # X[arange, arange % d] = 1
                    cp.cuda.Device(0).synchronize()

                    # Benchmark bop
                    def func():
                        tic = time.time()
                        Z = W @ D
                        # Z = X.T @ X
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    # del (X, app)
                    del (W, D, app)
                else:
                    # Init system
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)

                    W = app.ones(shape=(d1, d2), block_shape=(d1, d2 // num_gpus), dtype=dtype)
                    D = app.ones(shape=(d2, N), block_shape=(d2 // num_gpus, N), dtype=dtype)
                    # X = app.ones((N, d), block_shape=(N // num_gpus, d), dtype=dtype)

                    # Benchmark bop
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = matmul_opt(app, W, D, num_gpus)
                            # toc_init, toc_opt = matmul_opt(app, X, num_gpus)
                        else:
                            Z = (W @ D).touch()
                            # Z = (X.T @ X).touch()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)

                    del (W, D)
                    am.destroy()
            #except Exception:
            else:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_bop.csv", "a") as f:
                f.write(log_str + "\n")
Example #21
def benchmark_mlp(num_gpus,
                  N_list,
                  system_class_list,
                  d=1000,
                  optimizer=True,
                  dtype=np.float32):
    # format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    # print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    global app

    for N in N_list:
        N = int(N)
        N_block = N // num_gpus
        d_block = d // 1

        app = am.instance()  # cupy-parallel
        app.system.num_gpus = num_gpus
        app.system.cluster_shape = (num_gpus, 1)

        app.system.optimizer = optimizer
        nps.random.seed(0)
        # print("a", flush=True)
        X, y = sample(app, sample_size=N, feature=1000, num_gpus=num_gpus)
        # print(f"X.shape {X.shape} X.block_shape {X.block_shape}")
        # print(f"y.shape {y.shape} y.block_shape {y.block_shape}")
        W_in_1, W_1_2, W_2_out = data_init_weights(app, num_gpus, X, y)

        X_cp = X.copy()
        y_cp = y.copy()
        W_in_1_cp = W_in_1.copy()
        W_1_2_cp = W_1_2.copy()
        W_2_out_cp = W_2_out.copy()

        # No Partition
        import cupy as cp
        with cp.cuda.Device(0):
            X_tmp = cp.asarray(X_cp.get())
            y_tmp = cp.asarray(y_cp.get())
            W_in_1_tmp = cp.asarray(W_in_1_cp.get())
            W_1_2_tmp = cp.asarray(W_1_2_cp.get())
            W_2_out_tmp = cp.asarray(W_2_out_cp.get())

        cp.cuda.Device(0).synchronize()

        np.testing.assert_allclose(W_1_2.get(), W_1_2_tmp.get())
        print(" W_in_1 all close ")
        for system_class in system_class_list:
            # try:
            if True:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class

                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    # app = arr_lib

                    # X = arr_lib.zeros((N, d), dtype=dtype)
                    # y = arr_lib.ones((N,), dtype=dtype)

                    # Prevent the Singular matrix Error in np.linalg.inv
                    # arange = arr_lib.arange(N)
                    # X[arange, arange % d] = 1
                    # cp.cuda.Device(0).synchronize()

                    # W_in_1 = app.random.normal(shape=(X.shape[1], dim_1), dtype=X.dtype)
                    # W_1_2 = app.random.normal(shape=(dim_1, dim_2), dtype=X.dtype)
                    # W_2_out = app.random.normal(shape=(dim_2, y.shape[1]), dtype=X.dtype)

                    # Initialize bias

                    # B_1 = app.zeros((X.shape[0], dim_1), dtype=X.dtype)
                    # B_2 = app.zeros((X.shape[0], dim_2), dtype=X.dtype)
                    # B_out = app.zeros((X.shape[0], y.shape[1]), dtype=X.dtype)
                    # print("done initialize bias")
                    print("----Cupy----")
                    y_tmp = \
                        np_feedforward(arr_lib, X_tmp, W_in_1_tmp, W_1_2_tmp, W_2_out_tmp)
                    cp.cuda.Device(0).synchronize()

                    # Benchmark one step LR
                    def func():
                        tic = time.time()
                        # one_step_fit(arr_lib, X_tmp, y_tmp, W_in_1_tmp, W_1_2_tmp, W_2_out_tmp, B_1_tmp, B_2_tmp, B_out_tmp)
                        # one_step_fit(arr_lib, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                        # cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, None

                    # func()
                    # exit()

                    # costs = benchmark_func(func)
                    del (X_tmp, W_in_1_tmp, W_1_2_tmp, W_2_out_tmp)
                else:
                    # Init system
                    # name = system_class.__name__
                    # system = system_class(num_gpus)
                    # app = am.instance()
                    # app.system.num_gpus = num_gpus
                    # app.system.cluster_shape = (num_gpus, 1)
                    #
                    # app.system.optimizer = optimizer
                    # system.init()
                    # app = ArrayApplication(system=system, filesystem=FileSystem(system))

                    # Make dataset
                    # print("hi there")

                    # X = sample(app, sample_size=N, feature=1000, num_gpus=num_gpus)
                    # print("b", flush=True)

                    # X = app.ones((N, d), block_shape=(N_block, d_block), dtype=dtype)
                    # y = app.ones((N,), block_shape=(N_block,), dtype=dtype)
                    print("----CupyParallel----")
                    if optimizer:
                        # W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = \
                        #     one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out, num_gpus)
                        y = feedforward_opt(app, X, W_in_1, W_1_2, W_2_out,
                                            num_gpus)
                    else:
                        # toc_opt = feedforward(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                        # toc_init = tic
                        W_in_1, W_1_2, W_2_out, B_1, B_2, B_out = \
                            one_step_fit(app, app.one, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                    # Benchmark one step MLP

                    # def func():
                    # tic = time.time()
                    # if optimizer:
                    #     toc_init, toc_opt = one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2,
                    #                                          B_out, num_gpus)
                    #     # toc_init, toc_opt = feedforward_opt(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2,
                    #     # B_out, num_gpus)
                    # else:
                    #     # toc_opt = feedforward(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
                    #     toc_init = tic
                    #     toc_opt = one_step_fit(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)

                    # toc = time.time()
                    # return toc - tic, toc_opt - tic, toc_init - tic, None

                    # costs, costs_opt, costs_init = benchmark_func(func)
                    costs = [1]
                    costs_opt = [0]
                    costs_init = [0]
                    del (X, W_in_1, W_1_2, W_2_out)
            # except Exception:
            else:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            # log_str = format_string % (
            #     # system_class.__name__,
            #     # name,
            #     "%d" % N,
            #     "%.4f" % np.mean(costs),
            #     "%.4f" % np.mean(costs_opt),
            #     "%.4f" % np.mean(costs_init),
            #     "%.2f" % (np.std(costs) / np.mean(costs)),
            # )
            # print(log_str)
            # with open("result_lr.csv", "a") as f:
            #     f.write(log_str + "\n")
        print("assert all close ")
        # np.testing.assert_allclose(X.get(), X_tmp.get())
        # print(" X all close ")
        np.testing.assert_allclose(y.get(), y_tmp.get())
        print(" y all close ")

        del (y_tmp, y, app, X_cp, y_cp, W_in_1_cp, W_1_2_cp, W_2_out_cp)
Example #22
def test_shape(nps_app_inst):
    from nums import numpy as nps
    from nums.core import application_manager

    assert nps_app_inst is not None

    shape = (10, 20, 30, 40)
    block_shape = (10, 10, 10, 10)
    ns_ins = application_manager.instance()

    def check_expand_and_squeeze(_np_a, axes):
        np_expand_dims = np.__getattribute__("expand_dims")
        ns_expand_dims = nps.__getattribute__("expand_dims")
        np_squeeze = np.__getattribute__("squeeze")
        ns_squeeze = nps.__getattribute__("squeeze")

        _ns_a = nps.array(_np_a)
        _ns_ins_a = ns_ins.array(_np_a, block_shape=block_shape)

        _np_result = np_expand_dims(_np_a, axes)
        _ns_result = ns_expand_dims(_ns_a, axes)
        _ns_ins_result = ns_expand_dims(_ns_ins_a, axes)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())
        check_dim(_np_result, _ns_result)
        check_dim(_np_result, _ns_ins_result)

        _np_result = np_squeeze(_np_a)
        _ns_result = ns_squeeze(_ns_a)
        _ns_ins_result = ns_squeeze(_ns_ins_a)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())
        check_dim(_np_result, _ns_result)
        check_dim(_np_result, _ns_ins_result)

    def check_dim(_np_a, _ns_a):
        np_ndim = np.__getattribute__("ndim")
        assert np_ndim(_np_a) == np_ndim(_ns_a)

    def check_swapaxes(_np_a, axis1, axis2):
        ns_ins = application_manager.instance()
        np_swapaxes = np.__getattribute__("swapaxes")
        ns_swapaxes = nps.__getattribute__("swapaxes")

        _ns_a = nps.array(_np_a)
        _ns_ins_a = ns_ins.array(_np_a, block_shape=block_shape)

        _np_result = np_swapaxes(_np_a, axis1, axis2)
        _ns_result = ns_swapaxes(_ns_a, axis1, axis2)
        _ns_ins_result = ns_swapaxes(_ns_ins_a, axis1, axis2)
        assert np.allclose(_np_result, _ns_result.get())
        assert np.allclose(_np_result, _ns_ins_result.get())

    np_A = np.ones(shape)
    check_expand_and_squeeze(np_A, axes=0)
    check_expand_and_squeeze(np_A, axes=2)
    check_expand_and_squeeze(np_A, axes=4)
    check_expand_and_squeeze(np_A, axes=(2, 3))
    check_expand_and_squeeze(np_A, axes=(0, 5))
    check_expand_and_squeeze(np_A, axes=(0, 5, 6))
    check_expand_and_squeeze(np_A, axes=(2, 3, 5, 6, 7))

    for a1 in range(4):
        for a2 in range(4):
            check_swapaxes(np_A, axis1=a1, axis2=a2)
Example #23
def init():
    # pylint: disable = import-outside-toplevel
    # Explicitly initialize application instance.
    from nums.core.application_manager import instance

    return instance()
Example #24
def benchmark_mlp(num_gpus,
                  N_list,
                  system_class_list,
                  d=140000,
                  optimizer=True,
                  dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s,%10s,%10s"
    print(format_string %
          ("Library", "N", "d_in", "d_2", "Cost", "CostOpt", "CostInit", "CV"))
    global app

    for N in N_list:
        for system_class in system_class_list:
            # try:
            if True:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp

                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib

                    X, y = np_sample(np, sample_size=N, feature=d, dtype=dtype)
                    W_in_1, W_1_2, W_2_out = np_init_weights(np,
                                                             X,
                                                             y,
                                                             d2,
                                                             dtype=dtype)

                    X = cp.asarray(X)
                    y = cp.asarray(y)
                    W_in_1 = cp.asarray(W_in_1)
                    W_1_2 = cp.asarray(W_1_2)
                    W_2_out = cp.asarray(W_2_out)

                    cp.cuda.Device(0).synchronize()

                    # Benchmark one step mlp
                    def func():
                        tic = time.time()
                        toc_end = one_step_fit_np(arr_lib, X, y, W_in_1, W_1_2,
                                                  W_2_out)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, toc_end - tic, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, W_in_1, W_1_2, W_2_out)
                else:
                    # Init system
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)

                    # Make dataset
                    nps.random.seed(0)
                    X, y = sample(app,
                                  sample_size=N,
                                  feature=d,
                                  num_gpus=num_gpus,
                                  dtype=dtype)
                    W_in_1, W_1_2, W_2_out = model_init_weights(app,
                                                                num_gpus,
                                                                X,
                                                                y,
                                                                d2,
                                                                verbose=False)

                    # Benchmark one step MLP
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(
                                app, X, y, W_in_1, W_1_2, W_2_out, num_gpus)
                        else:
                            toc_init = tic
                            toc_opt = one_step_fit(app, X, y, W_in_1, W_1_2,
                                                   W_2_out)

                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)

                    del (X, y, app, W_in_1, W_1_2, W_2_out)
            # except Exception:
            else:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            log_str = format_string % (
                name,
                "%d" % N,
                "%d" % d,
                "%d" % d2,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_mlp_model.csv", "a") as f:
                f.write(log_str + "\n")
Example #25
 def fit(self, X: BlockArray, y: BlockArray):
     _check_array(X, True)
     _check_array(y, True)
     instance().cm.call_actor_method(self.actor, "fit",
                                     X.flattened_oids()[0],
                                     y.flattened_oids()[0])
Example #26
def test_nan_reductions(nps_app_inst):
    assert nps_app_inst is not None

    ba: BlockArray = nps.array([[-1, 4, np.nan, 5], [3, 2, nps.nan, 6]])
    block_shapes = [(1, 1), (1, 2), (1, 4), (2, 1), (2, 4)]
    for block_shape in block_shapes:
        ba = ba.reshape(block_shape=block_shape)
        np_arr = ba.get()
        op_params = ["nanmax", "nanmin", "nansum", "nanmean", "nanvar", "nanstd"]
        axis_params = [None, 0, 1]
        keepdims_params = [True, False]

        for op, axis, keepdims in itertools.product(
            op_params, axis_params, keepdims_params
        ):
            ns_op = nps.__getattribute__(op)
            np_op = np.__getattribute__(op)
            np_result = np_op(np_arr, axis=axis, keepdims=keepdims)
            ba_result: BlockArray = ns_op(ba, axis=axis, keepdims=keepdims)
            assert ba_result.grid.grid_shape == ba_result.blocks.shape
            assert ba_result.shape == np_result.shape
            assert np.allclose(ba_result.get(), np_result, equal_nan=True)


if __name__ == "__main__":
    from nums.core import application_manager
    import nums.core.settings

    nums.core.settings.system_name = "serial"
    nps_app_inst = application_manager.instance()
    test_nan_reductions(nps_app_inst)
Example #27
from nums.core.application_manager import instance
from nums.core.array.blockarray import BlockArray

from nums.core.array.application import ArrayApplication
from nums.core.systems.systems import System

import numpy as np

app: ArrayApplication = instance()
system: System = app.system

X: BlockArray = app.random.random(shape=(2, 2), block_shape=(1, 1))
Y: BlockArray = app.random.random(shape=(2, 2), block_shape=(1, 1))

print(X.get())
print(Y.get())

print((X + Y).get())

first_entry = list(X.grid.get_entry_iterator())[0]
x_block = X.blocks[first_entry]
y_block = Y.blocks[first_entry]

print(x_block.get())
print(y_block.get())

result = app.zeros(shape=(2, 2), block_shape=(1, 1))

# system.call_with_options()

result.blocks[first_entry].oid = system.bop("add",