def t1():
    """Apply update(rhs, rhs, out) to the diagonal block a[j, j]."""
    # Bring both operand blocks onto the current device.
    diag = clone_here(a[j, j])
    panel = clone_here(a[j, k])
    # Update the diagonal block using the panel against itself.
    diag = update(panel, panel, diag)
    # Publish the updated block back into the global array.
    copy(a[j, j], diag)
Example #2
0
            def t1():
                """Downdate a[j, j] by rhs @ rhs.T on the current device."""
                diag = clone_here(a[j, j])  # bring operands to this device
                row = clone_here(a[j, k])
                diag -= row @ row.T         # rank-k downdate of the diagonal block
                copy(a[j, j], diag)         # publish result to the global array
Example #3
0
        def _check_set(x):
            """Store `value` (captured from the enclosing scope) into one
            partition of the tensor.

            `x` is a (current_partition, index) pair; the index locates
            the slot inside self._latest_view.
            NOTE(review): `value`, `is_array`, `copy`, and `warn` come
            from the enclosing scope -- not visible here; confirm.
            """
            x, i = x

            # TODO (bozhi) need C pointers!
            def _hard_set(i, value):
                # Overwrite the slot in the view directly (1-D or 2-D only).
                if len(i) == 1: self._latest_view[i[0]] = value
                elif len(i) == 2: self._latest_view[i[0]][i[1]] = value
                else:
                    raise NotImplementedError(
                        "High-dimensional PartitionedTensor with None not supported!"
                    )

            if x is None:
                # No existing partition in this slot: install `value` directly.
                _hard_set(i, value)
                return
            is_to_array = is_array(x)
            is_from_array = is_array(value)
            if is_from_array and is_to_array:
                try:
                    # Prefer an in-place copy so existing references to the
                    # partition stay valid.
                    copy(x, value)
                except ValueError:
                    warn(
                        "Incompatible arrays (e.g. different shapes). Overwritting."
                    )
                    # Shapes were incompatible; replace the slot wholesale.
                    _hard_set(i, value)
            else:
                # TODO (bozhi): should not allow None assignment but implement free(index)
                if not is_to_array and x is not None:
                    warn("Array partition was modified as %s object." %
                         type(x))
                if not is_from_array and value is not None:
                    warn("Modifying array partition with %s object!" %
                         type(value))
                # NOTE(review): this rebinds the local name only -- it does
                # NOT write `value` into self._latest_view (probably intended
                # to be _hard_set(i, value)); confirm intent before changing.
                x = value
Example #4
0
 def t4():
     """Triangular-solve panel a[i, j] against the factor a[j, j].

     Moves both blocks to the current device, performs the solve, and
     writes the result back into the global array. Debug prints from the
     original were removed (the clean variant of this task elsewhere in
     the file has none).
     """
     factor = clone_here(a[j, j])
     panel = clone_here(a[i, j])
     out = ltriang_solve(factor, panel)
     copy(a[i, j], out)  # publish the solved panel to the global array
Example #5
0
                def t3():
                    """Apply update(rhs1, rhs2, out) to block a[i, j]."""
                    # Fetch all three operand blocks onto this device.
                    target = clone_here(a[i, j])
                    lhs = clone_here(a[i, k])
                    rhs = clone_here(a[j, k])
                    target = update(lhs, rhs, target)
                    copy(a[i, j], target)  # write back to the global array
Example #6
0
    async def run_jacobi():
        """Drive `steps` Jacobi iterations over the row-partitioned grid.

        Alternates reads/writes between the a0/a1 row-group partitions,
        spawning one task per block per step; each task depends on its own
        block and its immediate neighbors from the previous step.
        NOTE(review): relies on names from the enclosing scope that are not
        visible here (`steps`, `divisions`, `mapper`, `a0_row_groups`,
        `a1_row_groups`, `a1`, `start`, `jacobi`, `copy`, `spawn`,
        `TaskSpace`, `CompletedTaskSpace`) -- confirm against the caller.
        """
        assert steps > 0
        # Specify which set of blocks is used as input or output
        # (they will be swapped for each iteration).
        in_blocks = a0_row_groups
        out_blocks = a1_row_groups
        # Create a set of labels for the tasks that perform the first
        # Jacobi iteration step.
        previous_block_tasks = CompletedTaskSpace()
        # Now create the tasks for subsequent iteration steps.
        for i in range(steps):
            # Swap input and output blocks for the next step.
            in_blocks, out_blocks = out_blocks, in_blocks
            # Create a new set of labels for the tasks that do this iteration step.
            current_block_tasks = TaskSpace("block_tasks[{}]".format(i))
            # Create the tasks to do the i'th iteration.
            # As before, each task needs the following info:
            #  a block index "j"
            #  a "device" where it should execute (supplied by mapper used for partitioning)
            #  the "in_block" of data used as input
            #  the "out_block" to write the output to
            for j in range(divisions):
                device = mapper.device(j)
                in_block = in_blocks[j]
                out_block = out_blocks[j]
                # Make each task operating on each block depend on the tasks for
                # that block and its immediate neighbors from the previous iteration.
                @spawn(current_block_tasks[j],
                       dependencies=[
                           previous_block_tasks[max(0, j -
                                                    1):min(divisions, j + 2)]
                       ],
                       placement=device)
                def device_local_jacobi_task():
                    # Read boundary values from adjacent blocks in the partition.
                    # This may communicate across device boundaries.
                    if j > 0:
                        copy(in_block[0], in_blocks[j - 1][-2])
                    if j < divisions - 1:
                        copy(in_block[-1], in_blocks[j + 1][1])
                    # Run the computation, dispatching to device specific code.
                    jacobi(in_block, out_block)

            # For the next iteration, use the newly created tasks as
            # the tasks from the previous step.
            previous_block_tasks = current_block_tasks
        await previous_block_tasks
        # Ensure all device work has finished before reading the timer.
        cupy.cuda.get_current_stream().synchronize()
        cupy.cuda.Stream.null.synchronize()
        end = time.perf_counter()
        # NOTE(review): `start` is presumably recorded by the enclosing
        # scope before the first iteration -- confirm.
        print(end - start)

        # This depends on all the tasks from the last iteration step.
        # Reassemble the global result; interior blocks drop their first
        # and/or last (boundary-exchange) rows when copied back.
        for j in range(divisions):
            start_index = 1 if j > 0 else 0
            end_index = -1 if j < divisions - 1 else None  # None indicates the last element of the dimension
            copy(a1[mapper.slice(j, len(a1))],
                 out_blocks[j][start_index:end_index])
Example #7
0
 def device_local_jacobi_task():
     """One Jacobi step on this device's block, after a boundary exchange."""
     # Pull boundary rows from the neighboring partitions; these copies
     # may cross device boundaries.
     if j > 0:
         left = in_blocks[j - 1]
         copy(in_block[0], left[-2])
     if j < divisions - 1:
         right = in_blocks[j + 1]
         copy(in_block[-1], right[1])
     # Dispatch to the device-specific Jacobi kernel.
     jacobi(in_block, out_block)
Example #8
0
                def t3():
                    """Downdate block a[i, j] by rhs1 @ rhs2.T."""
                    # Local copies of the three operand blocks.
                    block = clone_here(a[i, j])
                    left = clone_here(a[i, k])
                    right = clone_here(a[j, k])
                    block -= left @ right.T
                    copy(a[i, j], block)  # store result back globally
Example #9
0
 def t3():
     """Downdate a[i, j] by a[i, k] @ a[j, k].T on this device."""
     block = clone_here(a[i, j])
     lhs = clone_here(a[i, k])
     rhs = clone_here(a[j, k])
     block -= lhs @ rhs.T
     copy(a[i, j], block)
Example #10
0
 def t2():
     # Cholesky-factor the diagonal block on this device, then write
     # the factor back into the global array.
     copy(a[j, j], cholesky(clone_here(a[j, j])))
Example #11
0
File: inner.py  Project: bozhiyou/Parla.py
 def inner_local():
     """Compute this partition's inner product and store it in its slot."""
     # The i:i+1 slice keeps the destination a one-element array.
     local_dot = a_part[i] @ b_part[i]
     copy(partial_sums[i:i + 1], local_dot)
Example #12
0
 def b():
     # Copy partition xp[j][j] into partition xp[i][j].
     src = xp[j][j]
     copy(xp[i][j], src)
Example #13
0
 def c():
     # Write partition yp[i][i] back into its slice of the global vector y.
     dest = y[mapper.slice_x(i, y.shape[0])]
     copy(dest, yp[i][i])
Example #14
0
 def inner_local():
     """Write a_part[i] @ b_part[i] into partial_sums[i]."""
     partial = a_part[i] @ b_part[i]
     copy(partial_sums[i:i + 1], partial)
Example #15
0
 def t1():
     """Subtract a[j, k] @ a[j, k].T from the diagonal block a[j, j]."""
     diag = clone_here(a[j, j])
     row = clone_here(a[j, k])
     diag -= row @ row.T
     copy(a[j, j], diag)
Example #16
0
def cholesky_inplace(a):
    """Cholesky-factor the square array `a` in place via CuPy.

    Raises:
        ValueError: if `a` is not square.
    """
    rows, cols = a.shape[0], a.shape[1]
    if rows != cols:
        raise ValueError("A square array is required.")
    local = clone_here(a)           # work on a device-local copy
    local[:] = cupy.linalg.cholesky(local)
    copy(a, local)                  # write the factor back into `a`
 def t4():
     """Triangular-solve panel a[i, j] against the factor a[j, j]."""
     diag = clone_here(a[j, j])
     block = clone_here(a[i, j])
     block = ltriang_solve(diag, block)
     copy(a[i, j], block)