Python distributionの例、ga4py.ga.distribution Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test.py プロジェクト: fred1653/lotsofcoresbook1code

def create_global_array(gatype):
    if NEW_API:
        g_a = ga.create_handle()
        ga.set_data(g_a, [n,n], gatype)
        ga.set_array_name(g_a, 'a')
        if USE_RESTRICTED:
            num_restricted = nproc/2 or 1
            restricted_list = np.arange(num_restricted) + num_restricted/2
            ga.set_restricted(g_a, restricted_list)
        if BLOCK_CYCLIC:
            if USE_SCALAPACK_DISTR:
                if nproc % 2 == 0:
                    ga.error('Available procs must be divisible by 2',nproc)
                ga.set_block_cyclic_proc_grid(g_a, block_size, proc_grid)
            else:
                ga.set_block_cyclic(g_a, block_size)
        if MIRROR:
            p_mirror = ga.pgroup_get_mirror()
            ga.set_pgroup(g_a, p_mirror)
        ga.allocate(g_a)
    else:
        if MIRROR:
            p_mirror = ga.pgroup_get_mirror()
            ga.create_config(gatype, (n,n), 'a', None, p_mirror)
        else:
            g_a = ga.create(gatype, (n,n), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    if MIRROR:
        lproc = me - ga.cluster_procid(inode, 0)
        lo,hi = ga.distribution(g_a, lproc)
    else:
        lo,hi = ga.distribution(g_a, me)
    ga.sync()
    return g_a

コード例 #2

0

ファイルを表示

def matrix_multiply():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [TOTALELEMS]*NDIM
    chunk = [TOTALELEMS/nprocs-1]*NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not g_b or not g_c: ga.eror("duplicate failed")
    if not me: print "Created Arrays B and C"

    # Initialize data in matrices a and b.
    if not me: print "Initializing matrix A and B"
    a = np.random.rand(*dims)*29
    b = np.random.rand(*dims)*37

    # Copy data to global arrays g_a and g_b.
    if not me:
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo,hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b.
    a = ga.get(g_a, (lo[0],0), (hi[0],dims[0]))
    b = ga.get(g_b, (0,lo[1]), (dims[1],hi[1]))

    # Do local matrix multiplication and store the result in local
    # buffer c. Start by evaluating the transpose of b.
    btrns = b.transpose()

    # Multiply a and b to get c.
    c = np.dot(a,b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)

コード例 #3

0

ファイルを表示

ファイル: transp1D.answer.py プロジェクト: GlobalArrays/ga-tutorial

def TRANSPOSE1D():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs * TOTALELEMS + nprocs / 2]
    chunk = [TOTALELEMS]  # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    g_b = ga.duplicate(g_a, "array B")
    if not g_b: ga.error("duplicate failed")
    if not me: print "Created Array B"

    # initialize data in g_a
    if not me:
        print "Initializing matrix A"
        ga.put(g_a, np.arange(dims[0], dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    lo, hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    ga.put(g_b, b, dims[0] - hi[0], dims[0] - lo[0])

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me: verify(g_a, g_b)

    # Deallocate arrays
    ga.destroy(g_a)
    ga.destroy(g_b)

コード例 #4

0

ファイルを表示

ファイル: perf.py プロジェクト: fred1653/lotsofcoresbook1code

def test2D():
    n = 1024
    buf = np.zeros((n,n), dtype=np.float64)
    chunk = np.asarray([1,3,4,9,16,24,30,48,64,91,128,171,256,353,440,512])
    g_a = ga.create(ga.C_DBL, (n,n), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print (' Performance of GA get, put & acc'
                ' for square sections of array[%d,%d]' % (n,n))
    lo,hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc(g_a, n, chunk, buf, lo, hi, False)

コード例 #5

0

ファイルを表示

ファイル: transp1D.answer.py プロジェクト: fred1653/lotsofcoresbook1code

def TRANSPOSE1D():
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs*TOTALELEMS + nprocs/2]
    chunk = [TOTALELEMS] # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a: ga.error("create failed: A")
    if not me: print "Created Array A"

    g_b = ga.duplicate(g_a, "array B")
    if not g_b: ga.error("duplicate failed")
    if not me: print "Created Array B"

    # initialize data in g_a
    if not me:
        print "Initializing matrix A"
        ga.put(g_a, np.arange(dims[0], dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    lo,hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    ga.put(g_b, b, dims[0]-hi[0], dims[0]-lo[0])

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me: verify(g_a, g_b)

    # Deallocate arrays
    ga.destroy(g_a)
    ga.destroy(g_b)

コード例 #6

0

ファイルを表示

def test2D():
    n = 1024
    buf = np.zeros((n, n), dtype=np.float64)
    chunk = np.asarray(
        [1, 3, 4, 9, 16, 24, 30, 48, 64, 91, 128, 171, 256, 353, 440, 512])
    g_a = ga.create(ga.C_DBL, (n, n), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print(
            ' Performance of GA get, put & acc'
            ' for square sections of array[%d,%d]' % (n, n))
    lo, hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc(g_a, n, chunk, buf, lo, hi, False)

コード例 #7

0

ファイルを表示

ファイル: perf.py プロジェクト: fred1653/lotsofcoresbook1code

def test1D():
    n = 1024*1024
    buf = np.zeros(n/4, dtype=np.float64)
    chunk = np.asarray([1,9,16,81,256,576,900,2304,4096,8281,
        16384,29241,65536,124609,193600,262144])
    g_a = ga.create(ga.C_DBL, (n,), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print ''
        print ''
        print ''
        print (' Performance of GA get, put & acc'
                ' for 1-dimensional sections of array[%d]' % n)
    lo,hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, False)

コード例 #8

0

ファイルを表示

def test1D():
    n = 1024 * 1024
    buf = np.zeros(n / 4, dtype=np.float64)
    chunk = np.asarray([
        1, 9, 16, 81, 256, 576, 900, 2304, 4096, 8281, 16384, 29241, 65536,
        124609, 193600, 262144
    ])
    g_a = ga.create(ga.C_DBL, (n, ), 'a')
    if 0 == g_a:
        ga.error('ga.create failed')
    buf[:] = 0.01
    ga.zero(g_a)
    if 0 == me:
        print ''
        print ''
        print ''
        print(
            ' Performance of GA get, put & acc'
            ' for 1-dimensional sections of array[%d]' % n)
    lo, hi = ga.distribution(g_a, me)
    # local ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, True)
    # remote ops
    TestPutGetAcc1(g_a, n, chunk, buf, lo, hi, False)

コード例 #9

0

ファイルを表示

ファイル: locality.py プロジェクト: fred1653/lotsofcoresbook1code

def print_distribution(g_a):
    for i in range(ga.nnodes()):
        lo,hi = ga.distribution(g_a, i)
        print "%s lo=%s hi=%s" % (i,lo,hi)

コード例 #10

0

ファイルを表示

ファイル: copy.py プロジェクト: GlobalArrays/ga4py

def print_distribution(g_a):
    for i in range(ga.nnodes()):
        lo, hi = ga.distribution(g_a, i)
        print "%s lo=%s hi=%s" % (i, lo, hi)

コード例 #11

0

ファイルを表示

ファイル: jacobi_2d_ga.py プロジェクト: rcloud/Performance-Python-Paper-Project

g_b = ga.duplicate(g_a)

# process 0 initializes global array
# Note: alternatively, each process could initialize its local data using
# ga.access() and ga.distribution()
a = np.zeros((dim,dim), dtype=np.float32)
if rank == 0:
    a[0,:] = 100 #top row
    a[:,0] = 75 #left column
    a[:,a.shape[0] - 1] = 50 #right column
    ga.put(g_a, a)
ga.sync()

# which piece of array do I own?
# note that rhi and chi follow python range conventions i.e. [lo,hi)
(rlo,clo),(rhi,chi) = ga.distribution(g_a)

iteration = 0
start = ga.wtime()
while True:
    iteration += 1
    if iteration % HOW_MANY_STEPS_BEFORE_CONVERGENCE_TEST == 0:
        # check for convergence will occur, so make a copy of the GA
        ga.sync()
        ga.copy(g_a, g_b)
    # the iteration
    if rlo == 0 and rhi == dim:
        # I own the top and bottom rows
        ga.sync()
        my_array = ga.access(g_a)
        my_array[1:-1,1:-1] = (

コード例 #12

0

ファイルを表示

ファイル: put_get.py プロジェクト: GlobalArrays/ga-tutorial

if me == 0:
    print "Initialized GA library on %d processes" % nprocs

# Create a GA
dims = (NSIZE,NSIZE)
chunk = (-1,-1)
ld = NSIZE

g_a = ga.create(ga.C_INT, dims, "test_a", chunk)
if me == 0 and g_a:
    print "\nSuccessfully created Global Array"

# Initialize data in GA. Find data owned by neighboring processor

nghbr = (me+1)%nprocs
lo,hi = ga.distribution(g_a, nghbr)

# Create data in local buffer, assign unique value for each data element
patch_shape = hi-lo
a_buf = np.fromfunction(lambda i,j: j*NSIZE + i,
        patch_shape, dtype=ga.dtype(ga.C_INT))
a_buf += lo[1,np.newaxis]
a_buf += lo[np.newaxis,0]*dims[0]

# Copy local data to GA
ga.put(g_a, a_buf, lo, hi)
ga.sync()
if me == 0:
    print "\nCopied values into Global Array from local buffer\n"

# Check data in GA to see if it is correct. Find data owned by this