コード例 #1
0
def parallel_task():
    """Announce the current group, then print a sum over a random array.

    Only the process whose group rank is 0 prints.  The array is a 3x4x5
    ``ga.C_DBL`` global array filled by ``ga.randomize``; the value printed
    is ``np.sum`` over whatever ``ga.access`` returns on that process.
    """
    rank = ga.pgroup_nodeid()
    group_size = ga.pgroup_nnodes()  # queried but not used further here
    is_master = rank == 0

    if is_master:
        print("This is process 0 on group %s" % ga.pgroup_get_default())

    handle = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(handle)

    if is_master:
        print(np.sum(ga.access(handle)))
コード例 #2
0
ファイル: matrix.answer.py プロジェクト: dmlb2000/nwchem-cml
def matrix_multiply():
    """Compute C = A * B on global arrays, one owned block per process.

    Creates three global arrays of shape ``[TOTALELEMS] * NDIM``, fills A
    and B with random data written by process 0, lets every process multiply
    the strips of A and B that produce its own block of C, then calls
    ``verify`` and destroys the arrays.

    Relies on names defined outside this function: ``TOTALELEMS``, ``NDIM``,
    ``nprocs``, ``me``, ``verify``, ``ga`` and ``np``.  Returns ``None``.
    """
    # Configure array dimensions. Force an unequal data distribution.
    # Floor division keeps the chunk size an integer on Python 2 and 3.
    dims = [TOTALELEMS] * NDIM
    chunk = [TOTALELEMS // nprocs - 1] * NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not g_b or not g_c:
        # Was spelled ga.eror, which raised AttributeError instead of
        # reporting the failed duplicate.
        ga.error("duplicate failed")
    if not me:
        print("Created Arrays B and C")

    # Initialize data in matrices a and b. Every process generates the
    # buffers, but only process 0 writes them to the global arrays.
    if not me:
        print("Initializing matrix A and B")
    a = np.random.rand(*dims) * 29
    b = np.random.rand(*dims) * 37

    # Copy data to global arrays g_a and g_b.
    if not me:
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo, hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b: the owned rows of
    # A across all columns, and the owned columns of B across all rows.
    a = ga.get(g_a, (lo[0], 0), (hi[0], dims[0]))
    b = ga.get(g_b, (0, lo[1]), (dims[1], hi[1]))

    # Multiply a and b to get c. The strips already have compatible shapes,
    # so no transpose is needed (the unused transposed copy was dropped).
    c = np.dot(a, b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)
コード例 #3
0
ファイル: matrix.answer.py プロジェクト: dmlb2000/nwchem-cml
def matrix_multiply():
    """Multiply two random global matrices block by block and verify.

    Builds global arrays A, B and C of shape ``[TOTALELEMS] * NDIM``.
    Process 0 writes random data into A and B; after a sync, each process
    fetches the rows of A and columns of B matching its own block of C,
    multiplies them locally and writes the result back.  ``verify`` is then
    called and all three arrays are destroyed.

    Uses the externally defined names ``TOTALELEMS``, ``NDIM``, ``nprocs``,
    ``me``, ``verify``, ``ga`` and ``np``.  Returns ``None``.
    """
    # Configure array dimensions. Force an unequal data distribution.
    # "//" keeps the chunk size integral regardless of Python version.
    dims = [TOTALELEMS] * NDIM
    chunk = [TOTALELEMS // nprocs - 1] * NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not (g_b and g_c):
        # Fixed typo: the original called the non-existent ga.eror.
        ga.error("duplicate failed")
    if not me:
        print("Created Arrays B and C")

    # Initialize data in matrices a and b (generated on every process;
    # only process 0 stores them below).
    if not me:
        print("Initializing matrix A and B")
    a = np.random.rand(*dims) * 29
    b = np.random.rand(*dims) * 37

    # Copy data to global arrays g_a and g_b.
    if not me:
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo, hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b.
    a = ga.get(g_a, (lo[0], 0), (hi[0], dims[0]))
    b = ga.get(g_b, (0, lo[1]), (dims[1], hi[1]))

    # Do the local matrix multiplication. The previously computed transpose
    # of b was never used and has been removed.
    c = np.dot(a, b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)
コード例 #4
0
def TRANSPOSE1D():
    """Reverse a 1-D integer global array into a second global array.

    Process 0 fills array A with ``0 .. len-1``.  Each process then reads
    the block it owns, reverses it locally and writes it to the mirrored
    index range of array B.  Process 0 finally calls ``verify`` and both
    arrays are destroyed.  Uses the externally defined ``nprocs``,
    ``TOTALELEMS``, ``me`` and ``verify``.
    """
    on_root = not me

    # Array length is chosen so the data cannot be split evenly.
    total = nprocs * TOTALELEMS + nprocs / 2
    dims = [total]
    chunk = [TOTALELEMS]  # minimum data on each process

    # Source array A, then B as a duplicate of it.
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if on_root:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    if not g_b:
        ga.error("duplicate failed")
    if on_root:
        print("Created Array B")

    # Only the root writes the initial contents of A.
    if on_root:
        print("Initializing matrix A")
        ga.put(g_a, np.arange(total, dtype=np.int32))

    # Everyone waits here so A is fully populated before it is read.
    ga.sync()

    # Fetch the block this process owns.
    lo, hi = ga.distribution(g_a)
    owned = ga.get(g_a, lo, hi)

    # Store the locally reversed block at its mirrored position in B.
    mirrored_lo = total - hi[0]
    mirrored_hi = total - lo[0]
    ga.put(g_b, owned[::-1], mirrored_lo, mirrored_hi)

    # Wait until every process has written its part of B.
    ga.sync()

    # The root checks the result.
    if on_root:
        verify(g_a, g_b)

    # Release both arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
コード例 #5
0
def TRANSPOSE1D():
    """Invert the element order of a 1-D global array.

    Array A is initialised by process 0 with consecutive integers.  Every
    process reverses the block it owns and puts it into array B at the
    index range mirrored about the array length.  Process 0 then runs
    ``verify`` and the arrays are destroyed.  ``nprocs``, ``TOTALELEMS``,
    ``me`` and ``verify`` come from the enclosing module.
    """
    # Uneven length forces an unequal data distribution.
    length = nprocs * TOTALELEMS + nprocs / 2
    dims = [length]
    chunk = [TOTALELEMS]  # minimum data on each process

    # Create A and derive B from it.
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if me == 0:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    if not g_b:
        ga.error("duplicate failed")
    if me == 0:
        print("Created Array B")

    # Initial data is written by process 0 only.
    if me == 0:
        print("Initializing matrix A")
        initial = np.arange(length, dtype=np.int32)
        ga.put(g_a, initial)

    # Barrier: A must be complete before any process reads from it.
    ga.sync()

    # Local phase: read the owned block and flip it.
    lo, hi = ga.distribution(g_a)
    flipped = ga.get(g_a, lo, hi)[::-1]

    # Global phase: place the flipped block at its mirrored location.
    ga.put(g_b, flipped, length - hi[0], length - lo[0])

    # Barrier: B must be complete before it is checked.
    ga.sync()

    if me == 0:
        verify(g_a, g_b)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
コード例 #6
0
ファイル: access.answer.py プロジェクト: dmlb2000/nwchem-cml
"""Use ga.access() to sum locally per SMP node."""

import mpi4py.MPI
import ga
import numpy as np

# Process identity: world rank/size, plus the SMP node this process is on.
world_id = ga.nodeid()
world_nproc = ga.nnodes()
node_id = ga.cluster_nodeid()
node_nproc = ga.cluster_nprocs(node_id)
# NOTE(review): cluster_procid is given the world rank here, but a
# node-local index in the loop below -- confirm this is intended when
# running on more than one node.
node_me = ga.cluster_procid(node_id, ga.nodeid())

# Process 0 fills the 3x4x5x6 array with 0..359; everyone waits for it.
g_a = ga.create(ga.C_DBL, (3, 4, 5, 6))
if world_id == 0:
    ga.put(g_a, np.arange(3 * 4 * 5 * 6))
ga.sync()

if node_me == 0:
    # Accumulate over the data held by every process on this node.
    # (Named node_total / local_block so the builtins sum and buffer
    # are no longer shadowed.)
    node_total = 0
    for i in range(node_nproc):
        smp_neighbor_world_id = ga.cluster_procid(node_id, i)
        local_block = ga.access(g_a, proc=smp_neighbor_world_id)
        # A process that owns no part of g_a contributes nothing.
        if local_block is not None:
            node_total += np.sum(local_block)
    print(node_total)
コード例 #7
0
ファイル: copy.py プロジェクト: dmlb2000/nwchem-cml
import mpi4py.MPI # initialize Message Passing Interface
import ga # initialize Global Arrays

# Id of the calling process as reported by ga.nodeid(); the code below uses
# "not me" to restrict output to process 0.
me = ga.nodeid()

def print_distribution(g_a):
    """Print, for every process, the lo/hi bounds ``ga.distribution`` reports.

    One line per process index in ``range(ga.nnodes())``.
    """
    for proc in range(ga.nnodes()):
        bounds = ga.distribution(g_a, proc)
        lo, hi = bounds
        print("%s lo=%s hi=%s" % (proc, lo, hi))

# Two arrays of the same shape but created with different chunk hints.
g_a = ga.create(ga.C_DBL, (10, 20, 30), chunk=(-1, 20, -1))
g_b = ga.create(ga.C_DBL, (10, 20, 30), chunk=(10, -1, -1))

# Process 0 reports how each array is distributed.
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)

# Fill the first array with 6 and copy it into the second.
ga.fill(g_a, 6)
ga.copy(g_a, g_b)

# Process 0 shows the shape and contents of what ga.access returns for g_b.
if me == 0:
    local_patch = ga.access(g_b)
    print(local_patch.shape)
    print(local_patch)
コード例 #8
0
ファイル: readinc.py プロジェクト: dmlb2000/nwchem-cml
def verify_using_np(g_a, g_b, g_c):
    """Check ``g_c`` against ``np.dot`` of ``g_a`` and ``g_b``.

    Each calling process fetches all three arrays, compares the summed
    difference with a 0.0001 threshold, and the per-process flags are
    combined with ``ga.gop_add``.  Returns True when the combined flag is 0.

    NOTE(review): the differences are summed with their sign before the
    absolute value is taken, so errors of opposite sign can cancel.
    """
    local_a, local_b, local_c = ga.get(g_a), ga.get(g_b), ga.get(g_c)
    residual = np.sum(local_c - np.dot(local_a, local_b))
    mismatch = int(np.abs(residual) > 0.0001)
    return ga.gop_add(mismatch) == 0

if __name__ == '__main__':
    # Refuse to run with more than MULTIPLIER**3 processes; only process 0
    # prints the explanation.
    if nproc > MULTIPLIER**3:
        if 0 == me:
            print "You must use less than %s processors" % (MULTIPLIER**3+1)
    else:
        # Inputs A and B and the result C are all N x N arrays of ga.C_DBL.
        g_a = ga.create(ga.C_DBL, [N,N])
        g_b = ga.create(ga.C_DBL, [N,N])
        g_c = ga.create(ga.C_DBL, [N,N])
        # One-element integer array, zeroed and handed to srumma below
        # (presumably a shared task counter -- confirm against srumma).
        g_counter = ga.create(ga.C_INT, [1])
        ga.zero(g_counter)
        # put some fake data into input arrays A and B
        if me == 0:
            ga.put(g_a, np.random.random(N*N))
            ga.put(g_b, np.random.random(N*N))
        # Wait until process 0 has finished writing the inputs.
        ga.sync()
        # Trailing comma keeps "done" on the same output line (Python 2).
        if me == 0:
            print "srumma...",
        srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER, g_counter)
        if me == 0:
            print "done"
        # NOTE(review): the excerpt ends on the next line; the body of this
        # branch is not visible here.
        if me == 0:
コード例 #9
0
import mpi4py.MPI # initialize Message Passing Interface
import ga # initialize Global Arrays

import numpy as np

# Id of the calling process and the total process count, as reported by GA.
me = ga.nodeid()
nproc = ga.nnodes()

def print_distribution(g_a):
    """Print one ``P=<proc> lo=<lo> hi=<hi>`` line per process for ``g_a``.

    The bounds come from ``ga.distribution(g_a, proc)`` for each process
    index in ``range(ga.nnodes())``.
    """
    for proc in range(ga.nnodes()):
        bounds = ga.distribution(g_a, proc)
        lo, hi = bounds
        print("P=%s lo=%s hi=%s" % (proc, lo, hi))

# create some irregular arrays
# block gives the number of blocks per dimension; block_map holds the values
# passed to ga.create_irreg as its map argument (presumably the starting
# index of each block, dimension by dimension -- confirm against the GA
# documentation).  Named block_map so the builtin map() is not shadowed.
block = [3, 2]
block_map = [0, 2, 6, 0, 5]
if nproc < np.prod(block):
    # Call form of raise: valid on both Python 2 and Python 3.
    raise ValueError("ERROR: fewer procs than requested blocks")
g_a = ga.create_irreg(ga.C_DBL, [8, 10], block, block_map, "Array A")
if not g_a:
    ga.error("Could not create global array A", g_a)
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# Process 0 reports how both arrays are distributed.
if not me:
    print_distribution(g_a)
    print_distribution(g_b)
コード例 #10
0
ファイル: srumma.answer.py プロジェクト: dmlb2000/nwchem-cml
def verify_using_np(g_a, g_b, g_c):
    """Return True when ``g_c`` agrees with ``np.dot(g_a, g_b)``.

    All three arrays are fetched with ``ga.get``; the signed sum of the
    element differences is compared with 0.0001, and the resulting 0/1 flag
    is reduced with ``ga.gop_add``.

    NOTE(review): because the differences are summed before ``np.abs`` is
    applied, positive and negative errors can cancel each other.
    """
    fetched_a = ga.get(g_a)
    fetched_b = ga.get(g_b)
    fetched_c = ga.get(g_c)
    signed_error = np.sum(fetched_c - np.dot(fetched_a, fetched_b))
    failed = int(np.abs(signed_error) > 0.0001)
    failures = ga.gop_add(failed)
    return failures == 0


if __name__ == '__main__':
    # Refuse to run with more than MULTIPLIER**3 processes; only process 0
    # prints the explanation.
    if nproc > MULTIPLIER**3:
        if 0 == me:
            print "You must use less than %s processors" % (MULTIPLIER**3 + 1)
    else:
        # Inputs A and B and the result C are all N x N arrays of ga.C_DBL.
        g_a = ga.create(ga.C_DBL, [N, N])
        g_b = ga.create(ga.C_DBL, [N, N])
        g_c = ga.create(ga.C_DBL, [N, N])
        # put some fake data into input arrays A and B
        if me == 0:
            ga.put(g_a, np.random.random(N * N))
            ga.put(g_b, np.random.random(N * N))
        # Wait until process 0 has finished writing the inputs.
        ga.sync()
        # Trailing comma keeps "done" on the same output line (Python 2).
        if me == 0:
            print "srumma...",
        srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER)
        if me == 0:
            print "done"
        if me == 0:
            print "verifying using ga.gemm...",
        # Called on every process, not only process 0.
        # NOTE(review): the excerpt ends here; how `ok` is used is not visible.
        ok = verify_using_ga(g_a, g_b, g_c)
コード例 #11
0
ファイル: access.py プロジェクト: dmlb2000/nwchem-cml
"""Use ga.access() to sum locally per SMP node."""

import mpi4py.MPI
import ga
import numpy as np

# Okay, we create the global array
# world_id was used below without ever being defined (NameError); it is the
# id of this process as reported by ga.nodeid().
world_id = ga.nodeid()
g_a = ga.create(ga.C_DBL, (3, 4, 5, 6))
if world_id == 0:
    ga.put(g_a, np.arange(3 * 4 * 5 * 6))
ga.sync()

# You're on your own!
コード例 #12
0
import mpi4py.MPI  # initialize Message Passing Interface
import ga  # initialize Global Arrays

# Id of the calling process as reported by ga.nodeid(); the code below uses
# "not me" to restrict output to process 0.
me = ga.nodeid()


def print_distribution(g_a):
    """Show how ``g_a`` is split: one ``<proc> lo=... hi=...`` line each.

    Iterates over every process index in ``range(ga.nnodes())`` and prints
    the bounds returned by ``ga.distribution``.
    """
    for proc in range(ga.nnodes()):
        bounds = ga.distribution(g_a, proc)
        lo, hi = bounds
        print("%s lo=%s hi=%s" % (proc, lo, hi))


# Same shape for both arrays; only the chunk hints differ.
g_a = ga.create(ga.C_DBL, (10, 20, 30), chunk=(-1, 20, -1))
g_b = ga.create(ga.C_DBL, (10, 20, 30), chunk=(10, -1, -1))

# Distribution report, printed by process 0 only.
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)

# Set every element of g_a to 6, then copy g_a into g_b.
ga.fill(g_a, 6)
ga.copy(g_a, g_b)

# Process 0 prints the shape and contents of what ga.access returns for g_b.
if me == 0:
    local_patch = ga.access(g_b)
    print(local_patch.shape)
    print(local_patch)
コード例 #13
0
ファイル: groups.py プロジェクト: dmlb2000/nwchem-cml
def parallel_task():
    """Exercise skeleton for the work each process does inside its group.

    Creates a 3x4x5 ``ga.C_DBL`` global array and randomizes it.  Printing
    a message from the group master is left for the reader to fill in.
    """
    # Rank and process count from the pgroup_* queries (presumably relative
    # to the current default process group -- confirm).
    me = ga.pgroup_nodeid()
    nproc = ga.pgroup_nnodes()
    ### print a message from the master of the group
    g_a = ga.create(ga.C_DBL, (3,4,5))
    ga.randomize(g_a)
コード例 #14
0
def comp_pi(n, myrank=0, nprocs=1):
    """Return one rank's share of a midpoint-rule estimate of pi.

    The integral of ``4 / (1 + x**2)`` over [0, 1] is split into ``n``
    equal intervals.  This call sums the intervals numbered ``myrank + 1``,
    ``myrank + 1 + nprocs``, ... up to ``n``; adding the results of all
    ranks ``0 .. nprocs - 1`` gives the full estimate.

    Args:
        n: number of intervals; must be non-zero.
        myrank: index of the calling rank (default 0).
        nprocs: total number of ranks sharing the work (default 1).

    Returns:
        The partial sum already scaled by the interval width.

    Raises:
        ZeroDivisionError: if ``n`` is 0.
    """
    h = 1.0 / n
    # Iterate lazily on both Python 2 (xrange) and Python 3 (range).
    try:
        indices = xrange(myrank + 1, n + 1, nprocs)
    except NameError:
        indices = range(myrank + 1, n + 1, nprocs)
    s = 0.0
    for i in indices:
        x = h * (i - 0.5)  # midpoint of the i-th interval
        s += 4.0 / (1.0 + x ** 2)
    return s * h

def prn_pi(pi, PI):
    """Print the estimate ``pi`` and its absolute difference from ``PI``.

    Both numbers are shown with 16 digits after the decimal point.
    """
    error = abs(pi - PI)
    print("pi is approximately %.16f, error is %.16f" % (pi, error))

# Total process count and the id of this process, as reported by GA.
nprocs = ga.nnodes()
myrank = ga.nodeid()

# One-element ga.C_DBL array; its use lies outside this excerpt (presumably
# it collects the partial sums -- confirm).
g_pi = ga.create(ga.C_DBL, [1])

# With exactly one command-line argument, take n from it instead of asking.
one_time = False
if len(sys.argv) == 2:
    n = int(sys.argv[1])
    one_time = True

while True:
    if not one_time:
        # Rank 0 obtains n from get_n() and passes it to ga.brdcst; the other
        # ranks pass a placeholder 0 and use the value ga.brdcst returns.
        if myrank == 0:
            n = get_n()
            n = ga.brdcst(n)
        else:
            n = ga.brdcst(0)
        # n == 0 ends the loop.
        if n == 0:
            break
    # NOTE(review): the rest of the loop body lies beyond this excerpt.
コード例 #15
0
def parallel_task():
    """Exercise skeleton for the work each process does inside its group.

    Creates a 3x4x5 ``ga.C_DBL`` global array and randomizes it.  Printing
    a message from the group master is left for the reader to fill in.
    """
    # Rank and process count from the pgroup_* queries (presumably relative
    # to the current default process group -- confirm).
    me = ga.pgroup_nodeid()
    nproc = ga.pgroup_nnodes()
    ### print a message from the master of the group
    g_a = ga.create(ga.C_DBL, (3, 4, 5))
    ga.randomize(g_a)
コード例 #16
0
ファイル: srumma.answer.py プロジェクト: dmlb2000/nwchem-cml
def verify_using_np(g_a, g_b, g_c):
    """Compare ``g_c`` with the NumPy product of ``g_a`` and ``g_b``.

    Fetches the three arrays, flags this process as failed when the absolute
    value of the summed difference exceeds 0.0001, adds the flags of all
    processes with ``ga.gop_add`` and returns True when that total is 0.

    NOTE(review): the sum is signed, so errors of opposite sign may cancel
    before the threshold is applied.
    """
    mat_a, mat_b, mat_c = ga.get(g_a), ga.get(g_b), ga.get(g_c)
    reference = np.dot(mat_a, mat_b)
    flag = int(np.abs(np.sum(mat_c - reference)) > 0.0001)
    return ga.gop_add(flag) == 0

if __name__ == '__main__':
    # Refuse to run with more than MULTIPLIER**3 processes; only process 0
    # prints the explanation.
    if nproc > MULTIPLIER**3:
        if 0 == me:
            print "You must use less than %s processors" % (MULTIPLIER**3+1)
    else:
        # Inputs A and B and the result C are all N x N arrays of ga.C_DBL.
        g_a = ga.create(ga.C_DBL, [N,N])
        g_b = ga.create(ga.C_DBL, [N,N])
        g_c = ga.create(ga.C_DBL, [N,N])
        # put some fake data into input arrays A and B
        if me == 0:
            ga.put(g_a, np.random.random(N*N))
            ga.put(g_b, np.random.random(N*N))
        # Wait until process 0 has finished writing the inputs.
        ga.sync()
        # Trailing comma keeps "done" on the same output line (Python 2).
        if me == 0:
            print "srumma...",
        srumma(g_a, g_b, g_c, CHUNK_SIZE, MULTIPLIER)
        if me == 0:
            print "done"
        if me == 0:
            print "verifying using ga.gemm...",
        # Called on every process, not only process 0.
        # NOTE(review): the excerpt ends here; how `ok` is used is not visible.
        ok = verify_using_ga(g_a, g_b, g_c)
コード例 #17
0
ファイル: locality.py プロジェクト: dmlb2000/nwchem-cml
import mpi4py.MPI # initialize Message Passing Interface
import ga # initialize Global Arrays

# Id of the calling process as reported by ga.nodeid(); the code below uses
# "not me" to restrict output to process 0.
me = ga.nodeid()

def print_distribution(g_a):
    """Print the ``ga.distribution`` bounds of ``g_a`` for every process.

    Emits one ``<proc> lo=<lo> hi=<hi>`` line per process index in
    ``range(ga.nnodes())``.
    """
    for proc in range(ga.nnodes()):
        bounds = ga.distribution(g_a, proc)
        lo, hi = bounds
        print("%s lo=%s hi=%s" % (proc, lo, hi))

# A 3-D array of ga.C_DBL and a 5-D array of ga.C_INT, both created
# without chunk hints.
g_a = ga.create(ga.C_DBL, (10, 20, 30))
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))

# Process 0 reports how each array is distributed.
if me == 0:
    print_distribution(g_a)
    print_distribution(g_b)