Python function 예제들, synkhronos.function Python 예제들

예제 #1

0

파일 보기

파일: train.py 프로젝트: david-leon/Synkhronos

def make_train_function(loss, params, x, y, update_rule, *args, **kwargs):
    """Build a closure that performs one distributed training step per call.

    ``update_rule(loss, params, *args, **kwargs)`` must return a 3-tuple
    ``(grad_updates, param_updates, grad_shared)``.  Two Synkhronos functions
    are compiled from it: one that takes ``[x, y]``, outputs ``loss`` and
    applies ``grad_updates``; and one with no inputs that applies
    ``param_updates``.

    Args:
        loss: Output expression of the gradient function (the original
            author's note assumes it is an average).
        params: Parameters forwarded to ``update_rule``.
        x, y: Symbolic inputs of the gradient function.
        update_rule: Callable producing the three update collections.
        *args, **kwargs: Forwarded unchanged to ``update_rule``.

    Returns:
        ``train_minibatch(x_data, y_data, batch=None)``, which returns the
        value produced by the gradient function.
    """
    rule_outputs = update_rule(loss, params, *args, **kwargs)
    grad_updates, param_updates, grad_shared = rule_outputs

    # Step 1 function: evaluate the loss and write gradients via grad_updates.
    compute_grads = synk.function(inputs=[x, y],
                                  outputs=loss,
                                  updates=grad_updates)
    # Step 2 function: no inputs, only applies the parameter updates.
    apply_updates = synk.function(inputs=[], updates=param_updates)

    def train_minibatch(x_data, y_data, batch=None):
        """Run gradient computation, all-reduce of grad_shared, then update."""
        minibatch_loss = compute_grads(x_data, y_data, batch=batch)
        # "avg" is used on the assumption that the loss itself is an average.
        synk.all_reduce(grad_shared, op="avg")
        apply_updates()
        return minibatch_loss

    return train_minibatch

예제 #2

0

파일 보기

파일: example_1.py 프로젝트: mharradon/synkhronos

import numpy as np
import theano
import theano.tensor as T
import synkhronos as synk

# Demo script: compile a Synkhronos function that updates a shared variable,
# run it through the plain-Theano path and the Synkhronos path, and print the
# shared variable after gather / reduce calls.
synk.fork()
s_init = np.ones(3, dtype='float32')
x = T.matrix('x')
s = theano.shared(s_init, name='s')
# Second name bound to the same shared variable; not used again in this excerpt.
s_old = s
# Each call replaces s with the column-wise sum of x * s (no outputs returned).
f = synk.function([x], updates={s: T.sum(x * s, axis=0)})
synk.distribute()
x_dat = np.array([[1., 1, 1],
                  [2, 2, 2],
                  [3, 3, 3],
                  [4, 4, 4]]).astype('float32')
print("\ns initial:\n", s.get_value())
# Run through the as_theano entry point first, for comparison.
f.as_theano(x_dat)
print("\ns after Theano call:\n", s.get_value())
# Restore the initial value locally before the Synkhronos call.
s.set_value(s_init)
f(x_dat)
print("\nlocal s after reset and Synkhronos call:\n", s.get_value())
gathered_s = synk.gather(s, nd_up=1)
print("\ngathered s:\n", gathered_s)
# Per the printed label below, this reduce is in-place on s.
synk.reduce(s, op="sum")
print("\nlocal s after in-place reduce:\n", s.get_value())
gathered_s = synk.gather(s, nd_up=1)
print("\ngathered s after reduce:\n", gathered_s)
# Reset locally, broadcast s, then run the function once more.
s.set_value(s_init)
synk.broadcast(s)
f(x_dat)

예제 #3

0

파일 보기

파일: get_set_value_lengths.py 프로젝트: david-leon/Synkhronos

import synkhronos as synk
import numpy as np
import theano

# Interactive setup script: builds two shared variables, a Synkhronos function
# over them, and two test arrays; the actual get/set experiments are left as
# commented-out lines to be typed in by hand.
synk.fork()

s = theano.shared(np.ones([5, 5], dtype='float32'), name="shared_var")
s2 = theano.shared(np.ones([4, 4], dtype='float32'), name="shared_var_2")

# No inputs; outputs are the two matrix products s.s and s2.s2.
f = synk.function([], [s.dot(s), s2.dot(s2)])

synk.distribute()

# The commented-out statements below are example calls for the interactive
# session (see the note at the end of the script).
# print(f())

# print(synk.get_value(1, s))

# d = 2 * np.ones([5, 5], dtype='float32')

# synk.set_value(1, s, d)

# d55 is 5x5 (same shape as s); d64 is 6x4 (differs from both s and s2).
d55 = np.array(list(range(5 * 5)), dtype='float32').reshape(5, 5)
d64 = np.array(list(range(6 * 4)), dtype='float32').reshape(6, 4)

# (run interactive in iPython for setup)

예제 #4

0

파일 보기

파일: lasagne_mnist.py 프로젝트: mharradon/synkhronos

def main(model='mlp', num_epochs=500):
    """Train and evaluate a Lasagne network using Synkhronos functions.

    Loads the dataset via ``load_dataset()``, forks Synkhronos workers, builds
    the network selected by ``model``, compiles a training and a validation
    function with ``synk.function``, runs ``num_epochs`` epochs over
    minibatches of 500 indices while printing per-epoch statistics, reports
    test loss/accuracy, and finally saves the parameters to ``model.npz`` and
    loads them back into the network.

    Args:
        model: ``'mlp'``, ``'cnn'``, or
            ``'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'``.  Any other value
            prints a message and returns without training.
        num_epochs: Number of passes over the training data.
    """
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Fork workers and initialize gpu before building any variables.
    synk.fork()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        # Everything after the first ':' is a comma-separated 4-tuple.
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=0.01,
                                                momentum=0.9)
    # ipdb.set_trace()
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    # train_fn = theano.function([input_var, target_var], loss, updates=updates)
    train_fn = synk.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    # val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    val_fn = synk.function([input_var, target_var], [test_loss, test_acc])

    # Send all functions and variables to workers (in the future, automatic)
    synk.distribute()

    # Write data into input shared memory (also applies to val_fn--same vars).
    # train_fn.build_inputs is deliberately reused for the validation and test
    # sets because val_fn takes the same two input variables.
    X_train_synk, y_train_synk = train_fn.build_inputs(X_train, y_train)
    X_val_synk, y_val_synk = train_fn.build_inputs(X_val, y_val)
    X_test_synk, y_test_synk = train_fn.build_inputs(X_test, y_test)

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatch_indices(len(y_train), 500,
                                               shuffle=True):
            train_err += train_fn(X_train_synk, y_train_synk, batch=batch)
            # All-reduce the parameters after every minibatch step.
            synk.all_reduce(params)
            train_batches += 1
        mid_time = time.time()

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatch_indices(len(y_val), 500, shuffle=False):
            err, acc = val_fn(X_val_synk,
                              y_val_synk,
                              batch=batch,
                              num_slices=1)
            val_err += err
            val_acc += acc
            val_batches += 1
        end_time = time.time()

        # Split the epoch's wall time into its training and validation parts.
        val_fn_time = end_time - mid_time
        train_fn_time = mid_time - start_time

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("Train function time: {:.3f}s".format(train_fn_time))
        print("Validation function time: {:.3f}s".format(val_fn_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))

    # After training, we compute and print the test error:
    test_err = 0
    # NOTE: rebinds the name that held the symbolic accuracy expression; val_fn
    # was already compiled from it above, so only the local name changes.
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatch_indices(len(y_test), 500, shuffle=False):
        # Unlike the validation loop, num_slices is not passed here.
        err, acc = val_fn(X_test_synk, y_test_synk, batch=batch)
        test_err += err
        test_acc += acc
        test_batches += 1

    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    np.savez('model.npz', *lasagne.layers.get_all_param_values(network))

    # And load them again later on like this:
    with np.load('model.npz') as f:
        # np.savez stores positional arrays under the keys arr_0, arr_1, ...
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

예제 #5

0

파일 보기

import numpy as np
import theano
import theano.tensor as T
import synkhronos as synk

# Demo script: a Synkhronos function over shared variables only, followed by
# scatter / broadcast of new data and inspection of per-GPU shapes.
n_gpus = synk.fork()

# Make data-parallel computation with Theano shared variable (exists on GPU).
dtype = theano.config.floatX
s_x = theano.shared(np.ones([100, 4], dtype=dtype), name='s_x')
s_y = theano.shared(np.zeros([4, 5], dtype=dtype), name='s_y')
# Not referenced by any expression in this excerpt (the original author's
# "note at bottom" lies outside the visible snippet).
s_unused = theano.shared(np.zeros([5, 5], dtype=dtype))  # (see note at bottom)
z = T.mean(s_x.dot(s_y), axis=0)

# No explicit inputs; s_x is declared as a sliceable shared variable.
f = synk.function(inputs=[], sliceable_shareds=[s_x], outputs=z)
synk.distribute()  # (shared variable data sent to workers with function)

# Inspect values of Theano shared variables--separate copy on each GPU.
print("\nLengths of s_x on each GPU: ", synk.get_lengths(s_x))
print("Shapes of s_x on each GPU: ", synk.get_shapes(s_x))

# 8 rows per forked GPU for x; y keeps the (4, 5) shape of s_y.
x_dat = np.random.randn(8 * n_gpus, 4).astype(dtype)
y_dat = np.random.randn(4, 5).astype(dtype)

# Manipulate values of Theano shared variables across all GPUs.
synk.scatter(s_x, x_dat)
synk.broadcast(s_y, y_dat)  # (without data arg, operates on existing var data)

print("\nData scattered to s_x and broadcast to s_y...")
print("\nShapes of s_x on each GPU: ", synk.get_shapes(s_x))

예제 #6

0

파일 보기

# Comparison script: builds the same computations as plain Theano functions
# and, when RUN_BOTH is set, also as Synkhronos functions.  RUN_BOTH, T,
# theano and np are defined outside this excerpt.
if RUN_BOTH:
    import synkhronos as synk
    synk.fork()
else:
    # Theano-only path: select the "cuda" device directly.
    import theano.gpuarray
    theano.gpuarray.use("cuda")

x = T.matrix('x')
y = T.matrix('y')
v = T.vector('v')
s = theano.shared(np.ones([1, 5], dtype='float32'), name='s')

z = T.sum(x.dot(y), axis=0)

if RUN_BOTH:
    # y and v are passed as broadcast inputs; x is a regular input.
    f_synk = synk.function([x, y], z, broadcast_inputs=[y])
    g_synk = synk.function([v], updates={s: s + v}, broadcast_inputs=[v])
    synk.distribute()

# Plain-Theano counterparts of f_synk and g_synk.
f_theano = theano.function([x, y], z)
g_theano = theano.function([v], updates={s: s + v})

x_dat = 0.01 * np.ones([1000, 10], dtype='float32')
# Split the 1000 rows into a 400-row part and a 600-row part.
x_dat1 = x_dat[:400]
x_dat2 = x_dat[400:]
y_dat = np.ones([10, 5], dtype='float32')

r_theano = f_theano(x_dat, y_dat)
print("result of f_theano: ", r_theano)
# Results on the two row-wise parts, computed separately.
r_t_1 = f_theano(x_dat1, y_dat)
r_t_2 = f_theano(x_dat2, y_dat)

예제 #7

0

파일 보기

import numpy as np
import theano
import theano.tensor as T
import synkhronos as synk

# Self-check script: the same expression is compiled with theano.function and
# synk.function, and the results are asserted to match.
synk.fork()
x = T.matrix('x')
y = theano.shared(np.random.randn(10, 20).astype('float32'))
z = T.mean(x.dot(y), axis=0)
f_th = theano.function([x], z)  # just for comparison
f = synk.function([x], z)
synk.distribute()

x_dat = np.random.randn(100, 10).astype('float32')
r_th = f_th(x_dat)
r = f(x_dat)
r_as_th = f.as_theano(x_dat)
# Both the Synkhronos call and its as_theano path must agree with Theano.
assert np.allclose(r, r_th)
assert np.allclose(r_as_th, r_th)
print("All assertions passed.")

예제 #8

0

파일 보기

파일: example_0.py 프로젝트: david-leon/Synkhronos

import numpy as np
import theano
import theano.tensor as T
import synkhronos as synk

# Self-check script: compares a Synkhronos function (x as regular input, y as
# bcast input) against the equivalent plain Theano function.
synk.fork()
x = T.matrix('x')
y = T.vector('y')
z = T.mean(x.dot(y), axis=0)
f_th = theano.function(inputs=[x, y], outputs=z)
f = synk.function(inputs=[x], bcast_inputs=[y], outputs=z)
synk.distribute()

x_dat = np.random.randn(100, 10).astype('float32')
y_dat = np.random.randn(10).astype('float32')
# Wrap the arrays with synk.data before passing them to the Synkhronos function.
x_synk = synk.data(x_dat)
y_synk = synk.data(y_dat)
r_th = f_th(x_dat, y_dat)
r = f(x_synk, y_synk)

assert np.allclose(r, r_th)
print("All assertions passed.")

예제 #9

0

파일 보기

파일: cpu_comm_test.py 프로젝트: david-leon/Synkhronos

import theano
import theano.tensor as T
import numpy as np
import synkhronos as synk

# Test script: function over shared variables only; x is declared sliceable
# and is later refilled with synk.scatter before the function is called.
n_gpu = synk.fork()

# x = T.matrix('x')
x_dat = np.random.randn(100, 10).astype(theano.config.floatX)
y_dat = np.random.randn(10, 5).astype(theano.config.floatX)
x = theano.shared(x_dat, 'x_gpu')
y = theano.shared(y_dat, 'y_gpu')
z = T.mean(x.dot(y), axis=0)

f = synk.function(inputs=[], outputs=z, sliceable_shareds=[x])

synk.distribute()

# 100 rows per forked GPU in total.
full_x_dat = np.random.randn(n_gpu * 100, 10).astype(theano.config.floatX)

synk.scatter(x, full_x_dat)

r = f()

예제 #10

0

파일 보기

def main():
    """Benchmark a loss function and a flat-gradient function.

    Builds an MLP, compiles ``f_loss`` and ``f_grad`` with ``synk.function``,
    runs each once on freshly made data, then times ``ITERS`` repetitions of
    each under three input paths: the ``as_theano`` entry point on two half
    batches, the function's own input shared memory, and new input arrays.
    Each timing is printed with its label.
    """
    B_SIZE = 10000
    MID = B_SIZE // 2
    ITERS = 10

    synk.fork()
    import lasagne

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    # (swap in build_cnn(input_var) here to benchmark the CNN instead)
    network = build_mlp(input_var)
    prediction = lasagne.layers.get_output(network)
    per_sample_loss = lasagne.objectives.categorical_crossentropy(
        prediction, target_var)
    loss = per_sample_loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)

    # One flat vector holding the gradient w.r.t. every parameter.
    grads = theano.grad(loss, wrt=params)
    flat_grad = T.concatenate([T.flatten(g) for g in grads])

    f_loss = synk.function([input_var, target_var],
                           loss,
                           collect_modes=[None],
                           reduce_ops="sum")
    f_grad = synk.function([input_var, target_var],
                           flat_grad,
                           collect_modes=[None])

    synk.distribute()

    x_data, y_data = make_data([1, 28, 28], B_SIZE)

    # Initial calls on the new data, made before the shared-memory inputs are
    # fetched below.
    f_loss(x_data, y_data)
    f_grad(x_data, y_data)

    x_shmem, y_shmem = f_loss.get_input_shmems()
    x_dat_sh, y_dat_sh = x_shmem[:B_SIZE], y_shmem[:B_SIZE]
    x_data_1, x_data_2 = x_data[:MID], x_data[MID:]
    y_data_1, y_data_2 = y_data[:MID], y_data[MID:]

    def report(label, *calls):
        """Time ITERS rounds of the given thunks and print the elapsed time."""
        started = timer()
        for _ in range(ITERS):
            for call in calls:
                call()
        print(label, timer() - started)

    # as_theano path: the batch is processed as two halves per iteration.
    report("theano loss_time: ",
           lambda: f_loss.as_theano(x_data_1, y_data_1),
           lambda: f_loss.as_theano(x_data_2, y_data_2))
    report("theano grad_time: ",
           lambda: f_grad.as_theano(x_data_1, y_data_1),
           lambda: f_grad.as_theano(x_data_2, y_data_2))

    # Synkhronos path fed from the function's own input shared memory.
    report("synk shmem loss_time: ", lambda: f_loss(x_dat_sh, y_dat_sh))
    report("synk shmem grad_time: ", lambda: f_grad(x_dat_sh, y_dat_sh))

    # Synkhronos path fed with the original (non-shared-memory) arrays.
    report("synk new input loss_time: ", lambda: f_loss(x_data, y_data))
    report("synk new input grad_time: ", lambda: f_grad(x_data, y_data))

예제 #11

0

파일 보기

import numpy as np
import theano
import theano.tensor as T

import synkhronos as synk
# Demo script: sum-reduced function over shared variables, followed by several
# ways of describing subsets of the scattered data.
n_gpus = synk.fork()

DAT = 200  # (data length on each GPU)

# Build simple data-parallel computations with shared variables.
s_x = theano.shared(np.empty([DAT, 10], dtype=theano.config.floatX))
s_y = theano.shared(np.empty([10, 5], dtype=theano.config.floatX))
z = T.sum(s_x.dot(s_y), axis=0)

# The output is given as an (expression, "sum") pair.
f = synk.function(inputs=[], outputs=(z, "sum"), sliceable_shareds=[s_x])
synk.distribute()

# DAT rows per forked GPU for x; y matches the (10, 5) shape of s_y.
x_dat = 0.01 * np.random.randn(DAT * n_gpus, 10).astype(theano.config.floatX)
y_dat = np.random.randn(10, 5).astype(theano.config.floatX)

synk.scatter(s_x, x_dat)
synk.broadcast(s_y, y_dat)

# Build an assortment of subsets of the data to compute on.
# (Can either build a single slice or single list, which will be applied within
# each GPU (AFTER the data is scattered), or can build a list of slices or lists,
# one for each GPU.)
slice_1 = slice(100, 200)
list_2 = np.random.randint(low=0, high=DAT, size=100)
# One 100-row slice per GPU, each shifted by one row relative to the previous.
slices_3 = [slice(0 + i, 100 + i) for i in range(n_gpus)]

예제 #12

0

파일 보기

import synkhronos as synk
# Demo script: one Synkhronos function with three outputs that use different
# reduce operations.  T, theano and np are imported outside this excerpt, and
# the snippet is cut off after the final comment.
synk.fork()  # processes forked, GPUs initialized

# Build simple data-parallel computations (parallel across rows of "x")
x = T.matrix('x')
y = T.matrix('y')
z_avg = T.mean(x.dot(y), axis=0)
z_sum = T.sum(x.dot(y), axis=0)
z_max = T.max(x.dot(y), axis=0)

# Build Synk function. NOTES:
# 1. bcast_input "y" will have the full value broadcast to all workers
# 2. outputs have different reduce operations (default is "avg")
f = synk.function(inputs=[x],
                  bcast_inputs=[y],
                  outputs=[z_avg, (z_sum, "sum"), (z_max, "max")])
synk.distribute()  # worker GPUs receive all synk functions, prepare to execute

# Generate random data and compute results
x_dat = 0.01 * np.random.randn(1000, 10).astype(theano.config.floatX)
y_dat = np.random.randn(10, 5).astype(theano.config.floatX)

# For comparison, run on only master GPU, as if standard Theano built by:
# f = theano.function(inputs=[x, y], outputs=[z_avg, z_sum, z_max])
r_avg, r_sum, r_max = f.as_theano(x_dat, y_dat)

# Prepare for computation: move data into OS-shared memory (this is one way)
x_dat_synk, y_dat_synk = f.build_inputs(x_dat, y_dat)

# Compute result using multiple GPUs, reduce to master

예제 #13

0

파일 보기

def main(model='mlp', batch_size=500, num_epochs=10):
    """Train a Lasagne network with data held in per-GPU shared variables.

    Loads the dataset, forks Synkhronos workers, builds the network, and
    compiles four Synkhronos functions: a gradient step and a parameter-update
    step for training, a validation function (both fed from shared variables
    through ``givens``), and a test function fed through explicit inputs.
    Training and validation data are scattered to the GPUs once, then each
    epoch runs gradient step / all-reduce / update per minibatch and prints
    training and validation statistics.  Test results are printed at the end.

    Args:
        model: Model selector forwarded to ``build_network``.
        batch_size: Global minibatch size; each worker uses
            ``batch_size // n_gpu`` indices per step.
        num_epochs: Number of passes over the training data.
    """

    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
    y_train = y_train.astype("int32")  # (some downstream type error on uint8)
    y_val = y_val.astype("int32")
    # NOTE(review): y_test is not cast like y_train / y_val -- confirm that
    # test_fn.build_inputs copes with its original dtype.

    # Fork worker processes and initialize GPU before building variables.
    n_gpu = synk.fork()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    network = build_network(model, input_var)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)

    # `updates` here is a module defined outside this excerpt (not
    # lasagne.updates); its nesterov_momentum returns three collections.
    grad_updates, param_updates, grad_shared = updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)
    # updates = lasagne.updates.nesterov_momentum(
    #         loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Make GPU variables to hold the data.
    # Initialised with the first 1/n_gpu of each array; the full arrays are
    # scattered into these variables further below.
    s_input_train = theano.shared(X_train[:len(X_train) // n_gpu])
    s_target_train = theano.shared(y_train[:len(y_train) // n_gpu])
    s_input_val = theano.shared(X_val[:len(X_val) // n_gpu])
    s_target_val = theano.shared(y_val[:len(y_val) // n_gpu])

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_grad_fn = synk.function(
        inputs=[],
        outputs=loss,
        givens=[(input_var, s_input_train), (target_var, s_target_train)],
        sliceable_shareds=[s_input_train, s_target_train],
        updates=grad_updates)
    train_update_fn = synk.function([], updates=param_updates)
    # train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = synk.function(inputs=[],
                           givens=[(input_var, s_input_val),
                                   (target_var, s_target_val)],
                           sliceable_shareds=[s_input_val, s_target_val],
                           outputs=[test_loss, test_acc])
    # val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Don't bother to put test data on GPU ahead of time.
    test_fn = synk.function([input_var, target_var],
                            outputs=[test_loss, test_acc])

    # After building all functions, give them to workers.
    synk.distribute()

    # Put data into OS shared memory for worker access.
    X_test, y_test = test_fn.build_inputs(X_test, y_test)

    print("Scattering data to GPUs.")
    scatter_vars = [s_input_train, s_target_train, s_input_val, s_target_val]
    scatter_vals = [X_train, y_train, X_val, y_val]
    synk.scatter(scatter_vars, scatter_vals)
    # Use the smallest per-worker length so batch indices are valid everywhere.
    train_worker_len = min(synk.get_lengths(s_target_train))
    worker_batch_size = batch_size // n_gpu

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        # for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
        for batch in iterate_minibatch_indices(train_worker_len,
                                               worker_batch_size,
                                               shuffle=True):
            # Three-step update: gradient step, all-reduce, parameter update.
            train_err += train_grad_fn(batch_s=batch)
            synk.all_reduce(grad_shared)  # (averages)
            train_update_fn()
            train_batches += 1

        # And a full pass over the validation data:
        # val_err = 0
        # val_acc = 0
        # val_batches = 0
        # for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False):
        #     inputs, targets = batch
        #     err, acc = val_fn(inputs, targets)
        #     val_err += err
        #     val_acc += acc
        #     val_batches += 1
        # A single call replaces the manual batching loop commented out above.
        val_err, val_acc = val_fn(num_slices=4)

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(float(val_err)))
        print("  validation accuracy:\t\t{:.2f} %".format(
            float(val_acc) * 100))

    # After training, we compute and print the test error:
    # test_err = 0
    # test_acc = 0
    # test_batches = 0
    # for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
    #     inputs, targets = batch
    #     err, acc = val_fn(inputs, targets)
    #     test_err += err
    #     test_acc += acc
    #     test_batches += 1
    # NOTE: test_acc is rebound from the symbolic expression to the result.
    test_err, test_acc = test_fn(X_test, y_test, num_slices=4)
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(float(test_err)))
    print("  test accuracy:\t\t{:.2f} %".format(float(test_acc) * 100))

예제 #14

0

파일 보기

파일: batch_s_test.py 프로젝트: david-leon/Synkhronos

import theano
import synkhronos as synk
import numpy as np

# Test script: a function that gathers a sliceable shared variable, called
# with several combinations of the batch_s and num_slices keyword arguments.
synk.fork()

s = theano.shared(np.zeros([100, 2], dtype='float32'), name='shared_var')
# s = theano.shared(np.array(list(range(100 * 2)), dtype='float32').reshape(100, 2))

# The output is given as a (variable, "gather") pair.
f = synk.function([], outputs=(s, "gather"), sliceable_shareds=[s])

synk.distribute()

# Row i of d is filled with the value i (in-place multiply of a row of ones).
d = np.ones([200, 2], dtype='float32')
for i, row in enumerate(d):
    row *= i

sd = synk.data(value=d)

synk.scatter(s, sd)

# Default call, then sliced / index-selected variants of the same call.
print(f())
print("\n")
print(f(num_slices=3))
print("\n")
print(f(batch_s=[0, 1, 2, 3, 4, 5]))
print("\n")
print(f(batch_s=[0, 1, 2, 3, 4, 5], num_slices=2))
print("\n")
# Unsorted index list with a repeated entry (23).
print(f(batch_s=[49, 23, 1, 7, 23]))
print("\n")

예제 #15

0

파일 보기

"""

import numpy as np
import theano
import theano.tensor as T

import synkhronos as synk
# Demo script: function with two regular inputs (x, w) and one bcast input
# (y), evaluated through as_theano on several subsets of x.
synk.fork()

# Build simple data-parallel computations (parallel across rows of "x")
x = T.matrix('x')
y = T.matrix('y')
w = T.matrix('w')
z = T.sum((x + w).dot(y), axis=0)

f = synk.function(inputs=[x, w], bcast_inputs=[y], outputs=(z, "sum"))
synk.distribute()

x_dat = 0.01 * np.random.randn(1000, 10).astype(theano.config.floatX)
y_dat = np.random.randn(10, 5).astype(theano.config.floatX)
# w has 100 rows, matching the 100-row subsets of x selected below.
w_dat = 0.01 * np.random.randn(100, 10).astype(theano.config.floatX)

# Build assortment of subsets of the data to compute on.
# (Can be int, slice, or list (e.g. list for random shuffle))
max_idx_0 = 100
slice_1 = slice(100, 200)  # must specify start and stop (for now)
# np.random.randint excludes `high`, so indices are drawn from 0..998.
list_2 = np.random.randint(low=0, high=999, size=100)

# Reference results; arguments are passed in the order x, w, y.
r_theano_0 = f.as_theano(x_dat[:max_idx_0], w_dat, y_dat)
r_theano_1 = f.as_theano(x_dat[slice_1], w_dat, y_dat)
r_theano_2 = f.as_theano(x_dat[list_2], w_dat, y_dat)

예제 #16

0

파일 보기

import numpy as np
import theano
import theano.tensor as T
import synkhronos as synk

# Demo script: update a shared variable through the as_theano path and the
# Synkhronos path, then print it after gather / reduce calls.
synk.fork(2)
s_init = np.ones(2, dtype='float32')
x = T.matrix('x')
s = theano.shared(s_init, name='s')
# Each call replaces s with the column-wise sum of x * s (no outputs returned).
f = synk.function([x], updates=[(s, T.sum(x * s, axis=0))])
synk.distribute()
x_dat = synk.data(np.array([[1, 1], [2, 2], [3, 3], [4, 4]]).astype('float32'))
print("\ns initial:\n", s.get_value())

# as_theano is given the .data attribute of the synk.data object, not the
# wrapper itself.
f.as_theano(x_dat.data)
print("\ns after Theano call:\n", s.get_value())

# Restore the initial value locally before the Synkhronos call.
s.set_value(s_init)
f(x_dat)
print("\nlocal s after reset and Synkhronos call:\n", s.get_value())

gathered_s = synk.gather(s, nd_up=1)
print("\ngathered s:\n", gathered_s)

# Per the printed label below, this reduce is in-place on s.
synk.reduce(s, op="sum")
print("\nlocal s after in-place reduce:\n", s.get_value())

gathered_s = synk.gather(s, nd_up=1)
print("\ngathered s after reduce:\n", gathered_s)

# Broadcast the initial value as the new value of s.
synk.broadcast(s, s_init)