# Example #1
def train(batch_size, num_lambda, lr, lambda_size, s3_url, kv_url):
    """Fan one training epoch out to `num_lambda` AWS Lambda workers.

    Each worker receives a JSON payload describing its shard (rank) and
    the hyper-parameters; results are gathered before the pool shuts
    down, then the final weights/bias are pulled from the KV store.
    """
    worker_count = int(num_lambda)
    pool = Pool(worker_count + 1)

    async_results = []
    for rank in range(worker_count):
        payload = json.dumps({
            "batch-size": batch_size,
            "learning-rate": lr,
            "s3-url": s3_url,
            "kv-url": kv_url,
            "rank": rank,
            "lambda_size": lambda_size
        })
        print("Launch AWS Lambda #%d" % rank)
        async_results.append(pool.apply_async(lambda_call, (payload, )))

    # Block until every Lambda invocation has returned.
    res = [handle.get() for handle in async_results]
    print(res)

    pool.close()
    pool.join()

    # Fetch the aggregated model from the parameter store.
    w, b = pull(kv_url, False)
    print("Weight: ", w)
    print("Bias:", b)
# Example #2
def load_data(s3_url, batch_size, rank):
    """Pull the full dataset from S3 and return this worker's shard.

    Worker `rank` owns rows [rank*batch_size, rank*batch_size+batch_size);
    the last worker simply takes whatever remains.
    """
    X, y = pull(s3_url)
    start = rank * batch_size
    end = start + batch_size
    # Full-sized shard available?  Otherwise hand back the tail.
    if end < len(X):
        return X[start:end], y[start:end]
    return X[start:], y[start:]
# Example #3
def SGD(params, lr, kv_url):
    """One distributed SGD step against the parameter server.

    Pulls the server's current parameters, overwrites each local
    parameter in place with (server value - lr * local gradient), then
    pushes the updated parameters back.  Returns the (mutated) `params`
    list so callers may rebind it.
    """
    ps_params = pull(kv_url)

    # In-place update (via [:]) so tensors attached to autograd keep
    # their identity; gradients come from the local backward pass.
    for local, remote in zip(params, ps_params):
        local[:] = remote - lr * local.grad

    push(params, kv_url)
    return params
# Example #4
def main():
    """Driver: optionally upload data, run training epochs, print the model."""

    parser = argparse.ArgumentParser(description='Process some integers.')
    # NOTE(review): `store_false` with default=True means passing --data
    # sets is_data_ready to False, i.e. --data signals the data is NOT
    # yet in S3 — the opposite of what the help text suggests.  Confirm
    # the intended CLI semantics before changing; behavior kept as-is.
    parser.add_argument('--data',
                        dest='is_data_ready',
                        default=True,
                        action='store_false',
                        help='is data ready in S3')
    args = parser.parse_args()

    # Hyper-parameters.
    epochs = 1
    learning_rate = .001
    batch_size = 20
    lambda_size = 1000  # examples handled per Lambda worker

    num_inputs = 2
    num_outputs = 1
    num_examples = 10000

    # One Lambda per shard of `lambda_size` examples.
    num_lambda = num_examples // lambda_size

    kv_url = "s3://ps-lambda-mxnet/w-b-%d" % num_examples
    s3_url = "s3://ps-lambda-mxnet/X-y-%d" % num_examples

    # Generated unconditionally (as in the original flow); only uploaded
    # when the data is not already in S3.
    X, y = gen_data(num_examples, num_inputs)

    if not args.is_data_ready:
        upload_input_data([X, y], s3_url)

        init_w_b(num_inputs, num_outputs, kv_url)

    for _ in range(epochs):
        train(batch_size, num_lambda, learning_rate, lambda_size, s3_url,
              kv_url)

    # collect final results
    w, b = pull(kv_url, False)
    print("Weight: ", w)
    print("Bias:", b)
# Example #5
def train(kv_url, s3_url, batch_size, lambda_size, rank, lr):
    """Worker-side training pass over this Lambda's data shard.

    Loads the shard for `rank`, pulls initial parameters from the KV
    store, runs one pass of mini-batch SGD (synchronizing with the
    parameter server every batch via SGD()), then pushes the final
    parameters and returns (weight, bias, mean loss).
    """
    data_ctx = mx.cpu()
    model_ctx = mx.cpu()

    # load data
    X, y = load_data(s3_url, lambda_size, rank)
    # Float division; only used to average the cumulative loss below.
    num_batches = y.shape[0] / batch_size

    train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                       batch_size=batch_size,
                                       shuffle=True)

    # initialize with parameters from KV
    params = pull(kv_url)
    cumulative_loss = 0

    # Attach gradient buffers so autograd can record into them.
    for param in params:
        param.attach_grad()
    # total_loss = [np.mean(square_loss(net(X), y).asnumpy())]

    def net(X):
        # Linear model; closes over `params`, which SGD() mutates in
        # place each batch (the rebinding below keeps the same objects).
        return mx.nd.dot(X, params[0]) + params[1]

    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx).reshape((-1, 1))
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Per-batch sync with the parameter server.
        params = SGD(params, lr, kv_url)
        cumulative_loss += loss.asscalar()

    # Publish this worker's final parameters.
    push([params[0], params[1]], kv_url)
    # print(cumulative_loss / num_batches)

    result = (params[0], params[1], cumulative_loss / num_batches)
    return result