import json
from multiprocessing import Pool

# --- driver side: runs locally and fans out one AWS Lambda per data shard ---
def train(batch_size, num_lambda, lr, lambda_size, s3_url, kv_url):
    pool = Pool(int(num_lambda) + 1)
    procs = []
    for rank in range(0, int(num_lambda)):
        payload = json.dumps({
            "batch-size": batch_size,
            "learning-rate": lr,
            "s3-url": s3_url,
            "kv-url": kv_url,
            "rank": rank,
            "lambda_size": lambda_size
        })
        print("Launch AWS Lambda #%d" % rank)
        procs.append(pool.apply_async(lambda_call, (payload,)))
    # Block until every worker has returned.
    res = [proc.get() for proc in procs]
    print(res)
    pool.close()
    pool.join()
    # Fetch the final weights and bias from the key-value store.
    w, b = pull(kv_url, False)
    print("Weight: ", w)
    print("Bias:", b)
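# `lambda_call` is not shown in this listing. A minimal sketch using boto3's
# synchronous invoke API, assuming the worker is deployed under a function
# name such as "ps-lambda-worker" (the name is an assumption):
import boto3

def lambda_call(payload):
    # Invoke the worker Lambda and block until its response arrives.
    client = boto3.client("lambda")
    resp = client.invoke(FunctionName="ps-lambda-worker",
                         InvocationType="RequestResponse",
                         Payload=payload)
    return resp["Payload"].read()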
def load_data(s3_url, batch_size, rank):
    # Download the full dataset, then slice out this worker's shard. Here
    # `batch_size` is the per-Lambda shard size (`lambda_size` at the call site).
    X, y = pull(s3_url)
    offset = rank * batch_size
    if (offset + batch_size) < len(X):
        return X[offset:offset + batch_size], y[offset:offset + batch_size]
    else:
        # Last shard: take whatever remains.
        return X[offset:], y[offset:]
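# With lambda_size=1000, for example, the worker with rank 3 trains on rows
# 3000-3999; the else branch lets the last worker absorb any remainder when
# the dataset size is not an exact multiple of the shard size.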
def SGD(params, lr, kv_url):
    # Pull the freshest parameters from the KV store, apply this worker's
    # local gradients to them, and push the result back.
    ps_params = pull(kv_url)
    for i in range(0, len(params)):
        params[i][:] = ps_params[i] - lr * params[i].grad
    push(params, kv_url)
    return params
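# `pull` and `push` are not defined here either. A sketch assuming the
# "KV store" is simply an S3 object holding a pickled list of arrays
# (the URL parsing and pickle format are assumptions, not the original code):
import pickle
import boto3
import mxnet as mx

def _split_s3_url(url):
    # "s3://bucket/key" -> ("bucket", "key")
    bucket, _, key = url[len("s3://"):].partition("/")
    return bucket, key

def pull(url, as_nd=True):
    # Fetch and unpickle; callers pass False to get plain numpy arrays back.
    bucket, key = _split_s3_url(url)
    body = boto3.client("s3").get_object(Bucket=bucket, Key=key)["Body"].read()
    arrays = pickle.loads(body)
    return [mx.nd.array(a) for a in arrays] if as_nd else arrays

def push(params, url):
    bucket, key = _split_s3_url(url)
    blob = pickle.dumps([p.asnumpy() for p in params])
    boto3.client("s3").put_object(Bucket=bucket, Key=key, Body=blob)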
import argparse

def main():
    parser = argparse.ArgumentParser(
        description='Distributed linear regression on AWS Lambda.')
    # Passing --data flips is_data_ready to False, i.e. the dataset still has
    # to be generated and uploaded to S3.
    parser.add_argument('--data', dest='is_data_ready', default=True,
                        action='store_false',
                        help='generate the data and upload it to S3 first')
    args = parser.parse_args()

    epochs = 1
    learning_rate = .001
    batch_size = 20
    lambda_size = 1000          # examples per Lambda worker
    num_inputs = 2
    num_outputs = 1
    num_examples = 10000
    num_lambda = int(num_examples / lambda_size)
    kv_url = "s3://ps-lambda-mxnet/w-b-%d" % num_examples
    s3_url = "s3://ps-lambda-mxnet/X-y-%d" % num_examples

    X, y = gen_data(num_examples, num_inputs)
    if not args.is_data_ready:
        upload_input_data([X, y], s3_url)
    init_w_b(num_inputs, num_outputs, kv_url)

    for i in range(0, epochs):
        train(batch_size, num_lambda, learning_rate, lambda_size, s3_url, kv_url)

    # Collect the final results.
    w, b = pull(kv_url, False)
    print("Weight: ", w)
    print("Bias:", b)

if __name__ == '__main__':
    main()
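# `gen_data`, `init_w_b`, and `upload_input_data` are assumed helpers. A
# sketch consistent with the linear-regression setup above (the true weights,
# bias, and noise scale are illustrative assumptions):
import mxnet as mx

def gen_data(num_examples, num_inputs):
    # Synthetic linear data: y = X.w_true + b_true + noise.
    X = mx.nd.random.normal(shape=(num_examples, num_inputs))
    w_true = mx.nd.random.uniform(-3, 3, shape=(num_inputs, 1))
    noise = 0.01 * mx.nd.random.normal(shape=(num_examples, 1))
    y = mx.nd.dot(X, w_true) + 4.2 + noise
    return X, y

def init_w_b(num_inputs, num_outputs, kv_url):
    # Seed the KV store with small random weights and a zero bias.
    w = mx.nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
    b = mx.nd.zeros((num_outputs,))
    push([w, b], kv_url)

def upload_input_data(arrays, s3_url):
    # Uploading the dataset is the same S3 write as pushing parameters.
    push(arrays, s3_url)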
import mxnet as mx
from mxnet import autograd, gluon

# --- worker side: runs inside each AWS Lambda invocation (deployed
# separately from the driver above, so the `train` name clash is harmless) ---
def train(kv_url, s3_url, batch_size, lambda_size, rank, lr):
    data_ctx = mx.cpu()
    model_ctx = mx.cpu()

    # Load this worker's shard of the training data.
    X, y = load_data(s3_url, lambda_size, rank)
    num_batches = y.shape[0] / batch_size
    train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y),
                                       batch_size=batch_size, shuffle=True)

    # Initialize with the parameters currently in the KV store.
    params = pull(kv_url)
    for param in params:
        param.attach_grad()

    def net(X):
        # Linear model: y_hat = Xw + b.
        return mx.nd.dot(X, params[0]) + params[1]

    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx).reshape((-1, 1))
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        loss.backward()
        # Synchronize with the parameter server after every mini-batch.
        params = SGD(params, lr, kv_url)
        cumulative_loss += loss.asscalar()

    push([params[0], params[1]], kv_url)
    return (params[0], params[1], cumulative_loss / num_batches)
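# Two pieces the worker needs are not in the listing: the loss and the Lambda
# entry point. A `square_loss` that returns the batch mean is an assumption,
# but one consistent with `loss.asscalar()` in the loop above; the handler
# name is an assumption, while the payload keys mirror the driver's json.dumps.
import json
import mxnet as mx

def square_loss(yhat, y):
    # Mean squared error over the mini-batch (assumed definition).
    return mx.nd.mean((yhat - y) ** 2)

def handler(event, context):
    # AWS Lambda entry point: unpack the driver's payload and train one shard.
    w, b, avg_loss = train(kv_url=event["kv-url"],
                           s3_url=event["s3-url"],
                           batch_size=event["batch-size"],
                           lambda_size=event["lambda_size"],
                           rank=event["rank"],
                           lr=event["learning-rate"])
    # NDArrays are not JSON-serializable, so report only the scalar loss.
    return json.dumps({"rank": event["rank"], "avg_loss": float(avg_loss)})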