def compute_network_gradient(network_json, opt_state_json):
    """Compute the batch-size-scaled cost gradient for one mini-batch.

    Args:
        network_json: Serialized network, handed to
            ``ParticleNetwork.read_from_json``.
        opt_state_json: JSON text decoding to a mapping with the keys
            ``"n"`` (printed as the epoch), ``"mb"`` (mini-batch index) and
            ``"mbpe"`` (divisor used to derive the mini-batch length).

    Returns:
        A dict with the keys ``"dc_db"``, ``"dc_dq"``, ``"dc_dr"``,
        ``"dc_dt"`` (gradient pieces converted to lists) and ``"cost"``
        (0.0 unless the mini-batch index is a multiple of ten).

    Side effects:
        May call ``shuffle_data()``, prints a progress line, and stores the
        decoded optimizer state in the global ``prev_opt_state``.
    """
    global shuffle_X, shuffle_Y, prev_opt_state

    state_now = json.loads(opt_state_json)
    batch_idx = state_now.get("mb")

    # Reshuffle on the very first call and whenever "n" has advanced past
    # the value seen on the previous call.
    first_call = prev_opt_state is None
    if first_call or prev_opt_state.get("n") < state_now.get("n"):
        shuffle_data()

    batch_len = len(shuffle_X) // state_now.get("mbpe")
    start = batch_idx * batch_len
    stop = (batch_idx + 1) * batch_len
    batch_X = shuffle_X[start:stop]
    batch_Y = shuffle_Y[start:stop]

    print("Computing gradient for epoch {} mini-batch {}".format(
        state_now.get("n"), batch_idx))

    net = ParticleNetwork.read_from_json(None, network_json)
    dc_db, dc_dq, dc_dr, dc_dt = net.cost_gradient(batch_X, batch_Y)

    # Multiply by the mini-batch length to undo the mean
    # (presumably cost_gradient averages over the batch -- TODO confirm).
    scale = float(batch_len)

    def as_scaled_list(values):
        """Scale *values* and wrap the result in a list for serialization."""
        return list(values * scale)

    # dc_dt / dc_dr carry one more leading entry than dc_db / dc_dq:
    # entry 0 is converted here, entries 1.. inside the layer loop.
    dc_dt[0] = as_scaled_list(dc_dt[0])
    for axis in range(3):
        dc_dr[0][axis] = as_scaled_list(dc_dr[0][axis])

    for idx, _ in enumerate(net.layers):
        nxt = idx + 1
        dc_db[idx] = list(dc_db[idx])
        dc_db[idx][0] = as_scaled_list(dc_db[idx][0])
        dc_dq[idx] = as_scaled_list(dc_dq[idx])
        dc_dt[nxt] = as_scaled_list(dc_dt[nxt])
        for axis in range(3):
            dc_dr[nxt][axis] = as_scaled_list(dc_dr[nxt][axis])

    # The cost is evaluated on all of shuffle_X / shuffle_Y, but only on
    # every tenth mini-batch; otherwise 0.0 is reported.
    if batch_idx % 10 == 0:
        cost = net.cost(shuffle_X, shuffle_Y) * len(shuffle_X)
    else:
        cost = 0.0

    prev_opt_state = state_now

    return {
        "dc_db": dc_db,
        "dc_dq": dc_dq,
        "dc_dr": dc_dr,
        "dc_dt": dc_dt,
        "cost": cost,
    }
def compute_network_gradient(network_json, opt_state_json):
    """Return the serializable, batch-size-scaled gradient of one mini-batch.

    Args:
        network_json: Serialized network passed to
            ``ParticleNetwork.read_from_json``.
        opt_state_json: JSON string that decodes to a mapping holding
            ``"n"`` (reported as the epoch), ``"mb"`` (mini-batch index)
            and ``"mbpe"`` (used to split the data into mini-batches).

    Returns:
        dict: ``"dc_db"``, ``"dc_dq"``, ``"dc_dr"``, ``"dc_dt"`` with their
        array entries turned into lists, plus ``"cost"`` which is non-zero
        only when the mini-batch index is divisible by ten.

    Side effects:
        Calls ``shuffle_data()`` when needed, prints progress, and updates
        the global ``prev_opt_state``.
    """
    global shuffle_X
    global shuffle_Y
    global prev_opt_state

    current = json.loads(opt_state_json)
    which_batch = current.get("mb")

    # Shuffle when there is no previous state or "n" grew since last call.
    needs_shuffle = (prev_opt_state is None
                     or prev_opt_state.get("n") < current.get("n"))
    if needs_shuffle:
        shuffle_data()

    per_batch = len(shuffle_X) // current.get("mbpe")
    window = slice(which_batch * per_batch, (which_batch + 1) * per_batch)
    xs = shuffle_X[window]
    ys = shuffle_Y[window]

    progress = "Computing gradient for epoch {} mini-batch {}".format(
        current.get("n"), which_batch)
    print(progress)

    net = ParticleNetwork.read_from_json(None, network_json)
    dc_db, dc_dq, dc_dr, dc_dt = net.cost_gradient(xs, ys)

    # Scaling by the batch length turns the gradient back into a sum
    # (assumes cost_gradient returns a mean -- TODO confirm).
    factor = float(per_batch)

    def scaled(arr):
        """Multiply by the batch length and convert to a list."""
        return list(arr * factor)

    def convert_slot(k):
        """Convert entry *k* of dc_dt and its three dc_dr components."""
        dc_dt[k] = scaled(dc_dt[k])
        dc_dr[k][0] = scaled(dc_dr[k][0])
        dc_dr[k][1] = scaled(dc_dr[k][1])
        dc_dr[k][2] = scaled(dc_dr[k][2])

    # Entry 0 precedes the layers; each layer then owns entry layer_no + 1.
    convert_slot(0)
    for layer_no, _layer in enumerate(net.layers):
        dc_db[layer_no] = list(dc_db[layer_no])
        dc_db[layer_no][0] = scaled(dc_db[layer_no][0])
        dc_dq[layer_no] = scaled(dc_dq[layer_no])
        convert_slot(layer_no + 1)

    gradient = dict(dc_db=dc_db, dc_dq=dc_dq, dc_dr=dc_dr, dc_dt=dc_dt)

    # Full-data cost only every tenth mini-batch; 0.0 otherwise.
    on_cost_step = (which_batch % 10) == 0
    gradient["cost"] = (net.cost(shuffle_X, shuffle_Y) * len(shuffle_X)
                        if on_cost_step else 0.0)

    prev_opt_state = current

    return gradient
def main():
    """Run the master side of the distributed particle-network training.

    Builds a 784-32-10 particle network and an RMSprop ``ParticleSGD``
    optimizer, listens on port 8100, and for every mini-batch:

    1. broadcasts ``net_json---worker---opt_state_json---:::`` to each
       worker in ``worker_list``;
    2. accepts connections until one ``json---worker---...`` reply per
       worker has been received;
    3. aggregates the replies with ``aggregate_gradient`` and applies the
       weight update.

    After each epoch the test-set accuracy is printed. The listening socket
    and every accepted client socket are closed even if an error occurs.
    """
    print("Master setting up particle network")

    # Restore the saved RNG state so that runs are always the same.
    np.random.set_state(state)

    phase = True
    s = 0.5
    t = None
    q = None
    b = None
    net = ParticleNetwork(
        cost="categorical_cross_entropy",
        particle_input=ParticleInput(784, s=s, t=t, phase_enabled=phase))
    net.append(Particle(784, 32, activation="sigmoid",
                        s=s, t=t, q=q, b=b, phase_enabled=phase))
    net.append(Particle(32, 10, activation="softmax",
                        s=s, t=t, q=q, b=b, phase_enabled=phase))

    mbs = 200
    nt = 4
    cs = np.min((mbs // nt, 500))

    sgd = ParticleSGD(n_epochs=1, mini_batch_size=mbs, verbosity=2,
                      weight_update="rmsprop", beta=0.95, gamma=0.99,
                      cost_freq=5, alpha=0.01, n_threads=nt, chunk_size=cs)

    n_epochs = 1

    worker_list = ["nebula0", "nebula1", "nebula2", "nebula3"]

    # Set up server
    print("Master setting up server")
    server = socket.socket()
    try:
        host = socket.gethostname()
        port = 8100
        server.bind((host, port))
        server.listen(5)

        mini_batches_per_epoch = 60000 // mbs
        opt_state = {"n": 0, "mb": 0, "mbpe": mini_batches_per_epoch}

        for n in range(n_epochs):
            # Must be the *string* key "n": that is the key initialised
            # above and serialized to the workers. Writing opt_state[n]
            # would add a stray integer key and leave "n" stuck at 0.
            opt_state["n"] = n
            for mb in range(mini_batches_per_epoch):
                opt_state["mb"] = mb
                print("Epoch: {} Mini-batch: {}".format(n, mb))

                # JSON-ify the net and the optimizer state once per
                # mini-batch; both are identical for every worker.
                net_json = net.write_to_json(None)
                state_json = json.dumps(opt_state)

                # Broadcast the net for the gradient
                for w in worker_list:
                    broadcast("---".join((net_json, w, state_json, ":::")))

                # Wait for responses from workers (keyed by worker name,
                # so a repeated reply overwrites rather than double-counts).
                gradient_data = {}
                while len(gradient_data) < len(worker_list):
                    client, _addr = server.accept()
                    try:
                        recv_data = bytes.decode(buffer_recv(client))
                    finally:
                        # Release the connection even if receiving fails.
                        client.close()
                    json_data, worker, _ = recv_data.split("---")
                    gradient_data[worker] = json.loads(json_data)

                # Aggregate gradient over pool
                dc_db, dc_dq, dc_dr, dc_dt, cost = aggregate_gradient(
                    gradient_data, mbs, net)

                # Update network according to SGD method
                sgd.dc_db = dc_db
                sgd.dc_dq = dc_dq
                sgd.dc_dt = dc_dt
                sgd.dc_dr = dc_dr
                sgd.weight_update_func(net)

                # Report cost (a non-positive value means "not computed")
                if cost > 0.0:
                    print("Cost at epoch {} mini-batch {}: {:g}".format(
                        n, mb, cost))

            stats = classification_stats(
                net.predict(X_test).argmax(axis=1), Y_test.argmax(axis=1))
            print("Test accuracy for epoch: {}".format(
                stats.get("total_accuracy")))
    finally:
        server.close()
def main():
    """Drive distributed training of the particle network as the master.

    Sets up a 784-32-10 particle network with an RMSprop ``ParticleSGD``
    optimizer and a listening socket on port 8100. Each mini-batch the
    serialized network and optimizer state are broadcast to every worker
    (message fields joined by ``---`` and terminated by ``:::``), one
    gradient reply per worker is collected, the replies are combined with
    ``aggregate_gradient`` and the weights are updated. Test accuracy is
    printed after each epoch.

    The server socket and each client connection are always closed, also
    when an exception interrupts training.
    """
    print("Master setting up particle network")

    # To ensure runs are always the same, restore the saved RNG state.
    np.random.set_state(state)

    phase = True
    s = 0.5
    t = None
    q = None
    b = None
    net = ParticleNetwork(
        cost="categorical_cross_entropy",
        particle_input=ParticleInput(784, s=s, t=t, phase_enabled=phase))
    net.append(
        Particle(784, 32, activation="sigmoid", s=s, t=t, q=q, b=b,
                 phase_enabled=phase))
    net.append(
        Particle(32, 10, activation="softmax", s=s, t=t, q=q, b=b,
                 phase_enabled=phase))

    mbs = 200
    nt = 4
    cs = np.min((mbs // nt, 500))

    sgd = ParticleSGD(n_epochs=1,
                      mini_batch_size=mbs,
                      verbosity=2,
                      weight_update="rmsprop",
                      beta=0.95,
                      gamma=0.99,
                      cost_freq=5,
                      alpha=0.01,
                      n_threads=nt,
                      chunk_size=cs)

    n_epochs = 1

    worker_list = ["nebula0", "nebula1", "nebula2", "nebula3"]

    # Set up server
    print("Master setting up server")
    server = socket.socket()
    try:
        host = socket.gethostname()
        port = 8100
        server.bind((host, port))
        server.listen(5)

        mini_batches_per_epoch = 60000 // mbs
        opt_state = {"n": 0, "mb": 0, "mbpe": mini_batches_per_epoch}

        for n in range(n_epochs):
            # Update the "n" entry that is sent to the workers. Using the
            # integer n as the key would leave "n" at 0 forever and add an
            # unrelated extra key to the broadcast state.
            opt_state["n"] = n
            for mb in range(mini_batches_per_epoch):
                opt_state["mb"] = mb
                print("Epoch: {} Mini-batch: {}".format(n, mb))

                # Serialize once; the payload only differs by worker name.
                net_json = net.write_to_json(None)
                state_json = json.dumps(opt_state)

                # Broadcast the net for the gradient
                for w in worker_list:
                    broadcast("---".join((net_json, w, state_json, ":::")))

                # Wait until every worker name has delivered a gradient.
                gradient_data = {}
                while len(gradient_data) < len(worker_list):
                    client, _addr = server.accept()
                    try:
                        recv_data = bytes.decode(buffer_recv(client))
                    finally:
                        # Always release the connection.
                        client.close()
                    json_data, worker, _ = recv_data.split("---")
                    gradient_data[worker] = json.loads(json_data)

                # Aggregate gradient over pool
                dc_db, dc_dq, dc_dr, dc_dt, cost = aggregate_gradient(
                    gradient_data, mbs, net)

                # Update network according to SGD method
                sgd.dc_db = dc_db
                sgd.dc_dq = dc_dq
                sgd.dc_dt = dc_dt
                sgd.dc_dr = dc_dr
                sgd.weight_update_func(net)

                # Report cost only when a positive value was aggregated.
                if cost > 0.0:
                    print("Cost at epoch {} mini-batch {}: {:g}".format(
                        n, mb, cost))

            stats = classification_stats(
                net.predict(X_test).argmax(axis=1), Y_test.argmax(axis=1))
            print("Test accuracy for epoch: {}".format(
                stats.get("total_accuracy")))
    finally:
        server.close()