예제 #1
0
def compute_network_gradient(network_json, opt_state_json):
    """Compute the gradient of one mini-batch for a serialized network.

    Args:
        network_json: JSON text handed to ``ParticleNetwork.read_from_json``.
        opt_state_json: JSON text of the optimizer state. The keys read here
            are ``"n"`` (epoch), ``"mb"`` (mini-batch index) and ``"mbpe"``
            (mini-batches per epoch).

    Returns:
        A dict with the keys ``"dc_db"``, ``"dc_dq"``, ``"dc_dr"``,
        ``"dc_dt"`` and ``"cost"``. The gradient entries are converted to
        lists so they can be JSON-serialized; ``"cost"`` is ``0.0`` unless
        the mini-batch index is a multiple of ten.

    Side effects:
        Calls ``shuffle_data()`` on the first call and whenever the epoch
        number increases, and stores the decoded state in the module-level
        ``prev_opt_state``.
    """
    global shuffle_X, shuffle_Y, prev_opt_state

    opt_state = json.loads(opt_state_json)
    mb = opt_state.get("mb")
    epoch = opt_state.get("n")

    # Reshuffle on the very first call and at the start of each new epoch.
    if prev_opt_state is None or prev_opt_state.get("n") < epoch:
        shuffle_data()

    # Select this mini-batch's window out of the shuffled data.
    size = len(shuffle_X) // opt_state.get("mbpe")
    window = slice(mb * size, (mb + 1) * size)
    mini_X = shuffle_X[window]
    mini_Y = shuffle_Y[window]

    print("Computing gradient for epoch {} mini-batch {}".format(epoch, mb))

    net = ParticleNetwork.read_from_json(None, network_json)
    dc_db, dc_dq, dc_dr, dc_dt = net.cost_gradient(mini_X, mini_Y)

    # "De-mean-ify": multiply by the batch size (presumably cost_gradient
    # returns a per-sample mean -- confirm against ParticleNetwork).
    scale = float(size)

    def scaled_list(values):
        """Scale ``values`` by the batch size and wrap the result in a list."""
        return list(values * scale)

    def convert_position(index):
        """Convert the dc_dt entry and the three dc_dr components at ``index``."""
        dc_dt[index] = scaled_list(dc_dt[index])
        for component in range(3):
            dc_dr[index][component] = scaled_list(dc_dr[index][component])

    # Index 0 has dc_dt/dc_dr entries but no dc_db/dc_dq counterpart.
    convert_position(0)
    for l, _ in enumerate(net.layers):
        # Only the first row of dc_db[l] is scaled, exactly as before.
        bias_rows = list(dc_db[l])
        bias_rows[0] = scaled_list(bias_rows[0])
        dc_db[l] = bias_rows
        dc_dq[l] = scaled_list(dc_dq[l])
        convert_position(l + 1)

    # The full-data cost is only evaluated every ten mini-batches.
    if (mb % 10) == 0:
        cost = net.cost(shuffle_X, shuffle_Y) * len(shuffle_X)
    else:
        cost = 0.0

    prev_opt_state = opt_state

    return {
        "dc_db": dc_db,
        "dc_dq": dc_dq,
        "dc_dr": dc_dr,
        "dc_dt": dc_dt,
        "cost": cost,
    }
예제 #2
0
def compute_network_gradient(network_json, opt_state_json):
    """Compute the gradient of one mini-batch for a serialized network.

    Args:
        network_json: JSON text handed to ``ParticleNetwork.read_from_json``.
        opt_state_json: JSON text of the optimizer state. The keys read here
            are ``"n"`` (epoch), ``"mb"`` (mini-batch index) and ``"mbpe"``
            (mini-batches per epoch).

    Returns:
        A dict with the keys ``"dc_db"``, ``"dc_dq"``, ``"dc_dr"``,
        ``"dc_dt"`` and ``"cost"``. The gradient entries are converted to
        lists so they can be JSON-serialized; ``"cost"`` is ``0.0`` unless
        the mini-batch index is a multiple of ten.

    Side effects:
        Calls ``shuffle_data()`` on the first call and whenever the epoch
        number increases, and stores the decoded state in the module-level
        ``prev_opt_state``.
    """
    global shuffle_X, shuffle_Y, prev_opt_state

    opt_state = json.loads(opt_state_json)
    mb = opt_state.get("mb")
    epoch = opt_state.get("n")

    # Reshuffle on the very first call and at the start of each new epoch.
    if prev_opt_state is None or prev_opt_state.get("n") < epoch:
        shuffle_data()

    # Select this mini-batch's window out of the shuffled data.
    size = len(shuffle_X) // opt_state.get("mbpe")
    window = slice(mb * size, (mb + 1) * size)
    mini_X = shuffle_X[window]
    mini_Y = shuffle_Y[window]

    print("Computing gradient for epoch {} mini-batch {}".format(epoch, mb))

    net = ParticleNetwork.read_from_json(None, network_json)
    dc_db, dc_dq, dc_dr, dc_dt = net.cost_gradient(mini_X, mini_Y)

    # "De-mean-ify": multiply by the batch size (presumably cost_gradient
    # returns a per-sample mean -- confirm against ParticleNetwork).
    scale = float(size)

    def scaled_list(values):
        """Scale ``values`` by the batch size and wrap the result in a list."""
        return list(values * scale)

    def convert_position(index):
        """Convert the dc_dt entry and the three dc_dr components at ``index``."""
        dc_dt[index] = scaled_list(dc_dt[index])
        for component in range(3):
            dc_dr[index][component] = scaled_list(dc_dr[index][component])

    # Index 0 has dc_dt/dc_dr entries but no dc_db/dc_dq counterpart.
    convert_position(0)
    for l, _ in enumerate(net.layers):
        # Only the first row of dc_db[l] is scaled, exactly as before.
        bias_rows = list(dc_db[l])
        bias_rows[0] = scaled_list(bias_rows[0])
        dc_db[l] = bias_rows
        dc_dq[l] = scaled_list(dc_dq[l])
        convert_position(l + 1)

    # The full-data cost is only evaluated every ten mini-batches.
    if (mb % 10) == 0:
        cost = net.cost(shuffle_X, shuffle_Y) * len(shuffle_X)
    else:
        cost = 0.0

    prev_opt_state = opt_state

    return {
        "dc_db": dc_db,
        "dc_dq": dc_dq,
        "dc_dr": dc_dr,
        "dc_dt": dc_dt,
        "cost": cost,
    }
예제 #3
0
def main():
    """Run the master side of distributed particle-network training.

    Builds a 784-32-10 particle network and a ``ParticleSGD`` optimizer using
    the "rmsprop" weight update. For every mini-batch it broadcasts the
    serialized network plus optimizer state to each worker, collects one
    gradient reply per worker over a listening socket, aggregates the replies
    and applies the weight update. Test accuracy is printed after each epoch.

    Relies on module-level names defined outside this function: ``state``,
    ``broadcast``, ``buffer_recv``, ``aggregate_gradient``,
    ``classification_stats``, ``X_test`` and ``Y_test``.
    """
    print("Master setting up particle network")

    # Restore a fixed RNG state so the network is initialized identically
    # on every run.
    np.random.set_state(state)

    phase = True
    s = 0.5
    t = None
    q = None
    b = None

    net = ParticleNetwork(cost="categorical_cross_entropy",
                          particle_input=ParticleInput(784, s=s, t=t, phase_enabled=phase))
    net.append(Particle(784, 32, activation="sigmoid", s=s, t=t, q=q, b=b, phase_enabled=phase))
    net.append(Particle(32, 10, activation="softmax", s=s, t=t, q=q, b=b, phase_enabled=phase))

    mbs = 200
    nt = 4
    cs = np.min((mbs // nt, 500))
    sgd = ParticleSGD(n_epochs=1, mini_batch_size=mbs, verbosity=2, weight_update="rmsprop",
                      beta=0.95, gamma=0.99, cost_freq=5, alpha=0.01,
                      n_threads=nt, chunk_size=cs)

    n_epochs = 1
    worker_list = ["nebula0", "nebula1", "nebula2", "nebula3"]

    # 60000 is presumably the training-set size -- confirm against the loader.
    mini_batches_per_epoch = 60000 // mbs
    opt_state = {
        "n": 0,
        "mb": 0,
        "mbpe": mini_batches_per_epoch
    }

    # Set up server; the context manager closes the listening socket even if
    # training aborts with an exception.
    print("Master setting up server")
    with socket.socket() as server:
        host = socket.gethostname()
        port = 8100
        server.bind((host, port))
        server.listen(5)

        for n in range(n_epochs):
            # Fix: update the "n" entry. The previous ``opt_state[n] = n``
            # added an integer key instead, so the serialized epoch number
            # never changed.
            opt_state["n"] = n

            for mb in range(mini_batches_per_epoch):
                opt_state["mb"] = mb

                print("Epoch: {} Mini-batch: {}".format(n, mb))

                # JSON-ify the net and the optimizer state once per mini-batch
                net_json = net.write_to_json(None)
                opt_state_json = json.dumps(opt_state)

                # Broadcast the net for the gradient. Message layout:
                # <net json>---<worker name>---<opt_state json>---:::
                for w in worker_list:
                    broadcast("---".join((net_json, w, opt_state_json, ":::")))

                # Wait for responses from workers (one entry per worker name)
                gradient_data = {}
                while len(gradient_data) < len(worker_list):
                    client, _ = server.accept()  # Establish connection with client.
                    # Close the connection even when decoding/parsing fails.
                    with client:
                        recv_data = bytes.decode(buffer_recv(client))
                    # Reply layout: <gradient json>---<worker name>---<trailer>
                    json_data, worker, _ = recv_data.split("---")
                    gradient_data[worker] = json.loads(json_data)  # deserialize grads

                # Aggregate gradient over pool
                dc_db, dc_dq, dc_dr, dc_dt, cost = aggregate_gradient(gradient_data, mbs, net)

                # Update network according to SGD method
                sgd.dc_db = dc_db
                sgd.dc_dq = dc_dq
                sgd.dc_dt = dc_dt
                sgd.dc_dr = dc_dr
                sgd.weight_update_func(net)

                # Report cost; a non-positive value presumably means no cost
                # was computed for this mini-batch.
                if cost > 0.0:
                    print("Cost at epoch {} mini-batch {}: {:g}".format(n, mb, cost))

            stats = classification_stats(net.predict(X_test).argmax(axis=1), Y_test.argmax(axis=1))
            print("Test accuracy for epoch: {}".format(stats.get("total_accuracy")))
예제 #4
0
def main():
    """Run the master side of distributed particle-network training.

    Builds a 784-32-10 particle network and a ``ParticleSGD`` optimizer using
    the "rmsprop" weight update. For every mini-batch it broadcasts the
    serialized network plus optimizer state to each worker, collects one
    gradient reply per worker over a listening socket, aggregates the replies
    and applies the weight update. Test accuracy is printed after each epoch.

    Relies on module-level names defined outside this function: ``state``,
    ``broadcast``, ``buffer_recv``, ``aggregate_gradient``,
    ``classification_stats``, ``X_test`` and ``Y_test``.
    """
    print("Master setting up particle network")

    # Restore a fixed RNG state so the network is initialized identically
    # on every run.
    np.random.set_state(state)

    phase = True
    s = 0.5
    t = None
    q = None
    b = None

    net = ParticleNetwork(cost="categorical_cross_entropy",
                          particle_input=ParticleInput(784,
                                                       s=s,
                                                       t=t,
                                                       phase_enabled=phase))
    net.append(
        Particle(784,
                 32,
                 activation="sigmoid",
                 s=s,
                 t=t,
                 q=q,
                 b=b,
                 phase_enabled=phase))
    net.append(
        Particle(32,
                 10,
                 activation="softmax",
                 s=s,
                 t=t,
                 q=q,
                 b=b,
                 phase_enabled=phase))

    mbs = 200
    nt = 4
    cs = np.min((mbs // nt, 500))
    sgd = ParticleSGD(n_epochs=1,
                      mini_batch_size=mbs,
                      verbosity=2,
                      weight_update="rmsprop",
                      beta=0.95,
                      gamma=0.99,
                      cost_freq=5,
                      alpha=0.01,
                      n_threads=nt,
                      chunk_size=cs)

    n_epochs = 1
    worker_list = ["nebula0", "nebula1", "nebula2", "nebula3"]

    # 60000 is presumably the training-set size -- confirm against the loader.
    mini_batches_per_epoch = 60000 // mbs
    opt_state = {"n": 0, "mb": 0, "mbpe": mini_batches_per_epoch}

    # Set up server; the context manager closes the listening socket even if
    # training aborts with an exception.
    print("Master setting up server")
    with socket.socket() as server:
        host = socket.gethostname()
        port = 8100
        server.bind((host, port))
        server.listen(5)

        for n in range(n_epochs):
            # Fix: update the "n" entry. The previous ``opt_state[n] = n``
            # added an integer key instead, so the serialized epoch number
            # never changed.
            opt_state["n"] = n

            for mb in range(mini_batches_per_epoch):
                opt_state["mb"] = mb

                print("Epoch: {} Mini-batch: {}".format(n, mb))

                # JSON-ify the net and the optimizer state once per mini-batch
                net_json = net.write_to_json(None)
                opt_state_json = json.dumps(opt_state)

                # Broadcast the net for the gradient. Message layout:
                # <net json>---<worker name>---<opt_state json>---:::
                for w in worker_list:
                    broadcast("---".join(
                        (net_json, w, opt_state_json, ":::")))

                # Wait for responses from workers (one entry per worker name)
                gradient_data = {}
                while len(gradient_data) < len(worker_list):
                    # Establish connection with client.
                    client, _ = server.accept()
                    # Close the connection even when decoding/parsing fails.
                    with client:
                        recv_data = bytes.decode(buffer_recv(client))
                    # Reply layout: <gradient json>---<worker name>---<trailer>
                    json_data, worker, _ = recv_data.split("---")
                    # deserialize grads
                    gradient_data[worker] = json.loads(json_data)

                # Aggregate gradient over pool
                dc_db, dc_dq, dc_dr, dc_dt, cost = aggregate_gradient(
                    gradient_data, mbs, net)

                # Update network according to SGD method
                sgd.dc_db = dc_db
                sgd.dc_dq = dc_dq
                sgd.dc_dt = dc_dt
                sgd.dc_dr = dc_dr
                sgd.weight_update_func(net)

                # Report cost; a non-positive value presumably means no cost
                # was computed for this mini-batch.
                if cost > 0.0:
                    print("Cost at epoch {} mini-batch {}: {:g}".format(
                        n, mb, cost))

            stats = classification_stats(
                net.predict(X_test).argmax(axis=1), Y_test.argmax(axis=1))
            print("Test accuracy for epoch: {}".format(
                stats.get("total_accuracy")))