def main():
    # MLP to be fit
    net_mlp = None
    with open("/Users/alange/programming/MNIST/store/classic_32.json", "r") as f:
        net_mlp = Network.read_from_json(f)

    # Initial particle network
    net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(784, s=2.0))
    net.append(Particle(784, 32, activation="tanh", zeta=1.0, s=2.0))
    net.append(Particle(32, 10, activation="softmax", zeta=1.0, s=2.0))

    compute_matrices(net)
    error = compute_error(net_mlp, net)

    # Analytic gradients vs. finite-difference reference
    de_db, de_dq, de_dt, de_drx, de_dry, de_drz = compute_grad_w(net_mlp, net)
    fd_b, fd_q, fd_t, fd_x, fd_y, fd_z = compute_fd_grad(net_mlp, net)

    # Mean signed difference per layer for each parameter group
    for l, layer in enumerate(net.layers):
        diff_b = np.mean(de_db[l] - fd_b[l])
        diff_q = np.mean(de_dq[l] - fd_q[l])
        diff_t = np.mean(de_dt[l] - fd_t[l])
        diff_x = np.mean(de_drx[l] - fd_x[l])
        print("b", diff_b)
        print("q", diff_q)
        print("t", diff_t)
        print("x", diff_x)
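
# Hedged sketch, not part of the gradient comparison above: a signed mean can
# cancel positive and negative errors, so the maximum absolute difference is a
# stricter summary when comparing analytic and finite-difference gradients.
# The helper name is illustrative.
def max_abs_diff(analytic, numerical):
    return np.max(np.abs(np.asarray(analytic) - np.asarray(numerical)))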
def main2():
    sgd = ParticleSGD(alpha=0.2, n_epochs=1, mini_batch_size=1, verbosity=2, weight_update="momentum", beta=0.5)
    # sgd = ParticleSGD(alpha=0.2, n_epochs=1, mini_batch_size=1, verbosity=2)

    train_X = np.asarray([[0.2, -0.3]])
    train_Y = np.asarray([[0.0, 1.0, 0.0]])

    net = ParticleNetwork(cost="mse", particle_input=ParticleInput(2))
    net.append(Particle(2, 5, activation="sigmoid"))
    net.append(Particle(5, 3, activation="sigmoid"))

    sgd.optimize(net, train_X, train_Y)
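
# Hedged sketch, not taken from ParticleSGD itself: weight_update="momentum"
# with beta=0.5 above is assumed to follow the classical momentum rule, in
# which a per-parameter velocity accumulates the gradient and is applied in
# place of the plain SGD step. The helper name and signature are illustrative.
def momentum_step(w, grad, velocity, alpha=0.2, beta=0.5):
    for i in range(len(w)):
        velocity[i] = beta * velocity[i] - alpha * grad[i]  # decay old velocity, add new gradient
        w[i] += velocity[i]                                 # apply the accumulated step
    return w, velocity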
def main4():
    rprop = ParticleRPROP(n_epochs=1, verbosity=0, cost_freq=25, init_delta=0.01, eta_minus=0.5, eta_plus=1.2,
                          delta_max=0.5, delta_min=1e-6, manhattan=False, n_threads=2)

    train_X = np.asarray([[0.2, -0.3], [0.2, -0.4], [0.1, 0.1], [0.9, 1.1], [2.2, 4.4]])
    train_Y = np.asarray([[0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]])

    phase = False

    net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(2, phase_enabled=phase))
    net.append(Particle(2, 5, activation="sigmoid", phase_enabled=phase))
    net.append(Particle(5, 7, activation="sigmoid", phase_enabled=phase))
    net.append(Particle(7, 3, activation="softmax", phase_enabled=phase))

    # rprop is constructed but not run here; only the initial cost is printed
    print(net.cost(train_X, train_Y))
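
# Hedged sketch, not taken from ParticleRPROP itself: the eta_minus/eta_plus/
# delta_max/delta_min arguments above are assumed to drive the standard RPROP
# step-size adaptation (the iRPROP- variant is shown). Names and signature are
# illustrative only.
def rprop_step(w, grad, prev_grad, delta,
               eta_minus=0.5, eta_plus=1.2, delta_max=0.5, delta_min=1e-6):
    for i in range(len(w)):
        s = grad[i] * prev_grad[i]
        if s > 0.0:
            delta[i] = min(delta[i] * eta_plus, delta_max)   # same sign: grow the step
        elif s < 0.0:
            delta[i] = max(delta[i] * eta_minus, delta_min)  # sign flip: shrink the step
            grad[i] = 0.0                                    # iRPROP-: skip this update
        w[i] -= np.sign(grad[i]) * delta[i]                  # move by the adapted step only
        prev_grad[i] = grad[i]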
def main3():
    train_X = np.asarray([[0.2, -0.3], [0.1, -0.9]])
    train_Y = np.asarray([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])

    net = ParticleNetwork(cost="mse", particle_input=ParticleInput(2))
    net.append(Particle(2, 5, activation="sigmoid"))
    net.append(Particle(5, 3, activation="sigmoid"))

    print(net.predict(train_X))

    with open("/Users/adrianlange/network.json", "w") as f:
        net.write_to_json(f)

    with open("/Users/adrianlange/network.json", "r") as f:
        new_net = ParticleNetwork.read_from_json(f)

    print(new_net.predict(train_X))
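
# Hedged sketch, an addition rather than part of main3: the JSON round-trip can
# be made an explicit test by checking that the reloaded network reproduces the
# original predictions. The helper name and tolerance are illustrative.
def check_json_roundtrip(net, new_net, train_X):
    assert np.allclose(net.predict(train_X), new_net.predict(train_X), atol=1e-12)
    print("JSON round-trip reproduces predictions")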
def main():
    # train_X = np.asarray([[0.2, -0.3]])
    # train_Y = np.asarray([[0.0, 1.0, 0.0]])
    # train_X = np.asarray([{0: 0.45, 1: 3.33}, {1: 2.22}])
    train_X = np.asarray([[0.45, 3.33], [0.0, 2.22]])
    # train_Y = np.asarray([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
    train_Y = np.asarray([[1.0], [0.0]])

    net = ParticleNetwork(cost="mse", particle_input=ParticleInput(2, phase_enabled=True))
    net.append(Particle(2, 3, activation="sigmoid", phase_enabled=True))
    net.append(Particle(3, 1, activation="sigmoid", phase_enabled=True))

    print(net.particle_input.get_rxyz())
    print(net.predict(train_X))
    print(net.cost(train_X, train_Y))
# NOTE: the opening of this MNIST timing run was truncated; the data-loading
# lines below are assumed to mirror the RPROP run at the end of this file.
raw_data_train = pd.read_csv("/Users/alange/programming/MNIST/data/mnist_train.csv", header=None)
X = np.asarray(raw_data_train.ix[:, 1:] / 255.0)  # scaled values in range [0-1]

# length ten categorical vector
Y = []
for val in raw_data_train.ix[:, 0]:
    y = np.zeros(10)
    y[val] = 1.0
    Y.append(y)
Y = np.asarray(Y)

# Data subset
n_sub = len(X)
X_sub = X[:n_sub, :]
Y_sub = Y[:n_sub, :]

net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(784))
net.append(Particle(784, 10, activation="sigmoid"))
net.append(Particle(10, 10, activation="softmax"))

start = time.time()
sgd = ParticleSGD(n_epochs=1, verbosity=2, cost_freq=25, n_threads=4, chunk_size=500, mini_batch_size=n_sub)
sgd.optimize(net, X_sub, Y_sub)
print(time.time() - start)
def fd():
    # train_X = np.asarray([[0.2, -0.3]])
    # train_Y = np.asarray([[0.0, 1.0, 0.0]])
    train_X = np.asarray([[0.2, -0.3], [0.1, -0.9], [0.1, 0.05]])
    train_Y = np.asarray([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [1.0, 0.0, 0.0]])

    phase = True
    p = "gwell"

    net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(2, phase_enabled=phase))
    # net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(2), regularizer=ParticleRegularize(1.0))
    # net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(2), regularizer=ParticleRegularizeL2Charge(0.3))
    net.append(Particle(2, 5, activation="sigmoid", potential=p, phase_enabled=phase))
    net.append(Particle(5, 4, activation="sigmoid", potential=p, phase_enabled=phase))
    net.append(Particle(4, 3, activation="softmax", potential=p, phase_enabled=phase))

    # Finite difference checking
    net.cost(train_X, train_Y)

    db, dq, dr, dt, dt_in = net.cost_gradient(train_X, train_Y)
    # db, dq, dr, dt = net.cost_gradient(train_X, train_Y)

    h = 0.001  # central-difference step size

    print("analytic b")
    print(db)

    fd_b = []
    for l in range(len(net.layers)):
        lb = []
        for c in range(len(net.layers[l].b)):
            for b in range(len(net.layers[l].b[c])):
                orig = net.layers[l].b[c][b]
                net.layers[l].b[c][b] += h
                fp = net.cost(train_X, train_Y)
                net.layers[l].b[c][b] -= 2*h
                fm = net.cost(train_X, train_Y)
                lb.append((fp - fm) / (2*h))
                net.layers[l].b[c][b] = orig
        fd_b.append(lb)

    print("numerical b")
    print(fd_b)

    print("analytic q")
    for x in dq:
        print(x)

    fd_q = []
    for l in range(len(net.layers)):
        lq = []
        for i in range(len(net.layers[l].q)):
            orig = net.layers[l].q[i]
            net.layers[l].q[i] += h
            fp = net.cost(train_X, train_Y)
            net.layers[l].q[i] -= 2*h
            fm = net.cost(train_X, train_Y)
            lq.append((fp - fm) / (2*h))
            net.layers[l].q[i] = orig
        fd_q.append(lq)

    print("numerical q")
    for x in fd_q:
        print(x)

    print("analytic theta")
    for x in dt:
        print(x)

    # input layer
    fd_t = []
    layer = net.particle_input
    lt = []
    for i in range(len(layer.theta)):
        orig = layer.theta[i]
        layer.theta[i] += h
        fp = net.cost(train_X, train_Y)
        layer.theta[i] -= 2*h
        fm = net.cost(train_X, train_Y)
        lt.append((fp - fm) / (2*h))
        layer.theta[i] = orig
    fd_t.append(lt)

    # layers
    for l in range(len(net.layers)):
        lt = []
        for i in range(len(net.layers[l].theta)):
            orig = net.layers[l].theta[i]
            net.layers[l].theta[i] += h
            fp = net.cost(train_X, train_Y)
            net.layers[l].theta[i] -= 2*h
            fm = net.cost(train_X, train_Y)
            lt.append((fp - fm) / (2*h))
            net.layers[l].theta[i] = orig
        fd_t.append(lt)

    print("numerical theta")
    for x in fd_t:
        print(x)

    print("analytic theta_in")
    for x in dt_in:
        print(x)

    # input layer (placeholder zeros)
    fd_t = []
    layer = net.particle_input
    lt = []
    for i in range(len(layer.theta_in)):
        lt.append(0.0)
    fd_t.append(lt)

    # layers
    for l in range(len(net.layers)):
        lt = []
        for i in range(len(net.layers[l].theta_in)):
            orig = net.layers[l].theta_in[i]
            net.layers[l].theta_in[i] += h
            fp = net.cost(train_X, train_Y)
            net.layers[l].theta_in[i] -= 2*h
            fm = net.cost(train_X, train_Y)
            lt.append((fp - fm) / (2*h))
            net.layers[l].theta_in[i] = orig
        fd_t.append(lt)

    print("numerical theta_in")
    for x in fd_t:
        print(x)

    # print("analytic zeta")
    # for x in dzeta:
    #     print(x)
    #
    # # input layer
    # fd_zeta = []
    # layer = net.particle_input
    # lt = []
    # for i in range(len(layer.zeta)):
    #     orig = layer.zeta[i]
    #     layer.zeta[i] += h
    #     fp = net.cost(train_X, train_Y)
    #     layer.zeta[i] -= 2*h
    #     fm = net.cost(train_X, train_Y)
    #     lt.append((fp - fm) / (2*h))
    #     layer.zeta[i] = orig
    # fd_zeta.append(lt)
    #
    # # layers
    # for l in range(len(net.layers)):
    #     lt = []
    #     for i in range(len(net.layers[l].zeta)):
    #         orig = net.layers[l].zeta[i]
    #         net.layers[l].zeta[i] += h
    #         fp = net.cost(train_X, train_Y)
    #         net.layers[l].zeta[i] -= 2*h
    #         fm = net.cost(train_X, train_Y)
    #         lt.append((fp - fm) / (2*h))
    #         net.layers[l].zeta[i] = orig
    #     fd_zeta.append(lt)
    #
    # print("numerical zeta")
    # for x in fd_zeta:
    #     print(x)

    fd_r_x = []
    fd_r_y = []
    fd_r_z = []

    # input first
    layer = net.particle_input
    lr_x = []
    lr_y = []
    lr_z = []
    for i in range(layer.output_size):
        # x
        orig = layer.rx[i]
        layer.rx[i] += h
        fp = net.cost(train_X, train_Y)
        layer.rx[i] -= 2*h
        fm = net.cost(train_X, train_Y)
        lr_x.append((fp - fm) / (2*h))
        layer.rx[i] = orig

        # y
        orig = layer.ry[i]
        layer.ry[i] += h
        fp = net.cost(train_X, train_Y)
        layer.ry[i] -= 2*h
        fm = net.cost(train_X, train_Y)
        lr_y.append((fp - fm) / (2*h))
        layer.ry[i] = orig

        # z
        orig = layer.rz[i]
        layer.rz[i] += h
        fp = net.cost(train_X, train_Y)
        layer.rz[i] -= 2*h
        fm = net.cost(train_X, train_Y)
        lr_z.append((fp - fm) / (2*h))
        layer.rz[i] = orig

    fd_r_x.append(lr_x)
    fd_r_y.append(lr_y)
    fd_r_z.append(lr_z)

    # layers
    for layer in net.layers:
        lr_x = []
        lr_y = []
        lr_z = []
        for i in range(layer.output_size):
            # x
            orig = layer.rx[i]
            layer.rx[i] += h
            fp = net.cost(train_X, train_Y)
            layer.rx[i] -= 2*h
            fm = net.cost(train_X, train_Y)
            lr_x.append((fp - fm) / (2*h))
            layer.rx[i] = orig

            # y
            orig = layer.ry[i]
            layer.ry[i] += h
            fp = net.cost(train_X, train_Y)
            layer.ry[i] -= 2*h
            fm = net.cost(train_X, train_Y)
            lr_y.append((fp - fm) / (2*h))
            layer.ry[i] = orig

            # z
            orig = layer.rz[i]
            layer.rz[i] += h
            fp = net.cost(train_X, train_Y)
            layer.rz[i] -= 2*h
            fm = net.cost(train_X, train_Y)
            lr_z.append((fp - fm) / (2*h))
            layer.rz[i] = orig

        fd_r_x.append(lr_x)
        fd_r_y.append(lr_y)
        fd_r_z.append(lr_z)

    print("analytic x")
    for layer in dr[0]:
        print(layer)
    print("numerical r x")
    for f in fd_r_x:
        print(f)

    print("analytic y")
    for layer in dr[1]:
        print(layer)
    print("numerical r y")
    for f in fd_r_y:
        print(f)

    print("analytic z")
    for layer in dr[2]:
        print(layer)
    print("numerical r z")
    for f in fd_r_z:
        print(f)
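
# Hedged sketch, factoring out the pattern that fd() repeats for b, q, theta,
# theta_in, rx, ry, and rz: a central-difference estimate of d(cost)/d(param[i])
# for any mutable 1-D parameter array. `cost_fn` is assumed to be a
# zero-argument callable such as lambda: net.cost(train_X, train_Y); the helper
# itself is illustrative and not part of the ParticleNetwork API.
def central_difference(param, cost_fn, h=0.001):
    grads = []
    for i in range(len(param)):
        orig = param[i]
        param[i] = orig + h               # forward perturbation
        fp = cost_fn()
        param[i] = orig - h               # backward perturbation
        fm = cost_fn()
        grads.append((fp - fm) / (2 * h))
        param[i] = orig                   # restore the original value
    return grads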
n_seed = 100
np.random.seed(n_seed)
state = np.random.get_state()

# MNIST data
raw_data_train = pd.read_csv("/Users/alange/programming/MNIST/data/mnist_train.csv", header=None)
print("data loaded")

# Prepare data
X = np.asarray(raw_data_train.ix[:, 1:] / 255.0)  # scaled values in range [0-1]

# length ten categorical vector
Y = []
for val in raw_data_train.ix[:, 0]:
    y = np.zeros(10)
    y[val] = 1.0
    Y.append(y)
Y = np.asarray(Y)

# Data subset
n_sub = 4000
X_sub = X[:n_sub, :]
Y_sub = Y[:n_sub, :]

net = ParticleNetwork(cost="categorical_cross_entropy", particle_input=ParticleInput(784))
net.append(Particle(784, 128, activation="sigmoid"))
net.append(Particle(128, 10, activation="softmax"))

rprop = ParticleRPROP(n_epochs=4, verbosity=2, cost_freq=25, n_threads=4, chunk_size=500)
rprop.optimize(net, X_sub, Y_sub)
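
# Hedged sketch, an addition rather than part of the run above: once training
# finishes, classification accuracy on the subset can be estimated by comparing
# the argmax of net.predict output with the argmax of the one-hot labels. The
# helper name is illustrative.
def report_training_accuracy(net, X_sub, Y_sub):
    predictions = net.predict(X_sub)
    accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(Y_sub, axis=1))
    print("training accuracy:", accuracy)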