def test_ntm_gradients():
    """Smoke-test: build an NTM network, compile it and train for two epochs.

    The targets are the affine map ``2 * x + 1`` of random inputs. Nothing
    is asserted; the test passes when graph construction and training run
    without raising.
    """
    # Problem dimensions.
    input_size = 1
    state_size = 1
    memory_shape = (5, 1)
    T = 2
    batch_size = 10
    n_batches = 20

    # Controller hyper-parameters.
    controller_num_layers = 1
    controller_hidden_size = 10
    controller_network = MLP(
        (input_size, memory_shape[1]),
        controller_num_layers,
        controller_hidden_size,
        state_size,
    )

    x = tf.placeholder(tf.float32, [batch_size, T, input_size])

    # Synthetic data set: n_batches full batches of sequences of length T.
    x_ = np.random.randn(batch_size * n_batches, T, input_size)
    y_ = 2 * x_ + 1.

    # A custom read head is constructed here but is not passed to the cell;
    # the disabled variant was NTMCell(..., read_head=rh).
    addr = ShortcircuitAddressing(memory_shape, batch_size)
    rh = ReadHead(
        state_size,
        memory_shape,
        addresser=addr,
        batch_size=batch_size,
        hidden_size=2,
    )
    ntm_cell = NTMCell(controller_network, memory_shape, batch_size)
    ntm = Network(ntm_cell, x)

    def loss(a, b):
        return tf.nn.l2_loss(a - b)

    optimizer = tf.train.GradientDescentOptimizer(1e-4)
    ntm.compile(loss, optimizer)
    ntm.train(x_, y_, batch_size=batch_size, n_epochs=2)
def test_rnn():
    """Smoke-test: wrap an RNNLayer in a Network, compile it and train it.

    A single batch of random sequences is fitted against ``2 * x + 1`` with
    a mean-squared-error loss. Nothing is asserted on the returned losses.
    """
    # Shapes.
    batch_size = 5
    n_batches = 5
    T = 10
    input_size = 1
    output_size = 1

    cell = RNNLayer(input_size, output_size, batch_size)

    seq_shape_in = [batch_size, T, input_size]
    seq_shape_out = [batch_size, T, output_size]
    x = tf.placeholder(tf.float32, seq_shape_in)
    # Target placeholder; created but not handed to the network below.
    y = tf.placeholder(tf.float32, seq_shape_out)
    network = Network(cell, x)

    # Training data: one batch of random inputs and their affine targets.
    x_ = np.random.randn(batch_size, T, input_size)
    y_ = 2 * x_ + 1

    optimizer = tf.train.GradientDescentOptimizer(1e-4)

    def loss(a, b):
        # Mean of the element-wise squared difference.
        return tf.reduce_mean(tf.pow(a - b, 2))

    network.compile(loss, optimizer)
    losses = network.train(x_, y_, batch_size=batch_size, verbose=False)
# Load one training file and feed it to the network batch by batch.
# NOTE(review): `file` and the step counter `i` are bound outside this view.
features, nextMoves = read_data(file)

print("==========================================================")
print("loaded training file %s" % file)
print("****dont train with this file ever again****")
print("Total feature size is %d, Total next move size is %d"
      % (len(features), len(nextMoves)))
print("==========================================================")

batch = get_batch(features, nextMoves, BATCH_SIZE)
# Stop as soon as get_batch hands back a batch with no features.
while len(batch['features']) > 0:
    # One optimisation step per batch.
    network.train(batch)

    # Every 5th step: summary; every 100th step (except step 0): checkpoint.
    if i % 5 == 0:
        network.average_summary()
    if i != 0 and i % 100 == 0:
        network.save_checkpoint(CHECKPOINT_DIR, network.get_global_step())

    # Drop the rows consumed by this batch, then fetch the next one.
    features, nextMoves = features[BATCH_SIZE:], nextMoves[BATCH_SIZE:]
    batch = get_batch(features, nextMoves, BATCH_SIZE)
    i += 1
def relu_func_prime(z):
    """Return the ReLU derivative of ``z`` without modifying ``z``.

    Negative entries map to 0 and positive entries map to 1; entries that
    are exactly zero are left unchanged (so they stay 0). The result has the
    same shape and dtype as ``z``.

    The masks are applied to a copy: writing into ``z`` itself would
    overwrite whatever array the caller passed in.
    """
    grad = np.array(z, copy=True)
    grad[grad < 0] = 0
    grad[grad > 0] = 1
    return grad


def cost_func(y_true, y_pred):
    """Return the element-wise half squared error ``0.5 * (y_pred - y_true)**2``."""
    return 0.5 * (y_pred - y_true)**2


def cost_func_prime(y_true, y_pred):
    """Return the derivative of ``cost_func`` with respect to ``y_pred``."""
    return y_pred - y_true


# XOR truth table: four samples, each a 1x2 input row with a 1x1 target.
x_train = np.array([[[0, 0]], [[0, 1]], [[1, 0]], [[1, 1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# 2 -> 3 -> 1 network with a ReLU activation layer after each FCLayer.
# NOTE(review): the tuple arguments look like (input shape, output shape);
# confirm against the FCLayer / ALayer definitions.
network = Network()
network.add(FCLayer((1, 2), (1, 3)))
network.add(ALayer((1, 3), (1, 3), relu_func, relu_func_prime))
network.add(FCLayer((1, 3), (1, 1)))
network.add(ALayer((1, 1), (1, 1), relu_func, relu_func_prime))

network.setup_cost_func(cost_func, cost_func_prime)
network.train(x_train, y_train, epochs=1000, learning_rate=0.01)

# Query the trained network on the input (0, 1).
out = network.predict([[0, 1]])
print(out)
# np.argmax is used below, so numpy must be imported.
import numpy as np

import utils.data_loader as data_loader
from network.network import Network
from network.network_improved import NetworkImproved

training_data, validation_data, test_data = data_loader.load_data()

# Train network
print('Which network do you want to train?')
print('1. Basic network')
print('2. Network improved')
n = int(input())
# NOTE(review): the three trailing positional arguments to train() are
# passed through unchanged; their meaning (presumably learning rate, epochs
# and batch size) should be confirmed against Network.train.
if n == 1:
    my_network = Network((784, 50, 10))
    my_network.train(*training_data, 3.0, 30, 10)
elif n == 2:
    my_network = NetworkImproved((784, 50, 10))
    my_network.setAttribs(l2_norm = True)
    my_network.train(*training_data, 0.5, 30, 10)
else:
    # Without this branch the script would only fail later with a NameError
    # on my_network; stop here with a clear message instead.
    raise SystemExit('Unknown choice %d: expected 1 or 2' % n)

# Test network: count samples whose predicted arg-max matches the label's.
correct, tot = 0, len(test_data[0])
for data, label in zip(*test_data):
    h, ans = np.argmax(my_network.predict(data)), np.argmax(label)
    if h == ans:
        correct += 1
print('Correctness: %d/%d = %.2f%%' % (correct, tot, correct/tot*100))

# Store network in json file
# NOTE(review): the code that writes to and closes `f` is not visible in
# this view, so the handle is left open here exactly as before; confirm it
# is closed further down.
f = open('json.js', 'w')