def build_model(X, p_drop):
    """Build a 256-256 MLP with dropout and a 10-way softmax output.

    Parameters
    ----------
    X : symbolic input tensor fed to the first dense layer.
    p_drop : dropout probability applied after each hidden layer.

    Returns
    -------
    The output layer (10-unit softmax) of the network.
    """
    # NOTE(review): `root_two` is assumed to be a module-level constant
    # (presumably sqrt(2), the ReLU init gain) — confirm it is defined.
    hidden1 = nnbuilder.denseLayer(X, 256, activation=nn.rectify,
                                   w_init=nn.XavierNormal(root_two))
    hidden1_drop = nnbuilder.dropoutLayer(hidden1, p_drop)
    hidden2 = nnbuilder.denseLayer(hidden1_drop, 256, activation=nn.rectify,
                                   w_init=nn.XavierNormal(root_two))
    hidden2_drop = nnbuilder.dropoutLayer(hidden2, p_drop)
    return nnbuilder.denseLayer(hidden2_drop, 10, activation=nn.softmax)
def build_nn(nn_input):
    """Build a 100-50-25 feed-forward classifier with a 2-way softmax output.

    Each hidden dense layer (Xavier-normal init, default activation) is
    followed by dropout with p=0.5.

    Parameters
    ----------
    nn_input : symbolic input tensor fed to the first dense layer.

    Returns
    -------
    The output layer (2-unit softmax) of the network.
    """
    d1 = nnbuilder.denseLayer(nn_input, 100, w_init=nn.XavierNormal())
    drop1 = nn.dropout(d1, 0.5)
    d2 = nnbuilder.denseLayer(drop1, 50, w_init=nn.XavierNormal())
    drop2 = nn.dropout(d2, 0.5)
    d3 = nnbuilder.denseLayer(drop2, 25, w_init=nn.XavierNormal())
    drop3 = nn.dropout(d3, 0.5)
    # Fix: the original rebound `d3` to the output layer, shadowing the
    # third hidden dense layer above; use a distinct name instead.
    out = nnbuilder.denseLayer(drop3, 2, activation=nn.softmax)
    return out
def main(num_epochs=NUM_EPOCHS):
    """Train a bidirectional recurrent network on a scalar-regression task.

    Builds forward and backward recurrent layers over a (N_BATCH, MAX_LENGTH, 2)
    input, takes the last forward timestep and the first backward timestep,
    concatenates them, and maps the result through a 1-unit tanh dense layer.
    Trains with RMSProp on mean squared error, printing the validation cost
    once per epoch, until `num_epochs` epochs finish or the user hits Ctrl-C.

    Parameters
    ----------
    num_epochs : number of training epochs to run (default NUM_EPOCHS).
    """
    # cgt.set_precision('half')
    print("Building network ...")
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    X = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2))
    l_forward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN)
    l_backward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN, backwards=True)
    # Alternative recurrent cells, kept for experimentation:
    # l_forward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid)
    # l_backward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid, backwards=True)
    # l_forward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify)
    # l_backward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify, backwards=True)
    l_forward_slice = l_forward[:, MAX_LENGTH-1, :]  # last element in the forward time dimension
    l_backward_slice = l_backward[:, 0, :]           # first element in the backward time dimension
    l_sum = cgt.concatenate([l_forward_slice, l_backward_slice], axis=1)
    l_out = nnbuilder.denseLayer(l_sum, num_units=1, activation=cgt.tanh)
    target_values = cgt.vector('target_output')
    predicted_values = l_out[:, 0]  # for this task we only need the single output unit
    cost = cgt.mean((predicted_values - target_values)**2)
    # Compute SGD updates for training
    print("Computing updates ...")
    updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE)
    # updates = nn.nesterov_momentum(cost, nn.get_parameters(l_out), 0.05)
    # cgt functions for training and computing cost
    print("Compiling functions ...")
    train = cgt.function([X, target_values], cost, updates=updates)
    compute_cost = cgt.function([X, target_values], cost)
    # We'll use this fixed "validation set" to periodically check progress.
    # NOTE(review): the mask returned by gen_data() is unused here — confirm
    # the recurrent layers do not need it.
    X_val, y_val, mask_val = gen_data()
    print("Training ...")
    time_start = time.time()
    try:
        for epoch in range(num_epochs):
            for _ in range(EPOCH_SIZE):
                # Fix: the original rebound `X` (and `y`) to the numpy batch,
                # shadowing the symbolic graph input `X` defined above; use
                # distinct names for the concrete batch data.
                X_batch, y_batch, _mask = gen_data()
                train(X_batch, y_batch)
            cost_val = compute_cost(X_val, y_val)
            print("Epoch {} validation cost = {}".format(epoch+1, cost_val))
            print('Epoch took ' + str(time.time() - time_start))
            time_start = time.time()
    except KeyboardInterrupt:
        pass