def __init__(self):
    super(Encoder, self).__init__()
    # Note: num_heads * key_dim == num_features (48 * 64 = 3072), the usual
    # multi-head convention of splitting the model width across heads.
    self._self_attention = MixedMultiHeadAttention(num_heads=48,
                                                   key_dim=64,
                                                   local_scope=4,
                                                   num_timesteps=128,
                                                   num_features=3072,
                                                   dropout=0.2)
    self._feed_forward = FeedForwardNetwork()
    self._layer_norm = LayerNormalization()
def __init__(self, vocab_size: int, hidden_dim: int, dropout_rate: float,
             *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.vocab_size = vocab_size
    self.hidden_dim = hidden_dim
    # Each sub-layer is wrapped with dropout plus add-and-normalize.
    attention_base_layer = Attention(depth=hidden_dim)
    ffn_base_layer = FeedForwardNetwork(hidden_dim=hidden_dim,
                                        dropout_rate=dropout_rate)
    self.attention = AddNormalizationWrapper(attention_base_layer, dropout_rate)
    self.ffn = AddNormalizationWrapper(ffn_base_layer, dropout_rate)
    self.output_normalization = LayerNormalization()
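# A minimal sketch of how these sub-layers might be chained in the forward
# pass. Everything below is illustrative only: 'call', its 'mask' argument,
# and the wrappers' call signatures are assumptions, not code from this file.
def call(self, x, mask=None):
    x = self.attention(x, mask=mask)      # wrapped: dropout + residual + norm
    x = self.ffn(x)                       # wrapped position-wise feed-forward
    return self.output_normalization(x)   # final normalization of the block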
    return vector


training = False
if training:
    print("Loading training data")
    training_images = read_idx("../hand_written_digits/train-images.idx3-ubyte")
    training_labels = read_idx("../hand_written_digits/train-labels.idx1-ubyte")
    print("Assigning labels to images")
    training_data = create_training_data(training_images, training_labels, 5)
    print("Done! Creating neural network")
    # Symmetric auto-encoder: 784 pixels squeezed down to a 5-unit bottleneck.
    layers = [784, 392, 196, 98, 5, 98, 196, 392, 784]
    network = FeedForwardNetwork(layers)
    network.randomize(-1.0, 1.0)
    print("Start training over {} samples".format(len(training_data)))
    # 1000 epochs, mini-batches of 100, learning rate 1.0.
    network.stochastic_gradient_descent(training_data, 1000, 100, 1.0)
    print("Done! Saving network")
    save_network(network, "trained_networks/network_auto_encoder_1000e_100b_1.0lr.ffann")
else:
    print("Loading network")
    network = load_network("trained_networks/network_auto_encoder_1000e_100b_1.0lr.ffann")

# Split the trained auto-encoder at the bottleneck layer.
encoder = network.sub_network(0, 5)
decoder = network.sub_network(4, 9)
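# Hedged usage sketch for the two halves. 'predict' stands in for whatever
# single-sample forward-pass method FeedForwardNetwork actually exposes;
# it is an assumption, not a method shown in this file.
def reconstruct(image_vector):
    code = encoder.predict(image_vector)   # 784 pixels -> 5-unit code
    return decoder.predict(code)           # 5-unit code -> 784 pixels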
# Shuffle data instances
np.random.shuffle(data_instances)
for num_of_hidden_nodes in range(3):
    print("Testing with %d hidden nodes" % num_of_hidden_nodes)
    data_indices = np.arange(data_instances.shape[0])
    # 5-fold cross validation; integer division keeps the slice bounds integral
    num_of_folds = 5
    fold_size = data_instances.shape[0] // num_of_folds
    total_performance = 0.0
    for holdout_fold_idx in range(num_of_folds):
        # Train on everything outside the holdout fold
        training_indices = np.setdiff1d(
            data_indices,
            data_indices[fold_size * holdout_fold_idx:
                         fold_size * (holdout_fold_idx + 1)])
        # Test on the holdout fold itself
        test_indices = np.arange(fold_size * holdout_fold_idx,
                                 fold_size * (holdout_fold_idx + 1))
        # 'test', 'learning_rates', 'tuned_first_layer_size' and
        # 'num_of_outputs' come from the surrounding experiment setup.
        model = FeedForwardNetwork(learning_rates[test],
                                   data_instances.shape[1] - 1,
                                   2,
                                   [tuned_first_layer_size[test],
                                    num_of_hidden_nodes],
                                   num_of_outputs[test])
        # Train the model
        model.train(data_instances[training_indices])
        # Test performance on the holdout fold (last column is the label)
        predictions = model.predict(data_instances[test_indices, :-1])
        total_performance += \
            sum(predictions == data_instances[test_indices, -1]) / \
            float(test_indices.shape[0])
    print("Average overall classification rate: %f" %
          (total_performance / num_of_folds))
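# Self-contained check of the fold slicing above, using only NumPy and
# made-up sizes (10 instances, 5 folds): each pass isolates one contiguous
# fold for testing and trains on the complement.
import numpy as np

n, folds = 10, 5
indices = np.arange(n)
size = n // folds
for k in range(folds):
    test = indices[size * k: size * (k + 1)]
    train = np.setdiff1d(indices, test)
    print(k, test.tolist(), train.tolist())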
def build(self, input_shape):
    hidden_dim = input_shape[-1]
    # Learnable per-feature scale (gamma) and bias (beta)
    self.scale = self.add_weight('layer_norm_scale', shape=[hidden_dim],
                                 initializer=tf.ones_initializer())
    self.bias = self.add_weight('layer_norm_bias', shape=[hidden_dim],
                                initializer=tf.zeros_initializer())
    super().build(input_shape)

def call(self, x: tf.Tensor, epsilon: float = 1e-6) -> tf.Tensor:
    # Normalize each position over its feature axis, then rescale and shift
    mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
    variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keepdims=True)
    norm_x = (x - mean) * tf.math.rsqrt(variance + epsilon)
    return norm_x * self.scale + self.bias


if __name__ == '__main__':
    from feed_forward_network import FeedForwardNetwork

    do_rate = 0.1
    ffn = FeedForwardNetwork(hidden_dim=2, dropout_rate=do_rate)
    ffn_addnorm = AddNormalizationWrapper(ffn, do_rate)
    x = tf.constant([[[1., 2.], [3., 4.]], [[1., 3.], [2., 4.]]])
    print(x)
    y = ffn_addnorm(x)
    print(y)
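# Cross-check of the arithmetic in call() with plain NumPy (a sketch that
# assumes freshly initialized weights, i.e. scale = 1 and bias = 0).
import numpy as np

x = np.array([[1., 2.], [3., 4.]])
mean = x.mean(axis=-1, keepdims=True)
variance = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
print((x - mean) / np.sqrt(variance + 1e-6))  # equals norm_x above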
    return vector


training = False
if training:
    print("Loading training data")
    training_images = read_idx("../hand_written_digits/train-images.idx3-ubyte")
    training_labels = read_idx("../hand_written_digits/train-labels.idx1-ubyte")
    print("Assigning labels to images")
    training_data = create_training_data(training_images, training_labels)
    print("Done! Creating neural network")
    # Classifier: 784 input pixels -> two 16-unit hidden layers -> 10 digits
    layers = [784, 16, 16, 10]
    network = FeedForwardNetwork(layers)
    network.randomize(-1.0, 1.0)
    print("Start training over {} samples".format(len(training_data)))
    network.stochastic_gradient_descent(training_data, 1000, 100, 1.0)
    print("Done! Saving network")
    save_network(network, "trained_networks/network_predictor_1000e_100b_1.0lr.ffann")
else:
    print("Loading network")
    network = load_network("trained_networks/network_predictor_1000e_100b_1.0lr.ffann")

print("Loading testing data")
testing_images = read_idx("../hand_written_digits/t10k-images.idx3-ubyte")
testing_labels = read_idx("../hand_written_digits/t10k-labels.idx1-ubyte")
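# Hedged evaluation sketch: 'predict' and the flatten/normalize preprocessing
# are assumptions, not code from this file. Classify each test image as the
# index of the strongest of the 10 output activations.
import numpy as np

correct = 0
for image, label in zip(testing_images, testing_labels):
    output = network.predict(image.reshape(784) / 255.0)
    correct += int(np.argmax(output) == label)
print("Accuracy: {:.2%}".format(correct / len(testing_labels)))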
ENV_NAME = 'AdpCarsharing-v0'
train_for = 5000
num_eps_tr_per_curve = 50
layer_sizes = [5, 20, 5]
pop_size = 50
sigma = 0.03
learning_rate = 0.003
print("alpha/(n*sigma)=", learning_rate / (pop_size * sigma))
EPS_AVG = 1
decay = 1
num_threads = 1
weights_filename = 'weights/weights_'
GAMMA = 1
agent = Agent(GAMMA, ENV_NAME, FeedForwardNetwork(layer_sizes), pop_size,
              sigma, learning_rate, EPS_AVG, decay, num_threads,
              weights_filename)

# The pre-trained weights are saved into 'weights.pkl', which you can use:
# t = agent.load_mod('weights/weights_array3_input_DP_policy_1000.pkl')
# agent.load('weights_array3_input_DP_policy_100.pkl')
# print(t)
# agent.load('weights/weights_array0_input_xt_100000.pkl')
# optimized_weights = agent.es.get_weights()
# agent.model.set_weights(optimized_weights)
# # Play one episode
# episodes_return_xt = agent.play(100)
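# For reference, the core evolution-strategies update the Agent presumably
# performs, as a self-contained sketch (all names below are illustrative;
# note the learning_rate / (pop_size * sigma) factor printed above).
import numpy as np

def es_step(weights, reward_fn, pop_size=50, sigma=0.03, learning_rate=0.003):
    """One ES step: perturb the weights, score each perturbation, reweight."""
    noise = np.random.randn(pop_size, weights.size)
    rewards = np.array([reward_fn(weights + sigma * eps) for eps in noise])
    rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    return weights + (learning_rate / (pop_size * sigma)) * noise.T @ rewards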
print "Cross validation fold %d" % (holdout_fold_idx + 1) # training_indices = data_indices - holdout_fold indices training_indices = np.array( np.setdiff1d( data_indices, data_indices[fold_size * holdout_fold_idx : \ fold_size * holdout_fold_idx + fold_size])) # test_indices = holdout_fold indices test_indices = np.array([ i for i in xrange(fold_size * holdout_fold_idx, fold_size * holdout_fold_idx + fold_size) ]) model = FeedForwardNetwork(learning_rates[test], data_instances.shape[1] - 1, 0, [], num_of_outputs[test]) # Train the model model.train(data_instances[training_indices]) print "Learned model: %s" % str(model) # Test performance on test set predictions = model.predict(data_instances[test_indices, :-1]) for inst, actual, predicted in zip( data_instances[test_indices].tolist(), data_instances[test_indices, -1], predictions): print "For instance %s, the model predicted %s and the actual label was %s" % ( inst, predicted, actual) total_performance += \ sum(predictions == data_instances[test_indices, -1]) / \ float(test_indices.shape[0]) print "Average overall classification rate: %f" % (total_performance /