Example #1
    def __init__(self):
        super(Encoder, self).__init__()
        self._self_attention = MixedMultiHeadAttention(num_heads=48,
                                                       key_dim=64,
                                                       local_scope=4,
                                                       num_timesteps=128,
                                                       num_features=3072,
                                                       dropout=0.2)
        self._feed_forward = FeedForwardNetwork()
        self._layer_norm = LayerNormalization()
Example #2

    def __init__(self, vocab_size: int, hidden_dim: int, dropout_rate: float,
                 *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim

        attention_base_layer = Attention(depth=hidden_dim)
        ffn_base_layer = FeedForwardNetwork(hidden_dim=hidden_dim,
                                            dropout_rate=dropout_rate)

        self.attention = AddNormalizationWrapper(attention_base_layer,
                                                 dropout_rate)
        self.ffn = AddNormalizationWrapper(ffn_base_layer, dropout_rate)
        self.output_normalization = LayerNormalization()
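
The AddNormalizationWrapper used here is not shown on this page. Below is a minimal sketch of the residual-plus-layer-norm pattern its name and call sites suggest; the constructor signature is inferred from the two calls above, so treat the whole class as an assumption rather than the original implementation:

import tensorflow as tf

class AddNormalizationWrapper(tf.keras.layers.Layer):
    """Hypothetical sketch: dropout on the sub-layer output, a residual
    connection, then layer normalization (post-norm Transformer style)."""

    def __init__(self, sublayer, dropout_rate, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.sublayer = sublayer
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.norm = tf.keras.layers.LayerNormalization()

    def call(self, x, **sublayer_kwargs):
        # y = LayerNorm(x + Dropout(sublayer(x)))
        return self.norm(x + self.dropout(self.sublayer(x, **sublayer_kwargs)))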
Example #3
    return vector


training = False

if training:
    print("Loading training data")
    training_images = read_idx("../hand_written_digits/train-images.idx3-ubyte")
    training_labels = read_idx("../hand_written_digits/train-labels.idx1-ubyte")

    print("Assigning labels to images")
    training_data = create_training_data(training_images, training_labels, 5)

    print("Done! Creating neural network")
    # symmetric autoencoder: 784-pixel (28x28) input squeezed to a 5-unit bottleneck
    layers = [784, 392, 196, 98, 5, 98, 196, 392, 784]
    network = FeedForwardNetwork(layers)
    network.randomize(-1.0, 1.0)

    print("Start training over {} samples".format(len(training_data)))
    # 1000 epochs, batch size 100, learning rate 1.0 (cf. "1000e_100b_1.0lr" in the file name below)
    network.stochastic_gradient_descent(training_data, 1000, 100, 1.0)

    print("Done! Saving network")
    save_network(network, "trained_networks/network_auto_encoder_1000e_100b_1.0lr.ffann")

else:
    print("Loading network")
    network = load_network("trained_networks/network_auto_encoder_1000e_100b_1.0lr.ffann")

    encoder = network.sub_network(0, 5)  # first half: 784 -> ... -> 5 bottleneck
    decoder = network.sub_network(4, 9)  # second half: 5 -> ... -> 784
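
    # The else branch stops after splitting the autoencoder into its two
    # halves. A hedged sketch of how they might be used follows;
    # `feed_forward` is an assumed method name and the image loading
    # mirrors the training branch, so both are assumptions.
    testing_images = read_idx("../hand_written_digits/t10k-images.idx3-ubyte")
    sample = testing_images[0].flatten()          # 784-dim input vector
    code = encoder.feed_forward(sample)           # 5-dim latent code
    reconstruction = decoder.feed_forward(code)   # 784-dim reconstruction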
Example #4
    # Shuffle data instances
    np.random.shuffle(data_instances)

    for num_of_hidden_nodes in range(3):
        print("Testing with %d hidden nodes" % num_of_hidden_nodes)
        data_indices = list(range(data_instances.shape[0]))
        # 5-fold cross validation
        num_of_folds = 5
        fold_size = data_instances.shape[0] // num_of_folds  # integer division keeps slice bounds int
        total_performance = 0.0
        for holdout_fold_idx in range(num_of_folds):
            training_indices = np.array(
                np.setdiff1d(
                    data_indices, 
                    data_indices[fold_size * holdout_fold_idx : \
                                 fold_size * holdout_fold_idx + fold_size]))
            test_indices = np.array(
                [i for i in range(fold_size * holdout_fold_idx,
                                  fold_size * holdout_fold_idx + fold_size)])

            model = FeedForwardNetwork(learning_rates[test],
                                       data_instances.shape[1] - 1, 2,
                                       [tuned_first_layer_size[test],
                                        num_of_hidden_nodes],
                                       num_of_outputs[test])
            # Train the model
            model.train(data_instances[training_indices])

            # Test performance on test set
            predictions = model.predict(data_instances[test_indices, :-1])
            total_performance += \
                sum(predictions == data_instances[test_indices, -1]) / \
                float(test_indices.shape[0])
        print("Average overall classification rate: %f" % (total_performance / num_of_folds))
Example #5

    def build(self, input_shape):
        hidden_dim = input_shape[-1]
        self.scale = self.add_weight('layer_norm_scale',
                                     shape=[hidden_dim],
                                     initializer=tf.ones_initializer())
        self.bias = self.add_weight('layer_norm_bias',
                                    shape=[hidden_dim],
                                    initializer=tf.zeros_initializer())
        super().build(input_shape)

    def call(self, x: tf.Tensor, epsilon: float = 1e-6) -> tf.Tensor:
        mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean),
                                  axis=[-1],
                                  keepdims=True)
        norm_x = (x - mean) * tf.math.rsqrt(variance + epsilon)

        return norm_x * self.scale + self.bias


if __name__ == '__main__':
    from feed_forward_network import FeedForwardNetwork

    do_rate = 0.1

    ffn = FeedForwardNetwork(hidden_dim=2, dropout_rate=do_rate)
    ffn_addnorm = AddNormalizationWrapper(ffn, do_rate)

    x = tf.constant([[[1., 2.], [3., 4.]], [[1., 3.], [2., 4.]]])
    print(x)
    y = ffn_addnorm(x)
    print(y)
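
    # As a quick sanity check, the custom layer should agree with Keras's
    # built-in layer normalization when both use default (ones/zeros)
    # parameters; this comparison is an added assumption, not part of the
    # original test.
    keras_ln = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    custom_ln = LayerNormalization()
    z = tf.random.normal((2, 3, 4))
    print(tf.reduce_max(tf.abs(keras_ln(z) - custom_ln(z))))  # expected ~0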
Example #6
    return vector


training = False

if training:
    print("Loading training data")
    training_images = read_idx("../hand_written_digits/train-images.idx3-ubyte")
    training_labels = read_idx("../hand_written_digits/train-labels.idx1-ubyte")

    print("Assigning labels to images")
    training_data = create_training_data(training_images, training_labels)

    print("Done! Creating neural network")
    # 28x28 input, two 16-unit hidden layers, 10 digit classes
    layers = [784, 16, 16, 10]
    network = FeedForwardNetwork(layers)
    network.randomize(-1.0, 1.0)

    print("Start training over {} samples".format(len(training_data)))
    network.stochastic_gradient_descent(training_data, 1000, 100, 1.0)

    print("Done! Saving network")
    save_network(network, "trained_networks/network_predictor_1000e_100b_1.0lr.ffann")

else:
    print("Loading network")
    network = load_network("trained_networks/network_predictor_1000e_100b_1.0lr.ffann")

    print("Loading testing data")
    testing_images = read_idx("../hand_written_digits/t10k-images.idx3-ubyte")
    testing_labels = read_idx("../hand_written_digits/t10k-labels.idx1-ubyte")
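
    # The snippet stops after loading the test set. A hedged sketch of an
    # evaluation loop follows; `feed_forward` is an assumed method name and
    # numpy is an assumed import, so adapt both to the real API.
    import numpy as np

    correct = 0
    for image, label in zip(testing_images, testing_labels):
        output = network.feed_forward(image.flatten())
        correct += int(np.argmax(output) == label)
    print("Accuracy: {:.2%}".format(correct / len(testing_labels)))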
Example #7

ENV_NAME = 'AdpCarsharing-v0'
train_for = 5000
num_eps_tr_per_curve = 50
layer_sizes = [5, 20, 5]
pop_size = 50
sigma = 0.03
learning_rate = 0.003
print("alpha/(n*sigma)=", learning_rate / (pop_size * sigma))
EPS_AVG = 1
decay = 1
num_threads = 1
weights_filename = 'weights/weights_'
GAMMA = 1

agent = Agent(GAMMA, ENV_NAME, FeedForwardNetwork(layer_sizes), pop_size,
              sigma, learning_rate, EPS_AVG, decay, num_threads,
              weights_filename)

# The pre-trained weights are saved to 'weights.pkl'; you can reuse them.

#t=agent.load_mod('weights/weights_array3_input_DP_policy_1000.pkl')
#agent.load('weights_array3_input_DP_policy_100.pkl')

#print(t)
#agent.load('weights/weights_array0_input_xt_100000.pkl')

#optimized_weights = agent.es.get_weights()
#agent.model.set_weights(optimized_weights)

## play one episode
#episodes_return_xt=agent.play(100)
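
# A hedged sketch of exercising the agent, built only from the calls already
# named in the commented-out lines above (load, es.get_weights,
# model.set_weights, play); the sequence itself is an assumption.
agent.load('weights_array3_input_DP_policy_100.pkl')
optimized_weights = agent.es.get_weights()
agent.model.set_weights(optimized_weights)

# play one episode batch and inspect the returns
episodes_return_xt = agent.play(100)
print(episodes_return_xt)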
        print "Cross validation fold %d" % (holdout_fold_idx + 1)
        # training_indices = data_indices - holdout_fold indices
        training_indices = np.array(
            np.setdiff1d(
                data_indices,
                data_indices[fold_size * holdout_fold_idx : \
                             fold_size * holdout_fold_idx + fold_size]))
        # test_indices = holdout_fold indices
        test_indices = np.array([
            i for i in range(fold_size * holdout_fold_idx,
                             fold_size * holdout_fold_idx + fold_size)
        ])

        model = FeedForwardNetwork(learning_rates[test],
                                   data_instances.shape[1] - 1, 0, [],
                                   num_of_outputs[test])
        # Train the model
        model.train(data_instances[training_indices])
        print "Learned model: %s" % str(model)
        # Test performance on test set
        predictions = model.predict(data_instances[test_indices, :-1])
        for inst, actual, predicted in zip(
                data_instances[test_indices].tolist(),
                data_instances[test_indices, -1], predictions):
            print "For instance %s, the model predicted %s and the actual label was %s" % (
                inst, predicted, actual)
        total_performance += \
            sum(predictions == data_instances[test_indices, -1]) / \
            float(test_indices.shape[0])
    print "Average overall classification rate: %f" % (total_performance /