Example #1
def _feed_forward_builder(x):
    # Closure: hidden_dim, activation, trainable and name are captured
    # from the enclosing scope.
    return FeedForward(
        units=hidden_dim,
        activation=activation,
        trainable=trainable,
        name=name,
    )(x)
Example #2
def __init__(self, hidden_size, output_size, batch_size=96, drop_out=0.3):
    super(TransDecoder, self).__init__()
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.length = 1
    self.hopping = 6

    self.embedding = nn.Embedding(output_size, hidden_size, padding_idx=0)
    self.PosEnc = PosEnc()
    self.drop = nn.Dropout(p=drop_out)
    self.out = nn.Linear(hidden_size, output_size)  # bias=False

    # Weight sharing: tie the output projection to the embedding matrix.
    self.out.weight = self.embedding.weight

    self.layer = nn.ModuleList([])
    for _ in range(self.hopping):
        Attention = MultiAttention(hidden_size, batch_size, drop_out).to(device)
        MaskAttention = MaskedMultiAttention(hidden_size, batch_size, drop_out).to(device)
        FFN = FeedForward(hidden_size, hidden_size * 4, hidden_size, drop_out).to(device)
        layer = DecoderLayer(hidden_size, MaskAttention, Attention, FFN, drop_out).to(device)
        self.layer.append(layer)

    self.norm = nn.LayerNorm(self.hidden_size).to(device)
    # self.norm = LayerNorm(hidden_size)

    self.softmax = nn.LogSoftmax(dim=2)
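
The snippet shows only the constructor. A hedged sketch of the matching forward pass, inferred from the attribute names above; the DecoderLayer call signature and the mask arguments are assumptions, not from the source:

def forward(self, tgt, memory, tgt_mask=None, src_mask=None):
    # Embed, add positional encoding, apply dropout (assumed ordering).
    x = self.drop(self.PosEnc(self.embedding(tgt)))
    # Run the stack of decoder layers; the argument list is hypothetical.
    for layer in self.layer:
        x = layer(x, memory, tgt_mask, src_mask)
    x = self.norm(x)
    # Project to vocabulary logits and return log-probabilities.
    return self.softmax(self.out(x))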
Example #3
def __init__(self, d_model, d_ff, dropout_rate):
    super(TransEncoder, self).__init__()
    self.multi_head_attn = MultiHeadAttn(d_model, num_heads=8)
    self.dropout1 = nn.Dropout(dropout_rate)
    self.norm1 = LayerNorm(d_model)
    self.feed_forward = FeedForward(d_model, d_ff)
    self.dropout2 = nn.Dropout(dropout_rate)
    self.norm2 = LayerNorm(d_model)
Example #4
def __init__(self, d_model, dropout=0.1):
    super(TransformerBlock, self).__init__()

    self.norm_1 = nn.LayerNorm(d_model)
    self.norm_2 = nn.LayerNorm(d_model)

    self.attn = MultiHeadAttention(d_model, 3)

    self.ff = FeedForward(d_model)

    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
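
Again only the constructor is shown. A minimal sketch of a pre-norm forward pass consistent with these attributes; the MultiHeadAttention call signature is an assumption:

def forward(self, x, mask=None):
    # Self-attention sub-layer: normalize, attend, dropout, residual add.
    # The (q, k, v, mask) signature is assumed, not from the source.
    x2 = self.norm_1(x)
    x = x + self.dropout_1(self.attn(x2, x2, x2, mask))
    # Feed-forward sub-layer with the same residual pattern.
    x2 = self.norm_2(x)
    x = x + self.dropout_2(self.ff(x2))
    return x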
Example #5
def _wrap_layer(name,
                input_layer,
                build_func,
                dropout_rate=0.0,
                trainable=True,
                use_adapter=False,
                adapter_units=None,
                adapter_activation='relu'):
    """Wrap layers with residual, normalization and dropout.

    :param name: Prefix of names for internal layers.
    :param input_layer: Input layer.
    :param build_func: A callable that takes the input tensor and generates the output tensor.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :param use_adapter: Whether to use a feed-forward adapter before each residual connection.
    :param adapter_units: The dimension of the first transformation in feed-forward adapter.
    :param adapter_activation: The activation after the first transformation in feed-forward adapter.
    :return: Output layer.
    """
    build_output = build_func(input_layer)
    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='%s-Dropout' % name,
        )(build_output)
    else:
        dropout_layer = build_output
    if isinstance(input_layer, list):
        input_layer = input_layer[0]
    if use_adapter:
        adapter = FeedForward(
            units=adapter_units,
            activation=adapter_activation,
            kernel_initializer=keras.initializers.TruncatedNormal(
                mean=0.0, stddev=0.001),
            name='%s-Adapter' % name,
        )(dropout_layer)
        dropout_layer = keras.layers.Add(
            name='%s-Adapter-Add' % name)([dropout_layer, adapter])
    add_layer = keras.layers.Add(
        name='%s-Add' % name)([input_layer, dropout_layer])
    normal_layer = LayerNormalization(
        trainable=trainable,
        name='%s-Norm' % name,
    )(add_layer)
    return normal_layer
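
A hedged usage sketch tying this wrapper to the builder from Example #1; the tensor attention_output and the layer name prefix are placeholders, not from the source:

# _wrap_layer calls build_func(input_layer), then applies dropout, the
# optional adapter, the residual add, and layer normalization.
output = _wrap_layer(
    name='Encoder-1-FeedForward',      # hypothetical name prefix
    input_layer=attention_output,      # hypothetical upstream tensor
    build_func=_feed_forward_builder,  # builder from Example #1
    dropout_rate=0.1,
)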
Example #6
def __init__(self, input_size, hidden_size, batch_size=96, drop_out=0.3):
    super(TransEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.length = 1
    self.hopping = 6

    self.embedding = nn.Embedding(input_size, hidden_size, padding_idx=0)
    self.PosEnc = PosEnc()
    self.drop = nn.Dropout(p=drop_out)

    self.layer = nn.ModuleList([])
    for _ in range(self.hopping):
        SelfAttention = MultiAttention(hidden_size, batch_size, drop_out).to(device)
        FFN = FeedForward(hidden_size, hidden_size * 4, hidden_size, drop_out).to(device)
        layer = EncoderLayer(hidden_size, SelfAttention, FFN, drop_out).to(device)
        self.layer.append(layer)

    self.norm = nn.LayerNorm(self.hidden_size).to(device)
Example #7
def run(dataset, architecture, learning_rate, eval_every, stop):
    input_size = dataset["train_imgs"][0].shape
    nn = FeedForward(input_size, architecture)
    print(nn.to_string())

    return train_nn(nn, dataset, learning_rate, eval_every, stop)
Example #8
    return (inputTrain, outputTrain, inputTest, outputTest)

def run(dataset, architecture, learning_rate, eval_every, stop):
    input_size = dataset["train_imgs"][0].shape
    nn = FeedForward(input_size, architecture)
    print(nn.to_string())

    return train_nn(nn, dataset, learning_rate, eval_every, stop)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every", type=int, default=2000,
                        help="Evaluate every this many iterations")
    args = parser.parse_args()

    # dataset = load_mnist()
    dataset = load_cifrar()

    input_size = dataset["train_imgs"][0].shape

    nn = FeedForward(input_size, [
        (CONV, (6, 28, 28), 5, 1), (RELU, -1), (MAX_POOLING, (6, 14, 14)),
        (CONV, (16, 10, 10), 5, 1), (RELU, -1), (MAX_POOLING, (16, 5, 5)),
        (LINEARIZE, -1), (FULLY_CONNECTED, 120), (FULLY_CONNECTED, 84),
        (FULLY_CONNECTED, 10), (SOFTMAX, -1)
    ])
    # nn = FeedForward(input_size, [(LINEARIZE, -1), (FULLY_CONNECTED, 300), (TANH, -1), (FULLY_CONNECTED, 100), (TANH, -1), (FULLY_CONNECTED, 10), (SOFTMAX, -1)])
    print(nn.to_string())

    train_nn(nn, dataset, args.learning_rate, args.eval_every, 10000)
Example #9
    args = parser.parse_args()

    input_size = (32, 32, 3)

    dataset = import_first_dataset() if args.dataset == 1 else import_second_dataset()

    # Linearize I
    # nn = FeedForward([LinearizeLayer(3, 32, 32), FullyConnected(3*32*32, 300, identity), Tanh(), FullyConnected(300, 10, identity), SoftMax()])

    # Linearize II
    nn = FeedForward([
        LinearizeLayer(32, 32, 3),
        FullyConnected(32 * 32 * 3, 300, identity),
        Tanh(),
        FullyConnected(300, 200, identity),
        Tanh(),
        FullyConnected(200, 10, identity),
        SoftMax()
    ])

    # Convolutional I
    nn = FeedForward([
        ConvolutionalLayer(3, 32, 32, 6, 5, 1),
        MaxPoolingLayer(2),
        ReluLayer(),
        ConvolutionalLayer(6, 14, 14, 16, 5, 1),
        MaxPoolingLayer(2),
        ReluLayer(),
        LinearizeLayer(16, 5, 5),
        FullyConnected(400, 300, relu),
Example #10
        nn.update_parameters(args.learning_rate) 

        # Evaluate the network
        if cnt % args.eval_every == 0:
            test_acc, test_cm = \
                eval_nn(nn, data["test_imgs"], data["test_labels"])
            train_acc, train_cm = \
                eval_nn(nn, data["train_imgs"], data["train_labels"], 5000)
            print("Train acc: %2.6f ; Test acc: %2.6f" % (train_acc, test_acc))
            pylab.imshow(test_cm)
            pylab.draw()

            matplotlib.pyplot.pause(0.001)

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--learning_rate", type = float, default = 0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every", type = int, default = 200,
                        help="Learning rate")
    args = parser.parse_args()


    mnist = load_mnist()
    input_size = mnist["train_imgs"][0].size
    print(input_size)
    nn = FeedForward(input_size, [(300, logistic), (10, identity)])
    print(nn.to_string())

    train_nn(nn, mnist, args)
Example #11
    # nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2), ConvolutionalLayer(16, 5, 5, 120, 5, 1), LinearizeLayer(120, 1, 1), FcLayer(120, 84, identity), FcLayer(84, 10, identity), SoftMaxLayer()])
    
    # CFK
    # data = load_cfk()
    # nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 300, identity), TanHLayer(), FcLayer(300, 62, identity), SoftMaxLayer()])
    # nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 300, identity), TanHLayer(), FcLayer(300, 100, identity), TanHLayer(), FcLayer(100, 62, identity), SoftMaxLayer()])
    # nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 600, identity), TanHLayer(), FcLayer(600, 62, identity), SoftMaxLayer()])
    # train_nn(nn, data, args, 62)

    #CIFAR
    # data = load_cifar()
    # nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 300, identity), TanHLayer(), FcLayer(300, 10, identity), SoftMaxLayer()])
    # nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 600, identity), TanHLayer(), FcLayer(600, 400, identity), TanHLayer(), FcLayer(400, 100, identity), TanHLayer(), FcLayer(100, 10, identity), SoftMaxLayer()])
    # train_nn(nn, data, args, 10)

    #CONV CFK
    data = load_cfk()
    # nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2), ConvolutionalLayer(16, 5, 5, 120, 5, 1), LinearizeLayer(120, 1, 1), FcLayer(120, 84, identity), FcLayer(84, 62, identity), SoftMaxLayer()])    
    # nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ReluLayer(), ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2), ReluLayer(), LinearizeLayer(16, 5, 5), FcLayer(400, 300, identity), TanHLayer(), FcLayer(300, 62, identity), SoftMaxLayer()])
    # nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ReluLayer(), ConvolutionalLayer(6, 14, 14, 16, 5, 1), ReluLayer(), ConvolutionalLayer(16, 10, 10, 25, 3, 1), ReluLayer(),  ConvolutionalLayer(25, 8, 8, 40, 3, 1), ReluLayer(), MaxPoolingLayer(2), 
        # LinearizeLayer(40, 3, 3), FcLayer(360, 84, identity), TanHLayer(), FcLayer(84, 62, identity), SoftMaxLayer()])
    # nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), ReluLayer(), ConvolutionalLayer(6, 28, 28, 16, 5, 1), ReluLayer(), ConvolutionalLayer(16, 24, 24, 25, 3, 1), ReluLayer(),  ConvolutionalLayer(25, 22, 22, 40, 3, 1), ReluLayer(), MaxPoolingLayer(2), 
        # LinearizeLayer(40, 10, 10), FcLayer(4000, 1000, identity), ReluLayer(),  FcLayer(1000, 300, identity), ReluLayer(), FcLayer(300, 62, identity), SoftMaxLayer()])
    nn = FeedForward([
        ConvolutionalLayer(3, 32, 32, 20, 5, 1), ReluLayer(),
        ConvolutionalLayer(20, 28, 28, 20, 5, 1), ReluLayer(),
        ConvolutionalLayer(20, 24, 24, 50, 3, 1), ReluLayer(),
        ConvolutionalLayer(50, 22, 22, 30, 3, 1), ReluLayer(),
        MaxPoolingLayer(2), LinearizeLayer(30, 10, 10),
        FcLayer(3000, 1000, identity), ReluLayer(),
        FcLayer(1000, 300, identity), ReluLayer(),
        FcLayer(300, 62, identity), SoftMaxLayer()
    ])
    train_nn(nn, data, args, 62)

    # CONV CIFAR
    # data = load_cifar()
    # nn = FeedForward([ConvolutionalLayer(3, 32, 32, 20, 5, 1), MaxPoolingLayer(2), ConvolutionalLayer(20, 14, 14, 25, 5, 1), MaxPoolingLayer(2), ConvolutionalLayer(25, 5, 5, 100, 5, 1), LinearizeLayer(100, 1, 1), FcLayer(100, 84, identity), FcLayer(84, 10, identity), SoftMaxLayer()])
    # train_nn(nn, data, args, 10)
Example #12
def _train_ff_network(
        hyperparameter_dict: dict,
        data: SignalData) -> Tuple[FeedForward, List, List, List, List]:
    """Trains a feed-forward network using the specified hyperparameters.
    """
    # Ensure reproducibility by giving PyTorch the same seed every time we train.
    torch.manual_seed(1)

    # Print hyperparameters.
    print(f'Hyperparameters: {hyperparameter_dict}')

    # Get hyperparameters.
    learning_rate = hyperparameter_dict['learning_rate']
    batch_size = hyperparameter_dict['batch_size']
    optimizer_str = hyperparameter_dict['optimizer']

    # There are 6 labels, and PyTorch expects them to go from 0 to 5.
    full_train_labels = data.train_labels - 1

    # Get generators.
    signal_dataset = SignalDataset(data.train_signals, full_train_labels)
    (training_generator,
     validation_generator) = utils_nn.get_trainval_generators(
         signal_dataset, batch_size, num_workers=0, training_fraction=0.8)

    # Create feed-forward network.
    input_size = data.num_timesteps * data.num_components
    feed_forward = FeedForward(input_size, input_size,
                               data.num_activity_labels)
    print(feed_forward)

    # Parameters should be moved to GPU before constructing the optimizer.
    device = torch.device('cuda:0' if USE_CUDA else 'cpu')
    feed_forward = feed_forward.to(device)

    # Get optimizer.
    optimizer = None
    if optimizer_str == 'adam':
        optimizer = torch.optim.Adam(feed_forward.parameters(),
                                     lr=learning_rate)
    elif optimizer_str == 'sgd':
        optimizer = torch.optim.SGD(feed_forward.parameters(),
                                    lr=learning_rate)
    else:
        raise Exception(f'Specified optimizer not valid: {optimizer_str}')

    training_accuracy_list = []
    training_loss_list = []
    validation_accuracy_list = []
    validation_loss_list = []
    max_epochs = 10
    for epoch in range(max_epochs):
        print(f'Epoch {epoch}')

        # Training data.
        (training_accuracy,
         training_loss) = utils_nn.fit(feed_forward, training_generator,
                                       optimizer, USE_CUDA)
        training_accuracy_list.append(training_accuracy)
        training_loss_list.append(training_loss)

        # Validation data.
        (validation_accuracy,
         validation_loss) = utils_nn.evaluate(feed_forward,
                                              validation_generator,
                                              'Validation', USE_CUDA)
        validation_accuracy_list.append(validation_accuracy)
        validation_loss_list.append(validation_loss)

    return (feed_forward, training_accuracy_list, training_loss_list,
            validation_accuracy_list, validation_loss_list)
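
A hedged call-site sketch for the trainer above. The dictionary keys match the ones _train_ff_network reads; signal_data is a placeholder for a SignalData instance loaded elsewhere, and the hyperparameter values are illustrative:

# Hypothetical usage (not from the source).
hyperparameters = {
    'learning_rate': 0.001,
    'batch_size': 64,
    'optimizer': 'adam',
}
(model, train_acc, train_loss,
 val_acc, val_loss) = _train_ff_network(hyperparameters, signal_data)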
Example #13
        # Evaluate the network
        if cnt % args.eval_every == 0:
            test_acc, test_cm = \
                eval_nn(nn, data["test_imgs"], data["test_labels"])
            train_acc, train_cm = \
                eval_nn(nn, data["train_imgs"], data["train_labels"], 5000)
            print("Train acc: %2.6f ; Test acc: %2.6f" % (train_acc, test_acc))
            pylab.imshow(test_cm)
            pylab.draw()

            matplotlib.pyplot.pause(0.001)

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--learning_rate", type = float, default = 0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every", type = int, default = 200,
                        help="Learning rate")
    args = parser.parse_args()

    mnist = load_mnist()
    input_size = mnist["train_imgs"][0].size

    # TODO 5
    nn = FeedForward([Layer(input_size, 300, logistic), Layer(300, 10, identity)])
    # nn = FeedForward([LinearizeLayerReverse(1, 28, 28), ConvolutionalLayer(1, 28, 28, 16, 5, 1), MaxPoolingLayer(2), ReluLayer(), ConvolutionalLayer(16, 12, 12, 16, 5, 1), MaxPoolingLayer(2), ReluLayer(), LinearizeLayer(16, 4, 4), Layer(256, 10, identity)])
    print(nn.to_string())

    train_nn(nn, mnist, args)
Example #14
import minst_data as minst
from feed_forward import FeedForward

# Load the data from the MNIST file.
# TODO: Include in the class?
PATH = "data/mnist_data.pkl.gz"
data = minst.MnistData(PATH)

print ">>> Unpacking data."

# Converting data into a shape that can be used by the network.
training_data = data.get_training_data()
test_data = data.get_test_data()
validation_data = data.get_validation_data()

# Creating standard neural network.
net = FeedForward([784, 30, 10])

# Load a previously trained network.
# This currently just loads the weights and biases without actually checking
# if they are the right weights.
# net.load_state()

# Training for 10 epochs with 3.0 learning rate.
net.train(training_data, 10, 3.0, test_data)

# Saving the state so it can be used later on.
net.save_state()


Example #15
    parser = ArgumentParser()
    parser.add_argument("--learning_rate",
                        type=float,
                        default=0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every",
                        type=int,
                        default=50,
                        help="Learning rate")
    args = parser.parse_args()

    mnist = load_mnist()
    input_size = mnist["train_imgs"][0].size

    # TODO 5
    #    nn = FeedForward([Layer(input_size, 300, logistic), Layer(300, 10, identity)])
    nn = FeedForward([
        LinearizeLayerReverse(1, 28, 28),
        ConvolutionalLayer(1, 28, 28, 16, 5, 1),
        MaxPoolingLayer(2),
        ReluLayer(),
        ConvolutionalLayer(16, 12, 12, 16, 5, 1),
        MaxPoolingLayer(2),
        ReluLayer(),
        LinearizeLayer(16, 4, 4),
        Layer(256, 10, identity)
    ])
    print(nn.to_string())

    train_nn(nn, mnist, args)