def _feed_forward_builder(x):
    # Builds the position-wise feed-forward sub-layer; hidden_dim, activation,
    # trainable and name are free variables taken from the enclosing scope.
    return FeedForward(
        units=hidden_dim,
        activation=activation,
        trainable=trainable,
        name=name,
    )(x)
def __init__(self, hidden_size, output_size, batch_size=96, drop_out=0.3):
    super(TransDecoder, self).__init__()
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.length = 1
    self.hopping = 6  # number of stacked decoder layers
    self.embedding = nn.Embedding(output_size, hidden_size, padding_idx=0)
    self.PosEnc = PosEnc()
    self.drop = nn.Dropout(p=drop_out)
    self.out = nn.Linear(hidden_size, output_size)  # bias=False
    # Weight sharing: tie the output projection to the embedding matrix.
    self.out.weight = self.embedding.weight
    self.layer = nn.ModuleList([])
    for i in range(self.hopping):
        Attention = MultiAttention(hidden_size, batch_size, drop_out).to(device)
        MaskAttention = MaskedMultiAttention(hidden_size, batch_size, drop_out).to(device)
        FFN = FeedForward(hidden_size, hidden_size * 4, hidden_size, drop_out).to(device)
        layer = DecoderLayer(hidden_size, MaskAttention, Attention, FFN, drop_out).to(device)
        self.layer.append(layer)
    self.norm = nn.LayerNorm(self.hidden_size).to(device)
    # self.norm = LayerNorm(hidden_size)
    self.softmax = nn.LogSoftmax(dim=2)
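# The assignment self.out.weight = self.embedding.weight above ties the output
# projection to the token embedding. This works because nn.Linear(hidden, vocab)
# stores its weight as (vocab, hidden), the same shape that nn.Embedding(vocab, hidden)
# uses. A minimal standalone sketch of the same tying (sizes here are illustrative):
import torch.nn as nn

vocab_size, hidden_size = 1000, 512
embedding = nn.Embedding(vocab_size, hidden_size, padding_idx=0)
out = nn.Linear(hidden_size, vocab_size)
out.weight = embedding.weight  # both parameters have shape (vocab_size, hidden_size)
assert out.weight.shape == (vocab_size, hidden_size)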
def __init__(self, d_model, d_ff, dropout_rate):
    super(TransEncoder, self).__init__()
    # Self-attention sub-layer with its dropout and layer norm.
    self.multi_head_attn = MultiHeadAttn(d_model, num_heads=8)
    self.dropout1 = nn.Dropout(dropout_rate)
    self.norm1 = LayerNorm(d_model)
    # Position-wise feed-forward sub-layer with its dropout and layer norm.
    self.feed_forward = FeedForward(d_model, d_ff)
    self.dropout2 = nn.Dropout(dropout_rate)
    self.norm2 = LayerNorm(d_model)
def __init__(self, d_model, dropout=0.1):
    super(TransformerBlock, self).__init__()
    self.norm_1 = nn.LayerNorm(d_model)
    self.norm_2 = nn.LayerNorm(d_model)
    self.attn = MultiHeadAttention(d_model, 3)
    self.ff = FeedForward(d_model)
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
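# Neither the encoder layer above nor this block shows its forward pass, but the
# sub-modules imply the standard post-norm Transformer wiring: sub-layer, dropout,
# residual add, then LayerNorm. Below is a minimal self-contained sketch of that
# wiring, using torch.nn built-ins in place of the projects' own attention and
# feed-forward classes (whose signatures are not shown here):
import torch
import torch.nn as nn

class PostNormBlock(nn.Module):
    def __init__(self, d_model, d_ff, num_heads=8, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(d_model, num_heads, batch_first=True)
        self.ff = nn.Sequential(nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
        self.dropout_1 = nn.Dropout(dropout)
        self.norm_1 = nn.LayerNorm(d_model)
        self.dropout_2 = nn.Dropout(dropout)
        self.norm_2 = nn.LayerNorm(d_model)

    def forward(self, x):
        attn_out, _ = self.attn(x, x, x)                 # self-attention sub-layer
        x = self.norm_1(x + self.dropout_1(attn_out))    # residual add, then norm
        x = self.norm_2(x + self.dropout_2(self.ff(x)))  # feed-forward sub-layer
        return x

x = torch.randn(2, 16, 64)              # (batch, seq_len, d_model)
print(PostNormBlock(64, 256)(x).shape)  # torch.Size([2, 16, 64])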
def _wrap_layer(name,
                input_layer,
                build_func,
                dropout_rate=0.0,
                trainable=True,
                use_adapter=False,
                adapter_units=None,
                adapter_activation='relu'):
    """Wrap layers with residual, normalization and dropout.

    :param name: Prefix of names for internal layers.
    :param input_layer: Input layer.
    :param build_func: A callable that takes the input tensor and generates the output tensor.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :param use_adapter: Whether to use feed-forward adapters before each residual connection.
    :param adapter_units: The dimension of the first transformation in the feed-forward adapter.
    :param adapter_activation: The activation after the first transformation in the feed-forward adapter.
    :return: Output layer.
    """
    build_output = build_func(input_layer)
    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='%s-Dropout' % name,
        )(build_output)
    else:
        dropout_layer = build_output
    if isinstance(input_layer, list):
        input_layer = input_layer[0]
    if use_adapter:
        adapter = FeedForward(
            units=adapter_units,
            activation=adapter_activation,
            kernel_initializer=keras.initializers.TruncatedNormal(mean=0.0, stddev=0.001),
            name='%s-Adapter' % name,
        )(dropout_layer)
        dropout_layer = keras.layers.Add(name='%s-Adapter-Add' % name)([dropout_layer, adapter])
    add_layer = keras.layers.Add(name='%s-Add' % name)([input_layer, dropout_layer])
    normal_layer = LayerNormalization(
        trainable=trainable,
        name='%s-Norm' % name,
    )(add_layer)
    return normal_layer
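# For context, this is roughly how a builder such as _feed_forward_builder (first
# snippet in this section) plugs into _wrap_layer. Everything here is illustrative:
# attention_output is assumed to be the Keras tensor produced by the preceding
# attention sub-layer, and the name prefix and dropout rate are placeholders.
ffn_output = _wrap_layer(
    name='Encoder-1-FeedForward',      # placeholder layer-name prefix
    input_layer=attention_output,      # assumed output of the attention sub-layer
    build_func=_feed_forward_builder,  # the builder from the first snippet above
    dropout_rate=0.1,
    trainable=True,
)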
def __init__(self, input_size, hidden_size, batch_size=96, drop_out=0.3):
    super(TransEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.length = 1
    self.hopping = 6  # number of stacked encoder layers
    self.embedding = nn.Embedding(input_size, hidden_size, padding_idx=0)
    self.PosEnc = PosEnc()
    self.drop = nn.Dropout(p=drop_out)
    self.layer = nn.ModuleList([])
    for i in range(self.hopping):
        SelfAttention = MultiAttention(hidden_size, batch_size, drop_out).to(device)
        FFN = FeedForward(hidden_size, hidden_size * 4, hidden_size, drop_out).to(device)
        layer = EncoderLayer(hidden_size, SelfAttention, FFN, drop_out).to(device)
        self.layer.append(layer)
    self.norm = nn.LayerNorm(self.hidden_size).to(device)
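# The constructor implies a forward pass of embed -> positional encoding -> dropout
# -> six encoder layers -> final LayerNorm. A hedged standalone sketch of that
# pipeline; the real PosEnc and EncoderLayer signatures are not shown, so a
# standard sinusoidal encoding stands in and the layer stack is elided:
import math
import torch
import torch.nn as nn

def sinusoidal_pos_enc(seq_len, hidden_size):
    # Standard sinusoidal positional encodings, shape (seq_len, hidden_size).
    pos = torch.arange(seq_len, dtype=torch.float).unsqueeze(1)
    div = torch.exp(torch.arange(0, hidden_size, 2, dtype=torch.float)
                    * (-math.log(10000.0) / hidden_size))
    pe = torch.zeros(seq_len, hidden_size)
    pe[:, 0::2] = torch.sin(pos * div)
    pe[:, 1::2] = torch.cos(pos * div)
    return pe

embedding = nn.Embedding(100, 64, padding_idx=0)
drop = nn.Dropout(p=0.3)
norm = nn.LayerNorm(64)

tokens = torch.randint(1, 100, (2, 10))  # (batch, seq_len)
x = drop(embedding(tokens) + sinusoidal_pos_enc(10, 64))
# ... here x would pass through the six EncoderLayer modules ...
x = norm(x)
print(x.shape)  # torch.Size([2, 10, 64])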
    return (inputTrain, outputTrain, inputTest, outputTest)


def run(dataset, architecture, learning_rate, eval_every, stop):
    input_size = dataset["train_imgs"][0].shape
    nn = FeedForward(input_size, architecture)
    print(nn.to_string())
    return train_nn(nn, dataset, learning_rate, eval_every, stop)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every", type=int, default=2000,
                        help="Evaluate every this many steps")
    args = parser.parse_args()

    # dataset = load_mnist()
    dataset = load_cifrar()
    input_size = dataset["train_imgs"][0].shape

    # LeNet-style convolutional architecture.
    nn = FeedForward(input_size, [
        (CONV, (6, 28, 28), 5, 1), (RELU, -1), (MAX_POOLING, (6, 14, 14)),
        (CONV, (16, 10, 10), 5, 1), (RELU, -1), (MAX_POOLING, (16, 5, 5)),
        (LINEARIZE, -1),
        (FULLY_CONNECTED, 120), (FULLY_CONNECTED, 84), (FULLY_CONNECTED, 10),
        (SOFTMAX, -1),
    ])
    # Fully connected alternative:
    # nn = FeedForward(input_size, [(LINEARIZE, -1), (FULLY_CONNECTED, 300), (TANH, -1),
    #                               (FULLY_CONNECTED, 100), (TANH, -1),
    #                               (FULLY_CONNECTED, 10), (SOFTMAX, -1)])
    print(nn.to_string())
    train_nn(nn, dataset, args.learning_rate, args.eval_every, 10000)
args = parser.parse_args()
input_size = (32, 32, 3)
dataset = import_first_dataset() if args.dataset == 1 else import_second_dataset()

# Linearize I
# nn = FeedForward([LinearizeLayer(3, 32, 32), FullyConnected(3*32*32, 300, identity), Tanh(),
#                   FullyConnected(300, 10, identity), SoftMax()])

# Linearize II
nn = FeedForward([
    LinearizeLayer(32, 32, 3),
    FullyConnected(32 * 32 * 3, 300, identity), Tanh(),
    FullyConnected(300, 200, identity), Tanh(),
    FullyConnected(200, 10, identity),
    SoftMax()
])

# Convolutional I (note: this replaces the fully connected network built above)
nn = FeedForward([
    ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ReluLayer(),
    ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2), ReluLayer(),
    LinearizeLayer(16, 5, 5),
    FullyConnected(400, 300, relu),
        nn.update_parameters(args.learning_rate)

        # Evaluate the network
        if cnt % args.eval_every == 0:
            test_acc, test_cm = \
                eval_nn(nn, data["test_imgs"], data["test_labels"])
            train_acc, train_cm = \
                eval_nn(nn, data["train_imgs"], data["train_labels"], 5000)
            print("Train acc: %2.6f ; Test acc: %2.6f" % (train_acc, test_acc))
            pylab.imshow(test_cm)
            pylab.draw()
            matplotlib.pyplot.pause(0.001)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every", type=int, default=200,
                        help="Evaluate every this many steps")
    args = parser.parse_args()

    mnist = load_mnist()
    input_size = mnist["train_imgs"][0].size
    print(input_size)
    nn = FeedForward(input_size, [(300, logistic), (10, identity)])
    print(nn.to_string())
    train_nn(nn, mnist, args)
# nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2),
#                   ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2),
#                   ConvolutionalLayer(16, 5, 5, 120, 5, 1), LinearizeLayer(120, 1, 1),
#                   FcLayer(120, 84, identity), FcLayer(84, 10, identity), SoftMaxLayer()])

# CFK
# data = load_cfk()
# nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 300, identity), TanHLayer(),
#                   FcLayer(300, 62, identity), SoftMaxLayer()])
# nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 300, identity), TanHLayer(),
#                   FcLayer(300, 100, identity), TanHLayer(), FcLayer(100, 62, identity),
#                   SoftMaxLayer()])
# nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 600, identity), TanHLayer(),
#                   FcLayer(600, 62, identity), SoftMaxLayer()])
# train_nn(nn, data, args, 62)

# CIFAR
# data = load_cifar()
# nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 300, identity), TanHLayer(),
#                   FcLayer(300, 10, identity), SoftMaxLayer()])
# nn = FeedForward([LinearizeLayer(3, 32, 32), FcLayer(3072, 600, identity), TanHLayer(),
#                   FcLayer(600, 400, identity), TanHLayer(), FcLayer(400, 100, identity),
#                   TanHLayer(), FcLayer(100, 10, identity), SoftMaxLayer()])
# train_nn(nn, data, args, 10)

# CONV CFK
data = load_cfk()
# nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2),
#                   ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2),
#                   ConvolutionalLayer(16, 5, 5, 120, 5, 1), LinearizeLayer(120, 1, 1),
#                   FcLayer(120, 84, identity), FcLayer(84, 62, identity), SoftMaxLayer()])
# nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ReluLayer(),
#                   ConvolutionalLayer(6, 14, 14, 16, 5, 1), MaxPoolingLayer(2), ReluLayer(),
#                   LinearizeLayer(16, 5, 5), FcLayer(400, 300, identity), TanHLayer(),
#                   FcLayer(300, 62, identity), SoftMaxLayer()])
# nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), MaxPoolingLayer(2), ReluLayer(),
#                   ConvolutionalLayer(6, 14, 14, 16, 5, 1), ReluLayer(),
#                   ConvolutionalLayer(16, 10, 10, 25, 3, 1), ReluLayer(),
#                   ConvolutionalLayer(25, 8, 8, 40, 3, 1), ReluLayer(), MaxPoolingLayer(2),
#                   LinearizeLayer(40, 3, 3), FcLayer(360, 84, identity), TanHLayer(),
#                   FcLayer(84, 62, identity), SoftMaxLayer()])
# nn = FeedForward([ConvolutionalLayer(3, 32, 32, 6, 5, 1), ReluLayer(),
#                   ConvolutionalLayer(6, 28, 28, 16, 5, 1), ReluLayer(),
#                   ConvolutionalLayer(16, 24, 24, 25, 3, 1), ReluLayer(),
#                   ConvolutionalLayer(25, 22, 22, 40, 3, 1), ReluLayer(), MaxPoolingLayer(2),
#                   LinearizeLayer(40, 10, 10), FcLayer(4000, 1000, identity), ReluLayer(),
#                   FcLayer(1000, 300, identity), ReluLayer(), FcLayer(300, 62, identity),
#                   SoftMaxLayer()])
nn = FeedForward([
    ConvolutionalLayer(3, 32, 32, 20, 5, 1), ReluLayer(),
    ConvolutionalLayer(20, 28, 28, 20, 5, 1), ReluLayer(),
    ConvolutionalLayer(20, 24, 24, 50, 3, 1), ReluLayer(),
    ConvolutionalLayer(50, 22, 22, 30, 3, 1), ReluLayer(),
    MaxPoolingLayer(2),
    LinearizeLayer(30, 10, 10),
    FcLayer(3000, 1000, identity), ReluLayer(),
    FcLayer(1000, 300, identity), ReluLayer(),
    FcLayer(300, 62, identity),
    SoftMaxLayer()
])
train_nn(nn, data, args, 62)

# CONV CIFAR
# data = load_cifar()
# nn = FeedForward([ConvolutionalLayer(3, 32, 32, 20, 5, 1), MaxPoolingLayer(2),
#                   ConvolutionalLayer(20, 14, 14, 25, 5, 1), MaxPoolingLayer(2),
#                   ConvolutionalLayer(25, 5, 5, 100, 5, 1), LinearizeLayer(100, 1, 1),
#                   FcLayer(100, 84, identity), FcLayer(84, 10, identity), SoftMaxLayer()])
# train_nn(nn, data, args, 10)
def _train_ff_network(
        hyperparameter_dict: dict,
        data: SignalData) -> Tuple[FeedForward, List, List, List, List]:
    """Trains a feed-forward network using the specified hyperparameters."""
    # Ensure reproducibility by giving PyTorch the same seed every time we train.
    torch.manual_seed(1)

    # Print hyperparameters.
    print(f'Hyperparameters: {hyperparameter_dict}')

    # Get hyperparameters.
    learning_rate = hyperparameter_dict['learning_rate']
    batch_size = hyperparameter_dict['batch_size']
    optimizer_str = hyperparameter_dict['optimizer']

    # There are 6 labels, and PyTorch expects them to go from 0 to 5.
    full_train_labels = data.train_labels - 1

    # Get generators.
    signal_dataset = SignalDataset(data.train_signals, full_train_labels)
    (training_generator, validation_generator) = utils_nn.get_trainval_generators(
        signal_dataset, batch_size, num_workers=0, training_fraction=0.8)

    # Create feed-forward network.
    input_size = data.num_timesteps * data.num_components
    feed_forward = FeedForward(input_size, input_size, data.num_activity_labels)
    print(feed_forward)

    # Parameters should be moved to GPU before constructing the optimizer.
    device = torch.device('cuda:0' if USE_CUDA else 'cpu')
    feed_forward = feed_forward.to(device)

    # Get optimizer.
    optimizer = None
    if optimizer_str == 'adam':
        optimizer = torch.optim.Adam(feed_forward.parameters(), lr=learning_rate)
    elif optimizer_str == 'sgd':
        optimizer = torch.optim.SGD(feed_forward.parameters(), lr=learning_rate)
    else:
        raise Exception(f'Specified optimizer not valid: {optimizer_str}')

    training_accuracy_list = []
    training_loss_list = []
    validation_accuracy_list = []
    validation_loss_list = []
    max_epochs = 10
    for epoch in range(max_epochs):
        print(f'Epoch {epoch}')
        # Training data.
        (training_accuracy, training_loss) = utils_nn.fit(
            feed_forward, training_generator, optimizer, USE_CUDA)
        training_accuracy_list.append(training_accuracy)
        training_loss_list.append(training_loss)
        # Validation data.
        (validation_accuracy, validation_loss) = utils_nn.evaluate(
            feed_forward, validation_generator, 'Validation', USE_CUDA)
        validation_accuracy_list.append(validation_accuracy)
        validation_loss_list.append(validation_loss)

    return (feed_forward, training_accuracy_list, training_loss_list,
            validation_accuracy_list, validation_loss_list)
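# A hedged usage sketch: the three dictionary keys below are exactly the ones the
# function reads; load_signal_data is a hypothetical placeholder for however the
# SignalData instance is actually built.
data = load_signal_data()  # hypothetical helper
hyperparameter_dict = {
    'learning_rate': 0.001,
    'batch_size': 64,
    'optimizer': 'adam',  # or 'sgd'
}
(model, train_acc, train_loss,
 val_acc, val_loss) = _train_ff_network(hyperparameter_dict, data)
print(f'Final validation accuracy: {val_acc[-1]}')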
        # Evaluate the network
        if cnt % args.eval_every == 0:
            test_acc, test_cm = \
                eval_nn(nn, data["test_imgs"], data["test_labels"])
            train_acc, train_cm = \
                eval_nn(nn, data["train_imgs"], data["train_labels"], 5000)
            print("Train acc: %2.6f ; Test acc: %2.6f" % (train_acc, test_acc))
            pylab.imshow(test_cm)
            pylab.draw()
            matplotlib.pyplot.pause(0.001)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=0.001,
                        help="Learning rate")
    parser.add_argument("--eval_every", type=int, default=200,
                        help="Evaluate every this many steps")
    args = parser.parse_args()

    mnist = load_mnist()
    input_size = mnist["train_imgs"][0].size

    # TODO 5
    nn = FeedForward([Layer(input_size, 300, logistic), Layer(300, 10, identity)])
    # Convolutional alternative:
    # nn = FeedForward([LinearizeLayerReverse(1, 28, 28), ConvolutionalLayer(1, 28, 28, 16, 5, 1),
    #                   MaxPoolingLayer(2), ReluLayer(), ConvolutionalLayer(16, 12, 12, 16, 5, 1),
    #                   MaxPoolingLayer(2), ReluLayer(), LinearizeLayer(16, 4, 4),
    #                   Layer(256, 10, identity)])
    print(nn.to_string())
    train_nn(nn, mnist, args)
import minst_data as minst
from feed_forward import FeedForward

# Load the data from the MNIST file.
# TODO: Include in the class?
PATH = "data/mnist_data.pkl.gz"
data = minst.MnistData(PATH)

print(">>> Unpacking data.")
# Convert the data into a shape that can be used by the network.
training_data = data.get_training_data()
test_data = data.get_test_data()
validation_data = data.get_validation_data()

# Create a standard neural network: 784 inputs, one hidden layer of 30, 10 outputs.
net = FeedForward([784, 30, 10])

# Load a previously trained network.
# This currently just loads the weights and biases without actually checking
# if they are the right weights.
# net.load_state()

# Train for 10 epochs with a 3.0 learning rate.
net.train(training_data, 10, 3.0, test_data)

# Save the state so it can be used later on.
net.save_state()
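# For readers unfamiliar with the sizes-list constructor, this is a generic sketch
# of what a [784, 30, 10] feed-forward network amounts to; it is an illustration,
# not the feed_forward module's actual implementation.
import numpy as np

sizes = [784, 30, 10]
# One weight matrix and bias vector per adjacent pair of layer sizes.
weights = [np.random.randn(n_out, n_in) for n_in, n_out in zip(sizes[:-1], sizes[1:])]
biases = [np.random.randn(n, 1) for n in sizes[1:]]

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def feed_forward(a):
    # Propagate a column vector through every layer.
    for w, b in zip(weights, biases):
        a = sigmoid(w @ a + b)
    return a

print(feed_forward(np.random.randn(784, 1)).shape)  # (10, 1)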
parser = ArgumentParser()
parser.add_argument("--learning_rate", type=float, default=0.001,
                    help="Learning rate")
parser.add_argument("--eval_every", type=int, default=50,
                    help="Evaluate every this many steps")
args = parser.parse_args()

mnist = load_mnist()
input_size = mnist["train_imgs"][0].size

# TODO 5
# nn = FeedForward([Layer(input_size, 300, logistic), Layer(300, 10, identity)])
nn = FeedForward([
    LinearizeLayerReverse(1, 28, 28),
    ConvolutionalLayer(1, 28, 28, 16, 5, 1),   # 28x28 -> 24x24, 16 maps
    MaxPoolingLayer(2),                        # 24x24 -> 12x12
    ReluLayer(),
    ConvolutionalLayer(16, 12, 12, 16, 5, 1),  # 12x12 -> 8x8
    MaxPoolingLayer(2),                        # 8x8 -> 4x4
    ReluLayer(),
    LinearizeLayer(16, 4, 4),                  # 16 * 4 * 4 = 256 features
    Layer(256, 10, identity)
])
print(nn.to_string())
train_nn(nn, mnist, args)