config = ClassifierParams()
config.num_clusters = 47
config.weight_std = 0.1
config.weight_initializer = "HeNormal"
config.nonlinearity = "relu"
config.optimizer = "adam"
config.learning_rate = 0.002
config.momentum = 0.9
config.gradient_clipping = 1
config.weight_decay = 0
config.lam = 0.3
config.mu = 1.0
config.sigma = 100.0
config.ip = 1

model = Sequential()
# model.add(Linear(None, 1800))
# model.add(Activation(config.nonlinearity))
# model.add(BatchNormalization(1800, use_cudnn=False))
# model.add(Linear(None, 1800))
# model.add(Activation(config.nonlinearity))
# model.add(BatchNormalization(1800, use_cudnn=False))
# model.add(Linear(None, 1800))
# model.add(Activation(config.nonlinearity))
# model.add(BatchNormalization(1800, use_cudnn=False))
# model.add(Linear(None, config.num_clusters))
model.add(Convolution2D(1, 32, ksize=4, stride=2, pad=1))
model.add(Activation(config.nonlinearity))
model.add(BatchNormalization(32))
config.ndim_x = 28 * 28
config.ndim_y = 10
config.ndim_reduction = 2
config.ndim_z = config.ndim_reduction
config.cluster_head_distance_threshold = 1
config.distribution_z = "deterministic"  # deterministic or gaussian
config.weight_std = 0.001
config.weight_initializer = "Normal"
config.nonlinearity = "relu"
config.optimizer = "Adam"
config.learning_rate = 0.0001
config.momentum = 0.5
config.gradient_clipping = 5
config.weight_decay = 0

decoder = Sequential()
decoder.add(Linear(None, 1000))
decoder.add(Activation(config.nonlinearity))
# decoder.add(BatchNormalization(1000))
decoder.add(Linear(None, 1000))
decoder.add(Activation(config.nonlinearity))
# decoder.add(BatchNormalization(1000))
decoder.add(Linear(None, config.ndim_x))
decoder.add(tanh())

discriminator_z = Sequential()
discriminator_z.add(gaussian_noise(std=0.3))
discriminator_z.add(Linear(config.ndim_z, 1000))
discriminator_z.add(Activation(config.nonlinearity))
# discriminator_z.add(BatchNormalization(1000))
discriminator_z.add(Linear(None, 1000))
else:
    config = DiscriminatorParams()
    config.a = 0
    config.b = 1
    config.c = 1
    config.weight_std = 0.01
    config.weight_initializer = "Normal"
    config.nonlinearity = "leaky_relu"
    config.optimizer = "adam"
    config.learning_rate = 0.0001
    config.momentum = 0.5
    config.gradient_clipping = 1
    config.weight_decay = 0

    discriminator = Sequential()
    discriminator.add(Convolution2D(3, 32, ksize=4, stride=2, pad=1))
    discriminator.add(BatchNormalization(32))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Convolution2D(32, 64, ksize=4, stride=2, pad=1))
    discriminator.add(BatchNormalization(64))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Convolution2D(64, 128, ksize=4, stride=2, pad=1))
    discriminator.add(BatchNormalization(128))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Convolution2D(128, 256, ksize=4, stride=2, pad=1))
    discriminator.add(BatchNormalization(256))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Linear(None, 1))

discriminator_params = {
    raise Exception("could not load {}".format(discriminator_sequence_filename))
else:
    config = DiscriminatorParams()
    config.weight_init_std = 0.001
    config.weight_initializer = "Normal"
    config.use_weightnorm = False
    config.nonlinearity = "elu"
    config.optimizer = "Adam"
    config.learning_rate = 0.0001
    config.momentum = 0.5
    config.gradient_clipping = 10
    config.weight_decay = 0
    config.use_feature_matching = False
    config.use_minibatch_discrimination = False

    discriminator = Sequential(weight_initializer=config.weight_initializer,
                               weight_init_std=config.weight_init_std)
    discriminator.add(gaussian_noise(std=0.3))
    discriminator.add(Convolution2D(3, 32, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    discriminator.add(BatchNormalization(32))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Convolution2D(32, 64, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    discriminator.add(BatchNormalization(64))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Convolution2D(64, 128, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    discriminator.add(BatchNormalization(128))
    discriminator.add(Activation(config.nonlinearity))
    discriminator.add(Convolution2D(128, 256, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    discriminator.add(BatchNormalization(256))
    discriminator.add(Activation(config.nonlinearity))
    if config.use_minibatch_discrimination:
        discriminator.add(reshape_1d())
from dataset import cifar100
import numpy as np
from full import FullLayer
from softmax import SoftMaxLayer
from cross_entropy import CrossEntropyLayer
from sequential import Sequential
from relu import ReluLayer
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = cifar100(1212149859)

model = Sequential(layers=(FullLayer(32 * 32 * 3, 256),
                           ReluLayer(),
                           FullLayer(256, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())
model.fit(x_train, y_train, epochs=15, lr=0.48, batch_size=128)

pred = model.predict(x_test)
acc = np.mean(pred == y_test)
print('Accuracy = %f' % acc)

# per-class accuracy
index_0 = np.where(y_test == 0)[0]
index_1 = np.where(y_test == 1)[0]
index_2 = np.where(y_test == 2)[0]
index_3 = np.where(y_test == 3)[0]
acc0 = np.mean(y_test[index_0] == pred[index_0])
acc1 = np.mean(y_test[index_1] == pred[index_1])
acc2 = np.mean(y_test[index_2] == pred[index_2])
acc3 = np.mean(y_test[index_3] == pred[index_3])
print('class0 accuracy = %f' % acc0)
print('class1 accuracy = %f' % acc1)
print('class2 accuracy = %f' % acc2)
print('class3 accuracy = %f' % acc3)
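# The four per-class blocks above can equally be written as one loop; this is a
# behavior-identical sketch using only the numpy arrays already defined here:
for c in range(4):
    idx = np.where(y_test == c)[0]
    print('class%d accuracy = %f' % (c, np.mean(pred[idx] == y_test[idx])))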
X_train, y_train = build_data(1000)  # (1000, 2)
X_test, y_test = build_data(1000)    # (1000, 2)

print('Start training with parameters : {0} rounds, {1} epochs and {2} batch size'
      .format(rounds, epochs, mini_batch_size))

result_rounds = []  # training_losses, training_acc, test_losses, test_acc
time1 = time.perf_counter()
for i in range(rounds):
    print("Training round {0} : ".format(i + 1))
    model = Sequential(Linear(input_units, hidden_units), ReLU(),
                       Linear(hidden_units, hidden_units), ReLU(),
                       Linear(hidden_units, hidden_units), ReLU(),
                       Linear(hidden_units, output_units), Sigmoid())
    # arrays of shape (rounds, epochs)
    model_trained, train_loss, train_acc, test_pred, test_loss, test_acc = train_model(
        model, X_train, y_train, X_test, y_test, epochs, mini_batch_size,
        lr=0.01, opt='SGD', loss_name='MSE')
    result_rounds.append([train_loss, train_acc, test_loss, test_acc])
config.ndim_input = image_width * image_height
config.clamp_lower = -0.01
config.clamp_upper = 0.01
config.num_critic = 5
config.weight_std = 0.001
config.weight_initializer = "Normal"
config.nonlinearity = "leaky_relu"
config.optimizer = "rmsprop"
config.learning_rate = 0.0001
config.momentum = 0.5
config.gradient_clipping = 1
config.weight_decay = 0
chainer.global_config.discriminator = config

discriminator = Sequential()
discriminator.add(Linear(None, 500))
discriminator.add(Activation(config.nonlinearity))
discriminator.add(Linear(None, 500))

params = {
    "config": config.to_dict(),
    "model": discriminator.to_dict(),
}
with open(discriminator_sequence_filename, "w") as f:
    json.dump(params, f, indent=4, sort_keys=True, separators=(',', ': '))
discriminator_params = params

# specify generator
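# The clamp_lower / clamp_upper / num_critic fields above are the usual WGAN
# critic settings: take num_critic critic steps per generator step and clip the
# critic's weights into [clamp_lower, clamp_upper] after each update. A minimal
# sketch, assuming a Chainer-style .params() iterator on the model and
# hypothetical update_critic / update_generator helpers:
for _ in range(config.num_critic):
    update_critic()  # hypothetical: one critic gradient step
    for p in discriminator.params():  # assumes Chainer-style params()
        p.data = p.data.clip(config.clamp_lower, config.clamp_upper)
update_generator()  # hypothetical: one generator gradient step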
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

nb_hidden = 25
nb_classes = 2
nb_train_samples = train_input.size(0)
eta = 1e-1 / nb_train_samples

# Possibility to add Linear, Tanh and ReLU
ann = Sequential(
    Linear(train_input.size(1), nb_hidden),
    Tanh(),
    Linear(nb_hidden, nb_classes),
    Tanh(),
)
MSE = MSELoss()

nb_epochs = 4000
for k in range(0, nb_epochs):
    acc_loss = 0
    nb_train_errors = 0
    ann.reset_parameters()

    # Training
    for n in range(0, train_input.size(0)):
        output = ann.forward(train_input[n])
    raise Exception("could not load {}".format(energy_model_filename))
else:
    config = EnergyModelParams()
    config.num_experts = 512
    config.weight_init_std = 0.05
    config.weight_initializer = "Normal"
    config.use_weightnorm = False
    config.nonlinearity = "elu"
    config.optimizer = "Adam"
    config.learning_rate = 0.0002
    config.momentum = 0.5
    config.gradient_clipping = 10
    config.weight_decay = 0

    # feature extractor
    feature_extractor = Sequential(weight_initializer=config.weight_initializer,
                                   weight_init_std=config.weight_init_std)
    feature_extractor.add(Convolution2D(3, 32, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(BatchNormalization(32))
    feature_extractor.add(Activation(config.nonlinearity))
    feature_extractor.add(dropout())
    feature_extractor.add(Convolution2D(32, 64, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(BatchNormalization(64))
    feature_extractor.add(Activation(config.nonlinearity))
    feature_extractor.add(dropout())
    feature_extractor.add(Convolution2D(64, 192, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(BatchNormalization(192))
    feature_extractor.add(Activation(config.nonlinearity))
    feature_extractor.add(dropout())
    feature_extractor.add(Convolution2D(192, 256, ksize=4, stride=2, pad=1, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(reshape_1d())
    feature_extractor.add(MinibatchDiscrimination(None, num_kernels=50, ndim_kernel=5, train_weights=True))
    train_labels[:, 0] = (train_samples[:, 0] - 0.5)**2 + (train_samples[:, 1] - 0.5)**2 < 1 / (2 * pi)
    train_labels[:, 1] = 1 - train_labels[:, 0]
    test_labels[:, 0] = (test_samples[:, 0] - 0.5)**2 + (test_samples[:, 1] - 0.5)**2 < 1 / (2 * pi)
    test_labels[:, 1] = 1 - test_labels[:, 0]
    return train_samples, test_samples, train_labels.type(torch.FloatTensor), test_labels.type(torch.FloatTensor)

# Generating the train and test data
train_x, test_x, train_target, test_target = generate_fake_samples(1000)
n_samples, dim_input = train_x.size()

# Define Net
test = Sequential(Linear(2, 25), Tanh(),
                  Linear(25, 25), Tanh(),
                  Linear(25, 25), Tanh(),
                  Linear(25, 2), MSE())

# Setting the number of gradient steps we want to do and the value of eta
gradient_steps = 1000
eta = 0.01 / n_samples

# Gradient descent to train on the training data
for step in range(gradient_steps):
    test(train_x)
    if step % 100 == 0:  # just to have less things displayed
        print('For step', step, 'we have the loss', test.calculate_loss(train_target).item())
    test.backward(train_target)
    test.optimize(eta)

# Predictions with the trained network
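# Note on the label rule above: the disc (x - 0.5)**2 + (y - 0.5)**2 < 1 / (2 * pi)
# has area pi * (1 / (2 * pi)) = 1/2, so if the samples are uniform on [0, 1]^2
# (as the (0.5, 0.5) center suggests) the two classes are balanced in expectation.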
config.ndim_input = 2
config.clamp_lower = -0.01
config.clamp_upper = 0.01
config.num_critic = 5
config.weight_std = 0.001
config.weight_initializer = "Normal"
config.nonlinearity = "leaky_relu"
config.optimizer = "rmsprop"
config.learning_rate = 0.0001
config.momentum = 0.5
config.gradient_clipping = 1
config.weight_decay = 0
chainer.global_config.discriminator = config

discriminator = Sequential()
discriminator.add(Linear(None, 128))
discriminator.add(Activation(config.nonlinearity))
discriminator.add(Linear(None, 128))

params = {
    "config": config.to_dict(),
    "model": discriminator.to_dict(),
}
with open(discriminator_sequence_filename, "w") as f:
    json.dump(params, f, indent=4, sort_keys=True, separators=(',', ': '))
discriminator_params = params

# specify generator
config = Config()
config.gamma = 0.5
config.num_mixture = args.num_mixture
config.ndim_z = 256
config.ndim_h = 128
config.weight_std = 0.1
config.weight_initializer = "Normal"
config.nonlinearity_d = "elu"
config.nonlinearity_g = "elu"
config.optimizer = "adam"
config.learning_rate = 0.0001
config.momentum = 0.1
config.gradient_clipping = 1
config.weight_decay = 0

encoder = Sequential()
encoder.add(gaussian_noise(std=0.1))
encoder.add(Linear(2, 64))
encoder.add(Activation(config.nonlinearity_d))
# encoder.add(BatchNormalization(64))
encoder.add(Linear(None, 64))
encoder.add(Activation(config.nonlinearity_d))
# encoder.add(BatchNormalization(64))
encoder.add(Linear(None, config.ndim_h))

decoder = Sequential()
decoder.add(Linear(config.ndim_h, 64))
decoder.add(Activation(config.nonlinearity_d))
# decoder.add(BatchNormalization(64))
decoder.add(Linear(None, 64))
decoder.add(Activation(config.nonlinearity_d))
def init_nn():
    learning_rate = 0.001
    nn = Sequential(learning_rate=learning_rate, epochs=50, batch_size=100,
                    learning_rate_decay=0.95, weight_decay=0.001)
    nn.add(Dense(n=200, in_shape=train.shape[1]))
    nn.add(BatchNorm())
    nn.add(Dense(n=100))
    nn.add(BatchNorm())
    nn.add(Dense(n=80))
    nn.add(BatchNorm())
    nn.add(Dense(n=20))
    nn.add(BatchNorm())
    nn.add(Dense(n=80))
    nn.add(BatchNorm())
    nn.add(Dense(n=100))
    nn.add(BatchNorm())
    nn.add(Dense(n=200))
    nn.add(BatchNorm())
    nn.add(Dense(n=10, activation="softmax"))
    nn.compile(loss="cross_entropy_softmax", optimiser="Adam")
    return nn
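# Hedged usage sketch for init_nn(): fit()'s positional signature (X, y, verbose)
# is inferred from the companion model_adam() snippet below; `train` is the
# global this function already reads, and `train_labels` is a hypothetical name.
nn = init_nn()
nn.fit(train, train_labels, True)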
class Tournament:
    '''Tournament class'''

    def __init__(self, player1, player2, number_of_games):
        self.player1 = player1
        self.player2 = player2
        self.number_of_games = number_of_games

    def arrange_singlegame(self):
        s = SingleGame(self.player1, self.player2)
        s.playGame()
        s.show_result()

    def arrange_tournament(self):
        player_1_winpercentage = []
        player_1_wins = 0
        for game in range(self.number_of_games):
            singlegame = SingleGame(self.player1, self.player2)
            singlegame.playGame()  # play the game before reading its result
            wins = singlegame.show_result()
            if wins == self.player1:
                player_1_wins += 1
            elif wins is None:
                player_1_wins += 0.5
            player_1_winpercentage.append(player_1_wins / (game + 1))
        plt.plot(player_1_winpercentage)
        plt.show()


p = Historian(1)
p2 = Sequential()
t = Tournament(p, p2, 100)
t.arrange_tournament()
hidden_units = 25
nb_epochs = 100
test_size = 1000
train_size = 1000

# Generate train / test set
train_input, train_target = generate_disc_set(train_size)
test_input, test_target = generate_disc_set(test_size)
train_target_one_hot = one_hot(train_target)
test_target_one_hot = one_hot(test_target)

# Initialize network
network = Sequential(
    Linear(input_units, hidden_units), Tanh(),
    Linear(hidden_units, hidden_units), Tanh(),
    Linear(hidden_units, output_units), Tanh())

# Train the model
mini_batch_size = 100
for i in range(100):
    train_model(network, train_input, train_target_one_hot, nb_epochs, mini_batch_size)
    print("Train accuracy: ", round(compute_accuracy(network, train_input, train_target, mini_batch_size), 2))
    print("Test accuracy:", round(compute_accuracy(network, test_input, test_target, mini_batch_size), 2))
def model_adam(X, y, verbose):
    nn = Sequential(learning_rate=learning_rate, epochs=epochs, batch_size=100,
                    learning_rate_decay=0.95, weight_decay=0.01)
    nn.add(Dense(n=200, in_shape=X.shape[1]))
    nn.add(BatchNorm())
    nn.add(Dense(n=100))
    nn.add(BatchNorm())
    nn.add(Dense(n=80))
    nn.add(BatchNorm())
    nn.add(Dense(n=40))
    nn.add(BatchNorm())
    nn.add(Dense(n=80))
    nn.add(BatchNorm())
    nn.add(Dense(n=100))
    nn.add(BatchNorm())
    nn.add(Dense(n=200))
    nn.add(BatchNorm())
    nn.add(Dense(n=10, activation="softmax"))
    nn.compile(loss="cross_entropy_softmax", optimiser="Adam")
    nn.fit(X, y, verbose)
    return nn
def build_model():
    model = Sequential(MSE(), input_size=2)
    model.add_layer(Linear(2, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(ReLU(25))
    model.add_layer(Linear(25, 25))
    model.add_layer(Tanh(25))
    model.add_layer(Linear(25, 2))
    return model
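# Architecture note: build_model() assembles the same 2 -> 25 -> 25 -> 25 -> 2
# MLP that the surrounding snippets construct inline, except that here the MSE
# loss is attached at construction time and layers are appended via add_layer().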
else:
    config = EnergyModelParams()
    config.ndim_input = image_width * image_height
    config.num_experts = 128
    config.weight_init_std = 0.05
    config.weight_initializer = "Normal"
    config.use_weightnorm = True
    config.nonlinearity = "elu"
    config.optimizer = "Adam"
    config.learning_rate = 0.0002
    config.momentum = 0.5
    config.gradient_clipping = 10
    config.weight_decay = 0

    # feature extractor
    feature_extractor = Sequential(weight_initializer=config.weight_initializer,
                                   weight_init_std=config.weight_init_std)
    feature_extractor.add(Linear(config.ndim_input, 1000, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(Activation(config.nonlinearity))
    feature_extractor.add(gaussian_noise(std=0.3))
    feature_extractor.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(Activation(config.nonlinearity))
    feature_extractor.add(gaussian_noise(std=0.3))
    feature_extractor.add(Linear(None, 250, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(Activation(config.nonlinearity))
    feature_extractor.add(gaussian_noise(std=0.3))
    feature_extractor.add(Linear(None, config.num_experts, use_weightnorm=config.use_weightnorm))
    feature_extractor.add(tanh())

    # experts
    experts = Sequential(weight_initializer=config.weight_initializer,
                         weight_init_std=config.weight_init_std)
    experts.add(Linear(config.num_experts, config.num_experts, use_weightnorm=config.use_weightnorm))
config = Config()
config.gamma = 0.5
config.ndim_z = ndim_z
config.ndim_h = ndim_h
config.weight_std = 0.01
config.weight_initializer = "Normal"
config.nonlinearity_d = "elu"
config.nonlinearity_g = "elu"
config.optimizer = "adam"
config.learning_rate = 0.0001
config.momentum = 0.5
config.gradient_clipping = 1
config.weight_decay = 0

# Discriminator
encoder = Sequential()
encoder.add(gaussian_noise(std=0.3))
encoder.add(Convolution2D(3, 32, ksize=4, stride=2, pad=1))
encoder.add(BatchNormalization(32))
encoder.add(Activation(config.nonlinearity_d))
encoder.add(Convolution2D(32, 64, ksize=4, stride=2, pad=1))
encoder.add(BatchNormalization(64))
encoder.add(Activation(config.nonlinearity_d))
encoder.add(Convolution2D(64, 128, ksize=4, stride=2, pad=1))
encoder.add(BatchNormalization(128))
encoder.add(Activation(config.nonlinearity_d))
encoder.add(Convolution2D(128, 256, ksize=4, stride=2, pad=1))
encoder.add(BatchNormalization(256))
encoder.add(Activation(config.nonlinearity_d))
encoder.add(Linear(None, ndim_h))
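# Shape note: every Convolution2D above uses ksize=4, stride=2, pad=1, which
# halves the spatial resolution for even input sizes ((H + 2*1 - 4) / 2 + 1 = H / 2),
# so the four conv blocks downsample the input by a factor of 16 before the
# final Linear projection to ndim_h.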
config.ndim_x = 28 * 28
config.ndim_y = 10
config.ndim_z = 100
config.weight_init_std = 0.01
config.weight_initializer = "Normal"
config.nonlinearity = "relu"
config.optimizer = "Adam"
config.learning_rate = 0.0003
config.momentum = 0.9
config.gradient_clipping = 10
config.weight_decay = 0
config.use_weightnorm = False
config.num_mc_samples = 1

# p(x|a,y,z) - x ~ Bernoulli
p_x_ayz = Sequential(weight_initializer=config.weight_initializer,
                     weight_init_std=config.weight_init_std)
p_x_ayz.add(Merge(num_inputs=3, out_size=500, use_weightnorm=config.use_weightnorm))
p_x_ayz.add(BatchNormalization(500))
p_x_ayz.add(Activation(config.nonlinearity))
p_x_ayz.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
p_x_ayz.add(BatchNormalization(500))
p_x_ayz.add(Activation(config.nonlinearity))
p_x_ayz.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
p_x_ayz.add(BatchNormalization(500))
p_x_ayz.add(Activation(config.nonlinearity))
p_x_ayz.add(Linear(None, config.ndim_x, use_weightnorm=config.use_weightnorm))

# p(a|y,z) - a ~ Gaussian
p_a_yz = Sequential(weight_initializer=config.weight_initializer,
                    weight_init_std=config.weight_init_std)
p_a_yz.add(Merge(num_inputs=2, out_size=500, use_weightnorm=config.use_weightnorm))
p_a_yz.add(BatchNormalization(500))
else:
    config = Config()
    config.ndim_x = 28 * 28
    config.ndim_y = 10
    config.weight_init_std = 0.01
    config.weight_initializer = "Normal"
    config.nonlinearity = "relu"
    config.optimizer = "Adam"
    config.learning_rate = 0.0002
    config.momentum = 0.9
    config.gradient_clipping = 10
    config.weight_decay = 0
    config.lambda_ = 1
    config.Ip = 1

    model = Sequential(weight_initializer=config.weight_initializer,
                       weight_init_std=config.weight_init_std)
    model.add(Linear(None, 1200))
    model.add(Activation(config.nonlinearity))
    model.add(BatchNormalization(1200))
    model.add(Linear(None, 600))
    model.add(Activation(config.nonlinearity))
    model.add(BatchNormalization(600))
    model.add(Linear(None, config.ndim_y))

params = {
    "config": config.to_dict(),
    "model": model.to_dict(),
}
with open(model_filename, "w") as f:
    json.dump(params, f, indent=4, sort_keys=True, separators=(',', ': '))
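# The json.dump above is the save half of a round trip; the try/except these
# snippets open with reads the same file back. A hedged sketch of the load
# side -- from_dict() is an assumption about this Sequential implementation,
# taken to be the inverse of the to_dict() used above:
with open(model_filename, "r") as f:
    params = json.load(f)
model = Sequential()
model.from_dict(params["model"])  # assumed API, paired with model.to_dict()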
config.ndim_x = 28 * 28
config.ndim_y = 10
config.ndim_z = 100
config.weight_init_std = 0.01
config.weight_initializer = "Normal"
config.nonlinearity = "relu"
config.optimizer = "Adam"
config.learning_rate = 0.0003
config.momentum = 0.9
config.gradient_clipping = 10
config.weight_decay = 0
config.use_weightnorm = False
config.num_mc_samples = 1

# p(x|a,y,z) - x ~ Bernoulli
p_x_ayz = Sequential(weight_initializer=config.weight_initializer,
                     weight_init_std=config.weight_init_std)
p_x_ayz.add(Merge(num_inputs=3, out_size=500, use_weightnorm=config.use_weightnorm))
p_x_ayz.add(BatchNormalization(500))
p_x_ayz.add(Activation(config.nonlinearity))
p_x_ayz.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
p_x_ayz.add(BatchNormalization(500))
p_x_ayz.add(Activation(config.nonlinearity))
p_x_ayz.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
p_x_ayz.add(BatchNormalization(500))
p_x_ayz.add(Activation(config.nonlinearity))
p_x_ayz.add(Linear(None, config.ndim_x, use_weightnorm=config.use_weightnorm))

# p(a|y,z) - a ~ Gaussian
else:
    config = DiscriminatorParams()
    config.a = 0
    config.b = 1
    config.c = 1
    config.weight_std = 0.01
    config.weight_initializer = "Normal"
    config.use_weightnorm = False
    config.nonlinearity = "leaky_relu"
    config.optimizer = "adam"
    config.learning_rate = 0.0001
    config.momentum = 0.5
    config.gradient_clipping = 1
    config.weight_decay = 0

    discriminator = Sequential()
    discriminator.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
    # discriminator.add(gaussian_noise(std=0.5))
    discriminator.add(Activation(config.nonlinearity))
    # discriminator.add(BatchNormalization(500))
    discriminator.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
    discriminator.add(Activation(config.nonlinearity))
    # discriminator.add(BatchNormalization(500))
    discriminator.add(Linear(None, 1, use_weightnorm=config.use_weightnorm))

discriminator_params = {
    "config": config.to_dict(),
    "model": discriminator.to_dict(),
}
with open(discriminator_sequence_filename, "w") as f:
        params = json.load(f)
except Exception as e:
    raise Exception("could not load {}".format(sequence_filename))
else:
    config = Params()
    config.num_classes = 10
    config.weight_std = 0.1
    config.weight_initializer = "Normal"
    config.nonlinearity = "relu"
    config.optimizer = "adam"
    config.learning_rate = 0.0001
    config.momentum = 0.9
    config.gradient_clipping = 1
    config.weight_decay = 0

    model = Sequential()
    model.add(Linear(None, 500))
    model.add(Activation(config.nonlinearity))
    model.add(BatchNormalization(500))
    model.add(Linear(None, 500))
    model.add(Activation(config.nonlinearity))
    model.add(BatchNormalization(500))
    model.add(Linear(None, config.num_classes))

params = {
    "config": config.to_dict(),
    "model": model.to_dict(),
}
with open(sequence_filename, "w") as f:
    json.dump(params, f, indent=4, sort_keys=True, separators=(',', ': '))
def main():
    # generate data and translate labels
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + ReLU + Linear + ReLU + Linear + Tanh')
    print('Loss: MSE')
    print('Optimizer: SGD')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss and optimizer for Model 1
    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    # train Model 1
    batch_size = 1
    for epoch in range(50):
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch-fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_1.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_1.forward(temp_train_feature)
                temp_train_loss = criterion_1.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_1.backward(temp_train_pred, temp_train_label)
                # accumulate parameter gradient in each batch
                my_model_1.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_1.step()

        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]
            temp_test_pred = my_model_1.forward(temp_test_feature)
            temp_test_loss = criterion_1.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss
            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        print("Epoch: {}/{}..".format(epoch + 1, 50),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )

    # visualize the classification performance of Model 1 on testing set
    test_pred_labels_1 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_1.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_1.append(int(test_targets[i]))
        else:
            test_pred_labels_1.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_1)
    axes.set_title('Classification Performance of Model 1')
    plt.show()

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + Dropout + SeLU + Linear + Dropout + ReLU + Linear + Sigmoid')
    print('Loss: Cross Entropy')
    print('Optimizer: Adam')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss function and optimizer for Model 2
    my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
                         Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
    my_model_2 = Sequential(my_model_design_2)
    optimizer_2 = Adam(my_model_2, lr=1e-3)
    criterion_2 = CrossEntropy()

    # train Model 2
    batch_size = 1
    epoch = 0
    while epoch < 25:
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch-fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_2.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_2.forward(temp_train_feature)
                temp_train_loss = criterion_2.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_2.backward(temp_train_pred, temp_train_label)
                '''
                if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
                    continue
                '''
                # accumulate parameter gradient in each batch
                my_model_2.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_2.step()

        # evaluate the current model on testing set
        # only forward pass is implemented
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]
            temp_test_pred = my_model_2.forward(temp_test_feature)
            temp_test_loss = criterion_2.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss
            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        # in case there is a gradient explosion problem, initialize the model again and
        # restart training (this situation seldom happens); the comparison below is
        # true only when the loss gradient is NaN
        if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
            epoch = 0
            my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                                 Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
            my_model_2 = Sequential(my_model_design_2)
            optimizer_2 = Adam(my_model_2, lr=1e-3)
            criterion_2 = CrossEntropy()
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        print("Epoch: {}/{}..".format(epoch + 1, 25),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )
        epoch += 1

    # visualize the classification performance of Model 2 on testing set
    test_pred_labels_2 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_2.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_2.append(int(test_targets[i]))
        else:
            test_pred_labels_2.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_2)
    axes.set_title('Classification Performance of Model 2')
    plt.show()
config.clamp_lower = -0.01
config.clamp_upper = 0.01
config.num_critic = 5
config.weight_init_std = 0.001
config.weight_initializer = "Normal"
config.use_weightnorm = False
config.nonlinearity = "leaky_relu"
config.optimizer = "rmsprop"
config.learning_rate = 0.0001
config.momentum = 0.5
config.gradient_clipping = 1
config.weight_decay = 0
config.use_feature_matching = False
config.use_minibatch_discrimination = False

discriminator = Sequential(weight_initializer=config.weight_initializer,
                           weight_init_std=config.weight_init_std)
discriminator.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))
# discriminator.add(gaussian_noise(std=0.5))
discriminator.add(Activation(config.nonlinearity))
# discriminator.add(BatchNormalization(500))
if config.use_minibatch_discrimination:
    discriminator.add(MinibatchDiscrimination(None, num_kernels=50, ndim_kernel=5))
discriminator.add(Linear(None, 500, use_weightnorm=config.use_weightnorm))

params = {
    "config": config.to_dict(),
    "model": discriminator.to_dict(),
}
with open(discriminator_sequence_filename, "w") as f:
from dataset import cifar100
import numpy as np
from full import FullLayer
from conv import ConvLayer
from maxpool import MaxPoolLayer
from flatten import FlattenLayer
from softmax import SoftMaxLayer
from cross_entropy import CrossEntropyLayer
from sequential import Sequential
from relu import ReluLayer
import matplotlib.pyplot as plt
from time import time

(x_train, y_train), (x_test, y_test) = cifar100(1212149859)

model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
                           ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
                           FlattenLayer(), FullLayer(2048, 4), SoftMaxLayer()),
                   loss=CrossEntropyLayer())

t0 = time()
epo = 15
loss = model.fit(x_train, y_train, epochs=epo, lr=0.1, batch_size=128)
space = np.arange(0, epo)

pred = model.predict(x_test)
y_test = np.argmax(y_test, axis=1)
acc = np.mean(pred == y_test)

plt.plot(space, loss, c='r')
print("done in %0.3fs." % (time() - t0))

plt.figure()
plt.plot(space, loss, label='Accuracy =' + str(acc) + ' with lr = 0.1')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
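# Shape check for FullLayer(2048, 4): assuming the 3x3 ConvLayers preserve the
# spatial size and each MaxPoolLayer halves it, 32x32 CIFAR images come out of
# the two conv/pool stages as 32-channel 8x8 maps, and 32 * 8 * 8 = 2048.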
        params = json.load(f)
except Exception as e:
    raise Exception("could not load {}".format(sequence_filename))
else:
    config = Params()
    config.num_classes = 10
    config.weight_std = 0.1
    config.weight_initializer = "Normal"
    config.nonlinearity = "relu"
    config.optimizer = "adam"
    config.learning_rate = 0.0001
    config.momentum = 0.9
    config.gradient_clipping = 1
    config.weight_decay = 0

    model = Sequential()
    model.add(Convolution2D(1, 32, ksize=4, stride=2, pad=1))
    model.add(BatchNormalization(32))
    model.add(Activation(config.nonlinearity))
    model.add(Convolution2D(32, 64, ksize=4, stride=2, pad=1))
    model.add(BatchNormalization(64))
    model.add(Activation(config.nonlinearity))
    model.add(Convolution2D(64, 128, ksize=3, stride=2, pad=1))
    model.add(BatchNormalization(128))
    model.add(Activation(config.nonlinearity))
    model.add(Linear(None, config.num_classes))

params = {
    "config": config.to_dict(),
    "model": model.to_dict(),
}
config = ClassifierParams()
config.num_clusters = 10
config.weight_std = 0.1
config.weight_initializer = "Normal"
config.nonlinearity = "relu"
config.optimizer = "adam"
config.learning_rate = 0.002
config.momentum = 0.9
config.gradient_clipping = 1
config.weight_decay = 0
config.lam = 0.2
config.mu = 4.0
config.sigma = 100.0
config.ip = 1

model = Sequential()
model.add(Linear(None, 1200))
model.add(Activation(config.nonlinearity))
model.add(BatchNormalization(1200))
model.add(Linear(None, 1200))
model.add(Activation(config.nonlinearity))
model.add(BatchNormalization(1200))
model.add(Linear(None, config.num_clusters))

params = {
    "config": config.to_dict(),
    "model": model.to_dict(),
}
with open(sequence_filename, "w") as f:
    json.dump(params, f, indent=4, sort_keys=True, separators=(',', ': '))
else:
    config = Config()
    config.ndim_x = 28 * 28
    config.ndim_y = 10
    config.ndim_z = 2
    config.distribution_z = "deterministic"  # deterministic or gaussian
    config.weight_std = 0.001
    config.weight_initializer = "Normal"
    config.nonlinearity = "relu"
    config.optimizer = "Adam"
    config.learning_rate = 0.001
    config.momentum = 0.5
    config.gradient_clipping = 5
    config.weight_decay = 0

    decoder = Sequential()
    decoder.add(Linear(None, 1000))
    decoder.add(Activation(config.nonlinearity))
    # decoder.add(BatchNormalization(1000))
    decoder.add(Linear(None, 1000))
    decoder.add(Activation(config.nonlinearity))
    # decoder.add(BatchNormalization(1000))
    decoder.add(Linear(None, config.ndim_x))
    decoder.add(tanh())

    discriminator = Sequential()
    discriminator.add(Merge(num_inputs=2, out_size=1000, nobias=True))
    discriminator.add(gaussian_noise(std=0.3))
    discriminator.add(Activation(config.nonlinearity))
    # discriminator.add(BatchNormalization(1000))
    discriminator.add(Linear(None, 1000))
config = Config()
config.ndim_x = 28 * 28
config.ndim_y = 10
config.ndim_z = 10
config.distribution_z = "deterministic"  # deterministic or gaussian
config.weight_init_std = 0.001
config.weight_initializer = "Normal"
config.nonlinearity = "relu"
config.optimizer = "Adam"
config.learning_rate = 0.0001
config.momentum = 0.1
config.gradient_clipping = 5
config.weight_decay = 0

# x = decoder(y, z)
decoder = Sequential(weight_initializer=config.weight_initializer,
                     weight_init_std=config.weight_init_std)
decoder.add(Merge(num_inputs=2, out_size=1000, nobias=True))
decoder.add(Activation(config.nonlinearity))
decoder.add(Linear(None, 1000))
decoder.add(Activation(config.nonlinearity))
decoder.add(Linear(None, 1000))
decoder.add(Activation(config.nonlinearity))
decoder.add(Linear(None, config.ndim_x))
decoder.add(sigmoid())

discriminator_z = Sequential(weight_initializer=config.weight_initializer,
                             weight_init_std=config.weight_init_std)
discriminator_z.add(gaussian_noise(std=0.3))
discriminator_z.add(Linear(config.ndim_z, 1000))
discriminator_z.add(Activation(config.nonlinearity))
discriminator_z.add(Linear(None, 1000))
discriminator_z.add(Activation(config.nonlinearity))