# Script entry point (whitespace-collapsed excerpt). It calls freeze_support(), loads
# the CIFAR-10 dataset, and fills seven lists in `initializers` -- one list per scheme
# named in `labels`, in the same order -- with one initializer object per value of `i`
# in [8*16*16, 16*8*8, 32*4*4]. The closing loop starts building a `layers` list for
# each of those seven initializer lists.
# NOTE(review): the commas after the first three append() calls turn those statements
# into discarded tuples; they look unintentional -- confirm and drop them.
# NOTE(review): the `layers = [` literal continues past the end of this excerpt.
if __name__ == '__main__': freeze_support() data = dataset.cifar10_dataset.load() num_passes = 30 initializers = [[], [], [], [], [], [], []] for i in [8*16*16, 16*8*8, 32*4*4]: initializers[0].append(weight_initializer.Fill(0)), initializers[1].append(weight_initializer.Fill(1e-3)), initializers[2].append(weight_initializer.Fill(1)), initializers[3].append(weight_initializer.RandomUniform(-1, 1)) initializers[4].append(weight_initializer.RandomUniform(-1/np.sqrt(i), 1/np.sqrt( i))) initializers[5].append(weight_initializer.RandomNormal()) initializers[6].append(weight_initializer.RandomNormal(1/np.sqrt(i))) labels = [ 'Fill(0)', 'Fill(0.001)', 'Fill(1)', 'Uniform(low=-1, high=1)', 'Uniform(low=-1/sqrt(fan_out), high=1/sqrt(fan_out))', 'Normal(sigma=1, mu=0)', 'Normal(sigma=1/sqrt(fan_out), mu=0)', ] statistics = [] for initializer in initializers: layers = [
# NOTE(review): whitespace-collapsed excerpt. It opens inside a commented-out list of
# alternative weight initializers (the start of that list is not visible here), then
# defines two `initializers` label strings and begins `model_layers`, a list of layer
# stacks. Each visible stack alternates MaxPool(size=2, stride=2) with Convolution
# layers that use activation.tanh and weight_initializer Fill(0). The first stack
# passes RandomNormal() as fb_weight_initializer; the visible part of the second
# passes RandomNormal(1/np.sqrt(16*16*16)). The second stack continues past the end
# of this excerpt.
# # weight_initializer.RandomUniform(-1/np.sqrt(num_hidden_units), 1/np.sqrt(num_hidden_units)), # # weight_initializer.RandomUniform(-1/num_hidden_units, 1/num_hidden_units), # # weight_initializer.RandomUniform(-100, 100), # # weight_initializer.RandomNormal(1, 0), # weight_initializer.RandomNormal(1/np.sqrt(num_hidden_units)), # weight_initializer.RandomNormal(3/np.sqrt(num_hidden_units)), # weight_initializer.RandomNormal(1/(3 * np.sqrt(num_hidden_units))), # ] initializers = ['Normal(1, 0)', 'Normal(1/sqrt(fan_out), 0)'] model_layers = [ [ MaxPool(size=2, stride=2), Convolution((16, 3, 3, 3), stride=1, padding=1, dropout_rate=0, activation=activation.tanh, weight_initializer=weight_initializer.Fill(0), fb_weight_initializer=weight_initializer.RandomNormal()), MaxPool(size=2, stride=2), Convolution((16, 16, 3, 3), stride=1, padding=1, dropout_rate=0, activation=activation.tanh, weight_initializer=weight_initializer.Fill(0), fb_weight_initializer=weight_initializer.RandomNormal()), MaxPool(size=2, stride=2), Convolution((32, 16, 3, 3), stride=1, padding=1, dropout_rate=0, activation=activation.tanh, weight_initializer=weight_initializer.Fill(0), fb_weight_initializer=weight_initializer.RandomNormal()), MaxPool(size=2, stride=2), ConvToFullyConnected(), FullyConnected(size=64, activation=activation.tanh), FullyConnected(size=10, activation=None, last_layer=True) ], [ MaxPool(size=2, stride=2), Convolution((16, 3, 3, 3), stride=1, padding=1, dropout_rate=0, activation=activation.tanh, weight_initializer=weight_initializer.Fill(0), fb_weight_initializer=weight_initializer.RandomNormal(1/np.sqrt(16*16*16))),
# Experiment configuration for the weight-initializer comparison.
num_hidden_units = 500
num_hidden_layers = 5
num_passes = 30

# CIFAR-10 is the active dataset. The MNIST alternative that was previously
# commented out here is: dataset.mnist_dataset.load('dataset/mnist')
data = dataset.cifar10_dataset.load()

# Initializers under test, in the same order as `labels` below.
# The three constant fills come first, followed by the random schemes.
initializers = [weight_initializer.Fill(value) for value in (0, 1e-3, 1)]
initializers += [
    weight_initializer.RandomUniform(-1, 1),
    weight_initializer.RandomUniform(
        -1 / np.sqrt(num_hidden_units),
        1 / np.sqrt(num_hidden_units),
    ),
    weight_initializer.RandomUniform(
        -1 / num_hidden_units,
        1 / num_hidden_units,
    ),
    weight_initializer.RandomNormal(1, 0),
    weight_initializer.RandomNormal(1 / np.sqrt(num_hidden_units)),
]

# One descriptive string per entry of `initializers` (index-aligned).
labels = [
    'Fill(0)',
    'Fill(0.001)',
    'Fill(1)',
    'Uniform(low=-1, high=1)',
    'Uniform(low=-1/sqrt(fan_out), high=1/sqrt(fan_out))',
    'Uniform(low=-1/fan_out, high=1/fan_out)',
    'Normal(sigma=1, mu=0)',
    'Normal(sigma=1/sqrt(fan_out), mu=0)',
]

# Starts empty; nothing in this block appends to it.
statistics = []
# Build one model per (initializer scale, training method) pair.
# `sizes` is fan_in for the first label and fan_out for the second; it only
# feeds the argument passed to RandomNormal. Hidden-layer widths always come
# from fan_out.
# NOTE(review): loop nesting was reconstructed from a whitespace-collapsed
# source -- confirm against the original file.
initializers = ['Normal(1/sqrt(fan_in), 0)', 'Normal(1/sqrt(fan_out), 0)']
train_methods = ['dfa', 'bp']
statistics = []
labels = []

# `initializer` and `train_method` are not read in this block; presumably
# they are consumed further down -- verify before removing.
for sizes, initializer in zip([fan_in, fan_out], initializers):
    for train_method in train_methods:
        layers = [ConvToFullyConnected()]

        # One tanh hidden layer per entry of `sizes`. Indexing fan_out by
        # position keeps the original IndexError if fan_out is shorter.
        for i, scale_size in enumerate(sizes):
            layers.append(
                FullyConnected(
                    size=fan_out[i],
                    activation=activation.tanh,
                    weight_initializer=weight_initializer.RandomNormal(
                        1 / np.sqrt(scale_size)
                    ),
                )
            )

        # Output layer: 10 units, no activation, flagged as the last layer.
        layers.append(
            FullyConnected(size=10, activation=None, last_layer=True)
        )

        model = Model(
            layers=layers,
            num_classes=10,
            optimizer=GDMomentumOptimizer(lr=1e-3, mu=0.9),
            regularization=0.001,
            # lr_decay=0.5,
            # lr_decay_interval=100
        )

        print("\nRun training:\n------------------------------------")