def create_network():
    layers = [
        DataTransform(transform=Normalizer(divisor=128.)),
        Conv((11, 11, 96), init=Kaiming(), activation=Rectlin(), strides=4, padding=1),
        Conv((1, 1, 96), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 96), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),   # 54->27
        Conv((5, 5, 256), init=Kaiming(), activation=Rectlin(), strides=1),             # 27->23
        Conv((1, 1, 256), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 256), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 23->12
        Conv((3, 3, 384), init=Kaiming(), activation=Rectlin(), strides=1, padding=1),
        Conv((1, 1, 384), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Rectlin(), strides=2, padding=1),  # 12->6
        Dropout(keep=0.5),
        Conv((3, 3, 1024), init=Kaiming(), activation=Rectlin(), strides=1, padding=1),
        Conv((1, 1, 1024), init=Kaiming(), activation=Rectlin(), strides=1),
        Conv((1, 1, 1000), init=Kaiming(), activation=Rectlin(), strides=1),
        Pooling(6, op='avg'),
        Activation(Softmax())
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
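The helper above only builds the layer graph and its cost; a minimal sketch of how it might be driven in neon follows. The backend settings, the `train` iterator, and the epoch count are illustrative assumptions, not part of the snippet above.

# Hypothetical driver for create_network(); backend choice, batch size, the
# `train` iterator and the epoch count are assumptions for illustration only.
from neon.backends import gen_backend
from neon.callbacks.callbacks import Callbacks
from neon.optimizers import GradientDescentMomentum

be = gen_backend(backend='gpu', batch_size=128)   # assumed backend/batch size

model, cost = create_network()
opt = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, wdecay=0.0005)

# `train` stands in for an ImageNet-style data iterator; its construction is omitted here.
callbacks = Callbacks(model)
model.fit(train, optimizer=opt, num_epochs=90, cost=cost, callbacks=callbacks)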
test.init_batch_provider()

relu = Rectlin()
init_uni = GlorotUniform()

# The parameters below are straight out of [Springenberg2014]
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  schedule=Schedule(step_config=[10], change=0.1),
                                  momentum_coef=0.9,
                                  wdecay=.0005)

# set up model layers
layers = []
layers.append(DataTransform(transform=Normalizer(divisor=128.)))

layers.append(Conv((11, 11, 96), init=init_uni, activation=relu, strides=4, padding=1))
layers.append(Conv((1, 1, 96), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 96), init=init_uni, activation=relu, strides=2, padding=1))   # 54->27

layers.append(Conv((5, 5, 256), init=init_uni, activation=relu, strides=1))             # 27->23
layers.append(Conv((1, 1, 256), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256), init=init_uni, activation=relu, strides=2, padding=1))  # 23->12

layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1))  # 12->6

layers.append(Dropout(keep=0.5))
layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
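The `# 54->27`-style comments track the feature-map side length through the network, following the usual convolution output-size formula out = (in + 2*padding - kernel) // stride + 1. The short check below reproduces them, assuming a 224x224 input crop (the crop size is an assumption; the data pipeline is not shown here).

# Sanity-check of the spatial sizes noted in the comments, assuming a 224x224 crop.
# 1x1 convs and the 3x3 stride-1 pad-1 convs preserve the size and are skipped.
def conv_out(size, kernel, stride=1, padding=0):
    return (size + 2 * padding - kernel) // stride + 1

s = 224
s = conv_out(s, 11, stride=4, padding=1)   # 11x11/4 conv: 224 -> 54
s = conv_out(s, 3, stride=2, padding=1)    # 54 -> 27
s = conv_out(s, 5)                         # 27 -> 23
s = conv_out(s, 3, stride=2, padding=1)    # 23 -> 12
s = conv_out(s, 3, stride=2, padding=1)    # 12 -> 6, matching Pooling(6, op='avg') in create_network()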