Example #1
# imports assumed from the neon deep learning framework
from neon.initializers import Constant, Gaussian, Xavier
from neon.layers import Conv, Dropout, GeneralizedCost, Linear, Pooling
from neon.models import Model
from neon.transforms import Rectlin, SumSquared

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# setup model layers
relu = Rectlin()
conv_params = {'strides': 1,
               'padding': 1,
               'init': Xavier(local=True),
               'bias': Constant(0),
               'activation': relu}
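# shared conv settings: stride-1, pad-1 convs with Xavier init computed over
# the local receptive field (local=True), zero bias, and ReLU activation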
               
vgg_layers = []

# one 7x7 stem conv, then 3x3 conv stacks with increasing filter counts
vgg_layers.append(Conv((7, 7, 32), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 64), **conv_params))
vgg_layers.append(Conv((3, 3, 128), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Dropout(keep=0.5))

vgg_layers.append(Linear(nout=4, init=init_norm))

model = Model(layers=vgg_layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())
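
# Example #1 stops at the cost definition; a minimal training sketch under
# assumed settings (the optimizer choice, epoch count, and `train_set`
# iterator are not part of the original snippet):
opt = GradientDescentMomentum(0.01, momentum_coef=0.9)
model.fit(train_set, optimizer=opt, num_epochs=10, cost=cost,
          callbacks=Callbacks(model))
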
Example #2
layers.append(
    Conv((3, 3, 256), init=init_uni, activation=relu, strides=2,
         padding=1))  # 23->12

layers.append(
    Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(
    Conv((3, 3, 384), init=init_uni, activation=relu, strides=2,
         padding=1))  # 12->6

layers.append(Dropout(keep=0.5))
layers.append(
    Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))
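# the 1x1 convs act as per-location fully connected layers; averaging over
# the final 6x6 feature map yields one score per class (1000-way)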

layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_params(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)
if args.deconv:
    # visualize learned features with neon's deconv callback
    callbacks.add_deconv_callback(train, test)
Example #3
# X_test, y_test, and nclass=10 are assumptions standing in for the
# snippet's data-loading step, which is not shown
test = ArrayIterator(X_test,
                     y_test,
                     nclass=10,
                     lshape=(3, 32, 32),
                     name='test')

init_uni = Uniform(low=-0.1, high=0.1)
if args.datatype in [np.float32, np.float64]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
elif args.datatype in [np.float16]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
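# fp16 path: the cost is scaled up (CrossEntropyMulti(scale=cost_scale)
# below) so small gradients stay representable, and the learning rate is
# scaled down by the same factor to compensate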

layers = [
    Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
    Pooling((2, 2)),
    Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
    Pooling((2, 2)),
    Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
    Affine(nout=10, init=init_uni, activation=Softmax())
]

if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

model = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
Example #4
train_set_x, train_set_y, valid_set_x, valid_set_y = load_data()
train_set = ArrayIterator(train_set_x,
                          train_set_y,
                          nclass=10,
                          lshape=(1, 28, 28))
valid_set = ArrayIterator(valid_set_x,
                          valid_set_y,
                          nclass=10,
                          lshape=(1, 28, 28))
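# lshape tells the iterator to present each flat row as (channels, height,
# width); (1, 28, 28) matches single-channel 28x28 images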
init_norm = Gaussian(loc=0.0, scale=0.01)

# setup model layers
layers = [
    Conv(fshape=(5, 5, 32), init=init_norm, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Conv(fshape=(5, 5, 32), init=init_norm, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Dropout(),
    Affine(nout=500, init=init_norm, activation=Rectlin()),
    Dropout(),
    Affine(nout=10, init=init_norm, activation=Softmax())
]

# setup cost function as cross entropy; CrossEntropyMulti pairs with the
# Softmax output layer above
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# setup optimizer
optimizer = GradientDescentMomentum(0.1,
                                    momentum_coef=0.9,
                                    stochastic_round=args.rounding)
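
# this snippet ends before training; a minimal sketch of the fit call it
# builds toward (the epoch count is an assumption):
model = Model(layers=layers)
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=optimizer, num_epochs=10, cost=cost,
          callbacks=callbacks)
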
Example #5
# the conv kwarg dicts are reconstructed from their use below; init_uni and
# the activation are assumptions
conv = dict(init=init_uni, activation=Rectlin())
convp1 = dict(init=init_uni, activation=Rectlin(), padding=1)
convp1s2 = dict(init=init_uni, activation=Rectlin(), padding=1, strides=2)

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1),
    Conv((3, 3, 192), **convp1s2),
    Dropout(keep=.5),
    Conv((3, 3, 192), **convp1),
    Conv((1, 1, 192), **conv),
    Conv((1, 1, 16), **conv),
    Pooling(8, op="avg"),
    Activation(Softmax())
]
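
# all-convolutional design: stride-2 3x3 convs stand in for pooling, 1x1
# convs mix channels, and a global 8x8 average pool feeds the Softmax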

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)
Example #6
    # tail of a residual-module helper: the main conv path is summed with an
    # identity/projection side path, then passed through a ReLU
    module = [MergeSum([mainpath, sidepath]), Activation(Rectlin())]
    return module


# Structure of the deep residual part of the network:
# args.depth residual modules (two conv layers each) at each feature-map
# depth: 16, 32, then 64
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * args.depth)]
strides = [1] + [
    1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
]
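
# e.g. with args.depth = 2: nfms = [16, 16, 32, 32, 64, 64] and
# strides = [1, 1, 2, 1, 2, 1] (a stride-2 module at each width increase)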

# Now construct the network
layers = [Conv(**conv_params(3, 16))]
for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling('all', op='avg'))
layers.append(
    Affine(10,
           init=Kaiming(local=False),
           batch_norm=True,
           activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1,
                              0.9,
                              wdecay=0.0001,
                              schedule=Schedule([90, 135], 0.1))
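
# Schedule([90, 135], 0.1) multiplies the learning rate by 0.1 at epochs 90
# and 135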

# configure callbacks
valmetric = Misclassification()
# the eval-set wiring here is an assumption
callbacks = Callbacks(model,
                      eval_set=valid_set,
                      metric=valmetric,
                      **args.callback_args)
Example #7
# `train_params` and `train_idx` are assumptions mirroring the test loader
train = DataLoader(set_name='train',
                   media_params=train_params,
                   index_file=train_idx,
                   repo_dir=data_dir,
                   **common)
test = DataLoader(set_name=test_set,
                  media_params=test_params,
                  index_file=test_idx,
                  repo_dir=test_dir,
                  **common)
gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())
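# stride presets shrink the long (time) axis fastest up front: str_w=4 in
# the first conv, 2 in the middle convs, 1 at the end; note that `common`
# here re-binds the name used for the DataLoader kwargs above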
layers = [
    Conv((3, 5, 64), init=gauss, activation=Rectlin(), strides=big),
    Pooling(2, strides=2),
    Conv((3, 3, 128), init=gauss, strides=small, **common),
    Pooling(2, strides=2),
    Conv((3, 3, 256), init=gauss, strides=small, **common),
    Conv((2, 2, 512), init=gauss, strides=tiny, **common),
    DeepBiRNN(128, init=glorot, reset_cells=True, depth=3, **common),
    RecurrentMean(),
    Affine(nout=2, init=gauss, activation=Softmax())
]
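
# the conv front end summarizes local spectrogram patches, the three-layer
# bidirectional RNN models the sequence, RecurrentMean averages its outputs
# over time, and the 2-unit Softmax gives the binary decision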

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.001)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)