Example #1
def module_factory(nfm, stride=1):
    mainpath = [
        Conv(**conv_params(3, nfm, stride=stride)),
        Conv(**conv_params(3, nfm, relu=False))
    ]
    sidepath = [SkipNode() if stride == 1 else Conv(**id_params(nfm))]

    module = [MergeSum([mainpath, sidepath]), Activation(Rectlin())]
    return module
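A hedged sketch of composing these modules into a full network; the stem, head, and nfm/stride schedule are illustrative assumptions, and conv_params is assumed to be a helper like the one defined in Example #15 below:

# assumed neon imports for the sketch
from neon.initializers import Kaiming
from neon.layers import Affine, Conv, Pooling
from neon.transforms import Softmax

layers = [Conv(**conv_params(3, 16))]             # stem
for nfm, stride in [(16, 1), (32, 2), (64, 2)]:
    layers += module_factory(nfm, stride=stride)  # residual module
layers.append(Pooling('all', op='avg'))           # global average pooling
layers.append(Affine(nout=10, init=Kaiming(), activation=Softmax()))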
Example #2
def module_factory(nfm, stride=1):
    projection = None if stride == 1 else IdentityInit()
    module = [
        Conv(**conv_params(3, nfm, stride=stride)),
        Conv(**conv_params(3, nfm, relu=False))
    ]
    module = module if args.network == 'plain' else [
        ResidualModule(module, projection)
    ]
    module.append(Activation(Rectlin()))
    return module
Example #3
def gen_model(num_channels, height, width):
    assert NervanaObject.be is not None, 'need to generate a backend before using this function'

    init_uni = Kaiming()

    # one difference from the reference model: it uses bias layers,
    # but we don't allow batch norm and biases together
    conv_common = dict(padding=1, init=init_uni, activation=Rectlin(), batch_norm=True)

    # set up the layers
    layers = []

    # need to store a ref to the pooling layers to pass
    # to the upsampling layers to get the argmax indices
    # for upsampling, this stack holds the pooling layer refs
    pool_layers = []

    # first loop generates the encoder layers
    nchan = [64, 128, 256, 512, 512]
    for ind in range(len(nchan)):
        nchanu = nchan[ind]
        lrng = 2 if ind <= 1 else 3
        for lind in range(lrng):
            nm = 'conv%d_%d' % (ind+1, lind+1)
            layers.append(Conv((3, 3, nchanu), strides=1, name=nm, **conv_common))

        layers.append(Pooling(2, strides=2, name='conv%d_pool' % ind))
        pool_layers.append(layers[-1])
        if ind >= 2:
            layers.append(Dropout(keep=0.5, name='drop%d' % (ind+1)))

    # this loop generates the decoder layers
    for ind in range(len(nchan) - 1, -1, -1):
        nchanu = nchan[ind]
        lrng = 2 if ind <= 1 else 3
        # upsampling layers need a ref to the corresponding pooling layer
        # to access the argmax indices for upsampling
        layers.append(Upsampling(2, pool_layers.pop(), strides=2, padding=0,
                      name='conv%d_unpool' % ind))
        for lind in range(lrng):
            nm = 'deconv%d_%d' % (ind+1, lind+1)
            if ind < 4 and lind == lrng-1:
                nchanu = nchan[ind] // 2  # integer division, halving the channel count
            layers.append(Conv((3, 3, nchanu), strides=1, name=nm, **conv_common))
            if ind == 0:
                break
        if ind >= 2:
            layers.append(Dropout(keep=0.5, name='drop%d' % (ind+1)))

    # the last conv layer outputs one channel per output class (12 here),
    # with a pixelwise softmax over the channels
    act_last = PixelwiseSoftmax(num_channels, height, width, name="PixelwiseSoftmax")
    conv_last = dict(padding=1, init=init_uni, activation=act_last, batch_norm=False)
    layers.append(Conv((3, 3, num_channels), strides=1, name='deconv_out', **conv_last))
    return layers
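The assert at the top requires a backend to exist before the layers are generated. A minimal sketch, assuming neon's gen_backend and the 12-class setup from the comment (batch size and input geometry are illustrative):

from neon.backends import gen_backend
from neon.models import Model

be = gen_backend(backend='cpu', batch_size=16)  # populates NervanaObject.be
layers = gen_model(num_channels=12, height=256, width=512)
model = Model(layers=layers)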
Example #4
def _createLayers(self, num_actions):
    # create network
    init_xavier_conv = Xavier(local=True)
    init_xavier_affine = Xavier(local=False)
    # init_uniform_conv = Uniform(low=-.01, high=.01)
    # init_uniform_affine = Uniform(low=-.01, high=.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    # layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((5, 5, 32), strides=2, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    # layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    layers.append(Conv((5, 5, 32), strides=2, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    # layers.append(Conv((3, 3, 64), strides=1, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_xavier_affine, activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_xavier_affine))
    return layers
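Since DQN regresses Q-values, these layers would typically be wrapped in a Model with a squared-error cost. A hedged sketch (the agent object and the SumSquared cost are assumptions, not shown in the excerpt):

from neon.layers import GeneralizedCost
from neon.models import Model
from neon.transforms import SumSquared

layers = agent._createLayers(num_actions=4)  # e.g. 4 valid actions
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=SumSquared())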
Example #5
def create_network():
    init = GlorotUniform()
    layers = [
        Conv((3, 3, 128),
             init=init,
             activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256,
                  init=init,
                  activation=Rectlin(),
                  reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=2, init=init, activation=Softmax())
    ]

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
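A hedged training sketch for the returned (model, cost) pair; the optimizer choice and the train_set/valid_set iterators are assumptions:

from neon.callbacks.callbacks import Callbacks
from neon.optimizers import Adagrad

model, cost = create_network()
callbacks = Callbacks(model, eval_set=valid_set, eval_freq=1)
model.fit(train_set, optimizer=Adagrad(learning_rate=0.01),
          num_epochs=10, cost=cost, callbacks=callbacks)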
Example #6
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier: uniform in +/- sqrt(3 / num_inputs)
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 128 feature maps, 3x3 kernels
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # Highest value from 2x2 window.
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # with probability 0.5, sets activation values to 0
    # (the conv output is flattened automatically before the following Affine layer)
    layers.append(
        Affine(nout=128,
               init=GlorotUniform(),
               bias=Constant(0),
               activation=relu)
    )  # fully connected layer over the flattened conv features
    layers.append(Dropout(keep=0.5))
    layers.append(
        Affine(nout=2,
               init=GlorotUniform(),
               bias=Constant(0),
               activation=Softmax(),
               name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    #    out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set,
                          eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set,
            optimizer=optimizer,
            num_epochs=num_epochs,
            cost=cost,
            callbacks=callbacks)

    return cnn
Example #7
def test_model_N_S_setter(backend_default):

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    model.set_batch_size(20)
    model.set_seq_len(10)
Example #8
    def build(self):
        """
        Build the model's layers
        """
        first_layer_units = 64
        second_layer_units = 64
        output_layer_units = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [
            Affine(nout=first_layer_units, init=init_norm,
                   activation=Rectlin()),
            Affine(nout=second_layer_units,
                   init=init_norm,
                   activation=Rectlin()),
            Affine(nout=output_layer_units,
                   init=init_norm,
                   activation=Logistic(shortcut=True))
        ]

        # initialize model object
        self.model = Model(layers=layers)
Example #9
def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}
    layers = [
        Conv((3, 3, 3, 64),
             padding=padding,
             init=g1,
             bias=c0,
             activation=Rectlin()),
        Pooling((1, 2, 2), strides={
            'str_d': 1,
            'str_h': 2,
            'str_w': 2
        }),
        Conv((3, 3, 3, 128),
             padding=padding,
             init=g1,
             bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256),
             padding=padding,
             init=g1,
             bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256),
             padding=padding,
             init=g1,
             bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256),
             padding=padding,
             init=g1,
             bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)
Example #10
def layers(self):
    init_uni = Uniform(low=-0.1, high=0.1)
    bn = False
    return [
        Conv((5, 5, 16),
             init=init_uni,
             activation=Rectlin(),
             batch_norm=bn),
        Pooling((2, 2)),
        Conv((5, 5, 32),
             init=init_uni,
             activation=Rectlin(),
             batch_norm=bn),
        Pooling((2, 2)),
        Affine(nout=500,
               init=init_uni,
               activation=Rectlin(),
               batch_norm=bn),
        Affine(nout=self.noutputs,
               init=init_uni,
               bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
    ]
Example #11
def construct_network():
    """
    Constructs the layers of the AlexNet architecture.
    """
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
Example #12
def module_factory(nfm, bottleneck=True, stride=1):
    nfm_out = nfm * 4 if bottleneck else nfm
    use_skip = (stride == 1)
    stride = abs(stride)
    sidepath = [SkipNode() if use_skip else Conv(
        **conv_params(1, nfm_out, stride, False))]

    if bottleneck:
        mainpath = [Conv(**conv_params(1, nfm, stride)),
                    Conv(**conv_params(3, nfm)),
                    Conv(**conv_params(1, nfm_out, relu=False))]
    else:
        mainpath = [Conv(**conv_params(3, nfm, stride)),
                    Conv(**conv_params(3, nfm, relu=False))]
    return [MergeSum([mainpath, sidepath]),
            Activation(Rectlin())]
Example #13
    def __init__(self):
        self.in_shape = (1, 32, 32)

        init_norm = Gaussian(loc=0.0, scale=0.01)

        normrelu = dict(init=init_norm, activation=Rectlin())
        normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
        normsoft = dict(init=init_norm, activation=Softmax())

        # setup model layers
        b1 = BranchNode(name="b1")
        b2 = BranchNode(name="b2")

        p1 = [
            Affine(nout=100, name="main1", **normrelu),
            b1,
            Affine(nout=32, name="main2", **normrelu),
            Affine(nout=160, name="main3", **normrelu),
            b2,
            Affine(nout=32, name="main2", **normrelu),
            # make next layer big to check sizing
            Affine(nout=320, name="main2", **normrelu),
            Affine(nout=10, name="main4", **normsoft)
        ]

        p2 = [
            b1,
            Affine(nout=16, name="branch1_1", **normrelu),
            Affine(nout=10, name="branch1_2", **normsigm)
        ]

        p3 = [
            b2,
            Affine(nout=16, name="branch2_1", **normrelu),
            Affine(nout=10, name="branch2_2", **normsigm)
        ]

        self.cost = Multicost(costs=[
            GeneralizedCost(costfunc=CrossEntropyMulti()),
            GeneralizedCost(costfunc=CrossEntropyBinary()),
            GeneralizedCost(costfunc=CrossEntropyBinary())
        ], weights=[1, 0., 0.])

        self.layers = SingleOutputTree([p1, p2, p3], alphas=[1, .2, .2])
        self.model = Model(layers=self.layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example #14
def create_network():
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax()),
    ]

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
Example #15
def conv_params(fsize,
                nfm,
                padding='SAME',
                strides=1,
                activation=Rectlin(),
                batch_norm=True):
    fsize = fsize if isinstance(fsize, tuple) else (fsize, fsize)
    fshape = fsize + (nfm, )
    padding = {
        'pad_h': (fsize[0] // 2 if padding == 'SAME' else 0),
        'pad_w': (fsize[1] // 2 if padding == 'SAME' else 0),
        'pad_d': 0
    }
    strides = {'str_h': strides, 'str_w': strides, 'str_d': 1}
    return dict(fshape=fshape,
                strides=strides,
                activation=activation,
                padding=padding,
                batch_norm=batch_norm,
                init=Kaiming(local=True))
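A brief usage sketch (the layer shapes are illustrative): with padding='SAME' and stride 1 the spatial size is preserved, so the helper can be reused across a whole stack of convolutions:

layers = [
    Conv(**conv_params(7, 64, strides=2)),   # 7x7 stem, stride 2
    Conv(**conv_params(3, 128)),             # 3x3, 'SAME' padding keeps H x W
    Conv(**conv_params((1, 3), 128))         # rectangular kernels also work
]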
Example #16
    def __init__(self):
        self.in_shape = (3, 32, 32)
        relu = Rectlin()
        init_use = Constant(0)
        conv = dict(init=init_use, batch_norm=False, activation=relu)
        convp1 = dict(init=init_use,
                      batch_norm=False,
                      bias=init_use,
                      activation=relu,
                      padding=1)
        convp1s2 = dict(init=init_use,
                        batch_norm=False,
                        bias=init_use,
                        padding=1,
                        strides=2)

        layers = [
            Dropout(keep=.8),
            Conv((3, 3, 96), **convp1),
            Conv((3, 3, 96), **convp1),
            Conv((3, 3, 96), **convp1s2),
            Dropout(keep=.5),
            Conv((3, 3, 192), **convp1),
            Conv((3, 3, 192), **convp1),
            Conv((3, 3, 192), **convp1s2),
            Dropout(keep=.5),
            Conv((3, 3, 192), **convp1),
            Conv((1, 1, 192), **conv),
            Conv((1, 1, 16), **conv),
            Pooling(8, op="avg"),
            Activation(Softmax())
        ]
        self.layers = layers
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        model.initialize(self.in_shape, cost=cost)
        self.model = model
Example #17
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)