Example 1
def create_frcn_model(frcn_fine_tune=False):

    b1 = BranchNode(name="b1")

    imagenet_layers = add_vgg_layers()
    HW = (7, 7)

    frcn_layers = [
        RoiPooling(layers=imagenet_layers, HW=HW,
                   bprop_enabled=frcn_fine_tune),
        Affine(nout=4096,
               init=Gaussian(scale=0.005),
               bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.005),
               bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5), b1,
        Affine(nout=21,
               init=Gaussian(scale=0.01),
               bias=Constant(0),
               activation=Softmax())
    ]
    bb_layers = [
        b1,
        Affine(nout=84,
               init=Gaussian(scale=0.001),
               bias=Constant(0),
               activation=Identity())
    ]

    return Model(layers=Tree([frcn_layers, bb_layers]))
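A side note, not part of the original example: the BranchNode/Tree pattern used above can be reduced to a minimal two-headed model, where layers listed before the shared BranchNode form the common trunk and each branch continues independently after it. A small illustrative sketch, assuming a neon backend is available:

from neon.backends import gen_backend
from neon.initializers import Gaussian, Constant
from neon.layers import Affine, BranchNode, Tree
from neon.models import Model
from neon.transforms import Rectlin, Softmax, Identity

be = gen_backend(backend='cpu', batch_size=32)

b = BranchNode(name="shared_split")
# trunk + classification head (everything before b is shared)
cls_branch = [Affine(nout=64, init=Gaussian(scale=0.01), bias=Constant(0),
                     activation=Rectlin()),
              b,
              Affine(nout=21, init=Gaussian(scale=0.01), activation=Softmax())]
# bounding-box regression head, reusing the trunk through b
bbox_branch = [b,
               Affine(nout=84, init=Gaussian(scale=0.001), activation=Identity())]
two_headed = Model(layers=Tree([cls_branch, bbox_branch]))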
Example 2
def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}
    layers = [
        Conv((3, 3, 3, 64), padding=padding, init=g1, bias=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)
Example 3
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
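For context: compound layers such as Conv and Affine expand into Convolution, Linear, Bias, and Activation sub-layers, which is why the optimizer dictionary above is keyed on those class names. The lookup MultiOptimizer performs per layer is roughly the following (an illustrative sketch, not the library's actual code):

def select_optimizer(layer, mapping):
    # an explicit layer name takes precedence over the class name,
    # and anything unmatched falls back to the 'default' entry
    if layer.name in mapping:
        return mapping[layer.name]
    if layer.__class__.__name__ in mapping:
        return mapping[layer.__class__.__name__]
    return mapping['default']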
Example 4
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier: uniform within +/- sqrt(3 / fan_in)
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernels, 128 output feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # Highest value from 2x2 window.
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # With probability 0.5, zero out activations during training
    layers.append(
        Affine(nout=128,
               init=GlorotUniform(),
               bias=Constant(0),
               activation=relu)
    )  # Fully connected layer: a linear combination of the flattened conv features
    layers.append(Dropout(keep=0.5))
    layers.append(
        Affine(nout=2,
               init=GlorotUniform(),
               bias=Constant(0),
               activation=Softmax(),
               name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    #    out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set,
                          eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set,
            optimizer=optimizer,
            num_epochs=num_epochs,
            cost=cost,
            callbacks=callbacks)

    return cnn
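Regarding the Xavier comment above: in neon, Xavier(local=True) draws weights uniformly from +/- sqrt(3 / fan_in), where fan_in for a convolutional layer is kernel height x kernel width x input feature maps. A quick numeric sketch of that bound (the layer sizes are assumed, not taken from the code above):

import numpy as np

# Xavier(local=True) uniform bound: limit = sqrt(3 / fan_in)
fan_in = 3 * 3 * 128                    # 3x3 kernel over 128 input feature maps (assumed)
limit = np.sqrt(3.0 / fan_in)
sample = np.random.uniform(-limit, limit, size=(fan_in, 128)).astype('float32')
print(round(limit, 4))                  # roughly 0.051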
Example 5
 def constructCNN(self):
     layers = []
     if self.network_type == "idsia":
         layers.append(
             Conv((3, 3, 100),
                  strides=1,
                  init=Kaiming(),
                  bias=Constant(0.0),
                  activation=Rectlin(),
                  name="Conv1"))
         layers.append(Pooling(2, op="max", strides=2, name="neon_pool1"))
         layers.append(
             Conv((4, 4, 150),
                  strides=1,
                  init=Kaiming(),
                  bias=Constant(0.0),
                  activation=Rectlin(),
                  name="Conv2"))
         layers.append(Pooling(2, op="max", strides=2, name="neon_pool2"))
         layers.append(
             Conv((3, 3, 250),
                  strides=1,
                  init=Kaiming(),
                  bias=Constant(0.0),
                  activation=Rectlin(),
                  name="Conv3"))
         layers.append(Pooling(2, op="max", strides=2, name="neon_pool3"))
         layers.append(
             Affine(nout=200,
                    init=Kaiming(local=False),
                    bias=Constant(0.0),
                    activation=Rectlin(),
                    name="neon_fc1"))
         layers.append(
             Affine(nout=self.class_num,
                    init=Kaiming(local=False),
                    bias=Constant(0.0),
                    activation=Softmax(),
                    name="neon_fc2"))
     elif self.network_type == "resnet-56":
         layers = resnet(9, self.class_num,
                         int(self.resize_size[0] / 4))  # 6*9 + 2 = 56
     elif self.network_type == "resnet-32":
         layers = resnet(5, self.class_num,
                         int(self.resize_size[0] / 4))  # 6*5 + 2 = 32
     elif self.network_type == "resnet-20":
         layers = resnet(3, self.class_num,
                         int(self.resize_size[0] / 4))  # 6*3 + 2 = 20
     return layers
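The "6*n + 2" comments follow the usual CIFAR-style ResNet depth count: three stages of n residual blocks with two convolutions each, plus the initial convolution and the final classifier layer. A trivial check of those comments:

def resnet_depth(n):
    # 3 stages * n blocks * 2 convs + first conv + final classifier
    return 6 * n + 2

assert resnet_depth(9) == 56
assert resnet_depth(5) == 32
assert resnet_depth(3) == 20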
Example 6
    def generate_leafs(self, layer):
        """
        Given a key to the ssd_config, generate the leafs
        """
        config = self.ssd_config[layer]

        leaf_params = {
            'strides': 1,
            'padding': 1,
            'init': Xavier(local=True),
            'bias': Constant(0)
        }

        # to match caffe layer's naming
        if config['normalize']:
            layer += '_norm'

        priorbox_args = self.get_priorbox_args(config)
        mbox_prior = PriorBox(**priorbox_args)
        num_priors = mbox_prior.num_priors_per_pixel

        loc_name = layer + '_mbox_loc'
        mbox_loc = Conv((3, 3, 4 * num_priors), name=loc_name, **leaf_params)

        conf_name = layer + '_mbox_conf'
        mbox_conf = Conv((3, 3, self.num_classes * num_priors),
                         name=conf_name,
                         **leaf_params)

        return {
            'mbox_prior': mbox_prior,
            'mbox_loc': mbox_loc,
            'mbox_conf': mbox_conf
        }
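The channel counts above follow the SSD convention: at every spatial position the detector predicts num_priors default boxes, each needing 4 location offsets plus one confidence score per class. A small sketch of that arithmetic (the numbers are assumed, not read from this config):

num_priors = 6        # assumed priors per pixel for this feature map
num_classes = 21      # assumed: 20 object classes + background

loc_channels = 4 * num_priors             # 24 outputs: (dx, dy, dw, dh) per prior
conf_channels = num_classes * num_priors  # 126 outputs: one score per class per prior
print(loc_channels, conf_channels)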
Example 7
 def layers(self):
     init_uni = Uniform(low=-0.1, high=0.1)
     bn = False
     return [
         DOG((5.0, 4.0, 3.0, 1.6), 1.8),
         Conv((5, 5, 16),
              init=init_uni,
              activation=Rectlin(),
              batch_norm=bn),
         Pooling((2, 2)),
         Conv((5, 5, 32),
              init=init_uni,
              activation=Rectlin(),
              batch_norm=bn),
         Pooling((2, 2)),
         Affine(nout=500,
                init=init_uni,
                activation=Rectlin(),
                batch_norm=bn),
         Affine(nout=self.noutputs,
                init=init_uni,
                bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(
                    shortcut=True))
     ]
Example 8
def main_branch(branch_nodes):
    return [
        Conv((7, 7, 64), padding=3, strides=2, name='conv1/7x7_s2', **common),
        Pooling(name="pool1/3x3_s2", **pool3s2p1),
        Conv((1, 1, 64), name='conv2/3x3_reduce', **common),
        Conv((3, 3, 192), name="conv2/3x3", **commonp1),
        Pooling(name="pool2/3x3_s2", **pool3s2p1),
        inception([(64, ), (96, 128), (16, 32), (32, )], name='inception_3a/'),
        inception([(128, ), (128, 192), (32, 96), (64, )],
                  name='inception_3b/'),
        Pooling(name='pool3/3x3_s2', **pool3s2p1),
        inception([(192, ), (96, 208), (16, 48), (64, )],
                  name='inception_4a/'), branch_nodes[0],
        inception([(160, ), (112, 224), (24, 64), (64, )],
                  name='inception_4b/'),
        inception([(128, ), (128, 256), (24, 64), (64, )],
                  name='inception_4c/'),
        inception([(112, ), (144, 288), (32, 64), (64, )],
                  name='inception_4d/'), branch_nodes[1],
        inception([(256, ), (160, 320), (32, 128), (128, )],
                  name='inception_4e/'),
        Pooling(name='pool4/3x3_s2', **pool3s2p1),
        inception([(256, ), (160, 320), (32, 128), (128, )],
                  name='inception_5a/'),
        inception([(384, ), (192, 384), (48, 128), (128, )],
                  name="inception_5b/"),
        Pooling(fshape=7, strides=1, op="avg", name='pool5/7x7_s1'),
        Affine(nout=1000,
               init=init1,
               activation=Softmax(),
               bias=Constant(0),
               name='loss3/classifier')
    ]
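Each inception(...) argument above lists the filter counts of the four parallel GoogLeNet branches: (1x1), (3x3 reduce, 3x3), (5x5 reduce, 5x5), and (pool projection). The helper itself is not shown on this page; a hypothetical sketch of how such a helper is typically written in neon (initializers and paddings here are assumptions, not the original code):

from neon.initializers import Xavier
from neon.layers import Conv, Pooling, MergeBroadcast
from neon.transforms import Rectlin

def inception_sketch(kvals, name='i_'):
    (p1,), (p3r, p3), (p5r, p5), (proj,) = kvals
    conv = dict(init=Xavier(local=True), activation=Rectlin())
    branch1 = [Conv((1, 1, p1), name=name + '1x1', **conv)]
    branch2 = [Conv((1, 1, p3r), name=name + '3x3_reduce', **conv),
               Conv((3, 3, p3), padding=1, name=name + '3x3', **conv)]
    branch3 = [Conv((1, 1, p5r), name=name + '5x5_reduce', **conv),
               Conv((5, 5, p5), padding=2, name=name + '5x5', **conv)]
    branch4 = [Pooling(fshape=3, strides=1, padding=1, op="max", name=name + 'pool'),
               Conv((1, 1, proj), name=name + 'pool_proj', **conv)]
    # the four branch outputs are concatenated along the channel (depth) axis
    return MergeBroadcast(layers=[branch1, branch2, branch3, branch4], merge="depth")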
Example 9
def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_ptb_test(path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (data_set.ndata, data_set.seq_length,
                            data_set.nclass)

    # since the weights are all initialized to the same constant and the model
    # is untrained, the values along the feature dim should all be equal
    assert np.allclose(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-5)
    assert np.allclose(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-5)

    # along the time dim, the values should be increasing:
    assert np.alltrue(output[0, 2] > output[0, 1])
    assert np.alltrue(output[0, 1] > output[0, 0])
Example 10
    def __init__(self):
        self.in_shape = [1024, (2538, 38)]

        init = Constant(0)
        image_path = Sequential(
            [Affine(20, init, bias=init),
             Affine(10, init, bias=init)])
        sent_path = Sequential([Affine(30, init, bias=init), Affine(10, init)])

        layers = [
            MergeMultistream(layers=[image_path, sent_path],
                             merge="recurrent"),
            Dropout(keep=0.5),
            LSTM(4,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            Affine(20, init, bias=init, activation=Softmax())
        ]
        self.layers = layers
        self.cost = GeneralizedCostMask(CrossEntropyMulti())

        self.model = Model(layers=layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example 11
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example 12
def aux_branch(bnode):
    return [
        bnode,
        Pooling(fshape=5, strides=3, op="avg"),
        Conv((1, 1, 128), **common),
        Affine(nout=1024, init=init1, activation=relu, bias=bias),
        Dropout(keep=0.3),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0))
    ]
Example 13
def aux_branch(bnode, ind):
    # TODO put dropout back in
    nm = 'loss%d/' % ind
    return [bnode,
            Pooling(fshape=5, strides=3, op="avg", name=nm+'ave_pool'),
            Conv((1, 1, 128), name=nm+'conv', **common),
            Affine(nout=1024, init=init1, activation=relu, bias=bias, name=nm+'fc'),
            Dropout(keep=1.0, name=nm+'drop_fc'),
            Affine(nout=1000, init=init1, activation=Softmax(),
                   bias=Constant(0), name=nm+'classifier')]
Example 14
 def layers(self):
     return [
         Conv((7, 7, 96),
              init=Gaussian(scale=0.0001),
              bias=Constant(0),
              activation=Rectlin(),
              padding=3,
              strides=1),
         LRN(31, ascale=0.001, bpower=0.75),
         Pooling(3, strides=2, padding=1),
         Conv((5, 5, 256),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=2,
              strides=1),
         LRN(31, ascale=0.001, bpower=0.75),
         Pooling(3, strides=2, padding=1),
         Conv((3, 3, 384),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Conv((3, 3, 384),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Conv((3, 3, 256),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Pooling(3, strides=2, padding=1),
         Affine(nout=4096,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Identity()),
         Dropout(keep=0.5),
         Affine(nout=4096,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Identity()),
         Dropout(keep=0.5),
         Affine(nout=self.noutputs,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(
                    shortcut=True))
     ]
Example 15
def constuct_network():
    """
	Constructs the layers of our RCNN architecture. It is similar to AlexNet but simplified to only a 
	few convolutional layers and 3 LSTM layers.
	"""
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((7, 7, 128),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((5, 5, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
Example 16
 def layers(self):
     bn = True
     return [
         Conv((7, 7, 96), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=3, strides=1)\
             if self.bn_first_layer else\
             Conv((7, 7, 96), init=Kaiming(), bias=Constant(0), activation=Explin(), padding=3, strides=1),
         Pooling(3, strides=2, padding=1),
         Conv((7, 7, 128), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=3, strides=1),
         Pooling(3, strides=2, padding=1),
         Conv((5, 5, 256), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=2, strides=1),
         Pooling(3, strides=2, padding=1),
         Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=1, strides=1),
         Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=1, strides=1),
         Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=1, strides=1),
         Pooling(3, strides=2, padding=1, op='avg'),
         Affine(nout=self.noutputs, init=Kaiming(), activation=Explin(), batch_norm=bn),
         Affine(nout=self.noutputs, init=Kaiming(), activation=Explin(), batch_norm=bn),
         Affine(nout=self.noutputs, init=Kaiming(), bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
     ]
Example 17
def constuct_network():
    """
	Constructs the layers of the AlexNet architecture.
	"""
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
Example 18
def create_network():
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax()),
    ]

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
Example 19
def test_model_N_S_setter(backend_default):

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    model.set_batch_size(20)
    model.set_seq_len(10)
Example 20
    def create_normalize_branch(self, leafs, branch_node, layer):
        """
        Append leafs to trunk_layers at the branch_node. If normalize, add a Normalize layer.
        """

        tree_branch = BranchNode(name=layer + '_norm_branch')
        branch1 = [
            Normalize(init=Constant(20.0), name=layer + '_norm'), tree_branch,
            leafs['mbox_loc']
        ]

        branch2 = [tree_branch, leafs['mbox_conf']]
        new_tree = Tree([branch1, branch2])

        return [branch_node, new_tree]
Example 21
File: util.py Project: yw774/neon
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set,
            num_epochs=20,
            optimizer=opt,
            cost=cost,
            callbacks=callbacks)
    return clf
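A possible use of the returned regressor (hypothetical, not part of the original util.py): once trained, it maps any batch of 300-dimensional word2vec vectors into the 620-dimensional RNN word space.

clf = train_regressor(orig_wordvecs, w2v_W, w2v_vocab)   # as above (inputs assumed)
new_w2v = np.zeros((8, 300), dtype='float32')            # stand-in batch of word2vec vectors
mapped = clf.get_outputs(ArrayIterator(X=new_w2v, make_onehot=False))
print(mapped.shape)                                       # (8, 620)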
Example 22
def add_vgg_layers():

    # setup layers
    init1_vgg = Xavier(local=True)
    relu = Rectlin()

    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': init1_vgg,
        'bias': Constant(0),
        'activation': relu
    }

    # Set up the model layers
    vgg_layers = []

    # set up 3x3 conv stacks with different feature map sizes
    vgg_layers.append(Conv((3, 3, 64), **conv_params))
    vgg_layers.append(Conv((3, 3, 64), **conv_params))
    vgg_layers.append(Pooling(2, strides=2))
    vgg_layers.append(Conv((3, 3, 128), **conv_params))
    vgg_layers.append(Conv((3, 3, 128), **conv_params))
    vgg_layers.append(Pooling(2, strides=2))
    vgg_layers.append(Conv((3, 3, 256), **conv_params))
    vgg_layers.append(Conv((3, 3, 256), **conv_params))
    vgg_layers.append(Conv((3, 3, 256), **conv_params))
    vgg_layers.append(Pooling(2, strides=2))
    vgg_layers.append(Conv((3, 3, 512), **conv_params))
    vgg_layers.append(Conv((3, 3, 512), **conv_params))
    vgg_layers.append(Conv((3, 3, 512), **conv_params))
    vgg_layers.append(Pooling(2, strides=2))
    vgg_layers.append(Conv((3, 3, 512), **conv_params))
    vgg_layers.append(Conv((3, 3, 512), **conv_params))
    vgg_layers.append(Conv((3, 3, 512), **conv_params))
    # not used after this layer
    # vgg_layers.append(Pooling(2, strides=2))
    # vgg_layers.append(Affine(nout=4096, init=initfc, bias=Constant(0), activation=relu))
    # vgg_layers.append(Dropout(keep=0.5))
    # vgg_layers.append(Affine(nout=4096, init=initfc, bias=Constant(0), activation=relu))
    # vgg_layers.append(Dropout(keep=0.5))
    # vgg_layers.append(Affine(nout=1000, init=initfc, bias=Constant(0), activation=Softmax()))

    return vgg_layers
Example 23
def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_text('ptb-valid', path=data)

    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (
        data_set.ndata, data_set.seq_length, data_set.nclass)
Example 24
def gen_model(backend_type):
    # setup backend
    gen_backend(backend=backend_type,
                batch_size=batch_size,
                rng_seed=2,
                device_id=args.device_id,
                datatype=args.datatype)

    init_uni = Uniform(low=-0.1, high=0.1)

    # Set up the model layers
    layers = []
    layers.append(Conv((5, 5, 16), init=init_uni, bias=Constant(0), activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Conv((5, 5, 32), init=init_uni, activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Affine(nout=500, init=init_uni, activation=Rectlin()))
    layers.append(Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True)))

    mlp = Model(layers=layers)
    return mlp
Example 25
def main_branch(branch_nodes):
    return [
        Conv((7, 7, 64), padding=3, strides=2, **common),
        Pooling(**pool3s2p1),
        Conv((1, 1, 64), **common),
        Conv((3, 3, 192), **commonp1),
        Pooling(**pool3s2p1),
        inception([(64, ), (96, 128), (16, 32), (32, )]),
        inception([(128, ), (128, 192), (32, 96), (64, )]),
        Pooling(**pool3s2p1),
        inception([(192, ), (96, 208), (16, 48), (64, )]), branch_nodes[0],
        inception([(160, ), (112, 224), (24, 64), (64, )]),
        inception([(128, ), (128, 256), (24, 64), (64, )]),
        inception([(112, ), (144, 288), (32, 64), (64, )]), branch_nodes[1],
        inception([(256, ), (160, 320), (32, 128), (128, )]),
        Pooling(**pool3s2p1),
        inception([(256, ), (160, 320), (32, 128), (128, )]),
        inception([(384, ), (192, 384), (48, 128), (128, )]),
        Pooling(fshape=7, strides=1, op="avg"),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0))
    ]
Example 26
    def __init__(self):
        self.in_shape = (3, 32, 32)
        relu = Rectlin()
        init_use = Constant(0)
        conv = dict(init=init_use, batch_norm=False, activation=relu)
        convp1 = dict(init=init_use,
                      batch_norm=False,
                      bias=init_use,
                      activation=relu,
                      padding=1)
        convp1s2 = dict(init=init_use,
                        batch_norm=False,
                        bias=init_use,
                        padding=1,
                        strides=2)

        layers = [
            Dropout(keep=.8),
            Conv((3, 3, 96), **convp1),
            Conv((3, 3, 96), **convp1),
            Conv((3, 3, 96), **convp1s2),
            Dropout(keep=.5),
            Conv((3, 3, 192), **convp1),
            Conv((3, 3, 192), **convp1),
            Conv((3, 3, 192), **convp1s2),
            Dropout(keep=.5),
            Conv((3, 3, 192), **convp1),
            Conv((1, 1, 192), **conv),
            Conv((1, 1, 16), **conv),
            Pooling(8, op="avg"),
            Activation(Softmax())
        ]
        self.layers = layers
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        model.initialize(self.in_shape, cost=cost)
        self.model = model
Example 27
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

dataset = MNIST(path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(Affine(nout=100, init=init_norm, bias=Constant(0),
                     activation=Rectlin()))
layers.append(Affine(nout=10, init=init_norm, bias=Constant(0),
                     activation=Logistic(shortcut=True),
                     name='special_linear'))

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# fit and validate
optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
optimizer_two = RMSProp()

# all bias layers and the last linear layer will use
# optimizer_two. all other layers will use optimizer_one.
opt = MultiOptimizer({'default': optimizer_one,
                      'Bias': optimizer_two,
                      'special_linear': optimizer_two})
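The original snippet breaks off after the optimizer setup; a plausible continuation, assuming the usual neon training calls (the Callbacks arguments and epoch count below are assumptions, not taken from the source page):

callbacks = Callbacks(mlp, eval_set=valid_set)
mlp.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)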
Example 28
# hyperparameters
hidden_size = 512
num_epochs = args.epochs

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download dataset
data_path = load_flickr8k(path=args.data_dir)  # Other setnames are flickr30k and coco

# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
Example 29
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train],
                             y_train,
                             nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
Example 30
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download dataset
data_path = load_flickr8k(
    path=args.data_dir)  # Other setnames are flickr30k and coco

# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Array(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size,
         init,
         activation=Logistic(),
         gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))