Example #1
def create_frcn_model(frcn_fine_tune=False):

    b1 = BranchNode(name="b1")

    imagenet_layers = add_vgg_layers()
    HW = (7, 7)

    frcn_layers = [
        RoiPooling(layers=imagenet_layers, HW=HW,
                   bprop_enabled=frcn_fine_tune),
        Affine(nout=4096,
               init=Gaussian(scale=0.005),
               bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.005),
               bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5), b1,
        Affine(nout=21,
               init=Gaussian(scale=0.01),
               bias=Constant(0),
               activation=Softmax())
    ]
    bb_layers = [
        b1,
        Affine(nout=84,
               init=Gaussian(scale=0.001),
               bias=Constant(0),
               activation=Identity())
    ]

    return Model(layers=Tree([frcn_layers, bb_layers]))
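
The Tree container above yields two outputs per forward pass, one from each branch (21-way class scores and 84 bounding-box targets), so training needs one cost per output. A minimal usage sketch, assuming a backend has already been generated and that a Fast R-CNN ROI iterator train_set exists; the SumSquared bounding-box cost and the hyperparameters are illustrative assumptions, not taken from the original script:

from neon.layers import Multicost, GeneralizedCost
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyMulti, SumSquared

model = create_frcn_model(frcn_fine_tune=False)
# one cost per Tree output: classification branch, then bbox regression branch
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=SumSquared())])
opt = GradientDescentMomentum(0.001, momentum_coef=0.9, wdecay=0.0005)
# model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
#           callbacks=Callbacks(model))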
Example #2
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Rectlin()),
        Affine(nout=10,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1,
                                        momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp,
                          eval_set=dataset.valid_iter,
                          **args.callback_args)

    # run fit, using CrossEntropyBinary as the cost function
    mlp.fit(dataset.train_iter,
            optimizer=optimizer,
            num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()),
            callbacks=callbacks)
    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
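
The main(args) function above expects the argument namespace produced by neon's command-line parser. A minimal, assumed entry point (not part of the original snippet):

from neon.util.argparser import NeonArgparser

if __name__ == '__main__':
    # parse the standard neon arguments (--data_dir, --epochs, --rounding, callback options, ...)
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()
    main(args)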
Example #3
def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}
    layers = [
        Conv((3, 3, 3, 64), padding=padding, init=g1, bias=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)
Example #4
def constuct_network():
    """
	Constructs the layers of our RCNN architecture. It is similar to AlexNet but simplified to only a 
	few convolutional layers and 3 LSTM layers.
	"""
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((7, 7, 128),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((5, 5, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
Example #5
    def build_model(self):
        # setup weight initialization function
        init_norm = Gaussian(loc=0.0, scale=0.01)

        # setup model layers
        layers = [
            Affine(nout=100,
                   init=init_norm,
                   bias=Uniform(),
                   activation=Rectlin()),
            Affine(nout=10,
                   init=init_norm,
                   bias=Uniform(),
                   activation=Logistic(shortcut=True))
        ]

        # setup cost function as CrossEntropy
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

        # setup optimizer
        self.optimizer = GradientDescentMomentum(
            0.1, momentum_coef=0.9, stochastic_round=self.args.rounding)

        # initialize model object
        self.model = ModelDist(layers=layers)
Example #6
    def _createLayers(self, num_actions):
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        if self.screen_dim == (84, 84):
            # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
            logger.info("Using 8x8 filter for first Conv")
            layers.append(
                Conv((8, 8, 32),
                     strides=4,
                     init=init_norm,
                     activation=Rectlin()))
        elif self.screen_dim == (52, 40):
            # The first hidden layer convolves 32 filters of 5x4 with stride 2 with the input image and applies a rectifier nonlinearity.
            logger.info("Using 5x4 filter for first Conv")
            layers.append(
                Conv((5, 4, 32),
                     strides=2,
                     init=init_norm,
                     activation=Rectlin()))
        else:
            raise NotImplementedError("Unsupported screen dim.")

        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_norm))
        return layers
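
The method returns a bare layer list, so the surrounding agent has to assemble the network itself. A hedged sketch of that assembly, with a helper name and hyperparameters that are assumptions rather than the original code:

from neon.models import Model
from neon.layers import GeneralizedCost
from neon.optimizers import RMSProp
from neon.transforms import SumSquared

def build_q_network(layers, learning_rate=0.00025):
    # wrap the layer list in a Model and pair it with the squared-error
    # cost commonly used on Q-value targets
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=SumSquared())
    optimizer = RMSProp(learning_rate=learning_rate, decay_rate=0.95)
    return model, cost, optimizer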
Example #7
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128),
             init=init,
             activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256,
                  init=init,
                  activation=Rectlin(),
                  reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=common['nclasses'], init=init, activation=Softmax())
    ]

    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model,
                          eval_set=test,
                          metric=metric,
                          **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    model.fit(train,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)
    return model
Example #8
 def _createLayers(self, num_actions):
     # create network
     init_norm = Gaussian(loc=0.0, scale=0.01)
     layers = []
     # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
     layers.append(
         Conv((8, 8, 32),
              strides=4,
              init=init_norm,
              activation=Rectlin(),
              batch_norm=self.batch_norm))
     # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
     layers.append(
         Conv((4, 4, 64),
              strides=2,
              init=init_norm,
              activation=Rectlin(),
              batch_norm=self.batch_norm))
     # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
     layers.append(
         Conv((3, 3, 64),
              strides=1,
              init=init_norm,
              activation=Rectlin(),
              batch_norm=self.batch_norm))
     # The final hidden layer is fully-connected and consists of 512 rectifier units.
     layers.append(
         Affine(nout=512,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
     # The output layer is a fully-connected linear layer with a single output for each valid action.
     layers.append(Affine(nout=num_actions, init=init_norm))
     return layers
Example #9
 def _createLayers(self):
   # create network
   init_norm = Gaussian(loc=0.0, scale=0.01)
   layers = []
   for i in xrange(self.num_layers):
       layers.append(Affine(nout=self.hidden_nodes, init=init_norm, activation=Rectlin()))
   layers.append(Affine(nout=self.num_actions, init=init_norm))
   return layers
Example #10
 def _createLayers(self, num_actions):
   # create network
   init_norm = Gaussian(loc=0.0, scale=0.01)
   layers = []
   # The final hidden layer is fully-connected and consists of 512 rectifier units.
   layers.append(Affine(nout=64, init=init_norm, bias=init_norm, activation=Rectlin()))
   # The output layer is a fully-connected linear layer with a single output for each valid action.
   layers.append(Affine(nout=num_actions, init=init_norm, bias=init_norm))
   return layers
Example #11
 def get_initializer(self, input_size):
     dnnInit = self.args.dnn_initializer
     if dnnInit == 'xavier':
         initializer = Xavier()
     elif dnnInit == 'fan_in':
         std_dev = 1.0 / math.sqrt(input_size)
         initializer = Uniform(low=-std_dev, high=std_dev)
     else:
         initializer = Gaussian(0, 0.01)
     return initializer
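
As a quick worked example of the 'fan_in' branch: with an assumed input_size of 256, the standard deviation is 1/sqrt(256) = 0.0625, so the initializer draws weights uniformly from (-0.0625, 0.0625):

import math
from neon.initializers import Uniform

input_size = 256                        # assumed example value
std_dev = 1.0 / math.sqrt(input_size)   # 0.0625
initializer = Uniform(low=-std_dev, high=std_dev)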
Example #12
 def layers(self):
     return [
         Conv((7, 7, 96),
              init=Gaussian(scale=0.0001),
              bias=Constant(0),
              activation=Rectlin(),
              padding=3,
              strides=1),
         LRN(31, ascale=0.001, bpower=0.75),
         Pooling(3, strides=2, padding=1),
         Conv((5, 5, 256),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=2,
              strides=1),
         LRN(31, ascale=0.001, bpower=0.75),
         Pooling(3, strides=2, padding=1),
         Conv((3, 3, 384),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Conv((3, 3, 384),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Conv((3, 3, 256),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Pooling(3, strides=2, padding=1),
         Affine(nout=4096,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Identity()),
         Dropout(keep=0.5),
         Affine(nout=4096,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Identity()),
         Dropout(keep=0.5),
         Affine(nout=self.noutputs,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(
                    shortcut=True))
     ]
Example #13
 def __init__(self, rounding, callback_args, epochs):
     # setup weight initialization function
     self.init = Gaussian(loc=0.0, scale=0.01)
     # setup optimizer
     self.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                                              stochastic_round=rounding)
     # setup cost function as sum of squared errors
     self.cost = GeneralizedCost(costfunc=SumSquared())
     self.epochs = epochs
     self.model = None
     self.callback_args = callback_args
Example #14
def prepare_model(ninputs=9600, nclass=5):
    """
    Set up and compile the model architecture (Logistic regression)
    """
    layers = [Affine(nout=nclass, init=Gaussian(loc=0.0, scale=0.01), activation=Softmax())]

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = Adam()
    lrmodel = Model(layers=layers)

    return lrmodel, opt, cost
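
A usage sketch for the returned pieces, assuming a backend has already been generated; the feature matrix and labels are random placeholders standing in for 9600-dimensional skip-thought encodings:

import numpy as np
from neon.data import ArrayIterator
from neon.callbacks.callbacks import Callbacks

X = np.random.rand(128, 9600).astype('float32')   # placeholder features
y = np.random.randint(0, 5, 128)                   # placeholder 5-class labels
train_iter = ArrayIterator(X, y, nclass=5)

lrmodel, opt, cost = prepare_model(ninputs=9600, nclass=5)
lrmodel.fit(train_iter, optimizer=opt, cost=cost, num_epochs=10,
            callbacks=Callbacks(lrmodel))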
Example #15
def constuct_network():
    """
	Constructs the layers of the AlexNet architecture.
	"""
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
Example #16
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #17
def create_network():
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax()),
    ]

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
Example #18
    def __init__(self):
        self.in_shape = (1, 32, 32)

        init_norm = Gaussian(loc=0.0, scale=0.01)

        normrelu = dict(init=init_norm, activation=Rectlin())
        normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
        normsoft = dict(init=init_norm, activation=Softmax())

        # setup model layers
        b1 = BranchNode(name="b1")
        b2 = BranchNode(name="b2")

        p1 = [
            Affine(nout=100, name="main1", **normrelu),
            b1,
            Affine(nout=32, name="main2", **normrelu),
            Affine(nout=160, name="main3", **normrelu),
            b2,
            Affine(nout=32, name="main2", **normrelu),
            # make next layer big to check sizing
            Affine(nout=320, name="main2", **normrelu),
            Affine(nout=10, name="main4", **normsoft)
        ]

        p2 = [
            b1,
            Affine(nout=16, name="branch1_1", **normrelu),
            Affine(nout=10, name="branch1_2", **normsigm)
        ]

        p3 = [
            b2,
            Affine(nout=16, name="branch2_1", **normrelu),
            Affine(nout=10, name="branch2_2", **normsigm)
        ]

        self.cost = Multicost(costs=[
            GeneralizedCost(costfunc=CrossEntropyMulti()),
            GeneralizedCost(costfunc=CrossEntropyBinary()),
            GeneralizedCost(costfunc=CrossEntropyBinary())
        ], weights=[1, 0., 0.])

        self.layers = SingleOutputTree([p1, p2, p3], alphas=[1, .2, .2])
        self.model = Model(layers=self.layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example #19
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set,
            num_epochs=20,
            optimizer=opt,
            cost=cost,
            callbacks=callbacks)
    return clf
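
Once trained, the regressor can map unseen word2vec rows into the 620-dimensional RNN word space; a short sketch with placeholder inputs (assuming a backend is active and clf is the model returned above):

import numpy as np
from neon.data import ArrayIterator

# clf = train_regressor(orig_wordvecs, w2v_W, w2v_vocab)
new_w2v = np.zeros((16, 300), dtype='float32')     # placeholder word2vec rows
pred_iter = ArrayIterator(X=new_w2v, make_onehot=False)
rnn_space = clf.get_outputs(pred_iter)             # one 620-d vector per input row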
Example #20
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #21
def test_model_get_outputs(backend_default):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
Example #22
def create_network():
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128),
             init=init,
             activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256,
                  init=init,
                  activation=Rectlin(),
                  reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=2, init=init, activation=Softmax())
    ]

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
Example #23
    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [
            Affine(nout=first_layer_dens, init=init_norm,
                   activation=Rectlin()),
            Affine(nout=second_layer_dens,
                   init=init_norm,
                   activation=Rectlin()),
            Affine(nout=output_layer_dens,
                   init=init_norm,
                   activation=Logistic(shortcut=True))
        ]

        # initialize model object
        self.model = Model(layers=layers)
Example #24
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
Example #25
# parse the command line arguments
parser = NeonArgparser(__doc__)

args = parser.parse_args()

# load up the mnist data set
# split into train and tests sets
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)

# setup a training set iterator
train_set = DataIterator(X_train, y_train, nclass=nclass)
# setup a validation data set iterator
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# setup weight initialization function
init_norm = Gaussian(loc=0.0, scale=0.01)

normrelu = dict(init=init_norm, activation=Rectlin())
normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
normsoft = dict(init=init_norm, activation=Softmax())

# setup model layers
b1 = BranchNode(name="b1")
b2 = BranchNode(name="b2")


p1 = [Affine(nout=100, linear_name="m_l1", **normrelu),
      b1,
      Affine(nout=32, linear_name="m_l2", **normrelu),
      Affine(nout=16, linear_name="m_l3", **normrelu),
      b2,
Example #26
rate = 0.00001
nepochs = {1: 8, 2: 3, 3: 6}[subj]
logger.warn('Overriding --epochs option')

if args.electrode == '-1':
    from loader import MultiLoader as Loader
    elecs = range(16)
else:
    from loader import SingleLoader as Loader
    rate /= 10
    elecs = args.electrode

tain = Loader(data_dir, subj, elecs, args.validate_mode, training=True)
test = Loader(data_dir, subj, elecs, args.validate_mode, training=False)

gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())
layers = {
    1: [
        Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=3, **common),
Example #27
from neon.optimizers import GradientDescentMomentum, MultiOptimizer, Schedule
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti
from neon.models import Model
from neon.data import ArrayIterator
import numpy as np
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3*224*224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1/250.)**(1/3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
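
The snippet cuts off after the schedule-driven SGD optimizer. A hedged sketch of the usual continuation (a separate optimizer for biases, a multiclass cost, then fit); the exact values here are assumptions, not the original script's:

from neon.layers import GeneralizedCost
from neon.callbacks.callbacks import Callbacks

opt_biases = GradientDescentMomentum(0.02, 0.0, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=Callbacks(model, **args.callback_args))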
Example #28
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train],
                             y_train,
                             nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
Example #29
from neon.util.argparser import NeonArgparser
from neon.util.persist import ensure_dirs_exist
from neon.layers.layer import Dropout

# parse the command line arguments
parser = NeonArgparser(__doc__)
parser.add_argument('--kbatch',
                    type=int,
                    default=1,
                    help='number of data batches per noise batch in training')
args = parser.parse_args()

# load up the data set

# setup weight initialization function
init = Gaussian()

# discriminator using convolution layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
# sigmoid = Logistic() # sigmoid activation function
conv1 = dict(
    init=init, batch_norm=False,
    activation=lrelu)  # open question: use a BatchNorm layer or the batch_norm parameter?
conv2 = dict(init=init, batch_norm=True, activation=lrelu, padding=2)
conv3 = dict(init=init, batch_norm=True, activation=lrelu, padding=1)
D_layers = [
    Conv((5, 5, 5, 32), **conv1),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
Example #30
logger.setLevel(logging.DEBUG)

parser = NeonArgparser(__doc__)
args = parser.parse_args()
be = gen_backend(backend='gpu', batch_size=128, datatype=np.float32)

# setup a dataset iterator
mnist = MNIST(path='../dataset/mnist')
(X_train, y_train), (X_test, y_test), nclass = mnist.load_data()
train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

# define model
nfilters = [20, 50, 500]
# nfilters = [24, 56, 500]
init_w = Gaussian(scale=0.01)
relu = Rectlin()
common_params = dict(init=init_w, activation=relu)
layers = [
    Conv((5, 5, nfilters[0]), bias=Constant(0.1), padding=0, **common_params),
    Pooling(2, strides=2, padding=0),
    Conv((5, 5, nfilters[1]), bias=Constant(0.1), padding=0, **common_params),
    Pooling(2, strides=2, padding=0),
    Affine(nout=nfilters[2], bias=Constant(0.1), **common_params),
    Affine(nout=10,
           bias=Constant(0.1),
           activation=Softmax(),
           init=Gaussian(scale=0.01))
]
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())