Example #1
def construct_network():
    """
	Constructs the layers of our RCNN architecture. It is similar to AlexNet but simplified to only a 
	few convolutional layers and 3 LSTM layers.
	"""
    layers = [
        Conv((11, 11, 64),
             init=Gaussian(scale=0.01),
             bias=Constant(0),
             activation=Rectlin(),
             padding=3,
             strides=4),
        Pooling(3, strides=2),
        Conv((7, 7, 128),
             init=Gaussian(scale=0.01),
             bias=Constant(1),
             activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((5, 5, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(0),
             activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256),
             init=Gaussian(scale=0.03),
             bias=Constant(1),
             activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512,
             init=Gaussian(scale=0.03),
             activation=Rectlin(),
             gate_activation=Tanh()),
        Affine(nout=4096,
               init=Gaussian(scale=0.01),
               bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101,
               init=Gaussian(scale=0.01),
               bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
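A minimal training sketch for the model returned above, assembled from the fit patterns used in the other examples on this page; the backend settings, optimizer choice, epoch count, and the train_set iterator are assumptions, not part of the original snippet:

# Hypothetical training harness for construct_network(); train_set is a
# placeholder for a neon data iterator yielding 101-class sequence data.
from neon.backends import gen_backend
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
from neon.optimizers import GradientDescentMomentum
from neon.callbacks.callbacks import Callbacks

be = gen_backend(backend='cpu', batch_size=32)   # assumed backend settings
model = construct_network()
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
# train_set = ...  # supply an ArrayIterator / DataLoader here
callbacks = Callbacks(model)
model.fit(train_set, optimizer=opt, num_epochs=10, cost=cost,
          callbacks=callbacks)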
Example #2
    def __init__(self):
        self.in_shape = [1024, (2538, 38)]

        init = Constant(0)
        image_path = Sequential(
            [Affine(20, init, bias=init),
             Affine(10, init, bias=init)])
        sent_path = Sequential([Affine(30, init, bias=init), Affine(10, init)])

        layers = [
            MergeMultistream(layers=[image_path, sent_path],
                             merge="recurrent"),
            Dropout(keep=0.5),
            LSTM(4,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            Affine(20, init, bias=init, activation=Softmax())
        ]
        self.layers = layers
        self.cost = GeneralizedCostMask(CrossEntropyMulti())

        self.model = Model(layers=layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example #3
    def input_layers(self, analytics_input, init, activation, gate):
        """
        Return the input layers. We currently support convolutional and LSTM inputs.
        :return: the input layers
        """
        if self.recurrent:
            if analytics_input:
                # support analytics + content
                input_layers = MergeMultistream([[
                    LSTM(300,
                         init,
                         init_inner=Kaiming(),
                         activation=activation,
                         gate_activation=gate,
                         reset_cells=True),
                    RecurrentSum()
                ], [Affine(30, init, activation=activation)]], 'stack')
            else:
                # content only
                input_layers = [
                    LSTM(300,
                         init,
                         init_inner=Kaiming(),
                         activation=activation,
                         gate_activation=gate,
                         reset_cells=True),
                    RecurrentSum()
                ]
        else:
            if analytics_input:
                # support analytics + content
                input_layers = MergeMultistream([
                    self.conv_net(activation),
                    [Affine(30, init, activation=Logistic())]
                ], 'stack')
            else:
                # content only
                input_layers = self.conv_net(activation)

        return input_layers
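A sketch of composing the returned input layers into a complete classifier; the instance name net, the two-class output layer, and the initializer/activation choices are illustrative assumptions:

from neon.initializers import Kaiming
from neon.transforms import Rectlin, Logistic, Softmax
from neon.layers import Affine
from neon.models import Model

# assuming `net` is an instance of the class above with net.recurrent = True;
# the content-only recurrent branch returns a plain list of layers
input_layers = net.input_layers(analytics_input=False, init=Kaiming(),
                                activation=Rectlin(), gate=Logistic())
layers = input_layers + [Affine(2, Kaiming(), activation=Softmax())]
model = Model(layers=layers)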
Example #4
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
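MultiOptimizer maps each parameter layer to an optimizer by its class name, and 'default' catches anything not listed (which is why the Activation layers that Affine generates internally land on opt_gdm above). Passing the mapping to fit works like any single optimizer; a sketch, with train_set and num_epochs as placeholders:

# Hypothetical training call using the optimizer mapping; train_set and
# num_epochs are placeholders, not part of the test above.
from neon.models import Model
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
from neon.callbacks.callbacks import Callbacks

model = Model(layers=layer_list)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
          callbacks=Callbacks(model))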
Example #5
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #6
print "Vocab size - ", vocab_size
print "Sentence Length - ", sentence_length
print "# of train sentences", X_train.shape[0]
print "# of test sentence", X_test.shape[0]

train_set = ArrayIterator(X_train, y_train, nclass=2)
valid_set = ArrayIterator(X_test, y_test, nclass=2)

# weight initialization
uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
g_uni = GlorotUniform()

if args.rlayer_type == 'lstm':
    rlayer = LSTM(hidden_size,
                  g_uni,
                  activation=Tanh(),
                  gate_activation=Logistic(),
                  reset_cells=True)
elif args.rlayer_type == 'bilstm':
    rlayer = DeepBiLSTM(hidden_size,
                        g_uni,
                        activation=Tanh(),
                        depth=1,
                        gate_activation=Logistic(),
                        reset_cells=True)
elif args.rlayer_type == 'rnn':
    rlayer = Recurrent(hidden_size, g_uni, activation=Tanh(), reset_cells=True)
elif args.rlayer_type == 'birnn':
    rlayer = DeepBiRNN(hidden_size,
                       g_uni,
                       activation=Tanh(),
                       depth=1,
                       reset_cells=True)
Example #7
# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path

if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1

model = Model(layers=layers)
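The snippet stops after building the model; a hedged sketch of the remaining steps, where the optimizer choice and learning rate are assumptions and the cost and callback arguments come from the code above:

# Hypothetical end of the script; RMSProp and its learning rate are assumptions.
from neon.optimizers import RMSProp
from neon.callbacks.callbacks import Callbacks

optimizer = RMSProp(learning_rate=0.0002)
callbacks = Callbacks(model, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)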
Example #8
def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64,
             g_uni,
             activation=Tanh(),
             gate_activation=Logistic(),
             reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64,
             g_uni,
             activation=Tanh(),
             gate_activation=Logistic(),
             reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32,
                  init=GlorotUniform(),
                  batch_norm=True,
                  activation=Tanh(),
                  reset_cells=True,
                  depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [
        conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2, lut_sum_1, lut_birnn_1
    ]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
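The Reshape layers above use -1 for one target dimension, which is inferred from the total element count, as in numpy; a standalone numpy illustration of the (4, 100, -1) case on a 400-dimensional embedding:

import numpy as np

# A 400-feature activation laid out as (features, time*batch); reshaping to
# (4, 100, -1) splits the feature axis into 4 channels of 100 rows each and
# infers the trailing dimension, mirroring Reshape(reshape=(4, 100, -1)).
x = np.zeros((400, 128))
y = x.reshape(4, 100, -1)
assert y.shape == (4, 100, 128)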
Example #9
File: word_lstm.py, Project: ydm2011/neon
                 vocab=train_set.vocab,
                 tokenizer=tokenizer,
                 onehot_input=False)

# weight initialization
init = Uniform(low=-0.1, high=0.1)

# model initialization
rlayer_params = {
    "output_size": hidden_size,
    "init": init,
    "activation": Tanh(),
    "gate_activation": Logistic()
}
if args.rlayer_type == 'lstm':
    rlayer1, rlayer2 = LSTM(**rlayer_params), LSTM(**rlayer_params)
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab),
                embedding_dim=hidden_size,
                init=init), rlayer1, rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# vanilla gradient descent with decay schedule on learning rate and gradient scaling
Example #10
                 default_dtype=args.datatype)

# download penn treebank
train_path = load_text('ptb-train', path=args.data_dir)
valid_path = load_text('ptb-valid', path=args.data_dir)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
if rlayer_type == 'lstm':
    rlayer = LSTM(hidden_size, init, Logistic(), Tanh())
elif rlayer_type == 'gru':
    rlayer = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
else:
    raise NotImplementedError('%s layer not implemented' % rlayer_type)

layers = [
    rlayer,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(clip_gradients=clip_gradients, stochastic_round=args.rounding)
Example #11
def main():
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    #mat_data = sio.loadmat('../data/timeseries/02_timeseries.mat')

    #ts = V1TimeSeries(mat_data['timeseries'], mat_data['stim'], binning=10)

    seq_len = 30
    hidden = 20

    be = gen_backend(**extract_valid_args(args, gen_backend))

    kohn = KohnV1Dataset(path='../tmp/')
    kohn.gen_iterators(seq_len)
    train_set = V1IteratorSequence(ts.train, seq_len, return_sequences=False)
    valid_set = V1IteratorSequence(ts.test, seq_len, return_sequences=False)

    init = GlorotUniform()

    # dataset = MNIST(path=args.data_dir)
    # (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    # train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
    # valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

    # # weight initialization
    # init_norm = Gaussian(loc=0.0, scale=0.01)

    # # initialize model
    # path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])

    # path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])

    # layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
    #           Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    spike_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        # Dropout(keep=0.85),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init,
               activation=Identity(), name='spike_in')])

    stim_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        RecurrentLast(),
        Affine(1, init, bias=init, activation=Identity(), name='stim')])

    layers = [
        MergeMultistream(layers=[spike_rnn_path, stim_rnn_path],
                         merge="stack"),
        Affine(train_set.nfeatures, init, bias=init,
               activation=Identity(), name='spike_out'),
        Round()
    ]

    model = Model(layers=layers)

    sched = ExpSchedule(decay=0.7)

    # cost = GeneralizedCost(SumSquared())
    cost = GeneralizedCost(MeanSquared())

    optimizer_two = RMSProp(stochastic_round=args.rounding)
    optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9, schedule=sched)

    opt = MultiOptimizer({'default': optimizer_one,
                          'Bias': optimizer_two,
                          'special_linear': optimizer_two})

    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
    callbacks.add_hist_callback(filter_key=['W'])
    #callbacks.add_callback(MetricCallback(eval_set=valid_set, metric=FractionExplainedVariance(), epoch_freq=args.eval_freq))
    #callbacks.add_callback(MetricCallback(eval_set=valid_set,metric=Accuracy(),  epoch_freq=args.eval_freq))

    model.fit(train_set,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)

    train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
    valid_output = model.get_outputs(valid_set).reshape(-1, valid_set.nfeatures)
    train_target = train_set.y_series
    valid_target = valid_set.y_series

    tfev = fev(train_output, train_target, train_set.mean)
    vfev = fev(valid_output, valid_target, valid_set.mean)

    neon_logger.display('Train FEV: %g, Valid FEV:  %g' % (tfev, vfev))
    # neon_logger.display('Train Mean: %g, Valid Mean:  %g' % (train_set.mean, valid_set.mean))

    plt.figure()
    plt.plot(train_output[:, 0], train_output[:, 1], 'bo', label='prediction')
    plt.plot(train_target[:, 0], train_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on training set')
    plt.savefig('neon_series_training_output.png')

    plt.figure()
    plt.plot(valid_output[:, 0], valid_output[:, 1], 'bo', label='prediction')
    plt.plot(valid_target[:, 0], valid_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on validation set')
    plt.savefig('neon_series_validation_output.png')
Example #12
                 default_dtype=args.datatype)

# download shakespeare text
data_path = load_text('shakespeare', path=args.data_dir)
train_path, valid_path = Text.create_valid_file(data_path)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    LSTM(hidden_size, init, Logistic(), Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(clip_gradients=clip_gradients,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model,
                      train_set,
                      output_file=args.output_file,
                      valid_set=valid_set,
                      valid_freq=1,
Example #13
    train_set = DataIteratorSequence(time_series.train,
                                     seq_len,
                                     return_sequences=return_sequences)
    valid_set = DataIteratorSequence(time_series.test,
                                     seq_len,
                                     return_sequences=return_sequences)

    # define weights initialization
    init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

    # define model: the model differs between the two strategies (sequence target or not)
    if return_sequences is True:
        layers = [
            LSTM(hidden,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=False),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]
    else:
        layers = [
            LSTM(hidden,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            RecurrentLast(),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]
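Either branch produces a layer list that is wrapped and fit in the usual way; a minimal sketch of the continuation, where the cost, optimizer, and epoch count are assumptions (MeanSquared matches the regression target of this time-series setup):

# Hypothetical continuation; cost, optimizer, and epochs are assumptions.
from neon.models import Model
from neon.layers import GeneralizedCost
from neon.transforms import MeanSquared
from neon.optimizers import RMSProp
from neon.callbacks.callbacks import Callbacks

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=MeanSquared())
optimizer = RMSProp()
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=optimizer, num_epochs=10, cost=cost,
          callbacks=callbacks)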
Example #14
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download penn treebank
dataset = PTB(time_steps, path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
if args.rlayer_type == 'lstm':
    rlayer1 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic())
    rlayer2 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic())
else:
    rlayer1 = GRU(hidden_size,
                  init,
                  activation=Tanh(),
                  gate_activation=Logistic())
    rlayer2 = GRU(hidden_size,
                  init,
                  activation=Tanh(),
                  gate_activation=Logistic())
Example #15
common_params = dict(sampling_freq=22050, clip_duration=16000, frame_duration=16)
train_params = AudioParams(**common_params)
valid_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)
train = DataLoader(set_name='music-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
valid = DataLoader(set_name='music-valid', media_params=valid_params,
                   index_file=valid_idx, shuffle=False, **common)
init = Gaussian(scale=0.01)
layers = [Conv((2, 2, 4), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((3, 3, 4), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          LSTM(128, init=GlorotUniform(), gate_activation=Tanh(),
               activation=Logistic(), reset_cells=True),
          RecurrentMean(),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.01, gradient_clip_value=15)
metric = Misclassification()
callbacks = Callbacks(model, eval_set=valid, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
print('Misclassification error = %.1f%%' % (model.eval(valid, metric=metric)*100))
display(model, ['Convolution_0'], 'inputs')
display(model, ['Convolution_0', 'Convolution_1', 'Pooling_0'], 'outputs')
Example #16
train_set = imdb.train_iter
test_set = imdb.test_iter
valid_set = imdb.test_iter
# Model specification
# Initialization

init_glorot = GlorotUniform()
init_uniform = Uniform(-0.1 / 128, 0.1 / 128)

# The following are the layers we are going to use in our network

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=128, init=init_uniform),
    LSTM(output_size=128,
         init=init_glorot,
         activation=Tanh(),
         gate_activation=Logistic(),
         reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nout=2, init=init_glorot, bias=init_glorot, activation=Softmax())
]

# cost, optimizer and callbacks

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01)

num_epochs = 2
fname = 'imdb_lstm_model'
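With the cost and optimizer in place, what remains is building the model, fitting, and saving weights under the fname defined above; a sketch where the eval-set wiring and the file extension are assumptions:

# Hypothetical wrap-up; the '.p' extension and callback wiring are assumptions.
from neon.models import Model
from neon.callbacks.callbacks import Callbacks

model = Model(layers=layers)
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)
model.save_params(fname + '.p')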
Example #17
File: test_model.py, Project: zmoon111/neon
def test_conv_rnn(backend_default):
    train_shape = (1, 17, 142)

    be = backend_default
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)
    bilstm = DeepBiLSTM(128,
                        init_norm,
                        activation=Rectlin(),
                        gate_activation=Rectlin(),
                        depth=1,
                        reset_cells=True)
    birnn_1 = DeepBiRNN(128,
                        init_norm,
                        activation=Rectlin(),
                        depth=1,
                        reset_cells=True,
                        batch_norm=False)
    birnn_2 = DeepBiRNN(128,
                        init_norm,
                        activation=Rectlin(),
                        depth=2,
                        reset_cells=True,
                        batch_norm=False)
    bibnrnn = DeepBiRNN(128,
                        init_norm,
                        activation=Rectlin(),
                        depth=1,
                        reset_cells=True,
                        batch_norm=True)
    birnnsum = DeepBiRNN(128,
                         init_norm,
                         activation=Rectlin(),
                         depth=1,
                         reset_cells=True,
                         batch_norm=False,
                         bi_sum=True)
    rnn = Recurrent(128,
                    init=init_norm,
                    activation=Rectlin(),
                    reset_cells=True)
    lstm = LSTM(128,
                init_norm,
                activation=Rectlin(),
                gate_activation=Rectlin(),
                reset_cells=True)
    gru = GRU(128,
              init_norm,
              activation=Rectlin(),
              gate_activation=Rectlin(),
              reset_cells=True)

    rlayers = [bilstm, birnn_1, birnn_2, bibnrnn, birnnsum, rnn, lstm, gru]

    for rl in rlayers:
        layers = [
            Conv((2, 2, 4),
                 init=init_norm,
                 activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4),
                 init=init_norm,
                 batch_norm=True,
                 activation=Rectlin(),
                 strides=dict(str_h=1, str_w=2)),
            rl,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
Example #18
                                 seq_len,
                                 return_sequences=args.predict_seq)

# define weights initialization
init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

# Number of recurrent units in the network
recurrent_units = 32

# define model: the model differs between the two strategies
# (sequence target or not)
if args.predict_seq:
    layers = [
        LSTM(recurrent_units,
             init,
             activation=Logistic(),
             gate_activation=Tanh(),
             reset_cells=False),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
else:
    layers = [
        LSTM(recurrent_units,
             init,
             activation=Logistic(),
             gate_activation=Tanh(),
             reset_cells=True),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
Example #19
gradient_clip_value = 5

# download shakespeare text
data_path = load_shakespeare(path=args.data_dir)
train_path, valid_path = Text.create_valid_file(data_path)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set,
          optimizer=optimizer,
Example #20
gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())
layers = [
    Conv((3, 5, 64), init=gauss, activation=Rectlin(), strides=big),
    Pooling(2, strides=2),
    Conv((3, 3, 128), init=gauss, strides=small, **common),
    Pooling(2, strides=2),
    Conv((3, 3, 256), init=gauss, strides=small, **common),
    Conv((2, 2, 512), init=gauss, strides=tiny, **common),
    LSTM(128,
         glorot,
         activation=Tanh(),
         gate_activation=Logistic(),
         reset_cells=True),
    RecurrentMean(),
    Dropout(keep=0.5),
    Affine(nout=2, init=gauss, activation=Softmax())
]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.001)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

model.fit(tain,
          optimizer=opt,
          num_epochs=args.epochs,