def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int): Size of the bAbI dataset vocabulary.
        rlayer_type (string): Type of recurrent layer to use (gru or lstm).

    Returns:
        Model: Model of the created network.
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100, reset_cells=True,
                         init=GlorotUniform(), init_inner=Orthonormal(0.5),
                         activation=Tanh(), gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size, embedding_dim=50,
                         init=Uniform(-0.05, 0.05))

    # Model construction: story and query each pass through an embedding
    # lookup and a recurrent layer before being merged
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

    layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
              Affine(vocab_size, init=GlorotUniform(), activation=Softmax())]

    return Model(layers=layers)
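# Usage sketch (assumptions flagged): create_model only builds the network;
# the cost, optimizer, and data iterators below are placeholders following the
# usual neon training pattern. `vocab_size`, `train_set`, and `valid_set` are
# assumed to come from a bAbI loader (e.g. neon's BABI/QA helpers).
from neon.layers import GeneralizedCost
from neon.transforms import CrossEntropyMulti
from neon.optimizers import Adam
from neon.callbacks.callbacks import Callbacks

model = create_model(vocab_size=vocab_size, rlayer_type='gru')
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
callbacks = Callbacks(model, eval_set=valid_set)
model.fit(train_set, optimizer=Adam(), num_epochs=20, cost=cost,
          callbacks=callbacks)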
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {'strides': 1,
                   'padding': 1,
                   'init': Xavier(local=True),  # Xavier: uniform in +/- sqrt(3 / num_inputs)
                   'bias': Constant(0),
                   'activation': relu}

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 128 feature maps of 3x3 kernels
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # max pooling: highest value from each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # keep each activation with probability 0.5, zero it otherwise
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
                         activation=relu))  # fully connected layer over the flattened conv output
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # track cost on the validation set after every epoch
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
            cost=cost, callbacks=callbacks)
    return cnn
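# Driver sketch for fit_model: the random arrays below are stand-ins for a
# real dataset, sized so the 3x3/pool stack above yields a valid output shape;
# the CPU backend and batch size are assumptions, not part of the original.
import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator

be = gen_backend(backend='cpu', batch_size=32)
X = np.random.rand(128, 3 * 32 * 32).astype(np.float32)  # 128 fake 3x32x32 images
y = np.random.randint(2, size=128)                       # 2 classes, matching class_layer
train_set = ArrayIterator(X, y, nclass=2, lshape=(3, 32, 32))
val_set = ArrayIterator(X, y, nclass=2, lshape=(3, 32, 32))

cnn = fit_model(train_set, val_set, num_epochs=1)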
def __init__(self, overlapping_classes=None, exclusive_classes=None, analytics_input=True,
             network_type='conv_net', num_words=60, width=100, lookup_size=0, lookup_dim=0,
             optimizer=Adam()):
    assert (overlapping_classes is not None) or (exclusive_classes is not None)

    self.width = width
    self.num_words = num_words
    self.overlapping_classes = overlapping_classes
    self.exclusive_classes = exclusive_classes
    self.analytics_input = analytics_input
    self.recurrent = network_type == 'lstm'
    self.lookup_size = lookup_size
    self.lookup_dim = lookup_dim

    init = GlorotUniform()
    activation = Rectlin(slope=1E-05)
    gate = Logistic()

    input_layers = self.input_layers(analytics_input, init, activation, gate)

    if self.overlapping_classes is None:
        output_layers = [Affine(len(self.exclusive_classes), init, activation=Softmax())]
    elif self.exclusive_classes is None:
        output_layers = [Affine(len(self.overlapping_classes), init, activation=Logistic())]
    else:
        output_branch = BranchNode(name='exclusive_overlapping')
        output_layers = Tree([[SkipNode(),
                               output_branch,
                               Affine(len(self.exclusive_classes), init, activation=Softmax())],
                              [output_branch,
                               Affine(len(self.overlapping_classes), init, activation=Logistic())]])

    layers = [input_layers,
              # this is where inputs meet, and where we may want to add depth or
              # additional functionality
              Dropout(keep=0.8),
              output_layers]

    super(ClassifierNetwork, self).__init__(layers, optimizer=optimizer)
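# Construction sketch (hypothetical label sets): with both exclusive and
# overlapping classes supplied, __init__ takes the two-branch Tree path above.
# The class name ClassifierNetwork comes from the super() call; the rest of
# the class (e.g. input_layers) is assumed to be defined alongside it.
net = ClassifierNetwork(exclusive_classes=['spam', 'ham', 'other'],
                        overlapping_classes=['urgent', 'finance'],
                        network_type='conv_net', num_words=60, width=100)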
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=logging_level, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        # args.convolution is a flat list of (nb_filter, filter_len, stride) triples
        for i in range(0, len(args.convolution), 3):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            # fshape: (height, width, num_filters)
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer, activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
            if args.drop:
                layers.append(Dropout(keep=(1 - args.drop)))
    layers.append(Affine(nout=1, init=initializer, activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples,
                                lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples,
                              lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)
def conv_net(self, activation, init=Kaiming(), version=-1):
    width = max([self.width, self.lookup_dim])

    if version == -1:
        if self.lookup_size:
            pre_layers = [
                LookupTable(vocab_size=self.lookup_size, embedding_dim=width,
                            init=GlorotUniform()),
                Reshape((1, self.num_words, width)),
            ]
            first_width = width
        else:
            pre_layers = [Conv((1, width, width), padding=0, init=init,
                               activation=activation)]
            first_width = 1
        return pre_layers + \
            [
                # parallel 3-, 5-, and 7-word convolutions, merged along depth
                MergeBroadcast(
                    [
                        [Conv((3, first_width, 15), padding={'pad_h': 1, 'pad_w': 0},
                              init=init, activation=activation)],
                        [Conv((5, first_width, 15), padding={'pad_h': 2, 'pad_w': 0},
                              init=init, activation=activation)],
                        [Conv((7, first_width, 15), padding={'pad_h': 3, 'pad_w': 0},
                              init=init, activation=activation)],
                    ],
                    merge='depth'),
                NoisyDropout(keep=0.5, noise_pct=1.0, noise_std=0.001),
                Conv((5, 1, 15),
                     strides={'str_h': 2 if self.num_words > 59 else 1, 'str_w': 1},
                     padding=0, init=init, activation=activation),
                NoisyDropout(keep=0.9, noise_pct=1.0, noise_std=0.00001),
                Conv((3, 1, 9), strides={'str_h': 2, 'str_w': 1}, padding=0,
                     init=init, activation=activation),
                NoisyDropout(keep=0.9, noise_pct=1.0, noise_std=0.00001),
                Conv((9, 1, 9), strides={'str_h': 2, 'str_w': 1}, padding=0,
                     init=init, activation=activation)
            ]
def create_network():
    init = GlorotUniform()

    layers = [
        Conv((3, 3, 128), init=init, activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=2, init=init, activation=Softmax())
    ]

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
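# Usage sketch: create_network returns the model together with its cost, so a
# typical driver pairs them as below; the `train`/`valid` iterators, epoch
# count, and optimizer choice are assumptions, not part of the original.
from neon.optimizers import Adagrad
from neon.callbacks.callbacks import Callbacks

model, cost = create_network()
opt = Adagrad(learning_rate=0.01)
callbacks = Callbacks(model, eval_set=valid)
model.fit(train, optimizer=opt, num_epochs=16, cost=cost, callbacks=callbacks)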
common_params = dict(sampling_freq=22050, clip_duration=16000, frame_duration=16)
train_params = AudioParams(**common_params)
valid_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)
train = DataLoader(set_name='music-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
valid = DataLoader(set_name='music-valid', media_params=valid_params,
                   index_file=valid_idx, shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [Conv((2, 2, 4), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((3, 3, 4), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          DeepBiRNN(128, init=GlorotUniform(), batch_norm=True, activation=Rectlin(),
                    reset_cells=True, depth=3),
          RecurrentMean(),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.01, gradient_clip_value=15)
metric = Misclassification()
callbacks = Callbacks(model, eval_set=valid, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
print('Misclassification error = %.1f%%' % (model.eval(valid, metric=metric) * 100))
display(model, ['Convolution_0'], 'inputs')
display(model, ['Convolution_0', 'Convolution_1', 'Pooling_0'], 'outputs')
biases = None if use_batch_norm else Constant(0)

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# initialize the data provider
img_set_options = dict(repo_dir=args.data_dir, inner_size=224,
                       dtype=args.datatype, subset_pct=100)
train = ImgMaster(set_name='train', **img_set_options)
test = ImgMaster(set_name='validation', do_transforms=False, **img_set_options)
train.init_batch_provider()
test.init_batch_provider()

init1 = GlorotUniform()
relu = Rectlin()
common_params = dict(init=init1, activation=relu, batch_norm=use_batch_norm,
                     bias=biases)
conv_params = dict(padding=1, **common_params)

# Set up the model layers, using 3x3 conv stacks with different feature map sizes
layers = []
for nofm in [64, 128, 256, 512, 512]:
    layers.append(Conv((3, 3, nofm), **conv_params))
    layers.append(Conv((3, 3, nofm), **conv_params))
    if nofm > 128:
        if VGG in ('D', 'E'):
                 shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [Conv((7, 7, 32), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((5, 5, 64), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          DeepBiRNN(128, init=GlorotUniform(), batch_norm=True, activation=Rectlin(),
                    reset_cells=True, depth=3),
          RecurrentMean(),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.01)
metric = Misclassification()
callbacks = Callbacks(model, eval_set=val, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
                    help='vgg model type')
parser.add_argument('--subset_pct', type=float, default=100,
                    help='subset of training dataset to use (percentage)')
parser.add_argument('--test_only', action='store_true',
                    help='skip fitting - evaluate metrics on trained model weights')
args = parser.parse_args()

img_set_options = dict(repo_dir=args.data_dir, inner_size=224,
                       subset_pct=args.subset_pct)
train = ImageLoader(set_name='train', scale_range=(256, 384),
                    shuffle=True, **img_set_options)
test = ImageLoader(set_name='validation', scale_range=(256, 256),
                   do_transforms=False, shuffle=False, **img_set_options)

init1 = Xavier(local=True)
initfc = GlorotUniform()
relu = Rectlin()
conv_params = {'init': init1, 'strides': 1, 'padding': 1,
               'bias': Constant(0), 'activation': relu}

# Set up the model layers
layers = []

# set up 3x3 conv stacks with different feature map sizes
for nofm in [64, 128, 256, 512, 512]:
    layers.append(Conv((3, 3, nofm), **conv_params))
    layers.append(Conv((3, 3, nofm), **conv_params))
task = task_list[args.task - 1]

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# load the bAbI dataset
babi = BABI(path=args.data_dir, task=task, subset=subset)
train_set = QA(*babi.train)
valid_set = QA(*babi.test)

# recurrent layer parameters (default gru)
rlayer_obj = GRU if args.rlayer_type == 'gru' else LSTM
rlayer_params = dict(output_size=100, reset_cells=True,
                     init=GlorotUniform(), init_inner=Orthonormal(0.5),
                     activation=Tanh(), gate_activation=Logistic())

# if using lstm, swap the activation functions
if args.rlayer_type == 'lstm':
    rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

# lookup layer parameters
lookup_params = dict(vocab_size=babi.vocab_size, embedding_dim=50,
                     init=Uniform(-0.05, 0.05))

# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
def __init__(self, sentence_length, token_vocab_size, pos_vocab_size=None,
             char_vocab_size=None, max_char_word_length=20,
             token_embedding_size=None, pos_embedding_size=None,
             char_embedding_size=None, num_labels=None, lstm_hidden_size=100,
             num_lstm_layers=1, use_external_embedding=None, dropout=0.5):
    init = GlorotUniform()

    tokens = []
    if use_external_embedding is None:
        tokens.append(LookupTable(vocab_size=token_vocab_size,
                                  embedding_dim=token_embedding_size,
                                  init=init, pad_idx=0))
    else:
        tokens.append(DataInput())
    tokens.append(Reshape((-1, sentence_length)))
    f_layers = [tokens]

    # add POS tag input
    if pos_vocab_size is not None and pos_embedding_size is not None:
        f_layers.append([LookupTable(vocab_size=pos_vocab_size,
                                     embedding_dim=pos_embedding_size,
                                     init=init, pad_idx=0),
                         Reshape((-1, sentence_length))])

    # add character RNN input
    if char_vocab_size is not None and char_embedding_size is not None:
        char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                     embedding_dim=char_embedding_size,
                                     init=init, pad_idx=0)
        char_nn = [char_lut_layer,
                   TimeDistBiLSTM(char_embedding_size, init,
                                  activation=Logistic(), gate_activation=Tanh(),
                                  reset_cells=True, reset_freq=max_char_word_length),
                   TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                   Reshape((-1, sentence_length))]
        f_layers.append(char_nn)

    layers = []
    if len(f_layers) == 1:
        layers.append(f_layers[0][0])
    else:
        layers.append(MergeMultistream(layers=f_layers, merge="stack"))
        layers.append(Reshape((-1, sentence_length)))

    layers += [DeepBiLSTM(lstm_hidden_size, init, activation=Logistic(),
                          gate_activation=Tanh(), reset_cells=True,
                          depth=num_lstm_layers),
               Dropout(keep=dropout),
               Affine(num_labels, init, bias=init, activation=Softmax())]

    self._model = Model(layers=layers)
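# Construction sketch (illustrative sizes): a word-only configuration of the
# tagger __init__ above, with no POS or character inputs. `TaggerNetwork` is a
# hypothetical name for the enclosing class, which the excerpt does not show;
# all dimensions below are placeholders.
tagger = TaggerNetwork(sentence_length=50,
                       token_vocab_size=10000,
                       token_embedding_size=100,
                       num_labels=9,        # e.g. BIO-style tag set
                       lstm_hidden_size=100,
                       num_lstm_layers=1,
                       dropout=0.5)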
(X_train, y_train), (X_test, y_test), nclass = pad_data(path,
                                                        vocab_size=vocab_size,
                                                        sentence_length=sentence_length)

print("Vocab size - ", vocab_size)
print("Sentence Length - ", sentence_length)
print("# of train sentences", X_train.shape[0])
print("# of test sentences", X_test.shape[0])

train_set = ArrayIterator(X_train, y_train, nclass=2)
valid_set = ArrayIterator(X_test, y_test, nclass=2)

# weight initialization
uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
g_uni = GlorotUniform()

if args.rlayer_type == 'lstm':
    rlayer = LSTM(hidden_size, g_uni, activation=Tanh(),
                  gate_activation=Logistic(), reset_cells=True)
elif args.rlayer_type == 'bilstm':
    rlayer = DeepBiLSTM(hidden_size, g_uni, activation=Tanh(), depth=1,
                        gate_activation=Logistic(), reset_cells=True)
elif args.rlayer_type == 'rnn':
common_params = dict(sampling_freq=22050, clip_duration=16000, frame_duration=16)
train_params = AudioParams(**common_params)
valid_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)
train = DataLoader(set_name='music-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
valid = DataLoader(set_name='music-valid', media_params=valid_params,
                   index_file=valid_idx, shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [Conv((2, 2, 4), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((3, 3, 4), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          # note: cell/gate activations here are swapped relative to the
          # conventional Tanh cell / Logistic gate pairing
          LSTM(128, init=GlorotUniform(), gate_activation=Tanh(),
               activation=Logistic(), reset_cells=True),
          RecurrentMean(),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.01, gradient_clip_value=15)
metric = Misclassification()
callbacks = Callbacks(model, eval_set=valid, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
print('Misclassification error = %.1f%%' % (model.eval(valid, metric=metric) * 100))
display(model, ['Convolution_0'], 'inputs')
display(model, ['Convolution_0', 'Convolution_1', 'Pooling_0'], 'outputs')
def main():
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    # mat_data = sio.loadmat('../data/timeseries/02_timeseries.mat')
    # ts = V1TimeSeries(mat_data['timeseries'], mat_data['stim'], binning=10)

    seq_len = 30
    hidden = 20

    be = gen_backend(**extract_valid_args(args, gen_backend))

    kohn = KohnV1Dataset(path='../tmp/')
    kohn.gen_iterators(seq_len)

    # NOTE: `ts` must be provided by a V1 time-series loader (see the
    # commented lines above); the Kohn dataset path is an alternative input.
    train_set = V1IteratorSequence(ts.train, seq_len, return_sequences=False)
    valid_set = V1IteratorSequence(ts.test, seq_len, return_sequences=False)

    init = GlorotUniform()

    # two input paths: one over spike history, one over the stimulus
    spike_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(),
             reset_cells=False),
        Dropout(keep=0.5),
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(),
             reset_cells=False),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(),
               name='spike_in')])

    stim_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(), gate_activation=Logistic(),
             reset_cells=False),
        Dropout(keep=0.5),
        RecurrentLast(),
        Affine(1, init, bias=init, activation=Identity(), name='stim')])

    layers = [
        MergeMultistream(layers=[spike_rnn_path, stim_rnn_path], merge="stack"),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(),
               name='spike_out'),
        Round()
    ]

    model = Model(layers=layers)
    sched = ExpSchedule(decay=0.7)
    cost = GeneralizedCost(MeanSquared())

    optimizer_two = RMSProp(stochastic_round=args.rounding)
    optimizer_one = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9, schedule=sched)
    opt = MultiOptimizer({'default': optimizer_one,
                          'Bias': optimizer_two,
                          'special_linear': optimizer_two})

    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
    callbacks.add_hist_callback(filter_key=['W'])

    model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)

    train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
    valid_output = model.get_outputs(valid_set).reshape(-1, valid_set.nfeatures)
    train_target = train_set.y_series
    valid_target = valid_set.y_series

    tfev = fev(train_output, train_target, train_set.mean)
    vfev = fev(valid_output, valid_target, valid_set.mean)
    neon_logger.display('Train FEV: %g, Valid FEV: %g' % (tfev, vfev))

    plt.figure()
    plt.plot(train_output[:, 0], train_output[:, 1], 'bo', label='prediction')
    plt.plot(train_target[:, 0], train_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on training set')
    plt.savefig('neon_series_training_output.png')

    plt.figure()
    plt.plot(valid_output[:, 0], valid_output[:, 1], 'bo', label='prediction')
    plt.plot(valid_target[:, 0], valid_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on validation set')
    plt.savefig('neon_series_validation_output.png')
# Set up the test set to load via aeon
image_config = dict(height=64, width=64, channels=1)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename='manifest_subset2_augmented.csv',
              minibatch_size=args.batch_size,
              subset_fraction=1.0)
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float
# test_set = OneHot(test_set, index=1, nclasses=2)

init_uni = GlorotUniform()
opt_gdm = Adadelta(decay=0.95, epsilon=1e-6)

relu = Rectlin()
conv_params = {'strides': 1,
               'padding': 1,
               'init': Xavier(local=True),
               'bias': Constant(0),
               'activation': relu,
               'batch_norm': False}

# Set up the model layers
vgg_layers = []

# set up 3x3 conv stacks with different number of filters
# Set up the test set to load via aeon
image_config = dict(height=64, width=64, channels=1)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename='manifest_subset2_augmented.csv',
              minibatch_size=args.batch_size,
              subset_fraction=1.0)
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float
test_set = OneHot(test_set, index=1, nclasses=2)

init_uni = GlorotUniform()
opt_gdm = Adadelta(decay=0.95, epsilon=1e-6)

relu = Rectlin()
conv_params = {'strides': 1,
               'padding': 1,
               'init': Xavier(local=True),
               'bias': Constant(0),
               'activation': relu,
               'batch_norm': False}

# Set up the model layers
vgg_layers = []

# set up 3x3 conv stacks with different number of filters
def test_reshape_layer_model(backend_default, fargs):
    """
    Test cases:
        - conv before RNNs
        - conv after RNNs
        - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [LookupTable(vocab_size=2000, embedding_dim=400, init=init),
                  Reshape(reshape=(4, 100, -1)),
                  Conv((3, 3, 16), init=init),
                  LSTM(64, g_uni, activation=Tanh(),
                       gate_activation=Logistic(), reset_cells=True),
                  RecurrentSum(),
                  Affine(nout, init, bias=init, activation=Softmax())]

    conv_lut_2 = [LookupTable(vocab_size=1000, embedding_dim=400, init=init),
                  Reshape(reshape=(4, 50, -1)),
                  Conv((3, 3, 16), init=init),
                  Pooling(2, strides=2),
                  Affine(nout=nout, init=init, bias=init, activation=Softmax())]

    conv_rnn_1 = [LookupTable(vocab_size=2000, embedding_dim=400, init=init),
                  LSTM(64, g_uni, activation=Tanh(),
                       gate_activation=Logistic(), reset_cells=True),
                  Reshape(reshape=(4, 32, -1)),
                  Conv((3, 3, 16), init=init),
                  Affine(nout, init, bias=init, activation=Softmax())]

    conv_rnn_2 = [LookupTable(vocab_size=2000, embedding_dim=400, init=init),
                  Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
                  Reshape(reshape=(4, -1, 32)),
                  Conv((3, 3, 16), init=init),
                  Affine(nout, init, bias=init, activation=Softmax())]

    lut_sum_1 = [LookupTable(vocab_size=1000, embedding_dim=128, init=init),
                 RecurrentSum(),
                 Affine(nout=nout, init=init, bias=init, activation=Softmax())]

    lut_birnn_1 = [LookupTable(vocab_size=1000, embedding_dim=200, init=init),
                   DeepBiRNN(32, init=GlorotUniform(), batch_norm=True,
                             activation=Tanh(), reset_cells=True, depth=1),
                   Reshape((4, 32, -1)),
                   Conv((3, 3, 16), init=init),
                   Affine(nout=nout, init=init, bias=init, activation=Softmax())]

    layers_test = [conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2,
                   lut_sum_1, lut_birnn_1]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
batch_size = 1
clip_gradients = True
gradient_limit = 5
vocab_size = 20000
sentence_length = 128
embedding_dim = 128
hidden_size = 128
reset_cells = True
num_epochs = args.epochs

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))
be.bsz = 1

# define same model as in train
init_glorot = GlorotUniform()
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
nclass = 2
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim,
                init=init_emb, pad_idx=0, update=True),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
nepochs = {1: 8, 2: 3, 3: 6}[subj]
logger.warning('Overriding --epochs option')

if args.electrode == '-1':
    from loader import MultiLoader as Loader
    elecs = range(16)
else:
    from loader import SingleLoader as Loader
    rate /= 10
    elecs = args.electrode

tain = Loader(data_dir, subj, elecs, args.validate_mode, training=True)
test = Loader(data_dir, subj, elecs, args.validate_mode, training=False)

gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())

# per-subject layer configurations
layers = {
    1: [
        Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=3, **common),
        RecurrentMean(),
# create synthetic data as a whole series
time_series = TimeSeries(npoints, ncycles=ncycles, curvetype=args.curvetype)

# use data iterators to feed X, Y; return_sequences determines the training strategy
train_set = DataIteratorSequence(time_series.train, seq_len,
                                 return_sequences=return_sequences)
valid_set = DataIteratorSequence(time_series.test, seq_len,
                                 return_sequences=return_sequences)

# define weights initialization
init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

# define model: the model differs between the two strategies (sequence target or not)
if return_sequences is True:
    layers = [
        LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(),
             reset_cells=False),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
else:
    layers = [
        LSTM(hidden, init,
                    dtype=np.float32)  # cast image to float

# Set up the test set to load via aeon
image_config = dict(height=64, width=64, channels=3)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename='manifest_subset9_augmented.csv',
              minibatch_size=args.batch_size,
              subset_fraction=1.0)
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float

init_uni = GlorotUniform()
opt_gdm = Adadelta(decay=0.95, epsilon=1e-6)

relu = Rectlin()
conv_params = {'strides': 1,
               'padding': 1,
               'init': Xavier(local=True),
               'bias': Constant(0),
               'activation': relu,
               'batch_norm': False}

# Set up the model layers
vgg_layers = []
               datatype=args.datatype)

# setup data provider
img_provider = ImgMaster if args.loader_version == 'old' else ImageLoader
img_set_options = dict(repo_dir=args.data_dir, inner_size=224,
                       dtype=args.datatype, subset_pct=100)
train = img_provider(set_name='train', **img_set_options)
test = img_provider(set_name='validation', do_transforms=False, **img_set_options)
train.init_batch_provider()
test.init_batch_provider()

relu = Rectlin()
init_uni = GlorotUniform()

# The parameters below are straight out of [Springenberg2014]
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  schedule=Schedule(step_config=[10], change=0.1),
                                  momentum_coef=0.9, wdecay=.0005)

# set up model layers
layers = []
layers.append(DataTransform(transform=Normalizer(divisor=128.)))

layers.append(Conv((11, 11, 96), init=init_uni, activation=relu,
                   strides=4, padding=1))
layers.append(Conv((1, 1, 96), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 96), init=init_uni, activation=relu,
                   strides=2, padding=1))  # 54 -> 27
vgg_layers.append(Conv((3, 3, 128), **conv_params))
vgg_layers.append(Conv((3, 3, 128), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Conv((3, 3, 256), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 512), **conv_params))
vgg_layers.append(Conv((3, 3, 512), **conv_params))
vgg_layers.append(Conv((3, 3, 512), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Conv((3, 3, 512), **conv_params))
vgg_layers.append(Conv((3, 3, 512), **conv_params))
vgg_layers.append(Conv((3, 3, 512), **conv_params))
vgg_layers.append(Pooling(2, strides=2))
vgg_layers.append(Affine(nout=4096, init=GlorotUniform(),
                         bias=Constant(0), activation=relu))
vgg_layers.append(Dropout(keep=0.5))
vgg_layers.append(Affine(nout=4096, init=GlorotUniform(),
                         bias=Constant(0), activation=relu))
vgg_layers.append(Dropout(keep=0.5))
# final linear layer produces 4 regression outputs, so use a squared-error cost
vgg_layers.append(Linear(nout=4, init=GlorotUniform()))

model = Model(layers=vgg_layers)
cost = GeneralizedCost(costfunc=SumSquared())

# fit and validate
optimizer = RMSProp()

# configure callbacks
callbacks = Callbacks(model, eval_set=eval_set)