def __init__(self, inputs, bs, max_time, classes, feature_dim, hidden_size,
             method='max', seed=12345):
    self._inputs = inputs
    self.method = method
    self.batch_size = bs
    self.classes = classes
    self.max_time = max_time
    self.feature_dim = feature_dim
    self.dropout = True
    self.hidden = HiddenLayer(input_size=feature_dim,
                              hidden_size=hidden_size,
                              batch_size=bs,
                              name='hidden',
                              dropout=0.5,
                              activation=act.LeakyRelu())
    self.softmax = SoftmaxLayer(input_size=hidden_size,
                                classes=self.classes,
                                batch_size=bs,
                                name='softmax',
                                dropout=0.5)
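# --- Hedged sketch (not part of the original code) ---
# A minimal NumPy illustration of the forward pass this head presumably
# computes: pool frame features over time with `method`, then apply the
# LeakyReLU hidden layer and the softmax layer. All weights and names here
# are hypothetical; the real HiddenLayer/SoftmaxLayer may differ.
import numpy as np

def pooled_softmax_forward(x, W_h, b_h, W_s, b_s, method='max', alpha=0.01):
    # x: (batch, max_time, feature_dim) frame-level features
    pooled = x.max(axis=1) if method == 'max' else x.mean(axis=1)
    h = pooled @ W_h + b_h
    h = np.where(h > 0, h, alpha * h)            # LeakyReLU
    logits = h @ W_s + b_s
    logits -= logits.max(axis=1, keepdims=True)  # numerical stability
    e = np.exp(logits)
    return e / e.sum(axis=1, keepdims=True)      # class probabilities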
def __init__(self, inputs, bs, max_time, classes, feature_dim, hidden_size,
             levels, N=1, pool=None, seed=12345):
    self._inputs = inputs
    self.N = N
    self.batch_size = bs
    self.classes = classes
    self.max_time = max_time
    self.levels = levels
    self.feature_dim = feature_dim
    self.pool = pool
    self.dropout = True

    # create a pyramid of filters
    self.temporal_pyramid = []
    for l in range(self.levels):
        for f in range(2**l):
            tf = TemporalAttentionLayer(batch_size=bs, N=N, channels=feature_dim,
                                        name='temporal-attention-layer-'+str(l)+'-filter-'+str(f))
            tf.test = True
            tf.d = theano.shared(value=np.asarray([1./2**(l+1)]).astype('float32'),
                                 name='d', borrow=True, broadcastable=[True])
            tf.g = theano.shared(value=np.asarray([((1./2**l)+(2*f/2.**l))]).astype('float32'),
                                 name='g', borrow=True, broadcastable=[True])
            tf.sigma = theano.shared(value=np.asarray([5.0]).astype('float32'),
                                     name='sigma', borrow=True, broadcastable=[True])
            self.temporal_pyramid.append(tf)

    input_size = feature_dim * N * (len(self.temporal_pyramid) if pool is None else 1)
    self.hidden = HiddenLayer(input_size=input_size, hidden_size=hidden_size,
                              activation=act.LeakyRelu(), batch_size=bs,
                              name='hidden', dropout=0.5)
    self.softmax = SoftmaxLayer(input_size=hidden_size, classes=self.classes,
                                batch_size=bs, name='softmax', dropout=0.5)
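# --- Hedged sketch (not part of the original code) ---
# What each TemporalAttentionLayer presumably computes with its fixed
# g (center), d (stride) and sigma: a bank of N Gaussian filters over the
# T timesteps, DRAW-style, so level l tiles the sequence with 2**l windows.
# The coordinate scaling (g and d in units of T/2) is an assumption; the
# real layer may normalize differently.
import numpy as np

def gaussian_read(x, g, d, sigma, N):
    # x: (T, feature_dim) sequence of frame features
    T = x.shape[0]
    centers = g * T / 2.0 + (np.arange(N) - N / 2.0 + 0.5) * d * T / 2.0
    t = np.arange(T)
    F = np.exp(-((t[None, :] - centers[:, None]) ** 2) / (2 * sigma ** 2))
    F /= F.sum(axis=1, keepdims=True) + 1e-8  # normalize each filter
    return F @ x                              # (N, feature_dim) summary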
def __init__(self, param_dict):
    self.param_dict = param_dict
    self.training_batch_size = param_dict['training_batch_size']
    nkerns = param_dict['nkerns']
    recept_width = param_dict['recept_width']
    pool_width = param_dict['pool_width']
    stride = param_dict['stride']
    dropout_prob = param_dict['dropout_prob']
    weight_decay = param_dict['l2_reg']
    activation = param_dict['activation']
    weights_variance = param_dict['weights_variance']
    n_channels = param_dict['n_channels']
    n_timesteps = param_dict['n_timesteps']
    n_fbins = param_dict['n_fbins']
    global_pooling = param_dict['global_pooling']
    rng = np.random.RandomState(23455)

    self.training_mode = T.iscalar('training_mode')
    self.x = T.tensor4('x')
    self.y = T.bvector('y')
    self.batch_size = theano.shared(self.training_batch_size)
    self.input = self.x.reshape((self.batch_size, 1, n_channels * n_fbins, n_timesteps))

    self.feature_extractor = FeatureExtractor(rng, self.input, nkerns, recept_width,
                                              pool_width, stride, self.training_mode,
                                              dropout_prob[0], activation, weights_variance,
                                              n_channels, n_timesteps, n_fbins, global_pooling)
    self.classifier = SoftmaxLayer(rng=rng, input=self.feature_extractor.output,
                                   n_in=nkerns[-1], training_mode=self.training_mode,
                                   dropout_prob=dropout_prob[-1])
    self.weights = self.feature_extractor.weights + self.classifier.weights

    # ---------------------- BACKPROP
    self.cost = self.classifier.cross_entropy_cost(self.y)
    # L2 penalty on every other parameter (presumably the weight matrices, skipping biases)
    L2_sqr = sum((weight ** 2).sum() for weight in self.weights[::2])
    self.grads = T.grad(self.cost + weight_decay * L2_sqr, self.weights)
    self.updates = self.adadelta_updates(self.grads, self.weights)
    # self.updates = self.nesterov_momentum(self.grads, self.weights)

    # --------------------- FUNCTIONS
    self.train_model = theano.function([self.x, self.y, Param(self.training_mode, default=1)],
                                       outputs=self.cost, updates=self.updates)
    self.validate_model = theano.function([self.x, self.y, Param(self.training_mode, default=0)],
                                          self.cost)
    self.test_model = theano.function([self.x, Param(self.training_mode, default=0)],
                                      self.classifier.p_y_given_x[:, 1])
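# --- Hedged sketch (not part of the original code) ---
# A minimal NumPy version of the update rule that adadelta_updates
# presumably implements (Zeiler, 2012); the rho and eps values here are
# assumptions, not values taken from the class above.
import numpy as np

def adadelta_step(w, grad, Eg2, Edx2, rho=0.95, eps=1e-6):
    Eg2 = rho * Eg2 + (1 - rho) * grad ** 2              # running avg of grad^2
    dx = -np.sqrt(Edx2 + eps) / np.sqrt(Eg2 + eps) * grad
    Edx2 = rho * Edx2 + (1 - rho) * dx ** 2              # running avg of update^2
    return w + dx, Eg2, Edx2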
def __init__(self, inputs, bs, max_time, classes, feature_dim, hidden_size,
             filters, N=1, pool=None, lstm_dim=4096, steps=8, seed=12345):
    self._inputs = inputs
    self.N = N
    self.batch_size = bs
    self.classes = classes
    self.max_time = max_time
    self.filters = filters
    self.feature_dim = feature_dim
    self.pool = pool
    self.dropout = True
    self.steps = steps

    self.temporal_filters = []
    for f in range(filters):
        tf = TemporalAttentionLayer(batch_size=bs, N=N, channels=feature_dim,
                                    input_hidden_size=lstm_dim,
                                    name='temporal-attention-layer-filter-' + str(f))
        self.temporal_filters.append(tf)

    input_size = feature_dim * len(self.temporal_filters) * (N if pool is None else 1)
    self.lstm_in = HiddenLayer(input_size=input_size, hidden_size=lstm_dim * 4,
                               batch_size=bs)
    self.lstm = LSTMLayer(input_size=lstm_dim, hidden_size=lstm_dim)
    self.hidden = HiddenLayer(input_size=lstm_dim, hidden_size=hidden_size,
                              activation=act.relu, batch_size=bs,
                              name='hidden', dropout=0.5)
    self.softmax = SoftmaxLayer(input_size=hidden_size, classes=self.classes,
                                batch_size=bs, name='softmax', dropout=0.5)
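# --- Hedged sketch (not part of the original code) ---
# Why lstm_in has hidden_size=lstm_dim*4: one affine map can produce the
# pre-activations of all four LSTM gates at once. A minimal NumPy step;
# the (i, f, o, g) gate ordering is an assumption about LSTMLayer.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(z, c_prev):
    # z: (batch, 4*H) gate pre-activations from lstm_in; c_prev: (batch, H)
    i, f, o, g = np.split(z, 4, axis=1)
    c = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)  # new cell state
    h = sigmoid(o) * np.tanh(c)                        # new hidden state
    return h, c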
def add_softmax_layer(self):
    l = SoftmaxLayer(self.layers[-1])
    self.layers.append(l)
# One hot encoding: number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train_enc = one_hot(y_train)

# number of classes / pixels per image
num_classes = y_train_enc.shape[0]
num_pixels = x_train.shape[0]

# Create our NN structure
net = NeuralNetwork()
net.add(FCLayer(num_pixels, 100, activation=TanH(), optimizer=Adam()))
net.add(DropOut(rate=0.0))
net.add(FCLayer(100, 50, activation=TanH(), optimizer=Adam()))
net.add(DropOut(rate=0.0))
net.add(FCLayer(50, 25, activation=TanH(), optimizer=Adam()))
net.add(DropOut(rate=0.0))
net.add(SoftmaxLayer(25, num_classes, activation=Softmax(), optimizer=Adam()))

# train
net.use(loss=MultiClassCrossEntropy(), regularizer=L2Regularizer(lambd=0.01))
net.train(x_train, y_train_enc, epochs=50, learning_rate=0.001, batch_size=256)

# check training accuracy
train_results = net.predict(x_train)
train_results = np.argmax(train_results, axis=0)
print("Accuracy on training set:", np.mean(train_results == y_train) * 100, "%")

# Check our model on the test set
x_test = normalize_images(x_test)
test_results = net.predict(x_test)
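# --- Hedged sketch (not part of the original code) ---
# A minimal one_hot consistent with the shapes above (classes on axis 0,
# so y_train_enc.shape[0] is the class count and predictions use
# np.argmax(..., axis=0)); the real helper may differ.
import numpy as np

def one_hot(y, num_classes=10):
    enc = np.zeros((num_classes, y.size), dtype=np.float32)
    enc[y, np.arange(y.size)] = 1.0
    return enc

# Scoring the test predictions the same way as the training set (assumes
# y_test holds the integer test labels):
# test_results = np.argmax(test_results, axis=0)
# print("Accuracy on test set:", np.mean(test_results == y_test) * 100, "%")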
net = Network()
sp = SciPlot('Curve of softmax output')
CLM(net, 25, 25, 3, 11, 11, 48, 2, 4, Relu(), 0.05,
    momentum_rate=0.0, decay_rate=0.1)
# CLh(net, 2, 2, 10, 1, 2, Relu(), 0.05)
LrnLayer(net, 2, 0.0001, 5, 0.75)
MaxPoolingLayer(net, 3, 3, 2)
FcLayer(net, 5, Relu(), momentum_rate=0.0, decay_rate=0.1)
DropoutLayer(net, dropout_prob=0.5)
SoftmaxLayer(net, 10, momentum_rate=0.0, decay_rate=0.1)

# net1 = Network()
# CLM(net1, 25, 25, 3, 11, 11, 48, 2, 4, Relu(), 0.05)
# CLMh(net1, 2, 2, 10, 1, 2, Relu(), 0.05)
# LrnLayer(net1, 2, 0.0001, 5, 0.75)
# MaxPoolingLayer(net1, 3, 3, 2)
# FcLayer(net1, 5, Relu())
# DropoutLayer(net1, dropout_prob=0.5)
# SoftmaxLayer(net1, 10)

sp.plot(net.predict(fake_image, training=False), desc='episode-' + str(0))
for i in range(0, 5):
    net.train_one_sample(fake_label, fake_image, 1)
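# --- Hedged sketch (not part of the original code) ---
# The normalization LrnLayer(net, 2, 0.0001, 5, 0.75) presumably applies:
# AlexNet-style local response normalization across channels with
# k=2, alpha=1e-4, n=5, beta=0.75 (the argument order is an assumption).
import numpy as np

def lrn(a, k=2.0, alpha=1e-4, n=5, beta=0.75):
    # a: (channels, height, width) activations
    C = a.shape[0]
    out = np.empty_like(a)
    for c in range(C):
        lo, hi = max(0, c - n // 2), min(C, c + n // 2 + 1)
        denom = (k + alpha * (a[lo:hi] ** 2).sum(axis=0)) ** beta
        out[c] = a[c] / denom  # divide by the squared-activation window
    return out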