def add_softmax_layer(self):
    layer = SoftmaxLayer(self.layers[-1])
    self.layers.append(layer)
import numpy as np
import theano
import theano.tensor as T
from theano import Param  # renamed theano.In in later Theano releases
from sklearn.metrics import roc_curve, auc

# FeatureExtractor, SoftmaxLayer and RandomTrainIterator come from the
# surrounding project.


class ConvNet(object):
    def __init__(self, param_dict):
        self.param_dict = param_dict
        self.training_batch_size = param_dict['training_batch_size']
        nkerns = param_dict['nkerns']
        recept_width = param_dict['recept_width']
        pool_width = param_dict['pool_width']
        stride = param_dict['stride']
        dropout_prob = param_dict['dropout_prob']
        weight_decay = param_dict['l2_reg']
        activation = param_dict['activation']
        weights_variance = param_dict['weights_variance']
        n_channels = param_dict['n_channels']
        n_timesteps = param_dict['n_timesteps']
        n_fbins = param_dict['n_fbins']
        global_pooling = param_dict['global_pooling']
        rng = np.random.RandomState(23455)

        self.training_mode = T.iscalar('training_mode')
        self.x = T.tensor4('x')
        self.y = T.bvector('y')
        self.batch_size = theano.shared(self.training_batch_size)
        self.input = self.x.reshape((self.batch_size, 1, n_channels * n_fbins, n_timesteps))

        self.feature_extractor = FeatureExtractor(rng, self.input, nkerns, recept_width,
                                                  pool_width, stride, self.training_mode,
                                                  dropout_prob[0], activation, weights_variance,
                                                  n_channels, n_timesteps, n_fbins, global_pooling)
        self.classifier = SoftmaxLayer(rng=rng, input=self.feature_extractor.output,
                                       n_in=nkerns[-1], training_mode=self.training_mode,
                                       dropout_prob=dropout_prob[-1])
        self.weights = self.feature_extractor.weights + self.classifier.weights

        # ---------------------- BACKPROP
        self.cost = self.classifier.cross_entropy_cost(self.y)
        # L2 penalty on the weight matrices only (biases sit at odd indices).
        L2_sqr = sum((weight ** 2).sum() for weight in self.weights[::2])
        self.grads = T.grad(self.cost + weight_decay * L2_sqr, self.weights)
        self.updates = self.adadelta_updates(self.grads, self.weights)
        # self.updates = self.nesterov_momentum(self.grads, self.weights)

        # --------------------- FUNCTIONS
        self.train_model = theano.function(
            [self.x, self.y, Param(self.training_mode, default=1)],
            outputs=self.cost, updates=self.updates)
        self.validate_model = theano.function(
            [self.x, self.y, Param(self.training_mode, default=0)], self.cost)
        self.test_model = theano.function(
            [self.x, Param(self.training_mode, default=0)],
            self.classifier.p_y_given_x[:, 1])

    def train(self, train_set, max_iter):
        print 'training for', max_iter, 'iterations'
        self.batch_size.set_value(self.training_batch_size)
        train_set_iterator = RandomTrainIterator(train_set, self.training_batch_size)
        done_looping = False
        iter = 0
        while not done_looping:
            for train_x, train_y in train_set_iterator:
                self.train_model(train_x, train_y)
                # if iter % 10 == 0:
                #     self.batch_size.set_value(train_set[0].shape[0])
                #     print self.validate_model(train_set[0], train_set[1])
                #     self.batch_size.set_value(self.training_batch_size)
                if iter > max_iter:
                    done_looping = True
                    break
                iter += 1

    def validate(self, train_set, valid_set, valid_freq, max_iter, fname_out):
        train_set_iterator = RandomTrainIterator(train_set, self.training_batch_size)
        valid_set_size = len(valid_set[1])
        f_out = open(fname_out, 'w')

        # ------------------------------ TRAINING
        epoch = 0
        iter = 0
        best_ce = np.inf
        best_iter_ce = 0
        best_auc = 0
        best_iter_auc = 0
        done_looping = False
        patience = 100000
        patience_increase = 2
        improvement_threshold = 0.995
        while iter < max_iter and not done_looping:
            epoch += 1
            for train_x, train_y in train_set_iterator:
                self.train_model(train_x, train_y)
                iter += 1
                # ------------------------ VALIDATION
                if iter % valid_freq == 0:
                    self.batch_size.set_value(valid_set_size)
                    cost_valid = self.validate_model(valid_set[0], valid_set[1])
                    auc_valid = self.get_auc(valid_set)
                    # print "%4s %7s %15s %15s %10s " % (epoch, iter, auc_valid, cost_valid, patience)
                    f_out.write("%s \t %s \t %s \n" % (iter, auc_valid, cost_valid))
                    self.batch_size.set_value(self.training_batch_size)
                    if cost_valid <= best_ce:
                        if cost_valid < best_ce * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_iter_ce = iter
                        best_ce = cost_valid
                    if auc_valid >= best_auc:
                        best_iter_auc = iter
                        best_auc = auc_valid
                if patience <= iter:
                    done_looping = True
        print 'best_iter_cost:', best_iter_ce, 'best_cost:', best_ce
        print 'best_iter_auc:', best_iter_auc, 'best_auc:', best_auc
        f_out.close()
        return max(best_iter_ce, best_iter_auc)

    def get_auc(self, data_xy):
        x, y = data_xy[0], data_xy[1]
        p_y_given_x = self.get_test_proba(x)
        fpr, tpr, thresholds = roc_curve(y, p_y_given_x, pos_label=1)
        roc_auc = auc(fpr, tpr)
        return roc_auc

    def get_test_proba(self, x_test):
        self.batch_size.set_value(len(x_test))
        p_y_given_x = self.test_model(x_test)
        return p_y_given_x

    def nesterov_momentum(self, grads, weights, learning_rate=0.001, momentum=0.9):
        updates = []
        for param_i, grad_i in zip(weights, grads):
            mparam_i = theano.shared(np.zeros(param_i.get_value().shape,
                                              dtype=theano.config.floatX))
            v = momentum * mparam_i - learning_rate * grad_i
            w = param_i + momentum * v - learning_rate * grad_i
            updates.append((mparam_i, v))
            updates.append((param_i, w))
        return updates

    def adadelta_updates(self, grads, weights, learning_rate=0.01, rho=0.95, epsilon=1e-6):
        accumulators = [theano.shared(np.zeros_like(param_i.get_value()))
                        for param_i in weights]
        delta_accumulators = [theano.shared(np.zeros_like(param_i.get_value()))
                              for param_i in weights]
        updates = []
        for param_i, grad_i, acc_i, acc_delta_i in zip(weights, grads, accumulators,
                                                       delta_accumulators):
            acc_i_new = rho * acc_i + (1 - rho) * grad_i ** 2
            updates.append((acc_i, acc_i_new))
            update_i = grad_i * T.sqrt(acc_delta_i + epsilon) / T.sqrt(acc_i_new + epsilon)
            updates.append((param_i, param_i - learning_rate * update_i))
            acc_delta_i_new = rho * acc_delta_i + (1 - rho) * update_i ** 2
            updates.append((acc_delta_i, acc_delta_i_new))
        return updates

    def get_state(self):
        state = {}
        state['params'] = self.param_dict
        weights_vals = []
        for p in self.weights:
            weights_vals.append(p.get_value())
        state['weights'] = weights_vals
        return state

    def set_weights(self, weights_vals):
        for i, w in enumerate(weights_vals):
            self.weights[i].set_value(w)
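# A hedged usage sketch for ConvNet above: every key mirrors a lookup in
# __init__, but the concrete values, data shapes, and file name are
# illustrative assumptions, not settings from the original experiments.
# train_set / valid_set are (x, y) tuples of numpy arrays, as indexed above.
param_dict = {
    'training_batch_size': 64,
    'nkerns': (32, 64),          # feature maps per conv stage
    'recept_width': (5, 3),
    'pool_width': (2, 2),
    'stride': (1, 1),
    'dropout_prob': (0.2, 0.5),  # [0] feeds the extractor, [-1] the softmax
    'l2_reg': 1e-4,
    'activation': 'relu',
    'weights_variance': 0.01,
    'n_channels': 16,
    'n_timesteps': 10,
    'n_fbins': 6,
    'global_pooling': True,
}
net = ConvNet(param_dict)
# One plausible flow: pick an iteration count on the validation split,
# then retrain and score the held-out data.
best_iter = net.validate(train_set, valid_set, valid_freq=100,
                         max_iter=20000, fname_out='valid_log.txt')
net.train(train_set, max_iter=best_iter)
probs = net.get_test_proba(x_test)  # P(y = 1 | x) for each test example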
# One-hot encoding: the label 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train_enc = one_hot(y_train)

# Number of classes / pixels per image (samples are stored column-wise,
# so axis 0 holds the feature or class dimension)
num_classes = y_train_enc.shape[0]
num_pixels = x_train.shape[0]

# Build the network structure
net = NeuralNetwork()
net.add(FCLayer(num_pixels, 100, activation=TanH(), optimizer=Adam()))
net.add(DropOut(rate=0.0))
net.add(FCLayer(100, 50, activation=TanH(), optimizer=Adam()))
net.add(DropOut(rate=0.0))
net.add(FCLayer(50, 25, activation=TanH(), optimizer=Adam()))
net.add(DropOut(rate=0.0))
net.add(SoftmaxLayer(25, num_classes, activation=Softmax(), optimizer=Adam()))

# Train
net.use(loss=MultiClassCrossEntropy(), regularizer=L2Regularizer(lambd=0.01))
net.train(x_train, y_train_enc, epochs=50, learning_rate=0.001, batch_size=256)

# Check training accuracy
train_results = net.predict(x_train)
train_results = np.argmax(train_results, axis=0)
print("Accuracy on training set:", np.mean(train_results == y_train) * 100, "%")

# Evaluate the model on the test set
x_test = normalize_images(x_test)
test_results = net.predict(x_test)
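# The script above assumes one_hot() and normalize_images() helpers that are
# not shown. A minimal sketch under the column-wise layout the script uses
# (shape[0] is classes/pixels, so each sample is a column); the names and
# defaults here are assumptions.
import numpy as np

def one_hot(labels, num_classes=10):
    # Build a (num_classes x num_samples) matrix with a single 1 per column,
    # e.g. label 3 -> column [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].
    encoded = np.zeros((num_classes, labels.shape[0]))
    encoded[labels.astype(int), np.arange(labels.shape[0])] = 1
    return encoded

def normalize_images(images):
    # Scale 8-bit pixel intensities into [0, 1].
    return images.astype(np.float32) / 255.0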
class TemporalModel(Model):
    def __init__(self, inputs, bs, max_time, classes, feature_dim, hidden_size,
                 method='max', seed=12345):
        self._inputs = inputs
        self.method = method
        self.batch_size = bs
        self.classes = classes
        self.max_time = max_time
        self.feature_dim = feature_dim
        self.dropout = True
        self.hidden = HiddenLayer(input_size=feature_dim, hidden_size=hidden_size,
                                  batch_size=bs, name='hidden', dropout=0.5,
                                  activation=act.LeakyRelu())
        self.softmax = SoftmaxLayer(input_size=hidden_size, classes=self.classes,
                                    batch_size=bs, name='softmax', dropout=0.5)

    @property
    def params(self):
        return self.softmax.params + self.hidden.params

    @property
    def inputs(self):
        return self._inputs

    @property
    def outputs(self):
        return self._outputs

    @property
    def updates(self):
        return self._updates

    @property
    def test_algorithm(self):
        if not hasattr(self, '_talgorithm'):
            d = self.dropout
            self.dropout = False
            o = self.run(*self.inputs)
            for i, ot in enumerate(self.outputs):
                o[i].name = ot.name
            self._talgorithm = theano.function(inputs=self.inputs, outputs=o,
                                               on_unused_input='warn')
            self.dropout = d
        return self._talgorithm

    def run(self, x, mask, y):
        # Pool each feature over all frames (max / sum / mean), using the mask
        # so padded frames do not contribute.
        if self.method == 'max':
            # Push masked-out frames to a large negative value before the max.
            m = (-100 * (1 - mask)).dimshuffle([0, 1, 'x'])
            x = T.max(x + m, axis=1)
        elif self.method in ('sum', 'mean'):
            x = T.sum(x, axis=1)
            if self.method == 'mean':
                # mean = sum over time / number of valid frames
                x = x / T.sum(mask, axis=1).dimshuffle([0, 'x'])
        x = x.astype(theano.config.floatX)
        x = self.hidden.run(x, self.dropout)
        prob, pred = self.softmax.run(x, self.dropout)
        y = y.reshape((y.shape[0],))
        loss = (self.softmax.loss(prob, y)
                + T.sum(self.hidden.w ** 2) * 0.001
                + T.sum(self.softmax.w ** 2) * 0.0001)
        y = T.extra_ops.to_one_hot(y, self.classes)
        error = self.softmax.error(pred, y)
        acc = 1 - error
        return prob, pred, loss, error, acc
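# A hedged instantiation sketch for the pooling TemporalModel above; the
# symbolic variables, batch size, class count, and 4096-d frame features are
# illustrative assumptions, not part of the original code.
import theano.tensor as T

x = T.tensor3('x')       # batch x time x features
mask = T.matrix('mask')  # batch x time, 1 where a frame is valid
y = T.ivector('y')       # one class index per clip

model = TemporalModel(inputs=[x, mask, y], bs=32, max_time=100, classes=51,
                      feature_dim=4096, hidden_size=512, method='mean')
# run() wires the full graph; test_algorithm compiles it with dropout off.
prob, pred, loss, error, acc = model.run(x, mask, y)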
class TemporalModel(Model):
    def __init__(self, inputs, bs, max_time, classes, feature_dim, hidden_size,
                 levels, N=1, pool=None, seed=12345):
        self._inputs = inputs
        self.N = N
        self.batch_size = bs
        self.classes = classes
        self.max_time = max_time
        self.levels = levels
        self.feature_dim = feature_dim
        self.pool = pool
        self.dropout = True

        # Create a pyramid of filters: level l holds 2**l attention filters
        # with fixed centers (g), widths (d) and variance (sigma), so the
        # pyramid covers the clip at every scale.
        self.temporal_pyramid = []
        for l in range(self.levels):
            for f in range(2 ** l):
                tf = TemporalAttentionLayer(
                    batch_size=bs, N=N, channels=feature_dim,
                    name='temporal-attention-layer-' + str(l) + '-filter-' + str(f))
                tf.test = True
                tf.d = theano.shared(value=np.asarray([1. / 2 ** (l + 1)]).astype('float32'),
                                     name='d', borrow=True, broadcastable=[True])
                tf.g = theano.shared(value=np.asarray([(1. / 2 ** l) + (2 * f / 2. ** l)]).astype('float32'),
                                     name='g', borrow=True, broadcastable=[True])
                tf.sigma = theano.shared(value=np.asarray([5.0]).astype('float32'),
                                         name='sigma', borrow=True, broadcastable=[True])
                self.temporal_pyramid.append(tf)

        input_size = feature_dim * N * (len(self.temporal_pyramid) if pool is None else 1)
        self.hidden = HiddenLayer(input_size=input_size, hidden_size=hidden_size,
                                  activation=act.LeakyRelu(), batch_size=bs,
                                  name='hidden', dropout=0.5)
        self.softmax = SoftmaxLayer(input_size=hidden_size, classes=self.classes,
                                    batch_size=bs, name='softmax', dropout=0.5)

    @property
    def params(self):
        # The pyramid parameters are fixed, so only the classifier is trained:
        # + [p for f in self.temporal_pyramid for p in f.params]
        return self.softmax.params + self.hidden.params

    @property
    def inputs(self):
        return self._inputs

    @property
    def outputs(self):
        return self._outputs

    @property
    def updates(self):
        return self._updates

    @property
    def test_algorithm(self):
        if not hasattr(self, '_talgorithm'):
            d = self.dropout
            self.dropout = False
            o = self.run(*self.inputs)
            for i, ot in enumerate(self.outputs):
                o[i].name = ot.name
            self._talgorithm = theano.function(inputs=self.inputs, outputs=o,
                                               on_unused_input='warn')
            self.dropout = d
        return self._talgorithm

    def run(self, x, mask, y):
        # Apply the temporal filters to x, reshaped to batch x features x time.
        results = []
        x = x.transpose([0, 2, 1])
        for tf in self.temporal_pyramid:
            # res is batch x features x N; flatten to batch x features*N
            res, (g, s2, d) = tf.run(x, mask)
            if self.pool is None:
                results.append(res.reshape((x.shape[0], self.feature_dim * self.N)))
            else:
                results.append(res.reshape((x.shape[0], 1, self.feature_dim * self.N)))
        # Concatenate on axis 1: batch x filters*features*N when not pooling,
        # batch x filters x features*N when pooling across filters.
        x = T.concatenate(results, axis=1)
        if self.pool == 'max':
            x = T.max(x, axis=1)
        elif self.pool == 'sum':
            x = T.sum(x, axis=1)
        elif self.pool == 'mean':
            x = T.mean(x, axis=1)
        x = self.hidden.run(x, self.dropout)
        prob, pred = self.softmax.run(x, self.dropout)
        loss = self.softmax.loss(prob, y)
        error = self.softmax.error(pred, y)
        acc = 1 - error
        return prob, pred, loss, error, acc
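# How the fixed pyramid parameters tile the clip: at level l there are 2**l
# filters of width d = 1/2**(l+1) with evenly spaced centers g, presumably
# mapped to absolute frame positions inside TemporalAttentionLayer. A quick
# check of the values set in __init__ above:
for l in range(3):
    d = 1. / 2 ** (l + 1)
    g = [(1. / 2 ** l) + (2 * f / 2. ** l) for f in range(2 ** l)]
    print('level', l, 'd =', d, 'g =', g)
# level 0: d = 0.5,   g = [1.0]
# level 1: d = 0.25,  g = [0.5, 1.5]
# level 2: d = 0.125, g = [0.25, 0.75, 1.25, 1.75]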
# net, sp and the head of the first CLM(...) call are restored from the
# commented-out duplicate below; fake_image / fake_label are defined
# elsewhere in the surrounding script.
net = Network()
sp = SciPlot('Curve of softmax output')
CLM(net, 25, 25, 3, 11, 11, 48, 2, 4, Relu(), 0.05, momentum_rate=0.0, decay_rate=0.1)
# CLh(net, 2, 2, 10, 1, 2, Relu(), 0.05)
LrnLayer(net, 2, 0.0001, 5, 0.75)
MaxPoolingLayer(net, 3, 3, 2)
FcLayer(net, 5, Relu(), momentum_rate=0.0, decay_rate=0.1)
DropoutLayer(net, dropout_prob=0.5)
SoftmaxLayer(net, 10, momentum_rate=0.0, decay_rate=0.1)

# net1 = Network()
# sp = SciPlot('Curve of softmax output')
# CLM(net1, 25, 25, 3, 11, 11, 48, 2, 4, Relu(), 0.05)
# CLMh(net1, 2, 2, 10, 1, 2, Relu(), 0.05)
# LrnLayer(net1, 2, 0.0001, 5, 0.75)
# MaxPoolingLayer(net1, 3, 3, 2)
# FcLayer(net1, 5, Relu())
# DropoutLayer(net1, dropout_prob=0.5)
# SoftmaxLayer(net1, 10)

sp.plot(net.predict(fake_image, training=False), desc='episode-' + str(0))
for i in range(0, 5):
    net.train_one_sample(fake_label, fake_image, 1)
class TemporalModel(Model):
    def __init__(self, inputs, bs, max_time, classes, feature_dim, hidden_size,
                 filters, N=1, pool=None, lstm_dim=4096, steps=8, seed=12345):
        self._inputs = inputs
        self.N = N
        self.batch_size = bs
        self.classes = classes
        self.max_time = max_time
        self.filters = filters
        self.feature_dim = feature_dim
        self.pool = pool
        self.dropout = True
        self.steps = steps

        self.temporal_filters = []
        for f in range(filters):
            tf = TemporalAttentionLayer(batch_size=bs, N=N, channels=feature_dim,
                                        input_hidden_size=lstm_dim,
                                        name='temporal-attention-layer-filter-' + str(f))
            self.temporal_filters.append(tf)

        input_size = feature_dim * len(self.temporal_filters) * (N if pool is None else 1)
        # One projection produces all four LSTM gate pre-activations at once.
        self.lstm_in = HiddenLayer(input_size=input_size, hidden_size=lstm_dim * 4,
                                   batch_size=bs)
        self.lstm = LSTMLayer(input_size=lstm_dim, hidden_size=lstm_dim)
        self.hidden = HiddenLayer(input_size=lstm_dim, hidden_size=hidden_size,
                                  activation=act.relu, batch_size=bs,
                                  name='hidden', dropout=0.5)
        self.softmax = SoftmaxLayer(input_size=hidden_size, classes=self.classes,
                                    batch_size=bs, name='softmax', dropout=0.5)

    @property
    def params(self):
        return (self.softmax.params + self.hidden.params + self.lstm_in.params +
                self.lstm.params + [p for f in self.temporal_filters for p in f.params])

    @property
    def inputs(self):
        return self._inputs

    @property
    def outputs(self):
        return self._outputs

    @property
    def updates(self):
        return self._updates

    @property
    def test_algorithm(self):
        if not hasattr(self, '_talgorithm'):
            d = self.dropout
            self.dropout = False
            o = self.run(*self.inputs)
            for i, ot in enumerate(self.outputs):
                o[i].name = ot.name
            self._talgorithm = theano.function(inputs=self.inputs, outputs=o,
                                               on_unused_input='warn')
            self.dropout = d
        return self._talgorithm

    def run(self, x, mask, y):
        # Make x batch x features x time, then let the LSTM attend to the clip
        # for a fixed number of glimpse steps.
        x = x.transpose([0, 2, 1])
        h, c = self.lstm.get_initial_hidden(x)
        outputs_info = [dict(initial=h, taps=[-1]),  # h
                        dict(initial=c, taps=[-1])]  # c
        [h, c], _ = theano.scan(fn=self.step, non_sequences=[x, mask],
                                outputs_info=outputs_info, n_steps=self.steps)
        x = self.hidden.run(h[-1], self.dropout)
        prob, pred = self.softmax.run(x, self.dropout)
        loss = self.softmax.loss(prob, y)
        error = self.softmax.error(pred, y)
        acc = 1 - error
        return prob, pred, loss, error, acc

    def step(self, h, c, x, mask):
        results = []
        for tf in self.temporal_filters:
            # res is batch x features x N; flatten (or pool over N) per filter.
            res, (g, s2, d) = tf.run(x, h, mask)
            if self.pool is None:
                results.append(res.reshape((x.shape[0], self.feature_dim * self.N)))
            elif self.pool == 'max':
                results.append(T.max(res, axis=2).reshape((x.shape[0], self.feature_dim)))
            elif self.pool == 'sum':
                results.append(T.sum(res, axis=2).reshape((x.shape[0], self.feature_dim)))
            elif self.pool == 'mean':
                results.append(T.mean(res, axis=2).reshape((x.shape[0], self.feature_dim)))
        # Concatenate on axis 1 to get batch x filters*features*N.
        x = T.concatenate(results, axis=1)
        x = self.lstm_in.run(x)
        h, c = self.lstm.run(x, h, c)
        return h, c
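# A hedged instantiation sketch for the recurrent-attention TemporalModel
# above; the sizes and symbolic inputs are illustrative assumptions.
import theano.tensor as T

x = T.tensor3('x')       # batch x time x features
mask = T.matrix('mask')  # batch x time validity mask
y = T.ivector('y')

model = TemporalModel(inputs=[x, mask, y], bs=16, max_time=100, classes=101,
                      feature_dim=4096, hidden_size=512, filters=4, N=8,
                      pool='mean', lstm_dim=1024, steps=8)
# theano.scan passes the recurrent taps (h, c) first and the non_sequences
# (x, mask) after them, which is why step() is ordered step(h, c, x, mask).
prob, pred, loss, error, acc = model.run(x, mask, y)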