def build_model(tparams, options):
    """Assemble the symbolic encoder graph and its per-sample training cost.

    The input sequence is embedded (or taken as-is when
    ``options['use_target_as_input']`` is set), reduced to one vector per
    sample by a feed-forward, regression or LSTM encoder, pushed through the
    output feed-forward stack and compared against the context matrix.

    Note that ``options`` is mutated: the ``'feedforward'`` key (and, when that
    is falsy, the ``'regress'`` key) is filled in with ``False`` if missing.

    Returns:
        ``(trng, use_noise, x, mask, ctx, cost)``.

    Raises:
        Exception: if ``options['loss_type']`` is neither ``'cosine'`` nor
            ``'ranking'``.
    """
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))

    target_as_input = options['use_target_as_input']

    # description string: #words x #samples
    if target_as_input:
        x = tensor.tensor3('x', dtype='float32')
    else:
        x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    # context: #samples x dim
    ctx = tensor.matrix('ctx', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    def masked_sum(seq):
        # Sum over the leading (time) axis, ignoring masked-out positions.
        return (seq * mask[:, :, None]).sum(axis=0)

    def valid_steps():
        # Number of unmasked time steps per sample, as a column.
        return mask.sum(axis=0)[:, None]

    def unit_rows(mat):
        # Scale every row to unit Euclidean length.
        return mat / tensor.sqrt((mat ** 2).sum(1))[:, None]

    # word embedding
    if target_as_input:
        emb = x
    else:
        emb = tparams['Wemb'][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])

    # decoder
    if options.setdefault('feedforward', False):
        # project every step, then average over the valid steps
        proj_h = masked_sum(tensor.dot(emb, tparams['Wff'])) / valid_steps()
    elif options.setdefault('regress', False):
        # sum the embeddings first, project once, then normalise
        proj_h = tensor.dot(masked_sum(emb), tparams['Wff']) / valid_steps()
    else:
        hidden = get_layer('lstm')[1](tparams, emb, options,
                                      prefix='encoder', mask=mask)[0]
        if options['use_mean']:
            proj_h = masked_sum(hidden) / valid_steps()
        else:
            proj_h = hidden[-1]

    if 'n_layers' in options:
        for lidx in xrange(1, options['n_layers']):
            proj_h = get_layer('ff')[1](tparams, proj_h, options,
                                        prefix='ff_out_%d' % lidx,
                                        activ='tanh')
    out = get_layer('ff')[1](tparams, proj_h, options,
                             prefix='ff_out', activ='linear')

    # cost
    loss_type = options['loss_type']
    if loss_type == 'cosine':
        out = unit_rows(out)
        cost = 1. - (out * ctx).sum(1)
    elif loss_type == 'ranking':
        out = unit_rows(out)
        # contrast each sample against a randomly re-ordered context row
        ctx_rnd = ctx[trng.permutation(n=ctx.shape[0])]
        cost = tensor.maximum(
            0., 1 - (out * ctx).sum(1) + (out * ctx_rnd).sum(1))
    else:
        raise Exception('Unknown loss function')

    return trng, use_noise, x, mask, ctx, cost
def add_negative(cls, var_x, x_tilde, type='samples'):
    """Return the mean squared distance between ``x_tilde`` and a shuffled ``var_x``.

    ``type`` selects what is shuffled: ``'samples'`` permutes along the first
    axis of ``var_x``, ``'features'`` along the second. ``None`` disables the
    term and yields ``0``. Any other value returns ``None``.
    """
    if type is None:
        return 0

    random_stream = RandomStreams()
    if type == 'samples':
        order = random_stream.permutation(n=var_x.shape[0])
        shuffled_var_x = var_x[order, :]
    elif type == 'features':
        order = random_stream.permutation(n=var_x.shape[1])
        shuffled_var_x = var_x[:, order]
    else:
        # Unrecognised mode: no term is produced.
        return None

    squared_error = ((shuffled_var_x - x_tilde) ** 2).sum(axis=1)
    return Tensor.mean(squared_error)
def shuffle_training_data(self):
    """Re-index ``train_set_x`` and ``train_set_y`` with one random permutation.

    The same permutation is applied to both attributes so that rows and
    labels stay aligned.
    """
    print("Shuffling training X and y data...")

    row_count = self.train_set_x.shape[0]
    stream = RandomStreams(seed=None)
    # permutation(..., size=(1,)) yields a single permutation with a leading
    # axis of length 1; flatten it into a plain index vector.
    order = stream.permutation(n=row_count, size=(1,))
    order = order.reshape((row_count,))

    self.train_set_x = self.train_set_x[order]
    self.train_set_y = self.train_set_y[order]
class Visual(task.Task):
    """Task that encodes an input sequence and projects it to a target vector.

    The pipeline is ``Encoder -> Attention -> Dense``; the cost is a cosine
    distance, optionally wrapped in a margin loss against shuffled
    predictions when ``config['margin']`` is truthy.
    """

    def __init__(self, config):
        # Must stay first: autoassign copies the current locals onto self.
        autoassign(locals())
        self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])

        encoder_dims = (config['size_vocab'], config['size_embed'],
                        config['size'], config['depth'])
        # NOTE(review): the activation name is resolved with eval(); only
        # trusted configs should reach this constructor.
        encoder_options = dict(
            activation=eval(config.get('activation', 'clipped_rectify')),
            filter_length=config.get('filter_length', 6),
            filter_size=config.get('filter_size', 1024),
            stride=config.get('stride', 3),
            residual=config.get('residual', False),
        )
        self.Encode = Encoder(*encoder_dims, **encoder_options)
        self.Attn = Attention(config['size'])
        self.ToImg = Dense(config['size'], config['size_target'])

        self.inputs = [T.ftensor3()]
        self.target = T.fmatrix()

        self.config['margin'] = self.config.get('margin', False)
        if self.config['margin']:
            self.srng = RandomStreams(seed=234)

    def params(self):
        """Collect the parameters of the three sub-layers."""
        return params(self.Encode, self.Attn, self.ToImg)

    def __call__(self, input):
        """Encode ``input``, apply attention and project to the target space."""
        encoded = self.Encode(input)
        attended = self.Attn(encoded)
        return self.ToImg(attended)

    def cost(self, target, prediction):
        """Cosine distance, or the margin loss when it is enabled in config."""
        if not self.config['margin']:
            return CosineDistance(target, prediction)
        return self.Margin(target, prediction, dist=CosineDistance, d=1)

    def Margin(self, U, V, dist=CosineDistance, d=1.0):
        """Hinge loss contrasting ``V`` with a randomly re-ordered copy of itself."""
        # A bit silly making it nondet
        order = self.srng.permutation(n=T.shape(V)[0], size=(1,))
        V_ = (V[order, ]).reshape(T.shape(V))
        return T.maximum(0.0, dist(U, V) - dist(U, V_) + d)

    def args(self, item):
        """Pick the model input and the target out of a data item."""
        return (item['audio'], item['target_v'])

    def _make_representation(self):
        """Compile a function returning the encoder output in non-training mode."""
        with context.context(training=False):
            rep = self.Encode(*self.inputs)
        return theano.function(self.inputs, rep)

    def _make_pile(self):
        """Compile a function returning the encoder GRU's intermediate states."""
        with context.context(training=False):
            rep = self.Encode.GRU.intermediate(*self.inputs)
        return theano.function(self.inputs, rep)
def test_permutation(self):
    """RandomStreams.permutation must reproduce numpy's permutations."""
    # Two consecutive draws are compared so that a random state which is not
    # advanced between calls is detected as well.
    streams = RandomStreams(utt.fetch_seed())
    draw = function([], streams.permutation((20,), 10),
                    updates=streams.updates())
    observed_draws = [draw(), draw()]

    stream_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
    reference = numpy.random.RandomState(int(stream_seed))  # int() is for 32bit

    # numpy hands out one permutation per call, so stack 20 of them per draw.
    for observed in observed_draws:
        expected = numpy.asarray(
            [reference.permutation(10) for _ in range(20)])
        assert numpy.all(observed == expected)
def test_permutation(self):
    # RandomStreams.permutation has to produce the same values as numpy.
    # The function is evaluated twice to make sure the random state is
    # updated between calls.
    random = RandomStreams(utt.fetch_seed())
    permuted = random.permutation((20, ), 10)
    fn = function([], permuted, updates=random.updates())
    first_draw = fn()
    second_draw = fn()

    rng_seed = np.random.RandomState(utt.fetch_seed()).randint(2**30)
    rng = np.random.RandomState(int(rng_seed))  # int() is for 32bit

    def numpy_draw():
        # numpy returns a single vector per call; build the 20 rows one by one.
        rows = []
        for _ in range(20):
            rows.append(rng.permutation(10))
        return np.asarray(rows)

    first_expected = numpy_draw()
    second_expected = numpy_draw()

    assert np.all(first_draw == first_expected)
    assert np.all(second_draw == second_expected)
def compute_joint_loss(self, h, tg):
    """Negative joint log-likelihood of ``h`` under the class ``tg`` Gaussians.

    On top of the Gaussian term, a "Jacobian" bonus proportional to the log
    squared distance between each row of ``h`` and a randomly chosen other
    row is added. Rows that were paired with a row of equal sum (in
    particular, with themselves) receive no bonus.
    """
    # input: (batch_size, n_in)
    shuffler = RandomStreams(seed=234)
    row_order = shuffler.permutation(n=h.shape[0], size=(1, ))[0]
    h_noised = h[row_order]

    # z: (batch_size, n_in)
    z = (h - self.means[tg]) / (self.stds + 1e-6)
    log_prior = T.log(self.priors[tg])
    log_lik = (log_prior
               - T.log(self.stds).sum()
               - 0.5 * (z**2).sum(axis=-1))

    # add Jacobian
    same_row = T.eq(h.sum(axis=1), h_noised.sum(axis=1))
    squared_gap = ((h - h_noised)**2).sum(axis=-1)
    bonus = (self.jacobian_factor * 0.5 * self.n_inputs *
             (T.log(1e-8 + squared_gap)))
    log_lik = log_lik + T.switch(
        same_row,
        T.zeros_like(same_row, same_row.dtype),
        bonus)

    return -log_lik
def do_gd(etaVal, epochs, layers, train_set, valid_set=None, test_set=None,
          L2_reg=0, batch_size=100, scale=1, noise_scale=1):
    '''
    Train an MLP by minibatch gradient descent with additive Gaussian noise.

    etaVal      -- initial learning rate; after every epoch it is reset to
                   etaVal / (1 + iteration / ANNEAL), where iteration is the
                   index of the last minibatch update performed so far
    epochs      -- number of passes over the training set
    layers      -- layer sizes handed to MLP
    train_set   -- (X, Y) pair; X must offer get_value() (its shape is read
                   that way) and both are sliced per minibatch
    valid_set   -- optional (X, Y) pair scored after every epoch; when None,
                   validation is skipped and nan is recorded for the epoch
    test_set    -- accepted for backward compatibility; it was never
                   evaluated and is ignored
    L2_reg      -- weight of the L2 penalty (0 by default)
    batch_size  -- minibatch size (100 by default); a trailing partial
                   batch is dropped
    scale       -- forwarded to MLP
    noise_scale -- multiplier of the standard-normal noise that is added to
                   every training minibatch (redrawn once per epoch)

    Returns (train_error, valid_error): the mean training cost and the mean
    validation error after each epoch.
    '''
    SEED = 5318
    np.random.seed(SEED)

    X = T.matrix('X')
    Y = T.ivector('Y')
    index = T.lscalar('index')
    noise = T.matrix('noise')
    eta = T.fscalar('eta')

    # Get the datasets and their sizes
    trainX, trainY = train_set
    train_dims = trainX.get_value(borrow=True).shape
    train_size = train_dims[0]

    classifier = MLP(
        rng=np.random.RandomState(SEED),
        inpt=X,
        layers=layers,
        scale=scale
    )

    cost = (
        classifier.negative_log_likelihood(Y)
        + L2_reg * classifier.L2_sqr  # using the L2 regularization
    )

    gparams = [T.grad(cost, param) for param in classifier.params]

    batch = slice(index * batch_size, (index + 1) * batch_size)

    train_model = theano.function(
        inputs=[index, eta, noise],
        outputs=cost,
        updates=[(param, param - eta * gparam)
                 for param, gparam in zip(classifier.params, gparams)],
        givens={
            # The noise matrix is (batch_size, n_features) and is added to
            # the minibatch slice of the training data.
            X: trainX[batch] + noise,
            Y: trainY[batch]
        }
    )

    # The validation function is only built when a validation set is given;
    # the signature advertises None as a legal default.
    validate_model = None
    n_val_batches = 0
    if valid_set is not None:
        validX, validY = valid_set
        valid_size = validX.get_value(borrow=True).shape[0]
        n_val_batches = valid_size // batch_size
        validate_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(Y),
            givens={
                X: validX[batch],
                Y: validY[batch]
            }
        )

    train_error = []
    valid_error = []

    # Calculate the number of batches.
    n_train_batches = train_size // batch_size

    # rate at which learning parameter "eta" is reduced as iterations increase
    ANNEAL = 10 * train_size
    print("Anneal = {}".format(ANNEAL))

    start_time = timeit.default_timer()
    learn_rate = etaVal

    for epoch in xrange(epochs):
        # NOTE(review): this "shuffle" only rebinds the local names to new
        # symbolic expressions. train_model was compiled against the original
        # variables, so the training order is not affected. It is kept as-is
        # because it also draws from the global numpy RNG, and removing that
        # draw would change the noise sequence below.
        theano_seed = int(np.random.rand() * 100)
        theano_rng = RandomStreams(theano_seed)
        trainX = trainX[theano_rng.permutation(n=train_size, size=(1,)),]
        theano_rng = RandomStreams(theano_seed)
        trainY = trainY[theano_rng.permutation(n=train_size, size=(1,)),]

        # Fresh gaussian noise of size (batch_size, # of features)
        gaussian_noise = noise_scale * np.random.normal(
            size=(batch_size, train_dims[1])).astype(theano.config.floatX)

        epoch_cost = [
            np.mean(np.asarray(
                [train_model(batch_idx, learn_rate, gaussian_noise)]))
            for batch_idx in xrange(n_train_batches)
        ]
        train_error.append(np.mean(epoch_cost))

        # Validation error checked in each epoch
        if validate_model is not None:
            val_cost = [
                np.mean(np.asarray([validate_model(val_batch_idx)]))
                for val_batch_idx in xrange(n_val_batches)
            ]
            valid_error.append(np.mean(val_cost))
        else:
            valid_error.append(float('nan'))

        time_check = timeit.default_timer()
        # Index of the last minibatch update performed so far. The previous
        # formula multiplied the epoch by the batch index instead of by the
        # number of batches per epoch.
        iteration = max((epoch + 1) * n_train_batches - 1, 0)

        print("epoch={}, mean train cost={}, mean_val_cost = {} time = {} eta={}".format(
            epoch, train_error[-1], valid_error[-1],
            (time_check - start_time) / 60.0, learn_rate))

        # Search and then converge
        learn_rate = etaVal / (1.0 + (iteration * 1.0 / ANNEAL))

    return train_error, valid_error
import theano as th

# Demo script: draw a random mini-batch of rows from a shared data matrix and
# compare functions compiled with and without default random-state updates.

# 10 x 3 data matrix, kept in a shared variable so it can be indexed
# symbolically.
data = np.random.rand(10,3)
# Shared integer used below to select one column of the reshaped permutation.
it = th.shared(0)
y = th.shared(data)

# NOTE: the order in which variables are requested from srng is significant;
# keep the statements below in this order.
srng = RandomStreams(seed=234)
# Random (3, 1) column vector of normal variates.
expectRvs = srng.normal(size=(3,1))
expectRvs.name='expectRvs'
# Random permutation of the 10 row indices ...
epochStream = srng.permutation(n=10)
# ... viewed as a 5 x 2 table; column `it` gives the 5 row indices of the
# current mini-batch.
currentBatch = epochStream.reshape((5,2))[:,it]
y_mini = y[ currentBatch, :]
# Scalar: sum of the mini-batch rows projected onto the random vector.
L = th.tensor.sum(th.tensor.dot( y_mini, expectRvs ))
# Compiled with no_default_updates=True, i.e. without the random streams'
# default state updates (presumably so repeated calls see the same draws --
# confirm against the Theano documentation).
L_func = function([], L, no_default_updates=True)
# Three distinct integers sampled from range(10); not used further in this
# snippet.
padding = srng.choice(size=(3,), a=10, replace=False, p=None, ndim=None, dtype='int64')
# Same output expression compiled twice: f1 without default updates, f2 with
# them.
f1 = function([], expectRvs, no_default_updates=True)
f2 = function([], expectRvs)
############Normal RV rn_n = srng.normal(size=(), avg=0.0, std=2.3) norm = function([],rn_n) print "Single Normal ", norm() #############Random integer list rn_i = srng.random_integers(size = (4, ), low=1, high=900) inte = function([], rn_i) print "Integer list ", inte() #############Generating a permutation unifromly at random rn_p = srng.permutation(size=(), n = 10) perm = function([], rn_p) print "Random permutation of 0 to 9", perm() #############choosing from a list randomly rn_list = srng.choice(size=(), a=[2,3, 4.5, 6], replace=True, p=[.5, 0, .5, 0], dtype='float64') lis = function([], rn_list) print "Choosing 3 times from the specified list ", lis() print lis() print lis() rn_another_list = srng.choice(size=(), a=3, replace=True, p=None) an_list = function([], rn_another_list) print "Choosing 3 times from [0,1, 2] since a is scalar", an_list()
def train(self, X_train, y_train, X_valid, y_valid, n_epochs, batch_size,
          optimization_function, cost_function, random_order=True):
    """Run minibatch training for ``n_epochs`` epochs, validating after each.

    Array inputs are moved into shared variables (features as float32, labels
    as int32 when ``self.classification`` is truthy, float32 otherwise);
    inputs that already are Theano variables are used unchanged. Passing the
    same object as ``X_train`` and ``y_train`` selects unsupervised mode, in
    which the inputs double as targets.

    When ``random_order`` is set, a function re-ordering the training data is
    appended to ``self.manual_updates``; every function in that list is
    called at the end of each epoch.
    """
    symbolic_types = (TensorVariable, SharedVariable)
    unsupervised = (X_train is y_train)

    def to_shared(array, dtype, name):
        # Copy a plain array into a named shared variable of the given dtype.
        return shared(array.astype(dtype), name=name)

    # Number of training rows: evaluated through Theano for symbolic inputs.
    if isinstance(X_train, symbolic_types):
        N = function([], X_train.shape[0])()
    else:
        N = X_train.shape[0]
    # ceil(N / batch_size)
    n_batches = N // batch_size + (N % batch_size != 0)

    if not isinstance(X_train, symbolic_types):
        X_train = to_shared(X_train, 'float32', "X_train")
    if not isinstance(X_valid, symbolic_types):
        X_valid = to_shared(X_valid, 'float32', "X_valid")

    if not unsupervised:
        if not isinstance(y_train, symbolic_types):
            y_train = to_shared(
                y_train, 'int32' if self.classification else 'float32',
                "y_train")
        if not isinstance(y_valid, symbolic_types):
            y_valid = to_shared(
                y_valid, 'int32' if self.classification else 'float32',
                "y_valid")

    if random_order:
        perm = RandomStreams(1).permutation(n=N)
        # One permutation variable is shared by both updates so that rows
        # and labels stay aligned.
        reorder = [(X_train, X_train[perm])]
        if not unsupervised:
            reorder.append((y_train, y_train[perm]))
        self.manual_updates.append(function([], updates=reorder))

    if unsupervised:
        y_train = X_train
        y_valid = X_valid

    cost = cost_function(self.yScaled, self.out, self.params)
    error = self.error()

    validation_givens = [(self.X, X_valid), (self.y, y_valid)]
    validate = function([], [cost, error],
                        givens=validation_givens + self.turn_off_dropout,
                        no_default_updates=self.no_default_upd)

    index = T.iscalar()
    upd = optimization_function(self.params, cost)

    # Row range of minibatch `index`; the last one is clipped to N.
    batch_begin = index * batch_size
    batch_end = T.min(((index+1) * batch_size, N))
    training_givens = [(self.X, X_train[batch_begin:batch_end]),
                       (self.y, y_train[batch_begin:batch_end])]
    optimize = function([index], [cost, error],
                        givens=training_givens,
                        updates=upd,
                        no_default_updates=self.no_default_upd)

    for epoch in range(n_epochs):
        print("Epoch", epoch)
        cost_sum, error_sum = 0, 0

        print("Running batches...")
        for batch_number in range(n_batches):
            batch_cost, batch_error = optimize(batch_number)
            cost_sum += batch_cost
            error_sum += batch_error
        print("Done!")

        print("training: cost", cost_sum / float(n_batches),
              ", error", error_sum / float(n_batches))

        valid_cost, valid_err = validate()
        print("validation: cost", valid_cost, ", error", valid_err)

        for man_upd in self.manual_updates:
            man_upd()
class UNET(object): def __init__( self, id, rng, batch_size, patch_size=572, patch_size_out=388, offline=False, path=None, train_time=5.0, learning_rate=0.01, momentum=0.95): self.id = id self.type = 'UNET' self.offline = offline self.done = False self.path = path self.batchSize = batch_size self.patchSize = patch_size self.patchSize_out = patch_size_out self.learning_rate = learning_rate self.momentum = momentum self.best_validation_loss = numpy.inf self.trainTime = train_time self.resample = False self.error = np.inf self.error_threshold = 0.06 self.best_val_loss_so_far = 0 self.patience_counter = 0 self.patience = 100 self.patience_reset = 100 self.doBatchNormAll = False self.doFineTune = False self.weight_decay = 0. self.weight_class_1 = 1. self.initialization = 'glorot_uniform' self.model = None self.srng = RandomStreams(1234) self.initialize() def initialize(self): print 'Unet.initialize' def trainiold(self, offline=False, data=None, mean=None, std=None): print 'UNET.train()' # need to define a custom loss, because all pre-implementations # seem to assume that scores over patch add up to one which # they clearly don't and shouldn't def unet_crossentropy_loss(y_true, y_pred): weight_class_1 = 1. 
epsilon = 1.0e-4 y_pred_clipped = T.clip(y_pred, epsilon, 1.0-epsilon) loss_vector = -T.mean(weight_class_1*y_true * T.log(y_pred_clipped) + (1-y_true) * T.log(1-y_pred_clipped), axis=1) average_loss = T.mean(loss_vector) return average_loss def unet_crossentropy_loss_sampled(y_true, y_pred): print 'unet_crossentropy_loss_sampled' epsilon = 1.0e-4 y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon)) y_true = T.flatten(y_true) # this seems to work # it is super ugly though and I am sure there is a better way to do it # but I am struggling with theano to cooperate # filter the right indices indPos = T.nonzero(y_true)[0] # no idea why this is a tuple indNeg = T.nonzero(1-y_true)[0] # shuffle n = indPos.shape[0] indPos = indPos[self.srng.permutation(n=n)] n = indNeg.shape[0] indNeg = indNeg[self.srng.permutation(n=n)] # take equal number of samples depending on which class has less n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64') indPos = indPos[:n_samples] indNeg = indNeg[:n_samples] loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg])) average_loss = T.mean(loss_vector) #return average_loss return T.mean(T.log(y_pred_clipped[indPos])) # input data should be large patches as prediction is also over large patches print print "=== building network ===" print "== BLOCK 1 ==" input = Input(shape=(1, self.patchSize, self.patchSize)) print "input ", input._keras_shape block1_act, block1_pool = unet_block_down(input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll) print "block1 act ", block1_act._keras_shape print "block1 ", block1_pool._keras_shape #sys.stdout.flush() print "== BLOCK 2 ==" block2_act, block2_pool = unet_block_down(input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll) print "block2 ", block2_pool._keras_shape #sys.stdout.flush() print "== BLOCK 3 ==" block3_act, block3_pool = unet_block_down(input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll) print 
"block3 ", block3_pool._keras_shape #sys.stdout.flush() print "== BLOCK 4 ==" block4_act, block4_pool = unet_block_down(input=block3_pool, nb_filter=512, doDropout=True, doBatchNorm=self.doBatchNormAll) print "block4 ", block4_pool._keras_shape #sys.stdout.flush() print "== BLOCK 5 ==" print "no pooling" block5_act, block5_pool = unet_block_down(input=block4_pool, nb_filter=1024, doDropout=True, doPooling=False, doBatchNorm=self.doBatchNormAll) print "block5 ", block5_pool._keras_shape #sys.stdout.flush() print "==============" print print "== BLOCK 4 UP ==" block4_up = unet_block_up(input=block5_act, nb_filter=512, down_block_out=block4_act, doBatchNorm=self.doBatchNormAll) print "block4 up", block4_up._keras_shape print #sys.stdout.flush() print "== BLOCK 3 UP ==" block3_up = unet_block_up(input=block4_up, nb_filter=256, down_block_out=block3_act, doBatchNorm=self.doBatchNormAll) print "block3 up", block3_up._keras_shape print #sys.stdout.flush() print "== BLOCK 2 UP ==" block2_up = unet_block_up(input=block3_up, nb_filter=128, down_block_out=block2_act, doBatchNorm=self.doBatchNormAll) print "block2 up", block2_up._keras_shape #sys.stdout.flush() print print "== BLOCK 1 UP ==" block1_up = unet_block_up(input=block2_up, nb_filter=64, down_block_out=block1_act, doBatchNorm=self.doBatchNormAll) print "block1 up", block1_up._keras_shape sys.stdout.flush() print "== 1x1 convolution ==" output = Convolution2D(nb_filter=1, nb_row=1, nb_col=1, subsample=(1,1), init=self.initialization, activation='sigmoid', border_mode="valid")(block1_up) print "output ", output._keras_shape output_flat = Flatten()(output) print "output flat ", output_flat._keras_shape model = Model(input=input, output=output_flat) #model = Model(input=input, output=block1_act) #sys.stdout.flush() ''' if doFineTune: model = model_from_json(open('unet_sampling_best.json').read()) model.load_weights('unet_sampling_best_weights.h5') ''' sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, 
nesterov=False) #model.compile(loss='mse', optimizer=sgd) model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd) #model.compile(loss=unet_crossentropy_loss, optimizer=sgd) print 'sampling data...' d = data.sample() data_x = d[0] data_y = d[1] data_x_val = d[2] data_y_val = d[3] reset = d[4] patchSize = self.patchSize patchSize_out = self.patchSize_out print 'patchSize:',patchSize,'patchSize_out:', patchSize_out data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize]) data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize]) data_label_val = data_y_val val_samples = data_y_val.shape[0] print data_x.shape, data_y.shape print 'got data...' print "current learning rate: ", model.optimizer.lr.get_value() o = model.fit(data_x, data_y, batch_size=1, nb_epoch=1) print o.history["loss"] exit(1) def train(self, offline=False, data=None, mean=None, std=None): print 'Unet.train' # input data should be large patches as prediction is also over large patches print print "=== building network ===" print "== BLOCK 1 ==" input = Input(shape=(1, self.patchSize, self.patchSize)) print "input ", input._keras_shape block1_act, block1_pool = UNET.unet_block_down(input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll) print "block1 act ", block1_act._keras_shape print "block1 ", block1_pool._keras_shape #sys.stdout.flush() print "== BLOCK 2 ==" block2_act, block2_pool = UNET.unet_block_down(input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll) print "block2 ", block2_pool._keras_shape #sys.stdout.flush() print "== BLOCK 3 ==" block3_act, block3_pool = UNET.unet_block_down(input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll) print "block3 ", block3_pool._keras_shape #sys.stdout.flush() print "== BLOCK 4 ==" block4_act, block4_pool = UNET.unet_block_down(input=block3_pool, nb_filter=512, doDropout=True, doBatchNorm=self.doBatchNormAll) print "block4 ", block4_pool._keras_shape #sys.stdout.flush() print "== BLOCK 5 ==" print "no pooling" 
block5_act, block5_pool = UNET.unet_block_down(input=block4_pool, nb_filter=1024, doDropout=True, doPooling=False, doBatchNorm=self.doBatchNormAll) print "block5 ", block5_pool._keras_shape #sys.stdout.flush() print "==============" print print "== BLOCK 4 UP ==" block4_up = UNET.unet_block_up(input=block5_act, nb_filter=512, down_block_out=block4_act, doBatchNorm=self.doBatchNormAll) print "block4 up", block4_up._keras_shape print #sys.stdout.flush() print "== BLOCK 3 UP ==" block3_up = UNET.unet_block_up(input=block4_up, nb_filter=256, down_block_out=block3_act, doBatchNorm=self.doBatchNormAll) print "block3 up", block3_up._keras_shape print #sys.stdout.flush() print "== BLOCK 2 UP ==" block2_up = UNET.unet_block_up(input=block3_up, nb_filter=128, down_block_out=block2_act, doBatchNorm=self.doBatchNormAll) print "block2 up", block2_up._keras_shape #sys.stdout.flush() print print "== BLOCK 1 UP ==" block1_up = UNET.unet_block_up(input=block2_up, nb_filter=64, down_block_out=block1_act, doBatchNorm=self.doBatchNormAll) print "block1 up", block1_up._keras_shape sys.stdout.flush() print "== 1x1 convolution ==" output = Convolution2D(nb_filter=1, nb_row=1, nb_col=1, subsample=(1,1), init=self.initialization, activation='sigmoid', border_mode="valid")(block1_up) print "output ", output._keras_shape output_flat = Flatten()(output) print "output flat ", output_flat._keras_shape print 'Unet.train' #self.load() if self.model == None: j_path, w_path = self.get_model_paths( ) if os.path.exists( j_path ) and os.path.exists( w_path ): print 'loading from: ', j_path self.model = model_from_json(open( j_path ).read()) self.model.load_weights( w_path ) else: print 'creating....' self.model = Model(input=input, output=output_flat) sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False) #self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd) self.model.compile(loss=UNET.unet_crossentropy_loss, optimizer=sgd) print 'sampling data...' 
d = data.sample() data_x = d[0] data_y = d[1] data_x_val = d[2] data_y_val = d[3] reset = d[4] patchSize = self.patchSize patchSize_out = self.patchSize_out print 'patchSize:',patchSize,'patchSize_out:', patchSize_out data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize]) data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize]) data_label_val = data_y_val val_samples = data_y_val.shape[0] print data_x.shape, data_y.shape print 'got data...' print "current learning rate: ", self.model.optimizer.lr.get_value() self.model.fit(data_x, data_y, batch_size=1, nb_epoch=1) im_pred = 1-self.model.predict(x=data_x_val, batch_size = 1) print im_pred.shape print data_label_val.shape print data_x_val.shape mean_val_rand = 0.0 for val_ind in xrange(val_samples): im_pred_single = np.reshape(im_pred[val_ind,:], (patchSize_out,patchSize_out)) im_gt = np.reshape(data_label_val[val_ind], (patchSize_out,patchSize_out)) validation_rand = Rand_membrane_prob(im_pred_single, im_gt) mean_val_rand += validation_rand print 'val:', val_ind, 'rand:', validation_rand, 'mrand:', mean_val_rand mean_val_rand /= np.double(val_samples) print "validation RAND ", mean_val_rand exit(1) self.save_current() print mean_val_rand, " > ", self.best_val_loss_so_far print mean_val_rand - self.best_val_loss_so_far if mean_val_rand > self.best_val_loss_so_far: self.best_val_loss_so_far = mean_val_rand print "NEW BEST MODEL" self.save_best() self.patience_counter=0 else: self.patience_counter +=1 # no progress anymore, need to decrease learning rate if self.patience_counter == self.patience: print "DECREASING LEARNING RATE" print "before: ", learning_rate learning_rate *= 0.1 print "now: ", learning_rate self.model.optimizer.lr.set_value(learning_rate) self.patience = self.patience_reset self.patience_counter = 0 # reload best state seen so far self.model = self.load() ''' model = model_from_json(open(filename+'.json').read()) model.load_weights(filename+'_weights.h5') 
model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd) ''' def train_offline(self, data, mean=None, std=None): pass def classify(self, image, mean=None, std=None): print 'Unet.classify' def predict(self, image, mean=None, std=None, threshold=0.5): print 'Unet.predict' start_time = time.clock() j_path, w_path = self.get_model_paths( ) print 'loading model from:', j_path model = model_from_json(open( j_path ).read()) model.load_weights( w_path ) sgd = SGD(lr=0.01, decay=0, momentum=0.0, nesterov=False) model.compile(loss='categorical_crossentropy', optimizer=sgd) image = image - 0.5 probImage = np.zeros(image.shape) # count compilation time to init row = 0 col = 0 patch = image[row:row+patchSize,col:col+patchSize] data = np.reshape(patch, (1,1,patchSize,patchSize)) probs = model.predict(x=data, batch_size=1) init_time = time.clock() #print "Initialization took: ", init_time - start_time patchSize = self.patchSize patchSize_out = self.patchSize_out image_orig = image.copy() for rotation in range(1): image = np.rot90(image_orig, rotation) # pad the image padding_ul = int(np.ceil((patchSize - patchSize_out)/2.0)) # need large padding for lower right corner paddedImage = np.pad(image, patchSize, mode='reflect') needed_ul_padding = patchSize - padding_ul paddedImage = paddedImage[needed_ul_padding:, needed_ul_padding:] probImage_tmp = np.zeros(image.shape) for row in xrange(0,image.shape[0],patchSize_out): for col in xrange(0,image.shape[1],patchSize_out): patch = paddedImage[row:row+patchSize,col:col+patchSize] data = np.reshape(patch, (1,1,patchSize,patchSize)) probs = 1-model.predict(x=data, batch_size = 1) probs = np.reshape(probs, (patchSize_out,patchSize_out)) row_end = patchSize_out if row+patchSize_out > probImage.shape[0]: row_end = probImage.shape[0]-row col_end = patchSize_out if col+patchSize_out > probImage.shape[1]: col_end = probImage.shape[1]-col probImage_tmp[row:row+row_end,col:col+col_end] = probs[:row_end,:col_end] probImage += 
np.rot90(probImage_tmp, 4-rotation) probImage = probImage / 1.0 prob = self.threshold( probImage, factor=threshold ) prob = prob.astype(dtype=int) prob = prob.flatten() end_time = time.clock() print "Prediction took: ", end_time - init_time print "Speed: ", 1./(end_time - init_time) print "Time total: ", end_time-start_time print 'results :', np.bincount( prob ) print prob.shape print prob return prob def threshold(self, prob, factor=0.5): prob[ prob >= factor ] = 9 prob[ prob < factor ] = 0 prob[ prob == 9 ] = 1 return prob def get_model_paths(self): path = self.get_path() j_path = '%s_best.json'%(path) w_path = '%s_best_weights.h5'%(path) # first, attempt the best model, otherwise default to the latest if not os.path.exists( j_path ) and not os.path.exists( w_path ): path = Utility.get_dir(self.path) j_path = '%s/%s_%s.json'%(Paths.Models, self.id, self.type) w_path = '%s/%s_%s_weights.h5'%(Paths.Models, self.id, self.type) return j_path.lower(), w_path.lower() def load(self): j_path, w_path = self.get_model_paths( ) if os.path.exists( j_path ) and os.path.exists( w_path ): print 'loading from: ', j_path self.model = model_from_json(open( j_path ).read()) self.model.load_weights( w_path ) else: print 'creating....' 
inp, out = self.gen_input_output() print inp.shape, out.shape self.model = Model(input=inp, output=out) sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False) self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd) def save_current(self): path = Utility.get_dir(self.path) j_path = '%s/%s_%s.json'%(Paths.Models, self.id, self.type) w_path = '%s/%s_%s_weights.h5'%(Paths.Models, self.id, self.type) j_path = j_path.lower() w_path = w_path.lower() json_string = self.model.to_json() open(j_path, 'w').write(json_string) self.model.save_weights(w_path, overwrite=True) def save_best(self): print 'Unet.save' path = Utility.get_dir(self.path) revision = 0 if not self.offline: revision = DB.getRevision( self.id ) revision = (revision+1)%10 path = '%s/%s_%s_%d'%(Paths.Models, self.id, self.type, revision) path = path.lower() j_path = '%s_best.json'%(path) w_path = '%s_best_weights.h5'%(path) j_path = j_path.lower() w_path = w_path.lower() print 'saving...', path # saving code here... json_string = self.model.to_json() open(j_path, 'w').write(json_string) self.model.save_weights(w_path, overwrite=True) if not self.offline: DB.finishSaveModel( self.id, revision ) def get_path(self): if self.offline: return self.path rev = DB.getRevision( self.id ) path = '%s/%s.%s.%d'%(Paths.Models, self.id, self.type, rev ) return path.lower() def reportTrainingStats(self, elapsedTime, batchIndex, valLoss, trainCost, mode=0): DB.storeTrainingStats( self.id, valLoss, trainCost, mode=mode) msg = '(%0.1f) %i %f%%'%\ ( elapsedTime, batchIndex, valLoss ) status = '[%f]'%(trainCost) Utility.report_status( msg, status ) # need to define a custom loss, because all pre-implementations # seem to assume that scores over patch add up to one which # they clearly don't and shouldn't @staticmethod def unet_crossentropy_loss(y_true, y_pred): weight_class_1 = 1. 
epsilon = 1.0e-4 y_pred_clipped = T.clip(y_pred, epsilon, 1.0-epsilon) loss_vector = -T.mean(weight_class_1*y_true * T.log(y_pred_clipped) + (1-y_true) * T.log(1-y_pred_clipped), axis=1) average_loss = T.mean(loss_vector) return average_loss @staticmethod def unet_crossentropy_loss_sampled(y_true, y_pred): epsilon = 1.0e-4 y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon)) y_true = T.flatten(y_true) # this seems to work # it is super ugly though and I am sure there is a better way to do it # but I am struggling with theano to cooperate # filter the right indices indPos = T.nonzero(y_true)[0] # no idea why this is a tuple indNeg = T.nonzero(1-y_true)[0] # shuffle n = indPos.shape[0] indPos = indPos[UNET.srng.permutation(n=n)] n = indNeg.shape[0] indNeg = indNeg[UNET.srng.permutation(n=n)] # take equal number of samples depending on which class has less n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64') indPos = indPos[:n_samples] indNeg = indNeg[:n_samples] loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg])) average_loss = T.mean(loss_vector) return average_loss @staticmethod def unet_block_down(input, nb_filter, doPooling=True, doDropout=False, doBatchNorm=False, initialization = 'glorot_uniform', weight_decay = 0.): # first convolutional block consisting of 2 conv layers plus activation, then maxpool. 
# All are valid area, not same act1 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(input) if doBatchNorm: act1 = BatchNormalization(mode=0, axis=1)(act1) act2 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(act1) if doBatchNorm: act2 = BatchNormalization(mode=0, axis=1)(act2) if doDropout: act2 = Dropout(0.5)(act2) if doPooling: # now downsamplig with maxpool pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode="valid")(act2) else: pool1 = act2 return (act2, pool1) # need to define lambda layer to implement cropping # input is a tensor of size (batchsize, channels, width, height) @staticmethod def crop_layer( x, cs): cropSize = cs return x[:,:,cropSize:-cropSize, cropSize:-cropSize] @staticmethod def unet_block_up(input, nb_filter, down_block_out, doBatchNorm=False, initialization = 'glorot_uniform', weight_decay = 0.): print "This is unet_block_up" print "input ", input._keras_shape # upsampling up_sampled = UpSampling2D(size=(2,2))(input) print "upsampled ", up_sampled._keras_shape # up-convolution conv_up = Convolution2D(nb_filter=nb_filter, nb_row=2, nb_col=2, subsample=(1,1), init=initialization, activation='relu', border_mode="same", W_regularizer=l2(weight_decay))(up_sampled) print "up-convolution ", conv_up._keras_shape # concatenation with cropped high res output # this is too large and needs to be cropped print "to be merged with ", down_block_out._keras_shape #padding_1 = int((down_block_out._keras_shape[2] - conv_up._keras_shape[2])/2) #padding_2 = int((down_block_out._keras_shape[3] - conv_up._keras_shape[3])/2) #print "padding: ", (padding_1, padding_2) #conv_up_padded = ZeroPadding2D(padding=(padding_1, padding_2))(conv_up) #merged = merge([conv_up_padded, down_block_out], mode='concat', concat_axis=1) 
cropSize = int((down_block_out._keras_shape[2] - conv_up._keras_shape[2])/2) down_block_out_cropped = Lambda(UNET.crop_layer, output_shape=conv_up._keras_shape[1:], arguments={"cs":cropSize})(down_block_out) print "cropped layer size: ", down_block_out_cropped._keras_shape merged = merge([conv_up, down_block_out_cropped], mode='concat', concat_axis=1) print "merged ", merged._keras_shape # two 3x3 convolutions with ReLU # first one halves the feature channels act1 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(merged) if doBatchNorm: act1 = BatchNormalization(mode=0, axis=1)(act1) print "conv1 ", act1._keras_shape act2 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(act1) if doBatchNorm: act2 = BatchNormalization(mode=0, axis=1)(act2) print "conv2 ", act2._keras_shape return act2
def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
             w=None, index_permute=None, index_permute_reverse=None):
    """Build a bucketed linear layer whose bucket weights are upper/lower products.

    The ``n_in`` input columns are shuffled with ``index_permute`` and cut
    into ``n_in / d_bucket + 1`` consecutive buckets of width ``d_bucket``;
    the final bucket takes whatever columns remain.  For each bucket the
    weight is ``triu(w_b) . L`` where ``L`` is ``tril(w_b)`` with its
    diagonal overwritten by ones, so ``log|det W|`` is the sum of
    ``log|diag(triu(w_b))|``.

    Args:
        rng: object whose ``uniform(low, high, size)`` initialises ``w``
            when ``w`` is None.
        input: symbolic matrix, indexed as ``input[:, index_permute]``.
        n_in: number of input columns.
        n_batch: length of the ``log_jacobian`` vector.
        d_bucket: bucket width.
        activation: callable applied to the concatenated linear output, or
            None for a purely linear layer.
        activation_deriv: callable applied to each bucket's pre-activation;
            only used when ``activation`` is not None.
        w: optional existing parameter tensor to reuse.
        index_permute: optional column permutation; drawn at random
            (seed 234) when None.
        index_permute_reverse: inverse permutation; only recomputed when
            ``index_permute`` is None.

    Sets ``input``, ``index_permute``, ``index_permute_reverse``,
    ``permuted_input``, ``w``, ``log_jacobian``, ``output`` and ``params``.
    """
    stream = RandomStreams(seed=234)
    self.input = input
    n_bucket = n_in / d_bucket + 1

    # Randomly permute the input space unless the caller supplied a permutation.
    if index_permute is None:
        index_permute = stream.permutation(n=n_in)
        index_permute_reverse = T.argsort(index_permute)
    self.index_permute = index_permute
    self.index_permute_reverse = index_permute_reverse
    self.permuted_input = input[:, index_permute]

    # Initialise one (d_bucket x d_bucket) matrix per bucket.
    if w is None:
        limit = numpy.sqrt(3. / d_bucket)
        initial = rng.uniform(low=-limit, high=limit,
                              size=(n_bucket, d_bucket, d_bucket))
        w = theano.shared(value=numpy.asarray(initial, dtype=theano.config.floatX),
                          name='w')
    self.w = w

    # Accumulate outputs and the log-Jacobian bucket by bucket.
    log_jacobian = T.alloc(0, n_batch)
    for b in xrange(n_bucket):
        start = b * d_bucket
        # The last bucket only holds the leftover columns.
        bucket_size = (n_in - start) if b == n_bucket - 1 else d_bucket

        x_b = self.permuted_input[:, start:start + bucket_size]
        w_b = self.w[b, :bucket_size, :bucket_size]

        upper = T.triu(w_b)
        lower = T.extra_ops.fill_diagonal(T.tril(w_b), 1.)
        # The lower factor has a unit diagonal, so only the upper one contributes.
        log_det_upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(upper))).sum()
        log_jacobian = log_jacobian + T.alloc(log_det_upper, n_batch)

        lin_output_b = T.dot(x_b, T.dot(upper, lower))
        if b == 0:
            lin_output = lin_output_b
        else:
            lin_output = T.concatenate([lin_output, lin_output_b], axis=1)

        if activation is not None:
            # Element-wise activation: add the log of its derivative per unit.
            slopes = activation_deriv(lin_output_b)
            log_jacobian = log_jacobian + T.log(T.abs_(slopes)).sum(axis=1)

    self.log_jacobian = log_jacobian
    if activation is None:
        self.output = lin_output
    else:
        self.output = activation(lin_output)
    self.params = [w]
def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
             w=None, index_permute=None, index_permute_reverse=None):
    """Build a bucketed linear layer with Cholesky-whitened bucket weights.

    The ``n_in`` input columns are shuffled with ``index_permute`` and cut
    into ``n_in // d_bucket + 1`` consecutive buckets of width ``d_bucket``;
    the final bucket takes whatever columns remain.  For each bucket the
    weight is ``W = w_b . chol((w_b^T w_b + 0.001 I)^-1)``.

    Args:
        rng: object whose ``uniform(low, high, size)`` initialises ``w``
            when ``w`` is None.
        input: symbolic matrix, indexed as ``input[:, index_permute]``.
        n_in: number of input columns.
        n_batch: length of the ``log_jacobian`` vector.
        d_bucket: bucket width.
        activation: callable applied to the concatenated linear output, or
            None for a purely linear layer.
        activation_deriv: callable giving the element-wise derivative of
            ``activation``; only used when ``activation`` is not None.
        w: optional existing parameter tensor to reuse.
        index_permute: optional column permutation; drawn at random
            (seed 234) when None.
        index_permute_reverse: inverse permutation; only recomputed when
            ``index_permute`` is None.

    Sets ``input``, ``index_permute``, ``index_permute_reverse``,
    ``permuted_input``, ``w``, ``log_jacobian``, ``output`` and ``params``.
    As in the original code, ``log_jacobian`` only receives contributions
    when ``activation`` is not None.
    """
    srng = RandomStreams(seed=234)
    # Floor division keeps the bucket count an integer on any interpreter.
    n_bucket = n_in // d_bucket + 1
    self.input = input

    # Randomly permute the input space unless the caller supplied a permutation.
    if index_permute is None:
        index_permute = srng.permutation(n=n_in)
        index_permute_reverse = T.argsort(index_permute)
    self.index_permute = index_permute
    self.index_permute_reverse = index_permute_reverse
    permuted_input = input[:, index_permute]
    self.permuted_input = permuted_input

    # Initialise one (d_bucket x d_bucket) matrix per bucket.
    if w is None:
        bound = numpy.sqrt(3. / d_bucket)
        w_values = numpy.asarray(
            rng.uniform(low=-bound, high=bound,
                        size=(n_bucket, d_bucket, d_bucket)),
            dtype=theano.config.floatX)
        w = theano.shared(value=w_values, name='w')
    self.w = w

    # Compute outputs and Jacobians bucket by bucket.
    log_jacobian = T.alloc(0, n_batch)
    for b in xrange(n_bucket):
        bucket_size = d_bucket
        if b == n_bucket - 1:
            # The last bucket only holds the leftover columns.
            bucket_size = n_in - b * d_bucket
        x_b = self.permuted_input[:, b * d_bucket:b * d_bucket + bucket_size]
        w_b = w[b, :bucket_size, :bucket_size]

        gram = T.dot(T.transpose(w_b), w_b) + 0.001 * T.eye(bucket_size)
        whitener = T.slinalg.Cholesky()(T.nlinalg.MatrixInverse()(gram))
        W = T.dot(w_b, whitener)

        lin_output_b = T.dot(x_b, W)
        if b > 0:
            lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
        else:
            lin_output = lin_output_b

        if activation is not None:
            # d act(xW)_j / d x_i = W_ij * act'((xW)_j): the derivative must be
            # taken at the pre-activation, not at the raw bucket input.
            derivs = activation_deriv(lin_output_b)
            # The Jacobian of sample n is W . diag(derivs[n]), hence
            # log|det| = log|det W| + sum_j log|derivs[n, j]|.  This replaces a
            # per-sample loop whose T.inc_subtensor result was thrown away
            # (inc_subtensor returns a new variable), which left log_jacobian
            # identically zero.
            log_det_W = T.log(T.abs_(T.nlinalg.Det()(W)))
            log_jacobian = (log_jacobian
                            + T.alloc(log_det_W, n_batch)
                            + T.log(T.abs_(derivs)).sum(axis=1))

    self.log_jacobian = log_jacobian
    self.output = (
        lin_output if activation is None
        else activation(lin_output)
    )
    self.params = [w]
class Visual(task.Task):
    """Task mapping an encoded input sequence to a target vector.

    The input is run through ``Encoder``; the result of ``last`` on the
    encoder output is projected by a ``Dense`` layer and compared with the
    target using one of three costs selected by the config (see ``cost``).
    """

    def __init__(self, config):
        # autoassign inspects locals(), so it must run before any other local
        # name is bound in this scope.
        autoassign(locals())
        self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
        # SECURITY NOTE(review): the activation name is resolved with eval();
        # this executes arbitrary code if the config comes from an untrusted
        # source.
        activation_fn = eval(config.get('activation', 'clipped_rectify'))
        self.Encode = Encoder(config['size_vocab'],
                              config['size_embed'],
                              config['size'],
                              config['depth'],
                              activation=activation_fn,
                              residual=config.get('residual',False))
        self.ToImg = Dense(config['size'], config['size_target'])
        self.inputs = [T.imatrix()]
        self.target = T.fmatrix()
        # Make sure the 'margin' key always exists; the random stream is only
        # needed by the margin cost.
        self.config['margin'] = self.config.get('margin', False)
        if self.config['margin']:
            self.srng = RandomStreams(seed=234)

    def params(self):
        """Return the parameters of the encoder and the output projection."""
        return params(self.Encode, self.ToImg)

    def __call__(self, input):
        """Encode ``input``, apply ``last`` and project with ``ToImg``."""
        encoded = self.Encode(input)
        return self.ToImg(last(encoded))

    def cost(self, target, prediction):
        """Pick the cost: margin first, then contrastive, else cosine distance."""
        if self.config['margin']:
            return self.Margin(target, prediction, dist=CosineDistance, d=1)
        if self.config.get('contrastive'):
            return self.contrastive(target, prediction, margin=0.2)
        return CosineDistance(target, prediction)

    def contrastive(self, i, s, margin=0.2):
        """Mean hinge cost over all mismatched (image, sentence) pairs.

        i: (fixed) image embedding.
        s: sentence embedding.
        """
        neg_sim = - util.cosine_matrix(i, s)
        matched = neg_sim.diagonal()
        # Diagonal (matching) score broadcast along rows: each entry is
        # compared with the scores in its column (all contrastive images for
        # each sentence).
        per_sentence = T.maximum(0, margin - neg_sim + matched)
        # Diagonal score broadcast along columns (all contrastive sentences
        # for each image).
        per_image = T.maximum(0, margin - neg_sim + matched.reshape((-1, 1)))
        # Matching pairs must not contribute: clear the diagonal.
        total = fill_diagonal(per_sentence + per_image, 0)
        return total.mean()

    def Margin(self, U, V, dist=CosineDistance, d=1.0):
        """Hinge on dist(U, V) against dist(U, row-shuffled V) with margin ``d``.

        The shuffle is random, so the cost is non-deterministic.
        """
        shuffle = self.srng.permutation(n=T.shape(V)[0], size=(1,))
        V_ = (V[shuffle,]).reshape(T.shape(V))
        return T.maximum(0.0, dist(U, V) - dist(U, V_) + d)

    def args(self, item):
        """Extract the (input, target) pair from a data item."""
        return (item['input'], item['target_v'])

    def _make_representation(self):
        """Compile a function returning the encoder output for the inputs."""
        with context.context(training=False):
            representation = self.Encode(*self.inputs)
        return theano.function(self.inputs, representation)

    def _make_pile(self):
        """Compile a function returning ``GRU.intermediate`` of the embedded inputs."""
        with context.context(training=False):
            embedded = self.Encode.Embed(*self.inputs)
            pile = self.Encode.GRU.intermediate(embedded)
        return theano.function(self.inputs, pile)
def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
             w=None, index_permute=None, index_permute_reverse=None):
    """Build a bucketed upper/lower-product layer with inter-bucket warping.

    The ``n_in`` input columns are shuffled with ``index_permute`` and cut
    into ``n_in / d_bucket + 1`` consecutive buckets of width ``d_bucket``;
    the final bucket takes whatever columns remain.  Each bucket after the
    first has ``m_b`` added to its input before the linear map.  The weight
    is ``triu(w_b) . L`` where ``L`` is ``tril(w_b)`` with a unit diagonal,
    so ``log|det W|`` is the sum of ``log|diag(triu(w_b))|``.  The output
    columns are put back in the original order with
    ``index_permute_reverse``.

    Args:
        rng: object whose ``uniform(low, high, size)`` initialises ``w``
            when ``w`` is None.
        input: symbolic matrix, indexed as ``input[:, index_permute]``.
        n_in: number of input columns.
        n_batch: length of the ``log_jacobian`` vector.
        d_bucket: bucket width.
        activation: callable applied to the un-permuted linear output, or
            None for a purely linear layer.
        activation_deriv: callable applied to each bucket's pre-activation;
            only used when ``activation`` is not None.
        w: optional existing parameter tensor to reuse.
        index_permute: optional column permutation; drawn at random
            (seed 234) when None.
        index_permute_reverse: inverse permutation; only recomputed when
            ``index_permute`` is None.

    Sets ``input``, ``index_permute``, ``index_permute_reverse``,
    ``permuted_input``, ``w``, ``log_jacobian``, ``output`` and ``params``.
    """
    stream = RandomStreams(seed=234)
    self.input = input
    n_bucket = n_in / d_bucket + 1

    # Randomly permute the input space unless the caller supplied a permutation.
    if index_permute is None:
        index_permute = stream.permutation(n=n_in)
        index_permute_reverse = T.argsort(index_permute)
    self.index_permute = index_permute
    self.index_permute_reverse = index_permute_reverse
    self.permuted_input = input[:, index_permute]

    # Initialise one (d_bucket x d_bucket) matrix per bucket.
    if w is None:
        limit = numpy.sqrt(3. / d_bucket)
        initial = rng.uniform(low=-limit, high=limit,
                              size=(n_bucket, d_bucket, d_bucket))
        w = theano.shared(value=numpy.asarray(initial, dtype=theano.config.floatX),
                          name='w')
    self.w = w

    # Accumulate outputs and the log-Jacobian bucket by bucket.
    log_jacobian = T.alloc(0, n_batch)
    for b in xrange(n_bucket):
        start = b * d_bucket
        # The last bucket only holds the leftover columns.
        bucket_size = (n_in - start) if b == n_bucket - 1 else d_bucket

        if b > 0:
            # Previous bucket's input, meant to be warped and added to the
            # new input; it is not used any further in this code.
            prev_input = x_b
        x_b = self.permuted_input[:, start:start + bucket_size]
        w_b = self.w[b, :bucket_size, :bucket_size]

        if b > 0:
            # NOTE(review): `m_b` is never assigned in this method.  Unless a
            # module-level `m_b` exists, this raises NameError as soon as there
            # is a second bucket.  Presumably it should be the warp of
            # `prev_input` -- confirm with the author before relying on this.
            x_b_plus = x_b + m_b
        else:
            x_b_plus = x_b

        upper = T.triu(w_b)
        lower = T.extra_ops.fill_diagonal(T.tril(w_b), 1.)
        # The lower factor has a unit diagonal, so only the upper one contributes.
        log_det_upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(upper))).sum()
        log_jacobian = log_jacobian + T.alloc(log_det_upper, n_batch)

        lin_output_b = T.dot(x_b_plus, T.dot(upper, lower))
        if b == 0:
            lin_output = lin_output_b
        else:
            lin_output = T.concatenate([lin_output, lin_output_b], axis=1)

        if activation is not None:
            slopes = activation_deriv(lin_output_b)
            log_jacobian = log_jacobian + T.log(T.abs_(slopes)).sum(axis=1)

    self.log_jacobian = log_jacobian
    if activation is None:
        self.output = lin_output[:, index_permute_reverse]
    else:
        self.output = activation(lin_output[:, index_permute_reverse])
    self.params = [w]
class UNET(object): def __init__(self, id, rng, batch_size, patch_size=572, patch_size_out=388, offline=False, path=None, train_time=5.0, learning_rate=0.01, momentum=0.95): self.id = id self.type = 'UNET' self.offline = offline self.done = False self.path = path self.batchSize = batch_size self.patchSize = patch_size self.patchSize_out = patch_size_out self.learning_rate = learning_rate self.momentum = momentum self.best_validation_loss = numpy.inf self.trainTime = train_time self.resample = False self.error = np.inf self.error_threshold = 0.06 self.best_val_loss_so_far = 0 self.patience_counter = 0 self.patience = 100 self.patience_reset = 100 self.doBatchNormAll = False self.doFineTune = False self.weight_decay = 0. self.weight_class_1 = 1. self.initialization = 'glorot_uniform' self.model = None self.srng = RandomStreams(1234) self.initialize() def initialize(self): print 'Unet.initialize' def trainiold(self, offline=False, data=None, mean=None, std=None): print 'UNET.train()' # need to define a custom loss, because all pre-implementations # seem to assume that scores over patch add up to one which # they clearly don't and shouldn't def unet_crossentropy_loss(y_true, y_pred): weight_class_1 = 1. 
epsilon = 1.0e-4 y_pred_clipped = T.clip(y_pred, epsilon, 1.0 - epsilon) loss_vector = -T.mean( weight_class_1 * y_true * T.log(y_pred_clipped) + (1 - y_true) * T.log(1 - y_pred_clipped), axis=1) average_loss = T.mean(loss_vector) return average_loss def unet_crossentropy_loss_sampled(y_true, y_pred): print 'unet_crossentropy_loss_sampled' epsilon = 1.0e-4 y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon)) y_true = T.flatten(y_true) # this seems to work # it is super ugly though and I am sure there is a better way to do it # but I am struggling with theano to cooperate # filter the right indices indPos = T.nonzero(y_true)[0] # no idea why this is a tuple indNeg = T.nonzero(1 - y_true)[0] # shuffle n = indPos.shape[0] indPos = indPos[self.srng.permutation(n=n)] n = indNeg.shape[0] indNeg = indNeg[self.srng.permutation(n=n)] # take equal number of samples depending on which class has less n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]), dtype='int64') indPos = indPos[:n_samples] indNeg = indNeg[:n_samples] loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean( T.log(1 - y_pred_clipped[indNeg])) average_loss = T.mean(loss_vector) #return average_loss return T.mean(T.log(y_pred_clipped[indPos])) # input data should be large patches as prediction is also over large patches print print "=== building network ===" print "== BLOCK 1 ==" input = Input(shape=(1, self.patchSize, self.patchSize)) print "input ", input._keras_shape block1_act, block1_pool = unet_block_down( input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll) print "block1 act ", block1_act._keras_shape print "block1 ", block1_pool._keras_shape #sys.stdout.flush() print "== BLOCK 2 ==" block2_act, block2_pool = unet_block_down( input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll) print "block2 ", block2_pool._keras_shape #sys.stdout.flush() print "== BLOCK 3 ==" block3_act, block3_pool = unet_block_down( input=block2_pool, nb_filter=256, 
doBatchNorm=self.doBatchNormAll) print "block3 ", block3_pool._keras_shape #sys.stdout.flush() print "== BLOCK 4 ==" block4_act, block4_pool = unet_block_down( input=block3_pool, nb_filter=512, doDropout=True, doBatchNorm=self.doBatchNormAll) print "block4 ", block4_pool._keras_shape #sys.stdout.flush() print "== BLOCK 5 ==" print "no pooling" block5_act, block5_pool = unet_block_down( input=block4_pool, nb_filter=1024, doDropout=True, doPooling=False, doBatchNorm=self.doBatchNormAll) print "block5 ", block5_pool._keras_shape #sys.stdout.flush() print "==============" print print "== BLOCK 4 UP ==" block4_up = unet_block_up(input=block5_act, nb_filter=512, down_block_out=block4_act, doBatchNorm=self.doBatchNormAll) print "block4 up", block4_up._keras_shape print #sys.stdout.flush() print "== BLOCK 3 UP ==" block3_up = unet_block_up(input=block4_up, nb_filter=256, down_block_out=block3_act, doBatchNorm=self.doBatchNormAll) print "block3 up", block3_up._keras_shape print #sys.stdout.flush() print "== BLOCK 2 UP ==" block2_up = unet_block_up(input=block3_up, nb_filter=128, down_block_out=block2_act, doBatchNorm=self.doBatchNormAll) print "block2 up", block2_up._keras_shape #sys.stdout.flush() print print "== BLOCK 1 UP ==" block1_up = unet_block_up(input=block2_up, nb_filter=64, down_block_out=block1_act, doBatchNorm=self.doBatchNormAll) print "block1 up", block1_up._keras_shape sys.stdout.flush() print "== 1x1 convolution ==" output = Convolution2D(nb_filter=1, nb_row=1, nb_col=1, subsample=(1, 1), init=self.initialization, activation='sigmoid', border_mode="valid")(block1_up) print "output ", output._keras_shape output_flat = Flatten()(output) print "output flat ", output_flat._keras_shape model = Model(input=input, output=output_flat) #model = Model(input=input, output=block1_act) #sys.stdout.flush() ''' if doFineTune: model = model_from_json(open('unet_sampling_best.json').read()) model.load_weights('unet_sampling_best_weights.h5') ''' sgd = 
SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False) #model.compile(loss='mse', optimizer=sgd) model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd) #model.compile(loss=unet_crossentropy_loss, optimizer=sgd) print 'sampling data...' d = data.sample() data_x = d[0] data_y = d[1] data_x_val = d[2] data_y_val = d[3] reset = d[4] patchSize = self.patchSize patchSize_out = self.patchSize_out print 'patchSize:', patchSize, 'patchSize_out:', patchSize_out data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize]) data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize]) data_label_val = data_y_val val_samples = data_y_val.shape[0] print data_x.shape, data_y.shape print 'got data...' print "current learning rate: ", model.optimizer.lr.get_value() o = model.fit(data_x, data_y, batch_size=1, nb_epoch=1) print o.history["loss"] exit(1) def train(self, offline=False, data=None, mean=None, std=None): print 'Unet.train' # input data should be large patches as prediction is also over large patches print print "=== building network ===" print "== BLOCK 1 ==" input = Input(shape=(1, self.patchSize, self.patchSize)) print "input ", input._keras_shape block1_act, block1_pool = UNET.unet_block_down( input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll) print "block1 act ", block1_act._keras_shape print "block1 ", block1_pool._keras_shape #sys.stdout.flush() print "== BLOCK 2 ==" block2_act, block2_pool = UNET.unet_block_down( input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll) print "block2 ", block2_pool._keras_shape #sys.stdout.flush() print "== BLOCK 3 ==" block3_act, block3_pool = UNET.unet_block_down( input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll) print "block3 ", block3_pool._keras_shape #sys.stdout.flush() print "== BLOCK 4 ==" block4_act, block4_pool = UNET.unet_block_down( input=block3_pool, nb_filter=512, doDropout=True, doBatchNorm=self.doBatchNormAll) print "block4 ", 
block4_pool._keras_shape #sys.stdout.flush() print "== BLOCK 5 ==" print "no pooling" block5_act, block5_pool = UNET.unet_block_down( input=block4_pool, nb_filter=1024, doDropout=True, doPooling=False, doBatchNorm=self.doBatchNormAll) print "block5 ", block5_pool._keras_shape #sys.stdout.flush() print "==============" print print "== BLOCK 4 UP ==" block4_up = UNET.unet_block_up(input=block5_act, nb_filter=512, down_block_out=block4_act, doBatchNorm=self.doBatchNormAll) print "block4 up", block4_up._keras_shape print #sys.stdout.flush() print "== BLOCK 3 UP ==" block3_up = UNET.unet_block_up(input=block4_up, nb_filter=256, down_block_out=block3_act, doBatchNorm=self.doBatchNormAll) print "block3 up", block3_up._keras_shape print #sys.stdout.flush() print "== BLOCK 2 UP ==" block2_up = UNET.unet_block_up(input=block3_up, nb_filter=128, down_block_out=block2_act, doBatchNorm=self.doBatchNormAll) print "block2 up", block2_up._keras_shape #sys.stdout.flush() print print "== BLOCK 1 UP ==" block1_up = UNET.unet_block_up(input=block2_up, nb_filter=64, down_block_out=block1_act, doBatchNorm=self.doBatchNormAll) print "block1 up", block1_up._keras_shape sys.stdout.flush() print "== 1x1 convolution ==" output = Convolution2D(nb_filter=1, nb_row=1, nb_col=1, subsample=(1, 1), init=self.initialization, activation='sigmoid', border_mode="valid")(block1_up) print "output ", output._keras_shape output_flat = Flatten()(output) print "output flat ", output_flat._keras_shape print 'Unet.train' #self.load() if self.model == None: j_path, w_path = self.get_model_paths() if os.path.exists(j_path) and os.path.exists(w_path): print 'loading from: ', j_path self.model = model_from_json(open(j_path).read()) self.model.load_weights(w_path) else: print 'creating....' 
self.model = Model(input=input, output=output_flat) sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False) #self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd) self.model.compile(loss=UNET.unet_crossentropy_loss, optimizer=sgd) print 'sampling data...' d = data.sample() data_x = d[0] data_y = d[1] data_x_val = d[2] data_y_val = d[3] reset = d[4] patchSize = self.patchSize patchSize_out = self.patchSize_out print 'patchSize:', patchSize, 'patchSize_out:', patchSize_out data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize]) data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize]) data_label_val = data_y_val val_samples = data_y_val.shape[0] print data_x.shape, data_y.shape print 'got data...' print "current learning rate: ", self.model.optimizer.lr.get_value() self.model.fit(data_x, data_y, batch_size=1, nb_epoch=1) im_pred = 1 - self.model.predict(x=data_x_val, batch_size=1) print im_pred.shape print data_label_val.shape print data_x_val.shape mean_val_rand = 0.0 for val_ind in xrange(val_samples): im_pred_single = np.reshape(im_pred[val_ind, :], (patchSize_out, patchSize_out)) im_gt = np.reshape(data_label_val[val_ind], (patchSize_out, patchSize_out)) validation_rand = Rand_membrane_prob(im_pred_single, im_gt) mean_val_rand += validation_rand print 'val:', val_ind, 'rand:', validation_rand, 'mrand:', mean_val_rand mean_val_rand /= np.double(val_samples) print "validation RAND ", mean_val_rand exit(1) self.save_current() print mean_val_rand, " > ", self.best_val_loss_so_far print mean_val_rand - self.best_val_loss_so_far if mean_val_rand > self.best_val_loss_so_far: self.best_val_loss_so_far = mean_val_rand print "NEW BEST MODEL" self.save_best() self.patience_counter = 0 else: self.patience_counter += 1 # no progress anymore, need to decrease learning rate if self.patience_counter == self.patience: print "DECREASING LEARNING RATE" print "before: ", learning_rate learning_rate *= 0.1 print "now: ", 
learning_rate self.model.optimizer.lr.set_value(learning_rate) self.patience = self.patience_reset self.patience_counter = 0 # reload best state seen so far self.model = self.load() ''' model = model_from_json(open(filename+'.json').read()) model.load_weights(filename+'_weights.h5') model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd) ''' def train_offline(self, data, mean=None, std=None): pass def classify(self, image, mean=None, std=None): print 'Unet.classify' def predict(self, image, mean=None, std=None, threshold=0.5): print 'Unet.predict' start_time = time.clock() j_path, w_path = self.get_model_paths() print 'loading model from:', j_path model = model_from_json(open(j_path).read()) model.load_weights(w_path) sgd = SGD(lr=0.01, decay=0, momentum=0.0, nesterov=False) model.compile(loss='categorical_crossentropy', optimizer=sgd) image = image - 0.5 probImage = np.zeros(image.shape) # count compilation time to init row = 0 col = 0 patch = image[row:row + patchSize, col:col + patchSize] data = np.reshape(patch, (1, 1, patchSize, patchSize)) probs = model.predict(x=data, batch_size=1) init_time = time.clock() #print "Initialization took: ", init_time - start_time patchSize = self.patchSize patchSize_out = self.patchSize_out image_orig = image.copy() for rotation in range(1): image = np.rot90(image_orig, rotation) # pad the image padding_ul = int(np.ceil((patchSize - patchSize_out) / 2.0)) # need large padding for lower right corner paddedImage = np.pad(image, patchSize, mode='reflect') needed_ul_padding = patchSize - padding_ul paddedImage = paddedImage[needed_ul_padding:, needed_ul_padding:] probImage_tmp = np.zeros(image.shape) for row in xrange(0, image.shape[0], patchSize_out): for col in xrange(0, image.shape[1], patchSize_out): patch = paddedImage[row:row + patchSize, col:col + patchSize] data = np.reshape(patch, (1, 1, patchSize, patchSize)) probs = 1 - model.predict(x=data, batch_size=1) probs = np.reshape(probs, (patchSize_out, 
patchSize_out)) row_end = patchSize_out if row + patchSize_out > probImage.shape[0]: row_end = probImage.shape[0] - row col_end = patchSize_out if col + patchSize_out > probImage.shape[1]: col_end = probImage.shape[1] - col probImage_tmp[row:row + row_end, col:col + col_end] = probs[:row_end, :col_end] probImage += np.rot90(probImage_tmp, 4 - rotation) probImage = probImage / 1.0 prob = self.threshold(probImage, factor=threshold) prob = prob.astype(dtype=int) prob = prob.flatten() end_time = time.clock() print "Prediction took: ", end_time - init_time print "Speed: ", 1. / (end_time - init_time) print "Time total: ", end_time - start_time print 'results :', np.bincount(prob) print prob.shape print prob return prob def threshold(self, prob, factor=0.5): prob[prob >= factor] = 9 prob[prob < factor] = 0 prob[prob == 9] = 1 return prob def get_model_paths(self): path = self.get_path() j_path = '%s_best.json' % (path) w_path = '%s_best_weights.h5' % (path) # first, attempt the best model, otherwise default to the latest if not os.path.exists(j_path) and not os.path.exists(w_path): path = Utility.get_dir(self.path) j_path = '%s/%s_%s.json' % (Paths.Models, self.id, self.type) w_path = '%s/%s_%s_weights.h5' % (Paths.Models, self.id, self.type) return j_path.lower(), w_path.lower() def load(self): j_path, w_path = self.get_model_paths() if os.path.exists(j_path) and os.path.exists(w_path): print 'loading from: ', j_path self.model = model_from_json(open(j_path).read()) self.model.load_weights(w_path) else: print 'creating....' 
inp, out = self.gen_input_output() print inp.shape, out.shape self.model = Model(input=inp, output=out) sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False) self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd) def save_current(self): path = Utility.get_dir(self.path) j_path = '%s/%s_%s.json' % (Paths.Models, self.id, self.type) w_path = '%s/%s_%s_weights.h5' % (Paths.Models, self.id, self.type) j_path = j_path.lower() w_path = w_path.lower() json_string = self.model.to_json() open(j_path, 'w').write(json_string) self.model.save_weights(w_path, overwrite=True) def save_best(self): print 'Unet.save' path = Utility.get_dir(self.path) revision = 0 if not self.offline: revision = DB.getRevision(self.id) revision = (revision + 1) % 10 path = '%s/%s_%s_%d' % (Paths.Models, self.id, self.type, revision) path = path.lower() j_path = '%s_best.json' % (path) w_path = '%s_best_weights.h5' % (path) j_path = j_path.lower() w_path = w_path.lower() print 'saving...', path # saving code here... json_string = self.model.to_json() open(j_path, 'w').write(json_string) self.model.save_weights(w_path, overwrite=True) if not self.offline: DB.finishSaveModel(self.id, revision) def get_path(self): if self.offline: return self.path rev = DB.getRevision(self.id) path = '%s/%s.%s.%d' % (Paths.Models, self.id, self.type, rev) return path.lower() def reportTrainingStats(self, elapsedTime, batchIndex, valLoss, trainCost, mode=0): DB.storeTrainingStats(self.id, valLoss, trainCost, mode=mode) msg = '(%0.1f) %i %f%%'%\ ( elapsedTime, batchIndex, valLoss ) status = '[%f]' % (trainCost) Utility.report_status(msg, status) # need to define a custom loss, because all pre-implementations # seem to assume that scores over patch add up to one which # they clearly don't and shouldn't @staticmethod def unet_crossentropy_loss(y_true, y_pred): weight_class_1 = 1. 
epsilon = 1.0e-4 y_pred_clipped = T.clip(y_pred, epsilon, 1.0 - epsilon) loss_vector = -T.mean(weight_class_1 * y_true * T.log(y_pred_clipped) + (1 - y_true) * T.log(1 - y_pred_clipped), axis=1) average_loss = T.mean(loss_vector) return average_loss @staticmethod def unet_crossentropy_loss_sampled(y_true, y_pred): epsilon = 1.0e-4 y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon)) y_true = T.flatten(y_true) # this seems to work # it is super ugly though and I am sure there is a better way to do it # but I am struggling with theano to cooperate # filter the right indices indPos = T.nonzero(y_true)[0] # no idea why this is a tuple indNeg = T.nonzero(1 - y_true)[0] # shuffle n = indPos.shape[0] indPos = indPos[UNET.srng.permutation(n=n)] n = indNeg.shape[0] indNeg = indNeg[UNET.srng.permutation(n=n)] # take equal number of samples depending on which class has less n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]), dtype='int64') indPos = indPos[:n_samples] indNeg = indNeg[:n_samples] loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean( T.log(1 - y_pred_clipped[indNeg])) average_loss = T.mean(loss_vector) return average_loss @staticmethod def unet_block_down(input, nb_filter, doPooling=True, doDropout=False, doBatchNorm=False, initialization='glorot_uniform', weight_decay=0.): # first convolutional block consisting of 2 conv layers plus activation, then maxpool. 
# All are valid area, not same act1 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1, 1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(input) if doBatchNorm: act1 = BatchNormalization(mode=0, axis=1)(act1) act2 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1, 1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(act1) if doBatchNorm: act2 = BatchNormalization(mode=0, axis=1)(act2) if doDropout: act2 = Dropout(0.5)(act2) if doPooling: # now downsamplig with maxpool pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode="valid")(act2) else: pool1 = act2 return (act2, pool1) # need to define lambda layer to implement cropping # input is a tensor of size (batchsize, channels, width, height) @staticmethod def crop_layer(x, cs): cropSize = cs return x[:, :, cropSize:-cropSize, cropSize:-cropSize] @staticmethod def unet_block_up(input, nb_filter, down_block_out, doBatchNorm=False, initialization='glorot_uniform', weight_decay=0.): print "This is unet_block_up" print "input ", input._keras_shape # upsampling up_sampled = UpSampling2D(size=(2, 2))(input) print "upsampled ", up_sampled._keras_shape # up-convolution conv_up = Convolution2D(nb_filter=nb_filter, nb_row=2, nb_col=2, subsample=(1, 1), init=initialization, activation='relu', border_mode="same", W_regularizer=l2(weight_decay))(up_sampled) print "up-convolution ", conv_up._keras_shape # concatenation with cropped high res output # this is too large and needs to be cropped print "to be merged with ", down_block_out._keras_shape #padding_1 = int((down_block_out._keras_shape[2] - conv_up._keras_shape[2])/2) #padding_2 = int((down_block_out._keras_shape[3] - conv_up._keras_shape[3])/2) #print "padding: ", (padding_1, padding_2) #conv_up_padded = ZeroPadding2D(padding=(padding_1, padding_2))(conv_up) #merged = merge([conv_up_padded, down_block_out], mode='concat', 
concat_axis=1) cropSize = int( (down_block_out._keras_shape[2] - conv_up._keras_shape[2]) / 2) down_block_out_cropped = Lambda(UNET.crop_layer, output_shape=conv_up._keras_shape[1:], arguments={"cs": cropSize})(down_block_out) print "cropped layer size: ", down_block_out_cropped._keras_shape merged = merge([conv_up, down_block_out_cropped], mode='concat', concat_axis=1) print "merged ", merged._keras_shape # two 3x3 convolutions with ReLU # first one halves the feature channels act1 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1, 1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(merged) if doBatchNorm: act1 = BatchNormalization(mode=0, axis=1)(act1) print "conv1 ", act1._keras_shape act2 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1, 1), init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(act1) if doBatchNorm: act2 = BatchNormalization(mode=0, axis=1)(act2) print "conv2 ", act2._keras_shape return act2
#mBGsub = BGsubstract.BGsubstract(x_activ)#load pretrained and continue training

# Symbolic outputs and parameters exposed by the model object.
p_fb_flat_train = mBGsub.p_fb_flat_train  # used for the training loss
p_fb_flat_test = mBGsub.p_fb_flat_test    # used for the evaluation loss
p_fb = mBGsub.p_fb                        # returned by `predict` below
params = mBGsub.params

# Flatten the targets to match the flat predictions: move axis 1 last, then
# merge axes 0, 2 and 3 into a single row axis.
_n_rows = y_activ.shape[0] * y_activ.shape[2] * y_activ.shape[3]
y_activ_flat = y_activ.dimshuffle(0, 2, 3, 1).reshape((_n_rows, y_activ.shape[1]))

# Alternative: average the loss over the whole image
#cost = T.mean(T.nnet.categorical_crossentropy(p_fb, y_activ_flat))

# Instead, score only a random subset of rows.  The permutation is drawn with
# size=(1,), i.e. as a (1, n) matrix, hence the [0].
nbRandomSamples = 100
permutations_samples = srng.permutation(n=p_fb_flat_train.shape[0], size=(1,))[0]
_picked = permutations_samples[0:nbRandomSamples]
_picked_targets = y_activ_flat[_picked]

cost_train = T.mean(
    T.nnet.categorical_crossentropy(p_fb_flat_train[_picked], _picked_targets))
cost_pred = T.mean(
    T.nnet.categorical_crossentropy(p_fb_flat_test[_picked], _picked_targets))

# Alternative optimiser:
#updates = Optimisation.momentum(cost, params, learning_rate=0.0001, momentum=0.9)
updates = Optimisation.adam(cost_train, params, learn_rate = 0.0005)

# First three channels of x_activ, exposed for display.
downsampled_x_rgb = x_activ[:, 0:3]

# Compile the theano functions.
train = theano.function([x, y], cost_train, updates=updates)
getCost = theano.function([x, y], cost_pred)
getRGBdownsampled = theano.function([x], downsampled_x_rgb)
predict = theano.function([x], p_fb)
def __init__(self, rng, input, n_in, n_batch, d_bucket, n_reflections, activation,
             activation_deriv, w=None, index_permute=None, index_permute_reverse=None):
    """Build a bucketed linear layer whose weights are products of reflections.

    The ``n_in`` input columns are shuffled with ``index_permute`` and cut
    into ``n_in / d_bucket + 1`` consecutive buckets of width ``d_bucket``;
    the final bucket takes whatever columns remain.  For each bucket the
    weight is the product of ``n_reflections`` matrices of the form
    ``I - 2 v v^T / (v . v)``, one per row of ``w[b]``.  No determinant
    term is added for the linear part; only the activation derivative
    contributes to ``log_jacobian``.

    Args:
        rng: object whose ``uniform(low, high, size)`` initialises ``w``
            when ``w`` is None.
        input: symbolic matrix, indexed as ``input[:, index_permute]``.
        n_in: number of input columns.
        n_batch: length of the ``log_jacobian`` vector.
        d_bucket: bucket width.
        n_reflections: number of reflection vectors per bucket.
        activation: callable applied to the concatenated linear output, or
            None for a purely linear layer.
        activation_deriv: callable applied to each bucket's pre-activation;
            only used when ``activation`` is not None.
        w: optional existing parameter tensor to reuse.
        index_permute: optional column permutation; drawn at random
            (seed 234) when None.
        index_permute_reverse: inverse permutation; only recomputed when
            ``index_permute`` is None.

    Sets ``input``, ``index_permute``, ``index_permute_reverse``,
    ``permuted_input``, ``w``, ``log_jacobian``, ``output`` and ``params``.
    """
    stream = RandomStreams(seed=234)
    self.input = input
    n_bucket = n_in / d_bucket + 1

    # Randomly permute the input space unless the caller supplied a permutation.
    if index_permute is None:
        index_permute = stream.permutation(n=n_in)
        index_permute_reverse = T.argsort(index_permute)
    self.index_permute = index_permute
    self.index_permute_reverse = index_permute_reverse
    self.permuted_input = input[:, index_permute]

    # Initialise the reflection vectors: n_reflections rows per bucket.
    if w is None:
        initial = rng.uniform(low=-1, high=1,
                              size=(n_bucket, n_reflections, d_bucket))
        w = theano.shared(value=numpy.asarray(initial, dtype=theano.config.floatX),
                          name='w')
    self.w = w

    # Accumulate outputs and the log-Jacobian bucket by bucket.
    log_jacobian = T.alloc(0, n_batch)
    for b in xrange(n_bucket):
        start = b * d_bucket
        # The last bucket only holds the leftover columns.
        bucket_size = (n_in - start) if b == n_bucket - 1 else d_bucket
        x_b = self.permuted_input[:, start:start + bucket_size]

        # Chain the reflections: W = H_0 . H_1 . ... . H_{n_reflections - 1}.
        for r in xrange(n_reflections):
            v = w[b, r, :bucket_size]
            reflection = T.eye(bucket_size) - 2 * T.outer(v, v) / ((v ** 2).sum())
            if r == 0:
                W = reflection
            else:
                W = T.dot(W, reflection)

        lin_output_b = T.dot(x_b, W)
        if b == 0:
            lin_output = lin_output_b
        else:
            lin_output = T.concatenate([lin_output, lin_output_b], axis=1)

        if activation is not None:
            slopes = activation_deriv(lin_output_b)
            log_jacobian = log_jacobian + T.log(T.abs_(slopes)).sum(axis=1)

    self.log_jacobian = log_jacobian
    if activation is None:
        self.output = lin_output
    else:
        self.output = activation(lin_output)
    self.params = [w]
class DBenGurionOCR(object):
    """Wrapper around a ``DBenGurionArchitecture`` network.

    Use the ``Trainer`` or ``Validator`` factory methods to obtain a
    configured object; the bare constructor is the "production use" entry
    point and sets nothing up. All state created by the factories is stored
    on the returned instance, so several trainers/validators can coexist in
    one process without overwriting each other.
    """

    def __init__(self):
        """Create an unconfigured instance (constructor for production use)."""
        return

    @staticmethod
    def _new_dropout_stream():
        """Return the seeded random stream passed to the network as ``srng``."""
        seed_source = np.random.RandomState(1234)
        return T.shared_randomstreams.RandomStreams(
            seed_source.randint(999999))

    @staticmethod
    def _share_dataset(raw_data_set):
        """Load a raw ``(images, labels)`` pair into Theano shared variables.

        Returns:
            ``(n_samples, shared_images, shared_labels)`` where the images
            are floatX reshaped to ``(n_samples, 1, 64, 64)`` and the labels
            are int32.
        """
        raw_x = raw_data_set[0]
        raw_y = raw_data_set[1]
        n_samples = raw_x.shape[0]
        images = np.asarray(raw_x, dtype=theano.config.floatX).reshape(
            (n_samples, 1, 64, 64))
        labels = np.asarray(raw_y, dtype=np.int32)
        return n_samples, theano.shared(images), theano.shared(labels)

    @classmethod
    def Validator(cls, id_experiment, layers_metaData, batch_size,
                  raw_data_set, logger, weigthts_service, experimentsRepo,
                  initial_weights):
        """Build an instance configured for validation.

        Compiles ``evaluate_model_with_cost`` and
        ``evaluate_model_with_error``; each takes a batch index and evaluates
        the network on that slice of ``raw_data_set``.

        Args:
            id_experiment: experiment identifier, stored as ``idExperiment``.
            layers_metaData: forwarded to ``DBenGurionArchitecture``.
            batch_size: number of rows per batch.
            raw_data_set: ``(images, labels)`` pair; ``images`` must expose
                ``shape`` and reshape to ``(n, 1, 64, 64)``.
            logger: stored as ``logger``.
            weigthts_service: stored as ``weigthts_service``.
            experimentsRepo: stored as ``experimentsRepo``.
            initial_weights: forwarded to the network as ``initWeights``.

        Returns:
            A new, configured instance of this class.
        """
        ocr = cls()
        ocr.idExperiment = id_experiment
        ocr.logger = logger
        ocr.weigthts_service = weigthts_service
        ocr.experimentsRepo = experimentsRepo

        ocr.x = T.tensor4('x')  # the data is presented as rasterized images
        ocr.y = T.ivector('y')
        index = T.lscalar()

        ocr.totalDataSize, x_shared, y_shared = cls._share_dataset(
            raw_data_set)
        ocr.no_batchs_in_data_set = ocr.totalDataSize // batch_size

        ocr.CNN = DBenGurionArchitecture.DBenGurionArchitecture(
            image_input=ocr.x,
            batch_size=batch_size,
            layers_metaData=layers_metaData,
            initWeights=initial_weights,
            srng=cls._new_dropout_stream(),
            no_channels_imageInput=1,
            isTraining=1)

        def batch_givens():
            # A fresh mapping per compiled function.
            rows = slice(index * batch_size, (index + 1) * batch_size)
            return {ocr.x: x_shared[rows], ocr.y: y_shared[rows]}

        cost = ocr.CNN.SoftMax_1.negative_log_likelihood(ocr.y)
        ocr.evaluate_model_with_cost = theano.function(
            [index], cost, givens=batch_givens())

        error = ocr.CNN.SoftMax_1.errors(ocr.y)
        ocr.evaluate_model_with_error = theano.function(
            [index], error, givens=batch_givens())
        return ocr

    @classmethod
    def Trainer(cls, id_experiment, layers_metaData, batch_size,
                raw_train_set, logger, weigthts_service, experimentsRepo,
                initial_weights, max_epochs, with_lr_decay, learning_rate,
                saveWeigthsFrecuency, frecuency_lr_decay, p_DropOut):
        """Build an instance configured for training.

        Compiles ``train_model(batch_index, learning_rate)``, which runs one
        parameter update on the selected mini-batch and returns its cost.
        Mini-batches are read through a shared row-order vector so that
        ``Train`` can reshuffle the data between epochs.

        Args:
            id_experiment: experiment identifier, stored as ``idExperiment``.
            layers_metaData: forwarded to ``DBenGurionArchitecture``.
            batch_size: number of rows per mini-batch.
            raw_train_set: ``(images, labels)`` pair; ``images`` must expose
                ``shape`` and reshape to ``(n, 1, 64, 64)``.
            logger: object whose ``LogTrain`` is called once per mini-batch.
            weigthts_service: object whose ``SaveWeights`` persists weights.
            experimentsRepo: source of manual learning-rate requests.
            initial_weights: forwarded to the network as ``initWeights``.
            max_epochs: number of epochs ``Train`` iterates over.
            with_lr_decay: enables the periodic x0.1 learning-rate decay.
            learning_rate: initial learning rate (converted to ``float``).
            saveWeigthsFrecuency: weights are saved every this many epochs.
            frecuency_lr_decay: epochs between automatic decays.
            p_DropOut: forwarded to the network as ``pDropOut``.

        Returns:
            A new, configured instance of this class.
        """
        ocr = cls()
        ocr.idExperiment = id_experiment
        ocr.logger = logger
        ocr.max_epochs = max_epochs
        ocr.with_lr_decay = with_lr_decay
        ocr.learning_rate = float(learning_rate)
        ocr.weigthts_service = weigthts_service
        ocr.saveWeigthsFrecuency = saveWeigthsFrecuency
        ocr.frecuency_lr_decay = frecuency_lr_decay
        ocr.experimentsRepo = experimentsRepo
        # Kept as an attribute for backward compatibility; shuffling is now
        # driven by ``_shuffle_rng`` / ``_batch_order`` below.
        ocr.theano_rng = RandomStreams(123)

        ocr.x = T.tensor4('x')  # the data is presented as rasterized images
        ocr.y = T.ivector('y')
        learningRate = T.fscalar()
        index = T.lscalar()

        ocr.trainDataSetSize, x_shared, y_shared = cls._share_dataset(
            raw_train_set)
        ocr.no_batchs_in_data_set = ocr.trainDataSetSize // batch_size

        ocr.CNN = DBenGurionArchitecture.DBenGurionArchitecture(
            image_input=ocr.x,
            batch_size=batch_size,
            layers_metaData=layers_metaData,
            initWeights=initial_weights,
            srng=cls._new_dropout_stream(),
            no_channels_imageInput=1,
            isTraining=1,
            pDropOut=p_DropOut)

        cost = ocr.CNN.SoftMax_1.negative_log_likelihood(ocr.y)

        cnn = ocr.CNN
        weights = [
            cnn.conv1.Filter, cnn.conv2.Filter, cnn.conv3.Filter,
            cnn.conv4.Filter, cnn.conv5.Filter, cnn.conv6.Filter,
            cnn.FC_1.Filter, cnn.FC_1.Bias,
            cnn.FC_2.Filter, cnn.FC_2.Bias,
            cnn.SoftMax_1.Filter, cnn.SoftMax_1.Bias,
        ]
        grads = T.grad(cost, weights, disconnected_inputs="raise")
        # NOTE(review): the update *adds* the gradient of ``cost``. If
        # ``negative_log_likelihood`` returns a quantity to be minimised this
        # is gradient ascent - confirm the sign against the SoftMax layer.
        updates = [(param_i, param_i + (learningRate * grad_i))
                   for param_i, grad_i in zip(weights, grads)]

        # Row order used to read mini-batches; ``Train`` overwrites it with a
        # new permutation every epoch, which reshuffles the data without
        # recompiling ``train_model``.
        ocr._shuffle_rng = np.random.RandomState(123)
        ocr._batch_order = theano.shared(
            np.arange(ocr.trainDataSetSize, dtype='int64'),
            name='batch_order')
        rows = ocr._batch_order[index * batch_size:(index + 1) * batch_size]

        ocr.train_model = theano.function(
            [index, learningRate],
            cost,
            updates=updates,
            givens={ocr.x: x_shared[rows], ocr.y: y_shared[rows]})
        return ocr

    def GetWeigthsValuesByLayer(self, layer):
        """Return the current weights of ``layer`` as floatX arrays.

        Convolutional layers yield a single filter array; ``FC_1``, ``FC_2``
        and ``SoftMax_1`` yield a ``(filter, bias)`` tuple. Any other value
        yields ``None``.
        """
        def as_array(shared_var):
            return np.asarray(shared_var.get_value(),
                              dtype=theano.config.floatX)

        members = LayerEnum.LayerEnum
        for name in ('conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'conv6'):
            if layer is getattr(members, name):
                return as_array(getattr(self.CNN, name).Filter)
        for name in ('FC_1', 'FC_2', 'SoftMax_1'):
            if layer is getattr(members, name):
                block = getattr(self.CNN, name)
                return (as_array(block.Filter), as_array(block.Bias))
        return None

    def Train(self, current_epoch=0, id_train='', extra_info=''):
        """Run the training loop from ``current_epoch`` up to ``max_epochs``.

        Each epoch optionally adjusts the learning rate, reshuffles the row
        order, runs every mini-batch through ``train_model`` and, every
        ``saveWeigthsFrecuency`` epochs, saves the weights.

        Args:
            current_epoch: first epoch to execute; earlier ones are skipped.
            id_train: unused by this method.
            extra_info: unused by this method.
        """
        repo = self.experimentsRepo
        for epoch_index in range(self.max_epochs):
            # Starting at a later epoch only makes sense when initial
            # weights exist (i.e. when resuming a previous run).
            if epoch_index < current_epoch:
                continue

            # The flags are compared with ``==`` on purpose: their exact
            # types come from outside this class.
            if (epoch_index != 0 and self.with_lr_decay == True
                    and epoch_index % self.frecuency_lr_decay == 0):
                self.learning_rate *= 0.1
            elif self.with_lr_decay == False:
                decreaseNow = repo.ObtenerDecreaseNow()
                increaseNow = repo.ObtenerIncreaseNow()
                if decreaseNow == True:
                    repo.UpdateLearningRate(self.learning_rate)
                    repo.SetFalseDecreaseNow()
                    self.learning_rate *= 0.1
                    print("Decremento mandatorio, learning rate: " +
                          str(self.learning_rate))
                elif increaseNow == True:
                    repo.UpdateLearningRate(self.learning_rate)
                    repo.SetFalseIncreaseNow()
                    self.learning_rate /= 0.1

            # Reshuffle the rows seen by the compiled training function.
            new_order = self._shuffle_rng.permutation(self.trainDataSetSize)
            self._batch_order.set_value(np.asarray(new_order, dtype='int64'))

            # Walk over the whole data set, one mini-batch at a time.
            for batch_index in range(self.no_batchs_in_data_set):
                cost = self.train_model(batch_index, self.learning_rate)
                print("costo: " + str(cost) + " epoca: " + str(epoch_index) +
                      " Batch: " + str(batch_index) + "/" +
                      str(self.no_batchs_in_data_set) + " Learning Rate: " +
                      str(self.learning_rate))
                self.logger.LogTrain(cost, str(epoch_index),
                                     str(batch_index),
                                     str(self.learning_rate))

            if (epoch_index + 1) % self.saveWeigthsFrecuency == 0:
                # Index of the last mini-batch processed in this epoch.
                last_batch = self.no_batchs_in_data_set - 1
                self.SaveWeights(epoch_index, last_batch, -1)

    def SaveWeights(self, epoch, batch, iteration, cost=0, error=0,
                    costVal=0, errorVal=0, costTest=0, errorTest=0):
        """Collect every layer's weights and hand them to the weights service.

        The positional metadata and the optional metrics are forwarded
        unchanged to ``weigthts_service.SaveWeights`` together with the
        current learning rate rendered as a string.
        """
        members = LayerEnum.LayerEnum
        fetch = self.GetWeigthsValuesByLayer
        fc1_filter, fc1_bias = fetch(members.FC_1)
        fc2_filter, fc2_bias = fetch(members.FC_2)
        softmax_filter, softmax_bias = fetch(members.SoftMax_1)

        allWeiths = {
            "conv1Values": fetch(members.conv1),
            "conv2Values": fetch(members.conv2),
            "conv3Values": fetch(members.conv3),
            "conv4Values": fetch(members.conv4),
            "conv5Values": fetch(members.conv5),
            "conv6Values": fetch(members.conv6),
            "FC1Values": fc1_filter,
            "FC1BiasValues": fc1_bias,
            "FC2Values": fc2_filter,
            "FC2BiasValues": fc2_bias,
            "SoftMax1Values": softmax_filter,
            "SoftMax1BiasValues": softmax_bias,
        }

        hyper_params = "learning rate: " + str(self.learning_rate)
        self.weigthts_service.SaveWeights(
            allWeiths, self.idExperiment, epoch, batch, iteration,
            hyper_params, cost, error, costVal, errorVal, costTest,
            errorTest)
        return

    def _average_over_batches(self, evaluate, label, noBatchsToEvaluate):
        """Average ``evaluate(batch_index)`` over the first batches.

        ``-1`` means "every batch in the data set". ``label`` is the text
        printed in front of each per-batch value.
        """
        if noBatchsToEvaluate == -1:
            noBatchsToEvaluate = self.no_batchs_in_data_set

        total = 0.0
        for batch_index in range(noBatchsToEvaluate):
            value = evaluate(batch_index)
            print("calculando costos: " + label + str(value) +
                  " en batch: " + str(batch_index))
            total = total + value
        return total / noBatchsToEvaluate

    def CalculateCost(self, noBatchsToEvaluate=-1):
        """Return the mean cost over ``noBatchsToEvaluate`` batches.

        With the default ``-1`` every batch of the data set is evaluated.
        """
        return self._average_over_batches(
            self.evaluate_model_with_cost, "costo: ", noBatchsToEvaluate)

    def CalculateError(self, noBatchsToEvaluate=-1):
        """Return the mean error over ``noBatchsToEvaluate`` batches.

        With the default ``-1`` every batch of the data set is evaluated.
        """
        return self._average_over_batches(
            self.evaluate_model_with_error, "errores: ", noBatchsToEvaluate)
def do_gd(train_set, etaVal, epochs, layers, batch_size=100, scale=1):
    """Train an ``MLP`` with mini-batch gradient descent.

    The rows are reshuffled before every epoch through a shared row-order
    vector, so inputs and targets are always permuted the same way and the
    compiled training function never has to be rebuilt. After each epoch
    the learning rate follows the "search then converge" schedule
    ``etaVal / (1 + iteration / ANNEAL)``.

    Args:
        train_set: ``(trainX, trainY)`` pair. ``trainX`` must be a shared
            variable (``get_value`` is called on it); both are indexed
            symbolically by row.
        etaVal: initial learning rate.
        epochs: number of passes over the data.
        layers: layer sizes forwarded to ``MLP``.
        batch_size: rows per mini-batch (default 100).
        scale: forwarded to ``MLP``.

    Returns:
        The mean of the last ten mini-batch costs.

    Raises:
        IndexError: if no mini-batch was run (``epochs`` is 0 or the data
            set holds fewer than ``batch_size`` rows).
    """
    SEED = 5318
    np.random.seed(SEED)

    X = T.matrix('X')
    Y = T.ivector('Y')
    index = T.lscalar('index')
    eta = T.fscalar('eta')

    trainX, trainY = train_set
    dataset_size = trainX.get_value(borrow=True).shape[0]

    classifier = MLP(
        rng=np.random.RandomState(SEED),
        inpt=X,
        layers=layers,
        scale=scale
    )

    cost = classifier.negative_log_likelihood(Y)
    gparams = [T.grad(cost, param) for param in classifier.params]

    # Mini-batches are read through this index vector. Re-binding the
    # Python names trainX / trainY after compilation would not change what
    # the compiled function sees, whereas updating a shared variable does.
    row_order = theano.shared(np.arange(dataset_size, dtype='int64'),
                              name='row_order')
    batch_rows = row_order[index * batch_size:(index + 1) * batch_size]

    train_model = theano.function(
        inputs=[index, eta],
        outputs=cost,
        updates=[(param, param - eta * gparam)
                 for param, gparam in zip(classifier.params, gparams)],
        givens={
            X: trainX[batch_rows],
            Y: trainY[batch_rows]
        }
    )

    costs = []
    n_batches = int(dataset_size / batch_size)
    print(dataset_size)
    # Rate at which the learning parameter is reduced as iterations increase.
    ANNEAL = 10 * dataset_size
    print("Anneal = {}".format(ANNEAL))

    start_time = timeit.default_timer()
    learn_rate = etaVal
    for epoch in range(epochs):
        # One permutation drives both X and Y, keeping rows and labels paired.
        row_order.set_value(
            np.random.permutation(dataset_size).astype('int64'))

        for batch_idx in range(n_batches):
            costs.append(
                np.mean(np.asarray([train_model(batch_idx, learn_rate)])))

        time_check = timeit.default_timer()
        # Number of parameter updates performed so far.
        iteration = (epoch + 1) * n_batches
        print("epoch={}, mean cost={}, total_time(mins)={}, eta={}, iters={}".format(
            epoch, np.mean(costs[-n_batches:]),
            (time_check - start_time) / 60.0, learn_rate, iteration))

        # Search and then converge
        learn_rate = etaVal / (1.0 + (iteration * 1.0 / ANNEAL))

    # Report the numeric learning rate, not the symbolic ``eta`` input.
    print("Eta = {}, Cost Last= {} Mean last 10 Costs = {}".format(
        learn_rate, costs[-1], np.mean(costs[-10:])))
    return np.mean(costs[-10:])