Example #1
File: defgen_rev.py Project: JHnlp/DefGen2
def build_model(tparams, options):
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))

    # description string: #words x #samples
    if options['use_target_as_input']:
        x = tensor.tensor3('x', dtype='float32')
    else:
        x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    # context: #samples x dim
    ctx = tensor.matrix('ctx', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # word embedding
    if options['use_target_as_input']:
        emb = x
    else:
        emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
    # decoder
    if options.setdefault('feedforward', False):
        proj_h = tensor.dot(emb, tparams['Wff'])
        proj_h = (proj_h * mask[:,:,None]).sum(axis=0)
        proj_h = proj_h / mask.sum(axis=0)[:,None]
    elif options.setdefault('regress', False):
        proj_h = (emb * mask[:,:,None]).sum(axis=0)
        proj_h = tensor.dot(proj_h, tparams['Wff'])
        proj_h = proj_h / mask.sum(axis=0)[:,None]
    else:
        proj = get_layer('lstm')[1](tparams, emb, options, 
                                    prefix='encoder', 
                                    mask=mask)
        proj_h = proj[0]
        if options['use_mean']:
            proj_h = (proj_h * mask[:,:,None]).sum(axis=0)
            proj_h = proj_h / mask.sum(axis=0)[:,None]
        else:
            proj_h = proj_h[-1]

    if 'n_layers' in options:
        for lidx in xrange(1, options['n_layers']):
            proj_h = get_layer('ff')[1](tparams, proj_h, options, prefix='ff_out_%d'%lidx, activ='tanh')
    out = get_layer('ff')[1](tparams, proj_h, options, prefix='ff_out', activ='linear')

    # cost
    if options['loss_type'] == 'cosine':
        out = out / tensor.sqrt((out ** 2).sum(1))[:,None]
        cost = 1. - (out * ctx).sum(1)
    elif options['loss_type'] == 'ranking':
        out = out / tensor.sqrt((out ** 2).sum(1))[:,None]
        rndidx = trng.permutation(n=ctx.shape[0])
        ctx_rnd = ctx[rndidx]
        cost = tensor.maximum(0., 1 - (out * ctx).sum(1) + (out * ctx_rnd).sum(1))
    else:
        raise Exception('Unknown loss function')

    return trng, use_noise, x, mask, ctx, cost
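A minimal sketch of the shuffling trick behind the 'ranking' loss above, kept separate from the original example: RandomStreams.permutation yields a symbolic permutation of 0..n-1 that can index another tensor, pairing each output with a randomly chosen "negative" context. The variable names here (ctx_var, shuffle_rows) are illustrative only.

import numpy as np
import theano
import theano.tensor as tensor
from theano.tensor.shared_randomstreams import RandomStreams

trng = RandomStreams(1234)
ctx_var = tensor.matrix('ctx', dtype='float32')
rndidx = trng.permutation(n=ctx_var.shape[0])   # symbolic permutation of the row indices
ctx_rnd = ctx_var[rndidx]                       # rows in random order -> mismatched negatives
shuffle_rows = theano.function([ctx_var], ctx_rnd)
print(shuffle_rows(np.arange(6, dtype='float32').reshape(3, 2)))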
Example #2
    def add_negative(cls, var_x, x_tilde, type='samples'):

        if type is None:
            return 0

        random_stream = RandomStreams()
        if type == 'samples':
            n = var_x.shape[0]
            perm = random_stream.permutation(n=n)
            shuffled_var_x = var_x[perm, :]
            return Tensor.mean(((shuffled_var_x - x_tilde) ** 2).sum(axis=1))

        if type == 'features':
            n = var_x.shape[1]
            perm = random_stream.permutation(n=n)
            shuffled_var_x = var_x[:, perm]
            return Tensor.mean(((shuffled_var_x - x_tilde) ** 2).sum(axis=1))
    def shuffle_training_data(self):

        print "Shuffling training X and y data..."

        numRows = self.train_set_x.shape[0]

        srng = RandomStreams(seed=None)
        mask = srng.permutation(n=numRows, size=(1,)).reshape((numRows,))

        self.train_set_x = self.train_set_x[mask]
        self.train_set_y = self.train_set_y[mask]
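A hedged aside on the snippet above: indexing a shared variable only builds a symbolic expression, so a common alternative (used later in Example #11) is to shuffle the shared data in place through a compiled function whose updates apply one permutation to both X and y. A minimal sketch, with illustrative names (train_x, train_y, shuffle):

import numpy as np
import theano
from theano import function, shared
from theano.tensor.shared_randomstreams import RandomStreams

train_x = shared(np.arange(12, dtype='float32').reshape(6, 2), name='train_x')
train_y = shared(np.arange(6, dtype='int32'), name='train_y')

srng = RandomStreams(seed=5318)
perm = srng.permutation(n=train_x.shape[0])   # one symbolic permutation of the row indices

# a single compiled call permutes X and y with the same indices
shuffle = function([], updates=[(train_x, train_x[perm]),
                                (train_y, train_y[perm])])
shuffle()
print(train_x.get_value())
print(train_y.get_value())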
Example #4
class Visual(task.Task):

    def __init__(self, config):
        autoassign(locals())
        self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
        self.Encode = Encoder(config['size_vocab'],
                              config['size_embed'], config['size'],
                              config['depth'],
                              activation=eval(config.get('activation',
                                                         'clipped_rectify')),
                              filter_length=config.get('filter_length', 6), 
                              filter_size=config.get('filter_size', 1024), 
                              stride=config.get('stride', 3),
                              residual=config.get('residual',False))
        self.Attn   = Attention(config['size'])
        self.ToImg  = Dense(config['size'], config['size_target'])
        self.inputs = [T.ftensor3()]
        self.target = T.fmatrix()
        self.config['margin'] = self.config.get('margin', False)
        if self.config['margin']:
            self.srng = RandomStreams(seed=234)
        
        
    def params(self):
        return params(self.Encode, self.Attn, self.ToImg)
    
    def __call__(self, input):
        return self.ToImg(self.Attn(self.Encode(input)))
    
    def cost(self, target, prediction):
        if self.config['margin']:
            return self.Margin(target, prediction, dist=CosineDistance, d=1)
        else:
            return CosineDistance(target, prediction)
    
    def Margin(self, U, V, dist=CosineDistance, d=1.0):
        V_ = (V[self.srng.permutation(n=T.shape(V)[0],
                                      size=(1,)),]).reshape(T.shape(V))
        # A bit silly making it nondet
        return T.maximum(0.0, dist(U, V) - dist(U, V_) + d)
    
    def args(self, item):
        return (item['audio'], item['target_v'])

    def _make_representation(self):
        with context.context(training=False):
            rep = self.Encode(*self.inputs)
        return theano.function(self.inputs, rep)

    def _make_pile(self):
        with context.context(training=False):
            rep = self.Encode.GRU.intermediate(*self.inputs)
        return theano.function(self.inputs, rep)
    def test_permutation(self):
        """Test that RandomStreams.permutation generates the same results as numpy"""
        # Check over two calls to see if the random state is correctly updated.
        random = RandomStreams(utt.fetch_seed())
        fn = function([], random.permutation((20,), 10), updates=random.updates())

        fn_val0 = fn()
        fn_val1 = fn()

        rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
        rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit

        # rng.permutation outputs one vector at a time, so we iterate.
        numpy_val0 = numpy.asarray([rng.permutation(10) for i in range(20)])
        numpy_val1 = numpy.asarray([rng.permutation(10) for i in range(20)])

        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)
Example #6
    def test_permutation(self):
        # Test that RandomStreams.permutation generates the same results as numpy
        # Check over two calls to see if the random state is correctly updated.
        random = RandomStreams(utt.fetch_seed())
        fn = function([],
                      random.permutation((20, ), 10),
                      updates=random.updates())

        fn_val0 = fn()
        fn_val1 = fn()

        rng_seed = np.random.RandomState(utt.fetch_seed()).randint(2**30)
        rng = np.random.RandomState(int(rng_seed))  # int() is for 32bit

        # rng.permutation outputs one vector at a time, so we iterate.
        numpy_val0 = np.asarray([rng.permutation(10) for i in range(20)])
        numpy_val1 = np.asarray([rng.permutation(10) for i in range(20)])

        assert np.all(fn_val0 == numpy_val0)
        assert np.all(fn_val1 == numpy_val1)
Example #7
File: ff.py Project: kgonzales87/raccoon
    def compute_joint_loss(self, h, tg):
        # input: (batch_size, n_in)
        srng = RandomStreams(seed=234)
        a = srng.permutation(n=h.shape[0], size=(1, ))[0]
        h_noised = h[a]

        means = self.means[tg]

        # z: (batch_size, n_in)
        z = (h - means) / (self.stds + 1e-6)

        l = (T.log(self.priors[tg]) - T.log(self.stds).sum() - 0.5 *
             (z**2).sum(axis=-1))

        # add Jacobian
        equal = T.eq(h.sum(axis=1), h_noised.sum(axis=1))
        inc = T.switch(
            equal, T.zeros_like(equal,
                                equal.dtype), self.jacobian_factor * 0.5 *
            self.n_inputs * (T.log(1e-8 + ((h - h_noised)**2).sum(axis=-1))))
        l += inc

        return -l
def do_gd(etaVal, epochs, layers, train_set, 
        valid_set=None, test_set=None, L2_reg=0, batch_size=100, scale=1, noise_scale=1):
    '''
    batch_size defaults to 100
    L2 regularization defaults to 0
    returns the training error and validation error after each epoch
    '''
    SEED = 5318
    np.random.seed(SEED)
    X = T.matrix('X')
    Y = T.ivector('Y')
    index = T.lscalar('index')
    noise = T.matrix('noise')
    eta = T.fscalar('eta')
    n_scale = T.fscalar('noise_scale')
    
    n_in = layers[0]
    n_out = layers[-1]

    # Get the datasets
    trainX, trainY = train_set
    validX, validY = valid_set
    testX, testY   = test_set

    # Get the dataset sizes
    train_dims = trainX.get_value(borrow=True).shape
    train_size = trainX.get_value(borrow=True).shape[0]
    valid_size = validX.get_value(borrow=True).shape[0]
    test_size  = testX.get_value(borrow=True).shape[0]


    classifier = MLP(
                    rng = np.random.RandomState(SEED),
                    inpt = X,
                    layers = layers,
                    scale = scale
                )
    cost = (
            classifier.negative_log_likelihood(Y) 
            + L2_reg * classifier.L2_sqr # using the L2 regularization
        )

    gparams = [T.grad(cost, param) for param in classifier.params]

    
    # Random number generator for the gaussian noise
    # theano_rng = RandomStreams(int(np.random.rand()*100))
    train_model = theano.function(
                 inputs = [index, eta, noise],
                 outputs = cost,
                 updates = [(param, param - eta * gparam) 
                    for param, gparam in zip(classifier.params, gparams)],
                 givens = {
                         # train_dims[1] is the number of columns (features) in the training data
                         # apparently trainX gets added to the random numbers before it is sliced,
                         # hence we use 784 (feature-count) random numbers rather than 100 (batch_size)
                         # X : trainX[index * batch_size : (index + 1) * batch_size] + theano_rng.normal(size=(train_dims[1],))* n_scale,
                         X : trainX[index * batch_size : (index + 1) * batch_size] + noise,
                         Y : trainY[index * batch_size : (index + 1) * batch_size]
                     }
             )
    
    validate_model = theano.function(
                inputs = [index],
                outputs = classifier.errors(Y),
                givens = {
                         X : validX[index * batch_size : (index + 1) * batch_size],
                         Y : validY[index * batch_size : (index + 1) * batch_size]
                }
            )
    
    test_model = theano.function(
                inputs = [index],
                outputs = classifier.errors(Y),
                givens = {
                         X : testX[index * batch_size : (index + 1) * batch_size],
                         Y : testY[index * batch_size : (index + 1) * batch_size]
                }
            )


    train_error = []
    valid_error = []
    test_error  = []

    # Calculate the number of batches.
    n_train_batches = int(train_size / batch_size)
    n_val_batches = int(valid_size / batch_size)
    n_test_batches = int(test_size / batch_size)

    ANNEAL = 10*train_size # controls how quickly the learning rate "eta" decays as iterations increase (search-then-converge annealing)
    print("Anneal = {}".format(ANNEAL))
    
    start_time = timeit.default_timer()
    learn_rate = etaVal

    # Initial Gaussian Noise
    gaussian_noise = 0
    for epoch in xrange(epochs):
        # shuffle data, reset the seed so that trainX and trainY are randomized
        # the same way
        theano_seed = int(np.random.rand()*100)
        theano_rng = RandomStreams(theano_seed)
        trainX = trainX[theano_rng.permutation(n=train_size, size=(1,)),]
        theano_rng = RandomStreams(theano_seed)
        trainY = trainY[theano_rng.permutation(n=train_size, size=(1,)),]
        
        cost = []
        val_cost = []
        

        # Add new gaussian noise
        # of size (batch_size, # of features)
        gaussian_noise = noise_scale * np.random.normal(size=(batch_size,train_dims[1])).astype(theano.config.floatX)

        for batch_idx in xrange(n_train_batches):
            cost.append(np.mean(np.asarray([train_model(batch_idx, learn_rate, gaussian_noise)])))

        # Delete the gaussian noise
        # trainX = trainX - gaussian_noise

        # Validation error checked in each epoch
        for val_batch_idx in xrange(n_val_batches): 
            val_cost.append(np.mean(np.asarray([validate_model(val_batch_idx)])))

        train_error.append(np.mean(cost))
        valid_error.append(np.mean(val_cost))

        time_check = timeit.default_timer()
        iteration = (epoch * n_train_batches) + batch_idx
        print("epoch={}, mean train cost={}, mean_val_cost = {} time = {} eta={}".format(epoch, train_error[-1], valid_error[-1], (time_check - start_time)/60.0, learn_rate))
        # Search and then converge
        learn_rate = etaVal / ( 1.0 + (iteration*1.0 / ANNEAL))

    return train_error, valid_error
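The epoch loop above relies on re-creating RandomStreams with the same seed so that trainX and trainY receive identical permutations. A small sketch of that property, under the assumption that both streams are freshly seeded before the draw; names are illustrative:

import numpy as np
import theano
from theano.tensor.shared_randomstreams import RandomStreams

seed = 42
perm_x = RandomStreams(seed).permutation(n=5)
perm_y = RandomStreams(seed).permutation(n=5)
draw = theano.function([], [perm_x, perm_y])
px, py = draw()
print(px)
print(py)                  # same seed, same first draw
assert np.all(px == py)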
import numpy as np
import theano as th
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams

data = np.random.rand(10,3)




it = th.shared(0)
y = th.shared(data)




srng = RandomStreams(seed=234)

expectRvs   = srng.normal(size=(3,1))
expectRvs.name='expectRvs'
epochStream = srng.permutation(n=10)
currentBatch = epochStream.reshape((5,2))[:,it]
y_mini = y[ currentBatch, :]
L = th.tensor.sum(th.tensor.dot( y_mini, expectRvs ))
L_func = function([], L, no_default_updates=True)

padding = srng.choice(size=(3,), a=10, replace=False, p=None, ndim=None, dtype='int64')



f1 = function([], expectRvs, no_default_updates=True)
f2 = function([], expectRvs)

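A short hedged sketch of the minibatch pattern above: one permutation drawn per epoch is reshaped into batches, a shared counter picks the current column, and no_default_updates keeps the permutation fixed between calls while an explicit update advances the counter. The names (it, get_batch) are illustrative.

import numpy as np
import theano
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams

data = np.random.rand(10, 3).astype(theano.config.floatX)
y = theano.shared(data)
it = theano.shared(0)

srng = RandomStreams(seed=234)
epochStream = srng.permutation(n=10)               # shuffled row indices for one epoch
currentBatch = epochStream.reshape((5, 2))[:, it]  # column `it` of the reshaped permutation = one minibatch
get_batch = function([], y[currentBatch, :],
                     updates=[(it, (it + 1) % 2)],  # advance the batch counter
                     no_default_updates=True)       # keep the same permutation across calls
print(get_batch())   # five of the shuffled rows
print(get_batch())   # the other five rows of the same permutation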
Example #10
# assumed setup so this snippet runs on its own: srng is a RandomStreams instance
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
srng = RandomStreams(seed=234)

############Normal RV

rn_n = srng.normal(size=(), avg=0.0, std=2.3)
norm = function([],rn_n)
print "Single Normal ", norm()

#############Random integer list

rn_i = srng.random_integers(size = (4, ), low=1, high=900)
inte = function([], rn_i)
print "Integer list ", inte()

#############Generating a permutation uniformly at random

rn_p = srng.permutation(size=(), n = 10)
perm = function([], rn_p)
print "Random permutation of 0 to 9", perm()

#############choosing from a list randomly

rn_list = srng.choice(size=(), a=[2,3, 4.5, 6], replace=True, p=[.5, 0, .5, 0], dtype='float64')
lis = function([], rn_list)
print "Choosing 3 times from the specified list ", lis()
print lis()
print lis()

rn_another_list = srng.choice(size=(), a=3, replace=True, p=None)
an_list = function([], rn_another_list)

print "Choosing 3 times from [0,1, 2] since a is scalar", an_list()
Example #11
File: model.py Project: crimsonlander/nn
    def train(self, X_train, y_train,
          X_valid, y_valid,
          n_epochs, batch_size,
          optimization_function,
          cost_function,
          random_order=True):

        unsupervised = (X_train is y_train)

        if not isinstance(X_train, (TensorVariable, SharedVariable)):
            N = X_train.shape[0]
        else:
            N = function([], X_train.shape[0])()

        n_batches = N // batch_size + (N % batch_size != 0)

        if not isinstance(X_train, (TensorVariable, SharedVariable)):
            X_train = shared(X_train.astype('float32'), name="X_train")

        if not isinstance(X_valid, (TensorVariable, SharedVariable)):
            X_valid = shared(X_valid.astype('float32'), name="X_valid")

        if not unsupervised and not isinstance(y_train, (TensorVariable, SharedVariable)):
            if self.classification:
                y_train = shared(y_train.astype('int32'), name="y_train")
            else:
                y_train = shared(y_train.astype('float32'), name="y_train")

        if not unsupervised and not isinstance(y_valid, (TensorVariable, SharedVariable)):
            if self.classification:
                y_valid = shared(y_valid.astype('int32'), name="y_valid")
            else:
                y_valid = shared(y_valid.astype('float32'), name="y_valid")

        if random_order:
            perm_rng = RandomStreams(1)
            perm = perm_rng.permutation(n=N)
            if unsupervised:
                self.manual_updates.append(function([], updates=[(X_train, X_train[perm])]))
            else:
                self.manual_updates.append(function([], updates=[(X_train, X_train[perm]),
                                                                 (y_train, y_train[perm])]))

        if unsupervised:
            y_train = X_train
            y_valid = X_valid

        cost = cost_function(self.yScaled, self.out, self.params)
        error = self.error()

        validate = function([], [cost, error],
                            givens=[(self.X, X_valid), (self.y, y_valid)]
                                   + self.turn_off_dropout,
                            no_default_updates=self.no_default_upd)

        index = T.iscalar()
        upd = optimization_function(self.params, cost)

        batch_begin = index * batch_size
        batch_end   = T.min(((index+1) * batch_size, N))

        optimize = function([index], [cost, error],
                            givens=[(self.X, X_train[batch_begin:batch_end]),
                                    (self.y, y_train[batch_begin:batch_end])],
                            updates=upd,
                            no_default_updates=self.no_default_upd)

        for epoch in range(n_epochs):
            print("Epoch", epoch)
            cost_sum, error_sum = 0, 0
            print("Running batches...")
            for i in range(n_batches):
                c, a = optimize(i)
                cost_sum += c
                error_sum += a

            print("Done!")
            print("training: cost", cost_sum / float(n_batches), ", error", error_sum / float(n_batches))
            c, a = validate()
            print("validation: cost", c, ", error", a)

            for man_upd in self.manual_updates:
                man_upd()
Example #12
File: oldunet.py Project: Rhoana/icon
class UNET(object):

    def __init__(
        self,
        id,
        rng,
        batch_size, 
        patch_size=572, 
        patch_size_out=388,
        offline=False,
        path=None,
        train_time=5.0,
        learning_rate=0.01,
        momentum=0.95):
        self.id = id
        self.type = 'UNET'
        self.offline = offline
        self.done = False
        self.path = path    
        self.batchSize = batch_size
        self.patchSize = patch_size 
        self.patchSize_out = patch_size_out
        self.learning_rate = learning_rate
        self.momentum      = momentum

        self.best_validation_loss = numpy.inf
        self.trainTime = train_time
        self.resample = False       
        self.error = np.inf
        self.error_threshold = 0.06
        self.best_val_loss_so_far = 0
        self.patience_counter = 0
        self.patience = 100
        self.patience_reset = 100

        self.doBatchNormAll = False
        self.doFineTune = False

        self.weight_decay = 0.
        self.weight_class_1 = 1.
        self.initialization = 'glorot_uniform'

        self.model = None

        # keep the stream on the class as well, since the static loss functions
        # below reference it as UNET.srng
        UNET.srng = RandomStreams(1234)
        self.srng = UNET.srng

        self.initialize() 


    def initialize(self):
        print 'Unet.initialize'


    def trainiold(self, offline=False, data=None, mean=None, std=None):
        print 'UNET.train()'

        # need to define a custom loss, because all pre-implementations
        # seem to assume that scores over a patch add up to one, which
        # they clearly don't and shouldn't
        def unet_crossentropy_loss(y_true, y_pred):
            weight_class_1 = 1.
            epsilon = 1.0e-4
            y_pred_clipped = T.clip(y_pred, epsilon, 1.0-epsilon)
            loss_vector = -T.mean(weight_class_1*y_true * T.log(y_pred_clipped) + (1-y_true) * T.log(1-y_pred_clipped), axis=1)
            average_loss = T.mean(loss_vector)
            return average_loss

        def unet_crossentropy_loss_sampled(y_true, y_pred):
            print 'unet_crossentropy_loss_sampled'
            epsilon = 1.0e-4
            y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
            y_true = T.flatten(y_true)
            # this seems to work
            # it is super ugly though and I am sure there is a better way to do it
            # but I am struggling with theano to cooperate
            # filter the right indices
            indPos = T.nonzero(y_true)[0] # no idea why this is a tuple
            indNeg = T.nonzero(1-y_true)[0]
            # shuffle
            n = indPos.shape[0]
            indPos = indPos[self.srng.permutation(n=n)]
            n = indNeg.shape[0]
            indNeg = indNeg[self.srng.permutation(n=n)]
            # take equal number of samples depending on which class has less
            n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

            indPos = indPos[:n_samples]
            indNeg = indNeg[:n_samples]
            loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
            average_loss = T.mean(loss_vector)
            #return average_loss
            return T.mean(T.log(y_pred_clipped[indPos]))


        # input data should be large patches as prediction is also over large patches
        print
        print "=== building network ==="

        print "== BLOCK 1 =="
        input = Input(shape=(1, self.patchSize, self.patchSize))
        print "input ", input._keras_shape
        block1_act, block1_pool = unet_block_down(input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll)
        print "block1 act ", block1_act._keras_shape
        print "block1 ", block1_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 2 =="
        block2_act, block2_pool = unet_block_down(input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll)
        print "block2 ", block2_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 3 =="
        block3_act, block3_pool = unet_block_down(input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll)
        print "block3 ", block3_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 4 =="
        block4_act, block4_pool = unet_block_down(input=block3_pool, nb_filter=512, doDropout=True, doBatchNorm=self.doBatchNormAll)
        print "block4 ", block4_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 5 =="
        print "no pooling"
        block5_act, block5_pool = unet_block_down(input=block4_pool, nb_filter=1024, doDropout=True, doPooling=False, doBatchNorm=self.doBatchNormAll)
        print "block5 ", block5_pool._keras_shape
        #sys.stdout.flush()

        print "=============="
        print

        print "== BLOCK 4 UP =="
        block4_up = unet_block_up(input=block5_act, nb_filter=512, down_block_out=block4_act, doBatchNorm=self.doBatchNormAll)
        print "block4 up", block4_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 3 UP =="
        block3_up = unet_block_up(input=block4_up, nb_filter=256, down_block_out=block3_act, doBatchNorm=self.doBatchNormAll)
        print "block3 up", block3_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 2 UP =="
        block2_up = unet_block_up(input=block3_up, nb_filter=128, down_block_out=block2_act, doBatchNorm=self.doBatchNormAll)
        print "block2 up", block2_up._keras_shape
        #sys.stdout.flush()

        print
        print "== BLOCK 1 UP =="
        block1_up = unet_block_up(input=block2_up, nb_filter=64, down_block_out=block1_act, doBatchNorm=self.doBatchNormAll)
        print "block1 up", block1_up._keras_shape
        sys.stdout.flush()

        print "== 1x1 convolution =="
        output = Convolution2D(nb_filter=1, nb_row=1, nb_col=1, subsample=(1,1),
                                 init=self.initialization, activation='sigmoid', border_mode="valid")(block1_up)
        print "output ", output._keras_shape
        output_flat = Flatten()(output)
        print "output flat ", output_flat._keras_shape
        model = Model(input=input, output=output_flat)
        #model = Model(input=input, output=block1_act)
        #sys.stdout.flush()

        '''
        if doFineTune:
            model = model_from_json(open('unet_sampling_best.json').read())
            model.load_weights('unet_sampling_best_weights.h5')

        '''

        sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False)
        #model.compile(loss='mse', optimizer=sgd)
        model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd)
        #model.compile(loss=unet_crossentropy_loss, optimizer=sgd)


        print 'sampling data...'
        d          = data.sample()
        data_x     = d[0]
        data_y     = d[1]
        data_x_val = d[2]
        data_y_val = d[3]
        reset      = d[4]

        patchSize = self.patchSize
        patchSize_out = self.patchSize_out

        print 'patchSize:',patchSize,'patchSize_out:', patchSize_out
        data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize])
        data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize])

        data_label_val = data_y_val
        val_samples = data_y_val.shape[0]
        print data_x.shape, data_y.shape

        print 'got data...'
        print "current learning rate: ", model.optimizer.lr.get_value()
        o = model.fit(data_x, data_y, batch_size=1, nb_epoch=1)
        print o.history["loss"]
        exit(1)


    def train(self, offline=False, data=None, mean=None, std=None):

        print 'Unet.train'

        # input data should be large patches as prediction is also over large patches
        print
        print "=== building network ==="

        print "== BLOCK 1 =="
        input = Input(shape=(1, self.patchSize, self.patchSize))
        print "input ", input._keras_shape
        block1_act, block1_pool = UNET.unet_block_down(input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll)
        print "block1 act ", block1_act._keras_shape
        print "block1 ", block1_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 2 =="
        block2_act, block2_pool = UNET.unet_block_down(input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll)
        print "block2 ", block2_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 3 =="
        block3_act, block3_pool = UNET.unet_block_down(input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll)
        print "block3 ", block3_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 4 =="
        block4_act, block4_pool = UNET.unet_block_down(input=block3_pool, nb_filter=512, doDropout=True, doBatchNorm=self.doBatchNormAll)
        print "block4 ", block4_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 5 =="
        print "no pooling"
        block5_act, block5_pool = UNET.unet_block_down(input=block4_pool, nb_filter=1024, doDropout=True, doPooling=False, doBatchNorm=self.doBatchNormAll)
        print "block5 ", block5_pool._keras_shape
        #sys.stdout.flush()

        print "=============="
        print

        print "== BLOCK 4 UP =="
        block4_up = UNET.unet_block_up(input=block5_act, nb_filter=512, down_block_out=block4_act, doBatchNorm=self.doBatchNormAll)
        print "block4 up", block4_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 3 UP =="
        block3_up = UNET.unet_block_up(input=block4_up, nb_filter=256, down_block_out=block3_act, doBatchNorm=self.doBatchNormAll)
        print "block3 up", block3_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 2 UP =="
        block2_up = UNET.unet_block_up(input=block3_up, nb_filter=128, down_block_out=block2_act, doBatchNorm=self.doBatchNormAll)
        print "block2 up", block2_up._keras_shape
        #sys.stdout.flush()

        print
        print "== BLOCK 1 UP =="
        block1_up = UNET.unet_block_up(input=block2_up, nb_filter=64, down_block_out=block1_act, doBatchNorm=self.doBatchNormAll)
        print "block1 up", block1_up._keras_shape
        sys.stdout.flush()

        print "== 1x1 convolution =="
        output = Convolution2D(nb_filter=1, nb_row=1, nb_col=1, subsample=(1,1),
                                 init=self.initialization, activation='sigmoid', border_mode="valid")(block1_up)
        print "output ", output._keras_shape
        output_flat = Flatten()(output)
        print "output flat ", output_flat._keras_shape

        print 'Unet.train'

        #self.load()
 
        if self.model == None: 
            j_path, w_path = self.get_model_paths( )

            if os.path.exists( j_path ) and os.path.exists( w_path ):
                print 'loading from: ', j_path
                self.model = model_from_json(open( j_path ).read())
                self.model.load_weights( w_path )
            else:
                print 'creating....'
                self.model = Model(input=input, output=output_flat)

        sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False)
        #self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd)
        self.model.compile(loss=UNET.unet_crossentropy_loss, optimizer=sgd)
 
        print 'sampling data...' 
        d          = data.sample()
        data_x     = d[0]
        data_y     = d[1]
        data_x_val = d[2]
        data_y_val = d[3]
        reset      = d[4]

        patchSize = self.patchSize
        patchSize_out = self.patchSize_out

        print 'patchSize:',patchSize,'patchSize_out:', patchSize_out
        data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize])
        data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize])

        data_label_val = data_y_val
        val_samples = data_y_val.shape[0]
        print data_x.shape, data_y.shape

        print 'got data...'
        print "current learning rate: ", self.model.optimizer.lr.get_value()
        self.model.fit(data_x, data_y, batch_size=1, nb_epoch=1)

        im_pred = 1-self.model.predict(x=data_x_val, batch_size = 1)

        print im_pred.shape
        print data_label_val.shape
        print data_x_val.shape
 
        mean_val_rand = 0.0
        for val_ind in xrange(val_samples):
            im_pred_single = np.reshape(im_pred[val_ind,:], (patchSize_out,patchSize_out))
            im_gt = np.reshape(data_label_val[val_ind], (patchSize_out,patchSize_out))
            validation_rand = Rand_membrane_prob(im_pred_single, im_gt)
            mean_val_rand += validation_rand
            print 'val:', val_ind, 'rand:', validation_rand, 'mrand:', mean_val_rand
        mean_val_rand /= np.double(val_samples)
        print "validation RAND ", mean_val_rand

        exit(1)
        self.save_current()

        print mean_val_rand, " > ",  self.best_val_loss_so_far
        print mean_val_rand - self.best_val_loss_so_far
        if mean_val_rand > self.best_val_loss_so_far:
            self.best_val_loss_so_far = mean_val_rand
            print "NEW BEST MODEL"
            self.save_best()
            self.patience_counter=0
        else:
            self.patience_counter +=1

        # no progress anymore, need to decrease learning rate
        if self.patience_counter == self.patience:
            print "DECREASING LEARNING RATE"
            print "before: ", learning_rate
            learning_rate *= 0.1
            print "now: ", learning_rate
            self.model.optimizer.lr.set_value(learning_rate)
            self.patience = self.patience_reset
            self.patience_counter = 0

            # reload best state seen so far
            self.model = self.load()
            '''
            model = model_from_json(open(filename+'.json').read())
            model.load_weights(filename+'_weights.h5')
            model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd)
            '''

    def train_offline(self, data, mean=None, std=None):
        pass

    def classify(self, image, mean=None, std=None):
        print 'Unet.classify'

    def predict(self, image, mean=None, std=None, threshold=0.5):
        print 'Unet.predict'
        start_time = time.clock()

        j_path, w_path = self.get_model_paths( )

        print 'loading model from:', j_path

        model = model_from_json(open( j_path ).read())
        model.load_weights( w_path )
        sgd = SGD(lr=0.01, decay=0, momentum=0.0, nesterov=False)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        image = image - 0.5

        probImage = np.zeros(image.shape)

        patchSize = self.patchSize
        patchSize_out = self.patchSize_out

        # run one warm-up prediction so compilation time counts toward initialization
        row = 0
        col = 0
        patch = image[row:row+patchSize,col:col+patchSize]
        data = np.reshape(patch, (1,1,patchSize,patchSize))
        probs = model.predict(x=data, batch_size=1)

        init_time = time.clock()
        #print "Initialization took: ", init_time - start_time
        image_orig = image.copy()
        for rotation in range(1):
            image = np.rot90(image_orig, rotation)
            # pad the image
            padding_ul = int(np.ceil((patchSize - patchSize_out)/2.0))
            # need large padding for lower right corner
            paddedImage = np.pad(image, patchSize, mode='reflect')
            needed_ul_padding = patchSize - padding_ul
            paddedImage = paddedImage[needed_ul_padding:, needed_ul_padding:]

            probImage_tmp = np.zeros(image.shape)
            for row in xrange(0,image.shape[0],patchSize_out):
                for col in xrange(0,image.shape[1],patchSize_out):
                    patch = paddedImage[row:row+patchSize,col:col+patchSize]
                    data = np.reshape(patch, (1,1,patchSize,patchSize))
                    probs = 1-model.predict(x=data, batch_size = 1)
                    probs = np.reshape(probs, (patchSize_out,patchSize_out))

                    row_end = patchSize_out
                    if row+patchSize_out > probImage.shape[0]:
                        row_end = probImage.shape[0]-row
                    col_end = patchSize_out
                    if col+patchSize_out > probImage.shape[1]:
                        col_end = probImage.shape[1]-col

                    probImage_tmp[row:row+row_end,col:col+col_end] = probs[:row_end,:col_end]
            probImage += np.rot90(probImage_tmp, 4-rotation)

        probImage = probImage / 1.0

        prob = self.threshold( probImage, factor=threshold )
        prob = prob.astype(dtype=int)
        prob = prob.flatten()

        end_time = time.clock()

        print "Prediction took: ", end_time - init_time
        print "Speed: ", 1./(end_time - init_time)
        print "Time total: ", end_time-start_time

        print 'results :', np.bincount( prob )
        print prob.shape
        print prob
        return prob


    def threshold(self, prob, factor=0.5):
        prob[ prob >= factor ] = 9
        prob[ prob <  factor ] = 0
        prob[ prob == 9      ] = 1
        return prob


    def get_model_paths(self):
        path = self.get_path()
        j_path = '%s_best.json'%(path)
        w_path = '%s_best_weights.h5'%(path)

        # first, attempt the best model, otherwise default to the latest
        if not os.path.exists( j_path ) and not os.path.exists( w_path ):
            path = Utility.get_dir(self.path)
            j_path = '%s/%s_%s.json'%(Paths.Models, self.id, self.type)
            w_path = '%s/%s_%s_weights.h5'%(Paths.Models, self.id, self.type)

        return j_path.lower(), w_path.lower()

    def load(self):
        j_path, w_path = self.get_model_paths( )

        if os.path.exists( j_path ) and os.path.exists( w_path ):
            print 'loading from: ', j_path
            self.model = model_from_json(open( j_path ).read())
            self.model.load_weights( w_path )
        else:
            print 'creating....'
            inp, out = self.gen_input_output()
            print inp.shape, out.shape
            self.model = Model(input=inp, output=out)

        sgd = SGD(lr=self.learning_rate, decay=0, momentum=self.momentum, nesterov=False)
        self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd)


    def save_current(self):
        path = Utility.get_dir(self.path)
        j_path = '%s/%s_%s.json'%(Paths.Models, self.id, self.type)
        w_path = '%s/%s_%s_weights.h5'%(Paths.Models, self.id, self.type)
        j_path = j_path.lower()
        w_path = w_path.lower()

        json_string = self.model.to_json()
        open(j_path, 'w').write(json_string)
        self.model.save_weights(w_path, overwrite=True)

    def save_best(self):
        print 'Unet.save'
        path = Utility.get_dir(self.path)
        revision = 0
        if not self.offline:
            revision = DB.getRevision( self.id )
            revision = (revision+1)%10
            path = '%s/%s_%s_%d'%(Paths.Models, self.id, self.type, revision)
            path = path.lower()

        j_path = '%s_best.json'%(path)
        w_path = '%s_best_weights.h5'%(path)
        j_path = j_path.lower()
        w_path = w_path.lower()

        print 'saving...', path
        # saving code here...
        json_string = self.model.to_json()
        open(j_path, 'w').write(json_string)
        self.model.save_weights(w_path, overwrite=True)

        if not self.offline:
            DB.finishSaveModel( self.id, revision )

    def get_path(self):
        if self.offline:
            return self.path

        rev  = DB.getRevision( self.id )
        path = '%s/%s.%s.%d'%(Paths.Models, self.id, self.type, rev )
        return path.lower()

    def reportTrainingStats(self, elapsedTime, batchIndex, valLoss, trainCost, mode=0):
        DB.storeTrainingStats( self.id, valLoss, trainCost, mode=mode)
        msg = '(%0.1f)     %i     %f%%'%\
        (
           elapsedTime,
           batchIndex,
           valLoss
        )
        status = '[%f]'%(trainCost)
        Utility.report_status( msg, status )


    # need to define a custom loss, because all pre-implementations
    # seem to assume that scores over a patch add up to one, which
    # they clearly don't and shouldn't
    @staticmethod
    def unet_crossentropy_loss(y_true, y_pred):
        weight_class_1 = 1.
        epsilon = 1.0e-4
        y_pred_clipped = T.clip(y_pred, epsilon, 1.0-epsilon)
        loss_vector = -T.mean(weight_class_1*y_true * T.log(y_pred_clipped) + (1-y_true) * T.log(1-y_pred_clipped), axis=1)
        average_loss = T.mean(loss_vector)
        return average_loss

    @staticmethod
    def unet_crossentropy_loss_sampled(y_true, y_pred):
        epsilon = 1.0e-4
        y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
        y_true = T.flatten(y_true)
        # this seems to work
        # it is super ugly though and I am sure there is a better way to do it
        # but I am struggling with theano to cooperate
        # filter the right indices
        indPos = T.nonzero(y_true)[0] # no idea why this is a tuple
        indNeg = T.nonzero(1-y_true)[0]
        # shuffle
        n = indPos.shape[0]
        indPos = indPos[UNET.srng.permutation(n=n)]
        n = indNeg.shape[0]
        indNeg = indNeg[UNET.srng.permutation(n=n)]
        # take equal number of samples depending on which class has less
        n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

        indPos = indPos[:n_samples]
        indNeg = indNeg[:n_samples]
        loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
        average_loss = T.mean(loss_vector)
        return average_loss

    @staticmethod
    def unet_block_down(input, nb_filter, doPooling=True, doDropout=False, doBatchNorm=False, initialization = 'glorot_uniform', weight_decay = 0.):
        # first convolutional block consisting of 2 conv layers plus activation, then maxpool.
        # All are valid area, not same
        act1 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1),
                                 init=initialization, activation='relu',  border_mode="valid", W_regularizer=l2(weight_decay))(input)
        if doBatchNorm:
            act1 = BatchNormalization(mode=0, axis=1)(act1)

        act2 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1),
                                 init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(act1)
        if doBatchNorm:
            act2 = BatchNormalization(mode=0, axis=1)(act2)

        if doDropout:
            act2 = Dropout(0.5)(act2)

        if doPooling:
            # now downsampling with maxpool
            pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), border_mode="valid")(act2)
        else:
            pool1 = act2

        return (act2, pool1)

    # need to define lambda layer to implement cropping
    # input is a tensor of size (batchsize, channels, width, height)
    @staticmethod
    def crop_layer( x, cs):
        cropSize = cs
        return x[:,:,cropSize:-cropSize, cropSize:-cropSize]

    @staticmethod
    def unet_block_up(input, nb_filter, down_block_out, doBatchNorm=False, initialization = 'glorot_uniform', weight_decay = 0.):
        print "This is unet_block_up"
        print "input ", input._keras_shape
        # upsampling
        up_sampled = UpSampling2D(size=(2,2))(input)
        print "upsampled ", up_sampled._keras_shape
        # up-convolution
        conv_up = Convolution2D(nb_filter=nb_filter, nb_row=2, nb_col=2, subsample=(1,1),
                                 init=initialization, activation='relu', border_mode="same", W_regularizer=l2(weight_decay))(up_sampled)
        print "up-convolution ", conv_up._keras_shape
        # concatenation with cropped high res output
        # this is too large and needs to be cropped
        print "to be merged with ", down_block_out._keras_shape

        #padding_1 = int((down_block_out._keras_shape[2] - conv_up._keras_shape[2])/2)
        #padding_2 = int((down_block_out._keras_shape[3] - conv_up._keras_shape[3])/2)
        #print "padding: ", (padding_1, padding_2)
        #conv_up_padded = ZeroPadding2D(padding=(padding_1, padding_2))(conv_up)
        #merged = merge([conv_up_padded, down_block_out], mode='concat', concat_axis=1)

        cropSize = int((down_block_out._keras_shape[2] - conv_up._keras_shape[2])/2)
        down_block_out_cropped = Lambda(UNET.crop_layer, output_shape=conv_up._keras_shape[1:], arguments={"cs":cropSize})(down_block_out)
        print "cropped layer size: ", down_block_out_cropped._keras_shape
        merged = merge([conv_up, down_block_out_cropped], mode='concat', concat_axis=1)

        print "merged ", merged._keras_shape
        # two 3x3 convolutions with ReLU
        # first one halves the feature channels
        act1 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1),
                                 init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(merged)

        if doBatchNorm:
            act1 = BatchNormalization(mode=0, axis=1)(act1)

        print "conv1 ", act1._keras_shape
        act2 = Convolution2D(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=(1,1),
                                 init=initialization, activation='relu', border_mode="valid", W_regularizer=l2(weight_decay))(act1)
        if doBatchNorm:
            act2 = BatchNormalization(mode=0, axis=1)(act2)


        print "conv2 ", act2._keras_shape

        return act2
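A minimal sketch of the class-balancing trick used in unet_crossentropy_loss_sampled above, kept separate from the original class: the positive and negative index vectors are shuffled with RandomStreams.permutation and truncated to the size of the smaller class. Variable names are illustrative.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(1234)
y_true = T.fvector('y_true')                            # flattened 0/1 labels
indPos = T.nonzero(y_true)[0]                           # indices of positive pixels
indNeg = T.nonzero(1 - y_true)[0]                       # indices of negative pixels
indPos = indPos[srng.permutation(n=indPos.shape[0])]    # shuffle each class independently
indNeg = indNeg[srng.permutation(n=indNeg.shape[0])]
n_samples = T.cast(T.minimum(T.sum(y_true), T.sum(1 - y_true)), 'int64')
balanced = T.concatenate([indPos[:n_samples], indNeg[:n_samples]])
pick = theano.function([y_true], balanced)
print(pick(np.array([1, 0, 0, 1, 0], dtype='float32')))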
Example #13
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize reflection parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                bucket_size = n_in - b * d_bucket
            
            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]

            
            w_b = self.w[b, :bucket_size, :bucket_size]

#            W = T.slinalg.Expm()(w_b)
#            log_jacobian = log_jacobian + T.alloc(T.nlinalg.trace(w_b), n_batch)

            Upper = T.triu(w_b)
#            Upper = T.extra_ops.fill_diagonal(Upper, 1.)
            Lower = T.tril(w_b)
            Lower = T.extra_ops.fill_diagonal(Lower, 1.)
            log_det_Upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(Upper))).sum() 
#            log_det_Lower = T.log(T.abs_(T.nlinalg.ExtractDiag()(Lower))).sum() 


            W = T.dot(Upper, Lower)
            log_jacobian = log_jacobian + T.alloc(log_det_Upper, n_batch)

            
#            W = T.dot(T.transpose(w_b), w_b) + 0.001*T.eye(bucket_size)
#            log_jacobian = log_jacobian + T.alloc(T.log(T.abs_(T.nlinalg.Det()(W))), n_batch)

#            diag = T.nlinalg.diag(W)
#            div = T.tile(T.reshape(T.sqrt(diag), [1, bucket_size]), (bucket_size, 1))
            
#            W = W / div / T.transpose(div)
            #import pdb; pdb.set_trace()

            lin_output_b = T.dot(x_b, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)     
                #import pdb; pdb.set_trace()
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)

                
#                for n in xrange(n_batch):                    
#                    mat = T.tile(T.reshape(derivs[n], [1, bucket_size]), (bucket_size, 1))
#                    mat = mat * W                   
#                    T.inc_subtensor(log_jacobian[n], T.log(T.abs_(T.nlinalg.Det()(mat))))
                    
        self.log_jacobian = log_jacobian        

        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )


        self.params = [w]
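The layer above undoes its random permutation of the input space with T.argsort. A small sketch, separate from the original code, showing that the argsort of a permutation acts as its inverse; x and roundtrip are illustrative names.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(seed=234)
n_in = 8
perm = srng.permutation(n=n_in)       # random permutation of 0..n_in-1
perm_inv = T.argsort(perm)            # argsort of a permutation is its inverse
x = T.matrix('x')
roundtrip = x[:, perm][:, perm_inv]   # permute the columns, then undo it
f = theano.function([x], [perm, roundtrip])
p, r = f(np.arange(8, dtype=theano.config.floatX).reshape(1, 8))
print(p)
print(r)   # matches the original row 0..7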
Example #14
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize reflection parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:                
                bucket_size = n_in - b * d_bucket
            
            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]

            
            w_b = w[b, :bucket_size, :bucket_size]
            wTwinv = T.nlinalg.MatrixInverse()(T.dot(T.transpose(w_b), w_b) + 0.001*T.eye(bucket_size))
            L = T.slinalg.Cholesky()(wTwinv) 

            W = T.dot(w_b, L)

            #import pdb; pdb.set_trace()

            lin_output_b = T.dot(x_b, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(x_b)             
                for n in xrange(n_batch):                    
                    mat = T.tile(T.reshape(derivs[n], [1, bucket_size]), (bucket_size, 1))
                    mat = mat * W                   
                    log_jacobian = T.inc_subtensor(log_jacobian[n], T.log(T.abs_(T.nlinalg.Det()(mat))))
                    
        self.log_jacobian = log_jacobian        

        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )

        self.params = [w]
Example #15
class Visual(task.Task):

    def __init__(self, config):
        autoassign(locals())
        self.updater = util.Adam(max_norm=config['max_norm'], lr=config['lr'])
        self.Encode = Encoder(config['size_vocab'],
                              config['size_embed'], config['size'],
                              config['depth'],
                              activation=eval(config.get('activation',
                                                         'clipped_rectify')),
                              residual=config.get('residual',False))
        self.ToImg  = Dense(config['size'], config['size_target'])
        self.inputs = [T.imatrix()]
        self.target = T.fmatrix()
        self.config['margin'] = self.config.get('margin', False)
        if self.config['margin']:
            self.srng = RandomStreams(seed=234)
        
        
    def params(self):
        return params(self.Encode, self.ToImg)
    
    def __call__(self, input):
        return self.ToImg(last(self.Encode(input)))
    
    def cost(self, target, prediction):
        if self.config['margin']:
            return self.Margin(target, prediction, dist=CosineDistance, d=1)
        elif self.config.get('contrastive'):
            return self.contrastive(target, prediction, margin=0.2)
        else:
            return CosineDistance(target, prediction)
            
    def contrastive(self, i, s, margin=0.2): 
        # i: (fixed) image embedding, 
        # s: sentence embedding
        errors = - util.cosine_matrix(i, s)
        diagonal = errors.diagonal()
        # compare every diagonal score to scores in its column (all contrastive images for each sentence)
        cost_s = T.maximum(0, margin - errors + diagonal)  
        # all contrastive sentences for each image
        cost_i = T.maximum(0, margin - errors + diagonal.reshape((-1, 1)))  
        cost_tot = cost_s + cost_i
        # clear diagonals
        cost_tot = fill_diagonal(cost_tot, 0)

        return cost_tot.mean()
    
    def Margin(self, U, V, dist=CosineDistance, d=1.0):
        V_ = (V[self.srng.permutation(n=T.shape(V)[0],
                                      size=(1,)),]).reshape(T.shape(V))
        # A bit silly making it nondet
        return T.maximum(0.0, dist(U, V) - dist(U, V_) + d)
    
    def args(self, item):
        return (item['input'], item['target_v'])

    def _make_representation(self):
        with context.context(training=False):
            rep = self.Encode(*self.inputs)
        return theano.function(self.inputs, rep)

    def _make_pile(self):
        with context.context(training=False):
            rep = self.Encode.GRU.intermediate(self.Encode.Embed(*self.inputs))
        return theano.function(self.inputs, rep)
Example #16
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize matrix parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                bucket_size = n_in - b * d_bucket
            
           
            if b>0:
                prev_input = x_b
                
                """here we warp the previous bucket of inputs and add to the new input"""            

            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]
            w_b = self.w[b, :bucket_size, :bucket_size]

            if b>0:
                x_b_plus = x_b + m_b
            else:
                x_b_plus = x_b

            Upper = T.triu(w_b)
            Lower = T.tril(w_b)
            Lower = T.extra_ops.fill_diagonal(Lower, 1.)
            log_det_Upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(Upper))).sum() 

            W = T.dot(Upper, Lower)
            log_jacobian = log_jacobian + T.alloc(log_det_Upper, n_batch)

            lin_output_b = T.dot(x_b_plus, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)     
                #import pdb; pdb.set_trace()
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)                 
                    
        self.log_jacobian = log_jacobian        


        self.output = (
            lin_output[:, index_permute_reverse] if activation is None
            else activation(lin_output[:, index_permute_reverse])
        )

        self.params = [w]
Example #17
File: oldunet.py Project: afcarl/icon
class UNET(object):
    def __init__(self,
                 id,
                 rng,
                 batch_size,
                 patch_size=572,
                 patch_size_out=388,
                 offline=False,
                 path=None,
                 train_time=5.0,
                 learning_rate=0.01,
                 momentum=0.95):
        self.id = id
        self.type = 'UNET'
        self.offline = offline
        self.done = False
        self.path = path
        self.batchSize = batch_size
        self.patchSize = patch_size
        self.patchSize_out = patch_size_out
        self.learning_rate = learning_rate
        self.momentum = momentum

        self.best_validation_loss = numpy.inf
        self.trainTime = train_time
        self.resample = False
        self.error = np.inf
        self.error_threshold = 0.06
        self.best_val_loss_so_far = 0
        self.patience_counter = 0
        self.patience = 100
        self.patience_reset = 100

        self.doBatchNormAll = False
        self.doFineTune = False

        self.weight_decay = 0.
        self.weight_class_1 = 1.
        self.initialization = 'glorot_uniform'

        self.model = None

        # also store the stream on the class so the static loss functions
        # below (which reference UNET.srng) can find it
        self.srng = UNET.srng = RandomStreams(1234)

        self.initialize()

    def initialize(self):
        print 'Unet.initialize'

    def trainiold(self, offline=False, data=None, mean=None, std=None):
        print 'UNET.train()'

        # need to define a custom loss, because all pre-implementations
        # seem to assume that scores over patch add up to one which
        # they clearly don't and shouldn't
        def unet_crossentropy_loss(y_true, y_pred):
            weight_class_1 = 1.
            epsilon = 1.0e-4
            y_pred_clipped = T.clip(y_pred, epsilon, 1.0 - epsilon)
            loss_vector = -T.mean(
                weight_class_1 * y_true * T.log(y_pred_clipped) +
                (1 - y_true) * T.log(1 - y_pred_clipped),
                axis=1)
            average_loss = T.mean(loss_vector)
            return average_loss

        def unet_crossentropy_loss_sampled(y_true, y_pred):
            print 'unet_crossentropy_loss_sampled'
            epsilon = 1.0e-4
            y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
            y_true = T.flatten(y_true)
            # select the indices of positive and negative pixels
            # (T.nonzero returns a tuple of index arrays; take the first)
            indPos = T.nonzero(y_true)[0]
            indNeg = T.nonzero(1 - y_true)[0]
            # shuffle
            n = indPos.shape[0]
            indPos = indPos[self.srng.permutation(n=n)]
            n = indNeg.shape[0]
            indNeg = indNeg[self.srng.permutation(n=n)]
            # take equal number of samples depending on which class has less
            n_samples = T.cast(T.min([T.sum(y_true),
                                      T.sum(1 - y_true)]),
                               dtype='int64')

            indPos = indPos[:n_samples]
            indNeg = indNeg[:n_samples]
            loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(
                T.log(1 - y_pred_clipped[indNeg]))
            average_loss = T.mean(loss_vector)
            return average_loss

        # input data should be large patches as prediction is also over large patches
        print
        print "=== building network ==="

        print "== BLOCK 1 =="
        input = Input(shape=(1, self.patchSize, self.patchSize))
        print "input ", input._keras_shape
        block1_act, block1_pool = unet_block_down(
            input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll)
        print "block1 act ", block1_act._keras_shape
        print "block1 ", block1_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 2 =="
        block2_act, block2_pool = unet_block_down(
            input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll)
        print "block2 ", block2_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 3 =="
        block3_act, block3_pool = unet_block_down(
            input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll)
        print "block3 ", block3_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 4 =="
        block4_act, block4_pool = unet_block_down(
            input=block3_pool,
            nb_filter=512,
            doDropout=True,
            doBatchNorm=self.doBatchNormAll)
        print "block4 ", block4_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 5 =="
        print "no pooling"
        block5_act, block5_pool = unet_block_down(
            input=block4_pool,
            nb_filter=1024,
            doDropout=True,
            doPooling=False,
            doBatchNorm=self.doBatchNormAll)
        print "block5 ", block5_pool._keras_shape
        #sys.stdout.flush()

        print "=============="
        print

        print "== BLOCK 4 UP =="
        block4_up = unet_block_up(input=block5_act,
                                  nb_filter=512,
                                  down_block_out=block4_act,
                                  doBatchNorm=self.doBatchNormAll)
        print "block4 up", block4_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 3 UP =="
        block3_up = unet_block_up(input=block4_up,
                                  nb_filter=256,
                                  down_block_out=block3_act,
                                  doBatchNorm=self.doBatchNormAll)
        print "block3 up", block3_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 2 UP =="
        block2_up = unet_block_up(input=block3_up,
                                  nb_filter=128,
                                  down_block_out=block2_act,
                                  doBatchNorm=self.doBatchNormAll)
        print "block2 up", block2_up._keras_shape
        #sys.stdout.flush()

        print
        print "== BLOCK 1 UP =="
        block1_up = unet_block_up(input=block2_up,
                                  nb_filter=64,
                                  down_block_out=block1_act,
                                  doBatchNorm=self.doBatchNormAll)
        print "block1 up", block1_up._keras_shape
        sys.stdout.flush()

        print "== 1x1 convolution =="
        output = Convolution2D(nb_filter=1,
                               nb_row=1,
                               nb_col=1,
                               subsample=(1, 1),
                               init=self.initialization,
                               activation='sigmoid',
                               border_mode="valid")(block1_up)
        print "output ", output._keras_shape
        output_flat = Flatten()(output)
        print "output flat ", output_flat._keras_shape
        model = Model(input=input, output=output_flat)
        #model = Model(input=input, output=block1_act)
        #sys.stdout.flush()
        '''
        if doFineTune:
            model = model_from_json(open('unet_sampling_best.json').read())
            model.load_weights('unet_sampling_best_weights.h5')

        '''

        sgd = SGD(lr=self.learning_rate,
                  decay=0,
                  momentum=self.momentum,
                  nesterov=False)
        #model.compile(loss='mse', optimizer=sgd)
        model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd)
        #model.compile(loss=unet_crossentropy_loss, optimizer=sgd)

        print 'sampling data...'
        d = data.sample()
        data_x = d[0]
        data_y = d[1]
        data_x_val = d[2]
        data_y_val = d[3]
        reset = d[4]

        patchSize = self.patchSize
        patchSize_out = self.patchSize_out

        print 'patchSize:', patchSize, 'patchSize_out:', patchSize_out
        data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize])
        data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize])

        data_label_val = data_y_val
        val_samples = data_y_val.shape[0]
        print data_x.shape, data_y.shape

        print 'got data...'
        print "current learning rate: ", model.optimizer.lr.get_value()
        o = model.fit(data_x, data_y, batch_size=1, nb_epoch=1)
        print o.history["loss"]
        exit(1)

    def train(self, offline=False, data=None, mean=None, std=None):

        print 'Unet.train'

        # input data should be large patches as prediction is also over large patches
        print
        print "=== building network ==="

        print "== BLOCK 1 =="
        input = Input(shape=(1, self.patchSize, self.patchSize))
        print "input ", input._keras_shape
        block1_act, block1_pool = UNET.unet_block_down(
            input=input, nb_filter=64, doBatchNorm=self.doBatchNormAll)
        print "block1 act ", block1_act._keras_shape
        print "block1 ", block1_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 2 =="
        block2_act, block2_pool = UNET.unet_block_down(
            input=block1_pool, nb_filter=128, doBatchNorm=self.doBatchNormAll)
        print "block2 ", block2_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 3 =="
        block3_act, block3_pool = UNET.unet_block_down(
            input=block2_pool, nb_filter=256, doBatchNorm=self.doBatchNormAll)
        print "block3 ", block3_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 4 =="
        block4_act, block4_pool = UNET.unet_block_down(
            input=block3_pool,
            nb_filter=512,
            doDropout=True,
            doBatchNorm=self.doBatchNormAll)
        print "block4 ", block4_pool._keras_shape
        #sys.stdout.flush()

        print "== BLOCK 5 =="
        print "no pooling"
        block5_act, block5_pool = UNET.unet_block_down(
            input=block4_pool,
            nb_filter=1024,
            doDropout=True,
            doPooling=False,
            doBatchNorm=self.doBatchNormAll)
        print "block5 ", block5_pool._keras_shape
        #sys.stdout.flush()

        print "=============="
        print

        print "== BLOCK 4 UP =="
        block4_up = UNET.unet_block_up(input=block5_act,
                                       nb_filter=512,
                                       down_block_out=block4_act,
                                       doBatchNorm=self.doBatchNormAll)
        print "block4 up", block4_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 3 UP =="
        block3_up = UNET.unet_block_up(input=block4_up,
                                       nb_filter=256,
                                       down_block_out=block3_act,
                                       doBatchNorm=self.doBatchNormAll)
        print "block3 up", block3_up._keras_shape
        print
        #sys.stdout.flush()

        print "== BLOCK 2 UP =="
        block2_up = UNET.unet_block_up(input=block3_up,
                                       nb_filter=128,
                                       down_block_out=block2_act,
                                       doBatchNorm=self.doBatchNormAll)
        print "block2 up", block2_up._keras_shape
        #sys.stdout.flush()

        print
        print "== BLOCK 1 UP =="
        block1_up = UNET.unet_block_up(input=block2_up,
                                       nb_filter=64,
                                       down_block_out=block1_act,
                                       doBatchNorm=self.doBatchNormAll)
        print "block1 up", block1_up._keras_shape
        sys.stdout.flush()

        print "== 1x1 convolution =="
        output = Convolution2D(nb_filter=1,
                               nb_row=1,
                               nb_col=1,
                               subsample=(1, 1),
                               init=self.initialization,
                               activation='sigmoid',
                               border_mode="valid")(block1_up)
        print "output ", output._keras_shape
        output_flat = Flatten()(output)
        print "output flat ", output_flat._keras_shape

        print 'Unet.train'

        #self.load()

        if self.model == None:
            j_path, w_path = self.get_model_paths()

            if os.path.exists(j_path) and os.path.exists(w_path):
                print 'loading from: ', j_path
                self.model = model_from_json(open(j_path).read())
                self.model.load_weights(w_path)
            else:
                print 'creating....'
                self.model = Model(input=input, output=output_flat)

        sgd = SGD(lr=self.learning_rate,
                  decay=0,
                  momentum=self.momentum,
                  nesterov=False)
        #self.model.compile(loss=UNET.unet_crossentropy_loss_sampled, optimizer=sgd)
        self.model.compile(loss=UNET.unet_crossentropy_loss, optimizer=sgd)

        print 'sampling data...'
        d = data.sample()
        data_x = d[0]
        data_y = d[1]
        data_x_val = d[2]
        data_y_val = d[3]
        reset = d[4]

        patchSize = self.patchSize
        patchSize_out = self.patchSize_out

        print 'patchSize:', patchSize, 'patchSize_out:', patchSize_out
        data_x_val = np.reshape(data_x_val, [-1, 1, patchSize, patchSize])
        data_x = np.reshape(data_x, [-1, 1, patchSize, patchSize])

        data_label_val = data_y_val
        val_samples = data_y_val.shape[0]
        print data_x.shape, data_y.shape

        print 'got data...'
        print "current learning rate: ", self.model.optimizer.lr.get_value()
        self.model.fit(data_x, data_y, batch_size=1, nb_epoch=1)

        im_pred = 1 - self.model.predict(x=data_x_val, batch_size=1)

        print im_pred.shape
        print data_label_val.shape
        print data_x_val.shape

        mean_val_rand = 0.0
        for val_ind in xrange(val_samples):
            im_pred_single = np.reshape(im_pred[val_ind, :],
                                        (patchSize_out, patchSize_out))
            im_gt = np.reshape(data_label_val[val_ind],
                               (patchSize_out, patchSize_out))
            validation_rand = Rand_membrane_prob(im_pred_single, im_gt)
            mean_val_rand += validation_rand
            print 'val:', val_ind, 'rand:', validation_rand, 'mrand:', mean_val_rand
        mean_val_rand /= np.double(val_samples)
        print "validation RAND ", mean_val_rand

        self.save_current()

        print mean_val_rand, " > ", self.best_val_loss_so_far
        print mean_val_rand - self.best_val_loss_so_far
        if mean_val_rand > self.best_val_loss_so_far:
            self.best_val_loss_so_far = mean_val_rand
            print "NEW BEST MODEL"
            self.save_best()
            self.patience_counter = 0
        else:
            self.patience_counter += 1

        # no progress anymore, need to decrease learning rate
        if self.patience_counter == self.patience:
            print "DECREASING LEARNING RATE"
            print "before: ", learning_rate
            learning_rate *= 0.1
            print "now: ", learning_rate
            self.model.optimizer.lr.set_value(learning_rate)
            self.patience = self.patience_reset
            self.patience_counter = 0

            # reload best state seen so far
            self.load()
            '''
            model = model_from_json(open(filename+'.json').read())
            model.load_weights(filename+'_weights.h5')
            model.compile(loss=unet_crossentropy_loss_sampled, optimizer=sgd)
            '''

    def train_offline(self, data, mean=None, std=None):
        pass

    def classify(self, image, mean=None, std=None):
        print 'Unet.classify'

    def predict(self, image, mean=None, std=None, threshold=0.5):
        print 'Unet.predict'
        start_time = time.clock()

        j_path, w_path = self.get_model_paths()

        print 'loading model from:', j_path

        model = model_from_json(open(j_path).read())
        model.load_weights(w_path)
        sgd = SGD(lr=0.01, decay=0, momentum=0.0, nesterov=False)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)

        image = image - 0.5

        probImage = np.zeros(image.shape)

        patchSize = self.patchSize
        patchSize_out = self.patchSize_out

        # run one throw-away prediction so that compilation time is counted
        # as initialization rather than prediction
        row = 0
        col = 0
        patch = image[row:row + patchSize, col:col + patchSize]
        data = np.reshape(patch, (1, 1, patchSize, patchSize))
        probs = model.predict(x=data, batch_size=1)

        init_time = time.clock()
        #print "Initialization took: ", init_time - start_time

        image_orig = image.copy()
        for rotation in range(1):
            image = np.rot90(image_orig, rotation)
            # pad the image
            padding_ul = int(np.ceil((patchSize - patchSize_out) / 2.0))
            # need large padding for lower right corner
            paddedImage = np.pad(image, patchSize, mode='reflect')
            needed_ul_padding = patchSize - padding_ul
            paddedImage = paddedImage[needed_ul_padding:, needed_ul_padding:]

            probImage_tmp = np.zeros(image.shape)
            for row in xrange(0, image.shape[0], patchSize_out):
                for col in xrange(0, image.shape[1], patchSize_out):
                    patch = paddedImage[row:row + patchSize,
                                        col:col + patchSize]
                    data = np.reshape(patch, (1, 1, patchSize, patchSize))
                    probs = 1 - model.predict(x=data, batch_size=1)
                    probs = np.reshape(probs, (patchSize_out, patchSize_out))

                    row_end = patchSize_out
                    if row + patchSize_out > probImage.shape[0]:
                        row_end = probImage.shape[0] - row
                    col_end = patchSize_out
                    if col + patchSize_out > probImage.shape[1]:
                        col_end = probImage.shape[1] - col

                    probImage_tmp[row:row + row_end, col:col +
                                  col_end] = probs[:row_end, :col_end]
            probImage += np.rot90(probImage_tmp, 4 - rotation)

        probImage = probImage / 1.0

        prob = self.threshold(probImage, factor=threshold)
        prob = prob.astype(dtype=int)
        prob = prob.flatten()

        end_time = time.clock()

        print "Prediction took: ", end_time - init_time
        print "Speed: ", 1. / (end_time - init_time)
        print "Time total: ", end_time - start_time

        print 'results :', np.bincount(prob)
        print prob.shape
        print prob
        return prob

    def threshold(self, prob, factor=0.5):
        prob[prob >= factor] = 9
        prob[prob < factor] = 0
        prob[prob == 9] = 1
        return prob

    def get_model_paths(self):
        path = self.get_path()
        j_path = '%s_best.json' % (path)
        w_path = '%s_best_weights.h5' % (path)

        # first, attempt the best model, otherwise default to the latest
        if not os.path.exists(j_path) and not os.path.exists(w_path):
            path = Utility.get_dir(self.path)
            j_path = '%s/%s_%s.json' % (Paths.Models, self.id, self.type)
            w_path = '%s/%s_%s_weights.h5' % (Paths.Models, self.id, self.type)

        return j_path.lower(), w_path.lower()

    def load(self):
        j_path, w_path = self.get_model_paths()

        if os.path.exists(j_path) and os.path.exists(w_path):
            print 'loading from: ', j_path
            self.model = model_from_json(open(j_path).read())
            self.model.load_weights(w_path)
        else:
            print 'creating....'
            inp, out = self.gen_input_output()
            print inp.shape, out.shape
            self.model = Model(input=inp, output=out)

        sgd = SGD(lr=self.learning_rate,
                  decay=0,
                  momentum=self.momentum,
                  nesterov=False)
        self.model.compile(loss=UNET.unet_crossentropy_loss_sampled,
                           optimizer=sgd)

    def save_current(self):
        path = Utility.get_dir(self.path)
        j_path = '%s/%s_%s.json' % (Paths.Models, self.id, self.type)
        w_path = '%s/%s_%s_weights.h5' % (Paths.Models, self.id, self.type)
        j_path = j_path.lower()
        w_path = w_path.lower()

        json_string = self.model.to_json()
        open(j_path, 'w').write(json_string)
        self.model.save_weights(w_path, overwrite=True)

    def save_best(self):
        print 'Unet.save'
        path = Utility.get_dir(self.path)
        revision = 0
        if not self.offline:
            revision = DB.getRevision(self.id)
            revision = (revision + 1) % 10
            path = '%s/%s_%s_%d' % (Paths.Models, self.id, self.type, revision)
            path = path.lower()

        j_path = '%s_best.json' % (path)
        w_path = '%s_best_weights.h5' % (path)
        j_path = j_path.lower()
        w_path = w_path.lower()

        print 'saving...', path
        # saving code here...
        json_string = self.model.to_json()
        open(j_path, 'w').write(json_string)
        self.model.save_weights(w_path, overwrite=True)

        if not self.offline:
            DB.finishSaveModel(self.id, revision)

    def get_path(self):
        if self.offline:
            return self.path

        rev = DB.getRevision(self.id)
        path = '%s/%s.%s.%d' % (Paths.Models, self.id, self.type, rev)
        return path.lower()

    def reportTrainingStats(self,
                            elapsedTime,
                            batchIndex,
                            valLoss,
                            trainCost,
                            mode=0):
        DB.storeTrainingStats(self.id, valLoss, trainCost, mode=mode)
        msg = '(%0.1f)     %i     %f%%'%\
        (
           elapsedTime,
           batchIndex,
           valLoss
        )
        status = '[%f]' % (trainCost)
        Utility.report_status(msg, status)

    # need to define a custom loss, because all pre-implementations
    # seem to assume that scores over patch add up to one which
    # they clearly don't and shouldn't
    @staticmethod
    def unet_crossentropy_loss(y_true, y_pred):
        weight_class_1 = 1.
        epsilon = 1.0e-4
        y_pred_clipped = T.clip(y_pred, epsilon, 1.0 - epsilon)
        loss_vector = -T.mean(weight_class_1 * y_true * T.log(y_pred_clipped) +
                              (1 - y_true) * T.log(1 - y_pred_clipped),
                              axis=1)
        average_loss = T.mean(loss_vector)
        return average_loss

    @staticmethod
    def unet_crossentropy_loss_sampled(y_true, y_pred):
        epsilon = 1.0e-4
        y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
        y_true = T.flatten(y_true)
        # select the indices of positive and negative pixels
        # (T.nonzero returns a tuple of index arrays; take the first);
        # a standalone sketch of this sampling pattern follows the class
        indPos = T.nonzero(y_true)[0]
        indNeg = T.nonzero(1 - y_true)[0]
        # shuffle
        n = indPos.shape[0]
        indPos = indPos[UNET.srng.permutation(n=n)]
        n = indNeg.shape[0]
        indNeg = indNeg[UNET.srng.permutation(n=n)]
        # take equal number of samples depending on which class has less
        n_samples = T.cast(T.min([T.sum(y_true),
                                  T.sum(1 - y_true)]),
                           dtype='int64')

        indPos = indPos[:n_samples]
        indNeg = indNeg[:n_samples]
        loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(
            T.log(1 - y_pred_clipped[indNeg]))
        average_loss = T.mean(loss_vector)
        return average_loss

    @staticmethod
    def unet_block_down(input,
                        nb_filter,
                        doPooling=True,
                        doDropout=False,
                        doBatchNorm=False,
                        initialization='glorot_uniform',
                        weight_decay=0.):
        # downward block: two 3x3 convolutions with ReLU activations, followed
        # by 2x2 max-pooling; all convolutions use 'valid' borders, not 'same'
        act1 = Convolution2D(nb_filter=nb_filter,
                             nb_row=3,
                             nb_col=3,
                             subsample=(1, 1),
                             init=initialization,
                             activation='relu',
                             border_mode="valid",
                             W_regularizer=l2(weight_decay))(input)
        if doBatchNorm:
            act1 = BatchNormalization(mode=0, axis=1)(act1)

        act2 = Convolution2D(nb_filter=nb_filter,
                             nb_row=3,
                             nb_col=3,
                             subsample=(1, 1),
                             init=initialization,
                             activation='relu',
                             border_mode="valid",
                             W_regularizer=l2(weight_decay))(act1)
        if doBatchNorm:
            act2 = BatchNormalization(mode=0, axis=1)(act2)

        if doDropout:
            act2 = Dropout(0.5)(act2)

        if doPooling:
            # now downsampling with maxpool
            pool1 = MaxPooling2D(pool_size=(2, 2),
                                 strides=(2, 2),
                                 border_mode="valid")(act2)
        else:
            pool1 = act2

        return (act2, pool1)

    # need to define lambda layer to implement cropping
    # input is a tensor of size (batchsize, channels, width, height)
    @staticmethod
    def crop_layer(x, cs):
        cropSize = cs
        return x[:, :, cropSize:-cropSize, cropSize:-cropSize]

    @staticmethod
    def unet_block_up(input,
                      nb_filter,
                      down_block_out,
                      doBatchNorm=False,
                      initialization='glorot_uniform',
                      weight_decay=0.):
        print "This is unet_block_up"
        print "input ", input._keras_shape
        # upsampling
        up_sampled = UpSampling2D(size=(2, 2))(input)
        print "upsampled ", up_sampled._keras_shape
        # up-convolution
        conv_up = Convolution2D(nb_filter=nb_filter,
                                nb_row=2,
                                nb_col=2,
                                subsample=(1, 1),
                                init=initialization,
                                activation='relu',
                                border_mode="same",
                                W_regularizer=l2(weight_decay))(up_sampled)
        print "up-convolution ", conv_up._keras_shape
        # concatenation with cropped high res output
        # this is too large and needs to be cropped
        print "to be merged with ", down_block_out._keras_shape

        #padding_1 = int((down_block_out._keras_shape[2] - conv_up._keras_shape[2])/2)
        #padding_2 = int((down_block_out._keras_shape[3] - conv_up._keras_shape[3])/2)
        #print "padding: ", (padding_1, padding_2)
        #conv_up_padded = ZeroPadding2D(padding=(padding_1, padding_2))(conv_up)
        #merged = merge([conv_up_padded, down_block_out], mode='concat', concat_axis=1)

        cropSize = int(
            (down_block_out._keras_shape[2] - conv_up._keras_shape[2]) / 2)
        down_block_out_cropped = Lambda(UNET.crop_layer,
                                        output_shape=conv_up._keras_shape[1:],
                                        arguments={"cs":
                                                   cropSize})(down_block_out)
        print "cropped layer size: ", down_block_out_cropped._keras_shape
        merged = merge([conv_up, down_block_out_cropped],
                       mode='concat',
                       concat_axis=1)

        print "merged ", merged._keras_shape
        # two 3x3 convolutions with ReLU
        # first one halves the feature channels
        act1 = Convolution2D(nb_filter=nb_filter,
                             nb_row=3,
                             nb_col=3,
                             subsample=(1, 1),
                             init=initialization,
                             activation='relu',
                             border_mode="valid",
                             W_regularizer=l2(weight_decay))(merged)

        if doBatchNorm:
            act1 = BatchNormalization(mode=0, axis=1)(act1)

        print "conv1 ", act1._keras_shape
        act2 = Convolution2D(nb_filter=nb_filter,
                             nb_row=3,
                             nb_col=3,
                             subsample=(1, 1),
                             init=initialization,
                             activation='relu',
                             border_mode="valid",
                             W_regularizer=l2(weight_decay))(act1)
        if doBatchNorm:
            act2 = BatchNormalization(mode=0, axis=1)(act2)

        print "conv2 ", act2._keras_shape

        return act2
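The sampled loss in the class above relies on RandomStreams.permutation to shuffle the positive and negative pixel indices before truncating both to the size of the minority class. The following is a minimal standalone sketch of that balanced-sampling pattern; the names y, p and balanced_loss are hypothetical and it assumes a flattened binary label vector with both classes present.

# Standalone sketch of the balanced-sampling pattern used above.
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(1234)
y = T.vector('y')   # flattened binary ground truth
p = T.vector('p')   # flattened predicted foreground probabilities

ind_pos = T.nonzero(y)[0]        # indices of positive pixels
ind_neg = T.nonzero(1 - y)[0]    # indices of negative pixels
ind_pos = ind_pos[srng.permutation(n=ind_pos.shape[0])]   # shuffle each class
ind_neg = ind_neg[srng.permutation(n=ind_neg.shape[0])]
n_samples = T.minimum(ind_pos.shape[0], ind_neg.shape[0])

loss = -T.mean(T.log(p[ind_pos[:n_samples]])) \
       - T.mean(T.log(1. - p[ind_neg[:n_samples]]))
balanced_loss = theano.function([y, p], loss)

Every call to balanced_loss draws a fresh permutation, so a different balanced subset of pixels is used for each update.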
Example #18
0
mBGsub = BGsubstract.BGsubstract(x_activ)  # load pretrained weights and continue training
# get what we need to define the loss
p_fb_flat_train = mBGsub.p_fb_flat_train
p_fb_flat_test = mBGsub.p_fb_flat_test
params = mBGsub.params
# get what we need to evaluate on test data
p_fb = mBGsub.p_fb

#define cost function to optimize
y_activ_flat = y_activ.dimshuffle(0,2,3,1).reshape((y_activ.shape[0]*y_activ.shape[2]*y_activ.shape[3],y_activ.shape[1]))

#take on all image
#cost = T.mean(T.nnet.categorical_crossentropy(p_fb, y_activ_flat))
#or take only a few pixels in image
nbRandomSamples = 100
# permutation(..., size=(1,)) returns an array of shape (1, n); take row 0 to
# get the length-n index vector (see the standalone sketch after this snippet)
permutations_samples = srng.permutation(n=p_fb_flat_train.shape[0], size=(1,))[0]
cost_train = T.mean(T.nnet.categorical_crossentropy(p_fb_flat_train[permutations_samples[0:nbRandomSamples]], y_activ_flat[permutations_samples[0:nbRandomSamples]]))
cost_pred = T.mean(T.nnet.categorical_crossentropy(p_fb_flat_test[permutations_samples[0:nbRandomSamples]], y_activ_flat[permutations_samples[0:nbRandomSamples]]))

#updates = Optimisation.momentum(cost, params, learning_rate=0.0001, momentum=0.9)
updates = Optimisation.adam(cost_train, params, learn_rate = 0.0005)

# keep only the RGB channels of the (downsampled) input for visualisation
downsampled_x_rgb = x_activ[:,0:3]

# compile theano functions
train = theano.function([x, y], cost_train, updates=updates)
getCost = theano.function([x, y], cost_pred)
getRGBdownsampled = theano.function([x], downsampled_x_rgb)
predict = theano.function([x], p_fb)
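The snippet above samples nbRandomSamples random pixels by slicing the first entries of a permutation; because permutation is called with size=(1,), its output has shape (1, n) and row 0 must be taken first. A minimal standalone sketch of that indexing pattern, with hypothetical names:

# Minimal sketch: the first k entries of a permutation give k distinct
# indices, i.e. a random sample without replacement.
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

srng = RandomStreams(0)
n_pixels = T.lscalar('n_pixels')
k = 100
perm = srng.permutation(n=n_pixels, size=(1,))[0]   # shape (1, n) -> (n,)
sample_idx = perm[:k]                               # k distinct random indices
draw = theano.function([n_pixels], sample_idx)
print draw(1000)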
Example #19
0
    def __init__(self, rng, input, n_in, n_batch, d_bucket, n_reflections, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize reflection parameters
        if w is None:
            w_values = numpy.asarray(rng.uniform(low=-1,
                                                 high=1,
                                                 size=(n_bucket, n_reflections, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                #import pdb; pdb.set_trace()
                bucket_size = n_in - b * d_bucket
            
            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]
            for r in xrange(n_reflections):
                w_b_r = w[b, r, :bucket_size]
                if r>0:
                    Wtemp = T.eye(bucket_size) \
                        - 2 * T.outer(w_b_r, w_b_r) / ((w_b_r ** 2).sum())
                    W = T.dot(W, Wtemp)
#                    import pdb; pdb.set_trace()
                else:
                    # Householder reflection I - 2*v*v^T/(v^T v); a product of
                    # such reflections is orthogonal (see the check after the class)
                    W = T.eye(bucket_size) - 2 * T.outer(w_b_r, w_b_r) / ((w_b_r ** 2).sum())

            lin_output_b = T.dot(x_b, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)
 
#                for n in xrange(n_batch):
#                    mat = T.tile(T.reshape(derivs[n], [1, bucket_size]), (bucket_size, 1))
#                    mat = mat * W
                    
#                    T.inc_subtensor(log_jacobian[n], T.log(T.abs_(T.nlinalg.Det()(mat))))
                    
        self.log_jacobian = log_jacobian        

        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )

        self.params = [w]
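In this variant each bucket's linear map is a product of Householder reflections. Such a product is orthogonal, so its determinant has absolute value 1 and only the activation derivatives contribute to the log-Jacobian, which is why no log-determinant term is accumulated here, unlike the LU-factored version earlier. A small NumPy check, independent of the class above:

# Minimal NumPy check: a Householder reflection H = I - 2*v*v^T/(v^T v) is
# orthogonal and has det(H) = -1, so a product of such reflections has
# |det| = 1 and contributes nothing to log|det J|.
import numpy as np

rng = np.random.RandomState(0)
v = rng.uniform(-1, 1, size=7)
H = np.eye(7) - 2.0 * np.outer(v, v) / np.dot(v, v)

assert np.allclose(H.dot(H.T), np.eye(7))       # orthogonality
assert np.allclose(abs(np.linalg.det(H)), 1.0)  # |det| = 1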
Example #20
0
class DBenGurionOCR(object):
    """
    Constructor para uso productivo
    """
    def __init__(self):
        return

    """
    Constructor para validacion
    """

    @classmethod
    def Validator(self, id_experiment, layers_metaData, batch_size,
                  raw_data_set, logger, weigthts_service, experimentsRepo,
                  initial_weights):
        self.idExperiment = id_experiment
        self.logger = logger
        self.weigthts_service = weigthts_service
        self.experimentsRepo = experimentsRepo

        self.x = T.tensor4('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')

        index = T.lscalar()

        random_droput = np.random.RandomState(1234)
        rng_droput = T.shared_randomstreams.RandomStreams(
            random_droput.randint(999999))

        rawXDataSet = raw_data_set[0]
        rawYDataSet = raw_data_set[1]
        self.totalDataSize = rawXDataSet.shape[0]
        self.no_batchs_in_data_set = self.totalDataSize // batch_size

        # batch_size = 50000
        # img_input =  x #T.reshape(x,(batch_size, 1, 28, 28))
        self.CNN = DBenGurionArchitecture.DBenGurionArchitecture(
            image_input=self.x,
            batch_size=batch_size,
            layers_metaData=layers_metaData,
            initWeights=initial_weights,
            srng=rng_droput,
            no_channels_imageInput=1,
            isTraining=1)

        XimgLetras = np.asarray(rawXDataSet,
                                dtype=theano.config.floatX).reshape(
                                    (self.totalDataSize, 1, 64, 64))
        XimgLetrasShared = theano.shared(XimgLetras)

        YimgLetras = np.asarray(rawYDataSet, dtype=np.int32)
        YimgLetrasShared = theano.shared(YimgLetras)

        cost = self.CNN.SoftMax_1.negative_log_likelihood(self.y)
        self.evaluate_model_with_cost = theano.function(
            [index],
            cost,
            givens={
                self.x:
                XimgLetrasShared[index * batch_size:(index + 1) * batch_size],
                self.y:
                YimgLetrasShared[index * batch_size:(index + 1) * batch_size]
            })

        error = self.CNN.SoftMax_1.errors(self.y)
        self.evaluate_model_with_error = theano.function(
            [index],
            error,
            givens={
                self.x:
                XimgLetrasShared[index * batch_size:(index + 1) * batch_size],
                self.y:
                YimgLetrasShared[index * batch_size:(index + 1) * batch_size]
            })
        return self()

    """
    Constructor para Entrenamiento
    """

    @classmethod
    def Trainer(self, id_experiment, layers_metaData, batch_size,
                raw_train_set, logger, weigthts_service, experimentsRepo,
                initial_weights, max_epochs, with_lr_decay, learning_rate,
                saveWeigthsFrecuency, frecuency_lr_decay, p_DropOut):
        self.idExperiment = id_experiment
        self.logger = logger
        self.max_epochs = max_epochs
        self.with_lr_decay = with_lr_decay
        self.learning_rate = float(learning_rate)
        self.weigthts_service = weigthts_service
        self.saveWeigthsFrecuency = saveWeigthsFrecuency
        self.frecuency_lr_decay = frecuency_lr_decay
        self.experimentsRepo = experimentsRepo
        self.theano_rng = RandomStreams(123)

        self.x = T.tensor4('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')
        learningRate = T.fscalar()
        index = T.lscalar()

        random_droput = np.random.RandomState(1234)
        rng_droput = T.shared_randomstreams.RandomStreams(
            random_droput.randint(999999))

        rawXTrainingDataSet = raw_train_set[0]
        rawYTrainingDataSet = raw_train_set[1]
        self.trainDataSetSize = rawXTrainingDataSet.shape[0]
        self.no_batchs_in_data_set = self.trainDataSetSize // batch_size

        # batch_size = 50000
        # img_input =  x #T.reshape(x,(batch_size, 1, 28, 28))
        self.CNN = DBenGurionArchitecture.DBenGurionArchitecture(
            image_input=self.x,
            batch_size=batch_size,
            layers_metaData=layers_metaData,
            initWeights=initial_weights,
            srng=rng_droput,
            no_channels_imageInput=1,
            isTraining=1,
            pDropOut=p_DropOut)

        XimgLetras = np.asarray(rawXTrainingDataSet,
                                dtype=theano.config.floatX).reshape(
                                    (self.trainDataSetSize, 1, 64, 64))
        XimgLetrasShared = theano.shared(XimgLetras)

        YimgLetras = np.asarray(rawYTrainingDataSet, dtype=np.int32)
        YimgLetrasShared = theano.shared(YimgLetras)

        # keep references to the shared data so that Train() can reshuffle
        # them between epochs
        self.XimgLetrasShared = XimgLetrasShared
        self.YimgLetrasShared = YimgLetrasShared

        #cost = self.CNN.SoftMax_1.cost_function(y)
        cost = self.CNN.SoftMax_1.negative_log_likelihood(self.y)

        error = self.CNN.SoftMax_1.errors(self.y)
        #error = self.CNN.SoftMax_1.(y)

        weights = [
            self.CNN.conv1.Filter, self.CNN.conv2.Filter,
            self.CNN.conv3.Filter, self.CNN.conv4.Filter,
            self.CNN.conv5.Filter, self.CNN.conv6.Filter, self.CNN.FC_1.Filter,
            self.CNN.FC_1.Bias, self.CNN.FC_2.Filter, self.CNN.FC_2.Bias,
            self.CNN.SoftMax_1.Filter, self.CNN.SoftMax_1.Bias
        ]

        grads = T.grad(cost, weights, disconnected_inputs="raise")

        # standard gradient-descent step: move each parameter against its gradient
        updates = [(param_i, param_i - (learningRate * grad_i))
                   for param_i, grad_i in zip(weights, grads)]

        #errors = self.CNN.SoftMax_1.

        self.train_model = theano.function(
            [index, learningRate],
            cost,
            updates=updates,
            givens={
                self.x:
                XimgLetrasShared[index * batch_size:(index + 1) * batch_size],
                self.y:
                YimgLetrasShared[index * batch_size:(index + 1) * batch_size]
            })

        return self()

    def GetWeigthsValuesByLayer(self, layer):
        if layer is LayerEnum.LayerEnum.conv1:
            return np.asarray(self.CNN.conv1.Filter.get_value(),
                              dtype=theano.config.floatX)
        if layer is LayerEnum.LayerEnum.conv2:
            return np.asarray(self.CNN.conv2.Filter.get_value(),
                              dtype=theano.config.floatX)
        if layer is LayerEnum.LayerEnum.conv3:
            return np.asarray(self.CNN.conv3.Filter.get_value(),
                              dtype=theano.config.floatX)
        if layer is LayerEnum.LayerEnum.conv4:
            return np.asarray(self.CNN.conv4.Filter.get_value(),
                              dtype=theano.config.floatX)
        if layer is LayerEnum.LayerEnum.conv5:
            return np.asarray(self.CNN.conv5.Filter.get_value(),
                              dtype=theano.config.floatX)
        if layer is LayerEnum.LayerEnum.conv6:
            return np.asarray(self.CNN.conv6.Filter.get_value(),
                              dtype=theano.config.floatX)
        elif layer is LayerEnum.LayerEnum.FC_1:
            return (np.asarray(self.CNN.FC_1.Filter.get_value(),
                               dtype=theano.config.floatX),
                    np.asarray(self.CNN.FC_1.Bias.get_value(),
                               dtype=theano.config.floatX))
        elif layer is LayerEnum.LayerEnum.FC_2:
            return (np.asarray(self.CNN.FC_2.Filter.get_value(),
                               dtype=theano.config.floatX),
                    np.asarray(self.CNN.FC_2.Bias.get_value(),
                               dtype=theano.config.floatX))
        elif layer is LayerEnum.LayerEnum.SoftMax_1:
            return (np.asarray(self.CNN.SoftMax_1.Filter.get_value(),
                               dtype=theano.config.floatX),
                    np.asarray(self.CNN.SoftMax_1.Bias.get_value(),
                               dtype=theano.config.floatX))

    def Train(self, current_epoch=0, id_train='', extra_info=''):
        for epoch_index in range(self.max_epochs):
            # starting at a later epoch only makes sense when initial weights
            # exist (i.e. when resuming a previous run)
            if epoch_index < current_epoch:
                continue

            if epoch_index != 0 and self.with_lr_decay == True and epoch_index % self.frecuency_lr_decay == 0:
                self.learning_rate *= 0.1
            elif self.with_lr_decay == False:
                decreaseNow = self.experimentsRepo.ObtenerDecreaseNow()
                increaseNow = self.experimentsRepo.ObtenerIncreaseNow()
                if decreaseNow == True:
                    self.experimentsRepo.UpdateLearningRate(self.learning_rate)
                    self.experimentsRepo.SetFalseDecreaseNow()
                    self.learning_rate *= 0.1
                    print("Decremento mandatorio, learning rate: " +
                          str(self.learning_rate))
                elif increaseNow == True:
                    self.experimentsRepo.UpdateLearningRate(self.learning_rate)
                    self.experimentsRepo.SetFalseIncreaseNow()
                    self.learning_rate /= 0.1

            # reshuffle the shared training data each epoch; permutation with
            # size=(1,) has shape (1, n), so take row 0 and evaluate it to
            # obtain a concrete index vector
            newOrder = self.theano_rng.permutation(n=self.trainDataSetSize,
                                                   size=(1, ))[0].eval()
            self.XimgLetrasShared.set_value(
                self.XimgLetrasShared.get_value()[newOrder])
            self.YimgLetrasShared.set_value(
                self.YimgLetrasShared.get_value()[newOrder])

            # iterate over the whole dataset, split into batches
            for batch_index in range(self.no_batchs_in_data_set):
                cost = self.train_model(batch_index, self.learning_rate)
                print("cost: " + str(cost) + " epoch: " + str(epoch_index) +
                      " batch: " + str(batch_index) + "/" +
                      str(self.no_batchs_in_data_set) + " learning rate: " +
                      str(self.learning_rate))
                self.logger.LogTrain(cost, str(epoch_index), str(batch_index),
                                     str(self.learning_rate))
                #self.logger.Log(str(cost), "costo", str(epoch_index), str(batch_index), id_train,
                #                "learning rate: " + str(self.learning_rate) + "," + extra_info)
            if (epoch_index + 1) % self.saveWeigthsFrecuency == 0:
                self.SaveWeights(epoch_index, batch_index, -1)

    def SaveWeights(self,
                    epoch,
                    batch,
                    iteration,
                    cost=0,
                    error=0,
                    costVal=0,
                    errorVal=0,
                    costTest=0,
                    errorTest=0):
        allWeiths = {
            "conv1Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.conv1),
            "conv2Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.conv2),
            "conv3Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.conv3),
            "conv4Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.conv4),
            "conv5Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.conv5),
            "conv6Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.conv6),
            "FC1Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.FC_1)[0],
            "FC1BiasValues":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.FC_1)[1],
            "FC2Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.FC_2)[0],
            "FC2BiasValues":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.FC_2)[1],
            "SoftMax1Values":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.SoftMax_1)[0],
            "SoftMax1BiasValues":
            self.GetWeigthsValuesByLayer(LayerEnum.LayerEnum.SoftMax_1)[1]
        }

        hyper_params = "learning rate: " + str(self.learning_rate)
        # = [c1_v,c3_v,fc5v_v,fc5b_v,fc6v,fc6b_v]
        self.weigthts_service.SaveWeights(allWeiths, self.idExperiment, epoch,
                                          batch, iteration, hyper_params, cost,
                                          error, costVal, errorVal, costTest,
                                          errorTest)

        return

    def CalculateCost(self, noBatchsToEvaluate=-1):

        if noBatchsToEvaluate == -1:
            noBatchsToEvaluate = self.no_batchs_in_data_set
        sumaCost = 0.0
        for batch_index in range(noBatchsToEvaluate):
            cost = self.evaluate_model_with_cost(batch_index)
            print("calculando costos: costo: " + str(cost) + " en batch: " +
                  str(batch_index))
            sumaCost = sumaCost + cost
        promedio = sumaCost / noBatchsToEvaluate
        return promedio

    def CalculateError(self, noBatchsToEvaluate=-1):

        if noBatchsToEvaluate == -1:
            noBatchsToEvaluate = self.no_batchs_in_data_set

        sumaCost = 0.0
        for batch_index in range(noBatchsToEvaluate):
            error = self.evaluate_model_with_error(batch_index)
            print("calculando costos: errores: " + str(error) + " en batch: " +
                  str(batch_index))
            sumaCost = sumaCost + error
        promedio = sumaCost / noBatchsToEvaluate
        return promedio
def do_gd(train_set, etaVal, epochs, layers, batch_size=100, scale=1):
    '''
    Train an MLP with plain mini-batch gradient descent (default
    batch_size=100), reshuffling the training data with a Theano
    RandomStreams permutation at the start of every epoch.
    '''
    SEED = 5318
    np.random.seed(SEED)
    X = T.matrix('X')
    Y = T.ivector('Y')
    index = T.lscalar('index')
    eta = T.fscalar('eta')
    
    n_in = layers[0]
    n_out = layers[-1]

    trainX, trainY = train_set
    dataset_size = trainX.get_value(borrow=True).shape[0]


    classifier = MLP(
                    rng = np.random.RandomState(SEED),
                    inpt = X,
                    layers = layers,
                    scale = scale
                )
    cost = classifier.negative_log_likelihood(Y)

    gparams = [T.grad(cost, param) for param in classifier.params]

    train_model = theano.function(
                 inputs = [index, eta],
                 outputs = cost,
                 updates = [(param, param - eta * gparam) 
                    for param, gparam in zip(classifier.params, gparams)],
                 givens = {
                         X : trainX[index * batch_size : (index + 1) * batch_size],
                         Y : trainY[index * batch_size : (index + 1) * batch_size]
                     }
             )

    # train_model = theano.function(
    #              inputs = [index, eta],
    #              outputs = cost,
    #              updates = [(param, param - eta * gparam) 
    #                  for param, gparam in zip(classifier.params, gparams)],
    #              givens = {
    #                      X : trainX[index],
    #                      Y : trainY[index]
    #                  }
    #          )
    
    # pydotprint(train_model,'./test.png')
    # d3v.d3viz(train_model,'./test.html')
    cost = []
    n_batches = int(dataset_size / batch_size)
    print dataset_size
    # annealing constant: controls how quickly the learning rate eta is reduced
    # as the iteration count grows (a tiny numeric sketch follows this function)
    ANNEAL = 10*dataset_size
    print("Anneal = {}".format(ANNEAL))
    
    start_time = timeit.default_timer()
    learn_rate = etaVal
    for epoch in xrange(epochs):
        # shuffle the training data in place so trainX and trainY stay aligned:
        # draw one permutation, evaluate it to a concrete index vector, and
        # apply it to both (assumes both are Theano shared variables holding
        # the data)
        theano_rng = RandomStreams(int(np.random.rand() * 100))
        perm = theano_rng.permutation(n=dataset_size, size=(1,))[0].eval()
        trainX.set_value(trainX.get_value()[perm])
        trainY.set_value(trainY.get_value()[perm])

        for batch_idx in xrange(n_batches):
            cost.append(np.mean(np.asarray([train_model(batch_idx, learn_rate)])))

        time_check = timeit.default_timer()
        iteration = (epoch * n_batches) + batch_idx
        print("epoch={}, mean cost={}, total_time(mins)={}, eta={}, iters={}".format(epoch, np.mean(cost[-n_batches:]), (time_check - start_time)/60.0, learn_rate, iteration))
        # Search and then converge
        learn_rate = etaVal / ( 1.0 + (iteration*1.0 / ANNEAL))

    print("Eta = {}, Cost Last= {} Mean last 10 Costs = {}".format(
            eta, cost[-1], np.mean(cost[-10:])) 
         )
    return np.mean(cost[-10:])
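The learning-rate schedule in do_gd is the "search then converge" rule eta_t = etaVal / (1 + t / ANNEAL): the rate stays close to its initial value while the iteration count t is small relative to ANNEAL and then decays roughly like 1/t. A tiny numeric sketch of that behaviour, with made-up values:

# Tiny sketch of the search-then-converge schedule used in do_gd above.
eta0 = 0.1
ANNEAL = 10000.0
for t in (0, 1000, 10000, 100000, 1000000):
    print t, eta0 / (1.0 + t / ANNEAL)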