Example no. 1
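This example trains a small CNN on MNIST (or CIFAR-10), optionally swapping one convolution for the adaptive Conv2DAdaptive layer, and optimizes it with the custom SGDwithLR optimizer from keras_utils.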
def test_mnist():     
    import os
    os.environ['CUDA_VISIBLE_DEVICES']="2"
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH']="true"
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
    import keras
    from keras.datasets import mnist,cifar10
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Flatten, Input, Add, ReLU
    from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
    from keras.layers import BatchNormalization,Activation, Concatenate
    from keras.regularizers import l2,l1
    from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard
    from keras_preprocessing.image import ImageDataGenerator
    from keras.optimizers import SGD, Adadelta
    #from keras.initializers import glorot_uniform as w_ini
    from keras.initializers import he_uniform as w_ini
    from keras.initializers import VarianceScaling as VS_ini
    from keras import backend as K
    from keras_utils import RecordVariable, PrintLayerVariableStats, SGDwithLR
    from keras import initializers
    import numpy as np
    import tensorflow as tf
    # Conv2DAdaptive (the adaptive convolution layer used below) is assumed to be
    # imported at module level from the project's own layer code.
    
    #config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.1
   # Create a session with the above options specified.
    #K.tensorflow_backend.set_session(tf.Session(config=config))
    
    sid = 9
    #sess = K.get_session()
    K.clear_session()
    #sess = tf.Session(graph=g)
    #K.set_session(sess)
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)
    
    
    #dset='cifar10'
    
    dset = 'mnist'
    batch_size = 512
    num_classes = 10
    epochs =200
    test_acnn = True
    regularizer = None
    prfw = 5
    fw = 5
    residual = False
    data_augmentation = False
    
    if dset=='mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28  
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        n_channels=1
    
    elif dset=='cifar10':    
        img_rows, img_cols = 32,32
        n_channels=3
        
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)
            
    
        
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    trn_mn = np.mean(x_train, axis=0)
    x_train -= trn_mn
    x_test -= trn_mn
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')
    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    network=[]
    network.append(Input(shape=input_shape))
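    # The model is built with the Keras functional API: each new layer's output
    # tensor is appended to the 'network' list, so network[-1] is always the
    # current tensor being extended.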

#    if test_acnn:
#        
#        prev_layer = network[-1]
#        
#        
#        conv_node = Conv2DAdaptive(rank=2,nfilters=32,kernel_size=(fw,fw), 
#                                data_format='channels_last',strides=1,
#                                padding='same',name='acnn-1', activation='linear',
#                                trainSigmas=True, trainWeights=True, 
#                                init_sigma=[0.15,1.0],
#                                gain = np.sqrt(1.0),
#                                kernel_regularizer=None,
#                                init_bias=initializers.Constant(0))(prev_layer)
#        if residual:
#            network.append(Add()([conv_node,prev_layer]))
#        else:
#            network.append(conv_node)
#        #, input_shape=input_shape))
#    else:
#        
#        network.append(Conv2D(24, (fw, fw), activation='linear', 
#                         kernel_initializer=w_ini(), 
#                         kernel_regularizer=None,
#                         padding='same')(network[-1]))
        
    network.append(Conv2D(32, kernel_size=(prfw, prfw),
                       activation='linear',padding='same', kernel_initializer=w_ini(),
                       kernel_regularizer=regularizer)(network[-1]))
    network.append(BatchNormalization()(network[-1]))
    network.append(Activation('relu')(network[-1]))
    network.append(Dropout(0.2)(network[-1]))
    network.append(Conv2D(32, (prfw, prfw), activation='linear', 
                     kernel_initializer=w_ini(), padding='same',
                     kernel_regularizer=regularizer)(network[-1]))
    #odel.add(MaxPooling2D(pool_size=(2, 2)))
    network.append(BatchNormalization()(network[-1]))
    network.append(Activation('relu')(network[-1]))
    
    network.append(Dropout(0.2)(network[-1]))
    
#    model.add(Conv2D(32, (3, 3), activation='linear', 
#                     kernel_initializer=w_ini(), padding='same',
#                     kernel_regularizer=regulazer))
#    #odel.add(MaxPooling2D(pool_size=(2, 2)))
#    model.add(BatchNormalization())
#    model.add(Activation('relu'))
#    
#    model.add(Dropout(0.2))
#    
    #odel.add(Dense(128, activation='relu'))
    
    #odel.add(Dropout(0.25))
    
      #=============================================================================
    nfilter= 32
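    # When test_acnn is True this stage uses the adaptive convolution
    # (Conv2DAdaptive, with trainable sigmas); otherwise a plain Conv2D with the
    # same number of filters is used, so the two variants can be compared.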
    if test_acnn:
        
        prev_layer = network[-1]
        
        
        conv_node = Conv2DAdaptive(rank=2,nfilters=nfilter,kernel_size=(fw,fw), 
                                data_format='channels_last',strides=1,
                                padding='same',name='acnn-1', activation='linear',
                                trainSigmas=True, trainWeights=True, 
                                init_sigma=[0.25,0.75],
                                gain = 1.0,
                                kernel_regularizer=None,
                                init_bias=initializers.Constant(0),
                                norm=2)(prev_layer)
        if residual:
            network.append(Add()([conv_node,prev_layer]))
        else:
            network.append(conv_node)
        #, input_shape=input_shape))
    else:
        #fw = 7
        #v_ini = VS_ini(scale=0.25,mode='fan_in',distribution='uniform')
        network.append(Conv2D(nfilter, (fw, fw), activation='linear', 
                         kernel_initializer=w_ini(), 
                         kernel_regularizer=None,
                         padding='same')(network[-1]))
        #, input_shape=input_shape))
        
    network.append(BatchNormalization()(network[-1]))
    network.append(ReLU()(network[-1]))
    #network.append(ReLU(negative_slope=0.01)(network[-1]))
    #network.append(Activation('selu'))
    network.append(MaxPooling2D(pool_size=(2,2))(network[-1]))
    print("MAY BE MAXPOOL LAYER IS AFFECTING SIGNAL ")
    network.append(Dropout(0.2)(network[-1]))
    #model.add(keras.layers.AlphaDropout(0.2))
    #network.append(GlobalAveragePooling2D()(network[-1]))
    network.append(Flatten()(network[-1]))
    network.append(Dense(units=128, activation='linear',
                   kernel_regularizer=regularizer)(network[-1]))
    network.append(BatchNormalization()(network[-1]))
    network.append(ReLU()(network[-1]))
    network.append(Dropout(0.2)(network[-1]))
    
    
    network.append(Dense(num_classes, activation='softmax',
                    kernel_regularizer=regularizer)(network[-1]))
    model = keras.models.Model(inputs=[network[0]], outputs=network[-1])
    model.summary()
    print("MAY BE MAXPOOL LAYER IS AFFECTING SIGNAL ")
    
    
    from lr_multiplier import LearningRateMultiplier
    #lr=0.001
    #multipliers = {'acnn-1/Sigma:0': 1.0,'acnn-1/Weights:0': 1000.0,
    #               'acnn-2/Sigma:0': 1.0,'acnn-2/Weights:0': 1000.0}
    #opt = LearningRateMultiplier(SGD, lr_multipliers=multipliers,
    #                             lr=lr, momentum=0.9,decay=0)
    
    #opt= SGD(lr=lr,momentum=0.9,decay=0,nesterov=False)
    '''lr_dict = {'all':0.01,'acnn-1/Sigma:0': 0.01,'acnn-1/Weights:0': 1.0,
                   'acnn-2/Sigma:0': 0.01,'acnn-2/Weights:0': 0.1}
    
    
    mom_dict = {'all':0.9,'acnn-1/Sigma:0': 0.5,'acnn-1/Weights:0': 0.9,
                   'acnn-2/Sigma:0': 0.9,'acnn-2/Weights:0': 0.9}
    
    
    decay_dict = {'all':0.95, 'acnn-1/Sigma:0': 0.05, 'acnn-1/Weights:0':0.95,
                  'acnn-1/Sigma:0': 0.05,'acnn-2/Weights:0': 0.95}

    clip_dict = {'acnn-1/Sigma:0':(0.05,1.0),'acnn-2/Sigma:0':(0.05,1.0)}
    '''
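    # SGDwithLR (from keras_utils) takes per-variable dictionaries keyed as
    # '<layer name>/<variable name>:0'; the 'all' entry appears to act as the
    # default for variables not listed explicitly. Note that the adaptive layer
    # in this model is named 'acnn-1', so 'acnn/...' keys may not match it.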
    lr_dict = {'all':0.1,'acnn/Sigma:0': 0.1,'acnn/Weights:0': 0.1,
               'acnn-2/Sigma:0': 0.0001,'acnn-2/Weights:0': 0.01}
    
    mom_dict = {'all':0.9,'acnn/Sigma:0': 0.9,'acnn/Weights:0': 0.9,
                'acnn-2/Sigma:0': 0.9,'acnn-2/Weights:0': 0.9}
    clip_dict = {'acnn/Sigma:0': [0.1, 2.0]}
    
    decay_dict = {}
    decay_dict.update({'focus-1'+'/Sigma:0':0.9})  #best results 0.5
    decay_dict.update({'focus-1'+'/Mu:0':0.9})
    decay_dict.update({'all':0.9})
    
    e_i = x_train.shape[0] // batch_size
    decay_epochs =[e_i*10,e_i*30,e_i*60,e_i*90,e_i*100]
    
    opt = SGDwithLR(lr=lr_dict, momentum = mom_dict, decay=decay_dict, clips=clip_dict,
                    decay_epochs=decay_epochs,clipvalue=0.01)
        
    e_i = x_train.shape[0] // batch_size
    
    
    #decay_epochs =np.array([e_i*10], dtype='int64') #for 20 epochs
    #decay_epochs =np.array([e_i*10,e_i*80,e_i*120,e_i*160], dtype='int64')
    
    #opt = SGDwithLR(lr_dict, mom_dict,decay_dict,clip_dict, decay_epochs)#, decay=None)
    #opt= Adadelta()
    #lr_scheduler = LearningRateScheduler(lr_schedule,lr)
    
    # Prepare model model saving directory.
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    if test_acnn:
        model_name = '%s_acnn%dx_model.{epoch:03d}.h5'% (dset, fw)
    else:
        model_name = '%s_cnn%dx_model.{epoch:03d}.h5'% (dset, fw)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
        
    filepath = os.path.join(save_dir, model_name)
    print("Saving in ", filepath)

#    # Prepare callbacks for model saving and for learning rate adjustment.
#    checkpoint = ModelCheckpoint(filepath=filepath,
#                             monitor='val_acc',
#                             verbose=1,
#                             save_best_only=True)
    chkpt= keras.callbacks.ModelCheckpoint('best-model.h5', 
                                    monitor='val_acc', 
                                    verbose=1, 
                                    save_best_only=True, 
                                    save_weights_only=True, 
                                    mode='max', period=1)
    
    
    tb = TensorBoard(log_dir='./tb_logs/mnist/acnn-res-lr5',
                     histogram_freq = 1, 
                     write_grads=True,
                     write_graph=False)
    
    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    callbacks = []  # tb is left out; see the TensorBoard warning printed below
    
    if test_acnn:
        pr_1 = PrintLayerVariableStats("acnn-1","Weights:0",stat_func_list,stat_func_name)
        pr_2 = PrintLayerVariableStats("acnn-1","Sigma:0",stat_func_list,stat_func_name)
        rv_weights_1 = RecordVariable("acnn-1","Weights:0")
        rv_sigma_1 = RecordVariable("acnn-1","Sigma:0")
        callbacks+=[pr_1,pr_2,rv_weights_1,rv_sigma_1]
    else:
        pr_1 = PrintLayerVariableStats("conv2d_3","kernel:0",stat_func_list,stat_func_name)
        rv_weights_1 = RecordVariable("conv2d_3","kernel:0")
        callbacks+=[pr_1, rv_weights_1]
    pr_3 = PrintLayerVariableStats("conv2d_1","kernel:0",stat_func_list,stat_func_name)
    rv_kernel = RecordVariable("conv2d_1","kernel:0")
    callbacks+=[pr_3,rv_kernel]
    
    print("CALLBACKS:",callbacks)
    
    print("TRAINABLE WEIGHTS:",model.trainable_weights)
    
    print("WARNING by BTEK: if you see An operation has `None` for gradient. \
          Please make sure that all of your ops have a gradient defined \
          (i.e. are differentiable). Common ops without gradient: \
              K.argmax, K.round, K.eval. REMOVE TENSORBOARD CALLBACK OR EDIT IT!")
    
    
    #print(opt)
    #opt = SGD(lr=0.01,momentum=0.9)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])
    
    plt = False
    if plt and test_acnn:
        print("Plotting kernels before...")
        import matplotlib.pyplot as plt
        acnn_layer = model.get_layer('acnn-1')
        ws = acnn_layer.get_weights()
        print("Sigmas before",ws[0])
        u_func = K.function(inputs=[model.input], outputs=[acnn_layer.U()])
        output_func = K.function(inputs=[model.input], outputs=[acnn_layer.output])
    
        U_val=u_func([np.expand_dims(x_test[0], axis=0)])
        
        print("U shape", U_val[0].shape)
        print("U max:", np.max(U_val[0][:,:,:,:]))
        num_filt=min(U_val[0].shape[3],12)
        fig=plt.figure(figsize=(20,8))
        for i in range(num_filt):
            ax1=plt.subplot(1, num_filt, i+1)
            im = ax1.imshow(np.squeeze(U_val[0][:,:,0,i]))
        fig.colorbar(im, ax=ax1)
        
        plt.show(block=False)
        
        fig=plt.figure(figsize=(20,8))
        num_show = min(U_val[0].shape[3],12)
        indices = np.int32(np.linspace(0,U_val[0].shape[3]-1,num_show))
        for i in range(num_show):
            ax1=plt.subplot(1, num_filt, i+1)
            #print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            #print("Prod-shape", (ws[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(ws[1][:,:,0,indices[i]]*
                                  K.eval(acnn_layer.U()[:,:,0,indices[i]])))
 
        plt.show(block=False)
        
    # Run training, with or without data augmentation.
    if not data_augmentation:
        print('Not using data augmentation.')
        model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, y_test),
                  shuffle=True,
                  callbacks=callbacks,verbose=2)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.1,
            # randomly shift images vertically
            height_shift_range=0.1,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            zoom_range=0.,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format='channels_last',
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)
    
        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)
    
        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                            validation_data=(x_test, y_test),
                            epochs=epochs, verbose=2, workers=4,
                            callbacks=callbacks, 
                            steps_per_epoch=x_train.shape[0]//batch_size)
    
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    
    
    if plt and test_acnn:
        print("Plotting kernels after ...")
        
        print("U max:", np.max(U_val[0][:,:,:,:]))
        import matplotlib.pyplot as plt
        ws = acnn_layer.get_weights()
        print("Sigmas after",ws[0])
        U_val=u_func([np.expand_dims(x_test[2], axis=0)])
        
        print("U shape", U_val[0].shape)
        num_filt=min(U_val[0].shape[3],12)
        
        indices = np.int32(np.linspace(0,U_val[0].shape[3]-1,num_filt))

        fig=plt.figure(figsize=(16,5))
        for i in range(num_filt):
            ax=plt.subplot(1, num_filt, i+1)
            kernel_u = U_val[0][:,:,0,indices[i]]
            im = ax.imshow(np.squeeze(kernel_u))
            print("kernel mean,var,max,min",np.mean(kernel_u),
                                           np.var(kernel_u),
                                           np.max(kernel_u), np.min(kernel_u))
        #fig.colorbar(im, ax=ax1)
        plt.show(block=False)
        
        
        print("outputs  ...")
        
        n=5
        
        out_val=output_func([np.expand_dims(x_test[5], axis=0)])
        print("Outputs shape", out_val[0].shape)
        num_filt=min(out_val[0].shape[3],12)
        
        indices = np.int32(np.linspace(0,out_val[0].shape[3]-1,num_filt))
        fig=plt.figure(figsize=(20,8))
        ax=plt.subplot(1, num_filt+1, 1)
        im = ax.imshow(np.squeeze(x_test[5]))
        print(y_test[5])
        print("input mean,var,max",np.mean(x_test[n]),np.var(x_test[n]),np.max(x_test[n]))
        for i in range(num_filt):
            ax=plt.subplot(1, num_filt+1, i+2)
            out_im = out_val[0][0,:,:,indices[i]]
            im = ax.imshow(np.squeeze(out_im))
            
            print("ouput mean,var,max",np.mean(out_im),
                                           np.var(out_im),
                                           np.max(out_im),np.min(out_im))
            #plt.colorbar(im,ax=ax)
        plt.show(block=False)
        
        print("Weights")
        fig=plt.figure(figsize=(20,8))
        num_show = min(U_val[0].shape[3],12)
        indices = np.int32(np.linspace(0,U_val[0].shape[3]-1,num_show))
        for i in range(num_show):
            ax1=plt.subplot(1, num_filt, i+1)
            #print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            #print("Prod-shape", (ws[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(ws[1][:,:,0,indices[i]]),cmap='gray')
 
        plt.show(block=False)
        
        print("ACNN Filters after")
        fig=plt.figure(figsize=(20,8))
        num_show = min(U_val[0].shape[3],12)
        indices = np.int32(np.linspace(0,U_val[0].shape[3]-1,num_show))
        for i in range(num_show):
            ax1=plt.subplot(1, num_filt, i+1)
            #print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            #print("Prod-shape", (ws[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(ws[1][:,:,0,indices[i]]*
                                  K.eval(acnn_layer.U()[:,:,0,indices[i]])),cmap='gray')
 
        plt.show(block=False)
        
        
        cnn_layer = model.get_layer('conv2d_1')
        wcnn = cnn_layer.get_weights()
        print("CNN Filters of", cnn_layer)
        fig=plt.figure(figsize=(20,8))
        num_show = min(wcnn[0].shape[3],12)
        indices = np.int32(np.linspace(0,wcnn[0].shape[3]-1,num_show))
        for i in range(num_show):
            ax1=plt.subplot(1, num_filt, i+1)
            #print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            #print("Prod-shape", (ws[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(wcnn[0][:,:,0,indices[i]]),cmap='gray')
 
        plt.show(block=False)
        
        
        rv_sigma_arr = np.array(rv_sigma_1.record)
        fig=plt.figure(figsize=(4,8))
        plt.plot(rv_sigma_arr)
        plt.title('Sigma')
        plt.show(block=False)
        
        rv_weights_arr = np.array(rv_weights_1.record)
        rv_weights_arr2d = np.reshape(rv_weights_arr,
                            (rv_weights_arr.shape[0],
                             np.prod(rv_weights_arr.shape[1:])))
        print(rv_weights_arr.shape)
        fig=plt.figure(figsize=(4,8))
        klist=[1,1,5,9,12,15,18,25,32,132,1132]
        for i in klist:
            plt.plot(rv_weights_arr2d[:,i])
        plt.title('weights-acnn')
        plt.show(block=False)
        
         
        
        rv_kernel_arr = np.array(rv_kernel.record)
        rv_kernel_arr2d = np.reshape(rv_kernel_arr,
                            (rv_kernel_arr.shape[0],
                             np.prod(rv_kernel_arr.shape[1:])))
        print(rv_kernel_arr.shape)
        fig=plt.figure(figsize=(4,8))
        klist=[1,1,5,9,12,15,18,25,32]
        for i in klist:
            plt.plot(rv_kernel_arr2d[:,i])
        plt.title('weights-conv2d-1')
        plt.show(block=False)
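
# A minimal, hedged sketch for running the experiment above when this file is
# executed as a script (assuming Conv2DAdaptive and keras_utils are importable):
if __name__ == '__main__':
    test_mnist()

Example no. 2
test_comp() trains the focused-neuron ("focus" layer) models driven by a settings
dictionary; an example dictionary is sketched after the function.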
def test_comp(settings, random_sid=9):
    import keras
    from keras.optimizers import SGD
    from keras.datasets import mnist, fashion_mnist, cifar10
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import RecordVariable, \
    PrintLayerVariableStats, PrintAnyVariable, SGDwithLR, eval_Kdict, standarize_image_025
    from keras_preprocessing.image import ImageDataGenerator
    import numpy as np
    import tensorflow as tf
    # create_simple_model / create_cnn_model are assumed to be defined elsewhere in
    # the project; they build the Keras models that this function trains.

    K.clear_session()

    epochs = settings['Epochs']
    batch_size = settings['batch_size']

    sid = random_sid
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)

    # The minimum sigma can affect performance: the neuron can shrink too much in
    # the initial epochs, where gradients are larger, and get stuck.
    MIN_SIG = 0.01
    MAX_SIG = 1.0
    MIN_MU = 0.0
    MAX_MU = 1.0
    lr_dict = {'all': settings['lr_all']}  #0.1 is default for MNIST
    mom_dict = {'all': 0.9}
    decay_dict = {'all': 0.9}
    clip_dict = {}
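    # Each focus layer gets slower learning rates for its Sigma and Mu than for its
    # Weights, and clip ranges that keep Sigma in [MIN_SIG, MAX_SIG] and Mu in
    # [MIN_MU, MAX_MU].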
    for i, n in enumerate(settings['nhidden']):
        lr_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.01})
        lr_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.01})
        lr_dict.update({'focus-' + str(i + 1) + '/Weights:0': 0.1})

        mom_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.9})
        mom_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9})

        decay_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.5})
        decay_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9})

        clip_dict.update(
            {'focus-' + str(i + 1) + '/Sigma:0': (MIN_SIG, MAX_SIG)})
        clip_dict.update({'focus-' + str(i + 1) + '/Mu:0': (MIN_MU, MAX_MU)})

    print("Loading dataset")
    if settings['dset'] == 'mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        n_channels = 1

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:

            decay_epochs = [e_i * 30, e_i * 100]

    elif settings['dset'] == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3

        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        # works well; reaches as high as 77 for cnn-focus
        #decay_dict = {'all':0.9, 'focus-1/Sigma:0': 1.1,'focus-1/Mu:0':0.9,
        #          'focus-2/Sigma:0': 1.1,'focus-2/Mu:0': 0.9}
        #if cnn_model: batch_size=256 # this works better than 500 for cifar-10
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 30, e_i * 80, e_i * 120, e_i * 180],
                                dtype='int64')
        #decay_epochs =np.array([e_i*10], dtype='int64')

    elif settings['dset'] == 'fashion':
        img_rows, img_cols = 28, 28
        n_channels = 1

        (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_dict = {
                'all': 0.9,
                'focus-1/Sigma:0': 0.9,
                'focus-1/Mu:0': 0.9,
                'focus-2/Sigma:0': 0.9,
                'focus-2/Mu:0': 0.9
            }

            decay_epochs = [e_i * 30, e_i * 100]

    elif settings['dset'] == 'mnist-clut':

        img_rows, img_cols = 60, 60
        # the data, split between train and test sets

        folder = '/media/home/rdata/image/'
        data = np.load(folder + "mnist_cluttered_60x60_6distortions.npz")

        x_train, y_train = data['x_train'], np.argmax(data['y_train'], axis=-1)
        x_valid, y_valid = data['x_valid'], np.argmax(data['y_valid'], axis=-1)
        x_test, y_test = data['x_test'], np.argmax(data['y_test'], axis=-1)
        x_train = np.vstack((x_train, x_valid))
        y_train = np.concatenate((y_train, y_valid))
        n_channels = 1

        lr_dict = {'all': 0.01}

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_epochs = [e_i * 30, e_i * 100]

    elif settings['dset'] == 'lfw_faces':
        from sklearn.datasets import fetch_lfw_people
        lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.4)

        # introspect the images arrays to find the shapes (for plotting)
        n_samples, img_rows, img_cols = lfw_people.images.shape
        n_channels = 1

        X = lfw_people.data
        n_features = X.shape[1]

        # the label to predict is the id of the person
        y = lfw_people.target
        target_names = lfw_people.target_names
        n_classes = target_names.shape[0]

        print("Total dataset size:")
        print("n_samples: %d" % n_samples)
        print("n_features: %d" % n_features)
        print("n_classes: %d" % n_classes)

        from sklearn.model_selection import train_test_split

        #X -= X.mean()
        #X /= X.std()
        #split into a training and testing set
        x_train, x_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.25,
                                                            random_state=42)

        import matplotlib.pyplot as plt

        plt.imshow(X[0].reshape((img_rows, img_cols)))
        plt.show()
        lr_dict = {'all': 0.001}

        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 50, e_i * 100, e_i * 150],
                                dtype='int64')

    num_classes = np.unique(y_train).shape[0]
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows,
                                  img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows,
                                img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols,
                                  n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols,
                                n_channels)
        input_shape = (img_rows, img_cols, n_channels)
    if settings['dset'] != 'mnist-clut':

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')

        x_train, _, x_test = standarize_image_025(x_train, tst=x_test)
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols,
                                  n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols,
                                n_channels)

    input_shape = (img_rows, img_cols, n_channels)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    sigma_reg = settings['focus_sigma_reg']
    sigma_reg = keras.regularizers.l2(
        sigma_reg) if sigma_reg is not None else sigma_reg
    settings['focus_sigma_reg'] = sigma_reg
    if settings['cnn_model']:
        model = create_cnn_model(input_shape, num_classes, settings=settings)
    else:
        model = create_simple_model(input_shape,
                                    num_classes,
                                    settings=settings)

    model.summary()

    print(lr_dict)
    print(mom_dict)
    print(decay_dict)
    print(clip_dict)

    opt = SGDwithLR(lr_dict, mom_dict, decay_dict, clip_dict,
                    decay_epochs)  #, decay=None)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    #callbacks = [tb]
    callbacks = []

    if settings['neuron'] == 'focused':
        pr_1 = PrintLayerVariableStats("focus-1", "Weights:0", stat_func_list,
                                       stat_func_name)
        pr_2 = PrintLayerVariableStats("focus-1", "Sigma:0", stat_func_list,
                                       stat_func_name)
        pr_3 = PrintLayerVariableStats("focus-1", "Mu:0", stat_func_list,
                                       stat_func_name)
        rv_weights_1 = RecordVariable("focus-1", "Weights:0")
        rv_sigma_1 = RecordVariable("focus-1", "Sigma:0")
        rv_mu_1 = RecordVariable("focus-1", "Mu:0")
        print_lr_rates_callback = keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                "iter: ", K.eval(model.optimizer.iterations), " LR RATES :",
                eval_Kdict(model.optimizer.lr)))

        callbacks += [
            pr_1, pr_2, pr_3, rv_weights_1, rv_sigma_1, rv_mu_1,
            print_lr_rates_callback
        ]

    if not settings['augment']:
        print('Not using data augmentation.')
        history = model.fit(x_train,
                            y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=(x_test, y_test),
                            shuffle=True,
                            callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.1,
            # randomly shift images vertically
            height_shift_range=0.1,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            zoom_range=0.,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format='channels_last',
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)

        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)

        # Fit the model on the batches generated by datagen.flow().
        history = model.fit_generator(datagen.flow(x_train,
                                                   y_train,
                                                   batch_size=batch_size),
                                      validation_data=(x_test, y_test),
                                      epochs=epochs,
                                      verbose=1,
                                      workers=4,
                                      callbacks=callbacks,
                                      steps_per_epoch=x_train.shape[0] //
                                      batch_size)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    return score, history, model, callbacks
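
# A hedged sketch of the settings dictionary that test_comp() expects; the keys are
# the ones read above, the values are only illustrative.
example_settings = {
    'dset': 'mnist',          # or 'cifar10', 'fashion', 'mnist-clut', 'lfw_faces'
    'Epochs': 200,
    'batch_size': 512,
    'lr_all': 0.1,            # base learning rate for all variables
    'nhidden': (800, 800),    # one 'focus-i' learning-rate entry is created per element
    'neuron': 'focused',      # enables the focus-layer statistics callbacks
    'cnn_model': False,       # True selects create_cnn_model instead of create_simple_model
    'focus_sigma_reg': None,  # a float here is wrapped in keras.regularizers.l2
    'augment': False,         # True enables the ImageDataGenerator branch
}
# score, history, model, callbacks = test_comp(example_settings, random_sid=9)
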
Example no. 3
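test() fits a single Conv2DAdaptive layer so that one MNIST digit reproduces three filtered versions of itself (a Gaussian blur, a horizontal Sobel, and a vertical Sobel of a blurred copy), then plots the learned U functions and weights.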
def test():
    import os
    os.environ['CUDA_VISIBLE_DEVICES']="0"
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH']="true"
    from keras.losses import mse
    import keras
    from keras.datasets import mnist,fashion_mnist, cifar10
    from keras.models import Sequential, Model
    from keras.layers import Input, Dense, Dropout, Flatten, Conv2D
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import RecordVariable, PrintLayerVariableStats, SGDwithLR
    import numpy as np
    import tensorflow as tf
    # Conv2DAdaptive (the adaptive convolution layer fitted below) is assumed to be
    # imported at module level from the project's own layer code.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
   # Create a session with the above options specified.
    K.tensorflow_backend.set_session(tf.Session(config=config))
    K.clear_session()


    sid = 9
    # restarting everything requires sess.close()
    #g = tf.Graph()
    #sess = tf.InteractiveSession(graph=g)
    #sess = tf.Session(graph=g)
    #K.set_session(sess)
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)
    
    from datetime import datetime
    now = datetime.now()
    timestr = now.strftime("%Y%m%d-%H%M%S")
    logdir = "tf_logs_cluttered/.../" + timestr + "/"
    
    
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    inputimg = x_train[0]/255
    sh = (inputimg.shape[0],inputimg.shape[1],1)
    outputimages = np.zeros(shape=[inputimg.shape[0],inputimg.shape[1],3],dtype='float32')
    outputimages[:,:,0] = filters.gaussian(inputimg,sigma=1)
    outputimages[:,:,1] = filters.sobel_h(inputimg)
    outputimages[:,:,2] = filters.sobel_v(filters.gaussian(inputimg,sigma=0.5))
    
    y = y_train[0]
    
    node_in = Input(shape=sh, name='inputlayer')
    # smaller initsigma does not work well. 
    node_acnn = Conv2DAdaptive(rank=2,nfilters=3,kernel_size=(5,5), 
                             data_format='channels_last',
                             strides=1,
                             padding='same',name='acnn',activation='linear',
                             init_sigma=0.5, trainSigmas=True, 
                             trainWeights=True,norm=2)(node_in)
    
    #node_acnn = Conv2D(filters=3,kernel_size=(7,7), 
    #                         data_format='channels_last',
    #                         padding='same',name='acnn',activation='linear')(node_in)
    
    
    model = Model(inputs=node_in, outputs=[node_acnn])
        
   # model.summary()

    from lr_multiplier import LearningRateMultiplier
    from keras.optimizers import SGD, Adadelta
    lr_dict = {'all':0.1,'acnn/Sigma:0': 0.1,'acnn/Weights:0': 0.1,
               'acnn-2/Sigma:0': 0.0001,'acnn-2/Weights:0': 0.01}
    
    mom_dict = {'all':0.9,'acnn/Sigma:0': 0.9,'acnn/Weights:0': 0.9,
                'acnn-2/Sigma:0': 0.9,'acnn-2/Weights:0': 0.9}
    clip_dict = {'acnn/Sigma:0': [0.1, 2.0]}
    

          
    # WORKS WITH OWN INIT With glorot uniform
    #print("WHAT THE ", lr_dict)
    # decay_dict and decay_epochs are not defined anywhere in this example; the
    # placeholder values below are assumed so that SGDwithLR can be constructed.
    decay_dict = {'all': 0.9}
    decay_epochs = np.array([500], dtype='int64')
    opt = SGDwithLR(lr=lr_dict, momentum=mom_dict, decay=decay_dict,
                    clips=clip_dict, decay_epochs=decay_epochs, clipvalue=0.01)
    #(lr_dict, mom_dict) 
    #
    model.compile(loss=mse, optimizer=opt, metrics=None)
    model.summary()
    
    
    
    inputimg2 = np.expand_dims(np.expand_dims(inputimg,axis=0), axis=3)
    outputimages2 = np.expand_dims(outputimages,axis=0)
    
    #print info about weights
    acnn_layer = model.get_layer('acnn')    
    all_params=acnn_layer.weights
    print("All params:",all_params)
    acnn_params = acnn_layer.get_weights()
    for i,v in enumerate(all_params):
        print(v, ", max, mean", np.max(acnn_params[i]),np.mean(acnn_params[i]),"\n")
    
    
    mchkpt = keras.callbacks.ModelCheckpoint('models/weights.txt', monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1)
    wh0= WeightHistory(model, "acnn")

    sigma_history = []
    # record the Sigma weights at the end of every epoch (the original passed the
    # result of calling the lambda, so nothing was recorded during training)
    sigma_call = lambda epoch, logs: sigma_history.append(acnn_layer.get_weights()[0])
    cov_dump = keras.callbacks.LambdaCallback(on_epoch_end=sigma_call)
    
    
    
    rv = RecordVariable("acnn","acnn/Sigma:0")
        
    history = model.fit(inputimg2, outputimages2,
              batch_size=1,
              epochs=1000,
              verbose=1, callbacks=[wh0,cov_dump,rv])
    
   
    print ("RECORD", len(rv.record), rv.record[0].shape)
    
    import matplotlib.pyplot as plt 
    rv_arr = np.array(rv.record)
    plt.plot(rv_arr)
    plt.title("Sigma")
    
    plt.figure()
    plt.plot(history.history['loss'])
    plt.title("Loss")

    
    #print info about weights
    print("After training:",all_params)
    acnn_params = acnn_layer.get_weights()
    for i,v in enumerate(all_params):
        print(v, ", max, mean", np.max(acnn_params[i]),np.mean(acnn_params[i]),"\n")
    
    # print info about sigmas
    #print("Recorded sigma history", sigma_history)
    #print("Recorded weight history", wh0.get_epochlist())
    
    
    pred_images = model.predict(inputimg2,  verbose=1)
    print("Prediction shape",pred_images.shape)
    plt = True
    if plt:
        print("Plotting kernels before...")
        import matplotlib.pyplot as plt
        num_images=min(pred_images.shape[3],12)
        fig=plt.figure(figsize=(10,5))
        plt.subplot(3, num_images, 2)
        plt.imshow(np.squeeze(inputimg2[0,:,:,0]))
        for i in range(num_images):
            plt.subplot(3, num_images, i+4)
            plt.imshow(np.squeeze(outputimages2[0,:,:,i]))
            plt.title("output image")
            print("Max-in:",i," ",np.max(np.squeeze(outputimages2[0,:,:,i])))
        
        for i in range(num_images):
            plt.subplot(3, num_images, i+7)
            plt.imshow(np.squeeze(pred_images[0,:,:,i]))
            plt.title("pred image")
            print("MAx:","pred",i,np.max(np.squeeze(pred_images[0,:,:,i])))
            
        plt.show()
        plt.figure()
        for i in range(num_images):
            plt.subplot(3, num_images, i+1)
            #print(acnn_params[1].shape)
            plt.imshow(np.squeeze(acnn_params[1][:,:,0,i]))
            #print("MAx:","pred",i,np.max(np.squeeze(acnn_params[i])))
        #fig.colorbar(im, ax=ax1)
        plt.show()
        
        
        plt.figure()
        for i in range(num_images):
            plt.subplot(3, num_images, i+1)
            print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            print("Prod-shape", (acnn_params[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(K.eval(acnn_layer.U()[:,:,0,i])))
            plt.title("U func")
        plt.figure()
        for i in range(num_images):
            plt.subplot(3, num_images, i+1)
            print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            print("Prod-shape", (acnn_params[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(acnn_params[1][:,:,0,i]*K.eval(acnn_layer.U()[:,:,0,i])))
            plt.title("Weights")

            #plt.imshow()
            
            #print("MAx:","pred",i,np.max(np.squeeze(acnn_params[i])))
        #fig.colorbar(im, ax=ax1)
        plt.show()
        
        
        
    #print( model.get_layer('acnn').output )
    print( "Final Sigmas", model.get_layer('acnn').get_weights()[0] )
    
    K.clear_session()
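
test_transfer() below fine-tunes an ImageNet-pretrained VGG16 on MNIST or CIFAR-10,
placing either a FocusedLayer1D ('focused') or a plain Dense layer ('dense') on top
of the globally pooled features.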
def test_transfer(dset='mnist', random_seed=9, epochs=10, 
                  data_augmentation=False,
                  batch_size=512, ntrn=None, ntst=None, mod='focused'):
    import os
    import numpy as np
    #os.environ['CUDA_VISIBLE_DEVICES']="0"
    #os.environ['TF_FORCE_GPU_ALLOW_GROWTH']="true"
    import keras
    from keras.losses import mse
    from keras.optimizers import SGD, RMSprop
    from keras.datasets import mnist,fashion_mnist, cifar10
    from keras.models import Sequential, Model
    from keras.layers import Input, Dense, Dropout, Flatten,Conv2D, BatchNormalization
    from keras.layers import Activation, Permute,Concatenate,GlobalAveragePooling2D
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import RecordVariable, \
    PrintLayerVariableStats, PrintAnyVariable, \
    SGDwithLR, eval_Kdict, standarize_image_025
    from keras_preprocessing.image import ImageDataGenerator
    from Kfocusing import FocusedLayer1D
    
    from keras.engine.topology import Layer
    from keras import activations, regularizers, constraints
    from keras import initializers
    from keras.engine import InputSpec
    import tensorflow as tf
    from keras.applications.inception_v3 import InceptionV3
    #import keras.applications.resnet50 as resnet
    #from keras.applications.resnet50 import preprocess_input
    from keras.applications import VGG16
    from keras.applications.vgg16 import preprocess_input
    
    #Load the VGG model



    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
   # Create a session with the above options specified.
    K.tensorflow_backend.set_session(tf.Session(config=config))
    K.clear_session()


    sid = random_seed
 
    
    #test_acnn = True
    
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)
    
    from datetime import datetime
    now = datetime.now()
    
    if dset=='mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28  
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        NTRN = ntrn if ntrn else x_train.shape[0]
        NTST = ntst if ntst else x_test.shape[0]
        n_channels=1
        
        lr_dict = {'all':0.1,
                   'focus-1/Sigma:0': 0.01,'focus-1/Mu:0': 0.01,'focus-1/Weights:0': 0.1,
                   'focus-2/Sigma:0': 0.01,'focus-2/Mu:0': 0.01,'focus-2/Weights:0': 0.1}

        mom_dict = {'all':0.9,'focus-1/Sigma:0': 0.9,'focus-1/Mu:0': 0.9,
                   'focus-2/Sigma:0': 0.9,'focus-2/Mu:0': 0.9}
    
        decay_dict = {'all':0.9, 'focus-1/Sigma:0': 0.1,'focus-1/Mu:0':0.1,
                  'focus-2/Sigma:0': 0.1,'focus-2/Mu:0': 0.1}

        clip_dict = {'focus-1/Sigma:0':(0.01,1.0),'focus-1/Mu:0':(0.0,1.0),
                 'focus-2/Sigma:0':(0.01,1.0),'focus-2/Mu:0':(0.0,1.0)}
        
        e_i = x_train.shape[0] // batch_size
        decay_epochs =np.array([e_i*100], dtype='int64')
    
    elif dset=='cifar10':    
        img_rows, img_cols = 32,32
        n_channels=3
        
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        
        NTRN = ntrn if ntrn else x_train.shape[0]
        NTST = ntst if ntst else x_test.shape[0]
        
        lr_dict = {'all':1e-3,
                  'focus-1/Sigma:0': 1e-3,'focus-1/Mu:0': 1e-3,'focus-1/Weights:0':1e-3,
                  'focus-2/Sigma:0': 1e-3,'focus-2/Mu:0': 1e-3,'focus-2/Weights:0': 1e-3,
                  'dense_1/Weights:0':1e-3}
        
        # 1e-3 'all' reaches 91.43 at 250 epochs
        # 1e-3 'all' reached 90.75 at 100 epochs

        mom_dict = {'all':0.9,'focus-1/Sigma:0': 0.9,'focus-1/Mu:0': 0.9,
                   'focus-2/Sigma:0': 0.9,'focus-2/Mu:0': 0.9}
        #decay_dict = {'all':0.9}
        decay_dict = {'all':0.9, 'focus-1/Sigma:0': 0.9,'focus-1/Mu:0':0.9,
                      'focus-2/Sigma:0': 0.9,'focus-2/Mu:0': 0.9}

        clip_dict = {'focus-1/Sigma:0':(0.01,1.0),'focus-1/Mu:0':(0.0,1.0),
                 'focus-2/Sigma:0':(0.01,1.0),'focus-2/Mu:0':(0.0,1.0)}
        
        e_i = NTRN // batch_size
        
        #decay_epochs =np.array([e_i*10], dtype='int64') #for 20 epochs
        #decay_epochs =np.array([e_i*10,e_i*80,e_i*120,e_i*160], dtype='int64')
        decay_epochs =np.array([e_i*10, e_i*80, e_i*120, e_i*180], dtype='int64')
    
    num_classes = np.unique(y_train).shape[0]
    
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)

   
    

    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
    input_shape = (img_rows, img_cols, n_channels)
    
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')
    #FRAME_SIZE=(299,299,3)
    FRAME_SIZE = (224,224,3)

    idx = np.random.permutation(x_train.shape[0])  
    x_train = x_train[idx[0:NTRN]]
    y_train = y_train[idx[0:NTRN]]
    idx = np.random.permutation(x_test.shape[0])
    x_test = x_test[idx[0:NTST]]
    y_test = y_test[idx[0:NTST]]
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    #x_train, _, x_test = paddataset(x_train,None,x_test,FRAME_SIZE,False,5)
    
    
    #x_train, _, x_test = standarize_image_025(x_train,tst=x_test)
    x_train=preprocess_input(x_train)
    x_test=preprocess_input(x_test)
    import matplotlib.pyplot as plt
    plt.imshow(x_train[0])
    print(np.max(x_train[0]),np.mean(x_train[0]))
    plt.show()
    plt.imshow(x_test[0])
    print(np.max(x_test[0]),np.mean(x_test[0]))
    plt.show()
    
    print(x_train.shape, 'train samples')
    print(np.mean(x_train))
    print(np.var(x_train))
    
    print(x_test.shape, 'test samples')
    print(np.mean(x_test))
    print(np.var(x_test))
    
   

    # create the base pre-trained model
    base_in = Input(shape=input_shape, name='inputlayer')
    
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape,
                      input_tensor=base_in)
    x=base_model.output
    x = GlobalAveragePooling2D()(x)
    
    pad_input =True
    if pad_input:
        print("PADDING LAYER OUPUT")
        
        paddings = tf.constant([[0, 0,], [3, 3]])
    
        padding_layer = keras.layers.Lambda(lambda x: tf.pad(x,paddings,"CONSTANT"))
        x = padding_layer(x)
    #x = Dropout(0.1)(x)
    # let's add a fully-connected layer
    focusing=mod=='focused'
    if focusing:
        nf = 40#init_sigma=np.exp(-(np.linspace(0.1, 0.9, nf)-0.5)**2/0.07),
        x = FocusedLayer1D(units=nf,
                           name='focus-1',
                           activation='linear',
                           init_sigma=0.08,
                           init_mu='spread',
                           init_w= None,
                           train_sigma=True,
                           train_weights=True,
                           train_mu = True,
                           si_regularizer=None,
                           normed=2)(x)
    elif mod=='dense':
        x = Dense(40, activation='linear')(x)
    else:
        print('unknown mod')
        return
        
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)
    # and a logistic layer -- let's say we have 200 classes
    predictions = Dense(10, activation='softmax')(x)
    model = Model(inputs=base_in, outputs=[predictions])
    # opt= SGDwithLR(lr_dict, mom_dict,decay_dict,clip_dict, decay_epochs)#, decay=None)
    optimizer_s = 'SGDwithLR'
    if optimizer_s == 'SGDwithLR':
        opt = SGDwithLR(lr_dict, mom_dict,decay_dict,clip_dict, decay_epochs)#, decay=None)
    elif optimizer_s=='RMSprob':
        opt = RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0)
    else:
    # opt= SGDwithLR({'all': 0.01},{'all':0.9})#, decay=None)
        opt= SGD(lr=0.01, momentum=0.9)#, decay=None)
    
    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])
        
    model.summary()
    
    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    
    callbacks = []
    
    if focusing:
        pr_1 = PrintLayerVariableStats("focus-1","Weights:0",stat_func_list,stat_func_name)
        pr_2 = PrintLayerVariableStats("focus-1","Sigma:0",stat_func_list,stat_func_name)
        pr_3 = PrintLayerVariableStats("focus-1","Mu:0",stat_func_list,stat_func_name)
        callbacks+=[pr_1,pr_2,pr_3]
    
    recordvariables=False
    if recordvariables:
            
        rv_weights_1 = RecordVariable("focus-1","Weights:0")
        rv_sigma_1 = RecordVariable("focus-1","Sigma:0")
        callbacks+=[rv_weights_1,rv_sigma_1]

    if optimizer_s =='SGDwithLR': 
        print_lr_rates_callback = keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print("iter: ", 
                                                   K.eval(model.optimizer.iterations),
                                                   " LR RATES :", 
                                                   eval_Kdict(model.optimizer.lr)))
    
        callbacks.append(print_lr_rates_callback)
    
        
    if not data_augmentation:
        print('Not using data augmentation.')
        history=model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, y_test),
                  shuffle=True,
                  callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
    
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.2,
            # randomly shift images vertically
            height_shift_range=0.2,
            # set range for random shear
            shear_range=0.1,
            # set range for random zoom
            zoom_range=0.1,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format='channels_last',
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)
    
        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)
        
        #x_test,_,_ = paddataset(x_test,None, None,frame_size=FRAME_SIZE, random_pos=False)
        # Fit the model on the batches generated by datagen.flow().
        history = model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                                      validation_data=(x_test, y_test),
                                      workers=4, use_multiprocessing=False,
                                      epochs=epochs, verbose=2,
                                      callbacks=callbacks,
                                      steps_per_epoch=x_train.shape[0]//batch_size)
    
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    return score,history,model
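
# Hedged usage sketch; only the signature comes from the definition above, the
# argument values are illustrative.
# score, history, model = test_transfer(dset='cifar10', random_seed=9, epochs=10,
#                                       batch_size=512, mod='focused')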