def test():
    """Overfit a single MNIST digit with one Conv2DAdaptive ('acnn') layer.

    Builds a toy regression problem — map one MNIST image to three filtered
    versions of itself (gaussian, sobel-h, sobel-v) — trains a one-layer
    adaptive-convolution model on it for 1000 epochs, then prints/plots the
    learned sigmas, kernels, U-functions and predictions.

    NOTE(review): relies on module-level names `tf`, `np` and `Conv2DAdaptive`
    that are not imported inside this function — presumably imported at file
    top; confirm.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = "true"
    from keras.losses import mse
    import keras
    from keras.datasets import mnist, fashion_mnist, cifar10
    from keras.models import Sequential, Model
    from keras.layers import Input, Dense, Dropout, Flatten, Conv2D
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import RecordVariable, PrintLayerVariableStats, SGDwithLR

    # Cap GPU memory so other jobs can share the device (TF1 API).
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    # Create a session with the above options specified.
    K.tensorflow_backend.set_session(tf.Session(config=config))
    K.clear_session()

    # Seed every RNG in play for repeatability.
    sid = 9
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)

    from datetime import datetime
    now = datetime.now()
    timestr = now.strftime("%Y%m%d-%H%M%S")
    logdir = "tf_logs_cluttered/.../" + timestr + "/"

    # --- Build the toy target: one digit -> three filtered images -----------
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    inputimg = x_train[0] / 255
    sh = (inputimg.shape[0], inputimg.shape[1], 1)
    outputimages = np.zeros(shape=[inputimg.shape[0], inputimg.shape[1], 3],
                            dtype='float32')
    outputimages[:, :, 0] = filters.gaussian(inputimg, sigma=1)
    outputimages[:, :, 1] = filters.sobel_h(inputimg)
    outputimages[:, :, 2] = filters.sobel_v(filters.gaussian(inputimg, sigma=0.5))
    y = y_train[0]

    node_in = Input(shape=sh, name='inputlayer')
    # smaller initsigma does not work well.
    node_acnn = Conv2DAdaptive(rank=2, nfilters=3, kernel_size=(5, 5),
                               data_format='channels_last', strides=1,
                               padding='same', name='acnn', activation='linear',
                               init_sigma=0.5, trainSigmas=True,
                               trainWeights=True, norm=2)(node_in)
    model = Model(inputs=node_in, outputs=[node_acnn])

    # --- Optimizer with per-variable learning rates --------------------------
    from lr_multiplier import LearningRateMultiplier
    from keras.optimizers import SGD, Adadelta
    lr_dict = {'all': 0.1, 'acnn/Sigma:0': 0.1, 'acnn/Weights:0': 0.1,
               'acnn-2/Sigma:0': 0.0001, 'acnn-2/Weights:0': 0.01}
    mom_dict = {'all': 0.9, 'acnn/Sigma:0': 0.9, 'acnn/Weights:0': 0.9,
                'acnn-2/Sigma:0': 0.9, 'acnn-2/Weights:0': 0.9}
    clip_dict = {'acnn/Sigma:0': [0.1, 2.0]}
    # BUG FIX: `decay_dict` and `decay_epochs` were passed to SGDwithLR below
    # without ever being defined in this function (NameError at runtime).
    # Supply explicit values mirroring the other tests in this file.
    decay_dict = {'all': 0.9}
    e_i = 1  # one training sample with batch_size=1 -> one iteration/epoch
    decay_epochs = np.array([e_i * 500], dtype='int64')
    # WORKS WITH OWN INIT With glorot uniform
    opt = SGDwithLR(lr=lr_dict, momentum=mom_dict, decay=decay_dict,
                    clips=clip_dict, decay_epochs=decay_epochs, clipvalue=0.01)

    model.compile(loss=mse, optimizer=opt, metrics=None)
    model.summary()

    # Add batch and channel axes: (28, 28) -> (1, 28, 28, 1) / (1, 28, 28, 3).
    inputimg2 = np.expand_dims(np.expand_dims(inputimg, axis=0), axis=3)
    outputimages2 = np.expand_dims(outputimages, axis=0)

    # print info about weights
    acnn_layer = model.get_layer('acnn')
    all_params = acnn_layer.weights
    print("All params:", all_params)
    acnn_params = acnn_layer.get_weights()
    for i, v in enumerate(all_params):
        print(v, ", max, mean", np.max(acnn_params[i]), np.mean(acnn_params[i]), "\n")

    mchkpt = keras.callbacks.ModelCheckpoint('models/weights.txt',
                                             monitor='val_loss', verbose=0,
                                             save_best_only=False,
                                             save_weights_only=False,
                                             mode='auto', period=1)
    wh0 = WeightHistory(model, "acnn")

    sigma_history = []

    # BUG FIX: the callback used to be constructed as
    #     LambdaCallback(on_epoch_end=sigma_call(sigma_history))
    # which *invoked* the recorder once at construction time and registered
    # its return value (None) as the hook — so sigmas were captured only once,
    # not per epoch. Register a real closure with the (epoch, logs) signature
    # Keras expects, and avoid the old mutable-default `logs={}` lambda.
    def sigma_call(epoch, logs=None):
        sigma_history.append(acnn_layer.get_weights()[0])

    cov_dump = keras.callbacks.LambdaCallback(on_epoch_end=sigma_call)

    rv = RecordVariable("acnn", "acnn/Sigma:0")
    history = model.fit(inputimg2, outputimages2, batch_size=1, epochs=1000,
                        verbose=1, callbacks=[wh0, cov_dump, rv])
    print("RECORD", len(rv.record), rv.record[0].shape)

    import matplotlib.pyplot as plt
    rv_arr = np.array(rv.record)
    plt.plot(rv_arr)
    plt.title("Sigma")
    plt.figure()
    plt.plot(history.history['loss'])
    plt.title("Loss")

    # print info about weights after training
    print("After training:", all_params)
    acnn_params = acnn_layer.get_weights()
    for i, v in enumerate(all_params):
        print(v, ", max, mean", np.max(acnn_params[i]), np.mean(acnn_params[i]), "\n")

    pred_images = model.predict(inputimg2, verbose=1)
    print("Prediction shape", pred_images.shape)

    # Renamed flag from `plt` to `do_plot`: the old name shadowed the pyplot
    # module imported above.
    do_plot = True
    if do_plot:
        print("Plotting kernels before...")
        num_images = min(pred_images.shape[3], 12)
        fig = plt.figure(figsize=(10, 5))
        plt.subplot(3, num_images, 2)
        plt.imshow(np.squeeze(inputimg2[0, :, :, 0]))
        # Row 2: target (filtered) images.
        for i in range(num_images):
            plt.subplot(3, num_images, i + 4)
            plt.imshow(np.squeeze(outputimages2[0, :, :, i]))
            plt.title("output image")
            print("Max-in:", i, " ", np.max(np.squeeze(outputimages2[0, :, :, i])))
        # Row 3: model predictions.
        for i in range(num_images):
            plt.subplot(3, num_images, i + 7)
            plt.imshow(np.squeeze(pred_images[0, :, :, i]))
            plt.title("pred image")
            print("MAx:", "pred", i, np.max(np.squeeze(pred_images[0, :, :, i])))
        plt.show()

        # Raw weight kernels.
        plt.figure()
        for i in range(num_images):
            plt.subplot(3, num_images, i + 1)
            plt.imshow(np.squeeze(acnn_params[1][:, :, 0, i]))
        plt.show()

        # Learned U (envelope) functions.
        plt.figure()
        for i in range(num_images):
            plt.subplot(3, num_images, i + 1)
            print("U -shape: ", acnn_layer.U().shape,
                  type(K.eval(acnn_layer.U()[:, :, 0, i])))
            print("Prod-shape",
                  (acnn_params[1][:, :, 0, i] * acnn_layer.U()[:, :, 0, i]).shape)
            plt.imshow(np.float32(K.eval(acnn_layer.U()[:, :, 0, i])))
            plt.title("U func")

        # Effective kernels: weights modulated by U.
        plt.figure()
        for i in range(num_images):
            plt.subplot(3, num_images, i + 1)
            print("U -shape: ", acnn_layer.U().shape,
                  type(K.eval(acnn_layer.U()[:, :, 0, i])))
            print("Prod-shape",
                  (acnn_params[1][:, :, 0, i] * acnn_layer.U()[:, :, 0, i]).shape)
            plt.imshow(np.float32(acnn_params[1][:, :, 0, i] *
                                  K.eval(acnn_layer.U()[:, :, 0, i])))
            plt.title("Weights")
        plt.show()

    print("Final Sigmas", model.get_layer('acnn').get_weights()[0])
    K.clear_session()
def test_mnist():
    """Train a small CNN on MNIST (or CIFAR-10) with an optional adaptive
    convolution ('acnn') layer and a per-variable-LR SGD optimizer.

    Pipeline: load/normalize data -> build functional model (two plain conv
    blocks, then either Conv2DAdaptive or a plain Conv2D, then dense head)
    -> compile with SGDwithLR -> fit (optionally with augmentation) ->
    evaluate and, when enabled, plot kernels/U-functions/weight histories.

    NOTE(review): relies on module-level `tf`, `np`, `Conv2DAdaptive` and
    `initializers` (used as `initializers.Constant(0)` below) which are not
    imported inside this function — presumably imported at file top; confirm.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = "2"
    os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = "true"
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    import keras
    from keras.datasets import mnist, cifar10
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Flatten, Input, Add, ReLU
    from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
    from keras.layers import BatchNormalization, Activation, Concatenate
    from keras.regularizers import l2, l1
    from keras.callbacks import LearningRateScheduler, ModelCheckpoint, TensorBoard
    from keras_preprocessing.image import ImageDataGenerator
    from keras.optimizers import SGD, Adadelta
    #from keras.initializers import glorot_uniform as w_ini
    from keras.initializers import he_uniform as w_ini
    from keras.initializers import VarianceScaling as VS_ini
    from keras import backend as K
    from keras_utils import RecordVariable, PrintLayerVariableStats, SGDwithLR
    #config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.1
    # Create a session with the above options specified.
    #K.tensorflow_backend.set_session(tf.Session(config=config))

    # Seed all RNGs for repeatability.
    sid = 9
    #sess = K.get_session()
    K.clear_session()
    #sess = tf.Session(graph=g)
    #K.set_session(sess)
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)

    # --- Experiment configuration -------------------------------------------
    #dset='cifar10'
    dset = 'mnist'
    batch_size = 512
    num_classes = 10
    epochs = 200
    test_acnn = True        # True: use Conv2DAdaptive; False: plain Conv2D
    regulazer = None        # kernel regularizer for the plain layers
    prfw = 5                # filter width of the two leading conv blocks
    fw = 5                  # filter width of the acnn / comparison conv layer
    residual = False        # wrap the acnn layer in a residual Add
    data_augmentation = False

    # --- Data loading and preprocessing -------------------------------------
    if dset == 'mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        n_channels = 1
    elif dset == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)

    # Scale to [0,1], then subtract the training-set mean image from both sets.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    trn_mn = np.mean(x_train, axis=0)
    x_train -= trn_mn
    x_test -= trn_mn
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # --- Model construction (functional API; `network` holds the chain) -----
    network = []
    network.append(Input(shape=input_shape))
    # (an earlier variant placed the acnn/Conv2D block right here; kept for
    # reference in the original source as a commented-out block)
    network.append(Conv2D(32, kernel_size=(prfw, prfw), activation='linear',
                          padding='same', kernel_initializer=w_ini(),
                          kernel_regularizer=regulazer)(network[-1]))
    network.append(BatchNormalization()(network[-1]))
    network.append(Activation('relu')(network[-1]))
    network.append(Dropout(0.2)(network[-1]))
    network.append(Conv2D(32, (prfw, prfw), activation='linear',
                          kernel_initializer=w_ini(), padding='same',
                          kernel_regularizer=regulazer)(network[-1]))
    #odel.add(MaxPooling2D(pool_size=(2, 2)))
    network.append(BatchNormalization()(network[-1]))
    network.append(Activation('relu')(network[-1]))
    network.append(Dropout(0.2)(network[-1]))
    #odel.add(Dense(128, activation='relu'))
    #odel.add(Dropout(0.25))
    #=============================================================================
    nfilter = 32
    if test_acnn:
        prev_layer = network[-1]
        # Adaptive convolution under test; sigma range [0.25, 0.75] at init.
        conv_node = Conv2DAdaptive(rank=2, nfilters=nfilter, kernel_size=(fw, fw),
                                   data_format='channels_last', strides=1,
                                   padding='same', name='acnn-1', activation='linear',
                                   trainSigmas=True, trainWeights=True,
                                   init_sigma=[0.25, 0.75],
                                   gain=1.0,
                                   kernel_regularizer=None,
                                   init_bias=initializers.Constant(0),
                                   norm=2)(prev_layer)
        if residual:
            network.append(Add()([conv_node, prev_layer]))
        else:
            network.append(conv_node)  #, input_shape=input_shape))
    else:
        #fw = 7
        #v_ini = VS_ini(scale=0.25,mode='fan_in',distribution='uniform')
        # Plain Conv2D baseline with the same filter count/size.
        network.append(Conv2D(nfilter, (fw, fw), activation='linear',
                              kernel_initializer=w_ini(),
                              kernel_regularizer=None,
                              padding='same')(network[-1]))  #, input_shape=input_shape))
    network.append(BatchNormalization()(network[-1]))
    network.append(ReLU()(network[-1]))
    #network.append(ReLU(negative_slope=0.01)(network[-1]))
    #network.append(Activation('selu'))
    network.append(MaxPooling2D(pool_size=(2, 2))(network[-1]))
    print("MAY BE MAXPOOL LAYER IS AFFECTING SIGNAL ")
    network.append(Dropout(0.2)(network[-1]))
    #model.add(keras.layers.AlphaDropout(0.2))
    #network.append(GlobalAveragePooling2D()(network[-1]))
    network.append(Flatten()(network[-1]))
    network.append(Dense(units=128, activation='linear',
                         kernel_regularizer=regulazer)(network[-1]))
    network.append(BatchNormalization()(network[-1]))
    network.append(ReLU()(network[-1]))
    network.append(Dropout(0.2)(network[-1]))
    network.append(Dense(num_classes, activation='softmax',
                         kernel_regularizer=regulazer)(network[-1]))

    model = keras.models.Model(inputs=[network[0]], outputs=network[-1])
    model.summary()
    print("MAY BE MAXPOOL LAYER IS AFFECTING SIGNAL ")

    # --- Optimizer: per-variable learning rates / momenta / decays ----------
    from lr_multiplier import LearningRateMultiplier
    #lr=0.001
    #multipliers = {'acnn-1/Sigma:0': 1.0,'acnn-1/Weights:0': 1000.0,
    #               'acnn-2/Sigma:0': 1.0,'acnn-2/Weights:0': 1000.0}
    #opt = LearningRateMultiplier(SGD, lr_multipliers=multipliers,
    #                             lr=lr, momentum=0.9,decay=0)
    #opt= SGD(lr=lr,momentum=0.9,decay=0,nesterov=False)
    '''lr_dict = {'all':0.01,'acnn-1/Sigma:0': 0.01,'acnn-1/Weights:0': 1.0,
               'acnn-2/Sigma:0': 0.01,'acnn-2/Weights:0': 0.1}
    mom_dict = {'all':0.9,'acnn-1/Sigma:0': 0.5,'acnn-1/Weights:0': 0.9,
               'acnn-2/Sigma:0': 0.9,'acnn-2/Weights:0': 0.9}
    decay_dict = {'all':0.95, 'acnn-1/Sigma:0': 0.05, 'acnn-1/Weights:0':0.95,
               'acnn-1/Sigma:0': 0.05,'acnn-2/Weights:0': 0.95}
    clip_dict = {'acnn-1/Sigma:0':(0.05,1.0),'acnn-2/Sigma:0':(0.05,1.0)} '''
    # NOTE(review): these keys say 'acnn'/'acnn-2' while the layer above is
    # named 'acnn-1' — the per-layer overrides may never match; confirm
    # against SGDwithLR's key-matching rules.
    lr_dict = {'all': 0.1, 'acnn/Sigma:0': 0.1, 'acnn/Weights:0': 0.1,
               'acnn-2/Sigma:0': 0.0001, 'acnn-2/Weights:0': 0.01}
    mom_dict = {'all': 0.9, 'acnn/Sigma:0': 0.9, 'acnn/Weights:0': 0.9,
                'acnn-2/Sigma:0': 0.9, 'acnn-2/Weights:0': 0.9}
    clip_dict = {'acnn/Sigma:0': [0.1, 2.0]}
    decay_dict = {}
    decay_dict.update({'focus-1' + '/Sigma:0': 0.9})  #best results 0.5
    decay_dict.update({'focus-1' + '/Mu:0': 0.9})
    decay_dict.update({'all': 0.9})
    # Iterations per epoch; decay points expressed in iterations.
    e_i = x_train.shape[0] // batch_size
    decay_epochs = [e_i * 10, e_i * 30, e_i * 60, e_i * 90, e_i * 100]
    opt = SGDwithLR(lr=lr_dict, momentum=mom_dict, decay=decay_dict,
                    clips=clip_dict, decay_epochs=decay_epochs, clipvalue=0.01)
    e_i = x_train.shape[0] // batch_size
    #decay_epochs =np.array([e_i*10], dtype='int64') #for 20 epochs
    #decay_epochs =np.array([e_i*10,e_i*80,e_i*120,e_i*160], dtype='int64')
    #opt = SGDwithLR(lr_dict, mom_dict,decay_dict,clip_dict, decay_epochs)#, decay=None)
    #opt= Adadelta()
    #lr_scheduler = LearningRateScheduler(lr_schedule,lr)

    # --- Checkpointing / logging callbacks ----------------------------------
    # Prepare model model saving directory.
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    if test_acnn:
        model_name = '%s_acnn%dx_model.{epoch:03d}.h5' % (dset, fw)
    else:
        model_name = '%s_cnn%dx_model.{epoch:03d}.h5' % (dset, fw)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    filepath = os.path.join(save_dir, model_name)
    print("Saving in ", filepath)
    # Prepare callbacks for model saving and for learning rate adjustment.
    # checkpoint = ModelCheckpoint(filepath=filepath,
    #                              monitor='val_acc',
    #                              verbose=1,
    #                              save_best_only=True)
    chkpt = keras.callbacks.ModelCheckpoint('best-model.h5', monitor='val_acc',
                                            verbose=1, save_best_only=True,
                                            save_weights_only=True, mode='max',
                                            period=1)
    tb = TensorBoard(log_dir='./tb_logs/mnist/acnn-res-lr5',
                     histogram_freq=1,
                     write_grads=True,
                     write_graph=False)
    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    # NOTE(review): `callbacks = [tb]` is immediately overwritten below, so the
    # TensorBoard callback (and chkpt above) are effectively disabled — this
    # looks deliberate given the gradient warning printed further down.
    callbacks = [tb]
    callbacks = []
    if test_acnn:
        pr_1 = PrintLayerVariableStats("acnn-1", "Weights:0", stat_func_list, stat_func_name)
        pr_2 = PrintLayerVariableStats("acnn-1", "Sigma:0", stat_func_list, stat_func_name)
        rv_weights_1 = RecordVariable("acnn-1", "Weights:0")
        rv_sigma_1 = RecordVariable("acnn-1", "Sigma:0")
        callbacks += [pr_1, pr_2, rv_weights_1, rv_sigma_1]
    else:
        pr_1 = PrintLayerVariableStats("conv2d_3", "kernel:0", stat_func_list, stat_func_name)
        rv_weights_1 = RecordVariable("conv2d_3", "kernel:0")
        callbacks += [pr_1, rv_weights_1]
    pr_3 = PrintLayerVariableStats("conv2d_1", "kernel:0", stat_func_list, stat_func_name)
    rv_kernel = RecordVariable("conv2d_1", "kernel:0")
    callbacks += [pr_3, rv_kernel]
    print("CALLBACKS:", callbacks)
    print("TRAINABLE WEIGHTS:", model.trainable_weights)
    print("WARNING by BTEK: if you see An operation has `None` for gradient. \
Please make sure that all of your ops have a gradient defined \
(i.e. are differentiable). Common ops without gradient: \
K.argmax, K.round, K.eval. REMOVE TENSORBOARD CALLBACK OR EDIT IT!")
    #print(opt)
    #opt = SGD(lr=0.01,momentum=0.9)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

    # Plot flag; when True, `plt` is rebound to the pyplot module inside the
    # guarded blocks below (intentional shadowing in the original code).
    plt = False
    if plt and test_acnn:
        print("Plotting kernels before...")
        import matplotlib.pyplot as plt
        acnn_layer = model.get_layer('acnn-1')
        ws = acnn_layer.get_weights()
        print("Sigmas before", ws[0])
        # Backend functions to fetch the layer's U envelope and its output.
        u_func = K.function(inputs=[model.input], outputs=[acnn_layer.U()])
        output_func = K.function(inputs=[model.input], outputs=[acnn_layer.output])
        U_val = u_func([np.expand_dims(x_test[0], axis=0)])
        print("U shape", U_val[0].shape)
        print("U max:", np.max(U_val[0][:, :, :, :]))
        num_filt = min(U_val[0].shape[3], 12)
        fig = plt.figure(figsize=(20, 8))
        for i in range(num_filt):
            ax1 = plt.subplot(1, num_filt, i + 1)
            im = ax1.imshow(np.squeeze(U_val[0][:, :, 0, i]))
            fig.colorbar(im, ax=ax1)
        plt.show(block=False)
        fig = plt.figure(figsize=(20, 8))
        num_show = min(U_val[0].shape[3], 12)
        indices = np.int32(np.linspace(0, U_val[0].shape[3] - 1, num_show))
        for i in range(num_show):
            ax1 = plt.subplot(1, num_filt, i + 1)
            #print("U -shape: ", acnn_layer.U().shape,type(K.eval(acnn_layer.U()[:,:,0,i])))
            #print("Prod-shape", (ws[1][:,:,0,i]*acnn_layer.U()[:,:,0,i]).shape)
            plt.imshow(np.float32(ws[1][:, :, 0, indices[i]] *
                                  K.eval(acnn_layer.U()[:, :, 0, indices[i]])))
        plt.show(block=False)

    # --- Run training, with or without data augmentation. -------------------
    if not data_augmentation:
        print('Not using data augmentation.')
        model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, y_test),
                  shuffle=True,
                  callbacks=callbacks, verbose=2)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.1,
            # randomly shift images vertically
            height_shift_range=0.1,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            zoom_range=0.,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format='channels_last',
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)
        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)
        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                            validation_data=(x_test, y_test),
                            epochs=epochs, verbose=2, workers=4,
                            callbacks=callbacks,
                            steps_per_epoch=x_train.shape[0] // batch_size)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # --- Post-training diagnostics (only when plotting is enabled) ----------
    if plt and test_acnn:
        print("Plotting kernels after ...")
        print("U max:", np.max(U_val[0][:, :, :, :]))
        import matplotlib.pyplot as plt
        ws = acnn_layer.get_weights()
        print("Sigmas after", ws[0])
        U_val = u_func([np.expand_dims(x_test[2], axis=0)])
        print("U shape", U_val[0].shape)
        num_filt = min(U_val[0].shape[3], 12)
        indices = np.int32(np.linspace(0, U_val[0].shape[3] - 1, num_filt))
        fig = plt.figure(figsize=(16, 5))
        for i in range(num_filt):
            ax = plt.subplot(1, num_filt, i + 1)
            kernel_u = U_val[0][:, :, 0, indices[i]]
            im = ax.imshow(np.squeeze(kernel_u))
            print("kernel mean,var,max,min", np.mean(kernel_u), np.var(kernel_u),
                  np.max(kernel_u), np.min(kernel_u))
            #fig.colorbar(im, ax=ax1)
        plt.show(block=False)

        print("outputs ...")
        n = 5
        out_val = output_func([np.expand_dims(x_test[5], axis=0)])
        print("Outputs shape", out_val[0].shape)
        num_filt = min(out_val[0].shape[3], 12)
        indices = np.int32(np.linspace(0, out_val[0].shape[3] - 1, num_filt))
        fig = plt.figure(figsize=(20, 8))
        ax = plt.subplot(1, num_filt + 1, 1)
        im = ax.imshow(np.squeeze(x_test[5]))
        print(y_test[5])
        print("input mean,var,max", np.mean(x_test[n]), np.var(x_test[n]), np.max(x_test[n]))
        for i in range(num_filt):
            ax = plt.subplot(1, num_filt + 1, i + 2)
            out_im = out_val[0][0, :, :, indices[i]]
            im = ax.imshow(np.squeeze(out_im))
            print("ouput mean,var,max", np.mean(out_im), np.var(out_im),
                  np.max(out_im), np.min(out_im))
            #plt.colorbar(im,ax=ax)
        plt.show(block=False)

        print("Weights")
        fig = plt.figure(figsize=(20, 8))
        num_show = min(U_val[0].shape[3], 12)
        indices = np.int32(np.linspace(0, U_val[0].shape[3] - 1, num_show))
        for i in range(num_show):
            ax1 = plt.subplot(1, num_filt, i + 1)
            plt.imshow(np.float32(ws[1][:, :, 0, indices[i]]), cmap='gray')
        plt.show(block=False)

        print("ACNN Filters after")
        fig = plt.figure(figsize=(20, 8))
        num_show = min(U_val[0].shape[3], 12)
        indices = np.int32(np.linspace(0, U_val[0].shape[3] - 1, num_show))
        for i in range(num_show):
            ax1 = plt.subplot(1, num_filt, i + 1)
            plt.imshow(np.float32(ws[1][:, :, 0, indices[i]] *
                                  K.eval(acnn_layer.U()[:, :, 0, indices[i]])), cmap='gray')
        plt.show(block=False)

        cnn_layer = model.get_layer('conv2d_1')
        wcnn = cnn_layer.get_weights()
        print("CNN Filters of", cnn_layer)
        fig = plt.figure(figsize=(20, 8))
        num_show = min(wcnn[0].shape[3], 12)
        indices = np.int32(np.linspace(0, wcnn[0].shape[3] - 1, num_show))
        for i in range(num_show):
            ax1 = plt.subplot(1, num_filt, i + 1)
            plt.imshow(np.float32(wcnn[0][:, :, 0, indices[i]]), cmap='gray')
        plt.show(block=False)

        # Histories recorded by the RecordVariable callbacks.
        rv_sigma_arr = np.array(rv_sigma_1.record)
        fig = plt.figure(figsize=(4, 8))
        plt.plot(rv_sigma_arr)
        plt.title('Sigma')
        plt.show(block=False)

        rv_weights_arr = np.array(rv_weights_1.record)
        rv_weights_arr2d = np.reshape(rv_weights_arr,
                                      (rv_weights_arr.shape[0],
                                       np.prod(rv_weights_arr.shape[1:])))
        print(rv_weights_arr.shape)
        fig = plt.figure(figsize=(4, 8))
        klist = [1, 1, 5, 9, 12, 15, 18, 25, 32, 132, 1132]
        for i in klist:
            plt.plot(rv_weights_arr2d[:, i])
        plt.title('weights-acnn')
        plt.show(block=False)

        rv_kernel_arr = np.array(rv_kernel.record)
        rv_kernel_arr2d = np.reshape(rv_kernel_arr,
                                     (rv_kernel_arr.shape[0],
                                      np.prod(rv_kernel_arr.shape[1:])))
        print(rv_kernel_arr.shape)
        fig = plt.figure(figsize=(4, 8))
        klist = [1, 1, 5, 9, 12, 15, 18, 25, 32]
        for i in klist:
            plt.plot(rv_kernel_arr2d[:, i])
        plt.title('weights-conv2d-1')
        plt.show(block=False)
def RunBoth(data, settings=None, rseed=42):
    """Build, train and evaluate one speech-command model chosen by *settings*.

    Parameters
    ----------
    data : 8-tuple of arrays
        (x_train, y_train, x_val, y_val, x_test, y_test, x_testR, y_testR);
        the R-suffixed pair is a second ("R") test set.
    settings : dict, optional
        Experiment configuration. Keys used here: 'model' ('conv_model',
        'paper' or 'focused'), 'Optimizer' ('SGD' or Adam otherwise),
        'lr_all', 'batch_size', 'Epochs', 'brep_att_dense', 'verbose'.
    rseed : int
        Seed tag used only in output file names.

    Returns
    -------
    (score, scoreR, results, model) : evaluation on both test sets, the Keras
    History object and the trained model (best weights restored).

    NOTE(review): relies on module-level names (ConvSpeechModel, SpeechModels,
    FNSpeechModel, nCategs, sr, L, optimizers, SGDwithLR, AdamwithClip,
    ModelCheckpoint, PrintLayerVariableStats, datetime, outputpath, np) —
    presumably imported/defined at file top; confirm.
    """
    # BUG FIX: the default used to be the mutable literal `settings={}`,
    # shared across calls.
    settings = {} if settings is None else settings

    def _plain_opt():
        # SGD-with-momentum or Adam, both gradient-clipped.
        if settings['Optimizer'] == 'SGD':
            return optimizers.SGD(lr=settings['lr_all'], momentum=0.9,
                                  clipvalue=1.0)
        return optimizers.Adam(clipvalue=1.0)

    if settings['model'] == 'conv_model':
        model = ConvSpeechModel(settings)
        # BUG FIX: this branch previously left `opt` unassigned, so
        # model.compile(optimizer=opt, ...) raised NameError for
        # 'conv_model'. Use the same optimizer selection as 'paper'.
        opt = _plain_opt()
    elif settings['model'] == 'paper':
        model = SpeechModels.AttRNNSpeechModelMELSPEC(
            nCategs, samplingrate=sr, inputShape=(125, 80, 1),
            rnn_func=L.CuDNNGRU)  # , rnn_func=L.LSTM
        opt = _plain_opt()
    elif settings['model'] == 'focused':
        model = FNSpeechModel(settings)
        if settings['Optimizer'] == 'SGD':
            # Per-variable LR/clip schedule for the focusing parameters.
            lr_dict = {'all': settings['lr_all'], 'Sigma': 0.01, 'Mu': 0.01}
            mom_dict = {'all': 0.9}
            clip_dict = {'Sigma': [0.01, 2.0], 'Mu': [0.01, 0.99]}
            decay_dict = {'all': 0.9}
            e_i = data[0].shape[0] / settings['batch_size']
            decay_epochs = np.array([e_i * 20, e_i * 20], dtype='int64')
            opt = SGDwithLR(lr=lr_dict, momentum=mom_dict, decay=decay_dict,
                            clips=clip_dict, decay_epochs=decay_epochs,
                            verbose=1, clipvalue=1.0)
            #, update_clip=1.0 not used as gives error
        else:
            clip_dict = {'Sigma': [0.05, 2.0], 'Mu': [0.01, 0.99]}
            opt = AdamwithClip(clips=clip_dict, clipvalue=1.0)

    #print(model.trainable_weights)
    model.compile(optimizer=opt,
                  loss=['sparse_categorical_crossentropy'],
                  metrics=['accuracy'])
    model.summary()
    # Snapshot of the untrained model for later comparison.
    model.save('epoch0_fo_' + str(rseed) + '.h5')

    # BTEK: added memory data
    x_train, y_train, x_val, y_val, x_test, y_test, x_testR, y_testR = data

    # Checkpoint the best validation-accuracy weights with a timestamped name.
    now = datetime.now()
    timestr = now.strftime("%Y%m%d-%H%M%S")
    filepath = (outputpath + settings['model'] + str(rseed) +
                'best_weights' + timestr + '.hdf5')
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max')
    callbacks = [checkpoint]

    if settings['brep_att_dense']:
        stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
        stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    if settings['model'] == 'focused' and settings['verbose']:
        # Per-epoch stats of the focus layer's parameters.
        pr_1 = PrintLayerVariableStats("focus-att", "Weights:0",
                                       stat_func_list, stat_func_name)
        pr_2 = PrintLayerVariableStats("focus-att", "Sigma:0",
                                       stat_func_list, stat_func_name)
        pr_3 = PrintLayerVariableStats("focus-att", "Mu:0",
                                       stat_func_list, stat_func_name)
        callbacks += [pr_1, pr_2, pr_3]

    results = model.fit(x_train, y_train,
                        epochs=settings['Epochs'],
                        validation_data=(x_val, y_val),
                        batch_size=settings['batch_size'],
                        callbacks=callbacks, verbose=1, shuffle=True)

    # Restore the best checkpoint before final evaluation.
    model.load_weights(filepath)
    score = model.evaluate(x_test, y_test, verbose=1)
    scoreR = model.evaluate(x_testR, y_testR, verbose=1)
    print("Test:", score, "TestR:", scoreR)
    return score, scoreR, results, model
def test_comp(settings, random_sid=9):
    """Train and evaluate a focused-neuron (or CNN) model on the dataset
    named in ``settings['dset']``.

    Supported datasets: 'mnist', 'cifar10', 'fashion', 'mnist-clut'
    (cluttered-MNIST .npz from disk) and 'lfw_faces' (via scikit-learn).
    Builds per-variable learning-rate/momentum/decay/clip dictionaries for
    each 'focus-N' layer, constructs the model with create_cnn_model /
    create_simple_model, trains with SGDwithLR (optionally with
    augmentation) and returns (score, history, model, callbacks).

    NOTE(review): relies on module-level `tf`, `np`, `create_cnn_model` and
    `create_simple_model`, not imported here — presumably defined at file
    top; confirm.
    """
    import keras
    from keras.optimizers import SGD
    from keras.datasets import mnist, fashion_mnist, cifar10
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import RecordVariable, \
        PrintLayerVariableStats, PrintAnyVariable, SGDwithLR, eval_Kdict, standarize_image_025
    from keras_preprocessing.image import ImageDataGenerator

    K.clear_session()
    epochs = settings['Epochs']
    batch_size = settings['batch_size']

    # Seed all RNGs for repeatability.
    sid = random_sid
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)

    # MINIMUM SIGMA CAN EFFECT THE PERFORMANCE.
    # BECAUSE NEURON CAN GET SHRINK TOO MUCH IN INITIAL EPOCHS WITH LARGER GRADIENTS
    #, and GET STUCK!
    MIN_SIG = 0.01   # clip bounds for the focus layers' Sigma...
    MAX_SIG = 1.0
    MIN_MU = 0.0     # ...and Mu parameters
    MAX_MU = 1.0

    # Per-variable hyperparameters; one entry per hidden focus layer.
    lr_dict = {'all': settings['lr_all']}  #0.1 is default for MNIST
    mom_dict = {'all': 0.9}
    decay_dict = {'all': 0.9}
    clip_dict = {}
    for i, n in enumerate(settings['nhidden']):
        lr_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.01})
        lr_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.01})
        lr_dict.update({'focus-' + str(i + 1) + '/Weights:0': 0.1})

        mom_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.9})
        mom_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9})

        decay_dict.update({'focus-' + str(i + 1) + '/Sigma:0': 0.5})
        decay_dict.update({'focus-' + str(i + 1) + '/Mu:0': 0.9})

        clip_dict.update({'focus-' + str(i + 1) + '/Sigma:0': (MIN_SIG, MAX_SIG)})
        clip_dict.update({'focus-' + str(i + 1) + '/Mu:0': (MIN_MU, MAX_MU)})

    # --- Dataset selection; each branch also sets its LR-decay schedule -----
    print("Loading dataset")
    if settings['dset'] == 'mnist':
        # input image dimensions
        img_rows, img_cols = 28, 28
        # the data, split between train and test sets
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        n_channels = 1
        e_i = x_train.shape[0] // batch_size  # iterations per epoch
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_epochs = [e_i * 30, e_i * 100]
    elif settings['dset'] == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        # works good as high as 77 for cnn-focus
        #decay_dict = {'all':0.9, 'focus-1/Sigma:0': 1.1,'focus-1/Mu:0':0.9,
        #              'focus-2/Sigma:0': 1.1,'focus-2/Mu:0': 0.9}
        #if cnn_model:
        batch_size = 256  # this works better than 500 for cifar-10
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 30, e_i * 80, e_i * 120, e_i * 180],
                                dtype='int64')
        #decay_epochs =np.array([e_i*10], dtype='int64')
    elif settings['dset'] == 'fashion':
        img_rows, img_cols = 28, 28
        n_channels = 1
        (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_dict = {
                'all': 0.9,
                'focus-1/Sigma:0': 0.9,
                'focus-1/Mu:0': 0.9,
                'focus-2/Sigma:0': 0.9,
                'focus-2/Mu:0': 0.9
            }
            decay_epochs = [e_i * 30, e_i * 100]
    elif settings['dset'] == 'mnist-clut':
        img_rows, img_cols = 60, 60
        # the data, split between train and test sets
        # (already normalized on disk; validation fold merged into training)
        folder = '/media/home/rdata/image/'
        data = np.load(folder + "mnist_cluttered_60x60_6distortions.npz")
        x_train, y_train = data['x_train'], np.argmax(data['y_train'], axis=-1)
        x_valid, y_valid = data['x_valid'], np.argmax(data['y_valid'], axis=-1)
        x_test, y_test = data['x_test'], np.argmax(data['y_test'], axis=-1)
        x_train = np.vstack((x_train, x_valid))
        y_train = np.concatenate((y_train, y_valid))
        n_channels = 1
        lr_dict = {'all': 0.01}
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100, e_i * 150], dtype='int64')
        if settings['cnn_model']:
            decay_epochs = [e_i * 30, e_i * 100]
    elif settings['dset'] == 'lfw_faces':
        from sklearn.datasets import fetch_lfw_people
        lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.4)
        # introspect the images arrays to find the shapes (for plotting)
        n_samples, img_rows, img_cols = lfw_people.images.shape
        n_channels = 1
        X = lfw_people.data
        n_features = X.shape[1]
        # the label to predict is the id of the person
        y = lfw_people.target
        target_names = lfw_people.target_names
        n_classes = target_names.shape[0]
        print("Total dataset size:")
        print("n_samples: %d" % n_samples)
        print("n_features: %d" % n_features)
        print("n_classes: %d" % n_classes)
        from sklearn.model_selection import train_test_split
        #X -= X.mean()
        #X /= X.std()
        #split into a training and testing set
        x_train, x_test, y_train, y_test = train_test_split(
            X, y, test_size=0.25, random_state=42)
        import matplotlib.pyplot as plt
        plt.imshow(X[0].reshape((img_rows, img_cols)))
        plt.show()
        lr_dict = {'all': 0.001}
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 50, e_i * 100, e_i * 150], dtype='int64')

    # Class count inferred from the labels actually present.
    num_classes = np.unique(y_train).shape[0]

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)

    # mnist-clut arrives pre-normalized; everything else is standardized here.
    if settings['dset'] != 'mnist-clut':
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, _, x_test = standarize_image_025(x_train, tst=x_test)
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # Wrap the scalar sigma-regularization strength into an l2 regularizer
    # (left as None when disabled); stored back into settings for the
    # model-construction helpers.
    sigma_reg = settings['focus_sigma_reg']
    sigma_reg = keras.regularizers.l2(
        sigma_reg) if sigma_reg is not None else sigma_reg
    settings['focus_sigma_reg'] = sigma_reg

    if settings['cnn_model']:
        model = create_cnn_model(input_shape, num_classes, settings=settings)
    else:
        model = create_simple_model(input_shape, num_classes, settings=settings)

    model.summary()
    print(lr_dict)
    print(mom_dict)
    print(decay_dict)
    print(clip_dict)

    opt = SGDwithLR(lr_dict, mom_dict, decay_dict, clip_dict,
                    decay_epochs)  #, decay=None)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=['accuracy'])

    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    #callbacks = [tb]
    callbacks = []

    if settings['neuron'] == 'focused':
        # Per-epoch stats and full histories of the first focus layer.
        pr_1 = PrintLayerVariableStats("focus-1", "Weights:0",
                                       stat_func_list, stat_func_name)
        pr_2 = PrintLayerVariableStats("focus-1", "Sigma:0",
                                       stat_func_list, stat_func_name)
        pr_3 = PrintLayerVariableStats("focus-1", "Mu:0",
                                       stat_func_list, stat_func_name)
        rv_weights_1 = RecordVariable("focus-1", "Weights:0")
        rv_sigma_1 = RecordVariable("focus-1", "Sigma:0")
        rv_mu_1 = RecordVariable("focus-1", "Mu:0")
        # Prints the optimizer's current per-variable learning rates.
        print_lr_rates_callback = keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                "iter: ", K.eval(model.optimizer.iterations),
                " LR RATES :", eval_Kdict(model.optimizer.lr)))
        callbacks += [
            pr_1, pr_2, pr_3, rv_weights_1, rv_sigma_1, rv_mu_1,
            print_lr_rates_callback
        ]

    if not settings['augment']:
        print('Not using data augmentation.')
        history = model.fit(x_train, y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            validation_data=(x_test, y_test),
                            shuffle=True,
                            callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # randomly shift images horizontally
            width_shift_range=0.1,
            # randomly shift images vertically
            height_shift_range=0.1,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            zoom_range=0.,
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format='channels_last',
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)
        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)
        # Fit the model on the batches generated by datagen.flow().
        history = model.fit_generator(datagen.flow(x_train, y_train,
                                                   batch_size=batch_size),
                                      validation_data=(x_test, y_test),
                                      epochs=epochs, verbose=1, workers=4,
                                      callbacks=callbacks,
                                      steps_per_epoch=x_train.shape[0] // batch_size)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    return score, history, model, callbacks
def test_transfer(dset='mnist', random_seed=9, epochs=10, data_augmentation=False,
                  batch_size=512, ntrn=None, ntst=None, mod='focusing'):
    """Transfer-learning experiment: frozen-ish VGG16 trunk + a small head.

    The head is either a FocusedLayer1D (mod='focused') or a plain Dense
    layer (mod='dense'), followed by BN/ReLU/Dropout and a 10-way softmax.

    NOTE(review): the default mod='focusing' matches neither 'focused' nor
    'dense', so a call with defaults prints 'unknown mod' and returns None.
    Kept for interface compatibility -- confirm the intended default.

    Parameters
    ----------
    dset : 'mnist' or 'cifar10'.
    random_seed : seed for numpy and TF RNGs.
    epochs, batch_size : training schedule.
    data_augmentation : use ImageDataGenerator when True.
    ntrn, ntst : optional subsample sizes for train/test sets.
    mod : 'focused' or 'dense' head type.

    Returns
    -------
    (score, history, model) on success; None for an unknown `mod`.
    """
    import os
    import numpy as np
    #os.environ['CUDA_VISIBLE_DEVICES']="0"
    #os.environ['TF_FORCE_GPU_ALLOW_GROWTH']="true"
    import keras
    from keras.losses import mse
    from keras.optimizers import SGD, RMSprop
    from keras.datasets import mnist, fashion_mnist, cifar10
    from keras.models import Sequential, Model
    from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, BatchNormalization
    from keras.layers import Activation, Permute, Concatenate, GlobalAveragePooling2D
    from skimage import filters
    from keras import backend as K
    from keras_utils import WeightHistory as WeightHistory
    from keras_utils import RecordVariable, \
        PrintLayerVariableStats, PrintAnyVariable, \
        SGDwithLR, eval_Kdict, standarize_image_025
    from keras_preprocessing.image import ImageDataGenerator
    from Kfocusing import FocusedLayer1D
    from keras.engine.topology import Layer
    from keras import activations, regularizers, constraints
    from keras import initializers
    from keras.engine import InputSpec
    import tensorflow as tf
    from keras.applications.inception_v3 import InceptionV3
    #import keras.applications.resnet50 as resnet
    #from keras.applications.resnet50 import preprocess_input
    from keras.applications import VGG16
    from keras.applications.vgg16 import preprocess_input

    # Cap this process at 30% of GPU memory, then reset any previous graph.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    K.tensorflow_backend.set_session(tf.Session(config=config))
    K.clear_session()

    # Seed numpy and TF for reproducibility.
    sid = random_seed
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)

    from datetime import datetime
    now = datetime.now()

    # Per-dataset data loading plus per-variable LR/momentum/decay/clip
    # dictionaries for SGDwithLR (keys are '<layer>/<var>:0' names).
    if dset == 'mnist':
        img_rows, img_cols = 28, 28
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        NTRN = ntrn if ntrn else x_train.shape[0]
        NTST = ntst if ntst else x_test.shape[0]
        n_channels = 1
        lr_dict = {'all': 0.1,
                   'focus-1/Sigma:0': 0.01, 'focus-1/Mu:0': 0.01, 'focus-1/Weights:0': 0.1,
                   'focus-2/Sigma:0': 0.01, 'focus-2/Mu:0': 0.01, 'focus-2/Weights:0': 0.1}
        mom_dict = {'all': 0.9, 'focus-1/Sigma:0': 0.9, 'focus-1/Mu:0': 0.9,
                    'focus-2/Sigma:0': 0.9, 'focus-2/Mu:0': 0.9}
        decay_dict = {'all': 0.9, 'focus-1/Sigma:0': 0.1, 'focus-1/Mu:0': 0.1,
                      'focus-2/Sigma:0': 0.1, 'focus-2/Mu:0': 0.1}
        clip_dict = {'focus-1/Sigma:0': (0.01, 1.0), 'focus-1/Mu:0': (0.0, 1.0),
                     'focus-2/Sigma:0': (0.01, 1.0), 'focus-2/Mu:0': (0.0, 1.0)}
        e_i = x_train.shape[0] // batch_size
        decay_epochs = np.array([e_i * 100], dtype='int64')
    elif dset == 'cifar10':
        img_rows, img_cols = 32, 32
        n_channels = 3
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        NTRN = ntrn if ntrn else x_train.shape[0]
        NTST = ntst if ntst else x_test.shape[0]
        lr_dict = {'all': 1e-3,
                   'focus-1/Sigma:0': 1e-3, 'focus-1/Mu:0': 1e-3, 'focus-1/Weights:0': 1e-3,
                   'focus-2/Sigma:0': 1e-3, 'focus-2/Mu:0': 1e-3, 'focus-2/Weights:0': 1e-3,
                   'dense_1/Weights:0': 1e-3}
        # 1e-3 'all' reaches 91.43 at 250 epochs; 90.75 at 100 epochs.
        mom_dict = {'all': 0.9, 'focus-1/Sigma:0': 0.9, 'focus-1/Mu:0': 0.9,
                    'focus-2/Sigma:0': 0.9, 'focus-2/Mu:0': 0.9}
        #decay_dict = {'all':0.9}
        decay_dict = {'all': 0.9, 'focus-1/Sigma:0': 0.9, 'focus-1/Mu:0': 0.9,
                      'focus-2/Sigma:0': 0.9, 'focus-2/Mu:0': 0.9}
        clip_dict = {'focus-1/Sigma:0': (0.01, 1.0), 'focus-1/Mu:0': (0.0, 1.0),
                     'focus-2/Sigma:0': (0.01, 1.0), 'focus-2/Mu:0': (0.0, 1.0)}
        e_i = NTRN // batch_size
        #decay_epochs =np.array([e_i*10], dtype='int64') #for 20 epochs
        #decay_epochs =np.array([e_i*10,e_i*80,e_i*120,e_i*160], dtype='int64')
        decay_epochs = np.array([e_i * 10, e_i * 80, e_i * 120, e_i * 180], dtype='int64')

    num_classes = np.unique(y_train).shape[0]

    # Arrange the data layout to match the backend's image format.
    # BUG FIX: the original unconditionally re-reshaped both arrays (and
    # input_shape) to channels_last right after this branch, clobbering the
    # channels_first layout selected above.
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], n_channels, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], n_channels, img_rows, img_cols)
        input_shape = (n_channels, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, n_channels)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, n_channels)
        input_shape = (img_rows, img_cols, n_channels)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    #FRAME_SIZE=(299,299,3)
    FRAME_SIZE = (224, 224, 3)

    # Randomly subsample NTRN / NTST examples.
    idx = np.random.permutation(x_train.shape[0])
    x_train = x_train[idx[0:NTRN]]
    y_train = y_train[idx[0:NTRN]]
    idx = np.random.permutation(x_test.shape[0])
    x_test = x_test[idx[0:NTST]]
    y_test = y_test[idx[0:NTST]]

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    #x_train, _, x_test = paddataset(x_train,None,x_test,FRAME_SIZE,False,5)
    #x_train, _, x_test = standarize_image_025(x_train,tst=x_test)
    x_train = preprocess_input(x_train)
    x_test = preprocess_input(x_test)

    # Quick visual sanity check of the preprocessed inputs.
    import matplotlib.pyplot as plt
    plt.imshow(x_train[0])
    print(np.max(x_train[0]), np.mean(x_train[0]))
    plt.show()
    plt.imshow(x_test[0])
    # BUG FIX: the original printed x_train stats here (copy-paste error).
    print(np.max(x_test[0]), np.mean(x_test[0]))
    plt.show()

    print(x_train.shape, 'train samples')
    print(np.mean(x_train))
    print(np.var(x_train))
    print(x_test.shape, 'test samples')
    print(np.mean(x_test))
    print(np.var(x_test))

    # Create the base pre-trained model (VGG16 without classifier head).
    base_in = Input(shape=input_shape, name='inputlayer')
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=input_shape, input_tensor=base_in)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)

    pad_input = True
    if pad_input:
        # Zero-pad the pooled feature vector by 3 on each side.
        print("PADDING LAYER OUPUT")
        paddings = tf.constant([[0, 0, ], [3, 3]])
        padding_layer = keras.layers.Lambda(lambda x: tf.pad(x, paddings, "CONSTANT"))
        x = padding_layer(x)
    #x = Dropout(0.1)(x)

    # Head: focused layer or plain dense, then BN/ReLU/Dropout/softmax.
    focusing = mod == 'focused'
    if focusing:
        nf = 40
        #init_sigma=np.exp(-(np.linspace(0.1, 0.9, nf)-0.5)**2/0.07),
        x = FocusedLayer1D(units=nf, name='focus-1', activation='linear',
                           init_sigma=0.08, init_mu='spread', init_w=None,
                           train_sigma=True, train_weights=True, train_mu=True,
                           si_regularizer=None, normed=2)(x)
    elif mod == 'dense':
        x = Dense(40, activation='linear')(x)
    else:
        print('unknown mod')
        return

    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)
    predictions = Dense(10, activation='softmax')(x)
    model = Model(inputs=base_in, outputs=[predictions])

    # Optimizer selection (hard-coded to SGDwithLR).
    optimizer_s = 'SGDwithLR'
    if optimizer_s == 'SGDwithLR':
        opt = SGDwithLR(lr_dict, mom_dict, decay_dict, clip_dict, decay_epochs)  #, decay=None)
    elif optimizer_s == 'RMSprob':
        opt = RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0)
    else:
        #opt= SGDwithLR({'all': 0.01},{'all':0.9})#, decay=None)
        opt = SGD(lr=0.01, momentum=0.9)  #, decay=None)

    # Compile the model (should be done *after* setting layers to non-trainable).
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt, metrics=['accuracy'])
    model.summary()

    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    callbacks = []
    if focusing:
        # Per-epoch stats for the focused layer's trainable variables.
        pr_1 = PrintLayerVariableStats("focus-1", "Weights:0", stat_func_list, stat_func_name)
        pr_2 = PrintLayerVariableStats("focus-1", "Sigma:0", stat_func_list, stat_func_name)
        pr_3 = PrintLayerVariableStats("focus-1", "Mu:0", stat_func_list, stat_func_name)
        callbacks += [pr_1, pr_2, pr_3]
    recordvariables = False
    if recordvariables:
        rv_weights_1 = RecordVariable("focus-1", "Weights:0")
        rv_sigma_1 = RecordVariable("focus-1", "Sigma:0")
        callbacks += [rv_weights_1, rv_sigma_1]
    if optimizer_s == 'SGDwithLR':
        # Log the optimizer's per-variable learning rates each epoch.
        print_lr_rates_callback = keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                "iter: ", K.eval(model.optimizer.iterations),
                " LR RATES :", eval_Kdict(model.optimizer.lr)))
        callbacks.append(print_lr_rates_callback)

    if not data_augmentation:
        print('Not using data augmentation.')
        history = model.fit(x_train, y_train, batch_size=batch_size,
                            epochs=epochs, validation_data=(x_test, y_test),
                            shuffle=True, callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            featurewise_center=False,               # set input mean to 0 over the dataset
            samplewise_center=False,                # set each sample mean to 0
            featurewise_std_normalization=False,    # divide inputs by std of dataset
            samplewise_std_normalization=False,     # divide each input by its std
            zca_whitening=False,                    # apply ZCA whitening
            zca_epsilon=1e-06,                      # epsilon for ZCA whitening
            rotation_range=0,                       # random rotation range (deg)
            width_shift_range=0.2,                  # random horizontal shift
            height_shift_range=0.2,                 # random vertical shift
            shear_range=0.1,                        # random shear
            zoom_range=0.1,                         # random zoom
            channel_shift_range=0.,                 # random channel shifts
            fill_mode='nearest',                    # fill mode for out-of-bounds points
            cval=0.,                                # value for fill_mode="constant"
            horizontal_flip=True,                   # randomly flip images
            vertical_flip=False,                    # randomly flip images
            rescale=None,                           # rescaling factor (applied first)
            preprocessing_function=None,            # per-input function
            data_format='channels_last',
            validation_split=0.0)
        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)
        #x_test,_,_ = paddataset(x_test,None, None,frame_size=FRAME_SIZE, random_pos=False)
        # Fit the model on the batches generated by datagen.flow().
        history = model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=batch_size),
            validation_data=(x_test, y_test), workers=4,
            use_multiprocessing=False, epochs=epochs, verbose=2,
            callbacks=callbacks,
            steps_per_epoch=x_train.shape[0] // batch_size)

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    return score, history, model
def build_model(N=64, mod='dense', optimizer_s='SGDwithLR', dropout=0.2,
                recurrent_dropout=0.2, init_sigma_current=0.1,
                init_sigma_prev=0.1, num_epochs=15, sgd_settings=None,
                dataset_percantage=100):
    """Build, train and evaluate an IMDB sentiment model.

    Parameters
    ----------
    N : units for the SimpleFocusedRNN layer (mod='focused').
    mod : 'simplernn', 'focused', or anything else for no recurrent layer.
    optimizer_s : 'SGDwithLR', 'AdamwithClip', 'RMSpropwithClip', 'adam',
        or fallback plain SGD.
    dropout, recurrent_dropout, init_sigma_current, init_sigma_prev :
        SimpleFocusedRNN hyper-parameters.
    num_epochs : training epochs.
    sgd_settings : (lr, momentum, decay, clips) tuple for SGDwithLR.
    dataset_percantage : percentage (0, 100] of each split to use.
        (Name kept, typo and all, for caller compatibility.)

    Returns
    -------
    The trained Keras model (test accuracy is printed).
    """
    top_words = 5000
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

    # Truncate and pad input sequences to a fixed length.
    max_review_length = 500
    # Use the first `dataset_percantage` percent of each split.
    # BUG FIX / generalization: the original only handled 100/75/50/25/10
    # and silently skipped pad_sequences for any other value; the computed
    # slice reproduces the original sizes (25000 -> 18750/12500/6250/2500)
    # and works for any percentage.
    n_trn = len(X_train) * dataset_percantage // 100
    n_tst = len(X_test) * dataset_percantage // 100
    X_train = sequence.pad_sequences(X_train[:n_trn], maxlen=max_review_length)
    X_test = sequence.pad_sequences(X_test[:n_tst], maxlen=max_review_length)
    y_train = y_train[:n_trn]
    y_test = y_test[:n_tst]

    embedding_vecor_length = 32
    model = Sequential()
    model.add(layers.Embedding(top_words, embedding_vecor_length,
                               input_length=max_review_length))
    #model.add(Flatten())
    if mod == 'simplernn':
        model.add(SimpleRNN(100))
    elif mod == 'focused':
        model.add(SimpleFocusedRNN(units=N, name='focus-1',
                                   kernel_initializer='he_normal',
                                   dropout=dropout,
                                   recurrent_dropout=recurrent_dropout,
                                   init_sigma_current=init_sigma_current,
                                   init_sigma_prev=init_sigma_prev))
    model.add(layers.Dense(1, name='dense-3', activation='sigmoid'))

    # Optimizer selection.
    if optimizer_s == 'SGDwithLR' and sgd_settings is not None:
        decay_epochs = [3, 5, 8, 11]
        opt = SGDwithLR(lr=sgd_settings[0], momentum=sgd_settings[1],
                        decay=sgd_settings[2], clips=sgd_settings[3],
                        decay_epochs=decay_epochs, verbose=1)  #, decay=None)
    elif optimizer_s == 'AdamwithClip':
        opt = AdamwithClip()
    elif optimizer_s == 'RMSpropwithClip':
        # BUG FIX: the original passed `clips=clip_dict`, but no `clip_dict`
        # exists in this scope (guaranteed NameError). Use no clips here;
        # pass clip ranges via sgd_settings-style plumbing if needed.
        opt = RMSpropwithClip(lr=0.001, rho=0.9, epsilon=None, decay=0.0,
                              clips={})
    elif optimizer_s == 'adam':
        opt = 'adam'
    else:
        opt = SGD(lr=0.01, momentum=0.9)  #, decay=None)
    print("opt= ", opt)
    model.compile(loss='binary_crossentropy', optimizer=opt,
                  metrics=['accuracy'])

    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    callbacks = []
    if mod == 'focused':
        # BUG FIX guard: these callbacks inspect the 'focus-1' layer's
        # variables, which only exist when the focused RNN was added.
        pr_0 = PrintLayerVariableStats("focus-1", "kernel:0",
                                       stat_func_list, stat_func_name, 0)
        pr_1 = PrintLayerVariableStats("focus-1", "Sigma_current:0",
                                       stat_func_list, stat_func_name, 1)
        pr_2 = PrintLayerVariableStats("focus-1", "Mu_current:0",
                                       stat_func_list, stat_func_name, 2)
        pr_3 = PrintLayerVariableStats("focus-1", "recurrent_kernel:0",
                                       stat_func_list, stat_func_name, 3)
        callbacks += [pr_0, pr_1, pr_2, pr_3]

    print(model.summary())
    model.fit(X_train, y_train, epochs=num_epochs, batch_size=64, verbose=1,
              callbacks=callbacks)

    # Final evaluation of the model.
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    return model
def test(settings, sid=9):
    """Train and evaluate a (possibly adaptive-conv) ResNet per `settings`.

    `settings` keys read here: 'dset', 'batch', 'epochs', 'test_layer',
    'kernel_size', 'data_augmentation', 'depth', 'lr_multiplier', and
    optionally 'init_sigma', 'norm', 'nfilters'.
    Relies on module-level names (K, tf, np, os, keras, Input, Model,
    load_dataset, resnet, SGDwithCyclicLR, SGDwithLR, ModelCheckpoint,
    ImageDataGenerator) imported elsewhere in this file.
    Returns (score, history, model).
    """
    #sess = K.get_session()
    K.clear_session()
    #sess = tf.Session(graph=g)
    #K.set_session(sess)
    # Seed numpy and TF for reproducibility.
    np.random.seed(sid)
    tf.random.set_random_seed(sid)
    tf.compat.v1.random.set_random_seed(sid)
    # Model parameter
    # ----------------------------------------------------------------------------
    # | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
    # Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
    # |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
    # ----------------------------------------------------------------------------
    # ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---)
    # ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA)
    # ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA)
    # ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100)
    # ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180)
    # ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---)
    # ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---)
    # ---------------------------------------------------------------------------
    n = 3
    # Model version
    # Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
    version = 1
    # Computed depth from supplied model parameter n (naming only; actual
    # depth used for building comes from settings['depth'] below).
    if version == 1:
        depth = n * 6 + 2
    elif version == 2:
        depth = n * 9 + 2
    # Model name, depth and version
    model_type = 'ResNet%dv%d' % (depth, version)
    #sess = K.get_session()
    # Unpack experiment settings (second assignment to dset overrides the
    # 'cifar10' placeholder on purpose).
    dset = 'cifar10'
    dset = settings['dset']
    batch_size = settings['batch']
    num_classes = 10
    epochs = settings['epochs']
    test_acnn = settings['test_layer'] == 'aconv'
    akernel_size = settings['kernel_size']
    data_augmentation = settings['data_augmentation']
    num_blocks = settings['depth']
    lr_multiplier = settings['lr_multiplier']
    nfilters = 16
    normalize_data = True
    # Options forwarded to the adaptive-conv layers inside resnet().
    acnn_options = {
        'init_sigma': 0.15,
        'norm': 2,
        'kernel_size': (akernel_size, akernel_size)
    }
    if dset == 'mnist':
        acnn_options.update({'dropout': 0.25})
    elif dset == 'mnist-clut':
        normalize_data = False
    # Optional per-experiment overrides.
    if 'init_sigma' in settings.keys():
        acnn_options['init_sigma'] = settings['init_sigma']
    if 'norm' in settings.keys():
        acnn_options['norm'] = settings['norm']
    if 'nfilters' in settings.keys():
        nfilters = settings['nfilters']
    ld_data = load_dataset(dset, normalize_data, options=[])
    x_train, y_train, x_test, y_test, input_shape, num_classes = ld_data
    # Build the (adaptive) ResNet.
    inputs = Input(shape=input_shape)
    outputs = resnet(inputs,
                     num_classes,
                     num_blocks=num_blocks,
                     kernel_size=akernel_size,
                     num_filters=nfilters,
                     acnn=test_acnn,
                     acnn_options=acnn_options)
    model = Model(inputs, outputs)
    model.summary()
    print(model_type)
    #lr_dict = {'all':0.001,'acnn-1/Sigma:0': 0.001,'acnn-1/Weights:0': 0.001,
    #           'acnn-2/Sigma:0': 0.001,'acnn-2/Weights:0': 0.001}
    # Per-variable learning rates, scaled by the experiment multiplier.
    lr_dict = {'all': 0.01, 'Sigma': 0.01}
    for i in lr_dict.keys():
        lr_dict[i] *= settings['lr_multiplier']
    # Sigma clip range tied to the adaptive kernel size.
    MIN_SIG = 1.0 / akernel_size
    MAX_SIG = akernel_size * 1.0
    mom_dict = {'all': 0.9}
    clip_dict = {'Sigma': [MIN_SIG, MAX_SIG]}
    decay_dict = {'all': 0.1}
    e_i = x_train.shape[0] // batch_size  # iterations per epoch
    #decay_epochs =np.array([e_i*1,e_i*2,e_i*3,e_i*4,e_i*80,e_i*120,e_i*160], dtype='int64')
    decay_epochs = np.array([e_i * 80, e_i * 120, e_i * 160], dtype='int64')
    #print("WHAT THE ", lr_dict)
    #opt = SGDwithLR(lr=lr_dict, momentum = mom_dict, decay=decay_dict,
    #                clips=clip_dict,decay_epochs=decay_epochs, clipvalue=1.0,
    #                verbose=2)
    #peaklriter,lr={'all':0.01}, momentum={'all':0.0},
    #            min_lr={'all':0.0001}, peak_lr={'all':2.0}, dropsigma = 0.5,
    #            clips={}, nesterov=False, verbose=0, update_clip=100.0,
    #            pattern_search=True,**kwargs):
    # Cyclic LR (peak at mid-training) vs. step-decay schedule.
    lr_cyclic = True
    if lr_cyclic:
        opt = SGDwithCyclicLR(
            peaklriter=epochs / 2 * e_i,
            lr=lr_dict,
            momentum=mom_dict,
            min_lr={'all': 0.0001},  #0.0001
            peak_lr={
                'all': 0.5 * lr_multiplier,
                'Sigma': 0.1 * lr_multiplier
            },
            lrsigma=0.5,
            clips=clip_dict,
            clipvalue=1.0,
            verbose=2)
    else:
        opt = SGDwithLR(lr=lr_dict,
                        momentum=mom_dict,
                        decay=decay_dict,
                        clips=clip_dict,
                        decay_epochs=decay_epochs,
                        clipvalue=1.0,
                        verbose=2)
    # if dset=='lfw_faces': does not get more than 90s
    #     print("USING ADAM")
    #     opt = AdamwithClip(1e-2, clips=clip_dict, clipvalue=1.0)
    #     red_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
    #                                                factor=0.9, patience=10,
    #                                                verbose=1, mode='auto',
    #                                                min_delta=0.0001,
    #                                                cooldown=10, min_lr=1e-5)
    # gives 92.24 at 150 epochs for CIFAR
    #pwl = lambda t: np.interp(t,[0, 15, 30, 35], [0, 0.1, 0.005, 0])
    from keras.optimizers import SGD, Nadam
    #opt = SGDwithLR(lr=0.01,momentum=0.9,nesterov=True, decay = 5e-4*128)
    #opt = SGDwithLR(lr=0.01,momentum=0.9,nesterov=True, decay = 5e-4*128)
    #opt = Nadam()
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    #model.summary()
    # Prepare model model saving directory.
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    if test_acnn:
        model_name = dset + '_%s_acnn_resnet_model_best_sofar.h5' % model_type
    else:
        model_name = dset + '_%s_resnet_model_best_sofar.h5' % model_type
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    filepath = os.path.join(save_dir, model_name)
    from keras.utils import plot_model
    plot_model(model,
               'figures/may2020/simple_model.png',
               show_shapes=True,
               show_layer_names=False)
    #input('wairing')
    # Prepare callbacks for model saving and for learning rate adjustment.
    # NOTE(review): `checkpoint` is created but never added to `callbacks`
    # below, so no model is actually saved -- confirm whether intentional.
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)
    # lr_scheduler = LearningRateScheduler(lr_schedule)
    #
    # lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
    #                                cooldown=0,
    #                                patience=5,
    #                                min_lr=0.5e-6)
    #callbacks = [checkpoint, lr_reducer, lr_scheduler]
    stat_func_name = ['max: ', 'mean: ', 'min: ', 'var: ', 'std: ']
    stat_func_list = [np.max, np.mean, np.min, np.var, np.std]
    callbacks = []  #LearningRateScheduler(pwl)
    # When silent_mode is off, print per-epoch stats of the first adaptive
    # conv block's Weights/Sigma variables.
    silent_mode = True
    if not silent_mode:
        from keras_utils import PrintLayerVariableStats
        if test_acnn:
            # pr_1 = PrintLayerVariableStats("conv2d_adaptive_2","Weights:0",stat_func_list,stat_func_name)
            # pr_2 = PrintLayerVariableStats("conv2d_adaptive_2","Sigma:0",stat_func_list,stat_func_name)
            # pr_3 = PrintLayerVariableStats("conv2d_adaptive_4","Weights:0",stat_func_list,stat_func_name)
            # pr_4 = PrintLayerVariableStats("conv2d_adaptive_4","Sigma:0",stat_func_list,stat_func_name)
            #
            pr_1 = PrintLayerVariableStats("lv1_blk1_res_conv1_aconv2D",
                                           "Weights:0", stat_func_list,
                                           stat_func_name)
            pr_2 = PrintLayerVariableStats("lv1_blk1_res_conv1_aconv2D",
                                           "Sigma:0", stat_func_list,
                                           stat_func_name)
            pr_3 = PrintLayerVariableStats("lv1_blk1_res_conv2_aconv2D",
                                           "Weights:0", stat_func_list,
                                           stat_func_name)
            pr_4 = PrintLayerVariableStats("lv1_blk1_res_conv2_aconv2D",
                                           "Sigma:0", stat_func_list,
                                           stat_func_name)
            # NOTE(review): the next call's result is discarded -- it
            # constructs a ReduceLROnPlateau that is never registered.
            keras.callbacks.ReduceLROnPlateau()
            #rv_weights_1 = RecordVariable("acnn-1","Weights:0")
            #rv_sigma_1 = RecordVariable("acnn-1","Sigma:0")
            callbacks += [pr_1, pr_2, pr_3, pr_4]  #,rv_weights_1,rv_sigma_1]
        else:
            #pr_1 = PrintLayerVariableStats("conv2d_3","kernel:0",stat_func_list,stat_func_name)
            #rv_weights_1 = RecordVariable("conv2d_3","kernel:0")
            pass
    # if dset=='lfw_faces':
    #     callbacks+=[red_lr]
    # Run training, with or without data augmentation.
    if not data_augmentation:
        print('Not using data augmentation.')
        his = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        shuffle=True,
                        callbacks=callbacks)
    else:
        print('Using real-time data augmentation.')
        # This will do preprocessing and realtime data augmentation:
        datagen = ImageDataGenerator(
            # set input mean to 0 over the dataset
            featurewise_center=False,
            # set each sample mean to 0
            samplewise_center=False,
            # divide inputs by std of dataset
            featurewise_std_normalization=False,
            # divide each input by its std
            samplewise_std_normalization=False,
            # apply ZCA whitening
            zca_whitening=False,
            # epsilon for ZCA whitening
            zca_epsilon=1e-06,
            # randomly rotate images in the range (deg 0 to 180)
            rotation_range=0,
            # brightness_range=[0.9,1.1],
            # randomly shift images horizontally
            width_shift_range=0.2,
            # randomly shift images vertically
            height_shift_range=0.2,
            # set range for random shear
            shear_range=0.,
            # set range for random zoom
            #zoom_range=[0.9,1.1],
            # set range for random channel shifts
            channel_shift_range=0.,
            # set mode for filling points outside the input boundaries
            fill_mode='nearest',
            # value used for fill_mode = "constant"
            cval=0.,
            # randomly flip images
            horizontal_flip=True,
            # randomly flip images
            vertical_flip=False,
            # set rescaling factor (applied before any other transformation)
            rescale=None,
            # set function that will be applied on each input
            preprocessing_function=None,
            # image data format, either "channels_first" or "channels_last"
            data_format=None,
            # fraction of images reserved for validation (strictly between 0 and 1)
            validation_split=0.0)
        # Compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)
        # Fit the model on the batches generated by datagen.flow().
        his = model.fit_generator(datagen.flow(x_train,
                                               y_train,
                                               batch_size=batch_size),
                                  validation_data=(x_test, y_test),
                                  epochs=epochs,
                                  verbose=1,
                                  workers=4,
                                  callbacks=callbacks,
                                  steps_per_epoch=x_train.shape[0] // batch_size)
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    return score, his, model